ManagedCuda.CudaOccupancy.cudaOccMaxBlocksPerSMSmemLimit C# (CSharp) Method

cudaOccMaxBlocksPerSMSmemLimit() private static method

private static cudaOccMaxBlocksPerSMSmemLimit ( cudaOccResult result, cudaOccDeviceProp properties, cudaOccFuncAttributes attributes, cudaOccDeviceState state, int blockSize, ManagedCuda.BasicTypes.SizeT dynamicSmemSize ) : int
result cudaOccResult
properties cudaOccDeviceProp
attributes cudaOccFuncAttributes
state cudaOccDeviceState
blockSize int
dynamicSmemSize ManagedCuda.BasicTypes.SizeT
return int
        /// <summary>
        /// Determines how many blocks fit on one multiprocessor when shared
        /// memory is the only limiting resource.
        /// </summary>
        /// <param name="result">Receives the per-block shared memory allocation in <c>AllocatedSharedMemPerBlock</c>.</param>
        /// <param name="properties">Device properties supplying the per-block and per-multiprocessor shared memory sizes.</param>
        /// <param name="attributes">Function attributes; <c>sharedSizeBytes</c> is added to the dynamic request.</param>
        /// <param name="state">Device state; its <c>cacheConfig</c> selects the preferred shared memory size.</param>
        /// <param name="blockSize">Not used by this computation.</param>
        /// <param name="dynamicSmemSize">Dynamic shared memory requested per block.</param>
        /// <returns>
        /// 0 when the rounded per-block allocation exceeds <c>properties.sharedMemPerBlock</c>,
        /// <c>int.MaxValue</c> when the block allocates no shared memory, otherwise the
        /// selected per-multiprocessor budget divided by the per-block allocation.
        /// </returns>
        private static int cudaOccMaxBlocksPerSMSmemLimit(
			cudaOccResult result,
			cudaOccDeviceProp     properties,
			cudaOccFuncAttributes attributes,
			cudaOccDeviceState    state,
			int                   blockSize,
			SizeT                 dynamicSmemSize)
        {
            int granularity = cudaOccSMemAllocationGranularity(properties);

            // Shared memory size per multiprocessor implied by the cache
            // configuration. It only applies while one block still fits in it.
            SizeT preferredSmemPerSM = cudaOccSMemPerMultiprocessor(properties, state.cacheConfig);

            // Static plus dynamic usage, rounded via __occRoundUp with the
            // allocation granularity.
            SizeT requestedPerBlock = attributes.sharedSizeBytes + dynamicSmemSize;
            SizeT allocatedPerBlock = __occRoundUp((int)requestedPerBlock, granularity);

            int blockLimit;

            if (allocatedPerBlock > properties.sharedMemPerBlock)
            {
                // A single block already exceeds the per-block limit.
                blockLimit = 0;
            }
            else if (allocatedPerBlock > 0)
            {
                // Use the preferred size as the budget when at least one block
                // fits in it; otherwise fall back to the device maximum.
                SizeT smemBudgetPerSM;
                if (preferredSmemPerSM >= allocatedPerBlock)
                {
                    smemBudgetPerSM = preferredSmemPerSM;
                }
                else
                {
                    smemBudgetPerSM = properties.sharedMemPerMultiprocessor;
                }

                blockLimit = (int)(smemBudgetPerSM / allocatedPerBlock);
            }
            else
            {
                // No shared memory is used, so it imposes no limit.
                blockLimit = int.MaxValue;
            }

            result.AllocatedSharedMemPerBlock = allocatedPerBlock;

            return blockLimit;
        }