ManagedCuda.CudaOccupancy.cudaOccMaxPotentialOccupancyBlockSize C# (CSharp) Method

cudaOccMaxPotentialOccupancyBlockSize() public static method

public static cudaOccMaxPotentialOccupancyBlockSize ( int &minGridSize, int &blockSize, cudaOccDeviceProp properties, cudaOccFuncAttributes attributes, cudaOccDeviceState state, del_blockSizeToDynamicSMemSize blockSizeToDynamicSMemSize, ManagedCuda.BasicTypes.SizeT dynamicSMemSize ) : void
minGridSize int
blockSize int
properties cudaOccDeviceProp
attributes cudaOccFuncAttributes
state cudaOccDeviceState
blockSizeToDynamicSMemSize del_blockSizeToDynamicSMemSize
dynamicSMemSize ManagedCuda.BasicTypes.SizeT
return void
        /// <summary>
        /// Searches for the block size that yields the highest occupancy, measured in
        /// resident threads per multiprocessor, and reports it together with a grid size
        /// derived from that result.
        /// Candidates are visited from the largest allowed block size downwards in steps of
        /// <c>properties.warpSize</c>; a candidate only replaces the recorded best when its
        /// occupancy is strictly greater, so on a tie the larger block size is kept.
        /// </summary>
        /// <param name="minGridSize">Receives the best candidate's active blocks per multiprocessor multiplied by <c>properties.numSms</c>. Set to 0 when no candidate produced a non-zero occupancy.</param>
        /// <param name="blockSize">Receives the best block size found. Set to 0 when no candidate produced a non-zero occupancy.</param>
        /// <param name="properties">Device properties; supplies the warp size, the per-block and per-multiprocessor thread limits and the multiprocessor count.</param>
        /// <param name="attributes">Function attributes; supplies the function's own per-block thread limit.</param>
        /// <param name="state">Device state, forwarded unchanged to the input check and the occupancy calculation.</param>
        /// <param name="blockSizeToDynamicSMemSize">Optional mapping from block size to dynamic shared memory size. When not null it is evaluated for every candidate and its result replaces <paramref name="dynamicSMemSize"/>.</param>
        /// <param name="dynamicSMemSize">Dynamic shared memory size used for every candidate when no mapping is supplied.</param>
        public static void cudaOccMaxPotentialOccupancyBlockSize(
            ref int minGridSize,
            ref int blockSize,
            cudaOccDeviceProp properties,
            cudaOccFuncAttributes attributes,
            cudaOccDeviceState state,
            del_blockSizeToDynamicSMemSize blockSizeToDynamicSMemSize,
            SizeT dynamicSMemSize)
        {
            cudaOccResult occResult = new cudaOccResult();

            // No explicit null checks are made here; cudaOccInputCheck is the only
            // validation this method performs on its arguments.
            cudaOccInputCheck(properties, attributes, state);

            // Search bounds.
            int threadsPerSmLimit = properties.maxThreadsPerMultiProcessor;
            int warpStep = properties.warpSize;
            int largestBlock = __occMin(properties.maxThreadsPerBlock, attributes.maxThreadsPerBlock);

            // Best configuration seen so far.
            int bestBlockSize = 0;
            int bestBlocksPerSm = 0;
            int bestThreadsPerSm = 0;

            // Walk down from the limit rounded up to a multiple of the warp size.
            int alignedCandidate = __occRoundUp(largestBlock, warpStep);
            while (alignedCandidate > 0)
            {
                // The rounded-up starting value may exceed the real limit, so clamp it.
                int candidate = __occMin(largestBlock, alignedCandidate);

                // A user-supplied mapping takes precedence over the fixed shared memory size.
                if (blockSizeToDynamicSMemSize != null)
                {
                    dynamicSMemSize = blockSizeToDynamicSMemSize(candidate);
                }

                // Only the filled-in result object is consumed; no status value is inspected.
                cudaOccMaxActiveBlocksPerMultiprocessor(
                    occResult, properties, attributes, state, candidate, dynamicSMemSize);

                int blocksPerSm = occResult.ActiveBlocksPerMultiProcessor;
                int threadsPerSm = candidate * blocksPerSm;

                if (threadsPerSm > bestThreadsPerSm)
                {
                    bestBlockSize = candidate;
                    bestBlocksPerSm = blocksPerSm;
                    bestThreadsPerSm = threadsPerSm;
                }

                // Nothing can beat the per-multiprocessor thread limit, so stop searching.
                if (bestThreadsPerSm == threadsPerSmLimit)
                {
                    break;
                }

                alignedCandidate -= warpStep;
            }

            // Grid size: best blocks per multiprocessor times the number of multiprocessors.
            minGridSize = bestBlocksPerSm * properties.numSms;
            blockSize = bestBlockSize;
        }

Other overloads of this method

CudaOccupancy::cudaOccMaxPotentialOccupancyBlockSize ( int &minGridSize, int &blockSize, cudaOccDeviceProp properties, cudaOccFuncAttributes attributes, cudaOccDeviceState state, ManagedCuda.BasicTypes.SizeT dynamicSMemSize ) : void