/// <summary>
/// Scans candidate block sizes and reports the one whose resident thread count
/// per multiprocessor (block size times active blocks per multiprocessor) is largest.
/// Candidates start at the block-size limit and step downwards by
/// <c>properties.warpSize</c>; a later (smaller) candidate only replaces the
/// current best when it is strictly better, so ties keep the larger block size.
/// </summary>
/// <param name="minGridSize">
/// Receives the active-blocks-per-multiprocessor count of the chosen block size
/// multiplied by <c>properties.numSms</c>. Written only after the search finishes.
/// </param>
/// <param name="blockSize">
/// Receives the chosen block size, or 0 when no candidate produced a positive
/// occupancy. Written only after the search finishes.
/// </param>
/// <param name="properties">Device properties read for warp size, thread limits and multiprocessor count.</param>
/// <param name="attributes">Function attributes; <c>maxThreadsPerBlock</c> bounds the candidate block sizes.</param>
/// <param name="state">Device state, forwarded unchanged to the occupancy helpers.</param>
/// <param name="blockSizeToDynamicSMemSize">
/// Optional callback mapping a candidate block size to a dynamic shared memory size.
/// When non-null its result is used for every candidate and the incoming
/// <paramref name="dynamicSMemSize"/> value is ignored.
/// </param>
/// <param name="dynamicSMemSize">Dynamic shared memory size used for every candidate when no callback is supplied.</param>
public static void cudaOccMaxPotentialOccupancyBlockSize(
    ref int minGridSize,
    ref int blockSize,
    cudaOccDeviceProp properties,
    cudaOccFuncAttributes attributes,
    cudaOccDeviceState state,
    del_blockSizeToDynamicSMemSize blockSizeToDynamicSMemSize,
    SizeT dynamicSMemSize)
{
    // A single result object is reused for every occupancy query below.
    cudaOccResult occupancy = new cudaOccResult();

    // Best configuration found so far; all zero until a candidate yields occupancy.
    int bestBlockSize = 0;
    int bestBlocksPerSm = 0;
    int bestThreadsPerSm = 0;

    // Input validation is delegated to the helper; no status code is returned here.
    // NOTE(review): presumably the helper throws on invalid input - confirm.
    cudaOccInputCheck(properties, attributes, state);

    // Search bounds.
    int threadsPerSmLimit = properties.maxThreadsPerMultiProcessor;
    int step = properties.warpSize;
    int threadCap = __occMin(properties.maxThreadsPerBlock, attributes.maxThreadsPerBlock);

    // Begin at the cap rounded up to a multiple of the step, then walk downwards.
    int alignedCandidate = __occRoundUp(threadCap, step);
    while (alignedCandidate > 0)
    {
        // The first aligned candidate may overshoot the cap, so clamp it.
        int candidate = __occMin(threadCap, alignedCandidate);

        // A user-supplied mapping takes precedence over the fixed shared memory size.
        if (blockSizeToDynamicSMemSize != null)
        {
            dynamicSMemSize = blockSizeToDynamicSMemSize(candidate);
        }

        cudaOccMaxActiveBlocksPerMultiprocessor(
            occupancy,
            properties,
            attributes,
            state,
            candidate,
            dynamicSMemSize);

        int blocksPerSm = occupancy.ActiveBlocksPerMultiProcessor;
        int threadsPerSm = candidate * blocksPerSm;

        // Strict comparison: an equally good smaller block size does not win.
        if (threadsPerSm > bestThreadsPerSm)
        {
            bestBlockSize = candidate;
            bestBlocksPerSm = blocksPerSm;
            bestThreadsPerSm = threadsPerSm;
        }

        // Nothing can beat a fully occupied multiprocessor, so stop searching.
        if (threadsPerSmLimit == bestThreadsPerSm)
        {
            break;
        }

        alignedCandidate -= step;
    }

    // Publish the best configuration: the grid size that puts the chosen number
    // of blocks on every multiprocessor, and the matching block size.
    minGridSize = bestBlocksPerSm * properties.numSms;
    blockSize = bestBlockSize;
}