public void NormDiff_L2(NPPImage_8uC3 tpl, CudaDeviceVariable<double> pNormDiff, int nCOI, NPPImage_8uC1 pMask, CudaDeviceVariable<byte> buffer)
{
int bufferSize = NormDiffL2MaskedGetBufferHostSize();
if (bufferSize > buffer.Size) throw new NPPException("Provided buffer is too small.");
status = NPPNativeMethods.NPPi.NormDiff.nppiNormDiff_L2_8u_C3CMR(_devPtrRoi, _pitch, tpl.DevicePointerRoi, tpl.Pitch, pMask.DevicePointerRoi, pMask.Pitch, _sizeRoi, nCOI, pNormDiff.DevicePointer, buffer.DevicePointer);
Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nppiNormDiff_L2_8u_C3CMR", status));
NPPException.CheckNppStatus(status, this);
}