/// <summary>
/// Hashes <paramref name="work"/> on the GPU: precomputes the nonce-independent parts of the
/// SHA-256 computation on the host, then repeatedly dispatches the OpenCL kernel in batches of
/// <c>globalWorkSize</c> items until <c>Progress</c> returns false or the 32-bit nonce counter
/// wraps around. Any non-zero value the kernel leaves in an output buffer is written into the
/// work header (32-bit word 19) and passed to <c>Context.SubmitWork</c>.
/// </summary>
/// <param name="work">Work item to hash. Its header is modified in place when a nonce is found.</param>
internal unsafe void Work(Work work)
{
    // allocate buffers and create partial hash
    byte[] round1Blocks, round2Blocks;
    uint[] round1State, round2State;
    PrepareWork(work, out round1Blocks, out round1State, out round2Blocks, out round2State);

    // build message schedule (from the block following the first one) without nonce
    uint* W = stackalloc uint[64];
    fixed (byte* round1BlocksPtr = round1Blocks)
    {
        Sha256.Schedule(round1BlocksPtr + Sha256.SHA256_BLOCK_SIZE, W);
    }

    // complete first three rounds of block 2 on a copy of the first-block state
    uint[] round1State2Mid = Sha256.AllocateStateBuffer();
    Array.Copy(round1State, round1State2Mid, Sha256.SHA256_STATE_SIZE);
    for (int round = 0; round < 3; round++)
    {
        Sha256.Round(
            ref round1State2Mid[0], ref round1State2Mid[1], ref round1State2Mid[2], ref round1State2Mid[3],
            ref round1State2Mid[4], ref round1State2Mid[5], ref round1State2Mid[6], ref round1State2Mid[7],
            W, round);
    }

    // precalculated pieces that are independent of nonce
    uint W16 = W[16];
    uint W17 = W[17];
    uint W18 = W[18];
    uint W19 = W[19];
    uint W31 = W[31];
    uint W32 = W[32];

    // state word 4 + Sigma1(mid[1]) + Ch(mid[1], mid[2], mid[3]) + K[3]
    uint PreVal4 = round1State[4]
        + (Sha256.Rotr(round1State2Mid[1], 6) ^ Sha256.Rotr(round1State2Mid[1], 11) ^ Sha256.Rotr(round1State2Mid[1], 25))
        + (round1State2Mid[3] ^ (round1State2Mid[1] & (round1State2Mid[2] ^ round1State2Mid[3])))
        + Sha256.K[3];

    // Sigma0(mid[5]) + Maj(mid[5], mid[6], mid[7])
    uint T1 = (Sha256.Rotr(round1State2Mid[5], 2) ^ Sha256.Rotr(round1State2Mid[5], 13) ^ Sha256.Rotr(round1State2Mid[5], 22))
        + ((round1State2Mid[5] & round1State2Mid[6]) | (round1State2Mid[7] & (round1State2Mid[5] | round1State2Mid[6])));

    uint PreVal4_state0 = PreVal4 + round1State[0];
    uint PreVal4_state0_K7 = (uint)(PreVal4_state0 + Sha256.K[7]);
    uint PreVal4_T1 = PreVal4 + T1;
    uint B1_plus_K6 = (uint)(round1State2Mid[1] + Sha256.K[6]);
    uint C1_plus_K5 = (uint)(round1State2Mid[2] + Sha256.K[5]);
    uint W16_plus_K16 = (uint)(W16 + Sha256.K[16]);
    uint W17_plus_K17 = (uint)(W17 + Sha256.K[17]);

    // clear output buffers, in case they've already been used
    uint[] outputZero = new uint[16];
    clQueue.WriteToBuffer(outputZero, clBuffer0, true, null);
    clQueue.WriteToBuffer(outputZero, clBuffer1, true, null);

    // host-side copy of an output buffer
    uint[] output = new uint[16];

    // Kernel arguments 0..26 do not change between dispatches, so they are set once here;
    // OpenCL keeps a kernel argument's value until it is set again.
    clKernel.SetValueArgument(0, PreVal4_state0);
    clKernel.SetValueArgument(1, PreVal4_state0_K7);
    clKernel.SetValueArgument(2, PreVal4_T1);
    clKernel.SetValueArgument(3, W18);
    clKernel.SetValueArgument(4, W19);
    clKernel.SetValueArgument(5, W16);
    clKernel.SetValueArgument(6, W17);
    clKernel.SetValueArgument(7, W16_plus_K16);
    clKernel.SetValueArgument(8, W17_plus_K17);
    clKernel.SetValueArgument(9, W31);
    clKernel.SetValueArgument(10, W32);
    // NOTE(review): 0xB956C25B looks like SHA-256 K[4] (0x3956C25B) plus 0x80000000 - confirm
    // against the kernel. The sum is computed as a long and deliberately truncated to 32 bits.
    clKernel.SetValueArgument(11, unchecked((uint)(round1State2Mid[3] + 0xB956c25bL)));
    clKernel.SetValueArgument(12, round1State2Mid[1]);
    clKernel.SetValueArgument(13, round1State2Mid[2]);
    clKernel.SetValueArgument(14, round1State2Mid[7]);
    clKernel.SetValueArgument(15, round1State2Mid[5]);
    clKernel.SetValueArgument(16, round1State2Mid[6]);
    clKernel.SetValueArgument(17, C1_plus_K5);
    clKernel.SetValueArgument(18, B1_plus_K6);
    clKernel.SetValueArgument(19, round1State[0]);
    clKernel.SetValueArgument(20, round1State[1]);
    clKernel.SetValueArgument(21, round1State[2]);
    clKernel.SetValueArgument(22, round1State[3]);
    clKernel.SetValueArgument(23, round1State[4]);
    clKernel.SetValueArgument(24, round1State[5]);
    clKernel.SetValueArgument(25, round1State[6]);
    clKernel.SetValueArgument(26, round1State[7]);

    // swaps between true (clBuffer0) and false (clBuffer1) so consecutive dispatches
    // write to different output buffers
    bool outputAlt = true;
    // size of local work groups
    long localWorkSize = Gpu.WorkSize;
    // number of items to dispatch to GPU at a time
    long globalWorkSize = localWorkSize * localWorkSize * 8;
    // begin working at 0
    uint nonce = 0;
    // set when the loop ends because the 32-bit nonce counter wrapped around
    bool nonceSpaceExhausted = false;

    // NOTE(review): neither the kernel arguments nor the global work offset (null) vary with
    // 'nonce'; confirm how the kernel derives the nonce range covered by each dispatch.
    while (true)
    {
        // once at least one dispatch has been issued, check the buffer that is about to be
        // reused for results left by the earlier dispatch that wrote to it
        if (nonce > 0)
        {
            SubmitFoundNonces(work, output, outputZero, outputAlt);
        }

        // execute kernel, directing its output at the current buffer
        clQueue.Finish();
        clKernel.SetMemoryArgument(27, outputAlt ? clBuffer0 : clBuffer1);
        clQueue.Execute(clKernel, null, new long[] { globalWorkSize }, new long[] { localWorkSize }, null);

        // report that we just hashed the work size number of hashes
        if (!Progress(work, globalWorkSize))
        {
            break;
        }

        // update nonce; a result smaller than the work size means the counter overflowed
        if ((nonce += (uint)globalWorkSize) < (uint)globalWorkSize)
        {
            nonceSpaceExhausted = true;
            break;
        }

        // next loop deals with other output buffer
        outputAlt = !outputAlt;
    }

    // When the nonce space has been exhausted, the results of the last dispatch into each
    // buffer have not been read yet; without this they would be dropped. The buffer written
    // by the older dispatch is drained first. (The Progress == false exit keeps its original
    // behavior of not reading further results.)
    if (nonceSpaceExhausted)
    {
        clQueue.Finish();
        SubmitFoundNonces(work, output, outputZero, !outputAlt);
        SubmitFoundNonces(work, output, outputZero, outputAlt);
    }
}

/// <summary>
/// Reads one output buffer into <paramref name="output"/>, submits every non-zero entry as a
/// nonce for <paramref name="work"/>, and zeroes the device buffer again if anything was found.
/// </summary>
/// <param name="work">Work item whose header receives each found nonce before submission.</param>
/// <param name="output">Host array the device buffer is read into.</param>
/// <param name="outputZero">All-zero array used to reset the device buffer.</param>
/// <param name="useBuffer0">True to process <c>clBuffer0</c>, false to process <c>clBuffer1</c>.</param>
private unsafe void SubmitFoundNonces(Work work, uint[] output, uint[] outputZero, bool useBuffer0)
{
    // blocking read of the selected buffer
    clQueue.ReadFromBuffer(useBuffer0 ? clBuffer0 : clBuffer1, ref output, true, null);

    bool found = false;
    for (int j = 0; j < output.Length; j++)
    {
        if (output[j] == 0)
        {
            continue;
        }

        // replace header data on work: 32-bit word 19 of the header takes the reported value
        fixed (byte* headerPtr = work.Header)
        {
            ((uint*)headerPtr)[19] = output[j];
        }

        // submit work for validation
        Context.SubmitWork(this, work, GetType().Name);
        found = true;
    }

    // clear the buffer once, and only if something was reported in it
    if (found)
    {
        clQueue.WriteToBuffer(outputZero, useBuffer0 ? clBuffer0 : clBuffer1, true, null);
    }
}