private void EncryptBlock(
uint[,] KW)
{
int r;
uint r0, r1, r2, r3;
C0 ^= KW[0,0];
C1 ^= KW[0,1];
C2 ^= KW[0,2];
C3 ^= KW[0,3];
for (r = 1; r < ROUNDS - 1;) {
r0 = T0[C0&255] ^ T1[(C1>>8)&255] ^ T2[(C2>>16)&255] ^ T3[(C3>>24)&255] ^ KW[r,0];
r1 = T0[C1&255] ^ T1[(C2>>8)&255] ^ T2[(C3>>16)&255] ^ T3[(C0>>24)&255] ^ KW[r,1];
r2 = T0[C2&255] ^ T1[(C3>>8)&255] ^ T2[(C0>>16)&255] ^ T3[(C1>>24)&255] ^ KW[r,2];
r3 = T0[C3&255] ^ T1[(C0>>8)&255] ^ T2[(C1>>16)&255] ^ T3[(C2>>24)&255] ^ KW[r++,3];
C0 = T0[r0&255] ^ T1[(r1>>8)&255] ^ T2[(r2>>16)&255] ^ T3[(r3>>24)&255] ^ KW[r,0];
C1 = T0[r1&255] ^ T1[(r2>>8)&255] ^ T2[(r3>>16)&255] ^ T3[(r0>>24)&255] ^ KW[r,1];
C2 = T0[r2&255] ^ T1[(r3>>8)&255] ^ T2[(r0>>16)&255] ^ T3[(r1>>24)&255] ^ KW[r,2];
C3 = T0[r3&255] ^ T1[(r0>>8)&255] ^ T2[(r1>>16)&255] ^ T3[(r2>>24)&255] ^ KW[r++,3];
}
r0 = T0[C0&255] ^ T1[(C1>>8)&255] ^ T2[(C2>>16)&255] ^ T3[(C3>>24)&255] ^ KW[r,0];
r1 = T0[C1&255] ^ T1[(C2>>8)&255] ^ T2[(C3>>16)&255] ^ T3[(C0>>24)&255] ^ KW[r,1];
r2 = T0[C2&255] ^ T1[(C3>>8)&255] ^ T2[(C0>>16)&255] ^ T3[(C1>>24)&255] ^ KW[r,2];
r3 = T0[C3&255] ^ T1[(C0>>8)&255] ^ T2[(C1>>16)&255] ^ T3[(C2>>24)&255] ^ KW[r++,3];
// the final round's table is a simple function of S so we don't use a whole other four tables for it
C0 = (uint)S[r0&255] ^ (((uint)S[(r1>>8)&255])<<8) ^ (((uint)S[(r2>>16)&255])<<16) ^ (((uint)S[(r3>>24)&255])<<24) ^ KW[r,0];
C1 = (uint)S[r1&255] ^ (((uint)S[(r2>>8)&255])<<8) ^ (((uint)S[(r3>>16)&255])<<16) ^ (((uint)S[(r0>>24)&255])<<24) ^ KW[r,1];
C2 = (uint)S[r2&255] ^ (((uint)S[(r3>>8)&255])<<8) ^ (((uint)S[(r0>>16)&255])<<16) ^ (((uint)S[(r1>>24)&255])<<24) ^ KW[r,2];
C3 = (uint)S[r3&255] ^ (((uint)S[(r0>>8)&255])<<8) ^ (((uint)S[(r1>>16)&255])<<16) ^ (((uint)S[(r2>>24)&255])<<24) ^ KW[r,3];
}