// Accumulates Pop() over the word-wise XOR of A and B for the numWords words
// starting at index wordOffset, and returns the weighted total.
//
// Instead of calling Pop() on every XOR-ed word, the words are fed through a
// tree of carry-save adders (the CSA construction from Hacker's Delight):
//   CSA(h, l, a, b, c):  u = a ^ b;  h = (a & b) | (u & c);  l = u ^ c;
// Each bit position keeps a running count in the "ones", "twos" and "fours"
// accumulators; a carry out of "fours" is worth eight and is passed to Pop()
// immediately. Pop() is therefore called once per group of words rather than
// once per word.
//
// NOTE(review): Pop is defined outside this block; presumably it returns the
// number of set bits of its argument, in which case the result is the number
// of bit positions at which A and B differ within the range - confirm.
//
// A and B must both be indexable over [wordOffset, wordOffset + numWords).
public static long Pop_xor(long[] A, long[] B, int wordOffset, int numWords)
{
    int end = wordOffset + numWords;
    long ones = 0, twos = 0, fours = 0;
    long eightsCount = 0;   // sum of Pop() over every "eights" carry word
    long result = 0;
    int pos = wordOffset;

    // Main loop: consume eight words per iteration.
    while (pos <= end - 8)
    {
        long x, y, t;
        long pairLo, pairHi, quadLo, quadHi, eights;

        // Words 0 and 1 folded into ones, carry into pairLo.
        x = A[pos] ^ B[pos];
        y = A[pos + 1] ^ B[pos + 1];
        t = ones ^ x;
        pairLo = (ones & x) | (t & y);
        ones = t ^ y;

        // Words 2 and 3 folded into ones, carry into pairHi.
        x = A[pos + 2] ^ B[pos + 2];
        y = A[pos + 3] ^ B[pos + 3];
        t = ones ^ x;
        pairHi = (ones & x) | (t & y);
        ones = t ^ y;

        // Both pair carries folded into twos, carry into quadLo.
        t = twos ^ pairLo;
        quadLo = (twos & pairLo) | (t & pairHi);
        twos = t ^ pairHi;

        // Words 4 and 5 folded into ones, carry into pairLo.
        x = A[pos + 4] ^ B[pos + 4];
        y = A[pos + 5] ^ B[pos + 5];
        t = ones ^ x;
        pairLo = (ones & x) | (t & y);
        ones = t ^ y;

        // Words 6 and 7 folded into ones, carry into pairHi.
        x = A[pos + 6] ^ B[pos + 6];
        y = A[pos + 7] ^ B[pos + 7];
        t = ones ^ x;
        pairHi = (ones & x) | (t & y);
        ones = t ^ y;

        // Both pair carries folded into twos, carry into quadHi.
        t = twos ^ pairLo;
        quadHi = (twos & pairLo) | (t & pairHi);
        twos = t ^ pairHi;

        // Both quad carries folded into fours; the carry out is worth eight.
        t = fours ^ quadLo;
        eights = (fours & quadLo) | (t & quadHi);
        fours = t ^ quadHi;

        eightsCount += Pop(eights);
        pos += 8;
    }

    // Tail: a remaining group of four words.
    if (pos <= end - 4)
    {
        long x, y, t;
        long pairLo, pairHi, quadLo, eights;

        x = A[pos] ^ B[pos];
        y = A[pos + 1] ^ B[pos + 1];
        t = ones ^ x;
        pairLo = (ones & x) | (t & y);
        ones = t ^ y;

        x = A[pos + 2] ^ B[pos + 2];
        y = A[pos + 3] ^ B[pos + 3];
        t = ones ^ x;
        pairHi = (ones & x) | (t & y);
        ones = t ^ y;

        t = twos ^ pairLo;
        quadLo = (twos & pairLo) | (t & pairHi);
        twos = t ^ pairHi;

        // Only one quad carry here, so a half adder suffices for fours.
        eights = fours & quadLo;
        fours ^= quadLo;

        eightsCount += Pop(eights);
        pos += 4;
    }

    // Tail: a remaining pair of words.
    if (pos <= end - 2)
    {
        long x = A[pos] ^ B[pos];
        long y = A[pos + 1] ^ B[pos + 1];
        long t = ones ^ x;
        long pairLo = (ones & x) | (t & y);
        ones = t ^ y;

        // Half adders ripple the single pair carry up through twos and fours.
        long quadLo = twos & pairLo;
        twos ^= pairLo;

        long eights = fours & quadLo;
        fours ^= quadLo;

        eightsCount += Pop(eights);
        pos += 2;
    }

    // Tail: a single leftover word goes straight to Pop().
    if (pos < end)
    {
        result += Pop(A[pos] ^ B[pos]);
    }

    // Weight each accumulator by the count its bits stand for (4, 2, 1, 8).
    result += (Pop(fours) << 2) + (Pop(twos) << 1) + Pop(ones) + (eightsCount << 3);
    return result;
}