/// <summary>
/// Counts the set bits (population count) over every 64-bit word of <paramref name="value"/>.
/// </summary>
/// <remarks>
/// Full groups of 30 words are processed with the 64-bit tree-merging ("merging3") scheme,
/// which combines three words at a time and defers the wide reductions to once per group.
/// The remaining words (at most 29) use the classic parallel popcount with a multiply.
/// Both paths depend on wrapping/truncating integer arithmetic, so they run inside an
/// explicit <c>unchecked</c> block and behave the same regardless of the project's
/// overflow-checking setting.
/// </remarks>
/// <param name="value">Words to scan. Must not be null; its length is read unconditionally.</param>
/// <returns>The total number of 1 bits, narrowed to <c>int</c>.</returns>
private static int GetBitCount(ulong[] value)
{
    const ulong m1 = 0x5555555555555555;  // even bit of every 2-bit pair
    const ulong m2 = 0x3333333333333333;  // low 2 bits of every nibble
    const ulong m4 = 0x0F0F0F0F0F0F0F0F;  // low nibble of every byte
    const ulong m8 = 0x00FF00FF00FF00FF;  // low byte of every 16-bit lane
    const ulong m16 = 0x0000FFFF0000FFFF; // low 16 bits of every 32-bit lane
    const ulong h01 = 0x0101010101010101; // multiplying by this sums all bytes into the top byte

    int size = value.Length;
    int limit30 = size - size % 30; // number of words covered by complete 30-word groups
    uint bitCount = 0;
    int pos = 0;

    unchecked
    {
        // 64-bit tree merging (merging3): one pass per complete group of 30 words.
        for (; pos < limit30; pos += 30)
        {
            ulong acc = 0;
            int groupEnd = pos + 30;
            for (int j = pos; j < groupEnd; j += 3)
            {
                ulong count1 = value[j];
                ulong count2 = value[j + 1];
                ulong half1 = value[j + 2];
                ulong half2 = half1;

                // Split the third word into its even and odd bits so each half can be
                // folded into one of the other two words' 2-bit counters.
                half1 &= m1;
                half2 = (half2 >> 1) & m1;

                // 2-bit counters (0..2) for the first two words, then add the halves (0..3).
                count1 -= (count1 >> 1) & m1;
                count2 -= (count2 >> 1) & m1;
                count1 += half1;
                count2 += half2;

                // Widen to 4-bit counters and merge both words (0..12 per nibble).
                count1 = (count1 & m2) + ((count1 >> 2) & m2);
                count1 += (count2 & m2) + ((count2 >> 2) & m2);

                // Widen to 8-bit counters: at most 24 per byte per step, so the ten
                // steps of a group stay within 240 and never spill out of a byte.
                acc += (count1 & m4) + ((count1 >> 4) & m4);
            }

            // Reduce the eight per-byte counters to a single sum in the low 32 bits.
            acc = (acc & m8) + ((acc >> 8) & m8);
            acc = (acc + (acc >> 16)) & m16;
            acc = acc + (acc >> 32);

            // The upper 32 bits still carry a partial sum; truncation discards it on purpose.
            bitCount += (uint)acc;
        }

        // Count the bits of the remaining 64-bit words (at most 29) using
        // "Counting bits set, in parallel" from the "Bit Twiddling Hacks";
        // the code follows Wikipedia's 64-bit popcount_3() implementation:
        // http://en.wikipedia.org/wiki/Hamming_weight#Efficient_implementation
        for (; pos < size; pos++)
        {
            ulong x = value[pos];
            x = x - ((x >> 1) & m1);
            x = (x & m2) + ((x >> 2) & m2);
            x = (x + (x >> 4)) & m4;

            // The multiplication overflows by design; only the top byte is kept.
            bitCount += (uint)((x * h01) >> 56);
        }
    }

    return (int)bitCount;
}