/// <summary>
/// Loads the best fit tables for this code page from the shared code page data stream and
/// stores them in <c>arrayBytesBestFit</c> (bytes -> Unicode) and <c>arrayUnicodeBestFit</c>
/// (Unicode -> bytes). Both arrays are flat lists of pairs: the even index holds the key and
/// the following odd index holds the best fit value.
/// </summary>
/// <remarks>
/// The work is done under <c>InternalSyncObject</c> and is skipped when
/// <c>arrayUnicodeBestFit</c> has already been populated.
///
/// Each table in the data section is a sequence of 16-bit words, decoded as follows:
/// a word equal to 1 means "the next word is the new position"; a word in the range
/// 0x0001..0x001F (other than 1) advances the position by that amount; any other word is the
/// entry for the current position, after which the position advances by one. A table ends
/// once the position reaches 0x10000.
/// </remarks>
/// <exception cref="EndOfStreamException">
/// The data stream ended before <c>m_dataSize</c> bytes could be read.
/// </exception>
protected unsafe override void ReadBestFitTable()
{
    // Lock so we don't confuse ourselves.
    lock (InternalSyncObject)
    {
        // If we got a best fit array already then don't do this
        if (arrayUnicodeBestFit == null)
        {
            //
            // Read in Best Fit table.
            //
            // First we have to advance past original character mapping table
            // Move to the beginning of the data section
            byte[] buffer = new byte[m_dataSize];
            int totalRead = 0;
            lock (s_streamLock)
            {
                s_codePagesEncodingDataStream.Seek(m_firstDataWordOffset, SeekOrigin.Begin);

                // Stream.Read is allowed to return fewer bytes than requested, so keep
                // reading until the buffer is full or the stream reports end of data.
                while (totalRead < buffer.Length)
                {
                    int bytesRead = s_codePagesEncodingDataStream.Read(buffer, totalRead, buffer.Length - totalRead);
                    if (bytesRead <= 0)
                    {
                        break;
                    }
                    totalRead += bytesRead;
                }
            }

            // The parsing below walks raw pointers through the buffer; never do that over
            // a partially filled buffer.
            if (totalRead < buffer.Length)
            {
                throw new EndOfStreamException();
            }

            fixed (byte* pBuffer = buffer)
            {
                // The data is consumed as a sequence of 16-bit words.
                char* pData = (char*)pBuffer;

                // We start at bytes position 0
                int bytesPosition = 0;

                // Pass 1: skip over the original character mapping table without using it.
                while (bytesPosition < 0x10000)
                {
                    // Get the next word
                    char input = *pData;
                    pData++;

                    if (input == 1)
                    {
                        // Use next data as our byte position
                        bytesPosition = (int)(*pData);
                        pData++;
                    }
                    else if (input < 0x20 && input > 0)
                    {
                        // Advance input characters
                        bytesPosition += input;
                    }
                    else
                    {
                        // All other cases add 1 to bytes position
                        bytesPosition++;
                    }
                }

                // Now pData is at the start of the bytes -> unicode best fit table;
                // remember it so the table can be walked a second time.
                char* pBytes2Unicode = pData;

                // Pass 2: count the bytes -> unicode best fit entries.
                // The first word of the table is the starting position.
                int iBestFitCount = 0;
                bytesPosition = *pData;
                pData++;
                while (bytesPosition < 0x10000)
                {
                    // Get the next word
                    char input = *pData;
                    pData++;

                    if (input == 1)
                    {
                        // Use next data as our byte position
                        bytesPosition = (int)(*pData);
                        pData++;
                    }
                    else if (input < 0x20 && input > 0)
                    {
                        // Advance input characters
                        bytesPosition += input;
                    }
                    else
                    {
                        // Use this character (unless it's unknown, unk just skips 1)
                        if (input != UNICODE_REPLACEMENT_CHAR)
                        {
                            int correctedChar = bytesPosition;
                            if (CleanUpBytes(ref correctedChar))
                            {
                                // Sometimes correction makes them the same as no best fit, skip those.
                                if (mapBytesToUnicode[correctedChar] != input)
                                {
                                    iBestFitCount++;
                                }
                            }
                        }

                        // Position gets incremented in any case.
                        bytesPosition++;
                    }
                }

                // Now we know how big the best fit table has to be (two slots per entry)
                char[] arrayTemp = new char[iBestFitCount * 2];

                // Pass 3: go back & read the bytes -> unicode entries in, applying exactly
                // the same filter as the counting pass so the array is filled completely.
                iBestFitCount = 0;
                pData = pBytes2Unicode;
                bytesPosition = *pData;
                pData++;
                bool bOutOfOrder = false;
                while (bytesPosition < 0x10000)
                {
                    // Get the next word
                    char input = *pData;
                    pData++;

                    if (input == 1)
                    {
                        // Use next data as our byte position
                        bytesPosition = (int)(*pData);
                        pData++;
                    }
                    else if (input < 0x20 && input > 0)
                    {
                        // Advance input characters
                        bytesPosition += input;
                    }
                    else
                    {
                        // Use this character (unless it's unknown, unk just skips 1)
                        if (input != UNICODE_REPLACEMENT_CHAR)
                        {
                            int correctedChar = bytesPosition;
                            if (CleanUpBytes(ref correctedChar))
                            {
                                // Sometimes correction makes them same as no best fit, skip those.
                                if (mapBytesToUnicode[correctedChar] != input)
                                {
                                    // A corrected key no longer follows the table's own position
                                    // sequence, so the pairs have to be sorted afterwards.
                                    if (correctedChar != bytesPosition)
                                    {
                                        bOutOfOrder = true;
                                    }

                                    arrayTemp[iBestFitCount++] = unchecked((char)correctedChar);
                                    arrayTemp[iBestFitCount++] = input;
                                }
                            }
                        }

                        // Position gets incremented in any case.
                        bytesPosition++;
                    }
                }

                // If they're out of order we need to sort them.
                // Selection sort over (key, value) pairs, ordered by the key at the even index.
                if (bOutOfOrder)
                {
                    Debug.Assert((arrayTemp.Length / 2) < 20,
                        "[DBCSCodePageEncoding.ReadBestFitTable]Expected small best fit table < 20 for code page " + CodePage + ", not " + arrayTemp.Length / 2);
                    for (int i = 0; i < arrayTemp.Length - 2; i += 2)
                    {
                        int iSmallest = i;
                        char cSmallest = arrayTemp[i];
                        for (int j = i + 2; j < arrayTemp.Length; j += 2)
                        {
                            // Find smallest one for front
                            if (cSmallest > arrayTemp[j])
                            {
                                cSmallest = arrayTemp[j];
                                iSmallest = j;
                            }
                        }

                        // If smallest one is something else, switch them (key and value together)
                        if (iSmallest != i)
                        {
                            char temp = arrayTemp[iSmallest];
                            arrayTemp[iSmallest] = arrayTemp[i];
                            arrayTemp[i] = temp;
                            temp = arrayTemp[iSmallest + 1];
                            arrayTemp[iSmallest + 1] = arrayTemp[i + 1];
                            arrayTemp[i + 1] = temp;
                        }
                    }
                }

                // Remember our array
                arrayBytesBestFit = arrayTemp;

                // Now we're at the beginning of the Unicode -> Bytes best fit table.
                // Pass 4: count its entries. The first word is the starting position.
                char* pUnicode2Bytes = pData;
                int unicodePosition = *(pData++);
                iBestFitCount = 0;
                while (unicodePosition < 0x10000)
                {
                    // Get the next word
                    char input = *pData;
                    pData++;

                    if (input == 1)
                    {
                        // Use next data as our unicode position
                        unicodePosition = (int)*pData;
                        pData++;
                    }
                    else if (input < 0x20 && input > 0)
                    {
                        // Advance input characters
                        unicodePosition += input;
                    }
                    else
                    {
                        // Same as our unicodePosition or use this character
                        if (input > 0)
                        {
                            iBestFitCount++;
                        }

                        unicodePosition++;
                    }
                }

                // Allocate our table. This is an upper bound: the fill pass below additionally
                // drops entries that CleanUpBytes rejects.
                arrayTemp = new char[iBestFitCount * 2];

                // Pass 5: do it again to fill the array with real values
                pData = pUnicode2Bytes;
                unicodePosition = *(pData++);
                iBestFitCount = 0;
                while (unicodePosition < 0x10000)
                {
                    // Get the next word
                    char input = *pData;
                    pData++;

                    if (input == 1)
                    {
                        // Use next data as our unicode position
                        unicodePosition = (int)*pData;
                        pData++;
                    }
                    else if (input < 0x20 && input > 0)
                    {
                        // Advance input characters
                        unicodePosition += input;
                    }
                    else
                    {
                        if (input > 0)
                        {
                            // Use this character, may need to clean it up
                            int correctedChar = (int)input;
                            if (CleanUpBytes(ref correctedChar))
                            {
                                arrayTemp[iBestFitCount++] = unchecked((char)unicodePosition);
                                // Have to map it to Unicode because best fit will need Unicode value of best fit char.
                                arrayTemp[iBestFitCount++] = mapBytesToUnicode[correctedChar];
                            }
                        }

                        unicodePosition++;
                    }
                }

                // Entries rejected by CleanUpBytes were counted but never written, which would
                // leave zero-filled pairs at the end of the table. Drop that unused tail so the
                // array only contains real pairs.
                if (iBestFitCount < arrayTemp.Length)
                {
                    Array.Resize(ref arrayTemp, iBestFitCount);
                }

                // Remember our array
                arrayUnicodeBestFit = arrayTemp;
            }
        }
    }
}