System.Text.DBCSCodePageEncoding.ReadBestFitTable C# (CSharp) Method

DBCSCodePageEncoding Class Documentation Show file Open project: dotnet/corefx
ReadBestFitTable() protected method

protected override ReadBestFitTable ( ) : void
return	void
        /// <summary>
        /// Builds <c>arrayBytesBestFit</c> and <c>arrayUnicodeBestFit</c> from the code page data
        /// stream. Does nothing if <c>arrayUnicodeBestFit</c> has already been populated.
        /// </summary>
        /// <remarks>
        /// The data is walked as a sequence of 16-bit words, each interpreted as follows:
        /// <list type="bullet">
        /// <item>1: the next word is the new absolute position.</item>
        /// <item>2 through 0x1F: advance the position by that amount.</item>
        /// <item>anything else: a value for the current position; the position then advances by one.</item>
        /// </list>
        /// Each table ends once the position reaches 0x10000. Both result arrays are flat lists of
        /// (key, value) pairs: element 2n is the key and element 2n + 1 is its best fit value.
        /// </remarks>
        /// <exception cref="EndOfStreamException">
        /// The data stream ended before <c>m_dataSize</c> bytes could be read.
        /// </exception>
        protected unsafe override void ReadBestFitTable()
        {
            // Lock so we don't confuse ourselves.
            lock (InternalSyncObject)
            {
                // If we got a best fit array already then don't do this
                if (arrayUnicodeBestFit == null)
                {
                    //
                    // Read in Best Fit table.
                    //

                    // Pull the whole data section into memory, starting at its first data word.
                    byte[] buffer = new byte[m_dataSize];
                    lock (s_streamLock)
                    {
                        s_codePagesEncodingDataStream.Seek(m_firstDataWordOffset, SeekOrigin.Begin);

                        // Stream.Read is allowed to return fewer bytes than requested, so keep
                        // reading until the buffer is full. The loops below walk the buffer with
                        // raw pointers, so a truncated read must fail here rather than be parsed.
                        int totalRead = 0;
                        while (totalRead < m_dataSize)
                        {
                            int bytesRead = s_codePagesEncodingDataStream.Read(buffer, totalRead, m_dataSize - totalRead);
                            if (bytesRead <= 0)
                            {
                                throw new EndOfStreamException();
                            }

                            totalRead += bytesRead;
                        }
                    }

                    fixed (byte* pBuffer = buffer)
                    {
                        // The data is consumed as 16-bit words.
                        char* pData = (char*)pBuffer;

                        // We start at bytes position 0
                        int bytesPosition = 0;

                        // First we have to advance past the original character mapping table;
                        // nothing is stored here, we only track the position.
                        while (bytesPosition < 0x10000)
                        {
                            // Get the next word
                            char input = *pData;
                            pData++;

                            if (input == 1)
                            {
                                // Use next word as our byte position
                                bytesPosition = (int)(*pData);
                                pData++;
                            }
                            else if (input < 0x20 && input > 0)
                            {
                                // Advance input characters
                                bytesPosition += input;
                            }
                            else
                            {
                                // All other cases add 1 to bytes position
                                bytesPosition++;
                            }
                        }

                        // Remember where the bytes -> unicode best fit table starts so the
                        // second (filling) pass can rewind to it.
                        char* pBytes2Unicode = pData;

                        // The first word of the bytes -> unicode best fit table is its starting position.
                        int iBestFitCount = 0;
                        bytesPosition = *pData;
                        pData++;

                        // Pass 1 over bytes -> unicode: count the entries we will keep.
                        while (bytesPosition < 0x10000)
                        {
                            // Get the next word
                            char input = *pData;
                            pData++;

                            if (input == 1)
                            {
                                // Use next word as our byte position
                                bytesPosition = (int)(*pData);
                                pData++;
                            }
                            else if (input < 0x20 && input > 0)
                            {
                                // Advance input characters
                                bytesPosition += input;
                            }
                            else
                            {
                                // Use this character (unless it's unknown, unk just skips 1)
                                if (input != UNICODE_REPLACEMENT_CHAR)
                                {
                                    int correctedChar = bytesPosition;
                                    if (CleanUpBytes(ref correctedChar))
                                    {
                                        // Sometimes correction makes them the same as no best fit, skip those.
                                        if (mapBytesToUnicode[correctedChar] != input)
                                        {
                                            iBestFitCount++;
                                        }
                                    }
                                }

                                // Position gets incremented in any case.
                                bytesPosition++;
                            }
                        }

                        // Now we know how big the best fit table has to be (two slots per entry)
                        char[] arrayTemp = new char[iBestFitCount * 2];

                        // Pass 2 over bytes -> unicode: rewind and store the entries. The filter
                        // is identical to pass 1, so the array is filled exactly.
                        iBestFitCount = 0;
                        pData = pBytes2Unicode;
                        bytesPosition = *pData;
                        pData++;
                        bool bOutOfOrder = false;

                        while (bytesPosition < 0x10000)
                        {
                            // Get the next word
                            char input = *pData;
                            pData++;

                            if (input == 1)
                            {
                                // Use next word as our byte position
                                bytesPosition = (int)(*pData);
                                pData++;
                            }
                            else if (input < 0x20 && input > 0)
                            {
                                // Advance input characters
                                bytesPosition += input;
                            }
                            else
                            {
                                // Use this character (unless it's unknown, unk just skips 1)
                                if (input != UNICODE_REPLACEMENT_CHAR)
                                {
                                    int correctedChar = bytesPosition;
                                    if (CleanUpBytes(ref correctedChar))
                                    {
                                        // Sometimes correction makes them same as no best fit, skip those.
                                        if (mapBytesToUnicode[correctedChar] != input)
                                        {
                                            // A key changed by CleanUpBytes may no longer be in
                                            // ascending order relative to its neighbours.
                                            if (correctedChar != bytesPosition)
                                            {
                                                bOutOfOrder = true;
                                            }

                                            arrayTemp[iBestFitCount++] = unchecked((char)correctedChar);
                                            arrayTemp[iBestFitCount++] = input;
                                        }
                                    }
                                }

                                // Position gets incremented in any case.
                                bytesPosition++;
                            }
                        }

                        // If they're out of order we need to sort them.
                        if (bOutOfOrder)
                        {
                            Debug.Assert((arrayTemp.Length / 2) < 20,
                                "[DBCSCodePageEncoding.ReadBestFitTable]Expected small best fit table < 20 for code page " + CodePage + ", not " + arrayTemp.Length / 2);

                            // Selection sort over the (key, value) pairs, ordered by key.
                            for (int i = 0; i < arrayTemp.Length - 2; i += 2)
                            {
                                int iSmallest = i;
                                char cSmallest = arrayTemp[i];

                                for (int j = i + 2; j < arrayTemp.Length; j += 2)
                                {
                                    // Find smallest one for front
                                    if (cSmallest > arrayTemp[j])
                                    {
                                        cSmallest = arrayTemp[j];
                                        iSmallest = j;
                                    }
                                }

                                // If smallest one is something else, switch them (key and value together)
                                if (iSmallest != i)
                                {
                                    char temp = arrayTemp[iSmallest];
                                    arrayTemp[iSmallest] = arrayTemp[i];
                                    arrayTemp[i] = temp;
                                    temp = arrayTemp[iSmallest + 1];
                                    arrayTemp[iSmallest + 1] = arrayTemp[i + 1];
                                    arrayTemp[i + 1] = temp;
                                }
                            }
                        }

                        // Remember our array
                        arrayBytesBestFit = arrayTemp;

                        // Now we're at the beginning of the Unicode -> Bytes best fit table, need to count them.
                        // As before, the first word is the starting position.
                        char* pUnicode2Bytes = pData;
                        int unicodePosition = *(pData++);
                        iBestFitCount = 0;

                        // Pass 1 over unicode -> bytes: count the non-zero entries.
                        while (unicodePosition < 0x10000)
                        {
                            // Get the next word
                            char input = *pData;
                            pData++;

                            if (input == 1)
                            {
                                // Use next word as our unicode position
                                unicodePosition = (int)*pData;
                                pData++;
                            }
                            else if (input < 0x20 && input > 0)
                            {
                                // Advance input characters
                                unicodePosition += input;
                            }
                            else
                            {
                                // Same as our unicodePosition or use this character
                                if (input > 0)
                                {
                                    iBestFitCount++;
                                }

                                unicodePosition++;
                            }
                        }

                        // Allocate our table
                        arrayTemp = new char[iBestFitCount * 2];

                        // Pass 2 over unicode -> bytes: rewind and fill the array with real values
                        pData = pUnicode2Bytes;
                        unicodePosition = *(pData++);
                        iBestFitCount = 0;

                        while (unicodePosition < 0x10000)
                        {
                            // Get the next word
                            char input = *pData;
                            pData++;

                            if (input == 1)
                            {
                                // Use next word as our unicode position
                                unicodePosition = (int)*pData;
                                pData++;
                            }
                            else if (input < 0x20 && input > 0)
                            {
                                // Advance input characters
                                unicodePosition += input;
                            }
                            else
                            {
                                if (input > 0)
                                {
                                    // Use this character, may need to clean it up
                                    int correctedChar = (int)input;
                                    if (CleanUpBytes(ref correctedChar))
                                    {
                                        arrayTemp[iBestFitCount++] = unchecked((char)unicodePosition);
                                        // Have to map it to Unicode because best fit will need Unicode value of best fit char.
                                        arrayTemp[iBestFitCount++] = mapBytesToUnicode[correctedChar];
                                    }
                                }

                                unicodePosition++;
                            }
                        }

                        // The counting pass reserved a slot pair for every non-zero entry, but the
                        // fill pass skips entries that CleanUpBytes rejects. Drop the unused tail so
                        // the table does not end with spurious '\0' -> '\0' pairs.
                        // NOTE(review): presumably consumers search this table by key; confirm none
                        // depend on the padded length.
                        if (iBestFitCount < arrayTemp.Length)
                        {
                            Array.Resize(ref arrayTemp, iBestFitCount);
                        }

                        // Remember our array
                        arrayUnicodeBestFit = arrayTemp;
                    }
                }
            }
        }
DBCSCodePageEncoding
CleanUpBytes
CleanUpEndBytes
DBCSCodePageEncoding
GetByteCount
GetBytes
GetCharCount
GetChars
GetDecoder
GetMaxByteCount
GetMaxCharCount
LoadManagedCodePage
ReadBestFitTable