System.Text.ISCIIEncoding.GetChars C# (CSharp) Method

ISCIIEncoding Class Documentation Show file Open project: dotnet/corefx
GetChars() public method

public GetChars ( byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder ) : int
bytes	byte*
byteCount	int
chars	char*
charCount	int
baseDecoder	DecoderNLS
return	int
        /// <summary>
        /// Decodes ISCII bytes into chars one byte at a time, tracking four kinds of pending
        /// state between bytes: a preceding ATR control byte, a preceding Virama, a preceding
        /// Devanagari extension byte, and a preceding character that may still combine with a
        /// following Nukta. When a decoder is supplied, that state is loaded from it at the start
        /// and written back to it at the end (unless only counting).
        /// </summary>
        /// <param name="bytes">Input bytes; asserted non-null.</param>
        /// <param name="byteCount">Number of input bytes; asserted non-negative.</param>
        /// <param name="chars">Output buffer; may be null when the caller is only counting.</param>
        /// <param name="charCount">Capacity of <paramref name="chars"/>; asserted non-negative.</param>
        /// <param name="baseDecoder">Decoder carrying state from a previous call, cast to ISCIIDecoder; may be null.</param>
        /// <returns>The character count reported by the working buffer (buffer.Count) when processing stops.</returns>
        public unsafe override int GetChars(byte* bytes, int byteCount,
                                                char* chars, int charCount, DecoderNLS baseDecoder)
        {
            // Just need to ASSERT, this is called by something else internal that checked parameters already
            // Allow null chars for counting
            Debug.Assert(bytes != null, "[ISCIIEncoding.GetChars]bytes is null");
            Debug.Assert(byteCount >= 0, "[ISCIIEncoding.GetChars]byteCount is negative");
            //            Debug.Assert(chars != null, "[ISCIIEncoding.GetChars]chars is null");
            Debug.Assert(charCount >= 0, "[ISCIIEncoding.GetChars]charCount is negative");

            // Need the ISCII Decoder
            ISCIIDecoder decoder = (ISCIIDecoder)baseDecoder;

            // Get our info.
            EncodingCharBuffer buffer = new EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);

            // Local working copy of the state; defaults apply when there is no decoder.
            int currentCodePage = _defaultCodePage;
            bool bLastATR = false;
            bool bLastVirama = false;
            bool bLastDevenagariStressAbbr = false;
            char cLastCharForNextNukta = '\0';
            char cLastCharForNoNextNukta = '\0';

            // See if there's anything in our decoder
            if (decoder != null)
            {
                currentCodePage = decoder.currentCodePage;
                bLastATR = decoder.bLastATR;
                bLastVirama = decoder.bLastVirama;
                bLastDevenagariStressAbbr = decoder.bLastDevenagariStressAbbr;
                cLastCharForNextNukta = decoder.cLastCharForNextNukta;
                cLastCharForNoNextNukta = decoder.cLastCharForNoNextNukta;
            }

            // Summary flag: true when any one of the pending states is active.
            // (Non-short-circuit '|' is harmless here; the operands are plain values with no side effects.)
            bool bLastSpecial = bLastVirama | bLastATR | bLastDevenagariStressAbbr |
                (cLastCharForNextNukta != '\0');

            // Get our current code page index (some code pages are dups)
            // -1 means "no mapping index"; it is only replaced when the code page is in the supported range.
            int currentCodePageIndex = -1;
            Debug.Assert(currentCodePage >= CodeDevanagari && currentCodePage <= CodePunjabi,
                "[ISCIIEncoding.GetChars]Decoder code page must be >= Devanagari and <= Punjabi, not " + currentCodePage);

            if (currentCodePage >= CodeDevanagari && currentCodePage <= CodePunjabi)
            {
                currentCodePageIndex = s_IndicMappingIndex[currentCodePage];
            }

            // Loop through our input
            while (buffer.MoreData)
            {
                byte b = buffer.GetNextByte();

                // See if last one was special
                if (bLastSpecial)
                {
                    // Now it won't be
                    bLastSpecial = false;

                    // One and only one of our flags should be set
                    // NOTE(review): the assert counts all four states, but its message only prints three
                    // of them (bLastVirama is not included in the format arguments).
                    Debug.Assert(((bLastVirama ? 1 : 0) + (bLastATR ? 1 : 0) +
                               (bLastDevenagariStressAbbr ? 1 : 0) +
                               ((cLastCharForNextNukta > 0) ? 1 : 0)) == 1,
                        String.Format(CultureInfo.InvariantCulture,
                            "[ISCIIEncoding.GetChars]Special cases require 1 and only 1 special case flag: LastATR {0} Dev. {1} Nukta {2}",
                            bLastATR, bLastDevenagariStressAbbr, cLastCharForNextNukta));
                    // If the last one was an ATR, then we'll have to do ATR stuff
                    if (bLastATR)
                    {
                        // We only support Devanagari - Punjabi
                        if (b >= (0x40 | CodeDevanagari) && b <= (0x40 | CodePunjabi))
                        {
                            // Remember the code page
                            // The low four bits of the selector byte are the code page number.
                            currentCodePage = b & 0xf;
                            currentCodePageIndex = s_IndicMappingIndex[currentCodePage];
                            // No longer last ATR
                            bLastATR = false;
                            continue;
                        }

                        // Change back to default?
                        if (b == 0x40)
                        {
                            currentCodePage = _defaultCodePage;
                            currentCodePageIndex = -1;

                            if (currentCodePage >= CodeDevanagari && currentCodePage <= CodePunjabi)
                            {
                                currentCodePageIndex = s_IndicMappingIndex[currentCodePage];
                            }
                            // No longer last ATR
                            bLastATR = false;
                            continue;
                        }

                        // We don't support Roman
                        // 0x41 is handled exactly like 0x40 above: revert to the default code page and
                        // consume the byte without emitting anything.
                        if (b == 0x41)
                        {
                            currentCodePage = _defaultCodePage;
                            currentCodePageIndex = -1;

                            if (currentCodePage >= CodeDevanagari && currentCodePage <= CodePunjabi)
                            {
                                currentCodePageIndex = s_IndicMappingIndex[currentCodePage];
                            }

                            // Even though we don't know how to support Roman, windows didn't add a ? so we don't either.
                            // No longer last ATR
                            bLastATR = false;
                            continue;
                        }

                        // Other code pages & ATR codes not supported, fallback the ATR
                        // If fails, decrements the buffer, which is OK, we remember ATR state.
                        // (bLastATR is still true at the break, so it is what gets saved/flushed after the loop.)
                        if (!buffer.Fallback(ControlATR))
                            break;

                        // No longer last ATR (fell back)
                        bLastATR = false;

                        // we know we can't have any of these other modes
                        Debug.Assert(bLastVirama == false, "[ISCIIEncoding.GetChars] Expected no bLastVirama in bLastATR mode");
                        Debug.Assert(bLastDevenagariStressAbbr == false, "[ISCIIEncoding.GetChars] Expected no bLastDevenagariStressAbbr in bLastATR mode");
                        Debug.Assert(cLastCharForNextNukta == (char)0, "[ISCIIEncoding.GetChars] Expected no cLastCharForNextNukta in bLastATR mode");
                        Debug.Assert(cLastCharForNoNextNukta == (char)0, "[ISCIIEncoding.GetChars] Expected no cLastCharForNoNextNukta in bLastATR mode");
                        // Keep processing this byte
                    }
                    else if (bLastVirama)
                    {
                        // If last was Virama, then we might need ZWNJ or ZWJ instead
                        // Virama + Virama emits ZWNJ; Virama + Nukta emits ZWJ. In both cases the second
                        // byte is consumed and produces no character of its own.
                        if (b == Virama)
                        {
                            // If no room, then stop
                            if (!buffer.AddChar(ZWNJ))
                                break;
                            bLastVirama = false;
                            continue;
                        }
                        if (b == Nukta)
                        {
                            // If no room, then stop
                            if (!buffer.AddChar(ZWJ))
                                break;
                            bLastVirama = false;
                            continue;
                        }

                        // No longer in this mode, fall through to handle character
                        // (Virama itself was added when flag was set last iteration)
                        bLastVirama = false;

                        // We know we can't have any of these other modes
                        Debug.Assert(bLastATR == false, "[ISCIIEncoding.GetChars] Expected no bLastATR in bLastVirama mode");
                        Debug.Assert(bLastDevenagariStressAbbr == false, "[ISCIIEncoding.GetChars] Expected no bLastDevenagariStressAbbr in bLastVirama mode");
                        Debug.Assert(cLastCharForNextNukta == (char)0, "[ISCIIEncoding.GetChars] Expected no cLastCharForNextNukta in bLastVirama mode");
                        Debug.Assert(cLastCharForNoNextNukta == (char)0, "[ISCIIEncoding.GetChars] Expected no cLastCharForNoNextNukta in bLastVirama mode");
                    }
                    else if (bLastDevenagariStressAbbr)
                    {
                        // Last byte was an 0xf0 (ext).
                        // If current is b8 or bf, then we have 952 or 970.  Otherwise fallback
                        if (b == 0xb8)
                        {
                            // It was a 0xb8
                            if (!buffer.AddChar('\x0952'))         // Devanagari stress sign anudatta
                                break;
                            bLastDevenagariStressAbbr = false;
                            continue;
                        }

                        if (b == 0xbf)
                        {
                            // It was a 0xbf
                            if (!buffer.AddChar('\x0970'))         // Devanagari abbr. sign
                                break;
                            bLastDevenagariStressAbbr = false;
                            continue;
                        }

                        // Wasn't an expected pattern, do fallback for f0 (ext)
                        // if fails, fallback will back up our buffer
                        // (the flag stays set at the break so the pending extension byte is not lost)
                        if (!buffer.Fallback(DevenagariExt))
                            break;

                        // Keep processing this byte (turn off mode)
                        // (last character was added when mode was set)
                        bLastDevenagariStressAbbr = false;

                        Debug.Assert(bLastATR == false, "[ISCIIEncoding.GetChars] Expected no bLastATR in bLastDevenagariStressAbbr mode");
                        Debug.Assert(bLastVirama == false, "[ISCIIEncoding.GetChars] Expected no bLastVirama in bLastDevenagariStressAbbr mode");
                        Debug.Assert(cLastCharForNextNukta == (char)0, "[ISCIIEncoding.GetChars] Expected no cLastCharForNextNukta in bLastDevenagariStressAbbr mode");
                        Debug.Assert(cLastCharForNoNextNukta == (char)0, "[ISCIIEncoding.GetChars] Expected no cLastCharForNoNextNukta in bLastDevenagariStressAbbr mode");
                    }
                    else
                    {
                        // We were checking for next char being a nukta
                        Debug.Assert(cLastCharForNextNukta > 0 && cLastCharForNoNextNukta > 0,
                            "[ISCIIEncoding.GetChars]No other special case found, but cLastCharFor(No)NextNukta variable(s) aren't set.");

                        // We'll either add combined char or last char
                        if (b == Nukta)
                        {
                            // We combine nukta with previous char
                            if (!buffer.AddChar(cLastCharForNextNukta))
                                break;

                            // Done already
                            cLastCharForNextNukta = cLastCharForNoNextNukta = '\0';
                            continue;
                        }

                        // No Nukta, just add last character and keep processing current byte
                        if (!buffer.AddChar(cLastCharForNoNextNukta))
                            break;

                        // Keep processing this byte, turn off mode.
                        cLastCharForNextNukta = cLastCharForNoNextNukta = '\0';

                        Debug.Assert(bLastATR == false, "[ISCIIEncoding.GetChars] Expected no bLastATR in cLastCharForNextNukta mode");
                        Debug.Assert(bLastVirama == false, "[ISCIIEncoding.GetChars] Expected no bLastVirama in cLastCharForNextNukta mode");
                        Debug.Assert(bLastDevenagariStressAbbr == false, "[ISCIIEncoding.GetChars] Expected no bLastDevenagariStressAbbr in cLastCharForNextNukta mode");
                    }
                }

                // Now bLastSpecial should be false and all flags false.
                Debug.Assert(!bLastSpecial && !bLastDevenagariStressAbbr && !bLastVirama && !bLastATR &&
                          cLastCharForNextNukta == '\0',
                          "[ISCIIEncoding.GetChars]No special state for last code point should exist at this point.");

                // If its a simple byte, just add it
                // Bytes below MultiByteBegin are emitted with their numeric value unchanged.
                if (b < MultiByteBegin)
                {
                    if (!buffer.AddChar((char)b))
                        break;
                    continue;
                }

                // See if its an ATR marker
                // Nothing is emitted yet; the next byte decides what the ATR means.
                if (b == ControlATR)
                {
                    bLastATR = bLastSpecial = true;
                    continue;
                }

                // Look up both table entries for this byte: slot 0 is the plain character,
                // slot 1 is the alternate used by the checks below.
                Debug.Assert(currentCodePageIndex != -1, "[ISCIIEncoding.GetChars]Expected valid currentCodePageIndex != -1");
                char ch = s_IndicMapping[currentCodePageIndex, 0, b - MultiByteBegin];
                char cAlt = s_IndicMapping[currentCodePageIndex, 1, b - MultiByteBegin];

                // If no 2nd char, just add it, also lonely Nuktas get added as well.
                if (cAlt == 0 || b == Nukta)
                {
                    // If it was an unknown character do fallback

                    // ? if not known.
                    if (ch == 0)
                    {
                        // Fallback the unknown byte
                        if (!buffer.Fallback(b))
                            break;
                    }
                    else
                    {
                        // Add the known character
                        if (!buffer.AddChar(ch))
                            break;
                    }
                    continue;
                }

                // if b == Virama set last Virama so we can do ZWJ or ZWNJ next time if needed.
                if (b == Virama)
                {
                    // Add Virama
                    // The flag is set only after the Virama was successfully written.
                    if (!buffer.AddChar(ch))
                        break;
                    bLastVirama = bLastSpecial = true;
                    continue;
                }

                // See if its one that changes with a Nukta
                // An alternate whose top four bits are clear is treated as the Nukta-combined form;
                // any other value is expected to be the Devanagari extension case asserted below.
                if ((cAlt & 0xF000) == 0)
                {
                    // It could change if next char is a nukta
                    // Output is deferred: neither ch nor cAlt is written until the next byte is seen
                    // (or until the flush handling after the loop).
                    bLastSpecial = true;
                    cLastCharForNextNukta = cAlt;
                    cLastCharForNoNextNukta = ch;
                    continue;
                }

                // We must be the Devenagari special case for F0, B8 & F0, BF
                Debug.Assert(currentCodePage == CodeDevanagari && b == DevenagariExt,
                    String.Format(CultureInfo.InvariantCulture,
                        "[ISCIIEncoding.GetChars] Devenagari special case must {0} not {1} or in Devanagari code page {2} not {3}.",
                        DevenagariExt, b, CodeDevanagari, currentCodePage));
                bLastDevenagariStressAbbr = bLastSpecial = true;
            }

            // If we don't have a decoder, or if we had to flush, then we need to get rid
            // of last ATR, LastNoNextNukta and LastDevenagariExt.
            if (decoder == null || decoder.MustFlush)
            {
                // If these fail (because of Convert with insufficient buffer), then they'll turn off MustFlush as well.
                // NOTE(review): in each failure branch below the extra GetNextByte() call re-advances the
                // buffer; per the existing comments this compensates for the failed call having backed up
                // the byte position. That behavior lives in EncodingCharBuffer and is not visible here.
                if (bLastATR)
                {
                    // Have to add ATR fallback
                    if (buffer.Fallback(ControlATR))
                        bLastATR = false;
                    else
                        // If not successful, convert will maintain state for next time, also
                        // AddChar will have decremented our byte count, however we need it to remain the same
                        buffer.GetNextByte();
                }
                else if (bLastDevenagariStressAbbr)
                {
                    // Have to do fallback for DevenagariExt
                    if (buffer.Fallback(DevenagariExt))
                        bLastDevenagariStressAbbr = false;
                    else
                        // If not successful, convert will maintain state for next time, also
                        // AddChar will have decremented our byte count, however we need it to remain the same
                        buffer.GetNextByte();
                }
                else if (cLastCharForNoNextNukta != '\0')
                {
                    // Have to add our last char because there was no next nukta
                    if (buffer.AddChar(cLastCharForNoNextNukta))
                        cLastCharForNoNextNukta = cLastCharForNextNukta = '\0';
                    else
                        // If not successful, convert will maintain state for next time, also
                        // AddChar will have decremented our byte count, however we need it to remain the same
                        buffer.GetNextByte();
                }
                // LastVirama is unimportant for flushing decoder.
            }

            // Remember any left over stuff
            // (only remember if we aren't counting)
            if (decoder != null && chars != null)
            {
                // If not flushing or have state (from convert) then need to remember state
                // bLastVirama alone does not keep state alive here, matching the flush block above
                // which treats it as unimportant.
                if (!decoder.MustFlush ||
                    cLastCharForNoNextNukta != '\0' || bLastATR || bLastDevenagariStressAbbr)
                {
                    // Either not flushing or had state (from convert)
                    Debug.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
                        "[ISCIIEncoding.GetChars]Expected no state or not converting or not flushing");
                    decoder.currentCodePage = currentCodePage;
                    decoder.bLastVirama = bLastVirama;
                    decoder.bLastATR = bLastATR;
                    decoder.bLastDevenagariStressAbbr = bLastDevenagariStressAbbr;
                    decoder.cLastCharForNextNukta = cLastCharForNextNukta;
                    decoder.cLastCharForNoNextNukta = cLastCharForNoNextNukta;
                }
                else
                {
                    // Flushed with nothing pending: reset the decoder to its initial state,
                    // including reverting to the default code page.
                    decoder.currentCodePage = _defaultCodePage;
                    decoder.bLastVirama = false;
                    decoder.bLastATR = false;
                    decoder.bLastDevenagariStressAbbr = false;
                    decoder.cLastCharForNextNukta = '\0';
                    decoder.cLastCharForNoNextNukta = '\0';
                }
                // Report how many input bytes were consumed, regardless of which branch ran.
                decoder.m_bytesUsed = buffer.BytesUsed;
            }
            // Otherwise we already did fallback and added extra things

            // Return the # of characters we found
            return buffer.Count;
        }
ISCIIEncoding
GetByteCount
GetBytes
GetCharCount
GetChars
GetDecoder
GetEncoder
GetHashCode
GetMaxByteCount
GetMaxCharCount
ISCIIEncoding
ISerializable