System.Text.ISO2022Encoding.GetCharsCP52936 C# (CSharp) Method

ISO2022Encoding Class Documentation Show file Open project: dotnet/corefx
GetCharsCP52936() private method

private GetCharsCP52936 ( byte bytes, int byteCount, char chars, int charCount, ISO2022Decoder decoder ) : int
bytes	byte
byteCount	int
chars	char
charCount	int
decoder	ISO2022Decoder
return	int
        private unsafe int GetCharsCP52936(byte* bytes, int byteCount,
                                                char* chars, int charCount, ISO2022Decoder decoder)
        {
            Debug.Assert(byteCount >= 0, "[ISO2022Encoding.GetCharsCP52936]count >=0");
            Debug.Assert(bytes != null, "[ISO2022Encoding.GetCharsCP52936]bytes!=null");

            // Get our info.
            EncodingCharBuffer buffer = new EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);

            // No mode information yet
            ISO2022Modes currentMode = ISO2022Modes.ModeASCII;
            int byteLeftOver = -1;
            bool bUsedDecoder = false;

            if (decoder != null)
            {
                currentMode = decoder.currentMode;
                // See if we have leftover decoder buffer to use
                // Don't want to mess up decoder if we're counting or throw an exception
                if (decoder.bytesLeftOverCount != 0)
                {
                    // Load our bytesLeftOver
                    byteLeftOver = decoder.bytesLeftOver[0];
                }
            }

            // Do this until the end, just do '?' replacement because we don't have fallbacks for decodings.
            while (buffer.MoreData || byteLeftOver >= 0)
            {
                byte ch;

                // May have a left over byte
                if (byteLeftOver >= 0)
                {
                    ch = (byte)byteLeftOver;
                    byteLeftOver = -1;
                }
                else
                {
                    ch = buffer.GetNextByte();
                }

                // We're in escape mode
                if (ch == '~')
                {
                    // Next char is type of switch
                    if (!buffer.MoreData)
                    {
                        // We don't have anything left, it'll be in decoder or a ?
                        // don't fail if we are allowing overflows
                        if (decoder == null || decoder.MustFlush)
                        {
                            // We'll be a '?'
                            buffer.Fallback(ch);
                            // break if we fail & break if we don't (because !MoreData)
                            // Add succeeded, continue
                            break;
                        }

                        // Stick it in decoder
                        if (decoder != null)
                            decoder.ClearMustFlush();

                        if (chars != null)
                        {
                            decoder.bytesLeftOverCount = 1;
                            decoder.bytesLeftOver[0] = (byte)'~';
                            bUsedDecoder = true;
                        }
                        break;
                    }

                    // What type is it?, get 2nd byte
                    ch = buffer.GetNextByte();

                    if (ch == '~' && currentMode == ISO2022Modes.ModeASCII)
                    {
                        // Its just a ~~ replacement for ~, add it
                        if (!buffer.AddChar((char)ch, 2))
                            // Add failed, break for converting
                            break;

                        // Add succeeded, continue
                        continue;
                    }
                    else if (ch == '{')
                    {
                        // Switching to Double Byte mode
                        currentMode = ISO2022Modes.ModeHZ;
                        continue;
                    }
                    else if (ch == '}')
                    {
                        // Switching to ASCII mode
                        currentMode = ISO2022Modes.ModeASCII;
                        continue;
                    }
                    else if (ch == '\n')
                    {
                        // Ignore ~\n sequence
                        continue;
                    }
                    else
                    {
                        // Unknown escape, back up and try the '~' as a "normal" byte or lead byte
                        buffer.AdjustBytes(-1);
                        ch = (byte)'~';
                    }
                }

                // go ahead and add our data
                if (currentMode != ISO2022Modes.ModeASCII)
                {
                    // Should be ModeHZ
                    Debug.Assert(currentMode == ISO2022Modes.ModeHZ, "[ISO2022Encoding.GetCharsCP52936]Expected ModeHZ");
                    char cm;

                    // Everett allowed characters < 0x20 to be passed as if they were ASCII
                    if (ch < 0x20)
                    {
                        // Emit it as ASCII
                        goto STOREASCII;
                    }

                    // Its multibyte, should have another byte
                    if (!buffer.MoreData)
                    {
                        // No bytes left
                        // don't fail if we are allowing overflows
                        if (decoder == null || decoder.MustFlush)
                        {
                            // Not enough bytes, fallback lead byte
                            buffer.Fallback(ch);

                            // Break if we fail & break because !MoreData
                            break;
                        }

                        if (decoder != null)
                            decoder.ClearMustFlush();

                        // Stick it in decoder
                        if (chars != null)
                        {
                            decoder.bytesLeftOverCount = 1;
                            decoder.bytesLeftOver[0] = ch;
                            bUsedDecoder = true;
                        }
                        break;
                    }

                    // Everett uses space as an escape character for single SBCS bytes
                    byte ch2 = buffer.GetNextByte();
                    ushort iBytes = (ushort)(ch << 8 | ch2);

                    if (ch == ' ' && ch2 != 0)
                    {
                        // Get next char and treat it like ASCII (Everett treated space like an escape
                        // allowing the next char to be just ascii)
                        cm = (char)ch2;
                        goto STOREMULTIBYTE;
                    }

                    // Bytes should be in range: lead byte 0x21-0x77, trail byte: 0x21 - 0x7e
                    if ((ch < 0x21 || ch > 0x77 || ch2 < 0x21 || ch2 > 0x7e) &&
                    // Everett allowed high bit mappings for same characters (but only if both bits set)
                        (ch < 0xa1 || ch > 0xf7 || ch2 < 0xa1 || ch2 > 0xfe))
                    {
                        // For some reason Everett allowed XX20 to become unicode 3000... (ideo sp)
                        if (ch2 == 0x20 && 0x21 <= ch && ch <= 0x7d)
                        {
                            iBytes = 0x2121;
                            goto MULTIBYTE;
                        }

                        // Illegal char, use fallback.  If lead byte is 0 have to do it special and do it first
                        if (!buffer.Fallback((byte)(iBytes >> 8), (byte)(iBytes)))
                            break;
                        continue;
                    }

                MULTIBYTE:
                    iBytes |= 0x8080;
                    // Look up the multibyte char to stick it in our data

                    // We have a iBytes to try to convert.
                    cm = mapBytesToUnicode[iBytes];

                STOREMULTIBYTE:

                    // See if it was unknown
                    if (cm == UNKNOWN_CHAR_FLAG && iBytes != 0)
                    {
                        // Fall back the unknown stuff
                        if (!buffer.Fallback((byte)(iBytes >> 8), (byte)(iBytes)))
                            break;
                        continue;
                    }

                    if (!buffer.AddChar(cm, 2))
                        break;              // convert ran out of buffer, stop
                    continue;
                }

            // Just ASCII
            // We allow some chars > 7f because everett did, so we have to look them up.
            STOREASCII:
                char c = mapBytesToUnicode[ch];

                // Check if it was unknown
                if ((c == UNKNOWN_CHAR_FLAG || c == 0) && (ch != 0))
                {
                    // fallback the unkown bytes
                    if (!buffer.Fallback((byte)ch))
                        break;
                    continue;
                }

                // Go ahead and add our ASCII character
                if (!buffer.AddChar(c))
                    break;                  // convert ran out of buffer, stop
            }

            // Need to remember our state, IF we're not counting
            if (chars != null && decoder != null)
            {
                if (!bUsedDecoder)
                {
                    // If we didn't use it, clear the byte left over
                    decoder.bytesLeftOverCount = 0;
                }

                if (decoder.MustFlush && decoder.bytesLeftOverCount == 0)
                {
                    decoder.currentMode = ISO2022Modes.ModeASCII;
                }
                else
                {
                    // Either not flushing or had state (from convert)
                    Debug.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
                        "[ISO2022Encoding.GetCharsCP52936]Expected no state or not converting or not flushing");

                    decoder.currentMode = currentMode;
                }
                decoder.m_bytesUsed = buffer.BytesUsed;
            }

            // Return # of characters we found
            return buffer.Count;
        }
ISO2022Encoding
CheckEscapeSequenceJP
CheckEscapeSequenceKR
CleanUpBytes
DecrementEscapeBytes
GetByteCount
GetBytes
GetBytesCP50225KR
GetBytesCP5022xJP
GetBytesCP52936
GetCharCount
GetChars