System.Text.ISO2022Encoding.CleanUpBytes C# (CSharp) Method

CleanUpBytes() protected method

protected CleanUpBytes ( ref int bytes ) : bool
bytes: ref int
return: bool
        /// <summary>
        /// Adjusts a code page byte value in place for the ISO-2022 code page
        /// currently selected by <c>CodePage</c>, and reports whether the value
        /// should be kept.
        /// </summary>
        /// <param name="bytes">
        /// The byte value to clean up. Values of 0x100 and above are treated as a
        /// lead byte (bits 8-15) followed by a trail byte (bits 0-7). The value may
        /// be rewritten before the method returns.
        /// </param>
        /// <returns>
        /// <c>false</c> when the value is rejected for the current code page;
        /// otherwise <c>true</c>. Code pages other than 50220, 50221, 50222, 50225
        /// and 52936 always yield <c>true</c> and leave the value untouched.
        /// </returns>
        protected override bool CleanUpBytes(ref int bytes)
        {
            switch (CodePage)
            {
                // Code pages built on top of 932
                case 50220:
                case 50221:
                case 50222:
                    {
                        if (bytes < 0x100)
                        {
                            // Single byte value: move 1/2 width Katakana (0xa1-0xdf)
                            // underneath the half width lead byte.
                            if (bytes >= 0xa1 && bytes <= 0xdf)
                            {
                                bytes += (LEADBYTE_HALFWIDTH << 8) - 0x80;
                            }

                            // 0x81-0x9f and 0xe0-0xfc are CP 932 lead bytes
                            // (CP 20932 uses 0x8e and 0xa1-0xfe, but 0x8e is not used here).
                            // Lead bytes are not emitted; escape sequences are used instead.
                            bool isLeadByte = (bytes >= 0x81 && bytes <= 0x9f) ||
                                              (bytes >= 0xe0 && bytes <= 0xfc);
                            return !isLeadByte;
                        }

                        // Extended characters (0xfa40-0xfc4b) are first folded into a
                        // special range (ported from mlang).
                        if (bytes >= 0xfa40 && bytes <= 0xfc4b)
                        {
                            if (bytes <= 0xfa49)
                            {
                                bytes -= 0x0b51;
                            }
                            else if (bytes <= 0xfa53)
                            {
                                bytes -= 0x072f6;
                            }
                            else if (bytes <= 0xfa57)
                            {
                                bytes -= 0x0b5b;
                            }
                            else if (bytes <= 0xfa5b)
                            {
                                // 0xfa58-0xfa5b each have their own target value.
                                switch (bytes)
                                {
                                    case 0xfa58:
                                        bytes = 0x878a;
                                        break;
                                    case 0xfa59:
                                        bytes = 0x8782;
                                        break;
                                    case 0xfa5a:
                                        bytes = 0x8784;
                                        break;
                                    case 0xfa5b:
                                        bytes = 0x879a;
                                        break;
                                }
                            }
                            else
                            {
                                // 0xfa5c-0xfc4b: the offset depends on the low byte.
                                int low = bytes & 0xff;
                                if (low < 0x5c)
                                {
                                    bytes -= 0x0d5f;
                                }
                                else if (low >= 0x80 && low <= 0x9b)
                                {
                                    bytes -= 0x0d1d;
                                }
                                else
                                {
                                    bytes -= 0x0d1c;
                                }
                            }
                        }

                        // Convert the 932 style value into a 20932-like range
                        // (also ported from mlang). The arithmetic is done on bytes
                        // so that intermediate results wrap exactly as 8-bit values.
                        byte lead = unchecked((byte)(bytes >> 8));
                        byte trail = unchecked((byte)bytes);

                        if (lead > (byte)0x9f)
                        {
                            lead -= (byte)0xb1;
                        }
                        else
                        {
                            lead -= (byte)0x71;
                        }
                        lead = (byte)((lead << 1) | 1);

                        if (trail <= (byte)0x9e)
                        {
                            // Trail bytes above 0x7e move down by one extra position.
                            if (trail > (byte)0x7e)
                            {
                                trail--;
                            }
                            trail -= (byte)0x1f;
                        }
                        else
                        {
                            // Upper half of the trail range belongs to the next lead byte.
                            trail -= (byte)0x7e;
                            lead++;
                        }

                        bytes = (lead << 8) | trail;

                        // All DBCS lead and trail bytes should be >= 0x21 and <= 0x7e, but
                        // that range is deliberately NOT enforced here: Everett/Mlang had
                        // illegal PUA mappings to ISO2022 code pages that are being maintained.
                        break;
                    }
                case 50225:
                    {
                        // 50225 does not rely on lead byte marks and is only a 7 bit
                        // code page, so single byte values 0x80-0xff are rejected.
                        if (bytes >= 0x80 && bytes <= 0xff)
                        {
                            return false;
                        }

                        // For double byte values, both the lead and the trail byte
                        // must lie in 0xa1-0xfe; anything else is ignored.
                        if (bytes >= 0x100)
                        {
                            int trailPart = bytes & 0xff;
                            int leadPart = bytes & 0xff00;
                            if (trailPart < 0xa1 || trailPart == 0xff ||
                                leadPart < 0xa100 || leadPart == 0xff00)
                            {
                                return false;
                            }
                        }

                        // Bring what remains into the 7 bit range.
                        bytes &= 0x7f7f;
                        break;
                    }
                case 52936:
                    // 52936 does not rely on lead byte marks either; dropping 0x81-0xfe
                    // avoids ending up with extra weird fffe mappings.
                    return bytes < 0x81 || bytes > 0xfe;
            }

            return true;
        }