/// <summary>
/// Adjusts a raw table value, in place, into the form this encoding works with,
/// according to the current code page.
/// </summary>
/// <param name="bytes">
/// The single or double byte value to adjust. It may be rewritten even when the
/// method returns false.
/// </param>
/// <returns>
/// false when the value is one the code below rejects (lead byte values, or values
/// outside the accepted range for the code page); true otherwise, including for any
/// code page not listed here.
/// </returns>
protected override bool CleanUpBytes(ref int bytes)
{
    switch (CodePage)
    {
        // Code pages built on code page 932 data
        case 50220:
        case 50221:
        case 50222:
        {
            if (bytes < 0x100)
            {
                // Single byte value. 0xa1-0xdf (half-width Katakana per the original
                // notes) is moved under the LEADBYTE_HALFWIDTH marker.
                if (bytes >= 0xa1 && bytes <= 0xdf)
                    bytes += (LEADBYTE_HALFWIDTH << 8) - 0x80;

                // 0x81-0x9f and 0xe0-0xfc are CP 932 lead bytes.
                // (CP 20932 would use 0x8e and 0xa1-0xfe, but 0x8e is not used here.)
                // Lead bytes are not kept; escape sequences are used instead.
                bool isLeadByte = bytes >= 0x81 &&
                                  (bytes <= 0x9f || (bytes >= 0xe0 && bytes <= 0xfc));
                return !isLeadByte;
            }

            // Double byte value: first fold the extended range 0xfa40-0xfc4b into a
            // special range (logic ported from mlang).
            if (bytes >= 0xfa40 && bytes <= 0xfc4b)
            {
                if (bytes <= 0xfa49)
                {
                    bytes -= 0x0b51;
                }
                else if (bytes <= 0xfa53)
                {
                    bytes -= 0x72f6;
                }
                else if (bytes <= 0xfa57)
                {
                    bytes -= 0x0b5b;
                }
                else if (bytes <= 0xfa5b)
                {
                    // Four individually remapped values.
                    switch (bytes)
                    {
                        case 0xfa58: bytes = 0x878a; break;
                        case 0xfa59: bytes = 0x8782; break;
                        case 0xfa5a: bytes = 0x8784; break;
                        case 0xfa5b: bytes = 0x879a; break;
                    }
                }
                else
                {
                    // 0xfa5c-0xfc4b: the offset depends on the low byte.
                    int lowByte = bytes & 0xff;
                    if (lowByte < 0x5c)
                        bytes -= 0x0d5f;
                    else if (lowByte >= 0x80 && lowByte <= 0x9b)
                        bytes -= 0x0d1d;
                    else
                        bytes -= 0x0d1c;
                }
            }

            // Convert the 932 style lead/trail pair to a 20932 like range
            // (also ported from mlang). The arithmetic stays in byte so that it
            // wraps exactly as an 8 bit value would.
            byte lead = unchecked((byte)(bytes >> 8));
            byte trail = unchecked((byte)bytes);

            byte leadBias = (lead > (byte)0x9f) ? (byte)0xb1 : (byte)0x71;
            lead -= leadBias;
            lead = (byte)((lead << 1) + 1);

            if (trail > (byte)0x9e)
            {
                // Upper half of the trail range belongs to the next lead byte.
                lead++;
                trail -= (byte)0x7e;
            }
            else
            {
                // Trail bytes above 0x7e shift down by one extra position.
                trail -= (trail > (byte)0x7e) ? (byte)0x20 : (byte)0x1f;
            }

            bytes = (lead << 8) | trail;

            // No range check is made on the result. All DBCS lead and trail bytes
            // should be >= 0x21 and <= 0x7e, but Everett/Mlang had illegal PUA
            // mappings to ISO2022 code pages that are being maintained, so the
            // following check is intentionally left disabled:
            //   if ((bytes & 0xFF00) < 0x2100 || (bytes & 0xFF00) > 0x7e00 ||
            //       (bytes & 0xFF) < 0x21 || (bytes & 0xFF) > 0x7e)
            //       return false;
            return true;
        }

        case 50225:
        {
            // This code page does not rely on lead byte marks and is only 7 bit,
            // so single byte values 0x80-0xff are rejected.
            if (bytes >= 0x80 && bytes <= 0xff)
                return false;

            // For double byte values both halves must lie in 0xa1-0xfe.
            if (bytes >= 0x100)
            {
                int trailPart = bytes & 0xff;
                int leadPart = bytes & 0xff00;
                if (trailPart < 0xa1 || trailPart == 0xff ||
                    leadPart < 0xa100 || leadPart == 0xff00)
                {
                    return false;
                }
            }

            // Strip the high bit of each byte to land in the 7 bit range.
            bytes &= 0x7f7f;
            return true;
        }

        case 52936:
        {
            // Lead byte marks are not relied on for 52936; rejecting 0x81-0xfe
            // avoids extra odd fffe mappings.
            return bytes < 0x81 || bytes > 0xfe;
        }
    }

    // Any other code page: accept the value untouched.
    return true;
}