/// <summary>
/// Decodes code page 52936 bytes into characters, following the '~' escape sequences
/// that switch between <c>ModeASCII</c> and <c>ModeHZ</c>.
/// </summary>
/// <param name="bytes">Input bytes; asserted to be non-null.</param>
/// <param name="byteCount">Number of input bytes; asserted to be non-negative.</param>
/// <param name="chars">Output characters. When null, no state is written back to <paramref name="decoder"/>.</param>
/// <param name="charCount">Output capacity, handed to the char buffer together with <paramref name="chars"/>.</param>
/// <param name="decoder">Optional decoder providing the starting mode and at most one leftover byte; may be null.</param>
/// <returns>The character count reported by the char buffer.</returns>
private unsafe int GetCharsCP52936(byte* bytes, int byteCount,
    char* chars, int charCount, ISO2022Decoder decoder)
{
    Debug.Assert(byteCount >= 0, "[ISO2022Encoding.GetCharsCP52936]count >=0");
    Debug.Assert(bytes != null, "[ISO2022Encoding.GetCharsCP52936]bytes!=null");

    // The buffer wraps both the input bytes and the output chars.
    EncodingCharBuffer buffer = new EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);

    // Without a decoder we start in ASCII with nothing pending; with one we resume from its state.
    ISO2022Modes mode = ISO2022Modes.ModeASCII;
    int pendingByte = -1;            // -1 means "no byte carried over"
    bool stashedInDecoder = false;   // set once this call parks a byte in the decoder

    if (decoder != null)
    {
        mode = decoder.currentMode;

        // Only read the leftover here; the decoder itself is updated at the very end,
        // and only when we are not counting.
        if (decoder.bytesLeftOverCount != 0)
        {
            pendingByte = decoder.bytesLeftOver[0];
        }
    }

    // Keep going while either the buffer or the carried-over byte still has input for us.
    while (buffer.MoreData || pendingByte >= 0)
    {
        // Consume the carried-over byte first, otherwise pull from the buffer.
        byte lead;
        if (pendingByte < 0)
        {
            lead = buffer.GetNextByte();
        }
        else
        {
            lead = (byte)pendingByte;
            pendingByte = -1;
        }

        // '~' introduces an escape sequence; the following byte selects what it means.
        if (lead == '~')
        {
            if (!buffer.MoreData)
            {
                // The '~' is the last byte available.
                if (decoder == null || decoder.MustFlush)
                {
                    // Nowhere to keep it, so it goes to the fallback. Input is exhausted
                    // either way, so the result of the fallback does not matter here.
                    buffer.Fallback(lead);
                    break;
                }

                // decoder is non-null from here on: the null case left the loop just above.
                decoder.ClearMustFlush();

                // Park the '~' in the decoder, unless we are only counting.
                if (chars != null)
                {
                    decoder.bytesLeftOverCount = 1;
                    decoder.bytesLeftOver[0] = (byte)'~';
                    stashedInDecoder = true;
                }
                break;
            }

            // Read the byte that tells us which escape this is.
            lead = buffer.GetNextByte();

            if (lead == '~' && mode == ISO2022Modes.ModeASCII)
            {
                // "~~" in ASCII mode stands for a single '~' (two bytes consumed).
                if (!buffer.AddChar((char)lead, 2))
                {
                    break;  // add failed, stop converting
                }
                continue;
            }

            if (lead == '{')
            {
                // "~{" switches to double byte mode.
                mode = ISO2022Modes.ModeHZ;
                continue;
            }

            if (lead == '}')
            {
                // "~}" switches back to ASCII mode.
                mode = ISO2022Modes.ModeASCII;
                continue;
            }

            if (lead == '\n')
            {
                // "~\n" is ignored.
                continue;
            }

            // Unknown escape: un-read the second byte and handle the '~' itself
            // as an ordinary byte (or lead byte) below.
            buffer.AdjustBytes(-1);
            lead = (byte)'~';
        }

        if (mode != ISO2022Modes.ModeASCII)
        {
            // Anything that is not ASCII should be ModeHZ.
            Debug.Assert(mode == ISO2022Modes.ModeHZ, "[ISO2022Encoding.GetCharsCP52936]Expected ModeHZ");
            char mapped;

            // Everett let bytes below 0x20 through as if they were ASCII, so do the same.
            if (lead < 0x20)
            {
                goto EmitSingleByte;
            }

            // A double byte character needs a second byte.
            if (!buffer.MoreData)
            {
                if (decoder == null || decoder.MustFlush)
                {
                    // Lone lead byte with nowhere to keep it: fall back. We stop regardless
                    // of the outcome because there is no more input.
                    buffer.Fallback(lead);
                    break;
                }

                // decoder is non-null from here on: the null case left the loop just above.
                decoder.ClearMustFlush();

                // Park the lead byte in the decoder, unless we are only counting.
                if (chars != null)
                {
                    decoder.bytesLeftOverCount = 1;
                    decoder.bytesLeftOver[0] = lead;
                    stashedInDecoder = true;
                }
                break;
            }

            byte trail = buffer.GetNextByte();
            ushort pair = (ushort)(lead << 8 | trail);

            // Accepted ranges: lead 0x21-0x77 with trail 0x21-0x7e, or (as Everett also
            // allowed) the same ranges with the high bit set on both bytes.
            bool inSevenBitRange = lead >= 0x21 && lead <= 0x77 && trail >= 0x21 && trail <= 0x7e;
            bool inEightBitRange = lead >= 0xa1 && lead <= 0xf7 && trail >= 0xa1 && trail <= 0xfe;

            // Everett treated a space as an escape for one single byte character:
            // the byte after it is emitted as is.
            if (lead == ' ' && trail != 0)
            {
                mapped = (char)trail;
                goto EmitPair;
            }

            if (!inSevenBitRange && !inEightBitRange)
            {
                // Everett quirk: a trail byte of 0x20 after a lead of 0x21-0x7d is looked up
                // as 0x2121 (noted in the original as becoming the ideographic space).
                if (trail == 0x20 && 0x21 <= lead && lead <= 0x7d)
                {
                    pair = 0x2121;
                    goto LookupPair;
                }

                // Illegal pair: hand both bytes to the fallback.
                if (!buffer.Fallback((byte)(pair >> 8), (byte)(pair)))
                {
                    break;
                }
                continue;
            }

        LookupPair:
            // The table is indexed with the high bit set on both bytes.
            pair |= 0x8080;
            mapped = mapBytesToUnicode[pair];

        EmitPair:
            // An unmapped pair goes to the fallback as two bytes.
            if (mapped == UNKNOWN_CHAR_FLAG && pair != 0)
            {
                if (!buffer.Fallback((byte)(pair >> 8), (byte)(pair)))
                {
                    break;
                }
                continue;
            }

            if (!buffer.AddChar(mapped, 2))
            {
                break;  // convert ran out of buffer, stop
            }
            continue;
        }

        // Single byte path. Some values above 0x7f are accepted because Everett accepted them,
        // which is why even "ASCII" goes through the lookup table.
    EmitSingleByte:
        char single = mapBytesToUnicode[lead];

        // A non-zero byte that maps to the unknown flag or to 0 is unknown: fall back.
        if ((lead != 0) && (single == UNKNOWN_CHAR_FLAG || single == 0))
        {
            if (!buffer.Fallback(lead))
            {
                break;
            }
            continue;
        }

        if (!buffer.AddChar(single))
        {
            break;  // convert ran out of buffer, stop
        }
    }

    // Persist state in the decoder, but only when we were really converting (not counting).
    if (chars != null && decoder != null)
    {
        // No byte was parked during this call, so whatever was left over has been consumed.
        if (!stashedInDecoder)
        {
            decoder.bytesLeftOverCount = 0;
        }

        bool resetToAscii = decoder.MustFlush && decoder.bytesLeftOverCount == 0;
        if (resetToAscii)
        {
            // Flushed with nothing pending: go back to ASCII.
            decoder.currentMode = ISO2022Modes.ModeASCII;
        }
        else
        {
            // Either not flushing, or there is still state (from convert).
            Debug.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
                "[ISO2022Encoding.GetCharsCP52936]Expected no state or not converting or not flushing");
            decoder.currentMode = mode;
        }

        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    // Number of characters found.
    return buffer.Count;
}