/// <summary>
/// Decodes ISCII bytes into chars, or only counts the resulting chars when
/// <paramref name="chars"/> is null.
/// </summary>
/// <remarks>
/// The main loop is a state machine driven one byte at a time. A byte can leave a
/// "pending" condition that changes how the NEXT byte is interpreted:
/// <list type="bullet">
/// <item><description>bLastATR: previous byte was ControlATR; the next byte selects a code page.</description></item>
/// <item><description>bLastVirama: previous byte was Virama (already emitted); a following Virama or Nukta emits ZWNJ or ZWJ.</description></item>
/// <item><description>bLastDevenagariStressAbbr: previous byte was DevenagariExt; 0xb8 / 0xbf complete a two-byte sequence.</description></item>
/// <item><description>cLastCharForNextNukta / cLastCharForNoNextNukta: previous byte maps to one of two chars depending on whether a Nukta follows; nothing has been emitted for it yet.</description></item>
/// </list>
/// The pending state is loaded from the decoder (when one is supplied) and written back to it
/// at the end when not counting, so that a sequence split across calls can be resumed.
/// Parameters are only asserted here; per the original note, the caller has already validated them.
/// </remarks>
/// <param name="bytes">Input bytes (asserted non-null).</param>
/// <param name="byteCount">Number of input bytes (asserted non-negative).</param>
/// <param name="chars">Output buffer, or null to count only.</param>
/// <param name="charCount">Capacity of <paramref name="chars"/> (asserted non-negative).</param>
/// <param name="baseDecoder">
/// Decoder holding state between calls; cast to ISCIIDecoder. May be null, in which case the
/// encoding's default code page is used and any pending state is flushed at the end.
/// </param>
/// <returns>The value of buffer.Count once processing stops.</returns>
public unsafe override int GetChars(byte* bytes, int byteCount,
char* chars, int charCount, DecoderNLS baseDecoder)
{
// Just need to ASSERT, this is called by something else internal that checked parameters already
// Allow null chars for counting
Debug.Assert(bytes != null, "[ISCIIEncoding.GetChars]bytes is null");
Debug.Assert(byteCount >= 0, "[ISCIIEncoding.GetChars]byteCount is negative");
// chars is intentionally NOT asserted non-null: null means "count only".
// Debug.Assert(chars != null, "[ISCIIEncoding.GetChars]chars is null");
Debug.Assert(charCount >= 0, "[ISCIIEncoding.GetChars]charCount is negative");
// Need the ISCII Decoder (may be null; every use below is guarded)
ISCIIDecoder decoder = (ISCIIDecoder)baseDecoder;
// Get our info.
EncodingCharBuffer buffer = new EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);
// Working copy of the decoding state; starts at the defaults used when there is no decoder.
int currentCodePage = _defaultCodePage;
bool bLastATR = false;
bool bLastVirama = false;
bool bLastDevenagariStressAbbr = false;
char cLastCharForNextNukta = '\0';
char cLastCharForNoNextNukta = '\0';
// See if there's anything in our decoder; if so, resume from the state it remembered.
if (decoder != null)
{
currentCodePage = decoder.currentCodePage;
bLastATR = decoder.bLastATR;
bLastVirama = decoder.bLastVirama;
bLastDevenagariStressAbbr = decoder.bLastDevenagariStressAbbr;
cLastCharForNextNukta = decoder.cLastCharForNextNukta;
cLastCharForNoNextNukta = decoder.cLastCharForNoNextNukta;
}
// bLastSpecial is a single summary flag: true when any pending condition exists,
// so the common case in the loop needs only one test.
bool bLastSpecial = bLastVirama | bLastATR | bLastDevenagariStressAbbr |
(cLastCharForNextNukta != '\0');
// Get our current code page index (some code pages are dups)
// -1 means "no valid index"; it is asserted against before the mapping table is read.
int currentCodePageIndex = -1;
Debug.Assert(currentCodePage >= CodeDevanagari && currentCodePage <= CodePunjabi,
"[ISCIIEncoding.GetChars]Decoder code page must be >= Devanagari and <= Punjabi, not " + currentCodePage);
if (currentCodePage >= CodeDevanagari && currentCodePage <= CodePunjabi)
{
currentCodePageIndex = s_IndicMappingIndex[currentCodePage];
}
// Loop through our input
while (buffer.MoreData)
{
byte b = buffer.GetNextByte();
// See if last one was special
if (bLastSpecial)
{
// Now it won't be.
// Note: only the summary flag is cleared here. The individual flag stays set until its
// branch succeeds, so a 'break' below leaves the pending state intact for saving.
bLastSpecial = false;
// One and only one of our flags should be set
Debug.Assert(((bLastVirama ? 1 : 0) + (bLastATR ? 1 : 0) +
(bLastDevenagariStressAbbr ? 1 : 0) +
((cLastCharForNextNukta > 0) ? 1 : 0)) == 1,
String.Format(CultureInfo.InvariantCulture,
"[ISCIIEncoding.GetChars]Special cases require 1 and only 1 special case flag: LastATR {0} Dev. {1} Nukta {2}",
bLastATR, bLastDevenagariStressAbbr, cLastCharForNextNukta));
// If the last one was an ATR, then we'll have to do ATR stuff
if (bLastATR)
{
// We only support Devanagari - Punjabi
// A code page selector is 0x40 | code page number.
if (b >= (0x40 | CodeDevanagari) && b <= (0x40 | CodePunjabi))
{
// Remember the code page (the low nibble of the selector)
currentCodePage = b & 0xf;
currentCodePageIndex = s_IndicMappingIndex[currentCodePage];
// No longer last ATR
bLastATR = false;
continue;
}
// Change back to default?
if (b == 0x40)
{
currentCodePage = _defaultCodePage;
currentCodePageIndex = -1;
if (currentCodePage >= CodeDevanagari && currentCodePage <= CodePunjabi)
{
currentCodePageIndex = s_IndicMappingIndex[currentCodePage];
}
// No longer last ATR
bLastATR = false;
continue;
}
// We don't support Roman, so 0x41 is treated exactly like "back to default".
if (b == 0x41)
{
currentCodePage = _defaultCodePage;
currentCodePageIndex = -1;
if (currentCodePage >= CodeDevanagari && currentCodePage <= CodePunjabi)
{
currentCodePageIndex = s_IndicMappingIndex[currentCodePage];
}
// Even though we don't know how to support Roman, windows didn't add a ? so we don't either.
// No longer last ATR
bLastATR = false;
continue;
}
// Other code pages & ATR codes not supported, fallback the ATR
// If fails, decrements the buffer, which is OK, we remember ATR state.
if (!buffer.Fallback(ControlATR))
break;
// No longer last ATR (fell back)
bLastATR = false;
// we know we can't have any of these other modes
Debug.Assert(bLastVirama == false, "[ISCIIEncoding.GetChars] Expected no bLastVirama in bLastATR mode");
Debug.Assert(bLastDevenagariStressAbbr == false, "[ISCIIEncoding.GetChars] Expected no bLastDevenagariStressAbbr in bLastATR mode");
Debug.Assert(cLastCharForNextNukta == (char)0, "[ISCIIEncoding.GetChars] Expected no cLastCharForNextNukta in bLastATR mode");
Debug.Assert(cLastCharForNoNextNukta == (char)0, "[ISCIIEncoding.GetChars] Expected no cLastCharForNoNextNukta in bLastATR mode");
// Keep processing this byte: control falls out of the special-case block
// and b is handled by the normal path below.
}
else if (bLastVirama)
{
// If last was Virama, then we might need ZWNJ or ZWJ instead
// Virama followed by Virama -> emit ZWNJ after the Virama already written.
if (b == Virama)
{
// If no room, then stop
if (!buffer.AddChar(ZWNJ))
break;
bLastVirama = false;
continue;
}
// Virama followed by Nukta -> emit ZWJ after the Virama already written.
if (b == Nukta)
{
// If no room, then stop
if (!buffer.AddChar(ZWJ))
break;
bLastVirama = false;
continue;
}
// No longer in this mode, fall through to handle character
// (Virama itself was added when flag was set last iteration)
bLastVirama = false;
// We know we can't have any of these other modes
Debug.Assert(bLastATR == false, "[ISCIIEncoding.GetChars] Expected no bLastATR in bLastVirama mode");
Debug.Assert(bLastDevenagariStressAbbr == false, "[ISCIIEncoding.GetChars] Expected no bLastDevenagariStressAbbr in bLastVirama mode");
Debug.Assert(cLastCharForNextNukta == (char)0, "[ISCIIEncoding.GetChars] Expected no cLastCharForNextNukta in bLastVirama mode");
Debug.Assert(cLastCharForNoNextNukta == (char)0, "[ISCIIEncoding.GetChars] Expected no cLastCharForNoNextNukta in bLastVirama mode");
}
else if (bLastDevenagariStressAbbr)
{
// Last byte was an 0xf0 (ext).
// If current is b8 or bf, then we have 952 or 970. Otherwise fallback
if (b == 0xb8)
{
// It was a 0xb8
if (!buffer.AddChar('\x0952')) // Devanagari stress sign anudatta
break;
bLastDevenagariStressAbbr = false;
continue;
}
if (b == 0xbf)
{
// It was a 0xbf
if (!buffer.AddChar('\x0970')) // Devanagari abbr. sign
break;
bLastDevenagariStressAbbr = false;
continue;
}
// Wasn't an expected pattern, do fallback for f0 (ext)
// if fails, fallback will back up our buffer
if (!buffer.Fallback(DevenagariExt))
break;
// Keep processing this byte (turn off mode)
// (last character was added when mode was set)
bLastDevenagariStressAbbr = false;
Debug.Assert(bLastATR == false, "[ISCIIEncoding.GetChars] Expected no bLastATR in bLastDevenagariStressAbbr mode");
Debug.Assert(bLastVirama == false, "[ISCIIEncoding.GetChars] Expected no bLastVirama in bLastDevenagariStressAbbr mode");
Debug.Assert(cLastCharForNextNukta == (char)0, "[ISCIIEncoding.GetChars] Expected no cLastCharForNextNukta in bLastDevenagariStressAbbr mode");
Debug.Assert(cLastCharForNoNextNukta == (char)0, "[ISCIIEncoding.GetChars] Expected no cLastCharForNoNextNukta in bLastDevenagariStressAbbr mode");
}
else
{
// We were checking for next char being a nukta
// Unlike the Virama case, nothing was emitted for the previous byte yet:
// exactly one of the two remembered chars is written now.
Debug.Assert(cLastCharForNextNukta > 0 && cLastCharForNoNextNukta > 0,
"[ISCIIEncoding.GetChars]No other special case found, but cLastCharFor(No)NextNukta variable(s) aren't set.");
// We'll either add combined char or last char
if (b == Nukta)
{
// We combine nukta with previous char
if (!buffer.AddChar(cLastCharForNextNukta))
break;
// Done already; the Nukta byte is consumed by the combined char
cLastCharForNextNukta = cLastCharForNoNextNukta = '\0';
continue;
}
// No Nukta, just add last character and keep processing current byte
if (!buffer.AddChar(cLastCharForNoNextNukta))
break;
// Keep processing this byte, turn off mode.
cLastCharForNextNukta = cLastCharForNoNextNukta = '\0';
Debug.Assert(bLastATR == false, "[ISCIIEncoding.GetChars] Expected no bLastATR in cLastCharForNextNukta mode");
Debug.Assert(bLastVirama == false, "[ISCIIEncoding.GetChars] Expected no bLastVirama in cLastCharForNextNukta mode");
Debug.Assert(bLastDevenagariStressAbbr == false, "[ISCIIEncoding.GetChars] Expected no bLastDevenagariStressAbbr in cLastCharForNextNukta mode");
}
}
// Normal path: reached for every byte that was not fully consumed by a special case above.
// Now bLastSpecial should be false and all flags false.
Debug.Assert(!bLastSpecial && !bLastDevenagariStressAbbr && !bLastVirama && !bLastATR &&
cLastCharForNextNukta == '\0',
"[ISCIIEncoding.GetChars]No special state for last code point should exist at this point.");
// If it's a simple byte (below MultiByteBegin), just add it with the same value
if (b < MultiByteBegin)
{
if (!buffer.AddChar((char)b))
break;
continue;
}
// See if it's an ATR marker; it emits nothing itself, the next byte decides what happens
if (b == ControlATR)
{
bLastATR = bLastSpecial = true;
continue;
}
Debug.Assert(currentCodePageIndex != -1, "[ISCIIEncoding.GetChars]Expected valid currentCodePageIndex != -1");
// Look up both table entries for this byte: [.., 0, ..] is the plain char,
// [.., 1, ..] is the alternate used by the special cases below.
char ch = s_IndicMapping[currentCodePageIndex, 0, b - MultiByteBegin];
char cAlt = s_IndicMapping[currentCodePageIndex, 1, b - MultiByteBegin];
// If no 2nd char, just add it, also lonely Nuktas get added as well.
if (cAlt == 0 || b == Nukta)
{
// If it was an unknown character do fallback
// ? if not known.
if (ch == 0)
{
// Fallback the unknown byte
if (!buffer.Fallback(b))
break;
}
else
{
// Add the known character
if (!buffer.AddChar(ch))
break;
}
continue;
}
// if b == Virama set last Virama so we can do ZWJ or ZWNJ next time if needed.
if (b == Virama)
{
// Add Virama now; the flag is only set once the char has been written
if (!buffer.AddChar(ch))
break;
bLastVirama = bLastSpecial = true;
continue;
}
// See if it's one that changes with a Nukta
// NOTE(review): the test relies on how alternates are encoded in s_IndicMapping
// (top nibble zero here) - confirm against the table definition.
if ((cAlt & 0xF000) == 0)
{
// It could change if next char is a nukta, so defer output until the next byte is seen
bLastSpecial = true;
cLastCharForNextNukta = cAlt;
cLastCharForNoNextNukta = ch;
continue;
}
// We must be the Devenagari special case for F0, B8 & F0, BF
Debug.Assert(currentCodePage == CodeDevanagari && b == DevenagariExt,
String.Format(CultureInfo.InvariantCulture,
"[ISCIIEncoding.GetChars] Devenagari special case must {0} not {1} or in Devanagari code page {2} not {3}.",
DevenagariExt, b, CodeDevanagari, currentCodePage));
bLastDevenagariStressAbbr = bLastSpecial = true;
}
// If we don't have a decoder, or if we had to flush, then we need to get rid
// of last ATR, LastNoNextNukta and LastDevenagariExt.
if (decoder == null || decoder.MustFlush)
{
// If these fail (because of Convert with insufficient buffer), then they'll turn off MustFlush as well.
if (bLastATR)
{
// Have to add ATR fallback
if (buffer.Fallback(ControlATR))
bLastATR = false;
else
// If not successful, convert will maintain state for next time. Per the original note,
// the failed call will have decremented our byte count, however we need it to remain
// the same, so consume one byte again.
buffer.GetNextByte();
}
else if (bLastDevenagariStressAbbr)
{
// Have to do fallback for DevenagariExt
if (buffer.Fallback(DevenagariExt))
bLastDevenagariStressAbbr = false;
else
// If not successful, convert will maintain state for next time. Per the original note,
// the failed call will have decremented our byte count, however we need it to remain
// the same, so consume one byte again.
buffer.GetNextByte();
}
else if (cLastCharForNoNextNukta != '\0')
{
// Have to add our last char because there was no next nukta
if (buffer.AddChar(cLastCharForNoNextNukta))
cLastCharForNoNextNukta = cLastCharForNextNukta = '\0';
else
// If not successful, convert will maintain state for next time, also
// AddChar will have decremented our byte count, however we need it to remain the same
buffer.GetNextByte();
}
// LastVirama is unimportant for flushing decoder
// (the Virama char itself was already written when the flag was set).
}
// Remember any left over stuff
// (only remember if we aren't counting)
if (decoder != null && chars != null)
{
// If not flushing or have state (from convert) then need to remember state.
// bLastVirama is deliberately not part of this test, matching the flush logic above.
if (!decoder.MustFlush ||
cLastCharForNoNextNukta != '\0' || bLastATR || bLastDevenagariStressAbbr)
{
// Either not flushing or had state (from convert)
Debug.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
"[ISCIIEncoding.GetChars]Expected no state or not converting or not flushing");
decoder.currentCodePage = currentCodePage;
decoder.bLastVirama = bLastVirama;
decoder.bLastATR = bLastATR;
decoder.bLastDevenagariStressAbbr = bLastDevenagariStressAbbr;
decoder.cLastCharForNextNukta = cLastCharForNextNukta;
decoder.cLastCharForNoNextNukta = cLastCharForNoNextNukta;
}
else
{
// Flushed with nothing pending: reset the decoder to its initial state,
// including going back to the default code page.
decoder.currentCodePage = _defaultCodePage;
decoder.bLastVirama = false;
decoder.bLastATR = false;
decoder.bLastDevenagariStressAbbr = false;
decoder.cLastCharForNextNukta = '\0';
decoder.cLastCharForNoNextNukta = '\0';
}
// Report how many input bytes were consumed by this call.
decoder.m_bytesUsed = buffer.BytesUsed;
}
// Otherwise we already did fallback and added extra things
// Return the # of characters we found
return buffer.Count;
}