/// <summary>
/// Reads the RTF character stream from <paramref name="reader"/> and drives the parser:
/// plain text is collected in <c>curText</c>, control words/symbols are forwarded to
/// <c>HandleTag</c>/<c>ParseTag</c>, and group begin/end is reported via
/// <c>NotifyGroupBegin</c>/<c>NotifyGroupEnd</c>.
/// </summary>
/// <remarks>
/// The reader is closed when this method returns, regardless of whether parsing
/// succeeded or an exception was thrown.
/// </remarks>
/// <param name="reader">The source of RTF characters; closed by this method.</param>
/// <exception cref="RtfHexEncodingException">
/// A <c>\'</c> control symbol is not followed by two hexadecimal digits.
/// </exception>
/// <exception cref="RtfBraceNestingException">
/// A closing brace appears without a matching opening brace, or the input ends while
/// groups are still open.
/// </exception>
/// <exception cref="RtfEmptyDocumentException">
/// The input does not contain a single completed group.
/// </exception>
private void ParseRtf( TextReader reader )
{
    int groupCount = 0; // number of groups closed so far
    const int eof = -1;

    try
    {
        // reset all per-document parser state
        curText = new StringBuilder();
        unicodeSkipCountStack.Clear();
        codePageStack.Clear();
        unicodeSkipCount = 1;
        level = 0;
        tagCountAtLastGroupStart = 0;
        tagCount = 0;
        fontTableStartLevel = -1;
        targetFont = null;
        expectingThemeFont = false;
        fontToCodePageMapping.Clear();
        hexDecodingBuffer.SetLength( 0 );
        UpdateEncoding( RtfSpec.AnsiCodePage );

        int nextChar = PeekNextChar( reader, false );
        // set when the hex look-ahead below has already read the backslash that
        // 'nextChar' refers to, so the next iteration must not read it a second time
        bool backslashAlreadyConsumed = false;
        while ( nextChar != eof )
        {
            // look-ahead character obtained while handling hex content; when valid it
            // becomes 'nextChar' of the following iteration instead of a fresh peek
            int peekChar = 0;
            bool peekCharValid = false;
            switch ( nextChar )
            {
                case '\\':
                    if ( !backslashAlreadyConsumed )
                    {
                        reader.Read(); // must still consume the 'peek'ed char
                    }
                    int secondChar = PeekNextChar( reader, true );
                    switch ( secondChar )
                    {
                        case '\\':
                        case '{':
                        case '}':
                            // escaped special character: becomes part of the text
                            curText.Append( ReadOneChar( reader ) ); // must still consume the 'peek'ed char
                            break;
                        case '\n':
                        case '\r':
                            reader.Read(); // must still consume the 'peek'ed char
                            // must be treated as a 'par' tag if preceded by a backslash
                            // (see RTF spec page 144)
                            HandleTag( reader, new RtfTag( RtfSpec.TagParagraph ) );
                            break;
                        case '\'':
                            reader.Read(); // must still consume the 'peek'ed char
                            char hex1 = (char)ReadOneByte( reader );
                            char hex2 = (char)ReadOneByte( reader );
                            if ( !IsHexDigit( hex1 ) )
                            {
                                throw new RtfHexEncodingException( Strings.InvalidFirstHexDigit( hex1 ) );
                            }
                            if ( !IsHexDigit( hex2 ) )
                            {
                                throw new RtfHexEncodingException( Strings.InvalidSecondHexDigit( hex2 ) );
                            }
                            // machine-readable data: parse independently of the current culture
                            int decodedByte = Int32.Parse(
                                "" + hex1 + hex2, NumberStyles.HexNumber, CultureInfo.InvariantCulture );
                            hexDecodingBuffer.WriteByte( (byte)decodedByte );
                            peekChar = PeekNextChar( reader, false );
                            peekCharValid = true;
                            bool mustFlushHexContent = true;
                            if ( peekChar == '\\' )
                            {
                                // another control follows: read its backslash to find out
                                // whether it is a further hex escape
                                reader.Read();
                                backslashAlreadyConsumed = true;
                                int continuationChar = PeekNextChar( reader, false );
                                if ( continuationChar == '\'' )
                                {
                                    mustFlushHexContent = false;
                                }
                            }
                            if ( mustFlushHexContent )
                            {
                                // hex content may _NOT_ be decoded character-by-character as
                                // this results in invalid text for japanese/chinese content ...
                                // -> we wait until the following content is non-hex and then
                                // flush the pending data. ugly but necessary with our
                                // decoding model.
                                DecodeCurrentHexBuffer();
                            }
                            break;
                        case '|':
                        case '~':
                        case '-':
                        case '_':
                        case ':':
                        case '*':
                            // single-character control symbol: handled as a tag of its own
                            HandleTag( reader, new RtfTag( "" + ReadOneChar( reader ) ) ); // must still consume the 'peek'ed char
                            break;
                        default:
                            ParseTag( reader );
                            break;
                    }
                    break;
                case '\n':
                case '\r':
                    // line breaks without a preceding backslash are dropped
                    reader.Read(); // must still consume the 'peek'ed char
                    break;
                case '\t':
                    reader.Read(); // must still consume the 'peek'ed char
                    // should be treated as a 'tab' tag (see RTF spec page 144)
                    HandleTag( reader, new RtfTag( RtfSpec.TagTabulator ) );
                    break;
                case '{':
                    reader.Read(); // must still consume the 'peek'ed char
                    FlushText();
                    NotifyGroupBegin();
                    tagCountAtLastGroupStart = tagCount;
                    // remember the settings which are restored when this group is closed
                    unicodeSkipCountStack.Push( unicodeSkipCount );
                    codePageStack.Push( encoding == null ? 0 : encoding.CodePage );
                    level++;
                    break;
                case '}':
                    reader.Read(); // must still consume the 'peek'ed char
                    FlushText();
                    if ( level > 0 )
                    {
                        unicodeSkipCount = (int)unicodeSkipCountStack.Pop();
                        if ( fontTableStartLevel == level )
                        {
                            // leaving the group in which the font table was started
                            fontTableStartLevel = -1;
                            targetFont = null;
                            expectingThemeFont = false;
                        }
                        UpdateEncoding( (int)codePageStack.Pop() );
                        level--;
                        NotifyGroupEnd();
                        groupCount++;
                    }
                    else
                    {
                        throw new RtfBraceNestingException( Strings.ToManyBraces );
                    }
                    break;
                default:
                    curText.Append( ReadOneChar( reader ) ); // must still consume the 'peek'ed char
                    break;
            }

            // Stop once the root group has been closed. 'groupCount > 0' makes sure this
            // only triggers AFTER a group was completed: level is also 0 before the first
            // opening brace, and content preceding the root group must not abort parsing.
            if ( level == 0 && groupCount > 0 && IgnoreContentAfterRootGroup )
            {
                break;
            }

            if ( peekCharValid )
            {
                nextChar = peekChar;
            }
            else
            {
                nextChar = PeekNextChar( reader, false );
                backslashAlreadyConsumed = false;
            }
        }
        FlushText();
    }
    finally
    {
        // release the reader on failure as well; the null check keeps the original
        // exception intact if no reader was supplied
        if ( reader != null )
        {
            reader.Close();
        }
    }

    if ( level > 0 )
    {
        throw new RtfBraceNestingException( Strings.ToFewBraces );
    }
    if ( groupCount == 0 )
    {
        throw new RtfEmptyDocumentException( Strings.NoRtfContent );
    }
    curText = null;
}