/// <summary>
/// Reads the next token in "content" context and records it in
/// <c>_nextToken</c> / <c>_nextTokenType</c>.
/// </summary>
/// <remarks>
/// The token buffer is cleared first. Then, in order of precedence:
/// end of stream yields an EOF token; a tag start yields either a
/// closing-tag-start ("&lt;/") or an opening-tag-start ("&lt;") token;
/// a directive start is handed to the CDATA, comment or unknown-directive
/// reader; anything else is accumulated as a text token with runs of
/// whitespace/control characters collapsed.
/// Must not be called once the EOF token has already been produced.
/// </remarks>
internal void GetNextContentToken()
{
    Debug.Assert(_nextTokenType != HtmlTokenType.EOF);

    // Start from an empty token buffer.
    _nextToken.Length = 0;

    // Nothing left to read.
    if (this.IsAtEndOfStream)
    {
        _nextTokenType = HtmlTokenType.EOF;
        return;
    }

    // Start of an opening or closing tag.
    if (this.IsAtTagStart)
    {
        this.GetNextCharacter();

        if (this.NextCharacter != '/')
        {
            _nextToken.Append("<");
            _nextTokenType = HtmlTokenType.OpeningTagStart;

            // Whitespace directly after an opening tag is insignificant.
            _ignoreNextWhitespace = true;
        }
        else
        {
            _nextTokenType = HtmlTokenType.ClosingTagStart;
            _nextToken.Append("</");

            // Step past the slash.
            this.GetNextCharacter();

            // Whitespace directly after a closing tag is significant.
            _ignoreNextWhitespace = false;
        }

        return;
    }

    // Start of a directive: CDATA section, comment, or something else.
    if (this.IsAtDirectiveStart)
    {
        this.GetNextCharacter();

        switch (_lookAheadCharacter)
        {
            case '[':
                // CDATA section
                this.ReadDynamicContent();
                break;

            case '-':
                // comment
                this.ReadComment();
                break;

            default:
                // Neither a comment nor CDATA (e.g. something like DOCTYPE);
                // skip up to the next tag ender.
                this.ReadUnknownDirective();
                break;
        }

        return;
    }

    // Plain text: keep consuming until a tag, a directive or the end of the stream.
    _nextTokenType = HtmlTokenType.Text;

    while (!(this.IsAtTagStart || this.IsAtEndOfStream || this.IsAtDirectiveStart))
    {
        // A literal "<?" (not an entity) opens a processing directive, which is dropped.
        if (this.NextCharacter == '<' && !this.IsNextCharacterEntity && _lookAheadCharacter == '?')
        {
            this.SkipProcessingDirective();
            continue;
        }

        // Every character at or below the space character counts as whitespace.
        bool isWhitespace = this.NextCharacter <= ' ';

        if (isWhitespace)
        {
            // Emit a single space for a whole whitespace run; emit nothing
            // when whitespace is currently being ignored.
            if (!_ignoreNextWhitespace)
            {
                _nextToken.Append(' ');
            }
        }
        else
        {
            _nextToken.Append(this.NextCharacter);
        }

        // After whitespace keep ignoring further whitespace; after any other
        // character the next whitespace becomes significant again.
        _ignoreNextWhitespace = isWhitespace;

        this.GetNextCharacter();
    }
}
// ---------------------------------------------------------------------
//
// Constructors
//
// ---------------------------------------------------------------------

#region Constructors

/// <summary>
/// Constructor. Prepares the output document, the element stacks and the
/// lexical analyzer for the given input string, then reads the first token.
/// </summary>
/// <param name="inputString">
/// string to be parsed into well-formed Html
/// </param>
private HtmlParser(string inputString)
{
    // Output xml document that the parser builds.
    _document = new XmlDocument();

    // Stack of opened elements and stack of pending inline elements, both initially empty.
    _openedElements = new Stack<XmlElement>();
    _pendingInlineElements = new Stack<XmlElement>();

    // Lexical analyzer over the raw input.
    _htmlLexicalAnalyzer = new HtmlLexicalAnalyzer(inputString);

    // Prime the analyzer with the first token; text content is expected first.
    _htmlLexicalAnalyzer.GetNextContentToken();
}