/// <summary>
/// Reads tokens from <paramref name="lexer"/> and attaches them as children of
/// <paramref name="body"/>, repairing or discarding tokens that are not allowed there.
/// </summary>
/// <remarks>
/// The loop ends when:
///  - the lexer returns no more tokens;
///  - the end tag matching <c>body</c> is seen while <c>body</c>'s parent is NOFRAMES;
///  - a NOFRAMES end tag, or any FRAME / FRAMESET token, is seen while <c>body</c>'s parent
///    is NOFRAMES (the token is pushed back for the caller);
///  - a tag is seen whose content model matches none of BLOCK, INLINE, HTML, HEAD, LIST,
///    DEFLIST, TABLE, ROWGRP, ROW or FIELD (the token is pushed back for the caller).
/// Outside NOFRAMES the end tag matching <c>body</c> does NOT stop parsing: it only marks
/// the body as closed, and later content is still appended (with a one-time warning).
/// NOTE(review): <c>body.Parent</c> is dereferenced without a null check, so this
/// presumably expects <c>body</c> to already be attached to a parent - confirm with callers.
/// </remarks>
/// <param name="lexer">Token source; its Versions, ExcludeBlocks and SeenBodyEndTag
/// members are updated as a side effect.</param>
/// <param name="body">Node that receives the parsed children.</param>
/// <param name="mode">Ignored on entry: it is overwritten with
/// <c>Lexer.IGNORE_WHITESPACE</c> before first use and then serves as a local.</param>
public virtual void Parse(Lexer lexer, Node body, short mode)
{
/* checkstack == true means lexer.InlineDup has not yet been consulted for the
current run of text / inline content; it is re-armed by every start tag that
is not plain inline (see the start-tag handling near the end of the loop). */
bool checkstack;
/* the caller-supplied mode is deliberately discarded here */
mode = Lexer.IGNORE_WHITESPACE;
checkstack = true;
TagCollection tt = lexer.Options.TagTable;
while (true)
{
/* mode may have been changed by the previous iteration */
Node node = lexer.GetToken(mode);
if (node == null)
{
break;
}
/* end tag for the element being parsed (same tag as body) */
if (node.Tag == body.Tag && node.Type == Node.END_TAG)
{
body.Closed = true;
Node.TrimSpaces(lexer, body);
/* 1 = end tag seen, "content after body" warning not yet issued */
lexer.SeenBodyEndTag = 1;
mode = Lexer.IGNORE_WHITESPACE;
if (body.Parent.Tag == tt.TagNoframes)
{
break;
}
/* otherwise keep reading: trailing content is still appended to body */
continue;
}
if (node.Tag == tt.TagNoframes)
{
/* a NOFRAMES start tag is inserted and parsed as a block */
if (node.Type == Node.START_TAG)
{
Node.InsertNodeAtEnd(body, node);
ParseBlock.Parse(lexer, node, mode);
continue;
}
/* </noframes> closing the enclosing NOFRAMES: push it back for the caller */
if (node.Type == Node.END_TAG && body.Parent.Tag == tt.TagNoframes)
{
Node.TrimSpaces(lexer, body);
lexer.UngetToken();
break;
}
/* any other NOFRAMES token falls through to the generic handling below */
}
/* FRAME / FRAMESET inside a NOFRAMES body: push the token back for the caller */
if ((node.Tag == tt.TagFrame || node.Tag == tt.TagFrameset) && body.Parent.Tag == tt.TagNoframes)
{
Node.TrimSpaces(lexer, body);
lexer.UngetToken();
break;
}
/* HTML tags are dropped here; only the start forms produce a warning */
if (node.Tag == tt.TagHtml)
{
if (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG)
{
Report.Warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
}
continue;
}
/* true for a text node spanning at most one character whose first character
is a space.
NOTE(review): Textarray[node.Start] is read even when End == Start; this
assumes node.Start is always a valid index - confirm against the lexer. */
bool iswhitenode = node.Type == Node.TEXT_NODE && node.End <= node.Start + 1 &&
node.Textarray[node.Start] == (sbyte) ' ';
/* deal with comments etc. */
/* this runs before the CONTENT_AFTER_BODY check, so tokens consumed by
InsertMisc never trigger that warning */
if (Node.InsertMisc(body, node))
{
continue;
}
/* warn once about non-whitespace content after the body end tag; the
increment takes the counter past 1 so the warning is not repeated */
if (lexer.SeenBodyEndTag == 1 && !iswhitenode)
{
++lexer.SeenBodyEndTag;
Report.Warning(lexer, body, node, Report.CONTENT_AFTER_BODY);
}
/* mixed content model permits text */
if (node.Type == Node.TEXT_NODE)
{
/* drop a lone space while whitespace is being ignored */
if (iswhitenode && mode == Lexer.IGNORE_WHITESPACE)
{
continue;
}
/* EncloseText option: push the text back and let an inferred <p>,
appended to body, consume it */
if (lexer.Options.EncloseText && !iswhitenode)
{
lexer.UngetToken();
Node para = lexer.InferredTag("p");
Node.InsertNodeAtEnd(body, para);
ParseTag(lexer, para, mode);
mode = Lexer.MIXED_CONTENT;
continue;
}
/* strict doesn't allow text here */
lexer.Versions &= ~ (HtmlVersion.Html40Strict | HtmlVersion.Html20);
if (checkstack)
{
checkstack = false;
/* a positive result skips insertion of this token for this iteration;
presumably InlineDup has pushed it back behind re-opened inline tags -
confirm against Lexer.InlineDup */
if (lexer.InlineDup(node) > 0)
{
continue;
}
}
Node.InsertNodeAtEnd(body, node);
mode = Lexer.MIXED_CONTENT;
continue;
}
if (node.Type == Node.DOC_TYPE_TAG)
{
Node.InsertDocType(lexer, body, node);
continue;
}
/* discard unknown and PARAM tags */
if (node.Tag == null || node.Tag == tt.TagParam)
{
Report.Warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
continue;
}
/*
Netscape allows LI and DD directly in BODY
We infer UL or DL respectively and use this
boolean to exclude block-level elements so as
to match Netscape's observed behaviour.
*/
lexer.ExcludeBlocks = false;
/* tags that are neither block-level nor inline need special treatment */
if ((node.Tag.Model & ContentModel.BLOCK) == 0 && (node.Tag.Model & ContentModel.INLINE) == 0)
{
/* avoid this error message being issued twice */
if ((node.Tag.Model & ContentModel.HEAD) == 0)
{
Report.Warning(lexer, body, node, Report.TAG_NOT_ALLOWED_IN);
}
/* tags with the HTML content model are dropped after the attribute copy below */
if ((node.Tag.Model & ContentModel.HTML) != 0)
{
/* copy body attributes if current body was inferred */
if (node.Tag == tt.TagBody && body.Isimplicit && body.Attributes == null)
{
body.Attributes = node.Attributes;
/* detach the list from the discarded node so only body references it */
node.Attributes = null;
}
continue;
}
/* head-only elements are handed to MoveToHead instead of being inserted here */
if ((node.Tag.Model & ContentModel.HEAD) != 0)
{
MoveToHead(lexer, body, node);
continue;
}
/* In the three branches below the offending token is pushed back and node
is replaced by an inferred container, which then falls through to the
start-tag handling further down (presumably InferredTag returns an
implicit START_TAG node - confirm against Lexer.InferredTag). */
if ((node.Tag.Model & ContentModel.LIST) != 0)
{
lexer.UngetToken();
node = lexer.InferredTag("ul");
Node.AddClass(node, "noindent");
lexer.ExcludeBlocks = true;
}
else if ((node.Tag.Model & ContentModel.DEFLIST) != 0)
{
lexer.UngetToken();
node = lexer.InferredTag("dl");
lexer.ExcludeBlocks = true;
}
else if ((node.Tag.Model & (ContentModel.TABLE | ContentModel.ROWGRP | ContentModel.ROW)) != 0)
{
lexer.UngetToken();
node = lexer.InferredTag("table");
lexer.ExcludeBlocks = true;
}
else
{
/* AQ: The following line is from the official C
version of tidy. It doesn't make sense to me
because the '!' operator has higher precedence
than the '&' operator. It seems to me that the
expression always evaluates to 0.
if (!node->tag->model & (CM_ROW | CM_FIELD))
AQ: 13Jan2000 fixed in C tidy
*/
/* ROW was already handled by the branch above, so in practice only FIELD
tags skip this early return; anything else is pushed back and ends
parsing of this body */
if ((node.Tag.Model & (ContentModel.ROW | ContentModel.FIELD)) == 0)
{
lexer.UngetToken();
return;
}
/* ignore </td> </th> <option> etc. */
continue;
}
}
/* end tags: </br> and </p> are turned into line breaks, inline end tags pop
the inline stack */
if (node.Type == Node.END_TAG)
{
if (node.Tag == tt.TagBr)
{
/* treat </br> as <br> */
node.Type = Node.START_TAG;
}
else if (node.Tag == tt.TagP)
{
/* the </p> token is coerced to the BR tag and inserted, then a second,
inferred <br> replaces node and goes through the start-tag handling
below (assuming InferredTag yields a start tag - confirm) */
Node.CoerceNode(lexer, node, tt.TagBr);
Node.InsertNodeAtEnd(body, node);
node = lexer.InferredTag("br");
}
else if ((node.Tag.Model & ContentModel.INLINE) != 0)
{
/* node is not inserted; while its Type is still END_TAG it falls
through to the discard warning at the bottom of the loop */
lexer.PopInline(node);
}
}
if (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG)
{
/* inline element that does not carry the MIXED content-model flag */
if (((node.Tag.Model & ContentModel.INLINE) != 0) &&
(node.Tag.Model & ContentModel.MIXED) == 0)
{
/* HTML4 strict doesn't allow inline content here */
/* but HTML2 does allow img elements as children of body */
if (node.Tag == tt.TagImg)
{
lexer.Versions &= ~ HtmlVersion.Html40Strict;
}
else
{
lexer.Versions &= ~ (HtmlVersion.Html40Strict | HtmlVersion.Html20);
}
/* consult InlineDup once per inline run, and never for implicit nodes */
if (checkstack && !node.Isimplicit)
{
checkstack = false;
/* positive result: skip insertion for this iteration (see the note on
the text-node path above) */
if (lexer.InlineDup(node) > 0)
{
continue;
}
}
mode = Lexer.MIXED_CONTENT;
}
else
{
/* any other start tag: re-arm the InlineDup check and ignore
whitespace again */
checkstack = true;
mode = Lexer.IGNORE_WHITESPACE;
}
/* report elements that were inferred rather than present in the input */
if (node.Isimplicit)
{
Report.Warning(lexer, body, node, Report.INSERTING_TAG);
}
Node.InsertNodeAtEnd(body, node);
ParseTag(lexer, node, mode);
continue;
}
/* discard unexpected tags */
Report.Warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
}
}