/// <summary>
/// Removes, keeps or rewrites the document's DOCTYPE declaration according to
/// the configured <c>Options.DocType</c> mode.
/// </summary>
/// <param name="root">
/// Root node of the document. It is searched for an existing DOCTYPE node and
/// becomes the parent of a newly created one when none exists.
/// </param>
/// <returns>
/// <c>true</c> when the DOCTYPE was left alone, discarded or rewritten;
/// <c>false</c> when the declared or guessed version is
/// <c>HtmlVersion.Unknown</c>, in which case no DOCTYPE text is written.
/// </returns>
public virtual bool FixDocType(Node root)
{
    var guessed = HtmlVersion.Html40Strict;

    if (BadDoctype)
    {
        Report.Warning(this, null, null, Report.MALFORMED_DOCTYPE);
    }

    /* XML output: nothing to fix */
    if (Options.XmlOut)
    {
        return true;
    }

    Node doctype = root.FindDocType();

    if (Options.DocType == DocType.Omit)
    {
        if (doctype != null)
        {
            Node.DiscardElement(doctype);
        }

        return true;
    }

    if (Options.DocType == DocType.Strict)
    {
        /* guard against a missing doctype, as the Omit branch above does */
        if (doctype != null)
        {
            Node.DiscardElement(doctype);
            doctype = null;
        }

        guessed = HtmlVersion.Html40Strict;
    }
    else if (Options.DocType == DocType.Loose)
    {
        if (doctype != null)
        {
            Node.DiscardElement(doctype);
            doctype = null;
        }

        guessed = HtmlVersion.Html40Loose;
    }
    else if (Options.DocType == DocType.Auto)
    {
        if (doctype != null)
        {
            if (Doctype == HtmlVersion.Unknown)
            {
                return false;
            }

            switch (Doctype)
            {
                case HtmlVersion.Html20:
                case HtmlVersion.Html32:
                case HtmlVersion.Html40Strict:
                case HtmlVersion.Html40Loose:
                case HtmlVersion.Frames:
                    /* keep the existing doctype when the declared version is
                       still among the versions recorded in Versions */
                    if ((Versions & Doctype) != 0)
                    {
                        return true;
                    }

                    break; /* to replace old version by new */
            }

            /* INCONSISTENT_VERSION warning is now issued by ApparentVersion() */
        }

        /* choose new doctype */
        guessed = GetHtmlVersion();
    }

    if (guessed == HtmlVersion.Unknown)
    {
        return false;
    }

    /* for XML use the Voyager system identifier */
    /* (Options.XmlOut already caused an early return above; the test is kept unchanged) */
    if (Options.XmlOut || Options.XmlTags || Isvoyager)
    {
        if (doctype != null)
        {
            Node.DiscardElement(doctype);
        }

        for (int i = 0; i < W3CVersion.Length; ++i)
        {
            if (guessed == W3CVersion[i].Version)
            {
                FixHtmlNameSpace(root, W3CVersion[i].Profile);
                break;
            }
        }

        return true;
    }

    if (doctype == null)
    {
        /* no doctype present: create one and link it in as root's first child */
        doctype = NewNode(Node.DOC_TYPE_TAG, Lexbuf, 0, 0);
        doctype.Next = root.Content;
        doctype.Parent = root;
        doctype.Prev = null;

        /* keep the sibling list consistent: the former first child must
           point back at the node inserted in front of it */
        if (root.Content != null)
        {
            root.Content.Prev = doctype;
        }

        /* NOTE(review): if Node also tracks a last-child link it is not
           updated here when root had no children - confirm against Node */
        root.Content = doctype;
    }

    /* the doctype text spans from the current Lexsize to the Lexsize after
       the literals are added (presumably AddStringLiteral advances Lexsize) */
    Txtstart = Lexsize;
    Txtend = Lexsize;

    /* use the appropriate public identifier */
    AddStringLiteral("html PUBLIC ");

    if (Options.DocType == DocType.User && Options.DocTypeStr != null)
    {
        AddStringLiteral(Options.DocTypeStr);
    }
    else if (guessed == HtmlVersion.Html20)
    {
        AddStringLiteral("\"-//IETF//DTD HTML 2.0//EN\"");
    }
    else
    {
        AddStringLiteral("\"-//W3C//DTD ");

        for (int i = 0; i < W3CVersion.Length; ++i)
        {
            if (guessed == W3CVersion[i].Version)
            {
                AddStringLiteral(W3CVersion[i].Name);
                break;
            }
        }

        AddStringLiteral("//EN\"");
    }

    Txtend = Lexsize;
    doctype.Start = Txtstart;
    doctype.End = Txtend;

    return true;
}
/// <summary>
/// Internal routine that actually does the parsing. Markup is read from
/// <paramref name="input"/>, cleaned up according to the configured options and,
/// when the lexer recorded no errors, either burst into slides or
/// pretty-printed to <paramref name="output"/>.
/// </summary>
/// <param name="input">Stream to read the markup from; when <c>null</c> nothing is parsed.</param>
/// <param name="output">Stream that receives the pretty-printed document; when <c>null</c> nothing is printed.</param>
/// <param name="messages">Message collection assigned to the lexer; its error count decides whether output is produced.</param>
/// <returns>
/// The root node of the parsed document, or <c>null</c> when
/// <paramref name="input"/> is <c>null</c> or the tree fails an integrity check.
/// </returns>
internal Node ParseInternal(Stream input, Stream output, TidyMessageCollection messages)
{
    Out sink = new OutImpl(); /* normal output stream */

    /* ensure config is self-consistent */
    _options.Adjust();

    if (input == null)
    {
        return null;
    }

    var lexer = new Lexer(new ClsStreamInImpl(input, _options.CharEncoding, _options.TabSize), _options)
    {
        Messages = messages
    };

    /* store pointer to lexer in input stream to allow character encoding errors to be reported */
    lexer.Input.Lexer = lexer;

    Node document;

    if (_options.XmlTags)
    {
        /* Tidy doesn't alter the doctype for generic XML docs */
        document = ParserImpl.ParseXmlDocument(lexer);
    }
    else
    {
        document = ParserImpl.ParseDocument(lexer);

        if (!document.CheckNodeIntegrity())
        {
            Report.BadTree(lexer);
            return null;
        }

        var cleaner = new Clean(_options.TagTable);

        /* simplifies <b><b> ... </b> ...</b> etc. */
        cleaner.NestedEmphasis(document);

        /* cleans up <dir>indented text</dir> etc. */
        cleaner.List2Bq(document);
        cleaner.Bq2Div(document);

        /* replaces i by em and b by strong */
        if (_options.LogicalEmphasis)
        {
            cleaner.EmFromI(document);
        }

        if (_options.Word2000 && cleaner.IsWord2000(document, _options.TagTable))
        {
            /* prune Word2000's <![if ...]> ... <![endif]> */
            cleaner.DropSections(lexer, document);

            /* drop style & class attributes and empty p, span elements */
            cleaner.CleanWord2000(lexer, document);
        }

        /* replaces presentational markup by style rules */
        if (_options.MakeClean || _options.DropFontTags)
        {
            cleaner.CleanTree(lexer, document);
        }

        if (!document.CheckNodeIntegrity())
        {
            Report.BadTree(lexer);
            return null;
        }

        /* looked up before the doctype is fixed; this node is what gets reported below */
        Node originalDoctype = document.FindDocType();

        if (document.Content != null)
        {
            ApplyConfiguredDocType(lexer, document);

            if (_options.TidyMark)
            {
                lexer.AddGenerator(document);
            }
        }

        /* ensure presence of initial <?XML version="1.0"?> */
        if (_options.XmlOut && _options.XmlPi)
        {
            lexer.FixXmlPi(document);
        }

        if (document.Content != null)
        {
            Report.ReportVersion(lexer, originalDoctype);
            Report.ReportNumWarnings(lexer);
        }
    }

    if (lexer.Messages.Errors > 0)
    {
        Report.NeedsAuthorIntervention(lexer);
    }

    sink.State = StreamIn.FSM_ASCII;
    sink.Encoding = _options.CharEncoding;

    if (lexer.Messages.Errors == 0)
    {
        if (_options.BurstSlides)
        {
            /* remove doctype to avoid potential clash with markup
               introduced when bursting into slides */
            Node staleDoctype = document.FindDocType();

            if (staleDoctype != null)
            {
                Node.DiscardElement(staleDoctype);
            }

            /* slides use transitional features */
            lexer.Versions |= HtmlVersion.Html40Loose;

            /* and patch up doctype to match */
            ApplyConfiguredDocType(lexer, document);

            /* find the body element which may be implicit */
            Node body = document.FindBody(_options.TagTable);

            if (body == null)
            {
                Report.MissingBody(lexer);
            }
            else
            {
                var slidePrinter = new PPrint(_options);
                Report.ReportNumberOfSlides(lexer, slidePrinter.CountSlides(body));
                slidePrinter.CreateSlides(lexer, document);
            }
        }
        else if (output != null)
        {
            var printer = new PPrint(_options);
            sink.Output = output;

            if (_options.XmlTags)
            {
                printer.PrintXmlTree(sink, 0, 0, lexer, document);
            }
            else
            {
                printer.PrintTree(sink, 0, 0, lexer, document);
            }

            printer.FlushLine(sink, 0);
        }
    }

    Report.ErrorSummary(lexer);

    return document;
}

/// <summary>
/// Sets the XHTML doctype when XHTML output is configured; otherwise fixes
/// up the HTML doctype. The lexer's return value is ignored.
/// </summary>
private void ApplyConfiguredDocType(Lexer lexer, Node document)
{
    if (_options.Xhtml)
    {
        lexer.SetXhtmlDocType(document);
    }
    else
    {
        lexer.FixDocType(document);
    }
}