HtmlKit.HtmlTokenizer.ReadNextToken C# (CSharp) Method

ReadNextToken() public method

Reads the next token.
Reads the next token.
public ReadNextToken ( HtmlToken &token ) : bool
token HtmlToken The token that was read.
return bool
		/// <summary>
		/// Reads the next token.
		/// </summary>
		/// <remarks>
		/// Dispatches on the current <c>TokenizerState</c> to the matching reader
		/// and keeps dispatching until one of the readers yields a non-null token.
		/// The state is re-read on every pass of the loop.
		/// </remarks>
		/// <param name="token">The token that was read, or <c>null</c> when the method returns <c>false</c>.</param>
		/// <returns><c>true</c> if a token was read; <c>false</c> if the tokenizer is in the
		/// <c>EndOfFile</c> state (or any state without a reader).</returns>
		public bool ReadNextToken (out HtmlToken token)
		{
			while (true) {
				// One reader per tokenizer state; a reader may return null, in which
				// case we dispatch again on whatever state the tokenizer is in by then.
				switch (TokenizerState) {
				case HtmlTokenizerState.Data: token = ReadData (); break;
				case HtmlTokenizerState.CharacterReferenceInData: token = ReadCharacterReferenceInData (); break;
				case HtmlTokenizerState.RcData: token = ReadRcData (); break;
				case HtmlTokenizerState.CharacterReferenceInRcData: token = ReadCharacterReferenceInRcData (); break;
				case HtmlTokenizerState.RawText: token = ReadRawText (); break;
				case HtmlTokenizerState.ScriptData: token = ReadScriptData (); break;
				case HtmlTokenizerState.PlainText: token = ReadPlainText (); break;
				case HtmlTokenizerState.TagOpen: token = ReadTagOpen (); break;
				case HtmlTokenizerState.EndTagOpen: token = ReadEndTagOpen (); break;
				case HtmlTokenizerState.TagName: token = ReadTagName (); break;
				case HtmlTokenizerState.RcDataLessThan: token = ReadRcDataLessThan (); break;
				case HtmlTokenizerState.RcDataEndTagOpen: token = ReadRcDataEndTagOpen (); break;
				case HtmlTokenizerState.RcDataEndTagName: token = ReadRcDataEndTagName (); break;
				case HtmlTokenizerState.RawTextLessThan: token = ReadRawTextLessThan (); break;
				case HtmlTokenizerState.RawTextEndTagOpen: token = ReadRawTextEndTagOpen (); break;
				case HtmlTokenizerState.RawTextEndTagName: token = ReadRawTextEndTagName (); break;
				case HtmlTokenizerState.ScriptDataLessThan: token = ReadScriptDataLessThan (); break;
				case HtmlTokenizerState.ScriptDataEndTagOpen: token = ReadScriptDataEndTagOpen (); break;
				case HtmlTokenizerState.ScriptDataEndTagName: token = ReadScriptDataEndTagName (); break;
				case HtmlTokenizerState.ScriptDataEscapeStart: token = ReadScriptDataEscapeStart (); break;
				case HtmlTokenizerState.ScriptDataEscapeStartDash: token = ReadScriptDataEscapeStartDash (); break;
				case HtmlTokenizerState.ScriptDataEscaped: token = ReadScriptDataEscaped (); break;
				case HtmlTokenizerState.ScriptDataEscapedDash: token = ReadScriptDataEscapedDash (); break;
				case HtmlTokenizerState.ScriptDataEscapedDashDash: token = ReadScriptDataEscapedDashDash (); break;
				case HtmlTokenizerState.ScriptDataEscapedLessThan: token = ReadScriptDataEscapedLessThan (); break;
				case HtmlTokenizerState.ScriptDataEscapedEndTagOpen: token = ReadScriptDataEscapedEndTagOpen (); break;
				case HtmlTokenizerState.ScriptDataEscapedEndTagName: token = ReadScriptDataEscapedEndTagName (); break;
				case HtmlTokenizerState.ScriptDataDoubleEscapeStart: token = ReadScriptDataDoubleEscapeStart (); break;
				case HtmlTokenizerState.ScriptDataDoubleEscaped: token = ReadScriptDataDoubleEscaped (); break;
				case HtmlTokenizerState.ScriptDataDoubleEscapedDash: token = ReadScriptDataDoubleEscapedDash (); break;
				case HtmlTokenizerState.ScriptDataDoubleEscapedDashDash: token = ReadScriptDataDoubleEscapedDashDash (); break;
				case HtmlTokenizerState.ScriptDataDoubleEscapedLessThan: token = ReadScriptDataDoubleEscapedLessThan (); break;
				case HtmlTokenizerState.ScriptDataDoubleEscapeEnd: token = ReadScriptDataDoubleEscapeEnd (); break;
				case HtmlTokenizerState.BeforeAttributeName: token = ReadBeforeAttributeName (); break;
				case HtmlTokenizerState.AttributeName: token = ReadAttributeName (); break;
				case HtmlTokenizerState.AfterAttributeName: token = ReadAfterAttributeName (); break;
				case HtmlTokenizerState.BeforeAttributeValue: token = ReadBeforeAttributeValue (); break;
				case HtmlTokenizerState.AttributeValueQuoted: token = ReadAttributeValueQuoted (); break;
				case HtmlTokenizerState.AttributeValueUnquoted: token = ReadAttributeValueUnquoted (); break;
				case HtmlTokenizerState.CharacterReferenceInAttributeValue: token = ReadCharacterReferenceInAttributeValue (); break;
				case HtmlTokenizerState.AfterAttributeValueQuoted: token = ReadAfterAttributeValueQuoted (); break;
				case HtmlTokenizerState.SelfClosingStartTag: token = ReadSelfClosingStartTag (); break;
				case HtmlTokenizerState.BogusComment: token = ReadBogusComment (); break;
				case HtmlTokenizerState.MarkupDeclarationOpen: token = ReadMarkupDeclarationOpen (); break;
				case HtmlTokenizerState.CommentStart: token = ReadCommentStart (); break;
				case HtmlTokenizerState.CommentStartDash: token = ReadCommentStartDash (); break;
				case HtmlTokenizerState.Comment: token = ReadComment (); break;
				case HtmlTokenizerState.CommentEndDash: token = ReadCommentEndDash (); break;
				case HtmlTokenizerState.CommentEnd: token = ReadCommentEnd (); break;
				case HtmlTokenizerState.CommentEndBang: token = ReadCommentEndBang (); break;
				case HtmlTokenizerState.DocType: token = ReadDocType (); break;
				case HtmlTokenizerState.BeforeDocTypeName: token = ReadBeforeDocTypeName (); break;
				case HtmlTokenizerState.DocTypeName: token = ReadDocTypeName (); break;
				case HtmlTokenizerState.AfterDocTypeName: token = ReadAfterDocTypeName (); break;
				case HtmlTokenizerState.AfterDocTypePublicKeyword: token = ReadAfterDocTypePublicKeyword (); break;
				case HtmlTokenizerState.BeforeDocTypePublicIdentifier: token = ReadBeforeDocTypePublicIdentifier (); break;
				case HtmlTokenizerState.DocTypePublicIdentifierQuoted: token = ReadDocTypePublicIdentifierQuoted (); break;
				case HtmlTokenizerState.AfterDocTypePublicIdentifier: token = ReadAfterDocTypePublicIdentifier (); break;
				case HtmlTokenizerState.BetweenDocTypePublicAndSystemIdentifiers: token = ReadBetweenDocTypePublicAndSystemIdentifiers (); break;
				case HtmlTokenizerState.AfterDocTypeSystemKeyword: token = ReadAfterDocTypeSystemKeyword (); break;
				case HtmlTokenizerState.BeforeDocTypeSystemIdentifier: token = ReadBeforeDocTypeSystemIdentifier (); break;
				case HtmlTokenizerState.DocTypeSystemIdentifierQuoted: token = ReadDocTypeSystemIdentifierQuoted (); break;
				case HtmlTokenizerState.AfterDocTypeSystemIdentifier: token = ReadAfterDocTypeSystemIdentifier (); break;
				case HtmlTokenizerState.BogusDocType: token = ReadBogusDocType (); break;
				case HtmlTokenizerState.CDataSection: token = ReadCDataSection (); break;
				case HtmlTokenizerState.EndOfFile:
				default:
					// Nothing left to read (or a state with no reader): report failure.
					token = null;
					return false;
				}

				// Stop as soon as a reader produced a token; otherwise dispatch again.
				if (token != null)
					return true;
			}
		}
	}

Usage Example

Example #1
0
        /// <summary>
        /// Tokenizes the file at <paramref name="path"/> and compares a textual rendering of
        /// the token stream against the contents of the sibling ".tokens" file.
        /// </summary>
        /// <remarks>
        /// If the ".tokens" file does not exist, the expected text is the empty string and the
        /// rendering produced by this run is written to that file before the final comparison.
        /// </remarks>
        /// <param name="path">The path of the file to tokenize.</param>
        static void VerifyHtmlTokenizerOutput(string path)
        {
            var tokensPath = Path.ChangeExtension(path, ".tokens");
            var expected = File.Exists(tokensPath) ? File.ReadAllText(tokensPath) : string.Empty;
            var rendered = new StringBuilder();

            using (var reader = File.OpenText(path))
            {
                var tokenizer = new HtmlTokenizer(reader);
                HtmlToken token;

                // A fresh tokenizer is expected to start in the Data state.
                Assert.AreEqual(HtmlTokenizerState.Data, tokenizer.TokenizerState);

                while (tokenizer.ReadNextToken(out token))
                {
                    // Every rendered token starts with its kind.
                    actualPrefix:
                    rendered.AppendFormat("{0}: ", token.Kind);

                    switch (token.Kind)
                    {
                        case HtmlTokenKind.Data:
                        {
                            var data = (HtmlDataToken)token;

                            // Escape the whitespace control characters so they are visible in the output.
                            foreach (var ch in data.Data)
                            {
                                switch (ch)
                                {
                                    case '\f': rendered.Append("\\f"); break;
                                    case '\t': rendered.Append("\\t"); break;
                                    case '\r': rendered.Append("\\r"); break;
                                    case '\n': rendered.Append("\\n"); break;
                                    default: rendered.Append(ch); break;
                                }
                            }

                            rendered.AppendLine();
                            break;
                        }
                        case HtmlTokenKind.Tag:
                        {
                            var tagToken = (HtmlTagToken)token;

                            rendered.AppendFormat("<{0}{1}", tagToken.IsEndTag ? "/" : "", tagToken.Name);

                            foreach (var attr in tagToken.Attributes)
                            {
                                // Attributes without a value are rendered as a bare name.
                                if (attr.Value == null)
                                    rendered.AppendFormat(" {0}", attr.Name);
                                else
                                    rendered.AppendFormat(" {0}={1}", attr.Name, Quote(attr.Value));
                            }

                            rendered.AppendLine(tagToken.IsEmptyElement ? "/>" : ">");
                            break;
                        }
                        case HtmlTokenKind.Comment:
                        {
                            var commentToken = (HtmlCommentToken)token;
                            rendered.AppendLine(commentToken.Comment);
                            break;
                        }
                        case HtmlTokenKind.DocType:
                        {
                            var docTypeToken = (HtmlDocTypeToken)token;

                            if (docTypeToken.ForceQuirksMode)
                                rendered.Append("<!-- force quirks mode -->");

                            rendered.Append("<!DOCTYPE");

                            if (docTypeToken.Name != null)
                                rendered.AppendFormat(" {0}", docTypeToken.Name.ToUpperInvariant());

                            // A public identifier is optionally followed by the system identifier;
                            // a lone system identifier gets the SYSTEM keyword instead.
                            if (docTypeToken.PublicIdentifier != null)
                            {
                                rendered.AppendFormat(" PUBLIC {0}", Quote(docTypeToken.PublicIdentifier));

                                if (docTypeToken.SystemIdentifier != null)
                                    rendered.AppendFormat(" {0}", Quote(docTypeToken.SystemIdentifier));
                            }
                            else if (docTypeToken.SystemIdentifier != null)
                            {
                                rendered.AppendFormat(" SYSTEM {0}", Quote(docTypeToken.SystemIdentifier));
                            }

                            rendered.AppendLine(">");
                            break;
                        }
                        default:
                            Assert.Fail("Unhandled token type: {0}", token.Kind);
                            break;
                    }
                }

                // Once ReadNextToken reports no more tokens, the tokenizer should be at end of file.
                Assert.AreEqual(HtmlTokenizerState.EndOfFile, tokenizer.TokenizerState);
            }

            // Record the rendering when no ".tokens" file exists yet.
            if (!File.Exists(tokensPath))
                File.WriteAllText(tokensPath, rendered.ToString());

            Assert.AreEqual(expected, rendered.ToString(), "The token stream does not match the expected tokens.");
        }
All Usage Examples Of HtmlKit.HtmlTokenizer::ReadNextToken
HtmlTokenizer