/// <summary>
/// Advances to the next node of the input by feeding characters from
/// <c>_reader</c> through a state machine keyed on <c>_state</c>.
/// </summary>
/// <remarks>
/// On entry the node type, name, value and empty-element flag are reset.
/// When a node is completed, <c>_nodeType</c> is set and the node's text is
/// left in <c>_name</c> and/or <c>_value</c>; for elements, <c>_attributes</c>
/// holds the attributes collected while scanning the start tag.
/// <c>_state</c> persists between calls, so a call may begin in the middle of
/// markup (for example directly after a tag-open character that terminated
/// the preceding text node).
/// </remarks>
/// <returns>
/// <c>true</c> when a node was produced; <c>false</c> when the input ended
/// without a complete node.
/// </returns>
public bool Read()
{
    // Reset the per-node outputs.
    _nodeType = HtmlNodeType.None;
    _name.Length = 0;
    _value.Length = 0;
    _isEmptyElement = false;

    // Scratch state for the attribute currently being scanned. These are
    // locals because a start tag is always scanned within a single call.
    var attrName = new StringBuilder();
    var attrValue = new StringBuilder();
    var quoteStyle = '"';          // ' ' is used as the marker for an unquoted value
    var customDoctype = false;     // true once '[' has been seen inside a DOCTYPE
    StringBuilder entity = null;   // characters collected after '&'

    while (_reader.Read())
    {
        char c = _reader.Current;
        switch (_state)
        {
            case State.Text:
                if (c == '&')
                {
                    entity = new StringBuilder();
                    _state = State.Amp;
                }
                else if (c == '<')
                {
                    _state = State.Lt;
                    if (_value.Length > 0)
                    {
                        // Emit the pending text first; the next call resumes in State.Lt.
                        _nodeType = HtmlNodeType.Text;
                        return true;
                    }
                }
                else
                {
                    _value.Append(c);
                }
                break;

            case State.Amp:
                if (c == ';')
                {
                    _state = State.Text;
                    if (entity.Length > 0)
                    {
                        _value.Append(DecodeEntity("&" + entity + ";"));
                    }
                    else
                    {
                        // "&;" is not an entity: keep it literally.
                        _value.Append("&");
                        _value.Append(";");
                    }
                }
                else if (c == '#' && entity.Length == 0)
                {
                    // '#' is only accepted as the first character (numeric reference).
                    entity.Append(c);
                }
                else if (Char.IsLetterOrDigit(c))
                {
                    entity.Append(c);
                }
                else
                {
                    // Entity ended without ';'. Re-process this character as text.
                    _state = State.Text;
                    _reader.Push(c);
                    if (entity.Length > 0)
                    {
                        _value.Append(DecodeEntity("&" + entity + ";"));
                    }
                    else
                    {
                        _value.Append("&");
                    }
                    entity = null;
                }
                break;

            case State.Lt:
                // NOTE(review): Match(...) appears to look ahead without consuming and
                // Read(n) to skip n characters - confirm against the reader class.
                if (c == '/')
                {
                    _state = State.ElemClose;
                }
                else if (c == '?' && _reader.Match("xml"))
                {
                    _state = State.XmlDeclaration;
                    _reader.Read(3);
                }
                else if (c == '?')
                {
                    _state = State.Pi;
                }
                else if (c == '!' && _reader.Match("--"))
                {
                    _reader.Read(2);
                    _state = State.Comment;
                }
                else if (c == '!' && _reader.Match("[CDATA["))
                {
                    _reader.Read(7);
                    _state = State.CData;
                }
                else if (c == '!' && _reader.Match("DOCTYPE"))
                {
                    _reader.Read(7);
                    _state = State.DocType;
                }
                else if (!Char.IsLetter(c))
                {
                    // Not markup after all: keep '<' and this character as text.
                    _state = State.Text;
                    _value.Append('<');
                    _value.Append(c);
                }
                else
                {
                    _attributes = new StringDictionary();
                    _state = State.ElemName;
                    _name.Append(c);
                }
                break;

            case State.ElemName:
                if (Char.IsWhiteSpace(c))
                {
                    _state = State.ElemAttributes;
                }
                else if (c == '/')
                {
                    _isEmptyElement = true;
                    _state = State.ElemSingle;
                }
                else if (c == '>')
                {
                    _state = State.Text;
                    _nodeType = HtmlNodeType.Element;
                    return true;
                }
                else
                {
                    _name.Append(c);
                }
                break;

            case State.ElemClose:
                if (c == '>')
                {
                    _state = State.Text;
                    _nodeType = HtmlNodeType.EndElement;
                    return true;
                }
                // Skip whitespace so "</div >" reports the same name as "<div>";
                // start-tag names never contain whitespace either.
                if (!Char.IsWhiteSpace(c))
                {
                    _name.Append(c);
                }
                break;

            case State.ElemSingle:
                if (c == '>')
                {
                    _state = State.Text;
                    _nodeType = HtmlNodeType.Element;
                    return true;
                }
                // '/' was not followed by '>': drop the element scanned so far and
                // restart as text beginning with this character.
                _state = State.Text;
                _nodeType = HtmlNodeType.None;
                _name.Length = 0;
                _value.Length = 0;
                _value.Append(c);
                break;

            case State.ElemAttributes:
                if (c == '>')
                {
                    _state = State.Text;
                    _nodeType = HtmlNodeType.Element;
                    return true;
                }
                else if (c == '/')
                {
                    _isEmptyElement = true;
                    _state = State.ElemSingle;
                }
                else if (!Char.IsWhiteSpace(c))
                {
                    _state = State.AttrKey;
                    attrName.Append(c);
                }
                break;

            case State.Comment:
                if (c == '-' && _reader.Match("->"))
                {
                    _reader.Read(2);
                    _state = State.Text;
                    _nodeType = HtmlNodeType.Comment;
                    return true;
                }
                _value.Append(c);
                break;

            case State.CData:
                if (c == ']' && _reader.Match("]>"))
                {
                    _reader.Read(2);
                    _state = State.Text;
                    _nodeType = HtmlNodeType.CDATA;
                    return true;
                }
                _value.Append(c);
                break;

            case State.XmlDeclaration:
                if (c == '?' && _reader.Match(">"))
                {
                    _reader.Read(1);
                    _state = State.Text;
                    _nodeType = HtmlNodeType.XmlDeclaration;
                    return true;
                }
                _value.Append(c);
                break;

            case State.DocType:
                if (c == '[')
                {
                    // Start of a bracketed section; from here the DOCTYPE ends at "]>".
                    customDoctype = true;
                }
                else if (customDoctype)
                {
                    if (c == ']' && _reader.Match(">"))
                    {
                        _reader.Read(1);
                        _state = State.Text;
                        _nodeType = HtmlNodeType.DocumentType;
                        return true;
                    }
                    _value.Append(c);
                }
                else
                {
                    if (c == '>')
                    {
                        _state = State.Text;
                        _nodeType = HtmlNodeType.DocumentType;
                        return true;
                    }
                    _name.Append(c);
                }
                break;

            case State.Pi:
                if (c == '?' && _reader.Match(">"))
                {
                    _reader.Read(1);
                    _state = State.Text;
                    _nodeType = HtmlNodeType.ProcessingInstruction;
                    return true;
                }
                if (Char.IsWhiteSpace(c))
                {
                    // The target name is complete; the remainder is the value.
                    _state = State.PiValue;
                }
                else
                {
                    _name.Append(c);
                }
                break;

            case State.PiValue:
                if (c == '?' && _reader.Match(">"))
                {
                    _reader.Read(1);
                    _state = State.Text;
                    _nodeType = HtmlNodeType.ProcessingInstruction;
                    return true;
                }
                _value.Append(c);
                break;

            case State.AttrKey:
                if (Char.IsWhiteSpace(c))
                {
                    _state = State.AttrEq;
                }
                else if (c == '=')
                {
                    _state = State.AttrValue;
                }
                else if (c == '>' || c == '/')
                {
                    // Valueless attribute directly followed by the end of the tag.
                    // '/' is handed back as well so that "<br disabled/>" is seen as
                    // an empty element instead of an attribute named "disabled/".
                    _attributes[attrName.ToString()] = null;
                    _state = State.ElemAttributes;
                    _reader.Push(c);
                    attrName.Length = 0;
                    attrValue.Length = 0;
                }
                else
                {
                    attrName.Append(c);
                }
                break;

            case State.AttrEq:
                if (Char.IsWhiteSpace(c))
                {
                    // Keep skipping whitespace between the name and a possible '='.
                }
                else if (c == '=')
                {
                    _state = State.AttrValue;
                }
                else
                {
                    // No '=' followed: the attribute has no value. Re-process this
                    // character as the start of whatever comes next in the tag.
                    _attributes[attrName.ToString()] = null;
                    _state = State.ElemAttributes;
                    _reader.Push(c);
                    attrName.Length = 0;
                    attrValue.Length = 0;
                }
                break;

            case State.AttrValue:
                if (Char.IsWhiteSpace(c))
                {
                    // Skip whitespace between '=' and the value.
                }
                else if (c == '"' || c == '\'')
                {
                    quoteStyle = c;
                    _state = State.AttrQuote;
                }
                else if (c == '>')
                {
                    // "name=>" : the value is missing. Record an empty value and let
                    // '>' close the tag instead of becoming part of the value.
                    _attributes[attrName.ToString()] = string.Empty;
                    _state = State.ElemAttributes;
                    _reader.Push(c);
                    attrName.Length = 0;
                    attrValue.Length = 0;
                }
                else
                {
                    // Unquoted value: ' ' marks "no quote character".
                    quoteStyle = ' ';
                    _state = State.AttrQuote;
                    attrValue.Append(c);
                }
                break;

            case State.AttrQuote:
                {
                    bool unquoted = quoteStyle == ' ';
                    // A quoted value ends at its matching quote. An unquoted value
                    // ends at any whitespace (not only a literal space) or at '>'.
                    bool endsValue = c == quoteStyle
                        || (unquoted && (c == '>' || Char.IsWhiteSpace(c)));
                    if (endsValue)
                    {
                        _attributes[attrName.ToString()] = HttpUtility.HtmlDecode(attrValue.ToString());
                        _state = State.ElemAttributes;
                        if (unquoted && c == '>')
                        {
                            // '>' also closes the tag, so it must be seen again.
                            _reader.Push(c);
                        }
                        attrName.Length = 0;
                        attrValue.Length = 0;
                    }
                    else
                    {
                        attrValue.Append(c);
                    }
                }
                break;
        }
    }

    // Input exhausted: flush whatever plain text is still pending.
    switch (_state)
    {
        case State.Text:
            _state = 0;
            if (_value.Length > 0)
            {
                _nodeType = HtmlNodeType.Text;
                return true;
            }
            return false;

        case State.Amp:
            _state = 0;
            if (entity != null && entity.Length > 0)
            {
                // Treat an entity cut off by end of input the same way as one cut
                // off by any other character, rather than dropping its text.
                _value.Append(DecodeEntity("&" + entity + ";"));
            }
            else
            {
                _value.Append('&');
            }
            _nodeType = HtmlNodeType.Text;
            return true;

        case State.Lt:
            // A trailing '<' with nothing after it is reported as text.
            _state = 0;
            _value.Append('<');
            _nodeType = HtmlNodeType.Text;
            return true;
    }

    // Input ended inside unfinished markup; no node is reported.
    return false;
}