/// <summary>
/// Splits <paramref name="text"/> into an ordered list of tokens: a Tag token for every
/// match of <c>_htmlTokens</c>, and a Text token for each non-empty stretch of input
/// found before, between, or after those matches.
/// </summary>
/// <remarks>
/// The <c>_htmlTokens</c> regex is derived from the _tokenize() subroutine in
/// Brad Choate's MTRegex plugin: http://www.bradchoate.com/past/mtregex.php
/// </remarks>
/// <param name="text">The input string to tokenize.</param>
/// <returns>The tokens in the order they occur in <paramref name="text"/>.</returns>
private List<Token> TokenizeHTML(string text)
{
    var result = new List<Token>();

    // Index of the first character that has not yet been emitted as part of a token.
    int cursor = 0;

    foreach (Match tag in _htmlTokens.Matches(text))
    {
        // Anything between the previous token and this match is plain text.
        int gap = tag.Index - cursor;
        if (gap > 0)
        {
            result.Add(new Token(TokenType.Text, text.Substring(cursor, gap)));
        }

        result.Add(new Token(TokenType.Tag, tag.Value));
        cursor = tag.Index + tag.Length;
    }

    // Whatever follows the last match (or the whole input when nothing matched) is text.
    int remaining = text.Length - cursor;
    if (remaining > 0)
    {
        result.Add(new Token(TokenType.Text, text.Substring(cursor, remaining)));
    }

    return result;
}