// Scan the input string and build the token list for it.
//
// The arguments are forwarded unchanged to base.Reset(str, start, len), which
// sets up the scanner state (position / current / eof) used below.
//
// m_Tokens is cleared first and then filled in scan order: every recognised
// construct becomes its own token, and the plain characters between two
// constructs are emitted as a single TokenType.Text token. Emphasis marks are
// additionally collected in a side list and handed to ResolveEmphasisMarks
// once the whole input has been scanned.
public void Tokenize(string str, int start, int len)
{
    // Reset scanner and output state
    base.Reset(str, start, len);
    m_Tokens.Clear();

    // Allocated lazily - stays null when the input has no emphasis marks
    List<Token> emphasisMarks = null;
    List<Abbreviation> abbreviations = m_Markdown.GetAbbreviations();
    bool extraMode = m_Markdown.ExtraMode;

    // Start of the run of plain text that has not been emitted yet
    int textStart = position;

    while (!eof)
    {
        // If a token is found at this position, the pending text run ends here
        int textEnd = position;
        Token token = null;

        switch (current)
        {
            case '*':
            case '_':
            {
                token = CreateEmphasisMark();
                if (token != null)
                {
                    // Only real marks are remembered for the later resolve pass
                    var markType = token.type;
                    if (markType == TokenType.internal_mark
                        || markType == TokenType.opening_mark
                        || markType == TokenType.closing_mark)
                    {
                        if (emphasisMarks == null)
                        {
                            emphasisMarks = new List<Token>();
                        }
                        emphasisMarks.Add(token);
                    }
                }
                break;
            }

            case '`':
            {
                token = ProcessCodeSpan();
                break;
            }

            case '[':
            case '!':
            {
                // Link, image or footnote reference
                int referenceStart = position;
                token = ProcessLinkOrImageOrFootnote();
                if (token == null)
                {
                    // Invalid syntax: rewind so the '[' or '!' is handled
                    // as an ordinary character further down
                    position = referenceStart;
                }
                break;
            }

            case '<':
            {
                int tagStart = position;
                HtmlTag tag = HtmlTag.Parse(this);
                if (tag == null)
                {
                    // Not an html tag - rewind and try an autolink, eg: <google.com>
                    position = tagStart;
                    token = ProcessAutoLink();
                    if (token == null)
                    {
                        position = tagStart;
                    }
                }
                else if (m_Markdown.SafeMode && !tag.IsSafe())
                {
                    // Unsafe tag in safe mode: rewind, the '<' is then treated
                    // as plain text
                    position = tagStart;
                }
                else
                {
                    // Valid (and permitted) tag - token spans the whole tag
                    token = CreateToken(TokenType.HtmlTag, tagStart, position - tagStart);
                }
                break;
            }

            case '&':
            {
                // A valid html entity becomes a token covering the entity text
                // NOTE(review): no explicit rewind here when SkipHtmlEntity
                // fails, unlike the '[' and '<' cases - presumably it leaves
                // position untouched on failure; confirm.
                int entityStart = position;
                string entity = null;
                if (SkipHtmlEntity(ref entity))
                {
                    token = CreateToken(TokenType.Html, entityStart, position - entityStart);
                }
                break;
            }

            case ' ':
            {
                // Two spaces directly before a line end request a line break
                bool doubleSpaceAtLineEnd = CharAtOffset(1) == ' ' && IsLineEnd(CharAtOffset(2));
                if (doubleSpaceAtLineEnd)
                {
                    SkipForward(2);

                    // No br at the very end of the input
                    if (!eof)
                    {
                        SkipEol();

                        // Zero-length token placed where the spaces began
                        token = CreateToken(TokenType.br, textEnd, 0);
                    }
                }
                break;
            }

            case '\\':
            {
                // Backslash followed by an escapable character: emit just the
                // escaped character as text and step over both characters
                if (Utils.IsEscapableChar(CharAtOffset(1), extraMode))
                {
                    token = CreateToken(TokenType.Text, position + 1, 1);
                    SkipForward(2);
                }
                break;
            }
        }

        // Nothing special found: try the abbreviations, but only where the
        // previous character is not a letter or digit
        if (token == null && abbreviations != null && !Char.IsLetterOrDigit(CharAtOffset(-1)))
        {
            var candidateStart = position;
            foreach (var abbr in abbreviations)
            {
                // Must match in full and must not be followed by a letter or digit
                if (SkipString(abbr.Abbr) && !Char.IsLetterOrDigit(current))
                {
                    token = CreateToken(TokenType.abbreviation, abbr);
                    break;
                }

                // No match - rewind before trying the next abbreviation
                position = candidateStart;
            }
        }

        if (token == null)
        {
            // Ordinary character: move on by one and keep looking
            // NOTE(review): after the double-space skip above the scanner can
            // already be at eof with no token, so this call may run at eof -
            // confirm SkipForward tolerates that.
            SkipForward(1);
            continue;
        }

        // Flush the plain text that precedes the token, then the token itself
        if (textEnd > textStart)
        {
            m_Tokens.Add(CreateToken(TokenType.Text, textStart, textEnd - textStart));
        }
        m_Tokens.Add(token);

        // The next text run starts right after the token
        textStart = position;
    }

    // Flush whatever plain text follows the last token
    if (position > textStart)
    {
        m_Tokens.Add(CreateToken(TokenType.Text, textStart, position - textStart));
    }

    // Resolve emphasis marks, if any were collected
    if (emphasisMarks != null)
    {
        ResolveEmphasisMarks(m_Tokens, emphasisMarks);
    }
}