/// <summary>
/// Splits script text into a flat, ordered list of tokens using a small
/// character-driven state machine.
/// </summary>
/// <remarks>
/// Rules implemented here:
/// <list type="bullet">
/// <item>"//" starts a comment that runs to the next newline; that newline is swallowed.</item>
/// <item>"/*" starts a comment that runs to the next "*/"; the closing star must lie
/// inside the comment body, so "/*/" does not terminate the comment.</item>
/// <item>A double quote in the Ready state starts a quoted lexeme that keeps both quote
/// characters. Inside it, backslash-quote contributes a bare quote and a backslash
/// followed by any other character is kept as backslash plus that character.</item>
/// <item>'$' in the Ready state starts a variable lexeme; any other non-whitespace
/// character starts a word.</item>
/// <item>Words and variables end at whitespace, a newline, '{', '}' or ':'. The newline
/// and the three punctuation characters are additionally emitted as their own
/// one-character token.</item>
/// </list>
/// Tokens are emitted through <c>SetToken</c>, which is defined outside this method and
/// is expected to append to the supplied list (confirm against its definition).
/// </remarks>
/// <param name="str">The script text to tokenize. Must not be null.</param>
/// <param name="source">Passed unchanged to <c>SetToken</c> with every token; presumably
/// the name of the script being read (confirm against <c>SetToken</c>).</param>
/// <returns>The list that <c>SetToken</c> was given for every emitted lexeme.</returns>
/// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
/// <exception cref="Exception">The input ends inside a quoted string.</exception>
public IList<ScriptToken> Tokenize( String str, String source )
{
    if ( str == null )
        throw new ArgumentNullException( "str" );

    const char varOpener = '$', quote = '"', slash = '/', backslash = '\\', openbrace = '{', closebrace = '}', colon = ':', star = '*';

    char c = (char)0;
    char lastChar = (char)0;
    StringBuilder lexeme = new StringBuilder();
    uint line = 1, lastQuote = 0;
    // Index of the first character after the "/*" that opened the current multi-line comment.
    int commentBodyStart = 0;
    ScriptState state = ScriptState.Ready;
    List<ScriptToken> tokens = new List<ScriptToken>();

    for ( int index = 0; index < str.Length; index++ )
    {
        lastChar = c;
        c = str[ index ];

        switch ( state )
        {
            case ScriptState.Ready:
                // Comment openers are not tested here: a '/' always goes through
                // PossibleComment first, so the '/' that closed a "*/" can never be
                // mistaken for the first half of a new "//" or "/*".
                if ( c == quote )
                {
                    // Remember where the string opened so an unterminated quote is
                    // reported against its opening line.
                    lastQuote = line;
                    lexeme = new StringBuilder( c.ToString() );
                    state = ScriptState.Quote;
                }
                else if ( c == varOpener )
                {
                    // Set up to read in a variable
                    lexeme = new StringBuilder( c.ToString() );
                    state = ScriptState.Var;
                }
                else if ( IsNewline( c ) )
                {
                    // A newline is a token in its own right
                    lexeme = new StringBuilder( c.ToString() );
                    SetToken( lexeme, line, source, tokens );
                }
                else if ( !IsWhitespace( c ) )
                {
                    lexeme = new StringBuilder( c.ToString() );
                    if ( c == slash )
                        state = ScriptState.PossibleComment;
                    else
                        state = ScriptState.Word;
                }
                break;

            case ScriptState.Comment:
                // The terminating newline is consumed without producing a token
                if ( IsNewline( c ) )
                    state = ScriptState.Ready;
                break;

            case ScriptState.MultiComment:
                // Only a star inside the comment body may close it; the star of the
                // opening "/*" sits at commentBodyStart - 1 and must not be reused.
                if ( c == slash && lastChar == star && index > commentBodyStart )
                    state = ScriptState.Ready;
                break;

            case ScriptState.PossibleComment:
                // This state is only entered from Ready on a '/', so the previous
                // character is always that slash and the lexeme is exactly "/".
                if ( c == slash )
                {
                    lexeme = new StringBuilder();
                    state = ScriptState.Comment;
                }
                else if ( c == star )
                {
                    lexeme = new StringBuilder();
                    state = ScriptState.MultiComment;
                    commentBodyStart = index + 1;
                }
                else
                {
                    // Not a comment after all: the slash begins an ordinary word and the
                    // current character must still be handled by the word rules,
                    // otherwise it would be silently dropped.
                    state = ScriptState.Word;
                    goto case ScriptState.Word;
                }
                break;

            // Words and variables share exactly the same termination rules.
            case ScriptState.Word:
            case ScriptState.Var:
                if ( IsNewline( c ) )
                {
                    // Flush the pending lexeme, then emit the newline as its own token
                    SetToken( lexeme, line, source, tokens );
                    lexeme = new StringBuilder( c.ToString() );
                    SetToken( lexeme, line, source, tokens );
                    state = ScriptState.Ready;
                }
                else if ( IsWhitespace( c ) )
                {
                    SetToken( lexeme, line, source, tokens );
                    state = ScriptState.Ready;
                }
                else if ( c == openbrace || c == closebrace || c == colon )
                {
                    // Flush the pending lexeme, then emit the punctuation as its own token
                    SetToken( lexeme, line, source, tokens );
                    lexeme = new StringBuilder( c.ToString() );
                    SetToken( lexeme, line, source, tokens );
                    state = ScriptState.Ready;
                }
                else
                {
                    lexeme.Append( c );
                }
                break;

            case ScriptState.Quote:
                // A backslash is held back until the following character is known
                if ( c != backslash )
                {
                    if ( c == quote && lastChar == backslash )
                    {
                        // Escaped quote: keep the quote, drop the backslash
                        lexeme.Append( c );
                    }
                    else if ( c == quote )
                    {
                        // Closing quote ends the lexeme
                        lexeme.Append( c );
                        SetToken( lexeme, line, source, tokens );
                        state = ScriptState.Ready;
                    }
                    else
                    {
                        // The held-back backslash was not an escape; restore it
                        if ( lastChar == backslash )
                            lexeme.Append( "\\" );
                        lexeme.Append( c );
                    }
                }
                break;
        }

        // Separate check for newlines just to track line numbers
        if ( IsNewline( c ) )
            line++;
    }

    // Check for valid exit states
    if ( state == ScriptState.Quote )
    {
        throw new Exception( String.Format( "no matching \" found for \" at line {0}", lastQuote ) );
    }

    // A lexeme still being built when the input ends is a complete token; this includes
    // a lone trailing '/' that never turned into a comment.
    if ( state == ScriptState.Word || state == ScriptState.Var || state == ScriptState.PossibleComment )
    {
        if ( lexeme.Length != 0 )
            SetToken( lexeme, line, source, tokens );
    }

    return tokens;
}