public virtual IList<IToken> Tokenize(string pattern)
{
    // Break the pattern into alternating chunks: literal text ("sea") and
    // tag islands such as <ID> or <expr>.
    IList<Chunk> chunks = Split(pattern);

    // Build the flat token stream produced from all chunks in order.
    IList<IToken> tokens = new List<IToken>();
    foreach (Chunk chunk in chunks)
    {
        if (chunk is TagChunk tagChunk)
        {
            char leading = tagChunk.Tag[0];
            if (System.Char.IsUpper(leading))
            {
                // Upper-case tag => token reference; it must name a token the
                // parser's grammar actually declares.
                int ttype = parser.GetTokenType(tagChunk.Tag);
                if (ttype == TokenConstants.InvalidType)
                {
                    throw new ArgumentException("Unknown token " + tagChunk.Tag + " in pattern: " + pattern);
                }
                tokens.Add(new TokenTagToken(tagChunk.Tag, ttype, tagChunk.Label));
            }
            else if (System.Char.IsLower(leading))
            {
                // Lower-case tag => rule reference; map the rule to its
                // imaginary bypass token type.
                int ruleIndex = parser.GetRuleIndex(tagChunk.Tag);
                if (ruleIndex == -1)
                {
                    throw new ArgumentException("Unknown rule " + tagChunk.Tag + " in pattern: " + pattern);
                }
                int ruleImaginaryTokenType = parser.GetATNWithBypassAlts().ruleToTokenType[ruleIndex];
                tokens.Add(new RuleTagToken(tagChunk.Tag, ruleImaginaryTokenType, tagChunk.Label));
            }
            else
            {
                // Tag starts with neither an upper- nor a lower-case letter.
                throw new ArgumentException("invalid tag: " + tagChunk.Tag + " in pattern: " + pattern);
            }
        }
        else
        {
            // Literal text: run it through the grammar's lexer and keep every
            // token it yields up to (but excluding) EOF.
            TextChunk textChunk = (TextChunk)chunk;
            lexer.SetInputStream(new AntlrInputStream(textChunk.Text));
            for (IToken t = lexer.NextToken(); t.Type != TokenConstants.EOF; t = lexer.NextToken())
            {
                tokens.Add(t);
            }
        }
    }
    return tokens;
}