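/** Build lexer grammar from combined grammar that looks like:
*
* (COMBINED_GRAMMAR A
* (tokens { X (= Y 'y'))
* (OPTIONS (= x 'y'))
* (@ members {foo})
* (@ lexer header {package jj;})
* (RULES (RULE .+)))
*
* Move lexer rules and lexer-scoped actions to a new tree, splitting the
* AST apart. Every named action is copied over to the lexer; only those
* with lexer scope are also removed from the combined AST.
* We'll have this Grammar share token symbols later; don't generate
* tokenVocab or tokens{} section.
*
* Side-effects: it removes children from GRAMMAR & RULES nodes
* in combined AST. Anything cut out is dup'd before
* adding to lexer to avoid "who's ur daddy" (parent pointer) issues.
*
* Returns the new lexer AST. If the combined AST has no RULES node, the
* lexer AST is returned as built so far (without a RULES child); if the
* lexer ends up with no rules at all, null is returned.
*/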
public virtual GrammarRootAST ExtractImplicitLexer(Grammar combinedGrammar)
{
    GrammarRootAST combinedAST = combinedGrammar.ast;
    GrammarASTAdaptor adaptor = new GrammarASTAdaptor(combinedAST.Token.InputStream);

    // Snapshot the top-level children up front; the combined AST loses
    // some of them further down.
    GrammarAST[] topLevelNodes = combinedAST.GetChildrenAsArray();

    // Lexer grammar root: (LEXER_GRAMMAR <combined name>Lexer ...)
    string lexerName = combinedAST.GetChild(0).Text + "Lexer";
    CommonToken rootToken = new CommonToken(ANTLRParser.GRAMMAR, "LEXER_GRAMMAR");
    GrammarRootAST lexerAST = new GrammarRootAST(rootToken, combinedAST.tokenStream);
    lexerAST.grammarType = ANTLRParser.LEXER;
    lexerAST.Token.InputStream = combinedAST.Token.InputStream;
    lexerAST.AddChild((ITree)adaptor.Create(ANTLRParser.ID, lexerName));

    // Options: duplicate the OPTIONS node, then copy only the options that
    // are listed in Grammar.lexerOptions and not excluded by
    // Grammar.doNotCopyOptionsToLexer.
    GrammarAST combinedOptions =
        (GrammarAST)combinedAST.GetFirstChildWithType(ANTLRParser.OPTIONS);
    bool hasOptions = combinedOptions != null && combinedOptions.ChildCount != 0;
    if (hasOptions)
    {
        GrammarAST lexerOptions = (GrammarAST)adaptor.DupNode(combinedOptions);
        lexerAST.AddChild(lexerOptions);

        foreach (GrammarAST option in combinedOptions.GetChildrenAsArray())
        {
            string name = option.GetChild(0).Text;
            bool copyToLexer = Grammar.lexerOptions.Contains(name)
                && !Grammar.doNotCopyOptionsToLexer.Contains(name);
            if (!copyToLexer)
            {
                continue;
            }

            GrammarAST optionCopy = (GrammarAST)adaptor.DupTree(option);
            lexerOptions.AddChild(optionCopy);
            lexerAST.SetOption(name, (GrammarAST)optionCopy.GetChild(1));
        }
    }

    // Named actions: every one is duplicated into the lexer, but only the
    // lexer-scoped ones are afterwards removed from the combined grammar.
    List<GrammarAST> lexerScopedActions = new List<GrammarAST>();
    foreach (GrammarAST node in topLevelNodes)
    {
        if (node.Type != ANTLRParser.AT)
        {
            continue;
        }

        lexerAST.AddChild((ITree)adaptor.DupTree(node));
        if (node.GetChild(0).Text.Equals("lexer"))
        {
            lexerScopedActions.Add(node);
        }
    }

    foreach (GrammarAST action in lexerScopedActions)
    {
        combinedAST.DeleteChild(action);
    }

    // Without a RULES node there is nothing to move; hand back what we have.
    GrammarAST combinedRules =
        (GrammarAST)combinedAST.GetFirstChildWithType(ANTLRParser.RULES);
    if (combinedRules == null)
    {
        return lexerAST;
    }

    // Lexer rules: duplicate each rule whose name is a token name into the
    // lexer, then drop the originals from the combined RULES node.
    GrammarAST lexerRules = (GrammarAST)adaptor.Create(ANTLRParser.RULES, "RULES");
    lexerAST.AddChild(lexerRules);

    GrammarASTWithOptions[] candidateRules = combinedRules.ChildCount > 0
        ? combinedRules.Children.Cast<GrammarASTWithOptions>().ToArray()
        : new GrammarASTWithOptions[0];

    List<GrammarAST> movedRules = new List<GrammarAST>();
    foreach (GrammarASTWithOptions candidate in candidateRules)
    {
        if (Grammar.IsTokenName(candidate.GetChild(0).Text))
        {
            lexerRules.AddChild((ITree)adaptor.DupTree(candidate));
            movedRules.Add(candidate);
        }
    }

    foreach (GrammarAST moved in movedRules)
    {
        combinedRules.DeleteChild(moved);
    }

    // Literals already aliased by a lexer rule (e.g. 'if' from IF : 'if' ;)
    // must not get a second, synthesized token rule.
    IList<System.Tuple<GrammarAST, GrammarAST>> aliases =
        Grammar.GetStringLiteralAliasesFromLexerRules(lexerAST);
    ISet<string> literals = combinedGrammar.GetStringLiterals();

    // Strings from the combined grammar (and imported grammars) go into the
    // lexer first, as they are keywords; ambiguities must resolve to them.
    int nextSlot = 0;
    foreach (string literal in literals)
    {
        bool alreadyDefined = aliases != null
            && aliases.Any(alias => literal.Equals(alias.Item2.Text));
        if (alreadyDefined)
        {
            continue;
        }

        // Build (RULE <uniquename> (BLOCK (ALT <lit>))) by hand; the
        // special node types are needed here.
        string ruleName = combinedGrammar.GetStringLiteralLexerRuleName(literal);

        AltAST alternative = new AltAST(ANTLRParser.ALT);
        alternative.AddChild(
            new TerminalAST(new CommonToken(ANTLRParser.STRING_LITERAL, literal)));

        BlockAST block = new BlockAST(ANTLRParser.BLOCK);
        block.AddChild(alternative);

        GrammarAST literalRule = new RuleAST(ANTLRParser.RULE);
        literalRule.AddChild(
            new TerminalAST(new CommonToken(ANTLRParser.TOKEN_REF, ruleName)));
        literalRule.AddChild(block);

        // Each new literal rule lands right after the previously inserted one.
        lexerRules.InsertChild(nextSlot, literalRule);
        // Reset indexes and set the parent of the inserted rule.
        lexerRules.FreshenParentAndChildIndexes();
        nextSlot++;
    }

    // TODO: take out after stable if slow
    lexerAST.SanityCheckParentAndChildIndexes();
    combinedAST.SanityCheckParentAndChildIndexes();

    combinedGrammar.tool.Log("grammar", "after extract implicit lexer =" + combinedAST.ToStringTree());
    combinedGrammar.tool.Log("grammar", "lexer =" + lexerAST.ToStringTree());

    // A lexer with no rules is reported as "no implicit lexer".
    return lexerRules.ChildCount == 0 ? null : lexerAST;
}