package antlr; /* ANTLR Translator Generator * Project led by Terence Parr at http://www.cs.usfca.edu * Software rights: http://www.antlr.org/license.html * * $Id: NBSCodeGenerator.java,v 1.1 2007/04/03 14:35:25 gwielenga Exp $ */ import java.util.Enumeration; import antlr.collections.impl.BitSet; import antlr.collections.impl.Vector; import java.io.PrintWriter; //SAS: changed for proper text file io import java.io.IOException; import java.io.FileWriter; /**Generate P.nbs, a representation of P with or without actions */ public class NBSCodeGenerator extends CodeGenerator { /** non-zero if inside syntactic predicate generation */ protected int syntacticPredLevel = 0; /** true during lexer generation, false during parser generation */ protected boolean doingLexRules = false; protected boolean firstElementInAlt; protected AlternativeElement prevAltElem = null; // what was generated last? /** Create a Diagnostic code-generator using the given Grammar * The caller must still call setTool, setBehavior, and setAnalyzer * before generating code. */ public NBSCodeGenerator() { super(); charFormatter = new JavaCharFormatter(); } /** Encode a string for printing in a NBS document.. * e.g. encode '<' '>' and similar stuff * @param s the string to encode */ static String HTMLEncode(String s) { return s; /**** StringBuffer buf = new StringBuffer(); for (int i = 0, len = s.length(); i < len; i++) { char c = s.charAt(i); if (c == '&') buf.append("&"); else if (c == '\"') buf.append("""); else if (c == '\'') buf.append("'"); else if (c == '<') buf.append("<"); else if (c == '>') buf.append(">"); else buf.append(c); } return buf.toString(); ***/ } public void gen() { // Do the code generation try { // Loop over all grammars Enumeration grammarIter = behavior.grammars.elements(); while (grammarIter.hasMoreElements()) { Grammar g = (Grammar)grammarIter.nextElement(); // Connect all the components to each other /* g.setGrammarAnalyzer(analyzer); analyzer.setGrammar(g); */ g.setCodeGenerator(this); // To get right overloading behavior across hetrogeneous grammars g.generate(); if (antlrTool.hasError()) { antlrTool.fatalError("Exiting due to errors."); } } } catch (IOException e) { antlrTool.reportException(e, null); } } /** Generate code for the given grammar element. * @param blk The {...} action to generate */ public void gen(ActionElement action) { // no-op } /** Generate code for the given grammar element. * @param blk The "x|y|z|..." block to generate */ public void gen(AlternativeBlock blk) { genGenericBlock(blk, ""); } /** Generate code for the given grammar element. * @param blk The block-end element to generate. Block-end * elements are synthesized by the grammar parser to represent * the end of a block. */ public void gen(BlockEndElement end) { // no-op } /** Generate code for the given grammar element. * @param blk The character literal reference to generate */ public void gen(CharLiteralElement atom) { if (atom.not) { _print("~"); } _print(HTMLEncode(atom.atomText) + " "); } /** Generate code for the given grammar element. * @param blk The character-range reference to generate */ public void gen(CharRangeElement r) { print(r.beginText + "-" + r.endText + " "); } /** Generate the lexer NBS file */ public void gen(LexerGrammar g) throws IOException { setGrammar(g); antlrTool.reportProgress("Generating " + grammar.getClassName() + ".nbs"); currentOutput = antlrTool.openOutputFile(grammar.getClassName() + ".nbs"); //SAS: changed for proper text file io tabs = 0; doingLexRules = true; // Generate header common to all TXT output files genHeader(); // Output the user-defined lexer premamble // RK: guess not.. // println(grammar.preambleAction.getText()); // Generate lexer class definition println(""); // print javadoc comment if any /********** if (grammar.comment != null) { _println(HTMLEncode(grammar.comment)); } println("Definition of lexer " + grammar.getClassName() + ", which is a subclass of " + grammar.getSuperClass() + "."); ***********/ // Generate user-defined parser class members // printAction(grammar.classMemberAction.getText()); /* // Generate string literals println(""); println("*** String literals used in the parser"); println("The following string literals were used in the parser."); println("An actual code generator would arrange to place these literals"); println("into a table in the generated lexer, so that actions in the"); println("generated lexer could match token text against the literals."); println("String literals used in the lexer are not listed here, as they"); println("are incorporated into the mainstream lexer processing."); tabs++; // Enumerate all of the symbols and look for string literal symbols Enumeration ids = grammar.getSymbols(); while ( ids.hasMoreElements() ) { GrammarSymbol sym = (GrammarSymbol)ids.nextElement(); // Only processing string literals -- reject other symbol entries if ( sym instanceof StringLiteralSymbol ) { StringLiteralSymbol s = (StringLiteralSymbol)sym; println(s.getId() + " = " + s.getTokenType()); } } tabs--; println("*** End of string literals used by the parser"); */ // Generate nextToken() rule. // nextToken() is a synthetic lexer rule that is the implicit OR of all // user-defined lexer rules. /********** genNextToken(); ***********/ // Generate code for each rule in the lexer Enumeration ids = grammar.rules.elements(); while (ids.hasMoreElements()) { RuleSymbol rs = (RuleSymbol)ids.nextElement(); if (!rs.id.equals("mnextToken")) { genRule(rs); } } // Close the lexer output file currentOutput.close(); currentOutput = null; doingLexRules = false; } /** Generate code for the given grammar element. * @param blk The (...)+ block to generate */ public void gen(OneOrMoreBlock blk) { genGenericBlock(blk, "+"); } /** Generate the parser NBS file */ public void gen(ParserGrammar g) throws IOException { setGrammar(g); // Open the output stream for the parser and set the currentOutput antlrTool.reportProgress("Generating " + grammar.getClassName() + ".nbs"); currentOutput = antlrTool.openOutputFile(grammar.getClassName() + ".nbs"); tabs = 0; // Generate the header common to all output files. genHeader(); // Generate parser class definition println(""); // print javadoc comment if any /**********8 if (grammar.comment != null) { _println(HTMLEncode(grammar.comment)); } println("Definition of parser " + grammar.getClassName() + ", which is a subclass of " + grammar.getSuperClass() + "."); ***********/ tabs++; // Enumerate the parser rules Enumeration rules = grammar.rules.elements(); while (rules.hasMoreElements()) { println(""); // Get the rules from the list and downcast it to proper type GrammarSymbol sym = (GrammarSymbol)rules.nextElement(); // Only process parser rules if (sym instanceof RuleSymbol) { genRule((RuleSymbol)sym); } } tabs--; println(""); genTail(); // Close the parser output stream currentOutput.close(); currentOutput = null; } /** Generate code for the given grammar element. * @param blk The rule-reference to generate */ public void gen(RuleRefElement rr) { RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule); // Generate the actual rule description //_print(""); _print(rr.targetRule); //_print(""); // RK: Leave out args.. // if (rr.args != null) { // _print("["+rr.args+"]"); // } _print(" "); } /** Generate code for the given grammar element. * @param blk The string-literal reference to generate */ public void gen(StringLiteralElement atom) { if (atom.not) { _print("~"); } _print(HTMLEncode(atom.atomText)); _print(" "); } /** Generate code for the given grammar element. * @param blk The token-range reference to generate */ public void gen(TokenRangeElement r) { print(r.beginText + "-" + r.endText + " "); } /** Generate code for the given grammar element. * @param blk The token-reference to generate */ public void gen(TokenRefElement atom) { if (atom.not) { _print("~"); } _print(atom.atomText); _print(" "); } public void gen(TreeElement t) { print(t + " "); } /** Generate the tree-walker TXT file */ public void gen(TreeWalkerGrammar g) throws IOException { setGrammar(g); // Open the output stream for the parser and set the currentOutput antlrTool.reportProgress("Generating " + grammar.getClassName() + ".html"); currentOutput = antlrTool.openOutputFile(grammar.getClassName() + ".html"); //SAS: changed for proper text file io tabs = 0; // Generate the header common to all output files. genHeader(); // Output the user-defined parser premamble println(""); // println("*** Tree-walker Preamble Action."); // println("This action will appear before the declaration of your tree-walker class:"); // tabs++; // println(grammar.preambleAction.getText()); // tabs--; // println("*** End of tree-walker Preamble Action"); // Generate tree-walker class definition println(""); // print javadoc comment if any if (grammar.comment != null) { _println(HTMLEncode(grammar.comment)); } println("Definition of tree parser " + grammar.getClassName() + ", which is a subclass of " + grammar.getSuperClass() + "."); // Generate user-defined tree-walker class members // println(""); // println("*** User-defined tree-walker class members:"); // println("These are the member declarations that you defined for your class:"); // tabs++; // printAction(grammar.classMemberAction.getText()); // tabs--; // println("*** End of user-defined tree-walker class members"); // Generate code for each rule in the grammar println(""); // println("*** tree-walker rules:"); tabs++; // Enumerate the tree-walker rules Enumeration rules = grammar.rules.elements(); while (rules.hasMoreElements()) { println(""); // Get the rules from the list and downcast it to proper type GrammarSymbol sym = (GrammarSymbol)rules.nextElement(); // Only process tree-walker rules if (sym instanceof RuleSymbol) { genRule((RuleSymbol)sym); } } tabs--; println(""); // println("*** End of tree-walker rules"); // println(""); // println("*** End of tree-walker"); // Close the tree-walker output stream currentOutput.close(); currentOutput = null; } /** Generate a wildcard element */ public void gen(WildcardElement wc) { /* if ( wc.getLabel()!=null ) { _print(wc.getLabel()+"="); } */ _print(". "); } /** Generate code for the given grammar element. * @param blk The (...)* block to generate */ public void gen(ZeroOrMoreBlock blk) { genGenericBlock(blk, "*"); } protected void genAlt(Alternative alt) { if (alt.getTreeSpecifier() != null) { _print(alt.getTreeSpecifier().getText()); } prevAltElem = null; for (AlternativeElement elem = alt.head; !(elem instanceof BlockEndElement); elem = elem.next) { elem.generate(); firstElementInAlt = false; prevAltElem = elem; } } /** Generate the header for a block, which may be a RuleBlock or a * plain AlternativeBLock. This generates any variable declarations, * init-actions, and syntactic-predicate-testing variables. * @blk The block for which the preamble is to be generated. */ // protected void genBlockPreamble(AlternativeBlock blk) { // RK: don't dump out init actions // dump out init action // if ( blk.initAction!=null ) { // printAction("{" + blk.initAction + "}"); // } // } /**Generate common code for a block of alternatives; return a postscript * that needs to be generated at the end of the block. Other routines * may append else-clauses and such for error checking before the postfix * is generated. */ public void genCommonBlock(AlternativeBlock blk) { for (int i = 0; i < blk.alternatives.size(); i++) { Alternative alt = blk.getAlternativeAt(i); AlternativeElement elem = alt.head; // dump alt operator | if (i > 0 && blk.alternatives.size() > 1) { _println(""); print("|\t"); } // Dump the alternative, starting with predicates // boolean save = firstElementInAlt; firstElementInAlt = true; tabs++; // in case we do a newline in alt, increase the tab indent // RK: don't dump semantic/syntactic predicates // only obscures grammar. // // Dump semantic predicates // // if (alt.semPred != null) { // println("{" + alt.semPred + "}?"); // } // Dump syntactic predicate // if (alt.synPred != null) { // genSynPred(alt.synPred); // } genAlt(alt); tabs--; firstElementInAlt = save; } } /** Generate a textual representation of the follow set * for a block. * @param blk The rule block of interest */ public void genFollowSetForRuleBlock(RuleBlock blk) { Lookahead follow = grammar.theLLkAnalyzer.FOLLOW(1, blk.endNode); printSet(grammar.maxk, 1, follow); } protected void genGenericBlock(AlternativeBlock blk, String blkOp) { if (blk.alternatives.size() > 1) { // make sure we start on a new line if (!firstElementInAlt) { // only do newline if the last element wasn't a multi-line block if (prevAltElem == null || !(prevAltElem instanceof AlternativeBlock) || ((AlternativeBlock)prevAltElem).alternatives.size() == 1) { _println(""); print("(\t"); } else { _print("(\t"); } // _println(""); // print("(\t"); } else { _print("(\t"); } } else { _print("( "); } // RK: don't dump init actions // genBlockPreamble(blk); genCommonBlock(blk); if (blk.alternatives.size() > 1) { _println(""); print(")" + blkOp + " "); // if not last element of alt, need newline & to indent if (!(blk.next instanceof BlockEndElement)) { _println(""); print(""); } } else { _print(")" + blkOp + " "); } } /** Generate a header that is common to all TXT files */ protected void genHeader() { println("############################################################"); println("# ANTLR - generated NBS file from " + antlrTool.grammarFile); println("#"); println("# ANTLR Version " + antlrTool.version + "; 1989-2005"); println("############################################################"); println(""); /********************* println(""); println(""); println(""); println("Grammar " + antlrTool.grammarFile + ""); println(""); println(""); println(""); println(""); println(""); println(""); println("
"); println("Grammar " + grammar.getClassName() + "
"); println("ANTLR-generated NBS file from " + antlrTool.grammarFile); println("

"); println("Terence Parr, MageLang Institute"); println("
ANTLR Version " + antlrTool.version + "; 1989-2005"); println("

"); println("
");
*******************/
        // RK: see no reason for printing include files and stuff...
//		tabs++;
//		printAction(behavior.getHeaderAction(""));
//		tabs--;
    }

    /**Generate the lookahead set for an alternate. */
    protected void genLookaheadSetForAlt(Alternative alt) {
        if (doingLexRules && alt.cache[1].containsEpsilon()) {
            println("MATCHES ALL");
            return;
        }
        int depth = alt.lookaheadDepth;
        if (depth == GrammarAnalyzer.NONDETERMINISTIC) {
            // if the decision is nondeterministic, do the best we can: LL(k)
            // any predicates that are around will be generated later.
            depth = grammar.maxk;
        }
        for (int i = 1; i <= depth; i++) {
            Lookahead lookahead = alt.cache[i];
            printSet(depth, i, lookahead);
        }
    }

    /** Generate a textual representation of the lookahead set
     * for a block.
     * @param blk  The block of interest
     */
    public void genLookaheadSetForBlock(AlternativeBlock blk) {
        // Find the maximal lookahead depth over all alternatives
        int depth = 0;
        for (int i = 0; i < blk.alternatives.size(); i++) {
            Alternative alt = blk.getAlternativeAt(i);
            if (alt.lookaheadDepth == GrammarAnalyzer.NONDETERMINISTIC) {
                depth = grammar.maxk;
                break;
            }
            else if (depth < alt.lookaheadDepth) {
                depth = alt.lookaheadDepth;
            }
        }

        for (int i = 1; i <= depth; i++) {
            Lookahead lookahead = grammar.theLLkAnalyzer.look(i, blk);
            printSet(depth, i, lookahead);
        }
    }

    /** Generate the nextToken rule.
     * nextToken is a synthetic lexer rule that is the implicit OR of all
     * user-defined lexer rules.
     */
    public void genNextToken() {
        println("");
        println("/** Lexer nextToken rule:");
        println(" *  The lexer nextToken rule is synthesized from all of the user-defined");
        println(" *  lexer rules.  It logically consists of one big alternative block with");
        println(" *  each user-defined rule being an alternative.");
        println(" */");

        // Create the synthesized rule block for nextToken consisting
        // of an alternate block containing all the user-defined lexer rules.
        RuleBlock blk = MakeGrammar.createNextTokenRule(grammar, grammar.rules, "nextToken");

        // Define the nextToken rule symbol
        RuleSymbol nextTokenRs = new RuleSymbol("mnextToken");
        nextTokenRs.setDefined();
        nextTokenRs.setBlock(blk);
        nextTokenRs.access = "private";
        grammar.define(nextTokenRs);

        /*
		// Analyze the synthesized block
		if (!grammar.theLLkAnalyzer.deterministic(blk))
		{
			println("The grammar analyzer has determined that the synthesized");
			println("nextToken rule is non-deterministic (i.e., it has ambiguities)");
			println("This means that there is some overlap of the character");
			println("lookahead for two or more of your lexer rules.");
		}
		*/

        genCommonBlock(blk);
    }

    /** Generate code for a named rule block
     * @param s The RuleSymbol describing the rule to generate
     */
    public void genRule(RuleSymbol s) {
        if (s == null || !s.isDefined()) return;	// undefined rule
        println("");
/************
        if (s.comment != null) {
            _println(HTMLEncode(s.comment));
        }
        if (s.access.length() != 0) {
            if (!s.access.equals("public")) {
                _print(s.access + " ");
            }
        }
        _print("");
************/
if(doingLexRules) {
        _print("TOKEN:");
        _print(s.getId());
        _print(": ( ");
}else {
        _print(s.getId());
        _print(" = ");
}

        // Get rule return type and arguments
        RuleBlock rblk = s.getBlock();

        // RK: for NBS output not of much value...
        // Gen method return value(s)
//		if (rblk.returnAction != null) {
//			_print("["+rblk.returnAction+"]");
//		}
        // Gen arguments
//		if (rblk.argAction != null)
//		{
//				_print(" returns [" + rblk.argAction+"]");
//		}
        //_println("");
        tabs++;
        //print(":\t");

        // Dump any init-action
        // genBlockPreamble(rblk);

        // Dump the alternates of the rule
        genCommonBlock(rblk);

        _println(" )");
        tabs--;
    }

    /** Generate the syntactic predicate.  This basically generates
     * the alternative block, buts tracks if we are inside a synPred
     * @param blk  The syntactic predicate block
     */
    protected void genSynPred(SynPredBlock blk) {
        syntacticPredLevel++;
        genGenericBlock(blk, " =>");
        syntacticPredLevel--;
    }

    public void genTail() {
        println("");
/****
        println("
"); println(""); println("
"); ****/ } /** Generate the token types TXT file */ protected void genTokenTypes(TokenManager tm) throws IOException { // Open the token output TXT file and set the currentOutput stream antlrTool.reportProgress("Generating " + tm.getName() + TokenTypesFileSuffix + TokenTypesFileExt); currentOutput = antlrTool.openOutputFile(tm.getName() + TokenTypesFileSuffix + TokenTypesFileExt); //SAS: changed for proper text file io tabs = 0; // Generate the header common to all diagnostic files genHeader(); // Generate a string for each token. This creates a static // array of Strings indexed by token type. println(""); println("*** Tokens used by the parser"); println("This is a list of the token numeric values and the corresponding"); println("token identifiers. Some tokens are literals, and because of that"); println("they have no identifiers. Literals are double-quoted."); tabs++; // Enumerate all the valid token types Vector v = tm.getVocabulary(); for (int i = Token.MIN_USER_TYPE; i < v.size(); i++) { String s = (String)v.elementAt(i); if (s != null) { println(s + " = " + i); } } // Close the interface tabs--; println("*** End of tokens used by the parser"); // Close the tokens output file currentOutput.close(); currentOutput = null; } /** Get a string for an expression to generate creation of an AST subtree. * @param v A Vector of String, where each element is an expression in the target language yielding an AST node. */ public String getASTCreateString(Vector v) { return null; } /** Get a string for an expression to generate creating of an AST node * @param str The arguments to the AST constructor */ public String getASTCreateString(GrammarAtom atom, String str) { return null; } /** Map an identifier to it's corresponding tree-node variable. * This is context-sensitive, depending on the rule and alternative * being generated * @param id The identifier name to map * @param forInput true if the input tree node variable is to be returned, otherwise the output variable is returned. */ public String mapTreeId(String id, ActionTransInfo tInfo) { return id; } /// unused. protected String processActionForSpecialSymbols(String actionStr, int line, RuleBlock currentRule, ActionTransInfo tInfo) { return actionStr; } /** Format a lookahead or follow set. * @param depth The depth of the entire lookahead/follow * @param k The lookahead level to print * @param lookahead The lookahead/follow set to print */ public void printSet(int depth, int k, Lookahead lookahead) { int numCols = 5; int[] elems = lookahead.fset.toArray(); if (depth != 1) { print("k==" + k + ": {"); } else { print("{ "); } if (elems.length > numCols) { _println(""); tabs++; print(""); } int column = 0; for (int i = 0; i < elems.length; i++) { column++; if (column > numCols) { _println(""); print(""); column = 0; } if (doingLexRules) { _print(charFormatter.literalChar(elems[i])); } else { _print((String)grammar.tokenManager.getVocabulary().elementAt(elems[i])); } if (i != elems.length - 1) { _print(", "); } } if (elems.length > numCols) { _println(""); tabs--; print(""); } _println(" }"); } }