我已经完成了c-minus的antlr格式语法文件

5 views

Skip to first unread message

Gao wei

unread,

Aug 30, 2005, 5:48:30 AM (8/30/05)

to SDL Eclipse Plugin

/*
*       Parser for C-Minus
* Copyright (C) 2005 Gao Wei <rock...@gmail.com>
*
*
*/

// Parser for the C-Minus teaching language (ANTLR 2.x grammar).
//
// Token conventions used by the rules below:
//   * ID, NUM, COMMA, LPARENT and RPARENT are named tokens produced by
//     CMinusLexer rules of the same name.
//   * Keywords ("int", "void", "if", "else", "while", "return") and the
//     remaining punctuation/operators are string literals; the lexer has to
//     test its tokens against the literals table for them to be recognised.
class CMinusParser extends Parser;
options{
k=4;            // up to four tokens of lookahead
buildAST=true;  // build a (flat) AST while parsing
}

// A program is one or more declarations followed by end of input.
// EOF forces the whole input to be consumed; '!' keeps it out of the AST.
program : declaration_list EOF! ;

// Declarations simply follow one another; C-Minus has no separator
// between them.
declaration_list : (declaration)+ ;

// Both alternatives start with "type_specifier ID"; the third token
// (";" or "[" versus LPARENT) tells them apart.
declaration : var_declaration | fun_declaration ;

// Scalar or fixed-size array variable: "int x;" or "int x[10];".
// "[" NUM "]" and ";" are separate tokens.
var_declaration : type_specifier ID ( "[" NUM "]" )? ";" ;

type_specifier : "int" | "void" ;

fun_declaration : type_specifier ID LPARENT params RPARENT compound_stmt ;

// Either a real parameter list or the single keyword "void".
// A parameter of type void is followed by ID, a bare "void" by RPARENT,
// so two tokens of lookahead are enough.
params : param_list | "void" ;

param_list : param ( COMMA param )* ;

// Scalar parameter "int x" or array parameter "int x[]".
param : type_specifier ID ( "[" "]" )? ;

compound_stmt : "{" local_declarations statement_list "}" ;

local_declarations : ( var_declaration )* ;

statement_list : ( statement )* ;

statement : expression_stmt
          | compound_stmt
          | selection_stmt
          | interation_stmt
          | return_stmt
          ;

// An expression followed by ";", or the empty statement ";".
expression_stmt : ( expression )? ";" ;

// if-statement with an optional else branch.
selection_stmt
    : "if" LPARENT expression RPARENT statement
      (
          // Dangling else: bind the "else" to the nearest "if", which is
          // what the generated code does anyway; silence the warning.
          options { warnWhenFollowAmbig = false; }
      :   "else" statement
      )?
    ;

// NOTE: the rule name keeps its original spelling ("interation") so that
// the generated parser method name does not change.
interation_stmt : "while" LPARENT expression RPARENT statement ;

return_stmt : "return" ( expression )? ";" ;

// Variable reference, optionally subscripted.
var : ID ( "[" expression "]" )? ;

// Assignment (right-associative) or a plain simple expression.
// The subscript inside var can be arbitrarily long, so no fixed k can
// separate the two alternatives; a syntactic predicate looks ahead for
// "var =" instead.
expression
    : ( var "=" ) => var "=" expression
    | simple_expression
    ;

// At most one relational operator; the comparison part is optional.
simple_expression : additive_expression ( relop additive_expression )? ;

additive_expression : term ( addop term )* ;

addop : "+" | "-" ;

term : factor ( mulop factor )* ;

// call is listed before var: both start with ID, and the following
// LPARENT identifies a call.
factor : LPARENT expression RPARENT
       | call
       | var
       | NUM
       ;

mulop : "*" | "/" ;

// Equality is "==" ("=" is assignment only).
relop : "<=" | "<" | ">" | ">=" | "==" | "!=" ;

call : ID LPARENT args RPARENT ;

// Zero or one argument list.
args : ( arg_list )? ;

arg_list : expression ( COMMA expression )* ;

// Lexer for the C-Minus teaching language (ANTLR 2.x grammar).
//
// Produces identifiers, numbers, punctuation and operators, and skips
// whitespace and comments.
class CMinusLexer extends Lexer;
options {
   // exportVocab is deliberately NOT set. The value "Java" that used to be
   // here placed the lexer in a vocabulary of its own, separate from
   // CMinusParser. Without a vocabulary option this lexer shares the
   // vocabulary of the parser declared earlier in this file, so token types
   // and the parser's string literals line up.
   testLiterals=true;     // check every token against the literals table so
                          // parser literals ("if", "while", ";", ...) are
                          // returned with the parser's token types
   k=4;                   // four characters of lookahead
   charVocabulary='\u0003'..'\uFFFF';
   // inline small bitset tests so the large character vocabulary stays
   // manageable in the generated code
   codeGenBitsetTestThreshold=20;
}
{
   // Intentionally empty hook; kept for compatibility.
   void deferredNewline(){
   return;
   }
}

// ---- punctuation ----
LPARENT : '(';
RPARENT : ')';
LBRACK   : '[';
RBRACK   : ']';
LCURLY   : '{';
RCURLY   : '}';
SEMI     : ';';
COMMA    : ',';

// ---- operators ----
ASSIGN   : '=';
EQUAL    : "==";
NOTEQUAL : "!=";
LTHAN    : '<';
LEQ      : "<=";
GTHAN    : '>';
GEQ      : ">=";
PLUS     : '+';
MINUS    : '-';
STAR     : '*';
DIV      : '/';

// Identifiers are letters only. Keywords are matched by this rule and then
// converted through the literals table.
ID       : ('a'..'z'|'A'..'Z')+;
NUM      : ('0'..'9')+ ;

// Whitespace is skipped; every flavour of line ending bumps the line counter.
WS       : ( ' '
          | '\t'
          | (   // "\r\n" can also be read as '\r' followed by '\n', so the
                // alternatives are ambiguous by construction; the first
                // matching alternative is the intended one.
                options { generateAmbigWarnings=false; }
            :   '\r' '\n'
            |   '\r'
            |   '\n'
            )
            { newline(); }
          )
          {$setType(Token.SKIP);}
          ;

// multiple-line comments
ML_COMMENT
   :   "/*"
       (   /*   '\r' '\n' can be matched in one alternative or by matching
               '\r' in one iteration and '\n' in another. Any flavour of
               newline is accepted, but a language that allows "\r\n",
               "\r" and "\n" as newlines is ambiguous, so the grammar is
               too. The warning is switched off.
           */
           options {
               generateAmbigWarnings=false;
           }
       :
           // a '*' belongs to the comment body unless it starts "*/"
           { LA(2)!='/' }? '*'
       |   '\r' '\n'       {newline();}
       |   '\r'           {newline();}
       |   '\n'           {newline();}
       |   ~('*'|'\n'|'\r')
       )*
       "*/"
       {$setType(Token.SKIP);}
   ;

// Single-line comment: everything up to, but not including, the line
// terminator, which is left for WS so the line counter is updated there.
CPPComment
        :
                "//" ( ~('\n'|'\r') )*
                        {$setType(Token.SKIP);}
        ;

Reply all

Reply to author

Forward

0 new messages