/*
* Parser for C-Minus
* Copyright (C) 2005 Gao Wei <
rock...@gmail.com>
*
*
*/
class CMinusParser extends Parser;
options {
    k=4;            // up to four tokens of lookahead
    buildAST=true;  // build an AST while parsing (no tree-shaping operators are used)
}

// ---------------------------------------------------------------------------
// Declarations
// ---------------------------------------------------------------------------

// A program is a non-empty sequence of declarations followed by end of input.
// EOF is matched explicitly so trailing garbage is reported instead of being
// silently ignored; the '!' keeps it out of the AST.
program
    : declaration_list EOF!
    ;

// Declarations follow each other directly; there is no separator token
// between them in C-Minus.
declaration_list
    : (declaration)+
    ;

// With k=4 the third token (";" / "[" versus "(") selects the alternative.
declaration
    : var_declaration
    | fun_declaration
    ;

// Scalar variable:  int x;
// Array variable:   int x[10];
// The brackets and the semicolon are separate tokens.
var_declaration
    : type_specifier ID ("[" NUM "]")? ";"
    ;

type_specifier
    : INT
    | VOID
    ;

fun_declaration
    : type_specifier ID "(" params ")" compound_stmt
    ;

// Either a real parameter list or the single keyword void.
// The second lookahead token (ID versus ")") separates "void x" from "void".
params
    : param_list
    | VOID
    ;

param_list
    : param (COMMA param)*
    ;

// Scalar parameter:  int x
// Array parameter:   int x[]
param
    : type_specifier ID ("[" "]")?
    ;

// ---------------------------------------------------------------------------
// Statements
// ---------------------------------------------------------------------------

compound_stmt
    : "{" local_declarations statement_list "}"
    ;

local_declarations
    : (var_declaration)*
    ;

statement_list
    : (statement)*
    ;

// NOTE: "interation_stmt" is a historical misspelling of "iteration_stmt".
// The name is kept because it becomes a method name on the generated parser.
statement
    : expression_stmt
    | compound_stmt
    | selection_stmt
    | interation_stmt
    | return_stmt
    ;

// An expression followed by ";" or an empty statement consisting of ";" only.
expression_stmt
    : (expression)? ";"
    ;

// The else branch is optional. The classic dangling-else ambiguity is resolved
// by binding "else" to the nearest "if" (greedy match), so the warning for it
// is switched off.
selection_stmt
    : "if" "(" expression ")" statement
      (
        options { warnWhenFollowAmbig=false; }
      :
        "else" statement
      )?
    ;

interation_stmt
    : "while" "(" expression ")" statement
    ;

return_stmt
    : RETURN (expression)? ";"
    ;

// ---------------------------------------------------------------------------
// Expressions
// ---------------------------------------------------------------------------

// A plain variable or an indexed array element.
var
    : ID ("[" expression "]")?
    ;

// Either an assignment (right-associative, so "a = b = c" works) or a simple
// expression. Both alternatives can start with an arbitrarily long "var", so
// fixed lookahead cannot choose; a syntactic predicate scans ahead for
// "var =" before committing to the assignment alternative.
expression
    : (var "=") => var "=" expression
    | simple_expression
    ;

// At most one relational operator; the comparison part is optional.
simple_expression
    : additive_expression (relop additive_expression)?
    ;

additive_expression
    : term (addop term)*
    ;

addop
    : "+"
    | "-"
    ;

term
    : factor (mulop factor)*
    ;

// "var" and "call" both start with ID; the second token ("(" for a call)
// tells them apart.
factor
    : "(" expression ")"
    | var
    | call
    | NUM
    ;

mulop
    : "*"
    | "/"
    ;

// Equality is "==", not "=", which is the assignment operator.
relop
    : "<="
    | "<"
    | ">"
    | ">="
    | "=="
    | "!="
    ;

call
    : ID "(" args ")"
    ;

// Zero arguments or one comma-separated argument list.
args
    : (arg_list)?
    ;

arg_list
    : expression (COMMA expression)*
    ;
class CMinusLexer extends Lexer;
options {
    // Share the token vocabulary of the parser defined earlier in this file.
    // The parser declares no exportVocab, so its vocabulary is named after the
    // grammar class ("CMinusParser"). Exporting the same name here makes both
    // grammars use one set of token type numbers and puts the parser's string
    // literals ("if", ";", "==" ...) into this lexer's literals table.
    exportVocab=CMinusParser;
    // Look every matched token's text up in the literals table, so keywords
    // matched by ID and punctuation matched by the rules below are returned
    // with the token type the parser expects for the corresponding literal.
    testLiterals=true;
    k=4;                                // four characters of lookahead
    charVocabulary='\u0003'..'\uFFFF';
    // Without inlining some bitset tests the generated code could not handle
    // Unicode; this keeps the generated bitsets small.
    codeGenBitsetTestThreshold=20;
}

// Keywords that the parser references by token name rather than by literal.
// Attaching the literal text here lets the literals test on ID produce them.
tokens {
    INT="int";
    VOID="void";
    RETURN="return";
}

{
    // No-op hook; nothing in this grammar calls it.
    void deferredNewline(){
        return;
    }
}

// ---------------------------------------------------------------------------
// Punctuation and operators
// ---------------------------------------------------------------------------
LPARENT   : '(' ;
RPARENT   : ')' ;
LBRACK    : '[' ;
RBRACK    : ']' ;
LCURLY    : '{' ;
RCURLY    : '}' ;
SEMI      : ';' ;
COMMA     : ',' ;
ASSIGN    : '=' ;
EQUAL     : "==" ;
NOT_EQUAL : "!=" ;
LT        : '<' ;
LE        : "<=" ;
GT        : '>' ;
GE        : ">=" ;
PLUS      : '+' ;
MINUS     : '-' ;
STAR      : '*' ;
DIV       : '/' ;

// ---------------------------------------------------------------------------
// Identifiers and numbers
// ---------------------------------------------------------------------------

// Letters only. Keywords are matched here too and re-typed by the literals test.
ID : ('a'..'z'|'A'..'Z')+ ;

NUM : ('0'..'9')+ ;

// ---------------------------------------------------------------------------
// Whitespace and comments (all skipped)
// ---------------------------------------------------------------------------

// Whitespace. Every flavour of line ending counts as exactly one new line so
// that reported line numbers stay correct.
WS
    : ( ' '
      | '\t'
      | '\f'
      | ( options { generateAmbigWarnings=false; }
        : "\r\n"    // DOS
        | '\r'      // old Macintosh
        | '\n'      // Unix
        )
        { newline(); }
      )+
      { $setType(Token.SKIP); }
    ;

// multiple-line comments
ML_COMMENT
    : "/*"
      (
        /* '\r' '\n' can be matched in one alternative or by matching
           '\r' in one iteration and '\n' in another. Accepting "\r\n",
           "\r" and "\n" all as valid newlines makes the grammar
           inherently ambiguous, so the warning is switched off here.
        */
        options {
            generateAmbigWarnings=false;
        }
      :
        { LA(2)!='/' }? '*'     // a '*' that does not start the closing "*/"
      | '\r' '\n' { newline(); }
      | '\r'      { newline(); }
      | '\n'      { newline(); }
      | ~('*'|'\n'|'\r')
      )*
      "*/"
      { $setType(Token.SKIP); }
    ;

// Single-line comment. Stops in front of the line terminator, which is then
// consumed (and counted) by WS.
CPPComment
    : "//" ( ~('\n'|'\r') )*
      { $setType(Token.SKIP); }
    ;