## $Id: grammar_rules.pg 17096 2007-02-20 20:20:48Z paultcochrane $

## TITLE
## Pynie::Grammar -- a grammar for parsing Python
##
## DESCRIPTION
##
## These are a set of rules for parsing programs written in Python.
## Many of the rules are derived from the "Python Language Reference
## Manual", version 2.5 by Guido van Rossum (Fred L. Drake, Jr., Editor).
## Available online at http://docs.python.org/ref/ and
## http://docs.python.org/ref/grammar.txt .
##
## NOTE(review): in this copy many rule bodies contain empty alternation
## branches or bare quantifiers (for example TOP, statement, decorators).
## Presumably subrule references were lost; confirm against the upstream
## file before relying on those rules.  They are kept exactly as found.
##
## Alternations are written longest-literal-first wherever one branch is a
## prefix of another, because a token or rule that has matched a short
## branch is not re-entered to try a longer one.

grammar Pynie::Grammar ;

## TOP defines the top level rule to the HLLCompiler.
token TOP { }

## ---------------------------------------------------------------------
## Whitespace and program structure
## ---------------------------------------------------------------------

## One or more line breaks.
token newline { [ \n ]+ }

## Horizontal whitespace, optionally followed by a '#' comment that runs
## to the end of the line.
token ws { \h* [ <'#'> \N* ]? }

token file_input { ^ [ | ]* [ $ | ] }

token suite {
    |
    | [ ]* [ | ]
}

token statement { | | }

## Statements separated by ';', with an optional trailing ';'.
token stmt_list { [ <';'> ]* <';'>? }

## ---------------------------------------------------------------------
## Compound statements
## ---------------------------------------------------------------------

token compound_stmt { | | | | | | | }

## 'if' with any number of 'elif' clauses and an optional 'else' clause.
rule if_stmt {
    <'if'> <':'>
    [ <'elif'> <':'> ]*
    [ <'else'> <':'> ]?
}

rule while_stmt {
    <'while'> <':'>
    [ <'else'> <':'> ]?
}

rule for_stmt {
    <'for'> <'in'> <':'>
    [ <'else'> <':'> ]?
}

rule try_stmt { | }

## 'try' with one or more 'except' clauses, then optional 'else' and
## 'finally' clauses.
rule try1_stmt {
    <'try'> <':'>
    [ <'except'> [ [ <','> ]? ]? <':'> ]+
    [ <'else'> <':'> ]?
    [ <'finally'> <':'> ]?
}

## 'try' ... 'finally' with no 'except' clause.
rule try2_stmt {
    <'try'> <':'>
    <'finally'> <':'>
}

rule with_stmt { <'with'> [ <'as'> ]? <':'> }

## ---------------------------------------------------------------------
## Function and class definitions
## ---------------------------------------------------------------------

rule funcdef { ? <'def'> <'('> ? <')'> <':'> }

rule decorators { + }

rule decorator { <'@'> [ <'('> [ <','>? ]? <')'> ]? }

rule argument_list {
    | [ <','> ]? [ <','> <'*'> ]? [ <','> <'**'> ]?
    | [ <','> <'*'> ]? [ <','> <'**'> ]?
    | <','> <'*'> [ <','> <'**'> ]?
    | <','> <'**'>
}

rule positional_arguments { [ <','> ]* }

rule keyword_arguments { [ <','> ]* }

rule keyword_item { <'='> }

rule dotted_name { [ <'.'> ]* }

rule funcname { }

rule parameter_list {
    [ <','> ]*
    [ <'*'> [ <','> <'**'> ]?
    | <'**'>
    | <','>?
    ]
}

rule defparameter { [ <'='> ]? }

rule sublist { [ <','> ]* <','>? }

rule parameter {
    |
    <'('> <')'>
}

rule classdef { <'class'> ? <':'> }

rule classname { }

rule inheritance { <'('> ? <')'> }

## ---------------------------------------------------------------------
## Boolean tests and comparisons
## ---------------------------------------------------------------------

rule or_test { [ <'or'> ]* }

rule and_test { [ <'and'> ]* }

rule not_test { | <'not'> }

rule comparison { [ ]* }

## Two-character operators are listed before '<' and '>' so that "<=",
## ">=" and "<>" are not cut short after their first character.
rule comp_operator {
    <'<='> | <'>='> | <'<>'> | <'=='> | <'!='>
    | <'<'> | <'>'>
    | <'is'> <'not'>? | <'not'>? <'in'>
}

rule lambda_form { <'lambda'> ? <':'> }

## ---------------------------------------------------------------------
## Simple statements
## ---------------------------------------------------------------------

token simple_stmt { | | | | | | | | | | | | | | | }

rule expression_stmt { }

rule assert_stmt { <'assert'> [ <','> ]? }

rule assignment_stmt { [ <'='> ]+ }

rule augmented_assignment_stmt { }

rule target_list { [ <','> ]* <','>? }

rule target {
    |
    | <'('> <')'>
    | <'['> <']'>
    |
    |
    |
}

rule call {
    | <'('> [ ? <','>? ]? <')'>
    | <'('> [ <','>? | ] <')'>
}

rule attributeref { <'.'> }

rule subscription { <'['> <']'> }

rule slicing { | }

rule simple_slicing { <'['> <']'> }

rule extended_slicing { <'['> <']'> }

rule slice_list { [ <','> ]* <','>? }

rule slice_item { | | <'...'> }

rule proper_slice { | }

rule short_slice { ? <':'> ? }

rule long_slice { <':'> ? }

token identifier { [ | <'_'> ] \w* }

token name { <[a..z]> [ | <'_'> ]* }

rule print_stmt { <'print'> [ [ <','> ]* (<','>?) ]? }

rule pass_stmt { <'pass'> }

rule del_stmt { <'del'> }

rule return_stmt { <'return'> ? }

rule yield_stmt { <'yield'> }

rule break_stmt { <'break'> }

rule continue_stmt { <'continue'> }

rule raise_stmt { <'raise'> [ [ <','> [ <','> ]? ]? ]? }

rule global_stmt { <'global'> [ <','> ]* }

rule exec_stmt { <'exec'> [ <'in'> [ <','> ]? ]? }

rule import_stmt {
    | <'import'> ? [ <','> ? ]*
    | ? [ <','> ? ]*
    | <'('> ? [ <','> ? ]* <','>? <')'>
    | <'*'>
}

rule import_module { <'from'> <'import'> }

rule import_alias { <'as'> }

rule module { [ <'.'> ]* }

## ---------------------------------------------------------------------
## Literals
## ---------------------------------------------------------------------

token literal { | | | | | }

## Hexadecimal and octal forms are tried before the plain decimal forms;
## otherwise the lone '0' branch would match the leading zero of "0x1F"
## or "017" and the longer branches would never be reached.
token integer {
    | <'0'> <[xX]> [ \d | <[a..f]> | <[A..F]> ]+
    | <'0'> <[0..7]>+
    | <[1..9]> \d*
    | <'0'>
}

token intpart { \d+ }

token longinteger { <[lL]> }

token imagnumber { [ | ] <[jJ]> }

## The exponent form comes first so that a mantissa is not accepted as a
## complete float before its exponent is seen, and the exponent sign is
## optional ("1e5" as well as "1e+5").
token floatnumber {
    | [ <'.'>? \d+ ] <[eE]> <[+\-]>? \d+
    | \d* <'.'> \d+
    | \d+ <'.'>
}

token stringliteral { ? [ | ] }

## A single- or double-quoted string on one line; a backslash escapes the
## character that follows it.
token shortstring {
    | " ( [ \\ . | <-["\n]> ]* ) "
    | ' ( [ \\ . | <-['\n]> ]* ) '
}

## A triple-quoted string.  The body is matched minimally so the string
## ends at the first closing delimiter rather than the last one in the
## input.
regex longstring {
    | """ ( [ \\ . | . ]*? ) """
    | ''' ( [ \\ . | . ]*? ) '''
}

## Two-letter prefixes are listed before the one-letter prefixes they
## start with, so that "ur" is not matched as just "u".
token stringprefix {
    <'ur'> | <'UR'> | <'Ur'> | <'uR'>
    | <'r'> | <'u'> | <'R'> | <'U'>
}

## ---------------------------------------------------------------------
## Expression lists and displays
## ---------------------------------------------------------------------

## The separator is written <','> like in the other list rules.
rule expression_list { [ <','> ]* <','>? }

rule list_display { <'['> ? <']'> }

rule listmaker { [ | [ <','> ]* <','>? ] }

rule list_iter { | }

rule list_for { <'for'> <'in'> ? }

rule test { | }

rule testlist { [ <','> ]* <','>? }

rule list_if { <'if'> ? }

## Augmented assignment operators.  '//=' is included alongside '/=' to
## pair with the '//' operator accepted by m_expr.
token augop {
    | <'+='> | <'-='> | <'*='> | <'/='> | <'//='> | <'\%='> | <'**='>
    | <'>>='> | <'<<='> | <'&='> | <'^='> | <'|='>
}

## ---------------------------------------------------------------------
## Primaries and atoms
## ---------------------------------------------------------------------

rule primary { * }

rule postop { | | | | }

rule atom { | | }

rule parenth_form { <'('> ? <')'> }

rule enclosure { | | | | | }

rule generator_expression { <'('> <')'> }

rule genexpr_for { <'for'> <'in'> ? }

rule genexpr_iter { | }

rule genexpr_if { <'if'> ? }

rule dict_display { <'{'> ? <'}'> }

rule key_datum_list { [ <','> ]* <','>? }

rule key_datum { <':'> }

rule string_conversion { <'`'> <'`'> }

## ---------------------------------------------------------------------
## Operator expressions
## ---------------------------------------------------------------------

rule expression {
    | [ <'if'> <'else'> ]?
    |
}

rule power { [ <'**'> ]? }

rule u_expr { | | <'-'> | <'+'> | <'\~'> }

## '//' is listed before '/' so floor division is not read as '/'.
rule m_expr { [ [<'*'> | <'//'> | <'/'> | <'\%'> ] ]* }

rule a_expr { [ [ <'+'> | <'-'> ] ]* }

rule shift_expr { [ [ <'<<'> | <'>>'> ] ]* }

rule and_expr { [ <'&'> ]* }

rule xor_expr { [ <'^'> ]* }

rule or_expr { [ <'|'> ]* }

## This identifies operators for the bottom-up parser
## See operator precedence table at http://docs.python.org/ref/summary.html
#token 'expression' is optable { ... }
#
#proto 'term:' is precedence('=')
#    is parsed(&term)
#    { ... }
#
## should this be primary??
#token term {
#    |
#    |
#    |
#}
#
#proto 'infix:**' is looser('term:') {...}
#
#proto 'prefix:~' is looser('infix:**') { ... }
#proto 'prefix:+' is looser('prefix:~') { ... }
#proto 'prefix:-' is equiv('prefix:+') { ... }
#
#proto 'infix:*' is looser('prefix:+')
#    is pirop('mul')
#    { ... }
#
#proto 'infix:/' is equiv('infix:*')
#    is pirop('div')
#    { ... }
#
#proto 'infix:%' is equiv('infix:*')
#    is pirop('mod')
#    { ... }
#
#proto 'infix:+' is looser('infix:*')
#    is pirop('add')
#    { ... }
#
#proto 'infix:-' is equiv('infix:+')
#    is pirop('sub')
#    { ... }
#
#
#
#proto 'infix:<<' is looser('infix:+') { ... }
#proto 'infix:>>' is equiv('infix:<<') { ... }
#proto 'infix:&' is looser('infix:<<') { ... }
#proto 'infix:^' is looser('infix:&') { ... }
#proto 'infix:|' is looser('infix:^') { ... }
#
#proto 'infix:==' is looser('infix:|') { ... }
#proto 'infix:!=' is equiv('infix:==') { ... }
#proto 'infix:<=' is equiv('infix:==') { ... }
#proto 'infix:>=' is equiv('infix:==') { ... }
#proto 'infix:<' is equiv('infix:==') { ... }
#proto 'infix:>' is equiv('infix:==') { ... }
#
## "is not" infix operator is handled by combining 'prefix:not' and 'infix:is'
#proto 'infix:is' is looser('infix:==') { ... }
#proto 'infix:in' is looser('infix:is') { ... }
#
#proto 'prefix:not' is looser('infix:in') { ... }
#
## to handle "not in" as infix op, combine 'prefix:in' and 'infix:not'
## this also allows for "x = 1 not 2", which is of course Wrong.
## Fix this later (new category of operator??)
##
#proto 'infix:not' is looser('infix:in') { ... }
#proto 'prefix:in' is looser('infix:is') { ... }
#
#proto 'infix:and' is looser('prefix:not') { ... }
#proto 'infix:or' is looser('infix:and') { ... }
#

## Python reserved words and keywords
token reserved { | <'None'> }

## A keyword must end at a word boundary, so an identifier that merely
## begins with a keyword (such as "format") is not taken as one.
token keyword {
    [ <'and'>   | <'assert'> | <'break'>  | <'class'>  | <'continue'>
    | <'def'>   | <'del'>    | <'elif'>   | <'else'>   | <'except'>
    | <'exec'>  | <'finally'> | <'for'>   | <'from'>   | <'global'>
    | <'if'>    | <'import'> | <'in'>     | <'is'>     | <'lambda'>
    | <'not'>   | <'or'>     | <'pass'>   | <'print'>  | <'raise'>
    | <'return'> | <'try'>   | <'while'>  | <'with'>   | <'yield'>
    ] \b
}

## vim: expandtab sw=4