Account Options

  1. Sign in
The old Google Groups will be going away soon, but your browser is incompatible with the new version.
Google Groups Home
« Groups Home
Question about parser/parsing technics
There are currently too many topics in this group that display first. To make this topic appear first, remove this option from another topic.
There was an error processing your request. Please try again.
flag
  2 messages - Collapse all  -  Translate all to Translated (View all originals)
The group you are posting to is a Usenet group. Messages posted to this group will make your email address visible to anyone on the Internet.
Your reply message has not been sent.
Your post was successful
 
From:
To:
Cc:
Followup To:
Add Cc | Add Followup-to | Edit Subject
Subject:
Validation:
For verification purposes please type the characters you see in the picture below or the numbers you hear by clicking the accessibility icon. Listen and type the numbers you hear
 
spy...@gmail.com  
View profile  
 More options Mar 6 2012, 8:21 am
Newsgroups: comp.compilers
From: spy...@gmail.com
Date: Tue, 6 Mar 2012 05:21:30 -0800 (PST)
Local: Tues, Mar 6 2012 8:21 am
Subject: Question about parser/parsing technics
Hi everyone,

I was wondering whether there exists a (fault-tolerant) parser that can create a (possibly incomplete) concrete syntax tree according to an incomplete grammar?
It would create only the parts of the tree that it recognizes, and put the rest inside an 'unknown' token (or something similar).
informal & light example:
grammar:
- root -> Add
- Add -> Int + Int
- Int -> [0..9]+
for the expression "1 + 41" it will create a complete tree
for the expression "1 + 3.14" it will create a tree where the right operand of Add is (or is tagged as) unrecognized.

Thanks for any pointers you can give me.


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
Jens Kallup  
View profile  
 More options Mar 6 2012, 1:52 pm
Newsgroups: comp.compilers
From: Jens Kallup <jkal...@web.de>
Date: Tue, 06 Mar 2012 19:52:29 +0100
Local: Tues, Mar 6 2012 1:52 pm
Subject: Re: Question about parser/parsing technics
Hello,

you can try this grammar for a simple language:

//here the lexer.l
//compile: flex lexer.l

%{
#include "syntaxtree.h"
#include "string.h"
#include <stdlib.h>
#include <QMessageBox>
#include "y.tab.cc"

extern int lineno;
char num_text[2048];

#ifdef FLEX_SCANNER
#define INPUT_EOF EOF
#else
#define INPUT_EOF 0
#endif

extern "C" int yywrap() { return 1; }

/*
 * Entry point that runs the parser.
 *
 * The only live statement is a single call to yyparse(); its return value
 * is discarded.
 *
 * NOTE(review): `fp` is not referenced by any live code, so the scanner
 * presumably reads from whatever `yyin` currently points at -- confirm that
 * callers set `yyin` themselves, or wire `fp` in (e.g. yyrestart(fp)).
 */
void start_code_parser(FILE *fp)
{
     // Disabled buffer-switching variant, kept for reference.
     // NOTE(review): if it is ever re-enabled, the `while` below has no body
     // of its own, so yy_delete_buffer() would become the loop body.
     /*
     YY_BUFFER_STATE old_flexer = YY_CURRENT_BUFFER;
     YY_BUFFER_STATE new_flexer = yy_create_buffer(fp,YY_BUF_SIZE);
     yy_switch_to_buffer(new_flexer);
     while (yyparse() != INPUT_EOF)

     yy_delete_buffer(new_flexer);
     if (old_flexer != NULL)
     yy_switch_to_buffer(old_flexer);
     */
     yyparse();

}

extern void RestartApplication(void);
%}

%x COMMENT
%x STRING_BEG
%x STRING_END

%%

"\*\*".*\n              { /* "**" line comment: skip it, count the line */ lineno++; }
"/*"                    { /* start of a block comment */ BEGIN(COMMENT); }
    /*
     * Inside a block comment, consume one piece at a time:
     *   - the terminator goes back to normal scanning,
     *   - every newline is counted so error messages keep the right line,
     *   - any other character is discarded.
     * The previous single pattern was greedy up to the LAST terminator on a
     * line, never counted newlines, and let the default ECHO rule print
     * comment text that was not on the closing line.
     */
<COMMENT>"*/"           { BEGIN(INITIAL); }
<COMMENT>\n             { lineno++; }
<COMMENT>.              ;
"//".*\n                { /* "//" line comment: skip it, count the line */ lineno++; }
"if"            {
                    /* Reserved words.  None of these rules carries a start
                       condition, and the three conditions declared in this
                       file are exclusive, so they only fire in INITIAL and
                       need no state switch of their own. */
                    return _IF_;
                }
"else"          { return _ELSE_; }
"endif"         { return _END_IF_; }
"repeat"        { return _REPEAT_; }
"until"         { return _UNTIL_; }
"scan"          { return _SCAN_; }
"?"             { return _PRINT_; }
"local"         { return _LOCAL_; }
"parameter"     { return _PARAMETER_; }
"endfor"        { return _ENDFOR_; }
"new"           { return _NEW_; }
"for"           { return _FOR_; }
"while"         { return _WHILE_; }
"return"        { return _RETURN_; }
"=="            { /* two-character operators */ return _EQUAL_; }
">="|"=>"       { /* both spellings yield the same token */ return _GREQL_; }
"<="|"=<"       { /* both spellings yield the same token */ return _LWEQL_; }
"++"            { return _PLUS_PLUS_; }
"--"            { return _MINUS_MINUS_; }
"+="            { return _PLUS_ASSIGN_; }
"-="            { return _MINUS_ASSIGN_; }
"*="            { return _TIMES_ASSIGN_; }
"/="            { return _DIV_ASSIGN_; }
"class"         { /* more reserved words: plain tokens without a value */ return _CLASS_; }
"of"            { return _OF_; }
"endclass"      { return _ENDCLASS_; }
"set"           { return _SET_; }
"with"          { return _WITH_; }
"endwith"       { return _ENDWITH_; }
"to"            { return _TO_; }
"precision"     { return _PRECISION_; }
"round"         { return _ROUND_; }
"space"         { return _SPACE_; }
"replicate"     { return _REPLICATE_; }
"trim"          { return _TRIM_; }
"rtrim"         { return _RTRIM_; }
"ltrim"         { return _LTRIM_; }
    /* Math built-ins: the token additionally carries a heap copy of its
       own spelling in yylval.name. */
"abs"           { yylval.name = strdup(yytext); return _ABS_; }
"sin"           { yylval.name = strdup(yytext); return _SIN_; }
"cos"           { yylval.name = strdup(yytext); return _COS_; }
"tan"           { yylval.name = strdup(yytext); return _TAN_; }
"asin"          { yylval.name = strdup(yytext); return _ASIN_; }
"acos"          { yylval.name = strdup(yytext); return _ACOS_; }
"atan"          { yylval.name = strdup(yytext); return _ATAN_; }
"sqrt"          { yylval.name = strdup(yytext); return _SQRT_; }
"="     { /* single-character operators and punctuation */ return _ASSIGN_; }
"+"     { return _PLUS_; }
"-"     { return _MINUS_; }
"*"     { return _TIMES_; }
"/"     { return _DIV_; }
"<"     { /* the grammar's `exp` rule expects the character tokens '<' and '>';
             without these two rules they ended up in the catch-all error rule */
          return '<'; }
">"     { return '>'; }
"("     { return _OBR_; }
")"     { return _CBR_; }
","     { return _COMMA_; }
[-+]?[0-9]+(\.[0-9]*)?      {
                /* Numeric literal: optional sign, digits, optional fraction.
                   The old class ['.'0-9] also accepted apostrophes and any
                   number of dots (e.g. 1'2 or 1.2.3) as part of a number.
                   NOTE(review): the sign is still part of the literal, so
                   "a-1" scans as _ID_ _NUM_ rather than _ID_ _MINUS_ _NUM_;
                   dropping it needs a unary-minus rule in the grammar. */
                yylval.val = atof(yytext);
                return _NUM_;
                                }
[a-zA-Z_][a-zA-Z_0-9]*  {
             /* Identifier: a letter or underscore followed by letters,
                digits or underscores.  The parser receives a heap copy of
                the lexeme in yylval.name. */
             char *lexeme_copy = strdup(yytext);
             yylval.name = lexeme_copy;
             return _ID_;
             }
(\")                               { /* opening quote: switch to the string body */ BEGIN(STRING_BEG); }
    /* Empty literal "": the closing quote follows immediately.  Previously
       nothing in STRING_BEG matched it, so the scanner never left the state. */
<STRING_BEG>(\")                   { yylval.name = strdup(""); BEGIN(INITIAL); return _STRING_; }
    /* Non-empty body ('+' instead of '*', so the rule can no longer match
       the empty string); the closing quote is handled in STRING_END. */
<STRING_BEG>([0-9a-zA-Z_ \t\.\!\=\?\(\)\[\]\:\.\,\+\-\*\/\^]+)    {
yylval.name = strdup(yytext); BEGIN(STRING_END); }
<STRING_END>(\")             { BEGIN(INITIAL); return _STRING_; }
\n          { /* exactly one newline per match, so blank lines are counted too
                 (a run of newlines used to bump lineno only once) */
              lineno++; }
[ \t]+      ;
[\.]    { return _POINT_; }
.       {
                /* Catch-all for a character no other rule accepts: restart
                   the scanner on yyin, report the character and line, reset
                   the code-generator counters and restart the application.
                   The message literal had been split across two lines by
                   mail wrapping, which left it unterminated. */
                ::yyrestart(yyin);
                QMessageBox::about(NULL,"pFehler",
                        QString("Fehlerzeichen: %1 in Zeile: %2").arg(yytext[0]).arg(lineno));
                lineno = 1;

                if_label  = 0;
                expr_type = 2;

                text_counter = 0;
                char_counter = 0;
                nvar_counter = 0;

                RestartApplication();
        }
%%

// here the bison lang-y file
// compile: bison.exe -d lang.y
%{
#include <stdio.h>
#include <stdlib.h>
,,,
%}

/* Semantic value type shared by the scanner and the grammar. */
%union {
        double  val;    /* numeric literal value; the scanner fills it for _NUM_ */
        char*   name;   /* strdup()'d lexeme; the scanner fills it for _ID_, string and math-function tokens */
        char*   str;    /* not referenced by the visible rules */
        int     id;     /* value type of the non-terminals declared with %type <id> */
        int     label;  /* not referenced by the visible rules */

};

/* Tokens without a semantic value. */
%token _IF_ _THEN_ _ELSE_ _END_IF_ _REPEAT_ _UNTIL_ _SCAN_ _PRINT_ _ID_
_NUM_ _EQUAL_ _FOR_ _WHILE_ _RETURN_ _DBLNUM_
%token _LWEQL_ _GREQL_ _ASSIGN_ _PLUS_ _MINUS_ _TIMES_ _DIV_ _STRBE_
_COMMA_ _TRIM_ _LTRIM_ _RTRIM_
%token _NEW_ _LOCAL_ _PARAMETER_ _ENDFOR_
%token _MINUS_ASSIGN_ _TIMES_ASSIGN_ _DIV_ASSIGN_ _PLUS_ASSIGN_ _ROUND_
_SET_ _TO_ _PRECISION_
%token _CLASS_ _OF_ _ENDCLASS_ _WITH_ _ENDWITH_ _POINT_
/* Tokens whose value is the `name` member of the union. */
%token <name> _ASIN_ _ACOS_ _ATAN_ _SIN_ _COS_ _SQRT_ _TAN_ _STRING_
_OBR_ _CBR_ _SPACE_
%token <name> _PLUS_PLUS_ _MINUS_MINUS_
%token <name> _ABS_ _REPLICATE_
/* Value types of the literal/identifier tokens and of the non-terminals. */
%type <val> _NUM_
%type <name> _ID_
%type <id> stmt_seq stmt  if_stmt repeat_stmt for_stmt while_stmt
assign_stmt read_stmt write_stmt
%type <id> exp simple_exp term factor exec_stmt handle_string ari_stmt
set_stmt class_stmt with_stmt with_this
%type <id> object_vars class_cmd_stmt misc_stmt local_stmt

/* End of the declarations: Bison requires this separator before the first
   grammar rule; it was missing. */
%%

/*
 * Start symbol: a program is a statement sequence.
 * stmt_seq can already derive the empty input, so the separate empty
 * alternative that used to be here only added a reduce/reduce conflict.
 */
program
     : stmt_seq {  }
     ;

/*
 * Zero or more statements, left-recursive.
 * The former third alternative (a lone `stmt`) is already covered by
 * "empty stmt_seq followed by stmt" and made the grammar ambiguous.
 */
stmt_seq
     : /* empty */ { }
     | stmt_seq stmt { }
     ;

/*
 * A statement is exactly one of the statement kinds listed below.
 * Every alternative has an explicit empty action, so no semantic value is
 * propagated from the chosen alternative to `stmt`.
 * Of these non-terminals, only assign_stmt is defined in this excerpt.
 */
stmt: if_stmt     {  }
     | repeat_stmt {  }
     | assign_stmt {  }
     | read_stmt   {  }
     | write_stmt  {  }
     | for_stmt    {  }
     | while_stmt  {  }
     | ari_stmt    {  }
     | set_stmt    {  }
     | class_stmt  {  }
     | misc_stmt   {  }
     ;

/*
 * Assignment:  identifier '=' expression
 *
 * assign_flag is raised while the right-hand side is parsed and cleared
 * afterwards.  Once the expression code has been emitted, the result is
 * popped from the FPU stack into the variable's storage label "_LC<name>";
 * the label is added to the data section the first time it is seen.
 */
assign_stmt
     : _ID_ _ASSIGN_ { assign_flag = true; } exp
     {
        /* Build the label once instead of three times. */
        const QString label = QString("_LC%1").arg($1);

        code_str += QString("\tfstp qword [%1]\n").arg(label);

        if (!symbol_labels.contains(label))
        {
            data_str += QString("%1: dq 0.00\n").arg(label);
            symbol_labels << label;
        }

        assign_flag = false;

        /* The scanner strdup()s every identifier; this action is the last
           user of $1, so release it here instead of leaking it. */
        free($1);
     }
;

/*
 * Expression: an optional comparison between two simple expressions.
 * The actions only record which comparison was seen in expr_type:
 *   0 for '<', 1 for '>', 2 for _EQUAL_, 3 for _GREQL_, 4 for _LWEQL_.
 * A lone simple_exp leaves expr_type untouched.  No code is appended to
 * code_str by these actions.
 * '<' and '>' are expected as character-literal tokens.
 */
exp:
       simple_exp '<'     simple_exp { expr_type = 0; }
     | simple_exp '>'     simple_exp { expr_type = 1; }
     | simple_exp _EQUAL_ simple_exp { expr_type = 2; }
     | simple_exp _GREQL_ simple_exp { expr_type = 3; }
     | simple_exp _LWEQL_ simple_exp { expr_type = 4; }
     | simple_exp                   { }
     ;

/*
 * Additive expressions.
 *
 * Both operands are already on the x87 stack when an action runs (left
 * operand in st1, right operand in st0); the emitted instruction combines
 * them and pops once.
 *
 * The rule is left-recursive so that '+' and '-' associate to the left:
 * with the former right recursion "a - b - c" was evaluated as
 * "a - (b - c)".
 */
simple_exp :
     simple_exp _PLUS_ term
     {
        code_str += QString("\tfaddp st1, st0\n");
     }
     | simple_exp _MINUS_ term
     {
        code_str += QString("\tfsubp st1, st0\n");
     }
     | term { }
     ;

/*
 * Multiplicative expressions.
 *
 * Both operands are already on the x87 stack when an action runs (left
 * operand in st1, right operand in st0); the emitted instruction combines
 * them and pops once.
 *
 * The rule is left-recursive so that '*' and '/' associate to the left:
 * with the former right recursion "a / b / c" was evaluated as
 * "a / (b / c)".
 */
term:
     term _TIMES_ factor
     {
        code_str += QString("\tfmulp st1, st0\n");
     }
     | term _DIV_ factor
     {
        code_str += QString("\tfdivp st1, st0\n");
     }
     | factor {}
     ;

/*
 * Primary expressions.  Each alternative appends x87 assembly to code_str
 * (and, where storage is needed, a definition to data_str).
 *
 * Fixed in this version:
 *   - abs() emitted "fchs", which negates; it now emits "fabs".
 *   - id++ emitted "faddp qword [mem]"; faddp has no memory-operand form.
 *     It now mirrors id--: "fadd" from memory, then "fstp" back.
 *   - id-- computed (tmp - id) with "fsub"; "fsubr" yields (id - tmp).
 */
factor
     : _OBR_ exp _CBR_  {
         /* NOTE(review): the code for `exp` has already been emitted when
            this runs, yet the most recently seen identifier is loaded on
            top of it -- confirm this extra load is intended. */
         code_str += QString("\tfld qword [_LC%1]\n").arg(FLastID);
     }
     | _NUM_
     {
        /* Render the literal and make sure it carries a fractional part. */
        temp_str  = QString("%1").arg($1);
        if (temp_str.contains('.') == false)
            temp_str += QString(".00");

        nvar_counter++;

        /* The constant gets its own numbered data label and is loaded
           from there. */
        code_str.append(QString("\tfld qword [_LC%1]\n").arg(nvar_counter-1));
        data_str += QString("_LC%1:\tdq %2\n").arg(nvar_counter-1).arg(temp_str);

        last_result = NUMBER;
     }
     | _ROUND_ _OBR_ { assign_flag = true; } exp {
        /* NOTE(review): the value is stored to _LCtmp, but the two pushes
           read the most recent numeric-constant label instead -- confirm.
           assign_flag is not cleared again in this alternative. */
        code_str += QString(
                "\tfstp qword [_LCtmp]\n"
                "\tpush dword [_LC%1+4]\n"
                "\tpush dword [_LC%1]\n"
                ).arg(nvar_counter-1);
     } _COMMA_ exp _CBR_ {
        code_str += QString(
                "\tfstp qword [_LCtmp]\n"
                "\tpush dword [_LC%1+4]\n"
                "\tpush dword [_LC%1]\n"
                "\tcall _round_val\n"
                ).arg(nvar_counter-1);
     }
     | _ID_
     {
        last_result = 0;

        /* Load the variable and remember it as the last identifier seen. */
        //if (do_function_call)
        code_str += QString("\tfld qword [_LC%1]\n").arg($1);

        FLastID = $1;
        stack_counter += 4;

        /* Reserve storage for the variable on first use. */
        if (!symbol_labels.contains(QString("_LC%1").arg($1)))
        {
            data_str += QString("_LC%1: dq 0.00\n").arg($1);
            symbol_labels << QString("_LC%1").arg($1);
        }
     }
     | _ID_ _PLUS_PLUS_ {
        /* Post-increment: the first load leaves the old value on the stack;
           the second sequence computes tmp + id and stores it back.
           NOTE(review): assumes _LCtmp_1 holds 1.0 -- it is defined outside
           this excerpt. */
        code_str += QString(
                "\tfld qword [_LC%1]\n"
                "\tfld qword [_LCtmp_1]\n"
                "\tfadd qword [_LC%1]\n"
                "\tfstp qword [_LC%1]\n"
                ).arg($1);
     }
     | _ID_ _MINUS_MINUS_ {
        /* Post-decrement: fsubr computes id - tmp (fsub gave tmp - id). */
        code_str += QString(
                "\tfld qword [_LC%1]\n"
                "\tfld qword [_LCtmp_1]\n"
                "\tfsubr qword [_LC%1]\n"
                "\tfstp qword [_LC%1]\n"
                ).arg($1);
     }
     | _SIN_  _OBR_ exp _CBR_ { internal_pusher("sin" ); }
     | _ASIN_ _OBR_ exp _CBR_ { internal_pusher("asin"); }
     | _ACOS_ _OBR_ exp _CBR_ { internal_pusher("acos"); }
     | _ATAN_ _OBR_ exp _CBR_ { internal_pusher("atan"); }
     | _COS_  _OBR_ exp _CBR_ { internal_pusher("cos" ); }
     | _TAN_  _OBR_ exp _CBR_ { internal_pusher("tan" ); }
     | _SQRT_ _OBR_ exp _CBR_ { internal_pusher("sqrt"); }
     | _ABS_  _OBR_ exp _CBR_ { code_str += QString("\tfabs\n"); }
     ;

/*
 * Matches the _END_IF_ token; the action is empty.
 * No other rule in this excerpt refers to endif_stmt.
 */
endif_stmt
     : _END_IF_ { }
     ;
%%

void yyerror(char * message)
{
     QMessageBox::about(NULL,"error",QString("%1: line
%2").arg(message).arg(lineno));
     ::yyrestart(yyin);

}

/*
 * Appends the assembly for a one-argument math built-in to code_str.
 * The argument is expected on top of the x87 stack.
 *
 *   sin, cos, sqrt         -> the matching x87 instruction (f<id>), then the
 *                             result is stored to and reloaded from [_LC<id>].
 *   asin, acos, atan, tan  -> the argument is spilled to _LCtmp, pushed as
 *                             two dwords and the routine _<id> is called.
 *
 * Any other id emits nothing.
 *
 * Fix: the call sequence pushes 8 bytes (two dwords) but used to release 12
 * with "add esp, 12", leaving esp 4 bytes off after every call.
 *
 * NOTE(review): the labels _LCsin/_LCcos/_LCsqrt and _LCtmp are not defined
 * in this excerpt -- confirm they exist in the data section.
 */
void internal_pusher(QString id)
{
   if (id == "sin" || id == "cos" || id == "sqrt")
   {
       code_str += QString(
            "\tf%1\n"
            "\tfstp qword [_LC%1]\n"
            "\tfld qword [_LC%1]\n"
            ).arg(id);
   }
   else if (id == "asin" || id == "acos" || id == "atan" || id == "tan")
   {
       // The two groups are disjoint, so else-if selects the same branch
       // the two independent ifs did.
       code_str += QString(
            "\tfstp qword [_LCtmp]\n"
            "\tpush dword [_LCtmp+4]\n"
            "\tpush dword [_LCtmp]\n"
            "\tcall _%1\n"
            "\tadd esp, 8\n").arg(id);
   }

}

cheers ...

read more »


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
End of messages
« Back to Discussions « Newer topic     Older topic »