Here's my simple project source code:
bison.y
flex.l
flex_bison.cpp
flex.l:
%option noyywrap
%{
#include <string>
#include <cstring>
#include "bison.tab.hpp"
#define FT_SAVE_TOKEN yylval.literal = strndup(yytext, yyleng)
#define FT_TOKEN(t) (yylval.token = t)
%}
%%
"True" return FT_TRUE;
"False" return FT_FALSE;
"let" return FT_LET;
"Nil" return FT_NIL;
"if" return FT_IF;
"elseif" return FT_ELSEIF;
"else" return FT_ELSE;
"switch" return FT_SWITCH;
"case" return FT_CASE;
"otherwise" return FT_OTHERWISE;
"for" return FT_FOR;
"while" return FT_WHILE;
"break" return FT_BREAK;
"continue" return FT_CONTINUE;
"func" return FT_FUNC;
"class" return FT_CLASS;
"type" return FT_TYPE;
"isinstance" return FT_ISINSTANCE;
"import" return FT_IMPORT;
"return" return FT_RETURN;
"void" return FT_VOID;
"and" return FT_LOGICALAND;
"or" return FT_LOGICALOR;
"not" return FT_LOGICALNOT;
"int" return FT_INTEGER_KEYWORD;
"uint" return FT_UNSIGNED_INTEGER_KEYWORD;
"double" return FT_DOUBLE_KEYWORD;
[ \t\v\n\f\r] ;
[a-zA-Z_][a-zA-Z0-9_]* FT_SAVE_TOKEN; return FT_IDENTIFIER;
[0-9]+"."[0-9]+([Ee][+-]?[0-9]+)? FT_SAVE_TOKEN; return FT_DOUBLE;
[0-9]+([Ee][+-]?[0-9]+)? FT_SAVE_TOKEN; return FT_INTEGER;
\"(\\.|[^\\"])*\" FT_SAVE_TOKEN; return FT_STRING;
"+" return FT_TOKEN(FT_ADD);
"-" return FT_TOKEN(FT_SUB);
"*" return FT_TOKEN(FT_MUL);
"/" return FT_TOKEN(FT_DIV);
"%" return FT_TOKEN(FT_MOD);
"!" return FT_TOKEN(FT_BITNOT);
"&" return FT_TOKEN(FT_BITAND);
"|" return FT_TOKEN(FT_BITOR);
"~" return FT_TOKEN(FT_BITCOMPLEMENT);
"^" return FT_TOKEN(FT_BITXOR);
"=" return FT_TOKEN(FT_ASSIGN);
"+=" return FT_TOKEN(FT_ADDASSIGN);
"-=" return FT_TOKEN(FT_SUBASSIGN);
"*=" return FT_TOKEN(FT_MULASSIGN);
"/=" return FT_TOKEN(FT_DIVASSIGN);
"%=" return FT_TOKEN(FT_MODASSIGN);
"==" return FT_TOKEN(FT_EQ);
"!=" return FT_TOKEN(FT_NEQ);
"<" return FT_TOKEN(FT_LT);
"<=" return FT_TOKEN(FT_LE);
">" return FT_TOKEN(FT_GT);
">=" return FT_TOKEN(FT_GE);
"(" return FT_TOKEN(FT_LPAREN);
")" return FT_TOKEN(FT_RPAREN);
"[" return FT_TOKEN(FT_LBRACKET);
"]" return FT_TOKEN(FT_RBRACKET);
"{" return FT_TOKEN(FT_LBRACE);
"}" return FT_TOKEN(FT_RBRACE);
"," return FT_TOKEN(FT_COMMA);
";" return FT_TOKEN(FT_SEMI);
"?" return FT_TOKEN(FT_QUESTION);
":" return FT_TOKEN(FT_COLON);
"." return FT_TOKEN(FT_DOT);
. printf("Unknown token!n"); yyterminate();
%%
bison.y:
%{
#include <string>
#include <cstring>
#include <cstdio>
extern FILE *yyin;
extern int yylex();
void yyerror(const char *s) { printf("yyerror: %s\n", s); }
%}
/* Represents the many different ways we can access our data */
%union {
char *literal;
int token;
}
/* union.token: eof, keyword */
%token <token> FT_EOF
%token <token> FT_TRUE FT_FALSE FT_LET FT_NIL FT_IF FT_ELSEIF FT_ELSE FT_FOR FT_WHILE FT_BREAK FT_CONTINUE FT_SWITCH FT_CASE FT_OTHERWISE
%token <token> FT_FUNC FT_CLASS FT_TYPE FT_ISINSTANCE FT_IMPORT FT_RETURN FT_VOID FT_LOGICALAND FT_LOGICALOR FT_LOGICALNOT
%token <token> FT_INTEGER_KEYWORD FT_UNSIGNED_INTEGER_KEYWORD FT_DOUBLE_KEYWORD
/* union.literal, identifier, integer, double number, string */
%token <literal> FT_IDENTIFIER FT_INTEGER FT_DOUBLE FT_STRING
/* union.token: operator, comparator, punctuation */
%token <token> FT_ADD FT_SUB FT_MUL FT_DIV FT_MOD FT_BITNOT FT_BITAND FT_BITOR FT_BITCOMPLEMENT FT_BITXOR
%token <token> FT_ASSIGN FT_ADDASSIGN FT_SUBASSIGN FT_MULASSIGN FT_DIVASSIGN FT_MODASSIGN FT_EQ FT_NEQ FT_LT FT_LE FT_GT FT_GE
%token <token> FT_LPAREN FT_RPAREN FT_LBRACKET FT_RBRACKET FT_LBRACE FT_RBRACE FT_COMMA FT_SEMI FT_QUESTION FT_COLON FT_DOT
/*
%type <ident> ident
%type <expr> numeric expr
%type <varvec> func_decl_args
%type <exprvec> call_args
%type <block> program stmts block
%type <stmt> stmt var_decl func_decl
%type <token> comparison
*/
/* operator/comparator precedence */
%left FT_DOT FT_LPAREN FT_RPAREN FT_LBRACKET FT_RBRACKET
%left FT_MUL FT_DIV FT_MOD
%left FT_ADD FT_SUB
%left FT_LT FT_LE FT_GT FT_GE FT_EQ FT_NEQ
%left FT_BITNOT FT_BITAND FT_BITOR FT_BITXOR FT_BITCOMPLEMENT
%left FT_LOGICALNOT FT_LOGICALAND FT_LOGICALOR
%left FT_ASSIGN FT_ADDASSIGN FT_SUBASSIGN FT_MULASSIGN FT_DIVASSIGN FT_MODASSIGN
/*
%start program
*/
%%
primary_expression : FT_IDENTIFIER
| FT_INTEGER
| FT_DOUBLE
| FT_STRING
| '(' expression ')'
;
postfix_expression : primary_expression
/*| postfix_expression '[' expression ']'*/
| postfix_expression '(' ')'
| postfix_expression '(' argument_expression_list ')'
/*| postfix_expression '.' IDENTIFIER*/
;
argument_expression_list : assignment_expression
| argument_expression_list ',' assignment_expression
;
unary_expression : postfix_expression
| unary_operator postfix_expression
;
unary_operator : FT_BITAND
| FT_BITOR
| FT_BITNOT
| FT_BITCOMPLEMENT
| FT_BITXOR
| FT_ADD
| FT_SUB
;
/*
cast_expression : unary_expression
| '(' type_name ')' cast_expression
;
*/
multiplicative_expression : unary_expression
| multiplicative_expression FT_MUL unary_expression
| multiplicative_expression FT_DIV unary_expression
| multiplicative_expression FT_MOD unary_expression
;
additive_expression : multiplicative_expression
| additive_expression FT_ADD multiplicative_expression
| additive_expression FT_SUB multiplicative_expression
;
/*
shift_expression : additive_expression
| shift_expression '<<' additive_expression
| shift_expression '>>' additive_expression
;
*/
relational_expression : additive_expression
| relational_expression FT_LT additive_expression
| relational_expression FT_LE additive_expression
| relational_expression FT_GT additive_expression
| relational_expression FT_GE additive_expression
;
equality_expression : relational_expression
| equality_expression FT_EQ relational_expression
| equality_expression FT_NEQ relational_expression
;
/*
bit_and_expression : equality_expression
| bitand_expression '&' equality_expression
;
bit_xor_expression : bit_and_expression
| bit_xor_expression '^' bit_and_expression
;
bit_or_expression : bit_xor_expression
| bit_or_expression '|' bit_xor_expression
;
*/
logical_not_expression : equality_expression
| logical_not_expression FT_LOGICALNOT equality_expression
;
logical_and_expression : logical_not_expression
| logical_and_expression FT_LOGICALAND logical_not_expression
;
logical_or_expression : logical_and_expression
| logical_or_expression FT_LOGICALOR logical_and_expression
;
assignment_expression : logical_or_expression
| unary_expression assignment_operator assignment_expression
;
assignment_operator : FT_ASSIGN
| FT_MULASSIGN
| FT_DIVASSIGN
| FT_MODASSIGN
| FT_ADDASSIGN
| FT_SUBASSIGN
;
constant_expression : logical_or_expression
;
expression : assignment_expression
;
unit : external_declaration
| unit external_declaration
;
external_declaration : function_declaration
| declaration
;
declaration : FT_LET declaration_init_list FT_SEMI
;
declaration_init_list : declaration_init
| declaration_init_list declaration_init
;
declaration_init : FT_IDENTIFIER FT_ASSIGN constant_expression
;
function_declaration : FT_FUNC FT_IDENTIFIER FT_LPAREN function_arg_list FT_RPAREN compound_statement
| FT_FUNC FT_IDENTIFIER FT_LPAREN FT_RPAREN compound_statement
;
function_arg_list : function_arg
| function_arg_list FT_COMMA function_arg
;
function_arg : FT_IDENTIFIER
;
compound_statement : FT_LBRACE FT_RBRACE
| FT_LBRACE statement_list FT_RBRACE
| FT_LBRACE declaration_list FT_RBRACE
;
statement_list : statement
| statement_list statement
;
declaration_list : declaration
| declaration_list declaration
;
statement : compound_statement
| expression_statement
| selection_statement
| iteration_statement
| jump_statement
;
expression_statement : FT_SEMI
| expression FT_SEMI
;
selection_statement : FT_IF FT_LPAREN expression FT_RPAREN statement
| FT_IF FT_LPAREN expression FT_RPAREN statement FT_ELSE statement
| FT_SWITCH FT_LPAREN expression FT_RPAREN statement
;
iteration_statement : FT_WHILE FT_LPAREN expression FT_RPAREN statement
| FT_FOR FT_LPAREN expression_statement expression_statement FT_RPAREN statement
| FT_FOR FT_LPAREN expression_statement expression_statement expression FT_RPAREN statement
;
jump_statement : FT_CONTINUE FT_SEMI
| FT_BREAK FT_SEMI
| FT_RETURN FT_SEMI
| FT_RETURN expression FT_SEMI
;
%%
flex_bison.cpp:
#include "bison.tab.hpp"
#include <cstdio>
int main(int argc, char **argv) {
if (argc <= 1) {
printf("error! filename missing!\n");
return 0;
}
FILE *fp = fopen(argv[1], "r");
yyin = fp;
int t;
while ((t = yylex()) != 0) {
printf("token: %d", t);
if (t == FT_IDENTIFIER || t == FT_INTEGER || t == FT_DOUBLE ||
t == FT_STRING) {
printf("literal: %s\n", yylval.literal);
} else {
printf("\n");
}
}
fclose(fp);
return 0;
}
generate code through commands:
$ flex -o flex.yy.cpp flex.l
$ bison -d -o bison.tab.hpp bison.y
$ g++ -o test.exe bison.tab.cpp flex.yy.cpp flex_bison.cpp
Here's the error message:
use of undeclared identifier 'yyin'
use of undeclared identifier 'yylex`
Do I have to define yyin and yylex before main function with code below?
extern FILE *yyin;
extern int yylex(void);
Even if the quoted code would be added to the output file, you still would get complaints about undefined yylex and yyin. Your code only declares these things, it does not define them. Neither yacc nor bison define these for you, you have to provide those functions yourself (you can use for example lex or flex to generate them). Take a look at bison documentation. The RPN example has a short example for a yylex function.
Edit after question was edited: as per the documentation of bison, the header generated from the -d option
Pretend that ‘%defines’ was specified, i.e., write an extra output
file containing macro definitions for the token type names defined
in the grammar, as well as a few other declarations.
This does not include the things you specified in the prologue, in particular not the declaration of yyin and yylex. If you need these declarations in multiple files then you may want to declare them in a separate header file and include that header file from bison.y and all other files that require that declaration (like flex_bison.cpp).
Related
I have the following code below and I am receiving the following error
parser.y:111.47-48: error: $$ for the midrule at $5 of ‘statement’ has no declared type
111 | REDUCE operator reductions ENDREDUCE {$$ = $3;} ';'|
I know it's generated because I didn't declare a type for something in the statement, I need help understanding line 111. Also the REAL_LITERAL is a float, that I should add a float to the union and create token like this %token <f_value>REAL_LITERAL.
include <iostream>
#include <string>
#include <vector>
#include <map>
using namespace std;
#include "math.h"
#include "values.h"
#include "listing.h"
#include "symbols.h"
int yylex();
void yyerror(const char* message);
Symbols<int> symbols;
int result;
double *params;
%}
%define parse.error verbose
%union
{
CharPtr iden;
Operators oper;
int value;
}
%token <iden> IDENTIFIER
%token <value>INT_LITERAL REAL_LITERAL BOOL_LITERAL CASE TRUE FALSE
%token ARROW
%token <oper> ADDOP MULOP RELOP OROP NOTOP REMOP EXPOP
%token ANDOP
%token BEGIN_ BOOLEAN END ENDREDUCE FUNCTION INTEGER IS REDUCE RETURNS
%token THEN WHEN
%token ELSE ENDCASE ENDIF IF OTHERS REAL
%type <value> body statement_ statement reductions expression binary relation term
factor primary
%type <oper> operator
%left OROP
%left ANDOP
%left RELOP
%left ADDOP
%left MULOP REMOP
%right EXPOP
%left NOTOP
%%
function:
function_header optional_variable body {result = $3;} ;
function_header:
FUNCTION IDENTIFIER parameters RETURNS type ';' |
FUNCTION IDENTIFIER RETURNS type ';' |
FUNCTION IDENTIFIER optional_parameters RETURNS type ';' |
error ';' ;
optional_variable:
optional_variable variable |
error ';' ;
;
variable:
IDENTIFIER ':' type IS statement_ {symbols.insert($1, $5);} ;
variables:
variable variables |
;
type:
INTEGER |
BOOLEAN ;
optional_parameters:
parameters |
;
parameters:
parameter ',' parameters |
parameter ;
parameter:
IDENTIFIER ':' type ;
type:
INTEGER |
REAL |
BOOLEAN ;
body:
BEGIN_ statement_ END ';' {$$ = $2;} ;
statement_:
statement ';' |
error ';' {$$ = 0;} ;
statement:
expression |
REDUCE operator reductions ENDREDUCE {$$ = $3;} ';'|
IF expression THEN statement_ ELSE statement_ ENDIF
{
if ($2 == true) {
$$ = $4;
}
else {
$$ = $6;
}
}';' /*|
CASE expression IS cases OTHERS ARROW statement_ ENDCASE
{$$ = $<value>4 == $1 ? $4 : $7;} ;
cases:
cases case
{$$ = $<value>1 == $1 ? $1 : $2;} |
%empty {$$ = NAN;};
case:
case WHEN INT_LITERAL ARROW statement_ |
;
*/
operator:
ADDOP |
RELOP |
EXPOP |
MULOP ;
reductions:
reductions statement_ {$$ = evaluateReduction($<oper>0, $1, $2);} |
{$$ = $<oper>0 == ADD ? 0 : 1;} ;
expression:
expression OROP binary {$$ = $1 || $3;} |
binary;
binary:
binary ANDOP relation {$$ = $1 && $3;} |
relation ;
relation:
relation RELOP term {$$ = evaluateRelational($1, $2, $3);} |
term ;
term:
term ADDOP factor {$$ = evaluateArithmetic($1, $2, $3);} |
factor ;
factor:
factor MULOP primary {$$ = evaluateArithmetic($1, $2, $3);} |
primary ;
primary:
'(' expression ')' {$$ = $2;} |
INT_LITERAL |
IDENTIFIER {if (!symbols.find($1, $$)) appendError(UNDECLARED, $1);} ;
%%
void yyerror(const char* message)
{
appendError(SYNTAX, message);
}
int main(int argc, char *argv[])
{
firstLine();
yyparse();
if (lastLine() == 0)
cout << "Result = " << result << endl;
return 0;
}
The basic problem is that in-rule actions do not get a default type in bison (unlike yacc). So in your action
REDUCE operator reductions ENDREDUCE {$$ = $3;} ';'
there's no %type for $$ so you need to specify it explicitly -- perhaps something like { $<value>$ = $3; }. That's equivalent to what yacc would do here, as it gives in-rule actions the same type as the lhs, even though there's not really anything connecting them.
The bigger issue is that this really makes no sense -- an in-rule action like this does NOT set the value for the symbol being reduced. That can only happen in the end-of-rule action. So this is just copying a value to a temp and then throwing it away, never doing anything with it. The implicit end-rule action just does { $$ = $1; } which makes no sense as statement and REDUCE have different types.
I am learning Flex/Bison and we are currently on the part about semantics, previously have dealt with lexical and syntax errors. I have googled extensively and haven't been able to find a solution to my error. I am having trouble trying to understand why I need to declare '$4' when I thought it to be automatically done.
When I try to makefile I get this error:
flex scanner.l
mv lex.yy.c scanner.c
bison -d -v parser.y
paser.y:114.71-72: error: $4 of 'case' has no declared type
114 | case WHEN INT_LITERAL ARROW statement_ {case_statements.push_back($4);};
Here is the pseudo code I am trying to follow:
statement:
CASE expression IS cases OTHERS ARROW statement_ ENDCASE
{If the attribute of cases, is a number then
return it as the attribute otherwise return the
attribute of the OTHERS clause};
cases:
cases case
{if the attribute of cases is a number then return it as the
attribute otherwise return the attribute of case} |
%empty
{Set the attribute to the sentinel NAN} ;
case:
WHEN INT_LITERAL ARROW statement_
{$-2 contains the value of the expression after CASE.
It must be compared with the attribute of INT_LITERAL.
If they match the attribute of this production
should become the attribute of statement_
If they don't match, the attribute should be set to the
sentinel value NAN} ;
parser.y:
%{
#include <iostream>
#include <string>
#include <vector>
#include <map>
#include <math.h>
using namespace std;
#include "values.h"
#include "listing.h"
#include "symbols.h"
#include <stdlib.h>
#include <stdio.h>
int yylex();
void yyerror(const char* message);
Symbols<int> symbols;
//----------------------------------------------------------------------------------------------
vector<int> case_statements; //<<<<<<<<<<<<Is this wrong?
//---------------------------------------------------------------------------------------------
int result;
double *params;
%}
%define parse.error verbose
%union
{
CharPtr iden;
Operators oper;
int value;
}
%token <iden> IDENTIFIER
%token <value> INT_LITERAL REAL_LITERAL BOOL_LITERAL CASE TRUE FALSE
%token <oper> ADDOP MULOP RELOP OROP NOTOP REMOP EXPOP
%token ANDOP
%token BEGIN_ BOOLEAN END ENDREDUCE FUNCTION INTEGER IS REDUCE RETURNS
%token THEN WHEN ARROW
%token ELSE ENDCASE ENDIF IF OTHERS REAL
%type <value> body statement_ statement reductions expression relation term
factor case cases exponent unary primary
%type <oper> operator
%%
function:
function_header optional_variable body {result = $3;} ;
function_header:
FUNCTION IDENTIFIER optional_parameter RETURNS type ';' |
FUNCTION IDENTIFIER RETURNS type ';' |
error ';' ;
optional_variable:
optional_variable variable |
error ';' |
%empty ;
variable:
IDENTIFIER ':' type IS statement_ ;
parameters:
parameter optional_parameter;
optional_parameter:
optional_parameter ',' parameter |
%empty ;
parameter:
IDENTIFIER ':' type {symbols.insert($1, params[0]);} ;
type:
INTEGER |
REAL |
BOOLEAN ;
body:
BEGIN_ statement_ END ';' {$$ = $2;} ;
statement_:
statement ';' |
error ';' {$$ = 0;} ;
statement:
expression |
REDUCE operator reductions ENDREDUCE {$$ = $3;} |
IF expression THEN statement_ ELSE statement_ ENDIF {
if ($2 == true) {
$$ = $4;
}
else {
$$ = $6;
}
} ; |
CASE expression IS cases OTHERS ARROW statement_ ENDCASE {$$ = $<value>4 == $1 ? $4 : $7;} ;
cases:
cases case {$$ = $<value>1 == $1 ? $1 : $2;} |
%empty {$$ = NAN;} ;
//-----------------------------------------------------------------------------------------------------------
case:
case WHEN INT_LITERAL ARROW statement_ {case_statements.push_back($4);} ; //<<<<<<<<<How do I declare $4?
//-------------------------------------------------------------------------------------------------------------
operator:
ADDOP |
RELOP |
EXPOP |
MULOP ;
reductions:
reductions statement_ {$$ = evaluateReduction($<oper>0, $1, $2);} |
{$$ = $<oper>0 == ADD ? 0 : 1;} %empty ;
expression:
expression OROP relation {$$ = $1 || $3;} |
relation ;
expression:
expression ANDOP relation {$$ = $1 && $3;} |
relation ;
relation:
relation RELOP term {$$ = evaluateRelational($1, $2, $3);} |
term ;
term:
term ADDOP factor {$$ = evaluateArithmetic($1, $2, $3);} |
factor ;
factor:
factor MULOP primary {$$ = evaluateArithmetic($1, $2, $3);} |
factor REMOP exponent {$$ = $1 % $3;} |
exponent ;
exponent:
unary |
unary EXPOP exponent {$$ = pow($1, $3);} ;
unary:
NOTOP primary {$$ = $2;} |
primary;
primary:
'(' expression ')' {$$ = $2;} |
INT_LITERAL |
REAL_LITERAL |
BOOL_LITERAL |
IDENTIFIER {if (!symbols.find($1, $$)) appendError(UNDECLARED, $1);} ;
%%
void yyerror(const char* message)
{
appendError(SYNTAX, message);
}
int main(int argc, char *argv[])
{
params = new double[argc - 1]
for (int i = 1; i < argc; i++)
{
params[i - 1] = atof(argv[i]);
}
firstLine();
yyparse();
if (lastLine() == 0)
cout << "Result = " << result << endl;
return 0;
}
You need to assign a value/type to statement_:
statement_:
statement ';' {$$ = $1;}|
error ';' {$$ = MISMATCH;} ;
I'm trying to integrate error recovery in my grammar. From the bison manual, the simplest error recovery would be skip the current line. But in my flex file, I have no action regarding the newline so the parser would not know about it. So I want the parser to ignore everything until it encounters a semicolon in case of an error.
I have the following grammar:
start : program;
program : program unit
| unit
;
unit : var_declaration
| func_declaration
| func_definition
;
func_declaration : type_specifier ID LPAREN parameter_list RPAREN SEMICOLON
| type_specifier ID LPAREN RPAREN SEMICOLON
;
func_definition : type_specifier ID LPAREN parameter_list RPAREN compound_statement
| type_specifier ID LPAREN RPAREN compound_statement
;
parameter_list : parameter_list COMMA type_specifier ID
| parameter_list COMMA type_specifier
| type_specifier ID
| type_specifier
;
compound_statement : LCURL statements RCURL
| LCURL RCURL
;
var_declaration : type_specifier declaration_list SEMICOLON
;
type_specifier : INT
| FLOAT
| VOID
;
declaration_list : declaration_list COMMA ID
| declaration_list COMMA ID LTHIRD CONST_INT RTHIRD
| ID
| ID LTHIRD CONST_INT RTHIRD
;
statements : statement
| statements statement
;
statement : var_declaration
| expression_statement
| compound_statement
| FOR LPAREN expression_statement expression_statement expression RPAREN statement
| IF LPAREN expression RPAREN statement
| IF LPAREN expression RPAREN statement ELSE statement
| WHILE LPAREN expression RPAREN statement
| PRINTLN LPAREN ID RPAREN SEMICOLON
| RETURN expression SEMICOLON
;
expression_statement : SEMICOLON
| expression SEMICOLON
;
variable : ID
| ID LTHIRD expression RTHIRD
;
expression : logic_expression
| variable ASSIGNOP logic_expression
;
logic_expression : rel_expression
| rel_expression LOGICOP rel_expression
;
rel_expression : simple_expression
| simple_expression RELOP simple_expression
;
simple_expression : term
| simple_expression ADDOP term
;
term : unary_expression
| term MULOP unary_expression
;
unary_expression : ADDOP unary_expression
| NOT unary_expression
| factor
;
factor : variable
| ID LPAREN argument_list RPAREN
| LPAREN expression RPAREN
| CONST_INT
| CONST_FLOAT
| variable INCOP
| variable DECOP
;
argument_list : arguments
|
;
arguments : arguments COMMA logic_expression
| logic_expression
;
I'm currently working on the following input:
int main(){
int a[2],c,i,j ; float c;
a[2.5]=1;
i=2.3
j=2%3.7;
a=4;
func(a);
b=8;
return 0;
}
When the parser encounters i = 2.3, it won't stop parsing but rather continue doing so after reporting a syntax error.
Based on the grammar, where should I put my error production so that the parser can continue parsing without any conflict ? And possibly shed some light on other syntax errors like missing a RPAREN or Curly braces ? How should I approach to add the error production for a given grammar ?
I'm playing around with Flex, BISON and LLVM, creating my own programming language to understand how compilers work better. I've got a basic parser working and a syntax going off of this series of blog posts http://gnuu.org/2009/09/18/writing-your-own-toy-compiler/
However, I can't figure out how to extend it to include strings, such as string myVar = "testing 123".
Here's my list of tokens...
[ \t\n] ;
"extern" return TOKEN(TEXTERN);
"return" return TOKEN(TRETURN);
[a-zA-Z_][a-zA-Z0-9_]* SAVE_TOKEN; return TIDENTIFIER;
[0-9]+\.[0-9]* SAVE_TOKEN; return TDOUBLE;
[0-9]+ SAVE_TOKEN; return TINTEGER;
\"[^\n"]+\" SAVE_TOKEN; return TSTRING;
"=" return TOKEN(TEQUAL);
"==" return TOKEN(TCEQ);
"!=" return TOKEN(TCNE);
"<" return TOKEN(TCLT);
"<=" return TOKEN(TCLE);
">" return TOKEN(TCGT);
">=" return TOKEN(TCGE);
"(" return TOKEN(TLPAREN);
")" return TOKEN(TRPAREN);
"{" return TOKEN(TLBRACE);
"}" return TOKEN(TRBRACE);
"=>" return TOKEN(TCLO);
"co" return TOKEN(TCO);
"const" return TOKEN(TCONST);
"let" return TOKEN(TLET);
"." return TOKEN(TDOT);
"," return TOKEN(TCOMMA);
"+" return TOKEN(TPLUS);
"-" return TOKEN(TMINUS);
"*" return TOKEN(TMUL);
"/" return TOKEN(TDIV);
. printf("Unknown token!\n"); yyterminate();
Here's my parser...
%{
#include "node.h"
#include <cstdio>
#include <cstdlib>
NBlock *programBlock; /* the top level root node of our final AST */
extern int yylex();
void yyerror(const char *s) { std::printf("Error: %s\n", s);std::exit(1); }
%}
/* Represents the many different ways we can access our data */
%union {
Node *node;
NBlock *block;
NExpression *expr;
NStatement *stmt;
NIdentifier *ident;
NVariableDeclaration *var_decl;
std::vector<NVariableDeclaration*> *varvec;
std::vector<NExpression*> *exprvec;
std::string *string;
int token;
}
/* Define our terminal symbols (tokens). This should
match our tokens.l lex file. We also define the node type
they represent.
*/
%token <string> TIDENTIFIER TINTEGER TDOUBLE TSTRING
%token <token> TCEQ TCNE TCLT TCLE TCGT TCGE TEQUAL
%token <token> TLPAREN TRPAREN TLBRACE TRBRACE TCOMMA TDOT TCLO TCO TCONST TLET
%token <token> TPLUS TMINUS TMUL TDIV
%token <token> TRETURN TEXTERN
/* Define the type of node our nonterminal symbols represent.
The types refer to the %union declaration above. Ex: when
we call an ident (defined by union type ident) we are really
calling an (NIdentifier*). It makes the compiler happy.
*/
%type <ident> ident
%type <expr> numeric expr string
%type <varvec> func_decl_args
%type <exprvec> call_args
%type <block> program stmts block
%type <stmt> stmt var_decl func_decl extern_decl const_func_decl let_func_decl
%type <token> comparison
/* Operator precedence for mathematical operators */
%left TPLUS TMINUS
%left TMUL TDIV
%start program
%%
program : stmts { programBlock = $1; }
;
stmts : stmt { $$ = new NBlock(); $$->statements.push_back($<stmt>1); }
| stmts stmt { $1->statements.push_back($<stmt>2); }
;
stmt : var_decl | func_decl | extern_decl | const_func_decl | let_func_decl
| expr { $$ = new NExpressionStatement(*$1); }
| TRETURN expr { $$ = new NReturnStatement(*$2); }
;
block : TLBRACE stmts TRBRACE { $$ = $2; }
| TLBRACE TRBRACE { $$ = new NBlock(); }
;
var_decl : ident ident { $$ = new NVariableDeclaration(*$1, *$2); }
| ident ident TEQUAL expr { $$ = new NVariableDeclaration(*$1, *$2, $4); }
;
extern_decl : TEXTERN ident ident TLPAREN func_decl_args TRPAREN
{ $$ = new NExternDeclaration(*$2, *$3, *$5); delete $5; }
;
const_func_decl : TCONST ident ident TEQUAL TLPAREN func_decl_args TRPAREN TCLO block
{ $$ = new NFunctionDeclaration(*$2, *$3, *$6, *$9); delete $6; }
;
let_func_decl : TLET ident ident TEQUAL TLPAREN func_decl_args TRPAREN TCLO block
{ $$ = new NFunctionDeclaration(*$2, *$3, *$6, *$9); delete $6; }
;
func_decl : ident ident TEQUAL TLPAREN func_decl_args TRPAREN TCLO block
{ $$ = new NFunctionDeclaration(*$1, *$2, *$5, *$8); delete $5; }
;
func_decl_args : /*blank*/ { $$ = new VariableList(); }
| var_decl { $$ = new VariableList(); $$->push_back($<var_decl>1); }
| func_decl_args TCOMMA var_decl { $1->push_back($<var_decl>3); }
;
ident : TIDENTIFIER { $$ = new NIdentifier(*$1); delete $1; }
;
string : TSTRING { $$ = new NString($1->c_str()); delete $1; }
;
numeric : TINTEGER { $$ = new NInteger(atol($1->c_str())); delete $1; }
| TDOUBLE { $$ = new NDouble(atof($1->c_str())); delete $1; }
;
expr : ident TEQUAL expr { $$ = new NAssignment(*$<ident>1, *$3); }
| ident TLPAREN call_args TRPAREN { $$ = new NMethodCall(*$1, *$3); delete $3; }
| ident { $<ident>$ = $1; }
| numeric
| expr TMUL expr { $$ = new NBinaryOperator(*$1, $2, *$3); }
| expr TDIV expr { $$ = new NBinaryOperator(*$1, $2, *$3); }
| expr TPLUS expr { $$ = new NBinaryOperator(*$1, $2, *$3); }
| expr TMINUS expr { $$ = new NBinaryOperator(*$1, $2, *$3); }
| expr comparison expr { $$ = new NBinaryOperator(*$1, $2, *$3); }
| TLPAREN expr TRPAREN { $$ = $2; }
;
call_args : /*blank*/ { $$ = new ExpressionList(); }
| expr { $$ = new ExpressionList(); $$->push_back($1); }
| call_args TCOMMA expr { $1->push_back($3); }
;
comparison : TCEQ | TCNE | TCLT | TCLE | TCGT | TCGE;
%%
Finally, here's my code generation C++ code for strings...
Value* NString::codeGen(CodeGenContext& context)
{
// Generate the type for the global var
ArrayType* ArrayTy_0 = ArrayType::get(IntegerType::get(getGlobalContext(), 8), value.size() +1 );
// create global var which holds the constant string.
GlobalVariable* gvar_array__str = new GlobalVariable(*context.module,
/*Type=*/ArrayTy_0,
/*isConstant=*/true,
GlobalValue::PrivateLinkage,
/*Initializer=*/0, // has initializer, specified below
".str");
gvar_array__str->setAlignment(1);
// create the contents for the string global.
Constant* const_array_str = ConstantDataArray::getString(getGlobalContext(), value);
// Initialize the global with the string
gvar_array__str->setInitializer(const_array_str);
// generate access pointer to the string
std::vector<Constant*> const_ptr_8_indices;
ConstantInt* const_int = ConstantInt::get(getGlobalContext(), APInt(64, StringRef("0"), 10));
const_ptr_8_indices.push_back(const_int);
const_ptr_8_indices.push_back(const_int);
Constant* const_ptr_8 = ConstantExpr::getGetElementPtr(ArrayTy_0, gvar_array__str, const_ptr_8_indices);
return const_ptr_8;
}
In my own syntax, when I run... int myInt = 123 that works fine, however, string myString = "123" that triggers a syntax error.
I am writing a compiler in C++ (using Visual Studio) for a small scripting language and I use this C parsing library.
So, I followed instructions from the documentation and I ended up on this peace of code:
int main()
{
mpc_parser_t* Int = mpc_new("int");
mpc_parser_t* Char = mpc_new("char");
mpc_parser_t* String = mpc_new("string");
mpc_parser_t* Id = mpc_new("id");
mpc_parser_t* Type = mpc_new("type");
mpc_parser_t* Formal = mpc_new("formal");
mpc_parser_t* Header = mpc_new("header");
mpc_parser_t* FuncDecl = mpc_new("funcdecl");
mpc_parser_t* VarDef = mpc_new("vardef");
mpc_parser_t* Expr = mpc_new("expr");
mpc_parser_t* Call = mpc_new("call");
mpc_parser_t* Atom = mpc_new("atom");
mpc_parser_t* Simple = mpc_new("simple");
mpc_parser_t* SimpleList = mpc_new("simplelist");
mpc_parser_t* Stmt = mpc_new("stmt");
mpc_parser_t* FuncDef = mpc_new("funcdef");
mpc_parser_t* Program = mpc_new("program");
/* Define them with the following Language */
mpca_lang(MPCA_LANG_DEFAULT,
" \
int : /-?[0-9]+/ ; \
char : /'[a-zA-Z0-9!##$%^&*()\\_+-,.\\/<>?;'|\"`~]'/ ; \
string : /\"(\\\\.|[^\"])*\"/ ; \
id : /[a-zA-Z][a-zA-Z0-9_-]*/ ; \
type : \"int\" | \"bool\" | \"char\" | <type> '[' ']' | \"list\" '[' <type> ']' ; \
formal : (\"ref\")? <type> <id> (',' <id>)* ; \
header : <type>? <id> '(' (<formal> (';' <formal>)*)? ')' ; \
funcdecl : \"decl\" <header> ; \
vardef : <type> <id> (',' <id>)* ; \
expr : <atom> | <int> | <char> | '(' <expr> ')' \
| ('+' | '-') <expr> | <expr> ('+' | '-' | '*' | '/' | \"mod\") <expr> \
| <expr> ('=' | \"<>\" | '<' | '>' | \"<=\" | \">=\") <expr> \
| \"true\" | \"false\" | \"not\" <expr> | <expr> (\"and\" | \"or\") <expr> \
| \"new\" <type> '[' <expr> ']' | \"nil\" | \"nil?\" '(' <expr> ')' \
| <expr> '#' <expr> | \"head\" '(' <expr> ')' | \"tail\" '(' <expr> ')' ; \
call : <id> '(' (<expr> (',' <expr>)*)? ')' ; \
atom : <id> | <string> | <atom> '[' <expr> ']' | <call> ; \
simple : \"skip\" | <atom> \":=\" <expr> | <call> ; \
simplelist : <simple> (',' <simple>)* ; \
stmt : <simple> | \"exit\" | \"return\" <expr> \
| \"if\" <expr> ':' <stmt>+ (\"elif\" <expr> ':' <stmt>+)* \
(\"else\" ':' <stmt>+)? \"end\" \
| \"for\" <simplelist> ';' <expr> ';' <simplelist> ':' <stmt>+ \"end\" ; \
funcdef : \"def\" <header> ':' (<funcdef> | <funcdecl> | <vardef>)* <stmt>+ \"end\" ; \
program : /^/ <funcdef> /$/ ; \
",
Int, Char, String, Id, Type, Formal, Header, FuncDecl, VarDef, Expr,
Call, Atom, Simple, SimpleList, Stmt, FuncDef, Program);
mpc_result_t r;
char* input = "def hey () : return 1 end";
if(mpc_parse("input", input, Program, &r))
{
mpc_ast_print((mpc_ast_t*)r.output);
mpc_ast_delete((mpc_ast_t*)r.output);
}
else
{
mpc_err_print(r.error);
mpc_err_delete(r.error);
}
PAUSE("Press any key to continue . . .");
/* Undefine and Delete our Parsers */
mpc_cleanup(17, Int, Char, String, Id, Type, Formal, Header, FuncDecl, VarDef, Expr,
Call, Atom, Simple, SimpleList, Stmt, FuncDef, Program);
return 0;
}
The problem is that I run into an huge loop in mpc_parse. That loop never actually reaches the end. After some time I get this exception:
Unhandled exception at 0x00CBBC9C in TonyCC.exe: 0xC0000005: Access violation reading location 0x0000000C.
I don't know why. I suspect there is something wrong with my grammar but I cannot figure out what.
If someone has used this library before, do you have any idea what the problem might be?
Note: I know it is difficult to read the grammar from C code so here is an image of the grammar: