flex/bison based compiler: parsing asm[assembly commands] inside C program - c++
I'm new to flex and bison. I want to write a compiler that reads a C program and translates it into commands for my processor, which are similar to assembly. I downloaded a pre-written compiler that uses flex and bison. I need to change scanner.l and parser.y so that the compiler can process asm commands embedded in my C code, like asm [asm command1 \n asm command2 \n asm command3 \n ...]. Which definitions and rules should I add to these two files?
scanner.l:
%{
#include "scanner.h"
#include "y.tab.h"
#include <stdio.h>
#include <stdlib.h>
/* Size of string_buf, the buffer in which the <str> rules assemble a string constant. */
#define MAX_STR_CONST 1000
/* Text of the string constant currently being scanned (escape sequences already translated). */
char string_buf[MAX_STR_CONST];
/* Next free position in string_buf; reset to string_buf when an opening quote is seen. */
char *string_buf_ptr;
/* Position just past the text matched so far; both start at 1 and are advanced by updatePosition(). */
int line_num = 1;
int line_pos = 1;
void updatePosition();
/* flex runs YY_USER_ACTION before the action of every matched rule, so yylloc is refreshed for each match. */
#define YY_USER_ACTION updatePosition();
%}
/* Decimal literal: a lone 0, or digits without a leading zero. */
NUMBER (0)|([1-9][0-9]*)
/* Hexadecimal literal: 0x or 0X followed by at least one hex digit. */
HEXNUM ((0x)|(0X))([a-fA-F0-9]+)
/* Identifier: a letter or underscore followed by letters, digits or underscores. */
IDENT [a-zA-Z_][a-zA-Z0-9_]*
/* Exclusive start conditions: active while inside a block comment and inside a string constant. */
%x comment
%x str
%option noyywrap
%option yylineno
%option nounput
%%
%{
    /* Scanner-local helpers for string constants.  Code placed before the
     * first rule is emitted at the top of yylex().
     *
     * string_too_long is set as soon as a character has to be dropped
     * because string_buf is full; the problem is reported once, when the
     * closing quote is reached.
     */
    static bool string_too_long = false;

    /* Append one character to string_buf, always keeping the last slot free
     * for the terminating '\0'.  Characters that do not fit are dropped
     * instead of being written past the end of the buffer.
     */
    const auto append_to_string = [](char c) {
        if ( string_buf_ptr < string_buf + MAX_STR_CONST - 1 )
            *string_buf_ptr++ = c;
        else
            string_too_long = true;
    };
%}
\" { /* opening quote - start collecting a new string constant */
    string_buf_ptr = string_buf;
    string_too_long = false;
    BEGIN(str);
}
<str>{
\" { /* saw closing quote - all done */
    BEGIN(INITIAL);
    *string_buf_ptr = '\0';
    if ( string_too_long )
        yyerror("String constant too long.");
    /* return string constant token type and value to parser;
     * the parser owns (and deletes) the std::string
     */
    yylval.strConst = new std::string(string_buf);
    return T_STR_CONST;
}
\n {
    /* error - unterminated string constant */
    yyerror("Unterminated string constant.");
}
<<EOF>> { return T_UNTERM_STRING; }
\\[0-7]{1,3} {
    /* octal escape sequence, e.g. \101; %o requires an unsigned int */
    unsigned int result = 0;
    (void) sscanf( yytext + 1, "%o", &result );
    if ( result > 0xff ) {
        /* error, constant is out-of-bounds (e.g. \777) */
        yyerror("Bad string escape sequence.");
    } else {
        append_to_string( static_cast<char>(result) );
    }
}
\\[0-9]+ {
    /* generate error - bad escape sequence; something
     * like '\48' or '\0777777'
     */
    yyerror("Bad string escape sequence.");
}
\\n { append_to_string('\n'); }
\\t { append_to_string('\t'); }
\\r { append_to_string('\r'); }
\\b { append_to_string('\b'); }
\\f { append_to_string('\f'); }
\\(.|\n) { /* any other escaped character stands for itself */
    append_to_string(yytext[1]);
}
[^\\\n\"]+ {
    /* run of ordinary characters: copy as much as fits */
    for ( const char *src = yytext; *src; ++src )
        append_to_string(*src);
}
}
"/*" BEGIN(comment);
<comment>{
[^*\n]* /* eat anything that's not a '*' */
"*"+[^*/\n]* /* eat up '*'s not followed by '/'s */
\n /* newlines inside a comment are skipped as well */
<<EOF>> { return T_UNTERM_COMMENT; }
"*"+"/" BEGIN(INITIAL);
}
"do" { /* ---- keywords ---- */ return T_DO; }
"while" { return T_WHILE; }
"for" { return T_FOR; }
"if" { return T_IF; }
"else" { return T_ELSE; }
"int" { return T_INT_TYPE; }
"string" { return T_STRING_TYPE; }
"void" { return T_VOID_TYPE; }
"struct" { return T_STRUCT; }
"return" { return T_RETURN; }
"switch" { return T_SWITCH; }
"case" { return T_CASE; }
"default" { return T_DEFAULT; }
"break" { return T_BREAK; }
"continue" { return T_CONTINUE; }
"sizeof" { return T_SIZEOF; }
"{" { /* ---- single-character tokens are returned as their own character code ---- */ return '{'; }
"}" { return '}'; }
"(" { return '('; }
")" { return ')'; }
"[" { return '['; }
"]" { return ']'; }
"+" { return '+'; }
"-" { return '-'; }
"*" { return '*'; }
"/" { return '/'; }
"%" { return '%'; }
"=" { return '='; }
">" { return '>'; }
"<" { return '<'; }
"!" { return '!'; }
"|" { return '|'; }
"&" { return '&'; }
"^" { return '^'; }
"~" { return '~'; }
"." { return '.'; }
":" { return ':'; }
";" { return ';'; }
"," { return ','; }
"<<" { /* ---- multi-character operators (flex picks the longest match) ---- */ return T_LEFT_SHIFT; }
">>" { return T_RIGHT_SHIFT; }
"&&" { return T_BOOL_AND; }
"||" { return T_BOOL_OR; }
"+=" { return T_PLUS_EQUALS; }
"-=" { return T_MINUS_EQUALS; }
"*=" { return T_STAR_EQUALS; }
"/=" { return T_DIV_EQUALS; }
"%=" { return T_MOD_EQUALS; }
"==" { return T_EQUAL; }
"<=" { return T_LESS_OR_EQUAL; }
">=" { return T_GREATER_OR_EQUAL; }
"!=" { return T_NOT_EQUAL; }
"|=" { return T_BIT_OR_EQUALS; }
"&=" { return T_BIT_AND_EQUALS; }
"^=" { return T_BIT_XOR_EQUALS; }
"~=" { return T_BIT_NOT_EQUALS; }
"->" { return T_ARROW; }
"<<=" { return T_LEFT_SHIFT_EQUALS; }
">>=" { return T_RIGHT_SHIFT_EQUALS; }
"++" { return T_PLUS_PLUS; }
"--" { return T_MINUS_MINUS; }
" "|"\t"|"\r"|"\n"|"const" { /* whitespace and the "const" keyword produce no token */ }
{HEXNUM} { yylval.intConst = std::strtoul(yytext, NULL, 0); return T_INT_CONST; }
{NUMBER} { /* strtoul, like the hex rule: atoi is undefined for values above INT_MAX */ yylval.intConst = std::strtoul(yytext, NULL, 10); return T_INT_CONST; }
{IDENT} { /* the parser owns (and deletes) the std::string */ yylval.ident = new std::string(yytext); return T_IDENT; }
. { /* anything else: report it, patching the offending character over the trailing 'a' placeholder */ char err[] = "Unknown Character: a"; err[sizeof(err) - 2] = *yytext; yyerror(err); }
%%
/**
* This function is called on every token, and updates the yylloc global variable, which stores the
* location/position of the current token.
*/
void updatePosition() {
yylloc.first_line = line_num;
yylloc.first_column = line_pos;
char* text = yytext;
while(*text != '\0') {
if(*text == '\n') {
line_num++;
line_pos = 1;
} else {
line_pos++;
}
text++;
}
yylloc.last_line = line_num;
yylloc.last_column = line_pos;
}
parser.y:
%code requires {
#include "Declaration.h"
#include "Expression.h"
#include "Statement.h"
#include "Type.h"
#include "Parser.h"
#include "Util.h"
extern Program* program_out;
}
%locations
%define parse.lac full
%error-verbose
%{
#include "Parser.h"
#include "scanner.h"
#include <string>
#include <iostream>
#include "Type.h"
%}
//%parse-param {Program*& out}
/* Semantic value: one member per kind of value a token or non-terminal can carry. */
%union {
char* cstr;
/* ident and strConst are allocated by the scanner with new std::string; the grammar actions delete them after use. */
std::string* ident;
std::string* strConst;
unsigned int intConst;
Type* type;
/* The vector members below are temporary lists: actions build them with new and delete them once copied into an AST node. */
std::vector<Declaration*>* declareList;
Declaration* declare;
ConstantExpression* constant;
std::vector<FunctionParameter*>* paramList;
FunctionParameter* param;
std::vector<StructMember*>* structMemberList;
StructMember* structMember;
StatementBlock* statementBlock;
Statement* statement;
std::vector<Statement*>* statementList;
Expression* expression;
std::vector<Expression*>* expressionList;
}
/* Which %union member each non-terminal uses. */
%type <type> type
/* root only ever sets its value to NULL; the parsed program is returned through program_out instead. */
%type <cstr> root
%type <declareList> root_declare_list
%type <declare> root_declare
%type <constant> constant
%type <paramList> param_list non_empty_param_list
%type <param> param
%type <structMemberList> struct_list
%type <structMember> struct_member;
%type <statementBlock> statement_block
%type <statementList> statement_list
%type <statement> statement
%type <expression> expression
%type <expressionList> argument_list non_empty_argument_list
/* Tokens that carry a value from the scanner. */
%token <ident> T_IDENT
%token <strConst> T_STR_CONST
%token <intConst> T_INT_CONST
/* Keywords and multi-character operators (no semantic value). */
%token T_IF T_ELSE T_FOR T_WHILE T_DO T_SIZEOF
%token T_INT_TYPE T_STRING_TYPE T_VOID_TYPE T_STRUCT
%token T_RETURN T_SWITCH T_CASE T_DEFAULT T_BREAK T_CONTINUE
%token T_BOOL_OR T_BOOL_AND
%token T_LEFT_SHIFT T_RIGHT_SHIFT T_PLUS_EQUALS T_MINUS_EQUALS
%token T_STAR_EQUALS T_DIV_EQUALS T_MOD_EQUALS T_EQUAL
%token T_LESS_OR_EQUAL T_GREATER_OR_EQUAL T_NOT_EQUAL
%token T_BIT_OR_EQUALS T_BIT_AND_EQUALS T_BIT_XOR_EQUALS
/* NOTE(review): T_BIT_NOT_EQUALS ("~=") is produced by the scanner but no grammar rule in this file uses it. */
%token T_BIT_NOT_EQUALS T_ARROW T_LEFT_SHIFT_EQUALS
%token T_RIGHT_SHIFT_EQUALS T_PLUS_PLUS T_MINUS_MINUS
/* Returned by the scanner at end of input inside a string / comment; no rule accepts them, so they presumably surface as syntax errors. */
%token T_UNTERM_STRING T_UNTERM_COMMENT
/* tokens for precedence: never produced by the scanner, only named in %prec and in the precedence table */
%token PREC_ADDRESS PREC_DEREFERENCE PREC_UNARY_MINUS PREC_UNARY_PLUS
%token PREC_SUFFIX_PLUS_PLUS PREC_SUFFIX_MINUS_MINUS
%token PREC_PREFIX_PLUS_PLUS PREC_PREFIX_MINUS_MINUS
%token PREC_APPLICATION
/* Operator precedence table, from lowest to highest.  Each %left / %right /
 * %nonassoc line is one precedence level; tokens on the same line share that
 * level and its associativity.
 */
/* lowest precedence */
%left ','
/* All assignment operators form ONE right-associative level, as in C.
 * With separate levels, "a = b += c" was grouped as "(a = b) += c"
 * instead of "a = (b += c)".
 */
%right '=' T_PLUS_EQUALS T_MINUS_EQUALS
       T_STAR_EQUALS T_DIV_EQUALS T_MOD_EQUALS
       T_LEFT_SHIFT_EQUALS T_RIGHT_SHIFT_EQUALS
       T_BIT_AND_EQUALS T_BIT_XOR_EQUALS T_BIT_OR_EQUALS
%left T_BOOL_OR
%left T_BOOL_AND
%left '|'
%left '^'
%left '&'
%left T_EQUAL T_NOT_EQUAL
/* The four relational operators also share one level, as in C.  With '<'
 * above '>', "a > b < c" was grouped as "a > (b < c)".
 */
%left '<' '>' T_LESS_OR_EQUAL T_GREATER_OR_EQUAL
%left T_LEFT_SHIFT T_RIGHT_SHIFT
%left '+' '-'
%left '*' '/' '%'
/* Prefix operators; the PREC_* names are attached to rules with %prec. */
%right PREC_ADDRESS
%right PREC_DEREFERENCE
%right '!' '~'
%right PREC_UNARY_PLUS PREC_UNARY_MINUS
%right PREC_PREFIX_PLUS_PLUS PREC_PREFIX_MINUS_MINUS
%right T_PLUS_PLUS T_MINUS_MINUS
/* Postfix operators: member access, subscript, call, suffix ++/--. */
%left T_ARROW
%left '.'
%left '['
%left PREC_APPLICATION
%left PREC_SUFFIX_PLUS_PLUS PREC_SUFFIX_MINUS_MINUS
/* Dangling else: T_ELSE outranks the else-less "if" rule (%prec T_IF), so an
 * "else" is shifted and binds to the nearest "if".
 */
%nonassoc T_IF
%nonassoc T_ELSE
/* highest precedence */
%%
/* Start symbol: a whole source file. The Program built from the declaration list is handed back
 * through the global program_out; the rule's own value is just NULL.
 * NOTE(review): "#$" / "#N" are not standard bison syntax - presumably a build step rewrites them
 * into a source-location argument for the AST constructors; confirm against the build.
 */
root:
root_declare_list { $$ = NULL; program_out = new Program(#$, *$1); delete $1; }
;
/* Zero or more top-level declarations, collected in source order. */
root_declare_list:
root_declare_list root_declare { $$ = $1; $1->push_back($2); }
| { $$ = new std::vector<Declaration*>(); }
;
/* One top-level declaration: function prototype, function definition, global variable
 * (plain, array, or initialised with a constant), struct definition, or struct forward declaration.
 */
root_declare:
type T_IDENT '(' param_list ')' ';' { $$ = new FunctionPrototype(#$, $1, *$2, *$4); delete $2; delete $4; }
| type T_IDENT '(' param_list ')' statement_block { $$ = new FunctionDeclaration(#$, $1, *$2, *$4, $6); delete $2; delete $4; }
| type T_IDENT ';' { $$ = new GlobalVarDeclaration(#$, $1, *$2); delete $2; }
| type T_IDENT '[' T_INT_CONST ']' ';' { $$ = new GlobalArrayDeclaration(#$, $1, *$2, $4); delete $2; }
| type T_IDENT '=' constant ';' { $$ = new GlobalVarDeclarationInit(#$, $1, *$2, $4); delete $2; }
| T_STRUCT T_IDENT '{' struct_list '}' ';' { $$ = new StructDeclaration(#$, *$2, *$4); delete $2; delete $4; }
| T_STRUCT T_IDENT ';' { $$ = new StructPredeclaration(#$, *$2); delete $2; }
;
/* Literal constant: integer or string. */
constant:
T_INT_CONST { $$ = new IntConstantExpression(#$, $1); }
| T_STR_CONST { $$ = new StringConstantExpression(#$, *$1); delete $1; }
;
/* Possibly empty, comma-separated function parameter list. */
param_list:
non_empty_param_list { $$ = $1; }
| { $$ = new std::vector<FunctionParameter*>(); }
;
non_empty_param_list:
non_empty_param_list ',' param { $$ = $1; $1->push_back($3); }
| param { $$ = new std::vector<FunctionParameter*>({$1}); }
;
/* A single parameter: type followed by its name. */
param:
type T_IDENT { $$ = new FunctionParameter(#$, $1, *$2); delete $2; }
;
/* Struct body: zero or more members, each terminated by ';'. */
struct_list:
struct_list struct_member ';' { $$ = $1; $1->push_back($2); }
| { $$ = new std::vector<StructMember*>(); }
;
struct_member:
type T_IDENT { $$ = new StructMember(#$, $1, *$2); delete $2; }
;
/* Type name: int, void, string or "struct NAME", followed by any number of '*' (each wraps the type in a PointerType). */
type:
type '*' { $$ = new PointerType($1); }
| T_STRUCT T_IDENT { $$ = new StructType(*$2); delete $2; }
| T_INT_TYPE { $$ = new IntType(); }
| T_VOID_TYPE { $$ = new VoidType(); }
| T_STRING_TYPE { $$ = new StringType(); }
;
/* Brace-enclosed statement list. */
statement_block:
'{' statement_list '}' { $$ = new StatementBlock(#$, *$2); delete $2; }
;
/* Zero or more statements, collected in source order. */
statement_list:
statement_list statement { $$ = $1; $1->push_back($2); }
| { $$ = new std::vector<Statement*>(); }
;
/* A single statement.
 * Limits visible in the rules below: "for" needs all three header expressions, and "return"
 * always needs an expression (there is no bare "return;").
 * "case N:" and "default:" are ordinary statements, so they are accepted wherever a statement is.
 */
statement:
expression ';' { $$ = $1; }
| type T_IDENT ';' { $$ = new VarDeclaration(#$, $1, *$2); delete $2; }
| type T_IDENT '=' expression ';' { $$ = new VarDeclarationInit(#$, $1, *$2, $4); delete $2; }
| type T_IDENT '[' T_INT_CONST ']' ';' { $$ = new ArrayDeclaration(#$, $1, *$2, $4); delete $2; }
| T_WHILE '(' expression ')' statement { $$ = new WhileStatement(#$, $3, $5); }
| T_DO statement T_WHILE '(' expression ')' ';' { $$ = new DoWhileStatement(#$, $2, $5); }
| T_FOR '(' expression ';' expression ';' expression ')' statement { $$ = new ForStatement(#$, $3, $5, $7, $9); }
| statement_block { $$ = $1; }
/* The else-less form takes the (lower) precedence of T_IF, so a following T_ELSE is shifted and binds to the nearest "if". */
| T_IF '(' expression ')' statement %prec T_IF { $$ = new IfStatement(#$, $3, $5); }
| T_IF '(' expression ')' statement T_ELSE statement { $$ = new IfElseStatement(#$, $3, $5, $7); }
| T_BREAK ';' { $$ = new BreakStatement(#$); }
| T_CONTINUE ';' { $$ = new ContinueStatement(#$); }
| T_SWITCH '(' expression ')' '{' statement_list '}' { $$ = new SwitchStatement(#$, $3, *$6); delete $6; }
| T_CASE T_INT_CONST ':' { $$ = new CaseStatement(#$, $2); }
| T_DEFAULT ':' { $$ = new DefaultStatement(#$); }
| T_RETURN expression ';' { $$ = new ReturnStatement(#$, $2); }
;
/* Expressions. The grammar is ambiguous on purpose; grouping is decided by the precedence table
 * in the declarations section and by the %prec annotations on the unary / suffix rules.
 */
expression:
expression '=' expression { $$ = new AssignExpression(#$, $1, $3); }
/* Compound assignment "a OP= b" is rewritten to the assignment "a = a OP b", using a clone of the left operand on the right-hand side. */
| expression T_PLUS_EQUALS expression { $$ = new AssignExpression(#$, $1, new BinaryOperatorExpression(#3, $1->clone(), "+", $3)); }
| expression T_MINUS_EQUALS expression { $$ = new AssignExpression(#$, $1, new BinaryOperatorExpression(#3, $1->clone(), "-", $3)); }
| expression T_STAR_EQUALS expression { $$ = new AssignExpression(#$, $1, new BinaryOperatorExpression(#3, $1->clone(), "*", $3)); }
| expression T_DIV_EQUALS expression { $$ = new AssignExpression(#$, $1, new BinaryOperatorExpression(#3, $1->clone(), "/", $3)); }
| expression T_MOD_EQUALS expression { $$ = new AssignExpression(#$, $1, new BinaryOperatorExpression(#3, $1->clone(), "%", $3)); }
| expression T_BIT_AND_EQUALS expression { $$ = new AssignExpression(#$, $1, new BinaryOperatorExpression(#3, $1->clone(), "&", $3)); }
| expression T_BIT_OR_EQUALS expression { $$ = new AssignExpression(#$, $1, new BinaryOperatorExpression(#3, $1->clone(), "|", $3)); }
| expression T_BIT_XOR_EQUALS expression { $$ = new AssignExpression(#$, $1, new BinaryOperatorExpression(#3, $1->clone(), "^", $3)); }
| expression T_LEFT_SHIFT_EQUALS expression { $$ = new AssignExpression(#$, $1, new BinaryOperatorExpression(#3, $1->clone(), "<<", $3)); }
| expression T_RIGHT_SHIFT_EQUALS expression { $$ = new AssignExpression(#$, $1, new BinaryOperatorExpression(#3, $1->clone(), ">>", $3)); }
/* ++ / --: the suffix and prefix forms differ only in the argument order passed to UnaryAssignExpression. */
| expression T_PLUS_PLUS %prec PREC_SUFFIX_PLUS_PLUS { $$ = new UnaryAssignExpression(#$, $1, "++"); }
| T_PLUS_PLUS expression %prec PREC_PREFIX_PLUS_PLUS { $$ = new UnaryAssignExpression(#$, "++", $2); }
| expression T_MINUS_MINUS %prec PREC_SUFFIX_MINUS_MINUS { $$ = new UnaryAssignExpression(#$, $1, "--"); }
| T_MINUS_MINUS expression %prec PREC_PREFIX_MINUS_MINUS { $$ = new UnaryAssignExpression(#$, "--", $2); }
| constant { $$ = $1; }
| '(' expression ')' { $$ = $2; }
| T_IDENT '(' argument_list ')' %prec PREC_APPLICATION { $$ = new FunctionCallExpression(#$, *$1, *$3); delete $1; delete $3; }
| T_SIZEOF '(' type ')' { $$ = new SizeofExpression(#$, $3); }
/* Prefix unary operators. */
| '!' expression { $$ = new UnaryOperatorExpression(#$, "!", $2); }
| '~' expression { $$ = new UnaryOperatorExpression(#$, "~", $2); }
| '+' expression %prec PREC_UNARY_PLUS { $$ = new UnaryOperatorExpression(#$, "+", $2); }
| '-' expression %prec PREC_UNARY_MINUS { $$ = new UnaryOperatorExpression(#$, "-", $2); }
/* Dereference "*p" is represented as the subscript "p[0]". */
| '*' expression %prec PREC_DEREFERENCE { $$ = new ArraySubscriptExpression(#$, $2, new IntConstantExpression(#2, 0)); }
| '&' expression %prec PREC_ADDRESS { $$ = new UnaryOperatorExpression(#$, "&", $2); }
/* Arithmetic, bitwise, logical and shift operators. */
| expression '+' expression { $$ = new BinaryOperatorExpression(#$, $1, "+", $3); }
| expression '-' expression { $$ = new BinaryOperatorExpression(#$, $1, "-", $3); }
| expression '*' expression { $$ = new BinaryOperatorExpression(#$, $1, "*", $3); }
| expression '/' expression { $$ = new BinaryOperatorExpression(#$, $1, "/", $3); }
| expression '%' expression { $$ = new BinaryOperatorExpression(#$, $1, "%", $3); }
| expression '&' expression { $$ = new BinaryOperatorExpression(#$, $1, "&", $3); }
| expression '|' expression { $$ = new BinaryOperatorExpression(#$, $1, "|", $3); }
| expression '^' expression { $$ = new BinaryOperatorExpression(#$, $1, "^", $3); }
| expression T_BOOL_AND expression { $$ = new BinaryOperatorExpression(#$, $1, "&&", $3); }
| expression T_BOOL_OR expression { $$ = new BinaryOperatorExpression(#$, $1, "||", $3); }
| expression T_LEFT_SHIFT expression { $$ = new BinaryOperatorExpression(#$, $1, "<<", $3); }
| expression T_RIGHT_SHIFT expression { $$ = new BinaryOperatorExpression(#$, $1, ">>", $3); }
/* Comparisons use a separate node type, BinaryOperatorConditionExpression. */
| expression T_EQUAL expression { $$ = new BinaryOperatorConditionExpression(#$, $1, "==", $3); }
| expression T_NOT_EQUAL expression { $$ = new BinaryOperatorConditionExpression(#$, $1, "!=", $3); }
| expression '<' expression { $$ = new BinaryOperatorConditionExpression(#$, $1, "<", $3); }
| expression '>' expression { $$ = new BinaryOperatorConditionExpression(#$, $1, ">", $3); }
| expression T_LESS_OR_EQUAL expression { $$ = new BinaryOperatorConditionExpression(#$, $1, "<=", $3); }
| expression T_GREATER_OR_EQUAL expression { $$ = new BinaryOperatorConditionExpression(#$, $1, ">=", $3); }
| T_IDENT { $$ = new VarExpression(#$, *$1); delete $1; }
/* NOTE(review): '.' and '->' build the same StructMemberExpression with the same arguments; presumably the node
 * tells struct values from struct pointers by the operand's type - confirm that "p->m" really dereferences p.
 */
| expression '.' T_IDENT { $$ = new StructMemberExpression(#$, $1, *$3); delete $3; }
| expression T_ARROW T_IDENT { $$ = new StructMemberExpression(#$, $1, *$3); delete $3; }
| expression '[' expression ']' { $$ = new ArraySubscriptExpression(#$, $1, $3); }
;
/* Possibly empty, comma-separated list of call arguments. */
argument_list:
non_empty_argument_list { $$ = $1; }
| { $$ = new std::vector<Expression*>(); }
;
non_empty_argument_list:
non_empty_argument_list ',' expression { $$ = $1; $1->push_back($3); }
| expression { $$ = new std::vector<Expression*>({$1}); }
;
Related
flex/bison gives me a syntax error after printing the result and if another input is written to work
After running the compiler and type the entry on, works fine. Then if I type another entry (and if it also works ok) it gave me Syntax Error. I must mention I am a new in the world of flex/bison. To be honest I do not know what's could be wrong, some one please help? Here is my lex code: %{ #include <stdio.h> #include "calc.tab.h" void yyerror(char *); %} %option noyywrap DIGIT -?[0-9] NUM -?{DIGIT}+ %% {NUM} { yylval = atoi(yytext); return NUMBER; } [-()+*/;] { return *yytext; } "evaluar" { return EVALUAR; } [[:blank:]] ; \r {} . yyerror("caracter invalido"); %% and here it is my bison code: %{ #include <stdio.h> int yylex(void); void yyerror(char *s); %} %token NUMBER EVALUAR %start INICIO %left '+' '-' %left '*' '/' %% INICIO : EVALUAR '(' Expr ')' ';' { printf("\nResultado=%d\n", $3); } ; Expr : Expr '+' Expr { $$ = $1 + $3; } | Expr '-' Expr { $$ = $1 - $3; } | Expr '*' Expr { $$ = $1 * $3; } | Expr '/' Expr { $$ = $1 / $3; } | NUMBER { $$ = $1; } ; %% int main(){ return(yyparse()); } void yyerror(char *s){ printf("\n%s\n", s); } int yywrap(){ return 1; } Here is an example of the output: C:\Users\Uchih\Desktop\bison>a evaluar(2+3); Resultado=5 evaluar(3+2); syntax error
Your parser is written to accept a single input INICIO rule/clause, after which it will expect an EOF (and will exit after it sees it). Since instead you have a second INICIO, you get a syntax error message. To fix this, you want your grammar to accept one or more things. Add a rule like this: input: INICIO | input INICIO ; and change the start to %start input
Bison/Flex Concat Char *
I made a .l and a .y files for the parsing and the calculation result for my new language: it is working fine! A string like this: SET(IFEL(MAJEQ(IFEL(EQ(VAL(16),MUL(VAL(2),VAL(8))),VAL(11),VAL(10)),VAL(10)),MUL(VAL(3),VAL(4)),SUB(VAL(6),VAL(2)))) is correctly parsed and calculated by my two files: %{ #include <stdio.h> #include <string> #include <cstring> using namespace std; extern int yylex(); extern void yyerror(char*); %} //Symbols %union { char *str_val; int int_val; }; %token OPEN; %token CLOSE; %token SET; %token STORE; %token MUL; %token ADD; %token DIV; %token SUB; %token ABS; %token IFEL; %token AND; %token OR; %token NOT; %token MAJEQ; %token MINEQ; %token MAJ; %token MIN; %token EQ; %token GET; %token S; /* separator */ %token VAR; %token VAL; %token <int_val> NUMBER %token <str_val> IDENTIFIER %type <int_val> Exp %type <int_val> Cond %type <int_val> Check %type <int_val> Var %start Expression %% Expression: /* empty */ | SET OPEN Exp CLOSE { printf("value set %d\n",$3); } | STORE OPEN VAR OPEN IDENTIFIER CLOSE S Exp CLOSE { printf("var %s set on %d\n",$5,$8); } ; Exp: Var | IFEL OPEN Cond S Exp S Exp CLOSE { if($3==1){ $$ = $5; }else{ $$ = $7; } } | ADD OPEN Exp S Exp CLOSE { $$ = $3+$5; } | SUB OPEN Exp S Exp CLOSE { $$ = $3-$5; } | MUL OPEN Exp S Exp CLOSE { $$ = $3*$5; } | DIV OPEN Exp S Exp CLOSE { $$ = $3/$5; //TBD check div 0 } | ABS OPEN Exp CLOSE { $$ = $3; //TBD } ; Cond: NOT OPEN Cond CLOSE { int result = $3; if(result==1) $$ = 0; else $$ = 1; } | AND OPEN Cond S Cond CLOSE { int result1 = $3; int result2 = $5; if(result1==1 && result2==1) $$ = 1; else $$ = 0; } | OR OPEN Cond S Cond CLOSE { int result1 = $3; int result2 = $5; if(result1==1 || result2==1) $$ = 1; else $$ = 0; } | Check ; Check: MAJ OPEN Exp S Exp CLOSE { int val1 = $3; int val2 = $5; if(val1>val2) $$ = 1; else $$ = 0; } | MIN OPEN Exp S Exp CLOSE { int val1 = $3; int val2 = $5; if(val1<val2) $$ = 1; else $$ = 0; } | EQ OPEN Exp S Exp CLOSE { int val1 = $3; int val2 = $5; 
if(val1==val2) $$ = 1; else $$ = 0; } | MAJEQ OPEN Exp S Exp CLOSE { int val1 = $3; int val2 = $5; if(val1>=val2) $$ = 1; else $$ = 0; } | MINEQ OPEN Exp S Exp CLOSE { int val1 = $3; int val2 = $5; if(val1<=val2) $$ = 1; else $$ = 0; } ; Var: VAR OPEN IDENTIFIER CLOSE { $$ = atoi($3); //TBD } | VAL OPEN NUMBER CLOSE { $$ = $3; } | GET OPEN CLOSE { $$ = 11; //TBD } ; %% and %{ #include <string> #include "expression.tab.h" void yyerror(char*); extern void printVars(); int yyparse(void); %} %% [ \t\n]+ { /* ignore */ }; "(" return(OPEN); ")" return(CLOSE); "SET" return(SET); "STORE" return(STORE); "MUL" return(MUL); "ADD" return(ADD); "DIV" return(DIV); "SUB" return(SUB); "ABS" return(ABS); "IFEL" return(IFEL); "NOT" return(NOT); "AND" return(AND); "OR" return(OR); "MAJEQ" return(MAJEQ); "MINEQ" return(MINEQ); "MAJ" return(MAJ); "MIN" return(MIN); "EQ" return(EQ); "VAR" return(VAR); "VAL" return(VAL); "GET" return(GET); "," return(S); [[:digit:]]+ { yylval.int_val = atoi(yytext); return NUMBER;} [[:alnum:]]+ { yylval.str_val = strdup(yytext); return IDENTIFIER;} . return yytext[0]; %% void yyerror(char *s){ printf("<ERR> %s at %s in this line:\n", s, yytext); } int yywrap (void){ } int main(int num_args, char** args){ if(num_args != 2) {printf("usage: ./parser filename\n"); exit(0);} FILE* file = fopen(args[1],"r"); if(file == NULL) {printf("couldn't open %s\n",args[1]); exit(0);} yyin = file; yyparse(); fclose(file); } But actually the value inside Var in input as you can see will not be static but should be dynamic. So my next step is to modify the project: instead of calculating should write a C++ code in order to make the calculation dynamic. My questions: 1) do you have a better solution instead of concatenate every step a char * for making the code? 
2) If not, can you help me to find a smart way to concatenate all the strings and solving the following error that I face while compiling: expression.y:75:43: error: invalid operands of types ‘const char [2]’ and ‘char*’ to binary ‘operator+’ $$ = "("+$3+"-"+$5+")"; ... I would like to don't use the "malloc" every time... char* str; str = malloc(1+strlen(text1)+strlen(text2)); strcpy(str, text1); strcat(str, text2); is there any smarter way? Following the flex and bison modified files: expression.l %{ #include <string> #include "expression.tab.h" void yyerror(char*); extern void printVars(); int yyparse(void); %} %% [ \t\n]+ { /* ignore */ }; "(" return(OPEN); ")" return(CLOSE); "SET" return(SET); "STORE" return(STORE); "MUL" return(MUL); "ADD" return(ADD); "DIV" return(DIV); "SUB" return(SUB); "ABS" return(ABS); "IFEL" return(IFEL); "NOT" return(NOT); "AND" return(AND); "OR" return(OR); "MAJEQ" return(MAJEQ); "MINEQ" return(MINEQ); "MAJ" return(MAJ); "MIN" return(MIN); "EQ" return(EQ); "VAR" return(VAR); "VAL" return(VAL); "GET" return(GET); "," return(S); ([a-z0-9]+)|([0-9]+.[0-9]+) { yylval.str_val = strdup(yytext); return IDENTIFIER;} . 
return yytext[0]; %% void yyerror(char *s){ printf("<ERR> %s at %s in this line:\n", s, yytext); } int yywrap (void){ } int main(int num_args, char** args){ if(num_args != 2) {printf("usage: ./parser filename\n"); exit(0);} FILE* file = fopen(args[1],"r"); if(file == NULL) {printf("couldn't open %s\n",args[1]); exit(0);} yyin = file; yyparse(); fclose(file); } expression.y %{ #include <stdio.h> #include <string> #include <cstring> using namespace std; extern int yylex(); extern void yyerror(char*); %} //Symbols %union { char *str_val; int int_val; }; %token OPEN; %token CLOSE; %token SET; %token STORE; %token MUL; %token ADD; %token DIV; %token SUB; %token ABS; %token IFEL; %token AND; %token OR; %token NOT; %token MAJEQ; %token MINEQ; %token MAJ; %token MIN; %token EQ; %token GET; %token S; /* separator */ %token VAR; %token VAL; %token <str_val> IDENTIFIER %type <str_val> Exp %type <str_val> Cond %type <str_val> Check %type <str_val> Var %start Expression %% Expression: /* empty */ | SET OPEN Exp CLOSE { printf("%s\n",$3); } | STORE OPEN VAR OPEN IDENTIFIER CLOSE S Exp CLOSE { printf("var %s with following code:\n%s\n",$5,$8); } ; Exp: Var | IFEL OPEN Cond S Exp S Exp CLOSE { $$ = "("+$3+" == 'true') ? ("+$5+") : ("+$7+")"; } | ADD OPEN Exp S Exp CLOSE { $$ = "("+$3+"+"+$5+")"; } | SUB OPEN Exp S Exp CLOSE { $$ = "("+$3+"-"+$5+")"; } | MUL OPEN Exp S Exp CLOSE { $$ = "("+$3+"*"+$5+")"; } | DIV OPEN Exp S Exp CLOSE { $$ = "("+$3+"/"+$5+")"; //TBD check div 0 } | ABS OPEN Exp CLOSE { $$ = "("+$3+">0) ? "+$3+" : "(+$3+"*(-1))"; } ; Cond: NOT OPEN Cond CLOSE { $$ = "("+$3+"=='true') ? 'false' : 'true'"; } | AND OPEN Cond S Cond CLOSE { $$ = "("+$3+"=='true' && "+$5+"=='true') ? 'true' : 'false'"; } | OR OPEN Cond S Cond CLOSE { $$ = "("+$3+"=='true' || "+$5+"=='true') ? 'true' : 'false'"; } | Check ; Check: MAJ OPEN Exp S Exp CLOSE { $$ = "("+$3+">"+$5+") ? 'true' : 'false'"; } | MIN OPEN Exp S Exp CLOSE { $$ = "("+$3+"<"+$5+") ? 
'true' : 'false'"; } | EQ OPEN Exp S Exp CLOSE { $$ = "("+$3+"=="+$5+") ? 'true' : 'false'"; } | MAJEQ OPEN Exp S Exp CLOSE { $$ = "("+$3+">="+$5+") ? 'true' : 'false'"; } | MINEQ OPEN Exp S Exp CLOSE { $$ = "("+$3+"<="+$5+") ? 'true' : 'false'"; } ; Var: VAR OPEN IDENTIFIER CLOSE { //TBD check if variable exists in the engine $$ = $3; } | VAL OPEN IDENTIFIER CLOSE { //TBD check correct value $$ = $3; } | GET OPEN CLOSE { $$ = "getField()"; //TBD to implement in the engine } ; %%
It's difficult to do string concatenation without some form of memory allocation. Of course, it is possible to avoid malloc -- you could use new instead, or hide the memory allocation inside of a std::string or std::stringstream -- but in the end, you're going to have to deal with dynamic memory allocation, and furthermore with releasing the memory when you no longer need it. It's worth noting that your (correct) use of strdup in your scanner action for IDENTIFIER results in a memory leak, because you never free the allocated memory. So you already need to deal with this issue. As you note, doing string concatenation in C can be pretty clunky. In a case like this, it's worth the trouble to reduce the clunkiness. My preferred solution is my wrapper function concatf, whose prototype is just like printf except that it returns a malloc'd character string instead of printing. (See this answer for implementations on various platforms). With the help of this function, it would be possible to write: Exp: Var | IFEL OPEN Cond S Exp S Exp CLOSE { $$ = concatf("(%s == 'true') ? (%s) : (%s)", $3, $5, $7); } Note that x == 'true' is not valid C++. You probably meant == true, but that's a dangerous idiom; better is an explicit cast to bool (although that's actually redundant in the context of the ternary operator), so I think you actually want $$ = concatf("bool(%s) ? (%s) : (%s)", $3, $5, $7); or just $$ = concatf("(%s) ? (%s) : (%s)", $3, $5, $7); But, as mentioned above, that results in memory leaks because the malloc'd strings are never freed. So let's fix that. First, in each action, it is necessary to explicitly free all malloc'd values which are never used again. In simple cases like yours, that will be all malloc'd values, except for unit productions in which the malloc'd value is just assigned to a different non-terminal.
Since all IDENTIFIER have semantic values created by strdup, it's reasonable to assume that all str_val values have been malloc'd (and this needs to be a constraint; if you ever create a str_val value from a literal character string, you'll end up with a problem). Now, we can write the rule: Exp: Var { /* No free needed; this is a unit production */ } | IFEL OPEN Cond S Exp S Exp CLOSE { $$ = concatf("(%s) ? (%s) : (%s)", $3, $5, $7); free($3); free($5); free($7); } Another example. Note the added strdup in the last rule. Var: VAR OPEN IDENTIFIER CLOSE { $$ = $3; /* No free needed; value is moved on the stack */ } | VAL OPEN IDENTIFIER CLOSE { $$ = $3; /* As above */ } | GET OPEN CLOSE { $$ = strdup("getField()"); /* str_val's must be malloc'd */ } ; (There are alternatives to calling strdup on literals, but usually the use case is uncommon, and the overhead is slight.) That style will handle all cases where rule actions are executed, but there are also occasions when bison will discard values from the stack without ever invoking a rule. That will happen during error recovery, and at the end of an unsuccessful parse when the parser stack is non-empty. To assist with this case, bison lets you declare a destructor action, which will be invoked on each stack value which it discards. In this case, the declaration is almost trivial: %destructor { free($$); } <str_val>
Well... I solved the issue in this way: ... Exp: Var | IFEL OPEN Cond S Exp S Exp CLOSE { string t1 = $3; string t2 = $5; string t3 = $7; string result = "("+t1+" == 'true') ? ("+t2+") : ("+t3+")"; $$ = (char*)result.c_str(); } ... It is working fine...
Flex/Bison not evaluating properly
For some reason or another, bison doesn't want to do any evaluation. Compilation of all files goes smoothly and the program runs. When I enter the expression 4+5 and press return, it creates tokens for 4 + 5 respectively. I can even put in some printf into the places where bison recognizes the attributes of each token including the plus (43). However the program never evaluates this production expr '+' term { $$ = $1 + $3; }. It's simply never called at least to my knowledge and even if it was this production assign '\n' { printf("%d\n", $1); } never prints out the value. Upon ^D to quit, it fires void yyerror(const char *). Any help on this matter is much appreciated. Thanks! //FLEX %{ //#include <stdio.h> #include "y.tab.h" %} %option noyywrap letter [A-Za-z] digit [0-9] space [ \t] var {letter} int {digit}+ ws {space}+ %% {var} { yylval = (int)yytext[0]; return VAR; } {int} { yylval = atoi(yytext); return CONST; } {ws} { } . { return (int)yytext[0]; } %% /* nothing */ . //BISON %{ //INCLUDE //#include <ctype.h> //DEFINE #define YYDEBUG 1 //PROTOTYPE void yyerror(const char *); void print_welcome(); int get_val(int); void set_val(int, int); %} %token CONST %token VAR %% session : { print_welcome(); } eval ; eval : eval line | ; line : assign '\n' { printf("%d\n", $1); } ; assign : VAR '=' expr { set_val($1, $3); $$ = $3; } | expr { $$ = $1; } ; expr : expr '+' term { $$ = $1 + $3; } | expr '-' term { $$ = $1 - $3; } | term { $$ = $1; } ; term : term '*' factor { $$ = $1 * $3; } | term '/' factor { $$ = $1 / $3; } | term '%' factor { $$ = $1 % $3; } | factor { $$ = $1; } ; factor : '(' expr ')' { $$ = $2; } | CONST { $$ = $1; } | VAR { $$ = get_val($1); } ; %% void yyerror(const char * s) { fprintf(stderr, "%s\n", s); } void print_welcome() { printf("Welcome to the Simple Expression Evaluator.\n"); printf("Enter one expression per line, end with ^D\n\n"); } static int val_tab[26]; int get_val(int var) { return val_tab[var - 'A']; } void set_val(int var, int val) { 
val_tab[var - 'A'] = val; } . //MAIN //PROTOTYPE int yyparse(); int main() { extern int yydebug; yydebug = 0; yyparse(); return 0; }
Your lex file does not have any rule which matches \n, because in lex/flex, . matches any character except line-end. The default rule for lex (or flex) echoes and otherwise ignores the matched character, so that's what happens to the \n. Since the parser won't be able to accept a line unless it sees a \n token, it will eventually be forced to present you with a syntax error. So you need to change the rule . { return (int)yytext[0]; } to .|\n { return (int)yytext[0]; } (I wouldn't have bothered with the cast to int but it's certainly not doing any harm, so I left it in.)
printing tokens using lex and yacc
I have a lex file , a yacc file and main.cpp file. My main.cpp looks like int main(int argc, char **argv) { if (argc == 1) { int token; curr_filename = "<stdin>"; yyin = stdin; yyparse(); } else { for (int i = 1; i < argc; ++i) { curr_filename = argv[i]; yyin = std::fopen(argv[i], "r"); if (yyin) { yyparse(); std::fclose(yyin); } else { utility::print_error(argv[i], "cannot be opened"); } } } if (yynerrs > 0) { std::cerr << "Compilation halted due to lexical or syntax errors.\n"; exit(1); } This is helping to do a parse .But now i want to print token generated from the lex file also. So i do a little amendment to it by calling yylex() as follows int main(int argc, char **argv) { if (argc == 1) { int token; curr_filename = "<stdin>"; yyin = stdin; // calling yylex to get token while(token= yylex()) { switch(token){ case 258 : std::cout << "class" ; default : std::cout << "token " ; } yyparse(); } //rest of the code same but nothing is getting printed to output. Any help how to get tokens printed on standard output or file flex file %option noyywrap %option yylineno %{ #include "flexbison.hpp" #include "tokentable.hpp" #include "symboltable.hpp" #include "y.tab.h" #include <stdio.h> #define YY_USER_ACTION yylloc.first_line = yylloc.last_line = yylineno; static const int MAX_STR_CONST = 1025; char string_buf[MAX_STR_CONST]; // buffer to store string contstants encountered in source file char *string_buf_ptr; int num_comment = 0; // count to keep track how many opening comment tokens have been encountered std::size_t curr_lineno = 0; // keep track of current line number of source file bool str_too_long = false; // used to handle string constant size error check %} %x COMMENT %x LINECOMMENT %x STRING DARROW => %% "(*" { BEGIN(COMMENT); num_comment++; } "*)" { if (num_comment <= 0) { yylval.error_msg = "Unmatched *)"; return ERROR; } } <COMMENT>"*)" { num_comment--; if (num_comment < 0) { yylval.error_msg = "Unmatched *)"; return ERROR; } if (num_comment == 0) { 
BEGIN(INITIAL); } } <COMMENT>"(*" { num_comment++; } <COMMENT>[^\n] { // eat everything within comments } <COMMENT>\n { ++curr_lineno; } "--"[^\n]* { BEGIN(LINECOMMENT); } <LINECOMMENT>\n { ++curr_lineno; BEGIN(INITIAL); } <COMMENT><<EOF>> { BEGIN(INITIAL); yylval.error_msg = "EOF in comment"; return ERROR; } "=>" { return DARROW; } (?i:class) { return CLASS; } (?i:else) { return ELSE; } (?i:in) { return IN; } (?i:then) { return THEN; } (?i:fi) { return FI; } (?i:if) { return IF; } (?i:inherits) { return INHERITS; } (?i:let) { return LET; } (?i:loop) { return LOOP; } (?i:pool) { return POOL; } (?i:while) { return WHILE; } (?i:case) { return CASE; } (?i:esac) { return ESAC; } (?i:of) { return OF; } (?i:new) { return NEW; } (?i:isvoid) { return ISVOID; } (?i:not) { return NOT; } t(?i:rue) { yylval.boolean = true; return BOOL_CONST; } f(?i:alse) { yylval.boolean = false; return BOOL_CONST; } [0-9]+ { yylval.symbol = inttable().add(yytext); return INT_CONST; } "<=" { return LE; } "<-" { return ASSIGN; } [A-Z][a-zA-Z0-9_]* { yylval.symbol = idtable().add(yytext); return TYPEID; } [a-z][a-zA-Z0-9_]* { yylval.symbol = idtable().add(yytext); return OBJECTID; } ";"|","|"{"|"}"|":"|"("|")"|"+"|"-"|"*"|"/"|"="|"~"|"<"|"."|"#" { return *yytext; } \n { ++curr_lineno; } [ \f\r\t\v] { // eat whitespace } /* * String constants (C syntax) * Escape sequence \c is accepted for all characters c. Except for * \n \t \b \f, the result is c. 
* */ \" { BEGIN(STRING); string_buf_ptr = string_buf; memset(string_buf, 0, MAX_STR_CONST); } <STRING>\" { BEGIN(INITIAL); yylval.symbol = stringtable().add(string_buf); return STR_CONST; } <STRING>\0[^\n]*\" { BEGIN(INITIAL); if (str_too_long) { str_too_long = false; } else { yylval.error_msg = "String contains null character"; return ERROR; } } <STRING>\0[^"]*\n { if (str_too_long) { yyinput(); /* eat quote */ BEGIN(INITIAL); str_too_long = false; } else { if (yytext[yyleng - 1] != '\\') { BEGIN(INITIAL); yylval.error_msg = "String contains null character"; return ERROR; } } } <STRING><<EOF>> { BEGIN(INITIAL); yylval.error_msg = "EOF in string constant"; return ERROR; } <STRING>\\ { if (strlen(string_buf) >= MAX_STR_CONST - 1) { str_too_long = true; unput('\0'); yylval.error_msg = "String constant too long"; return ERROR; } char ahead = yyinput(); switch (ahead) { case 'b': *string_buf_ptr++ = '\b'; break; case 't': *string_buf_ptr++ = '\t'; break; case 'n': *string_buf_ptr++ = '\n'; break; case 'f': *string_buf_ptr++ = '\f'; break; case '\n': ++curr_lineno; *string_buf_ptr++ = '\n'; break; case '\0': unput(ahead); break; default: *string_buf_ptr++ = ahead; } } <STRING>\n { ++curr_lineno; BEGIN(INITIAL); yylval.error_msg = "Unterminated string constant"; return ERROR; } <STRING>. { if (strlen(string_buf) >= MAX_STR_CONST - 1) { str_too_long = true; unput('\0'); yylval.error_msg = "String constant too long"; return ERROR; } *string_buf_ptr++ = *yytext; } . 
/* error for invalid tokens */ { yylval.error_msg = std::string(yytext) + " is not a valid character in the current context."; return ERROR; } %% bison file %{ #include "flexbison.hpp" #include "symboltable.hpp" #include "tokentable.hpp" #include "ast.hpp" #include <iostream> // convinience function for setting location of each ast node #define SETLOC(lval,node) (lval)->setloc((node).first_line, curr_filename) // both defined in main.cpp extern ProgramPtr ast_root; extern std::string curr_filename; // both defined in lexer extern int yylex(); extern int yylineno; void yyerror(char *); %} %token CLASS 258 ELSE 259 FI 260 IF 261 IN 262 %token INHERITS 263 LET 264 LOOP 265 POOL 266 THEN 267 WHILE 268 %token CASE 269 ESAC 270 OF 271 DARROW 272 NEW 273 ISVOID 274 %token <symbol> STR_CONST 275 INT_CONST 276 %token <boolean> BOOL_CONST 277 %token <symbol> TYPEID 278 OBJECTID 279 %token ASSIGN 280 NOT 281 LE 282 ERROR 283 %type <program> program %type <clazz> class %type <classes> class_list %type <attribute> attribute %type <attributes> attribute_list %type <method> method %type <methods> method_list %type <expression> expression %type <expression> let_expr %type <expressions> expression_list %type <expressions> method_expr_list %type <formal> formal %type <formals> formal_list %type <branch> case %type <cases> case_list %nonassoc '=' %left LET %right ASSIGN %left NOT %left '+' '-' %left '*' '/' %left ISVOID %left '~' %left '#' %left '.' 
%nonassoc LE '<' %% program : class_list { #$ = #1; ast_root = std::make_shared<Program>($1); } ; class_list : class { $$ = Classes(); $$.push_back($1); } | class_list class { $$.push_back($2); } ; class : CLASS TYPEID '{' attribute_list method_list '}' ';' { $$ = std::make_shared<Class>($2, idtable().add("Object"), $4, $5); SETLOC($$, #1); } | CLASS TYPEID INHERITS TYPEID '{' attribute_list method_list '}' ';' { $$ = std::make_shared<Class>($2, $4, $6, $7); SETLOC($$, #1); } | error ';' { yyerrok; } ; attribute_list : attribute ';' { $$ = Attributes(); $$.push_back($1); } | attribute_list attribute ';' { $$.push_back($2); } | error ';' { yyerrok; } ; attribute : OBJECTID ':' TYPEID { $$ = std::make_shared<Attribute>($1, $3, std::make_shared<NoExpr>()); SETLOC($$, #1); } | OBJECTID ':' TYPEID ASSIGN expression { $$ = std::make_shared<Attribute>($1, $3, $5); SETLOC($$, #5); } ; method_list : method ';' { $$ = Methods(); $$.push_back($1); } | method_list method ';' { $$.push_back($2); } | error ';' { yyerrok; } ; method : OBJECTID '(' formal_list ')' ':' TYPEID '{' expression '}' { $$ = std::make_shared<Method>($1, $6, $3, $8); SETLOC($$, #1); } | OBJECTID '(' ')' ':' TYPEID '{' expression '}' { $$ = std::make_shared<Method>($1, $5, Formals(), $7); SETLOC($$, #1); } ; formal_list : formal { $$ = Formals(); $$.push_back($1); } | formal_list ',' formal { $$.push_back($3); } ; formal : OBJECTID ':' TYPEID { $$ = std::make_shared<Formal>($1, $3); SETLOC($$, #1); } ; case_list : case { $$ = Cases(); $$.push_back($1); } | case_list case { $$.push_back($2); } ; case : OBJECTID ':' TYPEID DARROW expression ';' { $$ = std::make_shared<CaseBranch>($1, $3, $5); SETLOC($$, #5); } ; method_expr_list : expression { $$ = Expressions(); $$.push_back($1); } | method_expr_list ',' expression { $$.push_back($3); } ; expression_list : expression ';' { $$ = Expressions(); $$.push_back($1); } | expression_list expression ';' { $$.push_back($2); } | error ';' { yyerrok; } ; let_expr : 
OBJECTID ':' TYPEID IN expression %prec LET { $$ = std::make_shared<Let>($1, $3, std::make_shared<NoExpr>(), $5); SETLOC($$, #5); } | OBJECTID ':' TYPEID ASSIGN expression IN expression %prec LET { $$ = std::make_shared<Let>($1, $3, $5, $7); SETLOC($$, #5); } | OBJECTID ':' TYPEID ',' let_expr { $$ = std::make_shared<Let>($1, $3, std::make_shared<NoExpr>(), $5); SETLOC($$, #5); } | OBJECTID ':' TYPEID ASSIGN expression ',' let_expr { $$ = std::make_shared<Let>($1, $3, $5, $7); SETLOC($$, #4); } | error ',' let_expr { yyerrok; } ; expression : OBJECTID ASSIGN expression { $$ = std::make_shared<Assign>($1, $3); SETLOC($$, #3); } | expression '.' OBJECTID '(' method_expr_list ')' { $$ = std::make_shared<DynamicDispatch>($1, $3, $5); SETLOC($$, #1); } | expression '.' OBJECTID '(' ')' { $$ = std::make_shared<DynamicDispatch>($1, $3, Expressions()); SETLOC($$, #1); } | expression '#' TYPEID '.' OBJECTID '(' method_expr_list ')' { $$ = std::make_shared<StaticDispatch>($1, $3, $5, $7); SETLOC($$, #1); } | expression '#' TYPEID '.' 
OBJECTID '(' ')' { $$ = std::make_shared<StaticDispatch>($1, $3, $5, Expressions()); SETLOC($$, #1);} | OBJECTID '(' method_expr_list ')' { $$ = std::make_shared<DynamicDispatch>(std::make_shared<Object>(idtable().add("self")), $1, $3); SETLOC($$, #1); } | OBJECTID '(' ')' { $$ = std::make_shared<DynamicDispatch>(std::make_shared<Object>(idtable().add("self")), $1, Expressions()); SETLOC($$, #1); } | IF expression THEN expression ELSE expression FI { $$ = std::make_shared<If>($2, $4, $6); SETLOC($$, #2); } | WHILE expression LOOP expression POOL { $$ = std::make_shared<While>($2, $4); SETLOC($$, #2); } | '{' expression_list '}' { $$ = std::make_shared<Block>($2); SETLOC($$, #2); } | LET let_expr { $$ = $2; SETLOC($$, #2); } | CASE expression OF case_list ESAC { $$ = std::make_shared<Case>($2, $4); SETLOC($$, #2); } | NEW TYPEID { $$ = std::make_shared<New>($2); SETLOC($$, #2); } | ISVOID expression { $$ = std::make_shared<IsVoid>($2); SETLOC($$, #2); } | expression '+' expression { $$ = std::make_shared<Plus>($1, $3); SETLOC($$, #1); } | expression '-' expression { $$ = std::make_shared<Sub>($1, $3); SETLOC($$, #1); } | expression '*' expression { $$ = std::make_shared<Mul>($1, $3); SETLOC($$, #1); } | expression '/' expression { $$ = std::make_shared<Div>($1, $3); SETLOC($$, #1); } | '~' expression { $$ = std::make_shared<Complement>($2); SETLOC($$, #2); } | expression '<' expression { $$ = std::make_shared<LessThan>($1, $3); SETLOC($$, #1); } | expression LE expression { $$ = std::make_shared<LessThanEqualTo>($1, $3); SETLOC($$, #1); } | expression '=' expression { $$ = std::make_shared<EqualTo>($1, $3); SETLOC($$, #1); } | NOT expression { $$ = std::make_shared<Not>($2); SETLOC($$, #2); } | '(' expression ')' { $$ = $2; SETLOC($$, #2); } | OBJECTID { $$ = std::make_shared<Object>($1); SETLOC($$, #1); } | INT_CONST { $$ = std::make_shared<IntConst>($1); SETLOC($$, #1); } | STR_CONST { $$ = std::make_shared<StringConst>($1); SETLOC($$, #1); } | BOOL_CONST { $$ = 
std::make_shared<BoolConst>($1); SETLOC($$, #1); } ; %% // utility function for converting bison tokens to its string representation // for better error reporting std::string convert_token(int token) { std::string rep; switch (token) { case CLASS: rep = "class"; break; case ELSE: rep = "else"; break; case FI: rep = "fi"; break; case IF: rep = "if"; break; case IN: rep = "in"; break; case INHERITS: rep = "inherits"; break; case LET: rep = "let"; break; case LOOP: rep = "loop"; break; case POOL: rep = "pool"; break; case THEN: rep = "then"; break; case WHILE: rep = "while"; break; case CASE: rep = "case"; break; case ESAC: rep = "esac"; break; case OF: rep = "of"; break; case DARROW: rep = "=>"; break; case NEW: rep = "new"; break; case ISVOID: rep = "isvoid"; break; case ASSIGN: rep = "<-"; break; case NOT: rep = "not"; break; case LE: rep = "<="; break; case STR_CONST: rep = "STR_CONST = " + yylval.symbol.get_val(); break; case INT_CONST: rep = "INT_CONST = " + yylval.symbol.get_val(); break; case BOOL_CONST: rep = "BOOL_CONST = " + yylval.boolean; break; case TYPEID: rep = "TYPEID = " + yylval.symbol.get_val(); break; case OBJECTID: rep = "OBJECTID = " + yylval.symbol.get_val(); break; default: rep = (char) token; } return rep; } void yyerror(char *) { if (yylval.error_msg.length() <= 0) std::cerr << curr_filename << ":" << yylineno << ": " << "error: " << "syntax error near or at character or token '" << convert_token(yychar) << "'\n"; else std::cerr << curr_filename << ":" << yylineno << ": " << "error: " << yylval.error_msg << "\n"; }
I'm not sure why you don't see any output, but I didn't look through all that code. If you call yylex from main, that will read and effectively discard one token. Then when you call yyparse, yyparse will call yylex itself until yylex returns 0. Presumably (but not certainly), the next time you call yylex from the while loop in main, it will again return 0 and the loop will end. The result should be that one word is printed from the while loop, followed by whatever output is produced by your yyparse (if any), which will possibly signal a syntax error since it never sees the first token from the input. I doubt that is what you wanted to do, but it's not totally clear. If you want to see the tokens as they are being lexed, then insert the statement to print the token in each lex action. Or tell flex to call the scanning function something else, like yylex_internal and create your own function called yylex() which calls yylex_internal and then prints the result before returning it. If, as seems likely, you are only interested in this for debugging purposes, then you'd probably be better off using the -d command line option to flex, which will generate debugging output automatically. It might not be exactly the debugging format you want, but it's a lot easier to do and undo :) To change the name of the yylex function generated by flex, insert something like the following in the code block at the top of the .l file: #define YY_DECL int yylex_internal() The flex-generated file declares the scanning function as follows: YY_DECL { /* body of function */ } So you can rename the function or add arguments, or even change the return type by defining the YY_DECL macro. See the Generated Scanner section of the flex manual. By the way, it's not generally considered good style to manually number all the terminal tokens, even though bison allows you to do it. 
You should just let bison number them, and include the definitions in a source file by #include "y.tab.h" (or whatever you've called the bison header file; you can easily change the name by using the -o option).
Bison %prec doesn't work
I'm implementing a simple calculator with flex and bison. I'd like the following input to give -4 and not 4: -2^2 In order to achieve -4, I had to declare the priority of the ^ operator to be higher than the priority of the unary minus operator, but it doesn't work. This is the bison code: %{ #include <iostream> #include <math.h> using namespace std; void yyerror(const char *s); int yylex(); %} %union { int int_val; char* string_val; double double_val; } %token INTEGER %left '+' '-' %left '*' '/' '%' %left UMINUS UPLUS %right '^' %type <int_val> expr_int INTEGER %% program: line '\n' | '\n' { return 0; } ; line: expr_int { cout<<$1<<endl; return 0; } ; expr_int: expr_int '+' expr_int { $$ = $1 + $3; } | expr_int '-' expr_int { $$ = $1 - $3; } | expr_int '*' expr_int { $$ = $1 * $3; } | expr_int '^' expr_int { $$ = pow($1,$3); } | '-' INTEGER %prec UMINUS { $$ = -$2; } | '+' INTEGER %prec UPLUS { $$ = $2; } | INTEGER ; %% void yyerror(const char *s) { printf("error"); } int main(void) { while(yyparse()==0); return 0; } And this is the flex code: %{ #include <iostream> #include "calc.tab.h" using namespace std; void yyerror(const char *s); %} INTEGER [1-9][0-9]*|0 UNARY [+|\-] BINARY [+|\-|*|^|] WS [ \t]+ %% {INTEGER} { yylval.int_val=atoi(yytext); return INTEGER; } {UNARY}|{BINARY}|\n { return *yytext; } {WS} {} . {} %% ////////////////////////////////////////////////// int yywrap(void) { return 1; } // Callback at end of file Why doesn't bison first handle 2^2 and then apply the unary minus, as I defined? It keeps printing 4 instead... Thanks a lot to anyone who can help.
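Your syntax for unary minus: '-' INTEGER %prec UMINUS does not allow its argument to be an expression. So it unambiguously grabs the following INTEGER, and the %prec declaration is never needed. To get -4, make the operand an expression instead: '-' expr_int %prec UMINUS. Because '^' is declared with higher precedence than UMINUS, the parser will then shift '^' and reduce 2^2 before it reduces the unary minus. Personal opinion: the problem with %prec is that yacc/bison does not complain if the declaration is not needed, so you never really know whether it does anything or not. IMHO it's really better to just write an unambiguous grammar.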