A parser program for the following grammar - regex

Write a parser (both Yacc and Lex files) that uses the following productions and actions:
S -> cSS {print “x”}
S -> a {print “y”}
S -> b {print “z”}
Indicate the string that it will print when the input is cacba.
I am getting this error: when I give input to it, it says valid input and also says syntax error.
My Scanner Code is this
%{
#include "prac.h"
%}
%%
[c] {return C; }
[a] {return A; }
[b] {return B; }
[ \t] ;
\n { return 0; }
. { return yytext[0]; }
%%
int yywrap(void) {
return 1;
}
And my yacc code is this:
%{
#include <stdio.h>
%}
%token A B C
%%
statement: S {printf("Valid Input"); }
;
S: C S S {printf("Print x\n");}
| A {printf("Print y\n");}
| B {printf("Print z\n");}
;
%%
int main()
{
return yyparse();
}
yyerror(char *s)
{
printf("\n%s\n",s);
printf("Invalid Input");
fprintf(stderr,"At line %d %s ",s,yylineno);
}
How can I fix this?

(Comments converted to an answer)
#ChrisDodd wrote:
Best guess -- you're running on windows, so you're getting a \r (carriage return) character before the newline which is causing your error. Try adding \r to the [ \t] pattern to ignore it.
#Cyclone wrote:
Change your fprintf() statement to fprintf(stderr, "At line %d %s", yylineno, s); not that it will solve your problem.
The OP wrote:
You mean I should add \r into \t so the new regex for it will be [\r\t] Am I right ?
#rici wrote:
#chris suggests [ \r\t]. If you have Windows somewhere in the loop, I agree.

Related

bison parse error in runtime confusion

I'm working on vmWare linux ubuntu on a bison-dlex project, and I have an error in my bison file which I can't get over. In my "line' definition I have " logExp '\n' " definition, but for some reason it never gets there even though it does recognize the expression as logExp.
line:
expr '\n' { printf("\nExpression = %d\n", $1); }
| logExp '\n' { printf("\nNEVER GETS HERE!!\n"); } //ERROR
;
logExp:
expr AND expr { $$ = 0 ; printf("\n$1=%d, $3=%d\n",$1,$3); } //PRINTS GOOD
| AND { }
;
input:
5&&6
output:
$1=5, $3=6
Error: parse error
If it recognizes the logExp, how come it doesn't recognize the line above??
..HELP ?

Useless rule in Bison

For some reason bison is rejecting a specific rule, the notequal_expression, beware that Im just starting to learn the whole concept so my line of thought is not so mature, the input file: ( The Error is: "string.y contains 1 useless nonterminal and 1 useless rule." )
/* Parser for StringC */
%{
/* ------------------------------------------------------------------
Initial code (copied verbatim to the output file)
------------------------------------------------------------------ */
// Includes
#include <malloc.h> // _alloca is used by the parser
#include <string.h> // strcpy
#include "lex.h" // the lexer
// Some yacc (bison) defines
#define YYDEBUG 1 // Generate debug code; needed for YYERROR_VERBOSE
#define YYERROR_VERBOSE // Give a more specific parse error message
// Error-reporting function must be defined by the caller
void Error (char *format, ...);
// Forward references
void yyerror (char *msg);
%}
/* ------------------------------------------------------------------
Yacc declarations
------------------------------------------------------------------ */
/* The structure for passing value between lexer and parser */
%union {
char *str;
}
%token ERROR_TOKEN IF ELSE PRINT INPUT ASSIGN EQUAL NOTEQUAL
%token CONCAT END_STMT OPEN_PAR CLOSE_PAR
%token BEGIN_CS END_CS
%token <str> ID STRING BOOLEAN
/*%type <type> type simple_type cast*/
%expect 1 /* shift/reduce conflict: dangling ELSE */
/* declaration */
%%
/* ------------------------------------------------------------------
Yacc grammar rules
------------------------------------------------------------------ */
program
: statement_list
;
statement_list
: statement_list statement
| /* empty */
;
statement
: END_STMT {puts ("Empty statement");}
| expression END_STMT {puts ("Expression statement");}
| PRINT expression END_STMT {puts ("Print statement");}
| INPUT identifier END_STMT {puts ("Input statement");}
| if_statement {puts ("If statement");}
| compound_statement {puts ("Compound statement");}
| error END_STMT {puts ("Error statement");}
| notequal_expression {puts ("Not equal statement");}
;
/* NOTE: This rule causes an unresolvable shift/reduce conflict;
That's why %expect 1 was added (see above) */
if_statement
: IF OPEN_PAR expression CLOSE_PAR statement optional_else_statement
;
optional_else_statement
: ELSE statement
| /* empty */
;
compound_statement
: BEGIN_CS statement_list END_CS
;
expression
: equal_expression
| OPEN_PAR expression CLOSE_PAR
;
equal_expression
: expression EQUAL assign_expression
| assign_expression
;
notequal_expression
: expression NOTEQUAL assign_expression
| NOTEQUAL assign_expression
;
assign_expression
: identifier ASSIGN assign_expression
| concat_expression
;
concat_expression
: concat_expression CONCAT simple_expression
| simple_expression
;
simple_expression
: identifier
| string
;
identifier
: ID {}
;
string
: STRING {}
;
bool
: BOOLEAN {}
;
%%
/* ------------------------------------------------------------------
Additional code (again copied verbatim to the output file)
------------------------------------------------------------------ */
The lexer:
/* Lexical analyzer for StringC */
%{
/* ------------------------------------------------------------------
Initial code (copied verbatim to the output file)
------------------------------------------------------------------ */
// Includes
#include <string.h> // strcpy, strncpy
#include <io.h> // isatty
#ifdef MSVC
#define isatty _isatty // for some reason isatty is called _isatty in VC..
#endif
#define _LEX_CPP_ // make sure our variables get created
#include "lex.h"
#include "lexsymb.h"
extern "C" int yywrap (); // the yywrap function is declared by the caller
// Forward references
void Identifier ();
void StringConstant ();
void BoolConstant ();
void EatComment ();
//// End of inititial code
%}
/* ------------------------------------------------------------------
Some macros (standard regular expressions)
------------------------------------------------------------------ */
LETTER [a-zA-Z_]
DIGIT [0-9]
IDENT {LETTER}({LETTER}|{DIGIT})*
STR \"[^\"]*\"
BOOL \(false|true)\
WSPACE [ \t]+
/* ------------------------------------------------------------------
The lexer rules
------------------------------------------------------------------ */
%%
"if" {return IF;}
"else" {return ELSE;}
"print" {return PRINT;}
"input" {return INPUT;}
"=" {return ASSIGN;}
"==" {return EQUAL;}
"!=" {return NOTEQUAL;} /* Not equal to */
"+" {return CONCAT;}
";" {return END_STMT;}
"(" {return OPEN_PAR;}
")" {return CLOSE_PAR;}
"{" {return BEGIN_CS;}
"}" {return END_CS;}
{BOOL} {BoolConstant (); return BOOLEAN;}
{STR} {StringConstant (); return STRING;}
{IDENT} {Identifier (); return ID;}
"//" {EatComment();} /* comment: skip */
\n {lineno++;} /* newline: count lines */
{WSPACE} {} /* whitespace: (do nothing) */
. {return ERROR_TOKEN;} /* other char: error, illegal token */
%%
/* ------------------------------------------------------------------
Additional code (again copied verbatim to the output file)
------------------------------------------------------------------ */
// The comment-skipping function: skip to end-of-line
void EatComment() {
char c;
while ((c = yyinput()) != '\n' && c != 0);
lineno++;
}
// Pass the id name
void Identifier () {
yylval.str = new char[strlen(yytext)+1];
strcpy (yylval.str, yytext);
}
// Pass the string constant
void StringConstant() {
int l = strlen(yytext)-2;
yylval.str = new char[l+1];
strncpy (yylval.str, &yytext[1], l); yylval.str[l] = 0;
}
void BoolConstant() {
int l = strlen(yytext)-2;
yylval.str = new char[l+1];
strncpy(yylval.str, &yytext[1], l); yylval.str[l] = 0;
}
Are you sure that it's notequal_expression that is causing the issue? The nonterminal and rule that are not used, as I read it, are
bool
: BOOLEAN {}
;
Perhaps instead of
simple_expression
: identifier
| string
;
you intended to code
simple_expression
: identifier
| string
| bool
;
There are two problems with the grammar. The first is the shift/reduce conflict you've already seen (and addressed with %expect 1. I prefer to address it in the grammar instead and use %expect 0 instead. You can do that by removing ELSE from the %token list and adding a line
%right THEN ELSE
To declare right associativity. Your language doesn't actually have a THEN keyword but that's fine. You can then remove completely the rule for optional_else_statement and reword the rule for if_statement as follows:
if_statement
: IF OPEN_PAR expression CLOSE_PAR statement %prec THEN
| IF OPEN_PAR expression CLOSE_PAR statement ELSE statement
;
There are those who prefer to resolve it this way, and others who advocate the %expect 1 approach. I prefer this way, but now that you have both methods, you can certainly choose for yourself.
For the other problem, the useless rule is definitely this one:
bool
: BOOLEAN {}
;
because the non-terminal bool is not used anywhere else in the grammar. This accounts for both "1 useless nonterminal and 1 useless rule" as reported by bison. To be able to identify this kind of thing for yourself, you can use
bison --report=solved -v string.y
This will create a string.output file which will contain a large but readable report including any resolved shift-reduce errors (such as your IF-ELSE construction) and also a complete set of states created by bison. It's very useful when attempting to troubleshoot grammar problems.

Shift/Reduce conflict in bison

I was trying to do a simple parsing of general html codes.
Here's my entire bison file (example4.y).
%{
#include <iostream>
#include <cstring>
using namespace std;
extern "C" int yylex();
extern "C" int yyparse();
extern "C" FILE *yyin;
void yyerror(const char *str)
{
cout<<"Error: "<<str<<"\n";
}
int yywrap()
{
return 0;
}
main()
{
yyparse();
}
%}
%token NUMBER LANGLE CLOSERANGLE RANGLE SLASH ANYTHING
%union
{
int intVal;
float floatVal;
char *strVal;
}
%%
tag: |
opening_tag anything closing_tag
{
if(strcmp($<strVal>1,$<strVal>3)==0){
cout<<"\n[i] Tag Matches: "<<$<strVal>1;
cout <<"\n[!] The text: "<<$<strVal>2;
} else {
cout<<"\n[!] Tag Mismatch: "<<$<strVal>1<<" and "<<$<strVal>3;
}
$<strVal>$ = $<strVal>2;
}
|
opening_tag tag closing_tag
{
if(strcmp($<strVal>1,$<strVal>3)==0){
cout<<"\n[i] Tag Matches: "<<$<strVal>1;
cout <<"\n[!] The text: "<<$<strVal>2;
} else {
cout<<"\n[!] Tag Mismatch: "<<$<strVal>1<<" and "<<$<strVal>3;
}
}
;
opening_tag:
LANGLE ANYTHING RANGLE
{
$<strVal>$ = $<strVal>2;
}
;
anything:
ANYTHING
{
$<strVal>$ = $<strVal>1;
}
;
closing_tag:
LANGLE SLASH ANYTHING RANGLE
{
$<strVal>$= $<strVal>3;
}
%%
The error i get is: example4.y: conflicts: 1 shift/reduce
I think it has to do something with opening_tag tag closing_tag but i could not think what's happening here?
Any help?
It's because of two rules that start with opening_tag. The parser has to decide between the rules by lookint at most one token ahead, but it cannot. <FOO> may lead to either rule, and this requires two more tokens of lookahead.
You can do this:
tag : /* nothing */
| opening_tag contents closing_tag
;
contents: tag
| anything
;
UPDATE This new grammar has a different shift/reduce conflict. (UPDATE2: or perhaps it's the same one). Because a tag can be empty, the parser cannot decide what to do at this input:
<Foo> <...
^
|
input is here
If the next symbol is a slash, then we have a closing tag, and the empty tag rule should be matched. If the next symbol is not a slash, then we have an opening tag, and the non-empty tag rule should be matched. But the parser cannot know, it is only allowed to look at <.
The solution would be to create a new token, LANGLE_SLASH, for the </ combination.
The problem is that tag can be empty, so that <x>< might be the beginning of opening_tag tag closing_tag or of opening_tag opening_tag. Consequently, bison cannot tell whether to reduce an empty tag before shifting the <.
You should be able to fix it by removing the empty production for tag and adding an explicit production for opening_tag closing_tag.

Generate Parse Tree in c

The following code is supposed to generate a parse tree of the input expression, but the problem is that the output E,T,F,S (functions used in the code). I want it to be something like:
a+b*c => E*c => E+b*c => a+b*c
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
char next;
void E(void);void T(void);
void S(void);void F(void);
void error(int);void scan(void);
void enter(char);
void leave(char);
void spaces(int);
int level = 0;
//The main should always be very simple
//First scan the string
//second check for end of string reached , if yes success and if not error.
//P ---> E '#'
int main(void){
printf("Input:");
scan(); E();
if (next != '#') error(1);
else printf("***** Successful parse *****\n");
}
//E ---> T {('+'|'-') T}
void E(void){
enter('E');
T();
while (next == '+' || next == '-') {
scan();
T();
}
leave('E');
}
//T ---> S {('*'|'/') S}
void T(void)
{
enter('T'); S();
while (next == '*' || next == '/') {
scan(); S();
}
leave('T');
}
//S ---> F '^' S | F
void S(void)
{
enter('S'); F();
if (next == '^') {
scan(); S();
}
leave('S');
}
//F ---> char | '(' E ')'
void F(void)
{
enter('F');
if (isalpha(next))
{
scan();
}
else if (next == '(') {
scan(); E();
if (next == ')')
scan();
else
error(2);
}
else {
error(3);
}
leave('F');
}
//Scan the entire input
void scan(void){
while (isspace(next = getchar()));
}
void error(int n)
{
printf("\n*** ERROR: %i\n", n);
exit(1);
}
void enter(char name)
{
spaces(level++);
printf("+-%c\n", name);
}
void leave(char name)
{
spaces(--level);
printf("+-%c\n", name);
}
//TO display the parse tree
void spaces(int local_level)
{
while (local_level-- > 0)
printf("| ");
}
Looks like a recursive descent parser. First, work out your grammar by hand. What you are expecting is not what your grammar says. You've got, from your comments,
E ---> T {('+'|'-') T} expression
T ---> S {('*'|'/') S} term
S ---> F '^' S | F subexpression?
F ---> char | '(' E ')' factor
The definitions of E and T put * at a higher precedence than + so there is no way that you will get E*c. If you want that, you'll have to switch the grammar to
E ---> T {('*'|'/') T} expression
T ---> S {('+'|'-') S} term
If you just want the output to include the rest of the expression,
Get the whole line in
Change your scanner or lexer to get the next character from that scanned line. Mark this as the scanned point.
Change your Enter routine to print the the mnemonic as well as the line from the scanned point.
You don't get to choose the parse tree. I guess you don't understand the output, but (again) I guess you'd like
a+b*c => F+b*c => S+b*c => T+b*c => T+F*c => T+S*c => T+S*F => T+S*S => T+T => E
So here are a few questions to help.
what kind of parsing is going on there? bottom up or top down?
what state the parser is in when it prints enter/leave E, or T?
If you answered bottom up to the first question, what does E, T, S, F, F denotes (enter E, enter T, enter S, enter F, leave F)? When you have leave F that means the parser successfully recognised a non-terminal.
Try the input string 1+b*c. What do you get? Why do you get an error after E, T, S, F?
The output you seem to require can be easily produced if you understand what is produced at the moment. Hope this helps.

Multiline comment declaration with flex/bison

I have today a problem with my flex/bison script. It doesn't detect the multiline comment.
%x COMMENT_MULTI
#\[ yy_push_state(COMMENT_MULTI);
<COMMENT_MULTI>"]#" yy_pop_state();
<COMMENT_MULTI>"\n" {
yylloc->lines(yyleng);
yylloc->step();
}
<COMMENT_MULTI>.?
Can you help me?
This is the solution I ended up using in my bison definition of C++ comments and block comments:
%x COMMENT
"//".*\n ;
"/*" BEGIN(COMMENT);
<COMMENT>"/*" printf("%s%d\n", "Warning: multiple comments opened at line: ", yylineno);
<COMMENT>"*/" BEGIN(INITIAL);
<COMMENT>"EOF" yyerror("Reached end of file while scanning comment");
<COMMENT>.|"\n" ;
/*DECLARING A SPECIFIC STATE FOR THE BLOCK COMMENT*/
%x BLOCK_COMMENT
%%
\/\* { // BEGINING OF A BLOCK COMMENT: ENTERS INTO 'BLOCK_COMMENT' STATE
BEGIN(BLOCK_COMMENT);
blockcomment_line_start = line;
blockcomment_col_start = frcol;
strncat(block_comment, yytext, sizeof(block_comment));
}
<BLOCK_COMMENT>\*\/ { // END OF BLOCK COMMENT
if(strlen(block_comment)+strlen(yytext) >= BLOCK_COMMENT_BUFFER-10){ //ADDS "(...)" AT THE END OF THE STRING IN CASE IT HAS BEEN TRUNCATED
block_comment[1013] = '\0';
strcat(block_comment," (...) ");
}
strncat(block_comment, yytext, sizeof(block_comment));
printf("#(%.3d,%.3d)\tBLOCK COMMENT\t- %s\n", blockcomment_line_start, blockcomment_col_start, block_comment);
BEGIN(INITIAL); //COMES BACK TO THE INITIAL STATE
}
<BLOCK_COMMENT>\n {
line++; // STILL HAS TO INCREMENT THE LINE NUMBER WHEN THERE'S A LINE BREAK INSIDE THE COMMENT
//strcat(block_comment, "\\n");
strncat(block_comment, "\\n", sizeof(block_comment));
}
<BLOCK_COMMENT>. { // IGNORE ALL OTHER CHARACTERS WHILE IN 'BLOCK_COMMENT' STATE
//strcat(block_comment, yytext);
strncat(block_comment, yytext, sizeof(block_comment));
}
<BLOCK_COMMENT><<EOF>> {
printf("ERROR! THE BLOCK COMMENT OPENED IN #(%d,%d) HASN'T BEEN CLOSED! \t\n", blockcomment_line_start, blockcomment_col_start);
return;
}