Why does this scanner not eat whitespaces? - regex

These are my lexer-definitions, there are many lexer-definitions but this one is mine. I have several regexes trying to capture and ignore whitespace, from this sample. The error I get is that in line 1: 14 there is a $undefined Symbold to be found - that is of asci-value 32.
Also known as space.
OIL_VERSION = "314159OS";
CPU AT91SAM7S256
{
//Test Coment
OS HTOSEK
{
STATUS = EXTENDED;
STARTUPHOOK = TRUE;
ERRORHOOK = FALSE;
SHUTDOWNHOOK = FALSE;
PRETASKHOOK = FALSE;
POSTTASKHOOK = FALSE;
USEGETSERVICEID = FALSE;
USEPARAMETERACCESS = FALSE;
USERESSCHEDULER = FALSE;
USR_STACK_SIZE=3000;
};
/* Definition of application mode */
APPMODE appmode1{};
/* Definition of resource */
RESOURCE resource1
{
RESOURCEPROPERTY = STANDARD;
};
/* Definition of event */
EVENT event1
{
MASK = AUTO;
};
...
Lex Capture Definitions:
%{ /*** C/C++ Declarations ***/
#define MAX_INCLUDE_DEPTH 16
#include <string>
#include <sstream>
#define SSTR( x ) dynamic_cast< std::ostringstream & >( \
( std::ostringstream() << std::dec << x ) ).str()
#include "scanner.h"
/* import the parser's token type into a local typedef */
typedef implementation::Parser::token token;
typedef implementation::Parser::token_type token_type;
/* By default yylex returns int, we use token_type. Unfortunately yyterminate
* by default returns 0, which is not of token_type. */
#define yyterminate() return token::END
/* This disables inclusion of unistd.h, which is not available under Visual C++
* on Win32. The C++ scanner uses STL streams instead. */
#define YY_NO_UNISTD_H
static int once = 0;
static int lineno = 1;
static void nextLine()
{
lineno++;
}
//convert a str to int
int fromInt(char *s)
{
int i;
int m;
m = 1;
i = 0;
if (s[0]=='-'){
m = -1;
i = 1;
}
else if(s[0]=='+')
i = 1;
return((atoi(s+i))*m);
}
int fromHex(char *s)
{
return((int)strtol(s, NULL, 16));
}
int LineCounter=0;
%}
/*** Flex Declarations and Options ***/
/* enable c++ scanner class generation */
%option c++
/* change the name of the scanner class. results in "ExampleFlexLexer" */
%option prefix="Example"
/* the manual says "somewhat more optimized" */
%option batch
/* enable scanner to generate debug output. disable this for release
* versions. */
%option debug
/* no support for include files is planned */
%option yywrap nounput
/* enables the use of start condition stacks */
%option stack
%x C_COMMENT
%x incl
/* The following paragraph suffices to track locations accurately. Each time
* yylex is invoked, the begin position is moved onto the end position. */
%{
#define YY_USER_ACTION yylloc->columns(yyleng);
%}
%% /*** Regular Expressions Part ***/
/* code to place at the beginning of yylex() */
%{
// reset location
yylloc->step();
%}
"/*" { BEGIN(C_COMMENT); }
<C_COMMENT>"*/" { BEGIN(INITIAL); }
<C_COMMENT>. { }
"=" { return(token::EQ);
}
"[" { return(token::LBRACK);
}
"]" { return(token::RBRACK);
}
"OS" { return(token::OSEK);
}
"EVENT" { return(token::EVENT);
}
"TASK" { return(token::TASK);
}
"ALARM" { return(token::ALARM);
}
"COUNTER" { return(token::COUNTER);
}
"OIL_VERSION" { return(token::OIL_VERSION);
}
"APPMODE" { return(token::APPMODE);
}
"CPU" { return (token::CPU);
}
"true"|"TRUE" { yylval->integerVal =1; return(token::VAL_BOOL);
}
"false"|"FALSE" { yylval->integerVal =0; return(token::VAL_BOOL);
}
"BOOLEAN" { return(token::BOOLEAN);
}
"INT" { return(token::INT);
}
"{" { return(token::LBRACE);
}
"}" { return(token::RBRACE);
}
":" { return(token::COLON);
}
"," { return(token::COMMA);
}
";" { return(token::SEMI);
}
([_A-Za-z])([a-zA-Z0-9!^_])* {yylval->stringVal = new std::string(yytext, yyleng);
return(token::STRING);
}
(([+-])?([0-9])*) {yylval->integerVal = fromInt( yytext );
return(token::NUMERAL);
}
(("0x")([0-9ABCDEFabcdef])*) {yylval->integerVal = fromHex( yytext );
return(token::NUMERAL);
}
(([-+]?[1-9][0-9]+\.[0-9]*)|([-+]?[0-9]*\.[0-9]+)|([-+]?[1-9]+))([eE][-+]?[0-9]+)?(f)? { yylval->doubleVal=atof(yytext);
return (token::VAL_FLOAT);
}
[\n\r]+ {
//yylloc->lines(yyleng);
yylloc->step();
LineCounter++;
//return token::EOL;
}
[\r\n]+ {
//yylloc->lines(yyleng);
yylloc->step();LineCounter++;
//return token::EOL;
}
[\t\r]+ { /* gobble up white-spaces */ yylloc->step(); }
[\s]+ { yylloc->step(); }
\"([^\"])*\" {
yytext[yyleng-1]= 0;
yylval->stringVal = new std::string( yytext, yyleng);
return(token::STRING);
}
. {
unsigned int temp;
temp= (unsigned int)(*yytext);
std::stringstream str2;
str2<<temp;
std::cout<<"Unknown character"<<*yytext<<" as Asci-value : "<<str2.str()<<std::endl;
return static_cast<token_type>(*yytext);
}
%% /*** Additional Code ***/
namespace implementation {
Scanner::Scanner(std::istream* in,
std::ostream* out)
: ExampleFlexLexer(in, out)
{
}
Scanner::~Scanner()
{
}
void Scanner::set_debug(bool b)
{
yy_flex_debug = b;
}
}
/* This implementation of ExampleFlexLexer::yylex() is required to fill the
* vtable of the class ExampleFlexLexer. We define the scanner's main yylex
* function via YY_DECL to reside in the Scanner class instead. */
#ifdef yylex
#undef yylex
#endif
int ExampleFlexLexer::yylex()
{
std::cerr << "in ExampleFlexLexer::yylex() !" << std::endl;
return 0;
}
/* When the scanner receives an end-of-file indication from YY_INPUT, it then
* checks the yywrap() function. If yywrap() returns false (zero), then it is
* assumed that the function has gone ahead and set up `yyin' to point to
* another input file, and scanning continues. If it returns true (non-zero),
* then the scanner terminates, returning 0 to its caller. */
int ExampleFlexLexer::yywrap()
{
return 1;
}
I modified the last rule, so it simply doesent try to cast any unknown text and print out the ascisymbols it captures.. resulting in 32 47 47 32 " // ".
Will try to print out the stream..

flex does not implement perlisms such as \s. The only backslash escape sequences it recognized are standard C escapes such as \n. If you want to recognise a space character, use " ".
By the way, [\n\r]+ and [\r\n]+ recognize exactly the same thing: one or more repetitions of a single character which is either a newline or a return. So the second such rule will never match. I think flex will warn you about that.

Related

A function uses a different amount of memory when I call it with the same conditions the second time

I have a problem with C++ and memory. Here's the pseudocode:
main.cpp
#include <iostream>
#include "seq.h"
int main(int argc, char *argv[]) {
SnpSite snp_site("/mnt/c/Users/manht/Downloads/s_typhi_wong_holt.aln.gz");
snp_site.test(); // run the first time
snp_site.test(); // run the second time
}
seq.h
#include "file_handler.h"
#include <stdio.h>
class SnpSite {
private:
string inputfile;
FileHandler fh;
public:
SnpSite(char* _inputfile);
int is_unknown(char base);
void test();
};
seq.cpp
#include "seq.h"
SnpSite::SnpSite(char* _inputfile) {
fh = FileHandler();
inputfile = _inputfile;
}
void SnpSite::test() {
string sample_name, seq;
this->fh.open(this->inputfile.c_str());
this->fh.assign_next_sample_to(&sample_name, &seq);
this->fh.close();
}
file_handler.h
#ifndef SEQ_H_
#include <zlib.h>
#include <utility>
#include <ctype.h>
#include "my_string.h"
#include <string>
using namespace std;
#define SEQ_H_
typedef bool (*match_func)(int c, int delimiter);
class FileHandler {
private:
gzFile file;
char buffer[2048]; // Static allocation for better performance.
int buffer_start, buffer_end;
bool eof;
void get_until(int delimiter, string *s);
public:
FileHandler();
FileHandler(int _buffer_size);
void open(const char* filename);
void close();
void assign_next_sample_to(string *name, string *seq);
int next_char();
bool is_eof();
};
#endif
file_handler.cpp
#include "file_handler.h"
FileHandler::FileHandler() {
buffer_start = -1;
buffer_end = -1;
eof = false;
}
void FileHandler::open(const char* filename) {
file = gzopen(filename, "r");
eof = false;
}
void FileHandler::close() {
gzclose(file);
}
int FileHandler::next_char() {
/* Read current character and increase cursor (buffer_start) by 1.*/
if (buffer_start >= buffer_end) {
buffer_end = gzread(file, buffer, 2048);
buffer_start = -1;
if (buffer_end == 0) eof = true;
}
return buffer[++buffer_start];
}
bool FileHandler::is_eof() {
return eof;
}
#define SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r
#define SEP_TAB 1 // isspace() && !' '
#define SEP_LINE 2 // line separator: "\n" (Unix) or "\r\n" (Windows)
#define SEP_MAX 2
// list of function to compare c and delimiter, need exactly 2 arguments.
bool match_space(int c, int delimter) {
return isspace(c);
}
bool match_tab(int c, int delimter) {
return isspace(c) && c != ' ';
}
bool match_newline(int c, int delimter) {
return c == '\n';
}
bool match_char(int c, int delimter) {
return c == delimter;
}
bool no_match(int c, int delimiter) {
return false;
}
// end list.
void FileHandler::get_until(int delimiter, string *s) {
/*
Read till delimiter and append bytes read to s.
When done cursor will be at the end of the line.
*/
match_func match; // function to check if a char match delimiter
switch (delimiter) {
case SEP_SPACE:
match = match_space;
break;
case SEP_TAB:
match = match_tab;
break;
case SEP_LINE:
match = match_newline;
break;
default:
if (delimiter > SEP_MAX) match = match_char;
else match = no_match;
}
// begin process
int i = buffer_start;
while (!match(buffer[i], delimiter)) {
if (buffer_start >= buffer_end) {
buffer_end = gzread(file, buffer, 2048);
buffer_start = 0;
i = 0;
if (buffer_end == 0) {
eof = true;
break;
}
}
while (!match(buffer[i], delimiter) && i < buffer_end) i++;
s->append((char*)(buffer + buffer_start), i - buffer_start);
buffer_start = i;
}
}
/*
Get next sample name and sequence, assign it to *name and *seq.
(Note: this function do not read quality score for QUAL file).
*/
void FileHandler::assign_next_sample_to(string *name, string *seq) {
/* Get next sample name and sequence, assign it to *name and *seq.*/
name->erase();
seq->erase();
int c;
while (!eof && (c = next_char()) != '>' && c != '#') {} // read until meet sample name
get_until(SEP_SPACE, name); // get sample name
while (!eof && (c = next_char()) != '>' && c != '#' && c != '+') {
if (c == '\n') continue;
get_until(SEP_LINE, seq); // read sequence
}
buffer_start--; // step back to the end of sequence
}
I don't use any dynamic allocation, and when I traced memory usage by PID in htop, I found something that I can't explain:
The first time I call test():
At the beginning of the function, my process uses 6168 KBytes.
At the end of the function, my process uses 13998 Kbytes.
The second time I call test():
At the beginning of the function, my process uses 6304 Kbytes.
At the end of the function, my process uses 21664 Kbytes.
The length of the seq variable is 4809037 and sample_name is 11 in both cases. I don't understand why memory usage is so different between them. Hope someone can find out and explain it to me, it helps me a lot. Thanks
This happens because of this line:
s->append((char*)(buffer + buffer_start), i - buffer_start);
Strings are dynamically allocated and every time the initial size is exceeded a new larger memory block is allocated. You can read more about this here: Chapter 4. Optimize String Use: A Case Study.

G++ isn't recognizing my c++ classes when compiling my flex and bison code

I am writing a lexer and parser combo that is storing arm8 assembly code in a data structure for further testing. However, when compiling the code, the compiler doesn't recognize my imported classes as legitimate data types.
I have been following this guide: https://gnuu.org/2009/09/18/writing-your-own-toy-compiler/ to some extent. I tried changing the output settings of bison to produce a c++ file and it fixed partially fixed the problem, but it opened a whole other can of worms I'm hoping to avoid. All of the guides I've looked at use c++ code in the process and I don't really understand why it fails here.
assembly_bison.y:
%{
#include <cstdio>
#include <iostream>
#include <string>
#include "instructionds.h"
#include "AssemblyBlock.h"
using namespace std;
extern int yylex();
extern int yyparse();
extern FILE *yyin;
AssemblyBlock *assembly = new AssemblyBlock();
STP *input;
void yyerror(const char *s);
%}
%union {
long long imm;
std::string *string;
int token;
}
%token STP INSTRUCTION
%token STACKPOINTER "sp"
%token <imm> IMMEDIATE
%token <string> DIRECTIVE LABEL INLINELABEL
%token <token> REGISTER64 REGISTER32
%token <token> COMMA ","
%token <token> BANG "!"
%token <token> OPENBRACKET "["
%token <token> CLOSEBRACKET "]"
%%
document:
document line
| /* empty */
;
line:
LABEL
| DIRECTIVE {/* */}
| LABEL instruction
| instruction
;
instruction:
stp
;
stp:
STP REGISTER64 "," REGISTER64 "," "[" "sp" "," IMMEDIATE "]" "!"
{
input = new STP(true, true, $2, $4, -1, $9);
assembly->insert(input);
}
%%
int main(int, char**) {
// Open a file handle to a particular file:
FILE *myfile = fopen("Hello_World_Assembly_Code.asm", "r");
// Make sure it is valid:
if (!myfile) {
cout << "I can't open a.snazzle.file!" << endl;
return -1;
}
// Set Flex to read from it instead of defaulting to STDIN:
yyin = myfile;
// Parse through the input:
yyparse();
}
void yyerror(const char *s) {
cout << "EEK, parse error! Message: " << s << endl;
// might as well halt now:
exit(-1);
}
assembly_lexer.l
%{
#include <cstdio>
#include <string>
#include "instructionds.h"
#include "AssemblyBlock.h"
#include "parser.hpp"
#define SAVE_TOKEN yylval.string = new std::string(yytext, yyleng)
#define TOKEN(t) (yylval.token = t)
%}
%option noyywrap
delim [ \t\n]
ws [delim+]
letter [A-Za-z]
digit [0-9]
id {letter}({letter}|{digit})*
alphanumeric [A-Za-z0-9]
%%
{delim} {/* no action and return */}
\.L[A-Z0-9]*: { SAVE_TOKEN; return LABEL; }
\.[a-z0-9_]+.* { SAVE_TOKEN; return DIRECTIVE; }
{alphanumeric}+\: { SAVE_TOKEN; return LABEL; }
stp { return STP; }
add { return INSTRUCTION; }
adrp { return INSTRUCTION; }
bl { return INSTRUCTION; }
mov { return INSTRUCTION; }
ldp { return INSTRUCTION; }
ret { return INSTRUCTION; }
sp { return STACKPOINTER; }
x{digit}+ { yylval.register = stoi(yytext.substr(1,yytext.length())); return REGISTER64; }
w{digit}+ { yylval.register = stoi(yytext.substr(1,yytext.length())); return REGISTER32; }
, { return TOKEN(COMMA); }
\.L[A-Z0-9]* { yylval.sval = strdup(yytext); return INLINELABEL; } //Needs revision
\[ { return TOKEN(OPENBRACKET); }
\] { return TOKEN(CLOSEBRACKET); }
:{id}: { }
#?[+-]?{digit}+ { if(yytext[0] == '#') yytext.erase(0); yylval.imm = stoll(yytext); return IMMEDIATE } //Needs revision
{alphanumeric}+ { SAVE_TOKEN; return LABEL; }
! { return TOKEN(BANG); }
%%
instructionds.h:
#pragma once
class Instruction {
public:
virtual void print();
};
class STP : public Instruction{
private:
//Possible inputs
int Rn1;
int Rn2;
int Xn;
bool SP;
long long immediate;
//Instruction Modes
bool is64;
bool isPreindex;
public:
STP(bool is64, bool isPreindex, int n1, int n2, int Xn, long long immediate);
void print();
};
AssemblyBlock.h:
#pragma once
#include "instructionds.h"
struct InstStruct {
Instruction* line;
struct InstStruct *prev;
struct InstStruct *next;
};
class AssemblyBlock {
private:
struct InstStruct *head;
public:
AssemblyBlock();
void insert(Instruction *inst);
void display();
};
I can add the .cpp files for the classes if necessary later.
When I compile the code using the following commands, I get these errors. The compiler doesn't seem to read the headers. I used a test file to make sure that the classes I built work outside of bison and everything worked perfectly. If anyone knows more about this, I really appreciate your help.
mattersonline#mattersonline-VirtualBox:~/Documents/flex/soonergy$ bison -d -o parser.cpp assembly_bison.y
assembly_bison.y: warning: 1 shift/reduce conflict [-Wconflicts-sr]
mattersonline#mattersonline-VirtualBox:~/Documents/flex/soonergy$ flex -o tokens.cpp assembly_lexer.l
mattersonline#mattersonline-VirtualBox:~/Documents/flex/soonergy$ g++ -o parser parser.cpp tokens.cpp AssemblyBlock.cpp instructionds.cpp
assembly_bison.y:15:5: error: ‘STP’ does not name a type
STP *input;
^~~
assembly_bison.y: In function ‘int yyparse()’:
assembly_bison.y:56:13: error: ‘input’ was not declared in this scope
input = new STP(true, true, $2, $4, -1, $9);
^~~~~
assembly_bison.y:56:13: note: suggested alternative: ‘ino_t’
input = new STP(true, true, $2, $4, -1, $9);
^~~~~
ino_t
assembly_bison.y:56:25: error: expected type-specifier before ‘STP’
input = new STP(true, true, $2, $4, -1, $9);
^~~
};
The issue seems to be that STP is both the name of a type and the name of a token in the grammar. Renaming your type or token to something else should fix this.
Hope this helps!

Flex Bison constructed compiler prints correct results then gives syntax error

I am trying to construct a compiler. I built it, but when I try it, it gives an error. Actually my bison created test.tab.cc file includes debugging tags, but I could not use them My test case is:
ERROR.NEVER.A
My compiler prints the right things ("errrneverrraaabbbr") but after that error method is somehow called and gives the error:
"Error: syntax error at 1.1"
Does it mean first row first element, and what is wrong with it?
%skeleton "lalr1.cc"
%require "3.0"
%debug
%defines
%define api.namespace {YK}
%define parser_class_name {YK_Parser}
%code requires{
namespace YK {
class YK_Driver;
class YK_Scanner;
}
// The following definitions is missing when %locations isn't used
# ifndef YY_NULLPTR
# if defined __cplusplus && 201103L <= __cplusplus
# define YY_NULLPTR nullptr
# else
# define YY_NULLPTR 0
# endif
# endif
}
%parse-param { YK_Scanner &scanner }
%parse-param { YK_Driver &driver }
%code{
#include <iostream>
#include <cstdlib>
#include <fstream>
/* include for all driver functions */
#include "yk_driver.hpp"
#undef yylex
#define yylex scanner.yylex
}
%define api.value.type variant
%define parse.assert
%token DOT
%token ERROR
%token NEVER
%token A
%token B
%token C
%token BE
%token AF
%start program
%%
program : rule ;
rule : vio DOT temp DOT event{std::cout<<"r";};
vio : ERROR{std::cout<<"errr";}
;
temp : NEVER{std::cout<<"neverrr";};
event : basic{std::cout<<"bbb";}
| basic DOT temp DOT basic{std::cout<<"c";}
;
temporal : BE{std::cout<<"beeee";}
| AF{std::cout<<"affff";}
;
basic : A{std::cout<<"aaa";}
| B{std::cout<<"bbb";}
| C{std::cout<<"c";}
;
void test::test_Parser::error( const location_type &l, const std::string &err_message )
{
std::cerr << std::endl << "Error: " << err_message << " at " << l << "\n";
}
%{
/* C++ string header, for string ops below */
#include <string>
/* Implementation of yyFlexScanner */
#include "yk_scanner.hpp"
#undef YY_DECL
#define YY_DECL int YK::YK_Scanner::yylex( YK::YK_Parser::semantic_type * const lval, YK::YK_Parser::location_type *location )
/* typedef to make the returns for the tokens shorter */
using token = YK::YK_Parser::token;
/* define yyterminate as this instead of NULL */
#define yyterminate() return( token::END )
/* msvc2010 requires that we exclude this header file. */
#define YY_NO_UNISTD_H
/* update location on matching */
#define YY_USER_ACTION loc->step(); loc->columns(yyleng);
%}
%option debug
%option nodefault
%option yyclass="YK::YK_Scanner"
%option noyywrap
%option c++
%%
%{ /** Code executed at the beginning of yylex **/
yylval = lval;
%}
[ \t\n]+ {}
. {return token::DOT;}
ERROR {return token::ERROR;}
NEVER {return token::NEVER;}
A {return token::A;}
B {return token::B;}
C {return token::C;}
BE {return token::BE;}
AF {return token::AF;}
PS. Inside yyerrlab label, my error string is generated. One of the following code is responsible for error.
if (yyn < 0 || yylast_ < yyn || yycheck_[yyn] != yyla.type_get ())
goto yydefault;
// Reduce or error.
yyn = yytable_[yyn];
if (yyn <= 0)
{
if (yy_table_value_is_error_ (yyn))
goto yyerrlab;
yyn = -yyn;
goto yyreduce;
}
// Discard the token being shifted.
yyempty = true;
// Count tokens shifted since error; after three, turn off error status.
if (yyerrstatus_)
--yyerrstatus_;
/*-----------------------------------------------------------.
| yydefault -- do the default action for the current state. |
`-----------------------------------------------------------*/
yydefault:
yyn = yydefact_[yystack_[0].state];
if (yyn == 0)
goto yyerrlab;
goto yyreduce;
#include <iostream>
#include <cstdlib>
#include <cstring>
#include "yk_driver.hpp"
int main( const int argc, const char **argv )
{
/** check for the right # of arguments **/
if( argc == 2 )
{
YK::YK_Driver driver;
/** example for piping input from terminal, i.e., using cat **/
if( std::strncmp( argv[ 1 ], "-o", 2 ) == 0 )
{
driver.parse( std::cin );
}
/** simple help menu **/
else if( std::strncmp( argv[ 1 ], "-h", 2 ) == 0 )
{
std::cout << "use -o for pipe to std::cin\n";
std::cout << "just give a filename to count from a file\n";
std::cout << "use -h to get this menu\n";
return( EXIT_SUCCESS );
}
/** example reading input from a file **/
else
{
/** assume file, prod code, use stat to check **/
driver.parse( argv[1] );
}
driver.print( std::cout ) << "\n";
}
else
{
/** exit with failure condition **/
return ( EXIT_FAILURE );
}
return( EXIT_SUCCESS );
}
Most likely, this is caused by whatever is in your input after the A at the end of the line -- after parsing the rule (which is apparently being reduced, according to the ouput you show), it will be expecting an EOF to reduce the program and return. If your lexer returns something other than an EOF, you'll get an error. Perhaps a \r (carriage return), since your lexer does not appear to ignore that?

Regular expression for string excluding literal quotation marks

I have the following config file that I am trying to parse.
[ main ]
e_type=0x1B
username="username"
appname="applicationname"
In the lex file (test.l) specified below,the regular expression for STR is \"[^\"]*\" so that it recognizes everything within quotes.When I access the value of "username" or "applicationname" inside the parser file using $N variable, it contains the literal string.I just want
username and applicationname i.e without string quotation marks.
Is there a standard way to acheive this.
I have the following lex file (test.l)
%option noyywrap
%option yylineno
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "y.tab.h"
int yylinenu = 1;
int yycolno=1;
/**
* Forward declerations
**/
void Number ();
void HexaNumber ();
unsigned char getHexaLex (char c);
unsigned int strtol16 (char * str);
%}
%option nounput
%option noinput
%option case-insensitive
/*-----------------------------------------------------------------
Some macros (standard regular expressions)
------------------------------------------------------------------*/
DIGIT [0-9]
HEXALETTER [a-fA-F]
HEXANUMBER [0][x](({DIGIT}|{HEXALETTER})+)
NUM {DIGIT}+
HEXA ({DIGIT}|{HEXALETTER}|[*])
STR \"[^\"]*\"
WSPACE [ \t]*
NEWLINE [\n\r]
/*----------------------------------------------------------------
The lexer rules
------------------------------------------------------------------*/
%%
e_type { yylval.str = yytext; return T_E_TYPE; }
main { yylval.str = yytext; return T_MAIN_SECTION;}
{HEXANUMBER} { yylval.n = atoi(yytext); HexaNumber(); return T_NUMBER; }
= { return T_EQUAL; }
"[" { return T_OPEN_BRACKET; }
"]" { return T_CLOSE_BRACKET;}
appname { Custom_tag(); return T_APPNAME; }
username { Custom_tag(); return T_APPNAME; }
[^\t\n\r] { }
{WSPACE} { } /* whitespace: (do nothing) */
{NEWLINE} {
yylinenu++;
return T_EOL;
}
{STR} { Generic_string(); return T_STRING;}
%%
void Number () {
yylval.n = atol(yytext);
}
void Generic_string() {
yylval.str = malloc(strlen(yytext)+1);
strcpy (yylval.str, yytext);
}
You have a pointer to the matched token (yytext) and its length (yyleng), so it is quite simple to remove the quotes:
void Generic_string() {
yylval.str = malloc(yyleng - 1); // length - 2 (quotes) + 1 (NUL)
memcpy (yylval.str, yytext + 1, yyleng - 2); // copy all but quotes
yylval.str[yyleng - 2] = 0; // NUL-terminate
}
Personally, I'd suggest avoiding use of global variables in Generic_string, both to simplify future implementation of a reentrant scanner, and to make the process a bit more flexible:
{STR} { yylval.str = duplicate_segment(yytext + 1, yyleng - 2);
return T_STRING;
}
/* ... */
char* duplicate_segment(const char* token, int token_length) {
char* dup = malloc(token_length + 1);
if (!dup) { /* handle memory allocation error */ }
memcpy(dup, token, token_length);
dup[token_length] = 0;
return dup;
}

I'd like to use ifstream and ofstream in C++ to mimic C#'s BinaryReader / BinaryWriter functionality

I'm looking for a way to write floats/ints/strings to a file and read them as floats/ints/strings. (basically read/write as ios::binary).
I ended up writing it myself. Just wanted to share it with others.
It might not be optimized, but I had some difficulties finding C++ code that mimics C#'s BinaryReader & BinaryWriter classes. So I created one class that handles both read and write.
Quick things to note:
1) "BM" is just a prefix for my classes.
2) BMLogging is a helper class that simply does:
cout << "bla bla bla" << endl;
So you can ignore the calls to BMLogging, I kept them to highlight the cases where we could warn the user.
Here's the code:
#include <iostream>
#include <fstream>
using namespace std;
// Create the macro so we don't repeat the code over and over again.
#define BMBINARY_READ(reader,value) reader.read((char *)&value, sizeof(value))
enum BMBinaryIOMode
{
None = 0,
Read,
Write
};
class BMBinaryIO
{
// the output file stream to write onto a file
ofstream writer;
// the input file stream to read from a file
ifstream reader;
// the filepath of the file we're working with
string filePath;
// the current active mode.
BMBinaryIOMode currentMode;
public:
BMBinaryIO()
{
currentMode = BMBinaryIOMode::None;
}
// the destructor will be responsible for checking if we forgot to close
// the file
~BMBinaryIO()
{
if(writer.is_open())
{
BMLogging::error(BMLoggingClass::BinaryIO, "You forgot to call close() after finishing with the file! Closing it...");
writer.close();
}
if(reader.is_open())
{
BMLogging::error(BMLoggingClass::BinaryIO, "You forgot to call close() after finishing with the file! Closing it...");
reader.close();
}
}
// opens a file with either read or write mode. Returns whether
// the open operation was successful
bool open(string fileFullPath, BMBinaryIOMode mode)
{
filePath = fileFullPath;
BMLogging::info(BMLoggingClass::BinaryIO, "Opening file: " + filePath);
// Write mode
if(mode == BMBinaryIOMode::Write)
{
currentMode = mode;
// check if we had a previously opened file to close it
if(writer.is_open())
writer.close();
writer.open(filePath, ios::binary);
if(!writer.is_open())
{
BMLogging::error(BMLoggingClass::BinaryIO, "Could not open file for write: " + filePath);
currentMode = BMBinaryIOMode::None;
}
}
// Read mode
else if(mode == BMBinaryIOMode::Read)
{
currentMode = mode;
// check if we had a previously opened file to close it
if(reader.is_open())
reader.close();
reader.open(filePath, ios::binary);
if(!reader.is_open())
{
BMLogging::error(BMLoggingClass::BinaryIO, "Could not open file for read: " + filePath);
currentMode = BMBinaryIOMode::None;
}
}
// if the mode is still the NONE/initial one -> we failed
return currentMode == BMBinaryIOMode::None ? false : true;
}
// closes the file
void close()
{
if(currentMode == BMBinaryIOMode::Write)
{
writer.close();
}
else if(currentMode == BMBinaryIOMode::Read)
{
reader.close();
}
}
bool checkWritabilityStatus()
{
if(currentMode != BMBinaryIOMode::Write)
{
BMLogging::error(BMLoggingClass::BinaryIO, "Trying to write with a non Writable mode!");
return false;
}
return true;
}
// Generic write method that will write any value to a file (except a string,
// for strings use writeString instead).
void write(void *value, size_t size)
{
if(!checkWritabilityStatus())
return;
// write the value to the file.
writer.write((const char *)value, size);
}
// Writes a string to the file
void writeString(string str)
{
if(!checkWritabilityStatus())
return;
// first add a \0 at the end of the string so we can detect
// the end of string when reading it
str += '\0';
// create char pointer from string.
char* text = (char *)(str.c_str());
// find the length of the string.
unsigned long size = str.size();
// write the whole string including the null.
writer.write((const char *)text, size);
}
// helper to check if we're allowed to read
bool checkReadabilityStatus()
{
if(currentMode != BMBinaryIOMode::Read)
{
BMLogging::error(BMLoggingClass::BinaryIO, "Trying to read with a non Readable mode!");
return false;
}
// check if we hit the end of the file.
if(reader.eof())
{
BMLogging::error(BMLoggingClass::BinaryIO, "Trying to read but reached the end of file!");
reader.close();
currentMode = BMBinaryIOMode::None;
return false;
}
return true;
}
// reads a boolean value
bool readBoolean()
{
if(checkReadabilityStatus())
{
bool value = false;
BMBINARY_READ(reader, value);
return value;
}
return false;
}
// reads a character value
char readChar()
{
if(checkReadabilityStatus())
{
char value = 0;
BMBINARY_READ(reader, value);
return value;
}
return 0;
}
// read an integer value
int readInt()
{
if(checkReadabilityStatus())
{
int value = 0;
BMBINARY_READ(reader, value);
return value;
}
return 0;
}
// read a float value
float readFloat()
{
if(checkReadabilityStatus())
{
float value = 0;
BMBINARY_READ(reader, value);
return value;
}
return 0;
}
// read a double value
double readDouble()
{
if(checkReadabilityStatus())
{
double value = 0;
BMBINARY_READ(reader, value);
return value;
}
return 0;
}
// read a string value
string readString()
{
if(checkReadabilityStatus())
{
char c;
string result = "";
while((c = readChar()) != '\0')
{
result += c;
}
return result;
}
return "";
}
};
EDIT: I replaced all the read/write methods above with these: (updated the usage code as well)
// Generic write method that will write any value to a file (except a string,
// for strings use writeString instead)
template<typename T>
void write(T &value)
{
if(!checkWritabilityStatus())
return;
// write the value to the file.
writer.write((const char *)&value, sizeof(value));
}
// Writes a string to the file
void writeString(string str)
{
if(!checkWritabilityStatus())
return;
// first add a \0 at the end of the string so we can detect
// the end of string when reading it
str += '\0';
// create char pointer from string.
char* text = (char *)(str.c_str());
// find the length of the string.
unsigned long size = str.size();
// write the whole string including the null.
writer.write((const char *)text, size);
}
// reads any type of value except strings.
template<typename T>
T read()
{
checkReadabilityStatus();
T value;
reader.read((char *)&value, sizeof(value));
return value;
}
// reads any type of value except strings.
template<typename T>
void read(T &value)
{
if(checkReadabilityStatus())
{
reader.read((char *)&value, sizeof(value));
}
}
// read a string value
string readString()
{
if(checkReadabilityStatus())
{
char c;
string result = "";
while((c = read<char>()) != '\0')
{
result += c;
}
return result;
}
return "";
}
// read a string value
void readString(string &result)
{
if(checkReadabilityStatus())
{
char c;
result = "";
while((c = read<char>()) != '\0')
{
result += c;
}
}
}
This is how you would use it to WRITE:
string myPath = "somepath to the file";
BMBinaryIO binaryIO;
if(binaryIO.open(myPath, BMBinaryIOMode::Write))
{
float value = 165;
binaryIO.write(value);
char valueC = 'K';
binaryIO.write(valueC);
double valueD = 1231.99;
binaryIO.write(valueD);
string valueStr = "spawnAt(100,200)";
binaryIO.writeString(valueStr);
valueStr = "helpAt(32,3)";
binaryIO.writeString(valueStr);
binaryIO.close();
}
Here's how you would use it to READ:
string myPath = "some path to the same file";
if(binaryIO.open(myPath, BMBinaryIOMode::Read))
{
cout << binaryIO.read<float>() << endl;
cout << binaryIO.read<char>() << endl;
double valueD = 0;
binaryIO.read(valueD); // or you could use read<double()
cout << valueD << endl;
cout << binaryIO.readString() << endl;
cout << binaryIO.readString() << endl;
binaryIO.close();
}
EDIT 2: You could even write/read a whole structure in 1 line:
struct Vertex {
float x, y;
};
Vertex vtx; vtx.x = 2.5f; vtx.y = 10.0f;
// to write it
binaryIO.write(vtx);
// to read it
Vertex vtxRead;
binaryIO.read(vtxRead); // option 1
vtxRead = binaryIO.read<Vertex>(); // option 2
Hope my code is clear enough.
I subclassed ifstream and ofstream: ibfstream and obfstream. I made a little helper class that would detect the endianness of the machine I was compiling/running on. Then I added a flag for ibfstream and obfstream that indicated whether bytes in primitive types should be flipped. These classes also had methods to read/write primitive types and arrays of such types flipping the byte order as necessary. Finally, I set ios::binary for these classes by default.
I was often working on a little-endian machine and wanting to write big-endian files or vice versa. This was used in a program that did a lot of I/O with 3D graphics files of various formats.
I subclassed ifstream and ofstream: ibfstream and obfstream. I made a class that would detect the endianness of the machine I was compiling/running on. Then I added a flag for ibfstream and obfstream that indicated whether bytes in primitive types should be flipped. These classes also had methods to read/write primitive types and arrays of such types flipping the byte order as necessary.
I was often working on a little-endian machine and wanting to write big-endian files or vice versa. This was used in a program tht did a lot of I/O with 3D graphics files of various formats.