So I have been solving problems from this book called
Programming: Principles and Practice Using C++ by Bjarne Stroustrup to learn C++ and have been enjoying it a lot. I recently got stuck on this question in Chapter 10 (Input and Output
Streams).
The question is given below:
Add a command from x to the calculator from Chapter 7 that makes it
take input from a file x. Add a command to y to the calculator that
makes it write its output (both standard output and error output) to
file y.
The source code for the above mentioned calculator is given below:
/*
Simple Calculator
This program implements a basic expression calculator.
Input from cin; output to cout.
The grammar for input is:
Calculation:
Statement
Print
Quit
Statement:
Declaration
Expression
Declaration:
"let" Name "=" Expression
Print:
;
Quit:
q
Expression:
Term
Term '+' Expression //addition
Term '-' Expression //subtraction
Term:
Primary
Term '*' Primary //multiplication
Term '/' Primary //division
Term '%' Primary //mod division
Primary:
Number
'(' Expression ')'
'-' Primary
'+' Primary
Number:
floating-point-literal
Input comes from cin through the Token_stream called ts.
*/
#include "../../std_lib_facilities.h"
class Token //a very simple user-defined type
{
public:
char kind = ' '; //what kind of token
double value = 0; //for numbers: a value
string name = " "; //for strings: a name
Token(char ch) :kind{ ch } { } //initialize kind with ch
Token(char ch, double val) :kind{ ch }, value{ val } {} //initializes kind and value
Token(char ch, string n) :kind{ ch }, name{ n } { } //initializes kind and name
};
class Token_stream
{
public:
//user interface
Token get(); //get a Token
void putback(Token t); //put a Token back
void ignore(char c); //discard characters up to and including a c
private:
//implementation details
//(not directly accessible to users of Token_stream)
bool full{ false }; //is there a Token in the buffer?
Token buffer = 0; //here is where we keep a Token put back with putback()
};
class Variable
{
public:
string name = " ";
string const_name = " ";
double value = 0;
};
vector<Variable>var_table;
double get_value(string s)
//return the value of the Variable named s
{
for (const Variable& v : var_table) {
if (v.name == s) return v.value;
if (v.const_name == s) return v.value;
}
error("get: undefined variable ", s);
return 1;
}
void set_value(string s, double d)
//set the Variable named s to d
{
for (Variable& v : var_table)
{
if (v.name == s)
{
v.value = d;
return;
}
if (v.const_name == s)
{
error("set: cannot assign a new value to an existing constant");
return;
}
}
error("set: undefined variable", s);
}
const char number = '8'; //t.kind == number means that t is a number Token
const char quit = 'q'; //t.kind == quit means that t is a quit Token
const char print = ';'; //t.kind == print means that t is a print token
const char name = 'a'; //name token
const char constant = 'c'; //constant token
const string constdeclkey = "const"; //constant keyword
const char let = 'L'; //declaration token
const string declkey = "let"; //declaration keyword
const string prompt = "> ";
const string result = "= "; //used to indicate that what follows is a result
void Token_stream::putback(Token t)
{
if (full) error("putback() into a full buffer");
buffer = t; //copy t to buffer
full = true; //buffer is now full
}
void Token_stream::ignore(char c)
//c represents the kind of Token
{
//first look in buffer:
if (full && c == buffer.kind)
{
full = false;
return;
}
full = false;
//now search input:
char ch = 0;
while (cin >> ch)
if (ch == c) return;
}
Token Token_stream::get()
{
if (full)
{
full = false; //do we already have a Token ready?
return buffer; //remove Token from buffer
}
char ch;
cin >> ch; //note that >> skips whitespace(space, newline, tab, etc)
switch (ch)
{
case ';': //for "print"
case 'q': //for "quit"
case '(':
case ')':
case '+':
case '-':
case '*':
case '/':
case '%':
case '=':
return Token{ ch }; //let each character represent itself
case '.':
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
{
cin.putback(ch); //put digit back into the input stream
double val;
cin >> val; //read a floating-point number
return Token{ number, val }; //let '8' represent a "a number"
}
default:
if (isalpha(ch))
{
string s;
s += ch;
while (cin.get(ch) && (isalpha(ch) || isdigit(ch) || ch == '_')) s += ch;
cin.putback(ch);
if (s == declkey) return Token(let); //declaration keyword
else if (s == constdeclkey) return Token(constant);
return Token{ name, s };
}
error("Bad Token");
return 1;
}
}
bool is_declared(string var)
//is var already in var_table?
{
for (const Variable& v : var_table)
{
if (v.name == var) return true;
}
for (const Variable& v : var_table)
{
if (v.const_name == var) return true;
}
return false;
}
double define_name(string var, double val)
//add (var, val) to var_table
{
if (is_declared(var)) set_value(var, val);
Variable v;
v.name = var;
v.value = val;
var_table.push_back(v);
return val;
}
double define_const_name(const string constant, double val)
//add (constant, val) to var_table
{
if (is_declared(constant)) error(constant, " constant declared twice");
Variable v;
v.const_name = constant;
v.value = val;
var_table.push_back(v);
return val;
}
Token_stream ts; //makes a Token_stream called ts that reads from cin
double expression(); //declaration so that primary() can call it
double declaration()
//assume we have seen "let"
//handle: name = expression
//declare a variable called "name" with the initial value "expression"
{
Token t = ts.get();
if (t.kind != name) error("name expected in declaration");
string var_name = t.name;
Token t2 = ts.get();
if (t2.kind != '=') error("= missing in declaration of ", var_name);
double d = expression();
define_name(var_name, d);
return d;
}
double const_declaration()
//assume we have seen "const"
//handle: const_name = expression
//declare a constant called "const" with the initial value "expression"
{
Token t = ts.get();
if (t.kind != name) error("constant_name expected in declaration");
const string const_name = t.name;
Token t2 = ts.get();
if (t2.kind != '=') error("= missing in declaration of ", const_name);
double d = expression();
define_const_name(const_name, d);
return d;
}
double primary()
//deals with numbers and parantheses
//calls expression() and get_token()
{
Token t = ts.get();
switch (t.kind)
{
case '(': //handle '(' expression ')'
{
double d = expression();
t = ts.get();
if (t.kind != ')') error("')' expected");
return d;
}
case number: //we use '8' to represent a number
return t.value; //return the number's value
case '-':
return -primary();
case '+':
return primary();
case name:
return get_value(t.name);
default:
error("primary expected");
return 1;
}
}
double term()
//deals with * and -
//calls primary and get_token()
{
double left = primary(); //read and evaluate a Primary
Token t = ts.get(); //get the next token
while (true)
{
switch (t.kind) //see which kind of token it is
{
case '*':
left *= primary(); //evaluate Primary and multiply
t = ts.get();
break;
case '/':
{
double d = primary(); //evaluate Primary and divide
if (d == 0) error("divide by zero"); //check if primary isnt zero
left /= d;
t = ts.get();
break;
}
case '%':
{
double d = primary();
if (d == 0) error("divide by zero");
left = fmod(left, d);
t = ts.get();
break;
}
default:
ts.putback(t); //put t back into Token stream
return left; //return result to Expression
}
}
}
double expression()
//deals with + and -
//calls term() and get_token()
{
double left = term(); //read and evaluate an Term
Token t = ts.get(); //get the next token from the Token stream
while (true)
{
switch (t.kind) //see which kind of token it is
{
case '+':
left += term(); //evaluate Term and add
t = ts.get();
break;
case '-':
left -= term(); //evaluate Term and subtract
t = ts.get();
break;
default:
ts.putback(t); //put t back into the token stream
return left; //return the answer
}
}
}
void clean_up_mess() //B)
{
//skip until we find a print
ts.ignore(print);
}
double statement()
{
Token t = ts.get();
switch (t.kind)
{
case let:
return declaration();
case constant:
return const_declaration();
default:
ts.putback(t);
return expression();
}
}
void calculate() //expression evaluation loop
{
double val = 0;
while (cin)
try
{
cout << prompt; //print prompt
Token t = ts.get();
while (t.kind == print) //';' for "print now"
t = ts.get(); //eat ';'
if (t.kind == quit) //'q' for "quit"
{
return;
}
ts.putback(t);
cout << result << statement() << '\n';
}
catch (exception& e)
{
cerr << e.what() << '\n'; //write error message
clean_up_mess();
}
}
int main() //main loop and deals with errors
{
try
{
//predefined names:
define_const_name("pi", 3.1415926535);
define_const_name("e", 2.7182818284);
calculate();
keep_window_open();
return 0;
}
catch (exception& e)
{
cerr << e.what() << '\n';
keep_window_open("~~");
return 1;
}
catch (...)
{
cerr << "exception \n";
keep_window_open();
return 2;
}
}
The calculator kind of works like this:
> (2+3)-4*(5/6); //Input
= 1.66667 //Output
> //Next Input
It can also declare variables and constants and then use them back into for calculations:
> let x = 2; //Variable 1
= 2 //assigned value for Var 1
> const y = 3; //Constant 1
= 3 //assigned value Const 1
> x + y; //added var 1 to const 1
= 5 //result
> //next input
Also ; acts like a print character. The expression gets evaluated and displayed as soon as it reads ; and then it clears the token stream and gets ready to read the next input.
The custom header file std.lib.facilities.h is a collection of most standard C++ libraries and functions some of which are written by Bjarne Stroustrup and might have been used in the code given above.
I have also left the code for that header file just in case if someone is curious:
/*
std_lib_facilities.h
*/
/*
simple "Programming: Principles and Practice using C++ (second edition)" course header to
be used for the first few weeks.
It provides the most common standard headers (in the global namespace)
and minimal exception/error support.
Students: please don't try to understand the details of headers just yet.
All will be explained. This header is primarily used so that you don't have
to understand every concept all at once.
By Chapter 10, you don't need this file and after Chapter 21, you'll understand it
Revised April 25, 2010: simple_error() added
Revised November 25 2013: remove support for pre-C++11 compilers, use C++11: <chrono>
Revised November 28 2013: add a few container algorithms
Revised June 8 2014: added #ifndef to workaround Microsoft C++11 weakness
Revised Febrary 2 2015: randint() can now be seeded (see exercise 5.13).
Revised August 3, 2020: a cleanup removing support for ancient compilers
*/
#ifndef H112
#define H112 080315L
#include<iostream>
#include<iomanip>
#include<fstream>
#include<sstream>
#include<cmath>
#include<cstdlib>
#include<string>
#include<list>
#include <forward_list>
#include<vector>
#include<unordered_map>
#include<algorithm>
#include <array>
#include <regex>
#include<random>
#include<stdexcept>
//------------------------------------------------------------------------------
typedef long Unicode;
//------------------------------------------------------------------------------
using namespace std;
template<class T> string to_string(const T& t)
{
ostringstream os;
os << t;
return os.str();
}
struct Range_error : out_of_range { // enhanced vector range error reporting
int index;
Range_error(int i) :out_of_range("Range error: " + to_string(i)), index(i) { }
};
// trivially range-checked vector (no iterator checking):
template< class T> struct Vector : public std::vector<T> {
using size_type = typename std::vector<T>::size_type;
/* #ifdef _MSC_VER
// microsoft doesn't yet support C++11 inheriting constructors
Vector() { }
explicit Vector(size_type n) :std::vector<T>(n) {}
Vector(size_type n, const T& v) :std::vector<T>(n, v) {}
template <class I>
Vector(I first, I last) : std::vector<T>(first, last) {}
Vector(initializer_list<T> list) : std::vector<T>(list) {}
*/
using std::vector<T>::vector; // inheriting constructor
T& operator[](unsigned int i) // rather than return at(i);
{
if (i<0 || this->size() <= i) throw Range_error(i);
return std::vector<T>::operator[](i);
}
const T& operator[](unsigned int i) const
{
if (i<0 || this->size() <= i) throw Range_error(i);
return std::vector<T>::operator[](i);
}
};
// disgusting macro hack to get a range checked vector:
#define vector Vector
// trivially range-checked string (no iterator checking):
struct String : std::string {
using size_type = std::string::size_type;
// using string::string;
char& operator[](unsigned int i) // rather than return at(i);
{
if (i<0 || size() <= i) throw Range_error(i);
return std::string::operator[](i);
}
const char& operator[](unsigned int i) const
{
if (i<0 || size() <= i) throw Range_error(i);
return std::string::operator[](i);
}
};
namespace std {
template<> struct hash<String>
{
size_t operator()(const String& s) const
{
return hash<std::string>()(s);
}
};
} // of namespace std
struct Exit : runtime_error {
Exit() : runtime_error("Exit") {}
};
// error() simply disguises throws:
inline void error(const string& s)
{
throw runtime_error(s);
}
inline void error(const string& s, const string& s2)
{
error(s + s2);
}
inline void error(const string& s, int i)
{
ostringstream os;
os << s << ": " << i;
error(os.str());
}
template<class T> char* as_bytes(T& i) // needed for binary I/O
{
void* addr = &i; // get the address of the first byte
// of memory used to store the object
return static_cast<char*>(addr); // treat that memory as bytes
}
inline void keep_window_open()
{
cin.clear();
cout << "Please enter a character to exit\n";
char ch;
cin >> ch;
return;
}
inline void keep_window_open(string s)
{
if (s == "") return;
cin.clear();
cin.ignore(120, '\n');
for (;;) {
cout << "Please enter " << s << " to exit\n";
string ss;
while (cin >> ss && ss != s)
cout << "Please enter " << s << " to exit\n";
return;
}
}
// error function to be used (only) until error() is introduced in Chapter 5:
inline void simple_error(string s) // write ``error: s and exit program
{
cerr << "error: " << s << '\n';
keep_window_open(); // for some Windows environments
exit(1);
}
// make std::min() and std::max() accessible on systems with antisocial macros:
#undef min
#undef max
// run-time checked narrowing cast (type conversion). See ???.
template<class R, class A> R narrow_cast(const A& a)
{
R r = R(a);
if (A(r) != a) error(string("info loss"));
return r;
}
// random number generators. See 24.7.
inline default_random_engine& get_rand()
{
static default_random_engine ran; // note: not thread_local
return ran;
};
inline void seed_randint(int s) { get_rand().seed(s); }
inline int randint(int min, int max) { return uniform_int_distribution<>{min, max}(get_rand()); }
inline int randint(int max) { return randint(0, max); }
//inline double sqrt(int x) { return sqrt(double(x)); } // to match C++0x
// container algorithms. See 21.9. // C++ has better versions of this:
template<typename C>
using Value_type = typename C::value_type;
template<typename C>
using Iterator = typename C::iterator;
template<typename C>
// requires Container<C>()
void sort(C& c)
{
std::sort(c.begin(), c.end());
}
template<typename C, typename Pred>
// requires Container<C>() && Binary_Predicate<Value_type<C>>()
void sort(C& c, Pred p)
{
std::sort(c.begin(), c.end(), p);
}
template<typename C, typename Val>
// requires Container<C>() && Equality_comparable<C,Val>()
Iterator<C> find(C& c, Val v)
{
return std::find(c.begin(), c.end(), v);
}
template<typename C, typename Pred>
// requires Container<C>() && Predicate<Pred,Value_type<C>>()
Iterator<C> find_if(C& c, Pred p)
{
return std::find_if(c.begin(), c.end(), p);
}
#endif //H112
So to start with this problem I added the constant character from and constant string from_key just under all of the other constants in the calculator code.
const char from = 'f'; // file input read token
const string from_key = "from"; // file input read keyword
Then I added a new case for from token in the statement()'s function definiton
double statement()
{
Token t = ts.get();
switch (t.kind)
{
/.../
case from:
return input();
/.../
}
}
Ignore the function input() being returned from this case. That's kind of my main problem with this question. I will come back to it after the next part.
I followed it by adding a Token return for from's case in Token Token_stream::get() function's definition
Token Token_stream::get()
{
if (full)
{
/.../
}
char ch;
cin >> ch; //note that >> skips whitespace(space, newline, tab, etc)
switch (ch)
{
/.../
default:
if (isalpha(ch))
{
string s;
s += ch;
while (cin.get(ch) && (isalpha(ch) || isdigit(ch) || ch == '_')) s += ch;
cin.putback(ch);
if (s == declkey) return Token(let); //declaration keyword
else if (s == constdeclkey) return Token(constant);
else if (s == from_key) return Token(from);
return Token{ name, s };
}
error("Bad Token");
return 1;
}
}
After that I started writing the input() function which I mentioned seconds ago. It looks like this for now:
double input()
{
Token t = ts.get();
if (t.kind != name) error("read-file name expected in declaration");
string read_file = t.name;
ifstream ifs{ read_file + ".txt" };
if (!ifs) error("cannot open file " + read_file + ".txt");
while (ifs)
{
try
{
//idk what to do here
}
catch (exception& e)
{
cerr << e.what() << '\n';
clean_up_mess();
}
return 0;
}
}
So what I want to do is pretty simple.
I just want Token Token_stream::get() function to read input from ifs rather than cin, kind of like redirect the istream to ifs while its reading data from the file then switch it back to cin when its done. I'm thinking about calling the statement() function back here to print the evaluation on screen for now and then I might redirect it again to write the output to a file y.txt as it's mentioned in the question.
My issue is that I really don't know how to do this. I tried doing an if-else statement to switch istream from cin to ifs but getting ifs out from input()'s scope to Token Token_stream::get()'s scope is a big hurdle for me.
I'm writing a lexer in C++ and I was able to compile the program and link it just fine; however when it comes to running it, I get a massive output dump, and I don't know why it's happening or if it's even related to the program I wrote. It's I cant even copy the entire dump but here is a gif of it happening.
Here is the program just for good measure.
I have compiling and running with cygwin on windows 10.
token.h
#ifndef TOKEN_H
#define TOKEN_H
#include <string>
class ice_lexer;
enum ice_type_enum {
ICE_NONE,
ICE_EOF,
ICE_MODULE,
ICE_IDENT,
ICE_FLT,
ICE_NUM,
ICE_STR,
// SYMBOLS
ICE_RPARA,
ICE_LPARA,
ICE_RBRAC,
ICE_LBRAC,
ICE_RCURL,
ICE_LCURL,
ICE_PERIOD,
ICE_COLON,
ICE_SEMIC,
ICE_COMMA,
// COMPARISON
ICE_EQLTO,
ICE_NOTEQL,
ICE_GRT,
ICE_GRTEQL,
ICE_LES,
ICE_LESEQL,
// ASSIGNMENT
ICE_EQL,
ICE_ADDEQL,
ICE_SUBEQL,
ICE_MULEQL,
ICE_DIVEQL,
ICE_CAREQL,
// URNARY
ICE_URNARYNOT,
ICE_URNARYSUB,
// BINARY
ICE_ADD,
ICE_SUB,
ICE_MUL,
ICE_DIV,
ICE_MOD,
ICE_CAR,
ICE_CAT,
// TERNARY
ICE_TERNARY,
// KEYWORDS
ICE_IMPORT,
ICE_AS,
ICE_WHILE,
ICE_FOR,
ICE_BREAK,
ICE_IF,
ICE_RETURN,
ICE_TRUE,
ICE_FALSE,
// TYPES
ICE_TYPEDEF,
ICE_CLASS,
ICE_ENUM,
ICE_INT,
ICE_FLOAT,
ICE_STRING,
ICE_BOOLEAN,
ICE_VOID,
ICE_STRUCT
};
typedef struct ice_keyword {
std::string word;
ice_type_enum type;
} ice_keyword;
class ice_type {
public:
std::string string;
ice_type_enum type;
ice_type(ice_type_enum t);
};
class ice_token {
public:
ice_type type;
size_t line;
size_t pos;
std::string str;
double flt;
size_t num;
ice_token(const ice_lexer* lexer, ice_type_enum t);
~ice_token() {};
void print();
};
#endif
token.cpp
#include <string>
#include <iostream>
#include "token.h"
#include "lexer.h"
static const std::string ice_type_enum_strings[] = {
// ABSTRACT
"no type",
"end of file",
"module",
"identifier",
"floating",
"number",
"string",
// SYMBOLS
"left para",
"right para",
"left brac",
"right brac",
"left curl",
"right curl",
"period",
"colon",
"semi",
"comma",
// COMPARISON
"eql to",
"not eql",
"grt",
"grt eql",
"les",
"les eql",
// ASSIGNMENT
"eql",
"add eql",
"sub eql",
"mul eql",
"div eql",
"car eql",
// URNARY
"urnarynot",
"urnarysub",
// BINARY
"add",
"sub",
"mul",
"div",
"mod",
"car",
"cat",
// TERNARY
"ternary",
// KEYWORDS
"import",
"as",
"while",
"for",
"break",
"if",
"return",
"true",
"false",
// TYPES
"typedef",
"class",
"enum",
"int",
"float",
"string",
"bool",
"void",
"struct"
};
ice_type::ice_type(ice_type_enum t) {
string = ice_type_enum_strings[t];
type = t;
}
ice_token::ice_token(const ice_lexer* lexer, ice_type_enum t):type(t) {
line = lexer->line;
pos = lexer->pos;
num = 0;
}
void ice_token::print() {
std::cout << "TYPE [ STR: \'" << this->type.string << "\' | ENUM: " << this->type.type << " ]:" << std::endl;
std::cout << "\tLINE NO. " << this->line << " IN THE " << this->pos << "TH ROW" << std::endl;
if (this->type.type == ICE_NUM)
std::cout << "\tNUMBER VALUE: \'" << this->num << "\'" << std::endl;
if (this->type.type == ICE_FLT)
std::cout << "\tFLOAT VALUE: \'" << this->flt << "\'" << std::endl;
if (this->type.type != ICE_NUM && this->type.type != ICE_FLT)
std::cout << "\tSTR VALUE: \'" << this->str << "\'" << std::endl;
}
lexer.h
#ifndef LEXER_H
#define LEXER_H
#include "token.h"
#include <string>
class ice_token;
#define newline() { \
this->line++; \
this->pos = 0; \
}
class ice_lexer {
public:
std::string source;
size_t length;
size_t line;
size_t pos;
size_t ptr;
ice_lexer(std::string file_name);
~ice_lexer() {};
ice_token lexer_next();
private:
const char look(int offset);
const char advance(int offset);
void comment(void);
void nested(void);
ice_token symbol(ice_type_enum type, std::string string, int move);
void ident(void);
};
#endif
lexer.cpp
#include <fstream>
#include <string>
#include <assert.h>
#include "token.h"
#include "lexer.h"
extern const ice_keyword keywords[] = {
{"import", ICE_IMPORT},
{"as", ICE_AS},
{"while", ICE_WHILE},
{"for", ICE_FOR},
{"break", ICE_BREAK},
{"if", ICE_IF},
{"return", ICE_RETURN},
{"true", ICE_TRUE},
{"false", ICE_FALSE},
{"typedef", ICE_TYPEDEF},
{"class", ICE_CLASS},
{"enum", ICE_ENUM},
{"int", ICE_INT},
{"float", ICE_FLOAT},
{"string", ICE_STRING},
{"bool", ICE_BOOLEAN},
{"void", ICE_VOID},
{"struct", ICE_STRUCT}
};
ice_lexer::ice_lexer(std::string file_name) {
std::ifstream input_file(file_name);
std::string content((std::istreambuf_iterator<char>(input_file)),
(std::istreambuf_iterator<char>()));
line = 1;
pos = 0;
ptr = 0;
}
const char ice_lexer::look(int offset = 0) {
if (this->ptr + offset < this->length)
return '\0';
return this->source[this->ptr + offset];
}
const char ice_lexer::advance(int offset) {
assert(this->ptr + offset < this->length);
return look();
}
void ice_lexer::comment(void) {
while (look() != '\n') { advance(1); }
newline();
}
void ice_lexer::nested(void) {
size_t depth = 1;
while (depth > 0) {
if (look(0) == '\\' && look(1) == '\\') {
advance(2);
depth--;
} else if (look(0) == '/' && look(1) == '/') {
advance(2);
depth++;
} else {
advance(1);
}
}
}
ice_token ice_lexer::symbol(ice_type_enum type, std::string string, int move) {
ice_token token(this, type);
token.str = string;
advance(move);
return token;
}
ice_token ice_lexer::lexer_next() {
/*
while (look() != '\0') {
switch (look()) {
case '+':
if (look(1) == '=')
return symbol(ICE_ADDEQL, "+=", 2);
else
return symbol(ICE_ADD, "+", 1);
break;
case '-':
if (look(1) == '=')
return symbol(ICE_ADDEQL, "+=", 2);
else
return symbol(ICE_SUB, "-", 1);
break;
case '*':
if (look(1) == '=')
return symbol(ICE_ADDEQL, "+=", 2);
else
return symbol(ICE_MUL, "*", 1);
break;
case '/':
if (look(1) == '=')
return symbol(ICE_DIVEQL, "/=", 2);
else if(look(1) == '/')
nested();
else
return symbol(ICE_DIV, "/", 1);
break;
case '^':
if (look(1) == '=')
return symbol(ICE_CAREQL, "^=", 2);
else
return symbol(ICE_CAR, "^", 1);
break;
case '~':
comment();
case '(':
return symbol(ICE_LPARA, "(", 1);
break;
case ')':
return symbol(ICE_RPARA, ")", 1);
break;
case '[':
return symbol(ICE_LBRAC, "[", 1);
break;
case ']':
return symbol(ICE_RBRAC, "]", 1);
break;
case '{':
return symbol(ICE_LCURL, "{", 1);
break;
case '}':
return symbol(ICE_RCURL, "}", 1);
break;
default:
break;
}
}
*/
}
I did a couple of changes and kinda got it working. Some of the stuff you're doing is a bit wonky.
ice_lexer::ice_lexer(std::string file_name) {
std::ifstream input_file(file_name);
std::string content((std::istreambuf_iterator<char>(input_file)),
(std::istreambuf_iterator<char>()));
line = 1;
pos = 0;
ptr = 0;
}
You're reading the file into content, which is a local variable. You probably meant to read it into source. You're also not updating length, which I guess should be source.size().
There's a bug in ice_lexer::look():
const char ice_lexer::look(int offset = 0) {
if (this->ptr + offset < this->length)
return '\0';
return this->source[this->ptr + offset];
}
The condition should be >=, not <. Another similar bug in ice_lexer::advance():
const char ice_lexer::advance(int offset) {
assert(this->ptr + offset < this->length);
return look();
}
should be <=.
When you see garbage on the console, it usually means that you're printing a string that doesn't have a \0 at the end, and it's just running away in memory until it finds a \0 somewhere. It might also crash if you're reading somewhere you're not supposed to. This in turn probably means that you have uninitialized variables.
Learn to use your debugger properly. This was pretty easy to figure out by stepping through functions and looking at variables.
I have a struct that looks like this:
// A lexical scope that keeps track of every declared variable
// in a function
struct LexState
{
// A map containing the index of a variable in it's function
// The key is the variable name
std::unordered_map<std::string, int> _vars;
Function *_function;
LexState *_parent;
};
This is the code using the struct:
#define DD(msg) std::cout << "Debug: " << msg << '\n'
#define DD1(msg, p1) std::cout << "Debug: " << msg << p1 << '\n'
#define DD2(msg, p1, p2) std::cout << "Debug: " << msg << p1 << p2 << '\n'
// A lexical scope that keeps track of every declared variable
// in a function
struct LexState
{
// A map containing the index of a variable in it's function
// The key is the variable name
std::unordered_map<std::string, int> _vars;
Function *_function;
LexState *_parent;
LexState(LexState *parent, Function *function)
{
_parent = parent;
_function = function;
}
Value *SearchVar(char *name)
{
if (_parent == nullptr)
{
return nullptr;
}
std::string str(name);
auto var = _parent->_vars.find(str);
if (var == _parent->_vars.end())
{
return _parent->SearchVar(name);
}
return _function->GetLocalVar(var->second)->GetValue();
}
};
class Compiler
{
public:
State *_state;
Function *_current_function;
GC *_gc;
LexState *_ls;
Compiler(State *state) :
_state(state)
{
_current_function = nullptr;
_gc = state->GetGC();
_current_function = new Function(nullptr);
_state->_main_function = _current_function;
_ls = nullptr;
}
void PushFunction()
{
Function *new_function = new Function(_current_function);
_current_function = new_function;
LexState *new_ls = new LexState(_ls, new_function);
// even now, right after creating new_ls, new_ls->_vars is 0
printf("%p\n", &new_ls->_vars);
_ls = new_ls;
}
void PopFunction()
{
_current_function = _current_function->_parent;
LexState *parent = _ls->_parent;
delete _ls;
_ls = parent;
}
int DeclareVar(const Symbol *node)
{
assert(node->_type == NODE_SYMBOL);
DD("Declaring variable");
auto token = node->_token;
char name[token->len + 1];
memcpy(name, token->pos, token->len);
name[token->len] = '\0';
int idx = _current_function->AddLocalVar(name);
std::string key(name);
printf("%p\n", &_ls->_vars);
if (_ls != nullptr)
_ls->_vars.insert(std::make_pair(key, idx));
else
DD("LexState nullptr");
DD("Variable declared");
return idx;
}
void Compile(const Node *node)
{
switch (node->_type)
{
case NODE_CHUNK:
CompileChunk((Chunk *)node);
break;
case NODE_BLOCK:
CompileBlock((Block *)node);
break;
case NODE_FUNCTION_DEF:
CompileFunctionDef((FunctionDef *)node);
break;
case NODE_CONDITIONAL:
CompileConditional((ConditionalStatement *)node);
break;
case NODE_BINARY_EXPR:
CompileBinaryExpr((BinaryExpr *)node);
break;
case NODE_UNARY_EXPR:
CompileUnaryExpr((UnaryExpr *)node);
break;
case NODE_SYMBOL:
CompileSymbol((Symbol *)node);
break;
case NODE_STRING_LITERAL:
CompileStringLiteral((StringLiteral *)node);
break;
case NODE_BOOL_LITERAL:
CompileBoolLiteral((BoolLiteral *)node);
break;
case NODE_INT_LITERAL:
CompileIntLiteral((IntLiteral *)node);
break;
}
}
void CompileChunk(const Chunk *chunk)
{
Compile(chunk->_block);
AddCode(OP_HALT);
}
void CompileBlock(const Block *block)
{
std::vector<Node *> vec = block->_vec;
for (auto it = vec.begin(); it != vec.end(); it++)
{
Compile(*it);
}
}
void CompileFunctionDef(const FunctionDef *func)
{
Value v;
int f = AddConstant(v);
AddCode(OP_PUSH, f);
Value *vp = _current_function->GetConstant(f);
if (func->_name)
{
int fvar = DeclareVar((Symbol *)func->_name);
AddCode(OP_STOR_LOCAL, fvar);
}
ArgList *argsnode = (ArgList *)func->_args;
auto args = argsnode->_vec;
int argcount = args.size();
PushFunction();
auto closure = new Closure(_current_function);
closure->_argcount = argcount;
std::cout << argcount << '\n';
vp->_closure = closure;
vp->_type = VALUE_CLOSURE;
// Compiling inside function
// Arguments are compiled in reserved order, because in the function call
// the passed arguments will be pushed in the order they are passed
for (auto it = args.rbegin(); it != args.rend(); it++)
{
int var = DeclareVar((Symbol *)*it);
AddCode(OP_STOR_LOCAL, var);
}
if (func->_guard)
{
Compile(func->_guard);
}
else
{
Compile(func->_body);
}
AddCode(OP_RETURN);
// End function
PopFunction();
}
void CompileConditional(const ConditionalStatement *node)
{
auto function = _current_function;
Compile(node->_condition);
int cond_res_idx = function->AddLocalVar();
AddCode(OP_DUP);
// Store the condition result into an internal variable
AddCode(OP_STOR_LOCAL, cond_res_idx);
AddCode(OP_DUP);
int true_jmp = AddCode(OP_JMP_T, 0);
int false_jmp = AddCode(OP_JMP_F, 0);
// Save index of the first code of the block
int block_idx = function->_code.size();
// Jump to block when condition is true
function->ChangeCode(true_jmp, block_idx);
Compile(node->_expr1);
AddCode(OP_PUSH_LOCAL, cond_res_idx);
// Jump to the end of the whole if-elif-else statement
// if the condition result was true
int exit_jmp = AddCode(OP_JMP_T, 0);
// Save index of the first code after this statement
int right_idx = function->_code.size();
function->ChangeCode(false_jmp, right_idx);
if (node->_expr2 != nullptr)
{
Compile(node->_expr2);
}
else
{
AddCode(OP_PUSH_NIL);
}
int end_idx = function->_code.size();
function->ChangeCode(exit_jmp, end_idx);
}
void CompileBinaryExpr(const BinaryExpr *expr)
{
auto function = _current_function;
auto token = expr->_op->type;
if (token == TOKEN_ASSIGN)
{
if (expr->_left->_type == NODE_SYMBOL)
{
int var = DeclareVar((Symbol *)expr->_left);
Compile(expr->_right);
AddCode(OP_STOR_LOCAL, var);
return;
}
}
// A function call
if (token == TOKEN_LEFTPAREN)
{
ArgList *arglist = (ArgList *)expr->_right;
auto args = arglist->_vec;
int argcount = args.size();
// A function call cannot have more than 255 arguments
assert(argcount < 256);
for (auto it = args.begin(), end = args.end();
it != end; it++)
{
Compile(*it);
}
Compile(expr->_left);
AddCode(OP_CALL, argcount);
return;
}
Compile(expr->_left);
// Both 'and' and 'or' expressions does short circuit
if (token == TOKEN_BOOL_AND ||
token == TOKEN_BOOL_OR)
{
AddCode(OP_DUP);
OpType op = (token == TOKEN_BOOL_AND) ? OP_JMP_F : OP_JMP_T;
int idx = AddCode(op, function->_ip + 1);
Compile(expr->_right);
int next = function->_code.size();
uint32_t instr = function->_code[idx];
function->ChangeCode(idx, next);
return;
}
// No need for lazy evaluation, compile normally
Compile(expr->_right);
switch (expr->_op->type)
{
case TOKEN_ADD:
AddCode(OP_ADD);
break;
case TOKEN_SUB:
AddCode(OP_SUB);
break;
case TOKEN_MUL:
AddCode(OP_MUL);
break;
case TOKEN_DIV:
AddCode(OP_DIV);
break;
case TOKEN_POW:
AddCode(OP_POW);
break;
case TOKEN_AND:
AddCode(OP_AND);
break;
case TOKEN_OR:
AddCode(OP_OR);
break;
case TOKEN_XOR:
AddCode(OP_XOR);
break;
case TOKEN_LT:
AddCode(OP_LT);
break;
case TOKEN_GT:
AddCode(OP_GT);
break;
case TOKEN_LTEQ:
AddCode(OP_LTEQ);
break;
case TOKEN_GTEQ:
AddCode(OP_GTEQ);
break;
case TOKEN_SHIFT_L:
AddCode(OP_SHIFT_L);
break;
case TOKEN_SHIFT_R:
AddCode(OP_SHIFT_R);
break;
}
}
void CompileUnaryExpr(const UnaryExpr *expr)
{
Compile(expr->_right);
switch (expr->_token->type)
{
case TOKEN_SUB:
AddCode(OP_NEGATE);
break;
case TOKEN_NOT:
AddCode(OP_NOT);
break;
case TOKEN_BOOL_NOT:
AddCode(OP_BOOL_NOT);
break;
}
}
// This function gets called only when it's a reference
void CompileSymbol(const Symbol *node)
{
auto token = node->_token;
char name[token->len + 1];
memcpy(name, token->pos, token->len);
name[token->len] = '\0';
DD1("Searching reference: ", name);
Value *upvalue = _ls->SearchVar(name);
if (upvalue)
{
int idx = _current_function->AddUpValue(upvalue);
AddCode(OP_PUSH_UPVALUE, idx);
return;
}
int idx = _current_function->GetLocalVarIndex(name);
AddCode(OP_PUSH_LOCAL, idx);
}
void CompileStringLiteral(const StringLiteral *sl)
{
Value v(sl->_token->str, sl->_token->len);
AddCode(OP_PUSH, AddConstant(v));
}
void CompileBoolLiteral(const BoolLiteral *bl)
{
Value v(bl->_token->type == TOKEN_TRUE);
AddCode(OP_PUSH, AddConstant(v));
}
void CompileIntLiteral(const IntLiteral *il)
{
Value v(il->_token->num);
AddCode(OP_PUSH, AddConstant(v));
}
int AddCode(OpType code)
{
return _current_function->AddCode(code);
}
int AddCode(OpType code, int a)
{
return _current_function->AddCode(code, a);
}
int AddConstant(const Value &v)
{
return _current_function->AddConstant(v);
}
};
The program outputs:
Declaring variable
0
It crashes before the DD("Variable declared") part.
Even though i'm new to C++, I'm pretty sure I don't need to allocate the unordered map myself, right? As it's not a pointer, it will be allocated when I do new LexState
So is there any initialization/allocation I am forgetting?
%i is not the right format specifier to use to print a pointer. You are most likely getting the output that represents a truncated value of the pointer. Use %p instead.
printf("%p\n", &state->_vars);
I don't see anything wrong using state->_vars. See working code at http://ideone.com/YAJK5K.
I can't use boost::spirit in my environment. But I would like to use STL and boost as much as possible to build my own expression evaluator. Is there such an alternative to boost::spirit?
The following code includes unit tests and a complete parser I wrote in an about 90 minute session at ACCU 200x (8 or 9). If you need more, it might be easy enough to extend. You can make it do doubles by defining Parse::value_type, or extracting it into a separate header file and make it a template class.
Or you can take the test cases and try yourself. (it uses CUTE from http://cute-test.com)
#include "cute.h"
#include "ide_listener.h"
#include "cute_runner.h"
#include <cctype>
#include <map>
namespace {
class Parser {
typedef int value_type;
typedef std::vector<value_type> valuestack;
typedef std::vector<char> opstack;
typedef std::map<std::string,value_type> memory;
public:
memory variables;
private:
void evaluateSingleOperator(char op,value_type &result,value_type operand) {
switch(op) {
case '+': result += operand; break;
case '-': result -= operand; break;
case '*': result *= operand; break;
case '/': result /= operand; break;
default: throw("invalid operand");
}
}
void evaluateStacks(valuestack &values, opstack &ops) {
while(ops.size() && values.size()>1) {
char op = ops.back(); ops.pop_back();
value_type operand = values.back(); values.pop_back();
evaluateSingleOperator(op,values.back(),operand);
}
}
bool higherPrecedenceOrLeftAssociative(char last, char current) {
return (last == current)||(last == '*' || last == '/') ;
}
bool shouldEvaluate(char op,opstack const &ops) {
return ops.size() > 0 && higherPrecedenceOrLeftAssociative(ops.back(),op);
}
std::string parseVariableName(std::istream &is) {
std::string variable;
char nextchar=0;
while ((is >> nextchar) && isalpha(nextchar)) {
variable += nextchar;
}
if (variable.size() == 0) throw std::string("internal parse error");
is.unget();
return variable;
}
int peekWithSkipWhiteSpace(std::istream &is) {
int nextchar = EOF;
while(isspace(nextchar = is.peek())) is.get();
return nextchar;
}
value_type getOperand(std::istream &is) {
int nextchar = peekWithSkipWhiteSpace(is);
if (nextchar == EOF) throw std::string("syntax error operand expected");
if (isdigit(nextchar)){
value_type operand=0;
if (!(is >> operand)) throw std::string("syntax error getting number") ;
return operand;
} else if ('(' == nextchar) {
is.get();
return parse(is);
} else if (isalpha(nextchar)) {
std::string variable= parseVariableName(is);
if( parseAssignmentOperator(is)) {
variables[variable] = parse(is);
} else {
if (!variables.count(variable)) throw std::string("undefined variable: ")+variable;
}
return variables[variable];
}
throw std::string("syntax error");
}
bool parseAssignmentOperator(std::istream &is) {
int nextchar = peekWithSkipWhiteSpace(is);
if ('=' != nextchar) {
return false;
}
is.get();
return true;
}
public:
value_type parse(std::istream &is) {
is >> std::skipws;
valuestack values;
opstack ops;
values.push_back(getOperand(is));
char op=')';
while((is >>op) && op != ')') {
if (shouldEvaluate(op, ops)) {
evaluateStacks(values, ops);
}
values.push_back(getOperand(is));
ops.push_back(op);
}
evaluateStacks(values,ops);
return values.back();
}
value_type eval(std::string s) {
std::istringstream is(s);
return parse(is);
}
};
int eval(std::string s) {
return Parser().eval(s);
}
void shouldThrowEmptyExpression() {
eval("");
}
void shouldThrowSyntaxError() {
eval("()");
}
void testSimpleNumber() {
ASSERT_EQUAL(5,eval("5"));
}
void testSimpleAdd() {
ASSERT_EQUAL(10,eval("5 +5"));
}
void testMultiAdd() {
ASSERT_EQUAL(10,eval("1 + 2 + 3+4"));
}
void testSimpleSubtract() {
ASSERT_EQUAL(5,eval("6-1"));
}
void testTenPlus12Minus100() {
ASSERT_EQUAL(-78,eval("10+12-100"));
}
void testMultiply() {
ASSERT_EQUAL(50,eval("10*5"));
}
void testDivision() {
ASSERT_EQUAL(7,eval("21/3"));
}
void testAddThenMultiply() {
ASSERT_EQUAL(21,eval("1+4 *5"));
}
void testAddThenMultiplyAdd() {
ASSERT_EQUAL(16,eval("1+4*5 -5"));
}
void testAddSubSub() {
ASSERT_EQUAL(-4,eval("1+2-3-4"));
}
void testSimpleParenthesis() {
ASSERT_EQUAL(1,eval("(1)"));
}
void testSimpleOperandParenthesis() {
ASSERT_EQUAL(2,eval("1+(1)"));
}
void testParenthesis() {
ASSERT_EQUAL(5,eval("2*(1+4)-5"));
}
void testNestedParenthesis() {
ASSERT_EQUAL(16,eval("2*(1+(4*3)-5)"));
}
void testDeeplyNestedParenthesis() {
ASSERT_EQUAL(8,eval("((2*((1+(4*3)-5)))/2)"));
}
void testSimpleAssignment() {
Parser p;
ASSERT_EQUAL(1, p.eval("a=1*(2-1)"));
ASSERT_EQUAL(8, p.eval("a+7"));
ASSERT_EQUAL(1, p.eval("2-a"));
}
void testLongerVariables() {
Parser p;
ASSERT_EQUAL(1, p.eval("aLongVariableName=1*(2-1)"));
ASSERT_EQUAL(42, p.eval("AnotherVariable=7*(4+2)"));
ASSERT_EQUAL(1, p.eval("2-(aLongVariableName*AnotherVariable)/42"));
}
void shouldThrowUndefined() {
eval("2 * undefinedVariable");
}
void runSuite(){
cute::suite s;
//TODO add your test here
s.push_back(CUTE_EXPECT(CUTE(shouldThrowEmptyExpression),std::string));
s.push_back(CUTE_EXPECT(CUTE(shouldThrowSyntaxError),std::string));
s.push_back(CUTE(testSimpleNumber));
s.push_back(CUTE(testSimpleAdd));
s.push_back(CUTE(testMultiAdd));
s.push_back(CUTE(testSimpleSubtract));
s.push_back(CUTE(testTenPlus12Minus100));
s.push_back(CUTE(testMultiply));
s.push_back(CUTE(testDivision));
s.push_back(CUTE(testAddThenMultiply));
s.push_back(CUTE(testAddSubSub));
s.push_back(CUTE(testAddThenMultiplyAdd));
s.push_back(CUTE(testSimpleParenthesis));
s.push_back(CUTE(testSimpleOperandParenthesis));
s.push_back(CUTE(testParenthesis));
s.push_back(CUTE(testNestedParenthesis));
s.push_back(CUTE(testDeeplyNestedParenthesis));
s.push_back(CUTE(testSimpleAssignment));
s.push_back(CUTE(testLongerVariables));
s.push_back(CUTE_EXPECT(CUTE(shouldThrowUndefined),std::string));
cute::ide_listener lis;
cute::makeRunner(lis)(s, "The Suite");
}
}
int main(){
runSuite();
}
YACC++ is a very good tool for parser generator for c++ applications. ANTLR is also agood option howver that does not have good documentation for its usage in C/C++.