C++ simple operations (+,-,/,*) evaluation class - c++

I am looking for a C++ class I can incorporate into a project I am working on.
the functionality I need is evaluation of string operations to numerical form: for example "2 + 3*7" should evaluate to 23.
I do realize that what I am asking for is a kind of interpreter, and that there are tools to build them, but my background in CS is very poor, so I would appreciate it if you could point me to a ready-made class.

This should do exactly what you want. You can test it live at: http://www.wowpanda.net/calc
It uses Reverse Polish Notation and supports:
Operator precedence (5 + 5 * 5 = 30 not 50)
Parens ((5 + 5) * 5 = 50)
The following operators: +, -, *, /
EDIT: you'll probably want to remove the Abs() at the bottom; for my needs 0 - 5 should be 5 and not -5!
static bool Rpn(const string expression, vector<string> &output)
{
output.clear();
char *end;
vector<string> operator_stack;
bool expecting_operator = false;
for (const char *ptr = expression.c_str(); *ptr; ++ptr) {
if (IsSpace(*ptr))
continue;
/* Is it a number? */
if (!expecting_operator) {
double number = strtod(ptr, &end);
if (end != ptr) {
/* Okay, it's a number */
output.push_back(boost::lexical_cast<string>(number));
ptr = end - 1;
expecting_operator = true;
continue;
}
}
if (*ptr == '(') {
operator_stack.push_back("(");
expecting_operator = false;
continue;
}
if (*ptr == ')') {
while (operator_stack.size() && operator_stack.back() != "(") {
output.push_back(operator_stack.back());
operator_stack.pop_back();
}
if (!operator_stack.size())
return false; /* Mismatched parenthesis */
expecting_operator = true;
operator_stack.pop_back(); /* Pop '(' */
continue;
}
if (*ptr == '+' || *ptr == '-') {
while (operator_stack.size() && IsMathOperator(operator_stack.back())) {
output.push_back(operator_stack.back());
operator_stack.pop_back();
}
operator_stack.push_back(boost::lexical_cast<string>(*ptr));
expecting_operator = false;
continue;
}
if (*ptr == '*' || *ptr == '/') {
while (operator_stack.size() && (operator_stack.back() == "*" || operator_stack.back() == "/")) {
output.push_back(operator_stack.back());
operator_stack.pop_back();
}
operator_stack.push_back(boost::lexical_cast<string>(*ptr));
expecting_operator = false;
continue;
}
/* Error */
return false;
}
while (operator_stack.size()) {
if (!IsMathOperator(operator_stack.back()))
return false;
output.push_back(operator_stack.back());
operator_stack.pop_back();
}
return true;
} // Rpn
/***************************************************************************************/
bool Calc(const string expression, double &output)
{
vector<string> rpn;
if (!Rpn(expression, rpn))
return false;
vector<double> tmp;
for (size_t i = 0; i < rpn.size(); ++i) {
if (IsMathOperator(rpn[i])) {
if (tmp.size() < 2)
return false;
double two = tmp.back();
tmp.pop_back();
double one = tmp.back();
tmp.pop_back();
double result;
switch (rpn[i][0]) {
case '*':
result = one * two;
break;
case '/':
result = one / two;
break;
case '+':
result = one + two;
break;
case '-':
result = one - two;
break;
default:
return false;
}
tmp.push_back(result);
continue;
}
tmp.push_back(atof(rpn[i].c_str()));
continue;
}
if (tmp.size() != 1)
return false;
output = Abs(tmp.back());
return true;
} // Calc
/***************************************************************************************/

boost::spirit comes with a calculator example which would do what you need:
http://www.boost.org/doc/libs/1_33_1/libs/spirit/example/fundamental/ast_calc.cpp

muParser is written in C++ and does just what you need.

C++ in Action, in addition to being a great book on C++, includes a fully working calculator, doing what you need (and actually much more). And the book is available for free online

Related

How do I only import the class from a module once [duplicate]

This question already has answers here:
What is causing the "error LNK2005: already defined in .obj" s errors in my code?
(1 answer)
What does this error mean and how do I solve it: error LNK2005: "<symbol>" already defined in <file.obj>
(1 answer)
Closed 2 months ago.
I'm trying to import a lightweight maths parsing library. It is only available as a .cpp file. (this is the library)
When I import it using #include mathparser.cpp, I get loads of LNK2005 errors, saying it is defining all the class methods again.
I'm not actually defining them in the main file though, why might these errors be occuring, and what should I do to fix them?
You should never #include a cpp file. You should include header files, and ensure they have header guards (or use #pragma once). In this case the mathparser.cpp should be split up such that the parser class is in its own header and cpp file, then just main is in the cpp file.
mathparser.h
#pragma once
// Header names must not be padded with blanks inside the angle brackets:
// GCC and Clang treat "< iostream >" as a file literally called " iostream ".
#include <iostream>
#include <cstdlib>
#include <cctype>
#include <cstring>
#include <math.h>
// Used by the trigonometric functions to convert between degrees and radians.
#define PI 3.14159265358979323846
// Kept for source compatibility: mathparser.cpp and main.cpp use unqualified
// standard-library names (cout, cin, strcpy, ...).
using namespace std;
// Token categories produced by parser::get_token(); 0 means "no token".
enum types { DELIMITER = 1, VARIABLE, NUMBER, FUNCTION };
// Number of single-letter variables (A..Z) the parser stores.
const int NUMVARS = 26;
// Recursive-descent evaluator for an arithmetic expression held in a C string.
// eval_exp() is the entry point; eval_exp1..eval_exp6 each handle one grammar
// level (assignment, +/-, * and /, ^, unary sign, primary) and get_token()
// is the tokenizer they share.
class parser {
char *exp_ptr; // current read position inside the expression being evaluated
char token[256]; // text of the current token (empty string = no token)
char tok_type; // category of the current token: one of `types`, or 0
double vars[NUMVARS]; // values of the variables, indexed by (letter - 'A')
void eval_exp1(double &result); // assignment
void eval_exp2(double &result); // addition / subtraction
void eval_exp3(double &result); // multiplication / division
void eval_exp4(double &result); // exponentiation
void eval_exp5(double &result); // unary + / -
void eval_exp6(double &result); // function call, parentheses, number, variable
void get_token(); // reads the next token into token / tok_type
public:
parser(); // zeroes the variables and clears errormsg
double eval_exp(char *exp); // evaluates exp; check errormsg afterwards
char errormsg[64]; // empty after a successful evaluation, otherwise the error text
};
mathparser.cpp
#include "stdafx.h"
#include "mathparser.h"
// Parser constructor: no expression attached yet, no pending error, and all
// variables set to zero.
parser::parser()
{
    exp_ptr = NULL;
    errormsg[0] = '\0';
    for (double *slot = vars; slot != vars + NUMVARS; ++slot)
        *slot = 0.0;
}
// Parser entry point.
// Evaluates the NUL-terminated expression `exp` and returns its value.
// errormsg is cleared first; on failure it holds a description and the
// returned number is not meaningful (0 for a missing expression).
double parser::eval_exp(char *exp)
{
    errormsg[0] = '\0';
    // Start from a defined value: on a syntax error the lower levels may
    // return without ever assigning to result.
    double result = 0.0;
    if (exp == NULL)
    {
        strcpy(errormsg, "No Expression Present");
        return 0.0;
    }
    exp_ptr = exp;
    get_token();
    if (!*token)
    {
        strcpy(errormsg, "No Expression Present"); // no expression present
        return 0.0;
    }
    eval_exp1(result);
    if (*token) // last token must be null
        strcpy(errormsg, "Syntax Error");
    return result;
}
// Process an assignment.
void parser::eval_exp1(double &result)
{
int slot;
char temp_token[80];
if (tok_type == VARIABLE)
{
// save old token
char *t_ptr = exp_ptr;
strcpy(temp_token, token);
// compute the index of the variable
slot = *token - 'A';
get_token();
if (*token != '=')
{
exp_ptr = t_ptr; // return current token
strcpy(token, temp_token); // restore old token
tok_type = VARIABLE;
}
else {
get_token(); // get next part of exp
eval_exp2(result);
vars[slot] = result;
return;
}
}
eval_exp2(result);
}
// Add or subtract two terms.
// Evaluates a left-associative chain "term (+|-) term ..." into result.
void parser::eval_exp2(double &result)
{
    char op; // `register` was removed from the language in C++17
    eval_exp3(result);
    while ((op = *token) == '+' || op == '-')
    {
        // Defined starting value: eval_exp3 may leave its argument untouched
        // when it runs into a syntax error.
        double temp = 0.0;
        get_token();
        eval_exp3(temp);
        switch (op)
        {
        case '-':
            result = result - temp;
            break;
        case '+':
            result = result + temp;
            break;
        }
    }
}
// Multiply or divide two factors.
// Evaluates a left-associative chain "factor (*|/) factor ..." into result.
// Division by zero is not trapped; it follows floating-point rules.
void parser::eval_exp3(double &result)
{
    char op; // `register` was removed from the language in C++17
    eval_exp4(result);
    while ((op = *token) == '*' || op == '/')
    {
        // Defined starting value: eval_exp4 may leave its argument untouched
        // when it runs into a syntax error.
        double temp = 0.0;
        get_token();
        eval_exp4(temp);
        switch (op)
        {
        case '*':
            result = result * temp;
            break;
        case '/':
            result = result / temp;
            break;
        }
    }
}
// Process an exponent.
// Evaluates "operand ^ operand ..." into result. The loop applies the
// operators from left to right, so a ^ b ^ c is computed as (a ^ b) ^ c.
void parser::eval_exp4(double &result)
{
    eval_exp5(result);
    while (*token == '^')
    {
        // Defined starting value: eval_exp5 may leave its argument untouched
        // when it runs into a syntax error.
        double temp = 0.0;
        get_token();
        eval_exp5(temp);
        result = pow(result, temp);
    }
}
// Evaluate a unary + or -.
// Consumes an optional leading sign, evaluates the operand that follows and
// negates it when the sign was '-'.
void parser::eval_exp5(double &result)
{
    char op = 0; // `register` was removed from the language in C++17
    // Parenthesized explicitly: the original "a && b || c" grouped as
    // "(a && b) || c", which is not what the condition was meant to say.
    if (tok_type == DELIMITER && (*token == '+' || *token == '-'))
    {
        op = *token;
        get_token();
    }
    eval_exp6(result);
    if (op == '-')
        result = -result;
}
// Process a function, a parenthesized expression, a value or a variable
void parser::eval_exp6(double &result)
{
bool isfunc = (tok_type == FUNCTION);
char temp_token[80];
if (isfunc)
{
strcpy(temp_token, token);
get_token();
}
if ((*token == '('))
{
get_token();
eval_exp2(result);
if (*token != ')')
strcpy(errormsg, "Unbalanced Parentheses");
if (isfunc)
{
if (!strcmp(temp_token, "SIN"))
result = sin(PI / 180 * result);
else if (!strcmp(temp_token, "COS"))
result = cos(PI / 180 * result);
else if (!strcmp(temp_token, "TAN"))
result = tan(PI / 180 * result);
else if (!strcmp(temp_token, "ASIN"))
result = 180 / PI*asin(result);
else if (!strcmp(temp_token, "ACOS"))
result = 180 / PI*acos(result);
else if (!strcmp(temp_token, "ATAN"))
result = 180 / PI*atan(result);
else if (!strcmp(temp_token, "SINH"))
result = sinh(result);
else if (!strcmp(temp_token, "COSH"))
result = cosh(result);
else if (!strcmp(temp_token, "TANH"))
result = tanh(result);
else if (!strcmp(temp_token, "ASINH"))
result = asinh(result);
else if (!strcmp(temp_token, "ACOSH"))
result = acosh(result);
else if (!strcmp(temp_token, "ATANH"))
result = atanh(result);
else if (!strcmp(temp_token, "LN"))
result = log(result);
else if (!strcmp(temp_token, "LOG"))
result = log10(result);
else if (!strcmp(temp_token, "EXP"))
result = exp(result);
else if (!strcmp(temp_token, "SQRT"))
result = sqrt(result);
else if (!strcmp(temp_token, "SQR"))
result = result*result;
else if (!strcmp(temp_token, "ROUND"))
result = round(result);
else if (!strcmp(temp_token, "INT"))
result = floor(result);
else
strcpy(errormsg, "Unknown Function");
}
get_token();
}
else
switch (tok_type)
{
case VARIABLE:
result = vars[*token - 'A'];
get_token();
return;
case NUMBER:
result = atof(token);
get_token();
return;
default:
strcpy(errormsg, "Syntax Error");
}
}
// Obtain the next token.
// Reads from exp_ptr into token and sets tok_type to DELIMITER, VARIABLE,
// FUNCTION or NUMBER. At the end of the expression, or on a character that
// starts no token, token is left empty and tok_type is 0. Identifiers are
// upper-cased; one directly followed by '(' is a FUNCTION.
void parser::get_token()
{
    char *temp = token;
    // Last position that may receive a character; one byte stays free for
    // the terminator.
    char *const limit = token + sizeof token - 1;
    bool truncated = false;
    tok_type = 0;
    *temp = '\0';
    // Skip white space BEFORE testing for the end of the expression.
    // Otherwise the terminator reached after trailing blanks would be looked
    // up with strchr(), which matches '\0', and exp_ptr would be advanced
    // past the end of the string.
    // The <cctype> functions need a value representable as unsigned char.
    while (isspace(static_cast<unsigned char>(*exp_ptr)))
        ++exp_ptr;
    if (!*exp_ptr) // at end of expression
        return;
    if (strchr("+-*/%^=()", *exp_ptr))
    {
        tok_type = DELIMITER;
        *temp++ = *exp_ptr++; // advance to next char
    }
    else if (isalpha(static_cast<unsigned char>(*exp_ptr)))
    {
        while (*exp_ptr && !strchr(" +-/*%^=()\t\r", *exp_ptr))
        {
            if (temp < limit)
                *temp++ = static_cast<char>(toupper(static_cast<unsigned char>(*exp_ptr)));
            else
                truncated = true;
            ++exp_ptr;
        }
        while (isspace(static_cast<unsigned char>(*exp_ptr))) // skip over white space
            ++exp_ptr;
        tok_type = (*exp_ptr == '(') ? FUNCTION : VARIABLE;
    }
    else if (isdigit(static_cast<unsigned char>(*exp_ptr)) || *exp_ptr == '.')
    {
        while (*exp_ptr && !strchr(" +-/*%^=()\t\r", *exp_ptr))
        {
            if (temp < limit)
                *temp++ = static_cast<char>(toupper(static_cast<unsigned char>(*exp_ptr)));
            else
                truncated = true;
            ++exp_ptr;
        }
        tok_type = NUMBER;
    }
    *temp = '\0';
    if (truncated)
        strcpy(errormsg, "Token Too Long");
    if ((tok_type == VARIABLE) && (token[1]))
        strcpy(errormsg, "Only first letter of variables is considered");
}
main.cpp
#include "mathparser.h"
int main()
{
char expstr[256];
parser ob;
cout << "Math expression parser. Enter a blank line to stop.\n\n";
do
{
cout << "Enter expression: ";
cin.getline(expstr, 255);
double ans = ob.eval_exp(expstr);
if (*ob.errormsg)
cout << "Error: " << ob.errormsg << "\n\n";
else
cout << "Answer: " << ans << "\n\n";
} while (*expstr);
return 0;
}
Then in your code you can #include "mathparser.h" to instantiate the parser class for your purposes.
Note that this code itself is full of poor practices and therefore a bad reference to study for learning modern C++, but that is outside the scope of your current question.

Failing to parse different math operators

This question is a follow-up from this one. Basically I'm trying to make a parser which calculates the total result of a string. 5+5+3*2/1 should give 16. This already works for strings only containing plusses and mins, so -55-44+1-2+123-54442+327737+1-2 successfully gives 273317.
However, it does not work when plusses/minuses get mixed with times/divides. So 1*2-2*3 returns 6 instead of -4. I think this is because I try to respect the order in which math needs to be executed (first times and divides, then plusses and minuses), but the operator somehow doesn't get updated.
#include <iostream>
#include <string>
#include <algorithm>
//The binary operators the calculator understands.
//UNDEFINED marks "no operator chosen yet" and is also what
//charToOperator() returns for an unknown character.
enum Operator {
PLUS,
MIN,
TIMES,
DIVIDE,
UNDEFINED
};
/************************IGNORE********************/
/*
 * Debug helper: maps an Operator back to its character.
 * :param o: Operator to convert
 * :return char: '+', '-', '*' or '/'; '0' for anything else
 */
char operatorToChar(Operator o) {
    if (o == Operator::PLUS)
        return '+';
    if (o == Operator::MIN)
        return '-';
    if (o == Operator::TIMES)
        return '*';
    if (o == Operator::DIVIDE)
        return '/';
    return '0';
}
/***************************************************/
/*
 * Function to check whether all times- and divide-operators have disappeared
 * from the action string. This is used to respect the order of math (first
 * times and divides, then plusses and mins).
 *
 * :param action: The action string (not modified)
 * :return bool: Returns true when the string contains neither '*' nor '/',
 *               false as soon as one of them is present
 */
bool timesAndDividesGone(std::string& action) {
    return action.find_first_of("*/") == std::string::npos;
}
/*
 * Translates an operator character into the matching Operator value.
 * :param c: Character to translate, normally one of '+', '-', '*', '/'
 * :return Operator: The matching operator, or Operator::UNDEFINED for any
 *                   other character
 */
Operator charToOperator(char c) {
    if (c == '+')
        return Operator::PLUS;
    if (c == '-')
        return Operator::MIN;
    if (c == '*')
        return Operator::TIMES;
    if (c == '/')
        return Operator::DIVIDE;
    return Operator::UNDEFINED;
}
/*
 * Applies a binary operator to two numbers.
 * :param x: Left-hand operand
 * :param y: Right-hand operand
 * :param o: Operator (PLUS, MIN, TIMES or DIVIDE)
 * :return double: Result of the calculation; 0 when o is none of the four
 *
 * Example:
 * math(5, 5, Operator::PLUS) == 10
 *
 */
double math(double x, double y, Operator o) {
    if (o == Operator::PLUS)
        return x + y;
    if (o == Operator::MIN)
        return x - y;
    if (o == Operator::TIMES)
        return x * y;
    if (o == Operator::DIVIDE)
        return x / y;
    return 0;
}
/*
* Recursive function performing all the calculations from an action string.
* Each call looks at the first character of `actions`, updates the running
* state held in the reference parameters, removes that character and recurses
* until the string is empty.
*
* :param result: Receives the calculated result; also printed as debug output on every call
* :param actions: Action string (what you type in your calculator; e.g: 5+5). CONSUMED: its first character is erased on every recursion, so the caller's string is empty afterwards
* :param first_nr: Empty at the first call; collects the digits left of the operator (in 55+77 this becomes "55"). Replaced by the intermediate result when the next operator is reached
* :param second_nr: Same as first_nr but for the right side of the operator
* :param oper: Operator to apply to first_nr and second_nr; Operator::UNDEFINED until one has been read
* :return double: The value finally stored in result
*
* NOTE(review): while `actions` still contains a '*' or '/', only those two
* characters are recognised as operators (see operatorInChar below), so a '+'
* or '-' read in that phase falls into the digit branch and is appended to
* first_nr / second_nr. This looks like the reason mixed expressions such as
* 1*2-2*3 are mis-evaluated -- confirm before relying on this function.
*/
double calculate(double& result, std::string& actions, std::string& first_nr, std::string& second_nr, Operator& oper) {
//DEBUG OUTPUT: printed on every recursion step
std::cout << actions << " Gives ";
std::cout << std::to_string(result) << std::endl;
//Base-condition:
//If action string is empty, do the last pending calculation and return
if (actions == "") {
//Scenario for when the expression ends with an operator
//e.g: 1+1-
if (second_nr == "")
second_nr = "0";
//Update result
//NOTE(review): std::stod is called on first_nr even if it is still empty
result = math(std::stod(first_nr), std::stod(second_nr), oper);
return result;
}
//Get first character from action string
char c = actions[0];
//Making sure order of math is respected (first times and divides)
//and then plus and min.
//Only the first two elements are given here; the other two stay '\0'
//until no '*' or '/' is left in the action string.
char operatorInChar[4] = {'*', '/'};
if (timesAndDividesGone(actions)) {
operatorInChar[2] = '+';
operatorInChar[3] = '-';
}
//If first character is an operator
if (std::find(std::begin(operatorInChar), std::end(operatorInChar), c) != std::end(operatorInChar)) {
//Scenario for when first action is an operator
//e.g: -1+1
if (first_nr == "") {
//Looks at the character AFTER the operator to pick the start value
if (actions[1] == '*')
first_nr = "1";
else
first_nr = "0";
}
//If operator is not yet set in a previous recursion
if (oper == Operator::UNDEFINED) {
oper = charToOperator(c);
//If second_nr is not empty, we need to calculate the two numbers together
if (second_nr != "") {
//Update result
result = math(std::stod(first_nr), std::stod(second_nr), oper);
}
} else {
//An operator was already pending: apply it, make the outcome the new
//left operand and continue with the operator just read
//Update result
result = math(std::stod(first_nr), std::stod(second_nr), oper);
first_nr = std::to_string(result);
second_nr = "";
//Remove first character from action string because it's analysed in this recursion
actions = actions.erase(0, 1);
oper = charToOperator(c);
return calculate(result, actions, first_nr, second_nr, oper);
}
} else {
//If the character is not a operator but a number we append it to the correct nr
//we add to first_nr if the operator is not yet set, if we already encountered an operator
//we add to second_nr.
//e.g: actions = "123+789"
if (oper == Operator::UNDEFINED) {
first_nr += c;
} else {
second_nr += c;
}
}
//Remove first character from action string because it's analysed in this recursion
actions = actions.erase(0, 1);
//DEBUG OUTPUT:
//std::cout << first_nr << operatorToChar(oper) << second_nr << std::endl;
//std::cout << std::endl << actions << " Gives ";
//std::cout << std::to_string(result) << std::endl;
//Make recursive call
return calculate(result, actions, first_nr, second_nr, oper);
}
/*
 * Test driver: evaluates a fixed expression and prints "<expression> = <value>".
 */
int main() {
    //String we want to calculate
    std::string str = "1*2-2*3";
    //calculate() consumes its input, so keep a copy for the output line
    std::string str_copy_for_output = str;
    //Variables
    double result = 0;
    std::string first_nr = "";
    std::string second_nr = "";
    Operator oper = Operator::UNDEFINED;
    //Call function. The return type is double; storing it in an int would
    //silently drop the fractional part (e.g. 5/2 would print 2).
    double calculation = calculate(result, str, first_nr, second_nr, oper);
    //Output
    std::cout << std::endl << str_copy_for_output << " = " << calculation << std::endl;
    return 0;
}
tl;dr
This code works perfectly for strings only containing plusses and minuses, or only times and divides. Combining plusses/minuses with times/divides messes it up. Probably the operator parameter fails to update. How to fix this?
I'm sorry that I did not analyze your code in detail, but it is far too complicated for what you are trying to do. Therefore I will not tell you exactly where the problem is; instead I will propose something simpler.
One way or another you need to manage a stack because an algebraic expression must be handled as a tree structure and the evaluation process has to follow that structure. It can't be handled as a flat structure and you can't escape the management of operator precedence. In addition to that an expression is normally evaluated from left to right (left associativity).
That said if you really don't want to use a parsing tool (which IMHO would be more simple and clean), it is always possible to parse "manually". In that case you may avoid to manage an explicit stack by using the call stack itself as demonstrated in the following code:
#include <iostream>
// Precedence rank of a binary operator. Multiplicative operators get the
// smaller number (3), additive ones the larger (4); any other character
// yields 0.
int precedenceOf(char op) {
    if (op == '*' || op == '/')
        return 3;
    if (op == '+' || op == '-')
        return 4;
    return 0; // not one of the four operators
}
// Largest rank precedenceOf() can return.
const int MAX_PRECEDENCE = 4;
// Applies the binary operator c to left and right.
// Returns 0 when c is not one of + - * /.
double computeOp(double left, double right, char c) {
    if (c == '+')
        return left + right;
    if (c == '-')
        return left - right;
    if (c == '*')
        return left * right;
    if (c == '/')
        return left / right;
    return 0; // unknown operator
}
// Advances expr to the next operator character (+ - * /), skipping every
// other character on the way, and returns it with expr left just behind it.
// Returns 0, with expr on the terminator, when no operator remains.
char readOperator(const char*& expr)
{
    for (; *expr != 0; ++expr) {
        const char ch = *expr;
        if (ch == '+' || ch == '-' || ch == '*' || ch == '/') {
            ++expr;
            return ch;
        }
    }
    return 0;
}
// Skips leading blanks, then reads a run of decimal digits as a non-negative
// whole number. expr is left on the first character after the digits; when
// there are no digits the result is 0 and only the blanks are consumed.
double readOperand(const char*& expr)
{
    while (*expr == ' ')
        ++expr;
    double value = 0;
    for (; *expr >= '0' && *expr <= '9'; ++expr)
        value = value * 10 + *expr - '0';
    return value;
}
double eval(const char*& expr, int breakPrecedence = MAX_PRECEDENCE + 1);
// evalRight function reads the right part of an expression and evaluates it
// (up to the point where an operator with precedence 'breakPrecedence' is reached)
// returns the computation of the expression with the left operand passed as parameter.
double evalRight(const char*& expr, int breakPrecedence, double leftOperand)
{
do
{
auto posBeforeOp = expr;
auto op = readOperator(expr);
if (op == 0)
return leftOperand; // end of expression reached, meaning there is no right part
auto prec = precedenceOf(op);
if (prec >= breakPrecedence)
{
expr = posBeforeOp; // we backtrack before the operator (which will be handled by one of our caller)
return leftOperand;
}
// reads and evaluates the expression on the right hand side
auto rightOperand = eval(expr, prec);
// computes the current operation, the result becoming the new left operand of the next operation
leftOperand = computeOp(leftOperand, rightOperand, op);
} while (true);
}
// eval function reads an expression and evaluates it (evaluates it up to the point where an operator with precedence 'breakPrecedence' is reached)
// returns the evaluation of the expression
double eval(const char*& expr, int breakPrecedence)
{
auto leftOperand = readOperand(expr);
return evalRight(expr, breakPrecedence, leftOperand);
}
int main()
{
auto expression = "1 + 1 * 2 - 2 * 3 + 1";
std::cout << "result = " << eval(expression); // prints: result = -2
return 0;
}
To keep the code as simple as possible the provided expression is assumed to be syntactically correct. It's up to you to add some checks if you want.
Hope this helps.
As you said
I'd like to craft something of my own, this is not production-code. Just hobby.
so probably you want to learn a thing or two. That's why I won't write any code here and steal all the fun from you.
It looks like you should start from the basics. I could have recommended the Dragon Book, but you probably want to get your hands dirty right away instead of reading the classics for a week. So you can start with PEGs (parsing expression grammars) - they are really simple.
I've started to love parsing after I've read this article.
In your case the grammar will be quite simple:
Expr ← Sum
Sum ← Product (('+' / '-') Product)*
Product ← Value (('*' / '/') Value)*
Value ← [0-9]+
With functions you can rewrite it like this
value = repeat_at_least_once(character("0"),...,character("9"))
product = sequence(value , repeat(one_of(character("*"),character("/")), value )
expr = sequence(product, repeat(one_of(character("+"),character("-")), product)
All you have to do now - write these functions :) It will be not much longer than the code you've written, if not shorter.
If you feel confident, you can even implement packrat parsing with left-recursion support; in this case your grammar will be even simpler.
IMHO, your current approach (doing multiplications and divisions first, then continuing with addition and subtraction, and all in one function) will be painful at best. Your calculate function is very hard to reason about already, because it mixes multiple cases already, e.g.
first pass or second pass (depending on the content of string action, which is the current status of the expression, which you modify from call to call)
first_nr empty/filled
second_nr empty/filled
Now imagine that more operators are added, like ^ and ( and ). I do understand that this is a hobby project. But even if you get this to work one day, you will not be able to understand it a week later.
Since you want to reuse your current code, how about this:
Think about how you yourself (as a human being) would go about this? There are multiple approaches. Independent of the specific algorithm they consist of two parts:
Tokenization (identifying numbers and operators)
Evaluation (combine those numbers and operators)
You are mixing both parts in your code. It would be much simpler for you and anybody you are asking for help if you separated them.
Tokenization is simple (you are doing it already, although I would recommend to treat the expression string as read-only).
Evaluation is more tricky, because you have to think about operator precedence. But again, it helps to think about how you would do it as a human. You might read from left to right. How do you handle that as a person? You might evaluate sub expressions with higher precedence first (as you intend to do now). How do you store the tokens? Think of different data structures. Lists, stacks, or queues for examples.
There are many ways. Once you found one, looking at some literature should be fun.
Enjoy!
While I clearly stated I did not want a postfix solution, I actually realized it's the most sane solution. I made a postfix solution myself with the help of tutorials (and still learnt a lot!). Thanks everyone for the help and suggestions.
#include <algorithm>
#include <cctype>
#include <iostream>
#include <iterator>
#include <stack>
#include <stdexcept>
#include <string>
/*
 * Tells whether a character is one of the four supported operators
 * :param c: Character to check
 * :return bool: true for '+', '-', '*' and '/', false for everything else
 */
bool isOperator(char c) {
    switch (c) {
    case '+':
    case '-':
    case '*':
    case '/':
        return true;
    default:
        return false;
    }
}
/*
 * Precedence rank of an operator character
 *
 * :param a: Operator character
 * :return int: 2 for '*' and '/', 1 for '+' and '-', -1 for any other
 *              character. A higher value binds tighter.
 *
 * Example:
 * precedence('*') > precedence('+') == true
 *
 */
int precedence(char a) {
    if (a == '*' || a == '/') {
        return 2;
    }
    if (a == '+' || a == '-') {
        return 1;
    }
    return -1;
}
/*
 * Function to convert an infix string to postfix notation
 * :param infix: Infix string made of numbers and the operators + - * /.
 *               NOTE: when it starts with '-', a "0" is prepended to the
 *               caller's string so that the leading minus becomes "0 - x".
 * :return string: returns postfix string; all tokens are separated by single
 *                 spaces
 *
 * Example:
 * std::string s = "5+5";
 * toPostfix(s) == "5 5 +"
 *
 */
std::string toPostfix(std::string& infix) {
    std::string postfix;
    //Operators that still have to be written to the output
    std::stack<char> operators;
    //If first character is a minus-operator (AKA a negative number)
    //add "0"
    if (!infix.empty() && infix[0] == '-') {
        infix = "0" + infix;
    }
    const std::size_t length = infix.size();
    std::size_t i = 0;
    while (i < length) {
        if (!isOperator(infix[i])) {
            //Everything up to the next operator is one number; the bounds
            //check comes before the character is looked at
            const std::size_t start = i;
            while (i < length && !isOperator(infix[i])) {
                ++i;
            }
            postfix.append(infix, start, i - start);
            postfix += ' ';
            continue;
        }
        //Operator: first write out every stacked operator of higher or equal
        //precedence (this also covers the "stack empty" and "strictly higher
        //precedence" cases, in which nothing is popped), then stack this one
        const char op = infix[i];
        while (!operators.empty() && precedence(operators.top()) >= precedence(op)) {
            postfix += operators.top();
            postfix += ' ';
            operators.pop();
        }
        operators.push(op);
        ++i;
    }
    //Append all remaining operators, space-separated like the other tokens
    while (!operators.empty()) {
        postfix += operators.top();
        operators.pop();
        if (!operators.empty()) {
            postfix += ' ';
        }
    }
    return postfix;
}
/*
 * Applies an operator to two numbers
 * :param x: Left-hand operand
 * :param y: Right-hand operand
 * :param _operator: One of '+', '-', '*', '/'
 * :return double: Result of the operation; 0 for an unknown operator
 *
 * Example:
 * x: 5
 * y: 60
 * _operator: +
 * = 65
 */
double evaluate(double x, double y, char _operator) {
    if (_operator == '+') {
        return x + y;
    }
    if (_operator == '-') {
        return x - y;
    }
    if (_operator == '*') {
        return x * y;
    }
    if (_operator == '/') {
        return x / y;
    }
    return 0;
}
/*
 * Calculate the result of an infix string
 * :param s: String containing the infix notation. NOTE: it is replaced by its
 *           postfix form as a side effect.
 * :return double: Result of the calculation
 * :throws std::invalid_argument: when the expression is empty or an operator
 *         lacks operands (std::stod may also throw for an unparsable number)
 *
 * Example:
 * std::string s = "5+5";
 * calculate(s) == 10
 */
double calculate(std::string& s) {
    //Convert infix to postfix
    s = toPostfix(s);
    //Operands waiting for their operator. Everything stays double: going
    //through int would truncate, e.g. 10/4 would give 2 instead of 2.5
    std::stack<double> operands;
    const std::size_t length = s.size();
    std::size_t i = 0;
    while (i < length) {
        const char c = s[i];
        if (c == ' ') {
            ++i;
            continue;
        }
        if (std::isdigit(static_cast<unsigned char>(c)) || c == '.') {
            //Collect the whole number, including an optional decimal point
            const std::size_t start = i;
            while (i < length && (std::isdigit(static_cast<unsigned char>(s[i])) || s[i] == '.')) {
                ++i;
            }
            operands.push(std::stod(s.substr(start, i - start)));
            continue;
        }
        //Anything else is an operator and needs two operands
        if (operands.size() < 2) {
            throw std::invalid_argument("calculate: operator without two operands");
        }
        //The right-hand operand is on top of the stack
        const double right = operands.top();
        operands.pop();
        const double left = operands.top();
        operands.pop();
        operands.push(evaluate(left, right, c));
        ++i;
    }
    if (operands.empty()) {
        throw std::invalid_argument("calculate: empty expression");
    }
    //Final number is in stack
    return operands.top();
}
//Demo: evaluates one fixed expression and prints the result
int main() {
    std::string expression("-5*5-2*2+3-10/5");
    const double value = calculate(expression);
    std::cout << value << std::endl;
}
You need to divide the calculation into several steps:
copy expression to writable memory and check/normalize it:
.check that all chars valid (positive)
.remove spaces
.convert all to low (or upper) case (if case you use hex expressions)
.some operators take 2 symbols ( ==, !=, >=, <=, <<, >>, ||, && ) - replace it to single symbol, from not valid (negative) range
remove ( ) if exist - calculate expressions in ():
.find first ) symbol from begin
.find last ( before it.
.check that after ) and before ( was separator symbols (operator or begin/end of string) but not digit.
.format new string where you replace (..) with it digital result
remove (calculate) all unary operators (+, -, !, ~)
.unary operators - on right side must have digit and on left - another operator(or begin of string), but not digit
.format new string with result of unary operator
remove (calculate) all binary operators.
.we need calculate in reverse precedence - so first need calculate/remove operators with lowest precedence.
.so need do loop by operators (from low to high precedence) - search operator symbol in string.
.if found - A op B - calculate separate A and B and then apply op.
convert string to integer
.now, after all ( ) and operators removed - only digit must be in string
example of code:
namespace Eval
{
// Signature of a binary operator implementation: (a, b) -> result.
typedef INT_PTR (* fn_b_op)(INT_PTR a, INT_PTR b);
// Signature of a unary operator implementation: a -> result.
typedef INT_PTR (* fn_u_op)(INT_PTR a);
// Binary operator table entry: implementation plus its one-character symbol.
struct b_op_arr { fn_b_op pfn; char c; };
// Unary operator table entry: implementation plus its one-character symbol.
struct u_op_arr { fn_u_op pfn; char c; };
// Maps a two-character operator spelling (b) to its one-character code (c).
struct name_to_char { char b[3]; char c;};
// Unary operators: logical not, bitwise not, unary plus, unary minus.
static INT_PTR fn1_bnt(INT_PTR a){ return !a; }
static INT_PTR fn1_not(INT_PTR a){ return ~a; }
static INT_PTR fn1_add(INT_PTR a){ return +a; }
static INT_PTR fn1_sub(INT_PTR a){ return -a; }
// Logical and / or (result is 0 or 1).
static INT_PTR fn2Land(INT_PTR a,INT_PTR b){ return a && b; }
static INT_PTR fn2_Lor(INT_PTR a,INT_PTR b){ return a || b; }
// Comparisons (result is 0 or 1): ==, !=, <, >, <=, >=.
static INT_PTR fn2_equ(INT_PTR a,INT_PTR b){ return a == b; }
static INT_PTR fn2_nqu(INT_PTR a,INT_PTR b){ return a != b; }
static INT_PTR fn2_lqu(INT_PTR a,INT_PTR b){ return a < b; }
static INT_PTR fn2_gqu(INT_PTR a,INT_PTR b){ return a > b; }
static INT_PTR fn2_leu(INT_PTR a,INT_PTR b){ return a <= b; }
static INT_PTR fn2_geu(INT_PTR a,INT_PTR b){ return a >= b; }
// Arithmetic: +, -, *, /, % (fn2_dv2 is the remainder).
// NOTE(review): fn2_div and fn2_dv2 do not check b for zero.
static INT_PTR fn2_add(INT_PTR a,INT_PTR b){ return a + b; }
static INT_PTR fn2_sub(INT_PTR a,INT_PTR b){ return a - b; }
static INT_PTR fn2_mul(INT_PTR a,INT_PTR b){ return a * b; }
static INT_PTR fn2_div(INT_PTR a,INT_PTR b){ return a / b; }
static INT_PTR fn2_dv2(INT_PTR a,INT_PTR b){ return a % b; }
// Shifts are done on the unsigned value, so >> shifts in zero bits.
static INT_PTR fn2_lsh(INT_PTR a,INT_PTR b){ return (UINT_PTR)a << b; }
static INT_PTR fn2_rsh(INT_PTR a,INT_PTR b){ return (UINT_PTR)a >> b; }
// Bitwise xor, and, or.
static INT_PTR fn2_xor(INT_PTR a,INT_PTR b){ return a ^ b; }
static INT_PTR fn2_and(INT_PTR a,INT_PTR b){ return a & b; }
static INT_PTR fn2__or(INT_PTR a,INT_PTR b){ return a | b; }
// One-character codes standing in for the two-character operators. They start
// at -0x80, i.e. in the negative char range, so they cannot collide with any
// character accepted from the input (Normalize rejects negative chars).
enum /*: char*/ { equ = -0x80, not_equ, less_equ, gre_equ, l_or, l_and, r_shift, l_shift };
// Binary operators: implementation and symbol.
// NOTE(review): the element order presumably encodes the precedence order in
// which _calc searches for operators -- confirm against _calc.
inline static b_op_arr b_arr[] =
{
{fn2_mul, '*'}, {fn2_div, '/'}, {fn2_lsh, l_shift}, {fn2_rsh, r_shift},
{fn2_xor, '^'}, {fn2_dv2, '%'}, {fn2_and, '&'}, {fn2__or, '|'},
{fn2_equ, equ}, {fn2_nqu, not_equ}, {fn2_lqu, '<'}, {fn2_gqu, '>'},
{fn2_leu, less_equ},{fn2_geu, gre_equ},{fn2_add, '+'}, {fn2_sub, '-'},
{fn2Land, l_and}, {fn2_Lor, l_or}
};
// Unary operators: implementation and symbol.
inline static u_op_arr u_arr[] =
{
{fn1_add, '+'}, {fn1_sub, '-'}, {fn1_bnt,'!'}, {fn1_not,'~'}
};
// Two-character spellings and the one-character code Normalize() replaces
// them with.
inline static name_to_char _2_to_1[] =
{
{"==", equ}, {"!=", not_equ}, {"<=", less_equ}, {">=", gre_equ },
{">>", r_shift}, {"<<", l_shift}, {"||", l_or}, {"&&", l_and},
};
// Marks, in the 256-bit table `bits`, the bit belonging to each of the n
// characters in cc.
// Characters are indexed by their unsigned value (0..255). A plain char can
// be negative (the two-character operator codes start at -0x80), and a
// negative bit index would address memory in front of the table.
void initBits(LONG bits[], const char cc[], ULONG n)
{
    while (n)
    {
        _bittestandset(bits, static_cast<unsigned char>(cc[--n]));
    }
}
// True when c may delimit an operand: an operator symbol, a parenthesis, one
// of the two-character operator codes, or the string terminator.
// NOTE(review): the lazy initialisation uses a plain bool flag, so concurrent
// first calls are not synchronised.
static bool IsSeparatorSymbol(char c)
{
    static LONG bits[8];
    static bool bInit;
    if (!bInit)
    {
        // acquire
        static const char cc[] = {
            '*', '/', '+', '-', '^', '%', '&', '|', '<', '>', '!', '~', '(', ')',
            equ, not_equ, less_equ, gre_equ, l_or, l_and, r_shift, l_shift, 0
        };
        initBits(bits, cc, _countof(cc));
        // release
        bInit = true;
    }
    // Same unsigned indexing as in initBits.
    return _bittest(bits, static_cast<unsigned char>(c));
}
// True when c is one of the unary operator symbols + - ! ~.
static bool IsUnaryOpSymbol(char c)
{
    static LONG bits[8];
    static bool bInit;
    if (!bInit)
    {
        // acquire
        static const char cc[] = {
            '+', '-', '!', '~'
        };
        initBits(bits, cc, _countof(cc));
        // release
        bInit = true;
    }
    return _bittest(bits, static_cast<unsigned char>(c));
}
// True when c is a lower-case hexadecimal digit (0-9, a-f).
static bool IsDigit(char c)
{
    static LONG bits[8];
    static bool bInit;
    if (!bInit)
    {
        // acquire
        static const char cc[] = {
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
        };
        initBits(bits, cc, _countof(cc));
        // release
        bInit = true;
    }
    return _bittest(bits, static_cast<unsigned char>(c));
}
// Parses a run of lower-case hexadecimal digits (0-9, a-f) starting at `sz`.
//
// Returns the accumulated value and stores in *psz the address of the first
// character that is not a hex digit (the terminating 0 when the whole string
// was consumed). No overflow check is performed: digits beyond 64 bits are
// shifted out.
//
// Fix: the letter range used to be 'a'..'z', so 'g'..'z' were accepted as
// "digits" with values 16..35 and silently corrupted the result.
__int64 strtol64_16(char* sz, char** psz)
{
	__int64 r = 0;
	while (char c = *sz)
	{
		ULONG d;
		if ((ULONG)(c - '0') <= '9' - '0')
		{
			d = (c - '0');
		}
		else if ((ULONG)(c - 'a') <= 'f' - 'a')
		{
			d = (c - 'a') + 10;
		}
		else
		{
			break;
		}
		r = (r << 4) + d;
		sz++;
	}
	*psz = sz;
	return r;
}
// Copies the expression `psz` into `buf` in the canonical form _calc expects:
//   * spaces are dropped,
//   * upper-case ASCII letters are lowered,
//   * each two-character operator listed in _2_to_1 is replaced by its
//     one-value code.
//
// `s` is the number of characters to process INCLUDING the terminating 0
// (calc passes strlen + 1); `buf` must have room for `s` chars.
// Returns false when a character with a negative value is met or when `s`
// is 0; otherwise true.
//
// Fixes: the upper-case test used '<' and therefore never lowered 'Z';
// s == 0 used to underflow the counter.
bool Normalize(const char* psz, char* buf, size_t s)
{
	if (!s) return false;

	int len = 0;
	do
	{
		--s;
		char c = *psz++;

		// Negative values are reserved for the operator codes.
		if (c < 0) return false;

		if (c == ' ') continue;

		if ((ULONG)(c - 'A') <= (ULONG)('Z' - 'A'))
		{
			c += 'a' - 'A';
		}

		// A two-character operator needs at least one more character.
		if (s)
		{
			const char d = *psz;
			int k = _countof(_2_to_1);
			do
			{
				--k;
				if (_2_to_1[k].b[0] == c && _2_to_1[k].b[1] == d)
				{
					// Collapse the pair and consume its second character.
					c = _2_to_1[k].c, psz++, --s;
					break;
				}
			} while (k);
		}

		*buf++ = c, len++;
	} while (s);

	return 0 < len;
}
char* format_new_str(const char* a, INT_PTR r, const char* b)
{
static const char format[] = "%s%I64x%s";
int len = _scprintf(format, a, r, b);
if (0 < len)
{
if (char* buf = new char [++len])
{
if (0 < sprintf_s(buf, len, format, a, r, b))
{
DbgPrint("++%p\n\"%s\"\n", buf, buf);
return buf;
}
delete buf;
}
}
return 0;
}
// Evaluates the normalized expression in `str` and stores the value in `result`.
// Returns false when the expression cannot be evaluated.
//
// `str` is modified in place (0 terminators are written into it). The work is
// done in three passes, each of which rewrites the string and recurses:
//   1. every innermost "( ... )" group is evaluated and replaced by its value,
//   2. every prefix operator applied to a number is evaluated and replaced,
//   3. the string is split on a binary operator and both halves are evaluated.
// Numbers are read by strtol64_16 (hexadecimal), and intermediate values are
// written back in hexadecimal by format_new_str.
bool _calc (char* str, INT_PTR& result)
{
DbgPrint("\"%s\"\n", str);
// Owner of the most recent buffer returned by format_new_str: storing a new
// buffer frees the previous one, and the destructor frees the last one.
struct SB
{
char* str;
SB() : str(0) {}
~SB()
{
operator <<(0);
}
void operator <<(char* psz)
{
if (str)
{
DbgPrint("--%p\n", str);
delete [] str;
}
str = psz;
}
} sb;
size_t len = strlen(str);
if (!len)
{
return false;
}
char b, c;
int l;
INT_PTR r, q;
// 1. Replace parenthesized groups by their value.
// pc points just past the most recently seen '(' (0 when none is pending),
// so the first ')' reached closes an innermost group.
char *psz = str, *pc = 0, *buf;
for (;;)
{
switch (*psz++)
{
case '(':
pc = psz;
continue;
case ')':
// Reject a ')' without a pending '(', a ')' not followed by an operator or
// the end of the string, and a '(' not preceded by an operator.
if (!pc || !IsSeparatorSymbol(*psz) || (pc > str + 1 && !IsSeparatorSymbol(pc[-2]))) return false;
// Cut the string into prefix / group / suffix at the two parentheses.
psz[-1] = 0, pc[-1] = 0;
if (_calc(pc, r) && (buf = format_new_str(str, r, psz)))
{
// Continue with the rewritten string and rescan it from its start.
sb << buf;
psz = str = buf, pc = 0;
continue;
}
return false;
case 0:
goto __2;
}
}
__2:
// 2. Replace "<unary op><number>" by its value.
psz = str;
do
{
// A digit whose predecessor is a unary operator symbol that itself sits at
// the start of the string or right after a separator.
if (IsDigit(c = *psz) && str < psz && IsUnaryOpSymbol(c = psz[-1]) && (psz == str + 1 || IsSeparatorSymbol(psz[-2])))
{
// Terminate the prefix where the operator was.
psz[-1] = 0;
l = _countof(u_arr);
do
{
if (u_arr[--l].c == c)
{
r = strtol64_16(psz, &psz);
// The number must be followed by an operator or the end of the string.
if (IsSeparatorSymbol(*psz))
{
r = u_arr[l].pfn(r);
if (buf = format_new_str(str, r, psz))
{
// Restart pass 2 on the rewritten string.
sb << buf;
psz = str = buf;
goto __2;
}
}
break;
}
} while (l);
return false;
}
} while (psz++, c);
// 3. Split on a binary operator. b_arr is walked from its last entry to its
// first; for each operator the string is scanned from the left and split at
// the first occurrence.
// NOTE(review): because the split happens at the LEFTMOST occurrence and each
// table entry is tried on its own, "5-3-1" appears to be grouped as 5-(3-1) and
// "1-2+3" as 1-(2+3) — confirm whether that associativity is intended.
l = _countof(b_arr);
do
{
c = b_arr[--l].c;
psz = str;
do
{
if (c == (b = *psz++))
{
psz[-1] = 0;
// The right-hand side is evaluated first, then the left-hand side.
if (_calc(psz, q) && _calc(str, r))
{
result = b_arr[l].pfn(r, q);
return true;
}
return false;
}
} while (b);
} while (l);
// No operator left: the whole string must be a single number.
result = strtol64_16(str, &str);
return !*str;
}
// Public entry point: evaluates the expression text `psz`.
//
// The text is normalized into a private, writable copy (the evaluator edits
// its input in place) and then handed to _calc. Returns true and sets
// `result` on success; returns false for an empty string or when
// normalization or evaluation fails.
bool calc(const char* psz, INT_PTR& result)
{
	size_t cb = strlen(psz);
	if (!cb)
	{
		return false;
	}

	// Room for the terminating 0 as well.
	++cb;
	char* const copy = new char[cb];
	if (!copy)
	{
		return false;
	}

	const bool succeeded = Normalize(psz, copy, cb) && _calc(copy, result);
	delete [] copy;
	return succeeded;
}
};
use
INT_PTR r;
Eval::calc(str, r);
While reading Learning Go I've implemented some of the suggested training programs. One of which has nearly the same requirements as yours, although I have to admit, that yours is a bit more evolved. So, I hope you can get something out of this code (I know it's not C++, but I'm sure you can read it):
package main
import (
"fmt"
"os"
"bufio"
"stack"
"strconv"
)
// readInput reads one line from standard input and returns it without its
// final character (the '\n' delimiter). When the read fails it reports the
// error on standard output and returns the literal string "error".
func readInput() string {
	stdin := bufio.NewReader(os.Stdin)
	line, err := stdin.ReadString('\n')
	if err != nil {
		fmt.Printf("Failed to read inputs: %v", err)
		return "error"
	}
	return line[:len(line)-1]
}
// isdigit reports whether in can be parsed as a base-10 integer by
// strconv.Atoi (so a sign is accepted, despite the name).
func isdigit(in string) bool {
	if _, err := strconv.Atoi(in); err != nil {
		return false
	}
	return true
}
// isOperation reports whether in starts with one of the four supported
// operators: + - * /.
//
// An empty string is not an operation. The original indexed the first rune
// unconditionally and panicked when the user entered an empty line.
func isOperation(in string) bool {
	if in == "" {
		return false
	}
	switch []rune(in)[0] {
	case '+', '-', '*', '/':
		return true
	}
	return false
}
// calc applies the operator named by the first rune of operation to the two
// operands and returns the result as a float32.
//
// Note the parameter order: op2 is the right-hand operand and op1 the
// left-hand one, so the result is "op1 <operator> op2". Only division is
// carried out in floating point. An unrecognized operator is reported on the
// standard error/output streams and yields 0.
func calc(operation string, op2, op1 int) float32 {
	chars := []rune(operation)
	lhs, rhs := op1, op2

	if sym := chars[0]; sym == '+' {
		return float32(lhs + rhs)
	} else if sym == '-' {
		return float32(lhs - rhs)
	} else if sym == '*' {
		return float32(lhs * rhs)
	} else if sym == '/' {
		return float32(lhs) / float32(rhs)
	}

	print("Failed to recognize operation: ")
	println(operation)
	fmt.Printf("%v\n", chars)
	return 0.0
}
// main runs an interactive postfix (reverse Polish) calculator: each input
// line is either a number, which is pushed on the stack, or an operator,
// which is applied to the two most recently pushed values. Any other input
// ends the program.
func main() {
// NOTE(review): "stack" is not a standard-library package; Push/Pop are
// assumed to work on int values — confirm against the package actually used.
var st stack.Stack
fmt.Println("Calculator.")
fmt.Println("Please input operations and then one of + - * / for calculation,")
fmt.Println("or anything else for exit.")
LOOP: for {
in := readInput()
switch {
case isdigit(in):
// isdigit already verified that the conversion succeeds.
i,_ := strconv.Atoi(in)
st.Push(i)
case isOperation(in):
// The value popped first is the right-hand operand.
op2 := st.Pop()
op1 := st.Pop()
res := calc(in, op2, op1)
// The result is truncated to int before being pushed back, but printed in full.
st.Push(int(res))
fmt.Println(res)
default:
fmt.Println("Exit")
break LOOP
}
}
}
... similar, isn't it?

Interview: Machine coding / regex (Better alternative to my solution)

The following is the interview question:
Machine coding round: (Time 1hr)
Expression is given and a string testCase, need to evaluate the testCase is valid or not for expression
Expression may contain:
letters [a-z]
'.' ('.' represents any char in [a-z])
'*' ('*' has same property as in normal RegExp)
'^' ('^' represents start of the String)
'$' ('$' represents end of String)
Sample cases:
Expression Test Case Valid
ab ab true
a*b aaaaaab true
a*b*c* abc true
a*b*c aaabccc false
^abc*b abccccb true
^abc*b abbccccb false
^abcd$ abcd true
^abc*abc$ abcabc true
^abc.abc$ abczabc true
^ab..*abc$ abyxxxxabc true
My approach:
Convert the given regular expression into concatenation (ab), alternation (a|b), and Kleene star (a*).
And add + for concatenation.
For example:
abc$ => .*+a+b+c
^ab..*abc$ => a+b+.+.*+a+b+c
Convert into postfix notation based on precedence.
(parentheses > Kleene star > concatenation > ...)
(a|b)*+c => ab|*c+
Build NFA based on Thompson construction
Backtracking / traversing through NFA by maintaining a set of states.
When I started implementing it, it took me a lot more than 1 hour. I felt that the step 3 was very time consuming. I built the NFA by using postfix notation +stack and by adding new states and transitions as needed.
So, I was wondering if there is a faster alternative solution to this question? Or maybe a faster way to implement step 3. I found this CareerCup link where someone mentioned in the comments that it was from some programming contest. So if someone has solved this previously or has a better solution to this question, I'd be happy to know where I went wrong.
Some derivation of Levenshtein distance comes to mind - possibly not the fastest algorithm, but it should be quick to implement.
We can ignore ^ at the start and $ at the end - anywhere else is invalid.
Then we construct a 2D grid where each row represents a unit [1] in the expression and each column represents a character in the test string.
[1]: A "unit" here refers to a single character, with the exception that * shall be attached to the previous character
So for a*b*c and aaabccc, we get something like:
a a a b c c c
a*
b*
c
Each cell can have a boolean value indicating validity.
Now, for each cell, set it to valid if either of these hold:
The value in the left neighbour is valid and the row is x* or .* and the column is x (x being any character a-z)
This corresponds to a * matching one additional character.
The value in the upper-left neighbour is valid and the row is x or . and the column is x (x being any character a-z)
This corresponds to a single-character match.
The value in the top neighbour is valid and the row is x* or .*.
This corresponds to the * matching nothing.
Then check if the bottom-right-most cell is valid.
So, for the above example, we get: (V indicating valid)
a a a b c c c
a* V V V - - - -
b* - - - V - - -
c - - - - V - -
Since the bottom-right cell isn't valid, we return invalid.
Running time: O(stringLength*expressionLength).
You should notice that we're mostly exploring a fairly small part of the grid.
This solution can be improved by making it a recursive solution making use of memoization (and just calling the recursive solution for the bottom-right cell).
This will give us a best-case performance of O(1), but still a worst-case performance of O(stringLength*expressionLength).
My solution assumes the expression must match the entire string, as inferred from the result of the above example being invalid (as per the question).
If it can instead match a substring, we can modify this slightly so, if the cell is in the top row it's valid if:
The row is x* or .*.
The row is x or . and the column is x.
Given only 1 hour we can use simple way.
Split pattern into tokens: a*b.c => { a* b . c }.
If pattern doesn't start with ^ then add .* in the beginning, else remove ^.
If pattern doesn't end with $ then add .* in the end, else remove $.
Then we use recursion: going 3 way in case if we have recurring pattern (increase pattern index by 1, increase word index by 1, increase both indices by 1), going one way if it is not recurring pattern (increase both indices by 1).
Sample code in C#
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
namespace ReTest
{
    /// <summary>
    /// Matcher for a small regular-expression dialect: lower-case letters,
    /// '.' (any character), a trailing '*' on a letter or '.', '^' as the
    /// first pattern character and '$' as the last one. Without '^' / '$'
    /// the pattern may match anywhere in the input.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            Debug.Assert(IsMatch("ab", "ab") == true);
            Debug.Assert(IsMatch("aaaaaab", "a*b") == true);
            Debug.Assert(IsMatch("abc", "a*b*c*") == true);
            Debug.Assert(IsMatch("aaabccc", "a*b*c") == true); /* original false, but it should be true */
            Debug.Assert(IsMatch("abccccb", "^abc*b") == true);
            Debug.Assert(IsMatch("abbccccb", "^abc*b") == false);
            Debug.Assert(IsMatch("abcd", "^abcd$") == true);
            Debug.Assert(IsMatch("abcabc", "^abc*abc$") == true);
            Debug.Assert(IsMatch("abczabc", "^abc.abc$") == true);
            Debug.Assert(IsMatch("abyxxxxabc", "^ab..*abc$") == true);
            /* cases the previous implementation got wrong */
            Debug.Assert(IsMatch("b", "^a*b$") == true);
            Debug.Assert(IsMatch("", "^a*b*$") == true);
            Debug.Assert(IsMatch("", "") == true);
        }

        /// <summary>Checks whether <paramref name="input"/> matches <paramref name="pattern"/>.</summary>
        /// <param name="input">Text to test.</param>
        /// <param name="pattern">Pattern in the dialect described on the class.</param>
        /// <returns>true when the input matches.</returns>
        /// <exception cref="ArgumentException">
        /// The pattern contains an unsupported character, or '^' / '$' / '*'
        /// in a position where it is not allowed; also thrown (as
        /// ArgumentNullException) for a null argument.
        /// </exception>
        public static bool IsMatch(string input, string pattern)
        {
            if (input == null)
                throw new ArgumentNullException("input");
            if (pattern == null)
                throw new ArgumentNullException("pattern");

            List<PatternToken> patternTokens = new List<PatternToken>();
            bool anchoredAtStart = false;
            bool anchoredAtEnd = false;

            for (int i = 0; i < pattern.Length; i++)
            {
                char token = pattern[i];
                if (token == '^')
                {
                    if (i != 0)
                        throw new ArgumentException("'^' is only allowed as the first character.", "pattern");
                    anchoredAtStart = true;
                }
                else if (token == '$')
                {
                    if (i != pattern.Length - 1)
                        throw new ArgumentException("'$' is only allowed as the last character.", "pattern");
                    anchoredAtEnd = true;
                }
                else if (char.IsLower(token) || token == '.')
                {
                    bool starred = i < pattern.Length - 1 && pattern[i + 1] == '*';
                    patternTokens.Add(new PatternToken
                    {
                        Token = token,
                        Occurence = starred ? Occurence.Multiple : Occurence.Single
                    });
                    if (starred)
                        i++; // the '*' belongs to this token
                }
                else
                    throw new ArgumentException("Unsupported character '" + token + "' at index " + i + ".", "pattern");
            }

            // An unanchored side may be preceded/followed by anything.
            // Tracking the anchors with flags (instead of inspecting the first
            // and last token) also makes the empty pattern and "^" / "$" safe.
            if (!anchoredAtStart)
                patternTokens.Insert(0, new PatternToken { Token = '.', Occurence = Occurence.Multiple });
            if (!anchoredAtEnd)
                patternTokens.Add(new PatternToken { Token = '.', Occurence = Occurence.Multiple });

            return IsMatch(input, 0, patternTokens, 0);
        }

        /// <summary>
        /// Checks whether input[inputIndex..] matches, in full, the tokens
        /// pattern[patternIndex..].
        /// </summary>
        /// <remarks>
        /// Filled bottom-up, O(input.Length * pattern.Count): cell [i, j] says
        /// whether input[i..] matches pattern[j..]. A starred token either
        /// matches nothing (move to the next token) or consumes one matching
        /// character and stays. The previous recursive version returned false
        /// as soon as a starred token did not match the current character and
        /// accepted at most one trailing starred token at the end of the input.
        /// </remarks>
        static bool IsMatch(string input, int inputIndex, IList<PatternToken> pattern, int patternIndex)
        {
            int n = input.Length;
            int m = pattern.Count;
            if (inputIndex > n || patternIndex > m)
                return false;

            bool[,] matches = new bool[n + 1, m + 1];
            matches[n, m] = true; // nothing left on either side

            for (int j = m - 1; j >= patternIndex; j--)
            {
                PatternToken token = pattern[j];
                for (int i = n; i >= inputIndex; i--)
                {
                    bool matchesHere = i < n && (token.Token == '.' || token.Token == input[i]);
                    if (token.Occurence == Occurence.Single)
                        matches[i, j] = matchesHere && matches[i + 1, j + 1];
                    else
                        matches[i, j] = matches[i, j + 1] || (matchesHere && matches[i + 1, j]);
                }
            }
            return matches[inputIndex, patternIndex];
        }

        /// <summary>One pattern unit: a letter or '.', optionally starred.</summary>
        class PatternToken
        {
            public char Token { get; set; }
            public Occurence Occurence { get; set; }
            public override string ToString()
            {
                if (Occurence == Occurence.Single)
                    return Token.ToString();
                else
                    return Token.ToString() + "*";
            }
        }

        /// <summary>Whether a token matches exactly once or zero or more times.</summary>
        enum Occurence
        {
            Single,
            Multiple
        }
    }
}
Here is a solution in Java. Space and Time is O(n). Inline comments are provided for more clarity:
/**
 * Checks whether a test string matches an expression made of the letters
 * a-z, '.' (any letter a-z), '*' (zero or more of the preceding letter or
 * '.'), '^' (first position only) and '$' (last position only).
 *
 * The whole test string has to match the whole expression.
 *
 * NOTE(review): the parser attaches '^' and '$' to the neighbouring letter,
 * so expressions such as "^a*" or "a*$" are still rejected as invalid.
 *
 * @author Santhosh Kumar
 */
public class ExpressionProblemSolution {
    public static void main(String[] args) {
        System.out.println("---------- ExpressionProblemSolution - start ---------- \n");
        ExpressionProblemSolution evs = new ExpressionProblemSolution();
        evs.runMatchTests();
        System.out.println("\n---------- ExpressionProblemSolution - end ---------- ");
    }

    // One term of the expression, kept in a singly linked list.
    class Node {
        Character ch; // char [a-z] or '.'
        Character sch; // special char attached to the term (^, *, $, .) or null
        Node next;

        Node(Character ch1, Character sch1) {
            ch = ch1;
            sch = sch1;
        }

        Node add(Character ch1, Character sch1) {
            this.next = new Node(ch1, sch1);
            return this.next;
        }

        Node next() {
            return this.next;
        }

        public String toString() {
            return "[ch=" + ch + ", sch=" + sch + "]";
        }
    }

    private boolean letters(char ch) {
        return (ch >= 'a' && ch <= 'z');
    }

    private boolean specialChars(char ch) {
        return (ch == '.' || ch == '^' || ch == '*' || ch == '$');
    }

    // Throws a RuntimeException when the expression is null or contains a
    // character outside a-z . ^ * $
    private void validate(String expression) {
        if (expression == null) {
            throw new RuntimeException(
                    "Expression can't be null, but it can be empty");
        }
        char[] expr = expression.toCharArray();
        for (int i = 0; i < expr.length; i++) {
            if (!letters(expr[i]) && !specialChars(expr[i])) {
                throw new RuntimeException(
                        "Expression contains invalid char at position=" + i
                                + ", invalid_char=" + expr[i]
                                + " (allowed chars are 'a-z', *, . ^, * and $)");
            }
        }
    }

    // Parses the expression into a linked list of terms and returns the
    // (empty) root node that precedes the first term.
    //
    // expression = a*b*c has 3 terms -> [a*] [b*] [c]
    // expression = ^ab.*c$ has 4 terms -> [^a] [b] [.*] [c$]
    //
    // Throws a RuntimeException for a misplaced '^' or '$', for a '^' with
    // nothing after it and for any special char that cannot start a term.
    //
    // Timing : O(n) n -> expression length
    // Space : O(n) n -> expression length decides the no.of terms stored in the list
    private Node preprocess(String expression) {
        debug("preprocess - start [" + expression + "]");
        validate(expression);
        Node root = new Node(' ', ' '); // root node with empty values
        Node current = root;
        char[] expr = expression.toCharArray();
        int i = 0, n = expr.length;
        while (i < n) {
            debug("i=" + i);
            if (expr[i] == '^') {
                // prefix operator: always linked to the char after it
                if (i + 1 < n) {
                    if (i == 0) {
                        // ^ marks the start, so it must be the first char
                        current = current.add(expr[i + 1], expr[i]);
                        i += 2;
                        continue;
                    } else {
                        throw new RuntimeException(
                                "Special char ^ should be present only at the first position of the expression (position="
                                        + i + ", char=" + expr[i] + ")");
                    }
                } else {
                    throw new RuntimeException(
                            "Expression missing after ^ (position=" + i
                                    + ", char=" + expr[i] + ")");
                }
            } else if (letters(expr[i]) || expr[i] == '.') { // [a-z] or .
                if (i + 1 < n) {
                    char nextCh = expr[i + 1];
                    if (nextCh == '$' && i + 1 != n - 1) {
                        // $ must be the last char of the expression
                        throw new RuntimeException(
                                "Special char $ should be present only at the last position of the expression (position="
                                        + (i + 1)
                                        + ", char="
                                        + expr[i + 1]
                                        + ")");
                    }
                    if (nextCh == '$' || nextCh == '*') { // a* or b$
                        current = current.add(expr[i], nextCh);
                        i += 2;
                        continue;
                    } else {
                        current = current.add(expr[i], expr[i] == '.' ? expr[i]
                                : null);
                        i++;
                        continue;
                    }
                } else { // a or b
                    current = current.add(expr[i], null);
                    i++;
                    continue;
                }
            } else {
                throw new RuntimeException("Invalid char - (position=" + (i)
                        + ", char=" + expr[i] + ")");
            }
        }
        debug("preprocess - end");
        return root;
    }

    // True when the term symbol (a letter, or '.' meaning "any letter a-z")
    // accepts the input char c.
    private boolean symbolMatches(char symbol, char c) {
        return symbol == '.' ? letters(c) : symbol == c;
    }

    // Matches testString against expression; the whole string must be consumed
    // by the whole expression. An empty expression matches nothing.
    //
    // The terms are matched with a table filled from the end: matches[i][j]
    // tells whether test[i..] matches terms[j..]. A starred term either
    // matches nothing or consumes one accepted char and stays, which gives
    // '*' proper backtracking semantics.
    //
    // This replaces a single greedy pass that indexed the input without a
    // bounds check (ArrayIndexOutOfBoundsException when the input was shorter
    // than the expression), never advanced when ".*" was the last term
    // (endless loop) and could not give characters back after a '*'.
    //
    // Timing : O(n * m) n -> input length, m -> number of terms
    // Space : O(n * m)
    public boolean process(String expression, String testString) {
        Node root = preprocess(expression);
        print(root);
        Node first = root.next();
        if (first == null)
            return false;

        int m = 0;
        for (Node t = first; t != null; t = t.next())
            m++;
        Node[] terms = new Node[m];
        int k = 0;
        for (Node t = first; t != null; t = t.next())
            terms[k++] = t;

        char[] test = testString.toCharArray();
        int n = test.length;
        debug("input-string-length=" + n);

        boolean[][] matches = new boolean[n + 1][m + 1];
        matches[n][m] = true; // input and expression both exhausted

        for (int j = m - 1; j >= 0; j--) {
            Node term = terms[j];
            for (int i = n; i >= 0; i--) {
                boolean here = i < n && symbolMatches(term.ch, test[i]);
                boolean result;
                if (term.sch == null || term.sch == '.') { // single letter or '.'
                    result = here && matches[i + 1][j + 1];
                } else if (term.sch == '^') { // first char of the input
                    result = i == 0 && here && matches[i + 1][j + 1];
                } else if (term.sch == '$') { // last char of the input
                    result = i == n - 1 && here && matches[i + 1][j + 1];
                } else if (term.sch == '*') { // zero or more
                    result = matches[i][j + 1] || (here && matches[i + 1][j]);
                } else {
                    result = false;
                }
                matches[i][j] = result;
            }
        }
        return matches[0][0];
    }

    // Prints str when the local debug switch is turned on.
    public void debug(Object str) {
        boolean debug = false;
        if (debug) {
            System.out.println("[debug] " + str);
        }
    }

    // Writes the list that starts at node to the debug output.
    private void print(Node node) {
        StringBuilder sb = new StringBuilder();
        while (node != null) {
            sb.append(node + " ");
            node = node.next();
        }
        sb.append("\n");
        debug(sb.toString());
    }

    // Runs process and prints expression, input and result on one line.
    public boolean match(String expr, String input) {
        boolean result = process(expr, input);
        System.out.printf("\n%-20s %-20s %-20s\n", expr, input, result);
        return result;
    }

    public void runMatchTests() {
        match("ab", "ab");
        match("a*b", "aaaaaab");
        match("a*b*c*", "abc");
        match("a*b*c", "aaabccc");
        match("^abc*b", "abccccb");
        match("^abc*b", "abccccbb");
        match("^abcd$", "abcd");
        match("^abc*abc$", "abcabc");
        match("^abc.abc$", "abczabc");
        match("^ab..*abc$", "abyxxxxabc");
        match("a*b*", ""); // handles empty input string
        match("xyza*b*", "xyz");
    }
}
static int regex_match_here(const char *reg, const char *test);

/* Matches "c*" followed by reg against test: tries zero occurrences of c
 * first, then one more on every round, so the rest of the expression always
 * gets a chance to match (backtracking). c == '.' accepts any character. */
static int regex_match_star(char c, const char *reg, const char *test)
{
    for (;;) {
        if (regex_match_here(reg, test))
            return 1;
        if (*test == '\0' || (c != '.' && c != *test))
            return 0;
        test++;
    }
}

/* Matches reg against the whole of test, starting at the current positions. */
static int regex_match_here(const char *reg, const char *test)
{
    if (reg[0] == '\0')
        return *test == '\0';
    if (reg[0] == '$' && reg[1] == '\0')
        return *test == '\0';
    if (reg[1] == '*')
        return regex_match_star(reg[0], reg + 2, test);
    if (*test != '\0' && (reg[0] == '.' || reg[0] == *test))
        return regex_match_here(reg + 1, test + 1);
    return 0;
}

/*
 * Returns 1 when the string `test` matches the expression `reg`, else 0.
 *
 * Supported: literal characters, '.' (any single character), 'x*' (zero or
 * more x), an optional leading '^' and an optional trailing '$'. The whole
 * string has to match the whole expression. A null argument yields 0.
 *
 * This replaces a single forward pass that tested the pointers instead of the
 * characters they point to, read reg[-1] / test[-1], returned 1 while part of
 * the expression was still unmatched ("abc" accepted "ab"), could not match
 * 'x*' zero times, and printed a debug line for every literal character.
 */
int regex_validate(char *reg, char *test) {
    if (!reg || !test)
        return 0;
    if (reg[0] == '^')
        reg++;
    return regex_match_here(reg, test);
}
/* Prints the verdict of regex_validate for a fixed list of expression/input pairs. */
int main () {
    static const char *const cases[][2] = {
        { "ab",        "ab"       },
        { "a*b",       "aaaaaab"  },
        { "^abc.abc$", "abcdabc"  },
        { "^abc*abc$", "abcabc"   },
        { "^abc*b",    "abccccb"  },
        { "^abc*b",    "abbccccb" },
    };
    unsigned idx;
    for (idx = 0; idx < sizeof cases / sizeof cases[0]; idx++) {
        printf("regex=%d\n", regex_validate((char *)cases[idx][0], (char *)cases[idx][1]));
    }
    return 0;
}

Boolean Function to Check Validity of Expression Recursively?

I want to create a kind of parser of the form:
#include <iostream>
#include <string>
#include <sstream>
#include <cctype>
using namespace std;
// Skeleton from the question: should return true when the characters in `is`
// form either a single digit or "(" expression "+" expression ")".
// The body is an unfinished placeholder and does not compile as written.
bool isValid(istringstream& is)
{
char ch;
is.get(ch); // I know get(ch) is a good start but this is as far as I got :)
.......
....
}
int main()
{
string s;
while(getline(cin,s))
{
istringstream is(s);
cout<<(isValid(is)? "Expression OK" : "Not OK")<<endl;
}
}
A boolean function that returns TRUE if the sequence of char is of the form "5" or "(5+3)" or "((5+3)+6)" or "(((4+2)+1)+6)" ...etc and FALSE for any other case
Basically, an expression will be considered as valid if it is either a single digit or of the form "open parenthesis-single digit-plus sign-single digit-close parenthesis"
Valid Expression = single digit
and
Valid Expression = (Valid Expression + Valid Expression)
Given that there is no limit to the size of the above form (number of opening and closing parenthesis..etc.) I'd like to do that using recursion
Being the newbie that I am.. Thank you for any helpful input!
To do a recursive solution you're gonna want to read the string into a buffer first, then do something like this:
// Returns the length of the valid expression that starts at `str`, or -1 when
// no valid expression starts there.
//
// Grammar:  expression := digit | '(' expression '+' expression ')'
//
// Only a prefix of `str` is examined; the caller decides whether trailing
// characters are acceptable.
//
// Fixes: the original referred to an undeclared variable `e` (meant: e1) and
// tested the digit with '||', which is true for every character.
int expression(const char* str) {
    if (*str == '(') {
        const int e1 = expression(str + 1);
        if (e1 == -1 || str[1 + e1] != '+') {
            return -1;
        }
        const int e2 = expression(str + 1 + e1 + 1);
        if (e2 == -1 || str[1 + e1 + 1 + e2] != ')') {
            return -1;
        }
        // '(' + left operand + '+' + right operand + ')'
        return 1 + e1 + 1 + e2 + 1;
    }
    if (*str >= '0' && *str <= '9') {
        return 1;
    }
    return -1;
}
// Returns true when the WHOLE string is one valid expression, i.e. a single
// digit or "(" expression "+" expression ")".
//
// The original additionally accepted a bare top-level sum such as "5+3" or
// "(1+2)+3", which the grammar does not allow, and compared int lengths with
// the unsigned result of strlen.
bool isvalid(const char* str) {
    if (!str) {
        return false;
    }
    const int length = expression(str);
    // Valid only when an expression was found and nothing follows it.
    return length >= 0 && str[length] == '\0';
}
Basically, the expression function returns the length of the valid expression at its argument. If its argument begins with a parenthesis, it gets the length of the expression after that, verifies the plus after that, then verifies the closing parenthesis after the next expression. If the argument begins with a number, it returns 1. If something is messed up, it returns -1. Then, using that function, we can figure out whether or not the string is valid by some sums and the length of the string.
I haven't tested the function at all, but the only case this might fail in that I can think of would be excessive parenthesis: ((5)) for example.
An alternative to recursion could be some sort of lexical parsing such as this:
// What the parser expects to read next.
enum ParseState {
    ExpectingLeftExpression,   // a digit or '(' that starts a left operand
    ExpectingRightExpression,  // a digit or '(' that starts a right operand
    ExpectingPlus,             // the '+' between the operands of a group
    ExpectingEnd,              // the ')' of the current group, or the end of the input
};

// returns true if str is valid
//
// Grammar:  expression := digit | '(' expression '+' expression ')'
//
// Iterative check: for every '(' that is still open, `resume` remembers which
// state to continue in once that group has been closed. Nesting can never be
// deeper than the number of characters, which bounds the stack size.
//
// Fixes: ParseState was declared as a variable instead of a type,
// ExpectingEnd was never handled, parentheses were not tracked at all and an
// incomplete input such as "(5+" was accepted.
bool check(const char* str) {
    unsigned length = 0;
    while (str[length]) {
        ++length;
    }
    ParseState* resume = new ParseState[length + 1];
    unsigned depth = 0;
    ParseState state = ExpectingLeftExpression;
    bool ok = true;

    for (; ok && *str; ++str) {
        const char c = *str;
        switch (state) {
        case ExpectingLeftExpression:
        case ExpectingRightExpression: {
            // State to continue in once the operand starting here is complete:
            // a left operand inside a group is followed by '+', everything
            // else by ')' or the end of the input.
            const ParseState after =
                (state == ExpectingLeftExpression && depth > 0) ? ExpectingPlus : ExpectingEnd;
            if (c == '(') {
                resume[depth++] = after;
                state = ExpectingLeftExpression;
            } else if (c >= '0' && c <= '9') {
                state = after;
            } else {
                printf(state == ExpectingLeftExpression
                           ? "Error: Expected left hand expression."
                           : "Error: Expected right hand expression.");
                ok = false;
            }
            break;
        }
        case ExpectingPlus:
            if (c == '+') {
                state = ExpectingRightExpression;
            } else {
                printf("Error: Expected plus.");
                ok = false;
            }
            break;
        case ExpectingEnd:
            if (c == ')' && depth > 0) {
                state = resume[--depth];
            } else {
                printf(depth > 0 ? "Error: Expected closing parenthesis."
                                 : "Error: Expected end of input.");
                ok = false;
            }
            break;
        }
    }

    // The input must stop exactly where a complete top-level expression ends.
    if (ok && (state != ExpectingEnd || depth != 0)) {
        printf("Error: Unexpected end of input.");
        ok = false;
    }
    delete[] resume;
    return ok;
}
That function's not complete at all, but you should be able to see where it's going. I think the recursion works better in this case anyways.

Expression tree giving wrong answer

EDIT
This is homework so no straight up code please. Just hints, thank you!
I'm working on a project that will use an expression tree to derive a variety of things and then perform operations on them. Right now I'm not too worried about the deriving part, I just want to get the operations part down.
The expression tree code that I'm using works for integers but once I input "x" or any other variable my answer is wrong. My program works with postfix expression strings... below is an example of what is right and wrong.
5 6 + returns 11. correct
5x 6x + returns 11. incorrect needs to be 11x
Here is my code:
// This is the expression tree code I'm using
#ifndef EXPRNODE_H
#define EXPRNODE_H
#include <cstdlib> // for NULL
using namespace std;
//====================================== class ExprNode
// Node of a binary expression tree over integers: either an operator node
// with two subtrees or a leaf that holds an integer constant.
class ExprNode {
public:
// Builds an operator node; the node stores the two pointers as given.
ExprNode(char oper, ExprNode* left, ExprNode* right);
// Builds a leaf that holds the constant val.
ExprNode(int val);
int eval() const; // Evaluate expr tree. Return result.
private:
char _op; // one of +, -, *, /, # ('#' marks a constant leaf)
int _value; // integer value used for constants.
ExprNode* _left; // left subtree
ExprNode* _right; // right subtree
// NOTE(review): no destructor is declared, so child nodes are never freed by
// this class — confirm who owns them.
};
#endif
//============================================= ExprNode constructor
// Constructs node for a binary operator.
// _value is not used by operator nodes but is set to 0 so that no member is
// left uninitialized (the original never assigned it).
ExprNode::ExprNode(char oper, ExprNode* left, ExprNode* right)
    : _op(oper), _value(0), _left(left), _right(right) {
}
//============================================== ExprNode constructor
// Builds a leaf node: the operator is the '#' marker, the value is v and
// there are no subtrees.
ExprNode::ExprNode(int v)
    : _op('#'), _value(v), _left(NULL), _right(NULL) {
}
//===================================================== ExprNode::eval
// Recursively evaluates the expression tree rooted at this node and returns
// the result. A node whose operator is not one of + - * / # evaluates to 0;
// the original returned an uninitialized value in that case.
// NOTE(review): integer division by zero is not guarded.
int ExprNode::eval() const {
    int result = 0;
    switch (_op) {
    case '+':
        result = _left->eval() + _right->eval();
        break;
    case '-':
        result = _left->eval() - _right->eval();
        break;
    case '*':
        result = _left->eval() * _right->eval();
        break;
    case '/':
        result = _left->eval() / _right->eval();
        break;
    case '#':
        result = _value; // an integer constant
        break;
    default:
        break; // unknown operator: keep the 0 default
    }
    return result;
}
// Returns true when operand is one of the operator symbols + - * / ^.
bool isOperator (char operand)
{
    switch (operand)
    {
    case '+':
    case '-':
    case '*':
    case '/':
    case '^':
        return true;
    default:
        return false;
    }
}
// Returns true when potentialNumber is a decimal digit character ('0'..'9').
bool isNumber (char potentialNumber)
{
    if (potentialNumber < '0')
    {
        return false;
    }
    return potentialNumber <= '9';
}
// Returns true when letter is the variable name, in either case ('x' or 'X').
bool isX (char letter)
{
    if (letter == 'x')
    {
        return true;
    }
    return letter == 'X';
}
I'm not going to include the code going from infix to postfix because it is unnecessary (I think).... next is the code for the expression tree and calculations
// Builds an expression tree from the space-separated postfix string
// `expression` (the result of the earlier infix-to-postfix step), evaluates it
// and prints the value on standard output.
//
// `count` alternates the destination of what is read: while it is odd, digits
// go to tempNum / tNum, while it is even they go to tempNum2 / tNum2. It is
// incremented after every space and after every operator except '^'.
//
// Characters that are neither digits, operators nor spaces (for example 'x')
// are ignored, which is why "5x 6x +" prints 11.
void expressionTree(string expression)
{
string tempNum = "";
string tempNum2 = "";
int count = 1;
int tempNumInt;
int tempNum2Int;
// creates a blank total value and blank numbers
ExprNode* totalVal = new ExprNode('+', new ExprNode(0), new ExprNode(0));
// NOTE(review): tNum and tNum2 are uninitialized until a number followed by a
// space has been read; an operator met before that uses them as they are.
ExprNode* tNum;
ExprNode* tNum2;
// loop through the postfix expression
for (unsigned int iterator = 0; iterator < expression.length(); iterator++)
{
if (isOperator(expression[iterator]))
{
// Don't need to worry about at the moment
if (expression[iterator] == '^')
{
// go to derivative later
}
else
{
if (count % 2 != 0)
{
// we'll do different derivatives here.... for now just add, subtract, multiply, divide
totalVal = new ExprNode(expression[iterator], tNum, tNum2);
}
// '&&' binds tighter than '||'; in this else-branch count is already even,
// so the condition reduces to "operator is + or *".
else if (count % 2 == 0 && expression[iterator] == '+' || expression[iterator] == '*')
{
totalVal = new ExprNode(expression[iterator], tNum, totalVal);
}
// Likewise reduces to "operator is - or /"; the running total becomes the
// left operand.
else if (count % 2 == 0 && expression[iterator] == '-' || expression[iterator] == '/')
{
totalVal = new ExprNode(expression[iterator], totalVal, tNum);
}
}
count++;
}
// Accumulate the digits of the number currently being read.
if (isNumber(expression[iterator]) && count % 2 != 0)
{
tempNum += expression[iterator];
}
else if (isNumber(expression[iterator]) && count % 2 == 0)
{
tempNum2 += expression[iterator];
}
// A space ends the current token: turn the collected digits into a leaf node.
if (expression[iterator] == ' ' && count % 2 != 0)
{
tempNumInt = atoi (tempNum.c_str());
tNum = new ExprNode(tempNumInt);
tempNum = "";
count++;
}
else if (expression[iterator] == ' ' && count % 2 == 0)
{
tempNum2Int = atoi (tempNum2.c_str());
tNum2 = new ExprNode(tempNum2Int);
tempNum2 = "";
count++;
}
// NOTE(review): this branch looks unreachable — one of the two conditions
// above is always true for a space.
else if (expression[iterator] == ' ')
{
count++;
}
}
// NOTE(review): none of the nodes allocated above is deleted.
cout << totalVal->eval() << endl;
}
I'll try to explain anything that is unclear. Thanks in advance.
I'm not pointing out the exact mistake, but here is some advice: int ExprNode::eval() const should not return 'int'. That's not enough to handle results that contain a variable, like "11x" (this cannot be represented with a simple int). You'll have to create your own structure that stores the integer part and the variable part of the result (with the latter being optional).