How do I parse and evaluate expressions in an infix calculator grammar? I thought of two ways.
The 1st involves using two stacks. One is for numbers and the other is for operators, and I would assess the operator precedence and association in order to figure out how to evaluate an expression.
The second method involves converting the infix expression to postfix which I have no idea how I'd go about doing. It was just an idea. Currently I set up my program with the intention to use the 1st method.
#include <iostream>
#include <string>
#include <sstream>
using namespace std;
bool die(const string &msg);
//stack class
class Stack{
public:
Stack();
void push(const double &val);
void push(const string &oper);
double popnum();
string popop();
double getopele();
double getnumele();
private:
static const unsigned MAX=30;
string opstack[MAX];
double numstack[MAX];
unsigned opele;
unsigned numele;
};
//operator type
struct OP{
string name;
void * func;
unsigned arity;
unsigned prec;
bool lass;
string descrip;
};
//operator table
OP op[]={{"+", add, 2, 4, true, "2+3 is 5"},
{"-", subtract, 2, 4, true, "2-3 is -1"},
{"*", multiply, 2, 6, true, "2*3 is 6"},
{"/", divide, 2, 6, true, "2/3 is 0.666666..., div by 0 illegal"}};
unsigned OPELE =sizeof(op)/sizeof(op[0]);
//operators
bool add(double &r, double &x, double &y);
bool subtract(double &r, double &x, double &y);
bool multiply(double &r, double &x, double &y);
bool divide(double &r, double &x, double &y);
//Manip
unsigned findindex(string token, OP op[], unsigned OPELE);
bool parse(double &t, const string &token);
bool evaluate(double &result, string line);
bool weird(double x);
int main(){
for(string line; getline(cin, line);){
if(line=="QUIT") break;
if(line.empty()) continue;
if(line=="DOC")
for(unsigned i=0; i<OPELE; i++)
cout<<op[i].name<<" | "<<op[i].descrip<<'\n';
double result;
if(evaluate(result, line)){
cout<<result<<'\n';
}else{
cout<<"Could not understand input\n\n";
}
}
}
Stack::Stack(){
opele=0;
numele=0;
}
void Stack::push(const double &val){
if(MAX) die("Stack Overflow");
numstack[numele++]=val;
}
void Stack::push(const string &oper){
if(MAX) die("Stack Overflow");
opstack[opele++]=oper;
}
double Stack::popnum(){
if(!numele) die("Stack Underflow");
return numstack[--numele];
}
string Stack::popop(){
if(!opele) die("Stack Underflow");
return opstack[--opele];
}
double Stack::getopele(){
return opele;
}
double Stack::getnumele(){
return numele;
}
bool add(double &r, double &x, double &y){
double t = x + y;
if( weird(t) ) return false;
r = t;
return true;
}
bool subtract(double &r, double &x, double &y){
double t = x - y;
if( weird(t) ) return false;
result = t;
return true;
}
bool multiply( double & r, double& x, double &y ){
double t = x * y;
if( weird(t) ) return false;
result = t;
return true;
}
bool divide( double & result, double &x, double &y ){
double t = x / y;
if( weird(t) ) return false;
result = t;
return true;
}
unsigned findindex(string token, OP op[], unsigned OPELE){
for(unsigned i=0l i<OPELE; i++)
if(op[i].name==token)
return i;
return UINT_MAX;
}
bool parse(double &t, const string &token){
istringstream sin( token );
double t;
if( !(sin >>t) ) return false;
char junk;
if( sin >>junk ) return false;
value = t;
return true;
}
bool evaluate(double &result, string line){
istringstream sin(line);
Stack s;
for(string token; sin>>token;){
double t;
if(parse(t, token)){
s.push(t);
}else if(
}
}
bool weird( double x ){
return x != x || x != 0 && x == 2*x;
}
This will be a long read, but anyway, I will share with you the algorithm I use to parse an infix expression and store it as a binary tree. Not Stack, but binary tree. Parsing that will give the postfix order easily. I don't say this is the best algorithm out there, but this works for my scripting language.
The algorithm:
We have a method which operates on a "current node" of a binary tree and a "current expression". The nodes contain a "data" field and a "type" field.
Stage 1: Simple things, such as "4" go directly into the node, and we specify the type to be as "DATA", ie. use this information as it is.
Stage 2: Now, Let's consider the following expression:
a) 2 + 3
this will be transformed into the following binary tree:
+
/ \
2 3
So, the operators go into the nodes and the operands go into the leafs. Transofrming the expression a) into the tree is pretty simple: find the operator, put in the "current" node of the tree, specify the type of the node to be operator "PLUS", and what is left of it goes into the tree to the left part of the node, what is right of it goes into the right tree. Nice and simple, using the information from Stage 1 the two leafs will be "DATA" leafs with value 2 and 3.
Stage 3: But for a more complex expression:
b) 2 * 3 + 4
The tree will be:
+
/ \ 4
*
/ \
2 3
So we need to modify the algorithm above to the following: Find the first operator which has the highest precedence (considering c++ guidelines... precedence of + (plus) and - (minus) is 6, while precedence of * (multiply), / (divide) and % (modulo) is 5) in the expression, divide the expression into two parts (before operand with highest precedence and after operand with highest precedence) and call recursively the method for the two parts, while placing the operator with the highest precedence into the current node. So, we do create a tree wit hdata like:
+
/ \
/ call method with "4"
call method with "2*3"
and at this stage we fall back to "Stage 2" for the call ("2*3") and "Stage 1" for the call "4".
Stage 4: What if there are paranthesis in the expression? Such as
c) 2 * (3 + 4)
This will give us the tree:
*
/ \
2 +
/ \
3 4
We modify the algorithm to be like:
while the current expression is enclosed in a paranthesis remove the paranthesis from it and restart the algorithm. Be careful. (2 + 3 * 4 + 5) is considered to be enclosed in a parnethesis while (2+3)*(4+5) is NOT. So, it's not just the starting and ending characters of the expression, but you effectively need to count the parantheses. (this is a recursive method, don't be afraid of the first step...)
now find the first operator with the highest precedence outside the parantheses of the expression. Again, take the left and right sides of the expression and call the method again and again till you end up at "Stage 1" ie. with a single data element.
Now this is an algorithm for an expression which consists of plain numbers and operators. For more complex information you might need to refine it to suit your needs. If you consider it worth, take a look at https://sourceforge.net/p/nap-script/mercurial/ci/default/tree/compiler/interpreter.cpp . This contains a full implementation (in C) of the algorithm above with regard to more complex notions (variables, method calls, postfix/prefix operators, etc...) The method is build_expr_tree, starts at line 1327.
The method of recursive descent is the simplest way to implement a correct expression parser by hand. Here the programming language stack does the same thing as the explicit stack you're trying to use. There are many RD examples to be found with google, and any good compiler book will have some.
The linked Wikipedia page shows a parser, but not how to add evaluation. So below is a complete rudimentary expression evaluator in C. It could be easily wrapped in a C++ class with the globals becoming instance variables. It's missing features you'd need in a production system. For example, when it finds an error, it just exits. C++ exceptions will easily allow you to unwind the recursion and continue. It also needs protections against numerical overflow, divide-by-zero, etc., which you obviously know how to do.
The idea of recursive descent is to transform the grammar of the desired language into a form called LL(1). When that's done, there are fixed rules - guarenteed to work every time - for transforming the grammar rules into procedures. I've done this below by hand. There are tools to do it automatically.
So this evaluator is very easy to extend. Just add the necessary grammar rule, then implement the needed enhancements to scanner, parser, and evaluation code. For example, a built-in function rule would be unsigned_factor -> FUNCTION_NAME ( expr ), where the scanner recognizes all function names as the same token and the unsigned_factor C function is augmented to parse and compute values.
I had to include a small scanner to get a working program. Note more than half the code is the scanner. Basic RD parsers are simple.
They get more complex if you add error recovery: the intelligent ability to skip just past an error and continue parsing, while emitting only one precisely worded error message. But then again, this adds lots of complexity to any parser.
// Bare bones scanner and parser for the following LL(1) grammar:
// expr -> term { [+-] term } ; An expression is terms separated by add ops.
// term -> factor { [*/] factor } ; A term is factors separated by mul ops.
// factor -> unsigned_factor ; A signed factor is a factor,
// | - unsigned_factor ; possibly with leading minus sign
// unsigned_factor -> ( expr ) ; An unsigned factor is a parenthesized expression
// | NUMBER ; or a number
//
// The parser returns the floating point value of the expression.
#include <stdio.h>
#include <stdlib.h>
// The token buffer. We never check for overflow! Do so in production code.
char buf[1024];
int n = 0;
// The current character.
int ch;
// The look-ahead token. This is the 1 in LL(1).
enum { ADD_OP, MUL_OP, LEFT_PAREN, RIGHT_PAREN, NUMBER, END_INPUT } look_ahead;
// Forward declarations.
void init(void);
void advance(void);
double expr(void);
void error(char *msg);
// Parse expressions, one per line.
int main(void)
{
init();
while (1) {
double val = expr();
printf("val: %f\n", val);
if (look_ahead != END_INPUT) error("junk after expression");
advance(); // past end of input mark
}
return 0;
}
// Just die on any error.
void error(char *msg)
{
fprintf(stderr, "Error: %s. I quit.\n", msg);
exit(1);
}
// Buffer the current character and read a new one.
void read()
{
buf[n++] = ch;
buf[n] = '\0'; // Terminate the string.
ch = getchar();
}
// Ignore the current character.
void ignore()
{
ch = getchar();
}
// Reset the token buffer.
void reset()
{
n = 0;
buf[0] = '\0';
}
// The scanner. A tiny deterministic finite automaton.
int scan()
{
reset();
START:
switch (ch) {
case ' ': case '\t': case '\r':
ignore();
goto START;
case '-': case '+':
read();
return ADD_OP;
case '*': case '/':
read();
return MUL_OP;
case '(':
read();
return LEFT_PAREN;
case ')':
read();
return RIGHT_PAREN;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
read();
goto IN_LEADING_DIGITS;
case '\n':
ch = ' '; // delayed ignore()
return END_INPUT;
default:
error("bad character");
}
IN_LEADING_DIGITS:
switch (ch) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
read();
goto IN_LEADING_DIGITS;
case '.':
read();
goto IN_TRAILING_DIGITS;
default:
return NUMBER;
}
IN_TRAILING_DIGITS:
switch (ch) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
read();
goto IN_TRAILING_DIGITS;
default:
return NUMBER;
}
}
// To advance is just to replace the look-ahead.
void advance()
{
look_ahead = scan();
}
// Clear the token buffer and read the first look-ahead.
void init()
{
reset();
ignore(); // junk current character
advance();
}
double unsigned_factor()
{
double rtn = 0;
switch (look_ahead) {
case NUMBER:
sscanf(buf, "%lf", &rtn);
advance();
break;
case LEFT_PAREN:
advance();
rtn = expr();
if (look_ahead != RIGHT_PAREN) error("missing ')'");
advance();
break;
default:
error("unexpected token");
}
return rtn;
}
double factor()
{
double rtn = 0;
// If there is a leading minus...
if (look_ahead == ADD_OP && buf[0] == '-') {
advance();
rtn = -unsigned_factor();
}
else
rtn = unsigned_factor();
return rtn;
}
double term()
{
double rtn = factor();
while (look_ahead == MUL_OP) {
switch(buf[0]) {
case '*':
advance();
rtn *= factor();
break;
case '/':
advance();
rtn /= factor();
break;
}
}
return rtn;
}
double expr()
{
double rtn = term();
while (look_ahead == ADD_OP) {
switch(buf[0]) {
case '+':
advance();
rtn += term();
break;
case '-':
advance();
rtn -= term();
break;
}
}
return rtn;
}
And running the program:
1 + 2 * 3
val: 7.000000
(1 + 2) * 3
val: 9.000000
Related
I am trying to make a newtons 2nd law calculator and when I enter the values for the calculation nothing happens. I am not sure if they aren't being returned to the main function or what. What I wanted to do was have the user input a character which was then taken by a function containing a switch statement, in the cases of the switch statement would function calls to functions that did the arithmetic, those functions would then return a value to the switch function and the switch function would return a value to the main function which would then print the value to the screen.
// Newtons2ndlaw.cpp : This file contains the 'main' function. Program
execution begins and ends there.
//
#include "pch.h"
#include <stdio.h>
#include <stdlib.h>
#include<iostream>
void varselect(char, float *);
void force(float *);
void acceleration(float *);
void mass(float *);
int main()
{
float eqvalue;
char operation;
printf("Welcome to Newtons 2nd law solver!\n");
system("PAUSE");
printf("Would you like to solve for a Force, Acceleration or Mass?\nType 'F'(for force), 'A'(for acceleration), 'M'(for mass) to select a variable\n");
scanf_s("%c", &operation,1);
if (operation == 'f' || operation == 'F' || operation == 'a' || operation == 'A' || operation == 'm' || operation == 'M') //logic for determing what the user entered
{}
else
{
printf("Please enter a valid character.");
}
varselect(operation,&eqvalue); //function call to receive float value from varselect function
if (operation == 'f' || operation == 'F') //{
{
printf("The force = %f",eqvalue);
}
//this block determines what character string to display with calculated float value
else if (operation == 'a' || operation == 'A')
{
printf("The acceleration = %f", eqvalue);
}
else if (operation == 'm' || operation == 'M')
{
printf("the Mass = %f", eqvalue);
}
} //}
void varselect(char x, float *j)
//this function recieves the user inputed char value and returns the calculated float value to function call.
{ //switch allows program to "understand" users unwillingness to press shift before a,f,m like printf statement tells them to do.
switch (x) // also allows each variable to call its own function.
{
case 'f':
float getval;
force(&getval);
*j = getval;
return;
break;
}
switch (x)
{
case 'F':
float getval;
force(&getval);
*j = getval;
return;
break;
}
switch (x)
{
case 'a':
float getval;
acceleration(&getval);
*j = getval;
return;
break;
}
switch (x)
{
case 'A':
float getval;
acceleration(&getval);
*j = getval;
return;
break;
}
switch (x)
{
case 'm':
float getval;
mass(&getval);
*j = getval;
return;
break;
}
switch (x)
{
case 'M':
float getval;
mass(&getval);
*j = getval;
return;
break;
}
return;
}
void force(float *fma)
{
float acceleration, mass;
printf("Enter a value for 'Mass', then 'Acceleration'.\n\n");
scanf_s("%f\n%f\n", &mass, &acceleration, 2);
*fma = mass * acceleration;
return;
}
void acceleration(float *afm)
{ //functions to take input from user and return float to varselect function
float force, mass;
printf("Enter a value for 'Force', then 'Mass'.\n\n");
scanf_s("%f\n%f\n", &force, &mass, 1);
*afm = force / mass;
return;
}
void mass(float *fam)
{
float force, acceleration;
printf("Enter a value for 'Force', then 'Acceleration'.\n\n");
scanf_s("%f\n%f\n", &force, &acceleration, 1);
*fam = force / acceleration;
return;
}
I think its notting to do with your code... What i can say is that if you have an int or double or float function it works the same as void. You put in the parameters that you need, maybe like for the Newton's second law or whatever you need. And there you can add the switch function. And after the switch you return the output.
After the switch after its {} you put return output or the name of the variable you want to return.
To get it on the console you just make like this:
std::cout<<(function name) output(/*variables that are in the function parameter or whatever */ 13, 4.0f) <
I am currently working on a RPN calculator, it takes an infix expression converts it to postfix and shows the answer. I mostly got it right, but when I pop the answer from the stack if shows only the last digit of the result
ex
Enter infix: (1+1)*13+10/2
Postfix: 11+13*102/+
Result is: 1
Enter infix: 2*13+10/2
Postfix: 213*102/+
Result is:1
It gets it right for this kind of inputs
Enter infix: 3*2+5
Postfix: 32*5+
Result is : 11
#include<iostream>
#include<stdio.h>
#include<string.h>
#include<math.h>
#include<stdlib.h>
using namespace std;
class infix2postfix
{
public:
void push(int symbol);
int pop();
void infix_to_postfix();
int priority(char symbol);
int isEmpty();
int white_space(char);
int eval_post();
};
char infix[100], postfix[100];
int stack[100];
int top;
int main()
{
infix2postfix ip;
top=-1;
cout<<"Enter infix : ";
gets(infix);
ip.infix_to_postfix();
cout<<"Postfix : "<<postfix<<endl;
cout<<"Result is : "<<ip.eval_post()<<endl;
return 1;
}
void infix2postfix :: infix_to_postfix()
{
int i,p=0;
char next;
char symbol;
for(i=0; i<strlen(infix); i++)
{
symbol=infix[i];
if(!white_space(symbol))
{
switch(symbol)
{
case '(':
push(symbol);
break;
case ')':
while((next=pop())!='(')
postfix[p++] = next;
break;
case '+':
case '-':
case '*':
case '/':
case '%':
case '^':
while( !isEmpty( ) && priority(stack[top])>= priority(symbol) )
postfix[p++]=pop();
push(symbol);
break;
default: /*if an operand comes*/
postfix[p++]=symbol;
}
}
}
while(!isEmpty( ))
postfix[p++]=pop();
postfix[p]='\0'; /*End postfix with'\0' to make it a string*/
}
/*This function returns the priority of the operator*/
int infix2postfix :: priority(char symbol)
{
switch(symbol)
{
case '(':
return 0;
case '+':
case '-':
return 1;
case '*':
case '/':
case '%':
return 2;
case '^':
return 3;
default :
return 0;
}
}
void infix2postfix :: push(int symbol)
{
if(top>100)
{
cout<<"Stack overflow\n";
exit(1);
}
stack[++top]=symbol;
}
int infix2postfix :: pop()
{
if( isEmpty() )
{
cout<<"Stack underflow\n";
exit(1);
}
return (stack[top--]);
}
int infix2postfix :: isEmpty()
{
if(top==-1)
return 1;
else
return 0;
}
int infix2postfix :: white_space(char symbol)
{
if( symbol == ' ' || symbol == '\t' )
return 1;
else
return 0;
}
int infix2postfix :: eval_post()
{
int a,b,i,temp,result;
for(i=0; i<strlen(postfix); i++)
{
if(postfix[i]<='9' && postfix[i]>='0')
push(postfix[i]-'0');
else
{
a=pop();
b=pop();
switch(postfix[i])
{
case '+':
temp=b+a;
break;
case '-':
temp=b-a;
break;
case '*':
temp=b*a;
break;
case '/':
temp=b/a;
break;
case '%':
temp=b%a;
break;
case '^':
temp=pow(b,a);
}
push(temp);
}
}
result=pop();
return result;
}
Consider what happens when eval_post() is given 213*102/+ to work with. Let's start in the middle, with the '1' after the asterisk. The '1' is a digit, so push it [stack ends with: 1]. Similarly, the 0 and 2 get pushed [stack ends with: 1, 0, 2]. Then the division symbol is encountered, so pop 2 and 0, then push 0/2 = 0 [stack ends with: 1, 0]. Finally, the addition symbol is encountered, so pop 0 and 1, then push 1+0=1, which is then popped as your answer.
One symptom of your problem is that, if things work, the stack should be empty when eval_post() returns. However, it is not empty when your infix includes numbers with more than one digit. Note that "10" gets pushed onto the stack as two numbers: "1" followed by "0". You want the value 10 pushed.
There are also some style problems with the code, but this appears to be the main functional problem.
Is there an efficient/elegant way to compare a list of ints with a given int in C++. Lets say I have a quartet of numbers: P, Q, R, S and I need to compare them with a given int T to get different case scenarios.
if (P == T && Q != T && R != T && S != T) case('P');
else if (P != T && Q == T && R != T && S != T) case('Q');
else if (P != T && Q != T && R == T && S != T) case('R');
else if (P != T && Q != T && R != T && S == T) case('S');
Similarly I will have 6 paired cases: PQ, QR, PR ..., 4 triplet cases and 1 quartet case. As you can see, lot of explicit comparisons! Is there an elegant way to do this using STL containers?
I need one of these cases as my output: 'P', 'Q', .., 'PQ', 'RS',.. 'PQR', 'PQS'... 'PQRS'. I plan to do a switch-case as all these combinations will invoke different codes.
Use a lookup map with a Boolean-tuple as key:
std::map<std::tuple<bool,bool,bool,bool>,value> map;
Looking up the value is then simply something like this:
map.find({T==P,T==Q,T==R,T==S});
(wrap this in a lookup function to make it easier of course).
The value of the map will be determined what you actually need. It can be a string, a numeric value, or even an std::function (lambda) to be executed if the key is found.
Alternatively you can also use an unordered_map, but then you need to provide a hash-function yourself as STL has no default hash-function for tuples.
You could use bitwise operations to combine the results of the four tests into a single 0..15 integer value. Note that each comparison is only performed once.
Once you have the resulting value it can be used as an index into an array or std::vector or a key lookup into an associative container. I've used a switch with the result in this simple example.
int mask = (P == T) << 3 | (Q == T) << 2 | (R == T) << 1 | (S == T);
switch (mask) // 0..15
{
case 15: // PQRS
break;
case 14: // PQR
break;
case 13: // PQS
break;
case 12: // PQ
break;
case 11: // PRS
break;
case 10: // PR
break;
case 9: // PS
break;
case 8: // P
break;
case 7: // QRS
break;
case 6: // QR
break;
case 5: // QS
break;
case 4: // Q
break;
case 3: // RS
break;
case 2: // R
break;
case 1: // S
break;
case 0: // none
break;
}
Try matching a bit-field.
If we consider a 4 bit-field 0000 we can set the first bit if T==P the second if T==Q and so on:
unsigned index{(P==T?1U:0U)|(Q==T?2U:0U)|(R==T?4U:0U)|(S==T?8U:0U)};
Then just index into an array.
NB 1: I notice another answer using the same trick with a switch which will also work.
NB 2: You can regard this implementation as a hash-table using a 'perfect hash'.
#include <iostream>
#include <string>
//Actual Solution Starts Here.
const std::string match_data[] {
"",//0000
"P",//0001
"Q",//0010
"PQ",//0011
"R",//0100
"PR",//0101
"PQ",//0101
"PQR",//0111
"S",//1000
"PS",//1001
"QS",//1010
"PQS",//1011
"RS",//1100
"PRS",//1101
"PQS",//1101
"PQRS"//1111
};
std::string match(int T, int P, int Q, int R, int S){
unsigned index{(P==T?1U:0U)|(Q==T?2U:0U)|(R==T?4U:0U)|(S==T?8U:0U)};
return match_data[index];
}
//Actual Solution Ends Here.
//The rest is a trivial test harness...
bool error{false};
int check(int T, int P, int Q, int R, int S, const std::string& expect){
const auto result{match(T,P,Q,R,S)};
if(result!=expect){
std::cout<<"Error ("<<T<<','<<P<<','<<Q<<','<<R<<','<<S<<")=="<<result<<"!="<<expect<<std::endl;
error=true;
return 1;
}
return 0;
}
int main() {
int errors{0};
errors+=check(7,1,2,3,4,"");
errors+=check(7,7,2,3,4,"P");
errors+=check(9,1,9,3,4,"Q");
errors+=check(117,1,6,117,4,"R");
errors+=check(13,1,7,3,13,"S");
errors+=check(132,132,132,132,132,"PQRS");
errors+=check(98,98,98,3,4,"PQ");
errors+=check(9876,56,87,9876,4,"R");
errors+=check(1,1,0,1,0,"PR");
errors+=check(78,78,78,78,0,"PQR");
if(errors==0&&!error){
std::cout<<"Success"<<std::endl;
}else{
std::cout<<"ERRORS: "<<errors<<std::endl;
}
return 0;
}
Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 7 years ago.
Improve this question
I am relatively new to C/C++ and i was wondering how I could use for loop so I wouldn't have to make a bunch of switch statements I have already made the switch statement I just need help on integrating the for loop into it. Thank you.
#include <stdio.h>
#pragma warning(disable : 4996)
int main() {
char ch;
ch = getchar();
int f, a = 10, b = 20;
printf("ch = %c\n", ch);
switch (ch) {
case '+': f = a + b; printf("f = %d\n", f); break;
case '-': f = a - b; printf("f = %d\n", f); break;
case '*': f = a * b; printf("f = %d\n", f); break;
case '/': f = a / b; printf("f = %d\n", f); break;
default: printf("invalid operator\n");
}
}
the purpose of the program is to enter in either +,-,*,/ and then based on that input it will execute the case that was entered in so + would add a and be together.
Basically, you need to map operator characters to operations.
Your current code represents that mapping via execution flow control.
In C++ the standard library's map collection is a good choice for instead representing it as data, so that you don't even need to use a loop. In C an array of structs where each contains a char and a function pointer, can do the same job. However, you then have to define the functions yourself, because unlike the C++ standard library the C standard library doesn't provide convenient named functions for the arithmetic operations.
In a similar fashion, a Boolean state, whether something is true or false, can be represented as an execution position or as data, usually as a variable of type bool. What to choose is mainly engineering gut feeling. Sometimes representation via flow control is simplest and most clear, sometimes representation as data is simplest and most clear.
C++ example, mostly reproducing the given example code's effect, but with the mapping as data:
#include <iostream>
#include <functional>
#include <map>
using namespace std;
auto main() -> int
{
const map<char, function<int(int,int)>> op =
{
{ '+', plus<int>() },
{ '-', minus<int>() },
{ '*', multiplies<int>() },
{ '/', divides<int>() }
};
char ch;
cout << "Operator? "; cin >> ch;
cout << "ch = '" << ch << "'\n";
if( op.count( ch ) == 0 )
{
cout << "invalid operator\n";
}
else
{
const int a = 10;
const int b = 20;
cout << "f = " << op.at( ch )( a, b ) << "\n";
}
}
Corresponding C example, which does include a for loop as mentioned in the question:
#include <stdio.h>
int plus( int a, int b ) { return a+b; }
int minus( int a, int b ) { return a-b; }
int multiplies( int a, int b ) { return a*b; }
int divides( int a, int b ) { return a/b; }
typedef int(*Func_ptr)(int, int);
struct Mapping
{
char ch;
Func_ptr f;
};
const struct Mapping op[] =
{
{ '+', plus },
{ '-', minus },
{ '*', multiplies },
{ '/', divides }
};
const int n_ops = sizeof( op )/sizeof( *op );
Func_ptr op_at( char ch )
{
for( int i = 0; i < n_ops; ++i )
{
if( op[i].ch == ch ) { return op[i].f; }
}
return NULL;
}
int main()
{
int ch; // Note: type `int` to accommodate EOF value.
printf( "Operator? " ); ch = getchar();
printf( "ch = '%c'\n", ch );
if( op_at( ch ) == NULL )
{
printf( "invalid operator\n" );
}
else
{
const int a = 10;
const int b = 20;
printf( "f = %d\n", op_at( ch )( a, b ) );
}
}
C11, I think it was, introduced some machinery for effectively overloading functions so they can be used much like overloaded functions in C++. I don't remember much about and didn't use it here. I would suggest that if you need to handle different data types, just use different function names.
Note that the C example also compiles as C++, so both these examples are technically C++. However, the last example is in C style, using C idioms and C i/o, and does things that are unnecessary in C++. We usually just say that such code is C, not that it's C style; such code might not always compile as C++, because while C is largely a subset of C++ these are two different, separate languages: there is technically no such thing as C/C++.
i was wondering how I could use for loop so I wouldn't have to make a bunch of switch statements I have already made the switch statement I just need help on integrating the for loop into it.
A for loop is used to repeat a block of statements until a condition is met.
A switch statement is used for making a decision about which block of statements to run based on a given value, instead of using a bunch of if statements.
It doesn't make sense to replace a switch statement with a for loop. What does make sense is using a switch statement inside of a loop, to act on something that the loop generates, like asking for user input. For example:
#include <stdio.h>
#pragma warning(disable : 4996)
int main() {
int f, a = 10, b = 20;
char ch;
int finished = 0;
do {
ch = getchar();
printf("ch = %c\n", ch);
switch (ch) {
case '+': f = a + b; printf("f = %d\n", f); break;
case '-': f = a - b; printf("f = %d\n", f); break;
case '*': f = a * b; printf("f = %d\n", f); break;
case '/': f = a / b; printf("f = %d\n", f); break;
case 'q': finished = 1; break;
default: printf("invalid operator\n"); break;
}
}
while (!finished);
return 0;
}
Suppose I am a genome scientist trying to store extremely long strings of characters, each of which represents two bits of information (i.e. each element is either G, A, T, or C). Because the strings are incredibly long, I need to be able to store a string of length N in precisely 2N bits (or rather, N/4 bytes).
With that motivation in mind, I am looking for a generalization of std::bitset (or boost::dynamic_bitset<>) that works on two-bit values instead of single-bit values. I want to store N such two-bit values, each of which can be 0, 1, 2, or 3. I need the data packed as closely as possible in memory, so vector<char> will not work (as it wastes a factor of 4 of memory).
What is the best way to achieve my goal? One option is to wrap the existing bitset templates with customized operator[], iterators, etc., but I'd prefer to use an existing library if at all possible.
std::bitset<> is fixed length and you probably do not want that.
I think you should go ahead and wrap std::vector<bool>.
Note that std::vector<bool> is optimised for space, but has the benefit that it is dynamic in size.
Presumably you need to read the genome of arbitrary length on from somewhere.
Have a think about whether you need much of an API to access it; you might only need a couple of methods.
#Jefffrey's answer already covers the relevant code, if for bitset<>.
[ I am not familiar with boost::dynamic_bitset<> and what it might give over vector.]
One further thought is whether it might be convenient for you to work with quads of letters, a quad nicely filling a char in space.
class Genome
{
public:
enum class Letter {A,C,G,T};
Genome(const std::string& source)
{
code_.resize(source.size() * 2);
for (unsigned index = 0; index != source.size(); ++index)
{
char text = source[index];
Letter letter = textToLetter(text);
set(index, letter);
}
}
static Letter textToLetter(char text)
{
// Or search through the array `letterText`.
// Or come up with a neat but unintelligible one liner ...
Letter letter = Letter::A;
switch (text)
{
case 'A':
letter = Letter::A;
break;
case 'C':
letter = Letter::C;
break;
case 'G':
letter = Letter::G;
break;
case 'T':
letter = Letter::T;
break;
default:
// Invalid - handle error.
break;
}
return letter;
}
static char letterToText(Letter l)
{
return letterText[(unsigned)l];
}
// Add bounds checking
Letter get(unsigned index) const
{
unsigned distance = index * 2;
char numeric = code_[distance] + code_[distance + 1] * 2;
return Letter(numeric);
}
// Add bounds checking
void set(unsigned index, Letter value)
{
unsigned distance = index * 2;
bool low = (unsigned)value & 1;
bool high = (bool)((unsigned)value & 2);
code_[distance] = low;
code_[distance + 1] = high;
}
unsigned size()
{
return code_.size() / 2;
}
// Extend by numLetters, initially set to 'A'
void extend(unsigned numLetters)
{
code_.resize(code_.size() + numLetters * 2);
}
private:
static char letterText[4];
std::vector<bool> code_;
};
char Genome::letterText [4] = { 'A', 'C', 'G', 'T' };
int main()
{
Genome g("GATT");
g.extend(3);
g.set(5, Genome::Letter::C);
for (unsigned i = 0; i != g.size(); ++i)
std::cout << Genome::letterToText(g.get(i));
std::cout << std::endl;
return 0;
}
You have two choices.
Given:
enum class nucleobase { a, c, g, t };
You have two choices. You can:
use a single std::bitset and play with indexing
use std::bitset in combination with another container
For the first, you can just define a couple of functions that target the correct number of bits per set/get:
template<std::size_t N>
void set(std::bitset<N>& bits, std::size_t i, nucleobase x) {
switch (x) {
case nucleobase::a: bits.set(i * 2, 0); bits.set(i * 2 + 1, 0); break;
case nucleobase::c: bits.set(i * 2, 0); bits.set(i * 2 + 1, 1); break;
case nucleobase::g: bits.set(i * 2, 1); bits.set(i * 2 + 1, 0); break;
case nucleobase::t: bits.set(i * 2, 1); bits.set(i * 2 + 1, 1); break;
}
}
template<std::size_t N>
nucleobase get(const std::bitset<N>& bits, std::size_t i) {
if (!bits[i * 2])
if (!bits[i * 2 + 1]) return nucleobase::a;
else return nucleobase::c;
else
if (!bits[i * 2 + 1]) return nucleobase::g;
else return nucleobase::t;
}
Live demo
The above is just an example and a terrible one (it's almost 4AM here and I really need to sleep).
For the second you just need to map alleles and bits:
bit_pair bits_for(nucleobase x) {
switch (x) {
case nucleobase::a: return bit_pair("00"); break;
case nucleobase::c: return bit_pair("10"); break;
case nucleobase::g: return bit_pair("01"); break;
case nucleobase::t: return bit_pair("11"); break;
}
}
nucleobase nucleobase_for(bit_pair x) {
switch (x.to_ulong()) {
case 0: return nucleobase::a; break;
case 1: return nucleobase::c; break;
case 2: return nucleobase::g; break;
case 3: return nucleobase::t; break;
default: return nucleobase::a; break; // just for the warning
}
}
Live demo
Of course if you need runtime length you can just use boost::dynamic_bitset and std::vector.
Here's what I use for fixed-length k-mers.
#include <cstdint>
#include <cstdlib>
#include <ostream>
enum class nucleotide { A, C, G, T };
inline std::ostream&
operator<<(std::ostream& pOut, nucleotide pNt)
{
switch (pNt) {
case nucleotide::A: pOut << 'A'; break;
case nucleotide::C: pOut << 'C'; break;
case nucleotide::G: pOut << 'G'; break;
case nucleotide::T: pOut << 'T'; break;
}
return pOut;
}
class kmer_base;
class nucleotide_proxy {
public:
operator nucleotide() const {
return nucleotide((*mWord >> (mPosition * 2)) & 3);
};
nucleotide_proxy& operator=(nucleotide pNt) {
uint64_t word = *mWord;
word &= ~(uint64_t(3) << (mPosition*2));
word |= uint64_t(pNt) << (mPosition*2);
*mWord = word;
return *this;
};
private:
friend class kmer_base;
nucleotide_proxy(uint64_t* pWord, uint8_t pPosition)
: mWord(pWord), mPosition(pPosition)
{
}
uint64_t* mWord;
uint8_t mPosition;
};
class kmer_base {
protected:
nucleotide_proxy access(uint64_t* pWord, size_t pPosition)
{
return nucleotide_proxy(pWord + (pPosition / 32), (pPosition & 31));
}
const nucleotide_proxy access(uint64_t* pWord, size_t pPosition) const
{
return nucleotide_proxy(pWord + (pPosition / 32), (pPosition & 31));
}
};
template<int K>
class kmer : public kmer_base
{
enum { Words = (K + 31) / 32 };
public:
nucleotide_proxy operator[](size_t pOutdex) {
return access(mWords, pOutdex);
}
const nucleotide_proxy operator[](size_t pOutdex) const {
return access(mWords, pOutdex);
}
private:
uint64_t mWords[Words];
};
Extending this to dynamic-length k-mere is left as an exercise; it's pretty easy once you have nucleotide_proxy at your disposal. Implementing the reverse complement operator efficiently is also left as an exercise.