Related
I've been looking for ways to count the number of words in a string, but specifically for strings that may contain typos (i.e. "_This_is_a___test" as opposed to "This_is_a_test"). Most of the pages I've looked at only handle single spaces.
This is actually my first time programming in C++, and I don't have much other programming experience to speak of (2 years of college in C and Java). Although what I have is functional, I'm also aware it's complex, and I'm wondering if there is a more efficient way to achieve the same results?
This is what I have currently. Before I run the string through numWords(), I run it through a trim function that removes leading whitespace, then check that there are still characters remaining.
int numWords(string str) {
int count = 1;
for (int i = 0; i < str.size(); i++) {
if (str[i] == ' ' || str[i] == '\t' || str[i] == '\n') {
bool repeat = true;
int j = 1;
while (j < (str.size() - i) && repeat) {
if (str[i + j] != ' ' && str[i + j] != '\t' && str[i + j] != '\n') {
repeat = false;
i = i + j;
count++;
}
else
j++;
}
}
}
return count;
}
Also, I wrote mine to take a string argument, but most of the examples I've seen used (char* str) instead, which I wasn't sure how to use with my input string.
You don't need all those stringstreams to count word boundary
#include <string>
#include <cctype>
int numWords(std::string str)
{
bool space = true; // not in word
int count = 0;
for(auto c:str){
if(std::isspace(c))space=true;
else{
if(space)++count;
space=false;
}
}
return count;
}
One solution is to utilize std::istringstream to count the number of words and to skip over spaces automatically.
#include <sstream>
#include <string>
#include <iostream>
int numWords(std::string str)
{
int count = 0;
std::istringstream strm(str);
std::string word;
while (strm >> word)
++count;
return count;
}
int main()
{
std::cout << numWords(" This is a test ");
}
Output:
4
Albeit as mentioned std::istringstream is more "heavier" in terms of performance than writing your own loop.
Sam's comment made me write a function that does not allocate strings for words. But just creates string_views on the input string.
#include <cassert>
#include <cctype>
#include <vector>
#include <string_view>
#include <iostream>
std::vector<std::string_view> get_words(const std::string& input)
{
std::vector<std::string_view> words;
// the first word begins at an alpha character
auto begin_of_word = std::find_if(input.begin(), input.end(), [](const char c) { return std::isalpha(c); });
auto end_of_word = input.begin();
auto end_of_input = input.end();
// parse the whole string
while (end_of_word != end_of_input)
{
// as long as you see text characters move end_of_word one back
while ((end_of_word != end_of_input) && std::isalpha(*end_of_word)) end_of_word++;
// create a string view from begin of word to end of word.
// no new string memory will be allocated
// std::vector will do some dynamic memory allocation to store string_view (metadata of word positions)
words.emplace_back(begin_of_word, end_of_word);
// then skip all non readable characters.
while ((end_of_word != end_of_input) && !std::isalpha(*end_of_word) ) end_of_word++;
// and if we haven't reached the end then we are at the beginning of a new word.
if ( end_of_word != input.end()) begin_of_word = end_of_word;
}
return words;
}
int main()
{
std::string input{ "This, this is a test!" };
auto words = get_words(input);
for (const auto& word : words)
{
std::cout << word << "\n";
}
return 0;
}
You can use standard function std::distance with std::istringstream the following way
#include <iostream>
#include <sstream>
#include <string>
#include <iterator>
int main()
{
std::string s( " This is a test" );
std::istringstream iss( s );
auto count = std::distance( std::istream_iterator<std::string>( iss ),
std::istream_iterator<std::string>() );
std::cout << count << '\n';
}
The program output is
4
If you want you can place the call of std::distance in a separate function like
#include <iostream>
#include <sstream>
#include <string>
#include <iterator>
size_t numWords( const std::string &s )
{
std::istringstream iss( s );
return std::distance( std::istream_iterator<std::string>( iss ),
std::istream_iterator<std::string>() );
}
int main()
{
std::string s( " This is a test" );
std::cout << numWords( s ) << '\n';
}
If separators can include other characters apart from white space characters as for example punctuations then you should use methods of the class std::string or std::string_view find_first_of and find_first_not_of.
Here is a demonstration program.
#include <iostream>
#include <string>
#include <string_view>
size_t numWords( const std::string_view s, std::string_view delim = " \t" )
{
size_t count = 0;
for ( std::string_view::size_type pos = 0;
( pos = s.find_first_not_of( delim, pos ) ) != std::string_view::npos;
pos = s.find_first_of( delim, pos ) )
{
++count;
}
return count;
}
int main()
{
std::string s( "Is it a test ? Yes ! Now we will run it ..." );
std::cout << numWords( s, " \t!?.," ) << '\n';
}
The program output is
10
you can do it easily with regex
int numWords(std::string str)
{
std::regex re("\\S+"); // or `[^ \t\n]+` to exactly match the question
return std::distance(
std::sregex_iterator(str.begin(), str.end(), re),
std::sregex_iterator()
);
}
I have a string of items (see code). I want to say when a specific item from that list is found. In my example I want the output to be 3 since the item is found after the first two items. I can print out the separate items to the console but I cannot figure out how to do a count on these two items. I think it is because of the while loop... I always get numbers like 11 instead of two separate 1s. Any tips? :)
#include <iostream>
#include <string>
using namespace std;
int main() {
string items = "box,cat,dog,cat";
string delim = ",";
size_t pos = 0;
string token;
string item1 = "dog";
int count = 0;
`;
while ((pos = items.find(delim)) != string::npos)
{
token = items.substr(0, pos);
if (token != item1)
{
cout << token << endl; //here I would like to increment count for every
//item before item1 (dog) is found
items.erase(0, pos + 1);
}
else if (token == item1)
return 0;
}
return 0; //output: box cat
}
I replaced your search algorithm with the method explode, that separates your string by a delimiter and returns a vector, which is better suited for searching and getting the element count:
#include <string>
#include <vector>
#include <sstream>
#include <iostream>
#include <algorithm>
std::vector<std::string> explode(const std::string& s, char delim)
{
std::vector<std::string> result;
std::istringstream iss(s);
for (std::string token; std::getline(iss, token, delim); )
{
result.push_back(std::move(token));
}
return result;
}
int main()
{
std::string items = "box,cat,dog,cat";
std::string item1 = "dog";
char delim = ',';
auto resultVec = explode(items, delim);
auto itResult = std::find_if(resultVec.begin(), resultVec.end()
, [&item1](const auto& resultString)
{
return item1 == resultString;
});
if (itResult != resultVec.end())
{
auto index(std::distance(resultVec.begin(), itResult) + 1); // index is zero based
std::cout << index;
}
return 0;
}
By using std::find_if you can get the position of item1 by iterator, which you can use with std::distance to get the count of elements that are in front of it.
Credits for the explode method go to this post: Is there an equivalent in C++ of PHP's explode() function?
There are many ways to Rome. Here an additional solution using a std::regex.
But main approach is the same as the accepted answer. Using modern C++17 language elements, it is a little bit more compact.
#include <iostream>
#include <string>
#include <regex>
#include <iterator>
#include <vector>
const std::regex re{ "," };
int main() {
std::string items{ "box,cat,dog,cat" };
// Split String and put all sub-items in a vector
std::vector subItems(std::sregex_token_iterator(items.begin(), items.end(), re, -1), {});
// Search and check if found and show result
if (auto it = std::find(subItems.begin(), subItems.end(), "dog"); it != subItems.end())
std::cout << "Found at position: " << std::distance(subItems.begin(), it) + 1 << '\n';
else
std::cout << "Not found.\n";
return 0;
}
I want to split the strings on each line of my text file into an array, similar to the split() function in python. my desired syntax is a loop that enters every split-string into the next index of an array,
so for example if my string:
"ab,cd,ef,gh,ij"
, every time I encounter a comma then I would:
datafile >> arr1[i]
and my array would end up:
arr1 = [ab,cd,ef,gh,ij]
a mock code without reading a text file is provided below
#include <iostream>
#include <fstream>
#include <stdio.h>
#include <string.h>
#include <string>
using namespace std;
int main(){
char str[] = "ab,cd,ef,gh,ij"; //" ex str in place of file contents/fstream sFile;"
const int NUM = 5;
string sArr[NUM];//empty array
char *token = strtok(str, ",");
for (int i=0; i < NUM; i++)
while((token!=NULL)){
("%s\n", token) >> sArr[i];
token = strtok(NULL, ",");
}
cout >> sArr;
return 0;
}
In C++ you can read a file line by line and directly get a std::string.
You will found below an example I made with a split() proposal as you requested, and a main() example of reading a file:
Example
data file:
ab,cd,ef,gh
ij,kl,mn
c++ code:
#include <fstream>
#include <iostream>
#include <vector>
std::vector<std::string> split(const std::string & s, char c);
int main()
{
std::string file_path("data.txt"); // I assumed you have that kind of file
std::ifstream in_s(file_path);
std::vector <std::vector<std::string>> content;
if(in_s)
{
std::string line;
std::vector <std::string> vec;
while(getline(in_s, line))
{
for(const std::string & str : split(line, ','))
vec.push_back(str);
content.push_back(vec);
vec.clear();
}
in_s.close();
}
else
std::cout << "Could not open: " + file_path << std::endl;
for(const std::vector<std::string> & str_vec : content)
{
for(unsigned int i = 0; i < str_vec.size(); ++i)
std::cout << str_vec[i] << ((i == str_vec.size()-1) ? ("") : (" : "));
std::cout << std::endl;
}
return 0;
}
std::vector<std::string> split(const std::string & s, char c)
{
std::vector<std::string> splitted;
std::string word;
for(char ch : s)
{
if((ch == c) && (!word.empty()))
{
splitted.push_back(word);
word.clear();
}
else
word += ch;
}
if(!word.empty())
splitted.push_back(word);
return splitted;
}
output:
ab : cd : ef : gh
ij : kl : mn
I hope it will help.
So, a few things to fix. Firstly, arrays and NUM are kind of limiting - you have to fix up NUM whenever you change the input string, so C++ provides std::vector which can resize itself to however many strings it finds. Secondly, you want to call strtok until it returns nullptr once, and you can do that with one loop. With both your for and NUM you call strtok too many times - even after it has returned nullptr. Next, to put the token into a std::string, you would assign using my_string = token; rather than ("%s\n", token) >> my_string - which is a broken mix of printf() formatting and C++ streaming notation. Lastly, to print the elements you've extracted, you can use another loop. All these changes are illustrated below.
char str[] = "ab,cd,ef,gh,ij";
std::vector<std::string> strings;
char* token = strtok(str, ",");
while ((token != nullptr))
{
strings.push_back(token);
token = strtok(NULL, ",");
}
for (const auto& s : strings)
cout >> s >> '\n';
Your code is overly complicated and wrong.
You probably want this:
#include <iostream>
#include <string>
#include <string.h>
using namespace std;
int main() {
char str[] = "ab,cd,ef,gh,ij"; //" ex str in place of file contents/fstream sFile;"
const int NUM = 5;
string sArr[NUM];//empty array
char *token = strtok(str, ",");
int max = 0;
while ((token != NULL)) {
sArr[max++] = token;
token = strtok(NULL, ",");
}
for (int i = 0; i < max; i++)
cout << sArr[i] << "\n";
return 0;
}
This code is still poor and no bound checking is done.
But anyway, you should rather do it the C++ way as suggested in the other answers.
Use boost::split
#include <boost/algorithm/string.hpp>
[...]
std::vector<std::string> strings;
std::string val("ab,cd,ef,gh,ij");
boost::split(strings, val, boost::is_any_of(","));
You could do something like this
std::string str = "ab,cd,ef,gh,ij";
std::vector<std::string> TokenList;
std::string::size_type lastPos = 0;
std::string::size_type pos = str.find_first_of(',', lastPos);
while(pos != std::string::npos)
{
std::string temp(str, lastPos, pos - lastPos);
TokenList.push_back(temp);
lastPos = pos + 1;
pos = str.find_first_of(',', lastPos);
}
if(lastPos != str.size())
{
std::string temp(str, lastPos, str.size());
TokenList.push_back(temp);
}
for(int i = 0; i < TokenList.size(); i++)
std::cout << TokenList.at(i) << std::endl;
This question already has answers here:
How do I iterate over the words of a string?
(84 answers)
Closed 4 years ago.
If I have a std::string containing a comma-separated list of numbers, what's the simplest way to parse out the numbers and put them in an integer array?
I don't want to generalise this out into parsing anything else. Just a simple string of comma separated integer numbers such as "1,1,1,1,2,1,1,1,0".
Input one number at a time, and check whether the following character is ,. If so, discard it.
#include <vector>
#include <string>
#include <sstream>
#include <iostream>
int main()
{
std::string str = "1,2,3,4,5,6";
std::vector<int> vect;
std::stringstream ss(str);
for (int i; ss >> i;) {
vect.push_back(i);
if (ss.peek() == ',')
ss.ignore();
}
for (std::size_t i = 0; i < vect.size(); i++)
std::cout << vect[i] << std::endl;
}
Something less verbose, std and takes anything separated by a comma.
stringstream ss( "1,1,1,1, or something else ,1,1,1,0" );
vector<string> result;
while( ss.good() )
{
string substr;
getline( ss, substr, ',' );
result.push_back( substr );
}
Yet another, rather different, approach: use a special locale that treats commas as white space:
#include <locale>
#include <vector>
struct csv_reader: std::ctype<char> {
csv_reader(): std::ctype<char>(get_table()) {}
static std::ctype_base::mask const* get_table() {
static std::vector<std::ctype_base::mask> rc(table_size, std::ctype_base::mask());
rc[','] = std::ctype_base::space;
rc['\n'] = std::ctype_base::space;
rc[' '] = std::ctype_base::space;
return &rc[0];
}
};
To use this, you imbue() a stream with a locale that includes this facet. Once you've done that, you can read numbers as if the commas weren't there at all. Just for example, we'll read comma-delimited numbers from input, and write then out one-per line on standard output:
#include <algorithm>
#include <iterator>
#include <iostream>
int main() {
std::cin.imbue(std::locale(std::locale(), new csv_reader()));
std::copy(std::istream_iterator<int>(std::cin),
std::istream_iterator<int>(),
std::ostream_iterator<int>(std::cout, "\n"));
return 0;
}
The C++ String Toolkit Library (Strtk) has the following solution to your problem:
#include <string>
#include <deque>
#include <vector>
#include "strtk.hpp"
int main()
{
std::string int_string = "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15";
std::vector<int> int_list;
strtk::parse(int_string,",",int_list);
std::string double_string = "123.456|789.012|345.678|901.234|567.890";
std::deque<double> double_list;
strtk::parse(double_string,"|",double_list);
return 0;
}
More examples can be found Here
Alternative solution using generic algorithms and Boost.Tokenizer:
struct ToInt
{
int operator()(string const &str) { return atoi(str.c_str()); }
};
string values = "1,2,3,4,5,9,8,7,6";
vector<int> ints;
tokenizer<> tok(values);
transform(tok.begin(), tok.end(), back_inserter(ints), ToInt());
Lots of pretty terrible answers here so I'll add mine (including test program):
#include <string>
#include <iostream>
#include <cstddef>
template<typename StringFunction>
void splitString(const std::string &str, char delimiter, StringFunction f) {
std::size_t from = 0;
for (std::size_t i = 0; i < str.size(); ++i) {
if (str[i] == delimiter) {
f(str, from, i);
from = i + 1;
}
}
if (from <= str.size())
f(str, from, str.size());
}
int main(int argc, char* argv[]) {
if (argc != 2)
return 1;
splitString(argv[1], ',', [](const std::string &s, std::size_t from, std::size_t to) {
std::cout << "`" << s.substr(from, to - from) << "`\n";
});
return 0;
}
Nice properties:
No dependencies (e.g. boost)
Not an insane one-liner
Easy to understand (I hope)
Handles spaces perfectly fine
Doesn't allocate splits if you don't want to, e.g. you can process them with a lambda as shown.
Doesn't add characters one at a time - should be fast.
If using C++17 you could change it to use a std::stringview and then it won't do any allocations and should be extremely fast.
Some design choices you may wish to change:
Empty entries are not ignored.
An empty string will call f() once.
Example inputs and outputs:
"" -> {""}
"," -> {"", ""}
"1," -> {"1", ""}
"1" -> {"1"}
" " -> {" "}
"1, 2," -> {"1", " 2", ""}
" ,, " -> {" ", "", " "}
You could also use the following function.
void tokenize(const string& str, vector<string>& tokens, const string& delimiters = ",")
{
// Skip delimiters at beginning.
string::size_type lastPos = str.find_first_not_of(delimiters, 0);
// Find first non-delimiter.
string::size_type pos = str.find_first_of(delimiters, lastPos);
while (string::npos != pos || string::npos != lastPos) {
// Found a token, add it to the vector.
tokens.push_back(str.substr(lastPos, pos - lastPos));
// Skip delimiters.
lastPos = str.find_first_not_of(delimiters, pos);
// Find next non-delimiter.
pos = str.find_first_of(delimiters, lastPos);
}
}
std::string input="1,1,1,1,2,1,1,1,0";
std::vector<long> output;
for(std::string::size_type p0=0,p1=input.find(',');
p1!=std::string::npos || p0!=std::string::npos;
(p0=(p1==std::string::npos)?p1:++p1),p1=input.find(',',p0) )
output.push_back( strtol(input.c_str()+p0,NULL,0) );
It would be a good idea to check for conversion errors in strtol(), of course. Maybe the code may benefit from some other error checks as well.
I'm surprised no one has proposed a solution using std::regex yet:
#include <string>
#include <algorithm>
#include <vector>
#include <regex>
void parse_csint( const std::string& str, std::vector<int>& result ) {
typedef std::regex_iterator<std::string::const_iterator> re_iterator;
typedef re_iterator::value_type re_iterated;
std::regex re("(\\d+)");
re_iterator rit( str.begin(), str.end(), re );
re_iterator rend;
std::transform( rit, rend, std::back_inserter(result),
[]( const re_iterated& it ){ return std::stoi(it[1]); } );
}
This function inserts all integers at the back of the input vector. You can tweak the regular expression to include negative integers, or floating point numbers, etc.
#include <sstream>
#include <vector>
const char *input = "1,1,1,1,2,1,1,1,0";
int main() {
std::stringstream ss(input);
std::vector<int> output;
int i;
while (ss >> i) {
output.push_back(i);
ss.ignore(1);
}
}
Bad input (for instance consecutive separators) will mess this up, but you did say simple.
string exp = "token1 token2 token3";
char delimiter = ' ';
vector<string> str;
string acc = "";
for(int i = 0; i < exp.size(); i++)
{
if(exp[i] == delimiter)
{
str.push_back(acc);
acc = "";
}
else
acc += exp[i];
}
bool GetList (const std::string& src, std::vector<int>& res)
{
using boost::lexical_cast;
using boost::bad_lexical_cast;
bool success = true;
typedef boost::tokenizer<boost::char_separator<char> > tokenizer;
boost::char_separator<char> sepa(",");
tokenizer tokens(src, sepa);
for (tokenizer::iterator tok_iter = tokens.begin();
tok_iter != tokens.end(); ++tok_iter) {
try {
res.push_back(lexical_cast<int>(*tok_iter));
}
catch (bad_lexical_cast &) {
success = false;
}
}
return success;
}
I cannot yet comment (getting started on the site) but added a more generic version of Jerry Coffin's fantastic ctype's derived class to his post.
Thanks Jerry for the super idea.
(Because it must be peer-reviewed, adding it here too temporarily)
struct SeparatorReader: std::ctype<char>
{
template<typename T>
SeparatorReader(const T &seps): std::ctype<char>(get_table(seps), true) {}
template<typename T>
std::ctype_base::mask const *get_table(const T &seps) {
auto &&rc = new std::ctype_base::mask[std::ctype<char>::table_size]();
for(auto &&sep: seps)
rc[static_cast<unsigned char>(sep)] = std::ctype_base::space;
return &rc[0];
}
};
This is the simplest way, which I used a lot. It works for any one-character delimiter.
#include<bits/stdc++.h>
using namespace std;
int main() {
string str;
cin >> str;
int temp;
vector<int> result;
char ch;
stringstream ss(str);
do
{
ss>>temp;
result.push_back(temp);
}while(ss>>ch);
for(int i=0 ; i < result.size() ; i++)
cout<<result[i]<<endl;
return 0;
}
simple structure, easily adaptable, easy maintenance.
std::string stringIn = "my,csv,,is 10233478,separated,by commas";
std::vector<std::string> commaSeparated(1);
int commaCounter = 0;
for (int i=0; i<stringIn.size(); i++) {
if (stringIn[i] == ",") {
commaSeparated.push_back("");
commaCounter++;
} else {
commaSeparated.at(commaCounter) += stringIn[i];
}
}
in the end you will have a vector of strings with every element in the sentence separated by spaces. empty strings are saved as separate items.
Simple Copy/Paste function, based on the boost tokenizer.
void strToIntArray(std::string string, int* array, int array_len) {
boost::tokenizer<> tok(string);
int i = 0;
for(boost::tokenizer<>::iterator beg=tok.begin(); beg!=tok.end();++beg){
if(i < array_len)
array[i] = atoi(beg->c_str());
i++;
}
void ExplodeString( const std::string& string, const char separator, std::list<int>& result ) {
if( string.size() ) {
std::string::const_iterator last = string.begin();
for( std::string::const_iterator i=string.begin(); i!=string.end(); ++i ) {
if( *i == separator ) {
const std::string str(last,i);
int id = atoi(str.c_str());
result.push_back(id);
last = i;
++ last;
}
}
if( last != string.end() ) result.push_back( atoi(&*last) );
}
}
#include <sstream>
#include <vector>
#include <algorithm>
#include <iterator>
const char *input = ",,29870,1,abc,2,1,1,1,0";
int main()
{
std::stringstream ss(input);
std::vector<int> output;
int i;
while ( !ss.eof() )
{
int c = ss.peek() ;
if ( c < '0' || c > '9' )
{
ss.ignore(1);
continue;
}
if (ss >> i)
{
output.push_back(i);
}
}
std::copy(output.begin(), output.end(), std::ostream_iterator<int> (std::cout, " ") );
return 0;
}
Is there any inbuilt function available to get strings between two delimiter string in C++?
Input string
(23567)=(58765)+(67888)+(65678)
Expected Output
23567
58765
67888
65678
include <iostream>
#include <stdexcept>
#include <string>
#include <sstream>
#include <vector>
std::vector<std::string> tokenize(const std::string& input)
{
std::vector<std::string> result;
std::istringstream stream(input);
std::string thingie; // please choose a better name, my inspiration is absent today
while(std::getline(stream, thingie, '('))
{
if(std::getline(stream, thingie, ')'))
result.push_back(thingie);
else
throw std::runtime_error("expected \')\' to match \'(\'.");
}
return result;
}
void rtc()
{
ifstream myfile(test.txt);
if(myfile.is_open())
while (!myfile.eof())
{
getline(myfile,line);
auto tokens = tokenize(line);
for(auto&& item : tokens)
std::cout << item << '\n';
}
Error C4430 missing type specifier int assumed note:c++ does not support default int
ErrorC2440initializing cannot convertfrom std::vector<_ty>to int
Error C2059syntac error empty declaration
Error C2143syntax error missing;before&&
Error C2059syntax error:')'
Use std::getline:
#include <iostream>
#include <stdexcept>
#include <string>
#include <sstream>
#include <vector>
std::vector<std::string> tokenize(const std::string& input)
{
std::vector<std::string> result;
std::istringstream stream(input);
std::string thingie; // please choose a better name, my inspiration is absent today
while(std::getline(stream, thingie, '('))
{
if(std::getline(stream, thingie, ')'))
result.push_back(thingie);
else
throw std::runtime_error("expected \')\' to match \'(\'.");
}
return result;
}
int main()
{
std::string test = "(23567)=(58765)+(67888)+(65678)";
auto tokens = tokenize(test);
for(auto&& item : tokens)
std::cout << item << '\n';
}
Live example here.
For those not entirely convinced by the awesome robustness of this solution, I specialized this for double inputs between the parentheses, and used boost::lexical_cast to verify the input:
#include <iostream>
#include <stdexcept>
#include <string>
#include <sstream>
#include <vector>
#include <boost/lexical_cast.hpp>
std::vector<double> tokenize(const std::string& input)
{
std::vector<double> result;
std::istringstream stream(input);
std::string thingie; // please choose a better name, my inspiration is absent today
while(std::getline(stream, thingie, '('))
{
if(std::getline(stream, thingie, ')'))
{
try
{
result.push_back(boost::lexical_cast<double>(thingie));
}
catch(...)
{
throw std::runtime_error("This wasn't just a number, was it?");
}
}
else
throw std::runtime_error("expected \')\' to match \'(\'.");
}
return result;
}
int main()
{
std::string test = "(23567)=(58765)+(67888)+(65678)";
auto tokens = tokenize(test);
for(auto&& item : tokens)
std::cout << item << '\n';
test = "(2h567)=(58765)+(67888)+(65678)";
tokens = tokenize(test);
}
Live example here. Now go cry about how bad strtok really is, or how bad/unportable the general <regex> implementations are currently. Also, for those who doubt boost::lexical_cast performance-wise, please see the results for yourself.
strpbrk can be used to find the start of each token
or strcspn can be used to count the characters until the next token
then strspn can be used to find the length of each token.
const char tokenChars[] = "0123456789";
char token = input; // say input is "(23567)=(58765)+(67888)+(65678)"
while( 0 != (token = strpbrk( token, tokenChars )) ) // find token
{
size_t tokenLen = strspn( token, token_chars ); // find length of token
// print out tokenLen characters of token here!
token+= tokenLen; // go to end of token
}
http://www.cplusplus.com/reference/cstring/strspn/
http://www.cplusplus.com/reference/cstring/strcspn/
http://www.cplusplus.com/reference/cstring/strpbrk/
Here's the answer if you wanna use pointers:
char test[32] = "(23567)=(58765)+(67888)+(65678)";
char *output = NULL;
char *pos = (char *)test;
int length = 0;
while (*pos != '\0') {
if(*pos == '(' || *pos == ')' || *pos == '+' || *pos == '=') {
*pos = '\0';
if (length > 0) {
output = new char[length + 1];
strncpy_s(output, length + 1, pos - length, length + 1);
length = 0;
cout << output << endl;
delete [] output;
output = NULL;
}
} else {
length++;
}
pos++;
}
While some of the commentators may hate it I like this:
for (p = std::strtok(input, "+"); p != NULL; p = std::strtok(NULL, "+"))
{
// do more stuff
}
This won't work off the bat - the delimiters need expanding - it demonstrates the ease of use.
const char input[] = "(2av67q)=(ble ble)+(67888)+(qpa)";
int s = 0;
for(int i = 0; input[i]; i++)
{
if ( input[i] == ')' )
{
cout << endl;
s = 0;
}
else if ( input[i] == '(' )
{
s = 1;
continue;
}
else
{
if ( s == 1 )
{
cout << input[i];
}
}
}
result:
2av67q
ble ble
67888
qpa
Here is a solution using a regular expression:
std::vector<std::string> get_numbers(std::string const& s)
{
static std::regex regex(R"(^\((\d+)\)=\((\d+)\)(?:\+\((\d+)\))+$)",
std::regex_constants::ECMAScript
| std::regex_constants::optimize);
std::vector<std::string> results;
std::sregex_iterator matches(s.cbegin(), s.cend(), regex);
for (auto first = matches->cbegin(), last = matches->cend();
last != first;
++first)
{
results.push_back(first->str());
}
return results;
}