Related
This question already has answers here:
C++ function split string into words
(1 answer)
taking input of a string word by word
(3 answers)
Right way to split an std::string into a vector<string>
(12 answers)
Closed last year.
myStr = input("Enter something - ")
// say I enter "Hi there"
arrayStr = myStr.split()
print(arrayStr)
// Output: ['Hi', 'there']
What is the exact C++ equivalent of this code? (My aim is to further iterate over the array and perform comparisons with other arrays).
One way of doing this would be using std::vector and std::istringstream as shown below:
#include <iostream>
#include <string>
#include<sstream>
#include <vector>
/// Reads one line from standard input, splits it on whitespace and prints
/// every word on its own line.
int main()
{
    // Fetch the complete line, including any embedded spaces.
    std::string line;
    std::getline(std::cin, line);

    // operator>> skips whitespace, so each extraction yields exactly one word.
    std::vector<std::string> words;
    std::istringstream tokens(line);
    for (std::string word; tokens >> word; )
    {
        words.emplace_back(word);
    }

    // Echo the collected words, one per line.
    for (const std::string& word : words)
    {
        std::cout << word << std::endl;
    }
    return 0;
}
You can use std::string_view to avoid copying the input string (which saves memory); each string_view is literally a view onto one word inside the original string, like this:
#include <iostream>
#include <string_view>
#include <vector>
/// Returns true when `c` is one of the characters that separate words:
/// space, comma, full stop, newline, exclamation mark or question mark.
inline bool is_delimiter(const char c)
{
    // order by frequency in your input for optimal performance
    return (c == ' ') || (c == ',') || (c == '.') || (c == '\n') || (c == '!') || (c == '?');
}

/// Splits the NUL-terminated string `line` into words without copying any
/// characters.
///
/// @param line  NUL-terminated input; must not be null. The returned views
///              point into this buffer, so it has to outlive them.
/// @return      One std::string_view per maximal run of non-delimiter
///              characters, in order of appearance. Runs of delimiters never
///              produce empty words.
auto split_view(const char* line)
{
    std::vector<std::string_view> words;

    // Start of the word currently being scanned, or nullptr between words.
    const char* word_start = nullptr;
    const char* p = line;

    // while parsing hasn't seen the terminating 0
    while (*p != '\0')
    {
        if (is_delimiter(*p))
        {
            // A delimiter closes the word in progress, if there is one.
            if (word_start != nullptr)
            {
                words.emplace_back(word_start, static_cast<std::size_t>(p - word_start));
                word_start = nullptr;
            }
        }
        else if (word_start == nullptr)
        {
            // First character of a new word.
            word_start = p;
        }
        ++p;
    }

    // The input may end in the middle of a word (no trailing delimiter);
    // without this step that last word would be silently dropped.
    if (word_start != nullptr)
    {
        words.emplace_back(word_start, static_cast<std::size_t>(p - word_start));
    }
    return words;
}
/// Demo: splits a fixed sentence with split_view and prints one word per line.
int main()
{
    // The views refer to the string literal, which lives for the whole program.
    for (const auto& word : split_view("the quick brown fox is fast. And the lazy dog is asleep!"))
    {
        std::cout << word << "\n";
    }
    return 0;
}
#include <string>
#include <sstream>
#include <vector>
#include <iterator>
/// Splits `s` at every occurrence of `delim` and writes each field to the
/// output iterator `result`.
///
/// Fields are produced by std::getline, so empty fields between adjacent
/// delimiters are kept, while an empty input (or the empty tail after a
/// trailing delimiter) produces nothing.
template <typename Out>
void split(const std::string &s, char delim, Out result) {
    std::istringstream fields(s);
    for (std::string field; std::getline(fields, field, delim); ) {
        *result = field;
        ++result;
    }
}
/// Convenience overload: splits `s` at `delim` and returns the fields as a
/// vector, delegating the actual work to the output-iterator overload.
std::vector<std::string> split(const std::string &s, char delim) {
    std::vector<std::string> pieces;
    auto sink = std::back_inserter(pieces);
    split(s, delim, sink);
    return pieces;
}
std::vector<std::string> x = split("one:two::three", ':');
Where 'x' is your converted array with 4 elements.
Basically #AnoopRana's solution but using STL algorithms and removing punctuation signs from words:
[Demo]
#include <cctype> // ispunct
#include <algorithm> // copy, transform
#include <iostream> // cout
#include <iterator> // istream_iterator, ostream_iterator
#include <sstream> // istringstream
#include <string>
#include <vector>
/// Splits a fixed sentence on whitespace, strips punctuation characters from
/// every word and prints the cleaned words one per line.
int main() {
    const std::string s{"In the beginning, there was simply the event and its consequences."};
    std::istringstream iss{s};
    std::vector<std::string> ws{};

    // unsigned char avoids undefined behaviour in std::ispunct for negative chars.
    const auto is_punctuation = [](unsigned char c) { return std::ispunct(c); };

    for (std::string w; iss >> w; ) {
        // erase-remove idiom: drop every punctuation character from the word
        w.erase(std::remove_if(w.begin(), w.end(), is_punctuation), w.end());
        ws.push_back(w);
    }

    for (const std::string& w : ws) {
        std::cout << w << "\n";
    }
}
// Outputs:
//
// In
// the
// beginning
// there
// was
// simply
// the
// event
// and
// its
// consequences
The function takes a string of comma-separated numbers and converts it into a vector of ints. Sometimes it produces a garbage value at the end.
/// Parses a comma-separated list of non-negative decimal integers.
///
/// Every ',' terminates a field (an empty field yields 0); a final field that
/// is not followed by a comma is included when it is non-empty. Characters are
/// not validated: anything other than ',' is treated as a decimal digit.
///
/// The earlier version advanced its index past the string terminator after the
/// last field (reading out of bounds, which produced the trailing garbage
/// value) and stored results in a fixed 200-element array.
///
/// @param str  Text such as "1,2,3".
/// @return     The parsed values in input order.
std::vector<int> parseInts(std::string str)
{
    std::vector<int> values;
    int current = 0;          // value of the field being accumulated
    bool has_digits = false;  // true once the current field has a character

    for (const char ch : str)
    {
        if (ch == '\0')
        {
            // Keep treating an embedded NUL as the end of the input.
            break;
        }
        if (ch == ',')
        {
            values.push_back(current);
            current = 0;
            has_digits = false;
        }
        else
        {
            current = current * 10 + (ch - '0');
            has_digits = true;
        }
    }

    // Last field without a trailing comma.
    if (has_digits)
    {
        values.push_back(current);
    }
    return values;
}
If you're writing in C++, use C++ features instead of C-style string manipulation. You can combine std::istringstream, std::getline(), and std::stoi() into a very short solution. (Also note that you should take the argument by const reference since you do not modify it.)
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
/// Converts a comma-separated list of integers into a vector.
///
/// Each field is handed to std::stoi unchanged, so an empty or non-numeric
/// field throws std::invalid_argument and an out-of-range value throws
/// std::out_of_range; no error handling is performed here.
std::vector<int> parseInts(std::string const & str) {
    std::istringstream fields{str};
    std::vector<int> result;
    for (std::string field; std::getline(fields, field, ','); ) {
        result.push_back(std::stoi(field));
    }
    return result;
}
(Demo)
The code doesn't handle whitespace and inputs with more than 200 numbers.
An alternative working solution:
#include <iostream>
#include <sstream>
#include <iterator>
#include <algorithm>
#include <vector>
/// Parses integers separated by commas and/or whitespace.
///
/// Commas are turned into spaces first, so empty fields and stray blanks are
/// simply skipped by formatted extraction. Reading stops at the first token
/// that is not an integer.
std::vector<int> parseInts(std::string s) {
    for (char& ch : s) {
        if (ch == ',') {
            ch = ' ';
        }
    }

    std::istringstream in(std::move(s));
    std::vector<int> numbers;
    for (int value; in >> value; ) {
        numbers.push_back(value);
    }
    return numbers;
}
/// Demo: parses a messy comma/space separated list and prints each number.
int main() {
    const auto numbers = parseInts("1,2 , 3 ,,, 4,5,,,");
    for (const int number : numbers) {
        std::cout << number << '\n';
    }
}
Output:
1
2
3
4
5
You never really asked a question. If you are looking for an elegant method, then I provide that below. If you are asking us to debug the code, then that is a different matter.
First here is a nice utility for splitting a string
/// Splits `str` at `delim`, discarding empty fields.
///
/// Leading, trailing and repeated delimiters are skipped, so only non-empty
/// pieces are returned.
std::vector<std::string> split(const std::string& str, char delim) {
    std::vector<std::string> pieces;
    std::size_t cursor = 0;
    for (;;) {
        // Jump over any run of delimiters to the start of the next piece.
        const std::size_t first = str.find_first_not_of(delim, cursor);
        if (first == std::string::npos) {
            break;
        }
        // npos here means "until the end of the string" for substr below.
        cursor = str.find(delim, first);
        pieces.push_back(str.substr(first, cursor - first));
    }
    return pieces;
}
First split the string on commas:
std::vector<std::string> strings = split(str, ',');
Then convert each to an int
// Convert every token to an int. std::stoi throws std::invalid_argument for a
// non-numeric token and std::out_of_range for a value that does not fit.
std::vector<int> ints;
for (const auto& s : strings)  // by reference: no per-token string copy
    ints.push_back(std::stoi(s));
I want to split a string by any occurrence of and.
First of all I have to make it clear that I do not intend to use any regex as a delimiter.
I run the following code:
#include <iostream>
#include <regex>
#include <boost/algorithm/string.hpp>
/// Question code: splits a fixed list of names with boost::split and prints
/// every resulting piece on its own line.
int main()
{
    std::string text =
        "Alexievich, Svetlana and Lindahl,Tomas and Campbell,William";
    std::vector<std::string> results;

    // is_any_of treats EACH character of " and " as a separate delimiter, and
    // token_compress_off keeps the empty pieces between adjacent delimiters.
    boost::split(results, text, boost::is_any_of(" and "), boost::token_compress_off);

    for (const auto& piece : results)
    {
        std::cout << piece << "\n";
    }
    return 0;
}
and the results are different from what I expect:
Alexievich,
Svetl
Li
hl,Tom
s
C
mpbell,Willi
m
It seems every character in the delimiter acts separately while I need to have the whole and as a delimiter.
Please do not link to this boost example unless you are sure that it will work for my case.
<algorithm> contains search - right tool for this task.
vector<string> results;
const string text{ "Alexievich, Svetlana and Lindahl,Tomas and Campbell,William" };
const string delim{ " and " };
for (auto p = cbegin(text); p != cend(text); ) {
const auto n = search(p, cend(text), cbegin(delim), cend(delim));
results.emplace_back(p, n);
p = n;
if (cend(text) != n) // we found delim, skip over it.
p += delim.length();
}
The old-fashioned way:
#include <iostream>
#include <string>
#include <vector>
/// Splits a fixed list of names at every occurrence of the whole delimiter
/// " and " and prints each name on its own line.
int main()
{
    const std::string text =
        "Alexievich, Svetlana and Lindahl,Tomas and Campbell,William";
    // Include the surrounding spaces: searching for the bare word "and" leaves
    // stray blanks on the pieces and also splits inside words containing "and".
    const std::string delimiter = " and ";

    std::vector<std::string> results;
    std::size_t pos = 0;
    for (;;) {
        const std::size_t next = text.find(delimiter, pos);
        // When next is npos the count is huge and substr takes the remainder.
        results.push_back(text.substr(pos, next - pos));
        if (next == std::string::npos) break;
        pos = next + delimiter.size();
    }

    for (const auto& result : results)
    {
        std::cout << result << "\n";
    }
    return 0;
}
Packaging into a reusable function is left as an exercise for the reader.
This question already has answers here:
How do I iterate over the words of a string?
(84 answers)
Closed 4 years ago.
If I have a std::string containing a comma-separated list of numbers, what's the simplest way to parse out the numbers and put them in an integer array?
I don't want to generalise this out into parsing anything else. Just a simple string of comma separated integer numbers such as "1,1,1,1,2,1,1,1,0".
Input one number at a time, and check whether the following character is ,. If so, discard it.
#include <vector>
#include <string>
#include <sstream>
#include <iostream>
/// Parses "1,2,3,4,5,6" by extracting one integer at a time and discarding a
/// comma that directly follows it, then prints the numbers one per line.
int main()
{
    std::string str = "1,2,3,4,5,6";
    std::stringstream ss(str);
    std::vector<int> vect;

    int value;
    while (ss >> value) {
        vect.push_back(value);
        // Skip the separator so the next extraction starts at a digit.
        if (ss.peek() == ',') {
            ss.ignore();
        }
    }

    for (const int number : vect) {
        std::cout << number << std::endl;
    }
}
Something less verbose, std and takes anything separated by a comma.
stringstream ss( "1,1,1,1, or something else ,1,1,1,0" );
vector<string> result;
while( ss.good() )
{
string substr;
getline( ss, substr, ',' );
result.push_back( substr );
}
Yet another, rather different, approach: use a special locale that treats commas as white space:
#include <locale>
#include <vector>
/// ctype facet whose classification table marks only ',', '\n' and ' ' as
/// whitespace, so formatted extraction skips commas like blanks.
struct csv_reader : std::ctype<char> {
    csv_reader() : std::ctype<char>(get_table()) {}

    /// Returns the shared classification table, built on first use.
    static std::ctype_base::mask const* get_table() {
        static std::vector<std::ctype_base::mask> table = [] {
            // Start from an all-zero table: no character has any class.
            std::vector<std::ctype_base::mask> t(std::ctype<char>::table_size,
                                                 std::ctype_base::mask());
            for (const char separator : {',', '\n', ' '}) {
                t[separator] = std::ctype_base::space;
            }
            return t;
        }();
        return table.data();
    }
};
To use this, you imbue() a stream with a locale that includes this facet. Once you've done that, you can read numbers as if the commas weren't there at all. Just for example, we'll read comma-delimited numbers from input, and write them out one per line on standard output:
#include <algorithm>
#include <iterator>
#include <iostream>
/// Reads comma-delimited integers from standard input through the csv_reader
/// facet and writes them to standard output, one per line.
int main() {
    // The locale takes ownership of the facet allocated here.
    std::cin.imbue(std::locale(std::locale(), new csv_reader()));

    std::istream_iterator<int> first(std::cin);
    std::istream_iterator<int> last;
    std::ostream_iterator<int> sink(std::cout, "\n");
    std::copy(first, last, sink);
    return 0;
}
The C++ String Toolkit Library (Strtk) has the following solution to your problem:
#include <string>
#include <deque>
#include <vector>
#include "strtk.hpp"
/// Demo of strtk::parse: fills a vector<int> from a comma-separated string and
/// a deque<double> from a '|'-separated string.
int main()
{
    // Integers separated by ','.
    std::vector<int> int_list;
    std::string int_string = "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15";
    strtk::parse(int_string, ",", int_list);

    // Doubles separated by '|'.
    std::deque<double> double_list;
    std::string double_string = "123.456|789.012|345.678|901.234|567.890";
    strtk::parse(double_string, "|", double_list);

    return 0;
}
More examples can be found Here
Alternative solution using generic algorithms and Boost.Tokenizer:
/// Function object that converts a string to int with atoi; text that does not
/// start with a number converts to 0.
struct ToInt
{
    int operator()(string const &str)
    {
        const char *digits = str.c_str();
        return atoi(digits);
    }
};
// Tokenize `values` with a default-configured Boost tokenizer, then convert
// every token with ToInt and append the results to `ints`.
string values = "1,2,3,4,5,9,8,7,6";
vector<int> ints;
tokenizer<> tok(values);
transform(tok.begin(), tok.end(), back_inserter(ints), ToInt());
Lots of pretty terrible answers here so I'll add mine (including test program):
#include <string>
#include <iostream>
#include <cstddef>
/// Splits `str` at every `delimiter` and reports each field to `f`.
///
/// `f(str, from, to)` is called once per field with the half-open index range
/// [from, to) of that field inside `str`. Empty fields are reported too, and
/// an empty string produces exactly one call with an empty range.
template<typename StringFunction>
void splitString(const std::string &str, char delimiter, StringFunction f) {
    std::size_t begin = 0;
    std::size_t hit = str.find(delimiter, begin);
    while (hit != std::string::npos) {
        f(str, begin, hit);
        begin = hit + 1;
        hit = str.find(delimiter, begin);
    }
    // Whatever follows the last delimiter (possibly nothing) is the final field.
    f(str, begin, str.size());
}
/// Test program: splits the single command-line argument at commas and prints
/// every field wrapped in backticks. Returns 1 unless exactly one argument is
/// given.
int main(int argc, char* argv[]) {
    if (argc == 2) {
        const auto print_field = [](const std::string &s, std::size_t from, std::size_t to) {
            std::cout << "`" << s.substr(from, to - from) << "`\n";
        };
        splitString(argv[1], ',', print_field);
        return 0;
    }
    return 1;
}
Nice properties:
No dependencies (e.g. boost)
Not an insane one-liner
Easy to understand (I hope)
Handles spaces perfectly fine
Doesn't allocate splits if you don't want to, e.g. you can process them with a lambda as shown.
Doesn't add characters one at a time - should be fast.
If using C++17 you could change it to use a std::string_view and then it won't do any allocations and should be extremely fast.
Some design choices you may wish to change:
Empty entries are not ignored.
An empty string will call f() once.
Example inputs and outputs:
"" -> {""}
"," -> {"", ""}
"1," -> {"1", ""}
"1" -> {"1"}
" " -> {" "}
"1, 2," -> {"1", " 2", ""}
" ,, " -> {" ", "", " "}
You could also use the following function.
// Splits `str` into tokens and appends them to `tokens`.
// Every character of `delimiters` is a separator; runs of separators are
// skipped, so no empty tokens are produced.
void tokenize(const string& str, vector<string>& tokens, const string& delimiters = ",")
{
    // Start of the first token (skips any leading delimiters).
    string::size_type tokenStart = str.find_first_not_of(delimiters, 0);
    while (tokenStart != string::npos) {
        // First delimiter after the token, or npos when the token runs to the end.
        const string::size_type tokenEnd = str.find_first_of(delimiters, tokenStart);
        tokens.push_back(str.substr(tokenStart, tokenEnd - tokenStart));
        // Start of the next token, or npos when only delimiters (or nothing) remain.
        tokenStart = str.find_first_not_of(delimiters, tokenEnd);
    }
}
// Parse every comma-separated field of `input` with strtol.
// p0 is the offset of the current field; p1 is the offset of the next ','
// at or after p0 (npos when there is none).
std::string input="1,1,1,1,2,1,1,1,0";
std::vector<long> output;
for(std::string::size_type p0=0,p1=input.find(',');
// keep going while there is a current field to convert
p1!=std::string::npos || p0!=std::string::npos;
// step: move p0 just past the comma (or to npos after the last field), then
// look for the following comma starting from the new p0
(p0=(p1==std::string::npos)?p1:++p1),p1=input.find(',',p0) )
// strtol stops at the first non-numeric character (the comma); base 0 makes it
// auto-detect the base, so "0x"/leading-"0" prefixes select hex/octal
output.push_back( strtol(input.c_str()+p0,NULL,0) );
It would be a good idea to check for conversion errors in strtol(), of course. Maybe the code may benefit from some other error checks as well.
I'm surprised no one has proposed a solution using std::regex yet:
#include <string>
#include <algorithm>
#include <vector>
#include <regex>
/// Appends every run of decimal digits found in `str` to `result` as an int.
///
/// Anything that is not a digit acts as a separator. std::stoi throws
/// std::out_of_range when a run does not fit in an int.
void parse_csint( const std::string& str, std::vector<int>& result ) {
    const std::regex digits("(\\d+)");
    const std::sregex_iterator no_more_matches;
    for (std::sregex_iterator match(str.begin(), str.end(), digits);
         match != no_more_matches; ++match) {
        // Capture group 1 holds the digit run of this match.
        result.push_back(std::stoi((*match)[1].str()));
    }
}
This function inserts all integers at the back of the input vector. You can tweak the regular expression to include negative integers, or floating point numbers, etc.
#include <sstream>
#include <vector>
const char *input = "1,1,1,1,2,1,1,1,0";
/// Parses the global `input` list: extract an int, skip one separator
/// character, repeat until extraction fails.
int main() {
    std::stringstream ss(input);
    std::vector<int> output;
    // The increment step discards exactly one character after each number.
    for (int value; ss >> value; ss.ignore(1)) {
        output.push_back(value);
    }
}
Bad input (for instance consecutive separators) will mess this up, but you did say simple.
string exp = "token1 token2 token3";
char delimiter = ' ';
vector<string> str;
string acc = "";
for(int i = 0; i < exp.size(); i++)
{
if(exp[i] == delimiter)
{
str.push_back(acc);
acc = "";
}
else
acc += exp[i];
}
bool GetList (const std::string& src, std::vector<int>& res)
{
using boost::lexical_cast;
using boost::bad_lexical_cast;
bool success = true;
typedef boost::tokenizer<boost::char_separator<char> > tokenizer;
boost::char_separator<char> sepa(",");
tokenizer tokens(src, sepa);
for (tokenizer::iterator tok_iter = tokens.begin();
tok_iter != tokens.end(); ++tok_iter) {
try {
res.push_back(lexical_cast<int>(*tok_iter));
}
catch (bad_lexical_cast &) {
success = false;
}
}
return success;
}
I cannot yet comment (getting started on the site) but added a more generic version of Jerry Coffin's fantastic ctype's derived class to his post.
Thanks Jerry for the super idea.
(Because it must be peer-reviewed, adding it here too temporarily)
/// ctype facet that classifies exactly the characters of `seps` as
/// whitespace, so formatted extraction treats them as separators. Every other
/// character gets an empty classification mask.
struct SeparatorReader: std::ctype<char>
{
    /// @param seps  Any range of char-like separators (e.g. a std::string).
    /// The `true` flag hands ownership of the table to the base class, which
    /// releases it with delete[] when the facet is destroyed.
    template<typename T>
    SeparatorReader(const T &seps): std::ctype<char>(get_table(seps), true) {}

    /// Builds a zero-initialized classification table with the separators
    /// marked as space. Static, because it runs inside the base-class
    /// initializer, i.e. before this object exists.
    template<typename T>
    static std::ctype_base::mask const *get_table(const T &seps) {
        std::ctype_base::mask *table = new std::ctype_base::mask[std::ctype<char>::table_size]();
        for(const auto &sep: seps)
            // unsigned char keeps the index non-negative for chars >= 0x80
            table[static_cast<unsigned char>(sep)] = std::ctype_base::space;
        return table;
    }
};
This is the simplest way, which I used a lot. It works for any one-character delimiter.
#include<bits/stdc++.h>
using namespace std;
// Reads one whitespace-free token such as "1,2,3" from standard input, parses
// the integers separated by any single character and prints them one per line.
int main() {
    string str;
    cin >> str;

    vector<int> result;
    stringstream ss(str);
    int temp;
    char ch;
    // Only store a value after a successful extraction; the previous do/while
    // pushed `temp` even when nothing could be read (empty or malformed input).
    while (ss >> temp)
    {
        result.push_back(temp);
        // Consume the one-character separator; stop when none is left.
        if (!(ss >> ch))
            break;
    }

    for (size_t i = 0; i < result.size(); i++)
        cout << result[i] << endl;
    return 0;
}
simple structure, easily adaptable, easy maintenance.
// Split `stringIn` at commas. `commaSeparated` starts with one empty element
// and gets a new one for every comma, so empty fields are kept.
std::string stringIn = "my,csv,,is 10233478,separated,by commas";
std::vector<std::string> commaSeparated(1);
int commaCounter = 0;  // index of the element currently being filled
for (std::size_t i = 0; i < stringIn.size(); i++) {
    // Compare against the character ',' — the string literal "," is a pointer
    // and cannot be compared with a char.
    if (stringIn[i] == ',') {
        commaSeparated.push_back("");
        commaCounter++;
    } else {
        commaSeparated.at(commaCounter) += stringIn[i];
    }
}
In the end you will have a vector of strings holding every comma-separated element of the input. Empty strings are saved as separate items.
Simple Copy/Paste function, based on the boost tokenizer.
/// Tokenizes `string` with a default-configured boost::tokenizer and stores
/// the atoi value of each token in `array`.
///
/// @param string     Text to tokenize.
/// @param array      Destination buffer with room for `array_len` ints.
/// @param array_len  Capacity of `array`; tokens beyond it are ignored.
void strToIntArray(std::string string, int* array, int array_len) {
    boost::tokenizer<> tok(string);
    int i = 0;
    // Stop as soon as the tokens run out or the destination is full.
    for (boost::tokenizer<>::iterator beg = tok.begin();
         beg != tok.end() && i < array_len; ++beg, ++i) {
        array[i] = atoi(beg->c_str());
    }
}
/// Splits `string` at `separator` and appends the atoi value of every field
/// to `result`.
///
/// A field that is terminated by a separator is always converted (an empty one
/// yields 0); the text after the last separator is converted only when it is
/// non-empty.
void ExplodeString( const std::string& string, const char separator, std::list<int>& result ) {
    std::string::size_type start = 0;
    for( std::string::size_type cut = string.find( separator, start );
         cut != std::string::npos;
         cut = string.find( separator, start ) ) {
        result.push_back( atoi( string.substr( start, cut - start ).c_str() ) );
        start = cut + 1;
    }
    // Remaining tail: c_str() is NUL-terminated, so atoi stops at the end.
    if( start < string.size() ) {
        result.push_back( atoi( string.c_str() + start ) );
    }
}
#include <sstream>
#include <vector>
#include <algorithm>
#include <iterator>
const char *input = ",,29870,1,abc,2,1,1,1,0";
/// Extracts every run of digits from the global `input` as an int, skipping
/// all other characters one at a time, and prints the numbers separated by
/// spaces.
int main()
{
    std::stringstream ss(input);
    std::vector<int> output;
    int i;
    while ( !ss.eof() )
    {
        const int next = ss.peek();
        const bool starts_number = ( next >= '0' && next <= '9' );
        if ( starts_number )
        {
            if ( ss >> i )
            {
                output.push_back(i);
            }
        }
        else
        {
            // Not a digit (or end of input): drop one character and retry.
            ss.ignore(1);
        }
    }
    std::copy(output.begin(), output.end(), std::ostream_iterator<int> (std::cout, " ") );
    return 0;
}
I have comma-separated integers and I want to store them in a std::vector<int>. Currently I am doing it manually. Is there any built-in function that provides this functionality?
Edit:
I was in hurry and forget to put full details
Actually i have string (to be exact Unicode string) containing CSvs e.g. "1,2,3,4,5"
Now I want to store them in a std::vector<int>, so in the above case my vector would have five elements pushed into it. Currently I am doing this manually, but it is slow and the code is messy.
It's probably not the most efficient way, but here's a way to do it using the TR1 regex functionality (I also use C++0x lambda syntax in this sample, but obviously it could also be done without that):
#include <iostream>
#include <algorithm>
#include <vector>
#include <regex>
#include <iterator>
#include <cstdlib>
/// Extracts the integers of a comma-separated wide string.
///
/// A run of digits is accepted only when it is directly followed by a comma or
/// by the end of the input; everything else is ignored.
std::vector<int> GetList(const std::wstring &input)
{
    const std::wregex number_then_separator(L"(\\d+)(,|$)");
    const std::wsregex_iterator no_more_matches;

    std::vector<int> values;
    for (std::wsregex_iterator match(input.begin(), input.end(), number_then_separator);
         match != no_more_matches; ++match)
    {
        // Group 1 is the digit run; wcstol yields long, narrowed to int here.
        const std::wstring digits = (*match)[1].str();
        values.push_back(static_cast<int>(std::wcstol(digits.c_str(), nullptr, 10)));
    }
    return values;
}
You can do this purely with the STL for simplicity (easy to read, no complex libraries needed). It is fast to code but not the fastest in terms of execution speed, though you can probably tweak it a little, e.g. by pre-reserving space in the vector:
/// Splits the wide string `s` at `delim` and converts every field to int.
///
/// @param s      Delimited list such as L"1,2,3".
/// @param delim  Single separator character.
/// @return       One value per field; an empty or non-numeric field yields 0.
std::vector<int> GetValues(std::wstring s, wchar_t delim)
{
    std::vector<int> values;
    std::wstringstream fields(s);
    std::wstring field;
    while(std::getline(fields, field, delim))
    {
        std::wstringstream cell(field);
        // Must be initialized: for an empty field the extraction below gives up
        // before touching the target, which previously stored an indeterminate
        // value in the result.
        int value = 0;
        cell >> value;
        values.push_back(value);
    }
    return values;
}
(no forwarding(&&) or atoi to keep the code portable).
Sadly, the STL doesn't allow you to split a string on a separator. You can use boost to do it though: (requires a recent C++ compiler such as MSVC 2010 or GCC 4.5)
#include <vector>
#include <string>
#include <algorithm>
#include <iostream>
#include <iterator>
#include <boost/algorithm/string.hpp>
#include <boost/lexical_cast.hpp>
using namespace std;
// Splits "1,2,3,4" at commas with boost::split, converts every piece with
// boost::lexical_cast<int> and prints the numbers one per line.
int main(int argc, char** argv)
{
    string input = "1,2,3,4";

    vector<string> strs;
    boost::split(strs, input, boost::is_any_of(","));

    // lexical_cast throws boost::bad_lexical_cast for a non-numeric piece.
    vector<int> result;
    for (const string& piece : strs)
        result.push_back(boost::lexical_cast<int>(piece));

    for (const int value : result)
        cout << value << endl;
}
The quick and dirty option is to use the C string library strtok() function, and atoi():
// Splits the NUL-terminated buffer `string` at commas and appends the atoi
// value of every token to `intVec`.
// strtok writes NUL bytes into `string`, so the buffer is modified, and it
// skips runs of commas, so empty fields produce no entry.
void Split(char * string, std::vector<int>& intVec)
{
    for (char * token = strtok(string, ",");
         token != nullptr;
         token = strtok(nullptr, ","))
    {
        intVec.push_back(atoi(token));
    }
}
Insert your own input data validation as required.
See:
http://www.cplusplus.com/reference/clibrary/cstring/strtok/
http://www.cplusplus.com/reference/clibrary/cstdlib/atoi/
As well as the wide string versions:
http://msdn.microsoft.com/en-us/library/2c8d19sb%28v=vs.71%29.aspx
http://msdn.microsoft.com/en-us/library/aa273408%28v=vs.60%29.aspx
EDIT:
Note that strtok() will modify your original string, so pass a copy if need be.
Try this:
It will read any type (that can be read with >>) separated by any char (that you choose).
Note: After the object is read, there can only be whitespace between the object and the separator. Thus for things like ObjectSepReader<std::string, ','> it will read a word list separated by ','.
This makes it simple to use our standard algorithms:
#include <vector>
#include <sstream>
#include <iostream>
#include <iterator>
#include <algorithm>
int main()
{
std::stringstream data("1,2,3,4,5,6,7,8,9");
std::vector<int> vdata;
// Read the data from a stream
std::copy(std::istream_iterator<ObjectSepReader<int, ','> >(data),
std::istream_iterator<ObjectSepReader<int, ','> >(),
std::back_inserter(vdata)
);
// Copy data to output for testing
std::copy(vdata.begin(), vdata.end(), std::ostream_iterator<int>(std::cout," "));
}
The secret class to make it work.
// Wrapper around one value of type T that is read from a stream in which
// values are separated by the character S (see the matching operator>>).
template<typename T,char S>
struct ObjectSepReader
{
// The value most recently read.
T value;
// Implicit conversion so the wrapper can be used wherever a T is expected,
// e.g. when copying from an istream_iterator into a container of T.
operator T const&() const {return value;}
};
// Reads one S-terminated field from `stream` and parses it into data.value.
// The field must contain exactly one T, optionally followed by whitespace;
// otherwise badbit is set on `stream`.
template<typename T,char S>
std::istream& operator>>(std::istream& stream, ObjectSepReader<T,S>& data)
{
char terminator;
std::string line;
// Take everything up to (and consuming) the next separator S.
std::getline(stream, line, S);
// Append ':' as a sentinel so that there is always a character to read after
// the value.
std::stringstream linestream(line + ':');
// Valid only if the value parses, the next non-space character is the
// sentinel, and reading the sentinel consumed the whole buffer (nothing else
// sits between the value and the end of the field).
if (!(linestream >> data.value >> terminator) || (linestream.tellg() != line.size()+1) || (terminator != ':'))
{ stream.setstate(std::ios::badbit);
}
return stream;
}
Personally I'd make a structure and have the vector contain instances of the struct.
Like so:
// Example aggregate with three int fields; instances of it are meant to be
// stored as the elements of a vector.
struct ExampleStruct
{
int a;
int b;
int c;
};
vector<ExampleStruct> structVec;
How about this?
#include <string>
#include <vector>
#include <functional>
#include <algorithm>
#include <iostream>
/// Function object for std::for_each over a NUL-terminated character array
/// holding comma-separated integers.
///
/// It must be fed references to the array elements themselves (it keeps and
/// compares their addresses), including the terminating NUL, which closes the
/// last field. Every non-empty field is converted with atoi and appended to
/// the vector given at construction; empty fields are skipped.
struct PickIntFunc
{
    /// @param vecInt  Destination vector; must outlive this object.
    // Initializers are listed in member declaration order.
    PickIntFunc(std::vector<int>& vecInt): _pBegin(nullptr), _vecInt(vecInt){}

    char operator () (const char& aChar)
    {
        if(aChar == ',' || aChar == 0)
        {
            // Only convert when a field was actually started: building a
            // std::string from a null begin pointer is undefined behaviour
            // (this happened for leading, doubled or trailing commas).
            if(_pBegin != nullptr)
            {
                _vecInt.push_back(atoi(std::string(_pBegin,&aChar).c_str()));
                _pBegin = nullptr;
            }
        }
        else if(_pBegin == nullptr)
        {
            // First character of a new field.
            _pBegin = &aChar;
        }
        return aChar;
    }

    const char* _pBegin;        ///< Start of the current field, or null between fields.
    std::vector<int>& _vecInt;  ///< Receives the parsed values.
};
// Demo: parses a comma-separated list with PickIntFunc and prints every
// number on its own line.
int _tmain(int argc, _TCHAR* argv[])
{
    char intStr[] = "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20";
    std::vector<int> vecInt;

    // sizeof includes the terminating NUL, which closes the last field.
    std::for_each(intStr, intStr + sizeof(intStr), PickIntFunc(vecInt));

    // Now test it
    for (const int value : vecInt)
    {
        std::cout << value << std::endl;
    }
    return 0;
}