Related
What is the right way to split a string into a vector of strings? Delimiter is space or comma.
A convenient way would be boost's string algorithms library.
#include <boost/algorithm/string/classification.hpp> // Include boost::for is_any_of
#include <boost/algorithm/string/split.hpp> // Include for boost::split
// ...
std::vector<std::string> words;
std::string s;
boost::split(words, s, boost::is_any_of(", "), boost::token_compress_on);
For space-separated strings, you can do this:
std::string s = "What is the right way to split a string into a vector of strings";
std::stringstream ss(s);
std::istream_iterator<std::string> begin(ss);
std::istream_iterator<std::string> end;
std::vector<std::string> vstrings(begin, end);
std::copy(vstrings.begin(), vstrings.end(), std::ostream_iterator<std::string>(std::cout, "\n"));
Output:
What
is
the
right
way
to
split
a
string
into
a
vector
of
strings
For a string that has both commas and spaces:
// ctype facet whose classification table marks ',' and ' ' as whitespace.
// Imbuing a stream with a locale that carries this facet makes formatted
// extraction (operator>>) break words at commas as well as at spaces.
struct tokens : std::ctype<char>
{
    tokens() : std::ctype<char>(get_table()) {}

    // Returns a pointer to a function-static table: a copy of the classic
    // "C" table in which the entries for ',' and ' ' are replaced by `space`.
    // The copy is refreshed on every call.
    static std::ctype_base::mask const* get_table()
    {
        typedef std::ctype<char> facet_type;
        static const facet_type::mask* classic = facet_type::classic_table();
        static facet_type::mask table[facet_type::table_size];

        std::memcpy(table, classic, facet_type::table_size * sizeof(facet_type::mask));
        table[' '] = std::ctype_base::space;
        table[','] = std::ctype_base::space;
        return table;
    }
};
std::string s = "right way, wrong way, correct way";
std::stringstream ss(s);
ss.imbue(std::locale(std::locale(), new tokens()));
std::istream_iterator<std::string> begin(ss);
std::istream_iterator<std::string> end;
std::vector<std::string> vstrings(begin, end);
std::copy(vstrings.begin(), vstrings.end(), std::ostream_iterator<std::string>(std::cout, "\n"));
Output:
right
way
wrong
way
correct
way
You can use getline with delimiter:
string s, tmp;
stringstream ss(s);
vector<string> words;
while(getline(ss, tmp, ',')){
words.push_back(tmp);
.....
}
// Splits `str` on every occurrence of the (possibly multi-character) `token`.
//
// Empty fields are kept, including a trailing one when the input ends with
// the token ("1,2," -> {"1","2",""}). An empty input yields an empty vector.
// An empty `token` cannot delimit anything, so the whole input is returned
// as a single element (the previous implementation never terminated here).
std::vector<std::string> split(std::string str, std::string token) {
    std::vector<std::string> result;
    if (token.empty()) {
        if (!str.empty()) result.push_back(str);
        return result;
    }

    // Walk the string with an offset instead of repeatedly re-assigning
    // `str`, and keep positions as size_t so npos is never narrowed to int.
    std::size_t start = 0;
    while (start < str.size()) {
        const std::size_t hit = str.find(token, start);
        if (hit == std::string::npos) {
            result.push_back(str.substr(start));
            break;
        }
        result.push_back(str.substr(start, hit - start));
        start = hit + token.size();
        // The token was the last thing in the input: record the empty tail.
        if (start == str.size()) result.push_back(std::string());
    }
    return result;
}
split("1,2,3",",") ==> ["1","2","3"]
split("1,2,",",") ==> ["1","2",""]
split("1token2token3","token") ==> ["1","2","3"]
If the string has both spaces and commas you can use the string class function
found_index = myString.find_first_of(delims_str, begin_index)
in a loop. Checking for != npos and inserting into a vector. If you prefer old school you can also use C's
strtok()
method.
// Splits `text` at every `delim` character by reading fields with
// std::getline. Empty fields between delimiters are kept; a delimiter at the
// very end of the input does not produce a trailing empty field, and an
// empty input yields an empty vector.
std::vector<std::string> split(std::string text, char delim) {
    std::vector<std::string> fields;
    std::stringstream reader(text);
    for (std::string field; std::getline(reader, field, delim); ) {
        fields.push_back(field);
    }
    return fields;
}
split("String will be split", ' ') -> {"String", "will", "be", "split"}
split("Hello, how are you?", ',') -> {"Hello", " how are you?"}
EDIT: Here's a thing I made, this can use multi-char delimiters, albeit I'm not 100% sure if it always works:
// Splits `text` on every occurrence of the multi-character delimiter `delim`.
//
// All fields are kept, including empty ones, so the result always has
// (number of delimiter occurrences + 1) elements; an empty `text` yields {""}.
// An empty `delim` cannot advance the search position (the previous code
// looped forever), so in that case the whole text is returned as one element.
std::vector<std::string> split(std::string text, std::string delim) {
    std::vector<std::string> vec;
    if (delim.empty()) {
        vec.push_back(text);
        return vec;
    }

    std::size_t prevPos = 0;  // start of the field currently being read
    for (;;) {
        const std::size_t pos = text.find(delim, prevPos);
        if (pos == std::string::npos) {
            // No further delimiter: the remainder is the last field.
            vec.push_back(text.substr(prevPos));
            return vec;
        }
        vec.push_back(text.substr(prevPos, pos - prevPos));
        prevPos = pos + delim.length();
    }
}
Tweaked version from Techie Delight:
#include <string>
#include <vector>
// Splits `str` into the maximal runs of characters that are not `delim`.
// Consecutive, leading and trailing delimiters are skipped, so the result
// never contains an empty string.
std::vector<std::string> split(const std::string& str, char delim) {
    std::vector<std::string> strings;
    size_t cursor = 0;
    for (;;) {
        // Jump over any delimiters in front of the next token.
        const size_t first = str.find_first_not_of(delim, cursor);
        if (first == std::string::npos) {
            break;
        }
        // `cursor` becomes npos for the last token; substr then simply takes
        // everything up to the end of the string.
        cursor = str.find(delim, first);
        strings.push_back(str.substr(first, cursor - first));
    }
    return strings;
}
I made this custom function that converts a line into a vector of words:
#include <iostream>
#include <vector>
#include <ctime>
#include <string>
using namespace std;
// Reads one line from standard input and splits it at every space into
// `subArray`. Each space ends a token (so consecutive spaces produce empty
// tokens), and the text after the last space is added when the final
// character is reached.
int main(){
    std::string line;
    std::getline(std::cin, line);
    const int len = line.length();
    std::vector<std::string> subArray;

    int tokenStart = 0;  // index of the first character of the current token
    for (int pos = 0; pos < len; ++pos) {
        if (line[pos] == ' ') {
            subArray.push_back(line.substr(tokenStart, pos - tokenStart));
            tokenStart = pos + 1;
        }
        // Last character: flush whatever follows the most recent space.
        if (pos == len - 1) {
            subArray.push_back(line.substr(tokenStart, pos - tokenStart + 1));
        }
    }
    return 0;
}
Here is a modified version of roach's solution that splits based on a string of single character delimiters + supports the option to compress duplicate delimiters.
// Splits `text` at any single character contained in `delim`.
//
// With `compress == false` every delimiter ends a field, so adjacent
// delimiters produce empty strings. With `compress == true` delimiters at
// the start of a field are skipped, so no empty strings are produced.
// In both modes nothing is appended when the input ends exactly at the
// start of a new field.
std::vector<std::string> split(std::string text, std::string delim, bool compress)
{
    std::vector<std::string> vec;
    size_t fieldStart = 0;
    for (;;)
    {
        size_t hit = text.find_first_of(delim, fieldStart);
        if (compress)
        {
            // A delimiter sitting right at the field start belongs to a run
            // of delimiters: step over it and search again.
            while (hit == fieldStart)
            {
                ++fieldStart;
                hit = text.find_first_of(delim, fieldStart);
            }
        }
        if (hit == std::string::npos)
        {
            if (fieldStart != text.size())
                vec.push_back(text.substr(fieldStart));
            return vec;
        }
        vec.push_back(text.substr(fieldStart, hit - fieldStart));
        fieldStart = hit + 1;
    }
}
Example without compress:
std::string s = " 1.2 foo#foo . ";
auto res = split(s, ".# ", false);
for(auto i : res)
std::cout << "string {" << i << "}" << std::endl;
Output:
string {}
string {}
string {1}
string {2}
string {}
string {foo}
string {foo}
string {}
string {}
With compress split(s, ".# ", true);
string {1}
string {2}
string {foo}
string {foo}
Here's a function that will split up a string into a vector but it doesn't include empty strings in the output vector.
// Splits `str` on every occurrence of `token` and returns only the
// non-empty pieces; leading, trailing and repeated tokens are ignored.
//
// An empty `token` cannot delimit anything (the previous implementation
// looped forever on it), so the whole input is returned as one element,
// or nothing if the input is empty too.
std::vector<std::string> split(std::string str, std::string token) {
    std::vector<std::string> result;
    if (token.empty()) {
        if (!str.empty()) result.push_back(str);
        return result;
    }

    // Scan with an offset and size_t positions rather than narrowing the
    // result of find() to int and re-copying the remainder each round.
    std::size_t start = 0;
    while (start < str.size()) {
        std::size_t hit = str.find(token, start);
        if (hit == std::string::npos) hit = str.size();  // last piece runs to the end
        if (hit > start) result.push_back(str.substr(start, hit - start));
        start = hit + token.size();
    }
    return result;
}
Note: The above was adapted from this answer.
Usage
// Usage example: splits a sample string on " : " into at most two parts
// and prints each part on its own line.
void test() {
    std::string a = "hello : world : ok : fine";
    const auto parts = split(a, " : ", 2);
    for (auto part : parts) {
        std::cout << part << std::endl;
    }
}
// Splits `str` on `delimiter`, optionally limiting the number of results.
//
// @param str           text to split
// @param delimiter     separator (may be several characters long)
// @param max_elements  when > 0, at most this many elements are returned and
//                      the last one holds the unsplit remainder; when <= 0
//                      the string is split completely
// @return the fields in order; empty fields are kept
//
// The limit is checked before each split, so `max_elements == 1` returns the
// whole string (previously the limit was only tested after the first field
// had been pushed, which made a limit of 1 behave like "unlimited").
// An empty delimiter cannot advance the scan (previously an endless loop);
// the input is then returned as a single element.
static inline std::vector<std::string> split(const std::string &str, const std::string &delimiter = " ", const int max_elements = 0) {
    std::vector<std::string> tokens;
    if (delimiter.empty()) {
        tokens.push_back(str);
        return tokens;
    }

    const bool limited = max_elements > 0;
    const std::vector<std::string>::size_type limit =
        limited ? static_cast<std::vector<std::string>::size_type>(max_elements) : 0;

    std::string::size_type start_index = 0;
    while (true) {
        // Only one slot left: it receives everything that has not been split.
        if (limited && tokens.size() + 1 == limit) {
            tokens.push_back(str.substr(start_index));
            break;
        }
        const std::string::size_type next_index = str.find(delimiter, start_index);
        if (next_index == std::string::npos) {
            tokens.push_back(str.substr(start_index));
            break;
        }
        tokens.push_back(str.substr(start_index, next_index - start_index));
        start_index = next_index + delimiter.length();
    }
    return tokens;
}
Here is my variant, which works somewhat like the explode function in PHP: you pass the string and a list of delimiter characters.
// Breaks `data` into the maximal runs of characters that do not occur in
// `delimiters`. Every character of `delimiters` is a separator on its own;
// runs of separators never produce empty strings.
std::vector< std::string > explode(const std::string& data, const std::string& delimiters) {
    std::vector< std::string > result;
    std::string::size_type tokenStart = data.find_first_not_of(delimiters);
    while (tokenStart != std::string::npos) {
        // tokenEnd is npos for the final token; substr then reads to the end
        // and the following search from npos finds nothing.
        const std::string::size_type tokenEnd = data.find_first_of(delimiters, tokenStart);
        result.push_back(data.substr(tokenStart, tokenEnd - tokenStart));
        tokenStart = data.find_first_not_of(delimiters, tokenEnd);
    }
    return result;
}
example of usage
std::string test_delimiters("hello, there is lots of, delimiters, that may be even together, ");
auto dem_res = explode(test_delimiters, " ,"); // space or comma
for (auto word : dem_res) {
std::cout << word << '\n';
} std::cout << "end\n";
The output:
hello
there
is
lots
of
delimiters
that
may
be
even
together
end
I have a long string that I'm iterating through, and at each iteration I compare a section of the string to a constant and store some parts of the string. In my actual code, this code runs millions of times and is the main bottleneck. I think it's due to the excessive use of std::string::substr.
#include <iostream>
#include <map>
#include <string>
#include <vector>
// Parses a comma-separated list of "key=value" fields. Values whose key is
// "0" are collected in `out0` (in order of appearance); every other pair is
// stored in `out`, where a later value for the same key replaces the earlier.
//
// Fixes relative to the first draft: the unbalanced parenthesis in the
// push_back call, the scan position that was never advanced for non-"0"
// fields (endless loop), the first field being skipped because scanning
// started at the first comma, and `it->first` used on a map element that is
// a pair, not an iterator.
int main() {
    std::string str("0=My,1=comma,2=separated,3=string,0=with,3=repeated,7=IDs");
    std::vector<std::string> out0;
    std::map<std::string, std::string> out;

    // loop over the string, collecting "key=value" pairs
    size_t pos = 0;  // first character of the current field
    while (pos < str.size()) {
        // The field ends at the next comma, or at the end of the string.
        size_t fieldEnd = str.find(',', pos);
        if (fieldEnd == std::string::npos) {
            fieldEnd = str.size();
        }

        if (str.substr(pos, 2) == "0=") {
            out0.push_back(str.substr(pos + 2, fieldEnd - pos - 2));
        } else {
            // A field without '=' is stored as a key with an empty value.
            size_t eqPos = str.find('=', pos);
            if (eqPos == std::string::npos || eqPos > fieldEnd) {
                eqPos = fieldEnd;
            }
            const size_t valueStart = (eqPos < fieldEnd) ? eqPos + 1 : fieldEnd;
            out[str.substr(pos, eqPos - pos)] = str.substr(valueStart, fieldEnd - valueStart);
        }
        pos = fieldEnd + 1;
    }

    // print out the data structures (this doesn't happen in my actual code)
    std::cout << "out0:";
    for (auto& entry : out0) {
        std::cout << ' ' << entry;
    }
    std::cout << std::endl;
    std::cout << "out:";
    for (auto& it : out) {
        std::cout << ' ' << it.first << '=' << it.second;
    }
}
Here are my questions:
How can I perform comparisons on the string without performing a copy and without writing the comparison for each character, e.g. str[pos + 1] == '0' && str[pos + 2] == '=' && ...?
How can I store references to substrings, instead of making copies every time I add to out0 and out?
This may be a great case for the use of char *, but I've never used it before.
Edit:
Unfortunately, I've only got C++11; otherwise, std::string_view is the best answer. Is there a way to accomplish the storage of references without std::string_view?
If you have C++17, you can use string_view thus: (untested code):
string_view sv{str.data() + pos, 2};
if (sv == "0=") ...
No copies. Or even (all in one go):
if (string_view{str.data() + pos, 2} == "0=") ...
If you don't have string_view, you can use char_traits:
if (std::char_traits<char>::compare(str.data() + pos, "0=", 2) == 0) ...
Since people have posted std::string_view, here is the plain old C pointers version.
(Didn't test though, but it'll give you the idea)
See below:
std::string str("0=My,1=comma,2=separated,3=string,0=with,3=repeated,7=IDs");
std::string substr("test");
.
. Inside some function
.
const char *str_p = str.c_str(); // String you want to compare with a substring
const char *substr_p = substr.c_str(); // Your substring
size_t str_len = str.length();
size_t substr_len = substr.length();
bool comparison_result = true;
for(size_t i = 0; i < str_len - substr_len; i++) {
for(size_t j = 0; j < substr_len; j++) {
if(*(str_p + i + j) != *(substr_p + j)) {
comparison_result = false;
break;
}
if (j == substr_len - 1) { // We can only reach here when substring is hit
comparison_result = true;
i = str_len - substr_len;
break;
}
}
}
return comparison_result;
EDIT:
Due to #Toby Speight's suggestion in the comments (which I find very nice), I'm implementing a std::memcmp() version as well. In that case, the inner loop becomes:
.
. Inside some function
.
// Reports whether `substr` occurs anywhere inside `str`, comparing each
// candidate window with std::memcmp.
const char *str_p = str.c_str(); // String you want to compare with a substring
const char *substr_p = substr.c_str(); // Your substring
size_t str_len = str.length();
size_t substr_len = substr.length();
bool comparison_result = false;
// `i + substr_len <= str_len` includes the final alignment (a match at the
// very end of `str`) and does not underflow when `substr` is the longer one.
for(size_t i = 0; i + substr_len <= str_len; i++) {
    if(std::memcmp(str_p + i, substr_p, substr_len) == 0) {
        comparison_result = true;
        break;
    }
}
return comparison_result;
EDIT:
We got another request, this time from #Alexander Zhang, let's implement it:
.
. Inside some function
.
// Reports whether `substr` occurs anywhere inside `str`; same as the
// std::memcmp variant, written with array-index syntax.
const char *str_p = str.c_str(); // String you want to compare with a substring
const char *substr_p = substr.c_str(); // Your substring
size_t str_len = str.length();
size_t substr_len = substr.length();
bool comparison_result = false;
// `i + substr_len <= str_len` includes the final alignment (a match at the
// very end of `str`) and does not underflow when `substr` is the longer one.
for(size_t i = 0; i + substr_len <= str_len; i++) {
    if(std::memcmp(&str_p[i], &substr_p[0], substr_len) == 0) {
        comparison_result = true;
        break;
    }
}
return comparison_result;
Use a std::string_view instead of std::string for the key and value of out. A std::string_view holds a pointer into the string and a length, so it is very lightweight. This lets you extract the information you need without copying any characters of the string and without the memory allocations that creating new strings could incur.
What you'll need to do is get a string_view from the std::string, and then use that string_view to get all of the sub strings you need.
std::string has compare() methods that take a const char* substring as input. You don't need to use std::string::substr() to compare substrings, eg:
#include <iostream>
#include <map>
#include <string>
#include <vector>
// Parses a comma-separated list of "name=value" fields without using
// substr() for the comparison. Values whose name is "0" go to `out0`; all
// other pairs go to `out`, where a later value for a name replaces the
// earlier one.
//
// Fix: the printing loop iterates over map elements (pairs), so the members
// are reached with `.first` / `.second`, not `->`.
int main() {
    std::string str("0=My,1=comma,2=separated,3=string,0=with,3=repeated,7=IDs");
    std::vector<std::string> out0;
    std::map<std::string, std::string> out;
    size_t startPos = 0, delimPos, nameStart, nameEnd, valueStart, valueEnd;

    // loop over the string, collecting "key=value" pairs
    while (startPos < str.size()){
        nameStart = startPos;
        // The name ends at the first '=' or ',' (or at the end of the string).
        delimPos = str.find_first_of("=,", startPos, 2);
        if (delimPos == std::string::npos) {
            // Last field and it has no '=': name only, empty value.
            nameEnd = valueStart = valueEnd = str.size();
        }
        else {
            nameEnd = delimPos;
            if (str[delimPos] == '=') {
                valueStart = nameEnd + 1;
                valueEnd = str.find(',', valueStart);
                if (valueEnd == std::string::npos) {
                    valueEnd = str.size();
                }
            }
            else {
                // A ',' came first: the field has a name but no value.
                valueStart = valueEnd = nameEnd;
            }
        }

        // TODO: if needed, adjust name(Start|End) and value(Start|End) to
        // ignore leading/trailing whitespace around the name and value
        // substrings...

        // compare() looks at the characters in place; no temporary string.
        if (str.compare(nameStart, nameEnd - nameStart, "0", 1) == 0) {
            out0.push_back(str.substr(valueStart, valueEnd - valueStart));
        } else {
            out[str.substr(nameStart, nameEnd - nameStart)] = str.substr(valueStart, valueEnd - valueStart);
        }

        // Continue just past the ',' that ended this field.
        startPos = valueEnd + 1;
    }

    // print out the data structures
    std::cout << "out0:";
    for (auto& entry : out0) {
        std::cout << ' ' << entry;
    }
    std::cout << std::endl;
    std::cout << "out:";
    for (const auto& it : out) {
        std::cout << ' ' << it.first << '=' << it.second;
    }
}
Output:
out0: My with
out: 1=comma 2=separated 3=repeated 7=IDs
Live Demo
You could take this a step further to eliminate the use of substr() altogether by not storing std::string values in your std::vector and std::map at all, but rather store std::pair<char*, size_t>:
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include <utility>
// A non-owning view of a character range: (pointer to first char, length).
// The characters are not copied, so the viewed buffer has to stay alive and
// unmodified for as long as the view is used.
using StrView = std::pair<const char*, size_t>;

// Builds a view of the `size` characters starting at `str`.
StrView makeStrView(const char *str, size_t size) {
    return StrView(str, size);
}

// Ordering for StrView keys: shorter views sort first; views of equal
// length are ordered by their characters.
struct compareStrView {
    bool operator()(const StrView &lhs, const StrView &rhs) const {
        if (lhs.second != rhs.second)
            return lhs.second < rhs.second;
        const int byContent = std::char_traits<char>::compare(lhs.first, rhs.first, lhs.second);
        return byContent < 0;
    }
};

// Writes exactly the viewed characters (no terminator is required).
std::ostream& operator<<(std::ostream &os, const StrView &rhs) {
    return os.write(rhs.first, rhs.second);
}
// Same "name=value" parser as before, but stores (pointer, length) views
// into `str` instead of copied strings. Values whose name is "0" go to
// `out0`; other pairs go to `out`, later values replacing earlier ones.
int main() {
    std::string str("0=My,1=comma,2=separated,3=string,0=with,3=repeated,7=IDs");
    std::vector<StrView> out0;
    std::map<StrView, StrView, compareStrView> out;

    // loop over the string, collecting "key=value" pairs
    size_t fieldStart = 0;
    while (fieldStart < str.size()) {
        const size_t nameStart = fieldStart;
        const size_t delimPos = str.find_first_of("=,", fieldStart, 2);

        size_t nameEnd;
        size_t valueStart;
        size_t valueEnd;
        if (delimPos == std::string::npos) {
            // Last field without '=': name only, empty value.
            nameEnd = str.size();
            valueStart = str.size();
            valueEnd = str.size();
        } else if (str[delimPos] == '=') {
            nameEnd = delimPos;
            valueStart = delimPos + 1;
            valueEnd = str.find(',', valueStart);
            if (valueEnd == std::string::npos) {
                valueEnd = str.size();
            }
        } else {
            // A ',' came first: name without a value.
            nameEnd = delimPos;
            valueStart = delimPos;
            valueEnd = delimPos;
        }

        // TODO: if needed, adjust the name and value bounds to ignore
        // leading/trailing whitespace around the name and value substrings...

        const size_t nameLen = nameEnd - nameStart;
        const size_t valueLen = valueEnd - valueStart;
        if (str.compare(nameStart, nameLen, "0", 1) == 0) {
            out0.push_back(makeStrView(&str[valueStart], valueLen));
        } else {
            out[makeStrView(&str[nameStart], nameLen)] = makeStrView(&str[valueStart], valueLen);
        }

        // Continue just past the ',' that ended this field.
        fieldStart = valueEnd + 1;
    }

    // print out the data structures
    std::cout << "out0:";
    for (auto& entry : out0) {
        std::cout << ' ' << entry;
    }
    std::cout << std::endl;
    std::cout << "out:";
    for (auto &it : out) {
        std::cout << ' ' << it.first << '=' << it.second;
    }
}
Output:
out0: My with
out: 1=comma 2=separated 3=repeated 7=IDs
Live Demo
In C++17, you can use std::string_view instead:
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include <string_view>
// C++17 variant of the "name=value" parser: all names and values are
// std::string_view slices of `sv`, which itself views `str`, so no
// characters are copied. Values whose name is "0" go to `out0`; other pairs
// go to `out`, later values replacing earlier ones.
int main() {
    std::string str("0=My,1=comma,2=separated,3=string,0=with,3=repeated,7=IDs");
    std::string_view sv(str);
    std::vector<std::string_view> out0;
    std::map<std::string_view, std::string_view> out;

    // loop over the string, collecting "key=value" pairs
    size_t fieldStart = 0;
    while (fieldStart < sv.size()) {
        const size_t nameStart = fieldStart;
        const size_t delimPos = sv.find_first_of("=,", fieldStart, 2);

        size_t nameEnd;
        size_t valueStart;
        size_t valueEnd;
        if (delimPos == std::string_view::npos) {
            // Last field without '=': name only, empty value.
            nameEnd = sv.size();
            valueStart = sv.size();
            valueEnd = sv.size();
        } else if (sv[delimPos] == '=') {
            nameEnd = delimPos;
            valueStart = delimPos + 1;
            valueEnd = sv.find(',', valueStart);
            if (valueEnd == std::string_view::npos) {
                valueEnd = sv.size();
            }
        } else {
            // A ',' came first: name without a value.
            nameEnd = delimPos;
            valueStart = delimPos;
            valueEnd = delimPos;
        }

        // TODO: if needed, adjust the name and value bounds to ignore
        // leading/trailing whitespace around the name and value substrings...

        const size_t nameLen = nameEnd - nameStart;
        const size_t valueLen = valueEnd - valueStart;
        if (sv.compare(nameStart, nameLen, "0", 1) == 0) {
            out0.push_back(sv.substr(valueStart, valueLen));
        } else {
            out[sv.substr(nameStart, nameLen)] = sv.substr(valueStart, valueLen);
        }

        // Continue just past the ',' that ended this field.
        fieldStart = valueEnd + 1;
    }

    // print out the data structures
    std::cout << "out0:";
    for (auto& entry : out0) {
        std::cout << ' ' << entry;
    }
    std::cout << std::endl;
    std::cout << "out:";
    for (auto &it : out) {
        std::cout << ' ' << it.first << '=' << it.second;
    }
}
You can try to use Regex to split the value pair tuples.
I haven't tested whether it is any faster, though.
This expression should do the trick, just get all the match (all the pairs)
(?:(\d)+=(?:([^,]*),?))*?
https://regex101.com/r/PDZMq0/1
So basically I want to create a format function that accepts a string and replaces words in that string with whatever the user wants. At first I had some issues with non-dereferenceable iterators, until I realized that changing the size of a string can invalidate any iterators into it. It doesn't throw any more exceptions, but now the output is the same as the input. Any advice?
// Replaces, in place, every blank-delimited word of `s` that equals
// `oldWord` with `newWord`, and returns a reference to `s`.
//
// Words are maximal runs of non-blank characters (blank = space or tab);
// the blanks themselves are preserved. An empty `oldWord` leaves `s`
// unchanged.
//
// The result is built in a separate buffer and assigned back at the end, so
// no iterator into `s` is used after the string has changed size. The first
// draft reset its word buffer on every character (so it never matched) and
// passed a character, not a position, to insert().
std::string& formatFn(std::string& s, std::string& oldWord, std::string& newWord)
{
    if (oldWord.empty())
        return s;

    std::string result;
    result.reserve(s.size());
    std::string word;  // characters of the word currently being read

    for (std::string::size_type i = 0; i < s.size(); ++i)
    {
        const char ch = s[i];
        // <cctype> functions require a value representable as unsigned char.
        if (!std::isblank(static_cast<unsigned char>(ch)))
        {
            word += ch;
            continue;
        }
        // A blank ends the current word: emit it (or its replacement).
        result += (word == oldWord) ? newWord : word;
        result += ch;
        word.clear();
    }
    // The input may end without a trailing blank; flush the last word.
    result += (word == oldWord) ? newWord : word;

    s = result;
    return s;
}
If you are already using std::string, why not use the member functions it provides?
for (auto it = text.find(o_text); it != string::npos; it = text.find(o_text)){
text.replace(it, o_text.size(), n_text);
}
string::iterator beg = iter1 - word.size();
I'm not sure what word actually does. You are trying to delete oldWord, right? Then it should be:
string::iterator beg = iter1 - oldWord.size();
EDIT : This is an improved version of your code:
// Returns a copy of `s` in which every blank-delimited word equal to
// `oldWord` is replaced by `newWord`.
//
// Words are maximal runs of non-blank characters (blank = space or tab);
// the blanks are copied through unchanged. An empty `oldWord` matches
// nothing, so consecutive blanks no longer get `newWord` inserted between
// them.
std::string formatFn(const std::string& s, const std::string& oldWord, const std::string& newWord) {
    if (oldWord.empty())
        return s;

    std::string result;  // holds the string we want to return
    std::string word;    // while iterating over 's', holds the current word
    result.reserve(s.size());

    for (auto iter1 = s.begin(); iter1 != s.end(); ++iter1) {
        // <cctype> functions require a value representable as unsigned char;
        // passing a negative plain char is undefined behaviour.
        if (!std::isblank(static_cast<unsigned char>(*iter1))) {
            word += *iter1;
            continue;
        }
        // A blank marks the end of a word: append the word, or 'newWord'
        // when it is the one to be replaced, followed by the blank itself.
        result += (word == oldWord) ? newWord : word;
        result += *iter1;
        word.clear();  // reset 'word' to hold the next word in s
    }

    // The string might not end with a blank, so the last word still has to
    // be flushed here.
    result += (word == oldWord) ? newWord : word;
    return result;
}
You are over-complicating it:
// Returns `s` with every non-overlapping occurrence of `sOld` replaced by
// `sNew`, scanning left to right. Text inserted by a replacement is never
// searched again, so `sNew` may contain `sOld`.
// An empty `sOld` matches at every position and would never terminate, so
// the input is returned unchanged in that case.
std::string replace_all(std::string s, const std::string& sOld, const std::string& sNew)
{
    if (sOld.empty())
        return s;

    for (std::size_t p = s.find(sOld); p != std::string::npos; p = s.find(sOld, p + sNew.length()))
    {
        s.replace(p, sOld.length(), sNew);
    }
    return s;
}
If you are looking to replace whole words only (which your current attempt will not do):
#include <iostream>
#include <string>
// Returns true when the occurrence of `sOld` at `pos` in `s` stands alone as
// a word: it is not directly preceded and not directly followed by an
// alphabetic character (string start/end count as boundaries).
bool test(const std::string& s, const std::string& sOld, std::size_t pos)
{
    // <cctype> functions need a value representable as unsigned char.
    const bool leftOk = (pos == 0) || !::isalpha(static_cast<unsigned char>(s[pos - 1]));
    const std::size_t after = pos + sOld.length();
    const bool rightOk = (after >= s.length()) || !::isalpha(static_cast<unsigned char>(s[after]));
    return leftOk && rightOk;
}

// Returns the position of the first whole-word occurrence of `sOld` in `s`
// at or after `pos`, or std::string::npos when no occurrence is left.
std::size_t find_word(const std::string& s, const std::string& sOld, std::size_t pos)
{
    pos = s.find(sOld, pos);
    while (pos != std::string::npos && (!test(s, sOld, pos) && pos < s.length()))
    {
        // This hit is embedded in a longer word; look for the next one.
        pos++;
        pos = s.find(sOld, pos);
    }
    return pos;
}

// Returns `s` with every whole-word occurrence of `sOld` replaced by `sNew`.
// An empty `sOld` is rejected up front: it matches without consuming any
// input, which could keep the loop from ever advancing.
std::string replace_all(std::string s, const std::string& sOld, const std::string& sNew)
{
    if (sOld.empty())
        return s;

    std::size_t p = find_word(s, sOld, 0);
    while (p != std::string::npos && p < s.length())
    {
        s.replace(p, sOld.length(), sNew);
        // Resume after the inserted text so it is not matched again.
        p = find_word(s, sOld, p + sNew.length());
    }
    return s;
}
// Demonstrates whole-word replacement on a sample sentence and prints the
// result.
int main()
{
    const std::string sOrig = "eat Heat eat beat sweat cheat eat";
    const std::string sOld = "eat";
    const std::string sNew = "ate";

    const std::string sResult = replace_all(sOrig, sOld, sNew);
    std::cout << "Result: " << sResult << std::endl;
    // Output: "ate Heat ate beat sweat cheat ate"

    return 0;
}
There is a very useful function in Python called strip(). Any similar ones in C++?
I use this:
#include <string>
#include <cctype>
// Returns a copy of `inpt` without leading and trailing whitespace (as
// classified by std::isspace). Interior whitespace is kept.
//
// Both scans are bounded by the other end of the range, so empty and
// all-whitespace inputs yield an empty string instead of walking the
// iterators out of the string.
std::string strip(const std::string &inpt)
{
    auto first = inpt.begin();
    auto last = inpt.end();
    // <cctype> functions need a value representable as unsigned char.
    while (first != last && std::isspace(static_cast<unsigned char>(*first)))
        ++first;
    while (last != first && std::isspace(static_cast<unsigned char>(*(last - 1))))
        --last;
    return std::string(first, last);
}
There's nothing built-in; I used to use something like the following:
// Predicate that is true for characters which do NOT belong to the
// character class `mask`, according to the ctype<char> facet of the locale
// passed to the constructor (the global locale by default).
template <std::ctype_base::mask mask>
class IsNot
{
public:
    IsNot( std::locale const& l = std::locale() )
        : myLocale( l )
        , myCType( &std::use_facet<std::ctype<char> >( l ) )
    {
    }

    bool operator()( char ch ) const
    {
        const bool inClass = myCType->is( mask, ch );
        return ! inClass;
    }

private:
    std::locale myLocale;             // copy of the locale; keeps the facet alive
    std::ctype<char> const* myCType;  // facet obtained from that locale
};

// Predicate for "not a whitespace character".
typedef IsNot<std::ctype_base::space> IsNotSpace;
// Returns a copy of `original` without leading and trailing whitespace.
std::string
trim( std::string const& original )
{
    // Searching the reversed string finds the last non-space character;
    // base() turns that into the forward iterator one past it. For an
    // all-whitespace string this is begin(), which gives an empty result.
    std::string::const_iterator const right
        = std::find_if( original.rbegin(), original.rend(), IsNotSpace() ).base();
    // First non-space character, looking no further than `right`.
    std::string::const_iterator const left
        = std::find_if( original.begin(), right, IsNotSpace() );
    return std::string( left, right );
}
which works pretty well. (I now have a significantly more complex
version which handles UTF-8 correctly.)
// Removes leading and trailing spaces, tabs, carriage returns and newlines
// from `str` in place. Interior whitespace is kept.
//
// A string that is empty or consists only of those characters ends up empty;
// the earlier loop kept calling front()/back() after the string had been
// emptied. The function's closing brace, which was missing, is restored.
void strip(std::string &str)
{
    static const char kWhitespace[] = " \n\t\r";

    const std::string::size_type first = str.find_first_not_of(kWhitespace);
    if (first == std::string::npos)
    {
        // Nothing but whitespace (or nothing at all).
        str.clear();
        return;
    }
    const std::string::size_type last = str.find_last_not_of(kWhitespace);

    // Cut the tail first so that `first` is still a valid offset afterwards.
    str.erase(last + 1);
    str.erase(0, first);
}
This is on top of the answer provided by Ferdi Kedef to make it safer.
// Removes leading and trailing whitespace (per std::isspace) from `str` in
// place. An empty string is left alone; a string made up only of whitespace
// becomes empty.
void strip(std::string& str)
{
    if (str.empty()) {
        return;
    }

    // Advance from the front past whitespace, stopping at end().
    auto head = str.begin();
    while (head != str.end() && std::isspace(*head)) {
        ++head;
    }

    // Advance from the back past whitespace, stopping at rend().
    auto tail = str.rbegin();
    while (tail != str.rend() && std::isspace(*tail)) {
        ++tail;
    }

    const int start_pos = head - str.begin();
    const int end_pos = tail.base() - str.begin();
    // For an all-whitespace string the two scans cross each other
    // (start_pos > end_pos); there is nothing to keep in that case.
    if (start_pos <= end_pos) {
        str = std::string(head, tail.base());
    } else {
        str = "";
    }
}
What is the right way to split a string into a vector of strings? Delimiter is space or comma.
A convenient way would be boost's string algorithms library.
#include <boost/algorithm/string/classification.hpp> // Include boost::for is_any_of
#include <boost/algorithm/string/split.hpp> // Include for boost::split
// ...
std::vector<std::string> words;
std::string s;
boost::split(words, s, boost::is_any_of(", "), boost::token_compress_on);
For space-separated strings, you can do this:
std::string s = "What is the right way to split a string into a vector of strings";
std::stringstream ss(s);
std::istream_iterator<std::string> begin(ss);
std::istream_iterator<std::string> end;
std::vector<std::string> vstrings(begin, end);
std::copy(vstrings.begin(), vstrings.end(), std::ostream_iterator<std::string>(std::cout, "\n"));
Output:
What
is
the
right
way
to
split
a
string
into
a
vector
of
strings
For a string that has both commas and spaces:
// ctype facet whose classification table marks ',' and ' ' as whitespace.
// Imbuing a stream with a locale that carries this facet makes formatted
// extraction (operator>>) break words at commas as well as at spaces.
struct tokens : std::ctype<char>
{
    tokens() : std::ctype<char>(get_table()) {}

    // Returns a pointer to a function-static table: a copy of the classic
    // "C" table in which the entries for ',' and ' ' are replaced by `space`.
    // The copy is refreshed on every call.
    static std::ctype_base::mask const* get_table()
    {
        typedef std::ctype<char> facet_type;
        static const facet_type::mask* classic = facet_type::classic_table();
        static facet_type::mask table[facet_type::table_size];

        std::memcpy(table, classic, facet_type::table_size * sizeof(facet_type::mask));
        table[' '] = std::ctype_base::space;
        table[','] = std::ctype_base::space;
        return table;
    }
};
std::string s = "right way, wrong way, correct way";
std::stringstream ss(s);
ss.imbue(std::locale(std::locale(), new tokens()));
std::istream_iterator<std::string> begin(ss);
std::istream_iterator<std::string> end;
std::vector<std::string> vstrings(begin, end);
std::copy(vstrings.begin(), vstrings.end(), std::ostream_iterator<std::string>(std::cout, "\n"));
Output:
right
way
wrong
way
correct
way
You can use getline with delimiter:
string s, tmp;
stringstream ss(s);
vector<string> words;
while(getline(ss, tmp, ',')){
words.push_back(tmp);
.....
}
// Splits `str` on every occurrence of the (possibly multi-character) `token`.
//
// Empty fields are kept, including a trailing one when the input ends with
// the token ("1,2," -> {"1","2",""}). An empty input yields an empty vector.
// An empty `token` cannot delimit anything, so the whole input is returned
// as a single element (the previous implementation never terminated here).
std::vector<std::string> split(std::string str, std::string token) {
    std::vector<std::string> result;
    if (token.empty()) {
        if (!str.empty()) result.push_back(str);
        return result;
    }

    // Walk the string with an offset instead of repeatedly re-assigning
    // `str`, and keep positions as size_t so npos is never narrowed to int.
    std::size_t start = 0;
    while (start < str.size()) {
        const std::size_t hit = str.find(token, start);
        if (hit == std::string::npos) {
            result.push_back(str.substr(start));
            break;
        }
        result.push_back(str.substr(start, hit - start));
        start = hit + token.size();
        // The token was the last thing in the input: record the empty tail.
        if (start == str.size()) result.push_back(std::string());
    }
    return result;
}
split("1,2,3",",") ==> ["1","2","3"]
split("1,2,",",") ==> ["1","2",""]
split("1token2token3","token") ==> ["1","2","3"]
If the string has both spaces and commas you can use the string class function
found_index = myString.find_first_of(delims_str, begin_index)
in a loop. Checking for != npos and inserting into a vector. If you prefer old school you can also use C's
strtok()
method.
// Splits `text` at every `delim` character by reading fields with
// std::getline. Empty fields between delimiters are kept; a delimiter at the
// very end of the input does not produce a trailing empty field, and an
// empty input yields an empty vector.
std::vector<std::string> split(std::string text, char delim) {
    std::vector<std::string> fields;
    std::stringstream reader(text);
    for (std::string field; std::getline(reader, field, delim); ) {
        fields.push_back(field);
    }
    return fields;
}
split("String will be split", ' ') -> {"String", "will", "be", "split"}
split("Hello, how are you?", ',') -> {"Hello", " how are you?"}
EDIT: Here's a thing I made, this can use multi-char delimiters, albeit I'm not 100% sure if it always works:
// Splits `text` on every occurrence of the multi-character delimiter `delim`.
//
// All fields are kept, including empty ones, so the result always has
// (number of delimiter occurrences + 1) elements; an empty `text` yields {""}.
// An empty `delim` cannot advance the search position (the previous code
// looped forever), so in that case the whole text is returned as one element.
std::vector<std::string> split(std::string text, std::string delim) {
    std::vector<std::string> vec;
    if (delim.empty()) {
        vec.push_back(text);
        return vec;
    }

    std::size_t prevPos = 0;  // start of the field currently being read
    for (;;) {
        const std::size_t pos = text.find(delim, prevPos);
        if (pos == std::string::npos) {
            // No further delimiter: the remainder is the last field.
            vec.push_back(text.substr(prevPos));
            return vec;
        }
        vec.push_back(text.substr(prevPos, pos - prevPos));
        prevPos = pos + delim.length();
    }
}
Tweaked version from Techie Delight:
#include <string>
#include <vector>
// Splits `str` into the maximal runs of characters that are not `delim`.
// Consecutive, leading and trailing delimiters are skipped, so the result
// never contains an empty string.
std::vector<std::string> split(const std::string& str, char delim) {
    std::vector<std::string> strings;
    size_t cursor = 0;
    for (;;) {
        // Jump over any delimiters in front of the next token.
        const size_t first = str.find_first_not_of(delim, cursor);
        if (first == std::string::npos) {
            break;
        }
        // `cursor` becomes npos for the last token; substr then simply takes
        // everything up to the end of the string.
        cursor = str.find(delim, first);
        strings.push_back(str.substr(first, cursor - first));
    }
    return strings;
}
I made this custom function that converts a line into a vector of words:
#include <iostream>
#include <vector>
#include <ctime>
#include <string>
using namespace std;
// Reads one line from standard input and splits it at every space into
// `subArray`. Each space ends a token (so consecutive spaces produce empty
// tokens), and the text after the last space is added when the final
// character is reached.
int main(){
    std::string line;
    std::getline(std::cin, line);
    const int len = line.length();
    std::vector<std::string> subArray;

    int tokenStart = 0;  // index of the first character of the current token
    for (int pos = 0; pos < len; ++pos) {
        if (line[pos] == ' ') {
            subArray.push_back(line.substr(tokenStart, pos - tokenStart));
            tokenStart = pos + 1;
        }
        // Last character: flush whatever follows the most recent space.
        if (pos == len - 1) {
            subArray.push_back(line.substr(tokenStart, pos - tokenStart + 1));
        }
    }
    return 0;
}
Here is a modified version of roach's solution that splits based on a string of single character delimiters + supports the option to compress duplicate delimiters.
// Splits `text` at any single character contained in `delim`.
//
// With `compress == false` every delimiter ends a field, so adjacent
// delimiters produce empty strings. With `compress == true` delimiters at
// the start of a field are skipped, so no empty strings are produced.
// In both modes nothing is appended when the input ends exactly at the
// start of a new field.
std::vector<std::string> split(std::string text, std::string delim, bool compress)
{
    std::vector<std::string> vec;
    size_t fieldStart = 0;
    for (;;)
    {
        size_t hit = text.find_first_of(delim, fieldStart);
        if (compress)
        {
            // A delimiter sitting right at the field start belongs to a run
            // of delimiters: step over it and search again.
            while (hit == fieldStart)
            {
                ++fieldStart;
                hit = text.find_first_of(delim, fieldStart);
            }
        }
        if (hit == std::string::npos)
        {
            if (fieldStart != text.size())
                vec.push_back(text.substr(fieldStart));
            return vec;
        }
        vec.push_back(text.substr(fieldStart, hit - fieldStart));
        fieldStart = hit + 1;
    }
}
Example without compress:
std::string s = " 1.2 foo#foo . ";
auto res = split(s, ".# ", false);
for(auto i : res)
std::cout << "string {" << i << "}" << std::endl;
Output:
string {}
string {}
string {1}
string {2}
string {}
string {foo}
string {foo}
string {}
string {}
With compress split(s, ".# ", true);
string {1}
string {2}
string {foo}
string {foo}
Here's a function that will split up a string into a vector but it doesn't include empty strings in the output vector.
// Splits `str` on every occurrence of `token` and returns only the
// non-empty pieces; leading, trailing and repeated tokens are ignored.
//
// An empty `token` cannot delimit anything (the previous implementation
// looped forever on it), so the whole input is returned as one element,
// or nothing if the input is empty too.
std::vector<std::string> split(std::string str, std::string token) {
    std::vector<std::string> result;
    if (token.empty()) {
        if (!str.empty()) result.push_back(str);
        return result;
    }

    // Scan with an offset and size_t positions rather than narrowing the
    // result of find() to int and re-copying the remainder each round.
    std::size_t start = 0;
    while (start < str.size()) {
        std::size_t hit = str.find(token, start);
        if (hit == std::string::npos) hit = str.size();  // last piece runs to the end
        if (hit > start) result.push_back(str.substr(start, hit - start));
        start = hit + token.size();
    }
    return result;
}
Note: The above was adapted from this answer.
Usage
// Usage example: splits a sample string on " : " into at most two parts
// and prints each part on its own line.
void test() {
    std::string a = "hello : world : ok : fine";
    const auto parts = split(a, " : ", 2);
    for (auto part : parts) {
        std::cout << part << std::endl;
    }
}
// Splits `str` on `delimiter`, optionally limiting the number of results.
//
// @param str           text to split
// @param delimiter     separator (may be several characters long)
// @param max_elements  when > 0, at most this many elements are returned and
//                      the last one holds the unsplit remainder; when <= 0
//                      the string is split completely
// @return the fields in order; empty fields are kept
//
// The limit is checked before each split, so `max_elements == 1` returns the
// whole string (previously the limit was only tested after the first field
// had been pushed, which made a limit of 1 behave like "unlimited").
// An empty delimiter cannot advance the scan (previously an endless loop);
// the input is then returned as a single element.
static inline std::vector<std::string> split(const std::string &str, const std::string &delimiter = " ", const int max_elements = 0) {
    std::vector<std::string> tokens;
    if (delimiter.empty()) {
        tokens.push_back(str);
        return tokens;
    }

    const bool limited = max_elements > 0;
    const std::vector<std::string>::size_type limit =
        limited ? static_cast<std::vector<std::string>::size_type>(max_elements) : 0;

    std::string::size_type start_index = 0;
    while (true) {
        // Only one slot left: it receives everything that has not been split.
        if (limited && tokens.size() + 1 == limit) {
            tokens.push_back(str.substr(start_index));
            break;
        }
        const std::string::size_type next_index = str.find(delimiter, start_index);
        if (next_index == std::string::npos) {
            tokens.push_back(str.substr(start_index));
            break;
        }
        tokens.push_back(str.substr(start_index, next_index - start_index));
        start_index = next_index + delimiter.length();
    }
    return tokens;
}
Here is my variant, which works somewhat like the explode function in PHP: you pass the string and a list of delimiter characters.
// Breaks `data` into the maximal runs of characters that do not occur in
// `delimiters`. Every character of `delimiters` is a separator on its own;
// runs of separators never produce empty strings.
std::vector< std::string > explode(const std::string& data, const std::string& delimiters) {
    std::vector< std::string > result;
    std::string::size_type tokenStart = data.find_first_not_of(delimiters);
    while (tokenStart != std::string::npos) {
        // tokenEnd is npos for the final token; substr then reads to the end
        // and the following search from npos finds nothing.
        const std::string::size_type tokenEnd = data.find_first_of(delimiters, tokenStart);
        result.push_back(data.substr(tokenStart, tokenEnd - tokenStart));
        tokenStart = data.find_first_not_of(delimiters, tokenEnd);
    }
    return result;
}
example of usage
std::string test_delimiters("hello, there is lots of, delimiters, that may be even together, ");
auto dem_res = explode(test_delimiters, " ,"); // space or comma
for (auto word : dem_res) {
std::cout << word << '\n';
} std::cout << "end\n";
The output:
hello
there
is
lots
of
delimiters
that
may
be
even
together
end