I have an XML string such as the one below:
< thing TYPE="array" UNITS="meters">1.0,1.3,1.2,1.7,1.4,1.9< /thing>
I'm trying to put each pair of numbers into a std::vector< std::pair< double,double > >. It should look something like this when finished:
< (1.0,1.3), (1.2,1.7), (1.4,1.9) >
I know one way I could do this is search for each comma in the string to find the individual numbers and create a substring, then convert the substring to a double and fill one of the numbers in the pair. However, this seems like an overly complicated way to accomplish this task. Is there a simple way I could do this, maybe by using std::istringstream? Thanks in advance!
What about using getline()?
#include <vector>
#include <sstream>
#include <iostream>
int main()
{
std::vector<std::pair<double, double>> vpss;
std::string str1, str2;
std::istringstream iss("1.0,1.3,1.2,1.7,1.4,1.9");
while ( std::getline(iss, str1, ',') && std::getline(iss, str2, ',') )
{
std::cout << str1 << ", " << str2 << std::endl;
vpss.emplace_back(std::stod(str1), std::stod(str2));
}
return 0;
}
Another solution could be put the comma in a char variable
#include <vector>
#include <sstream>
#include <iostream>
int main()
{
std::vector<std::pair<double, double>> vpss;
char ch;
double d1, d2;
std::istringstream iss("1.0,1.3,1.2,1.7,1.4,1.9");
while ( iss >> d1 >> ch >> d2 )
{
std::cout << d1 << ", " << d2 << std::endl;
vpss.emplace_back(d1, d2);
iss >> ch;
}
return 0;
}
You can use regex to capture the data you want, and std::stringstream to parse it
#include <iostream>
#include <string>
#include <vector>
#include <sstream>
#include <regex>
#include <utility>
using namespace std;
int main()
{
auto xml_str = R"(< thing TYPE="array" UNITS="meters">1.0,1.3,1.2,1.7,1.4,1.9< /thing>)"s;
auto r = regex(R"(>(\d.*\d)<)"s);
auto m = smatch{};
auto vec = vector<pair<double, double>>{};
// Determine if there is a match
auto beg = cbegin(xml_str);
while (regex_search(beg, cend(xml_str), m, r)) {
// Create a string that holds the 1st capture group
auto str = string(m[1].first, m[1].second);
auto ss = stringstream{str};
auto token1 = ""s;
auto token2 = ""s;
auto d1 = double{};
auto d2 = double{};
// Parse
while (getline(ss, token1, ',')) {
getline(ss, token2, ',');
d1 = stod(token1);
d2 = stod(token2);
vec.emplace_back(make_pair(d1, d2));
}
// Set new start position for next search
beg = m[0].second;
}
// Print vector content
auto count = 1;
for (const auto& i : vec) {
if (count == 3) {
cout << "(" << i.first << ", " << i.second << ")\n";
count = 1;
}
else {
cout << "(" << i.first << ", " << i.second << "), ";
count++;
}
}
}
(Compile with -std=c++14)
Output:
(1, 1.3), (1.2, 1.7), (1.4, 1.9)
Live Demo
Related
What would be easiest method to split a string using c++11?
I've seen the method used by this post, but I feel that there ought to be a less verbose way of doing it using the new standard.
Edit: I would like to have a vector<string> as a result and be able to delimitate on a single character.
std::regex_token_iterator performs generic tokenization based on a regex. It may or may not be overkill for doing simple splitting on a single character, but it works and is not too verbose:
std::vector<std::string> split(const string& input, const string& regex) {
// passing -1 as the submatch index parameter performs splitting
std::regex re(regex);
std::sregex_token_iterator
first{input.begin(), input.end(), re, -1},
last;
return {first, last};
}
Here is a (maybe less verbose) way to split string (based on the post you mentioned).
#include <string>
#include <sstream>
#include <vector>
std::vector<std::string> split(const std::string &s, char delim) {
std::stringstream ss(s);
std::string item;
std::vector<std::string> elems;
while (std::getline(ss, item, delim)) {
elems.push_back(item);
// elems.push_back(std::move(item)); // if C++11 (based on comment from #mchiasson)
}
return elems;
}
Here's an example of splitting a string and populating a vector with the extracted elements using boost.
#include <boost/algorithm/string.hpp>
std::string my_input("A,B,EE");
std::vector<std::string> results;
boost::algorithm::split(results, my_input, boost::is_any_of(","));
assert(results[0] == "A");
assert(results[1] == "B");
assert(results[2] == "EE");
Another regex solution inspired by other answers but hopefully shorter and easier to read:
std::string s{"String to split here, and here, and here,..."};
std::regex regex{R"([\s,]+)"}; // split on space and comma
std::sregex_token_iterator it{s.begin(), s.end(), regex, -1};
std::vector<std::string> words{it, {}};
I don't know if this is less verbose, but it might be easier to grok for those more seasoned in dynamic languages such as javascript. The only C++11 features it uses is auto and range-based for loop.
#include <string>
#include <cctype>
#include <iostream>
#include <vector>
using namespace std;
int main()
{
string s = "hello how are you won't you tell me your name";
vector<string> tokens;
string token;
for (const auto& c: s) {
if (!isspace(c))
token += c;
else {
if (token.length()) tokens.push_back(token);
token.clear();
}
}
if (token.length()) tokens.push_back(token);
return 0;
}
#include <iostream>
#include <algorithm>
#include <vector>
#include <string>
using namespace std;
vector<string> split(const string& str, int delimiter(int) = ::isspace){
vector<string> result;
auto e=str.end();
auto i=str.begin();
while(i!=e){
i=find_if_not(i,e, delimiter);
if(i==e) break;
auto j=find_if(i,e, delimiter);
result.push_back(string(i,j));
i=j;
}
return result;
}
int main(){
string line;
getline(cin,line);
vector<string> result = split(line);
for(auto s: result){
cout<<s<<endl;
}
}
My choice is boost::tokenizer but I didn't have any heavy tasks and test with huge data.
Example from boost doc with lambda modification:
#include <iostream>
#include <boost/tokenizer.hpp>
#include <string>
#include <vector>
int main()
{
using namespace std;
using namespace boost;
string s = "This is, a test";
vector<string> v;
tokenizer<> tok(s);
for_each (tok.begin(), tok.end(), [&v](const string & s) { v.push_back(s); } );
// result 4 items: 1)This 2)is 3)a 4)test
return 0;
}
This is my answer. Verbose, readable and efficient.
std::vector<std::string> tokenize(const std::string& s, char c) {
auto end = s.cend();
auto start = end;
std::vector<std::string> v;
for( auto it = s.cbegin(); it != end; ++it ) {
if( *it != c ) {
if( start == end )
start = it;
continue;
}
if( start != end ) {
v.emplace_back(start, it);
start = end;
}
}
if( start != end )
v.emplace_back(start, end);
return v;
}
#include <string>
#include <vector>
#include <sstream>
inline vector<string> split(const string& s) {
vector<string> result;
istringstream iss(s);
for (string w; iss >> w; )
result.push_back(w);
return result;
}
Here is a C++11 solution that uses only std::string::find(). The delimiter can be any number of characters long. Parsed tokens are output via an output iterator, which is typically a std::back_inserter in my code.
I have not tested this with UTF-8, but I expect it should work as long as the input and delimiter are both valid UTF-8 strings.
#include <string>
template<class Iter>
Iter splitStrings(const std::string &s, const std::string &delim, Iter out)
{
if (delim.empty()) {
*out++ = s;
return out;
}
size_t a = 0, b = s.find(delim);
for ( ; b != std::string::npos;
a = b + delim.length(), b = s.find(delim, a))
{
*out++ = std::move(s.substr(a, b - a));
}
*out++ = std::move(s.substr(a, s.length() - a));
return out;
}
Some test cases:
void test()
{
std::vector<std::string> out;
size_t counter;
std::cout << "Empty input:" << std::endl;
out.clear();
splitStrings("", ",", std::back_inserter(out));
counter = 0;
for (auto i = out.begin(); i != out.end(); ++i, ++counter) {
std::cout << counter << ": " << *i << std::endl;
}
std::cout << "Non-empty input, empty delimiter:" << std::endl;
out.clear();
splitStrings("Hello, world!", "", std::back_inserter(out));
counter = 0;
for (auto i = out.begin(); i != out.end(); ++i, ++counter) {
std::cout << counter << ": " << *i << std::endl;
}
std::cout << "Non-empty input, non-empty delimiter"
", no delimiter in string:" << std::endl;
out.clear();
splitStrings("abxycdxyxydefxya", "xyz", std::back_inserter(out));
counter = 0;
for (auto i = out.begin(); i != out.end(); ++i, ++counter) {
std::cout << counter << ": " << *i << std::endl;
}
std::cout << "Non-empty input, non-empty delimiter"
", delimiter exists string:" << std::endl;
out.clear();
splitStrings("abxycdxy!!xydefxya", "xy", std::back_inserter(out));
counter = 0;
for (auto i = out.begin(); i != out.end(); ++i, ++counter) {
std::cout << counter << ": " << *i << std::endl;
}
std::cout << "Non-empty input, non-empty delimiter"
", delimiter exists string"
", input contains blank token:" << std::endl;
out.clear();
splitStrings("abxycdxyxydefxya", "xy", std::back_inserter(out));
counter = 0;
for (auto i = out.begin(); i != out.end(); ++i, ++counter) {
std::cout << counter << ": " << *i << std::endl;
}
std::cout << "Non-empty input, non-empty delimiter"
", delimiter exists string"
", nothing after last delimiter:" << std::endl;
out.clear();
splitStrings("abxycdxyxydefxy", "xy", std::back_inserter(out));
counter = 0;
for (auto i = out.begin(); i != out.end(); ++i, ++counter) {
std::cout << counter << ": " << *i << std::endl;
}
std::cout << "Non-empty input, non-empty delimiter"
", only delimiter exists string:" << std::endl;
out.clear();
splitStrings("xy", "xy", std::back_inserter(out));
counter = 0;
for (auto i = out.begin(); i != out.end(); ++i, ++counter) {
std::cout << counter << ": " << *i << std::endl;
}
}
Expected output:
Empty input:
0:
Non-empty input, empty delimiter:
0: Hello, world!
Non-empty input, non-empty delimiter, no delimiter in string:
0: abxycdxyxydefxya
Non-empty input, non-empty delimiter, delimiter exists string:
0: ab
1: cd
2: !!
3: def
4: a
Non-empty input, non-empty delimiter, delimiter exists string, input contains blank token:
0: ab
1: cd
2:
3: def
4: a
Non-empty input, non-empty delimiter, delimiter exists string, nothing after last delimiter:
0: ab
1: cd
2:
3: def
4:
Non-empty input, non-empty delimiter, only delimiter exists string:
0:
1:
One possible way of doing this is finding all occurrences of the split string and storing locations to a list. Then count input string characters and when you get to a position where there is a 'search hit' in the position list then you jump forward by 'length of the split string'. This approach takes a split string of any length. Here is my tested and working solution.
#include <iostream>
#include <string>
#include <list>
#include <vector>
using namespace std;
vector<string> Split(string input_string, string search_string)
{
list<int> search_hit_list;
vector<string> word_list;
size_t search_position, search_start = 0;
// Find start positions of every substring occurence and store positions to a hit list.
while ( (search_position = input_string.find(search_string, search_start) ) != string::npos) {
search_hit_list.push_back(search_position);
search_start = search_position + search_string.size();
}
// Iterate through hit list and reconstruct substring start and length positions
int character_counter = 0;
int start, length;
for (auto hit_position : search_hit_list) {
// Skip over substrings we are splitting with. This also skips over repeating substrings.
if (character_counter == hit_position) {
character_counter = character_counter + search_string.size();
continue;
}
start = character_counter;
character_counter = hit_position;
length = character_counter - start;
word_list.push_back(input_string.substr(start, length));
character_counter = character_counter + search_string.size();
}
// If the search string is not found in the input string, then return the whole input_string.
if (word_list.size() == 0) {
word_list.push_back(input_string);
return word_list;
}
// The last substring might be still be unprocessed, get it.
if (character_counter < input_string.size()) {
word_list.push_back(input_string.substr(character_counter, input_string.size() - character_counter));
}
return word_list;
}
int main() {
vector<string> word_list;
string search_string = " ";
// search_string = "the";
string text = "thetheThis is some text to test with the split-thethe function.";
word_list = Split(text, search_string);
for (auto item : word_list) {
cout << "'" << item << "'" << endl;
}
cout << endl;
}
What would be easiest method to split a string using c++11?
I've seen the method used by this post, but I feel that there ought to be a less verbose way of doing it using the new standard.
Edit: I would like to have a vector<string> as a result and be able to delimitate on a single character.
std::regex_token_iterator performs generic tokenization based on a regex. It may or may not be overkill for doing simple splitting on a single character, but it works and is not too verbose:
std::vector<std::string> split(const string& input, const string& regex) {
// passing -1 as the submatch index parameter performs splitting
std::regex re(regex);
std::sregex_token_iterator
first{input.begin(), input.end(), re, -1},
last;
return {first, last};
}
Here is a (maybe less verbose) way to split string (based on the post you mentioned).
#include <string>
#include <sstream>
#include <vector>
std::vector<std::string> split(const std::string &s, char delim) {
std::stringstream ss(s);
std::string item;
std::vector<std::string> elems;
while (std::getline(ss, item, delim)) {
elems.push_back(item);
// elems.push_back(std::move(item)); // if C++11 (based on comment from #mchiasson)
}
return elems;
}
Here's an example of splitting a string and populating a vector with the extracted elements using boost.
#include <boost/algorithm/string.hpp>
std::string my_input("A,B,EE");
std::vector<std::string> results;
boost::algorithm::split(results, my_input, boost::is_any_of(","));
assert(results[0] == "A");
assert(results[1] == "B");
assert(results[2] == "EE");
Another regex solution inspired by other answers but hopefully shorter and easier to read:
std::string s{"String to split here, and here, and here,..."};
std::regex regex{R"([\s,]+)"}; // split on space and comma
std::sregex_token_iterator it{s.begin(), s.end(), regex, -1};
std::vector<std::string> words{it, {}};
I don't know if this is less verbose, but it might be easier to grok for those more seasoned in dynamic languages such as javascript. The only C++11 features it uses is auto and range-based for loop.
#include <string>
#include <cctype>
#include <iostream>
#include <vector>
using namespace std;
int main()
{
string s = "hello how are you won't you tell me your name";
vector<string> tokens;
string token;
for (const auto& c: s) {
if (!isspace(c))
token += c;
else {
if (token.length()) tokens.push_back(token);
token.clear();
}
}
if (token.length()) tokens.push_back(token);
return 0;
}
#include <iostream>
#include <algorithm>
#include <vector>
#include <string>
using namespace std;
vector<string> split(const string& str, int delimiter(int) = ::isspace){
vector<string> result;
auto e=str.end();
auto i=str.begin();
while(i!=e){
i=find_if_not(i,e, delimiter);
if(i==e) break;
auto j=find_if(i,e, delimiter);
result.push_back(string(i,j));
i=j;
}
return result;
}
int main(){
string line;
getline(cin,line);
vector<string> result = split(line);
for(auto s: result){
cout<<s<<endl;
}
}
My choice is boost::tokenizer but I didn't have any heavy tasks and test with huge data.
Example from boost doc with lambda modification:
#include <iostream>
#include <boost/tokenizer.hpp>
#include <string>
#include <vector>
int main()
{
using namespace std;
using namespace boost;
string s = "This is, a test";
vector<string> v;
tokenizer<> tok(s);
for_each (tok.begin(), tok.end(), [&v](const string & s) { v.push_back(s); } );
// result 4 items: 1)This 2)is 3)a 4)test
return 0;
}
This is my answer. Verbose, readable and efficient.
std::vector<std::string> tokenize(const std::string& s, char c) {
auto end = s.cend();
auto start = end;
std::vector<std::string> v;
for( auto it = s.cbegin(); it != end; ++it ) {
if( *it != c ) {
if( start == end )
start = it;
continue;
}
if( start != end ) {
v.emplace_back(start, it);
start = end;
}
}
if( start != end )
v.emplace_back(start, end);
return v;
}
#include <string>
#include <vector>
#include <sstream>
inline vector<string> split(const string& s) {
vector<string> result;
istringstream iss(s);
for (string w; iss >> w; )
result.push_back(w);
return result;
}
Here is a C++11 solution that uses only std::string::find(). The delimiter can be any number of characters long. Parsed tokens are output via an output iterator, which is typically a std::back_inserter in my code.
I have not tested this with UTF-8, but I expect it should work as long as the input and delimiter are both valid UTF-8 strings.
#include <string>
template<class Iter>
Iter splitStrings(const std::string &s, const std::string &delim, Iter out)
{
if (delim.empty()) {
*out++ = s;
return out;
}
size_t a = 0, b = s.find(delim);
for ( ; b != std::string::npos;
a = b + delim.length(), b = s.find(delim, a))
{
*out++ = std::move(s.substr(a, b - a));
}
*out++ = std::move(s.substr(a, s.length() - a));
return out;
}
Some test cases:
void test()
{
std::vector<std::string> out;
size_t counter;
std::cout << "Empty input:" << std::endl;
out.clear();
splitStrings("", ",", std::back_inserter(out));
counter = 0;
for (auto i = out.begin(); i != out.end(); ++i, ++counter) {
std::cout << counter << ": " << *i << std::endl;
}
std::cout << "Non-empty input, empty delimiter:" << std::endl;
out.clear();
splitStrings("Hello, world!", "", std::back_inserter(out));
counter = 0;
for (auto i = out.begin(); i != out.end(); ++i, ++counter) {
std::cout << counter << ": " << *i << std::endl;
}
std::cout << "Non-empty input, non-empty delimiter"
", no delimiter in string:" << std::endl;
out.clear();
splitStrings("abxycdxyxydefxya", "xyz", std::back_inserter(out));
counter = 0;
for (auto i = out.begin(); i != out.end(); ++i, ++counter) {
std::cout << counter << ": " << *i << std::endl;
}
std::cout << "Non-empty input, non-empty delimiter"
", delimiter exists string:" << std::endl;
out.clear();
splitStrings("abxycdxy!!xydefxya", "xy", std::back_inserter(out));
counter = 0;
for (auto i = out.begin(); i != out.end(); ++i, ++counter) {
std::cout << counter << ": " << *i << std::endl;
}
std::cout << "Non-empty input, non-empty delimiter"
", delimiter exists string"
", input contains blank token:" << std::endl;
out.clear();
splitStrings("abxycdxyxydefxya", "xy", std::back_inserter(out));
counter = 0;
for (auto i = out.begin(); i != out.end(); ++i, ++counter) {
std::cout << counter << ": " << *i << std::endl;
}
std::cout << "Non-empty input, non-empty delimiter"
", delimiter exists string"
", nothing after last delimiter:" << std::endl;
out.clear();
splitStrings("abxycdxyxydefxy", "xy", std::back_inserter(out));
counter = 0;
for (auto i = out.begin(); i != out.end(); ++i, ++counter) {
std::cout << counter << ": " << *i << std::endl;
}
std::cout << "Non-empty input, non-empty delimiter"
", only delimiter exists string:" << std::endl;
out.clear();
splitStrings("xy", "xy", std::back_inserter(out));
counter = 0;
for (auto i = out.begin(); i != out.end(); ++i, ++counter) {
std::cout << counter << ": " << *i << std::endl;
}
}
Expected output:
Empty input:
0:
Non-empty input, empty delimiter:
0: Hello, world!
Non-empty input, non-empty delimiter, no delimiter in string:
0: abxycdxyxydefxya
Non-empty input, non-empty delimiter, delimiter exists string:
0: ab
1: cd
2: !!
3: def
4: a
Non-empty input, non-empty delimiter, delimiter exists string, input contains blank token:
0: ab
1: cd
2:
3: def
4: a
Non-empty input, non-empty delimiter, delimiter exists string, nothing after last delimiter:
0: ab
1: cd
2:
3: def
4:
Non-empty input, non-empty delimiter, only delimiter exists string:
0:
1:
One possible way of doing this is finding all occurrences of the split string and storing locations to a list. Then count input string characters and when you get to a position where there is a 'search hit' in the position list then you jump forward by 'length of the split string'. This approach takes a split string of any length. Here is my tested and working solution.
#include <iostream>
#include <string>
#include <list>
#include <vector>
using namespace std;
vector<string> Split(string input_string, string search_string)
{
list<int> search_hit_list;
vector<string> word_list;
size_t search_position, search_start = 0;
// Find start positions of every substring occurence and store positions to a hit list.
while ( (search_position = input_string.find(search_string, search_start) ) != string::npos) {
search_hit_list.push_back(search_position);
search_start = search_position + search_string.size();
}
// Iterate through hit list and reconstruct substring start and length positions
int character_counter = 0;
int start, length;
for (auto hit_position : search_hit_list) {
// Skip over substrings we are splitting with. This also skips over repeating substrings.
if (character_counter == hit_position) {
character_counter = character_counter + search_string.size();
continue;
}
start = character_counter;
character_counter = hit_position;
length = character_counter - start;
word_list.push_back(input_string.substr(start, length));
character_counter = character_counter + search_string.size();
}
// If the search string is not found in the input string, then return the whole input_string.
if (word_list.size() == 0) {
word_list.push_back(input_string);
return word_list;
}
// The last substring might be still be unprocessed, get it.
if (character_counter < input_string.size()) {
word_list.push_back(input_string.substr(character_counter, input_string.size() - character_counter));
}
return word_list;
}
int main() {
vector<string> word_list;
string search_string = " ";
// search_string = "the";
string text = "thetheThis is some text to test with the split-thethe function.";
word_list = Split(text, search_string);
for (auto item : word_list) {
cout << "'" << item << "'" << endl;
}
cout << endl;
}
I am completely lost and have been trying for hours to read from a file named "movies.txt" and storing the info from it into arrays, because it has semicolons. Any help? Thanks.
movies.txt:
The Avengers ; 2012 ; 89 ; 623357910.79
Guardians of the Galaxy ; 2014 ; 96 ; 333130696.46
Code:
#include <iostream>
#include <string>
#include <fstream>
#include <sstream>
struct Movie {
std::string name;
int year;
int rating;
double earnings;
};
int main()
{
const int MAX_SIZE = 100;
Movie movieList[MAX_SIZE];
std::string line;
int i = 0;
std::ifstream movieFile;
movieFile.open("movies.txt");
while (getline(movieFile, line, ';'))
{
movieFile >> movieList[i].name >> movieList[i].year >> movieList[i].rating >> movieList[i].earnings;
i++;
}
movieFile.close();
std::cout << movieList[0].name << " " << movieList[0].year << " " << movieList[0].rating << " " << movieList[0].earnings << std::endl;
std::cout << movieList[1].name << " " << movieList[1].year << " " << movieList[1].rating << " " << movieList[1].earnings << std::endl;
return 0;
}
What I want is to have:
movieList[0].name = "The Avengers";
movieList[0].year = 2012;
movieList[0].rating = 89;
movieList[0].earnings = 623357910.79;
movieList[1].name = "Guardians of the Galaxy";
movieList[1].year = 2014;
movieList[1].rating = 96;
movieList[1].earnings = 333130696.46;
I amended your code.
#include <iostream>
#include <string>
#include <fstream>
#include <sstream>
#include <vector>
struct Movie {
std::string name;
int year;
int rating;
double earnings;
};
std::vector<std::string>
split(const std::string &s, char delim = ',')
{
std::vector<std::string> elems;
std::stringstream ss(s);
std::string item;
while (std::getline(ss, item, delim))
{
elems.push_back(item);
}
return elems;
}
int main()
{
std::vector<Movie> movieList;
std::string line;
std::ifstream movieFile;
movieFile.open("movies.txt");
while (getline(movieFile, line))
{
std::vector<std::string> columns = split(line,';');
Movie movie;
movie.name = columns[0];
movie.year = std::stoi(columns[1]);
movie.rating = std::stoi(columns[2]);
movie.earnings = std::stof(columns[3]);
movieList.push_back(movie);
}
movieFile.close();
for (const Movie & m: movieList)
{
std::cout << m.name << " " << m.year << " " << m.rating << " " << m.earnings << std::endl;
}
return 0;
}
Basicly, I added a split function that splits the lines using ';'. Also I use vector to store the movies rather than hard coded array of movies. Much better this way.
P.S. Second version without vectors
#include <iostream>
#include <string>
#include <fstream>
#include <sstream>
#include <vector>
struct Movie {
std::string name;
int year;
int rating;
double earnings;
};
void split(const std::string &s, char delim, std::string elems[])
{
std::stringstream ss(s);
std::string item;
int i = 0;
while (std::getline(ss, item, delim))
{
elems[i++] = item;
}
}
int main()
{
//std::vector<Movie> movieList;
const int MAX_SIZE = 100;
Movie movieList[MAX_SIZE];
int movieNo = 0;
std::string line;
std::ifstream movieFile;
movieFile.open("/home/marcin/testing/movies.txt");
std::string columns[4];
while (getline(movieFile, line))
{
split(line,';', columns);
movieList[movieNo].name = columns[0];
movieList[movieNo].year = std::stoi(columns[1]);
movieList[movieNo].rating = std::stoi(columns[2]);
movieList[movieNo].earnings = std::stof(columns[3]);
++movieNo;
}
movieFile.close();
for (int i =0; i < movieNo; ++i) {
std::cout << movieList[i].name
<< " "
<< movieList[i].year
<< " "
<< movieList[i].rating
<< " "
<< movieList[i].earnings
<< std::endl;
}
return 0;
}
Use getline(my_movieFile, movie_name, ';') to get the name of the movie up to the ;.
You'll need to figure out how to remove the trailing whitespace from the name if necessary.. you can search for examples.
Read the rest of the line using getline(movieFile, line)
Use std::replace to replace all ; with a space in line
Put line into a std::stringstream.
Then extract the remaining fields from the stringstream using the >> operators.
Put this in loop do { ... } while (movieFile);
Also, don't hardcode an arbitrary number of movies. Use a std::vector<Movie> and push_back to add new ones.
I think you want to break your line into tokens using something like std::strtok. Check out the reference here. The example given on that page uses a blank as a separator, you would use a semicolon.
So having a string like remixsettings_bits=1; wysiwyg=1,2,3,abc; remixclosed_tabs=0; remixgroup_closed_tabs=786432; remixlang=0; remixchk=5; remixsid=35d4f9907281708019490d07728c27ca5c10e5de7a869c322222225e3219e; audio_vol=100
I wonder how to parse tham into map name <-> value using boost::spirit and than be capable to write it back using boost::spirit?
Update:
So what I have done:
#include <iostream>
#include <sstream>
#include <string>
#include <map>
//...
std::map<std::string, std::string> user_control::parse_cookie( std::string cookie_data )
{
std::map<std::string, std::string> parsed_cookie;
std::string token, token2;
std::istringstream iss(cookie_data);
while ( getline(iss, token, ' ') )
{
std::string name, val;
std::istringstream iss2(token);
int num = 0 ;
while ( getline(iss2, token2, '=') )
{
if ( num == 0)
{
name = token2;
num++;
}
else
{
val = token2;
std::string::iterator it = val.end() - 1;
if (*it == ';')
val.erase(it);
}
}
std::cout << "name: " << name << " value: " << val << std::endl;
parsed_cookie.insert(std::pair<std::string, std::string>(name, val));
}
return parsed_cookie;
}
but I really wonder how to port my code into boost::spirit code.
This should do the trick, parsing pairs and printing the results using Karma, although we should probably both go read Hartmut's article!
#include <boost/spirit/include/qi.hpp> // Parsing
#include <boost/spirit/include/karma.hpp> // Generation
#include <boost/fusion/adapted/std_pair.hpp> // Make std::pair a fusion vector
int main( int argc, char**argv)
{
using namespace boost::spirit;
std::string str = "keyA=value1; keyB=value2;keyC=value3;";
std::map<std::string,std::string> contents;
std::string::iterator first = str.begin();
std::string::iterator last = str.end();
const bool result = qi::phrase_parse(first,last,
*( *(qi::char_-"=") >> qi::lit("=") >> *(qi::char_-";") >> -qi::lit(";") ),
ascii::space, contents);
assert(result && first==last);
std::cout << karma::format(*(karma::string << '=' <<
karma::string << karma::eol), contents);
}
Have you seen this parser article and this generator article? AFAICT, they explain exactly what you're trying to do.
I have any sequence (or sentence) and i want to extract the last 2 strings.
For example,
sdfsdfds sdfs dfsd fgsd 3 dsfds should produce: 3 dsfds
sdfsd (dfgdg)gfdg fg 6 gg should produce: 6 gg
You can use std::string::find_last_of function to find spaces.
int main()
{
std::string test = "sdfsdfds sdfs dfsd fgsd 3 dsfds";
size_t found1 = test.find_last_of( " " );
if ( found1 != string::npos ) {
size_t found2 = test.find_last_of( " ", found1-1 );
if ( found2 != string::npos )
std::cout << test.substr(found2+1, found1-found2-1) << std::endl;
std::cout << test.substr(found1+1) << std::endl;
}
return 0;
}
The following will work if your strings are whitespace separated.
#include <iostream>
#include <string>
#include <sstream>
#include <vector>
using namespace std;
int main()
{
string str = "jfdf fhfeif shfowejef dhfojfe";
stringstream sstr(str);
vector<string> vstr;
while(sstr >> str)
{
vstr.push_back(str);
}
if (vstr.size() >= 2)
cout << vstr[vstr.size()-2] << ' ';
if (vstr.size())
cout << vstr[vstr.size()-1] << endl;
return 0;
}
Returns the strings in the wrong order, but if that doesn't matter,
std::string s ("some words here");
std::string::size_type j;
for(int i=0; i<2; ++i) {
if((j = s.find_last_of(' ')) == std::string::npos) {
// there aren't two strings, throw, return, or do something else
return 0;
}
std::cout << s.c_str()+j+1;
s = " " + s.substr(0,j);
}
Alternatively,
struct extract_two_words {
friend std::istream& operator>> (std::istream& in , extract_two_words& etw);
std::string word1;
std::string word2;
};
std::istream& operator>> (std::istream& in , extract_two_words& etw) {
std::string str1, str2;
while(in) {
in >> str1;
in >> str2;
}
etw.word2 = str1;
etw.word1 = str2;
}
I would encourage you to have a look at the Boost library. It has algorithms and data structures that help you tremendously. Here's how to solve your problem using Boost.StringAlgo:
#include <boost/algorithm/string/split.hpp>
#include <iostream>
#include <vector>
#include <string>
int main()
{
std::string test = "sdfsdfds sdfs dfsd fgsd 3 dsfds";
std::vector<std::string> v;
boost::algorithm::split(v, test, [](char c) { return c==' ';});
std::cout << "Second to last: " << v.at(v.size()-2) << std::endl;
std::cout << "Last: " << v.at(v.size()-1) << std::endl;
}
I would also encourage you to always use the vector::at method instead of []. This will give you proper error handling.
int main()
{
std::string test = "sdfsdfds sdfs dfsd fgsd 3 dsfds";
size_t pos = test.length();
for (int i=0; i < 2; i++)
pos = test.find_last_of(" ", pos-1);
std::cout << test.substr(pos+1) << std::endl;
}
Simpler :)