Counting the tokens when you tokenize the string in C++? - c++

Java has this easy method to count the tokens that you tokenize:
import java.util.*;
public class Program
{
public static void main(String[] args)
{
String str =
"This is/some text/that I am/parsing/using StringTokenizer/.";
StringTokenizer strTok =
new StringTokenizer(str, "/", false);
System.out.println("Count...");
System.out.println(strTok.countTokens());
}
}
Output:Count...6
Is there any easy way to do in C++?

You could use std::istringstreamclass along with function std::getline. For example
#include <iostream>
#include <sstream>
#include <string>
int main()
{
char s[] = "This is/some text/that I am/parsing/using StringTokenizer/.";
std::istringstream is( s );
size_t count = 0;
std::string line;
while ( std::getline( is, line, '/' ) ) ++count;
std::cout << "There are " << count << " tokens" << std::endl;
}
The output is
There are 6 tokens
Or
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
int main()
{
char s[] = "This is/some text/that I am/parsing/using StringTokenizer/.";
std::istringstream is( s );
std::vector<std::string> v;
std::string line;
while ( std::getline( is, line, '/' ) ) v.push_back( line );
std::cout << "There are " << v.size() << " tokens" << std::endl;
}
To build again the string from the vector you could use for example the following code
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
int main()
{
char s[] = "This is/some text/that I am/parsing/using StringTokenizer/.";
std::istringstream is( s );
std::vector<std::string> v;
std::string line;
while ( std::getline( is, line, '/' ) ) v.push_back( line );
std::cout << "There are " << v.size() << " tokens" << std::endl;
std::string s1;
bool first = true;
for ( const std::string &t : v )
{
if ( first ) first = false;
else s1 += '/';
s1 += t;
}
std::cout << s1 << std::endl;
}
Or you could use standard algorithm std::replace declared in header <algorithm> to replace one delimeter to another in the original string.
If your compiler does not support the range based for loop then you can write instead
for ( std::vector<std::string>::size_type i = 0; i < v.size(); i++ )
{
if ( i != 0 ) s1 += '/';
s1 += v[i];
}

You could try this:
std::vector<std::string> v(std::istream_iterator<std::string>(std::cin), {});
std::cout << "Count..." << v.size() << "\n";
This will of course tokenize at spaces, not at arbitrary separators. To split on arbitary separators, we need std::getline, but now we don't have an easy istream_iterator. Thankfully, this is a solved problem. So we write:
#include <iostream>
#include <iterator>
#include <string>
#include <vector>
namespace detail
{
template <char Sep = '\n'>
class Line : public std::string
{
friend std::istream & operator>>(std::istream & is, Line & line)
{
return std::getline(is, line, Sep);
}
};
}
int main()
{
std::vector<std::string> v(std::istream_iterator<detail::Line<'/'>>(std::cin), {});
std::cout << "Count..." << v.size() << "\n";
for (auto const & s : v) std::cout << s << "\n";
}
If you want to tokenize an existing string rather than the standard input, use a string stream, i.e. replace std::cin with iss, where we have:
#include <sstream>
std::istringstream iss(my_input_string);

Related

How to skip blank spaces when reading in a file c++

Here is the codeshare link of the exact input file: https://codeshare.io/5DBkgY
Ok, as you can see, ​there are 2 blank lines, (or tabs) between 8 and ROD. How would I skip that and continue with the program? I am trying to put each line into 3 vectors (so keys, lamp, and rod into one vector etc). Here is my code (but it does not skip the blank line).:
#include <string>
#include <iostream>
#include <sstream>
#include <vector>
#include <fstream>
using namespace std;
int main() {
ifstream objFile;
string inputName;
string outputName;
string header;
cout << "Enter image file name: ";
cin >> inputName;
objFile.open(inputName);
string name;
vector<string> name2;
string description;
vector<string> description2;
string initialLocation;
vector<string> initialLocation2;
string line;
if(objFile) {
while(!objFile.eof()){
getline(objFile, line);
name = line;
name2.push_back(name);
getline(objFile, line);
description = line;
description2.push_back(description);
getline(objFile, line);
initialLocation = line;
initialLocation2.push_back(initialLocation);
} else {
cout << "not working" << endl;
}
for (std::vector<string>::const_iterator i = name2.begin(); i != name2.end(); ++i)
std::cout << *i << ' ';
for (std::vector<string>::const_iterator i = description2.begin(); i != description2.end(); ++i)
std::cout << *i << ' ';
for (std::vector<string>::const_iterator i = initialLocation2.begin(); i != initialLocation2.end(); ++i)
std::cout << *i << ' ';
#include <cstddef> // std::size_t
#include <cctype> // std::isspace()
#include <string>
#include <vector>
#include <fstream>
#include <iostream>
bool is_empty(std::string const &str)
{
for (auto const &ch : str)
if (!std::isspace(static_cast<char unsigned>(ch)))
return false;
return true;
}
int main()
{
std::cout << "Enter image file name: ";
std::string filename;
std::getline(std::cin, filename); // at least on Windows paths containing whitespace
// are valid.
std::ifstream obj_file{ filename }; // define variables as close to where they're used
// as possible and use the ctors for initialization.
if (!obj_file.is_open()) { // *)
std::cerr << "Couldn't open \"" << filename << "\" for reading :(\n\n";
return EXIT_FAILURE;
}
std::vector<std::string> name;
std::vector<std::string> description;
std::vector<std::string> initial_location;
std::string line;
std::vector<std::string> *destinations[] = { &name, &description, &initial_location };
for (std::size_t i{}; std::getline(obj_file, line); ++i) {
if (is_empty(line)) { // if line only consists of whitespace
--i;
continue; // skip it.
}
destinations[i % std::size(destinations)]->push_back(line);
}
for (auto const &s : name)
std::cout << s << '\n';
for (auto const &s : description)
std::cout << s << '\n';
for (auto const &s : initial_location)
std::cout << s << '\n';
}
... initial_locations look like integers, though.
*) Better early exit if something bad happens. Instead of
if (obj_file) {
// do stuff
}
else {
// exit
}
-->
if(!obj_file)
// exit
// do stuff
makes your code easier to read and takes away one level of indentation for the most parts.

C++: why is vector implementation not yielding comma-delimited values?

I have a database.txt file with comma-separated values:
Name,ID,Year,Gender
I would like to extract each of these elements.
I've started with this code (I've already looked at all the other similar questions and implemented what they've suggested), but it's not printing each piece:
// reading a text file
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <sstream>
using namespace std;
int main () {
string line;
ifstream myfile ("database.txt");
if (myfile.is_open())
{
while ( getline (myfile,line) )
{
std::string str = line;
std::vector<int> vect;
std::stringstream ss(str);
int i;
while (ss >> i)
{
vect.push_back(i);
if (ss.peek() == ',')
ss.ignore();
}
for (i=0; i< vect.size(); i++)
std::cout << vect.at(i)<<std::endl;
//cout << line << '\n';
}
myfile.close();
}
else cout << "Unable to open file";
return 0;
}
How can I modify it to be able to extract each value: name, ID, year, and gender? What am I doing wrong?
Use this function to split each line:
vector<string> split(const string &s, char delim) {
stringstream ss(s);
string item;
vector<string> tokens;
while (getline(ss, item, delim)) {
tokens.push_back(item);
}
return tokens;
}
and your code be like:
// reading a text file
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <sstream>
using namespace std;
int main () {
string line;
ifstream myfile ("database.txt");
if (myfile.is_open())
{
while ( getline (myfile,line) )
{
std::string str = line;
std::vector<string> vect;
vect = split(str, ',') ;
for (int i=0; i< vect.size(); i++)
std::cout << vect.at(i)<<std::endl;
//cout << line << '\n';
}
myfile.close();
}
else cout << "Unable to open file";
return 0;
}
With the help of a utility function and a data structure you can simplify this quite easily.
#include <iostream>
#include <fstream>
#include <vector>
#include <algorithm>
std::vector<std::string> splitString( const std::string& stringToSplit, const std::string& delimiter, const bool keepEmpty ) {
std::vector<std::string> results;
if ( delimiter.empty() {
results.push_back( stringToSplit );
return results;
}
std::string::const_iterator itSubStrStart = stringToSplit.begin(), itSubStrEnd;
while( true ) {
itSubStrEnd = std::search( itSubStrStart, stringToSplit.end(), delimiter.begin(), delimiter.end() );
std::string temp( itSubStrStart, itSubStrEnd );
if ( keepEmpty || !temp.empty() )
results.push_back( temp );
if ( itSubStrEnd == stringToSplit.end() )
break;
itSubStrStart = itSubStrEnd + delimiter.size();
}
return results;
}
struct DataEntry {
std::string name;
unsigned id;
unsigned year;
std::string gender;
};
int main() {
std::string line;
std::fstream file;
file.open( "database.txt" );
std::vector<DataEntry> entries;
std::vector<std::string> elements;
while( file >> line ) {
elements = splitString( line, "," );
DataEntry entry;
entry.name = elements[0];
entry.id = std::stoul( elements[1] );
entry.year = std::stoul( elements[2] );
entry.gender = elements[3];
entries.push_back( entry );
}
file.close();
for ( auto& e : entries ) {
std::cout << e.name << " " << e.id << " "
<< e.year << " " << e.gender << '\n';
}
std::cout << "\nPress any key and enter to quit.\n";
std::cin.get();
return 0;
}
database.txt
John,12345,2010,M
Jane,54321,2012,F
output
John 12345 2010 M
Jane 54321 2012 F
This makes life it a lot easier just by reading in a single line first; then parsing that line of text and from there doing what you will with that data; either storing it to a struct, printing it, manipulating it etc.
Edit
You need to be aware of the fact that when reading in lines of text, and parsing them if you have something like this in your text file:
John Doe,12345,2010,M
It will not give you what you would expect. I'll leave that for you to figure out.

How to read a integer value from a string in C++? [duplicate]

I realize that this question may have been asked several times in the past, but I am going to continue regardless.
I have a program that is going to get a string of numbers from keyboard input. The numbers will always be in the form "66 33 9" Essentially, every number is separated with a space, and the user input will always contain a different amount of numbers.
I'm aware that using 'sscanf' would work if the amount of numbers in every user-entered string was constant, but this is not the case for me. Also, because I'm new to C++, I'd prefer dealing with 'string' variables rather than arrays of chars.
I assume you want to read an entire line, and parse that as input. So, first grab the line:
std::string input;
std::getline(std::cin, input);
Now put that in a stringstream:
std::stringstream stream(input);
and parse
while(1) {
int n;
stream >> n;
if(!stream)
break;
std::cout << "Found integer: " << n << "\n";
}
Remember to include
#include <string>
#include <sstream>
The C++ String Toolkit Library (Strtk) has the following solution to your problem:
#include <iostream>
#include <string>
#include <deque>
#include <algorithm>
#include <iterator>
#include "strtk.hpp"
int main()
{
std::string s = "1 23 456 7890";
std::deque<int> int_list;
strtk::parse(s," ",int_list);
std::copy(int_list.begin(),
int_list.end(),
std::ostream_iterator<int>(std::cout,"\t"));
return 0;
}
More examples can be found Here
#include <string>
#include <vector>
#include <iterator>
#include <sstream>
#include <iostream>
int main() {
std::string input;
while ( std::getline( std::cin, input ) )
{
std::vector<int> inputs;
std::istringstream in( input );
std::copy( std::istream_iterator<int>( in ), std::istream_iterator<int>(),
std::back_inserter( inputs ) );
// Log process:
std::cout << "Read " << inputs.size() << " integers from string '"
<< input << "'" << std::endl;
std::cout << "\tvalues: ";
std::copy( inputs.begin(), inputs.end(),
std::ostream_iterator<int>( std::cout, " " ) );
std::cout << std::endl;
}
}
#include <string>
#include <vector>
#include <sstream>
#include <iostream>
using namespace std;
int ReadNumbers( const string & s, vector <int> & v ) {
istringstream is( s );
int n;
while( is >> n ) {
v.push_back( n );
}
return v.size();
}
int main() {
string s;
vector <int> v;
getline( cin, s );
ReadNumbers( s, v );
for ( int i = 0; i < v.size(); i++ ) {
cout << "number is " << v[i] << endl;
}
}
// get string
std::string input_str;
std::getline( std::cin, input_str );
// convert to a stream
std::stringstream in( input_str );
// convert to vector of ints
std::vector<int> ints;
copy( std::istream_iterator<int, char>(in), std::istream_iterator<int, char>(), back_inserter( ints ) );
Here is how to split your string into strings along the spaces. Then you can process them one-by-one.
Generic solution for unsigned values (dealing with prefix '-' takes an extra bool):
template<typename InIter, typename OutIter>
void ConvertNumbers(InIter begin, InIter end, OutIter out)
{
typename OutIter::value_type accum = 0;
for(; begin != end; ++begin)
{
typename InIter::value_type c = *begin;
if (c==' ') {
*out++ = accum; accum = 0; break;
} else if (c>='0' && c <='9') {
accum *= 10; accum += c-'0';
}
}
*out++ = accum;
// Dealing with the last number is slightly complicated because it
// could be considered wrong for "1 2 " (produces 1 2 0) but that's similar
// to "1 2" which produces 1 0 2. For either case, determine if that worries
// you. If so: Add an extra bool for state, which is set by the first digit,
// reset by space, and tested before doing *out++=accum.
}
Try strtoken to separate the string first, then you will deal with each string.

Convert separate digits into one whole number C++

I need a little more help. I have managed to convert all my chars input from a text file into digits.
Example:
Input from file:
$1,9,56#%34,9
!4.23#$4,983
Output:
1956
349
423
4983
Now, I need to take those individual digits the 1 9 5 6 and make it read as a whole number. The output would look the same but they would actually be whole numbers. Make sense? I have to do this in my outer loop. It also has to be an EOF loop. So, I know I need to take the first digit and multiply it by 10 and add the next digit then multiply all that by 10 until I reach the last number. How can I write that in an efficient non-crashing way?
The input.txt file has the input stated above.
This is what I have so far...
Any help is greatly appreciated
/*
*/
//Character Processing Algorithm
#include <fstream>
#include <iostream>
#include <cctype>
using namespace std;
char const nwln = '\n';
int main ()
{
ifstream data;
ofstream out;
char ch;
char lastch;
int sum;
data.open ("lincoln.txt"); //file for input
if (!data)
{
cout << "Error!!! Failure to Open lincoln.txt" << endl;
system ("pause");
return 1;
}
out.open ("out.txt"); //file for output
if (!out)
{
cout << "Error!!! Failure to Open out.txt" << endl;
system ("pause");
return 1;
}
data.get (ch); // priming read for end-of-file loop
while (data)
{
sum = 0;
while ((ch != nwln) && data)
{
if (isdigit(ch))
out<<ch;
if (ch == '#')
out<<endl;
{
;
}
lastch = ch;
data.get (ch); // update for inner loop
} // inner loop
if (lastch != '#')
out<<endl;
data.get (ch); // update for outer loop
} //outer loop
cout << "The End..." << endl;
data.close (); out.close ();
system ("pause");
return 0;
} //main
If you need simply to output all numbers in the standard stream std::cout (or some other stream as for example file) then you can use the following code as an example. I only substituted the file input for std::cin input in variable line. You can use file input instead of the standard stream.
Also instead of
std::ostream_iterator<char>( std::cout ),
use
std::ostream_iterator<char>( out ),
and instead of
std::cout << std::endl;
use
out << std::endl;
after the std::copy_if call.
Here is the example
#include <iostream>
#include <iterator>
#include <string>
#include <sstream>
#include <algorithm>
#include <cctype>
int main()
{
std::string line;
while ( std::getline( std::cin, line) ) // instead of std::cin use data
{
// std::cout << line << std::endl;
std::string word;
std::istringstream is( line );
while ( std::getline( is, word, '#' ) )
{
// std::cout << word << std::endl;
auto it = std::find_if( word.begin(), word.end(),
[]( char c ) { return ( std::isdigit( c ) ); } );
if ( it != word.end() )
{
std::copy_if( it, word.end(),
std::ostream_iterator<char>( std::cout ),
[]( char c ) { return ( std::isdigit( c ) ); } );
std::cout << std::endl;
}
}
}
}
Test input data is
$1,9,56#%34,9
!4.23#$4,983
The output is
1956
349
423
4983
Or you can define the lambda before its using.
#include <iostream>
#include <iterator>
#include <string>
#include <sstream>
#include <algorithm>
#include <cctype>
int main()
{
std::string line;
while ( std::getline( std::cin, line) ) // instead of std::cin use data
{
// std::cout << line << std::endl;
std::string word;
std::istringstream is( line );
while ( std::getline( is, word, '#' ) )
{
// std::cout << word << std::endl;
auto lm_IsDigit = []( char c ) { return ( std::isdigit( c ) ); };
auto it = std::find_if( word.begin(), word.end(), lm_IsDigit );
if ( it != word.end() )
{
std::copy_if( it, word.end(),
std::ostream_iterator<char>( std::cout ),
lm_IsDigit );
std::cout << std::endl;
}
}
}
}
Read the input file character by character. To check if a character is a digit, use std::isdigit. Then add the number to the back of a string.
If you need to convert a string to an integer, use std::stoi

extracting last 2 words from a sequence of strings, space-separated

I have any sequence (or sentence) and i want to extract the last 2 strings.
For example,
sdfsdfds sdfs dfsd fgsd 3 dsfds should produce: 3 dsfds
sdfsd (dfgdg)gfdg fg 6 gg should produce: 6 gg
You can use std::string::find_last_of function to find spaces.
int main()
{
std::string test = "sdfsdfds sdfs dfsd fgsd 3 dsfds";
size_t found1 = test.find_last_of( " " );
if ( found1 != string::npos ) {
size_t found2 = test.find_last_of( " ", found1-1 );
if ( found2 != string::npos )
std::cout << test.substr(found2+1, found1-found2-1) << std::endl;
std::cout << test.substr(found1+1) << std::endl;
}
return 0;
}
The following will work if your strings are whitespace separated.
#include <iostream>
#include <string>
#include <sstream>
#include <vector>
using namespace std;
int main()
{
string str = "jfdf fhfeif shfowejef dhfojfe";
stringstream sstr(str);
vector<string> vstr;
while(sstr >> str)
{
vstr.push_back(str);
}
if (vstr.size() >= 2)
cout << vstr[vstr.size()-2] << ' ';
if (vstr.size())
cout << vstr[vstr.size()-1] << endl;
return 0;
}
Returns the strings in the wrong order, but if that doesn't matter,
std::string s ("some words here");
std::string::size_type j;
for(int i=0; i<2; ++i) {
if((j = s.find_last_of(' ')) == std::string::npos) {
// there aren't two strings, throw, return, or do something else
return 0;
}
std::cout << s.c_str()+j+1;
s = " " + s.substr(0,j);
}
Alternatively,
struct extract_two_words {
friend std::istream& operator>> (std::istream& in , extract_two_words& etw);
std::string word1;
std::string word2;
};
std::istream& operator>> (std::istream& in , extract_two_words& etw) {
std::string str1, str2;
while(in) {
in >> str1;
in >> str2;
}
etw.word2 = str1;
etw.word1 = str2;
}
I would encourage you to have a look at the Boost library. It has algorithms and data structures that help you tremendously. Here's how to solve your problem using Boost.StringAlgo:
#include <boost/algorithm/string/split.hpp>
#include <iostream>
#include <vector>
#include <string>
int main()
{
std::string test = "sdfsdfds sdfs dfsd fgsd 3 dsfds";
std::vector<std::string> v;
boost::algorithm::split(v, test, [](char c) { return c==' ';});
std::cout << "Second to last: " << v.at(v.size()-2) << std::endl;
std::cout << "Last: " << v.at(v.size()-1) << std::endl;
}
I would also encourage you to always use the vector::at method instead of []. This will give you proper error handling.
int main()
{
std::string test = "sdfsdfds sdfs dfsd fgsd 3 dsfds";
size_t pos = test.length();
for (int i=0; i < 2; i++)
pos = test.find_last_of(" ", pos-1);
std::cout << test.substr(pos+1) << std::endl;
}
Simpler :)