Read every word in a string C++ - c++

I am trying to read every word in a string. I want to pass a string in and get the first word out, process it, then get the second word, and so on. The internet isn't helping me; I know the answer is probably right under my nose, but I can't figure it out!
/// Returns the first whitespace-delimited word of `filecontent`.
///
/// @param filecontent Text to scan; it is not modified.
/// @return The first word, or an empty string when the text contains no word
///         (empty or whitespace-only input).
string lex(string filecontent) {
    // Parse the argument itself; nothing is read from standard input.
    istringstream iss(filecontent);
    string word;
    if (!(iss >> word)) {
        // Every path must return a value: report "no word" as an empty string.
        return string();
    }
    return word;
}
// Loads the text, echoes it, then prints whatever lex() returns for it.
int main() {
string data = load_file(); // Returns a string of words
cout << data;
// lex() is called exactly once here, so a single result is printed.
cout << lex(data);
// Wait for one character on stdin before exiting.
getchar();
}
Right now this sort of works: it prints out a lot of random gibberish and strange characters. The file I'm reading is loaded correctly; I checked this with cout << data and the output is what I expect. Any ideas?

Here is the solution I think you are looking for:
// Splits the loaded text on whitespace and prints every token on its own line.
int main() {
    string data = load_file(); // Returns a string of words
    istringstream iss(data);
    string tok;
    // Use the extraction itself as the loop condition: the stream only turns
    // false after a read has failed, so testing it beforehand would process
    // one extra, empty token at the end of the input.
    while (iss >> tok) {
        cout << "token: " << tok << endl;
        // you can do whatever you want with the token here
    }
}

Have a look at this, it should help you.
main.cpp
#include "stdafx.h"
#include "Utility.h"
// Reads sample.txt line by line, splits every line on spaces and prints the
// words; afterwards prints the words of the last line once more.
// Returns RETURN_OK on success and RETURN_ERROR when the file cannot be opened.
int main() {
    using namespace util;

    std::string fileName( "sample.txt" );
    if ( fileName.empty() ) {
        std::cout << "Missing or invalid filename." << std::endl;
        return RETURN_ERROR;
    }

    std::string line;
    std::vector<std::string> results;
    std::fstream fin;

    // Try To Open File For Reading
    fin.open( fileName.c_str(), std::ios_base::in );
    if ( !fin.is_open() ) {
        std::cout << "Can not open file(" << fileName << ") for reading." << std::endl;
        return RETURN_ERROR;
    }

    // Read Line By Line. The getline call is the loop condition, so a failed
    // read (end of file) ends the loop instead of being parsed as an empty line.
    while ( std::getline( fin, line ) ) {
        // Parse Each Line Using Space Character As Delimiter
        results = Utility::splitString( line, " " );

        // Print The Results On Each Iteration Of This While Loop
        // This Is Where You Would Parse The Data Or Store Results Into
        // Class Objects, Variables Or Structures.
        for ( unsigned u = 0; u < results.size(); u++ ) {
            std::cout << results[u] << " ";
        }
        std::cout << std::endl;
    }

    // Close File Pointer
    fin.close();

    // Now Print The Full Vector Of Results - This Is To Show You That Each
    // New Line Will Be Overwritten And That Only The Last Line Of The File Will
    // Be Stored After The While Loop.
    std::cout << "\n-------------------------------------\n";
    for ( unsigned u = 0; u < results.size(); u++ ) {
        std::cout << results[u] << " ";
    }

    Utility::pressAnyKeyToQuit();
    return RETURN_OK;
} // main
sample.txt
Please help me parse this text file
It spans multiple lines of text
I would like to get each individual word
stdafx.h - Some of these include files may not be needed; they are here because I have a larger solution that requires them.
#ifndef STDAFX_H
#define STDAFX_H
#include <Windows.h>
#include <stdio.h>
#include <tchar.h>
#include <conio.h>
#include <string>
#include <sstream>
#include <fstream>
#include <iostream>
#include <iomanip>
#include <vector>
#include <array>
#include <memory>
#include <queue>
#include <functional>
#include <algorithm>
// User Application Specific
// #include "ExceptionHandler.h" - One Of My Class Objects Not Used Here
// Shared return codes and sentinel constants of the util library.
namespace util {
// Status values; the example main() functions return these as exit codes.
enum ReturnCode {
RETURN_OK = 0,
RETURN_ERROR = 1,
}; // ReturnCode
// Sentinel constants; only declared here, the definitions live in stdafx.cpp.
extern const unsigned INVALID_UNSIGNED;
extern const unsigned INVALID_UNSIGNED_SHORT;
} // namespace util
#endif // STDAFX_H
stdafx.cpp
#include "stdafx.h"
namespace util {
// -1 converted to unsigned: every bit set, the largest unsigned value.
const unsigned INVALID_UNSIGNED = static_cast<unsigned>( -1 );
// -1 converted to unsigned short (its largest value), then widened to unsigned.
const unsigned INVALID_UNSIGNED_SHORT = static_cast<unsigned short>( -1 );
} // namespace util
Utility.h
#ifndef UTILITY_H
#define UTILITY_H
namespace util {
// Collection of static string helpers. The constructor is private, so the
// class cannot be instantiated and only serves as a scope for its functions.
class Utility {
public:
// Prints a prompt and waits for a key press.
static void pressAnyKeyToQuit();
// Return a copy of str converted to upper case / lower case.
static std::string toUpper(const std::string& str);
static std::string toLower(const std::string& str);
// Returns str without the leading and trailing characters listed in
// elementsToTrim (whitespace by default).
static std::string trim(const std::string& str, const std::string elementsToTrim = " \t\n\r");
// Parse the whole of str as one value of the named type; on a bad
// conversion they throw a std::string describing the failure.
static unsigned convertToUnsigned(const std::string& str);
static int convertToInt(const std::string& str);
static float convertToFloat(const std::string& str);
// Splits strStringToSplit at every occurrence of strDelimiter; empty pieces
// are dropped when keepEmpty is false.
static std::vector<std::string> splitString(const std::string& strStringToSplit, const std::string& strDelimiter, const bool keepEmpty = true);
private:
Utility(); // Private - Not A Class Object
Utility(const Utility& c); // Not Implemented
Utility& operator=(const Utility& c); // Not Implemented
// Parses uNumValues comma-separated values from str into pValue[0..uNumValues-1];
// returns false when the text does not have exactly that shape.
template<typename T>
static bool stringToValue(const std::string& str, T* pValue, unsigned uNumValues);
// Parses one leading value of type T from str; remainder receives the number
// of characters that were consumed.
template<typename T>
static T getValue(const std::string& str, std::size_t& remainder);
}; // Utility
// Template member definitions, included while namespace util is still open.
#include "Utility.inl"
} // namespace util
#endif // UTILITY_H
Utility.inl
// ----------------------------------------------------------------------------
// stringToValue()
// Parses exactly uNumValues comma-separated values of type T from str.
//
// str        - text of the form "v0,v1,...,vN-1" with nothing before, between
//              or after the values other than the separating commas.
// pValue     - destination array with room for uNumValues elements.
// uNumValues - number of values expected in str.
//
// Returns true when every value was parsed and the whole string was consumed,
// false when the shape of the text does not match. Exceptions thrown by
// getValue<T> (e.g. from std::stoi for non-numeric text) propagate unchanged.
//
// Note: `static` belongs only on the in-class declaration; repeating it on
// this out-of-class definition is ill-formed, so it is not used here.
template<typename T>
bool Utility::stringToValue(const std::string& str, T* pValue, unsigned uNumValues) {
    // Nothing can be stored without a destination or with zero values.
    if (pValue == nullptr || uNumValues == 0) {
        return false;
    }

    // N values are separated by exactly N - 1 commas. Compare as unsigned
    // quantities to avoid a signed/unsigned mismatch.
    const std::size_t numCommas =
        static_cast<std::size_t>(std::count(str.begin(), str.end(), ','));
    if (numCommas != static_cast<std::size_t>(uNumValues - 1)) {
        return false;
    }

    std::size_t remainder;
    pValue[0] = getValue<T>(str, remainder);

    if (uNumValues == 1) {
        // A single value must consume the entire string.
        return str.size() == remainder;
    }

    // offset always indexes the separator that follows the value just parsed.
    std::size_t offset = remainder;
    if (str.at(offset) != ',') {
        return false;
    }

    const unsigned uLastIdx = uNumValues - 1;
    for (unsigned u = 1; u < uNumValues; ++u) {
        // Skip the comma, then parse the next value from the rest of the text.
        pValue[u] = getValue<T>(str.substr(++offset), remainder);
        offset += remainder;

        // Intermediate values must be followed by a comma; the last value
        // must end exactly at the end of the string.
        if ((u < uLastIdx && str.at(offset) != ',') ||
            (u == uLastIdx && offset != str.size()))
        {
            return false;
        }
    }
    return true;
} // stringToValue
Utility.cpp
#include "stdafx.h"
#include "Utility.h"
namespace util {
// ----------------------------------------------------------------------------
// pressAnyKeyToQuit()
// Prints "Press any key to quit" on its own line and then calls _getch(),
// which waits for a single key press.
void Utility::pressAnyKeyToQuit() {
std::cout << "\nPress any key to quit" << std::endl;
_getch();
} // pressAnyKeyToQuit
// ----------------------------------------------------------------------------
// toUpper()
// Returns a copy of str with every character converted to upper case.
std::string Utility::toUpper( const std::string& str ) {
    std::string result = str;
    // The <cctype> functions require an argument representable as
    // unsigned char; passing a negative plain char is undefined behaviour,
    // so each character is converted to unsigned char first.
    std::transform( result.begin(), result.end(), result.begin(),
                    []( unsigned char c ) { return static_cast<char>( ::toupper( c ) ); } );
    return result;
} // toUpper
// ----------------------------------------------------------------------------
// toLower()
// Returns a copy of str with every character converted to lower case.
std::string Utility::toLower( const std::string& str ) {
    std::string result = str;
    // The <cctype> functions require an argument representable as
    // unsigned char; passing a negative plain char is undefined behaviour,
    // so each character is converted to unsigned char first.
    std::transform( result.begin(), result.end(), result.begin(),
                    []( unsigned char c ) { return static_cast<char>( ::tolower( c ) ); } );
    return result;
} // toLower
// ----------------------------------------------------------------------------
// trim()
// Removes Elements To Trim From Left And Right Side Of The str
// Strips every leading and trailing character contained in elementsToTrim
// from str and returns what is left (possibly an empty string).
std::string Utility::trim( const std::string& str, const std::string elementsToTrim ) {
    const std::string::size_type first = str.find_first_not_of( elementsToTrim );
    if ( first != std::string::npos ) {
        // At least one character survives, so a last kept character exists too.
        const std::string::size_type last = str.find_last_not_of( elementsToTrim );
        return str.substr( first, last - first + 1 );
    }
    // The string consisted only of characters to trim.
    return std::string();
} // trim
// ----------------------------------------------------------------------------
// getValue()
// float: parses a leading floating-point value with std::stof; remainder
// receives the number of characters consumed.
template<>
float Utility::getValue( const std::string& str, std::size_t& remainder ) {
return std::stof( str, &remainder );
} // getValue <float>
// ----------------------------------------------------------------------------
// getValue()
// int: parses a leading integer with std::stoi; remainder receives the
// number of characters consumed.
template<>
int Utility::getValue( const std::string& str, std::size_t& remainder ) {
return std::stoi( str, &remainder );
} // getValue <int>
// ----------------------------------------------------------------------------
// getValue()
// unsigned: parses with std::stoul, whose unsigned long result is converted
// to unsigned on return.
// NOTE(review): where unsigned long is wider than unsigned, values above the
// unsigned range are truncated by that conversion - confirm this is acceptable.
template<>
unsigned Utility::getValue( const std::string& str, std::size_t& remainder ) {
return std::stoul( str, &remainder );
} // getValue <unsigned>
// ----------------------------------------------------------------------------
// convertToUnsigned()
// Converts the whole of str to an unsigned value.
// Throws a std::string with a descriptive message when stringToValue rejects
// the text.
unsigned Utility::convertToUnsigned( const std::string& str ) {
    unsigned parsed = 0;
    if ( stringToValue( str, &parsed, 1 ) ) {
        return parsed;
    }
    std::ostringstream message;
    message << __FUNCTION__ << " Bad conversion of [" << str << "] to unsigned";
    throw message.str();
} // convertToUnsigned
// ----------------------------------------------------------------------------
// convertToInt()
// Converts the whole of str to an int.
// Throws a std::string with a descriptive message when stringToValue rejects
// the text.
int Utility::convertToInt( const std::string& str ) {
    int parsed = 0;
    if ( stringToValue( str, &parsed, 1 ) ) {
        return parsed;
    }
    std::ostringstream message;
    message << __FUNCTION__ << " Bad conversion of [" << str << "] to int";
    throw message.str();
} // convertToInt
// ----------------------------------------------------------------------------
// convertToFloat()
// Converts the whole of str to a float.
// Throws a std::string with a descriptive message when stringToValue rejects
// the text.
float Utility::convertToFloat(const std::string& str) {
    float parsed = 0;
    if (stringToValue(str, &parsed, 1)) {
        return parsed;
    }
    std::ostringstream message;
    message << __FUNCTION__ << " Bad conversion of [" << str << "] to float";
    throw message.str();
} // convertToFloat
// ----------------------------------------------------------------------------
// splitString()
// Cuts strStringToSplit at every occurrence of strDelimiter and returns the
// pieces in order. An empty delimiter yields the whole input as the single
// piece. Empty pieces are kept only when keepEmpty is true.
std::vector<std::string> Utility::splitString( const std::string& strStringToSplit, const std::string& strDelimiter, const bool keepEmpty ) {
    std::vector<std::string> pieces;
    if ( strDelimiter.empty() ) {
        pieces.push_back( strStringToSplit );
        return pieces;
    }

    std::string::size_type begin = 0;
    for ( ;; ) {
        // Position of the next delimiter at or after begin, npos if none is left.
        const std::string::size_type hit = strStringToSplit.find( strDelimiter, begin );
        const bool isLastPiece = ( hit == std::string::npos );

        const std::string piece = isLastPiece
            ? strStringToSplit.substr( begin )
            : strStringToSplit.substr( begin, hit - begin );
        if ( keepEmpty || !piece.empty() ) {
            pieces.push_back( piece );
        }

        if ( isLastPiece ) {
            break;
        }
        // Continue right behind the delimiter that was just found.
        begin = hit + strDelimiter.size();
    }
    return pieces;
} // splitString
} // namspace util
In my small utility library I have a function that will split a string that can use any delimiter that the user defines. It will search for the first occurrence of that character delimiter and it will save everything before it into a string and it will push that string into a vector of strings, and it will continue this for every occurrence of that character until it is finished parsing the full string that is passed to it. It will then return a vector of strings back to the user. This is very helpful when engaged in parsing text files or even just data types with long strings that need to be broken down. Now if there is a case where you are parsing a text file and lets say you need to have more than one word as a single string, this can be done but requires more work on your part. For example a text file might have personal record on a single line.
LastName, FirstName MiddleInitial Age Phone# Address
Cook, John S 33 1-888-323-4545 324 Complex Avenue
And you would want the 324 Complex Avenue to be in a single string also you don't want the comma stored after the last name. Your structure in code to store this info might look like this:
// One person's record as read from a single line of the text file.
struct PersonalRecord {
    std::string firstName;
    std::string lastName;
    char middleInitial = '\0';  // initialised so a fresh record holds no garbage
    unsigned age = 0;
    std::string phoneNumber;
    std::string address;        // may contain several words, e.g. "324 Complex Avenue"
};
What you would have to do is after you read this line in from your file on that same iteration of the while loop is you would have to do multiple parsing.
You would first start by using a temporary string and vector of strings and use the utility function splitString with the delimeter being the comma. So this would save 2 strings in the temp vector of strings the first being: Cook and the second being the rest of the line after the comma including the leading space. The reason you have the temp string and temp vector of strings is that you will need to pop values at when needed. So in this case we would have to do the following, but first how do we resolve the case with multiple words to one string? We can change the line of text in the text file to be enclosed with double quotes as such:
textfile
Cook, John S 33 1-888-323-4545 "324 Complex Avenue"
Evens, Sue A 24 1-888-323-6996 "128 Mission Rd"
Adams, Chris B 49 1-777-293-8234 "2304 Helms Drive"
Then parse it with this logic flow or algorithm.
main.cpp
#including "stdafx.h"
#including "Utility.h"
int main() {
using namespace util;
std::string strFilename( "personalRecord.txt" );
std::ifstream file;
std::string strLine;
std::vector<std::string> vTemp;
std::vector<std::string> vResult;
std::vector<PersonalRecord> vData;
// Open File For Reading
file.open( strFilename.c_str() );
// Check For Error Of Opening File
if ( !file.is_open() ) {
std::cout << "Error opening file (" << strFilename << ")" << std::endl;
return RETURN_ERROR;
}
// Continue Until End Of File
while( !file.eof() ) {
// Get Single Full Line Save To String
std::getline( file, strLine );
// Check For Comma
vTemp = Utility::splitString( strLine, ",");
// Save First String For Laster
std::string lastName = vTemp[0];
// Split String Using A Double Quote Delimiter Delimiter
vTemp = Utility::splitString( vTemp[1], "\"" );
// Check To See If vTemp Has More Than One String
if ( vTemp.size() > 1 ) {
// We Need To Use Pop Back To Account For Last Double Quote
vTemp.pop_back(); // Remove Last Double Quote
std::string temp = vTemp.back();
vTemp.pop_back(); // Remove Wanted String From vTemp.
// At This Point We Need To Parse vTemp Again Using Space Delimiter
vResult = Utility::splitString( vTemp[0], " " );
// Need To Account For Leading Space In Vector
vResult[0].erase();
// Need To Account For Last Space In Vector
vResult.pop_back();
// Now We Can Push Our Last String Back Into vResult
vResult.push_back( temp );
// Replace The First String " " With Our LastName
vResult[0] = lastName;
} else if ( vTemp.size() == 1 ) {
// Just Parse vTemp Using Space Delimiter
vResult = Utility::splitString( vTemp[0], " " );
}
// Print Out Results For Validity
for ( unsigned u = 0; u < vResult.size(); u++) {
std::cout << vResult.at(u) << " ";
}
std::cout << std::endl;
// Here Is Where You Would Populate Your Variables, Structures Or Classes On Each Pass Of The While Loop.
// With This Structure There Should Only Be 8 Entries Into Our vResult
PersonalRecord temp;
temp.lastName = vResult[0];
temp.firstName = vResult[1];
temp.middleInitial = vResult[2][0];
temp.age = Utility::convertToUnsigned( vResult[3] );
temp.phoneNumber = vResult[4];
temp.address = vResult[5];
vData.push_back( temp );
} // while
// Close File
file.close();
std::cout << std::endl << std::endl;
// Print Using Structure For Validity
std::cout << "---------------------------------------\n";
for ( unsigned u = 0; u < vData.size(); u++ ) {
std::cout << vData[u].lastName << " "
<< vData[u].firstName << " "
<< vData[u].middleInitial << " "
<< vData[u].age << " "
<< vData[u].phoneNumber << " "
<< vData[u].address << std::endl;
}
Utility::pressAnyKeyToQuit();
return RETURN_OK;
} // main
So both care and consideration have to be taken when parsing text or strings. You have to account for every single character, including carriage returns, spaces, etc., so the format in which the text file is written has to be considered.
Yes the splitString() will also parse tabs, you would just have to use "\t" for tabs, etc. Just remember that it will make a split at every occurrence. So if you have a sentence that has a colon ":" in it, but then you decide to use the colon as your delimiter between values, it will split that sentence as well. Now you could have different rules for each line of text from the file and if you know what line you are on you can parse each line accordingly. This is why most people prefer to write their code to read and parse binary, because it is much easier to program, then writing a text parser.
I chose to use the PersonalRecord structure to show you how you can extract strings from a line of text and to convert them to basic types such as int, float or double by using some of my other functions in my Utility class. All methods in this class are declared as static and the constructor is private, so the class name acts as a wrapper or a namespace so to speak. You can not create an instance of a Utility util; // invalid object. Just include the header file and use the class name with the scope resolution operator :: to access any of the functions and make sure you are using the namespace util.

Related

Easiest way to load variables from a text file

I have a program where I want to load Variables from a text file to use them as default variables.
The text file should look like this:
Name=No Name
Age=8
Gender=male
etc.
Is there a simpler way and if not how do I do that in the place with the question marks?
My Code look like this:
// Sketch from the question: reads "Key=Value" lines from the stream f and is
// meant to store the values in the variables below. The ????? markers are the
// parts the author has not written yet, so this does not compile as shown.
int Age;
std::string Name;
bool male;
if(f.is_open())
{
// NOTE(review): eof() only becomes true after a read has failed, so the
// result of the last getline below is used without being checked.
while (!f.eof())
{
getline(f, line);
// NOTE(review): find(...) == npos is true when "Name=" is NOT in the line;
// the branch presumably wants != npos. The same applies to the two tests below.
if (line.find("Name=") == std::string::npos)
{
Name=?????;
continue;
}
else if (line.find("Gender=") == std::string::npos)
{
// NOTE(review): "; then" is not C++; the ';' would also end the if statement.
if(????? == "true"); then
male=true;
else
male=false;
continue;
}
else if (line.find("Age=") == std::string::npos)
{
Age=?????;
continue;
}
//etc. ...
}
f.close();
// NOTE(review): the brace opened after if(f.is_open()) is not closed in this excerpt.
Is there a simpler way?
You could use a serialization library, like cereal or Boost, as #JesperJuhl suggested.
However, I would strongly suggest to take a step back, and review your approach. You are asking for an improvement, but you don't have a good solution at this point, because Why is iostream::eof inside a loop condition considered wrong?
As I had written here, I will use std::getline() as the loop condition instead of ios::eof(), in order to parse the file, line by line.
How do I do that in the place with the question marks?
Then, for every line, I will tokenize it, based on a delimiter (equal sign in your case), in order to extract two tokens, the name of the variable and its default value. Read more about it in Parse (split) a string in C++ using string delimiter (standard C++)
Afterwards, I would use an if-else approach (You could use a switch statement instead) to check the name of the variable, and assign its default value to the actual variables of the program.
Full code example:
#include <iostream>
#include <string>
#include <fstream>
// Reads "Key=Value" lines from mytextfile.txt and prints the defaults found
// for Name, Age and Gender. Returns 1 when the file cannot be opened.
// std::stoi throws when the Age value is not a number; that exception is not
// handled here.
int main(void) {
    std::string defaultName, gender;
    int age = 0;  // stays 0 when the file has no Age entry

    std::ifstream infile("mytextfile.txt");
    if (!infile) {
        std::cerr << "Could not open mytextfile.txt" << std::endl;
        return 1;
    }

    const std::string delimiter = "=";
    std::string line;
    while (std::getline(infile, line)) {
        // Locate the delimiter once; a line without it is not a key/value pair.
        const std::string::size_type pos = line.find(delimiter);
        if (pos == std::string::npos) {
            std::cout << "Unknown entry: " << line << std::endl;
            continue;
        }

        const std::string varName = line.substr(0, pos);
        const std::string defaultValue = line.substr(pos + delimiter.size());

        if (varName == "Name") {
            defaultName = defaultValue;
        } else if (varName == "Age") {
            age = std::stoi(defaultValue);
        } else if (varName == "Gender") {
            gender = defaultValue;
        } else {
            std::cout << "Unknown entry: " << line << std::endl;
        }
    }

    std::cout << defaultName << ", " << age << ", " << gender << std::endl;
    return 0;
}
Output:
No Name, 8, male
If you feel a need to write it yourself instead of using a ready library, you could use a std::unordered_map<> and add some streaming and extraction support around it. Here's an example with comments in the code:
#include <istream>
#include <limits>
#include <stdexcept>
#include <string>
#include <unordered_map>
// Holds Key -> Value pairs as text and converts Values to typed results on
// request.
class KeyValue {
    // Backing store: one entry per Key with the unparsed Value text.
    std::unordered_map<std::string, std::string> m_kv{};

public:
    // Returns a reference to the Value stored under Key. Forwards to
    // unordered_map::at, which throws std::out_of_range for an unknown Key.
    std::string& at(const std::string& Key) { return m_kv.at(Key); }
    const std::string& at(const std::string& Key) const { return m_kv.at(Key); }

    // Returns the Value stored under Key converted to T. This primary
    // template handles every T that can be initialised from a std::string;
    // other types are served by explicit specializations declared outside
    // the class.
    template<typename T>
    T as(const std::string& Key) const {
        const std::string& text = at(Key);
        return text;
    }

    // Stores the Value of Key in var, converted with the matching as<T>.
    template<typename T>
    void extract_to(T& var, const std::string& Key) const {
        var = as<T>(Key);
    }

    // Fills the map from an input stream (for example an open file).
    friend std::istream& operator>>(std::istream&, KeyValue&);
};
// Explicit specializations of KeyValue::as<T>()
// floats
// Each specialization looks the Key up with at() and converts the stored text
// with the std::sto* function that matches the result type.
// float via std::stof
template<>
float KeyValue::as(const std::string& Key) const {
return std::stof(at(Key));
}
// double via std::stod
template<>
double KeyValue::as(const std::string& Key) const {
return std::stod(at(Key));
}
// long double via std::stold
template<>
long double KeyValue::as(const std::string& Key) const {
return std::stold(at(Key));
}
// signed integers
// int via std::stoi
template<>
int KeyValue::as(const std::string& Key) const {
return std::stoi(at(Key));
}
// long via std::stol
template<>
long KeyValue::as(const std::string& Key) const {
return std::stol(at(Key));
}
// long long via std::stoll
template<>
long long KeyValue::as(const std::string& Key) const {
return std::stoll(at(Key));
}
// unsigned integers
// unsigned: the standard library has no std::stou, so the text is parsed as
// unsigned long and then range-checked. Without the check a value above the
// unsigned range would be silently truncated wherever unsigned long is wider
// than unsigned. Throws std::out_of_range in that case, the same exception
// std::stoul itself uses for values it cannot represent.
template<>
unsigned KeyValue::as(const std::string& Key) const {
    const unsigned long parsed = std::stoul(at(Key));
    if(parsed > std::numeric_limits<unsigned>::max()) {
        throw std::out_of_range("\"" + Key + "\" does not fit in unsigned");
    }
    return static_cast<unsigned>(parsed);
}
// unsigned long via std::stoul
template<>
unsigned long KeyValue::as(const std::string& Key) const {
    return std::stoul(at(Key));
}
// unsigned long long via std::stoull
template<>
unsigned long long KeyValue::as(const std::string& Key) const {
    return std::stoull(at(Key));
}
// bool
// bool: accepts exactly "true"/"1" and "false"/"0"; any other text throws
// std::range_error naming the offending Key.
template<>
bool KeyValue::as(const std::string& Key) const {
    const std::string& text = at(Key);
    const bool isTrue = (text == "true" || text == "1");
    const bool isFalse = (text == "false" || text == "0");
    if(!isTrue && !isFalse) {
        throw std::range_error("\"" + Key + "\" is neither true nor false");
    }
    return isTrue;
}
// the friend function that extracts key value strings from a stream
// Reads "Key=Value" lines from is until a read fails and stores each pair in
// kv. A line without '=' or with '=' as its first character sets the failbit
// on the stream, which makes the next read fail and ends the loop.
std::istream& operator>>(std::istream& is, KeyValue& kv) {
    for(std::string line; std::getline(is, line);) {
        const auto eq = line.find('=');
        const bool malformed = (eq == std::string::npos || eq == 0);
        if(malformed) {
            is.setstate(std::ios::failbit);
            continue;
        }
        // Everything before '=' is the Key, everything after it the Value.
        // emplace keeps an existing entry when the Key is already present.
        kv.m_kv.emplace(line.substr(0, eq), line.substr(eq + 1));
    }
    return is;
}
With that in place, you can read a file and populate the variables that you've preferably put in a class / struct. Example:
#include <fstream>
struct Variables {
std::string Name{};
unsigned int Age{};
std::string Gender{};
double PI{};
bool Hungry{};
bool Sad{};
Variables(const std::string& filename) {
std::ifstream is(filename);
if(is) {
KeyValue tmp;
is >> tmp; // stream the whole file into tmp
// extract values
tmp.extract_to(Name, "Name");
tmp.extract_to(Age, "Age");
tmp.extract_to(Gender, "Gender");
tmp.extract_to(PI, "PI");
tmp.extract_to(Hungry, "Hungry");
tmp.extract_to(Sad, "Sad");
} else throw std::runtime_error("Could not read \""+filename+"\".");
}
};
Example data file (vars.dat):
Name=No name
Age=8
Gender=male
PI=3.14159
Hungry=true
Sad=false
...and a main example::
#include <iostream>
// Loads vars.dat into a Variables object and prints every member; any
// std::exception raised while loading is reported on std::cerr.
int main() {
    try {
        Variables var("vars.dat"); // open file and populate variables

        std::cout << std::boolalpha; // print the bools as true/false
        std::cout << "Name: " << var.Name << "\n";
        std::cout << "Age: " << var.Age << "\n";
        std::cout << "Gender: " << var.Gender << "\n";
        std::cout << "PI: " << var.PI << "\n";
        std::cout << "Hungry: " << var.Hungry << "\n";
        std::cout << "Sad: " << var.Sad << "\n";
    } catch(const std::exception& ex) {
        std::cerr << ex.what() << "\n";
    }
}
I tried to simplify the solution of #Ted Lyngmo:
... I think it is neither the fastest nor the best way, but it is simpler and shorter:
#include <sstream>
// Keeps the complete text of a "Key=Value" file in memory and looks values up
// by scanning that text line by line.
class loadVars
{
public:
    // The raw text that extract_to_first() searches.
    std::string file;

    loadVars() { }

    //Input ->
    // Take the text directly ...
    loadVars(const std::string& Text) {
        this->setFile(Text);
    }
    // ... or read everything that is left in a stream.
    loadVars(std::istream& is) {
        this->setFile(is);
    }

    // Replaces the stored text with the remaining contents of is. Returns the
    // stream, as extraction operators conventionally do, so the call can be
    // chained and its result tested.
    friend std::istream& operator>>(std::istream& is, loadVars& lv) {
        lv.setFile(is);
        return is;
    }

    void setFile(const std::string& Text) {
        this->file = Text;
    }
    void setFile(std::istream& is) {
        this->file = std::string((std::istreambuf_iterator<char>(is)), std::istreambuf_iterator<char>());
    }
    //<-

    // Returns the text after the first '=' of the first line whose key (the
    // text before that '=') equals `to`. Returns "-1" when no line matches.
    std::string extract_to_first(const std::string& to) const {
        std::istringstream lines(this->file);
        std::string line;
        while (std::getline(lines, line)) {
            // Search for '=' once per line and reuse the position.
            const std::string::size_type eq = line.find('=');
            if (eq != std::string::npos && line.compare(0, eq, to) == 0) {
                return line.substr(eq + 1);
            }
        }
        return "-1";
    }
};
I would not reinvent this. As suggested, libraries for serialization exist. Consider Boost.PropertyTree as an example and Boost can be helpful to learn in general.

remove the last occurrence of character in string in file c++

I have a function which reads each line in a file (using ifstream), modifies that line and then writes to a file with many "{key,value}," (by using fstream), I need to remove the last comma in that file, but after searching, I still have no idea to do this.
Does anyone have any suggestion please ?
current output file:
//bein file
std::map <std::string, std::string> my_map =
...
{key,value},
{key,value},
{last_key,last_value},
};
//some comment
expected output(remove the last comma):
//bein file
std::map <std::string, std::string> my_map =
...
{key,value},
{key,value},
{last_key,last_value}
};
//some comment
my code is here:
#include <windows.h>
#include <queue>
#include <string>
#include <iostream>
#include <regex>
#include <fstream>
#include <iterator>
#include <stdio.h>
// I think MS's names for some things are obnoxious.
// Short aliases for two Win32 constants: the invalid-handle value that
// FindFirstFile results are compared against, and the directory attribute flag.
const HANDLE HNULL = INVALID_HANDLE_VALUE;
const int A_DIR = FILE_ATTRIBUTE_DIRECTORY;
// Appends `line` plus a newline to ./output/result.cpp, but only when `path`
// contains "values-en-rGB". `file_name` is not used.
// Always returns the string "output\\values.cpp".
// NOTE(review): the returned name differs from the file that is written -
// confirm which of the two is intended.
std::string write_cpp_file(std::string path, std::string line, std::string file_name)
{
    const bool is_en_gb = path.find("values-en-rGB") != std::string::npos;
    if (is_en_gb) {
        // Append mode: lines from successive calls accumulate in the file.
        std::fstream out("./output/result.cpp", std::ios::out | std::ios::app);
        if (out)
        {
            out << line << std::endl;
            out.close();
        }
    }
    return std::string("output\\values.cpp");
}
// Converts the <string name="..."> lines of one file into "{key,value}," text
// and hands every converted line to write_cpp_file.
// path - directory of the file; file - the find data whose cFileName is
// appended to path to form the file to open.
void analyze_file(std::string const &path, WIN32_FIND_DATA const &file) {
    const std::string file_name = path + file.cFileName;
    std::cout << "processing file: " << file_name << "\n";

    std::ifstream input(file_name);
    if (input)
    {
        // Filters: only lines that contain a string tag and neither an
        // opening nor a closing comment marker are converted.
        const std::regex open_comment("(.*)(<!--)(.*)");
        const std::regex close_comment("(.*)(-->)(.*)");
        const std::regex string_tag("(.*)(<string name=)(.*)");

        // Rewrites, applied in this order.
        const std::regex find1("<string name=");
        const std::string replace1 = "{";
        const std::regex find2("\">");
        const std::string replace2 = "\",\"";
        const std::regex find3("</string>");
        const std::string replace3 = "\"},";

        std::string line;
        while (getline(input, line))
        {
            if (std::regex_match(line, open_comment))
                continue;
            if (std::regex_match(line, close_comment))
                continue;
            if (!std::regex_match(line, string_tag))
                continue;

            std::string converted = std::regex_replace(line, find1, replace1);
            converted = std::regex_replace(converted, find2, replace2);
            converted = std::regex_replace(converted, find3, replace3);
            write_cpp_file(path, converted, file_name);
        }
    }
    input.close();
}
//process each file in folder/subfolder
//process each file in folder/subfolder
// Visits folder_name and every sub-directory found below it, calling
// analyze_file for each entry that is not a directory. Directories still to
// be visited are kept in a queue, so no recursion is used.
void convert(std::string const &folder_name) {
HANDLE finder; // for FindFirstFile
WIN32_FIND_DATA file; // data about current file.
// std::greater makes top() the smallest pending path.
std::priority_queue<std::string, std::vector<std::string>,
std::greater<std::string> > dirs;
dirs.push(folder_name); // start with passed directory
do {
std::string path = dirs.top();// retrieve directory to search
dirs.pop();
if (path[path.size()-1] != '\\') // normalize the name.
path += "\\";
std::string mask = path + "*"; // create mask for searching
// traverse a directory. Search for sub-dirs separately, because we
// don't want a mask to apply to directory names. "*.cpp" should find
// "a\b.cpp", even though "a" doesn't match "*.cpp".
//
// First search for files:
// `continue` inside a do-while jumps to the loop condition, i.e. the
// next queued directory is taken when this search cannot be started.
if (HNULL==(finder=FindFirstFile(mask.c_str(), &file)))
continue;
do {
if (!(file.dwFileAttributes & A_DIR))
analyze_file(path, file);
} while (FindNextFile(finder, &file));
FindClose(finder);
// Then search for subdirectories:
// The pattern is the same "*" as above; only the attribute test differs.
if (HNULL==(finder=FindFirstFile((path + "*").c_str(), &file)))
continue;
do {
// Entries whose name starts with '.' (this includes "." and "..") are skipped.
if ((file.dwFileAttributes & A_DIR) && (file.cFileName[0] != '.'))
dirs.push(path + file.cFileName);
} while (FindNextFile(finder, &file));
FindClose(finder);
} while (!dirs.empty());
}
// Runs the shell command that deletes the .cpp files matching output\*.cpp
// and then asks Windows to create the "output" directory.
void create_output_folder()
{
    const std::string pattern = "output\\*.cpp";
    const std::string command = std::string("del /Q ") + pattern;
    system(command.c_str());
    CreateDirectory("output", NULL);
}
// Prepares the output folder, converts everything below ./Strings and prints
// a completion message.
int main()
{
create_output_folder();
convert("./Strings");
std::cout << "finish convert" << "\n";
return 0;
}
You can do the following do a first write to file without any , outside the loop. Then for the others write inside the loop you print the , at the beginning of the line:
// Handle the first input line separately: its entry is written without the
// leading comma, every later entry keeps it.
// NOTE(review): this assumes the converted text of the first line starts with
// the comma and that the first line is one that gets converted - confirm
// against the replacement strings and the input files.
if (getline(empSalariesOld, line))
{
    if (!std::regex_match(line, open_comment) &&
        !std::regex_match(line, close_comment) &&
        std::regex_match(line, string_tag) )
    {
        result = std::regex_replace(line, find1, replace1);
        result = std::regex_replace(result, find2, replace2);
        result = std::regex_replace(result, find3, replace3);
        //remove the first char which is the comma
        result.erase(0, 1);
        std::string cpp_file = write_cpp_file(path, result, file_name);
    }
}
// All remaining lines are converted unchanged.
while (getline(empSalariesOld, line))
{
    if (!std::regex_match(line, open_comment) &&
        !std::regex_match(line, close_comment) &&
        std::regex_match(line, string_tag) )
    {
        result = std::regex_replace(line, find1, replace1);
        result = std::regex_replace(result, find2, replace2);
        result = std::regex_replace(result, find3, replace3);
        std::string cpp_file = write_cpp_file(path, result, file_name);
    }
}
I am guessing you are putting the comma in replace3. You can:
std::string replace1 = ",\n{"; // put the comma at the beginning of the line, then go to the next line
...
std::string replace3 = "\"}"; // closing quote and brace only; the comma now leads the next entry
And since you are putting the new line in replace1 you should remove it from new file:
file << line;

C++: why is vector implementation not yielding comma-delimited values?

I have a database.txt file with comma-separated values:
Name,ID,Year,Gender
I would like to extract each of these elements.
I've started with this code (I've already looked at all the other similar questions and implemented what they've suggested), but it's not printing each piece:
// reading a text file
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <sstream>
using namespace std;
// Reads database.txt line by line and prints every comma-separated field of
// every line on its own line.
int main () {
    ifstream myfile ("database.txt");
    if (!myfile.is_open())
    {
        cout << "Unable to open file";
        return 0;
    }

    string line;
    while ( getline (myfile,line) )
    {
        // The fields are text (a name, a gender, ...), so they are collected
        // as strings. Extracting them with `ss >> int` fails on the first
        // non-numeric field and leaves the vector empty.
        std::vector<std::string> vect;
        std::stringstream ss(line);
        std::string field;
        while (std::getline(ss, field, ','))
        {
            vect.push_back(field);
        }

        for (std::vector<std::string>::size_type i = 0; i < vect.size(); i++)
            std::cout << vect.at(i) << std::endl;
    }
    myfile.close();
    return 0;
}
How can I modify it to be able to extract each value: name, ID, year, and gender? What am I doing wrong?
Use this function to split each line:
// Breaks s into the pieces that are separated by delim and returns them in
// order. Built on getline, so an empty input produces no pieces and a
// trailing delimiter does not add an empty last piece.
vector<string> split(const string &s, char delim) {
    vector<string> pieces;
    stringstream stream(s);
    for (string piece; getline(stream, piece, delim); ) {
        pieces.push_back(piece);
    }
    return pieces;
}
and your code be like:
// reading a text file
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <sstream>
using namespace std;
// Prints every comma-separated field of every line of database.txt on its
// own line, using split() to break the lines apart.
int main () {
    ifstream myfile ("database.txt");
    if (!myfile.is_open())
    {
        cout << "Unable to open file";
        return 0;
    }

    string line;
    while ( getline (myfile,line) )
    {
        const std::vector<string> fields = split(line, ',');
        for (std::vector<string>::size_type idx = 0; idx < fields.size(); ++idx)
            std::cout << fields.at(idx) << std::endl;
    }
    myfile.close();
    return 0;
}
With the help of a utility function and a data structure you can simplify this quite easily.
#include <iostream>
#include <fstream>
#include <vector>
#include <algorithm>
// Splits stringToSplit at every occurrence of delimiter.
//
// stringToSplit - text to cut into pieces.
// delimiter     - separator text; may be longer than one character. An empty
//                 delimiter returns the whole input as the only piece.
// keepEmpty     - when false, empty pieces (from adjacent, leading or
//                 trailing delimiters) are left out. Defaults to true so the
//                 function can be called with two arguments.
//
// Returns the pieces in the order in which they appear.
std::vector<std::string> splitString( const std::string& stringToSplit, const std::string& delimiter, const bool keepEmpty = true ) {
    std::vector<std::string> results;
    if ( delimiter.empty() ) {
        results.push_back( stringToSplit );
        return results;
    }

    std::string::size_type start = 0;
    while ( true ) {
        // Position of the next delimiter at or after start, npos if none is left.
        const std::string::size_type end = stringToSplit.find( delimiter, start );
        const bool isLastPiece = ( end == std::string::npos );

        const std::string piece = isLastPiece
            ? stringToSplit.substr( start )
            : stringToSplit.substr( start, end - start );
        if ( keepEmpty || !piece.empty() )
            results.push_back( piece );

        if ( isLastPiece )
            break;
        // Continue right behind the delimiter that was just found.
        start = end + delimiter.size();
    }
    return results;
}
// One comma-separated record: name, id, year, gender.
// The numeric members are zero-initialised so a default-constructed entry
// never carries indeterminate values.
struct DataEntry {
    std::string name;
    unsigned    id   = 0;
    unsigned    year = 0;
    std::string gender;
};
// Loads database.txt, turns every record into a DataEntry and prints them all.
int main() {
    std::fstream file;
    file.open( "database.txt" );
    if ( !file.is_open() ) {
        std::cerr << "Unable to open database.txt\n";
        return 1;
    }

    std::vector<DataEntry> entries;
    std::string line;
    // operator>> reads one whitespace-delimited chunk at a time, so a record
    // must not contain blanks.
    while ( file >> line ) {
        // Keep empty fields so that the field positions stay stable.
        const std::vector<std::string> elements = splitString( line, ",", true );
        if ( elements.size() < 4 ) {
            // Indexing a short record would read past the end of the vector.
            std::cerr << "Skipping malformed record: " << line << '\n';
            continue;
        }
        DataEntry entry;
        entry.name   = elements[0];
        // std::stoul throws if the field is not a number; that is not caught here.
        entry.id     = static_cast<unsigned>( std::stoul( elements[1] ) );
        entry.year   = static_cast<unsigned>( std::stoul( elements[2] ) );
        entry.gender = elements[3];
        entries.push_back( entry );
    }
    file.close();

    for ( const auto& e : entries ) {
        std::cout << e.name << " " << e.id << " "
                  << e.year << " " << e.gender << '\n';
    }
    std::cout << "\nPress any key and enter to quit.\n";
    std::cin.get();
    return 0;
}
database.txt
John,12345,2010,M
Jane,54321,2012,F
output
John 12345 2010 M
Jane 54321 2012 F
This makes life a lot easier: read in a single line first, then parse that line of text, and from there do whatever you want with the data, such as storing it in a struct, printing it, or manipulating it.
Edit
You need to be aware of the fact that when reading in lines of text, and parsing them if you have something like this in your text file:
John Doe,12345,2010,M
It will not give you what you would expect. I'll leave that for you to figure out.

Storing data from an unknown number of files

I have used the following piece of code to read from multiple .dat files and parse them. This code uses 3D vectors to store the data after the reading process. However, I would like the data corresponding to each single file to be independent from the others. The issue is that the number of files varies and is unknown at compile time; hence, the number of vectors varies too. I would like to know if there is any solution for this.
vector<vector<vector<string>>> masterList;
for (int i = 0; i < files.size(); ++i) {
cout << "file name: " << files[i] << endl;
fin.open(files[i].c_str());
if (!fin.is_open()) {
// error occurs!!
// break or exit according to your needs
cout<<"error"<<endl;
}
std::vector<vector<string>> tokens;
int current_line = 0;
std::string line;
while (std::getline(fin, line))
{
cout<<"line number: "<<current_line<<endl;
// Create an empty vector for this line
tokens.push_back(vector<string>());
//copy line into is
std::istringstream is(line);
std::string token;
int n = 0;
//parsing
while (getline(is, token, DELIMITER))
{
tokens[current_line].push_back(token);
cout<<"token["<<current_line<<"]["<<n<<"] = " << token <<endl;
n++;
}
cout<<"\n";
current_line++;
}
fin.clear();
fin.close();
masterList.push_back(tokens);
}
So, the main issue I'm facing is: how to create a variable number of 2D vectors to store the data corresponding to each single file, when I don't know how many files there are at compile time.
Modify the list of files in the main to adapt the size of your "master data". If the length of file names is variable, then parse it first (or get it one way or another first), and then execute the parsing on the dat files. If the filenames are known at run time only, and asynchronously with that, then add a new element in the list each time you get a new filename (you can use events for that for example, take a look at https://github.com/Sheljohn/siglot).
Note that list elements are independent in memory, and that lists support deletion/insertion in constant time. That way, data corresponding to each file is independent from the other. If you want to retrieve the data specific to a file (knowing the filename), either iterate on the list to find the corresponding file (linear time) or trade the list for an unordered_map (amortized constant time).
#include <string>
#include <list>
#include <vector>
#include <iostream>
#include <sstream>
#include <fstream>
#include <iterator>
#include <algorithm>
using namespace std;
// Number of lines for which DATFileData reserves storage up front.
#define AVG_LINES_PER_FILE 100
/**
 * [tokenize_string Tokenize input string 'words' and append its elements to vector 'tokens'.]
 * @param words  [Whitespace separated data-string; only read, so constants and temporaries are accepted.]
 * @param tokens [Vector of strings receiving the tokens; elements already present are kept.]
 */
void tokenize_string( const string& words, vector<string>& tokens )
{
    // A string with n blanks holds at most n + 1 tokens. This is only a
    // capacity hint; the vector still grows on its own if more room is needed.
    const vector<string>::size_type blanks =
        static_cast<vector<string>::size_type>( count( words.begin(), words.end(), ' ' ) );
    tokens.reserve( blanks + 1 );

    istringstream iss(words);
    copy(
        istream_iterator<string>(iss),
        istream_iterator<string>(),
        back_inserter(tokens)
    );
}
/**
 * Contains data parsed from a single .dat file
 */
class DATFileData
{
public:
    typedef vector<string> line_type;
    typedef vector<line_type> data_type;

    /**
     * Create the object and, when a file name is given, parse that file right away.
     * @param fname [Path of the file to parse, or nullptr to start without data.]
     */
    DATFileData( const char* fname = nullptr )
    {
        m_fdata.reserve(AVG_LINES_PER_FILE);
        if ( fname ) parse_file(fname);
    }

    // Check if the object contains data
    operator bool() const { return !m_fdata.empty(); }

    /**
     * Replace the stored data with the tokenized lines of the given file.
     * Previously stored lines are discarded even when the file cannot be opened.
     * @param fname [Path of the file to parse; must not be null.]
     * @return true when the file could be opened, false otherwise.
     */
    bool parse_file( const char* fname )
    {
        m_fdata.clear();

        ifstream fin( fname );
        if ( !fin.is_open() )
        {
            cerr << "Could not parse file '" << fname << "'!" << endl;
            return false;
        }

        string line;
        // Test the read itself: checking good() before reading would store one
        // extra, empty line once the last real line has been consumed.
        while ( getline(fin,line) )
        {
            m_fdata.push_back(line_type());
            tokenize_string( line, m_fdata.back() );
        }
        fin.close();

        m_fname = fname;
        cout << "Parsed " << m_fdata.size() << " lines in file '" << fname << "'." << endl;
        return true;
    }

    // Get data
    // Number of lines stored
    unsigned size() const { return static_cast<unsigned>( m_fdata.size() ); }
    // Name of the last file parsed successfully, or nullptr if there is none
    const char* filename() const { return m_fname.empty() ? nullptr : m_fname.c_str(); }
    // All lines, each one a vector of tokens
    const data_type& data() const { return m_fdata; }
    // Tokens of line i; at() throws std::out_of_range for an invalid index
    const line_type& line( const unsigned& i ) const { return m_fdata.at(i); }

private:
    string m_fname;
    data_type m_fdata;
};
// Parses every listed file into its own DATFileData object and reports how
// many of them could actually be parsed.
int main()
{
    const vector<string> files = {"some/file/path.dat","another/one.dat"};
    list<DATFileData> data(files.size());

    vector<string>::size_type next = 0;
    unsigned parsed = 0;
    for ( DATFileData& d: data )
    {
        // parse_file reports failures itself; only successes are counted here
        if ( d.parse_file( files[next++].c_str() ) )
            ++parsed;
    }

    cout << endl << parsed << " files parsed successfully." << endl;
}

C++ String tokenisation from 3D .obj files

I'm pretty new to C++ and was looking for a good way to pull the data out of this line.
A sample line that I might need to tokenise is
f 11/65/11 16/70/16 17/69/17
I have a tokenisation method that splits strings into a vector as delimited by a string which may be useful
static void Tokenise(const string& str, vector<string>& tokens, const string& delimiters = " ")
The only way I can think of doing it is to tokenise with " " as a delimiter, remove the first item from the resulting vector, then tokenise each part by itself. Is there a good way to do this all in one?
I see the question is tagged as C++ but the absolutely easiest way to do this is with scanf
int indices[3][3];
sscanf(buffer, "f %d/%d/%d %d/%d/%d %d/%d/%d", &indices[0][0], &indices[0][1],...);
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
// Thrown when a piece of text cannot be read or converted.
class parse_error : public std::exception {};

/**
 * Convert the text in `value` to a Target using stream extraction.
 * Whitespace around the value is accepted; anything else that is left over
 * after the value is an error.
 *
 * @param value text to convert
 * @return the converted value
 * @throws parse_error if nothing could be extracted or if non-whitespace
 *         characters follow the extracted value
 */
template< typename Target >
inline Target convert_to(const std::string& value)
{
    std::istringstream iss(value);
    Target target;
    iss >> target;
    if(!iss) throw parse_error();
    // Skip trailing whitespace only if the extraction did not already reach the
    // end: applying std::ws to a stream whose eofbit is set raises failbit,
    // which would reject valid input such as "11".
    if(!iss.eof()) iss >> std::ws;
    if(!iss.eof()) throw parse_error();
    return target;
}
// Reads the characters up to the next `delim` (or the end of the input) from
// `is` and converts them to a T with convert_to<T>.
// Throws parse_error when the read leaves the stream in a failed state.
template< typename T >
inline T read_delimited_value(std::istream& is, char delim)
{
    std::string field;
    if(!std::getline(is, field, delim)) throw parse_error();
    return convert_to<T>(field);
}
// Writes every element of [begin, end) to `os`, each followed by one blank.
template< typename It >
inline void output(std::ostream& os, It begin, It end)
{
    for(; begin != end; ++begin)
        os << *begin << ' ';
}
// Parses one .obj face line ("f a/b/c a/b/c ...") into a flat list of
// integers and prints them. Any parse_error is left to propagate.
int main()
{
    const std::string line = "f 11/65/11 16/70/16 17/69/17";
    std::istringstream input(line);

    // The line has to start with the tag "f" followed by a blank
    std::string tag;
    std::getline(input, tag, ' ');
    if(!input || tag != "f") throw parse_error();

    // Every group is two '/'-terminated values followed by a blank-terminated one
    const char delims[] = { '/', '/', ' ' };
    std::vector<int> indices;
    while(input.good()) {
        for(const char delim : delims)
            indices.push_back( read_delimited_value<int>(input, delim) );
    }
    if(!input.eof()) throw parse_error();

    output( std::cout, indices.begin(), indices.end() );
    std::cout << '\n';
    return 0;
}
You should take a look at Boost.Tokenizer and especially this:
// char_sep_example_1.cpp
#include <iostream>
#include <boost/tokenizer.hpp>
#include <string>
// Splits a sample string on the characters '-', ';' and '|' with
// Boost.Tokenizer and prints each token wrapped in angle brackets.
int main()
{
    typedef boost::tokenizer<boost::char_separator<char> > tokenizer;

    std::string str = ";;Hello|world||-foo--bar;yow;baz|";
    boost::char_separator<char> sep("-;|");
    tokenizer tokens(str, sep);

    for (const std::string& tok : tokens)
        std::cout << "<" << tok << "> ";
    std::cout << "\n";
    return EXIT_SUCCESS;
}
Judging from the sample line you can use two delimiters ' ' and '/' and you will get all your numbers.
static void Tokenise(const string& str, vector<string>& tokens, const string& delimiters = " /")
You can easily remove the first part, up to the first blank just after the f; the rest of the line, after that first blank, can then be read from the stream:
istringstream iss( line );
std::getline( iss, restStr ,' ' )
Then you can use your tokenize function first on blank space and then on '/', or just use a set of std::getline and istringstreams in one loop.
// Splits "f 1/2/3 4/4/2" into blank-separated groups and prints the
// '/'-separated parts of every group, each followed by a comma, one group
// per output line.
int main()
{
    std::string s = "f 1/2/3 4/4/2";
    std::istringstream issLine( s );
    std::string group;

    // discard the leading "f"
    std::getline( issLine, group, ' ' );

    // one blank-separated group per iteration
    while( std::getline( issLine, group, ' ' ) )
    {
        std::istringstream issToken( group );
        // split the group on '/'
        for( std::string part; std::getline( issToken, part, '/' ); )
        {
            std::cout << part << ',';
            // add your data in whatever you want
        }
        std::cout << std::endl;
    }
}