C++ remove punctuation marks and spaces from a string - c++

How can I remove punctuation marks and spaces from a string in a simple way without using any library functions?

int main()
{
string s = "abc de.fghi..jkl,m no";
for (int i = 0; i < s.size(); i++)
{
if (s[i] == ' ' || s[i] == '.' || s[i] == ',')
{
s.erase(i, 1); // remove ith char from string
i--; // reduce i with one so you don't miss any char
}
}
cout << s << endl;
}

Assuming you can use library I/O like <iostream> and types like std::string and you just don't want to use the <cctype> functions like ispunct().
#include <iostream>
#include <string>
int main()
{
const std::string myString = "This. is a string with ,.] stuff in, it.";
const std::string puncts = " [];',./{}:\"?><`~!-_";
std::string output;
for (const auto& ch : myString)
{
bool found = false;
for (const auto& p : puncts)
{
if (ch == p)
{
found = true;
break;
}
}
if (!found)
output += ch;
}
std::cout << output << '\n';
return 0;
}
No idea about the performance, I'm sure it can be done in multiple better ways.

Related

How can i split adjacent numbers and letters in c++?

I've got a large text document that including adjacent numbers and letters.
Just like that,
JACK1940383DAVID30284HAROLD68372TROY4392 etc.
How can i split this like below in C++
List: Jack / 1940383 , David/30284, ...
You can use std::string::find_first_of() and std::string::find_first_not_of() in a loop, using std::string::substr() to extract each piece, eg:
std::string s = "JACK1940383DAVID30284HAROLD68372TROY4392";
std::string::size_type start = 0, end;
while ((end = s.find_first_of("0123456789", start)) != std::string::npos) {
std::string name = s.substr(start, end-start);
start = end;
int number;
if ((end = s.find_first_not_of("0123456789", start)) != std::string::npos) {
number = std::stoi(s.substr(start, end-start));
}
else {
number = std::stoi(s.substr(start));
}
start = end;
// use name and number as needed...
}
Online Demo
You can use regex like this:
#include <iostream>
#include <string>
#include <regex>
#include <vector>
// create a struct to group your data
// this makes it easy to store it in a vector.
struct person_t
{
std::string name;
std::string number;
};
// overloaded output operator for printing one person's details
std::ostream& operator<<(std::ostream& os, const person_t& person)
{
std::cout << person.name << ": " << person.number << std::endl;
return os;
}
// get a vector of person_t based on the input
auto get_persons(const std::string& input)
{
// make a regex in this case a regex that will match one or more capital letters
// and groups them using the ()
// then match one or more digits and group them too.
static const std::regex rx{ "([A-Z]+)([0-9]+)" };
std::smatch match;
// a vector to hold all the persons
std::vector<person_t> persons;
// start at begin of string and look for first part of the string
// that matches the regex.
auto cbegin = input.cbegin();
while (std::regex_search(cbegin, input.cend(), match, rx))
{
// match[0] will contain the whole match,
// match[1]-match[n] will contain the groups from the regular expressions
// match[1] will contain the match with characters and thus the name
// match[2] will contain the match with the numbers and thus the number.
// create a person_t struct with this info
person_t person{ match[1], match[2] };
// and add it to the vector
persons.push_back(person);
cbegin = match.suffix().first;
}
return persons;
}
int main()
{
// parse and split the string
auto persons = get_persons("JACK1940383DAVID30284HAROLD68372TROY4392");
// show the output
for (const auto& person : persons)
{
std::cout << person;
}
}
As pointed in other good answers you can use
find_first_of(), find_first_not_of() and substr() from std::string in a loop
regex
But it may be too much. I will add 3 more examples that you may find
simpler.
The first 2 programs expects the file name on the command line for (my) convenience here, and the test file is in.txt. Contents are the same as posted
JACK1940383DAVID30284HAROLD68372TROY4392
The last example just parses the string data declared as a char[]
1. Using fscanf()
Since the target is to consume formatted data, fscanf() is an option. As the data structure is very simple, the program is just a one line loop:
char mask[] = "%50[^0-9]%50[0-9]";
while ( 2 == fscanf(F, mask, tk_key, tk_value))
std::cout << tk_key << "/" << tk_value << "\n";
program output
output is the same for all examples
JACK/1940383
DAVID/30284
HAROLD/68372
TROY/4392
code for ex. 1
#include <errno.h>
#include <iostream>
int main(int argc,char** argv)
{
if (argc < 2)
{ std::cerr << "Use: pgm FileName\n";
return -1;
}
FILE* F = fopen(argv[1], "r");
if (F == NULL)
{
perror("Could not open file");
return -1;
}
std::cerr << "File: \"" << argv[1] << "\"\n";
char tk_key[50], tk_value[50];
char mask[] = "%50[^0-9]%50[0-9]";
while ( 2 == fscanf(F, mask, tk_key, tk_value))
std::cout << tk_key << "/" << tk_value << "\n";
fclose(F);
return 0;
}
using a state machine
There are just 2 states so it is not a fancy FSA ;) State machines are good for representing this kind of stuff, albeit here this seems to be overkill.
#define S_LETTER 0
#define S_DIGIT 1
#include <algorithm>
#include <iostream>
#include <fstream>
using iich = std::istream_iterator<char>;
int main(int argc,char** argv)
{
std::ifstream in_file{argv[1]};
if ( not in_file.good()) return -1;
iich p {in_file}, eofile{};
std::string token{}; // string to build values
char st = S_LETTER; // state value for FSA
std::for_each(p, eofile,
[&token,&st](char ch)
{
char temp = 0;
switch (st)
{
case S_LETTER:
if ((ch >= '0') && (ch <= '9'))
{
std::cout << token << "/";
token = ch;
st = S_DIGIT; // now in number
}
else token += ch; // concat in string
break;
case S_DIGIT:
default:
if ((ch < '0') || (ch > '9'))
{ // is a letter
std::cout << token << "\n";
token = ch;
st = S_LETTER; // now in name
}
else token += ch; // concat in string
break;
}; // switch()
});
std::cout << token << "\n"; // print last token
}
Here we have no loop. for_each gets the data from an iterator and passes it to a function that builds the name and the value as strings and couts them
Output is the same
3. a simple FSA to consume the data
#define S_LETTER 0
#define S_DIGIT 1
#include <iostream>
int main(void)
{
char one[] = "JACK1940383DAVID30284HAROLD68372TROY4392";
char* p = (char*)&one;
char* token = p;
char st = S_LETTER;
char temp = 0;
while (*p != 0)
{
switch (st)
{
case S_LETTER:
if ((*p >= '0') && (*p <= '9'))
{
temp = *p;
*p = 0;
std::cout << token << "/";
*p = temp;
token = p;
st = S_DIGIT; // now in number
}
break;
case S_DIGIT:
default:
if ( (*p < '0') || (*p > '9'))
{ // letter
temp = *p;
*p = 0;
std::cout << token << "\n";
*p = temp;
token = p;
st = S_LETTER; // now in name
}
break;
}; // switch()
p += 1; // next symbol
}; // while()
std::cout << token << "\n"; // print last token
}
This code just uses a C-style loop to parse the input data

Find second to last word in a string C++

Hi I need to find second to last word in a string. Right now below program is printing the last one.
#include <iostream>
#include <string>
using namespace std;
int main() {
string text{"some line with text"};
// find last space, counting from backwards
int i = text.length() - 2; // last character
while (i != 0 && !isspace(text[i]))
{
--i;
}
string lastword = text.substr(i+1); // +1 to skip leading space
cout << lastword << endl;
return 0;
}
Output: (Printing last word)
text
You can split the string into words and hold the previous word before saving the current word.
#include <iostream>
#include <string>
#include <sstream>
int main() {
std::string text{"some line with text"};
std::stringstream ss(text);
std::string previousword, lastword, newword;
while (ss >> newword) {
previousword = lastword;
lastword = newword;
}
std::cout << previousword << std::endl;
return 0;
}
Also note that using using namespace std; is discouraged.
You don't need any loops. Just add error checking:
int main() {
std::string text{ "some line with text" };
std::size_t pos2 = text.rfind(' ');
std::size_t pos1 = text.rfind(' ', pos2-1);
std::cout << text.substr(pos1+1, pos2-pos1-1) << std::endl;
return 0;
}
Just keep counting spaces until you arrive to the word you want or use a stringstream as MikeCAT proposed.
Here there is a function that finds any last word number without having to copy the entire string in a stringstream:
#include <iostream>
#include <string>
using std::string;
using std::cout;
using std::endl;
string getNLastWord(string text, int n)
{
bool insideAWord = false;
int wordNum = 0;
int wordEnd = -1;
for(int i = text.size() - 1; i > 0; i--)
{
if(text[i] != ' ' && !insideAWord)
{
wordNum++;
insideAWord = true;
}
else if(text[i] == ' ')
{
insideAWord = false;
}
if(wordNum == n)
{
wordEnd = i;
break;
}
}
if(wordEnd == -1)
{
cout << "There are no " << n << " words from right." << endl;
}
else
{
int wordStart;
for(wordStart = wordEnd; wordStart > 0; wordStart--)
{
if(text[wordStart] == ' ')
{
wordStart++;
break;
}
}
return text.substr(wordStart,wordEnd+1-wordStart);
}
return "";
}
int main() {
string text = "some text";
cout << getNLastWord(text,2);
return 0;
}

Validating user input is valid number or not [invalid conversion from 'char' to 'char*']

Ok so I am a beginner in c/c++ and I am creating this little program that checks if the input provided by user is valid number or not, if it is then it prints " it is a number" or else it prints "it is a character string"
Some example output
1 - is a number
-1.1 - is a number
1......1 - is a character string
three - is a character string
.12 is a character string
+0.12 is a number
ABC123ABC - is a character string
I'm getting this error in my code. If someone could help me fix this I would really appreciate it. TIA
cpp:52:23: error: invalid conversion from 'char' to 'char*' [-fpermissive]
if (!isNum(c[i]))
{
~~~^
task1.cpp:5:19: note: initializing argument 1 of 'bool isNum(char*)'
bool isNum(char * p){
My code
#include <iostream>
bool isNum(char * p){
if (NULL == p || *p == '\0'){
return false;
}
int dot = 0;
int plus = 0;
int minus = 0;
while(*p){
char a = *p;
switch (a)
{
//Only allows 1 dot
case '.':
if (++dot > 1){
return false;
}
break;
//only allows 1 plus sign
case '+':
if (++plus > 1){
return false;
}
//only allows 1 minus sign
case '-':
if (++minus > 1){
return false;
}
//Only allows 0-9
default:
if (a < '0' || a > '9'){
return false;
}
}
p++;
}
return true;
}
int main(){
//char array of size 1024
char c[1024];
std::cout << "Enter something: ";
std::cin >> c;
for(int i = 0; i < sizeof(c); i++){
if (!isNum(c[i])){
std::cout << c << " is a character string";
}
else {
std::cout << c << " is a number";
}
}
}
If you want to practice complicated algorithms, parsing numbers is a good exercise. But if your goal is to write useful, simple programs, you are on the wrong track. In C++, many common tasks are already solved by the C++ standard library, you just have to use them.
#include <iostream>
#include <sstream>
#include <string>
int main() {
std::string line;
if (!std::getline(std::cin, line)) {
std::cerr << "error reading the line\n";
return 1;
}
std::istringstream in{line};
double num;
if (in >> num && in.peek() == EOF) {
std::cout << "it's a number, " << num << "\n";
} else {
std::cout << "it's not a number\n";
}
}
The above code reads more high-level than your code. Most importantly it can handle arbitrary long lines without crashing the program.
I'm not intimately familiar with the C++ headers, so I may have forgotten to include some others. But the rest of the code should be ok, even though I didn't test it.
The following function isNumber would work for you.
Here I use a dynamic character sequence std::string which enables us to input any size strings less shorter than std::string::max_size.
We can check whether a given character is a digit or not by std::isdigit.
No extra copies and object creation would show good performance.
Whitespace characters are not allowed in the left and right side of the input string.
I also write the explicit type of the iterators and avoid using auto because you are tagging C++98:
#include <string>
#include <cctype>
bool isNumber(const std::string& s)
{
// this also validates the following access to s[0]
if(s.empty()){
return false;
}
const std::size_t offset = (s[0] == '+' || s[0] == '-') ? 1 : 0;
std::string::const_iterator begin = s.begin() + offset;
// this also validates the following dereferencing begin
if(begin == s.end()){
return false; // false if just a sign "+" or "-"
}
if(!std::isdigit(static_cast<unsigned char>(*begin))){
return false; // e.g. "+.123"
}
bool isdecimal = false;
for(std::string::const_iterator it = ++begin; it != s.end(); ++it)
{
if (!std::isdigit(static_cast<unsigned char>(*it)))
{
if(!isdecimal && (*it == '.'))
{
isdecimal = true;
if((it+1) == s.end()){
return false; // e.g. "+1."
}
}
else{
return false;
}
}
}
return true;
}
Now it is easy and straightforward to implement the main function:
DEMO
#include <iostream>
int main()
{
std::string s;
std::cout << "Enter something: ";
std::getline(std::cin, s);
std::cout << std::endl;
std::cout
<< s << " is a "
<< (isNumber(s) ? "number." : "character string.");
return 0;
}
There you go, i've commented the things i had changed
#include <iostream>
bool isNum(char * p) {
if (NULL == p || *p == '\0') {
return false;
}
int dot = 0;
char a = *p;
if (a<'0' || a>'9') {
if (a != '-' && a != '+') { return false; }
else p++;
}
if (*p<'0' || *p>'9') return false;
p++;
while (*p != '\0') {
a = *p;
switch (a)
{
//Only allows 1 dot
case '.':
if (++dot > 1) {
return false;
}
p++;
if (*p == '\0') return false;
break;
default:
if (a < '0' || a > '9') {
return false;
}
p++;
break;
}
}
return true;
}
int main() {
//char array of size 1024
char c[1024];
std::cout << "Enter something: ";
std::cin >> c;
// you don't need to loop through every character just pass your array of characters & your function is looping through it
if (!isNum(c)) {
std::cout << c << " is a character string";
}
else {
std::cout << c << " is a number";
}
}

Difficulties with string declaration/reference parameters (c++)

Last week I got an homework to write a function: the function gets a string and a char value and should divide the string in two parts, before and after the first occurrence of the existing char.
The code worked but my teacher told me to do it again, because it is not well written code. But I don't understand how to make it better. I understand so far that defining two strings with white spaces is not good, but i get out of bounds exceptions otherwise. Since the string input changes, the string size changes everytime.
#include <iostream>
#include <string>
using namespace std;
void divide(char search, string text, string& first_part, string& sec_part)
{
bool firstc = true;
int counter = 0;
for (int i = 0; i < text.size(); i++) {
if (text.at(i) != search && firstc) {
first_part.at(i) = text.at(i);
}
else if (text.at(i) == search&& firstc == true) {
firstc = false;
sec_part.at(counter) = text.at(i);
}
else {
sec_part.at(counter) = text.at(i);
counter++;
}
}
}
int main() {
string text;
string part1=" ";
string part2=" ";
char search_char;
cout << "Please enter text? ";
getline(cin, text);
cout << "Please enter a char: ? ";
cin >> search_char;
divide(search_char,text,aprt1,part2);
cout << "First string: " << part1 <<endl;
cout << "Second string: " << part2 << endl;
system("PAUSE");
return 0;
}
I would suggest you, learn to use c++ standard functions. there are plenty utility function that can help you in programming.
void divide(const std::string& text, char search, std::string& first_part, std::string& sec_part)
{
std::string::const_iterator pos = std::find(text.begin(), text.end(), search);
first_part.append(text, 0, pos - text.begin());
sec_part.append(text, pos - text.begin());
}
int main()
{
std::string text = "thisisfirst";
char search = 'f';
std::string first;
std::string second;
divide(text, search, first, second);
}
Here I used std::find that you can read about it from here and also Iterators.
You have some other mistakes. you are passing your text by value that will do a copy every time you call your function. pass it by reference but qualify it with const that will indicate it is an input parameter not an output.
Why is your teacher right ?
The fact that you need to initialize your destination strings with empty space is terrible:
If the input string is longer, you'll get out of bound errors.
If it's shorter, you got wrong answer, because in IT and programming, "It works " is not the same as "It works".
In addition, your code does not fit the specifications. It should work all the time, independently of the current value which is stored in your output strings.
Alternative 1: your code but working
Just clear the destination strings at the beginning. Then iterate as you did, but use += or push_back() to add chars at the end of the string.
void divide(char search, string text, string& first_part, string& sec_part)
{
bool firstc = true;
first_part.clear(); // make destinations strings empty
sec_part.clear();
for (int i = 0; i < text.size(); i++) {
char c = text.at(i);
if (firstc && c != search) {
first_part += c;
}
else if (firstc && c == search) {
firstc = false;
sec_part += c;
}
else {
sec_part += c;
}
}
}
I used a temporary c instead of text.at(i) or text\[i\], in order to avoid multiple indexing But this is not really required: nowadays, optimizing compilers should produce equivalent code, whatever variant you use here.
Alternative 2: use string member functions
This alternative uses the find() function, and then constructs a string from the start until that position, and another from that position. There is a special case when the character was not found.
void divide(char search, string text, string& first_part, string& sec_part)
{
auto pos = text.find(search);
first_part = string(text, 0, pos);
if (pos== string::npos)
sec_part.clear();
else sec_part = string(text, pos, string::npos);
}
As you understand yourself these declarations
string part1=" ";
string part2=" ";
do not make sense because the entered string in the object text can essentially exceed the both initialized strings. In this case using the string method at can result in throwing an exception or the strings will have trailing spaces.
From the description of the assignment it is not clear whether the searched character should be included in one of the strings. You suppose that the character should be included in the second string.
Take into account that the parameter text should be declared as a constant reference.
Also instead of using loops it is better to use methods of the class std::string such as for example find.
The function can look the following way
#include <iostream>
#include <string>
void divide(const std::string &text, char search, std::string &first_part, std::string &sec_part)
{
std::string::size_type pos = text.find(search);
first_part = text.substr(0, pos);
if (pos == std::string::npos)
{
sec_part.clear();
}
else
{
sec_part = text.substr(pos);
}
}
int main()
{
std::string text("Hello World");
std::string first_part;
std::string sec_part;
divide(text, ' ', first_part, sec_part);
std::cout << "\"" << text << "\"\n";
std::cout << "\"" << first_part << "\"\n";
std::cout << "\"" << sec_part << "\"\n";
}
The program output is
"Hello World"
"Hello"
" World"
As you can see the separating character is included in the second string though I think that maybe it would be better to exclude it from the both strings.
An alternative and in my opinion more clear approach can look the following way
#include <iostream>
#include <string>
#include <utility>
std::pair<std::string, std::string> divide(const std::string &s, char c)
{
std::string::size_type pos = s.find(c);
return { s.substr(0, pos), pos == std::string::npos ? "" : s.substr(pos) };
}
int main()
{
std::string text("Hello World");
auto p = divide(text, ' ');
std::cout << "\"" << text << "\"\n";
std::cout << "\"" << p.first << "\"\n";
std::cout << "\"" << p.second << "\"\n";
}
Your code will only work as long the character is found within part1.length(). You need something similar to this:
void string_split_once(const char s, const string & text, string & first, string & second) {
first.clear();
second.clear();
std::size_t pos = str.find(s);
if (pos != string::npos) {
first = text.substr(0, pos);
second = text.substr(pos);
}
}
The biggest problem I see is that you are using at where you should be using push_back. See std::basic_string::push_back. at is designed to access an existing character to read or modify it. push_back appends a new character to the string.
divide could look like this :
void divide(char search, string text, string& first_part,
string& sec_part)
{
bool firstc = true;
for (int i = 0; i < text.size(); i++) {
if (text.at(i) != search && firstc) {
first_part.push_back(text.at(i));
}
else if (text.at(i) == search&& firstc == true) {
firstc = false;
sec_part.push_back(text.at(i));
}
else {
sec_part.push_back(text.at(i));
}
}
}
Since you aren't handling exceptions, consider using text[i] rather than text.at(i).

C++ Tokenize a string with spaces and quotes

I would like to write something in C++ that tokenize a string. For the sake of clarity, consider the following string:
add string "this is a string with spaces!"
This must be split as follows:
add
string
this is a string with spaces!
Is there a quick and standard-library-based approach?
No library is needed. An iteration can do the task ( if it is as simple as you describe).
string str = "add string \"this is a string with space!\"";
for( size_t i=0; i<str.length(); i++){
char c = str[i];
if( c == ' ' ){
cout << endl;
}else if(c == '\"' ){
i++;
while( str[i] != '\"' ){ cout << str[i]; i++; }
}else{
cout << c;
}
}
that outputs
add
string
this is a string with space!
I wonder why this simple and C++ style solution is not presented here.
It's based on fact that if we first split string by \", then each even chunk is "inside" quotes, and each odd chunk should be additionally splitted by whitespaces.
No possibility for out_of_range or anything else.
unsigned counter = 0;
std::string segment;
std::stringstream stream_input(input);
while(std::getline(stream_input, segment, '\"'))
{
++counter;
if (counter % 2 == 0)
{
if (!segment.empty())
std::cout << segment << std::endl;
}
else
{
std::stringstream stream_segment(segment);
while(std::getline(stream_segment, segment, ' '))
if (!segment.empty())
std::cout << segment << std::endl;
}
}
Here is a complete function for it. Modify it according to need, it adds parts of string to a vector strings(qargs).
void split_in_args(std::vector<std::string>& qargs, std::string command){
int len = command.length();
bool qot = false, sqot = false;
int arglen;
for(int i = 0; i < len; i++) {
int start = i;
if(command[i] == '\"') {
qot = true;
}
else if(command[i] == '\'') sqot = true;
if(qot) {
i++;
start++;
while(i<len && command[i] != '\"')
i++;
if(i<len)
qot = false;
arglen = i-start;
i++;
}
else if(sqot) {
i++;
start++;
while(i<len && command[i] != '\'')
i++;
if(i<len)
sqot = false;
arglen = i-start;
i++;
}
else{
while(i<len && command[i]!=' ')
i++;
arglen = i-start;
}
qargs.push_back(command.substr(start, arglen));
}
for(int i=0;i<qargs.size();i++){
std::cout<<qargs[i]<<std::endl;
}
std::cout<<qargs.size();
if(qot || sqot) std::cout<<"One of the quotes is open\n";
}
The Boost library has a tokenizer class that can accept an escaped_list_separator. The combination of these look like they might provide what you are looking for.
Here are links to the boost documentation, current as of this post and almost certainly an old version by the time you read this.
https://www.boost.org/doc/libs/1_73_0/libs/tokenizer/doc/tokenizer.htm
https://www.boost.org/doc/libs/1_73_0/libs/tokenizer/doc/escaped_list_separator.htm
This example is stolen from the boost documentation. Forgive me for not creating my own example.
// simple_example_2.cpp
#include<iostream>
#include<boost/tokenizer.hpp>
#include<string>
int main(){
using namespace std;
using namespace boost;
string s = "Field 1,\"putting quotes around fields, allows commas\",Field 3";
tokenizer<escaped_list_separator<char> > tok(s);
for(tokenizer<escaped_list_separator<char> >::iterator beg=tok.begin(); beg!=tok.end();++beg){
cout << *beg << "\n";
}
}
I would define a class Token to read a single token from a stream.
Then using your code becomes very trivial.
#include <iostream>
#include <string>
int main()
{
// Simply read the tokens from the stream.
Token t;
while(std::cin >> t)
{
std::cout << "Got: " << t << "\n";
}
}
Stream objects like this are very easy to write:
class Token
{
// Just something to store the value in.
std::string value;
// Then define the input and output operators.
friend std::ostream& operator<<(std::ostream& str, Token const& output)
{
return str << output.value;
}
// Input is slightly harder than output.
// but not that difficult to get correct.
friend std::istream& operator>>(std::istream& str, Token& input)
{
std::string tmp;
if (str >> tmp)
{
if (tmp[0] != '"')
{
// We read a word that did not start with
// a quote mark. So we are done. Simply put
// it in the destination.
input.value = std::move(tmp);
}
else if (tmp.front() == '"' && tmp.back() == '"')
{
// we read a word with both open and close
// braces so just nock these off.
input.value = tmp.substr(1, tmp.size() - 2);
}
else
{
// We read a word that has but has a quote at the
// start. So need to get all the characters upt
// closing quote then add this to value.
std::string tail;
if (std::getline(str, tail, '"'))
{
// Everything worked
// update the input
input.value = tmp.substr(1) + tail;
}
}
}
return str;
}
};
I guess there is no straight forward approach with standard library. Indirectly following algo will work:
a) search for '\"' with string::find('\"') . If anything found search for next '\"' using string::find('\'',prevIndex), If found use string::substr(). Discard that part from the original string.
b) Now Serach for ' ' character in the same way.
NOTE: you have to iterate through the whole string.
Here is my solution, it's equivalent to python's shlex, shlex_join() is the inverse of shlex_split():
#include <cctype>
#include <iomanip>
#include <iostream>
#include <string>
#include <sstream>
#include <utility>
#include <vector>
// Splits the given string using POSIX shell-like syntax.
std::vector<std::string> shlex_split(const std::string& s)
{
std::vector<std::string> result;
std::string token;
char quote{};
bool escape{false};
for (char c : s)
{
if (escape)
{
escape = false;
if (quote && c != '\\' && c != quote)
token += '\\';
token += c;
}
else if (c == '\\')
{
escape = true;
}
else if (!quote && (c == '\'' || c == '\"'))
{
quote = c;
}
else if (quote && c == quote)
{
quote = '\0';
if (token.empty())
result.emplace_back();
}
else if (!isspace(c) || quote)
{
token += c;
}
else if (!token.empty())
{
result.push_back(std::move(token));
token.clear();
}
}
if (!token.empty())
{
result.push_back(std::move(token));
token.clear();
}
return result;
}
// Concatenates the given token list into a string. This function is the
// inverse of shlex_split().
std::string shlex_join(const std::vector<std::string>& tokens)
{
auto it = tokens.begin();
if (it == tokens.end())
return {};
std::ostringstream oss;
while (true)
{
if (it->empty() || it->find_first_of(R"( "\)") != std::string::npos)
oss << std::quoted(*it);
else
oss << *it;
if (++it != tokens.end())
oss << ' ';
else
break;
}
return oss.str();
}
void test(const std::string& s, const char* expected = nullptr)
{
if (!expected)
expected = s.c_str();
if (auto r = shlex_join(shlex_split(s)); r != expected)
std::cerr << '[' << s << "] -> [" << r << "], expected [" << expected << "]\n";
}
int main()
{
test("");
test(" ", "");
test("a");
test(" a ", "a");
test("a b", "a b");
test(R"(a \s b)", "a s b");
test(R"("a a" b)");
test(R"('a a' b)", R"("a a" b)");
test(R"(a \" b)", R"(a "\"" b)");
test(R"(a \\ b)", R"(a "\\" b)");
test(R"("a \" a" b)");
test(R"('a \' a' b)", R"("a ' a" b)");
test(R"("a \\ a" b)");
test(R"('a \\ a' b)", R"("a \\ a" b)");
test(R"('a \s a' b)", R"("a \\s a" b)");
test(R"("a \s a" b)", R"("a \\s a" b)");
test(R"('a \" a' b)", R"("a \\\" a" b)");
test(R"("a \' a" b)", R"("a \\' a" b)");
test(R"("" a)");
test(R"('' a)", R"("" a)");
test(R"(a "")");
test(R"(a '')", R"(a "")");
}
There is a standard-library-based approach in C++14 or later. But it is not quick.
#include <iomanip> // quoted
#include <iostream>
#include <sstream> // stringstream
#include <string>
using namespace std;
int main(int argc, char **argv) {
string str = "add string \"this is a string with spaces!\"";
stringstream ss(str);
string word;
while (ss >> quoted(word)) {
cout << word << endl;
}
return 0;
}