So I have a little problem, I want to achieve this in C++, but I don't know how to do it:
Given is a string containing random numbers, symbols, and letters:
std::string = "1653gbdtsr362g2v3f3t52bv^hdtvsbjj;hdfuue,9^1dkkns";
Now I'm trying to find all ^ characters, and if those are followed by a number between 0 and 9, delete the ^ and the number, so:
"^1ghhu^7dndn^g"
becomes:
"ghhudndn^g"
I know how to find and replace/erase chars from a string, but I don't know how to check if it's followed by a number in a not hard coded way. Any help is appreciated.
std::string s = "^1ghhu^7dndn^g";
for (int i = 0; i < s.length() - 1; ++i)
{
if (s[i] == '^' && std::isdigit(s[i + 1]))
{
s.erase(i, 2);
--i;
}
}
This needs these includes:
#include <string>
#include <cctype>
I would do it this way:
#include <iostream>
#include <string>
#include <utility>
#include <iterator>
template<class Iter, class OutIter>
OutIter remove_escaped_numbers(Iter first, Iter last, OutIter out) {
for ( ; first != last ; )
{
auto c = *first++;
if (c == '^' && first != last)
{
c = *first++;
if (std::isdigit(c))
continue;
else {
*out++ = '^';
*out++ = c;
}
}
else {
*out++ = c;
}
}
return out;
}
int main()
{
using namespace std::literals;
auto input = "^1ghhu^7dndn^g"s;
auto output = std::string{};
remove_escaped_numbers(input.begin(), input.end(), std::back_inserter(output));
std::cout << output << std::endl;
}
or this way:
#include <iostream>
#include <regex>
int main()
{
using namespace std::literals;
auto input = "^1ghhu^7dndn^g"s;
static const auto repl = std::regex { R"___(\^\d)___" };
auto output = std::regex_replace(input, repl, "");
std::cout << output << std::endl;
}
A solution using std::stringstream, and returning the input string cleared of caret-digit's.
#include <iostream>
#include <sstream>
#include <cctype>
int t404()
{
std::stringstream ss;
std::string inStr("1653gbdtsr362g2v3f3t52bv^hdtvsbjj;hdfuue,9^1dkkns");
for (size_t i = 0; i<inStr.size(); ++i)
{
if(('^' == inStr[i]) && isdigit(inStr[i+1]))
{
i += 1; // skip over caret followed by single digit
}
else
{
ss << inStr[i];
}
}
std::cout << inStr << std::endl; // compare input
std::cout << ss.str() << std::endl; // to results
return 0;
}
Output:
1653gbdtsr362g2v3f3t52bv^hdtvsbjj;hdfuue,9^1dkkns
1653gbdtsr362g2v3f3t52bv^hdtvsbjj;hdfuue,9dkkns
you can simply loop over the string and copy it while skipping the undesired chars. Here is a possible function to do it:
std::string filterString (std::string& s) {
std::string result = "";
std::string::iterator it = s.begin();
char c;
while (it != s.end()) {
if (*it == '^' && it != s.end() && (it + 1) != s.end()) {
c = *(it + 1);
if(c >= '0' && c <= '9') {
it += 2;
continue;
}
}
result.push_back(*it);
++ it;
}
return result;
}
A robust solution would be to use the regex library that C++11 brings in.
std::string input ("1653gbdtsr362g2v3f3t52bv^hdtvsbjj;hdfuue,9^1dkkns");
std::regex rx ("[\\^][\\d]{1}"); // "[\^][\d]{1}"
std::cout << std::regex_replace(input,rx,"woot");
>> 1653gbdtsr362g2v3f3t52bv^hdtvsbjj;hdfuue,9wootdkkns
This locates a "^" character ([\^]) followed by 1 ({1}) digit ([\d]) and replaces all occurances with "woot".
Hope this code can solve your problem:
#include <iostream>
#include <string>
int main()
{
std::string str = "^1ghhu^7dndn^g";
std::string::iterator first, last;
for ( std::string::iterator it=str.begin(); it!=str.end(); ++it)
{
if(*it == '^')
{
first = it;
it++;
while(isdigit(*it))
{
it++;
}
last = it - 1;
if(first != last)
{
if((last + 1) != str.end())
{
str.erase(first, last + 1);
}
else
{
str.erase(first, str.end());
break;
}
}
}
}
std::cout<< str << std::endl;
return 0;
}
The output:
$ ./erase
ghhudndn^g
Related
How do I take a text file from the command line that opens and reads it, and then count the top words in that file but also removes any special characters. I have this code done here and used maps but it isn't counting every word. For instance "hello." is one word and also "$#%hello<>?/". I have this file from the song shake it off that's supposed to read shake 78 times but I only counted 26 in this code.
#include <iostream>
#include <fstream>
#include <string>
#include <map>
#include <vector>
using namespace std;
string ask(const string& msg) {
string ans;
cout << msg;
getline(cin, ans);
return ans;
}
int main() {
ifstream fin( ask("Enter file name: ").c_str() ) ;
if (fin.fail()) {
cerr << "ERROR"; // this is if the file fails to open
return 1;
}
map<string, int> wordCount;
string entity;
while (fin >> entity) {
vector<string> words;
for (int i = 0, a = 0; i < entity.length(); i++) {
char& c = entity[i];
if (c < 'A' || (c > 'Z' && c < 'a') || c > 'z') {
string word = entity.substr(a, i - a);
a = i + 1;
if (word.length() > 0)
words.push_back(word);
}
}
for (auto & word : words)
wordCount[word]++;
}
fin.close();
vector<string> topWords;
const size_t MAX_WORDS = 10;
for ( auto iter = wordCount.begin(); iter != wordCount.end(); iter ++ ) {
int som = 0, lim = topWords.size();
while (som < lim) {
int i = ( som + lim ) / 2;
int count = wordCount[topWords[i]];
if ( iter -> second > count)
lim = i;
else if ( iter -> second < count )
som = i + 1;
else
som = lim = i;
}
if (som < MAX_WORDS ) {
topWords.insert( topWords.begin() + som, iter -> first );
if ( topWords.size() > MAX_WORDS )
topWords.pop_back();
}
}
for (auto & topWord : topWords)
cout << "(" << wordCount[topWord] << ")\t" << topWord << endl;
return 0;
}
One last thing if yall can probably help me on is how would I also write a code that takes a number from the command line alongside the filename and with that number, display the number of top words corresponding with that number passed in the command line, I would assume there is a parse args involved maybe.
Thank you again!
https://s3.amazonaws.com/mimirplatform.production/files/48a9fa64-cddc-4e45-817f-3e16bd7772c2/shake_it_off.txt
!hi!
#hi#
#hi#
$hi$
%hi%
^hi^
&hi&
*hi*
(hi(
)hi)
_hi_
-hi-
+hi+
=hi=
~hi~
`hi`
:hi:
;hi;
'hi'
"hi"
<hi<
>hi>
/hi/
?hi?
{hi{
}hi}
[hi[
]hi]
|hi|
\hi\
bob bob bob bob bob bob bob !###$$%#&#^*()#*#)_++(#<><#:":bob###$$%#&#^*()#*#)_++(#<><#:":
!###$$%#&#^*()#*#)_++(#<><#:":bob###$$%#&#^*()#*#)_++(#<><#:": !###$$%#&#^*()#*#)_++(#<><#:":bob###$$%#&#^*()#*#)_++(#<><#:":
!###$$%#&#^*()#*#)_++(#<><#:":bob###$$%#&#^*()#*#)_++(#<><#:": !###$$%#&#^*()#*#)_++(#<><#:":bob###$$%#&#^*()#*#)_++(#<><#:
this is the special character test
Your original code is somewhat hard to refine, I have followed your description to get a program that uses STL.
Combine erase with remove_if to remove unwanted chars
Use set to resort by counts
If you have some experience with Boost, it's a use case with multimap or bimap, which can make the code even more cleaner.
#include <algorithm>
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>
using namespace std;
string ask(const string& msg) {
string ans;
cout << msg;
getline(cin, ans);
return ans;
}
int main() {
// ifstream fin(ask("Enter file name: ").c_str());
ifstream fin("shake_it_off.txt");
if (fin.fail()) {
cerr << "ERROR"; // this is if the file fails to open
return 1;
}
map<string, size_t> wordCount;
string entity;
while (fin >> entity) {
entity.erase(std::remove_if(entity.begin(), entity.end(),
[](char ch) { return !isalpha(ch); }),
entity.end());
wordCount[entity] += 1;
}
auto cmp = [](const std::pair<std::string, size_t>& lhs,
const std::pair<std::string, size_t>& rhs) {
return lhs.second > rhs.second;
};
std::multiset<std::pair<std::string, size_t>, decltype(cmp)> top(
wordCount.begin(), wordCount.end(), cmp);
auto it = top.begin();
const size_t MAX_WORDS = 10;
for (size_t i = 0; i < MAX_WORDS && it != top.end(); ++i, ++it) {
cout << "(" << it->first << ")\t" << it->second << endl;
}
return 0;
}
I needed some help with a method on how to parse a string into multiple substrings. The form of the string may be (if (a = b) (a) (b)) or something similar with many opening and closing parentheses. For instance I need a list of strings such as,
element(0) = "(if (a = b) (a) (b))"
element(1) = "(a = b)",
element(2) = "(a)", and
element(3) = "(b)".
I have already tried going through the string by each individual character using String.at() and counting the opening and closing parentheses. However this gets very tricky, and I don't believe its the most efficient or easy way to do this. Any ideas would be greatly appreciated!
You can start from simple algorithm with stack:
#include <iostream>
#include <stack>
#include <string>
#include <deque>
std::deque<std::string> parse(const std::string &str)
{
std::deque<std::string> result;
std::stack<std::string::const_iterator> stack;
for ( auto it = str.begin(); it != str.end();) {
if (*it == '(') {
stack.push(it++);
} else if (*it == ')') {
auto start = stack.top(); stack.pop();
result.push_back(std::string{start, ++it});
} else {
it++;
}
}
return result;
}
int main(int , char **) {
std::string input = "(if (a = b) (a) (b))";
auto output = parse(input);
for(const auto & s:output) {
std::cout << s << " ";
}
std::cout <<std::endl;
return 0;
}
Don't forget to add check if stack underflows
Or, if you want to preserve exact order as in question, use std::map<std::size_t, std::deque<std::string>>:
#include <iostream>
#include <stack>
#include <string>
#include <deque>
#include <map>
std::deque<std::string> parse(const std::string &str)
{
std::map<std::size_t, std::deque<std::string>> map;
std::stack<std::string::const_iterator> stack;
for ( auto it = str.begin(); it != str.end();) {
if (*it == '(') {
stack.push(it++);
} else if (*it == ')') {
auto start = stack.top(); stack.pop();
map[stack.size()].push_back(std::string{start, ++it});
} else {
it++;
}
}
std::deque<std::string> result;
for (const auto & p : map) {
for (const auto & s : p.second) {
result.push_back(s);
}
}
return result;
}
int main(int , char **) {
std::string input = "(if (a = b) (a) (b))";
auto output = parse(input);
for(const auto & s:output) {
std::cout << s << " ";
}
std::cout <<std::endl;
return 0;
}
I've written a function that removes spaces and dashes from a string. It then inserts a space after every 3rd character. My question is can anybody suggest a different way to do this not using stringstream?
#include <iostream>
#include <string>
#include <algorithm>
#include <sstream>
using namespace std;
string FormatString(string S) {
/*Count spaces and dashes*/
auto newEnd = remove_if(S.begin(), S.end(), [](char c){return c == ' ' || c == '-';});
S.erase(newEnd, S.end());
std::stringstream ss;
ss << S[0];
for (unsigned int i = 1; i < S.size(); i++) {
if (i%3==0) {ss << ' ';}
ss << S[i];
}
return ss.str();
}
int main() {
std::string testString("AA BB--- ash jutf-4499--5");
std::string result = FormatString(testString);
cout << result << endl;
return 0;
}
How about using the input string as the output:
#include <iostream>
#include <string>
#include <algorithm>
using namespace std;
string FormatString(string S) {
auto newEnd = remove_if(S.begin(), S.end(), [](char c){return c == ' ' || c == '-';});
S.erase(newEnd, S.end());
auto str_sz = S.length();
/* length + ceil(length/3) */
auto ret_length = str_sz + 1 + ((str_sz - 1) / 3);
S.resize(ret_length);
unsigned int p = S.size()-1;
S[p--] = '\0';
for (unsigned int i = str_sz-1; i>0; i--) {
S[p--] = S[i];
if (i%3 == 0)
S[p--] = ' ';
}
return S;
}
int main() {
std::string testString("AA BB--- ash jutf-4499--5");
std::string result = FormatString(testString);
cout << result << endl;
// AAB Bas hju tf4 499 5
return 0;
}
I'm attempting to tokenize a scripting language in C++ and am struggling currently with including further delimiters as tokens.
#ifndef TOKENIZER_H
#define TOKENIZER_H
#include <regex>
#include <vector>
#include <string>
#include <iostream>
#include <fstream>
#include <cctype>
using namespace std;
regex re("[\\s]+");
vector<string> deconstructDelimit(const string &input) {
string trimmed = input.substr(input.find_first_not_of(" \t\f\v\n\r"));
vector<string> decons;
sregex_token_iterator it(trimmed.begin(), trimmed.end(), re, -1);
sregex_token_iterator reg_end;
for (; it != reg_end; ++it) {
decons.push_back(it->str());
}
return decons;
}
vector<string> tokenize(const string &input) {
vector<string> whitespace;
string currToken;
for (auto it = input.begin(); it != input.end(); ++it) {
if (*it == '\'') {
if (currToken.length()) {
vector<string> decons = deconstructDelimit(currToken);
whitespace.insert(whitespace.end(), decons.begin(), decons.end());
currToken.clear();
}
whitespace.push_back("\'");
++it;
while (*it != '\'' && it != input.end()) {
currToken += *it;
++it;
}
if (currToken.length()) whitespace.push_back(currToken);
whitespace.push_back("\'");
currToken.clear();
} else if (*it == '\"') {
if (currToken.length()) {
vector<string> decons = deconstructDelimit(currToken);
whitespace.insert(whitespace.end(), decons.begin(), decons.end());
currToken.clear();
}
whitespace.push_back("\"");
++it;
while (*it != '\"' && it != input.end()) {
currToken += *it;
++it;
}
if (currToken.length()) whitespace.push_back(currToken);
whitespace.push_back("\"");
currToken.clear();
} else {
currToken += *it;
}
}
if (currToken.length()) {
vector<string> decons = deconstructDelimit(currToken);
whitespace.insert(whitespace.end(), decons.begin(), decons.end());
}
return whitespace;
}
#endif
So far, it is able to convert this code:
i = 1
while(i <= 10) {
print i + " " then i++
}
into these tokens:
i
=
1
while(i
<=
10)
{
print
i
+
"
"
then
i++
}
However, I want to then split this string vector of tokens by other delimiters, such as operators (++, =, <=, +, etc.), keywords (while, then, etc.), and other grammar like parentheses and brackets, preferably without using boost. What would be the best way for me to achieve this, given the string vector output of my current progress?
Edit:
For example, the result of further tokenization would be:
i
=
1
while(i -> while, (, i
<=
10) -> 10, )
{
print
i
+
"
"
then
i++ -> i, ++
}
Which, expanded, would be:
i
=
1
while
(
i
<=
10
)
{
print
i
+
"
"
then
i
++
}
I had the exact same problem as you when I tried to separate items of a math expression using a regex. I successfully found a well working way to do it :
std::vector<std::string> resplit(const std::string& s, std::string rg_str = "\\s+"){
std::cmatch cm;
std::string reg_str = std::string("(.*?)(")+rg_str+std::string(")");
std::string str = s+std::string(" ");
unsigned a = 0;
unsigned b = 1;
std::string subs = str.substr(a, b-a);
std::vector<std::string> elements;
while(b <= str.length()){
subs = str.substr(a, b-a);
if(std::regex_match(subs.c_str(), cm, std::regex(reg_str), std::regex_constants::match_default)){
for(unsigned i=1; i<cm.size(); i++){
std::string cmi(cm[i]);
// The following part can be adapted if you want to include whitespaces or empty strings
if(!std::regex_match(cmi.c_str(), std::regex("\\s*"))){
elements.push_back(std::string(cm[i]));
}
}
a = b;
b = b+1;
} else {
b++;
}
}
return elements;
}
When I use it on resplit("sin(x^2) + 1", "[^0-9a-zPI.]|[ \\(\\)]");, I get : ["sin", "(", "x", "^", "2", ")", "+", "1"].
Don't forget to change :
if(!std::regex_match(cmi.c_str(), std::regex("\\s*"))){
elements.push_back(std::string(cm[i]));
}
into :
if(!std::regex_match(cmi.c_str(), std::regex(""))){
elements.push_back(std::string(cm[i]));
}
if you want to include spaces (it will remove empty strings though, but this is preferable).
I hope it's useful to someone. Have a nice day.
I had the same problem and here is my complete solution which consists of few helper functions:
#include <regex>
#include <string>
#include <iostream>
#include <algorithm>
void ltrim(std::string& str) {
str.erase(str.begin(), std::find_if(str.begin(), str.end(), [](int character) {
return !std::isspace(character);
}));
}
void rtrim(std::string& str) {
str.erase(std::find_if(str.rbegin(), str.rend(), [](int character) {
return !std::isspace(character);
}).base(), str.end());
}
void trim(std::string& str) {
ltrim(str);
rtrim(str);
}
bool is_empty(std::string const& str) {
return str.empty() || str.find_first_not_of(' ') == std::string::npos;
}
std::vector<std::string> split(std::string const& str, std::string const& pattern) {
std::regex regex(pattern);
std::vector<std::string> result(
std::sregex_token_iterator(str.begin(), str.end(), regex, {-1, 0}),
std::sregex_token_iterator()
);
for (auto& token : result) {
trim(token);
}
result.erase(
std::remove_if(
result.begin(),
result.end(),
[](std::string const& str) { return is_empty(str); }
),
result.end()
);
return result;
}
int main() {
for (auto &s: split("sin(x^2) + 1", "[^0-9a-zPI.]|[ \\(\\)]")) {
std::cout << s << '\n';
}
return 0;
}
The key thing I used is std::sregex_token_iterator. As the last argument to its constructor I passed {-1, 0} where -1 represents the parts that are not matched and 0 represents the entire match.
The result of the above code snippet is:
sin
(
x
^
2
)
+
1
I want to replace with a regular expression all the words in the text that are not in the dictionary on the unique identifier. How I can do it? Maybe using callback function?
std::string getToken(const std::smatch &m) {
static int x = 0;
std::string keyword = m[0].str();
std::set<std::string> keywords = {"foo", "bar"};
if (keywords.find(keyword) != keywords.end()) {
return keyword;
} else {
return "i" + x++;
}
}
std::string replacer(std::string text) {
std::string ret = text;
ret = std::regex_replace(ret , std::regex("\\b.*\\b"), getToken); // It's don't works
return ret;
}
Use regex_token_iterator
#include <regex>
#include <string>
#include <sstream>
#include <set>
#include <map>
std::string replacer(std::string text) {
std::string output_text;
std::set<std::string> keywords = { "foo", "bar" };
std::map<std::string, int> ids = {};
int counter = 0;
auto callback = [&](std::string const& m){
std::istringstream iss(m);
std::string n;
if (iss >> n)
{
if (keywords.find(m) != keywords.end()) {
output_text += m + " ";
}
else {
if (ids.find(m) != ids.end()) {
output_text += "ID" + std::to_string(ids[m]) + " ";
}
else {
// not found
ids[m] = counter;
output_text += "ID" + std::to_string(counter++) + " ";
}
}
}
else
{
output_text += m;
}
};
std::regex re("\\b\\w*\\b");
std::sregex_token_iterator
begin(text.begin(), text.end(), re, { -1, 0 }),
end;
std::for_each(begin, end, callback);
return output_text;
}