Custom Functor in std::set - c++

#include <iostream>
#include <set>
#include <algorithm>
using namespace std;
int order[26];
struct lexcmp
{
bool operator()(const string &s1,const string &s2)
{
int i=0;
int j=min(s1.size(),s2.size());
while(1)
{
if(order[s1[i]-'a']<order[s2[i]-'a'])
return true;
if(order[s1[i]-'a']>order[s2[i]-'a'])
return false;
if(i==j-1)
return false;
i++;
}
}
};
int main()
{
string s;
cin>>s;
for(int i=0;i<s.size();i++)
{
order[s[i]-'a']=i;
}
set<string,lexcmp> store;
int m;
cin>>m;
while(m--)
{
string q;
cin>>q;
store.insert(q);
}
for(auto i=store.begin();i!=store.end();i++)
{
cout<<*i<<endl;
}
}
return 0;
}
Problem in making the Custom Functor
The problem is, i have a new order of elements (instead of simple a-z). //Saved in order array
All i want is order the given strings on the based of new order.
for eg: Order is : bacdefghijklmnopqrstuvwxyz
So if the strings are ss , aa , bb
The new ordering will be bb,aa,ss.
The Code is working fine but it is giving me a problem while the strings are like "pas" "p" to be compared.
p should come before pas but it is coming after.
What modifications should i do in the functor?

Here's one approach:
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <algorithm>
#include <numeric>
#include <array>
#include <string>
#include <locale>
struct lexcmp {
lexcmp() { std::iota(order_.begin(), order_.end(), std::int_fast8_t{}); }
explicit lexcmp(std::string const& order) {
assert(order.size() == order_.size());
for (std::size_t i{}; i != order_.size(); ++i) {
char const order_letter = order[i];
assert(std::isalpha(order_letter, std::locale::classic()));
assert(std::islower(order_letter, std::locale::classic()));
order_[i] = order_letter - 'a';
}
auto unique_order_letters = [this]{
auto order = order_;
std::sort(order.begin(), order.end());
return order.end() - std::unique(order.begin(), order.end()) == 0;
};
assert(unique_order_letters());
}
bool operator ()(std::string const& a, std::string const& b) const {
auto const a_len = a.size(), b_len = b.size();
std::size_t i{};
for (auto const len = std::min(a_len, b_len); i != len; ++i) {
if (auto const diff = order_[a[i] - 'a'] - order_[b[i] - 'a']) {
return diff < 0;
}
}
return i == a_len && i != b_len;
}
private:
std::array<std::int_fast8_t, 26> order_;
};
Online Demo

Related

Avoiding or improving brute force method: Counting character repetition from all words in a dictionary text file

I wrote this utility function that will take the contents of a alpha dictionary file and will add up the repetition count of each letter or character of the alphabet.
This is what I have so far:
#include <algorithm>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>
// this function just generates a map of each of the alphabet's
// character position within the alphabet.
void initCharIndexMap( std::map<unsigned, char>& index ) {
char c = 'a';
for ( unsigned i = 1; i < 27; i++ ) {
index[i] = c;
c++;
}
}
void countCharacterRepetition( std::vector<std::string>& words, const std::map<unsigned, char> index, std::map<char, unsigned>& weights ) {
unsigned count = 0;
for ( auto& s : words ) {
std::transform(s.begin(), s.end(), s.begin(), ::tolower );
for ( std::size_t i = 0; i < s.length(); i++ ) {
using It = std::map<unsigned, char>::const_iterator;
for ( It it = index.cbegin(); it != index.cend(); ++it ) {
if ( s[i] == it->second ) {
count++;
weights[it->second] += count;
}
count = 0;
}
}
}
}
int main() {
std::vector<std::string> words;
std::string line;
std::ifstream file;
file.open( "words_alpha.txt" );
while( std::getline( file, line )
words.push_back(line);
std::map<unsigned, char> index;
initCharIndexMap(index);
std::map<char, unsigned> weights;
countCharRepetition(words, index, weights);
for (auto& w : weights)
std::cout << w.first << ' ' << w.second << '\n';
return EXIT_SUCCESS;
}
It gives me this output which appears to be valid at first glance:
a 295794
b 63940
c 152980
d 113190
e 376455
f 39238
g 82627
h 92369
i 313008
j 5456
k 26814
l 194915
m 105208
n 251435
o 251596
p 113662
q 5883
r 246141
s 250284
t 230895
u 131495
v 33075
w 22407
x 10493
y 70578
z 14757
The dictionary text file that I am using can be found from this github page.
This appears to be working. It took about 3 minutes to process on my current machine which isn't horrible, however, this seems like a brute force approach. Is there a more efficient way of doing a task like this?
If you're just counting how many times each character appears, then all you need is this:
int frequency[26] = {};
for (auto const& str : words) {
for (int i=0; i<str.size(); i++) {
frequency[tolower(str[i]) - 'a']++;
}
}
for (int i=0; i<26; i++) {
cout << char(i + 'a') << " " << frequency[i] << endl;
}
If you want to include upper and lowercase characters, change the array size to 90, remove the tolower call, and change your loop so that it prints only if i is between a and z or A and Z.
If you are just going for performance, I would say you still have to read in the file char by char - but I think all the searching is processing that could be optimised.
I would say the following pseudo code should be faster (I'll try and knock up an example later):
void read_dictionary(char *fileName)
{
// Pre-sized array (faster access)
std::array<int, 26> alphabet_count = {0};
// Open the file
FILE *file = fopen(fileName, "r");
if (file == NULL)
return; //could not open file
// Read through the file
char c;
while ((c = fgetc(file)) != EOF)
{
// If it is a letter a-z
if ( ((c >= 'a') && (c <= 'z')) ||
{
// Increment the array value for that letter
++alphabet_count[c - 'a'];
}
// else if letter A-Z
else if ( ((c >= 'A') && (c <= 'Z')) ||
{
// Increment the array value for that letter
++alphabet_count[c - 'A'];
}
}
}
The point here is that we are not searching for matches we are using the char value to index into the array to increment the alphabet letter
All of the aforementioned answers assume continuity between a and z, and history will tell you that is not always the case. A solution doesn't need to assume this, and can still be efficient.
#include <iostream>
#include <fstream>
#include <iterator>
#include <climits>
#include <cctype>
int main(int argc, char *argv[])
{
if (argc < 2)
return EXIT_FAILURE;
unsigned int count[1U << CHAR_BIT] {};
std::ifstream inp(argv[1]);
for (std::istream_iterator<char> it(inp), it_eof; it != it_eof; ++it)
++count[ std::tolower(static_cast<unsigned char>(*it)) ];
for (unsigned i=0; i<(1U << CHAR_BIT); ++i)
{
if (std::isalpha(i) && count[i])
std::cout << static_cast<char>(i) << ' ' << count[i] << '\n';
}
}
Output
[~ user]$ clang++ --std=c++14 -O2 -o main main.cpp
[~ user] time ./main /usr/share/dict/words
a 199554
b 40433
c 103440
d 68191
e 235331
f 24165
g 47094
h 64356
i 201032
j 3167
k 16158
l 130463
m 70680
n 158743
o 170692
p 78163
q 3734
r 160985
s 139542
t 152831
u 87353
v 20177
w 13864
x 6932
y 51681
z 8460
real 0m0.085s
user 0m0.073s
sys 0m0.005s
That would probably be sufficiently fast enough for your application, whatever it is.
#include <array>
#include <fstream>
#include <iostream>
int main()
{
std::ifstream file;
file.open( "words_alpha.txt" );
char c;
std::array<std::size_t, 26> counts {};
while( file >> c)
++counts[c-'a'];
for(char c = 0; c<26;++c)
std::cout<<'('<<c+'a'<<','<<counts[c]<<")\n";
}
Your version keeps track of words unnecessarily: you're simply counting characters in a file. The separation into words and lines doesn't matter. It's also unnecessary to store the words.
You could aim for readable high-level code and write something like this:
// https://github.com/KubaO/stackoverflown/tree/master/questions/letter-count-56498637
#include <cctype>
#include <fstream>
#include <iostream>
#include <iterator>
#include <limits>
#include <utility>
#include <vector>
//*
int main() {
Histogram<char, 'a', 'z'> counts;
std::ifstream file;
file.open("words_alpha.txt");
for (auto ch : make_range<char>(file)) counts.count(tolower(ch));
for (auto c : std::as_const(counts)) std::cout << c.value << ' ' << c.count << '\n';
}
This is the bare minimum of how modern C++ code should look
This requires the Histogram class, and a make_range adapter for input streams. You can't merely implement std::begin and std::end for std::ifstream, because the member end() function takes precedence and interferes (see this answer). The code below is the fragment marked //* above.
template <typename T>
void saturating_inc(T &val) {
if (val < std::numeric_limits<T>::max()) val++;
}
template <typename T, T min, T max>
class Histogram {
using counter_type = unsigned;
using storage_type = std::vector<counter_type>;
storage_type counts;
public:
template <typename U>
void count(U val) {
if (val >= min && val <= max) saturating_inc(counts[size_t(val - min)]);
}
Histogram() : counts(1 + max - min) {}
struct element {
T value;
counter_type count;
};
class const_iterator {
T val;
storage_type::const_iterator it;
public:
const_iterator(T val, storage_type::const_iterator it) : val(val), it(it) {}
const_iterator &operator++() {
++val;
++it;
return *this;
}
bool operator!=(const const_iterator &o) const { return it != o.it; }
element operator*() const { return {val, *it}; }
};
const_iterator begin() const { return {min, counts.begin()}; }
const_iterator end() const { return {0, counts.end()}; }
};
template <class C, class T>
class istream_range {
C &ref;
public:
istream_range(C &ref) : ref(ref) {}
std::istream_iterator<T> begin() { return {ref}; }
std::istream_iterator<T> end() { return {}; }
};
template <class T, class C>
istream_range<C, T> make_range(C &ref) {
return {ref};
}
This concludes the example.

Performance warning for isspace function, conversion from int to bool

#include <iostream>
#include <algorithm>
#include <vector>
#include <string>
#include <iterator>
using namespace std;
bool notSpace(char c) {
return !isspace(c);
}
bool isSpace(char c) {
return isspace(c);
}
vector<string> split(const string& s) {
vector<string> words;
string::const_iterator i = s.begin();
while (i != s.end()) {
i = find_if(i, s.end(), notSpace); // " "
if (i != s.end()) {
string::const_iterator j = i;
j = find_if(i, s.end(), isSpace);
words.push_back(string(i, j));
i = j;
}
}
return words;
}
int main() {
string test = "Hello world, I'm a simple guy";
vector<string> words = split(test);
for (vector<string>::size_type i = 0; i < words.size();i++) {
cout << words[i] << endl;
}
return 0;
}
When I compile the code I get this warning:
warning C4800: 'int': forcing value to bool 'true' or 'false'
(performance warning)
on the return of this function:
bool isSpace(char c) {
return isspace(c);
}
Is good habit changing isspace(c) to (isspace(c) != 0) ? Or is it just an unnecessary fussiness?
Take a look at the code below:
#include <iostream>
using namespace std;
bool f()
{
return 2;
}
int main()
{
cout <<f()<<endl;
return 0;
}
it will print 1 when you return 2, that's why you get the warning.
someone may think a bool is kind of small integer, but it isn't.
If you go back to C, there was no bool type, that's why many C methods (like isspace) returns int, even WINDOWS type of BOOL is actually kind of integer and can return other values but TRUE (1) or FALSE (0).

Replace a loop to implement Contains on an array

Is there a way to use the Standard Library to simplify the loop on an array of struct that compare with strncmp?
Below is my attempt that fails because std::count_if complains there is no instance of overloaded function std::begin matches.
#include "stdafx.h"
#include "afx.h"
#include <string.h>
#include <iostream>
#include <algorithm>
#include <string>
struct nodeobject
{
CString ObjectType;
nodeobject() {}
explicit nodeobject(CString objectType) { ObjectType = objectType; }
};
struct nodeinput
{
struct nodeobject Object;
};
// Original function I want to rewrite to remove the for loop and the strncmp
static int ContainsObjectType(int collectionSize, struct nodeinput collection[], char* objectType)
{
auto found = 0;
for (auto idx = 0; idx < collectionSize; idx++)
{
if (strncmp(objectType,
collection[idx].Object.ObjectType,
strlen(collection[idx].Object.ObjectType)) == 0)
{
found = 1;
}
}
return found;
}
#if 0
// The implementation below does not compile because there is no instance of
// overloaded function std::begin matches
static int ContainsObjectType(int collectionSize, struct nodeinput collection[], char* objectType)
{
auto numFound = std::count_if(std::begin(collection),
std::end(collection),
[](struct nodeinput oneNode)
{
return strncmp(objectType, oneNode.Object.ObjectType, oneNode.Object.ObjectType) == 0);
});
return numFound > 0;
}
#endif
int main()
{
struct nodeobject node1("fokker");
struct nodeobject node2("airbus");
struct nodeobject node3("boing777");
struct nodeinput collection[] = {node1, node2, node3};
auto nintnode = 3;
auto found = ContainsObjectType(nintnode, collection, "boing777");
std::cout << found << std::endl;
return 0;
}
The error is:
C2784: const _Elem *std::begin(std::initializer_list<_Elem>) noexcept': could not deduce template argument for 'std::initializer_list<_Elem>' from 'nodeinput []
Just make collection a pointer, maybe a const one:
static int ContainsObjectType(int collectionSize, struct nodeinput *collection, char* objectType)
{
auto numFound = std::count_if(
collection,
collection + collectionSize,
...);
}
std::count_if's two first arguments must be iterators, and pointers are, in fact, primitive iterators.
std::begin(iterable) is merely pointer, std::end(iterable) is pointer + <data length>.
Below the complete solution in case someone searches how to replace a c-style loop on an array of objects with the library standard.
#include "stdafx.h"
#include "afx.h"
#include <string.h>
#include <iostream>
#include <algorithm>
#include <string>
#include <vector>
// Legacy code
class NodeObject
{
public:
CString objectType; // Keeping CString to minimize the impact for now
NodeObject() {}
explicit NodeObject(CString objectType) : objectType(objectType) { }
};
// Legacy code
class NodeInput
{
public:
NodeObject object;
explicit NodeInput(NodeObject object) : object(object) { }
};
// Code using vector and count_if
static int Contains(std::vector<NodeInput> collection, std::string objectType)
{
auto numFound = std::count_if(std::begin(collection),
std::end(collection),
[&](const NodeInput oneInput)
{
return (std::strncmp(objectType.c_str(),
oneInput.object.objectType,
strlen(oneInput.object.objectType)) == 0);
});
return numFound > 0;
}
// Code using array pointer and count_if
static int Contains(int collectionSize, NodeInput* collection, char* objectType)
{
auto numFound = std::count_if(collection,
collection + collectionSize,
[&](const NodeInput oneInput)
{
return (std::strncmp(objectType,
oneInput.object.objectType,
strlen(oneInput.object.objectType)) == 0);
});
return numFound > 0;
}
// Legacy code
static int ContainsOriginal(int collectionSize, NodeInput collection[], char* objectType)
{
auto found = 0;
for (auto idx = 0; idx < collectionSize; idx++)
{
if (strncmp(objectType,
collection[idx].object.objectType,
strlen(collection[idx].object.objectType)) == 0)
{
found = 1;
}
}
return found;
}
int main()
{
// Legacy code
NodeInput arrayCollection[] { NodeInput(NodeObject("fokker")),
NodeInput(NodeObject("airbus")),
NodeInput(NodeObject("boing777")) };
auto arrayCollectionSize = 3;
auto found = Contains(arrayCollectionSize, arrayCollection, "boing777");
std::cout << found << std::endl;
// Code using Standard Library
std::vector<NodeInput> collection{ NodeInput(NodeObject("fokker")),
NodeInput(NodeObject("airbus")),
NodeInput(NodeObject("boing777")) };
found = Contains(collection, "boing777");
std::cout << found << std::endl;
return 0;
}

What's wrong with strcmp and c_str() here?

The following code is my solution for the Largest Number. However it will crash.
If I directly compare tampa and tempb in cmp() by using tempa > tempb instead of strcmp(), it is OK. So what is wrong here?
#include <iostream>
#include <string>
#include <stack>
#include <vector>
#include <climits>
#include <cstdio>
#include <algorithm>
#include <sstream>
#include <cstring>
using namespace std;
bool cmp(string a, string b) {
string tempa = a + b;
string tempb = b + a;
int res = strcmp(tempa.c_str(), tempb.c_str());
if (res < 0) {
return false;
} else return true;
}
class Solution {
public:
string largestNumber(vector<int>& nums) {
vector<string> str;
string res = "0";
if (nums.empty()) {
return res;
}
for (int i = 0; i < nums.size(); ++i) {
stringstream ss;
ss << nums[i];
str.push_back(ss.str());
}
sort(str.begin(), str.end(), cmp);
res = "";
for (int i = 0; i < str.size(); ++i) {
res += str[i];
}
return res[0] == '0' ? "0" : res;
}
};
int main()
{
Solution sol;
int data[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
vector<int> nums(data, data + sizeof(data) / sizeof(data[0]));
string res = sol.largestNumber(nums);
cout << res << endl;
return 0;
}
Your comparison is not equivalent to tempa > tempb. It is equivalent to !(tempa < tempb) or tempa >= tempb. And a "greater than or equal" comparison does not satisfy the requirements of std::sort. Specifically, the comparison needs to be irreflexive, which is to say that for an operand A, cmp(A, A) should be false, but with >=, it is true.
If you want the strcmp equivalent of tempa > tempb, then do:
return strcmp(tempa.c_str(), tempb.c_str()) > 0;
On closer inspection, your comparison is fundamentally broken. Why are you concatenating a and b together to form temporary strings for comparison? As one simple example of what can go wrong there, an empty string will compare equal to every other string, because:
(string("anything") + "") == (string("") + "anything")

What is the best way to convert a sting of doubles into a vector of doubles?

If I have the string "2.5 4.2 6.9 1.1", how can I convert that into a vector of doubles as efficiently as possible?
vector<double> convert_string_to_vec(std::string const & str)
{
std::istringstream input{str};
vector<double> output{std::istream_iterator<double>{input},
std::istream_iterator<double>{}};
return output;
}
You'll need to include <sstream> as well as <iterator> for this to work. Here's a working example.
This is how I would generally do it. Possibly not the most efficient way, but very simple.
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
int main()
{
std::string d("2.5 4.2 6.9 1.1");
std::stringstream s(d);
std::vector<double> result;
double temp;
while(s >> temp)
{
result.push_back(temp);
}
for(size_t i = 0; i < result.size(); ++i)
{
std::cout << result[i] << "\n";
}
return 0;
}
Here's a unique way:
#include <iostream>
#include <vector>
#include <string>
#include <sstream>
template <class Facet>
struct erasable_facet : Facet
{
erasable_facet() : Facet(0) { }
~erasable_facet() { }
};
std::vector<double> convert(const std::string& str)
{
using num_get = std::num_get<char>;
erasable_facet<num_get> facet;
std::stringbuf buf(str);
std::vector<double> v;
std::ios ios(nullptr);
std::ios_base::iostate err = std::ios_base::goodbit;
double d;
std::istreambuf_iterator<char> it, end;
do
{
it = facet.get(&buf, end, ios, err, d);
buf.sbumpc(); // skip space
if (!(err & std::ios_base::failbit) &&
!(err & std::ios_base::badbit))
v.push_back(d);
else
return v;
} while (it != end);
return v;
}
int main()
{
std::string str = "1.24 5.32 9.53";
auto v = convert(str);
}