Binary Cosine Coefficient - C++

I was given the following formula for calculating this:
$\mathrm{sim}(Q, D) = \dfrac{|Q \cap D|}{\sqrt{|Q|}\,\sqrt{|D|}}$
I went ahead and implemented a class to compare strings consisting of a series of words:
#pragma once
#include <vector>
#include <string>
#include <iostream>
#include <vector>
using namespace std;
// A collection of strings backed by a std::vector, with helpers for combining
// two collections and for computing their binary cosine coefficient.
class StringSet
{
public:
// Creates an empty set.
StringSet(void);
// Builds the set from the first no_of_strings entries of the_strings.
StringSet( const string the_strings[], const int no_of_strings);
~StringSet(void);
// Builds the set from a copy of the_strings. Not explicit, so a
// vector<string> converts implicitly to a StringSet.
StringSet( const vector<string> the_strings);
// Appends the_string if it is not already stored.
void add_string( const string the_string);
// Removes the_string if present; returns whether something was removed.
bool remove_string( const string the_string);
// Removes every stored string.
void clear_set(void);
// Number of strings currently stored.
int no_of_strings(void) const;
// Prints the stored strings separated by spaces.
friend ostream& operator <<(ostream& outs, StringSet& the_strings);
// NOTE: operator* is the set UNION and operator+ is the set INTERSECTION.
friend StringSet operator *(const StringSet& first, const StringSet& second);
friend StringSet operator +(const StringSet& first, const StringSet& second);
// Similarity between this set and the_second_set.
double binary_coefficient( const StringSet& the_second_set);
private:
// Underlying storage, kept in insertion order.
vector<string> set;
};
#include "StdAfx.h"
#include "StringSet.h"
#include <iterator>
#include <algorithm>
#include <stdexcept>
#include <iostream>
#include <cmath>
// Default constructor: the underlying vector starts out empty.
StringSet::StringSet()
{
}
// Nothing to release by hand: the vector member cleans up after itself.
StringSet::~StringSet()
{
}
// Copies the supplied strings into the set exactly as given
// (no duplicate check is performed here).
StringSet::StringSet( const vector<string> the_strings)
    : set(the_strings)
{
}
// Copies the first no_of_strings elements of the array into the set
// (no duplicate check is performed here).
StringSet::StringSet( const string the_strings[], const int no_of_strings)
    : set(the_strings, the_strings + no_of_strings)
{
}
// Appends the_string to the set when it is not stored yet.
// If the string is already present, prints a diagnostic to cout and
// terminates the program with exit status 1.
void StringSet::add_string( const string the_string)
{
    const bool already_present =
        find( set.begin(), set.end(), the_string) != set.end();
    if( !already_present )
    {
        set.push_back(the_string);
        return;
    }
    //String is already in the set.
    cout << "String is already in the set";
    exit(1);
}
// Erases the first stored string equal to the_string.
// Returns true when a string was removed, false when none matched.
bool StringSet::remove_string( const string the_string)
{
    const vector<string>::iterator match = find( set.begin(), set.end(), the_string);
    if( match == set.end() )
    {
        return false;
    }
    set.erase(match);
    return true;
}
// Empties the set.
void StringSet::clear_set()
{
    set.clear();
}
// Returns how many strings are stored. The vector's unsigned size is
// converted to int because that is the declared return type.
int StringSet::no_of_strings() const
{
    return static_cast<int>(set.size());
}
// Writes every stored string followed by a space, then a newline, to outs.
// Fix: the output goes to the stream that was passed in rather than always
// to cout, so printing to files or string streams works as expected.
// Returns outs so that insertions can be chained.
ostream& operator <<(ostream& outs, StringSet& the_strings)
{
    for( vector<string>::const_iterator word = the_strings.set.begin();
         word != the_strings.set.end(); ++word)
    {
        outs << *word << " ";
    }
    outs << endl;
    return outs;
}
//Union of two string sets: everything from `first`, followed by each string
//of `second` that has not been collected yet.
StringSet operator *(const StringSet& first, const StringSet& second)
{
    vector<string> merged(first.set);
    for( vector<string>::const_iterator candidate = second.set.begin();
         candidate != second.set.end(); ++candidate)
    {
        const bool unseen =
            find(merged.begin(), merged.end(), *candidate) == merged.end();
        if( unseen )
        {
            merged.push_back(*candidate);
        }
    }
    return StringSet(merged);
}
//Intersection of two string sets: the strings of `first` that also occur in
//`second`, in the order they appear in `first`.
//Fix: each common string is reported once. The previous nested loop pushed a
//string once per matching pair, so an operand that held the same string more
//than once (possible via the array/vector constructors) inflated the result
//and therefore the coefficient computed from its size.
StringSet operator +(const StringSet& first, const StringSet& second)
{
    vector<string> common;
    for( vector<string>::const_iterator word = first.set.begin();
         word != first.set.end(); ++word)
    {
        const bool in_second =
            find(second.set.begin(), second.set.end(), *word) != second.set.end();
        const bool already_added =
            find(common.begin(), common.end(), *word) != common.end();
        if( in_second && !already_added )
        {
            common.push_back(*word);
        }
    }
    return StringSet(common);
}
//Computes a similarity value between this set and the_second_set from the
//size of their intersection and the sizes of the two sets.
//NOTE(review): `a / b * c` is evaluated left to right as `(a / b) * c`, so the
//second square root multiplies the quotient instead of dividing it. The
//intended formula |Q n D| / (sqrt|Q| * sqrt|D|) needs parentheses around the
//product of the two square roots.
//NOTE(review): if this set is empty the divisor is sqrt(0) - confirm whether
//empty sets need a special case.
double StringSet::binary_coefficient( const StringSet& the_second_set)
{
double coefficient;
//operator+ is the intersection; `set` (a vector<string>) is converted to a
//StringSet through the non-explicit vector constructor.
StringSet intersection = the_second_set + set;
coefficient = intersection.no_of_strings() / sqrt((double) no_of_strings()) * sqrt((double)the_second_set.no_of_strings());
return coefficient;
}
However when I try and calculate the coefficient using the following main function:
// Exercise13.cpp : main project file.
#include "stdafx.h"
#include <boost/regex.hpp>
#include "StringSet.h"
using namespace System;
using namespace System::Runtime::InteropServices;
using namespace boost;
//Breaks the_string into a series of words, ignoring punctuation, and returns
//the lower-cased words as a StringSet.
//Each match of the word pattern is lower-cased through a managed String and
//then marshalled back to a native std::string.
//Fix: the unmanaged buffer returned by Marshal::StringToHGlobalAnsi is now
//released with Marshal::FreeHGlobal; previously one buffer leaked per word.
//The unused `cmatch matches` local was removed.
//If the pattern is rejected by the regex engine, the error is printed and the
//program exits with status 1.
StringSet break_string( const string the_string)
{
    regex re;
    StringSet words;
    string search_pattern = "\\b(\\w)+\\b";
    try
    {
        // Assign the regular expression for parsing.
        re = search_pattern;
    }
    catch( regex_error& e)
    {
        cout << search_pattern << " is not a valid regular expression: \""
        << e.what() << "\"" << endl;
        exit(1);
    }
    sregex_token_iterator p(the_string.begin(), the_string.end(), re, 0);
    sregex_token_iterator end;
    for( ; p != end; ++p)
    {
        string word(p->first, p->second);
        String^ managed = gcnew String(word.c_str());
        String^ lowered = managed->ToLower();
        // Keep the IntPtr so the same handle can be handed back to FreeHGlobal.
        IntPtr ansi = Marshal::StringToHGlobalAnsi(lowered);
        string lowered_word(static_cast<const char*>(ansi.ToPointer()));
        Marshal::FreeHGlobal(ansi);
        words.add_string(lowered_word);
    }
    return words;
}
// Splits two sample sentences into word sets and prints their coefficient.
int main(array<System::String ^> ^args)
{
    StringSet first_words = break_string("Here is a string, with some; words");
    StringSet second_words = break_string("There is another string,");
    const double similarity = first_words.binary_coefficient(second_words);
    cout << similarity;
    return 0;
}
I get an index which is 1.5116 rather than a value from 0 to 1.
Does anybody have a clue why this is the case?
Any help would be appreciated.

You need more parentheses in the final calculation. a / b * c is parsed as (a / b) * c, but you want a / (b * c).

Maybe it's just a precedence matter
coefficient = intersection.no_of_strings() / sqrt((double) no_of_strings()) * sqrt((double)the_second_set.no_of_strings());
doesn't specify that the multiplication has to happen before the division. `/` and `*` have the same precedence and associate left to right, so the expression is evaluated as `(a / b) * c`. Did you try making the grouping explicit:
coefficient = intersection.no_of_strings() / (sqrt((double) no_of_strings()) * sqrt((double)the_second_set.no_of_strings()));

Related

Understanding operator overloading & iterator why does it print out "wrhrwwr"?

In the following code the output is "wrhrwwr". I am trying to understand what the iterator is doing and how "++" is overloaded. It seems to skip the 'e' somehow. The code is very unclear to me, so any help would be appreciated.
Thank you
#include <iostream>
using namespace std;
// Wraps a character buffer and exposes an iterator ("hophop") that visits
// the first character and then every character located directly before an
// occurrence of `marker`.
class datas {
private:
    const char *string,marker;   // `string` is a pointer, `marker` a plain const char
    const int len;
public:
    class hophop {
    private:
        const char *pos, next;   // `pos` is a pointer, `next` a plain const char
    public:
        // Character currently pointed at.
        char operator *() const { return *pos; }
        // Steps forward at least once, then keeps going until the character
        // AFTER the current one equals `next`.
        // Fix: also stop at the terminating NUL so the scan no longer reads
        // beyond the end of the buffer (assumes NUL-terminated data, as in the
        // string-literal usage below).
        hophop operator ++() {
            ++pos;
            while (*pos != '\0' && *(pos+1) != next) ++pos;
            return *this;
        }
        // "Not finished" test: true while this position is before o's.
        bool operator !=(const hophop &o) const { return pos < o.pos; }
        hophop(const char *p, char m) : pos(p),next(m) {}
    };
    typedef hophop iterator;
    iterator begin() { return hophop (string, marker); }
    // Fix: the return type was misspelled `itrator`, which does not compile.
    iterator end () { return hophop(string +len ,marker); }
    // Initializers listed in declaration order (string, marker, len).
    datas(const char *d,int l, char m) : string (d), marker(m), len(l) {}
};
// Prints every character the hophop iterator yields for the sample text.
int main( void ) {
    datas chars ("we are where we were", 20, 'e');
    for (char current : chars) {
        cout << current;
    }
    return 0;
}
It will be easier to see by pulling hophop out of the datas class. Now you can see the hophop constructor and what it is up to. I would have removed the return value of the ++operator, set it to void, to point out it does nothing here.
#include <iostream>
// Iterator over a character buffer that yields the character it starts on
// and then every character located directly before an occurrence of `next`.
class hophop {
private:
    const char* pos, next;   // `pos` is a pointer, `next` a plain const char
public:
    hophop(const char* p, char m) : pos(p), next(m) {}
    // Character currently pointed at.
    char operator *() const { return *pos; }
    // Steps forward at least once, then keeps going until the character AFTER
    // the current one equals `next`.
    // Fix: also stop at the terminating NUL so the scan never reads beyond the
    // end of the buffer (assumes NUL-terminated data, as with string literals).
    hophop operator ++() {
        ++pos;
        while (*pos != '\0' && *(pos + 1) != next)
            ++pos;
        return *this;
    }
    // "Not finished" test: true while this position is before o's.
    bool operator !=(const hophop& o) const { return pos < o.pos; }
};
// Owns nothing: stores a pointer to the text, its length and the marker
// character, and hands out hophop iterators over that text.
class datas {
private:
    typedef hophop iterator;
    const char* string, marker;   // `string` is a pointer, `marker` a plain const char
    const int len;
public:
    // Initializers written in declaration order (string, marker, len).
    datas(const char* d, int l, char m) : string(d), marker(m), len(l) {}
    // Iterator positioned on the first character.
    iterator begin() { return iterator(string, marker); }
    // Iterator positioned `len` characters past the start.
    iterator end() { return iterator(string + len, marker); }
};
int main(void) {
datas chars("we are where we were", 20, 'e');
// w r h r w w r
for (char c : chars)
std::cout << c;
std::cout << "\nusing a non range based for loop:" << std::endl;
for (hophop it = chars.begin(); it != chars.end(); ++it)
std::cout << *it;
std::cout << "\nor where the return value could be used:" << std::endl;
auto it = chars.begin();
std::cout << *it;
for (; it != chars.end();)
std::cout << *++it;
}
So now it may be easier to see how the hophop ++ operator is working. operator *() gets called at the beginning of the loop so no matter what the first character is, it gets retrieved. Then the ++operator is called and it moves the iterator at least once forward and until it matches next. Then it returns the character before the match. Look at the comment in main. The first and every character before the e is returned.
If you have not used a debugger much, you should. by putting a break point in operator++ you can see what is happening.
UPDATE
I had previously set the return value of the ++operator to void. As #JaMiT points out, it is appropriate for the operator to return *this. I've also added two more loops, they should be clearer than using a range based loop. The third example actually uses the return value of the ++operator, the first two loops don't.
And, get in the habit of not using namespace std; It will save you from troubles down the road.

C++ Find and save duplicates in vector

I have a vector of a user-defined type.
The vector is filled with elements read from stdin; then I sort it and try to find the duplicates in it and save them.
I've managed to find all unique elements, but I need to find and collect a vector of the duplicates.
I need a hint or a simple solution for this problem.
here's my code below:
Agressor.h
#ifndef Agressor_h
#define Agressor_h
#include <string>
#include <vector>
using namespace std;
// One order record: trader id, side, quantity and price.
// Fixes relative to the previous version:
//  * quantity and price are zero-initialised instead of left indeterminate;
//  * the free operators are `inline`, so this header can be included from
//    more than one translation unit without multiple-definition link errors;
//  * price * quantity is computed in long long to avoid signed int overflow.
// NOTE: display() and operator<< need <iostream> to be included before this
// header, since the header itself only includes <string> and <vector>.
class Agressor{
public:
    std::string traderIdentifier;
    std::string side;            // "B" is printed as '+', "S" as '-'
    int quantity = 0;
    int price = 0;
    std::vector<Agressor> v;
    // Prints the four fields separated by spaces, followed by a newline.
    void display() const {
        std::cout << traderIdentifier << " " << side << " " << quantity << " " << price << std::endl;
    }
    Agressor() = default;
    friend std::ostream &operator<<(std::ostream& stream, const Agressor& item);
    friend const bool operator > (const Agressor &a1, const Agressor &a2);
};
// Writes "<trader><sign><quantity>#<price>\n"; the sign is empty for any side
// other than "B" or "S".
inline std::ostream &operator<<(std::ostream& stream, const Agressor& item) {
    const char* sign = "";
    if (item.side == "B") {
        sign = "+";
    } else if (item.side == "S") {
        sign = "-";
    }
    stream << item.traderIdentifier << sign << item.quantity << "#" << item.price << "\n";
    return stream;
}
// Equal when price * quantity, trader and side all match.
inline const bool operator == (const Agressor &a1, const Agressor &a2){
    const long long total1 = static_cast<long long>(a1.price) * a1.quantity;
    const long long total2 = static_cast<long long>(a2.price) * a2.quantity;
    return total1 == total2
        && a1.traderIdentifier == a2.traderIdentifier
        && a1.side == a2.side;
}
// Orders by price * quantity only.
inline const bool operator > (const Agressor &a1, const Agressor &a2){
    const long long total1 = static_cast<long long>(a1.price) * a1.quantity;
    const long long total2 = static_cast<long long>(a2.price) * a2.quantity;
    return total1 > total2;
}
#endif /* Agressor_h */
main.cpp
#include <iostream>
#include "Agressor.h"
#include <sstream>
using namespace std;
// Appends the delim-separated pieces of s to elems and returns elems.
// Empty pieces between consecutive delimiters are kept; nothing is appended
// for an empty tail after a trailing delimiter.
vector<string> &split(const string &s, char delim, vector<string> &elems)
{
    stringstream reader(s);
    for (string piece; getline(reader, piece, delim); )
    {
        elems.push_back(piece);
    }
    return elems;
}
// Convenience overload: returns the pieces of s in a fresh vector.
vector<string> split(const string &s, char delim)
{
    vector<string> pieces;
    split(s, delim, pieces);
    return pieces;
}
// Predicate for std::unique: two records are equal when price * quantity,
// trader and side all match.
bool equal_comp(const Agressor& a1, const Agressor& a2){
    return (a1.price*a1.quantity == a2.price*a2.quantity)
        && (a1.traderIdentifier == a2.traderIdentifier)
        && (a1.side == a2.side);
}
int main(int argc, const char * argv[]) {
Agressor agr;
while (true) {
std::string sText;
cout << "enter query:" << endl;
std::getline(std::cin, sText);
if(sText == "q"){
cout << "Program terminated by user" << endl;
break;
}else{
std::vector<std::string> sWords = split(sText, ' ');
agr.traderIdentifier = sWords[0];
agr.side = sWords[1];
agr.quantity = stoi(sWords[2]);
agr.price = stoi(sWords[3]);
agr.v.push_back(agr);
vector<Agressor>::iterator it;
sort(agr.v.begin(), agr.v.end(), greater<Agressor>());
//unique(agr.v.begin(), agr.v.end(), equal_comp);
for (vector<Agressor>::const_iterator i = agr.v.begin(); i != agr.v.end(); ++i)
cout << *i << ' ';
}
}
cout << "here we go..." << endl;
vector<Agressor>::iterator it;
sort(agr.v.begin(), agr.v.end(), greater<Agressor>());
//it = unique(agr.v.begin(),agr.v.end(), equal_comp);
//agr.v.resize( distance(agr.v.begin(),it) );
agr.v.erase(unique(agr.v.begin(),agr.v.end(), equal_comp), agr.v.end());
copy(agr.v.begin(), agr.v.end(), ostream_iterator<Agressor>(cout, "\n"));
return 0;
}
You might use something like:
// Returns, for every pair of adjacent equal elements in v, a copy of the
// second element of the pair. The input is expected to be sorted so that
// equal elements sit next to each other; a run of k equal elements
// contributes k - 1 entries.
template <typename T>
std::vector<T> get_duplicates(const std::vector<T>& v)
{
    std::vector<T> repeats;
    for (auto pos = std::adjacent_find(v.begin(), v.end());
         pos != v.end();
         pos = std::adjacent_find(pos, v.end())) {
        ++pos;                      // move onto the second element of the pair
        repeats.push_back(*pos);
    }
    return repeats;
}
std::unique overwrites duplicate values with later non-duplicate values. You can implement a similar algorithm that moves the values to somewhere.
// Like std::unique, but the removed duplicates are moved to d_first instead
// of being overwritten.
//   first, last : range to compact (equal elements must be adjacent)
//   d_first     : output iterator receiving every removed duplicate
//   p           : binary predicate returning true for "equal" elements
// Returns the new logical end of the compacted range.
// Fix: an element is sent to d_first exactly when it compares equal to the
// last kept element. The previous version moved out whatever sat in the slot
// about to be overwritten, which could be a moved-from or non-duplicate value
// (1,1,2,3 produced duplicates {1,2}), and it missed trailing duplicates.
template<class ForwardIt, class OutputIt, class BinaryPredicate>
ForwardIt unique_retain(ForwardIt first, ForwardIt last, OutputIt d_first, BinaryPredicate p)
{
    if (first == last)
        return last;
    ForwardIt result = first;
    while (++first != last) {
        if (p(*result, *first)) {
            // Duplicate of the last kept element: hand it to the caller.
            *d_first++ = std::move(*first);
        } else if (++result != first) {
            // New distinct element: close the gap left by earlier duplicates.
            *result = std::move(*first);
        }
    }
    return ++result;
}
(adapted from this possible implementation of std::unique)
You would then use it like
// Receives the elements that unique_retain removes from agr.v.
vector<Agressor> dups;
// Sort first so that equal records end up next to each other.
sort(agr.v.begin(), agr.v.end(), greater<Agressor>());
auto it = unique_retain(agr.v.begin(),agr.v.end(), std::back_inserter(dups), equal_comp);
// Drop the tail that follows the returned end of the compacted range.
agr.v.erase(it, agr.v.end());

how to use istream_iterators to split an equation?

I'm trying to split a string like ( 1 + 2 ) into a vector, but when using an istream_iterator<string> the parentheses are not split off, so I get vector contents like
"(1", "+", "2)" when I want "(", "1", "+", "2", ")".
Is it possible to use istream_iterators to achieve this?
// Question code: splits eq with istream_iterator<string>. Extraction into a
// std::string stops only at whitespace, so "(1" and "2)" stay glued together.
string eq = "(1 + 2)";
istringstream ss(eq);
istream_iterator<string> begin(ss);
// A default-constructed istream_iterator is the end-of-stream marker.
istream_iterator<string> end;
vector<string> vec(begin, end);
You can do this by creating a custom type Token and using it with
istream_iterator. Bonus feature: this code will parse multiple digits, multiple operators, and nested expressions. So enjoy. :)
#include <iterator>
#include <string>
#include <sstream>
#include <vector>
#include <iostream>
#include <cctype>
using namespace std;
// A single lexical token (a run of digits or one other non-space character)
// that can be read with operator>> and therefore used with istream_iterator.
class Token {
private:
    std::string val;
public:
    Token() : val("") {}
    // Accepts const strings and temporaries as well as non-const lvalues.
    Token(const std::string& v) : val(v) {}
    friend std::istream& operator>>(std::istream &in, Token& tok);
    friend std::ostream& operator<<(std::ostream &out, const Token& tok);
};
// Reads the next token from `in` into `tok`.
// Fixes relative to the previous version:
//  * the digit run is extended with peek()/get() instead of `in >> c` followed
//    by putback(). A number at the very end of the input used to leave the
//    stream failed, so that last token was dropped, and `in >> c` skipped
//    whitespace, so "1 2" was read as the single token "12";
//  * isdigit receives an unsigned char value, as <cctype> requires.
// When nothing can be read the stream is left failed and `tok` is set empty.
std::istream& operator>>(std::istream &in, Token& tok) {
    std::string text;
    char c;
    if (in >> c) {
        if (std::isdigit(static_cast<unsigned char>(c))) {
            text.push_back(c);
            // peek() yields EOF at end of input without setting failbit, so
            // the token collected so far is still delivered.
            for (int next = in.peek();
                 next != std::istream::traits_type::eof() && std::isdigit(next);
                 next = in.peek()) {
                text.push_back(static_cast<char>(in.get()));
            }
        } else if (c == ' ') {
            // Only reachable when whitespace skipping is disabled on `in`:
            // swallow the run of spaces and produce an empty token.
            while (in.peek() == ' ') {
                in.get();
            }
        } else {
            text.push_back(c);
        }
    }
    tok = Token(text);
    return in;
}
// Writes the token text to `out`.
std::ostream& operator<<(std::ostream &out, const Token& tok) {
    out << tok.val;
    return out;
}
// Tokenizes a sample expression and prints each token in square brackets.
int main() {
    string eq = "(1 + 2)";
    //eq = "(100 + 200)"; // multiple digits
    //eq = "(100 + 200 * 300)"; // extra operator
    //eq = "(100 + (200 * 300))"; // nested parens
    istringstream source(eq);
    istream_iterator<Token> first(source);
    istream_iterator<Token> last;
    vector<Token> tokens(first, last);
    for (auto& token : tokens) {
        cout << "[" << token << "] ";
    }
    cout << endl;
}
I don't think you can do it using istream_iterator. Instead, simply do it by hand:
// Hand-written tokenizer over `eq`: whitespace separates tokens, consecutive
// digits are merged into one token, and every other character is a token of
// its own.
vector<string> vec;
vec.reserve(eq.size() / 4); // rough guess
bool in_number = false;
for (char ch : eq) {
    if (isspace(ch)) {
        in_number = false;
        continue;
    }
    const bool digit = isdigit(ch) != 0;
    if (digit && in_number) {
        vec.back().push_back(ch);   // extend the number being built
    } else {
        vec.emplace_back(1, ch);    // start a new one-character token
    }
    in_number = digit;
}

Count the number of unique words (case does not matter for this count)

Hey, so I'm having trouble figuring out the code to count the number of unique words. My thought process in terms of pseudocode was to first make a vector, something like vector<string> unique_word_list; then I would get the program to read each line, so I would have something like while(getline(fin,line)). The hard part for me is coming up with the code that checks the vector (array) to see if the string is already in there. If it's in there I just increase the word count (simple enough), but if it's not in there then I add a new element to the vector. I would really appreciate it if someone could help me out here. I feel like this is not hard, but for some reason I can't think of the code for comparing the string with what's inside the array and determining whether it's a unique word or not.
Don't use a vector - use a container that maintains uniqueness, like std::set or std::unordered_set. Just convert the string into lower case (using std::tolower) before you add it:
// Collects the distinct lower-cased words read from `file`.
std::set<std::string> words;
std::string next;
while (file >> next) {
    // A lambda is used instead of the bare name std::tolower: that name can be
    // an ambiguous overload set when <locale> is also visible, and tolower
    // must be given an unsigned char value.
    std::transform(next.begin(), next.end(), next.begin(),
                   [](unsigned char ch) { return static_cast<char>(std::tolower(ch)); });
    words.insert(next);
}
// Fix: the statement was missing its terminating semicolon.
std::cout << "We have " << words.size() << " unique words.\n";
Cannot help myself writing an answer that makes use of C++ beautiful library. I'd do it like this, with a std::set:
#include <algorithm>
#include <cctype>
#include <string>
#include <set>
#include <fstream>
#include <iterator>
#include <iostream>
// Reads whitespace-separated words from test.txt, lower-cases them, and
// prints the distinct words followed by how many there are.
int main()
{
    std::ifstream ifile("test.txt");
    std::istream_iterator<std::string> it{ifile};
    std::set<std::string> uniques;
    std::transform(it, {}, std::inserter(uniques, uniques.begin()),
        [](std::string str) // make it lower case, so case doesn't matter anymore
        {
            // Fix: tolower is given an unsigned char value; passing a plain
            // char with a negative value is undefined behaviour.
            std::transform(str.begin(), str.end(), str.begin(),
                [](unsigned char ch) { return static_cast<char>(std::tolower(ch)); });
            return str;
        });
    // display the unique elements
    for (const auto& elem : uniques)
        std::cout << elem << " ";
    // display the size:
    std::cout << std::endl << uniques.size();
}
You can also define a new string type in which you change the char_traits so the comparison becomes case-insensitive. This is the code you'd need (much more lengthy than before, but you may end up reusing it), the char_traits overload is copy/pasted from cppreference.com:
#include <algorithm>
#include <cctype>
#include <string>
#include <set>
#include <fstream>
#include <iterator>
#include <iostream>
// Character traits that compare characters case-insensitively, for use as
// std::basic_string<char, ci_char_traits>.
// Fixes relative to the previous version:
//  * find() returns nullptr when no character matches, as char_traits
//    requires. It used to return a pointer one past the searched range, which
//    string searches then took for a hit;
//  * the length parameter of find() is std::size_t, matching char_traits;
//  * toupper is given an unsigned char value; passing a negative plain char
//    is undefined behaviour.
struct ci_char_traits : public std::char_traits<char> {
    static bool eq(char c1, char c2) { return upper(c1) == upper(c2); }
    static bool ne(char c1, char c2) { return upper(c1) != upper(c2); }
    static bool lt(char c1, char c2) { return upper(c1) <  upper(c2); }
    // Three-way comparison of the first n characters: -1, 0 or 1.
    static int compare(const char* s1, const char* s2, std::size_t n) {
        for (; n != 0; --n, ++s1, ++s2) {
            const int a = upper(*s1);
            const int b = upper(*s2);
            if (a < b) return -1;
            if (a > b) return 1;
        }
        return 0;
    }
    // Pointer to the first of the n characters at s that matches a
    // (ignoring case), or nullptr when there is none.
    static const char* find(const char* s, std::size_t n, char a) {
        const int target = upper(a);
        for (; n != 0; --n, ++s) {
            if (upper(*s) == target) return s;
        }
        return nullptr;
    }
private:
    static int upper(char c) { return std::toupper(static_cast<unsigned char>(c)); }
};
using ci_string = std::basic_string<char, ci_char_traits>;
// need to overwrite the insertion and extraction operators,
// otherwise cannot use them with our new type
// Writes the characters of a ci_string to os as raw bytes and returns os.
std::ostream& operator<<(std::ostream& os, const ci_string& str) {
    os.write(str.data(), str.size());
    return os;
}
// Reads one whitespace-delimited word through a std::string and copies its
// characters into str; returns the stream.
std::istream& operator>>(std::istream& os, ci_string& str) {
    std::string word;
    os >> word;
    str.assign(word.begin(), word.end());
    return os;
}
// Reads words from test.txt into a case-insensitive set, then prints the
// distinct words followed by how many there are.
int main()
{
    std::ifstream ifile("test.txt");
    std::istream_iterator<ci_string> first{ifile};
    std::istream_iterator<ci_string> last;
    std::set<ci_string> uniques(first, last); // that's it
    // display the unique elements
    for (const auto& word : uniques) {
        std::cout << word << " ";
    }
    // display the size:
    std::cout << std::endl << uniques.size();
}

How to sort by Alphabetical in C++

I've been trying to sort a vector of Employees by a string data member called last name. I've tried several different approaches: using std::sort on the vector, converting my vector to a list and using its sort, and I even tried comparing the strings with the > operator as shown below:
// Question code: attempts to order `record` by last name and returns the copy.
// NOTE(review): this is a single pass of adjacent swaps, so it does not in
// general leave the vector fully sorted.
// NOTE(review): record.size() - 1 is computed in unsigned arithmetic, so for
// an empty vector the bound wraps around instead of being -1.
vector<Employee>sortE(vector<Employee>record)
{
for (unsigned int i = 0; i < record.size() - 1; i++)
if (record[i].getLastName() > record[i+1].getLastName())
swap(record[i], record[i + 1]);
return record;
}
I thought if I used the above method with the swap function, it would work. But maybe since swap is a string method and I'm doing it with Employees it won't swap properly? But I've also tried it with my own "swap" like below:
// Question code: same single-pass approach as the previous attempt, with the
// swap written out by hand through a temporary.
// NOTE(review): one pass of adjacent swaps does not in general leave the
// vector fully sorted.
// NOTE(review): record.size() - 1 is computed in unsigned arithmetic, so for
// an empty vector the bound wraps around instead of being -1.
vector<Employee>sortE(vector<Employee>record)
{
Employee temp;
for (unsigned int i = 0; i < record.size() - 1; i++)
if (record[i].getLastName() > record[i + 1].getLastName())
{
// Manual three-step swap of neighbouring elements.
temp = record[i];
record[i] = record[i + 1];
record[i + 1] = temp;
}
return record;
}
Either way I can't seem to get it to work properly, any insight or help would be appreciated.
You could try using a lambda if using C++11 or newer (also, I don't know what your Employee class looks like, so I made a trivial one). Also, check here for online execution: http://cpp.sh/6574i
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>
// Minimal employee record holding a first and a last name.
class Employee
{
public:
    Employee( const std::string& firstName, const std::string& lastName )
        : _firstName( firstName ), _lastName( lastName )
    {}
    ~Employee() {}

    // Accessors return copies of the stored names.
    std::string FirstName() const { return _firstName; }
    std::string LastName() const { return _lastName; }

    // "<first> <last>", separated by a single space.
    std::string FullName() const
    {
        std::string full( _firstName );
        full += " ";
        full += _lastName;
        return full;
    }

private:
    std::string _firstName;
    std::string _lastName;
};
// Sorts three sample employees by last name (ascending) and prints them.
int main()
{
    std::vector< Employee > employees {
        Employee( "Suresh", "Joshi" ),
        Employee( "Mats", "Sundin" ),
        Employee( "Steve", "Nash" )
    };
    const auto byLastName = [](const Employee& lhs, const Employee& rhs) -> bool
    {
        return lhs.LastName() < rhs.LastName();
    };
    std::sort(employees.begin(), employees.end(), byLastName);
    for ( const auto& employee : employees )
    {
        std::cout << employee.FullName() << std::endl;
    }
}
You can provide a lambda to std::sort:
// Sorts the employees in ascending order of last name.
std::vector<Employee> ve;
std::sort(ve.begin(), ve.end(),
    [](const Employee& a, const Employee& b)
    {
        return a.getLastName() < b.getLastName();
    });
That said, in real life last names are not necessarily unique, and when they compare equal it's a good idea to fall back on first name, and if that's also equal some other field like an employee id:
// Order by last name, then first name, then id.
// Fix: the final tie-break uses `<`. With `==` the comparator returned true
// for two employees that are equal in every field (including an element
// compared with itself), which is not a strict weak ordering and is
// undefined behaviour for std::sort.
// The && groups are parenthesised to make the intended nesting explicit.
return lhs.getLastName() < rhs.getLastName() ||
       (lhs.getLastName() == rhs.getLastName() &&
        (lhs.getFirstName() < rhs.getFirstName() ||
         (lhs.getFirstName() == rhs.getFirstName() &&
          lhs.getId() < rhs.getId())));