Power Sets ~ Recursion ~ Hasse Diagrams - c++

(SEE BELOW ALSO)
So I've got some functions and some operators for a set-manipulation program I'm coding, and I want to have power sets as a utility too (never mind the comments in the code). I don't want to use a binary approach, but I want to use recursion. I saw in Ralph Oberste-Vorth's book Bridge to Abstract Mathematics a definition of power sets (page 65), and on the next page I see all these equivalences like "if S = X, then P(S) = P(X)," and "if A and B are sets, then P(A) U P(B) = P(A U B)," and I'm reminded of recursion. I think recursion could work here but I'm not sure. I was playing around with Mathematica's Combinatorica package, and that one Haverford College paper on Hasse Diagrams, and I thought I could work out, in much the same way as is done here four minutes in, some kind of method based on the corresponding diagram for some set of size n, but I don't know that that will lead me the right way. I would like to build off of my already-built functions/operators.
#include <iostream>
#include <set>
#include <ostream>
#include <istream>
#include <vector>
using namespace std;
set<int> SetUnion( set<int> A , set<int> B ) // tus koj hlub
{
//A.insert( B.begin() , B.end() );
//return A;
set<int> pump;
for( set<int>::iterator cycle = A.begin() ; cycle != A.end() ; ++cycle )
{
pump.insert(*cycle);
}
for( set<int>::iterator cycle = B.begin() ; cycle != B.end() ; ++cycle )
{
pump.insert(*cycle);
}
return pump;
}
set<int> SetIntersection( set<int> A , set<int> B ) // tus koj hlub
{
set<int> pump;
for( set<int>::iterator cycle = A.begin ; cycle != A.end() ; ++cycle )
{
if( B.find(*cycle) != B.end() )
{
pump.insert(*cycle);
}
}
return pump;
}
set<int> SetDifference( set<int> A , set<int> B )
{
set<int> pump;
for( set<int>::iterator cycle = A.begin ; cycle != A.end() ; ++cycle )
{
if( B.find(*cycle) == B.end() )
{
pump.insert(*cycle);
}
}
return pump;
}
set<int> SymmetricDifference( set<int> A , set<int> B )
{
return SetUnion( SetDifference( A , B ) , SetDifference( B , A ) );
//return SetDifference( SetUnion( A , B ) , SetIntersection( A , B ) );
}
set<set<int>> PowerSet( set<int> A )
{
/*statements*/
}
set<int> Complement( set<int> A , int B )
{
set<int> pump;
for( int i = 1 ; i<=B ; i++ )
{
pump.insert(i);
}
set<int> collect = SetDifference( A , pump );
return collect;
}
set<int> operator+(set<int> A , set<int> B)
{
return SetUnion( A, B );
}
set<int> operator+(set<int> A , int B)
{
set<int> C;
C.insert(B);
return SetUnion( A , C );
}
set<int> operator+(int A , set<int> B)
{
set<int> C;
C.insert(A);
return SetUnion( B , C );
}
set<int> operator-(set<int> A , set<int> B)
{
set<int> pump;
for( set<int>::iterator cycle = A.begin ; cycle != A.end() ; ++cycle )
{
if( B.find(*cycle) == B.end() )
{
pump.insert(*cycle);
}
}
return pump;
}
set<int> operator-(set<int> A , int B)
{
set<int> C;
C.insert(B);
set<int> pump = SetDifference( A , C );
return C;
}
set<int> operator-(int A , set<int> B)
{
set<int> C;
C.insert(A);
set<int> pump = SetDifference( B , C );
return pump;
}
set<int> operator^(set<int> A , set<int> B)
{
return SetUnion( A , B );
}
set<int> operator^(set<int> A , int B)
{
set<int> C;
C.insert(B);
set<int> pump = SetUnion( A , C );
return pump;
}
set<int> operator^(int A , set<int> B)
{
set<int> C;
C.insert(A);
set<int> pump = SetUnion( B , C );
return pump;
}
set<int> operator%(set<int> A , set<int> B)
{
return SymmetricDifference( A , B );
}
set<int> operator%(set<int> A , int B)
{
set<int> C;
C.insert(B);
set<int> pump = SymmetricDifference( A , C );
return pump;
}
set<int> operator%(int A , set<int> B)
{
set<int> C;
C.insert(A);
set<int> pump = SymmetricDifference( B , C );
return pump;
}
set<int> operator~(set<int> A)
{
set<int> pump;
vector<int> hose;
for( set<int>::iterator cycle = A.begin() ; cycle != A.end() ; ++cycle )
{
hose.push_back(*cycle);
}
int last_value =
}
ostream& operator<<(ostream& out , set<int>& B) // tus koj hlub
{
int count=0;
if( B.size() == 0 )
{
out << "{}";
return out;
}
else
{
set<int>::iterator it;
out << "{";
for( it = B.begin() ; it != B.end() ; ++it )
{
++count;
if( count == B.size() )
{
out << *it;
}
else
{
out << *it << ", ";
}
}
out << "}";
return out;
}
}
istream& operator>>(istream& in , set<int>& B) // tus koj hlub
{
int user_input;
while(1)
{
in>>user_input;
if(user_input == -1)
break;
B.insert(user_input);
}
return in;
}
Also, why do I get an error on my "<<" operator symbol in the function here:
ostream& operator<<(ostream& out , set<set<int>>& B)
{
int count=0;
if( B.size() == 0 )
{
out << "{}";
return out;
}
else
{
set<set<int>>::iterator it;
out << "{";
for( it = B.begin() ; it != B.end() ; ++it )
{
count++;
if( count == B.size() )
{
out << *it;
}
else
{
out << *it << ", ";
}
}
out << "}";
return out;
}
}
The answer given by Mr. Shields produces the following error. I'm trying to figure out why it doesn't work:
Error: class "std::_Tree_const_iterator, std::allocator>>>> "has no member "insert"
ANSWER FROM AUTHOR:
set<set<int>> PowerSet( const set<int> A )
{
set<set<int>> ps;
if( A.size() == 0 )
{
ps.insert( set<int>() );
return ps;
}
set<int>::iterator it = A.begin();
int n = *it;
set<int> s1 = A;
s1.erase( n );
set<set<int>> ps1 = PowerSet( s1 );
set<set<int>> ps2;
for( set<set<int>>::iterator it = ps1.begin() ; it != ps1.end() ; ++it )
{
set<int> ss = *it;
ss.insert( n );
ps2.insert (ss );
}
for( set<set<int>>::iterator it = ps1.begin() ; it != ps1.end() ; ++it )
{
ps.insert(*it);
}
for( set<set<int>>::iterator it = ps2.begin() ; it != ps2.end() ; ++it )
{
ps.insert( *it );
}
return ps;
}

This C++ code below is horribly inefficient, but I think it should give you the idea for how to do this recursively. The recursion rule is basically this:
P({}) = {{}}
the power set of the empty set is the set containing the empty set
P({n} U S) = { {n} U T | T in P(S) } U P(S)
every set in the power set of {n} U S either contains n or does not contain n - exactly one of each for each set in the power set of S
Be aware that a set of cardinality K has a power set of cardinality 2^K. So you don't want to perform this operation on any large sets!
set<set<int>> PowerSet( set<int> A )
{
set<set<int>> PA;
if (A.empty())
{
//case: P({}) = {{}}
PA.insert(A);
}
else
{
//case: P({n} U S) = { {n} U T | T in P(S) } U P(S)
int n = *A.begin();
A.erase(A.begin());
//A is now "S" from the explanation above this code
auto PS = PowerSet(A);
for (auto T = PS.begin(); T != PS.end(); ++T)
{
//add each set T from P(S)
PA.insert(*T);
//add each set T from P(S) with n included as well
T->insert(n);
PA.insert(*T);
}
}
return PA;
}

Related

AddressSanitizer throwing SEGV on unknown address C++

I have a task where I have to make a class depending on std::multiset. The idea is that it gets 2 multisets in its constructor, they then have to be sorted and put back in those variables while the class is in scope.To provide an example:
std::multiset<int> a;
std::multiset<int> b;
a.insert(4); a.insert(3); a.insert(1); a.insert(10); // a={1,3,4,10}
b.insert(3); b.insert(2); // b={2,3}
if(true){
multisets_merge_view<int>(a,b);
// sorts them and makes it, so that:
// a={1,2,3,3}
// b={4,10}
}
// and here, the multisets need to go back to their original state:
// a={1,3,4,10}
// b={2,3}
We have to work with g++ 9.3.0 under Ubuntu 20.04, with the following flags: -fsanitize=address,leak,undefined -O3 -Wall -Wextra -WerrorThe code - that was given and I cannot touch - to check if everything works correctly:
#include <iostream>
#include <numeric>
#include "msmview.h"
#include <set>
#include <iterator>
#include <string>
#include <algorithm>
#include "msmview.h"
struct string_size_less
{
bool operator()( const std::string& lhs,
const std::string& rhs ) const
{
return lhs.size() < rhs.size();
}
};
const int max = 1000;
bool check()
{
std::multiset<int> a;
std::multiset<int> b;
a.insert( 1 );
a.insert( 7 );
a.insert( 4 );
b.insert( 4 );
b.insert( 3 );
std::multiset<std::string> s;
std::multiset<std::string> t;
s.insert( "C++" );
s.insert( "Pascal" );
s.insert( "Haskell" );
t.insert( "C" );
t.insert( "Ada" );
std::multiset<int> x;
std::multiset<int> y;
for( int i = 0 ; i < max; ++i )
{
x.insert( i );
y.insert( i );
}
bool c = x.count( max / 3 ) == 2;
if ( !c )
{
multisets_merge_view<int> mvi( a, b );
multisets_merge_view<int> mvm( x, y );
multisets_merge_view<std::string> mvs( s, t );
c = x.count( max / 3 ) == 2 && 3 == s.size() &&
0 == s.count( "Pascal" ) && 1 == t.count( "Pascal" ) &&
0 == x.count( max - 1 ) && 1 == s.count( "C" ) &&
0 == a.count( 7 ) && 0 == b.count( 3 );
}
if ( !c || 1 != x.count( max / 3 ) || 1 != s.count( "Pascal" ) ||
0 != s.count( "C" ) || 2 != t.size() || 1 != x.count( max - 1 ) ||
1 != a.count( 7 ) || 1 != b.count( 3 ) )
{
return false;
}
if ( c )
{
const multisets_merge_view<int> mvi( a, b );
const multisets_merge_view<int> mvm( x, y );
const multisets_merge_view<std::string> mvs( s, t );
if ( 5 != mvi.size() || 2 != mvi.count( 4 ) || 0 != mvi.count( max ) ||
5 != mvs.size() || 1 != mvs.count( "C++" ) || 2 * max != mvm.size() ){
return false;
}
}
else
{
std::cout << "neither";
return false;
}
std::multiset<int, std::greater<int> > ga;
std::multiset<int, std::greater<int> > gb;
ga.insert( 0 );
ga.insert( max );
gb.insert( max );
std::multiset<std::string, string_size_less> sa;
std::multiset<std::string, string_size_less> sb;
sa.insert( "C++" );
sa.insert( "Haskell" );
sb.insert( "Ada" );
sb.insert( "SQL" );
sb.insert( "LISP" );
if ( c )
{
const multisets_merge_view<std::string, string_size_less> mvs( sa, sb );
const multisets_merge_view<int, std::greater<int> > mvi( ga, gb );
return 2 == sa.count( "C++" ) && 1 == sb.count( "C++" ) && 1 == gb.count( 0 ) &&
3U == sb.size() && ga.size() == ga.count( max ) && 3 == mvi.size();
}
return false;
}
int main()
{
std::cout
<< "Your solution is "
<< (check() ? "" : "not ")
<< "ready for submission."
<< std::endl;
}
And my code for the upper mentioned class:
#ifndef MSMVIEW_H
#define MSMVIEW_H
#include <set>
template<typename T1, typename T2 = std::greater<T1>>
class multisets_merge_view : public std::multiset<T1,T2>{
public:
multisets_merge_view(std::multiset<T1> &a, std::multiset<T1> &b);
multisets_merge_view(std::multiset<T1,T2> &a, std::multiset<T1,T2> &b);
~multisets_merge_view();
int size() const { return _size; }
int count(T1 t) const;
private:
std::multiset<T1> *_apo;
std::multiset<T1> *_bpo;
std::multiset<T1,T2> *_apt;
std::multiset<T1,T2> *_bpt;
std::multiset<T1> _ao;
std::multiset<T1> _bo;
std::multiset<T1,T2> _at;
std::multiset<T1,T2> _bt;
unsigned int _size;
};
template<typename T1, typename T2>
multisets_merge_view<T1,T2>::multisets_merge_view(std::multiset<T1> &a, std::multiset<T1> &b):_ao(a),_bo(b),_size(a.size()+b.size()){
_apo = &a;
_bpo = &b;
std::multiset<T1> tmp;
tmp.insert(a.begin(),a.end());
tmp.insert(b.begin(),b.end());
a.clear();
b.clear();
for(typename std::multiset<T1>::const_iterator i(tmp.begin()),end(tmp.end()); i!=end; ++i){
if(a.size() < _ao.size()){
a.insert(*i);
}else{
b.insert(*i);
}
}
};
template<typename T1, typename T2>
multisets_merge_view<T1,T2>::multisets_merge_view(std::multiset<T1,T2> &a, std::multiset<T1,T2> &b):_at(a),_bt(b),_size(a.size()+b.size()){
_apt = &a;
_bpt = &b;
std::multiset<T1,T2> tmp;
tmp.insert(a.begin(),a.end());
tmp.insert(b.begin(),b.end());
a.clear();
b.clear();
for(typename std::multiset<T1,T2>::const_iterator i(tmp.begin()),end(tmp.end()); i!=end; ++i){
if(a.size() < _at.size()){
a.insert(*i);
}else{
b.insert(*i);
}
}
};
template<typename T1, typename T2>
multisets_merge_view<T1,T2>::~multisets_merge_view(){
if(_apo != nullptr && _bpo != nullptr){
*_apo = _ao;
*_bpo = _bo;
}else if(_apt != nullptr && _bpt != nullptr){
*_apt = _bt;
*_bpt = _at;
}
};
template<typename T1, typename T2>
int multisets_merge_view<T1,T2>::count(T1 t) const{
int count = 0;
if(_apo != nullptr && _bpo != nullptr){
for(typename std::multiset<T1>::const_iterator i = _ao.begin(); i != _ao.end(); i++){
if(*i==t) count++;
}
for(typename std::multiset<T1>::const_iterator i = _bo.begin(); i != _bo.end(); i++){
if(*i==t) count++;
}
}else if(_apt != nullptr && _bpt != nullptr){
for(typename std::multiset<T1,T2>::const_iterator i = _at.begin(); i != _at.end(); i++){
if(*i==t) count++;
}
for(typename std::multiset<T1,T2>::const_iterator i = _bt.begin(); i != _bt.end(); i++){
if(*i==t) count++;
}
}
return count;
};
#endif
When I run the compiler, AddressSanitizer throws a memory reallocation error found in ~multisets_merge_view(), probably when *_apt=_at; *_bpt=_bt; happens (without providing a second template parameter in the main program, everything works fine, but as I said I can't modify that).
Is there a way to fix this while not touching the main program and still using the -fsanitize=address,leak,undefined?

remove duplicates from sorted array, same solution with different code way has different output

#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include "hello_world.h"
using namespace std;
class Solution1 {
public:
int removeDuplicates(vector<int>& nums) {
return distance(nums.begin(), removeDuplicates(nums.begin(), nums.end(), nums.begin()));
}
template<typename InIt, typename OutIt>
OutIt removeDuplicates(InIt begin, InIt end, OutIt output){
while(begin != end){
*output++ = *begin;
begin = upper_bound(begin, end, *begin);
}
return output;
}
};
class Solution2 {
public:
int removeDuplicates(vector<int>& nums) {
vector<int>::iterator output = nums.begin();
while(nums.begin() != nums.end()){
*output++ = *nums.begin();
nums.begin() = upper_bound(nums.begin(), nums.end(), *nums.begin());
}
return distance(nums.begin(), output);
}
};
int main()
{
//helloworld test;
//test.print();
int num[3] = {1,1,2};
vector<int> nums(num, num + 3);
Solution2 so;
int a = so.removeDuplicates(nums);
cout<<a<<endl;
return 0;
}
In main function, when i use the class solution1, the code can remove duplicates numbers from the arrary [1 1 2] ,to output [1 2]. In order to simplify the code, I changed the solution1 to solution2, but the solution2 can not execute right output, anybody know the reason?
In this while loop
while(nums.begin() != nums.end()){
*output++ = *nums.begin();
nums.begin() = upper_bound(nums.begin(), nums.end(), *nums.begin());
}
you are always using the iterator nums.begin() in the condition and in this statement
*output++ = *nums.begin();
because this statement
nums.begin() = upper_bound(nums.begin(), nums.end(), *nums.begin());
does not change the iterator returned by a new call of nums.begin().
You need to introduce a variable of the iterator type before the loop like
auto it = nums.begin();
while( it != nums.end()){
*output++ = *it;
it = upper_bound( it, nums.end(), *it );
}
Here is a demonstrative program
#include <iostream>
#include <vector>
#include <iterator>
#include <algorithm>
int main()
{
std::vector<int> v = { 1, 2 };
size_t i = 0;
while ( v.begin() != v.end() )
{
v.begin() = std::upper_bound( v.begin(), v.end(), *v.begin() );
if ( ++i == 10 ) break;
}
std::cout << "i = " << i << '\n';
i = 0;
auto it = v.begin();
while ( it != v.end() )
{
it = std::upper_bound( it, v.end(), *it );
if ( ++i == 10 ) break;
}
std::cout << "i = " << i << '\n';
return 0;
}
Its output is
i = 10
i = 2
To erase duplicates after the loop use the member function erase like
nums.erase( output, nums.end() );
The same can be done using the standard algorithm std::unique. For example
nums.erase( std::unique( nums.begin(), nums.end() ), nums.end() );

How to avoid repetitions when comparing two vectors by blocks

I am trying to compare two vectors which are of size of multiple of 4 and data is represented in terms of blocks (4 elements). Each vector block has a unique number it e.g {0,0,0,0}, {0,0,0,1}, {0,0,0,2} or {0,0,0,0,0,0,0,1,0,0,0,2} and {0,0,0,2,0,0,0,1} so on. I am using iterator which increments i+=4 every time. I wrote a small function which do the job but the blocks tend to repeat. I am not sure how t0 remove repetition of these block. e.g vector_A {0,0,0,0,0,0,0,1} vector_B {0,0,0,1,0,0,0,0,0,0,0,2} it should give local_vector1 {0,0,0,2} instead I get local vector_1 {0,0,0,1,0,0,0,2,0,0,0,0,0,0,0,2}
void comparing_vectors_by_block(std::vector<int> vector_A, std::vector<int>
vector_B)
{
const int blockSize = 4;
std::vector<int> local_vector1;
std::cout << "size of the vector_A: " << vector_A.size() << std::endl;
std::cout << "size of the vector_B: " << vector_B.size() << std::endl;
for (auto it_A = std::begin(vector_A); it_A != std::end(vector_A); it_A+=4)
{
for (auto it_B = std::begin(vector_B); it_B != std::end(vector_B); it_B += 4)
{
bool match = equal(it_A, it_A + blockSize, it_B, it_B + blockSize);
if (!match)
{
std::cout << "match :" << std::endl;
local_vector1.insert(local_vector1.end(), it_B, it_B + blockSize);
}
else
{
std::cout << "not matched :" << std::endl;
}
}
}
Use a vector of arrays of four ints to represent the data.
std::vector<std::array<int,4>> vect1;
If this data has some other meaning. It is better to use the OOP way and create
a struct or a class to represent that four numbers data. Then implement operator == and other helpful methods for the struct/class.
struct foo{
int a;
int b;
int c;
ind d;
};
bool foo::operator==(const X& lhs, const X& rhs){ /* do actual comparison */ }
Then just iterate the vector and compare the elements using == like you would do if the vector was of type int for example.
for(auto& x : vector_A)
{
if(std::find(vector_B.begin(), vector_B.end(), x) != vector_B.end()) {
local_vector1.append(x);
}
}
If I got you right, you want to get the symmetric difference of two vectors of blocks. I.e. for A = {0,0,0,3,0,0,0,0,0,0,0,1} and B = {0,0,0,1,0,0,0,0,0,0,0,2} you want to have local_vector1 = {0,0,0,3,0,0,0,2}.
In your implementation you compare each block of vector A with each block of vector B -- of course, you'll get extra mismatches. My (also non-optimized) solution:
std::vector<int> get_blocked_vectors_diff( const std::vector<int>& vector_A, const std::vector<int>& vector_B )
{
const int blockSize = 4;
std::vector<int> local_vector;
for ( auto it_A = std::begin( vector_A ); it_A != std::end( vector_A ); it_A += 4 )
{
bool found_in_B = false;
for ( auto it_B = std::begin( vector_B ); !found_in_B && it_B != std::end( vector_B ); it_B += 4 )
{
found_in_B = std::equal( it_A, it_A + blockSize, it_B, it_B + blockSize );
}
if ( !found_in_B )
{
local_vector.insert( local_vector.end( ), it_A, it_A + blockSize );
}
}
return local_vector;
}
void comparing_vectors_by_block(std::vector<int> vector_A, std::vector<int> vector_B)
{
auto A_mines_B = get_blocked_vectors_diff( vector_A, vector_B );
auto B_mines_A = get_blocked_vectors_diff( vector_B, vector_A );
auto local_vector1( A_mines_B );
local_vector1.insert( local_vector1.end(), B_mines_A.begin( ), B_mines_A.end( ) );
for ( auto a : local_vector1 )
{
std::cout << a << " ";
}
std::cout << std::endl;
}
Note, that we need two parts of the answer: A\B and B\A, so get_blocked_vectors_diff is called twice.
If you change your data structure like Petar Velev suggested, you will be able to shorten get_blocked_vectors_diff function:
std::vector<int> get_blocked_vectors_diff( const std::vector<Block>& vector_A, const std::vector<Block>& vector_B )
{
std::vector<Block> local_vector;
for ( auto& x : vector_A )
{
if ( std::find( vector_B.begin( ), vector_B.end( ), x ) == vector_B.end( ) )
{
local_vector.push_back( x );
}
}
return local_vector;
}
Better solutions could be obtained if you sort your vectors of blocks first.

C++: read dataset and check if vector<Class> is subset of vector<Class>

I have the following piece of code. The code creates a vector Dataset, each element of which is a vector. It also creates a vector S.
I want to check which vector of Dataset contain vector of S. Apparently I am doing something wrong, because for the following example,
Dataset is:
a b c
a d
a b d
and S:
a b
it should print: 0 2
and for me it prints: 0 1 2
#include <iostream>
#include <fstream>
#include <sstream>
#include <string.h>
#include <string>
#include <time.h>
#include <vector>
#include <algorithm>
using namespace std;
class StringRef
{
private:
char const* begin_;
int size_;
public:
int size() const { return size_; }
char const* begin() const { return begin_; }
char const* end() const { return begin_ + size_; }
StringRef( char const* const begin, int const size )
: begin_( begin )
, size_( size )
{}
bool operator<(const StringRef& obj) const
{
return (strcmp(begin(),obj.begin()) > 0 );
}
};
/************************************************
* Checks if vector B is subset of vector A *
************************************************/
bool isSubset(std::vector<StringRef> A, std::vector<StringRef> B)
{
std::sort(A.begin(), A.end());
std::sort(B.begin(), B.end());
return std::includes(A.begin(), A.end(), B.begin(), B.end());
}
vector<StringRef> split3( string const& str, char delimiter = ' ' )
{
vector<StringRef> result;
enum State { inSpace, inToken };
State state = inSpace;
char const* pTokenBegin = 0; // Init to satisfy compiler.
for(auto it = str.begin(); it != str.end(); ++it )
{
State const newState = (*it == delimiter? inSpace : inToken);
if( newState != state )
{
switch( newState )
{
case inSpace:
result.push_back( StringRef( pTokenBegin, &*it - pTokenBegin ) );
break;
case inToken:
pTokenBegin = &*it;
}
}
state = newState;
}
if( state == inToken )
{
result.push_back( StringRef( pTokenBegin, &str.back() - pTokenBegin ) );
}
return result;
}
int main() {
vector<vector<StringRef> > Dataset;
vector<vector<StringRef> > S;
ifstream input("test.dat");
long count = 0;
int sec, lps;
time_t start = time(NULL);
cin.sync_with_stdio(false); //disable synchronous IO
for( string line; getline( input, line ); )
{
Dataset.push_back(split3( line ));
count++;
};
input.close();
input.clear();
input.open("subs.dat");
for( string line; getline( input, line ); )
{
S.push_back(split3( line ));
};
for ( std::vector<std::vector<StringRef> >::size_type i = 0; i < S.size(); i++ )
{
for(std::vector<std::vector<StringRef> >::size_type j=0; j<Dataset.size();j++)
{
if (isSubset(Dataset[j], S[i]))
{
cout << j << " ";
}
}
}
sec = (int) time(NULL) - start;
cerr << "C++ : Saw " << count << " lines in " << sec << " seconds." ;
if (sec > 0) {
lps = count / sec;
cerr << " Crunch speed: " << lps << endl;
} else
cerr << endl;
return 0;
}
Your StringRef type is dangerous because it contains a const char * pointer, but no concept of ownership. So the pointer could be invalidated at some point after the object is constructed.
And indeed this is what happens here: You have a single string (line) and create StringRefs with pointers to its internal data. When the string is later modified, these pointers are invalidated.
You should create a vector<std::string> instead to prevent this problem.

C++ Graph Building Question

Hello I want to build a graph for connecting sentences.
for example my files has following lines.
ab cd ef
ef gh ij
ij kl mn
xy ab cd
So I want each node should have one line i.e. ab cd ef should be one node and it should be connected to ef gh ij which should be connected to ij kl mn.
Basically last word of a line should be connect to any line whose first word matches with last word.
Here is what I have come up so far, but failing when I add Edges.
#include <map>
#include <string>
#include <deque>
#include <list>
#include <iostream>
#include <stack>
#include <fstream>
#include <vector>
class GraphNode {
public:
GraphNode(std::string name) {
std::vector<std::string> words;
std::string::size_type lastPos = name.find_first_not_of(' ', 0);
std::string::size_type pos = name.find_first_of(' ', lastPos);
while (std::string::npos != pos || std::string::npos != lastPos){
words.push_back(name.substr(lastPos, pos - lastPos));
lastPos = name.find_first_not_of(' ', pos);
pos = name.find_first_of(' ', lastPos);
}
first = words[0];
middle = " ";
for ( int i = 1; i < (int)words.size() - 1; i++) {
middle = words[i] + " ";
}
last = words[words.size() - 1 ];
}
~GraphNode() {};
std::string first;
std::string middle;
std::string last;
};
struct GraphNodeCompare {
bool operator() (const GraphNode& lhs, const GraphNode& rhs) {
return lhs.last < rhs.last;
}
};
class Graph {
public:
Graph() {}
~Graph() {}
typedef std::map <GraphNode, std::list<GraphNode>, GraphNodeCompare > GraphType;
void AddVertex ( GraphNode vertexID );
void AddEdge ( GraphNode vertexLeft, GraphNode vertexRight);
std::list<GraphNode> GetVertices(GraphNode vertexID);
friend std::ostream& operator << (std::ostream& os, const Graph& dt);
private:
GraphType m_graph;
protected:
};
void Graph::AddVertex(GraphNode vertexID) {
GraphType::const_iterator iter = m_graph.find(vertexID);
if ( iter == m_graph.end()) {
std::list<GraphNode> list;
m_graph[vertexID] = list;
}
}
void Graph::AddEdge( GraphNode vertexLeft, GraphNode vertexRight) {
AddVertex(vertexLeft);
AddVertex(vertexRight);
m_graph[vertexLeft].push_back(vertexRight);
m_graph[vertexRight].push_back(vertexLeft);
}
std::list<GraphNode> Graph::GetVertices(GraphNode vertexID) {
GraphType::const_iterator iter = m_graph.find(vertexID);
std::list<GraphNode> list;
if ( iter != m_graph.end()){
return m_graph[vertexID];
}
return list;
}
std::ostream& operator << (std::ostream& os, const Graph& graph) {
std::cout << "---------------------------------------------" << std::endl;
std::map<GraphNode, std::list<GraphNode>, GraphNodeCompare >::const_iterator iter;
for ( iter = graph.m_graph.begin(); iter != graph.m_graph.end(); ++iter) {
std::cout << iter->first.first << iter->first.middle << iter->first.last << " : " ;
std::list<GraphNode> list = iter->second;
std::list<GraphNode>::const_iterator iter1;
for ( iter1 = list.begin(); iter1 != list.end(); ++iter1) {
std::cout << iter1->first << iter1->middle << iter1->last << '\t' ;
}
std::cout << std::endl;
}
std::cout << "---------------------------------------------" << std::endl;
return os;
}
int main( int argc, char **argv) {
Graph *pGraph = new Graph();
std::ifstream dataFile("D:\\personal\\data\\datas3.txt");
if ( dataFile.peek() == EOF ) {
return -1;
}
if (dataFile.is_open()) {
while (! dataFile.eof() ) {
std::string line;
std::getline (dataFile,line);
GraphNode node(line);
pGraph->AddVertex(node);
std::list<GraphNode> vertices = pGraph->GetVertices(node);
for ( std::list<GraphNode>::iterator itr = vertices.begin(); itr != vertices.end(); ++itr) {
pGraph->AddEdge( node, *itr);
}
//std::cout << line << std::endl;
}
}
dataFile.close();
//std::cout << *pGraph;
delete pGraph;
}
I can suggest this tiny, non object-oriented implementation. Works fine for you problem:
#include <iostream>
#include <sstream>
#include <fstream>
#include <vector>
#include <string>
typedef std::vector< std::string > Node;
typedef std::pair< int, int > Edge;
// Node stuff
std::string firstWord ( const Node& node ) { return *node.begin(); }
std::string lastWord ( const Node& node ) { return *node.rbegin(); }
void addWord ( Node& node, std::string s ) { node.push_back( s ); }
bool isNotEmpty( const Node& node ) { return !node.empty(); }
bool precedes( const Node& a, const Node& b ) { return lastWord( a ) == firstWord( b ); }
struct Graph
{
void addNode ( const Node& node ) { nodes.push_back( node ); }
void addEdge ( const int& from, const int& to ) { edges.push_back( Edge( from, to ) ); }
std::vector< Edge > edges;
std::vector< Node > nodes;
};
std::ostream& operator << ( std::ostream& out, const Graph& graph )
{
int esize = graph.edges.size();
for( int i = 0; i < esize; ++i )
{
int index1 = graph.edges[ i ].first, index2 = graph.edges[ i ].second;
for( int j = 0; j < graph.nodes[ index1 ].size(); ++j )
out << graph.nodes[ index1 ][ j ] << ' ';
out << "----> ";
for( int j = 0; j < graph.nodes[ index2 ].size(); ++j )
out << graph.nodes[ index2 ][ j ] << ' ';
out << std::endl;
}
return out;
}
int main ()
{
Graph graph;
std::ifstream inputFile( "input.txt" );
std::string s;
// reading from file and constructing graph vertices
if( inputFile.is_open() )
while( !inputFile.eof() )
{
std::getline( inputFile, s );
std::stringstream ss( s );
Node node;
while( ss >> s )
addWord( node, s );
if( isNotEmpty( node ) )
graph.addNode( node );
}
inputFile.close();
// constructing graph edges
std::vector< Node > nodes ( graph.nodes );
int sz = nodes.size();
for( int i = 0; i < sz; ++i )
for( int j = 0; j < sz; ++j )
if( precedes( nodes[ i ], nodes[ j ] ) )
graph.addEdge( i, j );
// let's see what we got
std::cout << graph;
return 0;
}
Also, as #spraff says, if you want to use a well-designed graph library, have a look at Boost.
Have you considered one of the excellent Boost libraries?