Edit: Added constructors and add function
Consider the following three blocks:
//ONE
// Growable array that stores its elements in raw byte storage and moves them
// with memcpy. Only suitable for element types that can be copied bytewise
// (no constructors/destructors are ever run on the stored elements).
//
// Invariant: `data` always points at storage for at least `capacity` elements
// (and at least one element), and used <= capacity.
template<typename T>
class List1 {
private:
    uint32_t capacity;  // number of T slots available in data
    uint32_t used;      // number of slots currently holding a value
    T* data;            // storage obtained from new char[], see allocate()

    // Allocates raw storage for `count` elements (never fewer than one, so the
    // doubling in checkGrow always makes progress).
    static T* allocate(uint32_t count) {
        return reinterpret_cast<T*>(new char[sizeof(T) * (count ? count : 1)]);
    }

    // Releases storage from allocate(). The memory was created by new char[],
    // so it has to be released as a char array.
    static void release(T* block) {
        delete [] reinterpret_cast<char*>(block);
    }

    // Doubles the storage when it is full, keeping the existing elements.
    void checkGrow() {
        if (used < capacity)
            return;
        const uint32_t grown = capacity ? capacity * 2 : 1;
        T* fresh = allocate(grown);
        memcpy(fresh, data, sizeof(T) * used);
        release(data);
        data = fresh;
        capacity = grown;
    }
public:
    // Empty list with room for one element.
    List1() : capacity(1), used(0), data(allocate(1)) {}

    // Empty list with room for initialSize elements (a request for 0 is
    // rounded up to 1). The storage now really is as large as capacity says.
    List1(uint32_t initialSize) :
        capacity(initialSize ? initialSize : 1), used(0), data(allocate(capacity)) {}

    // Deep copy: allocates the full capacity it reports.
    List1(const List1& orig) :
        capacity(orig.capacity), used(orig.used), data(allocate(orig.capacity)) {
        memcpy(data, orig.data, used * sizeof(T));
    }

    // Deep-copy assignment; without it the implicit operator= would share the
    // buffer between two lists and free it twice.
    List1& operator=(const List1& orig) {
        if (this != &orig) {
            T* fresh = allocate(orig.capacity);
            memcpy(fresh, orig.data, orig.used * sizeof(T));
            release(data);
            data = fresh;
            capacity = orig.capacity;
            used = orig.used;
        }
        return *this;
    }

    // Number of bytes write() produces: the element count followed by the elements.
    uint32_t serializeSize() const { return sizeof(used) + used*sizeof(T); }

    // Replaces the contents with the list serialized at p (the layout written
    // by write()). Returns the position just past the consumed bytes.
    char* read(char* p) {
        uint32_t count;
        memcpy(&count, p, sizeof(count));  // p may not be aligned for uint32_t
        p += sizeof(uint32_t);
        T* fresh = allocate(count);
        memcpy(fresh, p, count * sizeof(T));  // copy FROM the buffer INTO the list
        release(data);                        // drop the previous storage
        data = fresh;
        used = count;
        capacity = count ? count : 1;
        return p + count*sizeof(T);
    }

    // Serializes the list to p, which must have room for serializeSize() bytes.
    // Returns the position just past the written bytes.
    char* write(char* p) const {
        memcpy(p, &used, sizeof(used));
        p += sizeof(uint32_t);
        memcpy(p, data, used * sizeof(T));
        return p + used * sizeof(T);
    }

    ~List1() { release(data); }

    // Appends v, growing the storage if it is full.
    void add(const T& v) {
        checkGrow();
        data[used++] = v;
    }
    uint32_t getUsed() const{
        return used;
    }
    uint32_t getCapacity() const{
        return capacity;
    }
    //const T& operator [](int i) const { return data[i]; }
    //T& operator [](int i) { return data[i]; }
    // Returns a copy of element i; i is not range-checked.
    T getData (int i) const{
        return data[i];
    }
    // Size of the stored elements in bytes.
    uint32_t size() const { return used * sizeof(T); }
};
//TWO
List1<uint32_t> temp=in.readList<uint32_t>(); // <List1<uint32_t>>();
//BREAKPOINT HERE
for(uint i=0;i<15;i++){
//cout<<temp[i]<<endl;
cout<<temp.getData(i)<<endl;
}
//THREE
// Copies the next T out of the buffer at p, then advances p and shrinks
// availRead by sizeof(T). Performs no bounds check of its own.
template<typename T>
T _read() {
    T value = *(T*)p;
    availRead -= sizeof(T);
    p += sizeof(T);
    return value;
}
// Checked read: asks checkAvailableRead for sizeof(T) bytes first, then
// hands the actual extraction to _read<T>().
template<typename T>
T read(){
    checkAvailableRead(sizeof(T));
    T value = _read<T>();
    return value;
}
// Reads a length-prefixed list: a uint32_t element count followed by that
// many T values, each appended to a List1<T> constructed with that count.
template<typename T>
List1<T> readList(){
    const uint32_t count = read<uint32_t>();
    List1<T> result(count);
    for (uint idx = 0; idx != count; ++idx) {
        result.add(read<T>());
        // Debug print that was observed to mask the corruption:
        //cout<<result.getData(idx)<<endl;
    }
    return result;
}
Basically, the issue is that the contents of data change after returning from getData, as shown below.
(gdb) p/u *temp.data@15
$1 = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}
(gdb) p/u *temp.data@15
$2 = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}
(gdb) p/u *data@15
$3 = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}
[New Thread 8444.0xad8]
(gdb) p/u *data@15
$4 = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}
[New Thread 8444.0x214c]
(gdb) p/u *temp.data@15
$5 = {0, 1, 2, 3, 4, 5, 83, 0, 2150008464, 1, 3742232646, 0, 168272, 6, 0}
For some reason adding a print statement to readList fixes the issue, but that isn't a reasonable solution. I've tried a few different variations on the code but none of them worked.
I'm not sure what the issue is or how to even begin debugging as the problem occurs between the return statement and the next iteration of the loop (there's nothing to step into there).
Any advice would be greatly appreciated.
List1(const List1& orig) :
capacity(orig.capacity), used(orig.used), data((T*)new char[used * sizeof(T)]) {
memcpy(data, orig.data, used * sizeof(T));
}
For List1 to work correctly, there must never be a List1 whose capacity is greater than the actual allocated size. However, this creates a new List1 that violates this invariant if orig has a capacity greater than its used.
You probably meant capacity(orig.used).
Same issue here:
List1(uint32_t initialSize) :
capacity(initialSize), used(0), data((T*)new char[sizeof(T)]) {}
If you set capacity to initialSize, you can't allocate space for just 1 T.
This is also broken: delete (void*)old; — what you allocate with new[], you must free with delete[].
Note that List1 can only be used to hold POD types (plain old data) that don't have constructors or destructors. If you're trying to use List1 to hold anything more complex, you're way off base with your design.
Given two key-value lists, I am trying to combine the two sides by matching the keys and applying a function to the two values when the keys match. In my case I want to multiply the values. A small example to make it more clear:
Left keys: { 1, 2, 4, 5, 6 }
Left values: { 3, 4, 1, 2, 1 }
Right keys: { 1, 3, 4, 5, 6, 7 };
Right values: { 2, 1, 1, 4, 1, 2 };
Expected output keys: { 1, 4, 5, 6 }
Expected output values: { 6, 1, 8, 1 }
I have been able to implement this on the CPU using C++ using the next code:
// Inner-joins two key-sorted lists on the host: for every key present on
// both sides, emits (key, leftValue * rightValue). Keys must be sorted
// ascending and unique within each list.
int main() {
    const int leftKeys[] = { 1, 2, 4, 5, 6 };
    const int leftValues[] = { 3, 4, 1, 2, 1 };
    const int rightKeys[] = { 1, 3, 4, 5, 6, 7 };
    const int rightValues[] = { 2, 1, 1, 4, 1, 2 };
    const int leftCount = sizeof(leftKeys) / sizeof(leftKeys[0]);
    const int rightCount = sizeof(rightKeys) / sizeof(rightKeys[0]);

    int leftIndex = 0, rightIndex = 0;
    std::vector<std::tuple<int, int>> result;
    while (leftIndex < leftCount && rightIndex < rightCount) {
        // Exactly one branch runs per iteration, so the bounds checked by the
        // loop condition still hold whenever an element is read.
        if (leftKeys[leftIndex] < rightKeys[rightIndex]) {
            leftIndex++;
        } else if (leftKeys[leftIndex] > rightKeys[rightIndex]) {
            rightIndex++;
        } else {
            // Keys match: combine the two values and advance both sides.
            result.push_back(std::make_tuple(leftKeys[leftIndex], leftValues[leftIndex] * rightValues[rightIndex]));
            leftIndex++;
            rightIndex++;
        }
    }

    // Print results
    for (const auto& entry : result) {
        std::cout << "Key: " << std::get<0>(entry) << "; Value: " << std::get<1>(entry) << "\n";
    }
}
However, I have the input keys and values in Thrust's device_vectors and I need the results on the GPU as well. Therefore it would be more efficient if I did not need to copy all inputs to the host and all outputs back to the device.
The problem is that I cannot find a Thrust function that can be used to combine two lists using a set of keys (and apply a function to both values). Does such a function exist, or is there an easy way to implement it myself, or should I just do this on the host?
Update:
The following assumptions can be made about the input:
The keys are always sorted.
No duplicate keys exist within a single list (between the lists, duplicate keys of course do exist, otherwise the result would be empty).
Update 2:
While implementing the second approach in @Robert's answer, I got stuck at the transformation. My code so far is below:
// Binary functor over (key, value) tuples: when both keys are equal it yields
// (key, left value * right value); otherwise it yields the marker (-1, -1).
// The base-class typedefs now name thrust::tuple, matching the types the
// call operator actually takes and returns.
struct multiply_transformation : public thrust::binary_function<thrust::tuple<int, int>, thrust::tuple<int, int>, thrust::tuple<int, int> >
{
    __host__ __device__
    thrust::tuple<int, int> operator()(thrust::tuple<int, int> d_left, thrust::tuple<int, int> d_right) const
    {
        if (thrust::get<0>(d_left) == thrust::get<0>(d_right)) {
            return thrust::make_tuple(thrust::get<0>(d_left), thrust::get<1>(d_left) * thrust::get<1>(d_right));
        }
        // Keys differ: emit the "no match" marker.
        return thrust::make_tuple(-1, -1);
    }
};
thrust::device_vector<int> d_mergedKeys(h_leftCount + h_rightCount);
thrust::device_vector<int> d_mergedValues(h_leftCount + h_rightCount);
thrust::merge_by_key(d_leftCountKeys.begin(), d_leftCountKeys.begin() + h_leftCount,
d_rightCountKeys.begin(), d_rightCountKeys.begin() + h_rightCount,
d_leftCounts.begin(), d_rightCounts.begin(), d_mergedKeys.begin(), d_mergedValues.begin());
typedef thrust::tuple<int, int> IntTuple;
thrust::zip_iterator<IntTuple> d_zippedCounts(thrust::make_tuple(d_mergedKeys.begin(), d_mergedValues.begin()));
thrust::zip_iterator<IntTuple> d_zippedCountsOffset(d_zippedCounts + 1);
multiply_transformation transformOperator;
thrust::device_vector<IntTuple> d_transformedResult(h_leftCount + h_rightCount);
thrust::transform(d_zippedCounts, d_zippedCounts + h_leftCount + h_rightCount - 1, d_zippedCountsOffset, d_transformedResult.begin(), transformOperator);
However, I get the error that no overloaded function thrust::transform matches the argument list. In the above code h_leftCount and h_rightCount are the sizes of the left and right inputs. d_leftCountKeys, d_rightCountKeys, d_leftCounts, and d_rightCounts are thrust::device_vector<int>.
Well, I'm not sure this is the best method (@m.s. usually comes up with better approaches than I do), but one possible approach would be (method 1):
set_intersection_by_key(Left,Right)
set_intersection_by_key(Right,Left)
Take the values outputs from step 1 and step 2, and perform a transform on them to multiply the values results together (or whatever other mathematical operation you'd like to perform on the corresponding values results from step 1 and step 2).
I don't know what your skill level is with thrust but I can provide a trivial worked example of the above if desired.
Another possible approach (method 2):
merge_by_key the two lists together
Perform a transform using two versions of the resultant list from step 1: The first consisting of [first, last-1) and the second consisting of [first+1, last). This will require a special functor that takes a zipped version of the keys and values, and compares the two keys presented. If it is a match, output the desired mathematical operation on the two presented values; if it is not a match, output some kind of marker or known illegal value. (If such an illegal value is impossible to construct, we can zip a 3rd marker array in if needed.)
Do a remove_if on the output of step 2, to compact the result down to the desired result, removing all value entries that are illegal, or else removing all value entries that are indicated by the marker array.
My sense is the second method might be faster, but I haven't carefully thought through it. In any event, it's better to benchmark test cases than to work off of (my) intuition.
Based on a comment below, here is a description of what is happening starting with the 2nd step of method 2, using your example dataset:
The output of step 1 (the merge_by_key operation) would look like something like this:
keys: { 1, 1, 2, 3, 4, 4, 5, 5, 6, 6, 7 };
values: { 3, 2, 4, 1, 1, 1, 2, 4, 1, 1, 2 };
Let's construct two versions, the first being "the item" and the second being "the next item to the right":
keys1: { 1, 1, 2, 3, 4, 4, 5, 5, 6, 6 };
values1: { 3, 2, 4, 1, 1, 1, 2, 4, 1, 1 };
keys2: { 1, 2, 3, 4, 4, 5, 5, 6, 6, 7 };
values2: { 2, 4, 1, 1, 1, 2, 4, 1, 1, 2 };
The actual "construction" is trivial. keys1 is just [keys.begin(), keys.end()-1), and keys2 is just [keys.begin()+1, keys.end()). And likewise for values1 and values2.
We'll zip keys1 and values1 together and we'll zip keys2 and values2 together. Then we'll pass these two zipped entities to a transform operation that has a special functor that will do the following:
If keys1 == keys2, do the desired math operation on the values1 and values2 quantities, and place a one in the marker array. If not, place a 0 in a marker array. The output of this operation would be:
keys: { 1, 2, 3, 4, 4, 5, 5, 6, 6, 7 };
values: { 6, 4, 1, 1, 1, 8, 4, 1, 1, 2 };
marker: { 1, 0, 0, 1, 0, 1, 0, 1, 0, 0 };
Now zip the 3 vectors above together, and pass that to remove_if. The remove_if functor would indicate removal of any items for which marker == 0, leaving:
keys: { 1, 4, 5, 6 };
values: { 6, 1, 8, 1 };
marker: { 1, 1, 1, 1 };
Here is a fully worked example demonstrating both methods:
$ cat t1007.cu
#include <iostream>
#include <thrust/device_vector.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/set_operations.h>
#include <thrust/transform.h>
#include <thrust/functional.h>
#include <thrust/copy.h>
#include <thrust/merge.h>
#include <thrust/remove.h>
#include <assert.h>
// Transform functor for method 2. z1 is a (key, value, marker) tuple and z2 a
// (key, value) tuple for the next element. On equal keys it sets z1's marker
// to 1 and returns the product of the two values; otherwise it returns 0 and
// leaves the marker untouched.
struct mark_mpy_func
{
    template <typename T1, typename T2>
    __host__ __device__
    int operator()(T1 &z1, T2 &z2){
        if (!(thrust::get<0>(z1) == thrust::get<0>(z2)))
            return 0;
        thrust::get<2>(z1) = 1;
        return thrust::get<1>(z1) * thrust::get<1>(z2);
    }
};
// Removal predicate: true exactly when the marker value passed in is 0.
struct mtest_func
{
    __host__ __device__
    bool operator()(int t){
        return t == 0;
    }
};
// Demonstrates two ways of joining two key/value lists on matching keys and
// multiplying the matched values, printing the keys and products of each.
int main(){
int Lkeys[] = { 1, 2, 4, 5, 6 };
int Lvals[] = { 3, 4, 1, 2, 1 };
int Rkeys[] = { 1, 3, 4, 5, 6, 7 };
int Rvals[] = { 2, 1, 1, 4, 1, 2 };
// Element counts of the left and right inputs.
size_t Lsize = sizeof(Lkeys)/sizeof(int);
size_t Rsize = sizeof(Rkeys)/sizeof(int);
// Copy the host arrays into device vectors.
thrust::device_vector<int> Lkeysv(Lkeys, Lkeys+Lsize);
thrust::device_vector<int> Lvalsv(Lvals, Lvals+Lsize);
thrust::device_vector<int> Rkeysv(Rkeys, Rkeys+Rsize);
thrust::device_vector<int> Rvalsv(Rvals, Rvals+Rsize);
// method 1
// Output buffers are sized like their inputs; the number of entries actually
// produced is computed from the returned end iterator below.
thrust::device_vector<int> Lkeysvo(Lsize);
thrust::device_vector<int> Lvalsvo(Lsize);
thrust::device_vector<int> Rkeysvo(Rsize);
thrust::device_vector<int> Rvalsvo(Rsize);
// Intersect Left with Right (carrying the left values), then Right with Left
// (carrying the right values). `.first - begin()` is the number of keys written.
size_t Lsizeo = thrust::set_intersection_by_key(Lkeysv.begin(), Lkeysv.end(), Rkeysv.begin(), Rkeysv.end(), Lvalsv.begin(), Lkeysvo.begin(), Lvalsvo.begin()).first - Lkeysvo.begin();
size_t Rsizeo = thrust::set_intersection_by_key(Rkeysv.begin(), Rkeysv.end(), Lkeysv.begin(), Lkeysv.end(), Rvalsv.begin(), Rkeysvo.begin(), Rvalsvo.begin()).first - Rkeysvo.begin();
// Both passes are expected to produce the same number of keys.
assert(Lsizeo == Rsizeo);
// Multiply the two value sequences element-wise.
thrust::device_vector<int> res1(Lsizeo);
thrust::transform(Lvalsvo.begin(), Lvalsvo.begin()+Lsizeo, Rvalsvo.begin(), res1.begin(), thrust::multiplies<int>());
std::cout << "Method 1 result:" << std::endl << "keys: ";
thrust::copy_n(Lkeysvo.begin(), Lsizeo, std::ostream_iterator<int>(std::cout, ","));
std::cout << std::endl << "vals: ";
thrust::copy_n(res1.begin(), Lsizeo, std::ostream_iterator<int>(std::cout, ","));
std::cout << std::endl;
// method 2
// Merge both lists into one key-ordered sequence of Lsize + Rsize entries.
thrust::device_vector<int> Mkeysv(Lsize + Rsize);
thrust::device_vector<int> Mvalsv(Lsize + Rsize);
thrust::merge_by_key(Lkeysv.begin(), Lkeysv.end(), Rkeysv.begin(), Rkeysv.end(), Lvalsv.begin(), Rvalsv.begin(), Mkeysv.begin(), Mvalsv.begin());
// One entry per adjacent pair of the merged sequence, hence the "- 1".
thrust::device_vector<int> marker(Lsize + Rsize - 1);
thrust::device_vector<int> res2(Lsize + Rsize - 1);
// Pair element i (zipped with its marker slot) with element i + 1;
// mark_mpy_func writes the marker and returns the value stored in res2.
thrust::transform(thrust::make_zip_iterator(thrust::make_tuple(Mkeysv.begin(), Mvalsv.begin(), marker.begin())), thrust::make_zip_iterator(thrust::make_tuple(Mkeysv.end()-1, Mvalsv.end()-1, marker.end())), thrust::make_zip_iterator(thrust::make_tuple(Mkeysv.begin()+1, Mvalsv.begin()+1)), res2.begin(), mark_mpy_func());
// Compact (key, product) pairs in place using marker as the stencil:
// mtest_func selects entries whose marker is 0 for removal. rsize2 is the
// number of entries that remain.
size_t rsize2 = thrust::remove_if(thrust::make_zip_iterator(thrust::make_tuple( Mkeysv.begin(), res2.begin())), thrust::make_zip_iterator(thrust::make_tuple(Mkeysv.end()-1, res2.end())), marker.begin(), mtest_func()) - thrust::make_zip_iterator(thrust::make_tuple(Mkeysv.begin(), res2.begin()));
std::cout << "Method 2 result:" << std::endl << "keys: ";
thrust::copy_n(Mkeysv.begin(), rsize2, std::ostream_iterator<int>(std::cout, ","));
std::cout << std::endl << "vals: ";
thrust::copy_n(res2.begin(), rsize2, std::ostream_iterator<int>(std::cout, ","));
std::cout << std::endl;
return 0;
}
$ nvcc -o t1007 t1007.cu
$ ./t1007
Method 1 result:
keys: 1,4,5,6,
vals: 6,1,8,1,
Method 2 result:
keys: 1,4,5,6,
vals: 6,1,8,1,
$
If it is acceptable to use a marker value (say, -1) in the output data to inform the remove_if operation, then the separate marker array can be dispensed with. Here's a modified version of method 2 that works this way:
$ cat t1007.cu
#include <iostream>
#include <thrust/device_vector.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/transform.h>
#include <thrust/copy.h>
#include <thrust/merge.h>
#include <thrust/remove.h>
#define MARK_VAL -1
// Transform functor for the marker-value variant of method 2: returns the
// product of the two values when the keys of z1 and z2 are equal, and
// MARK_VAL otherwise.
struct mark_mpy_func
{
    template <typename T1, typename T2>
    __host__ __device__
    int operator()(T1 &z1, T2 &z2){
        if (thrust::get<0>(z1) == thrust::get<0>(z2))
            return thrust::get<1>(z1) * thrust::get<1>(z2);
        return MARK_VAL;
    }
};
// Removal predicate over zipped tuples: true when the tuple's second
// element equals MARK_VAL.
struct mtest_func
{
    template <typename T>
    __host__ __device__
    bool operator()(T t){
        return thrust::get<1>(t) == MARK_VAL;
    }
};
// Method 2 without a separate marker array: unmatched positions are tagged
// with MARK_VAL in the result itself and then compacted away.
int main(){
int Lkeys[] = { 1, 2, 4, 5, 6 };
int Lvals[] = { 3, 4, 1, 2, 1 };
int Rkeys[] = { 1, 3, 4, 5, 6, 7 };
int Rvals[] = { 2, 1, 1, 4, 1, 2 };
// Element counts of the left and right inputs.
size_t Lsize = sizeof(Lkeys)/sizeof(int);
size_t Rsize = sizeof(Rkeys)/sizeof(int);
// Copy the host arrays into device vectors.
thrust::device_vector<int> Lkeysv(Lkeys, Lkeys+Lsize);
thrust::device_vector<int> Lvalsv(Lvals, Lvals+Lsize);
thrust::device_vector<int> Rkeysv(Rkeys, Rkeys+Rsize);
thrust::device_vector<int> Rvalsv(Rvals, Rvals+Rsize);
// method 2
// Merge both lists into one key-ordered sequence of Lsize + Rsize entries.
thrust::device_vector<int> Mkeysv(Lsize + Rsize);
thrust::device_vector<int> Mvalsv(Lsize + Rsize);
thrust::merge_by_key(Lkeysv.begin(), Lkeysv.end(), Rkeysv.begin(), Rkeysv.end(), Lvalsv.begin(), Rvalsv.begin(), Mkeysv.begin(), Mvalsv.begin());
// One result per adjacent pair of merged entries, hence the "- 1".
thrust::device_vector<int> res2(Lsize + Rsize - 1);
// Pair element i with element i + 1; mark_mpy_func yields the product on a
// key match and MARK_VAL otherwise.
thrust::transform(thrust::make_zip_iterator(thrust::make_tuple(Mkeysv.begin(), Mvalsv.begin())), thrust::make_zip_iterator(thrust::make_tuple(Mkeysv.end()-1, Mvalsv.end()-1)), thrust::make_zip_iterator(thrust::make_tuple(Mkeysv.begin()+1, Mvalsv.begin()+1)), res2.begin(), mark_mpy_func());
// Compact the (key, result) pairs in place, dropping those whose result is
// MARK_VAL. rsize2 is the number of pairs that remain.
size_t rsize2 = thrust::remove_if(thrust::make_zip_iterator(thrust::make_tuple( Mkeysv.begin(), res2.begin())), thrust::make_zip_iterator(thrust::make_tuple(Mkeysv.end()-1, res2.end())), mtest_func()) - thrust::make_zip_iterator(thrust::make_tuple(Mkeysv.begin(), res2.begin()));
std::cout << "Method 2 result:" << std::endl << "keys: ";
thrust::copy_n(Mkeysv.begin(), rsize2, std::ostream_iterator<int>(std::cout, ","));
std::cout << std::endl << "vals: ";
thrust::copy_n(res2.begin(), rsize2, std::ostream_iterator<int>(std::cout, ","));
std::cout << std::endl;
return 0;
}
$ nvcc -o t1007 t1007.cu
$ ./t1007
Method 2 result:
keys: 1,4,5,6,
vals: 6,1,8,1,
$
You can actually do all you want using one thrust::set_intersection_by_key call.
However, some prerequisites need to be met:
First, the easy one:
You need to zip Lvalsv and Rvalsv into a single thrust::zip_iterator and pass this as the values to thrust::set_intersection_by_key.
You could already run this:
// The intersection can hold at most as many keys as the shorter input.
std::size_t min_size = std::min(Lsize, Rsize);
thrust::device_vector<int> result_keys(min_size);
thrust::device_vector<int> result_values_left(min_size);
thrust::device_vector<int> result_values_right(min_size);
// NOTE(review): this zips Lvalsv[i] with Rvalsv[i] by POSITION, and the zipped
// sequence is passed as the values that accompany the LEFT keys. The right-hand
// value selected for a key therefore appears to depend on index alignment
// rather than on the matching right key — verify with inputs whose matching
// keys sit at different indices.
auto zipped_input_values = thrust::make_zip_iterator(thrust::make_tuple(Lvalsv.begin(), Rvalsv.begin()));
// Each output tuple is split into the two separate result vectors.
auto zipped_output_values = thrust::make_zip_iterator(thrust::make_tuple(result_values_left.begin(), result_values_right.begin()));
auto result_pair = thrust::set_intersection_by_key(Lkeysv.begin(), Lkeysv.end(), Rkeysv.begin(), Rkeysv.end(), zipped_input_values, result_keys.begin(), zipped_output_values);
This would yield two result vectors, which you would need to multiply element-wise to get your final result.
But wait, wouldn't it be great if you could avoid having to store these two vectors as the result, then read each element again for multiplying them and then store the final result in a third vector?
Let's do that. The concept I adapted is from here.
The transform_output_iterator is an iterator that wraps another OutputIterator. When writing to the transform_output_iterator, a UnaryFunction is applied to the value to be written, and the result is then written to the wrapped OutputIterator.
This allows us to pass the result from thrust::set_intersection_by_key through the Multiplier functor and then store the products in a single result_values vector.
The following code implements this idea:
#include <thrust/iterator/iterator_traits.h>
#include <thrust/iterator/iterator_facade.h>
#include <thrust/iterator/iterator_adaptor.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/tuple.h>
#include <thrust/set_operations.h>
#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <iostream>
#include <cstdint>
// PRINTER(x) prints the container x, labelled with its own variable name.
#define PRINTER(name) print(#name, (name))
// Writes "<name>:\t", then every element of v followed by a tab, then a newline.
template <template <typename...> class Container, typename Elem, typename ...Rest>
void print(const char* name, const Container<Elem,Rest...> & v)
{
    std::cout << name << ":\t";
    std::ostream_iterator<Elem> sink(std::cout, "\t");
    thrust::copy(v.begin(), v.end(), sink);
    std::cout << std::endl;
}
// Assignment proxy: holds references to a function object and an output
// iterator. Assigning a value x writes fun(x) through the iterator.
template <typename OutputIterator, typename UnaryFunction>
class Proxy
{
    UnaryFunction& m_fun;   // applied to every assigned value
    OutputIterator& m_out;  // destination that receives the transformed value
public:
    __host__ __device__
    Proxy(UnaryFunction& f, OutputIterator& o) : m_fun(f), m_out(o) {}

    // Transforms x and stores the result at the wrapped iterator's current
    // position; returns a copy of this proxy.
    template <typename T>
    __host__ __device__
    Proxy operator=(const T& x) const
    {
        *m_out = m_fun(x);
        return *this;
    }
};
// This iterator is a wrapper around another OutputIterator which
// applies a UnaryFunction before writing to the OutputIterator.
// It is built on thrust::iterator_adaptor with the adaptor's last template
// argument set to Proxy<const OutputIterator, const UnaryFunction>, so
// dereferencing yields a Proxy whose assignment performs the transformed write.
template <typename OutputIterator, typename UnaryFunction>
class transform_output_iterator : public thrust::iterator_adaptor<
transform_output_iterator<OutputIterator, UnaryFunction>
, OutputIterator
, thrust::use_default
, thrust::use_default
, thrust::use_default
, Proxy<const OutputIterator, const UnaryFunction> >
{
// Function object applied to each value before it is written.
UnaryFunction fun;
public:
// Lets the adaptor machinery reach the private dereference() below.
friend class thrust::iterator_core_access;
// shorthand for the name of the iterator_adaptor we're deriving from
typedef thrust::iterator_adaptor<
transform_output_iterator<OutputIterator, UnaryFunction>,
OutputIterator, thrust::use_default, thrust::use_default, thrust::use_default, Proxy<const OutputIterator, const UnaryFunction>
> super_t;
// Wraps `out`; every value later assigned through this iterator is passed
// through `fun` first.
__host__ __device__
transform_output_iterator(OutputIterator out, UnaryFunction fun) : super_t(out), fun(fun)
{
}
private:
// Builds the Proxy from this iterator's own `fun` member and its wrapped
// base iterator; the Proxy stores references to both.
__host__ __device__
typename super_t::reference dereference() const
{
return Proxy<const OutputIterator, const UnaryFunction>(fun, this->base_reference());
}
};
// Unary functor that multiplies the first two elements of a tuple.
struct Multiplier
{
// Returns get<0>(t) * get<1>(t); the return type is whatever that
// multiplication yields for the given Tuple.
template<typename Tuple>
__host__ __device__
auto operator()(Tuple t) const -> decltype(thrust::get<0>(t) * thrust::get<1>(t))
{
return thrust::get<0>(t) * thrust::get<1>(t);
}
};
// Factory that deduces the template arguments of transform_output_iterator
// from `out` and `fun` and returns the wrapped iterator.
template <typename OutputIterator, typename UnaryFunction>
transform_output_iterator<OutputIterator, UnaryFunction>
__host__ __device__
make_transform_output_iterator(OutputIterator out, UnaryFunction fun)
{
    typedef transform_output_iterator<OutputIterator, UnaryFunction> iterator_type;
    return iterator_type(out, fun);
}
// Runs a single set_intersection_by_key whose value output goes through a
// transform_output_iterator, so the products land directly in result_values.
int main()
{
int Lkeys[] = { 1, 2, 4, 5, 6 };
int Lvals[] = { 3, 4, 1, 2, 1 };
int Rkeys[] = { 1, 3, 4, 5, 6, 7 };
int Rvals[] = { 2, 1, 1, 4, 1, 2 };
// Element counts of the left and right inputs.
size_t Lsize = sizeof(Lkeys)/sizeof(int);
size_t Rsize = sizeof(Rkeys)/sizeof(int);
// Copy the host arrays into device vectors.
thrust::device_vector<int> Lkeysv(Lkeys, Lkeys+Lsize);
thrust::device_vector<int> Lvalsv(Lvals, Lvals+Lsize);
thrust::device_vector<int> Rkeysv(Rkeys, Rkeys+Rsize);
thrust::device_vector<int> Rvalsv(Rvals, Rvals+Rsize);
// The intersection can hold at most as many keys as the shorter input.
std::size_t min_size = std::min(Lsize, Rsize);
thrust::device_vector<int> result_keys(min_size);
thrust::device_vector<int> result_values(min_size);
// NOTE(review): Lvalsv[i] and Rvalsv[i] are paired by POSITION here, not by
// key — confirm the result with inputs whose matching keys sit at different
// indices in the two lists.
auto zipped_values = thrust::make_zip_iterator(thrust::make_tuple(Lvalsv.begin(), Rvalsv.begin()));
// Each (left, right) tuple written to output_it is multiplied by Multiplier
// and the product is stored in result_values.
auto output_it = make_transform_output_iterator(result_values.begin(), Multiplier());
auto result_pair = thrust::set_intersection_by_key(Lkeysv.begin(), Lkeysv.end(), Rkeysv.begin(), Rkeysv.end(), zipped_values, result_keys.begin(), output_it);
// result_pair.first is the end of the keys written; trim both outputs to it.
std::size_t new_size = result_pair.first - result_keys.begin();
result_keys.resize(new_size);
result_values.resize(new_size);
PRINTER(result_keys);
PRINTER(result_values);
}
output
$ nvcc -std=c++11 main.cu && ./a.out
result_keys: 1 4 5 6
result_values: 6 1 8 1
I think two set intersections are required, as suggested in the first answer. The other solutions won't work; it is just a coincidence of the input data that they produce the correct result. For example, if the second (key, value) pair is removed from the left set, the computed result will be different, although it shouldn't be. Here is the code:
$ cat inner_join.cu
#include <thrust/set_operations.h>
#include <thrust/transform.h>
#include <thrust/device_vector.h>
#include <iostream>
int main()
{
int _Lkeys[] = {1, 4, 5, 6};
int _Lvals[] = {3, 1, 2, 1};
int _Rkeys[] = {1, 3, 4, 5, 6, 7};
int _Rvals[] = {2, 1, 1, 4, 1, 2};
size_t Lsize = sizeof(_Lkeys) / sizeof(int);
size_t Rsize = sizeof(_Rkeys) / sizeof(int);
thrust::device_vector<int> Lkeys(_Lkeys, _Lkeys + Lsize);
thrust::device_vector<int> Lvals(_Lvals, _Lvals + Lsize);
thrust::device_vector<int> Rkeys(_Rkeys, _Rkeys + Rsize);
thrust::device_vector<int> Rvals(_Rvals, _Rvals + Rsize);
std::size_t min_size = std::min(Lsize, Rsize);
thrust::device_vector<int> result_keys(min_size);
thrust::device_vector<int> result_Rvals(min_size);
thrust::device_vector<int> result_Lvals(min_size);
// set intersection keys, and left set values
size_t intersection_size =
thrust::set_intersection_by_key(Lkeys.begin(), Lkeys.end(), Rkeys.begin(),
Rkeys.end(), Lvals.begin(),
result_keys.begin(), result_Lvals.begin())
.first -
result_keys.begin();
// set intersection keys, and right set values
thrust::set_intersection_by_key(Rkeys.begin(), Rkeys.end(), Lkeys.begin(),
Lkeys.end(), Rvals.begin(),
result_keys.begin(), result_Rvals.begin());
result_Lvals.resize(intersection_size);
result_keys.resize(intersection_size);
thrust::device_vector<int> result_values(intersection_size);
// join left and right intersection values
thrust::transform(result_Lvals.begin(), result_Lvals.end(),
result_Rvals.begin(), result_values.begin(),
thrust::multiplies<int>());
std::cout << "keys: ";
thrust::copy_n(result_keys.begin(), intersection_size,
std::ostream_iterator<int>(std::cout, ","));
std::cout << std::endl << "vals: ";
thrust::copy_n(result_values.begin(), intersection_size,
std::ostream_iterator<int>(std::cout, ","));
std::cout << std::endl;
}
output
$ nvcc inner_join.cu -run
keys: 1,4,5,6,
vals: 6,1,8,1,
I am literally freaking out on the following :
// Graph stored as an array of row pointers (m_graph). The destructor
// delete[]s m_size rows and then the row array itself.
template <class T> // int, float, double etc..
class Graph {
public:
    // Documentation, it has to be between [0..100]
    // Stores the parameters and then calls generate().
    Graph(int size = 10, int density = 10, T range = 0):
        m_size(size),
        m_density(density),
        m_range(range) {
        generate();
    }
    // Frees every row and then the array of row pointers.
    ~Graph() {
        for (int i = 0; i < m_size; i++)
            delete[] m_graph[i];
        delete[] m_graph;
    }
[..]
    // Builds a Graph whose storage is replaced by the caller-supplied cgraph.
    // The graph adopts cgraph: the destructor will delete[] each of its `size`
    // rows and the row array, so cgraph must have been allocated with new[].
    static Graph<T>* custom(T** cgraph, int size, T range) {
        Graph<T> *graph = new Graph<T>(size, 10, range);
        // Drop the storage the constructor set up before adopting cgraph.
        for (int i = 0; i < size; i++)
            delete[] graph->m_graph[i];
        delete[] graph->m_graph;
        graph->m_graph = cgraph;
        return graph;  // was missing: flowing off the end of a non-void function is undefined behaviour
    }
private:
    T** m_graph;
[ .. ]
};
int nodes[4][4] = {
{ 6, 5, 2, 5 },
{ 5, 6, 3, 3 },
{ 1, 3, 6, 1 },
{ 5, 3, 1, 6 }
};
int main() {
Graph<int> *graph = Graph<int>::custom(nodes, 4, 5);
}
Why does it fail to compile, reporting the following errors?
g++ graph.cpp -o test_graph
graph.cpp: In function ‘int main()’:
graph.cpp:191:55: error: no matching function for call to ‘Graph<int>::custom(int [4][4], int, int)’
Graph<int> *graph = Graph<int>::custom(nodes, 4, 5);
^
graph.cpp:60:20: note: candidate: static Graph<T>* Graph<T>::custom(T**, int, T) [with T = int]
static Graph<T>* custom(T** cgraph, int size, T range) {
^
graph.cpp:60:20: note: no known conversion for argument 1 from ‘int [4][4]’ to ‘int**’
It looks so right to me, what's wrong ?
You need to make nodes an array of pointers to int.
int nodes_v[4][4] = {
{ 6, 5, 2, 5 },
{ 5, 6, 3, 3 },
{ 1, 3, 6, 1 },
{ 5, 3, 1, 6 }
};
int *nodes[4] = { nodes_v[0], nodes_v[1], nodes_v[2], nodes_v[3] };
You also need to add an additional member to the Graph variable to mark that it is a custom graph, and if set, the destructor should not delete the memory.
Best to give Graph a private constructor which passes in the custom flag and doesn't bother allocating the memory if set.
// Constructor that only records the parameters and the custom flag; it does
// not call generate().
Graph(int size, int density, T range, bool custom):
    m_size(size),
    m_density(density),
    m_range(range),
    m_custom(custom) {
}
// Public constructor: delegates with custom = false, then calls generate().
Graph(int size = 10, int density = 10, T range = 0)
    : Graph(size, density, range, false)
{
    generate();
}
// Creates a Graph flagged as custom and points it at the caller's cgraph.
static Graph<T>* custom(T** cgraph, int size, T range) {
    Graph<T> *graph = new Graph<T>(size, 10, range, true);
    graph->m_graph = cgraph;
    return graph;  // was missing: the function is declared to return the new graph
}
Finally, you need to handle copy constructor and assignment operator (start by just deleting them).
I am trying to use the typed test concept available in Google Test. The description of this concept matches what I intend to do, but I cannot figure it out completely. I want to test structs which implement an interface; since they are totally different ones, they need to be initialized with different values/instances.
Simply my code is as follows
// Interface for types that can be converted to and from an sObj.
struct Serializable
{
    // Virtual destructor so implementations can be destroyed through a
    // Serializable pointer or reference.
    virtual ~Serializable() {}
    // Produces the serialized form of this object.
    virtual sObj serialize() = 0;
    // Restores this object's state from a serialized form.
    virtual void unserialize(sObj) = 0;
};
// Serializable implementation holding two ints. The base name now matches
// the interface's spelling (Serializable) and the definition ends with ';'.
struct s1 : Serializable
{
    int attrI1;
    int attrI2;
    sObj serialize()
    {
        //serialize an instance of this struct
    }
    void unserialize(sObj)
    {
        //unserialize data to instance of this struct
    }
};
struct s2 : serializable
{
char attrC;
void serialize()
{
//serialize an instance of this struct
}
sObj unserialize()
{
//unserialize data to instance of this struct
}
}
And I want to test s1 and s2 with different instances/values. The test should look like:
// Round-trip check for a Serializable implementation: serializes t, restores
// the result into a default-constructed T and compares it with t.
// Uses the interface's serialize()/unserialize() names; unserialize() returns
// void, so the restored object (not the call) is what gets compared.
// Returns 0 after the comparison has been issued.
template <typename T>
int testSerialzable(T& t)
{
    sObj obj = t.serialize();
    T temp;
    temp.unserialize(obj);
    TEST_EQ(temp, t);
    return 0;
}
Can someone please tell me if this is possible to do and how?
Many thanks in advance
I finally figured it out. For the example that I had above, it looks like this:
template<class T>
struct TestSerializable : public ::testing::Test
{
static T serializedType;
};
TYPED_TEST_CASE_P(TestSerializable);
TYPED_TEST_P(TestSerializable, serializationTest)
{
sObj obj = t.serialize();
TypeParam temp;
ASSERT_EQ(temp.unserialize(obj), t);
}
REGISTER_TYPED_TEST_CASE_P(TestSerializable, serializationTest);
typedef ::testing::Types<s1, s2> MyTypes;
INSTANTIATE_TYPED_TEST_CASE_P(MySerialiInstantiation, TestSerializable, MyTypes);
template<> s1 TestSerializable<s1>::serializedType(/*instance of s1 with proper values*/s1());
template<> s2 TestSerializable<s2>::serializedType(/*instance of s1 with proper values*/s2());
The original sample can't be compiled, I post my successful change for reference. If you need the full source code, please refer to the link(https://github.com/dougpuob/googletest-sample/blob/master/source/test-derived-func-by-interface/main.cpp).
#include "BinarySearchLoop.h"
#include "BinarySearchRecursive.h"
#include "gtest/gtest.h"
// Typed-test fixture: holds one shared instance of the search
// implementation T under test.
template <class T>
class TestBinarySearch : public ::testing::Test {
 public:
  static T Instance;
};
TYPED_TEST_CASE_P(TestBinarySearch);
// Searching a sorted array of positive integers returns each value's index.
TYPED_TEST_P(TestBinarySearch, PositiveInteger) {
  std::vector<int> SortedArray = {1, 2, 3, 4, 5, 6, 7, 8, 9};
  // Fixture members live in a dependent base class, so they must be
  // qualified with TestFixture:: to be found on conforming compilers.
  ASSERT_EQ(4, TestFixture::Instance.search(SortedArray, 5));
  ASSERT_EQ(5, TestFixture::Instance.search(SortedArray, 6));
  ASSERT_EQ(0, TestFixture::Instance.search(SortedArray, 1));
  ASSERT_EQ(8, TestFixture::Instance.search(SortedArray, 9));
}
// Negative values: a present value yields its index, absent values yield -1.
TYPED_TEST_P(TestBinarySearch, NegativeInteger) {
  std::vector<int> SortedArray = {-8, -7, -6, -5, -4, -3, -2, -1};
  // TestFixture:: is required to reach members of the dependent base class.
  EXPECT_EQ(+6, TestFixture::Instance.search(SortedArray, -2));
  EXPECT_EQ(-1, TestFixture::Instance.search(SortedArray, +0));
  EXPECT_EQ(-1, TestFixture::Instance.search(SortedArray, -9));
}
// Mixed-sign array: one hit and one miss.
TYPED_TEST_P(TestBinarySearch, Integer) {
  std::vector<int> SortedArray = {-1, 0, 3, 5, 9, 12};
  // TestFixture:: is required to reach members of the dependent base class.
  EXPECT_EQ(+4, TestFixture::Instance.search(SortedArray, 9));
  EXPECT_EQ(-1, TestFixture::Instance.search(SortedArray, 2));
}
// One-element array: the element is found at index 0; a smaller and a larger
// value are both reported as -1.
TYPED_TEST_P(TestBinarySearch, SingleElement) {
  std::vector<int> SortedArray = {5};
  // TestFixture:: is required to reach members of the dependent base class.
  EXPECT_EQ(+0, TestFixture::Instance.search(SortedArray, 5));
  EXPECT_EQ(-1, TestFixture::Instance.search(SortedArray, 1));
  EXPECT_EQ(-1, TestFixture::Instance.search(SortedArray, 6));
}
// Two-element array: both elements are found, other values are not.
TYPED_TEST_P(TestBinarySearch, TwoElementsOnly) {
  std::vector<int> SortedArray = {1, 5};
  // TestFixture:: is required to reach members of the dependent base class.
  EXPECT_EQ(+1, TestFixture::Instance.search(SortedArray, 5));
  EXPECT_EQ(+0, TestFixture::Instance.search(SortedArray, 1));
  EXPECT_EQ(-1, TestFixture::Instance.search(SortedArray, 2));  // Not in the array.
  EXPECT_EQ(-1, TestFixture::Instance.search(SortedArray, 7));  // Not in the array.
}
REGISTER_TYPED_TEST_CASE_P(TestBinarySearch,
PositiveInteger,
NegativeInteger,
Integer,
SingleElement,
TwoElementsOnly);
typedef ::testing::Types<BinarySearchLoop, BinarySearchRecursive> MyTypes;
INSTANTIATE_TYPED_TEST_CASE_P(MyBinarySearchInstantiation,
TestBinarySearch,
MyTypes);
// Definitions of the per-type fixture instances. An explicit specialization
// of a static data member is only a declaration unless it has an
// initializer, so the empty braces are what make these definitions.
template <>
BinarySearchLoop TestBinarySearch<BinarySearchLoop>::Instance{};
template <>
BinarySearchRecursive TestBinarySearch<BinarySearchRecursive>::Instance{};
// Test runner entry point: hands the command line to Google Test and
// returns the result of RUN_ALL_TESTS() as the exit code.
int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}
It's more convenient to use value-parameterized tests for testing different implementations of an interface. Google Test's sample7 shows how to do that.