I'm new to CUDA and the Thrust library. I'm learning and trying to implement a function that runs a Thrust algorithm inside a for loop. Is there a way to convert this loop into another Thrust call, or should I use a CUDA kernel to achieve this?
I have come up with code like this:
// thrust functor
struct GreaterthanX
{
const float _x;
GreaterthanX(float x) : _x(x) {}
__host__ __device__ bool operator()(const float &a) const
{
return a > _x;
}
};
int main(void)
{
// fill a device_vector with
// 3 2 4 5
// 0 -2 3 1
// 9 8 7 6
int row = 3;
int col = 4;
thrust::device_vector<int> vec(row * col);
thrust::device_vector<int> count(row);
vec[0] = 3;
vec[1] = 2;
vec[2] = 4;
vec[3] = 5;
vec[4] = 0;
vec[5] = -2;
vec[6] = 3;
vec[7] = 1;
vec[8] = 9;
vec[9] = 8;
vec[10] = 7;
vec[11] = 6;
// Goal: For each row, count the number of elements greater than 2.
// And then find the row with the max count
// count the element greater than 2 in vec
for (int i = 0; i < row; i++)
{
count[i] = thrust::count_if(vec.begin() + i * col, vec.begin() + (i + 1) * col, GreaterthanX(2));
}
thrust::device_vector<int>::iterator result = thrust::max_element(count.begin(), count.end());
int max_val = *result;
unsigned int position = result - count.begin();
printf("result = %d at position %d\r\n", max_val, position);
// result = 4 at position 2
return 0;
}
My goal is to find the row that has the most elements greater than 2. I'm struggling with how to do this without a loop. Any suggestions would be much appreciated. Thanks.
Solution using Thrust
Here is an implementation using thrust::reduce_by_key in conjunction with multiple "fancy iterators".
I also took the liberty of sprinkling in some const, auto and lambdas for elegance and readability. Due to the lambdas, you will need to use the -extended-lambda flag for nvcc.
#include <cassert>
#include <cstdio>
#include <thrust/reduce.h>
#include <thrust/device_vector.h>
#include <thrust/distance.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/iterator/transform_iterator.h>
int main(void)
{
// fill a device_vector with
// 3 2 4 5
// 0 -2 3 1
// 9 8 7 6
int const row = 3;
int const col = 4;
thrust::device_vector<int> vec(row * col);
vec[0] = 3;
vec[1] = 2;
vec[2] = 4;
vec[3] = 5;
vec[4] = 0;
vec[5] = -2;
vec[6] = 3;
vec[7] = 1;
vec[8] = 9;
vec[9] = 8;
vec[10] = 7;
vec[11] = 6;
thrust::device_vector<int> count(row);
// Goal: For each row, count the number of elements greater than 2.
// And then find the row with the max count
// count the element greater than 2 in vec
// counting iterator avoids read from global memory, gives index into vec
auto keys_in_begin = thrust::make_counting_iterator(0);
auto keys_in_end = thrust::make_counting_iterator(row * col);
// transform vec on the fly
auto vals_in_begin = thrust::make_transform_iterator(
vec.cbegin(),
[] __host__ __device__ (int val) { return val > 2 ? 1 : 0; });
// discard to avoid write to global memory
auto keys_out_begin = thrust::make_discard_iterator();
auto vals_out_begin = count.begin();
// transform keys (indices) into row indices and then compare
// the divisions are one reason one might rather
// use MatX for higher dimensional data
auto binary_predicate = [col] __host__ __device__ (int i, int j){
return i / col == j / col;
};
// this function returns a new end for count
// b/c the final number of elements is often not known beforehand
auto new_ends = thrust::reduce_by_key(keys_in_begin, keys_in_end,
vals_in_begin,
keys_out_begin,
vals_out_begin,
binary_predicate);
// make sure that we didn't provide too small of an output vector
assert(thrust::get<1>(new_ends) == count.end());
auto const result = thrust::max_element(count.begin(), count.end());
int const max_val = *result;
auto const position = thrust::distance(count.begin(), result);
std::printf("result = %d at position %d\r\n", max_val, position);
// result = 4 at position 2
return 0;
}
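For reference, this needs to be compiled as CUDA code with the extended-lambda flag mentioned above, e.g. (the file name is just a placeholder; older CUDA toolkits spell the flag --expt-extended-lambda):
nvcc --extended-lambda -o row_count row_count.cu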
Bonus solution using MatX
As mentioned in the comments, NVIDIA has released a new high-level, C++17 library called MatX which targets problems involving (dense) multi-dimensional data (i.e. tensors). The library tries to unify multiple low-level libraries like CUFFT, CUSOLVER and CUTLASS in one Python-/Matlab-like interface. At the time of this writing (v0.2.2) the library is still in initial development and therefore probably doesn't guarantee a stable API. Because of this, because the performance is not yet as optimized as with the more mature Thrust library, and because the documentation/samples are not quite exhaustive, MatX should not be used in production code yet. While constructing this solution I actually stumbled upon a bug which was instantly fixed, so this code will only work on the main branch and not with the current release v0.2.2, and some of the features used might not appear in the documentation yet.
A solution using MatX looks the following way:
#include <iostream>
#include <matx.h>
int main(void)
{
int const row = 3;
int const col = 4;
auto tensor = matx::make_tensor<int, 2>({row, col});
tensor.SetVals({{3, 2, 4, 5},
{0, -2, 3, 1},
{9, 8, 7, 6}});
// tensor.Print(0,0); // print full tensor
auto count = matx::make_tensor<int, 1>({row});
// count.Print(0); // print full count
// Goal: For each row, count the number of elements greater than 2.
// And then find the row with the max count
// the kind of reduction is determined through the shapes of tensor and count
matx::sum(count, matx::as_int(tensor > 2));
// A single value (scalar) is a tensor of rank 0:
auto result_idx = matx::make_tensor<matx::index_t>();
auto result = matx::make_tensor<int>();
matx::argmax(result, result_idx, count);
cudaDeviceSynchronize();
std::cout << "result = " << result()
<< " at position " << result_idx() << "\r\n";
// result = 4 at position 2
return 0;
}
As MatX employs deferred execution operators, matx::as_int(tensor > 2) is effectively fused into the kernel achieving the same as using a thrust::transform_iterator in Thrust.
Due to MatX knowing about the regularity of the problem while Thrust does not, the MatX solution could potentially be more performant than the Thrust solution. It certainly is more elegant. It is also possible to construct tensors in already allocated memory, so one can mix the libraries, e.g. by constructing a tensor in the memory of a thrust::vector named vec via passing thrust::raw_pointer_cast(vec.data()) to the constructor of the tensor.
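A rough sketch of that mixing (the exact make_tensor overload for wrapping existing device memory is an assumption on my part; check the MatX documentation for your version):
// Assumption: make_tensor can create a non-owning tensor view over existing device memory.
thrust::device_vector<int> vec(row * col);
auto tensor = matx::make_tensor<int>(thrust::raw_pointer_cast(vec.data()), {row, col});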
I've tried to compute the binomial coefficient by making a recursion with Pascal's triangle. It works great for small numbers, but from 20 upwards it is either really slow or doesn't work at all.
I've tried to look up some optimization techniques, such as "caching", but they don't really seem to be well integrated into C++.
Here's the code if that helps you.
int binom(const int n, const int k)
{
double sum;
if(n == 0 || k == 0){
sum = 1;
}
else{
sum = binom(n-1,k-1)+binom(n-1,k);
}
if((n== 1 && k== 0) || (n== 1 && k== 1))
{
sum = 1;
}
if(k > n)
{
sum = 0;
}
return sum;
}
int main()
{
int n;
int k;
int sum;
cout << "Enter a n: ";
cin >> n;
cout << "Enter a k: ";
cin >> k;
sum = binom(n,k);
cout << endl << endl << "Number of possible combinations: " << sum <<
endl;
}
My guess is that the program wastes a lot of time calculating results it has already calculated. It somehow must memorize past results.
My guess is that the program wastes a lot of time calculating results it has already calculated.
That's definitely true.
On this topic, I'd suggest you have a look at the topic of Dynamic Programming.
There is a class of problems which naively require exponential runtime but can be solved with dynamic programming techniques.
That reduces the runtime complexity to polynomial (most of the time at the expense of increased space complexity).
The common approaches for dynamic programming are:
Top-Down (exploiting memoization and recursion).
Bottom-Up (iterative).
Following is my bottom-up solution (fast and compact):
#include <vector>

int BinomialCoefficient(const int n, const int k) {
    std::vector<int> aSolutions(k);
    aSolutions[0] = n - k + 1;

    for (int i = 1; i < k; ++i) {
        aSolutions[i] = aSolutions[i - 1] * (n - k + 1 + i) / (i + 1);
    }

    return aSolutions[k - 1];
}
This algorithm has a runtime complexity of O(k) and a space complexity of O(k).
Indeed, it is linear.
Moreover, this solution is simpler and faster than the recursive approach, and it is very CPU cache-friendly.
Note also there is no dependency on n.
I have achieved this result by exploiting simple math operations, obtaining the following formula:
C(n, k) = C(n - 1, k - 1) * n / k
Some math references on the binomial coefficient.
Note
The algorithm does not really need a space complexity of O(k).
Indeed, the solution at i-th step depends only on (i-1)-th.
Therefore, there is no need to store all intermediate solutions but just the one at the previous step. That would make the algorithm O(1) in terms of space complexity.
However, I would prefer keeping all intermediate solutions in the solution code to better show the principle behind the dynamic programming methodology.
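As a minimal sketch of that O(1)-space variant (same multiplicative recurrence, just a single running value):
int BinomialCoefficientConstantSpace(const int n, const int k) {
    long long solution = 1;                    // holds C(n - k + i, i) after iteration i
    for (int i = 1; i <= k; ++i) {
        solution = solution * (n - k + i) / i; // the division is exact at every step
    }
    return static_cast<int>(solution);
}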
Here is my repository with the optimized algorithm.
I would cache the results of each calculation in a map. A simple way to build the key is to turn the (n, k) pair into a string:
string key = to_string(n) + "," + to_string(k);
Then have a global map:
map<string, double> cachedValues;
You can then do a lookup with the key and, if found, return immediately. Otherwise, store the result in the map before returning.
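A minimal sketch of that idea (function and variable names are mine):
#include <map>
#include <string>

std::map<std::string, double> cachedValues;   // global cache, keyed by "n,k"

double binom_cached(int n, int k)
{
    const std::string key = std::to_string(n) + "," + std::to_string(k);
    const auto it = cachedValues.find(key);
    if (it != cachedValues.end())
        return it->second;                    // found: return immediately
    double result;
    if (k > n)
        result = 0;
    else if (k == 0 || k == n)
        result = 1;
    else
        result = binom_cached(n - 1, k - 1) + binom_cached(n - 1, k);
    cachedValues[key] = result;               // store before returning
    return result;
}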
I began mapping out what would happen with a call to (4, 5). It gets messy, with a LOT of calculations; each level of recursion roughly doubles the number of calls.
I don't know if your basic algorithm is correct, but if so, then I'd move this code to the top of the method:
if(k > n)
{
return 0;
}
As it appears that if k > n, you always return 0, even for something like 6,100. I don't know if that's correct or not, however.
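A sketch of that reordering (still the plain recursion, so it remains slow for larger inputs; the explicit k == n base case is my addition):
int binom(const int n, const int k)
{
    if (k > n)                        // handle this first, before any recursion happens
        return 0;
    if (n == 0 || k == 0 || k == n)   // base cases
        return 1;
    return binom(n - 1, k - 1) + binom(n - 1, k);
}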
You're computing some binomial values multiple times. A quick solution is memoization.
Untested:
#include <map>
#include <optional>
#include <utility>

int binom(int n, int k);

int binom_mem(int n, int k)
{
    static std::map<std::pair<int, int>, std::optional<int>> lookup_table;
    auto const input = std::pair{n, k};
    if (lookup_table[input].has_value() == false) {
        lookup_table[input] = binom(n, k);
    }
    return *lookup_table[input];   // the optional is guaranteed to be filled at this point
}
int binom(int n, int k)
{
double sum;
if (n == 0 || k == 0){
sum = 1;
} else {
sum = binom_mem(n-1,k-1) + binom_mem(n-1,k);
}
if ((n== 1 && k== 0) || (n== 1 && k== 1))
{
sum = 1;
}
if(k > n)
{
sum = 0;
}
return sum;
}
A better solution would be to make the recursion tail-recursive (not easy with double recursion) or, better yet, not use recursion at all ;)
I found this very simple (perhaps a bit slow) method of writing the binomial coefficient even for non-integers, based on this proof (written by me):
double binomial_coefficient(float k, int a) {
double b=1;
for(int p=1; p<=a; p++) {
b=b*(k+1-p)/p;
}
return b;
}
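For example, binomial_coefficient(2.5f, 2) evaluates to (2.5 * 1.5) / 2 = 1.875, the generalized binomial coefficient C(2.5, 2), while integer arguments such as binomial_coefficient(5, 2) give the usual result 10.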
If you can tolerate spending some compile-time memory, you can precompute Pascal's triangle at compile time. With a simple lookup mechanism, this will give you maximum speed.
The downside is that you can only calculate up to the 69th row. After that, even an unsigned long long would overflow.
So, we simply use a constexpr function and calculate the values of Pascal's triangle in a 2-dimensional compile-time constexpr std::array.
The nCr function then simply indexes into that array (into Pascal's triangle).
Please see the following example code:
#include <iostream>
#include <utility>
#include <array>
#include <iomanip>
#include <cmath>
// Biggest number for which nCR will work with a 64 bit variable: 69
constexpr size_t MaxN = 69u;
// If we store Pascal Triangle in a 2 dimensional array, the size will be that
constexpr size_t ArraySize = MaxN;
// This function will generate Pascals triangle stored in a 2 dimension std::array
constexpr auto calculatePascalTriangle() {
// Result of the function. Here we will store Pascal's triangle as a 2-dimensional array
std::array<std::array<unsigned long long, ArraySize>, ArraySize> pascalTriangle{};
// Go through all rows and columns of Pascals triangle
for (size_t row{}; row < MaxN; ++row) for (size_t col{}; col <= row; ++col) {
// Border values are always one
unsigned long long result{ 1 };
if (col != 0 && col != row) {
// And calculate the new value for the current row
result = pascalTriangle[row - 1][col - 1] + pascalTriangle[row - 1][col];
}
// Store new value
pascalTriangle[row][col] = result;
}
// And return array as function result
return pascalTriangle;
}
// This is a constexpr std::array<std::array<unsigned long long,ArraySize>, ArraySize> with the name PPP, containing all nCr results
constexpr auto PPP = calculatePascalTriangle();
// To calculate nCr, we simply look up the value in the array
constexpr unsigned long long nCr(size_t n, size_t r) {
return PPP[n][r];
}
// Some debug test driver code. Print Pascal triangle
int main() {
constexpr size_t RowsToPrint = 16u;
const size_t digits = static_cast<size_t>(std::ceil(std::log10(nCr(RowsToPrint, RowsToPrint / 2))));
for (size_t row{}; row < RowsToPrint; ++row) {
std::cout << std::string((RowsToPrint - row) * ((digits + 1) / 2), ' ');
for (size_t col{}; col <= row; ++col)
std::cout << std::setw(digits) << nCr(row, col) << ' ';
std::cout << '\n';
}
return 0;
}
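Because nCr is a constexpr lookup, results can also be verified at compile time, for example:
static_assert(nCr(5, 2) == 10, "C(5,2) should be 10");
static_assert(nCr(10, 0) == 1 && nCr(10, 10) == 1, "border values are 1");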
We can also store Pascal's triangle in a 1-dimensional constexpr std::array. But then we additionally need to calculate the triangle numbers to find the start index of a row. This, too, can be done completely at compile time.
Then the solution would look like this:
#include <iostream>
#include <utility>
#include <array>
#include <iomanip>
#include <cmath>
// Biggest number for which nCR will work with a 64 bit variable
constexpr size_t MaxN = 69u; //14226520737620288370
// If we store Pascal Triangle in an 1 dimensional array, the size will be that
constexpr size_t ArraySize = (MaxN + 1) * MaxN / 2;
// To get the offset of a row of Pascal's triangle stored in a 1-dimensional array
constexpr size_t getTriangleNumber(size_t row) {
size_t sum{};
for (size_t i = 1; i <= row; i++) sum += i;
return sum;
}
// Generate a std::array with n elements of a given type and a generator function
template <typename DataType, DataType(*generator)(size_t), size_t... ManyIndices>
constexpr auto generateArray(std::integer_sequence<size_t, ManyIndices...>) {
return std::array<DataType, sizeof...(ManyIndices)>{ { generator(ManyIndices)... } };
}
// This is a std::array<size_t, MaxN> with the name TriangleNumber, containing the triangle numbers up to MaxN
constexpr auto TriangleNumber = generateArray<size_t, getTriangleNumber>(std::make_integer_sequence<size_t, MaxN>());
// This function will generate Pascals triangle stored in an 1 dimension std::array
constexpr auto calculatePascalTriangle() {
// Result of function. Here we will store Pascals triangle as an 1 dimensional array
std::array <unsigned long long, ArraySize> pascalTriangle{};
size_t index{}; // Running index for storing values in the array
// Go through all rows and columns of Pascals triangle
for (size_t row{}; row < MaxN; ++row) for (size_t col{}; col <= row; ++col) {
// Border values are always one
unsigned long long result{ 1 };
if (col != 0 && col != row) {
// So, we are not at the border. Get the index of the two parent values in the row above
const size_t offsetOfRowAbove = TriangleNumber[row - 1] + col;
// And calculate the new value for the current row
result = pascalTriangle[offsetOfRowAbove] + pascalTriangle[offsetOfRowAbove - 1];
}
// Store new value
pascalTriangle[index++] = result;
}
// And return array as function result
return pascalTriangle;
}
// This is a constexpr std::array<unsigned long long,ArraySize> with the name PPP, containing all nCr results
constexpr auto PPP = calculatePascalTriangle();
// To calculate nCr, we simply look up the value in the array
constexpr unsigned long long nCr(size_t n, size_t r) {
return PPP[TriangleNumber[n] + r];
}
// Some debug test driver code. Print Pascal triangle
int main() {
constexpr size_t RowsToPrint = 16; // MaxN - 1;
const size_t digits = static_cast<size_t>(std::ceil(std::log10(nCr(RowsToPrint, RowsToPrint / 2))));
for (size_t row{}; row < RowsToPrint; ++row) {
std::cout << std::string((RowsToPrint - row+1) * ((digits+1) / 2), ' ');
for (size_t col{}; col <= row; ++col)
std::cout << std::setw(digits) << nCr(row, col) << ' ';
std::cout << '\n';
}
return 0;
}
I'm a programming student, and for a project I'm working on, one of the things I have to do is compute the median value of a vector of int values, and it must be done by passing it through functions. Also, the vector is initially generated randomly using the C++ random generator mt19937, which I have already written in my code. I'm to do this using the sort function and vector member functions such as .begin(), .end(), and .size().
I'm supposed to make sure I find the median value of the vector and then output it.
And I'm stuck; below I have included my attempt. So where am I going wrong? I would appreciate it if you would be willing to give me some pointers or resources to get going in the right direction.
Code:
#include<iostream>
#include<vector>
#include<cstdlib>
#include<ctime>
#include<random>
using namespace std;
double find_median(vector<double>);
double find_median(vector<double> len)
{
{
int i;
double temp;
int n=len.size();
int mid;
double median;
bool swap;
do
{
swap = false;
for (i = 0; i< len.size()-1; i++)
{
if (len[i] > len[i + 1])
{
temp = len[i];
len[i] = len[i + 1];
len[i + 1] = temp;
swap = true;
}
}
}
while (swap);
for (i=0; i<len.size(); i++)
{
if (len[i]>len[i+1])
{
temp=len[i];
len[i]=len[i+1];
len[i+1]=temp;
}
mid=len.size()/2;
if (mid%2==0)
{
median= len[i]+len[i+1];
}
else
{
median= (len[i]+0.5);
}
}
return median;
}
}
int main()
{
int n,i;
cout<<"Input the vector size: "<<endl;
cin>>n;
vector <double> foo(n);
mt19937 rand_generator;
rand_generator.seed(time(0));
uniform_real_distribution<double> rand_distribution(0,0.8);
cout<<"original vector: "<<" ";
for (i=0; i<n; i++)
{
double rand_num=rand_distribution(rand_generator);
foo[i]=rand_num;
cout<<foo[i]<<" ";
}
double median;
median=find_median(foo);
cout<<endl;
cout<<"The median of the vector is: "<<" ";
cout<<median<<endl;
}
The median is given by
const auto median_it = len.begin() + len.size() / 2;
std::nth_element(len.begin(), median_it , len.end());
auto median = *median_it;
For even numbers (size of vector) you need to be a bit more precise. E.g., you can use
assert(!len.empty());
if (len.size() % 2 == 0) {
const auto median_it1 = len.begin() + len.size() / 2 - 1;
const auto median_it2 = len.begin() + len.size() / 2;
std::nth_element(len.begin(), median_it1 , len.end());
const auto e1 = *median_it1;
std::nth_element(len.begin(), median_it2 , len.end());
const auto e2 = *median_it2;
return (e1 + e2) / 2;
} else {
const auto median_it = len.begin() + len.size() / 2;
std::nth_element(len.begin(), median_it , len.end());
return *median_it;
}
There are of course many different ways to get element e1. We could also use std::max_element on the first half, or whatever we want. But this line is important because nth_element only places the nth element correctly; the remaining elements are ordered before or after this element, depending on whether they are smaller or larger, and each of those ranges is unsorted.
This code is guaranteed to have linear complexity on average, i.e., O(N), therefore it is asymptotically better than sort, which is O(N log N).
Regarding your code:
for (i=0; i<len.size(); i++){
if (len[i]>len[i+1])
This will not work, as you access len[len.size()] in the last iteration which does not exist.
std::sort(len.begin(), len.end());
double median = len[len.size() / 2];
will do it. You might need to take the average of the middle two elements if size() is even, depending on your requirements:
0.5 * (len[len.size() / 2 - 1] + len[len.size() / 2]);
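Putting those two snippets together, a minimal sort-based find_median (assuming a non-empty vector, and matching the signature used in the question) could look like this:
#include <algorithm>
#include <vector>

double find_median(std::vector<double> len)   // by value: sorting the copy is fine
{
    std::sort(len.begin(), len.end());
    const auto mid = len.size() / 2;
    if (len.size() % 2 == 0)
        return 0.5 * (len[mid - 1] + len[mid]);   // average of the two middle elements
    return len[mid];                              // odd size: single middle element
}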
Instead of trying to do everything at once, you should start with simple test cases and work upwards:
#include<vector>
double find_median(std::vector<double> len);
// Return the number of failures - shell interprets 0 as 'success',
// which suits us perfectly.
int main()
{
return find_median({0, 1, 1, 2}) != 1;
}
This already fails with your code (even after fixing i to be an unsigned type), so you could start debugging (even 'dry' debugging, where you trace the code through on paper; that's probably enough here).
I do note that with a smaller test case, such as {0, 1, 2}, I get a crash rather than merely failing the test, so there's something that really needs to be fixed.
Let's replace the implementation with one based on overseas's answer:
#include <algorithm>
#include <limits>
#include <vector>
double find_median(std::vector<double> len)
{
if (len.size() < 1)
return std::numeric_limits<double>::signaling_NaN();
const auto alpha = len.begin();
const auto omega = len.end();
// Find the two middle positions (they will be the same if size is odd)
const auto i1 = alpha + (len.size()-1) / 2;
const auto i2 = alpha + len.size() / 2;
// Partial sort to place the correct elements at those indexes (it's okay to modify the vector,
// as we've been given a copy; otherwise, we could use std::partial_sort_copy to populate a
// temporary vector).
std::nth_element(alpha, i1, omega);
std::nth_element(i1, i2, omega);
return 0.5 * (*i1 + *i2);
}
Now, our test passes. We can write a helper method to allow us to create more tests:
#include <cmath>
#include <iostream>
bool test_median(const std::vector<double>& v, double expected)
{
auto actual = find_median(v);
if (std::abs(expected - actual) > 0.01) {
std::cerr << actual << " - expected " << expected << std::endl;
return true;
} else {
std::cout << actual << std::endl;
return false;
}
}
int main()
{
return test_median({0, 1, 1, 2}, 1)
+ test_median({5}, 5)
+ test_median({5, 5, 5, 0, 0, 0, 1, 2}, 1.5);
}
Once you have the simple test cases working, you can manage more complex ones. Only then is it time to create a large array of random values to see how well it scales:
#include <ctime>
#include <functional>
#include <iterator>
#include <random>
#include <string>
int main(int argc, char **argv)
{
std::vector<double> foo;
const int n = argc > 1 ? std::stoi(argv[1]) : 10;
foo.reserve(n);
std::mt19937 rand_generator(std::time(0));
std::uniform_real_distribution<double> rand_distribution(0,0.8);
std::generate_n(std::back_inserter(foo), n, std::bind(rand_distribution, rand_generator));
std::cout << "Vector:";
for (auto v: foo)
std::cout << ' ' << v;
std::cout << "\nMedian = " << find_median(foo) << std::endl;
}
(I've taken the number of elements as a command-line argument; that's more convenient in my build than reading it from cin). Notice that instead of allocating n doubles in the vector, we simply reserve capacity for them, but don't create any until needed.
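The difference in a nutshell (my own illustration):
std::vector<double> a(n);   // immediately creates n value-initialized (0.0) doubles
std::vector<double> b;
b.reserve(n);               // allocates capacity for n doubles; b.size() is still 0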
For fun and kicks, we can now make find_median() generic. I'll leave that as an exercise; I suggest you start with:
template<class Iterator>
auto find_median(Iterator alpha, Iterator omega)
{
using value_type = typename Iterator::value_type;
if (alpha == omega)
return std::numeric_limits<value_type>::signaling_NaN();
}
I'm trying to learn how to use lambda functions, and want to do something like:
Given a vector = {1,2,3,4,5}
I want the sum of pairwise sums = (1+2)+(2+3)+...
Below is my attempt, which is not working properly.
#include <vector>
#include <algorithm>
#include <numeric>

using namespace std;

vector<double> data = {1, 10, 100};
double mean = accumulate(data.begin(), data.end(), 0.0);

double foo()
{
    auto bar = accumulate(data.begin(), data.end(), 0.0, [&](int k, int l){ return (k + l); });
    return bar;
}
I tried changing the return statement to return (data.at(k)+data.at(l)), which didn't quite work.
Adding pairwise sums is the same as summing over everything twice except the first and last elements. No need for a fancy lambda.
auto result = std::accumulate(std::begin(data), std::end(data), 0.0)
* 2.0 - data.front() - data.back();
Or a little safer:
auto result = std::accumulate(std::begin(data), std::end(data), 0.0)
* 2.0 - (!data.empty() ? data.front() : 0) - (data.size() > 1 ? data.back() : 0);
If you insist on a lambda, you can move the doubling inside:
result = std::accumulate(std::begin(data), std::end(data), 0.0,
[](double lhs, double rhs){return lhs + 2.0*rhs;})
- data.front() - data.back();
Note that lhs within the lambda is the current sum, not the next two numbers in the sequence.
If you insist on doing all the work within the lambda, you can track an index by using generalized capture:
result = std::accumulate(std::begin(data), std::end(data), 0.0,
[currIndex = 0U, lastIndex = data.size()-1] (double lhs, double rhs) mutable
{
double result = lhs + rhs;
if (currIndex != 0 && currIndex != lastIndex)
result += rhs;
++currIndex;
return result;
});
Demo of all approaches
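As a quick sanity check of the arithmetic (my own snippet, not part of the linked demo): for {1, 2, 3, 4, 5} the pairwise sum is (1+2)+(2+3)+(3+4)+(4+5) = 24, which is exactly 2*15 - 1 - 5:
#include <cassert>
#include <numeric>
#include <vector>

int main()
{
    const std::vector<double> data = {1, 2, 3, 4, 5};
    const double result = std::accumulate(data.begin(), data.end(), 0.0) * 2.0
                          - data.front() - data.back();
    assert(result == 24.0);   // (1+2)+(2+3)+(3+4)+(4+5)
}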
You misunderstand how std::accumulate works. Let's say you have int array[], then accumulate does:
int value = initial_val;
value = lambda( value, array[0] );
value = lambda( value, array[1] );
...
return value;
This is pseudo code, but it should be pretty easy to understand how it works. So in your case std::accumulate does not seem directly applicable as-is. You may write a loop, or create your own special accumulate function:
auto lambda = []( int a, int b ) { return a + b; };
auto sum = 0.0;
for( auto it = data.begin(); it != data.end(); ++it ) {
auto itn = std::next( it );
if( itn == data.end() ) break;
sum += lambda( *it, *itn );
}
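Alternatively (my own suggestion, not from the answer above), std::inner_product applied to the range and the same range shifted by one element computes the sum of adjacent pairs directly, assuming data has at least two elements:
#include <functional>
#include <numeric>

const double pairwise_sum = std::inner_product(
    data.begin(), data.end() - 1,   // first element of each pair
    data.begin() + 1,               // second element of each pair
    0.0,
    std::plus<>(),                  // combine the individual pair sums
    std::plus<>());                 // form each pair sum: data[i] + data[i+1]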
You could capture a variable in the lambda to keep the last value:
#include <vector>
#include <algorithm>
#include <numeric>
std::vector<double> data = {1,10,100};
double mean = accumulate(data.begin(), data.end(), 0.0);
double foo()
{
    // Seed 'last' with the first element and start accumulating from the second,
    // so the first element is only counted once (assumes data is non-empty).
    double last{data.front()};
    auto bar = accumulate(data.begin() + 1, data.end(), 0.0, [&](auto k, auto l)
    {
        auto total = l + last;
        last = l;
        return total + k;
    });
    return bar;
}
int main()
{
auto val = foo();
}
You could use some sort of index, and add the next number.
size_t index = 1;
auto bar = accumulate(data.begin(), data.end(), 0.0, [&index, &data](double a, double b) {
    if (index < data.size())
        return a + b + data[index++];   // add this element and its right-hand neighbour
    else
        return a;                       // last element: no pair starts here
});
Note you have a vector of doubles but are using ints to sum.
I want to fill a vector<float> with values, starting from a, increasing by inc, up to and including b. So basically what e.g. vec = 2:0.5:4 in Matlab would do - vec should now be { 2.0, 2.5, 3.0, 3.5, 4.0 }.
The best I could come up with is
vector<float> vec(10);
float increment = 0.5f;
std::generate(begin(vec), end(vec), [&increment]() {static float start = 2.0f; return start += increment ; });
But obviously it is incorrect as it starts at 2.5f, not 2.0f. And I would like to specify the parameters a bit easier or more concise.
I could imagine doing it in a dedicated class, but that would require quite some code.
Also I've looked at std::iota, but it can only increase by +1.
Any ideas on the best, concise approach? Using C++11 (and some parts of 14) welcome.
Edit: Of course I've also used a for-loop like:
for (float i = -1.0f; i <= 1.0f; i += 0.05f) {
vec.emplace_back(i);
}
but it has the problem that it sometimes doesn't go up to the end value, as in this example, because of floating-point imprecision (or rather, representation). Fixing that requires some code, and I think there should be a more concise way?
You could write your own variant of std::iota that also accepts a stride argument.
template<typename ForwardIterator, typename T>
void strided_iota(ForwardIterator first, ForwardIterator last, T value, T stride)
{
while(first != last) {
*first++ = value;
value += stride;
}
}
In your example, you'd use it as
std::vector<float> vec(10);
strided_iota(std::begin(vec), std::next(std::begin(vec), 5), 2.0f, 0.5f);
Live demo
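If you also want the element count derived from the bounds, Matlab-style a:inc:b (my own extension of the above, assuming inc > 0 and b >= a), you can compute the size up front and reuse strided_iota:
#include <cmath>
#include <cstddef>
#include <vector>

std::vector<float> strided_range(float a, float b, float inc)
{
    // round() guards against cases like (1 - (-1)) / 0.05 evaluating to 39.999...
    const std::size_t n = static_cast<std::size_t>(std::round((b - a) / inc)) + 1;
    std::vector<float> vec(n);
    strided_iota(vec.begin(), vec.end(), a, inc);
    return vec;
}
strided_range(2.0f, 4.0f, 0.5f) then yields { 2.0, 2.5, 3.0, 3.5, 4.0 }.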
I don't think you really need any fancy features for this.
void fill_vec(vector<float>& vec, float a, float inc, float b)
{
for(float n = a; n <= b; n += inc)
vec.push_back(n);
}
If you're worried about floating point precision missing the upper range, then you can add a small amount (often denoted by epsilon for this sort of thing):
float eps = 0.0000001f;
for(float n = a; n <= b + eps; n += inc)
If you include <cfloat>, you can use FLT_EPSILON which may vary between platforms to suit the implementation.
If the issue is that you want to include all the float values, then loop on integers and do the necessary calculations to go back to the float value within the loop.
for (int i = 20; i <= 40; i += 5) {
vec.emplace_back(i/10.0);
}
Here is an approach:
#include <iostream>
#include <vector>
#include <algorithm>
// functor
class generator_float
{
float _start, _inc;
public:
generator_float(float start, float inc): _start(start), _inc(inc) {};
float operator()() {
float tmp = _start;
_start += _inc;
return tmp;
}
};
int main()
{
std::vector<float> vec(10);
std::generate(std::begin(vec), std::end(vec), generator_float(2,0.5));
for(auto&& elem: vec)
std::cout << elem << " ";
std::cout << std::endl;
}
You can use a functor that works for both iota and generate. Overload the function call and increment operators appropriately:
#include <algorithm>
#include <iostream>
#include <numeric>
#include <type_traits>
#include <vector>

template <typename T>
class ArithmeticProg
{
T val;
T inc;
public:
ArithmeticProg(T val, T inc) : val(val), inc(inc) {}
ArithmeticProg& operator++() noexcept(noexcept(val += inc))
{
val += inc;
return *this;
}
T operator()() noexcept(noexcept(val += inc))
{
auto tmp = val;
val += inc;
return tmp;
}
operator T() const noexcept {return val;}
};
template <typename T, typename U>
ArithmeticProg<typename std::common_type<T, U>::type> makeArithmeticProg(T val, U inc)
{
return {val, inc};
}
Usage:
int main()
{
std::vector<float> vec;
std::generate_n(std::back_inserter(vec), 5, makeArithmeticProg(2.0f, 0.5f) );
for (auto f : vec)
std::cout << f << ", ";
std::cout << '\n';
std::iota( std::begin(vec), std::end(vec), makeArithmeticProg(2.5f, 0.3f) );
for (auto f : vec)
std::cout << f << ", ";
}
Demo.