Cuda using template class / passing lambdas to non-class function

Cuda using template class / passing lambdas to non-class function - c++

I am trying to learn Cuda programming and have written a couple test programs in the process. The first of these works as I expected:
ts0.cu:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <cassert>
template <typename T, typename F>
__global__ void do_op(T *a, T *b, T *c, F f)
{
int i = threadIdx.x;
c[i] = f(a[i], b[i]);
}
int main()
{
int a[] = {1, 2, 3};
int b[] = {4, 5, 6};
int c[sizeof(a) / sizeof(int)] = {0};
int *cudaA = 0;
int *cudaB = 0;
int *cudaC = 0;
cudaMalloc(&cudaA, sizeof(a));
cudaMalloc(&cudaB, sizeof(b));
cudaMalloc(&cudaC, sizeof(c));
cudaMemcpy(cudaA, a, sizeof(a), cudaMemcpyHostToDevice);
cudaMemcpy(cudaB, b, sizeof(b), cudaMemcpyHostToDevice);
do_op<int><<<1, sizeof(a) / sizeof(int)>>>(cudaA, cudaB, cudaC, [] __device__(int l, int r)
{ return l + r; }); // nvcc has no difficulty identifying this + operator
cudaMemcpy(c, cudaC, sizeof(b), cudaMemcpyDeviceToHost);
for (unsigned int i = 0; i < sizeof(c) / sizeof(int); ++i)
{
std::cout << (i == 0 ? '[' : ',') << c[i];
}
std::cout << ']' << std::endl;
return 0;
}
Compiling and running yield the following:
$ nvcc ts0.cu --extended-lambda -o ts0
$ ./ts0
[5,7,9]
I then tried wrapping the code above in a vector class I'm defining (again for learning purposes) by doing the following:
ts1.cu:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <cassert>
template <typename T, typename F>
__global__ void do_op(T *a, T *b, T *c, F f)
{
int i = threadIdx.x;
c[i] = f(a[i], b[i]);
}
template <typename T, unsigned int N>
class vector
{
private:
T _v[N];
public:
vector() : _v{0} {}
vector(const vector<T, N> &src)
{
std::copy(src._v, src._v + N, this->_v);
}
vector(std::initializer_list<T> src)
{
assert(size(src) == N);
std::copy(src.begin(), src.end(), this->_v);
}
friend vector<T, N> operator+(const vector<T, N> &lhs, const vector<T, N> &rhs)
{
vector<T, N> vec;
T *cudaLS = 0;
T *cudaRS = 0;
T *cudaRV = 0;
cudaMalloc(&cudaLS, N);
cudaMalloc(&cudaRS, N);
cudaMalloc(&cudaRV, N);
cudaMemcpy(cudaLS, lhs._v, N, cudaMemcpyHostToDevice);
cudaMemcpy(cudaRS, rhs._v, N, cudaMemcpyHostToDevice);
do_op<T><<<1, N>>>(cudaLS, cudaRS, cudaRV, [] __device__(T l, T r)
{ return l + r; }); // nvcc doesn't recognize this + operator
cudaMemcpy(vec._v, cudaRV, N, cudaMemcpyDeviceToHost);
cudaFree(cudaLS);
cudaFree(cudaRS);
cudaFree(cudaRV);
return vec;
}
friend std::ostream &operator<<(std::ostream &os, const vector<T, N> &vec)
{
for (unsigned int i = 0; i < N; ++i)
{
os << (i == 0 ? '[' : ',') << vec._v[i];
}
os << ']';
return os;
}
};
int main()
{
vector<int, 3> v0 = {1, 4, 9};
vector<int, 3> v1 = v0;
vector<int, 3> v2 = v0 + v1;
std::cout << v0 << std::endl;
std::cout << v1 << std::endl;
std::cout << v2 << std::endl;
return 0;
}
And when I compile I get the following error:
$ nvcc ts1.cu --extended-lambda -o ts1
ts1.cu: In function ‘vector<T, N> operator+(const vector<T, N>&, const vector<T, N>&)’:
ts1.cu:49:171: error: ‘::operator+’ has not been declared
49 | do_op<T><<<1, N>>>(cudaLS, cudaRS, cudaRV, [] __device__(T l, T r)
|
... followed by a large number of suggestions.
$ nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Fri_Jan__6_16:45:21_PST_2023
Cuda compilation tools, release 12.0, V12.0.140
Build cuda_12.0.r12.0/compiler.32267302_0
OS: Fedora 37
Why does the second approach fail to compile while the first one succeeds and how can I modify the second to get this general approach to compile and run successfully?

#paleonix provided some suggestions which helped... The following implementation works:
ts1.cu:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <cassert>
template <typename T, unsigned int N>
class vector
{
private:
T _v[N];
T *_cv;
void upload()
{
cudaMemcpy(this->_cv, this->_v, N * sizeof(T), cudaMemcpyHostToDevice);
}
void download()
{
cudaMemcpy(this->_v, this->_cv, N * sizeof(T), cudaMemcpyDeviceToHost);
}
public:
vector() : _v{0}, _cv(0)
{
cudaMalloc(&(this->_cv), N * sizeof(T));
}
vector(const vector<T, N> &src) : _cv(0)
{
std::copy(src._v, src._v + N, this->_v);
cudaMalloc(&(this->_cv), N * sizeof(T));
}
vector(std::initializer_list<T> src)
{
assert(size(src) == N);
std::copy(src.begin(), src.end(), this->_v);
cudaMalloc(&(this->_cv), N * sizeof(T));
}
template <typename S, unsigned int M>
friend vector<S, M> operator+(vector<S, M> &a, vector<S, M> &b);
template <typename S, unsigned int M>
friend std::ostream &operator<<(std::ostream &os, const vector<S, M> &vec);
};
template <typename T, typename F>
__global__ void do_op(T *a, T *b, T *c, F f)
{
int i = threadIdx.x;
c[i] = f(a[i], b[i]);
}
template <typename T, unsigned int N>
std::ostream &operator<<(std::ostream &os, const vector<T, N> &vec)
{
for (unsigned int i = 0; i < N; ++i)
{
os << (i == 0 ? '[' : ',') << vec._v[i];
}
os << ']';
return os;
}
template <typename T, unsigned int N>
vector<T, N> operator+(vector<T, N> &a, vector<T, N> &b)
{
vector<T, N> c;
a.upload();
b.upload();
do_op<<<1, N>>>(a._cv, b._cv, c._cv, [] __device__(T l, T r)
{ return l + r; });
c.download();
return c;
}
int main()
{
vector<int, 3> v0 = {1, 4, 9};
vector<int, 3> v1 = {5, 10, 15};
vector<int, 3> v2 = v0 + v1;
std::cout << v0 << std::endl;
std::cout << v1 << std::endl;
std::cout << v2 << std::endl;
return 0;
}
Output:
$ nvcc ts1.cu --extended-lambda -o ts1
$ ./ts1
[1,4,9]
[5,10,15]
[6,14,24]

Related

Is it possible to add/subtract arrays read from a data file?

I want to perform some mathematical operations such as addition/subtraction, mean etc. on the data read from the file. The data is two dimensional, looks something like this:
So now for example I want to subtract first row from the 3rd or 4th, it is very simple in python using indexing, but I have no idea how to do in c++.
Here is the code I'm using.
#include <boost/multi_array.hpp>
#include <boost/timer/timer.hpp>
#include <boost/range/irange.hpp>
#include <h5xx/h5xx.hpp>
#include <iostream>
#include <vector>
#include <algorithm>
#include <iterator>
#include <string>
using array_2d_t = boost::multi_array<float, 2>;
template <typename T>
void print_array(T const& array)
{
for (auto const& row : array)
{ for (auto v : row)
printf("%10f ", v);
printf("\n"); //prints a new line similar t0 \n
}
std::cout << "\n End of file" << std::endl;
}
array_2d_t read_frame(std::string const& filename, unsigned frame_no) {
//h5xx::file xaa("../../data/xaa.h5", h5xx::file::mode::in);
h5xx::file xaa(filename, h5xx::file::mode::in);
h5xx::group g(xaa, "particles/lipids/box/positions");
h5xx::dataset ds(g, "value");
// determine dataset shape: frames, particle count, space dimension
auto ds_shape = h5xx::dataspace(ds).extents<3>();
array_2d_t arr(boost::extents[ds_shape[1]][ds_shape[2]]);
std::vector<hsize_t> offsets{frame_no, 0, 0};
std::vector<hsize_t> counts{1, arr.shape()[0], arr.shape()[1]};
//std::vector<hsize_t> strid{1, arr.shape()[0], arr.shape()[1]};
// slice ohne stride:
h5xx::slice slice(offsets, counts);
h5xx::read_dataset(ds, arr, slice);
return arr;
}
int main(int argc, char const* argv[])
{
if (argc < 2) {
std::cout << "Usage: " << argv[0] << " input.h5" << std::endl;
return -1;
}
std::string filename(argv[1]);
print_array(read_frame(filename, 1));
return 0;
}
read_frame function takes the argument filename and frame_no. The file is divided into frames each containing (11214) rows, but it is not important to know here. So read_frame returns the two dimensional data for every frame. In the main() function I can print the data for every frame, but now suppose I want to subtract the first frame from the second, how can I do that?
Does anyone know ho

So, what the data looks like in text form is very irrelevant. What's relevant is that you have to arrays of the same dimensions and want to apply element-wise arithmetic operations to it.
You can use a proper Matrix library (like Boost UBlas, Eigen, Armadillo etc.).
Or you can write them for Boost MultiArray: How to execute mathematical operations between two boost::multi_arrays (C++)?
Here's a sample:
#include <algorithm>
#include <boost/multi_array.hpp>
#include <boost/range/irange.hpp>
#include <boost/timer/timer.hpp>
#include <h5xx/h5xx.hpp>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>
namespace ArrayOperators {
template <typename Op> struct ArrayOp {
Op op;
constexpr explicit ArrayOp(Op op) : op(op) {}
template <typename T, typename Scalar, size_t Dim>
constexpr inline auto operator()(
boost::multi_array<T, Dim> const& l,
Scalar const& v) const
{
std::array<int, Dim> shape;
std::copy_n(l.shape(), Dim, shape.data());
using R = boost::multi_array<decltype(op(T{}, v)), Dim>;
R result(shape);
std::transform(
l.data(), l.data()+l.num_elements(),
result.data(),
[&op=op,v](auto const& el) { return op(el, v); });
return result;
}
template <typename T, typename U, size_t Dim>
constexpr inline auto operator()(
boost::multi_array<T, Dim> const& l,
boost::multi_array<U, Dim> const& r) const
{
std::array<int, Dim> shape;
std::copy_n(l.shape(), Dim, shape.data());
assert(std::equal(shape.begin(), shape.end(), r.shape()));
using R = boost::multi_array<decltype(op(T{}, U{})), Dim>;
R result(shape);
std::transform(
l.data(), l.data()+l.num_elements(),
r.data(), result.data(),
[&op=op](auto const& v1, auto const& v2) { return op(v1, v2); });
return result;
}
};
template <typename L, typename R>
static constexpr inline auto operator+(L const& l, R const& r) {
return ArrayOp {std::plus<>{}} (l, r); }
template <typename L, typename R>
static constexpr inline auto operator-(L const& l, R const& r) {
return ArrayOp {std::minus<>{}} (l, r); }
template <typename L, typename R>
static constexpr inline auto operator/(L const& l, R const& r) {
return ArrayOp {std::divides<>{}} (l, r); }
template <typename L, typename R>
static constexpr inline auto operator*(L const& l, R const& r) {
return ArrayOp {std::multiplies<>{}} (l, r); }
}
using array_2d_t = boost::multi_array<float, 2>;
template <typename T>
void print_array(T const& array)
{
for (auto const& row : array)
{ for (auto v : row)
printf("%10f ", v);
printf("\n"); //prints a new line similar t0 \n
}
std::cout << "\n End of file" << std::endl;
}
array_2d_t read_frame(std::string const& filename, unsigned frame_no) {
//h5xx::file xaa("../../data/xaa.h5", h5xx::file::mode::in);
h5xx::file xaa(filename, h5xx::file::mode::in);
h5xx::group g(xaa, "particles/lipids/box/positions");
h5xx::dataset ds(g, "value");
// determine dataset shape: frames, particle count, space dimension
auto ds_shape = h5xx::dataspace(ds).extents<3>();
array_2d_t arr(boost::extents[ds_shape[1]][ds_shape[2]]);
std::vector<hsize_t> offsets{frame_no, 0, 0};
std::vector<hsize_t> counts{1, arr.shape()[0], arr.shape()[1]};
//std::vector<hsize_t> strid{1, arr.shape()[0], arr.shape()[1]};
// slice ohne stride:
h5xx::slice slice(offsets, counts);
h5xx::read_dataset(ds, arr, slice);
return arr;
}
int main(int argc, char const* argv[])
{
if (argc < 2) {
std::cout << "Usage: " << argv[0] << " input.h5" << std::endl;
return -1;
}
std::string filename(argv[1]);
using namespace ArrayOperators;
auto avg4 = //
(read_frame(filename, 0) + read_frame(filename, 1) +
read_frame(filename, 2) + read_frame(filename, 3)) /
4.0;
print_array(read_frame(filename, 4) - avg4);
return 0;
}
Prints:
-0.610001 0.410004 -0.150000
0.372501 -0.395000 -0.037500
-0.062500 0.027500 -0.122500
0.307503 0.184998 -0.212500
0.150000 0.009995 0.362500
-0.497500 0.197500 0.217500
0.405003 -0.185000 0.115000
-0.072500 0.247498 0.237500
-0.480000 0.034998 0.107500
-0.130000 -0.162499 -0.087500
(...)
-0.340000 -0.280001 -0.040000
-0.572502 -0.747505 -0.005000
0.392494 0.012500 0.045000
0.500000 -0.040000 -0.162500
0.355000 0.700000 0.065000
End of file

C++: How to change expression template to handle infinite amount of sums?

I wrote an expression template to sum up to three vectors together. However, as you can see in my code, this doesn't scale very well because for every additional sum operand I have to add another nested template expression. Is there a way to refactor this code to handle a (theoretically) infinite amount of additions?
template<class A>
struct Expr {
operator const A&() const {
return *static_cast<const A*>(this);
}
};
template<class A, class B>
class Add : public Expr<Add<A,B>> {
private:
const A &a_;
const B &b_;
public:
Add(const A &a, const B &b) : a_(a), b_(b) { }
double operator[] (int i) const {
return a_[i] + b_[i];
}
};
class Vector : public Expr<Vector> {
private:
double *data_;
int n_;
public:
Vector(int n, double w = 0.0) : n_(n) {
data_ = new double[n];
for(int i = 0; i < n; ++i) {
data_[i] = w;
}
}
double operator[] (int i) const {
return data_[i];
}
friend Expr<Add<Vector, Vector>> operator+(Vector &a, Vector &b) {
return Add<Vector, Vector>(a, b);
}
friend Expr<Add<Add<Vector, Vector>, Vector>> operator+(const Add<Vector, Vector> &add, const Vector &b) {
return Add<Add<Vector, Vector>, Vector>(add, b);
}
template<class A>
void operator= (const Expr<A> &a) {
const A &a_(a);
for(int i = 0; i < n_; ++i) {
data_[i] = a_[i];
}
}
};
int main() {
constexpr int size = 5;
Vector a(size, 1.0), b(size, 2.0), c(size);
c = a + b + a;
return 0;
}
This was working for me:
class Vector : public Expr<Vector> {
private:
double *data_;
int n_;
public:
Vector(int n, double w = 0.0) : n_(n) {
data_ = new double[n];
for(int i = 0; i < n; ++i) {
data_[i] = w;
}
}
double operator[] (int i) const {
return data_[i];
}
template<class A, class B>
friend Add<A, B> operator+(const Expr<A> &a, const Expr<B> &b) {
return Add<A, B>(a, b);
}
template<class A>
void operator= (const Expr<A> &a) {
const A &a_(a);
for(int i = 0; i < n_; ++i) {
data_[i] = a_[i];
}
}
};

I'm no template wizard (and I'm not up-to-date with the latest possibilities), but you can at least make a function that added a variadic amount of vectors, using something like described in the code below.
You could then buildup you expressiontree like you did before and call this function in you evaluation (operator=) function.
edit: updated the code, based on this solution (credits there)
#include <vector>
#include <algorithm>
template<typename T>
using Vec = std::vector<T>;
template<typename T, typename...Args>
auto AddVector_impl(Vec<Args> const & ... vecs){
auto its = std::tuple(cbegin(vecs)...);
auto add_inc = [](auto&... iters){
return ((*iters++) + ... );
};
auto end_check = [&](auto&...iters){
return ((iters != cend(vecs)) && ...);
};
Vec<T> res;
for(auto it = back_inserter(res); apply(end_check,its);){
*it++ = apply(add_inc,its);
}
return res;
}
template<typename T, typename... Args>
Vec<T> AddVector(Vec<T> const& vt, Vec<Args> const&... vargs){
return AddVector_impl<T>(vt,vargs...);
}
#include <iostream>
int main() {
constexpr auto size = 5;
Vec<double> a(size, 1.0), b(size, 2.0);
auto c = AddVector(a, b, a);
for(auto const& el : c){
std::cout << el << " ";
}
}
outputs:
4 4 4 4 4

Obfuscate std::array using constexpr

I'm looking for a small function that is able to transform a std::array by adding increasing values. The function must be a compile time function.
I was able to write a small constexpr function which does so for an array of length 3, but I was unable to generalize it to std::arrays of arbitrary lengths. I also failed to generalize it to contain something different than chars.
Does anyone knows how to do it?
#include <array>
#include <iostream>
#include <valarray>
constexpr std::array<char,3> obfuscate(const std::array<char,3>& x) {
return std::array<char, 3>{x.at(0)+1, x.at(1) + 2, x.at(2) + 3 };
}
/* Won't compile
template<typename T,typename S, template<typename, typename> L=std::array<T, U>>
constexpr L<T,U> obfuscate(const L<T, U>& x) {
return {x.at(0) + 1, x.at(0) + 2, x.at(0) + 3 };
}
*/
std::ostream& operator<<(std::ostream& str, const std::array<char, 3>& x) {
for (auto i = 0; i < 3; i++) {
str << x.at(i);
}
return str;
}
int main(int argc, char** args) {
std::array<char, 3> x{ 'a','b','c' };
std::cout << x << std::endl;
std::cout << obfuscate(x) << std::endl;
// std::cout << obfuscate<3>(x) << std::endl;
}

You can use std::index_sequence:
template<class T, std::size_t N, std::size_t... Is>
constexpr std::array<T, N> helper (const std::array<T, N> &x, std::index_sequence<Is...>) {
return std::array<T, N>{static_cast<T>(x.at(Is)+Is+1)...};
}
template<class T, std::size_t N>
constexpr std::array<T, N> obfuscate(const std::array<T, N> &x) {
return helper(x, std::make_index_sequence<N>{});
}

There are a few methods that use tuple packs, these are great except that MSVC has a performance problem compiling large strings.
I've found this compromise works well in MSVC.
template<typename I>
struct encrypted_string;
template<size_t... I>
struct encrypted_string<std::index_sequence<I...>>
{
std::array<char, sizeof...(I)+1> buf;
constexpr static char encrypt(char c) { return c ^ 0x41; }
constexpr static char decrypt(char c) { return encrypt(c); }
constexpr explicit __forceinline encrypted_string(const char* str)
: buf{ encrypt(str[I])... } { }
inline const char* decrypt()
{
for (size_t i = 0; i < sizeof...(I); ++i)
{
buf[i] = decrypt(buf[i]);
}
buf[sizeof...(I)] = 0;
return buf.data();
}
};
#define enc(str) encrypted_string<std::make_index_sequence<sizeof(str)>>(str)
And somewhere later
auto stringo = enc(R"(
kernel void prg_PassThru_src(const global unsigned short * restrict A, int srcstepA, int srcoffsetA,
global float * restrict Beta, int srcstepBeta, int srcoffsetBeta,
int rows, int cols) {
int x = get_global_id(0);
int y0 = get_global_id(1);
if (x < cols) {
int srcA_index = mad24(y0, srcstepA / 2, x + srcoffsetA / 2);
int srcBeta_index = mad24(y0, srcstepBeta / 4, x + srcoffsetBeta / 4);
Beta[srcBeta_index] = A[srcA_index];
}
}
//somewhere later
cv::ocl::ProgramSource programSource(stringo.decrypt());
You can see this guy's talk for more sophisticated methods:
https://www.blackhat.com/docs/eu-14/materials/eu-14-Andrivet-C-plus-plus11-Metaprogramming-Applied-To-software-Obfuscation.pdf

How to generate nested loops at compile time

I have an integer N which I know at compile time. I also have an std::array holding integers describing the shape of an N-dimensional array. I want to generate nested loops, as described bellow, at compile time, using metaprogramming techniques.
constexpr int N {4};
constexpr std::array<int, N> shape {{1,3,5,2}};
auto f = [/* accept object which uses coords */] (auto... coords) {
// do sth with coords
};
// This is what I want to generate.
for(int i = 0; i < shape[0]; i++) {
for(int j = 0; j < shape[1]; j++) {
for(int k = 0; k < shape[2]; k++) {
for(int l = 0; l < shape[3]; l++) {
f(i,j,k,l) // object is modified via the lambda function.
}
}
}
}
Note the parameter N is known at compile time but might change unpredictably between compilations, hence I can't hard code the loops as above. Ideally the loop generation mechanism will provide an interface which accepts the lambda function, generates the loops and calls the function producing the equivalent code as above. I am aware that one can write an equivalent loop at runtime with a single while loop and an array of indices, and there are answers to this question already. I am, however, not interested in this solution. I am also not interested in solutions involving preprocessor magic.

Something like this (NOTE: I take the "shape" as a variadic template argument set..)
#include <iostream>
template <int I, int ...N>
struct Looper{
template <typename F, typename ...X>
constexpr void operator()(F& f, X... x) {
for (int i = 0; i < I; ++i) {
Looper<N...>()(f, x..., i);
}
}
};
template <int I>
struct Looper<I>{
template <typename F, typename ...X>
constexpr void operator()(F& f, X... x) {
for (int i = 0; i < I; ++i) {
f(x..., i);
}
}
};
int main()
{
int v = 0;
auto f = [&](int i, int j, int k, int l) {
v += i + j + k + l;
};
Looper<1, 3, 5, 2>()(f);
auto g = [&](int i) {
v += i;
};
Looper<5>()(g);
std::cout << v << std::endl;
}

Assuming you don't want total loop unrolling, just generation of i, j, k etc. argument tuples for f:
#include <stdio.h>
#include <utility> // std::integer_sequence
template< int dim >
constexpr auto item_size_at()
-> int
{ return ::shape[dim + 1]*item_size_at<dim + 1>(); }
template<> constexpr auto item_size_at<::N-1>() -> int { return 1; }
template< size_t... dim >
void call_f( int i, std::index_sequence<dim...> )
{
f( (i/item_size_at<dim>() % ::shape[dim])... );
}
auto main()
-> int
{
int const n_items = ::shape[0]*item_size_at<0>();
for( int i = 0; i < n_items; ++i )
{
call_f( i, std::make_index_sequence<::N>() );
}
}

I suppose this is exactly what you asked for:
#include <array>
#include <iostream>
constexpr int N{4};
constexpr std::array<int, N> shape {{1,3,5,2}};
// Diagnositcs
template<typename V, typename ...Vals>
struct TPrintf {
constexpr static void call(V v, Vals ...vals) {
std::cout << v << " ";
TPrintf<Vals...>::call(vals...);
}
};
template<typename V>
struct TPrintf<V> {
constexpr static void call(V v) {
std::cout << v << std::endl;
}
};
template<typename ...Vals>
constexpr void t_printf(Vals ...vals) {
TPrintf<Vals...>::call(vals...);
}
// Unroll
template<int CtIdx, typename F>
struct NestedLoops {
template<typename ...RtIdx>
constexpr static void call(const F& f, RtIdx ...idx) {
for(int i = 0; i < shape[CtIdx]; ++i) {
NestedLoops<CtIdx + 1, F>::call(f, idx..., i);
}
}
};
template<typename F>
struct NestedLoops<N-1, F> {
template<typename ...RtIdx>
constexpr static void call(const F& f, RtIdx ...idx) {
for(int i = 0; i < shape[N-1]; ++i) {
f(idx..., i);
}
}
};
template<typename F>
void nested_loops(const F& f) {
NestedLoops<0, F>::call(f);
}
int main()
{
auto lf = [](int i, int j, int k, int l) {
t_printf(i,j,k,l);
};
nested_loops(lf);
return 0;
}

Another variant of the same thing:
template <size_t shape_index, size_t shape_size>
struct Looper
{
template <typename Functor>
void operator()(const std::array<int, shape_size>& shape, Functor functor)
{
for (int index = 0; index < shape[shape_index]; ++index)
{
Looper<shape_index + 1, shape_size>()
(
shape,
[index, &functor](auto... tail){ functor(index, tail...); }
);
}
}
};
template <size_t shape_size>
struct Looper<shape_size, shape_size>
{
template <typename Functor>
void operator()(const std::array<int, shape_size>&, Functor functor)
{
functor();
}
};
template <size_t shape_size, typename Functor>
void loop(const std::array<int, shape_size>& shape, Functor functor)
{
Looper<0, shape_size>()(shape, functor);
}
Example of use:
constexpr size_t N {4};
constexpr std::array<int, N> shape {{1,3,5,2}};
void f(int i, int j, int k, int l)
{
std::cout
<< std::setw(5) << i
<< std::setw(5) << j
<< std::setw(5) << k
<< std::setw(5) << l
<< std::endl;
}
// ...
loop(shape, f);
Live demo

Slicing std::array

Is there an easy way to get a slice of an array in C++?
I.e., I've got
array<double, 10> arr10;
and want to get array consisting of five first elements of arr10:
array<double, 5> arr5 = arr10.???
(other than populating it by iterating through first array)

The constructors for std::array are implicitly defined so you can't initialize it with a another container or a range from iterators. The closest you can get is to create a helper function that takes care of the copying during construction. This allows for single phase initialization which is what I believe you're trying to achieve.
template<class X, class Y>
X CopyArray(const Y& src, const size_t size)
{
X dst;
std::copy(src.begin(), src.begin() + size, dst.begin());
return dst;
}
std::array<int, 5> arr5 = CopyArray<decltype(arr5)>(arr10, 5);
You can also use something like std::copy or iterate through the copy yourself.
std::copy(arr10.begin(), arr10.begin() + 5, arr5.begin());

Sure. Wrote this:
template<int...> struct seq {};
template<typename seq> struct seq_len;
template<int s0,int...s>
struct seq_len<seq<s0,s...>>:
std::integral_constant<std::size_t,seq_len<seq<s...>>::value> {};
template<>
struct seq_len<seq<>>:std::integral_constant<std::size_t,0> {};
template<int Min, int Max, int... s>
struct make_seq: make_seq<Min, Max-1, Max-1, s...> {};
template<int Min, int... s>
struct make_seq<Min, Min, s...> {
typedef seq<s...> type;
};
template<int Max, int Min=0>
using MakeSeq = typename make_seq<Min,Max>::type;
template<std::size_t src, typename T, int... indexes>
std::array<T, sizeof...(indexes)> get_elements( seq<indexes...>, std::array<T, src > const& inp ) {
return { inp[indexes]... };
}
template<int len, typename T, std::size_t src>
auto first_elements( std::array<T, src > const& inp )
-> decltype( get_elements( MakeSeq<len>{}, inp ) )
{
return get_elements( MakeSeq<len>{}, inp );
}
Where the compile time indexes... does the remapping, and MakeSeq makes a seq from 0 to n-1.
Live example.
This supports both an arbitrary set of indexes (via get_elements) and the first n (via first_elements).
Use:
std::array< int, 10 > arr = {0,1,2,3,4,5,6,7,8,9};
std::array< int, 6 > slice = get_elements(arr, seq<2,0,7,3,1,0>() );
std::array< int, 5 > start = first_elements<5>(arr);
which avoids all loops, either explicit or implicit.

2018 update, if all you need is first_elements:
Less boilerplaty solution using C++14 (building up on Yakk's pre-14 answer and stealing from "unpacking" a tuple to call a matching function pointer)
template < std::size_t src, typename T, int... I >
std::array< T, sizeof...(I) > get_elements(std::index_sequence< I... >, std::array< T, src > const& inp)
{
return { inp[I]... };
}
template < int N, typename T, std::size_t src >
auto first_elements(std::array<T, src > const& inp)
-> decltype(get_elements(std::make_index_sequence<N>{}, inp))
{
return get_elements(std::make_index_sequence<N>{}, inp);
}
Still cannot explain why this works, but it does (for me on Visual Studio 2017).

This answer might be late... but I was just toying around with slices - so here is my little home brew of std::array slices.
Of course, this comes with a few restrictions and is not ultimately general:
The source array from which a slice is taken must not go out of scope. We store a reference to the source.
I was looking for constant array slices first and did not try to expand this code to both const and non const slices.
But one nice feature of the code below is, that you can take slices of slices...
// ParCompDevConsole.cpp : This file contains the 'main' function. Program execution begins and ends there.
//
#include "pch.h"
#include <cstdint>
#include <iostream>
#include <array>
#include <stdexcept>
#include <sstream>
#include <functional>
template <class A>
class ArraySliceC
{
public:
using Array_t = A;
using value_type = typename A::value_type;
using const_iterator = typename A::const_iterator;
ArraySliceC(const Array_t & source, size_t ifirst, size_t length)
: m_ifirst{ ifirst }
, m_length{ length }
, m_source{ source }
{
if (source.size() < (ifirst + length))
{
std::ostringstream os;
os << "ArraySliceC::ArraySliceC(<source>,"
<< ifirst << "," << length
<< "): out of bounds. (ifirst + length >= <source>.size())";
throw std::invalid_argument( os.str() );
}
}
size_t size() const
{
return m_length;
}
const value_type& at( size_t index ) const
{
return m_source.at( m_ifirst + index );
}
const value_type& operator[]( size_t index ) const
{
return m_source[m_ifirst + index];
}
const_iterator cbegin() const
{
return m_source.cbegin() + m_ifirst;
}
const_iterator cend() const
{
return m_source.cbegin() + m_ifirst + m_length;
}
private:
size_t m_ifirst;
size_t m_length;
const Array_t& m_source;
};
template <class T, size_t SZ>
std::ostream& operator<<( std::ostream& os, const std::array<T,SZ>& arr )
{
if (arr.size() == 0)
{
os << "[||]";
}
else
{
os << "[| " << arr.at( 0 );
for (auto it = arr.cbegin() + 1; it != arr.cend(); it++)
{
os << "," << (*it);
}
os << " |]";
}
return os;
}
template<class A>
std::ostream& operator<<( std::ostream& os, const ArraySliceC<A> & slice )
{
if (slice.size() == 0)
{
os << "^[||]";
}
else
{
os << "^[| " << slice.at( 0 );
for (auto it = slice.cbegin() + 1; it != slice.cend(); it++)
{
os << "," << (*it);
}
os << " |]";
}
return os;
}
template<class A>
A unfoldArray( std::function< typename A::value_type( size_t )> producer )
{
A result;
for (size_t i = 0; i < result.size(); i++)
{
result[i] = producer( i );
}
return result;
}
int main()
{
using A = std::array<float, 10>;
auto idf = []( size_t i ) -> float { return static_cast<float>(i); };
const auto values = unfoldArray<A>(idf);
std::cout << "values = " << values << std::endl;
// zero copy slice of values array.
auto sl0 = ArraySliceC( values, 2, 4 );
std::cout << "sl0 = " << sl0 << std::endl;
// zero copy slice of the sl0 (the slice of values array)
auto sl01 = ArraySliceC( sl0, 1, 2 );
std::cout << "sl01 = " << sl01 << std::endl;
return 0;
}

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

Cuda using template class / passing lambdas to non-class function - c++

Related

Is it possible to add/subtract arrays read from a data file?

C++: How to change expression template to handle infinite amount of sums?

Obfuscate std::array using constexpr

How to generate nested loops at compile time

Slicing std::array

Categories

Resources