I'm building a static loop for type dispatching using macros. Here is
what I achieved so far.
// LOOP(n, f): statically unrolled loop used for type dispatching.
//
// Calls f(std::integral_constant<size_t, I>()) for I = 0 .. n-1, in that
// order. `n` must be a constant expression no larger than 8.
//
// The expansion is a single `do { ... } while (0)` statement WITHOUT a
// trailing semicolon, so `LOOP(...);` behaves like one ordinary statement
// (e.g. as the body of an `if` that has an `else`). `n` is parenthesized at
// every use, and `f` is evaluated exactly once and then reused.
//
// Caveat: when the macro is expanded outside of a template, the branches
// whose `if constexpr` condition is false are still fully checked by the
// compiler, so every f(...) call below must be well-formed for the given n.
#define LOOP(n, f) \
    do { \
        static_assert((n) <= 8, "static loop size should <= 8"); \
        auto&& loop_callable_ = (f); \
        if constexpr ((n) >= 8) \
            loop_callable_(std::integral_constant<size_t, (n) - 8>()); \
        if constexpr ((n) >= 7) \
            loop_callable_(std::integral_constant<size_t, (n) - 7>()); \
        if constexpr ((n) >= 6) \
            loop_callable_(std::integral_constant<size_t, (n) - 6>()); \
        if constexpr ((n) >= 5) \
            loop_callable_(std::integral_constant<size_t, (n) - 5>()); \
        if constexpr ((n) >= 4) \
            loop_callable_(std::integral_constant<size_t, (n) - 4>()); \
        if constexpr ((n) >= 3) \
            loop_callable_(std::integral_constant<size_t, (n) - 3>()); \
        if constexpr ((n) >= 2) \
            loop_callable_(std::integral_constant<size_t, (n) - 2>()); \
        if constexpr ((n) >= 1) \
            loop_callable_(std::integral_constant<size_t, (n) - 1>()); \
    } while (0)
/// Returns the number of elements of a tuple-like object as a constant.
///
/// Only the static type of the argument is used; its value is never read.
/// Taking the argument by const reference accepts lvalues, const objects and
/// temporaries alike.
///
/// @tparam T  a type for which std::tuple_size is defined
/// @return    std::tuple_size_v<T>
template <typename T>
constexpr std::size_t tupleSize(const T&) { return std::tuple_size_v<T>; }
int main() {
    // Seven heterogeneous elements, so LOOP is expanded with n == 7.
    auto t = std::make_tuple(1, "string", 0.2, 3, 1, 1, 1);

    // `i` arrives as a std::integral_constant, which is what allows it to be
    // used as the template argument of std::get.
    auto print_element = [&](auto i) { cout << std::get<i>(t) << endl; };

    LOOP(tupleSize(t), print_element);
    return 0;
}
And the godbolt link https://godbolt.org/z/GcMZI3
The question is, why do the first four branches fail the compilation?
Do not use a macro, use a function template instead. if constexpr works by discarding the non-taken branch depending on the current instantiation of a template.
// Helper for loop(): calls f with index i, then continues with i + 1 until
// n is reached. The recursive call sits inside `if constexpr`, so it is only
// instantiated while i < n.
template <std::size_t i, std::size_t n, typename F>
void loop_from(F& f)
{
    if constexpr (i < n) {
        f(std::integral_constant<std::size_t, i>());
        loop_from<i + 1, n>(f);
    }
}

/// Compile-time unrolled loop.
///
/// Calls f(std::integral_constant<std::size_t, I>()) for I = 0 .. n-1, in
/// ascending order. Because each index is passed as a distinct type, the
/// callable can use it where a constant expression is required (for example
/// as the template argument of std::get).
///
/// @tparam n  number of iterations; 0 performs no call
/// @param  f  callable that accepts any std::integral_constant<std::size_t, I>
template <std::size_t n, typename F>
void loop(F&& f)
{
    loop_from<0, n>(f);
}
Usage:
int main() {
    constexpr auto t = std::make_tuple(1, "string", 0.2, 3);

    // Prints the element selected by the compile-time index `i`.
    auto print_element = [&](auto i) { cout << std::get<i>(t) << endl; };

    // One call per tuple element.
    loop<tupleSize(t)>(print_element);
    return 0;
}
live example on godbolt.org
From cppreference:
If a constexpr if statement appears inside a templated entity, and if condition is not value-dependent after instantiation, the discarded statement is not instantiated when the enclosing template is instantiated.
Outside a template, a discarded statement is fully checked. if constexpr is not a substitute for the #if preprocessing directive
Related
I was trying out the constexpr functions and stumbled upon the below example of implementing Fibonacci numbers
There is no logical difference between fibon2 and fibon1, but I still get a compilation error about exceeding the template instantiation depth for fibon1.
What am I missing here?
#include <cstdint>
#include <iostream>
// Compile-time Fibonacci-style recursion written as a single
// if / else-if / else chain of `if constexpr`.
//
// Inputs 0, 1 and 2 all yield 1; any other x yields
// fibon2<x - 1>() + fibon2<x - 2>(). Because the recursive return lives in
// the final `else`, it is discarded (and therefore not instantiated) whenever
// one of the base cases matches.
template <int32_t x>
constexpr int32_t fibon2() {
    if constexpr (x == 0) {
        return 1;
    } else if constexpr (x == 1) {
        return 1;
    } else if constexpr (x == 2) {
        return 1;
    } else {
        const int32_t previous = fibon2<x - 1>();
        const int32_t before_previous = fibon2<x - 2>();
        return previous + before_previous;
    }
}
// Same base cases as fibon2, but written as three freestanding
// `if constexpr` statements followed by an unconditional return.
//
// The final return is not part of any `if constexpr`, so it is compiled for
// every x, including the base cases. Each instantiation therefore requires
// fibon1<x - 1> and fibon1<x - 2>, which require further instantiations in
// turn; this is the source of the "exceeding template instantiations"
// compilation error described in the accompanying text.
template <int32_t x>
constexpr int32_t fibon1() {
// Base cases: each statement is kept only when its condition holds.
if constexpr (x == 0) return 1;
if constexpr (x == 1) return 1;
if constexpr (x == 2) return 1;
// Not guarded by `if constexpr`: always instantiated, even for x <= 2.
return fibon1<x - 1>() + fibon1<x - 2>();
}
// Run-time counterpart of the templated versions.
//
// Returns 1 for x == 1 and x == 2, otherwise fibon3(x - 1) + fibon3(x - 2).
// Only 1 and 2 terminate the recursion, so values below 1 never reach a
// base case.
int32_t fibon3(int32_t x) {
    const bool is_base_case = (x == 1) || (x == 2);
    return is_base_case ? 1 : fibon3(x - 1) + fibon3(x - 2);
}
int main() {
    // Evaluate the three variants for the same input, then print one result
    // per line in the order: run-time, else-chain, freestanding ifs.
    const int32_t from_runtime = fibon3(2);
    const int32_t from_else_chain = fibon2<2>();
    const int32_t from_freestanding_ifs = fibon1<2>();

    std::cout << from_runtime << std::endl;
    std::cout << from_else_chain << std::endl;
    std::cout << from_freestanding_ifs << std::endl;
    return 0;
}
The return in fibon1 is not under constexpr and thus has to be compiled regardless of the template argument provided.
Clang shows this nicely: https://godbolt.org/z/577f15Kv1
If you put the return inside its own if constexpr then your fibon1 will compile:
// fibon1 with every return guarded by its own freestanding `if constexpr`.
// For a given x at most one of the statements survives instantiation, so the
// recursive call is only instantiated when x > 2.
template <int32_t x>
constexpr int32_t fibon1() {
    // Base cases 0, 1 and 2 all yield 1.
    if constexpr (x >= 0 && x <= 2) return 1;
    // Recursive case, instantiated only above the base cases.
    if constexpr (x > 2) {
        const int32_t previous = fibon1<x - 1>();
        const int32_t before_previous = fibon1<x - 2>();
        return previous + before_previous;
    }
}
The last return in fibon2 is in a constexpr-if even though there's only an else there. It's the same as if you'd made it:
else if constexpr (x == 2)
return 1;
else if constexpr(true) // <- like this
return fibon2<x - 1>() + fibon2<x - 2>();
That's not the case in fibon1 which is why it fails. If you want a freestanding if constexpr for it, then
// fibon1 rewritten so that the recursive return is also a freestanding
// `if constexpr`. Exactly one of the four statements is kept for any x, so
// the recursion is instantiated only when no base case matches.
template <int32_t x>
constexpr int32_t fibon1() {
if constexpr (x == 0) return 1; // NOTE(review): shouldn't this be 0?
if constexpr (x == 1) return 1;
if constexpr (x == 2) return 1; // NOTE(review): looks inconsistent with fibon1<0> returning 1
// Added constexpr-if: covers every x that is not one of the base cases
// above, which includes negative x.
if constexpr (x < 0 || x > 2) return fibon1<x - 1>() + fibon1<x - 2>();
}
Note that the above will fail if you supply a negative value as a template parameter, as it will never reach one of your terminating conditions. Either the recursion will be too deep or you'll get a signed integer overflow. If you instead want symmetry around 0, you can simply negate the template parameter and the return value when the template parameter is negative:
// Fibonacci with fibon1<0>() == 0 and fibon1<1>() == 1, extended to negative
// arguments by mirroring: fibon1<-x>() == -fibon1<x>().
// All three branches belong to one `if constexpr` chain, so only the branch
// matching x is instantiated.
template <int32_t x>
constexpr int32_t fibon1() {
    if constexpr (x >= 2) {
        // Regular recurrence.
        return fibon1<x - 1>() + fibon1<x - 2>();
    } else if constexpr (x >= 0) {
        // x is 0 or 1: the value equals the argument.
        return x;
    } else {
        // Negative argument: evaluate the positive counterpart and negate.
        return -fibon1<-x>();
    }
}
I have some code as follows:
// Result matrix, created by zeros<T, N>(dims).
Matrix<T, N> res = zeros<T, N>(dims);
// The view type of each operand depends on its rank: a rank-1 operand is
// wrapped in a rank-2 MatrixView, any other rank is viewed as-is. The two
// choices are nested, giving four branches that all end in the same call.
if constexpr (N1 == 1) {
// Rank-1 left operand viewed with dimensions {1, m1.dims[0]}.
MatrixView<U, 2> m1_view ({1, m1.dims[0]}, m1.dataView(), {m1.dims[0], 1});
if constexpr (N2 == 1) {
// Rank-1 right operand viewed with dimensions {m2.dims[0], 1}.
MatrixView<V, 2> m2_view ({m2.dims[0], 1}, m2.dataView(), {1, 1});
// duplicate code! yuck..
res.applyFunctionWithBroadcast(m1_view, m2_view, MatmulTo<U, V, T,
std::min(std::max(N1, 2lu), N - 1),
std::min(std::max(N2, 2lu), N - 1), N - 1>);
} else {
MatrixView<V, N2> m2_view (m2);
res.applyFunctionWithBroadcast(m1_view, m2_view, MatmulTo<U, V, T,
std::min(std::max(N1, 2lu), N - 1),
std::min(std::max(N2, 2lu), N - 1), N - 1>);
}
} else {
MatrixView<U, N1> m1_view (m1);
if constexpr (N2 == 1) {
// Rank-1 right operand viewed with dimensions {m2.dims[0], 1}.
MatrixView<V, 2> m2_view ({m2.dims[0], 1}, m2.dataView(), {1, 1});
// duplicate code! yuck..
res.applyFunctionWithBroadcast(m1_view, m2_view, MatmulTo<U, V, T,
std::min(std::max(N1, 2lu), N - 1),
std::min(std::max(N2, 2lu), N - 1), N - 1>);
} else {
MatrixView<V, N2> m2_view (m2);
res.applyFunctionWithBroadcast(m1_view, m2_view, MatmulTo<U, V, T,
std::min(std::max(N1, 2lu), N - 1),
std::min(std::max(N2, 2lu), N - 1), N - 1>);
}
}
return res;
which can be simplified in languages like Python (but not in C++):
// Desired shape of the code: choose each view once, then make a single call.
// Each view is declared inside the braces of its if/else branch, so its
// scope ends at the closing brace and the names are no longer visible at the
// call below.
if constexpr (N1 == 1) {
MatrixView<U, 2> m1_view ({1, m1.dims[0]}, m1.dataView(), {m1.dims[0], 1});
} else {
MatrixView<U, N1> m1_view (m1);
}
if constexpr (N2 == 1) {
MatrixView<V, 2> m2_view ({m2.dims[0], 1}, m2.dataView(), {1, 1});
} else {
MatrixView<V, N2> m2_view (m2);
}
// doesn't compile
res.applyFunctionWithBroadcast(m1_view, m2_view, MatmulTo<U, V, T,
std::min(std::max(N1, 2lu), N - 1),
std::min(std::max(N2, 2lu), N - 1), N - 1>);
return res;
MatrixView is not default constructible. (It is basically a non-owning view for some Matrix, therefore allowing so will break design)
The code above is logically valid but it won't compile, so I wrote the first ugly, verbose code.
Is there any way to remove code duplication in situations like this?
you might do:
auto m1_view = [&](){
if constexpr (N1 == 1) {
return MatrixView<U, 2>({1, m1.dims[0]}, m1.dataView(), {m1.dims[0], 1});
} else {
return MatrixView<U, N1>(m1);
}
}(); // notice extra () to immediate call
auto m2_view = [&](){
if constexpr (N2 == 1) {
return MatrixView<V, 2>({m2.dims[0], 1}, m2.dataView(), {1, 1});
} else {
return MatrixView<V, N2>(m2);
}
}();
Matrix<T, N> res = zeros<T, N>(dims);
res.applyFunctionWithBroadcast(m1_view, m2_view, MatmulTo<U, V, T,
std::min(std::max(N1, 2lu), N - 1),
std::min(std::max(N2, 2lu), N - 1), N - 1>);
return res;
I have this static_loop construct that is used for type dispatching over loop unrolling.
// Helper for static_loop(): calls f with index i, then continues with i + 1
// until n is reached. The recursive call sits inside `if constexpr`, so it
// is only instantiated while i < n.
template <std::size_t i, std::size_t n, typename F>
void static_loop_from(F& f) {
    if constexpr (i < n) {
        f(std::integral_constant<std::size_t, i>());
        static_loop_from<i + 1, n>(f);
    }
}

/// Compile-time unrolled loop used for type dispatching.
///
/// Calls f(std::integral_constant<std::size_t, I>()) for I = 0 .. n-1, in
/// ascending order. Each index is passed as a distinct type, so the callable
/// can use it where a constant expression is required.
///
/// @tparam n  number of iterations; 0 performs no call
/// @param  f  callable that accepts any std::integral_constant<std::size_t, I>
template <std::size_t n, typename F> void static_loop(F&& f) {
    static_loop_from<0, n>(f);
}
/// Returns the number of elements of a tuple-like object as a constant.
///
/// Only the static type of the argument is used. The argument is taken by
/// const reference so that it is not copied and so that tuples holding
/// non-copyable elements are accepted.
///
/// @tparam T  a type for which std::tuple_size is defined
/// @return    std::tuple_size_v<T>
template <typename T>
constexpr std::size_t tupleSize(const T&) { return std::tuple_size_v<T>; }
// Example of nesting two static_loop calls; as posted, this does not compile.
struct A {
int a;
int b;
// Builds a tuple of references to a and b, assigns through it from two
// nested static loops, then prints a and b separated by a space.
void run() {
// std::ref makes the tuple elements refer to the members themselves.
auto ab = std::make_tuple(std::ref(a), std::ref(b));
static_loop<2>([&](auto i) {
// Here i is the lambda's own parameter, so it is accepted as a template argument.
std::get<i>(ab) = i;
// In the inner lambda i is a capture of the outer parameter; the compiler
// rejects `i * j` as a template argument (see the discussion that follows).
static_loop<2>([&](auto j) { std::get<i * j>(ab) = i; });
// static_loop<2>([&, i = std::integral_constant<size_t, i>()](auto j) { std::get<i * j>(ab) = i; });
});
std::cout << a << " " << b << std::endl;
}
};
However it doesn't compile when doing nested loops. I'd assume i and j are both constexpr thus i * j is valid in std::get<>, however, compiler seems not allowing this. Is it possible to capture i as constexpr in the inner lambda?
Full example is at godbolt along with the error messages.
static_loop<2>([&](auto i) {
std::get<i>(ab) = i;
static_loop<2>([&](auto j) { std::get<i * j>(ab) }
}
The IDE correctly underlines the error for you. i * j is a multiplication of two variables; it is not a compile-time constant.
You can nest loops if you switch to template parameters rather than function arguments.
This somehow works. Not sure if it violates any standard rules.
// Helper for static_loop(): calls f with index i, then continues with i + 1
// until n is reached. The recursive call sits inside `if constexpr`, so it
// is only instantiated while i < n.
template <std::size_t i, std::size_t n, typename F>
void static_loop_from(F& f) {
    if constexpr (i < n) {
        f(std::integral_constant<std::size_t, i>());
        static_loop_from<i + 1, n>(f);
    }
}

/// Compile-time unrolled loop used for type dispatching.
///
/// Calls f(std::integral_constant<std::size_t, I>()) for I = 0 .. n-1, in
/// ascending order. Each index is passed as a distinct type, so the callable
/// can recover it from the parameter's type alone (e.g. via decltype).
///
/// @tparam n  number of iterations; 0 performs no call
/// @param  f  callable that accepts any std::integral_constant<std::size_t, I>
template <std::size_t n, typename F> void static_loop(F&& f) {
    static_loop_from<0, n>(f);
}
// Nested static loops over a tuple of references to the two members.
struct A {
    int a;
    int b;

    // Assigns to a and b through the tuple from two nested static loops,
    // then prints both values separated by a space.
    void run() {
        auto ab = std::make_tuple(std::ref(a), std::ref(b));

        auto outer_body = [&](auto i) {
            std::get<i>(ab) = i;

            auto inner_body = [&](auto j) {
                // Rebuild the outer index from its type alone: `ii` is a
                // fresh local object, not the captured `i`, and is accepted
                // inside the template argument below.
                auto ii = decltype(i)();
                std::get<ii * j>(ab) = ii;
            };
            static_loop<2>(inner_body);
        };
        static_loop<2>(outer_body);

        std::cout << a << " " << b << std::endl;
    }
};
The captured value might not be constexpr but its type is somehow retained.
I cannot for the life of me figure out what's going on. Here's the error I get:
alloc static vecs
a.out: malloc.c:2451: sYSMALLOc: Assertion `(old_top == (((mbinptr) (((char *) &((av)->bins[((1) - 1) * 2])) - __builtin_offsetof (struct malloc_chunk, fd)))) && old_size == 0) || ((unsigned long) (old_size) >= (unsigned long)((((__builtin_offsetof (struct malloc_chunk, fd_nextsize))+((2 * (sizeof(size_t))) - 1)) & ~((2 * (sizeof(size_t))) - 1))) && ((old_top)->size & 0x1) && ((unsigned long)old_end & pagemask) == 0)' failed. Aborted (core dumped)
The error occurs in the function Halton in class qmc, which I've included the relevant bits to below. As you can see, the first print statement "alloc static vecs" executes, but the statement std::vector<double> H(s); appears not to, since the print statement immediately following it does not execute.
Now, I should mention that when I replace the statement static std::vector<int> bases = FirstPrimes(s); in Halton with static std::vector<int> bases = {2,3,5,7,11,13}; (the RHS is the return array of FirstPrimes(), just hardcoded) then there is no error.
There are more functions in Halton (it returns a std::vector) but I've omitted them for brevity. I'll add them if anyone wants to try to run it themselves, just ask!
I'm using g++ 4.6 and Ubuntu 12.04, and the compilation command is g++ -std=c++0x scratch.cpp QMC.cpp.
main (scratch.cpp):
#include <iostream>
#include <vector>
#include "QMC.h"
int main() {
    QMC qmc{};
    // Calls Halton with s = 6 and n = 1; the result is stored but not used.
    auto halton = qmc.Halton(6, 1);
}
QMC.h:
#ifndef QMC_H
#define QMC_H
#include <iostream>
#include <cmath>
#include <vector>
// Collection of helper routines declared for the QMC example. The class has
// no data members; definitions live in QMC.cpp.
class QMC {
public:
// Default constructor.
QMC();
// Primality test for n.
bool isPrime(int n);
// NOTE(review): definition not shown here; presumably returns the digits of
// n in base `radix` -- confirm against QMC.cpp.
std::vector<int> ChangeBase(int n, int radix);
// NOTE(review): definition not shown here; presumably advances the digit
// vector a_in to the next number in base `radix` -- confirm against QMC.cpp.
std::vector<int> NextChangeBase(std::vector<int>& a_in, int radix);
// NOTE(review): definition not shown here; presumably the radical inverse of
// the digit vector a in base b -- confirm against QMC.cpp.
double RadicalInverse(std::vector<int>& a, int b);
// Returns a vector of n ints filled with primes, starting with 2.
std::vector<int> FirstPrimes(int n);
// Returns a vector of doubles; n defaults to 0 when omitted.
std::vector<double> Halton(int s, int n = 0);
};
#endif
QMC.cpp:
#include "QMC.h"
// Nothing to initialize: the class declares no data members.
QMC::QMC() = default;
// Excerpt of Halton: only the allocations at the top of the function are
// shown; the remainder of the body (including its return) is elided.
std::vector<double> QMC::Halton(int s, int n) {
// Function-local statics are initialized only the first time execution
// reaches them, so both vectors are built from the `s` of the first call.
static std::vector<std::vector<int> > newBases(s);
// FirstPrimes(s) runs here, before the first message below is printed.
static std::vector<int> bases = FirstPrimes(s);
/* replacing the statement immediately above with
static std::vector<int> bases = {2,3,5,7,11,13}; fixes it */
std::cout << "alloc static vecs \n";
// Per-call vector of s doubles; this is the allocation reported to abort.
std::vector<double> H(s);
std::cout << "alloc H \n";
// ...there's more to this function, but the error occurs just above this.
}
// Fills a vector of n ints with primes: 2 is stored first, then odd
// candidates starting at 3 are tested with isPrime.
//
// WARNING: the loop below runs while countOfPrimes <= n, so its last
// iteration writes primes[n], one element past the end of a vector of size
// n. With n == 0 the write to primes[0] is already out of bounds.
std::vector<int> QMC::FirstPrimes(int n) {
std::vector<int> primes(n);
primes[0] = 2;
int testNum = 3;
for (int countOfPrimes = 1; countOfPrimes <= n; ++countOfPrimes) {
// Skip odd numbers until the next prime is found.
while (isPrime(testNum) == false)
testNum = testNum + 2;
primes[countOfPrimes] = testNum; // out of bounds when countOfPrimes == n
testNum = testNum + 2;
}
return primes;
}
/// Trial-division primality test.
///
/// @param n  number to test; any int is accepted
/// @return   true if n is prime. 0, 1 and all negative numbers are not prime.
bool QMC::isPrime(int n) {
    if (n < 2) return false;                     // 0, 1 and negatives are not prime
    if (n < 4) return true;                      // 2 and 3 are prime
    if (n % 2 == 0 || n % 3 == 0) return false;  // multiples of 2 or 3

    // Every remaining candidate divisor has the form 6k - 1 or 6k + 1.
    // `f <= n / f` is the integer form of f * f <= n: it needs no
    // floating-point square root and cannot overflow.
    for (int f = 5; f <= n / f; f += 6) {
        if (n % f == 0 || n % (f + 2) == 0) return false;
    }
    return true;
}
FirstPrimes has a buffer overflow. The relevant lines:
std::vector<int> primes(n);
primes[0] = 2;
for (int countOfPrimes = 1; countOfPrimes <= n; ++countOfPrimes)
primes[countOfPrimes] = testNum;
For a vector of size n, the valid indices are 0 through n-1. On the last loop iteration you do an out-of-bounds access.
I'd suggest changing both of the [ ] to .at( ), as well as fixing the logic error. This would also prevent trouble if you happened to call this function with n == 0.
I have built recursive function to compute Pascal's triangle values.
Is there a way to optimize it?
Short reminder about Pascal's triangle: C(n, k) = C(n-1, k-1) + C(n-1, k)
My code is:
// Value of Pascal's triangle at row n, column k, computed directly from
// C(n, k) = C(n-1, k-1) + C(n-1, k).
// Column 0 is 1 in every row; row 0 is 0 in every other column.
int Pascal(int n, int k) {
    if (k != 0 && n != 0) {
        return Pascal(n - 1, k) + Pascal(n - 1, k - 1);
    }
    return (k == 0) ? 1 : 0;
}
The inefficiency I see is that it stores some values twice.
Example:
C(6,2) = C(5,1) + C(5,2)
C(6,2) = C(4,0) + C(4,1) + C(4,1) + C(4,2)
it will call C(4,1) twice
Any idea how to optimize this function?
Thanks
The following routine will compute the n-choose-k, using the recursive definition and memoization. The routine is extremely fast and accurate:
/// Binomial coefficient C(n, k), computed with the recurrence
/// C(n, k) = C(n-1, k-1) + C(n-1, k) and a memoization table.
///
/// @param n  size of the set
/// @param k  number of chosen elements
/// @return   C(n, k); 0 when k > n, 1 when k == 0 or k == n (so C(0, 0) == 1).
///           Arithmetic is unsigned 64-bit and wraps silently on overflow.
///
/// For n <= 100 a function-local static table is reused across calls; it is
/// read and written without synchronization. Larger n allocate a temporary
/// table of about n * n / 2 entries for the duration of the call.
/// NOTE(review): for very large n that size computation itself can wrap.
inline unsigned long long n_choose_k(const unsigned long long& n,
                                     const unsigned long long& k)
{
    using value_type = unsigned long long;

    if (n < k) return 0;
    if ((0 == k) || (n == k)) return 1;
    if (1 == k) return n;

    // Memoized evaluator over a flat table. Each row keeps only the first
    // half of its columns, because C(row, col) == C(row, row - col).
    // A stored 0 means "not computed yet".
    class n_choose_k_impl
    {
    public:
        n_choose_k_impl(value_type* table, const value_type& dimension)
        : table_(table),
          dimension_(dimension / 2)
        {}

        // Table cell shared by C(row, col) and C(row, row - col).
        value_type& lookup(const value_type& row, const value_type& col)
        {
            const value_type mirrored = row - col;
            const value_type column = (col < mirrored) ? col : mirrored;
            return table_[static_cast<std::size_t>((dimension_ * row) + column)];
        }

        // C(row, col) = C(row-1, col-1) + C(row-1, col)
        value_type compute(const value_type& row, const value_type& col)
        {
            if ((0 == col) || (col == row))
                return 1;
            const value_type left = cached(row - 1, col - 1);
            const value_type right = cached(row - 1, col);
            return left + right;
        }

    private:
        // Returns the memoized value, computing and storing it on first use.
        value_type cached(const value_type& row, const value_type& col)
        {
            value_type& slot = lookup(row, col);
            if (0 == slot)
                slot = compute(row, col);
            return slot;
        }

        value_type* table_;
        const value_type dimension_;
    };

    static constexpr std::size_t static_table_dim = 100;
    static constexpr std::size_t static_table_size =
        (static_table_dim * static_table_dim) / 2;
    // Static storage is zero-initialized, so every cell starts as
    // "not computed" without an explicit fill.
    static value_type static_table[static_table_size] = {};

    if (n <= static_table_dim)
        return n_choose_k_impl(static_table, static_table_dim).compute(n, k);

    // Too large for the shared table: use a zero-initialized scratch table
    // that lives only for this call.
    const std::size_t table_size = static_cast<std::size_t>(n * (n / 2) + (n & 1));
    value_type* table = new value_type[table_size]();
    const value_type result = n_choose_k_impl(table, n).compute(n, k);
    delete[] table;
    return result;
}
Keep a table of previously returned results (indexed by their n and k values); the technique used there is memoization. You can also change the recursion to an iteration and use dynamic programming to fill in an array containing the triangle for n and k values smaller than the one you are trying to evaluate, then just get one element from it.