I am currently porting an algorithm from boost::ublas to Eigen:
Code 1 with boost::ublas
#ifndef KHACH_H
#define KHACH_H
#include <set>
#include <boost/numeric/ublas/vector.hpp>
#include <boost/numeric/ublas/matrix.hpp>
#include <boost/numeric/ublas/triangular.hpp>
#include <boost/numeric/ublas/banded.hpp>
#include <boost/numeric/ublas/lu.hpp>
#include <iostream>
#include <boost/numeric/ublas/io.hpp>
//namespace Minim {
namespace ublas=boost::numeric::ublas;
/// Computes the inverse of `input` by LU factorisation followed by
/// back-substitution against the identity matrix.
///
/// @param input    matrix to invert; it is copied, so the caller's data is untouched.
/// @param inverse  receives the inverse on success.
///                 NOTE(review): presumably must already be sized n x n by the caller — confirm.
/// @return false when lu_factorize reports a non-zero result, true otherwise.
template<class T>
bool InvertMatrix(const ublas::matrix<T> &input,
ublas::matrix<T> &inverse)
{
using namespace boost::numeric::ublas;
typedef permutation_matrix<std::size_t> pmatrix;
// Factorise a working copy so that `input` stays unchanged.
matrix<T> A(input);
// Permutation matrix handed to lu_factorize / lu_substitute.
pmatrix pm(A.size1());
int res = lu_factorize(A,pm);
// A non-zero result is treated as "not invertible".
if( res != 0 ) return false;
// Start from the identity and substitute through the factorisation.
inverse.assign(ublas::identity_matrix<T>(A.size1()));
lu_substitute(A, pm, inverse);
return true;
}
/// Builds `Ap` from `A` by appending one extra row filled with 1.0.
///
/// @param A   source matrix (size1 x size2).
/// @param Ap  resized to (size1 + 1) x size2; the top block receives `A`,
///            the last row is set to ones.
inline void Lift(const ublas::matrix<double> &A,
                 ublas::matrix<double> &Ap)
{
    typedef ublas::matrix<double> Matrix;

    Ap.resize(A.size1() + 1, A.size2());

    // View onto the rows of Ap that mirror A.
    ublas::matrix_range<Matrix> upper(Ap,
                                      ublas::range(0, A.size1()),
                                      ublas::range(0, A.size2()));
    upper.assign(A);

    // The appended row is all ones.
    const ublas::scalar_vector<double> ones(A.size2(), 1.0);
    ublas::row(Ap, Ap.size1() - 1) = ones;
}
#endif
//}
Code 2 with Eigen:
#ifndef KHACH_H
#define KHACH_H
#include <set>
#include <iostream>
#include <Eigen/Eigen>
//namespace Minim {
template <class NT>
using MT = Eigen::Matrix<NT, Eigen::Dynamic, Eigen::Dynamic>;
template <class NT>
using VT = Eigen::Matrix<NT, Eigen::Dynamic, 1>;
/// Reports whether `x` contains at least one NaN coefficient.
///
/// The previous version returned `(x == x).all()`, which is true exactly when
/// there is NO NaN, i.e. the opposite of what the name promises.
///
/// @param x  any Eigen matrix expression.
/// @return true if some coefficient is NaN, false otherwise.
template<typename Derived>
inline bool is_nan(const Eigen::MatrixBase<Derived>& x)
{
    // NaN is the only value that compares unequal to itself.
    return (x.array() != x.array()).any();
}
/// Computes the inverse of `input` with Eigen and reports whether it is usable.
///
/// @param input    square matrix to invert.
/// @param inverse  receives `input.inverse()`; it is resized by the assignment,
///                 so no prior initialisation is needed.
/// @return true when every coefficient of the result is finite. A singular
///         input produces inf/NaN coefficients and therefore yields false,
///         which is the closest match to the ublas version returning false
///         when its LU factorisation fails.
template<class T>
bool InvertMatrix(const MT<T> &input,
                  MT<T> &inverse)
{
    inverse = input.inverse();
    // Check for inf as well as NaN: a zero pivot typically shows up as inf.
    return inverse.allFinite();
}
/// Builds `Ap` from `A` by appending one extra row filled with 1.0.
///
/// @param A   source matrix (rows x cols).
/// @param Ap  resized to (rows + 1) x cols; every coefficient is overwritten.
inline void Lift(const MT<double> &A, MT<double> &Ap)
{
    Ap.resize(A.rows() + 1, A.cols());
    // Upper block mirrors A ...
    Ap.block(0, 0, A.rows(), A.cols()) = A;
    // ... and the appended last row is all ones.
    Ap.row(Ap.rows() - 1).setOnes();
}
#endif
//}
These functions are part of the bigger code and functionality, but I think these two functions are the ones creating the difference. The functions with Eigen are giving a different output for some large matrices compared to the output of the code using boost, I am not able to understand the bugs.
Any help would be appreciated.
You didn't specify any inputs or what the discrepancy is you're finding.
This led me to build simple testers, in which I found that an obvious source of "differences" is the inaccuracy of [binary] floating-point representations.
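You can easily confirm it with some test input, e.g. $A = \begin{pmatrix} 1 & 2 & 3 \\ 3 & 2 & 1 \\ 2 & 1 & 3 \end{pmatrix}$, whose inverse is $A^{-1} = \frac{1}{12}\begin{pmatrix} -5 & 3 & 4 \\ 7 & 3 & -8 \\ 1 & -3 & 4 \end{pmatrix}$: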
Live On Compiler Explorer
#include <boost/numeric/ublas/matrix.hpp>
#include <boost/numeric/ublas/vector.hpp>
#include <set>
#include <boost/numeric/ublas/banded.hpp>
#include <boost/numeric/ublas/lu.hpp>
#include <boost/numeric/ublas/triangular.hpp>
#include <boost/numeric/ublas/io.hpp>
#include <iostream>
// Boost.uBLAS flavour of the two helpers under comparison.
namespace Minim1 {
namespace ublas = boost::numeric::ublas;
// Matrix type used throughout this namespace.
template <class T> using MT = ublas::matrix<T>;
// Inverts `input` via LU factorisation and back-substitution against the
// identity. Returns false when lu_factorize reports a non-zero result.
// NOTE(review): `inverse` is presumably expected to be pre-sized n x n — confirm.
template <class T> bool InvertMatrix(const MT<T>& input, MT<T>& inverse)
{
using namespace boost::numeric::ublas;
typedef permutation_matrix<std::size_t> pmatrix;
// Factorise a working copy so that `input` stays unchanged.
matrix<T> A(input);
pmatrix pm(A.size1());
int res = lu_factorize(A, pm);
if (res != 0)
return false;
inverse.assign(ublas::identity_matrix<T>(A.size1()));
lu_substitute(A, pm, inverse);
return true;
}
// Copies A into the top rows of Ap and appends one extra row of ones,
// so Ap ends up with A.size1() + 1 rows.
template <class T>
inline void Lift(const ublas::matrix<T>& A, ublas::matrix<T>& Ap)
{
Ap.resize(A.size1() + 1, A.size2());
// View onto the rows of Ap that mirror A.
ublas::matrix_range<ublas::matrix<T>> sub(
Ap, ublas::range(0, A.size1()), ublas::range(0, A.size2()));
sub.assign(A);
// Last row: all ones.
ublas::row(Ap, Ap.size1() - 1) = ublas::scalar_vector<T>(A.size2(), 1.0);
}
}
#include <Eigen/Eigen>
#include <iostream>
#include <set>
// Eigen flavour of the two helpers under comparison.
namespace Minim2 {
    /// Dynamically sized matrix of T.
    template <class T>
    using MT = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>;

    // VT below uses Eigen::RowMajor as its column count; this pins the value to 1.
    static_assert(Eigen::RowMajor == 1);
    template <class T>
    using VT = Eigen::Matrix<T, Eigen::Dynamic, Eigen::RowMajor>;

    /// Reports whether `x` contains at least one NaN coefficient.
    /// (Previously returned `(x == x).all()`, i.e. true when there was NO NaN.)
    template <typename Derived>
    inline bool is_nan(const Eigen::MatrixBase<Derived>& x)
    {
        // NaN is the only value that compares unequal to itself.
        return (x.array() != x.array()).any();
    }

    /// Writes `input.inverse()` into `inverse`.
    /// @return true when every coefficient of the result is finite; a singular
    ///         input produces inf/NaN coefficients and therefore yields false.
    template <class T> bool InvertMatrix(const MT<T>& input, MT<T>& inverse)
    {
        // The assignment resizes `inverse`; no prior setIdentity is needed.
        inverse = input.inverse();
        return inverse.allFinite();
    }

    /// Copies A into the top rows of Ap and appends one extra row of ones.
    template <typename T>
    inline void Lift(const MT<T>& A, MT<T>& Ap)
    {
        Ap.resize(A.rows() + 1, A.cols());
        Ap.topLeftCorner(A.rows(), A.cols()) = A;
        Ap.row(Ap.rows() - 1).setConstant(1.0);
    }
}
/// Compares a uBLAS matrix with an Eigen matrix coefficient by coefficient.
///
/// @return "equivalent" when dimensions and all coefficients match exactly;
///         otherwise a message describing the dimension mismatch or the first
///         differing coefficient (row-major scan), its absolute delta and
///         whether that delta exceeds the machine epsilon of T.
template <typename T>
static inline std::string compare(Minim1::MT<T> const& a, Minim2::MT<T> const& b) {
    if (a.size1() != static_cast<size_t>(b.rows())) return "rows do not match";
    if (a.size2() != static_cast<size_t>(b.cols())) return "cols do not match";

    for (size_t row = 0; row < a.size1(); ++row) {
        for (size_t col = 0; col < a.size2(); ++col) {
            auto lhs = a(row, col);
            auto rhs = b(row, col);
            if (lhs == rhs)
                continue;

            // First mismatch: describe it and stop.
            auto delta = std::abs(lhs - rhs);
            bool significant = std::numeric_limits<T>::epsilon() < delta;
            std::ostringstream msg;
            msg << "mismatch at (" << row << ", " << col << "): ";
            msg << lhs << " != " << rhs;
            msg << " delta:" << delta;
            msg << " significant? " << std::boolalpha << significant << "\n";
            return msg.str();
        }
    }
    return "equivalent";
}
/// Copies a uBLAS matrix coefficient by coefficient into an Eigen matrix
/// of the same dimensions.
template <typename T>
auto convert(Minim1::MT<T> const& a) {
    Minim2::MT<T> out(a.size1(), a.size2());
    for (size_t i = 0; i < a.size1(); ++i)
        for (size_t j = 0; j < a.size2(); ++j)
            out(i, j) = a(i, j);
    return out;
}
// Test driver: builds the same 3x3 matrix in both libraries, inverts it with
// each implementation and prints the coefficient-wise differences.
int main() {
using T = double;
using M1 = Minim1::MT<T>;
using M2 = Minim2::MT<T>;
// Prints both matrices followed by the verdict of compare().
auto report = [](auto const& a, auto const& b) {
std::cout << "\na: ------\n" << a;
std::cout << "\nb: ------\n" << b;
std::cout << "\n" << compare(a, b) << "\n";
};
// Same input, once as uBLAS matrix ...
M1 a(3, 3);
a(0, 0) = 1; a(0, 1) = 2; a(0, 2) = 3;
a(1, 0) = 3; a(1, 1) = 2; a(1, 2) = 1;
a(2, 0) = 2; a(2, 1) = 1; a(2, 2) = 3;
// ... and once as Eigen matrix.
M2 b(3, 3);
b << 1, 2, 3,
3, 2, 1,
2, 1, 3;
report(a, b);
std::cout << "\nINVERSIONS";
M1 ai(a.size1(), a.size2());
M2 bi(b.rows(), b.cols());
// Return values are ignored here; only the resulting matrices are compared.
Minim1::InvertMatrix(a, ai);
Minim2::InvertMatrix(b, bi);
report(ai, bi);
// Absolute coefficient-wise difference between the two inverses.
M2 deltas = (convert(ai) - bi).cwiseAbs();
constexpr auto eps = std::numeric_limits<T>::epsilon();
std::cout << "deltas:\n" << deltas << "\n";
// Flag every coefficient whose difference exceeds machine epsilon.
for (int r = 0; r < deltas.rows(); r++) {
for (int c = 0; c < deltas.cols(); c++) {
auto d = deltas(r,c);
if (d > eps) {
std::cout << "The delta at (" << r << ", " << c << ") (" << d << " is > ε (" << eps << ")\n";
}
} }
}
Prints
a: ------
[3,3]((1,2,3),(3,2,1),(2,1,3))
b: ------
1 2 3
3 2 1
2 1 3
equivalent
INVERSIONS
a: ------
[3,3]((-0.416667,0.25,0.333333),(0.583333,0.25,-0.666667),(0.0833333,-0.25,0.333333))
b: ------
-0.416667 0.25 0.333333
0.583333 0.25 -0.666667
0.0833333 -0.25 0.333333
mismatch at (0, 0): -0.416667 != -0.416667 delta:5.55112e-17 significant? false
deltas:
5.55112e-17 0 0
0 2.77556e-17 0
0 2.77556e-17 0
Confirming that all differences are around (even <) the machine epsilon for the chosen data type. If you replace that one:
using T = long double;
You get the following deltas: Compiler Explorer
mismatch at (0, 0): -0.416667 != -0.416667 delta:2.71051e-20 significant? false
deltas:
2.71051e-20 1.35525e-20 0
5.42101e-20 0 0
6.77626e-21 0 0
Where To Go From Here
Find out whether this is your problem by plugging in your inputs. You might stumble on other things that escaped your attention before. If not, at least you now have the tools to make a new, more focused question.
If you want to learn more about floating point inaccuracy:
Why are floating point numbers inaccurate?
Is floating point math broken?
Related
I have an app based on qt (qcustomplot) that prints two different graphs. They have one point of intersection. How to find x and y coordinates of this point?
This doesn't have much to do with plotting, since you'd be investigating the underlying data. Let's say that we can interpolate between data points using lines, and the data sets are single-valued (i.e. for any x or key coordinate, there's only one value).
Online demo of the code below
Let's sketch a solution. First, some preliminaries, and we detect whether QCustomPlot was included so that the code can be tested without it - the necessary classes are mocked:
#define _USE_MATH_DEFINES
#include <algorithm>
#include <cassert>
#include <cmath>
#include <iostream>
#include <optional>
#include <type_traits>
#include <vector>
//#include "qcustomplot.h"
constexpr bool debugOutput = false;
#ifndef QCP_PLOTTABLE_GRAPH_H
/// Stand-in for QCustomPlot's QCPGraphData, used when qcustomplot.h is not
/// included: a single graph sample consisting of a key (x) and a value (y).
struct QCPGraphData {
    // Default member initialisers so a default-constructed point is (0, 0)
    // instead of carrying indeterminate values.
    double key = 0.0;
    double value = 0.0;
    QCPGraphData() = default;
    QCPGraphData(double x, double y) : key(x), value(y) {}
};
#endif
/// Ordering predicate: true when `l` has a strictly smaller key than `r`.
auto keyLess(const QCPGraphData &l, const QCPGraphData &r)
{
    return l.key < r.key;
}
#ifndef QCP_PLOTTABLE_GRAPH_H
/// Stand-in for QCustomPlot's data container: a std::vector<T> with an added
/// sort() that orders the elements by ascending key.
template <typename T> struct QCPDataContainer : public std::vector<T> {
    using std::vector<T>::vector;

    /// Sorts the stored elements with keyLess.
    void sort() {
        std::vector<T> &self = *this;
        std::sort(self.begin(), self.end(), keyLess);
    }
};
using QCPGraphDataContainer = QCPDataContainer<QCPGraphData>;
#endif
using Point = QCPGraphData;
using Container = QCPGraphDataContainer;
static_assert(std::is_copy_constructible_v<Point>, "Point must be copy-constructible");
Some helper functions:
/// Streams a point as "(key, value)".
std::ostream &operator<<(std::ostream &os, const Point &p) {
    os << "(" << p.key << ", " << p.value << ")";
    return os;
}
/// Returns true when no two neighbouring points in `v` share the same key.
/// Only adjacent elements are compared.
template <class T> bool has_unique_keys(const T &v) {
    const auto last = std::end(v);
    const auto duplicate = std::adjacent_find(
        std::begin(v), last,
        [](const Point &l, const Point &r) { return l.key == r.key; });
    return duplicate == last;
}
/// Returns true when every point in `v` has a finite key and a finite value.
template <class T> bool has_valid_points(const T& v) {
    const auto last = std::end(v);
    const auto firstBad = std::find_if(std::begin(v), last, [](const Point &p) {
        return !(std::isfinite(p.key) && std::isfinite(p.value));
    });
    return firstBad == last;
}
The line segment intersection finder:
// intersection of two line segments
std::optional<Point> intersection(const Point &a1, const Point &a2, const Point &b1, const Point &b2)
{
auto p1 = a1, p2 = a2, p3 = b1, p4 = b2;
assert(p1.key <= p2.key);
assert(p3.key <= p4.key);
if (debugOutput) std::cout << p1 << "-" << p2 << ", " << p3 << "-" << p4;
auto const denom = (p1.key - p2.key)*(p3.value - p4.value)
- (p1.value - p2.value)*(p3.key - p4.key);
if (fabs(denom) > 1e-6*(p2.key - p1.key)) {
// the lines are not parallel
auto const scale = 1.0/denom;
auto const q = p1.key*p2.value - p1.value*p2.key;
auto const r = p3.key*p4.value - p3.value*p4.key;
auto const x = (q*(p3.key-p4.key) - (p1.key-p2.key)*r) * scale;
if (debugOutput) std::cout << " x=" << x << "\n";
if (p1.key <= x && x <= p2.key && p3.key <= x && x <= p4.key) {
auto const y = (q*(p3.value-p4.value) - (p1.value-p2.value)*r) * scale;
return std::optional<Point>(std::in_place, x, y);
}
}
else if (debugOutput) std::cout << "\n";
return std::nullopt;
}
An algorithm that walks down two lists of points sorted in ascending key (x) order, and finds all intersections of line segments spanning consecutive point pairs from these two lists:
// Finds the intersections between two piecewise-linear graphs.
//
// Both containers must hold finite points, sorted by ascending key, without
// equal neighbouring keys (checked with assert only). Each container is read
// as a chain of segments between consecutive points; the two chains are
// walked in parallel and every segment pair visited is tested with
// intersection().
//
// Returns the intersection points in the order they are found; returns an
// empty vector when either input has fewer than two points or the key spans
// do not overlap.
std::vector<Point> findIntersections(const Container &a_, const Container &b_)
{
if (a_.size() < 2 || b_.size() < 2) return {};
// Precondition checks; they compile to nothing when NDEBUG is defined.
static constexpr auto check = [](const auto &c){
assert(has_valid_points(c));
assert(std::is_sorted(c.begin(), c.end(), keyLess));
assert(has_unique_keys(c));
};
check(a_);
check(b_);
// Rebind so that `a` is the graph whose first key is not larger than b's.
bool aFirst = a_.front().key <= b_.front().key;
const auto &a = aFirst ? a_ : b_, &b = aFirst ? b_ : a_;
assert(a.front().key <= b.front().key);
if (a.back().key < b.front().key) return {}; // the key spans don't overlap
std::vector<Point> intersections;
auto ia = a.begin(), ib = b.begin();
// a1/b1 are the current segment start points; ia/ib point at the segment ends.
Point a1 = *ia++, b1 = *ib++;
// Terminates before a.end() because a.back().key >= b.front().key was checked above.
while (ia->key < b1.key) a1=*ia++; // advance a until the key spans overlap
for (Point a2 = *ia, b2 = *ib;;) {
auto const ipt = intersection(a1, a2, b1, b2);
if (ipt)
intersections.push_back(*ipt);
// Advance whichever segment ends first (both when they end at the same key).
bool advanceA = a2.key <= b2.key, advanceB = b2.key <= a2.key;
if (advanceA) {
// Running out of points on either graph ends the walk.
if (++ia == a.end()) break;
a1 = a2, a2 = *ia;
}
if (advanceB) {
if (++ib == b.end()) break;
b1 = b2, b2 = *ib;
}
}
return intersections;
}
And a more generic version that can also sort the points in ascending key order:
/// Convenience overload that can sort the inputs first.
///
/// @param d1, d2     the two graphs; sorted in place when `presorted` is false.
/// @param presorted  pass true if both graphs are already in ascending key order.
/// @return the result of the two-argument findIntersections on the (sorted) graphs.
auto findIntersections(Container &d1, Container &d2, bool presorted)
{
    if (presorted)
        return findIntersections(d1, d2);

    d1.sort();
    d2.sort();
    return findIntersections(d1, d2);
}
And now some simple demonstration:
/// Samples `fun` at start, start + step, start + 2*step, ... up to and
/// including `end`, returning the (x, fun(x)) pairs in ascending x order.
///
/// @param start  first sample position.
/// @param step   distance between samples; a non-positive step yields an
///               empty graph instead of looping forever.
/// @param end    last admissible sample position.
/// @param fun    callable evaluated at each sample position.
template <typename Fun>
Container makeGraph(double start, double step, double end, Fun &&fun) {
    Container result;
    if (!(step > 0))
        return result;
    int i = 0;
    // Each x is recomputed from the index (no accumulated rounding error)
    // and offset by `start`; the earlier `x = ++i * step` dropped the offset,
    // so every sample after the first restarted from 0.
    for (auto x = start; x <= end; x = start + ++i * step)
        result.emplace_back(x, fun(x));
    return result;
}
// Demo: intersect a sampled sine with a sampled cosine, once with equal and
// once with different sampling steps, and print the intersections found.
int main()
{
    for (auto step2: {0.1, 0.1151484584}) {
        const auto sinPlot = makeGraph(-2*M_PI, 0.1, 3*M_PI, sin);
        const auto cosPlot = makeGraph(0., step2, 2*M_PI, cos);
        const auto intersections = findIntersections(sinPlot, cosPlot);

        std::cout << "Intersections:\n";
        for (const auto &ip : intersections) {
            std::cout << " at " << ip << "\n";
        }
    }
}
Demo output:
Intersections:
at (0.785613, 0.706509)
at (3.92674, -0.706604)
Intersections:
at (0.785431, 0.706378)
at (3.92693, -0.706732)
First there is a vector with a size 20. We assign random numbers from 0 to 100 to the vector. Then, using the template function, we find the total, mean, median, and standard deviation of this vector's elements. The code is as follows and the screen output is below.
#include <numeric>
#include <cmath>
#include <algorithm>
#include <functional>
#include <vector>
#include <iostream>
#include <ctime>
#include <cstdlib>
using namespace std;
// Raises x to the N-th power by repeated multiplication.
// For N <= 1 the result is x itself.
template<int N, class T>
T nthPower(T x) {
    T result = x;
    int remaining = N;
    while (remaining > 1) {
        result *= x;
        --remaining;
    }
    return result;
}
// Binary functor for accumulate: adds (current - mean)^N to a running sum.
template<class T, int N>
struct SumDiffNthPower {
    // x is the mean that every sample is measured against.
    SumDiffNthPower(T x) : mean_(x) { }

    T operator( )(T sum, T current) {
        sum += nthPower<N>(current - mean_);
        return sum;
    }

    T mean_;
};
// N-th central moment of [first, last) about `mean`:
// the sum of (x - mean)^N divided by the number of elements.
template<class T, int N, class Iter_T>
T nthMoment(Iter_T first, Iter_T last, T mean) {
    const size_t count = distance(first, last);
    const T total = accumulate(first, last, T( ), SumDiffNthPower<T, N>(mean));
    return total / count;
}
// Variance of [first, last) about `mean`, i.e. the second central moment.
template<class T, class Iter_T>
T computeVariance(Iter_T first, Iter_T last, T mean) {
    const int kOrder = 2;
    return nthMoment<T, kOrder>(first, last, mean);
}
// Standard deviation of [first, last) about `mean`: square root of the variance.
template<class T, class Iter_T>
T computeStdDev(Iter_T first, Iter_T last, T mean) {
    const T variance = computeVariance(first, last, mean);
    return sqrt(variance);
}
template<class T, class Iter_T>
void computeStats(Iter_T first, Iter_T last, T& sum, T& mean,
T& var, T& std_dev)
{
size_t cnt = distance(first, last);
sum = accumulate(first, last, T( ));
mean = sum / cnt;
var = computeVariance(first, last, mean);
std_dev = sqrt(var);
}
// Generator for std::generate: next std::rand() value reduced to 0..99.
int RandomNumber () {
    const int raw = std::rand();
    return raw % 100;
}
// Fills a 20-element vector with random values and prints its statistics.
int main(int argc, char **argv) {
    std::srand ( unsigned ( std::time(0) ) );

    vector<int> v(20);
    std::generate (v.begin(), v.end(), RandomNumber);

    double sum, mean, var, dev;
    computeStats(v.begin( ), v.end( ), sum, mean, var, dev);

    cout << "count = " << v.size( ) << "\n"
         << "sum = " << sum << "\n"
         << "mean = " << mean << "\n"
         << "variance = " << var << "\n"
         << "standard deviation = " << dev << "\n"
         << endl;
}
Output is:
count = 20
sum = 789
mean = 39.45
variance = 888.448
standard deviation = 29.8068
How can I change the parameters?
I don't want to use this style.
// Variance of [first, last) about `mean`, i.e. the second central moment.
template<class T, class Iter_T>
T computeVariance(Iter_T first, Iter_T last, T mean) {
    const int kOrder = 2;
    return nthMoment<T, kOrder>(first, last, mean);
}
I want to use this way to calculate things.
template<class T, class T2>
T computeVariance(vector<T2> &vec, T mean)
{
...
}
All function should be like this. How can I do?
In main function how can I use generate function for copying vector and How can I send the addresses of the arguments
std::generate (v.begin(), v.end(), RandomNumber);
computeStats(v.begin( ), v.end( ), sum, mean, var, dev);
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>
using namespace std;
// Raises x to the N-th power by repeated multiplication.
// For N <= 1 the result is x itself.
template <int N, class T>
T nthPower(T x) {
    T result = x;
    int remaining = N;
    while (remaining > 1) {
        result *= x;
        --remaining;
    }
    return result;
}
// Binary functor for accumulate: adds (current - mean)^N to a running sum.
template <class T, int N>
struct SumDiffNthPower {
    // x is the mean that every sample is measured against.
    SumDiffNthPower(T x) : mean_(x) {}

    T operator()(T sum, T current) {
        sum += nthPower<N>(current - mean_);
        return sum;
    }

    T mean_;
};
template <class T, int N, class T2>
T nthMoment(vector<T2> &vec, T mean) {
size_t cnt = vec.size();
return accumulate(vec.begin(), vec.end(), T(), SumDiffNthPower<T, N>(mean)) / cnt;
}
// Variance of the elements of `vec` about `mean`, i.e. the second central moment.
template <class T, class T2>
T computeVariance(vector<T2> &vec, T mean) {
    const int kOrder = 2;
    return nthMoment<T, kOrder>(vec, mean);
}
// Standard deviation of the elements of `vec` about `mean`:
// square root of the variance.
template <class T, class T2>
T computeStdDev(vector<T2> &vec, T mean) {
    const T variance = computeVariance(vec, mean);
    return sqrt(variance);
}
// Computes sum, mean, variance and standard deviation of the elements of
// `vec` and stores them in the four output references.
template <class T, class T2>
void computeStats(vector<T2> &vec, T &sum, T &mean, T &var, T &std_dev) {
    sum = accumulate(vec.begin(), vec.end(), T());

    const size_t cnt = vec.size();
    mean = sum / cnt;

    // Variance is the second central moment about the mean just computed.
    var = nthMoment<T, 2>(vec, mean);
    std_dev = sqrt(var);
}
// Generator for std::generate: next std::rand() value reduced to 0..99.
int RandomNumber() {
    const int raw = std::rand();
    return raw % 100;
}
// Fills a 20-element vector with random values and prints its statistics.
int main(int argc, char **argv) {
    std::srand(unsigned(std::time(0)));

    vector<int> v(20);
    std::generate(v.begin(), v.end(), RandomNumber);

    double sum, mean, var, dev;
    computeStats(v, sum, mean, var, dev);

    cout << "count = " << v.size() << "\n"
         << "sum = " << sum << "\n"
         << "mean = " << mean << "\n"
         << "variance = " << var << "\n"
         << "standard deviation = " << dev << "\n"
         << endl;
    return 0;
}
I hope this works. PS: I didn't understand the part of your question about passing the addresses of the arguments. Explain it in the comments and maybe I can help you with that.
you can have
// Vector-based wrapper: forwards the whole range of `vec` to the
// iterator-based nthMoment to obtain the second central moment.
template<class T, class T2>
T computeVariance(vector<T2> &vec, T mean)
{
    const int kOrder = 2;
    return nthMoment<T, kOrder>(vec.begin(), vec.end(), mean);
}
How can I print out the highest element of Valarray of complex numbers in C++ ?
I have tried with this code but it is returning error messages
#include <iostream> // std::cout
#include <valarray>
#include <complex>// std::valarray
typedef std::complex<double> Complex;
typedef std::valarray <Complex > CArray;
// Asker's original attempt, kept as posted: it does NOT compile.
int main ()
{
// This declares a built-in array of five CArray objects, not one CArray
// holding five complex numbers.
CArray y[5]={{1, 2},{3, 4},{2,0},{7,0},{9,0}};
// A built-in array has no member functions, so `y.max()` is rejected.
std::cout << "The max is " << y.max() << '\n';
return 0;
}
Output:
main.cpp: In function 'int main()':
main.cpp:15:35: error: request for member 'max' in 'y', which is of non-class type 'CArray [5] {aka std::valarray<std::complex<double> > [5]}'
std::cout << "The max is " << y.max() << '\n';
^
What I am doing wrong ?
Second version of code
I have modified a bit the code, Now I would like to get all index corresponding to the highest element of my Valarray in my case all index corresponding to element {9,0}
Note :by "Highest element" I mean element having the highest real part
new code:
#include <iostream>
#include <valarray>
#include <complex>
#include <algorithm>
#include <numeric>
typedef std::complex<double> Complex;
typedef std::valarray <Complex > CArray;
// Asker's second attempt, kept as posted: it does NOT compile.
int main ()
{
CArray y={{1, 2},{3, 4},{2,0},{9,0},{7,0},{9,0}};
// Folds the array with a lambda that returns the larger of two magnitudes
// (a double), which accumulate then converts back into a Complex.
auto max_val = std::accumulate (std::begin(y), std::end(y), *std::begin(y),
[](const Complex& a ,const Complex& b)
{
auto abs_a = abs(a);
auto abs_b = abs(b);
//if(abs_a == abs_b)
// return std::max(arg(a), arg(b));
return std::max(abs_a, abs_b);
}
);
// std::begin(y)/std::end(y) yield iterators, not indices: initialising and
// comparing a std::size_t with them is what the compiler rejects.
for (std::size_t i =std::begin(y) ; i != std::end(y) ; i++) {
std::cout << "The max is found on index ["<< i <<"]" << max_val<< '\n';
}
return 0;
}
I am getting following errors :
Output:
main.cpp: In function 'int main()':
main.cpp:22:35: error: invalid conversion from 'std::complex<double>*' to 'std::size_t {aka long unsigned int}' [-fpermissive]
for (std::size_t i =std::begin(y) ; i != std::end(y) ; i++) {
^
main.cpp:22:54: error: ISO C++ forbids comparison between pointer and integer [-fpermissive]
for (std::size_t i =std::begin(y) ; i != std::end(y) ; i++) {
^
y is an array of valarrays, so you need to call max on each element in that array, not on the array itself (which of course has no member functions).
std::complex is not a comparable type, so what does it mean to have a "highest" element?
Update: Regarding your edit, I think I understand what you're after...
For the highest index of the max (by real()) element:
/// Index of the element of `y` with the largest real part.
///
/// When several elements share the largest real part, the highest such index
/// is returned. Returns 0 for an empty array.
///
/// The search is seeded with the first element rather than with 0.0, so
/// arrays whose real parts are all negative are handled correctly.
std::size_t max_index(CArray const& y) {
    std::size_t best = 0;
    for (std::size_t i = 1; i < y.size(); ++i) {
        // "not less" (rather than "greater") makes ties move to the later index.
        if (!(y[i].real() < y[best].real()))
            best = i;
    }
    return best;
}
Online Demo
Or for all indices of the max element:
/// Indices, in ascending order, of every element of `y` whose real part
/// equals the largest real part found in `y`.
///
/// Returns an empty vector for an empty array.
///
/// The search is seeded with the first element rather than with 0.0, so
/// arrays whose real parts are all negative no longer produce an empty result.
std::vector<std::size_t> max_indices(CArray const& y) {
    std::vector<std::size_t> max_idcs;
    if (y.size() == 0)
        return max_idcs;

    double max_value = y[0].real();
    max_idcs.push_back(0);

    for (std::size_t i = 1; i < y.size(); ++i) {
        double const r = y[i].real();
        if (r < max_value)
            continue;               // smaller: not a candidate
        if (r > max_value) {        // new maximum: forget earlier candidates
            max_value = r;
            max_idcs.clear();
        }
        max_idcs.push_back(i);      // new maximum or tie with the current one
    }
    return max_idcs;
}
Online Demo
N.b. your code has abs involved but I'm not sure why since you said you just wanted comparison based on std::complex<>::real(), so I've omitted that...
The bigger problem with your original code was (as pointed out by ildjarn) that Complex lacks an operator<.
I suppose that your Complex should be a little more complex (if you allow me the play on words).
I propose the following solution, where Complex derives from std::complex<double> and declares a friend operator< (). This is just one of many possible definitions of operator< ().
#include <iostream>
#include <valarray>
#include <complex>
// std::complex<double> extended with an ordering so that it can be used with
// std::valarray::max(): compare by real part first, then by imaginary part.
struct Complex: public std::complex<double>
{
    // Forwards any constructor arguments to std::complex<double>.
    template <typename ... Args>
    Complex (const Args & ... args) : std::complex<double>{args...}
    { }

    friend bool operator< (const Complex & c1, const Complex & c2)
    {
        if ( c1.real() != c2.real() )
            return c1.real() < c2.real();
        return c1.imag() < c2.imag();
    }
};
typedef std::valarray <Complex > CArray;
int main ()
{
CArray y { {1.0,2.0}, {3.0,4.0}, {2.0,0.0}, {7.0,0.0}, {9.0,0.0} };
std::cout << "The max is " << y.max() << '\n';
return 0;
}
If you accept that Complex can be a class template (using Complex<double> instead of Complex), you can write a more general solution in this way (one that can also be used with complex numbers based on float and long double):
#include <iostream>
#include <valarray>
#include <complex>
// std::complex<T> extended with an ordering so that it can be used with
// std::valarray::max(): compare by real part first, then by imaginary part.
template <typename T>
struct Complex: public std::complex<T>
{
    // Forwards any constructor arguments to std::complex<T>.
    template <typename ... Args>
    Complex (const Args & ... args) : std::complex<T>{args...}
    { }

    friend bool operator< (const Complex & c1, const Complex & c2)
    {
        if ( c1.real() != c2.real() )
            return c1.real() < c2.real();
        return c1.imag() < c2.imag();
    }
};
typedef std::valarray <Complex<double>> CArray;
int main ()
{
CArray y { {1.0,2.0}, {3.0,4.0}, {2.0,0.0}, {7.0,0.0}, {9.0,0.0} };
std::cout << "The max is " << y.max() << '\n';
return 0;
}
p.s.: should work with C++11 too.
p.s.2: sorry for my bad English.
--- Edited to get the index of the max element ---
#include <iostream>
#include <valarray>
#include <complex>
// std::complex<T> extended with an ordering: compare by real part first,
// then by imaginary part.
template <typename T>
struct Complex: public std::complex<T>
{
    // Forwards any constructor arguments to std::complex<T>.
    template <typename ... Args>
    Complex (const Args & ... args) : std::complex<T>{args...}
    { }

    friend bool operator< (const Complex & c1, const Complex & c2)
    {
        if ( c1.real() != c2.real() )
            return c1.real() < c2.real();
        return c1.imag() < c2.imag();
    }
};
typedef std::valarray <Complex<double>> CArray;
int main ()
{
CArray y { {1.0,2.0}, {3.0,4.0}, {2.0,0.0}, {7.0,0.0}, {9.0,0.0} };
auto m = 0U;
for ( auto i = 1U ; i < y.size() ; ++i)
if ( y[m] < y[i] )
m = i;
std::cout << "The max is found on index ["<< m <<"] and is " << y[m]
<< std::endl;
return 0;
}
Using std::accumulate, you can get the max of complex numbers similar to Matlab's max function:
#include <iostream>
#include <valarray>
#include <complex>
#include <algorithm>
#include <numeric>
typedef std::complex<double> Complex;
typedef std::valarray <Complex > CArray;
// Prints the "largest" complex number in the Matlab sense: the element with
// the greatest magnitude, ties broken by the greater phase angle.
int main ()
{
    CArray y={{1, 2},{3, 4},{2,0},{7,0},{9,0}};
    auto max_val = std::accumulate (std::begin(y), std::end(y), *std::begin(y),
        // Returns the winning ELEMENT. The earlier lambda returned the bare
        // magnitude (or, on ties, a phase angle), which accumulate silently
        // converted back into a Complex with zero imaginary part.
        [](const Complex& a ,const Complex& b) -> Complex
        {
            auto abs_a = abs(a);
            auto abs_b = abs(b);
            if(abs_a == abs_b)
                return arg(a) < arg(b) ? b : a;
            return abs_a < abs_b ? b : a;
        }
    );
    std::cout << "The max is " << max_val<< '\n';
    return 0;
}
Edit: question edited and OP wants to get index of maximum of real part of complex numbers so your answer:
#include <iostream>
#include <valarray>
#include <complex>
#include <algorithm>
#include <numeric>
typedef std::complex<double> Complex;
typedef std::valarray <Complex > CArray;
int main ()
{
CArray y={{1, 2},{3, 4},{2,0},{7,0},{9,0}};
std::vector<int> index(y.size());
std::iota( index.begin(), index.end(), 0 );
auto max_index = std::accumulate (std::begin(index), std::end(index), *std::begin(index),
[&](int a ,int b)
{
return y[a].real() > y[b].real() ? a: b;
}
);
std::cout << "index of max is " << max_index<< '\n';
return 0;
}
Edit 2: as #ildjarn mentioned modified question wants to get all indices corresponding to the highest element so modified answer:
#include <iostream>
#include <valarray>
#include <complex>
#include <algorithm>
#include <numeric>
typedef std::complex<double> Complex;
typedef std::valarray <Complex > CArray;
// Prints every index whose element has the largest real part.
int main ()
{
    CArray y={{1, 2},{3, 4},{2,0},{7,0},{9,0}};

    // index = 0, 1, ..., y.size() - 1
    std::vector<int> index(y.size());
    std::iota( index.begin(), index.end(), 0 );

    // Index of an element with the largest real part.
    auto max_index = std::accumulate (std::begin(index), std::end(index), *std::begin(index),
        [&](int a ,int b)
        {
            return y[a].real() > y[b].real() ? a: b;
        }
    );

    // Collect all indices that tie with it. The maximum is chosen by real
    // part, so the match must also compare real parts only; comparing the
    // whole complex value would miss elements such as (9,1) next to (9,0).
    std::vector<int> indices;
    std::copy_if(std::begin(index), std::end(index), std::back_inserter(indices),
        [&](int a)
        {
            return y[a].real() == y[max_index].real();
        }
    );

    for (auto i: indices)
        std::cout << "index of max is " << i << '\n';
    return 0;
}
Edit 3: using std::max_element the simplest solution we have:
#include <iostream>
#include <valarray>
#include <complex>
#include <algorithm>
#include <numeric>
#include <vector>
typedef std::complex<double> Complex;
typedef std::valarray <Complex > CArray;
// Finds the first element with the largest real part via std::max_element,
// then lists every index whose real part equals that maximum.
int main ()
{
    CArray y={{1, 2},{3, 4},{2,0},{9,0},{7,0},{9,0}};

    const auto first = std::begin(y);
    const auto last = std::end(y);
    const auto byReal = [](const Complex& a ,const Complex& b)
    {
        return a.real() < b.real() ;
    };
    const auto max_it = std::max_element (first, last, byReal);

    std::cout << "index of first max element is " << max_it-first << '\n';
    std::cout << "indices of all matches of max element is: " << "[";
    for (auto it = first; it != last; ++it) {
        if (it->real() != max_it->real())
            continue;
        std::cout << it - first << ' ' ;
    }
    std::cout << "]";
    return 0;
}
I've written an indirect radix sort algorithm in C++ (by indirect, I mean it returns the indices of the items):
#include <algorithm>
#include <iterator>
#include <vector>
// One pass of the indirect radix sort: distributes the indices in
// [begin, end) into buckets keyed by digit `i` of the item each index refers
// to, then writes the buckets back over [begin, end) in ascending bucket order.
//
//   begin, end : range of item indices to reorder in place.
//   a          : random-access iterator to the items; a[idx][i] is the digit used.
//   i          : position of the digit inspected in this pass.
//   buckets    : scratch storage reused between passes; buckets may hold stale
//                content on entry and are cleared lazily as they are needed.
template<class It1, class It2>
void radix_ipass(
It1 begin, It1 const end,
It2 const a, size_t const i,
std::vector<std::vector<size_t> > &buckets)
{
// Buckets [0, ncleared) are known to be valid for this pass.
size_t ncleared = 0;
for (It1 j = begin; j != end; ++j)
{
size_t const k = a[*j][i];
// Clear every existing bucket up to and including k before first use.
while (k >= ncleared && ncleared < buckets.size())
{ buckets[ncleared++].clear(); }
// Grow the bucket array when k is beyond it; new buckets start empty.
if (k >= buckets.size())
{
buckets.resize(k + 1);
ncleared = buckets.size();
}
// Append a placeholder and swap the index into it.
buckets[k].push_back(size_t());
using std::swap; swap(buckets[k].back(), *j);
}
// Concatenate the buckets back into the index range, emptying each one
// after it has been copied (the clear() sits in the loop increment).
for (std::vector<std::vector<size_t> >::iterator
j = buckets.begin(); j != buckets.begin() + ncleared; j->clear(), ++j)
{
begin = std::swap_ranges(j->begin(), j->end(), begin);
}
}
// Indirect radix sort: fills `items` with the indices 0 .. (end - begin) - 1
// and reorders them by running one radix_ipass per digit position, from the
// last position to the first.
//
//   begin, end : range of elements to rank; each element must provide size()
//                and operator[].
//   items      : output range with room for at least (end - begin) indices.
//
// NOTE(review): every pass indexes each element at positions up to
// smax - 1, so this presumably requires all elements to have the same
// size() — confirm before using it with variable-length keys.
template<class It, class It2>
void radix_isort(It const begin, It const end, It2 const items)
{
// Start from the identity permutation.
for (ptrdiff_t i = 0; i != end - begin; ++i) { items[i] = i; }
// smax = largest element size = number of passes.
size_t smax = 0;
for (It i = begin; i != end; ++i)
{
size_t const n = i->size();
smax = n > smax ? n : smax;
}
// Bucket storage is shared by all passes.
std::vector<std::vector<size_t> > buckets;
for (size_t i = 0; i != smax; ++i)
{
radix_ipass(
items, items + (end - begin),
begin, smax - i - 1, buckets);
}
}
It seems to perform around 40% faster than std::sort when I test it with the following code (3920 ms compared to 6530 ms):
#include <functional>
// Turns a key-extraction functor into a less-than comparator:
// a is ordered before b when Key(a) < Key(b).
template<class Key>
struct key_comp : public Key
{
    explicit key_comp(Key const &key = Key()) : Key(key) { }

    template<class T>
    bool operator()(T const &a, T const &b) const
    {
        // Call the base functor explicitly; this class's own operator()
        // hides it.
        Key const &extract = *this;
        return extract(a) < extract(b);
    }
};
// Helper that deduces Key and wraps `key` in a key_comp.
template<class Key>
key_comp<Key> make_key_comp(Key const &key)
{
    key_comp<Key> comparator(key);
    return comparator;
}
// Adaptable binary functor returning a + b (computed as `a += b` on a copy).
//
// The argument/result typedefs are spelled out instead of inheriting them
// from std::binary_function, which was deprecated in C++11 and removed in
// C++17; binders that rely on these typedefs keep working.
template<class T1, class T2>
struct add
{
    typedef T1 first_argument_type;
    typedef T2 second_argument_type;
    typedef T1 result_type;

    T1 operator()(T1 a, T2 const &b) const { return a += b; }
};
// Wraps a functor F that returns an iterator/pointer and dereferences its
// result, yielding a const reference to the pointed-to value.
// F must expose `result_type` and `argument_type`.
template<class F>
struct deref : public F
{
    deref(F const &f) : F(f) { }

    typename std::iterator_traits<typename F::result_type>::value_type const &
    operator()(typename F::argument_type const &a) const
    {
        // Call the base functor explicitly; this class's own operator()
        // hides it.
        F const &inner = *this;
        return *inner(a);
    }
};
// Helper that deduces T and wraps `t` in a deref.
template<class T>
deref<T> make_deref(T const &t)
{
    deref<T> wrapped(t);
    return wrapped;
}
// Pseudo-random number generator with three words of static state.
// Each call scrambles the first word, rotates the state and returns the
// newly produced third word. The sequence is fixed (no seeding).
size_t xorshf96(void)
{
    static size_t x = 123456789, y = 362436069, z = 521288629;

    // Scramble the oldest word.
    size_t mixed = x;
    mixed ^= mixed << 16;
    mixed ^= mixed >> 5;
    mixed ^= mixed << 1;

    // Rotate the state and fold the scrambled word into the new z.
    x = y;
    y = z;
    z = mixed ^ x ^ y;
    return z;
}
#include <stdio.h>
#include <time.h>
#include <array>
// Benchmark driver: fills 2^24 three-digit items with 12-bit pseudo-random
// values, ranks them with either the radix sort or std::sort and prints the
// elapsed CPU time in milliseconds.
int main(void)
{
typedef std::vector<std::array<size_t, 3> > Items;
Items items(1 << 24);
// Allocated at twice the item count; only the first items.size() entries
// are initialised below.
std::vector<size_t> ranks(items.size() * 2);
for (size_t i = 0; i != items.size(); i++)
{
ranks[i] = i;
for (size_t j = 0; j != items[i].size(); j++)
{ items[i][j] = xorshf96() & 0xFFF; }
}
clock_t const start = clock();
// Flip the constant to benchmark the std::sort variant instead.
if (1) { radix_isort(items.begin(), items.end(), ranks.begin()); }
else // STL sorting
{
// Comparator: rank -> iterator (items.begin() + rank) -> item, compared with <.
std::sort(
ranks.begin(),
ranks.begin() + items.size(),
make_key_comp(make_deref(std::bind1st(
add<Items::const_iterator, ptrdiff_t>(),
items.begin()))));
}
// NOTE(review): the format string has a single %u but a second argument
// (an iterator) is passed as well; presumably this only keeps `ranks`
// observably used — confirm, since passing a class type through varargs
// is not portable.
printf("%u ms\n",
(unsigned)((clock() - start) * 1000 / CLOCKS_PER_SEC),
std::min(ranks.begin(), ranks.end()));
return 0;
}
Hmm, I guess that's the best I can do, I thought.
But after lots of banging my head against the wall, I realized that prefetching in the beginning of radix_ipass can help cut down the result to 1440 ms (!):
#include <xmmintrin.h>
...
for (It1 j = begin; j != end; ++j)
{
#if defined(_MM_TRANSPOSE4_PS) // should be defined if xmmintrin.h is included
enum { N = 8 };
if (end - j > N)
{ _mm_prefetch((char const *)(&a[j[N]][i]), _MM_HINT_T0); }
#endif
...
}
Clearly, the bottleneck is the memory bandwidth---the access pattern is unpredictable.
So now my question is: what else can I do to make it even faster on similar amounts of data?
Or is there not much room left for improvement?
(I'm hoping to avoid compromising the readability of the code if possible, so if the readability is harmed, the improvement should be significant.)
Using a more compact data structure that combines ranks and values can boost the performance of std::sort by a factor of 2-3. Essentially, the sort now runs on a vector<pair<Value,Rank>>. For this, the Value data type, std::array<integer_type, 3>, has been replaced by a more compact pair<uint32_t, uint8_t> data structure. Only half a byte of it is unused, and the < comparison can be done in two steps, first using a presumably efficient comparison of uint32_ts (it's not clear whether the loop used by std::array<..>::operator< can be optimized to similarly fast code, but replacing std::array<integer_type,3> with this data structure yielded another performance boost).
Still, it doesn't get as efficient as the radix sort. (Maybe you could tweak a custom QuickSort with prefetches?)
Besides that additional sorting method, I've replaced the xorshf96 by a mt19937, because I know how to provide a seed for the latter ;)
The seed and the number of values can be changed via two command-line arguments: first the seed, then the count.
Compiled with g++ 4.9.0 20131022, using -std=c++11 -march=native -O3, for a 64-bit linux
Sample runs; important note running on a Core2Duo processor U9400 (old & slow!)
item count: 16000000
using std::sort
duration: 12260 ms
result sorted: true
seed: 5648
item count: 16000000
using std::sort
duration: 12230 ms
result sorted: true
seed: 5648
item count: 16000000
using std::sort
duration: 12230 ms
result sorted: true
seed: 5648
item count: 16000000
using std::sort with a packed data structure
duration: 4290 ms
result sorted: true
seed: 5648
item count: 16000000
using std::sort with a packed data structure
duration: 4270 ms
result sorted: true
seed: 5648
item count: 16000000
using std::sort with a packed data structure
duration: 4280 ms
result sorted: true
item count: 16000000
using radix sort
duration: 3790 ms
result sorted: true
seed: 5648
item count: 16000000
using radix sort
duration: 3820 ms
result sorted: true
seed: 5648
item count: 16000000
using radix sort
duration: 3780 ms
result sorted: true
New or changed code:
// Comparator over indices: orders two positions by the elements they refer to
// in the random-access sequence starting at `beg`.
// Kept as an aggregate so it can be brace-initialised as fun_obj<It>{beg}.
template<class It>
struct fun_obj
{
    It beg;  // first element of the sequence that the indices refer to

    // Returns true when the element at index `lhs` is less than the one at `rhs`.
    // const-qualified so the comparator can also be called through a const
    // object or a const reference.
    bool operator()(ptrdiff_t lhs, ptrdiff_t rhs) const
    {
        return beg[lhs] < beg[rhs];
    }
};
// Factory that deduces the iterator type and returns an index comparator
// whose `beg` member is the given iterator.
template<class It>
fun_obj<It> make_fun_obj(It beg)
{
    fun_obj<It> comparator = {beg};
    return comparator;
}
// Packs three 12-bit values into 40 bits so that comparing (m32, m8)
// gives the same order as comparing the three values lexicographically.
//
// Layout:
//   m32 bits 31..20 : a[0]
//   m32 bits 19..8  : a[1]
//   m32 bits  7..4  : unused (always zero)
//   m32 bits  3..0  : high 4 bits of a[2]
//   m8              : low 8 bits of a[2]
struct uint32p8_t
{
    uint32_t m32;
    uint8_t m8;

    // Packs `a`. Each component is expected to fit in 12 bits; higher bits are
    // masked off so they cannot spill into a neighbouring field. The shifts are
    // done on uint32_t to avoid shifting a promoted (signed) int into its sign bit.
    uint32p8_t(std::array<uint16_t, 3> const& a)
        : m32( (static_cast<uint32_t>(a[0] & 0xFFF) << 20)
             | (static_cast<uint32_t>(a[1] & 0xFFF) << 8)
             | (static_cast<uint32_t>(a[2] & 0xF00) >> 8) )
        , m8( static_cast<uint8_t>(a[2] & 0xFF) )
    {
    }

    // Unpacks back into the three 12-bit components.
    // Each field is shifted down first and masked afterwards: `>>` binds tighter
    // than `&`, so writing `m32 & MASK >> SHIFT` would shift the mask instead.
    operator std::array<size_t, 3>() const
    {
        return {{ static_cast<size_t>((m32 >> 20) & 0xFFF),
                  static_cast<size_t>((m32 >> 8) & 0xFFF),
                  (static_cast<size_t>(m32 & 0xF) << 8) | m8 }};
    }

    // Orders by m32 first and uses m8 only to break ties.
    friend bool operator<(uint32p8_t const& lhs, uint32p8_t const& rhs)
    {
        if(lhs.m32 < rhs.m32) return true;
        if(lhs.m32 > rhs.m32) return false;
        return lhs.m8 < rhs.m8;
    }
};
#include <stdio.h>
#include <time.h>
#include <array>
#include <iostream>
#include <iomanip>
#include <utility>
#include <algorithm>
#include <cstdlib>
#include <iomanip>
#include <random>
// Benchmark driver: fills `items` with random triples of 12-bit values, sorts
// an array of indices ("ranks") into them with the strategy selected at compile
// time, then prints the elapsed time and whether the result is sorted.
// Exactly one of RADIX_SORT, STDSORT or STDSORT_PACKED must be defined;
// otherwise the type alias and `ranks_count` below have no right-hand side
// and the file does not compile.
// Command line: argv[1] = seed (optional), argv[2] = item count (optional).
int main(int argc, char* argv[])
{
std::cout.sync_with_stdio(false);
// Default item count: 2<<22 == 8388608.
constexpr auto items_count_default = 2<<22;
constexpr auto seed_default = 42;
uint32_t const seed = argc > 1 ? std::atoll(argv[1]) : seed_default;
std::cout << "seed: " << seed << "\n";
size_t const items_count = argc > 2 ? std::atoll(argv[2])
: items_count_default;
std::cout << "item count: " << items_count << "\n";
// Element type of each triple: size_t for the radix build, uint16_t for both
// std::sort builds.
using Items_array_value_t =
#ifdef RADIX_SORT
size_t
#elif defined(STDSORT)
uint16_t
#elif defined(STDSORT_PACKED)
uint16_t
#endif
;
typedef std::vector<std::array<Items_array_value_t, 3> > Items;
Items items(items_count);
// The radix build allocates twice as many rank slots; only the first
// items.size() of them are initialised and checked in this function.
auto const ranks_count =
#ifdef RADIX_SORT
items.size() * 2
#elif defined(STDSORT)
items.size()
#elif defined(STDSORT_PACKED)
items.size()
#endif
;
//auto prng = xorshf96;
std::mt19937 gen(seed);
std::uniform_int_distribution<> dist;
auto prng = [&dist, &gen]{return dist(gen);};
std::vector<size_t> ranks(ranks_count);
// ranks starts as the identity permutation; every component of every item
// keeps only the low 12 bits of a random draw.
for (size_t i = 0; i != items.size(); i++)
{
ranks[i] = i;
for (size_t j = 0; j != items[i].size(); j++)
{ items[i][j] = prng() & 0xFFF; }
}
std::cout << "using ";
// Everything between here and the second clock() call is timed.
clock_t const start = clock();
#ifdef RADIX_SORT
std::cout << "radix sort\n";
radix_isort(items.begin(), items.end(), ranks.begin());
#elif defined(STDSORT)
std::cout << "std::sort\n";
// Sort the indices, comparing the items they refer to.
std::sort(ranks.begin(), ranks.begin() + items.size(),
make_fun_obj(items.cbegin())
//make_key_comp(make_deref(std::bind1st(
// add<Items::const_iterator, ptrdiff_t>(),
// items.begin())))
);
#elif defined(STDSORT_PACKED)
std::cout << "std::sort with a packed data structure\n";
// Build (packed item, original index) pairs, sort them by the packed item,
// then copy the indices back into `ranks`. Both the packing and the copy-back
// fall inside the timed region.
using Items_ranks = std::vector< std::pair<uint32p8_t,
decltype(ranks)::value_type> >;
Items_ranks items_ranks;
size_t i = 0;
for(auto iI = items.cbegin(); iI != items.cend(); ++iI, ++i)
{
items_ranks.emplace_back(*iI, i);
}
std::sort(begin(items_ranks), end(items_ranks),
[](Items_ranks::value_type const& lhs,
Items_ranks::value_type const& rhs)
{ return lhs.first < rhs.first; }
);
std::transform(items_ranks.cbegin(), items_ranks.cend(), begin(ranks),
[](Items_ranks::value_type const& e) { return e.second; }
);
#endif
// Convert elapsed clock() ticks to milliseconds.
auto const duration = (clock() - start) / (CLOCKS_PER_SEC / 1000);
// Verification (not timed): the first items.size() ranks must refer to items
// in non-decreasing order.
bool const sorted = std::is_sorted(ranks.begin(), ranks.begin() + items.size(),
make_fun_obj(items.cbegin()));
std::cout << "duration: " << duration << " ms\n"
<< "result sorted: " << std::boolalpha << sorted << "\n";
return 0;
}
Full code:
#include <algorithm>
#include <iterator>
#include <vector>
#include <cstddef>
using std::size_t;
using std::ptrdiff_t;
#include <xmmintrin.h>
// One distribution pass of the index radix sort.
// [begin, end) holds indices into `a`; each index *j is placed into the bucket
// numbered a[*j][i], and afterwards the indices are written back to
// [begin, ...) bucket by bucket in ascending bucket number. Within a bucket the
// indices keep the order in which they were encountered (push_back appends).
// `buckets` is scratch storage supplied by the caller so it can be reused
// between passes; it grows on demand to the largest key seen plus one.
template<class It1, class It2>
void radix_ipass(
It1 begin, It1 const end,
It2 const a, size_t const i,
std::vector<std::vector<size_t> > &buckets)
{
// Buckets with an index below ncleared have been cleared (or freshly created)
// during this call; clearing is done lazily as larger keys are encountered.
size_t ncleared = 0;
for (It1 j = begin; j != end; ++j)
{
// _MM_TRANSPOSE4_PS presumably serves as a check that <xmmintrin.h> is
// available; without it compilation stops at the #error below.
#if defined(_MM_TRANSPOSE4_PS)
// Prefetch the key of the element N positions ahead, when there is one.
constexpr auto N = 8;
if(end - j > N)
{ _mm_prefetch((char const *)(&a[j[N]][i]), _MM_HINT_T0); }
#else
#error SS intrinsic not found
#endif
// k is the bucket number for the current index.
size_t const k = a[*j][i];
// Clear every existing bucket up to and including k that has not yet been
// cleared in this call.
while (k >= ncleared && ncleared < buckets.size())
{ buckets[ncleared++].clear(); }
// Grow the bucket table if k is beyond it; newly created buckets are empty,
// so all of them count as cleared.
if (k >= buckets.size())
{
buckets.resize(k + 1);
ncleared = buckets.size();
}
// Append a placeholder and swap the index into it; *j is left holding the
// placeholder value until the write-back loop below overwrites it.
buckets[k].push_back(size_t());
using std::swap; swap(buckets[k].back(), *j);
}
// Write-back: only buckets below ncleared are visited. swap_ranges returns the
// position just past the last element written, which becomes the start for the
// next bucket; each bucket is cleared after it has been drained.
for (std::vector<std::vector<size_t> >::iterator
j = buckets.begin(); j != buckets.begin() + ncleared; j->clear(), ++j)
{
begin = std::swap_ranges(j->begin(), j->end(), begin);
}
}
// Index radix sort: fills items[0 .. end-begin) with 0, 1, 2, ... and then runs
// one radix_ipass per key position, from the last position down to the first.
// [begin, end) is the sequence of keys; each key must provide size() and be
// indexable by position.
template<class It, class It2>
void radix_isort(It const begin, It const end, It2 const items)
{
    // Start from the identity permutation.
    for (ptrdiff_t idx = 0; idx != end - begin; ++idx)
    {
        items[idx] = idx;
    }

    // The number of passes is the largest key length found.
    size_t smax = 0;
    for (It key = begin; key != end; ++key)
    {
        size_t const len = key->size();
        if (len > smax) { smax = len; }
    }

    // Bucket storage is shared by all passes.
    std::vector<std::vector<size_t> > buckets;
    for (size_t position = smax; position-- != 0; )
    {
        radix_ipass(items, items + (end - begin), begin, position, buckets);
    }
}
#include <functional>
// Turns a key-extraction functor into a less-than comparator:
// comp(a, b) is Key(a) < Key(b). Inherits from Key, so Key must be a class type.
template<class Key>
struct key_comp : public Key
{
    explicit key_comp(Key const &key = Key()) : Key(key) { }

    // Compares two values by the keys that the base functor extracts from them.
    template<class T>
    bool operator()(T const &a, T const &b) const
    {
        return Key::operator()(a) < Key::operator()(b);
    }
};
// Factory that deduces Key and wraps the given key functor in a key_comp.
template<class Key>
key_comp<Key> make_key_comp(Key const &key)
{
    key_comp<Key> wrapped(key);
    return wrapped;
}
// Binary functor returning a + b, computed as `a += b` on a copy of `a`.
// The adaptor typedefs are declared directly rather than inherited from
// std::binary_function, which was deprecated in C++11 and removed in C++17.
template<class T1, class T2>
struct add
{
    typedef T1 first_argument_type;
    typedef T2 second_argument_type;
    typedef T1 result_type;

    T1 operator()(T1 a, T2 const &b) const { return a += b; }
};
// Wraps a functor F that returns an iterator and dereferences the result.
// F must expose `argument_type` and `result_type`, where result_type is an
// iterator type usable with std::iterator_traits.
template<class F>
struct deref : public F
{
    deref(F const &f) : F(f) { }

    // Calls the wrapped functor and returns a const reference to the element
    // its result points at.
    typename std::iterator_traits<typename F::result_type>::value_type const &
    operator()(typename F::argument_type const &a) const
    {
        return *(F::operator()(a));
    }
};
// Factory that deduces T and wraps the given functor in a deref adaptor.
template<class T>
deref<T> make_deref(T const &t)
{
    deref<T> wrapped(t);
    return wrapped;
}
// Random number generator with three words of static state.
// Not reentrant: every call updates the function-local static state.
// The state words are size_t, so the sequence depends on the width of size_t.
size_t xorshf96(void)
{
    static size_t s0 = 123456789;
    static size_t s1 = 362436069;
    static size_t s2 = 521288629;

    // Scramble the oldest word.
    size_t mixed = s0;
    mixed ^= mixed << 16;
    mixed ^= mixed >> 5;
    mixed ^= mixed << 1;

    // Shift the state down by one word and combine it into the new last word.
    s0 = s1;
    s1 = s2;
    s2 = mixed ^ s0 ^ s1;
    return s2;
}
// Comparator over indices: orders two positions by the elements they refer to
// in the random-access sequence starting at `beg`.
// Kept as an aggregate so it can be brace-initialised as fun_obj<It>{beg}.
template<class It>
struct fun_obj
{
    It beg;  // first element of the sequence that the indices refer to

    // Returns true when the element at index `lhs` is less than the one at `rhs`.
    // const-qualified so the comparator can also be called through a const
    // object or a const reference.
    bool operator()(ptrdiff_t lhs, ptrdiff_t rhs) const
    {
        return beg[lhs] < beg[rhs];
    }
};
// Factory that deduces the iterator type and returns an index comparator
// whose `beg` member is the given iterator.
template<class It>
fun_obj<It> make_fun_obj(It beg)
{
    fun_obj<It> comparator = {beg};
    return comparator;
}
// Packs three 12-bit values into 40 bits so that comparing (m32, m8)
// gives the same order as comparing the three values lexicographically.
//
// Layout:
//   m32 bits 31..20 : a[0]
//   m32 bits 19..8  : a[1]
//   m32 bits  7..4  : unused (always zero)
//   m32 bits  3..0  : high 4 bits of a[2]
//   m8              : low 8 bits of a[2]
struct uint32p8_t
{
    uint32_t m32;
    uint8_t m8;

    // Packs `a`. Each component is expected to fit in 12 bits; higher bits are
    // masked off so they cannot spill into a neighbouring field. The shifts are
    // done on uint32_t to avoid shifting a promoted (signed) int into its sign bit.
    uint32p8_t(std::array<uint16_t, 3> const& a)
        : m32( (static_cast<uint32_t>(a[0] & 0xFFF) << 20)
             | (static_cast<uint32_t>(a[1] & 0xFFF) << 8)
             | (static_cast<uint32_t>(a[2] & 0xF00) >> 8) )
        , m8( static_cast<uint8_t>(a[2] & 0xFF) )
    {
    }

    // Unpacks back into the three 12-bit components.
    // Each field is shifted down first and masked afterwards: `>>` binds tighter
    // than `&`, so writing `m32 & MASK >> SHIFT` would shift the mask instead.
    operator std::array<size_t, 3>() const
    {
        return {{ static_cast<size_t>((m32 >> 20) & 0xFFF),
                  static_cast<size_t>((m32 >> 8) & 0xFFF),
                  (static_cast<size_t>(m32 & 0xF) << 8) | m8 }};
    }

    // Orders by m32 first and uses m8 only to break ties.
    friend bool operator<(uint32p8_t const& lhs, uint32p8_t const& rhs)
    {
        if(lhs.m32 < rhs.m32) return true;
        if(lhs.m32 > rhs.m32) return false;
        return lhs.m8 < rhs.m8;
    }
};
#include <stdio.h>
#include <time.h>
#include <array>
#include <iostream>
#include <iomanip>
#include <utility>
#include <algorithm>
#include <cstdlib>
#include <iomanip>
#include <random>
// Benchmark driver: fills `items` with random triples of 12-bit values, sorts
// an array of indices ("ranks") into them with the strategy selected at compile
// time, then prints the elapsed time and whether the result is sorted.
// Exactly one of RADIX_SORT, STDSORT or STDSORT_PACKED must be defined;
// otherwise the type alias and `ranks_count` below have no right-hand side
// and the file does not compile.
// Command line: argv[1] = seed (optional), argv[2] = item count (optional).
int main(int argc, char* argv[])
{
std::cout.sync_with_stdio(false);
// Default item count: 2<<22 == 8388608.
constexpr auto items_count_default = 2<<22;
constexpr auto seed_default = 42;
uint32_t const seed = argc > 1 ? std::atoll(argv[1]) : seed_default;
std::cout << "seed: " << seed << "\n";
size_t const items_count = argc > 2 ? std::atoll(argv[2]) : items_count_default;
std::cout << "item count: " << items_count << "\n";
// Element type of each triple: size_t for the radix build, uint16_t for both
// std::sort builds.
using Items_array_value_t =
#ifdef RADIX_SORT
size_t
#elif defined(STDSORT)
uint16_t
#elif defined(STDSORT_PACKED)
uint16_t
#endif
;
typedef std::vector<std::array<Items_array_value_t, 3> > Items;
Items items(items_count);
// The radix build allocates twice as many rank slots; radix_isort only writes
// the first items.size() of them, and only those are checked below.
auto const ranks_count =
#ifdef RADIX_SORT
items.size() * 2
#elif defined(STDSORT)
items.size()
#elif defined(STDSORT_PACKED)
items.size()
#endif
;
//auto prng = xorshf96;
std::mt19937 gen(seed);
std::uniform_int_distribution<> dist;
auto prng = [&dist, &gen]{return dist(gen);};
std::vector<size_t> ranks(ranks_count);
// ranks starts as the identity permutation; every component of every item
// keeps only the low 12 bits of a random draw.
for (size_t i = 0; i != items.size(); i++)
{
ranks[i] = i;
for (size_t j = 0; j != items[i].size(); j++)
{ items[i][j] = prng() & 0xFFF; }
}
std::cout << "using ";
// Everything between here and the second clock() call is timed.
clock_t const start = clock();
#ifdef RADIX_SORT
std::cout << "radix sort\n";
radix_isort(items.begin(), items.end(), ranks.begin());
#elif defined(STDSORT)
std::cout << "std::sort\n";
// Sort the indices, comparing the items they refer to.
std::sort(ranks.begin(), ranks.begin() + items.size(),
make_fun_obj(items.cbegin())
//make_key_comp(make_deref(std::bind1st(
// add<Items::const_iterator, ptrdiff_t>(),
// items.begin())))
);
#elif defined(STDSORT_PACKED)
std::cout << "std::sort with a packed data structure\n";
// Build (packed item, original index) pairs, sort them by the packed item,
// then copy the indices back into `ranks`. Both the packing and the copy-back
// fall inside the timed region.
using Items_ranks = std::vector< std::pair<uint32p8_t,
decltype(ranks)::value_type> >;
Items_ranks items_ranks;
size_t i = 0;
for(auto iI = items.cbegin(); iI != items.cend(); ++iI, ++i)
{
items_ranks.emplace_back(*iI, i);
}
std::sort(begin(items_ranks), end(items_ranks),
[](Items_ranks::value_type const& lhs,
Items_ranks::value_type const& rhs)
{ return lhs.first < rhs.first; }
);
std::transform(items_ranks.cbegin(), items_ranks.cend(), begin(ranks),
[](Items_ranks::value_type const& e) { return e.second; }
);
#endif
// Convert elapsed clock() ticks to milliseconds.
auto const duration = (clock() - start) / (CLOCKS_PER_SEC / 1000);
// Verification (not timed): the first items.size() ranks must refer to items
// in non-decreasing order.
bool const sorted = std::is_sorted(ranks.begin(), ranks.begin() + items.size(),
make_fun_obj(items.cbegin()));
std::cout << "duration: " << duration << " ms\n"
<< "result sorted: " << std::boolalpha << sorted << "\n";
return 0;
}
I'm trying to re-write some MatLab code in C++ and I've come across this:
currentsign = sign(vector(i));
I have looked on the internet and found this link: http://www.mathworks.co.uk/help/techdoc/ref/sign.html
I'm just wondering if there's a sign function in C++? If not, can anyone suggest any tutorials on creating it.
Thank you :)
// Returns 1 when val is greater than zero, -1 when it is less than zero,
// and 0 otherwise.
template <typename T>
int sign (const T &val)
{
    if (val > 0) { return 1; }
    if (val < 0) { return -1; }
    return 0;
}
Credit due to Ambroz Bizjak.
// Element-wise sign of a vector: the result has the same length as `v` and
// holds the scalar sign of each element.
template <typename T>
std::vector<int> sign (const std::vector<T> &v) {
    // The pointer type picks the scalar overload out of the `sign` overload set.
    int (*const scalar_sign)(const T &) = sign;
    std::vector<int> result(v.size());
    std::transform(v.begin(), v.end(), result.begin(), scalar_sign);
    return result;
}
Full example on ideone.
I would suggest
First, write a function or functor that takes a single element and returns 1, -1 or 0 depending on the element's value
Second, use std::transform together with this function/functor to take an input container and fill a second container with the desired values
// Returns 1 for a positive value, -1 for a negative value and 0 otherwise.
// Both comparisons are made against T(0), so the operands have the same type.
template <typename T>
int signum(const T& val) {
    return (T(0) < val) - (val < T(0));
}
#include <vector>
#include <algorithm>
// Usage sketch: computes the sign of every element of `data` into `signs`.
// NOTE: the `....` initialiser is a placeholder for the caller's own data;
// this function does not compile as written.
int main() {
std::vector<int> data = ....;
// The output container is sized up front so std::transform can write into it.
std::vector<int> signs(data.size());
std::transform(data.begin(), data.end(), signs.begin(), signum<int>);
}
#include <algorithm>
#include <iostream>
#include <iomanip>
#include <vector>
// Returns -1 for a negative value, 0 for zero and 1 for a positive value.
inline int get_signum(int val) {
    if (val < 0) {
        return -1;
    }
    if (val == 0) {
        return 0;
    }
    return 1;
}
// Demo: builds the values -5..5, computes the sign of each with get_signum and
// prints one "value sign" pair per line.
int main() {
    std::vector<int> values;
    for (int i = -5; i < 6; ++i)
        values.push_back(i);

    // Sized up front so std::transform can write straight into it.
    std::vector<int> signum(values.size());
    std::transform(values.begin(), values.end(), signum.begin(), get_signum);

    // The index uses the container's size_type to avoid a signed/unsigned
    // comparison; '\n' avoids flushing the stream after every line.
    for (std::vector<int>::size_type i = 0; i < values.size(); ++i) {
        std::cout << std::setw(2) << values[i] << ' ' << signum[i] << '\n';
    }
    return 0;
}
Well, you can do it at compile time using template specialization.
You can use sign<n>::Positive, sign<n>::Negetive and sign<n>::Zero; you can also use sign<n>::Sign, which is 1, 0 or -1, the same as MATLAB's sign.
#include <iostream>
// Compile-time sign of the integer constant n.
//   Positive : 1 when n > 0, otherwise 0
//   Negetive : 1 when n < 0, otherwise 0
//   Zero     : 1 when n == 0, otherwise 0
//   Sign     : 1, 0 or -1
// The primary template covers every non-zero n; zero is handled by the
// explicit specialization below.
template<int n>
struct sign {
    enum {
        Positive = (n > 0),
        Negetive = (n < 0),
        Zero     = 0,
        Sign     = (Positive ? 1 : -1)
    };
};

// n == 0: neither positive nor negative.
template<>
struct sign<0> {
    enum {
        Positive = 0,
        Negetive = 0,
        Zero     = 1,
        Sign     = 0
    };
};
// Demo: prints Positive, Negetive, Zero and Sign for n = 0, 1 and -1,
// one line per n with no separators between the four numbers.
int main(){
    const int rows[3][4] = {
        { sign<0>::Positive,  sign<0>::Negetive,  sign<0>::Zero,  sign<0>::Sign  },
        { sign<1>::Positive,  sign<1>::Negetive,  sign<1>::Zero,  sign<1>::Sign  },
        { sign<-1>::Positive, sign<-1>::Negetive, sign<-1>::Zero, sign<-1>::Sign }
    };
    for (int row = 0; row < 3; ++row) {
        for (int col = 0; col < 4; ++col) {
            std::cout << rows[row][col];
        }
        std::cout << std::endl;
    }
    return 0;
}
You used to do sign(n) there and here you will do sign<n>::Sign.
C99 has signbit() and copysign(), which seem to be implemented in glibc on Linux. You didn't specify what platform you're on though, so I'm not sure that helps...