Following up on a previous question I asked, Fixing parameters of a fitting function in Nonlinear Least-Square GSL (successfully answered by @zkoza), I would like to implement an algorithm that can fit data to a non-linear function by fixing some of its parameters while leaving the other parameters free to vary in the search for the best fit. The difference from my previous question is that I now want two independent variables instead of one.
Non-linear function used to fit the data
double gaussian(double x, double b, double a, double c)
{
const double z = (x - b) / c;
return a * std::exp(-0.5 * z * z);
}
In my previous question I was considering that x was the only independent variable. Now I would like to consider two independent variables, x and b.
The original algorithm used to fit a non-linear function using only one independent variable (while fixing variable a) is a C++ wrapper of the GSL nonlinear least-squares algorithm (borrowed from https://github.com/Eleobert/gsl-curve-fit/blob/master/example.cpp):
template <typename F, size_t... Is>
auto gen_tuple_impl(F func, std::index_sequence<Is...> )
{
return std::make_tuple(func(Is)...);
}
template <size_t N, typename F>
auto gen_tuple(F func)
{
return gen_tuple_impl(func, std::make_index_sequence<N>{} );
}
template <class R, class... ARGS>
struct function_ripper {
static constexpr size_t n_args = sizeof...(ARGS);
};
template <class R, class... ARGS>
auto constexpr n_params(R (ARGS...) )
{
return function_ripper<R, ARGS...>();
}
auto internal_solve_system(gsl_vector* initial_params, gsl_multifit_nlinear_fdf *fdf,
gsl_multifit_nlinear_parameters *params) -> std::vector<double>
{
// This specifies a trust region method
const gsl_multifit_nlinear_type *T = gsl_multifit_nlinear_trust;
const size_t max_iter = 200;
const double xtol = 1.0e-8;
const double gtol = 1.0e-8;
const double ftol = 1.0e-8;
auto *work = gsl_multifit_nlinear_alloc(T, params, fdf->n, fdf->p);
int info;
// initialize solver
gsl_multifit_nlinear_init(initial_params, fdf, work);
//iterate until convergence
gsl_multifit_nlinear_driver(max_iter, xtol, gtol, ftol, nullptr, nullptr, &info, work);
// result will be stored here
gsl_vector * y = gsl_multifit_nlinear_position(work);
auto result = std::vector<double>(initial_params->size);
for(size_t i = 0; i < result.size(); i++)
{
result[i] = gsl_vector_get(y, i);
}
auto niter = gsl_multifit_nlinear_niter(work);
auto nfev = fdf->nevalf;
auto njev = fdf->nevaldf;
auto naev = fdf->nevalfvv;
// nfev - number of function evaluations
// njev - number of Jacobian evaluations
// naev - number of f_vv evaluations
//logger::debug("curve fitted after ", niter, " iterations {nfev = ", nfev, "} {njev = ", njev, "} {naev = ", naev, "}");
gsl_multifit_nlinear_free(work);
gsl_vector_free(initial_params);
return result;
}
template<auto n>
auto internal_make_gsl_vector_ptr(const std::array<double, n>& vec) -> gsl_vector*
{
auto* result = gsl_vector_alloc(vec.size());
int i = 0;
for(const auto e: vec)
{
gsl_vector_set(result, i, e);
i++;
}
return result;
}
template<typename C1>
struct fit_data
{
const std::vector<double>& t;
const std::vector<double>& y;
// the actual function to be fitted
C1 f;
};
template<typename FitData, int n_params>
int internal_f(const gsl_vector* x, void* params, gsl_vector *f)
{
auto* d = static_cast<FitData*>(params);
// Convert the parameter values from gsl_vector (in x) into std::tuple
auto init_args = [x](int index)
{
return gsl_vector_get(x, index);
};
auto parameters = gen_tuple<n_params>(init_args);
// Calculate the error for each...
for (size_t i = 0; i < d->t.size(); ++i)
{
double ti = d->t[i];
double yi = d->y[i];
auto func = [ti, &d](auto ...xs)
{
// call the actual function to be fitted
return d->f(ti, xs...);
};
auto y = std::apply(func, parameters);
gsl_vector_set(f, i, yi - y);
}
return GSL_SUCCESS;
}
using func_f_type = int (*) (const gsl_vector*, void*, gsl_vector*);
using func_df_type = int (*) (const gsl_vector*, void*, gsl_matrix*);
using func_fvv_type = int (*) (const gsl_vector*, const gsl_vector *, void *, gsl_vector *);
template<auto n>
auto internal_make_gsl_vector_ptr(const std::array<double, n>& vec) -> gsl_vector*;
auto internal_solve_system(gsl_vector* initial_params, gsl_multifit_nlinear_fdf *fdf,
gsl_multifit_nlinear_parameters *params) -> std::vector<double>;
template<typename C1>
auto curve_fit_impl(func_f_type f, func_df_type df, func_fvv_type fvv, gsl_vector* initial_params, fit_data<C1>& fd) -> std::vector<double>
{
assert(fd.t.size() == fd.y.size());
auto fdf = gsl_multifit_nlinear_fdf();
auto fdf_params = gsl_multifit_nlinear_default_parameters();
fdf.f = f;
fdf.df = df;
fdf.fvv = fvv;
fdf.n = fd.t.size();
fdf.p = initial_params->size;
fdf.params = &fd;
// "This selects the Levenberg-Marquardt algorithm with geodesic acceleration."
fdf_params.trs = gsl_multifit_nlinear_trs_lmaccel;
return internal_solve_system(initial_params, &fdf, &fdf_params);
}
template <typename Callable, auto n>
auto curve_fit(Callable f, const std::array<double, n>& initial_params, const std::vector<double>& x, const std::vector<double>& y) -> std::vector<double>
{
// We can't pass lambdas without converting them to std::function.
//constexpr auto n = 3;//decltype(n_params(f))::n_args - 5;
//constexpr auto n = 2;
assert(initial_params.size() == n);
auto params = internal_make_gsl_vector_ptr(initial_params);
auto fd = fit_data<Callable>{x, y, f};
return curve_fit_impl(internal_f<decltype(fd), n>, nullptr, nullptr, params, fd);
}
In order to fix one of the parameters of the gaussian function, @zkoza proposed using a functor:
struct gaussian_fixed_a
{
double a;
gaussian_fixed_a(double a) : a{a} {}
double operator()(double x, double b, double c) const { return gaussian(x, b, a, c); }
};
These last lines show how I would create a fake dataset of observed data (with normally distributed noise) and test the curve-fitting function with two independent variables, given by the vectors xs and bs.
int main()
{
auto device = std::random_device();
auto gen = std::mt19937(device());
auto xs = linspace<std::vector<double>>(0.0, 1.0, 300);
auto bs = linspace<std::vector<double>>(0.4, 1.4, 300);
auto ys = std::vector<double>(xs.size());
double a = 5.0, c = 0.15;
for(size_t i = 0; i < xs.size(); i++)
{
auto y = gaussian(xs[i], bs[i], a, c);
auto dist = std::normal_distribution(0.0, 0.1 * y);
ys[i] = y + dist(gen);
}
gaussian_fixed_a g(a);
auto r = curve_fit(g, std::array{0.11}, xs, bs, ys);
std::cout << "result: " << r[0] << ' ' << '\n';
std::cout << "error : " << r[0] - c << '\n';
}
Do you have any idea on how I could implement the two-independent variables non-linear fitting?
The solution, as suggested in the comments by @BenVoigt, is to replace the two independent variables x and b of the gaussian function with a single independent variable given as a vector, whose first element is x and whose second element is b.
The backbone of the nonlinear fitting also needs to be edited slightly. The edits consist of:
Replace the fit_data struct with:
template<typename C1>
struct fit_data
{
const std::vector<std::vector<double>>& t;
const std::vector<double>& y;
// the actual function to be fitted
C1 f;
};
This way, the independent variable is no longer a vector but a vector of vectors (i.e. a matrix).
Replace within the function internal_f (a combined sketch of the result follows after this list of edits):
a) double ti = d->t[i] with std::vector<double> ti = d->t[i]
b) auto func = [ti, &d](auto ...xs) with auto func = [ti, &d](auto ...xs_matrix)
c) return d->f(ti, xs...) with return d->f(ti, xs_matrix...)
Replace within the curve_fit function:
a) const std::vector<double>& x with const std::vector<std::vector<double>>& xs_matrix
b) auto fd = fit_data<Callable>{x, y, f} with auto fd = fit_data<Callable>{xs_matrix, y, f}
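Putting the internal_f edits together, the modified loop then looks roughly like this (a sketch reconstructed from the replacements above):

for (size_t i = 0; i < d->t.size(); ++i)
{
    std::vector<double> ti = d->t[i]; // one row of the matrix: ti[0] = x, ti[1] = b
    double yi = d->y[i];
    auto func = [ti, &d](auto ...xs_matrix)
    {
        // call the actual function to be fitted
        return d->f(ti, xs_matrix...);
    };
    auto y = std::apply(func, parameters);
    gsl_vector_set(f, i, yi - y);
}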
The gaussian function, the gaussian_fixed_a functor and the fitting function then look like this:
double gaussian(std::vector<double> x_vector, double a, double c)
{
const double z = (x_vector[0] - x_vector[1]) / c;
return a * std::exp(-0.5 * z * z);
}
struct gaussian_fixed_a
{
double a;
gaussian_fixed_a(double a) : a{a} {}
double operator()(std::vector<double> x_vector, double c) const { return gaussian(x_vector, a, c); }
};
double fittingTest(const std::vector<std::vector<double>>& xs_matrix, const std::vector<double>& ys, const double a){
gaussian_fixed_a g(a);
auto r = curve_fit(g, std::array{3.0}, xs_matrix, ys);
return r[0];
}
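For completeness, a minimal sketch of how the xs_matrix could be assembled from the xs and bs vectors of the original main (the names xs_matrix and c_fitted are illustrative; element 0 holds x and element 1 holds b):

std::vector<std::vector<double>> xs_matrix(xs.size());
for (size_t i = 0; i < xs.size(); i++)
    xs_matrix[i] = {xs[i], bs[i]}; // element 0 is x, element 1 is b

double c_fitted = fittingTest(xs_matrix, ys, a);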
I am trying to use templates to represent simple polynomials like x^2 + 3x + 5. My idea is to represent them as a sum of terms, with each term having a coefficient and a power, so e.g. x^2 has coeff=1 and power=2. I also want to be able to evaluate the polynomials for some x (they have only one unknown, but it can appear in several terms). So far I have:
struct PolyEnd{
double eval(double x){
return 0;
}
};
template <int coeff, int power, class Tail> struct Poly {
typedef Tail tt;
double eval(double x){
double curr = coeff * std::pow(x, power);
return curr; // has to call eval(x) on rest of the terms which are in the tail and return the sum with "curr"
}
};
int main()
{
double x = 2;
Poly<1,1,Poly<1,1,PolyEnd>> poly;
std::cout << poly.eval(x) << std::endl;
return 0;
}
However, I am stuck. Is what I am trying even possible? If so, how can I make the recursive eval() calls work?
Yes, you can do that. You just need to call eval on the tail; since all the classes are stateless, you can simply create an instance on the spot to call the member function on:
struct PolyEnd{
double eval(double x){
return 0;
}
};
template <int coeff, int power, class Tail> struct Poly {
typedef Tail tt;
double eval(double x){
double curr = coeff * std::pow(x, power);
return curr + Tail{}.eval(x);
}
};
int main()
{
double x = 2;
Poly<1,1,Poly<1,1,PolyEnd>> poly;
std::cout << poly.eval(x) << std::endl;
return 0;
}
or if you make eval static, then you can call Tail::eval(x) directly.
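For example, a static version could be sketched as follows (same recursion, just without creating a Tail instance):

struct PolyEnd{
    static double eval(double){
        return 0;
    }
};

template <int coeff, int power, class Tail> struct Poly {
    static double eval(double x){
        return coeff * std::pow(x, power) + Tail::eval(x);
    }
};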
I guess that you are experimenting with metaprogramming. Your question also made me excited, because I am also a newbie in metaprogramming and want to practice. @walnut's answer has already been accepted, but there is no harm in sharing another implementation. I used some basic metaprogramming techniques.
I hope it helps you.
#include <cmath>
#include <iostream>
#include <string>
template<int Value>
struct coeff
{ };
template<int Value>
struct power
{ };
template<typename Coefficient, typename Power>
struct term;
template<int Coefficient , int Power>
struct term< coeff<Coefficient> , power<Power> >
{
inline double eval( double x ) const noexcept {
return Coefficient * std::pow( x , Power );
}
};
template<int Value>
using constant = term< coeff<Value> , power<0> >;
template<int Value>
using exponential = term< coeff<1> , power<Value> >;
template<typename... T>
struct polynomial
{
static_assert( sizeof...(T) == 0, "A polynomial can only be expressed in 'term's.");
[[nodiscard]] constexpr double eval( double ) const noexcept {
return 0;
}
[[nodiscard]] std::string to_string() const noexcept {
return std::string{};
}
};
template<int Coefficient, int Power, typename... Tail>
struct polynomial<term< coeff<Coefficient> , power<Power> >, Tail...>
: polynomial<Tail...>
{
[[nodiscard]] constexpr double eval( double x ) const noexcept {
return m_t.eval( x ) + polynomial<Tail...>::eval( x );
}
[[nodiscard]] std::string to_string(){
using namespace std;
using namespace std::string_literals;
return "("s + std::to_string( Coefficient ) +
string { "x^" } +
std::to_string( Power ) + ( sizeof...(Tail) == 0 ? ")" : ") + " ) +
polynomial<Tail...>::to_string();
}
private:
term< coeff<Coefficient> , power<Power> > m_t;
};
int main()
{
auto p1 = polynomial<term< coeff<1> , power<2> > ,
term< coeff<2> , power<4> > ,
term< coeff<2> , power<3> > ,
constant<3> ,
exponential<2> >{};
std::cout << "Polynomial is : " << p1.to_string() << std::endl;
std::cout << "f(2) : " << p1.eval( 2 ) << std::endl;
std::cout << "f(3) : " << p1.eval( 3 ) << std::endl;
return 0;
}
Polynomial coefficients can be stored in a std::array or a std::vector (in case you define the polynomial degree at runtime).
Then extend the functionality with an eval function.
template <unsigned N>
class Poly : public std::array<double, N> {
public:
template <typename... E>
Poly(E &&... e) : std::array<double, N>{{std::forward<E>(e)...}} {}
double eval(double x) {
double result = 0;
double exp = 1.;
for (auto it = this->rbegin(); it != this->rend(); ++it) {
result += exp * (*it);
exp *= x;
}
return result;
}
};
Usage:
double result = Poly<3>{3., 2., 1.}.eval(17);
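With this layout, Poly<3>{3., 2., 1.} represents 3x^2 + 2x + 1, so eval(17) computes 3*17*17 + 2*17 + 1 = 902.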
Say I have a nested for loop like
for (int x = xstart; x < xend; x++){
for (int y = ystart; y < yend; y++){
for (int z = zstart; z < zend; z++){
function_doing_stuff(std::make_tuple(x, y, z));
}
}
}
and would like to transform it into
MyRange range(xstart,xend,ystart,yend, zstart,zend);
for (auto point : range){
function_doing_stuff(point);
}
How would I write the MyRange class to be as efficient as the nested for loops?
The motivation for this is to be able to use std algorithms (such as transform, accumulate, etc), and to create code that is largely dimension agnostic.
By having an iterator, it would be easy to create templated functions that operate over a range of 1d, 2d or 3d points.
Code base is currently C++14.
EDIT:
Writing clear questions is hard. I'll try to clarify.
My problem is not writing an iterator; that I can do. Instead, the problem is one of performance: is it possible to make an iterator that is as fast as the nested for loops?
With range-v3, you may do:
auto xs = ranges::view::iota(xstart, xend);
auto ys = ranges::view::iota(ystart, yend);
auto zs = ranges::view::iota(zstart, zend);
for (const auto& point : ranges::view::cartesian_product(xs, ys, zs)){
function_doing_stuff(point);
}
You can introduce your own class as
class myClass {
public:
myClass (int x, int y, int z):m_x(x) , m_y(y), m_z(z){};
private:
int m_x, m_y, m_z;
};
and then initialize a std::vector<myClass> with your triple loop
std::vector<myClass> myVec;
myVec.reserve((xend-xstart)*(yend-ystart)*(zend-zstart)); // alloc memory only once;
for (int x = xstart; x < xend; x++){
for (int y = ystart; y < yend; y++){
for (int z = zstart; z < zend; z++){
myVec.push_back({x,y,z});
}
}
}
Finally, you can use all the nice std algorithms with the std::vector<myClass> myVec. With the syntactic sugar
using MyRange = std::vector<MyClass>;
and
MyRange makeMyRange(int xstart, int xend, int ystart, int yend, int zstart,int zend) {
MyRange myVec;
// loop from above
return myVec;
}
you can write
const MyRange range = makeMyRange(xstart, xend, ystart, yend, zstart, zend);
for (auto point : range){
function_doing_stuff(point);
}
With move semantics this won't create unneeded copies. Please note that the interface to this function is rather bad; perhaps use three pairs of ints instead, denoting the x, y, z intervals.
Perhaps also change the names to something meaningful (e.g. myClass could be Point).
Another option, which directly transplants whatever looping code, is to use a Coroutine. This emulates yield from Python or C#.
using point = std::tuple<int, int, int>;
using coro = boost::coroutines::asymmetric_coroutine<point>;
coro::pull_type points(
[&](coro::push_type& yield){
for (int x = xstart; x < xend; x++){
for (int y = ystart; y < yend; y++){
for (int z = zstart; z < zend; z++){
yield(std::make_tuple(x, y, z));
}
}
}
});
for(auto p : points)
function_doing_stuff(p);
Since you care about performance, you should forget about combining iterators for the foreseeable future. The central problem is that compilers cannot yet untangle the mess and figure out that there are 3 independent variables in it, much less perform any loop interchange or unrolling or fusion.
If you must use ranges, use simple ones that the compiler can see through:
for (int const x : boost::irange<int>(xstart,xend))
for (int const y : boost::irange<int>(ystart,yend))
for (int const z : boost::irange<int>(zstart,zend))
function_doing_stuff(x, y, z);
Alternatively, you can actually pass your functor and the boost ranges to a template:
template <typename Func, typename Range0, typename Range1, typename Range2>
void apply_ranges (Func func, Range0 r0, Range1 r1, Range2 r2)
{
for (auto const i0 : r0)
for (auto const i1 : r1)
for (auto const i2 : r2)
func (i0, i1, i2);
}
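Usage could then look something like this (a sketch; the lambda merely adapts the three ints to the tuple-taking function_doing_stuff from the question):

apply_ranges(
    [](int x, int y, int z) { function_doing_stuff(std::make_tuple(x, y, z)); },
    boost::irange<int>(xstart, xend),
    boost::irange<int>(ystart, yend),
    boost::irange<int>(zstart, zend));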
If you truly care about performance, then you should not contort your code with complicated ranges, because they make it harder to untangle later when you want to rewrite them in AVX intrinsics.
Here's a bare-bones implementation that does not use any advanced language features or other libraries. The performance should be pretty close to the for loop version.
#include <tuple>
class MyRange {
public:
typedef std::tuple<int, int, int> valtype;
MyRange(int xstart, int xend, int ystart, int yend, int zstart, int zend): xstart(xstart), xend(xend), ystart(ystart), yend(yend), zstart(zstart), zend(zend) {
}
class iterator {
public:
iterator(MyRange &c): me(c) {
curvalue = std::make_tuple(me.xstart, me.ystart, me.zstart);
}
iterator(MyRange &c, bool end): me(c) {
curvalue = std::make_tuple(end ? me.xend : me.xstart, me.ystart, me.zstart);
}
valtype operator*() {
return curvalue;
}
iterator &operator++() {
if (++std::get<2>(curvalue) == me.zend) {
std::get<2>(curvalue) = me.zstart;
if (++std::get<1>(curvalue) == me.yend) {
std::get<1>(curvalue) = me.ystart;
++std::get<0>(curvalue);
}
}
return *this;
}
bool operator==(const iterator &other) const {
return curvalue == other.curvalue;
}
bool operator!=(const iterator &other) const {
return curvalue != other.curvalue;
}
private:
MyRange &me;
valtype curvalue;
};
iterator begin() {
return iterator(*this);
}
iterator end() {
return iterator(*this, true);
}
private:
int xstart, xend;
int ystart, yend;
int zstart, zend;
};
And an example of usage:
#include <iostream>
void display(std::tuple<int, int, int> v) {
std::cout << "(" << std::get<0>(v) << ", " << std::get<1>(v) << ", " << std::get<2>(v) << ")\n";
}
int main() {
MyRange c(1, 4, 2, 5, 7, 9);
for (auto v: c) {
display(v);
}
}
I've left off things like const iterators, possible operator+=, decrementing, post increment, etc. They've been left as an exercise for the reader.
It stores the initial values, then increments each value in turn, rolling it back and incrementing the next when it get to the end value. It's a bit like incrementing a multi-digit number.
Using boost::iterator_facade for simplicity, you can spell out all the required members.
First we have a class that iterates N-dimensional indexes as std::array<std::size_t, N>
template<std::size_t N>
class indexes_iterator : public boost::iterator_facade<indexes_iterator<N>, std::array<std::size_t, N>, boost::random_access_traversal_tag, std::array<std::size_t, N> const&>
{
public:
template<typename... Dims>
indexes_iterator(Dims... dims) : dims{ dims... }, values{} {}
private:
friend class boost::iterator_core_access;
void increment() { advance(1); }
void decrement() { advance(-1); }
void advance(int n)
{
for (std::size_t i = 0; i < N; ++i)
{
int next = ((values[i] + n) % dims[i]);
n = (n / dims[i]) + (next < static_cast<int>(values[i]));
values[i] = next;
}
}
std::size_t distance_to(indexes_iterator const & other) const
{
std::size_t result = 0, mul = 1;
for (std::size_t i = 0; i < N; ++i)
{
result += mul * (other.values[i] - values[i]);
mul *= dims[i];
}
return result;
}
bool equal(indexes_iterator const& other) const
{
return values == other.values;
}
std::array<std::size_t, N> const& dereference() const { return values; }
std::array<std::size_t, N> dims;
std::array<std::size_t, N> values;
};
Then we use that to make something similar to a boost::zip_iterator, but instead of advancing all together we add our indexes.
template <typename... Iterators>
class product_iterator : public boost::iterator_facade<product_iterator<Iterators...>, const std::tuple<decltype(*std::declval<Iterators>())...>, boost::random_access_traversal_tag>
{
using ref = std::tuple<decltype(*std::declval<Iterators>())...>;
public:
product_iterator(Iterators ... ends) : indexes() , iterators(std::make_tuple(ends...)) {}
template <typename ... Sizes>
product_iterator(Iterators ... begins, Sizes ... sizes)
: indexes(sizes...),
iterators(begins...)
{}
private:
friend class boost::iterator_core_access;
template<std::size_t... Is>
ref dereference_impl(std::index_sequence<Is...> idxs) const
{
auto offs = offset(idxs);
return { *std::get<Is>(offs)... };
}
ref dereference() const
{
return dereference_impl(std::index_sequence_for<Iterators...>{});
}
void increment() { ++indexes; }
void decrement() { --indexes; }
void advance(int n) { indexes += n; }
template<std::size_t... Is>
std::tuple<Iterators...> offset(std::index_sequence<Is...>) const
{
auto idxs = *indexes;
return { (std::get<Is>(iterators) + std::get<Is>(idxs))... };
}
bool equal(product_iterator const & other) const
{
return offset(std::index_sequence_for<Iterators...>{})
== other.offset(std::index_sequence_for<Iterators...>{});
}
indexes_iterator<sizeof...(Iterators)> indexes;
std::tuple<Iterators...> iterators;
};
Then we wrap it up in a boost::iterator_range
template <typename... Ranges>
auto make_product_range(Ranges&&... rngs)
{
product_iterator<decltype(begin(rngs))...> b(begin(rngs)..., std::distance(std::begin(rngs), std::end(rngs))...);
product_iterator<decltype(begin(rngs))...> e(end(rngs)...);
return boost::iterator_range<product_iterator<decltype(begin(rngs))...>>(b, e);
}
int main()
{
using ranges::view::iota;
for (auto p : make_product_range(iota(xstart, xend), iota(ystart, yend), iota(zstart, zend)))
// ...
return 0;
}
See it on godbolt
Just a very simplified version that will be as efficient as a for loop:
#include <tuple>
struct iterator{
int x;
int x_start;
int x_end;
int y;
int y_start;
int y_end;
int z;
constexpr auto
operator*() const{
return std::tuple{x,y,z};
}
constexpr iterator&
operator++ [[gnu::always_inline]](){
++x;
if (x==x_end){
x=x_start;
++y;
if (y==y_end) {
++z;
y=y_start;
}
}
return *this;
}
constexpr iterator
operator++(int){
auto old=*this;
operator++();
return old;
}
};
struct sentinel{
int z_end;
friend constexpr bool
operator == (const iterator& x,const sentinel& s){
return x.z==s.z_end;
}
friend constexpr bool
operator == (const sentinel& a,const iterator& x){
return x==a;
}
friend constexpr bool
operator != (const iterator& x,const sentinel& a){
return !(x==a);
}
friend constexpr bool
operator != (const sentinel& a,const iterator& x){
return !(x==a);
}
};
struct range{
iterator start;
sentinel finish;
constexpr auto
begin() const{
return start;
}
constexpr auto
end()const{
return finish;
}
};
void func(int,int,int);
void test(const range& r){
for(auto [x,y,z]: r)
func(x,y,z);
}
void test(int x_start,int x_end,int y_start,int y_end,int z_start,int z_end){
for(int z=z_start;z<z_end;++z)
for(int y=y_start;y<y_end;++y)
for(int x=x_start;x<x_end;++x)
func(x,y,z);
}
The advantage over 1201ProgramAlarm's answer is the faster test performed at each iteration, thanks to the use of a sentinel.
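For reference, a small helper that builds such a range from the six bounds might look like this (a hypothetical make_range, not part of the code above):

constexpr range
make_range(int x_start, int x_end, int y_start, int y_end, int z_start, int z_end){
    return range{
        iterator{x_start, x_start, x_end, y_start, y_start, y_end, z_start},
        sentinel{z_end}};
}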
I recently came across an interesting article about what is called the Kronecker product. At the same time I'm working on my neural network library.
For my algorithm to work, I need a tensor class where I can get the product of two tensors with an overloaded * operator.
Consider the following example/questions:
How to efficiently construct/store the nested matrices?
How to perform the product of two tensors?
How to visualize tensor c as simply as possible?
My tensor class, which currently only supports 3 dimensions:
#pragma once
#include <iostream>
#include <sstream>
#include <random>
#include <cmath>
#include <cstring> // for std::memcpy
#include <vector>  // for std::vector
#include <iomanip>
template<typename T>
class tensor {
public:
const unsigned int x, y, z, s;
tensor(unsigned int x, unsigned int y, unsigned int z, T val) : x(x), y(y), z(z), s(x * y * z) {
p_data = new T[s];
for (unsigned int i = 0; i < s; i++) p_data[i] = val;
}
tensor(const tensor<T> & other) : x(other.x), y(other.y), z(other.z), s(other.s) {
p_data = new T[s];
std::memcpy(p_data, other.p_data, s * sizeof(T));
}
~tensor() {
delete[] p_data;
p_data = nullptr;
}
T * get_data() {
return p_data;
}
static tensor<T> * random(unsigned int x, unsigned int y, unsigned int z, T val, T min, T max) {
tensor<T> * p_tensor = new tensor<T>(x, y, z, val);
std::random_device rd;
std::mt19937 mt(rd());
std::uniform_real_distribution<T> dist(min, max);
for (unsigned int i = 0; i < p_tensor->s; i++) {
T rnd = dist(mt);
while (std::abs(rnd) < 0.001) rnd = dist(mt);
p_tensor->get_data()[i] = rnd;
}
return p_tensor;
}
static tensor<T> * from(std::vector<T> * p_data, T val) {
tensor<T> * p_tensor = new tensor<T>(p_data->size(), 1, 1, val);
for (unsigned int i = 0; i < p_tensor->x; i++) (*p_tensor)(i) = p_data->at(i);
return p_tensor;
}
friend std::ostream & operator <<(std::ostream & stream, tensor<T> & tensor) {
stream << "(" << tensor.x << "," << tensor.y << "," << tensor.z << ") Tensor\n";
for (unsigned int i = 0; i < tensor.x; i++) {
for (unsigned int k = 0; k < tensor.z; k++) {
stream << "[";
for (unsigned int j = 0; j < tensor.y; j++) {
stream << std::setw(5) << roundf(tensor(i, j, k) * 1000) / 1000;
if (j + 1 < tensor.y) stream << ",";
}
stream << "]";
}
stream << std::endl;
}
return stream;
}
tensor<T> operator +(tensor<T> & other) {
tensor<T> result(*this);
return result;
}
tensor<T> operator -(tensor<T> & other) {
tensor<T> result(*this);
return result;
}
tensor<T> operator *(tensor<T> & other) {
tensor<T> result(*this);
return result;
}
T & operator ()(unsigned int i, unsigned int j, unsigned int k) {
return p_data[i + (j * x) + (k * x * y)];
}
T & operator ()(unsigned int i) {
return p_data[i];
}
private:
T * p_data = nullptr;
};
int main() {
tensor<double> * p_tensor_input = tensor<double>::random(6, 2, 3, 0.0, 0.0, 1.0);
tensor<double> * p_tensor_weight = tensor<double>::random(2, 6, 3, 0.0, 0.0, 1.0);
std::cout << *p_tensor_input << std::endl;
std::cout << *p_tensor_weight << std::endl;
tensor<double> p_tensor_output = *p_tensor_input + *p_tensor_weight;
return 0;
}
Your first step is #2 -- and get it correct.
After that, optimize.
Start with a container C<T>.
Define some operations on it: wrap(T) returns a C<T> containing that T; map takes a C<T> and a function from T to U (U f(T)) and returns a C<U>; flatten takes a C<C<U>> and returns a C<U>.
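For a concrete starting point, these operations could be sketched on std::vector like this (my own illustrative definitions, assuming C is std::vector; not a fixed API):

#include <utility>
#include <vector>

template<class T> using C = std::vector<T>;

// wrap: put a single T into a container
template<class T>
C<T> wrap(T t) { return C<T>{ std::move(t) }; }

// map: apply f to every element, producing a container of the results
template<class T, class F>
auto map(C<T> const& c, F f) -> C<decltype(f(std::declval<T>()))> {
    C<decltype(f(std::declval<T>()))> out;
    out.reserve(c.size());
    for (auto const& e : c) out.push_back(f(e));
    return out;
}

// flatten: concatenate the inner containers of a C<C<U>>
template<class U>
C<U> flatten(C<C<U>> const& cc) {
    C<U> out;
    for (auto const& inner : cc)
        out.insert(out.end(), inner.begin(), inner.end());
    return out;
}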
Define scale( T, C<T> ) which takes a T and a C<T> and returns a C<T> with the elements scaled. Aka, scalar multiplication.
template<class T>
C<T> scale( T scalar, C<T> container ) {
return map( container, [&](T t){ return t*scalar; } );
}
Then we have:
template<class T>
C<T> tensor( C<T> lhs, C<T> rhs ) {
return flatten( map( lhs, [&](T t) { return scale( t, rhs ); } ) );
}
is your tensor product. And yes, that can be your actual code. I would tweak it a bit for efficiency.
(Note that I used different terms, but I'm basically describing monadic operations.)
After you have this, test, optimize, and iterate.
As for 3, the result of tensor products get large and complex, there is no simple visualization for a large tensor.
Oh, and keep things simple and store data in a std::vector to start.
Here are some tricks for efficient vectors that I learned in class, but they should be equally good for a tensor.
Define a constructor that allocates without initializing, and assignment operators. For example:
tensor(unsigned int x, unsigned int y, unsigned int z) : x(x), y(y), z(z), s(x * y * z) {
p_data = new T[s];
}
tensor& operator=( tensor const& that ) {
for (unsigned int i=0; i<s; ++i) {
p_data[i] = that.p_data[i] ;
}
return *this ;
}
template <typename Expr>
tensor& operator=( Expr const& that ) {
for (unsigned int i=0; i<s; ++i) {
p_data[i] = that(i) ;
}
return *this ;
}
Now we can implement things like addition and scaling with deferred evaluation. For example:
template<typename T1, typename T2>
class tensor_sum {
public:
//add value_type to the base tensor class for this to work
typedef decltype( typename T1::value_type() + typename T2::value_type() ) value_type ;
//also add a function to get the size of a tensor
tensor_sum( T1 const& t1, T2 const& t2 ) : t1_(t1), t2_(t2) {}
value_type operator()( int i, int j, int k ) const {
return t1_(i,j,k) + t2_(i,j,k) ;
}
value_type operator()( int i ) const {
return t1_(i) + t2_(i) ;
}
private:
T1 const& t1_;
T2 const& t2_;
};
template <typename T1, typename T2>
tensor_sum<T1,T2> operator+(T1 const& t1, T2 const& t2 ) {
return tensor_sum<T1,T2>(t1,t2) ;
}
This tensor_sum behaves exactly like any normal tensor, except that we don't have to allocate memory to store the result. So we can do something like this:
tensor<double> t0(...);
tensor<double> t1(...);
tensor<double> t2(...);
tensor<double> result(...); //define result to be empty, we will fill it later
result = t0 + t1 + 5.0*t2;
The compiler should optimize this to be just one loop, without storing intermediate results or modifying the original tensors. You can do the same thing for scaling and the kronecker product. Depending on what you want to do with the tensors, this can be a big advantage. But be careful, this isn't always the best option.
When implementing the Kronecker product you should be careful about the ordering of your loops; try to go through the tensors in the order they are stored, for cache efficiency.
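To make the loop-ordering point concrete, here is an illustrative Kronecker product for two matrices stored row-major in flat vectors (a sketch of my own, not tied to the tensor class in the question); the innermost loop reads and writes contiguous memory:

#include <cstddef>
#include <vector>

// Kronecker product of an (ar x ac) matrix A and a (br x bc) matrix B,
// both stored row-major in flat vectors; the result is (ar*br) x (ac*bc), also row-major.
std::vector<double> kronecker(const std::vector<double>& A, int ar, int ac,
                              const std::vector<double>& B, int br, int bc)
{
    std::vector<double> R(static_cast<std::size_t>(ar) * br * ac * bc);
    const int rc = ac * bc; // number of columns of the result
    for (int ia = 0; ia < ar; ++ia)
        for (int ib = 0; ib < br; ++ib)          // walk the result row by row
            for (int ja = 0; ja < ac; ++ja)
                for (int jb = 0; jb < bc; ++jb)  // contiguous writes within a row
                    R[(ia * br + ib) * rc + (ja * bc + jb)] =
                        A[ia * ac + ja] * B[ib * bc + jb];
    return R;
}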
I would like to know if it is possible to create an actual functor object from a lambda expression. I don't think so, but if not, why?
To illustrate, given the code below, which sorts points using various policies for x and y coordinates:
#include <vector>
#include <functional>
#include <algorithm>
#include <iostream>
struct Point
{
Point(int x, int y) : x(x), y(y) {}
int x, y;
};
template <class XOrder, class YOrder>
struct SortXY :
std::binary_function<const Point&, const Point&, bool>
{
bool operator()(const Point& lhs, const Point& rhs) const
{
if (XOrder()(lhs.x, rhs.x))
return true;
else if (XOrder()(rhs.x, lhs.x))
return false;
else
return YOrder()(lhs.y, rhs.y);
}
};
struct Ascending { bool operator()(int l, int r) const { return l<r; } };
struct Descending { bool operator()(int l, int r) const { return l>r; } };
int main()
{
// fill vector with data
std::vector<Point> pts;
pts.push_back(Point(10, 20));
pts.push_back(Point(20, 5));
pts.push_back(Point( 5, 0));
pts.push_back(Point(10, 30));
// sort array
std::sort(pts.begin(), pts.end(), SortXY<Descending, Ascending>());
// dump content
std::for_each(pts.begin(), pts.end(),
[](const Point& p)
{
std::cout << p.x << "," << p.y << "\n";
});
}
The expression std::sort(pts.begin(), pts.end(), SortXY<Descending, Ascending>()); sorts according to descending x values, and then to ascending y values. It's easily understandable, and I'm not sure I really want to make use of lambda expressions here.
But if I wanted to replace Ascending / Descending by lambda expressions, how would you do it? The following isn't valid:
std::sort(pts.begin(), pts.end(), SortXY<
[](int l, int r) { return l>r; },
[](int l, int r) { return l<r; }
>());
This problem arises because SortXY only takes types, whereas lambdas are objects. You need to re-write it so that it takes objects, not just types. This is basic use of function objects: see how std::for_each doesn't take a type, it takes an object.
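One possible rewrite along those lines (a sketch, not the only option) stores the comparators as members and deduces their types through a helper function:

template <class XOrder, class YOrder>
struct SortXY
{
    XOrder x_order;
    YOrder y_order;

    SortXY(XOrder xo, YOrder yo) : x_order(xo), y_order(yo) {}

    bool operator()(const Point& lhs, const Point& rhs) const
    {
        if (x_order(lhs.x, rhs.x))
            return true;
        else if (x_order(rhs.x, lhs.x))
            return false;
        else
            return y_order(lhs.y, rhs.y);
    }
};

template <class XOrder, class YOrder>
SortXY<XOrder, YOrder> make_sort_xy(XOrder xo, YOrder yo)
{
    return SortXY<XOrder, YOrder>(xo, yo);
}

// usage:
std::sort(pts.begin(), pts.end(), make_sort_xy(
    [](int l, int r) { return l > r; },   // descending x
    [](int l, int r) { return l < r; })); // ascending y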
I have posted a similar question about lambda functors within classes.
Check it out, perhaps it helps:
Lambda expression as member functors in a class
I had a similar problem: in some cases it was required to provide a "raw" function pointer and in others a functor. So I came up with a "workaround" like this:
template<class T>
class Selector
{
public:
Selector(int (*theSelector)(T& l, T& r))
: selector(theSelector) {}
virtual int operator()(T& l, T& r) {
return selector(l, r);
}
int (*getRawSelector() const)(T&, T&) {
return this->selector;
}
private:
int(*selector)(T& l, T& r);
};
Assuming you have two very simple functions taking, as described, either a functor or a raw function pointer, like this:
int
findMinWithFunctor(int* array, int size, Selector<int> selector)
{
if (array && size > 0) {
int min = array[0];
for (int i = 0; i < size; i++) {
if (selector(array[i], min) < 0) {
min = array[i];
}
}
return min;
}
return -1;
}
int
findMinWithFunctionPointer(int* array, int size, int(*selector)(int&, int&))
{
if (array && size > 0) {
int min = array[0];
for (int i = 0; i < size; i++) {
if (selector(array[i], min) < 0) {
min = array[i];
}
}
return min;
}
return -1;
}
Then you would call this functions like this:
int numbers[3] = { 4, 2, 99 };
cout << "The min with functor is:" << findMinWithFunctor(numbers, 3, Selector<int>([](int& l, int& r) -> int {return (l > r ? 1 : (r > l ? -1 : 0)); })) << endl;
// or with the plain version
cout << "The min with raw fn-pointer is:" << findMinWithFunctionPointer(numbers, 3, Selector<int>([](int& l, int& r) -> int {return (l > r ? 1 : (r > l ? -1 : 0)); }).getRawSelector()) << endl;
Of course, in this example there is no real benefit in passing the ints by reference; it's just an example :-)
Improvements:
You can also modify the Selector class to be more concise like this:
template<class T>
class Selector
{
public:
typedef int(*selector_fn)(T& l, T& r);
Selector(selector_fn theSelector)
: selector(theSelector) {}
virtual int operator()(T& l, T& r) {
return selector(l, r);
}
selector_fn getRawSelector() {
return this->selector;
}
private:
selector_fn selector;
};
Here we take advantage of a simple typedef to define the function pointer type once and use its name rather than writing the declaration over and over.