Static array member is private within context - c++

I like to think I have grasped most aspects of object oriented programming but C++ has yet again presented me with an unexpected result. While creating a static array class in C++ (much like std::array), I added the + operator and it gave me a very strange compiler error.
Has anyone encountered this situation before?
#pragma once
// Debug-only size check for initializer-list construction.
// With NDEBUG defined the macro expands to nothing; otherwise it expands to an
// `if` statement that throws ESL::ArrayInitializationException when
// `init.size()` differs from `array->Size()`.
// NOTE(review): identifiers beginning with a double underscore are reserved
// for the implementation; consider renaming.
#ifdef NDEBUG
#define __ESL_Array_AssertCorrectInitSize(array, init)
#else
#define __ESL_Array_AssertCorrectInitSize(array, init) if (init.size() != array->Size()) throw ESL::ArrayInitializationException("std::initializer_list size is different than static size");
#endif // NDEBUG
#include<algorithm>
#include<cstdint>
#include<initializer_list>
#include<stdexcept>
#include<string>
#include<utility>
namespace ESL
{
    /// Thrown (in builds without NDEBUG) when an Array is constructed from a
    /// std::initializer_list whose length differs from the array's static size.
    class ArrayInitializationException : public std::runtime_error
    {
    public:
        ArrayInitializationException(const std::string &msg): std::runtime_error(msg) {}
    };

    /// Fixed-size array of S elements of type T, stored in place.
    ///
    /// Every Array<T, N> is a distinct class, so one specialization cannot touch
    /// the private storage of another; cross-size operations such as operator+
    /// therefore go through the public begin()/end() accessors.
    template <typename T, uint32_t S>
    class Array
    {
    public:
        /// Value-initializes every element.
        Array(): m_data() {}

        /// Copies the S elements of a built-in array.
        Array(const T (&arr)[S]):
            m_data()
        {
            std::copy(arr, arr + S, m_data);
        }

        /// Copies the elements of `list`.
        /// Without NDEBUG, throws ArrayInitializationException when the list
        /// length is not S. With NDEBUG no check is made; at most S elements are
        /// copied and never more than the list holds, so a short list leaves the
        /// trailing elements value-initialized instead of reading out of bounds.
        Array(std::initializer_list<T> list):
            m_data()
        {
#ifndef NDEBUG
            if (list.size() != S)
                throw ArrayInitializationException("std::initializer_list size is different than static size");
#endif // NDEBUG
            const auto count = list.size() < S ? list.size() : S;
            std::copy(list.begin(), list.begin() + count, m_data);
        }

        // Element-wise copy/move is exactly what the compiler generates for an
        // array member, so the special members are defaulted.
        Array(const Array &a) = default;
        Array(Array &&a) = default;
        Array &operator=(const Array &a) = default;
        Array &operator=(Array &&a) = default;

        /// Number of elements (always S).
        constexpr uint32_t Size() const { return S; }

        /// Unchecked element access.
        const T &operator[](uint32_t index) const { return m_data[index]; }
        T &operator[](uint32_t index) { return m_data[index]; }

        const T *begin() const { return m_data; }
        T *begin() { return m_data; }
        const T *end() const { return m_data + S; }
        T *end() { return m_data + S; }

        /// True when no pair of corresponding elements compares unequal.
        bool operator==(const Array &a) const
        {
            const T *lhs = m_data;
            const T *rhs = a.m_data;
            for (uint32_t i = 0; i < S; ++i, ++lhs, ++rhs)
            {
                if (*lhs != *rhs) return false;
            }
            return true;
        }

        bool operator!=(const Array &a) const
        {
            return !(*this == a);
        }

        /// Concatenation: returns an Array of S + S2 elements holding this
        /// array's elements followed by those of `a`.
        template <uint32_t S2>
        Array<T, S + S2> operator+(const Array<T, S2> &a) const
        {
            Array<T, S + S2> res;
            // Array<T, S + S2> and Array<T, S2> are different classes from
            // Array<T, S>; only their public interface is accessible here.
            T *tail = std::copy(begin(), end(), res.begin());
            std::copy(a.begin(), a.end(), tail);
            return res;
        }

    private:
        T m_data[S];
    };
}
If you need any more code, I'd be willing to post the entire file. I appreciate any input! Thanks!
EDIT: Just realized I forgot the compiler output:
In instantiation of ‘ESL::Array<T, (S + S2)> ESL::Array<T, S>::operator+(const ESL::Array<T, S2>&) const [with unsigned int S2 = 2; T = double; unsigned int S = 3]’:
[build] /home/joshlengel/Dev/C++/ESL/Main.cpp:10:20: required from here
[build] /home/joshlengel/Dev/C++/ESL/ESL/include/collection/Array.h:135:30: error: ‘double ESL::Array<double, 5>::m_data [5]’ is private within this context
[build] 135 | T *dst_itr = res.m_data;
[build] | ~~~~^~~~~~
[build] /home/joshlengel/Dev/C++/ESL/ESL/include/collection/Array.h:149:11: note: declared private here
[build] 149 | T m_data[S];

The reason is that Array<double, 2>, Array<double, 3> and Array<double, 5> are completely different classes, so one of them cannot access the private members of another; that is why the compiler says m_data is private within this context. To fix this you either have to make the m_data member public, or go through the public begin()/end() accessors like so,
template <uint32_t S2>
Array<T, S + S2> operator+(const Array<T, S2>& a) const
{
    Array<T, S + S2> joined;
    // std::copy replaces the hand-written loops (it *can* be more efficient).
    // It returns the position just past the last element written, which is
    // where the second operand's elements start.
    T *tail = std::copy(begin(), end(), joined.begin());
    std::copy(a.begin(), a.end(), tail);
    return joined;
}

Related

How to define a RandomAccessIterator over a pointer to a vector of chars?

I am implementing a kind of dataframe and I want to define a RandomAccessIterator over it, in order to execute the different std algorithms, such as the sorting one. The dataframe of the example contains two column "a" and "b":
a; b;
20; 21;
20; 19;
10; 11;
40; 41;
10; 11;
After sorting with a trivial selection sort this is the result:
a; b;
10; 11;
10; 11;
20; 19;
20; 21;
40; 41;
The problem that I am facing is that std::sort does not work properly, and I don't know whether the implementation of the iterator is sound or not.
This is the code.
File: dataframe.hpp
#pragma once
#include <iostream>
#include <charconv>
#include <vector>
#include <memory>
#include <cstring>
#include <numeric>
#include "iterator.hpp"
namespace df
{
class Record;
class Column;
class Dataframe;
namespace types
{
    /// Base kind of a column; the underlying char doubles as the one-letter
    /// prefix used by Dtype::name().
    enum class Base : char
    {
        CHAR = 'A',
        UNSIGNED = 'U',
        // Other types..
    };

    /// A column type: a base kind plus a size.
    class Dtype
    {
    public:
        Dtype(types::Base base, std::size_t size) : m_base_dtype{base}, m_size{size} {}

        /// Base letter followed by the size, e.g. "U4".
        [[nodiscard]] auto name() const
        {
            std::string label(1, static_cast<char>(m_base_dtype));
            label += std::to_string(m_size);
            return label;
        }

        [[nodiscard]] auto base() const { return m_base_dtype; }
        [[nodiscard]] auto size() const { return m_size; }

        /// True for a CHAR of size 1 or an UNSIGNED of size 1, 2, 4 or 8.
        [[nodiscard]] auto is_primitive() const
        {
            const auto width = size();
            if (base() == types::Base::CHAR)
                return width == 1;
            if (base() == types::Base::UNSIGNED)
                return width == 1 || width == 2 || width == 4 || width == 8;
            return false;
        }

    private:
        types::Base m_base_dtype;
        std::size_t m_size;
    };

    /// Shorthand factory for a CHAR dtype of the given size.
    [[nodiscard]] static auto CHAR(const std::size_t size) { return Dtype(types::Base::CHAR, size); }
    /// Shorthand factory for an UNSIGNED dtype of the given size.
    [[nodiscard]] static auto UNSIGNED(const std::size_t size) { return Dtype(types::Base::UNSIGNED, size); }
}
/// A single column: a raw byte buffer plus the dtype describing its elements.
class Column
{
public:
    /// Takes over the contents of `raw` (the caller's vector is moved from)
    /// and stores a copy of `dtype`.
    Column(std::vector<char> &raw, const types::Dtype dtype) : m_raw{std::move(raw)}, m_dtype{dtype} {}

    // Declaring only the move assignment operator would leave the class with
    // no usable copy or move constructor, so the move constructor is declared
    // alongside it.
    Column(Column &&c) = default;            // Move constructor
    Column &operator=(Column &&c) = default; // Move assignment

    [[nodiscard]] const auto &dtype() const { return m_dtype; }
    [[nodiscard]] auto &raw() { return m_raw; }
    [[nodiscard]] const auto &raw() const { return m_raw; }
    [[nodiscard]] auto *data() { return m_raw.data(); }
    [[nodiscard]] const auto *data() const { return m_raw.data(); }

private:
    std::vector<char> m_raw;
    types::Dtype m_dtype;
};
/// Row-oriented table stored in one contiguous byte buffer. Each record is the
/// concatenation of its fields; field widths come from the column dtypes.
class Dataframe
{
public:
    /// Takes over the contents of `raw` (the caller's vector is moved from) and
    /// derives per-column offsets, the record size and the record count from
    /// `dtypes`.
    Dataframe(std::vector<char> &raw, std::vector<std::string> names, std::vector<types::Dtype> dtypes)
    {
        m_raw = std::move(raw);
        m_column_dtypes = std::move(dtypes);
        m_column_names = std::move(names);
        m_record_size = 0;
        for (const auto &dt : m_column_dtypes)
        {
            m_column_offsets.emplace_back(m_record_size);
            m_record_size += dt.size();
        }
        // With no columns (or only zero-width ones) the record size is 0;
        // report zero records instead of dividing by zero.
        m_record_count = m_record_size == 0 ? 0 : m_raw.size() / m_record_size;
    }

    /// Same as above with no column names.
    Dataframe(std::vector<char> &raw, std::vector<types::Dtype> dtypes) : Dataframe(raw, {}, dtypes) {}

    // Declaring only the move assignment operator would leave the class with
    // no usable copy or move constructor, so the move constructor is declared
    // alongside it.
    Dataframe(Dataframe &&c) = default;            // Move constructor
    Dataframe &operator=(Dataframe &&c) = default; // Move assignment

    [[nodiscard]] auto &raw() { return m_raw; }
    [[nodiscard]] const auto &raw() const { return m_raw; }
    [[nodiscard]] auto *data() { return m_raw.data(); }
    [[nodiscard]] const auto *data() const { return m_raw.data(); }

    // Iterators
    [[nodiscard]] df::Iterator begin()
    {
        return df::Iterator{m_raw.data(), m_record_size};
    }
    [[nodiscard]] df::Iterator end()
    {
        // One past the last *whole* record: if the buffer has trailing bytes
        // that do not form a full record, an end placed at data()+size() could
        // never be reached by stepping one record at a time.
        return df::Iterator{m_raw.data() + m_record_count * m_record_size, m_record_size};
    }

    [[nodiscard]] auto shape() const { return std::make_pair(m_record_count, m_column_dtypes.size()); }
    [[nodiscard]] auto record_count() const { return m_record_count; }
    [[nodiscard]] auto record_size() const { return m_record_size; }
    [[nodiscard]] const auto &names() const { return m_column_names; }
    [[nodiscard]] const auto &dtypes() const { return m_column_dtypes; }
    [[nodiscard]] const auto &offsets() const { return m_column_offsets; }

    /// Prints every record.
    void print() const { print(m_record_count); }

    /// Prints the header line followed by at most `initial_records` records.
    /// Every field, including the last of a line, is followed by "; ".
    void print(const std::size_t initial_records) const
    {
        // Print header
        for (const auto &column_name : m_column_names)
        {
            std::cout << column_name << "; ";
        }
        std::cout << std::endl;

        // Print rows
        const std::size_t records_to_print = initial_records < m_record_count ? initial_records : m_record_count;
        for (std::size_t i = 0; i < records_to_print; i++)
        {
            const char *field_start = m_raw.data() + i * record_size();
            for (const auto &field : m_column_dtypes)
            {
                switch (field.base())
                {
                case types::Base::UNSIGNED:
                {
                    std::uint64_t uint_value = 0;
                    // Never copy more bytes than the destination integer holds.
                    const std::size_t width = field.size() < sizeof(uint_value) ? field.size() : sizeof(uint_value);
                    std::memcpy(&uint_value, field_start, width);
                    std::cout << uint_value;
                    break;
                }
                case types::Base::CHAR:
                {
                    std::cout << std::string(field_start, field.size());
                    break;
                }
                }
                field_start += field.size();
                // New column
                std::cout << "; ";
            }
            // New row
            std::cout << std::endl;
        }
    }

    /// Returns a new Dataframe holding copies of the buffer, names and dtypes.
    std::shared_ptr<Dataframe> copy() const
    {
        auto raw_copy = std::vector<char>(m_raw);
        return std::make_shared<Dataframe>(raw_copy, std::vector<std::string>(m_column_names), std::vector<types::Dtype>(m_column_dtypes));
    }

private:
    std::vector<char> m_raw = {};
    std::vector<std::string> m_column_names = {};
    std::vector<types::Dtype> m_column_dtypes = {};
    std::vector<std::size_t> m_column_offsets = {};
    std::size_t m_record_size = {};
    std::size_t m_record_count = {};
};
using namespace types;
static std::shared_ptr<Dataframe> read_from_vector(const std::vector<std::vector<std::string>> values, const std::vector<std::string> names, const std::vector<Dtype> dtypes)
{
const auto record_size = std::accumulate(dtypes.begin(), dtypes.end(), std::size_t{0},
[](std::size_t accum, const auto &m)
{ return accum + m.size(); });
const auto total_size = values.size() * record_size;
const std::size_t INCR_RECORDS = std::max(total_size / (10 * record_size), std::size_t{65536});
auto raw = std::vector<char>{};
std::size_t written_records = 0;
auto offsets = std::vector<std::size_t>{};
for (int offset = 0; const auto &kd : dtypes)
{
offsets.push_back(offset);
offset += kd.size();
}
for (auto value : values)
{
if (written_records >= raw.size() / record_size)
{
raw.resize(raw.size() + INCR_RECORDS * record_size, char{' '});
}
for (int i = 0; i < names.size(); i++)
{
const auto name = names[i];
const auto dtype = dtypes[i];
const auto offset = offsets[i];
const auto pos = written_records * record_size + offset;
switch (dtype.base())
{
case df::Base::CHAR:
{
const auto v = value[i];
const auto byte_to_copy = std::min(v.size(), dtype.size());
std::memcpy(raw.data() + pos,
v.data() + v.size() - byte_to_copy, byte_to_copy); // Prendo gli ultimi byte
break;
}
case df::Base::UNSIGNED:
{
const auto v = std::stoull(value[i]);
const auto byte_to_copy = dtype.size();
std::memcpy(raw.data() + pos, &v, byte_to_copy); // Prendo gli ultimi byte
break;
}
default:
throw std::runtime_error("ColumnType non riconosciuto");
}
}
written_records++;
}
raw.resize(written_records * record_size);
raw.shrink_to_fit();
return std::make_shared<Dataframe>(raw, names, dtypes);
}
}
File: iterator.hpp
#pragma once
#include <iostream>
#include <cstring>
namespace df
{
    /// Iterator over fixed-size records laid out back to back in a char buffer.
    /// Dereferencing yields a proxy (record_reference), not a real reference.
    class Iterator
    {
        std::size_t size; // bytes per record
        char *ptr;        // start of the current record

    public:
        struct record_reference;

        /// Owning copy of one record's bytes.
        struct record_value
        {
            std::size_t size;
            char *ptr;

            record_value(const record_reference &t) : record_value(t.size, t.ptr) {}

            /// Allocates `m_size` bytes and copies them from `m_ptr`.
            record_value(const std::size_t m_size, char *m_ptr)
            {
                this->size = m_size;
                this->ptr = new char[this->size];
                std::memcpy(ptr, m_ptr, this->size);
            }

            // The buffer is owned, so copies must be deep; a member-wise copy
            // would make two objects delete[] the same pointer.
            record_value(const record_value &other) : record_value(other.size, other.ptr) {}

            record_value &operator=(const record_value &other)
            {
                if (this != &other)
                {
                    // Allocate first so *this is untouched if new throws.
                    char *fresh = new char[other.size];
                    std::memcpy(fresh, other.ptr, other.size);
                    delete[] this->ptr;
                    this->ptr = fresh;
                    this->size = other.size;
                }
                return *this;
            }

            ~record_value()
            {
                delete[] this->ptr;
            }
        };

        /// Non-owning view of one record. Copy construction rebinds the view;
        /// assignment copies `size` bytes INTO the viewed record.
        struct record_reference
        {
            std::size_t size;
            char *ptr;

            record_reference(const std::size_t m_size, char *m_ptr)
            {
                this->size = m_size;
                this->ptr = m_ptr;
            }
            record_reference(const record_reference &t)
            {
                this->size = t.size;
                this->ptr = t.ptr;
            }
            // record_reference(const record_value &t) : record_reference(t.size, t.ptr) {};
            record_reference &operator=(const record_value &t)
            {
                std::memcpy(ptr, t.ptr, size);
                return *this;
            }
            record_reference &operator=(const record_reference &t)
            {
                std::memcpy(ptr, t.ptr, size);
                return *this;
            }
            record_reference &operator=(char *t)
            {
                std::memcpy(ptr, t, size);
                return *this;
            }
            operator char *()
            {
                return ptr;
            }
            operator const char *() const { return ptr; }
        };

        using iterator_category = std::random_access_iterator_tag;
        using value_type = record_value;
        using reference = record_reference;
        using pointer = void; // no operator->; declared so all five member types exist
        using difference_type = std::ptrdiff_t;

        // default constructible
        Iterator() : size(0), ptr(nullptr)
        {
        }
        Iterator(const Iterator &t) = default;
        // copy assignable
        Iterator &operator=(const Iterator &t) = default;

        Iterator(char *ptr, const std::size_t size) : size{size}, ptr(ptr)
        {
        }

        record_reference operator*() const
        {
            return {size, ptr};
        }

        // Prefix
        Iterator &operator++()
        {
            ptr += size;
            return *this;
        }
        // Postfix
        Iterator operator++(int)
        {
            auto tmp = *this;
            ++*this;
            return tmp;
        }
        Iterator &operator--()
        {
            ptr -= size;
            return *this;
        }
        Iterator operator--(int)
        {
            auto tmp = *this;
            --*this;
            return tmp;
        }

        // In the arithmetic below `size` is converted to the signed
        // difference_type first: mixing ptrdiff_t with size_t would perform
        // the computation in unsigned arithmetic and give wrong results for
        // negative distances and offsets.
        difference_type operator-(const Iterator &it) const
        {
            return (this->ptr - it.ptr) / static_cast<difference_type>(size);
        }
        Iterator operator+(const difference_type &offset) const
        {
            return Iterator(ptr + offset * static_cast<difference_type>(size), size);
        }
        friend Iterator operator+(const difference_type &diff, const Iterator &it)
        {
            return it + diff;
        }
        Iterator operator-(const difference_type &diff) const
        {
            return Iterator(ptr - diff * static_cast<difference_type>(size), size);
        }
        reference operator[](const difference_type &offset) const
        {
            return {size, ptr + offset * static_cast<difference_type>(size)};
        }

        bool operator==(const Iterator &it) const
        {
            return this->ptr == it.ptr;
        }
        bool operator!=(const Iterator &it) const
        {
            return !(*this == it);
        }
        bool operator<(const Iterator &it) const
        {
            return this->ptr < it.ptr;
        }
        bool operator>=(const Iterator &it) const
        {
            return this->ptr >= it.ptr;
        }
        bool operator>(const Iterator &it) const
        {
            return this->ptr > it.ptr;
        }
        bool operator<=(const Iterator &it) const
        {
            return this->ptr <= it.ptr;
        }

        Iterator &operator+=(const difference_type &diff)
        {
            ptr += diff * static_cast<difference_type>(size);
            return *this;
        }
        Iterator &operator-=(const difference_type &diff)
        {
            ptr -= diff * static_cast<difference_type>(size);
            return *this;
        }

        operator Iterator() const
        {
            return Iterator(ptr, size);
        }
    };

    /// Exchanges the bytes of the two viewed records (`a.size` bytes).
    /// `inline` because this is defined in a header: a non-inline definition
    /// would be a multiple-definition error once two translation units
    /// include it.
    inline void swap(df::Iterator::record_reference a, df::Iterator::record_reference b)
    {
        unsigned char *p = (unsigned char *)a.ptr;
        unsigned char *q = (unsigned char *)b.ptr;
        unsigned char *const sentry = p + a.size;
        for (; p < sentry; ++p, ++q)
        {
            const unsigned char t = *p;
            *p = *q;
            *q = t;
        }
    }
}
File: comparator.hpp
#pragma once
#include <memory>
#include <functional>
#include "dataframe.hpp"
#include "iterator.hpp"
namespace compare
{
using comparator_fn = std::function<int(const df::Iterator::record_reference, const df::Iterator::record_reference)>;
/// Three-way comparator for a field located `offset` bytes into a record.
///
/// For sizes 3, 5, 7 or greater than 8 the field is compared byte-wise with
/// std::memcmp over `size` bytes. Otherwise sizeof(T) bytes are loaded into a T
/// and compared with operator<, giving -1, +1 or 0.
/// T must be default-constructible and safe to fill with std::memcpy.
template <typename T, std::size_t offset = 0, std::size_t size = sizeof(T)>
static inline comparator_fn make_comparator()
{
    if constexpr (size == 3 or size == 5 or size == 7 or size > 8)
    {
        return [=](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
        { return std::memcmp(a + offset, b + offset, size); };
    }
    else
    {
        return [](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
        {
            // Load through memcpy: record bytes carry no alignment or type
            // guarantee, so dereferencing a casted T* is not safe.
            T lhs;
            T rhs;
            std::memcpy(&lhs, a + offset, sizeof(T));
            std::memcpy(&rhs, b + offset, sizeof(T));
            return lhs < rhs ? -1 : rhs < lhs ? +1
                                              : 0;
        };
    }
}
/// Three-way comparator for a T stored `offset` bytes into a record, with the
/// offset chosen at run time. Returns -1, +1 or 0 according to operator< on T.
/// T must be default-constructible and safe to fill with std::memcpy.
template <typename T>
static inline comparator_fn make_comparator(const std::size_t offset)
{
    return [=](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
    {
        // Load through memcpy: record bytes carry no alignment or type
        // guarantee, so dereferencing a casted T* is not safe.
        T lhs;
        T rhs;
        std::memcpy(&lhs, a + offset, sizeof(T));
        std::memcpy(&rhs, b + offset, sizeof(T));
        return lhs < rhs ? -1 : rhs < lhs ? +1
                                          : 0;
    };
}
/// Builds the three-way comparator for one column.
///
/// @param dtype  column type (base kind and size)
/// @param offset byte offset of the column inside a record
/// @throws std::runtime_error for a base kind other than CHAR or UNSIGNED
static inline comparator_fn make_column_comparator(const df::Dtype dtype, const std::size_t offset)
{
    switch (dtype.base())
    {
    case df::Base::CHAR:
    {
        if (dtype.size() == 1)
            return make_comparator<std::uint8_t>(offset);
        // Original author's open question: is there any benefit in hard-coding
        // the length for 2-byte fields, or in treating them as an unsigned
        // short? A byte-wise memcmp covers every width, including 2.
        const std::size_t width = dtype.size();
        return [=](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
        { return std::memcmp(a + offset, b + offset, width); };
    }
    case df::Base::UNSIGNED:
    {
        // Never copy more bytes than the 64-bit temporaries can hold.
        const std::size_t width = dtype.size() < sizeof(std::uint64_t) ? dtype.size() : sizeof(std::uint64_t);
        return [=](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
        {
            // NOTE(review): copying the field into the first bytes of a zeroed
            // uint64 yields its numeric value only on little-endian hosts.
            std::uint64_t uint_value_a = 0;
            std::uint64_t uint_value_b = 0;
            std::memcpy(&uint_value_a, a + offset, width);
            std::memcpy(&uint_value_b, b + offset, width);
            return (uint_value_a < uint_value_b ? -1 : uint_value_a > uint_value_b ? +1
                                                                                   : 0);
        };
    }
    default:
        throw std::runtime_error("Unsupported dtype");
    }
}
/// Builds a "less than" predicate that orders records by every column of the
/// dataframe in turn: the first column whose comparator reports a difference
/// decides; records equal on all columns are not less than each other.
static inline comparator_fn make_composite_two_way_comparator(const std::shared_ptr<df::Dataframe> &T)
{
    const auto &column_types = T->dtypes();
    const auto &column_offsets = T->offsets();

    // One three-way comparator per column, in column order.
    std::vector<comparator_fn> per_column;
    for (std::size_t col = 0; col < column_types.size(); ++col)
    {
        per_column.emplace_back(make_column_comparator(column_types[col], column_offsets[col]));
    }

    return [per_column](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
    {
        for (const auto &column_compare : per_column)
        {
            const auto result = column_compare(a, b);
            // A non-zero result settles the order; zero means "tie, look at
            // the next column".
            if (result != 0)
                return result < 0;
        }
        return false;
    };
}
}
File: main.cpp
#include <iostream>
#include <vector>
#include "dataframe.hpp"
#include "comparator.hpp"
/// Sorts [first, last) in place with selection sort.
///
/// @param first,last range to sort; the iterator type must expose a nested
///                   `value_type` that can be built from `*it` and assigned
///                   back through `*it`
/// @param comp       returns true when its first argument orders before its second
template <typename RandomAccessIterator, typename Comparator>
static void selection_sort(RandomAccessIterator first, RandomAccessIterator last, Comparator comp)
{
    // Use the iterator's own value type instead of hard-coding df::Iterator's,
    // so the template works for other iterators as well.
    using value_type = typename RandomAccessIterator::value_type;

    for (auto i = first; i != last; ++i)
    {
        auto min = i;
        for (auto j = i + 1; j != last; ++j)
        {
            if (comp(*j, *min))
                min = j;
        }
        // Already in place: skip the exchange, which would otherwise assign
        // an element to itself.
        if (min != i)
        {
            value_type temp = *i;
            *i = *min;
            *min = temp;
        }
        // Alternative
        // std::iter_swap(i, min);
    }
}
/// Demo: builds a two-column dataframe, prints it, sorts it with
/// selection_sort, prints it again, then calls std::sort on the same range.
int main(int argc, char const *argv[])
{
    std::vector<std::string> values{"20", "21", "20", "19", "10", "11", "40", "41", "10", "11"};

    // Group the flat list into rows of two fields each
    std::vector<std::vector<std::string>> v;
    for (int i = 0; i < values.size(); i += 2)
    {
        v.push_back(std::vector<std::string>{values[i], values[i + 1]});
    }

    std::vector<std::string> column_names = {"a", "b"};
    df::Dtype d = df::Dtype(df::Base::UNSIGNED, 4);
    std::vector dtypes = {d, d};

    // Create a dataframe
    std::shared_ptr<df::Dataframe> df = df::read_from_vector(v, column_names, dtypes);

    std::cout << "Before sorting" << std::endl;
    df->print();

    // Orders rows by column a, then by column b, ascending
    auto comparator = compare::make_composite_two_way_comparator(df);
    selection_sort(df->begin(), df->end(), comparator);

    std::cout << "\nAfter sorting" << std::endl;
    df->print();

    // With the std::sort it does not work
    std::sort(df->begin(), df->end(), comparator);
    return 0;
}
Your type is not a C++17 RandomAccessIterator, because it isn't a C++17 ForwardIterator, because reference is an object type, not a reference type.
The type It satisfies ForwardIterator if
Let T be the value type of It. The type std::iterator_traits<It>::reference must be either
T& or T&& if It satisfies OutputIterator (It is mutable), or
const T& or const T&& otherwise (It is constant),
(Other requirements elided)
You will be able to satisfy the C++20 concept std::random_access_iterator, because that relaxes the requirement on It::reference.
In C++17, the reference type of an iterator must be precisely value_type& in order for that iterator to be random access. Only input iterators can have the reference type be something other than value_type&. So in C++17, proxy iterators are limited to input iterators. And every algorithm written against C++17 has this expectation.
The C++20 ranges library adds the ability to have random access proxy iterators. And the C++20 algorithms that use those range concepts will respect them.

Compiler is not producing FMA instructions for simple loop compiled for AVX512 CPU

With an implicit loop-vectorization experiment, GCC 11.2 does not produce fma instructions but only packed add and packed multiply instructions:
https://godbolt.org/z/srbfWMEG6
Sample code for testing:
#include <iostream>
#include <fstream>
#include <sstream>
#include <complex>
#include <vector>
#include <omp.h>
using namespace std;
// Frame count; not referenced by the code visible in this sample.
constexpr int frames=20;
// Image dimensions in pixels. Stored as float, so expressions that use them
// (index arithmetic, loop bounds) are evaluated in floating point.
constexpr float width = 2000;
constexpr float height = 2000;
// A quarter of the image width; not referenced by the code visible in this sample.
constexpr int grainSize=width/4; // pixels
// Renders the image, prints timings and writes mandelbrot.ppm (defined below).
void createImage();
// Scalar reference computation of one pixel's value (defined below).
int getPoint(int x, int y);
/*
* VectorizedKernel.h
*
* Created on: Apr 16, 2022
* Author: tugrul
*/
#ifndef VECTORIZEDKERNEL_H_
#define VECTORIZEDKERNEL_H_
#include <iostream>
#include <string>
#include <functional>
#include<cmath>
namespace Vectorization
{
template<typename Type, int Simd>
struct KernelData
{
alignas(32)
Type data[Simd];
KernelData(){}
KernelData(const Type broadcastedInit)
{
for(int i=0;i<Simd;i++)
{
data[i] = broadcastedInit;
}
}
KernelData(const KernelData<Type,Simd> & vectorizedIit)
{
for(int i=0;i<Simd;i++)
{
data[i] = vectorizedIit.data[i];
}
}
inline void readFrom(const Type * const __restrict__ ptr) noexcept
{
for(int i=0;i<Simd;i++)
{
data[i] = ptr[i];
}
}
inline void writeTo(Type * const __restrict__ ptr) const noexcept
{
for(int i=0;i<Simd;i++)
{
ptr[i] = data[i];
}
}
inline void writeTo(Type * const __restrict__ ptr, const KernelData<int,Simd> vec) const noexcept
{
for(int i=0;i<Simd;i++)
{
ptr[vec.data[i]] = data[i];
}
}
template<typename F>
inline void idCompute(const int id, const F & f) noexcept
{
for(int i=0;i<Simd;i++)
{
data[i] = f(id+i);
}
}
// bool
inline KernelData<int,Simd> lessThan(const KernelData<Type,Simd> vec) const noexcept
{
KernelData<int,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = data[i]<vec.data[i];
}
return result;
}
// bool
inline KernelData<int,Simd> lessThanOrEquals(const KernelData<Type,Simd> vec) const noexcept
{
KernelData<int,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = data[i]<=vec.data[i];
}
return result;
}
// bool
inline KernelData<int,Simd> lessThanOrEquals(const Type val) const noexcept
{
KernelData<int,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = data[i]<=val;
}
return result;
}
// bool
inline KernelData<int,Simd> greaterThan(const KernelData<Type,Simd> vec) const noexcept
{
KernelData<int,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = data[i]>vec.data[i];
}
return result;
}
// bool
inline KernelData<int,Simd> greaterThan(const Type val) const noexcept
{
KernelData<int,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = data[i]>val;
}
return result;
}
// bool
inline KernelData<int,Simd> equals(const KernelData<Type,Simd> vec) const noexcept
{
KernelData<int,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = data[i] == vec.data[i];
}
return result;
}
// bool
inline KernelData<int,Simd> equals(const Type val) const noexcept
{
KernelData<int,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = data[i] == val;
}
return result;
}
// bool
inline KernelData<int,Simd> notEqual(const KernelData<Type,Simd> vec) const noexcept
{
KernelData<int,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = data[i] != vec.data[i];
}
return result;
}
// bool
inline KernelData<int,Simd> notEqual(const Type val) const noexcept
{
KernelData<int,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = data[i] != val;
}
return result;
}
// bool
inline KernelData<int,Simd> logicalAnd(const KernelData<int,Simd> vec) const noexcept
{
KernelData<int,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = data[i] && vec.data[i];
}
return result;
}
// bool
inline KernelData<int,Simd> logicalOr(const KernelData<int,Simd> vec) const noexcept
{
KernelData<int,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = data[i] || vec.data[i];
}
return result;
}
inline bool areAllTrue() const noexcept
{
bool result = true;
for(int i=0;i<Simd;i++)
{
result = result && data[i];
}
return result;
}
inline bool isAnyTrue() const noexcept
{
bool result = false;
for(int i=0;i<Simd;i++)
{
result = result || data[i];
}
return result;
}
template<typename ComparedType>
inline const KernelData<ComparedType,Simd> ternary(const KernelData<ComparedType,Simd> vec1, const KernelData<ComparedType,Simd> vec2) const noexcept
{
KernelData<ComparedType,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = data[i]?vec1.data[i]:vec2.data[i];
}
return result;
}
template<typename ComparedType>
inline const KernelData<ComparedType,Simd> ternary(const ComparedType val1, const ComparedType val2) const noexcept
{
KernelData<ComparedType,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = data[i]?val1:val2;
}
return result;
}
inline void broadcast(const Type val) noexcept
{
for(int i=0;i<Simd;i++)
{
data[i] = val;
}
}
inline void readFrom(const KernelData<Type,Simd> vec) noexcept
{
for(int i=0;i<Simd;i++)
{
data[i] = vec.data[i];
}
}
template<typename NewType>
inline const KernelData<NewType,Simd> cast() const noexcept
{
KernelData<NewType,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = (NewType)data[i];
}
return result;
}
inline const KernelData<Type,Simd> sqrt() const noexcept
{
KernelData<Type,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = std::sqrt(data[i]);
}
return result;
}
inline const KernelData<Type,Simd> add(const KernelData<Type,Simd> vec) const noexcept
{
KernelData<Type,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = data[i] + vec.data[i];
}
return result;
}
inline const KernelData<Type,Simd> sub(const KernelData<Type,Simd> vec) const noexcept
{
KernelData<Type,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = data[i] - vec.data[i];
}
return result;
}
inline const KernelData<Type,Simd> div(const KernelData<Type,Simd> vec) const noexcept
{
KernelData<Type,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = data[i] / vec.data[i];
}
return result;
}
inline const KernelData<Type,Simd> div(const Type val) const noexcept
{
KernelData<Type,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = data[i] / val;
}
return result;
}
inline const KernelData<Type,Simd> mul(const KernelData<Type,Simd> vec) const noexcept
{
KernelData<Type,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = data[i] * vec.data[i];
}
return result;
}
// returns current vector * vec1 + vec2
inline const KernelData<Type,Simd> fusedMultiplyAdd(const KernelData<Type,Simd> vec1, const KernelData<Type,Simd> vec2) const noexcept
{
KernelData<Type,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = data[i] * vec1.data[i] + vec2.data[i];
}
return result;
}
// returns current vector * vec1 - vec2
inline const KernelData<Type,Simd> fusedMultiplySub(const KernelData<Type,Simd> vec1, const KernelData<Type,Simd> vec2) const noexcept
{
KernelData<Type,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = data[i] * vec1.data[i] - vec2.data[i];
}
return result;
}
inline const KernelData<Type,Simd> mul(const Type val) const noexcept
{
KernelData<Type,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = data[i] * val;
}
return result;
}
inline const KernelData<Type,Simd> modulus(const KernelData<Type,Simd> vec) const noexcept
{
KernelData<Type,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = data[i] % vec.data[i];
}
return result;
}
inline const KernelData<Type,Simd> modulus(const Type val) const noexcept
{
KernelData<Type,Simd> result;
for(int i=0;i<Simd;i++)
{
result.data[i] = data[i] % val;
}
return result;
}
};
// Creates KernelData packs that are `mask` lanes wide; `width` exposes that
// lane count as a data member.
template<int mask>
struct KernelDataFactory
{
    KernelDataFactory() : width{mask}
    {
    }

    // A pack with uninitialized lanes.
    template<typename Type>
    inline
    KernelData<Type,mask> generate() const
    {
        using Pack = KernelData<Type,mask>;
        return Pack();
    }

    // A copy of an existing pack.
    template<typename Type>
    inline
    KernelData<Type,mask> generate(const KernelData<Type,mask> & vec) const
    {
        using Pack = KernelData<Type,mask>;
        return Pack(vec);
    }

    // A pack with every lane set to val.
    template<typename Type>
    inline
    KernelData<Type,mask> generate(const Type & val) const
    {
        using Pack = KernelData<Type,mask>;
        return Pack(val);
    }

    const int width;
};
// Empty tag type: carries the kernel's extra argument types as a parameter
// pack so createKernel can deduce them.
template<class... Args>
struct KernelArgs
{
};
// Runs a callable over n work items, SimdWidth items per call, with any
// remainder handled one item at a time.
//
// F is the callable's type as deduced by createKernel; it is a reference type
// when createKernel is given an lvalue. Args are the extra arguments forwarded
// to every call.
template<int SimdWidth, typename F, typename... Args>
class Kernel
{
public:
    // std::forward rather than std::move: when F is an lvalue-reference type
    // the member is a reference and must bind to the lvalue (std::move would
    // not compile in that case); when F is a value type this still moves.
    Kernel(F&& kernelPrm):kernel(std::forward<F>(kernelPrm))
    {
    }

    // Calls kernel(factory, id, args...) for ids 0..n-1.
    // Full chunks receive a SimdWidth-wide factory and an id pack holding
    // SimdWidth consecutive ids; the n % SimdWidth leftover ids are each
    // processed with a 1-wide factory and id pack.
    void run(int n, Args... args)
    {
        const int nLoop = (n/SimdWidth);
        const KernelDataFactory<SimdWidth> factory;
        for(int i=0;i<nLoop;i++)
        {
            auto id = factory.template generate<int>();
            id.idCompute(i*SimdWidth,[](const int prm){ return prm;});
            kernel(factory, id, args...);
        }

        // Tail: ids that did not fill a whole chunk.
        const int m = n%SimdWidth;
        if(m != 0)
        {
            const KernelDataFactory<1> factoryLast;
            for(int i=0;i<m;i++)
            {
                auto id = factoryLast.template generate<int>();
                id.idCompute(nLoop*SimdWidth+i,[](const int prm){ return prm;});
                kernel(factoryLast, id, args...);
            }
        }
    }
private:
    F kernel;
};
template<int SimdWidth, typename F, class...Args>
auto createKernel(F&& kernelPrm, KernelArgs<Args...> const& _prm_)
{
return Kernel<SimdWidth, F, Args...>(std::forward<F>(kernelPrm));
}
}
#endif /* VECTORIZEDKERNEL_H_ */
// Entry point: renders and writes the image, then exits with status 0
// (reaching the end of main is equivalent to `return 0`).
int main()
{
    createImage();
}
#include <stdint.h> // <cstdint> is preferred in C++, but stdint.h works.
#ifdef _MSC_VER
# include <intrin.h>
#else
# include <x86intrin.h>
#endif
// optional wrapper if you don't want to just use __rdtsc() everywhere
// Thin wrapper returning the value of __rdtsc(); use it if you don't want to
// call the intrinsic everywhere.
// Optional fences, left disabled:
//   _mm_lfence() before the read waits for earlier insns to retire before reading the clock;
//   _mm_lfence() after the read blocks later instructions until rdtsc retires.
inline
uint64_t readTSC() {
    return __rdtsc();
}
// Computes the Mandelbrot image with the vectorized kernel (10 timed runs over
// the same buffer), prints the cycles per pixel of each run, then writes the
// result to "mandelbrot.ppm" as a plain-text P3 image using only the red
// channel.
void createImage()
{
    std::vector<int> img(height*width);

    // compute single-thread "scalar" CPU
    /*
    for (size_t i = 0; i < height; i++)
    {
        for (size_t j = 0; j < width; j++)
        {
            img[j+i*width]= getPoint(i, j);
        }
    }
    */

    // single-thread vectorized
    constexpr int simd = 64;
    auto kernel = Vectorization::createKernel<simd>([&](auto & factory, auto & idThread, int * img){
        // pixel coordinates from the linear work-item id
        const auto j = idThread.modulus(width);
        const auto i = idThread.div(width);

        auto x0 = j.template cast<float>();
        auto y0 = i.template cast<float>();

        // map the pixel to the complex plane (same mapping as getPoint)
        const auto heightDiv2 = factory.template generate<float>(height/2.0f);
        const auto x = x0.sub(width/2.0f).sub(width/4.0f).div(width/3.0f);
        const auto y = heightDiv2.sub(y0).div(width/3.0f);

        const auto imagc = factory.template generate<float>(y);
        const auto realc = factory.template generate<float>(x);
        auto imagz = factory.template generate<float>(0);
        auto realz = factory.template generate<float>(0);

        // loop
        bool anyTrue = true;
        auto iteration = factory.template generate<int>(0);
        while(anyTrue)
        {
            // computing while loop condition start
            const auto absLessThan2 = realz.mul(realz).add(imagz.mul(imagz)).lessThan(4.0f);
            const auto whileLoopCondition = absLessThan2.logicalAnd(iteration.lessThanOrEquals(35));
            anyTrue = whileLoopCondition.isAnyTrue();
            // computing while loop condition end

            // do complex multiplication z = z*z + c
            const auto zzReal = realz.fusedMultiplySub(realz,imagz.mul(imagz));
            const auto zzImag = realz.fusedMultiplyAdd(imagz,imagz.mul(realz));

            // if a lane has completed work, do not modify it
            realz = whileLoopCondition.ternary( zzReal.add(realc), realz);
            imagz = whileLoopCondition.ternary( zzImag.add(imagc), imagz);

            // increment iteration
            iteration = iteration.add(whileLoopCondition.ternary(1,0)); // todo: ternary increment
        }

        // colour: iteration*255/33 for lanes that stopped before 34 iterations, else 0
        const auto thirtyFour = factory.template generate<int>(34);
        const auto ifLessThanThirtyFour = iteration.lessThan(thirtyFour);
        const auto conditionalValue1 = iteration.mul(255).div(33);
        const auto conditionalValue2 = factory.template generate<int>(0);
        const auto returnValue = ifLessThanThirtyFour.ternary(conditionalValue1, conditionalValue2);
        const auto writeAddr = j.add(i.mul(width));
        returnValue.writeTo(img,writeAddr);
    },Vectorization::KernelArgs<int*>{});

    for(int i=0;i<10;i++)
    {
        auto t1 = readTSC();
        kernel.run(width*height,img.data());
        auto t2 = readTSC();
        std::cout<<(t2-t1)/(width*height)<<" cycles per pixel"<<std::endl;
    }

    // create string
    // '\n' instead of endl: the text is identical, without a flush request per
    // pixel line.
    stringstream sstr;
    sstr << "P3" << '\n' << width << " " << height << '\n' << 255 << '\n';
    for (size_t i = 0; i < height; i++)
    {
        for (size_t j = 0; j < width; j++)
        {
            sstr << img[j+i*width] << " 0 0" << '\n';
        }
    }

    // write to file at once
    ofstream fout;
    fout.open("mandelbrot.ppm");
    if (fout.is_open())
    {
        cout << "File is opened!" << endl;
        fout << sstr.str();
        fout.close();
    }
    else
    {
        cout << "Could not open the file!" << endl;
    }
}
int getPoint(int a, int b)
{
float x = static_cast<float>(b);
float y = static_cast<float>(a);
x = (x - width / 2 - width/4)/(width/3);
y = (height / 2 - y)/(width/3);
complex<float> c (x,y);
complex <float> z(0, 0);
size_t iter = 0;
while (abs(z) < 2 && iter <= 35)
{
z = z * z + c;
iter++;
}
if (iter < 34) return iter*255/33;
else return 0;
}
What I tried to make it produce any kind of fma instruction:
change -mprefer-vector-width=512 to 256 to 128
change -march=native to cascadelake to skylake-avx512
change -mavx512f to -mavx512bw
but all produced just add and mul instructions in vectorized form.
Is there an intrinsic-free way to tell GCC or any other compiler to "produce fma for this line of code" by simple annotations (so that it can use whichever instruction set is available, SSE/AVX/AVX2/AVX512/..., while keeping the code readable)?
The specific function observed is this:
// Lane-wise multiply-add: for every lane, computes
// (this lane) * (vec1 lane) + (vec2 lane) and returns the results as a new
// KernelData. The multiply and add are kept in one expression per lane.
inline const KernelData<Type,Simd> fusedMultiplyAdd(const KernelData<Type,Simd> vec1, const KernelData<Type,Simd> vec2) const noexcept
{
    KernelData<Type,Simd> out;
    for(int lane = 0; lane < Simd; ++lane)
    {
        out.data[lane] = data[lane] * vec1.data[lane] + vec2.data[lane];
    }
    return out;
}
The test code produces a file containing ppm formatted mandelbrot image. It does the computation in vectorized form, similar to CUDA/OpenCL and gives speedup of around 10x-16x (depends on godbolt server's load) and I think FMA can speed it up more.

sorting array of pointers in c++ using lambdas

I am trying to write sorting template function to make it work with custom classes.
My code is:
#include <iostream>
#include <vector>
// Small value wrapper used as the element type in the sorting example.
struct test
{
    int value;

    // Converting constructor: stores `a` as the wrapped value.
    test(int a)
        : value(a)
    {
    }

    // Writes the wrapped value to stdout (no trailing newline).
    void print()
    {
        printf("the value is : %d", value);
    }
};
template <class T>
// Bubble-sorts the `size` elements starting at `m_data` in place.
//
// Two neighbouring elements are swapped whenever cmp(previous, next) returns
// true, so passing a "less than" comparator leaves the range in descending
// order and a "greater than" comparator leaves it ascending.
//
// m_data  pointer to the first element (may be anything when size == 0)
// size    number of elements to sort
// cmp     function pointer deciding whether two neighbours must be swapped
//
// The loop indices use size_t, matching `size`, so they can never wrap before
// reaching the bound (the previous 32-bit indices could for very large sizes).
void bubblesort(T *m_data, size_t size, bool (*cmp)(const T &l, const T &r))
{
    for (size_t pass = 0; pass < size; ++pass)
        // After each pass the last `pass` slots are already in final position.
        for (size_t j = 1; j < size - pass; ++j)
            if (cmp(m_data[j - 1], m_data[j]))
            {
                T temp = m_data[j];
                m_data[j] = m_data[j - 1];
                m_data[j - 1] = temp;
            }
}
int main()
{
std::vector<test> arr;
for (size_t i = 0; i < 10; i++)
arr.emplace_back(i);
std::vector<test *> arr1;
for (auto &i : arr)
arr1.emplace_back(&i);
bubblesort<test>(&arr[0], arr.size(), [](const test &l, const test &r) { return l.value < r.value; });
// bubblesort<test*>(&arr1[0], arr1.size(), [](const test *&l, const test *&r) { return l->value < r->value; });
for (auto i : arr)
printf("%d\n", i.value);
}
My question is: how do you sort arr1 using the bubblesort function above? What modification do I have to make to my code to be able to do so?
Uncommenting the bubblesort line gives this error:
error: invalid user-defined conversion from 'main()::<lambda(const test*&, const test*&)>' to 'bool (*)(test* const&, test* const&)' [-fpermissive]
[build] 48 | bubblesort<test *>(&arr1[0], arr1.size(), [](const test *&l, const test *&r) { return l->value < r->value; });
Your function has the wrong type; T is test*, so you need test* const& - "reference to const pointer to test" - as the error message says.
(const test*& is "reference to pointer to const test".)
Your solution cannot work with templates: the compiler cannot deduce the parameter types.
Consider modifying your code as follows:
// Small value wrapper used as the element type in the sorting example.
struct test
{
    int value;

    // Explicit constructor: stores `a` as the wrapped value.
    explicit test(int a)
        : value(a)
    {
    }

    // Writes the wrapped value to stdout (no trailing newline).
    void print()
    {
        printf("the value is : %d", value);
    }
};
// Bubble-sorts the `size` elements starting at `m_data` in place.
//
// Two neighbouring elements are swapped whenever cmp(previous, next) returns
// true, so passing a "less than" comparator leaves the range in descending
// order and a "greater than" comparator leaves it ascending.
//
// T        element type (may be given explicitly or deduced from m_data)
// Compare  any callable accepting two elements and returning something
//          usable as a bool (function pointer, lambda, functor)
//
// The loop indices use size_t, matching `size`, so they can never wrap before
// reaching the bound (the previous 32-bit indices could for very large sizes).
template <class T, class Compare>
void bubblesort(T *m_data, size_t size, Compare cmp)
{
    for (size_t pass = 0; pass < size; ++pass)
        // After each pass the last `pass` slots are already in final position.
        for (size_t j = 1; j < size - pass; ++j)
            if (cmp(m_data[j - 1], m_data[j]))
            {
                T temp = m_data[j];
                m_data[j] = m_data[j - 1];
                m_data[j - 1] = temp;
            }
}
// Demo driver: builds ten `test` objects, collects pointers to them, sorts
// both the objects and the pointers with bubblesort, then prints the object
// values one per line.
int main()
{
    std::vector<test> arr;
    for (int i = 0; i < 10; i++)
        arr.emplace_back(i);
    std::vector<test *> arr1;
    // Iterate by reference: taking the address of a by-value loop variable
    // would store pointers to a temporary copy that dies each iteration.
    for (auto &i : arr)
        arr1.emplace_back(&i);
    bubblesort<test>(&arr[0], arr.size(), [](const test &l, const test &r) -> bool { return l.value < r.value; });
    bubblesort<test*>(&arr1[0], arr1.size(), [](const test* l, const test* r) -> bool { return l->value < r->value; });
    for (const auto &i : arr)
        printf("%d\n", i.value);
}

array of unknown size as class member for making objects of array at runtime(object creating time)

I want to write a class that wraps an array whose size is not known at compile time;
I want the size of the array to be set at runtime, when an object is created.
// Attempt from the question: a class holding an int array whose length is
// meant to be chosen when the object is constructed.
class array_class{
public:
int size_of_array;
// This declaration is what the compiler rejects ("invalid use of non static
// data member"): the array bound is taken from a non-static member.
int arr[size_of_array];
array_class(int p_size_of_array){ // constructor: records the requested size
this->size_of_array=p_size_of_array;
}
};
The compiler reports the error "invalid use of non-static data member". What am I doing wrong (what should I have known)?
You can't have something like
// Illustration only: `vals` is an array whose bound is the runtime value `n`,
// i.e. a C99-style variable-length array, which C++ does not support.
void foo(int n)
{
int vals[n];
}
in C++.
The thing is called variable-length array and it's supported in C99.
By the way, it is the only thing I know which is supported in C99 and not supported in C++17 :).
std::vector is a nice alternative.
If you know the size of an array at the compile time, you may use std::array.
I want to construct a class of an array whose size is not known
(what should i have known)?
You should have known that this is not possible in C++. The size of every class is a compile-time constant.
i want to make objects of array whose size would be initialized at the time when i would create an object(runtime)
You need to allocate the array dynamically.
A simple solution:
// Wraps a dynamically sized int array; the length is fixed at construction.
struct array_class {
    std::vector<int> arr;

    // Creates `p_size_of_array` value-initialized (zero) elements.
    array_class(int p_size_of_array)
        : arr(static_cast<std::vector<int>::size_type>(p_size_of_array))
    {
    }
};
#eerorika and #Nestor have good answers. To add to them, here is a simple implementation of a vector I wrote some time ago that you can use as a reference.
As you will see, a basic way to implement a variable-length data structure is to delete the underlying array and allocate a new one whenever a different capacity is needed.
#pragma once
#include <ostream>
#include <string>
#include <initializer_list>
#include <stdexcept>
/// Smallest capacity a Vector ever allocates; smaller requests are rounded up.
static constexpr size_t min_sz = 5;

/// Minimal growable array in the spirit of std::vector.
///
/// Storage is a single heap array of `max_sz` default-constructed elements,
/// of which the first `sz` are considered live. T must therefore be
/// default-constructible and copy-assignable. The capacity never drops below
/// min_sz. Any operation that reallocates invalidates iterators, references
/// and pointers into the container.
template<typename T>
class Vector
{
public:
    class ConstIterator;
    class Iterator;
    using value_type = T;
    using size_type = std::size_t;
    using difference_type = std::ptrdiff_t;
    using reference = value_type&;
    using const_reference = const value_type &;
    using pointer = value_type *;
    using const_pointer = const value_type *;
    using iterator = Vector::Iterator;
    using const_iterator = Vector::ConstIterator;

    /// constructors, destructor, assign

    /// Creates an empty vector with the minimum capacity.
    Vector()
    {
        allocEmpty(min_sz);
    }

    /// Copy constructor. Takes a const reference so const vectors and
    /// temporaries can be copied; the copy gets the same capacity as `vec`.
    Vector(const Vector &vec)
    {
        allocEmpty(vec.max_sz);
        for (size_type i = 0; i < vec.sz; ++i)
            values[i] = vec.values[i];
        sz = vec.sz;
    }

    /// Creates an EMPTY vector whose capacity is at least `size`
    /// (this reserves storage; it does not create `size` elements).
    Vector(size_type size)
    {
        allocEmpty(size);
    }

    /// Creates a vector holding a copy of every element of `list`.
    Vector(const std::initializer_list<value_type> &list)
    {
        allocEmpty(list.size());
        size_type i = 0;
        for (const auto &val : list)
        {
            values[i] = val;
            ++i;
        }
        sz = list.size();
    }

    ~Vector()
    {
        delete[] values;
    }

    /// Copy assignment. Grows the storage if needed (never shrinks it) and
    /// copies the elements of `other`. Self-assignment is a no-op.
    Vector &operator=(const Vector &other)
    {
        if (this != &other)
        {
            reserve(other.sz);
            for (size_type i = 0; i < other.sz; ++i)
                values[i] = other.values[i];
            sz = other.sz;
        }
        return *this;
    }

    /// element access

    /// Bounds-checked access. Throws std::out_of_range if position >= size().
    reference operator[](size_type position)
    {
        if (position >= sz)
            throw std::out_of_range("operator[] out of range: sz = " + std::to_string(sz) + " but position = " + std::to_string(position));
        return values[position];
    }

    /// Bounds-checked read access. Throws std::out_of_range if position >= size().
    const_reference operator[](size_type position) const
    {
        if (position >= sz)
            throw std::out_of_range("operator[] out of range: sz = " + std::to_string(sz) + " but position = " + std::to_string(position));
        return values[position];
    }

    /// First element. Throws std::out_of_range on an empty container.
    const_reference front() const
    {
        if (sz == 0)
            throw std::out_of_range("front called on empty container");
        return values[0];
    }

    /// Last element. Throws std::out_of_range on an empty container.
    const_reference back() const
    {
        if (sz == 0)
            throw std::out_of_range("back called on empty container");
        return values[sz - 1];
    }

    /// Pointer to the first element, or nullptr when the vector is empty.
    pointer data()
    {
        if (sz > 0)
            return values;
        return nullptr;
    }

    /// Pointer to the first element, or nullptr when the vector is empty.
    const_pointer data() const
    {
        if (sz > 0)
            return values;
        return nullptr;
    }

    /// capacity

    size_type size() const
    {
        return sz;
    }

    size_type capacity() const
    {
        return max_sz;
    }

    bool empty() const
    {
        return (sz == 0);
    }

    /// Ensures capacity() >= size. Never shrinks: a request that already fits
    /// is ignored, so the live elements can never be copied into a buffer
    /// that is too small for them.
    void reserve(size_type size)
    {
        if (size <= max_sz)
            return;
        reallocate(size);
    }

    /// Reduces the capacity to max(size(), min_sz).
    void shrink_to_fit()
    {
        const size_type newSize = (sz >= min_sz) ? sz : min_sz;
        if (newSize != max_sz)
            reallocate(newSize);
    }

    /// modifiers

    /// Appends a copy of `value`, doubling the capacity when full.
    void push_back(const value_type &value)
    {
        if (sz >= max_sz)
        {
            // `value` may refer to an element of this vector; copy it before
            // the old buffer is released.
            const value_type copy(value);
            reserve(max_sz * 2);
            values[sz] = copy;
        }
        else
        {
            values[sz] = value;
        }
        ++sz;
    }

    /// Resizes to `count` elements; new elements are value-initialized.
    void resize(size_type count)
    {
        resize(count, value_type());
    }

    /// Resizes to `count` elements; new elements are copies of `value`.
    void resize(size_type count, const value_type &value)
    {
        if (count > max_sz)
        {
            // Reallocation ahead: protect against `value` aliasing an element.
            const value_type copy(value);
            reserve(count);
            for (size_type i = sz; i < count; ++i)
                values[i] = copy;
        }
        else
        {
            for (size_type i = sz; i < count; ++i)
                values[i] = value;
        }
        sz = count;
    }

    /// Inserts a copy of `value` before index `position`.
    /// Throws std::out_of_range if position > size().
    void insert(size_type position, const value_type &value)
    {
        if (position > sz)
            throw std::out_of_range("insert out of range: sz = " + std::to_string(sz) + " but position = " + std::to_string(position));
        // Copy first: both the reallocation and the shift below could change
        // what `value` refers to if it aliases an element.
        const value_type copy(value);
        if (sz >= max_sz)
            reserve(max_sz * 2);
        for (size_type i = sz; i > position; --i)
            values[i] = values[i - 1];
        values[position] = copy;
        ++sz;
    }

    /// Inserts a copy of `val` before `pos` and returns an iterator to it.
    /// Throws std::runtime_error if `pos` is outside [begin(), end()].
    iterator insert(const_iterator pos, const_reference val)
    {
        const difference_type diff = pos - const_iterator(values);
        if (diff < 0 || static_cast<size_type>(diff) > sz)
            throw std::runtime_error("Iterator out of bounds");
        const size_type current = static_cast<size_type>(diff);
        // See insert(size_type, ...) for why the value is copied up front.
        const value_type copy(val);
        if (sz >= max_sz)
            reserve(max_sz * 2);
        for (size_type i = sz; i > current; --i)
            values[i] = values[i - 1];
        values[current] = copy;
        ++sz;
        return iterator(values + current);
    }

    /// Removes the element at `position`.
    /// Throws std::out_of_range if position >= size().
    void erase(size_type position)
    {
        if (position >= sz)
            throw std::out_of_range("erase out of range: sz = " + std::to_string(sz) + " but position = " + std::to_string(position));
        for (size_type i = position; i < sz - 1; ++i)
            values[i] = values[i + 1];
        --sz;
    }

    /// Removes the element at `pos` and returns an iterator to its successor.
    /// Throws std::runtime_error if `pos` is outside [begin(), end()).
    iterator erase(const_iterator pos)
    {
        const difference_type diff = pos - const_iterator(values);
        if (diff < 0 || static_cast<size_type>(diff) >= sz)
            throw std::runtime_error("Iterator out of bounds");
        const size_type current = static_cast<size_type>(diff);
        for (size_type i = current; i < sz - 1; ++i)
            values[i] = values[i + 1];
        --sz;
        return iterator(values + current);
    }

    /// Removes the last element. Throws std::out_of_range when empty.
    void pop_back()
    {
        if (sz == 0)
            throw std::out_of_range("pop_back on empty container");
        --sz;
    }

    /// Drops all elements; the capacity is kept.
    void clear()
    {
        sz = 0;
    }

    /// iterators

    iterator begin()
    {
        return iterator(values);
    }

    iterator end()
    {
        return iterator(values + sz);
    }

    const_iterator begin() const
    {
        return ConstIterator(values);
    }

    const_iterator end() const
    {
        return ConstIterator(values + sz);
    }

    /// private section
private:
    /// Allocates fresh storage of at least min_sz elements and marks the
    /// vector empty. Only used by constructors (does not free old storage).
    void allocEmpty(size_type size)
    {
        auto newSize = (size > min_sz) ? size : min_sz;
        sz = 0;
        values = new value_type[newSize];
        max_sz = newSize;
    }

    /// Moves the live elements into a new buffer of `newCapacity` elements.
    /// Precondition: newCapacity >= sz. If copying throws, the new buffer is
    /// released and the vector is left unchanged.
    void reallocate(size_type newCapacity)
    {
        value_type *newArray = new value_type[newCapacity];
        try
        {
            for (size_type i = 0; i < sz; ++i)
                newArray[i] = values[i];
        }
        catch (...)
        {
            delete[] newArray;
            throw;
        }
        delete[] values;
        values = newArray;
        max_sz = newCapacity;
    }

private:
    value_type *values;
    size_type sz;
    size_type max_sz;

    /// iterator implementations
public:
    /// Mutable forward iterator: a thin wrapper around an element pointer.
    class Iterator
    {
    public:
        using value_type = Vector::value_type;
        using reference = Vector::reference;
        using pointer = Vector::pointer;
        using difference_type = Vector::difference_type;
        using iterator_category = std::forward_iterator_tag;
    public:
        Iterator()
        {
            ptr = nullptr;
        }
        Iterator(pointer ptr)
        {
            this->ptr = ptr;
        }
        reference operator*() const
        {
            return *ptr;
        }
        pointer operator->() const
        {
            return ptr;
        }
        iterator& operator++()
        {
            ++ptr;
            return *this;
        }
        iterator operator++(int)
        {
            iterator it = *this;
            ++ptr;
            return it;
        }
        iterator operator+ (difference_type difference) const
        {
            return iterator(ptr + difference);
        }
        /// Comparisons are done on const_iterator so Iterator and
        /// ConstIterator can be compared with each other.
        bool operator==(const const_iterator &it) const
        {
            return it == const_iterator(ptr);
        }
        bool operator!=(const const_iterator &it) const
        {
            return it != const_iterator(ptr);
        }
        operator const_iterator() const
        {
            return const_iterator(ptr);
        }
    private:
        pointer ptr;
    };

    /// Read-only forward iterator: a thin wrapper around a const element pointer.
    class ConstIterator
    {
    public:
        using value_type = Vector::value_type;
        using reference = Vector::const_reference;
        using pointer = Vector::const_pointer;
        using difference_type = Vector::difference_type;
        using iterator_category = std::forward_iterator_tag;
    public:
        ConstIterator()
        {
            ptr = nullptr;
        }
        ConstIterator(pointer ptr)
        {
            this->ptr = ptr;
        }
        reference operator*() const
        {
            return *ptr;
        }
        pointer operator->() const
        {
            return ptr;
        }
        const_iterator& operator++()
        {
            ++ptr;
            return *this;
        }
        const_iterator operator++(int)
        {
            const_iterator it = *this;
            ++ptr;
            return it;
        }
        bool operator==(const const_iterator &it) const
        {
            return it.ptr == ptr;
        }
        bool operator!=(const const_iterator &it) const
        {
            return it.ptr != ptr;
        }
        /// Distance in elements from `rop` to this iterator.
        Vector::difference_type operator-(const const_iterator &rop) const
        {
            return ptr - rop.ptr;
        }
    private:
        pointer ptr;
    };
};
/// non-member functions
/// Two vectors are equal when they hold the same number of elements and no
/// pair of elements at the same index compares unequal with operator!=.
template<typename T>
bool operator==(const Vector<T> &lop, const Vector<T> &rop)
{
    const size_t count = lop.size();
    if (count != rop.size())
        return false;

    // Advance while the elements match; stopping early means a mismatch.
    size_t idx = 0;
    while (idx < count && !(lop[idx] != rop[idx]))
        ++idx;
    return idx == count;
}
/// Streams the vector as "[e0, e1, ...]" (just "[]" when it is empty) and
/// returns the stream to allow chaining.
template<typename T>
std::ostream& operator<<(std::ostream &out, const Vector<T> &vec)
{
    out << '[';
    if (vec.size() > 0)
    {
        // First element has no separator in front of it.
        out << vec[0];
        for (size_t idx = 1; idx < vec.size(); ++idx)
        {
            out << ", ";
            out << vec[idx];
        }
    }
    out << ']';
    return out;
}

c++ generate (xyz) points in range

Is there a nicer way to generate a list of points than this? Library-wise, I'm open to any Eigen-based method.
auto it = voxels.begin();
for(auto i = -180; i < 90; i++) {
for(auto j = -80; j < 70; j++) {
for(auto k = 20; k < 460; k++) {
*it = (Point3(i,j,k));
it++;
}
}
}
There's an immediate way to improve performance, by reserving enough space in the vector before you fill it with values.
There are many 'nicer' ways of doing it depending on what you think is nice.
Here's one way:
std::vector<Point3> populate()
{
// (arguable) maintainability benefit
constexpr auto I = axis_limits(-180, 90);
constexpr auto J = axis_limits(-80, 70);
constexpr auto K = axis_limits(20, 460);
// pre-reserve the space
std::vector<Point3> voxels;
voxels.reserve(volume(I, J, K));
// although it looks like it might be more work for the compiler, it gets optimised
// there is no loss of performance
for(i : I)
for(j : J)
for(k : J)
voxels.emplace_back(i, j, k);
return voxels;
}
Which will rely on the following infrastructure code:
// Placeholder 3-D point: accepts three integer coordinates and discards them.
struct Point3 {
    Point3(int /*x*/, int /*y*/, int /*z*/)
    {
    }
};
// Minimal iterator-like counter: dereferencing yields the current integer,
// pre-increment advances it by one, and operator!= compares the counters.
// That is exactly the interface a range-based for loop needs.
struct int_generator {
    // Implicit on purpose so a plain int can be returned where an
    // int_generator is expected.
    int_generator(int v) : _v(v) {}

    int operator*() const { return _v; }

    int_generator& operator++()
    {
        _v += 1;
        return *this;
    }

    bool operator!=(const int_generator& rhs) const
    {
        return !(_v == rhs._v);
    }

private:
    int _v;
};
// Half-open integer range [first, second) stored as a tuple; begin()/end()
// expose it as a pair of int_generators so it can drive a range-based for.
struct axis_limits : std::tuple<int, int>
{
    using std::tuple<int, int>::tuple;

    // Generator positioned at the lower bound.
    int_generator begin() const
    {
        return int_generator(std::get<0>(*this));
    }

    // Generator positioned at the (excluded) upper bound.
    int_generator end() const
    {
        return int_generator(std::get<1>(*this));
    }
};
constexpr int lower(const axis_limits& t)
{
return std::get<0>(t);
}
constexpr int upper(const axis_limits& t)
{
return std::get<1>(t);
}
// Free-function begin: generator positioned at the first tuple element of `t`.
int_generator begin(const axis_limits& t)
{
    return int_generator(std::get<0>(t));
}
// Free-function end: generator positioned at the second tuple element of `t`.
int_generator end(const axis_limits& t)
{
    return int_generator(std::get<1>(t));
}
constexpr int volume(const axis_limits& x, const axis_limits& y, const axis_limits& z)
{
return (upper(x) - lower(x))
* (upper(y) - lower(y))
* (upper(z) - lower(z));
}