Related
I am implementing a kind of dataframe and I want to define a RandomAccessIterator over it, in order to execute the different std algorithms, such as the sorting one. The dataframe of the example contains two columns, "a" and "b":
a; b;
20; 21;
20; 19;
10; 11;
40; 41;
10; 11;
After sorting with a trivial selection sort this is the result:
a; b;
10; 11;
10; 11;
20; 19;
20; 21;
40; 41;
The problem that I am facing is that std::sort does not work properly, and I don't know whether the implementation of the iterator is sound or not.
This is the code.
File: dataframe.hpp
#pragma once
#include <iostream>
#include <charconv>
#include <vector>
#include <memory>
#include <cstring>
#include <numeric>
#include "iterator.hpp"
namespace df
{
class Record;
class Column;
class Dataframe;
namespace types
{
    /// Storage family of a column. The underlying character doubles as the
    /// one-letter tag that Dtype::name() puts in front of the byte width.
    enum class Base : char
    {
        CHAR = 'A',
        UNSIGNED = 'U',
        // Other types..
    };

    /// Describes a column type as a storage family plus a width in bytes.
    class Dtype
    {
        types::Base m_base_dtype;
        std::size_t m_size;

    public:
        Dtype(types::Base base, std::size_t size) : m_base_dtype{base}, m_size{size} {}

        /// Storage family of the type.
        [[nodiscard]] auto base() const { return m_base_dtype; }

        /// Width of one value, in bytes.
        [[nodiscard]] auto size() const { return m_size; }

        /// Textual name: the family tag followed by the width, e.g. "U4" or "A16".
        [[nodiscard]] auto name() const
        {
            std::string label(1, static_cast<char>(m_base_dtype));
            label += std::to_string(m_size);
            return label;
        }

        /// True for a one-byte CHAR and for UNSIGNED widths of 1, 2, 4 or 8 bytes.
        [[nodiscard]] auto is_primitive() const
        {
            if (m_base_dtype == types::Base::CHAR)
                return m_size == 1;
            if (m_base_dtype == types::Base::UNSIGNED)
                return m_size == 1 || m_size == 2 || m_size == 4 || m_size == 8;
            return false;
        }
    };

    /// Shorthand for a CHAR type that is `size` bytes wide.
    [[nodiscard]] static auto CHAR(const std::size_t size) { return Dtype(types::Base::CHAR, size); }

    /// Shorthand for an UNSIGNED type that is `size` bytes wide.
    [[nodiscard]] static auto UNSIGNED(const std::size_t size) { return Dtype(types::Base::UNSIGNED, size); }
}
/// A single column: a buffer of raw bytes plus the data type of each value.
class Column
{
public:
    /// Takes over the bytes held by `raw`. The caller's vector is moved from,
    /// so it must not be relied upon afterwards.
    Column(std::vector<char> &raw, const types::Dtype dtype) : m_raw{std::move(raw)}, m_dtype{dtype} {}

    // Declaring only the move assignment suppresses the implicit move
    // constructor and deletes the copy operations, which would leave the class
    // impossible to return or store by value. Both move operations are provided.
    Column(Column &&c) = default;            // Move constructor
    Column &operator=(Column &&c) = default; // Move assignment

    /// Data type of the values stored in this column.
    [[nodiscard]] const auto &dtype() const { return m_dtype; }

    /// The underlying byte buffer.
    [[nodiscard]] auto &raw() { return m_raw; }
    [[nodiscard]] const auto &raw() const { return m_raw; }

    /// Pointer to the first byte of the buffer.
    [[nodiscard]] auto *data() { return m_raw.data(); }
    [[nodiscard]] const auto *data() const { return m_raw.data(); }

private:
    std::vector<char> m_raw;
    types::Dtype m_dtype;
};
/// Table of fixed-width records stored back to back in one byte buffer.
/// Each record is the concatenation of its column values; the column widths
/// come from the dtypes passed to the constructor.
class Dataframe
{
public:
    /// Builds a dataframe over the bytes in `raw`.
    /// @param raw    record bytes; the vector is moved from
    /// @param names  column names (only used as the header by print())
    /// @param dtypes one data type per column, in record order
    Dataframe(std::vector<char> &raw, std::vector<std::string> names, std::vector<types::Dtype> dtypes)
    {
        m_raw = std::move(raw);
        m_column_dtypes = dtypes;
        m_column_names = names;
        m_record_size = 0;
        for (const auto &dt : dtypes)
        {
            m_column_offsets.emplace_back(m_record_size);
            m_record_size += dt.size();
        }
        // An empty schema (or only zero-width columns) gives a record size of 0;
        // report zero records instead of dividing by zero.
        m_record_count = m_record_size == 0 ? 0 : m_raw.size() / m_record_size;
    }

    /// Same as above, without column names.
    Dataframe(std::vector<char> &raw, std::vector<types::Dtype> dtypes) : Dataframe(raw, {}, dtypes) {}

    // The move assignment alone would suppress the move constructor, so both
    // are declared explicitly.
    Dataframe(Dataframe &&c) = default;            // Move constructor
    Dataframe &operator=(Dataframe &&c) = default; // Move assignment

    [[nodiscard]] auto &raw() { return m_raw; }
    [[nodiscard]] const auto &raw() const { return m_raw; }
    [[nodiscard]] auto *data() { return m_raw.data(); }
    [[nodiscard]] const auto *data() const { return m_raw.data(); }

    // Iterators
    /// Iterator to the first record.
    [[nodiscard]] df::Iterator begin()
    {
        return df::Iterator{m_raw.data(), m_record_size};
    }

    /// Iterator one past the last complete record. It is computed from the
    /// record count so that stepping from begin() always reaches it, even when
    /// the buffer ends with a partial record.
    [[nodiscard]] df::Iterator end()
    {
        return df::Iterator{m_raw.data() + m_record_count * m_record_size, m_record_size};
    }

    /// (record count, column count)
    [[nodiscard]] auto shape() const { return std::make_pair(m_record_count, m_column_dtypes.size()); }
    [[nodiscard]] auto record_count() const { return m_record_count; }
    [[nodiscard]] auto record_size() const { return m_record_size; }
    [[nodiscard]] const auto &names() const { return m_column_names; }
    [[nodiscard]] const auto &dtypes() const { return m_column_dtypes; }
    [[nodiscard]] const auto &offsets() const { return m_column_offsets; }

    /// Prints the header and every record to std::cout.
    void print() const { print(m_record_count); }

    /// Prints the header and at most `initial_records` records to std::cout,
    /// every field followed by "; ".
    void print(const std::size_t initial_records) const
    {
        // Print header
        for (const auto &column_name : m_column_names)
        {
            std::cout << column_name << "; ";
        }
        std::cout << std::endl;
        // Print rows
        const std::size_t records_to_print = std::min(initial_records, m_record_count);
        for (std::size_t i = 0; i < records_to_print; i++)
        {
            const char *field_p = m_raw.data() + i * m_record_size;
            for (const auto &field : m_column_dtypes)
            {
                switch (field.base())
                {
                case types::Base::UNSIGNED:
                {
                    std::uint64_t uint_value = 0;
                    // Never copy more bytes than the 8-byte temporary can hold.
                    std::memcpy(&uint_value, field_p, std::min(field.size(), sizeof(uint_value)));
                    std::cout << uint_value;
                    break;
                }
                case types::Base::CHAR:
                {
                    std::cout << std::string(field_p, field.size());
                    break;
                }
                }
                field_p += field.size();
                // New column
                std::cout << "; ";
            }
            // New row
            std::cout << std::endl;
        }
    }

    /// Deep copy of the dataframe: bytes, names and dtypes are all duplicated.
    std::shared_ptr<Dataframe> copy() const
    {
        auto x = std::vector<char>(m_raw);
        return std::make_shared<Dataframe>(x, std::vector<std::string>(m_column_names), std::vector<types::Dtype>(m_column_dtypes));
    }

private:
    std::vector<char> m_raw = {};
    std::vector<std::string> m_column_names = {};
    std::vector<types::Dtype> m_column_dtypes = {};
    std::vector<std::size_t> m_column_offsets = {}; // byte offset of each column inside a record
    std::size_t m_record_size = {};                 // sum of the column widths, in bytes
    std::size_t m_record_count = {};                // number of complete records in m_raw
};
using namespace types;
static std::shared_ptr<Dataframe> read_from_vector(const std::vector<std::vector<std::string>> values, const std::vector<std::string> names, const std::vector<Dtype> dtypes)
{
const auto record_size = std::accumulate(dtypes.begin(), dtypes.end(), std::size_t{0},
[](std::size_t accum, const auto &m)
{ return accum + m.size(); });
const auto total_size = values.size() * record_size;
const std::size_t INCR_RECORDS = std::max(total_size / (10 * record_size), std::size_t{65536});
auto raw = std::vector<char>{};
std::size_t written_records = 0;
auto offsets = std::vector<std::size_t>{};
for (int offset = 0; const auto &kd : dtypes)
{
offsets.push_back(offset);
offset += kd.size();
}
for (auto value : values)
{
if (written_records >= raw.size() / record_size)
{
raw.resize(raw.size() + INCR_RECORDS * record_size, char{' '});
}
for (int i = 0; i < names.size(); i++)
{
const auto name = names[i];
const auto dtype = dtypes[i];
const auto offset = offsets[i];
const auto pos = written_records * record_size + offset;
switch (dtype.base())
{
case df::Base::CHAR:
{
const auto v = value[i];
const auto byte_to_copy = std::min(v.size(), dtype.size());
std::memcpy(raw.data() + pos,
v.data() + v.size() - byte_to_copy, byte_to_copy); // Prendo gli ultimi byte
break;
}
case df::Base::UNSIGNED:
{
const auto v = std::stoull(value[i]);
const auto byte_to_copy = dtype.size();
std::memcpy(raw.data() + pos, &v, byte_to_copy); // Prendo gli ultimi byte
break;
}
default:
throw std::runtime_error("ColumnType non riconosciuto");
}
}
written_records++;
}
raw.resize(written_records * record_size);
raw.shrink_to_fit();
return std::make_shared<Dataframe>(raw, names, dtypes);
}
}
File: iterator.hpp
#pragma once
#include <iostream>
#include <cstring>
namespace df
{
/// Iterator over fixed-width records stored back to back in a byte buffer.
/// Dereferencing yields a proxy (record_reference) that reads and writes the
/// record bytes in place; record_value is an owning copy of one record.
class Iterator
{
    std::size_t size; // width of one record, in bytes
    char *ptr;        // first byte of the record currently pointed at

    // Record width as a signed number, so that arithmetic with negative
    // offsets is done in signed integers instead of wrapping around.
    std::ptrdiff_t stride() const { return static_cast<std::ptrdiff_t>(size); }

public:
    struct record_reference;

    /// Owning copy of one record. The bytes live in a heap buffer that is
    /// released by the destructor, so the type defines its own copy and move
    /// operations; the implicit ones would share the buffer and free it twice.
    struct record_value
    {
        std::size_t size;
        char *ptr;

        record_value(const record_reference &t) : record_value(t.size, t.ptr) {}

        /// Copies `m_size` bytes starting at `m_ptr` into a new buffer.
        record_value(const std::size_t m_size, char *m_ptr) : size{m_size}, ptr{new char[m_size]}
        {
            if (size != 0)
                std::memcpy(ptr, m_ptr, size);
        }

        record_value(const record_value &other) : record_value(other.size, other.ptr) {}

        record_value(record_value &&other) noexcept : size{other.size}, ptr{other.ptr}
        {
            other.size = 0;
            other.ptr = nullptr;
        }

        /// Copy and move assignment in one: the argument is a fresh copy (or a
        /// moved-in temporary) whose buffer is exchanged with ours.
        record_value &operator=(record_value other) noexcept
        {
            const std::size_t other_size = other.size;
            char *const other_ptr = other.ptr;
            other.size = size;
            other.ptr = ptr;
            size = other_size;
            ptr = other_ptr;
            return *this;
        }

        ~record_value()
        {
            delete[] this->ptr;
        }
    };

    /// Non-owning proxy for the record stored at `ptr`. Assigning to it copies
    /// record bytes into the referenced storage; it never rebinds the proxy.
    struct record_reference
    {
        std::size_t size;
        char *ptr;

        record_reference(const std::size_t m_size, char *m_ptr) : size{m_size}, ptr{m_ptr} {}
        record_reference(const record_reference &t) = default;

        // record_reference(const record_value &t) : record_reference(t.size, t.ptr) {};
        record_reference &operator=(const record_value &t)
        {
            std::memcpy(ptr, t.ptr, size);
            return *this;
        }

        record_reference &operator=(const record_reference &t)
        {
            // memmove: source and destination may be the very same record.
            std::memmove(ptr, t.ptr, size);
            return *this;
        }

        record_reference &operator=(char *t)
        {
            std::memmove(ptr, t, size);
            return *this;
        }

        operator char *()
        {
            return ptr;
        }
        operator const char *() const { return ptr; }
    };

    using iterator_category = std::random_access_iterator_tag;
    using value_type = record_value;
    using reference = record_reference;
    // Before C++20, std::iterator_traits is only populated when all five member
    // types exist; there is no meaningful pointer type for a proxy iterator.
    using pointer = void;
    using difference_type = std::ptrdiff_t;

    // default constructible
    Iterator() : size(0), ptr(nullptr)
    {
    }
    // copy constructible / copy assignable
    Iterator(const Iterator &t) = default;
    Iterator &operator=(const Iterator &t) = default;

    Iterator(char *ptr, const std::size_t size) : size{size}, ptr(ptr)
    {
    }

    record_reference operator*() const
    {
        return {size, ptr};
    }
    // Prefix
    Iterator &operator++()
    {
        ptr += size;
        return *this;
    }
    // Postfix
    Iterator operator++(int)
    {
        auto tmp = *this;
        ++*this;
        return tmp;
    }
    // Prefix
    Iterator &operator--()
    {
        ptr -= size;
        return *this;
    }
    // Postfix
    Iterator operator--(int)
    {
        auto tmp = *this;
        --*this;
        return tmp;
    }

    /// Number of records between `it` and this iterator (negative when this
    /// iterator comes first). The division is signed on purpose: dividing by
    /// the unsigned record width would turn negative distances into huge
    /// positive numbers.
    difference_type operator-(const Iterator &it) const
    {
        return size == 0 ? 0 : (this->ptr - it.ptr) / stride();
    }
    Iterator operator+(const difference_type &offset) const
    {
        return Iterator(ptr + offset * stride(), size);
    }
    friend Iterator operator+(const difference_type &diff, const Iterator &it)
    {
        return it + diff;
    }
    Iterator operator-(const difference_type &diff) const
    {
        return Iterator(ptr - diff * stride(), size);
    }
    reference operator[](const difference_type &offset) const
    {
        return {size, ptr + offset * stride()};
    }

    // Comparisons look at the position only; the record width is not compared.
    bool operator==(const Iterator &it) const
    {
        return this->ptr == it.ptr;
    }
    bool operator!=(const Iterator &it) const
    {
        return !(*this == it);
    }
    bool operator<(const Iterator &it) const
    {
        return this->ptr < it.ptr;
    }
    bool operator>=(const Iterator &it) const
    {
        return this->ptr >= it.ptr;
    }
    bool operator>(const Iterator &it) const
    {
        return this->ptr > it.ptr;
    }
    bool operator<=(const Iterator &it) const
    {
        return this->ptr <= it.ptr;
    }

    Iterator &operator+=(const difference_type &diff)
    {
        ptr += diff * stride();
        return *this;
    }
    Iterator &operator-=(const difference_type &diff)
    {
        ptr -= diff * stride();
        return *this;
    }

    // Kept for source compatibility; a conversion to the same type is never
    // selected implicitly.
    operator Iterator() const
    {
        return Iterator(ptr, size);
    }
};
/// Exchanges the bytes of the two referenced records, byte by byte.
/// The number of bytes swapped is `a.size`; `b` is expected to refer to a
/// record of the same width.
/// Declared inline because it is defined in a header: a non-inline definition
/// would be emitted in every translation unit that includes the file and fail
/// at link time.
inline void swap(df::Iterator::record_reference a, df::Iterator::record_reference b)
{
    char *lhs = a.ptr;
    char *rhs = b.ptr;
    for (char *const lhs_end = a.ptr + a.size; lhs != lhs_end; ++lhs, ++rhs)
    {
        const char tmp = *lhs;
        *lhs = *rhs;
        *rhs = tmp;
    }
}
}
File: comparator.hpp
#pragma once
#include <memory>
#include <functional>
#include "dataframe.hpp"
#include "iterator.hpp"
namespace compare
{
using comparator_fn = std::function<int(const df::Iterator::record_reference, const df::Iterator::record_reference)>;
/// Builds a three-way comparator for the field of type T found `offset` bytes
/// into each record. The result is negative, zero or positive.
/// For sizes of 3, 5, 7 or more than 8 bytes the fields are compared with
/// memcmp; otherwise they are compared as values of type T.
/// T must be default-constructible and trivially copyable, because the field
/// is copied out of the record before being compared: the record bytes carry
/// no alignment guarantee, so reading them through a cast `T *` would be
/// undefined behaviour.
template <typename T, std::size_t offset = 0, std::size_t size = sizeof(T)>
static inline comparator_fn make_comparator()
{
    if constexpr (size == 3 or size == 5 or size == 7 or size > 8)
    {
        return [](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
        { return std::memcmp(a + offset, b + offset, size); };
    }
    else
    {
        return [](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
        {
            T lhs;
            T rhs;
            std::memcpy(&lhs, a + offset, sizeof(T));
            std::memcpy(&rhs, b + offset, sizeof(T));
            return lhs < rhs ? -1 : rhs < lhs ? +1
                                              : 0;
        };
    }
}
/// Builds a three-way comparator (negative / zero / positive) for the field of
/// type T found `offset` bytes into each record, with the offset chosen at run
/// time.
/// T must be default-constructible and trivially copyable: each field is
/// copied out of the record with memcpy, because dereferencing a cast pointer
/// into the byte buffer would be an unaligned, aliasing-violating read.
template <typename T>
static inline comparator_fn make_comparator(const std::size_t offset)
{
    return [=](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
    {
        T lhs;
        T rhs;
        std::memcpy(&lhs, a + offset, sizeof(T));
        std::memcpy(&rhs, b + offset, sizeof(T));
        return lhs < rhs ? -1 : rhs < lhs ? +1
                                          : 0;
    };
}
/// Builds the three-way comparator (negative / zero / positive) for one column.
/// @param dtype  data type of the column
/// @param offset byte offset of the column inside a record
/// @throws std::runtime_error for a base type that is not handled here
static inline comparator_fn make_column_comparator(const df::Dtype dtype, const std::size_t offset)
{
    // Read the width once; the lambdas capture it instead of the whole dtype.
    const std::size_t width = dtype.size();
    switch (dtype.base())
    {
    case df::Base::CHAR:
    {
        if (width == 1)
            return make_comparator<std::uint8_t>(offset);
        // Every other width, including 2, is a plain byte-wise comparison.
        // (The original asked whether hard-coding the width 2 brings any
        // benefit over treating it as an unsigned short; the generic call
        // gives the same ordering.)
        return [=](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
        { return std::memcmp(a + offset, b + offset, width); };
    }
    case df::Base::UNSIGNED:
    {
        // The value is widened into a zeroed 64-bit integer; never copy more
        // bytes than that integer can hold.
        const std::size_t bytes = width < sizeof(std::uint64_t) ? width : sizeof(std::uint64_t);
        return [=](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
        {
            std::uint64_t uint_value_a = 0;
            std::uint64_t uint_value_b = 0;
            std::memcpy(&uint_value_a, a + offset, bytes);
            std::memcpy(&uint_value_b, b + offset, bytes);
            return (uint_value_a < uint_value_b ? -1 : uint_value_a > uint_value_b ? +1
                                                                                   : 0);
        };
    }
    default:
        throw std::runtime_error("Unsupported dtype");
    }
}
/// Builds a "less than" predicate that orders records column by column, in
/// the column order of the dataframe `T`.
/// Although the returned callable is typed as returning int, the value is a
/// boolean: non-zero when the first record sorts before the second.
static inline comparator_fn make_composite_two_way_comparator(const std::shared_ptr<df::Dataframe> &T)
{
    const auto &column_dtypes = T->dtypes();
    const auto &column_offsets = T->offsets();
    const auto column_count = column_dtypes.size();

    // One three-way comparator per column.
    std::vector<comparator_fn> per_column;
    for (std::size_t col = 0; col != column_count; ++col)
    {
        per_column.emplace_back(make_column_comparator(column_dtypes[col], column_offsets[col]));
    }

    return [=](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
    {
        for (std::size_t col = 0; col != column_count; ++col)
        {
            // The first column on which the records differ decides the order.
            const auto result = per_column[col](a, b);
            if (result != 0)
                return result < 0;
        }
        // Equal on every column: neither record is less than the other.
        return false;
    };
}
}
File: main.cpp
#include <algorithm>
#include <iostream>
#include <iterator>
#include <memory>
#include <string>
#include <vector>
#include "dataframe.hpp"
#include "comparator.hpp"
/// Sorts [first, last) in place with selection sort.
/// @param first iterator to the first element
/// @param last  iterator one past the last element
/// @param comp  predicate returning true when its first argument must come
///              before its second
/// The temporary used for the exchange has the iterator's own value_type, so
/// the function works for any iterator (including proxy iterators whose
/// dereference does not yield a real reference), not just df::Iterator.
template <typename RandomAccessIterator, typename Comparator>
static void selection_sort(RandomAccessIterator first, RandomAccessIterator last, Comparator comp)
{
    using value_type = typename std::iterator_traits<RandomAccessIterator>::value_type;
    for (auto i = first; i != last; ++i)
    {
        auto min = i;
        for (auto j = i + 1; j != last; ++j)
        {
            if (comp(*j, *min))
                min = j;
        }
        // Already in place: skip the exchange (and the self-assignment).
        if (min == i)
            continue;
        value_type temp = *i;
        *i = *min;
        *min = temp;
        // Alternative
        // std::iter_swap(i, min);
    }
}
/// Demo: builds a two-column dataframe, prints it, sorts it with
/// selection_sort, prints it again and finally calls std::sort on it.
int main(int argc, char const *argv[])
{
    std::vector<std::string> values{"20", "21", "20", "19", "10", "11", "40", "41", "10", "11"};

    // Group the flat list two by two: each pair is one record (a, b).
    std::vector<std::vector<std::string>> v;
    for (int i = 0; i < values.size(); i += 2)
    {
        v.push_back(std::vector<std::string>{values[i], values[i + 1]});
    }

    std::vector<std::string> column_names = {"a", "b"};
    df::Dtype d = df::Dtype(df::Base::UNSIGNED, 4);
    std::vector dtypes = {d, d};

    // Create a dataframe
    std::shared_ptr<df::Dataframe> df = df::read_from_vector(v, column_names, dtypes);
    std::cout << "Before sorting" << std::endl;
    df->print();

    // Orders by column a first and by column b second, both ascending.
    auto comparator = compare::make_composite_two_way_comparator(df);
    selection_sort(df->begin(), df->end(), comparator);
    std::cout << "\nAfter sorting" << std::endl;
    df->print();

    // With the std::sort it does not work
    std::sort(df->begin(), df->end(), comparator);
    return 0;
}
Your type is not a C++17 RandomAccessIterator, because it isn't a C++17 ForwardIterator, because reference is an object type, not a reference type.
The type It satisfies ForwardIterator if
Let T be the value type of It. The type std::iterator_traits<It>::reference must be either
T& or T&& if It satisfies OutputIterator (It is mutable), or
const T& or const T&& otherwise (It is constant),
(Other requirements elided)
You will be able to satisfy the C++20 concept std::random_access_iterator, because that relaxes the requirement on It::reference.
In C++17, the reference type of an iterator must be precisely value_type& in order for that iterator to be random access. Only input iterators can have the reference type be something other than value_type&. So in C++17, proxy iterators are limited to input iterators. And every algorithm written against C++17 has this expectation.
The C++20 ranges library adds the ability to have random access proxy iterators. And the C++20 algorithms that use those range concepts will respect them.
I tried to create a template for a generic 2D array, which would allow me to create arrays of different data types and fill them with random values. My current code seemingly creates three objects, but fills them with the same random values, and the array of type double does not even contain double values. I think the problem is somewhere in the constructor and with the pointers, but I am not sure how to fix this.
My class:
/// Heap-allocated m x n matrix of T (8 x 12 by default).
/// The constructor allocates `m` row arrays, each of `n` elements; the
/// destructor releases them. Copying is disabled because a member-wise copy
/// would make two objects free the same rows.
template <typename T>
class MATRIX
{
private:
    T** M = nullptr; // array of m row pointers, set up by the constructor
    int m = 8;       // number of rows
    int n = 12;      // number of columns
public:
    MATRIX(T matr[8][12]);
    MATRIX(const MATRIX&) = delete;
    MATRIX& operator=(const MATRIX&) = delete;
    ~MATRIX()
    {
        if (M == nullptr)
            return;
        for (int i = 0; i < m; i++)
            delete[] M[i];
        delete[] M;
    }
    /// Fills every element with a random value.
    void fill();
};
Constructor:
/// Allocates the m x n storage and value-initializes every element (zero for
/// arithmetic types), so the matrix never holds indeterminate values.
/// The `matr` argument is not read: callers in this example pass arrays that
/// have not been initialized, so their contents must not be copied.
template <typename T>
MATRIX<T>::MATRIX(T matr[8][12]){
    (void)matr;
    M = new T*[m];
    for (int i = 0; i < m; i++)
        M[i] = new T[n]();
}
Method:
// Seeds rand() on the first call and does nothing afterwards.
// It is a plain (non-template) function on purpose: a static flag inside the
// member template would exist once per instantiation, so MATRIX<int> and
// MATRIX<double> would each reseed with the same second-resolution time and
// produce identical sequences.
inline void seed_rand_once()
{
    static const bool seeded = (srand((unsigned) time(0)), true);
    (void)seeded;
}

/// Fills the matrix with random integers in the range [65, 122], converted to T.
template <typename T>
void MATRIX<T>::fill(){
    seed_rand_once();
    for (int i = 0; i < m; i++){
        for (int j = 0; j < n; j++){
            M[i][j] = static_cast<T>((rand() % (122 + 1 - 65)) + 65);
        }
    }
}
Main:
/// Demo: creates an int, a double and a char matrix and fills each with
/// random values.
int main() {
    // Arrays handed to the constructors.
    int intMatr[8][12];
    double doubleMatr[8][12];
    char charMatr[8][12];

    MATRIX<int> a(intMatr);
    MATRIX<double> b(doubleMatr);
    MATRIX<char> c(charMatr);

    a.fill();
    b.fill();
    c.fill();
    return 0;
}
Not really a direct answer to your question; however, try not to use new/delete if you don't have to, as is shown in this example (note that the array2d_t class is something I wrote earlier and reused for this example, so it can do a bit more than needed).
I also show how to use c++'s random generators to create characters for your matrix.
#pragma once
#include <vector>
#include <iostream>
#include <random>
#include <utility>
#include <string>
#include <stdexcept>
//---------------------------------------------------------------------------------------------------------------------
/// Fixed-size 2D array with bounds-checked element access.
/// @tparam type_t element type
/// @tparam rows_v number of rows
/// @tparam cols_v number of columns
template<typename type_t, std::size_t rows_v, std::size_t cols_v>
struct array2d_t
{
    /// Value-initializes every element.
    constexpr array2d_t() :
        m_values{}
    {
    }

    /// Copies the contents of a built-in 2D array of the same dimensions.
    constexpr explicit array2d_t(const type_t(&values)[rows_v][cols_v]) :
        m_values{}  // a constexpr constructor must initialize every member
    {
        // constexpr compatible initialization
        for (std::size_t row = 0; row < rows_v; ++row)
        {
            for (std::size_t col = 0; col < cols_v; ++col)
            {
                m_values[row][col] = values[row][col];
            }
        }
    }

    ~array2d_t() = default;

    /// Returns a row helper for `row`.
    /// @throws std::invalid_argument if `row` is out of bounds
    constexpr auto operator[](const std::size_t row)
    {
        //assert(row < rows_v);
        if (row >= rows_v) throw std::invalid_argument("row out of bounds");
        return row_t(&m_values[row][0]);
    }

    /// Returns a read-only row helper for `row`.
    /// @throws std::invalid_argument if `row` is out of bounds
    constexpr auto operator[](const std::size_t row) const
    {
        //assert(row < rows_v);
        if (row >= rows_v) throw std::invalid_argument("row out of bounds");
        return const_row_t(&m_values[row][0]);
    }

    // return iterator to the first row (so array can be used in range based for loop)
    constexpr auto begin()
    {
        return std::begin(m_values);
    }

    // return iterator one past the last row (so array can be used in range based for loop)
    constexpr auto end()
    {
        return std::end(m_values);
    }

    // const overloads, so a const array can be iterated as well
    constexpr auto begin() const
    {
        return std::begin(m_values);
    }

    constexpr auto end() const
    {
        return std::end(m_values);
    }

    constexpr std::size_t rows() const
    {
        return rows_v;
    }

    constexpr std::size_t columns() const
    {
        return cols_v;
    }

private:
    //-----------------------------------------------------------------------------------------------------------------
    /// row helper: bounds-checked access to, and iteration over, one row
    struct row_t
    {
        constexpr row_t(type_t* row) :
            m_row{ row }
        {
        }

        /// @throws std::invalid_argument if `column` is out of bounds
        constexpr type_t& operator[](const std::size_t column) const
        {
            //assert(column < cols_v);
            if (column >= cols_v) throw std::invalid_argument("column out of bounds");
            return m_row[column];
        }

        // m_row is a plain pointer, so it is its own begin iterator
        // (std::begin has no overload for pointers).
        constexpr auto begin() const
        {
            return m_row;
        }

        // A row holds cols_v elements, so the end is cols_v past the start.
        constexpr auto end() const
        {
            return m_row + cols_v;
        }

    private:
        type_t* m_row;
    };

    //-----------------------------------------------------------------------------------------------------------------
    // row helper for const
    struct const_row_t
    {
        constexpr const_row_t(const type_t* row) :
            m_row{ row }
        {
        }

        /// @throws std::invalid_argument if `column` is out of bounds
        constexpr const type_t& operator[](const std::size_t column) const
        {
            //assert(column < cols_v);
            if (column >= cols_v) throw std::invalid_argument("column out of bounds");
            return m_row[column];
        }

        constexpr auto begin() const
        {
            return m_row;
        }

        constexpr auto end() const
        {
            return m_row + cols_v;
        }

    private:
        const type_t* m_row;
    };

    type_t m_values[rows_v][cols_v];
};
/// Writes the array to `os`, one row per line with the values separated by
/// ", ", followed by an empty line.
/// Everything goes to the stream that was passed in; the previous version
/// wrote to std::cout regardless of `os`.
template<typename type_t, std::size_t rows_v, std::size_t cols_v>
std::ostream& operator<<(std::ostream& os, array2d_t<type_t,rows_v,cols_v>& arr)
{
    for (const auto& row : arr)
    {
        bool comma = false;

        for (const auto& value : row)
        {
            if (comma) os << ", ";
            os << value;
            comma = true;
        }
        os << "\n";
    }

    os << "\n";

    return os;
}
//---------------------------------------------------------------------------------------------------------------------
/// 8 x 12 character matrix that can fill itself with random letters.
class MATRIX :
    public array2d_t<char, 8, 12>
{
public:
    /// Assigns a random letter (A-Z or a-z) to every element.
    void fill()
    {
        // initialize a vector of valid character for random to pick from
        // static ensures this is only done on first call to function
        static std::vector<char> valid_chars = []
        {
            std::vector<char> chars;
            chars.reserve(52);
            // The bounds are inclusive: 26 upper-case plus 26 lower-case
            // letters, matching the 52 slots reserved above.
            for (char c = 'A'; c <= 'Z'; ++c) chars.push_back(c);
            for (char c = 'a'; c <= 'z'; ++c) chars.push_back(c);
            return chars;
        }();

        // this is how to setup random number generation in C++
        static std::random_device rd{};
        static std::default_random_engine random{ rd() };
        static std::uniform_int_distribution<std::size_t> distribution(0, valid_chars.size() - 1);

        for (auto& row : *this)
        {
            for (auto& value : row)
            {
                value = valid_chars[distribution(random)];
            }
        }
    }
};
//---------------------------------------------------------------------------------------------------------------------
int main()
{
MATRIX m;
m.fill();
std::cout << m;
return 0;
}
Following the question in Pointer to portions of array, a structure that does operations with portions of an array was proposed.
I would like to request one further question within this issue.
I would like to create a structure for blockMatrices using std::vector and would require to change the implementation of the structure for getting a 3x3 matrix out of a 4x4 matrix.
The current test case is:
#include <vector>
#include <array>
#include <iostream>
// define matrix 4x4
typedef std::array<double, 16> matrix4;
// define matrix 3x3
typedef std::array<double, 9> matrix3;
// get 3x3 matrix out of a 4x4 matrix
/// Read-only 3x3 view of the upper-left corner of a row-major 4x4 matrix.
struct subMat
{
    matrix4& matrix_;

    subMat (matrix4& A): matrix_(A){}

    /// Element `index` (0..8, row-major) of the 3x3 view.
    const double& operator[](size_t index) const
    {
        // Rows of the 4x4 matrix are 4 elements apart, and the view skips the
        // fourth column: 0,1,2 -> 0,1,2; 3,4,5 -> 4,5,6; 6,7,8 -> 8,9,10.
        const size_t row = index / 3;
        const size_t column = index % 3;
        return matrix_[4 * row + column];
    }
};
/// Sums the first `count` elements of `arr`, read through `arr[i]`.
/// @param arr   anything indexable with operator[]
/// @param count number of elements to add up; defaults to 9 (a 3x3 matrix)
/// @return the sum as a double, 0 when `count` is 0
template <typename T>
double sum_of_elements(const T& arr, const std::size_t count = 9)
{
    double res = 0;
    for (std::size_t i = 0; i < count; ++i)
    {
        res += arr[i];
    }
    return res;
}
int main(int argCount, char *args[])
{
std::vector<matrix4> myBlockMatrix(5);
for (int i=0; i < myBlockMatrix.size(); i++)
{
for (int j = 0; j<myBlockMatrix[0].size(); j++)
{
myBlockMatrix[i][j] = i*j;
}
}
for (int i = 0; i<myBlockMatrix.size(); i++)
{
std::cout << sum_of_elements(subMat(myBlockMatrix[i])) << std::endl; // this works
}
subBlockMatrix subBlock (myBlockMatrix);
for (int i = 0; i<myBlockMatrix.size(); i++)
{
std::cout << sum_of_elements(subBlock[i])) << std::endl;
}
return 0;
}
For overloading the [] operator, I have:
struct subBlockMatrix : std::vector<matrix4>
{
std::vector<matrix4>& blockMatrix_;
const matrix4& operator[](std::size_t index) const
{
static size_t mapping[] = {0, 1, 2, 4, 5, 6, 8, 9, 10};
return blockMatrix_[mapping[index]];
}
subBlockMatrix(std::vector<matrix4>& A) : blockMatrix_(A) {}
};
But this does not work...
I am having difficulty understanding how to make it work and would really appreciate the help!
Best Regards
I'd start with a matrix view, which starts with an array view.
#include <vector>
#include <iostream>
/// Non-owning view of `length` consecutive elements starting at `b`.
/// The view can also be stepped like an iterator: incrementing it moves the
/// start by `stride` elements, i.e. to the next row of a strided 2D layout.
/// @tparam T      element type
/// @tparam Size   type of the element count
/// @tparam Stride type of the distance between two consecutive rows
template<class T, class Size=std::size_t, class Stride=Size>
struct array_view {
  array_view( T* start, Size sz, Stride s ):
    b(start), length(sz), stride(s) {}
  /// Without an explicit stride, rows are taken to be contiguous (stride == size).
  array_view( T* start, Size sz = {} ):
    b(start), length(sz), stride(sz) {}
  array_view() = default;
  array_view( T* start, T* finish ):
    array_view(start, finish-start) {}

  T* begin() const { return b; }
  T* end() const { return b+length; }
  std::size_t size() const { return length; }
  bool empty() const { return size() == 0; }
  explicit operator bool() const { return b; }
  T& operator[]( std::size_t i ) const { return begin()[i]; }

  array_view& operator++() {
    *this += 1;
    return *this;
  }
  array_view operator++(int)& {
    auto self = *this;
    ++*this;  // was `++this`, which tries to modify the this pointer
    return self;
  }
  array_view& operator--() {
    *this -= 1;
    return *this;
  }
  array_view operator--(int)& {
    auto self = *this;
    --*this;  // was `--this`
    return self;
  }
  // Stepping moves by the stride, not by the row length: the two differ as
  // soon as the view covers only part of each row.
  array_view& operator+=(std::ptrdiff_t delta)&{
    b+=delta*static_cast<std::ptrdiff_t>(stride);
    return *this;
  }
  array_view& operator-=(std::ptrdiff_t delta)&{
    b-=delta*static_cast<std::ptrdiff_t>(stride);
    return *this;
  }
  friend array_view operator+(array_view self, std::ptrdiff_t delta) {
    self += delta;
    return self;
  }
  friend array_view operator-(array_view self, std::ptrdiff_t delta) {
    self -= delta;
    return self;
  }
  friend array_view operator+(std::ptrdiff_t delta, array_view self) {
    self += delta;
    return self;
  }
  friend array_view operator-(std::ptrdiff_t delta, array_view self) {
    self -= delta;
    return self;
  }

  // checks address, not contents
  friend bool operator==(array_view const& lhs, array_view const& rhs) {
    return lhs.b == rhs.b && lhs.length == rhs.length && lhs.stride == rhs.stride;
  }
  friend bool operator!=(array_view const& lhs, array_view const& rhs) {
    return !(lhs==rhs);
  }
private:
  T* b = nullptr;
  Size length = {};
  Stride stride = {};
};
then we get a matrix_view. First we write an index_iterator for an iterator wrapper around something that is a value (like a sequence of rows or columns, or an integer):
/// Iterator adaptor around a value that can itself be stepped (an integer, a
/// row view, ...): dereferencing returns the value, and every arithmetic
/// operation is forwarded to it.
template<class V>
struct index_iterator {
  index_iterator(V v):value(v) {}

  // Dereference: a reference for lvalues, a moved-out copy for temporaries.
  V& operator*()& { return value; }
  V const& operator*() const& { return value; }
  V operator*()&& { return std::move(value); }

  V* operator->() { return std::addressof(value); }
  V const* operator->() const { return std::addressof(value); }

  // Two iterators compare the way their wrapped values do.
  friend bool operator==(index_iterator const& lhs, index_iterator const& rhs ) {
    return lhs.value==rhs.value;
  }
  friend bool operator!=(index_iterator const& lhs, index_iterator const& rhs ) {
    return lhs.value!=rhs.value;
  }

  // Single steps.
  index_iterator& operator++() {
    ++value;
    return *this;
  }
  index_iterator operator++(int)& {
    index_iterator previous(*this);
    ++value;
    return previous;
  }
  index_iterator& operator--() {
    --value;
    return *this;
  }
  index_iterator operator--(int)& {
    index_iterator previous(*this);
    --value;
    return previous;
  }

  // Jumps.
  index_iterator& operator+=( std::ptrdiff_t delta )& {
    value += delta;
    return *this;
  }
  index_iterator& operator-=( std::ptrdiff_t delta )& {
    value -= delta;
    return *this;
  }
  friend index_iterator operator+(index_iterator self, std::ptrdiff_t delta) {
    self.value += delta;
    return self;
  }
  friend index_iterator operator-(index_iterator self, std::ptrdiff_t delta) {
    self.value -= delta;
    return self;
  }
  friend index_iterator operator+(std::ptrdiff_t delta, index_iterator self) {
    self.value += delta;
    return self;
  }
  friend index_iterator operator-(std::ptrdiff_t delta, index_iterator self) {
    self.value -= delta;
    return self;
  }

  /// The value found `i` steps after this iterator.
  V operator[](std::size_t i)const {
    index_iterator shifted(*this);
    shifted += static_cast<std::ptrdiff_t>(i);
    return std::move(shifted.value);
  }
private:
  V value = {};
};
/// Non-owning view of a 2D block inside a larger row-major buffer.
/// Iterating yields `m` row views; each row view covers `n` elements and
/// consecutive row views start `s` elements apart.
template<class T, class M=std::size_t, class Stride=M, class N=M>
struct matrix_view {
  using row_view = array_view<T, N, Stride>;

  matrix_view() = default;
  matrix_view( T* start, M cols={}, Stride stride = {}, N rows={} ):
    b(start), m(cols), s(stride), n(rows)
  {}

  /// Iterator whose value is the view of the first row.
  index_iterator<row_view> begin() const {
    return index_iterator<row_view>(row_view(b, n, s));
  }
  /// Iterator `m` steps after begin().
  index_iterator<row_view> end() const { return begin()+m; }

  std::size_t size() const { return m; }
  bool empty() const { return size() == 0; }
  explicit operator bool() const { return b != nullptr; }

  /// View of the row `i` steps after the first one.
  row_view operator[]( std::size_t i ) const { return begin()[i]; }
private:
  T* b = nullptr;
  M m = {};
  Stride s = {};
  N n = {};
};
template<std::size_t N>
using size = std::integral_constant<std::size_t, N>;
int main(){
std::vector<int> v {00,01,02,03, 10,11,12,13, 20,21,22,23, 30,31,32,33};
matrix_view<int, size<3>, size<4>> m = v.data()+5;
for (auto col:m) {
for (auto e:col) {
std::cout << e << ",";
}
std::cout << "\n";
}
}
and then clean it up with some CRTP DRY and EBO.
But that is just me.
Live example.
Say I have a nest for loop like
for (int x = xstart; x < xend; x++){
for (int y = ystart; y < yend; y++){
for (int z = zstart; z < zend; z++){
function_doing_stuff(std::make_tuple(x, y, z));
}
}
}
and would like to transform it into
MyRange range(xstart,xend,ystart,yend, zstart,zend);
for (auto point : range){
function_doing_stuff(point);
}
How would I write the MyRange class to be as efficient as the nested for loops?
The motivation for this is to be able to use std algorithms (such as transform, accumulate, etc), and to create code that is largely dimension agnostic.
By having an iterator, it would be easy to create templated functions that operate over a range of 1d, 2d or 3d points.
Code base is currently C++14.
EDIT:
Writing clear questions is hard. I'll try to clarify.
My problem is not writing an iterator, that I can do. Instead, the problem is one of performance: Is it possible to make an iterator that is as fast as the nested for loops?
With range/v3, you may do
auto xs = ranges::view::iota(xstart, xend);
auto ys = ranges::view::iota(ystart, yend);
auto zs = ranges::view::iota(zstart, zend);
for (const auto& point : ranges::view::cartesian_product(xs, ys, zs)){
function_doing_stuff(point);
}
You can introduce your own class as
/// A point with three integer coordinates.
class myClass {
public:
    myClass (int x, int y, int z):m_x(x) , m_y(y), m_z(z){};
private:
    int m_x, m_y, m_z;
};  // a class definition must end with a semicolon
and then initialize a std::vector<myClass> with your triple loop
std::vector<myClass> myVec;
myVec.reserve((xend-xstart)*(yend-ystart)*(zend-zstart)); // alloc memory only once;
// Each loop runs over its own interval: x over [xstart, xend), y over
// [ystart, yend) and z over [zstart, zend).
for (int x = xstart; x < xend; x++){
    for (int y = ystart; y < yend; y++){
        for (int z = zstart; z < zend; z++){
            myVec.push_back({x,y,z});
        }
    }
}
Finally, you can use all the nice std algorithms with the std::vector<myClass> myVec. With the syntactic sugar
using MyRange = std::vector<MyClass>;
and
/// Builds the list of all points (x, y, z) with x in [xstart, xend),
/// y in [ystart, yend) and z in [zstart, zend), with z varying fastest.
/// The result is empty if any of the three intervals is empty.
MyRange makeMyRange(int xstart, int xend, int ystart, int yend, int zstart,int zend) {
    MyRange myVec;
    // Reserve only for a non-empty box: a negative extent would turn into a
    // huge unsigned size.
    if (xstart < xend && ystart < yend && zstart < zend) {
        myVec.reserve(static_cast<std::size_t>(xend - xstart) * (yend - ystart) * (zend - zstart));
    }
    for (int x = xstart; x < xend; x++){
        for (int y = ystart; y < yend; y++){
            for (int z = zstart; z < zend; z++){
                myVec.push_back({x, y, z});
            }
        }
    }
    return myVec;  // the variable, not the type name
}
you can write
const MyRange range = makeMyRange(xstart, xend, ystart, yend, zstart, zend);
for (auto point : range){
function_doing_stuff(point);
}
With move semantics this won't create unneeded copies. Please note that the interface to this function is rather bad. Perhaps rather use 3 pairs of int, denoting the x, y, z intervals.
Perhaps you change the names to something meaningful (e.g.myClass could be Point).
Another option, which directly transplants whatever looping code, is to use a Coroutine. This emulates yield from Python or C#.
using point = std::tuple<int, int, int>;
using coro = boost::coroutines::asymmetric_coroutine<point>;
coro::pull_type points(
[&](coro::push_type& yield){
for (int x = xstart; x < xend; x++){
for (int y = ystart; y < yend; y++){
for (int z = zstart; z < zend; z++){
yield(std::make_tuple(x, y, z));
}
}
}
});
for(auto p : points)
function_doing_stuff(p);
Since you care about performance, you should forget about combining iterators for the foreseeable future. The central problem is that compilers cannot yet untangle the mess and figure out that there are 3 independent variables in it, much less perform any loop interchange or unrolling or fusion.
If you must use ranges, use simple ones that the compiler can see through:
for (int const x : boost::irange<int>(xstart,xend))
for (int const y : boost::irange<int>(ystart,yend))
for (int const z : boost::irange<int>(zstart,zend))
function_doing_stuff(x, y, z);
Alternatively, you can actually pass your functor and the boost ranges to a template:
/// Calls `func(a, b, c)` for every combination of an element of r0, an element
/// of r1 and an element of r2. The last range varies fastest.
template <typename Func, typename Range0, typename Range1, typename Range2>
void apply_ranges (Func func, Range0 r0, Range1 r1, Range2 r2)
{
    for (auto const outer : r0)
    {
        for (auto const middle : r1)
        {
            for (auto const inner : r2)
            {
                func (outer, middle, inner);
            }
        }
    }
}
If you truly care about performance, then you should not contort your code with complicated ranges, because they make it harder to untangle later when you want to rewrite them in AVX intrinsics.
Here's a bare-bones implementation that does not use any advanced language features or other libraries. The performance should be pretty close to the for loop version.
#include <tuple>
/// Range over all integer points (x, y, z) with x in [xstart, xend),
/// y in [ystart, yend) and z in [zstart, zend). z varies fastest, as in the
/// equivalent triple nested loop.
class MyRange {
public:
    typedef std::tuple<int, int, int> valtype;

    MyRange(int xstart, int xend, int ystart, int yend, int zstart, int zend): xstart(xstart), xend(xend), ystart(ystart), yend(yend), zstart(zstart), zend(zend) {
    }

    class iterator {
    public:
        /// Iterator to the first point of `c`.
        iterator(MyRange &c): me(&c), curvalue(c.xstart, c.ystart, c.zstart) {
        }

        /// Iterator to the first point of `c`, or to its end when `end` is true.
        iterator(MyRange &c, bool end): me(&c), curvalue(end ? c.xend : c.xstart, c.ystart, c.zstart) {
        }

        valtype operator*() const {
            return curvalue;
        }

        /// Advances like a multi-digit counter: z first, carrying into y and
        /// then into x when a coordinate reaches its end value.
        iterator &operator++() {
            if (++std::get<2>(curvalue) == me->zend) {
                std::get<2>(curvalue) = me->zstart;
                if (++std::get<1>(curvalue) == me->yend) {
                    std::get<1>(curvalue) = me->ystart;
                    ++std::get<0>(curvalue);
                }
            }
            return *this;
        }

        iterator operator++(int) {
            iterator previous(*this);
            ++*this;
            return previous;
        }

        bool operator==(const iterator &other) const {
            return curvalue == other.curvalue;
        }

        bool operator!=(const iterator &other) const {
            return curvalue != other.curvalue;
        }

    private:
        // A pointer rather than a reference, so that iterators stay assignable.
        MyRange *me;
        valtype curvalue;
    };

    /// First point, or end() when the range holds no point at all. Without
    /// that check an empty y or z interval would never reach end().
    iterator begin() {
        return is_empty() ? end() : iterator(*this);
    }

    iterator end() {
        return iterator(*this, true);
    }

private:
    // True when at least one of the three intervals is empty or inverted.
    bool is_empty() const {
        return xstart >= xend || ystart >= yend || zstart >= zend;
    }

    int xstart, xend;
    int ystart, yend;
    int zstart, zend;
};
And an example of usage:
#include <iostream>
// Prints the three tuple elements to std::cout as "(a, b, c)" followed by a
// newline.
void display(std::tuple<int, int, int> v) {
    const int first = std::get<0>(v);
    const int second = std::get<1>(v);
    const int third = std::get<2>(v);
    std::cout << "(" << first << ", " << second << ", " << third << ")\n";
}
int main() {
MyRange c(1, 4, 2, 5, 7, 9);
for (auto v: c) {
display(v);
}
}
I've left off things like const iterators, possible operator+=, decrementing, post increment, etc. They've been left as an exercise for the reader.
It stores the initial values, then increments each value in turn, rolling it back and incrementing the next one when it gets to the end value. It's a bit like incrementing a multi-digit number.
Using boost::iterator_facade for simplicity, you can spell out all the required members.
First we have a class that iterates N-dimensional indexes as std::array<std::size_t, N>
/// Random-access counter over an N-dimensional index space.
///
/// The current position is a std::array<std::size_t, N> treated as a
/// mixed-radix number: digit i runs over [0, ends[i]) and digit 0 is the
/// least significant, so it changes fastest. A carry out of the last digit
/// is discarded, so the counter wraps around to all zeros.
template<std::size_t N>
class indexes_iterator
    : public boost::iterator_facade<indexes_iterator<N>,
                                    std::array<std::size_t, N> const,
                                    boost::random_access_traversal_tag>
{
public:
    /// Creates a counter positioned at all-zero indexes.
    /// @param dims extent of each dimension, in order; extents that are not
    ///             supplied are zero.
    template<typename... Dims>
    indexes_iterator(Dims... dims)
        : ends{ static_cast<std::size_t>(dims)... }, values{}
    {
        static_assert(sizeof...(Dims) <= N, "more extents than dimensions");
    }

private:
    // iterator_facade reaches the private core operations through this friend.
    friend class boost::iterator_core_access;

    void increment() { advance(1); }
    void decrement() { advance(-1); }

    /// Moves the counter by n steps (n may be negative), propagating the
    /// carry or borrow from digit 0 upwards.
    void advance(std::ptrdiff_t n)
    {
        for (std::size_t i = 0; i < N && n != 0; ++i)
        {
            std::ptrdiff_t const extent = static_cast<std::ptrdiff_t>(ends[i]);
            if (extent == 0)
                return; // an empty dimension has no valid index to move to

            // Signed arithmetic so that stepping backwards borrows correctly.
            std::ptrdiff_t const sum = static_cast<std::ptrdiff_t>(values[i]) + n;
            std::ptrdiff_t digit = sum % extent;
            n = sum / extent;
            if (digit < 0)
            {
                // C++ division truncates towards zero; convert to floor form.
                digit += extent;
                --n;
            }
            values[i] = static_cast<std::size_t>(digit);
        }
    }

    /// Number of increments that take *this to `other` (negative if `other`
    /// lies before *this), assuming both count over the same extents.
    std::ptrdiff_t distance_to(indexes_iterator const & other) const
    {
        std::ptrdiff_t result = 0;
        std::ptrdiff_t weight = 1; // place value of digit i
        for (std::size_t i = 0; i < N; ++i)
        {
            result += weight * (static_cast<std::ptrdiff_t>(other.values[i])
                                - static_cast<std::ptrdiff_t>(values[i]));
            weight *= static_cast<std::ptrdiff_t>(ends[i]);
        }
        return result;
    }

    bool equal(indexes_iterator const& other) const
    {
        return values == other.values;
    }

    std::array<std::size_t, N> const & dereference() const { return values; }

    std::array<std::size_t, N> ends;   // extent of each dimension
    std::array<std::size_t, N> values; // current index in each dimension
};
Then we use that to make something similar to a boost::zip_iterator, but instead of advancing all together we add our indexes.
/// Iterator over the Cartesian product of several random-access ranges.
///
/// Dereferencing yields a std::tuple holding `*(begin_k + index_k)` for each
/// underlying range k, where the per-range indexes come from an
/// indexes_iterator. The tuple is returned by value, so it is also named as
/// the facade's Reference type; the default `Value&` would dangle.
///
/// An iterator built from begins and sizes counts how many steps it has
/// taken and is exhausted after `product of sizes` steps. The iterator built
/// from ends only is exhausted from the start; it carries no sizes, so it is
/// meant to be compared against, not advanced or dereferenced.
template <typename... Iterators>
class product_iterator
    : public boost::iterator_facade<
          product_iterator<Iterators...>,
          const std::tuple<decltype(*std::declval<Iterators>())...>,
          boost::random_access_traversal_tag,
          std::tuple<decltype(*std::declval<Iterators>())...>>
{
    using ref = std::tuple<decltype(*std::declval<Iterators>())...>;
public:
    /// Builds the end iterator from the end of each underlying range.
    product_iterator(Iterators ... ends)
        : indexes(),
          iterators(std::make_tuple(ends...)),
          total(0),
          position(0)
    {}

    /// Builds the begin iterator.
    /// @param begins begin iterator of each underlying range
    /// @param sizes  element count of each underlying range, in the same order
    template <typename ... Sizes>
    product_iterator(Iterators ... begins, Sizes ... sizes)
        : indexes(sizes...),
          iterators(begins...),
          total(element_count(sizes...)),
          position(0)
    {
        static_assert(sizeof...(Sizes) == sizeof...(Iterators),
                      "exactly one size per underlying iterator is required");
    }

private:
    // iterator_facade reaches the private core operations through this friend.
    friend class boost::iterator_core_access;

    /// Product of all sizes, i.e. the number of tuples in the whole range.
    template <typename... Sizes>
    static std::ptrdiff_t element_count(Sizes... sizes)
    {
        // The leading 1 keeps the array non-empty and does not change the product.
        std::ptrdiff_t const extents[] = { 1, static_cast<std::ptrdiff_t>(sizes)... };
        std::ptrdiff_t product = 1;
        for (std::ptrdiff_t const extent : extents)
            product *= extent;
        return product;
    }

    template<std::size_t... Is>
    ref dereference_impl(std::index_sequence<Is...> idxs) const
    {
        auto offs = offset(idxs);
        return ref(*std::get<Is>(offs)...);
    }

    ref dereference() const
    {
        return dereference_impl(std::index_sequence_for<Iterators...>{});
    }

    void increment() { ++indexes; ++position; }
    void decrement() { --indexes; --position; }
    void advance(std::ptrdiff_t n) { indexes += n; position += n; }

    /// Steps still available before this iterator is exhausted.
    std::ptrdiff_t remaining() const { return total - position; }

    bool exhausted() const { return position == total; }

    /// Steps from *this to `other`; for two iterators over the same ranges
    /// this is the difference of their positions, and against the end
    /// iterator it is the number of steps left.
    std::ptrdiff_t distance_to(product_iterator const & other) const
    {
        return remaining() - other.remaining();
    }

    /// Each underlying begin iterator moved forward by its current index.
    template<std::size_t... Is>
    std::tuple<Iterators...> offset(std::index_sequence<Is...>) const
    {
        auto idxs = *indexes;
        return std::tuple<Iterators...>((std::get<Is>(iterators) + std::get<Is>(idxs))...);
    }

    bool equal(product_iterator const & other) const
    {
        bool const done = exhausted();
        bool const other_done = other.exhausted();
        // The per-range offsets of a live iterator never all hit their ends at
        // once, so "reached the end" is decided by the step count instead.
        if (done || other_done)
            return done == other_done;
        return offset(std::index_sequence_for<Iterators...>{})
            == other.offset(std::index_sequence_for<Iterators...>{});
    }

    indexes_iterator<sizeof...(Iterators)> indexes;
    std::tuple<Iterators...> iterators;
    std::ptrdiff_t total;    // number of tuples in the product (0 for the end iterator)
    std::ptrdiff_t position; // steps taken from the begin position
};
Then we wrap it up in a boost::iterator_range
template <typename... Ranges>
auto make_product_range(Ranges&&... rngs)
{
product_iterator<decltype(begin(rngs))...> b(begin(rngs)..., std::distance(std::begin(rngs), std::end(rngs))...);
product_iterator<decltype(begin(rngs))...> e(end(rngs)...);
return boost::iterator_range<product_iterator<decltype(begin(rngs))...>>(b, e);
}
// Usage sketch: iterate the product of three iota ranges.
// xstart/xend, ystart/yend and zstart/zend are not declared in this snippet.
int main()
{
    using ranges::view::iota;
    for (auto p : make_product_range(iota(xstart, xend), iota(ystart, yend), iota(zstart, zend)))
    {
        // ... use p here. The braces are required: without them the
        // following `return 0;` would become the loop body.
    }
    return 0;
}
See it on godbolt
Just a very simplified version that will be as efficient as a for loop:
#include <tuple>
// Cursor over a 3-D index space in which x changes fastest, then y, then z.
// x and y wrap back to their *_start value when they reach *_end; z only
// ever grows, and the end of iteration is detected by a separate sentinel.
struct iterator{
    int x;
    int x_start;
    int x_end;
    int y;
    int y_start;
    int y_end;
    int z;

    // Current position as an (x, y, z) tuple.
    constexpr std::tuple<int, int, int>
    operator*() const{
        return std::tuple<int, int, int>(x, y, z);
    }

    // Pre-increment: advance x, carrying into y and then z on wrap-around.
    constexpr iterator&
    operator++ [[gnu::always_inline]](){
        if (++x != x_end)
            return *this;
        x = x_start;
        if (++y != y_end)
            return *this;
        ++z;
        y = y_start;
        return *this;
    }

    // Post-increment: advances and returns the position held before the step.
    constexpr iterator
    operator++(int){
        iterator previous = *this;
        ++*this;
        return previous;
    }
};
// End marker for `iterator`: an iterator compares equal to the sentinel
// exactly when its z member equals z_end. Only z is inspected.
struct sentinel{
    int z_end;

    friend constexpr bool
    operator == (const iterator& x,const sentinel& s){
        return s.z_end==x.z;
    }
    friend constexpr bool
    operator == (const sentinel& a,const iterator& x){
        return a.z_end==x.z;
    }
    friend constexpr bool
    operator != (const iterator& x,const sentinel& a){
        return a.z_end!=x.z;
    }
    friend constexpr bool
    operator != (const sentinel& a,const iterator& x){
        return a.z_end!=x.z;
    }
};
// Bundles a starting iterator with its sentinel so the pair can be used
// directly in a range-based for loop.
struct range{
    iterator start;
    sentinel finish;

    // Copy of the starting iterator.
    constexpr iterator
    begin() const{
        return start;
    }
    // Copy of the end sentinel.
    constexpr sentinel
    end()const{
        return finish;
    }
};
void func(int,int,int);
void test(const range& r){
for(auto [x,y,z]: r)
func(x,y,z);
}
// Reference version with plain nested loops: calls func(x, y, z) for every
// point of [x_start, x_end) x [y_start, y_end) x [z_start, z_end), with x
// changing fastest and z slowest.
void test(int x_start,int x_end,int y_start,int y_end,int z_start,int z_end){
    int z = z_start;
    while (z < z_end) {
        int y = y_start;
        while (y < y_end) {
            int x = x_start;
            while (x < x_end) {
                func(x, y, z);
                ++x;
            }
            ++y;
        }
        ++z;
    }
}
The advantage over 1201ProgramAlarm's answer is the faster test performed at each iteration, thanks to the use of a sentinel.
I would like to dump the contents of the object tmp_np, which is a UllmanSet: if you know a key then you know its position, and if you know a position then you know its key. Is there a standard C++ container that is similar to the UllmanSet? UllmanSet's index starts at 0 and UllmanSet1's index starts at 1.
I would also like to dump the contents of frontierq, which is a PriorityQ.
UllmanSet tmp_np;
/// Set of integer keys stored as two arrays that point at each other.
///
/// `key[0..n)` lists the members, and `pos[k]` is the slot in `key` that was
/// assigned to k. A key k is a member exactly when `pos[k] < n` and
/// `key[pos[k]] == k`, so clear() only has to reset n.
/// NOTE(review): B is forwarded to BasedVector; presumably it is the index
/// base of `pos` (0 or 1) — confirm against BasedVector.
template<unsigned B>
class BasedUllmanSet
{
    size_t n;                     // number of keys currently in the set
    std::vector<int> key;         // members occupy key[0..n)
    BasedVector<unsigned, B> pos; // pos[k]: slot of k inside `key`
public:
    /// Creates an empty set with no storage.
    BasedUllmanSet()
        : n(0)
    {}
    /// Creates an empty set with storage for `cap` entries.
    BasedUllmanSet(size_t cap)
        : n(0), key(cap), pos(cap)
    {}

    size_t capacity() const { return key.size(); }
    size_t size() const { return n; }
    bool empty() const { return n == 0; }

    /// Forgets all members; the arrays are left untouched.
    void clear() { n = 0; }

    /// Resizes both arrays to `cap` entries and empties the set.
    void resize(size_t cap)
    {
        key.resize(cap);
        pos.resize(cap);
        n = 0;
    }

    /// True when k is a member. No range check is done on k before it is
    /// used to index `pos`.
    bool contains(int k) const
    {
        unsigned p = pos[k];
        return (p < n
                && key[p] == k);
    }

    /// Adds k unless it is already a member.
    void insert(int k)
    {
        if (contains(k))
            return;
        append(k);
    }

    /// Adds k, which must not already be a member (checked with assert).
    void extend(int k)
    {
        assert(!contains(k));
        append(k);
    }

    /// Removes k if present by moving the last member into k's slot.
    void erase(int k)
    {
        if (!contains(k))
            return;

        unsigned p = pos[k];
        --n;
        if (p != n)
        {
            int ell = key[n];
            pos[ell] = p;
            key[p] = ell;
        }
    }

    /// Returns the member stored in slot i, 0 <= i < size(). Const so that a
    /// const set can be read, e.g. to print its contents.
    int ith(int i) const
    {
        assert(i >= 0 && i < (int)n);
        return key[i];
    }

private:
    /// Stores k in the next free slot; the caller has ruled out duplicates.
    void append(int k)
    {
        unsigned p = n++;
        key[p] = k;
        pos[k] = p;
    }
};
// BasedUllmanSet instantiated with B = 0.
using UllmanSet = BasedUllmanSet<0>;
// BasedUllmanSet instantiated with B = 1.
using UllmanSet1 = BasedUllmanSet<1>;
The priority queue is implemented as follows. I would like to print out the values inside the queue using std::cout.
PriorityQ<std::pair<int, int>, Comp> frontierq;
// Comparison functor for (first, second) pairs. Returns true when lhs has
// the larger `second`, or when the `second` values are equal and lhs has the
// larger `first`.
class Comp
{
public:
    Comp() {}

    bool operator()(const std::pair<int, int> &lhs,
                    const std::pair<int, int> &rhs) const
    {
        // `second` decides first; `first` only breaks ties.
        if (lhs.second != rhs.second)
            return lhs.second > rhs.second;
        return lhs.first > rhs.first;
    }
};
/// Binary heap stored in a vector that is never shrunk.
///
/// Only v[0..n) belongs to the heap; slots past n are left over from earlier
/// pops and are overwritten by later pushes. Ordering follows
/// std::push_heap / std::pop_heap with `comp`, so top() is the element that
/// `comp` orders last.
///
/// @tparam T    element type
/// @tparam Comp comparison functor passed to the heap algorithms
template <typename T, typename Comp>
class PriorityQ
{
public:
    Comp comp;
    std::vector<T> v;
    unsigned n; // number of live elements; v.size() may be larger
public:
    PriorityQ() : n(0) {}
    PriorityQ(Comp comp_) : comp(comp_), n(0) {}

    size_t size() const { return n; }

    /// Drops all elements but keeps the vector's storage.
    void clear() { n = 0; }

    bool empty() const { return n == 0; }

    /// Adds x, reusing a leftover slot when one is available.
    void push(const T &x)
    {
        assert(v.size() >= n);
        if (v.size() == n)
            v.push_back(x);
        else
            v[n] = x;
        ++n;
        // Iterators instead of &v[n]: with n == v.size() that subscript
        // would index one past the last element.
        std::push_heap(v.begin(), v.begin() + n, comp);
    }

    /// Removes the top element and returns a reference to it. The reference
    /// points into the leftover slot and stays valid until the next push().
    const T &pop()
    {
        assert(n > 0);
        std::pop_heap(v.begin(), v.begin() + n, comp);
        --n;
        return v[n];
    }

    /// Returns the top element without removing it.
    const T &top() const
    {
        assert(n > 0);
        return v[0];
    }
};
C++ standard containers do not get this esoteric. If you have another question about the priority queue, you should make another post.