Using std::sort to sort Matrix - c++

Is there a way I can sort matrix elements with the sort function from std?
//Using this matrix
vector<vector<string>> mat;
For example if you have
5 2 1
0 0 2
1 4 3
The result would be
0 0 1
1 2 2
3 4 5

To store a matrix, the nesting of std::vector is not the best solution. As any row manages its own size, there is no unique column size intrinsically granted by the matrix class.
Assuming that OP is damned to use an existing type (which may not be changed), I wrote my sample based on this.
One solution (with least implementation effort) would be
to copy the matrix (std::vector<std::vector<std::string> >) into a temporary of type std::vector<std::string>
apply std::sort() to this temporary
assign the sorted vector to matrix again.
Considering that moving of elements may be expensive, an alternative could be
to build up an index-pair vector
std::sort() it with a custom less functor (which considers matrix elements).
Afterwards, the index-pair vector may be used to access the original matrix elements in sorted order.
The latter is shown in my sample code:
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
typedef std::vector<std::string> Row;
typedef std::vector<Row> Matrix;
typedef std::pair<size_t, size_t> IndexPair;
struct LessMatrix {
const Matrix &mat;
LessMatrix(const Matrix &mat): mat(mat) { }
bool operator()(const IndexPair &i1, const IndexPair &i2)
{
return mat[i1.first][i1.second] < mat[i2.first][i2.second];
}
};
std::ostream& operator<<(std::ostream &out, const Matrix &mat)
{
for (const Row row : mat) {
for (const std::string elem : row) out << ' ' << elem;
out << '\n';
}
return out;
}
int main()
{
Matrix mat = {
{ "5", "2", "1" },
{ "0", "0", "2" },
{ "1", "4", "3" }
};
// print input
std::cout << "Input:\n" << mat << '\n';
// indexify matrix
std::vector<IndexPair> indices;
for (size_t i = 0, n = mat.size(); i < n; ++i) {
const std::vector<std::string> &row = mat[i];
for (size_t j = 0, m = row.size(); j < m; ++j) {
indices.push_back(std::make_pair(i, j));
}
}
// sort matrix
LessMatrix less(mat);
std::sort(indices.begin(), indices.end(), less);
// print output
Matrix matOut;
{ size_t i = 0; const size_t nCols = 3;
for (const IndexPair &index : indices) {
if (i % nCols == 0) matOut.push_back(Row());
matOut.back().push_back(mat[index.first][index.second]);
++i;
}
}
std::cout << "Output:\n" << matOut << '\n';
// done
return 0;
}
Output:
Input:
5 2 1
0 0 2
1 4 3
Output:
0 0 1
1 2 2
3 4 5
Life demo on coliru
The OP complained about creating a separate index vector. I suspected that a custom random access iterator might be a replacement (to sort the matrix directly). I must admit that I didn't do this before but, out of curiosity, I tried to puzzle this out:
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <cassert>
typedef std::vector<std::string> Row;
typedef std::vector<Row> Matrix;
struct MatrixIterator {
typedef size_t difference_type;
typedef std::string value_type;
typedef std::string* pointer;
typedef std::string& reference;
typedef std::random_access_iterator_tag iterator_category;
// accessed matrix
Matrix &mat;
// index of row, index of column
size_t i, j;
MatrixIterator(Matrix &mat, size_t i = 0, size_t j = 0): mat(mat), i(i), j(j) { }
MatrixIterator& operator =(const MatrixIterator &iter)
{
assert(&mat == &iter.mat);
i = iter.i; j = iter.j;
return *this;
}
size_t nCols() const { return mat.front().size(); }
std::string& operator *() { return mat[i][j]; }
const std::string& operator *() const { return mat[i][j]; }
MatrixIterator& operator ++() { return *this += 1; }
MatrixIterator operator ++(int) { MatrixIterator iter(*this); ++*this; return iter; }
MatrixIterator& operator --() { return *this -= 1; }
MatrixIterator operator --(int) { MatrixIterator iter(*this); --*this; return iter; }
MatrixIterator& operator += (size_t n)
{
j += i * nCols() + n; i = j / nCols(); j %= nCols(); return *this;
}
MatrixIterator operator + (size_t n) const
{
MatrixIterator iter(*this); iter += n; return iter;
}
friend MatrixIterator operator + (size_t n, const MatrixIterator &iter)
{
MatrixIterator iter2(iter); iter2 += n; return iter2;
}
MatrixIterator& operator -= (size_t n)
{
j += i * nCols() - n; i = j / nCols(); j %= nCols();
return *this;
}
MatrixIterator operator - (size_t n) const
{
MatrixIterator iter(*this); iter -= n; return iter;
}
size_t operator - (const MatrixIterator &iter) const
{
return (i * nCols() + j) - (iter.i * iter.nCols() + iter.j);
}
std::string& operator[](size_t i) { return mat[i / nCols()][i % nCols()]; }
const std::string& operator[](size_t i) const { return mat[i / nCols()][i % nCols()]; }
bool operator == (const MatrixIterator &iter) const
{
return i == iter.i && j == iter.j;
}
bool operator != (const MatrixIterator &iter) const { return !(*this == iter); }
bool operator < (const MatrixIterator &iter) const
{
return i * nCols() + j < iter.i * iter.nCols() + iter.j;
}
bool operator > (const MatrixIterator &iter) const { return iter < *this; }
bool operator <= (const MatrixIterator &iter) const { return !(iter > *this); }
bool operator >= (const MatrixIterator &iter) const { return !(*this < iter); }
};
MatrixIterator begin(Matrix &mat) { return MatrixIterator(mat, 0, 0); }
MatrixIterator end(Matrix &mat) { return MatrixIterator(mat, mat.size(), 0); }
std::ostream& operator<<(std::ostream &out, const Matrix &mat)
{
for (const Row row : mat) {
for (const std::string elem : row) out << ' ' << elem;
out << '\n';
}
return out;
}
int main()
{
Matrix mat = {
{ "5", "2", "1" },
{ "0", "0", "2" },
{ "1", "4", "3" }
};
// print input
std::cout << "Input:\n" << mat << '\n';
// sort matrix
std::sort(begin(mat), end(mat));
// print output
std::cout << "Output:\n" << mat << '\n';
// done
return 0;
}
Output:
Input:
5 2 1
0 0 2
1 4 3
Output:
0 0 1
1 2 2
3 4 5
Life demo on coliru
Notes:
I used the descriptions on cppreference.com
C++ concepts: RandomAccessIterator
C++ concepts: BidirectionalIterator
C++ concepts: ForwardIterator
C++ concepts: Iterator
std::iterator_traits
as a "requirement-cheat-sheet" and implemented everything described there. Some of the details, I decided by guess. (Especially, concerning the typedefs I'm not quite sure how to do them correct.)

You can sort the nested vectors without extra data copying by creating a custom wrapper matrix class and define your own iterator:
#include <algorithm>
#include <iterator>
#include <string>
#include <vector>
class MyMatrix {
public:
using DataStore = std::vector<std::vector<std::string> >;
// Note: Make sure MyMatrix DO NOT out-live its `data` argument!
MyMatrix(DataStore& data) : data_(data) {
// Check that
// 1. data.size() > 0;
// 2. data[i].size() > 0 and same for all valid i.
}
class Iterator : public std::iterator<std::random_access_iterator_tag,
std::string, int> {
public:
Iterator(DataStore& data) : data_(&data), index_(0) {}
Iterator(DataStore& data, int index) : data_(&data), index_(index) {}
Iterator(const Iterator& it) : data_(it.data_), index_(it.index_) {}
Iterator& operator=(const Iterator& it) {
data_ = it.data_;
index_ = it.index_;
}
operator bool() const {
return index_ >= 0 && index_ < data_->size() * (*data_)[0].size();
}
bool operator==(const Iterator& it) const {
return data_ == it.data_ && index_ == it.index_;
}
bool operator!=(const Iterator& it) const {
return data_ != it.data_ || index_ != it.index_;
}
Iterator& operator++() { ++index_; return *this; }
Iterator& operator--() { --index_; return *this; }
Iterator operator++(int) { return Iterator(*data_, index_++); }
Iterator operator--(int) { return Iterator(*data_, index_--); }
Iterator& operator+=(int offs) { index_ += offs; return *this; }
Iterator& operator-=(int offs) { index_ -= offs; return *this; }
Iterator operator+(int offs) { return Iterator(*data_, index_ + offs); }
Iterator operator-(int offs) { return Iterator(*data_, index_ - offs); }
int operator-(const Iterator& it) { return index_ - it.index_; }
std::string& operator*() {
return (*data_)[index_ / data_->size()][index_ % (*data_)[0].size()];
}
const std::string& operator*() const { return operator*(); }
private:
DataStore* data_;
int index_;
}; // class Iterator
Iterator iterator() { return Iterator(data_); }
Iterator begin() { return Iterator(data_, 0); }
Iterator end() { return Iterator(data_, data_.size() * data_[0].size()); }
private:
DataStore& data_;
}; // class MyMatrix
Then sort could be applied to MyMatrix as follows:
#include <iostream>
int main()
{
std::vector<std::vector<std::string> > store = {
{ "5", "2", "1" },
{ "0", "0", "2" },
{ "1", "4", "3" }
};
MyMatrix matrix(store);
for (const auto& row : store) {
for (const auto& item : row) { std::cout << item <<' '; }
std::cout <<'\n';
}
std::cout <<'\n';
std::sort(matrix.begin(), matrix.end());
for (const auto& row : store) {
for (const auto& item : row) { std::cout << item <<' '; }
std::cout <<'\n';
}
std::cout <<'\n';
}
Running the above code will result in the following output:
5 2 1
0 0 2
1 4 3
0 0 1
1 2 2
3 4 5

Related

How to define a RandomAccessIterator over a pointer to a vector of chars?

I am implementing a kind of dataframe and I want to define a RandomAccessIterator over it, in order to execute the different std algorithms, such as the sorting one. The dataframe of the example contains two column "a" and "b":
a; b;
20; 21;
20; 19;
10; 11;
40; 41;
10; 11;
After sorting with a trivial selection sort this is the result:
a; b;
10; 11;
10; 11;
20; 19;
20; 21;
40; 41;
The problem that I am facing is that the std::sort does not work properly. And I don't know weather the implementation of the iterator is sound or not.
This is the code.
File: dataframe.hpp
#pragma once
#include <iostream>
#include <charconv>
#include <vector>
#include <memory>
#include <cstring>
#include <numeric>
#include "iterator.hpp"
namespace df
{
class Record;
class Column;
class Dataframe;
namespace types
{
enum class Base : char
{
CHAR = 'A',
UNSIGNED = 'U',
// Other types..
};
class Dtype
{
public:
Dtype(types::Base base, std::size_t size) : m_base_dtype{base}, m_size{size} {}
[[nodiscard]] auto name() const
{
return std::string{static_cast<char>(m_base_dtype)} + std::to_string(m_size);
}
[[nodiscard]] auto base() const { return m_base_dtype; }
[[nodiscard]] auto size() const { return m_size; }
[[nodiscard]] auto is_primitive() const
{
switch (base())
{
case types::Base::CHAR:
return size() == 1;
case types::Base::UNSIGNED:
return size() == 1 or size() == 2 or size() == 4 or size() == 8;
}
return false;
}
private:
types::Base m_base_dtype;
std::size_t m_size;
};
[[nodiscard]] static auto CHAR(const std::size_t size) { return Dtype(types::Base::CHAR, size); }
[[nodiscard]] static auto UNSIGNED(const std::size_t size) { return Dtype(types::Base::UNSIGNED, size); }
}
class Column
{
public:
Column(std::vector<char> &raw, const types::Dtype dtype) : m_raw{std::move(raw)}, m_dtype{dtype} {}
Column &operator=(Column &&c) = default; // Move constructor
[[nodiscard]] const auto &dtype() const { return m_dtype; }
[[nodiscard]] auto &raw() { return m_raw; }
[[nodiscard]] const auto &raw() const { return m_raw; }
[[nodiscard]] auto *data() { return m_raw.data(); }
[[nodiscard]] const auto *data() const { return m_raw.data(); }
private:
std::vector<char> m_raw;
types::Dtype m_dtype;
};
class Dataframe
{
public:
Dataframe(std::vector<char> &raw, std::vector<std::string> names, std::vector<types::Dtype> dtypes)
{
m_raw = std::move(raw);
m_column_dtypes = dtypes;
m_column_names = names;
m_record_size = 0;
for (const auto dt : dtypes)
{
m_column_offsets.emplace_back(m_record_size);
m_record_size += dt.size();
}
m_record_count = m_raw.size() / m_record_size;
}
Dataframe(std::vector<char> &raw, std::vector<types::Dtype> dtypes) : Dataframe(raw, {}, dtypes) {}
Dataframe &operator=(Dataframe &&c) = default; // Move constructor
[[nodiscard]] auto &raw() { return m_raw; }
[[nodiscard]] const auto &raw() const { return m_raw; }
[[nodiscard]] auto *data() { return m_raw.data(); }
[[nodiscard]] const auto *data() const { return m_raw.data(); }
// Iterators
[[nodiscard]] df::Iterator begin()
{
return df::Iterator{m_raw.data(), m_record_size};
}
[[nodiscard]] df::Iterator end()
{
return df::Iterator{m_raw.data() + m_raw.size(), m_record_size};
}
[[nodiscard]] auto shape() const { return std::make_pair(m_record_count, m_column_dtypes.size()); }
[[nodiscard]] auto record_count() const { return m_record_count; }
[[nodiscard]] auto record_size() const { return m_record_size; }
[[nodiscard]] const auto &names() const { return m_column_names; }
[[nodiscard]] const auto &dtypes() const { return m_column_dtypes; }
[[nodiscard]] const auto &offsets() const { return m_column_offsets; }
void print() { print(m_record_count); }
void print(const std::size_t initial_records)
{
// Print header
for (auto column_name : m_column_names)
{
std::cout << column_name << "; ";
}
std::cout << std::endl;
// Print rows
std::size_t records_to_print = std::min(initial_records, m_record_count);
for (std::size_t i = 0; i < records_to_print; i++)
{
const auto start_p = i * record_size();
auto start_field = 0;
auto end_field = 0;
for (auto field : m_column_dtypes)
{
end_field += field.size();
switch (field.base())
{
case types::Base::UNSIGNED:
{
std::uint64_t uint_value = 0;
memcpy(&uint_value, m_raw.data() + start_p + start_field, field.size());
std::cout << uint_value;
break;
}
case types::Base::CHAR:
{
std::string str_value = std::string(m_raw.data() + start_p + start_field, field.size());
std::cout << str_value;
break;
}
}
start_field = end_field;
// New column
std::cout << "; ";
}
// New row
std::cout << std::endl;
}
}
std::shared_ptr<Dataframe> copy() const
{
auto x = std::vector<char>(m_raw);
return std::make_shared<Dataframe>(x, std::vector<std::string>(m_column_names), std::vector<types::Dtype>(m_column_dtypes));
}
private:
std::vector<char> m_raw = {};
std::vector<std::string> m_column_names = {};
std::vector<types::Dtype> m_column_dtypes = {};
std::vector<std::size_t> m_column_offsets = {};
std::size_t m_record_size = {};
std::size_t m_record_count = {};
};
using namespace types;
static std::shared_ptr<Dataframe> read_from_vector(const std::vector<std::vector<std::string>> values, const std::vector<std::string> names, const std::vector<Dtype> dtypes)
{
const auto record_size = std::accumulate(dtypes.begin(), dtypes.end(), std::size_t{0},
[](std::size_t accum, const auto &m)
{ return accum + m.size(); });
const auto total_size = values.size() * record_size;
const std::size_t INCR_RECORDS = std::max(total_size / (10 * record_size), std::size_t{65536});
auto raw = std::vector<char>{};
std::size_t written_records = 0;
auto offsets = std::vector<std::size_t>{};
for (int offset = 0; const auto &kd : dtypes)
{
offsets.push_back(offset);
offset += kd.size();
}
for (auto value : values)
{
if (written_records >= raw.size() / record_size)
{
raw.resize(raw.size() + INCR_RECORDS * record_size, char{' '});
}
for (int i = 0; i < names.size(); i++)
{
const auto name = names[i];
const auto dtype = dtypes[i];
const auto offset = offsets[i];
const auto pos = written_records * record_size + offset;
switch (dtype.base())
{
case df::Base::CHAR:
{
const auto v = value[i];
const auto byte_to_copy = std::min(v.size(), dtype.size());
std::memcpy(raw.data() + pos,
v.data() + v.size() - byte_to_copy, byte_to_copy); // Prendo gli ultimi byte
break;
}
case df::Base::UNSIGNED:
{
const auto v = std::stoull(value[i]);
const auto byte_to_copy = dtype.size();
std::memcpy(raw.data() + pos, &v, byte_to_copy); // Prendo gli ultimi byte
break;
}
default:
throw std::runtime_error("ColumnType non riconosciuto");
}
}
written_records++;
}
raw.resize(written_records * record_size);
raw.shrink_to_fit();
return std::make_shared<Dataframe>(raw, names, dtypes);
}
}
File: iterator.hpp
#pragma once
#include <iostream>
#include <cstring>
namespace df
{
class Iterator
{
std::size_t size;
char *ptr;
public:
struct record_reference;
struct record_value
{
std::size_t size;
char *ptr;
record_value(const record_reference &t) : record_value(t.size, t.ptr){};
record_value(const std::size_t m_size, char *m_ptr)
{
this->size = m_size;
this->ptr = new char[this->size];
std::memcpy(ptr, m_ptr, this->size);
}
~record_value()
{
delete[] this->ptr;
}
};
struct record_reference
{
std::size_t size;
char *ptr;
record_reference(const std::size_t m_size, char *m_ptr)
{
this->size = m_size;
this->ptr = m_ptr;
}
record_reference(const record_reference &t)
{
this->size = t.size;
this->ptr = t.ptr;
}
// record_reference(const record_value &t) : record_reference(t.size, t.ptr) {};
record_reference &operator=(const record_value &t)
{
std::memcpy(ptr, t.ptr, size);
return *this;
}
record_reference &operator=(const record_reference &t)
{
std::memcpy(ptr, t.ptr, size);
return *this;
}
record_reference &operator=(char *t)
{
std::memcpy(ptr, t, size);
return *this;
}
operator char *()
{
return ptr;
}
operator const char *() const { return ptr; }
};
using iterator_category = std::random_access_iterator_tag;
using value_type = record_value;
using reference = record_reference;
using difference_type = std::ptrdiff_t;
// default constructible
Iterator() : size(0), ptr(nullptr)
{
}
// copy assignable
Iterator &operator=(const Iterator &t)
{
size = t.size;
ptr = t.ptr;
return *this;
}
Iterator(char *ptr, const std::size_t size) : size{size}, ptr(ptr)
{
}
record_reference operator*() const
{
return {size, ptr};
}
// Prefix
Iterator &operator++()
{
ptr += size;
return *this;
}
// Postfix
Iterator operator++(int)
{
auto tmp = *this;
++*this;
return tmp;
}
Iterator &operator--()
{
ptr -= size;
return *this;
}
difference_type operator-(const Iterator &it) const
{
return (this->ptr - it.ptr) / size;
}
Iterator operator+(const difference_type &offset) const
{
return Iterator(ptr + offset * size, size);
}
friend Iterator operator+(const difference_type &diff, const Iterator &it)
{
return it + diff;
}
Iterator operator-(const difference_type &diff) const
{
return Iterator(ptr - diff * size, size);
}
reference operator[](const difference_type &offset) const
{
return {size, ptr + offset * size};
}
bool operator==(const Iterator &it) const
{
return this->ptr == it.ptr;
}
bool operator!=(const Iterator &it) const
{
return !(*this == it);
}
bool operator<(const Iterator &it) const
{
return this->ptr < it.ptr;
}
bool operator>=(const Iterator &it) const
{
return this->ptr >= it.ptr;
}
bool operator>(const Iterator &it) const
{
return this->ptr > it.ptr;
}
bool operator<=(const Iterator &it) const
{
return this->ptr <= it.ptr;
}
Iterator &operator+=(const difference_type &diff)
{
ptr += diff * size;
return *this;
}
operator Iterator() const
{
return Iterator(ptr, size);
}
};
void swap(df::Iterator::record_reference a, df::Iterator::record_reference b)
{
unsigned char *p;
unsigned char *q;
unsigned char *const sentry = (unsigned char *)a.ptr + a.size;
for (p = (unsigned char *)a.ptr, q = (unsigned char *)b.ptr; p < sentry; ++p, ++q)
{
const unsigned char t = *p;
*p = *q;
*q = t;
}
}
}
File: comparator.hpp
#pragma once
#include <memory>
#include <functional>
#include "dataframe.hpp"
#include "iterator.hpp"
namespace compare
{
using comparator_fn = std::function<int(const df::Iterator::record_reference, const df::Iterator::record_reference)>;
template <typename T, std::size_t offset = 0, std::size_t size = sizeof(T)>
static inline comparator_fn make_comparator()
{
if constexpr (size == 3 or size == 5 or size == 7 or size > 8)
return [=](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
{ return std::memcmp(a + offset, b + offset, size); };
return [](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
{ return *(T *)(a + offset) < *(T *)(b + offset) ? -1 : *(T *)(b + offset) < *(T *)(a + offset) ? +1
: 0; };
}
template <typename T>
static inline comparator_fn make_comparator(const std::size_t offset)
{
return [=](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
{ return *(T *)(a + offset) < *(T *)(b + offset) ? -1 : *(T *)(b + offset) < *(T *)(a + offset) ? +1
: 0; };
}
static inline comparator_fn make_column_comparator(const df::Dtype dtype, const std::size_t offset)
{
switch (dtype.base())
{
case df::Base::CHAR:
{
if (dtype.size() == 1)
return make_comparator<std::uint8_t>(offset);
else if (dtype.size() == 2)
return [=](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
{ return std::memcmp(a + offset, b + offset, 2); }; // C'� qualche beneficio a fissare il 2? o conviene trattarlo come uno unsigned short?
return [=](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
{ return std::memcmp(a + offset, b + offset, dtype.size()); };
}
case df::Base::UNSIGNED:
{
return [=](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
{
std::uint64_t uint_value_a = 0;
std::uint64_t uint_value_b = 0;
std::memcpy(&uint_value_a, a + offset, dtype.size());
std::memcpy(&uint_value_b, b + offset, dtype.size());
return (uint_value_a < uint_value_b ? -1 : uint_value_a > uint_value_b ? +1
: 0);
};
}
default:
throw std::runtime_error("Unsupported dtype");
break;
}
}
static inline comparator_fn make_composite_two_way_comparator(const std::shared_ptr<df::Dataframe> &T)
{
const auto K = T->dtypes().size();
std::vector<comparator_fn> F;
for (int i = 0; i < K; i++)
{
F.emplace_back(make_column_comparator(T->dtypes()[i], T->offsets()[i]));
}
const auto comparator = [=](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
{
for (int i = 0; i < K; i++)
{
// If equal go to the next column, otherwise return the result
// The return value is true if the first argument is less than the second
// and false otherwise
if (const auto result = F[i](a, b); result != 0)
return result < 0;
}
return false;
};
return comparator;
}
}
File: main.cpp
#include <iostream>
#include <vector>
#include "dataframe.hpp"
#include "comparator.hpp"
template <typename RandomAccessIterator, typename Comparator>
static void selection_sort(RandomAccessIterator first, RandomAccessIterator last, Comparator comp)
{
for (auto i = first; i != last; ++i)
{
auto min = i;
for (auto j = i + 1; j != last; ++j)
{
if (comp(*j, *min))
min = j;
}
df::Iterator::value_type temp = *i;
*i = *min;
*min = temp;
// Alternative
// std::iter_swap(i, min);
}
}
int main(int argc, char const *argv[])
{
std::vector<std::string> values{"20", "21", "20", "19", "10", "11", "40", "41", "10", "11"};
// Create a vector that contains values grouped by 2
std::vector<std::vector<std::string>> v;
for (int i = 0; i < values.size(); i += 2)
{
std::vector<std::string> temp;
temp.push_back(values[i]);
temp.push_back(values[i + 1]);
v.push_back(temp);
}
std::vector<std::string> column_names = {"a", "b"};
df::Dtype d = df::Dtype(df::Base::UNSIGNED, 4);
std::vector dtypes = {d, d};
// Create a dataframe
std::shared_ptr<df::Dataframe> df = df::read_from_vector(v, column_names, dtypes);
std::cout << "Before sorting" << std::endl;
df->print();
// This comparator sorts the dataframe first by column a and then by column b in ascending order
auto comparator = compare::make_composite_two_way_comparator(df);
selection_sort(df->begin(), df->end(), comparator);
std::cout << "\nAfter sorting" << std::endl;
df->print();
// With the std::sort it does not work
std::sort(df->begin(), df->end(), comparator);
return 0;
}
Your type is not a C++17 RandomAccessIterator, because it isn't a C++17 ForwardIterator, because reference is an object type, not a reference type.
The type It satisfies ForwardIterator if
Let T be the value type of It. The type std::iterator_traits<It>::reference must be either
T& or T&& if It satisfies OutputIterator (It is mutable), or
const T& or const T&& otherwise (It is constant),
(Other requirements elided)
You will be able to satisfy the C++20 concept std::random_access_iterator, because that relaxes the requirement on It::reference.
In C++17, the reference type of an iterator must be precisely value_type& in order for that iterator to be random access. Only input iterators can have the reference type be something other than value_type&. So in C++17, proxy iterators are limited to input iterators. And every algorithm written against C++17 has this expectation.
The C++20 ranges library adds the ability to have random access proxy iterators. And the C++20 algorithms that use those range concepts will respect them.

Relatively simple custom container signature does not fulfill <ranges> concept

I am trying to use <ranges> (C++20 MSVC) to iterate a custom container called Span, whose begin/end() methods return a SpanIter. However, the following static assert fails in the test() method and the pipe operator is unrecognized with my class (but not vector<>).
static_assert(std::ranges::range<Span>, "Is not a range");
I think the method signatures match this working example. I haven't been able to track down what's missing, whether a method or typedef/using.
#include <ranges>
#include <iostream>
#include <vector>
namespace ptest {
class SpanIter;
struct Span;
struct Span : std::ranges::view_base
{
using iterator = SpanIter;
using sentinel = SpanIter;
int start;
int length;
int index;
Span() noexcept { start = length = index = 0; }
Span(int s, int l, int i) noexcept : start(l), length(l), index(i) { }
Span(const Span& other) noexcept : Span(other.start, other.length, other.index) { }
Span(const Span&& other) noexcept : Span(other.start, other.length, other.index) { }
Span& operator=(const Span& other) { start = other.start; length = other.length; index = other.index; }
bool operator==(const Span& other) const { return start == other.start && length == other.length && index == other.index; }
iterator begin();
sentinel end();
iterator cbegin() const;
sentinel cend() const;
size_t size() { return length; }
void Print() {
// eg. "range[5 - 9]: 5"
std::cout << "range [" << start << "-"
<< (start + length - 1)
<< "]: " << index << std::endl;
}
auto operator <=> (const Span& other) const = default;
};
class SpanIter {
Span Empty;
using value_type = Span;
using reference = Span&;
using const_reference = const Span&;
using iterator = SpanIter;
using sentinel = SpanIter;
using const_iterator = const SpanIter;
using iterator_category = std::forward_iterator_tag;
using difference_type = ptrdiff_t;
using pointer = Span*;
using size_type = size_t;
public:
SpanIter() noexcept { }
SpanIter(const SpanIter& other) noexcept : current_(other.current_) { }
SpanIter(const SpanIter&& other) noexcept : current_(other.current_) { }
SpanIter(Span t) noexcept : current_(t) {
current_.index = current_.start;
}
~SpanIter() { }
SpanIter& operator++() {
if (current_.index < current_.start + current_.length)
++current_.index;
return *this;
}
SpanIter operator++(int) {
SpanIter result = *this;
++(*this);
return result;
}
SpanIter& operator = (const SpanIter& other)
{
current_ = other.current_;
return *this;
}
bool operator==(const SpanIter& other) const {
return this->operator*() == *other;
}
bool operator!=(const SpanIter& other) const {
return !(*this == other);
}
template<typename T>
bool operator==(T&& value) const {
return this->operator*() == std::forward<T>(value);
}
template<typename T>
bool operator!=(T&& value) const {
return !(*this == value);
}
Span& operator*() {
return current_.start <= current_.index &&
current_.index < current_.start + current_.length
? current_
: Empty;
}
const Span& operator*() const {
return current_.start <= current_.index &&
current_.index < current_.start + current_.length
? current_
: Empty;
}
private:
Span current_;
};
SpanIter Span::begin() { return SpanIter(*this); }
Span::sentinel Span::end() { return SpanIter(Span()); }
SpanIter Span::cbegin() const { return SpanIter(*this); }
Span::sentinel Span::cend() const { return SpanIter(Span()); }
SpanIter begin(const Span& span) { return span.cbegin(); }
Span::sentinel end(const Span& span) { return span.cbegin(); }
void test() {
// WORKS (confirm pipes work with vector)
// output: 5 6
std::vector<int> v { 5, 6, 7, 8, 9};
auto works = v | std::views::take(2);
for (int i : works)
std::cout << i << " ";
// FAILS !!!!!!!!!!!!!!!!!!!
// <ranges> doesn't recognize my Span container as a range
static_assert(std::ranges::range<Span>, "Is not a range");
// FAILS !!!!!!!!!!!!!!!!!!!
// binary '|': 'Span' does not define this operator or a conversion
// to a type acceptable to the predefined operator
Span span(6, 5, 8);
auto fails = span | std::views::take(2);
// WORKS: confirm my Span operates as a container with normal C++ 11 features
// output:
// range[5 - 9] : 5
// range[5 - 9] : 6
// range[5 - 9] : 7
// range[5 - 9] : 8
// range[5 - 9] : 9
for (Span s : span)
s.Print();
}
}
The problem was that using statements for iterator traits need to be public ... that's it. This is surely obvious to most people familiar with C++, but may help other relative newbies like myself.

Abstraction for bitset element iteration

I have a custom bitset class implementation in C++. I often iterate over the indexes of bits that are set in the bitset (i.e. for bitset '10011' I want to iterate over numbers 0, 3, 4.) This iteration can be implemented as follows:
struct Bitset {
uint64_t* data_;
size_t chunks_;
std::vector<int> Elements() const {
std::vector<int> ret;
for (size_t i=0;i<chunks_;i++){
uint64_t td = data_[i];
while (td) {
ret.push_back(i*BITS + __builtin_ctzll(td));
td &= ~-td;
}
}
return ret;
}
};
void Iterate(Bitset bitset) {
for (int b : bitset.Elements()) {
std::cout << "bit: " << b << std::endl;
}
}
The above implementation provides clean code for the iteration, but it involves an unnecessary heap allocation with the vector. The following version which essentially inlines the Elements() function is often faster:
void Iterate(Bitset bitset) {
int chunks = bitset.chunks_;
for (int i = 0; i < chunks; i++) {
uint64_t td = bitset.data_[i];
while (td) {
std::cout << "bit: " << i*BITS + __builtin_ctzll(td) << std::endl;
td &= ~-td;
}
}
}
What would be a good way to implement an abstraction for the iteration so that it would be as clean as the above version, but also with no performance cost.
Just iterate over your class. Provide your own implementation of an iterator class for your Bitset and provide begin() and end() methods. A simplest (untested!) implementation could look something like this:
#include <vector>
#include <cstdint>
#include <iostream>
struct Bitset {
uint64_t* data_;
size_t chunks_;
struct iterator {
uint64_t *pnt;
uint_fast8_t pos;
iterator(uint64_t *pnt, size_t pos) :
pnt(pnt), pos(pos) {}
bool operator !=(const iterator& o) {
return o.pnt != pnt || o.pos != pos;
}
void operator ++() {
pos++;
if (pos == 64) {
pnt++;
pos = 0;
}
}
bool operator *() {
return *pnt & (1 << pos);
}
};
iterator begin() { return iterator(data_, 0); }
iterator end() { return iterator(data_ + chunks_, 64); }
};
void Iterate(Bitset bitset) {
for (auto&& b : bitset) {
std::cout << "bit: " << b << std::endl;
}
}
I believe for your strange while (td) { ... i*BITS + __builtin_ctzll(td) ... loop that I don't understand that could be something along (untested!):
constexpr int BITS = 100000;
struct Bitset {
uint64_t* data_;
size_t chunks_;
struct iterator {
uint64_t *data_;
int i = 0;
uint64_t td = 0;
iterator(uint64_t *data_, int i, uint64_t td) :
data_(data_), i(i), td(td) {}
bool operator !=(const iterator& o) {
return o.data_ != data_ || o.i != i || o.td != td;
}
void operator ++() {
if (td == 0) {
td = *data_;
data_++;
} else {
td &= ~-td;
}
}
bool operator *() {
return i * BITS + __builtin_ctzll(td);
}
};
iterator begin() { return iterator(data_, 0, *data_); }
iterator end() { return iterator(data_ + chunks_, 0, 0); }
};
As KamilCuk suggested, I used an iterator to solve this problem. Now the implementation looks like:
struct Bitset {
uint64_t* data_;
size_t chunks_;
class BitsetIterator {
private:
const Bitset* const bitset_;
size_t pos_;
uint64_t tb_;
public:
BitsetIterator(const Bitset* const bitset, size_t pos, uint64_t tb) :
bitset_(bitset), pos_(pos), tb_(tb) { }
bool operator!=(const BitsetIterator& other) const {
return pos_ != other.pos_ || tb_ != other.tb_;
}
const BitsetIterator& operator++() {
tb_ &= ~-tb_;
while (tb_ == 0 && pos_ < bitset_->chunks_) {
pos_++;
if (pos_ < bitset_->chunks_) {
tb_ = bitset_->data_[pos_];
}
}
return *this;
}
int operator*() const {
return pos_*BITS + __builtin_ctzll(tb_);
}
};
BitsetIterator begin() const {
size_t pos = 0;
while (pos < chunks_ && data_[pos] == 0) {
pos++;
}
if (pos < chunks_) {
return BitsetIterator(this, pos, data_[pos]);
} else {
return BitsetIterator(this, pos, 0);
}
}
BitsetIterator end() const {
return BitsetIterator(this, chunks_, 0);
}
};
void Iterate(Bitset bitset) {
for (int b : bitset) {
std::cout << "bit: " << b << std::endl;
}
}
This avoids heap allocation and is much faster than the version that uses vector. I'm not sure if this provides exactly same performance as the version without any abstractions, but it should be very close.

How to prevent infinite loop of custom iterator

I implemented my first custom iterator, which is supposed to return an std::pair at each iteration. The only problem, is it loops infinitely and I do not know how to provide stop condition. The code looks like so:
#include <iostream>
#include <vector>
#include <utility>
class Simple
{
private:
std::vector<std::size_t> indices;
std::vector<int> values;
public:
void insert(std::size_t index, int value)
{
indices.push_back(index);
values.push_back(value);
}
int at(std::size_t index)
{
return values[indices[index]];
}
class Iterator
{
private:
const std::vector<std::size_t>* indices;
const std::vector<int>* values;
std::size_t pos = 0;
public:
Iterator(const std::vector<std::size_t>* indices_, const std::vector<int>* values_, const std::size_t &pos_ = 0):
values(values_), indices(indices_), pos(pos_){ }
bool operator==(const Iterator& other){
return this == &other;
}
bool operator!=(const Iterator& other){
return !operator==(other);
}
Iterator operator++() {
pos++;
Iterator i = *this;
return i;
}
std::pair<std::size_t, int> operator*()
{
if (pos < (*values).size())
{
return std::make_pair((*indices)[pos], (*values)[pos]);
}
std::cout << "EMPTY PAIR" << std::endl; //loops infinitely and prints this message
return std::pair<std::size_t,int>{};
}
std::pair<std::size_t, int>* operator->()
{
if (pos < (*values).size())
{
std::pair<std::size_t,int> *p;
*p = std::make_pair((*indices)[pos], (*values)[pos]);
return p;
}
return nullptr;
}
};
Iterator begin() const
{
return Iterator(&indices, &values, 0);
}
Iterator end() const
{
return Iterator(&indices, &values, values.size());
}
};
int main() {
Simple s;
s.insert(10, 100);
std::cout << s.at(10) << std::endl;
int i = 0;
for (const std::pair<std::size_t, int> &p : s)
{
std::cout << p.first << " " << p.second << std::endl;
if (i > 3) break; // otherwise it will loop infinitely
i++;
}
return 0;
}
I would like to stress, that this example is simplified and for demonstration purposes, so you may not ask me, why I'm not using map or unorded_map (just because what I'm implementing looks like a map). The question is solely about iterator and I need understanding what may be wrong with iterator itself and how to provide stop condition.

Create Iterator to C++ container that returns a std::pair

I'm trying to implement a container in C++ that uses a flat array to store the data but iterates over that data in pairs. Now I could easily change the implementation such that the container holds a vector of std::pair however I want to iterate through pairs starting at element 0 or at element 1.
To illustrate what I want to achieve, if my underlying array looks like:
1,2,3,4,5,6,7,8
I want to define two iterators, one which returns the pairs:
(1,2), (3,4), (5,6), (7,8)
and the second iterator to return the pairs:
(2,3), (4,5), (6,7)
is this possible to do while still allowing the elements of the iterator to be references of the underlying array?
It is possible to write your own iterator, which iterates over the elements. The following question shows some explanations on how that is done:
Custom Iterator in C++.
You can then return the desired values as std::pair and iterate to the next element-pair (by incrementing the counter by 2).
Boost library has got Iterator Adaptor that allows you to wrap other iterator types and change or adapt their functionality. Here's how you could use it for your purpose:
#include <boost/iterator/iterator_adaptor.hpp>
#include <vector>
struct iterator :
public boost::iterator_adaptor<
iterator, // the name of our class, see docs for details
std::vector<int>::iterator, // underlying base iterator
std::pair<int&, int&>, // our value type
boost::forward_traversal_tag // the category you wish to give it
>
{
// need this to convert from vector::iterator to ours
explicit iterator(std::vector<int>::iterator i)
: iterator::iterator_adaptor_(i) {}
value_type operator*()
{
return value_type(
*base_reference(),
*(base_reference()+1)
);
}
};
Example of usage:
std::vector<int> v {1,2,3,4};
iterator it(v.begin());
++it;
(*it).first = 0; // TODO: operator->
(*it).second = 0;
for (int i : v) std::cout << i << ' '; // prints 1 0 0 4
You'll also need to override comparison to properly handle end condition, etc. Hope that helps.
Just thought I'd put in what I actually used in my code. I didn't want to use boost as suggested by #jrok but the type std::pair<int&, int&> in their answer gave me a hint of what was required.
Below is the class that I constructed which uses two iterators. A RepeatIterator that returns pairs starting on even indexes in the underlying data, and a SpacerIterator that returns pairs starting on odd indexes.
class RepeatArray {
typedef std::vector<int> storage_t;
public:
class RepeatIterator {
public:
typedef RepeatIterator self_t;
typedef int value_t;
typedef int& reference_t;
typedef int* pointer_t;
typedef std::pair<reference_t, reference_t> return_t;
RepeatIterator(storage_t::iterator input) : current_pos(input){}
return_t operator *() {
return return_t(*(current_pos), *(current_pos + 1 ));
}
self_t operator++() { self_t i = *this; current_pos += 2; return i; }
self_t operator++(int junk) { current_pos+=2; return *this; }
bool operator==(const self_t& rhs) { return current_pos == rhs.current_pos; }
bool operator!=(const self_t& rhs) { return current_pos != rhs.current_pos; }
bool operator<(const self_t& rhs) { return current_pos < rhs.current_pos; }
bool operator<=(const self_t& rhs) { return current_pos <= rhs.current_pos; }
bool operator>(const self_t& rhs) { return current_pos > rhs.current_pos; }
bool operator>=(const self_t& rhs) { return current_pos >= rhs.current_pos; }
private:
storage_t::iterator current_pos;
};
class SpacerIterator {
public:
typedef SpacerIterator self_t;
typedef int value_t;
typedef int& reference_t;
typedef int* pointer_t;
typedef std::pair<reference_t, reference_t> return_t;
SpacerIterator(storage_t::iterator input) : current_pos(input){}
return_t operator *() {
return return_t(*(current_pos), *(current_pos + 1 ));
}
self_t operator++() { self_t i = *this; current_pos += 2; return i; }
self_t operator++(int junk) { current_pos+=2; return *this; }
bool operator==(const self_t& rhs) { return current_pos == rhs.current_pos; }
bool operator!=(const self_t& rhs) { return current_pos != rhs.current_pos; }
bool operator<(const self_t& rhs) { return current_pos < rhs.current_pos; }
bool operator<=(const self_t& rhs) { return current_pos <= rhs.current_pos; }
bool operator>(const self_t& rhs) { return current_pos > rhs.current_pos; }
bool operator>=(const self_t& rhs) { return current_pos >= rhs.current_pos; }
private:
storage_t::iterator current_pos;
};
void add(int start, int end) {
positions.push_back(start);
positions.push_back(end);
}
void dump() {
for (auto i : positions) {
std::cout <<i<<",";
}
std::cout <<std::endl;
}
RepeatIterator repeatBegin(){return RepeatIterator(positions.begin());}
RepeatIterator repeatEnd(){return RepeatIterator(positions.end());}
SpacerIterator spacerBegin(){return SpacerIterator(positions.begin() + 1);}
SpacerIterator spacerEnd(){return SpacerIterator(positions.end() - 1);}
protected:
storage_t positions;
};
And then the tesing program compiled using clang++ -std=c++0x -o testRepeatArray RepeatArray.cpp
int main() {
RepeatArray r = RepeatArray();
r.add(1,3);
r.add(7,12);
std::cout<<"original:"<<std::endl;
r.dump();
std::cout << "Testing Repeat iterator:"<<std::endl;
for (RepeatArray::RepeatIterator it2 = r.repeatBegin(); it2 != r.repeatEnd(); ++it2) {
std::cout << (*it2).first <<","<< (*it2).second << std::endl;
}
std::cout << "Testing Spacer iterator:"<<std::endl;
for (RepeatArray::SpacerIterator it3 = r.spacerBegin(); it3 != r.spacerEnd(); ++it3) {
std::cout << (*it3).first <<","<< (*it3).second << std::endl;
}
std::cout<<"Testing modification:"<<std::endl;
RepeatArray::RepeatIterator it = r.repeatBegin();
(*it).first = 0;
(*it).second = 123;
r.dump();
return 0;
}