I have a custom bitset class implementation in C++. I often iterate over the indexes of bits that are set in the bitset (i.e. for bitset '10011' I want to iterate over numbers 0, 3, 4.) This iteration can be implemented as follows:
struct Bitset {
uint64_t* data_;
size_t chunks_;
std::vector<int> Elements() const {
std::vector<int> ret;
for (size_t i=0;i<chunks_;i++){
uint64_t td = data_[i];
while (td) {
ret.push_back(i*BITS + __builtin_ctzll(td));
td &= ~-td;
}
}
return ret;
}
};
void Iterate(Bitset bitset) {
for (int b : bitset.Elements()) {
std::cout << "bit: " << b << std::endl;
}
}
The above implementation provides clean code for the iteration, but it involves an unnecessary heap allocation with the vector. The following version which essentially inlines the Elements() function is often faster:
void Iterate(Bitset bitset) {
int chunks = bitset.chunks_;
for (int i = 0; i < chunks; i++) {
uint64_t td = bitset.data_[i];
while (td) {
std::cout << "bit: " << i*BITS + __builtin_ctzll(td) << std::endl;
td &= ~-td;
}
}
}
What would be a good way to implement an abstraction for the iteration so that it would be as clean as the above version, but also with no performance cost.
Just iterate over your class. Provide your own implementation of an iterator class for your Bitset and provide begin() and end() methods. A simplest (untested!) implementation could look something like this:
#include <vector>
#include <cstdint>
#include <iostream>
struct Bitset {
uint64_t* data_;
size_t chunks_;
struct iterator {
uint64_t *pnt;
uint_fast8_t pos;
iterator(uint64_t *pnt, size_t pos) :
pnt(pnt), pos(pos) {}
bool operator !=(const iterator& o) {
return o.pnt != pnt || o.pos != pos;
}
void operator ++() {
pos++;
if (pos == 64) {
pnt++;
pos = 0;
}
}
bool operator *() {
return *pnt & (1 << pos);
}
};
iterator begin() { return iterator(data_, 0); }
iterator end() { return iterator(data_ + chunks_, 64); }
};
void Iterate(Bitset bitset) {
for (auto&& b : bitset) {
std::cout << "bit: " << b << std::endl;
}
}
I believe for your strange while (td) { ... i*BITS + __builtin_ctzll(td) ... loop that I don't understand that could be something along (untested!):
constexpr int BITS = 100000;
struct Bitset {
uint64_t* data_;
size_t chunks_;
struct iterator {
uint64_t *data_;
int i = 0;
uint64_t td = 0;
iterator(uint64_t *data_, int i, uint64_t td) :
data_(data_), i(i), td(td) {}
bool operator !=(const iterator& o) {
return o.data_ != data_ || o.i != i || o.td != td;
}
void operator ++() {
if (td == 0) {
td = *data_;
data_++;
} else {
td &= ~-td;
}
}
bool operator *() {
return i * BITS + __builtin_ctzll(td);
}
};
iterator begin() { return iterator(data_, 0, *data_); }
iterator end() { return iterator(data_ + chunks_, 0, 0); }
};
As KamilCuk suggested, I used an iterator to solve this problem. Now the implementation looks like:
struct Bitset {
uint64_t* data_;
size_t chunks_;
class BitsetIterator {
private:
const Bitset* const bitset_;
size_t pos_;
uint64_t tb_;
public:
BitsetIterator(const Bitset* const bitset, size_t pos, uint64_t tb) :
bitset_(bitset), pos_(pos), tb_(tb) { }
bool operator!=(const BitsetIterator& other) const {
return pos_ != other.pos_ || tb_ != other.tb_;
}
const BitsetIterator& operator++() {
tb_ &= ~-tb_;
while (tb_ == 0 && pos_ < bitset_->chunks_) {
pos_++;
if (pos_ < bitset_->chunks_) {
tb_ = bitset_->data_[pos_];
}
}
return *this;
}
int operator*() const {
return pos_*BITS + __builtin_ctzll(tb_);
}
};
BitsetIterator begin() const {
size_t pos = 0;
while (pos < chunks_ && data_[pos] == 0) {
pos++;
}
if (pos < chunks_) {
return BitsetIterator(this, pos, data_[pos]);
} else {
return BitsetIterator(this, pos, 0);
}
}
BitsetIterator end() const {
return BitsetIterator(this, chunks_, 0);
}
};
void Iterate(Bitset bitset) {
for (int b : bitset) {
std::cout << "bit: " << b << std::endl;
}
}
This avoids heap allocation and is much faster than the version that uses vector. I'm not sure if this provides exactly same performance as the version without any abstractions, but it should be very close.
Related
I am implementing a kind of dataframe and I want to define a RandomAccessIterator over it, in order to execute the different std algorithms, such as the sorting one. The dataframe of the example contains two column "a" and "b":
a; b;
20; 21;
20; 19;
10; 11;
40; 41;
10; 11;
After sorting with a trivial selection sort this is the result:
a; b;
10; 11;
10; 11;
20; 19;
20; 21;
40; 41;
The problem that I am facing is that the std::sort does not work properly. And I don't know weather the implementation of the iterator is sound or not.
This is the code.
File: dataframe.hpp
#pragma once
#include <iostream>
#include <charconv>
#include <vector>
#include <memory>
#include <cstring>
#include <numeric>
#include "iterator.hpp"
namespace df
{
class Record;
class Column;
class Dataframe;
namespace types
{
enum class Base : char
{
CHAR = 'A',
UNSIGNED = 'U',
// Other types..
};
class Dtype
{
public:
Dtype(types::Base base, std::size_t size) : m_base_dtype{base}, m_size{size} {}
[[nodiscard]] auto name() const
{
return std::string{static_cast<char>(m_base_dtype)} + std::to_string(m_size);
}
[[nodiscard]] auto base() const { return m_base_dtype; }
[[nodiscard]] auto size() const { return m_size; }
[[nodiscard]] auto is_primitive() const
{
switch (base())
{
case types::Base::CHAR:
return size() == 1;
case types::Base::UNSIGNED:
return size() == 1 or size() == 2 or size() == 4 or size() == 8;
}
return false;
}
private:
types::Base m_base_dtype;
std::size_t m_size;
};
[[nodiscard]] static auto CHAR(const std::size_t size) { return Dtype(types::Base::CHAR, size); }
[[nodiscard]] static auto UNSIGNED(const std::size_t size) { return Dtype(types::Base::UNSIGNED, size); }
}
class Column
{
public:
Column(std::vector<char> &raw, const types::Dtype dtype) : m_raw{std::move(raw)}, m_dtype{dtype} {}
Column &operator=(Column &&c) = default; // Move constructor
[[nodiscard]] const auto &dtype() const { return m_dtype; }
[[nodiscard]] auto &raw() { return m_raw; }
[[nodiscard]] const auto &raw() const { return m_raw; }
[[nodiscard]] auto *data() { return m_raw.data(); }
[[nodiscard]] const auto *data() const { return m_raw.data(); }
private:
std::vector<char> m_raw;
types::Dtype m_dtype;
};
class Dataframe
{
public:
Dataframe(std::vector<char> &raw, std::vector<std::string> names, std::vector<types::Dtype> dtypes)
{
m_raw = std::move(raw);
m_column_dtypes = dtypes;
m_column_names = names;
m_record_size = 0;
for (const auto dt : dtypes)
{
m_column_offsets.emplace_back(m_record_size);
m_record_size += dt.size();
}
m_record_count = m_raw.size() / m_record_size;
}
Dataframe(std::vector<char> &raw, std::vector<types::Dtype> dtypes) : Dataframe(raw, {}, dtypes) {}
Dataframe &operator=(Dataframe &&c) = default; // Move constructor
[[nodiscard]] auto &raw() { return m_raw; }
[[nodiscard]] const auto &raw() const { return m_raw; }
[[nodiscard]] auto *data() { return m_raw.data(); }
[[nodiscard]] const auto *data() const { return m_raw.data(); }
// Iterators
[[nodiscard]] df::Iterator begin()
{
return df::Iterator{m_raw.data(), m_record_size};
}
[[nodiscard]] df::Iterator end()
{
return df::Iterator{m_raw.data() + m_raw.size(), m_record_size};
}
[[nodiscard]] auto shape() const { return std::make_pair(m_record_count, m_column_dtypes.size()); }
[[nodiscard]] auto record_count() const { return m_record_count; }
[[nodiscard]] auto record_size() const { return m_record_size; }
[[nodiscard]] const auto &names() const { return m_column_names; }
[[nodiscard]] const auto &dtypes() const { return m_column_dtypes; }
[[nodiscard]] const auto &offsets() const { return m_column_offsets; }
void print() { print(m_record_count); }
void print(const std::size_t initial_records)
{
// Print header
for (auto column_name : m_column_names)
{
std::cout << column_name << "; ";
}
std::cout << std::endl;
// Print rows
std::size_t records_to_print = std::min(initial_records, m_record_count);
for (std::size_t i = 0; i < records_to_print; i++)
{
const auto start_p = i * record_size();
auto start_field = 0;
auto end_field = 0;
for (auto field : m_column_dtypes)
{
end_field += field.size();
switch (field.base())
{
case types::Base::UNSIGNED:
{
std::uint64_t uint_value = 0;
memcpy(&uint_value, m_raw.data() + start_p + start_field, field.size());
std::cout << uint_value;
break;
}
case types::Base::CHAR:
{
std::string str_value = std::string(m_raw.data() + start_p + start_field, field.size());
std::cout << str_value;
break;
}
}
start_field = end_field;
// New column
std::cout << "; ";
}
// New row
std::cout << std::endl;
}
}
std::shared_ptr<Dataframe> copy() const
{
auto x = std::vector<char>(m_raw);
return std::make_shared<Dataframe>(x, std::vector<std::string>(m_column_names), std::vector<types::Dtype>(m_column_dtypes));
}
private:
std::vector<char> m_raw = {};
std::vector<std::string> m_column_names = {};
std::vector<types::Dtype> m_column_dtypes = {};
std::vector<std::size_t> m_column_offsets = {};
std::size_t m_record_size = {};
std::size_t m_record_count = {};
};
using namespace types;
static std::shared_ptr<Dataframe> read_from_vector(const std::vector<std::vector<std::string>> values, const std::vector<std::string> names, const std::vector<Dtype> dtypes)
{
const auto record_size = std::accumulate(dtypes.begin(), dtypes.end(), std::size_t{0},
[](std::size_t accum, const auto &m)
{ return accum + m.size(); });
const auto total_size = values.size() * record_size;
const std::size_t INCR_RECORDS = std::max(total_size / (10 * record_size), std::size_t{65536});
auto raw = std::vector<char>{};
std::size_t written_records = 0;
auto offsets = std::vector<std::size_t>{};
for (int offset = 0; const auto &kd : dtypes)
{
offsets.push_back(offset);
offset += kd.size();
}
for (auto value : values)
{
if (written_records >= raw.size() / record_size)
{
raw.resize(raw.size() + INCR_RECORDS * record_size, char{' '});
}
for (int i = 0; i < names.size(); i++)
{
const auto name = names[i];
const auto dtype = dtypes[i];
const auto offset = offsets[i];
const auto pos = written_records * record_size + offset;
switch (dtype.base())
{
case df::Base::CHAR:
{
const auto v = value[i];
const auto byte_to_copy = std::min(v.size(), dtype.size());
std::memcpy(raw.data() + pos,
v.data() + v.size() - byte_to_copy, byte_to_copy); // Prendo gli ultimi byte
break;
}
case df::Base::UNSIGNED:
{
const auto v = std::stoull(value[i]);
const auto byte_to_copy = dtype.size();
std::memcpy(raw.data() + pos, &v, byte_to_copy); // Prendo gli ultimi byte
break;
}
default:
throw std::runtime_error("ColumnType non riconosciuto");
}
}
written_records++;
}
raw.resize(written_records * record_size);
raw.shrink_to_fit();
return std::make_shared<Dataframe>(raw, names, dtypes);
}
}
File: iterator.hpp
#pragma once
#include <iostream>
#include <cstring>
namespace df
{
class Iterator
{
std::size_t size;
char *ptr;
public:
struct record_reference;
struct record_value
{
std::size_t size;
char *ptr;
record_value(const record_reference &t) : record_value(t.size, t.ptr){};
record_value(const std::size_t m_size, char *m_ptr)
{
this->size = m_size;
this->ptr = new char[this->size];
std::memcpy(ptr, m_ptr, this->size);
}
~record_value()
{
delete[] this->ptr;
}
};
struct record_reference
{
std::size_t size;
char *ptr;
record_reference(const std::size_t m_size, char *m_ptr)
{
this->size = m_size;
this->ptr = m_ptr;
}
record_reference(const record_reference &t)
{
this->size = t.size;
this->ptr = t.ptr;
}
// record_reference(const record_value &t) : record_reference(t.size, t.ptr) {};
record_reference &operator=(const record_value &t)
{
std::memcpy(ptr, t.ptr, size);
return *this;
}
record_reference &operator=(const record_reference &t)
{
std::memcpy(ptr, t.ptr, size);
return *this;
}
record_reference &operator=(char *t)
{
std::memcpy(ptr, t, size);
return *this;
}
operator char *()
{
return ptr;
}
operator const char *() const { return ptr; }
};
using iterator_category = std::random_access_iterator_tag;
using value_type = record_value;
using reference = record_reference;
using difference_type = std::ptrdiff_t;
// default constructible
Iterator() : size(0), ptr(nullptr)
{
}
// copy assignable
Iterator &operator=(const Iterator &t)
{
size = t.size;
ptr = t.ptr;
return *this;
}
Iterator(char *ptr, const std::size_t size) : size{size}, ptr(ptr)
{
}
record_reference operator*() const
{
return {size, ptr};
}
// Prefix
Iterator &operator++()
{
ptr += size;
return *this;
}
// Postfix
Iterator operator++(int)
{
auto tmp = *this;
++*this;
return tmp;
}
Iterator &operator--()
{
ptr -= size;
return *this;
}
difference_type operator-(const Iterator &it) const
{
return (this->ptr - it.ptr) / size;
}
Iterator operator+(const difference_type &offset) const
{
return Iterator(ptr + offset * size, size);
}
friend Iterator operator+(const difference_type &diff, const Iterator &it)
{
return it + diff;
}
Iterator operator-(const difference_type &diff) const
{
return Iterator(ptr - diff * size, size);
}
reference operator[](const difference_type &offset) const
{
return {size, ptr + offset * size};
}
bool operator==(const Iterator &it) const
{
return this->ptr == it.ptr;
}
bool operator!=(const Iterator &it) const
{
return !(*this == it);
}
bool operator<(const Iterator &it) const
{
return this->ptr < it.ptr;
}
bool operator>=(const Iterator &it) const
{
return this->ptr >= it.ptr;
}
bool operator>(const Iterator &it) const
{
return this->ptr > it.ptr;
}
bool operator<=(const Iterator &it) const
{
return this->ptr <= it.ptr;
}
Iterator &operator+=(const difference_type &diff)
{
ptr += diff * size;
return *this;
}
operator Iterator() const
{
return Iterator(ptr, size);
}
};
void swap(df::Iterator::record_reference a, df::Iterator::record_reference b)
{
unsigned char *p;
unsigned char *q;
unsigned char *const sentry = (unsigned char *)a.ptr + a.size;
for (p = (unsigned char *)a.ptr, q = (unsigned char *)b.ptr; p < sentry; ++p, ++q)
{
const unsigned char t = *p;
*p = *q;
*q = t;
}
}
}
File: comparator.hpp
#pragma once
#include <memory>
#include <functional>
#include "dataframe.hpp"
#include "iterator.hpp"
namespace compare
{
using comparator_fn = std::function<int(const df::Iterator::record_reference, const df::Iterator::record_reference)>;
template <typename T, std::size_t offset = 0, std::size_t size = sizeof(T)>
static inline comparator_fn make_comparator()
{
if constexpr (size == 3 or size == 5 or size == 7 or size > 8)
return [=](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
{ return std::memcmp(a + offset, b + offset, size); };
return [](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
{ return *(T *)(a + offset) < *(T *)(b + offset) ? -1 : *(T *)(b + offset) < *(T *)(a + offset) ? +1
: 0; };
}
template <typename T>
static inline comparator_fn make_comparator(const std::size_t offset)
{
return [=](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
{ return *(T *)(a + offset) < *(T *)(b + offset) ? -1 : *(T *)(b + offset) < *(T *)(a + offset) ? +1
: 0; };
}
static inline comparator_fn make_column_comparator(const df::Dtype dtype, const std::size_t offset)
{
switch (dtype.base())
{
case df::Base::CHAR:
{
if (dtype.size() == 1)
return make_comparator<std::uint8_t>(offset);
else if (dtype.size() == 2)
return [=](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
{ return std::memcmp(a + offset, b + offset, 2); }; // C'� qualche beneficio a fissare il 2? o conviene trattarlo come uno unsigned short?
return [=](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
{ return std::memcmp(a + offset, b + offset, dtype.size()); };
}
case df::Base::UNSIGNED:
{
return [=](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
{
std::uint64_t uint_value_a = 0;
std::uint64_t uint_value_b = 0;
std::memcpy(&uint_value_a, a + offset, dtype.size());
std::memcpy(&uint_value_b, b + offset, dtype.size());
return (uint_value_a < uint_value_b ? -1 : uint_value_a > uint_value_b ? +1
: 0);
};
}
default:
throw std::runtime_error("Unsupported dtype");
break;
}
}
static inline comparator_fn make_composite_two_way_comparator(const std::shared_ptr<df::Dataframe> &T)
{
const auto K = T->dtypes().size();
std::vector<comparator_fn> F;
for (int i = 0; i < K; i++)
{
F.emplace_back(make_column_comparator(T->dtypes()[i], T->offsets()[i]));
}
const auto comparator = [=](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
{
for (int i = 0; i < K; i++)
{
// If equal go to the next column, otherwise return the result
// The return value is true if the first argument is less than the second
// and false otherwise
if (const auto result = F[i](a, b); result != 0)
return result < 0;
}
return false;
};
return comparator;
}
}
File: main.cpp
#include <iostream>
#include <vector>
#include "dataframe.hpp"
#include "comparator.hpp"
template <typename RandomAccessIterator, typename Comparator>
static void selection_sort(RandomAccessIterator first, RandomAccessIterator last, Comparator comp)
{
for (auto i = first; i != last; ++i)
{
auto min = i;
for (auto j = i + 1; j != last; ++j)
{
if (comp(*j, *min))
min = j;
}
df::Iterator::value_type temp = *i;
*i = *min;
*min = temp;
// Alternative
// std::iter_swap(i, min);
}
}
int main(int argc, char const *argv[])
{
std::vector<std::string> values{"20", "21", "20", "19", "10", "11", "40", "41", "10", "11"};
// Create a vector that contains values grouped by 2
std::vector<std::vector<std::string>> v;
for (int i = 0; i < values.size(); i += 2)
{
std::vector<std::string> temp;
temp.push_back(values[i]);
temp.push_back(values[i + 1]);
v.push_back(temp);
}
std::vector<std::string> column_names = {"a", "b"};
df::Dtype d = df::Dtype(df::Base::UNSIGNED, 4);
std::vector dtypes = {d, d};
// Create a dataframe
std::shared_ptr<df::Dataframe> df = df::read_from_vector(v, column_names, dtypes);
std::cout << "Before sorting" << std::endl;
df->print();
// This comparator sorts the dataframe first by column a and then by column b in ascending order
auto comparator = compare::make_composite_two_way_comparator(df);
selection_sort(df->begin(), df->end(), comparator);
std::cout << "\nAfter sorting" << std::endl;
df->print();
// With the std::sort it does not work
std::sort(df->begin(), df->end(), comparator);
return 0;
}
Your type is not a C++17 RandomAccessIterator, because it isn't a C++17 ForwardIterator, because reference is an object type, not a reference type.
The type It satisfies ForwardIterator if
Let T be the value type of It. The type std::iterator_traits<It>::reference must be either
T& or T&& if It satisfies OutputIterator (It is mutable), or
const T& or const T&& otherwise (It is constant),
(Other requirements elided)
You will be able to satisfy the C++20 concept std::random_access_iterator, because that relaxes the requirement on It::reference.
In C++17, the reference type of an iterator must be precisely value_type& in order for that iterator to be random access. Only input iterators can have the reference type be something other than value_type&. So in C++17, proxy iterators are limited to input iterators. And every algorithm written against C++17 has this expectation.
The C++20 ranges library adds the ability to have random access proxy iterators. And the C++20 algorithms that use those range concepts will respect them.
I am trying to use <ranges> (C++20 MSVC) to iterate a custom container called Span, whose begin/end() methods return a SpanIter. However, the following static assert fails in the test() method and the pipe operator is unrecognized with my class (but not vector<>).
static_assert(std::ranges::range<Span>, "Is not a range");
I think the method signatures match this working example. I haven't been able to track down what's missing, whether a method or typedef/using.
#include <ranges>
#include <iostream>
#include <vector>
namespace ptest {
class SpanIter;
struct Span;
struct Span : std::ranges::view_base
{
using iterator = SpanIter;
using sentinel = SpanIter;
int start;
int length;
int index;
Span() noexcept { start = length = index = 0; }
Span(int s, int l, int i) noexcept : start(l), length(l), index(i) { }
Span(const Span& other) noexcept : Span(other.start, other.length, other.index) { }
Span(const Span&& other) noexcept : Span(other.start, other.length, other.index) { }
Span& operator=(const Span& other) { start = other.start; length = other.length; index = other.index; }
bool operator==(const Span& other) const { return start == other.start && length == other.length && index == other.index; }
iterator begin();
sentinel end();
iterator cbegin() const;
sentinel cend() const;
size_t size() { return length; }
void Print() {
// eg. "range[5 - 9]: 5"
std::cout << "range [" << start << "-"
<< (start + length - 1)
<< "]: " << index << std::endl;
}
auto operator <=> (const Span& other) const = default;
};
class SpanIter {
Span Empty;
using value_type = Span;
using reference = Span&;
using const_reference = const Span&;
using iterator = SpanIter;
using sentinel = SpanIter;
using const_iterator = const SpanIter;
using iterator_category = std::forward_iterator_tag;
using difference_type = ptrdiff_t;
using pointer = Span*;
using size_type = size_t;
public:
SpanIter() noexcept { }
SpanIter(const SpanIter& other) noexcept : current_(other.current_) { }
SpanIter(const SpanIter&& other) noexcept : current_(other.current_) { }
SpanIter(Span t) noexcept : current_(t) {
current_.index = current_.start;
}
~SpanIter() { }
SpanIter& operator++() {
if (current_.index < current_.start + current_.length)
++current_.index;
return *this;
}
SpanIter operator++(int) {
SpanIter result = *this;
++(*this);
return result;
}
SpanIter& operator = (const SpanIter& other)
{
current_ = other.current_;
return *this;
}
bool operator==(const SpanIter& other) const {
return this->operator*() == *other;
}
bool operator!=(const SpanIter& other) const {
return !(*this == other);
}
template<typename T>
bool operator==(T&& value) const {
return this->operator*() == std::forward<T>(value);
}
template<typename T>
bool operator!=(T&& value) const {
return !(*this == value);
}
Span& operator*() {
return current_.start <= current_.index &&
current_.index < current_.start + current_.length
? current_
: Empty;
}
const Span& operator*() const {
return current_.start <= current_.index &&
current_.index < current_.start + current_.length
? current_
: Empty;
}
private:
Span current_;
};
SpanIter Span::begin() { return SpanIter(*this); }
Span::sentinel Span::end() { return SpanIter(Span()); }
SpanIter Span::cbegin() const { return SpanIter(*this); }
Span::sentinel Span::cend() const { return SpanIter(Span()); }
SpanIter begin(const Span& span) { return span.cbegin(); }
Span::sentinel end(const Span& span) { return span.cbegin(); }
void test() {
// WORKS (confirm pipes work with vector)
// output: 5 6
std::vector<int> v { 5, 6, 7, 8, 9};
auto works = v | std::views::take(2);
for (int i : works)
std::cout << i << " ";
// FAILS !!!!!!!!!!!!!!!!!!!
// <ranges> doesn't recognize my Span container as a range
static_assert(std::ranges::range<Span>, "Is not a range");
// FAILS !!!!!!!!!!!!!!!!!!!
// binary '|': 'Span' does not define this operator or a conversion
// to a type acceptable to the predefined operator
Span span(6, 5, 8);
auto fails = span | std::views::take(2);
// WORKS: confirm my Span operates as a container with normal C++ 11 features
// output:
// range[5 - 9] : 5
// range[5 - 9] : 6
// range[5 - 9] : 7
// range[5 - 9] : 8
// range[5 - 9] : 9
for (Span s : span)
s.Print();
}
}
The problem was that using statements for iterator traits need to be public ... that's it. This is surely obvious to most people familiar with C++, but may help other relative newbies like myself.
Is there a way I can sort matrix elements with the sort function from std?
//Using this matrix
vector<vector<string>> mat;
For example if you have
5 2 1
0 0 2
1 4 3
The result would be
0 0 1
1 2 2
3 4 5
To store a matrix, the nesting of std::vector is not the best solution. As any row manages its own size, there is no unique column size intrinsically granted by the matrix class.
Assuming that OP is damned to use an existing type (which may not be changed), I wrote my sample based on this.
One solution (with least implementation effort) would be
to copy the matrix (std::vector<std::vector<std::string> >) into a temporary of type std::vector<std::string>
apply std::sort() to this temporary
assign the sorted vector to matrix again.
Considering that moving of elements may be expensive, an alternative could be
to build up an index-pair vector
std::sort() it with a custom less functor (which considers matrix elements).
Afterwards, the index-pair vector may be used to access the original matrix elements in sorted order.
The latter is shown in my sample code:
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
typedef std::vector<std::string> Row;
typedef std::vector<Row> Matrix;
typedef std::pair<size_t, size_t> IndexPair;
struct LessMatrix {
const Matrix &mat;
LessMatrix(const Matrix &mat): mat(mat) { }
bool operator()(const IndexPair &i1, const IndexPair &i2)
{
return mat[i1.first][i1.second] < mat[i2.first][i2.second];
}
};
std::ostream& operator<<(std::ostream &out, const Matrix &mat)
{
for (const Row row : mat) {
for (const std::string elem : row) out << ' ' << elem;
out << '\n';
}
return out;
}
int main()
{
Matrix mat = {
{ "5", "2", "1" },
{ "0", "0", "2" },
{ "1", "4", "3" }
};
// print input
std::cout << "Input:\n" << mat << '\n';
// indexify matrix
std::vector<IndexPair> indices;
for (size_t i = 0, n = mat.size(); i < n; ++i) {
const std::vector<std::string> &row = mat[i];
for (size_t j = 0, m = row.size(); j < m; ++j) {
indices.push_back(std::make_pair(i, j));
}
}
// sort matrix
LessMatrix less(mat);
std::sort(indices.begin(), indices.end(), less);
// print output
Matrix matOut;
{ size_t i = 0; const size_t nCols = 3;
for (const IndexPair &index : indices) {
if (i % nCols == 0) matOut.push_back(Row());
matOut.back().push_back(mat[index.first][index.second]);
++i;
}
}
std::cout << "Output:\n" << matOut << '\n';
// done
return 0;
}
Output:
Input:
5 2 1
0 0 2
1 4 3
Output:
0 0 1
1 2 2
3 4 5
Life demo on coliru
The OP complained about creating a separate index vector. I suspected that a custom random access iterator might be a replacement (to sort the matrix directly). I must admit that I didn't do this before but, out of curiosity, I tried to puzzle this out:
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <cassert>
typedef std::vector<std::string> Row;
typedef std::vector<Row> Matrix;
struct MatrixIterator {
typedef size_t difference_type;
typedef std::string value_type;
typedef std::string* pointer;
typedef std::string& reference;
typedef std::random_access_iterator_tag iterator_category;
// accessed matrix
Matrix &mat;
// index of row, index of column
size_t i, j;
MatrixIterator(Matrix &mat, size_t i = 0, size_t j = 0): mat(mat), i(i), j(j) { }
MatrixIterator& operator =(const MatrixIterator &iter)
{
assert(&mat == &iter.mat);
i = iter.i; j = iter.j;
return *this;
}
size_t nCols() const { return mat.front().size(); }
std::string& operator *() { return mat[i][j]; }
const std::string& operator *() const { return mat[i][j]; }
MatrixIterator& operator ++() { return *this += 1; }
MatrixIterator operator ++(int) { MatrixIterator iter(*this); ++*this; return iter; }
MatrixIterator& operator --() { return *this -= 1; }
MatrixIterator operator --(int) { MatrixIterator iter(*this); --*this; return iter; }
MatrixIterator& operator += (size_t n)
{
j += i * nCols() + n; i = j / nCols(); j %= nCols(); return *this;
}
MatrixIterator operator + (size_t n) const
{
MatrixIterator iter(*this); iter += n; return iter;
}
friend MatrixIterator operator + (size_t n, const MatrixIterator &iter)
{
MatrixIterator iter2(iter); iter2 += n; return iter2;
}
MatrixIterator& operator -= (size_t n)
{
j += i * nCols() - n; i = j / nCols(); j %= nCols();
return *this;
}
MatrixIterator operator - (size_t n) const
{
MatrixIterator iter(*this); iter -= n; return iter;
}
size_t operator - (const MatrixIterator &iter) const
{
return (i * nCols() + j) - (iter.i * iter.nCols() + iter.j);
}
std::string& operator[](size_t i) { return mat[i / nCols()][i % nCols()]; }
const std::string& operator[](size_t i) const { return mat[i / nCols()][i % nCols()]; }
bool operator == (const MatrixIterator &iter) const
{
return i == iter.i && j == iter.j;
}
bool operator != (const MatrixIterator &iter) const { return !(*this == iter); }
bool operator < (const MatrixIterator &iter) const
{
return i * nCols() + j < iter.i * iter.nCols() + iter.j;
}
bool operator > (const MatrixIterator &iter) const { return iter < *this; }
bool operator <= (const MatrixIterator &iter) const { return !(iter > *this); }
bool operator >= (const MatrixIterator &iter) const { return !(*this < iter); }
};
MatrixIterator begin(Matrix &mat) { return MatrixIterator(mat, 0, 0); }
MatrixIterator end(Matrix &mat) { return MatrixIterator(mat, mat.size(), 0); }
std::ostream& operator<<(std::ostream &out, const Matrix &mat)
{
for (const Row row : mat) {
for (const std::string elem : row) out << ' ' << elem;
out << '\n';
}
return out;
}
int main()
{
Matrix mat = {
{ "5", "2", "1" },
{ "0", "0", "2" },
{ "1", "4", "3" }
};
// print input
std::cout << "Input:\n" << mat << '\n';
// sort matrix
std::sort(begin(mat), end(mat));
// print output
std::cout << "Output:\n" << mat << '\n';
// done
return 0;
}
Output:
Input:
5 2 1
0 0 2
1 4 3
Output:
0 0 1
1 2 2
3 4 5
Life demo on coliru
Notes:
I used the descriptions on cppreference.com
C++ concepts: RandomAccessIterator
C++ concepts: BidirectionalIterator
C++ concepts: ForwardIterator
C++ concepts: Iterator
std::iterator_traits
as a "requirement-cheat-sheet" and implemented everything described there. Some of the details, I decided by guess. (Especially, concerning the typedefs I'm not quite sure how to do them correct.)
You can sort the nested vectors without extra data copying by creating a custom wrapper matrix class and define your own iterator:
#include <algorithm>
#include <iterator>
#include <string>
#include <vector>
class MyMatrix {
public:
using DataStore = std::vector<std::vector<std::string> >;
// Note: Make sure MyMatrix DO NOT out-live its `data` argument!
MyMatrix(DataStore& data) : data_(data) {
// Check that
// 1. data.size() > 0;
// 2. data[i].size() > 0 and same for all valid i.
}
class Iterator : public std::iterator<std::random_access_iterator_tag,
std::string, int> {
public:
Iterator(DataStore& data) : data_(&data), index_(0) {}
Iterator(DataStore& data, int index) : data_(&data), index_(index) {}
Iterator(const Iterator& it) : data_(it.data_), index_(it.index_) {}
Iterator& operator=(const Iterator& it) {
data_ = it.data_;
index_ = it.index_;
}
operator bool() const {
return index_ >= 0 && index_ < data_->size() * (*data_)[0].size();
}
bool operator==(const Iterator& it) const {
return data_ == it.data_ && index_ == it.index_;
}
bool operator!=(const Iterator& it) const {
return data_ != it.data_ || index_ != it.index_;
}
Iterator& operator++() { ++index_; return *this; }
Iterator& operator--() { --index_; return *this; }
Iterator operator++(int) { return Iterator(*data_, index_++); }
Iterator operator--(int) { return Iterator(*data_, index_--); }
Iterator& operator+=(int offs) { index_ += offs; return *this; }
Iterator& operator-=(int offs) { index_ -= offs; return *this; }
Iterator operator+(int offs) { return Iterator(*data_, index_ + offs); }
Iterator operator-(int offs) { return Iterator(*data_, index_ - offs); }
int operator-(const Iterator& it) { return index_ - it.index_; }
std::string& operator*() {
return (*data_)[index_ / data_->size()][index_ % (*data_)[0].size()];
}
const std::string& operator*() const { return operator*(); }
private:
DataStore* data_;
int index_;
}; // class Iterator
Iterator iterator() { return Iterator(data_); }
Iterator begin() { return Iterator(data_, 0); }
Iterator end() { return Iterator(data_, data_.size() * data_[0].size()); }
private:
DataStore& data_;
}; // class MyMatrix
Then sort could be applied to MyMatrix as follows:
#include <iostream>
int main()
{
std::vector<std::vector<std::string> > store = {
{ "5", "2", "1" },
{ "0", "0", "2" },
{ "1", "4", "3" }
};
MyMatrix matrix(store);
for (const auto& row : store) {
for (const auto& item : row) { std::cout << item <<' '; }
std::cout <<'\n';
}
std::cout <<'\n';
std::sort(matrix.begin(), matrix.end());
for (const auto& row : store) {
for (const auto& item : row) { std::cout << item <<' '; }
std::cout <<'\n';
}
std::cout <<'\n';
}
Running the above code will result in the following output:
5 2 1
0 0 2
1 4 3
0 0 1
1 2 2
3 4 5
I implemented my first custom iterator, which is supposed to return an std::pair at each iteration. The only problem, is it loops infinitely and I do not know how to provide stop condition. The code looks like so:
#include <iostream>
#include <vector>
#include <utility>
class Simple
{
private:
std::vector<std::size_t> indices;
std::vector<int> values;
public:
void insert(std::size_t index, int value)
{
indices.push_back(index);
values.push_back(value);
}
int at(std::size_t index)
{
return values[indices[index]];
}
class Iterator
{
private:
const std::vector<std::size_t>* indices;
const std::vector<int>* values;
std::size_t pos = 0;
public:
Iterator(const std::vector<std::size_t>* indices_, const std::vector<int>* values_, const std::size_t &pos_ = 0):
values(values_), indices(indices_), pos(pos_){ }
bool operator==(const Iterator& other){
return this == &other;
}
bool operator!=(const Iterator& other){
return !operator==(other);
}
Iterator operator++() {
pos++;
Iterator i = *this;
return i;
}
std::pair<std::size_t, int> operator*()
{
if (pos < (*values).size())
{
return std::make_pair((*indices)[pos], (*values)[pos]);
}
std::cout << "EMPTY PAIR" << std::endl; //loops infinitely and prints this message
return std::pair<std::size_t,int>{};
}
std::pair<std::size_t, int>* operator->()
{
if (pos < (*values).size())
{
std::pair<std::size_t,int> *p;
*p = std::make_pair((*indices)[pos], (*values)[pos]);
return p;
}
return nullptr;
}
};
Iterator begin() const
{
return Iterator(&indices, &values, 0);
}
Iterator end() const
{
return Iterator(&indices, &values, values.size());
}
};
int main() {
Simple s;
s.insert(10, 100);
std::cout << s.at(10) << std::endl;
int i = 0;
for (const std::pair<std::size_t, int> &p : s)
{
std::cout << p.first << " " << p.second << std::endl;
if (i > 3) break; // otherwise it will loop infinitely
i++;
}
return 0;
}
I would like to stress, that this example is simplified and for demonstration purposes, so you may not ask me, why I'm not using map or unorded_map (just because what I'm implementing looks like a map). The question is solely about iterator and I need understanding what may be wrong with iterator itself and how to provide stop condition.
I am still kind of new to C++ and am trying to figure out what I am not able to pass a value correctly to a bitset, at least I suspect that is what the problem is.
I wrote a small function to assist in flipping the bits of a hex value to reverse the endian. So example would be input 0x01 and it would return 0x80.
This is the code I wrote.
int flipBits(char msd, char lsd) {
char ch[5];
sprintf_s(ch, "0x%d%d", msd, lsd);
char buffer[5];
strncpy_s(buffer, ch, 4);
cout << ch << endl;
cout << buffer << endl;
bitset<8> x(buffer);
bitset<8> y;
for (int i = 0; i < 8; i++) {
y[i] = x[7 - i];
}
cout << y << endl; // print the reversed bit order
int b = y.to_ulong(); // convert the binary to int
cout << b << endl; // print the int
cout << hex << b << endl; // print the hex
return b;
}
I tried adding the strncpy because I thought maybe the null terminator from sprintf was not working properly with the bitset. If in the line
bitset<8> x(buffer);
I replace buffer with a hex value, say for example 0x01, then it works and prints out 0x80 as I would expect, but if I try to pass in the value with the buffer it doesn't work.
We can write a stl-like container wrapper such that we can write:
int main() {
std::bitset<8> x(0x01);
auto container = make_bit_range(x);
std::reverse(container.begin(), container.end());
std::cout << x << std::endl;
}
and expect the output:
10000000
full code:
#include <iostream>
#include <bitset>
#include <algorithm>
template<std::size_t N>
struct bit_reference {
bit_reference(std::bitset<N>& data, int i) : data_(data), i_(i) {}
operator bool() const { return data_[i_]; }
bit_reference& operator=(bool x) {
data_[i_] = x;
return *this;
}
std::bitset<N>& data_;
int i_;
};
template<std::size_t N>
void swap(bit_reference<N> l, bit_reference<N> r) {
auto lv = bool(l);
auto rv = bool(r);
std::swap(lv, rv);
l = lv;
r = rv;
}
template<std::size_t N>
struct bit_range {
using bitset_type = std::bitset<N>;
bit_range(bitset_type &data) : data_(data) {}
struct iterator {
using iterator_category = std::bidirectional_iterator_tag;
using value_type = bit_reference<N>;
using difference_type = int;
using pointer = value_type *;
using reference = value_type &;
iterator(bitset_type &data, int i) : data_(data), i_(i) {}
bool operator==(iterator const &r) const { return i_ == r.i_; }
bool operator!=(iterator const &r) const { return i_ != r.i_; }
iterator &operator--() {
return update(i_ - 1);
}
iterator &operator++() {
return update(i_ + 1);
}
value_type operator*() const {
return bit_reference<N>(data_, i_);
}
private:
auto update(int pos) -> iterator & {
i_ = pos;
return *this;
}
private:
bitset_type &data_;
int i_;
};
auto begin() const { return iterator(data_, 0); }
auto end() const { return iterator(data_, int(data_.size())); }
private:
bitset_type &data_;
};
template<std::size_t N>
auto make_bit_range(std::bitset<N> &data) {
return bit_range<N>(data);
}
int main() {
std::bitset<8> x(0x01);
auto container = make_bit_range(x);
std::reverse(container.begin(), container.end());
std::cout << x << std::endl;
}
also plenty of fun algorithms here: Best Algorithm for Bit Reversal ( from MSB->LSB to LSB->MSB) in C