character string to bitset in C++ - c++

I am still fairly new to C++ and am trying to figure out why I am not able to pass a value correctly to a bitset; at least, I suspect that is what the problem is.
I wrote a small function to assist in flipping the bits of a hex value to reverse the endian. So example would be input 0x01 and it would return 0x80.
This is the code I wrote.
/// Reverses the bit order of one byte given as two hexadecimal digits.
///
/// Each digit may be passed either as a numeric nibble value (0..15) or as an
/// ASCII hex character ('0'-'9', 'a'-'f', 'A'-'F'); the two encodings do not
/// overlap. The reversed bits, the decimal value and the hex value are printed
/// to std::cout, as before.
///
/// @param msd most significant hex digit
/// @param lsd least significant hex digit
/// @return the bit-reversed byte (0x01 -> 0x80), or -1 if a digit is invalid.
int flipBits(char msd, char lsd) {
    // Maps one digit to its nibble value, or -1 when it is not a hex digit.
    const auto nibble = [](char c) -> int {
        if (c >= 0 && c <= 15) return c;
        if (c >= '0' && c <= '9') return c - '0';
        if (c >= 'a' && c <= 'f') return c - 'a' + 10;
        if (c >= 'A' && c <= 'F') return c - 'A' + 10;
        return -1;
    };
    const int hi = nibble(msd);
    const int lo = nibble(lsd);
    if (hi < 0 || lo < 0) {
        return -1;
    }

    // std::bitset's string constructor only understands '0'/'1' characters, so
    // a "0x.." string cannot be passed to it; construct from the integer value.
    const std::bitset<8> x(static_cast<unsigned long>(hi * 16 + lo));
    std::bitset<8> y;
    for (std::size_t i = 0; i < 8; i++) {
        y[i] = x[7 - i];
    }

    std::cout << y << std::endl;                          // print the reversed bit order
    const int b = static_cast<int>(y.to_ulong());         // convert the binary to int
    std::cout << b << std::endl;                          // print the int
    std::cout << std::hex << b << std::dec << std::endl;  // print the hex, then restore decimal output
    return b;
}
I tried adding the strncpy because I thought maybe the null terminator from sprintf was not working properly with the bitset. If in the line
bitset<8> x(buffer);
I replace buffer with a hex value, say for example 0x01, then it works and prints out 0x80 as I would expect, but if I try to pass in the value with the buffer it doesn't work.

We can write a stl-like container wrapper such that we can write:
// Usage sketch: reverse the bits of 0x01 in place through the range wrapper.
int main() {
    std::bitset<8> bits(0x01);
    auto range = make_bit_range(bits);
    std::reverse(range.begin(), range.end());
    std::cout << bits << std::endl;
}
and expect the output:
10000000
full code:
#include <iostream>
#include <bitset>
#include <algorithm>
// Proxy object that reads and writes a single bit of a std::bitset<N> by index.
template<std::size_t N>
struct bit_reference {
    bit_reference(std::bitset<N>& data, int i)
        : data_(data), i_(i) {}

    // Reads the referenced bit.
    operator bool() const {
        const bool bit = data_[i_];
        return bit;
    }

    // Writes x into the referenced bit of the underlying bitset.
    bit_reference& operator=(bool x) {
        data_[i_] = x;
        return *this;
    }

    std::bitset<N>& data_;  // bitset being referred to
    int i_;                 // index of the bit inside data_
};
// Exchanges the bit values behind two proxies. The proxies are taken by value,
// so the function can be called with temporaries as well as named objects.
template<std::size_t N>
void swap(bit_reference<N> l, bit_reference<N> r) {
    const bool left_bit = static_cast<bool>(l);
    const bool right_bit = static_cast<bool>(r);
    l = right_bit;
    r = left_bit;
}
// Range view over the bits of a std::bitset<N>, starting at index 0.
template<std::size_t N>
struct bit_range {
    using bitset_type = std::bitset<N>;

    bit_range(bitset_type &data) : data_(data) {}

    // Bidirectional iterator; dereferencing yields a bit_reference proxy by value.
    struct iterator {
        using iterator_category = std::bidirectional_iterator_tag;
        using value_type = bit_reference<N>;
        using difference_type = int;
        using pointer = value_type *;
        using reference = value_type &;

        iterator(bitset_type &data, int i) : data_(data), i_(i) {}

        // Only the bit position is compared, not the bitset being iterated.
        bool operator==(iterator const &r) const { return !(i_ != r.i_); }
        bool operator!=(iterator const &r) const { return !(i_ == r.i_); }

        iterator &operator--() {
            --i_;
            return *this;
        }

        iterator &operator++() {
            ++i_;
            return *this;
        }

        value_type operator*() const {
            return value_type(data_, i_);
        }

    private:
        bitset_type &data_;
        int i_;
    };

    auto begin() const { return iterator(data_, 0); }
    auto end() const { return iterator(data_, static_cast<int>(data_.size())); }

private:
    bitset_type &data_;
};
template<std::size_t N>
auto make_bit_range(std::bitset<N> &data) {
return bit_range<N>(data);
}
// Reverses the bits of 0x01 in place via std::reverse and prints the bitset.
int main() {
    std::bitset<8> bits(0x01);
    auto range = make_bit_range(bits);
    std::reverse(range.begin(), range.end());
    std::cout << bits << std::endl;
}
also plenty of fun algorithms here: Best Algorithm for Bit Reversal ( from MSB->LSB to LSB->MSB) in C

Related

How to define a RandomAccessIterator over a pointer to a vector of chars?

I am implementing a kind of dataframe and I want to define a RandomAccessIterator over it, in order to execute the different std algorithms, such as the sorting one. The dataframe of the example contains two column "a" and "b":
a; b;
20; 21;
20; 19;
10; 11;
40; 41;
10; 11;
After sorting with a trivial selection sort this is the result:
a; b;
10; 11;
10; 11;
20; 19;
20; 21;
40; 41;
The problem that I am facing is that std::sort does not work properly, and I don't know whether the implementation of the iterator is sound or not.
This is the code.
File: dataframe.hpp
#pragma once
#include <iostream>
#include <charconv>
#include <vector>
#include <memory>
#include <cstring>
#include <numeric>
#include "iterator.hpp"
namespace df
{
class Record;
class Column;
class Dataframe;
namespace types
{
    // Base kind of a column; the underlying char is used as the prefix of Dtype::name().
    enum class Base : char
    {
        CHAR = 'A',
        UNSIGNED = 'U',
        // Other types..
    };

    // A column type: a base kind plus a size.
    class Dtype
    {
    public:
        Dtype(types::Base base, std::size_t size) : m_base_dtype{base}, m_size{size} {}

        // Name made of the base character followed by the size, e.g. "U4".
        [[nodiscard]] auto name() const
        {
            std::string label(1, static_cast<char>(m_base_dtype));
            label += std::to_string(m_size);
            return label;
        }

        [[nodiscard]] auto base() const { return m_base_dtype; }
        [[nodiscard]] auto size() const { return m_size; }

        // True for CHAR of size 1 and for UNSIGNED of size 1, 2, 4 or 8.
        [[nodiscard]] auto is_primitive() const
        {
            const auto width = size();
            if (base() == types::Base::CHAR)
                return width == 1;
            if (base() == types::Base::UNSIGNED)
                return width == 1 || width == 2 || width == 4 || width == 8;
            return false;
        }

    private:
        types::Base m_base_dtype;
        std::size_t m_size;
    };

    // Convenience factories for the two base kinds.
    [[nodiscard]] static auto CHAR(const std::size_t size) { return Dtype(types::Base::CHAR, size); }
    [[nodiscard]] static auto UNSIGNED(const std::size_t size) { return Dtype(types::Base::UNSIGNED, size); }
}
// Owns the raw bytes of a single column together with the column's Dtype.
class Column
{
public:
    // Takes over the contents of `raw`; the caller's vector is left moved-from.
    Column(std::vector<char> &raw, const types::Dtype dtype) : m_raw{std::move(raw)}, m_dtype{dtype} {}

    // A user-declared move assignment suppresses the implicit move constructor
    // and deletes the copy operations, which would leave the class impossible
    // to construct from another Column. Declare the move constructor explicitly.
    Column(Column &&c) = default;            // Move constructor
    Column &operator=(Column &&c) = default; // Move assignment

    [[nodiscard]] const auto &dtype() const { return m_dtype; }
    [[nodiscard]] auto &raw() { return m_raw; }
    [[nodiscard]] const auto &raw() const { return m_raw; }
    [[nodiscard]] auto *data() { return m_raw.data(); }
    [[nodiscard]] const auto *data() const { return m_raw.data(); }

private:
    std::vector<char> m_raw;
    types::Dtype m_dtype;
};
class Dataframe
{
public:
Dataframe(std::vector<char> &raw, std::vector<std::string> names, std::vector<types::Dtype> dtypes)
{
m_raw = std::move(raw);
m_column_dtypes = dtypes;
m_column_names = names;
m_record_size = 0;
for (const auto dt : dtypes)
{
m_column_offsets.emplace_back(m_record_size);
m_record_size += dt.size();
}
m_record_count = m_raw.size() / m_record_size;
}
Dataframe(std::vector<char> &raw, std::vector<types::Dtype> dtypes) : Dataframe(raw, {}, dtypes) {}
Dataframe &operator=(Dataframe &&c) = default; // Move constructor
[[nodiscard]] auto &raw() { return m_raw; }
[[nodiscard]] const auto &raw() const { return m_raw; }
[[nodiscard]] auto *data() { return m_raw.data(); }
[[nodiscard]] const auto *data() const { return m_raw.data(); }
// Iterators
[[nodiscard]] df::Iterator begin()
{
return df::Iterator{m_raw.data(), m_record_size};
}
[[nodiscard]] df::Iterator end()
{
return df::Iterator{m_raw.data() + m_raw.size(), m_record_size};
}
[[nodiscard]] auto shape() const { return std::make_pair(m_record_count, m_column_dtypes.size()); }
[[nodiscard]] auto record_count() const { return m_record_count; }
[[nodiscard]] auto record_size() const { return m_record_size; }
[[nodiscard]] const auto &names() const { return m_column_names; }
[[nodiscard]] const auto &dtypes() const { return m_column_dtypes; }
[[nodiscard]] const auto &offsets() const { return m_column_offsets; }
void print() { print(m_record_count); }
void print(const std::size_t initial_records)
{
// Print header
for (auto column_name : m_column_names)
{
std::cout << column_name << "; ";
}
std::cout << std::endl;
// Print rows
std::size_t records_to_print = std::min(initial_records, m_record_count);
for (std::size_t i = 0; i < records_to_print; i++)
{
const auto start_p = i * record_size();
auto start_field = 0;
auto end_field = 0;
for (auto field : m_column_dtypes)
{
end_field += field.size();
switch (field.base())
{
case types::Base::UNSIGNED:
{
std::uint64_t uint_value = 0;
memcpy(&uint_value, m_raw.data() + start_p + start_field, field.size());
std::cout << uint_value;
break;
}
case types::Base::CHAR:
{
std::string str_value = std::string(m_raw.data() + start_p + start_field, field.size());
std::cout << str_value;
break;
}
}
start_field = end_field;
// New column
std::cout << "; ";
}
// New row
std::cout << std::endl;
}
}
std::shared_ptr<Dataframe> copy() const
{
auto x = std::vector<char>(m_raw);
return std::make_shared<Dataframe>(x, std::vector<std::string>(m_column_names), std::vector<types::Dtype>(m_column_dtypes));
}
private:
std::vector<char> m_raw = {};
std::vector<std::string> m_column_names = {};
std::vector<types::Dtype> m_column_dtypes = {};
std::vector<std::size_t> m_column_offsets = {};
std::size_t m_record_size = {};
std::size_t m_record_count = {};
};
using namespace types;
static std::shared_ptr<Dataframe> read_from_vector(const std::vector<std::vector<std::string>> values, const std::vector<std::string> names, const std::vector<Dtype> dtypes)
{
const auto record_size = std::accumulate(dtypes.begin(), dtypes.end(), std::size_t{0},
[](std::size_t accum, const auto &m)
{ return accum + m.size(); });
const auto total_size = values.size() * record_size;
const std::size_t INCR_RECORDS = std::max(total_size / (10 * record_size), std::size_t{65536});
auto raw = std::vector<char>{};
std::size_t written_records = 0;
auto offsets = std::vector<std::size_t>{};
for (int offset = 0; const auto &kd : dtypes)
{
offsets.push_back(offset);
offset += kd.size();
}
for (auto value : values)
{
if (written_records >= raw.size() / record_size)
{
raw.resize(raw.size() + INCR_RECORDS * record_size, char{' '});
}
for (int i = 0; i < names.size(); i++)
{
const auto name = names[i];
const auto dtype = dtypes[i];
const auto offset = offsets[i];
const auto pos = written_records * record_size + offset;
switch (dtype.base())
{
case df::Base::CHAR:
{
const auto v = value[i];
const auto byte_to_copy = std::min(v.size(), dtype.size());
std::memcpy(raw.data() + pos,
v.data() + v.size() - byte_to_copy, byte_to_copy); // Prendo gli ultimi byte
break;
}
case df::Base::UNSIGNED:
{
const auto v = std::stoull(value[i]);
const auto byte_to_copy = dtype.size();
std::memcpy(raw.data() + pos, &v, byte_to_copy); // Prendo gli ultimi byte
break;
}
default:
throw std::runtime_error("ColumnType non riconosciuto");
}
}
written_records++;
}
raw.resize(written_records * record_size);
raw.shrink_to_fit();
return std::make_shared<Dataframe>(raw, names, dtypes);
}
}
File: iterator.hpp
#pragma once
#include <iostream>
#include <cstring>
namespace df
{
// Random-access style iterator over fixed-size records stored back to back in
// a char buffer. Dereferencing yields a record_reference proxy that aliases the
// buffer; record_value is an owning copy of one record.
class Iterator
{
    std::size_t size; // bytes per record (the stride)
    char *ptr;        // first byte of the current record

public:
    struct record_reference;

    // Owning deep copy of one record.
    struct record_value
    {
        std::size_t size;
        char *ptr;

        record_value(const record_reference &t) : record_value(t.size, t.ptr) {}
        record_value(const std::size_t m_size, char *m_ptr) : size{m_size}, ptr{new char[m_size]}
        {
            if (size != 0)
                std::memcpy(ptr, m_ptr, size);
        }
        // The buffer is owned, so copies must be deep; the implicit member-wise
        // copy would make two objects delete[] the same allocation.
        record_value(const record_value &other) : record_value(other.size, other.ptr) {}
        record_value(record_value &&other) noexcept : size{other.size}, ptr{other.ptr}
        {
            other.size = 0;
            other.ptr = nullptr;
        }
        // Unified copy/move assignment: the by-value parameter is the copy (or
        // the moved-in object) and its destructor releases the old buffer.
        record_value &operator=(record_value other) noexcept
        {
            const std::size_t old_size = size;
            char *const old_ptr = ptr;
            size = other.size;
            ptr = other.ptr;
            other.size = old_size;
            other.ptr = old_ptr;
            return *this;
        }
        ~record_value()
        {
            delete[] this->ptr;
        }
    };

    // Non-owning proxy for a record inside the buffer. Copy construction
    // rebinds (shallow); assignment copies the record bytes.
    struct record_reference
    {
        std::size_t size;
        char *ptr;

        record_reference(const std::size_t m_size, char *m_ptr) : size{m_size}, ptr{m_ptr} {}
        record_reference(const record_reference &t) = default;
        // record_reference(const record_value &t) : record_reference(t.size, t.ptr) {};

        record_reference &operator=(const record_value &t)
        {
            std::memcpy(ptr, t.ptr, size);
            return *this;
        }
        record_reference &operator=(const record_reference &t)
        {
            // memmove: source and destination are the same bytes on self-assignment.
            std::memmove(ptr, t.ptr, size);
            return *this;
        }
        record_reference &operator=(char *t)
        {
            std::memcpy(ptr, t, size);
            return *this;
        }
        operator char *()
        {
            return ptr;
        }
        operator const char *() const { return ptr; }
    };

    using iterator_category = std::random_access_iterator_tag;
    using value_type = record_value;
    using reference = record_reference;
    // Before C++20, std::iterator_traits is only populated when all five member types exist.
    using pointer = void;
    using difference_type = std::ptrdiff_t;

    // default constructible
    Iterator() : size(0), ptr(nullptr)
    {
    }
    // copyable
    Iterator(const Iterator &t) = default;
    Iterator &operator=(const Iterator &t) = default;

    Iterator(char *ptr, const std::size_t size) : size{size}, ptr(ptr)
    {
    }

    record_reference operator*() const
    {
        return {size, ptr};
    }
    // Prefix
    Iterator &operator++()
    {
        ptr += size;
        return *this;
    }
    // Postfix
    Iterator operator++(int)
    {
        auto tmp = *this;
        ++*this;
        return tmp;
    }
    // Prefix
    Iterator &operator--()
    {
        ptr -= size;
        return *this;
    }
    // Postfix
    Iterator operator--(int)
    {
        auto tmp = *this;
        --*this;
        return tmp;
    }
    // Distance in records. The division is done in the signed difference_type:
    // dividing by the unsigned `size` would turn negative distances into huge
    // positive values.
    difference_type operator-(const Iterator &it) const
    {
        if (size == 0)
            return 0;
        return (this->ptr - it.ptr) / stride();
    }
    Iterator operator+(const difference_type &offset) const
    {
        return Iterator(ptr + offset * stride(), size);
    }
    friend Iterator operator+(const difference_type &diff, const Iterator &it)
    {
        return it + diff;
    }
    Iterator operator-(const difference_type &diff) const
    {
        return Iterator(ptr - diff * stride(), size);
    }
    reference operator[](const difference_type &offset) const
    {
        return {size, ptr + offset * stride()};
    }
    bool operator==(const Iterator &it) const
    {
        return this->ptr == it.ptr;
    }
    bool operator!=(const Iterator &it) const
    {
        return !(*this == it);
    }
    bool operator<(const Iterator &it) const
    {
        return this->ptr < it.ptr;
    }
    bool operator>=(const Iterator &it) const
    {
        return this->ptr >= it.ptr;
    }
    bool operator>(const Iterator &it) const
    {
        return this->ptr > it.ptr;
    }
    bool operator<=(const Iterator &it) const
    {
        return this->ptr <= it.ptr;
    }
    Iterator &operator+=(const difference_type &diff)
    {
        ptr += diff * stride();
        return *this;
    }
    Iterator &operator-=(const difference_type &diff)
    {
        ptr -= diff * stride();
        return *this;
    }
    // Kept for source compatibility; a conversion to the class's own type is
    // never selected implicitly.
    operator Iterator() const
    {
        return Iterator(ptr, size);
    }

private:
    // Record size as a signed value so offset arithmetic stays signed.
    difference_type stride() const
    {
        return static_cast<difference_type>(size);
    }
};
// Swaps the bytes of the two referenced records in place; a.size bytes are
// exchanged. Declared inline because it is defined in a header: without it,
// including the header from more than one translation unit produces multiple
// definitions at link time.
inline void swap(df::Iterator::record_reference a, df::Iterator::record_reference b)
{
    unsigned char *p = reinterpret_cast<unsigned char *>(a.ptr);
    unsigned char *q = reinterpret_cast<unsigned char *>(b.ptr);
    unsigned char *const sentry = p + a.size;
    for (; p < sentry; ++p, ++q)
    {
        const unsigned char t = *p;
        *p = *q;
        *q = t;
    }
}
}
File: comparator.hpp
#pragma once
#include <memory>
#include <functional>
#include "dataframe.hpp"
#include "iterator.hpp"
namespace compare
{
// Type-erased comparator over two record proxies, returning int. The column
// comparators in this file return a three-way result (negative, 0, positive);
// the composite comparator returns a less-than bool converted to int.
using comparator_fn = std::function<int(const df::Iterator::record_reference, const df::Iterator::record_reference)>;
// Three-way comparator for the field at compile-time byte `offset`.
// Sizes 3, 5, 7 and anything above 8 are compared byte-wise with memcmp;
// otherwise the field is read as a T and compared with operator<.
// The T values are read with memcpy because fields sit at arbitrary byte
// offsets inside a packed record, where a direct *(T *) access may be
// misaligned. T must be trivially copyable.
template <typename T, std::size_t offset = 0, std::size_t size = sizeof(T)>
static inline comparator_fn make_comparator()
{
    if constexpr (size == 3 or size == 5 or size == 7 or size > 8)
    {
        return [](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
        { return std::memcmp(a + offset, b + offset, size); };
    }
    else
    {
        return [](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
        {
            T lhs;
            T rhs;
            std::memcpy(&lhs, a + offset, sizeof(T));
            std::memcpy(&rhs, b + offset, sizeof(T));
            return lhs < rhs ? -1 : rhs < lhs ? +1
                                              : 0;
        };
    }
}
// Three-way comparator for a field of type T located `offset` bytes into the
// record. The values are read with memcpy because fields sit at arbitrary byte
// offsets inside a packed record, where a direct *(T *) access may be
// misaligned. T must be trivially copyable.
template <typename T>
static inline comparator_fn make_comparator(const std::size_t offset)
{
    return [offset](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
    {
        T lhs;
        T rhs;
        std::memcpy(&lhs, a + offset, sizeof(T));
        std::memcpy(&rhs, b + offset, sizeof(T));
        return lhs < rhs ? -1 : rhs < lhs ? +1
                                          : 0;
    };
}
// Builds a three-way comparator for one column.
//
// dtype:  column type; selects the comparison strategy.
// offset: byte offset of the column inside a record.
//
// CHAR columns are compared byte-wise (size 1 as a std::uint8_t); UNSIGNED
// columns are widened into a std::uint64_t and compared numerically.
// Throws std::runtime_error for an unknown base type or an UNSIGNED column
// wider than 8 bytes, which would not fit the 64-bit temporaries.
static inline comparator_fn make_column_comparator(const df::Dtype dtype, const std::size_t offset)
{
    // Capture only the width, not the whole Dtype.
    const std::size_t width = dtype.size();
    switch (dtype.base())
    {
    case df::Base::CHAR:
    {
        if (width == 1)
            return make_comparator<std::uint8_t>(offset);
        else if (width == 2)
            return [=](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
            { return std::memcmp(a + offset, b + offset, 2); }; // Is there any benefit to hard-coding 2, or should it be treated as an unsigned short?
        return [=](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
        { return std::memcmp(a + offset, b + offset, width); };
    }
    case df::Base::UNSIGNED:
    {
        if (width > sizeof(std::uint64_t))
            throw std::runtime_error("Unsupported dtype");
        return [=](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
        {
            std::uint64_t uint_value_a = 0;
            std::uint64_t uint_value_b = 0;
            std::memcpy(&uint_value_a, a + offset, width);
            std::memcpy(&uint_value_b, b + offset, width);
            return (uint_value_a < uint_value_b ? -1 : uint_value_a > uint_value_b ? +1
                                                                                   : 0);
        };
    }
    default:
        throw std::runtime_error("Unsupported dtype");
    }
}
// Builds a less-than comparator over whole records: columns are compared in
// order and the first column that differs decides the result.
static inline comparator_fn make_composite_two_way_comparator(const std::shared_ptr<df::Dataframe> &T)
{
    const auto &column_dtypes = T->dtypes();
    const auto &column_offsets = T->offsets();

    // One three-way comparator per column, in column order.
    std::vector<comparator_fn> column_comparators;
    for (std::size_t col = 0; col < column_dtypes.size(); ++col)
    {
        column_comparators.emplace_back(make_column_comparator(column_dtypes[col], column_offsets[col]));
    }

    return [column_comparators](const df::Iterator::record_reference a, const df::Iterator::record_reference b)
    {
        for (const auto &compare_column : column_comparators)
        {
            // Equal columns defer to the next one; otherwise the result is
            // true exactly when the first argument is less than the second.
            const auto result = compare_column(a, b);
            if (result != 0)
                return result < 0;
        }
        return false;
    };
}
}
File: main.cpp
#include <algorithm>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "dataframe.hpp"
#include "comparator.hpp"
// In-place selection sort over [first, last).
//
// comp(a, b) must return true when a should be ordered before b.
// Elements are exchanged with std::iter_swap, so the function works for any
// iterator type (it previously hard-coded df::Iterator::value_type) and no
// temporary copy of the element is allocated for each exchange.
template <typename RandomAccessIterator, typename Comparator>
static void selection_sort(RandomAccessIterator first, RandomAccessIterator last, Comparator comp)
{
    for (auto i = first; i != last; ++i)
    {
        // Find the smallest element of the unsorted tail [i, last).
        auto min = i;
        for (auto j = i + 1; j != last; ++j)
        {
            if (comp(*j, *min))
                min = j;
        }
        if (min != i)
            std::iter_swap(i, min);
    }
}
// Builds the two-column example dataframe, sorts it with selection_sort and
// prints it before and after; std::sort is then invoked on the same range.
int main(int argc, char const *argv[])
{
    std::vector<std::string> values{"20", "21", "20", "19", "10", "11", "40", "41", "10", "11"};

    // Pair up consecutive values: every pair becomes one row (a, b).
    std::vector<std::vector<std::string>> v;
    for (std::size_t first = 0; first < values.size(); first += 2)
    {
        v.push_back({values[first], values[first + 1]});
    }

    std::vector<std::string> column_names = {"a", "b"};
    df::Dtype d = df::Dtype(df::Base::UNSIGNED, 4);
    std::vector dtypes = {d, d};

    // Create a dataframe
    std::shared_ptr<df::Dataframe> frame = df::read_from_vector(v, column_names, dtypes);
    std::cout << "Before sorting" << std::endl;
    frame->print();

    // This comparator sorts the dataframe first by column a and then by column b in ascending order
    auto comparator = compare::make_composite_two_way_comparator(frame);
    selection_sort(frame->begin(), frame->end(), comparator);
    std::cout << "\nAfter sorting" << std::endl;
    frame->print();

    // With the std::sort it does not work
    std::sort(frame->begin(), frame->end(), comparator);
    return 0;
}
Your type is not a C++17 RandomAccessIterator, because it isn't a C++17 ForwardIterator, because reference is an object type, not a reference type.
The type It satisfies ForwardIterator if
Let T be the value type of It. The type std::iterator_traits<It>::reference must be either
T& or T&& if It satisfies OutputIterator (It is mutable), or
const T& or const T&& otherwise (It is constant),
(Other requirements elided)
You will be able to satisfy the C++20 concept std::random_access_iterator, because that relaxes the requirement on It::reference.
In C++17, the reference type of an iterator must be precisely value_type& in order for that iterator to be random access. Only input iterators can have the reference type be something other than value_type&. So in C++17, proxy iterators are limited to input iterators. And every algorithm written against C++17 has this expectation.
The C++20 ranges library adds the ability to have random access proxy iterators. And the C++20 algorithms that use those range concepts will respect them.

Is there anything like C++ default object method

I have the following templated merge sort program:
#include <iostream>
#include <vector>
#include <string>
// trying to create a default method call
// Wraps a string; instances are ordered by the length of that string.
class CInstance {
private:
    std::string str_;
public:
    CInstance(const std::string& str) : str_(str) {}
    // Lets a string literal convert in a single step, which brace-initialising
    // a CObj<CInstance> from {"text"} requires (only one user-defined
    // conversion is allowed in an implicit conversion sequence).
    CInstance(const char* str) : str_(str) {}

    // True when this string is longer than the other one. const-qualified so
    // it can also be called on const objects.
    bool operator>(const CInstance& that) const { return str_.size() > that.str_.size(); }

    // Stream insertion prints the wrapped string; this is what makes
    // `std::cout << obj` compile for CInstance.
    friend std::ostream& operator<<(std::ostream& os, const CInstance& obj) {
        return os << obj.str_;
    }
};
// Thin value wrapper around a single T.
template<class T>
class CObj {
private:
    T val;
public:
    // Deliberately non-explicit: containers of CObj are brace-initialised from plain T values.
    CObj(const T n) : val(n) {}
    // Returns a copy of the wrapped value; const so it works on const objects too.
    T Get() const { return val; }
};
template<class T>
using vcobj = std::vector<CObj<T>>;
// Prints every wrapped value followed by a space, then a newline.
template<class T>
void display(vcobj<T>& v) {
    for (auto it = v.begin(); it != v.end(); ++it) {
        std::cout << it->Get() << " ";
    }
    std::cout << "\n";
}
// Merges two sorted sequences into one ascending sequence; when neither front
// element is greater than the other, the left one is taken first.
template<class T>
vcobj<T> Merge(vcobj<T>& lv, vcobj<T>& rv) {
    vcobj<T> merged;
    auto left = lv.begin();
    auto right = rv.begin();

    // Repeatedly take the smaller of the two front elements.
    while (left != lv.end() && right != rv.end()) {
        if (left->Get() > right->Get()) {
            merged.emplace_back(right->Get());
            ++right;
        }
        else {
            merged.emplace_back(left->Get());
            ++left;
        }
    }

    // At most one of the inputs still has elements; append them.
    for (; left != lv.end(); ++left) {
        merged.emplace_back(left->Get());
    }
    for (; right != rv.end(); ++right) {
        merged.emplace_back(right->Get());
    }
    return merged;
}
// Top-down merge sort; returns a sorted copy and leaves the input untouched.
template<class T>
vcobj<T> Sort(const vcobj<T>& v) {
    const auto count = v.size();
    if (count == 0) {
        return vcobj<T>{};
    }
    if (count == 1) {
        return v;
    }

    // Split in the middle, sort both halves, merge the results.
    const auto middle = v.begin() + count / 2;
    vcobj<T> left_half(v.begin(), middle);
    vcobj<T> right_half(middle, v.end());
    auto left_sorted = Sort(left_half);
    auto right_sorted = Sort(right_half);
    return Merge(left_sorted, right_sorted);
}
int main() {
{
vcobj<int> v = {4, 5, 2, 1, 9, 6, 10, 8, 15, 3, 7};
display(v);
auto sorted = Sort(v);
display(sorted);
}
{
vcobj<float> v = {0.01, 0.001, 0.002, 0.009, 0.010, 0.0003, 0.00001};
display(v);
auto sorted = Sort(v);
display(sorted);
}
{
vcobj<std::string> v = {{"pineapple"}, {"jackfruit"}, {"mango"}, {"apple"}, {"banana"}};
display(v);
auto sorted = Sort(v);
display(sorted);
}
// causing problem
{
vcobj<CInstance> v = {{"pineapple"}, {"jackfruit"}, {"mango"}, {"apple"}, {"banana"}};
display(v);
auto sorted = Sort(v);
display(sorted);
}
return 0;
}
For all of the above types, I can simply use the object and it yields its data, which looks like calling a default get() method. Is there a way to make objects of class CInstance trigger a method when they are used on their own?
example:
I could do something like
CInstance obj;
std::cout << obj;
And that would call a default method in CInstance, whatever it may be.
As already mentioned in the other answer you can create your own operator<< function:
// Stream-insertion overload for CInstance: with it in scope, `std::cout << obj`
// compiles. The body is a placeholder; write the desired representation of
// `obj` to `stream` and return the stream so insertions can be chained.
std::ostream & operator<<(std::ostream &stream, const CInstance &obj) {
// stream << whatever you want to output
return stream;
}
You could also define a conversion operator. But you should think twice before you use them. They can lead to problems that are not easy to debug, especially when explicit is omitted. You generally should not use those for logging/debugging purposes. If your type represents a string and you use it to allow an easy conversion to an std::string then it might be fine.
#include <iostream>
#include <string>
// Exposes its text only through an explicit conversion to std::string.
class CInstance {
    std::string str_{"test"};
public:
    // Explicit: the caller has to ask for the conversion with a cast.
    explicit operator const std::string() const {
        return str_;
    }
};
// Prints the object's text via the explicit std::string conversion.
int main() {
    CInstance obj;
    const std::string text = (std::string)obj;
    std::cout << text << std::endl;
    return 0;
}
If you can guarantee that the lifetime of the returned const char * is still valid after the call you could also do something like (but I would avoid that solution):
#include <iostream>
#include <string>
// Converts implicitly to a C string that points into the member string, so
// the pointer is only valid while the object is alive and unmodified.
class CInstance {
    std::string str_{"test"};
public:
    operator const char *() const {
        const char *text = str_.c_str();
        return text;
    }
};
int main() {
CInstance t;
std::cout << t << std::endl;
return 0;
}
Personally, I would go with the first solution. But that really depends if you actually have a string representation of CInstance or if you want to display something for debugging purposes in a different format. I however would avoid the last non-explicit version with the const char * conversion operator.
In this exact case, you define an operator<< method like so:
std::ostream & operator<<(std::ostream &stream, const CInstance &obj) {
... output obj however you want to the stream. For instance:
stream << obj.getAge();
return stream;
}

Abstraction for bitset element iteration

I have a custom bitset class implementation in C++. I often iterate over the indexes of bits that are set in the bitset (i.e. for bitset '10011' I want to iterate over numbers 0, 3, 4.) This iteration can be implemented as follows:
// Bitset stored as chunks_ 64-bit words starting at data_.
struct Bitset {
    uint64_t* data_;
    size_t chunks_;

    // Collects the indexes of all set bits into a vector, in ascending order.
    std::vector<int> Elements() const {
        std::vector<int> indexes;
        for (size_t chunk = 0; chunk < chunks_; ++chunk) {
            // `rest &= rest - 1` clears the lowest set bit (same as `&= ~-rest`).
            for (uint64_t rest = data_[chunk]; rest != 0; rest &= rest - 1) {
                indexes.push_back(chunk*BITS + __builtin_ctzll(rest));
            }
        }
        return indexes;
    }
};
// Prints the index of every set bit, one per line.
void Iterate(Bitset bitset) {
    const std::vector<int> set_bits = bitset.Elements();
    for (std::size_t k = 0; k < set_bits.size(); ++k) {
        std::cout << "bit: " << set_bits[k] << std::endl;
    }
}
The above implementation provides clean code for the iteration, but it involves an unnecessary heap allocation with the vector. The following version which essentially inlines the Elements() function is often faster:
// Prints the index of every set bit without building an intermediate vector.
void Iterate(Bitset bitset) {
    const int chunk_count = bitset.chunks_;
    for (int chunk = 0; chunk < chunk_count; ++chunk) {
        // `rest &= rest - 1` clears the lowest set bit (same as `&= ~-rest`).
        for (uint64_t rest = bitset.data_[chunk]; rest != 0; rest &= rest - 1) {
            std::cout << "bit: " << chunk*BITS + __builtin_ctzll(rest) << std::endl;
        }
    }
}
What would be a good way to implement an abstraction for the iteration so that it would be as clean as the above version, but also with no performance cost.
Just iterate over your class. Provide your own implementation of an iterator class for your Bitset and provide begin() and end() methods. A simplest (untested!) implementation could look something like this:
#include <vector>
#include <cstdint>
#include <iostream>
// Bitset stored as chunks_ 64-bit words starting at data_. Iterating it visits
// every bit position in order and yields whether that bit is set.
struct Bitset {
    uint64_t* data_;
    size_t chunks_;

    struct iterator {
        uint64_t *pnt;      // word currently being read
        uint_fast8_t pos;   // bit position inside *pnt, 0..63

        iterator(uint64_t *pnt, size_t pos) :
            pnt(pnt), pos(pos) {}

        bool operator !=(const iterator& o) const {
            return o.pnt != pnt || o.pos != pos;
        }

        void operator ++() {
            pos++;
            if (pos == 64) {
                pnt++;
                pos = 0;
            }
        }

        // Value of the current bit. The shift is done on the 64-bit word:
        // `1 << pos` is an int shift and breaks for the upper bit positions.
        bool operator *() const {
            return ((*pnt >> pos) & uint64_t{1}) != 0;
        }
    };

    iterator begin() const { return iterator(data_, 0); }
    // After the last bit, ++ wraps pos to 0 and steps to the word one past the
    // end, so that is the state the end iterator has to compare equal to.
    iterator end() const { return iterator(data_ + chunks_, 0); }
};
// Prints the value yielded for each position of the bitset, one per line.
void Iterate(Bitset bitset) {
    for (auto it = bitset.begin(), stop = bitset.end(); it != stop; ++it) {
        auto&& b = *it;
        std::cout << "bit: " << b << std::endl;
    }
}
I believe for your strange while (td) { ... i*BITS + __builtin_ctzll(td) ... loop that I don't understand that could be something along (untested!):
// Number of bits in one storage word (uint64_t).
constexpr int BITS = 64;

// Bitset stored as chunks_ 64-bit words starting at data_. Iterating it yields
// the index of every set bit in ascending order.
struct Bitset {
    uint64_t* data_;
    size_t chunks_;

    struct iterator {
        uint64_t *data_;   // first word of the bitset
        int i = 0;         // index of the word currently being scanned
        uint64_t td = 0;   // bits of word i that have not been visited yet
        int chunks = 0;    // total number of words

        // `chunks` is the number of words that may be loaded from data_; with
        // the default of 0 only the bits passed in `td` are visited.
        iterator(uint64_t *data_, int i, uint64_t td, int chunks = 0) :
            data_(data_), i(i), td(td), chunks(chunks) {
            skip_empty_words();
        }

        bool operator !=(const iterator& o) const {
            return o.data_ != data_ || o.i != i || o.td != td;
        }

        void operator ++() {
            td &= td - 1;  // clear the lowest set bit (the one just visited)
            skip_empty_words();
        }

        // Index of the current set bit. Must not be called on the end iterator.
        int operator *() const {
            return i * BITS + __builtin_ctzll(td);
        }

    private:
        // Advances to the next word that has a set bit, or to the end state
        // (i == chunks, td == 0) when there is none.
        void skip_empty_words() {
            while (td == 0 && i < chunks) {
                ++i;
                if (i < chunks) {
                    td = data_[i];
                }
            }
        }
    };

    // data_ is only read when there is at least one word.
    iterator begin() {
        return iterator(data_, 0, chunks_ != 0 ? data_[0] : 0, static_cast<int>(chunks_));
    }
    iterator end() {
        return iterator(data_, static_cast<int>(chunks_), 0, static_cast<int>(chunks_));
    }
};
As KamilCuk suggested, I used an iterator to solve this problem. Now the implementation looks like:
// Bitset stored as chunks_ 64-bit words starting at data_. Iterating it yields
// the index of every set bit in ascending order.
struct Bitset {
    uint64_t* data_;
    size_t chunks_;

    class BitsetIterator {
     private:
        const Bitset* const bitset_;
        size_t pos_;    // index of the word being scanned
        uint64_t tb_;   // bits of that word that have not been visited yet

     public:
        BitsetIterator(const Bitset* const bitset, size_t pos, uint64_t tb)
            : bitset_(bitset), pos_(pos), tb_(tb) { }

        // Iterators differ when their word index or remaining bits differ.
        bool operator!=(const BitsetIterator& other) const {
            return !(pos_ == other.pos_ && tb_ == other.tb_);
        }

        // Drops the bit just visited, then moves on to the next word that has
        // a set bit, stopping at pos_ == chunks_ when there is none.
        const BitsetIterator& operator++() {
            tb_ &= tb_ - 1;
            const size_t word_count = bitset_->chunks_;
            while (tb_ == 0 && pos_ < word_count) {
                ++pos_;
                if (pos_ < word_count) {
                    tb_ = bitset_->data_[pos_];
                }
            }
            return *this;
        }

        // Index of the current set bit.
        int operator*() const {
            return pos_*BITS + __builtin_ctzll(tb_);
        }
    };

    // Positioned on the first set bit; equal to end() when no bit is set.
    BitsetIterator begin() const {
        size_t first = 0;
        while (first < chunks_ && data_[first] == 0) {
            ++first;
        }
        const uint64_t word = first < chunks_ ? data_[first] : 0;
        return BitsetIterator(this, first, word);
    }

    BitsetIterator end() const {
        return BitsetIterator(this, chunks_, 0);
    }
};
// Prints the index of every set bit, one per line.
void Iterate(Bitset bitset) {
    const auto stop = bitset.end();
    for (auto it = bitset.begin(); it != stop; ++it) {
        const int b = *it;
        std::cout << "bit: " << b << std::endl;
    }
}
This avoids heap allocation and is much faster than the version that uses vector. I'm not sure if this provides exactly same performance as the version without any abstractions, but it should be very close.

OpenSSL easy way to modulo max data type size

Using OpenSSL 1.1.0, is there an easier way to reduce a bignum modulo some value so that the result fits into an unsigned long?
What I would like to do is something like the following:
// Attempt 1: reduce a random BIGNUM with BN_mod_word.
char p_str[] = "489133282872437279"; // A big prime
BIGNUM *p = BN_new();
BN_dec2bn(&p, p_str); // Prime converted into bignum
BIGNUM *a = BN_new();
BN_rand_range(a, p); // Find random number from 0 to prime
// Intended modulus is max + 1, but unsigned arithmetic wraps around, so m
// becomes 0 whenever BN_ULONG is no wider than unsigned long. This is the
// part that does not work.
BN_ULONG m = std::numeric_limits<unsigned long>::max() + 1;
// Reduce random number in field Z_p such that it fits into unsigned long
// NOTE(review): with m == 0 this is a division by zero; confirm against the BN_mod_word documentation.
unsigned long result = BN_mod_word(a, m);
std::cout << result << std::endl;
Other way would be to define another BIGNUM
// Attempt 2: hold the modulus in a BIGNUM so that it cannot wrap around.
char m_str[] = "4294967296"; // Note that this is now 2**32, which we want
BIGNUM *m_2 = BN_new();
BN_dec2bn(&m_2, m_str); // Decimal string converted into bignum
And then do the calculation as:
// Reduce a modulo m_2; the result is itself a BIGNUM stored in `remainder`.
BN_CTX *ctx = BN_CTX_new();
BIGNUM *remainder = BN_new();
BN_nnmod(remainder, a, m_2, ctx);
This would require now to transform resulting BN remainder now back into data type.
I was wondering if there is any easier way to do such reductions, so that the remainder fits into the data type.
The way I would approach this is to convert the BIGNUM to binary bytes and then convert those binary bytes to the unsigned integer type, modulo any extra un-necessary bytes.
The documentation to BN_bn2bin() says that the returned bytes are in big-endian format.
Here's my solution (along with a minimal c++ wrapper for BIGNUM):
#include <openssl/bn.h>
#include <limits>
#include <limits>
#include <stdexcept>
#include <utility>
#include <iostream>
#include <memory>
#include <vector>
#include <cstdint>
// Exception thrown by BigNum when an OpenSSL BN_* call reports failure.
struct bn_failure : std::runtime_error
{
    // Reuse all std::runtime_error constructors (message as string or C string).
    using std::runtime_error::runtime_error;
};
// Minimal owning wrapper around an OpenSSL BIGNUM. Failures reported by the
// BN_* functions are turned into bn_failure exceptions.
struct BigNum
{
    // Allocates a new BIGNUM.
    BigNum()
    : bn_(BN_new())
    {
        if (!bn_) throw bn_failure("BN_new()");
    }

    // Parses a decimal string. The delegating constructor has already
    // guaranteed that bn_ is non-null.
    BigNum(const char* str)
    : BigNum()
    {
        auto len = BN_dec2bn(&bn_, str); // Decimal string converted into bignum
        if (len == 0) throw bn_failure("BN_dec2bn()");
    }

    // Takes over the other object's BIGNUM and leaves it empty (bn_ == nullptr).
    BigNum(BigNum&& other) noexcept
    : bn_(other.bn_)
    {
        other.bn_ = nullptr;
    }

    BigNum& operator=(BigNum&& other) noexcept
    {
        auto tmp = BigNum(std::move(other));
        swap(tmp);
        return *this;
    }

    BigNum(BigNum const& other)
    : bn_(BN_dup(other.bn_))
    {
        if (!bn_) throw bn_failure("BN_dup()");
    }

    // Copy-and-swap: also works when *this has been moved from (bn_ is null,
    // which must not be handed to BN_copy) and leaves *this untouched if the
    // copy fails.
    BigNum& operator=(BigNum const& other)
    {
        auto tmp = BigNum(other);
        swap(tmp);
        return *this;
    }

    ~BigNum() noexcept
    {
        if (bn_)
        {
            BN_free(bn_);
        }
    }

    void swap(BigNum& other) noexcept
    {
        std::swap(bn_, other.bn_);
    }

    // Replaces the value with a random number drawn via BN_rand_range with
    // `other` as the range. Throws bn_failure if OpenSSL reports an error.
    void randRange(const BigNum& other)
    {
        if (!BN_rand_range(bn_, other.bn_))
            throw bn_failure("BN_rand_range()");
    }

    friend std::ostream& operator<<(std::ostream& os, BigNum const& bn)
    {
        return os << bn.as_text().get();
    }

    // Deleter for strings allocated by OpenSSL.
    struct string_free {
        void operator()(char *str) const noexcept {
            OPENSSL_free(str);
        }
    };

    // Decimal representation. Throws bn_failure instead of handing back a null
    // string when the conversion fails.
    auto as_text() const -> std::unique_ptr<char, string_free>
    {
        auto p = BN_bn2dec(bn_);
        if (!p) throw bn_failure("BN_bn2dec()");
        return { p, string_free() };
    }

    // Bytes of the number as written by BN_bn2bin.
    auto as_bytes() const -> std::vector<std::uint8_t>
    {
        const auto bytes = BN_num_bytes(bn_);
        auto result = std::vector<std::uint8_t>(static_cast<std::size_t>(bytes));
        BN_bn2bin(bn_, result.data());
        return result;
    }

    BIGNUM* bn_;
};
// Interprets the trailing (least significant) bytes of a big-endian byte
// sequence as an unsigned integer of type Type. At most sizeof(Type) bytes are
// used, so more significant bytes are dropped; an empty sequence yields 0.
template<class Type>
auto be_bytes_to_unsigned(std::vector<std::uint8_t> const& vec) -> Type
{
    const std::size_t used = std::min(sizeof(Type), vec.size());
    const int bits_per_byte = std::numeric_limits<std::uint8_t>::digits;

    // Walk the used tail from its most significant byte to the last byte,
    // shifting the accumulated value up by one byte each step.
    Type value = Type(0);
    for (std::size_t k = vec.size() - used; k < vec.size(); ++k)
    {
        value = Type(value << bits_per_byte);
        value |= Type(vec[k]);
    }
    return value;
}
int main()
{
auto p = BigNum("489133282872437279");
auto b = BigNum();
b.randRange(p);
std::cout << p << std::endl << b << std::endl;
auto modval = be_bytes_to_unsigned<unsigned long>(b.as_bytes());
std::cout << modval << std::endl;
}
sample output (64 bit long ints):
489133282872437279
281503461139622433
281503461139622433

Different return and coordinate types in nanoflann radius search

I'm trying to use nanoflann in a project and am looking at the vector-of-vector and radius search examples.
I can't find a way to perform a radius search with a different data type than the coordinate type. For example, my coordinates are vectors of uint8_t; I am trying to input a radius of type uint32_t with little success.
I see in the source that the metric_L2 struct (which I am using for distance) uses the L2_Adaptor with two template parameters. L2_Adaptor itself takes three parameters, with the third defaulted to the first, which seems to be the problem if I am understanding the code correctly. However, trying to force use of the third always results in 0 matches in the radius search.
Is there a way to do this?
Edit: In the same code below, everything works. However, if I change the search_radius (and ret_matches) to uint32_t, the radiusSearch method doesn't work.
#include <iostream>
#include <Eigen/Dense>
#include <nanoflann.hpp>
typedef Eigen::Matrix<uint8_t, Eigen::Dynamic, 1> coord_t;
using namespace nanoflann;
struct Point
{
coord_t address;
Point() {}
Point(uint8_t coordinates) : address(coord_t::Random(coordinates)) {}
};
struct Container
{
std::vector<Point> points;
Container(uint8_t coordinates, uint32_t l)
: points(l)
{
for(auto& each_location: points)
{
each_location = Point(coordinates);
}
}
};
struct ContainerAdaptor
{
typedef ContainerAdaptor self_t;
typedef nanoflann::metric_L2::traits<uint8_t, self_t>::distance_t metric_t;
typedef KDTreeSingleIndexAdaptor<metric_t, self_t, -1, size_t> index_t;
index_t *index;
const Container &container;
ContainerAdaptor(const int dimensions, const Container &container, const int leaf_max_size = 10)
: container(container)
{
assert(container.points.size() != 0 && container.points[0].address.rows() != 0);
const size_t dims = container.points[0].address.rows();
index = new index_t(dims, *this, nanoflann::KDTreeSingleIndexAdaptorParams(leaf_max_size));
index->buildIndex();
}
~ContainerAdaptor()
{
delete index;
}
inline void query(const uint8_t *query_point, const size_t num_closest, size_t *out_indices, uint32_t *out_distances_sq, const int ignoreThis = 10) const
{
nanoflann::KNNResultSet<uint32_t, size_t, size_t> resultSet(num_closest);
resultSet.init(out_indices, out_distances_sq);
index->findNeighbors(resultSet, query_point, nanoflann::SearchParams());
}
const self_t& derived() const
{
return *this;
}
self_t& derived()
{
return *this;
}
inline size_t kdtree_get_point_count() const
{
return container.points.size();
}
inline size_t kdtree_distance(const uint8_t *p1, const size_t idx_p2, size_t size) const
{
size_t s = 0;
for (size_t i = 0; i < size; i++)
{
const uint8_t d = p1[i] - container.points[idx_p2].address[i];
s += d * d;
}
return s;
}
inline coord_t::Scalar kdtree_get_pt(const size_t idx, int dim) const
{
return container.points[idx].address[dim];
}
template <class BBOX>
bool kdtree_get_bbox(BBOX & bb) const
{
for(size_t i = 0; i < bb.size(); i++)
{
bb[i].low = 0;
bb[i].high = UINT8_MAX;
}
return true;
}
};
// Builds a random data set of `points` points with `coordinates` components,
// runs a radius search around a random query point and prints the matches.
void container_demo(const size_t points, const size_t coordinates)
{
    Container s(coordinates, points);
    coord_t query_pt(coord_t::Random(coordinates));

    typedef ContainerAdaptor my_kd_tree_t;
    // The adaptor's constructor already builds the index, so it is not rebuilt
    // here; a second buildIndex() would repeat the whole construction.
    my_kd_tree_t mat_index(coordinates, s, 25);

    // NOTE(review): for L2 metrics nanoflann is believed to interpret the
    // radius as a squared distance; confirm against the nanoflann docs.
    const uint8_t search_radius = static_cast<uint8_t>(100);
    std::vector<std::pair<size_t, uint8_t>> ret_matches;
    nanoflann::SearchParams params;
    const size_t nMatches = mat_index.index->radiusSearch(query_pt.data(), search_radius, ret_matches, params);

    // Unary + promotes the small integer types so they print as numbers.
    for (size_t i = 0; i < nMatches; i++)
    {
        std::cout << "idx[" << i << "]=" << +ret_matches[i].first << " dist[" << i << "]=" << +ret_matches[i].second << std::endl;
    }
    std::cout << std::endl;
    std::cout << "radiusSearch(): radius=" << +search_radius << " -> " << +nMatches << " matches" << std::endl;
}
// Runs the demo with one million points of 32 coordinates each.
int main()
{
    const size_t point_count = 1e6;
    const size_t coordinate_count = 32;
    container_demo(point_count, coordinate_count);
    return 0;
}
More info: it seems that the distance type, which is the third template parameter of L2_Adaptor, must be a signed type. Changing the metric_t typedef to the following solves the problem, provided search_radius and ret_matches are also changed to int64_t.
typedef L2_Adaptor<uint8_t, self_t, int64_t> metric_t;