How to build Perfect Hash in C++? - c++

I'd like to know how to build a Perfect Hash in C++.
A Perfect Hash is a hash function that 1) has no collisions at all, 2) is built only for a fixed set of values, and 3) maps a set of N values to the range 0 .. N * 1.23 - 1, i.e. not to a range of exactly N numbers but to a slightly larger multiple of N, such as N * 1.23.
I've read this Wiki article about Perfect Hash.
And decided to post this short question only to send my own Answer.
So I don't provide any Minimal Reproducible Example, only because answer is fully contained.

Suppose we have set S of N integer elements. We want to perfect-hash this set.
There are different ways of building perfect hash. But one way, according to Wiki, is following way:
First we choose some function g(x) = k * x mod p mod n, where p is a fairly large prime, k is a random constant, and n is the number of elements in the set.
Then we map through g(x) all elements of a set, these elements map to some integers in a range 0..N-1 which may collide. Collided integers form separate buckets.
We create an unbounded family of hash functions. For example, in my C++ code below I use functions of the same form as g(x): Hash[i](x) = RandomConstant[i] * x mod Prime[i] mod M, where M = N * 1.23; here 1.23 is a small constant that can be anything in roughly the range 1.2-1.5.
Each bucket B_i is hashed separately in such a way that it forms set K_i = Hash[l](x) for x in B_i, so that l is minimal and |K_i| = |B_i|, and K_i doesn't intersect with any previous K_i. Each minimal found l is stored as sigma(i) = l.
sigma(i) is compressed into bit vector in such a way that we can get value l = sigma(i) in O(1) time. Using any compact bit packing technique like Gamma Coding or Unary Coding.
Finally, to get perfect hash of value x we do PerfectHash(x) = Hash[sigma(g(x))](x).
Full code below. It generates N random numbers, then perfect hashes them and finally outputs amount of bits per number.
Try it online!
#include <cstdint>
#include <bit>
#include <vector>
#include <random>
#include <cstring>
#include <stdexcept>
#include <string>
#include <iostream>
#include <iomanip>
#include <algorithm>
#include <unordered_map>
#include <array>
// Throws std::runtime_error when `cond` is false. The exception text contains the
// stringified condition, the source line number and `msg`. `msg` is only evaluated
// on failure because it is used inside the body of the `if`.
#define ASSERT_MSG(cond, msg) { if (!(cond)) throw std::runtime_error("Assertion (" #cond ") failed at line " + std::to_string(__LINE__) + "! Msg: '" + std::string(msg) + "'."); }
// ASSERT_MSG with an empty message.
#define ASSERT(cond) ASSERT_MSG(cond, "")
// "D" variants: here they simply forward to the always-enabled macros above.
#define DASSERT_MSG(cond, msg) ASSERT_MSG(cond, msg)
#define DASSERT(cond) DASSERT_MSG(cond, "")
// Short aliases for the fixed-width unsigned integer types from <cstdint>.
using u8 = uint8_t;
using u32 = uint32_t;
using u64 = uint64_t;
/// Trial-division primality test.
///
/// @param n  Integral value to test.
/// @return   true when `n` is prime; 0, 1 and negative values are not prime, 2 is.
template <typename T>
bool IsPrime(T const & n) {
    if (n < 2)
        return false;
    if (n % 2 == 0)
        return n == 2;  // 2 is the only even prime
    // `d <= n / d` is the overflow-free form of `d * d <= n`.
    for (T d = 3; d <= n / d; d += 2)
        if (n % d == 0)
            return false;
    return true;
}
/// Returns the smallest prime that is >= `i` (2 for every `i` <= 2).
/// Only odd candidates are probed, starting from `i` rounded up to odd.
template <typename T>
T NextPrime(T const & i) {
    if (i <= 2)
        return 2;
    T candidate = i | 1;
    while (!IsPrime(candidate))
        candidate += 2;
    return candidate;
}
class BitVector {
static size_t constexpr index_block = 1 << 9;
public:
BitVector() {}
BitVector(size_t size) : size_(size), bits_((size_ + 7) / 8) {}
void Clear() {
size_ = 0;
bits_.clear();
index_.clear();
}
size_t Size() const { return size_; }
bool Get(size_t i) const {
return (bits_[i / 8] >> (i % 8)) & u8(1);
}
void Set(size_t i, bool val = true) {
if (val)
bits_[i / 8] |= u8(1) << (i % 8);
else
bits_[i / 8] &= ~(u8(1) << (i % 8));
}
void Push(bool val) {
++size_;
if (size_ - 1 >= bits_.size() * 8)
bits_.resize((size_ + 7) / 8);
Set(size_ - 1, val);
}
void Index() {
index_.clear();
for (size_t i = 0; i < size_; i += index_block) {
size_t sum = 0;
size_t const portion = std::min(index_block, size_ - i);
for (size_t k = i; k < i + portion; k += 64)
if (i + portion - k >= 64)
sum += std::popcount(*(u64*)&bits_[k / 8]);
else {
u64 x = 0;
std::memcpy(&x, &bits_[k / 8], bits_.size() - k / 8);
sum += std::popcount(x);
}
index_.push_back(index_.empty() ? sum : (index_.back() + sum));
}
}
size_t Select1(size_t idx) const {
size_t const d = std::distance(index_.data(), std::upper_bound(index_.data(), index_.data() + index_.size(), idx));
ASSERT_MSG(d < index_.size(), "idx " + std::to_string(idx));
size_t const prev_sum = d == 0 ? 0 : index_[d - 1], hi = std::min<size_t>(size_, index_block * (d + 1));
size_t csum = 0, i = 0;
u64 word = 0;
for (i = index_block * d; i < hi; i += 64) {
size_t const portion = std::min<size_t>(hi - i, 64);
size_t word_sum = 0;
if (portion == 64)
word = *(u64*)&bits_[i / 8];
else {
word = 0;
std::memcpy(&word, &bits_[i / 8], bits_.size() - i / 8);
}
word_sum = std::popcount(word);
if (prev_sum + csum + word_sum > idx)
break;
csum += word_sum;
}
size_t sum0 = 0;
while (true) {
size_t const i1 = std::countr_zero(word);
ASSERT(i1 < 64);
if (prev_sum + csum + sum0 >= idx) {
ASSERT(prev_sum + csum + sum0 == idx);
ASSERT(Get(i + i1));
return i + i1;
}
word &= word - 1;
++sum0;
}
}
std::string Dump() const {
std::string r;
for (size_t i = 0; i < size_; ++i)
r.append(1, Get(i) ? '1' : '0');
return r;
}
u64 Word(size_t i) const {
return (*(u64*)&bits_[i / 8]) >> (i % 8);
}
private:
size_t size_ = 0;
std::vector<u8> bits_;
std::vector<size_t> index_;
};
// Sequence of integers stored back to back in a BitVector using a gamma-style
// variable-length code, plus a sampled index of bit offsets for faster access.
// Values are stored as max(value, 1), which is what Get() returns.
class GammaBitVector {
// Number of codes between two consecutive samples in index_.
static size_t constexpr index_block = 1 << 7;
public:
// Appends the code of every element of `nums` to the bit vector.
// Each element is clamped to at least 1 and decremented before encoding;
// the code bits are pushed lowest bit first.
template <typename T>
void GammaEncodeVec(std::vector<T> const & nums) {
for (auto n: nums) {
auto [x, b] = GammaEncode(std::max<size_t>(n, 1) - 1);
//std::cout << n << ": " << b << " " << x << std::endl;
for (size_t i = 0; i < b; ++i) {
bv_.Push(bool(x & 1));
x >>= 1;
}
}
//std::cout << "GammaEncodedVec " << bv_.Size() << std::endl;
//std::cout << bv_.Dump() << std::endl << std::endl;
}
// Walks all codes once and appends to index_ the bit offset reached after
// every index_block codes (and once more when the end of the bits is reached).
// Entries are appended; index_ is not cleared here.
void Index() {
size_t i = 0, cnt = 0;
while (i < bv_.Size()) {
auto const [n, ebits, dbits] = GammaDecode(bv_.Word(i));
++cnt;
i += ebits;
if (cnt < index_block && i < bv_.Size())
continue;
index_.push_back(i);
cnt = 0;
}
}
// Returns element `i` (as max(original, 1)). Starts at the sampled offset of
// the block containing `i` and decodes sequentially up to it. Fails an
// assertion when the end of the bits is reached first.
size_t Get(size_t i) const {
size_t j = i / index_block * index_block, sum = i / index_block > 0 ? index_.at(i / index_block - 1) : 0;
while (sum < bv_.Size()) {
auto const [n, ebits, dbits] = GammaDecode(bv_.Word(sum));
if (j >= i)
return n + 1;
++j;
sum += ebits;
}
ASSERT(false);
}
// Total number of bits used by all stored codes.
size_t Size() const { return bv_.Size(); }
// Returns the bit offset at which the code of element `i` starts; same walk
// as Get().
size_t GetBitOffset(size_t i) const {
size_t j = i / index_block * index_block, sum = i / index_block > 0 ? index_.at(i / index_block - 1) : 0;
while (sum < bv_.Size()) {
auto const [n, ebits, dbits] = GammaDecode(bv_.Word(sum));
if (j >= i)
return sum;
++j;
sum += ebits;
}
ASSERT(false);
}
private:
// Left shift that yields 0 instead of being undefined for counts >= 64.
static u64 Shl(u64 w, size_t cnt) {
return cnt >= 64 ? u64(0) : (w << cnt);
}
// Right shift that yields 0 instead of being undefined for counts >= 64.
static u64 Shr(u64 w, size_t cnt) {
return cnt >= 64 ? u64(0) : (w >> cnt);
}
// Word with the low `n` bits set (all bits for n >= 64).
static u64 Mask(size_t n) {
return n >= 64 ? ~u64(0) : (u64(1) << n) - 1;
}
// Number of significant bits in `n` (0 for n == 0).
static size_t NumBits(u64 n) {
return 64 - std::countl_zero(n);
}
// Encodes n + 1. With nbits = bit length of n + 1 and rnbits = nbits - 1 the
// code is, from bit 0 upwards: rnbits zero bits, a single 1 bit, then the low
// rnbits bits of n + 1. Returns (code, code length = rnbits + nbits).
// rnbits must be below 32 (size of the `lo` table).
static std::tuple<u64, size_t> GammaEncode(u64 n) {
++n;
DASSERT(n != 0);
size_t const nbits = NumBits(n);
// lo[i] == 1 << i: the terminating 1 bit after i leading zeros.
static auto lo = []{
std::array<u32, 32> r{};
for (size_t i = 0; i < r.size(); ++i)
r[i] = u32(1) << i;
return r;
}();
size_t const rnbits = nbits - 1;
DASSERT(rnbits < lo.size());
return std::make_tuple((Shl(n & Mask(rnbits), nbits) | u64(lo[rnbits])), rnbits + nbits);
}
// Decodes the code that starts at bit 0 of `n`.
// Returns (decoded value, total code length 2 * cnt + 1, cnt + 1), where cnt
// is the number of zero bits before the first 1 bit. `n` must not be 0.
static std::tuple<u64, size_t, size_t> GammaDecode(u64 n) {
static size_t constexpr c_tab_bits = 8;
// tab[b] = number of trailing zero bits of the byte b; 0xFF marks b == 0,
// meaning the first 1 bit lies beyond the low 8 bits.
static auto tab = []{
std::array<u8, (1 << c_tab_bits)> r{};
for (size_t i = 0; i < r.size(); ++i) {
size_t j = i, sr = 0;
if (i == 0)
sr = 0xFF;
else
while (!bool(j & 1)) {
++sr;
j >>= 1;
}
r[i] = u8(sr);
}
return r;
}();
size_t cnt = tab[n & Mask(c_tab_bits)];
if (cnt == 0xFF) {
// Slow path: count the trailing zeros bit by bit.
ASSERT(n != 0);
cnt = 0;
u64 m = n;
while (!bool(m & 1)) {
++cnt;
m >>= 1;
}
ASSERT(cnt <= 31);
}
// Re-attach the implicit top bit to the cnt payload bits, then undo the +1
// applied by GammaEncode.
return std::make_tuple(u64((((n >> (cnt + 1)) & Mask(cnt)) | (u64(1) << cnt)) - 1), size_t(2 * cnt + 1), size_t(cnt + 1));
}
BitVector bv_;
// Sampled bit offsets written by Index().
std::vector<size_t> index_;
};
class PerfectHash {
public:
// https://en.wikipedia.org/wiki/Perfect_hash_function
void Build(std::vector<u64> const & nums) {
size_t const n = nums.size();
m_ = 1.5 * n;
n_ = n;
primes_.clear();
primes_.push_back({rng_(), NextPrime(n_)});
primes_.push_back({rng_(), NextPrime(m_)});
std::vector<std::vector<size_t>> Bs(n);
for (size_t i = 0; i < n; ++i) {
Bs[g(nums[i])].push_back(nums[i]);
//std::cout << "i " << i << ": " << nums[i] << ": " << g(nums[i]) << ", ";
}
//std::cout << std::endl;
std::vector<u64> K;
BitVector Tb(m_);
std::vector<u32> sigma_l(n);
size_t max_bucket_size = 0;
for (size_t i = 0; i < n; ++i) {
auto const & B = Bs.at(i);
max_bucket_size = std::max<size_t>(max_bucket_size, B.size());
if (B.empty())
continue;
size_t l = 0;
for (l = 1; l < 10'000; ++l) {
bool exists = false;
K.clear();
for (size_t iB = 0; iB < B.size(); ++iB) {
auto const j = B[iB];
auto const h = HashFunc(l, j);
if (Tb.Get(h)) {
exists = true;
break;
}
for (auto k: K)
if (k == h) {
exists = true;
break;
}
if (exists)
break;
K.push_back(h);
}
if (!exists)
break;
}
ASSERT(l < 10'000);
sigma_l[i] = l;
for (auto j: K)
Tb.Set(j);
}
sigma_bv_.Clear();
//std::cout << "MaxBucket " << max_bucket_size << std::endl;
//std::cout << "Sigma: ";
for (size_t i = 0; i < sigma_l.size(); ++i) {
auto const l = sigma_l[i];
//std::cout << l << ", ";
sigma_bv_.Push(1);
for (size_t i = 0; i + 1 < l; ++i)
sigma_bv_.Push(0);
}
//std::cout << std::endl;
sigma_gbv_.GammaEncodeVec(sigma_l);
sigma_gbv_.Index();
//std::cout << "Sigma from GBV: ";
for (size_t i = 0; i < sigma_l.size(); ++i) {
//std::cout << sigma_gbv_.Get(i) << ", ";
ASSERT_MSG(std::max<size_t>(1, sigma_l[i]) == sigma_gbv_.Get(i), "i " + std::to_string(i) + " sigma_l " + std::to_string(std::max<size_t>(1, sigma_l[i])) + " sigma_Get(i) " + std::to_string(sigma_gbv_.Get(i)) + " sigma_GetOff(i) " + std::to_string(sigma_gbv_.GetBitOffset(i)) + " sigma_GetOff(i - 1) " + std::to_string(sigma_gbv_.GetBitOffset(i - 1)));
}
//std::cout << std::endl;
sigma_bv_.Index();
for (size_t i = 0; i < sigma_bv_.Size(); ++i) {
//std::cout << (sigma_bv_.Get(i) ? "1" : "0");
}
//std::cout << std::endl;
}
size_t Hash(u64 const & x) {
return HashFunc(Sigma(g(x)), x);
}
size_t NumBits() const {
return sigma_gbv_.Size();
}
size_t HashFunc(size_t i, u64 const & x) {
while (i >= primes_.size())
primes_.push_back({rng_(), NextPrime(primes_.back().second + 1)});
auto const [k, p] = primes_[i];
auto v = (k * x) % p;
size_t const mod = i == 0 ? n_ : m_;
while (v >= mod)
v -= mod;
return v;
}
size_t g(u64 const & x) {
return HashFunc(0, x);
}
size_t Sigma(size_t i) {
size_t const i1 = sigma_gbv_.Get(i);
//std::cout << "Sigma: " << i << ": " << i1 << std::endl;
return i1;
/*
size_t cnt = 0;
for (size_t i = i1 + 1, size = sigma_bv_.Size(); i < size; ++i, ++cnt) {
std::cout << i << " (" << std::boolalpha << sigma_bv_.Get(i) << "), ";
if (sigma_bv_.Get(i))
break;
}
std::cout << std::endl << "Val: " << (cnt + 1) << std::endl;
return cnt + 1;
*/
}
size_t N() const { return n_; }
size_t M() const { return m_; }
private:
std::mt19937_64 rng_{123};
size_t n_ = 0, m_ = 0;
BitVector sigma_bv_;
GammaBitVector sigma_gbv_;
std::vector<std::pair<u64, u64>> primes_;
};
int main() {
try {
std::mt19937_64 rng{123};
std::vector<u64> nums(1 << 17);
for (size_t i = 0; i < nums.size(); ++i)
nums[i] = rng();
PerfectHash ph;
ph.Build(nums);
std::cout << "Nums " << nums.size() << std::endl;
std::cout << "PerfectHash Bits " << ph.NumBits() << ", " << std::setprecision(3)
<< (double(ph.NumBits()) / nums.size()) << " bits/num" << std::endl;
std::unordered_map<u64, u64> hashes;
for (size_t i = 0; i < nums.size(); ++i) {
//std::cout << "i " << i << std::endl;
auto const hash = ph.Hash(nums[i]);
if (i < 16) {
//std::cout << nums[i] << ": " << hash << std::endl;
}
ASSERT(hash < ph.M());
ASSERT_MSG(!hashes.count(hash),
"i " + std::to_string(i) + " nums[i] " + std::to_string(nums[i]) +
" hash " + std::to_string(hash) + " g(x) " + std::to_string(ph.g(nums[i])) +
" sigma " + std::to_string(ph.Sigma(ph.g(nums[i]))) +
" hash_func " + std::to_string(ph.HashFunc(ph.Sigma(ph.g(nums[i])), nums[i])) +
" prev_i " + std::to_string(hashes.at(hash)) + " nums[hashes.at(hash)] " +
std::to_string(nums[hashes.at(hash)]) + " prev_g(x) " +
std::to_string(ph.g(nums[hashes.at(hash)])) + " prev_sigma " +
std::to_string(ph.Sigma(ph.g(nums[hashes.at(hash)]))) + " prev_hash_func " +
std::to_string(ph.HashFunc(ph.Sigma(ph.g(nums[hashes.at(hash)])), nums[hashes.at(hash)]))
);
hashes[hash] = i;
}
ASSERT(hashes.size() == nums.size());
return 0;
} catch (std::exception const & ex) {
std::cout << "Exception: " << ex.what() << std::endl;
return -1;
}
}
Console Output:
Nums 131072
PerfectHash Bits 244430, 1.86 bits/num

Related

How to avoid loop removal with -O3 and the performance impact of asm("") when benchmarking a C++ code block?

I'm trying to microbenchmark the following C++ code, compiled with the -O3 g++ compiler option for maximum performance:
// Excerpt of the benchmark loop; get_nano_ts, ts and nanoTimeCost are defined
// in the full program listed further down.
long long x = 0;
int iterations = 1000000;
int load = 1000;
for(int i = 0; i < iterations; i++) {
long start = get_nano_ts(&ts);
// Synthetic workload: `load` repetitions of the same update of x.
for(int j = 0; j < load; j++) {
long p = (i % 8) * (i % 16);
if (i % 2 == 0) {
x += p;
} else {
x -= p;
}
asm(""); // so that the loop is not removed by -O3
}
long end = get_nano_ts(&ts);
// Elapsed time minus the measured cost of taking a timestamp, floored at 1.
int res = end - start - nanoTimeCost;
if (res <= 0) res = 1;
// (...) removed for clarity
}
// Printing x keeps the computed value observable.
cout << "Value computed: " << x << endl;
As you can see I'm using the asm("") instruction to prevent the compiler from turning my loop calculation into something else. How do I know it is turning my loop calculation into something else? That's because without the asm("") line, the value is calculated immediately and the program exits immediately, no matter how large I make the load variable.
So my question is: How can I use the -O3 compiler option and still prevent it from turning my loop into something else?
From this SO question, I got that I have to use asm("") for that. But then my question becomes: Wouldn't asm("") mess with my timing (add overhead) and/or prevent the compiler from doing other valid/good optimizations like inlining?
Another solution for me would be to come up with a for/loop code that cannot be translated to a straight mathematical formula by the compiler, in other words, some kind of mathematical computation that really requires a loop to be executed. Any suggestions?
Below the full C++ code I'm using:
#include <iostream>
#include <string>
#include <random>
#include <cmath>
#include <algorithm>
#include <limits>
#include <sys/time.h>
#include <map>
#include <sched.h>
#include <sstream>
#include <iomanip>
using namespace std;
// TO COMPILE: g++ TestJitter.cpp -o TestJitter -std=c++11 -O3
// TO EXECUTE: ./TestJitter 10000000 1000000 1000 1
// Compile-time switches that control the generated report.
// MORE_PERCS: main() also reports the 0.999999 and 0.9999999 percentiles.
static const bool MORE_PERCS = true;
// INCLUDE_WORST_PERCS: add_perc() also reports the samples above each percentile.
static const bool INCLUDE_WORST_PERCS = true;
// INCLUDE_TOTALS: add_perc() prints the sample counts in parentheses.
static const bool INCLUDE_TOTALS = true;
// INCLUDE_RATIOS: add_perc() prints the relative difference of the two averages.
static const bool INCLUDE_RATIOS = false;
// INCLUDE_STDEV: standard deviations are computed and printed.
static const bool INCLUDE_STDEV = true;
// EXCLUDE_NANO_TS_COST: main() subtracts the measured timestamp cost from every sample.
static const bool EXCLUDE_NANO_TS_COST = true;
// Reads CLOCK_MONOTONIC into *ts and returns the same instant expressed in
// nanoseconds.
long get_nano_ts(timespec* ts) {
    clock_gettime(CLOCK_MONOTONIC, ts);
    const auto seconds_as_nanos = ts->tv_sec * 1000000000;
    return seconds_as_nanos + ts->tv_nsec;
}
// Number of timestamp reads used to estimate the cost of a single read.
static const long NANO_COST_ITERATIONS = 10000000;

// Estimates the cost, in nanoseconds, of one get_nano_ts() call: the elapsed
// time of a long run of calls divided by NANO_COST_ITERATIONS.
static long calc_nano_ts_cost() {
    struct timespec ts;
    const long begin = get_nano_ts(&ts);
    long last = begin;
    long remaining = NANO_COST_ITERATIONS;
    while (remaining-- > 0) {
        last = get_nano_ts(&ts);
    }
    last = get_nano_ts(&ts);
    return (last - begin) / NANO_COST_ITERATIONS;
}
// Mutable counter used as the mapped value of the results histogram
// (measured time -> number of occurrences).
struct mi {
long value;
};
// Appends one percentile section to `ss`.
//
// `map` is a histogram (measured time -> occurrence count) that is walked in
// ascending time order. The first round(perc * size) samples form the "top"
// group, for which average, standard deviation and maximum are printed. When
// INCLUDE_WORST_PERCS is set, the remaining samples form the "bottom" group,
// for which average, standard deviation and minimum are printed.
// Nothing is written when the histogram is empty or contains no samples.
//
// ss   - stream receiving the formatted text
// size - total number of samples the percentile refers to
// perc - percentile as a fraction, e.g. 0.99
// map  - histogram of the measurements
void add_perc(stringstream& ss, int size, double perc, map<int, mi*>* map) {
    if (map->empty()) return;

    // Size the scratch buffer from the real sample count instead of a fixed
    // capacity, so large runs cannot write past its end.
    long totalSamples = 0;
    for (auto iter = map->begin(); iter != map->end(); ++iter) {
        if ((iter->second)->value > 0) totalSamples += (iter->second)->value;
    }
    if (totalSamples == 0) return;

    int max = -1;
    int minBottom = -1;
    const long x = round(perc * size);  // number of samples in the top group
    long i = 0;
    long iBottom = 0;
    long sum = 0;
    long sumBottom = 0;
    bool trueForTopFalseForBottom = true;
    bool flag = false;
    // Expanded samples: first used for the top group, then reused from index 0
    // for the bottom group once the top statistics have been computed.
    int* tempData = new int[totalSamples];
    double stdevTop = -1;
    for (auto iter = map->begin(); iter != map->end(); ++iter) {
        if (flag) break;
        const int time = iter->first;
        const long count = (iter->second)->value;
        for (long a = 0; a < count; a++) {
            if (trueForTopFalseForBottom) {
                tempData[i] = time;
                i++;
                sum += time;
                if (i == x) {
                    // The percentile boundary has been reached.
                    max = time;
                    if (INCLUDE_STDEV) {
                        const double avg = (double) sum / (double) i;
                        double temp = 0;
                        for (long b = 0; b < i; b++) {
                            const int t = tempData[b];
                            temp += (avg - t) * (avg - t);
                        }
                        stdevTop = sqrt(((double) temp / (double) i));
                    }
                    if (INCLUDE_WORST_PERCS) {
                        trueForTopFalseForBottom = false;
                    } else {
                        flag = true;
                        break;
                    }
                }
            } else {
                tempData[iBottom] = time;
                iBottom++;
                sumBottom += time;
                if (minBottom == -1) {
                    minBottom = time;
                }
            }
        }
    }
    // i >= 1 here: the first sample always lands in the top group.
    ss << " | " << fixed << setprecision(5) << (perc * 100) << "%";
    if (INCLUDE_TOTALS) ss << " (" << i << ")";
    ss << " = [avg: " << (sum / i);
    if (INCLUDE_STDEV) ss << ", stdev: " << fixed << setprecision(2) << stdevTop;
    ss << ", max: " << max << "]";
    if (INCLUDE_WORST_PERCS) {
        ss << " - " << fixed << setprecision(5) << ((1 - perc) * 100) << "%";
        if (INCLUDE_TOTALS) ss << " (" << (iBottom > 0 ? iBottom : 0) << ")";
        ss << " = [avg: " << (iBottom > 0 ? (sumBottom / iBottom) : -1);
        if (INCLUDE_STDEV) {
            ss << ", stdev: ";
            if (iBottom <= 0) {
                ss << "?";
            } else {
                // Floating-point mean, matching how the top group is computed.
                const double avgBottom = (double) sumBottom / (double) iBottom;
                double temp = 0;
                for (long b = 0; b < iBottom; b++) {
                    const long t = tempData[b];
                    temp += (avgBottom - t) * (avgBottom - t);
                }
                const double stdevBottom = sqrt((double) temp / (double) iBottom);
                ss << fixed << setprecision(2) << stdevBottom;
            }
        }
        ss << ", min: " << (minBottom != -1 ? minBottom : -1) << "]";
        if (INCLUDE_RATIOS) {
            ss << " R: ";
            ss << fixed << setprecision(2) << (iBottom > 0 ? (((sumBottom / iBottom) / (double) (sum / i)) - 1) * 100 : -1);
            ss << "%";
        }
    }
    delete[] tempData;
}
int main(int argc, char* argv[]) {
int iterations = stoi(argv[1]);
int warmup = stoi(argv[2]);
int load = stoi(argv[3]);
int proc = stoi(argv[4]);
cpu_set_t my_set;
CPU_ZERO(&my_set);
CPU_SET(proc, &my_set);
sched_setaffinity(0, sizeof(cpu_set_t), &my_set);
long nanoTimeCost = EXCLUDE_NANO_TS_COST ? calc_nano_ts_cost() : 0;
struct timespec ts;
long long x = 0;
long long totalTime = 0;
int minTime = numeric_limits<int>::max();
int maxTime = numeric_limits<int>::min();
map<int, mi*>* results = new map<int, mi*>();
for(int i = 0; i < iterations; i++) {
long start = get_nano_ts(&ts);
for(int j = 0; j < load; j++) {
long p = (i % 8) * (i % 16);
if (i % 2 == 0) {
x += p;
} else {
x -= p;
}
asm(""); // so that the loop is not removed by -O3
}
long end = get_nano_ts(&ts);
int res = end - start - nanoTimeCost;
if (res <= 0) res = 1;
if (i >= warmup) {
totalTime += res;
minTime = min(minTime, res);
maxTime = max(maxTime, res);
auto iter = results->find(res);
if (iter != results->end()) {
(iter->second)->value = (iter->second)->value + 1;
} else {
mi* elem = new mi();
elem->value = 1;
(*results)[res] = elem;
}
}
}
int count = iterations - warmup;
double avg = totalTime / count;
cout << "Value computed: " << x << endl;
cout << "Nano timestamp cost: " << nanoTimeCost << endl;
stringstream ss;
ss << "Iterations: " << count << " | Avg Time: " << avg;
if (INCLUDE_STDEV) {
long temp = 0;
long x = 0;
for(auto iter = results->begin(); iter != results->end(); iter++) {
int time = iter->first;
long count = (iter->second)->value;
for(int a = 0; a < count; a++) {
temp += (avg - time) * (avg - time);
x++;
}
}
double stdev = sqrt( temp / x );
ss << " | Stdev: " << fixed << setprecision(2) << stdev;
}
if (count > 0) {
ss << " | Min Time: " << minTime << " | Max Time: " << maxTime;
}
add_perc(ss, count, 0.75, results);
add_perc(ss, count, 0.90, results);
add_perc(ss, count, 0.99, results);
add_perc(ss, count, 0.999, results);
add_perc(ss, count, 0.9999, results);
add_perc(ss, count, 0.99999, results);
if (MORE_PERCS) {
add_perc(ss, count, 0.999999, results);
add_perc(ss, count, 0.9999999, results);
}
cout << ss.str() << endl << endl;
delete results;
return 0;
}

Program containing threading in cpp is not executed completely

Code given below is not executed completely;
I have searched the web but I don't know why this happens: it works for the first sizes in nums (i.e. 1000 and sometimes 5000), and after that it starts executing but the program terminates itself midway and stops working.
#include <bits/stdc++.h>
// #include <iostream>
// #include <chrono>
// #include <vector>
// NOTE(review): UPPER_LIMIT and the three flags below are not referenced by
// any code visible in this listing.
#define UPPER_LIMIT 10
using namespace std;
using namespace std::chrono;
bool inTimeLimit = true;
bool isFinished = false;
bool isRunning = true;
// Scope timer: records the construction time and prints the elapsed time in
// microseconds and milliseconds when destroyed.
class Timer {
public:
    time_point<high_resolution_clock> start, end;

    Timer() : start(high_resolution_clock::now()) {}

    ~Timer() {
        end = high_resolution_clock::now();
        const unsigned long long elapsedUs = (unsigned long long)durationCounter();
        cout << "\n\nTaken time Duration " << elapsedUs << " us; " << elapsedUs * 0.001 << "ms.";
    }

    // Microseconds elapsed since construction, as a float.
    float durationCounter() {
        const auto now = high_resolution_clock::now();
        return duration_cast<microseconds>(now - start).count();
    }
};
// Prints a line break followed by every element right-aligned in a field of
// width 3 and a trailing space. The vector is taken by const reference so
// that large inputs are not copied on each call.
void printVector(const vector<int>& v) {
    cout << endl;
    for (int x : v) {
        cout << setw(3) << x << " ";
    }
}
// Writes a separator, `msg`, an empty line and then every element
// right-aligned in a field of width 5 to `fout`.
// Accepts any output stream (an ofstream still binds), and takes the vector
// and the message by const reference to avoid copying them.
void printVectorToFile(ostream &fout, const vector<int>& v, const string& msg) {
    fout << "\n\n===================\n\n";
    fout << msg << endl;
    fout << endl;
    for (int x : v) {
        fout << setw(5) << x << " ";
    }
    fout << endl;
}
// Exchanges the two integers that `a` and `b` point to.
void swap (int *a, int *b) {
    const int first = *a;
    const int second = *b;
    *a = second;
    *b = first;
}
// Returns the numbers 1..n in a pseudo-random order. The generator is
// re-seeded from the current time on every call; each position is swapped
// once with a randomly chosen position.
vector <int> randomArrayGenerator(int n) {
    vector<int> values(n);
    int next = 1;
    for (int& slot : values) {
        slot = next++;
    }
    srand(time(0));
    for (int idx = 0; idx < n; ++idx) {
        const int target = rand() % n;
        swap(&values[idx], &values[target]);
    }
    return values;
}
// Returns "true" when `v` is in non-decreasing order and "false" otherwise.
// Empty and single-element vectors count as sorted. The vector is taken by
// const reference so it is not copied on every call.
string sortingChecker(const vector<int>& v) {
    return is_sorted(v.begin(), v.end()) ? "true" : "false";
}
// Returns true when `v` is in non-decreasing order. Empty and single-element
// vectors count as sorted. The vector is taken by const reference so it is
// not copied on every call.
bool sortChecker(const vector<int>& v) {
    return is_sorted(v.begin(), v.end());
}
// Merge function
// Merges the two adjacent sorted runs v[begin..middle] and v[middle+1..end]
// (all bounds inclusive) in place.
// Bounds that reach past the last element are clamped to it, so a call for a
// trailing run that has no right-hand partner leaves the data untouched
// instead of reading and writing outside the vector.
void merge(vector <int> &v, int begin, int middle, int end) {
    const int lastIndex = static_cast<int>(v.size()) - 1;
    end = min(end, lastIndex);
    if (begin < 0 || begin > end) return;
    // Keep middle inside [begin - 1, end]; begin - 1 means "empty left run".
    middle = max(begin - 1, min(middle, end));

    const vector<int> left(v.begin() + begin, v.begin() + middle + 1);
    const vector<int> right(v.begin() + middle + 1, v.begin() + end + 1);
    const int n1 = left.size(), n2 = right.size();
    int p1 = 0, p2 = 0, p = begin;
    while ((p1 < n1) || (p2 < n2)) {
        // Take from the left run while it has the strictly smaller element or
        // the right run is exhausted.
        if ((p1 != n1) && ((p2 == n2) || left[p1] < right[p2]))
            v[p++] = left[p1++];
        else
            v[p++] = right[p2++];
    }
}
void mergeSortByIteration(vector <int> &v, bool &isTimeDelayed) {
int low = 0, high = v.size();
cout << "Thread ID: " << this_thread::get_id() << endl;
// n :for taking individual block of vector containing number of elements n=[1,2,4,8,..]
for (int n = 1; n < high; n *= 2) {
if (isTimeDelayed) return;
// taking block according to n and then sorting them by merge function
// n=1 => i=0,2,4,8,16
// n=2 => i=0,4,8
for (int i = 0; i < high; i += 2 * n) {
if (isTimeDelayed) return;
int begin = i;
int mid = i + n - 1;
int end = min(i + 2 * n - 1 , high - 1);
merge(v, begin, mid, end);
}
}
}
// Merge by recurision
// Top-down merge sort of v[begin..end] (inclusive bounds).
// Does nothing for ranges with fewer than two elements or when
// `isTimeDelayed` is already set on entry.
void mergeSortByRecursion (vector <int> &v, int begin, int end, bool &isTimeDelayed) {
    if (isTimeDelayed || begin >= end) return;
    const int half = (end - begin) / 2;
    const int middle = begin + half;
    mergeSortByRecursion(v, begin, middle, isTimeDelayed);
    mergeSortByRecursion(v, middle + 1, end, isTimeDelayed);
    merge(v, begin, middle, end);
}
// Sorts random permutations of increasing size on a worker thread, giving
// every run at most 5 seconds, and prints the resulting vector together with
// a sortedness check.
int main() {
    const int nums[] = {1000, 5000, 10000, 50000, 100000};
    ofstream vectorOutput;
    vectorOutput.open("outputTexts\\prac1_resultedArrays.txt", ios::trunc);
    for (int n : nums)
    // ``````` Merge by Iteration ````````
    {
        vector<int> num = randomArrayGenerator(n);
        cout << "\n=======";
        cout << "\n\nMerge by Iteration:" << endl;
        cout << "Array size: " << num.size() << endl;
        bool isTimeOut = false;
        atomic<bool> workerDone(false);
        Timer timer;
        // The worker reports completion through an atomic flag, so the main
        // thread never has to inspect `num` while it is being modified.
        std::thread worker([&num, &isTimeOut, &workerDone] {
            mergeSortByIteration(num, isTimeOut);
            workerDone = true;
        });
        // Wait for completion or for the 5 second limit, sleeping in between
        // instead of spinning.
        while (!workerDone && (timer.durationCounter() / 1000000) < 5) {
            this_thread::sleep_for(milliseconds(1));
        }
        if (!workerDone)
        {
            // NOTE(review): isTimeOut is a plain bool read by the worker; the
            // sort functions' signatures would have to change to make this
            // cancellation flag atomic.
            isTimeOut = true;
            cout << endl << "!!!!!Execution Terminated ---- Time Limit reached!!!!!!" << endl;
        }
        worker.join();
        // The worker has finished, so `num` can now be read safely.
        const bool isSorted = sortChecker(num);
        printVector(num);
        cout << "\nCheck result for sorted Vector:" << (isSorted ? "true" : "false") << endl;
        // printVectorToFile(vectorOutput, num, "Merge By Iteration for size:" + to_string(n) );
    }
    cout << "\n\ndone" << endl;
    return 0;
}
can anyone help me out here?
If the issue is not clear, feel free to ask.

print binary tree: incorrect leaves and spaces output

I have a problem with the output of a binary tree. The printing code I am using was designed for a tree whose elements are 1 character long, but the elements of my tree are words (at most 10 characters). Please tell me what I should change in the code to get a correct output of the tree.
My code:
// Height of the tree rooted at `p`, counted in nodes: 0 for an empty tree,
// otherwise 1 plus the height of the taller subtree.
int maxHeight(Node *p) {
    if (p == NULL) return 0;
    const int viaLeft = maxHeight(p->left);
    const int viaRight = maxHeight(p->right);
    const int taller = (viaLeft > viaRight) ? viaLeft : viaRight;
    return taller + 1;
}
void printBranches(int branchLen, int nodeSpaceLen, int startLen, int nodesInThisLevel, const deque<Node*>& nodesQueue) {
deque<Node*>::const_iterator iter = nodesQueue.begin();
for (int i = 0; i < nodesInThisLevel / 2; i++) {
cout << ((i == 0) ? setw(startLen - 1) : setw(nodeSpaceLen - 2)) << "" << ((*iter++) ? "/" : " ");
cout << setw(2 * branchLen + 2) << "" << ((*iter++) ? "\\" : " ");
}
cout << endl;
}
void printNodes(int branchLen, int nodeSpaceLen, int startLen, int nodesInThisLevel, const deque<Node*>& nodesQueue) {
deque<Node*>::const_iterator iter = nodesQueue.begin();
for (int i = 0; i < nodesInThisLevel; i++, iter++) {
cout << ((i == 0) ? setw(startLen) : setw(nodeSpaceLen)) << "" << ((*iter && (*iter)->left) ? setfill('_') : setfill(' '));
cout << setw(branchLen + 2);
if (*iter)
cout << (*iter)->data << "(" << (*iter)->frequency << ")";
else
cout << "";
cout << ((*iter && (*iter)->right) ? setfill('_') : setfill(' ')) << setw(branchLen) << "" << setfill(' ');
}
cout << endl;
}
void printLeaves(int indentSpace, int level, int nodesInThisLevel, const deque<Node*>& nodesQueue) {
deque<Node*>::const_iterator iter = nodesQueue.begin();
for (int i = 0; i < nodesInThisLevel; i++, iter++) {
cout << ((i == 0) ? setw(indentSpace + 2) : setw(2 * level + 2));
if (*iter)
cout << (*iter)->data << "(" << (*iter)->frequency << ")";
else
cout << "";
}
cout << endl;
}
// Prints the tree level by level: a connector line, then a node line for
// every level except the last, and finally the connectors and entries of the
// leaf level. Missing nodes are kept in the queue as NULL placeholders so
// that every level holds a full power-of-two number of slots.
void printPretty(Node *root, int level, int indentSpace) {
    const int height = maxHeight(root);
    const int fullSpan = (int)pow(2.0, height);
    const int halfSpan = (int)pow(2.0, height - 1);

    int branchLen = 2 * (fullSpan - 1) - (3 - level) * halfSpan;
    int nodeSpaceLen = 2 + (level + 1) * fullSpan;
    int startLen = branchLen + (3 - level) + indentSpace;
    int nodesInThisLevel = 1;

    deque<Node*> nodesQueue;
    nodesQueue.push_back(root);
    for (int row = 1; row < height; ++row) {
        printBranches(branchLen, nodeSpaceLen, startLen, nodesInThisLevel, nodesQueue);
        branchLen = branchLen / 2 - 1;
        nodeSpaceLen = nodeSpaceLen / 2 + 1;
        startLen = branchLen + (3 - level) + indentSpace;
        printNodes(branchLen, nodeSpaceLen, startLen, nodesInThisLevel, nodesQueue);

        // Replace every node of this level by its two children.
        int pending = nodesInThisLevel;
        while (pending-- > 0) {
            Node *current = nodesQueue.front();
            nodesQueue.pop_front();
            nodesQueue.push_back(current ? current->left : NULL);
            nodesQueue.push_back(current ? current->right : NULL);
        }
        nodesInThisLevel *= 2;
    }
    printBranches(branchLen, nodeSpaceLen, startLen, nodesInThisLevel, nodesQueue);
    printLeaves(indentSpace, level, nodesInThisLevel, nodesQueue);
}
as you can see, chars "\" and leaves are shifted by 3 positions to the left
http://i.stack.imgur.com/ADdGZ.png

Print heap array in tree format

So I've been trying to implement an algorithm to output a heap array in tree format. For
instance if I have an array like A[10,6,8,2,4,3,6,0,1,3,2,2,1,0,2] I would like the output to be:
10-----6-----2-----0
| | |--1
| |--4-----3
| |--2
|--8-----3-----2
| |--1
|--6-----0
|--2
Update: Solved my question, I made an answer with the code for those interested.
A possible solution is to insert placeholders into the array and thus form a MxN matrix out if it. Then you can simply loop over it, insert a line feed after every row and indent cells having a placeholder.
This C++11 program outputs heap in a little bit different format:
// 10
// ||--------------||
// 6 8
// ||------|| ||------||
// 2 4 3 6
//||--|| ||--|| ||--|| ||--||
// 0 1 3 2 2 1 0 2
#include<iostream>
#include<vector>
#include<sstream>
#include<string>
#include<cmath>
#include<iomanip>
// http://stackoverflow.com/questions/994593/how-to-do-an-integer-log2-in-c
// Floor of the base-2 logarithm of `arg` (0 for arg == 0 and arg == 1);
// used to compute the height of the heap.
size_t IntegerLogarithm2(size_t arg) {
    size_t shifts = 0;
    for (arg >>= 1; arg != 0; arg >>= 1) {
        ++shifts;
    }
    return shifts;
}
// Returns 2 raised to the power `arg`; used to compute the number of
// elements at level `arg` of the heap.
size_t IntegerPower2(size_t arg) {
    if (arg == 0) {
        return 1;
    }
    const size_t two = 2;
    return two << (arg - 1);
}
// Total line length of a level: every item takes `item_width` characters and
// neighbouring items are separated by `spaces_between` characters.
size_t LineLength(size_t level, size_t item_width, size_t spaces_between) {
    const size_t items = IntegerPower2(level);
    const size_t padded = items * (item_width + spaces_between);
    return padded - spaces_between;
}
// Prints the heap array A as a tree, one text line per level with a line of
// branches above every level except the root.
// The last level may be incomplete, and the column width is taken from the
// widest element (including a minus sign).
int main()
{
    // The input heap array
    std::vector<int> A = {10, 6, 8, 2, 4, 3, 6, 0, 1, 3, 2, 2, 1, 0, 2};
    if (A.empty()) return 0;

    // Height of the heap: level i starts at index 2^i - 1, so the last level
    // is floor(log2(size)).
    size_t levels_number = IntegerLogarithm2(A.size()) + 1;

    // The heap array split by levels; the last level may hold fewer than
    // 2^i elements.
    std::vector<std::vector<int> > levels(levels_number);
    for (size_t i = 0; i < levels.size(); ++i) {
        size_t first = IntegerPower2(i) - 1;
        size_t elements_number = IntegerPower2(i);
        if (first + elements_number > A.size())
            elements_number = A.size() - first;
        levels[i].assign(A.begin() + first, A.begin() + first + elements_number);
    }

    // Width of one column: the longest textual representation of an element.
    size_t tab_width = 1;
    for (int value : A) {
        size_t width = std::to_string(value).size();
        if (width > tab_width) tab_width = width;
    }

    std::vector<std::string> text;
    text.reserve(levels_number * 2 - 1);
    // Do the aligned output to the strings array
    for (size_t i = 0; i < levels_number; ++i) {
        size_t outer_space_width = IntegerPower2(levels_number - 1 - i) - 1;
        size_t inner_space_width = outer_space_width * 2 + 1;
        std::string outer_space(outer_space_width * tab_width, ' ');
        std::string inner_space(inner_space_width * tab_width, ' ');
        std::ostringstream line;
        line << outer_space;
        if (i > 0) {
            // Branch line: siblings are joined by dashes, neighbouring pairs
            // are separated by blanks.
            std::ostringstream branchline;
            std::string joint(tab_width, '|');
            std::string branch(inner_space_width * tab_width, '-');
            branchline << outer_space;
            if (levels[i].size() > 0) {
                branchline << joint;
            }
            bool isline = true;
            for (size_t j = 1; j < levels[i].size(); ++j, isline = !isline) {
                if(isline)
                    branchline << branch << joint;
                else
                    branchline << inner_space << std::setfill(' ') <<
                        std::setw(tab_width) << joint;
            }
            branchline << outer_space;
            text.push_back(branchline.str());
        }
        if (levels[i].size() > 0) {
            line << std::setfill(' ') << std::setw(tab_width) << levels[i][0];
        }
        for (size_t j = 1; j < levels[i].size(); ++j) {
            line << inner_space << std::setfill(' ') <<
                std::setw(tab_width) << levels[i][j];
        }
        line << outer_space;
        text.push_back(line.str());
    }
    // Output the text
    for (auto& i : text)
        std::cout << i << std::endl;
    return 0;
}
Yap, harder than it initially seemed. Effectively does what Sebastian Dressler proposed.
Here is the final implementation. Formatting scales with number length.
#include <string>
#include <vector>
#include <iostream>
#include <algorithm>
// Builds the indentation printed in front of heap node `index`.
// One segment is produced for the parent and for each of its ancestors below
// the root, outermost first: blanks for an ancestor with an even index,
// blanks ending in '|' for an ancestor with an odd index.
// Returns an empty string when the parent of `index` is the root.
std::string do_padding (unsigned index, unsigned mlength){
    const int parent = int((index-1)/2);
    if (parent == 0){
        return std::string();
    }
    const std::string prefix = do_padding(parent,mlength);
    if (parent % 2 == 0){
        return prefix + std::string(mlength+4,' ') + " ";
    }
    return prefix + std::string(mlength+3,' ') + " |";
}
// Prints heap node `index` followed by its subtrees: the left child continues
// on the same line after a run of dashes, the right child is attached on a
// new line below. `mlength` is the width of the longest element and is used
// to size the connectors.
void printer (std::vector<int> const & tree, unsigned index, unsigned mlength){
    const auto last = tree.size() - 1 ;
    const auto left = 2 * index + 1 ;
    const auto right = 2 * index + 2 ;
    std::cout << " " << tree[index] << " ";
    if (left > last){
        return;  // leaf: nothing more on this branch
    }
    const auto llength = std::to_string(tree[left]).size();
    std::cout << "---" << std::string(mlength - llength,'-');
    printer(tree,left,mlength);
    if (right > last){
        return;
    }
    const auto rlength = std::to_string(tree[right]).size();
    const std::string indent = do_padding(right,mlength) + std::string(mlength+ 3,' ');
    std::cout << "\n" << indent << " | ";
    std::cout << "\n" << indent << " └" << std::string(mlength - rlength,'-');
    printer(tree,right,mlength);
}
/// Prints an array-encoded binary tree (e.g. a heap) to std::cout.
///
/// The width of the widest element is determined first so that every column
/// of the drawing can be padded to the same size.
///
/// @param tree  the tree in array layout; an empty tree prints nothing
void print_tree (std::vector<int> & tree){
    // An empty tree has no root: the code below would index tree[0].
    if (tree.empty()) {
        return;
    }

    unsigned mlength = 0;
    for (const int element : tree){
        const auto clength = std::to_string(element).size();
        if (clength > mlength) {
            mlength = static_cast<unsigned>(clength);
        }
    }

    // Indent the root so that it occupies a full-width column as well.
    std::cout << std::string(mlength - std::to_string(tree[0]).size(), ' ');
    printer(tree, 0, mlength);
}
// Demo driver: fills a vector with 50 pseudo-random values in 1..1000,
// rearranges it into a heap and prints the resulting tree.
int main() {
    std::vector<int> heap_values;
    int remaining = 50;
    while (remaining-- > 0) {
        const int sample = rand() % 1000 + 1;
        heap_values.push_back(sample);
    }

    std::make_heap(heap_values.begin(), heap_values.end());

    std::cout << "\n";
    print_tree(heap_values);
}

Optimizing conversion algorithm

I have recently been working on an exercise in a book I have been reading. The task was to create a program that prints all the numbers from 1 to 256 in their binary, octal and hexadecimal equivalents. We were only supposed to use techniques we had learned so far in the book, which meant only using for, while and do..while loops, if and else if statements, converting integers to ASCII equivalents and some more basic stuff (e.g. cmath and iomanip).
So after some work, here is my result. However, it is messy, inelegant and obfuscated. Does anyone have any suggestions to improve the code's efficiency (or elegance... :P) and performance?
#include <iostream>
#include <iomanip>
#include <cmath>
using namespace std;
// Prints a table of the numbers 1..256 in decimal, binary, octal and
// hexadecimal using only loops, if/else and <cmath> helpers.
//
// Approach: the binary form is built as an ordinary int whose *decimal* digits
// are the binary digits (e.g. 5 -> 101). The octal and hexadecimal columns are
// then derived from that number by cutting it into groups of three / four
// decimal digits and evaluating each group as a binary number.
int main()
{
// NOTE(review): decimalValue and numberOfDigits are never used, and
// hexadecimalValue is only ever reset to 0.
int decimalValue, binaryValue, octalValue, hexadecimalValue, numberOfDigits;
cout << "Decimal\t\tBinary\t\tOctal\t\tHexadecimal\n\n";
for (int i = 1; i <= 256; i++)
{
binaryValue = 0;
octalValue = 0;
hexadecimalValue = 0;
// Always true here, because the loop starts at 1.
if (i != 0)
{
// r holds the part of i that has not been converted to binary digits yet.
int x, j, e, c, r = i, tempBinary, powOfTwo, tempOctal, tempDecimal;
// Find the first power of two x = 2^j that is >= i.
// NOTE(review): pow() returns a double that is truncated on assignment to int.
for (j = 0; j <=8; j++) //Starts to convert to binary equivalent
{
x = pow(2.0, j);
if (x == i)
{
// i is exactly 2^j: its binary form is a 1 followed by j zeros.
powOfTwo = 1;
binaryValue = pow(10.0, j);
break;
}
else if (x > i)
{
// Overshot: step back to the largest power of two below i.
powOfTwo = 0;
x /= 2;
break;
}
}
if (powOfTwo == 0)
{
// Walk from the highest bit position down to 0, subtracting each power of
// two that still fits and recording a 1 (as 10^k) at that position.
for (int k = j-1; k >= 0; k--)
{
if ((r-x)>=0)
{
r -= x;
tempBinary = pow(10.0, k);
x /= 2;
}
else if ((r-x)<0)
{
tempBinary = 0;
x /= 2;
}
binaryValue += tempBinary;
}
} //Finished converting
// counter = number of decimal digits of binaryValue, i.e. the number of bits.
int counter = ceil(log10(binaryValue+1)); //Starts on octal equivalent
// iter = number of 3-digit groups needed (counter / 3, rounded up).
int iter;
if (counter%3 == 0)
{
iter = counter/3;
}
else if (counter%3 != 0)
{
iter = (counter/3)+1;
}
c = binaryValue;
// Consume the groups from the least significant end; group h becomes the
// octal digit at decimal position 10^h of octalValue.
for (int h = 0; h < iter; h++)
{
tempOctal = c%1000;
int count = ceil(log10(tempOctal+1));
tempDecimal = 0;
// Evaluate the group as binary: every non-zero digit adds 2^position.
for (int counterr = 0; counterr < count; counterr++)
{
if (tempOctal%10 != 0)
{
e = pow(2.0, counterr);
tempDecimal += e;
}
tempOctal /= 10;
}
octalValue += (tempDecimal * pow(10.0, h));
c /= 1000;
}//Finished Octal conversion
// The field width of the octal column shrinks as the binary column gets
// longer (21 - number of binary digits).
cout << i << "\t\t" << binaryValue << setw(21-counter) << octalValue << "\t\t";
int c1, tempHex, tempDecimal1, e1, powOf;
// NOTE(review): letter is declared but never used.
char letter;
// iter = number of 4-digit groups needed (counter / 4, rounded up).
if (counter%4 == 0)//Hexadecimal equivalent
{
iter = counter/4;
}
else if (counter%4 != 0)
{
iter = (counter/4)+1;
}
c1 = binaryValue;
// Unlike the octal pass, groups are taken from the most significant end
// (g counts down) so that each hex digit can be printed immediately.
for (int h = 0, g = iter-1; h < iter; h++, g--)
{
powOf = g*4;
if (h == 0)
{
// Leading group: everything above the lower g*4 digits.
tempHex = c1 / pow(10.0, powOf);
}
else if (h > 0)
{
// Later groups: drop the lower digits, then keep only four digits.
tempHex = c1 / pow(10.0, powOf);
tempHex %= 10000;
}
int count = ceil(log10(tempHex+1));
tempDecimal1 = 0;
// Evaluate the group as binary, as in the octal pass.
for (int counterr = 0; counterr < count; counterr++)
{
if (tempHex%10 != 0)
{
e1 = pow(2.0, counterr);
tempDecimal1 += e1;
}
tempHex /= 10;
}
if (tempDecimal1 <= 9)
{
cout << tempDecimal1;
}
else if (tempDecimal1 > 9)
{
cout << char(tempDecimal1+55); //ASCII's numerical value for A is 65. Since 10-15 are supposed to be letters you just add 55
}
}
cout << endl;
}
}
// NOTE(review): runs the shell command "pause"; presumably meant to keep a
// console window open — this depends on the platform providing that command.
system("pause");
return 0;
}
Any recommendations for improvement will be appreciated.
You have already covered 'iomanip', which implies you've already covered 'iostream'.
If that's the case, have a look at the following:
#include <iostream>
#include <iomanip>
using namespace std;
// Demonstrates the stream base manipulators: dec, oct and hex switch the base
// used for integer output, and the selected base stays in effect on the stream
// until another manipulator changes it.
int x = 250;
cout << dec << x << " "
<< oct << x << " "
<< hex << x << "\n"
<< x << "\n"; // This will still be in HEX
Break out the functions for each output type, then loop through the integer list and output each in turn by calling the function for each different format.
// Sketch of the suggested structure: one helper per output format, each
// called once for every value from 1 to 256.
// NOTE(review): printBin, printHex and printOct are not defined in this
// fragment; they are left for the reader to write.
for (int i = 1; i <= 256; ++i)
{
printBin(i);
printHex(i);
printOct(i);
}
Fundamental problem is that a function this long needs refactoring to be more modular. Imagine you are writing the code for someone else to use. How can they call your main? How do they understand what each section of code is doing? They can't. If you make each section of code that has a particular job to do callable as a function then it's easier to understand its intent, and to reuse later.
Have you considered writing a general function that works with any base?
Converting a non-negative number to a generic base is simple... you just need to compute number % base and you get the least significant digit, then divide number by base and repeat to get other digits...
// Repeated division: each pass peels off the least significant digit of
// `number` in the given `base`.
// NOTE(review): `number`, `base` and `digits` are declared outside this
// fragment; `digits` is presumably a lookup string such as
// "0123456789ABCDEF" — confirm.
std::string converted_number;
do {
int digit = number % base;
// Prepend, because digits are produced least significant first.
converted_number = digits[digit] + converted_number;
number = number / base;
// do/while rather than while, so that the body runs at least once even when
// number is 0.
} while (number != 0);
Once you have a generic conversion function then solving your problem is easy... just call it with base=2, 8 and 16 to get the results you need as strings.
My answer may be a bit tongue in cheek, but
// %u prints unsigned decimal, %o octal and %x lowercase hexadecimal.
// NOTE(review): these conversions expect an unsigned int argument; `value` is
// declared outside this fragment.
printf ("%u %o %x \n", value, value, value);
will do the trick for the octal and hexadecimal versions ;)
For the binary version, I'd use a flag initialized to 256 and test it against your number with the bitwise AND operator. If the result is non-zero, print a 1; otherwise print a 0. Then divide the flag by two. Repeat until every bit, including the lowest one, has been tested.
Pseudocode for the conversion from integer to binary
// Prints `value` in binary, most significant bit first.
// `flag` is a one-bit mask that starts at 256 (bit 8) and walks down to bit 0,
// so nine digits are printed, including leading zeros.
int flag = 256
do
{
    // Bitwise AND isolates the single bit selected by flag. A logical AND (&&)
    // would be true for every non-zero value, whatever the bit.
    if (flag & value)
        print "1"
    else
        print "0"
    flag = flag >> 1 // shift right by one, i.e. divide by two
} while flag > 0 // keep going while flag is non-zero so that bit 0 (flag == 1) is printed as well
For the octal and hex values, I'm a bit rusty, but looking around should lead you to samples you can adapt.
Why make it any harder than it really is?
// Prints every value from 1 to 256 in decimal, octal and hexadecimal,
// tab-separated, one value per line, using the stream base manipulators.
// NOTE: no binary column is produced by this fragment.
for (int i = 1; i <= 256; ++i)
{
std::cout << std::dec << i << "\t" << std::oct << i << "\t" << std::hex << i << std::endl;
}
Try this
using namespace std;
/// Moves a bit mask one position towards the least significant bit.
///
/// After the shift, every bit that was set in the old mask is cleared again.
/// For a single-bit mask this presumably exists to drop the copy of the top
/// bit that a sign-extending right shift leaves behind on signed types.
///
/// @param mask  mask to update in place
template <typename T>
inline void ShiftMask(T& mask) {
    const T previous = mask;
    mask >>= 1;
    mask &= ~previous;
}
/// Writes the binary representation of an integer to a stream, most
/// significant bit first and without leading zeros.
///
/// Zero is written as a single "0". Negative values of signed types are
/// written as their full-width bit pattern.
///
/// Only right shifts of `value` are used. The earlier `1 << (bits - 1)` mask
/// shifted an `int`, which is undefined for types wider than `int`, and the
/// function wrote nothing at all for zero.
///
/// @tparam T     an integer type
/// @param value  number to print (const reference, so literals and const
///               objects are accepted too)
/// @param o      destination stream
/// @return `o`, to allow chaining
template < typename T >
std::ostream& bin(const T& value, std::ostream &o)
{
    const int bit_count = static_cast<int>(sizeof(T) * 8);

    // Find the highest set bit, but never go below bit 0 so that zero still
    // produces one digit.
    int top = bit_count - 1;
    while (top > 0 && ((value >> top) & 1) == 0) {
        --top;
    }

    for (int bit = top; bit >= 0; --bit) {
        o << ((((value >> bit) & 1) != 0) ? '1' : '0');
    }
    return o;
}
// Prints the values 0..255, one per line, as binary, octal, decimal and
// hexadecimal, separated by single spaces.
int main(void) {
    for (int i = 0; i < 256; i++) {
        // The loop variable is the value to print (the original passed an
        // undeclared `a`).
        bin(i, std::cout);
        std::cout << " " << std::oct << i;
        std::cout << " " << std::dec << i;
        std::cout << " " << std::hex << i;
        // End the row; the original statement printed an empty string and was
        // missing its semicolon.
        std::cout << "\n";
    }
}
Maybe something like this?
#include "stdio.h"
// Prints a table of the numbers 1..256 with the tab-separated columns
// decimal, hexadecimal, octal and binary, using bit masks and shifts only.
//
// Values 1..255 fit in eight bits and are printed with fixed widths
// (2 hex digits, 3 octal digits, 8 binary digits). 256 needs a ninth bit and
// is therefore emitted as one literal row after the loop.
int main(){
    // ASCII codes of the digit characters '0'..'9' and 'A'..'F'.
    const char Chars[16]= {48,49,50,51,52,53,54,55,56,57,65,66,67,68,69,70};

    for(int n = 1;n != 256; n++)
    {
        // Decimal
        printf("%i\t", n);

        // Hexadecimal: high nibble, then low nibble.
        {
            const int L = (n & 0xF0) >> 4;
            const int R = (n & 0x0F) >> 0;
            printf("%c%c\t", Chars[L], Chars[R]);
        }

        // Octal: bits 7-6, 5-3 and 2-0.
        {
            const int L = (n & 0xC0) >> 6;
            const int M = (n & 0x38) >> 3;
            const int R = (n & 0x07) >> 0;
            printf("%c%c%c\t", Chars[L], Chars[M], Chars[R]);
        }

        // Binary: most significant bit first, so the digits read the usual
        // way round (they were previously emitted from bit 0 upwards).
        for (int bit = 7; bit >= 0; --bit)
        {
            printf("%c", Chars[(n >> bit) & 0x01]);
        }
        printf("\n");
    }

    // The row for 256 belongs after the loop; inside the loop it was repeated
    // after every single value.
    printf("256\t100\t400\t100000000\n");
}