I've tested the code from [1] to remove items from a Boost Rtree. However, it is quite slow. It takes about 1s to insert 1M records and 12s to remove them.
Can it be done faster?
Here is the test code:
#include <vector>
#include <iostream>
#include <stdio.h>
#include <boost/geometry.hpp>
#include <boost/geometry/index/rtree.hpp>
#include <boost/timer.hpp>
struct Rect
{
Rect() {}
Rect(int a_minX, int a_minY, int a_maxX, int a_maxY)
{
min[0] = a_minX;
min[1] = a_minY;
max[0] = a_maxX;
max[1] = a_maxY;
}
int min[2];
int max[2];
};
int main()
{
// randomize rectangles
std::vector<Rect> rects;
for (size_t i = 0 ; i < 1000000 ; ++i)
{
int min_x = rand() % 100000;
int min_y = rand() % 100000;
int w = 1 + rand() % 100;
int h = 1 + rand() % 100;
rects.push_back(Rect(min_x, min_y, min_x+w, min_y+h));
}
// create the rectangle passed into the query
Rect search_rect(4, 4, 6, 6);
// create the Boost.Geometry R-tree
namespace bg = boost::geometry;
namespace bgi = boost::geometry::index;
typedef bg::model::point<double, 2, bg::cs::cartesian> point_t;
typedef bg::model::box<point_t> box_t;
typedef std::pair<box_t, uint64_t> value_t;
bgi::rtree<value_t, bgi::quadratic<8,4> > bg_tree;
{
boost::timer t;
for(size_t i = 0; i < rects.size(); i++)
{
Rect const& r = rects[i];
box_t b(point_t(r.min[0], r.min[1]), point_t(r.max[0], r.max[1]));
bg_tree.insert(value_t(b, i));
}
double s = t.elapsed();
std::cout << s << " Boost insert time" << std::endl;
}
// test BG Rtree
{
std::vector<value_t> res;
box_t search_box(
point_t(search_rect.min[0], search_rect.min[1]),
point_t(search_rect.max[0], search_rect.max[1]));
size_t sum = 0;
boost::timer t;
for (size_t i = 0 ; i < 10000 ; ++i)
{
res.clear();
sum += bg_tree.query(bgi::intersects(search_box), std::back_inserter(res));
}
double s = t.elapsed();
std::cout << s << " Boost query " << sum << std::endl;
}
{
boost::timer t;
std::cout << "Tree contains " << bg_tree.size() << " box-id values." << std::endl;
while (!bg_tree.empty()) {
// 1. Choose arbitrary BoxIdPair to be the leader of a new canopy.
// Remove it from the tree. Insert it into the canopy map, with its
// corresponding id.
Rect const& r = rects[rects.size()-1];
point_t origin(r.min[0], r.min[0]);
rects.pop_back();
auto first = bgi::qbegin(bg_tree, bgi::nearest(origin, 1)),
last = bgi::qend(bg_tree);
if (first != last) {
bg_tree.remove(*first); // assuming single result
}
}
double s = t.elapsed();
std::cout << s << " Boost tree emptied " << std::endl;
}
}
Example output:
1.09421 Boost insert time
0.000371 Boost query 0
Tree contains 1000000 box-id values.
12.7237 Boost tree emptied
[1] Issue with removing points from a boost::geometry::index::rtree
Related
**it works at the beggining, adds 16 and 4 but then it doesn't add 2 + 3 and somehow even gets 8.
what am I doing wrong?**
I don't really understand how to count it separately
#include <iostream>
#include <iomanip>
using namespace std;
int main() {
int a = 0;
int my_vector[][2] = { {16,2}, {4, 3} };
for (int r = 0; r < 2; r++)
{
for (int c = 0; c < 2; c++)
{
std::cout << " " << my_vector[r][c] << "\t";
a = *my_vector[r] + *my_vector[c];
}
std::cout << "\n" << a;
std::cout << "\n";
} ```
[1]: https://i.stack.imgur.com/RbTcO.png
Here you dereference the first element in my_vector row r and add that to the dereferenced first element in my_vactor row c:
a = *my_vector[r] + *my_vector[c];
Then the result stored in a is overwritten in the next loop.
You probably meant:
a += my_vector[r][c]; // or: a = a + my_vector[r][c];
Another approach could be to use std::for_each and std::accumulate:
#include <algorithm> // std::for_each
#include <numeric> // std::accumulate
#include <iostream>
#include <iterator> // std::begin, std::end
int main() {
int a = 0;
int my_vector[][2] = { {16,2}, {4, 3} };
// for_each loops over the rows
std::for_each(std::begin(my_vector), std::end(my_vector), [&a](auto& row) {
// and accumulate sums up everything in columns in that row
a += std::accumulate(std::begin(row), std::end(row), 0);
});
std::cout << "\n" << a << '\n';
}
I'd like to sample from a discrete distribution without replacement (i.e., without repetition).
With the function discrete_distribution, it is possible to sample with replacement. And, with this function, I implemented sampling without replacement in a very rough way:
#include <iostream>
#include <random>
#include <vector>
#include <array>
int main()
{
const int sampleSize = 8; // Size of the sample
std::vector<double> weights = {2,2,1,1,2,2,1,1,2,2}; // 10 possible outcome with different weights
std::random_device rd;
std::mt19937 generator(rd());
/// WITH REPLACEMENT
std::discrete_distribution<int> distribution(weights.begin(), weights.end());
std::array<int, 10> p ={};
for(int i=0; i<sampleSize; ++i){
int number = distribution(generator);
++p[number];
}
std::cout << "Discrete_distribution with replacement:" << std::endl;
for (int i=0; i<10; ++i)
std::cout << i << ": " << std::string(p[i],'*') << std::endl;
/// WITHOUT REPLACEMENT
p = {};
for(int i=0; i<sampleSize; ++i){
std::discrete_distribution<int> distribution(weights.begin(), weights.end());
int number = distribution(generator);
weights[number] = 0; // the weight associate to the sampled value is set to 0
++p[number];
}
std::cout << "Discrete_distribution without replacement:" << std::endl;
for (int i=0; i<10; ++i)
std::cout << i << ": " << std::string(p[i],'*') << std::endl;
return 0;
}
Have you ever coded such sampling without replacement? Probably in a more optimized way?
Thank you.
Cheers,
T.A.
This solution might be a bit shorter. Unfortunately, it needs to create a discrete_distribution<> object in every step, which might be prohibitive when drawing a lot of samples.
#include <iostream>
#include <boost/random/discrete_distribution.hpp>
#include <boost/random/mersenne_twister.hpp>
using namespace boost::random;
int main(int, char**) {
std::vector<double> w = { 2, 2, 1, 1, 2, 2, 1, 1, 2, 2 };
discrete_distribution<> dist(w);
int n = 10;
boost::random::mt19937 gen;
std::vector<int> samples;
for (auto i = 0; i < n; i++) {
samples.push_back(dist(gen));
w[*samples.rbegin()] = 0;
dist = discrete_distribution<>(w);
}
for (auto iter : samples) {
std::cout << iter << " ";
}
return 0;
}
Improved answer:
After carefully looking for a similar question on this site (Faster weighted sampling without replacement), I found a stunningly simple algorithm for weighted sampling without replacement, it is just a bit complicated to implement in C++. Note, that this is not the most efficient algorithm, but it seems to me the simplest one to implement.
In https://doi.org/10.1016/j.ipl.2005.11.003 the method is described in detail.
Especially, it is not efficient if the sample size is much smaller than the basic population.
#include <iostream>
#include <iterator>
#include <boost/random/uniform_01.hpp>
#include <boost/random/mersenne_twister.hpp>
using namespace boost::random;
int main(int, char**) {
std::vector<double> w = { 2, 2, 1, 1, 2, 2, 1, 1, 2, 10 };
uniform_01<> dist;
boost::random::mt19937 gen;
std::vector<double> vals;
std::generate_n(std::back_inserter(vals), w.size(), [&dist,&gen]() { return dist(gen); });
std::transform(vals.begin(), vals.end(), w.begin(), vals.begin(), [&](auto r, auto w) { return std::pow(r, 1. / w); });
std::vector<std::pair<double, int>> valIndices;
size_t index = 0;
std::transform(vals.begin(), vals.end(), std::back_inserter(valIndices), [&index](auto v) { return std::pair<double,size_t>(v,index++); });
std::sort(valIndices.begin(), valIndices.end(), [](auto x, auto y) { return x.first > y.first; });
std::vector<int> samples;
std::transform(valIndices.begin(), valIndices.end(), std::back_inserter(samples), [](auto v) { return v.second; });
for (auto iter : samples) {
std::cout << iter << " ";
}
return 0;
}
Easier answer
I just removed some of the STL functions and replaced it with simple for loops.
#include <iostream>
#include <iterator>
#include <boost/random/uniform_01.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <algorithm>
using namespace boost::random;
int main(int, char**) {
std::vector<double> w = { 2, 2, 1, 1, 2, 2, 1, 1, 2, 1000 };
uniform_01<> dist;
boost::random::mt19937 gen(342575235);
std::vector<double> vals;
for (auto iter : w) {
vals.push_back(std::pow(dist(gen), 1. / iter));
}
// Sorting vals, but retain the indices.
// There is unfortunately no easy way to do this with STL.
std::vector<std::pair<int, double>> valsWithIndices;
for (size_t iter = 0; iter < vals.size(); iter++) {
valsWithIndices.emplace_back(iter, vals[iter]);
}
std::sort(valsWithIndices.begin(), valsWithIndices.end(), [](auto x, auto y) {return x.second > y.second; });
std::vector<size_t> samples;
int sampleSize = 8;
for (auto iter = 0; iter < sampleSize; iter++) {
samples.push_back(valsWithIndices[iter].first);
}
for (auto iter : samples) {
std::cout << iter << " ";
}
return 0;
}
The existing answer by Aleph0 works the best of the ones I tested. I tried benchmarking the original solution, the one added by Aleph0, and a new one where you only make a new discrete_distribution when the existing one is over 50% already added items (redrawing when distribution produces an item already in the sample).
I tested with sample size == population size, and weights equal the index. I think the original solution in the question runs in O(n^2), my new one runs in O(n logn) and the one from the paper seems to run in O(n).
-------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------
BM_Reuse 25252721 ns 25251731 ns 26
BM_NewDistribution 17338706125 ns 17313620000 ns 1
BM_SomePaper 6789525 ns 6779400 ns 100
Code:
#include <array>
#include <benchmark/benchmark.h>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_01.hpp>
#include <iostream>
#include <iterator>
#include <random>
#include <vector>
const int sampleSize = 20000;
using namespace boost::random;
static void BM_ReuseDistribution(benchmark::State &state) {
std::vector<double> weights;
weights.resize(sampleSize);
for (auto _ : state) {
for (int i = 0; i < sampleSize; i++) {
weights[i] = i + 1;
}
std::random_device rd;
std::mt19937 generator(rd());
int o[sampleSize];
std::discrete_distribution<int> distribution(weights.begin(),
weights.end());
int numAdded = 0;
int distSize = sampleSize;
for (int i = 0; i < sampleSize; ++i) {
if (numAdded > distSize / 2) {
distSize -= numAdded;
numAdded = 0;
distribution =
std::discrete_distribution<int>(weights.begin(), weights.end());
}
int number = distribution(generator);
if (!weights[number]) {
i -= 1;
continue;
} else {
weights[number] = 0;
o[i] = number;
numAdded += 1;
}
}
}
}
BENCHMARK(BM_ReuseDistribution);
static void BM_NewDistribution(benchmark::State &state) {
std::vector<double> weights;
weights.resize(sampleSize);
for (auto _ : state) {
for (int i = 0; i < sampleSize; i++) {
weights[i] = i + 1;
}
std::random_device rd;
std::mt19937 generator(rd());
int o[sampleSize];
for (int i = 0; i < sampleSize; ++i) {
std::discrete_distribution<int> distribution(weights.begin(),
weights.end());
int number = distribution(generator);
weights[number] = 0;
o[i] = number;
}
}
}
BENCHMARK(BM_NewDistribution);
static void BM_SomePaper(benchmark::State &state) {
std::vector<double> w;
w.resize(sampleSize);
for (auto _ : state) {
for (int i = 0; i < sampleSize; i++) {
w[i] = i + 1;
}
uniform_01<> dist;
boost::random::mt19937 gen;
std::vector<double> vals;
std::generate_n(std::back_inserter(vals), w.size(),
[&dist, &gen]() { return dist(gen); });
std::transform(vals.begin(), vals.end(), w.begin(), vals.begin(),
[&](auto r, auto w) { return std::pow(r, 1. / w); });
std::vector<std::pair<double, int>> valIndices;
size_t index = 0;
std::transform(
vals.begin(), vals.end(), std::back_inserter(valIndices),
[&index](auto v) { return std::pair<double, size_t>(v, index++); });
std::sort(valIndices.begin(), valIndices.end(),
[](auto x, auto y) { return x.first > y.first; });
std::vector<int> samples;
std::transform(valIndices.begin(), valIndices.end(),
std::back_inserter(samples),
[](auto v) { return v.second; });
}
}
BENCHMARK(BM_SomePaper);
BENCHMARK_MAIN();
Thanks for your question and others' nice answer, I meet a same qustion as you. I think you needn't new distribution every time, instead of
dist.param({ wts.begin(), wts.end() });
complete codes are as follows:
//STL改进方案
#include <iostream>
#include <vector>
#include <random>
#include <iomanip>
#include <map>
#include <set>
int main()
{
//随机数引擎采用默认引擎
std::default_random_engine rng;
//随机数引擎采用设备熵值保证随机性
auto gen = std::mt19937{ std::random_device{}() };
std::vector<int> wts(24); //存储权重值
std::vector<int> in(24); //存储总体
std::set<int> out; //存储抽样结果
std::map<int, int> count; //输出计数
int sampleCount = 0; //抽样次数计数
int index = 0; //抽取的下标
int sampleSize = 24; //抽取样本的数量
int sampleTimes = 100000; //抽取样本的次数
//权重赋值
for (int i = 0; i < 24; i++)
{
wts.at(i) = 48 - 2 * i;
}
//总体赋值并输出
std::cout << "总体为24个:" << std::endl;
//赋值
for (int i = 0; i < 24; i++)
{
in.at(i) = i + 1;
std::cout << in.at(i) << " ";
}
std::cout << std::endl;
//产生按照给定权重的离散分布
std::discrete_distribution<size_t> dist{ wts.begin(), wts.end() };
auto probs = dist.probabilities(); // 返回概率计算结果
//输出概率计算结果
std::cout << "总体中各数据的权重为:" << std::endl;
std::copy(probs.begin(), probs.end(), std::ostream_iterator<double>
{ std::cout << std::fixed << std::setprecision(5), “ ”});
std::cout << std::endl << std::endl;
//==========抽样测试==========
for (size_t j = 0; j < sampleTimes; j++)
{
index = dist(gen);
//std::cout << index << “ ”; //输出抽样结果
count[index] += 1; //抽样结果计数
}
double sum = 0.0; //用于概率求和
//输出抽样结果
std::cout << "总共抽样" << sampleTimes << "次," << "各下标的频数及频率为:" << std::endl;
for (size_t i = 0; i < 24; i++)
{
std::cout << i << "共有" << count[i] << "个 频率为:" << count[i] / double(sampleTimes) << std::endl;
sum += count[i] / double(sampleTimes);
}
std::cout << "总频率为:" << sum << std::endl << std::endl; //输出总概率
//==========抽样测试==========
//从总体中抽样放入集合中,直至集合大小达到样本数
while (out.size() < sampleSize - 1)
{
index = dist(gen); //抽取下标
out.insert(index); //插入集合
sampleCount += 1; //抽样次数增加1
wts.at(index) = 0; //将抽取到的下标索引的权重设置为0
dist.param({ wts.begin(), wts.end() });
probs = dist.probabilities(); // 返回概率计算结果
//输出概率计算结果
std::cout << "总体中各数据的权重为:" << std::endl;
std::copy(probs.begin(), probs.end(), std::ostream_iterator<double>
{ std::cout << std::fixed << std::setprecision(5), “ ”});
std::cout << std::endl << std::endl;
}
//最后一次抽取,单独出来是避免将所有权重都为0,的权重数组赋值给离散分布dist,避免报错
index = dist(gen); //抽取下标
out.insert(index); //插入集合
sampleCount += 1; //抽样次数增加1
//输出抽样结果
std::cout << "从总体中抽取的" << sampleSize << "个样本的下标索引为:" << std::endl;
for (auto iter : out)
{
std::cout << iter << “-”;
}
std::cout << std::endl;
//输出抽样次数
std::cout << "抽样次数为:" << sampleCount << std::endl;
out.clear(); //清空输出集合,为下次抽样做准备
std::cin.get(); //保留控制台窗口
return 0;
}
As per the answer to this question I have attempted to change the parameter of a distribution in <random> by using .param(). Below is a toy example where I'm trying to do this.
For both a chi-squared and a normal distribution I have a function that generates two values, the second where the parameter has been changed by .param(). I run both functions multiple times and print out the mean outcome for both. As expected the normal function produces mean outcomes of 0 and 10. Unexpectedly the chi-squared function produces mean outcomes of 4 and 4, instead of my expectation of 4 and 3. Why are my expectations off for the chi-squared distribution?
#include <iostream>
#include <random>
#include <vector>
using namespace std;
vector<double> chisqtest(mt19937_64 &gen)
{
vector<double> res(2);
chi_squared_distribution<double> chisq_dist(4);
res[0] = chisq_dist(gen);
chisq_dist.param(std::chi_squared_distribution<double>::param_type (3));
res[1] = chisq_dist(gen);
return res;
}
vector<double> normtest(mt19937_64 &gen)
{
vector<double> res(2);
normal_distribution<double> norm_dist(0,1);
res[0] = norm_dist(gen);
norm_dist.param(std::normal_distribution<double>::param_type (10,1));
res[1] = norm_dist(gen);
return res;
}
int main() {
unsigned int n = 100000;
mt19937_64 gen(1);
vector<double> totals = {0,0}, res(2);
for(unsigned int i = 0; i < n; i++){
res = chisqtest(gen);
totals[0] += res[0];
totals[1] += res[1];
}
cout << totals[0]/n << " " << totals[1]/n << "\n";
vector<double> totals2 = {0,0}, res2;
for(unsigned int i = 0; i < n; i++){
res2 = normtest(gen);
totals2[0] += res2[0];
totals2[1] += res2[1];
}
cout << totals2[0]/n << " " << totals2[1]/n << "\n";
}
i got stuck when i was trying to convert my code to MPI and Crossover solution. My problem is about the Crossover is too hard for me to understand and more difficult to implement all of those solution to MPI. If anyone could give me hint, or example, or any related document. I will included my code below for everyone to see.
Thank you very much
#include <string>
#include <cstdlib>
#include <iostream>
#include <cassert>
#include <algorithm>
#include <vector>
std::string allowed_chars = " ABCDEFGHIJKLMNOPQRSTUVWXYZ";
class selection
{
public:
static int fitness(std::string candidate)
{
assert(target.length() == candidate.length());
int fitness_so_far = 0;
for (int i = 0; i < target.length(); ++i)
{
int target_pos = allowed_chars.find(target[i]);
int candidate_pos = allowed_chars.find(candidate[i]);
int diff = std::abs(target_pos - candidate_pos);
fitness_so_far -= std::min(diff, int(allowed_chars.length()) - diff);
}
return fitness_so_far;
}
// get the target string length
static int target_length() { return target.length(); }
private:
static std::string target;
};
std::string selection::target = "METHINKS IT IS LIKE A WEASEL";
void move_char(char& c, int distance)
{
while (distance < 0)
distance += allowed_chars.length();
int char_pos = allowed_chars.find(c);
c = allowed_chars[(char_pos + distance) % allowed_chars.length()];
}
std::string mutate(std::string parent, double mutation_rate)
{
for (int i = 0; i < parent.length(); ++i)
if (std::rand()/(RAND_MAX + 1.0) < mutation_rate)
{
int distance = std::rand() % 3 + 1;
if(std::rand()%2 == 0)
move_char(parent[i], distance);
else
move_char(parent[i], -distance);
}
return parent;
}
bool less_fit(std::string const& s1, std::string const& s2)
{
return selection::fitness(s1) < selection::fitness(s2);
}
int main()
{
int const C = 100;
std::srand(time(0));
std::string parent;
for (int i = 0; i < selection::target_length(); ++i)
{
parent += allowed_chars[std::rand() % allowed_chars.length()];
}
int const initial_fitness = selection::fitness(parent);
for(int fitness = initial_fitness;
fitness < 0;
fitness = selection::fitness(parent))
{
std::cout << parent << ": " << fitness << "\n";
double const mutation_rate = 0.02 + (0.9*fitness)/initial_fitness;
typedef std::vector<std::string> childvec;
childvec childs;
childs.reserve(C+1);
childs.push_back(parent);
for (int i = 0; i < C; ++i)
childs.push_back(mutate(parent, mutation_rate));
parent = *std::max_element(childs.begin(), childs.end(), less_fit);
}
std::cout << "final string: " << parent << "\n";
}
For cross over select 2 parents strings and associated split index(es),
then generate 2 new string:
std::string new_string1 = s1.substring(0, split_index1) + s2.substring(split_index2);
std::string new_string2 = s2.substring(0, split_index2) + s1.substring(split_index1);`.
I am able to generate average from 2D Array (columns). Though I am unable to figure out, how to sort that average. I do have to create a new array (rankedscore[]).
Any help would be greatly appreciated:
int rankedArtist() // ranked artist based on score
{
const int A1_SIZE = 5, A2_ROWSIZE =5, A2_COLSIZE =10;
string Artist[A1_SIZE]={ "Degas", "Holbien", "Monet", "Matisse", "Valesquez" };
int Scores[A2_ROWSIZE][A2_COLSIZE] = {{5,5,6,8,4,6,8,8,8,10},{8,8,8,8,8,8,8,8,8,8},
{10,10,10,10,10,10,10,10,10,10},{5,0,0,0,0,0,0,0,0,0},{5,6,8,10,4,0,0,0,0,0}};
cout << "\n\n\t-------------------------------------------" << endl;
cout << "\t\tRanking by Artist"<< endl;
cout << "\t===========================================" << endl;
int total = 0;
float faverage;
double AverageScore[5];
double average;
double rankedscore[A2_ROWSIZE];
for (int x=0; x<5; x++)
{
cout << "\n\t" << Artist[x] << "\t\t";
for (int col = 0; col < A2_COLSIZE; col++)
{
total+=Scores[x][col];
}
faverage = (float)total / 10.0f;
average = total = 0;
AverageScore[x] = faverage;
}
}
Though I am unable to figure out, how to sort that average.
I would use a std::pair to map the artist to the score. Then I would use std::sort to implement the sorting:
#include <vector>
#include <utility>
#include <string>
#include <algorithm>
#include <iostream>
int main()
{
std::vector<std::string> artists{ "Degas", "Holbien", "Monet", "Matisse", "Valesquez" };
std::vector<std::vector<int>> scores{{ 3, 2, 1 }, { 5, 4, 3 }};
std::vector<std::pair<std::string, int>> chart;
for (auto name = artists.begin(); name != artists.end(); ++name)
{
for (auto score = scores.begin(); score != scores.end(); ++score)
{
int total = std::accumulate(score->begin(), score->end(), 0);
int average = total / score->size();
chart.push_back(std::make_pair(*name, average));
}
}
struct
{
bool operator()(std::pair<std::string, int> p1, std::pair<std::string, int> p2) const
{
return p1.second < p2.second;
}
} Predicate;
std::sort(chart.begin(), chart.end(), Predicate);
for (auto it = chart.begin(); it != chart.end(); ++it)
{
std::cout << it->first << ": " << it->second << std::endl;
}
}
You could use std::sort to sort the rankedscore array. Afterwards, just make a for loop and print its values.
You can view an example of how to use std::sort here
EDIT: As RankedScore and Artists should be related, you could use a map structure. Use the average scores as keys. Then you can sort the map by its keys and print the key values (the artists) in order.