Related
#include <iostream>
#include <chrono>
#include <random>
using namespace std;
// Simple stopwatch built on steady_clock (monotonic, so immune to
// wall-clock adjustments). Call startCounter(), later getCounter()
// for the elapsed time in microseconds.
class MyTimer
{
private:
    std::chrono::time_point<std::chrono::steady_clock> begin_;
    std::chrono::time_point<std::chrono::steady_clock> end_;
public:
    // Record the starting timestamp.
    void startCounter() {
        begin_ = std::chrono::steady_clock::now();
    }
    // Record the stopping timestamp and return elapsed microseconds.
    long long getCounter() {
        end_ = std::chrono::steady_clock::now();
        auto elapsed = end_ - begin_;
        return std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count();
    }
};
// Return the value paired with the smallest of the four keys.
// Ties resolve to the earliest index (strict comparison).
int findBestKey(int keys[4], int values[4])
{
    int best = 0;
    int i = 1;
    while (i < 4) {
        if (keys[i] < keys[best])
            best = i;
        ++i;
    }
    return values[best];
}
// Faster variant of findBestKey: tournament-style minimum of the four
// keys using three comparisons. Same tie behavior (earliest index wins).
int findBestKeyPro(int keys[4], int values[4])
{
    int best = (keys[0] > keys[1]) ? 1 : 0;
    if (keys[2] < keys[best])
        best = 2;
    return (keys[3] < keys[best]) ? values[3] : values[best];
}
// Branchless variant: pack each key into the high bits of a 64-bit integer
// and its index into the low two bits (key * 4 + index), then take the
// minimum of the packed numbers with conditional-move-friendly ternaries.
// Multiplying by 4 is order-preserving (and well-defined for negative keys),
// so the minimum packed number belongs to the minimum key; on ties the
// smaller index wins, matching findBestKey/findBestKeyPro exactly.
int findBestKeyProMax(int keys[4], int values[4])
{
    long long p0 = (long long)keys[0] * 4 + 0;
    long long p1 = (long long)keys[1] * 4 + 1;
    long long p2 = (long long)keys[2] * 4 + 2;
    long long p3 = (long long)keys[3] * 4 + 3;
    long long m01 = p0 < p1 ? p0 : p1;
    long long m23 = p2 < p3 ? p2 : p3;
    long long best = m01 < m23 ? m01 : m23;
    // Low two bits of the packed minimum recover the winning index.
    return values[best & 3];
}
// Times one findBestKey* implementation across the keys/values arrays,
// handing it one group of 4 consecutive elements per call. The returned
// values are summed into `res` so the calls cannot be optimized away; the
// elapsed time (microseconds) is accumulated into `totalTime`.
void benchMethod(int (*findBestKeyFunc)(int keys[4], int values[4]), int n, int* keys, int* values, int& res, double& totalTime)
{
    MyTimer timer;
    timer.startCounter();
    // In my actual problems, values of arrays "keys" are completely unrelated. They are not the same continuous values in memory. The line below is just an example for benchmark purposes
    // BUGFIX: the loop condition was `i < n - 4`, which skipped the final
    // group of 4 whenever n is a multiple of 4; `i + 4 <= n` visits every
    // complete group without reading past the end.
    for (int i = 0; i + 4 <= n; i += 4)
        res += findBestKeyFunc(&keys[i], &values[i]);
    totalTime += timer.getCounter();
    /*
    it is possible to calculate 4 arrays "keys","values", then process them all at once.
    for (int i=0; i<n-4; i+=16)
    {
    keys[4][4] = ...; values[4][4] = ...;
    res += find4BestKeyAtOnce(&keys, &values);
    }
    */
}
// Cumulative timings (microseconds) for each implementation, printed by testIf().
double totalTimeNormal = 0, totalTimePro = 0, totalTimeProMax = 0;
// Fills two independent key/value arrays with random data and benchmarks the
// three findBestKey* implementations on both. Two arrays are used so that
// the second pass evicts the first from cache, making the order of the
// benchMethod calls irrelevant to the timings.
void benching(int& res1, int& res2, int& res3)
{
    const int n = 10000000;
    int* keys1 = new int[n], * values1 = new int[n];
    int* keys2 = new int[n], * values2 = new int[n];
    for (int i = 0; i < n; i++) {
        keys1[i] = rand() % 100; // need 2 arrays to prevent caching
        keys2[i] = rand() % 100; // this should be % (256*256)
        values1[i] = rand() % 100; // and % 256
        values2[i] = rand() % 100; // but I use % 100 so that in this example it doesn't overflow int32
    }
    // the size of keys2/values2 is big enough to flush out keys1/values1 from cache completely,
    // so order of execution doesn't affect performance here
    benchMethod(&findBestKey, n, keys1, values1, res1, totalTimeNormal);
    benchMethod(&findBestKey, n, keys2, values2, res1, totalTimeNormal);
    benchMethod(&findBestKeyPro, n, keys1, values1, res2, totalTimePro);
    benchMethod(&findBestKeyPro, n, keys2, values2, res2, totalTimePro);
    // BUGFIX: the ProMax runs previously accumulated into res2, leaving res3
    // dead and polluting the Pro checksum.
    benchMethod(&findBestKeyProMax, n, keys1, values1, res3, totalTimeProMax);
    benchMethod(&findBestKeyProMax, n, keys2, values2, res3, totalTimeProMax);
    delete[] keys1;
    delete[] keys2;
    delete[] values1;
    delete[] values2;
}
// Runs the benchmark suite 100 times, printing the cumulative timings after
// each lap. The res* accumulators are printed at the end so the compiler
// cannot discard the benchmark computations as dead code.
void testIf()
{
    int res1 = 0, res2 = 0, res3 = 0;
    for (int t = 1; t <= 100; t++) {
        benching(res1, res2, res3);
        // Keep the accumulators small so repeated laps never overflow int.
        res1 %= 100;
        res2 %= 100;
        res3 %= 100;
        cout << "Lap " << t << "\n";
        cout << "time normal = " << totalTimeNormal/1000 << " ms\n";
        cout << "time pro = " << totalTimePro/1000 << " ms\n";
        cout << "time pro max = " << totalTimeProMax/1000 << " ms\n";
        cout << "\n";
    }
    // BUGFIX: res3 was computed but never printed, which would allow the
    // optimizer to elide the ProMax benchmark's result path.
    cout << "**********************\n" << res1 << " " << res2 << " " << res3 << "\n";
}
// Entry point: runs the branchy-vs-branchless benchmark suite.
int main()
{
    testIf();
    return 0;
}
There are two arrays, keys and values, both completely random. This function returns the value that has the minimum key. So: index = indexOfMin(keys); return values[index]; See function findBestKey. I need to fill in findBestKeyProMax
findBestKeyPro is around 30-35% faster than findBestKey, on my computer and on here: https://www.onlinegdb.com/online_c++_compiler . The compiler options are -std=c++14 -O2. Update: I get ~5-10% more performance just by changing to -O3.
Is there any way I can make this faster? Every nanosecond matters, since this function is called ~10^6-10^7 times (once for each pixel); saving 1 ns per call would translate to 1 ms less, which is the difference between 200 fps and 250 fps.
Edit: no multi-threading or GPU. It's already done (each thread performs findBestKey on distinct keys/values arrays), so I want to improve this function directly. Maybe something like SIMD for CPU? Or branchless function.
Also the functions findBest... are what matters, function benchMethod() is just for benchmarking.
Edit 2: target architecture is CPUs with AVX256 capability, mainly Intel Skylake or AMD Zen 2.
I got stuck in many problems where I was trying to store values in 2D vectors.
So I have written this simple code.
I am just storing and printing my values :
// Stores pairs (row[i], row[i+4]) into a 2D vector and prints them.
int main()
{
    vector<vector<int>> vec;
    vector<int> row{1,3,5,7,9,12,34,56};
    int i,n,m,rs,vs;
    rs=row.size();
    cout<<"rs = "<<rs<<endl;
    for(i=0;i<(rs/2);i++)
    {
        // BUG: vec is still empty here, so vec[i] references an element that
        // does not exist -- undefined behavior (often a segfault). Either
        // size vec up front or push_back whole rows.
        vec[i].push_back(row.at(i));
        vec[i].push_back(row.at(i+4));
    }
    vs=vec.size();
    cout<<vs<<endl;
    for(n=0;n<vs;n++)
    {
        for(m=0;m<2;m++)
        {
            cout<<vec[n][m]<<" ";
        }
        cout<<endl;
    }
    return 0;
}
First you should read Why is “using namespace std;” considered bad practice?.
Declare variables when you use them and not at the beginning of your program.
The vector vec is empty at the beginning. In the loop
for(i=0;i<(rs/2);i++)
{
vec[i].push_back(row.at(i));
vec[i].push_back(row.at(i+4));
}
you are taking a reference to the i-th element in vec with
vec[i]
but this element does not exist. This is undefined behavior and can result in a segmentation fault. You can fix it by constructing the vector with the needed elements
#include <iostream>
#include <vector>
// Fixed version: the outer vector is constructed with rs / 2 empty inner
// vectors up front, so vec[i] is always a valid reference.
int main()
{
    std::vector<int> row{1,3,5,7,9,12,34,56};
    int rs = row.size();
    // rs / 2 default-constructed (empty) inner vectors.
    std::vector<std::vector<int>> vec(rs / 2);
    std::cout << "rs = " << rs << '\n';
    for(int i = 0; i < rs / 2; ++i)
    {
        vec[i].push_back(row.at(i));
        vec[i].push_back(row.at(i + 4));
    }
    int vs = vec.size();
    std::cout << vs << '\n';
    for(int n = 0; n < vs; ++n)
    {
        for(int m = 0; m < 2; ++m)
        {
            std::cout << vec[n][m] << " ";
        }
        std::cout << '\n';
    }
    return 0;
}
In this example the line
std::vector<std::vector<int>> vec(rs / 2);
constructs a vector containing rs / 2 default constructed elements. Alternatively you can start with an empty vector and push back elements in the loop
#include <iostream>
#include <vector>
// Alternative fix: start with an empty outer vector and push_back one fully
// built two-element row per iteration.
int main()
{
    std::vector<int> row{1,3,5,7,9,12,34,56};
    int rs=row.size();
    std::vector<std::vector<int>> vec;
    std::cout << "rs = " << rs << '\n';
    for(int i = 0; i < rs / 2; ++i)
    {
        vec.push_back({row.at(i), row.at(i+4)});
        //
        // is similar to:
        // vec.push_back({});
        // vec.back().push_back(row.at(i));
        // vec.back().push_back(row.at(i+4));
        //
        // is similar to:
        // vec.push_back({});
        // vec[i].push_back(row.at(i));
        // vec[i].push_back(row.at(i+4));
    }
    int vs = vec.size();
    std::cout << vs << '\n';
    for(int n = 0; n < vs; ++n)
    {
        for(int m = 0; m < 2; ++m)
        {
            std::cout << vec[n][m] << " ";
        }
        std::cout << '\n';
    }
    return 0;
}
I recommend the first solution. It's better to allocate memory for all elements and work with it instead of allocate memory in each loop iteration.
I'm a new programmer, and want to generate array of number with 3-digits from number list (2,5,8). I have created a code, but the output is not my expected result. This is my simple code:
// Intends to print 222, 555, 888 by repeating each digit three times.
int main()
{
    int arr[3]={2,5,8};
    int d=3;
    int times=1;
    for (int a:arr){
        int sum = a;
        // BUG: times is never reset to 1 between outer iterations, so each
        // successive digit is scaled by ever-growing powers of 10, producing
        // 222, 55005, 8800008 instead of 222, 555, 888.
        for (int i=1; i<d; i++){
            times *= 10;
            sum += a*times;
        }
        cout<<sum<<endl;
        sum=0;
    }
    return 0;
}
I expected the results 222, 555 and 888, but the actual output is 222, 55005 and 8800008.
This will probably help: you forgot to reset the times variable.
// Fixed version: times is reset to 1 after each digit, so every digit is
// scaled by 1, 10, 100 -- yielding 222, 555, 888.
int main()
{
    int arr[3]={2,5,8};
    int d=3;
    int times=1;
    for (int a:arr){
        int sum = a;
        for (int i=1; i<d; i++){
            times *= 10;
            sum += a*times;
        }
        cout<<sum<<endl;
        times = 1; //<---added
        sum=0;
    }
    return 0;
}
For your current code to generate 222, 555, 888: you forgot to reinitialize times.
You might have created a sub function for clarification:
// Repeats the single digit `n` three times in decimal, i.e. returns 111 * n
// (e.g. mul_by_111(2) == 222) for 0 <= n <= 9.
int mul_by_111(int n) // { return 111 * n; }
{
    // BUGFIX: the original body referenced an undeclared identifier `a`;
    // the loop must operate on the parameter `n`.
    int sum = n;
    int times = 1;
    for (int i = 1; i < 3; ++i) {
        times *= 10;
        sum += n * times;
    }
    return sum;
}
// Prints 222, 555, 888 using the mul_by_111 helper above.
int main()
{
    int arr[] = {2, 5, 8};
    for (int a:arr){
        std::cout << mul_by_111(a) << std::endl;
    }
}
If you want the cartesian product to display 222, 225, 228, 522, .., 888
you might (naively) do it with 3 loops:
// Prints the full cartesian product 222, 225, 228, ..., 888 with three
// nested loops over the same digit set.
int main()
{
    int arr[] = {2, 5, 8};
    for (int a:arr){
        for (int b:arr){
            for (int c:arr){
                std::cout << a << b << c << std::endl;
            }
        }
    }
}
Some libraries as range-v3 propose cartesian_product to allow even simpler:
for (auto t : ranges::view::cartesian_product(arr, arr, arr)) {
std::cout << std::get<0>(t) << std::get<1>(t) << std::get<2>(t) << std::endl;
// std::apply([](auto... args){ (std::cout << ... << args) << std::endl; }, t); // C++17
}
Using a std::stringstream it becomes trivial:
#include <sstream>
#include <iostream>
// Concatenate the digits 2, 5, 8 textually via a stringstream, then parse
// them back as the single integer 258 and print it.
int main()
{
    std::stringstream digits;
    for (int d : {2, 5, 8})
        digits << d;
    int combined{};
    digits >> combined;
    std::cout << combined << std::endl;
}
Update:
To make the permutation part of algorithm you may use std::next_permutation:
std::string s("258");
do {
std::cout << s << '\n';
} while(std::next_permutation(s.begin(), s.end()));
The problem is rather the scope of variables. Just keep them in the block and all is well:
// Same digit-repetition program, but sum and times are declared inside the
// outer loop, so each iteration starts from fresh values -- no manual reset
// is needed.
int main()
{
    int arr[3]={2,5,8};
    int d=3;
    for (int a:arr){
        int sum = a;
        int times=1;
        for (int i=1; i<d; i++){
            times *= 10;
            sum += a*times;
        }
        cout<<sum<<endl;
    }
    return 0;
}
The variables sum and times are initialized on block entry, so there is no need to reset them.
I'd like to sample from a discrete distribution without replacement (i.e., without repetition).
With the function discrete_distribution, it is possible to sample with replacement. And, with this function, I implemented sampling without replacement in a very rough way:
#include <iostream>
#include <random>
#include <vector>
#include <array>
// Demonstrates discrete_distribution sampling with replacement, then a rough
// without-replacement scheme: rebuild the distribution every draw after
// zeroing the weight of the value just sampled.
int main()
{
    const int sampleSize = 8; // Size of the sample
    std::vector<double> weights = {2,2,1,1,2,2,1,1,2,2}; // 10 possible outcome with different weights
    std::random_device rd;
    std::mt19937 generator(rd());
    /// WITH REPLACEMENT
    std::discrete_distribution<int> distribution(weights.begin(), weights.end());
    // p is a histogram of how often each index was drawn.
    std::array<int, 10> p ={};
    for(int i=0; i<sampleSize; ++i){
        int number = distribution(generator);
        ++p[number];
    }
    std::cout << "Discrete_distribution with replacement:" << std::endl;
    for (int i=0; i<10; ++i)
        std::cout << i << ": " << std::string(p[i],'*') << std::endl;
    /// WITHOUT REPLACEMENT
    p = {};
    // A fresh distribution is constructed each draw so the zeroed weights
    // take effect; this makes repeated draws of the same value impossible.
    for(int i=0; i<sampleSize; ++i){
        std::discrete_distribution<int> distribution(weights.begin(), weights.end());
        int number = distribution(generator);
        weights[number] = 0; // the weight associate to the sampled value is set to 0
        ++p[number];
    }
    std::cout << "Discrete_distribution without replacement:" << std::endl;
    for (int i=0; i<10; ++i)
        std::cout << i << ": " << std::string(p[i],'*') << std::endl;
    return 0;
}
Have you ever coded such sampling without replacement? Probably in a more optimized way?
Thank you.
Cheers,
T.A.
This solution might be a bit shorter. Unfortunately, it needs to create a discrete_distribution<> object in every step, which might be prohibitive when drawing a lot of samples.
#include <iostream>
#include <boost/random/discrete_distribution.hpp>
#include <boost/random/mersenne_twister.hpp>
using namespace boost::random;
// Weighted sampling without replacement using Boost: after each draw the
// drawn value's weight is zeroed and a fresh discrete_distribution is built
// (costly when drawing many samples -- see the improved answer below).
int main(int, char**) {
    std::vector<double> w = { 2, 2, 1, 1, 2, 2, 1, 1, 2, 2 };
    discrete_distribution<> dist(w);
    int n = 10;
    boost::random::mt19937 gen;
    std::vector<int> samples;
    for (auto i = 0; i < n; i++) {
        samples.push_back(dist(gen));
        // Exclude the value just drawn, then rebuild with updated weights.
        w[*samples.rbegin()] = 0;
        dist = discrete_distribution<>(w);
    }
    for (auto iter : samples) {
        std::cout << iter << " ";
    }
    return 0;
}
Improved answer:
After carefully looking for a similar question on this site (Faster weighted sampling without replacement), I found a stunningly simple algorithm for weighted sampling without replacement, it is just a bit complicated to implement in C++. Note, that this is not the most efficient algorithm, but it seems to me the simplest one to implement.
In https://doi.org/10.1016/j.ipl.2005.11.003 the method is described in detail.
Especially, it is not efficient if the sample size is much smaller than the basic population.
#include <iostream>
#include <iterator>
#include <boost/random/uniform_01.hpp>
#include <boost/random/mersenne_twister.hpp>
using namespace boost::random;
// Weighted sampling without replacement via the exponential-sort method
// (Efraimidis & Spirakis): draw u ~ U(0,1) per item, rank items by
// u^(1/weight), and take them in decreasing order of that key.
// NOTE(review): std::generate_n/std::transform/std::sort need <algorithm>
// and std::pow needs <cmath>; they seem to come in only transitively here --
// confirm with your toolchain.
int main(int, char**) {
    std::vector<double> w = { 2, 2, 1, 1, 2, 2, 1, 1, 2, 10 };
    uniform_01<> dist;
    boost::random::mt19937 gen;
    std::vector<double> vals;
    // One uniform variate per population element.
    std::generate_n(std::back_inserter(vals), w.size(), [&dist,&gen]() { return dist(gen); });
    // Sampling key r^(1/w): a larger weight pushes the key toward 1.
    std::transform(vals.begin(), vals.end(), w.begin(), vals.begin(), [&](auto r, auto w) { return std::pow(r, 1. / w); });
    // Pair each key with its index so identity survives the sort.
    std::vector<std::pair<double, int>> valIndices;
    size_t index = 0;
    std::transform(vals.begin(), vals.end(), std::back_inserter(valIndices), [&index](auto v) { return std::pair<double,size_t>(v,index++); });
    std::sort(valIndices.begin(), valIndices.end(), [](auto x, auto y) { return x.first > y.first; });
    // The whole population ordered by sampling priority.
    std::vector<int> samples;
    std::transform(valIndices.begin(), valIndices.end(), std::back_inserter(samples), [](auto v) { return v.second; });
    for (auto iter : samples) {
        std::cout << iter << " ";
    }
    return 0;
}
Easier answer
I just removed some of the STL functions and replaced it with simple for loops.
#include <iostream>
#include <iterator>
#include <boost/random/uniform_01.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <algorithm>
using namespace boost::random;
// Same exponential-sort sampling as above, with the STL algorithm calls
// replaced by plain loops for readability.
// NOTE(review): std::pow needs <cmath>; it appears to come in transitively
// here -- confirm with your toolchain.
int main(int, char**) {
    std::vector<double> w = { 2, 2, 1, 1, 2, 2, 1, 1, 2, 1000 };
    uniform_01<> dist;
    boost::random::mt19937 gen(342575235);
    std::vector<double> vals;
    // Sampling key per element: u^(1/w) with u ~ U(0,1).
    for (auto iter : w) {
        vals.push_back(std::pow(dist(gen), 1. / iter));
    }
    // Sorting vals, but retain the indices.
    // There is unfortunately no easy way to do this with STL.
    std::vector<std::pair<int, double>> valsWithIndices;
    for (size_t iter = 0; iter < vals.size(); iter++) {
        valsWithIndices.emplace_back(iter, vals[iter]);
    }
    // Highest key first == highest sampling priority first.
    std::sort(valsWithIndices.begin(), valsWithIndices.end(), [](auto x, auto y) {return x.second > y.second; });
    std::vector<size_t> samples;
    int sampleSize = 8;
    // Keep only the first sampleSize indices as the sample.
    for (auto iter = 0; iter < sampleSize; iter++) {
        samples.push_back(valsWithIndices[iter].first);
    }
    for (auto iter : samples) {
        std::cout << iter << " ";
    }
    return 0;
}
The existing answer by Aleph0 works the best of the ones I tested. I tried benchmarking the original solution, the one added by Aleph0, and a new one where you only make a new discrete_distribution when the existing one is over 50% already added items (redrawing when distribution produces an item already in the sample).
I tested with sample size == population size, and weights equal the index. I think the original solution in the question runs in O(n^2), my new one runs in O(n logn) and the one from the paper seems to run in O(n).
-------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------
BM_Reuse 25252721 ns 25251731 ns 26
BM_NewDistribution 17338706125 ns 17313620000 ns 1
BM_SomePaper 6789525 ns 6779400 ns 100
Code:
#include <array>
#include <benchmark/benchmark.h>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_01.hpp>
#include <iostream>
#include <iterator>
#include <random>
#include <vector>
// Population size AND number of draws for every benchmark below.
const int sampleSize = 20000;
using namespace boost::random;
// Benchmark: reuse one discrete_distribution, redrawing whenever an
// already-taken item comes up, and rebuild the distribution only once more
// than half of its remaining entries have been zeroed.
static void BM_ReuseDistribution(benchmark::State &state) {
    std::vector<double> weights;
    weights.resize(sampleSize);
    for (auto _ : state) {
        for (int i = 0; i < sampleSize; i++) {
            weights[i] = i + 1;
        }
        std::random_device rd;
        std::mt19937 generator(rd());
        int o[sampleSize];
        std::discrete_distribution<int> distribution(weights.begin(),
            weights.end());
        int numAdded = 0;          // draws taken since the last rebuild
        int distSize = sampleSize; // live entries at the last rebuild
        for (int i = 0; i < sampleSize; ++i) {
            if (numAdded > distSize / 2) {
                // Too many dead entries: rebuild from the zeroed weights.
                distSize -= numAdded;
                numAdded = 0;
                distribution =
                    std::discrete_distribution<int>(weights.begin(), weights.end());
            }
            int number = distribution(generator);
            if (!weights[number]) {
                // Already sampled: retry this slot.
                i -= 1;
                continue;
            } else {
                weights[number] = 0;
                o[i] = number;
                numAdded += 1;
            }
        }
    }
}
BENCHMARK(BM_ReuseDistribution);
// Benchmark: the original approach -- build a brand new
// discrete_distribution for every single draw (O(n) setup per draw).
static void BM_NewDistribution(benchmark::State &state) {
    std::vector<double> weights;
    weights.resize(sampleSize);
    for (auto _ : state) {
        for (int i = 0; i < sampleSize; i++) {
            weights[i] = i + 1;
        }
        std::random_device rd;
        std::mt19937 generator(rd());
        int o[sampleSize];
        for (int i = 0; i < sampleSize; ++i) {
            std::discrete_distribution<int> distribution(weights.begin(),
                weights.end());
            int number = distribution(generator);
            // Exclude the drawn item from all later draws.
            weights[number] = 0;
            o[i] = number;
        }
    }
}
BENCHMARK(BM_NewDistribution);
// Benchmark: the exponential-sort method (one uniform draw per item, rank
// by r^(1/w), sort once) -- no redraws and no distribution rebuilds.
static void BM_SomePaper(benchmark::State &state) {
    std::vector<double> w;
    w.resize(sampleSize);
    for (auto _ : state) {
        for (int i = 0; i < sampleSize; i++) {
            w[i] = i + 1;
        }
        uniform_01<> dist;
        boost::random::mt19937 gen;
        std::vector<double> vals;
        // One uniform variate per item.
        std::generate_n(std::back_inserter(vals), w.size(),
            [&dist, &gen]() { return dist(gen); });
        // Sampling key r^(1/w).
        std::transform(vals.begin(), vals.end(), w.begin(), vals.begin(),
            [&](auto r, auto w) { return std::pow(r, 1. / w); });
        std::vector<std::pair<double, int>> valIndices;
        size_t index = 0;
        std::transform(
            vals.begin(), vals.end(), std::back_inserter(valIndices),
            [&index](auto v) { return std::pair<double, size_t>(v, index++); });
        std::sort(valIndices.begin(), valIndices.end(),
            [](auto x, auto y) { return x.first > y.first; });
        std::vector<int> samples;
        std::transform(valIndices.begin(), valIndices.end(),
            std::back_inserter(samples),
            [](auto v) { return v.second; });
    }
}
BENCHMARK(BM_SomePaper);
BENCHMARK_MAIN();
Thanks for your question and the other nice answers; I ran into the same problem. I think you needn't construct a new distribution every time — instead use
dist.param({ wts.begin(), wts.end() });
complete codes are as follows:
//STL改进方案
#include <algorithm>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <random>
#include <set>
#include <vector>
// Improved STL approach: weighted sampling without replacement using a
// single std::discrete_distribution whose weights are refreshed in place via
// dist.param() instead of constructing a new distribution for every draw.
// BUGFIX: the separator literals used fullwidth quotation marks (e.g. a
// fullwidth-quoted space and dash), which are not valid C++ tokens; they are
// replaced with ordinary " " and "-" literals. The unused
// std::default_random_engine was removed. Chinese console strings are
// program output and are left byte-identical; comments are translated.
int main()
{
    // Engine seeded with entropy from the system random device.
    auto gen = std::mt19937{ std::random_device{}() };
    std::vector<int> wts(24);   // weight of each population element
    std::vector<int> in(24);    // the population itself
    std::set<int> out;          // the drawn sample (unique indices)
    std::map<int, int> count;   // frequency counter for the sampling test
    int sampleCount = 0;        // number of draws performed
    int index = 0;              // most recently drawn index
    int sampleSize = 24;        // how many distinct elements to sample
    int sampleTimes = 100000;   // draws used by the frequency test below
    // Assign linearly decreasing weights 48, 46, ..., 2.
    for (int i = 0; i < 24; i++)
    {
        wts.at(i) = 48 - 2 * i;
    }
    // Fill and print the population (values 1..24).
    std::cout << "总体为24个:" << std::endl;
    for (int i = 0; i < 24; i++)
    {
        in.at(i) = i + 1;
        std::cout << in.at(i) << " ";
    }
    std::cout << std::endl;
    // Discrete distribution following the given weights.
    std::discrete_distribution<size_t> dist{ wts.begin(), wts.end() };
    auto probs = dist.probabilities(); // normalized probabilities
    std::cout << "总体中各数据的权重为:" << std::endl;
    std::copy(probs.begin(), probs.end(), std::ostream_iterator<double>
        { std::cout << std::fixed << std::setprecision(5), " " });
    std::cout << std::endl << std::endl;
    // ========== sampling sanity test ==========
    for (size_t j = 0; j < sampleTimes; j++)
    {
        index = dist(gen);
        //std::cout << index << " "; // print each drawn index
        count[index] += 1;           // tally the draw
    }
    double sum = 0.0; // accumulates the relative frequencies
    std::cout << "总共抽样" << sampleTimes << "次," << "各下标的频数及频率为:" << std::endl;
    for (size_t i = 0; i < 24; i++)
    {
        std::cout << i << "共有" << count[i] << "个 频率为:" << count[i] / double(sampleTimes) << std::endl;
        sum += count[i] / double(sampleTimes);
    }
    std::cout << "总频率为:" << sum << std::endl << std::endl;
    // ========== end of sampling sanity test ==========
    // Draw into the set until one short of the requested sample size.
    while (out.size() < sampleSize - 1)
    {
        index = dist(gen);  // draw an index
        out.insert(index);  // keep it (the set ignores duplicates)
        sampleCount += 1;
        wts.at(index) = 0;  // zero the weight of the drawn index
        // Refresh the distribution's weights in place -- no reconstruction.
        dist.param({ wts.begin(), wts.end() });
        probs = dist.probabilities();
        std::cout << "总体中各数据的权重为:" << std::endl;
        std::copy(probs.begin(), probs.end(), std::ostream_iterator<double>
            { std::cout << std::fixed << std::setprecision(5), " " });
        std::cout << std::endl << std::endl;
    }
    // Final draw done separately so the distribution is never handed an
    // all-zero weight vector (which would be an error).
    index = dist(gen);
    out.insert(index);
    sampleCount += 1;
    std::cout << "从总体中抽取的" << sampleSize << "个样本的下标索引为:" << std::endl;
    for (auto iter : out)
    {
        std::cout << iter << "-";
    }
    std::cout << std::endl;
    std::cout << "抽样次数为:" << sampleCount << std::endl;
    out.clear();    // ready for another run
    std::cin.get(); // keep the console window open
    return 0;
}
I am getting an unexpected result using std::accumulate with test code. I am trying to add up a large vector of doubles but for some reason the value is overflowing:
#include <iostream>
#include <vector>
#include <functional>
#include <numeric>
using namespace std;
// Two-argument addition, used as the binary operation for std::accumulate.
double sum(double x, double y)
{
    // Uncomment to trace every accumulation step (slow, but exposes the bug):
    //cout << x << " + " << y << endl;
    double total = x;
    total += y;
    return total;
}
// Arithmetic mean of vec.
// BUG: the accumulate call passes the int literal 0 as its initial value, so
// the entire accumulation is performed in int (overflowing for this input);
// only the final total is converted to double. Passing 0.0 instead makes the
// accumulation use double -- see the answer at the end of this thread.
double mean(const vector<double> & vec)
{
    double result = 0.0;
    // works:
    //vector<double>::const_iterator it;
    //for (it = vec.begin(); it != vec.end(); ++it){
    // result += (*it);
    //}
    // broken:
    result = accumulate(vec.begin(), vec.end(), 0, sum);
    result /= vec.size();
    return result;
}
// Builds a vector holding 0..99999 and prints its mean (expected 49999.5).
int main(int argc, char ** argv)
{
    const unsigned int num_pts = 100000;
    vector<double> vec(num_pts, 0.0);
    for (unsigned int i = 0; i < num_pts; ++i){
        vec[i] = (double)i;
    }
    cout << "mean = " << mean(vec) << endl;
    return 0;
}
Partial output from the cout inside the sum:
2.14739e+09 + 65535
2.14745e+09 + 65536
-2.14748e+09 + 65537
-2.14742e+09 + 65538
-2.14735e+09 + 65539
Correct output (iterating):
mean = 49999.5
Incorrect output (using accumulate):
mean = 7049.5
I am probably making a tired mistake? I have used accumulate successfully before...
Thanks
You need to pass a double to accumulate:
result = accumulate(vec.begin(), vec.end(), 0.0, sum);
^^^
otherwise the accumulation is performed using int, and then converting the result to a double.