std::chrono - fixed time step loop - c++

I'm trying to make a fixed time step loop using <chrono>.
This is my code:
#include <iostream>
#include <chrono>
// Fixed-time-step loop: runs the update step once for every 1/60 s of elapsed wall time.
//
// Elapsed time is measured between consecutive now() samples, so every part of an
// iteration is accounted for. Timing only a window that is restarted at the top of the
// loop drops whatever passes between the last sample and the restart, and the step
// counter then falls behind real time.
int main()
{
    using Clock = std::chrono::steady_clock;

    const double timePerFrame = 1.0 / 60.0;  // seconds per fixed step
    double accumulator = 0.0;                // seconds elapsed but not yet consumed by a step
    int i = 0;                               // number of fixed steps executed so far

    Clock::time_point previous = Clock::now();
    while (true)
    {
        // A single sample closes the previous interval and opens the next one.
        const Clock::time_point current = Clock::now();
        accumulator += std::chrono::duration<double>(current - previous).count();
        previous = current;

        while (accumulator >= timePerFrame)
        {
            accumulator -= timePerFrame;
            std::cout << ++i << std::endl;
            //update();
        }
        //render();
    }
    return 0;
}
The value of the variable "i" is printed fewer than 60 times per second. The same thing happens when I change "timePerFrame" to "1.0": the value is printed less often than once per second. What is wrong with it?

#include <iostream>
#include <chrono>
#include <thread>
// Paces the loop with absolute deadlines: each pass prints the counter, sleeps until the
// current deadline on steady_clock, then pushes the deadline forward by one 1/60 s tick.
int main()
{
    namespace chr = std::chrono;
    using Tick = chr::duration<chr::steady_clock::rep, std::ratio<1, 60>>;

    const Tick oneTick{1};
    auto deadline = chr::steady_clock::now() + oneTick;
    for (int counter = 1; ; ++counter)
    {
        std::cout << counter << std::endl;
        //update();
        std::this_thread::sleep_until(deadline);
        deadline += oneTick;
        //render();
    }
    return 0;
}
Here's the same thing with a busy loop:
// Same pacing scheme as a sleeping loop, but the wait is a spin on steady_clock::now()
// until the deadline is reached; the deadline then advances by one 1/60 s tick.
int main()
{
    using Clock = std::chrono::steady_clock;
    using Tick = std::chrono::duration<Clock::rep, std::ratio<1, 60>>;

    auto deadline = Clock::now() + Tick{1};
    int counter = 0;
    for (;;)
    {
        counter += 1;
        std::cout << counter << std::endl;
        //update();
        while (Clock::now() < deadline)
        {
            // spin until the deadline passes
        }
        deadline += Tick{1};
        //render();
    }
    return 0;
}

Related

How to improved performance of code when dealing with large input in C++?

How would it be possible to make this code run faster in C++. The code takes a lot of time to run. The purpose is to determine how many gates are required to handle a prescribed
arrivals-and-departures schedule.
#include <vector>
// One entry of the arrivals-and-departures schedule; both times are stored as whole seconds.
struct Airplane
{
    int arrival_time_seconds;    // second at which the plane arrives
    int departure_time_seconds;  // second at which the plane departs
};
// Keeps an immutable copy of a list of airplanes and reports the largest number of
// planes that are present at the same time.
class Schedule {
public:
    // Copies the given airplanes into the schedule.
    Schedule(const std::vector<Airplane>& airplanes) : airplanes_(airplanes) {}

    // Evaluates the occupancy at every arrival time and returns the largest count
    // (0 when there are no airplanes). Each evaluation scans the whole list.
    int MaximumNumberOfPlanes() const {
        int best = 0;
        for (const Airplane& candidate : airplanes_) {
            const int present = NumberOfPlanes(candidate.arrival_time_seconds);
            best = (present > best) ? present : best;
        }
        return best;
    }

private:
    // Counts the airplanes with arrival < time_seconds <= departure.
    int NumberOfPlanes(int time_seconds) const {
        int count = 0;
        for (const Airplane& plane : airplanes_) {
            const bool already_arrived = plane.arrival_time_seconds < time_seconds;
            const bool not_departed = time_seconds <= plane.departure_time_seconds;
            if (already_arrived && not_departed) {
                ++count;
            }
        }
        return count;
    }

    const std::vector<Airplane> airplanes_;
};
A lot of people stated that this can be made O(N), and it is possible to some extent. At least I was able to make it O(max(N,86400)) which is better than your version for N>294 and better than a O(NlogN) for N>6788.
I assume that if a plane departs the next day it has a departure_time_seconds = 86400 (the number of seconds in a day), while all arrival_time_seconds are lower than 86400.
You can build an array holding the change in the number of planes at each second in O(N), and then use it to compute the number of planes in the airport at every second in O(86400):
int MaximumNumberOfPlanes2() const {
int delta[24 * 60 * 60 + 1] = { 0 };
for (const Airplane& x : airplanes_) {
delta[x.arrival_time_seconds]++;
delta[x.departure_time_seconds]--;
}
int rv = 0;
int np = 0;
for (int i = 0; i < 24 * 60 * 60; ++i) {
np += delta[i];
rv = std::max(rv, np);
}
return rv;
}
A test program with some timing:
#include <vector>
#include <iostream>
#include <fstream>
#include <random>
#include <chrono>
#include <queue>
// Test driver: builds 100000 random airplanes, wraps them in a Schedule, then times
// MaximumNumberOfPlanes and MaximumNumberOfPlanes2 on it and prints each result with
// its elapsed time in seconds.
int main()
{
using namespace std;
using namespace std::chrono;
// Default-constructed engine: no seed is supplied here.
default_random_engine eng;
// Arrival second is drawn from the closed range [0, 24*60*60].
uniform_int_distribution<int> arr_dist(0, 24*60*60);
gamma_distribution<double> dep_dist(5, 3);
std::vector<Airplane> a;
for (int i = 0; i < 100000; ++i) {
int arrival = arr_dist(eng);
// Stay length is 20 plus a rounded gamma draw, times 60 (presumably minutes converted to seconds).
int departure = arrival + (20 + lround(dep_dist(eng))) * 60;
// Clamp so that no departure exceeds 24*60*60.
departure = min(departure, 24*60*60);
a.push_back({ arrival, departure });
}
Schedule s(a);
// Timed run of the original method.
{
const auto& start = steady_clock::now();
int mnp = s.MaximumNumberOfPlanes();
const auto& stop = steady_clock::now();
duration<double> elapsed = stop - start;
std::cout << "MaximumNumberOfPlanes : " << mnp << " - Elapsed: " << elapsed.count() << " s\n";
}
// Timed run of the difference-array method on the same schedule.
{
const auto& start = steady_clock::now();
int mnp = s.MaximumNumberOfPlanes2();
const auto& stop = steady_clock::now();
duration<double> elapsed = stop - start;
std::cout << "MaximumNumberOfPlanes2: " << mnp << " - Elapsed: " << elapsed.count() << " s\n";
}
return 0;
}
This gives (on my laptop):
MaximumNumberOfPlanes : 2572 - Elapsed: 48.8979 s
MaximumNumberOfPlanes2: 2572 - Elapsed: 0.0010778 s

Error in generating Random Character Arrays with Integer elements for Time Analysis of Insertion Sort Algorithm using rand() [ C++ ]

This question is not about where to put the srand function.
I have just started learning DSA with Insertion Sort. I have written a C++ program to perform Insertion Sort and wanted to create some neat visuals of the time analysis. I tried generating random arrays for the time analysis using the rand() function, but the generated arrays seem to have an extra character at the end. The elements of the character array should all be single-digit integers such as '0', '3' and so on.
The Main Function of the Program:
#include <iostream>
#include <time.h>
#include <cstdlib>
#include <iomanip>
using namespace std;
// Returns the number of characters that precede the terminating '\0' of a C string.
// A null pointer is treated as an empty string. The buffer must be null-terminated;
// otherwise the scan runs past the end of the array.
int size(char a[]) {
    if (a == nullptr) {
        return 0;
    }
    int l = 0;
    // Compare against the character '\0' rather than the pointer constant NULL.
    while (a[l] != '\0') {
        l++;
    }
    return l;
}
// Sorts a null-terminated character array in place, in ascending order, by insertion sort.
// A null pointer or an empty string is left untouched.
//
// The loop stops at the terminator instead of recomputing the string length on every
// pass; the terminator never moves because only the characters before it are shifted.
void InsertionSort(char arr[]) {
    if (arr == nullptr || arr[0] == '\0') {
        return;
    }
    for (int k = 1; arr[k] != '\0'; k++) {
        const char temp = arr[k];
        int i = k - 1;
        // Shift every larger element of the sorted prefix one slot to the right.
        while (i >= 0 && arr[i] > temp) {
            arr[i + 1] = arr[i];
            i--;
        }
        arr[i + 1] = temp;
    }
}
// Times InsertionSort on random digit strings of length 100, 200, ..., 9900 and prints one
// line per size in the form START<TAB>"<seconds>","<size>".
int main(void) {
    const int kSteps = 100;     // sizes are k * kStepSize for k = 1 .. kSteps - 1
    const int kStepSize = 100;

    // One fixed buffer large enough for the biggest input plus its terminator
    // (a runtime-sized array is not standard C++).
    static char Array[(kSteps - 1) * kStepSize + 1];

    // Seed once: re-seeding with time(NULL) inside the loop restarts the same sequence
    // for every iteration that falls within the same second.
    std::srand(static_cast<unsigned>(time(NULL)));

    // Stream manipulators are sticky, so set them before the first value is printed.
    std::cout << std::fixed << std::setprecision(9);

    //Generate Random Arrays of size snum
    for (int k = 1; k < kSteps; k++) {
        int snum = k * kStepSize;
        for (int s = 0; s < snum; s++)
        {
            int no = std::rand() % 9 + 1;
            Array[s] = no + '0';
        }
        // Terminate the string: the length scan used by the sort stops at '\0', and without
        // it the scan runs past the data (this is what shows up as a stray trailing character).
        Array[snum] = '\0';
        std::cout << "START\t";
        //cout<<"\n"<<Array<<"END\n";
        clock_t start, end;
        start = clock();
        InsertionSort(Array);
        end = clock();
        double time_taken = double(end - start) / double(CLOCKS_PER_SEC);
        std::cout << "\"" << time_taken << "\",\"" << snum << "\"" << std::endl;
    }
}
How I Compile and Run the Program:
g++ InsertionSort.cpp
./a.out > InsertionSort.txt
--------------EDIT--------------
Based on the suggestions, I have replaced the Array with a vector. Please provide any further suggestions....
RandomIntVector.cpp
#include "RandomIntVector.h"
#include <random>
#include <vector>
using namespace std;
// Returns a vector of exactly `size` integers drawn uniformly from [1, 1000].
// A non-positive `size` yields an empty vector.
//
// The generator is default-constructed on every call, so each call produces the same
// sequence; that keeps timing runs repeatable.
std::vector<int> RandomVector(int size){
    std::vector<int> Ar;
    if (size <= 0) {
        return Ar;
    }
    Ar.reserve(static_cast<std::size_t>(size));
    std::uniform_int_distribution<> d(1, 1000);
    std::mt19937 gen;
    // Loop bound is `size`, not `size - 1`, so the requested element count is honoured.
    for (int s = 0; s < size; s++)
    {
        Ar.push_back(d(gen));
    }
    return Ar;
}
InsertionSort.cpp
#include "InsertionSort.h"
#include <vector>
using namespace std;
// Insertion-sorts `arr` in ascending order.
// NOTE: `arr` is taken by value, so the sort runs on a private copy and the caller's
// vector is left unchanged; the call is only useful for timing the algorithm.
void InsertionSort(std::vector<int> arr){
    for (std::size_t next = 1; next < arr.size(); ++next) {
        const int key = arr[next];
        std::size_t hole = next;
        // Move larger elements of the sorted prefix up until the slot for `key` opens.
        while (hole > 0 && arr[hole - 1] > key) {
            arr[hole] = arr[hole - 1];
            --hole;
        }
        arr[hole] = key;
    }
}
Main.cpp
#include <iostream>
#include <vector>
#include <chrono>
#include <iomanip>
#include "RandomIntVector.h"
#include "InsertionSort.h"
using namespace std;
int main(void){
//Generate Random Arrays of size snum
for(int k=1;k<100;k++){
vector<int> Array = RandomVector(100*k);
clock_t start, end;
start = clock();
InsertionSort(Array);
end = clock();
double time_taken = double(end - start) /
double(CLOCKS_PER_SEC);
//Print the Time Taken along with the size of the Input
cout<<"\""<<fixed << time_taken << setprecision(9)<<"\",\""
<<100*k<<"\""<<endl;
}
return 0;
}

Incorrect timing in release mode

I'm trying to measure time of execution of the following code:
#include <iostream>
#include <cmath>
#include <stdio.h>
#include <chrono>
// Advances a 64-bit linear congruential generator by one step and returns the new state:
// state * multiplier + increment, with unsigned 64-bit wrap-around.
uint64_t LCG(uint64_t LCG_state)
{
    const uint64_t multiplier = 2862933555777941757;
    const uint64_t increment = 1422359891750319841;
    return LCG_state * multiplier + increment;
}
// Times 640000000 LCG steps with high_resolution_clock and prints the elapsed seconds.
// Nothing reads LCG_state or w after the loop, so an optimizing build is free to drop the
// loop entirely, in which case the measured interval contains no work.
int main()
{
auto begin = std::chrono::high_resolution_clock::now();
uint64_t LCG_state = 333;
uint32_t w;
for(int i=0; i<640000000; i++)
{
LCG_state = LCG(LCG_state);
// Keep the upper 32 bits of the state.
w = LCG_state >> 32;
//std::cout << w << "\n";
}
auto end = std::chrono::high_resolution_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
// Convert the nanosecond count to seconds for printing.
printf("Time measured: %.3f seconds.\n", elapsed.count() * 1e-9);
}
I'm using the Release build option in Code::Blocks (because I think I should if I want to measure it properly). The problem is that the measured time is 0 s every time. What's more, even if I wrap the loop in another loop:
#include <iostream>
#include <cmath>
#include <stdio.h>
#include <chrono>
// Advances a 64-bit linear congruential generator by one step and returns the new state:
// state * multiplier + increment, with unsigned 64-bit wrap-around.
uint64_t LCG(uint64_t LCG_state)
{
    const uint64_t multiplier = 2862933555777941757;
    const uint64_t increment = 1422359891750319841;
    return LCG_state * multiplier + increment;
}
// Same measurement as the single-loop version, with the 640000000-step loop repeated
// 10000 times. Nothing reads LCG_state or w after the loops, so an optimizing build is
// still free to remove all of the work.
int main()
{
auto begin = std::chrono::high_resolution_clock::now();
uint64_t LCG_state = 333;
uint32_t w;
for(int i=0; i<10000; i++)
{
// This inner `i` shadows the outer loop counter.
for(int i=0; i<640000000; i++)
{
LCG_state = LCG(LCG_state);
// Keep the upper 32 bits of the state.
w = LCG_state >> 32;
//std::cout << w << "\n";
}
}
auto end = std::chrono::high_resolution_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
// Convert the nanosecond count to seconds for printing.
printf("Time measured: %.3f seconds.\n", elapsed.count() * 1e-9);
}
Then the measured time is still 0 s. In debug mode everything works as expected, but measuring the speed of a debug build makes no sense, right? In particular, I would like to compare it to, for example, this:
#include <stdint.h>
#include <iostream>
// Two-word generator state, initialised to {5, 11}.
uint64_t s[2] = {5,11};
// Global slot for the most recent output; next() itself does not write to it.
uint64_t result;

// Returns the sum of the two state words, then advances the state: s[0] takes the old
// s[1], and s[1] becomes a shift/xor mix of both old words (shifts 23, 18 and 5).
uint64_t next(void) {
    const uint64_t first = s[0];
    const uint64_t second = s[1];
    s[0] = second;
    const uint64_t mixed = first ^ (first << 23); // a
    s[1] = mixed ^ second ^ (mixed >> 18) ^ (second >> 5); // b, c
    return first + second;
}
// Driver: calls next() 160000000 times, storing each value in the global `result`.
// The commented-out lines are alternative ways of emitting every value.
int main()
{
for(int i=0; i<160000000; i++)
//while (true)
{
//std::cout << next() << "\n";
result = next();
//char *c = reinterpret_cast<char*>(&result);
//std::cout.write(reinterpret_cast<char*>(&result), sizeof result);
}
}
I want to know what is faster. How to measure it right? Why is the execution time 0 seconds, does the code not execute at all?
You can add an empty asm statement dependent on the variable w
#include <iostream>
#include <cmath>
#include <stdio.h>
#include <chrono>
// Advances a 64-bit linear congruential generator by one step and returns the new state:
// state * multiplier + increment, with unsigned 64-bit wrap-around.
uint64_t LCG(uint64_t LCG_state)
{
    const uint64_t multiplier = 2862933555777941757;
    const uint64_t increment = 1422359891750319841;
    return LCG_state * multiplier + increment;
}
// Times 640000000 LCG steps, as in the question, but passes w through an empty inline-asm
// statement on every iteration so that the computation cannot be discarded.
int main()
{
auto begin = std::chrono::high_resolution_clock::now();
uint64_t LCG_state = 333;
uint32_t w;
for(int i=0; i<640000000; i++)
{
LCG_state = LCG(LCG_state);
w = LCG_state >> 32;
// Empty asm body with w as a read-write ("+") operand: the compiler must assume w is
// both used and modified here. This is GCC/Clang extended-asm syntax.
__asm__ volatile("" : "+g" (w) : :);
}
auto end = std::chrono::high_resolution_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
// Convert the nanosecond count to seconds for printing.
printf("Time measured: %.3f seconds.\n", elapsed.count() * 1e-9);
}
This is opaque to the compiler and will prevent the loop from being optimized out

Thread not improving the code performance

I am trying to convert a basic long loop into thread to improve the loop performance.
Here is the threaded version:
#include <iostream>
#include <thread>
#include <chrono>
using namespace std;
using namespace std::chrono;
// Adds every integer in the closed range [start, end] to *sum, one addition through the
// pointer per value. An empty range (start > end) leaves *sum unchanged.
void funcSum(long long int start, long long int end, long long int *sum)
{
    long long int value = start;
    while (value <= end)
    {
        *sum += value;
        ++value;
    }
}
int main()
{
long long int start = 10, end = 1900000000;
long long int sum = 0;
auto startTime = high_resolution_clock::now();
thread t1(funcSum, start, end / 2, &sum);
thread t2(funcSum, end / 2 + 1 , end, &sum);
t1.join();
t2.join();
auto stopTime = high_resolution_clock::now();
auto duration = duration_cast<seconds>(stopTime - startTime);
cout << "Sum: " << sum << endl;
cout << duration.count() << " Seconds";
return 0;
}
And here is the normal code (Without threads):
#include <iostream>
#include <thread>
#include <chrono>
using namespace std;
using namespace std::chrono;
// Adds every integer in the closed range [start, end] to *sum, one addition through the
// pointer per value. An empty range (start > end) leaves *sum unchanged.
void funcSum(long long int start, long long int end, long long int *sum)
{
    long long int value = start;
    while (value <= end)
    {
        *sum += value;
        ++value;
    }
}
// Single-threaded baseline: sums the integers 10 .. 1900000000 with one funcSum call and
// prints the total and the elapsed whole seconds.
int main()
{
    const long long int first = 10;
    const long long int last = 1900000000;
    long long int total = 0;

    const auto began = std::chrono::high_resolution_clock::now();
    funcSum(first, last, &total);
    const auto finished = std::chrono::high_resolution_clock::now();

    const auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(finished - began);
    std::cout << "Sum: " << total << std::endl;
    std::cout << elapsed.count() << " Seconds";
    return 0;
}
Sum: 1805000000949999955
5 Seconds
Process finished with exit code 0
In both the cases, time spent is 5 seconds.
Why does the threaded version not improve the performance? How can I use threads to reduce the time needed to sum this range?
Fixed version of threaded code:
// Compute the sum of start ... end
// Holds one closed integer range [start, end] together with the sum computed for it.
class Summer {
public:
    long long int start;    // first value of the range
    long long int end;      // last value of the range (inclusive)
    long long int sum = 0;  // result of the most recent funcSum() call

    Summer(long long int aStart, long long int aEnd) : start(aStart), end(aEnd) {}

    // Resets `sum` and then adds every integer of [start, end] to it.
    void funcSum()
    {
        sum = 0;
        long long int value = start;
        while (value <= end)
        {
            sum += value;
            ++value;
        }
    }
};
// Callable adapter that stores a reference to a Summer and forwards operator() to
// Summer::funcSum, so copies of the functor still update the referenced Summer.
class SummerFunctor {
public:
    SummerFunctor(Summer& aSummer) : mSummer(aSummer) {}

    void operator()() { mSummer.funcSum(); }

private:
    Summer& mSummer;  // not owned; must outlive every copy of the functor
};
// Version with n thread objects reports
// 1 threads, sum = 1805000000949999955, 1587 ms
// 2 threads, sum = 1805000000949999955, 2547 ms
// 4 threads, sum = 1805000000949999955, 1251 ms
// 6 threads, sum = 1805000000949999955, 916 ms
// Sums the integers 10 .. 1900000000 on `threadCount` threads. The range is cut into
// consecutive partitions, each summed by its own Summer on its own thread; the partial
// sums are added after the joins. Prints the thread count, total and elapsed milliseconds.
int main()
{
    long long int start = 10, end = 1900000000;
    long long int sum = 0;
    auto startTime = std::chrono::high_resolution_clock::now();
    const size_t threadCount = 6;
    if (threadCount < 2) {
        funcSum(start, end, &sum);
    } else {
        Summer* summers[threadCount];
        std::thread* threads[threadCount];
        // Start threads
        // Keep the partition size signed: dividing by the unsigned threadCount directly
        // makes the quotient unsigned where size_t is 64 bits wide, and std::min then
        // receives two different argument types and does not compile.
        const long long int partitionSize =
            (end - start) / static_cast<long long int>(threadCount);
        for (size_t i = 0; i < threadCount; ++i) {
            // Each partition covers partitionSize + 1 values, clamped to the end of the range.
            const long long int partitionEnd = std::min(start + partitionSize, end);
            summers[i] = new Summer(start, partitionEnd);
            start = partitionEnd + 1;
            // The functor holds a reference, so the thread's copy updates summers[i].
            threads[i] = new std::thread(SummerFunctor(*summers[i]));
        }
        // Join threads
        for (size_t i = 0; i < threadCount; ++i) {
            threads[i]->join();
            sum += summers[i]->sum;
            delete threads[i];
            delete summers[i];
        }
    }
    auto stopTime = std::chrono::high_resolution_clock::now();
    auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(stopTime - startTime);
    std::cout << threadCount << " threads, sum = " << sum << ", " << elapsed.count() << " ms" << std::endl;
    return 0;
}
I had to wrap the Summer object in a functor because std::thread makes its own copy of the callable handed to it, and that copy cannot be accessed afterwards. Execution gets faster as more threads are used (see the running times in the comments), but far less than proportionally. Possible reasons for this:
The CPU has to synchronize access to the memory pages even though the threads use separate variables here because the variables likely lie in the same page
If there is only one thread running on a CPU, that thread may run at higher CPU frequency, but several threads may run only at normal CPU frequency
CPU cores often share arithmetic units
Without threads, the compiler can make optimizations that are not possible with threads. In theory, the compiler could unroll the loop and directly print the result.

Fastest way to determine whether elements of a vector y occur in a vector x

I have the following problem: I have two vectors x and y of type double that are increasingly sorted and I would like to obtain a vector z indicating whether an element of y is present in x. Up to now, I have used std::binary_search in a for-loop as illustrated below, but I think there should be a faster way making use of the fact that also x is sorted?
The issue is that this needs to be super fast as it turns out to be the bottleneck in my code.
For those familiar with R, I need an equivalent to match(y, x, nomatch = 0L) > 0L.
#include <iostream>
#include <algorithm>
#include <vector>
// For each element of the sorted vector y, prints 1 if a binary search finds it in the
// sorted vector x and 0 otherwise, separated by spaces.
int main() {
    using namespace std;
    const vector<double> x = {1.8, 2.4, 3.3, 4.2, 5.6,7.9, 8.5, 9.3};
    const vector<double> y = {0.5, 0.98, 1.8, 3.1, 5.6, 6.6, 9.3, 9.3, 9.5};
    vector<bool> z(y.size());
    for (size_t pos = 0; pos < y.size(); ++pos) {
        z[pos] = binary_search(x.begin(), x.end(), y[pos]);
    }
    for (const bool flag : z) {
        cout << flag << " ";
    }
    return 0;
}
EDIT
Here are representative sample data for my problem:
#include <iostream>
#include <algorithm>
#include <vector>
#include <cstdlib>
#include <ctime>
// function generator:
// Returns the next std::rand() value divided by 10e+7 (i.e. 1e8).
double RandomNumber () {
    const double scale = 10e+7;
    return std::rand() / scale;
}
// Builds the representative sample data: x holds n random values, y holds n random values
// followed by a copy of every element of x; both vectors are then sorted ascending.
int main() {
    std::srand(unsigned(std::time(0)));
    // 5000 is representative
    const int n = 5000;

    std::vector<double> x(n);
    std::generate(x.begin(), x.end(), RandomNumber);

    std::vector<double> y(n);
    std::generate(y.begin(), y.end(), RandomNumber);
    // Append all of x so that every element of x also occurs in y.
    y.insert(y.end(), x.begin(), x.end());

    std::sort(x.begin(), x.end());
    std::sort(y.begin(), y.end());
    return 0;
}
You can use std::set_intersection:
#include <vector>
#include <algorithm>
#include <iterator>
#include <iostream>
// Prints the values that the sorted vectors x and y have in common, separated by spaces.
int main()
{
    const std::vector<double> x {1.8, 2.4, 3.3, 4.2, 5.6,7.9, 8.5, 9.3};
    const std::vector<double> y {0.5, 0.98, 1.8, 3.1, 5.6, 6.6, 9.3, 9.3, 9.5};

    std::vector<double> common;
    std::set_intersection(x.cbegin(), x.cend(),
                          y.cbegin(), y.cend(),
                          std::back_inserter(common));

    for (const double value : common) {
        std::cout << value << " ";
    }
}
Edit
To address Dieter Lücking point in the comments, here is a version that more closely matches R's match function:
#include <vector>
#include <deque>
#include <algorithm>
#include <iterator>
#include <functional>
#include <memory>
#include <iostream>
// For every element of the sorted vector `y`, reports whether an equivalent element occurs
// in the sorted vector `x`. The result has y.size() entries; entry i is true when y[i] is
// found in x. Both inputs must be sorted ascending with operator<.
//
// A single forward walk is used instead of std::set_intersection: set_intersection emits a
// value only min(count in y, count in x) times, so a value repeated in y but present once
// in x would be flagged for its first occurrence only.
template <typename T>
std::deque<bool> match(const std::vector<T>& y, const std::vector<T>& x)
{
    std::deque<bool> result(y.size(), false);
    auto xi = x.cbegin();
    std::size_t i = 0;
    while (i < y.size() && xi != x.cend()) {
        if (*xi < y[i]) {
            // x is behind: skip values that are smaller than the current y element.
            ++xi;
        } else {
            // *xi >= y[i]: they match exactly when y[i] is not smaller either.
            // xi is not advanced, so later duplicates of y[i] match the same x element.
            result[i] = !(y[i] < *xi);
            ++i;
        }
    }
    // Any y elements left over are larger than everything in x and stay false.
    return result;
}
int main()
{
std::vector<double> x {1.8, 2.4, 3.3, 4.2, 5.6,7.9, 8.5, 9.3};
std::vector<double> y {0.5, 0.98, 1.8, 3.1, 5.6, 6.6, 9.3, 9.3, 9.5};
const auto matches = match(y, x);
std::copy(std::cbegin(matches), std::cend(matches),
std::ostream_iterator<bool> {std::cout});
}
I picked up all your codes, Dieter timing sample and the sample data of 5000 random doubles of the OP to perform a more complete timing of all the alternatives. This is the code:
#include <chrono>
#include <iostream>
#include <algorithm>
#include <vector>
#include <iterator>
#include <cstdlib>
#include <ctime>
#include <assert.h>
#include <deque>
#include <functional>
#include <memory>
using namespace std;
// Returns the next std::rand() value divided by 10e+7 (i.e. 1e8).
double RandomNumber () {
    const double scale = 10e+7;
    return std::rand() / scale;
}
// std::set_intersection-based matcher used as one of the timed candidates.
// Returns a deque with y.size() flags; a flag is true when set_intersection emitted the
// corresponding element of y. Both inputs must be sorted ascending.
// NOTE: set_intersection emits a value only as many times as it occurs in BOTH ranges, so
// when y repeats a value more often than x does, only the first occurrences are flagged.
template <typename T>
std::deque<bool> match(const std::vector<T>& y, const std::vector<T>& x)
{
// The output holds references to the matching elements of y rather than copies.
std::vector<std::reference_wrapper<const T>> z {};
z.reserve(std::min(y.size(), x.size()));
std::set_intersection(y.cbegin(), y.cend(),
x.cbegin(), x.cend(),
std::back_inserter(z));
std::deque<bool> result(y.size(), false);
for (const auto& e : z) {
// Each reference points into y's storage, so its address offset from y.front() is its index.
result[std::distance(std::addressof(y.front()), std::addressof(e.get()))] = true;
}
return result;
}
// Benchmark driver: for NTESTS rounds, generates sorted sample vectors x (n values) and
// y (n values plus a copy of x), runs each candidate "is y[i] in x" implementation inside
// its own timed block, checks the results against the binary-search baseline, and finally
// prints the accumulated time of each candidate in nanoseconds.
int main() {
const int NTESTS = 10;
// Accumulated nanoseconds per candidate, summed over all rounds.
long long time1 = 0;
long long time2 = 0;
long long time3 = 0;
long long time3_prime = 0;
long long time4 = 0;
long long time5 = 0;
long long time6 = 0;
for (int i = 0; i < NTESTS; ++i){
// Re-seeded every round from the current time.
std::srand ( unsigned ( std::time(0) ) );
// 5000 is representative
int n = 5000;
std::vector<double> x (n);
std::generate (x.begin(), x.end(), RandomNumber);
std::vector<double> y (n);
std::generate (y.begin(), y.end(), RandomNumber);
// Append every element of x to y; this iterator `i` shadows the round counter.
for(std::vector<double>::const_iterator i = x.begin(); i != x.end(); i++) {
y.push_back(*i);
}
std::sort(x.begin(), x.end());
std::sort(y.begin(), y.end());
// One result container per candidate, allocated outside the timed blocks.
vector<bool> z1(y.size());
vector<unsigned char> z2(y.size());
vector<unsigned char> z3(y.size());
std::deque<bool> z3_prime;
vector<bool> z4(y.size());
std::vector<bool> z5(y.size());
std::vector<bool> z6(y.size());
// Original
{
auto start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i != y.size(); ++i) {
z1[i] = binary_search(x.begin(), x.end(), y[i]);
}
auto stop = std::chrono::high_resolution_clock::now();
auto duration = chrono::duration_cast<chrono::nanoseconds>(stop - start);
time1 += duration.count();
}
// Original (replacing vector<bool> by vector<unsigned char>)
{
auto start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i != y.size(); ++i) {
z2[i] = binary_search(x.begin(), x.end(), y[i]);
}
auto stop = std::chrono::high_resolution_clock::now();
auto duration = chrono::duration_cast<chrono::nanoseconds>(stop - start);
time2 += duration.count();
}
{ // Dieter Lücking set_intersection
auto start = std::chrono::high_resolution_clock::now();
size_t ix = 0;
size_t iy = 0;
// Two-index walk; on a match only iy advances, so repeated y values match the same x element.
while(ix < x.size() && iy < y.size())
{
if(x[ix] < y[iy]) ++ix;
else if(y[iy] < x[ix]) ++iy;
else {
z3[iy] = 1;
// ++ix; Not this if one vector is not uniquely sorted
++iy;
}
}
auto stop = std::chrono::high_resolution_clock::now();
auto duration = chrono::duration_cast<chrono::nanoseconds>(stop - start);
time3 += duration.count();
}
// Std::set_intersection
{
auto start = std::chrono::high_resolution_clock::now();
// The timed region includes building and returning the result deque inside match().
z3_prime = match(y, x);
auto stop = std::chrono::high_resolution_clock::now();
auto duration = chrono::duration_cast<chrono::nanoseconds>(stop - start);
time3_prime += duration.count();
}
{ // Ed Heal
auto start = std::chrono::high_resolution_clock::now();
int i_x = 0, i_y = 0;
// Two-index walk; on a match BOTH indices advance.
while (i_x < x.size() && i_y < y.size())
{
if (x[i_x] == y[i_y]) {
//cout << "In both" << x[i_x] << endl;
z4[i_y] = true;
++i_x;
++i_y;
} else if (x[i_x] < y[i_y]) {
++i_x;
} else {
z4[i_y] = false;
++i_y;
}
}
/* for (; i_y < y.size(); ++i_y) {
//Empty
} */
auto stop = std::chrono::high_resolution_clock::now();
auto duration = chrono::duration_cast<chrono::nanoseconds>(stop - start);
time4 += duration.count();
}
{ // JacquesdeHooge
auto start = std::chrono::high_resolution_clock::now();
auto it_x = x.begin();
int i = 0;
// Each lower_bound search resumes from where the previous one ended.
for (; i < (int)y.size(); ++i) {
it_x = std::lower_bound(it_x, x.end(), y[i]);
if (it_x == x.end()) break;
z5[i] = *it_x == y[i];
}
// Everything after the point where x ran out is marked as not found.
std::fill(z5.begin() + i, z5.end(), false);
auto stop = std::chrono::high_resolution_clock::now();
auto duration = chrono::duration_cast<chrono::nanoseconds>(stop - start);
time5 += duration.count();
}
{ // Skizz
auto start = std::chrono::high_resolution_clock::now();
vector<double>::iterator a = x.begin(), b = y.begin();
int i = 0;
// NOTE: `i` is incremented on every loop pass, including passes that only advance `a`,
// so z6[i] lines up with y's index only as long as no pass takes the `*a < *b` branch.
while (a != x.end () && b != y.end ())
{
if (*a == *b) {
z6[i] = true;
++a;
++b;
}
else
{
z6[i] = false;
if (*a < *b)
{
++a;
}
else
{
++b;
}
}
i++;
}
auto stop = std::chrono::high_resolution_clock::now();
auto duration = chrono::duration_cast<chrono::nanoseconds>(stop - start);
time6 += duration.count();
}
// Cross-check each candidate against the baseline; only the first 5000 flags are compared.
assert (std::equal(z1.begin(), z1.begin() + 5000, z2.begin()));
assert (std::equal(z1.begin(), z1.begin() + 5000, z3.begin()));
assert (std::equal(z1.begin(), z1.begin() + 5000, z3_prime.begin()));
assert (std::equal(z1.begin(), z1.begin() + 5000, z4.begin()));
assert (std::equal(z1.begin(), z1.begin() + 5000, z5.begin()));
assert (std::equal(z1.begin(), z1.begin() + 5000, z6.begin()));
}
// Report the totals; note that time3_prime is printed before time3.
cout << "Original - vector<bool>: \t\t" << time1 << " ns\n";
cout << "Original - vector<unsigned char>: \t" << time2 << " ns\n";
cout << "Set intersection (Daniel): \t\t" << time3_prime << " ns\n";
cout << "Set intersection (Dieter Lücking): \t" << time3 << " ns\n";
cout << "Ed Heal: \t\t\t\t" << time4 << " ns\n";
cout << "JackesdeHooge: \t\t\t\t" << time5 << " ns\n";
cout << "Skizz: \t\t\t\t\t" << time6 << " ns\n";
cout << endl;
return 0;
}
My results with g++ 5.2.1, -std=c++11 and -O3:
Original - vector<bool>: 10152069 ns
Original - vector<unsigned char>: 8686619 ns
Set intersection (Daniel): 1768855 ns
Set intersection (Dieter Lücking): 1617106 ns
Ed Heal: 1446596 ns
JackesdeHooge: 3998958 ns
Skizz: 1385193 ns
*Please note Ed Heal and Skizz solutions are essentially the same.
Since both vectors are sorted, you have to apply bin search only on the remainder part of the second vector.
So if, for example, you don't find x[i] before y[j], you can be certain that you won't find x[i + 1] before y[j] either. To find a match for x[i + 1] it therefore suffices to apply binary search starting at y[j].
Off the top of my head, I can only think of this:-
vector<double>::iterator a = x.begin(), b = y.begin();
while (a != x.end () && b != y.end ())
{
if (*a == *b)
{
// value is in both containers
++a;
}
else
{
if (*a < *b)
{
++a;
}
else
{
++b;
}
}
}
Perhaps this algorithm will be better as the two vectors are sorted. The time complexity is linear.
#include <iostream>
#include <algorithm>
#include <vector>
// Marks, for every element of the sorted vector y, whether it occurs in the sorted vector
// x, using one linear pass over both. Prints "In both<value>" for each match and then the
// 0/1 flags separated by spaces.
int main() {
    using namespace std;
    vector<double> x = {1.8, 2.4, 3.3, 4.2, 5.6,7.9, 8.5, 9.3};
    vector<double> y = {0.5, 0.98, 1.8, 3.1, 5.6, 6.6, 9.3, 9.3, 9.5};
    // Every flag starts out false; only matches are written.
    vector<bool> z(y.size());
    size_t i_x = 0, i_y = 0;
    while (i_x < x.size() && i_y < y.size())
    {
        if (x[i_x] == y[i_y]) {
            cout << "In both" << x[i_x] << endl;
            z[i_y] = true;
            // Advance y only: a value repeated in y must still match the same x element.
            ++i_y;
        } else if (x[i_x] < y[i_y]) {
            ++i_x;
        } else {
            // y[i_y] is smaller than everything left in x, so it is absent.
            ++i_y;
        }
    }
    // Any y elements left over are larger than all of x and keep their false flag.
    for (vector<bool>::const_iterator i = z.begin(); i != z.end(); ++i)
        cout << *i << " ";
    return 0;
}
An implementation of @JacquesdeHooge's answer:
// Returns one flag per element of the sorted vector `y`: true when that element also occurs
// in the sorted vector `x`. Each lower_bound search resumes where the previous one stopped,
// so x is never searched backwards.
std::vector<bool> ComputeMatchFlags(const std::vector<double>& x,
                                    const std::vector<double>& y) {
    std::vector<bool> found(y.size());
    auto cursor = x.begin();
    std::size_t pos = 0;
    while (pos < y.size()) {
        cursor = std::lower_bound(cursor, x.end(), y[pos]);
        if (cursor == x.end()) {
            // x is exhausted: no remaining element of y can be present.
            break;
        }
        found[pos] = (*cursor == y[pos]);
        ++pos;
    }
    std::fill(found.begin() + pos, found.end(), false);
    return found;
}
When you have found an element (or a place in the array the element would have been), you don't need to consider elements that occur before that any more. So use the result of the previous find instead of x.begin().
Since std::binary_search does not return an iterator, use std::lower_bound instead. Also consider std::find (yes linear search, it might be actually faster, depending on your data).
If this doesn't bring enough improvement, try std::unordered_set instead of an array.
Just a timing of binary search and set intersection, including the improvement of using std::vector<unsigned char> instead of std::vector<bool>:
#include <chrono>
#include <iostream>
#include <algorithm>
#include <vector>
// Times three ways of flagging which elements of y occur in x on a small fixed data set:
// binary search into vector<bool>, binary search into vector<unsigned char>, and a
// two-index walk similar to std::set_intersection. Each section prints its elapsed
// nanoseconds followed by the flags.
int main() {
using namespace std;
// Original
{
vector<double> x = {1.8, 2.4, 3.3, 4.2, 5.6,7.9, 8.5, 9.3};
vector<double> y = {0.5, 0.98, 1.8, 3.1, 5.6, 6.6, 9.3, 9.3, 9.5};
auto start = std::chrono::high_resolution_clock::now();
// The result vector is allocated inside the timed region.
vector<bool> z(y.size());
for (size_t i = 0; i != y.size(); ++i)
z[i] = binary_search(x.begin(), x.end(), y[i]);
auto stop = std::chrono::high_resolution_clock::now();
auto duration = chrono::duration_cast<chrono::nanoseconds>(stop - start);
cout << "vector<bool>: " << duration.count() << "ns\n";
for (auto i = z.begin(); i != z.end(); ++i)
cout << unsigned(*i) << " ";
cout << '\n';
}
// Original (replacing vector<bool> by vector<unsigned char>)
{
vector<double> x = {1.8, 2.4, 3.3, 4.2, 5.6,7.9, 8.5, 9.3};
vector<double> y = {0.5, 0.98, 1.8, 3.1, 5.6, 6.6, 9.3, 9.3, 9.5};
auto start = std::chrono::high_resolution_clock::now();
vector<unsigned char> z(y.size());
for (size_t i = 0; i != y.size(); ++i)
z[i] = binary_search(x.begin(), x.end(), y[i]);
auto stop = std::chrono::high_resolution_clock::now();
auto duration = chrono::duration_cast<chrono::nanoseconds>(stop - start);
cout << "vector<unsigned char>: " << duration.count() << "ns\n";
// Cast to unsigned so the flags print as numbers rather than as characters.
for (auto i = z.begin(); i != z.end(); ++i)
cout << unsigned(*i) << " ";
cout << '\n';
}
// Similar to std::set_intersection
{
vector<double> x = {1.8, 2.4, 3.3, 4.2, 5.6,7.9, 8.5, 9.3};
vector<double> y = {0.5, 0.98, 1.8, 3.1, 5.6, 6.6, 9.3, 9.3, 9.5};
auto start = std::chrono::high_resolution_clock::now();
vector<unsigned char> z(y.size());
size_t ix = 0;
size_t iy = 0;
// On a match only iy advances, so repeated y values match the same x element.
while(ix < x.size() && iy < y.size())
{
if(x[ix] < y[iy]) ++ix;
else if(y[iy] < x[ix]) ++iy;
else {
z[iy] = 1;
// ++ix; Not this if one vector is not uniquely sorted
++iy;
}
}
auto stop = std::chrono::high_resolution_clock::now();
auto duration = chrono::duration_cast<chrono::nanoseconds>(stop - start);
cout << "set intersection: " << duration.count() << "ns\n";
for (auto i = z.begin(); i != z.end(); ++i)
cout << unsigned(*i) << " ";
cout << '\n';
}
return 0;
}
Compiled with g++ -std=c++11 -O3 (g++ 4.84) gives:
vector<bool>: 3622ns
0 0 1 0 1 0 1 1 0
vector<unsigned char>: 1635ns
0 0 1 0 1 0 1 1 0
set intersection: 1299ns
0 0 1 0 1 0 1 1 0