my biggest problem is getting the quickSortHelper class to work. i know what i want the parameters to be, and the stuff inside i need to call on is what i can't figure out. i've tried a mixture of using the partition and quicksort but i can't figure it out. the code is written like this because i will be using a timesort class in the future to solve and time 6+ sorting algorithms. i got it to work by just throwing the code inside main. but all i want inside main is what i have here.
#include <iostream>
#include <algorithm>
#include <vector>
#include <chrono>
#include <functional>
#include <random>
//i know not all the above libraries are being used, once quickSort is
//working i plan on adding it to 5 other sorting algorithms, where these
//are neccessary.
using namespace std;
void quickSort(vector<int>&, int, int);
int partition(vector<int>&, int, int);
double timeSort(vector<int> &v, function<void(vector<int>&)>f);
int main()
{
vector<int>intVec(1000);
generate(intVec.begin(), intVec.end(), rand);
int p = 0;
int q = 1000;
quickSort(intVec, p, q);
auto time = timeSort(intVec, quickSort);
for (auto i = 0u; i != intVec.size(); ++i)
cout << intVec[i] << " ";
cout << "\nQuick sort took " << time << " nanoseconds\n";
char chubby;
cin >> chubby;
return 0;
}
double timeSort(vector<int> &v, function<void(vector<int>&)>f)
{
auto start = chrono::high_resolution_clock::now();
f(v);
auto end = chrono::high_resolution_clock::now();
return static_cast<double>(((end - start).count()));
}
int partition(vector<int>&intVec, int p, int q)
{
int x = intVec[p];
int i = p;
int j;
for (j = p + 1; j < q; j++)
{
if (intVec[j] <= x)
{
i = i + 1;
swap(intVec[i], intVec[j]);
}
}
swap(intVec[i], intVec[p]);
return i;
}
void quickSort(vector<int>&intVec, int p, int q)
{
int r;
if (p < q)
{
r = partition(intVec, p, q);
quickSort(intVec, p, r);
quickSort(intVec, r + 1, q);
}
}
void quickSortHelper(vector<int>&intVec)
{
//i want to make a call to the timeSort function with
//quickSortHelper, i can't use quickSort directly because timeSort
//only has 2 parameters, the vector to be solved, and the method of
//solving it. i know
}
I suggest simplifying your program:
int main(void)
{
vector<int>intVec(1000);
unsigned int duration = 0;
for (unsigned int iteration = 0;
iteration < 1000000;
++iteration)
{
generate(intVec.begin(), intVec.end(), rand);
int p = 0;
int q = 1000;
auto start = chrono::high_resolution_clock::now();
quickSort(intVec, p, q);
auto end = chrono::high_resolution_clock::now();
duration += (end - start);
}
cout << "Average time for quicksort: " << (duration / 1000000) << "\n";
cout.flush();
return 0;
}
I made the following changes:
1) Running the sort for many iterations to get an average duration.
2) Removed the timing function; it only complicates things.
Umm... If I understand correctly, this should do it:
void quickSortHelper(vector<int>&intVec)
{
quickSort(intVec, 0, intVec.size());
}
Related
I want to write a method in C++ which creates an array of monotonically increasing values. It has the inputs of int begin, int end, int interval.
In this example; method should return the array of [0,1,2,3,4,5,6,7,8,9,10]. When I print the results it should print out the first two indexes and get 0 and 1. However, when I print it, it gives 0 for the first one and 9829656 for the second one.
When I only print one index it is always correct, but when I print more than one index, every value except for the first printed one gives a different result. I think the other results are related to memory address since I used pointers.
#include <iostream>
using namespace std;
int* getIntervalArray(int begin, int end, int interval){
int len = (end - begin) / interval + 1;
int result[11] = {};
for (int i = 0; i <= len - 1; i++) {
result[i] = begin + interval * i;
}
return result;
}
int main(){
int begin = 0;
int end = 10;
int interval = 1;
int* newResult = getIntervalArray(begin, end, interval);
cout << newResult[0] << endl;
cout << newResult[1] << endl;
return 0;
}
You are returning a pointer to a local variable. You can instead return a std::vector by value as shown below:
#include <iostream>
#include <vector>
//return a vector by value
std::vector<int> getIntervalArray(int begin, int end, int interval){
int len = (end - begin) / interval + 1;
std::vector<int> result(len); //create a vector of size len
for (int i = 0; i <= len - 1; i++) {
result.at(i) = begin + interval * i;
}
return result;
}
int main(){
int begin = 0;
int end = 10;
int interval = 1;
std::vector<int> newResult = getIntervalArray(begin, end, interval);
//print out elements of returned vector
for(int i = 0; i < newResult.size(); ++i)
{
std::cout << newResult.at(i) << std::endl;
}
return 0;
}
The output of the above program can be seen here.
A possible solution dynamically allocating the local array, and returning it via a smart pointer:
#include <array>
#include <iostream>
#include <memory> // make_unique
auto getIntervalArray(int begin, int end, int interval)
{
int len = (end - begin) / interval + 1;
auto result{ std::make_unique<std::array<int, 11>>() };
for (int i = 0; i <= len - 1; i++) {
(*result)[i] = begin + interval * i;
}
return result;
}
int main()
{
int begin = 0;
int end = 10;
int interval = 1;
auto newResult{ getIntervalArray(begin, end, interval) };
std::cout << (*newResult)[0] << std::endl;
std::cout << (*newResult)[1] << std::endl;
std::cout << (*newResult)[2] << std::endl;
return 0;
}
Demo
Set the array variable in your function as static. This is because C++ does not support returning the address of a local variable.
static int result[11];
try this. also add deletion of the newResult
#include <iostream>
using namespace std;
int* getIntervalArray(int begin, int end, int interval){
int len = (end - begin) / interval + 1;
int* result = new int[len];
int lastValue = begin;
for (int i = 0; i <= len - 1; i++) {
result[i] = lastValue;
lastValue += interval;
}
return result;
}
int main(){
int begin = 0;
int end = 10;
int interval = 1;
int* newResult = getIntervalArray(begin, end, interval);
cout << newResult[0] << endl;
cout << newResult[1] << endl;
// add delete here.
return 0;
}
i have a type defined as below,
using BookMapT = std::map<long long int, long long int, std::function<bool(long long int,long long int)>>;
BookMapT greater_map(greater<long long int>{});
BookMapT less_map(less<long long int>{});
So if have a number and I want to compare with the beginning of a BookMapT
int compare(BookMapT& book, long long int number_to_compare){
return book.key_comp()(number_to_compare, book.begin()->first);
}
From http://www.cplusplus.com/reference/map/map/key_comp/ I can see key_comp return a copy of the comparison object, will this copy action somehow hamper performance?
What confuse me most is how much cost is this copying of std::function ?
Thanks.
#include <chrono>
#include <functional>
#include <iostream>
#include <random>
typedef std::function<bool(long long int,long long int)> TF;
TF cp(const TF & f) { return f; }
int cp1(TF f, int times, int n) {
int c = 0;
for(int i = times; --i >= 0;) {
c += cp(f)(i, n);
}
return n;
}
int cp2(TF f, int times, int n) {
int c = 0;
for(int i = times; --i >= 0;) {
c += TF(f)(i, n);
}
return n;
}
int no_cp(TF f, int times, int n) {
int c = 0;
for(int i = times; --i >= 0;) {
c += f(i, n);
}
return n;
}
typedef int(*TP)(TF, int, int);
void test(TP p, int n)
{
double s = 0;
int c = 0;
for(int i = 10; --i >= 0;) {;
auto t = std::chrono::steady_clock::now();
c += (*p)(std::less<long long int>{}, 50000000, n);
std::chrono::duration<double> d = std::chrono::steady_clock::now() - t;
s += d.count();
}
std::cout << s / 10 << "(" << c << ")" << std::endl;
}
int main() {
int n = static_cast<int>(std::random_device()());
std::cout << "cp1: ";
test(&cp1, n);
std::cout << "cp2: ";
test(&cp2, n);
std::cout << "no_cp: ";
test(&no_cp, n);
}
Results with -O0 flag:
cp1: 2.24382
cp2: 2.10708
no_cp: 0.833974
Results with -O3 flag:
cp1: 0.20065
cp2: 0.199159
no_cp: 0.0643156
CPU: Intel Core i7 (Comet Lake)
I'm trying to sort large arrays with Quicksort and Mergesort to evaluate performances.
I've a problem: if I impose a large number of elements in an array, the program does not start to generate values randomly. In the code below, if N=500000, it works very well. If N > 500000, for example 1000000, it does not work. With MergeSort the limit is 200000. I tried on multiple devices, C++ on Eclipse IDE.
Someone knows how to solve the problem?
#define N 800000
#include <iostream>
#include <cstdlib>
#include <time.h>
#include <chrono>
using namespace std;
void Exchange(int *a, int *b) {
int temp;
temp = *a;
*a = *b;
*b = temp;
}
int Partition(int A[], int p, int r) {
int x = A[r];
int i = p - 1;
for (int j = p; j <= r; j++) {
if (A[j] < x) {
i++;
Exchange(&A[i], &A[j]);
}
}
Exchange(&A[i + 1], &A[r]);
return i + 1;
}
int RPartition(int A[], int p, int r) {
srand(time(NULL));
int i = p + rand() % (p - r);
Exchange(&A[i], &A[r]);
return Partition(A, p, r);
}
void QuickSort(int A[], int p, int r) {
if (p < r) {
int q = RPartition(A, p, r);
QuickSort(A, p, q - 1);
QuickSort(A, q + 1, r);
}
}
void Stampa(int A[], int n) {
for (int i = 0; i < n; i++) {
cout << A[i] << "\n";
}
}
int main() {
srand(50000);
int A[N];
for (int i = 0; i < N; i++) {
A[i] = rand();
}
cout << "Array non ordinato\n";
Stampa(A, N);
auto start = std::chrono::system_clock::now();
QuickSort(A, 0, N - 1);
auto end = std::chrono::system_clock::now();
cout << "\nArray ordinato\n";
Stampa(A, N);
std::chrono::duration<double> elapsed = end - start;
cout << "Elapsed time: " << elapsed.count() << "s";
}
The explanation is very simple: you allocate the array as a local variable with automatic storage (aka on the stack), hence if the size is too large, you get a stack overflow.
You should either allocate the array from the heap or define it as static data.
Here is a modified version:
int main() {
srand(time(NULL));
int *A = new int[N];
for (int i = 0; i < N; i++) {
A[i] = rand();
}
cout << "Array non ordinato\n";
Stampa(A, N);
auto start = std::chrono::system_clock::now();
QuickSort(A, 0, N - 1);
auto end = std::chrono::system_clock::now();
cout << "\nArray ordinato\n";
Stampa(A, N);
std::chrono::duration<double> elapsed = end - start;
cout << "Elapsed time: " << elapsed.count() << "s";
delete[] A;
}
I have a program that computes the matrix product x'Ay repeatedly. Is it better practice to compute this by making calls to MKL's blas, i.e. cblas_dgemv and cblas_ddot, which requires allocating memory to a temporary vector, or is better to simply take the sum of x_i * a_ij * y_j? In other words, does MKL's blas theoretically add any value?
I benchmarked this for my laptop. There was virtually no difference in each of the tests, other than g++_no_blas performed twice as poorly as the other tests (why?). There was also no difference between O2, O3 and Ofast.
g++_blas_static 57ms
g++_blas_dynamic 58ms
g++_no_blas 100ms
icpc_blas_static 57ms
icpc_blas_dynamic 58ms
icpc_no_blas 58ms
util.h
#ifndef UTIL_H
#define UTIL_H
#include <random>
#include <memory>
#include <iostream>
struct rng
{
rng() : unif(0.0, 1.0)
{
}
std::default_random_engine re;
std::uniform_real_distribution<double> unif;
double rand_double()
{
return unif(re);
}
std::unique_ptr<double[]> generate_square_matrix(const unsigned N)
{
std::unique_ptr<double[]> p (new double[N * N]);
for (unsigned i = 0; i < N; ++i)
{
for (unsigned j = 0; j < N; ++j)
{
p.get()[i*N + j] = rand_double();
}
}
return p;
}
std::unique_ptr<double[]> generate_vector(const unsigned N)
{
std::unique_ptr<double[]> p (new double[N]);
for (unsigned i = 0; i < N; ++i)
{
p.get()[i] = rand_double();
}
return p;
}
};
#endif // UTIL_H
main.cpp
#include <iostream>
#include <iomanip>
#include <memory>
#include <chrono>
#include "util.h"
#include "mkl.h"
double vtmv_blas(double* x, double* A, double* y, const unsigned n)
{
double temp[n];
cblas_dgemv(CblasRowMajor, CblasNoTrans, n, n, 1.0, A, n, y, 1, 0.0, temp, 1);
return cblas_ddot(n, temp, 1, x, 1);
}
double vtmv_non_blas(double* x, double* A, double* y, const unsigned n)
{
double r = 0;
for (unsigned i = 0; i < n; ++i)
{
for (unsigned j = 0; j < n; ++j)
{
r += x[i] * A[i*n + j] * y[j];
}
}
return r;
}
int main()
{
std::cout << std::fixed;
std::cout << std::setprecision(2);
constexpr unsigned N = 10000;
rng r;
std::unique_ptr<double[]> A = r.generate_square_matrix(N);
std::unique_ptr<double[]> x = r.generate_vector(N);
std::unique_ptr<double[]> y = r.generate_vector(N);
auto start = std::chrono::system_clock::now();
const double prod = vtmv_blas(x.get(), A.get(), y.get(), N);
auto end = std::chrono::system_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(
end - start);
std::cout << "Result: " << prod << std::endl;
std::cout << "Time (ms): " << duration.count() << std::endl;
GCC no blas is poor because it does not use vectorized SMID instructions, while others all do. icpc will auto-vectorize you loop.
You don't show your matrix size, but generally gemv is memory bound. As the matrix is much larger than a temp vector, eliminating it may not be able to increase the performance a lot.
My code is in
#include <iostream>
#include <string>
#include <algorithm>
#include <climits>
#include <vector>
#include <cmath>
using namespace std;
struct State {
int v;
const State *rest;
void dump() const {
if(rest) {
cout << ' ' << v;
rest->dump();
} else {
cout << endl;
}
}
State() : v(0), rest(0) {}
State(int _v, const State &_rest) : v(_v), rest(&_rest) {}
};
void ss(int *ip, int *end, int target, const State &state) {
if(target < 0) return; // assuming we don't allow any negatives
if(ip==end && target==0) {
state.dump();
return;
}
if(ip==end)
return;
{ // without the first one
ss(ip+1, end, target, state);
}
{ // with the first one
int first = *ip;
ss(ip+1, end, target-first, State(first, state));
}
}
vector<int> get_primes(int N) {
int size = floor(0.5 * (N - 3)) + 1;
vector<int> primes;
primes.push_back(2);
vector<bool> is_prime(size, true);
for(long i = 0; i < size; ++i) {
if(is_prime[i]) {
int p = (i << 1) + 3;
primes.push_back(p);
// sieving from p^2, whose index is 2i^2 + 6i + 3
for (long j = ((i * i) << 1) + 6 * i + 3; j < size; j += p) {
is_prime[j] = false;
}
}
}
}
int main() {
int N;
cin >> N;
vector<int> primes = get_primes(N);
int a[primes.size()];
for (int i = 0; i < primes.size(); ++i) {
a[i] = primes[i];
}
int * start = &a[0];
int * end = start + sizeof(a) / sizeof(a[0]);
ss(start, end, N, State());
}
It takes one input N (int), and gets the vector of all prime numbers smaller than N.
Then, it finds the number of unique sets from the vector that adds up to N.
The get_primes(N) works, but the other one doesn't.
I borrowed the other code from
How to find all matching numbers, that sums to 'N' in a given array
Please help me.. I just want the number of unique sets.
You've forgotten to return primes; at the end of your get_primes() function.
I'm guessing the problem is:
vector<int> get_primes(int N) {
// ...
return primes; // missing this line
}
As-is, you're just writing some junk here:
vector<int> primes = get_primes(N);
it's undefined behavior - which in this case manifests itself as crashing.