Counting Comparisons with Merge Sort - c++

Right now I'm learning about algorithms, and I was encouraged to go back through all the different sorting methods to find out how many comparisons each kind takes to sort through an array of numbers. I need to implement a count inside this Merge Sort program which works, but I'm getting a little lost as far as where it needs to go. If anyone could point me in the right direction, I'd really appreciate it.
//MergeSort.h
#include <iostream>
using namespace std;
void merge_sort(int A[], int low, int high);
void merge(int A[], int low, int mid, int high);
void merge_sort(int A[], int low, int high)
{
int mid;
if(low<high)
{
mid=(low+high)/2;
merge_sort(A,low,mid);
merge_sort(A,mid+1,high);
merge(A,low,mid,high);
}
}
void merge(int A[], int low, int mid, int high)
{
int h, i, j, B[100], k;
h = low;
i = low;
j = mid + 1;
while ((h <= mid) && (j <= high))
{
if (A[h] <= A[j])
{
B[i] = A[h];
h++;
}
else
{
B[i] = A[j];
j++;
}
i++;
}
if (h > mid)
{
for (k = j;k <= high;k++)
{
B[i] = A[k];
i++;
}
}
else
{
for (k = h;k <= mid;k++)
{
B[i] = A[k];
i++;
}
}
for (k = low;k <= high;k++)
{
A[k] = B[k];
}
}
and
//MergeSort.cpp
#include <iostream>
using namespace std;
#include "MergeSort.h"
#include <ctime>
int main()
{
int A[1000], n = 100, i;
srand(time(NULL));
cout << "Here are " << n << " random numbers: \n";
for (i = 0; i < n; i++)
{
A[i] = rand() % 100;
cout << " " << A[i];
}
merge_sort(A, 0, n-1);
cout << "\n\nThe sorted array is: ";
for (int i=0;i<n;i++)
cout << A[i] <<" ";
cout<<endl<<endl;
system("pause");
}

One simple way to count number of comparisons is to change your merge and merge-sort functions from void to return number of comparisons within them and counting recursively.

The array of B 's length is only 100.it will be bound.

The simplest way to do this is to use a static global variable, and increment it with each compare of A[] in merge(), then have the main program display the count after a sort. This avoids having to change the interface for the existing functions.

Related

Obtaining Large Signed Value in my QuickSort/InsertionSort Algorithm

so my code is essentially working; however, I need clarification as to why when I implement insertion sort when I get to less than 20 elements in a subarray it adds a large signed value at the end when I print them out.
#include <iostream>
#include <cstdlib>
#include <ctime>
using namespace std;
void swap(int* a, int* b)
{
int t = *a;
*a = *b;
*b = t;
}
int partitionMiddle (int arr[], int low, int high) {
int pivot = arr[high];
int i = (low - 1);
for (int j = low; j <= high- 1; j++)
{
if (arr[j] <= pivot)
{
i++; // increment index of smaller element
swap(&arr[i], &arr[j]);
}
}
swap(&arr[i + 1], &arr[high]);
return (i + 1);
}
void chooseMiddle....
void chooseMedian{code here...}
void insertionSort(int arr[], int low, int high){
int j;
for(int i = low+1; i <= high; i++){
j = i;
while(j>low && arr[j-1]> arr[j]){
swap(arr[j], arr[j-1]);
j= j-1;
}
}
}
void quickSortMiddle(int arr[], int low, int high)
{
int pi = partitionMiddle(arr, low, high);
if (high-low > 20)
{
quickSortMiddle(arr, low, pi - 1);
quickSortMiddle(arr, pi + 1, high);
}
else {
insertionSort(arr, low, pi);
insertionSort(arr, pi+1, high);
}
}
void printArray(int arr[], int size)
{
int i;
for (i=0; i < size; i++)
printf("%d ", arr[i]);
}
int main()
{
clock_t x, k;
x = clock();
int arr[10000];
srand((unsigned)time(0));
for(int i =0; i<10000; i++) {
arr[i] = (rand()%100)+1;
}
int n = sizeof(arr)/sizeof(arr[0]);
cout << "Original array: " ;
printArray(arr, n);
cout << endl;
cout << "With Insertion Sort & Middle Element";
chooseMiddle(arr, 0, n-1);
quickSortMiddle(arr, 0, n-1);
cout << endl;
k = clock();
// can comment this one vvvv or the previous one out to try them both
cout << "With Insertion Sort & Median Element";
chooseMedian(arr, 0, arr[((n-1)/2)], n-1);
quickSortMiddle(arr, 0, n-1);
cout << endl;
k = clock();
printf("Sorted array: ");
printArray(arr, n);
cout << endl;
cout << "CPU Time : ";
cout << k-x;
cout << endl << endl;
return 0;
}
My expected results were to not just have -272632368 at the very end of my sorted array when I print it out. I think I went out of bounds in my insertion sort algorithm but I'm not sure as to where. Maybe someone with a better eye can catch it.
**** EDIT...So I went to debug the code on my own and it the number was appearing there somehow having to do with the n-1 parameter when I was calling the chooseMedian/chooseMiddle or quickSortMiddle functions. I changed it to simply being 'n'. I don't get why, but it's working now. If someone can explain why, that'd be helpful, but if not, that's okay too.
****EDIT2...So now that I have your attention, can you please explain to me why using pointers in the swap function is better than not? I found this example of a swap function online and don't know why they used pointers.

My merge sort algorithm is taking too long for large file input?

well it's not exactly a merge sort, the algorithm counts the number on inversions in an array using merge sort (basically I just added one simple line)
it takes 2.415 seconds to read and merge sort 100,000 distinct integers from a text file while others who solved the same problem (on coursera.com) said it took them less than 0.5 seconds
here's my code, what went wrong? file reading maybe? thanks
#include <bits/stdc++.h>
using namespace std;
int a,b,i,j,n,x,k;
int t[100000]={0};
long long int s=0;
void merge(int a, int mid, int b)
{
i=a;
j=mid+1;
k=a;
int v[100000]={0};
while(i<=mid && j<= b)
{
if (t[i]<t[j])
{v[k]=t[i];
i++;
}
else
{v[k]=t[j];
j++;
s+=mid-i+1; //this line here counts the inversions
}
k++;
}
while(i<=mid)
{v[k]=t[i];
i++; k++;}
while(j<=b)
{v[k]=t[j];
j++; k++;}
for (i=a;i<k;i++)
t[i]=v[i];
}
void mergeSort(int a, int b)
{
if(a<b)
{
int mid=(a+b)/2;
mergeSort(a,mid);
mergeSort(mid+1,b);
merge(a,mid,b);
}
}
int main(){
ifstream fin("C:\\Users\\ASUS\\Desktop\\coursera.txt");
n=100000;
for(i=0;i<n;i++)
fin>>t[i];
mergeSort(0,n-1);
cout<<endl<<s;
}
One issue I could see is that in merge function, you allocate too much space, and the copy back also take quite O(N), which make the total copy time O(N * N) instead of O(N*Log(N)). Simple change to merge function could be like:
void merge(int a, int mid, int b)
{
i = a;
j = mid + 1;
k = 0;
int* v = new int[b - a + 1];
while (i <= mid && j <= b)
{
if (t[i]<t[j])
{
v[k] = t[i];
i++;
}
else
{
v[k] = t[j];
j++;
s += mid - i + 1; //this line here counts the inversions
}
k++;
}
while (i <= mid)
{
v[k] = t[i];
i++; k++;
}
while (j <= b)
{
v[k] = t[j];
j++; k++;
}
for (i = 0; i<k; i++)
t[a+i] = v[i];
delete[] v;
v = NULL;
}

C++ Program loops forever

I seem to be having an issue that I've been stuck on for hours. I run the program, and it just hangs after asking for the user input. My computer also begins to slow down unless I terminate the program. I have no idea what the problem is. I have tried commenting out code to see where the issue may be coming from. I put a cout statement after asking for the input, and even that does not display.
#include <iostream>
#include <vector>
#include <stdexcept>
#include <iomanip>
#include <cstdlib>
#include <array>
#include "problem2.h"
using namespace std;
int binarySearch(int array[], int input);
void selectSort(int arr[], int n);
int problem2() {
srand(time(0)); // generate seed based on current system time
int rand[20];
int result;
int input = 1;
cout << "Enter a number to search for: ";
cin >> input;
cout << "testset ";
for (int z = 0; z < 19; z++) {
rand[z] = random() % 70;
cout << rand[15];
}
selectSort(rand, 20);
for (int t = 0; t < 20; t++) {
//cout << random1D[z];
}
result = binarySearch(rand, input);
//cout << result;
return 0;
}
int binarySearch(int arr[], int a) {
int high = 19;
int middle = 19/2;
int low = 0;
while (arr[middle] != a && low<= high) {
if (arr[middle] > a) {
high = middle - 1;
} else {
low = middle - 1;
}
if (low > high) {
}
}
return middle;
}
void selectSort(int arr[], int n) {
int min, temp;
for (int i = 0; i < n-1; i++) {
min = i;
for (int j = i + 1; j < n; j++) {
if (arr[j] < arr[min])
min = j;
}
if (min != i) {
temp = arr[i];
arr[i] = arr[min];
arr[min] = temp;
}
}
}
You have several loops, but all of them except for one have explicit termination. The for loops all end after a specific number of iterations, but your while loop is less certain. Your low is likely never going to be greater than your high, so the loop just keeps going.
Consider changing to low = middle + 1 or altering your logic to more likely ensure that low will eventually overtake high. Or, change the condition the while loop checks.

Merge Sort: Heap Corruption on delete[]

Working on a class project in which i need to implement a Merge Sort to sort 500,000 items.
After many attempts I tried looking for source code online and found some here: http://www.sanfoundry.com/cpp-program-implement-merge-sort/
I had to alter the code to use a dynamic array (for size). When the program runs the merge function, I create a new, dynamic array using the number of elements (or high) that are being merged. Once the function is finished sorting them and merge them into the original array, i use delete[] on the new dynamic array. This is where I get my "Heap Corruption Detected" error.
Here is the code (wall of text):
//Heap Sort
#include <iostream>
#include <fstream>
#include <sstream>
#include <ctime>
#include <stdlib.h>
#include <stdio.h>
using namespace std;
//Function Prototypes
void mergesort(int *a, int low, int high);
void merge(int *a, int low, int high, int mid);
int main()
{
//Start with element 1 of the array
int line_no = 0;
int num;
int array_size = 500000;
int* num_array = new int[array_size];
//Open file for input
fstream in_file("CSCI3380_final_project_dataset.txt", ios::in);
//Test for file opening
if (!in_file)
{
cout << "Cannot open words1.txt for reading" << endl;
exit(-1);
}
//Read file
while(true)
{
//Read one line at a time
in_file >> num;
//Test for eof
if (in_file.eof())
break;
num_array[line_no] = num;
//Increment array position
line_no++;
}
//Close the file
in_file.close();
//Start Time
clock_t time_a = clock();
//Run Sorting Algorithim
mergesort(num_array, 0, array_size-1);
//End Time
clock_t time_b = clock();
//Elapsed Time
if (time_a == ((clock_t)-1) || time_b == ((clock_t)-1))
{
cout << "Unable to calculate elapsed time" << endl;
}
else
{
int total_time_ticks = time_b - time_a;
cout << "Elapsed time: " << total_time_ticks << endl;
}
delete[] num_array;
return 0;
}
void mergesort(int *a, int low, int high)
{
int mid;
if (low < high)
{
mid=(low+high)/2;
mergesort(a,low,mid);
mergesort(a,mid+1,high);
merge(a,low,high,mid);
}
return;
}
void merge(int *a, int low, int high, int mid)
{
//--------------------------Create new array-------------------------------
int* sort_array = new int[high];
//--------------------------New Array Created-----------------------------
int i, j, k;
i = low;
k = low;
j = mid + 1;
while (i <= mid && j <= high)
{
if (a[i] < a[j])
{
sort_array[k] = a[i];
k++;
i++;
}
else
{
sort_array[k] = a[j];
k++;
j++;
}
}
while (i <= mid)
{
sort_array[k] = a[i];
k++;
i++;
}
while (j <= high)
{
sort_array[k] = a[j];
k++;
j++;
}
for (i = low; i < k; i++)
{
a[i] = sort_array[i];
}
//---------------------------Delete the New Array--------------------
delete[] sort_array;
//--------------------------Oh No! Heap Corruption!------------------
}
I'll spare you the "you should be using vectors", "you should be using smart pointers", etc. You should be, and I'll leave it at that. Regarding your actual problem....
You're writing one-past the allocated space of your array. The allocated size is high:
int* sort_array = new int[high];
meaning you can only dereference from 0..(high-1). Yet this:
while (j <= high)
{
sort_array[k] = a[j];
k++;
j++;
}
is one location that is guaranteed to write to sort_array[high], and therefore invoke undefined behavior.
A Different Approach
Mergesort is about div-2 partitioning. You know this. What you may not have considered is that C and C++ both perform pointer-arithmetic beautifully and as such you only need two parameters for mergesort(): a base address and a length. the rest can be taken care of for you with pointer math:
Consider this:
void mergesort(int *a, int len)
{
if (len < 2)
return;
int mid = len/2;
mergesort(a, mid);
mergesort(a + mid, len-mid);
merge(a, mid, len);
}
And a merge implementation that looks like this:
void merge(int *a, int mid, int len)
{
int *sort_array = new int[ len ];
int i=0, j=mid, k=0;
while (i < mid && j < len)
{
if (a[i] < a[j])
sort_array[k++] = a[i++];
else
sort_array[k++] = a[j++];
}
while (i < mid)
sort_array[k++] = a[i++];
while (j < len)
sort_array[k++] = a[j++];
for (i=0;i<len;++i)
a[i] = sort_array[i];
delete[] sort_array;
}
Invoked from main() like the following. Note: I've removed the file i/o in place of a random generation just to make it easier to test:
#include <iostream>
#include <ctime>
#include <cstdlib>
#include <cstdio>
using namespace std;
//Function Prototypes
void mergesort(int *a, int len);
void merge(int *a, int mid, int len);
int main()
{
std::srand((unsigned int)std::time(nullptr));
// Start with element 1 of the array
int array_size = 500000;
int* num_array = new int[array_size];
std::generate_n(num_array, array_size, std::rand);
// Start Time
clock_t time_a = clock();
// Run Sorting Algorithim
mergesort(num_array, array_size);
// End Time
clock_t time_b = clock();
//Elapsed Time
if (time_a == ((clock_t)-1) || time_b == ((clock_t)-1))
{
cout << "Unable to calculate elapsed time" << endl;
}
else
{
int total_time_ticks = time_b - time_a;
cout << "Elapsed time: " << total_time_ticks << endl;
}
delete[] num_array;
return 0;
}
This resulted is an elapsed time of:
Elapsed time: 247287
More Efficient
By now you've seen that you will need at most N-space in addition to you sequence. The top-most merge should e evidence enough of that. What you may not consider is that in-reality that is exactly the space you need, and you can allocate it up-front and use it throughout the algorithm if you desire. You can keep the current entrapping for mergesort(), but we'll be wrapping it up with a front-loader that allocates all the space we'll ever need once:
// merges the two sequences a[0...mid-1] and a[mid...len-1]
// using tmp[] as the temporary storage space
static void merge_s(int *a, int *tmp, int mid, int len)
{
int i=0, j=mid, k=0;
while (i < mid && j < len)
{
if (a[i] < a[j])
tmp[k++] = a[i++];
else
tmp[k++] = a[j++];
}
while (i < mid)
tmp[k++] = a[i++];
while (j < len)
tmp[k++] = a[j++];
for (i=0;i<len;++i)
a[i] = tmp[i];
}
static void mergesort_s(int *a, int *tmp, int len)
{
if (len < 2)
return;
int mid = len/2;
mergesort_s(a, tmp, mid);
mergesort_s(a + mid, tmp+mid, len-mid);
merge_s(a, tmp, mid, len);
}
void mergesort(int *a, int len)
{
if (len < 2)
return;
int *tmp = new int[len];
mergesort_s(a,tmp,len);
delete [] tmp;
}
This resulted in an elapsed time of:
Elapsed time: 164704
Considerably better than we had before. Best of luck.
The copy step shown in WhozCraig's code example can be avoided using a pair of functions to control the direction of the merge (note - a bottom up merge would still be faster).
Note - I wouldn't recommend using either WhozCraig's or my code example, since these methods were probably not covered in your class, and it's supposed to be code written based on what you were taught in your class. I don't know if bottom up merge sort was covered in your class, so I didn't post an example of it.
mergesort_s(int *a, int *tmp, int len)
{
// ...
mergesort_atoa(a, tmp, 0, len);
// ...
}
mergesort_atoa(int *a, int *tmp, int low, int end)
{
if((end - low) < 2){
return;
}
int mid = (low + end) / 2;
mergesort_atot(a, tmp, low, mid);
mergesort_atot(a, tmp, mid, end);
merge_s(tmp, a, low, mid, end);
}
mergesort_atot(int *a, int *tmp, int low, int end)
{
if((end - low) < 2){
tmp[0] = a[0];
return;
}
int mid = (low + end) / 2;
mergesort_atoa(a, tmp, low, mid);
mergesort_atoa(a, tmp, mid, end);
merge_s(a, tmp, low, mid, end);
}
void merge_s(int *src, int *dst, int low, int mid, int end)
{
int i = low; // src[] left index
int j = mid; // src[] right index
int k = low; // dst[] index
while(1){ // merge data
if(src[i] <= src[j]){ // if src[i] <= src[j]
dst[k++] = src[i++]; // copy src[i]
if(i < mid) // if not end of left run
continue; // continue (back to while)
while(j < end) // else copy rest of right run
dst[k++] = src[j++];
return; // and return
} else { // else src[i] > src[j]
dst[k++] = src[j++]; // copy src[j]
if(j < end) // if not end of right run
continue; // continue (back to while)
while(i < mid) // else copy rest of left run
dst[k++] = src[i++];
return; // and return
}
}
}

Quicksort algorithm with duplicate keys

I am trying to implement Quick Sort algorithm. Following code works for unique elements but it doesn't working for arrays having duplicate elements. Please tell me where I am doing wrong. Also when I change value of pivot to some other number other than 0 , program crashes. Here is the code:
#include <iostream>
#include <cstdlib>
using namespace std;
void swapme(int &a, int &b)
{
int temp = a;
a = b;
b = temp;
}
void quicksort(int *arr, int size)
{
// these two variables will take care of position of comparison
int lower = 0, upper = size - 1;
int pivot = 0; // assigns pivot
if (size <= 1)
return;
while (lower < upper)
{
while (arr[lower] < arr[pivot])
{
++lower;
}
}
while (arr[upper] > arr[pivot])
{
--upper;
}
if (upper > lower)
{
swapme(arr[upper], arr[lower]);
// upper--;
// lower++;
}
quicksort(arr, lower);
quicksort(&arr[lower + 1], size - 1 - lower);
}
int main()
{
int arr[30];
for(int j = 0; j < 30; j++)
{
arr[j] = 1 + rand() % 5000;
}
for(int j = 0; j < 30; j++)
{
cout << arr[j] << "\t";
}
cout << endl;
quicksort(arr, 30);
for(int j = 0; j < 30; j++)
{
cout << arr[j] << "\t";
}
cout << endl;
cin.get();
cin.get();
}
Update: I have finally managed to make it work. Here is the fixed version:
void swapme(int &a, int &b )
{
int temp = a;
a = b;
b = temp;
}
void quicksort(int *arr, int size)
{
if (size <= 1)
return;
// These two variables will take care of position of comparison.
int lower = 0;
int upper = size-1;
int pivot = arr[upper/2]; // assigns pivot
while (lower <= upper)
{
while (arr[lower] < pivot)
++lower;
while (arr[upper] > pivot)
--upper;
if (upper >= lower)
{
swapme(arr[upper],arr[lower]);
if(arr[upper] == arr[lower])
{
// Can either increment or decrement in case of duplicate entry
upper--; // lower++;
}
}
}
quicksort(arr, lower);
quicksort( &arr[lower+1], size-1-lower);
}
You are storing the index of your pivot element in the pivot variable, so swapping the elements can potentially change the choice of pivot element during the loop. Not a very good idea. I would suggest storing the actual value of the pivot element inside pivot instead.
Also, if this really isn't homework, why don't you simply use the standard library facilities?
#include <algorithm>
// ...
std::sort(arr + 0, arr + 30);
You will get heavily optimized and tested code that will outperform your handwritten Quicksort anytime.
Quick Sort that can implement any number of i/p integers. it also deal with duplicate keys
#include <conio.h>
#include <string>
using namespace std;
void InputArray(int*,int);
void QuickSort(int *,int,int);
int partition(int *,int,int);
void swap(int *,int,int);
void printArr(int *,int Siz=11);
void main(){
int siz;
cout<<"Enter Array length : "; cin>>siz;
int *a=new int[siz];
InputArray(a,siz);
QuickSort(a,0,siz-1);
int i=0,j=11;
printArr(a,siz);
system("pause");
}
void InputArray(int*a,int s){
for(int i=0; i<s; i++){
cout<<"ELement ["<<i<<"] = "; cin>>a[i];
}
}
void QuickSort(int *a,int start,int end){
if(start<end){
int pivot=partition(a,start,end);
QuickSort(a,start,pivot);
QuickSort(a,pivot+1,end);
}
}
int partition(int *a,int start,int end){
int currentPivotValue=a[start];
int i=start-1, j=end+1;
while(true){
i++;
while(i<j && a[i]<currentPivotValue){ i++; }
j--;
while(j>start && a[j]>currentPivotValue) {j--;}
if(i<j) swap(a,i,j);
else return j;
}
}
void swap(int *b,int i,int j){
int t=b[i];
b[i]=b[j];
b[j]=t;
}
void printArr(int *a,int Siz){
for(int i=0; i<Siz; i++) cout<<a[i]<<" ";
}