Merge sort variant: using link array - c++

I was looking for the variants of Merge Sort. So my textbook says,
A variant of function Merge in which No records need to be moved at all, can be implemented by use of an auxiliary array links.
Firstly, I would like to state the code.
Algo MergeSort(low,high)
{
// a is the array to be sorted using auxiliary array link.
if(high-low<15)
return InsertionSort1(a,link,low,high);
else
{
mid = (low+high)/2;
q=MergeSort(low,mid);
r=MergeSort(mid+1,high);
return Merge1(q,r);
}
}
// The function Merge1 is as defined:
function Merge1(q,r)
{
// q and r are pointers to list contained in the global array link[0:n], the lists pointed at by q //and r are merged and a pointer to the beginning of the merged list is returned.
i=q;
j=r;
k=0;
while(i!=0 && j!=0)
{
if(a[i]<=a[j])
{
link[k]=i;
k=i;
i=link[i];
}
else
{
link[k]=j;
k=j;
j=link[j];
}
} // end of while
if(i=0)
link[k]=j;
else
link[k]=i;
return link[0];
}
Okay so what I understood of the algorithm is:
If the number of elements are less than 15, apply insertion sort and sort those elements.
This way, we will get many lists that will be sorted by themselves but the entire array will not be sorted as such.
To sort the entire array, the function Merge is used.
My question is,
How is the function Merge combining the different sorted lists to one sorted list? I dont have any idea of the concept of the link array.
I am sorry but I tried very hard to understand but I dont get how the output array is "sorted" ?
Any kind of example will be of utmost help.
Thank You.

I cleaned up the code, and I also added a bottom up version that uses an array of starting indexes (see below). I changed high in MergeSort() to end, so the call is now MergeSort(0, SIZE). i = MergeSort() returns the index of the smallest value in a[], then i = link[i] is the 2nd element, i = link[i] is the 3rd element, until i = -1. Instead of using insertion sort, MergeSort() directly sorts groups of size==1 or size==2 and initializes link[].
MergeLists() uses head for the start of a list (the old code uses link[0]), and -1 for the end of a list (the old code uses 0). This allows sorting of a[0] to a[n-1] (the old code was sorting a[1] to a[n], with a[0] unused).
If a[] ={5,4,8,7}, then MergeSort() returns a 1, and link[] = {3,0,-1,2}, link[1] = 0, link[0] = 3, link[3] = 2, link[2] = -1, so the order is a[1], a[0], a[3], a[2].
#define SIZE 4
static unsigned int a[SIZE] = {5,8,4,7};
static size_t link[SIZE]; /* index to next element */
size_t MergeLists(size_t i, size_t j)
{
size_t head;
size_t *pprev = &head; /* ptr: head or link[] */
while((i != -1) && (j != -1)){ /* while not end lists */
if(a[i] <= a[j]){ /* if i < j */
*pprev = i; /* link to i */
pprev = &link[i]; /* advance pprev */
i=*pprev; /* advance i */
} else { /* else */
*pprev = j; /* link to j */
pprev = &link[j]; /* advance pprev */
j=*pprev; /* advance j */
}
}
if(i == -1) /* if end of i list */
*pprev=j; /* link to rest of j */
else /* else */
*pprev=i; /* link to rest of i */
return head;
}
size_t MergeSort(size_t low, size_t end)
{
size_t mid, i, j;
if((end - low) == 0){ /* if size == 0 */
return low; /* (only on first call) */
}
if((end - low) == 1){ /* if size == 1 */
link[low] = -1; /* initialize link[] */
return low; /* return index */
}
if((end - low) == 2){ /* if size == 2 */
if(a[low] <= a[end-1]){ /* if in order */
link[low] = end-1; /* initialize link[] */
link[end-1] = -1;
return low; /* return index */
} else { /* else */
link[end-1] = low; /* initialize link[] */
link[low] = -1;
return end-1; /* return index */
}
}
mid = (low+end)/2; /* size > 2, recursively */
i = MergeSort(low, mid); /* split lists until */
j = MergeSort(mid, end); /* size <= 2 */
return MergeLists(i, j); /* merge a pair of lists */
}
int main(void)
{
size_t i;
i = MergeSort(0, SIZE);
do{
printf("%3d", a[i]);
i = link[i];
}while(i != -1);
return 0;
}
This is an example that is non-recursive. It uses an array of starting indexes S[]. N[] is the same a link[] above, and MergeLists() is the same as before. S[0] points to lists of size 1, S[1] points to lists of size 2, S[2] points to lists of size 4, ... S[i] points to lists of size 2^i (2 to the power i). S[31] points to a list of unlimited size. Elements are merged into the array one at a time, then the array lists are merged to form a single list.
#define NUMIDX (32) // number of indexes in array
// A[] is array to be sorted
// N[] is array of indexes to next index
// l is index of N[] to left list
// r is index of N[] to right list
// returns starting index (l or r) for merged list
size_t MergeLists(int A[], size_t N[], size_t l, size_t r)
{
size_t head;
size_t *pprev = &head; // ptr: head or N[]
while((l != -1) && (r != -1)){ // while not end lists
if(A[l] <= A[r]){ // if l <= r
*pprev = l; // link to l
pprev = &N[l]; // advance pprev
l=*pprev; // advance l
} else { // else
*pprev = r; // link to r
pprev = &N[r]; // advance pprev
r=*pprev; // advance r
}
}
if(l == -1) // if end of l list
*pprev=r; // link to rest of r
else // else
*pprev=l; // link to rest of l
return head;
}
// A[] is array to be sorted
// N[] is set to array of indexes to next index (-1 = end list)
// low is starting index of A[]
// end is ending index of A[] (1 past last)
// returns starting index of N[] for merged list
// S[] is array of starting indexes in N[]
// S[i] is starting index of list of size pow(2,i)
size_t MergeSort(int A[], size_t N[], size_t low, size_t end)
{
size_t S[NUMIDX]; // array of starting indexes
size_t i,j;
if((end - low) == 0){ // if size == 0
return low; // (only on first call)
}
for(i = 0; i < (end-low); i++) // init N[]
N[i] = -1;
for(i = 0; i < NUMIDX; i++) // init S[]
S[i] = -1;
for(j = low; j < end; j++){ // merge index lists into S[], N[]
low = j;
for(i = 0; (i < NUMIDX) && (S[i] != -1); i++){
low = MergeLists(A, N, S[i], low);
S[i] = -1;
}
if(i == NUMIDX)
i--;
S[i] = low;
}
low = -1; // merge S[] lists to one list in N[]
for(i = 0; i < NUMIDX; i++)
low = MergeLists(A, N, S[i], low);
return low;
}

Related

Splitting a group of numbers into a subgroups of members

I wanted to ask how to check if a group of numbers could be split into subgroups (every subgroup has to have 3 members) that every sum of subgroups' members would be equal. How to check so many combinations?
Example:
int numbers[] = {1, 2, 5, 6, 8, 3, 2, 4, 5};
can be divided into
{1, 5, 6}, {2, 8, 2}, {3, 4, 5}
A recursive approach can be followed, where one keeps two arrays:
An array with the sums of every subgroup.
A boolean array to check whether an element is already taken into
some subgroup or not.
You asked for 3 subgroups, i.e. K = 3 in the rest of this post, but keep in mind that when dealing with recursion, bases cases should be taken into account. In this case we will focus on two base cases:
If K is 1, then we already have our answer, complete array is only
subset with same sum.
If N < K, then it is not possible to divide array into subsets with
equal sum, because we can’t divide the array into more than N parts.
If the sum of group is not divisible by K, then it is not possible to divide it. We will only proceed if k divides sum. Our goal reduces to divide the group into K subgroups where sum of each subgroup should be the sum of the group divided by K.
In the code below a recursive method is written which tries to add array element into some subset. If sum of this subset reaches required sum, we iterate for next part recursively, otherwise we backtrack for different set of elements. If number of subsets whose sum reaches the required sum is (K-1), we flag that it is possible to partition array into K parts with equal sum, because remaining elements already have a sum equal to required sum.
Quoted from here, while in your case you would set K = 3, as in the example code.
// C++ program to check whether an array can be
// subsetitioned into K subsets of equal sum
#include <bits/stdc++.h>
using namespace std;
// Recursive Utility method to check K equal sum
// subsetition of array
/**
array - given input array
subsetSum array - sum to store each subset of the array
taken - boolean array to check whether element
is taken into sum subsetition or not
K - number of subsetitions needed
N - total number of element in array
curIdx - current subsetSum index
limitIdx - lastIdx from where array element should
be taken */
bool isKPartitionPossibleRec(int arr[], int subsetSum[], bool taken[],
int subset, int K, int N, int curIdx, int limitIdx)
{
if (subsetSum[curIdx] == subset)
{
/* current index (K - 2) represents (K - 1) subsets of equal
sum last subsetition will already remain with sum 'subset'*/
if (curIdx == K - 2)
return true;
// recursive call for next subsetition
return isKPartitionPossibleRec(arr, subsetSum, taken, subset,
K, N, curIdx + 1, N - 1);
}
// start from limitIdx and include elements into current subsetition
for (int i = limitIdx; i >= 0; i--)
{
// if already taken, continue
if (taken[i])
continue;
int tmp = subsetSum[curIdx] + arr[i];
// if temp is less than subset then only include the element
// and call recursively
if (tmp <= subset)
{
// mark the element and include into current subsetition sum
taken[i] = true;
subsetSum[curIdx] += arr[i];
bool nxt = isKPartitionPossibleRec(arr, subsetSum, taken,
subset, K, N, curIdx, i - 1);
// after recursive call unmark the element and remove from
// subsetition sum
taken[i] = false;
subsetSum[curIdx] -= arr[i];
if (nxt)
return true;
}
}
return false;
}
// Method returns true if arr can be subsetitioned into K subsets
// with equal sum
bool isKPartitionPossible(int arr[], int N, int K)
{
// If K is 1, then complete array will be our answer
if (K == 1)
return true;
// If total number of subsetitions are more than N, then
// division is not possible
if (N < K)
return false;
// if array sum is not divisible by K then we can't divide
// array into K subsetitions
int sum = 0;
for (int i = 0; i < N; i++)
sum += arr[i];
if (sum % K != 0)
return false;
// the sum of each subset should be subset (= sum / K)
int subset = sum / K;
int subsetSum[K];
bool taken[N];
// Initialize sum of each subset from 0
for (int i = 0; i < K; i++)
subsetSum[i] = 0;
// mark all elements as not taken
for (int i = 0; i < N; i++)
taken[i] = false;
// initialize first subsubset sum as last element of
// array and mark that as taken
subsetSum[0] = arr[N - 1];
taken[N - 1] = true;
if (subset < subsetSum[0])
return false;
// call recursive method to check K-subsetition condition
return isKPartitionPossibleRec(arr, subsetSum, taken,
subset, K, N, 0, N - 1);
}
// Driver code to test above methods
int main()
{
int arr[] = {2, 1, 4, 5, 3, 3};
int N = sizeof(arr) / sizeof(arr[0]);
int K = 3;
if (isKPartitionPossible(arr, N, K))
cout << "Partitions into equal sum is possible.\n";
else
cout << "Partitions into equal sum is not possible.\n";
}
Output:
Partitions into equal sum is possible.
Relevant links: 2 and 3.
You could just do something like that in this particular case (3x3):
const int COUNT = 9;
bool test(int const (&array)[COUNT], std::vector<std::vector<int>>* result) {
for(int _1=0; _1<COUNT-2; ++_1) {
for(int _2=1; _2<COUNT-1; ++_2) {
if(_2 == _1)
continue;
for(int _3=2; _3<COUNT; ++_3) {
if(_3 == _2 || _3 == _1)
continue;
std::vector<int> chosen1 {array[_1], array[_2], array[_3]};
std::vector<int> rest;
for(int _x = 0; _x < COUNT; ++_x) {
if(_x != _1 && _x != _2 && _x != _3) {
rest.push_back(array[_x]);
}
}
for (int _4 = 0; _4 < COUNT-5; ++_4) {
for (int _5 = 1; _5 < COUNT-4; ++_5) {
if(_5 == _4)
continue;
for (int _6 = 2; _6 < COUNT-3; ++_6) {
if(_6 == _5 || _6 == _4)
continue;
std::vector<int> chosen2 = {rest[_4], rest[_5], rest[_6]};
std::vector<int> chosen3;
for(int _x = 0; _x < COUNT-3; ++_x) {
if(_x != _4 && _x != _5 && _x != _6) {
chosen3.push_back(rest[_x]);
}
}
int total = std::accumulate(chosen1.begin(), chosen1.end(), 0);
if((std::accumulate(chosen2.begin(), chosen2.end(), 0) == total) &&
(std::accumulate(chosen3.begin(), chosen3.end(), 0) == total)) {
*result = {chosen1, chosen2, chosen3};
return true;
}
}
}
}
}
}
}
return false;
}
int main() {
int values[] = {1, 2, 5, 6, 8, 3, 2, 4, 5};
std::vector<std::vector<int>> result;
if(test(values, &result)) {
for(auto& x : result) {
std::cout << "{";
for(auto& y : x) {
std::cout << y << ",";
}
std::cout << "}";
}
std::cout << std::endl;
} else {
std::cout << "not found";
}
}
If you had longer array (3+ * 3) then you could use recurrence (you could use it in my example too), but that would be still very slow.

Really slow when getting maximum distance between m nodes (Backtracking) (c++)

I've been asked to make an exercise using Backtracking, or Backtracking + Branch and Bound, where the imput data are n, m and a matrix(n x n). The n, represents a number of people, and the m, some people from the n. In the matrix, there are the distances among them, and the distances between i and j is different from the j and the i.
I am trying to get the maximum distance i can get from m nodes, that distance is the sum of the distance of all of them. For example, if i choose the node 1, 2 and 4, the result is the sums: distance(1, 2) + distance(2,1) + distance(2, 4) + distance(4, 2) + distance(1, 4) + distance(4, 1).
I have used Backtracking with Branch and Bound (iterative, not recursive), storing the nodes (structs where i store the current value and nodes used) that may get me to a solution. This nodes stores de lower and upper bound, i mean, the lower and higher solution I can obtain if i keep on using this node and his sons. From a node x, I generate all the possible nodes (nodes that are not used), and I check if this node may get me to a solution, if not, this node is discarded and erased.
The code i have implemented to make this, works, but it is really slowly. With low values of n and m, it is quick, but if i use higher numbers it is really slowly.
This is the main function and the others functions that are used:
void backtracking(int **matrix, int n, int m){
/////////////////////////////////////////////////////
/*
Part where I try to get the minimum/maximum that I can get from the beginning of the problem
*/
// Lists where I store the values from the matrix, sort from the minimum to the maximum, and from
// the maximum to the minimum. The values are the distances, I mean, the sum of matrix[i][j] and
// matrix[j][i].
list<int> listMinSums; // list of minimum sums
list<int> listMaxSums; // list of maximum sums
int nMinimumSums = floor((m*m - m)/2); // rounding down
int nMaximumSums = ceil((m*m - m)/2); // rounding up
/*
* m*m - m = Given m nodes, there are m*m - m sums.
*
* I count matrix[i][j] + matrix[j][i] as one, so there
* are (m*m - m)/2 sums.
*/
for (int i = 0; i < n; i++){
for (int j = 0; j < i; j++){
int x = (matrix[i][j] + matrix[j][i]);
// to differentiate from the minimum and maximum sums, I use false and true
aLista(listMinSums, x, nMinimumSums, false);
aLista(listMaxSums, x, nMaximumSums, true);
}
}
int min = 0;
int max = 0;
int contador = 0; // counting in every iteration to not surpassing the minimum/maximum sums
list<int>::iterator it = listMinSums.begin();
while (it != listMinSums.end() && contador < nMinimumSums){
min += *it;
it++;
contador++;
}
contador = 0;
list<int>::iterator it2 = listMaxSums.begin();
while (it2 != listMaxSums.end() && contador < nMaximumSums){
max += *it2;
it2++;
contador++;
}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
// LLV = List of Live Nodes. Where I store the nodes that are going to
// guide me to the solution
list<nodo*> llv;
// I do not store the root node, i store the first n nodes, of the first level.
for (int i = 0; i < n; i++){
nodo *nod = new nodo(n);
nod ->level = 0;
//lower bound. It's the lower solution i can get from this node
nod ->ci = min;
// upper bound. The higher solution i can get from this node.
nod ->cs = max;
// estimated benefit. If i have to choose from one node or another, i choose the one with higher
nod ->be = (min+max)/2;
// The node i is used
nod ->used[i] = true;
// Inserting this node to the list of live nodes.
insert(llv, nod);
}
int solution = 0; // Initial solution
int c = min; // c = variable used to not use a node and his "sons", anymore
while (!empty(llv)){
nodo *x = erase(llv, n); // erasing the node with the highest estimated benefit from the llv.
if (x ->cs > c){
for (int i = 0; i < n; i++){ // Creating every son of the x node...
if (!(x ->used[i])){ // ... that has not being used yet
nodo *y = new nodo(n);
y ->level = x ->level + 1;
for (int j = 0; j < n; j++){
y ->used[j] = x ->used[j];
}
y ->used[i] = true;
// Adding the values. For example, if node 1 and 2 were inserted, and this is the node 4,
// adding matrix[1][4]+matrix[4][1]+matrix[2][4] + matrix[4][2]
int acum = 0;
for (int k = 0; k < n; k++){
if (k != i && consult(x ->used, k))
acum += matrix[i][k] + matrix[k][i];
}
y ->bact = x ->bact + acum;
// Getting the lower and upper bound of this node y.
cotas(y, x, i, y ->level, n, matrix);
y ->be = (y ->ci + y ->cs)/2;
// Node where i can get the solution
if (y ->level == (m-1) && y ->bact > solution){
solution = y ->bact;
if (y ->bact > c)
c = y ->bact;
}
// Checking if i can update c
else if (y ->level != (m-1) && y ->cs > c){
insert(llv, y);
if (y ->ci > c)
c = y ->ci;
}
else{
// i cannot use this node anymore, so i delete it.
delete y;
}
}
}
}
}
cout << solution << endl;
liberacionMemoria(matrix, n); // freeing the memory used in the matrix
}
void liberacionMemoria(int **matriz, int n){
for (int i = 0; i < n; i++)
delete[] matriz[i];
delete[] matriz;
}
void insert(list<nodo*> &t, nodo *x){
list<nodo*>::iterator it= t.begin();
t.insert(it, x);
}
/*
* Getting the node with hightest estimated benefit from the list of live nodes
* */
nodo* erase (list<nodo*> &t, int n){
nodo* erased = new nodo(n);
erased ->level = -1;
erased ->be = -1;
list<nodo*>::iterator it= t.begin();
list<nodo*>::iterator it2;
while (it != t.end()){
nodo* aux = *it;
if (aux ->be > erased ->be){
it2 = it;
erased = aux;
}
else if (aux ->be == erased ->be && aux ->level > erased ->level){
it2 = it;
erased = aux;
}
it++;
}
t.erase(it2);
return erased;
}
/*
* Checking if in the array of nodes used, the node in the x position it's used
* */
bool consult(bool *nodesUsed, int x){
if (nodesUsed[x])
return true;
return false;
}
bool empty(list<nodo*> &t){
list<nodo*>::iterator it= t.begin();
return (it==t.end());
}
bool aLista(list<int> &t, int x, int m, bool MayorAMenor){
list<int>::iterator it = t.begin();
int contador = 0;
while (it != t.end() && contador < m){
if (!MayorAMenor){ // lower to upper
if (*it > x){
t.insert(it, x);
return true;
}
}
else{
if (*it < x){
t.insert(it, x);
return true;
}
}
contador++;
it++;
}
if (it == t.end() && contador < m){
t.insert(it, x);
return true;
}
return false;
}
void cotas(nodo *sonNode, nodo *fatherNode, int elegido, int m, int n, int **matriz){
int max = 0;
int min = 999;
// Getting the sums from the chosen node with the already used
for (int i = 0; i < n; i++){
if (consult(sonNode ->used, i)){
if (elegido != i){
int x = matriz[i][elegido] + matriz[elegido][i];
if (x > max)
max = x;
if (x < min)
min = x;
}
}
}
min *= (m-1);
max *= (m-1);
min += fatherNode -> bact;
max += fatherNode -> bact;
sonNode -> ci = fatherNode ->ci - min;
sonNode -> cs = fatherNode ->cs - max;
}
I think, that the reason of going really slow with n and m a bit high, it is really slowly, it is because of the upper and lower bounds of the nodes, that are not accurate, but i don't know how to make it better.
I've been many days thinking how to do it, and trying to, but nothing works.
Here there are some examples:
Given an n = 4 and m = 2 and the following matrix:
0 3 2 4
2 0 4 5
2 1 0 4
2 3 2 0
the result is 8. This works and it is quickly.
But with n = 40 and m = 10 it never ends...
I hope someone may help me. Thanks.
****EDIT******
I may not have explained well. My doubt is, how can i know, from a node x, the less and the maximum I can get.
Because, the length of the solution nodes depends on m, but the solution changes if i choose some nodes or others, and I don't know how to be sure, of obtaining the less and the maximum from a node, but being accurate, to be able to cut the others branchs that do not guide me to a solution

Having trouble getting Merge sort to be O(n log n)

In full disclosure, I'm a student and having trouble with merge sort. The purpose is obviously to have a O(n log n), but it's more n^2. I think the problem lies within the tempList, as you'll see in the code, but in the program description it says to use static int tempList[LIST_SIZE] to avoid degradation.
Here's what I have and the runtime using start is around 16000, which is obviously way to long for the merge sort.
void mergeSort(int randomNum[], int lowIdx, int highIdx)
{
int midIdx;
if (lowIdx < highIdx)
{
midIdx = (highIdx + lowIdx) / 2;
mergeSort(randomNum, lowIdx, midIdx);
mergeSort(randomNum, midIdx + 1, highIdx);
merge(randomNum, lowIdx, midIdx, highIdx);
}
}
Here is the second portion of the sort
void merge(int randomNum[], int lowIdx, int midIdx, int highIdx)
{
static int tempList[MAX_SORT];
for (int count = 0; count <= highIdx; count++)
tempList[count] = randomNum[count];
int leftIdx = lowIdx,
rightIdx = midIdx + 1,
tempPos = lowIdx;
while (leftIdx <= midIdx && (rightIdx <= highIdx))
{
if (tempList[leftIdx] <= tempList[rightIdx])
{
randomNum[tempPos] = tempList[leftIdx];
leftIdx++;
}
else
{
randomNum[tempPos] = tempList[rightIdx];
rightIdx++;
}
tempPos++;
}
while (leftIdx <= midIdx)
{
randomNum[tempPos] = tempList[leftIdx];
tempPos++;
leftIdx++;
}
while (rightIdx <= highIdx)
{
randomNum[tempPos] = tempList[rightIdx];
tempPos++;
rightIdx++;
}
}
The details of the program are that we have an array with 100000 random numbers and sort it using various sorting algorithms. The other sorts are working as expected, but this one seems to be off by a lot in comparison to what the big-O is supposed to be.
Can someone please help?
Not sure if this is all of your problem, but this is one issue:
You are copying randomNum to tempList from 0 to highIdx, but you only ever access tempList from lowIdx to highIdx.
That means that all the items you copied from 0 to lowIdx are wasted copies.
Solution: Only copy what you need.
for (int count = lowIdx; count <= highIdx; count++)
You might want to consider a bottom up merge sort. Example template code. a[] is the array to be sorted, b[] is a temp array with the same size as a[]. The sorted data may end up in either a[] or b[]. This can be modified to always end up with the data in a[] by doing a pass count check at the start and optionally skipping the swap in place if there will be an even number of passes.
template <typename T>
T * BottomUpMergeSort(T a[], T b[], size_t n)
{
for(size_t s = 1; s < n; s += 2) // swap in place for 1st pass
if(a[s] < a[s-1])
std::swap(a[s], a[s-1]);
for(size_t s = 2; s < n; s <<= 1){ // s = run size
size_t ee = 0; // init end index
while(ee < n){ // merge pairs of runs
size_t ll = ee; // ll = start of left run
size_t rr = ll+s; // rr = start of right run
if(rr >= n){ // if only left run
rr = n;
BottomUpCopy(a, b, ll, rr); // copy left run
break; // end of pass
}
ee = rr+s; // ee = end of right run
if(ee > n)
ee = n;
BottomUpMerge(a, b, ll, rr, ee);
}
std::swap(a, b); // swap a and b
}
return a; // return sorted array
}
template <typename T>
void BottomUpCopy(T a[], T b[], size_t ll, size_t rr)
{
while(ll < rr){ // copy left run
b[ll] = a[ll];
ll++;
}
}
template <typename T>
void BottomUpMerge(T a[], T b[], size_t ll, size_t rr, size_t ee)
{
size_t o = ll; // b[] index
size_t l = ll; // a[] left index
size_t r = rr; // a[] right index
while(1){ // merge data
if(a[l] <= a[r]){ // if a[l] <= a[r]
b[o++] = a[l++]; // copy a[l]
if(l < rr) // if not end of left run
continue; // continue (back to while)
while(r < ee){ // else copy rest of right run
b[o++] = a[r++];
}
break; // and return
} else { // else a[l] > a[r]
b[o++] = a[r++]; // copy a[r]
if(r < ee) // if not end of right run
continue; // continue (back to while)
while(l < rr){ // else copy rest of left run
b[o++] = a[l++];
}
break; // and return
}
}
}

What am I doing wrong in this C++ mergesort?

The program compiles and runs properly. A list of integers is read from an input file, but the output displays those numbers without any changes. I expect them to be sorted from least to greatest. For reference, I am trying to implement a version similar to the example on wikipedia.
// arrA contains items to sort; arrB is an array to work in
void mergesort(int *arrA, int *arrB, int first, int last) {
// a 1 element array is already sorted
// make increasingly longer sorted lists
for (int width = 1; width < last; width = 2 * width) {
// arrA is made up of 1 or more sorted lists of size width
for (int i = 0; i < last; i += 2 * width) {
// merge two sorted lists
// or copy arrA to arrB if arrA is full
merge(arrA, i, min(i+width, last), min (i + 2 * width,
last), arrB);
} // end for
// now arrB is full of sorted lists of size 2* width
// copy arrB into arrA for next iteration
copy(arrB, arrA, last);
} // end for
} // end mergesort
void merge(int *arrA, int iLeft, int iRight, int iEnd, int *arrB) {
int i0 = iLeft, i1 = iRight;
// while either list contains integers
for (int j = iLeft; j < iEnd; j++) {
// if 1st integer in left list is <= 1st integer of right list
if (i0 < iRight && (i1 >= iEnd || arrA[i0] <= arrA[i1])) {
arrB[j] = arrA[i0];
i0 += 1;
} // end if
else { // right head > left head
arrB[j] = arrA[i0];
i0 += 1;
} // end else
} // end for
} // end merge
void copy(int *origin, int *destination, int size) {
for (int i = 0; i < size; i++) {
destination[i] = origin[i];
} // end for
} // end copy
int main() {
int size = 0, first = 0, *arrA, *arrB;
// input data
read(&arrA, &arrB, &size);
// sorting
mergesort(arrA, arrB, first, size);
// output
write(arrA, first, size);
// cleanup
delete [] arrA;
delete [] arrB;
}
input
33 9 -2
output
33 9 -2
I haven't looked very deeply at your code, but this if-statement seems a bit off to me:
if (i0 < iRight && (i1 >= iEnd || arrA[i0] <= arrA[i1])) {
arrB[j] = arrA[i0];
i0 += 1;
} // end if
else { // right head > left head
arrB[j] = arrA[i0];
i0 += 1;
} // end else
Surely, the whole point of a pair of if/else clauses is that you do different things in the if vs. the else part. As far as I can tell, it's identical here.

Binary search to find the range in which the number lies

I have an array
Values array: 12 20 32 40 52
^ ^ ^ ^ ^
0 1 2 3 4
on which I have to perform binary search to find the index of the range in which the number lies. For example:
Given the number -> 19 (It lies between index 0 and 1), return 0
Given the number -> 22 (It lies between index 1 and 2), return 1
Given the number -> 40 (It lies between index 3 and 4), return 3
I implemented the binary search in the following manner, and this comes to be correct for case 1, and 3 but incorrect if we search for case 2 or 52, 55 32, etc.
#include <iostream>
using namespace std;
int findIndex(int values[], int number, unsigned first, unsigned last)
{
unsigned midPoint;
while(first<last)
{
unsigned midPoint = (first+last)/2;
if (number <= values[midPoint])
last = midPoint -1;
else if (number > values[midPoint])
first = midPoint + 1;
}
return midPoint;
}
int main()
{
int a[] = {12, 20, 32, 40, 52};
unsigned i = findIndex(a, 55, 0, 4);
cout << i;
}
Use of additional variables such as bool found is not allowed.
A range in C or C++ is normally given as the pointing directly to the lower bound, but one past the upper bound. Unless you're feeling extremely masochistic, you probably want to stick to that convention in your search as well.
Assuming you're going to follow that, your last = midpoint-1; is incorrect. Rather, you want to set last to one past the end of the range you're going to actually use, so it should be last = midpoint;
You also only really need one comparison, not two. In a binary search as long as the two bounds aren't equal, you're going to set either the lower or the upper bound to the center point, so you only need to do one comparison to decide which.
At least by convention, in C++, you do all your comparisons using < instead of <=, >, etc. Any of the above can work, but following the convention of using only < keeps from imposing extra (unnecessary) requirements on contained types.
Though most interviewers probably don't care, there's also a potential overflow when you do midpoint = (left + right)/2;. I'd generally prefer midpoint = left + (right - left)/2;
Taking those into account, code might look something like this:
template <class T>
T *lower_bound(T *left, T *right, T val) {
while (left < right) {
T *middle = left + (right - left) / 2;
if (*middle < val)
left = middle + 1;
else
right = middle;
}
return left;
}
template <class T>
T *upper_bound(T *left, T *right, T val) {
while (left < right) {
T *middle = left + (right - left) / 2;
if (val < *middle)
right = middle;
else
left = middle + 1;
}
return left;
}
Why not to use standard library functions?
#include <vector>
#include <algorithm>
#include <iostream>
using namespace std;
int main() {
for (int input = 10; input < 55; input++) {
cout << input << ": ";
// Your desire:
vector<int> v = { 12, 20, 32, 40, 52 };
if (input < v.front() || input > v.back()) {
cout << "Not found" << endl;
} else {
auto it = upper_bound(v.begin(), v.end(), input);
cout << it - v.begin() - 1 << endl;
}
}
}
Note: a pretty-cool site - http://en.cppreference.com/w/cpp/algorithm
This will work under the condition that min(A[i]) <= key <=max(A[i])
int binary_search(int A[],int key,int left, int right)
{
while (left <= right) {
int middle = left + (right - left) / 2;
if (A[middle] < key)
left = middle+1;
else if(A[middle] > key)
right = middle-1;
else
return middle;
}
return (left - 1);
}
For INPUT
4
1 3 8 10
4
OUTPUT
3 (the minimum of the 3 and 8)
#include <stdio.h>
int main()
{
int c, first, last, middle, n, search, array[100];
scanf("%d",&n);
for (c = 0; c < n; c++)
scanf("%d",&array[c]);
scanf("%d", &search);
first = 0;
last = n - 1;
middle = (first+last)/2;
while (first <= last) {
if (array[middle] < search)
{
first = middle + 1; }
else if (array[middle] == search) {
break;
}
else
{
last = middle - 1;
}
middle = (first + last)/2;
}
printf("%d\n",array[middle]);
return 0;
}
A regular binary search on success returns the index of the key. On failure to find the key it always stops at the index of the lowest key greater than the key we are searching. I guess following modified binary search algorithm will work.
Given sorted array A
Find a key using binary search and get an index.
If A[index] == key
return index;
else
while(index > 1 && A[index] == A[index -1]) index = index -1;
return index;
binsrch(array, num, low, high) {
if (num > array[high])
return high;
while(1) {
if (low == high-1)
return low;
if(low >= high)
return low-1;
mid = (low+high)/2
if (num < arr[mid])
high = mid;
else
low = mid+1;
}
}
here is a more specific answer
int findIndex(int values[],int key,int first, int last)
{
if(values[first]<=key && values[first+1]>=key)// stopping condition
{
return first;
}
int imid=first+(last-first)/2;
if(first==last || imid==first)
{
return -1;
}
if(values[imid]>key)
{
return findIndex(values,key,first,imid);
}
else if(values[imid]<=key)
{
return findIndex(values,key,imid,last);
}
}
I feel this is more inline to what you were looking for...and we won't crap out on the last value in this thing
/* binary_range.c (c) 2016 adolfo#di-mare.com */
/* http://stackoverflow.com/questions/10935635 */
/* This code is written to be easily translated to Fortran */
#include <stdio.h> /* printf() */
#include <assert.h> /* assert() */
/** Find the biggest index 'i' such that '*nSEED <= nVEC[i]'.
- nVEC[0..N-1] is an strict ascending order array.
- Returns and index in [0..N].
- Returns 'N' when '*nSEED>nVEC[N-1]'.
- Uses binary search to find the range for '*nSEED'.
*/
int binary_range( int *nSEED, int nVEC[] , int N ) {
int lo,hi, mid,plus;
if ( *nSEED > nVEC[N-1] ) {
return N;
}
for (;;) { /* lo = binary_range_search() */
lo = 0;
hi = N-1;
for (;;) {
plus = (hi-lo)>>1; /* mid = (hi+lo)/2; */
if ( plus == 0 ) { assert( hi-lo==1 );
if (*nSEED <= nVEC[lo]) {
hi = lo;
}
else {
lo = hi;
}
}
mid = lo + plus; /* mid = lo + (hi-lo)/2; */
if (*nSEED <= nVEC[mid]) {
hi = mid;
}
else {
lo = mid;
}
if (lo>=hi) { break; }
}
break;
} /* 'lo' is the index */
/* This implementation does not use division. */
/* ========================================= */
assert( *nSEED <= nVEC[lo] );
return lo;
}
/** Find the biggest index 'i' such that '*nSEED <= nVEC[i]'.
- nVEC[0..N-1] is an strict ascending order array.
- Returns and index in [0..N].
- Returns 'N' when '*nSEED>nVEC[N-1]'.
- Uses sequential search to find the range for '*nSEED'.
*/
int sequential_range( int* nSEED, int nVEC[] , int N ) {
int i;
if ( *nSEED > nVEC[N-1] ) {
return N;
}
i=0;
while ( i<N ) {
if ( *nSEED <= nVEC[i] ) { break; }
++i;
}
return i;
}
/** test->stackoverflow.10935635(). */
void test_10935635() {
{{ /* test.stackoverflow.10935635() */
/* http://stackoverflow.com/questions/10935635 */
/* binary_range search to find the range in which the number lies */
/* 0 1 2 3 4 */
int nVEC[] = { 12,20,32,40,52 }; int val;
int N = sizeof(nVEC)/sizeof(nVEC[0]); /* N = DIM(nVEC[]) */
val=19; val = binary_range( &val,nVEC,N );
/* 19 -> [12 < (19) <= 20] -> return 1 */
val=19; assert( binary_range( &val,nVEC,N ) == 1 );
/* 22 -> [20 < (22) <= 32] -> return 2 */
val=22; assert( binary_range( &val,nVEC,N ) == 2 );
/* 40 -> [32 < (40) <= 40] -> return 3 */
val=40; assert( binary_range( &val,nVEC,N ) == 3 );
/* Everything over 52 returns N */
val=53; assert( binary_range( &val,nVEC,N ) == N );
}}
}
/** Test program. */
int main() {
if (1) {
printf( "\ntest_10935635()" );
test_10935635();
}
printf( "\nEND" );
return 0;
}
/* Compiler: gcc.exe (tdm-1) 4.9.2 */
/* IDE: Code::Blocks 16.01 */
/* Language: C && C++ */
/* EOF: binary_range.c */
I know this is an old thread, but since I had to solve a similar problem I thought I would share it. Given a set of non-overlapping ranges of integers, I need to test if a given value lies in any of those ranges. The following (in Java), uses a modified binary search to test if a value lies within the sorted (lowest to highest) set of integer ranges.
/**
* Very basic Range representation for long values
*
*/
public class Range {
private long low;
private long high;
public Range(long low, long high) {
this.low = low;
this.high = high;
}
public boolean isInRange(long val) {
return val >= low && val <= high;
}
public long getLow() {
return low;
}
public void setLow(long low) {
this.low = low;
}
public long getHigh() {
return high;
}
public void setHigh(long high) {
this.high = high;
}
#Override
public String toString() {
return "Range [low=" + low + ", high=" + high + "]";
}
}
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
//Java implementation of iterative Binary Search over Ranges
class BinaryRangeSearch {
// Returns index of x if it is present in the list of Range,
// else return -1
int binarySearch(List<Range> ranges, int x)
{
Range[] arr = new Range[ranges.size()];
arr = ranges.toArray(arr);
int low = 0, high = arr.length - 1;
int iters = 0;
while (low <= high) {
int mid = low + (high - low) / 2; // find mid point
// Check if x is present a
if (arr[mid].getLow() == x) {
System.out.println(iters + " iterations");
return mid;
}
// If x greater, ignore left half
if (x > arr[mid].getHigh()) {
low = mid + 1;
}
else if (x >= arr[mid].getLow()) {
System.out.println(iters + " iterations");
return mid;
}
// If x is smaller, ignore right half of remaining Ranges
else
high = mid - 1;
iters++;
}
return -1; // not in any of the given Ranges
}
// Driver method to test above
public static void main(String args[])
{
BinaryRangeSearch ob = new BinaryRangeSearch();
// make a test list of long Range
int multiplier = 1;
List<Range> ranges = new ArrayList<>();
int high = 0;
for(int i = 0; i <7; i++) {
int low = i + high;
high = (i+10) * multiplier;
Range r = new Range(low, high);
multiplier *= 10;
ranges.add(r);
}
System.out.println(Arrays.toString(ranges.toArray()));
int result = ob.binarySearch(ranges, 11);
if (result == -1)
System.out.println("Element not present");
else
System.out.println("Element found at "
+ "index " + result);
}
}
My python implementation:
Time complexity: O(log(n))
Space complexity: O(log(n))
def searchForRange(array, target):
range = [-1, -1]
alteredBinarySerach(array, target, 0, len(array) -1, range, True)
alteredBinarySerach(array, target, 0, len(array) -1, range, False)
return range
def alteredBinarySerach(array, target, left, right, range, goLeft):
if left > right:
return
middle = (left+ right)//2
if array[middle] > target:
alteredBinarySerach(array, target, left, middle -1, range, goLeft)
elif array[middle] < target:
alteredBinarySerach(array, target, middle +1, right, range, goLeft)
else:
if goLeft:
if middle == 0 or array[middle -1] != target:
range[0] = middle
else:
alteredBinarySerach(array, target, left, middle -1 , range, goLeft)
else:
if middle == len(array) -1 or array[middle+1] != target:
range[1] = middle
else:
alteredBinarySerach(array, target, middle +1, right , range, goLeft)