Rabin-Karp giving wrong answer with increasing power raise to p - c++

I have started learning string processing algorithms and wanted to implement the Rabin-Karp algorithm in C++. I have taken:
p = prime number larger than character set: 31
m = prime number for mod operations: 1e9+9
According to what I can find, there are multiple ways to implement the algorithm and I have tested two of them: increasing power as we go to higher index and decreasing power as we go to higher index.
For example on string S[0..n-1]
Let M1 = S[0]*p^0 + S[1]*p^1 + S[2]*p^2 + ... + S[n-1]*p^n-1
Let M2 = S[0]*p^n-1 + S[1]*p^n-2 + S[2]*p^n-3 ... + S[n-1]*p^0
I have implemented both the methods and could only get successful results using M2.
Code for M1:
int rabinKarpM1(string s, string t) {
int a = (int)s.size(), b = (int)t.size();
long long p = 31, m = 1e9+9;
long long powTable[a] = {0};
powTable[0] = 1;
for (int i = 1; i < a; i++) {
powTable[i] = powTable[i-1] * p % m;
}
long long hashS = 0, hashT = 0;
for (int i = 0; i < a; i++) {
hashS = (hashS + (s[i] - 'a' + 1)*powTable[i] % m) % m;
hashT = (hashT + (t[i] - 'a' + 1)*powTable[i] % m) % m;
}
if (hashS == hashT) {
bool match = true;
for (int i = 0; i < a; i++) {
if (s[i] != t[i]) {
match = false;
break;
}
}
if (match) {
return 0;
}
}
for (int i = 0; i+a-1 < b; i++) {
hashT = (hashT - (t[i] - 'a' + 1)) / p;
hashT = hashT + (t[i+a] - 'a' + 1)*powTable[a-1] % m;
hashT = hashT % m;
if (hashS == hashT) {
bool match = true;
for (int j = i+1; j < a+i+1; j++) {
if (s[j-i-1] != t[j]) {
match = false;
break;
}
}
if (match) {
return i+1;
}
}
}
return -1;
}
Code for M2:
int rabinKarpM2(string s, string t) {
int a = (int)s.size(), b = (int)t.size();
long long p = 31, m = 1e9+9;
long long powTable[a] = {0};
powTable[0] = 1;
for (int i = 1; i < a; i++) {
powTable[i] = powTable[i-1] * p % m;
}
long long hashS = 0, hashT = 0;
for (int i = 0; i < a; i++) {
hashS = (hashS + (s[i] - 'a' + 1)*powTable[a-i-1] % m) % m;
hashT = (hashT + (t[i] - 'a' + 1)*powTable[a-i-1] % m) % m;
}
if (hashS == hashT) {
bool match = true;
for (int i = 0; i < a; i++) {
if (s[i] != t[i]) {
match = false;
break;
}
}
if (match) {
return 0;
}
}
for (int i = 0; i+a-1 < b; i++) {
hashT = (hashT + m - (t[i] - 'a' + 1)*powTable[a-1] % m) % m * p;
hashT = hashT + (t[i+a] - 'a' + 1) % m;
hashT = hashT % m;
if (hashS == hashT) {
bool match = true;
for (int j = i+1; j < a+i+1; j++) {
if (s[j-i-1] != t[j]) {
match = false;
break;
}
}
if (match) {
return i+1;
}
}
}
return -1;
}
Test Input:
string s = foobarfoo
string t = barfoobarfoobarfoobarfoobarfoobarfoo
I have got correct results on M2 but not on M1.

Related

Rabin-Karp algorithm in c++

I am trying to understand the implementation of the Rabin-Karp algorithm. d is the number of characters in the input alphabet, but if I replace 0 or any other value instead of 20, it won't affect anything. Why is this happening like this ?
// Rabin-Karp algorithm in C++
#include <string.h>
#include <iostream>
using namespace std;
#define d 20
void rabinKarp(char pattern[], char text[], int q) {
int m = strlen(pattern);
int n = strlen(text);
int i, j;
int p = 0;
int t = 0;
int h = 1;
for (i = 0; i < m - 1; i++)
h = (h * d) % q;
// Calculate hash value for pattern and text
for (i = 0; i < m; i++) {
p = (d * p + pattern[i]) % q;
t = (d * t + text[i]) % q;
}
// Find the match
for (i = 0; i <= n - m; i++) {
if (p == t) {
for (j = 0; j < m; j++) {
if (text[i + j] != pattern[j])
break;
}
if (j == m)
cout << "Pattern is found at position: " << i + 1 << endl;
}
if (i < n - m) {
t = (d * (t - text[i] * h) + text[i + m]) % q;
if (t < 0)
t = (t + q);
}
}
}
int main() {
// char text[] = "ABCCDXAEFGX";
char text[] = "QWERTYUIOPASDFGHJKLXQWERTYUIOPASDFGHJKLX";
char pattern[] = "KLXQW";
int q = 13;
rabinKarp(pattern, text, q);
}
I believe the short answer is that the lower d is the more hash collisions you will have, but you go about verifying the match anyway so it does not affect anything.
A bit more verbose:
First let me modify your code to be have more expressive variables:
// Rabin-Karp algorithm in C++
#include <string.h>
#include <iostream>
using namespace std;
#define HASH_BASE 0
void rabinKarp(char pattern[], char text[], int inputBase) {
int patternLen = strlen(pattern);
int textLen = strlen(text);
int i, j; //predefined iterators
int patternHash = 0;
int textHash = 0;
int patternLenOut = 1;
for (i = 0; i < patternLen - 1; i++)
patternLenOut = (patternLenOut * HASH_BASE) % inputBase; // hash of pattern len
// Calculate hash value for pattern and text
for (i = 0; i < patternLen; i++) {
patternHash = (HASH_BASE * patternHash + pattern[i]) % inputBase;
textHash = (HASH_BASE * textHash + text[i]) % inputBase;
}
// Find the match
for (i = 0; i <= textLen - patternLen; i++) {
if (patternHash == textHash) {
for (j = 0; j < patternLen; j++) {
if (text[i + j] != pattern[j])
break;
}
if (j == patternLen)
cout << "Pattern is found at position: " << i + 1 << endl;
}
if (i < textLen - patternLen) {
textHash = (HASH_BASE * (textHash - text[i] * patternLenOut) + text[i + patternLen]) % inputBase;
if (textHash < 0)
textHash = (textHash + inputBase);
}
}
}
int main() {
// char text[] = "ABCCDXAEFGX";
char text[] = "QWEEERTYUIOPASDFGHJKLXQWERTYUIOPASDFGHJKLX";
char pattern[] = "EE";
int q = 13;
rabinKarp(pattern, text, q);
}
The easiest way to attack it is to set HASH_BASE (previously d) to zero and see where we can simplify. The rabinKarp function can then be reduced to:
void rabinKarp(char pattern[], char text[], int inputBase) {
int patternLen = strlen(pattern);
int textLen = strlen(text);
int i, j; //predefined iterators
int patternHash = 0;
int textHash = 0;
int patternLenOut = 0;
// Calculate hash value for pattern and text
for (i = 0; i < patternLen; i++) {
patternHash = (pattern[i]) % inputBase;
textHash = (text[i]) % inputBase;
}
// Find the match
for (i = 0; i <= textLen - patternLen; i++) {
if (patternHash == textHash) {
for (j = 0; j < patternLen; j++) {
if (text[i + j] != pattern[j])
break;
}
if (j == patternLen)
cout << "Pattern is found at position: " << i + 1 << endl;
}
if (i < textLen - patternLen) {
textHash = (text[i + patternLen]) % inputBase;
if (textHash < 0)
textHash = (textHash + inputBase);
}
}
}
now you'll notice that all the hashes becomes is the sum of the letters mod some number (in your case 13, in my case 2). This is a bad hash, meaning many things will sum to the same number. However, in this portion of the code:
if (patternHash == textHash) {
for (j = 0; j < patternLen; j++) {
if (text[i + j] != pattern[j])
break;
}
if (j == patternLen)
cout << "Pattern is found at position: " << i + 1 << endl;
}
you explicitly check the match, letter by letter, if the hashes match. The worse your hash function is, the more often you will have false positives (which will mean a longer runtime for your function). There are more details, but I believe that directly answers your question. What might be interesting is to record false positives and see how the false positive rate increases as d and q decrease.

Max value 2d array using pointer arithmetic

I'm trying to write a programm to find a maximum value in column in a initialized 5x5 matrix, and change it to -1. I found out the way to do it, but i want to find a better solution.
Input:
double array2d[5][5];
double *ptr;
ptr = array2d[0];
// initializing matrix
for (int i = 0; i < 5; ++i) {
for (int j = 0; j < 5; ++j) {
if (j % 2 != 0) {
array2d[i][j] = (i + 1) - 2.5;
} else {
array2d[i][j] = 2 * (i + 1) + 0.5;
}
}
}
This is my solution for the first column :
// Changing the matrix using pointer arithmetic
for (int i = 0; i < (sizeof(array2d) / sizeof(array2d[0][0])); ++i) {
if (i % 5 == 0) {
if (maxTemp <= *(ptr + i)) {
maxTemp = *(ptr + i);
}
}
}
for (int i = 0; i < (sizeof(array2d) / sizeof(array2d[0][0])); ++i) {
if (i % 5 == 0) {
if (*(ptr + i) == maxTemp) {
*(ptr + i) = -1;
}
}
}
I can repeat this code 5 times, and get the result, but i want a better solution. THX.
Below is the complete program that uses pointer arithmetic. This program replaces all the maximum values in each column of the 2D array -1 as you desire.
#include <iostream>
int main()
{
double array2d[5][5];
double *ptr;
ptr = array2d[0];
// initializing matrix
for (int i = 0; i < 5; ++i) {
for (int j = 0; j < 5; ++j) {
if (j % 2 != 0) {
array2d[i][j] = (i + 1) - 2.5;
} else {
array2d[i][j] = 2 * (i + 1) + 0.5;
}
}
}
//these(from this point on) are the things that i have added.
//Everything above this comment is the same as your code.
double (*rowBegin)[5] = std::begin(array2d);
double (*rowEnd)[5] = std::end(array2d);
while(rowBegin != rowEnd)
{
double *colBegin = std::begin(rowBegin[0]);
double *colEnd = std::end(rowBegin[0]);
double lowestvalue = *colBegin;//for comparing elements
//double *pointerToMaxValue = colBegin;
while(colBegin!= colEnd)
{
if(*colBegin > lowestvalue)
{
lowestvalue = *colBegin;
//pointerToMaxValue = colBegin ;
}
colBegin = colBegin + 1;
}
double *newcolBegin = std::begin(rowBegin[0]);
double *newcolEnd = std::end(rowBegin[0]);
while(newcolBegin!=newcolEnd)
{
if(*newcolBegin == lowestvalue)
{
*newcolBegin = -1;
}
++newcolBegin;
}
++rowBegin;
}
return 0;
}
The program can be checked here.
You can add print out all the element of the array to check whether the above program replaced all the maximum value in each column with -1.
I have written it in java but I think u can understand. This one is for all 5 columns at the same time. You can try this:
int count = 0;
double max = 0;
for (int i = 0; i < 5; ++i) {
for (int j = 0; j < 5; ++j) {
if (j == 0) {
max = array2d[j][I];
count = 0;
}
if (array2d[j][i] > max) {
count = j;
}
}
array2d[count][i] = -1;
}

Merge sort errors c++

I'm very new to C++ and only coded in python before, but python is too slow for my purposes now. I did a mergesort algorithm in python and it worked. But now I translated it into C++ and I got a bunch of errors in my IDE. What are my errors?
#include <iostream>
using namespace std;
int *sort(int lenght, int lis[]) {
int units = lenght;
int umt;
int tiles = 1;
while (units > 1) {
bool whole = true;
umt = units % 2;
if (umt = 1) {
units++;
whole = false;
}
units = units / 2;
tiles = tiles * 2;
if (whole) {
int buffd[units];
int add_l = 0;
int add_r = 0;
int prod_l = 0;
int prod_r = prod_l + tiles / 2;
for (int k = 0; k < units; k++) {
int buffd[units];
int add_l = 0;
int add_r = 0;
int prod_l = k * tiles;
int prod_r = prod_l + tiles / 2;
for (int f = 0; f < tiles; f++) {
if (lis[prod_l + add_l] <= lis[prod_r + add_r]) {
buffd[f] = lis[prod_l + add_l];
add_l++;
if (add_l = tiles / 2) {
for (int e = f; e < tiles; e++) {
buffd[e] = lis[prod_r + add_r + e];
}
f = tiles;
}
} else {
buffd[f] = lis[prod_r + add_r];
add_r++;
if (add_r = tiles / 2) {
for (int e = f; e < tiles; e++) {
buffd[e] = lis[prod_l + add_l + e];
}
f = tiles;
}
}
}
for (int i = prod_l; i < prod_l + tiles; i++) {
lis[i] = buffd[i - prod_l];
}
}
} else {
int buffd[units];
int add_l = 0;
int add_r = 0;
int prod_l = 0;
int prod_r = prod_l + tiles / 2;
for (int k = 0; k < units - 1; k++) {
int buffd[units];
int add_l = 0;
int add_r = 0;
int prod_l = k * tiles;
int prod_r = prod_l + tiles / 2;
for (int f = 0; f < tiles; f++) {
if (lis[prod_l + add_l] <= lis[prod_r + add_r]) {
buffd[f] = lis[prod_l + add_l];
add_l++;
if (add_l = tiles / 2) {
for (int e = f; e < tiles; e++) {
buffd[e] = lis[prod_r + add_r + e];
}
f = tiles;
}
} else {
buffd[f] = lis[prod_r + add_r];
add_r++;
if (add_r = tiles / 2) {
for (int e = f; e < tiles; e++) {
buffd[e] = lis[prod_l + add_l + e];
}
f = tiles;
}
}
}
}
for (int i = prod_l; i < prod_l + tiles; i++) {
lis[i] = buffd[i - prod_l];
}
}
}
return lis;
}
int main() {
int to_sort[8] = { 23, 1, 654, 2, 4, 87, 3, 1 };
cout << "sortiert: ";
int *sorted;
sorted = sort(8, to_sort);
for (int p = 0; p < 8; p++) {
cout << sorted[p] << " ";
}
return 0;
}
The errors are in German and I have no idea why, the rest of the IDE is in English. Does anyone know how to set that to English, I'm using Clion from JetBrains.
There are some major problems in your code:
comparisons must use == instead of =, which is the assignment operator.
the redundant definitions for buffd, add_l, add_r, prod_l and prod_r should me removed.
variable length array definitions such as int buffd[units] are not supported by many C++ compilers. These are extensions for compatibility with C90 optional features, likely to cause stack overflow for large arrays. You should allocate these arrays or use std::vector.
these local arrays are declared with a incorrect size: it should be int buffd[tiles];, not int buffd[units]. Undefined behavior ensues.
the last for loop is outside the body of the previous loop, which is incorrect.
you do not increment f before copying the remaining elements from the other slice when either add_l or add_r equals tiles / 2.
your non-recursive algorithm cannot succeed in the general case, I got it to work for array lengths that are powers of 2, and it is quite surprising that it may come as a translation from your python version. There are much simpler ways to program mergesort in python, and in C++ too.
With some extra work, I simplified your code and got it to work for the general case:
#include <iostream>
using namespace std;
int *sort(int length, int lis[]) {
for (int tile = 1; tile < length; tile += tile) {
int tiles = tile + tile;
int *buffd = new int[tiles];
for (int prod_l = 0; prod_l < length; prod_l += tiles) {
int add_l = 0;
int max_l = tile;
int add_r = 0;
int max_r = tile;
int prod_r = prod_l + max_l;
int f = 0;
if (prod_r >= length)
break;
if (prod_r + max_r > length)
max_r = length - prod_r;
for (;;) {
if (lis[prod_l + add_l] <= lis[prod_r + add_r]) {
buffd[f++] = lis[prod_l + add_l++];
if (add_l == max_l) {
while (add_r < max_r) {
buffd[f++] = lis[prod_r + add_r++];
}
break;
}
} else {
buffd[f++] = lis[prod_r + add_r++];
if (add_r == max_r) {
while (add_l < max_l) {
buffd[f++] = lis[prod_l + add_l++];
}
break;
}
}
}
for (int i = 0; i < f; i++) {
lis[prod_l + i] = buffd[i];
}
}
delete[] buffd;
}
return lis;
}
int main() {
int to_sort[8] = { 23, 1, 654, 2, 4, 87, 3, 1 };
for (int i = 1; i < 8; i++) {
cout << "sortiert: ";
int *sorted = sort(i, to_sort);
for (int p = 0; p < i; p++) {
cout << sorted[p] << " ";
}
cout << endl;
}
return 0;
}
Here is a classic top-down recursive implementation for reference:
void mergesort(int lis[], int lo, int hi, int *tmp) {
if (hi - lo >= 2) {
int mid = (hi - lo) / 2;
mergesort(lis, lo, lo + mid, tmp);
mergesort(lis, lo + mid, hi, tmp);
for (int i = 0; i < mid; i++)
tmp[i] = lis[lo + i];
for (int i = 0, j = lo + mid, k = lo; i < mid;) {
if (j >= hi || tmp[i] <= lis[j])
lis[k++] = tmp[i++];
else
lis[k++] = lis[j++];
}
}
}
int *mergesort(int length, int lis[]) {
int *tmp = new int[length / 2];
mergesort(lis, 0, length, tmp);
delete[] tmp;
return lis;
}

Regular Matrix in C++

My homework will create a program that check the numbers in an array with a given pattern. Program must take the matrix dimensions and terms in the matrix as arguments from command line. For example program name is myProg.exe and we want to check a 2x3 dimensioned matrix with (maximum dimension limit is 20x20):
1 2 3
4 5 6
Then I will run your program as.
The program will check a special matrix pattern and prints out ACCEPTABLE or NOT MATCH according to the values we put from the console. The Special Pattern: In a row major representation the cells of the matrix must obey this rule. Some terms of the matrix must be sum or product of the neighbor cells. In row major representation the sum and product operations are placed as given in the examples. Sum and Product cells follows each other with one free cells. For Odd rows the sequence starts with free cells and in Even Rows the sequence starts with Sum or Product cell.
My code is here:
#include <iostream>
#include <sstream>
using namespace std;
static int iter = 0;
static unsigned int sat=3, sut=2;
bool ok = false;
int *accepted;
int *array;
string isAcceptable(int mat[]) {
int l, co = 0;
bool operation = false;
int mat2[sat][sut];
for (int i = 0; i < sat; i++) {
for (int j = 0; j < sut; j++) {
mat2[i][j] = mat[co];
co++;
}
}
for (int i = 0; i < sat; i++) {
if (i % 2 == 0)
l = 1;
else
l = 0;
for (int j = l; j < sut; j += 2) {
int totalProduct;
if (!operation) {
totalProduct = 0;
if (j > 0)
totalProduct += mat2[i][j - 1];
if (j < sut - 1)
totalProduct += mat2[i][j + 1];
if (i > 0)
totalProduct += mat2[i - 1][j];
if (i < sat - 1)
totalProduct += mat2[i + 1][j];
} else {
totalProduct = 1;
if (j > 0)
totalProduct *= mat2[i][j - 1];
if (j < sut - 1)
totalProduct *= mat2[i][j + 1];
if (i > 0)
totalProduct *= mat2[i - 1][j];
if (i < sat - 1)
totalProduct *= mat2[i + 1][j];
}
if (mat2[i][j] != totalProduct)
return "NOT MATCH";
operation = !operation;
}
}
return "ACCEPTABLE";
}
void change(int index1, int index2) {
int temp;
temp = array[index1];
array[index1] = array[index2];
array[index2] = temp;
iter++;
}
void combine(int mat[], int len) {
if(ok)
return;
array = new int[len];
*array = *mat;
if (len <= sat * sut) {
for (int i = len; i < sat * sut - 1; i++) {
for (int j = i; j < sat * sut; j++) {
combine(array, len + 1);
change(i, j);
if (isAcceptable(array) == ("ACCEPTABLE")) {
int accepted[sat*sut];
*accepted = *array;
ok = true;
return;
}
}
}
} else
return;
}
string isAcceptableCombine(int mat[]) {
combine(mat, 6);
if (ok)
{
cout<< " TRUE Sequense";
return "ACCEPTABLE";
}
else
cout<< " FALSE Sequense";
return "NOT MATCH";
}
int main(int argc, char** argv) {
int matris[] = {1,2,1,4,1,6};
isAcceptableCombine(matris);
}
My code's result is always returning TRUE Sequence.
Where is my mistake?

unable to understand why adding zero in end after adding digits

http://www.spoj.com/problems/JULKA/help me solve this question please .explain why we are adding zero in end after addition of bits.as commented in code below
#include <stdio.h>
#include <string.h>
#define MAX 111
char klaudia[MAX], natalia[MAX], total[MAX], diff[MAX];
void calc()
{
int len1 = strlen(total);
int len2 = strlen(diff);
int a, b, c, i, j, k, f;
char temp[MAX];
for(i=len1-1, j=len2-1, k=c=0; i>=0 || j>=0 || c; i--, j--, k++)
{
a = i>=0? total[i]-'0' : 0;
b = j>=0? diff[j]-'0' : 0;
temp[k] = (a+b+c)%10 + '0';
c = (a+b+c)/10;
}
temp[k] = 0;//explain
strcpy(klaudia,"0");
//explain below for loop what is actually being done in this loop//
for(i=k-1, j=a=f=0; i>=0; i--)
{
b = (a*10 + temp[i]-'0') / 2;
a = (a*10 + temp[i]-'0') % 2;
if(b) f = 1;
if(f) klaudia[j++] = b+'0';
}
if(!j) j++;
klaudia[j] = 0;
for(i=len1-1, j=len2-1, k=c=0; i>=0; i--, j--, k++)
{
a = total[i]-'0';
b = j>=0? diff[j]-'0' : 0;
if(a < b+c)
{
temp[k] = (10+a-b-c) + '0';
c = 1;
}
else
{
temp[k] = a-b-c + '0';
c = 0;
}
}
temp[k] = 0;
strcpy(natalia,"0");
for(i=k-1, j=a=f=0; i>=0; i--)
{
b = (a*10 + temp[i]-'0') / 2;
a = (a*10 + temp[i]-'0') % 2;
if(b) f = 1;
if(f) natalia[j++] = b+'0';
}
if(!j) j++;
natalia[j] = 0;
}
int main()
{
while(scanf("%s %s", total, diff)==2)
{
calc();
printf("%s\n%s\n", klaudia, natalia);
}
return 0;
}
why are we adding zeo in array temp after we have added all the digits
You are not computing with number but with character string. In C, character string are terminated by a Nul character whose code is zero.