Rabin-Karp algorithm in c++

Rabin-Karp algorithm in c++ - c++

I am trying to understand the implementation of the Rabin-Karp algorithm. d is the number of characters in the input alphabet, but if I replace 0 or any other value instead of 20, it won't affect anything. Why is this happening like this ?
// Rabin-Karp algorithm in C++
#include <string.h>
#include <iostream>
using namespace std;
#define d 20
void rabinKarp(char pattern[], char text[], int q) {
int m = strlen(pattern);
int n = strlen(text);
int i, j;
int p = 0;
int t = 0;
int h = 1;
for (i = 0; i < m - 1; i++)
h = (h * d) % q;
// Calculate hash value for pattern and text
for (i = 0; i < m; i++) {
p = (d * p + pattern[i]) % q;
t = (d * t + text[i]) % q;
}
// Find the match
for (i = 0; i <= n - m; i++) {
if (p == t) {
for (j = 0; j < m; j++) {
if (text[i + j] != pattern[j])
break;
}
if (j == m)
cout << "Pattern is found at position: " << i + 1 << endl;
}
if (i < n - m) {
t = (d * (t - text[i] * h) + text[i + m]) % q;
if (t < 0)
t = (t + q);
}
}
}
int main() {
// char text[] = "ABCCDXAEFGX";
char text[] = "QWERTYUIOPASDFGHJKLXQWERTYUIOPASDFGHJKLX";
char pattern[] = "KLXQW";
int q = 13;
rabinKarp(pattern, text, q);
}

I believe the short answer is that the lower d is the more hash collisions you will have, but you go about verifying the match anyway so it does not affect anything.
A bit more verbose:
First let me modify your code to be have more expressive variables:
// Rabin-Karp algorithm in C++
#include <string.h>
#include <iostream>
using namespace std;
#define HASH_BASE 0
void rabinKarp(char pattern[], char text[], int inputBase) {
int patternLen = strlen(pattern);
int textLen = strlen(text);
int i, j; //predefined iterators
int patternHash = 0;
int textHash = 0;
int patternLenOut = 1;
for (i = 0; i < patternLen - 1; i++)
patternLenOut = (patternLenOut * HASH_BASE) % inputBase; // hash of pattern len
// Calculate hash value for pattern and text
for (i = 0; i < patternLen; i++) {
patternHash = (HASH_BASE * patternHash + pattern[i]) % inputBase;
textHash = (HASH_BASE * textHash + text[i]) % inputBase;
}
// Find the match
for (i = 0; i <= textLen - patternLen; i++) {
if (patternHash == textHash) {
for (j = 0; j < patternLen; j++) {
if (text[i + j] != pattern[j])
break;
}
if (j == patternLen)
cout << "Pattern is found at position: " << i + 1 << endl;
}
if (i < textLen - patternLen) {
textHash = (HASH_BASE * (textHash - text[i] * patternLenOut) + text[i + patternLen]) % inputBase;
if (textHash < 0)
textHash = (textHash + inputBase);
}
}
}
int main() {
// char text[] = "ABCCDXAEFGX";
char text[] = "QWEEERTYUIOPASDFGHJKLXQWERTYUIOPASDFGHJKLX";
char pattern[] = "EE";
int q = 13;
rabinKarp(pattern, text, q);
}
The easiest way to attack it is to set HASH_BASE (previously d) to zero and see where we can simplify. The rabinKarp function can then be reduced to:
void rabinKarp(char pattern[], char text[], int inputBase) {
int patternLen = strlen(pattern);
int textLen = strlen(text);
int i, j; //predefined iterators
int patternHash = 0;
int textHash = 0;
int patternLenOut = 0;
// Calculate hash value for pattern and text
for (i = 0; i < patternLen; i++) {
patternHash = (pattern[i]) % inputBase;
textHash = (text[i]) % inputBase;
}
// Find the match
for (i = 0; i <= textLen - patternLen; i++) {
if (patternHash == textHash) {
for (j = 0; j < patternLen; j++) {
if (text[i + j] != pattern[j])
break;
}
if (j == patternLen)
cout << "Pattern is found at position: " << i + 1 << endl;
}
if (i < textLen - patternLen) {
textHash = (text[i + patternLen]) % inputBase;
if (textHash < 0)
textHash = (textHash + inputBase);
}
}
}
now you'll notice that all the hashes becomes is the sum of the letters mod some number (in your case 13, in my case 2). This is a bad hash, meaning many things will sum to the same number. However, in this portion of the code:
if (patternHash == textHash) {
for (j = 0; j < patternLen; j++) {
if (text[i + j] != pattern[j])
break;
}
if (j == patternLen)
cout << "Pattern is found at position: " << i + 1 << endl;
}
you explicitly check the match, letter by letter, if the hashes match. The worse your hash function is, the more often you will have false positives (which will mean a longer runtime for your function). There are more details, but I believe that directly answers your question. What might be interesting is to record false positives and see how the false positive rate increases as d and q decrease.

Related

Why the simple multiplication would result in some garbled characters?

I try to design a program that implements the multiplication between two big integers(Using C++). But after I complete it, I found that if I input the two integers by the command arguments, the results would be sometimes very weird and sometimes right. Please help me figure out the reason and tell me how to fix it. Thanks (The main function that implements the function of multiplication between two big integers is mul() ).
#include <iostream>
#include <cstring>
#include <stdlib.h>
using namespace std;
void mul(char *c1, char *c2, int len1, int len2);
void printArr(char *c1, char *c2, int len1, int len2);
int main(int argc, char **argv)
{
if (argv[1] != NULL)
{
char cArr1[500], cArr2[500];
for (int i = 0; i < strlen(argv[1]); i++)
{
cArr1[i] = argv[1][i];
}
for (int i = 0; i < strlen(argv[2]); i++)
{
cArr2[i] = argv[2][i];
}
int len1 = strlen(cArr1);
int len2 = strlen(cArr2);
printArr(cArr1, cArr2, len1, len2);
(len1 > len2) ? mul(cArr1, cArr2, len1, len2) : mul(cArr2, cArr1, len2, len1);
exit(100);
}
while (true)
{
cout << "Please input two integers" << endl;
char cArr1[500], cArr2[500];
cin >> cArr1;
if (cArr1[0] == 'q' && cArr1[1] == 'u' && cArr1[2] == 'i' && cArr1[3] == 't')
{
exit(1000);
}
cin >> cArr2;
int parity = 0;
int len1 = strlen(cArr1);
int len2 = strlen(cArr2);
printArr(cArr1, cArr2, len1, len2);
if (cArr1[0] == '-')
{
for (int i = 1; i < len1; i++)
{
cArr1[i - 1] = cArr1[i];
}
parity++;
len1--;
}
if (cArr2[0] == '-')
{
for (int i = 1; i < len2; i++)
{
cArr2[i - 1] = cArr2[i];
}
parity++;
len2--;
}
bool isDigit = true;
for (int i = 0; i < len1; i++)
{
if (!isdigit(cArr1[i]))
{
isDigit = false;
}
}
for (int i = 0; i < len2; i++)
{
if (!isdigit(cArr2[i]))
{
isDigit = false;
}
}
if (!isDigit)
{
cout << "\rInvalid input. Try again" << endl;
continue;
}
if (parity % 2 != 0)
{
cout << "-";
}
(len1 > len2) ? mul(cArr1, cArr2, len1, len2) : mul(cArr2, cArr1, len2, len1);
}
}
void mul(char *bigger, char *smaller, int bigLen, int smallLen)
{
int *bigNum = new int[bigLen];
int *smallNum = new int[smallLen];
for (int i = 0; i < bigLen; i++)
{
bigNum[i] = bigger[bigLen - i - 1] - '0';
}
for (int i = 0; i < smallLen; i++)
{
smallNum[i] = smaller[smallLen - i - 1] - '0';
}
int res[30];
for (int i = 0; i < 30; i++)
{
res[i] = 0;
}
for (int i = 0; i < smallLen; i++)
{
for (int j = 0; j < bigLen; j++)
{
res[i + j] += bigNum[j] * smallNum[i];
}
}
for (int i = 0; i < bigLen + smallLen; i++)
{
int digit = res[i] % 10;
int carry = res[i] / 10;
res[i] = digit;
res[i + 1] += carry;
}
bool null = false;
for (int i = bigLen + smallLen - 1; i >= 0; i--)
{
if (res[i] != 0 && res[i + 1] == 0)
{
null = true;
}
if (null)
{
cout << res[i];
}
}
cout << endl;
}
void printArr(char *c1, char *c2, int len1, int len2)
{
for (int i = 0; i < len1; i++)
{
cout << c1[i];
}
cout << " * ";
for (int i = 0; i < len2; i++)
{
cout << c2[i];
}
cout << " = ";
}

Just initialize your char arrays to empty ones:
char cArr1[500] = {};
char cArr2[500] = {};
then, for the sake of clarity, assign the lengths from your arguments to two integers, casting them since the compiler might warn you about incompatibility between size_t and int.
int lenArg1 = 0;
int lenArg2 = 0;
lenArg1 = (int)strlen ( argv[1] );
lenArg2 = (int)strlen ( argv[2] );
Then, printing the lengths len1 and len2 for debugging purposes only:
int len1 = strlen ( cArr1 );
int len2 = strlen ( cArr2 );
cout << "len1 >> " + to_string(len1) <<endl;
cout << "len2 >> " + to_string(len2) <<endl;
In fact, as #Kevin SUN mentioned, it was possible that your argument reading steps were missing the null characters, however, after running some tests it seems to work fine just by initializing the char arrays.
Also, as mentioned in the comments you need to increase the size reserved for res array, I did it to 500
Compiled with g++ -Wall main.cpp -o calc.exe and running: calc 10 100
Without initialization you get problems like:
after initializing, the output works just fine:

How to resolve the segmentation fault in this code?

I am trying to make an infix calculator for which I am currently trying to convert numbers entered in a character array to double.
here's my code:
#include <iostream>
#include<cmath>
using namespace std;
int main()
{
char exp[500];
const int SIZE = 100;
char temp[SIZE];
char op;
int strLen = 0, k, l, num = 0, fnum = 0;
double number = 0;
cin.getline(exp, 500,'\n');
int i = 0, j = 0, fpoint=0;
cout << exp;
for (i = 0, j = 0; exp[j] != 0; i++)
{
if (i % 2 == 0)
{
for (int m = 0; exp[m] != ','; m++) //stopped working
temp[m] = exp[m];
cout << temp;
for (k = 0; k < SIZE && temp[k] != 0; k++)
{
strLen = k;
if (temp[k] == '.')
fpoint = k + 1;
}
cout << fpoint<<endl;
cout << "strLen" << strLen;
for (k = 0; k <= fpoint; k++)
{
num = num + ((temp[fpoint - k] - '0') * pow(10, k));
}
for (k = fpoint + 1, l = 0; k <= strLen; k++, l++)
{
fnum = fnum + ((temp[strLen - l] - '0') * pow(10, l));
}
number = num + (fnum / pow(10, strLen - fpoint + 1));
cout << number;
j = j + strLen + 1;
}
else
{
char op = temp[j];
cout << op;
}
}
system("pause");
return 0;
}
sample input
2.5*3
It stops working and gives segmentation fault as an error on the marked position.

This line for (int m = 0; exp[m] != ','; m++) //stopped working will always fail if there are no , characters since exp[m] != ',' will always be equal to true and so will reach beyond the end of the array of exp which triggers the "segmentation fault".

Rabin-Karp giving wrong answer with increasing power raise to p

I have started learning string processing algorithms and wanted to implement the Rabin-Karp algorithm in C++. I have taken:
p = prime number larger than character set: 31
m = prime number for mod operations: 1e9+9
According to what I can find, there are multiple ways to implement the algorithm and I have tested two of them: increasing power as we go to higher index and decreasing power as we go to higher index.
For example on string S[0..n-1]
Let M1 = S[0]*p^0 + S[1]*p^1 + S[2]*p^2 + ... + S[n-1]*p^n-1
Let M2 = S[0]*p^n-1 + S[1]*p^n-2 + S[2]*p^n-3 ... + S[n-1]*p^0
I have implemented both the methods and could only get successful results using M2.
Code for M1:
int rabinKarpM1(string s, string t) {
int a = (int)s.size(), b = (int)t.size();
long long p = 31, m = 1e9+9;
long long powTable[a] = {0};
powTable[0] = 1;
for (int i = 1; i < a; i++) {
powTable[i] = powTable[i-1] * p % m;
}
long long hashS = 0, hashT = 0;
for (int i = 0; i < a; i++) {
hashS = (hashS + (s[i] - 'a' + 1)*powTable[i] % m) % m;
hashT = (hashT + (t[i] - 'a' + 1)*powTable[i] % m) % m;
}
if (hashS == hashT) {
bool match = true;
for (int i = 0; i < a; i++) {
if (s[i] != t[i]) {
match = false;
break;
}
}
if (match) {
return 0;
}
}
for (int i = 0; i+a-1 < b; i++) {
hashT = (hashT - (t[i] - 'a' + 1)) / p;
hashT = hashT + (t[i+a] - 'a' + 1)*powTable[a-1] % m;
hashT = hashT % m;
if (hashS == hashT) {
bool match = true;
for (int j = i+1; j < a+i+1; j++) {
if (s[j-i-1] != t[j]) {
match = false;
break;
}
}
if (match) {
return i+1;
}
}
}
return -1;
}
Code for M2:
int rabinKarpM2(string s, string t) {
int a = (int)s.size(), b = (int)t.size();
long long p = 31, m = 1e9+9;
long long powTable[a] = {0};
powTable[0] = 1;
for (int i = 1; i < a; i++) {
powTable[i] = powTable[i-1] * p % m;
}
long long hashS = 0, hashT = 0;
for (int i = 0; i < a; i++) {
hashS = (hashS + (s[i] - 'a' + 1)*powTable[a-i-1] % m) % m;
hashT = (hashT + (t[i] - 'a' + 1)*powTable[a-i-1] % m) % m;
}
if (hashS == hashT) {
bool match = true;
for (int i = 0; i < a; i++) {
if (s[i] != t[i]) {
match = false;
break;
}
}
if (match) {
return 0;
}
}
for (int i = 0; i+a-1 < b; i++) {
hashT = (hashT + m - (t[i] - 'a' + 1)*powTable[a-1] % m) % m * p;
hashT = hashT + (t[i+a] - 'a' + 1) % m;
hashT = hashT % m;
if (hashS == hashT) {
bool match = true;
for (int j = i+1; j < a+i+1; j++) {
if (s[j-i-1] != t[j]) {
match = false;
break;
}
}
if (match) {
return i+1;
}
}
}
return -1;
}
Test Input:
string s = foobarfoo
string t = barfoobarfoobarfoobarfoobarfoobarfoo
I have got correct results on M2 but not on M1.

mtrix chain multiplication print the sequence of the mattrices

I have written code for matrix chain multiplication in dynamic programming in c++.
there is an error in the recursive call for printing the correct parenthesization of the matrices. I am taking input from text file and giving output on a text file. please help..
#include <iostream>
#include <fstream>
#include <limits.h>
using namespace std;
int * MatrixChainOrder(int p[], int n)
{
static int m[100][100];
static int s[100][100];
int j, q;
int min = INT_MAX;
for (int i = 1; i <= n; i++)
m[i][i] = 0;
for (int L = 2; L <= n; L++) {
for (int i = 1; i <= n - L + 1; i++) {
j = i + L - 1;
m[i][j] = min;
for (int k = i; k <= j - 1; k++) {
q = m[i][k] + m[k + 1][j] + p[i - 1] * p[k] * p[j];
if (q < m[i][j]) {
m[i][j] = q;
s[i][j] = k;
}
}
}
}
return (*s);
}
void Print(int *s, int i, int j)
{
ofstream outfile("output.text");
if (i == j)
{
outfile << "a1";
}
else
outfile << "(";
{
Print(*s, i, s[i][j]);
Print(*s, s[i][j] + 1, j);
outfile << ")";
}
outfile.close();
}
int main()
{
int arr[100];
int num, i = 0;
ifstream infile("input.text");
while (infile)
{
infile >> num;
arr[i] = num;
i++;
}
i = i - 1;
infile.close();
Print(MatrixChainOrder(arr, i - 1), 0, i - 1);
return 0;
}

In C++ it is better to use std::vector for arrays. Aside from that, you can't mix pointers and arrays like that because the compiler loses track of array size.
For example this doesn't work:
int x[10][20];
void foo(int *ptr)
{
//the numbers 10 and 20 have not been passed through
}
But you can change it to
int x[10][20];
void foo(int arr[10][20])
{
//the numbers 10 and 20 are available
}
MatrixChainOrder is supposed to return a number, according to this link
int MatrixChainOrder(int s[100][100], int p[], int n)
{
int m[100][100];
for (int i = 0; i < 100; i++) m[i][i] = 0;
for (int i = 0; i < 100; i++) s[i][i] = 0;
int q = 0;
for (int L = 2; L <= n; L++) {
for (int i = 1; i <= n - L + 1; i++) {
int j = i + L - 1;
m[i][j] = INT_MAX;
for (int k = i; k <= j - 1; k++) {
q = m[i][k] + m[k + 1][j] + p[i - 1] * p[k] * p[j];
if (q < m[i][j]) {
m[i][j] = q;
s[i][j] = k;
}
}
}
}
return q;
}
int main()
{
int arr[] = { 40, 20, 30, 10, 30 };
int array_size = sizeof(arr) / sizeof(int);
int n = array_size - 1;
int s[100][100];
int minimum = MatrixChainOrder(s, arr, n);
printf("{ 40, 20, 30, 10, 30 } should result in 26000 : %d\n", minimum);
return 0;
}
Likewise you can change your Print function
void Print(int s[100][100], int i, int j)
{
if (i < 0 || i >= 100 || j < 0 || j >= 100)
{
cout << "array bound error\n";
}
//safely access s[i][j] ...
}

Regular Matrix in C++

My homework will create a program that check the numbers in an array with a given pattern. Program must take the matrix dimensions and terms in the matrix as arguments from command line. For example program name is myProg.exe and we want to check a 2x3 dimensioned matrix with (maximum dimension limit is 20x20):
1 2 3
4 5 6
Then I will run your program as.
The program will check a special matrix pattern and prints out ACCEPTABLE or NOT MATCH according to the values we put from the console. The Special Pattern: In a row major representation the cells of the matrix must obey this rule. Some terms of the matrix must be sum or product of the neighbor cells. In row major representation the sum and product operations are placed as given in the examples. Sum and Product cells follows each other with one free cells. For Odd rows the sequence starts with free cells and in Even Rows the sequence starts with Sum or Product cell.
My code is here:
#include <iostream>
#include <sstream>
using namespace std;
static int iter = 0;
static unsigned int sat=3, sut=2;
bool ok = false;
int *accepted;
int *array;
string isAcceptable(int mat[]) {
int l, co = 0;
bool operation = false;
int mat2[sat][sut];
for (int i = 0; i < sat; i++) {
for (int j = 0; j < sut; j++) {
mat2[i][j] = mat[co];
co++;
}
}
for (int i = 0; i < sat; i++) {
if (i % 2 == 0)
l = 1;
else
l = 0;
for (int j = l; j < sut; j += 2) {
int totalProduct;
if (!operation) {
totalProduct = 0;
if (j > 0)
totalProduct += mat2[i][j - 1];
if (j < sut - 1)
totalProduct += mat2[i][j + 1];
if (i > 0)
totalProduct += mat2[i - 1][j];
if (i < sat - 1)
totalProduct += mat2[i + 1][j];
} else {
totalProduct = 1;
if (j > 0)
totalProduct *= mat2[i][j - 1];
if (j < sut - 1)
totalProduct *= mat2[i][j + 1];
if (i > 0)
totalProduct *= mat2[i - 1][j];
if (i < sat - 1)
totalProduct *= mat2[i + 1][j];
}
if (mat2[i][j] != totalProduct)
return "NOT MATCH";
operation = !operation;
}
}
return "ACCEPTABLE";
}
void change(int index1, int index2) {
int temp;
temp = array[index1];
array[index1] = array[index2];
array[index2] = temp;
iter++;
}
void combine(int mat[], int len) {
if(ok)
return;
array = new int[len];
*array = *mat;
if (len <= sat * sut) {
for (int i = len; i < sat * sut - 1; i++) {
for (int j = i; j < sat * sut; j++) {
combine(array, len + 1);
change(i, j);
if (isAcceptable(array) == ("ACCEPTABLE")) {
int accepted[sat*sut];
*accepted = *array;
ok = true;
return;
}
}
}
} else
return;
}
string isAcceptableCombine(int mat[]) {
combine(mat, 6);
if (ok)
{
cout<< " TRUE Sequense";
return "ACCEPTABLE";
}
else
cout<< " FALSE Sequense";
return "NOT MATCH";
}
int main(int argc, char** argv) {
int matris[] = {1,2,1,4,1,6};
isAcceptableCombine(matris);
}
My code's result is always returning TRUE Sequence.
Where is my mistake?

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

Rabin-Karp algorithm in c++ - c++

Related

Why the simple multiplication would result in some garbled characters?

How to resolve the segmentation fault in this code?

Rabin-Karp giving wrong answer with increasing power raise to p

mtrix chain multiplication print the sequence of the mattrices

Regular Matrix in C++

Categories

Resources