Solving 5SUM in O(n^3) with strict memory limits - c++

I need a way to solve the classic 5SUM problem without hashing or with a memory efficient way of hashing.
The problem asks you to find how many subsequences in a given array of length N have the sum equal to S
Ex:
Input
6 5
1 1 1 1 1 1
Output
6
The restrictions are:
N <= 1000 ( size of the array )
S <= 400000000 ( the sum of the subsequence )
Memory usage <= 5555 kbs
Execution time 2.2s
I'm pretty sure the excepted complexity is O(N^3). Due to the memory limitations hashing doesn't provide an actual O(1) time.
The best I got was 70 points using this code. ( I got TLE on 6 tests )
#include <iostream>
#include <fstream>
#include <algorithm>
#include <vector>
#define MAX 1003
#define MOD 10472
using namespace std;
ifstream in("take5.in");
ofstream out("take5.out");
vector<pair<int, int>> has[MOD];
int v[MAX];
int pnt;
vector<pair<int, int>>::iterator it;
inline void ins(int val) {
pnt = val%MOD;
it = lower_bound(has[pnt].begin(), has[pnt].end(), make_pair(val, -1));
if(it == has[pnt].end() || it->first != val) {
has[pnt].push_back({val, 1});
sort(has[pnt].begin(), has[pnt].end());
return;
}
it->second++;
}
inline int get(int val) {
pnt = val%MOD;
it = lower_bound(has[pnt].begin(), has[pnt].end(), make_pair(val, -1));
if(it == has[pnt].end() || it->first != val)
return 0;
return it->second;
}
int main() {
int n,S;
int ach = 0;
int am = 0;
int rez = 0;
in >> n >> S;
for(int i = 1; i <= n; i++)
in >> v[i];
sort(v+1, v+n+1);
for(int i = n; i >= 1; i--) {
if(v[i] > S)
continue;
for(int j = i+1; j <= n; j++) {
if(v[i]+v[j] > S)
break;
ins(v[i]+v[j]);
}
int I = i-1;
if(S-v[I] < 0)
continue;
for(int j = 1; j <= I-1; j++) {
if(S-v[I]-v[j] < 0)
break;
for(int k = 1; k <= j-1; k++) {
if(S-v[I]-v[j]-v[k] < 0)
break;
ach = S-v[I]-v[j]-v[k];
rez += get(ach);
}
}
}
out << rez << '\n';
return 0;
}

I think it can be done. We are looking for all subsets of 5 items in the array arr with the correct SUM. We have array with indexes 0..N-1. Third item of those five can have index i in range 2..N-3. We cycle through all those indexes. For every index i we generate all combinations of two numbers for index in range 0..i-1 on the left of index i and all combinations of two numbers for index in the range i+1..N-1 on the right of index i. For every index i there are less than N*N combinations on the left plus on the right side. We would store only sum for every combination, so it would not be more than 1000 * 1000 * 4 = 4MB.
Now we have two sequences of numbers (the sums) and task is this: Take one number from first sequence and one number from second sequence and get sum equal to Si = SUM - arr[i]. How many combinations are there? To do it efficiently, sequences have to be sorted. Say first is sorted ascending and have numbers a, a, a, b, c ,.... Second is sorted descending and have numbers Z, Z, Y, X, W, .... If a + Z > Si then we can throw Z away, because we do not have smaller number to match. If a + Z < Si we can throw away a, because we do not have bigger number to match. And if a + Z = Si we have 2 * 3 = 6 new combinations and get rid of both a and Z. If we get sorting for free, it is nice O(N^3) algorithm.
While sorting is not for free, it is O(N * N^2 * log(N^2)) = O(N^3 * log(N)). We need to do sorting in linear time, which is not possible. Or is it? In index i+1 we can reuse sequences from index i. There are only few new combinations for i+1 - only those that involve number arr[i] together with some number from index 0..i-1. If we sort them (and we can, because there are not N*N of them, but N at most), all we need is to merge two sorted sequences. And that can be done in linear time. We can even avoid sorting completely if we sort arr at the beginning. We just merge.
For second sequence the merging does not involve adding but removing, but it is very simmilar.
The implementation seems to work, but I expect there is off by one error somewhere ;-)
#include <iostream>
#include <fstream>
#include <algorithm>
#include <vector>
using namespace std;
int Generate(int arr[], int i, int sums[], int N, int NN)
{
int p1 = 0;
for (int i1 = 0; i1 < i - 1; ++i1)
{
int ai = arr[i1];
for (int i2 = i1 + 1; i2 < i; ++i2)
{
sums[p1++] = ai + arr[i2];
}
}
sort(sums, sums + p1);
return p1;
}
int Combinations(int n, int sums[], int p1, int p2, int NN)
{
int cnt = 0;
int a = 0;
int b = NN - p2;
do
{
int state = sums[a] + sums[b] - n;
if (state > 0) { ++b; }
else if (state < 0) { ++a; }
else
{
int cnta = 0;
int lastA = sums[a];
while (a < p1 && sums[a] == lastA) { a++; cnta++; }
int cntb = 0;
int lastB = sums[b];
while (b < NN && sums[b] == lastB) { b++; cntb++; }
cnt += cnta * cntb;
}
} while (b < NN && a < p1);
return cnt;
}
int Add(int arr[], int i, int sums[], int p2, int N, int NN)
{
int ii = N - 1;
int n = arr[i];
int nn = n + arr[ii--];
int ip = NN - p2;
int newP2 = p2 + N - i - 1;
for (int p = NN - newP2; p < NN; ++p)
{
if (ip < NN && (ii < i || sums[ip] > nn))
{
sums[p] = sums[ip++];
}
else
{
sums[p] = nn;
nn = n + arr[ii--];
}
}
return newP2;
}
int Remove(int arr[], int i, int sums[], int p1)
{
int ii = 0;
int n = arr[i];
int nn = n + arr[ii++];
int pp = 0;
int p = 0;
for (; p < p1 - i; ++p)
{
while (ii <= i && sums[pp] == nn)
{
++pp;
nn = n + arr[ii++];
}
sums[p] = sums[pp++];
}
return p;
}
int main() {
ifstream in("take5.in");
ofstream out("take5.out");
int N, SUM;
in >> N >> SUM;
int* arr = new int[N];
for (int i = 0; i < N; i++)
in >> arr[i];
sort(arr, arr + N);
int NN = (N - 3) * (N - 4) / 2 + 1;
int* sums = new int[NN];
int combinations = 0;
int p1 = 0;
int p2 = 1;
for (int i = N - 3; i >= 2; --i)
{
if (p1 == 0)
{
p1 = Generate(arr, i, sums, N, NN);
sums[NN - 1] = arr[N - 1] + arr[N - 2];
}
else
{
p1 = Remove(arr, i, sums, p1);
p2 = Add(arr, i + 1, sums, p2, N, NN);
}
combinations += Combinations(SUM - arr[i], sums, p1, p2, NN);
}
out << combinations << '\n';
return 0;
}

Related

Faster way to compute the Sum of this function below?

I have this problem I'm curious about where I have an Array and I need to compute the Sum of this function:
Arr[L] + (Arr[L] ^ Arr[L+1]) + ... + (Arr[L] ^ Arr[L+1] ^ ... ^ Arr[R])
Example:
If the Array given was: [1, 2, 3, 5] and I asked what's the sum on the range [L = 1, R = 3] (assuming 1-based Index), then it'd be:
Sum = 1 + (1 ^ 2) + (1 ^ 2 ^ 3) = 4
In this problem, the Array, the size of the Array, and the Ranges are given. My approach for this is too slow.
There's also a variable called Q which indicates the number of Queries that would process each [L, R].
What I have:
I XOR'ed each element and then summed it to a variable within the range of [L, R]. Is there any faster way to compute this if the elements in the Array are suppose... 1e18 or 1e26 larger?
#include <iostream>
#include <array>
int main (int argc, const char** argv)
{
long long int N, L, R;
std::cin >> N;
long long int Arr[N];
for (long long int i = 0; i < N; i++)
{
std::cin >> Arr[i];
}
std::cin >> L >> R;
long long int Summation = 0, Answer = 0;
for (long long int i = L; i <= R; i++)
{
Answer = Answer ^ Arr[i - 1];
Summation += Answer;
}
std::cout << Summation << '\n';
return 0;
}
There are two loops in your code:
for (long long int i = 0; i < N; i++)
{
std::cin >> Arr[i];
}
long long int Summation = 0, Answer = 0;
for (long long int i = L; i <= R; i++)
{
Answer = Answer ^ Arr[i - 1];
Summation += Answer;
}
The second loop is smaller, and only does two operations (^= and +). These are native CPU instructions; this will be memory bound on the sequential access of Arr[]. You can't speed this up. You need all elements, and it doesn't get faster than a single sequential scan. The CPU prefetcher will hit maximum memory bandwidth.
However, the killer is the first loop. Parsing digits takes many, many more operations, and the range is even larger.
Disclaimer : NOT A FASTER SOLUTION !
Changing a bit the subject by making L and R valid indices of an integer matrix ( range [0, size) ), the following function is working for me:
size_t xor_rec(size_t* array, size_t size, size_t l, size_t r) {
if (l < 0 || r >= size || l > r) {
return 0; // error control
}
if (r > l + 1) {
size_t prev_to_prev_sum = xor_rec(array, size, l, r - 2);
size_t prev_sum = xor_rec(array, size, l, r - 1);
return prev_sum + ((prev_sum - prev_to_prev_sum) ^ array[r]);
}
if (r == l + 1) {
return array[r - 1] + (array[r - 1] ^ array[r]);
}
if (r == l) {
return array[r];
}
return 0;
}
Edit: changed int for size_t.
If indices are 0 based. That is L=0 implies the first element: Arr[0] is the first element in the array, then it's simply this:
int sum = 0;
int prev = 0;
for (int i = L; i <= R; i++)
{
int current = (prev ^ Arr[i]);
sum += current;
prev = current;
}
If it's 1 based, where L=1 is really Arr[0], then it's a quick adjustment:
int sum = 0;
int prev = 0;
for (int i = L; i <= R; i++)
{
int current = (prev ^ Arr[i-1]);
sum += current;
prev = current;
}

kth smallest element of an array, median of medians not fulfilling required time complexity

I am trying to submit a solution to finding kth smallest element of an array to an online judge.
I picked up the most efficient fast algorithm, but the online judge still throws a time limit error. Is there a more faster algorithm?
It is an algorithm named Median of medians.
Code submitted:
#include <iostream>
//#include <cmath>
#include <climits>
//#include <string>
#include <algorithm>
using namespace std;
int partition(int arr[], int l, int r, int x)
{
// Search for x in arr[l..r] and move it to end
int i;
for (i = l; i < r; i++)
if (arr[i] == x)
break;
swap(arr[i], arr[r]);
// Standard partition algorithm
i = l;
for (int j = l; j <= r - 1; j++)
{
if (arr[j] <= x)
{
swap(arr[i], arr[j]);
i++;
}
}
swap(arr[i], arr[r]);
return i;
}
// A simple function to find median of arr[]. This is called
// only for an array of size 5 in this program.
int findMedian(int arr[], int n)
{
sort(arr, arr + n); // Sort the array
return arr[n / 2]; // Return middle element
}
int kthSmallest(int arr[], int l, int r, int k)
{
if (k > 0 && k <= r - l + 1)
{
int i, n = r - l + 1;
int *median = new int[(n + 4) / 5];
for (i = 0; i < n / 5; i++) {
median[i] = findMedian(arr + l + i * 5, 5);
}
if (i * 5 < n) //For last group with less than 5 elements
{
median[i] = findMedian(arr + l + i * 5, n % 5);
i++;
}
int medOfMed = (i == 1) ? median[i - 1] :
kthSmallest(median, 0, i - 1, i / 2);
int pos = partition(arr, l, r, medOfMed);
if (pos - l == k - 1)
return arr[pos];
if (pos - l > k - 1)
return kthSmallest(arr, l, pos - 1, k);
return kthSmallest(arr, pos + 1, r, k - pos + l - 1);
}
return INT_MAX;
}
// It searches for x in arr[l..r], and partitions the array
// around x.
int main() {
freopen("input.txt", "r", stdin);
freopen("output.txt", "w", stdout);
int n, k;
cin >> n >> k;
int *a = new int[n];
for (int i = 0; i < n; i++)
cin >> a[i];
cout << kthSmallest(a, 0, n - 1, k);
delete[] a;
return 0;
}
If you know of a more efficient algorithm than this one, or an idea to improve this one, it would be very much appreciated.
There's a function std::nth_element in the <algorithm> header (https://en.cppreference.com/w/cpp/algorithm/nth_element) which can be used to find median of an array fast.

Passing Swaps and Comparisons into quicksort function

I'm making this program where I have to count the number of swaps and comparisons a quick sort function, and we have to pass the swaps and comps to the function. I'm not too sure how to do this. I have it so it can be done without passing anything to it, as shown below.
#include <iostream>
#include <ctime>
#include <stdlib.h>
#include <math.h>
using namespace std;
struct SwapandComp {
int swaps;
int comps;
};
const long ARRAY_SIZE = 5000;
int totalSwaps = 0;
int totalComps = 0;
int partition(int[], int, int) //add parameters int& swap and int& comp
SwapandComp quickSort(int[], int, int) //also add parameters for int& swap and int& comp
int main() {
SwapandComp qui;
long masterAry[ARRAY_SIZE] = {0};
int quickAry[ARRAY_SIZE] = {0};
int start = 0;
int end = 0;
double difference = 0;
int size = ARRAY_SIZE;
srand(time(NULL));
for (int i = 0; i < ARRAY_SIZE; i++) {
masterAry[i] = rand();
}
for (int a = 0; a < ARRAY_SIZE; a++) {
quickAry[a] = masterAry[a];
}
start = clock();
qui = quickSort(quickAry, 0, ARRAY_SIZE - 1);
end = clock();
difference = end - start;
double f = difference / CLOCKS_PER_SEC;
cout << "Quick: " << f << " " << qui.swaps << " " << qui.comps << endl;
}
This is the main. It's where values are assigned to the array to be sorted by the quickSort function, which will be defined below.
int partition(int numbers[], int i, int k) { //add parameters int& swap and int& comp
int l = 0;
int h = 0;
int midpoint = 0;
int pivot = 0;
int temp = 0;
bool done = false;
// Pick middle element as pivot
midpoint = i + (k - i) / 2;
pivot = numbers[midpoint];
l = i;
h = k;
while (!done) {
// Increment l while numbers[l] < pivot
while (numbers[l] < pivot) {
++l;
totalComps++;
}
// Decrement h while pivot < numbers[h]
while (pivot < numbers[h]) {
--h;
totalComps++;
}
// If there are zero or one elements remaining,
// all numbers are partitioned. Return h
if (l >= h) {
totalComps++;
done = true;
}
else {
// Swap numbers[l] and numbers[h],
// update l and h
temp = numbers[l];
numbers[l] = numbers[h];
numbers[h] = temp;
totalSwaps++;
++l;
--h;
}
}
//cout << totalSwaps << " " << totalComps << endl;
return h;
}
This is the partition function to find where to find the next partition point
SwapandComp quickSort(int numbers[], int i, int k) { //add parameters int& swap and int& comp
SwapandComp quick = { 0 };
//quick.swaps = quick.comps = 0;
int j = 0;
int z = 0;
// Base case: If there are 1 or zero elements to sort,
// partition is already sorted
if (i >= k) {
return quick;
}
// Partition the data within the array. Value j returned
// from partitioning is location of last element in low partition.
j = partition(numbers, i, k);
// Recursively sort low partition (i to j) and
// high partition (j + 1 to k)
quickSort(numbers, i, j);
quickSort(numbers, j + 1, k);
quick.swaps = totalSwaps;
quick.comps = totalComps;
//totalSwaps = 0;
//totalComps = 0;
return quick;
}
And finally, here is the quick sort function where all the swaps and comps will be added together and put into the struct. Again, I'm not too sure how to add in the pass by reference variables for swap and comp. Any help is appreciated! (Also sorry about the code formatting, it got kind of crazy on my screen.)

Quick Sort program stopped working

I was trying to solve the quick sort - 2 challenge on hackerrank. It said that we had to repeatedly call partition till the entire array was sorted. My program works for some test cases but for some it crashes, "Quick Sort - 2.exe has stopped working". I couldn't find the reason as to why it's happening.
The first element of the array/sub-array was to be taken as pivot element each time.
#include <iostream>
#include <conio.h>
using namespace std;
void swap(int arr[], int a, int b)
{
int c = arr[a];
arr[a] = arr[b];
arr[b] = c;
}
void qsort(int arr[], int m, int n) //m - lower limit, n - upper limit
{
if (n - m == 1)
{
return;
}
int p = arr[m], i, j, t; //p - pivot element, t - temporary
//partition
for (int i = m+1; i < n; i++)
{
j = i;
if (arr[j] < p)
{
t = arr[j];
while (arr[j] != p)
{
arr[j] = arr[j-1];
j--;
}
arr[j] = t; //pivot is at j and j+1
}
}
//check if sorted
int f = 1;
while (arr[f] > arr[f-1])
{
if (f == n-1)
{
f = -1;
break;
}
f++;
}
if (f == -1)
{
cout << "Sub Array Sorted\n";
}
else
{
if (p == arr[m]) //pivot is the smallest in sub array
{
qsort(arr, m+1, n); //sort right sub array
}
else
{
qsort(arr, m, j+1); //sort left sub array
qsort(arr, j+1, n); //sort right sub array
}
}
}
int main()
{
int n;
cin >> n;
int arr[n];
for (int i = 0; i < n; i++)
{
cin >> arr[i];
}
qsort(arr, 0, n);
for (int i = 0; i < n; i++)
{
cout << arr[i] << " ";
}
return 0;
}
You have an index out of range problem.
This will not give you the solution, but it may help you to find the reason why your program fails.
I have modified your program so it uses a vector of int rather than a raw array of int, and when you run this program you get an index out of range exception.
The sequence 4 3 7 1 6 4 that triggers the problem is hardcoded, so you don't need to type it each time.
#include <iostream>
#include <vector>
using namespace std;
void swap(vector<int> & arr, int a, int b)
{
int c = arr[a];
arr[a] = arr[b];
arr[b] = c;
}
void qsort(vector<int> & arr, int m, int n) //m - lower limit, n - upper limit
{
if (n - m == 1)
{
return;
}
int p = arr[m], j, t; //p - pivot element, t - temporary
//partition
for (int i = m + 1; i < n; i++)
{
j = i;
if (arr[j] < p)
{
t = arr[j];
while (arr[j] != p)
{
arr[j] = arr[j - 1];
j--;
}
arr[j] = t; //pivot is at j and j+1
}
}
//check if sorted
int f = 1;
while (arr[f] > arr[f - 1])
{
if (f == n - 1)
{
f = -1;
break;
}
f++;
}
if (f == -1)
{
cout << "Sub Array Sorted\n";
}
else
{
if (p == arr[m]) //pivot is the smallest in sub array
{
qsort(arr, m + 1, n); //sort right sub array
}
else
{
qsort(arr, m, j + 1); //sort left sub array
qsort(arr, j + 1, n); //sort right sub array
}
}
}
int main()
{
vector<int> arr = { 4,3,7,1,6,4 };
qsort(arr, 0, arr.size());
for (unsigned int i = 0; i < arr.size(); i++)
{
cout << arr[i] << " ";
}
return 0;
}
First of all, what you made is not quick sort, but some combination of divide-ans-conquer partitioning and insert sort.
Canonical quicksort goes from from lower (p) and upper (q) bounds of array, skipping elements arr[p]m respectively. Then it swaps arr[p] with arr[q], increments/decrements and checks if p>=q. Rinse and repeat until p>=q. Then make calls on sub-partitions. This way p or q holds pivot position and subcalls are obvious.
But you are doing it different way: you insert elements from right side of subarray to left side. Such thing can produce O(N^2) time complexity for one iteration. Consider 1,0,1,0,1,0,1,0,1,0,1,0,... sequence, for example. This can increase worst case complexity over O(N^2).
Out of time complexity... The problem in your function lies in assumption that j holds pivot location in subcalls:
qsort(arr, m, j+1); //sort left sub array
qsort(arr, j+1, n); //sort right sub array
Actually, j is set again and again equal to i in your main for loop. If last element is equal or greater than pivot, you end up with j=n-1, the you call qsort(arr, n, n) and first lines check is passed (sic!), because n-n != 1.
To fix this you should do two things:
1) find pivot location directly after rearrange:
for (int i = m; i < n; i++)
if (p == arr[i])
{
j = i;
break;
}
or initialize it in different variable, update after this line:
arr[j] = t; //pivot is at j and j+1
and update recursive calls to use new variable instead of j
2) make a more bulletproof check in the beginning of your function:
if (n - m <= 1)
the latter will be enough to get some result, but it will be much less effective than your current idea, falling down to probably O(N^3) in worst case.

Damerau–Levenshtein distance (Edit Distance with Transposition) c implementation

I implemented the Damerau–Levenshtein distance in c++ but it does not give correct o/p for the input (pantera,aorta) the correct o/p is 4 but my code gives 5.....
int editdist(string s,string t,int n,int m)
{
int d1,d2,d3,cost;
int i,j;
for(i=0;i<=n;i++)
{
for(j=0;j<=m;j++)
{
if(s[i+1]==t[j+1])
cost=0;
else
cost=1;
d1=d[i][j+1]+1;
d2=d[i+1][j]+1;
d3=d[i][j]+cost;
d[i+1][j+1]=minimum(d1,d2,d3);
if(i>0 && j>0 && s[i+1]==t[j] && s[i]==t[j+1] ) //transposition
{
d[i+1][j+1]=min(d[i+1][j+1],d[i-1][j-1]+cost);
}
}
}
return d[n+1][m+1];
}
I don't see any errors. Can someone find a problem with the code?
The algorithm in the post does not compute Damerau-Levenshtein distance. In a wikipedia article this algorithm is defined as the Optimal String Alignment Distance.
A java implementation of DL distance algorithm can be found in another SO post.
To get the correct values of OSA distance please change the lines marked with - below with the lines marked with +
int editdist(string s,string t,int n,int m)
{
int d1,d2,d3,cost;
int i,j;
for(i=0;i<=n;i++)
{
for(j=0;j<=m;j++)
{
- if(s[i+1]==t[j+1])
+ if(s[i+1]==t[j+1])
cost=0;
else
cost=1;
d1=d[i][j+1]+1;
d2=d[i+1][j]+1;
d3=d[i][j]+cost;
d[i+1][j+1]=minimum(d1,d2,d3);
- if(i>0 && j>0 && s[i+1]==t[j] && s[i]==t[j+1] ) //transposition
+ if(i>0 && j>0 && s[i]==t[j-1] && s[i-1]==t[j] ) //transposition
{
d[i+1][j+1]=min(d[i+1][j+1],d[i-1][j-1]+cost);
}
}
}
return d[n+1][m+1];
}
It looks as if the code was copied from a program written in a programming language where array indices start at 1 by default. Therefore all references to the elements of the distance array d were incremented. However the references to the characters within the strings are references to 0-based arrays, therefore they should not be updated.
To compute the distance the distance array has to be properly initialized:
for( i = 0; i < n + 1; i++)
d[i][0] = i;
for( j = 1; j < m + 1; j++)
d[0][j] = j;
Since you have got the answer 5, you probably have your distance array already initialized correctly.
Since the above algorithm does not compute the DL distance, here is a sketch of a C implementation of the DL algorithm (derived from the SO post with a java impl. derived from an ActionScript impl. in the Wikipedia article).
#define d(i,j) dd[(i) * (m+2) + (j) ]
#define min(x,y) ((x) < (y) ? (x) : (y))
#define min3(a,b,c) ((a)< (b) ? min((a),(c)) : min((b),(c)))
#define min4(a,b,c,d) ((a)< (b) ? min3((a),(c),(d)) : min3((b),(c),(d)))
int dprint(int* dd, int n,int m){
int i,j;
for (i=0; i < n+2;i++){
for (j=0;j < m+2; j++){
printf("%02d ",d(i,j));
}
printf("\n");
}
printf("\n");
return 0;
}
int dldist2(char *s, char* t, int n, int m) {
int *dd;
int i, j, cost, i1,j1,DB;
int INFINITY = n + m;
int DA[256 * sizeof(int)];
memset(DA, 0, sizeof(DA));
if (!(dd = (int*) malloc((n+2)*(m+2)*sizeof(int)))) {
return -1;
}
d(0,0) = INFINITY;
for(i = 0; i < n+1; i++) {
d(i+1,1) = i ;
d(i+1,0) = INFINITY;
}
for(j = 0; j<m+1; j++) {
d(1,j+1) = j ;
d(0,j+1) = INFINITY;
}
dprint(dd,n,m);
for(i = 1; i< n+1; i++) {
DB = 0;
for(j = 1; j< m+1; j++) {
i1 = DA[t[j-1]];
j1 = DB;
cost = ((s[i-1]==t[j-1])?0:1);
if(cost==0) DB = j;
d(i+1,j+1) =
min4(d(i,j)+cost,
d(i+1,j) + 1,
d(i,j+1)+1,
d(i1,j1) + (i-i1-1) + 1 + (j-j1-1));
}
DA[s[i-1]] = i;
dprint(dd,n,m);
}
cost = d(n+1,m+1);
free(dd);
return cost;
}
Here is my C++ version of this algorithm:
int damerau_levenshtein_distance(std::string p_string1, std::string p_string2)
{
int l_string_length1 = p_string1.length();
int l_string_length2 = p_string2.length();
int d[l_string_length1+1][l_string_length2+1];
int i;
int j;
int l_cost;
for (i = 0;i <= l_string_length1;i++)
{
d[i][0] = i;
}
for(j = 0; j<= l_string_length2; j++)
{
d[0][j] = j;
}
for (i = 1;i <= l_string_length1;i++)
{
for(j = 1; j<= l_string_length2; j++)
{
if( p_string1[i-1] == p_string2[j-1] )
{
l_cost = 0;
}
else
{
l_cost = 1;
}
d[i][j] = std::min(
d[i-1][j] + 1, // delete
std::min(d[i][j-1] + 1, // insert
d[i-1][j-1] + l_cost) // substitution
);
if( (i > 1) &&
(j > 1) &&
(p_string1[i-1] == p_string2[j-2]) &&
(p_string1[i-2] == p_string2[j-1])
)
{
d[i][j] = std::min(
d[i][j],
d[i-2][j-2] + l_cost // transposition
);
}
}
}
return d[l_string_length1][l_string_length2];
}