Damerau–Levenshtein distance (Edit Distance with Transposition) c implementation

Damerau–Levenshtein distance (Edit Distance with Transposition) c implementation - c++

I implemented the Damerau–Levenshtein distance in c++ but it does not give correct o/p for the input (pantera,aorta) the correct o/p is 4 but my code gives 5.....
int editdist(string s,string t,int n,int m)
{
int d1,d2,d3,cost;
int i,j;
for(i=0;i<=n;i++)
{
for(j=0;j<=m;j++)
{
if(s[i+1]==t[j+1])
cost=0;
else
cost=1;
d1=d[i][j+1]+1;
d2=d[i+1][j]+1;
d3=d[i][j]+cost;
d[i+1][j+1]=minimum(d1,d2,d3);
if(i>0 && j>0 && s[i+1]==t[j] && s[i]==t[j+1] ) //transposition
{
d[i+1][j+1]=min(d[i+1][j+1],d[i-1][j-1]+cost);
}
}
}
return d[n+1][m+1];
}
I don't see any errors. Can someone find a problem with the code?

The algorithm in the post does not compute Damerau-Levenshtein distance. In a wikipedia article this algorithm is defined as the Optimal String Alignment Distance.
A java implementation of DL distance algorithm can be found in another SO post.
To get the correct values of OSA distance please change the lines marked with - below with the lines marked with +
int editdist(string s,string t,int n,int m)
{
int d1,d2,d3,cost;
int i,j;
for(i=0;i<=n;i++)
{
for(j=0;j<=m;j++)
{
- if(s[i+1]==t[j+1])
+ if(s[i+1]==t[j+1])
cost=0;
else
cost=1;
d1=d[i][j+1]+1;
d2=d[i+1][j]+1;
d3=d[i][j]+cost;
d[i+1][j+1]=minimum(d1,d2,d3);
- if(i>0 && j>0 && s[i+1]==t[j] && s[i]==t[j+1] ) //transposition
+ if(i>0 && j>0 && s[i]==t[j-1] && s[i-1]==t[j] ) //transposition
{
d[i+1][j+1]=min(d[i+1][j+1],d[i-1][j-1]+cost);
}
}
}
return d[n+1][m+1];
}
It looks as if the code was copied from a program written in a programming language where array indices start at 1 by default. Therefore all references to the elements of the distance array d were incremented. However the references to the characters within the strings are references to 0-based arrays, therefore they should not be updated.
To compute the distance the distance array has to be properly initialized:
for( i = 0; i < n + 1; i++)
d[i][0] = i;
for( j = 1; j < m + 1; j++)
d[0][j] = j;
Since you have got the answer 5, you probably have your distance array already initialized correctly.
Since the above algorithm does not compute the DL distance, here is a sketch of a C implementation of the DL algorithm (derived from the SO post with a java impl. derived from an ActionScript impl. in the Wikipedia article).
#define d(i,j) dd[(i) * (m+2) + (j) ]
#define min(x,y) ((x) < (y) ? (x) : (y))
#define min3(a,b,c) ((a)< (b) ? min((a),(c)) : min((b),(c)))
#define min4(a,b,c,d) ((a)< (b) ? min3((a),(c),(d)) : min3((b),(c),(d)))
int dprint(int* dd, int n,int m){
int i,j;
for (i=0; i < n+2;i++){
for (j=0;j < m+2; j++){
printf("%02d ",d(i,j));
}
printf("\n");
}
printf("\n");
return 0;
}
int dldist2(char *s, char* t, int n, int m) {
int *dd;
int i, j, cost, i1,j1,DB;
int INFINITY = n + m;
int DA[256 * sizeof(int)];
memset(DA, 0, sizeof(DA));
if (!(dd = (int*) malloc((n+2)*(m+2)*sizeof(int)))) {
return -1;
}
d(0,0) = INFINITY;
for(i = 0; i < n+1; i++) {
d(i+1,1) = i ;
d(i+1,0) = INFINITY;
}
for(j = 0; j<m+1; j++) {
d(1,j+1) = j ;
d(0,j+1) = INFINITY;
}
dprint(dd,n,m);
for(i = 1; i< n+1; i++) {
DB = 0;
for(j = 1; j< m+1; j++) {
i1 = DA[t[j-1]];
j1 = DB;
cost = ((s[i-1]==t[j-1])?0:1);
if(cost==0) DB = j;
d(i+1,j+1) =
min4(d(i,j)+cost,
d(i+1,j) + 1,
d(i,j+1)+1,
d(i1,j1) + (i-i1-1) + 1 + (j-j1-1));
}
DA[s[i-1]] = i;
dprint(dd,n,m);
}
cost = d(n+1,m+1);
free(dd);
return cost;
}

Here is my C++ version of this algorithm:
int damerau_levenshtein_distance(std::string p_string1, std::string p_string2)
{
int l_string_length1 = p_string1.length();
int l_string_length2 = p_string2.length();
int d[l_string_length1+1][l_string_length2+1];
int i;
int j;
int l_cost;
for (i = 0;i <= l_string_length1;i++)
{
d[i][0] = i;
}
for(j = 0; j<= l_string_length2; j++)
{
d[0][j] = j;
}
for (i = 1;i <= l_string_length1;i++)
{
for(j = 1; j<= l_string_length2; j++)
{
if( p_string1[i-1] == p_string2[j-1] )
{
l_cost = 0;
}
else
{
l_cost = 1;
}
d[i][j] = std::min(
d[i-1][j] + 1, // delete
std::min(d[i][j-1] + 1, // insert
d[i-1][j-1] + l_cost) // substitution
);
if( (i > 1) &&
(j > 1) &&
(p_string1[i-1] == p_string2[j-2]) &&
(p_string1[i-2] == p_string2[j-1])
)
{
d[i][j] = std::min(
d[i][j],
d[i-2][j-2] + l_cost // transposition
);
}
}
}
return d[l_string_length1][l_string_length2];
}

Related

Outputting differences in arrays

I have to make a code where the user inputs altitude readings and the code is supposed to output total climb, total descent, and net change. This is what I have below. I can't figure out how to code to have it output what I want it to.
#include <iostream>
using namespace std;
int main()
{
int array[2010], n, c, d, swap; //the array
printf("Enter number of elements\n");
scanf("%d", &n);
printf("Enter %d integers\n", n);
for (c=0; c < n; c++)
scanf("%d", &array[c]);
for (c=0 ; c < ( n - 1 ); c++)
{
for (d = 0 ; d < n - c - 1; d++)
{
if (array[d] > array[d+1]) /* For decreasing order use < */
{
swap = array[d];
array[d] = array[d+1];
array[d+1] = swap;
}
}
}
printf("Sorted list in ascending order:\n"); //lists in order
for ( c = 0 ; c < n ; c++ )
printf("%d\n", array[c]);
// Returns minimum difference between any pair
int findMinDiff(int arr[2010], int n); //supposed to find differce
{
// Initialize difference as infinite
int diff = INT_MAX;
// Find the min diff by comparing difference
// of all possible pairs in given array
for (int d=0; d<n-1; d++)
for (int j=d+1; j<n; j++)
if (abs(array[d] - array[d--]) < diff)
diff = abs(array[d] - array[d--]);
cout<<"Total Climb "<<diff<<endl;
}
system("pause");
return 0;
}

I don't see why you are sorting the array. Sorting the array may cause problems in calculating the "total climb" and "total descent".
My understanding is that this assignment is about calculating the difference between two numbers and processing that difference.
void Process_Data(int array[2010], unsigned int quantity_of_climbs)
{
int total_climb = 0;
int total_descent = 0;
int minimum_change = INT_MAX;
for (int i = 0; i < quantity_of_climbs - 1; ++i)
{
const int height_change = array[i] - array[i+1];
if (height_change > 0) // A "climb"
{
total_climb += height_change;
}
if (height_change < 0) // A "descent"
{
total_descent = (-1) * height_change; // Change from negative to positive.
}
const int abs_height_change = abs(height_change);
if (abs_height_change < minimum_change)
{
minimum_change = abs_height_change;
}
}
// Display results
}

Solving 5SUM in O(n^3) with strict memory limits

I need a way to solve the classic 5SUM problem without hashing or with a memory efficient way of hashing.
The problem asks you to find how many subsequences in a given array of length N have the sum equal to S
Ex:
Input
6 5
1 1 1 1 1 1
Output
6
The restrictions are:
N <= 1000 ( size of the array )
S <= 400000000 ( the sum of the subsequence )
Memory usage <= 5555 kbs
Execution time 2.2s
I'm pretty sure the excepted complexity is O(N^3). Due to the memory limitations hashing doesn't provide an actual O(1) time.
The best I got was 70 points using this code. ( I got TLE on 6 tests )
#include <iostream>
#include <fstream>
#include <algorithm>
#include <vector>
#define MAX 1003
#define MOD 10472
using namespace std;
ifstream in("take5.in");
ofstream out("take5.out");
vector<pair<int, int>> has[MOD];
int v[MAX];
int pnt;
vector<pair<int, int>>::iterator it;
inline void ins(int val) {
pnt = val%MOD;
it = lower_bound(has[pnt].begin(), has[pnt].end(), make_pair(val, -1));
if(it == has[pnt].end() || it->first != val) {
has[pnt].push_back({val, 1});
sort(has[pnt].begin(), has[pnt].end());
return;
}
it->second++;
}
inline int get(int val) {
pnt = val%MOD;
it = lower_bound(has[pnt].begin(), has[pnt].end(), make_pair(val, -1));
if(it == has[pnt].end() || it->first != val)
return 0;
return it->second;
}
int main() {
int n,S;
int ach = 0;
int am = 0;
int rez = 0;
in >> n >> S;
for(int i = 1; i <= n; i++)
in >> v[i];
sort(v+1, v+n+1);
for(int i = n; i >= 1; i--) {
if(v[i] > S)
continue;
for(int j = i+1; j <= n; j++) {
if(v[i]+v[j] > S)
break;
ins(v[i]+v[j]);
}
int I = i-1;
if(S-v[I] < 0)
continue;
for(int j = 1; j <= I-1; j++) {
if(S-v[I]-v[j] < 0)
break;
for(int k = 1; k <= j-1; k++) {
if(S-v[I]-v[j]-v[k] < 0)
break;
ach = S-v[I]-v[j]-v[k];
rez += get(ach);
}
}
}
out << rez << '\n';
return 0;
}

I think it can be done. We are looking for all subsets of 5 items in the array arr with the correct SUM. We have array with indexes 0..N-1. Third item of those five can have index i in range 2..N-3. We cycle through all those indexes. For every index i we generate all combinations of two numbers for index in range 0..i-1 on the left of index i and all combinations of two numbers for index in the range i+1..N-1 on the right of index i. For every index i there are less than N*N combinations on the left plus on the right side. We would store only sum for every combination, so it would not be more than 1000 * 1000 * 4 = 4MB.
Now we have two sequences of numbers (the sums) and task is this: Take one number from first sequence and one number from second sequence and get sum equal to Si = SUM - arr[i]. How many combinations are there? To do it efficiently, sequences have to be sorted. Say first is sorted ascending and have numbers a, a, a, b, c ,.... Second is sorted descending and have numbers Z, Z, Y, X, W, .... If a + Z > Si then we can throw Z away, because we do not have smaller number to match. If a + Z < Si we can throw away a, because we do not have bigger number to match. And if a + Z = Si we have 2 * 3 = 6 new combinations and get rid of both a and Z. If we get sorting for free, it is nice O(N^3) algorithm.
While sorting is not for free, it is O(N * N^2 * log(N^2)) = O(N^3 * log(N)). We need to do sorting in linear time, which is not possible. Or is it? In index i+1 we can reuse sequences from index i. There are only few new combinations for i+1 - only those that involve number arr[i] together with some number from index 0..i-1. If we sort them (and we can, because there are not N*N of them, but N at most), all we need is to merge two sorted sequences. And that can be done in linear time. We can even avoid sorting completely if we sort arr at the beginning. We just merge.
For second sequence the merging does not involve adding but removing, but it is very simmilar.
The implementation seems to work, but I expect there is off by one error somewhere ;-)
#include <iostream>
#include <fstream>
#include <algorithm>
#include <vector>
using namespace std;
int Generate(int arr[], int i, int sums[], int N, int NN)
{
int p1 = 0;
for (int i1 = 0; i1 < i - 1; ++i1)
{
int ai = arr[i1];
for (int i2 = i1 + 1; i2 < i; ++i2)
{
sums[p1++] = ai + arr[i2];
}
}
sort(sums, sums + p1);
return p1;
}
int Combinations(int n, int sums[], int p1, int p2, int NN)
{
int cnt = 0;
int a = 0;
int b = NN - p2;
do
{
int state = sums[a] + sums[b] - n;
if (state > 0) { ++b; }
else if (state < 0) { ++a; }
else
{
int cnta = 0;
int lastA = sums[a];
while (a < p1 && sums[a] == lastA) { a++; cnta++; }
int cntb = 0;
int lastB = sums[b];
while (b < NN && sums[b] == lastB) { b++; cntb++; }
cnt += cnta * cntb;
}
} while (b < NN && a < p1);
return cnt;
}
int Add(int arr[], int i, int sums[], int p2, int N, int NN)
{
int ii = N - 1;
int n = arr[i];
int nn = n + arr[ii--];
int ip = NN - p2;
int newP2 = p2 + N - i - 1;
for (int p = NN - newP2; p < NN; ++p)
{
if (ip < NN && (ii < i || sums[ip] > nn))
{
sums[p] = sums[ip++];
}
else
{
sums[p] = nn;
nn = n + arr[ii--];
}
}
return newP2;
}
int Remove(int arr[], int i, int sums[], int p1)
{
int ii = 0;
int n = arr[i];
int nn = n + arr[ii++];
int pp = 0;
int p = 0;
for (; p < p1 - i; ++p)
{
while (ii <= i && sums[pp] == nn)
{
++pp;
nn = n + arr[ii++];
}
sums[p] = sums[pp++];
}
return p;
}
int main() {
ifstream in("take5.in");
ofstream out("take5.out");
int N, SUM;
in >> N >> SUM;
int* arr = new int[N];
for (int i = 0; i < N; i++)
in >> arr[i];
sort(arr, arr + N);
int NN = (N - 3) * (N - 4) / 2 + 1;
int* sums = new int[NN];
int combinations = 0;
int p1 = 0;
int p2 = 1;
for (int i = N - 3; i >= 2; --i)
{
if (p1 == 0)
{
p1 = Generate(arr, i, sums, N, NN);
sums[NN - 1] = arr[N - 1] + arr[N - 2];
}
else
{
p1 = Remove(arr, i, sums, p1);
p2 = Add(arr, i + 1, sums, p2, N, NN);
}
combinations += Combinations(SUM - arr[i], sums, p1, p2, NN);
}
out << combinations << '\n';
return 0;
}

Trying to implement HeapSort

I am getting stuck on heapSort. I have some code but I think its pretty wrong since I'm having hard time compiling it. Any suggestions? Heap sort should be fairly easy to implement but I have bunch of syntax errors. Here's my code:
/* Framework for Heap Sort
* CS333 Spring 2011
*
*/
#include <stdio.h>
#define MAX_SIZE 1000000
int data[MAX_SIZE];
int n;
int j;
int parent(int j) {
if(j==1)
return 0;
if(j%2==0)
return ( (j / 2)-1);
else
return ( (j / 2));
}
int left(int j) {
return (2 * j) + 1;
}
int right(int j) {
return (2 * j) + 2;
}
void heapify(int data[], int j) {
int l = left(j), great;
int r = right(j);
if ( (data[l] > data[j]) && (l < sizeof(data))) {
great = l;
}
else {
great = j;
}
if ( (data[r] > data[great]) && (r < sizeof(data))) {
great = r;
}
if (great != j) {
int temp = data[j];
data[j] = data[great];
data[great] = temp;
heapify(data, great);
}
}
int BuildMaxHeap(int data[]) {
for (int j = (sizeof(data) - 1) / 2; j >= 0; j--) {
heapify(data, j);
return data;
}
}
void HeapSort(int data[]) {
BuildMaxHeap(data);
for (int j = sizeof(data); j > 0; j--) {
int temp = data[0];
data[0] = data[data.sizeof() - 1];
data[sizeof(data) - 1] = temp;
sizeof(data) = sizeof(data) - 1;
heapify(data, 0);
}
}
int main()
{
int i;
/* Read in the data */
n = 0;
while (scanf("%d", &data[n]) == 1)
++n;
/* Sort the numbers low to high */
HeapSort(data);
/* Print out the data */
for (i = 0; i < n; ++i)
printf("%d", data[i]);
}

Most of your problems seem to be in your HeapSort routine:
void HeapSort(int data[]) {
BuildMaxHeap(data);
for (int j = sizeof(data); j > 0; j--) {
When you pass an array to a function like this, what the function receives is actually a pointer. Using sizeof on that pointer will not tell you about the size of the data pointed to by the pointer -- it'll just tell you how many bytes the pointer itself occupies (typically 4). You probably want to pass the array size as a parameter:
void HeapSort(int *data, size_t data_size) {
and throughout the rest of the routine, you'll refer to data_size, not sizeof(data).
int temp = data[0];
data[0] = data[data.sizeof() - 1];
data[sizeof(data) - 1] = temp;
sizeof(data) = sizeof(data) - 1;
sizeof(whatever) is also essentially a constant, not a variable; you can't use it as the target of an assignment (but, again, using data_size as suggested above will let you do the assignment).

Iterating over subsets of any size

I can iterate over the subsets of size 1
for( int a = 0; a < size; a++ ) {
or subsets of size 2
for( int a1 = 0; a1 < size; a1++ ) {
for( int a2 = a1+1; a2 < size; a2++ ) {
or 3
for( int a1 = 0; a1 < size; a1++ ) {
for( int a2 = a1+1; a2 < size; a2++ ) {
for( int a3 = a2+1; a3 < size; a3++ ) {
But how to do this for subsets of size n?
This does the job, based on an answer by Adam Rosenfield
void iterate(int *a, int i, int size, int n)
{
int start = 0;
if( i > 0 ) start = a[i-1]+1;
for(a[i] = start; a[i] < n; a[i]++) {
if(i == n-1) {
// a is the array of indices of size n
for( int k = 0; k < size; k++ ) {
printf("%d ",a[k]);
}
printf("\n");
}
else
iterate(a, i+1, size, n);
}
}

You can use recursion:
void iterate(int *a, int i, int size, int n)
{
for(a[i] = 0; a[i] < size; a[i]++)
{
if(i == n-1)
DoStuff(a, n); // a is the array of indices of size n
else
iterate(a, i+1, size, n);
}
}
...
// Equivalent to 4 nested for loops
int a[4];
iterate(a, 0, size, 4);

You likely could do this with some recursion.

Here is something I used for a similar problem. It does not use recursion; rather, it uses a vector of indexes.
#include <vector>
template<class T>
class MultiForVar {
std::vector<T> _begin, _end, _vars;
inline int dim(){return _vars.size();}
public:
MultiForVar(std::vector<T> begin, std::vector<T> end) : _begin(begin), _end(end), _vars(_begin)
{
assert(begin.size() == end.size() and "Starting and ending vector<T> not the same size!" );
}
MultiForVar& operator ++()
{
++_vars[dim()-1];
for(int d = dim()-1; d > 0; --d)
{
if( _vars[d] >= _end[d] )
{
_vars[d] = _begin[d];
++_vars[d-1];
}
}
return *this;
}
bool done()
{
/*for(int d = 0; d < dim(); ++d)
if( _vars[d] < _end[d] )
return false;
return true;*/
return (_vars[0] >= _end[0]);
}
T operator[](int d)
{
return _vars.at(d);
}
int numDimensions(){
return dim();
}
std::vector<T>& getRaw(){
return _vars;
}
};

If I understand what you're asking correctly, another way to do it is to use bit-wise operators:
for(int i = 0; i < 1<<size; i++) {
for(int j = 0; j < size; j++) {
if(i & 1<<j) printf("%d ", a[j]);
}
printf("\n");
}

You need something the constructs the powerset of the original set. It's been a while since I've written that, but the psuedocode looks like
Powerset(a, size)
{
if(size == 0) return emptyset
subseta = Powerset(a, size-1) // Powerset of everything except last element
subsetb = appendToAll(a[size-1], subseta) // appends the last element to every set in subseta
return union(subseta, subsetb)
}

Sieve of Eratosthenes algorithm

I am currently reading "Programming: Principles and Practice Using C++", in Chapter 4 there is an exercise in which:
I need to make a program to calculate prime numbers between 1 and 100 using the Sieve of Eratosthenes algorithm.
This is the program I came up with:
#include <vector>
#include <iostream>
using namespace std;
//finds prime numbers using Sieve of Eratosthenes algorithm
vector<int> calc_primes(const int max);
int main()
{
const int max = 100;
vector<int> primes = calc_primes(max);
for(int i = 0; i < primes.size(); i++)
{
if(primes[i] != 0)
cout<<primes[i]<<endl;
}
return 0;
}
vector<int> calc_primes(const int max)
{
vector<int> primes;
for(int i = 2; i < max; i++)
{
primes.push_back(i);
}
for(int i = 0; i < primes.size(); i++)
{
if(!(primes[i] % 2) && primes[i] != 2)
primes[i] = 0;
else if(!(primes[i] % 3) && primes[i] != 3)
primes[i]= 0;
else if(!(primes[i] % 5) && primes[i] != 5)
primes[i]= 0;
else if(!(primes[i] % 7) && primes[i] != 7)
primes[i]= 0;
}
return primes;
}
Not the best or fastest, but I am still early in the book and don't know much about C++.
Now the problem, until max is not bigger than 500 all the values print on the console, if max > 500 not everything gets printed.
Am I doing something wrong?
P.S.: Also any constructive criticism would be greatly appreciated.

I have no idea why you're not getting all the output, as it looks like you should get everything. What output are you missing?
The sieve is implemented wrongly. Something like
vector<int> sieve;
vector<int> primes;
for (int i = 1; i < max + 1; ++i)
sieve.push_back(i); // you'll learn more efficient ways to handle this later
sieve[0]=0;
for (int i = 2; i < max + 1; ++i) { // there are lots of brace styles, this is mine
if (sieve[i-1] != 0) {
primes.push_back(sieve[i-1]);
for (int j = 2 * sieve[i-1]; j < max + 1; j += sieve[i-1]) {
sieve[j-1] = 0;
}
}
}
would implement the sieve. (Code above written off the top of my head; not guaranteed to work or even compile. I don't think it's got anything not covered by the end of chapter 4.)
Return primes as usual, and print out the entire contents.

Think of the sieve as a set.
Go through the set in order. For each value in thesive remove all numbers that are divisable by it.
#include <set>
#include <algorithm>
#include <iterator>
#include <iostream>
typedef std::set<int> Sieve;
int main()
{
static int const max = 100;
Sieve sieve;
for(int loop=2;loop < max;++loop)
{
sieve.insert(loop);
}
// A set is ordered.
// So going from beginning to end will give all the values in order.
for(Sieve::iterator loop = sieve.begin();loop != sieve.end();++loop)
{
// prime is the next item in the set
// It has not been deleted so it must be prime.
int prime = *loop;
// deleter will iterate over all the items from
// here to the end of the sieve and remove any
// that are divisable be this prime.
Sieve::iterator deleter = loop;
++deleter;
while(deleter != sieve.end())
{
if (((*deleter) % prime) == 0)
{
// If it is exactly divasable then it is not a prime
// So delete it from the sieve. Note the use of post
// increment here. This increments deleter but returns
// the old value to be used in the erase method.
sieve.erase(deleter++);
}
else
{
// Otherwise just increment the deleter.
++deleter;
}
}
}
// This copies all the values left in the sieve to the output.
// i.e. It prints all the primes.
std::copy(sieve.begin(),sieve.end(),std::ostream_iterator<int>(std::cout,"\n"));
}

From Algorithms and Data Structures:
void runEratosthenesSieve(int upperBound) {
int upperBoundSquareRoot = (int)sqrt((double)upperBound);
bool *isComposite = new bool[upperBound + 1];
memset(isComposite, 0, sizeof(bool) * (upperBound + 1));
for (int m = 2; m <= upperBoundSquareRoot; m++) {
if (!isComposite[m]) {
cout << m << " ";
for (int k = m * m; k <= upperBound; k += m)
isComposite[k] = true;
}
}
for (int m = upperBoundSquareRoot; m <= upperBound; m++)
if (!isComposite[m])
cout << m << " ";
delete [] isComposite;
}

Interestingly, nobody seems to have answered your question about the output problem. I don't see anything in the code that should effect the output depending on the value of max.
For what it's worth, on my Mac, I get all the output. It's wrong of course, since the algorithm isn't correct, but I do get all the output. You don't mention what platform you're running on, which might be useful if you continue to have output problems.
Here's a version of your code, minimally modified to follow the actual Sieve algorithm.
#include <vector>
#include <iostream>
using namespace std;
//finds prime numbers using Sieve of Eratosthenes algorithm
vector<int> calc_primes(const int max);
int main()
{
const int max = 100;
vector<int> primes = calc_primes(max);
for(int i = 0; i < primes.size(); i++)
{
if(primes[i] != 0)
cout<<primes[i]<<endl;
}
return 0;
}
vector<int> calc_primes(const int max)
{
vector<int> primes;
// fill vector with candidates
for(int i = 2; i < max; i++)
{
primes.push_back(i);
}
// for each value in the vector...
for(int i = 0; i < primes.size(); i++)
{
//get the value
int v = primes[i];
if (v!=0) {
//remove all multiples of the value
int x = i+v;
while(x < primes.size()) {
primes[x]=0;
x = x+v;
}
}
}
return primes;
}

In the code fragment below, the numbers are filtered before they are inserted into the vector. The divisors come from the vector.
I'm also passing the vector by reference. This means that the huge vector won't be copied from the function to the caller. (Large chunks of memory take long times to copy)
vector<unsigned int> primes;
void calc_primes(vector<unsigned int>& primes, const unsigned int MAX)
{
// If MAX is less than 2, return an empty vector
// because 2 is the first prime and can't be placed in the vector.
if (MAX < 2)
{
return;
}
// 2 is the initial and unusual prime, so enter it without calculations.
primes.push_back(2);
for (unsigned int number = 3; number < MAX; number += 2)
{
bool is_prime = true;
for (unsigned int index = 0; index < primes.size(); ++index)
{
if ((number % primes[k]) == 0)
{
is_prime = false;
break;
}
}
if (is_prime)
{
primes.push_back(number);
}
}
}
This not the most efficient algorithm, but it follows the Sieve algorithm.

below is my version which basically uses a bit vector of bool and then goes through the odd numbers and a fast add to find multiples to set to false. In the end a vector is constructed and returned to the client of the prime values.
std::vector<int> getSieveOfEratosthenes ( int max )
{
std::vector<bool> primes(max, true);
int sz = primes.size();
for ( int i = 3; i < sz ; i+=2 )
if ( primes[i] )
for ( int j = i * i; j < sz; j+=i)
primes[j] = false;
std::vector<int> ret;
ret.reserve(primes.size());
ret.push_back(2);
for ( int i = 3; i < sz; i+=2 )
if ( primes[i] )
ret.push_back(i);
return ret;
}

Here is a concise, well explained implementation using bool type:
#include <iostream>
#include <cmath>
void find_primes(bool[], unsigned int);
void print_primes(bool [], unsigned int);
//=========================================================================
int main()
{
const unsigned int max = 100;
bool sieve[max];
find_primes(sieve, max);
print_primes(sieve, max);
}
//=========================================================================
/*
Function: find_primes()
Use: find_primes(bool_array, size_of_array);
It marks all the prime numbers till the
number: size_of_array, in the form of the
indexes of the array with value: true.
It implemenets the Sieve of Eratosthenes,
consisted of:
a loop through the first "sqrt(size_of_array)"
numbers starting from the first prime (2).
a loop through all the indexes < size_of_array,
marking the ones satisfying the relation i^2 + n * i
as false, i.e. composite numbers, where i - known prime
number starting from 2.
*/
void find_primes(bool sieve[], unsigned int size)
{
// by definition 0 and 1 are not prime numbers
sieve[0] = false;
sieve[1] = false;
// all numbers <= max are potential candidates for primes
for (unsigned int i = 2; i <= size; ++i)
{
sieve[i] = true;
}
// loop through the first prime numbers < sqrt(max) (suggested by the algorithm)
unsigned int first_prime = 2;
for (unsigned int i = first_prime; i <= std::sqrt(double(size)); ++i)
{
// find multiples of primes till < max
if (sieve[i] = true)
{
// mark as composite: i^2 + n * i
for (unsigned int j = i * i; j <= size; j += i)
{
sieve[j] = false;
}
}
}
}
/*
Function: print_primes()
Use: print_primes(bool_array, size_of_array);
It prints all the prime numbers,
i.e. the indexes with value: true.
*/
void print_primes(bool sieve[], unsigned int size)
{
// all the indexes of the array marked as true are primes
for (unsigned int i = 0; i <= size; ++i)
{
if (sieve[i] == true)
{
std::cout << i <<" ";
}
}
}
covering the array case. A std::vector implementation will include minor changes such as reducing the functions to one parameter, through which the vector is passed by reference and the loops will use the vector size() member function instead of the reduced parameter.

Here is a more efficient version for Sieve of Eratosthenes algorithm that I implemented.
#include <iostream>
#include <cmath>
#include <set>
using namespace std;
void sieve(int n){
set<int> primes;
primes.insert(2);
for(int i=3; i<=n ; i+=2){
primes.insert(i);
}
int p=*primes.begin();
cout<<p<<"\n";
primes.erase(p);
int maxRoot = sqrt(*(primes.rbegin()));
while(primes.size()>0){
if(p>maxRoot){
while(primes.size()>0){
p=*primes.begin();
cout<<p<<"\n";
primes.erase(p);
}
break;
}
int i=p*p;
int temp = (*(primes.rbegin()));
while(i<=temp){
primes.erase(i);
i+=p;
i+=p;
}
p=*primes.begin();
cout<<p<<"\n";
primes.erase(p);
}
}
int main(){
int n;
n = 1000000;
sieve(n);
return 0;
}

Here's my implementation not sure if 100% correct though :
http://pastebin.com/M2R2J72d
#include<iostream>
#include <stdlib.h>
using namespace std;
void listPrimes(int x);
int main() {
listPrimes(5000);
}
void listPrimes(int x) {
bool *not_prime = new bool[x];
unsigned j = 0, i = 0;
for (i = 0; i <= x; i++) {
if (i < 2) {
not_prime[i] = true;
} else if (i % 2 == 0 && i != 2) {
not_prime[i] = true;
}
}
while (j <= x) {
for (i = j; i <= x; i++) {
if (!not_prime[i]) {
j = i;
break;
}
}
for (i = (j * 2); i <= x; i += j) {
not_prime[i] = true;
}
j++;
}
for ( i = 0; i <= x; i++) {
if (!not_prime[i])
cout << i << ' ';
}
return;
}

I am following the same book now. I have come up with the following implementation of the algorithm.
#include<iostream>
#include<string>
#include<vector>
#include<algorithm>
#include<cmath>
using namespace std;
inline void keep_window_open() { char ch; cin>>ch; }
int main ()
{
int max_no = 100;
vector <int> numbers (max_no - 1);
iota(numbers.begin(), numbers.end(), 2);
for (unsigned int ind = 0; ind < numbers.size(); ++ind)
{
for (unsigned int index = ind+1; index < numbers.size(); ++index)
{
if (numbers[index] % numbers[ind] == 0)
{
numbers.erase(numbers.begin() + index);
}
}
}
cout << "The primes are\n";
for (int primes: numbers)
{
cout << primes << '\n';
}
}

Here is my version:
#include "std_lib_facilities.h"
//helper function:check an int prime, x assumed positive.
bool check_prime(int x) {
bool check_result = true;
for (int i = 2; i < x; ++i){
if (x%i == 0){
check_result = false;
break;
}
}
return check_result;
}
//helper function:return the largest prime smaller than n(>=2).
int near_prime(int n) {
for (int i = n; i > 0; --i) {
if (check_prime(i)) { return i; break; }
}
}
vector<int> sieve_primes(int max_limit) {
vector<int> num;
vector<int> primes;
int stop = near_prime(max_limit);
for (int i = 2; i < max_limit+1; ++i) { num.push_back(i); }
int step = 2;
primes.push_back(2);
//stop when finding the last prime
while (step!=stop){
for (int i = step; i < max_limit+1; i+=step) {num[i-2] = 0; }
//the multiples set to 0, the first none zero element is a prime also step
for (int j = step; j < max_limit+1; ++j) {
if (num[j-2] != 0) { step = num[j-2]; break; }
}
primes.push_back(step);
}
return primes;
}
int main() {
int max_limit = 1000000;
vector<int> primes = sieve_primes(max_limit);
for (int i = 0; i < primes.size(); ++i) {
cout << primes[i] << ',';
}
}

Here is a classic method for doing this,
int main()
{
int max = 500;
vector<int> array(max); // vector of max numbers, initialized to default value 0
for (int i = 2; i < array.size(); ++ i) // loop for rang of numbers from 2 to max
{
// initialize j as a composite number; increment in consecutive composite numbers
for (int j = i * i; j < array.size(); j +=i)
array[j] = 1; // assign j to array[index] with value 1
}
for (int i = 2; i < array.size(); ++ i) // loop for rang of numbers from 2 to max
if (array[i] == 0) // array[index] with value 0 is a prime number
cout << i << '\n'; // get array[index] with value 0
return 0;
}

I think im late to this party but im reading the same book as you, this is the solution in came up with! Feel free to make suggestions (you or any!), for what im seeing here a couple of us extracted the operation to know if a number is multiple of another to a function.
#include "../../std_lib_facilities.h"
bool numIsMultipleOf(int n, int m) {
return n%m == 0;
}
int main() {
vector<int> rawCollection = {};
vector<int> numsToCheck = {2,3,5,7};
// Prepare raw collection
for (int i=2;i<=100;++i) {
rawCollection.push_back(i);
}
// Check multiples
for (int m: numsToCheck) {
vector<int> _temp = {};
for (int n: rawCollection) {
if (!numIsMultipleOf(n,m)||n==m) _temp.push_back(n);
}
rawCollection = _temp;
}
for (int p: rawCollection) {
cout<<"N("<<p<<")"<<" is prime.\n";
}
return 0;
}

Try this code it will be useful to you by using java question bank
import java.io.*;
class Sieve
{
public static void main(String[] args) throws IOException
{
int n = 0, primeCounter = 0;
double sqrt = 0;
BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
System.out.println(“Enter the n value : ”);
n = Integer.parseInt(br.readLine());
sqrt = Math.sqrt(n);
boolean[] prime = new boolean[n];
System.out.println(“\n\nThe primes upto ” + n + ” are : ”);
for (int i = 2; i<n; i++)
{
prime[i] = true;
}
for (int i = 2; i <= sqrt; i++)
{
for (int j = i * 2; j<n; j += i)
{
prime[j] = false;
}
}
for (int i = 0; i<prime.length; i++)
{
if (prime[i])
{
primeCounter++;
System.out.print(i + ” “);
}
}
prime = new boolean[0];
}
}

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

Damerau–Levenshtein distance (Edit Distance with Transposition) c implementation - c++

Related

Outputting differences in arrays

Solving 5SUM in O(n^3) with strict memory limits

Trying to implement HeapSort

Iterating over subsets of any size

Sieve of Eratosthenes algorithm

Categories

Resources