Square Root in C/C++ - c++

I am trying to implement my own square root function which gives square root's integral part only e.g. square root of 3 = 1.
I saw the method here and tried to implement the method
int mySqrt(int x)
{
int n = x;
x = pow(2, ceil(log(n) / log(2)) / 2);
int y=0;
while (y < x)
{
y = (x + n / x) / 2;
x = y;
}
return x;
}
The above method fails for input 8. Also, I don't get why it should work.
Also, I tried the method here
int mySqrt(int x)
{
if (x == 0) return 0;
int x0 = pow(2, (log(x) / log(2))/2) ;
int y = x0;
int diff = 10;
while (diff>0)
{
x0 = (x0 + x / x0) / 2; diff = y - x0;
y = x0;
if (diff<0) diff = diff * (-1);
}
return x0;
}
In this second way, for input 3 the loop continues ... indefinitely (x0 toggles between 1 and 2).
I am aware that both are essentially versions of Netwon's method but I can't figure out why they fail in certain cases and how could I make them work for all cases. I guess i have the correct logic in implementation. I debugged my code but still I can't find a way to make it work.

This one works for me:
uintmax_t zsqrt(uintmax_t x)
{
if(x==0) return 0;
uintmax_t yn = x; // The 'next' estimate
uintmax_t y = 0; // The result
uintmax_t yp; // The previous estimate
do{
yp = y;
y = yn;
yn = (y + x/y) >> 1; // Newton step
}while(yn ^ yp); // (yn != yp) shortcut for dumb compilers
return y;
}
returns floor(sqrt(x))
Instead of testing for 0 with a single estimate, test with 2 estimates.
When I was writing this, I noticed the result estimate would sometimes oscillate. This is because, if the exact result is a fraction, the algorithm could only jump between the two nearest values. So, terminating when the next estimate is the same as the previous will prevent an infinite loop.

Try this
int n,i;//n is the input number
i=0;
while(i<=n)
{
if((i*i)==n)
{
cout<<"The number has exact root : "<<i<<endl;
}
else if((i*i)>n)
{
cout<<"The integer part is "<<(i-1)<<endl;
}
i++;
}
Hope this helps.

You can try there C sqrt implementations :
// return the number that was multiplied by itself to reach N.
unsigned square_root_1(const unsigned num) {
unsigned a, b, c, d;
for (b = a = num, c = 1; a >>= 1; ++c);
for (c = 1 << (c & -2); c; c >>= 2) {
d = a + c;
a >>= 1;
if (b >= d)
b -= d, a += c;
}
return a;
}
// return the number that was multiplied by itself to reach N.
unsigned square_root_2(unsigned n){
unsigned a = n > 0, b;
if (n > 3)
for (a = n >> 1, b = (a + n / a) >> 1; b < a; a = b, b = (a + n / a) >> 1);
return a ;
}
Example of usage :
#include <assert.h>
int main(void){
unsigned num, res ;
num = 1847902954, res = square_root_1(num), assert(res == 42987);
num = 2, res = square_root_2(num), assert(res == 1);
num = 0, res = square_root_2(num), assert(res == 0);
}
Source

Related

terminated by signal SIGSEGV (Address boundary error) in recursive function

I'm trying to implement Karatsuba algorithm for multiplication. I'm kinda follow the pseudocode in this wiki page. But I'm always getting this error:
terminated by signal SIGSEGV (Address boundary error)
When I replaced the lines that cause the recursion to happen with something else:
z0 = multiply(a, c);
z1 = multiply(b, d);
z2 = multiply(a+b, c+d);
the error disappeared.
Here's my code:
#include <iostream>
#include <math.h>
long int multiply(int x, int y);
int get_length(int val);
int main()
{
int x = 0, y = 0;
long int result = 0;
std::cout << "Enter x: ";
std::cin >> x;
std::cout << "Enter y: ";
std::cin >> y;
result = multiply(x, y);
std::cout << "Result: " << result << std::endl;
return 0;
}
long int multiply(int x, int y)
{
if(x < 10 || y < 10) {
return x * y;
}
int x_len = get_length(x);
int y_len = get_length(y);
long int z0 = 0 , z1 = 0, z2 = 0;
int a = 0, b = 0, c = 0, d = 0;
a = x / pow(10, x_len);
b = x - (a * pow(10, x_len));
c = y / pow(10, y_len);
d = y - (c * pow(10, y_len));
z0 = multiply(a, c);
z1 = multiply(b, d);
z2 = multiply(a+b, c+d);
return (pow(10, x_len) * z0) + (pow(10, x_len/2) * (z2 - z1 - z0)) + z1;
}
int get_length(int val)
{
int count = 0;
while(val > 0) {
count++;
val /= 10;
}
return count;
}
I found the problem cause.
It was because of these lines:
a = x / pow(10, x_len);
b = x - (a * pow(10, x_len));
c = y / pow(10, y_len);
d = y - (c * pow(10, y_len));
It should be x_len / 2 instead of x_len and the same with y_len. Since it causes the recursion to be infinite.
You are using the pow function to do integer powers. It is not an integer function. Code your own pow function that's suitable for your application. For example:
int pow(int v, int q)
{
int ret = 1;
while (q > 1)
{
ret*=v;
q--;
}
return ret;
}
Make sure to put an int pow(int, int); at the top.

How to permute each digit of a number one step to the right?

How to create all possible numbers, starting from a given one, where all digits of the new ones are moved one slot to the right? For example if we have 1234. I want to generate 4123, 3412 and 2341.
What I have come out with so far is this:
int move_digits(int a)
{
int aux = 0;
aux = a % 10;
for(int i=pow(10, (number_digits(a) - 1)); i>0; i=i/10)
aux = aux * 10 + ((a % i) / (i/10));
return aux;
}
But it doesn't work.
The subprogram number_digits looks like this (it just counts how many digits the given number has):
int number_digits(int a)
{
int ct = 0;
while(a != 0)
{
a = a/10;
ct++;
}
return ct;
}
I think there is no need to write separate function number_digits.
I would write function move_digits simpler
#include <iostream>
#include <cmath>
int move_digits( int x )
{
int y = x;
double n = 0.0;
while ( y /= 10 ) ++n;
return ( x / 10 + x % 10 * std::pow( 10.0, n ) );
}
int main()
{
int x = 1234;
std::cout << x << std::endl;
std::cout << move_digits( x ) << std::endl;
}
Retrieving the last digit of n: n % 10.
To "cut off" the last digit, you could use number / 10.
Say you have a three-digit number n, then you can prepend a new digit d using 1000 * d + n
That said, you probably want to compute
aux = pow(10, number_digits - 1) * (aux % 10) + (aux / 10)
Calculatea/(number_digits(a) - 1) and a%(number_digits(a) - 1)
And your answer is (a%(number_digits(a) - 1))*10 + a/(number_digits(a) - 1)
int i =0 ;
int len = number_digits(a);
while(i < len){
cout << (a%(len - 1))*10 + a/(len - 1) <<endl;
a = (a%(len - 1))*10 + a/(len - 1);
}
void move_digits(int a)
{
int digits = 0;
int b = a;
while(b / 10 ){
digits++;
b = b / 10;
}
for (int i = 0; i < digits; ++i)
{
int c = a / 10;
int d = a % 10;
int res = c + pow(10, digits) * d;
printf("%d\n", res);
a = res;
}
printf("\n");
}
int main()
{
move_digits(12345);
}

Convert this recursive function to iterative

How can I convert this recursive function to an iterative function?
#include <cmath>
int M(int H, int T){
if (H == 0) return T;
if (H + 1 >= T) return pow(2, T) - 1;
return M(H - 1, T - 1) + M(H, T - 1) + 1;
}
Well it's a 3-line code but it's very hard for me to convert this to an iterative function. Because it has 2 variables. And I don't know anything about Stacks so I couldn't convert that.
My purpose for doing this is speed of the function. This function is too slow. I wanted to use map to make this faster but I have 3 variables M, H and T so I couldn't use map
you could use dynamic programming - start from the bottom up when H == 0 and T == 0 calculate M and iterate them. here is a link explaining how to do this for Fibonacci numbers, which are quite similar to your problem.
Check this,recursive and not recursive versions gave equal results for all inputs i gave so far. The idea is to keep intermediate results in matrix, where H is row index, T is col index, and the value is M(H,T). By the way, you can calculate it once and later just obtain the result from the matrix, so you will have performance O(1)
int array[10][10]={{0}};
int MNR(int H, int T)
{
if(array[H][T])
return array[H][T];
for(int i =0; i<= H;++i)
{
for(int j = 0; j<= T;++j)
{
if(i == 0)
array[i][j] = j;
else if( i+1 > j)
array[i][j] = pow(2,j) -1;
else
array[i][j] = array[i-1][j-1] + array[i][j-1] + 1;
}
}
return array[H][T];
}
int M(int H, int T)
{
if (H == 0) return T;
if (H + 1 >= T) return pow(2, T) - 1;
return M(H - 1, T - 1) + M(H, T - 1) + 1;
}
int main()
{
printf("%d\n", M(6,3));
printf("%d\n", MNR(6,3));
}
Unless you know the formula for n-th (in your case, (m,n)-th) element of the sequence, the easiest way is to simulate the recursion using a stack.
The code should look like the following:
#include <cmath>
#include <stack>
struct Data
{
public:
Data(int newH, int newT)
: T(newT), H(newH)
{
}
int H;
int T;
};
int M(int H, int T)
{
std::stack<Data> st;
st.push(Data(H, T));
int sum = 0;
while (st.size() > 0)
{
Data top = st.top();
st.pop();
if (top.H == 0)
sum += top.T;
else if (top.H + 1 >= top.T)
sum += pow(2, top.T) - 1;
else
{
st.push(Data(top.H - 1, top.T - 1));
st.push(Data(top.H, top.T - 1));
sum += 1;
}
}
return sum;
}
The main reason why this function is slow is because it has exponential complexity, and it keeps recalculating the same members again and again. One possible cure is memoize pattern (handily explained with examples in C++ here). The idea is to store every result in a structure with a quick access (e.g. an array) and every time you need it again, retrieve already precomputed result. Of course, this approach is limited by the size of your memory, so it won't work for extremely big numbers...
In your case, we could do something like that (keeping the recursion but memoizing the results):
#include <cmath>
#include <map>
#include <utility>
std::map<std::pair<int,int>,int> MM;
int M(int H, int T){
std::pair<int,int> key = std::make_pair(H,T);
std::map<std::pair<int,int>,int>::iterator found = MM.find(key);
if (found!=MM.end()) return found->second; // skip the calculations if we can
int result = 0;
if (H == 0) result = T;
else if (H + 1 >= T) result = pow(2, T) - 1;
else result = M(H - 1, T - 1) + M(H, T - 1) + 1;
MM[key] = result;
return result;
}
Regarding time complexity, C++ maps are tree maps, so searching there is of the order of N*log(N) where N is the size of the map (number of results which have been already computed). There are also hash maps for C++ which are part of the STL but not part of the standard library, as was already mentioned on SO. Hash map promises constant search time (the value of the constant is not specified though :) ), so you might also give them a try.
You may calculate using one demintional array. Little theory,
Let F(a,b) == M(H,T)
1. F(0,b) = b
2. F(a,b) = 2^b - 1, when a+1 >= b
3. F(a,b) = F(a-1,b-1) + F(a,b-1) + 1
Let G(x,y) = F(y,x) ,then
1. G(x,0) = x // RULE (1)
2. G(x,y) = 2^x - 1, when y+1 >= x // RULE (2)
3. G(x,y) = G(x-1,y-1) + G(x-1,y) + 1 // RULE(3) --> this is useful,
// because for G(x,y) need only G(x-1,?), i.e if G - is two deminsions array, then
// for calculating G[x][?] need only previous row G[x-1][?],
// so we need only last two rows of array.
// Here some values of G(x,y)
4. G(0,y) = 2^0 - 1 = 0 from (2) rule.
5. G(1,0) = 1 from (1) rule.
6. G(1,y) = 2^1 - 1 = 1, when y > 0, from (2) rule.
G(0,0) = 0, G(0,1) = 0, G(0,2) = 0, G(0,3) = 0 ...
G(1,0) = 1, G(1,1) = 1, G(1,2) = 1, G(1,3) = 1 ...
7. G(2,0) = 2 from (1) rule
8. G(2,1) = 2^2 - 1 = 3 from (2) rule
9. G(2,y) = 2^2 - 1 = 3 when y > 0, from (2) rule.
G(2,0) = 2, G(2,1) = 3, G(2,2) = 3, G(2,3) = 3, ....
10. G(3,0) = 3 from (1) rule
11. G(3,1) = G(2,0) + G(2,1) + 1 = 2 + 3 + 1 = 6 from (3) rule
12. G(3,2) = 2^3 - 1 = 7, from (2) rule
Now, how to calculate this G(x,y)
int M(int H, int T ) { return G(T,H); }
int G(int x, int y)
{
const int MAX_Y = 100; // or something else
int arr[2][MAX_Y] = {0} ;
int icurr = 0, inext = 1;
for(int xi = 0; xi < x; ++xi)
{
for( int yi = 0; yi <= y ;++yi)
{
if ( yi == 0 )
arr[inext][yi] = xi; // rule (1);
else if ( yi + 1 >= xi )
arr[inext][yi] = (1 << xi) - 1; // rule ( 2 )
else arr[inext][yi] =
arr[icurr][yi-1] + arr[icurr][yi] + 1; // rule (3)
}
icurr ^= 1; inext ^= 1; //swap(i1,i2);
}
return arr[icurr][y];
}
// Or some optimizing
int G(int x, int y)
{
const int MAX_Y = 100;
int arr[2][MAX_Y] = {0};
int icurr = 0, inext = 1;
for(int ix = 0; ix < x; ++ix)
{
arr[inext][0] = ix; // rule (1)
for(int iy = 1; iy < ix - 1; ++ iy)
arr[inext][iy] = arr[icurr][iy-1] + arr[icurr][iy] + 1; // rule (3)
for(int iy = max(0,ix-1); iy <= y; ++iy)
arr[inext][iy] = (1 << ix ) - 1; // rule(2)
icurr ^= 1 ; inext ^= 1;
}
return arr[icurr][y];
}

Miller-Rabin Primality test FIPS 186-3 implementation

Im trying to implement the Miller-Rabin primality test according to the description in FIPS 186-3 C.3.1. No matter what I do, I cannot get it to work. The instructions are pretty specific, and I dont think I missed anything, and yet Im getting true for non-prime values.
What did I do wrong?
template <typename R, typename S, typename T>
T POW(R base, S exponent, const T mod){
T result = 1;
while (exponent){
if (exponent & 1)
result = (result * base) % mod;
exponent >>= 1;
base = (base * base) % mod;
}
return result;
}
// used uint64_t to prevent overflow, but only testing with small numbers for now
bool MillerRabin_FIPS186(uint64_t w, unsigned int iterations = 50){
srand(time(0));
unsigned int a = 0;
uint64_t W = w - 1; // dont want to keep calculating w - 1
uint64_t m = W;
while (!(m & 1)){
m >>= 1;
a++;
}
// skipped getting wlen
// when i had this function using my custom arbitrary precision integer class,
// and could get len(w), getting it and using it in an actual RBG
// made no difference
for(unsigned int i = 0; i < iterations; i++){
uint64_t b = (rand() % (W - 3)) + 2; // 2 <= b <= w - 2
uint64_t z = POW(b, m, w);
if ((z == 1) || (z == W))
continue;
else
for(unsigned int j = 1; j < a; j++){
z = POW(z, 2, w);
if (z == W)
continue;
if (z == 1)
return 0;// Composite
}
}
return 1;// Probably Prime
}
this:
std::cout << MillerRabin_FIPS186(33) << std::endl;
std::cout << MillerRabin_FIPS186(35) << std::endl;
std::cout << MillerRabin_FIPS186(37) << std::endl;
std::cout << MillerRabin_FIPS186(39) << std::endl;
std::cout << MillerRabin_FIPS186(45) << std::endl;
std::cout << MillerRabin_FIPS186(49) << std::endl;
is giving me:
0
1
1
1
0
1
The only difference between your implementation and Wikipedia's is that you forgot the second return composite statement. You should have a return 0 at the end of the loop.
Edit: As pointed out by Daniel, there is a second difference. The continue is continuing the inner loop, rather than the outer loop like it's supposed to.
for(unsigned int i = 0; i < iterations; i++){
uint64_t b = (rand() % (W - 3)) + 2; // 2 <= b <= w - 2
uint64_t z = POW(b, m, w);
if ((z == 1) || (z == W))
continue;
else{
int continueOuter = 0;
for(unsigned int j = 1; j < a; j++){
z = POW(z, 2, w);
if (z == W)
continueOuter = 1;
break;
if (z == 1)
return 0;// Composite
}
if (continueOuter) {continue;}
}
return 0; //This is the line you're missing.
}
return 1;// Probably Prime
Also, if the input is even, it will always return probably prime since a is 0. You should add an extra check at the start for that.
In the inner loop,
for(unsigned int j = 1; j < a; j++){
z = POW(z, 2, w);
if (z == W)
continue;
if (z == 1)
return 0;// Composite
}
you should break; instead of continue; when z == W. By continueing, in the next iteration of that loop, if there is one, z will become 1 and the candidate is possibly wrongly declared composite. Here, that happens for 17, 41, 73, 89 and 97 among the primes less than 100.

CUDA not returning result

I am trying to make a fraction calculator that calculates on a cuda devise, below is first the sequential version and then my try for a parallel version.
It runs without error, but for some reason do it not give the result back, I have been trying to get this to work for 2 weeks now, but can’t find the error!
Serilized version
int f(int x, int c, int n);
int gcd(unsigned int u, unsigned int v);
int main ()
{
clock_t start = clock();
srand ( time(NULL) );
int x = 1;
int y = 2;
int d = 1;
int c = rand() % 100;
int n = 323;
if(n % y == 0)
d = y;
while(d == 1)
{
x = f(x, c, n);
y = f(f(y, c, n), c, n);
int abs = x - y;
if(abs < 0)
abs = abs * -1;
d = gcd(abs, n);
if(d == n)
{
printf("\nd == n");
c = 0;
while(c == 0 || c == -2)
c = rand() % 100;
x = 2;
y = 2;
}
}
int d2 = n/d;
printf("\nTime elapsed: %f", ((double)clock() - start) / CLOCKS_PER_SEC);
printf("\nResult: %d", d);
printf("\nResult2: %d", d2);
int dummyReadForPause;
scanf_s("%d",&dummyReadForPause);
}
int f(int x, int c, int n)
{
return (int)(pow((float)x, 2) + c) % n;
}
int gcd(unsigned int u, unsigned int v){
int shift;
/ * GCD(0,x) := x * /
if (u == 0 || v == 0)
return u | v;
/ * Let shift := lg K, where K is the greatest power of 2
dividing both u and v. * /
for (shift = 0; ((u | v) & 1) == 0; ++shift) {
u >>= 1;
v >>= 1;
}
while ((u & 1) == 0)
u >>= 1;
/ * From here on, u is always odd. * /
do {
while ((v & 1) == 0) / * Loop X * /
v >>= 1;
/ * Now u and v are both odd, so diff(u, v) is even.
Let u = min(u, v), v = diff(u, v)/2. * /
if (u < v) {
v -= u;
} else {
int diff = u - v;
u = v;
v = diff;
}
v >>= 1;
} while (v != 0);
return u << shift;
}
parallel version
#define threads 512
#define MaxBlocks 65535
#define RunningTheads (512*100)
__device__ int gcd(unsigned int u, unsigned int v)
{
int shift;
if (u == 0 || v == 0)
return u | v;
for (shift = 0; ((u | v) & 1) == 0; ++shift) {
u >>= 1;
v >>= 1;
}
while ((u & 1) == 0)
u >>= 1;
do {
while ((v & 1) == 0)
v >>= 1;
if (u < v) {
v -= u;
} else {
int diff = u - v;
u = v;
v = diff;
}
v >>= 1;
} while (v != 0);
return u << shift;
}
__device__ bool cuda_found;
__global__ void cudaKernal(int *cArray, int n, int *outr)
{
int index = blockIdx.x * threads + threadIdx.x;
int x = 1;
int y = 2;
int d = 4;
int c = cArray[index];
while(d == 1 && !cuda_found)
{
x = (int)(pow((float)x, 2) + c) % n;
y = (int)(pow((float)y, 2) + c) % n;
y = (int)(pow((float)y, 2) + c) % n;
int abs = x - y;
if(abs < 0)
abs = abs * -1;
d = gcd(abs, n);
}
if(d != 1 && !cuda_found)
{
cuda_found = true;
outr = &d;
}
}
int main ()
{
int n = 323;
int cArray[RunningTheads];
cArray[0] = 1;
for(int i = 1; i < RunningTheads-1; i++)
{
cArray[i] = i+2;
}
int dresult = 0;
int *dev_cArray;
int *dev_result;
HANDLE_ERROR(cudaMalloc((void**)&dev_cArray, RunningTheads*sizeof(int)));
HANDLE_ERROR(cudaMalloc((void**)&dev_result, sizeof(int)));
HANDLE_ERROR(cudaMemcpy(dev_cArray, cArray, RunningTheads*sizeof(int), cudaMemcpyHostToDevice));
int TotalBlocks = ceil((float)RunningTheads/(float)threads);
if(TotalBlocks > MaxBlocks)
TotalBlocks = MaxBlocks;
printf("Blocks: %d\n", TotalBlocks);
printf("Threads: %d\n\n", threads);
cudaKernal<<<TotalBlocks,threads>>>(dev_cArray, n, dev_result);
HANDLE_ERROR(cudaMemcpy(&dresult, dev_result, sizeof(int), cudaMemcpyDeviceToHost));
HANDLE_ERROR(cudaFree(dev_cArray));
HANDLE_ERROR(cudaFree(dev_result));
if(dresult == 0)
dresult = 1;
int d2 = n/dresult;
printf("\nResult: %d", dresult);
printf("\nResult2: %d", d2);
int dummyReadForPause;
scanf_s("%d",&dummyReadForPause);
}
Lets have a look at your kernel code:
__global__ void cudaKernal(int *cArray, int n, int *outr)
{
int index = blockIdx.x * threads + threadIdx.x;
int x = 1;
int y = 2;
int d = 4;
int c = cArray[index];
while(d == 1 && !cuda_found) // always false because d is always 4
{
x = (int)(pow((float)x, 2) + c) % n;
y = (int)(pow((float)y, 2) + c) % n;
y = (int)(pow((float)y, 2) + c) % n;
int abs = x - y;
if(abs < 0)
abs = abs * -1;
d = gcd(abs, n); // never writes to d because the loop won't
// be executed
}
if(d != 1 && !cuda_found) // maybe true if cuda_found was initalized
// with false
{
cuda_found = true; // Memory race here.
outr = &d; // you are changing the adresse where outr
// points to; the host code does not see this
// change. your cudaMemcpy dev -> host will copy
// the exact values back from device that have
// been uploaded by cudaMemcpy host -> dev
// if you want to set outr to 4 than write:
// *outr = d;
}
}
One of the problems is you don't return the result. In your code you just change outr which has local scope in your kernel function (i.e. changes are not seen outside this function). You should write *outr = d; to change the value of memory you're pointing with outr.
and I'm not sure if CUDA initializes global variables with zero. I mean are you sure cuda_found is always initialized with false?