I'm trying to do division on a uint128_t that is made up of 2 uint64_ts. Weirdly enough, the function works for uint64_ts with only the lower value set and the upper value = 0. I don't understand why.
Here's the code for the division and bit shift
class uint128_t{
private:
uint64_t UPPER, LOWER;
public:
// lots of stuff
uint128_t operator<<(int shift){
uint128_t out;
if (shift >= 128)
out = uint128_t(0, 0);
else if ((128 > shift) && (shift >= 64))
out = uint128_t(LOWER << (64 - shift), 0);
else if (shift < 64)
out = uint128_t((UPPER << shift) + (LOWER >> (64 - shift)), LOWER << shift);
return out;
}
uint128_t operator<<=(int shift){
*this = *this << shift;
return *this;
}
uint128_t operator/(uint128_t rhs){
// copy of numerator = copyn
uint128_t copyn(*this), quotient = 0;// constructor: uint128_t(T), uint128_t(S, T), uint128_t(uint128_t), etc
while (copyn >= rhs){
// copy of denomiator = copyd
// temp is the current quotient bit being worked with
uint128_t copyd(rhs), temp(1);
// shift the divosr to the highest bit
while (copyn > (copyd << 1)){
copyd <<= 1;
temp <<= 1;
}
copyn -= copyd;
quotient += temp;
}
return quotient;
}
// more stuff
};
Please ignore my blatant disregard for memory management.
out = uint128_t(LOWER << (64 - shift), 0); is wrong - it should be shift - 64 instead.
As a style note, ALL_CAPITALS are usually reserved for constants only. Variables and members should use mostly lowercase.
try this:
// some bit operations stuff
const unsigned char de_brujin_bit_map_64 [] =
{
0,1,2,7,3,13,8,19,4,25,14,28,9,34,20,40,5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57,
63,6,12,18,24,27,33,39,16,37,45,47,30,53,49,56,62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58
};
inline uint8_t trailing_zero_count(uint64_t x) { return x?de_brujin_bit_map_64[(lower_bit(x)*0x0218A392CD3D5DBFL) >> 58]:64; }
inline uint8_t leading_zero_count(uint64_t x) { return x?(63-de_brujin_bit_map_64[(upper_bit(x)*0x0218A392CD3D5DBFL) >> 58]):64; }
inline uint64_t lower_bit(uint64_t x) { return x & -(int64_t&)x; }
inline uint64_t upper_bit(uint64_t x)
{
if(!x) return 0;
x |= x >> 1; x |= x >> 2; x |= x >> 4; x |= x >> 8; x |= x >> 16; x |= x >> 32;
return (x >> 1) + 1;
}
inline uint128_t upper_bit(const uint128_t x)
{
if(x.upper()) return uint128_t(upper_bit(x.upper()), 0);
else return uint128_t(0, upper_bit(x.lower()));
}
inline uint128_t lower_bit(const uint128_t x)
{
if(x.lower()) return uint128_t(0, lower_bit(x.lower()));
else return uint128_t(lower_bit(x.upper()), 0);
}
inline uint8_t trailing_zero_count(const uint128_t& x) { return x.lower()?trailing_zero_count(x.lower()):(64+trailing_zero_count(x.upper())); }
inline uint8_t leading_zero_count(const uint128_t& x) { return x.upper()?leading_zero_count(x.upper()):(64+leading_zero_count(x.lower())); }
// division operator
uint128_t uint128_t::operator/(const uint128_t& rhs) const
{
if(rhs == 0) return uint128_t(0); // !!!! zero division
if(rhs == rhs) return uint128_t(1);
if(rhs > *this) return uint128_t(0);
if(rhs == 1) return *this;
if(!upper_ && !rhs.upper_) return uint128_t(0, lower_/rhs.lower_);
if(lower_bit(rhs) == rhs) return *this >> trailing_zero_count(rhs);
uint128_t result;
uint128_t bit_mask = upper_bit();
uint128_t denom = 1;
do
{
bit_mask >>= 1;
denom <<= 1;
if(*this & bit_mask) denom |= 1;
result <<= 1;
if(denom >= rhs) { denom -= rhs; result |= 1; }
}
while (bit_mask.lower_ != 1);
return result;
}
anyway, this version is a little bit faster :)
ensure, 4000 iterations against 127:
uint128_t divident = uint128_t(0xffffffffffffffffULL, 0xffffffffffffffffULL);
uint128_t divisor = 10;
{
uint32_t iter_count = 0;
uint128_t copyn(divident), quotient = 0;
while (copyn >= divisor)
{
++iter_count;
uint128_t copyd(divisor), temp(1);
while ((copyn >> 1) > copyd) { ++iter_count; copyd <<= 1; temp <<= 1; }
copyn -= copyd;
quotient += temp;
}
std::cout << "iterations: " << std::dec << iter_count << std::endl;
}
{
uint32_t iter_count = 0;
uint128_t bit_pos = dtl::bits::upper_bit(divident);
uint128_t tmp = 1, quotient = 0;
do
{
++iter_count;
bit_pos >>= 1;
tmp <<= 1;
if(divident & bit_pos) tmp |= 1;
quotient <<= 1;
if(tmp >= divisor) { tmp -= divisor; quotient |= 1; }
}
while (bit_pos != 1);
std::cout << "iterations: " << std::dec << iter_count << std::endl;
}
Related
I am given a nxn grid with filled with 1 or 0. I want to count the number of subgrids where the corner tiles are all 1s. My solution goes through all pairs of rows and counts the number of matching 1s then it uses the formula numOf1s * (numOf1s-1)/2 and adds to the result. However, when I submit my solution on https://cses.fi/problemset/task/2137, there is no output on inputs with n = 3000 (probably caused by some error). What could the error be?
int main()
{
int n; cin>> n;
vector<bitset<3000>> grid(n);
for(int i=0;i<n;i++){
cin >> grid[i];
}
long result = 0;
for(int i=0;i<n-1;i++){
for(int j=i+1;j<n;j++){
int count = (grid[i]&grid[j]).count();
result += (count*(count-1))/2;
}
}
cout << result;
}
This solution will cause a time limit exceeded. bitset::count() is O(n) in worst case. The total complexity of your code is O(n^3). In the worst-case the number of operations would be 3000^3 > 10^10 which is too large.
I'm not sure this solution is the best you can come up with, but it is based on the original solution, with a homebrew alternative for the bitset. This allows me to work with 64 bits blocks, and using a fast popcnt(). An hardware version would be even better, as it would be to work with AVX registers, but this should be more portable and it works on cses.fi. Basically instead of generating a long intersection bitset and later count the number of ones, the function count_common() makes a piece of the intersection and immediately uses it just to count the ones.
The stream extractor could be probably improved, saving some more time.
#include <iostream>
#include <array>
#include <cstdint>
#include <climits>
uint64_t popcnt(uint64_t v) {
v = v - ((v >> 1) & (uint64_t)~(uint64_t)0 / 3);
v = (v & (uint64_t)~(uint64_t)0 / 15 * 3) + ((v >> 2) & (uint64_t)~(uint64_t)0 / 15 * 3);
v = (v + (v >> 4)) & (uint64_t)~(uint64_t)0 / 255 * 15;
uint64_t c = (uint64_t)(v * ((uint64_t)~(uint64_t)0 / 255)) >> (sizeof(uint64_t) - 1) * CHAR_BIT;
return c;
}
struct line {
uint64_t cells_[47] = { 0 }; // 3000/64 = 47
uint64_t& operator[](int pos) { return cells_[pos]; }
const uint64_t& operator[](int pos) const { return cells_[pos]; }
};
uint64_t count_common(const line& a, const line& b) {
uint64_t u = 0;
for (int i = 0; i < 47; ++i) {
u += popcnt(a[i] & b[i]);
}
return u;
}
std::istream& operator>>(std::istream& is, line& ln) {
is >> std::ws;
int pos = 0;
uint64_t val = 0;
while (true) {
char ch = is.get();
if (is && ch == '\n') {
break;
}
if (ch == '1') {
val |= 1LL << (63 - pos % 64);
}
if ((pos + 1) % 64 == 0) {
ln[pos / 64] = val;
val = 0;
}
++pos;
}
if (pos % 64 != 0) {
ln[pos / 64] = val;
}
return is;
}
struct grid {
int n_;
std::array<line, 3000> data_;
line& operator[](int r) {
return data_[r];
}
};
std::istream& operator>>(std::istream& is, grid& g) {
is >> g.n_;
for (int r = 0; r < g.n_; ++r) {
is >> g[r];
}
return is;
}
int main()
{
grid g;
std::cin >> g;
uint64_t count = 0;
for (int r1 = 0; r1 < g.n_; ++r1) {
for (int r2 = r1 + 1; r2 < g.n_; ++r2) {
uint64_t n = count_common(g[r1], g[r2]);
count += n * (n - 1) / 2;
}
}
std::cout << count << '\n';
return 0;
}
I wrote this implementation of sqrt that is finite in complexity and precise up to the last digit when double is ieee754 double. The question is that is this portable on devices of various endian (assuming 0LL is still 64 bit)? get_fraction returns the 52bits plus the 1 bit at the begining. Small doubles are treated separately and ensured that they also have 1 in the 53rd bit. The c++ part numeric_limits nan can easily be replaced with a constant.
Code:
static inline constexpr int16_t get_exponent(double x)
{
uint64_t bits = *(uint64_t*)&x;
int16_t val = ((bits & 0x7FF0000000000000ULL) >> 52) - 1023;
if(val != -1023)
return val;
uint64_t temp_fractal= (bits & 0x000FFFFFFFFFFFFFULL);
for (int i = 51; i >= 0;--i) {
if(!(temp_fractal & (0x01ULL<<i))) --val;
else break;
}
return val;
}
static inline constexpr uint64_t get_fraction(double x)
{
uint64_t bits = *(uint64_t*)&x;
if (bits & 0x7FF0000000000000ULL)
return (bits & 0x000FFFFFFFFFFFFFULL) | 0x0010000000000000ULL;
uint64_t temp_fraction = bits & 0x000FFFFFFFFFFFFFULL;
for (int i = 51; i >= 0; --i) {
temp_fraction<<=1;
if(0x0010000000000000ULL & temp_fraction) break;
}
return temp_fraction;
}
static inline constexpr bool is_reserved(double x)
{
return get_exponent(x) == 1024;
}
static inline constexpr double my_abs(double x)
{
uint64_t bits = *(uint64_t*)&x;
bits &= 0x7FFFFFFFFFFFFFFFULL;
return *(double*)&bits;
}
constexpr double make_double(bool sign, int16_t exponent, uint64_t fractal)
{
uint64_t data = (fractal & 0x000FFFFFFFFFFFFFULL);
assert((fractal & 0xFFF0000000000000ULL) == 0x0010000000000000ULL);
if (exponent < -1023) {
fractal >>= (-1022 - exponent);
data = fractal;
exponent = -1023;
}
else if (exponent > 1023) {
return (1-2*sign)*std::numeric_limits<double>::infinity();
}
{
data |= ((uint64_t)((uint16_t)(exponent + 1023))) << 52;
if (sign)
data |= 0x8000000000000000ULL;
return *(double*)&data;
}
}
constexpr double my_sqrt(double x)
{
if(!x || is_reserved(x))
return x;
if(x < 0)
return -std::numeric_limits<double>::quiet_NaN();
uint64_t fraction = get_fraction(x);
int16_t exponent = get_exponent(x);
//C standard says it rounds to zero
int16_t half_exponent = ((exponent-1024)/2)+512;
uint64_t test_fraction = 0x0010000000000000ULL;
double test = make_double(0, half_exponent, test_fraction);
if (test * test > x) half_exponent -= 1;
//just to be safe
test = make_double(0, half_exponent, test_fraction);
if (test * test > x) half_exponent -= 1;
//find each bit except last one, binary search for result
for (int i = 51; i > 0; --i) {
test = make_double(0, half_exponent, test_fraction | (0x01ULL<<i));
if(test*test<x) test_fraction |= (0x01ULL << i);
}
double del1 = my_abs(x - test*test);
double temp = make_double(0, half_exponent, test_fraction | 0x01ULL);
double del2 = my_abs(x - temp * temp);
//see if the whole fraction needs to round up by one
if (x > temp * temp) {
test_fraction += 2;
//rounding up by one made the fraction too large
if (test_fraction >= 0x0020000000000000ULL) {
test_fraction >>= 1;
half_exponent -= 1;
}
double temp2 = make_double(0, half_exponent, test_fraction);
double del3 = my_abs(x - temp2 * temp2);
if(del3 <del2) return temp2;
else return temp;
}
else if(del2<del1) return temp;
else return make_double(0, half_exponent, test_fraction);
}
Edit: add some comments
Edit2: add missing functions
I have a unsigned 16 bit number,a ,and I need to get the value for ~a. Simply taking the value of ~a does not work( for a=10, I need ~a=5, not ~a=7FF5).
The best I could come up with is:
int negate(int a)
{
int mask1 = 0x4000;
int mask2 = 0x7FFF;
for (int i=0;i<15;i++)
{
if (!(a&mask1))
{
mask1>>=1;
mask2>>=1;
}
else
break;
}
int t = (0x7FFF - ~a) & mask2;
return t;
}
The problem with this is that it's too slow; do you know of a faster way to get the result I need?
Thanks for your help
Just to verify: what you want is to invert all the bits up to and including the most significant bit of the input that's set, but leave all the bits higher than that as zero's?
If that is indeed the case, then here is the code for that:
// inspired by Hacker's Delight
unsigned significant_bits(unsigned x)
{
x |= (x >> 1);
x |= (x >> 2);
x |= (x >> 4);
x |= (x >> 8);
// x |= (x >> 16); // only needed for 32-bit integers
return x;
}
unsigned negate(unsigned x)
{
return x ^ significant_bits(x);
}
It sounds like you want ~a & 0xF.
int negate(unsigned int a) {
if(a==0);
else if(a==1) a = 0;
else if(a<4) a ^= 0x0003;
else if(a<8) a ^= 0x0007;
else if(a<16) a ^= 0x000f;
else if(a<32) a ^= 0x001f;
else if(a<64) a ^= 0x003f;
else if(a<128) a ^= 0x007f;
else if(a<256) a ^= 0x00ff;
else if(a<512) a ^= 0x01ff;
else if(a<1024) a ^= 0x03ff;
else if(a<2048) a ^= 0x07ff;
else if(a<4096) a^= 0x0fff;
else if(a<8192) a ^= 0x1fff;
else if(a<16384) a ^= 0x3fff;
else if(a<32768) a ^= 0x7fff;
else a^=0xffff;
return a;
}
int main()
{
printf("%d",negate(10));
return 0;
}
If you want to increase the size say to a 32 bit number you could just add else if branches. The first case
a==0; the result is a itself
a==1; the result is 0, simple assigning 0 is fine .
Or simple you could put it in a loop and take a variable say i just left shift by 1 each time. and then return a ^ = (i - 1) ;
Pass the mask with the call to negate():
int negate(int a, int mask)
{
return ~a & mask;
}
Execution examples:
negate(0x000A, 0x000F) == 0x0005
negate(0x000A, 0x00F0) == 0x00F0
Update:
int negate(int num)
{
unsigned int bitMask = 0xFFFFFFFF;
for(unsigned int bit = 0x80000000; bit != 0; bit >>= 1)
{
if(bit & num)
break;
bitMask /= 2 ;
}
return ~num & bitMask;
}
I wonder how to reverse something like this. So having a mask where auto mask = 1ULL << 20; how to get 20 out from mask?
Loop-free
Many years ago when I was writing a bit-wise arithmetic for a chess engine, I found a fast implementation which is useful for your requirement, it's loop-free. This method will return the position of the first 1-bit from right-to-left (Least Significant Bit):
inline unsigned int lsb(unsigned long long value)
{
if (!value)
return -1;
value &= -value;
unsigned int lsb = (unsigned) value | (unsigned) (value >> 32);
return (((((((((((unsigned) (value >> 32) != 0) << 1)
+ ((lsb & 0xffff0000) != 0)) << 1)
+ ((lsb & 0xff00ff00) != 0)) << 1)
+ ((lsb & 0xf0f0f0f0) != 0)) << 1)
+ ((lsb & 0xcccccccc) != 0)) << 1)
+ ((lsb & 0xaaaaaaaa) != 0);
}
int main()
{
unsigned long long x = 1ULL<<20;
cout << lsb(x) << endl;
}
Output
20
I think, I had found it here.
Using log:
#include <iostream>
#include <cmath>
int main() {
auto mask = 1ULL << 20;
std::cout << log2(mask) << std::endl;
// edit out: std::cout << log(mask) / log(2) << std::endl;
return 0;
}
or loop and shift:
#include <iostream>
int main() {
auto mask = 1ULL << 20;
for (unsigned int c = 0; c < sizeof(mask) * 8 && mask; c++) {
mask >>= 1;
if (mask == 0)
std::cout << c << std::endl;
}
return 0;
}
If it's a 64-bit mask, you can compute it modulo 67 and do a table lookup.
To wit:
static int table[67] = {
-1, 0, 1,39, 2,15,40,23, 3,12,
16,59,41,19,24,54, 4,-1,13,10,
17,62,60,28,42,30,20,51,25,44,
55,47, 5,32,-1,38,14,22,11,58,
18,53,63, 9,61,27,29,50,43,46,
31,37,21,57,52, 8,26,49,45,36,
56, 7,48,35, 6,34,33};
int unmask(unsigned long long ull) {
return table[ull % 67];
}
//first if you want to make sure only 1 bit is "on" you can do that:
if ((mask & mask-1) != 0)
{
//you have more than 1 bit "on", deal with it...
}
//finding which bit is "on" can be achieve in a loop
int count 0;
while (mask > 1)
{
mask>>=1;
count++;
}
//At this point count will have the required value (20 in your example)
Option 1: iterate
while (mask && !(mask & 1)) { mask>>=1; count++; }
Option 2: iterate multiple bits at a time:
unsigned long long a=0xFFFFFFFFULL; int b=32;
while (mask>1) {
if (!(mask & a)) { count+=b; mask>>=b; }
b>>=1; mask>>=b;
}
Option 3: Convert the mask to double or float and extract the exponent.
union {
struct {
int mantissa:23;
int exp:7;
int sign:1;
} s;
float f;
} u = { (float) mask };
return u.s.exp + 1;
A simple loop will be quite okay:
for (int bit = 0; bit < sizeof(mask) * 8; bit++)
{
if ((1ULL << bit) & mask)
std::cout << "Bit " << bit << " is set in the mask\n";
}
How about a TMP solution:
#include <iostream>
template < unsigned long long MASK >
struct MaskIndex
{
enum { i = MaskIndex < MASK / 2 >::i + 1 };
};
template <>
struct MaskIndex < 1 >
{
enum { i = 0 };
};
int main()
{
const unsigned long long mask = 1ULL << 20;
std::cout << MaskIndex < mask >::i << std::endl;
return ( 0 );
}
You can try this..
if((1ULL<<20)&mask) {
cout << "20th bit is set";
}
So I've been working recently on an implementation of the Miller-Rabin primality test. I am limiting it to a scope of all 32-bit numbers, because this is a just-for-fun project that I am doing to familiarize myself with c++, and I don't want to have to work with anything 64-bits for awhile. An added bonus is that the algorithm is deterministic for all 32-bit numbers, so I can significantly increase efficiency because I know exactly what witnesses to test for.
So for low numbers, the algorithm works exceptionally well. However, part of the process relies upon modular exponentiation, that is (num ^ pow) % mod. so, for example,
3 ^ 2 % 5 =
9 % 5 =
4
here is the code I have been using for this modular exponentiation:
unsigned mod_pow(unsigned num, unsigned pow, unsigned mod)
{
unsigned test;
for(test = 1; pow; pow >>= 1)
{
if (pow & 1)
test = (test * num) % mod;
num = (num * num) % mod;
}
return test;
}
As you might have already guessed, problems arise when the arguments are all exceptionally large numbers. For example, if I want to test the number 673109 for primality, I will at one point have to find:
(2 ^ 168277) % 673109
now 2 ^ 168277 is an exceptionally large number, and somewhere in the process it overflows test, which results in an incorrect evaluation.
on the reverse side, arguments such as
4000111222 ^ 3 % 1608
also evaluate incorrectly, for much the same reason.
Does anyone have suggestions for modular exponentiation in a way that can prevent this overflow and/or manipulate it to produce the correct result? (the way I see it, overflow is just another form of modulo, that is num % (UINT_MAX+1))
Exponentiation by squaring still "works" for modulo exponentiation. Your problem isn't that 2 ^ 168277 is an exceptionally large number, it's that one of your intermediate results is a fairly large number (bigger than 2^32), because 673109 is bigger than 2^16.
So I think the following will do. It's possible I've missed a detail, but the basic idea works, and this is how "real" crypto code might do large mod-exponentiation (although not with 32 and 64 bit numbers, rather with bignums that never have to get bigger than 2 * log (modulus)):
Start with exponentiation by squaring, as you have.
Perform the actual squaring in a 64-bit unsigned integer.
Reduce modulo 673109 at each step to get back within the 32-bit range, as you do.
Obviously that's a bit awkward if your C++ implementation doesn't have a 64 bit integer, although you can always fake one.
There's an example on slide 22 here: http://www.cs.princeton.edu/courses/archive/spr05/cos126/lectures/22.pdf, although it uses very small numbers (less than 2^16), so it may not illustrate anything you don't already know.
Your other example, 4000111222 ^ 3 % 1608 would work in your current code if you just reduce 4000111222 modulo 1608 before you start. 1608 is small enough that you can safely multiply any two mod-1608 numbers in a 32 bit int.
I wrote something for this recently for RSA in C++, bit messy though.
#include "BigInteger.h"
#include <iostream>
#include <sstream>
#include <stack>
BigInteger::BigInteger() {
digits.push_back(0);
negative = false;
}
BigInteger::~BigInteger() {
}
void BigInteger::addWithoutSign(BigInteger& c, const BigInteger& a, const BigInteger& b) {
int sum_n_carry = 0;
int n = (int)a.digits.size();
if (n < (int)b.digits.size()) {
n = b.digits.size();
}
c.digits.resize(n);
for (int i = 0; i < n; ++i) {
unsigned short a_digit = 0;
unsigned short b_digit = 0;
if (i < (int)a.digits.size()) {
a_digit = a.digits[i];
}
if (i < (int)b.digits.size()) {
b_digit = b.digits[i];
}
sum_n_carry += a_digit + b_digit;
c.digits[i] = (sum_n_carry & 0xFFFF);
sum_n_carry >>= 16;
}
if (sum_n_carry != 0) {
putCarryInfront(c, sum_n_carry);
}
while (c.digits.size() > 1 && c.digits.back() == 0) {
c.digits.pop_back();
}
//std::cout << a.toString() << " + " << b.toString() << " == " << c.toString() << std::endl;
}
void BigInteger::subWithoutSign(BigInteger& c, const BigInteger& a, const BigInteger& b) {
int sub_n_borrow = 0;
int n = a.digits.size();
if (n < (int)b.digits.size())
n = (int)b.digits.size();
c.digits.resize(n);
for (int i = 0; i < n; ++i) {
unsigned short a_digit = 0;
unsigned short b_digit = 0;
if (i < (int)a.digits.size())
a_digit = a.digits[i];
if (i < (int)b.digits.size())
b_digit = b.digits[i];
sub_n_borrow += a_digit - b_digit;
if (sub_n_borrow >= 0) {
c.digits[i] = sub_n_borrow;
sub_n_borrow = 0;
} else {
c.digits[i] = 0x10000 + sub_n_borrow;
sub_n_borrow = -1;
}
}
while (c.digits.size() > 1 && c.digits.back() == 0) {
c.digits.pop_back();
}
//std::cout << a.toString() << " - " << b.toString() << " == " << c.toString() << std::endl;
}
int BigInteger::cmpWithoutSign(const BigInteger& a, const BigInteger& b) {
int n = (int)a.digits.size();
if (n < (int)b.digits.size())
n = (int)b.digits.size();
//std::cout << "cmp(" << a.toString() << ", " << b.toString() << ") == ";
for (int i = n-1; i >= 0; --i) {
unsigned short a_digit = 0;
unsigned short b_digit = 0;
if (i < (int)a.digits.size())
a_digit = a.digits[i];
if (i < (int)b.digits.size())
b_digit = b.digits[i];
if (a_digit < b_digit) {
//std::cout << "-1" << std::endl;
return -1;
} else if (a_digit > b_digit) {
//std::cout << "+1" << std::endl;
return +1;
}
}
//std::cout << "0" << std::endl;
return 0;
}
void BigInteger::multByDigitWithoutSign(BigInteger& c, const BigInteger& a, unsigned short b) {
unsigned int mult_n_carry = 0;
c.digits.clear();
c.digits.resize(a.digits.size());
for (int i = 0; i < (int)a.digits.size(); ++i) {
unsigned short a_digit = 0;
unsigned short b_digit = b;
if (i < (int)a.digits.size())
a_digit = a.digits[i];
mult_n_carry += a_digit * b_digit;
c.digits[i] = (mult_n_carry & 0xFFFF);
mult_n_carry >>= 16;
}
if (mult_n_carry != 0) {
putCarryInfront(c, mult_n_carry);
}
//std::cout << a.toString() << " x " << b << " == " << c.toString() << std::endl;
}
void BigInteger::shiftLeftByBase(BigInteger& b, const BigInteger& a, int times) {
b.digits.resize(a.digits.size() + times);
for (int i = 0; i < times; ++i) {
b.digits[i] = 0;
}
for (int i = 0; i < (int)a.digits.size(); ++i) {
b.digits[i + times] = a.digits[i];
}
}
void BigInteger::shiftRight(BigInteger& a) {
//std::cout << "shr " << a.toString() << " == ";
for (int i = 0; i < (int)a.digits.size(); ++i) {
a.digits[i] >>= 1;
if (i+1 < (int)a.digits.size()) {
if ((a.digits[i+1] & 0x1) != 0) {
a.digits[i] |= 0x8000;
}
}
}
//std::cout << a.toString() << std::endl;
}
void BigInteger::shiftLeft(BigInteger& a) {
bool lastBit = false;
for (int i = 0; i < (int)a.digits.size(); ++i) {
bool bit = (a.digits[i] & 0x8000) != 0;
a.digits[i] <<= 1;
if (lastBit)
a.digits[i] |= 1;
lastBit = bit;
}
if (lastBit) {
a.digits.push_back(1);
}
}
void BigInteger::putCarryInfront(BigInteger& a, unsigned short carry) {
BigInteger b;
b.negative = a.negative;
b.digits.resize(a.digits.size() + 1);
b.digits[a.digits.size()] = carry;
for (int i = 0; i < (int)a.digits.size(); ++i) {
b.digits[i] = a.digits[i];
}
a.digits.swap(b.digits);
}
void BigInteger::divideWithoutSign(BigInteger& c, BigInteger& d, const BigInteger& a, const BigInteger& b) {
c.digits.clear();
c.digits.push_back(0);
BigInteger two("2");
BigInteger e = b;
BigInteger f("1");
BigInteger g = a;
BigInteger one("1");
while (cmpWithoutSign(g, e) >= 0) {
shiftLeft(e);
shiftLeft(f);
}
shiftRight(e);
shiftRight(f);
while (cmpWithoutSign(g, b) >= 0) {
g -= e;
c += f;
while (cmpWithoutSign(g, e) < 0) {
shiftRight(e);
shiftRight(f);
}
}
e = c;
e *= b;
f = a;
f -= e;
d = f;
}
BigInteger::BigInteger(const BigInteger& other) {
digits = other.digits;
negative = other.negative;
}
BigInteger::BigInteger(const char* other) {
digits.push_back(0);
negative = false;
BigInteger ten;
ten.digits[0] = 10;
const char* c = other;
bool make_negative = false;
if (*c == '-') {
make_negative = true;
++c;
}
while (*c != 0) {
BigInteger digit;
digit.digits[0] = *c - '0';
*this *= ten;
*this += digit;
++c;
}
negative = make_negative;
}
bool BigInteger::isOdd() const {
return (digits[0] & 0x1) != 0;
}
BigInteger& BigInteger::operator=(const BigInteger& other) {
if (this == &other) // handle self assignment
return *this;
digits = other.digits;
negative = other.negative;
return *this;
}
BigInteger& BigInteger::operator+=(const BigInteger& other) {
BigInteger result;
if (negative) {
if (other.negative) {
result.negative = true;
addWithoutSign(result, *this, other);
} else {
int a = cmpWithoutSign(*this, other);
if (a < 0) {
result.negative = false;
subWithoutSign(result, other, *this);
} else if (a > 0) {
result.negative = true;
subWithoutSign(result, *this, other);
} else {
result.negative = false;
result.digits.clear();
result.digits.push_back(0);
}
}
} else {
if (other.negative) {
int a = cmpWithoutSign(*this, other);
if (a < 0) {
result.negative = true;
subWithoutSign(result, other, *this);
} else if (a > 0) {
result.negative = false;
subWithoutSign(result, *this, other);
} else {
result.negative = false;
result.digits.clear();
result.digits.push_back(0);
}
} else {
result.negative = false;
addWithoutSign(result, *this, other);
}
}
negative = result.negative;
digits.swap(result.digits);
return *this;
}
BigInteger& BigInteger::operator-=(const BigInteger& other) {
BigInteger neg_other = other;
neg_other.negative = !neg_other.negative;
return *this += neg_other;
}
BigInteger& BigInteger::operator*=(const BigInteger& other) {
BigInteger result;
for (int i = 0; i < (int)digits.size(); ++i) {
BigInteger mult;
multByDigitWithoutSign(mult, other, digits[i]);
BigInteger shift;
shiftLeftByBase(shift, mult, i);
BigInteger add;
addWithoutSign(add, result, shift);
result = add;
}
if (negative != other.negative) {
result.negative = true;
} else {
result.negative = false;
}
//std::cout << toString() << " x " << other.toString() << " == " << result.toString() << std::endl;
negative = result.negative;
digits.swap(result.digits);
return *this;
}
BigInteger& BigInteger::operator/=(const BigInteger& other) {
BigInteger result, tmp;
divideWithoutSign(result, tmp, *this, other);
result.negative = (negative != other.negative);
negative = result.negative;
digits.swap(result.digits);
return *this;
}
BigInteger& BigInteger::operator%=(const BigInteger& other) {
BigInteger c, d;
divideWithoutSign(c, d, *this, other);
*this = d;
return *this;
}
bool BigInteger::operator>(const BigInteger& other) const {
if (negative) {
if (other.negative) {
return cmpWithoutSign(*this, other) < 0;
} else {
return false;
}
} else {
if (other.negative) {
return true;
} else {
return cmpWithoutSign(*this, other) > 0;
}
}
}
BigInteger& BigInteger::powAssignUnderMod(const BigInteger& exponent, const BigInteger& modulus) {
BigInteger zero("0");
BigInteger one("1");
BigInteger e = exponent;
BigInteger base = *this;
*this = one;
while (cmpWithoutSign(e, zero) != 0) {
//std::cout << e.toString() << " : " << toString() << " : " << base.toString() << std::endl;
if (e.isOdd()) {
*this *= base;
*this %= modulus;
}
shiftRight(e);
base *= BigInteger(base);
base %= modulus;
}
return *this;
}
std::string BigInteger::toString() const {
std::ostringstream os;
if (negative)
os << "-";
BigInteger tmp = *this;
BigInteger zero("0");
BigInteger ten("10");
tmp.negative = false;
std::stack<char> s;
while (cmpWithoutSign(tmp, zero) != 0) {
BigInteger tmp2, tmp3;
divideWithoutSign(tmp2, tmp3, tmp, ten);
s.push((char)(tmp3.digits[0] + '0'));
tmp = tmp2;
}
while (!s.empty()) {
os << s.top();
s.pop();
}
/*
for (int i = digits.size()-1; i >= 0; --i) {
os << digits[i];
if (i != 0) {
os << ",";
}
}
*/
return os.str();
And an example usage.
BigInteger a("87682374682734687"), b("435983748957348957349857345"), c("2348927349872344")
// Will Calculate pow(87682374682734687, 435983748957348957349857345) % 2348927349872344
a.powAssignUnderMod(b, c);
Its fast too, and has unlimited number of digits.
Two things:
Are you using the appropriate data type? In other words, does UINT_MAX allow you to have 673109 as an argument?
No, it does not, since at one point you have Your code does not work because at one point you have num = 2^16 and the num = ... causes overflow. Use a bigger data type to hold this intermediate value.
How about taking modulo at every possible overflow oppertunity such as:
test = ((test % mod) * (num % mod)) % mod;
Edit:
unsigned mod_pow(unsigned num, unsigned pow, unsigned mod)
{
unsigned long long test;
unsigned long long n = num;
for(test = 1; pow; pow >>= 1)
{
if (pow & 1)
test = ((test % mod) * (n % mod)) % mod;
n = ((n % mod) * (n % mod)) % mod;
}
return test; /* note this is potentially lossy */
}
int main(int argc, char* argv[])
{
/* (2 ^ 168277) % 673109 */
printf("%u\n", mod_pow(2, 168277, 673109));
return 0;
}
package playTime;
public class play {
public static long count = 0;
public static long binSlots = 10;
public static long y = 645;
public static long finalValue = 1;
public static long x = 11;
public static void main(String[] args){
int[] binArray = new int[]{0,0,1,0,0,0,0,1,0,1};
x = BME(x, count, binArray);
System.out.print("\nfinal value:"+finalValue);
}
public static long BME(long x, long count, int[] binArray){
if(count == binSlots){
return finalValue;
}
if(binArray[(int) count] == 1){
finalValue = finalValue*x%y;
}
x = (x*x)%y;
System.out.print("Array("+binArray[(int) count]+") "
+"x("+x+")" +" finalVal("+ finalValue + ")\n");
count++;
return BME(x, count,binArray);
}
}
LL is for long long int
LL power_mod(LL a, LL k) {
if (k == 0)
return 1;
LL temp = power(a, k/2);
LL res;
res = ( ( temp % P ) * (temp % P) ) % P;
if (k % 2 == 1)
res = ((a % P) * (res % P)) % P;
return res;
}
Use the above recursive function for finding the mod exp of the number. This will not result in overflow because it calculates in a bottom up manner.
Sample test run for :
a = 2 and k = 168277 shows output to be 518358 which is correct and the function runs in O(log(k)) time;
You could use following identity:
(a * b) (mod m) === (a (mod m)) * (b (mod m)) (mod m)
Try using it straightforward way and incrementally improve.
if (pow & 1)
test = ((test % mod) * (num % mod)) % mod;
num = ((num % mod) * (num % mod)) % mod;