Fast n choose k mod p for large n? - c++

What I mean by "large n" is something in the millions. p is prime.
I've tried
http://apps.topcoder.com/wiki/display/tc/SRM+467
But the function seems to be incorrect (I tested it with 144 choose 6 mod 5 and it gives me 0 when it should give me 2)
I've tried
http://online-judge.uva.es/board/viewtopic.php?f=22&t=42690
But I don't understand it fully
I've also made a memoized recursive function that uses the logic (combinations(n-1, k-1, p)%p + combinations(n-1, k, p)%p) but it gives me stack overflow problems because n is large
I've tried Lucas Theorem but it appears to be either slow or inaccurate.
All I'm trying to do is create a fast/accurate n choose k mod p for large n. If anyone could help show me a good implementation for this I'd be very grateful. Thanks.
As requested, the memoized version that hits stack overflows for large n:
std::map<std::pair<long long, long long>, long long> memo;
long long combinations(long long n, long long k, long long p){
if (n < k) return 0;
if (0 == n) return 0;
if (0 == k) return 1;
if (n == k) return 1;
if (1 == k) return n;
map<std::pair<long long, long long>, long long>::iterator it;
if((it = memo.find(std::make_pair(n, k))) != memo.end()) {
return it->second;
}
else
{
long long value = (combinations(n-1, k-1,p)%p + combinations(n-1, k,p)%p)%p;
memo.insert(std::make_pair(std::make_pair(n, k), value));
return value;
}
}

So, here is how you can solve your problem.
Of course you know the formula:
comb(n,k) = n!/(k!*(n-k)!) = (n*(n-1)*...(n-k+1))/k!
(See http://en.wikipedia.org/wiki/Binomial_coefficient#Computing_the_value_of_binomial_coefficients)
You know how to compute the numerator:
// Numerator of comb(n, k): multiply n, n-1, ..., n-k+1 together,
// reducing modulo p after every multiplication.
long long res = 1;
for (long long i = n; i > n- k; --i) {
res = (res * i) % p;
}
Now, as p is prime, the reciprocal of each integer that is coprime with p is well defined, i.e. a^(-1) can be found. This can be done using Fermat's little theorem: a^(p-1) ≡ 1 (mod p) => a * a^(p-2) ≡ 1 (mod p), and so a^(-1) ≡ a^(p-2) (mod p).
Now all you need to do is to implement fast exponentiation(for example using the binary method):
// Returns a^k mod p using binary (square-and-multiply) exponentiation.
//
// a : base, may be any value (it is reduced into [0, p) first)
// k : exponent
// p : modulus
//
// The base is reduced before the loop so that cur * cur is a product of two
// values below p; without that, a large base overflows on the first squaring.
// Products still overflow long long once p exceeds roughly 3 * 10^9.
long long degree(long long a, long long k, long long p) {
    long long res = 1 % p;
    long long cur = a % p;
    if (cur < 0) {
        cur += p;
    }
    while (k) {
        if (k % 2) {
            res = (res * cur) % p;
        }
        k /= 2;
        cur = (cur * cur) % p;
    }
    return res;
}
And now you can add the denominator to our result:
// Fold the denominator into the running result: `res` already holds the
// numerator modulo p, so it must not be re-declared or reset here.
// Each factor i of k! is divided out by multiplying with its inverse
// i^(p-2) mod p; degree() takes the modulus as its third argument.
for (long long i = 1; i <= k; ++i) {
    res = (res * degree(i, p - 2, p)) % p;
}
Please note I am using long long everywhere to avoid type overflow. Of course you don't need to do k exponentiations - you can compute k!(mod p) and then divide only once:
// Accumulate k! modulo p so that only one modular inverse is needed.
long long denom = 1;
for (long long i = 1; i <= k; ++i) {
    denom = (denom * i) % p;
}
// Divide by k! via its inverse denom^(p-2) mod p; degree() takes the
// modulus as its third argument.
res = (res * degree(denom, p - 2, p)) % p;
EDIT: as per #dbaupp's comment if k >= p the k! will be equal to 0 modulo p and (k!)^-1 will not be defined. To avoid that first compute the degree with which p is in n*(n-1)...(n-k+1) and in k! and compare them:
// Returns the exponent of p in the factorisation of n!, i.e.
// floor(n/p) + floor(n/p^2) + ... .
//
// The sum is built by repeated division instead of growing a power of p:
// multiplying a running power by p overflows long long as soon as the next
// power exceeds the type's range, even though n itself is representable.
// Returns 0 when n < p.
int get_degree(long long n, long long p) { // returns the degree with which p is in n!
    long long degree_num = 0;
    for (long long quotient = n / p; quotient > 0; quotient /= p) {
        degree_num += quotient;
    }
    return static_cast<int>(degree_num);
}
// Returns C(n, k) mod p for prime p, also when k >= p.
//
// The power of p in the numerator n*(n-1)*...*(n-k+1) is compared with the
// power of p in k!. If the numerator holds more, the result is 0. Otherwise
// every factor is stripped of its p divisors, so the remaining denominator is
// invertible modulo p.
long long combinations(int n, int k, long long p) {
    // Out-of-range k: without this guard the numerator loop reaches i == 0,
    // and stripping p from 0 never terminates.
    if (k < 0 || k > n) {
        return 0;
    }
    int num_degree = get_degree(n, p) - get_degree(n - k, p);
    int den_degree = get_degree(k, p);
    if (num_degree > den_degree) {
        return 0;
    }
    long long res = 1;
    for (long long i = n; i > n - k; --i) {
        long long ti = i;
        while (ti % p == 0) {
            ti /= p;
        }
        res = (res * (ti % p)) % p;
    }
    // Build the p-free part of k! once and invert it once, instead of one
    // modular exponentiation per factor.
    long long denom = 1;
    for (long long i = 1; i <= k; ++i) {
        long long ti = i;
        while (ti % p == 0) {
            ti /= p;
        }
        denom = (denom * (ti % p)) % p;
    }
    return (res * degree(denom, p - 2, p)) % p;
}
EDIT: There is one more optimization that can be added to the solution above - instead of computing the inverse number of each multiple in k!, we can compute k!(mod p) and then compute the inverse of that number. Thus we have to pay the logarithm for the exponentiation only once. Of course again we have to discard the p divisors of each multiple. We only have to change the last loop with this:
// Replacement for the last loop: build the denominator once.
long long denom = 1;
for (long long i = 1; i <= k; ++i) {
long long ti = i;
// Drop every factor p from i so the accumulated product stays invertible modulo p.
while(ti % p == 0) {
ti /= p;
}
denom = (denom * ti) % p;
}
// One exponentiation: multiply by denom^(p-2) mod p.
res = (res * degree(denom, p-2, p)) % p;

For large k, we can reduce the work significantly by exploiting two fundamental facts:
If p is a prime, the exponent of p in the prime factorisation of n! is given by (n - s_p(n)) / (p-1), where s_p(n) is the sum of the digits of n in the base p representation (so for p = 2, it's popcount). Thus the exponent of p in the prime factorisation of choose(n,k) is (s_p(k) + s_p(n-k) - s_p(n)) / (p-1), in particular, it is zero if and only if the addition k + (n-k) has no carry when performed in base p (the exponent is the number of carries).
Wilson's theorem: p is a prime, if and only if (p-1)! ≡ (-1) (mod p).
The exponent of p in the factorisation of n! is usually calculated by
// Sums the successive quotients n/p, n/p^2, ... ; for n >= 0 this is the
// exponent of p in n!.
long long factorial_exponent(long long n, long long p)
{
    long long total = 0;
    for (;;) {
        n /= p;
        total += n;
        // The quotient is always added once before the stop check.
        if (n <= 0) {
            break;
        }
    }
    return total;
}
The check for divisibility of choose(n,k) by p is not strictly necessary, but it's reasonable to have that first, since it will often be the case, and then it's less work:
// Returns C(n, k) mod p for prime p.
//
// Trivial cases are answered directly. Then the exponent of p in n! is
// compared with the exponents in k! and (n-k)!: if n! holds more, C(n, k) is
// divisible by p and the result is 0. Everything else goes to
// choose_mod_one().
long long choose_mod(long long n, long long k, long long p)
{
    // We deal with the trivial cases first
    if (k < 0 || n < k) return 0;
    if (k == 0 || k == n) return 1;
    // Now check whether choose(n,k) is divisible by p
    // (factorial_exponent needs the prime as its second argument).
    if (factorial_exponent(n, p) > factorial_exponent(k, p) + factorial_exponent(n - k, p)) return 0;
    // If it's not divisible, do the generic work
    return choose_mod_one(n, k, p);
}
Now let us take a closer look at n!. We separate the numbers ≤ n into the multiples of p and the numbers coprime to p. With
n = q*p + r, 0 ≤ r < p
The multiples of p contribute p^q * q!. The numbers coprime to p contribute the product of (j*p + k), 1 ≤ k < p for 0 ≤ j < q, and the product of (q*p + k), 1 ≤ k ≤ r.
For the numbers coprime to p we will only be interested in the contribution modulo p. Each of the full runs j*p + k, 1 ≤ k < p is congruent to (p-1)! modulo p, so altogether they produce a contribution of (-1)^q modulo p. The last (possibly) incomplete run produces r! modulo p.
So if we write
n = a*p + A
k = b*p + B
n-k = c*p + C
we get
choose(n,k) = p^a * a!/ (p^b * b! * p^c * c!) * cop(a,A) / (cop(b,B) * cop(c,C))
where cop(m,r) is the product of all numbers coprime to p which are ≤ m*p + r.
There are two possibilities, a = b + c and A = B + C, or a = b + c + 1 and A = B + C - p.
In our calculation, we have eliminated the second possibility beforehand, but that is not essential.
In the first case, the explicit powers of p cancel, and we are left with
choose(n,k) = a! / (b! * c!) * cop(a,A) / (cop(b,B) * cop(c,C))
= choose(a,b) * cop(a,A) / (cop(b,B) * cop(c,C))
Any powers of p dividing choose(n,k) come from choose(a,b) - in our case, there will be none, since we've eliminated these cases before - and, although cop(a,A) / (cop(b,B) * cop(c,C)) need not be an integer (consider e.g. choose(19,9) (mod 5)), when considering the expression modulo p, cop(m,r) reduces to (-1)^m * r!, so, since a = b + c, the (-1) cancel and we are left with
choose(n,k) ≡ choose(a,b) * choose(A,B) (mod p)
In the second case, we find
choose(n,k) = choose(a,b) * p * cop(a,A)/ (cop(b,B) * cop(c,C))
since a = b + c + 1. The carry in the last digit means that A < B, so modulo p
p * cop(a,A) / (cop(b,B) * cop(c,C)) ≡ 0 = choose(A,B)
(where we can either replace the division with a multiplication by the modular inverse, or view it as a congruence of rational numbers, meaning the numerator is divisible by p). Anyway, we again find
choose(n,k) ≡ choose(a,b) * choose(A,B) (mod p)
Now we can recur for the choose(a,b) part.
Example:
choose(144,6) (mod 5)
144 = 28 * 5 + 4
6 = 1 * 5 + 1
choose(144,6) ≡ choose(28,1) * choose(4,1) (mod 5)
≡ choose(3,1) * choose(4,1) (mod 5)
≡ 3 * 4 = 12 ≡ 2 (mod 5)
choose(12349,789) ≡ choose(2469,157) * choose(4,4)
≡ choose(493,31) * choose(4,2) * choose(4,4)
≡ choose(98,6) * choose(3,1) * choose(4,2) * choose(4,4)
≡ choose(19,1) * choose(3,1) * choose(3,1) * choose(4,2) * choose(4,4)
≡ 4 * 3 * 3 * 1 * 1 = 36 ≡ 1 (mod 5)
Now the implementation:
// Preconditions: 0 <= k <= n; p > 1 prime
// Preconditions: 0 <= k <= n; p > 1 prime
// Multiplies the binomial coefficients of the base-p digits of n and k,
// lowest digit first, reducing modulo p after every factor.
long long choose_mod_one(long long n, long long k, long long p)
{
    long long product = 1;
    // Peel off one base-p digit per round while k still has a higher digit.
    while (k >= p) {
        product = product * choose_mod_two(n % p, k % p, p) % p;
        n /= p;
        k /= p;
    }
    // What is left has k < p and needs no further splitting.
    return product * choose_mod_two(n, k, p) % p;
}
// Preconditions: 0 <= k <= min(n,p-1); p > 1 prime
// Preconditions: 0 <= k <= min(n,p-1); p > 1 prime
// Computes C(n mod p, k) mod p as numerator times inverted denominator.
long long choose_mod_two(long long n, long long k, long long p)
{
    n %= p;
    if (k > n) {
        return 0;
    }
    if (k == 0 || k == n) {
        return 1;
    }
    // Use the smaller of k and n-k to shorten the product.
    long long remaining = (k > n / 2) ? n - k : k;
    long long numerator = n;
    long long denominator = 1;
    for (long long factor = n - 1; remaining > 1; --factor, --remaining) {
        numerator = (numerator * factor) % p;
        denominator = (denominator * remaining) % p;
    }
    return (numerator * invert_mod(denominator, p)) % p;
}
To calculate the modular inverse, you can use Fermat's (so-called little) theorem
If p is prime and a not divisible by p, then a^(p-1) ≡ 1 (mod p).
and calculate the inverse as a^(p-2) (mod p), or use a method applicable to a wider range of arguments, the extended Euclidean algorithm or continued fraction expansion, which give you the modular inverse for any pair of coprime (positive) integers:
// Modular inverse of k modulo m via the continued fraction expansion of k/m
// (extended Euclidean algorithm). Meaningful for coprime k and m.
// For m == 0 it returns k when k is 1 or -1, and 0 otherwise.
long long invert_mod(long long k, long long m)
{
    if (m == 0) {
        if (k == 1 || k == -1) {
            return k;
        }
        return 0;
    }
    const long long modulus = (m < 0) ? -m : m;
    long long reduced = k % modulus;
    if (reduced < 0) {
        reduced += modulus;
    }
    // Toggles on every division step; decides the sign of the result.
    bool flip = true;
    long long current = 1;
    long long previous = 0;
    long long divisor = reduced;
    long long dividend = modulus;
    while (divisor > 0) {
        const long long quotient = dividend / divisor;
        const long long remainder = dividend % divisor;
        const long long next = quotient * current + previous;
        previous = current;
        current = next;
        dividend = divisor;
        divisor = remainder;
        flip = !flip;
    }
    return flip ? modulus - previous : previous;
}
Like calculating a^(p-2) (mod p), this is an O(log p) algorithm, for some inputs it's significantly faster (it's actually O(min(log k, log p)), so for small k and large p, it's considerably faster), for others it's slower.
Overall, this way we need to calculate at most O(log_p k) binomial coefficients modulo p, where each binomial coefficient needs at most O(p) operations, yielding a total complexity of O(p*log_p k) operations.
When k is significantly larger than p, that is much better than the O(k) solution. For k <= p, it reduces to the O(k) solution with some overhead.

If you're calculating it more than once, there's another way that's faster. I'm going to post code in python because it'll probably be the easiest to convert into another language, although I'll put the C++ code at the end.
Calculating Once
Brute force:
def choose(n, k, m):
    ''' Brute-force C(n, k) mod m: exact big-integer product, reduced at the end '''
    ans = 1
    for i in range(k):
        ans *= (n - i)
    # Divide by 1..k; starting the range at 0 would divide by zero.
    for i in range(1, k + 1):
        ans //= i
    return ans % m
But the calculation can get into very big numbers, so we can use modular arithmetic tricks instead:
(a * b) mod m = (a mod m) * (b mod m) mod m
(a / (b*c)) mod m = (a mod m) / ((b mod m) * (c mod m) mod m)
(a / b) mod m = (a mod m) * (b mod m)^-1
Note the ^-1 at the end of the last equation. This is the multiplicative inverse of b mod m. It basically means that ((b mod m) * (b mod m)^-1) mod m = 1, just like how a * a^-1 = a * 1/a = 1 with (non-zero) integers.
This can be calculated in a few ways, one of which is the extended euclidean algorithm:
def multinv(n, m):
    ''' Multiplicative inverse of n mod m '''
    if m == 1:
        return 0
    modulus = m
    prev, cur = 0, 1
    while n > 1:
        quotient = n // m
        prev, cur = cur - quotient * prev, prev
        n, m = m, n % m
    # The coefficient can come out negative; shift it into [0, modulus).
    if cur < 0:
        return cur + modulus
    return cur
Note that another method, exponentiation, works only if m is prime. If it is, you can do this:
def powmod(b, e, m):
    ''' b^e mod m '''
    # Python already ships pow(b, e, m), which is probably faster;
    # this explicit loop is meant for porting to languages such as C++.
    acc = 1
    while e:
        if e & 1:
            acc = (acc * b) % m
        b = (b * b) % m
        e >>= 1
    return acc
def multinv(n, m):
    ''' Multiplicative inverse of n mod m, only if m is prime '''
    # Fermat: n^(m-2) is the inverse of n modulo a prime m.
    exponent = m - 2
    return powmod(n, exponent, m)
But note that the Extended Euclidean Algorithm tends to still run faster, even though they technically have the same time complexity, O(log m), because it has a lower constant factor.
So now the full code:
def multinv(n, m):
    ''' Multiplicative inverse of n mod m in log(m) '''
    if m == 1: return 0
    m0, y, x = m, 0, 1
    while n > 1:
        y, x = x - n//m*y, y
        m, n = n%m, m
    return x+m0 if x < 0 else x

def choose(n, k, m):
    ''' C(n, k) mod m; needs k! to be invertible mod m (e.g. m prime and k < m) '''
    num = den = 1
    for i in range(k):
        num = num * (n - i) % m
    # k! = 1 * 2 * ... * k; starting the range at 0 would zero the denominator.
    for i in range(1, k + 1):
        den = den * i % m
    # Reduce the final product too, otherwise the result can exceed m.
    return num * multinv(den, m) % m
Querying Multiple Times
We can calculate the numerator and denominator separately, and then combine them. But notice that the product we're calculating for the numerator is n * (n-1) * (n-2) * (n-3) ... * (n-k+1). If you've ever learned about something called prefix sums, this is awfully similar. So let's apply it.
Precalculate fact[i] = i! mod m for i up to whatever the max value of n is, maybe 1e7 (ten million). Then, the numerator is (fact[n] * fact[n-k]^-1) mod m, and the denominator is fact[k]. So we can calculate choose(n, k, m) = fact[n] * multinv(fact[n-k], m) % m * multinv(fact[k], m) % m.
Python code:
MAXN = 1000 # Increase if necessary
MOD = 10**9+7 # A common mod that's used, change if necessary

# fact[i] holds i! mod MOD for every i in 0..MAXN.
fact = [1] * (MAXN + 1)
for i in range(1, MAXN + 1):
    fact[i] = fact[i - 1] * i % MOD

def multinv(n, m):
    ''' Multiplicative inverse of n mod m in log(m) '''
    if m == 1:
        return 0
    modulus = m
    prev, cur = 0, 1
    while n > 1:
        quotient = n // m
        prev, cur = cur - quotient * prev, prev
        n, m = m, n % m
    if cur < 0:
        return cur + modulus
    return cur

def choose(n, k, m):
    ''' C(n, k) mod m from the precomputed factorial table '''
    denominator = fact[n - k] * fact[k] % m
    return fact[n] * multinv(denominator, m) % m
C++ code:
#include <iostream>
using namespace std;
// Largest n for which fact[n] is available. It must cover every n passed to
// choose(); the example program queries n = 1e6, so the table has to reach
// at least that far (a smaller MAXN makes that query read past the array).
const int MAXN = 1000000; // Increase if necessary
// Written as an integer literal to avoid a floating-point to int conversion.
const int MOD = 1000000007; // A common mod that's used, change if necessary
// fact[i] is filled with i! mod MOD at program start.
int fact[MAXN+1];
/* Multiplicative inverse of n mod m in log(m), via the extended Euclidean algorithm */
int multinv(int n, int m) {
    if (m == 1) {
        return 0;
    }
    const int modulus = m;
    int prev = 0;
    int cur = 1;
    while (n > 1) {
        const int quotient = n / m;
        const int next_prev = cur - quotient * prev;
        cur = prev;
        prev = next_prev;
        const int remainder = n % m;
        n = m;
        m = remainder;
    }
    // A negative coefficient is shifted into [0, modulus).
    if (cur < 0) {
        return cur + modulus;
    }
    return cur;
}
// Returns C(n, k) mod m from the precomputed factorial table:
// fact[n] * (fact[n-k] * fact[k])^-1 mod m.
// n must not exceed the size of the table; k outside [0, n] yields 0
// (it would otherwise index the table with a negative value).
int choose(int n, int k, int m) {
    if (k < 0 || k > n) {
        return 0;
    }
    // Widen before multiplying: the product of two residues overflows int.
    const long long denominator = static_cast<long long>(fact[n-k]) * fact[k] % m;
    return static_cast<long long>(fact[n]) * multinv(denominator, m) % m;
}
// Fills the factorial table modulo MOD, then prints two sample queries.
int main() {
    fact[0] = 1;
    int i = 1;
    while (i <= MAXN) {
        // Widen to long long so the product cannot overflow before the reduction.
        const long long next = static_cast<long long>(fact[i-1]) * i % MOD;
        fact[i] = next;
        ++i;
    }
    cout << choose(4, 2, MOD) << '\n';
    cout << choose(1e6, 1e3, MOD) << '\n';
}
Note that I'm casting to long long to avoid overflow.

Related

If NxM multiplication table put in order, what is number on K position?

If I have multiplication table 3x4
1 2 3 4
2 4 6 8
3 6 9 12
and put all these numbers in the order:
1 2 2 3 3 4 4 6 6 8 9 12
What number at the K position?
For example, if K = 5, then this is number 3.
N and M in the range 1 to 500 000. K is always less then N * M.
I've tried to use binary-search like in this(If an NxM multiplication table is put in order, what is number in the middle?) solution, but there some mistake if desired value not in the middle of sequence.
// Returns the k-th smallest value (1-based) of the n x m multiplication table.
//
// count(x) = sum over i = 1..m of min(x / i, n) is the number of table
// entries <= x and never decreases as x grows. The answer is the smallest x
// with count(x) >= k. For that x, count(x) > count(x - 1), so x really
// occurs in the table.
//
// The search interval only ever shrinks (lo = mid + 1 or hi = mid), which
// guarantees termination. 64-bit arithmetic is used throughout because n * m
// can reach 2.5 * 10^11.
long findK(long n, long m, long k)
{
    long long lo = 1;
    long long hi = static_cast<long long>(n) * m;
    while (lo < hi) {
        const long long mid = lo + (hi - lo) / 2;
        long long count = 0;
        for (long long i = 1; i <= m; i++) {
            count += std::min<long long>(mid / i, n);
        }
        if (count < k) {
            lo = mid + 1;   // too few entries <= mid: the answer is larger
        } else {
            hi = mid;       // mid is a candidate: keep it in range
        }
    }
    return static_cast<long>(lo);
}
For example, when N = 1000, M = 1000, K = 876543; expected value is 546970, but returned 546972.
I believe that the breakthrough will lie with counting the quantity of factorizations of each integer up to the desired point. For each integer prod, you need to count how many simple factorizations i*j there are with i <= m, j <= n. See the divisor functions.
You need to iterate prod until you reach the desired point, midpt = N*M / 2. Cumulatively subtract σ0(prod) from midpt until you reach 0. Note that once prod passes min(i, j), you need to start cropping the divisor count, due to running off the edge of the multiplication table.
Is that enough to get you started?
Code of third method from this(https://leetcode.com/articles/kth-smallest-number-in-multiplication-table/#) site solve the problem.
// Returns true when at least k entries of the m x n multiplication table are
// <= x. Row i contributes min(x / i, n) such entries.
// 64-bit types are used because the count can reach m * n, which exceeds the
// range of int for tables as large as 500000 x 500000.
bool enough(long long x, long long m, long long n, long long k) {
    long long count = 0;
    for (long long i = 1; i <= m; i++) {
        count += std::min(x / i, n);
    }
    return count >= k;
}

// Returns the k-th smallest value (1-based) of the m x n multiplication
// table: the smallest x for which enough(x, ...) holds, found by binary search.
long long findK(long long m, long long n, long long k) {
    long long lo = 1, hi = m * n;
    while (lo < hi) {
        const long long mi = lo + (hi - lo) / 2;
        if (!enough(mi, m, n, k)) lo = mi + 1;
        else hi = mi;
    }
    return lo;
}

"Maximum Sum mod M" ranges in an array: sum and count

Problem
Given an array A = a0,a1,...an, with size up to N ≤ 10^5, and 0 ≤ ai ≤ 10^9.
And a number 0 < M ≤ 10^9.
The task is to find the maximum ∑(k=i, j) ak % M = (ai + ai+1 + a(i+2) + ⋯ + a(j−1) + a(j)) % M, and how many different range(i,j) get that sum.
The complexity has to be less than O(N^2), the latter is too slow.
Example
N = 3, M = 5
A = {2, 4, 3}
The Maximum Sum mod M is 4 and there are 2 ranges, which are a0 to a2 and a1
My attempt
Let's define s[j] = (a0 + a1 + ... + aj) % M so if you want the best sum that ends in j you have to choose an s[i] i < j that s[i] is the smallest sum higher than you.
Because if s[i] > s[j]; s[i] = M - K; K < M - s[j] then the resulting range sum will be (s[j]-s[i]+M) % M = (s[j] + K) % M, and because K < M - s[j] it will increase the result mod M; the closer s[i] gets to s[j], the larger the result mod M becomes.
The idea of my attempt: first you calculate all the sums that start at 0 and end at an index i; then you can quickly find the smallest value greater than the current one with a binary search that the map already provides (lower_bound), and count how many times you can form the sum with the value you found. You have to keep the sums somewhere to count how many times each one occurs.
#include <iostream>
#include <map>
// Expands to two statements: unties the C++ streams from C stdio and from each other.
#define optimizar_io ios_base::sync_with_stdio(false);cin.tie(NULL);
using namespace std;
// Capacity of the input array.
const int LN = 1e5;
// N: number of elements read, M: modulus, num: the input values.
long long N, M, num[LN];
// Maps a prefix sum modulo M to the number of prefixes seen so far with that value.
map < long long, int > sum;
// Reads N, M and N values, then prints the maximum range sum modulo M and the
// number of ranges that reach it.
//
// For every position the running prefix sum `cont` (mod M) is combined with
// two kinds of earlier prefixes:
//   * prefixes equal to 0, giving a range sum of exactly `cont`;
//   * the smallest earlier prefix strictly greater than `cont`, giving
//     cont - prefix + M, the best wrap-around value.
int main() {
    optimizar_io
    cin >> N >> M;
    sum[0]++;   // the empty prefix
    // The count starts at 0: no range has been seen yet. Starting at 1 counts
    // one range too many whenever the maximum stays 0 (every element a
    // multiple of M).
    long long cont = 0, tmax = 0, res = 0, val;
    map < long long, int > :: iterator best;
    for (int i = 0; i < N; i++)
    {
        cin >> num[i];
        cont = (cont + num[i]) % M;
        if (tmax == cont)
            res += sum[0];
        if (tmax < cont)
            tmax = cont, res = sum[0];
        // Smallest stored prefix strictly greater than cont.
        best = sum.lower_bound(cont + 1);
        if (best != sum.end())
        {
            val = cont - (*best).first + M;
            if (tmax == val)
                res += (*best).second;
            if (tmax < val)
                tmax = val, res = (*best).second;
        }
        sum[cont]++;
    }
    cout << tmax << " " << res;
    return 0;
}

nCk modulo p when n % p or k % p == 0

I'm trying to solve a coding challenge on hacker rank which requires one to calculate binomial coefficients mod a prime, i.e.
nchoosek(n, k, p)
I'm using the code from this answer that works for the first three sets of inputs but begins failing on the 4th. I stepped through it in the debugger and determined that the issue arises when:
n % p == 0 || k % p == 0
I just need to know how to modify my current solution to handle the specific cases where n % p == 0 or k % p == 0. None of the answers I've found on stack exchange seem to address this specific case. Here's my code:
#include <iostream>
#include <fstream>
// Sums the successive quotients n/p, n/p^2, ... ; for n >= 0 this is the
// exponent of p in n!.
long long FactorialExponent(long long n, long long p)
{
    const long long quotient = n / p;
    // Stop as soon as the quotient is no longer positive.
    if (quotient <= 0) {
        return quotient;
    }
    return quotient + FactorialExponent(quotient, p);
}
// Computes (a * b) mod p by splitting a into its low 21 bits and the rest.
// The original comments state that the intermediate products do not overflow
// "under the assumptions"; those bounds are not spelled out in the code.
unsigned long long ModularMultiply(unsigned long long a, unsigned long long b, unsigned long p) {
    const unsigned long long low_mask = (1ull << 21) - 1;
    const unsigned long long high_part = a >> 21;
    const unsigned long long low_part = a & low_mask;
    // high_part * b, shifted back by 21 bits, reduced after each step.
    unsigned long long acc = (((high_part * b) % p) << 21) % p;
    acc += (low_part * b) % p;
    return acc % p;
}
// Modular inverse of k modulo m via the extended Euclidean algorithm.
// Meaningful for coprime k and m. When k is a multiple of m the loop never
// runs and m itself is returned.
// For m == 0 it returns k when k is 1 or the all-ones value, and 0 otherwise.
unsigned long long ModularInverse(unsigned long long k, unsigned long m) {
    if (m == 0) {
        const bool is_unit = (k == 1 || k == static_cast<unsigned long long>(-1));
        return is_unit ? k : 0;
    }
    // Both arguments are unsigned, so no sign normalisation is needed.
    k %= m;
    bool flip = true;
    unsigned long long current = 1;
    unsigned long long previous = 0;
    unsigned long long divisor = k;
    unsigned long long dividend = m;
    while (divisor > 0) {
        const unsigned long long quotient = dividend / divisor;
        const unsigned long long remainder = dividend % divisor;
        const unsigned long long next = quotient * current + previous;
        previous = current;
        current = next;
        dividend = divisor;
        divisor = remainder;
        flip = !flip;
    }
    return flip ? m - previous : previous;
}
// Preconditions: 0 <= k <= min(n,p-1); p > 1 prime
// Computes C(n mod p, k) mod p as numerator times inverted denominator.
// Preconditions (from the original): 0 <= k <= min(n, p-1); p > 1 prime.
unsigned long long ChooseModTwo(unsigned long long n, unsigned long long k, unsigned long p)
{
    n %= p;
    if (k > n) return 0;
    if (k == 0 || k == n) return 1;

    // Use the smaller of k and n-k to shorten the product.
    unsigned long long remaining = (k > n / 2) ? n - k : k;
    unsigned long long numerator = n;
    unsigned long long denominator = 1;
    for (unsigned long long factor = n - 1; remaining > 1; --factor, --remaining) {
        numerator = ModularMultiply(numerator, factor, p);
        denominator = ModularMultiply(denominator, remaining, p);
    }
    return ModularMultiply(numerator, ModularInverse(denominator, p), p);
}
// Preconditions: 0 <= k <= n; p > 1 prime
// Splits off the lowest base-p digits of n and k, evaluates their binomial
// coefficient with ChooseModTwo, and recurses on the remaining digits.
// Preconditions (from the original): 0 <= k <= n; p > 1 prime.
long long ChooseModOne(long long n, long long k, const unsigned long p)
{
    // A single-digit k needs no recursion.
    if (k < p) {
        return ChooseModTwo(n, k, p);
    }
    const unsigned long long low_digits = ChooseModTwo(n % p, k % p, p);
    // No short-cut on low_digits == 0: the caller is expected to have ruled
    // out divisibility by p beforehand.
    const unsigned long long high_digits = ChooseModOne(n / p, k / p, p);
    return ModularMultiply(low_digits, high_digits, p);
}
// Returns C(n, k) mod p: trivial cases first, then a divisibility test on the
// exponents of p, then the generic computation in ChooseModOne.
unsigned long long ModularBinomialCoefficient(unsigned long long n, unsigned long long k, const unsigned long p)
{
    // k is unsigned, so only k > n can be out of range.
    if (n < k) {
        return 0;
    }
    if (k == 0 || k == n) {
        return 1;
    }
    // More factors p in n! than in k! * (n-k)! means p divides C(n, k).
    const long long in_numerator = FactorialExponent(n, p);
    const long long in_denominator = FactorialExponent(k, p) + FactorialExponent(n - k, p);
    if (in_numerator > in_denominator) {
        return 0;
    }
    return ChooseModOne(n, k, p);
}
// Reads T test cases (n, k) from "test.in" and prints, for each,
//   (1 / (n + k)) * nCk(n - 3, k) * nCk(n + k, n - 1)   modulo 1000003.
//
// The division by (n + k) is a multiplication with a modular inverse, which
// does not exist when (n + k) is a multiple of the modulus. In that case the
// identity
//   nCk(n + k, n - 1) / (n + k) = nCk(n + k - 1, k) / (k + 1)
// is used, so the divisor becomes k + 1.
// NOTE(review): if both n + k and k + 1 are multiples of the modulus, neither
// form has an inverse and the result is still 0; that case would need the
// factors of p to be cancelled explicitly.
int main() {
    //std::ifstream fin ("input03.txt");
    std::ifstream fin ("test.in");
    if (!fin) {
        std::cerr << "cannot open test.in\n";
        return 1;
    }
    const int kMod = 1000003;
    int T = 0;
    fin >> T;
    //std::cin >> T;
    unsigned long long n, k;
    // Results are printed as they are computed; no per-case buffer
    // (and no variable-length array) is needed.
    while (T-- > 0 && (fin >> n >> k)) {
        const unsigned long long a = ModularBinomialCoefficient(n - 3, k, kMod);
        unsigned long long b;
        unsigned long long divisor;
        if ((n + k) % kMod != 0) {
            b = ModularBinomialCoefficient(n + k, n - 1, kMod);
            divisor = n + k;
        } else {
            b = ModularBinomialCoefficient(n + k - 1, k, kMod);
            divisor = k + 1;
        }
        const unsigned long long x = ModularMultiply(a, b, kMod);
        const unsigned long long y = ModularMultiply(x, ModularInverse(divisor, kMod), kMod);
        std::cout << y << "\n";
    }
    return 0;
}
Input:
6
90 13
65434244 16341234
23424244 12341234
424175 341198
7452123 23472
56000168 16000048
Output:
815483
715724
92308
903465
241972
0 <-- Incorrect, should be: 803478
Constraints:
4 <= N <= 10^9
1 <= K <= N
You can use Lucas' theorem to reduce the problem to ceil(log_P(N)) subproblems with k, n < p: Write n = n_m * p^m + ... + n_0 and k = k_m * p^m + ... + k_0 in base p (n_i, k_i < p are the digits), then we have
C(n,k) = PROD(i = 0 to m, C(n_i, k_i)) (mod p)
The subproblems are easy to solve, because every factor of k! has an inverse modulo p. You get an algorithm with runtime complexity O(p log(n)), which is better than that of Ivaylo's code in case of p << n, if I understand it correctly.
// Returns x^e mod p by recursive square-and-multiply (e >= 0).
int powmod(int x, int e, int p) {
    if (e == 0) return 1;
    if (e & 1) return (long long)x * powmod(x, e - 1, p) % p;
    long long rt = powmod(x, e / 2, p);
    return rt * rt % p;
}

// Returns C(n, k) mod p for prime p using Lucas' theorem: the product of
// C(n_i, k_i) over the base-p digits n_i, k_i of n and k.
//
// Per digit, numerator and denominator are accumulated separately and the
// denominator is inverted once (Fermat: den^(p-2)), instead of one modular
// exponentiation per factor. A digit with k_i > n_i makes the whole product
// 0, so the function returns immediately.
int binom_coeff_mod_prime(int n, int k, int p) {
    long long res = 1;
    while (n || k) {
        const int N = n % p, K = k % p;
        if (K > N) {
            return 0;   // C(N, K) = 0 for this digit
        }
        long long num = 1, den = 1;
        for (int i = 1; i <= K; ++i) {
            num = num * (N - K + i) % p;
            den = den * i % p;
        }
        res = res * num % p * powmod(static_cast<int>(den), p - 2, p) % p;
        n /= p;
        k /= p;
    }
    return res;
}
I suggest you use factorization to compute the number of combinations without division. I've got code for doing so here, originally inspired by Fast computation of multi-category number of combinations (I still would like to post a proper answer to that, if some kind souls would reopen it).
My code stores the result as a table of factors, doing the modular multiplication to expand the result should be quite straightforward.
Probably not practical for n in the range of 10**9, though, since the sieve will be quite massive and take a while to construct.

What is the fastest way to compute large power of 2 modulo a number

For 1 <= N <= 1000000000, I need to compute 2^N mod 1000000007, and it must be really fast!
My current approach is:
// Returns 2^n mod 1000000007 using right-to-left binary exponentiation.
//
// The number of multiplications is proportional to the number of bits of n,
// instead of one multiplication per 63 units of n. Every intermediate value
// is below the modulus, so each product fits in 64 bits.
ull power_of_2_mod(ull n) {
    const ull kModulus = 1000000007;
    ull result = 1;
    ull base = 2;   // holds 2^(2^i) mod kModulus in round i
    while (n > 0) {
        if (n & 1) {
            result = result * base % kModulus;
        }
        base = base * base % kModulus;
        n >>= 1;
    }
    return result;
}
but it doesn't seem to be fast enough. Any idea?
This will be faster (code in C):
typedef unsigned long long uint64;

// Returns x^e mod `mod` by recursive squaring.
//
// The base is reduced before use and there is no separate e == 1 case, so
// the result is always in [0, mod); a special case returning x itself would
// hand back an unreduced value when x >= mod.
// Products stay within 64 bits as long as mod does not exceed 2^32.
uint64 PowMod(uint64 x, uint64 e, uint64 mod)
{
    if (e == 0)
    {
        return 1 % mod;
    }
    x %= mod;
    uint64 res = PowMod(x, e / 2, mod);
    res = res * res % mod;
    if (e % 2)
        res = res * x % mod;
    return res;
}
This method doesn't use recursion with O(log(n)) complexity. Check this out.
#define ull unsigned long long
#define MODULO 1000000007
// Returns 2^n mod MODULO without recursion: one squaring per bit of n.
ull PowMod(ull n)
{
    ull result = 1;
    // `square` runs through 2, 2^2, 2^4, ... (mod MODULO), one step per bit.
    for (ull square = 2; n != 0; n /= 2, square = square * square % MODULO) {
        if (n % 2 == 1) {
            result = result * square % MODULO;
        }
    }
    return result;
}
And this is pseudo from Wikipedia (see Right-to-left binary method section)
function modular_pow(base, exponent, modulus)
Assert :: (modulus - 1) * (base mod modulus) does not overflow base
result := 1
base := base mod modulus
while exponent > 0
if (exponent mod 2 == 1):
result := (result * base) mod modulus
exponent := exponent >> 1
base := (base * base) mod modulus
return result
You can solve it in O(log n).
For example, for n = 1234 = 10011010010 (in base 2) we have n = 2 + 16 + 64 + 128 + 1024, and thus 2^n = 2^2 * 2^16 * 2^64 * 2^128 * 2 ^ 1024.
Note that 2^1024 = (2^512)^2, so that, given you know 2^512, you can compute 2^1024 in a couple of operations.
The solution would be something like this (pseudocode):
// Modulus shared by all helpers below.
const ulong MODULO = 1000000007;
// Product of a and b, reduced modulo MODULO.
ulong mul(ulong a, ulong b) {
return (a * b) % MODULO;
}
// Sum of a and b, reduced modulo MODULO.
ulong add(ulong a, ulong b) {
return (a + b) % MODULO;
}
// Pseudocode stub: the body is left out. The example below lists the
// positions of the set bits of 1234 (1234 = 2 + 16 + 64 + 128 + 1024).
int[] decompose(ulong number) {
//for 1234 it should return [1, 4, 6, 7, 10]
}
//for x it returns 2^(2^x) mod MODULO
// (e.g. for x = 10 it returns 2^1024 mod MODULO)
ulong power_of_power_of_2_mod(int power) {
    // Start from 2 = 2^(2^0); each squaring doubles the exponent.
    // Starting from 1 would leave the value at 1 forever.
    ulong result = 2;
    for (int i = 0; i < power; i++) {
        result = mul(result, result);
    }
    return result;
}
//for x it returns 2^x mod MODULO
// Multiplies together one factor 2^(2^i) for every entry i that decompose()
// yields for the exponent.
ulong power_of_2_mod(int power) {
ulong result = 1;
foreach (int metapower in decompose(power)) {
result = mul(result, power_of_power_of_2_mod(metapower));
}
return result;
}
Note that O(log n) is, in practice, O(1) for ulong arguments (as log n < 63); and that this code is compatible with any uint MODULO (MODULO < 2^32), independent of whether MODULO is prime or not.
It can be solved in O((log n)^2).
Try this approach:-
// Returns 2^n mod 1000000007 by recursive squaring.
//
// The half power is computed once and reused. Calling the function twice per
// level doubles the work at every level and makes the running time linear in
// n instead of logarithmic.
unsigned long long int fastspcexp(unsigned long long int n)
{
    if (n == 0)
        return 1;
    const unsigned long long int half = fastspcexp(n / 2);
    // half < 1000000007, so half * half * 2 stays below 2^64.
    if (n % 2 == 0)
        return (half * half) % 1000000007;
    else
        return (half * half * 2) % 1000000007;
}
This is a recursive approach and is pretty fast enough to meet the time requirements in most of the programming competitions.
If you also want to store that array, i.e. (2^i) % mod for i = 0 up to some maximum, then:
// Table of powers of two modulo `mod`: pow_mod[i] = 2^i % mod.
long mod = 1000000007;
long int pow_mod[ele]; //here 'ele' = maximum power upto which you want to store 2^i
pow_mod[0]=1; //2^0 = 1
// Each entry is twice the previous one, reduced modulo `mod`.
for(int i=1;i<ele;++i){
pow_mod[i] = (pow_mod[i-1]*2)%mod;
}
I hope it'll be helpful to someone.

Calculating Catalan Numbers mod prime number

The following is the problem description:
let c[n] be the catalan number for n and p be a large prime eg.1000000007
I need to calculate c[n] % p where n ranges from {1,2,3,...,1000}
The problem which I am having is that on a 32 bit machine you get overflow when you calculate catalan number for such large integer. I am familiar with modulo arithmetic. Also
(a.b) % p = ((a % p)(b % p)) % p
this formula helps me to get away with the overflow in numerator separately but I have no idea how to deal with denominators.
For a modulus of 1000000007, avoiding overflow with only 32-bit integers is cumbersome. But any decent C implementation provides 64-bit integers (and any decent C++ implementation does too), so that shouldn't be necessary.
Then to deal with the denominators, one possibility is, as KerrekSB said in his comment, to calculate the modular inverse of the denominators modulo the prime p = 1000000007. You can calculate the modular inverse with the extended Euclidean algorithm or, equivalently, the continued fraction expansion of k/p. Then instead of dividing by k in the calculation, you multiply by its modular inverse.
Another option is to use Segner's recurrence relation for the Catalan numbers, which gives a calculation without divisions:
C(0) = 1
n
C(n+1) = ∑ C(i)*C(n-i)
0
Since you only need the Catalan numbers C(k) for k <= 1000, you can precalculate them, or quickly calculate them at program startup and store them in a lookup table.
If contrary to expectation no 64-bit integer type is available, you can calculate the modular product by splitting the factors into low and high 16 bits,
a = a1 + (a2 << 16) // 0 <= a1, a2 < (1 << 16)
b = b1 + (b2 << 16) // 0 <= b1, b2 < (1 << 16)
a*b = a1*b1 + (a1*b2 << 16) + (a2*b1 << 16) + (a2*b2 << 32)
To calculate a*b (mod m) with m <= (1 << 31), reduce each of the four products modulo m,
/* The four partial products of the 16-bit halves, each reduced modulo m. */
p1 = (a1*b1) % m;
p2 = (a1*b2) % m;
p3 = (a2*b1) % m;
p4 = (a2*b2) % m;
and the simplest way to incorporate the shifts is
/* Multiply p2 by 2^16 modulo m: sixteen doublings, each followed by a
   conditional subtraction of m. */
for(i = 0; i < 16; ++i) {
p2 *= 2;
if (p2 >= m) p2 -= m;
}
the same for p3 and with 32 iterations for p4. Then
/* Add the four terms, subtracting m whenever a partial sum reaches m. */
s = p1+p2;
if (s >= m) s -= m;
s += p3;
if (s >= m) s -= m;
s += p4;
if (s >= m) s -= m;
return s;
That way is not very fast, but for the few multiplications needed here, it's fast enough. A small speedup should be obtained by reducing the number of shifts; first calculate (p4 << 16) % m,
/* First half of the shift for p4: multiply it by 2^16 modulo m. */
for(i = 0; i < 16; ++i) {
p4 *= 2;
if (p4 >= m) p4 -= m;
}
then all of p2, p3 and the current value of p4 need to be multiplied with 2^16 modulo m,
/* Collect p3 and p2 into p4 so that one shared shift by 16 bits serves all
   three terms. */
p4 += p3;
if (p4 >= m) p4 -= m;
p4 += p2;
if (p4 >= m) p4 -= m;
/* Multiply the collected value by 2^16 modulo m. */
for(i = 0; i < 16; ++i) {
p4 *= 2;
if (p4 >= m) p4 -= m;
}
/* p1 carries no shift; add it last. */
s = p4+p1;
if (s >= m) s -= m;
return s;
what about if you store the results using dynamic programming and while populating the lookup table, you can use MODULO division at each step. It will take care of the overflow for the 1000 Catalans and also will be faster than BigDecimal/BigInteger.
My solution:
/**
 * Precomputes the Catalan numbers C(0)..C(1000) modulo 1000000007 with the
 * division-free recurrence C(n) = sum of C(j) * C(n-1-j), and prints
 * C(1)..C(1000).
 */
public class Catalan {
    private static long [] catalan= new long[1001];
    private static final int MOD=1000000007;

    public static void main(String[] args) {
        precalc();
        int i = 1;
        while (i <= 1000) {
            System.out.println("Catalan number for "+i+" is: "+catalan[i]);
            i++;
        }
    }

    /** Fills the table bottom-up; every stored entry is already reduced modulo MOD. */
    private static void precalc(){
        catalan[0] = 1;
        catalan[1] = 1;
        for (int n = 2; n <= 1000; n++) {
            long total = 0;
            for (int split = 0; split < n; split++) {
                long product = ((catalan[split] % MOD) * (catalan[n - 1 - split] % MOD)) % MOD;
                total = (total + product) % MOD;
            }
            catalan[n] = total;
        }
    }
}
What about using a library for big integers? Try googling for it...
#include <stdio.h>
#include <stdlib.h>
/*
C(n) = (2n)!/(n+1)!n!
= (2n)(2n-1)(2n-2)..(n+2)/n!
*/
/* Modulus applied to the final product in catalanMod. */
int p = 1000000007;
/* Greatest common divisor by Euclid's algorithm; gcd(x, 0) is x. */
int gcd(int x, int y){
    return (y == 0) ? x : gcd(y, x % y);
}
/*
 * Returns the n-th Catalan number modulo p, using
 *   C(n) = (2n)(2n-1)...(n+2) / n!
 * The numerator factors are kept in an array and every denominator factor
 * 2..n is cancelled against them through gcd, so no modular division is
 * needed; only the remaining factors are multiplied modulo p.
 *
 * Returns 1 for n < 2 (the product is empty) and -1 if the work array cannot
 * be allocated.
 */
int catalanMod(int n){
    long long c = 1LL;
    int i;
    int *list;
    /* n < 2: nothing to multiply; also keeps the allocation size positive. */
    if(n < 2) return 1;
    //make array [(2n),(2n-1),(2n-2)..(n+2)]
    list = (int*)malloc(sizeof(int)*(n-1));
    if(list == NULL) return -1;
    for(i=n+2;i<=2*n;++i){
        list[i-(n+2)] = i;
    }
    //[(2n),(2n-1),(2n-2)..(n+2)] / [1,2,3,..n]
    //E.g C(10)=[13,17,19,4]
    for(i=2;i<=n;++i){
        int j,k,w;
        /* Cancel the factor i against the numerators until nothing of it is left. */
        for(w=i,j=0;j<n-1;++j){
            while(1!=(k = gcd(list[j], w))){
                list[j] /= k;
                w /= k;
            }
            if(w == 1) break;
        }
    }
    //Multiplication and modulo reduce
    for(i=0;i<n-1;++i){
        if(list[i]==1)continue;
        c = c * list[i] % p;
    }
    free(list);
    return c;
}
Simply use the property (a * b) % mod = ((a % mod) * (b % mod)) % mod