C or C++: Libraries for factoring integers? [closed] - c++

Closed. This question does not meet Stack Overflow guidelines. It is not currently accepting answers.
We don’t allow questions seeking recommendations for books, tools, software libraries, and more. You can edit the question so it can be answered with facts and citations.
Closed 11 months ago.
The community reviewed whether to reopen this question 8 months ago and left it closed:
Original close reason(s) were not resolved
Improve this question
It seems that there are several really fast prime factorization algorithms around (one that looks ideal is quadratic sieving). However, rather than make my own (likely poor) implementation I would like to use a ready-made library for simplicity.
I need to be able to factor integers of up to 15 digits efficiently. Because of that, I'm not necessarily looking for the algorithm that scales best asymptotically, since we can assume the numbers being factored are less than 10^15.
I've already had a look at some of the implementations listed on Wikipedia's Quadratic Sieve page. However, some of the implementations don't seem well-maintained; some don't have documentation; and so on! I checked if a few well-known libraries, such as Boost, had factorization methods but they don't seem to.
Can anyone recommend a library that fits the above criteria?

Check out MSIEVE library for factoring large integers by Jason Papadopoulos.
Msieve is the result of my efforts to understand and optimize how
integers are factored using the most powerful modern algorithms.
This documentation corresponds to version 1.46 of the Msieve library.
Do not expect to become a factoring expert just by reading it. I've
included a relatively complete list of references that you can and
should look up if you want to treat the code as more than a black box
to solve your factoring problems.

To factor integers in C you can try to use a probabilistic approach :
The headers of my proposition :
#include <stdlib.h>
#include <sys/time.h>
// Unsigned integer type used for every number being factored; the trailing
// note names the GCC 128-bit type that can be substituted here.
typedef unsigned long long int positive_number; // __uint128_t
// Returns (lhs * rhs) % mod using repeated doubling instead of a direct product.
static inline positive_number multiplication_modulo(positive_number lhs, positive_number rhs, positive_number mod);
// Probabilistic primality test on n using k random rounds; returns non-zero when n passes.
static int is_prime(positive_number n, int k); // prime checker
// One randomized factoring attempt on n; returns the value it found (which may be n itself).
positive_number factor_worker(positive_number n);
// Entry point: tries to find a factor of n, giving up after `timeout` seconds.
positive_number factor(positive_number n, int timeout);
The factorization process manager, because there is a timeout:
/* Current time from gettimeofday(), expressed in seconds as a double
 * (whole seconds plus the microsecond part scaled by 1e-6). */
double microtime() {
    struct timeval now;
    gettimeofday(&now, 0);
    double whole = (double) now.tv_sec;
    double fraction = (double) now.tv_usec / 1e6;
    return whole + fraction;
}
// This is the master function you can call, expecting a number and a timeout(s)
/*
 * Master factoring routine.
 *
 * n       - number to factor
 * timeout - budget in seconds for the randomized search
 *
 * Returns n itself when n < 4, when n passes is_prime(), or when the time
 * budget runs out; returns 2 for even n; returns the integer square root when
 * n is a perfect square; otherwise returns whatever factor_worker() found.
 */
positive_number factor(positive_number n, int timeout) {
    if (n < 4) return n;
    if (n != (n | 1)) return 2;          /* lowest bit clear: n is even */

    double begin = microtime();
    int steps = 8;                       /* primality check iterations */

    /* Newton iteration towards floor(sqrt(n)); each refinement also adds
     * one primality round. */
    positive_number a = n >> 1;
    positive_number b = (a + n / a) >> 1;
    while (b < a) {
        a = b;
        b = (a + n / a) >> 1;
        ++steps;
    }
    if (b * b == n) return b;            /* b is the exact square root */
    if (is_prime(n, steps)) return n;

    for (;;) {
        positive_number res = factor_worker(n);
        if (res != n) return res;
        ++steps;
        /* every 96th attempt, re-check primality with 32 rounds */
        if (steps % 96 == 0 && is_prime(n, 32)) return n;
        if (!(microtime() - begin < timeout)) break;
    }
    return n;
}
The multiplier helper because computations are done modulo N :
/*
 * Computes (lhs * rhs) % mod without ever forming the full product.
 *
 * The product is accumulated by repeated doubling. Every addition is done as
 * "add, or subtract the complement when the sum would reach mod", so no
 * intermediate value exceeds mod - 1. This keeps the result correct even when
 * mod is larger than half the range of positive_number, where a plain
 * (res + lhs) % mod or (lhs << 1) % mod would wrap around.
 *
 * mod must be non-zero (the initial reductions divide by it).
 */
static inline positive_number multiplication_modulo(positive_number lhs, positive_number rhs, positive_number mod) {
    positive_number res = 0;
    lhs %= mod;
    rhs %= mod;
    while (rhs) {
        if (rhs & 1) {
            /* res = (res + lhs) mod `mod`, wrap-free */
            res = (res >= mod - lhs) ? res - (mod - lhs) : res + lhs;
        }
        /* lhs = (2 * lhs) mod `mod`, wrap-free */
        lhs = (lhs >= mod - lhs) ? lhs - (mod - lhs) : lhs + lhs;
        rhs >>= 1;
    }
    return res; /* == (lhs * rhs) % mod for the original arguments */
}
The prime checker helper, of course :
/*
 * Primality check: n is the candidate, k the number of random rounds.
 * Returns non-zero when n passes every check, 0 as soon as one fails.
 * Small n (< 51529 = 227 * 227) are decided without randomness.
 */
static int is_prime(positive_number n, int k) {
positive_number a = 0, b, c, d, e, f, g; int h, i;
/* True for n == 1 and for every even n; of those only 2 is accepted. */
if ((n == 1) == (n & 1)) return n == 2;
if (n < 51529) // fast constexpr check for small primes (removable)
/* Tests bit (n % 30) of a constant mask (offset by 42 when n < 6), then
 * requires n to be indivisible by the primes 7..223; each "n < p*p" guard
 * (49, 1369, 6241, 16129, 29929) stops the chain early for smaller n. */
return (n & 1) & ((n < 6) * 42 + 0x208A2882) >> n % 30 && (n < 49 || (n % 7 && n % 11 && n % 13 && n % 17 && n % 19 && n % 23 && n % 29 && n % 31 && n % 37 && (n < 1369 || (n % 41 && n % 43 && n % 47 && n % 53 && n % 59 && n % 61 && n % 67 && n % 71 && n % 73 && ( n < 6241 || (n % 79 && n % 83 && n % 89 && n % 97 && n % 101 && n % 103 && n % 107 && n % 109 && n % 113 && ( n < 16129 || (n % 127 && n % 131 && n % 137 && n % 139 && n % 149 && n % 151 && n % 157 && n % 163 && n % 167 && ( n < 29929 || (n % 173 && n % 179 && n % 181 && n % 191 && n % 193 && n % 197 && n % 199 && n % 211 && n % 223))))))))));
/* c = n - 1; b = c with all factors of two removed; h = how many were removed. */
for (b = c = n - 1, h = 0; !(b & 1); b >>= 1, ++h);
for (; k--;) {
/* Fill every byte of a with rand() output. */
for (g = 0; g < sizeof(positive_number); ((char*)&a)[g++] = rand()); // random number
/* e = 1 + a % c is this round's base, in [1, n - 1]. The inner loop runs
 * Euclid-style remainder steps on (e, n); d and f are overwritten on the
 * next line, so only e is carried forward. */
do for (d = e = 1 + a % c, f = n; (d %= f) && (f %= d););
while (d > 1 && f > 1);
/* f = smallest power of two strictly greater than b. */
for (d = f = 1; f <= b; f <<= 1);
/* Left-to-right square-and-multiply: afterwards d == e^b mod n. */
for (; f >>= 1; d = multiplication_modulo(d, d, n), f & b && (d = multiplication_modulo(e, d, n)));
if (d == 1) continue;
/* Square up to h times, stopping early once d reaches n - 1. */
for (i = h; i-- && d != c; d = multiplication_modulo(d, d, n));
/* Never reached n - 1: this base proves n composite. */
if (d != c) return 0;
}
return 1;
}
The factorization worker, a single call does not guarantee a success, it's a probabilistic try :
/*
 * One randomized factoring attempt on n.
 * Iterates x -> x*x + 1 (mod n) from a random start with two cursors, one
 * moving a single step and one moving two steps per round, and returns the
 * first gcd(n, |b - c|) that differs from 1. The returned value can be n
 * itself (when both cursors meet), so callers must treat "== n" as a miss.
 */
positive_number factor_worker(positive_number n) {
size_t a; positive_number b = 0, c, d, e, f;
/* Fill every byte of b with rand() output. */
for (a = 0; a < sizeof(positive_number); ((char*)&b)[a++] = rand()); // pick random polynomial
/* Both cursors start from the same value, reduced mod n. */
c = b %= n;
do {
/* b advances one step: b = b*b + 1 (mod n). */
b = multiplication_modulo(b, b, n); if(++b == n) b = 0;
/* c advances two steps of the same map. */
c = multiplication_modulo(c, c, n); if(++c == n) c = 0;
c = multiplication_modulo(c, c, n); if(++c == n) c = 0;
/* Euclid on (n, |b - c|): each pass replaces (d, e) with (e, d mod e),
 * the remainder being formed as d - (d / f) * f. d ends as the gcd. */
for (d = n, e = b > c ? b - c : c - b; e; f = e, e = multiplication_modulo(d / f, f, n), e = (d - e) % n, d = f);
// handle your precise timeout in the for loop body
} while (d == 1);
return d;
}
Example of usage :
#include <stdio.h>
/*
 * Recursively factors n, printing each factor that factor() could not split
 * further as "<factor> * ", and returns the product of the printed factors.
 *
 * The print and the flush are separate statements so the flush always runs
 * after the write, and the two recursive calls are sequenced so factors are
 * printed in a fixed order (operand evaluation order of `*` is unspecified).
 */
positive_number exec(positive_number n) {
    positive_number res = factor(n, 2); // 2 seconds
    if (res == n) {
        printf("%llu * ", n);
        fflush(stdout);
        return res;
    }
    positive_number left = exec(res);
    positive_number right = exec(n / res);
    return left * right;
}
/*
 * Self-test: factors 1000 random numbers of 4..63 bits and checks that the
 * product of the reported factors equals the input. Exits with status 1 on
 * the first mismatch.
 */
int main() {
    positive_number n = 0, mask = -1, res;
    int i = 0;
    while (i < 1000) {
        int bits = 4 + rand() % 60; // adjust the modulo for tests
        char *raw = (char*)&n;
        for (size_t k = 0; k < sizeof(positive_number); ++k)
            raw[k] = rand();
        // keep only the low "bits" bits, then shift the range up by 4
        n &= mask >> (8 * sizeof (positive_number) - bits);
        n += 4;
        ++i;
        printf("%5d. (%2d bits) %llu = ", i, bits, n);
        res = exec(n);
        if (res != n) return 1;
        printf("1\n");
    }
}
You can put it into a primes.c file then compile + execute :
gcc -O3 -std=c99 -Wall -pedantic primes.c ; ./a.out ;
Also, the GCC 128-bit integer extension (__uint128_t) may be available.
Example output :
358205873110913227 = 380003149 * 942639223 took 0.01s
195482582293315223 = 242470939 * 806210357 took 0.0021s
107179818338278057 = 139812461 * 766597037 took 0.0023s
44636597321924407 = 182540669 * 244529603 took 0s
747503348409771887 = 865588487 * 863578201 took 0.016s
// 128-bit extension available output :
170141183460469231731687303715506697937 =
13602473 * 230287853 * 54315095311400476747373 took 0.646652s
Info : This C99 100 lines probabilistic factorization software proposition is based on a Miller–Rabin primality test followed or not by a Pollard's rho algo. Like you, i initially aimed to factorize just a little long long int. By my tests it's working fast enough to me, even for some larger. Thank you.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.

How about GMP-ECM (Elliptic Curve Method for Integer Factorization)?
If the link to the official project page at Inria is unavailable, you can check a recent version on the web archive.

Related

Want to minimize my code so it consumes time less than 1 sec. It uses concept of modular exponentiation .Correct output but exceeding time limit

The below code is to calculate 2^n where n is equal to 1 <= n <= 10^5. So to calculate such large numbers I have used concept of modular exponentian. The code is giving correct output but due to large number of test cases it is exceeding the time limit. I am not getting a way to minimize the solution so it consumes less time. As the "algo" function is called as many times as the number of test cases. So I want to put the logic used in "algo" function in the main() function so it consumes time less than 1 sec and also gives the correct output. Here "t" represents number of test cases and it's value is 1 <= t <= 10^5.
Any suggestions from your side would be of great help!!
#include<iostream>
#include<math.h>
using namespace std;
// Returns x^y modulo 1000000007 using iterative square-and-multiply.
//
// x: base, y: exponent. For y <= 0 the result is 1 (the empty product); the
// recursive original ran off the end of the function for negative y.
// Intermediate products are held in long long so they cannot overflow.
int algo(int x, int y){
    const long long m = 1000000007;
    long long result = 1;
    long long base = x % m;
    while (y > 0) {
        if (y % 2 == 1) {
            result = (result * base) % m;
        }
        base = (base * base) % m;
        y /= 2;
    }
    return (int) result;
}
int main(void)
{
int n, t, k;
cin>>t; //t = number of test cases
for ( k = 0; k < t; k++)
{
cin >> n; //power of 2
cout<<"the value after algo is: "<<algo(2,n)<<endl;
}
return 0;
}
You can make use of binary shifts to find powers of two
#include <iostream>
using namespace std;
// Prints 2^n mod p for the hard-coded n, by binary exponentiation:
// u collects the result, w holds the current repeated square of the base.
int main()
{
    unsigned long long u = 1, w = 2, n = 10, p = 1000000007, r;
    //n -> power of two
    for (; n != 0; )
    {
        if (n & 0x1)
            u = (u * w) % p;
        n >>= 1;
        if (n != 0)          // skip the final, unused squaring
            w = (w * w) % p;
    }
    r = (unsigned long)u;
    cout << r;
    return 0;
}
This is the function that I often use to calculate
Any integer X raised to power Y modulo M
C++ Function to calculate (X^Y) mod M
// Returns (x^y) mod `mod` by binary exponentiation in O(log y) steps.
//
// Products are formed in long long: with an int-sized modulus both factors
// are below 2^31, so the product always fits, whereas multiplying two ints
// overflowed for moduli near 1e9+7.
// As before, a base that is 0 modulo `mod` yields 0 (even for y == 0), and
// y <= 0 otherwise yields 1.
int power(int x, int y, const int mod = 1e9+7)
{
    long long result = 1;
    long long base = x % mod;
    if (base == 0)
        return 0;
    while (y > 0)
    {
        if (y & 1)
            result = (result * base) % mod;
        y = y >> 1; // y = y / 2
        base = (base * base) % mod;
    }
    return (int) result;
}
Remove the Mod if you don't want.
Time Complexity of this Function is O(log2(Y))
There can be a case of over flow so use int , long , long long etc as per your need.
Well your variables won't sustain the boundary test cases, introducing 2^10000, 1 <= n <= 10^5. RIP algorithms
19950631168807583848837421626835850838234968318861924548520089498529438830221946631919961684036194597899331129423209124271556491349413781117593785932096323957855730046793794526765246551266059895520550086918193311542508608460618104685509074866089624888090489894838009253941633257850621568309473902556912388065225096643874441046759871626985453222868538161694315775629640762836880760732228535091641476183956381458969463899410840960536267821064621427333394036525565649530603142680234969400335934316651459297773279665775606172582031407994198179607378245683762280037302885487251900834464581454650557929601414833921615734588139257095379769119277800826957735674444123062018757836325502728323789270710373802866393031428133241401624195671690574061419654342324638801248856147305207431992259611796250130992860241708340807605932320161268492288496255841312844061536738951487114256315111089745514203313820202931640957596464756010405845841566072044962867016515061920631004186422275908670900574606417856951911456055068251250406007519842261898059237118054444788072906395242548339221982707404473162376760846613033778706039803413197133493654622700563169937455508241780972810983291314403571877524768509857276937926433221599399876886660808368837838027643282775172273657572744784112294389733810861607423253291974813120197604178281965697475898164531258434135959862784130128185406283476649088690521047580882615823961985770122407044330583075869039319604603404973156583208672105913300903752823415539745394397715257455290510212310947321610753474825740775273986348298498340756937955646638621874569499279016572103701364433135817214311791398222983845847334440270964182851005072927748364550578634501100852987812389473928699540834346158807043959118985815145779177143619698728131459483783202081474982171858011389071228250905826817436220577475921417653715687725614904582904992461028630081535583308130101987675856234343538955409175623400844887526162643568648833519463720377293240094456246923254350400678027273837755376406726898636241037
491410966718557050759098100246789880178271925953381282421954028302759408448955014676668389697996886241636313376393903373455801407636741877711055384225739499110186468219696581651485130494222369947714763069155468217682876200362777257723781365331611196811280792669481887201298643660768551639860534602297871557517947385246369446923087894265948217008051120322365496288169035739121368338393591756418733850510970271613915439590991598154654417336311656936031122249937969999226781732358023111862644575299135758175008199839236284615249881088960232244362173771618086357015468484058622329792853875623486556440536962622018963571028812361567512543338303270029097668650568557157505516727518899194129711337690149916181315171544007728650573189557450920330185304847113818315407324053319038462084036421763703911550639789000742853672196280903477974533320468368795868580237952218629120080742819551317948157624448298518461509704888027274721574688131594750409732115080498190455803416826949787141316063210686391511681774304792596709376
Fear not, my friend: someone did try to solve the problem at https://www.quora.com/What-is-2-raised-to-the-power-of-50-000. You are looking for Piyush Michael's answer; here is his sample code:
#include <stdio.h>
/*
 * Prints 2^rs in decimal, followed by a blank line and the sum of its digits.
 *
 * The number is kept as an array of decimal digits, least significant first,
 * and doubled rs - 1 times starting from 2. Only the `len` digits in use are
 * touched on each pass, the carry is a plain running value (so nothing is
 * written past the array), and the output has no leading zeros.
 * Returns 1 if the result does not fit in ul digits.
 */
int main()
{
    int ul=16000;   /* digit capacity (the original "16,000" is not valid C) */
    int rs=50000;   /* exponent */
    int s=0,i,j,len=1,ar[ul];
    ar[0]=2;
    for(i=1;i<ul;i++)ar[i]=0;
    for(j=1;j<rs;j++)
    {
        int carry=0;
        for(i=0;i<len;i++)
        {
            int v=ar[i]*2+carry;
            ar[i]=v%10;
            carry=v/10;
        }
        if(carry)
        {
            if(len==ul)
            {
                fprintf(stderr,"result needs more than %d digits\n",ul);
                return 1;
            }
            ar[len++]=carry;
        }
    }
    for(j=len-1;j>=0;j--)printf("%d",ar[j]);
    for(i=0;i<len;i++)s+=ar[i];
    printf("\n\n%d",s);
    return 0;
}

Big primes loop with GMP library C++

It's the first time that I use the gmp library, so I'm really lost, I've found a code implementing the "miller rabin primality test" in c++ but I wanted to be able to apply it to integers with arbitrary precision so I installed the GMP library.
The problem is, I've got no idea of how GMP library actually works (I've read trough a few pages of the manual but I understand very little about it also since I haven't even studied object oriented programming), I want to adapt the primality test to be able to input integers 'num' of about 1000-2000 digits, here's the code:
#include <iostream>
#include <cstring>
#include <cstdlib>
#include <gmpxx.h>
#include <gmp.h>
#define ll long long
using namespace std;
/*
* calculates (a * b) % c taking into account that a * b might overflow
*/
/*
 * Returns (a * b) % mod without forming the full product.
 *
 * Requires mod > 0. The running sum and the doubled term are kept in
 * unsigned long long: both stay below mod <= 2^63 - 1, so "x + y" and
 * "y * 2" are below 2^64 and cannot wrap, which the signed version could
 * for mod above 2^62. A negative a is first mapped to its non-negative
 * residue. As before, b <= 0 yields 0.
 */
long long mulmod(long long a, long long b, long long mod)
{
    long long first = a % mod;
    if (first < 0)
        first += mod;
    const unsigned long long m = (unsigned long long) mod;
    unsigned long long x = 0, y = (unsigned long long) first;
    while (b > 0)
    {
        if (b % 2 == 1)
        {
            x = (x + y) % m;
        }
        y = (y * 2) % m;
        b /= 2;
    }
    return (long long) x;
}
/*
* modular exponentiation
*/
/*
 * modular exponentiation: returns base^exponent % mod (mod > 0).
 *
 * Every product goes through mulmod(), so it cannot overflow even when mod
 * is close to the top of the long long range; the plain x * y and y * y
 * used previously overflowed once mod exceeded roughly 3e9.
 * The base is reduced to a non-negative residue first because mulmod()
 * treats a non-positive second factor as zero.
 */
ll modulo(ll base, ll exponent, ll mod)
{
    ll x = 1;
    ll y = base % mod;
    if (y < 0)
        y += mod;
    while (exponent > 0)
    {
        if (exponent % 2 == 1)
            x = mulmod(x, y, mod);
        y = mulmod(y, y, mod);
        exponent = exponent / 2;
    }
    return x % mod;
}
/*
* Miller-Rabin primality test, iteration signifies the accuracy
*/
/*
 * Miller-Rabin primality test.
 * p is the candidate; iteration is the number of random bases tried.
 * Returns false as soon as one base shows p to be composite, true otherwise.
 */
bool Miller(ll p,int iteration)
{
    // Reject everything below 2 and every even number except 2.
    if (p < 2 || (p != 2 && p % 2 == 0))
        return false;

    // odd_part = (p - 1) with all factors of two divided out
    ll odd_part = p - 1;
    for (; odd_part % 2 == 0; odd_part /= 2)
    {
    }

    int round = 0;
    while (round < iteration)
    {
        ll a = rand() % (p - 1) + 1;
        ll temp = odd_part;
        ll mod = modulo(a, temp, p);
        // Keep squaring until the exponent reaches p - 1 or the value hits 1 or p - 1.
        for (; temp != p - 1 && mod != 1 && mod != p - 1; temp *= 2)
            mod = mulmod(mod, mod, p);
        bool passes = (mod == p - 1) || (temp % 2 != 0);
        if (!passes)
            return false;
        round++;
    }
    return true;
}
//Main
int main()
{
int w=0;
int iteration = 5;
mpz_t num;
cout<<"Enter integer to loop: ";
cin>>num;
if (num % 2 == 0)
num=num+1;
while (w==0) {
if (Miller(num, iteration)) {
cout<<num<<" is prime"<<endl;
w=1;
}
else
num=num+2;
}
system ("PAUSE");
return 0;
}
(If I define num to be 'long long' the program works just fine, but I have no idea how I should adapt the whole thing to "match" num being defined as 'mpz_t' instead, also I didn't mention it but the program basically takes an initial integer value and loops it by adding 2 if the integer is composite until it becomes a prime number)

using putchar_unlocked for fast output

I wish to use fast input and output in my code. I understood the use of getchar_unlocked for fast input using the below function.
// Reads the next run of decimal digits from stdin and returns its value.
// Leading non-digit characters are skipped. Returns 0 when the input ends
// before any digit is seen.
//
// The character is kept in an int so EOF stays distinguishable from data;
// stored in a char, EOF never matched a digit and the skip loop spun forever
// at end of input.
inline int next_int() {
    int c = getchar_unlocked();
    while (c != EOF && !('0' <= c && c <= '9')) {
        c = getchar_unlocked();
    }
    int n = 0;
    while ('0' <= c && c <= '9') {
        n = n * 10 + (c - '0');
        c = getchar_unlocked();
    }
    return n;
}
can someone please explain me how to use fast output using putchar_unlocked() function?
I was going through this question and there someone said putchar_unlocked() could be used for fast output.
Well the following code works well for fast output using putchar_unlocked().
#define pc(x) putchar_unlocked(x);
// Writes n in decimal to stdout with putchar_unlocked().
//
// Digits are collected least-significant first in a small buffer and emitted
// in reverse. This handles negative values (a leading '-', with the magnitude
// taken in unsigned arithmetic so INT_MIN works) and values whose digit
// reversal would not fit in an int, such as 1999999999.
// Existing quirk kept for compatibility: only n == 0 is followed by '\n'.
inline void writeInt (int n)
{
    if (n == 0) { pc('0'); pc('\n'); return ;}
    unsigned int magnitude = (unsigned int) n;
    if (n < 0) {
        pc('-');
        magnitude = 0u - (unsigned int) n;
    }
    char digits[16];
    int len = 0;
    while (magnitude != 0) {
        digits[len++] = (char) ('0' + magnitude % 10);
        magnitude /= 10;
    }
    while (len > 0) { pc(digits[--len]); }
}
Normally Printf is quite fast for outputs,however for writing Integer or Long Outputs,the below function is a tad bit faster. Here we use the putchar_unlocked() method for outputting a character which is similar thread-unsafe version of putchar() and is faster.
See Link.

Project Euler #27 [closed]

This question is unlikely to help any future visitors; it is only relevant to a small geographic area, a specific moment in time, or an extraordinarily narrow situation that is not generally applicable to the worldwide audience of the internet. For help making this question more broadly applicable, visit the help center.
Closed 9 years ago.
I'm challenging myself in Project Euler but currently stuck on problem 27, in which the problem states:
Euler published the remarkable quadratic formula:
n² + n + 41
It turns out that the formula will produce 40 primes for the consecutive values n = 0 to 39. However, when n = 40, 40² + 40 + 41 = 40(40 + 1) + 41 is divisible by 41, and certainly when n = 41, 41² + 41 + 41 is clearly divisible by 41.
Using computers, the incredible formula n² − 79n + 1601 was discovered, which produces 80 primes for the consecutive values n = 0 to 79. The product of the coefficients, −79 and 1601, is −126479.
Considering quadratics of the form:
n² + an + b, where |a| < 1000 and |b| < 1000
where |n| is the modulus/absolute value of n
e.g. |11| = 11 and |−4| = 4
Find the product of the coefficients, a and b, for the quadratic expression that produces the maximum number of primes for consecutive values of n, starting with n = 0.
I wrote the following code, which gives me the answers pretty quick but it is wrong (it spits me (-951) * (-705) = 670455). Can somebody check my code to see where is/are my mistake(s)?
#include <iostream>
#include <vector>
#include <cmath>
#include <time.h>
using namespace std;
bool isprime(unsigned int n, int d[339]);
int main()
{
clock_t t = clock();
int c[] = {13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251,257,263,269,271,277,281,283,293,307,311,313,317,331,337,347,349,353,359,367,373,379,383,389,397,401,409,419,421,431,433,439,443,449,457,461,463,467,479,487,491,499,503,509,521,523,541,547,557,563,569,571,577,587,593,599,601,607,613,617,619,631,641,643,647,653,659,661,673,677,683,691,701,709,719,727,733,739,743,751,757,761,769,773,787,797,809,811,821,823,827,829,839,853,857,859,863,877,881,883,887,907,911,919,929,937,941,947,953,967,971,977,983,991,997,1009,1013,1019,1021,1031,1033,1039,1049,1051,1061,1063,1069,1087,1091,1093,1097,1103,1109,1117,1123,1129,1151,1153,1163,1171,1181,1187,1193,1201,1213,1217,1223,1229,1231,1237,1249,1259,1277,1279,1283,1289,1291,1297,1301,1303,1307,1319,1321,1327,1361,1367,1373,1381,1399,1409,1423,1427,1429,1433,1439,1447,1451,1453,1459,1471,1481,1483,1487,1489,1493,1499,1511,1523,1531,1543,1549,1553,1559,1567,1571,1579,1583,1597,1601,1607,1609,1613,1619,1621,1627,1637,1657,1663,1667,1669,1693,1697,1699,1709,1721,1723,1733,1741,1747,1753,1759,1777,1783,1787,1789,1801,1811,1823,1831,1847,1861,1867,1871,1873,1877,1879,1889,1901,1907,1913,1931,1933,1949,1951,1973,1979,1987,1993,1997,1999,2003,2011,2017,2027,2029,2039,2053,2063,2069,2081,2083,2087,2089,2099,2111,2113,2129,2131,2137,2141,2143,2153,2161,2179,2203,2207,2213,2221,2237,2239,2243,2251,2267,2269,2273,2281,2287,2293,2297,2309,2311};
int result[4];
result[3] = 0;
for (int a = -999; a < 1000; a+=2)
{
for (int b = -999; b < 1000; b+=2)
{
bool prime;
int n = 0, count = 0;
do
{
prime = isprime(n*n + a*n + b, c);
n++;
count++;
} while (prime);
count--;
n--;
if (count > result[3])
{
result[0] = a;
result[1] = b;
result[2] = n;
result[3] = count;
}
}
if ((a+1) % 100 == 0)
cout << a+1 << endl;
}
cout << result[0] << endl << result[1] << endl << result[2] << endl << result[3] << endl << clock()-t;
cin >> result[0];
return 0;
}
// Trial-division primality test.
//
// n: candidate. d: table of 339 divisors, expected to hold every prime from
// 13 to 2311 (the table built in main); entries below 2 are ignored.
// Returns true for primes, false otherwise (including 0 and 1).
//
// A table entry only counts as a divisor when it is at most sqrt(n), written
// as l <= n / l, so a prime that appears in the table is no longer rejected
// for dividing itself. Candidates beyond the table are covered by plain odd
// trial division: adding multiples of 2310 to a list of primes skips
// residues such as 221, so it would miss factors like 2531.
bool isprime(unsigned int n, int d[339])
{
    if (n < 2)
        return 0;
    const unsigned int small[5] = {2, 3, 5, 7, 11};
    for (int i = 0; i < 5; i++)
    {
        if (n == small[i])
            return 1;
        if (n % small[i] == 0)
            return 0;
    }
    for (int k = 0; k < 339; k++)
    {
        if (d[k] < 2)
            continue;
        unsigned int l = d[k];
        if (l <= n / l && n % l == 0)
            return 0;
    }
    // Only reached with work to do when n >= 2313 * 2313.
    for (unsigned int l = 2313; l <= n / l; l += 2)
    {
        if (n % l == 0)
            return 0;
    }
    return 1;
}
There's a bug in isprime function.
In your function, you check all 2310 * j + d[k] where j <= int(sqrt(n) / 2310) to ensure the target n is a prime number. However, an additional condition that l < sqrt(n) is also required, or you will over-exclude some prime numbers.
For example, when a = 1, b = 41 and n = 0, your function will check whether 41 is a prime number starting from j = 0. So whether 41 can be divisible by 2310 * 0 + d[7] = 41 is also verified, which leads to a false return.
This version should be correct:
// Trial-division primality test.
//
// n: candidate. d: divisor table with at least 339 entries, expected to hold
// every prime from 13 to 2311; entries below 2 are skipped.
// Returns true only for primes; 0 and 1 are reported as not prime.
//
// The bound is inclusive (l <= n / l, i.e. l <= sqrt(n) without floating
// point), so squares of primes such as 169 are caught; a strict l < sqrt(n)
// let them through. Divisors above the table are tried as consecutive odd
// numbers, because offsetting a table of primes by multiples of 2310 does
// not reach every possible prime factor (2531 = 2310 + 13 * 17, for one).
bool isprime(unsigned int n, int d[])
{
    if (n < 2)
        return 0;
    const unsigned int first_primes[5] = {2, 3, 5, 7, 11};
    for (int i = 0; i < 5; i++)
    {
        if (n == first_primes[i])
            return 1;
        if (n % first_primes[i] == 0)
            return 0;
    }
    for (int k = 0; k < 339; k++)
    {
        if (d[k] < 2)
            continue;
        unsigned int l = d[k];
        if (l <= n / l && n % l == 0)
            return 0;
    }
    // Does work only when n >= 2313 * 2313.
    for (unsigned int l = 2313; l <= n / l; l += 2)
    {
        if (n % l == 0)
            return 0;
    }
    return 1;
}

Fast n choose k mod p for large n?

What I mean by "large n" is something in the millions. p is prime.
I've tried
http://apps.topcoder.com/wiki/display/tc/SRM+467
But the function seems to be incorrect (I tested it with 144 choose 6 mod 5 and it gives me 0 when it should give me 2)
I've tried
http://online-judge.uva.es/board/viewtopic.php?f=22&t=42690
But I don't understand it fully
I've also made a memoized recursive function that uses the logic (combinations(n-1, k-1, p)%p + combinations(n-1, k, p)%p) but it gives me stack overflow problems because n is large
I've tried Lucas Theorem but it appears to be either slow or inaccurate.
All I'm trying to do is create a fast/accurate n choose k mod p for large n. If anyone could help show me a good implementation for this I'd be very grateful. Thanks.
As requested, the memoized version that hits stack overflows for large n:
// Cache of already computed values, keyed by (n, k). It is only valid for one
// modulus at a time; combinations() empties it whenever p changes.
std::map<std::pair<long long, long long>, long long> memo;
// Returns C(n, k) mod p using Pascal's rule with memoisation.
//
// Fixes over the first draft: C(0, 0) is 1 (it was reported as 0), the k == 1
// shortcut is reduced modulo p, negative k yields 0, and cached values are
// not reused across different moduli.
// The recursion depth still grows with n, so very large n can exhaust the
// stack; this variant is meant for moderate n only.
long long combinations(long long n, long long k, long long p){
    static long long memo_modulus = 0;
    if (p != memo_modulus) {
        memo.clear();
        memo_modulus = p;
    }
    if (k < 0 || n < k) return 0;
    if (0 == k || n == k) return 1 % p;
    if (1 == k) return n % p;
    std::map<std::pair<long long, long long>, long long>::iterator it =
        memo.find(std::make_pair(n, k));
    if (it != memo.end()) {
        return it->second;
    }
    long long value = (combinations(n-1, k-1, p) + combinations(n-1, k, p)) % p;
    memo.insert(std::make_pair(std::make_pair(n, k), value));
    return value;
}
So, here is how you can solve your problem.
Of course you know the formula:
comb(n,k) = n!/(k!*(n-k)!) = (n*(n-1)*...(n-k+1))/k!
(See http://en.wikipedia.org/wiki/Binomial_coefficient#Computing_the_value_of_binomial_coefficients)
You know how to compute the numerator:
long long res = 1;
for (long long i = n; i > n- k; --i) {
res = (res * i) % p;
}
Now, as p is prime, the reciprocal of each integer that is coprime with p is well defined, i.e. a^(-1) can be found. And this can be done using Fermat's little theorem: a^(p-1) ≡ 1 (mod p) => a * a^(p-2) ≡ 1 (mod p), and so a^(-1) ≡ a^(p-2) (mod p).
Now all you need to do is to implement fast exponentiation(for example using the binary method):
// Returns a^k mod p by binary exponentiation: the base is squared once per
// bit of k and multiplied into the result for every set bit.
long long degree(long long a, long long k, long long p) {
    long long result = 1;
    for (long long base = a; k != 0; k /= 2, base = (base * base) % p) {
        if (k % 2 != 0) {
            result = (result * base) % p;
        }
    }
    return result;
}
And now you can add the denominator to our result:
// Multiplies in the inverse of every factor of k!; i^(p-2) is the inverse of
// i modulo the prime p. degree() takes the modulus as its third argument,
// which was missing here.
// res starts from 1 in this snippet; when combining it with the numerator
// loop, keep multiplying into that loop's res instead of redeclaring it.
long long res = 1;
for (long long i = 1; i <= k; ++i) {
    res = (res * degree(i, p - 2, p)) % p;
}
Please note I am using long long everywhere to avoid type overflow. Of course you don't need to do k exponentiations - you can compute k!(mod p) and then divide only once:
// Builds k! mod p once and inverts it with a single exponentiation.
// degree() needs the modulus as its third argument, which was missing here.
long long denom = 1;
for (long long i = 1; i <= k; ++i) {
    denom = (denom * i) % p;
}
res = (res * degree(denom, p - 2, p)) % p;
EDIT: as per @dbaupp's comment, if k >= p then k! will be equal to 0 modulo p and (k!)^-1 will not be defined. To avoid that, first compute the degree with which p is in n*(n-1)...(n-k+1) and in k! and compare them:
// Returns the exponent of p in n!, i.e. n/p + n/p^2 + n/p^3 + ...
//
// The terms are produced by repeatedly dividing n by p rather than by
// building powers of p, so no intermediate value can overflow.
// Returns 0 for n < p (including negative n) and for p < 2, where the sum is
// not defined and the previous loop would hang or divide by zero.
int get_degree(long long n, long long p) { // returns the degree with which p is in n!
    int degree_num = 0;
    if (p < 2) {
        return 0;
    }
    for (long long temp = n / p; temp > 0; temp /= p) {
        degree_num += (int) temp;
    }
    return degree_num;
}
// Returns C(n, k) mod p for prime p, also when k >= p.
//
// The exponent of p in the numerator n*(n-1)*...*(n-k+1) is compared with
// the one in k!; if the numerator has more, the result is 0. Otherwise every
// factor is stripped of its powers of p, and the stripped denominator factors
// are inverted with Fermat's little theorem (x^(p-2)).
//
// Returns 0 when k < 0 or k > n. Without that guard the numerator loop
// reached the factor 0, whose "divide out p" loop never terminates.
long long combinations(int n, int k, long long p) {
    if (k < 0 || k > n) {
        return 0;
    }
    int num_degree = get_degree(n, p) - get_degree(n - k, p);
    int den_degree = get_degree(k, p);
    if (num_degree > den_degree) {
        return 0;
    }
    long long res = 1;
    for (long long i = n; i > n - k; --i) {
        long long ti = i;
        while(ti % p == 0) {
            ti /= p;          // drop the powers of p from this factor
        }
        res = (res * ti) % p;
    }
    for (long long i = 1; i <= k; ++i) {
        long long ti = i;
        while(ti % p == 0) {
            ti /= p;
        }
        res = (res * degree(ti, p-2, p)) % p;
    }
    return res;
}
EDIT: There is one more optimization that can be added to the solution above - instead of computing the inverse number of each multiple in k!, we can compute k!(mod p) and then compute the inverse of that number. Thus we have to pay the logarithm for the exponentiation only once. Of course again we have to discard the p divisors of each multiple. We only have to change the last loop with this:
long long denom = 1;
for (long long i = 1; i <= k; ++i) {
long long ti = i;
while(ti % p == 0) {
ti /= p;
}
denom = (denom * ti) % p;
}
res = (res * degree(denom, p-2, p)) % p;
For large k, we can reduce the work significantly by exploiting two fundamental facts:
If p is a prime, the exponent of p in the prime factorisation of n! is given by (n - s_p(n)) / (p-1), where s_p(n) is the sum of the digits of n in the base p representation (so for p = 2, it's popcount). Thus the exponent of p in the prime factorisation of choose(n,k) is (s_p(k) + s_p(n-k) - s_p(n)) / (p-1), in particular, it is zero if and only if the addition k + (n-k) has no carry when performed in base p (the exponent is the number of carries).
Wilson's theorem: p is a prime, if and only if (p-1)! ≡ (-1) (mod p).
The exponent of p in the factorisation of n! is usually calculated by
// Exponent of p in the prime factorisation of n!: the sum of the successive
// quotients n/p, n/p^2, ... obtained by dividing repeatedly by p.
long long factorial_exponent(long long n, long long p)
{
    long long quotient = n / p;
    long long total = quotient;
    while (quotient > 0)
    {
        quotient /= p;
        total += quotient;
    }
    return total;
}
The check for divisibility of choose(n,k) by p is not strictly necessary, but it's reasonable to have that first, since it will often be the case, and then it's less work:
// Returns choose(n, k) mod p for prime p.
//
// Trivial cases are answered directly; then the exponent of p in n! is
// compared with that in k! * (n-k)! to detect a result divisible by p; only
// otherwise is the digit-by-digit computation in choose_mod_one() run.
// factorial_exponent() takes the prime as its second argument, which the
// calls here were missing.
long long choose_mod(long long n, long long k, long long p)
{
    // We deal with the trivial cases first
    if (k < 0 || n < k) return 0;
    if (k == 0 || k == n) return 1;
    // Now check whether choose(n,k) is divisible by p
    if (factorial_exponent(n, p) > factorial_exponent(k, p) + factorial_exponent(n-k, p)) return 0;
    // If it's not divisible, do the generic work
    return choose_mod_one(n,k,p);
}
Now let us take a closer look at n!. We separate the numbers ≤ n into the multiples of p and the numbers coprime to p. With
n = q*p + r, 0 ≤ r < p
The multiples of p contribute p^q * q!. The numbers coprime to p contribute the product of (j*p + k), 1 ≤ k < p for 0 ≤ j < q, and the product of (q*p + k), 1 ≤ k ≤ r.
For the numbers coprime to p we will only be interested in the contribution modulo p. Each of the full runs j*p + k, 1 ≤ k < p is congruent to (p-1)! modulo p, so altogether they produce a contribution of (-1)^q modulo p. The last (possibly) incomplete run produces r! modulo p.
So if we write
n = a*p + A
k = b*p + B
n-k = c*p + C
we get
choose(n,k) = p^a * a!/ (p^b * b! * p^c * c!) * cop(a,A) / (cop(b,B) * cop(c,C))
where cop(m,r) is the product of all numbers coprime to p which are ≤ m*p + r.
There are two possibilities, a = b + c and A = B + C, or a = b + c + 1 and A = B + C - p.
In our calculation, we have eliminated the second possibility beforehand, but that is not essential.
In the first case, the explicit powers of p cancel, and we are left with
choose(n,k) = a! / (b! * c!) * cop(a,A) / (cop(b,B) * cop(c,C))
= choose(a,b) * cop(a,A) / (cop(b,B) * cop(c,C))
Any powers of p dividing choose(n,k) come from choose(a,b) - in our case, there will be none, since we've eliminated these cases before - and, although cop(a,A) / (cop(b,B) * cop(c,C)) need not be an integer (consider e.g. choose(19,9) (mod 5)), when considering the expression modulo p, cop(m,r) reduces to (-1)^m * r!, so, since a = b + c, the (-1) cancel and we are left with
choose(n,k) ≡ choose(a,b) * choose(A,B) (mod p)
In the second case, we find
choose(n,k) = choose(a,b) * p * cop(a,A)/ (cop(b,B) * cop(c,C))
since a = b + c + 1. The carry in the last digit means that A < B, so modulo p
p * cop(a,A) / (cop(b,B) * cop(c,C)) ≡ 0 = choose(A,B)
(where we can either replace the division with a multiplication by the modular inverse, or view it as a congruence of rational numbers, meaning the numerator is divisible by p). Anyway, we again find
choose(n,k) ≡ choose(a,b) * choose(A,B) (mod p)
Now we can recur for the choose(a,b) part.
Example:
choose(144,6) (mod 5)
144 = 28 * 5 + 4
6 = 1 * 5 + 1
choose(144,6) ≡ choose(28,1) * choose(4,1) (mod 5)
≡ choose(3,1) * choose(4,1) (mod 5)
≡ 3 * 4 = 12 ≡ 2 (mod 5)
choose(12349,789) ≡ choose(2469,157) * choose(4,4)
≡ choose(493,31) * choose(4,2) * choose(4,4)
≡ choose(98,6) * choose(3,1) * choose(4,2) * choose(4,4)
≡ choose(19,1) * choose(3,1) * choose(3,1) * choose(4,2) * choose(4,4)
≡ 4 * 3 * 3 * 1 * 1 = 36 ≡ 1 (mod 5)
Now the implementation:
// Preconditions: 0 <= k <= n; p > 1 prime
// Preconditions: 0 <= k <= n; p > 1 prime
// Multiplies together choose(n_i, k_i) mod p over the base-p digits of n and
// k, lowest digit first, peeling one digit off per loop pass until k < p.
long long choose_mod_one(long long n, long long k, long long p)
{
    // For small k, a single digit-level binomial is the whole answer
    if (k < p) return choose_mod_two(n,k,p);

    long long product = choose_mod_two(n % p, k % p, p);
    // If the exponent of p in choose(n,k) isn't determined to be 0
    // before the calculation gets serious, short-cut here:
    /* if (product == 0) return 0; */
    n /= p;
    k /= p;
    while (k >= p)
    {
        product = (product * choose_mod_two(n % p, k % p, p)) % p;
        n /= p;
        k /= p;
    }
    product *= choose_mod_two(n, k, p);
    return product % p;
}
// Preconditions: 0 <= k <= min(n,p-1); p > 1 prime
// Preconditions: 0 <= k <= min(n,p-1); p > 1 prime
// Computes choose(n mod p, k) mod p as numerator * inverse(denominator).
long long choose_mod_two(long long n, long long k, long long p)
{
    // work with n reduced modulo p
    const long long top = n % p;
    // Trivial answers
    if (top < k) return 0;
    if (k == 0 || k == top) return 1;
    // Here 0 < k < top; use the smaller of k and top - k
    long long width = (k > top/2) ? top - k : k;
    // numerator top*(top-1)*... and denominator width*(width-1)*...*2, both mod p
    long long num = top, den = 1;
    long long factor = top - 1;
    while (width > 1)
    {
        num = (num * factor) % p;
        den = (den * width) % p;
        --factor;
        --width;
    }
    // Multiply by the modular inverse of the denominator
    const long long den_inverse = invert_mod(den,p);
    return (num * den_inverse) % p;
}
To calculate the modular inverse, you can use Fermat's (so-called little) theorem
If p is prime and a not divisible by p, then a^(p-1) ≡ 1 (mod p).
and calculate the inverse as a^(p-2) (mod p), or use a method applicable to a wider range of arguments, the extended Euclidean algorithm or continued fraction expansion, which give you the modular inverse for any pair of coprime (positive) integers:
// Modular inverse of k modulo m, for coprime k and m, found from the
// continued-fraction expansion of m / k. With m == 0 only k == 1 and k == -1
// are returned unchanged (anything else gives 0). A negative m is replaced by
// its absolute value and k is first reduced into [0, m).
long long invert_mod(long long k, long long m)
{
    if (m == 0)
    {
        if (k == 1 || k == -1) return k;
        return 0;
    }
    if (m < 0) m = -m;
    k %= m;
    if (k < 0) k += m;

    int take_complement = 1;              // flips on every division step
    long long conv = 1, prev_conv = 0;    // successive convergent terms
    long long divisor = k, dividend = m;
    while (divisor > 0)
    {
        const long long q = dividend / divisor;
        const long long r = dividend % divisor;
        const long long next_conv = q * conv + prev_conv;
        prev_conv = conv;
        conv = next_conv;
        dividend = divisor;
        divisor = r;
        take_complement = !take_complement;
    }
    if (take_complement) return m - prev_conv;
    return prev_conv;
}
Like calculating a^(p-2) (mod p), this is an O(log p) algorithm, for some inputs it's significantly faster (it's actually O(min(log k, log p)), so for small k and large p, it's considerably faster), for others it's slower.
Overall, this way we need to calculate at most O(log_p k) binomial coefficients modulo p, where each binomial coefficient needs at most O(p) operations, yielding a total complexity of O(p*log_p k) operations.
When k is significantly larger than p, that is much better than the O(k) solution. For k <= p, it reduces to the O(k) solution with some overhead.
If you're calculating it more than once, there's another way that's faster. I'm going to post code in python because it'll probably be the easiest to convert into another language, although I'll put the C++ code at the end.
Calculating Once
Brute force:
def choose(n, k, m):
    ''' Brute-force C(n, k) mod m using exact big-integer arithmetic '''
    ans = 1
    # Numerator: n * (n-1) * ... * (n-k+1)
    for i in range(k):
        ans *= (n - i)
    # Denominator: k! -- the divisors run over 1..k (starting at 0 would
    # divide by zero). The division is exact because a product of k
    # consecutive integers is always divisible by k!.
    for i in range(1, k + 1):
        ans //= i
    return ans % m
But the calculation can get into very big numbers, so we can use modular arithmetic tricks instead:
(a * b) mod m = (a mod m) * (b mod m) mod m
(a / (b*c)) mod m = (a mod m) / ((b mod m) * (c mod m) mod m)
(a / b) mod m = (a mod m) * (b mod m)^-1
Note the ^-1 at the end of the last equation. This is the multiplicative inverse of b mod m. It basically means that ((b mod m) * (b mod m)^-1) mod m = 1, just like how a * a^-1 = a * 1/a = 1 for non-zero real numbers.
This can be calculated in a few ways, one of which is the extended euclidean algorithm:
def multinv(n, m):
    ''' Multiplicative inverse of n mod m (extended Euclidean algorithm).

    Returns 0 when m == 1. A negative coefficient is shifted into the
    non-negative range by adding the original modulus.
    NOTE(review): assumes n and m are coprime -- confirm at call sites.
    '''
    if m == 1:
        return 0
    modulus = m
    prev_coef, coef = 0, 1
    while n > 1:
        quotient = n // m
        # One Euclidean step on (n, m), mirrored on the coefficient pair.
        n, m = m, n % m
        coef, prev_coef = prev_coef, coef - quotient * prev_coef
    if coef < 0:
        coef += modulus
    return coef
Note that another method, exponentiation, works only if m is prime. If it is, you can do this:
def powmod(b, e, m):
    ''' b^e mod m by binary (square-and-multiply) exponentiation '''
    # Python already has a built-in pow(b, e, m) that is probably faster;
    # this explicit version exists so it can be ported to C++.
    result = 1
    base = b
    exponent = e
    while exponent:
        # The lowest bit decides whether the current square contributes.
        if exponent & 1:
            result = (result * base) % m
        exponent >>= 1
        base = (base * base) % m
    return result
def multinv(n, m):
    ''' Multiplicative inverse of n mod m, only if m is prime '''
    # Fermat's little theorem: n^(m-1) = 1 (mod m), hence n^(m-2) is n^-1.
    fermat_exponent = m - 2
    return powmod(n, fermat_exponent, m)
But note that the Extended Euclidean Algorithm tends to still run faster, even though they technically have the same time complexity, O(log m), because it has a lower constant factor.
So now the full code:
def multinv(n, m):
    ''' Multiplicative inverse of n mod m in log(m) (extended Euclid).

    Returns 0 when m == 1. A negative coefficient is shifted into the
    non-negative range by adding the original modulus.
    NOTE(review): assumes n and m are coprime -- confirm at call sites.
    '''
    if m == 1:
        return 0
    modulus = m
    prev_coef, coef = 0, 1
    while n > 1:
        quotient = n // m
        # One Euclidean step on (n, m), mirrored on the coefficient pair.
        n, m = m, n % m
        coef, prev_coef = prev_coef, coef - quotient * prev_coef
    if coef < 0:
        coef += modulus
    return coef
def choose(n, k, m):
    ''' C(n, k) mod m using modular products and a modular inverse.

    The denominator k! mod m must be coprime to m for its inverse to
    exist (for example, m prime and k < m).
    '''
    num = den = 1
    # Numerator: n * (n-1) * ... * (n-k+1), reduced mod m at every step.
    for i in range(k):
        num = num * (n - i) % m
    # Denominator: k!, so the factors run over 1..k. Starting at 0 would
    # force den to 0 and make the result meaningless.
    for i in range(1, k + 1):
        den = den * i % m
    # Reduce once more so the result lies in [0, m).
    return num * multinv(den, m) % m
Querying Multiple Times
We can calculate the numerator and denominator separately, and then combine them. But notice that the product we're calculating for the numerator is n * (n-1) * (n-2) * (n-3) ... * (n-k+1). If you've ever learned about something called prefix sums, this is awfully similar. So let's apply it.
Precalculate fact[i] = i! mod m for i up to whatever the max value of n is, maybe 1e7 (ten million). Then, the numerator is (fact[n] * fact[n-k]^-1) mod m, and the denominator is fact[k]. So we can calculate choose(n, k, m) = fact[n] * multinv(fact[n-k], m) % m * multinv(fact[k], m) % m.
Python code:
MAXN = 1000  # Largest index the factorial table covers; increase if necessary
MOD = 10**9 + 7  # A commonly used modulus; change if necessary
# fact[i] holds i! reduced modulo MOD for every 0 <= i <= MAXN.
fact = [1] * (MAXN + 1)
for i in range(1, MAXN + 1):
    fact[i] = fact[i - 1] * i % MOD
def multinv(n, m):
    ''' Multiplicative inverse of n mod m in log(m) (extended Euclid).

    Returns 0 when m == 1. A negative coefficient is shifted into the
    non-negative range by adding the original modulus.
    NOTE(review): assumes n and m are coprime -- confirm at call sites.
    '''
    if m == 1:
        return 0
    modulus = m
    prev_coef, coef = 0, 1
    while n > 1:
        quotient = n // m
        # One Euclidean step on (n, m), mirrored on the coefficient pair.
        n, m = m, n % m
        coef, prev_coef = prev_coef, coef - quotient * prev_coef
    if coef < 0:
        coef += modulus
    return coef
def choose(n, k, m):
    ''' C(n, k) mod m from the precomputed factorial table.

    The table entries are reduced modulo MOD, so m is expected to be MOD.
    n must not exceed MAXN, the largest index the table covers.
    '''
    # Out-of-range k has no combinations. Without this guard, n - k < 0
    # would index the table from the end and return a wrong value.
    if k < 0 or k > n:
        return 0
    return fact[n] * multinv(fact[n-k] * fact[k] % m, m) % m
C++ code:
#include <iostream>
using namespace std;
// Largest n the factorial table covers. It must be at least the largest n
// ever passed to choose(); main() queries n = 1e6, so the table has to reach
// that far or fact[n] is read out of bounds.
const int MAXN = 1000000; // Increase if necessary
// Integer literal rather than 1e9+7, which is a double converted to int.
const int MOD = 1000000007; // A common mod that's used, change if necessary
// fact[i] holds i! mod MOD once main() has filled the table.
int fact[MAXN+1];
/* Multiplicative inverse of n mod m in log(m), via the extended Euclidean
 * algorithm. Returns 0 when m == 1; a negative coefficient is shifted into
 * the non-negative range by adding the original modulus.
 * NOTE(review): assumes n and m are coprime -- confirm at call sites. */
int multinv(int n, int m) {
    if (m == 1) {
        return 0;
    }
    const int modulus = m;
    int prev_coef = 0;
    int coef = 1;
    while (n > 1) {
        // One Euclidean step on (n, m), mirrored on the coefficient pair.
        const int quotient = n / m;
        const int next_coef = coef - quotient * prev_coef;
        coef = prev_coef;
        prev_coef = next_coef;

        const int remainder = n % m;
        n = m;
        m = remainder;
    }
    if (coef < 0) {
        coef += modulus;
    }
    return coef;
}
/* C(n, k) mod m from the precomputed factorial table.
 * The table entries are reduced modulo MOD, so m is expected to be MOD.
 * n must not exceed MAXN, the last index of fact[].
 * Returns 0 when k is outside [0, n]. */
int choose(int n, int k, int m) {
    // Out-of-range k has no combinations; it would also index fact[]
    // out of bounds through fact[n-k] (or fact[k]).
    if (k < 0 || k > n) {
        return 0;
    }
    // Widen to long long before multiplying so the products cannot overflow int.
    return (long long) fact[n]
        * multinv((long long) fact[n-k] * fact[k] % m, m) % m;
}
int main() {
    // Fill the factorial table: fact[i] = i! mod MOD for 0 <= i <= MAXN.
    fact[0] = 1;
    for (int idx = 1; idx <= MAXN; ++idx) {
        // Widen to long long before multiplying to avoid int overflow.
        const long long previous = fact[idx - 1];
        fact[idx] = previous * idx % MOD;
    }

    std::cout << choose(4, 2, MOD) << '\n';
    // NOTE(review): choose() reads fact[n], so n must not exceed MAXN.
    std::cout << choose(1000000, 1000, MOD) << '\n';
}
Note that I'm casting to long long to avoid overflow.