Worst case time complexity for recursion - c++

int memo[101][101];
int findMinPath(vector<vector<int> >& V, int r, int c) {
int R = V.size();
int C = V[0].size();
if (r >= R || c >= C) return 100000000; // Infinity
if (r == R - 1 && c == C - 1) return 0;
if (memo[r][c] != -1) return memo[r][c];
memo[r][c] = V[r][c] + min(findMinPath(V, r + 1, c), findMinPath(V, r, c + 1));
return memo[r][c];
}
Callsite :
memset(memo, -1, sizeof(memo));
findMinPath(V, 0, 0);
In the above code, what will be the worst case time complexity?.I understand that every function will call other functions atmost one time, but I am not clear with the calculation of the time complexity.

The memoisation is the key here. Normally this would have exponential growth, but because you never perform additional recursive steps if the result is previously computed in memo, then it is reduced to the number of elements in memo as a worst case. i.e. O( 101 x 101 )

Related

How to modulo this formula?

I would like to write this formula in C++ language:
(2<=n<=1e5), (1<=k<=n), (2<=M<=1e9).
I would like to do this without using special structures.
Unfortunately in this formula there are a lot of cases which effectively make modulation difficult. Example: ((n-k)!) mod M can be equal to 0, or ((n-1)(n-2))/4 may not be an integer. I will be very grateful for any help.
(n−1)!/(n−k)! can be handled by computing the product (n−k+1)…(n−1).
(n−1)! (n−1)(n−2)/4 can be handled by handling n ≤ 2 (0) and n ≥ 3
(3…(n−1) (n−1)(n−2)/2) separately.
Untested C++:
#include <cassert>
#include <cstdint>
class Residue {
public:
// Accept int64_t for convenience.
explicit Residue(int64_t rep, int32_t modulus) : modulus_(modulus) {
assert(modulus > 0);
rep_ = rep % modulus;
if (rep_ < 0)
rep_ += modulus;
}
// Return int64_t for convenience.
int64_t rep() const { return rep_; }
int32_t modulus() const { return modulus_; }
private:
int32_t rep_;
int32_t modulus_;
};
Residue operator+(Residue a, Residue b) {
assert(a.modulus() == b.modulus());
return Residue(a.rep() + b.rep(), a.modulus());
}
Residue operator-(Residue a, Residue b) {
assert(a.modulus() == b.modulus());
return Residue(a.rep() - b.rep(), a.modulus());
}
Residue operator*(Residue a, Residue b) {
assert(a.modulus() == b.modulus());
return Residue(a.rep() * b.rep(), a.modulus());
}
Residue QuotientOfFactorialsMod(int32_t a, int32_t b, int32_t modulus) {
assert(modulus > 0);
assert(b >= 0);
assert(a >= b);
Residue result(1, modulus);
// Don't initialize with b + 1 because it could overflow.
for (int32_t i = b; i < a; i++) {
result = result * Residue(i + 1, modulus);
}
return result;
}
Residue FactorialMod(int32_t a, int32_t modulus) {
assert(modulus > 0);
assert(a >= 0);
return QuotientOfFactorialsMod(a, 0, modulus);
}
Residue Triangular(int32_t a, int32_t modulus) {
assert(modulus > 0);
return Residue((static_cast<int64_t>(a) + 1) * a / 2, modulus);
}
Residue F(int32_t n, int32_t k, int32_t m) {
assert(n >= 2);
assert(n <= 100000);
assert(k >= 1);
assert(k <= n);
assert(m >= 2);
assert(m <= 1000000000);
Residue n_res(n, m);
Residue n_minus_1(n - 1, m);
Residue n_minus_2(n - 2, m);
Residue k_res(k, m);
Residue q = QuotientOfFactorialsMod(n - 1, n - k, m);
return q * (k_res - n_res) * n_minus_1 +
(FactorialMod(n - 1, m) - q) * k_res * n_minus_1 +
(n > 2 ? QuotientOfFactorialsMod(n - 1, 2, m) *
(n_res * n_minus_1 + Triangular(n - 2, m))
: Residue(1, m));
}
As mentioned in the other answer dividing factorials can be evaluated directly without division. Also you need 64bit arithmetics in order to store your subresults. And use modulo after each multiplication otherwise you would need very huge numbers which would take forever to compute.
Also you mention ((n-1)(n-2))/4 can be non just integer how to deal with that is questionable as we do not have any context to what you are doing. However you can move /2 before brackets (apply it on (n-1)! so modpi without 2 beware not to divide the already modded factorial!!!) and then you have no remainder as the (n-1)*(n-2)/4 become (n-1)*(n-2)/2 and the (n-1)*(n-2) is always odd (divisible by 2). The only "problem" is when n=2 as the n*(n-1)/2 is 1 but the /2 moved before bracket will round down the (n-1)! so you should handle it as special case by not moving the /2 before brackets (not included in code below).
I see it like this:
typedef unsigned __int64 u64;
u64 modpi(u64 x0,u64 x1,u64 p) // ( x0*(x0+1)*(x0+2)*...*x1 ) mod p
{
u64 x,y;
if (x0>x1){ x=x0; x0=x1; x1=x; }
for (y=1,x=x0;x<=x1;x++){ y*=x; y%=p; }
return y;
}
void main()
{
u64 n=100,k=20,m=123456789,a,b,b2,c,y;
a =modpi(n-k+1,n-1,m); // (n-1)!/(n-k)!
b =modpi(1,n-1,m); // (n-1)! mod m
b2=modpi(3,n-1,m); // (n-1)!/2 mod m
c =((n*(n-1)))%m; // 2*( n*(n-1)/2 + (n-1)*(n-2)/4 ) mod m
c+=(((n-1)*(n-2))/2)%m;
y =(((a*(k-n))%m)*(n-1))%m; // ((n-1)!/(n-k)!)*(k-1)*(n-1) mod m
y+=b; // (n-1)! mod m
y-=(((a*k)%m)*(n-1))%m; // ((n-1)!/(n-k)!)*k*(n-1) mod m
y+=(b2*c)%m; // (n-1)!*( n*(n-1)/2 + (n-1)*(n-2)/4 ) mod m
// here y should hold your answer
}
however be careful older compilers do not have full support of 64 bit integers and can produce wrong results or even does not compile. In such case use big integer lib or compute using 2*32bit variables or look for 32 bit modmul implementation.
The expression implies the use of a floating point type. Therefore, use the function fmod to get the remainder of the division.

Why the function is failing in case of numbers greater than 48 digits?

I am trying to find
(a^b) % mod
where b and mod is upto 10^9, while l can be really large i have tested upto 48 digits with success
using this relation
(a^b) % mod = (a%mod)^b % mod
#define ll long long int
ll powerLL(ll x, ll n,ll MOD)
{
ll result = 1;
while (n) {
if (n & 1)
result = result * x % MOD;
n = n / 2;
x = x * x % MOD;
}
return result;
}
ll powerStrings(string sa, string sb,ll MOD)
{
ll a = 0, b = 0;
for (size_t i = 0; i < sa.length(); i++)
a = (a * 10 + (sa[i] - '0')) % MOD;
for (size_t i = 0; i < sb.length(); i++)
b = (b * 10 + (sb[i] - '0')) % (MOD - 1);
return powerLL(a, b,MOD);
}
powerStrings("5109109785634228366587086207094636370893763284000","362323789",354252525) returns 208624800 but it should return 323419500. In this case a is 49 digits
powerStrings("300510498717329829809207642824818434714870652000","362323489",354255221) returns 282740484 , which is correct. In this case a is 48 digits
Is something wrong with the code or I will have to use other method of doing the same??
It does not work because it is not mathematically correct.
In general, we have that pow(a, n, m) = pow(a, n % λ(m), m) (with a coprime to m) where λ is the Carmichael function. As a special case, when m is a prime number, then λ(m) = m - 1. That situation is also covered by Fermat's little theorem. That's only a special case, it does not always work.
λ(354252525) = 2146980, if I hack that in then the right result comes out. (the base is not actually coprime to the modulus though)
In general you would need to compute the Carmichael function for the modulus, which is non-trivial, but feasible for small moduli.

Minimum cuts on a rectangle to make into squares

I'm trying to solve this problem:
Given an a×b rectangle, your task is to cut it into squares. On each move you can select a rectangle and cut it into two rectangles in such a way that all side lengths remain integers. What is the minimum possible number of moves?
My logic is that the minimum number of cuts means the minimum number of squares; I don't know if it's the correct approach.
I see which side is smaller, Now I know I need to cut bigSide/SmallSide of cuts to have squares of smallSide sides, then I am left with SmallSide and bigSide%smallSide. Then I go on till any side is 0 or both are equal.
#include <iostream>
int main() {
int a, b; std::cin >> a >> b; // sides of the rectangle
int res = 0;
while (a != 0 && b != 0) {
if (a > b) {
if (a % b == 0)
res += a / b - 1;
else
res += a / b;
a = a % b;
} else if (b > a) {
if (b % a == 0)
res += b / a - 1;
else
res += b / a;
b = b % a;
} else {
break;
}
}
std::cout << res;
return 0;
}
When the input is 404 288, my code gives 18, but the right answer is actually 10.
What am I doing wrong?
It seems clear to me that the problem defines each move as cutting a rectangle to two rectangles along the integer lines, and then asks for the minimum number of such cuts. As you can see there is a clear recursive nature in this problem. Once you cut a rectangle to two parts, you can recurse and cut each of them into squares with minimum moves and then sum up the answers. The problem is that the recursion might lead to exponential time complexity which leads us directly do dynamic programming. You have to use memoization to solve it efficiently (worst case time O(a*b*(a+b))) Here is what I'd suggest doing:
#include <iostream>
#include <vector>
using std::vector;
int min_cuts(int a, int b, vector<vector<int> > &mem) {
int min = mem[a][b];
// if already computed, just return the value
if (min > 0)
return min;
// if one side is divisible by the other,
// store min-cuts in 'min'
if (a%b==0)
min= a/b-1;
else if (b%a==0)
min= b/a -1;
// if there's no obvious solution, recurse
else {
// recurse on hight
for (int i=1; i<a/2; i++) {
int m = min_cuts(i,b, mem);
int n = min_cuts(a-i, b, mem);
if (min<0 or m+n+1<min)
min = m + n + 1;
}
// recurse on width
for (int j=1; j<b/2; j++) {
int m = min_cuts(a,j, mem);
int n = min_cuts(a, b-j, mem);
if (min<0 or m+n+1<min)
min = m + n + 1;
}
}
mem[a][b] = min;
return min;
}
int main() {
int a, b; std::cin >> a >> b; // sides of the rectangle
// -1 means the problem is not solved yet,
vector<vector<int> > mem(a+1, vector<int>(b+1, -1));
int res = min_cuts(a,b,mem);
std::cout << res << std::endl;
return 0;
}
The reason the foor loops go up until a/2 and b/2 is that cuting a paper is symmetric: if you cut along vertical line i it is the same as cutting along the line a-i if you flip the paper vertically. This is a little optimization hack that reduces complexity by a factor of 4 overall.
Another little hack is that by knowing that the problem is that if you transpose the paper the result is the same, meaining min_cuts(a,b)=min_cuts(b,a) you can potentially reduce computations by half. But any major further improvement, say a greedy algorithm would take more thinking (if there exists one at all).
The current answer is a good start, especially the suggestions to use memoization or dynamic programming, and potentially efficient enough.
Obviously, all answerers used the first with a sub-par data-structure. Vector-of-Vector has much space and performance overhead, using a (strict) lower triangular matrix stored in an array is much more efficient.
Using the maximum value as sentinel (easier with unsigned) would also reduce complexity.
Finally, let's move to dynamic programming instead of memoization to simplify and get even more efficient:
#include <algorithm>
#include <memory>
#include <utility>
constexpr unsigned min_cuts(unsigned a, unsigned b) {
if (a < b)
std::swap(a, b);
if (a == b || !b)
return 0;
const auto triangle = [](std::size_t n) { return n * (n - 1) / 2; };
const auto p = std::make_unique_for_overwrite<unsigned[]>(triangle(a));
/* const! */ unsigned zero = 0;
const auto f = [&](auto a, auto b) -> auto& {
if (a < b)
std::swap(a, b);
return a == b ? zero : p[triangle(a - 1) + b - 1];
};
for (auto i = 1u; i <= a; ++i) {
for (auto j = 1u; j < i; ++j) {
auto r = -1u;
for (auto k = i / 2; k; --k)
r = std::min(r, f(k, j) + f(i - k, j));
for (auto k = j / 2; k; --k)
r = std::min(r, f(k, i) + f(j - k, i));
f(i, j) = ++r;
}
}
return f(a, b);
}

To find combination value of large numbers

I want to find (n choose r) for large integers, and I also have to find out the mod of that number.
long long int choose(int a,int b)
{
if (b > a)
return (-1);
if(b==0 || a==1 || b==a)
return(1);
else
{
long long int r = ((choose(a-1,b))%10000007+(choose(a-1,b- 1))%10000007)%10000007;
return r;
}
}
I am using this piece of code, but I am getting TLE. If there is some other method to do that please tell me.
I don't have the reputation to comment yet, but I wanted to point out that the answer by rock321987 works pretty well:
It is fast and correct up to and including C(62, 31)
but cannot handle all inputs that have an output that fits in a uint64_t. As proof, try:
C(67, 33) = 14,226,520,737,620,288,370 (verify correctness and size)
Unfortunately, the other implementation spits out 8,829,174,638,479,413 which is incorrect. There are other ways to calculate nCr which won't break like this, however the real problem here is that there is no attempt to take advantage of the modulus.
Notice that p = 10000007 is prime, which allows us to leverage the fact that all integers have an inverse mod p, and that inverse is unique. Furthermore, we can find that inverse quite quickly. Another question has an answer on how to do that here, which I've replicated below.
This is handy since:
x/y mod p == x*(y inverse) mod p; and
xy mod p == (x mod p)(y mod p)
Modifying the other code a bit, and generalizing the problem we have the following:
#include <iostream>
#include <assert.h>
// p MUST be prime and less than 2^63
uint64_t inverseModp(uint64_t a, uint64_t p) {
assert(p < (1ull << 63));
assert(a < p);
assert(a != 0);
uint64_t ex = p-2, result = 1;
while (ex > 0) {
if (ex % 2 == 1) {
result = (result*a) % p;
}
a = (a*a) % p;
ex /= 2;
}
return result;
}
// p MUST be prime
uint32_t nCrModp(uint32_t n, uint32_t r, uint32_t p)
{
assert(r <= n);
if (r > n-r) r = n-r;
if (r == 0) return 1;
if(n/p - (n-r)/p > r/p) return 0;
uint64_t result = 1; //intermediary results may overflow 32 bits
for (uint32_t i = n, x = 1; i > r; --i, ++x) {
if( i % p != 0) {
result *= i % p;
result %= p;
}
if( x % p != 0) {
result *= inverseModp(x % p, p);
result %= p;
}
}
return result;
}
int main() {
uint32_t smallPrime = 17;
uint32_t medNum = 3001;
uint32_t halfMedNum = medNum >> 1;
std::cout << nCrModp(medNum, halfMedNum, smallPrime) << std::endl;
uint32_t bigPrime = 4294967291ul; // 2^32-5 is largest prime < 2^32
uint32_t bigNum = 1ul << 24;
uint32_t halfBigNum = bigNum >> 1;
std::cout << nCrModp(bigNum, halfBigNum, bigPrime) << std::endl;
}
Which should produce results for any set of 32-bit inputs if you are willing to wait. To prove a point, I've included the calculation for a 24-bit n, and the maximum 32-bit prime. My modest PC took ~13 seconds to calculate this. Check the answer against wolfram alpha, but beware that it may exceed the 'standard computation time' there.
There is still room for improvement if p is much smaller than (n-r) where r <= n-r. For example, we could precalculate all the inverses mod p instead of doing it on demand several times over.
nCr = n! / (r! * (n-r)!) {! = factorial}
now choose r or n - r in such a way that any of them is minimum
#include <cstdio>
#include <cmath>
#define MOD 10000007
int main()
{
int n, r, i, x = 1;
long long int res = 1;
scanf("%d%d", &n, &r);
int mini = fmin(r, (n - r));//minimum of r,n-r
for (i = n;i > mini;i--) {
res = (res * i) / x;
x++;
}
printf("%lld\n", res % MOD);
return 0;
}
it will work for most cases as required by programming competitions if the value of n and r are not too high
Time complexity :- O(min(r, n - r))
Limitation :- for languages like C/C++ etc. there will be overflow if
n > 60 (approximately)
as no datatype can store the final value..
The expansion of nCr can always be reduced to product of integers. This is done by canceling out terms in denominator. This approach is applied in the function given below.
This function has time complexity of O(n^2 * log(n)). This will calculate nCr % m for n<=10000 under 1 sec.
#include <numeric>
#include <algorithm>
int M=1e7+7;
int ncr(int n, int r)
{
r=min(r,n-r);
int A[r],i,j,B[r];
iota(A,A+r,n-r+1); //initializing A starting from n-r+1 to n
iota(B,B+r,1); //initializing B starting from 1 to r
int g;
for(i=0;i<r;i++)
for(j=0;j<r;j++)
{
if(B[i]==1)
break;
g=__gcd(B[i], A[j] );
A[j]/=g;
B[i]/=g;
}
long long ans=1;
for(i=0;i<r;i++)
ans=(ans*A[i])%M;
return ans;
}

Calculating the Amount of Combinations

Cheers,
I know you can get the amount of combinations with the following formula (without repetition and order is not important):
// Choose r from n
n! / r!(n - r)!
However, I don't know how to implement this in C++, since for instance with
n = 52
n! = 8,0658175170943878571660636856404e+67
the number gets way too big even for unsigned __int64 (or unsigned long long). Is there some workaround to implement the formula without any third-party "bigint" -libraries?
Here's an ancient algorithm which is exact and doesn't overflow unless the result is to big for a long long
unsigned long long
choose(unsigned long long n, unsigned long long k) {
if (k > n) {
return 0;
}
unsigned long long r = 1;
for (unsigned long long d = 1; d <= k; ++d) {
r *= n--;
r /= d;
}
return r;
}
This algorithm is also in Knuth's "The Art of Computer Programming, 3rd Edition, Volume 2: Seminumerical Algorithms" I think.
UPDATE: There's a small possibility that the algorithm will overflow on the line:
r *= n--;
for very large n. A naive upper bound is sqrt(std::numeric_limits<long long>::max()) which means an n less than rougly 4,000,000,000.
From Andreas' answer:
Here's an ancient algorithm which is exact and doesn't overflow unless the result is to big for a long long
unsigned long long
choose(unsigned long long n, unsigned long long k) {
if (k > n) {
return 0;
}
unsigned long long r = 1;
for (unsigned long long d = 1; d <= k; ++d) {
r *= n--;
r /= d;
}
return r;
}
This algorithm is also in Knuth's "The Art of Computer Programming, 3rd Edition, Volume 2: Seminumerical Algorithms" I think.
UPDATE: There's a small possibility that the algorithm will overflow on the line:
r *= n--;
for very large n. A naive upper bound is sqrt(std::numeric_limits<long long>::max()) which means an n less than rougly 4,000,000,000.
Consider n == 67 and k == 33. The above algorithm overflows with a 64 bit unsigned long long. And yet the correct answer is representable in 64 bits: 14,226,520,737,620,288,370. And the above algorithm is silent about its overflow, choose(67, 33) returns:
8,829,174,638,479,413
A believable but incorrect answer.
However the above algorithm can be slightly modified to never overflow as long as the final answer is representable.
The trick is in recognizing that at each iteration, the division r/d is exact. Temporarily rewriting:
r = r * n / d;
--n;
For this to be exact, it means if you expanded r, n and d into their prime factorizations, then one could easily cancel out d, and be left with a modified value for n, call it t, and then the computation of r is simply:
// compute t from r, n and d
r = r * t;
--n;
A fast and easy way to do this is to find the greatest common divisor of r and d, call it g:
unsigned long long g = gcd(r, d);
// now one can divide both r and d by g without truncation
r /= g;
unsigned long long d_temp = d / g;
--n;
Now we can do the same thing with d_temp and n (find the greatest common divisor). However since we know a-priori that r * n / d is exact, then we also know that gcd(d_temp, n) == d_temp, and therefore we don't need to compute it. So we can divide n by d_temp:
unsigned long long g = gcd(r, d);
// now one can divide both r and d by g without truncation
r /= g;
unsigned long long d_temp = d / g;
// now one can divide n by d/g without truncation
unsigned long long t = n / d_temp;
r = r * t;
--n;
Cleaning up:
unsigned long long
gcd(unsigned long long x, unsigned long long y)
{
while (y != 0)
{
unsigned long long t = x % y;
x = y;
y = t;
}
return x;
}
unsigned long long
choose(unsigned long long n, unsigned long long k)
{
if (k > n)
throw std::invalid_argument("invalid argument in choose");
unsigned long long r = 1;
for (unsigned long long d = 1; d <= k; ++d, --n)
{
unsigned long long g = gcd(r, d);
r /= g;
unsigned long long t = n / (d / g);
if (r > std::numeric_limits<unsigned long long>::max() / t)
throw std::overflow_error("overflow in choose");
r *= t;
}
return r;
}
Now you can compute choose(67, 33) without overflow. And if you try choose(68, 33), you'll get an exception instead of a wrong answer.
The following routine will compute the n-choose-k, using the recursive definition and memoization. The routine is extremely fast and accurate:
inline unsigned long long n_choose_k(const unsigned long long& n,
const unsigned long long& k)
{
if (n < k) return 0;
if (0 == n) return 0;
if (0 == k) return 1;
if (n == k) return 1;
if (1 == k) return n;
typedef unsigned long long value_type;
value_type* table = new value_type[static_cast<std::size_t>(n * n)];
std::fill_n(table,n * n,0);
class n_choose_k_impl
{
public:
n_choose_k_impl(value_type* table,const value_type& dimension)
: table_(table),
dimension_(dimension)
{}
inline value_type& lookup(const value_type& n, const value_type& k)
{
return table_[dimension_ * n + k];
}
inline value_type compute(const value_type& n, const value_type& k)
{
if ((0 == k) || (k == n))
return 1;
value_type v1 = lookup(n - 1,k - 1);
if (0 == v1)
v1 = lookup(n - 1,k - 1) = compute(n - 1,k - 1);
value_type v2 = lookup(n - 1,k);
if (0 == v2)
v2 = lookup(n - 1,k) = compute(n - 1,k);
return v1 + v2;
}
value_type* table_;
value_type dimension_;
};
value_type result = n_choose_k_impl(table,n).compute(n,k);
delete [] table;
return result;
}
Remember that
n! / ( n - r )! = n * ( n - 1) * .. * (n - r + 1 )
so it's way smaller than n!. So the solution is to evaluate n* ( n - 1 ) * ... * ( n - r + 1) instead of first calculating n! and then dividing it .
Of course it all depends on the relative magnitude of n and r - if r is relatively big compared to n, then it still won't fit.
Well, I have to answer to my own question. I was reading about Pascal's triangle and by accident noticed that we can calculate the amount of combinations with it:
#include <iostream>
#include <boost/cstdint.hpp>
boost::uint64_t Combinations(unsigned int n, unsigned int r)
{
if (r > n)
return 0;
/** We can use Pascal's triange to determine the amount
* of combinations. To calculate a single line:
*
* v(r) = (n - r) / r
*
* Since the triangle is symmetrical, we only need to calculate
* until r -column.
*/
boost::uint64_t v = n--;
for (unsigned int i = 2; i < r + 1; ++i, --n)
v = v * n / i;
return v;
}
int main()
{
std::cout << Combinations(52, 5) << std::endl;
}
Getting the prime factorization of the binomial coefficient is probably the most efficient way to calculate it, especially if multiplication is expensive. This is certainly true of the related problem of calculating factorial (see Click here for example).
Here is a simple algorithm based on the Sieve of Eratosthenes that calculates the prime factorization. The idea is basically to go through the primes as you find them using the sieve, but then also to calculate how many of their multiples fall in the ranges [1, k] and [n-k+1,n]. The Sieve is essentially an O(n \log \log n) algorithm, but there is no multiplication done. The actual number of multiplications necessary once the prime factorization is found is at worst O\left(\frac{n \log \log n}{\log n}\right) and there are probably faster ways than that.
prime_factors = []
n = 20
k = 10
composite = [True] * 2 + [False] * n
for p in xrange(n + 1):
if composite[p]:
continue
q = p
m = 1
total_prime_power = 0
prime_power = [0] * (n + 1)
while True:
prime_power[q] = prime_power[m] + 1
r = q
if q <= k:
total_prime_power -= prime_power[q]
if q > n - k:
total_prime_power += prime_power[q]
m += 1
q += p
if q > n:
break
composite[q] = True
prime_factors.append([p, total_prime_power])
print prime_factors
Using a dirty trick with a long double, it is possible to get the same accuracy as Howard Hinnant (and probably more):
unsigned long long n_choose_k(int n, int k)
{
long double f = n;
for (int i = 1; i<k+1; i++)
f /= i;
for (int i=1; i<k; i++)
f *= n - i;
unsigned long long f_2 = std::round(f);
return f_2;
}
The idea is to divide first by k! and then to multiply by n(n-1)...(n-k+1). The approximation through the double can be avoided by inverting the order of the for loop.
Improves Howard Hinnant's answer (in this question) a little bit:
Calling gcd() per loop seems a bit slow.
We could aggregate the gcd() call into the last one, while making the most use of the standard algorithm from Knuth's book "The Art of Computer Programming, 3rd Edition, Volume 2: Seminumerical Algorithms":
const uint64_t u64max = std::numeric_limits<uint64_t>::max();
uint64_t choose(uint64_t n, uint64_t k)
{
if (k > n)
throw std::invalid_argument(std::string("invalid argument in ") + __func__);
if (k > n - k)
k = n - k;
uint64_t r = 1;
uint64_t d;
for (d = 1; d <= k; ++d) {
if (r > u64max / n)
break;
r *= n--;
r /= d;
}
if (d > k)
return r;
// Let N be the original n,
// n is the current n (when we reach here)
// We want to calculate C(N,k),
// Currently we already calculated the r value so far:
// r = C(N, n) = C(N, N-n) = C(N, d-1)
// Note that N-n = d-1
// In addition we know the following identity formula:
// C(N,k) = C(N,d-1) * C(N-d+1, k-d+1) / C(k, k-d+1)
// = C(N,d-1) * C(n, k-d+1) / C(k, k-d+1)
// Using this formula, we effectively reduce the calculation,
// while recursively use the same function.
uint64_t b = choose(n, k-d+1);
if (b == u64max) {
return u64max; // overflow
}
uint64_t c = choose(k, k-d+1);
if (c == u64max) {
return u64max; // overflow
}
// Now, the combinatorial should be r * b / c
// We can use gcd() to calculate this:
// We Pick b for gcd: b < r almost (if not always) in all cases
uint64_t g = gcd(b, c);
b /= g;
c /= g;
r /= c;
if (r > u64max / b)
return u64max; // overflow
return r * b;
}
Note that the recursive depth is normally 2 (I don't really see a case goes to 3, the combinatorial reducing is quite decent.), i.e. calling choose() for 3 times, for non-overflow cases.
Replace uint64_t with unsigned long long if you prefer it.
One of SHORTEST way :
int nChoosek(int n, int k){
if (k > n) return 0;
if (k == 0) return 1;
return nChoosek(n - 1, k) + nChoosek(n - 1, k - 1);
}
If you want to be 100% sure that no overflows occur so long as the final result is within the numeric limit, you can sum up Pascal's Triangle row-by-row:
for (int i=0; i<n; i++) {
for (int j=0; j<=i; j++) {
if (j == 0) current_row[j] = 1;
else current_row[j] = prev_row[j] + prev_row[j-1];
}
prev_row = current_row; // assume they are vectors
}
// result is now in current_row[r-1]
However, this algorithm is much slower than the multiplication one. So perhaps you could use multiplication to generate all the cases you know that are 'safe' and then use addition from there. (.. or you could just use a BigInt library).