I have implemented karatsuba multiplication algorithm in C++. I want to optimize it so that I can multiply two 64 digits numbers. Can someone help, please? Thank you :)
int Karatsuba::count(long long int a)
{
ostringstream str1;
str1 << a;
string s = str1.str();
return s.length();
}
long long int Karatsuba::multiply(long long int x, long long int y)
{
if(x / 10 == 0 || y / 10 == 0)
return x*y;
else
{
int n = count(x);
long long int n2 = (long)pow(10, n/2);
long long int a = x / n2;
long long int b = x % n2;
long long int c = y / n2;
long long int d = y % n2;
long long int ac = multiply(a,c);
long long int bd = multiply(b,d);
long long int step3 = multiply((a+b), (c+d));
return (pow(10, n)*ac) + (n2*(step3 - ac - bd)) + bd;
}
}
I need to implement a couple versions of SHA in C++, mostly from the ground up, for a summer camp. Here's the docs for the algorithm.
I've gotten SHA-1, SHA-224, and SHA-256 working perfectly, but I haven't managed to get 512 or its derivatives right. I'm supposed to be getting ddaf35a193617aba cc417349ae204131 12e6fa4e89a97ea2 0a9eeee64b55d39a 2192992a274fc1a8 36ba3c23a3feebbd 454d4423643ce80e 2a9ac94fa54ca49f, but my program gives me 21fb47208172306 4570d403444f23d 3fcab6a24097aaf4 7920558b5eea0ae8 7cfc6ce26543e3a6 8ba9c07d1b89d02 1e27ad9d5487df13 2e4a745e0e4df60.
main.cpp:
#include "main.h"
#include <iostream>
#include <iomanip>
#include <sstream>
int main(int argc, const char * argv[]) {
std::string msg = "abc";
std::string hashedMsg = hash(msg);
std::cout << "Hash:" << std::endl << hashedMsg;
std::cin.get();
}
std::string hash(std::string msg)
{
//Preprocessing
const unsigned char eighty = 0x80;
unsigned long long msgLength = msg.length() * 8;
unsigned long long msgBitSize = _byteswap_uint64(msgLength);
unsigned long long k = 1024 - ((msgLength + 64 + 1) % 1024);
unsigned long long finalSize = msgLength + 1 + k + 64;
unsigned long long hashValues[] = {
0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179
};
std::vector<unsigned long long> words(finalSize / 64);
std::memcpy(words.data(), msg.c_str(), msg.length());
std::memcpy((unsigned char *)words.data() + msg.length(), &eighty, 1);
std::memcpy((unsigned char *)words.data() + msg.length()+1+((k-7)/8), &msgBitSize, 8);
unsigned char* byte = (unsigned char*)words.data();
for (int i = 0; i < words.size() * 8; i++) {
std::bitset<8> b(byte[i]);
std::cout << b << " " << std::hex << std::setfill('0') << std::setw(2) << (int)byte[i] << std::endl;
}
// Processing
unsigned long long workingValues[8];
unsigned long long a;
unsigned long long b;
unsigned long long c;
unsigned long long d;
unsigned long long e;
unsigned long long f;
unsigned long long g;
unsigned long long h;
unsigned long long temp1;
unsigned long long temp2;
for (int chunk = 0; chunk < words.size(); chunk += 16) {
std::vector<unsigned long long> schedule(80);
for (int i = 0; i < 16; i++) {
schedule[i] = _byteswap_uint64(words[chunk + i]);
}
for (int i = 16; i < 80; i++) {
schedule[i] = (s1(schedule[i - 2]) + schedule[i - 7] + s0(schedule[i - 15]) + schedule[i - 16]) % (long long)(pow(2, 64));
}
for (int i = 0; i < 8; i++) {
workingValues[i] = hashValues[i];
}
a = workingValues[0];
b = workingValues[1];
c = workingValues[2];
d = workingValues[3];
e = workingValues[4];
f = workingValues[5];
g = workingValues[6];
h = workingValues[7];
for (int t = 0; t < 80; t++) {
temp1 = (h + S1(e) + ch(e, f, g) + sha::words[t] + schedule[t]) % (long long)(pow(2, 64));
temp2 = (S0(a) + maj(a, b, c)) % (long long)(pow(2, 64));
h = g;
g = f;
f = e;
e = (d + temp1) % (long long)(pow(2, 64));
d = c;
c = b;
b = a;
a = (temp1 + temp2) % (long long)(pow(2, 64));
}
hashValues[0] = (hashValues[0] + a) % (long long)(pow(2, 64));
hashValues[1] = (hashValues[1] + b) % (long long)(pow(2, 64));
hashValues[2] = (hashValues[2] + c) % (long long)(pow(2, 64));
hashValues[3] = (hashValues[3] + d) % (long long)(pow(2, 64));
hashValues[4] = (hashValues[4] + e) % (long long)(pow(2, 64));
hashValues[5] = (hashValues[5] + f) % (long long)(pow(2, 64));
hashValues[6] = (hashValues[6] + g) % (long long)(pow(2, 64));
hashValues[7] = (hashValues[7] + h) % (long long)(pow(2, 64));
}
// Return final message
std::stringstream ss;
for (int i = 0; i < 8; i++) {
ss << std::hex << hashValues[i];
}
return ss.str();
}
main.h:
#pragma once
#include<string>
#include<vector>
#include<bitset>
namespace sha {
const unsigned long long words[] = {
0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
0x3956c25bf348b538, 0x59f111f1b605d019, 0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
0xd807aa98a3030242, 0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235, 0xc19bf174cf692694,
0xe49b69c19ef14ad2, 0xefbe4786384f25e3, 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
0x2de92c6f592b0275, 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f, 0xbf597fc7beef0ee4,
0xc6e00bf33da88fc2, 0xd5a79147930aa725, 0x06ca6351e003826f, 0x142929670a0e6e70,
0x27b70a8546d22ffc, 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6, 0x92722c851482353b,
0xa2bfe8a14cf10364, 0xa81a664bbc423001, 0xc24b8b70d0f89791, 0xc76c51a30654be30,
0xd192e819d6ef5218, 0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8,
0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
0x748f82ee5defb2fc, 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec,
0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915, 0xc67178f2e372532b,
0xca273eceea26619c, 0xd186b8c721c0c207, 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
0x06f067aa72176fba, 0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b,
0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, 0x5fcb6fab3ad6faec, 0x6c44198c4a475817
};
}
std::string hash(std::string msg);
unsigned long long rotr(unsigned long long x, unsigned int n) {
return (x >> n) | (x << (32 - n));
}
unsigned long long ch(unsigned long long e, unsigned long long f, unsigned long long g) {
return (e & f) ^ ((~e) & g);
}
unsigned long long maj(unsigned long long a, unsigned long long b, unsigned long long c) {
return (a & b) ^ (a & c) ^ (b & c);
}
unsigned long long s0(unsigned long long x) {
return rotr(x, 1) ^ rotr(x, 8) ^ (x >> 7);
}
unsigned long long s1(unsigned long long x) {
return rotr(x, 19) ^ rotr(x, 61) ^ (x >> 6);
}
unsigned long long S0(unsigned long long x) {
return rotr(x, 28) ^ rotr(x, 34) ^ rotr(x, 39);
}
unsigned long long S1(unsigned long long x) {
return rotr(x, 14) ^ rotr(x, 18) ^ rotr(x, 41);
}
If it helps, I'm using Visual Studio 2015. I've tried using the built-in 64-bit cl.exe compiler, and while it does give me a different hash, it's still not the target one. What am I doing wrong here?
There are two bugs in this implementation:
In rotr, you should have 64 - n instead of 32 - n
You should remove all % (long long)(pow(2, 64));. If you used uint64_t instead of unsigned long long, these weren't needed. And they cause bug here, because long long is usually 64-bit, and it cannot store the number 2^64. Using uint64_t "automatically" uses modulo 2^64.
And it is questionable to put all these function definitions and data into the header file. You should use inline for function definitions, at least. But you'd better move them into the .cpp.
Problem statement : Given two integers n and m, output Fn mod m (that is, the remainder of Fn when divided by m).
Input Format. The input consists of two integers n and m given on the same line (separated by a space).
Constraints. 1 ≤ n ≤ 10^18, 2 ≤ m ≤ 10^5
Output Format. Output Fn mod m.
I tried the following program and it didn't work. The method pi is returning the right Pisano period though for any number as per http://webspace.ship.edu/msrenault/fibonacci/fiblist.htm
#include <iostream>
long long pi(long long m) {
long long result = 2;
for (long long fn2 = 1, fn1 = 2 % m, fn = 3 % m;
fn1 != 1 || fn != 1;
fn2 = fn1, fn1 = fn, fn = (fn1 + fn2) % m
) {
result++;
}
return result;
}
long long get_fibonaccihuge(long long n, long long m) {
long long periodlength = pi(m);
int patternRemainder = n % periodlength;
long long *sum = new long long[patternRemainder];
sum[0] = 0;
sum[1] = 1;
for (int i = 2; i <= patternRemainder; ++i)
{
sum[i] = sum[i - 1] + sum[i - 2];
}
return sum[patternRemainder] % m;
}
int main() {
long long n, m;
std::cin >> n >> m;
std::cout << get_fibonaccihuge(n, m) << '\n';
}
The exact program/logic is working well in python as expected. What's wrong withthis cpp program ? Is it the data types ?
Performing 10^18 additions isn't going to be very practical. Even on a teraflop computer, 10^6 seconds is still 277 hours.
But 10^18 ~= 2^59.8 so there'll be up to 60 halving steps.
Calculate (a,b) --> (a^2 + b^2, 2ab + b^2) to go from (n-1,n)th to (2n-1,2n)th consecutive Fibonacci number pairs in one step.
At each step perform the modulus calculation for each operation. You'll need to accommodate integers up to 3*1010 ≤ 235 in magnitude (i.e. up to 35 bits).
(cf. a related older answer of mine).
This was my solution for this problem, it works well and succeeded in the submission test ...
i used a simpler way to get the pisoano period ( pisano period is the main tricky part in this problem ) ... i wish to be helpful
#include <iostream>
using namespace std;
unsigned long long get_fibonacci_huge_naive(unsigned long long n, unsigned long long m)
{
if (n <= 1)
return n;
unsigned long long previous = 0;
unsigned long long current = 1;
for (unsigned long long i = 0; i < n - 1; ++i)
{
unsigned long long tmp_previous = previous;
previous = current;
current = tmp_previous + current;
}
return current % m;
}
long long get_pisano_period(long long m)
{
long long a = 0, b = 1, c = a + b;
for (int i = 0; i < m * m; i++)
{
c = (a + b) % m;
a = b;
b = c;
if (a == 0 && b == 1)
{
return i + 1;
}
}
}
unsigned long long get_fibonacci_huge_faster(unsigned long long n, unsigned long long m)
{
n = n % get_pisano_period(m);
unsigned long long F[n + 1] = {};
F[0] = 0;
F[-1] = 1;
for (int i = 1; i <= n; i++)
{
F[i] = F[i - 1] + F[i - 2];
F[i] = F[i] % m;
}
return F[n];
}
int main()
{
unsigned long long n, m;
std::cin >> n >> m;
std::cout << get_fibonacci_huge_faster(n, m) << '\n';
}
The simple solution to problem 1 is
static unsigned int solutionInefficient(unsigned int n){
unsigned int sum = 0;
for (unsigned int i = 0; i < n; i++){
if (i % 3 == 0 || i % 5 == 0) {
sum += i;
}
}
return sum;
}
I decided to try a different test case with n = 2147483647 and the final result was computed in 12 seconds. So, I came up with another solution that gave me the same answer and took 2 seconds:
static unsigned int solutionEfficient(unsigned int n){
unsigned int sum = 0;
unsigned int sum3 = 0;
unsigned int sum5 = 0;
unsigned int sum15 = 0;
for (unsigned int i = 3; i < n; i += 3){
sum3 += i;
}
for (unsigned int i = 5; i < n; i += 5){
sum5 += i;
}
for (unsigned int i = 15; i < n; i += 15){
sum15 += i;
}
return sum3 + sum5 - sum15;
}
My last attempt at making a faster implementation involved some google searches and using the arithmetic summation formula and the final piece of code looked like this:
static unsigned int solutionSuperEfficient(unsigned int n){
n = n - 1;
unsigned int t3 = n / (unsigned int)3,
t5 = n / (unsigned int)5,
t15 = n / (unsigned int)15;
unsigned int res_3 = 3 * (t3 * (t3 + 1)) *0.5;
unsigned int res_5 = 5 * (t5 * (t5 + 1)) *0.5;
unsigned int res_15 = 15 * (t15 * (t15 + 1)) *0.5;
return res_3 + res_5 - res_15;
}
however this did not provide the correct answer for this test case. It did provide the correct answer for n = 1000. I am not sure why it failed for my test case, any ideas?
You have two problems in your super efficient solution:
You are using floating point number 0.5 instead of dividing by 2. This will cause rounding errors. Note that it is guaranteed that x * (x + 1) is even so you can safely divide by two.
Integer overflow. The calculation t3 * (t3 + 1) and the similar products will overflow unsigned int. To avoid this, use unsigned long long instead.
Here is the corrected code:
static unsigned int solutionSuperEfficient(unsigned int n){
n = n - 1;
unsigned long long t3 = n / 3,
t5 = n / 5,
t15 = n / 15;
unsigned long long res_3 = 3ULL * ((t3 * (t3 + 1)) / 2ULL);
unsigned long long res_5 = 5ULL * ((t5 * (t5 + 1)) / 2ULL);
unsigned long long res_15 = 15LL * ((t15 * (t15 + 1)) / 2ULL);
return (unsigned int)(res_3 + res_5 - res_15);
}
In fact you don't need t3, t5 and t15 to be unsigned long long as those values would never overflow unsigned int.
I know that pow(base, power) is a built-in function in C with complexity O(power). Can I reduce the complexity of it by dynamic programming?
You can calculate it in O(logn)
int power(int x, unsigned int y)
{
int temp;
if( y == 0)
return 1;
temp = power(x, y/2);
if (y%2 == 0)
return temp*temp;
else
return x*temp*temp;
}
Details in Here
If your input arguments are non-negative integers, then you can implement your own pow.
Iteratively, with running time = O(n):
unsigned long long pow(unsigned long long x,unsigned int n)
{
unsigned long long res = 1;
while (n--)
res *= x;
return res;
}
Recursively, with running time = O(n):
unsigned long long pow(unsigned long long x,unsigned int n)
{
if (n == 0)
return 1;
if (n == 1)
return x;
return pow(x,n/2)*pow(x,n-n/2);
}
Efficiently, with running time = O(log(n)):
unsigned long long pow(unsigned long long x,unsigned int n)
{
unsigned long long res = 1;
while (n > 0)
{
if (n & 1)
res *= x;
n >>= 1;
x *= x;
}
return res;
}