I wrote C++ codes that calculates the determinant of matrix using the recursive method.
Now, I would like to rewrite that recursive method using an iterative method. But I cannot figure out how to accomplish that.
The question is: How to rewrite the specific recursive method (int Determinant(int *&, const int)) into an iterative method?
Here are my C++ codes:
// Determinant C++
#include <iostream>
#include <ctime>
using namespace std;
void RandInitArray(int *, const int, int = 0, int = 20);
void Display(int *, const int);
int CalculateDeterminant(int *&, const int);
int Determinant(int *&, const int);
int main()
{
start = clock();
srand((unsigned int)time(NULL));
int N = 12; // Size of matrix
int * S = new int[N * N];
int a(-10), b(10), det;
RandInitArray(S, N, a, b);
cout.precision(4);
Display(S, N);
det = CalculateDeterminant(S, N);
cout << "\nDeterminant = " << det << "\n\n";
cin.get();
return 0;
}
void RandInitArray(int * arr, const int N, int a, int b)
{
for (int i = 0; i < N * N; i++)
arr[i] = rand() % (b - a + 1) + a;
}
void Display(int *arr, const int N)
{
for (int i = 0; i < N * N; i++)
cout << arr[i] << ((i + 1) % N ? "\t" : "\n");
}
int CalculateDeterminant(int *& S, const int N)
{
int rez;
if (N < 1)
cout << "Size of matrix must be positive\n";
else if (N == 1)
rez = *S;
else if (N == 2)
rez = S[0] * S[3] - S[1] * S[2];
else if (N == 3)
rez = S[0] * S[4] * S[8] + S[1] * S[5] * S[6] + S[2] * S[3] * S[7] -
S[2] * S[4] * S[6] - S[1] * S[3] * S[8] - S[0] * S[5] * S[7];
else
rez = Determinant(S, N);
return rez;
}
int Determinant(int *& S, const int N)
{
int sign(1), det(0), res, M(N - 1);
int * _S;
for (int k = 0; k < N; k++)
{
_S = new int[M * M];
int ind = 0;
for (int i = N; i < N * N; i++)
{
if (i % N != k)
_S[ind++] = S[i];
}
if (M == 3)
{
res = S[k] == 0 ? 0 : _S[0] * _S[4] * _S[8] + _S[1] * _S[5] * _S[6] + _S[2] * _S[3] * _S[7] -
_S[2] * _S[4] * _S[6] - _S[1] * _S[3] * _S[8] - _S[0] * _S[5] * _S[7];
delete[] _S;
}
else
res = S[k] == 0 ? 0 : Determinant(_S, N - 1);
det += S[k] * sign * res;
sign *= -1;
}
delete[] S;
return det;
}
I know this is not the smartest way to calculate determinant of a large matrix. I can simplify matrix using matrix transformations and drastically speed up calculations.
Despite that any help or hint would be greatly appreciated.
You should sum up a total of N! product terms, each of which is a unique product N elements none of which reside on same row nor colomn: Sorted in row order, colomn indices are permutations of range [1··N), with odd permutations negated. You can use std::next_permutation to calculate permutation and invert sign per iteration.
Related
I'm creating a Matrix math library with CUDA to improve my CNNs performance (and to understand C++ better).
I would like to be able to add error handling and tell the user (me) what has gone wrong when using the matrix class.
This can be seen in my main file as, in this case, I'm trying to add a 10 * 10 matrix to a 15 * 15 matrix. This is an impossible action and would like some output to tell the user. for example
Error in file "Main.cu" on line: 9 (Dimensions inconsistent)
If you check inside the function the line number is line number of the check and I've looked at using macros to check but I'm wondering if there is another way without having to call the macro every time I add two matrices together.
Main.cu
#include "Matrix.cuh"
int main() {
double* init;
cudaMallocManaged(&init, sizeof(double));
Matrix A(10, 10, 2);
Matrix B(15, 15, 3);
Matrix C = A + B;
A.printM("A");
B.printM("B");
C.printM("C");
//cudaFree(init);
return 0;
}
Matrix.cu
#include "Matrix.cuh"
__global__
void sumMatrix(Matrix* A, Matrix* B, Matrix* C)
{
int x = blockIdx.x * BLOCK_SIZE + threadIdx.x;
int y = blockIdx.y * BLOCK_SIZE + threadIdx.y;
if (x < A->ColumnCount && y < A->RowCount)
{
C->VALUES[y * A->ColumnCount + x] = A->VALUES[y * A->ColumnCount + x] + B->VALUES[y * A->ColumnCount + x];
}
}
__global__
void matrixInit(Row* rows, int R, int C, double* VALUES, double val) {
int x = blockIdx.x * BLOCK_SIZE + threadIdx.x;
int y = blockIdx.y * BLOCK_SIZE + threadIdx.y;
if (x < C && y < R)
{
if (x == 0)
{
rows[y].Count = C;
rows[y].values = VALUES + C * y;
}
VALUES[y * C + x] = val;
}
}
Matrix::Matrix(int R, int C, double val)
{
cudaMallocManaged(&VALUES, R * C * sizeof(double));
cudaMallocManaged(&rows, R * sizeof(Row));
RowCount = R;
ColumnCount = C;
dim3 gridDim(ceil(C / (double)BLOCK_SIZE), ceil(R / (double)BLOCK_SIZE), 1);
dim3 blockDim(BLOCK_SIZE, BLOCK_SIZE, 1);
matrixInit << <gridDim, blockDim >> > (rows, R, C, VALUES, val);
cudaDeviceSynchronize();
cudaCheckErrors("MATRIX INIT VAL");
}
Matrix::Matrix(int R, int C)
{
cudaMallocManaged(&VALUES, R * C * sizeof(double));
cudaMallocManaged(&rows, R * sizeof(Row));
RowCount = R;
ColumnCount = C;
dim3 gridDim(ceil(C / (double)BLOCK_SIZE), ceil(R / (double)BLOCK_SIZE), 1);
dim3 blockDim(BLOCK_SIZE, BLOCK_SIZE, 1);
matrixInit << <gridDim, blockDim >> > (rows, R, C, VALUES, 0);
cudaDeviceSynchronize();
cudaCheckErrors("MATRIX INIT VAL");
}
void Matrix::updatePointers()
{
for (size_t i = 0; i < RowCount; i++)
{
rows[i].values = VALUES + (i * ColumnCount);
}
}
void Matrix::removePointers()
{
VALUES = nullptr;
rows = nullptr;
}
void Matrix::printM(const char* msg)
{
std::cout << "Matrix " << msg << ": " << RowCount << "*" << ColumnCount << std::endl;
for (size_t i = 0; i < RowCount; i++)
{
for (size_t j = 0; j < ColumnCount; j++)
{
std::cout << rows[i][j] << " ";
}
std::cout << std::endl;
}
}
Matrix Matrix::sum(Matrix B)
{
Matrix* A_p, * B_p, * C_p;
Matrix C(RowCount, ColumnCount);
cudaMallocManaged(&A_p, sizeof(Matrix));
cudaMallocManaged(&B_p, sizeof(Matrix));
cudaMallocManaged(&C_p, sizeof(Matrix));
memcpy(A_p, this, sizeof(Matrix));
memcpy(B_p, &B, sizeof(Matrix));
memcpy(C_p, &C, sizeof(Matrix));
dim3 gridDim(ceil(ColumnCount / (double)BLOCK_SIZE), ceil(RowCount / (double)BLOCK_SIZE), 1);
dim3 blockDim(BLOCK_SIZE, BLOCK_SIZE, 1);
sumMatrix << < gridDim, blockDim >> > (A_p, B_p, C_p);
cudaDeviceSynchronize();
cudaCheckErrors("SUM");
B.removePointers();
C.removePointers();
return *C_p;
}
Row& Matrix::operator[](size_t i)
{
if (i >= RowCount)
{
std::cout << "OUT OF BOUNDS";
std::exit(1);
}
return rows[i];
}
Matrix& Matrix::operator+(Matrix B)
{
Matrix C = sum(B);
Matrix* C_p;
cudaMallocManaged(&C_p, sizeof(Matrix));
memcpy(C_p, &C, sizeof(Matrix));
B.removePointers();
C.removePointers();
return *C_p;
}
Matrix::~Matrix()
{
if (VALUES != nullptr && rows != nullptr)
{
cudaFree(VALUES);
cudaFree(rows);
}
}
I've read many blogs and examples of code but I'm trying to implement the Karatsuba multiplication through a way that is currently not logically working. Only single digit number multiplications are working, but any digits longer than 1 are displaying completely wrong answers.
It should be able to take in long numbers of input, but I'm not allowed to use long int storage type.
Also, it's meant to be able to multiply numbers of different base (1-10) however I'm unsure on how to do that so initially I'm just attempting base 10.
This is my code so far:
#include <iostream>
#include <string>
#include <algorithm>
#include <cmath>
#include <sstream>
int compareSize(string integer1, string integer2)
{
int length = 0;
if (integer1.length() > integer2.length())
{
//allocating int 1's length as final length if bigger than int 2
length = integer1.length();
}
else if (integer2.length() > integer1.length())
{
//allocating int 2's length as final length if bigger than int 1
length = integer2.length();
}
else
{
length = integer1.length();
}
return length;
}
int multiplication( string I1, string I2, int B){
int length = compareSize(I1, I2);
//converting the strings into integers
stringstream numberOne(I1);
int digitOne = 0;
numberOne >> digitOne;
stringstream numberTwo(I2);
int digitTwo = 0;
numberTwo >> digitTwo;
//checking if numbers are single digits
if ( (10 > digitOne) || (10 > digitTwo) ) {
//cout<<(digitOne * digitTwo)<<endl;
return (digitOne * digitTwo);
}
int size = ( length % 2) + (length / 2);
int power = pow(10, size);
int a = digitOne / power;
int c = digitTwo / power;
int b = digitOne - (a * power);
int d = digitTwo - (c * power);
int p2 = b * d;
int p0 = a * c;
int p1 = (b + a) * (d + c);
//int final = p1 - p2 - p0;
int sum = ((a*d) + (b*c)) ;
int p = (p0*(pow(10,length))) + (sum * (pow(10,length)) + p2);
//int p = ( p2 * (pow(10,size*2)) + ( p1 - (p2+p0)) * pow(10,size) + p0 ) * 100;
// int p = (p2 * (long long)(pow(10, 2 * size))) + p0 + ((p1 - p0 - p2) * power);
return p;
}
int main(){
string int1, int2;
int base;
cout<<"Enter I1, I2 and B: ";
cin>> int1 >> int2 >> base;
cout<<" "<<multiplication(int1, int2, base)<<endl;
return 0;
}
The main problem is
int p = (p0*(pow(10,length))) + (sum * (pow(10,length)) + p2);
It should be
int p = (p0*(pow(10,length))) + (sum * (pow(10,length - 1)) + p2);
You shouldn't use std::pow for integers GCC C++ pow accuracy. I replaced it with my own version:
#include <iostream>
#include <string>
#include <algorithm>
#include <sstream>
int pow(int b, int e) {
if (e == 0) return 1;
if (e == 1) return b;
if (e % 2 == 0) return pow(b * b, e / 2);
return b * pow(b * b, e / 2);
}
int multiplication(std::string I1, std::string I2) {
int length = std::max(I1.length(), I2.length());
int digitOne = std::stoi(I1);
int digitTwo = std::stoi(I2);
if ( (10 > digitOne) || (10 > digitTwo) ) {
return (digitOne * digitTwo);
}
int size = ( length % 2) + (length / 2);
int power = pow(10, size);
int a = digitOne / power;
int c = digitTwo / power;
int b = digitOne - (a * power);
int d = digitTwo - (c * power);
int p2 = b * d;
int p0 = a * c;
int sum = ((a*d) + (b*c)) ;
int p = (p0*(pow(10,length))) + (sum * (pow(10,length - 1)) + p2);
return p;
}
int main(){
std::string int1, int2;
std::cout<<"Enter I1 and I2: ";
std::cin>> int1 >> int2;
std::cout<<" "<<multiplication(int1, int2)<<'\n';
return 0;
}
For 2 numbers x and n entered by the user, my code needs to find Hn(x) defined recursively by the following formulas:
I am trying to implement a recursive version and and iterative version of that function. But I think I am getting the wrong concept of it, since my code doesn't compile due to errors on H(n) and H[n]:
#include "pch.h"
#include <iostream>
int H(int n, int x) //function for recursion
{
if (n < 0) return -1;
else if (n == 0) return 1;
else if (n == 1) return 2 * x;
return 2 * x * H(n) * x - 2 * n * H(n - 1) * x;
}
int H1(int n, int x) //function for Iterator
{
int *H1 = new int[n + 1];
H[0] * x = 1;
H[1] * x = 2 * x;
for (int i = 0; i <= n; i++)
{
H[i] * x = 2 * x * H[n] * x - 2 * n * H[n - 1] * x;
}
return H1(n) * x;
}
int main()
{
int n, x;
std::cout << "Enter the number n: ";
std::cin >> n;
std::cout << "Enter the number x: ";
std::cin >> x;
std::cout << "Rec = " << H[n] * x std::endl;
std::cout << "Iter = " << H1[n] * x std::endl;
}
It is confusing, I apologize for that as I am completely new to functions.
I already managed to do this with fibonacci sequence. And there I used only one parameter for function f(x) that is f(int n){... }, but here I am a bit confused with two parameters in function H(int n, int x) , where n is the index of H while x is an integer.
Yes, you need to translate your matematically indexed function into a function with 2 parameters.
The recursive version is almost ok, except for some shifts in the indexes:
int H(int n, int x) // recursive version
{
if (n <= 0)
return -1;
else if (n == 1)
return 1;
else if (n == 2)
return 2 * x;
else
return 2 * x * H(n-1, x) - 2 * n * H(n - 2, x); // shift n+1, n n-1 to n, n-1 n-2
}
Your iterative version needs rework, since you should write it as a loop, if possible without cashing the values that you no longer need. For example:
int Hi(int n, int x) //iterative version
{
if (n <= 0)
return -1;
else if (n == 1)
return 1;
int am2 = 1; // start for for n-2
int am1 = 2*x; // start for n-1
if (n == 2)
return am1;
int am;
for (int i=3; i<=n; i++) {
am = 2*x*am1 - 2*i*am2; // calculate Hn from Hn-1 and Hn-2
//prepare next interation
am2=am1;
am1=am;
}
return am;
}
Online demo
You wrote:
int H(int n, int x) //function for recursion
{
if (n < 0) return -1;
else if (n == 0) return 1;
else if (n == 1) return 2 * x;
return 2 * x * H(n) * x - 2 * n * H(n - 1) * x;
}
You're not far from a working program. Drop that H1 function. Let's see:
int H(int n, int x)
{
switch(n)
{
// H_0(x) = 1
case 0: return 1;
// H_1(x) = 2x
case 1: return 2 * x;
// H_{n+1}(x) = 2x H_n(x) - 2n H_{n - 1}(x)
default:
return 2*x*H(n-1, x) - 2*(n-1)*H(n-2, x);
}
}
The trick part is realizing than the n in H_{n+1}(x) = 2x H_n(x) - 2n H_{n - 1}(x) and in return 2*x*H(n-1, x) - 2*(n-1)*H(n-2, x);are not the same, they differ by one.
Now, you only need to handle user I/O and calling your H function with user input.
I tried to implement the above algorithm in C++, but somehow I am getting a rounding error when computing the length of half of the array.
Attempt:
#include <iostream>
#include <math.h>
using namespace std;
void print_array(int arr[], int size){
for(int i = 0; i < size; i++){
cout << arr[i] << "-";
}
cout << endl;
}
float median_h(int arr[], int size){
float median = 0.0;
if(size%2 == 0)
median = (arr[size/2] + arr[(size/2)-1])/2;
else median = arr[(int)ceil(size/2)];
return median;
}
float median(int arr1[], int arr2[], int size_1, int size_2){
cout << size_1 << endl;
print_array(arr1,size_1);
cout << size_2 << endl;
print_array(arr2,size_2);
cout << endl;
float m1 = median_h(arr1,size_1);
float m2 = median_h(arr2,size_2);
float median_res = 0.0;
if(size_1 == 1)
median_res = (arr1[0] + arr2[0])/2;
else if(m1 == m2)
median_res = m1;
else{
int index = 0;
int size = ceil(size_1/2);
(size_1 % 2 == 0)? index = size_1/2 : index = floor(size_1/2);
if(m1 < m2)
median_res = median(arr1 + index,arr2, size, size);
else
median_res = median(arr1,arr2 + index, size, size);
}
return median_res;
}
int main(void){
int arr1[] = {1,12,15,26,38};
int arr2[] = {2,13,17,30,45};
float med = median(arr1,arr2,5,5);
cout << med << endl;
}
This is the output:
output:
5
1-12-15-26-38-
5
2-13-17-30-45-
2
15-26-
2
2-13-
1
15-
1
13-
14
I am expecting a length of 3 in the second recursive iteration, but I am getting 2. I don't know what's wrong with ceil(5/2). It's supposed to be 3.
int size = ceil(size_1/2);
Since size_1 is of type int, dividing it by 2 is implicitly equivalent to taking the floor of the division. e.g. if size_1 was equal to 3, then (size_1/2) will return 1, not 1.5.
Therefore, ceil() isn't going to get you what you want here, because the value being passed to it is already floored.
You could do it like this instead:
int size = ceil(size_1/2.0f); // now we're dividing by a float, so the result will be non-integer
... but you could get the same result without having to resort to floating point math simply by adding 1 to the value before dividing:
int size = (size_1+1)/2;
/* Function to get median of a sorted array */
int median_h(int arr[], int n)
{
if (n%2 == 0)
return (arr[n/2] + arr[n/2-1])/2;
else
return arr[n/2]; // <- HERE
// also since n is int, you can do ( n << 1 )
// it's faster
}
/* This function returns median of ar1[] and ar2[].
Assumptions in this function:
Both ar1[] and ar2[] are sorted arrays
Both have n elements */
int median(int ar1[], int ar2[], int n)
{
/* return -1 for invalid input */
if (n <= 0)
return -1;
if (n == 1)
return (ar1[0] + ar2[0])/2;
if (n == 2)
return (max(ar1[0], ar2[0]) + min(ar1[1], ar2[1])) / 2;
int m1 = median_h(ar1, n); /* get the median of the first array */
int m2 = median_h(ar2, n); /* get the median of the second array */
/* If medians are equal then return either m1 or m2 */
if (m1 == m2)
return m1;
/* if m1 < m2 then median must exist in ar1[m1....] and
ar2[....m2] */
if (m1 < m2)
{
if (n % 2 == 0)
return median(ar1 + n/2 - 1, ar2, n - n/2 +1);
return median(ar1 + n/2, ar2, n - n/2);
}
/* if m1 > m2 then median must exist in ar1[....m1] and
ar2[m2...] */
if (n % 2 == 0)
return median(ar2 + n/2 - 1, ar1, n - n/2 + 1);
return median(ar2 + n/2, ar1, n - n/2);
}
use this website :
https://www.geeksforgeeks.org/median-of-two-sorted-arrays/
#include <vector>
#include<iostream>
#include<stdio.h>
#define REP(i,n) for (ll i = 1; i <= n; i++)
using namespace std;
typedef unsigned long long int ll;
typedef vector<vector<ll> > matrix;
ll MOD = 1000000007;
const ll K = 2;
// computes A * B
matrix mul(matrix A, matrix B)
{
matrix C(K+1, vector<ll>(K+1));
REP(i, K) REP(j, K) REP(k, K)
C[i][j] = (C[i][j] + A[i][k] * B[k][j]) % MOD;
return C;
}
// computes A ^ p
matrix pow(matrix A, ll p)
{
if (p == 1)
return A;
if (p & 1)
return mul(A, pow(A, p-1));
matrix X = pow(A, p>>1);
return mul(X, X);
}
// returns the N-th term of Fibonacci sequence
ll fib(ll N)
{
// create vector F1
vector<ll> F1(K+1);
F1[1] = 1;
F1[2] = 3;
// create matrix T
matrix T(K+1, vector<ll>(K+1));
T[1][1] = 0, T[1][2] = 1;
T[2][1] = 2, T[2][2] = 2;
// raise T to the (N-1)th power
if (N == 1)
return 1;
T = pow(T, N-1);
// the answer is the first row of T . F1
ll res = 0;
REP(i, K)
res = (res + ((T[1][i] )* (F1[i]))) %MOD;
return res;
}
ll fib2(ll n)
{
if(n==1)
return 1;
ll a=1;ll b=3;ll c;
for(ll i=3;i<=n;i++)
{
c=(2*a+2*b)%MOD;
a=b;
b=c;
}
return c;
}
int main()
{
ll t;
scanf("%llu",&t);
// t=10000;
ll n=1;
while(t--)
{
scanf("%llu",&n);
//n=1;
// n++;
// n=1000000000;
printf("%llu\n",fib(n));
}
return 0;
}
I am writing a code to generate 1,3,8,22,60,164 a[n]=2*(a[n-1]+a[n-2]) mod 10^9+7 .I am using modular exponentiation and the matrix multiplication method to generate this sequence.How can I improve its time from 2.3 seconds for worst case i.e. n=10^9 .
10000 times to around .5 to 1 second?
Please give me suggestions to improve speed of this code.
I suspect vector is the main culprit here - dynamic allocation and numerical stuff don't mix very well.
2x2 matrices are way too small for any fancy algorithm to have any impact. I have a hunch that they'd actually be worse due to fanciness overhead.
Have you tried unrolling the loops and ditching the dynamic allocation?
I hope this is correct:
void mul(ll A[][2], ll B[][2], ll C[][2])
{
C[0][0] = (A[0][0] * B[0][0]) % MOD;
C[0][0] = (C[0][0] + A[0][1] * B[1][0]) % MOD;
C[0][1] = (A[0][0] * B[0][1]) % MOD;
C[0][1] = (C[0][1] + A[0][1] * B[1][1]) % MOD;
C[1][0] = (A[1][0] * B[0][0]) % MOD;
C[1][0] = (C[1][0] + A[1][1] * B[1][0]) % MOD;
C[1][1] = (A[1][0] * B[0][1]) % MOD;
C[1][1] = (C[1][1] + A[1][1] * B[1][1]) % MOD;
}
void pow(ll A[][2], ll p, ll out[][2])
{
if (p == 1)
{
out[0][0] = A[0][0];
out[0][1] = A[0][1];
out[1][0] = A[1][0];
out[1][1] = A[1][1];
return;
}
if (p & 1)
{
ll B[2][2] = {{0}};
pow(A, p - 1, B);
mul(A, B, out);
}
else
{
ll X[2][2] = {{0}};
pow(A, p >> 1, X);
mul(X, X, out);
}
}
ll fibv(ll N)
{
ll T[2][2] =
{
{2, 2},
{1, 0}
};
if (N == 1)
return 1;
ll RM[2][2] = {{0}};
pow(T, N-1, RM);
ll res = RM[0][1] % MOD;
res = (res + RM[0][0] * 3) % MOD;
return res;
}
https://en.wikipedia.org/wiki/Strassen_algorithm
You can even find it in video lectures at MIT Opencourseware Intro to Algorithms course or Stanford's Algorithms course
You will get a significant speedup if you specialise to 2x2 matrices:
struct matrix {
ll a, b, c, d ;
void Square() ;
void Mul (const matrix& M) ;
} ;