does any one has a simple C++ code example of using MKL sparse matrix vector multiply routine? I need to use "mkl_zcsrsymv" to multiply a complex symmetric matrix (stored in lower triangular) with a complex vector, but I couldn't find a single demonstrative example on this. Unable to compile my code for complex case.
Read the documentation for mkl_zcsrsymv on Intel's homepage. Here symmetric is to be taken literally! (It does not mean Hermitian)
Compile with icpc -mkl test.c for maximal convenience.
#include <stdio.h>
#include "mkl_spblas.h"
int main()
{
/* Matrix in CRS format
*
* { { 0, 0, i }
* { 0, -1, 2 }
* { i, 2, 0 } }
*/
int m = 3;
MKL_Complex16 a[] = { {0,1}, {-1,0}, {2,0}, {0,1}, {2,0} };
int ia[] = { 0, 1, 3, 5 };
int ja[] = { 2, 1, 2, 0, 1 };
MKL_Complex16 x[] = { {1,0}, {2,0}, {3,0} };
MKL_Complex16 y[] = { {0,0}, {0,0}, {0,0} };
char uplo = 'L';
// Use MKL to compute
// y = A*x
mkl_cspblas_zcsrsymv(&uplo, &m, a, ia, ja, x, y);
printf("y = { (%g,%g), (%g,%g), (%g,%g) }\n",
y[0].real, y[0].imag,
y[1].real, y[1].imag,
y[2].real, y[2].imag
);
}
Output is y = { (0,3), (4,0), (4,1) }. Check it on WolframAlpha.
Here is also an example for mkl_dcsrmv.
#include <stdio.h>
#include "mkl_spblas.h"
int main()
{
/* Matrix in CRS format
*
* { { 0, 0, 1 }
* { 0, -1, 2 }
* { 1, 0, 0 } }
*/
int m = 3;
int k = 3;
double val[] = { 1, -1, 2, 1 };
int indx[] = { 2, 1, 2, 0 };
int pntrb[] = { 0, 1, 3 };
int pntre[] = { 1, 3, 4 };
double x[] = { 1, 2, 3 };
double y[] = { 0, 0, 0 };
double alpha = 1;
double beta = 0;
char transa = 'N';
char matdescra[] = {
'G', // type of matrix
' ', // triangular indicator (ignored in multiplication)
' ', // diagonal indicator (ignored in multiplication)
'C' // type of indexing
};
// Use MKL to compute
// y = alpha*A*x + beta*y
mkl_dcsrmv(&transa, &m, &k, &alpha, matdescra, val, indx, pntrb, pntre, x, &beta, y);
printf("y = { %g, %g, %g }\n", y[0], y[1], y[2]);
}
Output is y = { 3, 4, 1 }. Check it on WolframAlpha.
While playing with this I found out that this is directly compatible with Armadillo. This makes it very convenient to use in C++. Here I first generate a random symmetric matrix with Armadillo and convert it to sparse. This I multiply with a random vector. Finally I compare the result to Armadillo's sparse matrix-vector product. The precision differs quite substantially.
#include <iostream>
#include <armadillo>
#define MKL_Complex16 arma::cx_double
#include "mkl_spblas.h"
int main()
{
/* Matrix in CRS format
*
* { { 0, 0, i }
* { 0, -1, 2 }
* { i, 2, 0 } }
*/
int dim = 1000;
arma::sp_cx_mat a(arma::randu<arma::cx_mat>(dim,dim));
a += a.st();
arma::cx_vec x = arma::randu<arma::cx_vec>(dim);
arma::cx_vec y(dim);
char uplo = 'L';
// Use MKL to compute
// y = A*x
mkl_cspblas_zcsrsymv(&uplo, &dim,
a.values, (int*)a.col_ptrs, (int*)a.row_indices,
x.memptr(), y.memptr());
std::cout << std::boolalpha
<< arma::approx_equal(y, a*x, "absdiff", 1e-10)
<< '\n';
}
Related
I want to get array h[16]={16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,}, but I get an error:
If I delete status = vslCorrSetStart(task, h_start); in code, it will work normally, and array h[16]={0,0,0,0,0,16,15,14,0,12,11,10,0,8,7,6}.
What can I do?
#include <assert.h>
#include <mkl.h>
#include <iostream>
using namespace std;
void main()
{
//input
const double f[16] = { 1 , 2 , 3 , 4,
5 , 6 , 7 , 8,
9 , 10, 11, 12,
13, 14, 15, 16,};
//kernel
const double g[9] = { 0, 0, 0,
0, 1, 0,
0, 0, 0 };
//out
double h[16] = { 0 };
VSLCorrTaskPtr task;
MKL_INT f_shape[2] = { 4, 4 };
MKL_INT g_shape[2] = { 3, 3 };
MKL_INT h_shape[2] = { 4, 4 };
MKL_INT h_start[2] = { 1, 1 };
MKL_INT f_stride[2] = { f_shape[1], 1 };
MKL_INT g_stride[2] = { g_shape[1], 1 };
MKL_INT h_stride[2] = { h_shape[1], 1 };
int status;
status = vsldCorrNewTask(&task, VSL_CORR_MODE_DIRECT, 2, f_shape, g_shape, h_shape);
assert(status == VSL_STATUS_OK);
status = vslCorrSetStart(task, h_start);
assert(status == VSL_STATUS_OK);
//always get wrong,return -2303 I can't find the problem
status = vsldCorrExec(task, f, f_stride, g, g_stride, h, h_stride);
assert(status == VSL_STATUS_OK);
status = vslCorrDeleteTask(&task);
assert(status == VSL_STATUS_OK);
//print the result
for (int r = 0; r < 4; r++)
{
cout << r << "(out): ";
for (int c = 0; c < 4; c++)
{
cout << h[r * 4 + c] << " ";
}
cout << endl;
}
}
double learning_rate = 1;
int training_epochs = 1;
int k = 1;
int train_S = 6;
int test_S = 6;
int visible_E = 6;
int hidden_E = 6;
// training data
int train_X[6][6] = {
{1, 1, 1, 0, 0, 0},
{1, 0, 1, 0, 0, 0},
{1, 1, 1, 0, 0, 0},
{0, 0, 1, 1, 1, 0},
{0, 0, 1, 1, 1, 0},
{0, 0, 1, 1, 1, 0}
};
the above code are the input parameters am giving to my function. but i would like to cast them into a function withing my mexFunction and simply call them.
the matlab side has the following
clear *
close all
clc
%% Load the data
X= [ 1, 1, 1, 0, 0, 0; ...
1, 0, 1, 0, 0, 0; ...
1, 1, 1, 0, 0, 0; ...
0, 0, 1, 1, 1, 0; ...
0, 0, 1, 1, 1, 0; ...
0, 0, 1, 1, 1, 0];
%% Define Parameters
numHiddenUnits = 6;
numIterations = 1000;
kCD = 1;
%% Compute the RBM
x = RBM(X, numHiddenUnits, numIterations, kCD);
Scalar input parameters are fairly simple. Matrix inputs are little trickier because they use old-school Fortran column major order, you may need to transpose the data before you send it to your function. Here's kind of an example, you'll have to fill in the blanks:
/*=========================================================
* Built on:
* matrixDivide.c - Example for illustrating how to use
* LAPACK within a C MEX-file.
*
* This is a MEX-file for MATLAB.
* Copyright 2009 The MathWorks, Inc.
*=======================================================*/
/* $Revision: 1.1.6.2 $ $Date: 2009/05/18 19:50:18 $ */
#include "mex.h"
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
double * pX, * pNumHiddenUnits, * pNumIter, * pkCD; /* pointers to inputs */
double * pOutput; /* output arugments */
mwSignedIndex m,n; /* matrix dimensions */
int i, j;
/* Check for proper number of arguments. */
if ( nrhs != 4)
{
mexErrMsgIdAndTxt("MATLAB:RBM:rhs",
"This function requires 4 inputs.");
}
pX = mxGetPr(prhs[0]); /* pointer to first input, X matrix */
pNumHiddenUnits = mxGetPr(prhs[1]); /* pointer to second input, scalar hidden units */
pNumIter = mxGetPr(prhs[2]); /* pointer to third input, scalar number of iterations */
pkCD = mxGetPr(prhs[3]); /* pointer to third input, scalar kCD */
/* dimensions of input matrix */
m = (mwSignedIndex)mxGetM(prhs[0]);
n = (mwSignedIndex)mxGetN(prhs[0]);
/* Validate input arguments */
if (m < 1 && n < 1)
{
mexErrMsgIdAndTxt("MATLAB:RBM:notamatrix",
"X must be a matrix.");
}
plhs[0] = mxCreateDoubleMatrix(m, n, mxREAL);
pOutput = mxGetPr(plhs[0]);
for (i = 0; i < n; ++i)
{
for (j = 0; j < m; ++j)
{
int index = j * n + i;
pOutput[index] = pX[i * m + j];
}
}
}
/* */
I am trying to use the routine 'cblas_dgemv()' declared in cblas.h to calculate matrix vector product. The code is given below:
#include <cblas.h>
#include <iostream>
using namespace std;
/* compile with: g++ test.cpp -o test -lblas */
int main (int argc, char** argv)
{
/* We store the following 3x4 array in column major form:
A = [1.83292 0.267964 0.382422 0.520162
0.428562 0.720323 0.839606 1.30816
0.388731 0.619452 1.01375 0.229333 ];
*/
const double A[12] = { 1.83292,
0.428562,
0.388731,
0.267964,
0.720323,
0.619452,
0.382422,
0.839606,
1.01375,
0.520162,
1.30816,
0.229333};
/* The vector X is:
X = [ 0.570695, 0.670179, 0.927146, 0.297343 ]';
where ' is transpose.
*/
const double X[4] = { 0.570695, 0.670179, 0.927146, 0.297343};
double Y[4] = {0, 0, 0, 0};
/* Calculate Y = A*X (alpha = 1 and beta = 0)
*/
cblas_dgemv(CblasColMajor, CblasNoTrans, 3, 4, 1, A, 3, X, 1, 0, Y, 1);
/* If I was correct, I should have got:
Y =[1.69366 1.20999 1.72082 1.38618] = A*X;
but I get:
Y = [1.73485 1.89473 1.64507 0] = A'*X;
*/
cout<<"Y = [";
for ( unsigned int i = 0; i < 4; i++ )
cout<<Y[i]<<" ";
cout<<"]";
}
However, instead of getting Y = A*X, I am consistently getting Y = A'*X, where ' represents transpose. I am not sure if I am doing some classical silly mistake somewhere, but after hours of trying, I could not figure out the problem. Please Help!!
If it is required, I am using Linux version 3.2.0-4-amd64 ( Debian 4.6.3-14) ) #1 SMP Debian 3.2.57-3+deb7u2 and g++ (Debian 4.4.7-2) 4.4.7. Thanks in advance.
What's wrong is your math. The product of a 3x4 matrix and a 4-component vector is a 3-component vector. In your case the product of A*X is [1.73485 1.89473 1.64507].
If you multiply the transpose of A (which is 4x3) you need a 3-component vector to multiply with and the product is a 4-component vector. Let's call X' the first three components of X -> X' = [0.570695, 0.670179, 0.927146]. Then A'*X' = [1.693662 1.209994 1.720827 1.386180].
#include <stdio.h>
void dgemv(const double *A, const double *u, double *v, const int n, const int m) {
for(int i=0; i<n; i++) {
double sum = 0;
for(int j=0; j<m; j++) {
sum += A[m*i+j]*u[j];
}
v[i] = sum;
}
}
int main() {
const double A[12] = {1.83292 , 0.267964, 0.382422, 0.520162,
0.428562, 0.720323, 0.839606, 1.30816 ,
0.388731, 0.619452, 1.01375 , 0.229333};
const double AT[12] = {1.83292 , 0.428562, 0.388731,
0.267964, 0.720323, 0.619452,
0.382422, 0.839606, 1.01375 ,
0.520162, 1.30816 , 0.229333};
const double X[4] = { 0.570695, 0.670179, 0.927146, 0.297343};
const double X2[4] = { 0.570695, 0.670179, 0.927146};
double Y[3], Y2[4];
dgemv(A, X, Y, 3,4);
for(int i=0; i<3; i++) printf("%f ", Y[i]); printf("\n");
dgemv(AT, X2, Y2, 4,3);
for(int i=0; i<4; i++) printf("%f ", Y2[i]); printf("\n");
}
I'm trying to find path with sum of numbers is maximum. It's only allowed to move right and down through the matrix.
I've coded it but it doesn't give me the maximum sum, and I can't figure out why it does so!.
Thanks in advance.
Here's my code
/*Given grid of positive numbers, strat from (0,0) ad end at (n,n).
Move only to RIGHT and DOWN. Find path with sum of numbers is maximum. */
#include<iostream>
using namespace std;
int grid[2][3] = { { 5, 1, 2 }, { 6, 7, 8 } };
int max(int x, int y){
if (x >= y){
return x;
}
else if (x < y){
return y;
}
}
bool valid(int r, int c){
if (r + 1 == 2 || c + 1 == 3)
return false;
else
return true;
}
int maxPathSum(int row, int column){
if (!valid(row, column))
return 0;
if (row == 1 && column == 2) //base condition
return grid[row][column];
int path1 = maxPathSum(row, column + 1); //Right
int path2 = maxPathSum(row + 1, column); //Down
return grid[row][column] + max(path1, path2);
}
int main()
{
cout << maxPathSum(0, 0) << endl;
return 0;
}
the correct answer should be 26, but the output is 6.
Your function Valid should be
bool valid (int r, int c)
{
return r < 2 && c < 3;
}
and then you got 26 (Live example).
BTW, you may use dynamic programming for this problem.
You can also use dynamic programming to solve the problem
Here is the code:
#include <bits/stdc++.h>
using namespace std;
int grid[2][3] = { { 5, 1, 2 }, { 6, 7, 8 } };
int dp[3][4];
int main()
{
for(int i=0;i<4;i++)
dp[0][i] = 0;
for(int i=0;i<3;i++)
dp[i][0] = 0;
for(int i=1;i<3;i++)
{
for(int j=1;j<4;j++)
{
dp[i][j] = grid[i-1][j-1] + max(dp[i][j-1], dp[i-1][j]);
}
}
cout << dp[2][3];
return 0;
}
Live example
Apart from DP, you can also use simple (n,m) Matrix based solution. The good part is this approach wont need recursion as DP does which can cause memory issues if matrix is bigger and space complexity is just O(n x m) i.e. input array itself. And the time complexity is also O(n x m). Following code in java illustrate the approach -
package com.company.dynamicProgramming;
import static java.lang.Integer.max;
public class MaxSumPathInMatrix {
public static void main (String[] args)
{
int mat[][] = { { 10, 10, 2, 0, 20, 4 },
{ 1, 0, 0, 30, 2, 5 },
{ 200, 10, 4, 0, 2, 0 },
{ 1, 0, 2, 20, 0, 4 }
};
System.out.println(findMaxSum2(mat));
}
/*
Given a matrix of N * M. Find the maximum path sum in matrix.
Find the one path having max sum - originating from (0,0) with traversal to either right or down till (N-1, M-1)
*/
static int findMaxSum2(int mat[][])
{
int M = mat[0].length;
int N = mat.length;
for (int i = 0; i < N; i++)
{
for (int j = 0; j < M; j++)
{
if(i==0 && j!=0){
mat[i][j]+=mat[i][j-1];
}
else if(i!=0 && j==0){
mat[i][j]+=mat[i-1][j];
}
else if (i!=0 && j!=0){
mat[i][j]+= max(mat[i-1][j], mat[i][j-1]);
}
}
}
return mat[N-1][M-1];
}
}
Run it as -
251
Process finished with exit code 0
Further Even if you are using Dynamic Programming(DP) then use Memoization concept to reduce the time complexity.. Here is the code with DP plus memoization along complexity calculation -
package com.company.dynamicProgramming;
import java.util.HashMap;
import java.util.Map;
import static java.lang.Integer.max;
public class MaxSumPathInMatrix {
static int complexity = 0;
public static void main (String[] args)
{
int mat[][] = { { 10, 10, 2, 0, 20, 4 },
{ 1, 0, 0, 30, 2, 5 },
{ 200, 10, 4, 0, 2, 0 },
{ 1, 0, 2, 20, 0, 4 }
};
System.out.println("Max Sum : " + findMaxSum2_dp(mat, 0, 0, new HashMap<>()));
System.out.println("Time complexity : " +complexity);
}
/*
~~~> solve via ~dp~ and ~memoization~
Given a matrix of N * M. Find the maximum path sum in matrix.
Find the one path having max sum - originating from (0,0) with traversal to either right or down till (m-1, n-1)
*/
static int findMaxSum2_dp(int mat[][], int i, int j, Map<String, Integer> memo){
int M = mat[0].length;
int N = mat.length;
Integer sum = memo.get(i+"-"+j);
if(sum!= null){
return sum;
}
complexity++;
if(i==N-1 && j<M-1){
mat[i][j] += findMaxSum2_dp(mat, i, j+1, memo);
memo.put(i+"-"+j, mat[i][j]);
return mat[i][j];
}
else if(i<N-1 && j==M-1){
mat[i][j] += findMaxSum2_dp(mat, i+1, j, memo);
memo.put(i+"-"+j, mat[i][j]);
return mat[i][j];
}
else if (i<N-1 && j<M-1){
int s1 = findMaxSum2_dp(mat, i+1, j, memo);
int s2 = findMaxSum2_dp(mat, i, j+1, memo);
mat[i][j] += max(s1, s2);
memo.put(i+"-"+j, mat[i][j]);
return mat[i][j];
}
return mat[N-1][M-1] += max(mat[N-1][M-2], mat[N-2][M-1]);
}
}
Two important points to note in above code -
I am storing max sum of any sub matrix [i][j] in a store(HashMap), whenever it's max sum is ready. And in further steps if this sub matrix [i][j] reappears then I take it from store instead of processing again. As a illustration - you can see [N-1][M-1] appears 2 times in below diagram of recursion -
[N][M] = max([N][M-1]) , [N-1][M]
/ \
/ \
/ \
[N][M-1] = max ([N-1][M-1], [N][M-2]) [N-1][M] = max ([N-2][M], [N-1][M-1])
Connected with Point 1 : I have provisioned a complexity variable which I increment if I have to calculate max sum for matrix [i][j] i.e. wont find in the store. If you see the result it shows 25 in 6x4 matrix i.e. the time complexity is just O(NxM).
I need to write recursive function in c++ that finds largest area of number '1' in 2d array that contains only 1 or 0.
Example:
int Arr[5][8] =
{
{ 0, 0, 0, 0, 1, 1, 0, 0, },
{ 1, 0, 0, 1, 1, 1, 0, 0, },
{ 1, 1, 0, 1, 0, 1, 1, 0, },
{ 0, 0, 0, 1, 1, 1, 1, 0, },
{ 0, 1, 1, 0, 0, 0, 0, 0, },
};
Visual example: http://s23.postimg.org/yabwp6h23/find_largest.png
Largest area of this array is 12, second largest is 3 and third largest is 2.
I was thinking to do this with something similar to flood fill algorithm, but just can't figure out how.
bool visited[5][8];
int i,j;
// variables for the area:
int current_area = 0, max_area = 0;
int Arr[5][8]={ // type your map of values here
}
// functions
void prepare_visited_map() {
for(i=0;i<5;i++) {
for(j=0;j<8;j++) visited[i][j] = false;
}
}
// recursive function to calculate the area around (x,y)
void calculate_largest_area(int x, int y) {
if(visited[x][y]) return;
// check if out of boundaries
if(x<0 || y<0 || x>=5 || y>=8) return;
// check if the cell is 0
if(!Arr[x][y]) {
visited[x][y] = true;
return;
}
// found a propper cell, proceed
current_area++;
visited[x][y] = true;
// call recursive function for the adjacent cells (north, east, south, west)
calculate_largest_area(x,y-1);
calculate_largest_area(x+1,y);
calculate_largest_area(x,y+1);
calculate_largest_area(x-1,y);
// by the end of the recursion current_area will hold the area around the initial cell
}
// main procedure where the above functions are used
int mian() {
// calculate the sorrounding area of each cell, and pick up the largest of all results
for(i=0;i<5;i++) {
for(j=0;j<8;j++) {
prepare_visited_map();
calculate_largest_area(i,j);
if(current_area > max_area) max_area = current_area;
}
}
printf("Max area is %d",max_area");
}
Hope this was helpful :)
I was thinking to do this with something similar to flood fill algorithm
I think that's a pretty good way to do it. Apply flood fill to any 1, counting the ones and replacing them with zeros.
Repeat until the grid consists entirely of zeroes.
The following will print out the sizes of the connected components in no particular order:
#include <iostream>
constexpr int N = 5;
constexpr int M = 8;
int arr[N][M] =
{
{ 0, 0, 0, 0, 1, 1, 0, 0, },
{ 1, 0, 0, 1, 1, 1, 0, 0, },
{ 1, 1, 0, 1, 0, 1, 1, 0, },
{ 0, 0, 0, 1, 1, 1, 1, 0, },
{ 0, 1, 1, 0, 0, 0, 0, 0, },
};
int fill(int arr[N][M], int r, int c) {
int count = 0;
if (r < N && arr[r][c]) {
for (int i = c; i >= 0 && arr[r][i]; --i) {
arr[r][i] = 0;
count += fill(arr, r + 1, i) + 1;
}
for (int i = c + 1; i < M && arr[r][i]; ++i) {
arr[r][i] = 0;
count += fill(arr, r + 1, i) + 1;
}
}
return count;
}
int print_components(int arr[N][M]) {
for (int r = 0; r < N; ++r) {
for (int c = 0; c < M; ++c) {
if (arr[r][c]) {
std::cout << fill(arr, r, c) << std::endl;
}
}
}
}
int main() {
print_components(arr);
}
something like,
int max_area = 0;
foreach y
foreach x
if (pos[y][x] == 1 && !visited[y][x])
{
int area = 0;
Queue queue = new Queue();
queue.push(new Point(x, y));
visited[y][x] = true;
while (!queue.empty())
{
Point pt = queue.pop();
area++;
foreach neightboor of pt (pt.x±1, pt.y±1)
if (pos[neightboor.y][neightboor.x] == 1 && !visited[neightboor.y][neightboor.x])
{
visited[neightboor.y][neightboor.x] = true;
queue.push(new Point(neightboor.x, neightboor.y));
}
}
if (area > max_area)
max_area = area;
}
Quick approach, but I don't know if there is a way to do this in a sane way (recursive
call for each element does not scale for C++ because call stack is limited)
int maxy = 5
int maxx = 8
int areasize(int x, int y) {
if (x < 0 || y < 0 || x > maxx || y > maxy || !Arr[y][x])
return 0;
Arr[y][x] = 0;
return 1
+ areasize(x + 1, y)
+ areasize(x - 1, y)
+ areasize(x, y + 1)
+ areasize(x, y - 1);
}
maxarea = 0;
for (int y = 0; y < maxy; y++) {
for (int x = 0; x < maxx; x++) {
maxarea = std::max(maxarea, areasize(x, y));
}
}