I am trying to build a DLL that includes the OpenCV PCA so that it can be used from LabVIEW.
I have defined the function:
extern "C" __declspec(dllexport) int __cdecl doPCA(float *input,int32_t input_rows,int32_t input_cols,double maxComponents,float *output);
And wrote it like:
// Exported PCA entry point: wraps the caller's buffer in a cv::Mat of
// input_rows x input_cols floats, constructs a cv::PCA over it with each matrix
// row treated as one vector, copies values out of pca.eigenvectors into
// `output`, and returns 0 when pca.eigenvectors is empty, 1 otherwise.
// NOTE(review): the function's braces and the closing `);` of the PCA
// constructor call are absent from this listing.
// NOTE(review): `maxComponents` is never read below; the number of retained
// components is the literal 2.
int __cdecl doPCA(float *input,int32_t input_rows, int32_t input_cols,double maxComponents,float *output)
// NOTE(review): `&input` is the address of the pointer parameter itself, not of
// the float buffer it points to - presumably `input` was intended; confirm.
Mat pcaset = Mat(input_rows,input_cols, CV_32FC1, &input); //CV_32FC1 is for float valued pixel
PCA pca(pcaset, // pass the data
Mat(), // no pre-computed mean vector, so let the PCA engine compute it
CV_PCA_DATA_AS_ROW, // the vectors are stored as matrix rows (use PCA::DATA_AS_COL if the vectors are the matrix columns)
2 // specify how many principal components to retain
int i, j;
// NOTE(review): the loop bounds are the input dimensions and `.data` is indexed
// directly rather than through a typed accessor - verify both against the
// actual size and element type of pca.eigenvectors.
for(i = 0; i < input_rows; i++)
for(j = 0; j < input_cols; j++)
output[(i * input_cols) + j] = pca.eigenvectors.data[(i * input_cols) + j]; // Write Values to 1D output array
if(pca.eigenvectors.empty()){return 0;} // is empty
if(!pca.eigenvectors.empty()){return 1;} // is not empty
At Labview side I access the function by the compiled DLL:
But I can't figure out how to pass the values from the pca.eigenvectors cv::Mat to the 1D float output array.
Could anyone give a hint?
I learned how to do PCA from the page that Miki linked.
This is my code to do the similar thing.
/// @brief Run PCA on a row-major float buffer and copy the eigenvectors out.
///
/// The input buffer is wrapped (not copied) as a rows x cols CV_32FC1 matrix
/// and every matrix row is treated as one sample (CV_PCA_DATA_AS_ROW).
///
/// @param data      Row-major input holding rows * cols floats.
/// @param rows      Number of samples (matrix rows).
/// @param cols      Number of values per sample (matrix columns).
/// @param maxC      Requested number of components; a value outside
///                  [1, rows] falls back to rows.
/// @param eigenvecs Output buffer with room for maxC * cols floats. Receives
///                  the retained eigenvectors in reversed row order, cols
///                  floats per eigenvector.
/// @return 0 if the arguments are unusable or PCA produced no eigenvectors,
///         1 otherwise.
int doPCA(float* data, int rows, int cols, int maxC, float* eigenvecs ) {
    // Nothing sensible can be computed from a missing or empty buffer.
    if (!data || !eigenvecs || rows <= 0 || cols <= 0) {
        return 0;
    }

    // Convert pointer to Mat; CV_32FC1 is one float per element.
    Mat pcaset = Mat(rows, cols, CV_32FC1, data);

    // Let OpenCV compute the mean and the eigenvectors, treating each row as
    // one sample and keeping at most maxC principal components.
    maxC = (maxC > 0 && maxC <= rows) ? maxC : rows;
    PCA pca(pcaset, Mat(), CV_PCA_DATA_AS_ROW, maxC);

    // pca.mean is the mean of the input samples, not the eigenvalues (those
    // are in pca.eigenvalues). The label text is left unchanged so the printed
    // output keeps its existing form.
    cout << "Eigen values:\n"<< pca.mean <<endl;
    cout << "Eigen vectors:\n"<<pca.eigenvectors<<endl;

    if (pca.eigenvectors.empty()) {
        return 0; // is empty
    }

    // PCA can retain fewer components than requested (never more than the
    // data dimensionality), so only index rows that actually exist.
    const int kept = (maxC < pca.eigenvectors.rows) ? maxC : pca.eigenvectors.rows;

    // Copy the eigenvectors out in reversed order.
    float *pvec = eigenvecs;
    for (int i = kept - 1; i >= 0; --i) {
        for (int j = 0; j < cols; ++j) {
            *pvec++ = pca.eigenvectors.at<float>(i, j);
        }
    }
    return 1;
}
int testPCA(){
// row first
float data[4] = {1.0,2.0,2.0,5.0};
int cols = 2;
int rows = 2;
// alloc two eigenvectors length: 2x2=4
float eigenvecs[4]={0};
// max components nums
int maxC = 2;
int res = doPCA(data, rows, cols, maxC, eigenvecs);
Mat eigenvalues(Size(cols, rows), CV_32FC1, eigenvecs);
cout << "Flag:\n" << res << endl;
cout << "Principle Components:\n"<< eigenvalues<<endl;
return 0;
Eigen values:
[1.5, 3.5]
Eigen vectors:
[0.31622776, 0.94868332;
0.94868332, -0.31622776]
Principle Components:
[0.94868332, -0.31622776;
0.31622776, 0.94868332]
I have a problem with initializing a 3D Mat with openCV.
I would like to create a 3D matrix of size (rows x cols x 16), where rows and cols are the dimensions of an image loaded earlier in the program. I have tried more methods than I can count, and they all give more or less the same result: the dimensions of my matrices are reported as 0 or -858993460.
My code lines :
Mat image_Conv;
int rows = imageBicubic.rows;
int cols = imageBicubic.cols;
image_Conv = Mat::zeros(rows, cols, CV_32FC(16));
Can you tell me why I have this problem? Of course I read all the posts that speak, read the doc opencv on the class Mat, but nothing works, I still have the same problem. I specify that my data in the Mat will be float.
The code :
// Include standard headers
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <vector>
#include <ctime>
#include <iostream>
using namespace std;
//#include <opencv.hpp>
#include <opencv/cv.h>
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv/highgui.h>
using namespace cv;
// main file
int main()
string fileName = "myImage.jpg";
Mat imageSrc = cv::imread(fileName, CV_LOAD_IMAGE_UNCHANGED); // Read the file
if (!imageSrc.data) // Check for invalid input
cout << "Could not open or find the image\n";
return 1;
cout << "Loaded " << fileName << " (" << imageSrc.channels() << " channels)\n";
//int colorTransform = (imageSrc.channels() == 4) ? CV_BGRA2RGBA : (imageSrc.channels() == 3) ? CV_BGR2RGB : CV_GRAY2RGB;
//cv::cvtColor(imageSrc, imageSrc, colorTransform);
imageSrc.convertTo(imageSrc, CV_32F, 1 / 255.0, 0.0);
int SliceSizeWidth = imageSrc.cols / 2;
int sliceShiftWidth = imageSrc.cols / 4;
int sliceWidthNumber = (imageSrc.cols / sliceShiftWidth) - 1;
int SliceSizeHeight = imageSrc.rows / 2;
int sliceShiftHeight = imageSrc.rows / 4;
int sliceHeightNumber = (imageSrc.rows / sliceShiftHeight) - 1;
for (int sliceIndexHeight = 0; sliceIndexHeight < sliceHeightNumber; sliceIndexHeight++)
for (int sliceIndexWidth = 0; sliceIndexWidth < sliceWidthNumber; sliceIndexWidth++)
Mat patchImage = imageSrc(Rect(sliceIndexWidth*sliceShiftWidth, sliceIndexHeight*sliceShiftHeight, SliceSizeWidth, SliceSizeHeight));
Mat patchImageCopy;
patchImage.copyTo(patchImageCopy); // Deep copy => data are contiguous in patchImageCopy
Mat imageBicubic;
resize(patchImageCopy, imageBicubic, Size(2 * patchImage.cols, 2 * patchImage.rows), INTER_CUBIC);
Mat image_Padding;
int padding = 1;
copyMakeBorder(imageBicubic, image_Padding, padding, padding, padding, padding, BORDER_CONSTANT, Scalar(0));
Mat image_Conv;
int rows = imageBicubic.rows;
int cols = imageBicubic.cols;
image_Conv = Mat::zeros(rows, cols, CV_32FC(16));
/* rest of the code I have to write */
image_Conv.convertTo(image_Conv, CV_8U, 255.0, 0.0);
string nameBase = fileName.substr(0, fileName.find('.'));
string nameExt = fileName.substr(fileName.find('.'), fileName.length() - nameBase.length());
string strH = to_string(sliceIndexHeight);
string strW = to_string(sliceIndexWidth);
string outFileName = nameBase + "_H" + strH + "W" + strW + nameExt;
imwrite(outFileName, image_Conv);
return 0;
PS : Most of the code is not mine, I have to use it for my internship and can only edit between the lines :
resize(patchImageCopy, imageBicubic, Size(2 * patchImage.cols, 2 * patchImage.rows), INTER_CUBIC);
image_Conv.convertTo(image_Conv, CV_8U, 255.0, 0.0);
Thank you for your help !
EDIT : My first problem is solved, but it seems that it didn't work after all. I suppose that Mat::zeros set all the Mat elements at 0, right ? But if I write
cout << image_Conv.at<float>(0,0,0) << endl;
I have the error : "Unhandled exception at 0x000007FEFD4FA06D in xxxxxx.exe: Microsoft C++ exception: cv::Exception at memory location 0x000000000023E540.".
I don't know what the problem is with the memory and how to fix it.
My goal is to fill my matrix element by element using several for loops that perform several operations before the result is written into the corresponding element of my Mat. I did that with 3D and 4D arrays, and maybe the easiest solution is to do all the calculations with arrays, but I can't convert a 3D array to a 3D Mat or a 3D Mat to a 3D array.
just tested this on visual studio 2015, opencv 3.4
cv::Mat mat = cv::Mat::zeros(5, 5, CV_32FC(16));
this works fine.
You should be able to create a multi-dimensional matrix filled with 0-values using:
int size[3] = { 5, 4, 3 };
cv::Mat M(3, size, CV_32F, cv::Scalar(0));
You can iterate over the matrix with M.at<float>(i,j,k) (only for a 3D matrix created as above):
// Fill every element with its own row-major linear index, derived from the
// dimensions in size[] so the loop stays correct if they change.
for (int i = 0; i < size[0]; i++) {
    for (int j = 0; j < size[1]; j++) {
        for (int k = 0; k < size[2]; k++) {
            M.at<float>(i,j,k) = i*size[1]*size[2]+j*size[2]+k;
        }
    }
}

// Print every element together with its (i, j, k) index.
for (int i = 0; i < size[0]; i++) {
    for (int j = 0; j < size[1]; j++) {
        for (int k = 0; k < size[2]; k++) {
            std::cout << "M(" << i << ", " << j << ", " << k << "): " << M.at<float>(i,j,k) << std::endl;
        }
    }
}
Alternatively, you should be able to create a 2D matrix with multiple channels with:
cv::Mat M(5, 4, CV_32FC(3), cv::Scalar(0));
To iterate over the 2D matrix and over the channels:
// Visit every pixel of the 2D matrix and every channel of that pixel, storing
// the element's row-major linear index (channel varies fastest).
for (int i = 0; i < M.rows; i++) {
    for (int j = 0; j < M.cols; j++) {
        for (int k = 0; k < M.channels(); k++) {
            M.at<cv::Vec<float, 3> >(i,j)[k] = i*M.cols*M.channels()+j*M.channels()+k;
        }
    }
}
I want to multiply an image by its transpose. My image size is n x m.
I do it as follows:
// For every image: subtract MeanMat from it, multiply the result by a
// transposed matrix and write the product to "sub<k>.jpg".
// NOTE(review): the braces around the loop body are absent from this listing.
for (int k = 0; k < total_images; k++)
Mat img_tp1 = cv::Mat(imgRows, imgCols, CV_32FC1);
Mat img_tp2 = cv::Mat(imgRows, imgRows, CV_32FC1);
subtract(img[k], MeanMat, img_tp1);
// NOTE(review): the right-hand operand is img_tp2.t() (imgRows x imgRows, with
// no fill value given when it was created), not img_tp1.t(). The surrounding
// text describes an image multiplied by its own transpose, so presumably
// `img_tp1 * img_tp1.t()` was intended - confirm.
img_tp2 = img_tp1 * img_tp2.t();
std::ostringstream name;
name << "sub" << k << ".jpg";
cv::imwrite(name.str(), img_tp2);
and I get this error:
Unhandled exception at 0x000007FEFDB79E5D in Tracking.exe: Microsoft C++ exception: cv::Exception at memory location 0x00000000001E5EE0.
How can I do this multiplication? In fact, I want to compute the covariance matrix of the sequence of images, so I need this multiplication.
Then I decided to implement the multiplication for my RGB image myself, using this code:
// Hand-written product of img_tp1 with its transpose, per colour channel:
// for every row pair (i, j) it accumulates, over all columns k, the product of
// two per-channel values and stores the running total in CovMat0(i, j).
// NOTE(review): the braces of the three loops are absent from this listing.
for (int i = 0; i < imgRows; i++)
for (int j = 0; j < imgRows; j++)
uchar pix1[3];
uchar pix2[3];
// Per-channel accumulator for the (i, j) entry.
// NOTE(review): the accumulator is uchar, so sums of products will not fit
// once they exceed the uchar range - confirm the intended element type.
uchar pix[3] = { 0, 0, 0 };
for (int k = 0; k < imgCols; k++)
// NOTE(review): pix1 and pix2 are never assigned before these two lines write
// them INTO img_tp1; presumably the intent was to read the pixels at (i, k)
// and (j, k) into pix1 and pix2 instead - confirm.
img_tp1.at<Vec3b>(i, k) = { pix1[0], pix1[1], pix1[2] };
img_tp1.at<Vec3b>(j, k) = { pix2[0], pix2[1], pix2[2] };
CovMat0.at<Vec3b>(i, j) = { pix[0], pix[1], pix[2] };
pix[0] = (pix1[0] * pix2[0]) + pix[0];
pix[1] = (pix1[1] * pix2[1]) + pix[1];
pix[2] = (pix1[2] * pix2[2]) + pix[2];
CovMat0.at<Vec3b>(i, j) = { pix[0], pix[1], pix[2] };
But it takes a lot of time to process. Is there a better way to do it?
(I want to multiply one image by its transpose)
I want to modify a part of a multi-dimensional matrix using openCV. Basically I want to achieve the same as written in Matlab:
A = zeros(5,5,25);
A(:,:,1) = some_matrix1;
A(:,:,2) = some_matrix2;
I am not sure if I should use a 5x5 matrix with 25 channels or a 5x5x25 matrix with single channel. Here is what I tried:
int dim[3] = { 5,5,25 };
Mat A(3, dim, CV_32FC(1), Scalar::all(0));
A(Range::all(),Range::all(),0) = some_matrix;
But it seems like I can only use Range for two dimensions.
Mat A(5, 5, CV_32FC(25), Scalar::all(0));
A(Range::all(),Range::all())[0] = some_matrix;
But in this case, I don't know how to access the channel.
Can you please help me with it?
OpenCV is optimized for 2D matrices. Multidimensional matrices will work, but they are rather inefficient and difficult to access.
This example code will show you how to write and read values from an 3D matrix:
#include <opencv2\opencv.hpp>
using namespace cv;
int main()
int sizes[] = { 5, 5, 25 };
Mat data(3, sizes, CV_32F);
Mat1f some_matrix(sizes[0], sizes[1]);
randu(some_matrix, 0.f, 100.f); // some random values
// Init data with each plane a constant increasing value
for (int z = 0; z < data.size[2]; ++z)
// Set each z-plane to some scalar value
Range ranges[] = { Range::all(), Range::all(), Range(z, z + 1) };
data(ranges) = data.size[2] - z;
// Set the n-th z-plane to some_matrix
int z = 0;
for (int r = 0; r < sizes[0]; ++r)
for (int c = 0; c < sizes[1]; ++c)
data.at<float>(r, c, z) = some_matrix(r, c);
// Access all slices along z dimension
for (int z = 0; z < data.size[2]; ++z)
Range ranges[] = { Range::all(), Range::all(), Range(z, z + 1) };
Mat slice3d(data(ranges).clone()); // with clone slice is continuous, but still 3d
Mat slice(2, &data.size[0], data.type(), slice3d.data);
return 0;
However, it's far easier and practical to store your 5x5x25 3D matrix as a std::vector<Mat>, where the vector has length 25, and each matrix is a 2D 5x5.
See the code:
#include <opencv2\opencv.hpp>
using namespace cv;
int main()
int sizes[] = { 5, 5, 25 };
vector<Mat> data(sizes[2]);
// Init data with each plane a constant increasing value
for (int z = 0; z < sizes[2]; ++z)
data[z] = Mat(sizes[0], sizes[1], CV_32F, float(sizes[2] - z));
Mat1f some_matrix(sizes[0], sizes[1]);
randu(some_matrix, 0.f, 100.f); // some random values
// Set the n-th z-plane to some_matrix
int z = 0;
data[z] = some_matrix;
return 0;
Here is the piece of code to access the pixel from the channel, you can try it.
// Print every element of a 5 x 5 x 25 single-channel float matrix.
// A is a 3-dimensional matrix with one channel, so each element is addressed
// with three indices through at<float>(m, n, a). Reading it as a 2D Vec3f
// matrix and indexing the vector with `a` up to 24 would go out of range.
int dim[3] = { 5,5,25 };
Mat A(3, dim, CV_32FC1, Scalar::all(0));
for (int m = 0; m < dim[0]; m++)
{
    for (int n = 0; n < dim[1]; n++)
    {
        for (int a = 0; a < dim[2]; a++) // index along the third dimension
        {
            cout << A.at<float>(m, n, a) << endl;
        }
    }
}
How can I flatten a 3D Matrix and display it in 2d?
Are there simple ways to display it in 3d?
So far I simply tile the images in the 3rd dimension together like thus:
/// @brief Tile the planes of a 3-D matrix side by side into one 2-D image.
///
/// The output is a square grid of ceil(sqrt(third)) x ceil(sqrt(third)) tiles,
/// each rows x cols, filled left to right and top to bottom. Unused tiles stay
/// zero.
///
/// Plane t is taken to be the t-th contiguous run of rows * cols elements in
/// In's buffer, which is the layout the original pointer arithmetic assumed.
/// NOTE(review): confirm this matches how In was filled.
///
/// @param In    Continuous 3-D matrix of size rows x cols x third, any type.
/// @param Out2d Receives the tiled image; it is (re)allocated with In's type.
void Flatten3DArray(const cv::Mat& In, cv::Mat& Out2d)
{
    CV_Assert(In.dims == 3);
    // The per-plane pointer arithmetic below needs one unbroken buffer.
    CV_Assert(In.isContinuous());

    const int rows = In.size[0];
    const int cols = In.size[1];
    const int third = In.size[2];

    // Smallest square grid with room for every plane.
    const int tiles = static_cast<int>(ceil(sqrt(static_cast<double>(third))));

    Out2d.create(tiles * rows, tiles * cols, In.type());
    Out2d = Scalar(0);

    // Size of one plane in bytes, so the offset is right for any element type
    // rather than only for 16-bit data.
    const size_t planeBytes = static_cast<size_t>(rows) * cols * In.elemSize();

    int thirdDimIdx = 0;
    for (int i = 0; i < tiles && thirdDimIdx < third; ++i)
    {
        for (int j = 0; j < tiles && thirdDimIdx < third; ++j, ++thirdDimIdx)
        {
            // Destination tile inside the output image.
            Mat roi(Out2d(cv::Rect(j*cols, i*rows, cols, rows)));

            // 2-D header over the current plane (first two sizes of In).
            unsigned char *plane = In.data + thirdDimIdx * planeBytes; // sub-matrix pointer
            cv::Mat subMatrix(2, In.size, In.type(), plane);

            // Copy the plane into its tile; without this Out2d stays all zero.
            subMatrix.copyTo(roi);
        }
    }
}
Is there a better way to do this?
I have 200 matrices A[i] (each of dimension 4096*48) and 48 vectors v[j] (each of dimension 48*1). I want to calculate A[i]*v[j] (i=0:199, j=0:47).
I have been thinking since yesterday about how to arrange my grid size and block size, but I have not found an answer yet. Could anyone give me some advice?
The maximum number of threads per block is 512. This is my working environment.
The following is my code. It works right. I have checked. But it is slower than Matlab :(
#include <mat.h>
#include <time.h>
#include <cuda_runtime.h>
#include "cuda.h"
using std::cout;
using std::endl;
using namespace cv;
using namespace std;
#include <limits>
#include <iostream>
#include <cstdlib>
using namespace std;
#define kernel_size 48
// Plain row-major matrix descriptor passed by value between host and device.
struct Matrix {
    int width;        // number of columns
    int height;       // number of rows
    int stride;       // elements between the starts of consecutive rows
    float* elements;  // pointer to the first element
};
// Forward declaration of the matrix multiplication kernel
__global__ void MatMulKernel(const Matrix, const Matrix, Matrix);
// Matrix multiplication - Host code.
// Copies A and B to the device, launches MatMulKernel with one block per
// output element (grid = A.height x C.width) and B.height threads per block,
// then copies the result back into C.elements and releases the device buffers.
// B.height must not exceed kernel_size, because the kernel's shared arrays
// hold kernel_size elements.
void MatMul(const Matrix A, const Matrix B, Matrix C)
{
    // Load A and B to device memory
    Matrix d_A;
    d_A.width = d_A.stride = A.width; d_A.height = A.height;
    size_t size = A.width * A.height * sizeof(float);
    cudaMalloc(&d_A.elements, size);
    cudaMemcpy(d_A.elements, A.elements, size,
               cudaMemcpyHostToDevice);
    Matrix d_B;
    d_B.width = d_B.stride = B.width; d_B.height = B.height;
    size = B.width * B.height * sizeof(float);
    cudaMalloc(&d_B.elements, size);
    cudaMemcpy(d_B.elements, B.elements, size,
               cudaMemcpyHostToDevice);

    // Allocate C in device memory
    Matrix d_C;
    d_C.width = d_C.stride = C.width; d_C.height = C.height;
    size = C.width * C.height * sizeof(float);
    cudaMalloc(&d_C.elements, size);

    // Invoke kernel
    dim3 dimBlock(1,B.height);
    dim3 dimGrid(A.height, C.width);
    MatMulKernel<<<dimGrid, dimBlock>>>(d_A, d_B, d_C);

    // Read C from device memory (size still holds C's byte count)
    cudaMemcpy(C.elements, d_C.elements, size,
               cudaMemcpyDeviceToHost);

    // Free device memory
    cudaFree(d_A.elements);
    cudaFree(d_B.elements);
    cudaFree(d_C.elements);
}
// Matrix multiplication kernel called by MatMul().
// Each block produces one element of C: block (x, y) computes the dot product
// of the B.height-long segment of row x of A that starts at column
// y * B.height with the vector B, and stores it at C(x, y).
__global__ void MatMulKernel(Matrix A, Matrix B, Matrix C)
{
    // Block row and column select the output element
    int blockCol = blockIdx.y;
    int blockRow = blockIdx.x;

    // Position of this thread within the segment
    int row = threadIdx.y;

    // Shared memory used to store the A segment and B respectively
    __shared__ float As[1][kernel_size];
    __shared__ float Bs[kernel_size][1];

    // Load the A segment and B from device memory to shared memory
    // Each thread loads one element of each
    As[0][row] = A.elements[blockRow * A.stride + row+B.height*blockCol];
    Bs[row][0] = B.elements[row];

    // Synchronize to make sure every element is loaded
    // before starting the computation
    __syncthreads();

    // Multiply the A segment and B together and accumulate the result
    float Cvalue = 0;
    for (int e = 0; e < B.height; ++e)
        Cvalue += As[0][e] * Bs[e][0];

    // No second barrier is needed: shared memory is not written again.

    // Write the result to device memory
    // Every thread of the block holds the same value and writes it to the
    // same element
    C.elements[blockRow * C.stride +blockCol]= Cvalue;
}
/// @brief Allocate an n x m row-major matrix of random digits and print it.
///
/// Every element is set to rand() % 10. The matrix is then printed to stdout,
/// one line per row, each entry formatted as " [i,j] value".
///
/// @param n Number of rows.
/// @param m Number of columns.
/// @return Buffer of n * m floats obtained from malloc (the caller must
///         free() it), or NULL when a dimension is negative or the
///         allocation fails.
float * gen_matrix(int n /*row*/, int m /*col*/){
    if (n < 0 || m < 0) {
        return NULL;
    }

    // Compute the element count in size_t so large matrices do not overflow int.
    const std::size_t count = static_cast<std::size_t>(n) * static_cast<std::size_t>(m);
    float *A = static_cast<float *>(std::malloc(count * sizeof(float)));
    if (A == NULL) {
        return NULL;
    }

    // Fill in row-major order.
    for (std::size_t idx = 0; idx < count; ++idx) {
        A[idx] = static_cast<float>(std::rand() % 10);
    }

    // print matrix elements.
    for (int i = 0; i < n; ++i) {
        for (int j = 0; j < m; ++j) {
            std::cout << " [" << i << "," << j << "] " << A[static_cast<std::size_t>(i) * m + j];
        }
        std::cout << std::endl;
    }
    return A;
}
/// @brief Driver: builds s matrices of size m x k side by side in one m x (k*s)
///        matrix, multiplies each by the k x 1 vector on the GPU, times the
///        call, and checks the result against a CPU reference.
/// @return 1 if a host allocation fails, 0 otherwise.
int main()
{
    int k=kernel_size;
    int s=2000;
    int m =4096;
    //int m=2;
    //int s=1;
    int n = k*s;

    float *Ae = gen_matrix(m,n);
    float *Be= gen_matrix(k,1);
    float *Ce=(float *) malloc(m*s*sizeof(float));
    float *Ce2=(float *) malloc(s*m*sizeof(float));
    if (Ae == NULL || Be == NULL || Ce == NULL || Ce2 == NULL) {
        free(Ae); free(Be); free(Ce); free(Ce2);
        return 1;
    }

    // {width, height, stride, elements}
    Matrix A ={n,m,n,Ae};
    Matrix B ={1,k,1,Be};
    Matrix C ={s,m,s,Ce};

    const clock_t begin_time = clock();
    MatMul(A, B, C);
    std::cout << float( clock () - begin_time ) / CLOCKS_PER_SEC;

    // Print the top-left corner of the result. C has s columns, so the row
    // stride is s (not m).
    for (int i = 0; i < 3; ++i) {
        for (int j = 0; j <7; ++j)
            cout << " [" << i << "," << j << "] " << Ce[i*s+j] ;
        cout << endl;
    }

    // CPU reference: Ce2(i, j) is the dot product of the j-th k-long segment
    // of row i of A with B.
    for (int i = 0; i < m; i++)
        for (int j = 0; j < s; j++)
            Ce2[i*s+j] = 0;

    // NOTE(review): the statements inside these loops were missing from the
    // listing; they are rebuilt from the operands named in the commented-out
    // printf and from the kernel's indexing. The mismatch report is a stand-in
    // for whatever the original did on a mismatch.
    long mismatches = 0;
    for (int i = 0; i < m; i++) {
        for (int j = 0; j < s; j++) {
            for (int ind = 0; ind < k; ind++) {
                // printf("%f---****%f\n",Ae[j*k+ind+i*k*s],Be[ind]);
                Ce2[i*s+j] += Ae[j*k+ind+i*k*s] * Be[ind];
            }
            if (Ce2[i*s+j]!= Ce[i*s+j])
                ++mismatches;
        }
    }
    cout << "mismatches: " << mismatches << endl;

    free(Ae);
    free(Be);
    free(Ce);
    free(Ce2);
    return 0;
}
This is just a matrix-matrix multiplication problem. If you want things to run fast, you should not be writing your own matrix-matrix multiply code. Use CUBLAS Sgemm.
Conceptually, if you arrange your A matrices like this (stacked vertically, one below the other): [A0; A1; A2; ...; A199]
then you will have a new matrix AA that is (4096*200) rows x 48 columns.
Arrange your 48 V vectors (48x1) in a 48x48 matrix (VV):
(each V vector is a column of the new matrix VV)
You now have a single matrix multiplication problem (AA*VV) that is (4096*200)x48 multiplied by 48x48, which yields a (4096*200) x 48 result. Each column of this result is a vector of length 4096*200 that contains the 200 results of the individual matrix-vector multiplications you were trying to do for one V vector. The 200 results per column * 48 columns combine to give you all of the results that your original problem would create. The first column would contain the results of [V0] multiplied by each of the 200 A matrices, the second column would contain the results of [V1] multiplied by each of the 200 A matrices, etc.
Once you have arranged your data like this, using CUBLAS Sgemm should be the quickest possible approach on the GPU. Note that CUBLAS expects the underlying storage to be column-major, so if you are rearranging your data, you will probably want to keep this in mind. There is a CUDA sample code for CUBLAS matrix multiplication.
In your code it appears you actually have 2000 A matrices, but your question refers to 200. I have used 200 for example in my answer, but the concept would be the same with 2000 A matrices.