failed assertion during applying BDCSVD - c++

I am using the following struct in my project and the problem occurs in the second constructor. (I am using Visual Studio 2019.)
struct optimal_subspace {
vector<Eigen::VectorXd> span;
//empty constructor
optimal_subspace() {}
//constructor taking a pointset, the cluster number i and the size of subspaces q
//used for the k-means subspace algorithm on the whole pointset
optimal_subspace(vector<point>& pointset, int i, int q) {
//declare a vector to contain the span
vector<Eigen::VectorXd> subspace_span;
//declare integers n,d to save the dimensions of the current data matrix
int n, d;
//declare integer r to save the minimum of n and d
int r;
//using the constructor of the struct subspacematrix to get the data matrix of cluster i, the cluster mean is already subtracted
subspacematrix sm(pointset, i);
Eigen::MatrixXd m = sm.matrix;
//save the dimensions of m
n = m.rows();
d = m.cols();
//determine min(n,d)
r = min(n, d);
//check if the cluster contains points
if (sm.status == true) {
//use either Jacobi or BDCSVD according to the size of m, declare v to save V from the SVD D = U E V^T or thin SVD
//Jacobi better for matrices smaller than 16x16
Eigen::MatrixXd v;
//if r < q compute the Full decomposition as otherwise there are not enough singular vectors to obtain a q-dimensional subspace
//else compute the thin decomposition
clock_t start = clock();
if (n < 16 & d < 16) {
Eigen::JacobiSVD<Eigen::MatrixXd> svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV);
v = svd.matrixV();
}
else {
Eigen::BDCSVD<Eigen::MatrixXd> svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV);
v = svd.matrixV();
}
clock_t stop = clock();
svd_time += (double) (stop - start) / CLOCKS_PER_SEC;
for (int j = 0; j < min(q, r); j++) {
//V is of the form dxr, so, we take the r columns
subspace_span.push_back(v.col(j));
//currentsubspace.push_back(v.col(j) + mean);
}
//if r < q, we fill the subspaces by taking the coordinates of random points outside the cluster
if (min(q, r) < q) {
vector<int> non_cluster_indices = opp_ind(sm.cluster_indices, pointset.size());
uniform_int_distribution<int> uniform_dist(0, non_cluster_indices.size());
//pick randomly a point outside the cluster and add it
for (int j = min(q, r); j < q; j++) {
Eigen::VectorXd non_cluster_vector = pointset[non_cluster_indices[uniform_dist(mt)]].getcoord();
subspace_span.push_back(non_cluster_vector);
}
//orthonormalize the span
stableGramSchmidt(subspace_span, min(q, r));
}
if (subspace_span.size() == 0) cout << "error: empty subspace added" << endl;
}
span = subspace_span;
}
//constructor taking a pointset, the cluster number i and the size of subspaces q and a vector of indices representing a sample
//used for sampling k-means
optimal_subspace(vector<point>& pointset, int i, int q, vector<int> indices) {
//declare a vector to contain the span
vector<Eigen::VectorXd> subspace_span;
//declare integers n,d to save the dimensions of the current data matrix
int n, d;
//declare integer r to save the minimum of n and d
int r;
//using the constructor of the struct subspacematrix to get the data matrix of cluster i, the cluster mean is already subtracted
subspacematrix sm(pointset, indices, i);
Eigen::MatrixXd m = sm.matrix;
//save the dimensions of m
n = m.rows();
d = m.cols();
//check if the cluster contains points
if (sm.status == true) {
//use either Jacobi or BDCSVD according to the size of m, declare v to save V from the SVD D = U E V^T or thin SVD
//Jacobi better for matrices smaller than 16x16
Eigen::MatrixXd v;
//if r < q compute the Full decomposition as otherwise there are not enough singular vectors to obtain a q-dimensional subspace
//else compute the thin decomposition
clock_t start = clock();
if (n < 16 & d < 16) {
Eigen::JacobiSVD<Eigen::MatrixXd> svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV);
v = svd.matrixV();
}
else {
//ofstream file("problematicmatrix.txt", ofstream::trunc);
//file << sm.matrix.format(CommaInitFmt) << endl;
//file.close();
//Eigen::MatrixXd matrix = load_csv<Eigen::MatrixXd>("problematicmatrix.txt");
Eigen::BDCSVD<Eigen::MatrixXd> svd(sm.matrix, Eigen::ComputeThinU | Eigen::ComputeThinV);
v = svd.matrixV();
}
clock_t stop = clock();
svd_time += (double) (stop - start) / CLOCKS_PER_SEC;
int v_cols = v.cols();
int fill_up_index = min(q, v_cols);
for (int j = 0; j < fill_up_index; j++) {
subspace_span.push_back(v.col(j));
}
//if we don't have enough columns, we fill the subspaces by taking the coordinates of random points outside the cluster
if (fill_up_index < q) {
vector<int> non_cluster_indices = opp_ind(sm.cluster_indices, indices);
uniform_int_distribution<int> uniform_dist(0, non_cluster_indices.size() - 1);
//pick randomly a point outside the cluster and add it
for (int j = fill_up_index; j < q; j++) {
Eigen::VectorXd non_cluster_vector = pointset[non_cluster_indices[uniform_dist(mt)]].getcoord();
subspace_span.push_back(non_cluster_vector);
}
//orthonormalize the span
stableGramSchmidt(subspace_span, fill_up_index);
}
if (subspace_span.size() == 0) cout << "error: empty subspace added" << endl;
}
span = subspace_span;
}
};
I get the following exception:
Unhandled exception at 0x00007FF6CB72BD3B in MAaktuell.exe: 0xC0000005: Access violation reading location 0xFFFFFFFFFFFFFFFF.
and when debugging after getting it, I end up in the BDCSVD.h.
I also run it in debug mode and got the following error message:
Assertion failed: index >= 0 && index < size(), file C:\Users\Marcel\Desktop\eigen-3.3.7\eigen-3.3.7\Eigen\src\Core\DenseCoeffsBase.h, line 180
I stored the matrix using the I0 format provided by eigen in a txt.file as follows (and included it in the second constructor, it is commented right now):
ofstream file("problematicmatrix.txt", ofstream::trunc);
ile << sm.matrix.format(CommaInitFmt) << endl;
file.close();
and uploaded it here:
problematic matrix in a txt.file
However, I tried to compute the BDCSVD for this matrix again as follows:
Eigen::MatrixXd matrix = load_csv<Eigen::MatrixXd>("problematicmatrix.txt");
Eigen::BDCSVD<Eigen::MatrixXd> svd(matrix, Eigen::ComputeThinU | Eigen::ComputeThinV);
and then, it works. If I include saving and loading the matrix in my method, it fails again. Can anyone help me finding the error? Why do I end up in the header of BDCSVD, when debugging?

Related

convolution implementation in c++

I want to implement 2D convolution function in C++ by myself, without using filter2D(). I'm trying to iterate all pixels of input image and kernel, then, assign new value to each pixel of dst.
However, I got this error.
Thread 1: EXC_BAD_ACCESS (code=1, address=0x0)
I found that this error tells I'm accessing nullptr, but I could not solve the problem. Here is my c++ code.
cv::Mat_<float> spatialConvolution(const cv::Mat_<float>& src, const cv::Mat_<float>& kernel)
{
// declare variables
Mat_<float> dst;
Mat_<float> flipped_kernel;
float tmp = 0.0;
// flip kernel
flip(kernel, flipped_kernel, -1);
// multiply and integrate
// input rows
for(int i=0;i<src.rows;i++){
// input columns
for(int j=0;j<src.cols;j++){
// kernel rows
for(int k=0;k<flipped_kernel.rows;k++){
// kernel columns
for(int l=0;l<flipped_kernel.cols;l++){
tmp += src.at<float>(i,j) * flipped_kernel.at<float>(k,l);
}
}
dst.at<float>(i,j) = tmp;
}
}
return dst.clone();
}
To simplify let's suppose you have kernel 3x3
k(0,0) k(0,1) k(0,2)
k(1,0) k(1,1) k(1,2)
k(2,0) k(2,1) k(2,2)
to calculate convolution you are scanning input image (marked as I) from left to fright, from top to bottom
and for every pixel of input image you assign one value calculated from the formula below:
newValue(y,x) = I(y-1,x-1) * k(0,0) + I(y-1,x) * k(0,1) + I(y-1,x+1) * k(0,2)
+ I(y,x-1) * k(1,0) + I(y,x) * k(1,1) + I(y,x+1) * k(1,2) +
+ I(y+1,x-1) * k(2,0) + I(y+1,x) * k(2,1) + I(y+1,x+1) * k(2,2)
------------------x------------>
|
|
| [k(0,0) k(0,1) k(0,2)]
y [k(1,0) k(1,1) k(1,2)]
| [k(2,0) k(2,1) k(2,2)]
|
(y,x) of input Image (I) is anchor point of kernel, to assign new value to I(y,x)
you need to multiply every k coefficient by corresponding point of I - your code doesn't do it.
First you need to create dst matrix with dimenstion as original image, and the same type of pixel.
Then you need to rewrite your loops to reflect formula described above:
cv::Mat_<float> spatialConvolution(const cv::Mat_<float>& src, const cv::Mat_<float>& kernel)
{
Mat dst(src.rows,src.cols,src.type());
Mat_<float> flipped_kernel;
flip(kernel, flipped_kernel, -1);
const int dx = kernel.cols / 2;
const int dy = kernel.rows / 2;
for (int i = 0; i<src.rows; i++)
{
for (int j = 0; j<src.cols; j++)
{
float tmp = 0.0f;
for (int k = 0; k<flipped_kernel.rows; k++)
{
for (int l = 0; l<flipped_kernel.cols; l++)
{
int x = j - dx + l;
int y = i - dy + k;
if (x >= 0 && x < src.cols && y >= 0 && y < src.rows)
tmp += src.at<float>(y, x) * flipped_kernel.at<float>(k, l);
}
}
dst.at<float>(i, j) = saturate_cast<float>(tmp);
}
}
return dst.clone();
}
Your memory access error is presumably happening due to the line:
dst.at<float>(i,j) = tmp;
because dst is not initialized. You can't assign something to that index of the matrix if it has no size/data. Instead, initialize the matrix first, as Mat_<float> is a declaration, not an initialization. Use one of the initializations where you can specify a cv::Size or the rows/columns from the different constructors for Mat (see the docs). For example, you can initialize dst with:
Mat dst{src.size(), src.type()};

define and filling a sparse matrix using Eigen Library in C++

I am trying to build a spars Matrix using a Eigen or Armadillo library in C++ to solve a system of linear equations Ax=b. A is the coefficient matrix with a dimension of n*n, and B is a vector of right hand side with a dimension of n
the Spars Matrix A is like this, see the figure
I had a look though the Eigen document but I have a problem with defining and filling the Spars Matrix in C++.
could you please give me an example code to define the spars matrix and how to fill the values into the matrix using Eigen library in c++?
consider for example a simple spars matrix A:
1 2 0 0
0 3 0 0
0 0 4 5
0 0 6 7
int main()
{
SparseMatrix<double> A;
// fill the A matrix ????
VectorXd b, x;
SparseCholesky<SparseMatrix<double> > solver;
solver.compute(A);
x = solver.solve(b);
return 0;
}
The sparse matrix could be filled with the values mentioned in the post by using the .coeffRef() member function, as shown in this routine:
SparseMatrix<double> fillMatrix() {
int N = 4;
int M = 4;
SparseMatrix<double> m1(N,M);
m1.reserve(VectorXi::Constant(M, 4)); // 4: estimated number of non-zero enties per column
m1.coeffRef(0,0) = 1;
m1.coeffRef(0,1) = 2.;
m1.coeffRef(1,1) = 3.;
m1.coeffRef(2,2) = 4.;
m1.coeffRef(2,3) = 5.;
m1.coeffRef(3,2) = 6.;
m1.coeffRef(3,3) = 7.;
m1.makeCompressed();
return m1;
}
However, the SparseCholesky module (SimplicialCholesky<SparseMatrix<double> >) won't work in this case because the matrix is not Hermitian. The system could be solved with a LU or BiCGStab solver. Also note that sizes ofx and b need to be defined:
VectorXd b(A.rows()), x(A.cols());
In case of larger sparse matrices you may also want to look at the .reserve() function in order to allocate memory before filling the elements. The .reserve() function can be used to provide an estimate of the number of non-zero entries per column (or row, depending on the storage order. The default is comumn-major). In the example above that estimate is 4, but it does not make sense in such a small matrix. The documentation states that it is preferable to overestimate the number of non-zeros per column.
Since this question also asks about Armadillo, here is the corresponding Armadillo-based code. Best to use Armadillo version 9.100+ or later, and link with SuperLU.
#include <armadillo>
using namespace arma;
int main()
{
sp_mat A(4,4); // don't need to explicitly reserve the number of non-zeros
// fill with direct element access
A(0,0) = 1.0;
A(0,1) = 2.0;
A(1,1) = 3.0;
A(2,2) = 4.0;
A(2,3) = 5.0;
A(3,2) = 6.0;
A(3,3) = 7.0; // etc
// or load the sparse matrix from a text file with the data stored in coord format
sp_mat AA;
AA.load("my_sparse_matrix.txt", coord_ascii)
vec b; // ... fill b here ...
vec x = spsolve(A,b); // solve sparse system
return 0;
}
See also the documentation for SpMat, element access, .load(), spsolve().
The coord file format is simple. It stores non-zeros values.
Each line contains:
row col value
The row and column counts start at zero. Example:
0 0 1.0
0 1 2.0
1 1 3.0
2 2 4.0
2 3 5.0
3 2 6.0
3 3 7.0
1000 2000 9.0
Values not explicitly listed are assumed to be zero.
#include <vector>
#include <iostream>
#include <Eigen/Dense>
#include <Eigen/Sparse>
#include <Eigen/Core>
#include <cstdlib>
using namespace Eigen;
using namespace std;
int main()
{
double L = 5; // Length
const int N = 120; // No of cells
double L_cell = L / N;
double k = 100; // Thermal Conductivity
double T_A = 100.;
double T_B = 200.;
double S = 1000.;
Vector<double, N> d, D, A, aL, aR, aP, S_u, S_p;
vector<double> xp;
xp.push_back((0 + L_cell) / 2.0);
double xm = xp[0];
for (int i = 0; i < N - 1; i++)
{
xm = xm + L_cell;
xp.push_back(xm);
}
for (int i = 0; i < N; i++)
{
A(i) = .1;
d(i) = L_cell;
D(i) = k / d(i);
}
aL(0) = 0;
aR(0) = D(0) * A(0);
S_p(0) = -2 * D(0) * A(0);
aP(0) = aL(0) + aR(0) - S_p(0);
S_u(0) = 2 * D(0) * A(0) * T_A + S * L_cell * A(0);
for (int i = 1; i < N - 1; i++)
{
aL(i) = D(i) * A(i);
aR(i) = D(i) * A(i);
S_p(i) = 0;
aP(i) = aL(i) + aR(i) - S_p(i);
S_u(i) = S * A(i) * L_cell;
}
aL(N - 1) = D(N - 1) * A(N - 1);
aR(N - 1) = 0;
S_p(N - 1) = -2 * D(N - 1) * A(N - 1);
aP(N - 1) = aL(N - 1) + aR(N - 1) - S_p(N - 1);
S_u(N - 1) = 2 * D(N - 1) * A(N - 1) * T_B + S * L_cell * A(N - 1);
typedef Eigen::Triplet<double> T;
std::vector<T> tripletList;
tripletList.reserve(N * 3);
Matrix<double, N, 3> v; // v is declared here
v << (-1) * aL, aP, (-1) * aR;
for (int i = 0, j = 0; i < N && j < N; i++, j++)
{
tripletList.push_back(T(i, j, v(i, 1)));
if (i + 1 < N && j + 1 < N)
{
tripletList.push_back(T(i + 1, j, v(i + 1, 0)));
tripletList.push_back(T(i, j + 1, v(i, 2)));
}
}
SparseMatrix<double> coeff(N, N);
coeff.setFromTriplets(tripletList.begin(), tripletList.end());
SimplicialLDLT<SparseMatrix<double> > solver;
solver.compute(coeff);
if (solver.info() != Success) {
cout << "decomposition failed" << endl;
return;
}
Vector<double, N> temparature;
temparature = solver.solve(S_u);
if (solver.info() != Success)
{
cout << "Solving failed" << endl;
return;
}
vector<double> Te = {}, x = {};
Te.push_back(T_A);
x.push_back(0);
for (int i = 0; i < N; i++)
{
Te.push_back(temparature(i));
x.push_back(xp[i]);
}
Te.push_back(T_B);
x.push_back(L);
for (int i = 0; i < N + 2; i++)
{
cout << x[i] << " " << Te[i] << endl;
}
return 0;
}
Here is a full code of a solution to numerical problem which uses SparseMatrix. Look at the matrix v. It has the values of all the nonzero elements of coeff matrix yet to be defined. In the next loop I made a series of tripletList.push_back(...) adding a triplet consisting of row and column index and corresponding value taken from v for each non-zero element of coeff. Now declare a Sparse Matrix coeff with appropriate size and use the method setFromTriplets (documentation) to set its non-zero elements from tripletList triplets.

RcppParallel Parallelizing distance computation: segfault

I have a matrix, for which I want to compute the distance (let's say Euclidean) between the ith row and every other row(i.e. I want the ith row of the pairwise distance matrix).
#include <Rcpp.h>
#include <cmath>
#include <algorithm>
#include <RcppParallel.h>
//#include <RcppArmadillo.h>
#include <queue>
using namespace std;
using namespace Rcpp;
using namespace RcppParallel;
// [[Rcpp::export]]
double dist_fun(NumericVector row1, NumericVector row2){
double rval = 0;
for (int i = 0; i < row1.length(); i++){
rval += (row1[i] - row2[i]) * (row1[i] - row2[i]);
}
return rval;
}
// [[Rcpp::export]]
NumericVector dist_row(NumericMatrix mat, int i){
NumericVector row(mat.nrow());
NumericMatrix::Row row1 = mat.row(i - 1);
for (int j = 0; j < mat.nrow(); j++){
NumericMatrix::Row row2 = mat.row(j);
row(j) = dist_fun(row1, row2);
}
return row;
}
// [[Rcpp::depends(RcppParallel)]]
struct JsDistance: public Worker {
// input matrix to read from
const NumericMatrix mat;
int i;
// output vector to write to
NumericVector output;
// initialize from Rcpp input and output matrixes (the RMatrix class
// can be automatically converted to from the Rcpp matrix type)
JsDistance(const NumericMatrix mat, int i, NumericVector output)
: mat(mat), i(i), output(output) {}
// function call operator that work for the specified range (begin/end)
void operator()(std::size_t begin, std::size_t end) {
NumericVector row1 = mat.row(i);
for (std::size_t j = begin; j < end; j++) {
NumericVector row2 = mat.row(j);
output[j] = dist_fun(row1, row2);
}
}
};
// [[Rcpp::export]]
NumericVector parallel_dist_row(NumericMatrix mat, int i) {
// allocate the matrix we will return
NumericVector output(mat.nrow());
// create the worker
JsDistance JsDistance(mat, i, output);
// call it with parallelFor
parallelFor(0, mat.nrow(), JsDistance);
return output;
}
The sequential way using Rcpp is the function 'row_dist' as written above. Yet the matrix I want to work with is very large so I want to parallelize it. But then I will run into a segfault error which I don't quite understand why. To trigger the error you can run the following code:
library(Rcpp)
library(RcppParallel)
setThreadOptions(numThreads = 20)
set.seed(42)
X = matrix(rnorm(10000 * 400), 10000, 400)
sourceCpp("question.cpp")
start1 = proc.time()
print(dist_row(X, 2)[1:30])
print(proc.time() - start1)
start2 = proc.time()
print(parallel_dist_row(X, 2)[1:30])
print(proc.time() - start2)
Can someone give me some hint about what I did wrong? Thanks in advance for your time!
=======================================================================
Edit:
inline double d(double a, double b){
return fabs(a - b);
}
// [[Rcpp::depends(RcppParallel)]
struct dtwDistance: public Worker {
// Input matrix to read from must be of the RMatrix<T> form
// if using Rcpp objects
const RMatrix<double> mat;
int i;
// Output vector to write to must be of the RVector<T> form
// if using Rcpp objects
RVector<double> output;
// initialize from Rcpp input and output matrixes (the RMatrix class
// can be automatically converted to from the Rcpp matrix type)
dtwDistance(const NumericMatrix mat, int i, NumericVector output)
: mat(mat), i(i - 1), output(output) {}
// Note the -1 ^^^^ to match results from prior function
// Function call operator to iterate over a specified range (begin/end)
void operator()(std::size_t begin, std::size_t end) {
RMatrix<double>::Row row1 = mat.row(i);
for (std::size_t j = begin; j < end; ++j) {
RMatrix<double>::Row row2 = mat.row(j);
size_t n = row1.length();
size_t m = row2.length();
NumericMatrix cost(n + 1, m + 1);
for (int ii = 1; ii <= n; ii++){
cost(i, 0) = numeric_limits<double>::infinity();
}
for (int jj = 1; jj <= m; jj++){
cost(0, j) = numeric_limits<double>::infinity();
}
for (int ii = 1; ii <= n; ii++){
for (int jj = 1; jj <= m; jj++){
double dist = d(row1[ii - 1], row2[jj - 1]);
cost(ii, jj) = dist + min(min(cost(ii - 1, jj), cost(ii, jj - 1)), cost(ii - 1, jj - 1));
//cout << ii << ", " << jj << ", " << cost(ii, jj) << "\n";
}
}
output[j] = cost(n, m);
}
}
};
// [[Rcpp::export]]
NumericVector parallel_dist_row_dtw(NumericMatrix mat, int i) {
// allocate the matrix we will return
//RMatrix<double> input(mat);
NumericVector y(mat.nrow());
//RVector<double> output(y);
// create the worker
dtwDistance dtwDistance(mat, i, y);
// call it with parallelFor
parallelFor(0, mat.nrow(), dtwDistance);
return y;
}
The distance I needed to calculate is the dynamic time warping distance. I implemented it as above. Yet when running, it will give a 'stack imbalance' warning. And there will be a segfault after several runs. I'm wondering what is the problem now.
To trigger the problem, I did:
library(Rcpp)
library(RcppParallel)
setThreadOptions(numThreads = 4)
sourceCpp("scripts/chisq_dtw.cpp")
set.seed(42)
X = matrix(rnorm(1000), 100, 10)
parallel_dist_row_dtw(X, 1)
parallel_dist_row_dtw(X, 2)
parallel_dist_row_dtw(X, 3)
parallel_dist_row_dtw(X, 4)
parallel_dist_row_dtw(X, 5)
The issue is you are not using the thread-safe wrapper around R objects via RMatrix<T> and RVector<T>. These classes are important because of the parallelization being executed on a background thread, which is an area that is not safe to call R or Rcpp APIs. The official documentation emphasizes this in the Safe Accessors section.
In particular, we have:
To provide safe and convenient access to the arrays underlying R vectors and matrices RcppParallel introduces several accessor classes:
RVector<T> — Wrap R vectors of various types
RMatrix<T> — Wrap R matrices of various types (also includes Row and Column classes)
To create a thread safe accessor for an Rcpp vector or matrix just construct an instance of RVector or RMatrix with it.
Code Fix
So, your work can be fixed by switching *Matrix to RMatrix<T> and *Vector to RVector<T>.
struct JsDistance: public Worker {
// Input matrix to read from must be of the RMatrix<T> form
// if using Rcpp objects
const RMatrix<double> mat;
int i;
// Output vector to write to must be of the RVector<T> form
// if using Rcpp objects
RVector<double> output;
// initialize from Rcpp input and output matrixes (the RMatrix class
// can be automatically converted to from the Rcpp matrix type)
JsDistance(const NumericMatrix mat, int i, NumericVector output)
: mat(mat), i(i - 1), output(output) {}
// Note the -1 ^^^^ to match results from prior function
// Function call operator to iterate over a specified range (begin/end)
void operator()(std::size_t begin, std::size_t end) {
RMatrix<double>::Row row1 = mat.row(i);
for (std::size_t j = begin; j < end; ++j) {
RMatrix<double>::Row row2 = mat.row(j);
double rval = 0;
for (unsigned int k = 0; k < row1.length(); ++k) {
rval += (row1[k] - row2[k]) * (row1[k] - row2[k]);
}
output[j] = rval;
}
}
};
In particular, the data types used here are of the form RMatrix<double> even for accessing the matrix.
Also, within the parallelized version there is a missing i-1 statement. To remedy this, I've opted to have it taken care of in the constructor of JSDistance.
Test
set.seed(42)
X = matrix(rnorm(10000 * 400), 10000, 400)
start1 = proc.time()
print(dist_row(X, 2)[1:30])
# [1] 811.8873 0.0000 799.8153 810.1442 720.3232 730.6083 797.8441 781.8066 827.1511 834.1863 842.9392 850.2476 724.5842 673.1428 775.0994
# [16] 805.5752 804.9281 774.9770 799.7669 870.3187 815.1129 934.7581 726.1554 804.2097 758.4943 772.8931 806.6026 715.8257 847.8980 831.7555
print(proc.time() - start1)
# user system elapsed
# 0.22 0.00 0.23
start2 = proc.time()
print(parallel_dist_row(X, 2)[1:30])
# [1] 811.8873 0.0000 799.8153 810.1442 720.3232 730.6083 797.8441 781.8066 827.1511 834.1863 842.9392 850.2476 724.5842 673.1428 775.0994
# [16] 805.5752 804.9281 774.9770 799.7669 870.3187 815.1129 934.7581 726.1554 804.2097 758.4943 772.8931 806.6026 715.8257 847.8980 831.7555
print(proc.time() - start2)
# user system elapsed
# 0.28 0.00 0.06
all.equal(parallel_dist_row(X, 2), dist_row(X, 2))
# [1] TRUE

Fast Sequency-ordered Walsh-Hadamard Transform

EDIT You can checkout my implementation on Github: https://github.com/Sheljohn/WalshHadamard
I am looking for an implementation, or indications on how to implement, the sequency-ordered Fast Walsh Hadamard transform (see this and this).
I slightly adapted a very nice implementation found online:
// (a,b) -> (a+b,a-b) without overflow
void rotate( long& a, long& b )
{
static long t;
t = a;
a = a + b;
b = t - b;
}
// Integer log2
long ilog2( long x )
{
long l2 = 0;
for (; x; x >>=1) ++l2;
return l2;
}
/**
* Fast Walsh-Hadamard transform
*/
void fwht( std::vector<long>& data )
{
const long l2 = ilog2(data.size()) - 1;
for (long i = 0; i < l2; ++i)
{
for (long j = 0; j < (1 << l2); j += 1 << (i+1))
for (long k = 0; k < (1 << i ); ++k)
rotate( data[j + k], data[j + k + (1<<i)] );
}
}
but it does not compute the WHT in sequency order (the natural Hadamard matrix is used implicitly). Note that in the code above (and if you try it), the size of data needs to be a power of 2.
My question is: is there a simple adaptation of this implementation that gives the sequency-ordered FWHT?
A possible solution would be to write a small function to compute dynamically the elements of Hn (the Hadamard matrix of order n), count the number of zero crossings, and create a ranking of the rows, but I am wondering whether there is a smarter way. Thanks in advance for any input! Cheers
As indicated here (linked from within your reference):
The sequency ordering of the rows of the Walsh matrix can be derived from the ordering of the Hadamard matrix by first applying the bit-reversal permutation and then the Gray code permutation.
There are various implementations of bit-reversal algorithm such as this:
// Bit-reversal
// adapted from http://www.idi.ntnu.no/~elster/pubs/elster-bit-rev-1989.pdf
void bitrev(int t, std::vector<long>& c)
{
long n = 1<<t;
long L = 1;
c[0] = 0;
for (int q=0; q<t; ++q)
{
n /= 2;
for (long j=0; j<L; ++j)
{
c[L+j] = c[j] + n;
}
L *= 2;
}
}
The gray code can be obtained from here:
/*
The purpose of this function is to convert an unsigned
binary number to reflected binary Gray code.
The operator >> is shift right. The operator ^ is exclusive or.
*/
unsigned int binaryToGray(unsigned int num)
{
return (num >> 1) ^ num;
}
These can be combined to yields the final permutation:
// Compute a permutation of size 2^order
// to reorder the Fast Walsh-Hadamard transform's output
// into the Walsh-ordered (sequency-ordered)
void sequency_permutation(long order, std::vector<long>& p)
{
long n = 1<<order;
std::vector<long> tmp(n);
bitrev(order, tmp);
p.resize(n);
for (long i=0; i<n; ++i)
{
p[i] = tmp[binaryToGray(i)];
}
}
All that's left to do is to apply the permutation to the normal Walsh-Hadamard Transform output.
void permuted_fwht(std::vector<long>& data, const std::vector<long>& permutation)
{
std::vector<long> tmp = data;
fwht(tmp);
for (long i=0; i<data.size(); ++i)
{
data[i] = tmp[permutation[i]];
}
}
Note that the permutation is fixed for a given data size, so it only needs to be computed once (assuming you are processing multiple blocks of data). So, putting it all together you would get something such as:
std::vector<long> p;
const long order = ilog2(data_block_size) - 1;
sequency_permutation(order, p);
permuted_fwht( data_block_1, p);
permuted_fwht( data_block_2, p);
//...

assignment error with Mat OpenCv

I am working with OpenCV and C++ for a project and I found the following problem: after initializing a mat with the following statement
Mat or_mat=Mat(img->height,img->width,CV_32FC1);
check the following value
or_mat.at <float> (i, j) = atan (fy / fx) / 2 +1.5707963;
After completing returning the mat for the output of the function but when I go to read there are many values ​​that do not correspond to the output. Precise in incorrect values ​​for the I-4.31602e +008 is inserted and if I make a cout the value of the expression is correct. What could be the error??
relevant Code:
Mat or_mat=Mat(img->height,img->width,CV_32FC1);
to angle
if(fx > 0){
or_mat.at<float>(i,j) = atan(fy/fx)/2+1.5707963;
}
else if(fx<0 && fy >0){
or_mat.at<float>(i,j) = atan(fy/fx)/2+3.1415926;
}
else if(fx<0 && fy <0){
or_mat.at<float>(i,j) = atan(fy/fx)/2;
}
else if(fy!=0 && fx==0){
or_mat.at<float>(i,j) = 1.5707963;
}
I have to calculate the local orientation of the fingerprint image, the following code I have omitted several statements and calculations that do not have errors.
I would triple check that you are indexing correctly. The following code shows my initialising a matrix full of zeros, and then filling it with some float using at .at operator. It compiles and runs nicely:
int main()
{
int height = 10;
int width = 3;
// Initialise or_mat to with every element set to zero
cv::Mat or_mat = cv::Mat::zeros(height, width, CV_32FC1);
std::cout << "Original or_mat:\n" << or_mat << std::endl;
// Loop through and set each element equal to some float
float value = 10.254;
for (int i = 0; i < or_mat.rows; ++i)
{
for (int j = 0; j < or_mat.cols; ++j)
{
or_mat.at<float>(i,j) = value;
}
}
std::cout << "Final or_mat:\n" << or_mat << std::endl;
return 0;
}