Non-Linear Fit Using GSL - c++

So I'm trying to modify some code I found here to fit a different function, but my slightly modified version fails to converge and I don't understand why.
The function I'm trying to find the least squared fit for is "A + lambdalog(t) + blog(t)^2. Here's the code
main.cpp
#include <stdlib.h>
#include <stdio.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_blas.h>
#include <gsl/gsl_multifit_nlin.h>
#include "expfit.c"
#define N 40 // All "N"s are 40
void print_state (size_t iter, gsl_multifit_fdfsolver * s);//The prototype of some function
int main (void){
const gsl_multifit_fdfsolver_type *T; //pointer to a newly allocated instance of a solver of type T
gsl_multifit_fdfsolver *s;
int status;
unsigned int i, iter = 0;
const size_t n = N; //N number of observations
const size_t p = 3;//3 parameters
gsl_matrix *covar = gsl_matrix_alloc (p, p); // creates a pxp gsl_matrix
double y[N], sigma[N]; //declares vector variables that will hold the noise data. They are "N" long
struct data d = { n, y, sigma}; //Populates struct d with variables n, y and sigma. Struct data is defined in expfit.c
gsl_multifit_function_fdf f;
double x_init[3] = { 0, -.1, -.1 }; //initial x values !These are initial guesses to the solution!
gsl_vector_view x = gsl_vector_view_array (x_init, p);//view arrays allow one to litterally view elements of a certain array without modifying or created a copy of the original array. Essentially a pointer to the original data.
const gsl_rng_type * type; // pointer to a new random number generator type. RNG type will be assigned later
gsl_rng * r; //Pointer to a new RNG
gsl_rng_env_setup();
type = gsl_rng_default; //Assigns random number generator type
r = gsl_rng_alloc (type); // Allocates memory for new RNG of type "type"
f.f = &logb_f;
f.df = &logb_df;
f.fdf = &logb_fdf;
f.n = n;
f.p = p;
f.params = &d;
for (i = 0; i < n; i++){// This is where the data is being generated
double t = i; // t is being redclared at each iteration for some reason wtf
if(t==0){//since log(0) is undefined, I said they equal 0 at t=0
y[i] = 2.0 -.5 * 0 - 0 + gsl_ran_gaussian (r, 0.1);
}else{
y[i] = 2.0 -.5 * log (t) - pow(log(t),2) + gsl_ran_gaussian (r, 0.1); //This is the noised up data
}
sigma[i] = .1; //not sure what this sigma does
printf ("data: %u %g %g\n", i, y[i], sigma[i]);//Printing out the data at each iteration
};
T = gsl_multifit_fdfsolver_lmsder; // Not sure what this is doing
s = gsl_multifit_fdfsolver_alloc (T, n, p);
gsl_multifit_fdfsolver_set (s, &f, &x.vector);
print_state (iter, s);
do{
iter++;
status = gsl_multifit_fdfsolver_iterate (s);
printf ("status = %s\n", gsl_strerror (status));
print_state (iter, s);
if (status)
break;
status = gsl_multifit_test_delta (s->dx, s->x, 1e-4, 1e-4);
}while (status == GSL_CONTINUE && iter < 500);
gsl_multifit_covar (s->J, 0.0, covar);
#define FIT(i) gsl_vector_get(s->x, i)
#define ERR(i) sqrt(gsl_matrix_get(covar,i,i))
{
double chi = gsl_blas_dnrm2(s->f);
double dof = n - p;
double c = GSL_MAX_DBL(1, chi / sqrt(dof));
printf("chisq/dof = %g\n", pow(chi, 2.0) / dof);
printf ("A = %.5f +/- %.5f\n", FIT(0), c*ERR(0));
printf ("lambda = %.5f +/- %.5f\n", FIT(1), c*ERR(1));
printf ("b = %.5f +/- %.5f\n", FIT(2), c*ERR(2));
}
printf ("status = %s\n", gsl_strerror (status));
gsl_multifit_fdfsolver_free (s);
gsl_matrix_free (covar);
gsl_rng_free (r);
return 0;
}
void print_state (size_t iter, gsl_multifit_fdfsolver * s){
printf ("iter: %3zu x = % 15.8f % 15.8f % 15.8f "
"|f(x)| = %g\n",
iter,
gsl_vector_get (s->x, 0),
gsl_vector_get (s->x, 1),
gsl_vector_get (s->x, 2),
gsl_blas_dnrm2 (s->f));
}
expfit.c
//
// expfit.c
// test
//
// Created by [] on 4/11/15.
// Copyright (c) 2015 []. All rights reserved.
//
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_blas.h>
#include <gsl/gsl_multifit_nlin.h>
#include "expfit.h"
/* expfit.c -- model functions for exponential + background */
struct data {
size_t n;
double * y;
double * sigma;
};
int logb_f (const gsl_vector * x, void *data, gsl_vector * f){
size_t n = ((struct data *)data)->n;
double *y = ((struct data *)data)->y;
double *sigma = ((struct data *) data)->sigma;
double A = gsl_vector_get (x, 0);
double lambda = gsl_vector_get (x, 1);
double b = gsl_vector_get (x, 2);
double Yi;//will hold the value of the function to be stored into the vector set
double t;//time variable.
size_t i;//iterative variable
for (i = 0; i < n; i++){
/* Model Yi = A + lambda*log(i) + b*lambda*log(i)^2 */
t = i;
if(t==0){ //need if statement to bypass log(0) when t==0 since the value of log is undefined there
Yi = A + lambda * log(t) + b * pow(log(t),2);//function for t==0
}else{
Yi = A + lambda * log(t) + b * pow(log(t),2); // This is the function we used
}
gsl_vector_set (f, i, (Yi - y[i])/sigma[i]);
}
return GSL_SUCCESS;
}
int logb_df (const gsl_vector * x, void *data, gsl_matrix * J){
size_t n = ((struct data *)data)->n;
double *sigma = ((struct data *) data)->sigma;
//double A = gsl_vector_get (x, 0);
//double lambda = gsl_vector_get (x, 1);
//double b = gsl_vector_get(x,2);
size_t i;
for (i = 0; i < n; i++){
/* Jacobian matrix J(i,j) = dfi / dxj, */
/* where fi = (Yi - yi)/sigma[i], */
/* Yi = A + lambda*log(i) + b*log(i)^2 */
/* and the xj are the parameters (A,lambda,b) */
double t = i;
double s = sigma[i];
gsl_matrix_set (J, i, 0, 1/s);
gsl_matrix_set (J, i, 1, log(t)/s);
gsl_matrix_set (J, i, 2, pow(log(t),2)/s);
}
return GSL_SUCCESS;
}
int logb_fdf (const gsl_vector * x, void *data, gsl_vector * f, gsl_matrix * J){
logb_f (x, data, f);
logb_df (x, data, J);
return GSL_SUCCESS;
}
And here's the header file just incase you want it
//
// expfit.h
// test
//
// Created by [] on 4/11/15.
// Copyright (c) 2015 []. All rights reserved.
//
#ifndef __test__expfit__
#define __test__expfit__
#include <stdio.h>
#endif /* defined(__test__expfit__) */

When computing the fitting function, you consider the special case t=0 to avoid log(0), but the function values don't differ:
if(t==0){
Yi = A + lambda * log(t) + b * pow(log(t),2);//function for t==0
}else{
Yi = A + lambda * log(t) + b * pow(log(t),2); // This is the function we used
}
Furthermore, you don't consider this case when computing the derivatives.
So, I changed the function and the derivatives as follows:
int logb_f (const gsl_vector * x, void *data, gsl_vector * f){
size_t n = ((struct data *)data)->n;
double *y = ((struct data *)data)->y;
double *sigma = ((struct data *) data)->sigma;
double A = gsl_vector_get (x, 0);
double lambda = gsl_vector_get (x, 1);
double b = gsl_vector_get (x, 2);
double Yi;//will hold the value of the function to be stored into the vector set
double t;//time variable.
size_t i;//iterative variable
for (i = 0; i < n; i++){
/* Model Yi = A + lambda*log(i) + b*lambda*log(i)^2 */
t = i;
if(t==0){ //need if statement to bypass log(0) when t==0 since the value of log is undefined there
Yi = A ;
}else{
Yi = A + lambda * log(t) + b * pow(log(t),2); // This is the function we used
}
//Yi = A + lambda * log(t) + b * pow(log(t),2); // This is the function we used
gsl_vector_set (f, i, (Yi - y[i])/sigma[i]);
}
return GSL_SUCCESS;
}
int logb_df (const gsl_vector * x, void *data, gsl_matrix * J){
size_t n = ((struct data *)data)->n;
double *sigma = ((struct data *) data)->sigma;
//double A = gsl_vector_get (x, 0);
//double lambda = gsl_vector_get (x, 1);
//double b = gsl_vector_get(x,2);
size_t i;
for (i = 0; i < n; i++){
/* Jacobian matrix J(i,j) = dfi / dxj, */
/* where fi = (Yi - yi)/sigma[i], */
/* Yi = A + lambda*log(i) + b*log(i)^2 */
/* and the xj are the parameters (A,lambda,b) */
// d
double t = i;
double s = sigma[i];
if(i == 0)
{
gsl_matrix_set (J, i, 0, 0);
gsl_matrix_set (J, i, 1, 0);
gsl_matrix_set (J, i, 2, 0);
}
else
{
gsl_matrix_set (J, i, 0, 1/s);
gsl_matrix_set (J, i, 1, log(t)/s);
gsl_matrix_set (J, i, 2, pow(log(t),2)/s);
}
//Yi = A + lambda * log(t) + b * pow(log(t),2); // This is the function we used
}
return GSL_SUCCESS;
}
In this case the iteration converges:
iter: 0 x = 0.00000000 -0.10000000 -0.10000000 |f(x)| = 470.77
status = success
iter: 1 x = 2.08763815 -0.60282892 -0.97819822 |f(x)| = 5.5047
status = success
iter: 2 x = 2.08763815 -0.60282892 -0.97819822 |f(x)| = 5.5047
chisq/dof = 0.818964
A = 2.08764 +/- 0.08245
lambda = -0.60283 +/- 0.07702
b = -0.97820 +/- 0.01722
status = success
However, I recommend to verify the differentiability of the fit function for t -> 0.
If in doubt, you can also restrict the fit range in the above functions by only considering the values for t > 0:
for (i = 1; i < n; i++) ... instead of for (i = 0; i < n; i++) ...

Related

Can MATLAB C generation coder generate C-code that fits embedded system?

I need to convert this code into C code.
Questions:
Will MATLAB Coder generate C code that are memory safe, e.g they not using calloc or malloc. Misra C standard does not allow coder to use dynamical memory allocation. It's dangerous for embedded system due to memory leaks.
Will MATLAB Coder generate C code with dynamical matrix as argument e.g. functions with arguments foo(float* A, int m, int n) or foo(int m, int n, float A[m][n]) or is fix size example foo(float A[3][5]), only available as option?
Will MATLAB Coder generate C code that can be fitted into an embedded system. How about the internal C++ commands in the .m files such as horzcat, size and vertcat? Will they become 100% portable C-code?
Will MATLAB Coder generate functions that have call by reference? Example foo(float* input, float* output) instead of float* output = foo(float* input)
function [U] = mpc (A, B, C, x, N, r, lb)
## Find matrix
PHI = phiMat(A, C, N);
GAMMA = gammaMat(A, B, C, N);
## Solve first with no constraints
U = solve(PHI, GAMMA, x, N, r, 0, 0, false);
## Then use the last U as upper bound
U = solve(PHI, GAMMA, x, N, r, lb, U(end), true);
end
function U = solve(PHI, GAMMA, x, N, r, lb, ub, constraints)
## Set U
U = zeros(N, 1);
## Iterate Gaussian Elimination
for i = 1:N
## Solve u
if(i == 1)
u = (r - PHI(i,:)*x)/GAMMA(i,i)
else
u = (r - PHI(i,:)*x - GAMMA(i,1:i-1)*U(1:i-1) )/GAMMA(i,i)
end
## Constraints
if(constraints == true)
if(u > ub)
u = ub;
elseif(u < lb)
u = lb;
end
end
## Save u
U(i) = u
end
end
function PHI = phiMat(A, C, N)
## Create the special Observabillity matrix
PHI = [];
for i = 1:N
PHI = vertcat(PHI, C*A^i);
end
end
function GAMMA = gammaMat(A, B, C, N)
## Create the lower triangular toeplitz matrix
GAMMA = [];
for i = 1:N
GAMMA = horzcat(GAMMA, vertcat(zeros((i-1)*size(C*A*B, 1), size(C*A*B, 2)),cabMat(A, B, C, N-i+1)));
end
end
function CAB = cabMat(A, B, C, N)
## Create the column for the GAMMA matrix
CAB = [];
for i = 0:N-1
CAB = vertcat(CAB, C*A^i*B);
end
end
My C-code. Yes its working!
/*
* Generalized_Predictive_Control.c
*
* Created on:
* Author:
*/
#include "Generalized_Predictive_Control.h"
/*
* Parameters
*/
int adim;
int ydim;
int rdim;
int horizon;
/*
* Deceleration
*/
static void obsv(float* PHI, const float* A, const float* C);
static void kalman(float* x, const float* A, const float* B, float* u, const float* K, float* y, const float* C);
static void mul(float* A, float* B, float* C, int row_a, int column_a, int column_b);
static void tran(float* A, int row, int column);
static void CAB(float* GAMMA, float* PHI, const float* A, const float* B, const float* C);
static void solve(float* GAMMA, float* PHI, float* x, float* u, float* r, float lb, float ub, int constraintsON);
static void print(float* A, int row, int column);
void GPC(int adim_, int ydim_, int rdim_, int horizon_, const float* A, const float* B, const float* C, const float* D, const float* K, float* u, float* r, float* y, float* x){
/*
* Set the dimensions
*/
adim = adim_;
ydim = ydim_;
rdim = rdim_;
horizon = horizon_;
/*
* Identify the model - Extended Least Square
*/
int n = 5;
float* phi;
float* theta;
//els(phi, theta, n, y, u, P);
/*
* Create a state space model with Observable canonical form
*/
/*
* Create the extended observability matrix
*/
float PHI[horizon*ydim*adim];
memset(PHI, 0, horizon*ydim*adim*sizeof(float));
obsv(PHI, A, C);
/*
* Create the lower triangular toeplitz matrix
*/
float GAMMA[horizon*rdim*horizon*ydim];
memset(GAMMA, 0, horizon*rdim*horizon*ydim*sizeof(float));
CAB(GAMMA, PHI, A, B, C);
/*
* Solve the best input value
*/
solve(GAMMA, PHI, x, u, r, 0, 0, 0);
solve(GAMMA, PHI, x, u, r, 0, *(u), 1);
/*
* Estimate the state vector
*/
kalman(x, A, B, u, K, y, C);
}
/*
* Identify the model
*/
static void els(float* P, float* phi, float* theta, int polyLength, int totalPolyLength, float* y, float* u, float* e){
/*
* move phi with the inputs, outputs, errors one step to right
*/
for(int i = 0; i < polyLength; i++){
*(phi + i+1 + totalPolyLength*0) = *(phi + i + totalPolyLength*0); // Move one to right for the y's
*(phi + i+1 + totalPolyLength*1) = *(phi + i + totalPolyLength*1); // Move one to right for the u's
*(phi + i+1 + totalPolyLength*2) = *(phi + i + totalPolyLength*2); // Move one to right for the e's
}
/*
* Add the current y, u and e
(*phi + totalPolyLength*0) = -*(y + 0); // Need to be negative!
(*phi + totalPolyLength*1) = *(u + 0);
(*phi + totalPolyLength*2) = *(e + 0);
*/
/*
* phi'*theta
*/
float y_est = 0;
for(int i = 0; i < totalPolyLength; i++){
y_est += *(phi + i) * *(theta + i);
}
float epsilon = *(y + 0) - y_est; // In this case, y is only one element array
/*
* phi*epsilon
*/
float phi_epsilon[totalPolyLength];
memset(phi_epsilon, 0, totalPolyLength*sizeof(float));
for(int i = 0; i < totalPolyLength; i++){
*(phi_epsilon + i) = *(phi + i) * epsilon;
}
/*
* P_vec = P*phi_epsilon
*/
float P_vec[totalPolyLength];
memset(P_vec, 0, totalPolyLength*sizeof(float));
mul(P, phi_epsilon, P_vec, totalPolyLength, totalPolyLength, 1);
/*
* Update our estimated vector theta = theta + P_vec
*/
for(int i = 0; i < totalPolyLength; i++){
*(theta + i) = *(theta + i) + *(P_vec + i);
}
/*
* Update P = P - (P*phi*phi'*P)/(1 + phi'*P*phi)
*/
// Create phi'
float phiT[totalPolyLength];
memset(phiT, 0, totalPolyLength*sizeof(float));
memcpy(phiT, phi, totalPolyLength*sizeof(float));
tran(phiT, totalPolyLength, 1);
// phi'*P
float phiT_P[totalPolyLength];
memset(phiT_P, 0, totalPolyLength*sizeof(float));
mul(phiT, P, phiT_P, 1, totalPolyLength, totalPolyLength);
// phi*phi'*P
float phi_phiT_P[totalPolyLength*totalPolyLength];
memset(phi_phiT_P, 0, totalPolyLength*totalPolyLength*sizeof(float));
mul(phi, phiT_P, phi_phiT_P, totalPolyLength, 1, totalPolyLength);
// P*phi*phi'*P
float P_phi_phiT_P[totalPolyLength*totalPolyLength];
memset(P_phi_phiT_P, 0, totalPolyLength*totalPolyLength*sizeof(float));
mul(P, phi_phiT_P, P_phi_phiT_P, totalPolyLength, totalPolyLength, totalPolyLength);
// P*phi
float P_phi[totalPolyLength];
memset(P_phi, 0, totalPolyLength*sizeof(float));
mul(P, phi, P_phi, totalPolyLength, totalPolyLength, 1);
// phi'*P*phi
float phiT_P_phi[1];
memset(phiT_P_phi, 0, 1*sizeof(float));
mul(phiT, P_phi, phiT_P_phi, 1, totalPolyLength, 1);
// P = P - (P_phi_phiT_P) / (1+phi'*P*phi)
for(int i = 0; i < totalPolyLength*totalPolyLength; i++){
*(P + i) = *(P + i) - *(P_phi_phiT_P + i) / (1 + *(phiT_P_phi));
}
}
/*
* This will solve if GAMMA is square!
*/
static void solve(float* GAMMA, float* PHI, float* x, float* u, float* r, float lb, float ub, int constraintsON){
/*
* Now we are going to solve on the form
* Ax=b, where b = (R*r-PHI*x) and A = GAMMA and x = U
*/
/*
* R_vec = R*r
*/
float R_vec[horizon*ydim];
memset(R_vec, 0, horizon*ydim*sizeof(float));
for(int i = 0; i < horizon*ydim; i++){
for (int j = 0; j < rdim; j++) {
*(R_vec + i + j) = *(r + j);
}
i += rdim-1;
}
/*
* PHI_vec = PHI*x
*/
float PHI_vec[horizon*ydim];
memset(PHI_vec, 0, horizon * ydim * sizeof(float));
mul(PHI, x, PHI_vec, horizon*ydim, adim, 1);
/*
* Solve now (R_vec - PHI_vec) = GAMMA*U
* Notice that this is ONLY for Square GAMMA with lower triangular toeplitz matrix e.g SISO case
* This using Gaussian Elimination backward substitution
*/
float U[horizon];
float sum = 0.0;
memset(U, 0, horizon*sizeof(float));
for(int i = 0; i < horizon; i++){
for(int j = 0; j < i; j++){
sum += *(GAMMA + i*horizon + j) * *(U + j);
}
float newU = (*(R_vec + i) - *(PHI_vec + i) - sum) / (*(GAMMA + i*horizon + i));
if(constraintsON == 1){
if(newU > ub)
newU = ub;
if(newU < lb)
newU = lb;
}
*(U + i) = newU;
sum = 0.0;
}
//print(U, horizon, 1);
/*
* Set last U to u
*/
if(constraintsON == 0){
*(u + 0) = *(U + horizon - 1);
}else{
*(u + 0) = *(U + 0);
}
}
/*
* Lower traingular toeplitz of extended observability matrix
*/
static void CAB(float* GAMMA, float* PHI, const float* A, const float* B, const float* C){
/*
* First create the initial C*A^0*B == C*I*B == C*B
*/
float CB[ydim*rdim];
memset(CB, 0, ydim*rdim*sizeof(float));
mul((float*)C, (float*)B, CB, ydim, adim, rdim);
/*
* Take the transpose of CB so it will have dimension rdim*ydim instead
*/
tran(CB, ydim, rdim);
/*
* Create the CAB matrix from PHI*B
*/
float PHIB[horizon*ydim*rdim];
mul(PHI, (float*) B, PHIB, horizon*ydim, adim, rdim); // CAB = PHI*B
tran(PHIB, horizon*ydim, rdim);
/*
* We insert GAMMA = [CB PHI;
* 0 CB PHI;
* 0 0 CB PHI;
* 0 0 0 CB PHI] from left to right
*/
for(int i = 0; i < horizon; i++) {
for(int j = 0; j < rdim; j++) {
memcpy(GAMMA + horizon*ydim*(i*rdim+j) + ydim*i, CB + ydim*j, ydim*sizeof(float)); // Add CB
memcpy(GAMMA + horizon*ydim*(i*rdim+j) + ydim*i + ydim, PHIB + horizon*ydim*j, (horizon-i-1)*ydim*sizeof(float)); // Add PHI*B
}
}
/*
* Transpose of gamma
*/
tran(GAMMA, horizon*rdim, horizon*ydim);
//print(CB, rdim, ydim);
//print(PHIB, rdim, horizon*ydim);
//print(GAMMA, horizon*ydim, horizon*rdim);
}
/*
* Transpose
*/
static void tran(float* A, int row, int column) {
float B[row*column];
float* transpose;
float* ptr_A = A;
for (int i = 0; i < row; i++) {
transpose = &B[i];
for (int j = 0; j < column; j++) {
*transpose = *ptr_A;
ptr_A++;
transpose += row;
}
}
// Copy!
memcpy(A, B, row*column*sizeof(float));
}
/*
* [C*A^1; C*A^2; C*A^3; ... ; C*A^horizon] % Extended observability matrix
*/
static void obsv(float* PHI, const float* A, const float* C){
/*
* This matrix will A^(i+1) all the time
*/
float A_pow[adim*adim];
memset(A_pow, 0, adim * adim * sizeof(float));
float A_copy[adim*adim];
memcpy(A_copy, (float*) A, adim * adim * sizeof(float));
/*
* Temporary matrix
*/
float T[ydim*adim];
memset(T, 0, ydim * adim * sizeof(float));
/*
* Regular T = C*A^(1+i)
*/
mul((float*) C, (float*) A, T, ydim, adim, adim);
/*
* Insert temporary T into PHI
*/
memcpy(PHI, T, ydim*adim*sizeof(float));
/*
* Do the rest C*A^(i+1) because we have already done i = 0
*/
for(int i = 1; i < horizon; i++){
mul((float*) A, A_copy, A_pow, adim, adim, adim); // Matrix power A_pow = A*A_copy
mul((float*) C, A_pow, T, ydim, adim, adim); // T = C*A^(1+i)
memcpy(PHI + i*ydim*adim, T, ydim*adim*sizeof(float)); // Insert temporary T into PHI
memcpy(A_copy, A_pow, adim * adim * sizeof(float)); // A_copy <- A_pow
}
}
/*
* x = Ax - KCx + Bu + Ky % Kalman filter
*/
static void kalman(float* x, const float* A, const float* B, float* u, const float* K, float* y, const float* C) {
/*
* Compute the vector A_vec = A*x
*/
float A_vec[adim*1];
memset(A_vec, 0, adim*sizeof(float));
mul((float*) A, x, A_vec, adim, adim, 1);
/*
* Compute the vector B_vec = B*u
*/
float B_vec[adim*1];
memset(B_vec, 0, adim*sizeof(float));
mul((float*) B, u, B_vec, adim, rdim, 1);
/*
* Compute the vector C_vec = C*x
*/
float C_vec[ydim*1];
memset(C_vec, 0, ydim*sizeof(float));
mul((float*) C, x, C_vec, ydim, adim, 1);
/*
* Compute the vector KC_vec = K*C_vec
*/
float KC_vec[adim*1];
memset(KC_vec, 0, adim*sizeof(float));
mul((float*) K, C_vec, KC_vec, adim, ydim, 1);
/*
* Compute the vector Ky_vec = K*y
*/
float Ky_vec[adim*1];
memset(Ky_vec, 0, adim*sizeof(float));
mul((float*) K, y, Ky_vec, adim, ydim, 1);
/*
* Now add x = A_vec - KC_vec + B_vec + Ky_vec
*/
for(int i = 0; i < adim; i++){
*(x + i) = *(A_vec + i) - *(KC_vec + i) + *(B_vec + i) + *(Ky_vec + i);
}
}
/*
* C = A*B
*/
static void mul(float* A, float* B, float* C, int row_a, int column_a, int column_b) {
// Data matrix
float* data_a = A;
float* data_b = B;
for (int i = 0; i < row_a; i++) {
// Then we go through every column of b
for (int j = 0; j < column_b; j++) {
data_a = &A[i * column_a];
data_b = &B[j];
*C = 0; // Reset
// And we multiply rows from a with columns of b
for (int k = 0; k < column_a; k++) {
*C += *data_a * *data_b;
data_a++;
data_b += column_b;
}
C++; // ;)
}
}
}
/*
* Print matrix or vector - Just for error check
*/
static void print(float* A, int row, int column) {
for (int i = 0; i < row; i++) {
for (int j = 0; j < column; j++) {
printf("%0.18f ", *(A++));
}
printf("\n");
}
printf("\n");
}
Disclaimer: I work on MATLAB Coder
There is a configuration setting to tell MATLAB Coder to generate code without using dynamically allocated memory or issue an error telling you why it can't do so.
cfg = coder.config('lib');
cfg.DynamicMemoryAllocation = 'Off';
codegen -config cfg ...
MATLAB Coder supports generating code with fixed-size arrays, variable-sized arrays, and dynamically allocated arrays. The various generated signature formats are shown in the documentation. For non-dynamically allocated variable-sized arrays, a common signature is something like: foo(x_data[100], x_size[2])
Yes, the generated code is generally portable and independent of MATLAB for the hardware you specify when generating code. The full list of available functions and classes supported for code generation is listed here. In a very small number of cases, the generated code needs to depend on libraries from MATLAB. Those cases will be called out in the documentation. Fundamental operations like horzcat and vertcat produce portable code that is independent of MATLAB.
Yes. For array outputs and MATLAB functions with multiple outputs, the generated code will return outputs by reference. It also supports passing an argument by reference in some cases when the corresponding MATLAB function has the same variable as an input and output: function A = foo(A,B) with a call like: y = foo(y,z); can produce something like void foo(double A[100], const double B[20]); where A is an input and output.

Quasi-Monte Carlo Integration for inverse cumulative Normal Distribution

I am trying to integrate functions, which includes changing of variables with ICDF function (gsl_cdf_gaussian_Pinv(x[1], 1)), but the results are always wrong:
#include <fstream>
#include <iostream>
#include <memory>
#include <cmath>
#include <iomanip>
#include <ctime>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <math.h>
#include <stdio.h>
#include <gaussinv.c>
#define _USE_MATH_DEFINES
using namespace std;
double f(double[], int);
double int_mcnd(double(*)(double[], int), double[], double[], int, int);
double varr[100];
int k = 0;
double hj = 0;
double mj = 1;
# include "sobol.hpp"
int DIM_NUM = 10;
int main() {
const int n = 10; /* define how many integrals */
// const int m = 1000000; /* define how many points */
double a[n] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* left end-points */
double b[n] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; /* right end-points */
double result;
int i, m;
int ntimes;
cout.setf(ios::fixed | ios::showpoint);
// current time in seconds (begin calculations)
time_t seconds_i;
seconds_i = time(NULL);
m = 1; // initial number of intervals
ntimes = 20; // number of interval doublings with nmax=2^ntimes
cout << setw(12) << n << "D Integral" << endl;
for (i = 0; i <= ntimes; i = i + 1) {
result = int_mcnd(f, a, b, n, m);
cout << setw(10) << m << " " << setprecision(30) << result << endl;
m = m * 2;
}
// current time in seconds (end of calculations)
time_t seconds_f;
seconds_f = time(NULL);
cout << endl << "total elapsed time = " << seconds_f - seconds_i << " seconds" << endl << endl;
return 0;
}
double f(double x[], int n) {
double y;
int j;
y = 0.0;
/* define Multidimensional Gaussian distribution and covariance */
/* X=(x1, k=2, mu = (0, covariance matrix = (v 0 0 0
* x2 0 0 v 0 0
* x3 0 0 0 v 0
* x4) 0) 0 0 0 v) */
double v = 1;
double determinant = pow(v, 10);
double inverse = 1 / v;
double rang = gsl_cdf_gaussian_Pinv(0.99999904632568359375, 1) - gsl_cdf_gaussian_Pinv(0.00000095367431640625, 1) +
gsl_cdf_gaussian_Pinv(0.00000095367431640625, 1);
y = (1 / sqrt(pow(2 * M_PI, 10) * determinant) * exp(-0.5 * (inverse * pow(gsl_cdf_gaussian_Pinv(x[0], 1), 2) +
inverse * pow(gsl_cdf_gaussian_Pinv(x[1], 1), 2) +
inverse * pow(gsl_cdf_gaussian_Pinv(x[2], 1), 2) +
inverse * pow(gsl_cdf_gaussian_Pinv(x[3], 1), 2) +
inverse * pow(gsl_cdf_gaussian_Pinv(x[4], 1), 2) +
inverse * pow(gsl_cdf_gaussian_Pinv(x[5], 1), 2) +
inverse * pow(gsl_cdf_gaussian_Pinv(x[6], 1), 2) +
inverse * pow(gsl_cdf_gaussian_Pinv(x[7], 1), 2) +
inverse * pow(gsl_cdf_gaussian_Pinv(x[8], 1), 2) +
inverse * pow(gsl_cdf_gaussian_Pinv(x[9], 1), 2))));
return y;
}
/*==============================================================
input:
fn - a multiple argument real function (supplied by the user)
a[] - left end-points of the interval of integration
b[] - right end-points of the interval of integration
n - dimension of integral
m - number of random points
output:
r - result of integration
================================================================*/
double int_mcnd(double(*fn)(double[], int), double a[], double b[], int n, int m) {
double r, x[n], p;
int i, j;
double rarr[DIM_NUM];
long long int seed;
seed = 1;
long long int seed_in;
long long int seed_out;
srand(time(NULL)); /* initial seed value (use system time) */
r = 0.0;
p = 1.0;
// step 1: calculate the common factor p
for (j = 0; j < n; j = j + 1) {
// p = p * (b[j] - a[j]);
p=p*(gsl_cdf_gaussian_Pinv(0.99999904632568359375, 1)-gsl_cdf_gaussian_Pinv(0.00000095367431640625, 1));
}
// step 2: integration
for (i = 1; i <= m; i = i + 1) {
seed_in = seed;
i8_sobol(DIM_NUM, &seed, rarr);
seed_out = seed;
// calculate random x[] points
for (j = 0; j < n; j = j + 1) {
x[j] = a[j] + (b[j] - a[j]) * rarr[j];
}
r = r + fn(x, n);
}
cout << endl << "p = " << p << " seconds" << endl << endl;
r = r * p / m;
return r;
}
The problem is in the parametrization parameter p, which I suggest to be
p=p*(gsl_cdf_gaussian_Pinv(0.99999904632568359375, 1)-gsl_cdf_gaussian_Pinv(0.00000095367431640625, 1))
instead of standard - p = p * (b[j] - a[j]);
I want to integrate not only within [0,1]^N intervals, but also in [-20;20].
I can't define my mistake. Can somebody help, please?

CUDA: How to fill a vector of dynamic size on device and return its contents to another device function?

I want to know which is the proper technique to fill an dynamic size array on device (int *row, in the code bellow) and then return its content, to be used by another device function.
Aiming to contextualize the question, the code bellow attempt to span an arbitrary function in a basis set of Legendre polynomials using Gauss-Legendre quadratures running on the GPU.
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
__device__ double *d_droot, *d_dweight;
/*How could be returned the array or the pointer to the array int *row, on the device, that is filled by this function? */
__device__
void Pascal_Triangle(int n_row, int * row) {
int a[100][100];
int i, j;
//first row and first coloumn has the same value=1
for (i = 1; i <= n_row; i++) {
a[i][1] = a[1][i] = 1;
}
//Generate the full Triangle
for (i = 2; i <= n_row; i++) {
for (j = 2; j <= n_row - i; j++) {
if (a[i - 1][j] == 0 || a[i][j - 1] == 0) {
break;
}
a[i][j] = a[i - 1][j] + a[i][j - 1];
}
}
row = new int[n_row];
for (i = 1; i <= n_row; i++) {
row[i] = a[i][n_row-1];
}
}
__device__
double Legendre_poly(int order, double x)
{
int n,k;
double val=0;
int *binomials;
for(n=order; n>=0; n--)
{
Pascal_Triangle(n, binomials); /*Here are the problems*/
for(k=0; k<=n; k++)
val += binomials[k]*pow(x-1,n-k)*pow(x-1,k);
}
return val;
}
__device__ __host__
double f(double alpha,double x)
{
/*function expanded on a basis of Legendre palynomials. */
return exp(-alpha*x*x);
}
/*Kernel that computes the expansion by quadratures*/
__global__ void Span(int n, double alpha, double a, double b, double *coefficients)
{
/*
Parameters:
n: Total number of expansion coeficients
a: Upper integration limit
b: Lower integration limit
d_droots[]: roots for the quadrature
d_dweight[]: weights for the quadrature
coefficients[]: allocate N expansion coefficients.
*/
double c1 = (b - a) / 2, c2 = (b + a) / 2, sum = 0;
int dummy;
int i = blockIdx.x*blockDim.x + threadIdx.x;
if (i < n)
{
coefficients[i] = 0.0;
for (dummy = 0; dummy < 5; dummy++)
coefficients[i] += d_dweight[dummy] * f(alpha,c1 * d_droot[dummy] + c2)*Legendre_poly(dummy,c1 * d_droot[dummy] + c2)*c1;
}
}
int main(void)
{
int N = 1<<23;
int N_nodes = 5;
double *droot, *dweight, *dresult, *d_dresult, *d_droot_temp, *d_dweight_temp;
/*double version in host*/
droot =(double*)malloc(N_nodes*sizeof(double));
dweight =(double*)malloc(N_nodes*sizeof(double));
dresult =(double*)malloc(N*sizeof(double)); /*will recibe the results of N quadratures!*/
/*double version in device*/
cudaMalloc(&d_droot_temp, N_nodes*sizeof(double));
cudaMalloc(&d_dweight_temp, N_nodes*sizeof(double));
cudaMalloc(&d_dresult, N*sizeof(double)); /*results for N quadratures will be contained here*/
/*double version of the roots and weights*/
droot[0] = 0.90618;
droot[1] = 0.538469;
droot[2] = 0.0;
droot[3] = -0.538469;
droot[4] = -0.90618;
dweight[0] = 0.236927;
dweight[1] = 0.478629;
dweight[2] = 0.568889;
dweight[3] = 0.478629;
dweight[4] = 0.236927;
/*double copy host-> device*/
cudaMemcpy(d_droot_temp, droot, N_nodes*sizeof(double), cudaMemcpyHostToDevice);
cudaMemcpy(d_dweight_temp, dweight, N_nodes*sizeof(double), cudaMemcpyHostToDevice);
cudaMemcpyToSymbol(d_droot, &d_droot_temp, sizeof(double *));
cudaMemcpyToSymbol(d_dweight, &d_dweight_temp, sizeof(double *));
// Perform the expansion
Span<<<(N+255)/256, 256>>>(N,1.0, -3.0, 3.0, d_dresult); /*This kerlnel works OK*/
cudaMemcpy(dresult, d_dresult, N*sizeof(double), cudaMemcpyDeviceToHost);
cudaFree(d_dresult);
cudaFree(d_droot_temp);
cudaFree(d_dweight_temp);
}
and here is the makefile to compile the code above:
objects = main.o
all: $(objects)
nvcc -arch=sm_20 $(objects) -o span
%.o: %.cpp
nvcc -x cu -arch=sm_20 -I. -dc $< -o $#
clean:
rm -f *.o span
Thanks in advance for any suggestions.
(sorry my previous answer was off-base)
You are passing a row pointer to this function:
void Pascal_Triangle(int n_row, int * row) {
You are then attempting to overwrite this pointer with a new value:
row = new int[n_row];
Once you return from this function, row in the calling environment will be unmodified. (This is an ordinay C/C++ issue, not specific to CUDA.)
This is perhaps a confusing issue, but the pointer value of row is passed by value to the function Pascal_Triangle. You cannot modify the pointer value in the function, and expect the modified value to show up in the calling environment. (You can modify the contents of the locations that the pointer points to, which would be the usual reason to pass row by pointer.)
There are a few ways to fix this issue. The simplest might be just to pass the pointer by reference:
void Pascal_Triangle(int n_row, int * &row) {
Your code seems to have other defects in it. I would suggest that you employ proper cuda error checking and also run your code with cuda-memcheck.
In particular, the in-kernel new operator behaves in a similar fashion to in-kernel malloc, and it has similar limitations.
You are running out of device heap space, so many of your new operations are failing, and returning a NULL pointer.
As a test for this, it's good debug practice to put a line like this after your new operation:
if (row == NULL) assert(0);
(you'll also need to include assert.h)
If you do that, you'll find that this assert is being hit.
I haven't calculated how much device heap space your code actually needs, but it appears to be using quite a bit. In C++, it's customary to delete an allocation made by new once you're done with it. You might want to investigate freeing the allocations done with new, or else (even better) re-use the allocation (i.e. allocate it once per thread), and avoid the reallocation altogether.
here's a modification to your code that demonstrates the above (one allocation per thread) and compiles and runs without error for me:
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
__device__ double *d_droot, *d_dweight;
/*How could be returned the array or the pointer to the array int *row, on the device, that is filled by this function? */
__device__
void Pascal_Triangle(int n_row, int *row) {
int a[100][100];
int i, j;
//first row and first coloumn has the same value=1
for (i = 1; i <= n_row; i++) {
a[i][1] = a[1][i] = 1;
}
//Generate the full Triangle
for (i = 2; i <= n_row; i++) {
for (j = 2; j <= n_row - i; j++) {
if (a[i - 1][j] == 0 || a[i][j - 1] == 0) {
break;
}
a[i][j] = a[i - 1][j] + a[i][j - 1];
}
}
for (i = 1; i <= n_row; i++) {
row[i] = a[i][n_row-1];
}
}
__device__
double Legendre_poly(int order, double x, int *my_storage)
{
int n,k;
double val=0;
int *binomials = my_storage;
if (binomials == NULL) assert(0);
for(n=order; n>=0; n--)
{
Pascal_Triangle(n, binomials); /*Here are the problems*/
for(k=0; k<=n; k++)
val += binomials[k]*pow(x-1,n-k)*pow(x-1,k);
}
return val;
}
__device__ __host__
double f(double alpha,double x)
{
/*function expanded on a basis of Legendre palynomials. */
return exp(-alpha*x*x);
}
/*Kernel that computes the expansion by quadratures*/
__global__ void Span(int n, double alpha, double a, double b, double *coefficients)
{
/*
Parameters:
n: Total number of expansion coeficients
a: Upper integration limit
b: Lower integration limit
d_droots[]: roots for the quadrature
d_dweight[]: weights for the quadrature
coefficients[]: allocate N expansion coefficients.
*/
double c1 = (b - a) / 2, c2 = (b + a) / 2, sum = 0;
int dummy;
int i = blockIdx.x*blockDim.x + threadIdx.x;
if (i < n)
{
#define MY_LIM 5
int *thr_storage = new int[MY_LIM];
if (thr_storage == NULL) assert(0);
coefficients[i] = 0.0;
for (dummy = 0; dummy < MY_LIM; dummy++)
coefficients[i] += d_dweight[dummy] * f(alpha,c1 * d_droot[dummy] + c2)*Legendre_poly(dummy,c1 * d_droot[dummy] + c2, thr_storage)*c1;
delete thr_storage;
}
}
int main(void)
{
cudaDeviceSetLimit(cudaLimitMallocHeapSize, (1048576ULL*1024));
int N = 1<<23;
int N_nodes = 5;
double *droot, *dweight, *dresult, *d_dresult, *d_droot_temp, *d_dweight_temp;
/*double version in host*/
droot =(double*)malloc(N_nodes*sizeof(double));
dweight =(double*)malloc(N_nodes*sizeof(double));
dresult =(double*)malloc(N*sizeof(double)); /*will recibe the results of N quadratures!*/
/*double version in device*/
cudaMalloc(&d_droot_temp, N_nodes*sizeof(double));
cudaMalloc(&d_dweight_temp, N_nodes*sizeof(double));
cudaMalloc(&d_dresult, N*sizeof(double)); /*results for N quadratures will be contained here*/
/*double version of the roots and weights*/
droot[0] = 0.90618;
droot[1] = 0.538469;
droot[2] = 0.0;
droot[3] = -0.538469;
droot[4] = -0.90618;
dweight[0] = 0.236927;
dweight[1] = 0.478629;
dweight[2] = 0.568889;
dweight[3] = 0.478629;
dweight[4] = 0.236927;
/*double copy host-> device*/
cudaMemcpy(d_droot_temp, droot, N_nodes*sizeof(double), cudaMemcpyHostToDevice);
cudaMemcpy(d_dweight_temp, dweight, N_nodes*sizeof(double), cudaMemcpyHostToDevice);
cudaMemcpyToSymbol(d_droot, &d_droot_temp, sizeof(double *));
cudaMemcpyToSymbol(d_dweight, &d_dweight_temp, sizeof(double *));
// Perform the expansion
Span<<<(N+255)/256, 256>>>(N,1.0, -3.0, 3.0, d_dresult); /*This kerlnel works OK*/
cudaMemcpy(dresult, d_dresult, N*sizeof(double), cudaMemcpyDeviceToHost);
cudaFree(d_dresult);
cudaFree(d_droot_temp);
cudaFree(d_dweight_temp);
}
This code has a couple advantages:
it can run with a much smaller reservation on the device heap
it's considerably quicker than the vast number of allocations that your code was trying to do.
EDIT:
instead of the assert you could do something like this:
/*Kernel that computes the expansion by quadratures*/
__global__ void Span(int n, double alpha, double a, double b, double *coefficients)
{
/*
Parameters:
n: Total number of expansion coeficients
a: Upper integration limit
b: Lower integration limit
d_droots[]: roots for the quadrature
d_dweight[]: weights for the quadrature
coefficients[]: allocate N expansion coefficients.
*/
double c1 = (b - a) / 2, c2 = (b + a) / 2, sum = 0;
int dummy;
int i = blockIdx.x*blockDim.x + threadIdx.x;
if (i < n)
{
#define MY_LIM 5
int *thr_storage = new int[MY_LIM];
if (thr_storage == NULL) printf("allocation failure!\");
else {
coefficients[i] = 0.0;
for (dummy = 0; dummy < MY_LIM; dummy++)
coefficients[i] += d_dweight[dummy] * f(alpha,c1 * d_droot[dummy] + c2)*Legendre_poly(dummy,c1 * d_droot[dummy] + c2, thr_storage)*c1;
delete thr_storage;
}
}
}

opencv: Rigid Transformation between two 3D point clouds

I have two 3D point clouds, and I'd like to use opencv to find the rigid transformation matrix (translation, rotation, constant scaling among all 3 axes).
I've found an estimateRigidTransformation function, but it's only for 2D points apparently
In addition, I've found estimateAffine3D, but it doesn't seem to support rigid transformation mode.
Do I need to just write my own rigid transformation function?
I did not find the required functionality in OpenCV so I have written my own implementation. Based on ideas from OpenSFM.
cv::Vec3d
CalculateMean(const cv::Mat_<cv::Vec3d> &points)
{
cv::Mat_<cv::Vec3d> result;
cv::reduce(points, result, 0, CV_REDUCE_AVG);
return result(0, 0);
}
cv::Mat_<double>
FindRigidTransform(const cv::Mat_<cv::Vec3d> &points1, const cv::Mat_<cv::Vec3d> points2)
{
/* Calculate centroids. */
cv::Vec3d t1 = -CalculateMean(points1);
cv::Vec3d t2 = -CalculateMean(points2);
cv::Mat_<double> T1 = cv::Mat_<double>::eye(4, 4);
T1(0, 3) = t1[0];
T1(1, 3) = t1[1];
T1(2, 3) = t1[2];
cv::Mat_<double> T2 = cv::Mat_<double>::eye(4, 4);
T2(0, 3) = -t2[0];
T2(1, 3) = -t2[1];
T2(2, 3) = -t2[2];
/* Calculate covariance matrix for input points. Also calculate RMS deviation from centroid
* which is used for scale calculation.
*/
cv::Mat_<double> C(3, 3, 0.0);
double p1Rms = 0, p2Rms = 0;
for (int ptIdx = 0; ptIdx < points1.rows; ptIdx++) {
cv::Vec3d p1 = points1(ptIdx, 0) + t1;
cv::Vec3d p2 = points2(ptIdx, 0) + t2;
p1Rms += p1.dot(p1);
p2Rms += p2.dot(p2);
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
C(i, j) += p2[i] * p1[j];
}
}
}
cv::Mat_<double> u, s, vh;
cv::SVD::compute(C, s, u, vh);
cv::Mat_<double> R = u * vh;
if (cv::determinant(R) < 0) {
R -= u.col(2) * (vh.row(2) * 2.0);
}
double scale = sqrt(p2Rms / p1Rms);
R *= scale;
cv::Mat_<double> M = cv::Mat_<double>::eye(4, 4);
R.copyTo(M.colRange(0, 3).rowRange(0, 3));
cv::Mat_<double> result = T2 * M * T1;
result /= result(3, 3);
return result.rowRange(0, 3);
}
I've found PCL to be a nice adjunct to OpenCV. Take a look at their Iterative Closest Point (ICP) example. The provided example registers the two point clouds and then displays the rigid transformation.
Here's my rmsd code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <assert.h>
typedef struct
{
float m[4][4];
} MATRIX;
#define vdiff2(a,b) ( ((a)[0]-(b)[0]) * ((a)[0]-(b)[0]) + \
((a)[1]-(b)[1]) * ((a)[1]-(b)[1]) + \
((a)[2]-(b)[2]) * ((a)[2]-(b)[2]) )
static double alignedrmsd(float *v1, float *v2, int N);
static void centroid(float *ret, float *v, int N);
static int getalignmtx(float *v1, float *v2, int N, MATRIX *mtx);
static void crossproduct(float *ans, float *pt1, float *pt2);
static void mtx_root(MATRIX *mtx);
static int almostequal(MATRIX *a, MATRIX *b);
static void mulpt(MATRIX *mtx, float *pt);
static void mtx_mul(MATRIX *ans, MATRIX *x, MATRIX *y);
static void mtx_identity(MATRIX *mtx);
static void mtx_trans(MATRIX *mtx, float x, float y, float z);
static int mtx_invert(float *mtx, int N);
static float absmaxv(float *v, int N);
/*
calculate rmsd between two structures
Params: v1 - first set of points
v2 - second set of points
N - number of points
mtx - return for transfrom matrix used to align structures
Returns: rmsd score
Notes: mtx can be null. Transform will be rigid. Inputs must
be previously aligned for sequence alignment
*/
double rmsd(float *v1, float *v2, int N, float *mtx)
{
float cent1[3];
float cent2[3];
MATRIX tmtx;
MATRIX tempmtx;
MATRIX move1;
MATRIX move2;
int i;
double answer;
float *temp1 = 0;
float *temp2 = 0;
int err;
assert(N > 3);
temp1 = malloc(N * 3 * sizeof(float));
temp2 = malloc(N * 3 * sizeof(float));
if(!temp1 || !temp2)
goto error_exit;
centroid(cent1, v1, N);
centroid(cent2, v2, N);
for(i=0;i<N;i++)
{
temp1[i*3+0] = v1[i*3+0] - cent1[0];
temp1[i*3+1] = v1[i*3+1] - cent1[1];
temp1[i*3+2] = v1[i*3+2] - cent1[2];
temp2[i*3+0] = v2[i*3+0] - cent2[0];
temp2[i*3+1] = v2[i*3+1] - cent2[1];
temp2[i*3+2] = v2[i*3+2] - cent2[2];
}
err = getalignmtx(temp1, temp2, N, &tmtx);
if(err == -1)
goto error_exit;
mtx_trans(&move1, -cent2[0], -cent2[1], -cent2[2]);
mtx_mul(&tempmtx, &move1, &tmtx);
mtx_trans(&move2, cent1[0], cent1[1], cent1[2]);
mtx_mul(&tmtx, &tempmtx, &move2);
memcpy(temp2, v2, N * sizeof(float) * 3);
for(i=0;i<N;i++)
mulpt(&tmtx, temp2 + i * 3);
answer = alignedrmsd(v1, temp2, N);
free(temp1);
free(temp2);
if(mtx)
memcpy(mtx, &tmtx.m, 16 * sizeof(float));
return answer;
error_exit:
free(temp1);
free(temp2);
if(mtx)
{
for(i=0;i<16;i++)
mtx[i] = 0;
}
return sqrt(-1.0);
}
/*
calculate rmsd between two aligned structures (trivial)
Params: v1 - first structure
v2 - second structure
N - number of points
Returns: rmsd
*/
static double alignedrmsd(float *v1, float *v2, int N)
{
double answer =0;
int i;
for(i=0;i<N;i++)
answer += vdiff2(v1 + i *3, v2 + i * 3);
return sqrt(answer/N);
}
/*
compute the centroid
*/
static void centroid(float *ret, float *v, int N)
{
int i;
ret[0] = 0;
ret[1] = 0;
ret[2] = 0;
for(i=0;i<N;i++)
{
ret[0] += v[i*3+0];
ret[1] += v[i*3+1];
ret[2] += v[i*3+2];
}
ret[0] /= N;
ret[1] /= N;
ret[2] /= N;
}
/*
get the matrix needed to align two structures
Params: v1 - reference structure
v2 - structure to align
N - number of points
mtx - return for rigid body alignment matrix
Notes: only calculates rotation part of matrix.
assumes input has been aligned to centroids
*/
static int getalignmtx(float *v1, float *v2, int N, MATRIX *mtx)
{
MATRIX A = { {{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,1}} };
MATRIX At;
MATRIX Ainv;
MATRIX temp;
float tv[3];
float tw[3];
float tv2[3];
float tw2[3];
int k, i, j;
int flag = 0;
float correction;
correction = absmaxv(v1, N * 3) * absmaxv(v2, N * 3);
for(k=0;k<N;k++)
for(i=0;i<3;i++)
for(j=0;j<3;j++)
A.m[i][j] += (v1[k*3+i] * v2[k*3+j])/correction;
while(flag < 3)
{
for(i=0;i<4;i++)
for(j=0;j<4;j++)
At.m[i][j] = A.m[j][i];
memcpy(&Ainv, &A, sizeof(MATRIX));
/* this will happen if all points are in a plane */
if( mtx_invert((float *) &Ainv, 4) == -1)
{
if(flag == 0)
{
crossproduct(tv, v1, v1+3);
crossproduct(tw, v2, v2+3);
}
else
{
crossproduct(tv2, tv, v1);
crossproduct(tw2, tw, v2);
memcpy(tv, tv2, 3 * sizeof(float));
memcpy(tw, tw2, 3 * sizeof(float));
}
for(i=0;i<3;i++)
for(j=0;j<3;j++)
A.m[i][j] += tv[i] * tw[j];
flag++;
}
else
flag = 5;
}
if(flag != 5)
return -1;
mtx_mul(&temp, &At, &A);
mtx_root(&temp);
mtx_mul(mtx, &temp, &Ainv);
return 0;
}
/*
get the crossproduct of two vectors.
Params: ans - return pinter for answer.
pt1 - first vector
pt2 - second vector.
Notes: crossproduct is at right angles to the two vectors.
*/
static void crossproduct(float *ans, float *pt1, float *pt2)
{
ans[0] = pt1[1] * pt2[2] - pt1[2] * pt2[1];
ans[1] = pt1[0] * pt2[2] - pt1[2] * pt2[0];
ans[2] = pt1[0] * pt2[1] - pt1[1] * pt2[0];
}
/*
Denman-Beavers square root iteration
*/
static void mtx_root(MATRIX *mtx)
{
MATRIX Y = *mtx;
MATRIX Z;
MATRIX Y1;
MATRIX Z1;
MATRIX invY;
MATRIX invZ;
MATRIX Y2;
int iter = 0;
int i, ii;
mtx_identity(&Z);
do
{
invY = Y;
invZ = Z;
if( mtx_invert((float *) &invY, 4) == -1)
return;
if( mtx_invert((float *) &invZ, 4) == -1)
return;
for(i=0;i<4;i++)
for(ii=0;ii<4;ii++)
{
Y1.m[i][ii] = 0.5 * (Y.m[i][ii] + invZ.m[i][ii]);
Z1.m[i][ii] = 0.5 * (Z.m[i][ii] + invY.m[i][ii]);
}
Y = Y1;
Z = Z1;
mtx_mul(&Y2, &Y, &Y);
}
while(!almostequal(&Y2, mtx) && iter++ < 20 );
*mtx = Y;
}
/*
Check two matrices for near-enough equality
Params: a - first matrix
b - second matrix
Returns: 1 if almost equal, else 0, epsilon 0.0001f.
*/
static int almostequal(MATRIX *a, MATRIX *b)
{
int i, ii;
float epsilon = 0.001f;
for(i=0;i<4;i++)
for(ii=0;ii<4;ii++)
if(fabs(a->m[i][ii] - b->m[i][ii]) > epsilon)
return 0;
return 1;
}
/*
multiply a point by a matrix.
Params: mtx - matrix
pt - the point (transformed)
*/
static void mulpt(MATRIX *mtx, float *pt)
{
float ans[4] = {0};
int i;
int ii;
for(i=0;i<4;i++)
{
for(ii=0;ii<3;ii++)
{
ans[i] += pt[ii] * mtx->m[ii][i];
}
ans[i] += mtx->m[3][i];
}
pt[0] = ans[0];
pt[1] = ans[1];
pt[2] = ans[2];
}
/*
multiply two matrices.
Params: ans - return pointer for answer.
x - first matrix
y - second matrix.
Notes: ans may not be equal to x or y.
*/
static void mtx_mul(MATRIX *ans, MATRIX *x, MATRIX *y)
{
int i;
int ii;
int iii;
for(i=0;i<4;i++)
for(ii=0;ii<4;ii++)
{
ans->m[i][ii] = 0;
for(iii=0;iii<4;iii++)
ans->m[i][ii] += x->m[i][iii] * y->m[iii][ii];
}
}
/*
create an identity matrix.
Params: mtx - return pointer.
*/
static void mtx_identity(MATRIX *mtx)
{
int i;
int ii;
for(i=0;i<4;i++)
for(ii=0;ii<4;ii++)
{
if(i==ii)
mtx->m[i][ii] = 1.0f;
else
mtx->m[i][ii] = 0;
}
}
/*
create a translation matrix.
Params: mtx - return pointer for matrix.
x - x translation.
y - y translation.
z - z translation
*/
static void mtx_trans(MATRIX *mtx, float x, float y, float z)
{
mtx->m[0][0] = 1;
mtx->m[0][1] = 0;
mtx->m[0][2] = 0;
mtx->m[0][3] = 0;
mtx->m[1][0] = 0;
mtx->m[1][1] = 1;
mtx->m[1][2] = 0;
mtx->m[1][3] = 0;
mtx->m[2][0] = 0;
mtx->m[2][1] = 0;
mtx->m[2][2] = 1;
mtx->m[2][3] = 0;
mtx->m[3][0] = x;
mtx->m[3][1] = y;
mtx->m[3][2] = z;
mtx->m[3][3] = 1;
}
/*
matrix invert routine
Params: mtx - the matrix in raw format, in/out
N - width and height
Returns: 0 on success, -1 on fail
*/
static int mtx_invert(float *mtx, int N)
{
int indxc[100]; /* these 100s are the only restriction on matrix size */
int indxr[100];
int ipiv[100];
int i, j, k;
int irow, icol;
double big;
double pinv;
int l, ll;
double dum;
double temp;
assert(N <= 100);
for(i=0;i<N;i++)
ipiv[i] = 0;
for(i=0;i<N;i++)
{
big = 0.0;
/* find biggest element */
for(j=0;j<N;j++)
if(ipiv[j] != 1)
for(k=0;k<N;k++)
if(ipiv[k] == 0)
if(fabs(mtx[j*N+k]) >= big)
{
big = fabs(mtx[j*N+k]);
irow = j;
icol = k;
}
ipiv[icol]=1;
if(irow != icol)
for(l=0;l<N;l++)
{
temp = mtx[irow * N + l];
mtx[irow * N + l] = mtx[icol * N + l];
mtx[icol * N + l] = temp;
}
indxr[i] = irow;
indxc[i] = icol;
/* if biggest element is zero matrix is singular, bail */
if(mtx[icol* N + icol] == 0)
goto error_exit;
pinv = 1.0/mtx[icol * N + icol];
mtx[icol * N + icol] = 1.0;
for(l=0;l<N;l++)
mtx[icol * N + l] *= pinv;
for(ll=0;ll<N;ll++)
if(ll != icol)
{
dum = mtx[ll * N + icol];
mtx[ll * N + icol] = 0.0;
for(l=0;l<N;l++)
mtx[ll * N + l] -= mtx[icol * N + l]*dum;
}
}
/* unscramble matrix */
for (l=N-1;l>=0;l--)
{
if (indxr[l] != indxc[l])
for (k=0;k<N;k++)
{
temp = mtx[k * N + indxr[l]];
mtx[k * N + indxr[l]] = mtx[k * N + indxc[l]];
mtx[k * N + indxc[l]] = temp;
}
}
return 0;
error_exit:
return -1;
}
/*
get the asolute maximum of an array
*/
static float absmaxv(float *v, int N)
{
float answer;
int i;
for(i=0;i<N;i++)
if(answer < fabs(v[i]))
answer = fabs(v[i]);
return answer;
}
#include <stdio.h>
/*
debug utlitiy
*/
static void printmtx(FILE *fp, MATRIX *mtx)
{
int i, ii;
for(i=0;i<4;i++)
{
for(ii=0;ii<4;ii++)
fprintf(fp, "%f, ", mtx->m[i][ii]);
fprintf(fp, "\n");
}
}
int rmsdmain(void)
{
float one[4*3] = {0,0,0, 1,0,0, 2,1,0, 0,3,1};
float two[4*3] = {0,0,0, 0,1,0, 1,2,0, 3,0,1};
MATRIX mtx;
double diff;
int i;
diff = rmsd(one, two, 4, (float *) &mtx.m);
printf("%f\n", diff);
printmtx(stdout, &mtx);
for(i=0;i<4;i++)
{
mulpt(&mtx, two + i * 3);
printf("%f %f %f\n", two[i*3], two[i*3+1], two[i*3+2]);
}
return 0;
}
I took #vagran's implementation and added RANSAC on top of it, since estimateRigidTransform2d does it and it was helpful for me since my data is noisy. (Note: This code doesn't have constant scaling along all 3 axes; you can add it back in easily by comparing to vargran's).
cv::Vec3f CalculateMean(const cv::Mat_<cv::Vec3f> &points)
{
if(points.size().height == 0){
return 0;
}
assert(points.size().width == 1);
double mx = 0.0;
double my = 0.0;
double mz = 0.0;
int n_points = points.size().height;
for(int i = 0; i < n_points; i++){
double x = double(points(i)[0]);
double y = double(points(i)[1]);
double z = double(points(i)[2]);
mx += x;
my += y;
mz += z;
}
return cv::Vec3f(mx/n_points, my/n_points, mz/n_points);
}
cv::Mat_<double>
FindRigidTransform(const cv::Mat_<cv::Vec3f> &points1, const cv::Mat_<cv::Vec3f> points2)
{
/* Calculate centroids. */
cv::Vec3f t1 = CalculateMean(points1);
cv::Vec3f t2 = CalculateMean(points2);
cv::Mat_<double> T1 = cv::Mat_<double>::eye(4, 4);
T1(0, 3) = double(-t1[0]);
T1(1, 3) = double(-t1[1]);
T1(2, 3) = double(-t1[2]);
cv::Mat_<double> T2 = cv::Mat_<double>::eye(4, 4);
T2(0, 3) = double(t2[0]);
T2(1, 3) = double(t2[1]);
T2(2, 3) = double(t2[2]);
/* Calculate covariance matrix for input points. Also calculate RMS deviation from centroid
* which is used for scale calculation.
*/
cv::Mat_<double> C(3, 3, 0.0);
for (int ptIdx = 0; ptIdx < points1.rows; ptIdx++) {
cv::Vec3f p1 = points1(ptIdx) - t1;
cv::Vec3f p2 = points2(ptIdx) - t2;
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
C(i, j) += double(p2[i] * p1[j]);
}
}
}
cv::Mat_<double> u, s, vt;
cv::SVD::compute(C, s, u, vt);
cv::Mat_<double> R = u * vt;
if (cv::determinant(R) < 0) {
R -= u.col(2) * (vt.row(2) * 2.0);
}
cv::Mat_<double> M = cv::Mat_<double>::eye(4, 4);
R.copyTo(M.colRange(0, 3).rowRange(0, 3));
cv::Mat_<double> result = T2 * M * T1;
result /= result(3, 3);
return result;
}
cv::Mat_<double> RANSACFindRigidTransform(const cv::Mat_<cv::Vec3f> &points1, const cv::Mat_<cv::Vec3f> &points2)
{
cv::Mat points1Homo;
cv::convertPointsToHomogeneous(points1, points1Homo);
int iterations = 100;
int min_n_points = 3;
int n_points = points1.size().height;
std::vector<int> range(n_points);
cv::Mat_<double> best;
int best_inliers = -1;
// inlier points should be projected within this many units
float threshold = .02;
std::iota(range.begin(), range.end(), 0);
auto gen = std::mt19937{std::random_device{}()};
for(int i = 0; i < iterations; i++) {
std::shuffle(range.begin(), range.end(), gen);
cv::Mat_<cv::Vec3f> points1subset(min_n_points, 1, cv::Vec3f(0,0,0));
cv::Mat_<cv::Vec3f> points2subset(min_n_points, 1, cv::Vec3f(0,0,0));
for(int j = 0; j < min_n_points; j++) {
points1subset(j) = points1(range[j]);
points2subset(j) = points2(range[j]);
}
cv::Mat_<float> rigidT = FindRigidTransform(points1subset, points2subset);
cv::Mat_<float> rigidT_float = cv::Mat::eye(4, 4, CV_32F);
rigidT.convertTo(rigidT_float, CV_32F);
std::vector<int> inliers;
for(int j = 0; j < n_points; j++) {
cv::Mat_<float> t1_3d = rigidT_float * cv::Mat_<float>(points1Homo.at<cv::Vec4f>(j));
if(t1_3d(3) == 0) {
continue; // Avoid 0 division
}
float dx = (t1_3d(0)/t1_3d(3) - points2(j)[0]);
float dy = (t1_3d(1)/t1_3d(3) - points2(j)[1]);
float dz = (t1_3d(2)/t1_3d(3) - points2(j)[2]);
float square_dist = dx * dx + dy * dy + dz * dz;
if(square_dist < threshold * threshold){
inliers.push_back(j);
}
}
int n_inliers = inliers.size();
if(n_inliers > best_inliers) {
best_inliers = n_inliers;
best = rigidT;
}
}
return best;
}
#vagran Thanks for the code! Seems to work very well.
I do have a little terminology suggestion though. Since you are estimating and applying a scale during the transformation, it is a 7-parameter transformation, or Helmert / similarity transformation. And in a rigid transformation, no scaling is applied because all Euclidiean distances need to be reserved.
I would've added this as comment, but don't have enough points.. D: sorry for that.
rigid transformation: https://en.wikipedia.org/wiki/Rigid_transformation
Helmert transformation: https://www.researchgate.net/publication/322841143_Parameter_estimation_in_3D_affine_and_similarity_transformation_implementation_of_variance_component_estimation

bandpass butterworth filter implementation in C++

I am implementing an image analysis algorithm using openCV and c++, but I found out openCV doesnt have any function for Butterworth Bandpass filter officially.
in my project I have to pass a time series of pixels into the Butterworth 5 order filter and the function will return the filtered time series pixels. Butterworth(pixelseries,order, frequency), if you have any idea to help me of how to start please let me know. Thank you
EDIT :
after getting help, finally I come up with the following code. which can calculate the Numerator Coefficients and Denominator Coefficients, but the problem is that some of the numbers is not as same as matlab results. here is my code:
#include <iostream>
#include <stdio.h>
#include <vector>
#include <math.h>
using namespace std;
#define N 10 //The number of images which construct a time series for each pixel
#define PI 3.14159
double *ComputeLP( int FilterOrder )
{
double *NumCoeffs;
int m;
int i;
NumCoeffs = (double *)calloc( FilterOrder+1, sizeof(double) );
if( NumCoeffs == NULL ) return( NULL );
NumCoeffs[0] = 1;
NumCoeffs[1] = FilterOrder;
m = FilterOrder/2;
for( i=2; i <= m; ++i)
{
NumCoeffs[i] =(double) (FilterOrder-i+1)*NumCoeffs[i-1]/i;
NumCoeffs[FilterOrder-i]= NumCoeffs[i];
}
NumCoeffs[FilterOrder-1] = FilterOrder;
NumCoeffs[FilterOrder] = 1;
return NumCoeffs;
}
double *ComputeHP( int FilterOrder )
{
double *NumCoeffs;
int i;
NumCoeffs = ComputeLP(FilterOrder);
if(NumCoeffs == NULL ) return( NULL );
for( i = 0; i <= FilterOrder; ++i)
if( i % 2 ) NumCoeffs[i] = -NumCoeffs[i];
return NumCoeffs;
}
double *TrinomialMultiply( int FilterOrder, double *b, double *c )
{
int i, j;
double *RetVal;
RetVal = (double *)calloc( 4 * FilterOrder, sizeof(double) );
if( RetVal == NULL ) return( NULL );
RetVal[2] = c[0];
RetVal[3] = c[1];
RetVal[0] = b[0];
RetVal[1] = b[1];
for( i = 1; i < FilterOrder; ++i )
{
RetVal[2*(2*i+1)] += c[2*i] * RetVal[2*(2*i-1)] - c[2*i+1] * RetVal[2*(2*i-1)+1];
RetVal[2*(2*i+1)+1] += c[2*i] * RetVal[2*(2*i-1)+1] + c[2*i+1] * RetVal[2*(2*i-1)];
for( j = 2*i; j > 1; --j )
{
RetVal[2*j] += b[2*i] * RetVal[2*(j-1)] - b[2*i+1] * RetVal[2*(j-1)+1] +
c[2*i] * RetVal[2*(j-2)] - c[2*i+1] * RetVal[2*(j-2)+1];
RetVal[2*j+1] += b[2*i] * RetVal[2*(j-1)+1] + b[2*i+1] * RetVal[2*(j-1)] +
c[2*i] * RetVal[2*(j-2)+1] + c[2*i+1] * RetVal[2*(j-2)];
}
RetVal[2] += b[2*i] * RetVal[0] - b[2*i+1] * RetVal[1] + c[2*i];
RetVal[3] += b[2*i] * RetVal[1] + b[2*i+1] * RetVal[0] + c[2*i+1];
RetVal[0] += b[2*i];
RetVal[1] += b[2*i+1];
}
return RetVal;
}
double *ComputeNumCoeffs(int FilterOrder)
{
double *TCoeffs;
double *NumCoeffs;
int i;
NumCoeffs = (double *)calloc( 2*FilterOrder+1, sizeof(double) );
if( NumCoeffs == NULL ) return( NULL );
TCoeffs = ComputeHP(FilterOrder);
if( TCoeffs == NULL ) return( NULL );
for( i = 0; i < FilterOrder; ++i)
{
NumCoeffs[2*i] = TCoeffs[i];
NumCoeffs[2*i+1] = 0.0;
}
NumCoeffs[2*FilterOrder] = TCoeffs[FilterOrder];
free(TCoeffs);
return NumCoeffs;
}
double *ComputeDenCoeffs( int FilterOrder, double Lcutoff, double Ucutoff )
{
int k; // loop variables
double theta; // PI * (Ucutoff - Lcutoff) / 2.0
double cp; // cosine of phi
double st; // sine of theta
double ct; // cosine of theta
double s2t; // sine of 2*theta
double c2t; // cosine 0f 2*theta
double *RCoeffs; // z^-2 coefficients
double *TCoeffs; // z^-1 coefficients
double *DenomCoeffs; // dk coefficients
double PoleAngle; // pole angle
double SinPoleAngle; // sine of pole angle
double CosPoleAngle; // cosine of pole angle
double a; // workspace variables
cp = cos(PI * (Ucutoff + Lcutoff) / 2.0);
theta = PI * (Ucutoff - Lcutoff) / 2.0;
st = sin(theta);
ct = cos(theta);
s2t = 2.0*st*ct; // sine of 2*theta
c2t = 2.0*ct*ct - 1.0; // cosine of 2*theta
RCoeffs = (double *)calloc( 2 * FilterOrder, sizeof(double) );
TCoeffs = (double *)calloc( 2 * FilterOrder, sizeof(double) );
for( k = 0; k < FilterOrder; ++k )
{
PoleAngle = PI * (double)(2*k+1)/(double)(2*FilterOrder);
SinPoleAngle = sin(PoleAngle);
CosPoleAngle = cos(PoleAngle);
a = 1.0 + s2t*SinPoleAngle;
RCoeffs[2*k] = c2t/a;
RCoeffs[2*k+1] = s2t*CosPoleAngle/a;
TCoeffs[2*k] = -2.0*cp*(ct+st*SinPoleAngle)/a;
TCoeffs[2*k+1] = -2.0*cp*st*CosPoleAngle/a;
}
DenomCoeffs = TrinomialMultiply(FilterOrder, TCoeffs, RCoeffs );
free(TCoeffs);
free(RCoeffs);
DenomCoeffs[1] = DenomCoeffs[0];
DenomCoeffs[0] = 1.0;
for( k = 3; k <= 2*FilterOrder; ++k )
DenomCoeffs[k] = DenomCoeffs[2*k-2];
return DenomCoeffs;
}
void filter(int ord, double *a, double *b, int np, double *x, double *y)
{
int i,j;
y[0]=b[0] * x[0];
for (i=1;i<ord+1;i++)
{
y[i]=0.0;
for (j=0;j<i+1;j++)
y[i]=y[i]+b[j]*x[i-j];
for (j=0;j<i;j++)
y[i]=y[i]-a[j+1]*y[i-j-1];
}
for (i=ord+1;i<np+1;i++)
{
y[i]=0.0;
for (j=0;j<ord+1;j++)
y[i]=y[i]+b[j]*x[i-j];
for (j=0;j<ord;j++)
y[i]=y[i]-a[j+1]*y[i-j-1];
}
}
int main(int argc, char *argv[])
{
//Frequency bands is a vector of values - Lower Frequency Band and Higher Frequency Band
//First value is lower cutoff and second value is higher cutoff
double FrequencyBands[2] = {0.25,0.375};//these values are as a ratio of f/fs, where fs is sampling rate, and f is cutoff frequency
//and therefore should lie in the range [0 1]
//Filter Order
int FiltOrd = 5;
//Pixel Time Series
/*int PixelTimeSeries[N];
int outputSeries[N];
*/
//Create the variables for the numerator and denominator coefficients
double *DenC = 0;
double *NumC = 0;
//Pass Numerator Coefficients and Denominator Coefficients arrays into function, will return the same
NumC = ComputeNumCoeffs(FiltOrd);
for(int k = 0; k<11; k++)
{
printf("NumC is: %lf\n", NumC[k]);
}
//is A in matlab function and the numbers are correct
DenC = ComputeDenCoeffs(FiltOrd, FrequencyBands[0], FrequencyBands[1]);
for(int k = 0; k<11; k++)
{
printf("DenC is: %lf\n", DenC[k]);
}
double y[5];
double x[5]={1,2,3,4,5};
filter(5, DenC, NumC, 5, x, y);
return 1;
}
I get this resutls for my code :
B= 1,0,-5,0,10,0,-10,0,5,0,-1
A= 1.000000000000000, -4.945988709743181, 13.556489496973796, -24.700711850327743,
32.994881546824828, -33.180726698160655, 25.546126213403539, -14.802008410165968,
6.285430089797051, -1.772929809750849, 0.277753012228403
but if I want to test the coefficinets in same frequency band in MATLAB, I get the following results:
>> [B, A]=butter(5, [0.25,0.375])
B = 0.0002, 0, -0.0008, 0, 0.0016, 0, -0.0016, 0, 0.0008, 0, -0.0002
A = 1.0000, -4.9460, 13.5565, -24.7007, 32.9948, -33.1806, 25.5461, -14.8020, 6.2854, -1.7729, 0.2778
I have test this website :http://www.exstrom.com/journal/sigproc/ code, but the result is equal as mine, not matlab. anybody knows why? or how can I get the same result as matlab toolbox?
I know this is a post on an old thread, and I would usually leave this as a comment, but I'm apparently not able to do that.
In any case, for people searching for similar code, I thought I would post the link from where this code originates (it also has C code for other types of Butterworth filter coefficients and some other cool signal processing code).
The code is located here:
http://www.exstrom.com/journal/sigproc/
Additionally, I think there is a piece of code which calculates said scaling factor for you already.
/**********************************************************************
sf_bwbp - calculates the scaling factor for a butterworth bandpass filter.
The scaling factor is what the c coefficients must be multiplied by so
that the filter response has a maximum value of 1.
*/
double sf_bwbp( int n, double f1f, double f2f )
{
int k; // loop variables
double ctt; // cotangent of theta
double sfr, sfi; // real and imaginary parts of the scaling factor
double parg; // pole angle
double sparg; // sine of pole angle
double cparg; // cosine of pole angle
double a, b, c; // workspace variables
ctt = 1.0 / tan(M_PI * (f2f - f1f) / 2.0);
sfr = 1.0;
sfi = 0.0;
for( k = 0; k < n; ++k )
{
parg = M_PI * (double)(2*k+1)/(double)(2*n);
sparg = ctt + sin(parg);
cparg = cos(parg);
a = (sfr + sfi)*(sparg - cparg);
b = sfr * sparg;
c = -sfi * cparg;
sfr = b - c;
sfi = a - b - c;
}
return( 1.0 / sfr );
}
I finally found it.
I just need to implement the following code from matlab source code to c++ . "the_mandrill" were right, I need to add the normalizing constant into the coefficient:
kern = exp(-j*w*(0:length(b)-1));
b = real(b*(kern*den(:))/(kern*b(:)));
EDIT:
and here is the final edition, which the whole code will return numbers exactly equal to MATLAB :
double *ComputeNumCoeffs(int FilterOrder,double Lcutoff, double Ucutoff, double *DenC)
{
double *TCoeffs;
double *NumCoeffs;
std::complex<double> *NormalizedKernel;
double Numbers[11]={0,1,2,3,4,5,6,7,8,9,10};
int i;
NumCoeffs = (double *)calloc( 2*FilterOrder+1, sizeof(double) );
if( NumCoeffs == NULL ) return( NULL );
NormalizedKernel = (std::complex<double> *)calloc( 2*FilterOrder+1, sizeof(std::complex<double>) );
if( NormalizedKernel == NULL ) return( NULL );
TCoeffs = ComputeHP(FilterOrder);
if( TCoeffs == NULL ) return( NULL );
for( i = 0; i < FilterOrder; ++i)
{
NumCoeffs[2*i] = TCoeffs[i];
NumCoeffs[2*i+1] = 0.0;
}
NumCoeffs[2*FilterOrder] = TCoeffs[FilterOrder];
double cp[2];
double Bw, Wn;
cp[0] = 2*2.0*tan(PI * Lcutoff/ 2.0);
cp[1] = 2*2.0*tan(PI * Ucutoff / 2.0);
Bw = cp[1] - cp[0];
//center frequency
Wn = sqrt(cp[0]*cp[1]);
Wn = 2*atan2(Wn,4);
double kern;
const std::complex<double> result = std::complex<double>(-1,0);
for(int k = 0; k<11; k++)
{
NormalizedKernel[k] = std::exp(-sqrt(result)*Wn*Numbers[k]);
}
double b=0;
double den=0;
for(int d = 0; d<11; d++)
{
b+=real(NormalizedKernel[d]*NumCoeffs[d]);
den+=real(NormalizedKernel[d]*DenC[d]);
}
for(int c = 0; c<11; c++)
{
NumCoeffs[c]=(NumCoeffs[c]*den)/b;
}
free(TCoeffs);
return NumCoeffs;
}
There are code which could be found online implementing butterworth filter. If you use the source code to try to get result matching MATLAB results, there will be the same problem.Basically the result you got from the code hasn't been normalized, and in the source code there is a variable sff in bwhp.c. If you set that to 1, the problem will be easily solved.
I recommend you to use this source code and
the source code and usage could be found here
I added the final edition of function ComputeNumCoeffs to the program and fix "FilterOrder" (k<11 to k<2*FiltOrd+1). Maybe it will save someone's time.
f1=0.5Gz, f2=10Gz, fs=127Gz/2
In MatLab
a={1.000000000000000,-3.329746259105707, 4.180522138699884,-2.365540522960743,0.514875789136976};
b={0.041065495448784, 0.000000000000000,-0.082130990897568, 0.000000000000000,0.041065495448784};
Program:
#include <iostream>
#include <stdio.h>
#include <vector>
#include <math.h>
#include <complex>
using namespace std;
#define N 10 //The number of images which construct a time series for each pixel
#define PI 3.1415926535897932384626433832795
double *ComputeLP(int FilterOrder)
{
double *NumCoeffs;
int m;
int i;
NumCoeffs = (double *)calloc(FilterOrder+1, sizeof(double));
if(NumCoeffs == NULL) return(NULL);
NumCoeffs[0] = 1;
NumCoeffs[1] = FilterOrder;
m = FilterOrder/2;
for(i=2; i <= m; ++i)
{
NumCoeffs[i] =(double) (FilterOrder-i+1)*NumCoeffs[i-1]/i;
NumCoeffs[FilterOrder-i]= NumCoeffs[i];
}
NumCoeffs[FilterOrder-1] = FilterOrder;
NumCoeffs[FilterOrder] = 1;
return NumCoeffs;
}
double *ComputeHP(int FilterOrder)
{
double *NumCoeffs;
int i;
NumCoeffs = ComputeLP(FilterOrder);
if(NumCoeffs == NULL) return(NULL);
for(i = 0; i <= FilterOrder; ++i)
if(i % 2) NumCoeffs[i] = -NumCoeffs[i];
return NumCoeffs;
}
double *TrinomialMultiply(int FilterOrder, double *b, double *c)
{
int i, j;
double *RetVal;
RetVal = (double *)calloc(4 * FilterOrder, sizeof(double));
if(RetVal == NULL) return(NULL);
RetVal[2] = c[0];
RetVal[3] = c[1];
RetVal[0] = b[0];
RetVal[1] = b[1];
for(i = 1; i < FilterOrder; ++i)
{
RetVal[2*(2*i+1)] += c[2*i] * RetVal[2*(2*i-1)] - c[2*i+1] * RetVal[2*(2*i-1)+1];
RetVal[2*(2*i+1)+1] += c[2*i] * RetVal[2*(2*i-1)+1] + c[2*i+1] * RetVal[2*(2*i-1)];
for(j = 2*i; j > 1; --j)
{
RetVal[2*j] += b[2*i] * RetVal[2*(j-1)] - b[2*i+1] * RetVal[2*(j-1)+1] +
c[2*i] * RetVal[2*(j-2)] - c[2*i+1] * RetVal[2*(j-2)+1];
RetVal[2*j+1] += b[2*i] * RetVal[2*(j-1)+1] + b[2*i+1] * RetVal[2*(j-1)] +
c[2*i] * RetVal[2*(j-2)+1] + c[2*i+1] * RetVal[2*(j-2)];
}
RetVal[2] += b[2*i] * RetVal[0] - b[2*i+1] * RetVal[1] + c[2*i];
RetVal[3] += b[2*i] * RetVal[1] + b[2*i+1] * RetVal[0] + c[2*i+1];
RetVal[0] += b[2*i];
RetVal[1] += b[2*i+1];
}
return RetVal;
}
double *ComputeNumCoeffs(int FilterOrder,double Lcutoff, double Ucutoff, double *DenC)
{
double *TCoeffs;
double *NumCoeffs;
std::complex<double> *NormalizedKernel;
double Numbers[11]={0,1,2,3,4,5,6,7,8,9,10};
int i;
NumCoeffs = (double *)calloc(2*FilterOrder+1, sizeof(double));
if(NumCoeffs == NULL) return(NULL);
NormalizedKernel = (std::complex<double> *)calloc(2*FilterOrder+1, sizeof(std::complex<double>));
if(NormalizedKernel == NULL) return(NULL);
TCoeffs = ComputeHP(FilterOrder);
if(TCoeffs == NULL) return(NULL);
for(i = 0; i < FilterOrder; ++i)
{
NumCoeffs[2*i] = TCoeffs[i];
NumCoeffs[2*i+1] = 0.0;
}
NumCoeffs[2*FilterOrder] = TCoeffs[FilterOrder];
double cp[2];
//double Bw;
double Wn;
cp[0] = 2*2.0*tan(PI * Lcutoff/ 2.0);
cp[1] = 2*2.0*tan(PI * Ucutoff/2.0);
//Bw = cp[1] - cp[0];
//center frequency
Wn = sqrt(cp[0]*cp[1]);
Wn = 2*atan2(Wn,4);
//double kern;
const std::complex<double> result = std::complex<double>(-1,0);
for(int k = 0; k<2*FilterOrder+1; k++)
{
NormalizedKernel[k] = std::exp(-sqrt(result)*Wn*Numbers[k]);
}
double b=0;
double den=0;
for(int d = 0; d<2*FilterOrder+1; d++)
{
b+=real(NormalizedKernel[d]*NumCoeffs[d]);
den+=real(NormalizedKernel[d]*DenC[d]);
}
for(int c = 0; c<2*FilterOrder+1; c++)
{
NumCoeffs[c]=(NumCoeffs[c]*den)/b;
}
free(TCoeffs);
return NumCoeffs;
}
double *ComputeDenCoeffs(int FilterOrder, double Lcutoff, double Ucutoff)
{
int k; // loop variables
double theta; // PI * (Ucutoff - Lcutoff)/2.0
double cp; // cosine of phi
double st; // sine of theta
double ct; // cosine of theta
double s2t; // sine of 2*theta
double c2t; // cosine 0f 2*theta
double *RCoeffs; // z^-2 coefficients
double *TCoeffs; // z^-1 coefficients
double *DenomCoeffs; // dk coefficients
double PoleAngle; // pole angle
double SinPoleAngle; // sine of pole angle
double CosPoleAngle; // cosine of pole angle
double a; // workspace variables
cp = cos(PI * (Ucutoff + Lcutoff)/2.0);
theta = PI * (Ucutoff - Lcutoff)/2.0;
st = sin(theta);
ct = cos(theta);
s2t = 2.0*st*ct; // sine of 2*theta
c2t = 2.0*ct*ct - 1.0; // cosine of 2*theta
RCoeffs = (double *)calloc(2 * FilterOrder, sizeof(double));
TCoeffs = (double *)calloc(2 * FilterOrder, sizeof(double));
for(k = 0; k < FilterOrder; ++k)
{
PoleAngle = PI * (double)(2*k+1)/(double)(2*FilterOrder);
SinPoleAngle = sin(PoleAngle);
CosPoleAngle = cos(PoleAngle);
a = 1.0 + s2t*SinPoleAngle;
RCoeffs[2*k] = c2t/a;
RCoeffs[2*k+1] = s2t*CosPoleAngle/a;
TCoeffs[2*k] = -2.0*cp*(ct+st*SinPoleAngle)/a;
TCoeffs[2*k+1] = -2.0*cp*st*CosPoleAngle/a;
}
DenomCoeffs = TrinomialMultiply(FilterOrder, TCoeffs, RCoeffs);
free(TCoeffs);
free(RCoeffs);
DenomCoeffs[1] = DenomCoeffs[0];
DenomCoeffs[0] = 1.0;
for(k = 3; k <= 2*FilterOrder; ++k)
DenomCoeffs[k] = DenomCoeffs[2*k-2];
return DenomCoeffs;
}
void filter(int ord, double *a, double *b, int np, double *x, double *y)
{
int i,j;
y[0]=b[0] * x[0];
for (i=1;i<ord+1;i++)
{
y[i]=0.0;
for (j=0;j<i+1;j++)
y[i]=y[i]+b[j]*x[i-j];
for (j=0;j<i;j++)
y[i]=y[i]-a[j+1]*y[i-j-1];
}
for (i=ord+1;i<np+1;i++)
{
y[i]=0.0;
for (j=0;j<ord+1;j++)
y[i]=y[i]+b[j]*x[i-j];
for (j=0;j<ord;j++)
y[i]=y[i]-a[j+1]*y[i-j-1];
}
}
int main(int argc, char *argv[])
{
(void)argc;
(void)argv;
//Frequency bands is a vector of values - Lower Frequency Band and Higher Frequency Band
//First value is lower cutoff and second value is higher cutoff
//f1 = 0.5Gz f2=10Gz
//fs=127Gz
//Kotelnikov/2=Nyquist (127/2)
double FrequencyBands[2] = {0.5/(127.0/2.0),10.0/(127.0/2.0)};//these values are as a ratio of f/fs, where fs is sampling rate, and f is cutoff frequency
//and therefore should lie in the range [0 1]
//Filter Order
int FiltOrd = 2;//5;
//Pixel Time Series
/*int PixelTimeSeries[N];
int outputSeries[N];
*/
//Create the variables for the numerator and denominator coefficients
double *DenC = 0;
double *NumC = 0;
//Pass Numerator Coefficients and Denominator Coefficients arrays into function, will return the same
printf("\n");
//is A in matlab function and the numbers are correct
DenC = ComputeDenCoeffs(FiltOrd, FrequencyBands[0], FrequencyBands[1]);
for(int k = 0; k<2*FiltOrd+1; k++)
{
printf("DenC is: %lf\n", DenC[k]);
}
printf("\n");
NumC = ComputeNumCoeffs(FiltOrd,FrequencyBands[0],FrequencyBands[1],DenC);
for(int k = 0; k<2*FiltOrd+1; k++)
{
printf("NumC is: %lf\n", NumC[k]);
}
double y[5];
double x[5]={1,2,3,4,5};
filter(5, DenC, NumC, 5, x, y);
return 1;
}