I am trying to create a Dynamic Time Warping(DTW) function, which will calculate the minimum distance between the two signals provided to it. It is based on the following algorithm,
DTW Algorithm:-
int DTWDistance(s: array [1..n], t: array [1..m]) {
DTW := array [0..n, 0..m]
w := abs(n-m)// adapt window size (*)
for i := 0 to n
for j:= 0 to m
DTW[i, j] := infinity
DTW[0, 0] := 0
for i := 1 to n
for j := max(1, i-w) to min(m, i+w)
cost := d(s[i], t[j])
DTW[i, j] := cost + minimum(DTW[i-1, j ], // insertion
DTW[i, j-1], // deletion
DTW[i-1, j-1]) // match
return DTW[n, m]
more info DTW Algorithm
I was able to write an Octave function implementing this algorithm, and it works properly.
Octave Function:-
# dtw2  Windowed Dynamic Time Warping distance between two signals.
#   a, b          the two input signals
#   dtw_distance  accumulated cost read from the last cell of the cost table
function dtw_distance = dtw2(a,b)
length_a = length(a);
length_b = length(b);
# Cost table with one extra leading row and column for the boundary.
an=zeros(length_a+1,length_b+1);
# 9999 is the stand-in for "infinity" in cells that are never updated.
an(:,:)=9999;
an(1,1)=0;
cost=0;
#Here we have also implemented the window size.
w=abs(length_a-length_b);
for i=1:length_a
# Only columns within w of the current row index are visited.
for j=max(1,i-w):min(length_b,i+w)
cost=abs(a(i)-b(j));
# Table indices are shifted by one relative to the signal indices.
an(i+1,j+1)=cost+min([an(i,j+1),an(i+1,j),an(i,j)]);
end
end
an;
dtw_distance=an(length_a+1,length_b+1);
Now the computation time of this code increases as the size of argument increases. Hence I am trying to create OCT file which is written in C++ for faster execution.
C++ OCT File:-
#include <octave/oct.h>
/// Returns the larger of the two index values (b when they are equal).
octave_idx_type getMax(octave_idx_type a, octave_idx_type b){
    if (a > b) {
        return a;
    }
    return b;
}
/// Returns the smaller of the two index values (b when they are equal).
octave_idx_type getMin(octave_idx_type a, octave_idx_type b){
    if (a < b) {
        return a;
    }
    return b;
}
// Octave DLD entry point "dtw3": fills a (length(A)+1) x (length(B)+1) cost
// table with the windowed DTW recurrence and returns the value of one cell.
// NOTE(review): no return statement is reached when nargin != 2 or when
// error_state is set.
DEFUN_DLD (dtw3, args, , "Find DTW of two Signals With Window")
{
int nargin = args.length();
if (nargin != 2)
print_usage();
else
{
NDArray A = args(0).array_value();
NDArray B = args(1).array_value();
octave_stdout << "Size of A is" << A.length();
octave_stdout << "Size of B is" << B.length();
if (! error_state)
{
octave_idx_type row = A.length()+1;
octave_idx_type col = B.length()+1;
Matrix results (row,col);
// NOTE(review): results is constructed with `row` rows and `col` columns,
// yet both loops run up to and including i == row and j == col.
for(octave_idx_type i = 0; i <= row ; i++)
{
for(octave_idx_type j=0; j<= col ; j++)
{
// 9999 is used as the "infinity" placeholder.
results(i,j)=9999;
}
}
octave_stdout << "row col" << results.dim1() << results.dim2() ;
// NOTE(review): these two reads use index row / col, the same one-past
// positions written by the fill loops above.
octave_stdout << "row end" << results(row,0) ;
octave_stdout << "col end" << results(0,col) ;
results(0,0)=0;
// Window half-width: absolute difference of the two table dimensions.
octave_idx_type win = (row>col)?(row-col):(col-row);
// NOTE(review): cost is an octave_idx_type while it is assigned a difference
// of array elements; presumably this drops any fractional part -- confirm.
octave_idx_type cost = 0;
// NOTE(review): i runs from 1 to row (= A.length()+1) and is used directly as
// the index into A; j likewise indexes B up to col (= B.length()+1).
for(octave_idx_type i = 1 ; i <= row ; i++)
{
for(octave_idx_type j = getMax(1,i-win) ; j <= getMin(col,i+win) ; j++)
{
cost=(A(i)>B(j))?(A(i)-B(j)):(B(j)-A(i));
// NOTE(review): getMin takes and returns octave_idx_type, so the table
// values pass through that type before being added to cost.
results(i,j)= cost + getMin(getMin(results(i-1,j),results(i,j-1)),results(i-1,j-1));
}
}
octave_stdout << "Ans is: " << results(row,col);
return octave_value(results(row,col));
}
}
}
Sample Input/Output
Input - Arg1: [1 2 3 4 5] , Arg2: [1 2 3 4 5 6 7]
Output:
For Octave Function: Ans is 3
For OCT FIle:
* Error in /usr/lib/x86_64-linux-gnu/octave/4.0.0/exec/x86_64-pc-linux-gnu/octave-gui': double free or corruption (!prev): 0x00007f24e81eb0a0 ***
panic: Aborted -- stopping myself...
*** Error in/usr/lib/x86_64-linux-gnu/octave/4.0.0/exec/x86_64-pc-linux-gnu/octave-gui': malloc(): memory corruption: 0x00007f24e81eb230 *
Input : Arg1 : A=rand(1,221), Args2: B=rand(1,299)
Output:
For Octave Function: Ans is 72.63
For OCT File:
* Error in `/usr/lib/x86_64-linux-gnu/octave/4.0.0/exec/x86_64-pc-linux-gnu/octave-gui': double free or corruption (!prev): 0x00007f57a06ad940 *
panic: Aborted -- stopping myself...
Size of A is221Size of B is299row col222300row end9999col end9999Ans is:1 attempting to save variables to 'octave-workspace'...
save to 'octave-workspace' complete
Aborted (core dumped)
My Problem:
First of all, what is this "double free or corruption" error I am getting when using OCT files?
The Octave function and the OCT file give different answers; what is the error in the OCT file that causes this?
Thank you.
First, you should read how to debug oct files (http://wiki.octave.org/Debugging_Octave#Debugging_oct-files)
Then you'll find this part:
Matrix results (row,col);
for(octave_idx_type i = 0; i <= row ; i++)
{
for(octave_idx_type j=0; j<= col ; j++)
{
results(i,j)=9999;
}
}
The Matrix results has dimensions row x col, but you are writing up to i<=row and j<=col, which is 1 beyond the bounds. Try i<row and j<col.
There were too many problems in your code to describe them all, so here are my changes. I've replaced some functions with built-in functions:
#include <octave/oct.h>
// Octave DLD entry point "dtw3": windowed Dynamic Time Warping distance.
//
// Expects exactly two arguments, the two signals to compare. With n and m the
// lengths of the signals, the result is cell (n, m) of an (n+1) x (m+1) cost
// table filled with the DTW recurrence, restricted to a window of |n - m|
// around the diagonal.
// Returns an empty value list when the argument count is wrong or when
// error_state is set after reading the arguments.
DEFUN_DLD (dtw3, args, , "Find DTW of two signals with window")
{
  int nargin = args.length();
  if (nargin != 2)
    {
      print_usage();
      // Stop here: args(0) and args(1) must not be read when they are absent.
      return octave_value_list();
    }

  Matrix A = args(0).array_value();
  Matrix B = args(1).array_value();
  octave_stdout << "Size of A is " << A.length() << std::endl;
  octave_stdout << "Size of B is " << B.length() << std::endl;

  // Every path has to return a value, including the error path.
  if (error_state)
    return octave_value_list();

  octave_idx_type n = A.length();
  octave_idx_type m = B.length();

  // Row 0 and column 0 form the boundary; cells that are never visited stay
  // at infinity.
  Matrix results (n + 1, m + 1);
  for (octave_idx_type i = 0; i <= n; i++)
    for (octave_idx_type j = 0; j <= m; j++)
      results(i, j) = octave_Inf;
  results(0, 0) = 0;

  // Window half-width |n - m|, computed in octave_idx_type rather than through
  // an abs() overload that may work on a narrower integer type.
  const octave_idx_type win = (n > m) ? (n - m) : (m - n);

  for (octave_idx_type i = 1; i <= n; i++)
    {
      // Explicit template argument: the literal 1 is an int and does not
      // deduce against octave_idx_type when that is a different type.
      const octave_idx_type j_first = std::max<octave_idx_type> (1, i - win);
      const octave_idx_type j_last = std::min<octave_idx_type> (m, i + win);
      for (octave_idx_type j = j_first; j <= j_last; j++)
        {
          // Absolute difference kept in double precision. The table is
          // 1-based while the signals are 0-based, hence i-1 and j-1.
          const double diff = A(i-1) - B(j-1);
          const double cost = (diff < 0) ? -diff : diff;
          results(i, j) = cost + std::min (std::min (results(i-1, j), results(i, j-1)), results(i-1, j-1));
        }
    }
  //octave_stdout << results << std::endl;
  return ovl (results(n, m));
}
Related
When trying to compile OpenACC code with GCC-9.3.0 (g++) configured with --enable-languages=c,c++,lto --disable-multilib the following code does not use multiple cores, whereas if the same code is compiled with the pgc++ compiler it does use multiple cores.
g++ compilation: g++ -lgomp -Ofast -o jsolve -fopenacc jsolvec.cpp
pgc++ compilation: pgc++ -o jsolvec.exe jsolvec.cpp -fast -Minfo=opt -ta=multicore
Code from OpenACC Tutorial1/solver https://github.com/OpenACCuserGroup/openacc-users-group.git:
// Jacobi iterative method for solving a system of linear equations
// This is guaranteed to converge if the matrix is diagonally dominant,
// so we artificially force the matrix to be diagonally dominant.
// See https://en.wikipedia.org/wiki/Jacobi_method
//
// We solve for vector x in Ax = b
// Rewrite the matrix A as a
// lower triangular (L),
// upper triangular (U),
// and diagonal matrix (D).
//
// Ax = (L + D + U)x = b
//
// rearrange to get: Dx = b - (L+U)x --> x = (b-(L+U)x)/D
//
// we can do this iteratively: x_new = (b-(L+U)x_old)/D
// build with TYPE=double (default) or TYPE=float
// build with TOLERANCE=0.001 (default) or TOLERANCE= any other value
// three arguments:
// vector size
// maximum iteration count
// frequency of printing the residual (every n-th iteration)
#include <cmath>
#include <omp.h>
#include <cstdlib>
#include <iostream>
#include <iomanip>
using std::cout;
#ifndef TYPE
#define TYPE double
#endif
#define TOLERANCE 0.001
void
init_simple_diag_dom(int nsize, TYPE* A)
{
int i, j;
// In a diagonally-dominant matrix, the diagonal element
// is greater than the sum of the other elements in the row.
// Scale the matrix so the sum of the row elements is close to one.
for (i = 0; i < nsize; ++i) {
TYPE sum;
sum = (TYPE)0;
for (j = 0; j < nsize; ++j) {
TYPE x;
x = (rand() % 23) / (TYPE)1000;
A[i*nsize + j] = x;
sum += x;
}
// Fill diagonal element with the sum
A[i*nsize + i] += sum;
// scale the row so the final matrix is almost an identity matrix
for (j = 0; j < nsize; j++)
A[i*nsize + j] /= sum;
}
} // init_simple_diag_dom
// Jacobi driver: builds a diagonally dominant system, iterates until the
// residual is no longer above TOLERANCE or max_iters is reached, then checks
// the solution by multiplying it with A and comparing against b.
//
// Command-line arguments (all optional; a missing or non-positive value
// selects the default):
//   argv[1]  vector size                              (default 1000)
//   argv[2]  maximum iteration count                  (default 5000)
//   argv[3]  print the residual every n-th iteration  (default 200)
// Always returns 0.
int
main(int argc, char **argv)
{
  int nsize; // A[nsize][nsize]
  int i, j, iters, max_iters, riter;
  double start_time, elapsed_time;
  TYPE residual, err, chksum;
  TYPE *A, *b, *x1, *x2, *xnew, *xold, *xtmp;

  // set matrix dimensions and allocate memory for matrices
  nsize = 0;
  if (argc > 1)
    nsize = atoi(argv[1]);
  if (nsize <= 0)
    nsize = 1000;

  max_iters = 0;
  if (argc > 2)
    max_iters = atoi(argv[2]);
  if (max_iters <= 0)
    max_iters = 5000;

  riter = 0;
  if (argc > 3)
    riter = atoi(argv[3]);
  if (riter <= 0)
    riter = 200;

  cout << "nsize = " << nsize << ", max_iters = " << max_iters << "\n";

  A = new TYPE[nsize*nsize];
  b = new TYPE[nsize];
  x1 = new TYPE[nsize];
  x2 = new TYPE[nsize];

  // generate a diagonally dominant matrix
  init_simple_diag_dom(nsize, A);

  // zero the x vectors, random values to the b vector
  for (i = 0; i < nsize; i++) {
    x1[i] = (TYPE)0.0;
    x2[i] = (TYPE)0.0;
    b[i] = (TYPE)(rand() % 51) / 100.0;
  }

  start_time = omp_get_wtime();

  //
  // jacobi iterative solver
  //
  residual = TOLERANCE + 1.0;  // guarantees at least one pass through the loop
  iters = 0;
  xnew = x1; // swap these pointers in each iteration
  xold = x2;
  while ((residual > TOLERANCE) && (iters < max_iters)) {
    ++iters;
    // swap input and output vectors
    xtmp = xnew;
    xnew = xold;
    xold = xtmp;

#pragma acc parallel loop
    for (i = 0; i < nsize; ++i) {
      TYPE rsum = (TYPE)0;
#pragma acc loop reduction(+:rsum)
      for (j = 0; j < nsize; ++j) {
        if (i != j) rsum += A[i*nsize + j] * xold[j];
      }
      xnew[i] = (b[i] - rsum) / A[i*nsize + i];
    }

    //
    // test convergence, sqrt(sum((xnew-xold)**2))
    //
    residual = 0.0;
#pragma acc parallel loop reduction(+:residual)
    for (i = 0; i < nsize; i++) {
      TYPE dif;
      dif = xnew[i] - xold[i];
      residual += dif * dif;
    }
    residual = sqrt((double)residual);
    if (iters % riter == 0 ) cout << "Iteration " << iters << ", residual is " << residual << "\n";
  }
  elapsed_time = omp_get_wtime() - start_time;
  cout << "\nConverged after " << iters << " iterations and " << elapsed_time << " seconds, residual is " << residual << "\n";

  //
  // test answer by multiplying my computed value of x by
  // the input A matrix and comparing the result with the
  // input b vector.
  //
  err = (TYPE)0.0;
  chksum = (TYPE)0.0;
  for (i = 0; i < nsize; i++) {
    TYPE tmp;
    xold[i] = (TYPE)0.0;
    for (j = 0; j < nsize; j++)
      xold[i] += A[i*nsize + j] * xnew[j];
    tmp = xold[i] - b[i];
    chksum += xnew[i];
    err += tmp * tmp;
  }
  err = sqrt((double)err);
  cout << "Solution error is " << err << "\n";
  if (err > TOLERANCE)
    cout << "****** Final Solution Out of Tolerance ******\n" << err << " > " << TOLERANCE << "\n";

  // All four buffers come from new[], so they must be released with delete[];
  // pairing new[] with scalar delete is undefined behaviour.
  delete[] A;
  delete[] b;
  delete[] x1;
  delete[] x2;
  return 0;
}
It's not yet supported in GCC to use OpenACC to schedule parallel loops onto multicore CPUs. Using OpenMP works for that, of course, and you can have code with mixed OpenACC (for GPU offloading, as already present in your code) and OpenMP directives (for CPU parallelization, not yet present in your code), so that the respective mechanism will be used depending on whether compiling with -fopenacc vs. -fopenmp.
Like PGI are doing, it certainly can be supported in GCC; we'll certainly be able to implement that, but it has not yet been scheduled, has not yet been funded for GCC.
I am trying to check for if the kept dice rolls (which can be up to 5) are 3 of a kind or not, so I'm trying to compare the dice roll values to each other.
The first value of R is showing as 0 of course, but the second value of R after the code is run is showing as 8191 every time, and I'm not entirely sure why.
I've also tried using
r++ instead of r += r+1, but of course that didn't change anything.
// Compares every kept die against every kept die and updates r on each match,
// printing r before and after.
int r = 0;
cout << "first value of R is " << r << endl;
for(int t = 0; t < 5; t++) {
// Both indices cover 0..4, so each die is also compared with itself (t == w).
for(int w = 0; w < 5; w++) {
if(keptDice[t] == keptDice[w] ) {
// r += r + 1 assigns r = 2*r + 1: every match doubles r and adds one.
r += r + 1;
}
}
}
cout << "Value of R is " << r << endl;
The point is that the second for loop has to start after t (int w = t + 1; ...); otherwise you would compare each die with itself, which is naturally always equal. Also, use r++ instead of r += r + 1, which is definitely wrong, but I think that is just a typo.
// Counts the pairs of kept dice that show the same value; each pair is
// counted exactly once.
int r = 0;
cout << "first value of R is " << r << endl;
for(int t = 0; t < 5; t++) {
    // Start at t + 1: starting at t would compare die t with itself, which
    // always matches and would add one spurious count per die.
    for(int w = t + 1; w < 5; w++) {
        if(keptDice[t] == keptDice[w] ) {
            r++;
        }
    }
}
cout << "Value of R is " << r << endl;
r += r + 1
is the same as writing
r = r + r + 1
r is doubling every time. Interestingly, it's always 1 less than 2^n
r = 0 + 0 + 1 (1)
r = 1 + 1 + 1 (3)
r = 3 + 3 + 1 (7)
r = 7 + 7 + 1 (15)
r = 15 + 15 + 1 (31)
r = 31 + 31 + 1 (63)
r = 63 + 63 + 1 (127)
r = 127 + 127 + 1 (255)
r = 255 + 255 + 1 (511)
r = 511 + 511 + 1 (1023)
r = 1023 + 1023 + 1 (2047)
r = 2047 + 2047 + 1 (4095)
r = 4095 + 4095 + 1 (8191)
Your program is counting 13 matches. For yahtzee, you probably want an array that counts matches, otherwise you're doubling up on each die. For example, if you had
1 2 3 4 1
It would count the first 1 matching the last die AND the last die matching the first (2 matches).
What would be more sensible is to count how many 1's you have, how many 2s you have, and store in an array
// Tally how many of the five kept dice show each face value:
// diceCount[v-1] is the number of dice showing v, for v in 1..6.
int diceCount[6];
for(int num = 1; num <= 6; num++) {
    // The counter is reset once per face value, not once per die, so it
    // accumulates over all five dice.
    int count = 0;
    for(int w = 0; w < 5; w++) {
        if(keptDice[w] == num ) {
            count++;
        }
    }
    // Store the total only after every die has been inspected.
    diceCount[num-1] = count;
}
This way when it's done, if you had two 1's, then diceCount[0] will be 2
Based on your approach, but generalized to N-of-a-kind:
// Checks whether at least N of the five kept dice show the same value.
int N = 3; // N in [1;5]
bool isNOfAKind = false;
for(int t = 0; t < 6-N; t++) { // skip searches with less elements than N
    int r = 0; // r must be reset for each count
    for(int w = t+1; w < 5; w++) { // avoid comparing to self
        if(keptDice[t] == keptDice[w]) {
            r++;
        }
    }
    // r counts only the dice AFTER t that match it, so the group size is
    // r + 1. Testing r == N would require N + 1 equal dice and would also
    // miss any larger group.
    if(r + 1 >= N) {
        // found a solution already? then bail out.
        isNOfAKind = true;
        break;
    }
}
cout << N << " of a kind? " << isNOfAKind << endl;
In OpenCV, I have two matrix One and Two which are the same size. I want to find the signed value that has minimal absolute value in both matrix and keep it in matrix One. For this, I use following code:
for (int i = 0; i < One.rows; ++i)
{
p=One.ptr<float>(i);
p_two = Two.ptr<float>(i);
for (int j = 0; j < One.cols; ++j)
{
if(fabsf(p_two[j])<fabsf(p[j]))
p[j] = p_two[j];
}
}
This code seems to be the bottleneck in my program. Does anyone know how to improve the performance? Thanks a lot!
Your code is not the bottleneck of your program. It's indeed very fast. You need to profile your code to see where the actual bottleneck is.
You can optimize it a little in case your matrices are continuous (which is very often in practice), like:
int rows = one.rows;
int cols = one.cols;
if (one.isContinuous() && two.isContinuous())
{
cols = rows * cols;
rows = 1;
}
for (int r = 0; r < rows; ++r)
{
float* pone = one.ptr<float>(r);
float* ptwo = two.ptr<float>(r);
for (int c = 0; c < cols; ++c)
{
if (fabs(ptwo[c]) < fabs(pone[c]))
{
pone[c] = ptwo[c];
}
}
}
Here is a small evaluation, also against the good alternative method proposed by @s1h in the comments:
two.copyTo(one, abs(two) < abs(one));
Time (in ms)
Size: Yuanhao s1h Miki
[3 x 3] 0.000366543 0.117294 0.000366543
[10 x 10] 0.00109963 0.0157614 0.00109963
[100 x 100] 0.0964009 0.139653 0.112529
[1280 x 720] 8.70577 11.0267 8.65372
[1000 x 1000] 9.66538 13.5068 9.02026
[1920 x 1080] 16.5681 26.9706 15.7412
[4096 x 3112] 104.423 135.629 102.595
[5000 x 5000] 196.124 277.457 187.203
You see that your method is very fast. Mine is a little bit faster. @s1h's is slower, but more concise and easier to read.
Code
You can evaluate the results on your PC with this:
#include <opencv2/opencv.hpp>
#include <iostream>
using namespace std;
using namespace cv;
int main()
{
vector<Size> sizes{ Size(3, 3), Size(10, 10), Size(100, 100), Size(1280, 720), Size(1000, 1000), Size(1920, 1080), Size(4096, 3112), Size(5000, 5000) };
cout << "Size: \t\tYuanhao \ts1h \t\tMiki" << endl;
for (int is = 0; is < sizes.size(); ++is)
{
Size sz = sizes[is];
cout << sz << "\t";
Mat1f img1(sz);
randu(img1, Scalar(-100), Scalar(100));
Mat1f img2(sz);
randu(img2, Scalar(-100), Scalar(100));
{
Mat1f one = img1.clone();
Mat1f two = img2.clone();
double tic = double(getTickCount());
for (int r = 0; r < one.rows; ++r)
{
float* pone = one.ptr<float>(r);
float* ptwo = two.ptr<float>(r);
for (int c = 0; c < one.cols; ++c)
{
if (fabs(ptwo[c]) < fabs(pone[c]))
{
pone[c] = ptwo[c];
}
}
}
double toc = (double(getTickCount()) - tic) * 1000. / getTickFrequency();
cout << toc << " \t";
}
{
Mat1f one = img1.clone();
Mat1f two = img2.clone();
double tic = double(getTickCount());
two.copyTo(one, abs(two) < abs(one));
double toc = (double(getTickCount()) - tic) * 1000. / getTickFrequency();
cout << toc << " \t";
}
{
Mat1f one = img1.clone();
Mat1f two = img2.clone();
double tic = double(getTickCount());
int rows = one.rows;
int cols = one.cols;
if (one.isContinuous() && two.isContinuous())
{
cols = rows * cols;
rows = 1;
}
for (int r = 0; r < rows; ++r)
{
float* pone = one.ptr<float>(r);
float* ptwo = two.ptr<float>(r);
for (int c = 0; c < cols; ++c)
{
if (fabs(ptwo[c]) < fabs(pone[c]))
{
pone[c] = ptwo[c];
}
}
}
double toc = (double(getTickCount()) - tic) * 1000. / getTickFrequency();
cout << toc << " \t";
}
cout << endl;
}
getchar();
return 0;
}
I'm new to CPLEX. I tried to find some information on the internet, but didn't find anything clear enough to help me with my problem.
I have p[k], where k goes from 1 to 4,
and I have a decision variable x[i][k] that must be equal to 0 or 1 (as must p[k]);
i goes from 1 to 5.
For now I do it like this:
// First attempt at the model: an objective over p and one ">= 2 * p[k]"
// constraint per (k, i) pair.
IloEnv env;
IloModel model(env);
IloNumVarArray p(env);
// NOTE(review): three variables are added to p here, while the objective below
// reads p[1] through p[4].
p.add(IloNumVar(env, 0, 1));
p.add(IloNumVar(env, 0, 1));
p.add(IloNumVar(env, 0, 1));
// NOTE(review): x is declared as a single IloIntVar but is indexed as x[i][k]
// inside the loop.
IloIntVar x(env, 0, 1);
model.add(IloMaximize(env, 1000 * p[1] + 2000 * p[2] + 500 * p[3] + 1500 * p[4]));
for(int k = 1; k <= 4; k++){
for(int i = 1; i <= 5; i++){
// NOTE(review): the same term x[i][k] is written five times, the comparison
// is spelled "=>", and a ';' sits inside the call's parentheses.
model.add(x[i][k] + x[i][k] + x[i][k] + x[i][k] + x[i][k] => 2 * p[k]; );
}}
The loop should do something like this:
x[1][1] + x[2][1] + x[3][1] + x[4][1] + x[5][1] => 2 * p[1];
x[1][2] + x[2][2] + x[3][2] + x[4][2] + x[5][2] => 2 * p[2];
x[1][3] + x[2][3] + x[3][3] + x[4][3] + x[5][3] => 2 * p[3];
x[1][4] + x[2][4] + x[3][4] + x[4][4] + x[5][4] => 3 * p[4];
but I'm far away from this result.
Does anyone have an idea?
Thanks
You probably want to use an IloNumExpr
// One constraint per k: the sum of x[i][k] over i = 0..4 must be at least
// 2 * p[k].
for(int k = 0; k < 4; k++){
    // Accumulate the left-hand side term by term.
    IloNumExpr sum_over_i(env);
    for(int i = 0; i < 5; i++){
        sum_over_i += x[i][k];
    }
    // No ';' inside the call: the constraint expression is the sole argument.
    model.add(sum_over_i >= 2 * p[k]);
}
You also need to declare x as a 2-dimensional array.
IloArray x(env, 4);
for (int k = 0; k < 4; ++k)
x[k] = IloIntVarArray(env, 5, 0, 1);
Also, in c++, array indices are from 0 to size-1, not 1 to size. Your objective should be written
model.add(IloMaximize(env, 1000 * p[0] + 2000 * p[1] + 500 * p[2] + 1500 * p[3]));
Usertfwr already gave a good answer, but I would like to give another version of solution which might help you to code CPLEX applications in a more generic way. First, I would suggest you to use a text file to hold all the data (objective function coefficients) which will be fed into the program. In your case, you only have to copy literally the following matrix like data to notepad and name it as “coef.dat”:
[1000, 2000, 500, 1500]
Now comes the full code, let me know if have difficulties understanding any statement:
#include <ilcplex/ilocplex.h>
#include <fstream>
#include <iostream>
ILOSTLBEGIN
// Builds and solves the model
//   maximise   sum_i a[i] * p[i]
//   subject to sum_j X[j][i] >= 2 * p[i]   for each of the 4 columns i,
// with the objective coefficients a read from "coef.dat".
// Concert exceptions and any other exception are reported on cerr; the
// environment is released in every case.
int main(int argc, char **argv) {
    IloEnv env;
    try {
        const char* inputData = "coef.dat";
        ifstream inFile(inputData); // put your data in the same directory as your executable
        if(!inFile) {
            cerr << "Cannot open the file " << inputData << " successfully! " <<endl;
            throw(-1);
        }
        // Define parameters (coef of objective function)
        IloNumArray a(env);
        // Read in data
        inFile >> a;
        // Define variables
        IloBoolVarArray p(env, a.getSize()); // note that a.getSize() = 4
        IloArray<IloBoolVarArray> X(env, 5); // note that you need a 5x4 X variables, not 4x5
        for(int i = 0; i < 5; i++) {
            X[i] = IloBoolVarArray(env,4);
        }
        // Build model
        IloModel model(env);
        // Add objective function
        IloExpr objFun (env);
        for(int i = 0; i < a.getSize(); i++){
            objFun += a[i]*p[i];
        }
        model.add(IloMaximize(env, objFun));
        objFun.end();
        // Add constraints: one per column i, summing X over the 5 rows j.
        // Each loop advances its own counter, and the variable array is the
        // upper-case X declared above.
        for(int i = 0; i < 4; i++){
            IloExpr sumConst (env);
            for(int j = 0; j < 5; j++){
                sumConst += X[j][i];
            }
            // before clearing sumConst expr, add it to model
            model.add(sumConst >= 2*p[i]);
            sumConst.end(); // very important to end after having been added to the model
        }
        // Extract the model to CPLEX (the model object is named `model`)
        IloCplex cplex(model);
        // Export the LP model to a txt file to check correctness
        //cplex.exportModel("model.lp");
        // Solve model
        cplex.solve();
    }
    catch (IloException& e) {
        cerr << "Concert exception caught: " << e << endl;
    }
    catch (...) {
        cerr << "Unknown exception caught" << endl;
    }
    env.end();
}
I have a 2D array containing blocks of data, and I have created a function that calculates how many times each value crosses zero.
I am converting the code from MATLAB to C++. MATLAB returns 287 zero-crossing values, but in the C++ code the values are extremely high and I can't figure out why.
Here is the Matlab code:
function f = zerocross(vector)
% This function simply reports the number of times
% that the input vector crosses the zero boundary
%   vector  sequence of samples to scan
%   f       number of sign changes found
len = length(vector);
currsum = 0;
% prevsign holds the sign of the last non-zero sample; 0 until one is seen.
prevsign = 0;
for i = 1:len
currsign = sign(vector(i));
% The product is -1 only when the two signs are +1 and -1 in either order.
if (currsign * prevsign) == -1
currsum = currsum + 1;
end
% Zero samples do not update prevsign, so they are skipped over.
if currsign ~= 0
prevsign = currsign;
end
end
f = currsum;
And my C++ code:
// Prints a running sign-change count while scanning `blocks`, then returns
// `blocks`; nothing in the visible code modifies it.
// sumThres and ZeroThres only feed totalSum / totalZero, which are not read
// again in the visible code.
vector<iniMatrix> Audio::filter(vector<iniMatrix>&blocks, double sumThres, double ZeroThres)
{
double totalSum = this->width * sumThres;
double totalZero = this->width * ZeroThres;
int currZero = 0;
// NOTE(review): currsum and prevsign are initialised once here and never
// reset, so each printed value carries over the counts of earlier iterations.
int currsum = 0;
int prevsign = 0;
for(unsigned i=0; (i < 287); i++)
{
for(int j=0; (j < blocks.size()); j++)
{
// NOTE(review): the inner index j selects the block and the outer index i
// selects the element within it, so each pass of the inner loop walks
// across blocks rather than along one block -- confirm this is intended.
currZero = sign<double>(blocks[j][i]);
// The product is -1 only for a +1 / -1 pair, i.e. a sign change.
if(currZero * prevsign == -1)
{
currsum++;
}
// Zero values do not replace the remembered sign.
if(currZero != 0)
{
prevsign = currZero;
}
}
cout << currsum << endl;
}
return blocks;
Sign function:
/// Returns the sign of n: -1 when n is negative, 1 when n is positive and
/// 0 otherwise.
///
/// @tparam T  any type comparable with 0 via < and >
template <typename T>
int sign(T n)
{
    if(n < 0) return -1;
    if(n > 0) return 1;
    // Neither negative nor positive: report zero explicitly instead of
    // converting n itself to int.
    return 0;
}
The values that I should (and matlab give) are:
6, 6, 7, 9, 9, 10 .., 11, ..., 9, ...
The values that I get:
212, 337, 118, 84, ...., 348, ..., 92
Anyone have any ideas?
EDIT:
This is how I have my loop now:
// Debug loop: prints the sign of every element of every block.
// NOTE(review): q is not used inside the loop body, so the complete dump is
// repeated 287 times.
for(int q=0; (q < 287); q++)
{
for(unsigned i=0; (i < blocks.size()); i++)
{
// The inner bound is taken from blocks[0] for every block i.
for(unsigned j=0; (j < blocks[0].size()); j++)
{
currZero = sign<double>(blocks[i][j]);
cout << currZero << endl;
}
// Three blank lines separate the output of consecutive blocks.
cout << endl << endl << endl;
}
//cout << currZero << endl;
}
Heavy use of for loops in Matlab has traditionally been a bad idea, as Matlab's JIT compiler isn't (or wasn't) very fast (at least not compared to native C code). I am guessing you wrote the C++ code to speed up computation, but maybe an alternative would be to write code the 'matlab way':
% Count the positions where the "is negative" flag changes between
% neighbouring samples, without an explicit loop.
vector = rand(1e7,1)-0.1;
zero_crossings = sum(diff(vector<0)~=0);
Though I just tested on Matlab 2015b, and the vectorized code is only about twice as fast:
% Benchmark: times the loop-based zerocross against the vectorized version on
% the same random matrix and prints the first five counts of each.
function test()
function currsum = zerocross(vector)
% This function simply reports the number of times
% that the input vector crosses the zero boundary
len = length(vector);
currsum = 0;
% Sign of the last non-zero sample; 0 until one is seen.
prevsign = 0;
for i = 1:len
currsign = sign(vector(i));
if (currsign * prevsign) == -1
currsum = currsum + 1;
end
if currsign ~= 0
prevsign = currsign;
end
end
end
function f = zerocross_vectorized(array)
% Counts the changes of the "is negative" flag between neighbouring entries.
f = sum(diff(array<0)~=0);
end
% Test data: 1e5 x 1e3 values drawn from rand and shifted down by 0.1.
array = rand(1e5,1e3)-0.1;
% test for loop
t = tic;
crossings = nan(1,size(array,2));
% One call per column of the test matrix.
for column = 1:size(array,2)
vector = array(:,column);
crossings(column) = zerocross(vector);
end
disp(crossings(1:5))
fprintf(1,'For loop: Calculated in %0.4f seconds\n',toc(t));
% test vectorized
t = tic;
crossings = zerocross_vectorized(array);
disp(crossings(1:5))
fprintf(1,'Vectorized: Calculated in %0.4f seconds\n',toc(t));
end
Which results in:
>> zerocross
18208 17884 17902 17734 17988
For loop: Calculated in 1.7186 seconds
18208 17884 17902 17734 17988
Vectorized: Calculated in 0.9695 seconds