Closed. This question does not meet Stack Overflow guidelines. It is not currently accepting answers.
We don’t allow questions seeking recommendations for books, tools, software libraries, and more. You can edit the question so it can be answered with facts and citations.
Closed 6 years ago.
Improve this question
Does anyone know if there is any free and open source library that has implemented these two functions the way they are defined in matlab?
Thanks
FFTHIFT / IFFTSHIFT is a fancy way of doing CIRCSHIFT.
You can verify that FFTSHIFT can be rewritten as CIRCSHIFT as following.
You can define macros in C/C++ to punt FFTSHIFT to CIRCSHIFT.
A = rand(m, n);
mm = floor(m / 2);
nn = floor(n / 2);
% All three of the following should provide zeros.
circshift(A,[mm, nn]) - fftshift(A)
circshift(A,[mm, 0]) - fftshift(A, 1)
circshift(A,[ 0, nn]) - fftshift(A, 2)
Similar equivalents can be found for IFFTSHIFT.
Circular shift can be implemented very simply with the following code (Can be improved with parallel versions ofcourse).
template<class ty>
void circshift(ty *out, const ty *in, int xdim, int ydim, int xshift, int yshift)
{
for (int i = 0; i < xdim; i++) {
int ii = (i + xshift) % xdim;
for (int j = 0; j < ydim; j++) {
int jj = (j + yshift) % ydim;
out[ii * ydim + jj] = in[i * ydim + j];
}
}
}
And then
#define fftshift(out, in, x, y) circshift(out, in, x, y, (x/2), (y/2))
#define ifftshift(out, in, x, y) circshift(out, in, x, y, ((x+1)/2), ((y+1)/2))
This was done a bit impromptu. Bear with me if there are any formatting / syntactical problems.
Possible this code may help. It perform fftshift/ifftshift only for 1D array within one buffer. Algorithm of forward and backward fftshift for even number of elements is fully identical.
void swap(complex *v1, complex *v2)
{
complex tmp = *v1;
*v1 = *v2;
*v2 = tmp;
}
void fftshift(complex *data, int count)
{
int k = 0;
int c = (int) floor((float)count/2);
// For odd and for even numbers of element use different algorithm
if (count % 2 == 0)
{
for (k = 0; k < c; k++)
swap(&data[k], &data[k+c]);
}
else
{
complex tmp = data[0];
for (k = 0; k < c; k++)
{
data[k] = data[c + k + 1];
data[c + k + 1] = data[k + 1];
}
data[c] = tmp;
}
}
void ifftshift(complex *data, int count)
{
int k = 0;
int c = (int) floor((float)count/2);
if (count % 2 == 0)
{
for (k = 0; k < c; k++)
swap(&data[k], &data[k+c]);
}
else
{
complex tmp = data[count - 1];
for (k = c-1; k >= 0; k--)
{
data[c + k + 1] = data[k];
data[k] = data[c + k];
}
data[c] = tmp;
}
}
UPDATED:
Also FFT library (including fftshift operations) for arbitrary points number could be found in Optolithium (under the OptolithiumC/libs/fourier)
Normally, centering the FFT is done with v(k)=v(k)*(-1)**k in
the time domain. Shifting in the frequency domain is a poor substitute, for
mathematical reasons and for computational efficiency.
See pp 27 of:
http://show.docjava.com/pub/document/jot/v8n6.pdf
I am not sure why Matlab documentation does it the way they do,
they give no technical reference.
Or you can do it yourself by typing type fftshift and recoding that in C++. It's not that complicated of Matlab code.
Edit: I've noticed that this answer has been down-voted a few times recently and commented on in a negative way. I recall a time when type fftshift was more revealing than the current implementation, but I could be wrong. If I could delete the answer, I would as it seems no longer relevant.
Here is a version (courtesy of Octave) that implements it without
circshift.
I tested the code provided here and made an example project to test them. For 1D code one can simply use std::rotate
template <typename _Real>
static inline
void rotshift(complex<_Real> * complexVector, const size_t count)
{
int center = (int) floor((float)count/2);
if (count % 2 != 0) {
center++;
}
// odd: 012 34 changes to 34 012
std::rotate(complexVector,complexVector + center,complexVector + count);
}
template <typename _Real>
static inline
void irotshift(complex<_Real> * complexVector, const size_t count)
{
int center = (int) floor((float)count/2);
// odd: 01 234 changes to 234 01
std::rotate(complexVector,complexVector +center,complexVector + count);
}
I prefer using std::rotate over the code from Alexei due to its simplicity.
For 2D it gets more complicated. For even numbers it is basically a flip left right and flip upside down. For odd it is the circshift algorithm:
// A =
// 1 2 3
// 4 5 6
// 7 8 9
// fftshift2D(A)
// 9 | 7 8
// --------------
// 3 | 1 2
// 6 | 4 5
// ifftshift2D(A)
// 5 6 | 4
// 8 9 | 7
// --------------
// 2 3 | 1
Here I implemented the circshift code with an interface using only one array for in and output. For even numbers only a single array is required, for odd numbers a second array is temporarily created and copied back to the input array. This causes a performance decrease because of the additional time for copying the array.
template<class _Real>
static inline
void fftshift2D(complex<_Real> *data, size_t xdim, size_t ydim)
{
size_t xshift = xdim / 2;
size_t yshift = ydim / 2;
if ((xdim*ydim) % 2 != 0) {
// temp output array
std::vector<complex<_Real> > out;
out.resize(xdim * ydim);
for (size_t x = 0; x < xdim; x++) {
size_t outX = (x + xshift) % xdim;
for (size_t y = 0; y < ydim; y++) {
size_t outY = (y + yshift) % ydim;
// row-major order
out[outX + xdim * outY] = data[x + xdim * y];
}
}
// copy out back to data
copy(out.begin(), out.end(), &data[0]);
}
else {
// in and output array are the same,
// values are exchanged using swap
for (size_t x = 0; x < xdim; x++) {
size_t outX = (x + xshift) % xdim;
for (size_t y = 0; y < yshift; y++) {
size_t outY = (y + yshift) % ydim;
// row-major order
swap(data[outX + xdim * outY], data[x + xdim * y]);
}
}
}
}
template<class _Real>
static inline
void ifftshift2D(complex<_Real> *data, size_t xdim, size_t ydim)
{
size_t xshift = xdim / 2;
if (xdim % 2 != 0) {
xshift++;
}
size_t yshift = ydim / 2;
if (ydim % 2 != 0) {
yshift++;
}
if ((xdim*ydim) % 2 != 0) {
// temp output array
std::vector<complex<_Real> > out;
out.resize(xdim * ydim);
for (size_t x = 0; x < xdim; x++) {
size_t outX = (x + xshift) % xdim;
for (size_t y = 0; y < ydim; y++) {
size_t outY = (y + yshift) % ydim;
// row-major order
out[outX + xdim * outY] = data[x + xdim * y];
}
}
// copy out back to data
copy(out.begin(), out.end(), &data[0]);
}
else {
// in and output array are the same,
// values are exchanged using swap
for (size_t x = 0; x < xdim; x++) {
size_t outX = (x + xshift) % xdim;
for (size_t y = 0; y < yshift; y++) {
size_t outY = (y + yshift) % ydim;
// row-major order
swap(data[outX + xdim * outY], data[x + xdim * y]);
}
}
}
}
Notice: There are better answers provided, I just keep this here for a while for... I do not know what.
Try this:
template<class T> void ifftShift(T *out, const T* in, size_t nx, size_t ny)
{
const size_t hlen1 = (ny+1)/2;
const size_t hlen2 = ny/2;
const size_t shft1 = ((nx+1)/2)*ny + hlen1;
const size_t shft2 = (nx/2)*ny + hlen2;
const T* src = in;
for(T* tgt = out; tgt < out + shft1 - hlen1; tgt += ny, src += ny) { // (nx+1)/2 times
copy(src, src+hlen1, tgt + shft2); //1->4
copy(src+hlen1, src+ny, tgt+shft2-hlen2); } //2->3
src = in;
for(T* tgt = out; tgt < out + shft2 - hlen2; tgt += ny, src += ny ){ // nx/2 times
copy(src+shft1, src+shft1+hlen2, tgt); //4->1
copy(src+shft1-hlen1, src+shft1, tgt+hlen2); } //3->2
};
For matrices with even dimensions you can do it in-place, just passing the same pointer into in and out parameters.
Also note that for 1D arrays fftshift is just std::rotate.
You could also use arrayfire's shift function as replacement for Matlab's circshift and re-implement the rest of the code. This could be useful if you are interested in any of the other features of AF anyway (such as portability to GPU by simply changing a linker flag).
However if all your code is meant to be run on the CPU and is quite sophisticated or you don't want to use any other data format (AF requires af::arrays) stick with one of the other options.
I ended up changing to AF because I would have had to re-implement fftshift as an OpenCL kernel otherwise back in the time.
It will give equivalent result to ifftshift in matlab
ifftshift(vector< vector <double> > Hlow,int RowLineSpace, int ColumnLineSpace)
{
int pivotRow=floor(RowLineSpace/2);
int pivotCol=floor(ColumnLineSpace/2);
for(int i=pivotRow;i<RowLineSpace;i++){
for(int j=0;j<ColumnLineSpace;j++){
double temp=Hlow.at(i).at(j);
second.push_back(temp);
}
ifftShiftRow.push_back(second);
second.clear();
}
for(int i=0;i<pivotRow;i++){
for(int j=0;j<ColumnLineSpace;j++){
double temp=Hlow.at(i).at(j);
first.push_back(temp);
}
ifftShiftRow.push_back(first);
first.clear();
}
double** arr = new double*[RowLineSpace];
for(int i = 0; i < RowLineSpace; ++i)
arr[i] = new double[ColumnLineSpace];
int i1=0,j1=0;
for(int j=pivotCol;j<ColumnLineSpace;j++){
for(int i=0;i<RowLineSpace;i++){
double temp2=ifftShiftRow.at(i).at(j);
arr[i1][j1]=temp2;
i1++;
}
j1++;
i1=0;
}
for(int j=0;j<pivotCol;j++){
for(int i=0;i<RowLineSpace;i++){
double temp1=ifftShiftRow.at(i).at(j);
arr[i1][j1]=temp1;
i1++;
}
j1++;
i1=0;
}
for(int i=0;i<RowLineSpace;i++){
for(int j=0;j<ColumnLineSpace;j++){
double value=arr[i][j];
temp.push_back(value);
}
ifftShiftLow.push_back(temp);
temp.clear();
}
return ifftShiftLow;
}
Octave uses fftw to implement (i)fftshift.
You can use kissfft. It's reasonable fast, extremely simple to use, and free. Arranging the output like you want it requires only to:
a) shift by (-dim_x/2, -dim_y/2, ...), with periodic boundary conditions
b) FFT or IFFT
c) shift back by (dim_x/2, dim_y/2, ...) , with periodic boundary conditions
d) scale ? (according to your needs IFFT*FFT will scale the function by dim_x*dim_y*... by default)
Related
I recently finished making an algorithm for a project I'm working on.
Briefly, a part of my project needs to fill a matrix, the requirements of how to do it are these:
- Fill the matrix in form of spiral, from the center.
- The size of the matrix must be dynamic, so the spiral can be large or small.
- Every two times a cell of the matrix is filled, //DO STUFF must be executed.
In the end, the code that I made works, it was my best effort and I am not able to optimize it more, it bothers me a bit having had to use so many ifs, and I was wondering if someone could take a look at my code to see if it is possible to optimize it further or some constructive comment (it works well, but it would be great if it was faster, since this algorithm will be executed several times in my project). Also so that other people can use it!
#include <stdio.h>
typedef unsigned short u16_t;
const u16_t size = 7; //<-- CHANGE HERE!!! just odd numbers and bigger than 3
const u16_t maxTimes = 2;
u16_t array_cont[size][size] = { 0 };
u16_t counter = 3, curr = 0;
u16_t endColumn = (size - 1) / 2, endRow = endColumn;
u16_t startColumn = endColumn + 1, startRow = endColumn + 1;
u16_t posLoop = 2, buffer = startColumn, i = 0;
void fillArray() {
if (curr < maxTimes) {
if (posLoop == 0) { //Top
for (i = buffer; i <= startColumn && curr < maxTimes; i++, curr++)
array_cont[endRow][i] = counter++;
if (curr == maxTimes) {
if (i <= startColumn) {
buffer = i;
} else {
buffer = endRow;
startColumn++;
posLoop++;
}
} else {
buffer = endRow;
startColumn++;
posLoop++;
fillArray();
}
} else if (posLoop == 1) { //Right
for (i = buffer; i <= startRow && curr < maxTimes; i++, curr++)
array_cont[i][startColumn] = counter++;
if (curr == maxTimes) {
if (i <= startRow) {
buffer = i;
} else {
buffer = startColumn;
startRow++;
posLoop++;
}
} else {
buffer = startColumn;
startRow++;
posLoop++;
fillArray();
}
} else if (posLoop == 2) { //Bottom
for (i = buffer; i >= endColumn && curr < maxTimes; i--, curr++)
array_cont[startRow][i] = counter++;
if (curr == maxTimes) {
if (i >= endColumn) {
buffer = i;
} else {
buffer = startRow;
endColumn--;
posLoop++;
}
} else {
buffer = startRow;
endColumn--;
posLoop++;
fillArray();
}
} else if (posLoop == 3) { //Left
for (i = buffer; i >= endRow && curr < maxTimes; i--, curr++)
array_cont[i][endColumn] = counter++;
if (curr == maxTimes) {
if (i >= endRow) {
buffer = i;
} else {
buffer = endColumn;
endRow--;
posLoop = 0;
}
} else {
buffer = endColumn;
endRow--;
posLoop = 0;
fillArray();
}
}
}
}
int main(void) {
array_cont[endColumn][endColumn] = 1;
array_cont[endColumn][endColumn + 1] = 2;
//DO STUFF
u16_t max = ((size * size) - 1) / maxTimes;
for (u16_t j = 0; j < max; j++) {
fillArray();
curr = 0;
//DO STUFF
}
//Demostration
for (u16_t x = 0; x < size; x++) {
for (u16_t y = 0; y < size; y++)
printf("%-4d ", array_cont[x][y]);
printf("\n");
}
return 0;
}
Notice that the numbers along the diagonal (1, 9, 25, 49) are the squares of the odd numbers. That's an important clue, since it suggests that the 1 in the center of the matrix should be treated as the end of a spiral.
From the end of each spiral, the x,y coordinates should be adjusted up and to the right by 1. Then the next layer of the spiral can be constructed by moving down, left, up, and right by the same amount.
For example, starting from the position of the 1, move up and to the right (to the position of the 9), and then form a loop with the following procedure:
move down, and place the 2
move down, and place the 3
move left, and place the 4
move left, and place the 5
etc.
Thus the code looks something like this:
int size = 7;
int matrix[size][size];
int dy[] = { 1, 0, -1, 0 };
int dx[] = { 0, -1, 0, 1 };
int directionCount = 4;
int ringCount = (size - 1) / 2;
int y = ringCount;
int x = ringCount;
int repeatCount = 0;
int value = 1;
matrix[y][x] = value++;
for (int ring = 0; ring < ringCount; ring++)
{
y--;
x++;
repeatCount += 2;
for (int direction = 0; direction < directionCount; direction++)
for (int repeat = 0; repeat < repeatCount; repeat++)
{
y += dy[direction];
x += dx[direction];
matrix[y][x] = value++;
}
}
I saw already many approaches for doing a spiral. All a basically drawing it, by following a path.
BUT, you can also come up with an analytical calculation formula for a spiral.
So, no recursion or iterative solution by following a path or such. We can directly calculate the indices in the matrix, if we have the running number.
I will start with the spiral in mathematical positive direction (counter clockwise) in a cartesian coordinate system. We will concentrate on X and Y coordinates.
I made a short Excel and derived some formulas from that. Here is a short picture:
From the requirements we know that the matrix will be quadratic. That makes things easier. A little bit trickier is, to get the matrix data symmetrical. But with some simple formulas, derived from the prictures, this is not really a problem.
And then we can calculate x and y coordinates with some simple statements. See the below example program with long variable names for better understanding. The code is made using some step by step approach to illustrate the implementation. Of course it can be made more compact easily. Anyway. Let's have a look.
#include <iostream>
#include <cmath>
#include <iomanip>
int main() {
// Show some example values
for (long step{}; step < 81; ++step) {
// Calculate result
const long roundedSquareRoot = std::lround(std::sqrt(step));
const long roundedSquare = roundedSquareRoot * roundedSquareRoot;
const long distance = std::abs(roundedSquare - step) - roundedSquareRoot;
const long rsrIsOdd = (roundedSquareRoot % 2);
const long x = (distance + roundedSquare - step - rsrIsOdd) / (rsrIsOdd ? -2 : 2);
const long y = (-distance + roundedSquare - step - rsrIsOdd) / (rsrIsOdd ? -2 : 2);
// Show ouput
std::cout << "Step:" << std::setw(4) << step << std::setw(3) << x << ' ' << std::setw(3) << y << '\n';
}
}
So, you see that we really have an analytical solution. Given any number we can calculate the x and y coordinate using a formula. Cool.
Getting indices in a matrix is just adding some offset.
With that gained know how, we can now easily calculate the complete matrix. And, since there is no runtime activity needed at all, we can let the compiler do the work. We will simply use constexpr functions for everything.
Then the compiler will create this matrix at compile time. At runtime, nothing will happen.
Please see a very compact solution:
#include <iostream>
#include <iomanip>
#include <array>
constexpr size_t MatrixSize = 15u;
using MyType = long;
static_assert(MatrixSize > 0 && MatrixSize%2, "Matrix size must be odd and > 0");
constexpr MyType MatrixHalf = MatrixSize / 2;
using Matrix = std::array<std::array<MyType, MatrixSize>, MatrixSize >;
// Some constexpr simple mathematical functions ------------------------------------------------------------------------------
// No need for <cmath>
constexpr MyType myAbs(MyType v) { return v < 0 ? -v : v; }
constexpr double mySqrtRecursive(double x, double c, double p) {return c == p? c: mySqrtRecursive(x, 0.5 * (c + x / c), c); }
constexpr MyType mySqrt(MyType x) {return (MyType)(mySqrtRecursive((double)x,(double)x,0.0)+0.5); }
// Main constexpr function will fill the matrix with a spiral pattern during compile time -------------------------------------
constexpr Matrix fillMatrix() {
Matrix matrix{};
for (int i{}; i < (MatrixSize * MatrixSize); ++i) {
const MyType rsr{ mySqrt(i) }, rs{ rsr * rsr }, d{ myAbs(rs - i) - rsr }, o{ rsr % 2 };
const size_t col{ (size_t)(MatrixHalf +((d + rs - i - o) / (o ? -2 : 2)))};
const size_t row{ (size_t)(MatrixHalf -((-d + rs - i - o) / (o ? -2 : 2)))};
matrix[row][col] = i;
}
return matrix;
}
// This is a compile time constant!
constexpr Matrix matrix = fillMatrix();
// All the above has been done during compile time! -----------------------------------------
int main() {
// Nothing to do. All has beend done at compile time already!
// The matrix is already filled with a spiral pattern
// Just output
for (const auto& row : matrix) {
for (const auto& col : row) std::cout << std::setw(5) << col << ' '; std::cout << '\n';
}
}
Different coordinate systems or other spiral direction can be adapted easily.
Happy coding.
I recently asked question about how to work with element Edit1 dynamically, now I want to ask something about values, which I received from dynamical arrays. First I try to divide image into sectors:
const n=20;
unsigned short i, j, line_length, w = Image1->Width, h = Image1->Height, l = Left + Image1->Left, t = Top + Image1->Top;
unsigned short border = (Width-ClientWidth)/2, topborder = Height-ClientHeight-border;
Image1->Canvas->Pen->Color = clRed;
for (i = 0; i <= n; i++)
{
Image1->Canvas->MoveTo(0, 0);
line_length = w * tan(M_PI/2*i/n);
if (line_length <= h)
Image1->Canvas->LineTo(w, line_length);
else
{
line_length = h * tan(M_PI/2*(1-1.*i/n));
Image1->Canvas->LineTo(line_length, h);
}
}
Then I use regions to count black dots in each sector and I want to add values to element Memo:
HRGN region[n];
TPoint points[3];
points[0] = Point(l + border, t + topborder);
for (i = 0; i < n; i++)
{
for (j = 0; j <= 1; j++)
{
line_length = w * tan(M_PI/2*(i+j)/n);
if (line_length <= h)
points[j+1] = Point(l + border + w, t + topborder + line_length);
else
{
line_length = h * tan(M_PI/2*(1-1.*(i+j)/n));
points[j+1] = Point(l + border + line_length, t + topborder + h);
}
}
region[i] = CreatePolygonRgn(points, 3, ALTERNATE); // or WINDING ?? as u want
}
Byte k;
unsigned __int64 point_count[n] = {0}, points_count = 0;
for(j = 0; j < h; j++)
for (i = 0; i < w; i++)
if (Image1->Canvas->Pixels[i][j] == clBlack)
{
points_count++;
for (k = 0; k < n; k++)
if (PtInRegion(region[k], l + border + i, t + topborder + j))
point_count[k]++;
}
unsigned __int64 sum = 0;
for (i = 0; i < n; i++)
{
sum += point_count[i];
Memo1->Lines->Add(point_count[i]);
}
As i received an advice from one man, in order to allocate an array using a TEdit to specify the array's count I should use, for example DynamicArray:
#include <sysdyn.h>
DynamicArray<HRGN> region;
...
int n = Edit1-> Text.ToInt();
region.Length = n;
I have made the same changes to point_count array:
Byte k;
DynamicArray<unsigned __int64> point_count;
point_count.Length = n;
unsigned __int64 /*point_count[n] = {0},*/ points_count = 0;
...
The problem is that I received different values if I do it dynamically or statically(n=20).
Statically:
Dynamically:
The problem is that I received different values if I do it dynamically or statically(n=20)
There is no difference whatsoever in accessing elements of a static array vs a dynamic array. Your problem has to be elsewhere.
For instance, your static code is initializing all of the array elements to 0, but your dynamic code is not doing that, so they will have random values before your loop then increments them.
Try this:
DynamicArray<unsigned __int64> point_count;
point_count.Length = n;
for(int i = 0; i < n; ++i) {
point_count[i] = 0;
}
...
Alternatively:
DynamicArray<unsigned __int64> point_count;
point_count.Length = n;
ZeroMemory(&point_count[0], sizeof(unsigned __int64) * n);
...
Also, using the Image1->Canvas->Pixels[][] property is very slow. Consider using the Image1->Picture->Bitmap->ScanLine[] property instead for faster access to the raw pixels.
I have this funcition (RotateSlownessTop) and it's called about 800 times computing the corresponding values. But the calculation is slow and is there a way I can make the computations faster.
The number of element in X/Y is 7202. (Fairly large set)
I did the performance analysis and the screenshot has been attached.
void RotateSlownessTop(vector <double> &XR1, vector <double> &YR1, float theta = 0.0)
{
Matrix2d a;
a(0,0) = cos(theta);
a(0,1) = -sin(theta);
a(1, 0) = sin(theta);
a(1, 1) = cos(theta);
vector <double> XR2(7202), YR2(7202);
for (size_t i = 0; i < X.size(); ++i)
{
XR2[i] = (a(0, 0)*X[i] + a(0, 1)*Y[i]);
YR2[i] = (a(1, 0)*X[i] + a(1, 1)*Y[i]);
}
size_t i = 0;
size_t j = 0;
while (i < YR2.size())
{
if (i > 0)
if ((XR2[i]>0) && (XR2[i-1]<0))
j = i;
if (YR2[i] > (-1e-10) && YR2[i]<0.0)
YR2[i] = 0.0;
if (YR2[i] < (1e-10) && YR2[i]>0.0)
YR2[i] = -YR2[i];
if ( YR2[i]<0.0)
{
YR2.erase(YR2.begin() + i);
XR2.erase(XR2.begin() + i);
--i;
}
++i;
}
size_t k = 0;
while (j < YR2.size())
{
YR1[k] = (YR2[j]);
XR1[k] = (XR2[j]);
YR2.erase(YR2.begin() + j);
XR2.erase(XR2.begin() + j);
++k;
}
size_t l = 0;
for (; k < XR1.size(); ++k)
{
XR1[k] = XR2[l];
YR1[k] = YR2[l];
l++;
}
}
Edit1: I have updated the code by replacing all push_back() with operator[], since I read somewhere that this is much faster.
However the whole program is still slow. Any suggestions are appreciated.
If the size is large, you can improve the push_back by pre-allocating the space needed. Add this before the loop:
XR2.reserve(X.size());
YR2.reserve(X.size());
here is my code, my "algorithm" is trying to take a bayer image, or an RGB image, and separate the channel G, which is the Luma (or even grayscale) into the different channels of the color,
an example Bayer Pattern
void Utilities::SeparateChannels(int* channelR, int* channelG, int* channelB, double*& gr, double*& r, double*& b, double*& gb,int _width, int _height, int _colorOrder)
{
//swith case the color Order
int counter_R = 0;
int counter_GR = 0;
int counter_GB = 0;
int counter_B = 0;
switch (_colorOrder)
{
//grbg
case 0:
for (int j = 0; j < _width; j++)
{
for (int i = 0; i < _height; i++)
{
if (i % 2 == 0 && j % 2 == 0)
{
gr[counter_GR] = channelG[i*_width+ j];
counter_GR++;
}
else if (i % 2 == 0 && j % 2 == 1)
{
r[counter_R] = channelG[i*_width+ j];
counter_R++;
}
else if (i % 2 == 1 && j % 2 == 0)
{
b[counter_B] =channelG[i*_width+ j];
counter_B++;
}
else if (i % 2 == 1 && j % 2 == 1)
{
gb[counter_GB] = channelG[i*_width+ j];
counter_GB++;
}
}
}
I ran the profiler on 70 images, I attached my results.
Can you suggest a way to optimize my code?
Swap the loops, first iterate over the height. Then you can calculate i * _width before the second loop and calculate this 1 time instead of _width times.
You test i%2==0 in the first if, then you test it again in the second if, then you test if i%2==1 in the third if and yet again in the fourth. If you nested your if statements then you wouldn't have to keep testing, and if you know i%2 != 0 you can deduce it must be 1, likewise with j.
if(i%2==0){
if(j%2==0){
}else{
// j%2 is pretty likely to be 1
}
}else{
// i%2 is pretty likely to be 1
}
In fact, you can go further than that... if j is your row counter, it will not vary all the way across any row, so you could do one test at the start of each row and then execute a different loop according to whether you are on an odd or an even row without testing the row index for every pixel.
The whole algorithm can be reduced to an inner loop that de-interleaves a section of the input array into 2 seperate output arrays. The 2 output arrays are changing for each row, and their selection depends on the input type (_colorOrder).
So.. first change your algorithm to work like this:
void Utilities::SeparateChannels(int* channelR, int* channelG, int* channelB, double*& gr, double*& r, double*& b, double*& gb,int _width, int _height, int _colorOrder)
{
//swith case the color Order
int counter_R = 0;
int counter_GR = 0;
int counter_GB = 0;
int counter_B = 0;
double *split1, *split2;
switch (_colorOrder)
{
//grbg
case 0:
for (int i = 0; i < _height; i++)
{
if(i % 2 == 0)
{
split1 = gr + counter_GR;
split2 = r + counter_R;
counter_GR += _width / 2;
counter_R += _width / 2;
}
else
{
split1 = b + counter_B;
split2 = gb + counter_GB;
counter_B += _width / 2;
counter_GB += _width / 2;
}
int *channel = channelG + (i * _width);
// deinterleave(channel, split1, split2, _width);
}
Now all you need to do is de-interleave channel into split1 & split2 over _width elements. Do that in an optimized (ASM?), inlined function.
I have a Nx3 array which I need to fill as a function (so vector isn't an option). I already know how big N as as I feed it into the function as a parameter. I still get this stupid error of "must have a constant value", my code is:
double bspline_plot(double CP[], double Knot[], const int N, int d, int ncontrol, double *A){
// CP are the control points
//Knot is the knot vector
//N is the number of internal point you want in each segment
//d is the degree of the polynomials
double min_x, max_x, dx;
double *x_1;
x_1 = new double[N];
double A[N][2];
int i, j, M, L;
min_x = min(Knot);
max_x = max(Knot);
dx = (max_x - min_x) / N;
for (i = 0; i <= N; i = i + 1)
{
x_1[i] = min_x + dx*i;
}
M = ncontrol;
L = (sizeof(Knot) / sizeof(*Knot));
if (L < d + M + 1) // This checks if the number of control points are positive
{
printf("Incorrectly defined knot vector\n");
return;
}
else //This is the Cox - deBoor algorithm
{
for (i = 0; i <= N; i = i + 1)
{
for (j = 0; j <= L - 1; j = j + 1)
{
A[i][1] = A[i][1] + CP[j, 1] * CdB(j, d, x_1[i], Knot);
A[i][2] = A[i][2] + CP[j, 2] * CdB(j, d, x_1[i], Knot);
A[i][3] = A[i][3] + CP[j, 3] * CdB(j, d, x_1[i], Knot);
}
A[N][1] = CP[L, 2];
A[N][2] = CP[L, 2];
A[N][3] = CP[L, 1];
}
}
return A;
}
My other option is to feed in an array and then find it's values in the function but that seems a bit silly.
try to use std::vector in following way:
std::vector<std::vector<double>> A( N );
for( auto& row : A )
row.resize( M );
or
std::vector<std::vector<double>> A( N, std::vector<double>( M ));
From a quick inspection, the problem in your C++ code appears to be the following array declaration:
double A[N][2];
You need to dynamically allocate this 2d array as follows:
double** A = new double*[N];
for (int i=0; i<N; ++i)
A[i] = new double[2];
Have a look at this SO article for more information.
In the end I had to split A up into three vectors and change the output of the function from double to void and read in the (now) three vectors as double*. I can then just change the contents of the vectors and it now is showing no errors.