this is a part of my original code, the code is too big to put it all in here,
anyway my question is only related to Sads 4D matrix,
I don't want to use the int**** like it was suggested to me in my previous question
int main()
//4D matrix
int**** Sads = new int***[inputImage->HeightLines];
for (size_t i = 0; i < inputImage->HeightLines; i++)
Sads[i] = new int**[inputImage->WidthColumns];
for (size_t j = 0; j < inputImage->WidthColumns; j++)
Sads[i][j] = new int*[W_SIZE];
for (size_t k = 0; k < W_SIZE; k++)
Sads[i][j][k] = new int[W_SIZE];
ProcessRowsLoop(20, 1904, Sads);
void ProcessRowsLoop(int m_support, int m_height, int**** sads)
for (int row_in = m_support - 1; row_in < m_Height_in; row_in += BNLM_OUT_SZ)
ProcessRow( &Sads[indexRow]);
void ProcessRow(int**** sads)
int m_SAD_00[W_SIZE][W_SIZE];
int m_SAD_01[W_SIZE][W_SIZE];
int m_SAD_10[W_SIZE][W_SIZE];
int m_SAD_11[W_SIZE][W_SIZE];
RunAlgo(m_support, m_SAD_00, m_SAD_01, m_SAD_10, m_SAD_11, m_CP_00, m_CP_01, m_CP_10, m_CP_11, m_ColumnSADUp, m_ColumnSADDown);
for (size_t i = 0; i < W_SIZE; i++)
for (size_t j = 0; j < W_SIZE; j++)
Sads[0][m_col_out][i][j] = (m_SAD_00[i][j] + color_penalty_weight * m_CP_00[i][j]) / (sqrt(m_sigma_patch[0][0] / pow(mnm, 2)));
Sads[0][m_col_out + 1][i][j] = (m_SAD_01[i][j] + color_penalty_weight * m_CP_01[i][j]) / (sqrt(m_sigma_patch[0][1] / pow(mnm, 2)));
Sads[1][m_col_out][i][j] = (m_SAD_10[i][j] + color_penalty_weight + m_CP_10[i][j]) / (sqrt(m_sigma_patch[1][0] / pow(mnm, 2)));
Sads[1][m_col_out + 1][i][j] = (m_SAD_11[i][j] + color_penalty_weight + m_CP_11[i][j]) / (sqrt(m_sigma_patch[1][1] / pow(mnm, 2)));
In my new code I would like to replace the 4D matrix int**** Sads, with
struct VectorFourD
int _width, _height;
int _w_size;
std::vector<int> _vec;
VectorFourD(int width, int height, int size) : _width(width), _height(height), _w_size(size), _vec(totalSize())
auto totalSize() const-> int
return _width * _height * _w_size * _w_size;
int* at(int a)
return + (a * _height * _w_size * _w_size);
int* at(int a, int b)
return at(a) + (b * _w_size * _w_size);
int *at(int a, int b, int c)
return at(a, b) + (c* _w_size);
int& at(int a, int b, int c, int d)
return *(at(a, b, c) + d);
you can see that i'm iterating two lines at the same time in the function processrow()
running over
Sads[0][m_col_out][i][j], Sads[0][m_col_out + 1][i][j],
Sads[1][m_col_out][i][j], Sads[1][m_col_out + 1][i][j]
at the same time, my question is how do I change my code to work with the new 4dvector for example
int main()
VectorFourD SadsVec = VectorFourD(inputImage->HeightLines, inputImage->WidthColumns, W_SIZE);
ProcessRowsLoop(20, 1904, SadsVec);
also change the function void ProcessRowsLoop(int m_support, int m_height, VectorFourD* SadsVec)
but i don't know how to continue from here, can you please help?
I'm creating a Matrix math library with CUDA to improve my CNNs performance (and to understand C++ better).
I would like to be able to add error handling and tell the user (me) what has gone wrong when using the matrix class.
This can be seen in my main file as, in this case, I'm trying to add a 10 * 10 matrix to a 15 * 15 matrix. This is an impossible action and would like some output to tell the user. for example
Error in file "" on line: 9 (Dimensions inconsistent)
If you check inside the function the line number is line number of the check and I've looked at using macros to check but I'm wondering if there is another way without having to call the macro every time I add two matrices together.
#include "Matrix.cuh"
int main() {
double* init;
cudaMallocManaged(&init, sizeof(double));
Matrix A(10, 10, 2);
Matrix B(15, 15, 3);
Matrix C = A + B;
return 0;
#include "Matrix.cuh"
void sumMatrix(Matrix* A, Matrix* B, Matrix* C)
int x = blockIdx.x * BLOCK_SIZE + threadIdx.x;
int y = blockIdx.y * BLOCK_SIZE + threadIdx.y;
if (x < A->ColumnCount && y < A->RowCount)
C->VALUES[y * A->ColumnCount + x] = A->VALUES[y * A->ColumnCount + x] + B->VALUES[y * A->ColumnCount + x];
void matrixInit(Row* rows, int R, int C, double* VALUES, double val) {
int x = blockIdx.x * BLOCK_SIZE + threadIdx.x;
int y = blockIdx.y * BLOCK_SIZE + threadIdx.y;
if (x < C && y < R)
if (x == 0)
rows[y].Count = C;
rows[y].values = VALUES + C * y;
VALUES[y * C + x] = val;
Matrix::Matrix(int R, int C, double val)
cudaMallocManaged(&VALUES, R * C * sizeof(double));
cudaMallocManaged(&rows, R * sizeof(Row));
RowCount = R;
ColumnCount = C;
dim3 gridDim(ceil(C / (double)BLOCK_SIZE), ceil(R / (double)BLOCK_SIZE), 1);
dim3 blockDim(BLOCK_SIZE, BLOCK_SIZE, 1);
matrixInit << <gridDim, blockDim >> > (rows, R, C, VALUES, val);
cudaCheckErrors("MATRIX INIT VAL");
Matrix::Matrix(int R, int C)
cudaMallocManaged(&VALUES, R * C * sizeof(double));
cudaMallocManaged(&rows, R * sizeof(Row));
RowCount = R;
ColumnCount = C;
dim3 gridDim(ceil(C / (double)BLOCK_SIZE), ceil(R / (double)BLOCK_SIZE), 1);
dim3 blockDim(BLOCK_SIZE, BLOCK_SIZE, 1);
matrixInit << <gridDim, blockDim >> > (rows, R, C, VALUES, 0);
cudaCheckErrors("MATRIX INIT VAL");
void Matrix::updatePointers()
for (size_t i = 0; i < RowCount; i++)
rows[i].values = VALUES + (i * ColumnCount);
void Matrix::removePointers()
VALUES = nullptr;
rows = nullptr;
void Matrix::printM(const char* msg)
std::cout << "Matrix " << msg << ": " << RowCount << "*" << ColumnCount << std::endl;
for (size_t i = 0; i < RowCount; i++)
for (size_t j = 0; j < ColumnCount; j++)
std::cout << rows[i][j] << " ";
std::cout << std::endl;
Matrix Matrix::sum(Matrix B)
Matrix* A_p, * B_p, * C_p;
Matrix C(RowCount, ColumnCount);
cudaMallocManaged(&A_p, sizeof(Matrix));
cudaMallocManaged(&B_p, sizeof(Matrix));
cudaMallocManaged(&C_p, sizeof(Matrix));
memcpy(A_p, this, sizeof(Matrix));
memcpy(B_p, &B, sizeof(Matrix));
memcpy(C_p, &C, sizeof(Matrix));
dim3 gridDim(ceil(ColumnCount / (double)BLOCK_SIZE), ceil(RowCount / (double)BLOCK_SIZE), 1);
dim3 blockDim(BLOCK_SIZE, BLOCK_SIZE, 1);
sumMatrix << < gridDim, blockDim >> > (A_p, B_p, C_p);
return *C_p;
Row& Matrix::operator[](size_t i)
if (i >= RowCount)
std::cout << "OUT OF BOUNDS";
return rows[i];
Matrix& Matrix::operator+(Matrix B)
Matrix C = sum(B);
Matrix* C_p;
cudaMallocManaged(&C_p, sizeof(Matrix));
memcpy(C_p, &C, sizeof(Matrix));
return *C_p;
if (VALUES != nullptr && rows != nullptr)
I have a problem with calling this function:
void powell(float p[], float **xi, int n,
float ftol, int *iter, float *fret,
float (*func)(float []))
I don't know which argument must be under **xi to run my code.
Whole function below:
void powell(float p[], float** xi, int n, float ftol, int* iter, float* fret, float (*func)(float[]))
void linmin(float p[], float xi[], int n, float* fret, float (*func)(float[]));
int i, ibig, j;
float del, fp, fptt, t, *pt, *ptt, *xit;
pt = vector(1, n);
ptt = vector(1, n);
xit = vector(1, n);
*fret = (*func)(p);
for (j = 1; j <= n; j++)
pt[j] = p[j];
for (*iter = 1;; ++(*iter)) {
fp = (*fret);
ibig = 0;
del = 0.0;
for (i = 1; i <= n; i++) {
for (j = 1; j <= n; j++)
xit[j] = xi[j][i];
fptt = (*fret);
linmin(p, xit, n, fret, func);
if (fptt - (*fret) > del) {
del = fptt - (*fret);
ibig = i;
if (2.0 * (fp - (*fret)) <= ftol * (fabs(fp) + fabs(*fret)) + TINY) {
free_vector(xit, 1, n);
free_vector(ptt, 1, n);
free_vector(pt, 1, n);
if (*iter == ITMAX)
nrerror("powell exceeding maximum iterations.");
for (j = 1; j <= n; j++) {
ptt[j] = 2.0 * p[j] - pt[j];
xit[j] = p[j] - pt[j];
pt[j] = p[j];
fptt = (*func)(ptt);
if (fptt < fp) {
t = 2.0 * (fp - 2.0 * (*fret) + fptt) * SQR(fp - (*fret) - del) - del * SQR(fp - fptt);
if (t < 0.0) {
linmin(p, xit, n, fret, func);
for (j = 1; j <= n; j++) {
xi[j][ibig] = xi[j][n];
xi[j][n] = xit[j];
Thanks in advance.
A double pointer means that the function wants the address of a pointer.
void my_function(int **p_pointer)
*p_pointer = new int[42];
int main(void)
int * pointer = nullptr;
return 0;
In C++, the double pointer can be avoided by using reference:
void another_function(int *& pointer)
pointer = new int [256];
int main(void)
int p = nullptr;
return 0;
One of the primary concerns with pointers is that they can point to anywhere, a defined location or not. Testing a pointer for validity is complex because it depends on the range (or ranges) that are valid for the current platform. With references, the reference is valid, by definition, so no validity checks need to be performed.
I have tried to extract patches from an image parallelly with pixel shift/overlapping. I have written the CPU version of the code. But I could not able to convert the for loop which has an increment of pixel shift. I have given the part of the code where for loop is being used. CreatePatchDataSet function has the "for loop " which has an increment of pixel shift. Please help me out to convert this function into Cuda. I have provided the following code.
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <iostream>
#include <fstream>
#include <sstream>
#include <random>
#include <vector>
#include <omp.h>
using namespace std;
using namespace cv;
#define PATCH_SIZE (5)
#define PIXEL_SHIFT (2)
void ConvertMat2DoubleArray(cv::Mat input, double* output)
for (int i = 0; i < input.rows; i++)
double *src = input.ptr<double>(i);
for (int j = 0; j < input.cols; j++)
output[input.cols * input.channels() * i + input.channels() * j + 0] = src[j];
void GetNumOfPatch(const int width, const int height, const int patch_size, const int pixel_shift, int* num_of_patch, int* num_of_patch_col, int* num_of_patch_row) {
*num_of_patch_col = 0;
int len_nb = 0;
while (len_nb < width) {
if (len_nb != 0) {
len_nb += patch_size - (patch_size - pixel_shift);
else {
len_nb += patch_size;
len_nb = 0;
*num_of_patch_row = 0;
while (len_nb < height) {
if (len_nb != 0) {
len_nb += patch_size - (patch_size - pixel_shift);
else {
len_nb += patch_size;
*num_of_patch = (*num_of_patch_col) * (*num_of_patch_row);
void CreatePatchDataSet(double *original_data, double* patch_data, const int width, const int height, const int pixel_shift, const int patch_size, const int num_of_patch_col, const int num_of_patch_row) {
int counter_row = 0;
int num_of_patch_image = num_of_patch_row * num_of_patch_col;
for (int i = 0; i < height; i += pixel_shift) {
int counter_col = 0;
for (int j = 0; j < width; j += pixel_shift) {
//Get Low Resolution Image
for (int ii = 0; ii < patch_size; ii++) {
for (int jj = 0; jj < patch_size; jj++) {
if ((i + ii) < height && (j + jj) < width) {
patch_data[num_of_patch_image * (patch_size * ii + jj) + num_of_patch_col*counter_row + counter_col] = original_data[width*(i + ii) + (j + jj)];
else {
patch_data[num_of_patch_image * (patch_size * ii + jj) + num_of_patch_col*counter_row + counter_col] = 0.;
if (counter_col == num_of_patch_col) {
if (counter_row == num_of_patch_row) {
int main()
int ratio=2;
cv::Mat image = cv::imread("input_b2_128.tif", CV_LOAD_IMAGE_UNCHANGED);
cv::Mat imageH = cv::Mat(image.rows * ratio, image.cols * ratio, CV_8UC1);
cv::resize(image, imageH, cv::Size(imageH.cols, imageH.rows), 0, 0,
double* orgimageH = (double*)calloc(imageH.cols*imageH.rows*image.channels(), sizeof(double));
ConvertMat2DoubleArray(imageH, orgimageH);
int widthH = imageH.cols;
int heightH = imageH.rows;
int dimH = (int)PATCH_SIZE * (int)PATCH_SIZE* (int)image.channels();
int dimL = (int)PATCH_SIZE/ratio* (int)PATCH_SIZE/ratio * (int)image.channels();
//3. Create training data set=========================
int num_of_patch_image = 0;
int num_of_patch_col = 0;
int num_of_patch_row = 0;
GetNumOfPatch(widthH, heightH, (int)PATCH_SIZE, (int)PIXEL_SHIFT, &num_of_patch_image, &num_of_patch_col, &num_of_patch_row);
cout<<"patch numbers: \n " << num_of_patch_image << endl;
double* FY = (double*)calloc(dimH * num_of_patch_image, sizeof(double));
CreatePatchDataSet(orgimageH, FY, widthH, heightH, (int)PIXEL_SHIFT, (int)PATCH_SIZE, num_of_patch_col, num_of_patch_row);
return 0;
The results I got for first 10 values in CPU version:
patch numbers:
I have tried to convert this function to Kernel function using cuda:. But it goes into the infinite loop. As I am very new to this CUDA field, could you please help me to find out the problem in the code ?
__global__ void CreatePatchDataSet(double *original_data, double* patch_data, const int width, const int height, const int pixel_shift, const int patch_size, const int num_of_patch_col, const int num_of_patch_row) {
int num_of_patch_image = num_of_patch_row * num_of_patch_col;
int i = threadIdx.x + (blockDim.x*blockIdx.x);
int j = threadIdx.y + (blockDim.y*blockIdx.y);
while (i<height && j< width)
int counter_row = 0;
int counter_col = 0;
//Get Low Resolution Image
for (int ii = 0; ii < patch_size; ii++) {
for (int jj = 0; jj < patch_size; jj++) {
if ((i + ii) < height && (j + jj) < width) {
patch_data[num_of_patch_image * (patch_size * ii + jj) + num_of_patch_col*counter_row + counter_col] = original_data[width*(i + ii) + (j + jj)];
else {
patch_data[num_of_patch_image * (patch_size * ii + jj) + num_of_patch_col*counter_row + counter_col] = 0.;
if (counter_col == num_of_patch_col) {
if (counter_row == num_of_patch_row) {
i+= blockDim.x*gridDim.x;
j+= blockDim.y*gridDim.y;
int main()
int ratio=2;
cv::Mat image = cv::imread("input_b2_128.tif", CV_LOAD_IMAGE_UNCHANGED);
cv::Mat imageH = cv::Mat(image.rows * ratio, image.cols * ratio, CV_8UC1);
cv::resize(image, imageH, cv::Size(imageH.cols, imageH.rows), 0, 0, cv::INTER_LANCZOS4);
double *orgimageH = (double*)calloc(imageH.cols*imageH.rows*image.channels(), sizeof(double));
ConvertMat2DoubleArray(imageH, orgimageH);
int widthH = imageH.cols;
int heightH = imageH.rows;
int dimH = (int)PATCH_SIZE * (int)PATCH_SIZE* (int)image.channels();
int dimL = (int)PATCH_SIZE/ratio* (int)PATCH_SIZE/ratio * (int)image.channels();
//3. Create training data set=========================
int num_of_patch_image = 0;
int num_of_patch_col = 0;
int num_of_patch_row = 0;
GetNumOfPatch(widthH, heightH, (int)PATCH_SIZE, (int)PIXEL_SHIFT, &num_of_patch_image, &num_of_patch_col, &num_of_patch_row);
cout<<"patch numbers: \n " << num_of_patch_image << endl;
double* FY = (double*)calloc(dimH * num_of_patch_image, sizeof(double));
double *d_orgimageH;
gpuErrchk(cudaMalloc ((void**)&d_orgimageH, sizeof(double)*widthH*heightH));
double *d_FY;
gpuErrchk(cudaMalloc ((void**)&d_FY, sizeof(double)* dimH * num_of_patch_image));
gpuErrchk(cudaMemcpy(d_orgimageH , orgimageH , sizeof(double)*widthH*heightH, cudaMemcpyHostToDevice));
dim3 dimBlock(16, 16);
dim3 dimGrid;
dimGrid.x = (widthH + dimBlock.x - 1) / dimBlock.x;
dimGrid.y = (heightH + dimBlock.y - 1) / dimBlock.y;
CreatePatchDataSet<<<dimGrid,dimBlock>>>(d_orgimageH, d_FY, widthH, heightH, (int)PIXEL_SHIFT, (int)PATCH_SIZE, num_of_patch_col, num_of_patch_row);
gpuErrchk(cudaMemcpy(FY,d_FY, sizeof(double)*dimH * num_of_patch_image, cudaMemcpyDeviceToHost));
// cout<<"Hello world";
return 0;
Image I have used: [1]:
i+= blockDim.x*gridDim.x;
j+= blockDim.y*gridDim.y;
is outside the while loop in your kernel. As i and j never change inside the while loop, it isn't stopping. There could be more problems here, but this is the most prominent one.
EDIT: Another one that I found, is that you have only one while over both i and j instead of one for each. You should probably use for loops like in your CPU code:
for (i = pixel_shift * (threadIdx.x + (blockDim.x*blockIdx.x));
i < height;
i += pixel_shift * blockDim.x * gridDim.x) {
for (j = ...; j < ...; j += ...) {
/* ... */
I could imagine this to be a good idea:
for (counter_row = threadIdx.y + blockDim.y * blockIdx.y;
counter_row < num_of_patch_row;
counter_row += blockDim.y * gridDim.y) {
i = counter_row * pixel_shift;
if (i > height)
for (counter_col = threadIdx.x + blockDim.x * blockIdx.x;
counter_col < num_of_patch_col;
counter_col += blockDim.x * gridDim.x) {
j = counter_col * pixel_shift;
if (j > width)
/* ... */
I have also exchanged the x/y fields of the execution parameters between the inner and the outer loop, as it seemed more appropriate considering that the x field is continuous in warps (memory access benefits).
I have two overloaded functions: "ChooseElements", which chooses elements from passed array, and "SortElements", which sorts elements of passed array. One pair works with INT data, and another one with FLOAT.
int * ChooseElements(int * X, int n, int & m)
int * Y = NULL;
for (int i = 0; i < n; i++)
if (X[i] > 0)
if (Y == NULL)
m = 1;
Y = new int[1];
Y[0] = X[i];
Y = (int *)realloc(Y, sizeof(int) * m);
Y[m - 1] = X[i];
return Y;
float * ChooseElements(float * X, int n, int & m)
float * Y = NULL;
for (int i = 0; i < n; i++)
if (X[i] > 0)
if (Y == NULL)
m = 1;
Y = new float[1];
Y[0] = X[i];
Y = (float *)realloc(Y, sizeof(float) * m);
Y[m - 1] = X[i];
return Y;
int * SortElements(int m, int *& Y)
for (int i = 1; i < m; i++)
for (int j = 0; j < m - i; j++)
if (Y[j] > Y[j + 1])
int Temp = Y[j];
Y[j] = Y[j + 1];
Y[j + 1] = Temp;
return Y;
float * SortElements(int m, float *& Y)
for (int i = 1; i < m; i++)
for (int j = 0; j < m - i; j++)
if (Y[j] > Y[j + 1])
float Temp = Y[j];
Y[j] = Y[j + 1];
Y[j + 1] = Temp;
return Y;
What I want to do is pass first function as argument to second one. Like that:
int n, m;
int * X = NULL, * Y = NULL;
/* ...
Some code in which n and X are initialized
... */
Y = SortElements(m, ChooseElements(X, n, m));
However, when I try to do that, Visual Studio 2017 tells me:
no instance of overloaded function "SortElements" matches the argument list
argument types are: (int, int *)
If I do this instead:
Y = ChooseElements(X, n, m);
Y = SortElements(m, Y);
everything works fine.
If I remove overloads and leave only INT pair and once again try
int n, m;
int * X = NULL, * Y = NULL;
/* ...
Some code in which n and X are initialized
... */
Y = SortElements(m, ChooseElements(X, n, m));
I get another problem:
int *ChooseElements(int *X, int n, int &m)
initial value of reference to non-const value must be an lvalue
What am I doing wrong? My teacher asks for a function which uses another function as an argument. What I have written does not work, and I have no idea what could be done here.
In your int * SortElements(int m, int *& Y)
function you are using : int *& Y. So you have a reference to a int pointer. My guess is that you don't need that.
You can just use int * Y as a parameter as a solution.
Int *& Y - needs an lvalue(like your variable Y) but your ChooseElements function returns only a temporary object(rvalue) because you are returning by value.
I'm getting the following errors due to the namespace cpl?
I included Wavepacket.cpp and Vector.hpp below.
obj\Debug\wavepacket.o||In function `Z10initializev':|
wavepacket.cpp|79|undefined reference to `cpl::Vector::Vector(int)'|
wavepacket.cpp|79|undefined reference to `cpl::Vector::operator=(cpl::Vector const&)'|
wavepacket.cpp|80|undefined reference to `cpl::ComplexVector::ComplexVector(int)'|
wavepacket.cpp|80|undefined reference to `cpl::ComplexVector::operator=(cpl::ComplexVector const&)'|
wavepacket.cpp|81|undefined reference to `cpl::ComplexVector::ComplexVector(int)'|
wavepacket.cpp|81|undefined reference to `cpl::ComplexVector::operator=(cpl::ComplexVector const&)'|
wavepacket.cpp|101|undefined reference to `cpl::ComplexVector::ComplexVector(int)'|
wavepacket.cpp|101|undefined reference to `cpl::ComplexVector::operator=(cpl::ComplexVector const&)'|
wavepacket.cpp|102|undefined reference to `cpl::ComplexVector::ComplexVector(int)'|
wavepacket.cpp|102|undefined reference to `cpl::ComplexVector::operator=(cpl::ComplexVector const&)'|
wavepacket.cpp|103|undefined reference to `cpl::ComplexVector::ComplexVector(int)'|
wavepacket.cpp|103|undefined reference to `cpl::ComplexVector::operator=(cpl::ComplexVector const&)'|
obj\Debug\wavepacket.o||In function `Z8timeStepv':|
wavepacket.cpp|124|undefined reference to `cpl::solveTridiagonalCyclic(cpl::ComplexVector&, cpl::ComplexVector&, cpl::ComplexVector&, std::complex<double>, std::complex<double>, cpl::ComplexVector&, cpl::ComplexVector&)'|
wavepacket.cpp|126|undefined reference to `cpl::solveTridiagonal(cpl::ComplexVector&, cpl::ComplexVector&, cpl::ComplexVector&, cpl::ComplexVector&, cpl::ComplexVector&)'|
obj\Debug\wavepacket.o||In function `_static_initialization_and_destruction_0':|
wavepacket.cpp|22|undefined reference to `cpl::Vector::Vector(int)'|
wavepacket.cpp|71|undefined reference to `cpl::ComplexVector::ComplexVector(int)'|
wavepacket.cpp|71|undefined reference to `cpl::ComplexVector::ComplexVector(int)'|
wavepacket.cpp|72|undefined reference to `cpl::ComplexVector::ComplexVector(int)'|
wavepacket.cpp|72|undefined reference to `cpl::ComplexVector::ComplexVector(int)'|
wavepacket.cpp|72|undefined reference to `cpl::ComplexVector::ComplexVector(int)'|
||=== Build finished: 20 errors, 0 warnings ===|
#include <cmath>
#include <complex>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <string>
#include <sstream>
#include <GL/gl.h>
#include <GL/glu.h>
#include <GL/glut.h>
#include "Vector.hpp"
const double pi = 4*std::atan(1.0);
double h_bar = 1; // natural units
double mass = 1; // natural units
// The spatial grid
int N = 200; // number of interior grid points
double L = 100; // system extends from x=0 to x=L
double h = L / (N + 1); // grid size
double tau = 1; // time step
cpl::Vector x; // coordinates of grid points
bool periodic = true; // false = oo potential, true = periodic
// The potential V(x)
double V0 = 1.0; // height of potential well
double Vwidth = 10; // width of potential well
double Vcenter = 0.75 * L; // center of potential well
bool gaussian; // false = step potential
double V(double x) {
double halfWidth = std::abs(0.5 * Vwidth);
if (gaussian) {
double dx = (x - Vcenter) / halfWidth;
return V0 * std::exp( - dx * dx / 2);
} else {
if (std::abs(x - Vcenter) <= halfWidth)
return V0;
return 0;
// Inital wave packet
double x0 = L / 4; // location of center
double E = 1; // average energy
double sigma0 = L / 10; // width of wave packet
double Norm_psi; // norm of psi
double k0; // average wavenumber
double velocity; // average velocity
void getInput() {
std::cout << "Time-dependent Schroedinger Equation\n";
std::cout << "Enter size of x region L = ";
std::cin >> L;
std::cout << "Enter number of grid points N = ";
std::cin >> N;
std::cout << "Enter integration time step tau = ";
std::cin >> tau;
std::cout << "Enter width of potential = ";
std::cin >> Vwidth;
std::cout << "Enter height of potential V0 = ";
std::cin >> V0;
std::cout << "Enter width of packet sigma = ";
std::cin >> sigma0;
std::cout << "Enter energy of packet E = ";
std::cin >> E;
double t; // time
cpl::ComplexVector psi, chi; // complex wavefunction
cpl::ComplexVector a, b, c; // to represent tridiagonal Q matrix
std::complex<double> alpha, beta; // corner elements of Q
void initialize () {
t = 0;
// reset vectors
x = cpl::Vector(N);
psi = cpl::ComplexVector(N);
chi = cpl::ComplexVector(N);
// reset the lattice
h = L / (N + 1);
for (int j = 0; j < N; j++)
x[j] = (j + 1) * h;
// inititalize the packet
k0 = std::sqrt(2*mass*E - h_bar*h_bar/2/sigma0/sigma0) / h_bar;
velocity = k0 / mass;
Norm_psi = 1 / std::sqrt(sigma0 * std::sqrt(pi));
for (int j = 0; j < N; j++) {
double expFactor = std::exp(-(x[j] - x0) * (x[j] - x0)
/ (2 * sigma0 * sigma0));
psi[j] = std::complex<double>(
Norm_psi * std::cos(k0 * x[j]) * expFactor,
Norm_psi * std::sin(k0 * x[j]) * expFactor);
// elements of tridiagonal matrix Q = (1/2)(1 + i tau H / (2 hbar))
a = cpl::ComplexVector(N);
b = cpl::ComplexVector(N);
c = cpl::ComplexVector(N);
for (int j = 0; j < N; j++) {
const std::complex<double> i(0.0, 1.0);
b[j] = 0.5 + i * tau / (4 * h_bar) *
(V(x[j]) + h_bar * h_bar / (mass * h * h));
a[j] = c[j] = - i * tau * h_bar / (8 * mass * h * h);
alpha = c[N-1];
beta = a[0];
double T = 5; // time to travel length L
double framesPerSec = 50; // animation rate for screen redraws
void timeStep() {
static std::clock_t clockStart;
static bool done;
if (!done) {
double t0 = t;
do {
if (periodic)
solveTridiagonalCyclic(a, b, c, alpha, beta, psi, chi);
solveTridiagonal(a, b, c, psi, chi);
for (int j = 0; j < N; j++)
psi[j] = chi[j] - psi[j];
t += tau;
} while (std::abs(velocity * (t - t0)) < L / T / framesPerSec);
done = true;
std::clock_t clockNow = std::clock();
double seconds = (clockNow - clockStart) / double(CLOCKS_PER_SEC);
if ( seconds < 1 / framesPerSec ) {
} else {
clockStart = clockNow;
done = false;
void drawText(const std::string& str, double x, double y) {
glRasterPos2d(x, y);
int len = str.find('\0');
for (int i = 0; i < len; i++)
glutBitmapCharacter(GLUT_BITMAP_HELVETICA_12, str[i]);
bool showRealImaginary; // false = probability only
void display() {
if (showRealImaginary) {
glColor3f(0, 0, 1); // real part of psi blue
for (int j = 1; j < N; j++) {
glVertex2d(x[j-1], psi[j-1].real());
glVertex2d(x[j], psi[j].real());
glColor3f(0, 1, 0); // imaginary part of psi green
for (int j = 1; j < N; j++) {
glVertex2d(x[j-1], psi[j-1].imag());
glVertex2d(x[j], psi[j].imag());
glColor3f(1, 0, 0); // probability red
double pOld = psi[0].real() * psi[0].real() +
psi[0].imag() * psi[0].imag();
for (int j = 1; j < N; j++) {
double p = psi[j].real() * psi[j].real() +
psi[j].imag() * psi[j].imag();
glVertex2d(x[j-1], 4 * pOld);
glVertex2d(x[j], 4 * p);
pOld = p;
glColor3ub(255, 165, 0); // potential orange
double Vold = V(x[1]);
for (int j = 1; j < N; j++) {
double Vnew = V(x[j]);
glVertex2d(x[j-1], 0.2 * Vold);
glVertex2d(x[j], 0.2 * Vnew);
Vold = Vnew;
glColor3f(0, 0, 0); // text black
std::ostringstream os;
os << (periodic ? "Periodic " : "Infinite Wall ")
<< "Boundary Conditions" << std::ends;
drawText(os.str(), 0.02 * L, 0.28);
os.seekp(0); // beginning of string stream
os << "0" << std::ends;
drawText(os.str(), 0, -0.02);
drawText("0", 0, -0.02);
os << "x = " << L << std::ends;
drawText(os.str(), (1 - 0.1) * L, -0.02);
os << "t = " << t << std::ends;
drawText(os.str(), 0.02 * L, -0.29);
void reshape(int w, int h) {
glViewport(0, 0, w, h);
gluOrtho2D(0, L, -0.3, 0.3);
bool running; // to control animation
void mouse(int button, int state, int x, int y) {
switch (button) {
if (state == GLUT_DOWN) {
if (running) {
running = false;
} else {
running = true;
void menu(int menuItem) {
switch (menuItem) {
case 1:
gaussian = !gaussian;
case 2:
periodic = !periodic;
case 3:
showRealImaginary = !showRealImaginary;
case 4:
if (running) {
running = false;
int main(int argc, char *argv[]) {
glutInit(&argc, argv);
glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB);
glutInitWindowSize(600, 400);
glutInitWindowPosition(100, 100);
glutCreateWindow("Schroedinger Wave Packet Motion");
glClearColor(1.0, 1.0, 1.0, 0.0);
glutAddMenuEntry("Potential: Square/Gaussian", 1);
glutAddMenuEntry("Boundaries: Dirichlet/Periodic", 2);
glutAddMenuEntry("Real & Imag: Show/Hide", 3);
glutAddMenuEntry("Reset", 4);
#include <complex>
#include <iostream>
namespace cpl {
class Vector {
Vector(int dim = 1);
Vector(const Vector& dv);
~Vector() { delete [] v; }
int dimension() const { return dim; }
void resize(const int);
const double operator[](const int i) const { return v[i]; }
double& operator[](const int i) { return v[i]; }
Vector& operator = (const Vector& dv);
Vector& operator += (const Vector& dv);
Vector& operator -= (const Vector& dv);
Vector& operator *= (double d);
Vector& operator /= (double d);
double abs();
double norm();
double dot(const Vector& dv);
friend std::ostream& operator<<(std::ostream& os, const Vector& dv);
int dim;
double *v;
inline Vector operator + (const Vector& dv) {
return dv;
extern Vector operator - (const Vector& dv);
extern Vector operator * (const Vector& dv, double d);
extern Vector operator * (double d, const Vector& dv);
extern Vector operator / (const Vector& dv, double d);
extern Vector operator + (const Vector& v1, const Vector& v2);
extern Vector operator - (const Vector& v1, const Vector& v2);
class ComplexVector {
ComplexVector(int dim = 1);
ComplexVector(const ComplexVector& cv);
~ComplexVector() { delete [] v; }
int dimension() const { return dim; }
const std::complex<double> operator[](const int i) const { return v[i]; }
std::complex<double>& operator[](const int i) { return v[i]; }
ComplexVector& operator = (const ComplexVector& cv);
int dim;
std::complex<double> *v;
class FFT {
FFT() { N = 0; f = 0; inverse = false; }
void transform(ComplexVector& data);
void inverseTransform(ComplexVector& data);
Vector power(ComplexVector& data);
int N;
ComplexVector *f;
bool inverse;
void bitReverse();
void DanielsonLanczos(int n);
extern void solveTridiagonal(
ComplexVector& a, ComplexVector& b, ComplexVector& c,
ComplexVector& r, ComplexVector& u);
extern void solveTridiagonalCyclic(
ComplexVector& a, ComplexVector& b, ComplexVector& c,
std::complex<double> alpha, std::complex<double> beta,
ComplexVector& r, ComplexVector& x);
} /* end namespace cpl */
#endif /* CPL_VECTOR_HPP */
EDIT I didn't want to delete this post incase someone needed it but I forgot to use Vector.cpp which is below.
#include "Vector.hpp"
namespace cpl {
Vector::Vector(int dim) {
v = new double [this->dim = dim];
for (int i = 0; i < dim; i++) v[i] = 0;
Vector::Vector(const Vector& dv) {
v = new double [dim = dv.dim];
for (int i = 0; i < dim; i++) v[i] = dv.v[i];
void Vector::resize(const int dimension) {
delete [] v;
v = new double [dim = dimension];
for (int i = 0; i < dim; i++) v[i] = 0;
Vector& Vector::operator = (const Vector& dv) {
if (this != &dv) {
if (dim != dv.dim) {
delete [] v;
v = new double [dim = dv.dim];
for (int i = 0; i < dim; i++) v[i] = dv[i];
return *this;
Vector& Vector::operator += (const Vector& dv) {
for (int i = 0; i < dim; i++) v[i] += dv[i];
return *this;
Vector& Vector::operator -= (const Vector& dv) {
for (int i = 0; i < dim; i++) v[i] -= dv[i];
return *this;
Vector& Vector::operator *= (double d) {
for (int i = 0; i < dim; i++) v[i] *= d;
return *this;
Vector& Vector::operator /= (double d) {
for (int i = 0; i < dim; i++) v[i] /= d;
return *this;
Vector operator - (const Vector& dv) {
int dim = dv.dimension();
Vector temp(dim);
for (int i = 0; i < dim; i++) temp[i] = -dv[i];
return temp;
Vector operator * (const Vector& dv, double d) {
int dim = dv.dimension();
Vector temp(dim);
for (int i = 0; i < dim; i++) temp[i] = dv[i] * d;
return temp;
Vector operator * (double d, const Vector& dv) {
int dim = dv.dimension();
Vector temp(dim);
for (int i = 0; i < dim; i++) temp[i] = dv[i] * d;
return temp;
Vector operator / (const Vector& dv, double d) {
int dim = dv.dimension();
Vector temp(dim);
for (int i = 0; i < dim; i++) temp[i] = dv[i] / d;
return temp;
Vector operator + (const Vector& v1, const Vector& v2) {
int dim = v1.dimension();
Vector temp(dim);
for (int i = 0; i < dim; i++) temp[i] = v1[i] + v2[i];
return temp;
Vector operator - (const Vector& v1, const Vector& v2) {
int dim = v1.dimension();
Vector temp(dim);
for (int i = 0; i < dim; i++) temp[i] = v1[i] - v2[i];
return temp;
double Vector::abs() {
return std::sqrt(norm());
double Vector::norm() {
double sum = 0;
for (int i = 0; i < dim; i++) sum += v[i] * v[i];
return sum;
double Vector::dot(const Vector& dv) {
double sum = 0;
for (int i = 0; i < dim; i++) sum += v[i] * dv[i];
return sum;
std::ostream& operator<<(std::ostream& os, const Vector& dv) {
for (int i = 0; i < dv.dim; i++) {
os << dv.v[i];
if (i < dv.dim-1)
os << '\t';
os << '\n';
return os;
// ComplexVector implementation
ComplexVector::ComplexVector(int dim) {
v = new std::complex<double> [this->dim = dim];
for (int i = 0; i < dim; i++) v[i] = 0.0;
ComplexVector::ComplexVector(const ComplexVector& cv) {
v = new std::complex<double> [dim = cv.dim];
for (int i = 0; i < dim; i++) v[i] = cv.v[i];
ComplexVector& ComplexVector::operator = (const ComplexVector& cv) {
if (this != &cv) {
if (dim != cv.dim) {
delete [] v;
v = new std::complex<double> [dim = cv.dim];
for (int i = 0; i < dim; i++) v[i] = cv[i];
return *this;
// FFT implementation
void FFT::transform(ComplexVector& data) {
N = data.dimension();
f = &data;
for (int n = 1; n < N; n *= 2)
for (int i = 0; i < N; ++i)
(*f)[i] /= std::sqrt(double(N));
void FFT::inverseTransform(ComplexVector& data) {
inverse = true;
inverse = false;
void FFT::bitReverse() {
int j = 1;
for (int i = 1; i < N; ++i) {
if (i < j) {
std::complex<double> temp = (*f)[i-1];
(*f)[i-1] = (*f)[j-1];
(*f)[j-1] = temp;
int k = N / 2;
while ( k < j ) {
j -= k;
k /= 2;
j += k;
void FFT::DanielsonLanczos(int n) {
const double pi = 4 * atan(1.0);
std::complex<double> W(0, pi / n);
W = inverse ? std::exp(-W) : std::exp(W);
std::complex<double> W_j(1, 0);
for (int j = 0; j < n; ++j) {
for (int i = j; i < N; i += 2 * n) {
std::complex<double> temp = W_j * (*f)[n+i];
(*f)[n+i] = (*f)[i] - temp;
(*f)[i] += temp;
W_j *= W;
Vector FFT::power(ComplexVector& data) {
Vector P(1 + N / 2);
P[0] = std::norm(data[0]) / double(N);
for (int i = 1; i < N / 2; i++)
P[i] = (std::norm(data[i]) + std::norm(data[N-i])) / double(N);
P[N/2] = std::norm(data[N/2]) / double(N);
return P;
// Solving tridiagonal complex matrices
void solveTridiagonal(
ComplexVector& a, ComplexVector& b, ComplexVector& c,
ComplexVector& r, ComplexVector& u)
int n = a.dimension();
ComplexVector gamma(n);
std::complex<double> beta = b[0];
u[0] = r[0] / beta;
for (int j = 1; j < n; j++) {
gamma[j] = c[j-1] / beta;
beta = b[j] - a[j] * gamma[j];
u[j] = (r[j] - a[j] * u[j-1]) / beta;
for (int j = n - 2; j >= 0; j--)
u[j] -= gamma[j+1] * u[j+1];
void solveTridiagonalCyclic(
ComplexVector& a, ComplexVector& b, ComplexVector& c,
std::complex<double> alpha, std::complex<double> beta,
ComplexVector& r, ComplexVector& x)
int n = a.dimension();
ComplexVector bb(n), u(n), z(n);
std::complex<double> gamma = -b[0];
bb[0] = b[0] - gamma;
bb[n-1] = b[n-1] - alpha * beta / gamma;
for (int i = 1; i < n-1; i++)
bb[i] = b[i];
solveTridiagonal(a, bb, c, r, x);
u[0] = gamma;
u[n-1] = alpha;
for (int i = 1; i < n-1; i++)
u[i] = 0.0;
solveTridiagonal(a, bb, c, u, z);
std::complex<double> fact = x[0] + beta * x[n-1] / gamma;
fact /= 1.0 + z[0] + beta * z[n-1] / gamma;
for (int i = 0; i < n; i++)
x[i] -= fact * z[i];
} /* end namespace cpl */
it's probably your build script that's not configured correctly. Your code compiled for me when I used the following commands:
g++ -c Vector.cpp -o Vector.o
g++ -c Wavepacket.cpp -o Wavepacket.o
g++ Vector.o Wavepacket.o -lGL -lGLU -lglut -o app