Trying to use std::aligned_storage with SSE and new - c++

I wanted to try getting a square root of some floats using SSE instrincs in C++. But I get a exception when I try to store the result. Can I use std::aligned_storage like that?
#include <iostream>
#include <type_traits>
#include <xmmintrin.h>
using namespace std;
using float_storage = aligned_storage<4 * sizeof(float), 16>;
int main()
{
int N;
cin >> N;
float_storage * values = new float_storage[ N / 4 ]; // 4 floats in pack
for(int i = 0; i < N / 4; i++)
{
void *vptr = static_cast<void*>(&values[i]);
float *fptr = static_cast<float*>(vptr);
for(int i = 0; i < 4; i++)
cin >> fptr[i];
}
for(int i = 0; i < N / 4; i++)
{
void *vptr = static_cast<void*>(&values[i]);
float *fptr = static_cast<float*>(vptr);
__m128 x = _mm_loadu_ps(fptr);
x = _mm_sqrt_ps(x);
_mm_store_ps(fptr, x); // im getting an crash here
}
for(int i = 0; i < N / 4; i++)
{
void *vptr = static_cast<void*>(&values[i]);
float *fptr = static_cast<float*>(vptr);
for(int i = 0; i < 4; i++)
cout << fptr[i] << endl;
}
delete[] values;
}

It's aligned_storage<size, align>::type. aligned_storage itself is just a metaprogramming struct.
Also, new is only rated to std::max_align_t, if I recall correctly, even if you new up a type with higher alignment requirements.

Related

C++ Memory leak error when resizing C++ dynamic array

The code below converts lets say array 3,9,3 to sorted array of integers 3,3,3,3,3 by converting 9 into sum of maximum possible parts.
The link to code/algorithm used in this code is answered at
https://stackoverflow.com/a/75331557/21145472
I am struck in this C++ code. When I ran it yesterday it was fine but today it gives memory leak error when function resizeArray() is run third time.
Please help fix this memory leak
#include<cmath>
#include <algorithm>
#include <iterator>
using namespace std;
void resizeArray(int *orig, int size, int newSize) {
int *resized = new int[newSize];
for (int i = 0; i < size; i ++)
resized[i] = orig[i];
delete [] orig;
orig = resized;
}
int main(){
int n = 3;
int *arr = new int[n];
int arrLength = n;
arr[0] = 3;
arr[1] = 9;
arr[2] = 3;
int *arrSorted = new int[0];
int sortedArrayLength = 0;
int temp;
unsigned long long int limit = 10e4;
long long parts = 0;
int extra = 0;
int mainArrayIndex = 0;
for(int i = 0; i<n/2; i++){
temp = arr[i];
arr[i] = arr[n-i-1];
arr[n-i-1] = temp;
}
for(int i = 0; i < n; i++){
parts = floor((arr[i] - 1) / (limit)) + 1;
limit = arr[i] / parts;
extra = arr[i] % parts;
for(int index = 0; index < extra; index++){
resizeArray(arrSorted, sortedArrayLength, sortedArrayLength + 1);
arrSorted[mainArrayIndex] = limit+1;
mainArrayIndex+=1;
sortedArrayLength+=1;
}
for(int index = 0; index < parts - extra; index++){
resizeArray(arrSorted, sortedArrayLength, sortedArrayLength + 1);
arrSorted[mainArrayIndex] = limit;
mainArrayIndex+=1;
sortedArrayLength+=1;
}
}
cout << "Array sorted steps taken" << " " << sortedArrayLength - arrLength;
cout << endl;
for(int i = 0; i < sortedArrayLength; i++){
if(i == 0)
cout << "[";
cout << arrSorted[i];
if(i < sortedArrayLength - 1)
cout << ", ";
if(i == sortedArrayLength - 1)
cout << "]";
}
delete []arr;
delete []arrSorted;
}
Your helper function's orig = resized; doesn't reassign your main function's arrSorted as you intend. Use a reference:
void resizeArray(int *&orig, ...) {
(That and the lack of including iostream are the only correctness issues I see, and this fix got rid of the error.)

"cout<<count<<endl;" isn't printing anything

cout<<count<<endl; sould provide an output according to the conditions , but it isn't printing anything, what is the fault/error/defects in the code that are causing such results ?
it is my first question , sorry if i am not completely understandable.
i used the following code , i can't understand whats happening here.this is a simple input output question. the output provides us info about matching two team's uniform.
#include <stdio.h>
#include <iostream>
using namespace std;
main(){
int a;
cin>>a;
int **b;
b=new int*[a];
for (int i = 0; i < a; i++)
{
b[i]=new int[2];
for (int j = 0; j <2 ; j++)
{
cin>>b[i][j];
}
}
int count=0;
for (int i = 0; i < a*(a-1); i++)
{ for (int j = 0; j < a; j++)
if (b[i][0]==b[j][1])
count=count+1;
}
cout<<count<<endl;
for (size_t i = 0; i < a; i++)
{
delete b[i];
}
delete b;
}
input:
3
1 2
2 4
3 4
output does not contain anything
You use the array out of bounds and delete when you should delete[]. Comments in code:
#include <iostream> // use the correct headers
#include <cstddef>
// not recommended: https://stackoverflow.com/questions/1452721/why-is-using-namespace-std-considered-bad-practice
using namespace std;
int main() { // main must return int
size_t a; // better type for an array size
cin >> a;
int** b;
b = new int*[a];
for(size_t i = 0; i < a; i++) {
b[i] = new int[2];
for(size_t j = 0; j < 2; j++) {
cin >> b[i][j];
}
}
int count = 0;
std::cout << a * (a - 1) << "\n"; // will print 6 for the given input
for(size_t i = 0; i < a * (a - 1); i++) {
// i will be in the range [0, 5]
for(size_t j = 0; j < a; j++)
if(b[i][0] == b[j][1]) count = count + 1;
// ^ undefined behaviour
}
cout << count << endl;
for(size_t i = 0; i < a; i++) {
delete[] b[i]; // use delete[] when you've used new[]
}
delete[] b; // use delete[] when you've used new[]
}

pass empty matrix to function

I'm trying to pass a matrix without dimensions to a function and fill with data. Here is an example:
#include <iostream>
#include <stdlib.h>
using namespace std;
void fun(double **matrix);
int main(void)
{
double **matrix;
fun(matrix);
for(int i = 0; i < 10; i ++)
{
cout << "matrix = " << matrix[i][0] << "\t" << matrix[i][1] << endl;
}
}
void fun(double **matrix)
{
int rowCount = 10;
int colCount = 2;
matrix = new double*[rowCount];
for(int i = 0; i < rowCount; ++i)
matrix[i] = new double[colCount];
for(int i = 0; i < rowCount; i ++)
{
matrix[i][0] = 3.;
matrix[i][1] = 4.;
}
}
It compiles but when I execute it, it return the following error:
Illegal instruction: 4
Do you know why?
First of all if you are got a pointer it should point at something, so in order to not have a warnings just initialize it with nullptr. Second thing is that you should return a pointer to pointer.I fixed your code so you can look what you have made wrong,
#include <iostream>
#include <stdlib.h>
using namespace std;
double **fun(double **matrix);
int main(void)
{
double **matrix= nullptr;
matrix = fun(matrix);
for (int i = 0; i < 10; i++)
{
cout << "matrix = " << matrix[i][0] << "\t" << matrix[i][1] << endl;
}
getchar();
getchar();
}
double **fun(double **matrix)
{
int rowCount = 10;
int colCount = 2;
matrix = new double*[rowCount];
for (int i = 0; i < rowCount; ++i)
matrix[i] = new double[colCount];
for (int i = 0; i < rowCount; i++)
{
matrix[i][0] = 3.;
matrix[i][1] = 4.;
}
return matrix;
}
Your program fails because fun allocates memory, but does not return the newly allocated matrix array to main. It would be easier to do this by return value than by argument:
double** fun() {
// ...
double** matrix = new double*[...];
// ...
return matrix;
}
int main() {
double** matrix = fun();
// ...
}
There are a million other ways to do that (by passing a double*** into fun, or a reference, or by using some sort of object etc). Pick whatever you like best.

C++ memory leak, how to detect

I am using SSE to implement matrix multiplication, but I found there exists memory leak(see the picture below), the memory usage is increasing from 400M to 1G or more.
But, I free the memory in the code.
The following are codes
main.cpp
#include "sse_matrix.h"
#include <ctime>
int main(int argc, char* argv[])
{
vector<float> left(size, 0);
vector<float> right(size, 0);
vector<float> result(size, 0);
// initialize value
for (int i = 0; i < dim; i ++)
{
for (int j = 0; j < dim; j ++)
{
left[i*dim + j] = j;
right[i*dim + j] = j;
}
}
cout << "1. INFO: value initialized, starting matrix multiplication" << endl;
// calculate the result
clock_t my_time = clock();
SSE_Matrix_Multiply(&left, &right, &result);
cout << "2. INFO: SSE matrix multiplication result has got" << endl;
/*for (int i = 0; i < dim; i ++)
{
for (int j = 0; j < dim; j ++)
{
cout << result[i * dim + j] << " ";
}
cout << endl;
}*/
cout << "3. INFO: " << float(clock() - my_time)/1000.0 << endl;
system("pause");
return 0;
}
sse_matrix.h
#ifndef __SSE_MATRIX_H
#define __SSE_MATRIX_H
#include <vector>
#include <iostream>
using std::cin;
using std::cout;
using std::endl;
using std::vector;
//#define dim 8
//#define size (dim * dim)
const int dim = 4096;
const int size = dim * dim;
struct Matrix_Info
{
vector<float> * A;
int ax, ay;
vector<float> * B;
int bx, by;
vector<float> * C;
int cx, cy;
int m;
int n;
};
void Transpose_Matrix_SSE(float * matrix)
{
__m128 row1 = _mm_loadu_ps(&matrix[0*4]);
__m128 row2 = _mm_loadu_ps(&matrix[1*4]);
__m128 row3 = _mm_loadu_ps(&matrix[2*4]);
__m128 row4 = _mm_loadu_ps(&matrix[3*4]);
_MM_TRANSPOSE4_PS(row1, row2, row3, row4);
_mm_storeu_ps(&matrix[0*4], row1);
_mm_storeu_ps(&matrix[1*4], row2);
_mm_storeu_ps(&matrix[2*4], row3);
_mm_storeu_ps(&matrix[3*4], row4);
}
float * Shuffle_Matrix_Multiply(float * left, float * right)
{
__m128 _t1, _t2, _sum;
_sum = _mm_setzero_ps(); // set all value of _sum to zero
float * _result = new float[size];
float _res[4] = {0};
for (int i = 0; i < 4; i ++)
{
for (int j = 0; j < 4; j ++)
{
_t1 = _mm_loadu_ps(left + i * 4);
_t2 = _mm_loadu_ps(right + j * 4);
_sum = _mm_mul_ps(_t1, _t2);
_mm_storeu_ps(_res, _sum);
_result[i * 4 + j] = _res[0] + _res[1] + _res[2] + _res[3];
}
}
return _result;
}
float * SSE_4_Matrix(struct Matrix_Info * my_info)
{
int m = my_info->m;
int n = my_info->n;
int ax = my_info->ax;
int ay = my_info->ay;
int bx = my_info->bx;
int by = my_info->by;
//1. split Matrix A and Matrix B
float * _a = new float[16];
float * _b = new float[16];
for (int i = 0; i < m; i ++)
{
for (int j = 0; j < m; j ++)
{
_a[i*m + j] = (*my_info->A)[(i + ax) * n + j + ay];
_b[i*m + j] = (*my_info->B)[(i + bx) * n + j + by];
}
}
//2. transpose Matrix B
Transpose_Matrix_SSE(_b);
//3. calculate result and return a float pointer
return Shuffle_Matrix_Multiply(_a, _b);
}
int Matrix_Multiply(struct Matrix_Info * my_info)
{
int m = my_info->m;
int n = my_info->n;
int cx = my_info->cx;
int cy = my_info->cy;
for (int i = 0; i < m; i ++)
{
for (int j = 0; j < m; j ++)
{
float * temp = SSE_4_Matrix(my_info);
(*my_info->C)[(i + cx) * n + j + cy] += temp[i*m + j];
delete [] temp;
}
}
return 0;
}
void SSE_Matrix_Multiply(vector<float> * left, vector<float> * right, vector<float> * result)
{
struct Matrix_Info my_info;
my_info.A = left;
my_info.B = right;
my_info.C = result;
my_info.n = dim;
my_info.m = 4;
// Matrix A row:i, column:j
for (int i = 0; i < dim; i += 4)
{
for (int j = 0; j < dim; j += 4)
{
// Matrix B row:j column:k
for (int k = 0; k < dim; k += 4)
{
my_info.ax = i;
my_info.ay = j;
my_info.bx = j;
my_info.by = k;
my_info.cx = i;
my_info.cy = k;
Matrix_Multiply(&my_info);
}
}
}
}
#endif
And I guess maybe the memory leak is in Shuffle_Matrix_Multiply function in sse_matrix.h file. But, I am not sure, and now, the memory usage is increasing and my system will crash.
Hope someone can help to figure out and thanks in advance.
You never free the _a and _b allocated in SSE_4_Matrix.
You also allocate a lot dynamically just to throw it away a bit later. For example the _a and _b could be arrays of 16 floats in stack.
I would like to use a header file to help me to check memory leak. The header file as follows:
MemoryLeakChecker.hpp
#ifndef __MemoryLeakChecker_H__
#define __MemoryLeakChecker_H__
#include <crtdbg.h>
#include <cassert>
//for memory leak check
#ifdef _DEBUG
#define DEBUG_CLIENTBLOCK new(_CLIENT_BLOCK,__FILE__,__LINE__)
#else
#define DEBUG_CLIENTBLOCK
#endif
#define _CRTDBG_MAP_ALLOC
#ifdef _DEBUG
#define new DEBUG_CLIENTBLOCK
#endif
inline void checkMemoryLeak() {
_CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
int m_count = _CrtDumpMemoryLeaks();
assert(m_count == 0);
}
#endif
In my project, i will use MemoryLeakChecker.hpp in the file including main function as follows:
MemoryLeakTest.cpp
#include "MemoryLeakChecker.hpp"
int main() {
//_crtBreakAlloc = 148; //if you only know the memory leak block number is 148 after checking memory leak log, use this to locate the code causing memory leak.
//do some things
atexit(checkMemoryLeak); //check all leak after main() function called
return 0;
}
Run your program in debug mode in Visual Studio, you can get the memory leak log in output window after your program exited. Also, you can find the place where memory leaked in the memory leak log.

Multiplication with AVX

Please this is my first time of using AVX and I'm trying to perform a simple multiplication on double precision numbers but I'm not getting all results correct.
I get just the first 4 results and the others are jargon.
#include <immintrin.h>
#include <iostream>
#include <math.h>
#include <time.h>
using namespace std;
int main() {
double *a, *b; // data pointers
double *pA,*pB; // work pointer
__m256d rA_AVX, rB_AVX; // variables for AVX
const int vector_size = 8;
a = (double*) _mm_malloc (vector_size*sizeof(double),64);
b = (double*) _mm_malloc (vector_size*sizeof(double),64);
for(int i = 0; i < vector_size; i++) {
a[i] = (rand() % 48);
b[i] = 0.0f;
cout << a[i] << endl;
}
for (int i = 0; i < vector_size; i += 8)
{
pA = a;
pB = b;
rA_AVX = _mm256_load_pd(pA);
rB_AVX = _mm256_mul_pd(rA_AVX,rA_AVX);
_mm256_store_pd(pB,rB_AVX);
pA += 8;
pB += 8;
}
for (int i=0; i<vector_size; i++){
cout << endl << b[i] << endl;
}
_mm_free(a);
_mm_free(b);
system("PAUSE");
return 0;
}