I want to implement ,as possible , spdiags function in C.
(I prefer C to C++ and I don't want to use C++ algorithms for now)
Having as input matrix:
inMx =
1 0 0
4 5 6
0 7 9
you should obtain (running that using spdiags in Matlab) :
ouMx =
4 1 0
7 5 0
0 9 6
(one thing I can't understand though , is even though at the documentation it says that zeros are inserted at the top if you are below the main diagonal , here you we can see that happens the opposite , but in the example in the link it's ok )
With my code below , I am taking as output:
ouMx =
4 7 0
7 5 9
0 9 6
so I think I am close!
I zeroed out the output matrix instead of having to insert zeros at the bottom or top of columns.
But I can't completeley handled the above/below the main diagonal.
I used if ( j > i ) swap rows , but it doesn't work , so I just use swap rows.
( I am assuming square matrix for this example ,but it should work for any matrix )
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
void Diag( int Rows , int Cols , float * inMx , float * ouMx );
void swapRows( int Rows , int Cols , float * Mx );
int main( int argc, const char* argv[] ){
int Rows = 3 , Cols = 3;
float *inMx = (float *) malloc ( Rows * Cols * sizeof (float) );
float *ouMx = (float *) malloc ( Rows * Cols * sizeof (float) );
// assume row major order
inMx[0] = 1.0;
inMx[1] = 0.0;
inMx[2] = 0.0;
inMx[3] = 4.0;
inMx[4] = 5.0;
inMx[5] = 6.0;
inMx[6] = 0.0;
inMx[7] = 7.0;
inMx[8] = 9.0;
// print input matrix ( row major )
printf("\n Input matrix \n\n");
for ( int i = 0; i < Rows; i++ )
for ( int j = 0; j < Cols; j++ ) {
printf("%f\t",inMx[ i * Cols + j ]);
if( j == Cols-1 )
printf("\n");
}
printf("\n");
// extract diagonals
Diag( Rows , Cols , inMx , ouMx );
// print Diagonal matrix
printf("\n Diagonal matrix \n\n");
for ( int i = 0; i < Rows; i++ )
for (int j = 0; j < Cols; j++ ) {
printf("%f\t",ouMx[ i * Cols + j ]);
if( j == Cols-1 )
printf("\n");
}
printf("\n");
free( inMx );
free( ouMx );
return 0;
}
void Diag( int Rows , int Cols , float * inMx , float * ouMx )
{
//zero out ouMx
memset( ouMx , 0 , Rows * Cols * sizeof(float) );
// scan from the last line to the first -1 for each column
for ( int j = 0; j < Cols; j++ )
{
for ( int i = ( Rows - 1 ); i > 0 ; i-- )
{
// neglect the zero elements
if ( inMx[ i * Cols + j ] != 0 )
{
ouMx[ i * Cols + j ] = inMx[ i * Cols + j ];
//if the element in the next colulmn is !=0
if ( inMx[ ( i + 1 ) * Cols + ( j + 1 ) ] != 0 )
{
ouMx[ ( i + 1 ) * Cols + j ] = inMx[ ( i + 1 ) * Cols + ( j + 1 ) ];
}
}
//if we are above the main diagonal
//swap elements of a row (in each column) in order to have the zeros at bottom/top
// if ( i > j ) doesn't work
swapRows( Rows , Cols , ouMx );
}
}
}
void swapRows( int Rows , int Cols , float * Mx )
{
float temp;
for ( int j = 0; j < Cols; j++ )
{
for ( int i = ( Rows - 1 ); i > 0 ; i-- )
{
temp = Mx[ ( i - 1 ) * Cols + j ];
Mx[ ( i - 1 ) * Cols + j ] = Mx[ i * Cols + j ];
Mx[ i * Cols + j ] = temp;
}
}
}
Related
I need to find how many areas and with how many elements are there in a matrix. The matrix is filled with 0 and 1. We have an area if there are adjacent 1 (diagonal, vertical or horizontal).
int count_regions( int *arr, int rows, int cols ) {
int region_count = 0;
for ( int first_index = 0; first_index != rows * cols; ++ first_index ) {
if ( arr[ first_index ] == 0 ) continue;
++ region_count;
int first_row = first_index / cols, first_col = first_index % cols;
int last_col;
for ( last_col = first_col;
last_col != cols && arr[ first_row * cols + last_col ] != 0;
++ last_col ) ;
for ( int last_row = first_row;
last_row != rows && arr[ last_row * cols + first_col ] != 0;
++ last_row ) {
for ( int col = first_col; col != last_col; ++ col ) {
arr[ last_row * cols + col ] = 0;
}
}
}
return region_count;
}
Try having a look at connected components labelling algorithms.
I am allocating a 2d matrix using malloc and trying to insert values in relative address. I do not understand why it is core dumped error. Please look at my code below.
#include <stdio.h>
#include <stdlib.h>
int main()
{
int width = 4;
FILE *fp = fopen("matB.txt", "r");
int *x;
x = (int*)malloc(width*width*sizeof(int));
int i, j;
for(i=0; i<width; i++)
{
for(j=0; j<width; j++)
{
fscanf(fp, "%d", x[i*width+j]);
}
}
for(i=0; i<width; i++)
{
for(j=0; j<width; j++)
{
printf("%d", x[i*width+j]);
}
}
return 0;
}
matB.txt
1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
I have made the above sample program to check with the relative address and placing &x[] in fscanf cleared this problem.
The above sample C code is done because of the same read problem in Cuda. When using the same way of allocation of 2d array and its relative address, it is reading the file and when trying to print the same.. it prints 0's instead of 1,2,3,4.. I am in learning phase of CUDA. I see there is no allocation problem for the host array and placing in its relative address but why the file read is printing 0's??
Cuda Program is below
//Matrix multiplication using shared and non shared kernal
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define TILE_WIDTH 2
/*matrix multiplication kernels*/
//non shared
__global__ void MatrixMul( float *Md , float *Nd , float *Pd , const int WIDTH )
{
// calculate thread id
unsigned int col = TILE_WIDTH*blockIdx.x + threadIdx.x ;
unsigned int row = TILE_WIDTH*blockIdx.y + threadIdx.y ;
for (int k = 0 ; k<WIDTH ; k++ )
{
Pd[row*WIDTH + col]+= Md[row * WIDTH + k ] * Nd[ k * WIDTH + col] ;
}
}
// shared
__global__ void MatrixMulSh( float *Md , float *Nd , float *Pd , const int WIDTH )
{
//Taking shared array to break the MAtrix in Tile widht and fatch them in that array per ele
__shared__ float Mds [TILE_WIDTH][TILE_WIDTH] ;
__shared__ float Nds [TILE_WIDTH][TILE_WIDTH] ;
// calculate thread id
unsigned int col = TILE_WIDTH*blockIdx.x + threadIdx.x ;
unsigned int row = TILE_WIDTH*blockIdx.y + threadIdx.y ;
for (int m = 0 ; m<WIDTH/TILE_WIDTH ; m++ ) // m indicate number of phase
{
Mds[threadIdx.y][threadIdx.x] = Md[row*WIDTH + (m*TILE_WIDTH + threadIdx.x)] ;
Nds[threadIdx.y][threadIdx.x] = Nd[ ( m*TILE_WIDTH + threadIdx.y) * WIDTH + col] ;
__syncthreads() ; // for syncronizeing the threads
// Do for tile
for ( int k = 0; k<TILE_WIDTH ; k++ )
Pd[row*WIDTH + col]+= Mds[threadIdx.x][k] * Nds[k][threadIdx.y] ;
__syncthreads() ; // for syncronizeing the threads
}
}
// main routine
int main (int argc, char* argv[])
{
const int WIDTH = 4 ;
printf("%d\n", WIDTH);
//float array1_h[WIDTH][WIDTH] ,array2_h[WIDTH][WIDTH], M_result_array_h[WIDTH][WIDTH] ;
float *array1_h, *array2_h, *M_result_array_h;
float *array1_d , *array2_d ,*result_array_d ,*M_result_array_d; // device array
int i , j ;
cudaEvent_t start_full,stop_full;
float time;
cudaEventCreate(&start_full);
cudaEventCreate(&stop_full);
cudaEventRecord(start_full, 0);
//char *file1 = argv[2];
//char *file2 = argv[3];
//char *file3 = argv[4];
FILE *fp1 = fopen("matA.txt", "r");
FILE *fp2 = fopen("matB.txt", "r");
FILE *fp3 = fopen("matC.txt", "w");
//create device array cudaMalloc ( (void **)&array_name, sizeofmatrixinbytes) ;
cudaMallocHost((void **) &array1_h , WIDTH*WIDTH*sizeof (float) ) ;
cudaMallocHost((void **) &array2_h , WIDTH*WIDTH*sizeof (float) ) ;
cudaMallocHost((void **) &M_result_array_h , WIDTH*WIDTH*sizeof (float) ) ;
//input in host array
for ( i = 0 ; i<WIDTH ; i++ )
{
for (j = 0 ; j<WIDTH ; j++ )
{
fscanf(fp1, "%d", &array1_h[i*WIDTH + j]);
printf("%d\t", array1_h[i*WIDTH + j]);
}
// fscanf(fp1, "\n");
}
/*
for ( i = 0 ; i<WIDTH ; i++ )
{
for (j = 0 ; j<WIDTH ; j++ )
{
printf("%d\t", array1_h[i*WIDTH+j]);
}
printf("\n");
}*/
for ( i = 0 ; i<WIDTH ; i++ )
{
for (j = 0 ; j<WIDTH ; j++ )
{
fscanf(fp2, "%d", &array2_h[i*WIDTH+j]);
}
fscanf(fp2, "\n");
}
//create device array cudaMalloc ( (void **)&array_name, sizeofmatrixinbytes) ;
cudaMalloc((void **) &array1_d , WIDTH*WIDTH*sizeof (float) ) ;
cudaMalloc((void **) &array2_d , WIDTH*WIDTH*sizeof (float) ) ;
//copy host array to device array; cudaMemcpy ( dest , source , WIDTH , direction )
cudaMemcpy ( array1_d , array1_h , WIDTH*WIDTH*sizeof (float) , cudaMemcpyHostToDevice ) ;
cudaMemcpy ( array2_d , array2_h , WIDTH*WIDTH*sizeof (float) , cudaMemcpyHostToDevice ) ;
//allocating memory for resultant device array
cudaMalloc((void **) &result_array_d , WIDTH*WIDTH*sizeof (float) ) ;
cudaMalloc((void **) &M_result_array_d , WIDTH*WIDTH*sizeof (float) ) ;
//calling kernal
dim3 dimGrid ( WIDTH/TILE_WIDTH , WIDTH/TILE_WIDTH ,1 ) ;
dim3 dimBlock( TILE_WIDTH, TILE_WIDTH, 1 ) ;
// Change if 0 to if 1 for running non shared code and make if 0 for shared memory code
#if 0
MatrixMul <<<dimGrid,dimBlock>>> ( array1_d , array2_d ,M_result_array_d , WIDTH) ;
#endif
#if 1
MatrixMulSh<<<dimGrid,dimBlock>>> ( array1_d , array2_d ,M_result_array_d , WIDTH) ;
#endif
// all GPU function blocked till kernel is working
//copy back result_array_d to result_array_h
cudaMemcpy(M_result_array_h , M_result_array_d , WIDTH*WIDTH*sizeof(int), cudaMemcpyDeviceToHost) ;
//printf the result array
for ( i = 0 ; i<WIDTH ; i++ )
{
for ( j = 0 ; j < WIDTH ; j++ )
{
fprintf (fp3, "%d\t", M_result_array_h[i*WIDTH+j]) ;
}
fprintf (fp3, "\n") ;
}
//system("pause") ;
cudaFree(array1_d);
cudaFree(array2_d);
cudaFree(M_result_array_d);
cudaFreeHost(array1_h);
cudaFreeHost(array2_h);
cudaFreeHost(M_result_array_h);
cudaEventRecord(stop_full, 0);
cudaEventSynchronize(stop_full);
cudaEventElapsedTime(&time, start_full, stop_full);
printf ("Total execution Time is : %1.5f ms\n", time);
}
Should be fscanf(fp, "%d", &x[i*width+j]);. The scanf family requires the address of a location in which to write the scanned value.
Also, don't cast malloc.
I tried to make a gaussian blur operation using c + + (OpenCV).
This is the code
int mask [3][3] = {1 ,2 ,1 ,
2 ,3 ,2 ,
1 ,2 ,1 };
int getPixel ( unsigned char * arr , int col , int row ) {
int sum = 0;
for ( int j = -1; j <=1; j ++) {
for ( int i = -1; i <=1; i ++) {
int color = arr [( row + j ) * width + ( col + i ) ];
sum += color * mask [ i +1][ j +1];
}
}
return sum /15;
}
void h_blur ( unsigned char * arr , unsigned char * result) {
int offset = 2 *width ;
for ( int row =2; row < height -3; row ++) {
for ( int col =2; col < width -3; col ++) {
result [ offset + col ] = getPixel ( arr , col , row ) ;
}
offset += width ;
}
}
int main(int argc, char** argv)
{
starttime = getTickCount();
image_input = cvLoadImage("test.jpg", CV_LOAD_IMAGE_UNCHANGED);
width = image_input->width;
height = image_input->height;
widthStep = image_input->widthStep;
channels = image_input->nChannels;
IplImage* image_output = cvCreateImage(cvGetSize(image_input),IPL_DEPTH_8U,channels);
unsigned char *h_out = (unsigned char*)image_output->imageData;
unsigned char *h_in = (unsigned char*)image_input->imageData;
//sobel_parallel(h_in, h_out, width, height, widthStep, channels);
h_blur ( h_in , h_out) ;
endtime = getTickCount();
printf("Waktu Eksekusi = %f\n", (endtime-starttime)/getTickFrequency());
cvShowImage("CPU", image_output);
cvSaveImage("output.jpg",image_output);
cvReleaseImage(&image_output);
waitKey(0);
}
but when i run the program, the image are divided into three. I still have not found what is wrong with my code. T_T
here the result
please help me solve this problem.
#include <opencv2/opencv.hpp>
int mask [3][3] = {1 ,2 ,1 ,
2 ,3 ,2 ,
1 ,2 ,1 };
int width;
int height;
int widthStep;
int channels;
int getPixel ( unsigned char * arr , int col , int row , int k ) {
int sum = 0;
int denom = 0;
for ( int j = -1; j <=1; j ++) {
for ( int i = -1; i <=1; i ++) {
if ((row + j) >= 0 && (row + j) < height && (col + i) >= 0 && (col + i) < width) {
int color = arr [( row + j ) * 3 * width + ( col + i ) * 3 + k];
sum += color * mask [ i +1][ j +1];
denom += mask [ i +1][ j +1];
}
}
}
return sum / denom;
}
void h_blur ( unsigned char * arr , unsigned char * result) {
for ( int row =0; row < height; row ++) {
for ( int col =0; col < width; col ++) {
for (int k = 0; k < 3; k++) {
result [ 3 * row * width + 3 * col + k] = getPixel ( arr , col , row , k ) ;
}
}
}
}
int main(int argc, char** argv)
{
//starttime = getTickCount();
IplImage *image_input = cvLoadImage("test.jpg", CV_LOAD_IMAGE_UNCHANGED);
width = image_input->width;
height = image_input->height;
widthStep = image_input->widthStep;
channels = image_input->nChannels;
IplImage* image_output = cvCreateImage(cvGetSize(image_input),IPL_DEPTH_8U,channels);
unsigned char *h_out = (unsigned char*)image_output->imageData;
unsigned char *h_in = (unsigned char*)image_input->imageData;
//sobel_parallel(h_in, h_out, width, height, widthStep, channels);
h_blur ( h_in , h_out) ;
//endtime = getTickCount();
//printf("Waktu Eksekusi = %f\n", (endtime-starttime)/getTickFrequency());
cvShowImage("input", image_input);
cvShowImage("CPU", image_output);
cvSaveImage("output.jpg",image_output);
cvReleaseImage(&image_output);
cv::waitKey(0);
}
I had a few minor issues when compiling your code, so there are a few extra changes (it seems like the top section of your code may have been cut off, so a few variable declarations are missing).
In any case, the big changes are to getPixel and h_blur.
The main problem in your code was that you did not handle the fact that the data contains three bytes (blue, green, red) for each pixel, not one byte. Because of this, your code only actually looked at the first third of the image, and it swapped around the colors a bit.
I'm doing LU decom and I found this code on googel ,but want to understan it by output 'pvt' and' a 'but it semes my pvt is not correct so I got something diffrent so pease could any one correct me ..
Thanks
here is my code
int* LUfactor ( double **a, int n, int ps )
/*PURPOSE: compute an LU decomposition for the coefficient matrix a
CALLING SEQUENCE:
pvt = LUfactor ( a, n, ps );
INPUTS:
a coefficient matrix
type: **doble
n number of equations in system
type: int
ps flag indicating which pivoting strategy to use
ps == 0: no pivoting
ps == 1; partial pivoting
ps == 2; scaled partial pivoting
type: int
OUTPUT:
pvt vector which indicates the permutation of the rows
performed during the decomposition process
type: *int
a matrix containing LU decomposition of the input coefficient
matrix - the L matrix in the decomposition consists of 1's
along the main diagonal together with the strictly lower
triangular portion of the output matrix a; the U matrix
in the decomposition is theupper triangular portion of the
output matrix a
type: **double
*/
{
int pass, row, col, *pvt, j, temp;
double *s,rmax,ftmp, mult, sum;
/*initialize row pointer array*/
pvt = new int [n];
for ( row = 0; row < n; row++ )
pvt[row] = row;
/* if scaled partial pivoting option was selected,
initialize scale vector*/
if ( ps == 2 ) {
s = new double [n];
for ( row = 0; row < n; row++ ) {
s[row] = fabs( a[row][0] );
for ( col = 1; col < n; col++ )
if ( fabs( a[row][col] ) > s[row] )
s[row] = fabs( a[row][col] );
}
}
/*elimination phase*/
for ( pass = 0; pass < n; pass++ ) {
/* perform requested pivoting strategy
even if no pivoting option is requested, still must check for
zero pivot*/
if ( ps != 0 ) {
rmax = ( ps == 1 ? fabs( a[pvt[pass]][pass] ) :
fabs( a[pvt[pass]][pass] ) / s[pvt[pass]] );
j = pass;
for ( row = pass+1; row < n; row++ ) {
ftmp = ( ps == 1 ? fabs( a[pvt[row]][pass] ) :
fabs( a[pvt[row]][pass] ) / s[pvt[row]] );
if ( ftmp > rmax ) {
rmax = ftmp;
j = row;
}
}
if ( j != pass ) {
temp = pvt[j];
pvt[j] = pvt[pass];
pvt[pass] = temp;
}
}
else {
if ( a[pvt[pass]][pass] == 0.0 ) {
for ( row = pass+1; row < n; row++ )
if ( a[pvt[row]][pass] != 0.0 ) break;
temp = pvt[row];
pvt[row] = pvt[pass];
pvt[pass] = temp;
}
}
for ( row = pass + 1; row < n; row++ ) {
mult = - a[pvt[row]][pass] / a[pvt[pass]][pass];
a[pvt[row]][pass] = -mult;
for ( col = pass+1; col < n; col++ )
a[pvt[row]][col] += mult * a[pvt[pass]][col];
}
}
if ( ps == 2 ) delete [] s;
return ( pvt );
}
Here is my main
double **af;
int *pvt;
int i, j, n;
/*
allocate space for coefficient matrix
*/
n = 4;
af = new double* [n];
pvt = new int [n];
for ( i = 0; i < n; i++ )
af[i] = new double [n];
af[0][0] = 2.00; af[0][1] = 1.00; af[0][2] = 1.00; af[0][3] = -2.00;
af[1][0] = 4.00; af[1][1] = 0.00; af[1][2] = 2.00; af[1][3] = 1.00;
af[2][0] = 3.00; af[2][1] = 2.00; af[2][2] = 2.00; af[2][3] = 0.00;
af[3][0] = 1.00; af[3][1] = 3.00; af[3][2] = 2.00; af[3][3] = 0.00;
pvt =LUfactor ( af, n, 0 );
cout << "pvt" << endl;
for ( i = 0; i < n; i++ )
cout << pvt[i] << endl;
cout << endl << endl << endl;
cout << "a" << endl;
for ( i = 0; i < n; i++ )
cout << af[i][i] << endl;
cout << endl << endl << endl;
///////
out put
pvt
0
3
1
2
LU matrix is
2 1 1 -2 0
2 -0.8 1.2 5.8 0
1.5 0.2 0.166667 1.83333 0
0.5 2.5 1.5 1 0
Segmentation fault
////////////////////////////////////////
The out put I'm looking for is
Matrix A
0 2 0 1
2 2 3 2
4 -3 0 1
6 1 -6 -5
determinant: -234
pivot vector: 3 2 1 0
Lower triangular matrix
6 0 0 0
4 -3.667 0 0
2 1.667 6.818 0
0 2 2.182 1.56
Upper triangular matrix
1 0.1667 -1 -0.8333
0 1 -1.091 -1.182
0 0 1 0.8267
0 0 0 1
Product of L U
6 1 -6 -5
4 -3 0 1
2 2 3 2
0 2 0 1
Right-hand-side number 1
0.0000 -2.0000 -7.0000 6.0000
Solution vector
-0.5000 1.0000 0.3333 -2.0000
You didn't read the fine documentation. It clearly says
CALLING SEQUENCE:
pvt = LUfactor ( a, n, ps );
You used the function incorrectly. You allocated and populated pvt, and then you ignored the return value from LUfactor. You do not allocate pvt; the function LUfactor does. You need to call LUfactor per the documentation.
I got a bit stuck with my algorithm and I need some help to solve my problem. I think an example would explain better my problem.
Assuming:
d = 4 (maximum number of allowed bits in a number, 2^4-1=15).
m_max = 1 (maximum number of allowed bits mismatches).
kappa = (maximum number of elements to find for a given d and m, where m in m_max)
The main idea is for a given number, x, to compute its complement number (in binary base) and all the possible combinations for up to m_max mismatches from x complement's number.
Now the program start to scan from i = 0 till 15.
for i = 0 and m = 0, kappa = \binom{d}{0} = 1 (this called a perfect match)
possible combinations in bits, is only 1111 (for 0: 0000).
for i = 0 and m = 1, kappa = \binom{d}{1} = 4 (one mismatch)
possible combinations in bits are: 1000, 0100, 0010 and 0001
My problem was to generalize it to general d and m. I wrote the following code:
#include <stdlib.h>
#include <iomanip>
#include <boost/math/special_functions/binomial.hpp>
#include <iostream>
#include <stdint.h>
#include <vector>
namespace vec {
typedef std::vector<unsigned int> uint_1d_vec_t;
}
int main( int argc, char* argv[] ) {
int counter, d, m;
unsigned num_combination, bits_mask, bit_mask, max_num_mismatch;
uint_1d_vec_t kappa;
d = 4;
m = 2;
bits_mask = 2^num_bits - 1;
for ( unsigned i = 0 ; i < num_elemets ; i++ ) {
counter = 0;
for ( unsigned m = 0 ; m < max_num_mismatch ; m++ ) {
// maximum number of allowed combinations
num_combination = boost::math::binomial_coefficient<double>( static_cast<unsigned>( d ), static_cast<unsigned>(m) );
kappa.push_back( num_combination );
for ( unsigned j = 0 ; j < kappa.at(m) ; j++ ) {
if ( m == 0 )
v[i][counter++] = i^bits_mask; // M_0
else {
bit_mask = 1 << ( num_bits - j );
v[i][counter++] = v[i][0] ^ bits_mask
}
}
}
}
return 0;
}
I got stuck in the line v[i][counter++] = v[i][0] ^ bits_mask since I was unable to generalize my algorithm to m_max>1, since I needed for m_max mismatches m_max loops and in my original problem, m is unknown until runtime.
i wrote a code that do what i want, but since i am newbie, it is a bit ugly.
i fixed m and d although this code would work fine for genral m and d.
the main idea is simple, assuming we would like to compute the complement of 0 up to two failure (d= 4,m=2), we will see that max number of possibilities is given by \sum_{i = 0)^2\binom{4}{i} = 11.
The complement to 0 (at 4 bits) is 15
With 1 bit possible mismatch (from 15): 7 11 13 14
with 2 bits possible mismatches (from 15): 3 5 6 9 10 12
i wanted that the output of this program will be a vector with the numbers 15 7 11 13 14 3 5 6 9 10 12 inside it.
i hope that this time i am more clear with presenting my question (although i also supplied the solution). I would appreachiate if someone could point out, in my code, ways to improve it and make it faster.
regards
#include <boost/math/special_functions/binomial.hpp>
#include <iostream>
#include <vector>
#define USE_VECTOR
namespace vec {
#if defined(USE_VECTOR) || !defined(USE_DEQUE)
typedef std::vector<unsigned int> uint_1d_vec_t;
typedef std::vector<uint_1d_vec_t> uint_2d_vec_t;
#else
typedef std::deque<unsigned int> uint_1d_vec_t;
typedef std::deque<uint_1d_vec_t> uint_2d_vec_t;
#endif
}
using namespace std;
void get_pointers_vec( vec::uint_2d_vec_t &v , unsigned num_elemets , unsigned max_num_unmatch , unsigned num_bits );
double get_kappa( int m , int d );
int main( ) {
unsigned int num_elements , m , num_bits;
num_elements = 16;
num_bits = 4; // 2^4 = 16
m = 2;
double kappa = 0;
for ( unsigned int i = 0 ; i <= m ; i++ )
kappa += get_kappa( num_bits , i );
vec::uint_2d_vec_t Pointer( num_elements , vec::uint_1d_vec_t (kappa ,0 ) );
get_pointers_vec( Pointer , num_elements , m , num_bits );
for ( unsigned int i = 0 ; i < num_elements ; i++ ) {
std::cout << setw(2) << i << ":";
for ( unsigned int j = 0 ; j < kappa ; j++ )
std::cout << setw(3) << Pointer[i][j];
std::cout << std::endl;
}
return EXIT_SUCCESS;
}
double get_kappa( int n , int k ) {
double kappa = boost::math::binomial_coefficient<double>( static_cast<unsigned>( n ), static_cast<unsigned>(k) );
return kappa;
}
void get_pointers_vec( vec::uint_2d_vec_t &v , unsigned num_elemets , unsigned max_num_unmatch , unsigned num_bits ) {
int counter;
unsigned num_combination, ref_index, bits_mask, bit_mask;
vec::uint_1d_vec_t kappa;
bits_mask = pow( 2 , num_bits ) - 1;
for ( unsigned i = 0 ; i < num_elemets ; i++ ) {
counter = 0;
kappa.clear();
ref_index = 0;
for ( unsigned m = 0 ; m <= max_num_unmatch ; m++ ) {
num_combination = get_kappa( num_bits , m ); // maximum number of allowed combinations
kappa.push_back( num_combination );
if ( m == 0 ) {
v[i][counter++] = i^bits_mask; // M_0
}
else if ( num_bits == kappa.at(m) ) {
for ( unsigned k = m ; k <= num_bits ; k++ ) {
bit_mask = 1 << ( num_bits - k );
v[i][counter++] = v[i][ref_index] ^ bit_mask;
}
}
else {
// Find first element's index
ref_index += kappa.at( m - 2 );
for( unsigned j = 0 ; j < ( kappa.at(m - 1) - 1 ) ; j++ ) {
for ( unsigned k = m + j ; k <= num_bits ; k++ ) {
bit_mask = 1 << ( num_bits - k );
v[i][counter++] = v[i][ref_index] ^ bit_mask;
}
ref_index++;
}
}
}
}
}