Row-wise/column-wise operations on matrices with CUDA - c++
I'm relatively new to CUDA programming. I have understood the programming model and have already written a few basic kernels. I know how to apply a kernel to each element of a matrix (stored as a 1D array), but now I'm trying to figure out how to apply the same operation to each row/column of the input matrix.
Let's say I have an MxN matrix and a vector of length N. I would like to add the vector to each row of the matrix (addition is just an example; it could be any other math operation).
The serial code of such operation is:
// Serial reference: add V[c] to every element of column c of M.
// NOTE: the flat index below uses `rows` as the row stride; EDIT #2 further
// down corrects it to `columns`.
for (int c = 0; c < columns; c++)
{
for (int r = 0; r < rows; r++)
{
M[r * rows + c] += V[c];
}
}
Now the CUDA code for doing this operation should be quite straightforward: I should spawn as many cuda threads as the elements and apply this kernel:
// Adds vector[threadIdx.x] to matrix[idx] for every flat element index idx < size.
// Each thread handles at most one element; threads with idx >= size do nothing.
// NOTE(review): the vector is indexed by the thread's position inside its block,
// not by the element's column, so the value added matches the column only when
// the block size equals the row width (see the answer's discussion below).
__global__ void kernel(const unsigned int size, float* matrix, const float* vector)
{
// get the current element index for the thread
unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < size)
{
// sum the current element with the vector entry selected by threadIdx.x
matrix[idx] += vector[threadIdx.x];
}
}
It runs but the result is not correct. Actually, it's correct if I transpose the matrix after the kernel completes its work. Unfortunately, I have no clue why it works in this way. Could you help me to figure out this problem? Thanks in advance.
EDIT #1
I launch the kernel using:
// threads per block
int block_size = 64;
// round up so that grid_size * block_size covers all M * N elements
int grid_size = (M * N + block_size - 1) / block_size;
// M * N is passed as the element count; the kernel's idx < size guard idles surplus threads
kernel<<<grid_size, block_size>>>(M * N, matrix, vector);
EDIT #2
I solved the problem by fixing the CPU code as suggested by #RobertCrovella:
M[r * columns + c] += V[c];
The row stride in the flat index must be the number of columns, i.e. the bound of the outer for loop, not the number of rows.
The kernel shown in the question could be used without modification to sum a vector to each of the rows of a matrix (assuming c-style row-major storage), subject to certain limitations. A demonstration is here.
The main limitation of that approach is that the maximum vector length and therefore matrix width that can be handled is equal to the maximum number of threads per block, which on current CUDA 7-supported GPUs is 1024.
We can eliminate that limitation with a slight modification to the vector indexing, and by passing the row width (number of columns) as a parameter to the kernel. With this modification, we should be able to handle arbitrary matrix (and vector) sizes.
EDIT: based on discussion/comments, OP wants to know how to handle row-major or column-major underlying storage. The following example uses a templated kernel to select either row-major or column-major underlying storage, and also shows one possible CUBLAS method for doing an add-vector-to-each-matrix-row operation using the rank-1 update function:
$ cat t712.cu
#include <iostream>
#include <cublas_v2.h>
#define ROWS 20
#define COLS 10
#define nTPB 64
#define ROW_MAJOR 0
#define COL_MAJOR 1
// Adds vector[c] to every element in column c of a height x width matrix, in place
// (i.e. adds the length-`width` vector to each matrix row).
//
// Template parameters:
//   select - ROW_MAJOR: element (r, c) is stored at matrix[r*width + c]
//            COL_MAJOR: element (r, c) is stored at matrix[c*height + r]
//   T      - element type of the matrix and the vector
// Parameters:
//   height - number of matrix rows
//   width  - number of matrix columns; also the length of `vector`
//   matrix - device pointer to height*width elements, updated in place
//   vector - device pointer to `width` elements, read only
//
// Launch: 1D grid of 1D blocks. Any grid size is valid because each thread
// strides over the elements; a grid covering height*width threads gives one
// element per thread, exactly as before.
template <int select, typename T>
__global__ void vec_mat_row_add(const unsigned int height, const unsigned int width, T* matrix, const T* vector)
{
  // 64-bit count and indices: height*width and blockIdx.x*blockDim.x can
  // exceed the range of unsigned int for large matrices
  const size_t total  = (size_t)height * width;
  const size_t stride = (size_t)gridDim.x * blockDim.x;
  for (size_t idx = (size_t)blockIdx.x * blockDim.x + threadIdx.x; idx < total; idx += stride)
  {
    // sum the current element with the vector entry of its column;
    // `select` is a compile-time constant, so only one branch survives
    if (select == ROW_MAJOR)
      matrix[idx] += vector[idx % width];   // column = position within the row
    else // COL_MAJOR
      matrix[idx] += vector[idx / height];  // column = index of the height-long run
  }
}
// Prints a ROWS x COLS host matrix, one matrix row per output line.
// Element (i, j) is read from mat[i*row_stride + j*col_stride], so strides
// (COLS, 1) read row-major storage and (1, ROWS) read column-major storage.
static void print_host_matrix(const float *mat, const int row_stride, const int col_stride)
{
  for (int i = 0; i < ROWS; i++){
    for (int j = 0; j < COLS; j++) std::cout << mat[i*row_stride + j*col_stride] << " ";
    std::cout << std::endl;}
}

// Demo driver: adds the vector 0,1,2,... to every row of a zeroed ROWS x COLS
// matrix in three ways (row-major kernel, column-major kernel, cuBLAS rank-1
// update) and prints each result. Returns 0 on success, 1 on a cuBLAS failure.
// All host/device buffers and the cuBLAS handle are released on every path.
int main(){
  float *h_mat, *d_mat, *h_vec, *d_vec;
  const unsigned int msz = ROWS*COLS*sizeof(float);
  const unsigned int vsz = COLS*sizeof(float);
  h_mat = (float *)malloc(msz);
  h_vec = (float *)malloc(vsz);
  cudaMalloc(&d_mat, msz);
  cudaMalloc(&d_vec, vsz);
  for (int i=0; i<COLS; i++) h_vec[i] = i; // set vector to 0,1,2, ...
  cudaMemcpy(d_vec, h_vec, vsz, cudaMemcpyHostToDevice);

  // test row-major case
  cudaMemset(d_mat, 0, msz); // set matrix to zero
  vec_mat_row_add<ROW_MAJOR><<<(ROWS*COLS + nTPB -1)/nTPB, nTPB>>>(ROWS, COLS, d_mat, d_vec);
  cudaMemcpy(h_mat, d_mat, msz, cudaMemcpyDeviceToHost);
  std::cout << "Row-major result: " << std::endl;
  print_host_matrix(h_mat, COLS, 1);

  // test column-major case
  cudaMemset(d_mat, 0, msz); // set matrix to zero
  vec_mat_row_add<COL_MAJOR><<<(ROWS*COLS + nTPB -1)/nTPB, nTPB>>>(ROWS, COLS, d_mat, d_vec);
  cudaMemcpy(h_mat, d_mat, msz, cudaMemcpyDeviceToHost);
  std::cout << "Column-major result: " << std::endl;
  print_host_matrix(h_mat, 1, ROWS);

  // test CUBLAS, doing matrix-vector add using <T>ger
  cudaMemset(d_mat, 0, msz); // set matrix to zero
  float *d_ones, *h_ones;
  h_ones = (float *)malloc(ROWS*sizeof(float));
  for (int i =0; i<ROWS; i++) h_ones[i] = 1.0f;
  cudaMalloc(&d_ones, ROWS*sizeof(float));
  cudaMemcpy(d_ones, h_ones, ROWS*sizeof(float), cudaMemcpyHostToDevice);

  int rc = 0;
  cublasHandle_t ch;
  cublasStatus_t stat = cublasCreate(&ch);
  const bool have_handle = (stat == CUBLAS_STATUS_SUCCESS);
  if (have_handle) {
    // rank-1 update A += alpha * ones * vec^T on a column-major ROWS x COLS
    // matrix (lda = ROWS): every row of A receives the vector
    float alpha = 1.0f;
    stat = cublasSger(ch, ROWS, COLS, &alpha, d_ones, 1, d_vec, 1, d_mat, ROWS);
  }
  if (stat != CUBLAS_STATUS_SUCCESS) {
    std::cout << "CUBLAS error: " << (int)stat << std::endl;
    rc = 1;
  } else {
    cudaMemcpy(h_mat, d_mat, msz, cudaMemcpyDeviceToHost);
    std::cout << "CUBLAS Column-major result: " << std::endl;
    print_host_matrix(h_mat, 1, ROWS);
  }

  // release everything, on the success path and the error path alike
  if (have_handle) cublasDestroy(ch);
  cudaFree(d_ones);
  cudaFree(d_vec);
  cudaFree(d_mat);
  free(h_ones);
  free(h_vec);
  free(h_mat);
  return rc;
}
$ nvcc -o t712 t712.cu -lcublas
$ ./t712
Row-major result:
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
Column-major result:
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
CUBLAS Column-major result:
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
$
For brevity of presentation, I've not included proper cuda error checking, but that is always a good idea any time you are having trouble with a CUDA code. As a proxy/shortcut, you can run your code with cuda-memcheck as a quick check to see if there are any CUDA errors.
Note that we expect all 3 printouts to be identical because that is actually the correct way to display the matrix, regardless of whether the underlying storage is row-major or column-major. The difference in underlying storage is accounted for in the for-loops handling the display output.
Robert Crovella has already answered this question providing examples using explicit CUDA kernels and cuBLAS.
For future reference, I find it useful to also show an example of how to perform row-wise or column-wise operations using CUDA Thrust. In particular, I'm focusing on two problems:
Summing a column vector to all matrix columns;
Summing a row vector to all matrix rows.
The generality of thrust::transform makes it possible to generalize the example below to elementwise operations other than the sum (e.g., multiplications, divisions, subtractions, etc.).
#include <cstdio>
#include <iostream>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/functional.h>
#include <thrust/transform.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/reduce.h>
#include <thrust/random.h>
#include <thrust/sort.h>
#include <thrust/unique.h>
#include <thrust/equal.h>
using namespace thrust::placeholders;
/*************************************/
/* CONVERT LINEAR INDEX TO ROW INDEX */
/*************************************/
// Functor mapping a linear index i to i / Ncols. For a row-major matrix with
// Ncols columns this is the row that element i belongs to.
// Callable on both host and device.
template <typename T>
struct linear_index_to_row_index : public thrust::unary_function<T,T> {
  T Ncols; // --- Number of columns (the divisor)
  __host__ __device__ linear_index_to_row_index(T Ncols) : Ncols(Ncols) {}
  // const-qualified: the call does not modify the functor, so it can also be
  // invoked through const objects
  __host__ __device__ T operator()(T i) const { return i / Ncols; }
};
/********/
/* MAIN */
/********/
// Prints a row-major Nrows x Ncols matrix held in host memory, one bracketed
// row per line.
static void print_rows(const thrust::host_vector<float> &h, const int Nrows, const int Ncols)
{
  for (int i = 0; i < Nrows; i++) {
    std::cout << "[ ";
    for (int j = 0; j < Ncols; j++)
      std::cout << h[i * Ncols + j] << " ";
    std::cout << "]\n";
  }
}

// Demo driver: builds a random row-major Nrows x Ncols matrix, a column vector
// (length Nrows) and a row vector (length Ncols), then prints
//   - the matrix with the column vector added to every column, and
//   - the matrix with the row vector added to every row.
// Data is generated and printed on the host and moved to/from the device in
// whole-vector copies rather than one element at a time.
int main()
{
  /**************************/
  /* SETTING UP THE PROBLEM */
  /**************************/
  const int Nrows = 10; // --- Number of rows
  const int Ncols = 3;  // --- Number of columns

  // --- Random uniform integer distribution between 0 and 100
  thrust::default_random_engine rng;
  thrust::uniform_int_distribution<int> dist1(0, 100);

  // --- Random uniform integer distribution between 1 and 4
  thrust::uniform_int_distribution<int> dist2(1, 4);

  // --- Matrix allocation and initialization (filled on host, uploaded once)
  thrust::host_vector<float> h_matrix(Nrows * Ncols);
  for (size_t i = 0; i < h_matrix.size(); i++) h_matrix[i] = (float)dist1(rng);
  thrust::device_vector<float> d_matrix(h_matrix);

  // --- Column vector allocation and initialization
  thrust::host_vector<float> h_column(Nrows);
  for (size_t i = 0; i < h_column.size(); i++) h_column[i] = (float)dist2(rng);
  thrust::device_vector<float> d_column(h_column);

  // --- Row vector allocation and initialization
  thrust::host_vector<float> h_row(Ncols);
  for (size_t i = 0; i < h_row.size(); i++) h_row[i] = (float)dist2(rng);
  thrust::device_vector<float> d_row(h_row);

  printf("\n\nOriginal matrix\n");
  print_rows(h_matrix, Nrows, Ncols);

  printf("\n\nColumn vector\n");
  for (int i = 0; i < Nrows; i++) std::cout << h_column[i] << "\n";

  printf("\n\nRow vector\n");
  for (int i = 0; i < Ncols; i++) std::cout << h_row[i] << " ";

  /*******************************************************/
  /* ADDING THE SAME COLUMN VECTOR TO ALL MATRIX COLUMNS */
  /*******************************************************/
  // Element k of the row-major matrix lies in row k / Ncols, so it is paired
  // with d_column[k / Ncols].
  thrust::device_vector<float> d_matrix2(d_matrix);
  thrust::transform(d_matrix.begin(), d_matrix.end(),
                    thrust::make_permutation_iterator(
                        d_column.begin(),
                        thrust::make_transform_iterator(thrust::make_counting_iterator(0), linear_index_to_row_index<int>(Ncols))),
                    d_matrix2.begin(),
                    thrust::plus<float>());

  printf("\n\nColumn + Matrix -> Result matrix\n");
  thrust::host_vector<float> h_matrix2(d_matrix2);
  print_rows(h_matrix2, Nrows, Ncols);

  /*************************************************/
  /* ADDING THE SAME ROW VECTOR TO ALL MATRIX ROWS */
  /*************************************************/
  // Element k of the row-major matrix lies in column k % Ncols, so it is
  // paired with d_row[k % Ncols]. Input and output are traversed in storage
  // order, so no permutation of the matrix itself is needed.
  thrust::device_vector<float> d_matrix3(d_matrix);
  thrust::transform(d_matrix.begin(), d_matrix.end(),
                    thrust::make_permutation_iterator(
                        d_row.begin(),
                        thrust::make_transform_iterator(thrust::make_counting_iterator(0), _1 % Ncols)),
                    d_matrix3.begin(),
                    thrust::plus<float>());

  printf("\n\nRow + Matrix -> Result matrix\n");
  thrust::host_vector<float> h_matrix3(d_matrix3);
  print_rows(h_matrix3, Nrows, Ncols);

  return 0;
}
Related
take one column from a 2D array and store in 1D
I am trying to take this 9 x 3 and use only the 3rd column to store in its own 1D array: 3 5 8 6 3 9 7 5 12 0 5 5 1 2 3 8 2 10 8 3 11 9 3 12 4 1 5 This is what I have for a conversion: int index = 0; // 2D to 1D conversion for (int r = 0; r < N; r++) { for (int c = 0; c < 3; c++) { end[index++] = start[r][c]; } } But it is giving me the first 9 numbers in the whole matrix: 3 5 8 6 3 9 7 5 12 (but vertically) I need the 3rd column only and I don't know what I am doing wrong.
you can try this: int index = 0; // 2D to 1D conversion for (int r = 0; r < N; r++) { end[index++] = start[r][2]; }
How to relocate an element in one array in C++
I took this interview question and I failed, so I'm here to not fail again! I have an array of int with size 16 and a 5 < givenIndex < 10. I have to take the element in this index a print every possible array (there are 16) by moving the element at givenIndex through every position in array and pushing rest of elements. For example: int array[16] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}; int givenIndex = 6; Since array[givenIndex] = 7, I need to move 7 to every possible position and print that array. [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16] [7,1,2,3,4,5,6,8,9,10,11,12,13,14,15,16] [1,7,2,3,4,5,6,8,9,10,11,12,13,14,15,16] [1,2,7,3,4,5,6,8,9,10,11,12,13,14,15,16] And that's for 16 cases. What I was trying was: for(int i = 0;i<16;i++){ array[i] = array[indexInsercion] if (i<indexInsert){ //right shift array[i] = array[i+1] }else if(i == indexInsert){ //no shift }else{ //left shift array[i] = array[i-1] } } Can I get some help?
We can only guess what the interviewer expected to see. If I was the interviewer I would like to see that you keep things simple. This is code I think one can expect to be written from scratch in an interview situation: #include <iostream> #include <array> template <size_t size> void print_replaced(const std::array<int,size>& x,size_t index){ for (int i=0;i<size;++i){ for (int j=0;j<i;++j) { if (j == index) continue; std::cout << x[j] << " "; } std::cout << x[index] << " "; for (int j=i;j<size;++j) { if (j == index) continue; std::cout << x[j] << " "; } std::cout << "\n"; } } int main() { std::array<int,16> x{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}; print_replaced(x,6); } It is a first approach at the problem, with a loop that prints 16 different combinations of the array elements. Printing each line follows simple logic: We print all elements before the one that should be replaced, then the one that should be shuffled, then the remaining elements. It is simple, but wrong. Its output is: 7 1 2 3 4 5 6 8 9 10 11 12 13 14 15 16 1 7 2 3 4 5 6 8 9 10 11 12 13 14 15 16 1 2 7 3 4 5 6 8 9 10 11 12 13 14 15 16 1 2 3 7 4 5 6 8 9 10 11 12 13 14 15 16 1 2 3 4 7 5 6 8 9 10 11 12 13 14 15 16 1 2 3 4 5 7 6 8 9 10 11 12 13 14 15 16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 1 2 3 4 5 6 8 7 9 10 11 12 13 14 15 16 1 2 3 4 5 6 8 9 7 10 11 12 13 14 15 16 1 2 3 4 5 6 8 9 10 7 11 12 13 14 15 16 1 2 3 4 5 6 8 9 10 11 7 12 13 14 15 16 1 2 3 4 5 6 8 9 10 11 12 7 13 14 15 16 1 2 3 4 5 6 8 9 10 11 12 13 7 14 15 16 1 2 3 4 5 6 8 9 10 11 12 13 14 7 15 16 1 2 3 4 5 6 8 9 10 11 12 13 14 15 7 16 There is one line that appears twice and the last line is missing. As an interviewer I would not be surprised that the first attempt does not produce correct output. I don't care about that. Thats not a minus. What I would care about is how you react on that. Do you know the next steps? Do you have a strategy to fix the wrong output? 
Or do you just panic because you didn't manage to write the correct code on the first attempt? This is what I would like to check in an interview, and then that's the end of the exercise. I want to ask more, different questions rather than giving you the time to fix all mistakes and write correct, well-tested code, because I know that this takes more time than we have in the interview. I'll leave it to you to fix the above code ;)
Here's a quick stab at it. Basically just keep track of where the given index should go and print it there as well as skip the original position it would be in. #include <iostream> int main() { int array[16] = { 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16 }; int givenIndex = 6; for (int p = 0; p <= 16; ++p) { if (p != givenIndex) { std::cout << "["; for (int i = 0; i < 16; ++i) { if (i == p) { if (i > 0) { std::cout << ","; } std::cout << array[givenIndex]; } if (array[i] != array[givenIndex]) { if (i > 0 || p == 0) { std::cout << ","; } std::cout << array[i]; } } if (p == 16) { std::cout << "," << array[givenIndex]; } std::cout << "]\n"; } } } Output: [7,1,2,3,4,5,6,8,9,10,11,12,13,14,15,16] [1,7,2,3,4,5,6,8,9,10,11,12,13,14,15,16] [1,2,7,3,4,5,6,8,9,10,11,12,13,14,15,16] [1,2,3,7,4,5,6,8,9,10,11,12,13,14,15,16] [1,2,3,4,7,5,6,8,9,10,11,12,13,14,15,16] [1,2,3,4,5,7,6,8,9,10,11,12,13,14,15,16] [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16] [1,2,3,4,5,6,8,7,9,10,11,12,13,14,15,16] [1,2,3,4,5,6,8,9,7,10,11,12,13,14,15,16] [1,2,3,4,5,6,8,9,10,7,11,12,13,14,15,16] [1,2,3,4,5,6,8,9,10,11,7,12,13,14,15,16] [1,2,3,4,5,6,8,9,10,11,12,7,13,14,15,16] [1,2,3,4,5,6,8,9,10,11,12,13,7,14,15,16] [1,2,3,4,5,6,8,9,10,11,12,13,14,7,15,16] [1,2,3,4,5,6,8,9,10,11,12,13,14,15,7,16] [1,2,3,4,5,6,8,9,10,11,12,13,14,15,16,7]
If the expectation is to just print the elements of array in the given order: Keep the track of current index of array element to be print, say indx - If the position of current element processing is equal to row number then print the element at givenIndex. If indx is equal to givenIndex skip it and print indx + 1 element, otherwise print element at indx and increase indx by 1. Implementation: #include <iostream> #include <array> int main() { std::array<int, 16> array = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}; std::size_t givenIndex = 6; for (std::size_t i = 0, indx = 0; i < array.size(); indx = 0, ++i) { std::cout << '['; for (std::size_t j = 0; j < array.size(); ++j) { if (j == i) { std::cout << array[givenIndex] << ','; continue; } if (indx == givenIndex) { ++indx; } std::cout << array[indx++] << ','; } std::cout << ']'; std::cout << '\n'; } return 0; } Output: # ./a.out [7,1,2,3,4,5,6,8,9,10,11,12,13,14,15,16,] [1,7,2,3,4,5,6,8,9,10,11,12,13,14,15,16,] [1,2,7,3,4,5,6,8,9,10,11,12,13,14,15,16,] [1,2,3,7,4,5,6,8,9,10,11,12,13,14,15,16,] [1,2,3,4,7,5,6,8,9,10,11,12,13,14,15,16,] [1,2,3,4,5,7,6,8,9,10,11,12,13,14,15,16,] [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,] [1,2,3,4,5,6,8,7,9,10,11,12,13,14,15,16,] [1,2,3,4,5,6,8,9,7,10,11,12,13,14,15,16,] [1,2,3,4,5,6,8,9,10,7,11,12,13,14,15,16,] [1,2,3,4,5,6,8,9,10,11,7,12,13,14,15,16,] [1,2,3,4,5,6,8,9,10,11,12,7,13,14,15,16,] [1,2,3,4,5,6,8,9,10,11,12,13,7,14,15,16,] [1,2,3,4,5,6,8,9,10,11,12,13,14,7,15,16,] [1,2,3,4,5,6,8,9,10,11,12,13,14,15,7,16,] [1,2,3,4,5,6,8,9,10,11,12,13,14,15,16,7,] If the expectation is to alter the order of elements in the array and then print the array: First move the element at givenIndex to the 0th index of array and then - Print array In every iteration swap the current element with its next element in the array and print it. 
Implementation: #include <iostream> #include <array> void print_array (std::array<int, 16>& array) { std::cout << '['; for (std::size_t indx = 0; indx < array.size(); ++indx) { std::cout << array[indx] << ','; } std::cout << ']'; std::cout << '\n'; } void rearrange_array_elem (std::array<int, 16>& array, std::size_t givenIndx) { // move the element at givneIndx to first position in array for (std::size_t j = givenIndx; j > 0; --j) { std::swap (array[j], array[j - 1]); } // print array print_array (array); for (std::size_t indx = 0; indx < array.size() - 1; ++indx) { // swap current element with its next element std::swap (array[indx], array[indx + 1]); print_array (array); } } int main() { std::array<int, 16> array = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}; std::size_t givenIndex = 6; rearrange_array_elem (array, givenIndex); return 0; } Output: # ./a.out [7,1,2,3,4,5,6,8,9,10,11,12,13,14,15,16,] [1,7,2,3,4,5,6,8,9,10,11,12,13,14,15,16,] [1,2,7,3,4,5,6,8,9,10,11,12,13,14,15,16,] [1,2,3,7,4,5,6,8,9,10,11,12,13,14,15,16,] [1,2,3,4,7,5,6,8,9,10,11,12,13,14,15,16,] [1,2,3,4,5,7,6,8,9,10,11,12,13,14,15,16,] [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,] [1,2,3,4,5,6,8,7,9,10,11,12,13,14,15,16,] [1,2,3,4,5,6,8,9,7,10,11,12,13,14,15,16,] [1,2,3,4,5,6,8,9,10,7,11,12,13,14,15,16,] [1,2,3,4,5,6,8,9,10,11,7,12,13,14,15,16,] [1,2,3,4,5,6,8,9,10,11,12,7,13,14,15,16,] [1,2,3,4,5,6,8,9,10,11,12,13,7,14,15,16,] [1,2,3,4,5,6,8,9,10,11,12,13,14,7,15,16,] [1,2,3,4,5,6,8,9,10,11,12,13,14,15,7,16,] [1,2,3,4,5,6,8,9,10,11,12,13,14,15,16,7,]
I am trying to generate a list of all the subsets r, of the set, n. My code works if n-r=2, but if > 2, prints out incorrect output
I am trying to generate a list of subsets from a set. For example, if I had n = 6, and r = 4, I would have 15 possible combinations which would be the following: 0 1 2 3 0 1 2 4 0 1 2 5 0 1 3 4 0 1 3 5 0 1 4 5 0 2 3 4 0 2 3 5 0 2 4 5 0 3 4 5 1 2 3 4 1 2 3 5 1 2 4 5 1 3 4 5 2 3 4 5 My current code does work with the above subsets if n = 6 & r = 4. It also works if any other combination of n-r=2. It does not work for anything else and I'm having a bit of trouble debugging since my code makes perfect sense to me. The code I have is the following: int array[r]; int difference = n-r; for(int i = 0; i < r; i++){ array[i] = i; } while (array[0] < difference){ print (array, r); for(int i = r-1; i >= 0; i--){ if ((array[i] - i) == 0){ array[i] = array[i] + 1; for (int j = i+1; j < r; j++){ array[j] = j + 1; } i = r; } else{ array[i] = array[i] + 1; } print (array, r); } } } To give some context, when I plug in n=6 and r=3, I am supposed to have 20 combinations as the output. Only 14 are printed, however: 0 1 2 0 1 3 0 1 4 0 2 3 0 2 4 0 3 4 1 2 3 1 2 4 1 3 4 2 3 4 2 3 4 2 3 5 2 4 5 3 4 5 It does print the first and last output correctly, however I need to have all the outputs printed out and correct. I can see after the 3rd iteration, the code starts failing as it goes from 0 1 4 to 0 2 3 when it should go to 0 1 5 instead. Any suggestions as to what I'm doing wrong?
Here's what I think you are trying to do. As far as I can tell, your main problem is that the main for loop should start over after incrementing an array element to a valid value, rather than continuing. So this version only calls print in one place and uses break to get out of the main for loop. It also counts the combinations found. #include <iostream> void print(int array[], int r) { for(int i=0; i<r; ++i) { std::cout << array[i] << ' '; } std::cout << '\n'; } int main() { static const int n = 6; static const int r = 3; static const int difference = n-r; int array[r]; for(int i = 0; i < r; i++) { array[i] = i; } int count = 0; while(array[0] <= difference) { ++count; print(array, r); for(int i=r-1; i>=0; --i) { ++array[i]; if(array[i] <= difference + i) { for(int j=i+1; j<r; ++j) { array[j] = array[j-1] + 1; } break; } } } std::cout << "count: " << count << '\n'; } Outputs 0 1 2 0 1 3 0 1 4 0 1 5 0 2 3 0 2 4 0 2 5 0 3 4 0 3 5 0 4 5 1 2 3 1 2 4 1 2 5 1 3 4 1 3 5 1 4 5 2 3 4 2 3 5 2 4 5 3 4 5 count: 20
Assigning a vector to a matrix column in Eigen
This question was asked in haste. The error in my original program, was not the typo in the code that is displayed here. The error was that in my program v was not getting populated due to some conditions. The more useful takeaway from this thread is the demonstration of copying a std::vector to all rows or columns of an Eigen Matrix, in the accepted answer. I want to copy vectors into the columns of a matrix, like the following: #include <Eigen/Dense> #include <vector> #include <iostream> int main() { int m = 10; std::vector<Eigen::VectorXd> v(m); Eigen::MatrixXd S(m,m); for (int i = 0; i != m; ++i) { v[i].resize(m); for (int j = 0; j != m; ++j) { v[i](j) = rand() % m; } //S.cols(i) = v[i]; //needed something like this } return 0; } S is of type Eigen::MatrixXd and dimension mxm. v is a std::vector of Eigen::VectorXd, where each Eigen::VectorXd is of size m and there are m of them in v.
Regarding the original question, you need to wrap the std::vector with an Eigen::Map. You could/should also make the operation a one-liner. The reworded question is reduced to a typo. S.cols(i) should be S.col(i). int main() { size_t sz = 6; Eigen::MatrixXd S(sz, sz); std::vector<double> v(sz); std::vector<Eigen::VectorXd> vv(sz); for(int i = 0; i < sz; i++) { v[i] = i*2; vv[i] = Eigen::VectorXd::LinSpaced(sz, (i+sz), (i+sz)*2); } for (int i = 0; i != sz; ++i) S.col(i) = vv[i]; std::cout << S << "\n\n"; S.rowwise() = Eigen::Map<Eigen::RowVectorXd>(v.data(), sz); std::cout << S << "\n\n"; S.colwise() = Eigen::Map<Eigen::VectorXd>(v.data(), sz); std::cout << S << "\n\n"; return 0; } which would output 6 7 8 9 10 11 7.2 8.4 9.6 10.8 12 13.2 8.4 9.8 11.2 12.6 14 15.4 9.6 11.2 12.8 14.4 16 17.6 10.8 12.6 14.4 16.2 18 19.8 12 14 16 18 20 22 0 2 4 6 8 10 0 2 4 6 8 10 0 2 4 6 8 10 0 2 4 6 8 10 0 2 4 6 8 10 0 2 4 6 8 10 0 0 0 0 0 0 2 2 2 2 2 2 4 4 4 4 4 4 6 6 6 6 6 6 8 8 8 8 8 8 10 10 10 10 10 10
What is resetting the value of the iterator in this for-loop?
#include <iostream> using namespace std; int main() { const int SIZE = 5; double x[SIZE]; for(int i = 2; i <= SIZE; i++) { x[i] = 0.0; cout << i << endl; } } Output: 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 ... If SIZE is initialized to a different value, the iterator will iterate until it is one short of that value and then reset back to zero. If the array of x is changed to data type int, the loop does not get stuck on itself. If the assignment value to x[i] is changed to any non-zero number, the value of is changed to garbage during the last run of the loop. #include <iostream> using namespace std; int main() { const int SIZE = 5; double x[SIZE]; for(int i = 2; i <= SIZE; i++) { x[i] = 1; cout << i << endl; } } Output: 2 3 4 1072693248 #include <iostream> using namespace std; int main() { const int SIZE = 5; int x[SIZE]; for(int i = 2; i <= SIZE; i++) { x[i] = 1; cout << i << endl; } } Output: 2 3 4 5
You are writing past the end of the x array. x[] ranges from 0 to SIZE - 1 (or 4), and you let your index i == SIZE. So, the behavior is undefined and coincidentally, you are overwriting i when you write x[5]. Use a debugger. It's your friend.
for(int i = 2; i < SIZE; i++) // i <= SIZE will write beyond the array
Your current array is of size 5. Arrays are 0 indexed: 1st element last element 0 1 2 3 4 You're iterating past the end of your array (i <= 5), which is undefined behavior.
Your end condition is wrong. Use i < SIZE #include <iostream> using namespace std; int main() { const int SIZE = 5; double x[SIZE]; for(int i = 2; i < SIZE; i++) { x[i] = 0.0; cout << i << endl; } }