Segmentation faults and erroneous output from std::sort - c++

There is something very wrong with the following code, but I can't figure out what it is.
#include <algorithm>
#include <iostream>
#include <map>
#include <memory>
#include <random>
#include <vector>
/// Immutable value holder: an instance can only be built from an integer,
/// and the stored value never changes afterwards.
struct A
{
    A() = delete;
    A(int v) : val(static_cast<size_t>(v)) { }

    const size_t val;  // fixed at construction
};

/// Shared-ownership handle to an A.
using A_ptr = std::shared_ptr<A>;
/// Owns a map from the consecutive keys 0 .. L-1 to shared A objects whose
/// values are pseudo-random integers drawn from [1, 1000].
class B
{
public:
    /// Fills `m` with L entries keyed 0, 1, ..., L-1.
    /// The generator is seeded with a fixed constant, so every run produces
    /// the same sequence of values.
    B(unsigned int L)
    {
        std::mt19937 rand_engine(9341255);
        std::uniform_int_distribution<int> dist(1, 1000);
        // The counter is unsigned like L: a signed counter is converted to
        // unsigned for the comparison and overflows once L exceeds INT_MAX.
        for (unsigned int i = 0; i < L; ++i)
        {
            m.insert(std::pair<unsigned int, A_ptr>(i, std::make_shared<A>(dist(rand_engine))));
        }
    }
    B() = delete;

    std::map<unsigned int, A_ptr> m;  // key -> shared A
};
// Demo driver: copies the shared pointers stored in b.m into a vector, prints
// their values, sorts the vector with a custom comparator and prints it again.
int main()
{
unsigned int b_size = 47;
B b(b_size);
std::vector<A_ptr> v = {};
// Collect the mapped pointers in ascending key order.
for (std::map<unsigned int, A_ptr>::iterator it = b.m.begin(); it != b.m.end(); ++it)
{
v.push_back(it->second);
}
std::cout << "v = ";
for (auto & i : v)
std::cout << i->val << " ";
std::cout << "\n";
// NOTE(review): the comparator uses >=, so it returns true for two elements
// with equal val. std::sort requires a strict weak ordering (comp(a, a) must
// be false); with >= the behavior of this call is undefined. A strict
// comparison (a1->val > a2->val) gives a valid descending order.
std::sort(v.begin(), v.end(), [](A_ptr a1, A_ptr a2){ return (a1->val >= a2->val); });
std::cout << "v sorted = ";
for (auto & i : v)
std::cout << i->val << " ";
std::cout << "\n";
return 0;
}
This code compiles and executes, and yields the following output:
g++ -std=c++17 map_sort.cpp -o map_sort && ./map_sort
v = 634 739 51 906 227 185 738 302 310 888 886 644 191 719 68 212 124 732 879 724 671 167 367 313 813 1000 905 24 245 320 580 605 641 760 23 382 348 718 373 937 733 335 306 679 840 880 138
v sorted = 1000 937 906 905 880 888 886 245 879 840 813 760 733 719 718 644 641 739 738 732 724 679 634 671 605 580 382 373 367 320 348 335 313 310 306 302 227 68 191 185 167 212 138 124 51 24 23
Note that the output on the v sorted line is NOT correctly sorted.
Changing unsigned int b_size = 47 to unsigned int b_size = 48 yields the following output:
g++ -std=c++17 map_sort.cpp -o map_sort && ./map_sort
v = 634 739 51 906 227 185 738 302 310 888 886 644 191 719 68 212 124 732 879 724 671 167 367 313 813 1000 905 24 245 320 580 605 641 760 23 382 348 718 373 937 733 335 306 679 840 880 138 924
Segmentation fault: 11
Similar behaviors occur for other values of b_size as well. It wouldn't surprise me if they're coming from the same underlying problem. I'm clearly using std::sort incorrectly, but I have no idea how. Can anybody shed some light on this?

Related

Is it possible to stop a parallel process in CUDA [duplicate]

I am working with CUDA and I am trying to stop my kernel's work (i.e. terminate all running threads) once a certain if block is hit. How can I do that? I am really stuck here.
The CUDA execution model doesn't allow for inter-block communication by design. That can potentially make this sort of kernel abort on condition operation difficult to achieve reliably without resorting to the assert or trap type approaches which can potentially result in context destruction and loss of data which isn't what you probably want.
If your kernel design involves a small number of blocks with "resident" threads, then the only approach is some sort of atomic spinlock, which is hard to get to work reliably, and which will greatly degrade memory controller performance and achievable bandwidth.
If, on the other hand, your kernel design has rather large grids with a lot of blocks, and your main goal is to stop blocks which are not yet scheduled from running, then you could try something like this:
#include <iostream>
#include <vector>
// Device-global word read and set by searchkernel; the host zeroes it before
// the search is launched.
__device__ unsigned int found_idx;

// Plants the magic word 0xdeadbeef at five fixed positions of indata.
// Every thread that runs this kernel performs the same five stores.
__global__ void setkernel(unsigned int *indata)
{
    const unsigned int magic = 0xdeadbeef;
    const unsigned int positions[5] = { 115949, 119086, 60534, 37072, 163107 };
    for (int p = 0; p < 5; ++p) {
        indata[positions[p]] = magic;
    }
}
// Every thread first looks at found_idx and returns at once when it is already
// non-zero. Otherwise thread 0 of the block writes the block index into
// outdata, and each thread tests one element of indata; a thread that finds
// 0xdeadbeef tries to publish 1 + its global index via atomicCAS.
__global__ void searchkernel(unsigned int *indata, unsigned int *outdata)
{
    if (found_idx > 0) {
        return;
    }
    if (threadIdx.x == 0) {
        outdata[blockIdx.x] = blockIdx.x;
    }

    const unsigned int gid = blockIdx.x * blockDim.x + threadIdx.x;
    if (indata[gid] != 0xdeadbeef) {
        return;
    }
    // Only the first caller swaps 0 -> 1 + gid; the previous value is not needed.
    atomicCAS(&found_idx, 0, 1 + gid);
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when err is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t err, const char *what)
{
    if (err == cudaSuccess) {
        return true;
    }
    std::cerr << "CUDA error in " << what << ": " << cudaGetErrorString(err) << std::endl;
    return false;
}

// Driver: zeroes a 2^19-word device array, plants the magic words with
// setkernel, runs searchkernel over the whole array and lists every block
// whose outdata slot still holds the 0xf0f0f0f0 fill pattern, i.e. every block
// that returned before writing its index.
// Returns 0 on success and 1 as soon as any CUDA call or launch fails.
int main()
{
    const unsigned int N = 1 << 19;
    const size_t in_bytes = sizeof(unsigned int) * size_t(N);

    unsigned int* in_data;
    if (!cudaOk(cudaMalloc((void **)&in_data, in_bytes), "cudaMalloc(in_data)")) return 1;
    if (!cudaOk(cudaMemset(in_data, 0, in_bytes), "cudaMemset(in_data)")) return 1;

    setkernel<<<1,1>>>(in_data);
    // A launch does not return a status; configuration errors show up in
    // cudaGetLastError and execution errors at the next synchronizing call.
    if (!cudaOk(cudaGetLastError(), "setkernel launch")) return 1;
    if (!cudaOk(cudaDeviceSynchronize(), "setkernel execution")) return 1;

    unsigned int block_size = 1024;
    unsigned int grid_size = N / block_size;
    const size_t out_bytes = sizeof(unsigned int) * size_t(grid_size);

    unsigned int* out_data;
    if (!cudaOk(cudaMalloc((void **)&out_data, out_bytes), "cudaMalloc(out_data)")) return 1;
    // Byte-wise fill: every word becomes 0xf0f0f0f0, the "block did not run" marker.
    if (!cudaOk(cudaMemset(out_data, 0xf0, out_bytes), "cudaMemset(out_data)")) return 1;

    const unsigned int zero = 0;
    if (!cudaOk(cudaMemcpyToSymbol(found_idx, &zero, sizeof(unsigned int)), "cudaMemcpyToSymbol(found_idx)")) return 1;

    searchkernel<<<grid_size, block_size>>>(in_data, out_data);
    if (!cudaOk(cudaGetLastError(), "searchkernel launch")) return 1;

    std::vector<unsigned int> output(grid_size);
    // The blocking copy is ordered after searchkernel in the default stream, so
    // errors raised while the kernel executed are reported here as well.
    if (!cudaOk(cudaMemcpy(&output[0], out_data, out_bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(output)")) return 1;

    if (!cudaOk(cudaFree(out_data), "cudaFree(out_data)")) return 1;
    if (!cudaOk(cudaFree(in_data), "cudaFree(in_data)")) return 1;
    cudaDeviceReset();

    std::cout << "The following blocks did not run" << std::endl;
    // j counts entries on the current output line; a line break follows every 21st entry.
    for (unsigned int i = 0, j = 0; i < grid_size; i++) {
        if (output[i] == 0xf0f0f0f0) {
            std::cout << " " << i;
            if (j++ == 20) {
                std::cout << std::endl;
                j = 0;
            }
        }
    }
    std::cout << std::endl;
    return 0;
}
Here I have a simple kernel which is searching for a magic word in a large array. To get the early exit behaviour, I use a single global word, which is set atomically by those threads which "win" or trigger the termination condition. Every new block checks the state of this global word, and if it is set, they return without doing any work.
If I compile and run this on a moderate sized Kepler device:
$ nvcc -arch=sm_30 -o blocking blocking.cu
$ ./blocking
The following blocks did not run
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188
189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209
210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230
231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272
273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314
315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335
336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356
357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377
378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398
399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419
420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440
441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461
462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482
483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503
504 505 506 507 508 509 510 511
you can see that a large number of blocks in the grid saw the change in the global word and early terminated without running the search code. This might be the best you can do without a severely invasive spinlock approach which will greatly harm performance.
I assume you want to stop a running kernel (not a single thread).
The simplest approach (and the one that I suggest) is to set up a global memory flag which is tested by the kernel.
You can set the flag using cudaMemcpy() (or without if using unified memory).
Like the following:
if (gm_flag) {
__threadfence(); // ensure store issued before trap
asm("trap;"); // kill kernel with error
}
asm("trap;") will stop all running threads.
Note that since cuda 2.0 you can use assert() to terminate a kernel!
A different approach could be the following (I haven't tried the code!)
// Continuation predicate for the loop in stopme.
// Always returns true; val is currently unused.
__device__ bool go(int val){
    return true;
}

// Each thread with idx < size keeps incrementing val[idx] (modulo 100) and
// printing a message for as long as flag[0] reads true and go() allows it.
//   flag : device pointer to the run/stop flag polled by every thread
//   val  : device array with at least `size` elements
//   size : number of elements of val to process
__global__ void stopme(bool* flag, int* val, int size){
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= size) {
        return;
    }
    // Poll the flag through a volatile pointer so that every test is a real
    // memory load. With a plain pointer the compiler may keep the first value
    // in a register, and the loop would never see the flag being cleared.
    volatile bool* live_flag = flag;
    bool canContinue = true;
    while (canContinue && *live_flag) {
        printf("HELLO from %i\n", idx);
        // Re-check after the (slow) printf so a stop request is honoured
        // before another update of val[idx].
        if (!*live_flag) {
            return;
        }
        //do some computation
        val[idx]++;
        val[idx] %= 100;
        canContinue = go(val[idx]);
    }
}
// Wraps a CUDA runtime call and forwards its status together with the
// file and line of the call site to gpuAssert.
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }

// Prints a diagnostic on stderr when code is not cudaSuccess and, when abort
// is true (the default), ends the process with code as its exit status.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code == cudaSuccess)
        return;

    fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
    if (!abort)
        return;
    exit(code);
}
// Host driver for stopme: uploads a run flag and 128 integers, starts the
// kernel, clears the flag from the host while the kernel is running, waits for
// the kernel to finish and releases all resources.
// Every CUDA call goes through gpuErrchk, which exits on failure.
int main(void)
{
    const int size = 128;
    int* h_val = (int*)malloc(sizeof(int)*size);
    if (!h_val) {
        fprintf(stderr, "malloc failed\n");
        return 1;
    }
    for(int i=0;i<size;i++){
        h_val[i] = i;
    }

    bool * h_flag = new bool;
    *h_flag=true;

    bool* d_flag;
    gpuErrchk( cudaMalloc(&d_flag,sizeof(bool)) );
    gpuErrchk( cudaMemcpy(d_flag,h_flag,sizeof(bool),cudaMemcpyHostToDevice) );

    int* d_val;
    gpuErrchk( cudaMalloc(&d_val,sizeof(int)*size) );
    // cudaMemcpy takes a byte count: copy size ints, not size bytes.
    gpuErrchk( cudaMemcpy(d_val,h_val,sizeof(int)*size,cudaMemcpyHostToDevice) );

    // The kernel and the later flag update use two separate non-blocking
    // streams. In the legacy default stream the flag copy would be ordered
    // after the kernel, which itself only ends once the flag is cleared.
    cudaStream_t kernel_stream;
    cudaStream_t copy_stream;
    gpuErrchk( cudaStreamCreateWithFlags(&kernel_stream, cudaStreamNonBlocking) );
    gpuErrchk( cudaStreamCreateWithFlags(&copy_stream, cudaStreamNonBlocking) );

    int BSIZE=32;
    int nblocks = (size + BSIZE - 1) / BSIZE;  // round up; stopme guards idx < size
    printf("%i,%i",nblocks,BSIZE);
    stopme<<<nblocks,BSIZE,0,kernel_stream>>>(d_flag,d_val,size);
    gpuErrchk( cudaPeekAtLastError() );  // launch-configuration errors

    //--------------sleep for a while --------------------------
    *h_flag=false;
    gpuErrchk( cudaMemcpyAsync(d_flag,h_flag,sizeof(bool),cudaMemcpyHostToDevice,copy_stream) );
    gpuErrchk( cudaStreamSynchronize(copy_stream) );
    gpuErrchk( cudaDeviceSynchronize() );  // waits for stopme; reports execution errors
    printf("END\n");

    gpuErrchk( cudaStreamDestroy(copy_stream) );
    gpuErrchk( cudaStreamDestroy(kernel_stream) );
    gpuErrchk( cudaFree(d_val) );
    gpuErrchk( cudaFree(d_flag) );
    delete h_flag;
    free(h_val);
    return 0;
}
where the kernel stopMe keeps running until someone from the host side sets up the flag to false. Note that your kernel could be much more complicated than this and the effort to synchronize all threads in order to execute the return could be much more than this (and can affect performance). Hope this helped.
More info here

Why does it give a "multiple test case" error?

I've written the code for eliminating the largest 2 elements of an array, but this code gives junk value for testcase > 1. Why?
Input:
no of TestCase
size of array
elements of array
Sorting function:
// Sorts the first n elements of arr into ascending order, in place, by
// exchanging arr[i] with every later element that is smaller.
//   arr : array with at least n elements
//   n   : number of elements to sort; values <= 1 leave arr untouched
// Returns 0 (the function is declared int; callers may ignore the value).
int sort_asc(int arr[], int n)
{
    for (int i = 0; i < n; i++)
    {
        for (int j = i + 1; j < n; j++)
        {
            if (arr[j] < arr[i])
            {
                int temp = arr[i];
                arr[i] = arr[j];
                arr[j] = temp;
            }
        }
    }
    // Flowing off the end of a function declared to return int is undefined
    // behavior in C++, so return a value explicitly.
    return 0;
}
// Reads a number of test cases; for each one reads an array size and the array
// elements, sorts them ascending with sort_asc and prints all but the two
// largest values on one line.
int main() {
//code
int test;
cin>>test;
while(test--){
//taking size and array as inputs
int size;
cin>>size;
// NOTE(review): an array whose length is a run-time value (VLA) is a compiler
// extension, not standard C++.
int a[size];
// NOTE(review): valid indices are 0 .. size-1, so this reads into a[size],
// one element past the end of the array (undefined behavior). It also
// consumes one input value before the read loop below starts.
cin>>a[size];
for(int i=0;i<size;i++){
cin>>a[i];
}
//sorting the array
sort_asc(a,size);
//printing the output discarding last 2 elements of the array
for(int i=0;i<size-2;i++){
cout<<a[i]<<" ";
}
cout<<"\n";
}
return 0;
}
Expected:
12 23 28 43 44 59 60 68 70 85 88 92 124 125 136 168 171 173 179 199 212
230 277 282 306 314 316 325 328 336 337 363 365 368 369 371 374 387 394 414
422 427 430 435 457 493 506 527 531 538 541 546 568 583 650 691 730 737 751
764 778 783 785 789 794 803 809 815 847 858 863 874 887 896 916 920 926 927 930 957
My output:
12 23 28 43 44 59 60 68 70 81 85 88 92 124 125 136 168 171 173 179 199 212 230 277 282 306 314 316 325 328 336 337 363 365 368 369 371 374 387 394 414 422 427 430 435 457 493 506 527 531 538 541 546 568 583 650 691 730 737 751 764 778 783 785 789 794 803 809 815 847 858 863 874 887 896 916 920 926 930 957
A VLA (variable length array) is invalid C++ code. It is tolerated by some compilers, but it is still invalid.
But that is not your main problem. You produced an out of bound error. An array index starts with 0. The last element is at position size-1. So your statement
cin>>a[size];
will write past the end of your array. Producing undefined behavior.
I am not sure, why you put the statement at all, but after that, anything undefined can and most probably will happen.

Primes with argc and argv

I have an assignment in Codejudge which I write a command line program which reads a space separated list of integers from the command line and prints the ordered sublist consisting of the input prime numbers.
I have tried numerous times, but I can't seem to get it to work.
this is input argument:
9308 2034 9466 283 7949 1153 7241 5341 4693 6910 6852 5540 8015 9305 5697 1395 4727 9159 8661 1367 6096 2911 4797 8025 2593 5460 5767 5543 2429 8371 6024 2343 285 8657 9869 5388 5295 6279 3084 9573 6980 2362 1565 5134 5185 1991 7142 3699 5937 4151 3044 2468 8005 1603 662 2989 752 6971 3152 3681 9743 653 4542 719 2081 5772 9179 4034 5904 5494 1653 251 130 6646 2835 2260 8998 7464 112 2179 6592 8502 7381 5990 6681 8237 1331 537 2048 3342 9353 7883 1041 621 1022 4569 1421 9592 877 657 7097 2828 6242 2216 387 4605 8017 2784 4509 5818 7959 1612 491 6381 6530 5773 2220 2802 6478 7401 9084 1845 8805 8192 9806 6940 6578 9132 3144 8793 4854 1087 3238 8622 419 346 2598 1194 5766 4626 4740 6191 8639 7948 9833 3117 232 5839 8726 4863 4532 3498 6717 4874 3496 2951 5750 6982 1779 9614 9519 5980 3245 2698 6771
etc.
#include <cmath>
#include <iostream>
#include <vector>
#include <algorithm>
// Converts every command-line argument to an int, keeps the values that pass
// the divisibility filter below, sorts them ascending and prints them separated
// by spaces.
int main(int argc, char* argv[]) {
std::vector<int> input;
std::vector<int> output;
// NOTE(review): the loop starts at 0, so argv[0] is converted as well as the
// actual arguments argv[1] .. argv[argc-1].
for (int a = 0; a < argc; a++) {
input.push_back(std::atoi(argv[a]));
}
int count = 0;
for (int i = 0; i < input.size(); i++) {
// NOTE(review): this is not a primality test. It keeps odd values that are not
// multiples of 3 or 5 (3 and 5 themselves are let through by the "/ 3 == 1"
// and "/ 5 == 1" clauses). Composites with larger factors, e.g. 49 = 7*7 or
// 77 = 7*11, therefore pass, and 2 is rejected by the "% 2 != 0" clause.
if (input.at(i) % 2 != 0 && (input.at(i) % 3 != 0 || input.at(i) / 3 == 1) && (input.at(i) % 5 != 0 || input.at(i) / 5 == 1) /*&& input.at(i)*input.at(i)% input.at(i)!=0*/) {
output.push_back(input.at(i));
count++;
}
}
sort(output.begin(), output.end());
// count equals output.size(): it is incremented once per push_back above.
for (int i = 0; i < count; i++) {
std::cout << output[i] << " ";
}
}
expected result:
1 3 5 7 11 13 17 19 23 29 31 37 41 43 47 53 59 61 67 71 73 79 83 89 97 101 103 107 109 113 127 131 137 139 149 151 157 163 167 173 179 181 191 193 197 199 211 223 227 229 233 239 241
actual result:
1 3 5 7 11 13 17 19 23 29 31 37 41 43 47 49 53 59 61 67 71 73 77 79 83 89 91 97 101 103 107 109 113 119 121 127 131 133 137 139 143 149 151 157 161 163 167 169 173 179 181 187 191 193 197 199 203 209 211 217 221 223 227 229 233 239 241
there are difference between the expected and the actual.
keep in mind that the vector of random numbers are in random orders and not from smallest to largest and they all are
for (int a = 0; a < argc; a++) {
input.push_back(std::atoi(argv[a]));
}
should be
for (int a = 1; a < argc; a++) {
input.push_back(std::atoi(argv[a]));
}
The first argument argv[0] is the program name.

Shuffling vectors in C++

I am working on a programme which takes an initial vector as an input. This vector is a vector of size 20. The programme then generates 10 random vectors from this vector. For this purpose I choose 2 random indices in the initial vector and swap them with each other to generate a new vector. This is done to generate all 10 new vectors.
The 10 new vectors generated should be stored in the following 2 dimensional vector
vector<vector<int>> allparents
I have been able to generate 2 random index numbers using rand() (seeded with srand()) and then swap the elements at these indices in the initial vector. However, I am unable to generate 10 such random parents and then store them in the allparents 2D vector. My code is as follows:
#include<vector>
#include<iostream>
#include<algorithm>
#include<cstdio>
#include<ctime>
using namespace std;
// Draws two random indices, then tries to fill allparents with 10 rows of 20
// values taken from `initial` while swapping the two chosen elements.
int main () {
srand(time(NULL));
// NOTE(review): rand()%19+1 yields values in 1..19, so index 0 is never
// chosen. Both indices are drawn once, before the loops, so every swap below
// uses the same pair.
int i1=(rand()%19+1);
int i2=(rand()%19+1);
cout<<i1<<endl;
cout<<i2<<endl;
vector<int> initial={71,127,428,475,164,253,229,395,92,189,41,110,443,490,278,305,28,58,371,560};
// NOTE(review): allparents is created empty and is never resized or pushed to.
vector<vector<int>> allparents;
for(int r=0;r<10;r++){
for(int c=0;c<20;c++){
// NOTE(review): the swap runs on every inner iteration (20 times per row),
// not once per generated vector.
swap(initial[i1],initial[i2]);
// NOTE(review): allparents[r][c] indexes elements that do not exist, because
// the outer vector has size 0 (undefined behavior).
allparents[r][c]=initial[c];
// NOTE(review): single '<' before " " where '<<' was presumably intended.
cout<<allparents[r][c]<" "<<endl;
}
}
return 0;
}
First of all, I would not say that you're "generating random vectors", you're just shuffling a predefined one.
Second, I suggest to create small working functions and assemble your program with those:
// Returns a shuffled copy of v; the caller's vector is left untouched because
// v is taken by value. One default-constructed engine is shared by all calls,
// so successive calls continue the same pseudo-random sequence.
vector<int> shuffle(vector<int> v) {
    static std::default_random_engine engine;
    std::shuffle(v.begin(), v.end(), engine);
    return v;
}
int main() {
vector<int> initial= {
71,127,428,475,164,253,229,395,92,189,41,110,443,490,278,305,28,58,371,560
};
// Generate 10 shuffled vectors
vector<vector<int>> shuffledVectors;
for (int i = 0; i < 10; i++) {
vector<int> shuffled = shuffle(initial);
shuffledVectors.push_back(shuffled);
}
// Print them
for (vector<int>& v : shuffledVectors) {
for (int& i : v)
cout << i << " ";
cout << endl;
}
return 0;
}
Output:
164 41 110 305 278 28 58 127 229 189 475 395 560 428 71 443 253 371 490 92
490 71 305 58 428 127 28 110 92 443 189 229 278 475 371 395 560 41 253 164
395 278 560 490 28 164 71 229 58 41 428 305 127 253 475 371 92 189 110 443
164 475 92 253 229 189 127 560 71 58 41 443 428 395 371 490 110 278 28 305
443 253 428 110 278 71 475 127 58 41 371 229 305 189 395 164 28 490 92 560
560 28 58 71 229 41 490 475 189 443 253 395 305 164 371 278 428 92 110 127
395 443 371 58 253 305 92 127 475 110 428 229 189 41 164 278 71 560 28 490
278 189 71 127 443 110 28 428 305 560 371 58 229 253 395 164 41 490 475 92
28 395 92 443 560 278 371 71 58 305 475 253 428 490 229 189 164 110 41 127
443 71 428 229 127 278 490 58 475 253 164 110 92 189 395 560 305 41 28 371

Insertion Array Sorting Method

I am currently a student working on an insertion sort method.
Below is the code:
//Insertion sort of an integer array, in place.
//insertVals is expected to hold INITSIZE elements (INITSIZE is defined elsewhere).
void InsertionSort(int insertVals[]){
//Walk through the array, inserting each visited element into the sorted prefix.
//NOTE(review): the condition i < INITSIZE - 1 stops at index INITSIZE - 2, so
//the last element, insertVals[INITSIZE - 1], is never visited.
for(int i = 0; i < INITSIZE - 1; i++){
//Value to insert
int temp = insertVals[i];
//Index placeholder for the insertion position
int k;
//Shift every larger element of the prefix one slot to the right
for(k = i; k > 0 && insertVals[k-1] > temp; k--){
insertVals[k] = insertVals[k-1];
}
//Put the saved value into the gap left by the shift
insertVals[k] = temp;
}
}
In my tests, I have given it the array from left to right:
307 249 73 158 430 272 44 378 423 209
440 165 492 42 487 3 327 229 340 112
303 169 209 157 60 433 99 278 316 335
97 326 12 267 310 133 479 149 79 321
467 172 393 336 485 245 228 91 194 357
1 153 208 444 168 490 124 196 30 403
222 166 49 24 301 353 477 408 228 433
298 481 135 13 365 314 63 36 425 169
115 94 129 1 17 195 105 404 451 298
188 123 5 382 252 66 216 337 438 144
The method produces from left to right:
314 63 314 63 36 425 36 169 425 169
115 115 94 129 94 129 1 17 195 105
404 451 298 188 123 5 382 252 66 216
337 438 144 1 17 195 105 404 451 298
188 123 5 382 252 66 216 337 438 144
228 229 245 249 252 267 272 278 298 298
301 303 307 310 314 316 321 326 327 335
336 337 340 353 357 365 378 382 393 403
404 408 423 425 430 433 433 438 440 444
451 467 477 479 481 485 487 490 492 144
What am I incorrectly coding?
Thanks!
EDIT:
//In main...
Printing(insertionSortValues, "Insertion Sorted Array");
//Function for Print
// Prints the heading s followed by ":" and then the first INITSIZE values of
// vals, ten per row, each right-aligned in a field of width 3.
void Printing(int vals[], string s){
    cout << s << ":" << endl;
    int idx = 0;
    while (idx < INITSIZE) {
        // A new row starts before every tenth value, including the first one.
        const bool startsRow = (idx % 10 == 0);
        if (startsRow) {
            cout << endl;
        }
        cout << setw(3) << vals[idx] << " ";
        ++idx;
    }
    cout << endl;
}
This problem was solved thanks to @PaulMcKenzie.
The line:
for(int i = 0; i < INITSIZE - 1; i++){
needed to become:
for(int i = 0; i <= INITSIZE - 1; i++){
Below is the corrected function.
//Insertion sort of an integer array, in place, over indices 0 .. INITSIZE - 1
void InsertionSort(int insertVals[]){
    //Visit every element, the last one included
    for(int i = 0; i <= INITSIZE - 1; i++){
        //Element being placed into the already sorted prefix
        const int current = insertVals[i];
        //Slot the element will end up in
        int slot = i;
        //Move larger prefix elements one position to the right
        while(slot > 0 && insertVals[slot-1] > current){
            insertVals[slot] = insertVals[slot-1];
            --slot;
        }
        //Drop the element into the freed slot
        insertVals[slot] = current;
    }
}