Optimisation hint for array of random numbers - c++

To provide context, I'm working through Programming Praxis Bingo Challenge and wanted to see how fast I could make this code run.
static void fisher_yates(T& source) {
const size_t len = source.size();
for(size_t i = 1; i < len;++i) {
std::swap(source[i],source[rand() % (i+1)]);
}
}
std::array<int,25> generate_table() {
std::array<int,25> bingo_grid;
for(int i = 0 ; i < 25;++i) {
switch(i) {
case 0: case 1: case 2: case 3: case 4:
bingo_grid[i] = rand() % 15 + 1;
break;
case 5: case 6: case 7: case 8: case 9:
bingo_grid[i] = rand() % 15 + 16;
break;
case 10: case 11: case 12: case 13: case 14:
bingo_grid[i] = rand() % 15 + 31;
break;
case 15: case 16: case 17: case 18: case 19:
bingo_grid[i] = rand() % 15 + 46;
break;
case 20: case 21: case 22: case 23: case 24:
bingo_grid[i] = rand() % 15 + 61;
break;
}
}
bingo_grid[12] = 0;
return bingo_grid;
}
bool is_bingoed(const std::array<int,25>& grid) {
// Check columns
if(grid[0] == 0) {
if(grid[1] == 0 && grid[2] == 0 && grid[3] == 0 && grid[4] == 0)
return true;
if(grid[0] == 0 && grid[6] == 0 && grid[18] == 0 && grid[24] == 0)
return true;
if(grid[5] == 0 && grid[10] == 0 && grid[15] == 0 && grid[20] == 0)
return true;
}
if(grid[1] == 0) {
if(grid[6] == 0 && grid[11] == 0 && grid[16] == 0 && grid[21] == 0)
return true;
}
if(grid[2] == 0) {
if(grid[7] == 0 && grid[17] == 0 && grid[22] == 0)
return true;
}
if(grid[3] == 0) {
if(grid[8] == 0 && grid[13] == 0 && grid[18] == 0 && grid[23] == 0)
return true;
}
if(grid[4] == 0) {
if(grid[9] == 0 && grid[14] == 0 && grid[19] == 0 && grid[24] == 0)
return true;
if(grid[8] == 0 && grid[16] == 0 && grid[21] == 0)
return true;
}
if(grid[6] == 0) {
if(grid[6] == 0 && grid[7] == 0 && grid[8] == 0 && grid[9] == 0)
return true;
}
if(grid[12] == 0) {
if(grid[10] == 0 && grid[11] == 0 && grid[13] == 0 && grid[14] == 0)
return true;
}
if(grid[18] == 0) {
if(grid[15] == 0 && grid[16] == 0 && grid[17] == 0 && grid[19] == 0)
return true;
}
return false;
}
static bool mark_card(const int card,std::array<int,25>& bingo_grid) {
for(auto &i : bingo_grid)
if(card == i) {
i = 0;
return true;
}
return false;
}
int play_game() {
// Bingo is 5 columns, each column(n) is random permutation of 1-15*n
// Fisher-Yates to generate random permutations
// Create 500 playing cards
const int max = 500;
std::vector<std::array<int,25>> bingo_cards;
bingo_cards.reserve(max);
for(int i = 0; i<max;++i) {
bingo_cards.push_back(generate_table());
//display_bingo(bingo_cards[i]);
}
// Random shuffle 75 cards
auto iter = boost::counting_range(1,76);
std::vector<int> cards(std::begin(iter),std::end(iter));
fisher_yates(cards);
bool is_finished = false;
int counter = 0;
for(auto card : cards) {
for(auto& playing_card : bingo_cards) {
if(mark_card(card,playing_card)) {
//display_bingo(playing_card);
if(is_bingoed(playing_card))
return counter;
}
}
counter++;
}
return counter;
}
int bingo() {
srand(time(NULL));
int total = 0;
for(int i = 0 ; i < 10000;i++) {
total+=play_game();
}
boost::singleton_pool<boost::pool_allocator_tag, sizeof(int)>::release_memory();
return total / 10000;
}
The original version used a boost::multi_array to represent the grid. After profiling, I changed it to a std::array which got me a significant speed up. I then moved from using fisher_yates shuffle to generate bingo cards to using a random number generator.
Then finally I changed the is_bingoed test function to reduce the number of checks per call to speed up the game-over check.
All this has helped. Right now if I profile this code, the generate_table function takes up 72% of the time, mark_card() is 18%, and is_bingoed() about 6%. I'm looking for hints to see what can be done to improve the speed of either.
My first thought with is_bingoed() is to use the SSE intrinsics to do a compare with 0 (maybe use XOR?) but I don't have any ideas on the generate_table() or mark_car(). This is more of a self challenge for fun but wondered what others thought?
Current timing is it takes 4.6s on a 2Ghz Q6660 (down from 35s originally)

Just focussing on your most expensive function, generate_table, you can simplify this part of the code and make it less branchy, which may help:
for(int i = 0 ; i < 25;++i) {
switch(i) {
case 0: case 1: case 2: case 3: case 4:
bingo_grid[i] = rand() % 15 + 1;
break;
case 5: case 6: case 7: case 8: case 9:
bingo_grid[i] = rand() % 15 + 16;
break;
case 10: case 11: case 12: case 13: case 14:
bingo_grid[i] = rand() % 15 + 31;
break;
case 15: case 16: case 17: case 18: case 19:
bingo_grid[i] = rand() % 15 + 46;
break;
case 20: case 21: case 22: case 23: case 24:
bingo_grid[i] = rand() % 15 + 61;
break;
}
}
e.g.
for(int i = 0 ; i < 25;++i) {
int r = rand() % 15 + 1;
bingo_grid[i] = r + (i / 5) * 15;
}
Beyond that I'd look at a faster rand() and also see if you can get rid of the divide and and modulo.
On a separate note, your algorithm may be flawed in that there is nothing to prevent duplicate numbers in bingo_grid.

Changing the is_bingoed() method to use SSE instructions (using Agner Fog's library) and Paul R's generate_table() reduced the time to just 1.05s. And using Intel's fast_rand() function got it down to 0.38s.
So I thought I'd paste the code changes for others who might be interested.
static unsigned int g_seed;
//Used to seed the generator.
inline void fast_srand( int seed )
{
g_seed = seed;
}
//fastrand routine returns one integer, similar output value range as C lib.
inline int fastrand()
{
g_seed = (214013*g_seed+2531011);
return (g_seed>>16)&0x7FFF;
}
bool is_bingoed(const std::array<int,25>& grid) {
// Check columns
Vec8i vec(grid[0],grid[1],grid[2],grid[3],grid[4],0,0,0);
Vec4i vec2(grid[6],grid[18],grid[24],0);
Vec4i vec3(grid[5],grid[10],grid[15],20);
Vec8i vec4(grid[1],grid[6],grid[11],grid[16],grid[21],0,0,0);
Vec4i vec5(grid[2],grid[7],grid[17],grid[22]);
Vec8i vec6(grid[3],grid[8],grid[13],grid[18],grid[23],0,0,0);
Vec8i vec7(grid[4],grid[9],grid[14],grid[19],grid[24],0,0,0);
Vec4i vec8(grid[8],grid[16],grid[21],grid[4]);
Vec4i vec9(grid[6],grid[7],grid[8],grid[9]);
Vec8i vec10(grid[12],grid[10],grid[11],grid[13],grid[14],0,0,0);
Vec8i vec11(grid[18],grid[15],grid[16],grid[17],grid[19],0,0,0);
if(horizontal_and(vec) && horizontal_and(vec2) && horizontal_and(vec3) && horizontal_and(vec4) &&
horizontal_and(vec5) && horizontal_and(vec6) && horizontal_and(vec7) && horizontal_and(vec8)) {
return false;
}
if(horizontal_and(vec9) && horizontal_and(vec10) && horizontal_and(vec11)) {
return false;
}
return true;
}

Related

Stack smashing detected

#include <iostream>
using namespace std;
int main()
{
int tablica[9];
string inputromanum;
cout << "ROMAN: ";
cin >> inputromanum;
int maxindeks;
bool disablenextcomp = false;
int readysolution = 0;
maxindeks = inputromanum.length() - 1;{}{}
for (int i = 0; i <= maxindeks; i++)
{
if (inputromanum[i] == 'M' || inputromanum[i] == 'm')
{
tablica[i] = 1000;
}
if (inputromanum[i] == 'D' || inputromanum[i] == 'd')
{
tablica[i] = 500;
}
if (inputromanum[i] == 'C'|| inputromanum[i] == 'c')
{
tablica[i] = 100;
}
if (inputromanum[i] == 'L' || inputromanum[i] == 'l')
{
tablica[i] = 50;
}
if (inputromanum[i] == 'X' || inputromanum[i] == 'x')
{
tablica[i] = 10;
}
if (inputromanum[i] == 'V' || inputromanum[i] == 'v')
{
tablica[i] = 5;
}
if (inputromanum[i] == 'I' || inputromanum[i] == 'i')
{
tablica[i] = 1;
}
}
cout<<endl;
for(int i4 = 0; i4 <= maxindeks; i4++)
{
cout<<"tablica["<<i4<<"] = "<<tablica[i4]<<endl;
}
for (int i2 = 0; i2 <= maxindeks; i2++)
{
int i5 = i2 + 1;
if (i5 <= maxindeks)
{
//cout<<endl<<"tablica[i2 + 1] = "<<tablica[i2 + 1];
//cout<<endl<<"tablica[i2] = "<<tablica[i2];
//cout<<endl<<"tablica[i2 + 1] - tablica[i2] = "<<tablica[i2 + 1] - tablica[i2];
if (tablica[i2 + 1] - tablica[i2] > 0 && disablenextcomp == false)
{
//cout<<endl<<"readysolution + (tablica[i2 + 1] - tablica[i2]) = "<<readysolution + (tablica[i2 + 1] - tablica[i2])<<endl;
readysolution = readysolution + (tablica[i2 + 1] - tablica[i2]);
disablenextcomp = true;
}
else
{
if(disablenextcomp == false)
{
//cout<<endl<<"readysolution + tablica[i2] = "<<readysolution + tablica[i2]<<endl;
readysolution = readysolution + tablica[i2];
}
else
{
disablenextcomp = false;
}
}
}
else
{
if(disablenextcomp == false)
{
//cout<<endl<<endl<<"OSTATNI INDEKS";
//cout<<endl<<"tablica[i2] = "<<tablica[i2];
//cout<<endl<<"readysolution + tablica[i2] = "<<readysolution + tablica[i2];
readysolution = readysolution + tablica[i2];
}
}
i5++;
}
cout << endl << readysolution;
}
This is my program. made for decoding roman numerals into arabic ones. It works as intended in most cases, however, one of my colleagues found it to produce this error while inputting MMMCMXCVIII into the program:
*** stack smashing detected ***: terminated
It would refuse to work afterwards.
I wasn't able to find different numbers that would cause this error except MMMMMMMMMMM.
It seems to fail when the index of tablica array exceeds 10. I don't know why it does so, as i am a novice in c++. It should've outputted 3999 instead of the error appearing. The numbers it should process successfully should range from 1 to 5000.
Thanks to folks in the comments, I've found the cause.
The tablica[9] array is supposed to store 9 or less characters.
The length of the input (MMMCMXCVIII in this case) has more characters, therefore it makes the for loop responsible for storing values for each character to cause mentioned above error, as there are no remaining units to store the values in.
I've expanded the storage of tablica to 25 characters.
In modern C++ it is considered bad practice to use C-style arrays and index loops whenever you can avoid this. So, fo example you can rewrite first loop like this:
std::vector<int> tablica;
tablica.reserve(inputromanum.size()); // This line is not necessary, but it can help optimize memory allocations
for (char c : inputromanum)
{
if (c == 'M' || c == 'm')
{
tablica.push_back(1000);
}
if (c == 'D' || c == 'd')
{
tablica.push_back(500);
}
if (c == 'C'|| c == 'c')
{
tablica.push_back(100);
}
if (c == 'L' || c == 'l')
{
tablica.push_back(50);
}
if (c == 'X' || c == 'x')
{
tablica.push_back(10);
}
if (c == 'V' || c == 'v')
{
tablica.push_back(5);
}
if (c == 'I' || c == 'i')
{
tablica.push_back(1);
}
}
And you will avoid your issue completly. Something similar can be done with other loops too. This approach also has benefit of (somewhat) properly handling situations when input line has other symbols, which is not roman number. Try it on your version and you will see what I mean.
One more point. When you need to do something different depending of value of one variable, like you did with all those ifs. There is special statement in C/C++ for this: switch. So instead of those ifs you can do this:
std::vector<int> tablica;
tablica.reserve(inputromanum.size()); // This line is not necessary, but it can help optimize memory allocations
for (char c : inputromanum)
{
switch(c)
{
case 'M':
case 'm':
tablica.push_back(1000);
break;
case 'D':
case 'd':
tablica.push_back(500);
break;
case 'C':
case 'c':
tablica.push_back(100);
break;
case 'L':
case 'l':
tablica.push_back(50);
break;
case 'X':
case 'x':
tablica.push_back(10);
break;
case 'V':
case 'v':
tablica.push_back(5);
break;
case 'I':
case 'i':
tablica.push_back(1);
break;
}
}

How to Input 2 or 4 at empty places in a 3x3 grid?

void putTile(int &n1, int &n2, int &n3, int &n4, int &n5, int &n6, int &n7, int &n8, int &n9)
{
srand(time(0));
int i = (((rand()%10)) < 5 ? 2:4);
if (n1||n2||n3||n4||n5||n6||n7||n8||n9 == 0) {
switch((rand()%9) + 1) {
case 1:
n1 = i;
break;
case 2:
n2 = i;
break;
case 3:
n3 = i;
break;
case 4:
n4 = i;
break;
case 5:
n5 = i;
break;
case 6:
n6 = i;
break;
case 7:
n7 = i;
break;
case 8:
n8 = i;
break;
case 9:
n9 = i;
break;
}
}
}
I am trying to generate either 2 or 4 at empty places(places containing 0) randomly, but my algorithm inputs randomly generated numbers (2 or 4) at random places even at places having a number already, as I have used the if statement at the beginning of the code. Any suggestions on how to correct the code?
Let's see your code like the compiler does:
void putTile(int &n1, int &n2, int &n3, int &n4, int &n5, int &n6, int &n7, int &n8, int &n9)
{
srand(time(0));
int i = (((rand()%10)) < 5 ? 2:4);
// below compiler sees "number or number or number or (number is zero)",
// compiler does not see "if either of the numbers is zero",
// if asked to do a logical "OR" of two numbers, then it will do the logical
// "number is non-zero OR other number is non-zero",
// i.e. it sees this, probably extremely probable condition:
if ( (n1 != 0)
||(n2 != 0)
||(n3 != 0)
||(n4 != 0)
||(n5 != 0)
||(n6 != 0)
||(n7 != 0)
||(n8 != 0)
||(n9 == 0)
)
{
// program will almost always execute this
switch((rand()%9) + 1) // do a switch on a random number
{
case 1: // if it is 1
n1 = i; // overwrite n1 unconditionally with i, whether it is 0 or not
break;
case 2: // same for 2 ... etc.
n2 = i;
break;
case 3:
n3 = i;
break;
case 4:
n4 = i;
break;
case 5:
n5 = i;
break;
case 6:
n6 = i;
break;
case 7:
n7 = i;
break;
case 8:
n8 = i;
break;
case 9:
n9 = i;
break;
}
}
}
There's a few issues which the comments already point out. Firstly, your if statement doesn't work the way you expect. It is checking to see if n1 through n8 is not zero, or if n9 is zero.
Secondly, after the initial if statement, you just randomly change a position, but don't check if that value is non-zero.
A possible solution is:
void putTile( int &n1, int &n2, int &n3, int &n4, int &n5, int &n6, int &n7, int &n8, int &n9 ) {
int i = ( ( ( rand() % 10 ) ) < 5 ? 2 : 4 );
if ( n1 == 0 || n2 == 0 || n3 == 0 || n4 == 0 || n5 == 0 || n6 == 0 || n7 == 0 || n8 == 0 || n9 == 0 ) {
while ( true ) {
switch ( ( rand() % 9 ) + 1 ) {
case 1:
if ( n1 == 0 ) {
n1 = i;
return;
}
break;
case 2:
if ( n2 == 0 ) {
n2 = i;
return;
}
break;
case 3:
if ( n3 == 0 ) {
n3 = i;
return;
}
break;
case 4:
// ...
}
}
}
Note, this isn't very efficient, and has a lot of duplication. I would suggest using arrays.

Number of Mondays, Tuesdays, etc.. in a given month in C++

I have been able to retrieve the first day of any given month, and I also know how many days are in that month. How do i calculate the amount of mondays, tuesdays, etc.. left in the month? Below is my code is far.
//Set Days in Month
if(Time.Month == 2)
{
if((Time.Year % 400 == 0) || ((Time.Year % 4 == 0) && (Time.Year % 100 != 0)))
Weekend_Status.Month_Days[Time.Month] = 29;
else
Weekend_Status.Month_Days[Time.Month] = 28;
}
else if(Time.Month == 1 || Time.Month == 3 || Time.Month == 5 || Time.Month == 7 || Time.Month == 8 || Time.Month == 10 || Time.Month == 12)
{
Weekend_Status.Month_Days[Time.Month] = 31;
}
else
{
Weekend_Status.Month_Days[Time.Month] = 30;
}
//Set New Month Start Day
if(Time.Day == 1 && Time.Hour == 0 && Time.Minute == 0 && Time.Second == 1)
{
switch(Time.Day_of_Week)
{
case 0: //Sunday
Weekend_Status.Month_Start_Day = 0;
break;
case 1: //Monday
Weekend_Status.Month_Start_Day = 1;
break;
case 2: //Tuesday
Weekend_Status.Month_Start_Day = 2;
break;
case 3: //Wednesday
Weekend_Status.Month_Start_Day = 3;
break;
case 4: //Thursday
Weekend_Status.Month_Start_Day = 4;
break;
case 5: //Friday
Weekend_Status.Month_Start_Day = 5;
break;
case 6: //Saturday
Weekend_Status.Month_Start_Day = 6;
break;
}
}
You could do something like this, where you take advantage of the modulo operator, using it to count all days in the month where the day occurs. Note: Untested.
// example: count how many Wednesdays
int search_day = 3;
int search_day_count = 0;
int start_day = Weekend_Status.Month_Start_Day;
int end_day = Weekend_Status.Month_Days[Time.Month];
// loop through all days for the given month
for (int day = start_day; day <= start_day + end_day; day++)
{
// count all days that are 7 days apart starting from search_day = 3
if (day == search_day || (day - search_day) % 7 == 0)
{
search_day_count++;
}
}

How can I reduce the time complexity of the following block of code?

I am taking 1 to n digits and finding count of numbers that are divisible by a or b but not divisible by both.
I want to reduce time complexity of this block by some logical change.
cin >> n >> a >> b >> k;
for(int i = 1; i <= n; i++) {
if(i % a == 0 && i % b==0) {
count++;
} else if(i % b == 0 && i % a != 0) {
count++;
}
}
Calculate the count of numbers divisible by a, add it to the count of numbers divisible by b, subtract it with twice the count of numbers divisible by the lcm (lowest common multiple) of a,b.
Time complexity: O(log(min(a,b)))
Because to calculate Lowest common multiple you calculate gcd (Greatest common divisor) which can be calculated in O(log(min(a,b)))
Note: If you include bits/stdc++.h, you can use the inbuilt function to calculate gcd: __gcd(int , int )
int lcm(int a, int b) {
return (a * b)/__gcd(a,b);
}
cin>>n>>a>>b>>k;
int divisible_by_a = n / a;
int divisible_by_b = n / b;
int divisible_by_both = n / lcm(a,b);
ans = divisible_by_a + divisible_by_b - 2*divisible_by_both;
It seems to me that your code don't work as you describe: it counts for every number divisible by b. You should check if i is multiple of a or b
if (i % a == 0 && i % b != 0) {...
} else if (i % a != 0 && i % b == 0) {...
}
I also suggest to you a different approach: find multiples of a and b untill you reach n and count that numbers. remove same numers in lists from the sum (better if you do that before the final sum)
Before optimizing it, make sure it works first.
Right now, you're checking if a number is divisible by only b or by both a and b. To make it a or b but not both, you need to switch i % b==0 to i % b!=0 in the first condition:
for(int i = 1; i <= n; i++) {
if(i % a == 0 && i % b!=0) {
count++;
} else if(i % b == 0 && i % a != 0) {
count++;
}
}
One small thing you can do to speed things up is to do the divisibility check just once each and save the result instead of twice. Then you can use a single XOR for the final result.
for(int i = 1; i <= n; i++) {
int div_a = (i % a == 0);
int div_b = (i % b == 0);
if (a ^ b) {
count++;
}
}
Let's start with this:
temp = a;
while(temp < n) {
if(temp%b != 0) {
count++;
}
temp += a;
}
temp = b;
while(temp < n) {
if(temp%a != 0) {
count++;
}
temp += b;
}
Next, consider some cheats. If a%b == 0 then any number divisible by a will also be divisible by b; and similar for b%a == 0. In both cases the count must be zero.
If a == 0 then no number is divisible by a; and similar for b == 0; and if both a and b are zero then the count must be zero.
Finally; don't forget that (in C) the behavior of x%0 is undefined and you need to guard against that.
Combining all of the above you get something like:
if( (a == 0) && (b == 0) ) {
return 0;
}
if( (a != 0) && (b != 0) ) {
if( (a%b == 0) || (b%a == 0) ) {
return 0;
}
}
count = 0;
if(a != 0) {
temp = a;
while(temp < n) {
if(temp%b != 0) {
count++;
}
temp += a;
}
}
if(b != 0) {
temp = b;
while(temp < n) {
if(temp%a != 0) {
count++;
}
temp += b;
}
}
return count;
Next round of cheats:
If n <= 1 then the count must be zero.
If a == 1 or a == -1 then all numbers are divisible by a.
If b == 1 or b == -1 then all numbers are divisible by b.
To deal with these I'd move to "nested switch" to minimise the number of branches, like:
switch(a) {
case 0:
switch(b) {
case 0:
...
break;
case -1:
case 1:
...
break;
default:
...
break;
}
break;
case -1:
case 1:
switch(b) {
case 0:
...
break;
case -1:
case 1:
...
break;
default:
...
break;
}
break;
default:
switch(b) {
case 0:
...
break;
case -1:
case 1:
...
break;
default:
...
break;
}
break;
}

Turn if else into a for loop

I want to choose positions 1, 3, 7, 9.
I made an if else statement that solves the problem. Could this be implemented in a for loop.
int move1 = (rand() % 4) + 1;
if (move1 == 1)
{
move1 = 1;
}
else if (move1 == 2)
{
move1 = 3;
}
else if (move1 == 3)
{
move1 = 7;
}
else if (move1 == 4)
{
move1 = 9;
}
I don't know about a loop, but usually you want to implement these kind of things with a switch statement. Something like this:
int move1;
switch(rand() % 4)
{
case 1:
move1 = 1;
break;
case 2:
move1 = 3;
break;
case 3:
move1 = 7;
break;
case 4:
move1 = 9;
break;
}