Dynamic stack buffer overflow error (Leetcode) - c++
I'm getting a runtime error while submitting the code on Leetcode.
Code:
class Solution {
public:
void gameOfLife(vector<vector<int>>& board) {
int m = board.size();
int n = board[0].size();
int mat[m][n], copy[m][n];
int i = 0, j = 0;
vector<vector<int>>::iterator row;
vector<int>::iterator col;
for (row = board.begin(); row != board.end(); row++) {
for (col = row->begin(); col != row->end(); col++) {
mat[i][j] = *col;
copy[i][j] = *col;
j++;
}
i++;
j = 0;
}
for (i = 0; i < m; i++) {
for (j = 0; j < n; j++) {
int c = 0;
if (i == 0 && j == 0) {
if (mat[1][0] == 1) c++;
if (mat[0][1] == 1) c++;
if (mat[1][1] == 1) c++;
} else if (i == 0 && j == n - 1) {
if (mat[0][n - 2] == 1) c++;
if (mat[1][n - 1] == 1) c++;
if (mat[1][n - 2] == 1) c++;
} else if (i == 0 && j != 0 && j != n - 1) {
if (mat[0][j - 1] == 1) c++;
if (mat[0][j + 1] == 1) c++;
if (mat[1][j - 1] == 1) c++;
if (mat[1][j] == 1) c++;
if (mat[1][j + 1] == 1) c++;
} else if (j == 0 && i != 0 && i != m - 1) {
if (mat[i - 1][0] == 1) c++;
if (mat[i + 1][0] == 1) c++;
if (mat[i - 1][j + 1] == 1) c++;
if (mat[i][j + 1] == 1) c++;
if (mat[i + 1][j + 1] == 1) c++;
} else if (j == n - 1 && i != 0 && i != m - 1) {
if (mat[i - 1][j] == 1) c++;
if (mat[i + 1][j] == 1) c++;
if (mat[i - 1][j - 1] == 1) c++;
if (mat[i][j - 1] == 1) c++;
if (mat[i + 1][j - 1] == 1) c++;
} else if (i == m - 1 && j == 0) {
if (mat[i][j + 1] == 1) c++;
if (mat[i - 1][j] == 1) c++;
if (mat[i - 1][j + 1] == 1) c++;
} else if (i == m - 1 && j == n - 1) {
if (mat[i][j - 1] == 1) c++;
if (mat[i - 1][j] == 1) c++;
if (mat[i - 1][j - 1] == 1) c++;
} else if (i == m - 1 && j != 0 && j != n - 1) {
if (mat[i][j - 1] == 1) c++;
if (mat[i][j + 1] == 1) c++;
if (mat[i - 1][j - 1] == 1) c++;
if (mat[i - 1][j] == 1) c++;
if (mat[i - 1][j + 1] == 1) c++;
} else {
if (mat[i][j - 1] == 1) c++;
if (mat[i][j + 1] == 1) c++;
if (mat[i - 1][j - 1] == 1) c++;
if (mat[i - 1][j] == 1) c++;
if (mat[i - 1][j + 1] == 1) c++;
if (mat[i + 1][j - 1] == 1) c++;
if (mat[i + 1][j] == 1) c++;
if (mat[i + 1][j + 1] == 1) c++;
}
if (mat[i][j] == 0) {
if (c == 3) copy[i][j] = 1;
} else {
if (c != 2 && c != 3) copy[i][j] = 0;
}
}
}
i = 0, j = 0;
vector<vector<int>>::iterator r;
vector<int>::iterator cl;
for (r = board.begin(); r != board.end(); r++) {
for (cl = r->begin(); cl != r->end(); cl++) {
*cl = copy[i][j];
j++;
}
i++;
j = 0;
}
}
};
I don't have much clarity regarding the dynamic-stack-buffer-overflow error. I referenced the link: Error: dynamic-stack-buffer-overflow and got the idea it's due to an out-of-bound index reference. I tried dry running with the sample input but it didn't help much.
Error message:
=================================================================
==34==ERROR: AddressSanitizer: dynamic-stack-buffer-overflow on address 0x7ffc2a0e3024 at pc 0x000000347dd6 bp 0x7ffc2a0e2f90 sp 0x7ffc2a0e2f88
READ of size 4 at 0x7ffc2a0e3024 thread T0
#2 0x7f85967fb0b2 (/lib/x86_64-linux-gnu/libc.so.6+0x270b2)
Address 0x7ffc2a0e3024 is located in stack of thread T0
Shadow bytes around the buggy address:
0x1000054145b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0x1000054145c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0x1000054145d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0x1000054145e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0x1000054145f0: 00 00 00 00 ca ca ca ca 04 cb cb cb cb cb cb cb
=>0x100005414600: ca ca ca ca[04]cb cb cb cb cb cb cb f1 f1 f1 f1
0x100005414610: 00 f2 f2 f2 f8 f3 f3 f3 00 00 00 00 00 00 00 00
0x100005414620: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0x100005414630: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0x100005414640: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0x100005414650: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 01 f2 04 f2
Shadow byte legend (one shadow byte represents 8 application bytes):
Addressable: 00
Partially addressable: 01 02 03 04 05 06 07
Heap left redzone: fa
Freed heap region: fd
Stack left redzone: f1
Stack mid redzone: f2
Stack right redzone: f3
Stack after return: f5
Stack use after scope: f8
Global redzone: f9
Global init order: f6
Poisoned by user: f7
Container overflow: fc
Array cookie: ac
Intra object redzone: bb
ASan internal: fe
Left alloca redzone: ca
Right alloca redzone: cb
Shadow gap: cc
==34==ABORTING
Any suggestions as to which part of the code is throwing the error would be much appreciated.
You should really look include the input for which the error was thrown. When the grid only consists of 1 row/column or for example {{0}} then i == 0 && j == 0 is true, but mat[1] is out of bounds, possibly filled with garbage which then gets dereferenced. I recommend you clean up the bounds checking and neighbor counting so that it is not only more compact but much easier to look through as well. One example on how it can be rewritten is:
for(int a = -1; a <= 1; ++a)
for(int b = -1; b <= 1; ++b){
if (a == 0 && b == 0) continue;
if (i + a < 0 || i + a >= board.size() ||
j + b < 0 || j + b >= board[i].size()) continue;
c += board[i+a][j+b];
}
An other note I mentioned in the comment int mat[n][m] is not standard C++. Some compilers support it some don't. If you want dynamic arrays use new or you can use some C style alloca to still be standard compliant, but generally stl containers are your friend so you could also just use a std::vector<std::vector<int>>, like the problem does.
Note about alloca: As someone pointed out errors can be caused by placing arrays too large on the stack, so be careful. Also generally this is only useful to people who already know about it and know its limitations well. Mainly there as an example, use standard containers.
Related
C++ for loop that reverses the digits 10 - 99
I am trying to write code that will list and reverse all digit pairs from 01 - 99. 01-09 is easy enough since you just have to multiply it by 10. I also know at least one way to reverse a given digit, I just don't know how to properly use it with a for loop. This is what I have so far. for ( int num99 =1 ; num99 > 0 && num99 < 100; num99 = num99+1) { if (num99 <10){ temp99 = num99 * 10; cout << temp99 << endl; } else { while (num99 != 0) { remainder99 = num99 % 10; reverse99 = (reverse99 * 10) + remainder99; num99 = num99 / 10; } cout << reverse99 << endl; } } Thank you for any help.
You have to reinitialize the values of remainder99, reverse99, temp99, num99 after you have used them. You also have to prepend 0 to the result, when you reverse 10, 20, 30 etc... to get 01, 02, 03 etc... After making the necessary changes, your program will look like this. #include<iostream> using namespace std; int main(void) { int remainder99 = 0; int reverse99 = 0; int temp99 = 0; for ( int num99 =1 ; num99 > 0 && num99 < 100; num99 = num99+1) { if (num99 <10){ temp99 = num99 * 10; cout << temp99 << endl; } else { temp99 = num99; while (num99 != 0) { remainder99 = num99 % 10; reverse99 = (reverse99 * 10) + remainder99; num99 = num99 / 10; } if(reverse99 < 10) cout << 0; cout << reverse99 << endl; reverse99 = 0; remainder99 = 0; num99 = temp99; } } return 0; } Output is: 10 20 30 40 50 60 70 80 90 01 11 21 31 41 51 61 71 81 91 02 12 22 32 42 52 62 72 82 92 03 13 23 33 43 53 63 73 83 93 04 14 24 34 44 54 64 74 84 94 05 15 25 35 45 55 65 75 85 95 06 16 26 36 46 56 66 76 86 96 07 17 27 37 47 57 67 77 87 97 08 18 28 38 48 58 68 78 88 98 09 19 29 39 49 59 69 79 89 99
What you are trying to do is trying to operate over the loop variable, which causes unexpected results. Instead, separate your loop variable and your num99 variable like this: for (int i = 1; i < 100; i++) { int num99 = i; if (num99 < 10) { temp99 = num99 * 10; cout << temp99 << endl; } else { remainder99 = num99 % 10; num99 = num99 / 10; if (remainder99 == 0) { cout<<"0"<<num99<<endl; } else { reverse99 = remainder99 * 10 + num99; cout << reverse99 << endl; } } } I have also taken the liberty of assuming that when you reverse the two digit numbers like 10, 20, etc, you want the output to be 01, 02, like that, if that is not what is expected by you, you can remove the if condition.
There are lot of issues with mentioned code like Here reverse99 * 10 what is initial value of reverse99 ? Assign it as 0. Here num99 = num99 / 10; you are modifying original num99 which outer loop variable, instead of this assign num99 to some temporary variable & do operation with that temporary variable. Sample Code int main(void) { int temp99 = 0,remainder99 = 0, reverse99 = 0/* must initialize with 0 */ ; for ( int num99 =1 ; num99 > 0 && num99 < 100; num99 += 1) { if (num99 <10){ temp99 = num99 * 10; std::cout << num99 << std::endl; } else { reverse99 = 0;/*make it as 0 again for every iteration */ temp99 = num99; /* assign loop variable to temp99 & do the operation with that */ while (temp99 != 0) { remainder99 = temp99 % 10; reverse99 = (reverse99 * 10) + remainder99; temp99 /= 10; } std::cout << reverse99 << std::endl; } } return 0; }
A C++11 oriented solution: #include <vector> #include <iostream> #include <numeric> #include <algorithm> int main() { std::vector<int> in(99); std::iota(in.begin(), in.end(), 1); std::for_each(in.begin(), in.end(), [](int& i) { i = (i % 10)*10 + (i / 10); } ); for (const auto& val : in) { std::string out = std::to_string(in); if (out.size() == 1) { out = '0' + out; } std::cout << out << std::endl; } return 0; }
AES 256-cbc encryption C++ using OpenSSL
I am trying to create a function I can put a string key (I have another algorithm to generate the key) into and a message string. The function should encrypt and decrypt the text using the aes256-cbc from OpenSSL library #define AES_KEYLENGTH 256 string cipher_AES(string key, string message); int main(int argc, char* argv[]) { cipher_AES("115792089237316195423570985008687907853269984665640564039457583884239776304164", "Hello, how are you, you mad?"); return 0; } // a simple hex-print routine. could be modified to print 16 bytes-per-line static void hex_print(const void* pv, size_t len) { const unsigned char * p = (const unsigned char*)pv; if (NULL == pv) printf("NULL"); else { size_t i = 0; for (; i<len;++i) printf("%02X ", *p++); } printf("\n"); } /* computes the ciphertext from plaintext and key using AES256-CBC algorithm */ string cipher_AES(string key, string message) { size_t inputslength = message.length(); unsigned char aes_input[inputslength]; unsigned char aes_key[AES_KEYLENGTH]; memset(aes_input, 0, inputslength/8); memset(aes_key, 0, AES_KEYLENGTH/8); strcpy((char*) aes_input, message.c_str()); strcpy((char*) aes_key, key.c_str()); /* init vector */ unsigned char iv[AES_BLOCK_SIZE]; memset(iv, 0x00, AES_BLOCK_SIZE); // buffers for encryption and decryption const size_t encslength = ((inputslength + AES_BLOCK_SIZE) / AES_BLOCK_SIZE) * AES_BLOCK_SIZE; unsigned char enc_out[encslength]; unsigned char dec_out[inputslength]; memset(enc_out, 0, sizeof(enc_out)); memset(dec_out, 0, sizeof(dec_out)); AES_KEY enc_key, dec_key; AES_set_encrypt_key(aes_key, AES_KEYLENGTH, &enc_key); AES_cbc_encrypt(aes_input, enc_out, inputslength, &enc_key, iv, AES_ENCRYPT); AES_set_decrypt_key(aes_key, AES_KEYLENGTH, &dec_key); AES_cbc_encrypt(enc_out, dec_out, encslength, &dec_key, iv, AES_DECRYPT); printf("original:\t"); hex_print(aes_input, sizeof(aes_input)); printf("encrypt:\t"); hex_print(enc_out, sizeof(enc_out)); printf("decrypt:\t"); hex_print(dec_out, sizeof(dec_out)); stringstream ss; for(int i = 0; i < encslength; i++) { ss << enc_out[i]; } return ss.str();); } In the output it seems that something is the same but not all of it: original: 48 65 6C 6C 6F 2C 20 68 6F 77 20 61 72 65 20 79 6F 75 2C 20 79 6F 75 20 69 64 69 6F 74 3F encrypt: 25 C3 B4 4B 92 68 2E DA 61 B6 AB 19 97 D3 90 8A 5F 8B 3C 4B 78 13 FC E1 3A AF 2C B5 3F C8 2B D7 decrypt: 17 EE 50 27 17 3F DC 89 55 D8 0C D4 4D AD 0B AE 6F 75 2C 20 79 6F 75 20 69 64 69 6F 74 3F
Looking at your data, the first block (16 bytes) is wrong but following blocks are correct. This suggests that the wrong IV is being used when decrypting. A little testing (printing the IV before and after the first call to AES_cbc_encrypt) shows that the IV does indeed change during this call. A little poking around the OpenSSL source shows that it changes the IV parameter to be the last block of the ciphertext when encrypting. Resetting the IV before the decryption fixes it though, and you get the correct plaintext regenerated: memset(iv, 0x00, AES_BLOCK_SIZE); AES_cbc_encrypt(enc_out, dec_out, encslength, &dec_key, iv, AES_DECRYPT);
Reading the source code of cbc128.c of openssl, there exists a loop calling the iv or the output of last block. When the loop ends, iv will be renewed. void CRYPTO_cbc128_encrypt(const unsigned char *in, unsigned char *out, size_t len, const void *key, unsigned char ivec[16], block128_f block) { size_t n; const unsigned char *iv = ivec; if (len == 0) return; #if !defined(OPENSSL_SMALL_FOOTPRINT) if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out | (size_t)ivec) % sizeof(size_t) != 0) { while (len >= 16) { for (n = 0; n < 16; ++n) out[n] = in[n] ^ iv[n]; (*block) (out, out, key); iv = out; len -= 16; in += 16; out += 16; } } else { while (len >= 16) { for (n = 0; n < 16; n += sizeof(size_t)) *(size_t_aX *)(out + n) = *(size_t_aX *)(in + n) ^ *(size_t_aX *)(iv + n); (*block) (out, out, key); iv = out; len -= 16; in += 16; out += 16; } } #endif while (len) { for (n = 0; n < 16 && n < len; ++n) out[n] = in[n] ^ iv[n]; for (; n < 16; ++n) out[n] = iv[n]; (*block) (out, out, key); iv = out; if (len <= 16) break; len -= 16; in += 16; out += 16; } memcpy(ivec, iv, 16); }
Crafting TCP/IP Packets
I am studying socket programming with C/C++ and I think the best way is to dive into it. I can send data to the socket using socket.h send() hence would like to go deeper by crafting network packets. I tried but still couldn't figure out which part of my data is invalid, as I am getting Invalid argument errno 22. This is my IP header in hex: 45 00 28 00 d4 31 00 00 ff 06 3c 6e c0 a8 01 06 c0 a8 01 01 And this is my TCP header: 00 50 00 50 00 00 00 00 00 00 00 00 50 02 16 d0 15 1b 00 00 I appreciate any tips. NB: I am reading beej.us and here for my studies. Edit: This is my code: struct pseudo_header { u_int32_t source_address; u_int32_t dest_address; u_int8_t placeholder; u_int8_t protocol; u_int16_t tcp_length; }; int main(int argc, char* argv[]) { int sockfd = socket (PF_INET, SOCK_RAW, IPPROTO_TCP); if (sockfd == -1) { perror("Failed to create socket"); exit(1); } // Datagram to represent the packet char datagram[4096]; memset(datagram, 0, 4096); // zero out the packet buffer //Data part char *data = datagram + sizeof(struct ip) + sizeof(struct tcphdr); strcpy(data, ""); // some address resolution char source_ip[32]; strcpy(source_ip, "192.168.1.6"); struct sockaddr_in sai; sai.sin_family = AF_INET; sai.sin_port = htons(80); sai.sin_addr.s_addr = inet_addr("192.168.1.1"); cout << "sai.sin_addr.s_addr=" << sai.sin_addr.s_addr << endl; //Fill in the IP Header struct ip *iph = (struct ip *) datagram; iph->ip_hl = 5; iph->ip_v = 4; iph->ip_tos = 0; iph->ip_len = sizeof(struct ip) + sizeof(struct tcphdr) + strlen(data); iph->ip_id = htons(54321); iph->ip_off = 0; iph->ip_ttl = 255; iph->ip_p = IPPROTO_TCP; iph->ip_sum = 0; iph->ip_src.s_addr = inet_addr(source_ip); iph->ip_dst.s_addr = sai.sin_addr.s_addr; //Ip checksum unsigned short checksum = csum((unsigned short *) datagram, iph->ip_len); iph->ip_sum = checksum; cout << "iph->ip_sum=" << checksum << endl; unsigned char *pIph = (unsigned char *) datagram; for (int i = 0; i < 20; i++) { cout << setfill('0') << setw(2) << hex << (int) pIph[i] << " "; if (i + 1 >= 4 && (i + 1) % 4 == 0) { cout << endl; } } //TCP Header struct tcphdr *tcph = (struct tcphdr *) (datagram + sizeof(struct ip)); struct pseudo_header psh; tcph->th_sport = htons(80); tcph->th_dport = htons(80); tcph->th_seq = 0; tcph->th_ack = 0; tcph->th_off = 5; tcph->th_flags = TH_SYN; tcph->th_win = htons(5840); /* maximum allowed window size */ tcph->th_sum = 0; tcph->th_urp = 0; //Now the TCP checksum psh.source_address = inet_addr(source_ip); psh.dest_address = sai.sin_addr.s_addr; psh.placeholder = 0; psh.protocol = IPPROTO_TCP; psh.tcp_length = htons(sizeof(struct tcphdr) + strlen(data)); int psize = sizeof(struct pseudo_header) + sizeof(struct tcphdr) + strlen(data); char *pseudogram = malloc(psize); memcpy(pseudogram, (char*) &psh, sizeof(struct pseudo_header)); memcpy(pseudogram + sizeof(struct pseudo_header), tcph, sizeof(struct tcphdr) + strlen(data)); checksum = csum((unsigned short*) pseudogram, psize); tcph->th_sum = checksum; cout << "tcph->th_sum=" << checksum << endl; unsigned char *pTcph = (unsigned char *) tcph; for (int i = 0; i < 20; i++) { cout << setfill('0') << setw(2) << hex << (int) pTcph[i] << " "; if (i + 1 >= 4 && (i + 1) % 4 == 0) { cout << endl; } } //IP_HDRINCL to tell the kernel that headers are included in the packet int one = 1; const int *val = &one; if (setsockopt(sockfd, IPPROTO_IP, IP_HDRINCL, val, sizeof(one)) < 0) { perror("Error setting IP_HDRINCL"); exit(0); } struct sockaddr *pSa = (struct sockaddr *) &sai; // Send the packet if (sendto(sockfd, datagram, iph->ip_len, 0, pSa, sizeof(sai)) < 0) { // failed here perror("sendto failed"); } else { //Data send successfully printf("Packet Send. Length : %d \n", iph->ip_len); } return 1; }
In your IPv4 header: 45 00[28 00] d4 31 00 00 ff 06 3c 6e c0 a8 01 06 c0 a8 01 01 Is your packet length 10240 (0x2800)? Are you sure it's not 40 (0x0028)? 45 00[00 28] d4 31 00 00 ff 06[64 46] // checksum updated c0 a8 01 06 c0 a8 01 01 EDIT: now that you posted your code… You should replace: iph->ip_len = sizeof(struct ip) + sizeof(struct tcphdr) + strlen(data); by: iph->ip_len = htons(sizeof(struct ip) + sizeof(struct tcphdr) + strlen(data));
Simplifying a boolean logical expression to make it faster
Consider the following two functions: template <typename Type, Type Mask, class = typename std::enable_if<std::is_unsigned<Type>::value>::type> inline bool function1(const Type n, const Type m) { const Type diff = m-n; const Type msk = Mask & diff; return (n <= m) && ((!msk && !diff) || (msk && msk <= diff)); } template <typename Type, Type Mask, class = typename std::enable_if<std::is_unsigned<Type>::value>::type> inline bool function2(const Type n, const Type m) { return (n <= m) && ((!(Mask & (m-n)) && !(m-n)) || ((Mask & (m-n)) && (Mask & (m-n)) <= (m-n))); } They do exactly the same thing, except that the first one is more readable due to the use of temporary values (function2 is function1 but where I replaced the temporaries by their original values). It happens that function2 is a little faster than function1 and due to the fact that I will call it billion times on supercomputers I would like to know whether there is a more simple boolean expression that will produce exactly the same result (Type will always be an unsigned integral type).
The expression can be optimized as follows: (!msk && !diff) can be rewritten to !diff, as the expression will be true if both are zero, and msk is zero if diff is zero. Also, isn't diff always >= msk? That is, because using & cannot increase the value of msk. (This holds if Type is an unsigned integer) I changed the order of !diff and msk as it seems plausible that msk is more often true than !diff. The final expression is: (n <= m) && (msk || !diff). Another equivalent expression (suggested by anatolyg) is: (n < m && (Mask && (m - n))) || (n == m)
A test might be flawed. First Test: #include <iostream> #include <chrono> template <unsigned Mask> inline bool function1(const unsigned n, const unsigned m) { const unsigned diff = m-n; const unsigned msk = Mask & diff; return (n <= m) && ((!msk && !diff) || (msk && msk <= diff)); } template <unsigned Mask> inline bool function2(const unsigned n, const unsigned m) { return (n <= m) && ((!(Mask & (m-n)) && !(m-n)) || ((Mask & (m-n)) && (Mask & (m-n)) <= (m-n))); } template <unsigned Mask> inline bool function3(const unsigned n, const unsigned m) { if(m < n) return false; else if(m == n) return true; else return Mask & (m-n); } template <unsigned Mask> inline bool function4(const unsigned n, const unsigned m) { return (n < m && (Mask & (m-n))) || (n == m); } volatile unsigned a = std::rand(); volatile unsigned b = std::rand(); volatile bool result; inline double duration( std::chrono::system_clock::time_point start, std::chrono::system_clock::time_point end) { return double((end - start).count()) / std::chrono::system_clock::period::den; } int main() { typedef bool (*Function)(const unsigned, const unsigned); const unsigned N = 4; std::chrono::system_clock::duration timing[N] = {}; Function fn[] = { function1<0x1234>, function2<0x1234>, function3<0x1234>, function4<0x1234>, }; for(unsigned i = 0; i < 10000; ++i) { for(unsigned j = 0; j < 100; ++j) { unsigned Loops = 100; for(unsigned f = 0; f < N; ++f) { auto start = std::chrono::system_clock::now(); for(unsigned loop = 0; loop < Loops; ++loop) { result = fn[f](a, b); } auto end = std::chrono::system_clock::now(); timing[f] += (end-start); } } } for(unsigned i = 0; i < 4; ++i) { std::cout << "Timing " << i+1 << ": " << double(timing[i].count()) / std::chrono::system_clock::period::den << "\n"; } } compiled with g++ -std=c++11 -O3 shows: Timing 1: 0.435909 Timing 2: 0.435438 Timing 3: 0.435435 Timing 4: 0.435523 Second Test: #include <iostream> #include <chrono> inline bool function1(const unsigned Mask, const unsigned n, const unsigned m) { const unsigned diff = m-n; const unsigned msk = Mask & diff; return (n <= m) && ((!msk && !diff) || (msk && msk <= diff)); } inline bool function2(const unsigned Mask, const unsigned n, const unsigned m) { return (n <= m) && ((!(Mask & (m-n)) && !(m-n)) || ((Mask & (m-n)) && (Mask & (m-n)) <= (m-n))); } inline bool function3(const unsigned Mask, const unsigned n, const unsigned m) { if(m < n) return false; else if(m == n) return true; else return Mask & (m-n); } inline bool function4(const unsigned Mask, const unsigned n, const unsigned m) { return (n < m && (Mask & (m-n))) || (n == m); } inline double duration( std::chrono::system_clock::time_point start, std::chrono::system_clock::time_point end) { return double((end - start).count()) / std::chrono::system_clock::period::den; } int main() { typedef bool (*Function)(const unsigned, const unsigned, const unsigned); const unsigned N = 4; std::chrono::system_clock::duration timing[N] = {}; Function fn[] = { function1, function2, function3, function4, }; const unsigned OuterLoops = 1000000; const unsigned InnerLoops = 100; const unsigned Samples = OuterLoops * InnerLoops; unsigned* M = new unsigned[Samples]; unsigned* A = new unsigned[Samples]; unsigned* B = new unsigned[Samples]; for(unsigned i = 0; i < Samples; ++i) { M[i] = std::rand(); A[i] = std::rand(); B[i] = std::rand(); } unsigned result[N]; for(unsigned i = 0; i < OuterLoops; ++i) { for(unsigned f = 0; f < N; ++f) { auto start = std::chrono::system_clock::now(); for(unsigned j = 0; j < InnerLoops; ++j) { unsigned index = i + j; unsigned mask = M[index]; unsigned a = A[index]; unsigned b = B[index]; result[f] = fn[f](mask, a, b); } auto end = std::chrono::system_clock::now(); timing[f] += (end-start); } for(unsigned f = 1; f < N; ++f) { if(result[0] != result[f]) { std::cerr << "Different Results\n"; exit(-1); } } } for(unsigned i = 0; i < 4; ++i) { std::cout << "Timing " << i+1 << ": " << double(timing[i].count()) / std::chrono::system_clock::period::den << "\n"; } } compiled with g++ -std=c++11 -O3 shows: Timing 1: 0.763875 Timing 2: 0.738105 Timing 3: 0.518714 Timing 4: 0.785299 Disassembly of the second functions (compiled without inline): 0000000000000000 <_Z9function1jjj>: 0: 31 c0 xor %eax,%eax 2: 39 f2 cmp %esi,%edx 4: 72 10 jb 16 <_Z9function1jjj+0x16> 6: 29 f2 sub %esi,%edx 8: 21 d7 and %edx,%edi a: 89 f8 mov %edi,%eax c: 09 d0 or %edx,%eax e: 74 18 je 28 <_Z9function1jjj+0x28> 10: 39 d7 cmp %edx,%edi 12: 76 0c jbe 20 <_Z9function1jjj+0x20> 14: 31 c0 xor %eax,%eax 16: f3 c3 repz retq 18: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1) 1f: 00 20: 85 ff test %edi,%edi 22: 74 f0 je 14 <_Z9function1jjj+0x14> 24: 0f 1f 40 00 nopl 0x0(%rax) 28: b8 01 00 00 00 mov $0x1,%eax 2d: c3 retq 2e: 66 90 xchg %ax,%ax 0000000000000030 <_Z9function2jjj>: 30: 31 c0 xor %eax,%eax 32: 39 d6 cmp %edx,%esi 34: 77 0c ja 42 <_Z9function2jjj+0x12> 36: 89 d1 mov %edx,%ecx 38: 29 f1 sub %esi,%ecx 3a: 21 cf and %ecx,%edi 3c: 75 0a jne 48 <_Z9function2jjj+0x18> 3e: 39 f2 cmp %esi,%edx 40: 74 0a je 4c <_Z9function2jjj+0x1c> 42: f3 c3 repz retq 44: 0f 1f 40 00 nopl 0x0(%rax) 48: 39 f9 cmp %edi,%ecx 4a: 72 f6 jb 42 <_Z9function2jjj+0x12> 4c: b8 01 00 00 00 mov $0x1,%eax 51: c3 retq 52: 66 66 66 66 66 2e 0f data32 data32 data32 data32 nopw %cs:0x0(%rax,%rax,1) 59: 1f 84 00 00 00 00 00 0000000000000060 <_Z9function3jjj>: 60: 31 c0 xor %eax,%eax 62: 39 f2 cmp %esi,%edx 64: 72 0f jb 75 <_Z9function3jjj+0x15> 66: 74 08 je 70 <_Z9function3jjj+0x10> 68: 29 f2 sub %esi,%edx 6a: 85 fa test %edi,%edx 6c: 0f 95 c0 setne %al 6f: c3 retq 70: b8 01 00 00 00 mov $0x1,%eax 75: f3 c3 repz retq 77: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1) 7e: 00 00 0000000000000080 <_Z9function4jjj>: 80: 39 d6 cmp %edx,%esi 82: 73 0d jae 91 <_Z9function4jjj+0x11> 84: 89 d1 mov %edx,%ecx 86: b8 01 00 00 00 mov $0x1,%eax 8b: 29 f1 sub %esi,%ecx 8d: 85 f9 test %edi,%ecx 8f: 75 05 jne 96 <_Z9function4jjj+0x16> 91: 39 d6 cmp %edx,%esi 93: 0f 94 c0 sete %al 96: f3 c3 repz retq Hardware: Intel® Core™ i3-2310M CPU # 2.10GHz × 4 7.7 GiB My conclusion: Analyze the algorithm properly (See #George answer) Express the optimized algorithm in simple code and leave fine tuning optimizations to the compiler. Write a proper test case (measurement), but the kind of measurement will impact the result. (Here: The first and second show different results) -
The difference between f1 and f2 is probably because the compiler fail to delay the evaluation of diff and msk in the case n>m in f1. Below a sample code to time your functions and results in microseconds on my computer under VS2013, also, as #George said, there is redundant evaluations so i added f1b and f3. f1 = 98201.7us f1b = 95574.1us f2 = 96613.1us f3 = 94809.9us And the code : #include <iostream> #include <vector> #include <random> #include <limits> #include <chrono> #include <algorithm> #define NOMINMAX #include <windows.h> struct HighResClock { typedef long long rep; typedef std::nano period; typedef std::chrono::duration<rep, period> duration; typedef std::chrono::time_point<HighResClock> time_point; static const bool is_steady = true; static time_point now( ); }; namespace { const long long g_Frequency = [] ( ) -> long long { LARGE_INTEGER frequency; QueryPerformanceFrequency( &frequency ); return frequency.QuadPart; }( ); } HighResClock::time_point HighResClock::now( ) { LARGE_INTEGER count; QueryPerformanceCounter( &count ); return time_point( duration( count.QuadPart * static_cast<rep>( period::den ) / g_Frequency ) ); } template <typename Type, Type Mask> inline bool function1( const Type n, const Type m ) { static_assert( std::is_unsigned<Type>::value, "Type must be unsigned" ); const Type diff = m - n; const Type msk = Mask & diff; return ( n <= m ) && ( ( !msk && !diff ) || ( msk && msk <= diff ) ); } template <typename Type, Type Mask> inline bool function1b( const Type n, const Type m ) { static_assert( std::is_unsigned<Type>::value, "Type must be unsigned" ); if ( n > m ) return false; const Type diff = m - n; const Type msk = Mask & diff; return ( ( !msk && !diff ) || ( msk && msk <= diff ) ); } template <typename Type, Type Mask> inline bool function2( const Type n, const Type m ) { static_assert( std::is_unsigned<Type>::value, "Type must be unsigned" ); return ( n <= m ) && ( ( !( Mask & ( m - n ) ) && !( m - n ) ) || ( ( Mask & ( m - n ) ) && ( Mask & ( m - n ) ) <= ( m - n ) ) ); } template <typename Type, Type Mask> inline bool function3( const Type n, const Type m ) { static_assert( std::is_unsigned<Type>::value, "Type must be unsigned" ); if ( n == m ) return true; if ( n>m ) return false; const Type diff = m - n; const Type msk = Mask & diff; return msk && msk <= diff; } std::vector<std::pair<size_t, size_t>> fill( size_t n ) { std::random_device rd; std::mt19937 gen( rd( ) ); std::uniform_int_distribution<size_t> dis( 0, std::numeric_limits<size_t>::max( ) ); auto rnd = [ &] { return dis( gen ); }; std::vector<std::pair<size_t, size_t>> result; result.reserve( n ); while ( n-- ) { result.push_back( { rnd( ), rnd( ) } ); } return result; } size_t ignoreOptim {}; template <typename F> std::chrono::microseconds foo( std::vector<std::pair<size_t, size_t>> const nms, F &&f ) { using clock = HighResClock; // Does VS2014 will fix the high_resolution_clock fallbacking to system_clock ??? auto t0 = clock::now( ); auto f1 = std::count_if( begin( nms ), end( nms ), std::forward<F&&>( f ) ); auto t1 = clock::now( ); ignoreOptim += f1; auto result = std::chrono::duration_cast<std::chrono::microseconds>( t1 - t0 ); return result; } template <typename F> void bar( std::vector<std::pair<size_t, size_t>> const nms, char const* name, F &&f ) { std::chrono::microseconds f1 {}; for ( int i {}; i != 100; ++i ) f1 += foo( nms, std::forward<F&&>( f ) ); std::cout << name << " = " << float( f1.count( ) ) / 10.f << "us" << std::endl; } int main( ) { auto nms = fill( 1 << 21 ); bar( nms, "f1", [] ( std::pair<size_t, size_t> nm ) { return function1<size_t, 0x0003000000000000ull>( nm.first, nm.second ); } ); bar( nms, "f1b", [] ( std::pair<size_t, size_t> nm ) { return function1b<size_t, 0x0003000000000000ull>( nm.first, nm.second ); } ); bar( nms, "f2", [] ( std::pair<size_t, size_t> nm ) { return function2<size_t, 0x0003000000000000ull>( nm.first, nm.second ); } ); bar( nms, "f3", [] ( std::pair<size_t, size_t> nm ) { return function3<size_t, 0x0003000000000000ull>( nm.first, nm.second ); } ); return 0; }
Adjacent products in a grid
I'm solving Problem 11 from Project Euler. I have figured out the algorithm and what I would need to do. The grid is saved in a file grid.txt and its contents are- 08 02 22 97 38 15 00 40 00 75 04 05 07 78 52 12 50 77 91 08 49 49 99 40 17 81 18 57 60 87 17 40 98 43 69 48 04 56 62 00 81 49 31 73 55 79 14 29 93 71 40 67 53 88 30 03 49 13 36 65 52 70 95 23 04 60 11 42 69 24 68 56 01 32 56 71 37 02 36 91 22 31 16 71 51 67 63 89 41 92 36 54 22 40 40 28 66 33 13 80 24 47 32 60 99 03 45 02 44 75 33 53 78 36 84 20 35 17 12 50 32 98 81 28 64 23 67 10 26 38 40 67 59 54 70 66 18 38 64 70 67 26 20 68 02 62 12 20 95 63 94 39 63 08 40 91 66 49 94 21 24 55 58 05 66 73 99 26 97 17 78 78 96 83 14 88 34 89 63 72 21 36 23 09 75 00 76 44 20 45 35 14 00 61 33 97 34 31 33 95 78 17 53 28 22 75 31 67 15 94 03 80 04 62 16 14 09 53 56 92 16 39 05 42 96 35 31 47 55 58 88 24 00 17 54 24 36 29 85 57 86 56 00 48 35 71 89 07 05 44 44 37 44 60 21 58 51 54 17 58 19 80 81 68 05 94 47 69 28 73 92 13 86 52 17 77 04 89 55 40 04 52 08 83 97 35 99 16 07 97 57 32 16 26 26 79 33 27 98 66 88 36 68 87 57 62 20 72 03 46 33 67 46 55 12 32 63 93 53 69 04 42 16 73 38 25 39 11 24 94 72 18 08 46 29 32 40 62 76 36 20 69 36 41 72 30 23 88 34 62 99 69 82 67 59 85 74 04 36 16 20 73 35 29 78 31 90 01 74 31 49 71 48 86 81 16 23 57 05 54 01 70 54 71 83 51 54 69 16 92 33 48 61 43 52 01 89 19 67 48 The question is- What is the greatest product of four adjacent numbers in any direction (up, down, left, right, or diagonally) in the 20x20 grid? I know the algorithm is working right because I've tried using cout to output the nums and they show up in the right sequence. It's giving me an incorrect answer though, what could be the fault? void problem11() { vector<vector<int>> grid; ifstream stream("grid.txt"); string line; char *tok; if (stream.is_open()) { while(stream.good()) { getline(stream, line); tok = strtok((char *)line.c_str(), " "); vector<int> row; while (tok != NULL) { int field; stringstream ss; ss << tok; ss >> field; row.push_back(field); tok = strtok(NULL, " "); } grid.push_back(row); } stream.close(); } unsigned long highest = 0; /// LEFT TO RIGHT for (int i=0; i < 20; i++) // i'th row { vector<int> row = grid.at(i); for (int c=0; c < 20-3; c++) // -3 to accomodate for last { unsigned long prod = row.at(c) * row.at(c+1) * row.at(c+2) * row.at(c+3); // four consecutive //cout << row.at(c) << " " << row.at(c+1) << " " << row.at(c+2) << " " << row.at(c+3) << endl; if (prod > highest) highest = prod; } } /// TOP TO DOWN /// This moves from left to right, then top to botom /// for (int i=0; i < 20-3; i++) // subtract last 3 { vector<int> row1, row2, row3, row4; row1 = grid.at(i); row2 = grid.at(i+1); row3 = grid.at(i+2); row4 = grid.at(i+3); for (int c=0; c < 20; c++) { unsigned long prod = row1.at(c) * row2.at(c) * row3.at(c) * row4.at(c); //cout << row1.at(c) << " " << row2.at(c) << " " << row3.at(c) << " " << row4.at(c) << endl; if (prod > highest) highest = prod; } } /// DOWN DIAGONAL /// This moves diagonally from left to right, top to bottom for (int i=0; i < 20-3; i++) // subtract last 3 { vector<int> row1, row2, row3, row4; row1 = grid.at(i); row2 = grid.at(i+1); row3 = grid.at(i+2); row4 = grid.at(i+3); for (int c=0; c < 20-3; c++) // omit last 3 { unsigned long prod = row1.at(c) * row2.at(c+1) * row3.at(c+2) * row4.at(c+3); //cout << row1.at(c) << " " << row2.at(c+1) << " " << row3.at(c+2) << " " << row4.at(c+3) << endl; if (prod > highest) highest = prod; } } /// UP DIAGONAL /// This moves diagonally from left to right, bottom to top for (int i=3; i < 20; i++) // start from 3, skipping first four { vector<int> row1, row2, row3, row4; row4 = grid.at(i); row3 = grid.at(i-1); row2 = grid.at(i-2); row1 = grid.at(i-3); for (int c=0; c < 20-3; c++) // omit last 3 { unsigned long prod = row4.at(c) * row3.at(c+1) * row3.at(c+2) * row4.at(c+3); //cout << row4.at(c) << " " << row3.at(c+1) << " " << row2.at(c+2) << " " << row1.at(c+3) << endl; if (prod > highest) highest = prod; } } cout << "Required: " << highest; }
At the risk of spoiling the fun of finding the answer yourself... Do print out the diagonals. Check visually whether they correspond to what you would expect. As a side-note: and don't create copies of your table rows, but access them likewise: vector[rowindex][column]. EDIT -- OK now I'm going to spoil it. In a matrix of NxN, how many diagonal paths do you have? How many paths do you traverse? (Cross check this by taking a 2x2 matrix, that has 3 diagonal paths in each direction) PS. If you take up programming seriously, when encountering a bug, validate your assumptions first.
Try the following input: 0 0 0 1 0 0 ... 0 0 1 0 0 0 ... 0 1 0 0 0 0 ... 1 0 0 0 0 0 ... 0 0 0 0 0 0 ... ..... and do again what xtofl recommends you. In general, if you want to reverse the logic of some operation, do it once and only once. (Or, in general, odd number of times.) Beware of replacing a<=b by b>=a, or left to right and top to bottom by right to left and bottom to top, or whatever similar.
I used Javascript to solve this problem. Please let me know if you have any doubts. <html> <head> </head> <body> <input type="button" value="Click Me" onClick="onPress()"></input> </body> <script> function onPress() { var arr=[ [8,2,22,97,38,15,0,40,0,75 4, 5, 7, 78, 52, 12, 50, 77,91, 8] [49,49,99, 40,17,81, 18, 57, 60, 87,17,40,98,43,69,48,4,56, 62,0], [81,49,31,73,55,79,14,29,93,71,40,67,53, 88, 30,3,49,13,36,65], [52,70,95,23,4,60,11,42,69,24,68,56,1,32,56,71, 37, 2, 36, 91], [22,31,16,71,51,67,63,89,41,92,36, 54,22,40,40,28,66, 33, 13,80], [24,47,32,60,99,3,45,2,44,75,33,53,78,36,84, 20, 35,17,12,50], [32,98,81,28,64,23,67,10,26,38,40, 67, 59, 54, 70, 66,18,38,64,70], [67,26,20,68,2,62,12,20,95,63,94,39,63,8,40, 91, 66, 49, 94, 21], [24,55,58,5,66,73,99,26,97,17,78,78,96,83,14, 88, 34, 89, 63, 72], [21,36,23,9,75,0,76,44,20,45,35,14,0,61,33,97,34,31,33,95], [78,17,53,28,22,75,31,67,15,94,3,80,4,62,16,14,9,53,56, 92], [16,39,5,42,96,35,31,47,55, 58, 88, 24, 0, 17, 54,24,36,29,85,57], [86,56,0,48,35,71,89,7,5,44,44,37,44,60,21,58,51, 54, 17, 58], [19,80,81,68,5,94,47,69,28,73,92,13,86,52,17,77,4,89, 55, 40], [4,52,8,83,97,35,99,16,7,97,57,32,16,26,26, 79, 33, 27, 98, 66], [88,36,68,87,57,62,20,72,3,46,33,67,46, 55, 12, 32, 63, 93, 53, 69], [4,42,16,73,38,25,39,11,24,94,72,18,8,46,29, 32, 40, 62, 76, 36], [20,69,36,41,72,30,23,88,34,62,99,69,82,67,59,85,74,4,36, 16], [20,73,35,29,78,31,90,1,74,31,49,71,48,86, 81, 16, 23, 57,5, 54], [1,70,54,71,83,51,54,69,16,92,33,48,61,43,52,1, 89, 19, 67, 48] ]; var i, j, product=0, arr2=[], max; /* Horizontal 4 digit multiplication*/ for(i=0; i<arr.length; i++) { for(j=0; j<17;j++) { product= arr[i][j]* arr[i][j+1] *arr[i][j+2] * arr[i][j+3] arr2.push(product); } } /* Vertical 4 digit multiplication*/ for(var j=0; j<arr.length; j++) { for(var i=0; i<17; i++) { product= arr[i][j] * arr[i+1][j] * arr[i+2][j] * arr[i+3][j]; arr2.push(product); } } /* left to right diagonal*/ for(var j=0 ; j<17; j++) { for(var i=0; i<17; i++) { product= arr[i][j]* arr[i+1][j+1]*arr[i+2][j+2]*arr[i+3][j+3]; arr2.push(product) } } /* right to left diagonal*/ for(var i=0; i<17; i++) { for(var j=19; j>=3; j--) { product= arr[i][j]*arr[i+1][j-1]*arr[i+2][j-2]*arr[i+3][j-3] arr2.push(product); } } max= Math.max.apply(Math, arr2); console.log(max) } </script> </html>
This is the code written by me. Gives the correct answer. I hope this helps.... #include <iostream> #include <vector> using namespace std; void main() { int num_container[20][20] = { { 8,02,22,97,38,15,00,40,00,75,04,05,07,78,52,12,50,77,91, 8}, {49,49,99,40,17,81,18,57,60,87,17,40,98,43,69,48,04,56,62,00}, {81,49,31,73,55,79,14,29,93,71,40,67,53,88,30,03,49,13,36,65}, {52,70,95,23,04,60,11,42,69,24,68,56,01,32,56,71,37,02,36,91}, {22,31,16,71,51,67,63,89,41,92,36,54,22,40,40,28,66,33,13,80}, {24,47,32,60,99,03,45,02,44,75,33,53,78,36,84,20,35,17,12,50}, {32,98,81,28,64,23,67,10,26,38,40,67,59,54,70,66,18,38,64,70}, {67,26,20,68,02,62,12,20,95,63,94,39,63, 8,40,91,66,49,94,21}, {24,55,58,05,66,73,99,26,97,17,78,78,96,83,14,88,34,89,63,72}, {21,36,23, 9,75,00,76,44,20,45,35,14,00,61,33,97,34,31,33,95}, {78,17,53,28,22,75,31,67,15,94,03,80,04,62,16,14, 9,53,56,92}, {16,39,05,42,96,35,31,47,55,58,88,24,00,17,54,24,36,29,85,57}, {86,56,00,48,35,71,89,07,05,44,44,37,44,60,21,58,51,54,17,58}, {19,80,81,68,05,94,47,69,28,73,92,13,86,52,17,77,04,89,55,40}, {04,52, 8,83,97,35,99,16,07,97,57,32,16,26,26,79,33,27,98,66}, {88,36,68,87,57,62,20,72,03,46,33,67,46,55,12,32,63,93,53,69}, {04,42,16,73,38,25,39,11,24,94,72,18, 8,46,29,32,40,62,76,36}, {20,69,36,41,72,30,23,88,34,62,99,69,82,67,59,85,74,04,36,16}, {20,73,35,29,78,31,90,01,74,31,49,71,48,86,81,16,23,57,05,54}, {01,70,54,71,83,51,54,69,16,92,33,48,61,43,52,01,89,19,67,48}, }; int test = num_container[6][8] * num_container[7][9] * num_container[8][10] * num_container[9][11]; cout<<test<<endl; system("pause"); int start = 0; int end = 3; long long mul_result = 1; vector<long long>final_results; /////////////////////UP/DOWN///////////////////// for(int k=0; k<20; k++) { for(int i=0; i<=16; i++) { for(int j=start; j<=end; j++) { mul_result = mul_result * num_container[k][j]; if (j == end) final_results.push_back(mul_result); } mul_result = 1; start++; end++; } start = 0; end = 3; for(int i=0; i<=16; i++) { for(int j=start; j<=end; j++) { mul_result = mul_result * num_container[j][k]; if (j == end) final_results.push_back(mul_result); } mul_result = 1; start++; end++; } start = 0; end = 3; } /////////////////////UP/DOWN Ends here////////////////////// ///////////////////Both Ways Diagonal Starts here////////////////////// int current_row = 0; for(int i=0; i<=16; i++) { for(int j=0; j<=16; j++) { current_row = i; for(int k=start; k<=end; k++) { mul_result = mul_result * num_container[current_row][k]; current_row++; if (k==end) final_results.push_back(mul_result); } mul_result = 1; start++; end++; } start = 0; end = 3; for(int j=0; j<=16; j++) { current_row = i+3; for(int k=start; k<=end; k++) { mul_result = mul_result * num_container[current_row][k]; current_row--; if (k==end) final_results.push_back(mul_result); } mul_result = 1; start++; end++; } start = 0; end = 3; } /////////////////////Both Ways diagonal ends here/////////////////// ////////////////////Compare Thning Starts here////////////////////// long long the_big_one = 0; for(int i=0; i<final_results.size(); i++) { if (final_results[i] > the_big_one) the_big_one = final_results[i]; } cout<<endl<<endl<<"The big one is: "<<the_big_one<<endl; system("pause"); }