I am trying to find average of 2x2 block pixels within a window of 6x6 of overall image size mxn. I can able to find the average of block till the end of first row and when the code has to move to next row, it throws runtime error "exception at memory location"
vector<int>m; vector<int>m1; vector<int>m2; vector<int>m3;vector<int>m4; vector<int>m5; vector<int>m6; vector<int>m7; vector<int>m8;
for (int i = 2; i < road.rows - 2 ; i++){
for (int j = 2; j < road.cols - 2 ; j++){
//center block
int avg=(round((road.at<uchar>(i, j) + road.at<uchar>(i, j + 1) + road.at<uchar>(i + 1, j) + road.at<uchar>(i + 1, j + 1)) / 4));
//top left block
int avg1= (round((road.at<uchar>(i - 2, j - 2) + road.at<uchar>(i - 2, j - 1) + road.at<uchar>(i - 1, j - 2) + road.at<uchar>(i - 1, j - 1)) / 4));
//top
int avg2 = (round((road.at<uchar>(i - 2, j) + road.at<uchar>(i - 2, j + 1) + road.at<uchar>(i - 1, j) + road.at<uchar>(i - 1, j + 1)) / 4));
//top right block
int avg3 = (round((road.at<uchar>(i - 2, j + 2) + road.at<uchar>(i - 2, j + 3) + road.at<uchar>(i - 1, j + 2) + road.at<uchar>(i - 1, j + 3)) / 4));
//left block
int avg4 = (round((road.at<uchar>(i, j - 2) + road.at<uchar>(i, j - 1) + road.at<uchar>(i + 1, j - 2) + road.at<uchar>(i + 1, j - 1)) / 4));
//right block
int avg5 = (round((road.at<uchar>(i, j + 2) + road.at<uchar>(i, j + 3) + road.at<uchar>(i + 1, j + 2) + road.at<uchar>(i + 1, j + 3)) / 4));
//bottom left block
int avg6 = (round((road.at<uchar>(i + 2, j - 2) + road.at<uchar>(i + 2, j - 1) + road.at<uchar>(i + 3, j - 2) + road.at<uchar>(i + 3, j - 1)) / 4));
//bottom
int avg7 = (round((road.at<uchar>(i + 2, j) + road.at<uchar>(i + 2, j + 1) + road.at<uchar>(i + 3, j) + road.at<uchar>(i + 3, j + 1)) / 4));
//bottom right block
int avg8 = (round((road.at<uchar>(i + 2, j + 2) + road.at<uchar>(i + 2, j + 3) + road.at<uchar>(i + 3, j + 2) + road.at<uchar>(i + 3, j + 3)) / 4));
m.push_back(avg);
m1.push_back(avg1);
m2.push_back(avg2);
m3.push_back(avg3);
m4.push_back(avg4);
m5.push_back(avg5);
m6.push_back(avg6);
m7.push_back(avg7);
m8.push_back(avg8);
}
}
Help me out of this error;
Related
I'm coding a recursive 3-way partitioning in C++, but I don't see what's wrong here. (It is 97% of the time correct, sometimes incorrect. That's making me hard to debug.)
Expected invariant: After the partitioning, A[p, ..., p + less1 + less2 - 1] is less than pivot, A[p + less1 + less2, ..., p + less1 + less2 + same1 + same2 - 1] is the same with pivot, the rest is bigger than pivot.
Wrong result: "The middle" part (the part with values the same with pivot) doesn't locate correctly.
Reproducable test case:
A = {0, 1, 4, 7, 6, 9, 3, 10, 11, 12, 2, 8, 5}
p = 0, r = 12, pivot = 11
Expected output of less : 11
Current output : 10
Code:
template <typename T>
void PPartition(std::vector<T>& A, size_t p, size_t r, const T& pivot, std::size_t& less, std::size_t& same) {
if (p == r) {
less = A[p] < pivot;
same = A[p] == pivot;
return;
}
std::size_t m = (p + r) / 2;
std::size_t less1 = 0, less2 = 0, same1 = 0, same2 = 0;
PPartition(A, p, m, pivot, less1, same1);
PPartition(A, m + 1, r, pivot, less2, same2);
if (less2 > m - p + 1 - less1) {
// sends "less" part of the right to the left
for (std::size_t k = 0; k < m - p + 1 - less1; k++) {
std::swap(A[p + less1 + k], A[m + less2 - k]);
}
// maintains "equal" part
for (std::size_t k = 0; k < same1 + same2; k++) {
std::swap(A[p + less1 + less2 + k], A[m + less2 + same2 - k]);
}
} else {
// sends "less" part of the right to the left
for (std::size_t k = 0; k < less2; k++) {
std::swap(A[p + less1 + k], A[m + less2 - k]);
}
// maintains "equal" part
for (std::size_t k = 0; k < same1 + same2; k++) {
std::swap(A[p + less1 + less2 + k], A[m + less2 + same2 - k]);
}
}
less = less1 + less2;
same = same1 + same2;
}
Self-answer:
If less2 == 0, then We shouldn't swap "middle" partition of the left part with the right part.
template <typename T>
void PPartition(std::vector<T>& A, size_t p, size_t r, const T& pivot, std::size_t& less, std::size_t& same) {
if (p == r) {
less = A[p] < pivot;
same = A[p] == pivot;
return;
}
std::size_t m = (p + r) / 2;
std::size_t less1 = 0, less2 = 0, same1 = 0, same2 = 0;
PPartition(A, p, m, pivot, less1, same1);
PPartition(A, m + 1, r, pivot, less2, same2);
if (less2 > m - p + 1 - less1) {
for (std::size_t k = 0; k < m - p + 1 - less1; k++) {
std::swap(A[p + less1 + k], A[m + less2 - k]);
}
for (std::size_t k = 0; k < same1 + same2 && p + less1 + k < m + same2 - k; k++) {
std::swap(A[p + less1 + less2 + k], A[m + less2 + same2 - k]);
}
} else {
for (std::size_t k = 0; k < less2; k++) {
std::swap(A[p + less1 + k], A[m + less2 - k]);
}
if (less2 > 0) {
for (std::size_t k = 0; k < same1 + same2; k++) {
std::swap(A[p + less1 + less2 + k], A[m + less2 + same2 - k]);
}
} else {
for (std::size_t k = 0; k < same2; k++) {
std::swap(A[p + less1 + same1 + k], A[m + same2 - k]);
}
}
}
less = less1 + less2;
same = same1 + same2;
}
I am trying to implement a demosaicing algorithm (interpolation) for a raw image with the Bayer pattern GRBG. The program logic was to use the neighboring pixels to assign the values to R,G and B channels(I have attached the code). I am having a problem for this logic at the border pixels. For example let i be the pixel at (0,0), I need the value of i-1 which is not present in the image. My question is there a possibility to work around this like masking i-1 and the others as 0 without adding an new border of zeros to my existing image.
Any suggestions will be helpful.
thanks.
int rows = 256;
int cols = 512;
Mat raw_img(rows, cols, CV_8U); //////////////////////
Mat image(rows, cols, CV_8UC3); // BAYER PATTERN //
cvtColor(image, image, COLOR_BGR2RGB); // G R //
for (int i = 0; i < raw_img.rows; i++) { // B G //
for (int j = 0; j < raw_img.cols; j++) { //////////////////////
if ((i % 2 == 0) && (j % 2 == 0))//top green
{
image.at<Vec3b>(i, j)[0] = (raw_img.at<uchar>(i - 1, j) +
raw_img.at<uchar>(i + 1, j)) / 2; //red
image.at<Vec3b>(i, j)[1] = (raw_img.at<uchar>(i, j) * 2); //blue
image.at<Vec3b>(i, j)[2] = (raw_img.at<uchar>(i, j - 1) +
raw_img.at<uchar>(i, j + 1)) / 2; //green
}
else if ((i % 2 == 0) && (j % 2 == 1))//red
{
image.at<Vec3b>(i, j)[0] = (raw_img.at<uchar>(i, j)); //red
image.at<Vec3b>(i, j)[1] = (raw_img.at<uchar>(i - 1, j) +
raw_img.at<uchar>(i + 1, j) +
raw_img.at<uchar>(i, j - 1) +
raw_img.at<uchar>(i, j + 1)) / 2;//green
image.at<Vec3b>(i, j)[2] = (raw_img.at<uchar>(i + 1, j - 1) +
raw_img.at<uchar>(i - 1, j + 1) +
raw_img.at<uchar>(i + 1, j + 1) +
raw_img.at<uchar>(i - 1, j - 1)) / 4;//blue
}
else if ((i % 2 == 1) && (j % 2 == 0))//blue
{
image.at<Vec3b>(i, j)[0] = (raw_img.at<uchar>(i + 1, j - 1) +
raw_img.at<uchar>(i - 1, j + 1) +
raw_img.at<uchar>(i + 1, j + 1) +
raw_img.at<uchar>(i - 1, j - 1)) / 4;//red
image.at<Vec3b>(i, j)[1] = (raw_img.at<uchar>(i + 1, j) +
raw_img.at<uchar>(i, j + 1) +
raw_img.at<uchar>(i - 1, j) +
raw_img.at<uchar>(i, j + 1)) / 2;//green
image.at<Vec3b>(i, j)[0] = (raw_img.at<uchar>(i, j));//blue
}
else // bottom green
{
image.at<Vec3b>(i, j)[0] = (raw_img.at<uchar>(i, j - 1) +
raw_img.at<uchar>(i, j + 1)) / 2;//red
image.at<Vec3b>(i, j)[1] = (raw_img.at<uchar>(i, j) * 2);//blue
image.at<Vec3b>(i, j)[2] = (raw_img.at<uchar>(i - 1, j) +
raw_img.at<uchar>(i + 1, j)) / 2;//green
}
}
}
You could do something like:
image.at<Vec3b>(i, j)[0] = (raw_img.at<uchar>(max(0,i - 1), j)
+ raw_img.at<uchar>(min(i + 1,raw_img.rows-1), j)) / 2; //red
For all you i +/- 1 , j +/-1: this way you "replicate" border values by simply sticking to the last value value in the X or Y dimension
As a side note, openCV includes different demosaic algorithm that will be hard to beat (for both quality and execution speed)
The above stated answer works. But to prevent the hassle of using min,max with each pixel. It can be done as shown below with an Opencv function:
int main(int argc, char** argv)
{
Mat img_rev = imread("C:/Users/20181217/Desktop/images/imgs/den_check.png");
//number of additional rows and columns
int top, left, right, bottom;
top = 1;
left = 1;
right = 1;
bottom = 1;
//define new image with additional borders
Mat img_clamp(img_rev.rows + 2, img_rev.cols + 2, CV_8UC3);
//if you want to pad the image with zero's
copyMakeBorder(img_rev, img_clamp, top, left, right, bottom, BORDER_CONSTANT);
//if you want to replicate the border of the image
copyMakeBorder(img_rev, img_clamp, top, left, right, bottom, BORDER_REPLICATE);
//Now you can access the image without having to worry about the borders as shown below
for(int i=1;i<img_clamp.rows-1;i++)
{
for(int j=1;i<img_clamp.cols-1;i++)
{
...
}
}
waitKey(100000);
return 0;
}
More operations can be found here:
https://docs.opencv.org/2.4/modules/imgproc/doc/filtering.html?highlight=copymakeborder#copymakeborder
I'm kinda new to c++ and I have an assignment for my course to read a P6 ppm image and use some filters on it.Im stuck on the blurring one and I cannot find my logical error on my code.
My problem is that the output image is blurred but it is like there are 4 blurred same images there
EDIT- HERE IS THE LINK TO OUTPUT IMAGE(TESTFILTER.ppm) THE INPUT IMAGE (Original) AND A SIMPLE VIEWER TO VIEW THEM MY PROFFESSOR GAVE US (CPIViewer)
http://www.filedropper.com/blurfilterquestion
Here is the blur filter code.
typedef Vec3<float> buffer;
static void blur(Image* pic) { //BLUR FILTER
int h = pic->getHeight();
int w = pic->getWidth();
int count = 0;
buffer* temp = new buffer[h*w];
for (int x = 0; x < w; x++){
for (int y = 0; y < h; y++){
if (x == 0 && y == 0) {
count++;
temp[y*w + x] = (pic->getPixel(x, y) + pic->getPixel(x + 1, y) + pic->getPixel(x, y + 1) + pic->getPixel(x + 1, y + 1)) / 4;
}
else if (x == 0 && y == h - 1) {
count++;
temp[y*w + x] = (pic->getPixel(x, y) + pic->getPixel(x + 1, y) + pic->getPixel(x, y - 1) + pic->getPixel(x + 1, y - 1)) / 4;
}
else if (x == w - 1 && y == 0){
count++;
temp[y*w + x] = (pic->getPixel(x, y) + pic->getPixel(x - 1, y) + pic->getPixel(x, y + 1) + pic->getPixel(x - 1, y + 1)) / 4;
}
else if (x == w - 1 && y == h - 1){
count++;
temp[y*w + x] = (pic->getPixel(x, y) + pic->getPixel(x - 1, y) + pic->getPixel(x, y - 1) + pic->getPixel(x - 1, y - 1)) / 4;
}
else if (x == 0){
count++;
temp[y*w + x] = ((pic->getPixel(x, y)) + (pic->getPixel(x, y - 1)) + (pic->getPixel(x, y + 1)) + (pic->getPixel(x + 1, y - 1)) + (pic->getPixel(x + 1, y)) + (pic->getPixel(x + 1, y + 1))) / 6;
}
else if (x == w - 1){
count++;
temp[y*w + x] = ((pic->getPixel(x, y)) + (pic->getPixel(x, y - 1)) + (pic->getPixel(x, y + 1)) + (pic->getPixel(x - 1, y + 1)) + (pic->getPixel(x - 1, y)) + (pic->getPixel(x - 1, y - 1))) / 6;
}
else if (y == 0){
count++;
temp[y*w + x] = ((pic->getPixel(x, y)) + (pic->getPixel(x, y - 1)) + (pic->getPixel(x, y + 1)) + (pic->getPixel(x - 1, y + 1)) + (pic->getPixel(x - 1, y)) + (pic->getPixel(x + 1, y + 1))) / 6;
}
else if (y == h - 1){
count++;
temp[y*w + x] = ((pic->getPixel(x, y)) + (pic->getPixel(x, y - 1)) + (pic->getPixel(x - 1, y)) + (pic->getPixel(x + 1, y - 1)) + (pic->getPixel(x + 1, y)) + (pic->getPixel(x - 1, y - 1))) / 6;
}
else {
count++;
temp[y*w + x].r = ((pic->getPixel(x, y).r) + (pic->getPixel(x + 1, y).r) + (pic->getPixel(x, y + 1).r) + (pic->getPixel(x, y - 1).r) + (pic->getPixel(x - 1, y).r) + (pic->getPixel(x - 1, y - 1).r) + (pic->getPixel(x + 1, y + 1).r) + (pic->getPixel(x + 1, y - 1).r) + (pic->getPixel(x - 1, y + 1).r)) / 9;
temp[y*w + x].g = ((pic->getPixel(x, y).g) + (pic->getPixel(x + 1, y).g) + (pic->getPixel(x, y + 1).g) + (pic->getPixel(x, y - 1).g) + (pic->getPixel(x - 1, y).g) + (pic->getPixel(x - 1, y - 1).g) + (pic->getPixel(x + 1, y + 1).g) + (pic->getPixel(x + 1, y - 1).g) + (pic->getPixel(x - 1, y + 1).g)) / 9;
temp[y*w + x].b = ((pic->getPixel(x, y).b) + (pic->getPixel(x + 1, y).b) + (pic->getPixel(x, y + 1).b) + (pic->getPixel(x, y - 1).b) + (pic->getPixel(x - 1, y).b) + (pic->getPixel(x - 1, y - 1).b) + (pic->getPixel(x + 1, y + 1).b) + (pic->getPixel(x + 1, y - 1).b) + (pic->getPixel(x - 1, y + 1).b)) / 9;
}
}
}
const buffer* constTemp;
constTemp = temp;
pic->setData(constTemp);
std::cout << "TIMES: " << count << std::endl;
here is the getPixel method
typedef Vec3<float> color;
enter code here
color Image::getPixel(unsigned int x, unsigned int y) const {
if (x > getWidth() || y > getHeight()) { color col(0, 0, 0); return col; }
color col = buffer[y*width + x];
return col;
}
and here is my main() method
#include "Image.h"
#include "ppm_format.h"
#include "Filters.h"
#include <iostream>
using namespace imaging;
using namespace std;
using namespace filtering;
int main(int argc, char* argv[]) {
Image* image = ReadPPM(argv[argc - 1]); // Creating Image
float r, g, b;
bool filterFlag = false; //Flag used to check if -f command is given
string check;
Vec3<float> temp;
for (int i = 1; i < argc - 2; i++) {
string check(argv[i]);
if (!check.compare("-f")) { filterFlag = true; } // check.compare("-f") returns 0 if check = -f
if (filterFlag) {
i++;
check = argv[i];
if (!check.compare("gray")) {
filterFlag = false; //filter flag falsified to show that a filter is read
gray(image);
}
else if (!check.compare("color")) {
filterFlag = false;
if (argc-2 <= i + 3) {
cout << "INVALID COLOR FILTER CALL.EXITING...";
system("pause");
exit(0);
}
r = i + 1;
g = i + 2;
b = i + 3;
i += 4;
color(image, r, g, b);
}
else if (!check.compare("blur")) {
filterFlag = false;
blur(image);
}
else if (!check.compare("median")){
filterFlag = false;
medianORdiff(image,"median");
}
else if (!check.compare("diff")){
filterFlag = false;
medianORdiff(image,"diff");
}
else {
cout << "Invalid filter name or no filter name at all.\nFilter names are gray , color r g b , blur , median , diff";
system("pause");
exit(0);
}
*image >> "C://Users/M1TR0S0UL4S/Desktop/TESTFILTER.ppm";
}
}
When attempted to push back a vector of UINT, the progrma crashes with Critical error detected c0000374. Below is the initial code:
void Terrain::CreateIndexList(UINT Width, UINT Height){
UINT sz_iList = (Width - 1)*(Height - 1) * 6;
UINT *iList = new UINT[sz_iList];
for (int i = 0; i < Width; i++){
for (int j = 0; j < Height; j++){
iList[(i + j * (Width - 1)) * 6] = ((UINT)(2 * i));
iList[(i + j * (Width - 1)) * 6 + 1] = (UINT)(2 * i + 1);
iList[(i + j * (Width - 1)) * 6 + 2] = (UINT)(2 * i + 2);
iList[(i + j * (Width - 1)) * 6 + 3] = (UINT)(2 * i + 2);
iList[(i + j * (Width - 1)) * 6 + 4] = (UINT)(2 * i + 1);
iList[(i + j * (Width - 1)) * 6 + 5] = (UINT)(2 * i + 3);
}
}
for (int i = 0; i < sz_iList; i++){
Geometry.IndexVertexData.push_back(iList[i]);
}
delete[] iList;
}
The goal is to take the generated indices from the iList array and fill the Geometry.IndexVertexData vector array. While debugging this, I've created several other implementations of this:
//After creating the iList array:
Geometry.IndexVertexData.resize(sz_iList); //Fails with "Vector subscript out of range?"
UINT in = 0;
for (int i = 0; i < Width; i++){
for (int j = 0; j < Height; j++){
Geometry.IndexVertexData[(i + j*(Width - 1)) * 6] = iList[in];
in++;
Geometry.IndexVertexData[(i + j*(Width - 1)) * 6 + 1] = iList[in];
in++;
Geometry.IndexVertexData[(i + j*(Width - 1)) * 6 + 2] = iList[in];
in++;
Geometry.IndexVertexData[(i + j*(Width - 1)) * 6 + 3] = iList[in];
in++;
Geometry.IndexVertexData[(i + j*(Width - 1)) * 6 + 4] = iList[in];
in++;
Geometry.IndexVertexData[(i + j*(Width - 1)) * 6 + 5] = iList[in];
in++;
}
}
And a final, direct to vector implementation:
Geometry.IndexVertexData.reserve(sz_iList);
for (int index = 0; index < sz_iList; index+=6) {
Geometry.IndexVertexData[(i + j*(Width - 1)) * 6] = ((UINT)(2 * i));
Geometry.IndexVertexData[(i + j*(Width - 1)) * 6 + 1] = (UINT)(2 * i + 1);
Geometry.IndexVertexData[(i + j*(Width - 1)) * 6 + 2] = (UINT)(2 * i + 2);
Geometry.IndexVertexData[(i + j*(Width - 1)) * 6 + 3] = (UINT)(2 * i + 2);
Geometry.IndexVertexData[(i + j*(Width - 1)) * 6 + 4] = (UINT)(2 * i + 1);
Geometry.IndexVertexData[(i + j*(Width - 1)) * 6 + 5] = (UINT)(2 * i + 3);
}
sz_iList has a final value of 2166, resultant from a grid of 20x20 (400 total points) and is used to initialize sizes. In all cases, the vector array would not fully fill, crashing with Critical error detected c0000374. Am I doing something wrong?
Your sz_iList doesn't appear to be big enough. Let's use a simple example of Width = Height = 2;, then sz_iList = (2 - 1) * (2 - 1) * 6 = 6, right? But in your nested loops, the last iteration occurs when i = j = 1 (i is one less than Width and j is one less than Height), where (in the last line of your loop), you try to access element (i + j * (Width - 1)) * 6 + 5 = (1 + 1 * (2 - 1)) * 6 + 5 = (1 + 1 * 1) * 6 + 5 = 2 * 6 + 5 = 17, which is bigger than the size of your array. This results in undefined behavior.
I have a longer code that was cut down as much as possible while keeping the issue alive. My code runs an MCMC computation for different parameter values. For some combinations of values, the code runs much longer, about 100x slower than on typical cases. However, it shouldn't, because the number of operations does not depend on parameter values.
I am running this on an AMD64 Linux box with glibc-2.17 compiled with GCC 4.8.1 on Gentoo. Compile flags do not matter as it appears. I also tested it on a different Gentoo box with an older AMD64 processor and the results were the same.
There were a bunch of tests I did:
I have tried debugging with Valgrind and it found no memory issues or other nasty things.
Secondly, I have tried running the code with the problematic parameter values fixed, but the slowness was not encountered.
I have tried putting sleep(4) between iterations, but nothing changed.
The problem shows when the iteration hits k = 1, i = j = 0, which translates into mu[0] = -0.05, mu[1] = -0.05 and mu[2] = 0.05. As I said, using this fixed value for all iterations eliminates the issue I am seeing.
Here are things that eliminate the problem:
Changing the limits.
Fixing the mu[] coefficients.
Removing dW3 from the computation.
Removing rand().
Removing computation of q.
Removing the update of s[j].
I have read quit a bit about slowpow, thus tried eliminating exp by writing my own version of it. This solves the problems I am having with this MWE, but not when the reimplemented exp is placed in the production code.
Question: what is causing the semi-random slowness?
Code of the MWE follows. All help and suggestions as how to proceed would be greatly appreciated.
Note: This code is compiled with g++ although it is essentially C. Changing compiler doesn't change anything.
Regarding branch prediction: Removing one of the if statements by using
q = exp(dW);
q = q / (1.0 + q);
no matter what is the value of dW doesn't change code's behavior; if this indeed is due to branch prediction, it would have to be due to the second if.
#include <cstdio>
#include <cstdlib>
#include <cmath>
inline int index(int const i, int const j, int const n)
{
return (i + n) % n + ((j + n) % n) * n;
}
void get_sample(int* s, int n, double* mu)
{
for (int i = 0; i < 10 * n * n; i++)
{
int j = i % (n * n);
int x = j % n;
int y = (j - x) / n;
double dW1 = mu[0] * (s[index(x - 1, y, n)] + s[index(x + 1, y, n)] + s[index(x, y - 1, n)] + s[index(x, y + 1, n)]);
double dW2 = mu[1] * (s[index(x - 1, y - 1, n)] + s[index(x + 1, y - 1, n)] + s[index(x + 1, y + 1, n)] + s[index(x - 1, y + 1, n)]);
double dW3 = mu[2] * (s[index(x - 1, y, n)] * s[index(x - 1, y - 1, n)] * s[index(x, y - 1, n)] + s[index(x - 1, y, n)] * s[index(x - 1, y + 1, n)] * s[index(x, y + 1, n)]
+ s[index(x, y + 1, n)] * s[index(x + 1, y + 1, n)] * s[index(x + 1, y, n)] + s[index(x + 1, y, n)] * s[index(x + 1, y - 1, n)] * s[index(x, y - 1, n)]);
double dW = 2.0 * (dW1 + dW2 + dW3);
double q;
if (dW < 0.0)
{
q = exp(dW);
q = q / (1.0 + q);
}
else
{
q = exp(-dW);
q = 1.0 / (1.0 + q);
}
double p = ((double) rand()) / ((double) RAND_MAX);
if (p < q)
{
s[j] = 1;
}
else
{
s[j] = -1;
}
}
}
int main(int argc, char** argv)
{
double mu[3];
double limits[6] = {-0.05, 0.8, -0.05, 0.45, -0.45, 0.05};
int s[16];
for (int i = 0; i < 16; i++)
{
s[i] = -1;
}
for (int k = 0; k < 2; k++)
{
for (int j = 0; j < 2; j++)
{
for (int i = 0; i < 2; i++)
{
mu[0] = limits[0] + ((limits[1] - limits[0]) * i);
mu[1] = limits[2] + ((limits[3] - limits[2]) * j);
mu[2] = limits[4] + ((limits[5] - limits[4]) * k);
printf(" Computing (% .6lf, % .6lf, % .6lf)...\n", mu[0], mu[1], mu[2]);
for (int sample = 0; sample < 1000; sample++)
{
get_sample(s, 4, mu);
}
}
}
}
return 0;
}
However, it shouldn't, because the number of operations does not depend on parameter values.
The speed of floating point operations does depend on parameter values, though. If you introduce NaN or other exceptional values in your computation (which I didn't review the code for) it will drastically degrade your floating point performance.
EDIT: I manually profiled (with simple rdtsc counting) around the exp() and it was easy to bin "good" and "bad" cases. When I printed the bad cases it was all where dW ~= 0. If you split that case out you get even performance:
double q;
if (dW < -0.1e-15)
{
q = exp(dW);
q = q / (1.0 + q);
}
else if (dW > 0.1e-15)
{
q = exp(-dW);
q = 1.0 / (1.0 + q);
}
else
{
q = 0.5;
}
If I am right and branch prediction is the problem, you should try
void get_sample(int* s, int n, double* mu)
{
for (int i = 0; i < 10 * n * n; i++)
{
int j = i % (n * n);
int x = j % n;
int y = (j - x) / n;
double dW1 = mu[0] * (s[index(x - 1, y, n)] + s[index(x + 1, y, n)] + s[index(x, y - 1, n)] + s[index(x, y + 1, n)]);
double dW2 = mu[1] * (s[index(x - 1, y - 1, n)] + s[index(x + 1, y - 1, n)] + s[index(x + 1, y + 1, n)] + s[index(x - 1, y + 1, n)]);
double dW3 = mu[2] * (s[index(x - 1, y, n)] * s[index(x - 1, y - 1, n)] * s[index(x, y - 1, n)] + s[index(x - 1, y, n)] * s[index(x - 1, y + 1, n)] * s[index(x, y + 1, n)]
+ s[index(x, y + 1, n)] * s[index(x + 1, y + 1, n)] * s[index(x + 1, y, n)] + s[index(x + 1, y, n)] * s[index(x + 1, y - 1, n)] * s[index(x, y - 1, n)]);
double dW = 2.0 * (dW1 + dW2 + dW3);
double q;
q = exp(dW *((dW>0)*2-1);
q = ((dW>0)*q + (dW<=0)) / (1.0 + q);
double p = ((double) rand()) / ((double) RAND_MAX);
s[j] = (p<q)*2-1;
}
}
I am also wondering if a good compiler shouldn't make such transformations anyway...