C++ Image interpolation with Bicubic method - c++

I am just trying to smoothing the image by BiCubic interpolation. I got some code which is used to interpolate the RGB image. I have changed the code to work for Grayscale image. But in result i only got fully black image. Considered input and output image size are same. The code is pasted below. Please help me. Thanks in advance.
inline Uint16 saturate(float x, unsigned max_pixel)
{
return x > max_pixel ? max_pixel
: x < 0.0f ? 0
: Uint16(x);
}
inline float get_subpixel(const Uint16* in, std::size_t dest_width, std::size_t dest_height, unsigned x, unsigned y)
{
if (x < dest_width && y < dest_height)
return in[(y * dest_width) + x];
return 0;
}
void interpolate(unsigned dest_width, unsigned dest_height, unsigned bits_allocated, const Uint16* src, Uint16** dest)
{
const double tx = 1;
const double ty = 1;
float C[5] = { 0 };
unsigned max_bit = pow(2, bits_allocated);
for (unsigned i = 0; i < dest_height; ++i)
{
for (unsigned j = 0; j < dest_width; ++j)
{
const float x = float(tx * j);
const float y = float(ty * i);
const float dx = tx * j - x, dx2 = dx * dx, dx3 = dx2 * dx;
const float dy = ty * i - y, dy2 = dy * dy, dy3 = dy2 * dy;
for (int jj = 0; jj < 4; ++jj)
{
const int idx = y - 1 + jj;
float a0 = get_subpixel(src, dest_width, dest_height, x, idx);
float d0 = get_subpixel(src, dest_width, dest_height, x - 1, idx) - a0;
float d2 = get_subpixel(src, dest_width, dest_height, x + 1, idx) - a0;
float d3 = get_subpixel(src, dest_width, dest_height, x + 2, idx) - a0;
float a1 = -(1.0f / 3.0f) * d0 + d2 - (1.0f / 6.0f) * d3;
float a2 = 0.5f * d0 + 0.5f * d2;
float a3 = -(1.0f / 6.0f) * d0 - 0.5f * d2 + (1.0f / 6.0f) * d3;
C[jj] = a0 + a1 * dx + a2 * dx2 + a3 * dx3;
d0 = C[0] - C[1];
d2 = C[2] - C[1];
d3 = C[3] - C[1];
a0 = C[1];
a1 = -(1.0f / 3.0f) * d0 + d2 - (1.0f / 6.0f) * d3;
a2 = 0.5f * d0 + 0.5f * d2;
a3 = -(1.0f / 6.0f) * d0 - 0.5f * d2 + (1.0f / 6.0f) * d3;
(*dest)[i * dest_width + j] = saturate(a0 + a1 * dy + a2 * dy2 + a3 * dy3, max_bit);
}
}
}
}

How can you have this? The c's havent been computed until the jj loop ends the brace should be above the d's - I'm not considering if the method is correct otherwise.
for (int jj = 0; jj < 4; ++jj)
{
const int idx = y - 1 + jj;
float a0 = get_subpixel(src, dest_width, dest_height, x, idx);
float d0 = get_subpixel(src, dest_width, dest_height, x - 1, idx) - a0;
float d2 = get_subpixel(src, dest_width, dest_height, x + 1, idx) - a0;
float d3 = get_subpixel(src, dest_width, dest_height, x + 2, idx) - a0;
float a1 = -(1.0f / 3.0f) * d0 + d2 - (1.0f / 6.0f) * d3;
float a2 = 0.5f * d0 + 0.5f * d2;
float a3 = -(1.0f / 6.0f) * d0 - 0.5f * d2 + (1.0f / 6.0f) * d3;
C[jj] = a0 + a1 * dx + a2 * dx2 + a3 * dx3;
// } // end jj
d0 = C[0] - C[1];
d2 = C[2] - C[1];
d3 = C[3] - C[1];
a0 = C[1];
a1 = -(1.0f / 3.0f) * d0 + d2 - (1.0f / 6.0f) * d3;
a2 = 0.5f * d0 + 0.5f * d2;
a3 = -(1.0f / 6.0f) * d0 - 0.5f * d2 + (1.0f / 6.0f) * d3;
(*dest)[i * dest_height + j] = saturate(a0 + a1 * dy + a2 * dy2 + a3 * dy3, max_bit);
} // end jj move his above
}
}

I wanted to share great link
cubic splines

Related

Trouble with precision, point accuracy at intersection of 3 planes

have tried multiple methods of plane intersection, the code is not producing an accurate result. At some angles its pretty good, for a right angle triangle, roughly in position but noticeably off.
xyzPoint return_Intersect_3planes(Tri tri1, Tri tri2, Tri tri3) {
double x1 = 0.646;
double y1 = 0.210;
double z1 = 2.147;
double a1 = 0.251;
double b1 = -0.456;
double c1 = -0.411;
double d1 = -((a1 * x1) + (b1 * y1) + (c1 * z1));
double x2 = -0.0744;
double y2 = 0.0808;
double z2 = 2.082;
double a2 = -0.1218;
double b2 = -0.2606;
double c2 = -0.748;
double d2 = -((a2 * x2) + (b2 * y2) + (c2 * z2));
double x3 = 0.10627;
double y3 = 0.3924;
double z3 = 2.335;
double a3 = 0.0987;
double b3 = 0.3236;
double c3 = -0.278;
double d3 = -((a3 * x3) + (b3 * y3) + (c3 * z3));
double D = (a2 * b2 * c3) + (b1 * c2 * a3) + (c1 * a2 * b3) - (a3 * b2 * c1) - (b3 * c2 * a1) - (c3 * a2 * b1);
double Dx = (d1 * b2 * c3) + (b1 * c2 * d3) + (c1 * d2 * b3) - (d3 * b2 * c1) - (b3 * c2 * d1) - (c3 * d2 * b1);
double Dy = (a1 * d2 * c3) + (d1 * c2 * a3) + (c1 * a2 * d3) - (a3 * d2 * c1) - (d3 * c2 * a1) - (c3 * a2 * d1);
double Dz = (a1 * b2 * d3) + (b1 * d2 * a3) + (d1 * a2 * b3) - (a3 * b2 * d1) - (b3 * d2 * a1) - (d3 * a2 * b1);
xyzPoint Intersection;
Intersection.x = Dx / D;
Intersection.y = Dy / D;
Intersection.z = Dz / D;
return Intersection;
}
The input numbers
Result is
x = 0.00276579 y = -0.32880155 z = -4.0193058
(this has been flipped to negative and might be totally wrong?)
Correct position based on CAD is
x = -0.002 y = 0.204 z = 2.498
ODS (open office calc) file has been uploaded
https://filebin.net/g6onuah7q5rfg7lj

Why is call by reference so much slower than inline code?

I am programming a physics simulation with few particles (typically 3, no more than 5).
In a condensed version my code structure like this:
#include<iostream>
class Particle{
double x; // coordinate
double m; // mass
};
void performStep(Particle &p, double &F_external){
p.x += -0.2*p.x + F_external/p.m; // boiled down, in reality complex calculation, not important here
}
int main(){
dt = 0.001; // time step, not important
Particle p1;
p1.x = 5; // some random number for initialization, in reality more complex but not important here
p.m = 1;
Particle p2;
p2.x = -1; // some random numbersfor initialization, in reality more complex but not important here
p.m = 2;
Particle p3;
p3.x = 0; // some random number for initialization, in reality more complex but not important here
p.m = 3;
double F_external = 0; // external forces
for(unsigned long long int i=0; i < 10000000000; ++i){ // many steps, typically 10e9
F_external = sin(i*dt);
performStep(p1, F_external);
performStep(p2, F_external);
performStep(p3, F_external);
}
std::cout << "p1.x: " << p1.x << std::endl;
std::cout << "p2.x: " << p2.x << std::endl;
std::cout << "p3.x: " << p3.x << std::endl;
}
I have determined with clock() that the performStep(p, F_external) call is the bottleneck in my code).
When I tried to do inline calculation, i.e. replace performStep(p1, F_external) by p1.x += -0.2*p1.x + F_external/p1.m; the calculation suddenly was roughly a factor of 2 faster. Note that performStep() in reality is about ~60 basic arithmetic calculations over ~20 lines, so the code becomes really bloated if I just inline it for every particle.
Why is that the case? I am compiling with MinGW64/g++ and the -O2 flag. I thought the compiler would optimize such things?
Edit:
Here is the function that is called. Note that in reality, I calculate all three coordinates x,y,z with a couple of different external forces. Variables which are not passed via the function are a member of SimulationRun. The algorithm is a fourth-order leapfrog algorithm.
void SimulationRun::performLeapfrog_z(const unsigned long long int& i, const double& x, const double& y, double& z, const double& vx, const double& vy, double& vz, const double& qC2U0,
const double& U0, const double& m, const double& C4, const double& B2, const double& f_minus, const double& f_z, const double& f_plus, const bool& bool_calculate_xy,
const double& Find, const double& Fheating) {
// probing for C4 == 0 and B2 == 0 saves some computation time
if (C4 == 0) {
Fz_C4_Be = 0;
}
if (B2 == 0 || !bool_calculate_xy) {
Fz_B2_Be = 0;
}
z1 = z + c1 * vz * dt;
if (C4 != 0 && !bool_calculate_xy) {
Fz_C4_Be = (-4) * q * C4 * U0 * z1 * z1 * z1;
}
else if (C4 != 0 && bool_calculate_xy) {
Fz_C4_Be = q * C4 * U0 * (-4 * z1 * z1 * z1 + 6 * z1 * (x * x + y * y));
}
if (B2 != 0 && bool_calculate_xy) {
Fz_B2_Be = q * B2 * (-vx * z1 * y + vy * z1 * x);
}
acc_z1 = (qC2U0 * (-2) * z1 + Find + Fz_C4_Be + Fz_B2_Be + Fheating) / m;
vz1 = vz + d1 * acc_z1 * dt;
z2 = z1 + c2 * vz1 * dt;
if (C4 != 0 && !bool_calculate_xy) {
Fz_C4_Be = (-4) * q * C4 * U0 * z2 * z2 * z2;
}
else if (C4 != 0 && bool_calculate_xy) {
Fz_C4_Be = q * C4 * U0 * (-4 * z2 * z2 * z2 + 6 * z2 * (x * x + y * y));
}
if (B2 != 0 && bool_calculate_xy) {
Fz_B2_Be = q * B2 * (-vx * z2 * y + vy * z2 * x);
}
acc_z2 = (qC2U0 * (-2) * z2 + +Find + Fz_C4_Be + Fz_B2_Be + Fheating) / m;
vz2 = vz1 + d2 * acc_z2 * dt;
z3 = z2 + c3 * vz2 * dt;
if (C4 != 0 && !bool_calculate_xy) {
Fz_C4_Be = (-4) * q * C4 * U0 * z3 * z3 * z3;
}
else if (C4 != 0 && bool_calculate_xy) {
Fz_C4_Be = q * C4 * U0 * (-4 * z3 * z3 * z3 + 6 * z3 * (x * x + y * y));
}
if (B2 != 0 && bool_calculate_xy) {
Fz_B2_Be = q * B2 * (-vx * z3 * y + vy * z3 * x);
}
acc_z3 = (qC2U0 * (-2) * z3 + Find + Fz_C4_Be + Fz_B2_Be + Fheating) / m;
vz3 = vz2 + d3 * acc_z3 * dt;
z = z3 + c4 * vz3 * dt;
vz = vz3;
}
Optimization is hard, even for compilers. Here are some optimization tips:
Since your performStep is hotspot, put it into a header file(in case that you split declaration and definition into header/source), then add inline keyword, like:
// at file xxx.h
inline void performStep(Particle &p, double F_external){
p.x += -0.2*p.x + F_external/p.m; // boiled down, in reality complex calculation, not important here
}
Upgrade your compiler, maybe to the latest.
use https://godbolt.org/ to check the assembly code. In this case, unnecessary dereference is the headache of performance.

Converting RGB to Luv

i'm trying to convert an rgb image to Luv, i have some problem. The L component is good, but when i show the u and v component both are black(all pixels have value 0).
for (int i = 0; i<height; i++)
for (int j = 0; j<width; j++)
{
Vec3b v3 = src.at<Vec3b>(i, j);
float b = ((float)v3[0]) / 255;
float g = ((float)v3[1]) / 255;
float r = ((float)v3[2]) / 255;
float x = r * 0.412453 + g * 0.357580 + b * 0.180423;
float y = r * 0.212671 + g * 0.715160 + b * 0.072169;
float z = r * 0.019334 + g * 0.119193 + b * 0.950227;
//L
if (y > 0.008856) {
l_mat.at<uchar>(i, j) = 255 / 100 * (116 * pow(y, 1.0 / 3.0));
dst.at<Vec3b>(i, j)[0] = 255 / 100 * (116 * pow(y, 1.0 / 3.0));
// printf("%d / " , l_mat.at<uchar>(i, j));
}
else {
l_mat.at<uchar>(i, j) = 255 / 100 * (903.3 * y);
dst.at<Vec3b>(i, j)[0] = 255 / 100 * (903.3 * y);
}
float u = 4 * x / (x + 15 * y + 3 * z);
float v = 9 * y / (x + 15 * y + 3 * z);
//printf("u: %.2f , v:%.2f || ", u, v);
//U
u_mat.at<uchar>(i, j) = 255 / 354 * (13 * l_mat.at<uchar>(i, j)*(u - 0.19793943) + 134);
//printf("%d / ", u_mat.at<uchar>(i, j));
dst.at<Vec3b>(i, j) = 255 / 354 * (13 * l_mat.at<uchar>(i, j)*(u - 0.19793943) + 134);
//v
v_mat.at<uchar>(i, j) = 255 / 262 * (13 * l_mat.at<uchar>(i, j)*(v - 0.46831096)+140);
dst.at<Vec3b>(i, j) = 255 / 262 * (13 * l_mat.at<uchar>(i, j)*(v - 0.46831096) + 140);
}
I have to do the conversions pixel by pixel, i can't use cvtcolor.

Negative index runtime error

I am using this as my reference to implement my version of Bicubic interpolation for resizing the images. Here is the function that I have so far with some changes.
IplImage * bicubic(IplImage *img, int newWidth, int newHeight)
{
IplImage *img2 ;
img2 = createImage(newWidth,newHeight);
uchar * data = (uchar*)img->imageData;
uchar * Data = (uchar*)img2->imageData;
//int a,b,c,index;
uchar Cc;
uchar C[5];
uchar d0,d2,d3,a0,a1,a2,a3;
int i,j,k,jj;
int x,y;
float dx,dy;
float tx,ty;
tx = (float)img->width /newWidth ;
ty = (float)img->height / newHeight;
printf("New Width = %d, New Height = %d WidthStep = %d", newWidth, newHeight,img->widthStep);
for(i = 0; i< newHeight; i++)
{
for(j = 0; j< newWidth; j++)
{
x = (int)(tx * j);
y = (int)(ty * i);
dx = tx * j - x;
dy = ty * i - y;
for(k = 0;k < 3;k++)
{
for(jj = 0;jj <= 3 ;jj++)
{
int z = (y - 1 + jj);
//if(z > -1){
a0 = data[z * img->widthStep + (x)*img->nChannels +k];//===>Throws of runtime error
d0 = data[z * img->widthStep + (x-1)*img->nChannels +k] - a0 ;
d2 = data[z * img->widthStep + (x+1)*img->nChannels +k] - a0 ;
d3 = data[z * img->widthStep + (x+2)*img->nChannels +k] - a0 ;
a1 = -1.0/3 * d0 + d2 -1.0/6*d3;
a2 = 1.0/2 * d0 + 1.0/2*d2;
a3 = -1.0/6 * d0 - 1.0/2*d2 + 1.0/6*d3;
C[jj] = a0 + a1*dx + a2*dx*dx + a3*dx*dx*dx;
d0 = C[0]-C[1];
d2 = C[2]-C[1];
d3 = C[3]-C[1];
a0 = C[1];
a1 = -1.0/3*d0 + d2 -1.0/6*d3;
a2 = 1.0/2*d0 + 1.0/2*d2;
a3 = -1.0/6*d0 - 1.0/2*d2 + 1.0/6*d3;
Cc = a0 + a1*dy + a2*dy*dy + a3*dy*dy*dy;
Data[i*img2->widthStep +j*img2->nChannels +k ] = Cc;
//}
}
}
}
}
return img2;
}
The problem that I am facing is that when I call this bicubic function, it throws off an invalid access runtime error at the line where I find out the value of a0. I am using VS 2012 debugger and it tells me that the value of z is calculated as -1. This causes the index to access the invalid part of memory of data array.
My question is, why is this happening? Am I missing something in OpenCV's image library that can help in getting right indices so that I dont run into this error? Or am I making some mistake in accessing the correct indices?
for(i = 0; i< newHeight; i++)
{
for(j = 0; j< newWidth; j++)
{
x = (int)(tx * j);
y = (int)(ty * i);
dx = tx * j - x;
dy = ty * i - y;
for(k = 0;k < 3;k++)
{
for(jj = 0;jj <= 3 ;jj++)
{
int z = (y - 1 + jj);
//if(z > -1){
a0 = data[z * img->widthStep + (x)*img->nChannels +k];//===>Throws of runtime error
d0 = data[z * img->widthStep + (x-1)*img->nChannels +k] - a0 ;
On the first iteration, i and j are 0. as are k and jj
This means that:
y = (int)(ty * i); //y = ty * 0 (== 0)
int z = (y - 1 + jj); //z = 0 - 1 + 0 (==-1)
And so in the line:
a0 = data[z * img->widthStep + (x)*img->nChannels +k];//===>Throws of runtime error
the index is:
(-1) * img->widthStep + (x)*img->nChannels +k
simplifies to:
(-1) * img->widthStep + 0 + 0
which is:
-img->widthStep
This is of course out of bounds, leading to the crash.

Bi-Cubic Interpolation Algorithm for Image Scaling

I'm trying to write a basic bicubic resize algorithm to resize a 24-bit RGB bitmap. I have a general understanding of the math involved, and I'm using this implementation from Google Code as a guide. I'm not using any external libraries here - I'm just experimenting with the algorithm itself. The bitmap is represented as a plain std::vector<unsigned char>:
inline unsigned char getpixel(const std::vector<unsigned char>& in,
std::size_t src_width, std::size_t src_height, unsigned x, unsigned y, int channel)
{
if (x < src_width && y < src_height)
return in[(x * 3 * src_width) + (3 * y) + channel];
return 0;
}
std::vector<unsigned char> bicubicresize(const std::vector<unsigned char>& in,
std::size_t src_width, std::size_t src_height, std::size_t dest_width, std::size_t dest_height)
{
std::vector<unsigned char> out(dest_width * dest_height * 3);
const float tx = float(src_width) / dest_width;
const float ty = float(src_height) / dest_height;
const int channels = 3;
const std::size_t row_stride = dest_width * channels;
unsigned char C[5] = { 0 };
for (int i = 0; i < dest_height; ++i)
{
for (int j = 0; j < dest_width; ++j)
{
const int x = int(tx * j);
const int y = int(ty * i);
const float dx = tx * j - x;
const float dy = ty * i - y;
for (int k = 0; k < 3; ++k)
{
for (int jj = 0; jj < 4; ++jj)
{
const int z = y - 1 + jj;
unsigned char a0 = getpixel(in, src_width, src_height, z, x, k);
unsigned char d0 = getpixel(in, src_width, src_height, z, x - 1, k) - a0;
unsigned char d2 = getpixel(in, src_width, src_height, z, x + 1, k) - a0;
unsigned char d3 = getpixel(in, src_width, src_height, z, x + 2, k) - a0;
unsigned char a1 = -1.0 / 3 * d0 + d2 - 1.0 / 6 * d3;
unsigned char a2 = 1.0 / 2 * d0 + 1.0 / 2 * d2;
unsigned char a3 = -1.0 / 6 * d0 - 1.0 / 2 * d2 + 1.0 / 6 * d3;
C[jj] = a0 + a1 * dx + a2 * dx * dx + a3 * dx * dx * dx;
d0 = C[0] - C[1];
d2 = C[2] - C[1];
d3 = C[3] - C[1];
a0 = C[1];
a1 = -1.0 / 3 * d0 + d2 -1.0 / 6 * d3;
a2 = 1.0 / 2 * d0 + 1.0 / 2 * d2;
a3 = -1.0 / 6 * d0 - 1.0 / 2 * d2 + 1.0 / 6 * d3;
out[i * row_stride + j * channels + k] = a0 + a1 * dy + a2 * dy * dy + a3 * dy * dy * dy;
}
}
}
}
return out;
}
Problem: When I use this algorithm to downscale an image, it works except the output image contains all black pixels on the right side for some reason, giving the appearance that it's been "cropped".
Example:
INPUT IMAGE:
OUTPUT IMAGE:
Question: Reviewing the algorithm, I can't see why this would happen. Does anyone see the flaw here?
try not exchanging width and height.
for (int i = 0; i < dest_width; ++i)
{
for (int j = 0; j < dest_height; ++j)
I suggest don't use this function because it was written very bad. You need to make two convolutions: at first by X coordinate then by Y. In this function all these convolutions are making in the same time that leads to very slow work. And if You would look at jj loop body you could notice that all second part of body begining from "d0 = C[0] - C[1];" could be moved outside jj loop because only the last iteration of this loop takes effect on out[] array (all previous iterations results will be overwrited).
You should switch the x and z when you call getpixel, and in getpixel you should index the array using:
[(y * 3 * src_width) + (3 * x) + channel]
In getpixel(in, src_width, src_height, z, x, k):
z mean horizontal offset
x mean vertical offset
So just need patch the getpixel function, below is the patched code:
inline unsigned char getpixel(const std::vector<unsigned char>& in,
std::size_t src_width, std::size_t src_height, unsigned y, unsigned x, int channel)
{
if (x < src_width && y < src_height)
return in[(y * 3 * src_width) + (3 * x) + channel];
return 0;
}
std::vector<unsigned char> bicubicresize(const std::vector<unsigned char>& in,
std::size_t src_width, std::size_t src_height, std::size_t dest_width, std::size_t dest_height)
{
std::vector<unsigned char> out(dest_width * dest_height * 3);
const float tx = float(src_width) / dest_width;
const float ty = float(src_height) / dest_height;
const int channels = 3;
const std::size_t row_stride = dest_width * channels;
unsigned char C[5] = { 0 };
for (int i = 0; i < dest_height; ++i)
{
for (int j = 0; j < dest_width; ++j)
{
const int x = int(tx * j);
const int y = int(ty * i);
const float dx = tx * j - x;
const float dy = ty * i - y;
for (int k = 0; k < 3; ++k)
{
for (int jj = 0; jj < 4; ++jj)
{
const int z = y - 1 + jj;
unsigned char a0 = getpixel(in, src_width, src_height, z, x, k);
unsigned char d0 = getpixel(in, src_width, src_height, z, x - 1, k) - a0;
unsigned char d2 = getpixel(in, src_width, src_height, z, x + 1, k) - a0;
unsigned char d3 = getpixel(in, src_width, src_height, z, x + 2, k) - a0;
unsigned char a1 = -1.0 / 3 * d0 + d2 - 1.0 / 6 * d3;
unsigned char a2 = 1.0 / 2 * d0 + 1.0 / 2 * d2;
unsigned char a3 = -1.0 / 6 * d0 - 1.0 / 2 * d2 + 1.0 / 6 * d3;
C[jj] = a0 + a1 * dx + a2 * dx * dx + a3 * dx * dx * dx;
d0 = C[0] - C[1];
d2 = C[2] - C[1];
d3 = C[3] - C[1];
a0 = C[1];
a1 = -1.0 / 3 * d0 + d2 -1.0 / 6 * d3;
a2 = 1.0 / 2 * d0 + 1.0 / 2 * d2;
a3 = -1.0 / 6 * d0 - 1.0 / 2 * d2 + 1.0 / 6 * d3;
out[i * row_stride + j * channels + k] = a0 + a1 * dy + a2 * dy * dy + a3 * dy * dy * dy;
}
}
}
}
return out;
}