SDL2.0 screen nullptr on render of Window - c++

Hey so I'm relatively new to the SDL library and just trying to get to grips with it.
I found a C++ conversion for Minecraft4k but it was based on SDL1.x so I'm trying to convert it to SDL2.0
At present the build is successful, but when it gets to;
plot(x, y, rgbmul(col, fxmul(br, ddist)));
It throws a read access violation exception:
screen was nullptr
This is my code;
// C++ port of Minecraft 4k JS (http://jsdo.it/notch/dB1E)
// By The8BitPimp
// See: the8bitpimp.wordpress.com
#include <SDL.h>
#include <math.h>
#include <windows.h>
#include <tchar.h>
#include "plot.h"
#include "llist.h"
const int w = 320;
const int h = 240;
SDL_Surface *screen = nullptr;
const float math_pi = 3.14159265359f;
// Single-precision sine; thin wrapper that forwards to sinf().
static inline float math_sin(float x) {
    const float result = sinf(x);
    return result;
}
// Single-precision cosine; thin wrapper that forwards to cosf().
static inline float math_cos(float x) {
    const float result = cosf(x);
    return result;
}
// the texture map
int texmap[16 * 16 * 16 * 3];
// the voxel map
char map[64 * 64 * 64];
// Returns a pseudo-random integer in [0, max), built from two rand() draws.
// Returns 0 when max <= 0 (the modulo would otherwise be undefined).
//
// The mixing is done in unsigned arithmetic: with a large RAND_MAX the signed
// expression `rand() << 16` overflows (undefined behaviour) and could produce
// a negative result, which callers use to build array contents and indices.
// The two draws are sequenced explicitly so the result is reproducible for a
// given seed regardless of the compiler's evaluation order.
static inline int random(int max) {
    if (max <= 0)
        return 0;
    const unsigned lo = (unsigned)rand();
    const unsigned hi = (unsigned)rand();
    // keep 31 bits so the value is always representable as a non-negative int
    const unsigned bits = (lo ^ (hi << 16)) & 0x7FFFFFFFu;
    return (int)(bits % (unsigned)max);
}
static inline void plot(int x, int y, int c) {
int *p = (int*)screen->pixels;
p[y * w + x] = c;
}
// Fills the global texmap with 16 procedurally generated textures.
// Texture j occupies 256 * 3 entries: 16 columns (n) by 48 rows (m), each
// entry a packed 0xRRGGBB int. render() samples rows 16..31 for hits on the
// x/z axes and rows 0..15 or 32..47 for hits on the y axis.
// The output depends on the exact sequence of random() calls, so statements
// in this function must not be reordered.
static void makeTextures(void) {
// each texture
for (int j = 0; j<16; j++) {
// k is the brightness (out of 255) later multiplied into the base colour i1
int k = 255 - random(96);
// each pixel in the texture
for (int m = 0; m<16 * 3; m++)
for (int n = 0; n<16; n++) {
// default base colour; i2 and i3 are scratch values reused below
int i1 = 0x966C4A;
int i2 = 0;
int i3 = 0;
if (j == 4)
i1 = 0x7F7F7F;
// every texture except 4 re-rolls the brightness for each pixel; texture 4
// re-rolls only when random(3) returns 0 (random(3) is not called otherwise)
if ((j != 4) || (random(3) == 0))
k = 255 - random(96);
if (j == 1)
{
// rows above a per-column boundary (18..21) get colour 0x6AAA40; the single
// row directly below the boundary is darkened to 2/3 brightness
if (m < (((n * n * 3 + n * 81) >> 2) & 0x3) + 18)
i1 = 0x6AAA40;
else if (m < (((n * n * 3 + n * 81) >> 2) & 0x3) + 19)
k = k * 2 / 3;
}
if (j == 7)
{
i1 = 0x675231;
// interior pixels of rows 1..14 and rows 33..46
if ((n > 0) && (n < 15) && (((m > 0) && (m < 15)) || ((m > 32) && (m < 47))))
{
i1 = 0xBC9862;
// i2 becomes the larger of the folded offsets of (n, m & 0xF) from 7, which
// produces concentric square bands of brightness
i2 = n - 7;
i3 = (m & 0xF) - 7;
if (i2 < 0)
i2 = 1 - i2;
if (i3 < 0)
i3 = 1 - i3;
if (i3 > i2)
i2 = i3;
k = 196 - random(32) + i2 % 3 * 32;
}
else if (random(2) == 0)
// scales k by 150% on even columns and 50% on odd columns
k = k * (150 - (n & 0x1) * 100) / 100;
}
if (j == 5)
{
i1 = 0xB53A15;
// colour 0xBCAFA5 on every fourth row and on columns staggered per 4-row band
if (((n + m / 4 * 4) % 8 == 0) || (m % 4 == 0))
i1 = 0xBCAFA5;
}
// from here on i2 is the final brightness; rows 32..47 are half as bright
i2 = k;
if (m >= 32)
i2 /= 2;
if (j == 8)
{
i1 = 5298487;
// on a 50% roll the base colour becomes 0, so the stored colour is 0;
// render() skips texels that are not > 0
if (random(2) == 0)
{
i1 = 0;
i2 = 255;
}
}
// fixed point colour multiply between i1 and i2
i3 =
((((i1 >> 16) & 0xFF) * i2 / 255) << 16) |
((((i1 >> 8) & 0xFF) * i2 / 255) << 8) |
((i1 & 0xFF) * i2 / 255);
// pack the colour away
texmap[n + m * 16 + j * 256 * 3] = i3;
}
}
}
// Populates the 64x64x64 voxel map with random block ids (0..15) and then
// clears a cell whenever a random threshold exceeds a value that grows with
// the cell's distance from the line y = z = 32.5, so cells near that line are
// the most likely to end up empty.
// random() is called twice per cell, in the same order as before (block id
// first, threshold second), so the generated map is unchanged.
static void makeMap(void) {
    for (int x = 0; x < 64; ++x) {
        for (int y = 0; y < 64; ++y) {
            const float dy = (y - 32.5) * 0.4;
            for (int z = 0; z < 64; ++z) {
                const float dz = (z - 32.5) * 0.4;
                const int cell = (z << 12) | (y << 6) | x;
                const char block = random(16);
                const float chance = random(256) / 256.0f;
                const float limit = sqrtf(sqrtf(dy * dy + dz * dz)) - 0.8f;
                map[cell] = (chance > limit) ? 0 : block;
            }
        }
    }
}
// Builds the data render() reads: first the texture table (texmap), then the
// voxel map (map). Both generators draw from the same rand() stream via
// random(), so swapping the two calls would change the generated content.
static void init(void) {
makeTextures();
makeMap();
}
// 8.8 fixed-point multiply: the product of a and b scaled back down by 256.
static inline int fxmul(int a, int b) {
    const int product = a * b;
    return product >> 8;
}
// Multiplies each 8-bit channel of the packed 0xRRGGBB colour a by the
// fixed-point factor b (256 == 1.0) and repacks the three results.
static inline int rgbmul(int a, int b) {
    int packed = 0;
    // walk the channels from red (bit 16) down to blue (bit 0)
    for (int shift = 16; shift >= 0; shift -= 8) {
        const int channel = (a >> shift) & 0xff;
        packed |= ((channel * b) >> 8) << shift;
    }
    return packed;
}
// Ray-casts one frame and writes every pixel through plot().
// The camera position and rotation are derived from SDL_GetTicks() % 10000.
// For each screen pixel a ray is stepped through the voxel map once per axis
// (x, y, z), and the nearest textured, non-zero texel found within a distance
// of 32 determines the pixel colour.
static void render(void) {
// now runs from 0 to 1 over each 10000-tick period
float now = (float)(SDL_GetTicks() % 10000) / 10000.f;
float xRot = math_sin(now * math_pi * 2) * 0.4 + math_pi / 2;
float yRot = math_cos(now * math_pi * 2) * 0.4;
float yCos = math_cos(yRot);
float ySin = math_sin(yRot);
float xCos = math_cos(xRot);
float xSin = math_sin(xRot);
// camera origin: moves along x with time, fixed in y and z
float ox = 32.5 + now * 64.0;
float oy = 32.5;
float oz = 32.5;
// for each column
for (int x = 0; x < w; x++) {
// get the x axis delta
float ___xd = ((float)x - (float)w / 2.f) / (float)h;
// for each row
for (int y = 0; y < h; y++) {
// get the y axis delta
float __yd = ((float)y - (float)h / 2.f) / (float)h;
float __zd = 1;
// rotate the view ray by yRot, then by xRot
float ___zd = __zd * yCos + __yd * ySin;
float _yd = __yd * yCos - __zd * ySin;
float _xd = ___xd * xCos + ___zd * xSin;
float _zd = ___zd * xCos - ___xd * xSin;
// best hit so far: colour, per-axis brightness, distance fade, distance
int col = 0;
int br = 255;
float ddist = 0;
float closest = 32.f;
// for each principal axis x,y,z
for (int d = 0; d < 3; d++) {
float dimLength = _xd;
if (d == 1)
dimLength = _yd;
if (d == 2)
dimLength = _zd;
// scale the direction so one step advances exactly one unit along axis d
float ll = 1.0f / (dimLength < 0.f ? -dimLength : dimLength);
float xd = (_xd)* ll;
float yd = (_yd)* ll;
float zd = (_zd)* ll;
// fractional distance from the origin to the first grid plane on axis d
float initial = ox - floor(ox);
if (d == 1) initial = oy - floor(oy);
if (d == 2) initial = oz - floor(oz);
if (dimLength > 0) initial = 1 - initial;
float dist = ll * initial;
float xp = ox + xd * initial;
float yp = oy + yd * initial;
float zp = oz + zd * initial;
// when travelling in the negative direction, step into the cell behind the plane
if (dimLength < 0) {
if (d == 0) xp--;
if (d == 1) yp--;
if (d == 2) zp--;
}
// while we are considering a ray that is still closer than the best so far
while (dist < closest) {
// quantize to the map grid
int tex = map[(((int)zp & 63) << 12) | (((int)yp & 63) << 6) | ((int)xp & 63)];
// if this voxel has a texture applied
if (tex > 0) {
// find the uv coordinates of the intersection point
int u = ((int)((xp + zp) * 16.f)) & 15;
int v = ((int)(yp * 16.f) & 15) + 16;
// hits on the y axis use texture rows 0..15, or 32..47 when the ray points
// in the negative y direction
if (d == 1) {
u = ((int)(xp * 16.f)) & 15;
v = (((int)(zp * 16.f)) & 15);
if (yd < 0)
v += 32;
}
// find the colour at the intersection point
int cc = texmap[u + v * 16 + tex * 256 * 3];
// if the colour is not transparent
if (cc > 0) {
col = cc;
// fade: 255 at distance 0 down to 0 at distance 32
ddist = 255 - ((dist / 32 * 255));
// brightness depends on which axis was hit
br = 255 * (255 - ((d + 2) % 3) * 50) / 255;
// we now have the closest hit point (also terminates this ray)
closest = dist;
}
}
// advance the ray
xp += xd;
yp += yd;
zp += zd;
dist += ll;
}
}
// col is still 0 when nothing was hit, so the pixel is written as 0
plot(x, y, rgbmul(col, fxmul(br, ddist)));
}
}
}
int main(int argc, char *argv[]) {
SDL_Init(SDL_INIT_EVERYTHING);
SDL_Window *screen;
screen = SDL_CreateWindow(
"Minecraft4k", // window title
SDL_WINDOWPOS_CENTERED, // initial x position
SDL_WINDOWPOS_CENTERED, // initial y position
320, // width, in pixels
240, // height, in pixels
SDL_WINDOW_OPENGL // flags - see below
);
SDL_Renderer* renderer;
renderer = SDL_CreateRenderer(screen, -1, SDL_RENDERER_ACCELERATED);
if (screen == nullptr) {
return 1;
}
init();
bool running = true;
while (running) {
SDL_Event event;
while (SDL_PollEvent(&event)) {
running &= (event.type != SDL_QUIT);
}
SDL_RenderPresent(renderer);
render();
}
SDL_DestroyWindow(screen);
SDL_Quit();
return 0;
}
When I actually run the code I do get a black screen, but the debugger lands on the line
plot(x, y, rgbmul(col, fxmul(br, ddist)));
in ;
static void render(void)
This is all just "for fun" so any information or guidance is appreciated.

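You declare screen twice: first as a global variable of type SDL_Surface * (initialized to nullptr), and a second time as a local variable of type SDL_Window * inside main. The local declaration shadows the global one, so SDL_CreateWindow only ever initializes the local variable.
Because of that, the global screen is still nullptr when plot dereferences it, which is exactly what the error message states. Give the window variable a different name and assign a real SDL_Surface (for example, the one returned by SDL_GetWindowSurface) to the global screen.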

Related

Problem of converting bgr to yuv420p with cuda

I need to convert image from bgr to yuv420p and I first use OpenCV to do so.
Mat img = imread("1.bmp");
Mat yuvImg;
cvtColor(img,yuvImg,COLOR_BGR2YUV_I420);
The result is correct. However, my image is very large: almost 6400 * 2000 pixels.
I find that converting BGR to YUV420p with the OpenCV API cvtColor takes too much time at that size.
Then I decide to convert it myself and speed it with cuda.
Here is code in cpu:
// Converts a packed BGR image (3 bytes per pixel, row-major) to planar
// YUV 4:2:0: width*height Y bytes, then the U plane, then the V plane.
// One chroma sample is taken from the pixel at every even row / even column;
// the V plane starts width*height/4 bytes after the U plane.
// Returns without writing anything when either pointer is NULL.
void bgr_to_yuv420p(unsigned char* yuv420p, unsigned char* bgr, int width, int height)
{
    if (yuv420p == NULL || bgr == NULL)
        return;
    const int frameSize = width * height;
    int yIndex = 0;
    int uIndex = frameSize;
    int vIndex = frameSize + frameSize / 4;
    for (int row = 0; row < height; ++row) {
        for (int col = 0; col < width; ++col) {
            const unsigned char *px = &bgr[(row * width + col) * 3];
            const int B = px[0];
            const int G = px[1];
            const int R = px[2];
            // luma, clamped to a byte
            int Y = ((66 * R + 129 * G + 25 * B + 128) >> 8) + 16;
            if (Y < 0)
                Y = 0;
            else if (Y > 255)
                Y = 255;
            yuv420p[yIndex++] = (unsigned char)Y;
            // chroma is only stored for even rows and even columns
            if ((row % 2 != 0) || (col % 2 != 0))
                continue;
            int U = ((-38 * R - 74 * G + 112 * B + 128) >> 8) + 128;
            int V = ((112 * R - 94 * G - 18 * B + 128) >> 8) + 128;
            if (U < 0)
                U = 0;
            else if (U > 255)
                U = 255;
            if (V < 0)
                V = 0;
            else if (V > 255)
                V = 255;
            yuv420p[uIndex++] = (unsigned char)U;
            yuv420p[vIndex++] = (unsigned char)V;
        }
    }
}
I test the code bgr_to_yuv420p(...) and the result is also normal.
Then I speed it up with cuda.
Here is all my code include kernel function and test function.
#include <iostream>
#include <time.h>
#include <vector_types.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include "opencv2/highgui.hpp"
#include "opencv2/opencv.hpp"
using namespace cv;
using namespace std;
// Kernel from the original question: converts BGR pixels to a Y plane
// followed by chroma bytes. One thread handles one pixel; threads outside the
// image do nothing.
// NOTE(review): this is the version the question reports as producing wrong
// U/V output; see the inline notes for the visible causes.
__global__ void bgr2yuv420p(uchar3 * d_in, unsigned char * d_out,
uint imgheight, uint imgwidth)
{
int col_num = blockIdx.x*blockDim.x+threadIdx.x;
int row_num = blockIdx.y*blockDim.y+threadIdx.y;
if ((row_num < imgheight) && (col_num < imgwidth))
{
// uint32_t a = *((uint32_t *)&dinput[global_offset*3]);
int global_offset = row_num*imgwidth+col_num;
// uchar3 components are read as x = B, y = G, z = R
int r,g,b;
r = int(d_in[global_offset].z);
g = int (d_in[global_offset].y);
b = int (d_in[global_offset].x);
// NOTE(review): unlike the CPU reference in the same post, there is no +128
// rounding term and no clamping to 0..255
d_out[row_num * imgwidth + col_num] = ((66*r + 129*g + 25*b) >> 8) + 16;
// only threads with even x and y indices inside their block write chroma
if(((threadIdx.x & 1) == 0) && ((threadIdx.y & 1) == 0)){
// NOTE(review): the offset advances by a full imgwidth per image row, so for
// row_num >= imgheight/2 it lies beyond imgwidth*imgheight*3/2 bytes
int uv_offset = imgwidth*imgheight+((row_num*imgwidth))+col_num;
// two adjacent bytes are written (interleaved chroma); the first uses the
// coefficients the CPU reference uses for V, the second those it uses for U
d_out[uv_offset] = ((112*r + -94*g + -18*b) >> 8) + 128;
d_out[uv_offset+1] = ((-38*r + -74*g + 112*b) >> 8) + 128;
}
}
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when err is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t err, const char *what)
{
    if (err == cudaSuccess)
        return true;
    cerr << what << " failed: " << cudaGetErrorString(err) << endl;
    return false;
}

// Loads 1.bmp, converts it on the GPU with bgr2yuv420p, then shows and saves
// the resulting single-channel image (Y rows followed by the chroma rows).
// Returns 0 on success and 1 when the image cannot be used or a CUDA call
// fails.
int main(void)
{
    Mat srcImage = imread("1.bmp");
    // imread returns an empty Mat on failure; the kernel expects 3 bytes per
    // pixel in one contiguous buffer
    if (srcImage.empty() || srcImage.type() != CV_8UC3 || !srcImage.isContinuous()) {
        cerr << "could not load 1.bmp as a continuous 8-bit BGR image" << endl;
        return 1;
    }
    // 4:2:0 output needs even dimensions; with an odd height the output Mat
    // below would be smaller than the device buffer copied into it
    if ((srcImage.rows % 2) != 0 || (srcImage.cols % 2) != 0) {
        cerr << "image width and height must both be even" << endl;
        return 1;
    }
    imshow("srcImage", srcImage);
    const uint imgheight = srcImage.rows;
    const uint imgwidth = srcImage.cols;
    // sizes are computed in size_t so large images cannot overflow 32 bits
    const size_t pixelCount = (size_t)imgheight * imgwidth;
    const size_t inBytes = pixelCount * sizeof(uchar3);
    const size_t outBytes = pixelCount * 3 / 2;
    Mat nv12Image(imgheight * 3 / 2, imgwidth, CV_8UC1, Scalar(255));
    //input and output
    uchar3 *d_in = NULL;
    unsigned char *d_out = NULL;
    // allocate device memory and copy the image from cpu to gpu
    bool ok = cudaOk(cudaMalloc((void**)&d_in, inBytes), "cudaMalloc(d_in)")
           && cudaOk(cudaMalloc((void**)&d_out, outBytes), "cudaMalloc(d_out)")
           && cudaOk(cudaMemcpy(d_in, srcImage.data, inBytes, cudaMemcpyHostToDevice),
                     "cudaMemcpy(host to device)");
    if (ok) {
        dim3 threadsPerBlock(32, 32);
        dim3 blocksPerGrid((imgwidth + threadsPerBlock.x - 1) / threadsPerBlock.x,
                           (imgheight + threadsPerBlock.y - 1) / threadsPerBlock.y);
        //run kernel function
        bgr2yuv420p<<<blocksPerGrid, threadsPerBlock>>>(d_in, d_out, imgheight, imgwidth);
        ok = cudaOk(cudaGetLastError(), "kernel launch")
          && cudaOk(cudaDeviceSynchronize(), "kernel execution")
          && cudaOk(cudaMemcpy(nv12Image.data, d_out, outBytes, cudaMemcpyDeviceToHost),
                    "cudaMemcpy(device to host)");
    }
    cudaFree(d_in);
    cudaFree(d_out);
    if (!ok)
        return 1;
    imshow("nv12", nv12Image);
    imwrite("cuda.bmp", nv12Image);
    // imshow windows are only drawn while the HighGUI event loop runs
    waitKey(0);
    return 0;
}
The code with cuda can run but the result is not normal. Y of YUV420p is normal but there is something wrong with U and V. I think the reason is here in __global__ void bgr2yuv420p(...)
if(((threadIdx.x & 1) == 0) && ((threadIdx.y & 1) == 0)){
int uv_offset = imgwidth*imgheight+((row_num*imgwidth))+col_num;
d_out[uv_offset] = ((112*r + -94*g + -18*b) >> 8) + 128;
d_out[uv_offset+1] = ((-38*r + -74*g + 112*b) >> 8) + 128;
}
I have tried a lot but still cannot solve it. I also find little code about converting RGB to YUV420p; most code is about converting YUV420p to RGB. So I would like to know whether somebody has run into the same problem, or can give me some advice.
Thanks Robert Crovella.Here is my update-1.
I follow Robert Crovella's advice and change the kernel function like this:
// "Update-1" kernel from the question: same as the first version except that
// the chroma row offset now uses row_num>>1. One thread handles one pixel;
// threads outside the image do nothing.
// NOTE(review): the question reports that this version still produces wrong
// U/V output; see the inline notes for the visible remaining differences from
// the CPU reference.
__global__ void bgr2yuv420p(uchar3 * d_in, unsigned char * d_out,
uint imgheight, uint imgwidth)
{
int col_num = blockIdx.x*blockDim.x+threadIdx.x;
int row_num = blockIdx.y*blockDim.y+threadIdx.y;
if ((row_num < imgheight) && (col_num < imgwidth))
{
// uint32_t a = *((uint32_t *)&dinput[global_offset*3]);
int global_offset = row_num*imgwidth+col_num;
// uchar3 components are read as x = B, y = G, z = R
int r,g,b;
r = int(d_in[global_offset].z);
g = int (d_in[global_offset].y);
b = int (d_in[global_offset].x);
// NOTE(review): unlike the CPU reference in the same post, there is no +128
// rounding term and no clamping to 0..255
d_out[row_num * imgwidth + col_num] = ((66*r + 129*g + 25*b) >> 8) + 16;
// only threads with even x and y indices inside their block write chroma
if(((threadIdx.x & 1) == 0) && ((threadIdx.y & 1) == 0)){
// one chroma row of imgwidth bytes per two image rows
int uv_offset = imgwidth*imgheight+((row_num>>1)*imgwidth)+col_num;
// two adjacent bytes are written (interleaved chroma, whereas the CPU
// reference fills separate U and V planes); the first uses the coefficients
// the CPU reference uses for V, the second those it uses for U
d_out[uv_offset] = ((112*r + -94*g + -18*b) >> 8) + 128;
d_out[uv_offset+1] = ((-38*r + -74*g + 112*b) >> 8) + 128;
}
}
}
I test the new kernel with excitement,but the result is also not normal.
Here is my result image with the updated kernel function.
yuv420p image converted by myself
Then the normal result image converted by opencv api is here.
yuv420p image converted by opencv api
As we can see, the difference between the two images is U and V. I have already changed the index of U and V in kernel function, i.e.
if(((threadIdx.x & 1) == 0) && ((threadIdx.y & 1) == 0)){
int uv_offset = imgwidth*imgheight+((row_num >>1)*imgwidth)+col_num;
d_out[uv_offset] = ((112*r + -94*g + -18*b) >> 8) + 128;
d_out[uv_offset+1] = ((-38*r + -74*g + 112*b) >> 8) + 128;
}
I think it will work but it does not. Any other advice? Robert Crovella
Edit: The solution is Robert Crovella's latest answer. I have double checked it and it is really perfect.
There are a variety of issues:
the calculations to convert R,G,B to Y,U,V between your CPU and GPU codes are not identical. Yes, this matters.
Your CPU code uses planar Y,U,V storage. That means Y has its own plane, U has its own plane, and V has its own plane. Your GPU code uses a semi-planar (NV12) format. That means Y has its own plane, and U,V are interleaved in a single plane: UVUVUVUVUVUV.... Obviously the outputs of those two codes could never match identically.
IMO, there is no need to drag OpenCV into this.
Your UV offset calculation in the kernel (GPU) code was broken. The imgwidth*imgheight offset gets you past the Y area (correctly), but from that point, it is not correct to use row_num*imgwidth to index by row into the UV planar region. You do not have that many rows in the UV planar region, you only have half as many rows.
In your GPU kernel, you had U,V ordering reversed, you were effectively doing VUVUVUVU...
My recommendation would be to start by harmonizing the calculation differences and storage order/format. The following code has the above issues addressed, and gives matching results for me between CPU and GPU codes:
$ cat t1708.cu
#include <iostream>
#include <time.h>
#include <cstdlib>
using namespace std;
// I have no idea if these are the correct conversion formulas
// I simply lifted what I saw in your host code so that we
// are using the same conversion calculations in host and device
// Luma (Y) of an R,G,B triple using the integer formula shared by the host
// and device code, clamped to 0..255.
__host__ __device__ unsigned char bgr2y(int R, int G, int B){
    const int luma = ((66 * R + 129 * G + 25 * B + 128) >> 8) + 16;
    if (luma < 0)
        return 0;
    if (luma > 255)
        return 255;
    return (unsigned char)luma;
}
// U chroma of an R,G,B triple using the integer formula shared by the host
// and device code, clamped to 0..255.
__host__ __device__ int bgr2u(int R, int G, int B){
    const int chroma = ((-38 * R - 74 * G + 112 * B + 128) >> 8) + 128;
    if (chroma < 0)
        return 0;
    if (chroma > 255)
        return 255;
    return chroma;
}
// V chroma of an R,G,B triple using the integer formula shared by the host
// and device code, clamped to 0..255.
__host__ __device__ int bgr2v(int R, int G, int B){
    const int chroma = ((112 * R - 94 * G - 18 * B + 128) >> 8) + 128;
    if (chroma < 0)
        return 0;
    if (chroma > 255)
        return 255;
    return chroma;
}
// Host reference conversion from packed BGR (3 bytes per pixel, row-major) to
// a Y plane followed by interleaved U,V pairs (semi-planar layout, despite
// the "420p" in the name). One U,V pair is taken from the pixel at every even
// row / even column. Returns without writing when either pointer is NULL.
void bgr_to_yuv420p(unsigned char* yuv420p, unsigned char* bgr, int width, int height)
{
    if (yuv420p == NULL || bgr == NULL)
        return;
    int lumaPos = 0;
    int chromaPos = width * height;   // chroma starts right after the Y plane
    for (int row = 0; row < height; ++row) {
        for (int col = 0; col < width; ++col) {
            const unsigned char *px = &bgr[(row * width + col) * 3];
            const int B = px[0];
            const int G = px[1];
            const int R = px[2];
            //BGR to YUV
            yuv420p[lumaPos++] = bgr2y(R, G, B);
            if ((row % 2 != 0) || (col % 2 != 0))
                continue;
            yuv420p[chromaPos] = bgr2u(R, G, B);
            yuv420p[chromaPos + 1] = bgr2v(R, G, B);
            chromaPos += 2;
        }
    }
}
// Kernel converting BGR pixels to a Y plane followed by interleaved U,V pairs
// (the same semi-planar layout the host bgr_to_yuv420p in this listing
// writes). One thread handles one pixel; threads outside the image return
// immediately.
//   d_in      - imgheight*imgwidth pixels, uchar3 components x=B, y=G, z=R
//   d_out     - output buffer of imgheight*imgwidth*3/2 bytes
//   imgheight - image height in pixels
//   imgwidth  - image width in pixels
// Chroma is written by the thread whose *image* coordinates are both even.
// The previous test on threadIdx only matched that rule when both block
// dimensions were even; results for such launches (e.g. 32x32) are unchanged.
__global__ void bgr2yuv420p(uchar3 * d_in, unsigned char * d_out,
                            uint imgheight, uint imgwidth)
{
    const int col_num = blockIdx.x*blockDim.x+threadIdx.x;
    const int row_num = blockIdx.y*blockDim.y+threadIdx.y;
    if ((row_num >= imgheight) || (col_num >= imgwidth))
        return;
    const int global_offset = row_num*imgwidth+col_num;
    const uchar3 px = d_in[global_offset];
    const int r = int(px.z);
    const int g = int(px.y);
    const int b = int(px.x);
    d_out[global_offset] = bgr2y(r,g,b);
    if (((col_num & 1) == 0) && ((row_num & 1) == 0)) {
        // one chroma row of imgwidth bytes (imgwidth/2 U,V pairs) per two image rows
        const int uv_offset = imgwidth*imgheight+((row_num>>1)*imgwidth)+col_num;
        d_out[uv_offset] = bgr2u(r,g,b);
        d_out[uv_offset+1] = bgr2v(r,g,b);
    }
}
// Self-check: converts a random 1500x1000 BGR image on the host
// (bgr_to_yuv420p) and on the device (bgr2yuv420p kernel) and compares the
// two outputs byte for byte.
// Returns 0 when they match, 1 on the first mismatch or on a CUDA error.
//
// The input keeps its per-pixel random values (it was previously overwritten
// with a single colour, which hid position-dependent errors), every CUDA call
// is checked, and all host and device buffers are released on every path.
int main(void)
{
    const uint imgheight = 1000;
    const uint imgwidth = 1500;
    const size_t pixelCount = (size_t)imgheight * imgwidth;
    const size_t inBytes = pixelCount * sizeof(uchar3);
    const size_t outBytes = pixelCount * 3 / 2;
    //input and output
    uchar3 *d_in = NULL;
    unsigned char *d_out = NULL;
    uchar3 *idata = new uchar3[pixelCount];
    unsigned char *odata = new unsigned char[outBytes];
    unsigned char *cdata = new unsigned char[outBytes];
    for (size_t i = 0; i < pixelCount; i++){
        uchar3 pix;
        pix.x = (rand()%30)+40;
        pix.y = (rand()%30)+40;
        pix.z = (rand()%30)+40;
        idata[i] = pix;
    }
    bgr_to_yuv420p(cdata, (unsigned char*) idata, imgwidth, imgheight);
    // allocate device memory and copy the image from cpu to gpu
    cudaError_t err = cudaMalloc((void**)&d_in, inBytes);
    if (err == cudaSuccess)
        err = cudaMalloc((void**)&d_out, outBytes);
    if (err == cudaSuccess)
        err = cudaMemcpy(d_in, idata, inBytes, cudaMemcpyHostToDevice);
    if (err == cudaSuccess) {
        dim3 threadsPerBlock(32, 32);
        dim3 blocksPerGrid((imgwidth + threadsPerBlock.x - 1) / threadsPerBlock.x,
                           (imgheight + threadsPerBlock.y - 1) / threadsPerBlock.y);
        //run kernel function
        bgr2yuv420p<<<blocksPerGrid, threadsPerBlock>>>(d_in, d_out, imgheight, imgwidth);
        err = cudaGetLastError();   // reports launch-configuration errors
    }
    if (err == cudaSuccess)
        err = cudaDeviceSynchronize();
    //copy yuv420p from gpu to cpu
    if (err == cudaSuccess)
        err = cudaMemcpy(odata, d_out, outBytes, cudaMemcpyDeviceToHost);
    int status = 0;
    if (err != cudaSuccess) {
        std::cout << "CUDA error: " << cudaGetErrorString(err) << std::endl;
        status = 1;
    } else {
        for (size_t i = 0; i < outBytes; i++) {
            if (odata[i] != cdata[i]) {
                std::cout << "mismatch at: " << i << " was: " << (int)odata[i] << " should be: " << (int)cdata[i] << std::endl;
                status = 1;
                break;
            }
        }
    }
    cudaFree(d_in);
    cudaFree(d_out);
    delete[] idata;
    delete[] odata;
    delete[] cdata;
    return status;
}
$ nvcc -o t1708 t1708.cu
$ cuda-memcheck ./t1708
========= CUDA-MEMCHECK
========= ERROR SUMMARY: 0 errors
$
Any time you are having trouble with a CUDA code, I recommend
Proper CUDA error checking
Running your code with cuda-memcheck
EDIT: Based on additional comments, here is a version of the above code that uses the OP-supplied CPU code verbatim, and provides a CUDA kernel that generates YUV planar storage (instead of semi-planar storage):
#include <iostream>
#include <time.h>
#include <cstdlib>
using namespace std;
// Luma (Y) of an R,G,B triple using the integer formula shared by the host
// and device code, clamped to 0..255.
__host__ __device__ unsigned char bgr2y(int R, int G, int B){
    const int luma = ((66 * R + 129 * G + 25 * B + 128) >> 8) + 16;
    if (luma < 0)
        return 0;
    if (luma > 255)
        return 255;
    return (unsigned char)luma;
}
// U chroma of an R,G,B triple using the integer formula shared by the host
// and device code, clamped to 0..255.
__host__ __device__ int bgr2u(int R, int G, int B){
    const int chroma = ((-38 * R - 74 * G + 112 * B + 128) >> 8) + 128;
    if (chroma < 0)
        return 0;
    if (chroma > 255)
        return 255;
    return chroma;
}
// V chroma of an R,G,B triple using the integer formula shared by the host
// and device code, clamped to 0..255.
__host__ __device__ int bgr2v(int R, int G, int B){
    const int chroma = ((112 * R - 94 * G - 18 * B + 128) >> 8) + 128;
    if (chroma < 0)
        return 0;
    if (chroma > 255)
        return 255;
    return chroma;
}
// Host reference conversion from packed BGR (3 bytes per pixel, row-major) to
// semi-planar YUV 4:2:0: a Y plane followed by interleaved U,V pairs. One
// U,V pair is taken from the pixel at every even row / even column.
// Returns without writing when either pointer is NULL.
void bgr_to_yuv420sp(unsigned char* yuv420p, unsigned char* bgr, int width, int height)
{
    if (yuv420p == NULL || bgr == NULL)
        return;
    int lumaPos = 0;
    int chromaPos = width * height;   // chroma starts right after the Y plane
    for (int row = 0; row < height; ++row) {
        for (int col = 0; col < width; ++col) {
            const unsigned char *px = &bgr[(row * width + col) * 3];
            const int B = px[0];
            const int G = px[1];
            const int R = px[2];
            //BGR to YUV
            yuv420p[lumaPos++] = bgr2y(R, G, B);
            if ((row % 2 != 0) || (col % 2 != 0))
                continue;
            yuv420p[chromaPos] = bgr2u(R, G, B);
            yuv420p[chromaPos + 1] = bgr2v(R, G, B);
            chromaPos += 2;
        }
    }
}
// Converts a packed BGR image (3 bytes per pixel, row-major) to planar
// YUV 4:2:0: width*height Y bytes, then the U plane, then the V plane.
// One chroma sample is taken from the pixel at every even row / even column;
// the V plane starts width*height/4 bytes after the U plane.
// Returns without writing anything when either pointer is NULL.
void bgr_to_yuv420p(unsigned char* yuv420p, unsigned char* bgr, int width, int height)
{
    if (yuv420p == NULL || bgr == NULL)
        return;
    const int frameSize = width * height;
    int yIndex = 0;
    int uIndex = frameSize;
    int vIndex = frameSize + frameSize / 4;
    for (int row = 0; row < height; ++row) {
        for (int col = 0; col < width; ++col) {
            const unsigned char *px = &bgr[(row * width + col) * 3];
            const int B = px[0];
            const int G = px[1];
            const int R = px[2];
            // luma, clamped to a byte
            int Y = ((66 * R + 129 * G + 25 * B + 128) >> 8) + 16;
            if (Y < 0)
                Y = 0;
            else if (Y > 255)
                Y = 255;
            yuv420p[yIndex++] = (unsigned char)Y;
            // chroma is only stored for even rows and even columns
            if ((row % 2 != 0) || (col % 2 != 0))
                continue;
            int U = ((-38 * R - 74 * G + 112 * B + 128) >> 8) + 128;
            int V = ((112 * R - 94 * G - 18 * B + 128) >> 8) + 128;
            if (U < 0)
                U = 0;
            else if (U > 255)
                U = 255;
            if (V < 0)
                V = 0;
            else if (V > 255)
                V = 255;
            yuv420p[uIndex++] = (unsigned char)U;
            yuv420p[vIndex++] = (unsigned char)V;
        }
    }
}
// Kernel converting BGR pixels to semi-planar YUV 4:2:0: a Y plane followed
// by interleaved U,V pairs (the layout the host bgr_to_yuv420sp writes).
// One thread handles one pixel; threads outside the image return immediately.
//   d_in      - imgheight*imgwidth pixels, uchar3 components x=B, y=G, z=R
//   d_out     - output buffer of imgheight*imgwidth*3/2 bytes
//   imgheight - image height in pixels
//   imgwidth  - image width in pixels
// Chroma is written by the thread whose *image* coordinates are both even.
// The previous test on threadIdx only matched that rule when both block
// dimensions were even; results for such launches (e.g. 32x32) are unchanged.
__global__ void bgr2yuv420sp(uchar3 * d_in, unsigned char * d_out,
                             uint imgheight, uint imgwidth)
{
    const int col_num = blockIdx.x*blockDim.x+threadIdx.x;
    const int row_num = blockIdx.y*blockDim.y+threadIdx.y;
    if ((row_num >= imgheight) || (col_num >= imgwidth))
        return;
    const int global_offset = row_num*imgwidth+col_num;
    const uchar3 px = d_in[global_offset];
    const int r = int(px.z);
    const int g = int(px.y);
    const int b = int(px.x);
    d_out[global_offset] = bgr2y(r,g,b);
    if (((col_num & 1) == 0) && ((row_num & 1) == 0)) {
        // one chroma row of imgwidth bytes (imgwidth/2 U,V pairs) per two image rows
        const int uv_offset = imgwidth*imgheight+((row_num>>1)*imgwidth)+col_num;
        d_out[uv_offset] = bgr2u(r,g,b);
        d_out[uv_offset+1] = bgr2v(r,g,b);
    }
}
// Kernel converting BGR pixels to planar YUV 4:2:0: a Y plane, then a U
// plane, then a V plane (the layout the host bgr_to_yuv420p writes).
// One thread handles one pixel; threads outside the image return immediately.
//   d_in      - imgheight*imgwidth pixels, uchar3 components x=B, y=G, z=R
//   d_out     - output buffer of imgheight*imgwidth*3/2 bytes
//   imgheight - image height in pixels
//   imgwidth  - image width in pixels
// Chroma is written by the thread whose *image* coordinates are both even.
// The previous test on threadIdx only matched that rule when both block
// dimensions were even; results for such launches (e.g. 32x32) are unchanged.
__global__ void bgr2yuv420p(uchar3 * d_in, unsigned char * d_out,
                            uint imgheight, uint imgwidth)
{
    const int col_num = blockIdx.x*blockDim.x+threadIdx.x;
    const int row_num = blockIdx.y*blockDim.y+threadIdx.y;
    if ((row_num >= imgheight) || (col_num >= imgwidth))
        return;
    const int global_offset = row_num*imgwidth+col_num;
    const uchar3 px = d_in[global_offset];
    const int r = int(px.z);
    const int g = int(px.y);
    const int b = int(px.x);
    d_out[global_offset] = bgr2y(r,g,b);
    if (((col_num & 1) == 0) && ((row_num & 1) == 0)) {
        // each chroma plane holds (imgheight/2) rows of (imgwidth/2) samples
        const int u_offset = imgwidth*imgheight+((row_num>>1)*(imgwidth>>1))+(col_num>>1);
        d_out[u_offset] = bgr2u(r,g,b);
        // the V plane follows the U plane
        const int v_offset = u_offset+((imgheight>>1)*(imgwidth>>1));
        d_out[v_offset] = bgr2v(r,g,b);
    }
}
// Self-check: converts a random 1500x1000 BGR image to planar YUV 4:2:0 on
// the host (bgr_to_yuv420p) and on the device (bgr2yuv420p kernel) and
// compares the two outputs byte for byte.
// Returns 0 when they match, 1 on the first mismatch or on a CUDA error.
//
// The input keeps its per-pixel random values (it was previously overwritten
// with a single colour, which hid position-dependent errors), every CUDA call
// is checked, and all host and device buffers are released on every path.
int main(void)
{
    const uint imgheight = 1000;
    const uint imgwidth = 1500;
    const size_t pixelCount = (size_t)imgheight * imgwidth;
    const size_t inBytes = pixelCount * sizeof(uchar3);
    const size_t outBytes = pixelCount * 3 / 2;
    //input and output
    uchar3 *d_in = NULL;
    unsigned char *d_out = NULL;
    uchar3 *idata = new uchar3[pixelCount];
    unsigned char *odata = new unsigned char[outBytes];
    unsigned char *cdata = new unsigned char[outBytes];
    for (size_t i = 0; i < pixelCount; i++){
        uchar3 pix;
        pix.x = (rand()%30)+40;
        pix.y = (rand()%30)+40;
        pix.z = (rand()%30)+40;
        idata[i] = pix;
    }
    bgr_to_yuv420p(cdata, (unsigned char*) idata, imgwidth, imgheight);
    // allocate device memory and copy the image from cpu to gpu
    cudaError_t err = cudaMalloc((void**)&d_in, inBytes);
    if (err == cudaSuccess)
        err = cudaMalloc((void**)&d_out, outBytes);
    if (err == cudaSuccess)
        err = cudaMemcpy(d_in, idata, inBytes, cudaMemcpyHostToDevice);
    if (err == cudaSuccess) {
        dim3 threadsPerBlock(32, 32);
        dim3 blocksPerGrid((imgwidth + threadsPerBlock.x - 1) / threadsPerBlock.x,
                           (imgheight + threadsPerBlock.y - 1) / threadsPerBlock.y);
        //run kernel function
        bgr2yuv420p<<<blocksPerGrid, threadsPerBlock>>>(d_in, d_out, imgheight, imgwidth);
        err = cudaGetLastError();   // reports launch-configuration errors
    }
    if (err == cudaSuccess)
        err = cudaDeviceSynchronize();
    //copy yuv420p from gpu to cpu
    if (err == cudaSuccess)
        err = cudaMemcpy(odata, d_out, outBytes, cudaMemcpyDeviceToHost);
    int status = 0;
    if (err != cudaSuccess) {
        std::cout << "CUDA error: " << cudaGetErrorString(err) << std::endl;
        status = 1;
    } else {
        for (size_t i = 0; i < outBytes; i++) {
            if (odata[i] != cdata[i]) {
                std::cout << "mismatch at: " << i << " was: " << (int)odata[i] << " should be: " << (int)cdata[i] << std::endl;
                status = 1;
                break;
            }
        }
    }
    cudaFree(d_in);
    cudaFree(d_out);
    delete[] idata;
    delete[] odata;
    delete[] cdata;
    return status;
}
I don't claim correctness for this code or any other code that I post. Anyone using any code I post does so at their own risk. I merely claim that I have attempted to address the deficiencies that I found in the original posting, and provide some explanation thereof. I am not claiming my code is defect-free, or that it is suitable for any particular purpose. Use it (or not) at your own risk.

Scaling png font down

Is there a way to scale down with highest quality a font which is png image in opengl at startup? I tried gluScaleImage but there are many artefacts. Is there anything that uses lanczos or something like that? I don't want to write a shader or anything that does the scaling runtime.
This is based on an algorithm I copied decades ago from the German c't Magazin; I still use it from time to time for issues similar to the one described by the OP.
// Scales imgSrc down to w x h pixels and stores the result in imgDst.
//
// Every destination pixel is the area-weighted average of the source pixels
// covered by its footprint: fully covered pixels contribute 1 / (sx * sy),
// pixels on the edge of the footprint contribute in proportion to the covered
// fraction. Pixels are 3 bytes each.
//
//   imgSrc - source image
//   imgDst - destination image; resized here (must not be imgSrc)
//   w, h   - destination size; 0 < w <= source width, 0 < h <= source height
//   align  - the destination bytes-per-row is rounded up to a multiple of this
// Returns true (always).
bool scaleDown(
  const Image &imgSrc,
  Image &imgDst,
  int w, int h,
  int align)
{
  const int wSrc = imgSrc.w(), hSrc = imgSrc.h();
  assert(w > 0 && w <= wSrc && h > 0 && h <= hSrc);
  // compute scaling factors
  const double sx = (double)wSrc / (double)w;
  const double sy = (double)hSrc / (double)h;
  const double sxy = sx * sy;
  // prepare destination image
  imgDst.resize(w, h, (w * 3 + align - 1) / align * align);
  // cache some data
  const uint8 *const dataSrc = imgSrc.data();
  const int bPRSrc = imgSrc.bPR();
  // perform scaling
  for (int y = 0; y < h; ++y) {
    const double yStart = sy * y;
    const double yEnd = std::min(sy * (y + 1), (double)hSrc);
    const int yStartInt = (int)yStart;
    // last source row touched: when yEnd is a whole number the footprint ends
    // exactly on a row boundary, so the row starting there is not included
    // (the variable must not be read inside its own initializer)
    const int yEndFloor = (int)yEnd;
    const int yEndInt = yEndFloor - (yEndFloor == yEnd);
    // covered fraction of the top and bottom source rows
    const double tFrm = 1 + yStartInt - yStart, bFrm = yEnd - yEndInt;
    for (int x = 0; x < w; ++x) {
      const double xStart = sx * x;
      const double xEnd = std::min(sx * (x + 1), (double)wSrc);
      const int xStartInt = (int)xStart;
      // last source column touched, same boundary rule as for rows
      const int xEndFloor = (int)xEnd;
      const int xEndInt = xEndFloor - (xEndFloor == xEnd);
      // covered fraction of the left and right source columns
      double lFrm = 1 + xStartInt - xStart, rFrm = xEnd - xEndInt;
      double pixel[3] = { 0.0, 0.0, 0.0 }; // values of target pixel
      for (int i = yStartInt; i <= yEndInt; ++i) {
        int jData = i * bPRSrc + xStartInt * 3;
        for (int j = xStartInt; j <= xEndInt; ++j) {
          double pixelAdd[3];
          for (int k = 0; k < 3; ++k) {
            pixelAdd[k] = (double)dataSrc[jData++] / sxy;
          }
          if (j == xStartInt) {
            for (int k = 0; k < 3; ++k) pixelAdd[k] *= lFrm;
          } else if (j == xEndInt) {
            for (int k = 0; k < 3; ++k) pixelAdd[k] *= rFrm;
          }
          if (i == yStartInt) {
            for (int k = 0; k < 3; ++k) pixelAdd[k] *= tFrm;
          } else if (i == yEndInt) {
            for (int k = 0; k < 3; ++k) pixelAdd[k] *= bFrm;
          }
          for (int k = 0; k < 3; ++k) pixel[k] += pixelAdd[k];
        }
      }
      imgDst.setPixel(x, y,
        (uint8)pixel[0], (uint8)pixel[1], (uint8)pixel[2]);
    }
  }
  // done
  return true;
}
If I got it right, this implements a bilinear interpolation.
I don't dare to call it a Minimal Complete Verifiable Example although this is what I intended to do.
The complete sample application:
A simplified class Image
image.h:
#ifndef IMAGE_H
#define IMAGE_H
#include <vector>
// convenience type for bytes
typedef unsigned char uint8;

// Image helper class.
// Owns a byte buffer of h rows with bPR bytes per row; pixels are stored as
// three consecutive bytes (see setPixel). Instances cannot be copied.
class Image {
  private: // variables:
    int _w, _h; // image size
    size_t _bPR; // bytes per row
    std::vector<uint8> _data; // image data
  public: // methods:
    // constructor: creates an empty image (0 x 0, no data).
    Image(): _w(0), _h(0), _bPR(0) { }
    // destructor.
    ~Image() = default;
    // copy constructor.
    Image(const Image&) = delete; // = default; would work as well.
    // copy assignment.
    Image& operator=(const Image&) = delete; // = default; would work as well.
    // returns width of image.
    int w() const { return _w; }
    // returns height of image.
    int h() const { return _h; }
    // returns bytes per row.
    size_t bPR() const { return _bPR; }
    // returns pointer to the first byte of row y (row 0 by default).
    // Uses pointer arithmetic on the buffer so that calling it on an empty
    // image does not index into an empty vector.
    const uint8* data(
      int y = 0) // row number
      const {
      return _data.data() + y * _bPR;
    }
    // returns data size (in bytes).
    size_t size() const { return _data.size(); }
    // clears image.
    void clear();
    // resizes image.
    uint8* resize( // returns allocated buffer
      int w, // image width
      int h, // image height
      int bPR); // bytes per row
    // returns pixel.
    int getPixel(
      int x, // column
      int y) // row
      const;
    // sets pixel.
    void setPixel(
      int x, // column
      int y, // row
      uint8 r, uint8 g, uint8 b);
    // sets pixel from a packed value: bits 0..7 are passed as r, bits 8..15
    // as g, bits 16..23 as b.
    void setPixel(
      int x, // column
      int y, // row
      int value) // RGB value
    {
      setPixel(x, y, value & 0xff, value >> 8 & 0xff, value >> 16 & 0xff);
    }
};
// helper functions:
// returns bits 0..7 of a packed pixel value (the red component).
inline uint8 getR(int value)
{
  return static_cast<uint8>(value & 0xff);
}
// returns bits 8..15 of a packed pixel value (the green component).
inline uint8 getG(int value)
{
  return static_cast<uint8>((value >> 8) & 0xff);
}
// returns bits 16..23 of a packed pixel value (the blue component).
inline uint8 getB(int value)
{
  return static_cast<uint8>((value >> 16) & 0xff);
}
#endif // IMAGE_H
image.cc:
#include <cassert>
#include "image.h"
// resets the image to the empty state: size and bytes-per-row become 0 and
// the pixel data is discarded.
void Image::clear()
{
  _w = 0;
  _h = 0;
  _bPR = 0;
  _data.clear();
}
// allocates image data for w x h pixels with bPR bytes per row.
// A row must be able to hold 3 bytes per pixel (3 * w <= bPR).
// The buffer size is computed in size_t so that h * bPR cannot overflow int
// for large images.
uint8* Image::resize( // returns allocated buffer
  int w, // image width
  int h, // image height
  int bPR) // bytes per row
{
  assert(w >= 0 && 3 * w <= bPR);
  assert(h >= 0);
  _w = w; _h = h; _bPR = bPR;
  const size_t size = static_cast<size_t>(h) * static_cast<size_t>(bPR);
  _data.resize(size);
  return _data.data();
}
// returns the pixel at (x, y) packed into an int: first stored byte in bits
// 0..7, second in bits 8..15, third in bits 16..23.
int Image::getPixel(
  int x, // column
  int y) // row
  const {
  assert(x >= 0 && x < _w);
  assert(y >= 0 && y < _h);
  const size_t base = y * _bPR + 3 * x;
  int value = 0;
  // fold the three bytes in from the last to the first
  for (int k = 2; k >= 0; --k) {
    value = (value << 8) | _data[base + k];
  }
  return value;
}
// sets pixel.
void Image::setPixel(
int x, // column
int y, // row
uint8 r, uint8 g, uint8 b) // R, G, B values
{
assert(x >= 0 && x < _w);
assert(y >= 0 && y < _h);
const size_t offs = y * _bPR + 3 * x;
_data[offs + 0] = r;
_data[offs + 1] = g;
_data[offs + 2] = b;
}
Image Scaling
imageScale.h:
#ifndef IMAGE_SCALE_H
#define IMAGE_SCALE_H
#include "image.h"
/* scales an image to a given width and height.
*
* Note:
* imgSrc and imgDst must not be the same object.
* NOTE(review): the definition is not part of this header; presumably it
* dispatches to the scaleDown()/scaleUp() helpers in imageScale.cc - confirm.
*/
bool scaleTo( // returns true if successful
const Image &imgSrc, // source image
Image &imgDst, // destination image
int w, int h, // destination width and height
int align = 4); // row alignment
/* scales an image by separate horizontal and vertical scaling factors.
 *
 * The destination size is the source size multiplied by the respective
 * factor, truncated to int, and is passed on to scaleTo().
 * Returns false without calling scaleTo() unless both factors are > 0.
 *
 * Note:
 * imgSrc and imgDst must not be the same object.
 */
inline bool scaleXY( // returns true if successful
  const Image &imgSrc, // source image
  Image &imgDst, // destination image
  double sX, // horizontal scaling factor (must be > 0 but not too large)
  double sY, // vertical scaling factor (must be > 0 but not too large)
  int align = 4) // row alignment
{
  if (!(sX > 0.0 && sY > 0.0)) {
    return false;
  }
  const int wDst = (int)(sX * imgSrc.w());
  const int hDst = (int)(sY * imgSrc.h());
  return scaleTo(imgSrc, imgDst, wDst, hDst, align);
}
/* scales an image by a single scaling factor applied to both axes.
*
* Forwards to scaleXY() with s as the horizontal and the vertical factor.
*
* Note:
* imgSrc and imgDst must not be the same object.
*/
inline bool scale( // returns true if successful
const Image &imgSrc, // source image
Image &imgDst, // destination image
double s, // scaling factor (must be > 0 but not too large)
int align = 4) // row alignment
{
return scaleXY(imgSrc, imgDst, s, s, align);
}
#endif // IMAGE_SCALE_H
imageScale.cc:
#include <cassert>
#include <algorithm>
#include "imageScale.h"
namespace {
// limits value to the range [min, max]: returns min when value is below it,
// max when value is above it, and value itself otherwise.
template <typename VALUE>
VALUE clip(VALUE value, VALUE min, VALUE max)
{
  if (value < min) {
    return min;
  }
  if (value > max) {
    return max;
  }
  return value;
}
// Scales imgSrc down to w x h pixels and stores the result in imgDst.
//
// Every destination pixel is the area-weighted average of the source pixels
// covered by its footprint: fully covered pixels contribute 1 / (sx * sy),
// pixels on the edge of the footprint contribute in proportion to the covered
// fraction. Pixels are 3 bytes each.
//
//   imgSrc - source image
//   imgDst - destination image; resized here (must not be imgSrc)
//   w, h   - destination size; 0 < w <= source width, 0 < h <= source height
//   align  - the destination bytes-per-row is rounded up to a multiple of this
// Returns true (always).
bool scaleDown(
  const Image &imgSrc,
  Image &imgDst,
  int w, int h,
  int align)
{
  const int wSrc = imgSrc.w(), hSrc = imgSrc.h();
  assert(w > 0 && w <= wSrc && h > 0 && h <= hSrc);
  // compute scaling factors
  const double sx = (double)wSrc / (double)w;
  const double sy = (double)hSrc / (double)h;
  const double sxy = sx * sy;
  // prepare destination image
  imgDst.resize(w, h, (w * 3 + align - 1) / align * align);
  // cache some data
  const uint8 *const dataSrc = imgSrc.data();
  const int bPRSrc = imgSrc.bPR();
  // perform scaling
  for (int y = 0; y < h; ++y) {
    const double yStart = sy * y;
    const double yEnd = std::min(sy * (y + 1), (double)hSrc);
    const int yStartInt = (int)yStart;
    // last source row touched: when yEnd is a whole number the footprint ends
    // exactly on a row boundary, so the row starting there is not included
    // (the variable must not be read inside its own initializer)
    const int yEndFloor = (int)yEnd;
    const int yEndInt = yEndFloor - (yEndFloor == yEnd);
    // covered fraction of the top and bottom source rows
    const double tFrm = 1 + yStartInt - yStart, bFrm = yEnd - yEndInt;
    for (int x = 0; x < w; ++x) {
      const double xStart = sx * x;
      const double xEnd = std::min(sx * (x + 1), (double)wSrc);
      const int xStartInt = (int)xStart;
      // last source column touched, same boundary rule as for rows
      const int xEndFloor = (int)xEnd;
      const int xEndInt = xEndFloor - (xEndFloor == xEnd);
      // covered fraction of the left and right source columns
      double lFrm = 1 + xStartInt - xStart, rFrm = xEnd - xEndInt;
      double pixel[3] = { 0.0, 0.0, 0.0 }; // values of target pixel
      for (int i = yStartInt; i <= yEndInt; ++i) {
        int jData = i * bPRSrc + xStartInt * 3;
        for (int j = xStartInt; j <= xEndInt; ++j) {
          double pixelAdd[3];
          for (int k = 0; k < 3; ++k) {
            pixelAdd[k] = (double)dataSrc[jData++] / sxy;
          }
          if (j == xStartInt) {
            for (int k = 0; k < 3; ++k) pixelAdd[k] *= lFrm;
          } else if (j == xEndInt) {
            for (int k = 0; k < 3; ++k) pixelAdd[k] *= rFrm;
          }
          if (i == yStartInt) {
            for (int k = 0; k < 3; ++k) pixelAdd[k] *= tFrm;
          } else if (i == yEndInt) {
            for (int k = 0; k < 3; ++k) pixelAdd[k] *= bFrm;
          }
          for (int k = 0; k < 3; ++k) pixel[k] += pixelAdd[k];
        }
      }
      imgDst.setPixel(x, y,
        (uint8)pixel[0], (uint8)pixel[1], (uint8)pixel[2]);
    }
  }
  // done
  return true;
}
// Scales imgSrc up to w x h pixels by blending neighbouring source pixels
// (3 bytes per pixel).
//
// imgSrc: source image
// imgDst: destination image; resized to w x h with a row size of w * 3 bytes
//         rounded up to a multiple of align
// w, h:   destination size; must be non-zero with w >= imgSrc.w() and
//         h >= imgSrc.h() (checked by assert only)
// align:  row alignment of the destination image in bytes
// Always returns true.
bool scaleUp(
const Image &imgSrc,
Image &imgDst,
int w, int h,
int align)
{
const int wSrc = imgSrc.w(), hSrc = imgSrc.h();
assert(w && w >= wSrc && h && h >= hSrc);
// compute scaling factors
const double sx = (double)wSrc / (double)w;
const double sy = (double)hSrc / (double)h;
// prepare destination image
imgDst.resize(w, h, (w * 3 + align - 1) / align * align);
// cache some data
const uint8 *const dataSrc = imgSrc.data();
const int bPRSrc = imgSrc.bPR();
// perform scaling
for (int y = 0; y < h; ++y) {
// source span [yStart, yEnd] of this destination row; yEnd is limited to
// hSrc - 1 so that yEndInt never indexes past the last source row
const double yStart = sy * y;
const double yEnd = std::min(sy * (y + 1), (double)hSrc - 1);
const int yStartInt = (int)yStart;
const int yEndInt = (int)yEnd;
if (yStartInt < yEndInt) {
// the span touches two source rows: bFract is the weight of row yEndInt,
// tFract the weight of row yStartInt
const double bFract = clip((double)((yEnd - yEndInt) / sy), 0.0, 1.0);
const double tFract = 1.0 - bFract;
for (int x = 0; x < w; ++x) {
const double xStart = sx * x;
const double xEnd = std::min(sx * (x + 1), (double)wSrc - 1);
const int xStartInt = (int)xStart, xEndInt = (int)xEnd;
// only elements 0..2 are used
double pixel[4];
if (xStartInt < xEndInt) {
// the span touches two source columns: blend 2 x 2 source pixels
const double rFract
= clip((double)((xEnd - xEndInt) / sx), 0.0, 1.0);
const double lFract = 1.0 - rFract;
int jData = yStartInt * bPRSrc + xStartInt * 3;
for (int k = 0; k < 3; ++k) {
pixel[k] = tFract * lFract * dataSrc[jData++];
}
// jData now points to the pixel stored directly after xStartInt
for (int k = 0; k < 3; ++k) {
pixel[k] += tFract * rFract * dataSrc[jData++];
}
jData = yEndInt * bPRSrc + xStartInt * 3;
for (int k = 0; k < 3; ++k) {
pixel[k] += bFract * lFract *dataSrc[jData++];
}
for (int k = 0; k < 3; ++k) {
pixel[k] += bFract * rFract *dataSrc[jData++];
}
} else {
// single source column: blend the two rows only
int jData = yStartInt * bPRSrc + xStartInt * 3;
for (int k = 0; k < 3; ++k) {
pixel[k] = tFract * dataSrc[jData++];
}
jData = yEndInt * bPRSrc + xStartInt * 3;
for (int k = 0; k < 3; ++k) {
pixel[k] += bFract * dataSrc[jData++];
}
}
imgDst.setPixel(x, y,
(uint8)pixel[0], (uint8)pixel[1], (uint8)pixel[2]);
}
} else {
// the span lies within a single source row
for (int x = 0; x < w; ++x) {
const double xStart = sx * x;
const double xEnd = std::min(sx * (x + 1), (double)wSrc - 1);
const int xStartInt = (int)xStart, xEndInt = (int)xEnd;
double pixel[3];
if (xStartInt < xEndInt) {
// two source columns: blend the pixel at xStartInt with the one stored
// directly after it
const double rFract
= clip((double)((xEnd - xEndInt) / sx), 0.0, 1.0);
const double lFract = 1.0 - rFract;
int jData = yStartInt * bPRSrc + xStartInt * 3;
for (int k = 0; k < 3; ++k) {
pixel[k] = lFract * dataSrc[jData++];
}
for (int k = 0; k < 3; ++k) {
pixel[k] += rFract * dataSrc[jData++];
}
} else {
// single source pixel: copy it unchanged
int jData = yStartInt * bPRSrc + xStartInt * 3;
for (int k = 0; k < 3; ++k) pixel[k] = dataSrc[jData++];
}
imgDst.setPixel(x, y,
(uint8)pixel[0], (uint8)pixel[1], (uint8)pixel[2]);
}
}
}
// done
return true;
}
} // namespace
// Scales imgSrc to exactly w x h pixels and stores the result in imgDst.
//
// If both dimensions grow (or stay), scaleUp() is used; if both shrink (or
// stay), scaleDown() is used. In the mixed case the width is adjusted first
// into a temporary image (row alignment 1), then the height is adjusted into
// imgDst.
// Returns false for a non-positive target size, otherwise the result of the
// scaling function(s).
bool scaleTo(const Image &imgSrc, Image &imgDst, int w, int h, int align)
{
  Image imgTmp;
  if (w <= 0 || h <= 0) return false;

  const bool widthGrows = w >= imgSrc.w();
  if (widthGrows && h >= imgSrc.h()) {
    return scaleUp(imgSrc, imgDst, w, h, align);
  }
  if (w <= imgSrc.w() && h <= imgSrc.h()) {
    return scaleDown(imgSrc, imgDst, w, h, align);
  }
  if (widthGrows) {
    // wider but lower: enlarge the width, then shrink the height
    if (!scaleUp(imgSrc, imgTmp, w, imgSrc.h(), 1)) return false;
    return scaleDown(imgTmp, imgDst, w, h, align);
  }
  // narrower but higher: shrink the width, then enlarge the height
  if (!scaleDown(imgSrc, imgTmp, w, imgSrc.h(), 1)) return false;
  return scaleUp(imgTmp, imgDst, w, h, align);
}
PPM file IO
imagePPM.h:
#ifndef IMAGE_PPM_H
#define IMAGE_PPM_H
#include <iostream>
#include "image.h"
// Reading and writing of binary PPM files (magic number "P6", maximum
// colour value 255) from and to Image objects.

// reads a binary PPM file.
// Header lines that are empty or start with '#' are skipped.
// Each image row occupies 3 * width bytes rounded up to a multiple of align.
bool readPPM( // returns true if successful
std::istream &in, // input stream (must be opened with std::ios::binary)
Image &img, // image to read into
int align = 4); // row alignment
// writes binary PPM file.
// Writes the header followed by 3 * width bytes for every image row.
bool writePPM( // returns true if successful
std::ostream &out, // output stream (must be opened with std::ios::binary)
const Image &img); // image to write from
#endif // IMAGE_PPM_H
imagePPM.cc:
#include <sstream>
#include <string>
#include "imagePPM.h"
// reads a binary PPM file.
//
// Expected layout: a first line holding exactly "P6", then width, height and
// maximum colour value (which must be 255) spread over one or more lines,
// then the raw pixel data with 3 bytes per pixel. Header lines that are empty
// or start with '#' are skipped.
// The image is resized to width x height with a row size of 3 * width bytes
// rounded up to a multiple of align.
// Fails (returns false) on a stream error, a wrong magic number, an
// unsupported maximum colour value, a non-positive image size or a
// non-positive alignment.
bool readPPM( // returns true if successful
  std::istream &in, // input stream (must be opened with std::ios::binary)
  Image &img, // image to read into
  int align) // row alignment
{
  // a non-positive alignment would divide by zero in the row size below
  if (align <= 0) return false;
  // parse header
  std::string buffer;
  if (!getline(in, buffer)) return false;
  if (buffer != "P6") {
    std::cerr << "Wrong header! 'P6' expected.\n";
    return false;
  }
  int w = 0, h = 0, t = 0;
  // i counts the header values read so far (0: none, 3: w, h and t complete)
  for (int i = 0; i < 3;) {
    if (!getline(in, buffer)) return false;
    if (buffer.empty()) continue; // skip empty lines
    if (buffer[0] == '#') continue; // skip comments
    std::istringstream str(buffer);
    // The cases fall through on purpose: one line may hold several values.
    // A failed extraction continues the for loop with the next line.
    switch (i) {
      case 0:
        if (!(str >> w)) continue;
        ++i;
        // fall through
      case 1:
        if (!(str >> h)) continue;
        ++i;
        // fall through
      case 2:
        if (!(str >> t)) continue;
        ++i;
        break;
      default:
        break;
    }
  }
  if (t != 255) {
    std::cerr << "Unsupported format! t = 255 expected.\n";
    return false;
  }
  // reject sizes a malformed header could smuggle into the allocation
  if (w <= 0 || h <= 0) {
    std::cerr << "Invalid image size!\n";
    return false;
  }
  // allocate image buffer
  uint8 *data = img.resize(w, h, (w * 3 + align - 1) / align * align);
  // read data row by row; only the 3 * w payload bytes of a row are read,
  // then data advances by the full row size
  for (int i = 0; i < h; ++i) {
    if (!in.read((char*)data, 3 * img.w())) return false;
    data += img.bPR();
  }
  // done
  return true;
}
// writes binary PPM file.
bool writePPM( // returns true if successful
std::ostream &out, // output stream (must be opened with std::ios::binary)
const Image &img) // image to write from
{
// write header
if (!(out << "P6\n" << img.w() << ' ' << img.h() << " 255\n")) return false;
// write image data
for (size_t y = 0; y < img.h(); ++y) {
const uint8 *const data = img.data(y);
if (!out.write((const char*)data, 3 * img.w())) return false;
}
// done
return true;
}
The main application
scaleRGBImg.cc:
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include "image.h"
#include "imagePPM.h"
#include "imageScale.h"
// Command line tool: reads a binary PPM file, scales it by a factor and
// writes the result as a binary PPM file.
//
// Usage: scaleRGBImg IN_FILE SCALE OUT_FILE
// SCALE must be a finite number greater than 0.
// Returns 0 on success and 1 on any error (after printing a message).
int main(int argc, char **argv)
{
  // read command line arguments
  if (argc <= 3) {
    std::cerr << "Missing arguments!\n";
    std::cout
      << "Usage:\n"
      << "  scaleRGBImg IN_FILE SCALE OUT_FILE\n";
    return 1;
  }
  const std::string inFile = argv[1];
  char *end;
  const double s = std::strtod(argv[2], &end);
  // the whole argument must have been consumed by the conversion
  if (end == argv[2] || *end != '\0') {
    std::cerr << "Invalid scale factor '" << argv[2] << "'!\n";
    return 1;
  }
  // strtod() also accepts "nan" and "inf": NaN passes a plain s <= 0.0 test
  // and infinity is positive, so both have to be rejected explicitly
  if (!std::isfinite(s) || s <= 0.0) {
    std::cerr << "Invalid scale factor " << s << "!\n";
    return 1;
  }
  const std::string outFile = argv[3];
  // read image (a file that failed to open makes readPPM() fail)
  Image imgSrc;
  { std::ifstream fIn(inFile.c_str(), std::ios::binary);
    if (!readPPM(fIn, imgSrc)) {
      std::cerr << "Reading '" << inFile << "' failed!\n";
      return 1;
    }
  }
  // scale image
  Image imgDst;
  if (!scale(imgSrc, imgDst, s)) {
    std::cerr << "Scaling failed!\n";
    return 1;
  }
  // write image; close explicitly so that errors while flushing are noticed
  { std::ofstream fOut(outFile.c_str(), std::ios::binary);
    if (!writePPM(fOut, imgDst) || (fOut.close(), !fOut.good())) {
      std::cerr << "Writing '" << outFile << "' failed!\n";
      return 1;
    }
  }
  // done
  return 0;
}
Test
Compiled in cygwin64:
$ g++ -std=c++11 -o scaleRGBImg scaleRGBImg.cc image.cc imagePPM.cc imageScale.cc
$
A sample image test.ppm for a test – converted to PPM in GIMP:
Test with the sample image:
$ for I in 0.8 0.6 0.4 0.2 ; do echo ./scaleRGBImg test.ppm $I test.$I.ppm ; done
./scaleRGBImg test.ppm 0.8 test.0.8.ppm
./scaleRGBImg test.ppm 0.6 test.0.6.ppm
./scaleRGBImg test.ppm 0.4 test.0.4.ppm
./scaleRGBImg test.ppm 0.2 test.0.2.ppm
$ for I in 0.8 0.6 0.4 0.2 ; do ./scaleRGBImg test.ppm $I test.$I.ppm ; done
$
This is what came out:
test.0.8.ppm:
test.0.6.ppm:
test.0.4.ppm:
test.0.2.ppm:

Detect a 2d collision in C++

I need to write a function that returns a bool: true if two sprites collide and false if they do not. I have been thinking about this for a long time and cannot find an exact solution. The goal is per-pixel collision detection, i.e. there is a collision if two visible pixels (pixels whose alpha value in the RGBA data is different from 0) occupy the same place in space. The function has the following signature:
// Per-pixel collision test between two sprites.
// Returns true if a visible pixel (alpha != 0) of sprite 1 coincides with a
// visible pixel of sprite 2, false otherwise.
bool checkPixelCollision(
// upper left corner of the rectangle that contains sprite 1
const Vector2& pixelPos1,
// size of that rectangle (x = width, y = height)
const Vector2& pixelSize1,
// RGBA data of sprite 1, 4 bytes per pixel (width * height * 4 bytes)
const vector<uint8_t> pixel1,
// the same three values for sprite 2
const Vector2& pixelPos2,
const Vector2& pixelSize2,
const vector<uint8_t> pixel2);
Vector2 is a struct with the next form:
// Pair of floats; used for positions (x, y) as well as for sizes
// (x = width, y = height).
struct Vector2
{
float x;
float y;
};
pixelPos1 is the position of the upper left corner of the rectangle that contains sprite 1, and pixelSize1 is the size of that rectangle (x = width; y = height). pixel1 is a vector that holds the RGBA values of each pixel of the sprite, stored in groups of four: element i holds the red value of a pixel, i + 1 its green value, i + 2 its blue value and i + 3 its alpha value, so the pixel is visible if element i + 3 is different from 0. The size of pixel1 is pixelSize1.x * pixelSize1.y * 4.
The other three parameters of the header are those corresponding to sprite 2. The objective would therefore be to check when there is a collision (either on the side or corner that is) and from there establish a collision rectangle between both rectangles (the coincident area), and set two indexes that travel pixel1 and pixel2 (since each one will have to start from a different position in its corresponding vector).
The problem is that I can not find an optimal and / or easy way to do it and that it works. If anyone knows any way to do it, I would appreciate it very much.
EDIT
Here is my code (it doesn't work)
#include <algorithm>
#include <stdint.h>
#include <vector>
// Pair of floats; used for positions (x, y) as well as for sizes
// (x = width, y = height).
struct Vector2
{
    float x;
    float y;
};

// Limits val to the closed interval [min, max].
float clamp(float val, float min, float max) {
    return std::max(min, std::min(max, val));
}

// Derives the integral width and height of a sprite from its size vector and
// verifies that pixels holds at least width * height RGBA pixels.
// Returns false for sizes below 1 (including NaN) or a buffer that is too small.
static bool spriteDimensions(const Vector2& size, const std::vector<uint8_t>& pixels,
                             std::size_t& width, std::size_t& height)
{
    const std::size_t maxPixels = pixels.size() / 4;
    // written as a negated ">=" so that NaN is rejected as well
    if (!(size.x >= 1.0f && size.y >= 1.0f)) return false;
    // a side longer than the whole buffer cannot be valid; this also keeps the
    // float-to-integer conversions below in range
    if (size.x > (float)maxPixels || size.y > (float)maxPixels) return false;
    width = (std::size_t)size.x;
    height = (std::size_t)size.y;
    return height <= maxPixels / width;
}

// Per-pixel collision test between sprite 1 and sprite 2.
//
// pixelsPos*:  upper left corner of the sprite's bounding rectangle
// pixelsSize*: size of that rectangle in pixels (x = width, y = height)
// pixels*:     RGBA data, 4 bytes per pixel, stored row by row; a pixel is
//              visible if its alpha byte (the 4th one) is not 0
//
// The bounding rectangles are intersected first. Inside the overlap the centre
// of every sprite-1 pixel is mapped to the sprite-2 pixel that contains it and
// the two alpha values are compared. For positions that differ by whole pixels
// this is exact; for fractional offsets it is a nearest-pixel approximation.
// Returns true as soon as two visible pixels coincide, false otherwise (also
// for invalid sizes or buffers that are too small).
bool check(const Vector2& pixelsPos1, const Vector2& pixelsSize1, const std::vector<uint8_t>& pixels1,
           const Vector2& pixelsPos2, const Vector2& pixelsSize2, const std::vector<uint8_t>& pixels2)
{
    std::size_t w1 = 0, h1 = 0, w2 = 0, h2 = 0;
    if (!spriteDimensions(pixelsSize1, pixels1, w1, h1)
        || !spriteDimensions(pixelsSize2, pixels2, w2, h2)) {
        return false;
    }
    // position of sprite 1's origin inside sprite 2's pixel grid
    const float dx = pixelsPos1.x - pixelsPos2.x;
    const float dy = pixelsPos1.y - pixelsPos2.y;
    // Conservative range of sprite-1 columns/rows that can lie over sprite 2.
    // The range is clamped to the sprite before it is converted to integers.
    const float xFirst = std::max(0.0f, -dx - 1.0f);
    const float xLast = std::min((float)w1, (float)w2 - dx + 1.0f);
    const float yFirst = std::max(0.0f, -dy - 1.0f);
    const float yLast = std::min((float)h1, (float)h2 - dy + 1.0f);
    // empty range: the bounding rectangles do not overlap
    if (!(xFirst < xLast && yFirst < yLast)) return false;
    const std::size_t xBegin = (std::size_t)xFirst, xEnd = (std::size_t)xLast;
    const std::size_t yBegin = (std::size_t)yFirst, yEnd = (std::size_t)yLast;

    for (std::size_t y1 = yBegin; y1 < yEnd; ++y1) {
        // centre of the sprite-1 row in sprite-2 coordinates
        const float fy = dy + (float)y1 + 0.5f;
        if (!(fy >= 0.0f && fy < (float)h2)) continue;
        const std::size_t y2 = (std::size_t)fy;
        for (std::size_t x1 = xBegin; x1 < xEnd; ++x1) {
            const float fx = dx + (float)x1 + 0.5f;
            if (!(fx >= 0.0f && fx < (float)w2)) continue;
            const std::size_t x2 = (std::size_t)fx;
            const uint8_t alpha1 = pixels1[(y1 * w1 + x1) * 4 + 3];
            const uint8_t alpha2 = pixels2[(y2 * w2 + x2) * 4 + 3];
            if (alpha1 != 0 && alpha2 != 0) return true;
        }
    }
    return false;
}

// Returns true if a visible pixel of sprite 1 coincides with a visible pixel
// of sprite 2. See check() for the meaning of the parameters.
// The by-value vector parameters are kept from the original signature.
bool checkPixelCollision(const Vector2& pixelPos1, const Vector2& pixelSize1, const std::vector<uint8_t> pixel1, const Vector2& pixelPos2, const Vector2& pixelSize2, const std::vector<uint8_t> pixel2) {
    return check(pixelPos1, pixelSize1, pixel1, pixelPos2, pixelSize2, pixel2);
}
Start by checking if the bounding rectangles of the two sprites overlap. If they don't, great; no collision is possible. If they do overlap, calculate the overlapping rectangle for each sprite and compare pixel by pixel - if pixel a or pixel b is transparent then there is no collision caused by that pixel, if both pixels are non-transparent there is a collision and you are done. If you finish checking all pixels in the overlapping area and there are no collisions you are also done.

RGBA pixel data into D3DLOCKED_RECT

I'm trying to update a 128x128 D3DLOCKED_RECT with sub images using the following code, but it seems to squish them down along the top, the X offset is ignored and the y offset is 60 percent off.
I've also tried to make the texture the correct size and copy it into a 128x128 texture at the correct location using RECT, however this is very slow and didn't seem to work correctly when I attempted it. There must be way to do it using the raw pixel data?
Any help would be much appreciated :)
EDIT: I got it semi working using the below code, the locations are now correct and the sizes. But it's only using the blue channel and everything is grey scale (blue scale?)
// Copies width x height source pixels (srcbytes bytes each) into the locked
// texture, addressing the destination as 32-bit elements with
// lockrect.Pitch / dstbytes elements per row.
srcdata = (byte *) pixels;
dstdata = (unsigned int *)lockrect.pBits;
for (y = yoffset; y < (yoffset + height); y++)
{
for (x = xoffset; x < (xoffset + width); x++)
{
// NOTE(review): dstdata points to unsigned int, so the "+ 0" .. "+ 3" offsets
// below address four consecutive 32-bit elements (this pixel and the next
// three), not the four bytes of one pixel. Each store puts a single byte
// value (0..255) into a whole element.
dstdata[ ( y * lockrect.Pitch / dstbytes + x ) + 0] = (unsigned int)srcdata[0];
dstdata[ ( y * lockrect.Pitch / dstbytes + x ) + 1] = (unsigned int)srcdata[1];
// NOTE(review): reads srcdata[0] again rather than srcdata[2] - confirm intent.
dstdata[ ( y * lockrect.Pitch / dstbytes + x ) + 2] = (unsigned int)srcdata[0];
dstdata[ ( y * lockrect.Pitch / dstbytes + x ) + 3] = (unsigned int)srcdata[3];
srcdata += srcbytes;
}
}'
END Edit
Test call after creating the 128x128 texture:
// Builds a 16 x 16 test pattern in temp and uploads it to the texture at
// offset (0, 0).
int x, y;
byte temp[132*132*4];
// Test texture (pink and black checker)
// Only the first 16 * 16 32-bit entries of temp are written; the checker
// consists of four 8 x 8 quadrants.
for( y = 0; y < 16; y++ )
{
for( x = 0; x < 16; x++ )
{
// true for exactly one of "upper half" / "left half"
if(( y < 8 ) ^ ( x < 8 ))
((uint *)&temp)[y*16+x] = 0xFFFF00FF;
else ((uint *)&temp)[y*16+x] = 0xFF000000;
}
}
// NOTE(review): the statement below lacks its terminating semicolon.
UpdateSubImage (0, 0, 16, 16, temp )
The update function:
void UpdateSubImage (int xoffset, int yoffset, int width, int height, const
GLvoid *pixels)
{
int x, y;
int srcbytes = 4; //Hard coded for now, as all tests are RGBA
int dstbytes = 4; // ^
byte *srcdata;
byte *dstdata;
D3DLOCKED_RECT lockrect;
pTexture->LockRect( 0, &lockrect, NULL, 0);
srcdata = (byte *) pixels;
dstdata = (byte *) lockrect.pBits;
dstdata += (yoffset * width + xoffset) * dstbytes;
for (y = yoffset; y < (yoffset + height); y++)
{
for (x = xoffset; x < (xoffset + width); x++)
{
if (srcbytes == 1)
{
if (dstbytes == 1)
dstdata[0] = srcdata[0];
else if (dstbytes == 4)
{
dstdata[0] = srcdata[0];
dstdata[1] = srcdata[0];
dstdata[2] = srcdata[0];
dstdata[3] = srcdata[0];
}
}
else if (srcbytes == 3)
{
if (dstbytes == 1)
dstdata[0] = ((int) srcdata[0] + (int) srcdata[1] + (int) srcdata[2]) / 3;
else if (dstbytes == 4)
{
dstdata[0] = srcdata[2];
dstdata[1] = srcdata[1];
dstdata[2] = srcdata[0];
dstdata[3] = 255;
}
}
else if (srcbytes == 4)
{
if (dstbytes == 1)
dstdata[0] = ((int) srcdata[0] + (int) srcdata[1] + (int) srcdata[2]) / 3;
else if (dstbytes == 4)
{
dstdata[0] = srcdata[2];
dstdata[1] = srcdata[1];
dstdata[2] = srcdata[0];
dstdata[3] = srcdata[3];
}
}
// advance
srcdata += srcbytes;
dstdata += dstbytes;
}
}
pTexture->UnlockRect(0);
}
What the output looks like:
What the output should look like:
You're assuming that the data accessible through lockrect.pBits is linear in memory. This is in general not the case. Instead, there is a constant offset between the starts of consecutive rows, which is given by the lockrect.Pitch value.
To get the address of a pixel in the destination use:
// pBits is a void pointer, so it has to be converted to a byte pointer before
// any offset can be added; Pitch is the distance between two rows in bytes.
byte * destAddr = ((byte *) lockrect.pBits + y * lockrect.Pitch + 4 * x);
// for 32 bit images. For other formats adjust the hard-coded 4.
Thanks for the help :), in the end the following code worked:
Can it be made faster?
// Copies width x height source pixels into the locked texture. Every
// destination pixel is addressed as lockrect.Pitch * y + dstbytes * x, i.e.
// rows are lockrect.Pitch bytes apart.
for (y = yoffset; y < (yoffset + height); y++)
{
for (x = xoffset; x < (xoffset + width); x++)
{
// NOTE(review): ARGB is declared elsewhere; the byte order produced by the
// memcpy below depends on its member layout - confirm it matches the
// texture format.
ARGB pixel;
pixel.r = srcdata[0];
pixel.g = srcdata[1];
pixel.b = srcdata[2];
pixel.a = srcdata[3];
memcpy( &dstdata[lockrect.Pitch * y + dstbytes * x], &pixel, dstbytes );
// the source block is stored without row padding
srcdata += srcbytes;
}
}

Seeded region growing with opencv

I need to select a pixel value and apply the region growing in terms of the seed pixel. After trying to write the code, the result was always a black image
regardless of what seed point I used. The whole problem is involved in the GrowColor function. My guess is a logical error with the ifs.
#include <cv.h>
#include <highgui.h>
using namespace std;
// Image width (xDim) and height (yDim), set in main(); zDim is not used in
// the code shown here.
int xDim, yDim, zDim;
// A pixel joins the region if its distance to the region mean (Diff) is below
// this value.
float ThreshHold = 45.0;
// Per-channel sums over all pixels counted in coont.
unsigned long total[3];
// coont: number of pixels summed up in total; tt is not used in the code
// shown here.
int coont, tt;
// Source image.
IplImage *Image1;
// Single-channel result image; region pixels are set to 120 by GrowColor().
IplImage *Image2;
// Scratch pixel values shared by main() and GrowColor().
CvScalar s = cvScalar(0, 0, 0, 0);
CvScalar s11 = cvScalar(0, 0, 0, 0);
// Scratch values of GrowColor(): distance to the mean, per-channel mean and
// per-channel difference.
int Diff, mean[3], temp[3];
void GrowColor(int x, int y);
// Loads "lenah.jpg", grows a region from a fixed seed pixel and shows the
// source image and the region mask.
//
// The per-channel sums in total and the pixel count in coont are initialised
// from the 11 x 11 window around the seed (clipped to the image) before
// GrowColor() is started.
// Returns 1 if the image cannot be loaded, 0 otherwise.
int main(int argc, char *argv[]) {
    const int Xseed = 40, Yseed = 234;
    int x, y;
    Image1 = cvLoadImage("lenah.jpg");
    // cvLoadImage() returns a null pointer if the file is missing or unreadable
    if (!Image1)
        return 1;
    yDim = Image1->height;
    xDim = Image1->width;
    //New image
    Image2 = cvCreateImage(cvSize(Image1->width, Image1->height), IPL_DEPTH_8U,
            1);
    cvZero(Image2);
    total[0] = total[1] = total[2] = coont = 0;
    //Process
    for (y = Yseed - 5; y <= Yseed + 5; y++)
        for (x = Xseed - 5; x <= Xseed + 5; x++)
            if ((x >= 0) && (y >= 0) && (x < xDim) && (y < yDim)) {
                coont++;
                // cvGet2D() takes the row (y) first, then the column (x)
                s = cvGet2D(Image1, y, x);
                // channel values of an 8-bit image are never negative
                total[0] += (unsigned long) s.val[0];
                total[1] += (unsigned long) s.val[1];
                total[2] += (unsigned long) s.val[2];
            }
    GrowColor(Xseed, Yseed);
    cvNamedWindow("wndname", 1);
    cvShowImage("original", Image1);
    cvShowImage("wndname", Image2);
    cvWaitKey(0);
    return 0;
}
void GrowColor(int x, int y) {
//Check to see if point already part of region
s.val[0] = 0;
s.val[1] = 0;
s.val[2] = 0;
s.val[3] = 0;
if ((x < 1) && (y < 1))
s = cvGet2D(Image2, x, y);
if (s.val[0] == 0) {
int k;
if ((x == 1) && (y == 1))
s11 = cvGet2D(Image1, x, y);
mean[0] = total[0] / coont;
mean[1] = total[1] / coont;
mean[2] = total[2] / coont;
temp[0] = abs(s11.val[0]) - mean[0];
temp[1] = abs(s11.val[1]) - mean[1];
temp[2] = abs(s11.val[2]) - mean[2];
Diff =
(int) (sqrt(
(temp[0] * temp[0] + temp[1] * temp[1]
+ temp[2] * temp[2]) / 3));
if (Diff < ThreshHold) {
total[0] += abs(s11.val[0]);
total[1] += abs(s11.val[1]);
total[2] += abs(s11.val[2]);
coont++;
s.val[0] = 120;
if ((x > 0) && (y > 0))
cvSet2D(Image2, x, y, s);
if (x > 2)
GrowColor(x - 1, y);
if (y > 2)
GrowColor(x, y - 1);
if (x < xDim - 2)
GrowColor(x + 1, y);
if (y < yDim - 2)
GrowColor(x, y + 1);
}
}
}
Making GrowColor a recursive function may result in an infinite loop. Check the code in that function once.