I have to use glDrawPixels to implement a raster algorithm.
Right now I'm only trying to get a simple example of glDrawPixels working but having an issue.
// Allocate a 512x512 tightly packed RGB buffer and try to fill it solid red.
GLint height, width, size = 0;
GLbyte *image = NULL;
int i,j=0;
width = 512;
height = 512;
size = width*height;
image = (GLbyte*)malloc(sizeof(GLbyte)*size*3);
// Outer loop: i advances by one row (width*3 bytes) per iteration.
for(i = 0; i < size*3; i=i+width*3){
// BUG: j starts at i but the loop bound is width*3, so for every row after
// the first (i >= width*3) the condition is immediately false and the inner
// loop never runs -- hence only one red row appears on screen.
for(j = i; j < width*3; j=j+3){
image[j] = 0xFF;
image[j+1] = 0x00;
image[j+2] = 0x00;
}
}
// NOTE(review): GL_BYTE is a *signed* type, so 0xFF is read as -1;
// GL_UNSIGNED_BYTE is almost certainly what is intended here -- confirm
// against the glDrawPixels documentation.
glDrawPixels(width, height, GL_RGB, GL_BYTE, image);
free(image);
// NOTE(review): presumably glutSwapBuffers() is meant; GLU itself has no
// gluSwapBuffers entry point -- verify against the toolkit in use.
gluSwapBuffers();
Above is the code that I'm trying to get to work, from my understanding it should simply draw a 512x512 red square.
However what I get is one red row at the bottom and everything else is grey.
Your second for() loop is broken -- you're starting at i, but only going up to width * 3, so it doesn't run at all when i > 0.
Here's a simpler approach:
/* Fill every pixel of the image with opaque red, 3 bytes (RGB) per pixel. */
for (i = 0; i < height; i++) {
    for (j = 0; j < width; j++) {
        GLbyte *px = image + (i * width + j) * 3;
        px[0] = 0xFF; /* R */
        px[1] = 0x00; /* G */
        px[2] = 0x00; /* B */
    }
}
Your loop conditions look off to me. (After the first row, the condition on j will always be true, and the inner loop won't execute.) An easier way to do it would be to do something like this:
for (y = 0; y < height; y++)
{
    /* First byte of row y: rows are tightly packed, 3 bytes per pixel */
    GLbyte* row = image + y * width * 3;
    for (x = 0; x < width; x++)
    {
        GLbyte* px = row + x * 3;
        px[0] = 0xFF; /* red */
        px[1] = 0x00;
        px[2] = 0x00;
    }
}
Related
For some context, I'm getting real-time image data from a camera in a form of 1D binary data and a specified format. I want to convert this format to RGBA or BGRA and use it to texture a screen-aligned quad. However, I seem to be misunderstanding some core concepts about how generating and loading texture in OpenGL works, since I can't get the following example to work correctly:
// Draws the screen-aligned quad, re-uploading a generated solid-red RGBA
// texture each frame. (Debug/bring-up code: the pixel buffer is rebuilt on
// every call.)
void OpenGLRenderer::renderScreenAlignedQuad(const XrCompositionLayerProjectionView& view)
{
CHECK_GL_ERROR(glBindVertexArray(m_screenAlignedQuad.vao));
CHECK_GL_ERROR(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_screenAlignedQuad.indexBuffer));
// Update texture
glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
glBindTexture(GL_TEXTURE_2D, m_screenAlignedQuad.texture);
#define BUFF_HEIGHT 1152
#define BUFF_WIDTH 1152
// NOTE(review): this allocation is never freed -- the buffer leaks on every
// frame (the later revision of this function at least calls delete).
unsigned char *buffer = new unsigned char[BUFF_HEIGHT * BUFF_WIDTH * 4];
for (int32_t y = 0; y < BUFF_HEIGHT; y++) {
for (int32_t x = 0; x < BUFF_WIDTH; x++) {
// BUG: must be (y * BUFF_WIDTH + x) * 4 -- as written only x is scaled by
// the 4-byte pixel size, so rows overlap and most of the buffer is never
// written (this is exactly what the answer below points out).
int32_t ind = y * BUFF_WIDTH + x * 4;
buffer[ind] = 255; // R
buffer[ind + 1] = 0; // G
buffer[ind + 2] = 0; // B
buffer[ind + 3] = 255; // A
}
}
{// =! Critical section !=
// The mutex will be unlocked when this object goes out of scope;
// Note that it blocks other threads from writing, but allows reading
std::shared_lock<std::shared_mutex> sl(m_videoStreamContext.g_currentFrameDataMutex);
// NOTE(review): glTexImage2D takes (…, width, height, …); BUFF_HEIGHT is
// passed as width here. Harmless while both are 1152, but swapped.
CHECK_GL_ERROR(glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, BUFF_HEIGHT, BUFF_WIDTH, 0, GL_RGBA, GL_UNSIGNED_BYTE, buffer));
}// =! Critical section !=
CHECK_GL_ERROR(glDrawElements(GL_TRIANGLES, m_screenAlignedQuad.indexCount, GL_UNSIGNED_SHORT, 0));
}
What I wanted to achieve here is to texture to whole screen red. Instead, I get this:
The texture coordinates seem to be alright (I was able to texture a loaded image correctly before):
For some more debugging information I add some more colors:
// Second debugging attempt: upload a texture split into red top half and
// blue/green bottom quadrants.
void OpenGLRenderer::renderScreenAlignedQuad(const XrCompositionLayerProjectionView& view)
{
CHECK_GL_ERROR(glBindVertexArray(m_screenAlignedQuad.vao));
CHECK_GL_ERROR(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_screenAlignedQuad.indexBuffer));
// Update texture
glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
glBindTexture(GL_TEXTURE_2D, m_screenAlignedQuad.texture);
unsigned char *buffer = new unsigned char[BUFF_HEIGHT * BUFF_WIDTH * 4];
for (int32_t y = 0; y < BUFF_HEIGHT; y++) {
// BUG (see answer below): x now plays the role of the byte offset within a
// row, so the bound should be 4 * BUFF_WIDTH and the row term should be
// y * 4 * BUFF_WIDTH. As written, only a quarter of each row -- and a
// quarter of the rows -- of the buffer is filled.
for (int32_t x = 0; x < BUFF_WIDTH; x=x+4) {
int32_t ind = y * BUFF_WIDTH + x;
if (y < BUFF_HEIGHT / 2) {
buffer[ind] = 255; // R
buffer[ind + 1] = 0; // G
buffer[ind + 2] = 0; // B
buffer[ind + 3] = 255; // A
} else if (x < BUFF_WIDTH / 2) {
buffer[ind] = 0; // R
buffer[ind + 1] = 0; // G
buffer[ind + 2] = 255; // B
buffer[ind + 3] = 255; // A
} else {
buffer[ind] = 0; // R
buffer[ind + 1] = 255; // G
buffer[ind + 2] = 0; // B
buffer[ind + 3] = 255; // A
}
}
}
{// =! Critical section !=
// The mutex will be unlocked when this object goes out of scope;
// Note that it blocks other threads from writing, but allows reading
std::shared_lock<std::shared_mutex> sl(m_videoStreamContext.g_currentFrameDataMutex);
CHECK_GL_ERROR(glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, BUFF_HEIGHT, BUFF_WIDTH, 0, GL_RGBA, GL_UNSIGNED_BYTE, buffer));
}// =! Critical section !=
CHECK_GL_ERROR(glDrawElements(GL_TRIANGLES, m_screenAlignedQuad.indexCount, GL_UNSIGNED_SHORT, 0));
// NOTE(review): buffer was allocated with new[]; this should be
// delete[] buffer (plain delete on a new[] pointer is undefined behavior).
delete buffer;
}
The output looks like this:
So it looks like the texture is rendered too small in both directions. The wrap setting on the texture is set to clamp, so it should not repeat as it does. What am I doing wrong here?
Edit: Please, ignore any obvious inefficiencies or ugly code structure as long as it does not affect the correctness of the program. I'm trying to get the simplest possible version working for now.
Your 2d-to-1d index calculation is just broken:
int32_t ind = y * BUFF_WIDTH + x * 4;
That is supposed to be
int32_t ind = (y * BUFF_WIDTH + x) * 4;
You're making the same basic mistake also in your second approach, just obfuscated a bit more:
for (int32_t y = 0; y < BUFF_HEIGHT; y++) {
for (int32_t x = 0; x < BUFF_WIDTH; x=x+4) {
int32_t ind = y * BUFF_WIDTH + x;
x here is now what x * 4 was before (but your loop should then go to x <= 4*BUFF_WIDTH), and ind = y * 4 * BUFF_WIDTH + x would be correct.
I have the raw color data for four images, let's call them 1, 2, 3, and 4. I am storing the data in an unsigned char * with allocated memory. Individually I can manipulate or encode the images but when trying to concatenate or order them into a single image it works but takes more time than I would like.
I would like to create a 2 by 2 of the raw image data to encode as a single image.
1 2
3 4
For my example, each image is 400 by 225 with RGBA (360000 bytes). I'm doing a for loop with memcpy, where
// Copy one source row (400 RGBA pixels = 1600 bytes) per iteration into a
// destination that is two images wide: dest stride is (400 + 400) * 4 bytes,
// source stride is 400 * 4 bytes.
for (int j = 0; j < 225; j++)
{
std::memcpy(dest + (j * (400 + 400) * 4), src + (j * 400 * 4), 400 * 4); // one full source row
}
for each image with an offset for the starting position added in (the example above would only work for the top left of course).
This works but I'm wondering if this is a solved problem with a better solution, either in an algorithm described somewhere or a small library.
#include <iostream>
const int width = 6;
const int height = 4;
constexpr int n = width * height;
// Demo: tile four width x height images a,b,c,d into one 2x2 mosaic
//   a b
//   c d
// and print the combined (2*width) x (2*height) result.
int main()
{
    unsigned char a[n], b[n], c[n], d[n];
    unsigned char dst[n * 4];
    /* init data: each source image is filled with its own letter */
    for (int k = 0; k < n; k++) {
        a[k] = 'a';
        b[k] = 'b';
        c[k] = 'c';
        d[k] = 'd';
    }
    /* re-order: top half rows interleave a|b, bottom half c|d */
    for (int y = 0; y < height; y++) {
        const int src = y * width;     // row start inside each source image
        const int top = y * 2 * width; // same row in the mosaic's top half
        const int bot = n * 2 + top;   // same row in the mosaic's bottom half
        for (int x = 0; x < width; x++) {
            dst[top + x]         = a[src + x];
            dst[top + width + x] = b[src + x];
            dst[bot + x]         = c[src + x];
            dst[bot + width + x] = d[src + x];
        }
    }
    /* print result, one mosaic row per line */
    for (int y = 0; y < height * 2; y++) {
        for (int x = 0; x < width * 2; x++)
            std::cout << dst[y * 2 * width + x];
        std::cout << '\n';
    }
    return 0;
}
I've implemented this Function that applies Erosion Filter to an image
// Morphological erosion with a square matrixSize x matrixSize kernel:
// each output pixel becomes the per-channel minimum of its neighborhood.
// Assumes a 4-bytes-per-pixel format (ARGB32); alpha is forced to 255.
void applyErosionFilter(QImage &input, int matrixSize)
{
int filterOffset = (matrixSize - 1) / 2;
int byteOffset = 0;
// NOTE(review): this declaration only initializes `blue`; red and green
// stay uninitialized (harmless here since all three are reset before use,
// but misleading -- also pointed out in the answer below).
uchar red, green, blue = 0;
uchar morphResetValue = 255;
uchar *data = input.bits();
int stride = input.bytesPerLine();
// NOTE(review): the filterOffset-wide border of newdata is never written
// by the loops below, so those pixels are left uninitialized.
uchar *newdata = new uchar[stride * input.height()];
int i = 0;
for (int y = filterOffset; y < input.height() - filterOffset; y++)
{
for (int x = filterOffset; x < input.width() - filterOffset; x++)
{
byteOffset = y * stride + x * 4;
// Start each channel at 255 so any neighborhood value can only lower it.
red = morphResetValue;
green = morphResetValue;
blue = morphResetValue;
for (int filterY = -filterOffset; filterY <= filterOffset; filterY++)
{
for (int filterX = -filterOffset; filterX <= filterOffset; filterX++)
{
// Byte index of the neighbor pixel (4 bytes per pixel, stride per row).
i = byteOffset + (filterX * 4) + (filterY * stride);
if (data[i] < red)
red = data[i];
if (data[i + 1] < green)
green = data[i + 1];
if (data[i + 2] < blue)
blue = data[i + 2];
}
}
newdata[byteOffset] = red;
newdata[byteOffset + 1] = green;
newdata[byteOffset + 2] = blue;
newdata[byteOffset + 3] = 255;
}
}
// NOTE(review): QImage(uchar*, ...) does not take ownership of or copy the
// buffer it wraps, and fromImage() is normally a QPixmap idiom -- verify
// that this copy/ownership chain is safe before newdata is deleted below.
input = input.fromImage(QImage(newdata, input.width(), input.height(), QImage::Format::Format_ARGB32));
delete [] newdata;
}
it works pretty well, but I've been wondering if there is another way to do this in a more efficient way, perhaps there is a way to perform the computations on the GPU using openGL or so.
As a quick improvement I would suggest using multiple threads to calculate several lines in parallel. You could also use OpenCL or Cuda to achieve this on the GPU, but that would require substantial boilerplate.
I have modified your code to use multiple threads, but I have not tested this, because I currently do not have Qt installed on this device. But this could at least give you a hint on where to start.
(BTW uchar red, green, blue = 0; only initializes blue with 0, while red and green stay uninitialized)
#include <thread>
#include <vector>
// Multi-threaded morphological erosion with a square matrixSize x matrixSize
// kernel: each output pixel becomes the per-channel minimum of its
// neighborhood. The erodible rows (the image minus a filterOffset-wide
// border) are split into bands, one band per hardware thread.
void applyErosionFilter(QImage &input, int matrixSize)
{
    int filterOffset = (matrixSize - 1) / 2;
    uchar morphResetValue = 255;
    uchar *data = input.bits();
    int stride = input.bytesPerLine();
    uchar *newdata = new uchar[stride * input.height()];
    unsigned num_threads = std::thread::hardware_concurrency();
    if (num_threads == 0)
        num_threads = 1;
    std::vector<std::thread> threads;
    // Rows that can actually be eroded (the border is skipped, as in the
    // single-threaded version).
    const int rows = input.height() - 2 * filterOffset;
    for (unsigned t = 0; t < num_threads; ++t)
    {
        int start = rows * t / num_threads;
        int end = rows * (t + 1) / num_threads;
        threads.emplace_back([&](int start_line, int end_line) {
            // BUGFIX: all per-pixel state (byteOffset, the neighbor index,
            // and the channel minima) is local to the lambda. The original
            // captured a shared byteOffset/i by reference, which every
            // worker thread wrote concurrently -- a data race.
            for (int y = start_line; y < end_line; y++)
            {
                for (int x = filterOffset; x < input.width() - filterOffset; x++)
                {
                    int byteOffset = y * stride + x * 4;
                    // Start each channel at 255 so neighbors can only lower it.
                    uchar red = morphResetValue;
                    uchar green = morphResetValue;
                    uchar blue = morphResetValue;
                    for (int filterY = -filterOffset; filterY <= filterOffset; filterY++)
                    {
                        for (int filterX = -filterOffset; filterX <= filterOffset; filterX++)
                        {
                            int i = byteOffset + (filterX * 4) + (filterY * stride);
                            if (data[i] < red)
                                red = data[i];
                            if (data[i + 1] < green)
                                green = data[i + 1];
                            if (data[i + 2] < blue)
                                blue = data[i + 2];
                        }
                    }
                    newdata[byteOffset] = red;
                    newdata[byteOffset + 1] = green;
                    newdata[byteOffset + 2] = blue;
                    newdata[byteOffset + 3] = 255;
                }
            }
        },
        // BUGFIX: start/end are offsets within the eroded region, so the
        // border width must be added back. The original passed them as
        // absolute rows, making the first band start at y == 0 and read out
        // of bounds through negative filterY offsets.
        start + filterOffset, end + filterOffset);
    }
    for (auto &thread : threads)
        thread.join();
    // NOTE(review): QImage(uchar*, ...) does not copy the buffer it wraps;
    // verify the fromImage() result deep-copies before newdata is freed.
    input = input.fromImage(QImage(newdata, input.width(), input.height(), QImage::Format::Format_ARGB32));
    delete [] newdata;
}
Use multi-threading: cut the image horizontally into bands and process them separately. I did it with OpenMP; it's really straightforward.
A squared structuring element of size NxN can be decomposed in two segments (horizontal and vertical) of size 1xN and Nx1. So instead of doing NxN tests per pixel, you will do 2xN: N=3 9 vs 6, n=5 25 vs 10, etc. Much faster!
Use the algorithm(s) already implemented in the following libraries: SMIL by Matthieu Faessel (C++ auto-vectorized code based on lines comparison, fastest!!!), libmorpho by Marc Van Droogenbroeck (C++ but limited to 8 bits encoding if I am right), or Fulguro by Christophe Clienti (Lambert algorithm that is a good fit for SIMD optimizations). As you can see in these libraries, they use smart approaches/algorithms/architectures to get fast results. What you developed is the basic that is taught in courses for an easy understanding, but it's the slowest!
Thank you every body.
I've found what i was looking for, and i wanted to share it with you.
QOpenGLTexture *m_texImageInput;
QOpenGLShaderProgram *m_shaderComputeH;
// (Re)load the input texture from resources and (re)build the erosion
// compute-shader program. Safe to call repeatedly.
void initiateShader()
{
    // delete on a null pointer is a no-op, so no guard is required.
    delete m_texImageInput;
    m_texImageInput = nullptr;
    QImage img(":/image.png");
    m_texImageInput = new QOpenGLTexture(img.convertToFormat(QImage::Format_RGBA8888).mirrored());
    delete m_shaderComputeH;
    m_shaderComputeH = nullptr;
    m_shaderComputeH = new QOpenGLShaderProgram;
    m_shaderComputeH->addShaderFromSourceFile(QOpenGLShader::Compute, ":/csErosionFilter.fsh");
    m_shaderComputeH->link();
}
// Number of work groups needed to cover imageSize when each group spans
// workGroupSize pixels per dimension (i.e. a division rounded up).
QSize getWorkGroups(int workGroupSize, const QSize &imageSize)
{
    int groupsX = imageSize.width() / workGroupSize;
    if (imageSize.width() % workGroupSize)
        ++groupsX;
    int groupsY = imageSize.height() / workGroupSize;
    if (imageSize.height() % workGroupSize)
        ++groupsY;
    return QSize(groupsX, groupsY);
}
// Dispatch one pass of the erosion compute shader over the whole input
// texture, writing the eroded result into m_texImageProcessed.
void executeFilter(int radius)
{
QOpenGLExtraFunctions *f = QOpenGLContext::currentContext()->extraFunctions();
// Process input image
// Work-group count: image size divided by the shader's 32x32 local size,
// rounded up (32 must match COMPUTEPATCHSIZE in csErosionFilter.fsh).
QSize workGroups = getWorkGroups(32, QSize(m_texImageInput->width(), m_texImageInput->height()));
// Pass 2
// Bind source/destination images to the units matching the shader's
// layout(binding=0) / layout(binding=1) declarations.
f->glBindImageTexture(0, m_texImageInput->textureId(), 0, 0, 0, GL_READ_WRITE, GL_RGBA8);
f->glBindImageTexture(1, m_texImageProcessed->textureId(), 0, 0, 0, GL_READ_WRITE, GL_RGBA8);
m_shaderComputeH->bind();
m_shaderComputeH->setUniformValue("radius", radius);
f->glDispatchCompute(workGroups.width(), workGroups.height(), 1);
// Ensure the shader's image writes are visible to later texture reads.
f->glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
m_shaderComputeH->release();
// Compute cleanup
f->glBindImageTexture(0, 0, 0, 0, 0, GL_READ_WRITE, GL_RGBA8);
f->glBindImageTexture(1, 0, 0, 0, 0, GL_READ_WRITE, GL_RGBA8);
}
and here's the compute shader
/// csErosionFilter.fsh
/// Compute-shader erosion: each output pixel is the per-channel (RGB)
/// minimum over a (2*radius+1)^2 neighborhood, clamped at image borders.
#version 430 core
#define COMPUTEPATCHSIZE 32
layout (local_size_x = COMPUTEPATCHSIZE, local_size_y = COMPUTEPATCHSIZE) in;
layout(binding=0, rgba8) uniform readonly highp image2D inputImage;
layout(binding=1, rgba8) uniform writeonly highp image2D resultImage;
uniform int radius;
void main()
{
ivec2 imgSize = imageSize(resultImage);
int x = int(gl_GlobalInvocationID.x);
int y = int(gl_GlobalInvocationID.y);
// Discard threads of the last (partial) work group that fall outside the image.
if ((x >= imgSize.x) || (y >= imgSize.y))
return;
// Start from white; only .x/.y/.z are minimized below, so alpha stays 1.
vec4 newValue = vec4(1);
// Neighborhood bounds, clamped so border pixels only sample inside the image.
int left = clamp(x - radius, 0, imgSize.x - 1);
int right = clamp(x + radius, 0, imgSize.x - 1);
int top = clamp(y - radius, 0, imgSize.y - 1);
int bottom = clamp(y + radius, 0, imgSize.y - 1);
for (int iX = left; iX <= right; iX++)
{
for (int iY = top; iY <= bottom; iY++)
{
vec4 value = imageLoad(inputImage, ivec2(iX, iY));
if(value.x < newValue.x)
newValue.x = value.x;
if(value.y < newValue.y)
newValue.y = value.y;
if(value.z < newValue.z)
newValue.z = value.z;
}
}
imageStore(resultImage, ivec2(x,y), newValue);
}
I am trying to display an RGB image from raw data in C++ without any library. When I input a square image (e.g. 512x512), my program can display the image perfectly, but it does not work for non-square images (e.g. 350x225). I understand that I need padding for this case; I then tried to find examples of the same case, but it didn't make sense to me how people pad their images.
If anyone can show me how to pad, I would be thanks for this. And below is what I have done for RGB from Raw.
// Build the BMP pixel buffer from m_pcIBuff, reading source rows in reverse
// (bottom-up) order and reversing the channel order within each pixel.
void CImage_MyClass::Class_MakeRGB(void)
{
m_BMPheader.biHeight = m_uiHeight;
m_BMPheader.biWidth = m_uiWidth;
// NOTE(review): no row padding -- BMP rows must be padded to a multiple of
// 4 bytes, which is why non-square/odd-width images come out broken.
m_pcBMP = new UCHAR[m_uiHeight * m_uiWidth * 3];
//RGB Image
{
int ind = 0;
for (UINT y = 0; y < m_uiHeight; y++)
{
// BUG: the inner loop bound uses m_uiHeight; it should be m_uiWidth*3
// (only coincidentally correct for square images -- see the answer below).
for (UINT x = 0; x < m_uiHeight*3; x+=3)
{
// Source rows are read bottom-up; the 3 channels are written in
// reverse order (x+2, x+1, x).
m_pcBMP[ind++] = m_pcIBuff[m_uiHeight - y -1][x+2];
m_pcBMP[ind++] = m_pcIBuff[m_uiHeight - y -1][x+1];
m_pcBMP[ind++] = m_pcIBuff[m_uiHeight - y -1][x];
}
}
}
}
You need to pad the number of bytes in each line out to a multiple of 4.
// Build the BMP pixel buffer from m_pcIBuff with each row padded to a
// multiple of 4 bytes, as the BMP format requires. Source rows are read
// bottom-up and the channel order within each pixel is reversed.
void CImage_MyClass::Class_MakeRGB(void)
{
    m_BMPheader.biHeight = m_uiHeight;
    m_BMPheader.biWidth = m_uiWidth;
    //Pad buffer width to next highest multiple of 4
    // (& binds looser than +, so this is (width*3 + 3) & ~3)
    const int bmStride = m_uiWidth * 3 + 3 & ~3;
    m_pcBMP = new UCHAR[m_uiHeight * bmStride];
    //Clear buffer so the padding bytes are 0
    memset(m_pcBMP, 0, m_uiHeight * bmStride);
    //RGB Image
    {
        for(UINT y = 0; y < m_uiHeight; y++)
        {
            for(UINT x = 0; x < m_uiWidth * 3; x += 3)
            {
                // BUGFIX: row offsets must use the padded stride variable
                // declared above (bmStride); the original referenced an
                // undeclared name 'bmWidth' here.
                const int bmpPos = y * bmStride + x;
                m_pcBMP[bmpPos + 0] = m_pcIBuff[m_uiHeight - y - 1][x + 2];
                m_pcBMP[bmpPos + 1] = m_pcIBuff[m_uiHeight - y - 1][x + 1];
                m_pcBMP[bmpPos + 2] = m_pcIBuff[m_uiHeight - y - 1][x];
            }
        }
    }
}
I also changed the inner for loop to use m_uiWidth instead of m_uiHeight.
@Retired Ninja, thanks anyway for your answer — you showed me a simple way to do this.
But by the way, I have fixed mine as well with different way.. here is it:
// Alternative fix: compute the per-row BMP padding explicitly and append it
// after each row of pixel data. Source rows are read bottom-up and the
// channel order within each pixel is reversed.
void CImage_MyClass::Class_MakeRGB(void)
{
    m_BMPheader.biHeight = m_uiHeight;
    m_BMPheader.biWidth = m_uiWidth;
    // BMP rows must be padded up to a multiple of 4 bytes.
    int padding = 0;
    int scanline = m_uiWidth * 3;
    while ( ( scanline + padding ) % 4 != 0 )
    {
        padding++;
    }
    int psw = scanline + padding; // padded scan-line width in bytes
    m_pcBMP = new UCHAR[m_uiHeight * psw];
    //RGB Image
    int ind = 0;
    for (UINT y = 0; y < m_uiHeight; y++)
    {
        // BUGFIX: iterate over the row *width*; the original kept the
        // m_uiHeight*3 bound (the very bug the previous answer called out),
        // which is only correct for square images.
        for (UINT x = 0; x < m_uiWidth*3; x+=3)
        {
            m_pcBMP[ind++] = m_pcIBuff[m_uiHeight - y -1][x+2];
            m_pcBMP[ind++] = m_pcIBuff[m_uiHeight - y -1][x+1];
            m_pcBMP[ind++] = m_pcIBuff[m_uiHeight - y -1][x];
        }
        // BUGFIX: write zeros into the padding bytes instead of merely
        // skipping over them and leaving them uninitialized.
        for(int i = 0; i < padding; i++)
            m_pcBMP[ind++] = 0;
    }
}
So I have an image that I want to overlay with a checkerboard pattern.
This is what I have come up with so far:
// NOTE(review): several problems here. (1) The 1D index lacks the row
// stride: it should be image.data[nRow * rowStride + nCol], not nCol + nRow.
// (2) nRow is bounded by width() while nCol is bounded by height() --
// swapped unless the image is square. (3) If either dimension exceeds 255,
// the 8-bit uint_8 counters overflow and the loops never terminate --
// presumably the cause of the blank output; confirm the image dimensions.
for ( uint_8 nRow = 0; nRow < image.width(); ++nRow)
for (uint_8 nCol = 0; nCol < image.height(); ++nCol)
if(((nRow/20 + nCol/20) % 2) == 0)
// memset of exactly one byte; a plain assignment would do the same thing.
memset(&image.data[nCol + nRow], 0, 1);
Unfortunately, this produces a white image. I don't think it is very performant either, because memset is called for every single pixel in the image instead of for multiple pixels at once.
Why does this code not produce a checkerboard pattern? How would you improve it?
For better performance, don't treat the image as a 2-dimensional entity. Instead, look at it as a 1D array of continuous data, where all lines of the image are arranged one after the other.
With this approach, you can write the pattern in one go with a single loop, where in every iteration you memset() multiple adjacent pixels and increase the index by twice the amount of pixels you set:
// Treat the image as one contiguous 1D buffer and blacken 20-pixel runs,
// advancing the iterator extra steps to skip runs and form the pattern.
int data_size = image.width() * image.height();
for (auto it = image.data; it < image.data + data_size; it += 20) {
memset(it, 0, 20);
// NOTE(review): `data` here is presumably image.data; the 400 hard-codes a
// 400-pixel row width and the arithmetic assumes one byte per pixel --
// confirm both against the actual image layout before reusing this.
if (((it - data) + 40) % (20 * 400) == 0) {
it += 40;
} else if (((it - data) + 20) % (20 * 400) != 0) {
it += 20;
}
}
(Replace auto with the type of image.data if you're not using C++11; I suspect it's unsigned char*.)
This is quite friendly for the CPU cache prefetch. It's also friendly for the compiler, which can potentially vectorize and/or perform loop unrolling.
If you have an image's dimensions which are multiple of the checker square size :
(I coded in C but it is fairly easy to transpose to C++)
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define uint unsigned int
#define WIDTH 40
#define HEIGHT 40
#define BLOCK_SIZE 5
/* Zero out every other block of size_block pixels in one image row.
 * offset (0 or 1) selects whether the first or the second block is the one
 * cleared, which gives adjacent row-bands their alternating checker phase. */
void create_checker_row(uint* row, uint size_block, uint nb_col, uint offset )
{
    uint start;
    for (start = size_block * offset; start < nb_col; start += 2 * size_block)
    {
        memset( row + start, 0, size_block * sizeof(uint) );
    }
}
/* Demo driver: fill a WIDTH x HEIGHT image with arbitrary values, print it,
 * overlay the checker pattern row by row, then print it again. */
int main()
{
    uint ir, ic;
    // image creation
    uint* pixels = (uint*) malloc(WIDTH*HEIGHT*sizeof(uint));
    /* BUGFIX: rows are bounded by HEIGHT and columns by WIDTH; the original
     * swapped the two bounds, which only worked because WIDTH == HEIGHT. */
    for (ir = 0; ir < HEIGHT; ir++)
    {
        for ( ic = 0; ic < WIDTH; ic++)
        {
            // arbitrary numbers
            pixels[ir*WIDTH + ic] = (ir*WIDTH + ic) % 57 ;
            printf("%d,", pixels[ir*WIDTH + ic] );
        }
        printf("\n");
    }
    for (ir = 0; ir < HEIGHT; ir++)
    {
        create_checker_row( pixels + ir*WIDTH , // pointer at the beginning of n-th row
                            BLOCK_SIZE ,        // horizontal length for square
                            WIDTH ,             // image width
                            (ir/BLOCK_SIZE) % 2 // offset to create the checker pattern
                            );
    }
    // validation
    printf("\n");
    printf("Validation \n");
    printf("\n");
    for (ir = 0; ir < HEIGHT; ir++)
    {
        for ( ic = 0; ic < WIDTH; ic++)
        {
            printf("%d,", pixels[ir*WIDTH + ic] );
        }
        printf("\n");
    }
    /* BUGFIX: release the image buffer before exiting */
    free(pixels);
    return 0;
}
Seems pretty checkered for me : http://ideone.com/gp9so6
I use this and stb_image_write.h
#include <stdlib.h>
#include <stb_image_write.h>
/* Write a red/yellow checkerboard PNG ("checker.png") via stb_image_write.
 * The board is built by walking the image as a flat pixel index i and
 * toggling two flags: `fill` flips every box_sz pixels (column phase) and
 * `swap` flips every box_sz image rows (row phase); their combination picks
 * the foreground or background color. This stays in phase only because w is
 * an exact multiple of box_sz. */
int main(int argc, char *argv[])
{
const int w = 256, h = 256, ch = 4, segments = 8, box_sz = w / segments;
unsigned char rgba_fg[4] = {255, 255, 0, 255}; //yellow
unsigned char rgba_bg[4] = {255, 0, 0, 255}; //red
unsigned char* data = calloc(w * h * ch, sizeof(unsigned char));
int swap = 0;
int fill = 0; /* set to 1 to fill fg first*/
unsigned char* col = NULL;
for(int i = 0; i < w * h; i++)
{
/* crossed into a new band of box_sz rows: invert the palette */
if(i % (w * box_sz) == 0 && i != 0)
swap = !swap;
/* crossed into the next box along the row: toggle fg/bg */
if(i % box_sz == 0 && i != 0)
fill = !fill;
if(fill)
{
if(swap)
col = rgba_bg;
else
col = rgba_fg;
}else
{
if(swap)
col = rgba_fg;
else
col = rgba_bg;
}
/* copy the chosen RGBA color into pixel i */
for(int j = 0; j < ch; j++)
{
data[i*ch + j] = col[j];
}
}
/* last arg 0 = let stb compute the stride from w and ch */
stbi_write_png("checker.png", w, h, ch, data, 0);
free(data);
return 0;
}
It's a bit slow with large images, but it gets the job done if you cache the results.