Related
I'm hoping to create a simple computer vision library in C++/CUDA C++ that allows me to do the following:
Grab some RGB data from the host memory. This data will come in a BGR byte array, 8 bits per channel per pixel.
Process that data in a CUDA kernel.
Write the output of that kernel back into some host memory.
Render the output in an OpenGL texture for easy viewing.
These functions would go inside a class like so:
class Processor{
public:
setInput(const byte* data, int imageWidth, int imageHeight);
void processData();
GLuint getInputTexture();
GLuint getOutputTexture();
void writeOutputTo(byte* destination);
}
setInput() is going to be called with every frame of a video (hundreds or thousands of images of the same dimensions).
How can I write the Processor class so that setInput() can efficiently update an instance's internal CUDA array and processData() can synchronize the CUDA array with the OpenGL texture?
Below is my attempt at implementing such a class, contained in one CUDA C++ file along with a simple test. (Requires GLFW and GLAD.) With this implementation, I can provide some input image data, run a CUDA kernel that produces an output image, and visualize both with OpenGL textures. But it's extremely inefficient because every time setInput() is called, two OpenGL textures and two CUDA surface objects need to be created. And if more than one image is processed, two OpenGL textures and two CUDA surface objects also have to be destroyed.
#include <glad/glad.h>
#include <GLFW/glfw3.h>
#include <cudaGL.h>
#include <cuda_gl_interop.h>
#include <iostream>
/** Macro for checking if CUDA has problems */
#define cudaCheckError() { \
cudaError_t err = cudaGetLastError(); \
if(err != cudaSuccess) { \
printf("Cuda error: %s:%d: %s\n", __FILE__, __LINE__, cudaGetErrorString(err)); \
exit(1); \
} \
}
/*Window dimensions*/
const int windowWidth = 1280, windowHeight = 720;
/*Window address*/
GLFWwindow* currentGLFWWindow = 0;
/**
* A simple image processing kernel that copies the inverted data from the input surface to the output surface.
*/
__global__ void kernel(cudaSurfaceObject_t input, cudaSurfaceObject_t output, int width, int height) {
//Get the pixel index
unsigned int xPx = threadIdx.x + blockIdx.x * blockDim.x;
unsigned int yPx = threadIdx.y + blockIdx.y * blockDim.y;
//Don't do any computation if this thread is outside of the surface bounds.
if (xPx >= width || yPx >= height) return;
//Copy the contents of input to output.
uchar4 pixel = { 255,128,0,255 };
//Read a pixel from the input. Disable to default to the flat orange color above
surf2Dread<uchar4>(&pixel, input, xPx * sizeof(uchar4), yPx, cudaBoundaryModeClamp);
//Invert the color
pixel.x = ~pixel.x;
pixel.y = ~pixel.y;
pixel.z = ~pixel.z;
//Write the new pixel color to the
surf2Dwrite(pixel, output, xPx * sizeof(uchar4), yPx);
}
class Processor {
public:
void setInput( uint8_t* const data, int imageWidth, int imageHeight);
void processData();
GLuint getInputTexture();
GLuint getOutputTexture();
void writeOutputTo(uint8_t* destination);
private:
/**
* #brief True if the textures and surfaces are initialized.
*
* Prevents memory leaks
*/
bool surfacesInitialized = false;
/**
* #brief The width and height of a texture/surface pair.
*
*/
struct ImgDim { int width, height; };
/**
* #brief Creates a CUDA surface object, CUDA resource, and OpenGL texture from some data.
*/
void createTextureSurfacePair(const ImgDim& dimensions, uint8_t* const data, GLuint& textureOut, cudaGraphicsResource_t& graphicsResourceOut, cudaSurfaceObject_t& surfaceOut);
/**
* #brief Destroys every CUDA surface object, CUDA resource, and OpenGL texture created by this instance.
*/
void destroyEverything();
/**
* #brief The dimensions of an image and its corresponding texture.
*
*/
ImgDim imageInputDimensions, imageOutputDimensions;
/**
* #brief A CUDA surface that can be read to, written from, or synchronized with a Mat or
* OpenGL texture
*
*/
cudaSurfaceObject_t d_imageInputTexture = 0, d_imageOutputTexture = 0;
/**
* #brief A CUDA resource that's bound to an array in CUDA memory
*/
cudaGraphicsResource_t d_imageInputGraphicsResource, d_imageOutputGraphicsResource;
/**
* #brief A renderable OpenGL texture that is synchronized with the CUDA data
* #see d_imageInputTexture, d_imageOutputTexture
*/
GLuint imageInputTexture = 0, imageOutputTexture = 0;
/** Returns true if nothing can be rendered */
bool empty() { return imageInputTexture == 0; }
};
void Processor::setInput(uint8_t* const data, int imageWidth, int imageHeight)
{
//Same-size images don't need texture regeneration, so skip that.
if (imageHeight == imageInputDimensions.height && imageWidth == imageInputDimensions.width) {
/*
Possible shortcut: we know the input is the same size as the texture and CUDA surface object.
So instead of destroying the surface and texture, why not just overwrite them?
That's what I try to do in the following block, but because "data" is BGR and the texture
is RGBA, the channels get all messed up.
*/
/*
//Use the input surface's CUDAResourceDesc to gain access to the surface data array
struct cudaResourceDesc resDesc;
memset(&resDesc, 0, sizeof(resDesc));
cudaGetSurfaceObjectResourceDesc(&resDesc, d_imageInputTexture);
cudaCheckError();
//Copy the data from the input array to the surface
cudaMemcpyToArray(resDesc.res.array.array, 0, 0, input.data, imageInputDimensions.width * imageInputDimensions.height * 3, cudaMemcpyHostToDevice);
cudaCheckError();
//Set status flags
surfacesInitialized = true;
return;
*/
}
//Clear everything that originally existed in the texture/surface
destroyEverything();
//Get the size of the image and place it here.
imageInputDimensions.width = imageWidth;
imageInputDimensions.height = imageHeight;
imageOutputDimensions.width = imageWidth;
imageOutputDimensions.height = imageHeight;
//Create the input surface/texture pair
createTextureSurfacePair(imageInputDimensions, data, imageInputTexture, d_imageInputGraphicsResource, d_imageInputTexture);
//Create the output surface/texture pair
uint8_t* outData = new uint8_t[imageOutputDimensions.width * imageOutputDimensions.height * 3];
createTextureSurfacePair(imageOutputDimensions, outData, imageOutputTexture, d_imageOutputGraphicsResource, d_imageOutputTexture);
delete outData;
//Set status flags
surfacesInitialized = true;
}
void Processor::processData()
{
const int threadsPerBlock = 128;
//Call the algorithm
//Set the number of blocks to call the kernel with.
dim3 blocks((unsigned int)ceil((float)imageInputDimensions.width / threadsPerBlock), imageInputDimensions.height);
kernel <<<blocks, threadsPerBlock >>> (d_imageInputTexture, d_imageOutputTexture, imageInputDimensions.width, imageInputDimensions.height);
//Sync the surface with the texture
cudaDeviceSynchronize();
cudaCheckError();
}
GLuint Processor::getInputTexture()
{
return imageInputTexture;
}
GLuint Processor::getOutputTexture()
{
return imageOutputTexture;
}
void Processor::writeOutputTo(uint8_t* destination)
{
//Haven't figured this out yet
}
void Processor::createTextureSurfacePair(const Processor::ImgDim& dimensions, uint8_t* const data, GLuint& textureOut, cudaGraphicsResource_t& graphicsResourceOut, cudaSurfaceObject_t& surfaceOut) {
// Create the OpenGL texture that will be displayed with GLAD and GLFW
glGenTextures(1, &textureOut);
// Bind to our texture handle
glBindTexture(GL_TEXTURE_2D, textureOut);
// Set texture interpolation methods for minification and magnification
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
// Set texture clamping method
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);
// Create the texture and its attributes
glTexImage2D(GL_TEXTURE_2D, // Type of texture
0, // Pyramid level (for mip-mapping) - 0 is the top level
GL_RGBA, // Internal color format to convert to
dimensions.width, // Image width i.e. 640 for Kinect in standard mode
dimensions.height, // Image height i.e. 480 for Kinect in standard mode
0, // Border width in pixels (can either be 1 or 0)
GL_BGR, // Input image format (i.e. GL_RGB, GL_RGBA, GL_BGR etc.)
GL_UNSIGNED_BYTE, // Image data type.
data); // The actual image data itself
//Note that the type of this texture is an RGBA UNSIGNED_BYTE type. When CUDA surfaces
//are synchronized with OpenGL textures, the surfaces will be of the same type.
//They won't know or care about their data types though, for they are all just byte arrays
//at heart. So be careful to ensure that any CUDA kernel that handles a CUDA surface
//uses it as an appropriate type. You will see that the update_surface kernel (defined
//above) treats each pixel as four unsigned bytes along the X-axis: one for red, green, blue,
//and alpha respectively.
//Create the CUDA array and texture reference
cudaArray* bitmap_d;
//Register the GL texture with the CUDA graphics library. A new cudaGraphicsResource is created, and its address is placed in cudaTextureID.
//Documentation: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__OPENGL.html#group__CUDART__OPENGL_1g80d12187ae7590807c7676697d9fe03d
cudaGraphicsGLRegisterImage(&graphicsResourceOut, textureOut, GL_TEXTURE_2D,
cudaGraphicsRegisterFlagsNone);
cudaCheckError();
//Map graphics resources for access by CUDA.
//Documentation: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__INTEROP.html#group__CUDART__INTEROP_1gad8fbe74d02adefb8e7efb4971ee6322
cudaGraphicsMapResources(1, &graphicsResourceOut, 0);
cudaCheckError();
//Get the location of the array of pixels that was mapped by the previous function and place that address in bitmap_d
//Documentation: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__INTEROP.html#group__CUDART__INTEROP_1g0dd6b5f024dfdcff5c28a08ef9958031
cudaGraphicsSubResourceGetMappedArray(&bitmap_d, graphicsResourceOut, 0, 0);
cudaCheckError();
//Create a CUDA resource descriptor. This is used to get and set attributes of CUDA resources.
//This one will tell CUDA how we want the bitmap_surface to be configured.
//Documentation for the struct: https://docs.nvidia.com/cuda/cuda-runtime-api/structcudaResourceDesc.html#structcudaResourceDesc
struct cudaResourceDesc resDesc;
//Clear it with 0s so that some flags aren't arbitrarily left at 1s
memset(&resDesc, 0, sizeof(resDesc));
//Set the resource type to be an array for convenient processing in the CUDA kernel.
//List of resTypes: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html#group__CUDART__TYPES_1g067b774c0e639817a00a972c8e2c203c
resDesc.resType = cudaResourceTypeArray;
//Bind the new descriptor with the bitmap created earlier.
resDesc.res.array.array = bitmap_d;
//Create a new CUDA surface ID reference.
//This is really just an unsigned long long.
//Docuentation: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html#group__CUDART__TYPES_1gbe57cf2ccbe7f9d696f18808dd634c0a
surfaceOut = 0;
//Create the surface with the given description. That surface ID is placed in bitmap_surface.
//Documentation: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__SURFACE__OBJECT.html#group__CUDART__SURFACE__OBJECT_1g958899474ab2c5f40d233b524d6c5a01
cudaCreateSurfaceObject(&surfaceOut, &resDesc);
cudaCheckError();
}
void Processor::destroyEverything()
{
if (surfacesInitialized) {
//Input image CUDA surface
cudaDestroySurfaceObject(d_imageInputTexture);
cudaGraphicsUnmapResources(1, &d_imageInputGraphicsResource);
cudaGraphicsUnregisterResource(d_imageInputGraphicsResource);
d_imageInputTexture = 0;
//Output image CUDA surface
cudaDestroySurfaceObject(d_imageOutputTexture);
cudaGraphicsUnmapResources(1, &d_imageOutputGraphicsResource);
cudaGraphicsUnregisterResource(d_imageOutputGraphicsResource);
d_imageOutputTexture = 0;
//Input image GL texture
glDeleteTextures(1, &imageInputTexture);
imageInputTexture = 0;
//Output image GL texture
glDeleteTextures(1, &imageOutputTexture);
imageOutputTexture = 0;
surfacesInitialized = false;
}
}
/** A way to initialize OpenGL with GLFW and GLAD */
void initGL() {
// Setup window
if (!glfwInit())
return;
// Decide GL+GLSL versions
#if __APPLE__
// GL 3.2 + GLSL 150
const char* glsl_version = "#version 150";
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 2);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); // 3.2+ only
glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); // Required on Mac
#else
// GL 3.0 + GLSL 130
const char* glsl_version = "#version 130";
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 0);
//glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); // 3.2+ only
//glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); // 3.0+ only
#endif
// Create window with graphics context
currentGLFWWindow = glfwCreateWindow(windowWidth, windowHeight, "Output image (OpenGL + GLFW)", NULL, NULL);
if (currentGLFWWindow == NULL)
return;
glfwMakeContextCurrent(currentGLFWWindow);
glfwSwapInterval(3); // Enable vsync
if (!gladLoadGL()) {
// GLAD failed
printf( "GLAD failed to initialize :(" );
return;
}
//Change GL settings
glViewport(0, 0, windowWidth, windowHeight); // use a screen size of WIDTH x HEIGHT
glMatrixMode(GL_PROJECTION); // Make a simple 2D projection on the entire window
glLoadIdentity();
glOrtho(0.0, windowWidth, windowHeight, 0.0, 0.0, 100.0);
glMatrixMode(GL_MODELVIEW); // Set the matrix mode to object modeling
glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
glClearDepth(0.0f);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // Clear the window
}
/** Renders the textures on the GLFW window and requests GLFW to update */
void showTextures(GLuint top, GLuint bottom) {
// Clear color and depth buffers
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glMatrixMode(GL_MODELVIEW); // Operate on model-view matrix
glBindTexture(GL_TEXTURE_2D, top);
/* Draw top quad */
glEnable(GL_TEXTURE_2D);
glBegin(GL_QUADS);
glTexCoord2i(0, 0); glVertex2i(0, 0);
glTexCoord2i(0, 1); glVertex2i(0, windowHeight/2);
glTexCoord2i(1, 1); glVertex2i(windowWidth, windowHeight / 2);
glTexCoord2i(1, 0); glVertex2i(windowWidth, 0);
glEnd();
glDisable(GL_TEXTURE_2D);
/* Draw top quad */
glBindTexture(GL_TEXTURE_2D, bottom);
glEnable(GL_TEXTURE_2D);
glBegin(GL_QUADS);
glTexCoord2i(0, 0); glVertex2i(0, windowHeight / 2);
glTexCoord2i(0, 1); glVertex2i(0, windowHeight);
glTexCoord2i(1, 1); glVertex2i(windowWidth, windowHeight);
glTexCoord2i(1, 0); glVertex2i(windowWidth, windowHeight / 2);
glEnd();
glDisable(GL_TEXTURE_2D);
glfwSwapBuffers(currentGLFWWindow);
glfwPollEvents();
}
int main() {
initGL();
int imageWidth = windowWidth;
int imageHeight = windowHeight / 2;
uint8_t* imageData = new uint8_t[imageWidth * imageHeight * 3];
Processor p;
while (!glfwWindowShouldClose(currentGLFWWindow))
{
//Process the image here
p.setInput(imageData, imageWidth, imageHeight);
p.processData();
showTextures(p.getInputTexture(), p.getOutputTexture());
}
}
TL;DR: I can see at least 2 ways forward here, either convert your data to 4 byte pixels (somehow) and use cudaMemcpy2DToArray, or allow the CUDA kernel to take in raw data (instead of using a surface as input). I'll try to demonstrate both, although I don't wish to put in a large effort at polishing this, so really just demonstrating ideas.
This answer is working off the code you provided in an edit which is not your latest. However in the subsequent edits, mainly you seem to be just ripping out OpenCV, which I would normally applaud. However, since I've worked off your edit that had OpenCV in it, I've elected to use an OpenCV "test case" of my own.
Using 4 byte-per-pixel data, and cudaMemcpy2DToArray: This seems to adhere most closely to what you have demonstrated, albeit commented-out. The idea is we will access the input data by copying it to the CUDA array (acquired from the interop mechanism) directly. As you had previously pointed out, cudaMemcpyToArray is deprecated, so we won't use that. Furthermore, our data format (bytes per pixel) has to match what is in the array. I think there are a number of ways to solve this, depending on your overall pipeline, but the approach I show here isn't efficient, it's just to demonstrate that the method is "workable". If there is a way to use 4 byte per pixel data in your pipeline, however, you may be able to get rid of the "inefficiency" here. To use this method, compile the code with the -DUSE_1 switch.
Input of the data through the kernel. We can skip the inefficiency of the first case by just allowing the kernel to do the 3-byte to 4-byte conversion of data on the fly. Either way, there is a copy of data from host to device, but this method doesn't require 4 byte per pixel input data.
Here is code demonstrating both options:
//nvcc -arch=sm_35 -o t19 glad/src/glad.c t19.cu -lGL -lGLU -I./glad/include -lglfw -std=c++11 -lopencv_core -lopencv_highgui -lopencv_imgcodecs -Wno-deprecated-gpu-targets
#include <glad/glad.h>
#include <GLFW/glfw3.h>
#include <cudaGL.h>
#include <cuda_gl_interop.h>
#include <iostream>
#include <opencv2/highgui.hpp>
/** Macro for checking if CUDA has problems */
#define cudaCheckError() { \
cudaError_t err = cudaGetLastError(); \
if(err != cudaSuccess) { \
printf("Cuda error: %s:%d: %s\n", __FILE__, __LINE__, cudaGetErrorString(err)); \
exit(1); \
} \
}
/*Window dimensions*/
//const int windowWidth = 1280, windowHeight = 720;
/*Window address*/
GLFWwindow* currentGLFWWindow = 0;
/**
* A simple image processing kernel that copies the inverted data from the input surface to the output surface.
*/
__global__ void kernel(cudaSurfaceObject_t input, cudaSurfaceObject_t output, int width, int height, uint8_t *data) {
//Get the pixel index
unsigned int xPx = threadIdx.x + blockIdx.x * blockDim.x;
unsigned int yPx = threadIdx.y + blockIdx.y * blockDim.y;
//Don't do any computation if this thread is outside of the surface bounds.
if (xPx >= width || yPx >= height) return;
//Copy the contents of input to output.
#ifdef USE_1
uchar4 pixel = { 255,128,0,255 };
//Read a pixel from the input. Disable to default to the flat orange color above
surf2Dread<uchar4>(&pixel, input, xPx * sizeof(uchar4), yPx, cudaBoundaryModeClamp);
#else
uchar4 pixel;
pixel.x = data[(xPx+yPx*width)*3 + 0];
pixel.y = data[(xPx+yPx*width)*3 + 1];
pixel.z = data[(xPx+yPx*width)*3 + 2];
pixel.w = 255;
surf2Dwrite(pixel, input, xPx * sizeof(uchar4), yPx);
#endif
//Invert the color
pixel.x = ~pixel.x;
pixel.y = ~pixel.y;
pixel.z = ~pixel.z;
//Write the new pixel color to the
surf2Dwrite(pixel, output, xPx * sizeof(uchar4), yPx);
}
class Processor {
public:
void setInput( uint8_t* const data, int imageWidth, int imageHeight);
void processData(uint8_t *data, uint8_t *d_data);
GLuint getInputTexture();
GLuint getOutputTexture();
void writeOutputTo(uint8_t* destination);
private:
/**
* #brief True if the textures and surfaces are initialized.
*
* Prevents memory leaks
*/
bool surfacesInitialized = false;
/**
* #brief The width and height of a texture/surface pair.
*
*/
struct ImgDim { int width, height; };
/**
* #brief Creates a CUDA surface object, CUDA resource, and OpenGL texture from some data.
*/
void createTextureSurfacePair(const ImgDim& dimensions, uint8_t* const data, GLuint& textureOut, cudaGraphicsResource_t& graphicsResourceOut, cudaSurfaceObject_t& surfaceOut);
/**
* #brief Destroys every CUDA surface object, CUDA resource, and OpenGL texture created by this instance.
*/
void destroyEverything();
/**
* #brief The dimensions of an image and its corresponding texture.
*
*/
ImgDim imageInputDimensions, imageOutputDimensions;
/**
* #brief A CUDA surface that can be read to, written from, or synchronized with a Mat or
* OpenGL texture
*
*/
cudaSurfaceObject_t d_imageInputTexture = 0, d_imageOutputTexture = 0;
/**
* #brief A CUDA resource that's bound to an array in CUDA memory
*/
cudaGraphicsResource_t d_imageInputGraphicsResource, d_imageOutputGraphicsResource;
/**
* #brief A renderable OpenGL texture that is synchronized with the CUDA data
* #see d_imageInputTexture, d_imageOutputTexture
*/
GLuint imageInputTexture = 0, imageOutputTexture = 0;
/** Returns true if nothing can be rendered */
bool empty() { return imageInputTexture == 0; }
};
void Processor::setInput(uint8_t* const data, int imageWidth, int imageHeight)
{
//Same-size images don't need texture regeneration, so skip that.
if (imageHeight == imageInputDimensions.height && imageWidth == imageInputDimensions.width) {
/*
Possible shortcut: we know the input is the same size as the texture and CUDA surface object.
So instead of destroying the surface and texture, why not just overwrite them?
That's what I try to do in the following block, but because "data" is BGR and the texture
is RGBA, the channels get all messed up.
*/
//Use the input surface's CUDAResourceDesc to gain access to the surface data array
#ifdef USE_1
struct cudaResourceDesc resDesc;
memset(&resDesc, 0, sizeof(resDesc));
cudaGetSurfaceObjectResourceDesc(&resDesc, d_imageInputTexture);
cudaCheckError();
uint8_t *data4 = new uint8_t[imageInputDimensions.width*imageInputDimensions.height*4];
for (int i = 0; i < imageInputDimensions.width*imageInputDimensions.height; i++){
data4[i*4+0] = data[i*3+0];
data4[i*4+1] = data[i*3+1];
data4[i*4+2] = data[i*3+2];
data4[i*4+3] = 255;}
//Copy the data from the input array to the surface
// cudaMemcpyToArray(resDesc.res.array.array, 0, 0, data, imageInputDimensions.width * imageInputDimensions.height * 3, cudaMemcpyHostToDevice);
cudaMemcpy2DToArray(resDesc.res.array.array, 0, 0, data4, imageInputDimensions.width*4, imageInputDimensions.width*4, imageInputDimensions.height, cudaMemcpyHostToDevice);
cudaCheckError();
delete[] data4;
#endif
//Set status flags
surfacesInitialized = true;
return;
}
//Clear everything that originally existed in the texture/surface
destroyEverything();
//Get the size of the image and place it here.
imageInputDimensions.width = imageWidth;
imageInputDimensions.height = imageHeight;
imageOutputDimensions.width = imageWidth;
imageOutputDimensions.height = imageHeight;
//Create the input surface/texture pair
createTextureSurfacePair(imageInputDimensions, data, imageInputTexture, d_imageInputGraphicsResource, d_imageInputTexture);
//Create the output surface/texture pair
uint8_t* outData = new uint8_t[imageOutputDimensions.width * imageOutputDimensions.height * 3];
createTextureSurfacePair(imageOutputDimensions, outData, imageOutputTexture, d_imageOutputGraphicsResource, d_imageOutputTexture);
delete outData;
//Set status flags
surfacesInitialized = true;
}
void Processor::processData(uint8_t *data, uint8_t *d_data)
{
const int threadsPerBlock = 128;
//Call the algorithm
//Set the number of blocks to call the kernel with.
dim3 blocks((unsigned int)ceil((float)imageInputDimensions.width / threadsPerBlock), imageInputDimensions.height);
#ifndef USE_1
cudaMemcpy(d_data, data, imageInputDimensions.width*imageInputDimensions.height*3, cudaMemcpyHostToDevice);
#endif
kernel <<<blocks, threadsPerBlock >>> (d_imageInputTexture, d_imageOutputTexture, imageInputDimensions.width, imageInputDimensions.height, d_data);
//Sync the surface with the texture
cudaDeviceSynchronize();
cudaCheckError();
}
GLuint Processor::getInputTexture()
{
return imageInputTexture;
}
GLuint Processor::getOutputTexture()
{
return imageOutputTexture;
}
void Processor::writeOutputTo(uint8_t* destination)
{
//Haven't figured this out yet
}
void Processor::createTextureSurfacePair(const Processor::ImgDim& dimensions, uint8_t* const data, GLuint& textureOut, cudaGraphicsResource_t& graphicsResourceOut, cudaSurfaceObject_t& surfaceOut) {
// Create the OpenGL texture that will be displayed with GLAD and GLFW
glGenTextures(1, &textureOut);
// Bind to our texture handle
glBindTexture(GL_TEXTURE_2D, textureOut);
// Set texture interpolation methods for minification and magnification
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
// Set texture clamping method
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);
// Create the texture and its attributes
glTexImage2D(GL_TEXTURE_2D, // Type of texture
0, // Pyramid level (for mip-mapping) - 0 is the top level
GL_RGBA, // Internal color format to convert to
dimensions.width, // Image width i.e. 640 for Kinect in standard mode
dimensions.height, // Image height i.e. 480 for Kinect in standard mode
0, // Border width in pixels (can either be 1 or 0)
GL_BGR, // Input image format (i.e. GL_RGB, GL_RGBA, GL_BGR etc.)
GL_UNSIGNED_BYTE, // Image data type.
data); // The actual image data itself
//Note that the type of this texture is an RGBA UNSIGNED_BYTE type. When CUDA surfaces
//are synchronized with OpenGL textures, the surfaces will be of the same type.
//They won't know or care about their data types though, for they are all just byte arrays
//at heart. So be careful to ensure that any CUDA kernel that handles a CUDA surface
//uses it as an appropriate type. You will see that the update_surface kernel (defined
//above) treats each pixel as four unsigned bytes along the X-axis: one for red, green, blue,
//and alpha respectively.
//Create the CUDA array and texture reference
cudaArray* bitmap_d;
//Register the GL texture with the CUDA graphics library. A new cudaGraphicsResource is created, and its address is placed in cudaTextureID.
//Documentation: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__OPENGL.html#group__CUDART__OPENGL_1g80d12187ae7590807c7676697d9fe03d
cudaGraphicsGLRegisterImage(&graphicsResourceOut, textureOut, GL_TEXTURE_2D,
cudaGraphicsRegisterFlagsNone);
cudaCheckError();
//Map graphics resources for access by CUDA.
//Documentation: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__INTEROP.html#group__CUDART__INTEROP_1gad8fbe74d02adefb8e7efb4971ee6322
cudaGraphicsMapResources(1, &graphicsResourceOut, 0);
cudaCheckError();
//Get the location of the array of pixels that was mapped by the previous function and place that address in bitmap_d
//Documentation: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__INTEROP.html#group__CUDART__INTEROP_1g0dd6b5f024dfdcff5c28a08ef9958031
cudaGraphicsSubResourceGetMappedArray(&bitmap_d, graphicsResourceOut, 0, 0);
cudaCheckError();
//Create a CUDA resource descriptor. This is used to get and set attributes of CUDA resources.
//This one will tell CUDA how we want the bitmap_surface to be configured.
//Documentation for the struct: https://docs.nvidia.com/cuda/cuda-runtime-api/structcudaResourceDesc.html#structcudaResourceDesc
struct cudaResourceDesc resDesc;
//Clear it with 0s so that some flags aren't arbitrarily left at 1s
memset(&resDesc, 0, sizeof(resDesc));
//Set the resource type to be an array for convenient processing in the CUDA kernel.
//List of resTypes: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html#group__CUDART__TYPES_1g067b774c0e639817a00a972c8e2c203c
resDesc.resType = cudaResourceTypeArray;
//Bind the new descriptor with the bitmap created earlier.
resDesc.res.array.array = bitmap_d;
//Create a new CUDA surface ID reference.
//This is really just an unsigned long long.
//Docuentation: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html#group__CUDART__TYPES_1gbe57cf2ccbe7f9d696f18808dd634c0a
surfaceOut = 0;
//Create the surface with the given description. That surface ID is placed in bitmap_surface.
//Documentation: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__SURFACE__OBJECT.html#group__CUDART__SURFACE__OBJECT_1g958899474ab2c5f40d233b524d6c5a01
cudaCreateSurfaceObject(&surfaceOut, &resDesc);
cudaCheckError();
}
void Processor::destroyEverything()
{
if (surfacesInitialized) {
//Input image CUDA surface
cudaDestroySurfaceObject(d_imageInputTexture);
cudaGraphicsUnmapResources(1, &d_imageInputGraphicsResource);
cudaGraphicsUnregisterResource(d_imageInputGraphicsResource);
d_imageInputTexture = 0;
//Output image CUDA surface
cudaDestroySurfaceObject(d_imageOutputTexture);
cudaGraphicsUnmapResources(1, &d_imageOutputGraphicsResource);
cudaGraphicsUnregisterResource(d_imageOutputGraphicsResource);
d_imageOutputTexture = 0;
//Input image GL texture
glDeleteTextures(1, &imageInputTexture);
imageInputTexture = 0;
//Output image GL texture
glDeleteTextures(1, &imageOutputTexture);
imageOutputTexture = 0;
surfacesInitialized = false;
}
}
/** A way to initialize OpenGL with GLFW and GLAD */
void initGL(int windowWidth, int windowHeight) {
// Setup window
if (!glfwInit())
return;
// Decide GL+GLSL versions
#if __APPLE__
// GL 3.2 + GLSL 150
const char* glsl_version = "#version 150";
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 2);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); // 3.2+ only
glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); // Required on Mac
#else
// GL 3.0 + GLSL 130
//const char* glsl_version = "#version 130";
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 0);
//glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); // 3.2+ only
//glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); // 3.0+ only
#endif
// Create window with graphics context
currentGLFWWindow = glfwCreateWindow(windowWidth, windowHeight, "Output image (OpenGL + GLFW)", NULL, NULL);
if (currentGLFWWindow == NULL)
return;
glfwMakeContextCurrent(currentGLFWWindow);
glfwSwapInterval(3); // Enable vsync
if (!gladLoadGL()) {
// GLAD failed
printf( "GLAD failed to initialize :(" );
return;
}
//Change GL settings
glViewport(0, 0, windowWidth, windowHeight); // use a screen size of WIDTH x HEIGHT
glMatrixMode(GL_PROJECTION); // Make a simple 2D projection on the entire window
glLoadIdentity();
glOrtho(0.0, windowWidth, windowHeight, 0.0, 0.0, 100.0);
glMatrixMode(GL_MODELVIEW); // Set the matrix mode to object modeling
glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
glClearDepth(0.0f);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // Clear the window
}
/** Renders the textures on the GLFW window and requests GLFW to update */
void showTextures(GLuint top, GLuint bottom, int windowWidth, int windowHeight) {
// Clear color and depth buffers
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glMatrixMode(GL_MODELVIEW); // Operate on model-view matrix
glBindTexture(GL_TEXTURE_2D, top);
/* Draw top quad */
glEnable(GL_TEXTURE_2D);
glBegin(GL_QUADS);
glTexCoord2i(0, 0); glVertex2i(0, 0);
glTexCoord2i(0, 1); glVertex2i(0, windowHeight/2);
glTexCoord2i(1, 1); glVertex2i(windowWidth, windowHeight / 2);
glTexCoord2i(1, 0); glVertex2i(windowWidth, 0);
glEnd();
glDisable(GL_TEXTURE_2D);
/* Draw bottom quad */
glBindTexture(GL_TEXTURE_2D, bottom);
glEnable(GL_TEXTURE_2D);
glBegin(GL_QUADS);
glTexCoord2i(0, 0); glVertex2i(0, windowHeight / 2);
glTexCoord2i(0, 1); glVertex2i(0, windowHeight);
glTexCoord2i(1, 1); glVertex2i(windowWidth, windowHeight);
glTexCoord2i(1, 0); glVertex2i(windowWidth, windowHeight / 2);
glEnd();
glDisable(GL_TEXTURE_2D);
glfwSwapBuffers(currentGLFWWindow);
glfwPollEvents();
}
int main() {
using namespace cv;
using namespace std;
// initGL();
std::string filename = "./lena.pgm";
Mat image;
image = imread(filename, CV_LOAD_IMAGE_COLOR); // Read the file
if(! image.data ) // Check for invalid input
{
cout << "Could not open or find the image" << std::endl ;
return -1;
}
int windoww = 1280;
int windowh = 720;
initGL(windoww,windowh);
uint8_t *d_data;
cudaMalloc(&d_data, image.cols*image.rows*3);
Processor p;
for (int i = 0; i < image.cols; i++)
{
image.data[i*3+0] = 0;
image.data[i*3+1] = 0;
image.data[i*3+2] = 0;
//Process the image here
p.setInput(image.data, image.cols, image.rows);
p.processData(image.data, d_data);
showTextures(p.getInputTexture(), p.getOutputTexture(), windoww, windowh);
}
}
Notes:
The compilation command is given in the comment in the first line
I created a "video" of sorts using a single image. The "video" will show the image with a black or white line moving horizontally from left to right in the top pixel row of the image. The input image is lena.pgm which can be found in the CUDA samples (for example, at /usr/local/cuda-10.1/samples/3_Imaging/SobelFilter/data/lena.pgm).
It looks to me like you are "sharing" resources between OpenGL and CUDA. This doesn't look like the right map/unmap sequence to me, but it seems to be working, and it doesn't seem to be the focus of your question. I haven't spent any time investigating. I may have missed something.
I'm not suggesting this code is defect free or suitable for any particular purpose. It is mostly your code. I've modified it slightly to demonstrate some ideas described in the text.
There shouldn't be any visual difference in the output whether you compile with -DUSE_1 or not.
This is an useful feature that came across first in (https://www.3dgep.com/opengl-interoperability-with-cuda/), and I have improved upon it to use latest CUDA APIs and flow. You can refer to these 2 functions in cudammf.
https://github.com/prabindh/cudammf/blob/5f93358784fcbaae7eea0850424c59d2ed057dab/cuda_postproces.cu#L119
https://github.com/prabindh/cudammf/blob/5f93358784fcbaae7eea0850424c59d2ed057dab/decoder3.cpp#L507
Basic working is as below:
Create a regular GL texture (GLTextureId). Map it for CUDA access, via cudaGraphicsGLRegisterImage
Do some CUDA processing, and result is in a CUDA buffer
USe cudaMemcpyToArray to transfer between the above 2 device memories
If your output is coming from a Nvidia codec output, you should also refer to the AppDecGL sample in the Nvidia Video SDK (https://developer.nvidia.com/nvidia-video-codec-sdk).
I have a code that map Depth value with Color camera image.
I used Realsense ZR300 to capture (x,y,z) information.
My difficulty now is I can't map depth to color information.
From nearest to furthest, the color change from one color to another color with different color information.
I made two subplot and like to plot Depth color in the first subplot and normal color image in the second subplot.
My code is as follow.
int main() try
{
// Create a context object. This object owns the handles to all connected realsense devices.
rs::context ctx;
printf("There are %d connected RealSense devices.\n", ctx.get_device_count());
if(ctx.get_device_count() == 0) return EXIT_FAILURE;
// This tutorial will access only a single device, but it is trivial to extend to multiple devices
rs::device * dev = ctx.get_device(0);
printf("\nUsing device 0, an %s\n", dev->get_name());
printf(" Serial number: %s\n", dev->get_serial());
printf(" Firmware version: %s\n", dev->get_firmware_version());
// Configure all streams to run at VGA resolution at 60 frames per second
dev->enable_stream(rs::stream::depth, 640, 480, rs::format::z16, 60);
dev->enable_stream(rs::stream::color, 640, 480, rs::format::rgb8, 60);
dev->start();
// Open a GLFW window to display our output
glfwInit();
GLFWwindow * win = glfwCreateWindow(1280, 480, "Depth & Color images", nullptr, nullptr);
glfwMakeContextCurrent(win);
while(!glfwWindowShouldClose(win))
{
// Wait for new frame data
glfwPollEvents();
dev->wait_for_frames();
glClear(GL_COLOR_BUFFER_BIT);
glPixelZoom(1, -1);
// Display depth data by linearly mapping depth between 0 and 2 meters to the red channel
glRasterPos2f(-1, 1);
rs::intrinsics depth_intrin = dev->get_stream_intrinsics(rs::stream::depth);
const uint16_t * depth_image = (const uint16_t *)dev->get_frame_data(rs::stream::depth);
float scale = dev->get_depth_scale();
glBegin(GL_POINTS);
for(int dy=0; dy<depth_intrin.height; ++dy)
{
for(int dx=0; dx<depth_intrin.width; ++dx)
{
uint16_t depth_value = depth_image[dy * depth_intrin.width + dx];
float depth_in_meters = depth_value * scale;
if(depth_value == 0) continue;
rs::float2 depth_pixel = {(float)dx, (float)dy};
rs::float3 depth_point = depth_intrin.deproject(depth_pixel, depth_in_meters);
//////////////////Here I need to plot Depth to Color////////////////////////
////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
}
}
glEnd();
//glPixelTransferf(GL_RED_SCALE, 0xFFFF * dev->get_depth_scale() / 2.0f);
//glDrawPixels(640, 480, GL_RED, GL_UNSIGNED_SHORT, dev->get_frame_data(rs::stream::depth));
//glPixelTransferf(GL_RED_SCALE, 1.0f);
// Display color image as RGB triples
glRasterPos2f(0, 1);
glDrawPixels(640, 480, GL_RGB, GL_UNSIGNED_BYTE, dev->get_frame_data(rs::stream::color));
glfwSwapBuffers(win);
}
return EXIT_SUCCESS;
}
catch(const rs::error & e)
{
// Method calls against librealsense objects may throw exceptions of type rs::error
printf("rs::error was thrown when calling %s(%s):\n", e.get_failed_function().c_str(), e.get_failed_args().c_str());
printf(" %s\n", e.what());
return EXIT_FAILURE;
}
I did as follow and I have x,y,d information and can plot depth mapping to RED color upto 4 meters.
while(!glfwWindowShouldClose(win))
{
// Wait for new frame data
glfwPollEvents();
dev->wait_for_frames();
//FPS
auto t1 = std::chrono::high_resolution_clock::now();
time += std::chrono::duration<float>(t1-t0).count();
t0 = t1;
++frames;
if(time > 0.5f)
{
fps = frames / time;
frames = 0;
time = 0;
}
//
glClear(GL_COLOR_BUFFER_BIT);
glPixelZoom(1, -1);
// Display depth data by linearly mapping depth between 0 and 2 meters to the red channel
glRasterPos2f(-1, 1);
rs::intrinsics depth_intrin = dev->get_stream_intrinsics(rs::stream::depth);
const uint16_t * depth_image = (const uint16_t *)dev->get_frame_data(rs::stream::depth);
float scale = dev->get_depth_scale();
for(int dy=0; dy<depth_intrin.height; ++dy)
{
for(int dx=0; dx<depth_intrin.width; ++dx)
{
uint16_t depth_value = depth_image[dy * depth_intrin.width + dx];
float depth_in_meters = depth_value * scale;
}
}
glPixelTransferf(GL_RED_SCALE, 0xFFFF * dev->get_depth_scale() / 4.0f);
glDrawPixels(640, 480, GL_RED, GL_UNSIGNED_SHORT, dev->get_frame_data(rs::stream::depth));
glPixelTransferf(GL_RED_SCALE, 1.0f);
// glDrawPixels( 640, 480, GL_RGB, GL_UNSIGNED_INT, data );
// Display color image as RGB triples
glRasterPos2f(0, 1);
glDrawPixels(640, 480, GL_RGB, GL_UNSIGNED_BYTE, dev->get_frame_data(rs::stream::color));
//ss.str(""); ss << fps << " FPS";
//printf("FPS %s\n", ss.str().c_str());
draw_text(0, 0, ss.str().c_str());
glfwSwapBuffers(win);
}
i am trying to render a couple of tga files with opengl, using only glDrawPixels function and not texturing. I basically have the display loop, and manage to display the two files. After that i try to set the raster position on the x axis to the right, by declaring a global x GlInt variable and then incrementing it on each display loop. Thing is, this is not working. If i put an initial value to x, for example 100, the function will work and translate the images 100 pixels to the right, but incrementing it during the display loop doesn't do anything. Any ideas on what i could be doing wrong?
//Aplicatia realizeaza deschiderea si afisarea unui fisier tga
#include "glos.h"
#include <GL/gl.h>
#include <GL/glu.h>
#include <glaux.h>
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#pragma pack(1)
typedef struct
{
GLbyte identsize; // Size of ID field that follows header (0)
GLbyte colorMapType; // 0 = None, 1 = paletted
GLbyte imageType; // 0 = none, 1 = indexed, 2 = rgb, 3 = grey, +8=rle
unsigned short colorMapStart; // First colour map entry
unsigned short colorMapLength; // Number of colors
unsigned char colorMapBits; // bits per palette entry
unsigned short xstart; // image x origin
unsigned short ystart; // image y origin
unsigned short width; // width in pixels
unsigned short height; // height in pixels
GLbyte bits; // bits per pixel (8 16, 24, 32)
GLbyte descriptor; // image descriptor
} TGAHEADER;
#pragma pack(8)
void myinit(void);
void CALLBACK myReshape(GLsizei w, GLsizei h);
void CALLBACK display(void);
GLint x = 100;
////////////////////////////////////////////////////////////////////
// Allocate memory and load targa bits. Returns pointer to new buffer,
// height, and width of texture, and the OpenGL format of data.
// Call free() on buffer when finished!
// This only works on pretty vanilla targas... 8, 24, or 32 bit color
// only, no palettes, no RLE encoding.
GLbyte *gltLoadTGA(const char *szFileName, GLint *iWidth, GLint *iHeight, GLint *iComponents, GLenum *eFormat)
{
FILE *pFile; // File pointer
TGAHEADER tgaHeader; // TGA file header
unsigned long lImageSize; // Size in bytes of image
short sDepth; // Pixel depth;
GLbyte *pBits = NULL; // Pointer to bits
// Default/Failed values
*iWidth = 0;
*iHeight = 0;
*eFormat = GL_BGR_EXT;
*iComponents = GL_RGB8;
// Attempt to open the file
pFile = fopen(szFileName, "rb");
if(pFile == NULL)
return NULL;
// Read in header (binary)
fread(&tgaHeader, sizeof(TGAHEADER), 1, pFile);
// Get width, height, and depth of texture
*iWidth = tgaHeader.width;
*iHeight = tgaHeader.height;
sDepth = tgaHeader.bits / 8;
printf ("a citit fisierul %d\n",tgaHeader.height);
// Put some validity checks here. Very simply, I only understand
// or care about 8, 24, or 32 bit targa's.
// if(tgaHeader.bits != 8 && tgaHeader.bits != 24 && tgaHeader.bits != 32)
// return NULL;
// Calculate size of image buffer
lImageSize = tgaHeader.width * tgaHeader.height * sDepth;
printf ("a citit fisierul %d\n",lImageSize);
// Allocate memory and check for success
pBits = (GLbyte*)malloc(lImageSize * sizeof(GLbyte));
if(pBits == NULL)
return NULL;
// Read in the bits
// Check for read error. This should catch RLE or other
// weird formats that I don't want to recognize
if(fread(pBits, lImageSize, 1, pFile) != 1)
{
free(pBits);
return NULL;
}
// Set OpenGL format expected
switch(sDepth)
{
case 3: // Most likely case
*eFormat = GL_BGR_EXT;
*iComponents = GL_RGB8;
break;
case 4:
*eFormat = GL_BGRA_EXT;
*iComponents = GL_RGBA8;
break;
case 1:
*eFormat = GL_LUMINANCE;
*iComponents = GL_LUMINANCE8;
break;
};
// Done with File
fclose(pFile);
// Return pointer to image data
return pBits;
}
void myinit()
{
// Black background
glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
}
///////////////////////////////////////////////////////////////////////
// Called to draw scene
void CALLBACK display(void)
{
GLbyte *pImage = NULL;
GLbyte *pImage2 = NULL;
GLint iWidth, iHeight, iComponents;
GLenum eFormat;
// Clear the window with current clearing color
glClear(GL_COLOR_BUFFER_BIT);
// Targa's are 1 byte aligned
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
glEnable(GL_BLEND);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
glLoadIdentity( );
pImage = gltLoadTGA("C:\\Users\\calin.oana\\Desktop\\grafica\\ACSLAB6S\\loadtga\\321104_fishes.tga", &iWidth, &iHeight, &iComponents, &eFormat);
glTranslatef(0.01, 0, 0);
//glRasterPos2f(0.0, 0.0);
// Draw the pixmap
if(pImage != NULL)
glDrawPixels(iWidth, iHeight, eFormat, GL_UNSIGNED_BYTE, pImage);
// Don't need the image data anymore
free(pImage);
pImage = gltLoadTGA("C:\\Users\\calin.oana\\Desktop\\grafica\\ACSLAB6S\\loadtga\\mata.tga", &iWidth, &iHeight, &iComponents, &eFormat);
if (pImage != NULL)
glDrawPixels(iWidth, iHeight, eFormat, GL_UNSIGNED_BYTE, pImage);
free(pImage);
glRasterPos2i(x, 0);
x++;
// Do the buffer Swap
auxSwapBuffers();
}
//////////////////////////////////////////////////////////////
// For this example, it really doesn't matter what the
// projection is since we are using glWindowPos
void CALLBACK myReshape(GLsizei w, GLsizei h)
{
// Prevent a divide by zero, when window is too short
// (you cant make a window of zero width).
if(h == 0)
h = 1;
glViewport(0, 0, w, h);
// Reset the coordinate system before modifying
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
//glTranslatef(x++, 0, 0);
// Set the clipping volume
gluOrtho2D(0.0f, (GLfloat) w, 0.0, (GLfloat) h);
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
}
/////////////////////////////////////////////////////////////
// Main program entrypoint
int main(int argc, char* argv[])
{
auxInitDisplayMode (AUX_DOUBLE | AUX_RGB );
auxInitPosition (0, 0, 1000, 1000);
auxInitWindow("OpenGL Image Loading");
myinit();
auxReshapeFunc (myReshape);
auxMainLoop(display);
return(0);
}
Edit: i have found out what is wrong, the loop of the display function isnt properly happening.
I am currently trying to save the animation made in openGL to a video file. I have tried using openCV's videowriter but to no advantage. I have successfully been able to generate a snapshot and save it as bmp using the SDL library. If I save all snapshots and then generate the video using ffmpeg, that is like collecting 4 GB worth of images. Not practical.
How can I write video frames directly during rendering?
Here the code i use to take snapshots when I require:
void snapshot(){
SDL_Surface* snap = SDL_CreateRGBSurface(SDL_SWSURFACE,WIDTH,HEIGHT,24, 0x000000FF, 0x0000FF00, 0x00FF0000, 0);
char * pixels = new char [3 *WIDTH * HEIGHT];
glReadPixels(0, 0,WIDTH, HEIGHT, GL_RGB, GL_UNSIGNED_BYTE, pixels);
for (int i = 0 ; i <HEIGHT ; i++)
std::memcpy( ((char *) snap->pixels) + snap->pitch * i, pixels + 3 * WIDTH * (HEIGHT-i - 1), WIDTH*3 );
delete [] pixels;
SDL_SaveBMP(snap, "snapshot.bmp");
SDL_FreeSurface(snap);
}
I need the video output. I have discovered that ffmpeg can be used to create videos from C++ code but have not been able to figure out the process. Please help!
EDIT : I have tried using openCV CvVideoWriter class but the program crashes ("segmentation fault") the moment it is declared.Compilation shows no errors ofcourse. Any suggestions to that?
SOLUTION FOR PYTHON USERS (Requires Python2.7,python-imaging,python-opengl,python-opencv, codecs of format you want to write to, I am on Ubuntu 14.04 64-bit):
def snap():
pixels=[]
screenshot = glReadPixels(0,0,W,H,GL_RGBA,GL_UNSIGNED_BYTE)
snapshot = Image.frombuffer("RGBA",W,H),screenshot,"raw","RGBA",0,0)
snapshot.save(os.path.dirname(videoPath) + "/temp.jpg")
load = cv2.cv.LoadImage(os.path.dirname(videoPath) + "/temp.jpg")
cv2.cv.WriteFrame(videoWriter,load)
Here W and H are the window dimensions (width,height). What is happening is I am using PIL to convert the raw pixels read from the glReadPixels command into a JPEG image. I am loading that JPEG into the openCV image and writing to the videowriter. I was having certain issues by directly using the PIL image into the videowriter (which would save millions of clock cycles of I/O), but right now I am not working on that. Image is a PIL module cv2 is a python-opencv module.
It sounds as though you are using the command line utility: ffmpeg. Rather than using the command-line to encode video from a collection of still images, you should use libavcodec and libavformat. These are the libraries upon which ffmpeg is actually built, and will allow you to encode video and store it in a standard stream/interchange format (e.g. RIFF/AVI) without using a separate program.
You probably will not find a lot of tutorials on implementing this because it has traditionally been the case that people wanted to use ffmpeg to go the other way; that is, decode various video formats for display in OpenGL. I think this is going to change very soon with the introduction of gameplay video encoding to the PS4 and Xbox One consoles, suddenly demand for this functionality will skyrocket.
The general process is this, however:
Pick a container format and CODEC
Often one will decide the other, (e.g. MPEG-2 + MPEG Program Stream)
Start filling a buffer with your still frames
Periodically encode your buffer of still frames and write to your output (packet writing in MPEG terms)
You will do this either when the buffer becomes full, or every n-many ms; you might prefer one over the other depending on whether you want to stream your video live or not.
When your program terminates flush the buffer and close your stream
One nice thing about this is you do not actually need to write to a file. Since you are periodically encoding packets of data from your buffer of still frames, you can stream your encoded video over a network if you want - this is why codec and container (interchange) format are separate.
Another nice thing is you do not have to synchronize the CPU and GPU, you can setup a pixel buffer object and have OpenGL copy data into CPU memory a couple of frames behind the GPU. This makes real-time encoding of video much less demanding, you only have to encode and flush the video to disk or over the network periodically if video latency demands are not unreasonable. This works very well in real-time rendering, since you have a large enough pool of data to keep a CPU thread busy encoding at all times.
Encoding frames can even be done in real-time on the GPU provided enough storage for a large buffer of frames (since ultimately the encoded data has to be copied from GPU to CPU and you want to do this as infrequently as possible). Obviously this is not done using ffmpeg, there are specialized libraries using CUDA / OpenCL / compute shaders for this purpose. I have never used them, but they do exist.
For portability sake, you should stick with libavcodec and Pixel Buffer Objects for asynchronous GPU->CPU copy. CPUs these days have enough cores that you can probably get away without GPU-assisted encoding if you buffer enough frames and encode in multiple simultaneous threads (this creates added synchronization overhead and increased latency when outputting encoded video) or simply drop frames / lower resolution (poor man's solution).
There are a lot of concepts covered here that go well beyond the scope of SDL, but you did ask how to do this with better performance than your current solution. In short, use OpenGL Pixel Buffer Objects to transfer data, and libavcodec for encoding. An example application that encodes video can be found on the ffmpeg libavcodec examples page.
For some fast test something like the code below work (tested), resizable windows are unhandled.
#include <stdio.h>
FILE *avconv = NULL;
...
/* initialize */
avconv = popen("avconv -y -f rawvideo -s 800x600 -pix_fmt rgb24 -r 25 -i - -vf vflip -an -b:v 1000k test.mp4", "w");
...
/* save */
glReadPixels(0, 0, 800, 600, GL_RGB, GL_UNSIGNED_BYTE, pixels);
if (avconv)
fwrite(pixels ,800*600*3 , 1, avconv);
...
/* term */
if (avconv)
pclose(avconv);
Runnable mpg example with FFmpeg 2.7
Explanation and a superset example at: How to use GLUT/OpenGL to render to a file?
Consider https://github.com/FFmpeg/FFmpeg/blob/n3.0/doc/examples/muxing.c to generate a contained format.
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define GL_GLEXT_PROTOTYPES 1
#include <GL/gl.h>
#include <GL/glu.h>
#include <GL/glut.h>
#include <GL/glext.h>
#include <libavcodec/avcodec.h>
#include <libavutil/imgutils.h>
#include <libavutil/opt.h>
#include <libswscale/swscale.h>
enum Constants { SCREENSHOT_MAX_FILENAME = 256 };
static GLubyte *pixels = NULL;
static GLuint fbo;
static GLuint rbo_color;
static GLuint rbo_depth;
static const unsigned int HEIGHT = 100;
static const unsigned int WIDTH = 100;
static int offscreen = 1;
static unsigned int max_nframes = 100;
static unsigned int nframes = 0;
static unsigned int time0;
/* Model. */
static double angle;
static double delta_angle;
/* Adapted from: https://github.com/cirosantilli/cpp-cheat/blob/19044698f91fefa9cb75328c44f7a487d336b541/ffmpeg/encode.c */
static AVCodecContext *c = NULL;
static AVFrame *frame;
static AVPacket pkt;
static FILE *file;
static struct SwsContext *sws_context = NULL;
static uint8_t *rgb = NULL;
static void ffmpeg_encoder_set_frame_yuv_from_rgb(uint8_t *rgb) {
const int in_linesize[1] = { 4 * c->width };
sws_context = sws_getCachedContext(sws_context,
c->width, c->height, AV_PIX_FMT_RGB32,
c->width, c->height, AV_PIX_FMT_YUV420P,
0, NULL, NULL, NULL);
sws_scale(sws_context, (const uint8_t * const *)&rgb, in_linesize, 0,
c->height, frame->data, frame->linesize);
}
void ffmpeg_encoder_start(const char *filename, int codec_id, int fps, int width, int height) {
AVCodec *codec;
int ret;
avcodec_register_all();
codec = avcodec_find_encoder(codec_id);
if (!codec) {
fprintf(stderr, "Codec not found\n");
exit(1);
}
c = avcodec_alloc_context3(codec);
if (!c) {
fprintf(stderr, "Could not allocate video codec context\n");
exit(1);
}
c->bit_rate = 400000;
c->width = width;
c->height = height;
c->time_base.num = 1;
c->time_base.den = fps;
c->gop_size = 10;
c->max_b_frames = 1;
c->pix_fmt = AV_PIX_FMT_YUV420P;
if (codec_id == AV_CODEC_ID_H264)
av_opt_set(c->priv_data, "preset", "slow", 0);
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
exit(1);
}
file = fopen(filename, "wb");
if (!file) {
fprintf(stderr, "Could not open %s\n", filename);
exit(1);
}
frame = av_frame_alloc();
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
frame->format = c->pix_fmt;
frame->width = c->width;
frame->height = c->height;
ret = av_image_alloc(frame->data, frame->linesize, c->width, c->height, c->pix_fmt, 32);
if (ret < 0) {
fprintf(stderr, "Could not allocate raw picture buffer\n");
exit(1);
}
}
void ffmpeg_encoder_finish(void) {
uint8_t endcode[] = { 0, 0, 1, 0xb7 };
int got_output, ret;
do {
fflush(stdout);
ret = avcodec_encode_video2(c, &pkt, NULL, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding frame\n");
exit(1);
}
if (got_output) {
fwrite(pkt.data, 1, pkt.size, file);
av_packet_unref(&pkt);
}
} while (got_output);
fwrite(endcode, 1, sizeof(endcode), file);
fclose(file);
avcodec_close(c);
av_free(c);
av_freep(&frame->data[0]);
av_frame_free(&frame);
}
void ffmpeg_encoder_encode_frame(uint8_t *rgb) {
int ret, got_output;
ffmpeg_encoder_set_frame_yuv_from_rgb(rgb);
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding frame\n");
exit(1);
}
if (got_output) {
fwrite(pkt.data, 1, pkt.size, file);
av_packet_unref(&pkt);
}
}
void ffmpeg_encoder_glread_rgb(uint8_t **rgb, GLubyte **pixels, unsigned int width, unsigned int height) {
size_t i, j, k, cur_gl, cur_rgb, nvals;
const size_t format_nchannels = 4;
nvals = format_nchannels * width * height;
*pixels = realloc(*pixels, nvals * sizeof(GLubyte));
*rgb = realloc(*rgb, nvals * sizeof(uint8_t));
/* Get RGBA to align to 32 bits instead of just 24 for RGB. May be faster for FFmpeg. */
glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, *pixels);
for (i = 0; i < height; i++) {
for (j = 0; j < width; j++) {
cur_gl = format_nchannels * (width * (height - i - 1) + j);
cur_rgb = format_nchannels * (width * i + j);
for (k = 0; k < format_nchannels; k++)
(*rgb)[cur_rgb + k] = (*pixels)[cur_gl + k];
}
}
}
static int model_init(void) {
angle = 0;
delta_angle = 1;
}
static int model_update(void) {
angle += delta_angle;
return 0;
}
static int model_finished(void) {
return nframes >= max_nframes;
}
static void init(void) {
int glget;
if (offscreen) {
/* Framebuffer */
glGenFramebuffers(1, &fbo);
glBindFramebuffer(GL_FRAMEBUFFER, fbo);
/* Color renderbuffer. */
glGenRenderbuffers(1, &rbo_color);
glBindRenderbuffer(GL_RENDERBUFFER, rbo_color);
/* Storage must be one of: */
/* GL_RGBA4, GL_RGB565, GL_RGB5_A1, GL_DEPTH_COMPONENT16, GL_STENCIL_INDEX8. */
glRenderbufferStorage(GL_RENDERBUFFER, GL_RGB565, WIDTH, HEIGHT);
glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, rbo_color);
/* Depth renderbuffer. */
glGenRenderbuffers(1, &rbo_depth);
glBindRenderbuffer(GL_RENDERBUFFER, rbo_depth);
glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT16, WIDTH, HEIGHT);
glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, rbo_depth);
glReadBuffer(GL_COLOR_ATTACHMENT0);
/* Sanity check. */
assert(glCheckFramebufferStatus(GL_FRAMEBUFFER));
glGetIntegerv(GL_MAX_RENDERBUFFER_SIZE, &glget);
assert(WIDTH * HEIGHT < (unsigned int)glget);
} else {
glReadBuffer(GL_BACK);
}
glClearColor(0.0, 0.0, 0.0, 0.0);
glEnable(GL_DEPTH_TEST);
glPixelStorei(GL_PACK_ALIGNMENT, 1);
glViewport(0, 0, WIDTH, HEIGHT);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glMatrixMode(GL_MODELVIEW);
time0 = glutGet(GLUT_ELAPSED_TIME);
model_init();
ffmpeg_encoder_start("tmp.mpg", AV_CODEC_ID_MPEG1VIDEO, 25, WIDTH, HEIGHT);
}
static void deinit(void) {
printf("FPS = %f\n", 1000.0 * nframes / (double)(glutGet(GLUT_ELAPSED_TIME) - time0));
free(pixels);
ffmpeg_encoder_finish();
free(rgb);
if (offscreen) {
glDeleteFramebuffers(1, &fbo);
glDeleteRenderbuffers(1, &rbo_color);
glDeleteRenderbuffers(1, &rbo_depth);
}
}
static void draw_scene(void) {
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glLoadIdentity();
glRotatef(angle, 0.0f, 0.0f, -1.0f);
glBegin(GL_TRIANGLES);
glColor3f(1.0f, 0.0f, 0.0f);
glVertex3f( 0.0f, 0.5f, 0.0f);
glColor3f(0.0f, 1.0f, 0.0f);
glVertex3f(-0.5f, -0.5f, 0.0f);
glColor3f(0.0f, 0.0f, 1.0f);
glVertex3f( 0.5f, -0.5f, 0.0f);
glEnd();
}
static void display(void) {
char extension[SCREENSHOT_MAX_FILENAME];
char filename[SCREENSHOT_MAX_FILENAME];
draw_scene();
if (offscreen) {
glFlush();
} else {
glutSwapBuffers();
}
frame->pts = nframes;
ffmpeg_encoder_glread_rgb(&rgb, &pixels, WIDTH, HEIGHT);
ffmpeg_encoder_encode_frame(rgb);
nframes++;
if (model_finished())
exit(EXIT_SUCCESS);
}
static void idle(void) {
while (model_update());
glutPostRedisplay();
}
int main(int argc, char **argv) {
GLint glut_display;
glutInit(&argc, argv);
if (argc > 1)
offscreen = 0;
if (offscreen) {
/* TODO: if we use anything smaller than the window, it only renders a smaller version of things. */
/*glutInitWindowSize(50, 50);*/
glutInitWindowSize(WIDTH, HEIGHT);
glut_display = GLUT_SINGLE;
} else {
glutInitWindowSize(WIDTH, HEIGHT);
glutInitWindowPosition(100, 100);
glut_display = GLUT_DOUBLE;
}
glutInitDisplayMode(glut_display | GLUT_RGBA | GLUT_DEPTH);
glutCreateWindow(argv[0]);
if (offscreen) {
/* TODO: if we hide the window the program blocks. */
/*glutHideWindow();*/
}
init();
glutDisplayFunc(display);
glutIdleFunc(idle);
atexit(deinit);
glutMainLoop();
return EXIT_SUCCESS;
}
I solved the writing of a video file in Python from Python OpenGL the following way:
In the main section, setup the video file to write to:
#Set up video:
width=640
height=480
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
#Open video output file:
out = cv2.VideoWriter('videoout.mp4',fourcc, 20.0, (width,height))
And in the DisplayFunction:
#Read frame:
screenshot = glReadPixels(0,0,width,height,GL_RGB,GL_UNSIGNED_BYTE)
#Convert from binary to cv2 numpy array:
snapshot = Image.frombuffer("RGB",(width,height),screenshot,"raw","RGB",0,0)
snapshot= np.array(snapshot)
snapshot=cv2.flip(snapshot,0)
#write frame to video file:
out.write(snapshot)
if (...): #End movie
glutLeaveMainLoop()
out.release()
print("Exit")
This writes to "videoout.mp4". Observe that it needs the "out.release()" in the end to get a proper mp4 file.
I have a program which simulates a physical system that changes over time. I want to, at predetermined intervals (say every 10 seconds) output a visualization of the state of the simulation to a file. I want to do it in such a way that it is easy to "turn the visualization off" and not output the visualization at all.
I am looking at OpenGL and GLUT as graphics tools to do the visualization. However the problem seems to be that first of all, it looks like it only outputs to a window and can't output to a file. Second, in order to generate the visualization you have to call GLUTMainLoop and that stops the execution of the main function - the only functions that get called from then on are calls from the GUI. However I do not want this to be a GUI based application - I want it to just be an application that you run from the command line, and it generates a series of images. Is there a way to do this in GLUT/OpenGL? Or is OpenGL the wrong tool for this completely and I should use something else
glReadPixels runnable PBO example
The example below generates either:
one ppm per frame at 200 FPS and no extra dependencies,
one png per frame at 600 FPS with libpng
one mpg for all frames at 1200 FPS with FFmpeg
on a ramfs. The better the compression, the larger the FPS, so we must be memory IO bound.
FPS is larger than 200 on my 60 FPS screen, and all images are different, so I'm sure that it's not limited to the screen's FPS.
The GIF in this answer was generated from the video as explained at: https://askubuntu.com/questions/648603/how-to-create-an-animated-gif-from-mp4-video-via-command-line/837574#837574
glReadPixels is the key OpenGL function that reads pixels from screen. Also have a look at the setup under init().
glReadPixels reads the bottom line of pixels first, unlike most image formats, so converting that is usually needed.
offscreen.c
#ifndef PPM
#define PPM 1
#endif
#ifndef LIBPNG
#define LIBPNG 1
#endif
#ifndef FFMPEG
#define FFMPEG 1
#endif
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define GL_GLEXT_PROTOTYPES 1
#include <GL/gl.h>
#include <GL/glu.h>
#include <GL/glut.h>
#include <GL/glext.h>
#if LIBPNG
#include <png.h>
#endif
#if FFMPEG
#include <libavcodec/avcodec.h>
#include <libavutil/imgutils.h>
#include <libavutil/opt.h>
#include <libswscale/swscale.h>
#endif
enum Constants { SCREENSHOT_MAX_FILENAME = 256 };
static GLubyte *pixels = NULL;
static GLuint fbo;
static GLuint rbo_color;
static GLuint rbo_depth;
static int offscreen = 1;
static unsigned int max_nframes = 128;
static unsigned int nframes = 0;
static unsigned int time0;
static unsigned int height = 128;
static unsigned int width = 128;
#define PPM_BIT (1 << 0)
#define LIBPNG_BIT (1 << 1)
#define FFMPEG_BIT (1 << 2)
static unsigned int output_formats = PPM_BIT | LIBPNG_BIT | FFMPEG_BIT;
/* Model. */
static double angle;
static double delta_angle;
#if PPM
/* Take screenshot with glReadPixels and save to a file in PPM format.
*
* - filename: file path to save to, without extension
* - width: screen width in pixels
* - height: screen height in pixels
* - pixels: intermediate buffer to avoid repeated mallocs across multiple calls.
* Contents of this buffer do not matter. May be NULL, in which case it is initialized.
* You must `free` it when you won't be calling this function anymore.
*/
static void screenshot_ppm(const char *filename, unsigned int width,
unsigned int height, GLubyte **pixels) {
size_t i, j, cur;
const size_t format_nchannels = 3;
FILE *f = fopen(filename, "w");
fprintf(f, "P3\n%d %d\n%d\n", width, height, 255);
*pixels = realloc(*pixels, format_nchannels * sizeof(GLubyte) * width * height);
glReadPixels(0, 0, width, height, GL_RGB, GL_UNSIGNED_BYTE, *pixels);
for (i = 0; i < height; i++) {
for (j = 0; j < width; j++) {
cur = format_nchannels * ((height - i - 1) * width + j);
fprintf(f, "%3d %3d %3d ", (*pixels)[cur], (*pixels)[cur + 1], (*pixels)[cur + 2]);
}
fprintf(f, "\n");
}
fclose(f);
}
#endif
#if LIBPNG
/* Adapted from https://github.com/cirosantilli/cpp-cheat/blob/19044698f91fefa9cb75328c44f7a487d336b541/png/open_manipulate_write.c */
static png_byte *png_bytes = NULL;
static png_byte **png_rows = NULL;
static void screenshot_png(const char *filename, unsigned int width, unsigned int height,
GLubyte **pixels, png_byte **png_bytes, png_byte ***png_rows) {
size_t i, nvals;
const size_t format_nchannels = 4;
FILE *f = fopen(filename, "wb");
nvals = format_nchannels * width * height;
*pixels = realloc(*pixels, nvals * sizeof(GLubyte));
*png_bytes = realloc(*png_bytes, nvals * sizeof(png_byte));
*png_rows = realloc(*png_rows, height * sizeof(png_byte*));
glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, *pixels);
for (i = 0; i < nvals; i++)
(*png_bytes)[i] = (*pixels)[i];
for (i = 0; i < height; i++)
(*png_rows)[height - i - 1] = &(*png_bytes)[i * width * format_nchannels];
png_structp png = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
if (!png) abort();
png_infop info = png_create_info_struct(png);
if (!info) abort();
if (setjmp(png_jmpbuf(png))) abort();
png_init_io(png, f);
png_set_IHDR(
png,
info,
width,
height,
8,
PNG_COLOR_TYPE_RGBA,
PNG_INTERLACE_NONE,
PNG_COMPRESSION_TYPE_DEFAULT,
PNG_FILTER_TYPE_DEFAULT
);
png_write_info(png, info);
png_write_image(png, *png_rows);
png_write_end(png, NULL);
png_destroy_write_struct(&png, &info);
fclose(f);
}
#endif
#if FFMPEG
/* Adapted from: https://github.com/cirosantilli/cpp-cheat/blob/19044698f91fefa9cb75328c44f7a487d336b541/ffmpeg/encode.c */
static AVCodecContext *c = NULL;
static AVFrame *frame;
static AVPacket pkt;
static FILE *file;
static struct SwsContext *sws_context = NULL;
static uint8_t *rgb = NULL;
static void ffmpeg_encoder_set_frame_yuv_from_rgb(uint8_t *rgb) {
const int in_linesize[1] = { 4 * c->width };
sws_context = sws_getCachedContext(sws_context,
c->width, c->height, AV_PIX_FMT_RGB32,
c->width, c->height, AV_PIX_FMT_YUV420P,
0, NULL, NULL, NULL);
sws_scale(sws_context, (const uint8_t * const *)&rgb, in_linesize, 0,
c->height, frame->data, frame->linesize);
}
void ffmpeg_encoder_start(const char *filename, int codec_id, int fps, int width, int height) {
AVCodec *codec;
int ret;
avcodec_register_all();
codec = avcodec_find_encoder(codec_id);
if (!codec) {
fprintf(stderr, "Codec not found\n");
exit(1);
}
c = avcodec_alloc_context3(codec);
if (!c) {
fprintf(stderr, "Could not allocate video codec context\n");
exit(1);
}
c->bit_rate = 400000;
c->width = width;
c->height = height;
c->time_base.num = 1;
c->time_base.den = fps;
c->gop_size = 10;
c->max_b_frames = 1;
c->pix_fmt = AV_PIX_FMT_YUV420P;
if (codec_id == AV_CODEC_ID_H264)
av_opt_set(c->priv_data, "preset", "slow", 0);
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
exit(1);
}
file = fopen(filename, "wb");
if (!file) {
fprintf(stderr, "Could not open %s\n", filename);
exit(1);
}
frame = av_frame_alloc();
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
frame->format = c->pix_fmt;
frame->width = c->width;
frame->height = c->height;
ret = av_image_alloc(frame->data, frame->linesize, c->width, c->height, c->pix_fmt, 32);
if (ret < 0) {
fprintf(stderr, "Could not allocate raw picture buffer\n");
exit(1);
}
}
void ffmpeg_encoder_finish(void) {
uint8_t endcode[] = { 0, 0, 1, 0xb7 };
int got_output, ret;
do {
fflush(stdout);
ret = avcodec_encode_video2(c, &pkt, NULL, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding frame\n");
exit(1);
}
if (got_output) {
fwrite(pkt.data, 1, pkt.size, file);
av_packet_unref(&pkt);
}
} while (got_output);
fwrite(endcode, 1, sizeof(endcode), file);
fclose(file);
avcodec_close(c);
av_free(c);
av_freep(&frame->data[0]);
av_frame_free(&frame);
}
void ffmpeg_encoder_encode_frame(uint8_t *rgb) {
int ret, got_output;
ffmpeg_encoder_set_frame_yuv_from_rgb(rgb);
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding frame\n");
exit(1);
}
if (got_output) {
fwrite(pkt.data, 1, pkt.size, file);
av_packet_unref(&pkt);
}
}
void ffmpeg_encoder_glread_rgb(uint8_t **rgb, GLubyte **pixels, unsigned int width, unsigned int height) {
size_t i, j, k, cur_gl, cur_rgb, nvals;
const size_t format_nchannels = 4;
nvals = format_nchannels * width * height;
*pixels = realloc(*pixels, nvals * sizeof(GLubyte));
*rgb = realloc(*rgb, nvals * sizeof(uint8_t));
/* Get RGBA to align to 32 bits instead of just 24 for RGB. May be faster for FFmpeg. */
glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, *pixels);
for (i = 0; i < height; i++) {
for (j = 0; j < width; j++) {
cur_gl = format_nchannels * (width * (height - i - 1) + j);
cur_rgb = format_nchannels * (width * i + j);
for (k = 0; k < format_nchannels; k++)
(*rgb)[cur_rgb + k] = (*pixels)[cur_gl + k];
}
}
}
#endif
static void model_init(void) {
angle = 0;
delta_angle = 1;
}
static int model_update(void) {
angle += delta_angle;
return 0;
}
static int model_finished(void) {
return nframes >= max_nframes;
}
static void init(void) {
int glget;
if (offscreen) {
/* Framebuffer */
glGenFramebuffers(1, &fbo);
glBindFramebuffer(GL_FRAMEBUFFER, fbo);
/* Color renderbuffer. */
glGenRenderbuffers(1, &rbo_color);
glBindRenderbuffer(GL_RENDERBUFFER, rbo_color);
/* Storage must be one of: */
/* GL_RGBA4, GL_RGB565, GL_RGB5_A1, GL_DEPTH_COMPONENT16, GL_STENCIL_INDEX8. */
glRenderbufferStorage(GL_RENDERBUFFER, GL_RGB565, width, height);
glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, rbo_color);
/* Depth renderbuffer. */
glGenRenderbuffers(1, &rbo_depth);
glBindRenderbuffer(GL_RENDERBUFFER, rbo_depth);
glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT16, width, height);
glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, rbo_depth);
glReadBuffer(GL_COLOR_ATTACHMENT0);
/* Sanity check. */
assert(glCheckFramebufferStatus(GL_FRAMEBUFFER));
glGetIntegerv(GL_MAX_RENDERBUFFER_SIZE, &glget);
assert(width < (unsigned int)glget);
assert(height < (unsigned int)glget);
} else {
glReadBuffer(GL_BACK);
}
glClearColor(0.0, 0.0, 0.0, 0.0);
glEnable(GL_DEPTH_TEST);
glPixelStorei(GL_PACK_ALIGNMENT, 1);
glViewport(0, 0, width, height);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glMatrixMode(GL_MODELVIEW);
time0 = glutGet(GLUT_ELAPSED_TIME);
model_init();
#if FFMPEG
ffmpeg_encoder_start("tmp.mpg", AV_CODEC_ID_MPEG1VIDEO, 25, width, height);
#endif
}
static void deinit(void) {
printf("FPS = %f\n", 1000.0 * nframes / (double)(glutGet(GLUT_ELAPSED_TIME) - time0));
free(pixels);
#if LIBPNG
if (output_formats & LIBPNG_BIT) {
free(png_bytes);
free(png_rows);
}
#endif
#if FFMPEG
if (output_formats & FFMPEG_BIT) {
ffmpeg_encoder_finish();
free(rgb);
}
#endif
if (offscreen) {
glDeleteFramebuffers(1, &fbo);
glDeleteRenderbuffers(1, &rbo_color);
glDeleteRenderbuffers(1, &rbo_depth);
}
}
static void draw_scene(void) {
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glLoadIdentity();
glRotatef(angle, 0.0f, 0.0f, -1.0f);
glBegin(GL_TRIANGLES);
glColor3f(1.0f, 0.0f, 0.0f);
glVertex3f( 0.0f, 0.5f, 0.0f);
glColor3f(0.0f, 1.0f, 0.0f);
glVertex3f(-0.5f, -0.5f, 0.0f);
glColor3f(0.0f, 0.0f, 1.0f);
glVertex3f( 0.5f, -0.5f, 0.0f);
glEnd();
}
static void display(void) {
char filename[SCREENSHOT_MAX_FILENAME];
draw_scene();
if (offscreen) {
glFlush();
} else {
glutSwapBuffers();
}
#if PPM
if (output_formats & PPM_BIT) {
snprintf(filename, SCREENSHOT_MAX_FILENAME, "tmp.%d.ppm", nframes);
screenshot_ppm(filename, width, height, &pixels);
}
#endif
#if LIBPNG
if (output_formats & LIBPNG_BIT) {
snprintf(filename, SCREENSHOT_MAX_FILENAME, "tmp.%d.png", nframes);
screenshot_png(filename, width, height, &pixels, &png_bytes, &png_rows);
}
#endif
# if FFMPEG
if (output_formats & FFMPEG_BIT) {
frame->pts = nframes;
ffmpeg_encoder_glread_rgb(&rgb, &pixels, width, height);
ffmpeg_encoder_encode_frame(rgb);
}
#endif
nframes++;
if (model_finished())
exit(EXIT_SUCCESS);
}
static void idle(void) {
while (model_update());
glutPostRedisplay();
}
int main(int argc, char **argv) {
int arg;
GLint glut_display;
/* CLI args. */
glutInit(&argc, argv);
arg = 1;
if (argc > arg) {
offscreen = (argv[arg][0] == '1');
} else {
offscreen = 1;
}
arg++;
if (argc > arg) {
max_nframes = strtoumax(argv[arg], NULL, 10);
}
arg++;
if (argc > arg) {
width = strtoumax(argv[arg], NULL, 10);
}
arg++;
if (argc > arg) {
height = strtoumax(argv[arg], NULL, 10);
}
arg++;
if (argc > arg) {
output_formats = strtoumax(argv[arg], NULL, 10);
}
/* Work. */
if (offscreen) {
/* TODO: if we use anything smaller than the window, it only renders a smaller version of things. */
/*glutInitWindowSize(50, 50);*/
glutInitWindowSize(width, height);
glut_display = GLUT_SINGLE;
} else {
glutInitWindowSize(width, height);
glutInitWindowPosition(100, 100);
glut_display = GLUT_DOUBLE;
}
glutInitDisplayMode(glut_display | GLUT_RGBA | GLUT_DEPTH);
glutCreateWindow(argv[0]);
if (offscreen) {
/* TODO: if we hide the window the program blocks. */
/*glutHideWindow();*/
}
init();
glutDisplayFunc(display);
glutIdleFunc(idle);
atexit(deinit);
glutMainLoop();
return EXIT_SUCCESS;
}
On GitHub.
Compile with:
sudo apt-get install libpng-dev libavcodec-dev libavutil-dev
gcc -DPPM=1 -DLIBPNG=1 -DFFMPEG=1 -ggdb3 -std=c99 -O0 -Wall -Wextra \
-o offscreen offscreen.c -lGL -lGLU -lglut -lpng -lavcodec -lswscale -lavutil
Run 10 frames "offscreen" (mostly TODO, works but has no advantage), with size 200 x 100 and all output formats:
./offscreen 1 10 200 100 7
CLI format is:
./offscreen [offscreen [nframes [width [height [output_formats]]]]]
and output_formats is a bitmask:
ppm >> 0 | png >> 1 | mpeg >> 2
Run on-screen (does not limit my FPS either):
./offscreen 0
Benchmarked on Ubuntu 15.10, OpenGL 4.4.0 NVIDIA 352.63, Lenovo Thinkpad T430.
Also tested on ubuntu 18.04, OpenGL 4.6.0 NVIDIA 390.77, Lenovo Thinkpad P51.
TODO: find a way to do it on a machine without GUI (e.g. X11). It seems that OpenGL is just not made for offscreen rendering, and that reading pixels back to the GPU is implemented on the interface with the windowing system (e.g. GLX). See: OpenGL without X.org in linux
TODO: use a 1x1 window, make it un-resizable, and hide it to make things more robust. If I do either of those, the rendering fails, see code comments. Preventing resize seems impossible in Glut, but GLFW supports it. In any case, those don't matter much as my FPS is not limited by the screen refresh frequency, even when offscreen is off.
Other options besides PBO
render to backbuffer (default render place)
render to a texture
render to a Pixelbuffer object (PBO)
Framebuffer and Pixelbuffer are better than the backbuffer and texture since they are made for data to be read back to CPU, while the backbuffer and textures are made to stay on the GPU and show on screen.
PBO is for asynchronous transfers, so I think we don't need it, see: What are the differences between a Frame Buffer Object and a Pixel Buffer Object in OpenGL?,
Maybe off-screen Mesa is worth looking into: http://www.mesa3d.org/osmesa.html
Vulkan
It seems that Vulkan is designed to support offscreen rendering better than OpenGL.
This is mentioned on this NVIDIA overview: https://developer.nvidia.com/transitioning-opengl-vulkan
This is a runnable example that I just managed to run locally: https://github.com/SaschaWillems/Vulkan/tree/b9f0ac91d2adccc3055a904d3a8f6553b10ff6cd/examples/renderheadless/renderheadless.cpp
After installing the drivers and ensuring that the GPU is working I can do:
git clone https://github.com/SaschaWillems/Vulkan
cd Vulkan
git checkout b9f0ac91d2adccc3055a904d3a8f6553b10ff6cd
python download_assets.py
mkdir build
cd build
cmake ..
make -j`nproc`
cd bin
./renderheadless
and this immediately generates an image headless.ppm without opening any windows:
I have also managed to run this program an Ubuntu Ctrl + Alt + F3 non-graphical TTY, which further indicates it really does not need a screen.
Other examples that might be of interest:
https://github.com/SaschaWillems/Vulkan/blob/b9f0ac91d2adccc3055a904d3a8f6553b10ff6cd/examples/screenshot/screenshot.cpp starts a GUI, and you can click a button to take a screenshot, and it gets saved to `screenshot
https://github.com/SaschaWillems/Vulkan/blob/b9f0ac91d2adccc3055a904d3a8f6553b10ff6cd/examples/offscreen/offscreen.cpp render an image twice to create a reflection effect
Related: Is it possible to do offscreen rendering without Surface in Vulkan?
Tested on Ubuntu 20.04, NVIDIA driver 435.21, NVIDIA Quadro M1200 GPU.
apiretrace
https://github.com/apitrace/apitrace
Just works, and does not require you to modify your code at all:
git clone https://github.com/apitrace/apitrace
cd apitrace
git checkout 7.0
mkdir build
cd build
cmake ..
make
# Creates opengl_executable.out.trace
./apitrace trace /path/to/opengl_executable.out
./apitrace dump-images opengl_executable.out.trace
Also available on Ubuntu 18.10 with:
sudo apt-get install apitrace
You now have a bunch of screenshots named as:
animation.out.<n>.png
TODO: working principle.
The docs also suggest this for video:
apitrace dump-images -o - application.trace \
| ffmpeg -r 30 -f image2pipe -vcodec ppm -i pipe: -vcodec mpeg4 -y output.mp4
See also:
images to GIF: https://unix.stackexchange.com/questions/24014/creating-a-gif-animation-from-png-files/489210#489210
images to video: How to create a video from images with FFmpeg?
WebGL + canvas image saving
This is mostly a toy due to performance, but it kind of works for really basic use cases:
demo: https://cirosantilli.com#webgl
source: https://github.com/cirosantilli/cirosantilli.github.io/blame/fab7f46ec85a5357170a1bee9b3b0580bde6bd88/README.adoc#L2083
bibliography: how to save canvas as png image?
Bibliography
How to use GLUT/OpenGL to render to a file?
How to take screenshot in OpenGL
How to render offscreen on OpenGL?
glReadPixels() "data" argument usage?
Render OpenGL ES 2.0 to image
http://www.songho.ca/opengl/gl_fbo.html
http://www.mesa3d.org/brianp/sig97/offscrn.htm
Render off screen (with FBO and RenderBuffer) and pixel transfer of color, depth, stencil
https://gamedev.stackexchange.com/questions/59204/opengl-fbo-render-off-screen-and-texture
What are the differences between a Frame Buffer Object and a Pixel Buffer Object in OpenGL?
glReadPixels() "data" argument usage?
FBO larger than window:
OpenGL how to create, and render to, a framebuffer that's larger than the window?
FBO lwjgl bigger than Screen Size - What I'm doing wrong?
Renderbuffers larger than window size - OpenGL
problem saving openGL FBO larger than window
No Window / X11:
OpenGL without X.org in linux
Can you create OpenGL context without opening a window?
Using OpenGL Without X-Window System
You almost certainly don't want GLUT, regardless. Your requirements don't fit what it's intended to do (and even when your requirements do fit its intended purpose, you usually don't want it anyway).
You can use OpenGL. To generate output in a file, you basically set up OpenGL to render to a texture, and then read the resulting texture into main memory and save it to a file. At least on some systems (e.g., Windows), I'm pretty sure you'll still have to create a window and associate the rendering context with the window, though it will probably be fine if the window is always hidden.
Not to take away from the other excellent answers, but if you want an existing example we have been doing Offscreen GL rendering for a few years now in OpenSCAD, as part of the Test Framework rendering to .png files from the commandline. The relevant files are at https://github.com/openscad/openscad/tree/master/src under the Offscreen*.cc
It runs on OSX (CGL), Linux X11 (GLX), BSD (GLX), and Windows (WGL), with some quirks due to driver differences. The basic trick is to forget to open a window (like, Douglas Adams says the trick to flying is to forget to hit the ground). It even runs on 'headless' linux/bsd if you have a virtual X11 server running like Xvfb or Xvnc. There is also the possibility to use Software Rendering on Linux/BSD by setting the environment variable LIBGL_ALWAYS_SOFTWARE=1 before running your program, which can help in some situations.
This is not the only system to do this, I believe the VTK imaging system does something similar.
This code is a bit old in it's methods, (I ripped the GLX code out of Brian Paul's glxgears), especially as new systems come along, OSMesa, Mir, Wayland, EGL, Android, Vulkan, etc, but notice the OffscreenXXX.cc filenames where XXX is the GL context subsystem, it can in theory be ported to different context generators.
Not sure OpenGL is the best solution.
But you can always render to an off-screen buffer.
The typical way to write openGL output to a file is to use readPixels to copy the resulting scene pixel-pixel to an image file
You could use SFML http://www.sfml-dev.org/. You can use the image class to save your rendered output.
http://www.sfml-dev.org/documentation/1.6/classsf_1_1Image.htm
To get your rendered output, you can render to a texture or copy your screen.
Rendering to a texture:
http://nehe.gamedev.net/data/lessons/lesson.asp?lesson=36
http://zavie.free.fr/opengl/index.html.en#rendertotexture
Copying screen output:
http://eonstrife.wordpress.com/2007/06/02/taking-a-screenshot-from-an-opengl-application/
1. Rendering offscreen using EGL with a PbufferSurface instead of a WindowSurface
EGLDisplay display;
EGLSurface surface;
EGLContext context;
//bind desktop OpenGL
eglBindAPI(EGL_OPENGL_API);
display = eglGetDisplay(EGL_DEFAULT_DISPLAY);
eglInitialize(display, nullptr, nullptr);
//example constraints
const EGLint egl_config_constraints[] = {
EGL_STENCIL_SIZE, static_cast<EGLint>(8),
EGL_DEPTH_SIZE, static_cast<EGLint>(16),
EGL_BUFFER_SIZE, static_cast<EGLint>(32),
EGL_RED_SIZE, static_cast<EGLint>(8),
EGL_GREEN_SIZE, static_cast<EGLint>(8),
EGL_BLUE_SIZE, static_cast<EGLint>(8),
EGL_ALPHA_SIZE, static_cast<EGLint>(8),
EGL_SAMPLE_BUFFERS, EGL_FALSE,
EGL_SAMPLES, 0,
EGL_SURFACE_TYPE, EGL_PBUFFER_BIT,
EGL_RENDERABLE_TYPE, EGL_OPENGL_BIT,
EGL_CONFORMANT, EGL_OPENGL_BIT,
EGL_CONFIG_CAVEAT, EGL_NONE,
EGL_NONE };
EGLint configCount;
EGLConfig configs[1];
//find a fitting config
eglChooseConfig(display, egl_config_constraints, configs, 1, &configCount);
//set up the PbufferSurface
EGLint pbuffer_attrib_list[] = {
EGL_WIDTH, WIDTH,
EGL_HEIGHT, HEIGHT,
EGL_NONE };
surface = eglCreatePbufferSurface(display, configs[0], pbuffer_attrib_list);
//setup the EGLContext
const EGLint contextVersion[] = {
EGL_CONTEXT_MAJOR_VERSION, 4,
EGL_CONTEXT_MINOR_VERSION, 6,
EGL_CONTEXT_OPENGL_PROFILE_MASK, EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT,
EGL_CONTEXT_OPENGL_DEBUG, debug ? EGL_TRUE : EGL_FALSE,
EGL_NONE };
context = eglCreateContext(display, configs[0], EGL_NO_CONTEXT, contextVersion);
eglMakeCurrent(display, surface, surface, context);
eglSwapInterval(display, 1);
2. Saving the images
There are several ways to achieve this. Actually the above code is part of a demo (https://github.com/kallaballa/GCV/blob/main/src/tetra/tetra-demo.cpp) i wrote that uses OpenCL/OpenGL/VAAPI interop in conjunction with OpenCV to write a video of what is rendered in OpenGL.
Please mind the README if you want to build the demo.