glFinish hanging forever after clEnqueueReleaseGLObjects - opengl

The following code runs fine (Windows 7, Nvidia GTX 750 Ti) with Nvidia drivers 361.91 (and earlier), but hangs with newer versions like 364.72 and 368.69. Now glFinish blocks the execution of the program only after clEnqueueReleaseGLObjects is invoked. Before blaming the drivers I'd suspect there is something wrong with how I do my OpenCL/OpenGL interop, so here is the code for a small entire program that reproduces the problem, the problem being at the very end:
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <SDL.h>
#include <gl/glew.h>
#include <SDL_opengl.h>
#include <gl/glut.h>
#pragma comment (lib, "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v7.5\\lib\\x64\\OpenCL.lib")
#include <CL/cl.h>
#include <CL/cl_gl.h>
/*
 * Create an OpenCL context that shares with the OpenGL context current on
 * the calling thread (WGL), plus a command queue with default properties.
 *
 * context        out: receives the new CL context
 * command_queue  out: receives a queue on the first GPU device reported by
 *                the selected platform
 *
 * Returns CL_SUCCESS on success. Otherwise returns the error code of the
 * first failing OpenCL call, or CL_DEVICE_NOT_FOUND when no platform exposes
 * a GPU device. On failure the output handles must not be used.
 */
cl_int init_cl_context(cl_context *context, cl_command_queue *command_queue)
{
	cl_int ret, pf_index = -1;
	cl_platform_id platform_id[16];
	cl_device_id device_id[16];
	cl_uint ret_num_platforms = 0;
	cl_uint ret_num_devices = 0;
	cl_uint i;

	ret = clGetPlatformIDs(sizeof(platform_id)/sizeof(*platform_id), platform_id, &ret_num_platforms);	// get all the platforms
	if (ret != CL_SUCCESS)
		return ret;

	// the reported count is the number of platforms available, which may exceed the array capacity
	if (ret_num_platforms > sizeof(platform_id)/sizeof(*platform_id))
		ret_num_platforms = sizeof(platform_id)/sizeof(*platform_id);

	for (i = 0; i < ret_num_platforms; i++)		// go through all the platforms
	{
		ret_num_devices = 0;	// do not let a failed query reuse the previous platform's count
		ret = clGetDeviceIDs(platform_id[i], CL_DEVICE_TYPE_GPU, sizeof(device_id)/sizeof(*device_id), device_id, &ret_num_devices);	// get all the suitable GPU devices

		if (ret == CL_SUCCESS && ret_num_devices > 0)	// stop trying platforms when a suitable device is found
		{
			pf_index = (cl_int) i;
			break;		// keeps device_id[] consistent with platform_id[pf_index]
		}
	}

	if (pf_index < 0)	// no platform offers a GPU device: never index platform_id[-1]
		return CL_DEVICE_NOT_FOUND;

	cl_context_properties properties[] = { CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(), CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(), CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id[pf_index], 0 };

	*context = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, NULL, NULL, &ret);
	if (ret != CL_SUCCESS)
		return ret;

	// the 0* factors are the author's on/off toggles for the optional queue properties
	*command_queue = clCreateCommandQueue(*context, device_id[0], 0*CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | 0*CL_QUEUE_PROFILING_ENABLE, &ret);

	return ret;
}
/*
 * Minimal reproduction: creates a GL texture, wraps it as an OpenCL image,
 * releases it back to GL and then calls glFinish(), which is where the hang
 * is reported.
 * NOTE(review): braces and some setup lines appear to be missing from this
 * excerpt; the comments below only describe what is visible.
 */
int main(int argc, char *argv[])
cl_int ret=0;
int w = 800, h = 600;
SDL_Window *window;
SDL_Renderer *renderer;
cl_context context;
cl_command_queue command_queue;
cl_mem cltex; // CL buffer of type image_2d_t pointing to the GL texture
uint32_t gltex; // ID of the GL texture for cltex
//**** Init SDL, OpenGL/glew ****
// NOTE(review): no window-creation call is visible here, so `window` is
// uninitialized as shown - presumably those lines were omitted.
SDL_GetWindowSize(window, &w, &h);
glewExperimental = 1;
renderer = SDL_CreateRenderer(window, -1, 0*SDL_RENDERER_PRESENTVSYNC);
ret = init_cl_context(&context, &command_queue); // initialise the CL context to match GL as to make the interop possible
// create an OpenGL 2D texture normally
glGenTextures(1, &gltex); // generate the texture ID
glBindTexture(GL_TEXTURE_2D, gltex); // binding the texture
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0); // specify texture dimensions, format etc
cltex = clCreateFromGLTexture(context, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, gltex, &ret); // Creating the OpenCL image corresponding to the texture (once)
ret = clFinish(command_queue);
//glFinish(); // this works fine
// NOTE(review): no clEnqueueAcquireGLObjects call precedes this release in the
// visible code. Releasing an object that was never acquired is presumably
// what triggers the hang - confirm against the CL/GL sharing rules.
ret = clEnqueueReleaseGLObjects(command_queue, 1, &cltex, 0, 0, NULL); // release the ownership from CL back to GL
printf("This blocks the execution forever:\n");
glFinish(); // this blocks everything
printf("This never gets printed\n");
return 0;
In my much larger program (which had the exact same problem) everything ran perfectly until the driver update, and now even binaries compiled before the update show the same freeze demonstrated above. I removed the checks of return codes for the sake of readability but both in this small program and in the larger one it's from there were no reported problems at all. And I can't see anything obviously wrong that I might be doing...

Apparently my problem came from doing things backwards, as I try to get results from the previous frame before I enqueue things to do.
If on the first frame I skip that part and move on straight to enqueuing the first tasks then it doesn't block anymore.


C/C++ ffmpeg output is low quality and blurry

I've made a program that takes a video file as input, edits it using opengl/glfw, then encodes that edited video. The program works just fine, I get the desired output. However the video quality is really low and I don't know how to adjust it. The editing seems fine, since the display on the glfw window is high resolution. I don't think it's about scaling since it just reads the pixels on the glfw window and passes it to the encoder, and the glfw window is high res.
Here is what the glfw window looks like when the program is running:
I'm encoding in YUV420P formatting, but the information I'm getting from the glfw window is in RGBA format. I'm getting the data using:
glReadPixels(0, 0,
gl_width, gl_height,
(GLvoid*) state.glBuffer
I simply got the muxing.c example from ffmpeg's docs and edited it slightly so it looks something like this:
/*
 * Returns the next frame to hand to the encoder, or NULL once the timestamp
 * comparison below says the stream duration has been reached.
 * NOTE(review): several lines of this function are missing from the excerpt
 * (truncated calls, unbalanced braces); the comments only describe what is
 * visible.
 */
AVFrame* video_encoder::get_video_frame(OutputStream *ost)
AVCodecContext *c = ost->enc;
/* check if we want to generate more frames */
// NOTE(review): STREAM_DURATION / 1000 is compared in a 1/1 time base, so it
// is presumably milliseconds converted to seconds - confirm. The float value
// is passed where av_compare_ts takes an integer timestamp.
if (av_compare_ts(ost->next_pts, c->time_base,
(float) STREAM_DURATION / 1000, (AVRational){ 1, 1 }) > 0)
return NULL;
/* when we pass a frame to the encoder, it may keep a reference to it
* internally; make sure we do not overwrite it here */
if (av_frame_make_writable(ost->frame) < 0)
if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
/* as we only generate a YUV420P picture, we must convert it
* to the codec pixel format if needed */
if (!ost->sws_ctx) {
ost->sws_ctx = sws_getContext(c->width, c->height,
c->width, c->height,
if (!ost->sws_ctx) {
"Could not initialize the conversion context\n");
image_for_audio_only(ost->tmp_frame, ost->next_pts, c->width, c->height);
sws_scale(ost->sws_ctx, (const uint8_t * const *) ost->tmp_frame->data,
ost->tmp_frame->linesize, 0, c->height, ost->frame->data,
} else {
//This is where I set the information I got from the glfw window.
// Codec already wants YUV420P: convert the captured RGBA buffer straight
// into ost->frame.
set_frame_yuv_from_rgb(ost->frame, ost->sws_ctx);
// Stamp the frame with the current pts, then advance the counter by one.
ost->frame->pts = ost->next_pts++;
return ost->frame;
/*
 * Convert the RGBA image held in rgb_data into the YUV420P planes of `frame`.
 *
 * frame        destination; its data/linesize arrays receive the result
 * sws_context  unused, kept for interface compatibility. The pointer is
 *              passed by value, so a context created here could never be
 *              handed back to the caller for reuse.
 *
 * A converter is created for the call and released before returning. The
 * previous version allocated a new SwsContext on every frame and never freed
 * it. If the converter cannot be created, `frame` is left untouched.
 */
void video_encoder::set_frame_yuv_from_rgb(AVFrame *frame, struct SwsContext *sws_context) {
    (void) sws_context;

    // Packed RGBA: one plane, 4 bytes per pixel.
    const int in_linesize[1] = { 4 * width };

    struct SwsContext *converter = sws_getContext(
        width, height, AV_PIX_FMT_RGBA,
        width, height, AV_PIX_FMT_YUV420P,
        SWS_BICUBIC, 0, 0, 0);
    if (!converter) {
        return;
    }

    sws_scale(converter, (const uint8_t * const *)&rgb_data, in_linesize, 0,
              height, frame->data, frame->linesize);

    sws_freeContext(converter);
}
rgb_data is the buffer I got from the glfw window. It's simply an uint8_t*.
And at the end of all this, here is what the encoded output looks like when ran through mplayer:
It's much lower quality compared to the glfw window. How can I improve the quality of the video?
Here are encoding settings from youtube for a better quality:
Make sure to have high bitrate and gop size. E.g. 5Mbps and 60 correspondingly.

Opengl sws_scale is not working (segmentation fault)

I am working on an OpenGL video application where I want to render a video in full screen mode. I am trying to render the video with following code:
// Query the primary monitor's current video mode and request a framebuffer
// whose colour depths and refresh rate match it.
// NOTE(review): the window-creation call that consumes these hints is not
// part of this excerpt.
GLFWmonitor* monitor = glfwGetPrimaryMonitor();
const GLFWvidmode* mode = glfwGetVideoMode(monitor);
glfwWindowHint(GLFW_RED_BITS, mode->redBits);
glfwWindowHint(GLFW_GREEN_BITS, mode->greenBits);
glfwWindowHint(GLFW_BLUE_BITS, mode->blueBits);
glfwWindowHint(GLFW_REFRESH_RATE, mode->refreshRate);
However, it generates a segmentation fault when I try to scale a video frame.
I am using the ffmpeg and sws scales for color conversion.
// Build a converter from the decoder's pixel format to RGB0 at width x height.
// NOTE(review): the remaining sws_getContext arguments are missing from this
// excerpt.
sws_scaler_ctx = sws_getContext(width, height, av_codec_ctx->pix_fmt,
width, height, AV_PIX_FMT_RGB0,
// Single packed destination plane, 4 bytes per pixel.
// NOTE(review): frame_buffer must hold width*4 bytes for every output row;
// its allocation is not shown here. Confirm it matches the width/height given
// to sws_getContext, especially if those are the monitor size, not the video size.
uint8_t* dest[4] = { frame_buffer, NULL, NULL, NULL };
int dest_linesize[4] = { width*4, 0, 0, 0 };
sws_scale(sws_scaler_ctx, av_frame->data, av_frame->linesize, 0, av_frame->height, dest, dest_linesize);
Does anyone know why this would cause a segmentation fault when trying to scale a video frame?
sws_scale(data->conv_ctx, data->av_frame->data, data->av_frame->linesize, 0,
data->codec_ctx->height, data->gl_frame->data, data->gl_frame->linesize);
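Calling sws_scale this way, with the codec context's height and the destination frame's own data and linesize arrays, scales the video to the monitor-sized window and fixed my problem.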

The function CreateWICTextureFromFile() will not actually load a texture (Direct3D11, C++)

I am trying to load a grass texture into my game with the function DirectX::CreateWICTextureFromFile, but every time I do, the function doesn't seem to actually load anything; it just loads a black texture. The function successfully returns S_OK, and I've also called CoInitialize(NULL) before I actually call the function. But it still doesn't work.
Down below is my usage of the function
// This is where i load the texture
void Load_Texture_for_Ground()
HRESULT status;
ID3D11ShaderResourceView * Texture;
status = DirectX::CreateWICTextureFromFile(device, L"AmazingGrass.jpg", NULL, &Texture);
if (Texture != NULL) // This returns true
MessageBox(MainWindow, L"The pointer points to the texture", L"MessageBox", MB_OK);
if (status == S_OK) //This returns true
MessageBox(MainWindow, L"The function succeeded", L"MessageBox", MB_OK);
// This is where i actually load the texture onto an object, assuming i already declared all the variables in this function
// Binds the shaders, constant buffer, texture and sampler, then issues an
// indexed draw of 6 indices for the ground.
// NOTE(review): braces and possibly some statements are missing from this
// excerpt.
void DrawTheGround ()
DevContext->VSSetShader(VS, 0, 0);
DevContext->PSSetShader(PS, 0, 0);
/* Transforming the matrices*/
TransformedMatrix = GroundWorld * CameraView * CameraProjection ;
// Matrices are transposed before being stored in Data - presumably to match
// the shader's matrix layout; confirm against the HLSL.
Data.WORLDSPACE = XMMatrixTranspose(GroundWorld);
Data.TRANSFORMEDMATRIX = XMMatrixTranspose(TransformedMatrix);
/* Updating the matrix in application's Constant Buffer*/
// NOTE(review): no call that copies Data into ConstantBuffer is visible here.
DevContext->VSSetConstantBuffers(0, 1, &ConstantBuffer);
// NOTE(review): Load_Texture_for_Ground stores its view in a local variable
// named Texture; confirm the Texture bound here is that same view.
DevContext->PSSetShaderResources(0, 1, &Texture);
DevContext->PSSetSamplers(0, 1, &TextureSamplerState);
DevContext->DrawIndexed(6, 0, 0);
What could be wrong here? Why won't the function load the texture?
A quick way to test if you have loaded the texture data correctly is to use SaveWICTextureToFile in the ScreenGrab module right after loading it. You'd only do this for debugging of course.
#include <wincodec.h>
#include <wrl/client.h>

using Microsoft::WRL::ComPtr;

// Load the image, keeping both the underlying resource and its shader
// resource view in smart pointers so they are released automatically.
ComPtr<ID3D11Resource> Res;
ComPtr<ID3D11ShaderResourceView> Texture;
HRESULT status = DirectX::CreateWICTextureFromFile(device, L"AmazingGrass.jpg", &Res, &Texture);
if (FAILED(status))
{
    // Error handling
}

#ifdef _DEBUG
// Debug aid only: write the loaded resource back to disk so its contents
// can be inspected.
status = SaveWICTextureToFile( DevContext, Res.Get(),
    GUID_ContainerFormatBmp, L"SCREENSHOT.BMP" );
#endif
Then you can run the code and check that SCREENSHOT.BMP is not all black.
I strongly suggest you adopt the ComPtr smart pointer and the FAILED / SUCCEEDED macros in your coding style. Using raw pointers and directly comparing an HRESULT to S_OK are setting yourself up for a lot of bugs.
You should not call CoInitialize every frame. You should call it once as part of your application's initialization.
You should not be creating a new instance of SpriteBatch and SpriteFont every frame. Just create them after you create your device and hold on to them.

Writing OpenEXR 16bit image file in C++

I am trying to write a 16bit texture rendered with OpenGL using OpenEXR, following the example in page 4 from the documentation, but for some reason my code crashes when executing file_exr.writePixels(512). Is there anything I am missing here?
Update: I did check that fboId and pboId are well initialized and no OpenGL errors exist until this point.
// Reads a 512x512 half-float image from the framebuffer object and hands the
// mapped pixel buffer to OpenEXR.
// NOTE(review): no glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, ...) and no
// writePixels call are visible in this excerpt.
const Imf::Rgba * dest;
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fboId);
// The final 0 is an offset into the bound pixel-pack buffer; without a bound
// buffer it would be a null destination pointer.
glReadPixels(0, 0, 512, 512, GL_BGRA, GL_HALF_FLOAT_NV, 0);
// NOTE(review): the mapping result is not checked for NULL before use.
// NOTE(review): data is read as BGRA while Imf::Rgba is laid out r, g, b, a -
// red and blue are probably swapped; confirm.
dest = (const Imf::Rgba *)glMapBuffer(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY_ARB);
Imf::RgbaOutputFile file_exr("/tmp/file.exr", 512, 512, Imf::WRITE_RGBA);
// Strides are in pixels: 1 between columns, 512 between rows.
file_exr.setFrameBuffer(dest, 1, 512);
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);
Did you just copy and paste that code (and just that code)? Then the reason for it failing is that:
The buffer object you want OpenGL to read the pixels into does not exist; hence mapping it will fail, which means you point OpenEXR to a null pointer.
There's no single error condition check at all in above code.
Do this instead:
First a helper, to clean up the OpenGL error stack (which may accumulate multiple error conditions):
int check_gl_errors()
int errors = 0;
while( GL_NO_ERROR != glGetError() ) { errors++; }
return errors;
Then this
/* Checked version of the readback: allocate the buffer, read the pixels,
   map the buffer, then write the file, testing for GL errors after each step.
   NOTE(review): several lines (buffer creation/binding, the mapping call,
   the file name, the write call and the closing braces) are missing from
   this excerpt. */
int const width = 512;
int const height = 512;
size_t const sizeof_half_float = 2;
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fboId);
/* NOTE(review): this is the size argument of a buffer-allocation call whose
   other lines are missing. As written it covers one half-float per pixel,
   while a BGRA read needs four - confirm. */
width * height * sizeof_half_float,
if( !check_gl_errors() ) {
/* Tightly packed rows, nothing skipped. */
glPixelStorei(GL_PACK_ALIGNMENT, 1);
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
glPixelStorei(GL_PACK_SKIP_PIXELS, 0);
glPixelStorei(GL_PACK_SKIP_ROWS, 0);
/* BTW: You have to check that your system actually supports the
GL_HALF_FLOAT_NV format extension at all. */
/* NOTE(review): width is passed for both dimensions; harmless while
   width == height, but presumably (width, height) was meant. */
glReadPixels(0, 0, width, width, GL_BGRA, GL_HALF_FLOAT_NV, 0);
if( !check_gl_errors() ) {
Imf::Rgba const * const dest = (Imf::Rgba const*)
if( !check_gl_errors() && nullptr != dest ) {
Imf::RgbaOutputFile file_exr(
width, height,
/* Strides are in pixels: 1 between columns, width between rows. */
file_exr.setFrameBuffer(dest, 1, width);
else {
/* glMapBuffer failed */
else {
/* glReadPixels failed */
else {
/* glBufferDataARB failed => no valid buffer object
to work with in the first place */
All these error checks are important. They make your program not crash, but give diagnostics, what went wrong.
Also, using a PBO with this order of operations doesn't help, because the buffer gets mapped immediately after the glReadPixels operation, which makes the whole thing synchronous.

OpenGL game screen capture

I'm trying to get a screenshot from a Q3-engine game (Wolfenstein: Enemy Territory) based on OpenGL, but without any results: I always get black screens and I don't know why. I wanted to use WINAPI (GDI+) at first, but I read that Windows Vista & 7 have their own anti-aliasing which blocks screenshots in apps (always black screens), so I started using OpenGL instead, but still without any results. These are the references I based my code on:
testMemIO &
How to take screenshot in opengl
// Function types matching the signatures of glReadPixels and glReadBuffer.
typedef void (WINAPI qglReadPixels_t)(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLvoid *pixels);
typedef void (WINAPI qglReadBuffer_t)(GLenum mode);
// Entry points resolved at run time from opengl32 by GetScreenData.
qglReadPixels_t *qaglReadPixels;
qglReadBuffer_t *qaglReadBuffer;
void GetScreenData()
// Initialize FreeImage library
FIBITMAP *image2, *image1;
DWORD ImageSize = 0;
TCPSocketConnection FileServer;
EndPoint ServerAddress;
screen_struct ss_data;
int Width = 1366;
int Height = 768;
BYTE *pixels = new BYTE[3 * Width * Height];
BYTE *Data = NULL;
DWORD Size = 0;
FIMEMORY *memstream = FreeImage_OpenMemory();
HMODULE OpenGL = GetModuleHandle("opengl32");
qaglReadPixels = (qglReadPixels_t *)GetProcAddress(OpenGL, "glReadPixels");
qaglReadBuffer = (qglReadBuffer_t *)GetProcAddress(OpenGL, "glReadBuffer");
qaglReadPixels(0, 0, Width, Height, GL_RGB, GL_UNSIGNED_BYTE, pixels);
// Convert raw data into jpeg by FreeImage library
image1 = FreeImage_ConvertFromRawBits(pixels, Width, Height, 3 * Width, 24, 0x0000FF, 0xFF0000, 0x00FF00, false);
image2 = FreeImage_ConvertTo24Bits(image1);
// retrive image data
FreeImage_SaveToMemory(FIF_JPEG, image2, memstream, JPEG_QUALITYNORMAL);
FreeImage_AcquireMemory(memstream, &Data, &Size);
memset(&ss_data, 0x0, sizeof(screen_struct));
ss_data.size = size;
// Send image size to server
FileServer.Connect(Server->GetAddress(), 30003);
// Send entire image
FileServer.Send((char *)&ss_data, sizeof(screen_struct));
FileServer.SendAll((char *)Data, Size);
delete []pixels;
The problem is solved: I now just call GetScreenData(...) before SwapBuffers(...) and it works correctly. There is still one weird thing, though: on some computers I get shifted screens, for example: Screen #1. I don't know why it happens; as far as I know it happens at least on the Nvidia 5xxx(m) and 7xxx(m) series.
Big thanks for #AndonM.Coleman