I have just started using C++ for some image processing tasks. I want to combine my RGB data (OpenCV Mat) and depth data (PCL), both of which I receive through ROS subscribers, into a colored point cloud.
I use the cv::Mat acquiredImage to hold the image delivered to the ROS subscriber callback, and acquiredImage is then used by other processing in other threads, but I am getting a segmentation fault. Sometimes the error looks like this instead:
[xcb] Unknown sequence number while processing queue
[xcb] Most likely this is a multi-threaded client and XInitThreads has not been called
[xcb] Aborting, sorry about that. Viewtest: ../../src/xcb_io.c:260:
poll_for_event: Assertion `!xcb_xlib_threads_sequence_lost' failed.
Aborted (core dumped)
I have tried using std::mutex but it still doesn't work. Could anyone tell me how to properly manage the cv::Mat in two different threads?
// Point / cloud aliases used by this node.
using XYZRGB = pcl::PointXYZRGB;
using pclXYZRGB = pcl::PointCloud<XYZRGB>;
using pclXYZRGBptr = pcl::PointCloud<XYZRGB>::Ptr;
using pclXYZRGBcptr = pcl::PointCloud<XYZRGB>::ConstPtr;

// acquiredCloud is filled by cloudReceive(); acquiredCloud2 is the copy that
// main() colors.
pclXYZRGBptr acquiredCloud(new pclXYZRGB());
pclXYZRGBptr acquiredCloud2(new pclXYZRGB());

// acquiredImage is written by imageReceive(); acquiredImageRotated receives
// the 180-degree rotation computed in main().
cv::Mat acquiredImage;
cv::Mat acquiredImageRotated;

// Worker thread handle (started in StartThread) and the mutex declared for
// guarding the shared data.
std::thread thread_RunThread;
std::mutex mutexMutex;

// stopThread is set by CloseThread(); has_data1 / has_data2 are raised by the
// image and cloud callbacks respectively.
bool stopThread;
bool has_data1;
bool has_data2;
/// Packs 8-bit R, G, B channels into the 32-bit layout 0x00RRGGBB and returns
/// those bits as a float (the value this file assigns to a point's `rgb`
/// field).
///
/// @param r red channel, stored in bits 16..23
/// @param g green channel, stored in bits 8..15
/// @param b blue channel, stored in bits 0..7
/// @return a float whose object representation equals the packed 32-bit value
inline float PackRGB(uint8_t r, uint8_t g, uint8_t b) {
    static_assert(sizeof(float) == sizeof(uint32_t),
                  "PackRGB requires a 32-bit float");
    const uint32_t color_uint = (static_cast<uint32_t>(r) << 16) |
                                (static_cast<uint32_t>(g) << 8) |
                                static_cast<uint32_t>(b);
    // Copy the bytes through unsigned char instead of dereferencing a float*
    // that points at a uint32_t: the latter violates strict aliasing and is
    // undefined behavior.
    float packed = 0.0f;
    const unsigned char* src = reinterpret_cast<const unsigned char*>(&color_uint);
    unsigned char* dst = reinterpret_cast<unsigned char*>(&packed);
    for (unsigned k = 0; k < sizeof(packed); ++k) {
        dst[k] = src[k];
    }
    return packed;
}
// Entry point of the worker thread launched by StartThread(); its body is not
// visible in this excerpt.
void RunThread(){
/// ROS image callback: stores the newest frame in the global `acquiredImage`
/// and raises `has_data1`.
///
/// The frame is deep-copied. cv_bridge::toCvShare() may return a cv::Mat that
/// aliases the message buffer, and neither the returned CvImage temporary nor
/// `msg` outlives this callback, so keeping only the Mat header could leave
/// `acquiredImage` pointing at released memory.
///
/// The assignment happens under `mutexMutex`.
/// NOTE(review): readers of `acquiredImage` must take the same mutex for the
/// lock to protect them.
///
/// @param msg incoming image message; converted to / shared as "bgr8".
void imageReceive(const sensor_msgs::ImageConstPtr& msg){
    // Clone outside the lock so the critical section stays short.
    cv::Mat frame = cv_bridge::toCvShare(msg, "bgr8")->image.clone();

    std::lock_guard<std::mutex> lock(mutexMutex);
    acquiredImage = frame;
    has_data1 = true;
}
// ROS point-cloud callback: converts the incoming PointCloud2 message into the
// global acquiredCloud and raises has_data2.
void cloudReceive(const sensor_msgs::PointCloud2ConstPtr& cloudInMsg){
// NOTE(review): acquiredCloud is written here without taking mutexMutex;
// confirm no other thread reads it concurrently.
pcl::fromROSMsg(*cloudInMsg, *acquiredCloud);
has_data2 = true;
// Resets the stop/data flags, launches RunThread on thread_RunThread, then
// prints the two data flags while neither of them has been set.
void StartThread(){
stopThread = false;
has_data1 = has_data2 = false;
thread_RunThread = std::thread(RunThread);
// The loop runs only while BOTH flags are false, so it ends as soon as either
// callback has fired.
// NOTE(review): if the intent is to wait for image AND cloud, the condition
// would need `||` — confirm.
while(!has_data1 && !has_data2){
std::cout << has_data1 << "-" << has_data2 << std::endl;
// Requests worker shutdown by setting stopThread.
// NOTE(review): no join of thread_RunThread is visible in this excerpt.
void CloseThread(){
stopThread = true;
// Node entry point: initializes ROS as "Viewtest", subscribes to the image and
// cloud topics, and in the main loop colors a copy of the latest cloud from
// the 180-degree-rotated latest image.
int main(int argc, char **argv){
ros::init(argc, argv, "Viewtest");
ros::NodeHandle nh;
image_transport::ImageTransport it(nh);
image_transport::Subscriber sub = it.subscribe("/rsCamera/image", 1, imageReceive);
ros::Subscriber pclsubAcquirer = nh.subscribe("/rsCamera/cloud", 1, cloudReceive);
while (ros::ok()){
// The print branch is taken only while NEITHER flag is set; the else branch
// runs as soon as one of image/cloud has arrived.
// NOTE(review): the else branch uses both the image and the cloud — confirm
// that `&&` (rather than `||`) is intended.
if(!has_data1 && !has_data2){
std::cout << has_data1 << "-" << has_data2 << std::endl;
else {
// NOTE(review): acquiredImage and acquiredCloud are read here without taking
// mutexMutex.
cv::rotate(acquiredImage, acquiredImageRotated, cv::ROTATE_180);
copyPointCloud(*acquiredCloud, *acquiredCloud2);
// i and j start at 640 and 480; k is not used in the visible code.
int i = 640, j = 480, k;
for (auto& it : acquiredCloud2->points){
// Self-assignments: the coordinates are left unchanged.
it.x = it.x; it.y = it.y; it.z = it.z;
// NOTE(review): the next line is garbled in this excerpt — the object on which
// <cv::Vec3b>(j,i) is invoked and the end of the call are missing.
it.rgb = PackRGB(<cv::Vec3b>(j,i)[2], // r<cv::Vec3b>(j,i)[1], // g<cv::Vec3b>(j,i)[0] // b
// Once i is <= 0, restart i at 640 and decrement j; stop once j goes negative.
// NOTE(review): no decrement of i is visible in this excerpt.
if(i <= 0) { i = 640; j--; }
if(j < 0) { break; }
return 0;
There is a function that contains a while loop and takes an int parameter. For example, I first call the function with 4; later I want to call it again with 16.
// Slot for the "four cameras" button; its body is not visible in this excerpt.
void Widget::on_fourCameras_clicked(){
// Slot for the "sixteen cameras" button. The visible part sets
// isNewCameraSelected, the flag that the loop in CamerasInitialize tests on
// each iteration.
void Widget::on_sixteenCameras_clicked(){
isNewCameraSelected = true;
When I call it with 16, the call I started earlier with 4 is not released from memory. I am trying to break out of the previous loop by setting a flag and then start the second call. However, because the click handler does not change the flag until it returns, the call with 16 starts first and only then is the flag changed. I can't use multiple threads because of the functions I use. What do I need to do to call the same while-loop function repeatedly and have the previous calls released from memory?
ubuntu 20.04.02
QT / c++
// Sets up per-camera GPU buffers for camNumber cameras, then runs a display
// loop that combines the input frames with arrayMult, crops the result and
// shows it in the "Output" window. The loop's exit test checks for the 'q' key
// or isNewCameraSelected.
//
// camNumber: number of cameras; only 4 and 16 select an arrayMult call below.
//
// NOTE(review): the arrays allocated with new[] and the cudaMalloc'ed d_ptrs
// have no matching delete[] / cudaFree in the visible code.
void Widget::CamerasInitialize(int camNumber) {
namedWindow( "Output", cv::WINDOW_OPENGL );
cv::cuda::GpuMat *inputFrames = new cv::cuda::GpuMat[camNumber];
cv::cuda::GpuMat *inputFramesConverted = new cv::cuda::GpuMat[camNumber];
cv::cuda::GpuMat *sphericalDistortionOutput = new cv::cuda::GpuMat[camNumber];
// NOTE(review): array with a runtime bound (a compiler extension in C++).
cv::Ptr<cv::cudacodec::VideoReader> videoReader[camNumber];
// d_ptrs: device-side array of frame descriptors; h_ptrs: its host-side staging copy.
cv::cuda::PtrStepSz<int32_t> *d_ptrs = NULL;
cv::cuda::PtrStepSz<int32_t> *h_ptrs = new cv::cuda::PtrStepSz<int32_t>[camNumber];
cudaMalloc(&d_ptrs, camNumber * sizeof(cv::cuda::PtrStepSz<int32_t>));
std::time_t timeBegin = std::time(0);
int tick = 0;
long frameCounter = 0;
while (true)
// Refresh the host-side descriptor of every camera's input frame.
for(int i=0; i<camNumber; i++){
h_ptrs[i] = inputFrames[i];
pBackSub = cuda::createBackgroundSubtractorMOG();
isMovedCamera = false;
// Upload the descriptors so the kernel wrapper can reach every frame.
cudaMemcpy(d_ptrs, h_ptrs, camNumber * sizeof(cv::cuda::PtrStepSz<int32_t>), cudaMemcpyHostToDevice);
GpuMat outval;
Mat outputCPU;
// Pick the camera matrix matching the camera count; other counts call nothing.
if(camNumber == 4){
arrayMult(d_ptrs, outval, cameraMatrixOnGpu4Camera, pixelW, pixelH);
}else if(camNumber == 16){
arrayMult(d_ptrs, outval, cameraMatrixOnGpu16Camera, pixelW, pixelH);
//cuda::bilateralFilter(videoFrames[0], colorImages[0], 90, 30, 30);
// Seconds elapsed since the loop started; used for the once-per-second reset below.
std::time_t timeNow = std::time(0) - timeBegin;
if (timeNow - tick >= 1)
//cout << "Frames per second: " << frameCounter << endl;
frameCounter = 0;
// Crop to the configured region before display.
outval = outval(Rect(cropPositionsX, cropPositionsY, cropPositionsW, cropPositionsH));
imshow( "Output", outval);
setMouseCallback("Output", CallBackFunc, NULL);
// Exit test: 'q' pressed or a new camera count was requested; the flag is then cleared.
// NOTE(review): the statement that leaves the loop is not visible in this excerpt.
if (waitKey(1) == 'q' || isNewCameraSelected)
isNewCameraSelected = false;
I need to make a small video player with OpenCV that supports the following functionality: the 'p' key pauses/unpauses, 'q' exits, and the left and right arrow keys play the video frame by frame, forward and in reverse, while it is paused. The problem is that when I show a high-quality video and hold an arrow key for several seconds, the display does not advance; it freezes and then jumps to the current frame after I release the key. I tried to fix this by adding this_thread::sleep after cv::imshow() to give it time to draw, but it did not help at all. Here is the code. Also, I have some reasons to use boost instead of C++11, so that is OK.
#include "VideoPlayer.hpp"
#include <iostream>
// Entry point: requires the video file path as argv[1] and constructs a
// VideoPlayer for it. Returns 1 on the failure path and 0 otherwise.
int main(int argc, char *argv[])
if (argc < 2) {
std::cerr << "Video file full name required as argument." << std::endl;
VideoPlayer vp(argv[1]);
// NOTE(review): the condition below is truncated in this excerpt; presumably
// it tests the result of vp.play() — confirm.
if (!
return 1;
return 0;
#pragma once
#include <opencv/cxcore.hpp>
#include <opencv/highgui.h>
#include <string>
// Minimal OpenCV video player: wraps a cv::VideoCapture together with the
// window name and the key-wait delay used by play().
// NOTE(review): access specifiers are not visible in this excerpt.
class VideoPlayer
// Opens `video`. `windowName` defaults to "Output window"; `delay` defaults to
// 30 and is the value play() passes to cv::waitKey.
VideoPlayer(const std::string &video, const std::string &windowName = "Output window",
unsigned int delay = 30);
// Runs the playback loop; returns false if the video could not be opened.
bool play();
cv::VideoCapture videoCapture_;
std::string windowName_;
unsigned int delay_;
// Handles one key code; returns true when playback should stop.
bool processKey(int key);
// Seeks to frameNum and displays that frame.
void setFrame(int frameNum);
#include "VideoPlayer.hpp"
#include <iostream>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/thread/thread.hpp>
// Constructs the player: opens `video` through cv::VideoCapture and stores the
// window name and the key-wait delay.
VideoPlayer::VideoPlayer(const std::string &video, const std::string &windowName,
unsigned int delay)
: videoCapture_(video)
, windowName_(windowName)
, delay_(delay)
// Plays the video. Returns false if the capture is not open. Otherwise shows
// frames in windowName_ one after another, calls cv::waitKey(delay_) after
// each, and returns true once processKey() reports a quit request.
bool VideoPlayer::play()
if (!videoCapture_.isOpened()) {
std::cerr << "Unable to open video." << std::endl;
return false;
for (;;) {
cv::Mat frame;
videoCapture_ >> frame;
// NOTE(review): no empty-frame (end of video) check is visible before imshow.
cv::imshow(windowName_, frame);
int key = cv::waitKey(delay_);
if (processKey(key))
return true;
// Handles a key code read by play().
//  - 'q' returns true (quit).
//  - 'p' enters a paused loop that blocks on cv::waitKey(0) and reacts to
//    'p', 'q' and two numeric key codes.
//  - anything else returns false.
bool VideoPlayer::processKey(int key)
if (key == 'p') {
for (;;) {
// Block until the next key press while paused.
int pausedKey = cv::waitKey(0);
if (pausedKey == 'p') {
} else if (pausedKey == 'q') {
return true;
// 65363 / 65361: presumably the right / left arrow key codes reported by
// cv::waitKey on the author's platform — confirm.
} else if (pausedKey == 65363) {
int frameNum = videoCapture_.get(CV_CAP_PROP_POS_FRAMES);
} else if (pausedKey == 65361) {
int frameNum = videoCapture_.get(CV_CAP_PROP_POS_FRAMES);
// Step back: request the frame two positions before the current read position.
setFrame(frameNum - 2);
} else if (key == 'q') {
return true;
return false;
// Seeks to frameNum, reads that frame and shows it in windowName_.
// Does nothing unless 0 < frameNum < total frame count (frame 0 is excluded by
// the `> 0` test).
void VideoPlayer::setFrame(int frameNum)
if (frameNum > 0 && frameNum < videoCapture_.get(CV_CAP_PROP_FRAME_COUNT)) {
// Log the target frame number.
std::cerr << frameNum << std::endl;
videoCapture_.set(CV_CAP_PROP_POS_FRAMES, frameNum);
cv::Mat frame;
videoCapture_ >> frame;
cv::imshow(windowName_, frame);
I also created a multithreaded implementation with a buffer based on std::queue with a lock, but it didn't solve the problem. I also tried to use boost::lockfree::queue, but I could not finish it because of some strange behavior. I will share this code later if necessary.
So, if somebody knows a good practice for avoiding this problem, please help me.
Replacing boost::this_thread::sleep(boost::posix_time::milliseconds(10)); with cv::waitKey(0) is bad, because it forces me to make two short presses of the arrow key to change one frame, and it does not help, because holding the key skips through frames very fast. The following code helped, but it is too strange, and the value of `times` has to be chosen separately for each video.
// Variant of setFrame: seeks to frameNum, reads and shows that frame, then
// runs a loop `times` (7) times.
// Does nothing unless 0 < frameNum < total frame count.
void VideoPlayer::setFrame(int frameNum)
if (frameNum > 0 && frameNum < videoCapture_.get(CV_CAP_PROP_FRAME_COUNT)) {
std::cerr << frameNum << std::endl;
videoCapture_.set(CV_CAP_PROP_POS_FRAMES, frameNum);
cv::Mat frame;
videoCapture_ >> frame;
cv::imshow(windowName_, frame);
// NOTE(review): the loop body is not visible in this excerpt; per the
// surrounding text it was tuned per video — confirm what it calls.
int times = 7;
for (int i = 0; i < times; i++)
Also, I'm unable to use Qt or anything else; only C++03 with boost and OpenCV.
I think I need some trick to make cv::waitKey(time) wait for the full time whether or not a key is pressed.
I am using opencv c++ on Mac OS X 10.10.2 to process video frames and display them. The performance of imshow with waitKey to display the video is extremely slow.
I have the following code which displays HD (1920x1080) grayscale frames correctly, except that it runs about 10 times too slow (i.e. 2 to 3 frames per second instead of 30 frames per second).
// Buffer holding TEST_COUNT pre-loaded frames.
cv::Mat framebuf[TEST_COUNT];
//--- Code here to allocate and fill the frame buffer with about 4 seconds of video. This part works correctly.
//--- This loop runs too slow by factor of approximately 10x
// Shows each buffered frame in turn.
// NOTE(review): the waitKey call mentioned in the question text is not visible
// in this excerpt.
for (int f = 0; f < TEST_COUNT; f++)
cv::imshow(windowName, framebuf[f]);
Can anyone suggest how to get real-time or near real-time performance from opencv imshow()? I have seen many posts that state that they are displaying video in real-time or even faster than real-time, so I am not sure what I am doing wrong. Any help would be greatly appreciated.
I could be wrong but for me the problem is not with your code, but with your os/configuration. I've written a small test:
# Benchmark script: draws 1000 random lines into one single-channel 8-bit
# image, calling imshow + waitKey(1) after every line.
import cv2
import numpy as np
from random import randrange
# NOTE(review): shape (1920, 1080) means 1920 rows x 1080 columns; presumably
# (1080, 1920) was intended for a 1920x1080 frame — confirm.
img = np.zeros((1920, 1080), dtype = np.uint8)
counter = 0
while counter < 1000:
# Random endpoints and a random gray level for each line.
cv2.line(img, (randrange(0, 1920), randrange(0, 1080)), (randrange(0, 1920), randrange(0, 1080)), (randrange(0, 255)))
cv2.imshow('test', img)
temp = cv2.waitKey(1)
counter += 1
# Python 2 print statement.
print counter
On my machine (Core 2 Duo 2.6 GHz x64, 8 GB RAM, SSD) it took about 30 seconds for this test to complete. Run it, and if it takes significantly longer, then something is definitely wrong with your laptop/OpenCV configuration/etc. I've used OpenCV 2.4.x on Mac OS X (it was 10.9, I think) and it ran fine. Reinstalling OpenCV is the most obvious solution that comes to my mind. After you remove OpenCV, use brew to install it again - brew install opencv --with-tbb --with-python --with-ffmpeg (or something similar - check using brew options opencv) should be fine. The first option tells brew to build OpenCV with TBB (Threading Building Blocks, a multithreading library that can sometimes significantly improve speed), the second installs the Python wrappers, and the last one builds it with ffmpeg (which handles codecs etc.).
You would have to reduce the argument passed to waitKey. Try using a lower number, in the range of 2-5. It also depends on the other processes you have running simultaneously; try shutting down other processes and see if it improves.
You can create your own window to show the image. Add MyWindow.m and MyWindow.h files to the project.
// C-linkage interface to the custom Cocoa window, so it can be called from C++.
#ifndef MY_WINDOW_H
#define MY_WINDOW_H
#ifdef __cplusplus
extern "C" {
// Creates and shows the window with the given frame; if w or h is 0 the main
// screen's frame is used instead. Returns an opaque window handle.
void* createNSWindow(int x, int y, int w, int h);
// Displays a w x h image with c channels from `data` in the window `inwindow`.
void renderNSWindow(void* inwindow, void* data, int w, int h, int c);
// Dispatches the pending application events.
void processNSEvent();
// NOTE(review): the closing brace of extern "C" and the matching #endif lines
// are not visible in this excerpt.
#ifdef __cplusplus
Usage, in main.cpp (do not forget waitKey):
#include "MyWindow.h"
// An OpenCV window still has to be created, even though nothing is drawn into it.
cv::namedWindow("xxx", 1);
// Create the custom window; zero size selects the main screen's frame.
void* w = createNSWindow(0, 0, 0, 0);
// Frame image to display.
cv::Mat frameImage;
// Render loop body.
// NOTE(review): the second argument (the pixel data pointer) is missing from
// the call below in this excerpt.
renderNSWindow(w,, frameImage.cols, frameImage.rows, frameImage.channels());
// waitKey is still needed for the window to be displayed.
Implementation, in MyWindow.m (delete the import of "MyWindow.h"):
#import <Cocoa/Cocoa.h>
// NOTE(review): `#interface`, `#property` and `#implementation` below look like
// mangled Objective-C `@interface` / `@property` / `@implementation`
// directives — confirm against the original file.
// NSWindow subclass that carries the NSImageView used to display frames.
#interface MyWindow : NSWindow
#property(nonatomic, strong) NSImageView *imgv;
#implementation MyWindow
// Forward declaration; the definition follows createNSWindow.
static NSImage* _createNSImage(void* data, int w, int h, int c);
// Creates a MyWindow with frame (x, y, w, h), makes it key and orders it
// front, attaches an NSImageView covering the frame, and returns the window
// as a retained opaque pointer.
void* createNSWindow(int x, int y, int w, int h) {
NSRect screenFrame = [[NSScreen mainScreen] frame];
NSRect frame = NSMakeRect(x, y, w, h);
// A zero width or height selects the main screen's frame.
if (w == 0 || h == 0) {
frame = screenFrame;
// NOTE(review): some initializer arguments between these two lines are not
// visible in this excerpt.
MyWindow* window = [[MyWindow alloc] initWithContentRect:frame
defer:NO] ;
[window makeKeyAndOrderFront:NSApp];
window.titleVisibility = TRUE;
window.styleMask = NSWindowStyleMaskResizable | NSWindowStyleMaskTitled |NSWindowStyleMaskFullSizeContentView;
// Image view sized to the window frame; renderNSWindow assigns its image.
window.imgv = [[NSImageView alloc] initWithFrame:NSMakeRect(0, 0, frame.size.width, frame.size.height)];
[window.contentView addSubview:window.imgv];
// Ownership is handed to the caller through the bridged retain.
return (void*)CFBridgingRetain(window);
// Wraps a raw pixel buffer (w x h pixels, c bytes per pixel) in an NSImage.
// The data provider is created directly over `data`, and the loop below swaps
// bytes in `data` in place, so the caller's buffer is modified.
static NSImage* _createNSImage(void* data, int w, int h, int c) {
size_t bufferLength = w * h * c;
CGDataProviderRef provider = CGDataProviderCreateWithData(NULL, data, bufferLength, NULL);
size_t bitsPerComponent = 8;
size_t bitsPerPixel = c * bitsPerComponent;
// Rows are assumed to be tightly packed (no padding).
size_t bytesPerRow = c * w;
CGColorSpaceRef colorSpaceRef = CGColorSpaceCreateDeviceRGB();
CGBitmapInfo bitmapInfo = kCGBitmapByteOrder32Little | kCGImageAlphaPremultipliedLast;
// Fewer than 4 channels: no alpha, default byte order.
if (c < 4) {
bitmapInfo = kCGBitmapByteOrderDefault | kCGImageAlphaNone;
unsigned char* buf = data;
// Swap the first and last channel byte of every pixel.
// NOTE(review): braces are missing in this excerpt, so whether this loop sits
// inside the `c < 4` branch cannot be told from here.
for(int i = 0; i < w*h; i++) {
unsigned char temp = buf[i*c];
buf[i*c] = buf[i*c+c-1];
buf[i*c+c-1] = temp;
CGColorRenderingIntent renderingIntent = kCGRenderingIntentDefault;
// NOTE(review): several CGImageCreate arguments are not visible in this
// excerpt; no release of provider, colorSpaceRef or iref is visible either.
CGImageRef iref = CGImageCreate(w,
provider, // data provider
NULL, // decode
YES, // should interpolate
NSImage* image = [[NSImage alloc] initWithCGImage:iref size:NSMakeSize(w, h)];
return image;
// Builds an NSImage from `data` (w x h pixels, c channels) and assigns it to
// the image view of the window passed as the opaque handle `inwindow`.
void renderNSWindow(void* inwindow, void* data, int w, int h, int c) {
// Bridge cast only: ownership of the window is not transferred here.
MyWindow* window = (__bridge MyWindow*)inwindow;
window.imgv.image = _createNSImage(data, w, h, c);
// Drains the application's event queue without blocking: repeatedly fetches
// the next event with a deadline in the distant past and dispatches it.
void processNSEvent() {
for (;;)
// NOTE(review): the remaining arguments of this message send are not visible
// in this excerpt.
NSEvent* event = [NSApp nextEventMatchingMask:NSEventMaskAny
untilDate:[NSDate distantPast]
// No more pending events; the statement leaving the loop is not visible here.
if (event == nil)
[NSApp sendEvent:event];
Other notes: waitKey now takes about 20 ms. You can run the OpenCV processing in a background thread and show the window in the main thread. Also, use processNSEvent instead of waitKey; it takes only about 10 ms.
full source code:
#include <iostream>
#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <dispatch/dispatch.h>
#include "MyWindow.h"
using namespace std;
using namespace cv;
// Capture loop run on a background dispatch queue; defined after main.
int opencvfunc(int argc, const char *argv[]);
// Hand-off between opencvfunc (writer) and main (reader): back_frame holds the
// latest frame and newFrame flags that it has not been rendered yet.
// NOTE(review): both are plain globals accessed from two threads with no
// visible synchronization.
bool newFrame = false;
cv::Mat back_frame;
// Creates the windows, starts opencvfunc on a low-priority global dispatch
// queue, and renders back_frame on the main thread whenever newFrame is set,
// printing the measured time in milliseconds.
int main(int argc, const char * argv[]) {
cv::namedWindow("render", 1);
void* w = createNSWindow(0, 0, 0, 0);
dispatch_queue_t opencvq = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_LOW, 0);
// Run the capture loop asynchronously so this thread stays free for rendering.
dispatch_async(opencvq, ^{
opencvfunc(argc, argv);
while(true) {
if(newFrame) {
std::chrono::system_clock::time_point starttime = std::chrono::system_clock::now();
// NOTE(review): the data-pointer argument is missing from the call below in
// this excerpt.
renderNSWindow(w,, back_frame.cols, back_frame.rows, back_frame.channels());
newFrame = false;
//auto key = cv::waitKey(1);
//if (key == 'q') {
// break;
std::chrono::system_clock::time_point endtime = std::chrono::system_clock::now();
// Elapsed seconds (as double) scaled by 1000.
std::cout << "imshow:" << std::chrono::duration_cast<std::chrono::duration<double>>(endtime-starttime).count()*1000 << std::endl;
return 0;
// Capture loop: grabs frames from a cv::VideoCapture and publishes a deep copy
// of each one through the globals back_frame / newFrame.
int opencvfunc(int argc, const char *argv[]) {
// NOTE(review): the call that opens the capture is not visible in this excerpt.
cv::VideoCapture cap;;
if (!cap.isOpened()) {
std::cout << "Couldn't open camera 0." << endl;
Mat frame, unmodified_frame;
for (;;) {
cap >> frame; // get a new frame from camera
if (frame.empty()) { // stop if we're at the end of the video
//unmodified_frame = frame.clone();
// ...
// clone() gives back_frame its own pixel buffer, independent of `frame`.
back_frame = frame.clone();
newFrame = true;
OpenCV 4 has resolved this issue; please update to the new version.
One more thing: process the video and show the video in two separate threads.
#include <stdio.h>
#include <iostream>
#include <opencv2/opencv.hpp>
#include <dispatch/dispatch.h>
using namespace cv;
using namespace std;
// Hand-off between opencvmain (writer) and main (reader): back_frame holds the
// latest frame and newFrame flags that it has not been shown yet.
// NOTE(review): both are plain globals accessed from two threads with no
// visible synchronization.
bool newFrame = false;
Mat back_frame;
// Capture loop: reads frames from a cv::VideoCapture and publishes a deep copy
// of each one through the globals back_frame / newFrame.
int opencvmain(int argc, char** argv ) {
// Open the camera.
// NOTE(review): the call that opens the capture is not visible in this excerpt.
cv::VideoCapture cap;;
if (!cap.isOpened()) {
std::cout << "Couldn't open camera 0." << std::endl;
// define frame images
cv::Mat frame;
// frame loop
for (;;) {
// get video frame
cap >> frame;
// Empty frame; the handling statement is not visible in this excerpt.
if (frame.empty()) {
// Publish a deep copy for the display loop in main.
back_frame = frame.clone();
newFrame = true;
return 0;
// Creates the "video" window, starts opencvmain on a high-priority global
// dispatch queue, and shows back_frame from the main thread whenever newFrame
// is set.
int main(int argc, char** argv ) {
namedWindow("video", WINDOW_AUTOSIZE );
dispatch_queue_t opencvq = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_HIGH, 0);
// Capture runs asynchronously; display stays on this thread.
dispatch_async(opencvq, ^{
opencvmain(argc, argv);
while(true) {
if(newFrame) {
imshow("video", back_frame);
auto key = cv::waitKey(1);
// Space-bar check; the statement it guards is not visible in this excerpt.
if (key == ' ') {
newFrame = false;
return 0;
I am processing video with OpenCV, but at the same time I need to play the audio and have simple control over it, such as the volume or the current frame number.
I think I should create a parallel process with ffmpeg, but I don't know how to do so. Can you explain what to do?
Or do you know another solution?
I think ffmpeg should be used to play audio and SDL for video in this case.
After opening the file with OpenCV and processing the frame, you can use OpenCV -> SDL to display it while retrieving the audio frames through ffmpeg and playing them with SDL.
Here is a nice collection of ffmpeg/SDL tutorials!
I also found a nice post that shows how to capture frames from a video file using ffmpeg, store them in OpenCV cv::Mat and display the result in a OpenCV window. But this way you can't play audio since OpenCV doesn't deal with that.
You might be interested in reading this post as well: How to avoid a growing delay with ffmpeg between sound and raw video data ?
I spent the last 4 hours coding a prototype to demonstrate how it's done. This demo reads video frames through OpenCV (so you can process them) and audio through ffmpeg, and SDL is used to play both! There are 2 limitations in this demo you must be aware of: 1 - it assumes you are working with an OpenCV image packed as BGR (24 bits), and 2 - audio and video are not synchronized! Yes, I have left some work for you to do (yeeeey). But don't panic, page 6 has some ideas!
It's important to sync audio and video because you will be doing some processing on the frames, and that will certainly make the video and audio go out of sync real fast since they are being played independently of each other.
The ffmpeg tutorials I suggested above are very very important to understand the code, a lot of code from this demo came from there. They show how to deal with SDL, and how to read packets of audio/video streams.
#include <highgui.h>
#include <cv.h>
extern "C"
#include <SDL.h>
#include <SDL_thread.h>
#include <avcodec.h>
#include <avformat.h>
#include <iostream>
#include <stdio.h>
//#include <malloc.h>
using namespace cv;
// Singly linked queue of AVPackets with an SDL mutex and condition variable,
// used to pass audio packets from main() to the audio decoding code.
typedef struct PacketQueue
// Head and tail of the linked list.
AVPacketList *first_pkt, *last_pkt;
// NOTE(review): nb_packets is never updated in the visible queue functions.
int nb_packets;
// Sum of pkt.size over the queued packets.
int size;
SDL_mutex *mutex;
SDL_cond *cond;
} PacketQueue;
// Queue of audio packets filled by main() and drained by audio_decode_frame().
PacketQueue audioq;
// Stream indices chosen in setup_ffmpeg(); -1 means "not found".
int audioStream = -1;
int videoStream = -1;
// Non-zero requests shutdown; checked by the queue/decoder functions.
int quit = 0;
// SDL display surface, created lazily in show_frame().
SDL_Surface* screen = NULL;
// NOTE(review): show_frame() declares its own local `surface`, so this global
// is not assigned in the visible code.
SDL_Surface* surface = NULL;
// ffmpeg contexts: container, audio codec, video codec (set in setup_ffmpeg()).
AVFormatContext* pFormatCtx = NULL;
AVCodecContext* aCodecCtx = NULL;
AVCodecContext* pCodecCtx = NULL;
// Displays an IplImage in the SDL window: creates the screen surface on first
// use (sized to the image), wraps the image pixels in an SDL surface and blits
// it onto the screen.
void show_frame(IplImage* img)
if (!screen)
screen = SDL_SetVideoMode(img->width, img->height, 0, 0);
if (!screen)
fprintf(stderr, "SDL: could not set video mode - exiting\n");
// Assuming IplImage packed as BGR 24bits
// NOTE(review): this local `surface` shadows the global of the same name; some
// arguments of the call are missing in this excerpt and no SDL_FreeSurface is
// visible.
SDL_Surface* surface = SDL_CreateRGBSurfaceFrom((void*)img->imageData,
img->depth * img->nChannels,
0xff0000, 0x00ff00, 0x0000ff, 0
SDL_BlitSurface(surface, 0, screen, 0);
// Zero-initializes the queue and creates its SDL mutex and condition variable.
void packet_queue_init(PacketQueue *q)
memset(q, 0, sizeof(PacketQueue));
q->mutex = SDL_CreateMutex();
q->cond = SDL_CreateCond();
// Appends a copy of *pkt to the tail of the queue.
// Returns 0 on success, -1 if duplicating the packet or allocating the list
// node fails.
// NOTE(review): the mutex lock/unlock and condition signal are not visible in
// this excerpt.
int packet_queue_put(PacketQueue *q, AVPacket *pkt)
AVPacketList *pkt1;
if (av_dup_packet(pkt) < 0)
return -1;
//pkt1 = (AVPacketList*) av_malloc(sizeof(AVPacketList));
pkt1 = (AVPacketList*) malloc(sizeof(AVPacketList));
if (!pkt1) return -1;
pkt1->pkt = *pkt;
pkt1->next = NULL;
// Empty queue: the new node becomes the head; otherwise link it after the tail.
// NOTE(review): the `else` between the next two assignments is presumably lost
// in this excerpt.
if (!q->last_pkt)
q->first_pkt = pkt1;
q->last_pkt->next = pkt1;
q->last_pkt = pkt1;
q->size += pkt1->pkt.size;
return 0;
// Removes the head packet of the queue into *pkt.
// Result: 1 when a packet was dequeued, 0 when the queue is empty and `block`
// is 0, -1 when the global quit flag is set. With `block` non-zero it waits on
// the queue's condition variable until a packet arrives.
// NOTE(review): the mutex lock/unlock, the `break` statements and the free of
// the list node are not visible in this excerpt.
static int packet_queue_get(PacketQueue *q, AVPacket *pkt, int block)
AVPacketList *pkt1;
int ret;
for (;;)
if( quit)
ret = -1;
pkt1 = q->first_pkt;
if (pkt1)
// Unlink the head; clear the tail too if the queue became empty.
q->first_pkt = pkt1->next;
if (!q->first_pkt)
q->last_pkt = NULL;
q->size -= pkt1->pkt.size;
*pkt = pkt1->pkt;
ret = 1;
else if (!block)
ret = 0;
SDL_CondWait(q->cond, q->mutex);
return ret;
// Decodes audio from packets taken off the global audioq into audio_buf.
// Returns the number of decoded bytes, or -1 when quitting or when reading the
// queue fails. The current packet and the read position inside it are kept in
// function-local statics between calls.
int audio_decode_frame(AVCodecContext *aCodecCtx, uint8_t *audio_buf, int buf_size)
static AVPacket pkt;
static uint8_t *audio_pkt_data = NULL;
static int audio_pkt_size = 0;
int len1, data_size;
for (;;)
// Consume whatever is left of the current packet.
while (audio_pkt_size > 0)
data_size = buf_size;
len1 = avcodec_decode_audio2(aCodecCtx, (int16_t*)audio_buf, &data_size,
audio_pkt_data, audio_pkt_size);
if (len1 < 0)
/* if error, skip frame */
audio_pkt_size = 0;
// Advance past the bytes the decoder consumed.
audio_pkt_data += len1;
audio_pkt_size -= len1;
if (data_size <= 0)
/* No data yet, get more frames */
/* We have data, return it and come back for more later */
return data_size;
// NOTE(review): the condition below is truncated in this excerpt.
if (
if (quit) return -1;
// Blocking read of the next audio packet.
if (packet_queue_get(&audioq, &pkt, 1) < 0) return -1;
// NOTE(review): the right-hand side of the next assignment is missing in this
// excerpt.
audio_pkt_data =;
audio_pkt_size = pkt.size;
// SDL audio callback (installed through wanted_spec.callback in setup_ffmpeg):
// fills `stream` with `len` bytes, decoding more audio whenever the static
// staging buffer has been fully consumed.
// userdata: the audio AVCodecContext (wanted_spec.userdata).
void audio_callback(void *userdata, Uint8 *stream, int len)
AVCodecContext *aCodecCtx = (AVCodecContext *)userdata;
int len1, audio_size;
// Staging buffer plus its fill level and read offset, kept across callbacks.
static uint8_t audio_buf[(AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) / 2];
static unsigned int audio_buf_size = 0;
static unsigned int audio_buf_index = 0;
while (len > 0)
if (audio_buf_index >= audio_buf_size)
/* We have already sent all our data; get more */
audio_size = audio_decode_frame(aCodecCtx, audio_buf, sizeof(audio_buf));
if(audio_size < 0)
/* If error, output silence */
audio_buf_size = 1024; // arbitrary?
memset(audio_buf, 0, audio_buf_size);
// NOTE(review): the `else` before the next assignment is presumably lost in
// this excerpt.
audio_buf_size = audio_size;
audio_buf_index = 0;
// Copy as much as is available, capped at what SDL still wants.
len1 = audio_buf_size - audio_buf_index;
if (len1 > len)
len1 = len;
memcpy(stream, (uint8_t *)audio_buf + audio_buf_index, len1);
len -= len1;
stream += len1;
audio_buf_index += len1;
// Opens `filename` with ffmpeg, picks the first video and audio streams, opens
// the SDL audio device with audio_callback as its callback, and opens the
// audio and video decoders. Results land in the globals pFormatCtx,
// videoStream, audioStream, aCodecCtx and pCodecCtx.
// NOTE(review): the statements following most error messages (return/exit) are
// not visible in this excerpt.
void setup_ffmpeg(char* filename)
if (av_open_input_file(&pFormatCtx, filename, NULL, 0, NULL) != 0)
fprintf(stderr, "FFmpeg failed to open file %s!\n", filename);
if (av_find_stream_info(pFormatCtx) < 0)
fprintf(stderr, "FFmpeg failed to retrieve stream info!\n");
// Dump information about file onto standard error
dump_format(pFormatCtx, 0, filename, 0);
// Find the first video stream and the first audio stream
int i = 0;
for (i; i < pFormatCtx->nb_streams; i++)
if (pFormatCtx->streams[i]->codec->codec_type == CODEC_TYPE_VIDEO && videoStream < 0)
videoStream = i;
if (pFormatCtx->streams[i]->codec->codec_type == CODEC_TYPE_AUDIO && audioStream < 0)
audioStream = i;
if (videoStream == -1)
fprintf(stderr, "No video stream found in %s!\n", filename);
if (audioStream == -1)
fprintf(stderr, "No audio stream found in %s!\n", filename);
// Get a pointer to the codec context for the audio stream
aCodecCtx = pFormatCtx->streams[audioStream]->codec;
// Set audio settings from codec info
SDL_AudioSpec wanted_spec;
wanted_spec.freq = aCodecCtx->sample_rate;
wanted_spec.format = AUDIO_S16SYS;
wanted_spec.channels = aCodecCtx->channels;
wanted_spec.silence = 0;
// NOTE(review): SDL_AUDIO_BUFFER_SIZE is not defined in the visible code.
wanted_spec.samples = SDL_AUDIO_BUFFER_SIZE;
// SDL calls audio_callback with aCodecCtx as userdata.
wanted_spec.callback = audio_callback;
wanted_spec.userdata = aCodecCtx;
SDL_AudioSpec spec;
if (SDL_OpenAudio(&wanted_spec, &spec) < 0)
fprintf(stderr, "SDL_OpenAudio: %s\n", SDL_GetError());
AVCodec* aCodec = avcodec_find_decoder(aCodecCtx->codec_id);
if (!aCodec)
fprintf(stderr, "Unsupported codec!\n");
// NOTE(review): the return value of avcodec_open is not checked for audio.
avcodec_open(aCodecCtx, aCodec);
// audio_st = pFormatCtx->streams[index]
// Get a pointer to the codec context for the video stream
pCodecCtx = pFormatCtx->streams[videoStream]->codec;
// Find the decoder for the video stream
AVCodec* pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
if (pCodec == NULL)
fprintf(stderr, "Unsupported codec!\n");
exit(-1); // Codec not found
// Open codec
if (avcodec_open(pCodecCtx, pCodec) < 0)
fprintf(stderr, "Unsupported codec!\n");
exit(-1); // Could not open codec
// Demo entry point: takes the video path as argv[1], opens it with
// cv::VideoCapture for the frames, and walks the ffmpeg packets of the same
// file. For each video packet one OpenCV frame is read and processed; audio
// packets are queued on audioq.
// NOTE(review): the calls to SDL_Init, setup_ffmpeg, packet_queue_init,
// show_frame, event polling and cleanup are not visible in this excerpt.
int main(int argc, char* argv[])
if (argc < 2)
std::cout << "Usage: " << argv[0] << " <video>" << std::endl;
return -1;
// Init SDL
fprintf(stderr, "Could not initialize SDL - %s\n", SDL_GetError());
return -1;
// Init ffmpeg and setup some SDL stuff related to Audio
VideoCapture cap(argv[1]); // open the video file given on the command line
if (!cap.isOpened()) // check if we succeeded
std::cout << "Failed to load file!" << std::endl;
return -1;
AVPacket packet;
while (av_read_frame(pFormatCtx, &packet) >= 0)
if (packet.stream_index == videoStream)
// Actually this is were SYNC between audio/video would happen.
// Right now I assume that every VIDEO packet contains an entire video frame, and that's not true. A video frame can be made by multiple packets!
// But for the time being, assume 1 video frame == 1 video packet,
// so instead of reading the frame through ffmpeg, I read it through OpenCV.
Mat frame;
cap >> frame; // get the next frame from the video file
// do some processing on the frame, either as a Mat or as IplImage.
// For educational purposes, applying a lame grayscale conversion
IplImage ipl_frame = frame;
// NOTE(review): B is overwritten before G is computed, and G before R, so the
// three channels do not receive the same average — confirm whether that is
// acceptable for the demo.
for (int i = 0; i < ipl_frame.width * ipl_frame.height * ipl_frame.nChannels; i += ipl_frame.nChannels)
ipl_frame.imageData[i] = (ipl_frame.imageData[i] + ipl_frame.imageData[i+1] + ipl_frame.imageData[i+2])/3; //B
ipl_frame.imageData[i+1] = (ipl_frame.imageData[i] + ipl_frame.imageData[i+1] + ipl_frame.imageData[i+2])/3; //G
ipl_frame.imageData[i+2] = (ipl_frame.imageData[i] + ipl_frame.imageData[i+1] + ipl_frame.imageData[i+2])/3; //R
// Display it on SDL window
else if (packet.stream_index == audioStream)
// Hand the audio packet to the decoder side.
packet_queue_put(&audioq, &packet);
SDL_Event event;
switch (event.type)
case SDL_QUIT:
// the camera will be deinitialized automatically in VideoCapture destructor
// Close the codec
// Close the video file
return 0;
On my Mac I compiled it with:
g++ ffmpeg_snd.cpp -o ffmpeg_snd -D_GNU_SOURCE=1 -D_THREAD_SAFE -I/usr/local/include/opencv -I/usr/local/include -I/usr/local/include/SDL -Wl,-framework,Cocoa -L/usr/local/lib -lopencv_core -lopencv_imgproc -lopencv_highgui -lopencv_ml -lopencv_video -lopencv_features2d -lopencv_calib3d -lopencv_objdetect -lopencv_contrib -lopencv_legacy -lopencv_flann -lSDLmain -lSDL -L/usr/local/lib -lavfilter -lavcodec -lavformat -I/usr/local/Cellar/ffmpeg/HEAD/include/libavcodec -I/usr/local/Cellar/ffmpeg/HEAD/include/libavformat