What to do when last_pts > current_pts using ffmpeg libs (C++) - c++

Im having some hard time figuring out where to find about this..
Im building a simple recorder to learn about this video compression universe and Im facing some weird behaviors..
Before all I need to explain the scenario...
Its very simple... everytime I call av_read_frame( input_context, input_packet ) I save the pts into the last_pts variable...
So...
Whats bothering me is the fact that about 10% of my calls to av_read_frame I get
input_packet.pts > last_pts
Resulting in a error message from the encoder when I try to do it...
Having it in mind I decided to just drop those frames when it happens....
I think it is not a good idea to just drop frames because if I get them, its needed somehow...
So... what to do when last_pts > current_pts ?
I will paste my test code that Im using capturing the video from webcam and saving to mp4 file with libx264 encoder
#include <QCoreApplication>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libavutil/avutil.h>
#include <libavdevice/avdevice.h>
}
#include <QTime>
#include <QThread>
#include <QDebug>
#define SM_DEBUG
static const double max_fps = 30;
static const double min_loop_duration = 1000 / max_fps;
static const double max_duration = 5; // in seconds
static void sleep_if_needed(const int &elapsed) {
int sleep_duration = min_loop_duration - elapsed;
if (sleep_duration > 0) {
QThread::msleep(sleep_duration);
}
}
#ifdef SM_DEBUG
static void log_packet(const AVPacket *pkt,
const AVRational &time_base,
int is_input=0)
{
qDebug() << ((is_input) ? QString(">>") : QString("<<")) << "Size:" << QString::number(pkt->size) <<
"pts:" << QString::number(pkt->pts) <<
"pts_time:" << QString::number(av_q2d(time_base) * pkt->pts) <<
"dts:" << QString::number(pkt->dts) <<
"dts_time:" << QString::number(av_q2d(time_base) * pkt->dts);
}
#endif
int main()
{
int input_w, input_h, output_w = 640, output_h = 480;
av_register_all();
avdevice_register_all();
avcodec_register_all();
#ifdef SM_DEBUG
av_log_set_level(AV_LOG_DEBUG);
#else
av_log_set_level(AV_LOG_ERROR);
#endif
AVFormatContext *ic;
AVFormatContext *oc;
AVInputFormat *ifmt;
AVDictionary *opts = 0;
AVCodecContext* dec_ctx;
AVCodecContext* enc_ctx;
AVCodec *dec;
AVCodec *enc;
AVStream* ist;
AVStream* ost;
ifmt = av_find_input_format("v4l2");
av_dict_set(&opts, "tune", "zerolatency", AV_DICT_APPEND);
ic = avformat_alloc_context();
ic->flags |= AVFMT_FLAG_NONBLOCK;
avformat_open_input(&ic, "/dev/video0", ifmt, &opts);
avformat_find_stream_info(ic, NULL);
av_dump_format(ic, 0, ic->filename, 0);
AVFrame *frame;
AVFrame *tmp_frame;
ist = ic->streams[0];
dec_ctx = ist->codec;
input_w = dec_ctx->width;
input_h = dec_ctx->height;
dec_ctx->flags |= CODEC_FLAG_LOW_DELAY;
dec = avcodec_find_decoder(dec_ctx->codec_id);
av_format_set_video_codec(ic, dec);
avcodec_open2(dec_ctx, dec, NULL);
// output
avformat_alloc_output_context2(&oc, NULL, "MP4", "/home/poste9/grava.mp4");
enc = avcodec_find_encoder(AV_CODEC_ID_H264);
ost = avformat_new_stream(oc, enc);
enc_ctx = ost->codec;
enc_ctx->codec_id = AV_CODEC_ID_H264;
enc_ctx->width = output_w;
enc_ctx->height = output_h;
ost->time_base.num = ist->time_base.num;
ost->time_base.den = ist->time_base.den;
enc_ctx->time_base = ost->time_base;
enc_ctx->gop_size = 250;
enc_ctx->keyint_min = 25;
enc_ctx->qmax = 51;
enc_ctx->qmin = 30;
enc_ctx->pix_fmt = AV_PIX_FMT_YUV422P;
enc_ctx->max_b_frames = 6;
enc_ctx->flags |= CODEC_FLAG_GLOBAL_HEADER;
enc_ctx->flags |= CODEC_FLAG_LOW_DELAY;
avcodec_open2(enc_ctx, enc, NULL);
avio_open2(&oc->pb, oc->filename, AVIO_FLAG_WRITE,
&oc->interrupt_callback, NULL);
av_dump_format(oc, 0, oc->filename, 1);
avformat_write_header(oc, NULL);
struct SwsContext *sws_ctx;
sws_ctx = sws_getContext(input_w, input_h,
dec_ctx->pix_fmt,
output_w, output_h, enc_ctx->pix_fmt,
SWS_BICUBIC, NULL, NULL, NULL);
frame = av_frame_alloc();
tmp_frame = av_frame_alloc();
frame->format = enc_ctx->pix_fmt;
frame->width = output_w;
frame->height = output_h;
frame->pts = AV_NOPTS_VALUE;
av_frame_get_buffer(frame, 32);
av_frame_make_writable(frame);
int got_picture=0;
int got_packet=0;
double recording_duration = 0;
QTime timer;
AVPacket pkt_out;
av_init_packet(&pkt_out);
timer.start();
bool started_recording = false;
int64_t start_time = 0;
int64_t last_pts = INT64_MIN;
while(1) {
timer.restart();
AVPacket pkt_in;
av_read_frame(ic, &pkt_in);
if (pkt_in.size == 0) {
sleep_if_needed(timer.elapsed());
continue;
}
avcodec_decode_video2(dec_ctx, tmp_frame, &got_picture, &pkt_in);
#ifdef SM_DEBUG
log_packet(&pkt_in, ist->time_base, 1);
#endif
if (!started_recording) {
start_time = pkt_in.dts;
started_recording = true;
}
if (pkt_in.pts < last_pts) {
sleep_if_needed(timer.elapsed());
continue;
}
last_pts = pkt_in.pts;
frame->pts = (pkt_in.dts - start_time);
if (!got_picture) {
av_free_packet(&pkt_in);
sleep_if_needed(timer.elapsed());
continue;
} else {
sws_scale(sws_ctx, tmp_frame->data, tmp_frame->linesize,
0, input_h, frame->data, frame->linesize);
av_free_packet(&pkt_in);
}
av_init_packet(&pkt_out);
avcodec_encode_video2(enc_ctx, &pkt_out, frame, &got_packet);
if (got_packet) {
if (pkt_out.pts < pkt_out.dts) {
pkt_out.dts = pkt_out.pts;
}
pkt_out.stream_index = 0;
recording_duration = pkt_out.pts * av_q2d(ost->time_base);
#ifdef SM_DEBUG
log_packet(&pkt_out, ost->time_base, 0);
#endif
av_interleaved_write_frame(oc, &pkt_out);
av_free_packet(&pkt_out);
}
if (recording_duration >= max_duration) {
break;
} else {
sleep_if_needed(timer.elapsed());
}
}
av_write_trailer(oc);
av_dict_free(&opts);
av_frame_free(&frame);
av_frame_free(&tmp_frame);
sws_freeContext(sws_ctx);
avcodec_close(dec_ctx);
avcodec_close(enc_ctx);
avio_close(oc->pb);
avformat_free_context(oc);
avformat_close_input(&ic);
return 0;
}

These frames are B frames. B frames are saved to the stream in decoding order, not presentation order. If you look at the DTS it will probablly look ok. it is the job of the decoder to reorder frames into presentation order after they are decoded.
EDIT. to fix your code, use the PTS from the decoded frame, not the packet.

Related

FFmpeg: Parallel encoding with custom thread pool

One of the things I'm trying to achieve is parallel encoding via FFmpeg's c API. This looks to work out of the box quite nicely; however, I've changed the goal posts slightly:
In an existing application, I already have a thread pool at hand. Instead of using another thread pool via FFmpeg, I would like reuse the existing thread pool in my application. Having studied the latest FFmpeg trunk docs, it very much looks possible.
Using some FFmpeg sample code, I've created a sample application to demonstrate what I'm trying to achieve (see below). The sample app generates a video-only mpeg2 ts using the mp2v codec.
The problem I'm experiencing is that the custom 'thread_execute' or 'thread_execute2' are never invoked. This is despite the fact that the codec appears to indicate that threading is supported. Please be aware that I have not yet plumbed in the thread pool just yet. My first goal is for it to call the custom function pointer.
I've tried to get assistance on the FFmpeg mailing lists but to no avail.
#include <iostream>
#include <thread>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <cstring>
#include <future>
extern "C"
{
#include <libavutil/avassert.h>
#include <libavutil/channel_layout.h>
#include <libavutil/opt.h>
#include <libavutil/timestamp.h>
#include <libavformat/avformat.h>
//#include <libswscale/swscale.h>
#include <libswresample/swresample.h>
}
#define STREAM_DURATION 1000.0
#define STREAM_FRAME_RATE 25 /* 25 images/s */
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P /* default pix_fmt */
#define SCALE_FLAGS SWS_BICUBIC
// a wrapper around a single output AVStream
typedef struct OutputStream {
AVStream *st;
AVCodecContext *enc;
/* pts of the next frame that will be generated */
int64_t next_pts;
int samples_count;
AVFrame *frame;
AVFrame *tmp_frame;
float t, tincr, tincr2;
struct SwsContext *sws_ctx;
struct SwrContext *swr_ctx;
} OutputStream;
/////////////////////////////////////////////////////////////////////////////
// The ffmpeg variation raises compiler warnings.
char *cb_av_ts2str(char *buf, int64_t ts)
{
std::memset(buf,0,AV_TS_MAX_STRING_SIZE);
return av_ts_make_string(buf,ts);
}
/////////////////////////////////////////////////////////////////////////////
// The ffmpeg variation raises compiler warnings.
char *cb_av_ts2timestr(char *buf, int64_t ts, AVRational *tb)
{
std::memset(buf,0,sizeof(AV_TS_MAX_STRING_SIZE));
return av_ts_make_time_string(buf,ts,tb);
}
/////////////////////////////////////////////////////////////////////////////
// The ffmpeg variation raises compiler warnings.
char *cb_av_err2str(char *errbuf, size_t errbuf_size, int errnum)
{
std::memset(errbuf,0,errbuf_size);
return av_make_error_string(errbuf,errbuf_size,errnum);
}
int thread_execute(AVCodecContext* s, int (*func)(AVCodecContext *c2, void *arg2), void* arg, int* ret, int count, int size)
{
// Do it all serially for now
std::cout << "thread_execute" << std::endl;
for (int k = 0; k < count; ++k)
{
ret[k] = func(s, arg);
}
return 0;
}
int thread_execute2(AVCodecContext* s, int (*func)(AVCodecContext* c2, void* arg2, int, int), void* arg, int* ret, int count)
{
// Do it all serially for now
std::cout << "thread_execute2" << std::endl;
for (int k = 0; k < count; ++k)
{
ret[k] = func(s, arg, k, count);
}
return 0;
}
static void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt)
{
char s[AV_TS_MAX_STRING_SIZE];
AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
printf("pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
cb_av_ts2str(s,pkt->pts), cb_av_ts2timestr(s,pkt->pts, time_base),
cb_av_ts2str(s,pkt->dts), cb_av_ts2timestr(s,pkt->dts, time_base),
cb_av_ts2str(s,pkt->duration), cb_av_ts2timestr(s,pkt->duration, time_base),
pkt->stream_index);
}
static int write_frame(AVFormatContext *fmt_ctx, const AVRational *time_base, AVStream *st, AVPacket *pkt)
{
/* rescale output packet timestamp values from codec to stream timebase */
av_packet_rescale_ts(pkt, *time_base, st->time_base);
pkt->stream_index = st->index;
/* Write the compressed frame to the media file. */
log_packet(fmt_ctx, pkt);
return av_interleaved_write_frame(fmt_ctx, pkt);
}
/* Add an output stream. */
static void add_stream(OutputStream *ost, AVFormatContext *oc,
AVCodec **codec,
enum AVCodecID codec_id)
{
AVCodecContext *c;
int i;
/* find the encoder */
*codec = avcodec_find_encoder(codec_id);
if (!(*codec)) {
fprintf(stderr, "Could not find encoder for '%s'\n",
avcodec_get_name(codec_id));
exit(1);
}
ost->st = avformat_new_stream(oc, NULL);
if (!ost->st) {
fprintf(stderr, "Could not allocate stream\n");
exit(1);
}
ost->st->id = oc->nb_streams-1;
c = avcodec_alloc_context3(*codec);
if (!c) {
fprintf(stderr, "Could not alloc an encoding context\n");
exit(1);
}
ost->enc = c;
switch ((*codec)->type)
{
case AVMEDIA_TYPE_AUDIO:
c->sample_fmt = (*codec)->sample_fmts ?
(*codec)->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
c->bit_rate = 64000;
c->sample_rate = 44100;
if ((*codec)->supported_samplerates) {
c->sample_rate = (*codec)->supported_samplerates[0];
for (i = 0; (*codec)->supported_samplerates[i]; i++) {
if ((*codec)->supported_samplerates[i] == 44100)
c->sample_rate = 44100;
}
}
c->channels = av_get_channel_layout_nb_channels(c->channel_layout);
c->channel_layout = AV_CH_LAYOUT_STEREO;
if ((*codec)->channel_layouts) {
c->channel_layout = (*codec)->channel_layouts[0];
for (i = 0; (*codec)->channel_layouts[i]; i++) {
if ((*codec)->channel_layouts[i] == AV_CH_LAYOUT_STEREO)
c->channel_layout = AV_CH_LAYOUT_STEREO;
}
}
c->channels = av_get_channel_layout_nb_channels(c->channel_layout);
ost->st->time_base = (AVRational){ 1, c->sample_rate };
break;
case AVMEDIA_TYPE_VIDEO:
c->codec_id = codec_id;
c->bit_rate = 400000;
/* Resolution must be a multiple of two. */
c->width = 352;
c->height = 288;
/* timebase: This is the fundamental unit of time (in seconds) in terms
* of which frame timestamps are represented. For fixed-fps content,
* timebase should be 1/framerate and timestamp increments should be
* identical to 1. */
ost->st->time_base = (AVRational){ 1, STREAM_FRAME_RATE };
c->time_base = ost->st->time_base;
c->gop_size = 12; /* emit one intra frame every twelve frames at most */
c->pix_fmt = STREAM_PIX_FMT;
if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
/* just for testing, we also add B-frames */
c->max_b_frames = 2;
}
if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
/* Needed to avoid using macroblocks in which some coeffs overflow.
* This does not happen with normal video, it just happens here as
* the motion of the chroma plane does not match the luma plane. */
c->mb_decision = 2;
}
break;
default:
break;
}
if (c->codec->capabilities & AV_CODEC_CAP_FRAME_THREADS ||
c->codec->capabilities & AV_CODEC_CAP_SLICE_THREADS)
{
if (c->codec->capabilities & AV_CODEC_CAP_FRAME_THREADS)
{
c->thread_type = FF_THREAD_FRAME;
}
if (c->codec->capabilities & AV_CODEC_CAP_SLICE_THREADS)
{
c->thread_type = FF_THREAD_SLICE;
}
c->execute = &thread_execute;
c->execute2 = &thread_execute2;
c->thread_count = 4;
// NOTE: Testing opaque.
c->opaque = (void*)0xff;
}
/* Some formats want stream headers to be separate. */
if (oc->oformat->flags & AVFMT_GLOBALHEADER)
c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
/**************************************************************/
/* video output */
static AVFrame *alloc_picture(enum AVPixelFormat pix_fmt, int width, int height)
{
AVFrame *picture;
int ret;
picture = av_frame_alloc();
if (!picture)
return NULL;
picture->format = pix_fmt;
picture->width = width;
picture->height = height;
/* allocate the buffers for the frame data */
ret = av_frame_get_buffer(picture, 32);
if (ret < 0) {
fprintf(stderr, "Could not allocate frame data.\n");
exit(1);
}
return picture;
}
static void open_video(AVFormatContext *oc, AVCodec *codec, OutputStream *ost, AVDictionary *opt_arg)
{
int ret;
AVCodecContext *c = ost->enc;
//AVDictionary *opt = NULL;
//av_dict_copy(&opt, opt_arg, 0);
/* open the codec */
ret = avcodec_open2(c, codec, NULL);
//av_dict_free(&opt);
if (ret < 0) {
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Could not open video codec: %s\n", cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
exit(1);
}
/* allocate and init a re-usable frame */
ost->frame = alloc_picture(c->pix_fmt, c->width, c->height);
if (!ost->frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
/* If the output format is not YUV420P, then a temporary YUV420P
* picture is needed too. It is then converted to the required
* output format. */
ost->tmp_frame = NULL;
if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
ost->tmp_frame = alloc_picture(AV_PIX_FMT_YUV420P, c->width, c->height);
if (!ost->tmp_frame) {
fprintf(stderr, "Could not allocate temporary picture\n");
exit(1);
}
}
/* copy the stream parameters to the muxer */
ret = avcodec_parameters_from_context(ost->st->codecpar, c);
if (ret < 0) {
fprintf(stderr, "Could not copy the stream parameters\n");
exit(1);
}
}
/* Prepare a dummy image. */
static void fill_yuv_image(AVFrame *pict, int frame_index,
int width, int height)
{
int x, y, i;
i = frame_index;
/* Y */
for (y = 0; y < height; y++)
for (x = 0; x < width; x++)
pict->data[0][y * pict->linesize[0] + x] = x + y + i * 3;
/* Cb and Cr */
for (y = 0; y < height / 2; y++) {
for (x = 0; x < width / 2; x++) {
pict->data[1][y * pict->linesize[1] + x] = 128 + y + i * 2;
pict->data[2][y * pict->linesize[2] + x] = 64 + x + i * 5;
}
}
}
static AVFrame *get_video_frame(OutputStream *ost)
{
AVCodecContext *c = ost->enc;
/* check if we want to generate more frames */
if (av_compare_ts(ost->next_pts, c->time_base,
STREAM_DURATION, (AVRational){ 1, 1 }) >= 0)
return NULL;
/* when we pass a frame to the encoder, it may keep a reference to it
* internally; make sure we do not overwrite it here */
if (av_frame_make_writable(ost->frame) < 0)
exit(1);
if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
/* as we only generate a YUV420P picture, we must convert it
* to the codec pixel format if needed */
/*if (!ost->sws_ctx) {
ost->sws_ctx = sws_getContext(c->width, c->height,
AV_PIX_FMT_YUV420P,
c->width, c->height,
c->pix_fmt,
SCALE_FLAGS, NULL, NULL, NULL);
if (!ost->sws_ctx) {
fprintf(stderr,
"Could not initialize the conversion context\n");
exit(1);
}
}
fill_yuv_image(ost->tmp_frame, ost->next_pts, c->width, c->height);
sws_scale(ost->sws_ctx,
(const uint8_t * const *)ost->tmp_frame->data, ost->tmp_frame->linesize,
0, c->height, ost->frame->data, ost->frame->linesize);*/
} else {
fill_yuv_image(ost->frame, ost->next_pts, c->width, c->height);
}
ost->frame->pts = ost->next_pts++;
return ost->frame;
}
/*
* encode one video frame and send it to the muxer
* return 1 when encoding is finished, 0 otherwise
*/
static int write_video_frame(AVFormatContext *oc, OutputStream *ost)
{
int ret;
AVCodecContext *c;
AVFrame *frame;
int got_packet = 0;
AVPacket pkt = { 0 };
c = ost->enc;
frame = get_video_frame(ost);
if (frame)
{
ret = avcodec_send_frame(ost->enc, frame);
if (ret < 0)
{
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Error encoding video frame: %s\n", cb_av_err2str(s, AV_ERROR_MAX_STRING_SIZE, ret));
exit(1);
}
}
av_init_packet(&pkt);
ret = avcodec_receive_packet(ost->enc,&pkt);
if (ret < 0)
{
if (ret == AVERROR(EAGAIN)) { ret = 0; }
else
{
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Error receiving packet: %s\n", cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
exit(1);
}
}
else
{
got_packet = 1;
ret = write_frame(oc, &c->time_base, ost->st, &pkt);
}
if (ret < 0) {
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Error while writing video frame: %s\n", cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
exit(1);
}
return (frame || got_packet) ? 0 : 1;
}
static void close_stream(AVFormatContext *oc, OutputStream *ost)
{
avcodec_free_context(&ost->enc);
av_frame_free(&ost->frame);
av_frame_free(&ost->tmp_frame);
//sws_freeContext(ost->sws_ctx);
//swr_free(&ost->swr_ctx);
}
/**************************************************************/
/* media file output */
int main(int argc, char **argv)
{
OutputStream video_st = { 0 }, audio_st = { 0 };
const char *filename;
AVOutputFormat *fmt;
AVFormatContext *oc;
AVCodec /**audio_codec,*/ *video_codec;
int ret;
int have_video = 0, have_audio = 0;
int encode_video = 0, encode_audio = 0;
AVDictionary *opt = NULL;
int i;
/* Initialize libavcodec, and register all codecs and formats. */
av_register_all();
avformat_network_init();
if (argc < 2) {
printf("usage: %s output_file\n"
"API example program to output a media file with libavformat.\n"
"This program generates a synthetic audio and video stream, encodes and\n"
"muxes them into a file named output_file.\n"
"The output format is automatically guessed according to the file extension.\n"
"Raw images can also be output by using '%%d' in the filename.\n"
"\n", argv[0]);
return 1;
}
filename = argv[1];
for (i = 2; i+1 < argc; i+=2) {
if (!strcmp(argv[i], "-flags") || !strcmp(argv[i], "-fflags"))
av_dict_set(&opt, argv[i]+1, argv[i+1], 0);
}
const char *pfilename = filename;
/* allocate the output media context */
avformat_alloc_output_context2(&oc, NULL, "mpegts", pfilename);
if (!oc) {
printf("Could not deduce output format from file extension: using MPEG.\n");
avformat_alloc_output_context2(&oc, NULL, "mpeg", pfilename);
}
if (!oc)
return 1;
fmt = oc->oformat;
/* Add the audio and video streams using the default format codecs
* and initialize the codecs. */
if (fmt->video_codec != AV_CODEC_ID_NONE) {
add_stream(&video_st, oc, &video_codec, fmt->video_codec);
have_video = 1;
encode_video = 1;
}
/*if (fmt->audio_codec != AV_CODEC_ID_NONE) {
add_stream(&audio_st, oc, &audio_codec, fmt->audio_codec);
have_audio = 1;
encode_audio = 1;
}*/
/* Now that all the parameters are set, we can open the audio and
* video codecs and allocate the necessary encode buffers. */
if (have_video)
open_video(oc, video_codec, &video_st, opt);
//if (have_audio)
// open_audio(oc, audio_codec, &audio_st, opt);
av_dump_format(oc, 0, pfilename, 1);
/* open the output file, if needed */
if (!(fmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&oc->pb, pfilename, AVIO_FLAG_WRITE);
if (ret < 0) {
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Could not open '%s': %s\n", pfilename,
cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
return 1;
}
}
/* Write the stream header, if any. */
ret = avformat_write_header(oc, &opt);
if (ret < 0) {
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Error occurred when opening output file: %s\n",
cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
return 1;
}
while (encode_video || encode_audio) {
/* select the stream to encode */
if (encode_video &&
(!encode_audio || av_compare_ts(video_st.next_pts, video_st.enc->time_base,
audio_st.next_pts, audio_st.enc->time_base) <= 0)) {
encode_video = !write_video_frame(oc, &video_st);
} else {
//encode_audio = !write_audio_frame(oc, &audio_st);
}
//std::this_thread::sleep_for(std::chrono::milliseconds(35));
}
/* Write the trailer, if any. The trailer must be written before you
* close the CodecContexts open when you wrote the header; otherwise
* av_write_trailer() may try to use memory that was freed on
* av_codec_close(). */
av_write_trailer(oc);
/* Close each codec. */
if (have_video)
close_stream(oc, &video_st);
if (have_audio)
close_stream(oc, &audio_st);
if (!(fmt->flags & AVFMT_NOFILE))
/* Close the output file. */
avio_closep(&oc->pb);
/* free the stream */
avformat_free_context(oc);
return 0;
}
//
Environment:
Ubuntu Zesty (17.04)
FFmpeg version 3.2.4 (via package manager)
gcc 6.3 (C++)
You have to do following:
call avcodec_alloc_context3(...). This call will set default execute and execute2 functions in new context
set c->thread_count = number_of_threads_in_your_thread_pool()
call avcodec_open2(...).
set c->execute and c->execute2 to point to your functions
call ff_thread_free(c). This function isnt exposed in libavcodec headers but you can add following line:
extern "C" void ff_thread_free(AVCodecContext *s);
Drawback is that libavcodec will create internal thread pool after avcodec_open2(...) call, and that pool will be deleted in ff_thread_free() call.
Internal thread pool is very efficient, but its not good if you plan to do parallel encoding of multiple video feeds. In that case libavcodec will create separate thread pool for each encoding video feed.

Encoding AAC with ffmpeg (c++)

I'm working on video encoding that will be used in a Unity plugin. I have made image encoding work, but now I'm at the audio. So trying only with the audio in to a mp4 file with AAC encoding. And I'm stuck. The resulting file does not contain anything. Also, from what I understand, AAC in ffmpeg only supports AV_SAMPLE_FMT_FLTP, that's why I use it. Here's my code:
Setup:
int initialize_encoding_audio(const char *filename)
{
int ret;
AVCodecID aud_codec_id = AV_CODEC_ID_AAC;
AVSampleFormat sample_fmt = AV_SAMPLE_FMT_FLTP;
avcodec_register_all();
av_register_all();
aud_codec = avcodec_find_encoder(aud_codec_id);
avcodec_register(aud_codec);
if (!aud_codec)
return COULD_NOT_FIND_AUD_CODEC;
aud_codec_context = avcodec_alloc_context3(aud_codec);
if (!aud_codec_context)
return CONTEXT_CREATION_ERROR;
aud_codec_context->bit_rate = 192000;
aud_codec_context->sample_rate = select_sample_rate(aud_codec);
aud_codec_context->sample_fmt = sample_fmt;
aud_codec_context->channel_layout = AV_CH_LAYOUT_STEREO;
aud_codec_context->channels = av_get_channel_layout_nb_channels(aud_codec_context->channel_layout);
aud_codec_context->codec = aud_codec;
aud_codec_context->codec_id = aud_codec_id;
ret = avcodec_open2(aud_codec_context, aud_codec, NULL);
if (ret < 0)
return COULD_NOT_OPEN_AUD_CODEC;
outctx = avformat_alloc_context();
ret = avformat_alloc_output_context2(&outctx, NULL, "mp4", filename);
outctx->audio_codec = aud_codec;
outctx->audio_codec_id = aud_codec_id;
audio_st = avformat_new_stream(outctx, aud_codec);
audio_st->codecpar->bit_rate = aud_codec_context->bit_rate;
audio_st->codecpar->sample_rate = aud_codec_context->sample_rate;
audio_st->codecpar->channels = aud_codec_context->channels;
audio_st->codecpar->channel_layout = aud_codec_context->channel_layout;
audio_st->codecpar->codec_id = aud_codec_id;
audio_st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
audio_st->codecpar->format = sample_fmt;
audio_st->codecpar->frame_size = aud_codec_context->frame_size;
audio_st->codecpar->block_align = aud_codec_context->block_align;
audio_st->codecpar->initial_padding = aud_codec_context->initial_padding;
outctx->streams = new AVStream*[1];
outctx->streams[0] = audio_st;
av_dump_format(outctx, 0, filename, 1);
if (!(outctx->oformat->flags & AVFMT_NOFILE))
{
if (avio_open(&outctx->pb, filename, AVIO_FLAG_WRITE) < 0)
return COULD_NOT_OPEN_FILE;
}
ret = avformat_write_header(outctx, NULL);
aud_frame = av_frame_alloc();
aud_frame->nb_samples = aud_codec_context->frame_size;
aud_frame->format = aud_codec_context->sample_fmt;
aud_frame->channel_layout = aud_codec_context->channel_layout;
int buffer_size = av_samples_get_buffer_size(NULL, aud_codec_context->channels, aud_codec_context->frame_size,
aud_codec_context->sample_fmt, 0);
av_frame_get_buffer(aud_frame, buffer_size / aud_codec_context->channels);
if (!aud_frame)
return COULD_NOT_ALLOCATE_FRAME;
aud_frame_counter = 0;
return 0;
}
Encoding:
int encode_audio_samples(uint8_t **aud_samples)
{
int ret;
int buffer_size = av_samples_get_buffer_size(NULL, aud_codec_context->channels, aud_codec_context->frame_size,
aud_codec_context->sample_fmt, 0);
for (size_t i = 0; i < buffer_size / aud_codec_context->channels; i++)
{
aud_frame->data[0][i] = aud_samples[0][i];
aud_frame->data[1][i] = aud_samples[1][i];
}
aud_frame->pts = aud_frame_counter++;
ret = avcodec_send_frame(aud_codec_context, aud_frame);
if (ret < 0)
return ERROR_ENCODING_SAMPLES_SEND;
AVPacket pkt;
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
fflush(stdout);
while (true)
{
ret = avcodec_receive_packet(aud_codec_context, &pkt);
if (!ret)
{
av_packet_rescale_ts(&pkt, aud_codec_context->time_base, audio_st->time_base);
pkt.stream_index = audio_st->index;
av_write_frame(outctx, &pkt);
av_packet_unref(&pkt);
}
if (ret == AVERROR(EAGAIN))
break;
else if (ret < 0)
return ERROR_ENCODING_SAMPLES_RECEIVE;
else
break;
}
return 0;
}
Finish encoding:
int finish_audio_encoding()
{
AVPacket pkt;
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
fflush(stdout);
int ret = avcodec_send_frame(aud_codec_context, NULL);
if (ret < 0)
return ERROR_ENCODING_FRAME_SEND;
while (true)
{
ret = avcodec_receive_packet(aud_codec_context, &pkt);
if (!ret)
{
if (pkt.pts != AV_NOPTS_VALUE)
pkt.pts = av_rescale_q(pkt.pts, aud_codec_context->time_base, audio_st->time_base);
if (pkt.dts != AV_NOPTS_VALUE)
pkt.dts = av_rescale_q(pkt.dts, aud_codec_context->time_base, audio_st->time_base);
av_write_frame(outctx, &pkt);
av_packet_unref(&pkt);
}
if (ret == -AVERROR(AVERROR_EOF))
break;
else if (ret < 0)
return ERROR_ENCODING_FRAME_RECEIVE;
}
av_write_trailer(outctx);
}
Main:
void get_audio_frame(float_t *left_samples, float_t *right_samples, int frame_size, float* t, float* tincr, float* tincr2)
{
int j, i;
float v;
for (j = 0; j < frame_size; j++)
{
v = sin(*t);
*left_samples = v;
*right_samples = v;
left_samples++;
right_samples++;
*t += *tincr;
*tincr += *tincr2;
}
}
int main()
{
int frame_rate = 30; // this should be like 96000 / 1024 or somthing i guess?
float t, tincr, tincr2;
initialize_encoding_audio("audio.mp4");
int sec = 50;
float_t** aud_samples;
int src_samples_linesize;
int src_nb_samples = 1024;
int src_channels = 2;
int ret = av_samples_alloc_array_and_samples((uint8_t***)&aud_samples, &src_samples_linesize, src_channels,
src_nb_samples, AV_SAMPLE_FMT_FLTP, 0);
t = 0;
tincr = 0;
tincr2 = 0;
for (size_t i = 0; i < frame_rate * sec; i++)
{
get_audio_frame(aud_samples[0], aud_samples[1], src_nb_samples, &t, &tincr, &tincr2);
encode_audio_samples((uint8_t **)aud_samples);
}
finish_audio_encoding();
//cleanup();
return 0;
}
I guess the first thing that I would want to make sure I got right is the synthetic sound generation and how I transfer that to the AVFrame. Are my conversions correct? But feel free to point out anything that might be wrong.
Thanks in advance!
Edit: the whole source: http://pastebin.com/jYtmkhek
Edit2: Added initialization of tincr & tincr2
Unless I'm missing something from the pastebin, you forgot to initialize a few variables. You're using garbage to generate your samples.
float t, tincr, tincr2;
[...]
get_audio_frame(aud_samples[0], aud_samples[1], src_nb_samples, &t, &tincr, &tincr2);
You probably want to start with t=0 and increment by 2 * PI * frequency / sample rate for a sine wave.
Also, avformat_new_stream() creates the stream for you, don't do it with new.
Update:
I removed all the c++ stuff to test this. Here's the code that works: pastebin
And here's the resulting file: audio.mp4
ffmpeg -i audio.mp4 -filter_complex "showwaves=s=640x120:mode=line:colors=white" -frames:v 1 wave.jpg
Diff:
1,6d0
< #include "encoder.h"
< #include <algorithm>
< #include <iterator>
<
< extern "C"
< {
14a9
> #include <math.h>
40,41c35,36
< SwsContext *sws_ctx;
< SwrContext *swr_ctx = NULL;
---
> struct SwsContext *sws_ctx;
> struct SwrContext *swr_ctx = NULL;
76,77c71,72
< AVCodecID aud_codec_id = AV_CODEC_ID_AAC;
< AVSampleFormat sample_fmt = AV_SAMPLE_FMT_FLTP;
---
> enum AVCodecID aud_codec_id = AV_CODEC_ID_AAC;
> enum AVSampleFormat sample_fmt = AV_SAMPLE_FMT_FLTP;
125,126c120,121
< outctx->streams = new AVStream*[1];
< outctx->streams[0] = audio_st;
---
> //outctx->streams = new AVStream*[1];
> //outctx->streams[0] = audio_st;
182c177
< while (true)
---
> while (1)
216c211
< while (true)
---
> while (1)
291c286
< float t, tincr, tincr2;
---
> float t = 0, tincr = 2 * M_PI * 440.0 / 96000, tincr2 = 0;
317d311
< }

FFmpeg C++ api decode h264 error

I'm trying to use the C++ API of FFMpeg (version 20150526) under Windows using the prebuilt binaries to decode an h264 video file (*.ts).
I've written a very simple code that automatically detects the required codec from the file itself (and it is AV_CODEC_ID_H264, as expected).
Then I re-open the video file in read-binary mode and I read a fixed-size buffer of bytes from it and provide the read bytes to the decoder within a while-loop until the end of file. However when I call the function avcodec_decode_video2 a large amount of errors happen like the following ones:
[h264 # 008df020] top block unavailable for requested intro mode at 34 0
[h264 # 008df020] error while decoding MB 34 0, bytestream 3152
[h264 # 008df020] decode_slice_header error
Sometimes the function avcodec_decode_video2 sets the value of got_picture_ptr to 1 and hence I expect to find a good frame. Instead, though all the computations are successful, when I view the decoded frame (using OpenCV only for visualization purposes) I see a gray one with some artifacts.
If I employ the same code to decode an *.avi file it works fine.
Reading the examples of FFMpeg I did not find a solution to my problem. I've also implemented the solution proposed in the simlar question FFmpeg c++ H264 decoding error but it did not work.
Does anyone know where the error is?
Thank you in advance for any reply!
The code is the following [EDIT: code updated including the parser management]:
#include <iostream>
#include <iomanip>
#include <string>
#include <sstream>
#include <opencv2/opencv.hpp>
#ifdef __cplusplus
extern "C"
{
#endif // __cplusplus
#include <libavcodec/avcodec.h>
#include <libavdevice/avdevice.h>
#include <libavfilter/avfilter.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavutil/avutil.h>
#include <libpostproc/postprocess.h>
#include <libswresample/swresample.h>
#include <libswscale/swscale.h>
#ifdef __cplusplus
} // end extern "C".
#endif // __cplusplus
#define INBUF_SIZE 4096
void main()
{
AVCodec* l_pCodec;
AVCodecContext* l_pAVCodecContext;
SwsContext* l_pSWSContext;
AVFormatContext* l_pAVFormatContext;
AVFrame* l_pAVFrame;
AVFrame* l_pAVFrameBGR;
AVPacket l_AVPacket;
AVPacket l_AVPacket_out;
AVStream* l_pStream;
AVCodecParserContext* l_pParser;
FILE* l_pFile_in;
FILE* l_pFile_out;
std::string l_sFile;
int l_iResult;
int l_iFrameCount;
int l_iGotFrame;
int l_iBufLength;
int l_iParsedBytes;
int l_iPts;
int l_iDts;
int l_iPos;
int l_iSize;
int l_iDecodedBytes;
uint8_t l_auiInBuf[INBUF_SIZE + FF_INPUT_BUFFER_PADDING_SIZE];
uint8_t* l_pData;
cv::Mat l_cvmImage;
l_pCodec = NULL;
l_pAVCodecContext = NULL;
l_pSWSContext = NULL;
l_pAVFormatContext = NULL;
l_pAVFrame = NULL;
l_pAVFrameBGR = NULL;
l_pParser = NULL;
l_pStream = NULL;
l_pFile_in = NULL;
l_pFile_out = NULL;
l_iPts = 0;
l_iDts = 0;
l_iPos = 0;
l_pData = NULL;
l_sFile = "myvideo.ts";
avdevice_register_all();
avfilter_register_all();
avcodec_register_all();
av_register_all();
avformat_network_init();
l_pAVFormatContext = avformat_alloc_context();
l_iResult = avformat_open_input(&l_pAVFormatContext,
l_sFile.c_str(),
NULL,
NULL);
if (l_iResult >= 0)
{
l_iResult = avformat_find_stream_info(l_pAVFormatContext, NULL);
if (l_iResult >= 0)
{
for (int i=0; i<l_pAVFormatContext->nb_streams; i++)
{
if (l_pAVFormatContext->streams[i]->codec->codec_type ==
AVMEDIA_TYPE_VIDEO)
{
l_pCodec = avcodec_find_decoder(
l_pAVFormatContext->streams[i]->codec->codec_id);
l_pStream = l_pAVFormatContext->streams[i];
}
}
}
}
av_init_packet(&l_AVPacket);
av_init_packet(&l_AVPacket_out);
memset(l_auiInBuf + INBUF_SIZE, 0, FF_INPUT_BUFFER_PADDING_SIZE);
if (l_pCodec)
{
l_pAVCodecContext = avcodec_alloc_context3(l_pCodec);
l_pParser = av_parser_init(l_pAVCodecContext->codec_id);
if (l_pParser)
{
av_register_codec_parser(l_pParser->parser);
}
if (l_pAVCodecContext)
{
if (l_pCodec->capabilities & CODEC_CAP_TRUNCATED)
{
l_pAVCodecContext->flags |= CODEC_FLAG_TRUNCATED;
}
l_iResult = avcodec_open2(l_pAVCodecContext, l_pCodec, NULL);
if (l_iResult >= 0)
{
l_pFile_in = fopen(l_sFile.c_str(), "rb");
if (l_pFile_in)
{
l_pAVFrame = av_frame_alloc();
l_pAVFrameBGR = av_frame_alloc();
if (l_pAVFrame)
{
l_iFrameCount = 0;
avcodec_get_frame_defaults(l_pAVFrame);
while (1)
{
l_iBufLength = fread(l_auiInBuf,
1,
INBUF_SIZE,
l_pFile_in);
if (l_iBufLength == 0)
{
break;
}
else
{
l_pData = l_auiInBuf;
l_iSize = l_iBufLength;
while (l_iSize > 0)
{
if (l_pParser)
{
l_iParsedBytes = av_parser_parse2(
l_pParser,
l_pAVCodecContext,
&l_AVPacket_out.data,
&l_AVPacket_out.size,
l_pData,
l_iSize,
l_AVPacket.pts,
l_AVPacket.dts,
AV_NOPTS_VALUE);
if (l_iParsedBytes <= 0)
{
break;
}
l_AVPacket.pts = l_AVPacket.dts = AV_NOPTS_VALUE;
l_AVPacket.pos = -1;
}
else
{
l_AVPacket_out.data = l_pData;
l_AVPacket_out.size = l_iSize;
}
l_iDecodedBytes =
avcodec_decode_video2(
l_pAVCodecContext,
l_pAVFrame,
&l_iGotFrame,
&l_AVPacket_out);
if (l_iDecodedBytes >= 0)
{
if (l_iGotFrame)
{
l_pSWSContext = sws_getContext(
l_pAVCodecContext->width,
l_pAVCodecContext->height,
l_pAVCodecContext->pix_fmt,
l_pAVCodecContext->width,
l_pAVCodecContext->height,
AV_PIX_FMT_BGR24,
SWS_BICUBIC,
NULL,
NULL,
NULL);
if (l_pSWSContext)
{
l_iResult = avpicture_alloc(
reinterpret_cast<AVPicture*>(l_pAVFrameBGR),
AV_PIX_FMT_BGR24,
l_pAVFrame->width,
l_pAVFrame->height);
l_iResult = sws_scale(
l_pSWSContext,
l_pAVFrame->data,
l_pAVFrame->linesize,
0,
l_pAVCodecContext->height,
l_pAVFrameBGR->data,
l_pAVFrameBGR->linesize);
if (l_iResult > 0)
{
l_cvmImage = cv::Mat(
l_pAVFrame->height,
l_pAVFrame->width,
CV_8UC3,
l_pAVFrameBGR->data[0],
l_pAVFrameBGR->linesize[0]);
if (l_cvmImage.empty() == false)
{
cv::imshow("image", l_cvmImage);
cv::waitKey(10);
}
}
}
l_iFrameCount++;
}
}
else
{
break;
}
l_pData += l_iParsedBytes;
l_iSize -= l_iParsedBytes;
}
}
} // end while(1).
}
fclose(l_pFile_in);
}
}
}
}
}
EDIT: The following is the final code that solves my problem, thanks to the suggestions of Ronald.
#include <iostream>
#include <iomanip>
#include <string>
#include <sstream>
#include <opencv2/opencv.hpp>
#ifdef __cplusplus
extern "C"
{
#endif // __cplusplus
#include <libavcodec/avcodec.h>
#include <libavdevice/avdevice.h>
#include <libavfilter/avfilter.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavutil/avutil.h>
#include <libpostproc/postprocess.h>
#include <libswresample/swresample.h>
#include <libswscale/swscale.h>
#ifdef __cplusplus
} // end extern "C".
#endif // __cplusplus
void main()
{
AVCodec* l_pCodec;
AVCodecContext* l_pAVCodecContext;
SwsContext* l_pSWSContext;
AVFormatContext* l_pAVFormatContext;
AVFrame* l_pAVFrame;
AVFrame* l_pAVFrameBGR;
AVPacket l_AVPacket;
std::string l_sFile;
uint8_t* l_puiBuffer;
int l_iResult;
int l_iFrameCount;
int l_iGotFrame;
int l_iDecodedBytes;
int l_iVideoStreamIdx;
int l_iNumBytes;
cv::Mat l_cvmImage;
l_pCodec = NULL;
l_pAVCodecContext = NULL;
l_pSWSContext = NULL;
l_pAVFormatContext = NULL;
l_pAVFrame = NULL;
l_pAVFrameBGR = NULL;
l_puiBuffer = NULL;
l_sFile = "myvideo.ts";
av_register_all();
l_iResult = avformat_open_input(&l_pAVFormatContext,
l_sFile.c_str(),
NULL,
NULL);
if (l_iResult >= 0)
{
l_iResult = avformat_find_stream_info(l_pAVFormatContext, NULL);
if (l_iResult >= 0)
{
for (int i=0; i<l_pAVFormatContext->nb_streams; i++)
{
if (l_pAVFormatContext->streams[i]->codec->codec_type ==
AVMEDIA_TYPE_VIDEO)
{
l_iVideoStreamIdx = i;
l_pAVCodecContext =
l_pAVFormatContext->streams[l_iVideoStreamIdx]->codec;
if (l_pAVCodecContext)
{
l_pCodec = avcodec_find_decoder(l_pAVCodecContext->codec_id);
}
break;
}
}
}
}
if (l_pCodec && l_pAVCodecContext)
{
l_iResult = avcodec_open2(l_pAVCodecContext, l_pCodec, NULL);
if (l_iResult >= 0)
{
l_pAVFrame = av_frame_alloc();
l_pAVFrameBGR = av_frame_alloc();
l_iNumBytes = avpicture_get_size(PIX_FMT_BGR24,
l_pAVCodecContext->width,
l_pAVCodecContext->height);
l_puiBuffer = (uint8_t *)av_malloc(l_iNumBytes*sizeof(uint8_t));
avpicture_fill((AVPicture *)l_pAVFrameBGR,
l_puiBuffer,
PIX_FMT_RGB24,
l_pAVCodecContext->width,
l_pAVCodecContext->height);
l_pSWSContext = sws_getContext(
l_pAVCodecContext->width,
l_pAVCodecContext->height,
l_pAVCodecContext->pix_fmt,
l_pAVCodecContext->width,
l_pAVCodecContext->height,
AV_PIX_FMT_BGR24,
SWS_BICUBIC,
NULL,
NULL,
NULL);
while (av_read_frame(l_pAVFormatContext, &l_AVPacket) >= 0)
{
if (l_AVPacket.stream_index == l_iVideoStreamIdx)
{
l_iDecodedBytes = avcodec_decode_video2(
l_pAVCodecContext,
l_pAVFrame,
&l_iGotFrame,
&l_AVPacket);
if (l_iGotFrame)
{
if (l_pSWSContext)
{
l_iResult = sws_scale(
l_pSWSContext,
l_pAVFrame->data,
l_pAVFrame->linesize,
0,
l_pAVCodecContext->height,
l_pAVFrameBGR->data,
l_pAVFrameBGR->linesize);
if (l_iResult > 0)
{
l_cvmImage = cv::Mat(
l_pAVFrame->height,
l_pAVFrame->width,
CV_8UC3,
l_pAVFrameBGR->data[0],
l_pAVFrameBGR->linesize[0]);
if (l_cvmImage.empty() == false)
{
cv::imshow("image", l_cvmImage);
cv::waitKey(1);
}
}
}
l_iFrameCount++;
}
}
}
}
}
}
You're never using the l_pParser object, or in other words, you're not using a H264 parser, you're just sending raw file data into the decoder without proper NAL packetization. Please read the frame parsing API docs to figure out how to use the parser.

FPS too high when saving video in mp4 container

When I decode frames from avi file and then decode them in x264 and save to mp4 file, the fps of the output file is always 12,800. Therefore the file is played very fast. But, when I save the encoded in h264 frames in avi format and not mp4, so the fps is as I wanted - 25.
What could be the problem?
Here the code I wrote in VS2010:
#include "stdafx.h"
#include "inttypes.h"
extern "C" {
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libavutil/avutil.h"
#include <libswscale/swscale.h>
#include <libavutil/opt.h>
#include <libswscale/swscale.h>
#include <libavutil/imgutils.h>
}
#include <iostream>
using namespace std;
int main(int argc, char* argv[])
{
const char* inFileName = "C:\\000227_C1_GAME.avi";
const char* outFileName = "c:\\test.avi";
const char* outFileType = "avi";
av_register_all();
AVFormatContext* inContainer = NULL;
if(avformat_open_input(&inContainer, inFileName, NULL, NULL) < 0)
exit(1);
if(avformat_find_stream_info(inContainer, NULL) < 0)
exit(1);
// Find video stream
int videoStreamIndex = -1;
for (unsigned int i = 0; i < inContainer->nb_streams; ++i)
{
if (inContainer->streams[i] && inContainer->streams[i]->codec &&
inContainer->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
{
videoStreamIndex = i;
break;
}
}
if (videoStreamIndex == -1) exit(1);
AVFormatContext* outContainer = NULL;
if(avformat_alloc_output_context2(&outContainer, NULL, outFileType, outFileName) < 0)
exit(1);
// ----------------------------
// Decoder
// ----------------------------
AVStream const *const inStream = inContainer->streams[videoStreamIndex];
AVCodec *const decoder = avcodec_find_decoder(inStream->codec->codec_id);
if(!decoder)
exit(1);
if(avcodec_open2(inStream->codec, decoder, NULL) < 0)
exit(1);
// ----------------------------
// Encoder
// ----------------------------
AVCodec *encoder = avcodec_find_encoder(AV_CODEC_ID_H264);
if(!encoder)
exit(1);
AVStream *outStream = avformat_new_stream(outContainer, encoder);
if(!outStream)
exit(1);
avcodec_get_context_defaults3(outStream->codec, encoder);
// Construct encoder
if(outContainer->oformat->flags & AVFMT_GLOBALHEADER)
outStream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
outStream->codec->coder_type = AVMEDIA_TYPE_VIDEO;
outStream->codec->pix_fmt = AV_PIX_FMT_YUV420P;
outStream->codec->width = inStream->codec->width;
outStream->codec->height = inStream->codec->height;
outStream->codec->codec_id = encoder->id;
outStream->codec->bit_rate = 500000;
//outStream->codec->rc_min_rate = 600000;
//outStream->codec->rc_max_rate = 800000;
outStream->codec->time_base.den = 25;
outStream->codec->time_base.num = 1;
outStream->codec->gop_size = 250; // Keyframe interval(=GOP length). Determines maximum distance distance between I-frames
outStream->codec->keyint_min = 25; // minimum GOP size
outStream->codec->max_b_frames = 3;//16; // maximum number of B-frames between non-B-frames
outStream->codec->b_frame_strategy = 1; // decides the best number of B-frames to use. Default mode in x264.
outStream->codec->scenechange_threshold = 40;
outStream->codec->refs = 6; // abillity to reference frames other than the one immediately prior to the current frame. specify how many references can be used.
outStream->codec->qmin = 0;//10;
outStream->codec->qmax = 69;//51;
outStream->codec->qcompress = 0.6;
outStream->codec->max_qdiff = 4;
outStream->codec->i_quant_factor = 1.4;//0.71;
outStream->codec->refs=1;//3;
outStream->codec->chromaoffset = -2;
outStream->codec->thread_count = 1;
outStream->codec->trellis = 1;
outStream->codec->me_range = 16;
outStream->codec->me_method = ME_HEX; //hex
outStream->codec->flags2 |= CODEC_FLAG2_FAST;
outStream->codec->coder_type = 1;
if(outStream->codec->codec_id == AV_CODEC_ID_H264)
{
av_opt_set(outStream->codec->priv_data, "preset", "slow", 0);
}
// Open encoder
if(avcodec_open2(outStream->codec, encoder, NULL) < 0)
exit(1);
// Open output container
if(avio_open(&outContainer->pb, outFileName, AVIO_FLAG_WRITE) < 0)
exit(1);
//close_o
AVFrame *decodedFrame = avcodec_alloc_frame();
if(!decodedFrame)
exit(1);
AVFrame *encodeFrame = avcodec_alloc_frame();
if(!encodeFrame)
exit(1);
encodeFrame->format = outStream->codec->pix_fmt;
encodeFrame->width = outStream->codec->width;
encodeFrame->height = outStream->codec->height;
if(av_image_alloc(encodeFrame->data, encodeFrame->linesize,
outStream->codec->width, outStream->codec->height,
outStream->codec->pix_fmt, 1) < 0)
exit(1);
av_dump_format(inContainer, 0, inFileName,0);
//Write header to ouput container
avformat_write_header(outContainer, NULL);
AVPacket decodePacket, encodedPacket;
int got_frame, len;
while(av_read_frame(inContainer, &decodePacket)>=0)
{
if (decodePacket.stream_index == videoStreamIndex)
{
len = avcodec_decode_video2(inStream->codec, decodedFrame, &got_frame, &decodePacket);
if(len < 0)
exit(1);
if(got_frame)
{
av_init_packet(&encodedPacket);
encodedPacket.data = NULL;
encodedPacket.size = 0;
if(avcodec_encode_video2(outStream->codec, &encodedPacket, decodedFrame, &got_frame) < 0)
exit(1);
if(got_frame)
{
if (outStream->codec->coded_frame->key_frame)
encodedPacket.flags |= AV_PKT_FLAG_KEY;
encodedPacket.stream_index = outStream->index;
if(av_interleaved_write_frame(outContainer, &encodedPacket) < 0)
exit(1);
av_free_packet(&encodedPacket);
}
}
}
av_free_packet(&decodePacket);
}
av_write_trailer(outContainer);
avio_close(outContainer->pb);
avcodec_free_frame(&encodeFrame);
avcodec_free_frame(&decodedFrame);
avformat_free_context(outContainer);
av_close_input_file(inContainer);
return 0;
}
The problem was with PTS and DTS of the packet. Before writing the packet to output( before av_interleaved_write_frame command) set PTS and DTS like this
if (encodedPacket.pts != AV_NOPTS_VALUE)
encodedPacket.pts = av_rescale_q(encodedPacket.pts, outStream->codec->time_base, outStream->time_base);
if (encodedPacket.dts != AV_NOPTS_VALUE)
encodedPacket.dts = av_rescale_q(encodedPacket.dts, outStream->codec->time_base, outStream->time_base);

Audio output with video processing with opencv

I am processing video with opencv, but at the same time I need to play audio and simply control it, like loud or current frame number.
I think I should create a parallel process with ffmpeg, but I don't know how to do so. Can you explain what to do?
Or do you know another solution?
I think ffmpeg should be used to play audio and SDL for video in this case.
After opening the file with OpenCV and processing the frame, you can use OpenCV -> SDL to display it while retrieving the audio frames through ffmpeg and playing them with SDL.
Here is a nice collection of ffmpeg/SDL tutorials!
I also found a nice post that shows how to capture frames from a video file using ffmpeg, store them in OpenCV cv::Mat and display the result in a OpenCV window. But this way you can't play audio since OpenCV doesn't deal with that.
You might be interested in reading this post as well: How to avoid a growing delay with ffmpeg between sound and raw video data ?
EDIT:
I spent the last 4hrs coding a prototype to demonstrate how it's done. This demo reads video frames through OpenCV (so you can process them) and audio through ffmpeg, and SDL is used to play both! There are 2 limitations in this demo you must be aware: 1 - it assumes you are working with an OpenCV image packed as BGR (24bits), and 2 - audio and video are not being sync! Yes, I left have some work for you to do (yeeeey). But don't panic, page 6 has some ideas!
It's important to sync audio and video because you will be doing some processing on the frames, and that will certainly make the video and audio go out of sync real fast since they are being played independently of each other.
The ffmpeg tutorials I suggested above are very very important to understand the code, a lot of code from this demo came from there. They show how to deal with SDL, and how to read packets of audio/video streams.
#include <highgui.h>
#include <cv.h>
extern "C"
{
#include <SDL.h>
#include <SDL_thread.h>
#include <avcodec.h>
#include <avformat.h>
}
#include <iostream>
#include <stdio.h>
//#include <malloc.h>
using namespace cv;
#define SDL_AUDIO_BUFFER_SIZE 1024
typedef struct PacketQueue
{
AVPacketList *first_pkt, *last_pkt;
int nb_packets;
int size;
SDL_mutex *mutex;
SDL_cond *cond;
} PacketQueue;
PacketQueue audioq;
int audioStream = -1;
int videoStream = -1;
int quit = 0;
SDL_Surface* screen = NULL;
SDL_Surface* surface = NULL;
AVFormatContext* pFormatCtx = NULL;
AVCodecContext* aCodecCtx = NULL;
AVCodecContext* pCodecCtx = NULL;
void show_frame(IplImage* img)
{
if (!screen)
{
screen = SDL_SetVideoMode(img->width, img->height, 0, 0);
if (!screen)
{
fprintf(stderr, "SDL: could not set video mode - exiting\n");
exit(1);
}
}
// Assuming IplImage packed as BGR 24bits
SDL_Surface* surface = SDL_CreateRGBSurfaceFrom((void*)img->imageData,
img->width,
img->height,
img->depth * img->nChannels,
img->widthStep,
0xff0000, 0x00ff00, 0x0000ff, 0
);
SDL_BlitSurface(surface, 0, screen, 0);
SDL_Flip(screen);
}
void packet_queue_init(PacketQueue *q)
{
memset(q, 0, sizeof(PacketQueue));
q->mutex = SDL_CreateMutex();
q->cond = SDL_CreateCond();
}
int packet_queue_put(PacketQueue *q, AVPacket *pkt)
{
AVPacketList *pkt1;
if (av_dup_packet(pkt) < 0)
{
return -1;
}
//pkt1 = (AVPacketList*) av_malloc(sizeof(AVPacketList));
pkt1 = (AVPacketList*) malloc(sizeof(AVPacketList));
if (!pkt1) return -1;
pkt1->pkt = *pkt;
pkt1->next = NULL;
SDL_LockMutex(q->mutex);
if (!q->last_pkt)
q->first_pkt = pkt1;
else
q->last_pkt->next = pkt1;
q->last_pkt = pkt1;
q->nb_packets++;
q->size += pkt1->pkt.size;
SDL_CondSignal(q->cond);
SDL_UnlockMutex(q->mutex);
return 0;
}
static int packet_queue_get(PacketQueue *q, AVPacket *pkt, int block)
{
AVPacketList *pkt1;
int ret;
SDL_LockMutex(q->mutex);
for (;;)
{
if( quit)
{
ret = -1;
break;
}
pkt1 = q->first_pkt;
if (pkt1)
{
q->first_pkt = pkt1->next;
if (!q->first_pkt)
q->last_pkt = NULL;
q->nb_packets--;
q->size -= pkt1->pkt.size;
*pkt = pkt1->pkt;
//av_free(pkt1);
free(pkt1);
ret = 1;
break;
}
else if (!block)
{
ret = 0;
break;
}
else
{
SDL_CondWait(q->cond, q->mutex);
}
}
SDL_UnlockMutex(q->mutex);
return ret;
}
int audio_decode_frame(AVCodecContext *aCodecCtx, uint8_t *audio_buf, int buf_size)
{
static AVPacket pkt;
static uint8_t *audio_pkt_data = NULL;
static int audio_pkt_size = 0;
int len1, data_size;
for (;;)
{
while (audio_pkt_size > 0)
{
data_size = buf_size;
len1 = avcodec_decode_audio2(aCodecCtx, (int16_t*)audio_buf, &data_size,
audio_pkt_data, audio_pkt_size);
if (len1 < 0)
{
/* if error, skip frame */
audio_pkt_size = 0;
break;
}
audio_pkt_data += len1;
audio_pkt_size -= len1;
if (data_size <= 0)
{
/* No data yet, get more frames */
continue;
}
/* We have data, return it and come back for more later */
return data_size;
}
if (pkt.data)
av_free_packet(&pkt);
if (quit) return -1;
if (packet_queue_get(&audioq, &pkt, 1) < 0) return -1;
audio_pkt_data = pkt.data;
audio_pkt_size = pkt.size;
}
}
void audio_callback(void *userdata, Uint8 *stream, int len)
{
AVCodecContext *aCodecCtx = (AVCodecContext *)userdata;
int len1, audio_size;
static uint8_t audio_buf[(AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) / 2];
static unsigned int audio_buf_size = 0;
static unsigned int audio_buf_index = 0;
while (len > 0)
{
if (audio_buf_index >= audio_buf_size)
{
/* We have already sent all our data; get more */
audio_size = audio_decode_frame(aCodecCtx, audio_buf, sizeof(audio_buf));
if(audio_size < 0)
{
/* If error, output silence */
audio_buf_size = 1024; // arbitrary?
memset(audio_buf, 0, audio_buf_size);
}
else
{
audio_buf_size = audio_size;
}
audio_buf_index = 0;
}
len1 = audio_buf_size - audio_buf_index;
if (len1 > len)
len1 = len;
memcpy(stream, (uint8_t *)audio_buf + audio_buf_index, len1);
len -= len1;
stream += len1;
audio_buf_index += len1;
}
}
void setup_ffmpeg(char* filename)
{
if (av_open_input_file(&pFormatCtx, filename, NULL, 0, NULL) != 0)
{
fprintf(stderr, "FFmpeg failed to open file %s!\n", filename);
exit(-1);
}
if (av_find_stream_info(pFormatCtx) < 0)
{
fprintf(stderr, "FFmpeg failed to retrieve stream info!\n");
exit(-1);
}
// Dump information about file onto standard error
dump_format(pFormatCtx, 0, filename, 0);
// Find the first video stream
int i = 0;
for (i; i < pFormatCtx->nb_streams; i++)
{
if (pFormatCtx->streams[i]->codec->codec_type == CODEC_TYPE_VIDEO && videoStream < 0)
{
videoStream = i;
}
if (pFormatCtx->streams[i]->codec->codec_type == CODEC_TYPE_AUDIO && audioStream < 0)
{
audioStream = i;
}
}
if (videoStream == -1)
{
fprintf(stderr, "No video stream found in %s!\n", filename);
exit(-1);
}
if (audioStream == -1)
{
fprintf(stderr, "No audio stream found in %s!\n", filename);
exit(-1);
}
// Get a pointer to the codec context for the audio stream
aCodecCtx = pFormatCtx->streams[audioStream]->codec;
// Set audio settings from codec info
SDL_AudioSpec wanted_spec;
wanted_spec.freq = aCodecCtx->sample_rate;
wanted_spec.format = AUDIO_S16SYS;
wanted_spec.channels = aCodecCtx->channels;
wanted_spec.silence = 0;
wanted_spec.samples = SDL_AUDIO_BUFFER_SIZE;
wanted_spec.callback = audio_callback;
wanted_spec.userdata = aCodecCtx;
SDL_AudioSpec spec;
if (SDL_OpenAudio(&wanted_spec, &spec) < 0)
{
fprintf(stderr, "SDL_OpenAudio: %s\n", SDL_GetError());
exit(-1);
}
AVCodec* aCodec = avcodec_find_decoder(aCodecCtx->codec_id);
if (!aCodec)
{
fprintf(stderr, "Unsupported codec!\n");
exit(-1);
}
avcodec_open(aCodecCtx, aCodec);
// audio_st = pFormatCtx->streams[index]
packet_queue_init(&audioq);
SDL_PauseAudio(0);
// Get a pointer to the codec context for the video stream
pCodecCtx = pFormatCtx->streams[videoStream]->codec;
// Find the decoder for the video stream
AVCodec* pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
if (pCodec == NULL)
{
fprintf(stderr, "Unsupported codec!\n");
exit(-1); // Codec not found
}
// Open codec
if (avcodec_open(pCodecCtx, pCodec) < 0)
{
fprintf(stderr, "Unsupported codec!\n");
exit(-1); // Could not open codec
}
}
int main(int argc, char* argv[])
{
if (argc < 2)
{
std::cout << "Usage: " << argv[0] << " <video>" << std::endl;
return -1;
}
av_register_all();
// Init SDL
if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER))
{
fprintf(stderr, "Could not initialize SDL - %s\n", SDL_GetError());
return -1;
}
// Init ffmpeg and setup some SDL stuff related to Audio
setup_ffmpeg(argv[1]);
VideoCapture cap(argv[1]); // open the default camera
if (!cap.isOpened()) // check if we succeeded
{
std::cout << "Failed to load file!" << std::endl;
return -1;
}
AVPacket packet;
while (av_read_frame(pFormatCtx, &packet) >= 0)
{
if (packet.stream_index == videoStream)
{
// Actually this is were SYNC between audio/video would happen.
// Right now I assume that every VIDEO packet contains an entire video frame, and that's not true. A video frame can be made by multiple packets!
// But for the time being, assume 1 video frame == 1 video packet,
// so instead of reading the frame through ffmpeg, I read it through OpenCV.
Mat frame;
cap >> frame; // get a new frame from camera
// do some processing on the frame, either as a Mat or as IplImage.
// For educational purposes, applying a lame grayscale conversion
IplImage ipl_frame = frame;
for (int i = 0; i < ipl_frame.width * ipl_frame.height * ipl_frame.nChannels; i += ipl_frame.nChannels)
{
ipl_frame.imageData[i] = (ipl_frame.imageData[i] + ipl_frame.imageData[i+1] + ipl_frame.imageData[i+2])/3; //B
ipl_frame.imageData[i+1] = (ipl_frame.imageData[i] + ipl_frame.imageData[i+1] + ipl_frame.imageData[i+2])/3; //G
ipl_frame.imageData[i+2] = (ipl_frame.imageData[i] + ipl_frame.imageData[i+1] + ipl_frame.imageData[i+2])/3; //R
}
// Display it on SDL window
show_frame(&ipl_frame);
av_free_packet(&packet);
}
else if (packet.stream_index == audioStream)
{
packet_queue_put(&audioq, &packet);
}
else
{
av_free_packet(&packet);
}
SDL_Event event;
SDL_PollEvent(&event);
switch (event.type)
{
case SDL_QUIT:
SDL_FreeSurface(surface);
SDL_Quit();
break;
default:
break;
}
}
// the camera will be deinitialized automatically in VideoCapture destructor
// Close the codec
avcodec_close(pCodecCtx);
// Close the video file
av_close_input_file(pFormatCtx);
return 0;
}
On my Mac I compiled it with:
g++ ffmpeg_snd.cpp -o ffmpeg_snd -D_GNU_SOURCE=1 -D_THREAD_SAFE -I/usr/local/include/opencv -I/usr/local/include -I/usr/local/include/SDL -Wl,-framework,Cocoa -L/usr/local/lib -lopencv_core -lopencv_imgproc -lopencv_highgui -lopencv_ml -lopencv_video -lopencv_features2d -lopencv_calib3d -lopencv_objdetect -lopencv_contrib -lopencv_legacy -lopencv_flann -lSDLmain -lSDL -L/usr/local/lib -lavfilter -lavcodec -lavformat -I/usr/local/Cellar/ffmpeg/HEAD/include/libavcodec -I/usr/local/Cellar/ffmpeg/HEAD/include/libavformat