Trying to cancel execution and delete file using ffmpeg C API - c++

The code below is a class that handle the conversion of multiples images, through add_frame() method, into a GIF with encode(). It also use a filter to generate and apply the palette. The usage is like this:
Code call example
std::unique_ptr<px::GIF::FFMPEG> gif_obj = nullptr;
try
{
gif_obj = std::make_unique<px::GIF::FFMPEG>({1000,1000}, 12, "C:/out.gif",
"format=pix_fmts=rgb24,split [a][b];[a]palettegen[p];[b][p]paletteuse");
// Example: a simple vector of images (usually process internally)
for(auto img : image_vector)
gif_obj->add_frame(img);
// Once all frame were added, encode the final GIF with the filter applied.
gif_obj->encode();
}
catch(const std::exception& e)
{
// An error occured! We must close FFMPEG properly and delete the created file.
gif_obj->cancel();
}
I have the following issue. If the code for any reason throw an exception, I call ffmpeg->cancel() and it supposes to delete the GIF file on disk. But this is never working, I assume there is a lock on the file or something like that. So here are my question:
What is the proper way to close/free ffmpeg object in order to remove the file afterward ?
Full class code below
Header
// C++ Standard includes
#include <memory>
#include <string>
#include <vector>
// 3rd Party incldues
#ifdef __cplusplus
extern "C" {
#include "libavformat/avformat.h"
#include "libavfilter/avfilter.h"
#include "libavutil/opt.h"
#include "libavfilter/buffersrc.h"
#include "libavfilter/buffersink.h"
#include "libswscale/swscale.h"
#include "libavutil/imgutils.h"
}
#endif
#define FFMPEG_MSG_LEN 2000
namespace px
{
namespace GIF
{
class FFMPEG
{
public:
FFMPEG(const px::Point2D<int>& dim,
const int framerate,
const std::string& filename,
const std::string& filter_cmd);
~FFMPEG();
void add_frame(pxImage * const img);
void encode();
void cancel();
private:
void init_filters(); // Init everything that needed to filter the input frame.
void init_muxer(); // The muxer that creates the output file.
void muxing_one_frame(AVFrame* frame);
void release();
int _ret = 0; // status code from FFMPEG.
char _err_msg[FFMPEG_MSG_LEN]; // Error message buffer.
int m_width = 0; // The width that all futur images must have to be accepted.
int m_height = 0; // The height that all futur images must have to be accepted.
int m_framerate = 0; // GIF Framerate.
std::string m_filename = ""; // The GIF filename (on cache?)
std::string m_filter_desc = ""; // The FFMPEG filter to apply over the frames.
bool as_frame = false;
AVFrame* picture_rgb24 = nullptr; // Temporary frame that will hold the pxImage in an RGB24 format (NOTE: TOP-LEFT origin)
AVFormatContext* ofmt_ctx = nullptr; // ouput format context associated to the
AVCodecContext* o_codec_ctx = nullptr; // output codec for the GIF
AVFilterGraph* filter_graph = nullptr; // filter graph associate with the string we want to execute
AVFilterContext* buffersrc_ctx = nullptr; // The buffer that will store all the frames in one place for the palette generation.
AVFilterContext* buffersink_ctx = nullptr; // The buffer that will store the result afterward (once the palette are used).
int64_t m_pts_increment = 0;
};
};
};
ctor
px::GIF::FFMPEG::FFMPEG(const px::Point2D<int>& dim,
const int framerate,
const std::string& filename,
const std::string& filter_cmd) :
m_width(dim.x()),
m_height(dim.y()),
m_framerate(framerate),
m_filename(filename),
m_filter_desc(filter_cmd)
{
#if !_DEBUG
av_log_set_level(AV_LOG_QUIET); // Set the FFMPEG log to quiet to avoid too much logs.
#endif
// Allocate the temporary buffer that hold the ffmpeg image (pxImage to AVFrame conversion).
picture_rgb24 = av_frame_alloc();
picture_rgb24->pts = 0;
picture_rgb24->data[0] = NULL;
picture_rgb24->linesize[0] = -1;
picture_rgb24->format = AV_PIX_FMT_RGB24;
picture_rgb24->height = m_height;
picture_rgb24->width = m_width;
if ((_ret = av_image_alloc(picture_rgb24->data, picture_rgb24->linesize, m_width, m_height, (AVPixelFormat)picture_rgb24->format, 24)) < 0)
throw px::GIF::Error("Failed to allocate the AVFrame for pxImage conversion with error: " +
std::string(av_make_error_string(_err_msg, FFMPEG_MSG_LEN, _ret)),
"GIF::FFMPEG CTOR");
//printf("allocated picture of size %d, linesize %d %d %d %d\n", _ret, picture_rgb24->linesize[0], picture_rgb24->linesize[1], picture_rgb24->linesize[2], picture_rgb24->linesize[3]);
init_muxer(); // Prepare the GIF encoder (open it on disk).
init_filters(); // Prepare the filter that will be applied over the frame.
// Instead of hardcoder {1,100} which is the GIF tbn, we collect it from its stream.
// This will avoid future problem if the codec change in ffmpeg.
if (ofmt_ctx && ofmt_ctx->nb_streams > 0)
m_pts_increment = av_rescale_q(1, { 1, m_framerate }, ofmt_ctx->streams[0]->time_base);
else
m_pts_increment = av_rescale_q(1, { 1, m_framerate }, { 1, 100 });
}
FFMPEG Initialization (Filter and muxer)
void px::GIF::FFMPEG::init_filters()
{
const AVFilter* buffersrc = avfilter_get_by_name("buffer");
const AVFilter* buffersink = avfilter_get_by_name("buffersink");
AVRational time_base = { 1, m_framerate };
AVRational aspect_pixel = { 1, 1 };
AVFilterInOut* inputs = avfilter_inout_alloc();
AVFilterInOut* outputs = avfilter_inout_alloc();
filter_graph = avfilter_graph_alloc();
try
{
if (!outputs || !inputs || !filter_graph)
throw px::GIF::Error("Failed to 'init_filters' could not allocated the graph/filters.", "GIF::FFMPEG init_filters");
char args[512];
snprintf(args, sizeof(args),
"video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d",
m_width, m_height,
picture_rgb24->format,
time_base.num, time_base.den,
aspect_pixel.num, aspect_pixel.den);
if (avfilter_graph_create_filter(&buffersrc_ctx, buffersrc, "in", args, nullptr, filter_graph) < 0)
throw px::GIF::Error("Failed to create the 'source buffer' in init_filer method.", "GIF::FFMPEG init_filters");
if (avfilter_graph_create_filter(&buffersink_ctx, buffersink, "out", nullptr, nullptr, filter_graph) < 0)
throw px::GIF::Error("Failed to create the 'sink buffer' in init_filer method.", "GIF::FFMPEG init_filters");
// GIF has possible output of PAL8.
enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_PAL8, AV_PIX_FMT_NONE };
if (av_opt_set_int_list(buffersink_ctx, "pix_fmts", pix_fmts, AV_PIX_FMT_NONE, AV_OPT_SEARCH_CHILDREN) < 0)
throw px::GIF::Error("Failed to set the output pixel format.", "GIF::FFMPEG init_filters");
outputs->name = av_strdup("in");
outputs->filter_ctx = buffersrc_ctx;
outputs->pad_idx = 0;
outputs->next = nullptr;
inputs->name = av_strdup("out");
inputs->filter_ctx = buffersink_ctx;
inputs->pad_idx = 0;
inputs->next = nullptr;
// GIF has possible output of PAL8.
if (avfilter_graph_parse_ptr(filter_graph, m_filter_desc.c_str(), &inputs, &outputs, nullptr) < 0)
throw px::GIF::Error("Failed to parse the filter graph (bad string!).", "GIF::FFMPEG init_filters");
if (avfilter_graph_config(filter_graph, nullptr) < 0)
throw px::GIF::Error("Failed to configure the filter graph (bad string!).", "GIF::FFMPEG init_filters");
avfilter_inout_free(&inputs);
avfilter_inout_free(&outputs);
}
catch (const std::exception& e)
{
// Catch exception to delete element.
avfilter_inout_free(&inputs);
avfilter_inout_free(&outputs);
throw e; // re-throuw
}
}
void px::GIF::FFMPEG::init_muxer()
{
AVOutputFormat* o_fmt = av_guess_format("gif", m_filename.c_str(), "video/gif");
if ((_ret = avformat_alloc_output_context2(&ofmt_ctx, o_fmt, "gif", m_filename.c_str())) < 0)
throw px::GIF::Error(std::string(av_make_error_string(_err_msg, FFMPEG_MSG_LEN, _ret)) + " allocate output format.", "GIF::FFMPEG init_muxer");
AVCodec* codec = avcodec_find_encoder(AV_CODEC_ID_GIF);
if (!codec) throw px::GIF::Error("Could to find the 'GIF' codec.", "GIF::FFMPEG init_muxer");
#if 0
const AVPixelFormat* p = codec->pix_fmts;
while (p != NULL && *p != AV_PIX_FMT_NONE) {
printf("supported pix fmt: %s\n", av_get_pix_fmt_name(*p));
++p;
}
#endif
AVStream* stream = avformat_new_stream(ofmt_ctx, codec);
AVCodecParameters* codec_paramters = stream->codecpar;
codec_paramters->codec_tag = 0;
codec_paramters->codec_id = codec->id;
codec_paramters->codec_type = AVMEDIA_TYPE_VIDEO;
codec_paramters->width = m_width;
codec_paramters->height = m_height;
codec_paramters->format = AV_PIX_FMT_PAL8;
o_codec_ctx = avcodec_alloc_context3(codec);
avcodec_parameters_to_context(o_codec_ctx, codec_paramters);
o_codec_ctx->time_base = { 1, m_framerate };
if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
o_codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
if ((_ret = avcodec_open2(o_codec_ctx, codec, NULL)) < 0)
throw px::GIF::Error(std::string(av_make_error_string(_err_msg, FFMPEG_MSG_LEN, _ret)) + " open output codec.", "GIF::FFMPEG init_muxer");
if ((_ret = avio_open(&ofmt_ctx->pb, m_filename.c_str(), AVIO_FLAG_WRITE)) < 0)
throw px::GIF::Error(std::string(av_make_error_string(_err_msg, FFMPEG_MSG_LEN, _ret)) + " avio open error.", "GIF::FFMPEG init_muxer");
if ((_ret = avformat_write_header(ofmt_ctx, NULL)) < 0)
throw px::GIF::Error(std::string(av_make_error_string(_err_msg, FFMPEG_MSG_LEN, _ret)) + " write GIF header", "GIF::FFMPEG init_muxer");
#if _DEBUG
// This print the stream/output format.
av_dump_format(ofmt_ctx, -1, m_filename.c_str(), 1);
#endif
}
Add frame (usually in a loop)
void px::GIF::FFMPEG::add_frame(pxImage * const img)
{
if (img->getImageType() != PXT_BYTE || img->getNChannels() != 4)
throw px::GIF::Error("Failed to 'add_frame' since image is not PXT_BYTE and 4-channels.", "GIF::FFMPEG add_frame");
if (img->getWidth() != m_width || img->getHeight() != m_height)
throw px::GIF::Error("Failed to 'add_frame' since the size is not same to other inputs.", "GIF::FFMPEG add_frame");
const int pitch = picture_rgb24->linesize[0];
auto px_ptr = getImageAccessor<pxUChar_C4>(img);
for (int y = 0; y < m_height; y++)
{
const int px_row = img->getOrigin() == ORIGIN_BOT_LEFT ? m_height - y - 1 : y;
for (int x = 0; x < m_width; x++)
{
const int idx = y * pitch + 3 * x;
picture_rgb24->data[0][idx] = px_ptr[px_row][x].ch[PX_RE];
picture_rgb24->data[0][idx + 1] = px_ptr[px_row][x].ch[PX_GR];
picture_rgb24->data[0][idx + 2] = px_ptr[px_row][x].ch[PX_BL];
}
}
// palettegen need a whole stream, just add frame to buffer.
if ((_ret = av_buffersrc_add_frame_flags(buffersrc_ctx, picture_rgb24, AV_BUFFERSRC_FLAG_KEEP_REF)) < 0)
throw px::GIF::Error("Failed to 'add_frame' to global buffer with error: " +
std::string(av_make_error_string(_err_msg, FFMPEG_MSG_LEN, _ret)),
"GIF::FFMPEG add_frame");
// Increment the FPS of the picture for the next add-up to the buffer.
picture_rgb24->pts += m_pts_increment;
as_frame = true;
}
Encoder (final step)
void px::GIF::FFMPEG::encode()
{
if (!as_frame)
throw px::GIF::Error("Please 'add_frame' before running the Encoding().", "GIF::FFMPEG encode");
// end of buffer
if ((_ret = av_buffersrc_add_frame_flags(buffersrc_ctx, nullptr, AV_BUFFERSRC_FLAG_KEEP_REF)) < 0)
throw px::GIF::Error("error add frame to buffer source: " + std::string(av_make_error_string(_err_msg, FFMPEG_MSG_LEN, _ret)), "GIF::FFMPEG encode");
do {
AVFrame* filter_frame = av_frame_alloc();
_ret = av_buffersink_get_frame(buffersink_ctx, filter_frame);
if (_ret == AVERROR(EAGAIN) || _ret == AVERROR_EOF) {
av_frame_unref(filter_frame);
break;
}
// write the filter frame to output file
muxing_one_frame(filter_frame);
av_frame_unref(filter_frame);
} while (_ret >= 0);
av_write_trailer(ofmt_ctx);
}
void px::GIF::FFMPEG::muxing_one_frame(AVFrame* frame)
{
int ret = avcodec_send_frame(o_codec_ctx, frame);
AVPacket *pkt = av_packet_alloc();
av_init_packet(pkt);
while (ret >= 0) {
ret = avcodec_receive_packet(o_codec_ctx, pkt);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
break;
}
av_write_frame(ofmt_ctx, pkt);
}
av_packet_unref(pkt);
}
DTOR, Release and Cancel
px::GIF::FFMPEG::~FFMPEG()
{
release();
}
void px::GIF::FFMPEG::release()
{
// Muxer stuffs
if (ofmt_ctx != nullptr) avformat_free_context(ofmt_ctx);
if (o_codec_ctx != nullptr) avcodec_close(o_codec_ctx);
if (o_codec_ctx != nullptr) avcodec_free_context(&o_codec_ctx);
ofmt_ctx = nullptr;
o_codec_ctx = nullptr;
// Filter stuffs
if (buffersrc_ctx != nullptr) avfilter_free(buffersrc_ctx);
if (buffersink_ctx != nullptr) avfilter_free(buffersink_ctx);
if (filter_graph != nullptr) avfilter_graph_free(&filter_graph);
buffersrc_ctx = nullptr;
buffersink_ctx = nullptr;
filter_graph = nullptr;
// Conversion image.
if (picture_rgb24 != nullptr) av_frame_free(&picture_rgb24);
picture_rgb24 = nullptr;
}
void px::GIF::FFMPEG::cancel()
{
// In-case of failure we must close ffmpeg and exit.
av_write_trailer(ofmt_ctx);
// Release and close all elements.
release();
// Delete the file on disk.
if (remove(m_filename.c_str()) != 0)
PX_LOG0(PX_LOGLEVEL_ERROR, "GIF::FFMPEG - On 'cancel' failed to remove the file.");
}

Took me a while but finally get it!
I was missing a avio_close(ofmt_ctx->pb); in my cancel method.
Once the file is released from ffmpeg, the std::remove() works like a charm.
Note, the av_write_trailer and avio_close should only be called if the init_muxer successfully executed, so I had a member variable to flag success or not. Then, I do the appropriate call in the cancel.

Related

Wrap audio data of the pcm_alaw type into an MKA audio file using the ffmpeg API

Imagine that in my project, I receive RTP packets with the payload type-8, for later saving this load as the Nth part of the audio track. I extract this load from the RTP packet and save it to a temporary buffer:
...
while ((rtp = receiveRtpPackets()).withoutErrors()) {
payloadData.push(rtp.getPayloadData());
}
audioGenerator.setPayloadData(payloadData);
audioGenerator.recordToFile();
...
After filling a temporary buffer of a certain size with this payload, I process this buffer, namely, extract the entire payload and encode it using ffmpeg for further saving to an audio file in Matroska format. But I have a problem. Since the payload of the RTP packet is type 8, I have to save the raw audio data of the pcm_alaw format to mka audio format. But when saving raw data pcm_alaw to an audio file, I get these messages from the library:
...
[libopus # 0x18eff60] Queue input is backward in time
[libopus # 0x18eff60] Queue input is backward in time
[libopus # 0x18eff60] Queue input is backward in time
[libopus # 0x18eff60] Queue input is backward in time
...
When you open an audio file in vlc, nothing is played (the audio track timestamp is missing).
The task of my project is to simply take pcm_alaw data and pack it in a container, in mka format. The best way to determine the codec is to use the av_guess_codec() function, which in turn automatically selects the desired codec ID. But how do I pack the raw data into the container correctly, I do not know.
It is important to note that I can get as raw data any format of this data (audio formats only) defined by the RTP packet type (All types of RTP packet payload). All I know is that in any case, I have to pack the audio data in an mka container.
I also attach the code (borrowed from this resource) that I use:
audiogenerater.h
extern "C"
{
#include "libavformat/avformat.h"
#include "libavcodec/avcodec.h"
#include "libswresample/swresample.h"
}
class AudioGenerater
{
public:
AudioGenerater();
~AudioGenerater() = default;
void generateAudioFileWithOptions(
QString fileName,
QByteArray pcmData,
int channel,
int bitRate,
int sampleRate,
AVSampleFormat format);
private:
// init Format
bool initFormat(QString audioFileName);
private:
AVCodec *m_AudioCodec = nullptr;
AVCodecContext *m_AudioCodecContext = nullptr;
AVFormatContext *m_FormatContext = nullptr;
AVOutputFormat *m_OutputFormat = nullptr;
};
audiogenerater.cpp
AudioGenerater::AudioGenerater()
{
av_register_all();
avcodec_register_all();
}
AudioGenerater::~AudioGenerater()
{
// ...
}
bool AudioGenerater::initFormat(QString audioFileName)
{
// Create an output Format context
int result = avformat_alloc_output_context2(&m_FormatContext, nullptr, nullptr, audioFileName.toLocal8Bit().data());
if (result < 0) {
return false;
}
m_OutputFormat = m_FormatContext->oformat;
// Create an audio stream
AVStream* audioStream = avformat_new_stream(m_FormatContext, m_AudioCodec);
if (audioStream == nullptr) {
avformat_free_context(m_FormatContext);
return false;
}
// Set the parameters in the stream
audioStream->id = m_FormatContext->nb_streams - 1;
audioStream->time_base = { 1, 8000 };
result = avcodec_parameters_from_context(audioStream->codecpar, m_AudioCodecContext);
if (result < 0) {
avformat_free_context(m_FormatContext);
return false;
}
// Print FormatContext information
av_dump_format(m_FormatContext, 0, audioFileName.toLocal8Bit().data(), 1);
// Open file IO
if (!(m_OutputFormat->flags & AVFMT_NOFILE)) {
result = avio_open(&m_FormatContext->pb, audioFileName.toLocal8Bit().data(), AVIO_FLAG_WRITE);
if (result < 0) {
avformat_free_context(m_FormatContext);
return false;
}
}
return true;
}
void AudioGenerater::generateAudioFileWithOptions(
QString _fileName,
QByteArray _pcmData,
int _channel,
int _bitRate,
int _sampleRate,
AVSampleFormat _format)
{
AVFormatContext* oc;
if (avformat_alloc_output_context2(
&oc, nullptr, nullptr, _fileName.toStdString().c_str())
< 0) {
qDebug() << "Error in line: " << __LINE__;
return;
}
if (!oc) {
printf("Could not deduce output format from file extension: using mka.\n");
avformat_alloc_output_context2(
&oc, nullptr, "mka", _fileName.toStdString().c_str());
}
if (!oc) {
qDebug() << "Error in line: " << __LINE__;
return;
}
AVOutputFormat* fmt = oc->oformat;
if (fmt->audio_codec == AV_CODEC_ID_NONE) {
qDebug() << "Error in line: " << __LINE__;
return;
}
AVCodecID codecID = av_guess_codec(
fmt, nullptr, _fileName.toStdString().c_str(), nullptr, AVMEDIA_TYPE_AUDIO);
// Find Codec
m_AudioCodec = avcodec_find_encoder(codecID);
if (m_AudioCodec == nullptr) {
qDebug() << "Error in line: " << __LINE__;
return;
}
// Create an encoder context
m_AudioCodecContext = avcodec_alloc_context3(m_AudioCodec);
if (m_AudioCodecContext == nullptr) {
qDebug() << "Error in line: " << __LINE__;
return;
}
// Setting parameters
m_AudioCodecContext->bit_rate = _bitRate;
m_AudioCodecContext->sample_rate = _sampleRate;
m_AudioCodecContext->sample_fmt = _format;
m_AudioCodecContext->channels = _channel;
m_AudioCodecContext->channel_layout = av_get_default_channel_layout(_channel);
m_AudioCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
// Turn on the encoder
int result = avcodec_open2(m_AudioCodecContext, m_AudioCodec, nullptr);
if (result < 0) {
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
// Create a package
if (!initFormat(_fileName)) {
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
// write to the file header
result = avformat_write_header(m_FormatContext, nullptr);
if (result < 0) {
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
// Create Frame
AVFrame* frame = av_frame_alloc();
if (frame == nullptr) {
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
int nb_samples = 0;
if (m_AudioCodecContext->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE) {
nb_samples = 10000;
}
else {
nb_samples = m_AudioCodecContext->frame_size;
}
// Set the parameters of the Frame
frame->nb_samples = nb_samples;
frame->format = m_AudioCodecContext->sample_fmt;
frame->channel_layout = m_AudioCodecContext->channel_layout;
// Apply for data memory
result = av_frame_get_buffer(frame, 0);
if (result < 0) {
av_frame_free(&frame);
{
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
}
// Set the Frame to be writable
result = av_frame_make_writable(frame);
if (result < 0) {
av_frame_free(&frame);
{
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
}
int perFrameDataSize = frame->linesize[0];
int count = _pcmData.size() / perFrameDataSize;
bool needAddOne = false;
if (_pcmData.size() % perFrameDataSize != 0) {
count++;
needAddOne = true;
}
int frameCount = 0;
for (int i = 0; i < count; ++i) {
// Create a Packet
AVPacket* pkt = av_packet_alloc();
if (pkt == nullptr) {
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
av_init_packet(pkt);
if (i == count - 1)
perFrameDataSize = _pcmData.size() % perFrameDataSize;
// Synthesize WAV files
memset(frame->data[0], 0, perFrameDataSize);
memcpy(frame->data[0], &(_pcmData.data()[perFrameDataSize * i]), perFrameDataSize);
frame->pts = frameCount++;
// send Frame
result = avcodec_send_frame(m_AudioCodecContext, frame);
if (result < 0)
continue;
// Receive the encoded Packet
result = avcodec_receive_packet(m_AudioCodecContext, pkt);
if (result < 0) {
av_packet_free(&pkt);
continue;
}
// write to file
av_packet_rescale_ts(pkt, m_AudioCodecContext->time_base, m_FormatContext->streams[0]->time_base);
pkt->stream_index = 0;
result = av_interleaved_write_frame(m_FormatContext, pkt);
if (result < 0)
continue;
av_packet_free(&pkt);
}
// write to the end of the file
av_write_trailer(m_FormatContext);
// Close file IO
avio_closep(&m_FormatContext->pb);
// Release Frame memory
av_frame_free(&frame);
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
}
main.cpp
int main(int argc, char **argv)
{
av_log_set_level(AV_LOG_TRACE);
QFile file("rawDataOfPcmAlawType.bin");
if (!file.open(QIODevice::ReadOnly)) {
return EXIT_FAILURE;
}
QByteArray rawData(file.readAll());
AudioGenerater generator;
generator.generateAudioFileWithOptions(
"test.mka",
rawData,
1,
64000,
8000,
AV_SAMPLE_FMT_S16);
return 0;
}
It is IMPORTANT you help me find the most appropriate way to record pcm_alaw or a different data format in an MKA audio file.
I ask everyone who knows anything to help (there is too little time left to implement this project)
These useful links will help you:
A good overview of the data processing sequence in libav: ffmpeg-libav-tutorial
Examples from the ffmpeg developers themselves: avio_reading, resampling_audio, transcode_aac

Decoding YUYV422 raw images using FFmpeg

I have a collection of sequential YUYV422 raw images that I wish to turn into a video. The problem seems to be that when the frame is created in the avcodec_receive_frame. The frame only contains one channel instead of four in the YUYV format. This results in Input picture width <640> is greater then stride (0) since only the zeroth index of data and linesize is set in the frame. I don't know if this is a ffmpeg bug or a misconfiguration on my part.
#include "icsFfmpegImageDecoder.h"
#include <stdexcept>
ImageDecoder::ImageDecoder(std::string filename)
{
AVInputFormat* iformat;
if (!(iformat = av_find_input_format("image2")))
throw std::invalid_argument(std::string("input Codec not found\n"));
this->fctx = NULL;
if (avformat_open_input(&this->fctx, filename.c_str(), iformat, NULL) < 0)
{
std::string error = "Failed to open input file ";
error += filename;
error += "\n";
throw std::invalid_argument(error);
}
#ifdef LIB_AVFORMAT_STREAM_CODEC_DEPRECATED
if (!(this->codec = avcodec_find_decoder(this->fctx->streams[0]->codecpar->codec_id)))
throw std::invalid_argument(std::string("Failed to find codec\n"));
if (!(this->cctx = avcodec_alloc_context3(this->codec)))
throw std::invalid_argument(std::string("could not create image read context codec"));
if (avcodec_parameters_to_context(this->cctx, this->fctx->streams[0]->codecpar) < 0)
throw std::invalid_argument(std::string("could not get contest codec from stream"));
#else
this->cctx = this->fctx->streams[0]->codec;
if (!(this->codec = avcodec_find_decoder(this->cctx->codec_id)))
throw std::invalid_argument(std::string("Failed to find codec\n"));
#endif
if (this->cctx->codec_id == AV_CODEC_ID_RAWVIDEO) {
// TODO Make Dynamic
this->cctx->pix_fmt = AV_PIX_FMT_YUYV422 ;
this->cctx->height = 800;
this->cctx->width = 1280;
}
if (avcodec_open2(this->cctx, this->codec, NULL) < 0)
throw std::invalid_argument(std::string("Failed to open codec\n"));
#ifdef USING_NEW_AVPACKET_SETUP
if (!(this->pkt = av_packet_alloc()))
throw std::invalid_argument(std::string("Failed to alloc frame\n"));
#else
this->pkt = new AVPacket();
av_init_packet(this->pkt);
#endif
read_file();
}
ImageDecoder::~ImageDecoder()
{
avcodec_close(this->cctx);
avformat_close_input(&this->fctx);
#ifdef USING_NEW_AVPACKET_SETUP
av_packet_free(&this->pkt);
#else
av_free_packet(this->pkt);
delete this->pkt;
#endif
}
void ImageDecoder::read_file()
{
if (av_read_frame(this->fctx, this->pkt) < 0)
throw std::invalid_argument(std::string("Failed to read frame from file\n"));
if (this->pkt->size == 0)
this->ret = -1;
}
#ifdef LIB_AVCODEC_USE_SEND_RECEIVE_NOTATION
void ImageDecoder::send_next_packet() {
if ((this->ret = avcodec_send_packet(this->cctx, this->pkt)) < 0)
throw std::invalid_argument("Error sending a packet for decoding\n");
}
bool ImageDecoder::receive_next_frame(AVFrame* frame)
{
if (this->ret >= 0)
{
this->ret = avcodec_receive_frame(this->cctx, frame);
if (this->ret == AVERROR_EOF)
return false;
else if (this->ret == AVERROR(11))//11 == EAGAIN builder sucks
return false;
else if (this->ret < 0)
throw std::invalid_argument("Error during decoding\n");
return true;
}
return false;
}
#else
void ImageDecoder::decode_frame(AVFrame* frame)
{
int got_frame = 0;
if (avcodec_decode_video2(this->cctx, frame, &got_frame, this->pkt) < 0)
throw std::invalid_argument("Error while decoding frame %d\n");
}
#endif

FFmpeg: Parallel encoding with custom thread pool

One of the things I'm trying to achieve is parallel encoding via FFmpeg's c API. This looks to work out of the box quite nicely; however, I've changed the goal posts slightly:
In an existing application, I already have a thread pool at hand. Instead of using another thread pool via FFmpeg, I would like reuse the existing thread pool in my application. Having studied the latest FFmpeg trunk docs, it very much looks possible.
Using some FFmpeg sample code, I've created a sample application to demonstrate what I'm trying to achieve (see below). The sample app generates a video-only mpeg2 ts using the mp2v codec.
The problem I'm experiencing is that the custom 'thread_execute' or 'thread_execute2' are never invoked. This is despite the fact that the codec appears to indicate that threading is supported. Please be aware that I have not yet plumbed in the thread pool just yet. My first goal is for it to call the custom function pointer.
I've tried to get assistance on the FFmpeg mailing lists but to no avail.
#include <iostream>
#include <thread>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <cstring>
#include <future>
extern "C"
{
#include <libavutil/avassert.h>
#include <libavutil/channel_layout.h>
#include <libavutil/opt.h>
#include <libavutil/timestamp.h>
#include <libavformat/avformat.h>
//#include <libswscale/swscale.h>
#include <libswresample/swresample.h>
}
#define STREAM_DURATION 1000.0
#define STREAM_FRAME_RATE 25 /* 25 images/s */
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P /* default pix_fmt */
#define SCALE_FLAGS SWS_BICUBIC
// a wrapper around a single output AVStream
typedef struct OutputStream {
AVStream *st;
AVCodecContext *enc;
/* pts of the next frame that will be generated */
int64_t next_pts;
int samples_count;
AVFrame *frame;
AVFrame *tmp_frame;
float t, tincr, tincr2;
struct SwsContext *sws_ctx;
struct SwrContext *swr_ctx;
} OutputStream;
/////////////////////////////////////////////////////////////////////////////
// The ffmpeg variation raises compiler warnings.
char *cb_av_ts2str(char *buf, int64_t ts)
{
std::memset(buf,0,AV_TS_MAX_STRING_SIZE);
return av_ts_make_string(buf,ts);
}
/////////////////////////////////////////////////////////////////////////////
// The ffmpeg variation raises compiler warnings.
char *cb_av_ts2timestr(char *buf, int64_t ts, AVRational *tb)
{
std::memset(buf,0,sizeof(AV_TS_MAX_STRING_SIZE));
return av_ts_make_time_string(buf,ts,tb);
}
/////////////////////////////////////////////////////////////////////////////
// The ffmpeg variation raises compiler warnings.
char *cb_av_err2str(char *errbuf, size_t errbuf_size, int errnum)
{
std::memset(errbuf,0,errbuf_size);
return av_make_error_string(errbuf,errbuf_size,errnum);
}
int thread_execute(AVCodecContext* s, int (*func)(AVCodecContext *c2, void *arg2), void* arg, int* ret, int count, int size)
{
// Do it all serially for now
std::cout << "thread_execute" << std::endl;
for (int k = 0; k < count; ++k)
{
ret[k] = func(s, arg);
}
return 0;
}
int thread_execute2(AVCodecContext* s, int (*func)(AVCodecContext* c2, void* arg2, int, int), void* arg, int* ret, int count)
{
// Do it all serially for now
std::cout << "thread_execute2" << std::endl;
for (int k = 0; k < count; ++k)
{
ret[k] = func(s, arg, k, count);
}
return 0;
}
static void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt)
{
char s[AV_TS_MAX_STRING_SIZE];
AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
printf("pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
cb_av_ts2str(s,pkt->pts), cb_av_ts2timestr(s,pkt->pts, time_base),
cb_av_ts2str(s,pkt->dts), cb_av_ts2timestr(s,pkt->dts, time_base),
cb_av_ts2str(s,pkt->duration), cb_av_ts2timestr(s,pkt->duration, time_base),
pkt->stream_index);
}
static int write_frame(AVFormatContext *fmt_ctx, const AVRational *time_base, AVStream *st, AVPacket *pkt)
{
/* rescale output packet timestamp values from codec to stream timebase */
av_packet_rescale_ts(pkt, *time_base, st->time_base);
pkt->stream_index = st->index;
/* Write the compressed frame to the media file. */
log_packet(fmt_ctx, pkt);
return av_interleaved_write_frame(fmt_ctx, pkt);
}
/* Add an output stream. */
static void add_stream(OutputStream *ost, AVFormatContext *oc,
AVCodec **codec,
enum AVCodecID codec_id)
{
AVCodecContext *c;
int i;
/* find the encoder */
*codec = avcodec_find_encoder(codec_id);
if (!(*codec)) {
fprintf(stderr, "Could not find encoder for '%s'\n",
avcodec_get_name(codec_id));
exit(1);
}
ost->st = avformat_new_stream(oc, NULL);
if (!ost->st) {
fprintf(stderr, "Could not allocate stream\n");
exit(1);
}
ost->st->id = oc->nb_streams-1;
c = avcodec_alloc_context3(*codec);
if (!c) {
fprintf(stderr, "Could not alloc an encoding context\n");
exit(1);
}
ost->enc = c;
switch ((*codec)->type)
{
case AVMEDIA_TYPE_AUDIO:
c->sample_fmt = (*codec)->sample_fmts ?
(*codec)->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
c->bit_rate = 64000;
c->sample_rate = 44100;
if ((*codec)->supported_samplerates) {
c->sample_rate = (*codec)->supported_samplerates[0];
for (i = 0; (*codec)->supported_samplerates[i]; i++) {
if ((*codec)->supported_samplerates[i] == 44100)
c->sample_rate = 44100;
}
}
c->channels = av_get_channel_layout_nb_channels(c->channel_layout);
c->channel_layout = AV_CH_LAYOUT_STEREO;
if ((*codec)->channel_layouts) {
c->channel_layout = (*codec)->channel_layouts[0];
for (i = 0; (*codec)->channel_layouts[i]; i++) {
if ((*codec)->channel_layouts[i] == AV_CH_LAYOUT_STEREO)
c->channel_layout = AV_CH_LAYOUT_STEREO;
}
}
c->channels = av_get_channel_layout_nb_channels(c->channel_layout);
ost->st->time_base = (AVRational){ 1, c->sample_rate };
break;
case AVMEDIA_TYPE_VIDEO:
c->codec_id = codec_id;
c->bit_rate = 400000;
/* Resolution must be a multiple of two. */
c->width = 352;
c->height = 288;
/* timebase: This is the fundamental unit of time (in seconds) in terms
* of which frame timestamps are represented. For fixed-fps content,
* timebase should be 1/framerate and timestamp increments should be
* identical to 1. */
ost->st->time_base = (AVRational){ 1, STREAM_FRAME_RATE };
c->time_base = ost->st->time_base;
c->gop_size = 12; /* emit one intra frame every twelve frames at most */
c->pix_fmt = STREAM_PIX_FMT;
if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
/* just for testing, we also add B-frames */
c->max_b_frames = 2;
}
if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
/* Needed to avoid using macroblocks in which some coeffs overflow.
* This does not happen with normal video, it just happens here as
* the motion of the chroma plane does not match the luma plane. */
c->mb_decision = 2;
}
break;
default:
break;
}
if (c->codec->capabilities & AV_CODEC_CAP_FRAME_THREADS ||
c->codec->capabilities & AV_CODEC_CAP_SLICE_THREADS)
{
if (c->codec->capabilities & AV_CODEC_CAP_FRAME_THREADS)
{
c->thread_type = FF_THREAD_FRAME;
}
if (c->codec->capabilities & AV_CODEC_CAP_SLICE_THREADS)
{
c->thread_type = FF_THREAD_SLICE;
}
c->execute = &thread_execute;
c->execute2 = &thread_execute2;
c->thread_count = 4;
// NOTE: Testing opaque.
c->opaque = (void*)0xff;
}
/* Some formats want stream headers to be separate. */
if (oc->oformat->flags & AVFMT_GLOBALHEADER)
c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
/**************************************************************/
/* video output */
static AVFrame *alloc_picture(enum AVPixelFormat pix_fmt, int width, int height)
{
AVFrame *picture;
int ret;
picture = av_frame_alloc();
if (!picture)
return NULL;
picture->format = pix_fmt;
picture->width = width;
picture->height = height;
/* allocate the buffers for the frame data */
ret = av_frame_get_buffer(picture, 32);
if (ret < 0) {
fprintf(stderr, "Could not allocate frame data.\n");
exit(1);
}
return picture;
}
static void open_video(AVFormatContext *oc, AVCodec *codec, OutputStream *ost, AVDictionary *opt_arg)
{
int ret;
AVCodecContext *c = ost->enc;
//AVDictionary *opt = NULL;
//av_dict_copy(&opt, opt_arg, 0);
/* open the codec */
ret = avcodec_open2(c, codec, NULL);
//av_dict_free(&opt);
if (ret < 0) {
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Could not open video codec: %s\n", cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
exit(1);
}
/* allocate and init a re-usable frame */
ost->frame = alloc_picture(c->pix_fmt, c->width, c->height);
if (!ost->frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
/* If the output format is not YUV420P, then a temporary YUV420P
* picture is needed too. It is then converted to the required
* output format. */
ost->tmp_frame = NULL;
if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
ost->tmp_frame = alloc_picture(AV_PIX_FMT_YUV420P, c->width, c->height);
if (!ost->tmp_frame) {
fprintf(stderr, "Could not allocate temporary picture\n");
exit(1);
}
}
/* copy the stream parameters to the muxer */
ret = avcodec_parameters_from_context(ost->st->codecpar, c);
if (ret < 0) {
fprintf(stderr, "Could not copy the stream parameters\n");
exit(1);
}
}
/* Prepare a dummy image. */
static void fill_yuv_image(AVFrame *pict, int frame_index,
int width, int height)
{
int x, y, i;
i = frame_index;
/* Y */
for (y = 0; y < height; y++)
for (x = 0; x < width; x++)
pict->data[0][y * pict->linesize[0] + x] = x + y + i * 3;
/* Cb and Cr */
for (y = 0; y < height / 2; y++) {
for (x = 0; x < width / 2; x++) {
pict->data[1][y * pict->linesize[1] + x] = 128 + y + i * 2;
pict->data[2][y * pict->linesize[2] + x] = 64 + x + i * 5;
}
}
}
static AVFrame *get_video_frame(OutputStream *ost)
{
AVCodecContext *c = ost->enc;
/* check if we want to generate more frames */
if (av_compare_ts(ost->next_pts, c->time_base,
STREAM_DURATION, (AVRational){ 1, 1 }) >= 0)
return NULL;
/* when we pass a frame to the encoder, it may keep a reference to it
* internally; make sure we do not overwrite it here */
if (av_frame_make_writable(ost->frame) < 0)
exit(1);
if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
/* as we only generate a YUV420P picture, we must convert it
* to the codec pixel format if needed */
/*if (!ost->sws_ctx) {
ost->sws_ctx = sws_getContext(c->width, c->height,
AV_PIX_FMT_YUV420P,
c->width, c->height,
c->pix_fmt,
SCALE_FLAGS, NULL, NULL, NULL);
if (!ost->sws_ctx) {
fprintf(stderr,
"Could not initialize the conversion context\n");
exit(1);
}
}
fill_yuv_image(ost->tmp_frame, ost->next_pts, c->width, c->height);
sws_scale(ost->sws_ctx,
(const uint8_t * const *)ost->tmp_frame->data, ost->tmp_frame->linesize,
0, c->height, ost->frame->data, ost->frame->linesize);*/
} else {
fill_yuv_image(ost->frame, ost->next_pts, c->width, c->height);
}
ost->frame->pts = ost->next_pts++;
return ost->frame;
}
/*
* encode one video frame and send it to the muxer
* return 1 when encoding is finished, 0 otherwise
*/
static int write_video_frame(AVFormatContext *oc, OutputStream *ost)
{
int ret;
AVCodecContext *c;
AVFrame *frame;
int got_packet = 0;
AVPacket pkt = { 0 };
c = ost->enc;
frame = get_video_frame(ost);
if (frame)
{
ret = avcodec_send_frame(ost->enc, frame);
if (ret < 0)
{
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Error encoding video frame: %s\n", cb_av_err2str(s, AV_ERROR_MAX_STRING_SIZE, ret));
exit(1);
}
}
av_init_packet(&pkt);
ret = avcodec_receive_packet(ost->enc,&pkt);
if (ret < 0)
{
if (ret == AVERROR(EAGAIN)) { ret = 0; }
else
{
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Error receiving packet: %s\n", cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
exit(1);
}
}
else
{
got_packet = 1;
ret = write_frame(oc, &c->time_base, ost->st, &pkt);
}
if (ret < 0) {
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Error while writing video frame: %s\n", cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
exit(1);
}
return (frame || got_packet) ? 0 : 1;
}
static void close_stream(AVFormatContext *oc, OutputStream *ost)
{
avcodec_free_context(&ost->enc);
av_frame_free(&ost->frame);
av_frame_free(&ost->tmp_frame);
//sws_freeContext(ost->sws_ctx);
//swr_free(&ost->swr_ctx);
}
/**************************************************************/
/* media file output */
int main(int argc, char **argv)
{
OutputStream video_st = { 0 }, audio_st = { 0 };
const char *filename;
AVOutputFormat *fmt;
AVFormatContext *oc;
AVCodec /**audio_codec,*/ *video_codec;
int ret;
int have_video = 0, have_audio = 0;
int encode_video = 0, encode_audio = 0;
AVDictionary *opt = NULL;
int i;
/* Initialize libavcodec, and register all codecs and formats. */
av_register_all();
avformat_network_init();
if (argc < 2) {
printf("usage: %s output_file\n"
"API example program to output a media file with libavformat.\n"
"This program generates a synthetic audio and video stream, encodes and\n"
"muxes them into a file named output_file.\n"
"The output format is automatically guessed according to the file extension.\n"
"Raw images can also be output by using '%%d' in the filename.\n"
"\n", argv[0]);
return 1;
}
filename = argv[1];
for (i = 2; i+1 < argc; i+=2) {
if (!strcmp(argv[i], "-flags") || !strcmp(argv[i], "-fflags"))
av_dict_set(&opt, argv[i]+1, argv[i+1], 0);
}
const char *pfilename = filename;
/* allocate the output media context */
avformat_alloc_output_context2(&oc, NULL, "mpegts", pfilename);
if (!oc) {
printf("Could not deduce output format from file extension: using MPEG.\n");
avformat_alloc_output_context2(&oc, NULL, "mpeg", pfilename);
}
if (!oc)
return 1;
fmt = oc->oformat;
/* Add the audio and video streams using the default format codecs
* and initialize the codecs. */
if (fmt->video_codec != AV_CODEC_ID_NONE) {
add_stream(&video_st, oc, &video_codec, fmt->video_codec);
have_video = 1;
encode_video = 1;
}
/*if (fmt->audio_codec != AV_CODEC_ID_NONE) {
add_stream(&audio_st, oc, &audio_codec, fmt->audio_codec);
have_audio = 1;
encode_audio = 1;
}*/
/* Now that all the parameters are set, we can open the audio and
* video codecs and allocate the necessary encode buffers. */
if (have_video)
open_video(oc, video_codec, &video_st, opt);
//if (have_audio)
// open_audio(oc, audio_codec, &audio_st, opt);
av_dump_format(oc, 0, pfilename, 1);
/* open the output file, if needed */
if (!(fmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&oc->pb, pfilename, AVIO_FLAG_WRITE);
if (ret < 0) {
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Could not open '%s': %s\n", pfilename,
cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
return 1;
}
}
/* Write the stream header, if any. */
ret = avformat_write_header(oc, &opt);
if (ret < 0) {
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Error occurred when opening output file: %s\n",
cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
return 1;
}
while (encode_video || encode_audio) {
/* select the stream to encode */
if (encode_video &&
(!encode_audio || av_compare_ts(video_st.next_pts, video_st.enc->time_base,
audio_st.next_pts, audio_st.enc->time_base) <= 0)) {
encode_video = !write_video_frame(oc, &video_st);
} else {
//encode_audio = !write_audio_frame(oc, &audio_st);
}
//std::this_thread::sleep_for(std::chrono::milliseconds(35));
}
/* Write the trailer, if any. The trailer must be written before you
* close the CodecContexts open when you wrote the header; otherwise
* av_write_trailer() may try to use memory that was freed on
* av_codec_close(). */
av_write_trailer(oc);
/* Close each codec. */
if (have_video)
close_stream(oc, &video_st);
if (have_audio)
close_stream(oc, &audio_st);
if (!(fmt->flags & AVFMT_NOFILE))
/* Close the output file. */
avio_closep(&oc->pb);
/* free the stream */
avformat_free_context(oc);
return 0;
}
//
Environment:
Ubuntu Zesty (17.04)
FFmpeg version 3.2.4 (via package manager)
gcc 6.3 (C++)
You have to do following:
call avcodec_alloc_context3(...). This call will set default execute and execute2 functions in new context
set c->thread_count = number_of_threads_in_your_thread_pool()
call avcodec_open2(...).
set c->execute and c->execute2 to point to your functions
call ff_thread_free(c). This function isnt exposed in libavcodec headers but you can add following line:
extern "C" void ff_thread_free(AVCodecContext *s);
Drawback is that libavcodec will create internal thread pool after avcodec_open2(...) call, and that pool will be deleted in ff_thread_free() call.
Internal thread pool is very efficient, but its not good if you plan to do parallel encoding of multiple video feeds. In that case libavcodec will create separate thread pool for each encoding video feed.

What to pass to avcodec_decode_video2 for H.264 Transport Stream?

I want to decode H.264 video from a collection of MPEG-2 Transport Stream packets but I am not clear what to pass to avcodec_decode_video2
The documentation says to pass "the input AVPacket containing the input buffer."
But what should be in the input buffer?
A PES packet will be spread across the payload portion of several TS packets, with NALU(s) inside the PES. So pass a TS fragment? The entire PES? PES payload only?
This Sample Code mentions:
BUT some other codecs (msmpeg4, mpeg4) are inherently frame based, so
you must call them with all the data for one frame exactly. You must
also initialize 'width' and 'height' before initializing them.
But I can find no info on what "all the data" means...
Passing a fragment of a TS packet payload is not working:
AVPacket avDecPkt;
av_init_packet(&avDecPkt);
avDecPkt.data = inbuf_ptr;
avDecPkt.size = esBufSize;
len = avcodec_decode_video2(mpDecoderContext, mpFrameDec, &got_picture, &avDecPkt);
if (len < 0)
{
printf(" TS PKT #%.0f. Error decoding frame #%04d [rc=%d '%s']\n",
tsPacket.pktNum, mDecodedFrameNum, len, av_make_error_string(errMsg, 128, len));
return;
}
output
[h264 # 0x81cd2a0] no frame!
TS PKT #2973. Error decoding frame #0001 [rc=-1094995529 'Invalid data found when processing input']
EDIT
Using the excellent hits from WLGfx, I made this simple program to try decoding TS packets. As input, I prepared a file containing only TS packets from the Video PID.
It feels close but I don't know how to set up the FormatContext. The code below segfaults at av_read_frame() (and internally at ret = s->iformat->read_packet(s, pkt)). s->iformat is zero.
Suggestions?
EDIT II - Sorry, for got post source code **
**EDIT III - Sample code updated to simulate reading TS PKT Queue
/*
* Test program for video decoder
*/
#include <stdio.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
extern "C" {
#ifdef __cplusplus
#define __STDC_CONSTANT_MACROS
#ifdef _STDINT_H
#undef _STDINT_H
#endif
#include <stdint.h>
#endif
}
extern "C" {
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libavutil/imgutils.h"
#include "libavutil/opt.h"
}
class VideoDecoder
{
public:
VideoDecoder();
bool rcvTsPacket(AVPacket &inTsPacket);
private:
AVCodec *mpDecoder;
AVCodecContext *mpDecoderContext;
AVFrame *mpDecodedFrame;
AVFormatContext *mpFmtContext;
};
VideoDecoder::VideoDecoder()
{
av_register_all();
// FORMAT CONTEXT SETUP
mpFmtContext = avformat_alloc_context();
mpFmtContext->flags = AVFMT_NOFILE;
// ????? WHAT ELSE ???? //
// DECODER SETUP
mpDecoder = avcodec_find_decoder(AV_CODEC_ID_H264);
if (!mpDecoder)
{
printf("Could not load decoder\n");
exit(11);
}
mpDecoderContext = avcodec_alloc_context3(NULL);
if (avcodec_open2(mpDecoderContext, mpDecoder, NULL) < 0)
{
printf("Cannot open decoder context\n");
exit(1);
}
mpDecodedFrame = av_frame_alloc();
}
bool
VideoDecoder::rcvTsPacket(AVPacket &inTsPkt)
{
bool ret = true;
if ((av_read_frame(mpFmtContext, &inTsPkt)) < 0)
{
printf("Error in av_read_frame()\n");
ret = false;
}
else
{
// success. Decode the TS packet
int got;
int len = avcodec_decode_video2(mpDecoderContext, mpDecodedFrame, &got, &inTsPkt);
if (len < 0)
ret = false;
if (got)
printf("GOT A DECODED FRAME\n");
}
return ret;
}
int
main(int argc, char **argv)
{
if (argc != 2)
{
printf("Usage: %s tsInFile\n", argv[0]);
exit(1);
}
FILE *tsInFile = fopen(argv[1], "r");
if (!tsInFile)
{
perror("Could not open TS input file");
exit(2);
}
unsigned int tsPktNum = 0;
uint8_t tsBuffer[256];
AVPacket tsPkt;
av_init_packet(&tsPkt);
VideoDecoder vDecoder;
while (!feof(tsInFile))
{
tsPktNum++;
tsPkt.size = 188;
tsPkt.data = tsBuffer;
fread(tsPkt.data, 188, 1, tsInFile);
vDecoder.rcvTsPacket(tsPkt);
}
}
I've got some code snippets that might help you out as I've been working with MPEG-TS also.
Starting with my packet thread which checks each packet against the stream ID's which I've already found and got the codec contexts:
void *FFMPEG::thread_packet_function(void *arg) {
FFMPEG *ffmpeg = (FFMPEG*)arg;
for (int c = 0; c < MAX_PACKETS; c++)
ffmpeg->free_packets[c] = &ffmpeg->packet_list[c];
ffmpeg->packet_pos = MAX_PACKETS;
Audio.start_decoding();
Video.start_decoding();
Subtitle.start_decoding();
while (!ffmpeg->thread_quit) {
if (ffmpeg->packet_pos != 0 &&
Audio.okay_add_packet() &&
Video.okay_add_packet() &&
Subtitle.okay_add_packet()) {
pthread_mutex_lock(&ffmpeg->packet_mutex); // get free packet
AVPacket *pkt = ffmpeg->free_packets[--ffmpeg->packet_pos]; // pre decrement
pthread_mutex_unlock(&ffmpeg->packet_mutex);
if ((av_read_frame(ffmpeg->fContext, pkt)) >= 0) { // success
int id = pkt->stream_index;
if (id == ffmpeg->aud_stream.stream_id) Audio.add_packet(pkt);
else if (id == ffmpeg->vid_stream.stream_id) Video.add_packet(pkt);
else if (id == ffmpeg->sub_stream.stream_id) Subtitle.add_packet(pkt);
else { // unknown packet
av_packet_unref(pkt);
pthread_mutex_lock(&ffmpeg->packet_mutex); // put packet back
ffmpeg->free_packets[ffmpeg->packet_pos++] = pkt;
pthread_mutex_unlock(&ffmpeg->packet_mutex);
//LOGI("Dumping unknown packet, id %d", id);
}
} else {
av_packet_unref(pkt);
pthread_mutex_lock(&ffmpeg->packet_mutex); // put packet back
ffmpeg->free_packets[ffmpeg->packet_pos++] = pkt;
pthread_mutex_unlock(&ffmpeg->packet_mutex);
//LOGI("No packet read");
}
} else { // buffers full so yield
//LOGI("Packet reader on hold: Audio-%d, Video-%d, Subtitle-%d",
// Audio.packet_pos, Video.packet_pos, Subtitle.packet_pos);
usleep(1000);
//sched_yield();
}
}
return 0;
}
Each decoder for audio, video and subtitles have their own threads which receive the packets from the above thread in ring buffers. I've had to separate the decoders into their own threads because CPU usage was increasing when I started using the deinterlace filter.
My video decoder reads the packets from the buffers and when it has finished with the packet sends it back to be unref'd and can be used again. Balancing the packet buffers doesn't take that much time once everything is running.
Here's the snipped from my video decoder:
void *VideoManager::decoder(void *arg) {
LOGI("Video decoder started");
VideoManager *mgr = (VideoManager *)arg;
while (!ffmpeg.thread_quit) {
pthread_mutex_lock(&mgr->packet_mutex);
if (mgr->packet_pos != 0) {
// fetch first packet to decode
AVPacket *pkt = mgr->packets[0];
// shift list down one
for (int c = 1; c < mgr->packet_pos; c++) {
mgr->packets[c-1] = mgr->packets[c];
}
mgr->packet_pos--;
pthread_mutex_unlock(&mgr->packet_mutex); // finished with packets array
int got;
AVFrame *frame = ffmpeg.vid_stream.frame;
avcodec_decode_video2(ffmpeg.vid_stream.context, frame, &got, pkt);
ffmpeg.finished_with_packet(pkt);
if (got) {
#ifdef INTERLACE_ALL
if (!frame->interlaced_frame) mgr->add_av_frame(frame, 0);
else {
if (!mgr->filter_initialised) mgr->init_filter_graph(frame);
av_buffersrc_add_frame_flags(mgr->filter_src_ctx, frame, AV_BUFFERSRC_FLAG_KEEP_REF);
int c = 0;
while (true) {
AVFrame *filter_frame = ffmpeg.vid_stream.filter_frame;
int result = av_buffersink_get_frame(mgr->filter_sink_ctx, filter_frame);
if (result == AVERROR(EAGAIN) ||
result == AVERROR(AVERROR_EOF) ||
result < 0)
break;
mgr->add_av_frame(filter_frame, c++);
av_frame_unref(filter_frame);
}
//LOGI("Interlaced %d frames, decode %d, playback %d", c, mgr->decode_pos, mgr->playback_pos);
}
#elif defined(INTERLACE_HALF)
if (!frame->interlaced_frame) mgr->add_av_frame(frame, 0);
else {
if (!mgr->filter_initialised) mgr->init_filter_graph(frame);
av_buffersrc_add_frame_flags(mgr->filter_src_ctx, frame, AV_BUFFERSRC_FLAG_KEEP_REF);
int c = 0;
while (true) {
AVFrame *filter_frame = ffmpeg.vid_stream.filter_frame;
int result = av_buffersink_get_frame(mgr->filter_sink_ctx, filter_frame);
if (result == AVERROR(EAGAIN) ||
result == AVERROR(AVERROR_EOF) ||
result < 0)
break;
mgr->add_av_frame(filter_frame, c++);
av_frame_unref(filter_frame);
}
//LOGI("Interlaced %d frames, decode %d, playback %d", c, mgr->decode_pos, mgr->playback_pos);
}
#else
mgr->add_av_frame(frame, 0);
#endif
}
//LOGI("decoded video packet");
} else {
pthread_mutex_unlock(&mgr->packet_mutex);
}
}
LOGI("Video decoder ended");
}
As you can see, I'm using a mutex when passing packets back and forth.
Once a frame has been got I just copy the YUV buffers from the frame for later use into another buffer list. I don't convert the YUV, I use a shader which converts the YUV to RGB on the GPU.
The next snippet adds my decoded frame to my buffer list. This may help understand how to deal with the data.
void VideoManager::add_av_frame(AVFrame *frame, int field_num) {
int y_linesize = frame->linesize[0];
int u_linesize = frame->linesize[1];
int hgt = frame->height;
int y_buffsize = y_linesize * hgt;
int u_buffsize = u_linesize * hgt / 2;
int buffsize = y_buffsize + u_buffsize + u_buffsize;
VideoBuffer *buffer = &buffers[decode_pos];
if (ffmpeg.is_network && playback_pos == decode_pos) { // patched 25/10/16 wlgfx
buffer->used = false;
if (!buffer->data) buffer->data = (char*)mem.alloc(buffsize);
if (!buffer->data) {
LOGI("Dropped frame, allocation error");
return;
}
} else if (playback_pos == decode_pos) {
LOGI("Dropped frame, ran out of decoder frame buffers");
return;
} else if (!buffer->data) {
buffer->data = (char*)mem.alloc(buffsize);
if (!buffer->data) {
LOGI("Dropped frame, allocation error.");
return;
}
}
buffer->y_frame = buffer->data;
buffer->u_frame = buffer->y_frame + y_buffsize;
buffer->v_frame = buffer->y_frame + y_buffsize + u_buffsize;
buffer->wid = frame->width;
buffer->hgt = hgt;
buffer->y_linesize = y_linesize;
buffer->u_linesize = u_linesize;
int64_t pts = av_frame_get_best_effort_timestamp(frame);
buffer->pts = pts;
buffer->buffer_size = buffsize;
double field_add = av_q2d(ffmpeg.vid_stream.context->time_base) * field_num;
buffer->frame_time = av_q2d(ts_stream) * pts + field_add;
memcpy(buffer->y_frame, frame->data[0], (size_t) (buffer->y_linesize * buffer->hgt));
memcpy(buffer->u_frame, frame->data[1], (size_t) (buffer->u_linesize * buffer->hgt / 2));
memcpy(buffer->v_frame, frame->data[2], (size_t) (buffer->u_linesize * buffer->hgt / 2));
buffer->used = true;
decode_pos = (++decode_pos) % MAX_VID_BUFFERS;
//if (field_num == 0) LOGI("Video %.2f, %d - %d",
// buffer->frame_time - Audio.pts_start_time, decode_pos, playback_pos);
}
If there's anything else that I may be able to help with just give me a shout. :-)
EDIT:
The snippet how I open my video stream context which automatically determines the codec, whether it is h264, mpeg2, or another:
void FFMPEG::open_video_stream() {
vid_stream.stream_id = av_find_best_stream(fContext, AVMEDIA_TYPE_VIDEO,
-1, -1, &vid_stream.codec, 0);
if (vid_stream.stream_id == -1) return;
vid_stream.context = fContext->streams[vid_stream.stream_id]->codec;
if (!vid_stream.codec || avcodec_open2(vid_stream.context,
vid_stream.codec, NULL) < 0) {
vid_stream.stream_id = -1;
return;
}
vid_stream.frame = av_frame_alloc();
vid_stream.filter_frame = av_frame_alloc();
}
EDIT2:
This is how I've opened the input stream, whether it be file or URL. The AVFormatContext is the main context for the stream.
bool FFMPEG::start_stream(char *url_, float xtrim, float ytrim, int gain) {
aud_stream.stream_id = -1;
vid_stream.stream_id = -1;
sub_stream.stream_id = -1;
this->url = url_;
this->xtrim = xtrim;
this->ytrim = ytrim;
Audio.volume = gain;
Audio.init();
Video.init();
fContext = avformat_alloc_context();
if ((avformat_open_input(&fContext, url_, NULL, NULL)) != 0) {
stop_stream();
return false;
}
if ((avformat_find_stream_info(fContext, NULL)) < 0) {
stop_stream();
return false;
}
// network stream will overwrite packets if buffer is full
is_network = url.substr(0, 4) == "udp:" ||
url.substr(0, 4) == "rtp:" ||
url.substr(0, 5) == "rtsp:" ||
url.substr(0, 5) == "http:"; // added for wifi broadcasting ability
// determine if stream is audio only
is_mp3 = url.substr(url.size() - 4) == ".mp3";
LOGI("Stream: %s", url_);
if (!open_audio_stream()) {
stop_stream();
return false;
}
if (is_mp3) {
vid_stream.stream_id = -1;
sub_stream.stream_id = -1;
} else {
open_video_stream();
open_subtitle_stream();
if (vid_stream.stream_id == -1) { // switch to audio only
close_subtitle_stream();
is_mp3 = true;
}
}
LOGI("Audio: %d, Video: %d, Subtitle: %d",
aud_stream.stream_id,
vid_stream.stream_id,
sub_stream.stream_id);
if (aud_stream.stream_id != -1) {
LOGD("Audio stream time_base {%d, %d}",
aud_stream.context->time_base.num,
aud_stream.context->time_base.den);
}
if (vid_stream.stream_id != -1) {
LOGD("Video stream time_base {%d, %d}",
vid_stream.context->time_base.num,
vid_stream.context->time_base.den);
}
LOGI("Starting packet and decode threads");
thread_quit = false;
pthread_create(&thread_packet, NULL, &FFMPEG::thread_packet_function, this);
Display.set_overlay_timout(3.0);
return true;
}
EDIT: (constructing an AVPacket)
Construct an AVPacket to send to the decoder...
AVPacket packet;
av_init_packet(&packet);
packet.data = myTSpacketdata; // pointer to the TS packet
packet.size = 188;
You should be able to reuse the packet. And it might need unref'ing.
You must first use the avcodec library to get the compressed frames out of the file. Then you can decode them using avcodec_decode_video2. look at this tutorial http://dranger.com/ffmpeg/

FPS too high when saving video in mp4 container

When I decode frames from avi file and then decode them in x264 and save to mp4 file, the fps of the output file is always 12,800. Therefore the file is played very fast. But, when I save the encoded in h264 frames in avi format and not mp4, so the fps is as I wanted - 25.
What could be the problem?
Here the code I wrote in VS2010:
#include "stdafx.h"
#include "inttypes.h"
extern "C" {
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libavutil/avutil.h"
#include <libswscale/swscale.h>
#include <libavutil/opt.h>
#include <libswscale/swscale.h>
#include <libavutil/imgutils.h>
}
#include <iostream>
using namespace std;
int main(int argc, char* argv[])
{
const char* inFileName = "C:\\000227_C1_GAME.avi";
const char* outFileName = "c:\\test.avi";
const char* outFileType = "avi";
av_register_all();
AVFormatContext* inContainer = NULL;
if(avformat_open_input(&inContainer, inFileName, NULL, NULL) < 0)
exit(1);
if(avformat_find_stream_info(inContainer, NULL) < 0)
exit(1);
// Find video stream
int videoStreamIndex = -1;
for (unsigned int i = 0; i < inContainer->nb_streams; ++i)
{
if (inContainer->streams[i] && inContainer->streams[i]->codec &&
inContainer->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
{
videoStreamIndex = i;
break;
}
}
if (videoStreamIndex == -1) exit(1);
AVFormatContext* outContainer = NULL;
if(avformat_alloc_output_context2(&outContainer, NULL, outFileType, outFileName) < 0)
exit(1);
// ----------------------------
// Decoder
// ----------------------------
AVStream const *const inStream = inContainer->streams[videoStreamIndex];
AVCodec *const decoder = avcodec_find_decoder(inStream->codec->codec_id);
if(!decoder)
exit(1);
if(avcodec_open2(inStream->codec, decoder, NULL) < 0)
exit(1);
// ----------------------------
// Encoder
// ----------------------------
AVCodec *encoder = avcodec_find_encoder(AV_CODEC_ID_H264);
if(!encoder)
exit(1);
AVStream *outStream = avformat_new_stream(outContainer, encoder);
if(!outStream)
exit(1);
avcodec_get_context_defaults3(outStream->codec, encoder);
// Construct encoder
if(outContainer->oformat->flags & AVFMT_GLOBALHEADER)
outStream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
outStream->codec->coder_type = AVMEDIA_TYPE_VIDEO;
outStream->codec->pix_fmt = AV_PIX_FMT_YUV420P;
outStream->codec->width = inStream->codec->width;
outStream->codec->height = inStream->codec->height;
outStream->codec->codec_id = encoder->id;
outStream->codec->bit_rate = 500000;
//outStream->codec->rc_min_rate = 600000;
//outStream->codec->rc_max_rate = 800000;
outStream->codec->time_base.den = 25;
outStream->codec->time_base.num = 1;
outStream->codec->gop_size = 250; // Keyframe interval(=GOP length). Determines maximum distance distance between I-frames
outStream->codec->keyint_min = 25; // minimum GOP size
outStream->codec->max_b_frames = 3;//16; // maximum number of B-frames between non-B-frames
outStream->codec->b_frame_strategy = 1; // decides the best number of B-frames to use. Default mode in x264.
outStream->codec->scenechange_threshold = 40;
outStream->codec->refs = 6; // abillity to reference frames other than the one immediately prior to the current frame. specify how many references can be used.
outStream->codec->qmin = 0;//10;
outStream->codec->qmax = 69;//51;
outStream->codec->qcompress = 0.6;
outStream->codec->max_qdiff = 4;
outStream->codec->i_quant_factor = 1.4;//0.71;
outStream->codec->refs=1;//3;
outStream->codec->chromaoffset = -2;
outStream->codec->thread_count = 1;
outStream->codec->trellis = 1;
outStream->codec->me_range = 16;
outStream->codec->me_method = ME_HEX; //hex
outStream->codec->flags2 |= CODEC_FLAG2_FAST;
outStream->codec->coder_type = 1;
if(outStream->codec->codec_id == AV_CODEC_ID_H264)
{
av_opt_set(outStream->codec->priv_data, "preset", "slow", 0);
}
// Open encoder
if(avcodec_open2(outStream->codec, encoder, NULL) < 0)
exit(1);
// Open output container
if(avio_open(&outContainer->pb, outFileName, AVIO_FLAG_WRITE) < 0)
exit(1);
//close_o
AVFrame *decodedFrame = avcodec_alloc_frame();
if(!decodedFrame)
exit(1);
AVFrame *encodeFrame = avcodec_alloc_frame();
if(!encodeFrame)
exit(1);
encodeFrame->format = outStream->codec->pix_fmt;
encodeFrame->width = outStream->codec->width;
encodeFrame->height = outStream->codec->height;
if(av_image_alloc(encodeFrame->data, encodeFrame->linesize,
outStream->codec->width, outStream->codec->height,
outStream->codec->pix_fmt, 1) < 0)
exit(1);
av_dump_format(inContainer, 0, inFileName,0);
//Write header to ouput container
avformat_write_header(outContainer, NULL);
AVPacket decodePacket, encodedPacket;
int got_frame, len;
while(av_read_frame(inContainer, &decodePacket)>=0)
{
if (decodePacket.stream_index == videoStreamIndex)
{
len = avcodec_decode_video2(inStream->codec, decodedFrame, &got_frame, &decodePacket);
if(len < 0)
exit(1);
if(got_frame)
{
av_init_packet(&encodedPacket);
encodedPacket.data = NULL;
encodedPacket.size = 0;
if(avcodec_encode_video2(outStream->codec, &encodedPacket, decodedFrame, &got_frame) < 0)
exit(1);
if(got_frame)
{
if (outStream->codec->coded_frame->key_frame)
encodedPacket.flags |= AV_PKT_FLAG_KEY;
encodedPacket.stream_index = outStream->index;
if(av_interleaved_write_frame(outContainer, &encodedPacket) < 0)
exit(1);
av_free_packet(&encodedPacket);
}
}
}
av_free_packet(&decodePacket);
}
av_write_trailer(outContainer);
avio_close(outContainer->pb);
avcodec_free_frame(&encodeFrame);
avcodec_free_frame(&decodedFrame);
avformat_free_context(outContainer);
av_close_input_file(inContainer);
return 0;
}
The problem was with PTS and DTS of the packet. Before writing the packet to output( before av_interleaved_write_frame command) set PTS and DTS like this
if (encodedPacket.pts != AV_NOPTS_VALUE)
encodedPacket.pts = av_rescale_q(encodedPacket.pts, outStream->codec->time_base, outStream->time_base);
if (encodedPacket.dts != AV_NOPTS_VALUE)
encodedPacket.dts = av_rescale_q(encodedPacket.dts, outStream->codec->time_base, outStream->time_base);