How to write silent audio data to an audio stream? - c++

I am writing some images to an AVStream, and after that I read an mp3 file and write it to a different AVStream. The problem is that the audio stream is a bit shorter than the video stream, so if I add more images and another audio file, the audio is no longer in sync with the video. My idea was therefore to write silent audio data to the audio stream before writing the next audio file to it, but I cannot figure out how to write the silent data.
I found this post, but I don't know how to calculate the packet size or how to write the packet to the audio stream.
This was my most successful approach so far, but the result (audioTest(0xff).mp4) is far from silent.
/* set up the audio convert context */
libffmpeg::SwrContext* audioConvertContext = libffmpeg::swr_alloc();
libffmpeg::av_opt_set_int(audioConvertContext, "in_channel_count", data->audioCodecContext->channels, 0);
libffmpeg::av_opt_set_int(audioConvertContext, "out_channel_count", data->audioCodecContext->channels, 0);
libffmpeg::av_opt_set_int(audioConvertContext, "in_channel_layout", data->audioCodecContext->channel_layout, 0);
libffmpeg::av_opt_set_int(audioConvertContext, "out_channel_layout", data->audioCodecContext->channel_layout, 0);
libffmpeg::av_opt_set_int(audioConvertContext, "in_sample_rate", data->audioCodecContext->sample_rate, 0);
libffmpeg::av_opt_set_int(audioConvertContext, "out_sample_rate", data->audioCodecContext->sample_rate, 0);
libffmpeg::av_opt_set_sample_fmt(audioConvertContext, "in_sample_fmt", libffmpeg::AV_SAMPLE_FMT_S16, 0);
libffmpeg::av_opt_set_sample_fmt(audioConvertContext, "out_sample_fmt", data->audioCodecContext->sample_fmt, 0);
int ret = libffmpeg::swr_init(audioConvertContext);
if (ret < 0)
{
Helper::ThrowError("Failed to allocate audio reformat context.", ret);
}
/* set up silent frame */
libffmpeg::AVFrame* silentFrame = libffmpeg::av_frame_alloc();
if (!silentFrame)
{
Helper::ThrowError("Failed to allocate audio encode frame.");
}
silentFrame->nb_samples = data->audioCodecContext->frame_size;
silentFrame->format = data->audioCodecContext->sample_fmt;
silentFrame->channel_layout = data->audioCodecContext->channel_layout;
silentFrame->channels = data->audioCodecContext->channels;
silentFrame->sample_rate = data->audioCodecContext->sample_rate;
/* alloc the frame buffer */
ret = libffmpeg::av_frame_get_buffer(silentFrame, 0);
if (ret < 0)
{
Helper::ThrowError("Could not allocate audio data buffers.");
}
int got_output = 0;
int samples_count = 0;
double duration = 4 * (double)data->audioStream->time_base.den / (double)data->audioStream->time_base.num;
while (libffmpeg::av_stream_get_end_pts(data->audioStream) < duration)
{
libffmpeg::AVPacket pkt;
libffmpeg::av_init_packet(&pkt);
ret = libffmpeg::av_frame_make_writable(silentFrame);
if (ret < 0)
{
Helper::ThrowError("Could not make frame writable.");
}
for (int j = 0; j < data->audioCodecContext->frame_size; j++)
{
silentFrame->data[0][2 * j] = 0xff;
for (int k = 1; k < data->audioCodecContext->channels; k++)
{
silentFrame->data[0][2 * j + k] = silentFrame->data[0][2 * j];
}
}
int dst_nb_samples = libffmpeg::av_rescale_rnd(
libffmpeg::swr_get_delay(audioConvertContext, data->audioCodecContext->sample_rate) + silentFrame->nb_samples,
data->audioCodecContext->sample_rate, data->audioCodecContext->sample_rate,
libffmpeg::AV_ROUND_UP);
ret = libffmpeg::swr_convert(
audioConvertContext,
silentFrame->data, dst_nb_samples,
(const libffmpeg::uint8_t**)silentFrame->data,
silentFrame->nb_samples);
if (ret < 0)
{
Helper::ThrowError("Error while converting audio frame.", ret);
}
silentFrame->pts = libffmpeg::av_rescale_q(samples_count, libffmpeg::AVRational{ 1, data->audioCodecContext->sample_rate }, data->audioCodecContext->time_base);
samples_count += dst_nb_samples;
ret = libffmpeg::avcodec_encode_audio2(data->audioCodecContext, &pkt, silentFrame, &got_output);
if (ret < 0)
{
Helper::ThrowError("Error while encoding audio frame.", ret);
}
if (got_output)
{
pkt.stream_index = data->audioStream->index;
if ((ret = libffmpeg::av_write_frame(data->formatContext, &pkt)) < 0)
{
Helper::ThrowError("Error while writing audio frame.", ret);
}
libffmpeg::av_packet_unref(&pkt);
}
}
libffmpeg::av_frame_free(&silentFrame);
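A note on why this is not silent: for signed 16-bit PCM, digital silence is the sample value 0, and more importantly the loop above writes only two bytes per sample index regardless of the channel count, so much of the buffer keeps its uninitialized contents. libavutil can also fill a frame with the correct silence value for any sample format; a minimal sketch, assuming the frame set up above:
/* hedged sketch: let libavutil write the proper silence value
 * (0 for signed formats, 0x80 for unsigned 8-bit) into the frame */
ret = libffmpeg::av_samples_set_silence(
silentFrame->data, 0, silentFrame->nb_samples,
silentFrame->channels, (libffmpeg::AVSampleFormat)silentFrame->format);
if (ret < 0)
{
Helper::ThrowError("Could not fill frame with silence.", ret);
}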

The mistake was how I wrote to the array. I am not that used to C++, so my solution may be a bit messy, but at least it works now.
/* set up the audio convert context */
libffmpeg::SwrContext* audioConvertContext = libffmpeg::swr_alloc();
libffmpeg::av_opt_set_int(audioConvertContext, "in_channel_count", data->audioCodecContext->channels, 0);
libffmpeg::av_opt_set_int(audioConvertContext, "out_channel_count", data->audioCodecContext->channels, 0);
libffmpeg::av_opt_set_int(audioConvertContext, "in_channel_layout", data->audioCodecContext->channel_layout, 0);
libffmpeg::av_opt_set_int(audioConvertContext, "out_channel_layout", data->audioCodecContext->channel_layout, 0);
libffmpeg::av_opt_set_int(audioConvertContext, "in_sample_rate", data->audioCodecContext->sample_rate, 0);
libffmpeg::av_opt_set_int(audioConvertContext, "out_sample_rate", data->audioCodecContext->sample_rate, 0);
libffmpeg::av_opt_set_sample_fmt(audioConvertContext, "in_sample_fmt", libffmpeg::AV_SAMPLE_FMT_S16, 0);
libffmpeg::av_opt_set_sample_fmt(audioConvertContext, "out_sample_fmt", data->audioCodecContext->sample_fmt, 0);
int ret = libffmpeg::swr_init(audioConvertContext);
if (ret < 0)
{
Helper::ThrowError("Failed to allocate audio reformat context.", ret);
}
/* set up silent frame */
libffmpeg::AVFrame* silentFrame = libffmpeg::av_frame_alloc();
if (!silentFrame)
{
Helper::ThrowError("Failed to allocate audio encode frame.");
}
silentFrame->nb_samples = data->audioCodecContext->frame_size;
silentFrame->format = data->audioCodecContext->sample_fmt;
silentFrame->channel_layout = data->audioCodecContext->channel_layout;
silentFrame->channels = data->audioCodecContext->channels;
silentFrame->sample_rate = data->audioCodecContext->sample_rate;
/* alloc the frame buffer */
ret = libffmpeg::av_frame_get_buffer(silentFrame, 0);
if (ret < 0)
{
Helper::ThrowError("Could not allocate audio data buffers.");
}
void* buffer = malloc(data->audioCodecContext->frame_size * data->audioCodecContext->channels * 16);
for (int i = 0; i < data->audioCodecContext->frame_size * data->audioCodecContext->channels * 2; i++)
{
*((int*)buffer + i) = 0x0;
}
int got_output = 0;
int samples_count = 0;
double duration = 4 * (double)data->audioStream->time_base.den / (double)data->audioStream->time_base.num;
while (libffmpeg::av_stream_get_end_pts(data->audioStream) < duration)
{
libffmpeg::AVPacket pkt;
libffmpeg::av_init_packet(&pkt);
ret = libffmpeg::av_frame_make_writable(silentFrame);
if (ret < 0)
{
Helper::ThrowError("Could not make frame writable.");
}
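/* note: this points the frame at the zeroed buffer, replacing the plane
 * allocated by av_frame_get_buffer(); only data[0] is read below because
 * the swr input format was declared as interleaved AV_SAMPLE_FMT_S16 */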
silentFrame->data[0] = (libffmpeg::uint8_t*) buffer;
int dst_nb_samples = libffmpeg::av_rescale_rnd(
libffmpeg::swr_get_delay(audioConvertContext, data->audioCodecContext->sample_rate) + silentFrame->nb_samples,
data->audioCodecContext->sample_rate, data->audioCodecContext->sample_rate,
libffmpeg::AV_ROUND_UP);
ret = libffmpeg::swr_convert(
audioConvertContext,
silentFrame->data, dst_nb_samples,
(const libffmpeg::uint8_t**)silentFrame->data,
silentFrame->nb_samples);
if (ret < 0)
{
Helper::ThrowError("Error while converting audio frame.", ret);
}
silentFrame->pts = libffmpeg::av_rescale_q(samples_count, libffmpeg::AVRational{ 1, data->audioCodecContext->sample_rate }, data->audioCodecContext->time_base);
samples_count += dst_nb_samples;
ret = libffmpeg::avcodec_encode_audio2(data->audioCodecContext, &pkt, silentFrame, &got_output);
if (ret < 0)
{
Helper::ThrowError("Error while encoding audio frame.", ret);
}
if (got_output)
{
pkt.stream_index = data->audioStream->index;
if ((ret = libffmpeg::av_write_frame(data->formatContext, &pkt)) < 0)
{
Helper::ThrowError("Error while writing audio frame.", ret);
}
libffmpeg::av_packet_unref(&pkt);
}
}
free(buffer);
libffmpeg::av_frame_free(&silentFrame);
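As an aside, avcodec_encode_audio2() is deprecated in current FFmpeg releases; the encode step above can also be written with the send/receive API. A minimal sketch under the same setup (same data-> fields and Helper as above):
libffmpeg::AVPacket pkt;
libffmpeg::av_init_packet(&pkt);
ret = libffmpeg::avcodec_send_frame(data->audioCodecContext, silentFrame);
if (ret < 0)
{
Helper::ThrowError("Error while sending audio frame.", ret);
}
while (ret >= 0)
{
ret = libffmpeg::avcodec_receive_packet(data->audioCodecContext, &pkt);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
{
break; /* encoder needs more input, or is fully drained */
}
if (ret < 0)
{
Helper::ThrowError("Error while encoding audio frame.", ret);
}
pkt.stream_index = data->audioStream->index;
if ((ret = libffmpeg::av_write_frame(data->formatContext, &pkt)) < 0)
{
Helper::ThrowError("Error while writing audio frame.", ret);
}
libffmpeg::av_packet_unref(&pkt);
}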

Related

How to use FFmpeg API overlay filter in C / C++

I have a C++ project which creates a 24/7 WebTV-like RTMP stream and allows operations like changing the current content at runtime, seeking within the content, and looping through a playlist built from a JSON array; it also supports replacing the whole playlist at runtime.
Currently I am reading H264- and AAC-encoded packets from mp4 files and sending them to the destination RTMP server after adjusting their PTS & DTS values, without any encoding or decoding.
But I want to apply overlay images to raw frames using the FFmpeg "overlay" filter after decoding the H264 packets. I looked at the filtering sample that comes with the FFmpeg examples:
#define _XOPEN_SOURCE 600 /* for usleep */
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/opt.h>
const char *filter_descr = "scale=78:24,transpose=cclock";
/* other way:
scale=78:24 [scl]; [scl] transpose=cclock // assumes "[in]" and "[out]" to be input output pads respectively
*/
static AVFormatContext *fmt_ctx;
static AVCodecContext *dec_ctx;
AVFilterContext *buffersink_ctx;
AVFilterContext *buffersrc_ctx;
AVFilterGraph *filter_graph;
static int video_stream_index = -1;
static int64_t last_pts = AV_NOPTS_VALUE;
static int open_input_file(const char *filename)
{
int ret;
AVCodec *dec;
if ((ret = avformat_open_input(&fmt_ctx, filename, NULL, NULL)) < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot open input file\n");
return ret;
}
if ((ret = avformat_find_stream_info(fmt_ctx, NULL)) < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot find stream information\n");
return ret;
}
/* select the video stream */
ret = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &dec, 0);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot find a video stream in the input file\n");
return ret;
}
video_stream_index = ret;
/* create decoding context */
dec_ctx = avcodec_alloc_context3(dec);
if (!dec_ctx)
return AVERROR(ENOMEM);
avcodec_parameters_to_context(dec_ctx, fmt_ctx->streams[video_stream_index]->codecpar);
/* init the video decoder */
if ((ret = avcodec_open2(dec_ctx, dec, NULL)) < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot open video decoder\n");
return ret;
}
return 0;
}
static int init_filters(const char *filters_descr)
{
char args[512];
int ret = 0;
const AVFilter *buffersrc = avfilter_get_by_name("buffer");
const AVFilter *buffersink = avfilter_get_by_name("buffersink");
AVFilterInOut *outputs = avfilter_inout_alloc();
AVFilterInOut *inputs = avfilter_inout_alloc();
AVRational time_base = fmt_ctx->streams[video_stream_index]->time_base;
enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE };
filter_graph = avfilter_graph_alloc();
if (!outputs || !inputs || !filter_graph) {
ret = AVERROR(ENOMEM);
goto end;
}
/* buffer video source: the decoded frames from the decoder will be inserted here. */
snprintf(args, sizeof(args),
"video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d",
dec_ctx->width, dec_ctx->height, dec_ctx->pix_fmt,
time_base.num, time_base.den,
dec_ctx->sample_aspect_ratio.num, dec_ctx->sample_aspect_ratio.den);
ret = avfilter_graph_create_filter(&buffersrc_ctx, buffersrc, "in",
args, NULL, filter_graph);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot create buffer source\n");
goto end;
}
/* buffer video sink: to terminate the filter chain. */
ret = avfilter_graph_create_filter(&buffersink_ctx, buffersink, "out",
NULL, NULL, filter_graph);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot create buffer sink\n");
goto end;
}
ret = av_opt_set_int_list(buffersink_ctx, "pix_fmts", pix_fmts,
AV_PIX_FMT_NONE, AV_OPT_SEARCH_CHILDREN);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot set output pixel format\n");
goto end;
}
/*
* Set the endpoints for the filter graph. The filter_graph will
* be linked to the graph described by filters_descr.
*/
/*
* The buffer source output must be connected to the input pad of
* the first filter described by filters_descr; since the first
* filter input label is not specified, it is set to "in" by
* default.
*/
outputs->name = av_strdup("in");
outputs->filter_ctx = buffersrc_ctx;
outputs->pad_idx = 0;
outputs->next = NULL;
/*
* The buffer sink input must be connected to the output pad of
* the last filter described by filters_descr; since the last
* filter output label is not specified, it is set to "out" by
* default.
*/
inputs->name = av_strdup("out");
inputs->filter_ctx = buffersink_ctx;
inputs->pad_idx = 0;
inputs->next = NULL;
if ((ret = avfilter_graph_parse_ptr(filter_graph, filters_descr,
&inputs, &outputs, NULL)) < 0)
goto end;
if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0)
goto end;
end:
avfilter_inout_free(&inputs);
avfilter_inout_free(&outputs);
return ret;
}
static void display_frame(const AVFrame *frame, AVRational time_base)
{
int x, y;
uint8_t *p0, *p;
int64_t delay;
if (frame->pts != AV_NOPTS_VALUE) {
if (last_pts != AV_NOPTS_VALUE) {
/* sleep roughly the right amount of time;
* usleep is in microseconds, just like AV_TIME_BASE. */
delay = av_rescale_q(frame->pts - last_pts,
time_base, AV_TIME_BASE_Q);
if (delay > 0 && delay < 1000000)
usleep(delay);
}
last_pts = frame->pts;
}
/* Trivial ASCII grayscale display. */
p0 = frame->data[0];
puts("\033c");
for (y = 0; y < frame->height; y++) {
p = p0;
for (x = 0; x < frame->width; x++)
putchar(" .-+#"[*(p++) / 52]);
putchar('\n');
p0 += frame->linesize[0];
}
fflush(stdout);
}
int main(int argc, char **argv)
{
int ret;
AVPacket packet;
AVFrame *frame;
AVFrame *filt_frame;
if (argc != 2) {
fprintf(stderr, "Usage: %s file\n", argv[0]);
exit(1);
}
frame = av_frame_alloc();
filt_frame = av_frame_alloc();
if (!frame || !filt_frame) {
perror("Could not allocate frame");
exit(1);
}
if ((ret = open_input_file(argv[1])) < 0)
goto end;
if ((ret = init_filters(filter_descr)) < 0)
goto end;
/* read all packets */
while (1) {
if ((ret = av_read_frame(fmt_ctx, &packet)) < 0)
break;
if (packet.stream_index == video_stream_index) {
ret = avcodec_send_packet(dec_ctx, &packet);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Error while sending a packet to the decoder\n");
break;
}
while (ret >= 0) {
ret = avcodec_receive_frame(dec_ctx, frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
break;
} else if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Error while receiving a frame from the decoder\n");
goto end;
}
frame->pts = frame->best_effort_timestamp;
/* push the decoded frame into the filtergraph */
if (av_buffersrc_add_frame_flags(buffersrc_ctx, frame, AV_BUFFERSRC_FLAG_KEEP_REF) < 0) {
av_log(NULL, AV_LOG_ERROR, "Error while feeding the filtergraph\n");
break;
}
/* pull filtered frames from the filtergraph */
while (1) {
ret = av_buffersink_get_frame(buffersink_ctx, filt_frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
break;
if (ret < 0)
goto end;
display_frame(filt_frame, buffersink_ctx->inputs[0]->time_base);
av_frame_unref(filt_frame);
}
av_frame_unref(frame);
}
}
av_packet_unref(&packet);
}
end:
avfilter_graph_free(&filter_graph);
avcodec_free_context(&dec_ctx);
avformat_close_input(&fmt_ctx);
av_frame_free(&frame);
av_frame_free(&filt_frame);
if (ret < 0 && ret != AVERROR_EOF) {
fprintf(stderr, "Error occurred: %s\n", av_err2str(ret));
exit(1);
}
exit(0);
}
That sample uses these filters:
"scale=78:24,transpose=cclock"
I compiled and ran it with a sample video file, but it just outputs fancy characters to the console; the code block given below is responsible for this:
/* Trivial ASCII grayscale display. */
p0 = frame->data[0];
puts("\033c");
for (y = 0; y < frame->height; y++) {
p = p0;
for (x = 0; x < frame->width; x++)
putchar(" .-+#"[*(p++) / 52]);
putchar('\n');
p0 += frame->linesize[0];
}
fflush(stdout);
I have no issues with encoding & decoding; I just don't know how to apply the "overlay" filter. Are there any tutorials out there that demonstrate how to use the "overlay" filter?
Just like in the example, except you use "overlay":
snprintf(args, sizeof(args), args here...);
avfilter_graph_create_filter(sink, avfilter_get_by_name("overlay"), "overlay", args, nullptr, graph);
Then you need to create two source pads, i.e.
avfilter_graph_create_filter(sourceX, avfilter_get_by_name("buffer"), nullptr, args, nullptr, m_graph);
and one sink pad. Then feed one source with the video frame and the other with the image to overlay.
The following code fragments will be helpful:
char args[512];
int ret = 0;
const AVFilter *bufferSrc = avfilter_get_by_name("buffer");
const AVFilter *bufferOvr = avfilter_get_by_name("buffer");
const AVFilter *bufferSink = avfilter_get_by_name("buffersink");
const AVFilter *ovrFilter = avfilter_get_by_name("overlay");
const AVFilter *colorFilter = avfilter_get_by_name("colorchannelmixer");
enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE };
fFilterGraph = avfilter_graph_alloc();
if (!fFilterGraph) {
ret = AVERROR(ENOMEM);
goto end;
}
/* buffer video source: the decoded frames from the decoder will be inserted here. */
snprintf(args, sizeof(args),
"video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d",
decCtx->width, decCtx->height, decCtx->pix_fmt,
fTimeBase.num, fTimeBase.den,
decCtx->sample_aspect_ratio.num, decCtx->sample_aspect_ratio.den);
ret = avfilter_graph_create_filter(&fBufSrc0Ctx, bufferSrc, "in0",
args, NULL, fFilterGraph);
if (ret < 0)
goto end;
/* buffer video overlay source: the overlayed frame from the file will be inserted here. */
snprintf(args, sizeof(args),
"video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d",
ovrCtx->width, ovrCtx->height, ovrCtx->pix_fmt,
fTimeBase.num, fTimeBase.den,
ovrCtx->sample_aspect_ratio.num, ovrCtx->sample_aspect_ratio.den);
ret = avfilter_graph_create_filter(&fBufSrc1Ctx, bufferOvr, "in1",
args, NULL, fFilterGraph);
if (ret < 0)
goto end;
/* color filter */
snprintf(args, sizeof(args), "aa=%f", (float)fWatermarkOpacity / 10.0);
ret = avfilter_graph_create_filter(&fColorFilterCtx, colorFilter, "colorFilter",
args, NULL, fFilterGraph);
if (ret < 0)
goto end;
/* overlay filter */
switch (fWatermarkPos) {
case 0:
/* Top left */
snprintf(args, sizeof(args), "x=%d:y=%d:repeatlast=1",
fWatermarkOffset, fWatermarkOffset);
break;
case 1:
/* Top right */
snprintf(args, sizeof(args), "x=W-w-%d:y=%d:repeatlast=1",
fWatermarkOffset, fWatermarkOffset);
break;
case 3:
/* Bottom left */
snprintf(args, sizeof(args), "x=%d:y=H-h-%d:repeatlast=1",
fWatermarkOffset, fWatermarkOffset);
break;
case 4:
/* Bottom right */
snprintf(args, sizeof(args), "x=W-w-%d:y=H-h-%d:repeatlast=1",
fWatermarkOffset, fWatermarkOffset);
break;
case 2:
default:
/* Center */
snprintf(args, sizeof(args), "x=(W-w)/2:y=(H-h)/2:repeatlast=1");
break;
}
ret = avfilter_graph_create_filter(&fOvrFilterCtx, ovrFilter, "overlay",
args, NULL, fFilterGraph);
if (ret < 0)
goto end;
/* buffer sink - destination of the final video */
ret = avfilter_graph_create_filter(&fBufSinkCtx, bufferSink, "out",
NULL, NULL, fFilterGraph);
if (ret < 0)
goto end;
ret = av_opt_set_int_list(fBufSinkCtx, "pix_fmts", pix_fmts,
AV_PIX_FMT_NONE, AV_OPT_SEARCH_CHILDREN);
if (ret < 0)
goto end;
/*
* Link all filters..
*/
avfilter_link(fBufSrc0Ctx, 0, fOvrFilterCtx, 0);
avfilter_link(fBufSrc1Ctx, 0, fColorFilterCtx, 0);
avfilter_link(fColorFilterCtx, 0, fOvrFilterCtx, 1);
avfilter_link(fOvrFilterCtx, 0, fBufSinkCtx, 0);
if ((ret = avfilter_graph_config(fFilterGraph, NULL)) < 0)
goto end;
end:
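Once avfilter_graph_config() succeeds, frames are pushed into both buffer sources and pulled from the sink. A rough sketch of the per-frame loop, assuming decoded frames videoFrame and overlayFrame plus an allocated filtFrame (placeholder names):
/* with repeatlast=1 the overlay input can be fed once and is then reused */
if (av_buffersrc_add_frame_flags(fBufSrc0Ctx, videoFrame, AV_BUFFERSRC_FLAG_KEEP_REF) < 0 ||
av_buffersrc_add_frame_flags(fBufSrc1Ctx, overlayFrame, AV_BUFFERSRC_FLAG_KEEP_REF) < 0)
{
av_log(NULL, AV_LOG_ERROR, "Error while feeding the filtergraph\n");
}
while (av_buffersink_get_frame(fBufSinkCtx, filtFrame) >= 0)
{
/* filtFrame now holds the video with the watermark applied; encode or display it */
av_frame_unref(filtFrame);
}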

FFMpeg: extracting audio frames with avcodec creates noise

In my Android app, I integrated the FFmpeg library and am trying to use it to extract audio samples from an audio file on the fly.
Here is what I did (I simplified the code here so it's easier to read):
AVPacket packet;
AVCodecContext *codecContext = NULL;
AVFormatContext *formatContext;
AVFrame *frame = NULL;
SwrContext *swrContext;
int audio_stream_index = -1;
int ret;
uint8_t *localBuffer;
int FFMpegPlayer::createFFmpeg(const char *filename)
{
int ret;
AVCodec *dec;
frame = av_frame_alloc();
av_register_all();
avformat_open_input(&formatContext, filename, NULL, NULL);
avformat_find_stream_info(formatContext, NULL);
// select the audio stream
audio_stream_index = av_find_best_stream(formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, &dec, 0);
// create decoding context
codecContext = avcodec_alloc_context3(dec);
avcodec_parameters_to_context(codecContext, formatContext->streams[audio_stream_index]->codecpar);
av_opt_set_int(codecContext, "refcounted_frames", 1, 0);
// init the audio decoder
avcodec_open2(codecContext, dec, NULL);
swrContext = swr_alloc();
// we assume here that the audio file is a 44100 Hz stereo audio file
localBuffer = (uint8_t *) av_malloc(44100 * 2);
swr_alloc_set_opts(swrContext, AV_CH_LAYOUT_STEREO, AV_SAMPLE_FMT_FLT, codecContext->sample_rate,
codecContext->channel_layout, codecContext->sample_fmt, codecContext->sample_rate, 0,
NULL);
swr_init(swrContext);
return 0;
}
void FFMpegPlayer::getPcmFloat(float *buffer)
{
// init :
int i, ch, dataSize;
bool extracted = false;
float sample = 0;
// extract :
while (av_read_frame(formatContext, &packet) >= 0 && !extracted)
{
if (packet.stream_index == audio_stream_index)
{
// send the packet with the compressed data to the decoder
ret = avcodec_send_packet(codecContext, &packet);
// read all the output frames (in general there may be any number of them)
while (ret >= 0)
{
ret = avcodec_receive_frame(codecContext, frame);
if (ret == AVERROR(EAGAIN))
{
LOGW("AVERROR(EAGAIN)\n");
break;
}
else if (ret == AVERROR_EOF)
{
LOGW("AVERROR_EOF\n");
break;
}
dataSize = av_get_bytes_per_sample(codecContext->sample_fmt);
swr_convert(swrContext, &localBuffer, 44100 * 2, (const uint8_t **) frame->data, frame->nb_samples);
int a = 0;
for (i = 0; i < frame->nb_samples; i++)
{
for (ch = 0; ch < codecContext->channels; ch++)
{
memcpy(&sample, &localBuffer[(codecContext->channels * i + ch) * dataSize], dataSize);
buffer[a] = sample;
a++;
}
}
// exit extract:
extracted = true;
}
}
}
}
Anytime I need audio samples, I call the getPcmFloat() function.
Thanks to that code, I can clearly listen to the audio file.
The problem is: I have some crackling in the sound, and I have no idea where it comes from or how to fix it.
Does anyone know how to get the exact frames without glitches?
Thanks for your help.
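Two things worth checking in the code above, offered as hedged guesses rather than a confirmed fix: swr_convert()'s third argument is the output capacity in samples per channel, not in bytes, and dataSize should be derived from the output format (AV_SAMPLE_FMT_FLT is 4 bytes per sample) rather than from codecContext->sample_fmt. A sizing sketch using the names from the question:
/* sketch: allocate for interleaved float stereo, pass capacity in samples */
int maxSamples = 44100; /* assumed worst case per call */
localBuffer = (uint8_t *) av_malloc(maxSamples * 2 * sizeof(float)); /* 2 channels */
int outSamples = swr_convert(swrContext, &localBuffer, maxSamples,
(const uint8_t **) frame->data, frame->nb_samples);
/* read back outSamples * 2 floats; outSamples may differ from
 * frame->nb_samples when the resampler buffers data internally */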

FFMPEG H264 encode each single image

I currently encode a QImage from RGB888 to H264, but I want to encode each image by itself (even if this is not the perfect way).
I am able to encode the image, but I have to send the same image 46 times before I get a packet, and I don't know what I am doing wrong (probably a wrong configuration of the encoder, but I cannot find the issue there).
Afterwards I decode this image and then convert it back to a QImage. I do this only for testing some other code.
avcodec_register_all();
AVCodec *nVidiaCodec = avcodec_find_encoder_by_name("h264_nvenc");
if (!nVidiaCodec)
{
return false;
}
AVCodecContext* av_codec_context_ = NULL;
av_codec_context_ = avcodec_alloc_context3(nVidiaCodec);
if (!av_codec_context_)
{
return false;
}
av_codec_context_->width = dst->width;
av_codec_context_->height = dst->height;
av_codec_context_->pix_fmt = AV_PIX_FMT_YUV420P;
av_codec_context_->gop_size = 1;
av_codec_context_->keyint_min = 0;
av_codec_context_->scenechange_threshold = 0;
av_codec_context_->bit_rate = 8000000;
av_codec_context_->time_base.den = 1;
av_codec_context_->time_base.num = 1;
av_codec_context_->refs = 0;
av_codec_context_->qmin = 1;
av_codec_context_->qmax = 1;
av_codec_context_->b_frame_strategy = 0;
av_codec_context_->max_b_frames = 0;
av_codec_context_->thread_count = 1;
av_opt_set(av_codec_context_, "preset", "slow", 0);
av_opt_set(av_codec_context_, "tune", "zerolatency", 0);
int ret = avcodec_open2(av_codec_context_, nVidiaCodec, NULL);
if (0 > ret)
{
return false;
}
AVFrame *picture = av_frame_alloc();
picture->format = AV_PIX_FMT_RGB24;
picture->width = dst->width;
picture->height = dst->height;
ret = avpicture_fill((AVPicture *)picture, imgSrc.bits(), AV_PIX_FMT_RGB24, dst->width, dst->height);
if (0 > ret)
{
return false;
}
AVFrame *tmp_picture = av_frame_alloc();
tmp_picture->format = AV_PIX_FMT_YUV420P;
tmp_picture->width = dst->width;
tmp_picture->height = dst->height;
ret = av_frame_get_buffer(tmp_picture, 32);
SwsContext *img_convert_ctx = sws_getContext(av_codec_context_->width, av_codec_context_->height, AV_PIX_FMT_RGB24, av_codec_context_->width, av_codec_context_->height, av_codec_context_->pix_fmt, SWS_BICUBIC, NULL, NULL, NULL);
if (!img_convert_ctx)
{
return false;
}
ret = sws_scale(img_convert_ctx, picture->data, picture->linesize, 0, av_codec_context_->height, tmp_picture->data, tmp_picture->linesize);
if (0 > ret)
{
return false;
}
ret = avcodec_send_frame(av_codec_context_, tmp_picture);
if (0 > ret)
{
return false;
}
AVPacket pkt;
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
do
{
ret = avcodec_receive_packet(av_codec_context_, &pkt);
if (ret == 0)
{
break;
}
else if ((ret < 0) && (ret != AVERROR(EAGAIN)))
{
return false;
}
else if (ret == AVERROR(EAGAIN))
{
ret = avcodec_send_frame(av_codec_context_, tmp_picture);
if (0 > ret)
{
return false;
}
}
} while (ret == 0);
// the do-while loop runs 46 times before I get the packet, but I want to get the packet on the first call
It would be very nice if you could help me.
Thanks, guys.
I assume you just want to encode a single frame. You need to flush the encoder after you have sent your single uncompressed frame, by sending NULL instead of a valid frame.
int result = 0;
// encoder init
// send one uncompressed frame
result = avcodec_send_frame(av_codec_context_, tmp_picture);
if (result < 0) return false;
// send NULL to indicate flushing
result = avcodec_send_frame(av_codec_context_, NULL);
if (result < 0) return false;
while (result != AVERROR_EOF)
{
result = avcodec_receive_packet(av_codec_context_, &pkt);
if (!result)
{
// you should have your encoded frame; do something with it
}
}
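A hedged refinement of the loop above: each packet that avcodec_receive_packet() returns should be released with av_packet_unref() once consumed, and errors other than AVERROR_EOF deserve their own exit path:
result = avcodec_send_frame(av_codec_context_, tmp_picture);
if (result < 0) return false;
result = avcodec_send_frame(av_codec_context_, NULL); /* start flushing */
if (result < 0) return false;
while (result >= 0)
{
result = avcodec_receive_packet(av_codec_context_, &pkt);
if (result == AVERROR_EOF)
{
break; /* fully flushed */
}
if (result < 0)
{
return false; /* real error */
}
/* pkt holds the encoded frame here; write it out, then release it */
av_packet_unref(&pkt);
}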

FFMPEG (C++) convert & compress a single image out of buffer

I am trying to encode (with compression) and decode (without compression) an image with FFmpeg. But if I try to get the sent image back with avcodec_receive_packet, I only get the error AVERROR(EAGAIN).
It doesn't matter what I change ... the outcome is always AVERROR(EAGAIN). Is it maybe a problem of sending just one single frame to the encoder? And if so, how do I fix it?
Code (only relevant stuff shown):
avcodec_register_all();
/* ------ init codec ------------------*/
AVCodec *codec;
codec = avcodec_find_decoder(AV_CODEC_ID_H264);
if (!codec)
{
print("compressH264, could not find decoder:\"AV_CODEC_ID_H264\"!!!");
return false;
}
AVCodec *nVidiaCodec = avcodec_find_encoder_by_name("h264_nvenc");
if (!nVidiaCodec)
{
print("err");
}
/* ------ ------------ ------------------*/
/* ------ init context ------------------*/
AVCodecContext* av_codec_context_ = NULL;
av_codec_context_ = avcodec_alloc_context3(nVidiaCodec);
if (!av_codec_context_)
{
print("compressH264, avcodec_alloc_context3 failed!!!");
return false;
}
int w = imgSrc.width();
int h = imgSrc.height();
if ((w % 2) != 0)
{
++w;
}
if ((h % 2) != 0)
{
++h;
}
av_codec_context_->width = w;
av_codec_context_->height = h;
av_codec_context_->pix_fmt = AV_PIX_FMT_YUV420P;
av_codec_context_->gop_size = 1;
av_codec_context_->max_b_frames = 1;
av_codec_context_->bit_rate = 400000;
av_codec_context_->time_base.den = 1;
av_codec_context_->time_base.num = 1;
av_opt_set(av_codec_context_->priv_data, "preset", "slow", 0);
int ret = avcodec_open2(av_codec_context_, nVidiaCodec, NULL);
if (0 > ret)
{
print("compressH264, could not open codec context for decoder:\"AV_CODEC_ID_H264\"!!!");
return false;
}
AVFrame *picture = av_frame_alloc();
picture->format = AV_PIX_FMT_RGB24;
picture->width = w;
picture->height = h;
ret = avpicture_fill((AVPicture *)picture, imgSrc.bits(), AV_PIX_FMT_RGB24, w, h);
if (0 > ret)
{
print("compressH264, avpicture_fill - failed!!!");
return false;
}
AVFrame *tmp_picture = av_frame_alloc();
tmp_picture->format = AV_PIX_FMT_YUV420P;
tmp_picture->width = w;
tmp_picture->height = h;
ret = av_frame_get_buffer(tmp_picture, 32);
SwsContext *img_convert_ctx = sws_getContext(av_codec_context_->width, av_codec_context_->height, AV_PIX_FMT_RGB24, av_codec_context_->width, av_codec_context_->height, av_codec_context_->pix_fmt, SWS_BICUBIC, NULL, NULL, NULL);
ret = sws_scale(img_convert_ctx, picture->data, picture->linesize, 0, av_codec_context_->height, tmp_picture->data, tmp_picture->linesize);
int h264Size = avpicture_get_size(AV_PIX_FMT_YUV420P, w, h);
ret = avcodec_send_frame(av_codec_context_, tmp_picture);
if (0 > ret)
{
char err[AV_ERROR_MAX_STRING_SIZE];
av_make_error_string(err, AV_ERROR_MAX_STRING_SIZE, ret);
print("compressH264, avcodec_send_frame: %s", err);
}
AVPacket *pkt = av_packet_alloc();
while (ret >= 0)
{
ret = avcodec_receive_packet(av_codec_context_, pkt);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
{
break;
}
else if (ret < 0)
{
fprintf(stderr, "Error during encoding\n");
exit(1);
}
av_packet_unref(pkt);
}
print("success");
Everything works well until:
- avcodec_receive_packet ... I get the error AVERROR(EAGAIN) every time.
I can only start decoding once I have the compressed image.
Thanks for your help guys.
Edit:
If I now use the following code, I get a packet and ret == 0, but I have to send the same image 46 times ... this makes no sense to me.
do
{
ret = avcodec_receive_packet(av_codec_context_, &pkt);
if (ret == 0)
{
break;
}
else if ((ret < 0) && (ret != AVERROR(EAGAIN)))
{
coutF("error");
}
else if (ret == AVERROR(EAGAIN))
{
ret = avcodec_send_frame(av_codec_context_, tmp_picture);
if (0 > ret)
{
char err[AV_ERROR_MAX_STRING_SIZE];
av_make_error_string(err, AV_ERROR_MAX_STRING_SIZE, ret);
coutFRed("compressH264, avcodec_send_frame: %s", err);
}
coutF("cnt:%d", ++cnt);
}
} while (ret == 0);
Edit:
Good morning,
after more investigation I found the issue: I have to send the same frame many times because of the H264 keyframe handling. The question now is whether it is possible to remove the standard H264 buffering from the encoder and just let FFmpeg convert one single frame.
I am not sure, but following an FFmpeg example it seems that this just means it is done, and you should return like they do in this code snippet:
/* if no more frames for output - returns AVERROR(EAGAIN)
* if flushed and no more frames for output - returns AVERROR_EOF
* rewrite retcode to 0 to show it as normal procedure completion
*/
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
ret = 0;
In their comments they seem to imply that it signals "normal procedure completion".
The answer to this issue is this:
do
{
ret = avcodec_receive_packet(av_codec_context_, &pkt);
if (ret == 0)
{
break;
}
else if ((ret < 0) && (ret != AVERROR(EAGAIN)))
{
return false;
}
else if (ret == AVERROR(EAGAIN))
{
ret = avcodec_send_frame(av_codec_context_, NULL);
if (0 > ret)
{
return false;
}
}
} while (ret == 0);
The NULL frame will flush the buffer and we are able to get the encoded frame.
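Note that after the NULL flush the encoder is drained and accepts no further frames, so this pattern suits one-shot encoding only. If the encoder should stay open and deliver one packet per submitted frame, the usual approach (a hedged suggestion, since option support varies by encoder) is to configure it for zero delay before avcodec_open2():
/* sketch: settings that typically remove encoder output delay */
av_codec_context_->max_b_frames = 0; /* no frame reordering */
/* h264_nvenc exposes a private "delay" option for output latency */
av_opt_set(av_codec_context_->priv_data, "delay", "0", 0);
/* the libx264 equivalent would be tune=zerolatency */
av_opt_set(av_codec_context_->priv_data, "tune", "zerolatency", 0);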

Decoding H264 Stream Fails Eventually

After many weeks of trying to connect to my home CCTV in my C++/Qt project using libav on my Raspberry Pi (armv7), I am able to display my camera correctly for a good while (at least 20 minutes) with hardly any NAL warnings in the console (though some warnings about invalid NAL units). Eventually, however, I start to see just a grey screen and lots of NAL warnings, and I have to close and reopen the connection to get my camera displaying correctly again. I have searched the internet many times to make sure I understand av_parser_parse2 correctly, and I think I do.
I have pasted my three main functions below in the hope that somebody can tell me: is there more to decoding an H264 stream using libav than I currently understand?
Thank you.
void MainWindow::playStreamToScreen(MainWindow* app)
{
avcodec_register_all();
codec = avcodec_find_decoder(AV_CODEC_ID_H264);
if (!codec) {
qDebug() << "codec not found\n";
return;
}
c = avcodec_alloc_context3(codec);
/* if(codec->capabilities & CODEC_CAP_TRUNCATED)
c->flags |= CODEC_FLAG_TRUNCATED;
if(codec->capabilities & CODEC_FLAG2_CHUNKS)
c->flags |= CODEC_FLAG2_CHUNKS;*//* We may send incomplete frames */
picture = av_frame_alloc();
pFrameRGB=av_frame_alloc();
int numBytes=avpicture_get_size(pix_fmt_dst, myWidth, myHeight);
buffer= (uint8_t*)malloc(numBytes);
if(avpicture_fill((AVPicture *)pFrameRGB, buffer, pix_fmt_dst,
myWidth, myHeight) < 0)
{
qDebug() << "failed fill picture";
}
if (avcodec_open2(c, codec, NULL) < 0) {
qDebug() << "could not open codec\n";
return;
}
frame = 0;
if(parser == NULL)parser = av_parser_init(AV_CODEC_ID_H264);
if(parser == NULL)
{
qDebug() << "after init parser is null";
return;
}
parser->flags |= PARSER_FLAG_ONCE;
struct timeval t;
float inv_fps2;// = 4;
int ii = 0;
inv_fps2 = 1e6/9.98;//av_q2d(c->time_base);
qDebug() << "before timebase";
c->time_base = (AVRational){1,10};
c->bit_rate = 251000;
c->width = 352;
c->height = 288;
c->pix_fmt = AV_PIX_FMT_YUV420P;
if(img_convert_ctx == NULL)
{
img_convert_ctx = sws_getCachedContext ( img_convert_ctx, c->width, c->height,
AV_PIX_FMT_YUV420P, myWidth, myHeight, AV_PIX_FMT_RGB555,
SWS_LANCZOS | SWS_ACCURATE_RND , NULL, NULL, NULL );
}
running = true;
int retval;
gettimeofday(&t, 0);
}
void MainWindow::display_frame(AVCodecContext* cc, AVFrame *frame, SwsContext* img_convert_ctx, MainWindow* app)
{
sws_scale ( img_convert_ctx, frame->data, frame->linesize, 0,
288, pFrameRGB->data, pFrameRGB->linesize );
app->currentImage = QImage(pFrameRGB->data[0], app->myWidth, app->myHeight, pFrameRGB->linesize[0], QImage::Format_RGB555) .copy();// - See more at: http://www.mzan.com/article/30784549-best-simplest-way-to-display-ffmpeg-frames-in-qt5-solved.shtml#sthash.RvAFLQsc.dpuf
QImage img2 = app->currentImage.scaled(app->width(), app->height(), Qt::KeepAspectRatio);
QMetaObject::invokeMethod(app->myLabel, "setImage", Qt::QueuedConnection, Q_ARG(QImage, img2));
app->myLabel->setFixedSize(app->width(), app->height());
}
void MainWindow::socketReadyRead()
{
if(connectingToLive && i == 0)
{
qDebug() << "starting live stream";
i++;
this->playStreamToScreen(this);
}
else if(connectingToLive && i > 0)
{
QByteArray temp;
int inbuf_start = 0;
int inbuf_len = 0;
temp.append(socket->readAll());//reads from network here
inbuf_len = temp.size();
inbuf_start = 0;
int out_size;
while (inbuf_len)
{
av_init_packet(&packet2);
packet2.data = 0;
packet2.size = 0;
len = av_parser_parse2(parser, c, &packet2.data, &packet2.size,
(uint8_t*)temp.constData()+ inbuf_start, inbuf_len,
AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
inbuf_start += len;
inbuf_len -= len;
if(packet2.size == 0 && len >= 0)
{
break;
}
else if(len)
{
retval = avcodec_decode_video2(c, picture, &got_picture, &packet2);
if (got_picture && retval > 0)
{
display_frame(c, picture, img_convert_ctx, this);
}
}
}
}
}
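For comparison, here is a hedged sketch of the canonical parse-and-decode loop, modeled on FFmpeg's decode_video example and using the send/receive API instead of the deprecated avcodec_decode_video2(). One subtle difference from socketReadyRead() above: a parsed packet should be decoded whenever its size is non-zero, even when av_parser_parse2() reports 0 bytes consumed, because the parser can emit a packet from previously buffered data:
AVPacket pkt;
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
uint8_t *buf = (uint8_t *) temp.constData(); /* bytes read from the socket */
int bufSize = temp.size();
while (bufSize > 0)
{
int len = av_parser_parse2(parser, c, &pkt.data, &pkt.size,
buf, bufSize, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
if (len < 0)
{
break; /* parse error */
}
buf += len;
bufSize -= len;
if (pkt.size == 0)
{
continue; /* parser needs more data */
}
if (avcodec_send_packet(c, &pkt) == 0)
{
while (avcodec_receive_frame(c, picture) == 0)
display_frame(c, picture, img_convert_ctx, this);
}
}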