Resampling audio using libswresample, leaves small amount of noise after resampling - c++

I'm trying to resample audio from 44Khz to 48Khz and I'm getting s small light noise after resampling. As if someone is gently ticking the mic. This happens both ways. From 48Khz to 44Khz and vice versa.
I've read that this can happen because swrContext still has some data left and that I shoudl flush the context before resampling next frame. And although this helps a little (less noticeable noise), it's still present.
I've tried using FFmpeg resample filter instead, but the output is just loud incoherent noise. I'm pretty sure that libswresample should not output any noise on resampling which means that I just don't know how to use it well and I'm missing some options.
This is the code for resampler.
int ResampleFrame(VideoState * videoState, AVFrame *decoded_audio_frame, enum AVSampleFormat out_sample_fmt, uint8_t * out_buf)
{
int in_sample_rate = videoState->audio->ptrAudioCodecCtx_->sample_rate;
int out_sample_rate = SAMPLE_RATE;
// get an instance of the AudioResamplingState struct, create if NULL
AudioResamplingState* arState = getAudioResampling(videoState->audio->ptrAudioCodecCtx_->channel_layout);
if (!arState->swr_ctx)
{
printf("swr_alloc error.\n");
return -1;
}
// get input audio channels
arState->in_channel_layout = (videoState->audio->ptrAudioCodecCtx_->channels ==
av_get_channel_layout_nb_channels(videoState->audio->ptrAudioCodecCtx_->channel_layout)) ?
videoState->audio->ptrAudioCodecCtx_->channel_layout :
av_get_default_channel_layout(videoState->audio->ptrAudioCodecCtx_->channels);
// check input audio channels correctly retrieved
if (arState->in_channel_layout <= 0)
{
printf("in_channel_layout error.\n");
return -1;
}
arState->out_channel_layout = AV_CH_LAYOUT_STEREO;
// retrieve number of audio samples (per channel)
arState->in_nb_samples = decoded_audio_frame->nb_samples;
if (arState->in_nb_samples <= 0)
{
printf("in_nb_samples error.\n");
return -1;
}
// Set SwrContext parameters for resampling
av_opt_set_int(arState->swr_ctx, "in_channel_layout", arState->in_channel_layout, 0);
av_opt_set_int(arState->swr_ctx, "in_sample_rate", in_sample_rate, 0);
av_opt_set_sample_fmt(arState->swr_ctx, "in_sample_fmt", videoState->audio->ptrAudioCodecCtx_->sample_fmt, 0);
// Set SwrContext parameters for resampling
av_opt_set_int(arState->swr_ctx, "out_channel_layout", arState->out_channel_layout, 0);
av_opt_set_int(arState->swr_ctx, "out_sample_rate", out_sample_rate, 0);
av_opt_set_sample_fmt(arState->swr_ctx, "out_sample_fmt", out_sample_fmt, 0);
// initialize SWR context after user parameters have been set
int ret = swr_init(arState->swr_ctx);
if (ret < 0)
{
printf("Failed to initialize the resampling context.\n");
return -1;
}
// retrieve output samples number taking into account the progressive delay
int64_t delay = swr_get_delay(arState->swr_ctx, videoState->audio->ptrAudioCodecCtx_->sample_rate) + arState->in_nb_samples;
arState->out_nb_samples = av_rescale_rnd(delay, out_sample_rate, in_sample_rate, AV_ROUND_UP );
// check output samples number was correctly rescaled
if (arState->out_nb_samples <= 0)
{
printf("av_rescale_rnd error\n");
return -1;
}
// get number of output audio channels
arState->out_nb_channels = av_get_channel_layout_nb_channels(arState->out_channel_layout);
// allocate data pointers array for arState->resampled_data and fill data
// pointers and linesize accordingly
// check memory allocation for the resampled data was successful
ret = av_samples_alloc_array_and_samples(&arState->resampled_data, &arState->out_linesize, arState->out_nb_channels, arState->out_nb_samples, out_sample_fmt, 0);
if (ret < 0)
{
printf("av_samples_alloc_array_and_samples() error: Could not allocate destination samples.\n");
return -1;
}
if (arState->swr_ctx)
{
// do the actual audio data resampling
// check audio conversion was successful
int ret_num_samples = swr_convert(arState->swr_ctx,arState->resampled_data,arState->out_nb_samples,(const uint8_t**)decoded_audio_frame->data, decoded_audio_frame->nb_samples);
//int ret_num_samples = swr_convert_frame(arState->swr_ctx,arState->resampled_data,arState->out_nb_samples,(const uint8_t**)decoded_audio_frame->data, decoded_audio_frame->nb_samples);
if (ret_num_samples < 0)
{
printf("swr_convert_error.\n");
return -1;
}
// get the required buffer size for the given audio parameters
// check audio buffer size
arState->resampled_data_size = av_samples_get_buffer_size(&arState->out_linesize, arState->out_nb_channels,ret_num_samples,out_sample_fmt,1);
if (arState->resampled_data_size < 0)
{
printf("av_samples_get_buffer_size error.\n");
return -1;
}
} else {
printf("swr_ctx null error.\n");
return -1;
}
// copy the resampled data to the output buffer
memcpy(out_buf, arState->resampled_data[0], arState->resampled_data_size);
// flush the swr context
int delayed = swr_convert(arState->swr_ctx,arState->resampled_data,arState->out_nb_samples,NULL,0);
if (arState->resampled_data)
{
av_freep(&arState->resampled_data[0]);
}
av_freep(&arState->resampled_data);
arState->resampled_data = NULL;
int ret_data_size = arState->resampled_data_size;
return ret_data_size;
}
I also tries using the filter as shown here but my output is just noise.
This is my filter code
int ResampleFrame(AVFrame *frame, uint8_t *out_buf)
{
/* Push the decoded frame into the filtergraph */
qint32 ret;
ret = av_buffersrc_add_frame_flags(buffersrc_ctx1, frame, AV_BUFFERSRC_FLAG_KEEP_REF);
if (ret < 0)
{
printf("ResampleFrame: Error adding frame to buffer\n");
// Delete input frame and return null
av_frame_unref(frame);
return 0;
}
//printf("resampling\n");
AVFrame *resampled_frame = av_frame_alloc();
/* Pull filtered frames from the filtergraph */
ret = av_buffersink_get_frame(buffersink_ctx1, resampled_frame);
/* Set the timestamp on the resampled frame */
resampled_frame->best_effort_timestamp = resampled_frame->pts;
if (ret < 0)
{
av_frame_unref(frame);
av_frame_unref(resampled_frame);
return 0;
}
int buffer_size = av_samples_get_buffer_size(NULL, 2,resampled_frame->nb_samples,AV_SAMPLE_FMT_S16,1);
memcpy(out_buf,resampled_frame->data,buffer_size);
//av_frame_unref(frame);
av_frame_unref(resampled_frame);
return buffer_size;
}
QString filter_description1 = "aresample=48000,aformat=sample_fmts=s16:channel_layouts=stereo,asetnsamples=n=1024:p=0";
int InitAudioFilter(AVStream *inputStream)
{
char args[512];
int ret;
const AVFilter *buffersrc = avfilter_get_by_name("abuffer");
const AVFilter *buffersink = avfilter_get_by_name("abuffersink");
AVFilterInOut *outputs = avfilter_inout_alloc();
AVFilterInOut *inputs = avfilter_inout_alloc();
filter_graph = avfilter_graph_alloc();
const enum AVSampleFormat out_sample_fmts[] = {AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE};
const int64_t out_channel_layouts[] = {AV_CH_LAYOUT_STEREO, -1};
const int out_sample_rates[] = {48000, -1};
snprintf(args, sizeof(args), "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%" PRIx64,
inputStream->codec->time_base.num, inputStream->codec->time_base.den,
inputStream->codec->sample_rate,
av_get_sample_fmt_name(inputStream->codec->sample_fmt),
inputStream->codec->channel_layout);
ret = avfilter_graph_create_filter(&buffersrc_ctx1, buffersrc, "in", args, NULL, filter_graph);
if (ret < 0)
{
printf("InitAudioFilter: Unable to create buffersrc\n");
return -1;
}
ret = avfilter_graph_create_filter(&buffersink_ctx1, buffersink, "out", NULL, NULL, filter_graph);
if (ret < 0)
{
printf("InitAudioFilter: Unable to create buffersink\n");
return ret;
}
// set opt SAMPLE FORMATS
ret = av_opt_set_int_list(buffersink_ctx1, "sample_fmts", out_sample_fmts, -1, AV_OPT_SEARCH_CHILDREN);
if (ret < 0)
{
printf("InitAudioFilter: Cannot set output sample format\n");
return ret;
}
// set opt CHANNEL LAYOUTS
ret = av_opt_set_int_list(buffersink_ctx1, "channel_layouts", out_channel_layouts, -1, AV_OPT_SEARCH_CHILDREN);
if (ret < 0) {
printf("InitAudioFilter: Cannot set output channel layout\n");
return ret;
}
// set opt OUT SAMPLE RATES
ret = av_opt_set_int_list(buffersink_ctx1, "sample_rates", out_sample_rates, -1, AV_OPT_SEARCH_CHILDREN);
if (ret < 0)
{
printf("InitAudioFilter: Cannot set output sample rate\n");
return ret;
}
/* Endpoints for the filter graph. */
outputs -> name = av_strdup("in");
outputs -> filter_ctx = buffersrc_ctx1;
outputs -> pad_idx = 0;
outputs -> next = NULL;
/* Endpoints for the filter graph. */
inputs -> name = av_strdup("out");
inputs -> filter_ctx = buffersink_ctx1;
inputs -> pad_idx = 0;
inputs -> next = NULL;
if ((ret = avfilter_graph_parse_ptr(filter_graph, filter_description1.toStdString().c_str(), &inputs, &outputs, NULL)) < 0)
{
printf("InitAudioFilter: Could not add the filter to graph\n");
}
if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0)
{
printf("InitAudioFilter: Could not configure the graph\n");
}
/* Print summary of the sink buffer
* Note: args buffer is reused to store channel layout string */
AVFilterLink *outlink = buffersink_ctx1->inputs[0];
av_get_channel_layout_string(args, sizeof(args), -1, outlink->channel_layout);
QString str = args;
printf("Output: srate:%dHz fmt:%s chlayout: %s\n", (int) outlink->sample_rate,
av_get_sample_fmt_name((AVSampleFormat) outlink->format),
str.toStdString().c_str());
filterGraphInitialized_ = true;
}
And since I don't have much experience with filters or audio for that matter, I'm also probably missing something here. But Can't figure out what.
Thanks

Related

FFMPEG using AV_PIX_FMT_D3D11 gives "Error registering the input resource" from NVENC

Input frames start on the GPU as ID3D11Texture2D pointers.
I encode them to H264 using FFMPEG + NVENC. NVENC works perfectly if I download the textures to CPU memory as format AV_PIX_FMT_BGR0, but I'd like to cut out the CPU texture download entirely, and pass the GPU memory pointer directly into the encoder in native format. I write frames like this:
int write_gpu_video_frame(ID3D11Texture2D* gpuTex, AVFormatContext* oc, OutputStream* ost) {
AVFrame *hw_frame = ost->hw_frame;
printf("gpuTex address = 0x%x\n", &gpuTex);
hw_frame->data[0] = (uint8_t *) gpuTex;
hw_frame->data[1] = (uint8_t *) (intptr_t) 0;
hw_frame->pts = ost->next_pts++;
return write_frame(oc, ost->enc, ost->st, hw_frame);
// write_frame is identical to sample code in ffmpeg repo
}
Running the code with this modification gives the following error:
gpuTex address = 0x4582f6d0
[h264_nvenc # 00000191233e1bc0] Error registering an input resource: invalid call (9):
[h264_nvenc # 00000191233e1bc0] Could not register an input HW frame
Error sending a frame to the encoder: Unknown error occurred
Here's some supplemental code used in setting up and configuring the hw context and encoder:
/* A few config flags */
#define ENABLE_NVENC TRUE
#define USE_D3D11 TRUE // Skip downloading textures to CPU memory and send it straight to NVENC
/* Init hardware frame context */
static int set_hwframe_ctx(AVCodecContext* ctx, AVBufferRef* hw_device_ctx) {
AVBufferRef* hw_frames_ref;
AVHWFramesContext* frames_ctx = NULL;
int err = 0;
if (!(hw_frames_ref = av_hwframe_ctx_alloc(hw_device_ctx))) {
fprintf(stderr, "Failed to create HW frame context.\n");
throw;
}
frames_ctx = (AVHWFramesContext*) (hw_frames_ref->data);
frames_ctx->format = AV_PIX_FMT_D3D11;
frames_ctx->sw_format = AV_PIX_FMT_NV12;
frames_ctx->width = STREAM_WIDTH;
frames_ctx->height = STREAM_HEIGHT;
//frames_ctx->initial_pool_size = 20;
if ((err = av_hwframe_ctx_init(hw_frames_ref)) < 0) {
fprintf(stderr, "Failed to initialize hw frame context. Error code: %s\n", av_err2str(err));
av_buffer_unref(&hw_frames_ref);
throw;
}
ctx->hw_frames_ctx = av_buffer_ref(hw_frames_ref);
if (!ctx->hw_frames_ctx)
err = AVERROR(ENOMEM);
av_buffer_unref(&hw_frames_ref);
return err;
}
/* Add an output stream. */
static void add_video_stream(
OutputStream* ost,
AVFormatContext* oc,
const AVCodec** codec,
enum AVCodecID codec_id,
int width,
int height
) {
AVCodecContext* c;
int i;
bool nvenc = false;
/* find the encoder */
if (ENABLE_NVENC) {
printf("Getting nvenc encoder\n");
*codec = avcodec_find_encoder_by_name("h264_nvenc");
nvenc = true;
}
if (!ENABLE_NVENC || *codec == NULL) {
printf("Getting standard encoder\n");
avcodec_find_encoder(codec_id);
nvenc = false;
}
if (!(*codec)) {
fprintf(stderr, "Could not find encoder for '%s'\n",
avcodec_get_name(codec_id));
exit(1);
}
ost->st = avformat_new_stream(oc, NULL);
if (!ost->st) {
fprintf(stderr, "Could not allocate stream\n");
exit(1);
}
ost->st->id = oc->nb_streams - 1;
c = avcodec_alloc_context3(*codec);
if (!c) {
fprintf(stderr, "Could not alloc an encoding context\n");
exit(1);
}
ost->enc = c;
printf("Using video codec %s\n", avcodec_get_name(codec_id));
c->codec_id = codec_id;
c->bit_rate = 4000000;
/* Resolution must be a multiple of two. */
c->width = STREAM_WIDTH;
c->height = STREAM_HEIGHT;
/* timebase: This is the fundamental unit of time (in seconds) in terms
* of which frame timestamps are represented. For fixed-fps content,
* timebase should be 1/framerate and timestamp increments should be
* identical to 1. */
ost->st->time_base = {1, STREAM_FRAME_RATE};
c->time_base = ost->st->time_base;
c->gop_size = 12; /* emit one intra frame every twelve frames at most */
if (nvenc && USE_D3D11) {
const std::string hw_device_name = "d3d11va";
AVHWDeviceType device_type = av_hwdevice_find_type_by_name(hw_device_name.c_str());
// set up hw device context
AVBufferRef *hw_device_ctx;
// const char* device = "0"; // Default GPU (may be integrated in the case of switchable graphics!)
const char* device = "1";
ret = av_hwdevice_ctx_create(&hw_device_ctx, device_type, device, nullptr, 0);
if (ret < 0) {
fprintf(stderr, "Could not create hwdevice context; %s", av_err2str(ret));
}
set_hwframe_ctx(c, hw_device_ctx);
c->pix_fmt = AV_PIX_FMT_D3D11;
} else if (nvenc && !USE_D3D11)
c->pix_fmt = AV_PIX_FMT_BGR0;
else
c->pix_fmt = STREAM_PIX_FMT;
if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
/* just for testing, we also add B-frames */
c->max_b_frames = 2;
}
if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
/* Needed to avoid using macroblocks in which some coeffs overflow.
* This does not happen with normal video, it just happens here as
* the motion of the chroma plane does not match the luma plane. */
c->mb_decision = 2;
}
/* Some formats want stream headers to be separate. */
if (oc->oformat->flags & AVFMT_GLOBALHEADER)
c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}

FFMPEG encoder:Application provided invalid, non monotonically increasing dts to muxer in stream

I am encoding video with h264 codec and mp4 container. The source is a sequence of images I load from memory. The encoding seems to be working good. I am able to verify the exact amount of frames encoded, but what bugs me is the following warning/error I am getting at the end of the session from the muxer in terminal:
Application provided invalid, non monotonically increasing dts to
muxer in stream 0:
I tried different techniques to calculate pts/dts and it is the same. I did notice though, that avcodec_receive_packet() sometimes returns packets out of order,so may be that's the reason for the warning? Below is my function where I encode the image data.
bool LibAVEncoder::EncodeFrame(uint8_t* data)
{
int err;
if (!mVideoFrame)//init frame once
{
mVideoFrame = av_frame_alloc();
mVideoFrame->format = mVideoStream->codecpar->format;
mVideoFrame->width = mCodecContext->width;
mVideoFrame->height = mCodecContext->height;
if ((err = av_frame_get_buffer(mVideoFrame, 0)) < 0)
{
printf("Failed to allocate picture: %i\n", err);
return false;
}
}
err = av_frame_make_writable(mVideoFrame);
if (!mSwsCtx)
{
mSwsCtx = sws_getContext(mCodecContext->width, mCodecContext->height, AV_PIX_FMT_RGBA, mCodecContext->width,
mCodecContext->height, AV_PIX_FMT_YUV420P, SWS_BICUBIC, 0, 0, 0);
}
const int inLinesize[1] = { 4 * mCodecContext->width };
// RGBA to YUV
sws_scale(mSwsCtx, (const uint8_t* const*)&data, inLinesize, 0, mCodecContext->height, mVideoFrame->data, mVideoFrame->linesize);
mVideoFrame->pts = mFrameCount++;
//Encoding
err = avcodec_send_frame(mCodecContext, mVideoFrame);
if (err < 0)
{
printf("Failed to send frame: %i\n", err);
return false;
}
while (err >= 0)
{
err = avcodec_receive_packet(mCodecContext, mPacket);
if (err == AVERROR(EAGAIN))
{
return true;
}
else if (err == AVERROR_EOF)
{
return true;
}
else if (err < 0)
{
fprintf(stderr, "Error during encoding,shutting down\n");
exit(1);
}
// 1. way of calc pts/dts
mPacket->pts = av_rescale_q_rnd(mPacket->pts, mCodecContext->time_base, mVideoStream->time_base, AVRounding(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
mPacket->dts = av_rescale_q_rnd(mPacket->dts, mCodecContext->time_base, mVideoStream->time_base, AVRounding(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
mPacket->duration = av_rescale_q(mPacket->duration, mCodecContext->time_base, mVideoStream->time_base);
/*
//2. another way of calc pts/dts
const int64_t duration = av_rescale_q(1, mCodecContext->time_base, mVideoStream->time_base);
mPacket->duration = duration;
mPacket->pts = totalDuration;
mPacket->dts = totalDuration;
totalDuration += duration;
*/
mPacket->stream_index = mVideoStream->index;
av_interleaved_write_frame(mOFormatContext, mPacket);
av_packet_unref(mPacket);
}
return true;
}
I would appreciate any pointer to what I am doing wrong here.

Audio/Video encoding with ffmpeg

Audio/Video encoding with ffmpeg:
I am trying to create an avi file with encoded video and audio, using ffmpeg.
First, I create the file:
//define BITRATE 10000000
//define GOP 300
//define FPS 60
//define VIDEOTYPE "avi"
if (!encoder_->createFile(QFileInfo(*(videoFile_.data())).absoluteFilePath(), targetRect.width(), targetRect.height(), BITRATE*(1000 / FPS), GOP, 1000))
The buffers are initialized as:
audio_outbuf_size = 44100 * 0.005 * 16; //5ms of audio should be encoded, each time this function is called
audio_outbuf = new uint8_t[audio_outbuf_size];
outbuf_size = getWidth()*getHeight() * 3;
outbuf = new uint8_t[outbuf_size];
Then add audio and video streams (audio: CODEC_ID_PCM_S16LE, 16000 kb/s and 44100 Hz, video: PIX_FMT_YUV420P)
void MediaMuxer::addAudioStream(QString fileName, ffmpeg::CodecID codec_id)
{
// Add the audio stream
ffmpeg::AVCodec *encoder = avcodec_find_encoder(codec_id);
pAudioStream_ = ffmpeg::av_new_stream(pOutputFormatCtx_, 0);
if (!pAudioStream_) {
printf("Could not allocate stream\n");
return;
}
pAudioCodecCtx_ = pAudioStream_->codec;
pAudioCodecCtx_->codec_id = codec_id;
pAudioCodecCtx_->codec_type = ffmpeg::AVMEDIA_TYPE_AUDIO;
pAudioCodecCtx_->sample_fmt = ffmpeg::AV_SAMPLE_FMT_S16;
pAudioCodecCtx_->sample_fmt = encoder->sample_fmts[0];
pAudioCodecCtx_->bit_rate = 16000;
//pAudioCodecCtx_->bit_rate = 64000;
pAudioCodecCtx_->sample_rate = N;
pAudioCodecCtx_->channels = 1;
pAudioCodecCtx_->time_base.den = FPS;
pAudioCodecCtx_->time_base.num = 1;
avcodec_thread_init(pAudioCodecCtx_, 10);
// some formats want stream headers to be separate
if (pOutputFormatCtx_->oformat->flags & AVFMT_GLOBALHEADER)
pAudioCodecCtx_->flags |= CODEC_FLAG_GLOBAL_HEADER;
if (av_set_parameters(pOutputFormatCtx_, NULL) < 0)
{
printf("Invalid output format parameters\n");
return;
}
//ffmpeg::dump_format(pOutputFormatCtx_, 0, fileName.toStdString().c_str(), 1);
// open_video
// find the audio encoder
pAudioCodec_ = avcodec_find_encoder(pAudioCodecCtx_->codec_id);
if (!pAudioCodec_)
{
printf("codec not found\n");
return;
}
// open the codec
if (avcodec_open(pAudioCodecCtx_, pAudioCodec_) < 0)
{
printf("could not open codec\n");
return;
}
// Allocate memory for output
if (!initAudioOutputBuf())
{
printf("Can't allocate memory for audio output bitstream\n");
return;
}
// Allocate the audio frame
if (!initAudioFrame())
{
printf("Can't init audio frame\n");
return;
}
if (url_fopen(&pOutputFormatCtx_->pb, fileName.toStdString().c_str(), URL_WRONLY) < 0)
{
printf("Could not open '%s'\n", fileName.toStdString().c_str());
return;
}
av_write_header(pOutputFormatCtx_);
}
void MediaMuxer::addVideoStream(QString fileName)
{
// Add the video stream
pVideoStream_ = ffmpeg::av_new_stream(pOutputFormatCtx_, 0);
if (!pVideoStream_)
{
printf("Could not allocate stream\n");
return;
}
pVideoCodecCtx_ = pVideoStream_->codec;
pVideoCodecCtx_->codec_id = pOutputFormat_->video_codec;
pVideoCodecCtx_->codec_type = ffmpeg::AVMEDIA_TYPE_VIDEO;
pVideoCodecCtx_->bit_rate = Bitrate;
pVideoCodecCtx_->width = getWidth();
pVideoCodecCtx_->height = getHeight();
pVideoCodecCtx_->time_base.den = FPS;
pVideoCodecCtx_->time_base.num = 1;
pVideoCodecCtx_->gop_size = Gop;
pVideoCodecCtx_->pix_fmt = ffmpeg::PIX_FMT_YUV420P;
avcodec_thread_init(pVideoCodecCtx_, 10);
// some formats want stream headers to be separate
if (pOutputFormatCtx_->oformat->flags & AVFMT_GLOBALHEADER)
pVideoCodecCtx_->flags |= CODEC_FLAG_GLOBAL_HEADER;
if (av_set_parameters(pOutputFormatCtx_, NULL) < 0)
{
printf("Invalid output format parameters\n");
return;
}
//ffmpeg::dump_format(pOutputFormatCtx_, 0, fileName.toStdString().c_str(), 1);
// open_video
// find the video encoder
pVideoCodec_ = avcodec_find_encoder(pVideoCodecCtx_->codec_id);
if (!pVideoCodec_)
{
printf("codec not found\n");
return;
}
// open the codec
if (avcodec_open(pVideoCodecCtx_, pVideoCodec_) < 0)
{
printf("could not open codec\n");
return;
}
// Allocate memory for output
if (!initOutputBuf())
{
printf("Can't allocate memory for output bitstream\n");
return;
}
// Allocate the YUV frame
if (!initFrame())
{
printf("Can't init frame\n");
return;
}
if (url_fopen(&pOutputFormatCtx_->pb, fileName.toStdString().c_str(), URL_WRONLY) < 0)
{
printf("Could not open '%s'\n", fileName.toStdString().c_str());
return;
}
av_write_header(pOutputFormatCtx_);
}
Finally, I call alternatively encodeVideo/encodeAudio to encode video and PCM audio frames at specific recording times(pts):
int MediaMuxer::encodeVideo(const QImage &img, unsigned pts)
{
convertImage_sws(img); // SWS conversion
pVideoCodecCtx_->coded_frame->pts = pts; // Set the time stamp
int out_size = ffmpeg::avcodec_encode_video(pVideoCodecCtx_, outbuf, outbuf_size, ppicture);
pVideoCodecCtx_->coded_frame->pts = pts; // Set the time stamp
if (out_size > 0)
{
ffmpeg::av_init_packet(&pkt);
if (pVideoCodecCtx_->coded_frame->pts != (0x8000000000000000LL))
pkt.pts = av_rescale_q(pVideoCodecCtx_->coded_frame->pts, pVideoCodecCtx_->time_base, pVideoStream_->time_base);
if (pVideoCodecCtx_->coded_frame->key_frame)
pkt.flags |= AV_PKT_FLAG_KEY;
pkt.stream_index = pVideoStream_->index;
pkt.data = outbuf;
pkt.size = out_size;
int ret = ffmpeg::av_interleaved_write_frame(pOutputFormatCtx_, &pkt);
if (ret<0)
return -1;
}
return out_size;
}
int MediaMuxer::encodeAudio(unsigned pts)
{
pAudioCodecCtx_->coded_frame->pts = pts; // Set the time stamp
// simple sound encoding
int16_t samples[220] = { 0 }; // buffer
int n; // buffer index
double Fs = 44100.0; // sampling frequency
// Generate audio data
for (n = 0; n < 220; ++n) //220 samples (44100*.005sec as the interval between 2 video frames is 10ms)
samples[n] = 16383.0 * sin(n*1000.0*2.0*M_PI / Fs); //sine wav
int out_size = ffmpeg::avcodec_encode_audio(pAudioCodecCtx_, audio_outbuf, audio_outbuf_size, (const short*)samples);
pAudioCodecCtx_->coded_frame->pts = pts; // Set the time stamp
if (out_size>0)
{
// Packet
ffmpeg::AVPacket pkt = { 0 };
av_init_packet(&pkt);
pkt.data = NULL; // packet data will be allocated by the encoder
pkt.size = 0;
if (pAudioCodecCtx_->coded_frame->pts != (0x8000000000000000LL))
pkt.pts = av_rescale_q(pAudioCodecCtx_->coded_frame->pts, pAudioCodecCtx_->time_base, pAudioStream_->time_base);
if (pAudioCodecCtx_->coded_frame->key_frame)
pkt.flags |= AV_PKT_FLAG_KEY;
pkt.stream_index = pAudioStream_->index;
pkt.data = audio_outbuf;
pkt.size = out_size;
int ret = av_interleaved_write_frame(pOutputFormatCtx_, &pkt);
if (ret<0)
return -1;
av_free_packet(&pkt);
}
//end simple sound encoding
return pkt.size;
}
The result is a nice video with some audio behind (either a regular beeping sound at regular intervals but ending way earlier than the video or a continuous longer sound that also last shorter than the video).
I want to generate a beeping sound each time the function encodeAudio() is called - at non-regular intervals. I have tried to modify the sampling rate, the buffer size, the pkt size and the number of samples but without any success. I also tried to set the pts at different times but it did not get me where I want to be. Could someone please help?

c++ videoplayer ffmpeg: get pixel data?

I want to get the pixel data of a frame. I found this (in original version as old code) and changed some things.
I have this code:
AVFormatContext *pFormatCtx;
pFormatCtx = avformat_alloc_context();
// Open file
if (int err = avformat_open_input(&pFormatCtx, file, NULL, 0) != 0)
{
exit(2);
}
// Get infromation about streams
if (avformat_find_stream_info(pFormatCtx, NULL) < 0)
{
exit(2);
}
// # video stream
int videoStreamIndex = -1;
AVCodecContext *pVideoCodecCtx;
AVCodec *pVideoCodec;
int res = 0;
int width = 0;
int height = 0;
for (unsigned int i = 0; i < pFormatCtx->nb_streams; i++)
{
if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
{
videoStreamIndex = i;
pVideoCodecCtx = pFormatCtx->streams[i]->codec;
// Find decoder
pVideoCodec = avcodec_find_decoder(pVideoCodecCtx->codec_id);
if (pVideoCodec)
{
// Open decoder
res = !(avcodec_open2(pVideoCodecCtx, pVideoCodec, NULL) < 0);
width = pVideoCodecCtx->coded_width;
height = pVideoCodecCtx->coded_height;
}
break;
}
}
// Frame width
width = pFormatCtx->streams[videoStreamIndex]->codec->width;
// Frame height
height = pFormatCtx->streams[videoStreamIndex]->codec->height;
AVPacket packet;
int got_picture_ptr;
AVPacket *avpkt;
AVFrame * pOutFrame;
pOutFrame = av_frame_alloc();
AVFrame * rgbOutFrame = av_frame_alloc();
if (!pOutFrame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
while (av_read_frame(pFormatCtx, &packet) >= 0)
{
if (packet.stream_index == videoStreamIndex)
{
// Decode packeg to frame.
int videoFrameBytes = avcodec_decode_video2(pVideoCodecCtx, pOutFrame,
&got_picture_ptr, &packet);
// Create context
SwsContext* pImgConvertCtx = sws_getContext(pVideoCodecCtx->width,
pVideoCodecCtx->height,
pVideoCodecCtx->pix_fmt,
pVideoCodecCtx->width, pVideoCodecCtx->height,
AV_PIX_FMT_YUV420P,
SWS_BICUBIC, NULL, NULL, NULL);
// Convert frame
sws_scale(pImgConvertCtx, pOutFrame->data, pOutFrame->linesize,
width, height, rgbOutFrame->data, rgbOutFrame->linesize);
}
}
I know, that the code from SwsContext and sws_scale is wrong but I wonder, where can I find the pixel data of my frame... (and in which format it is stored).
Can someone help me here?
Pixel data is stored in data field.
According to the documentation:
uint8_t* AVFrame::data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Look here for more information.
Generally speaking, your code is a bit misleading and rather buggy. I can point out some drawbacks:
1) You don't need to create new SwsContext on every incoming video packet. Just create it once before while cycle.
2) Next, you have an rgbOutFrame, but SwsContext is created for scaling into the YUV420 pixel format. It looks strange.
3) Besides, avcodec_decode_video2 is invoked, but you never check neither return value nor got_picture_ptr flag. Such practice is really error-prone.
And so on...
Hope it'll help you to improve your program and get necessary results.

C++ - applying filter in ffmpeg

I'm trying to deinterlace a frame using ffmpeg (latest release). Related with this question, I can get the filter I want using this sentence:
AVFilter *filter = avfilter_get_by_name("yadif");
After that, I open the filter context as:
AVFilterContext *filter_ctx;
avfilter_open(&filter_ctx, filter, NULL);
My first question is about this function. Visual Studio warns me about avfilter_open is deprecated. Which is the alternative?
After that, I do:
avfilter_init_str(filter_ctx, "yadif=1:-1");
And always fails. I've tried "1:-1" instead "yadif=1:-1", but always fails too, what parameter I should use?
EDIT: A value of "1" or "2", for example, it works. Debuging it, I found that with one of this values, the function uses mode=1 or mode=2. The explanation of those values is in this link.
Then, I have a AVFrame *frame that is the frame I want to deinterlace. When the last sentence work, I'll have the filter and his context init. How do I apply this filter to my frame?
Thanks for your help.
I undertsnad your question is over a year now but recently I had to work with interlaced DVB-TS streams so I might be able to help anybody else coming across this subject.
These snippets are from a finished player I've written
Initialise the filter graph:
void VideoManager::init_filter_graph(AVFrame *frame) {
if (filter_initialised) return;
int result;
AVFilter *buffer_src = avfilter_get_by_name("buffer");
AVFilter *buffer_sink = avfilter_get_by_name("buffersink");
AVFilterInOut *inputs = avfilter_inout_alloc();
AVFilterInOut *outputs = avfilter_inout_alloc();
AVCodecContext *ctx = ffmpeg.vid_stream.context;
char args[512];
int frame_fix = 0; // fix bad width on some streams
if (frame->width < 704) frame_fix = 2;
else if (frame->width > 704) frame_fix = -16;
snprintf(args, sizeof(args),
"video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d",
frame->width + frame_fix,
frame->height,
frame->format,// ctx->pix_fmt,
ctx->time_base.num,
ctx->time_base.den,
ctx->sample_aspect_ratio.num,
ctx->sample_aspect_ratio.den);
const char *description = "yadif=1:-1:0";
LOGD("Filter: %s - Settings: %s", description, args);
filter_graph = avfilter_graph_alloc();
result = avfilter_graph_create_filter(&filter_src_ctx, buffer_src, "in", args, NULL, filter_graph);
if (result < 0) {
LOGI("Filter graph - Unable to create buffer source");
return;
}
AVBufferSinkParams *params = av_buffersink_params_alloc();
enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE };
params->pixel_fmts = pix_fmts;
result = avfilter_graph_create_filter(&filter_sink_ctx, buffer_sink, "out", NULL, params, filter_graph);
av_free(params);
if (result < 0) {
LOGI("Filter graph - Unable to create buffer sink");
return;
}
inputs->name = av_strdup("out");
inputs->filter_ctx = filter_sink_ctx;
inputs->pad_idx = 0;
inputs->next = NULL;
outputs->name = av_strdup("in");
outputs->filter_ctx = filter_src_ctx;
outputs->pad_idx = 0;
outputs->next = NULL;
result = avfilter_graph_parse_ptr(filter_graph, description, &inputs, &outputs, NULL);
if (result < 0) LOGI("avfilter_graph_parse_ptr ERROR");
result = avfilter_graph_config(filter_graph, NULL);
if (result < 0) LOGI("avfilter_graph_config ERROR");
filter_initialised = true;
}
When processing the video packets from the stream, check if it is an interlaced frame and send the frame off to the filter. The filter will then return the de-interlaced frames back to you.
void FFMPEG::process_video_packet(AVPacket *pkt) {
int got;
AVFrame *frame = vid_stream.frame;
avcodec_decode_video2(vid_stream.context, frame, &got, pkt);
if (got) {
if (!frame->interlaced_frame) { // not interlaced
Video.add_av_frame(frame, 0);
} else {
if (!Video.filter_initialised) {
Video.init_filter_graph(frame);
}
av_buffersrc_add_frame_flags(Video.filter_src_ctx, frame, AV_BUFFERSRC_FLAG_KEEP_REF);
int c = 0;
while (true) {
AVFrame *filter_frame = ffmpeg.vid_stream.filter_frame;
int result = av_buffersink_get_frame(Video.filter_sink_ctx, filter_frame);
if (result == AVERROR(EAGAIN) || result == AVERROR(AVERROR_EOF)) break;
if (result < 0) return;
Video.add_av_frame(filter_frame, c++);
av_frame_unref(filter_frame);
}
}
}
}
Hope this helps anyone because finding information about ffmpeg is tough going.