run time inconsistency DXVA hardware video decoding - c++

I am currently working on a project that involves using DXVA API and the FFmpeg framework to implement hardware-accelerated decoding of H264 video stream files.
I have done some research on GPU decoding and constructed my code based on the hardware acceleration implementation in VLC. From my understanding, using DXVA in FFmpeg involves initializing the DirectXVideoDecoder and implementing several callback functions in AVCodecContext. The decoding process is done with the FFmpeg function avcodec_decode_video2() and each frame is parsed with av_read_frame(). The decoded frame is stored in the graphics memory and displayed using Direct3D.
I tried to time each process with the ::GetTickCount() function and noticed that the execution time of the program for a 1550-frame video is 35,000 ms, with the display function taking 90% of the time and the decoding function taking 6% of the time.
However, when I tried to comment out the displaying process and execute the code only decoding each frame, the total decoding time surprisingly increased to 25,000ms for the same video, taking 94% of the total time.
Here is the code for the decoding function:
// Timing harness (body fragment; the enclosing function signature is not shown here).
// It opens a media file, configures a decoder context with DXVA2 callbacks, then reads
// and decodes packets inside a Win32 message loop while accumulating GetTickCount()
// deltas for the setup, read, decode and display phases, and finally prints a summary.
// NOTE(review): GetTickCount() is documented as having a coarse resolution (typically
// 10-16 ms), so summing many short per-call deltas may be unreliable -- confirm.
//record start time
DWORD start_time = ::GetTickCount();
//media file to be loaded
const char *filename = "123.mkv";
//time recording parameters (accumulated tick deltas, in milliseconds)
unsigned frame_read_time_total = 0;
unsigned decode_frame_time_total = 0;
unsigned display_time_total = 0;
unsigned setup_time_total = 0;
/*********************Setup and Initialization Code*******************************/
unsigned setup_time_start = ::GetTickCount();
av_register_all();
av_log_set_level(AV_LOG_DEBUG);
int res;
AVFormatContext *file = NULL;
res = avformat_open_input(&file, filename, NULL, NULL);// open the input file
if (res < 0) {
printf("error %x in avformat_open_input\n", res);
return 1;
}
res = avformat_find_stream_info(file, NULL);// retrieve the stream information
if (res < 0)
{
printf("error %x in avformat_find_stream_info\n", res);
return 1;
}
av_dump_format(file, 0, filename, 0);// print information about the input file's streams
int i;
int videoindex = -1;
int audioindex = -1;
// Remember the index of the LAST video and the LAST audio stream found;
// audioindex is not used anywhere else in this fragment.
for (i = 0; i < file->nb_streams; i++){
if (file->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO){
videoindex = i;
}
if (file->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO){
audioindex = i;
}
}
if (videoindex == -1){
av_log(NULL, AV_LOG_DEBUG, "can't find video stream\n");
// NOTE(review): returns 0 here while the other failure paths return 1.
return 0;
}
AVCodec *codec = avcodec_find_decoder(file->streams[videoindex]->codec->codec_id);// find the decoder matching the stream's codec id
if (!codec){
printf("decoder not found\n");
return 1;
}
AVCodecContext *codecctx = file->streams[videoindex]->codec;
// screen_width / screen_height are declared outside this fragment.
screen_width = codecctx->width;
screen_height = codecctx->height;
//Initialize Win API Window
WNDCLASSEX window;
ZeroMemory(&window, sizeof(window));
window.cbSize = sizeof(window);
window.hbrBackground = (HBRUSH)(COLOR_WINDOW + 1);
window.lpfnWndProc = (WNDPROC)WindowProcess;
window.lpszClassName = L"D3D";
window.style = CS_HREDRAW | CS_VREDRAW;
RegisterClassEx(&window);
HWND hwnd_temp = CreateWindow(L"D3D", L"Player", WS_OVERLAPPEDWINDOW,
0, 0, screen_width, screen_height, NULL, NULL, NULL, NULL);
if (hwnd_temp == NULL){
av_log(NULL, AV_LOG_ERROR, "Error: Cannot create window\n");
system("pause");
// NOTE(review): execution continues after this failure; the NULL handle is still stored below.
}
// hwnd is a container declared outside this fragment.
hwnd.push_back(hwnd_temp);
vlc_va_dxva2_t *dxva = vlc_va_NewDxva2(codecctx->codec_id);
if (NULL == dxva){
// NOTE(review): returns 0 here while the other failure paths return 1.
return 0;
}
res = Setup(dxva, &codecctx->hwaccel_context, &codecctx->pix_fmt, screen_width, screen_height);
if (res < 0) {
// NOTE(review): `res` is passed but the format string contains no conversion for it.
printf("error DXVA setup\n", res);
return 1;
}
//Assign callback functions; the DXVA state travels with the context through `opaque`
codecctx->opaque = dxva;
codecctx->get_format = ffmpeg_GetFormat;
codecctx->get_buffer = ffmpeg_GetFrameBuf;
codecctx->reget_buffer = ffmpeg_ReGetFrameBuf;
codecctx->release_buffer = ffmpeg_ReleaseFrameBuf;
codecctx->thread_count = 1;
res = avcodec_open2(codecctx, codec, NULL);
if (res < 0) {
printf("error %x in avcodec_open2\n", res);
return 1;
}
//Initialize Packet
AVPacket pkt = { 0 };
AVFrame *picture = avcodec_alloc_frame();
// wait_for_keyframe is not used anywhere else in this fragment.
DWORD wait_for_keyframe = 60;
//initialize frame count
int count = 0;
ShowWindow(hwnd.at(0), SW_SHOWNORMAL);
UpdateWindow(hwnd.at(0));
// Rectangle covering the whole video; only referenced by the commented-out display call below.
RECT screen_size;
screen_size.top = 0;
screen_size.bottom = screen_height;
screen_size.left = 0;
screen_size.right = screen_width;
unsigned setup_time_end = ::GetTickCount();
setup_time_total = setup_time_end - setup_time_start;
MSG msg;
ZeroMemory(&msg, sizeof(msg));
// Main loop: pending window messages are handled first; otherwise one packet is read and decoded.
while(msg.message!=WM_QUIT)
{
if (PeekMessage(&msg, NULL, 0,0, PM_REMOVE)){
TranslateMessage(&msg);
DispatchMessage(&msg);
continue;
}
int read_status;
unsigned read_frame_start = ::GetTickCount();
read_status = av_read_frame(file, &pkt);
if (read_status < 0)
{
// Read failure (including end of input) leaves the loop; this last read is not added to the read total.
av_free_packet(&pkt);
goto done;
}
unsigned read_frame_end = ::GetTickCount();
frame_read_time_total += (read_frame_end - read_frame_start);
int got_picture = 0;
unsigned decode_start = ::GetTickCount();
// NOTE(review): every packet is passed to the video decoder; pkt.stream_index is not compared with videoindex.
// bytes_used is not checked afterwards.
int bytes_used = avcodec_decode_video2(codecctx, picture, &got_picture, &pkt);
unsigned decode_end = ::GetTickCount();
decode_frame_time_total += (decode_end - decode_start);
if (got_picture)
{
count++;
unsigned display_start = ::GetTickCount();
// The display call is commented out, so the interval measured here brackets no work.
//display_frame((vlc_va_dxva2_t *)codecctx->opaque, picture, screen_size,0);
unsigned display_end = ::GetTickCount();
display_time_total += (display_end - display_start);
}
av_free_packet(&pkt);
}
done:
UnregisterClass(L"D3D",0);
printf("Frames = %d\n",count);
unsigned stop_time = ::GetTickCount();
unsigned total_time = stop_time - start_time;
// Summary: each phase total is also shown as a percentage of the overall run time.
printf("total frame = %d\n", count);
printf("time cost = %d\n", total_time);
printf("total setup time = %d, %f %% total execution time\n", setup_time_total,(float) setup_time_total / total_time * 100);
printf("total frame read time = %d, %f %% total execution time\n", frame_read_time_total, (float)frame_read_time_total / total_time*100);
printf("total frame decode time = %d, %f %% total execution time\n", decode_frame_time_total, (float)decode_frame_time_total / total_time*100);
printf("total display time = %d, %f %% of total execution time\n", display_time_total, (float)display_time_total / total_time*100);
av_free(picture);
av_close_input_file(file);
system("pause");
return 0;
What could be the cause of this strange behavior? My guess is that it may be an incorrect use of ::GetTickCount(), or maybe it has to do with the DXVA hardware-accelerated decoding process. Sorry for the long post. Any input and suggestions are appreciated. Thanks in advance.

I think it is correct behaviour if the decoding process is asynchronous. I know FFmpeg uses threads, but it depends on compilation flags or decoding setup.
If the display process is very long, the decoder decodes frames, while the display process executes. So when you ask for rendering, some frames are already decoded, and it's fast.
If you avoid the display process, the decoding process takes all the processor time. Normally, the display process uses some sort of timestamp that lets enough time to the decoding process.
PS: from what I know about FFmpeg and DXVA2, you also need to provide the DirectX texture.

Related

FFMPEG using AV_PIX_FMT_D3D11 gives "Error registering the input resource" from NVENC

Input frames start on the GPU as ID3D11Texture2D pointers.
I encode them to H264 using FFMPEG + NVENC. NVENC works perfectly if I download the textures to CPU memory as format AV_PIX_FMT_BGR0, but I'd like to cut out the CPU texture download entirely, and pass the GPU memory pointer directly into the encoder in native format. I write frames like this:
/**
 * Submit a GPU-resident texture to the encoder without copying it to CPU memory.
 *
 * The stream's reusable hardware frame is pointed at the texture, stamped with the
 * next presentation timestamp and handed to write_frame().
 *
 * @param gpuTex  texture to encode; stored in hw_frame->data[0]
 * @param oc      output format context, forwarded to write_frame()
 * @param ost     output stream state (hw_frame, next_pts, enc, st)
 * @return whatever write_frame() returns
 */
int write_gpu_video_frame(ID3D11Texture2D* gpuTex, AVFormatContext* oc, OutputStream* ost) {
    AVFrame *hw_frame = ost->hw_frame;
    // Print the texture pointer itself with %p. Passing &gpuTex (the address of this
    // function's own parameter) through %x is undefined for pointers and would not
    // identify the texture anyway.
    printf("gpuTex address = %p\n", static_cast<void*>(gpuTex));
    hw_frame->data[0] = (uint8_t *) gpuTex;
    // For AV_PIX_FMT_D3D11 frames FFmpeg documents data[1] as the texture array index;
    // index 0 is used here.
    hw_frame->data[1] = (uint8_t *) (intptr_t) 0;
    hw_frame->pts = ost->next_pts++;
    return write_frame(oc, ost->enc, ost->st, hw_frame);
    // write_frame is identical to sample code in ffmpeg repo
}
Running the code with this modification gives the following error:
gpuTex address = 0x4582f6d0
[h264_nvenc # 00000191233e1bc0] Error registering an input resource: invalid call (9):
[h264_nvenc # 00000191233e1bc0] Could not register an input HW frame
Error sending a frame to the encoder: Unknown error occurred
Here's some supplemental code used in setting up and configuring the hw context and encoder:
/* A few config flags */
#define ENABLE_NVENC TRUE
#define USE_D3D11 TRUE // Skip downloading textures to CPU memory and send it straight to NVENC
/**
 * Init hardware frame context.
 *
 * Allocates an AVHWFramesContext on the given device, configures it for
 * AV_PIX_FMT_D3D11 frames backed by NV12 surfaces of STREAM_WIDTH x STREAM_HEIGHT,
 * initializes it and stores a new reference in ctx->hw_frames_ctx.
 *
 * @param ctx            encoder context that receives the frames-context reference
 * @param hw_device_ctx  reference to the hardware device context to allocate on
 * @return 0 on success, a negative AVERROR code on failure
 */
static int set_hwframe_ctx(AVCodecContext* ctx, AVBufferRef* hw_device_ctx) {
    AVBufferRef* hw_frames_ref = av_hwframe_ctx_alloc(hw_device_ctx);
    if (!hw_frames_ref) {
        fprintf(stderr, "Failed to create HW frame context.\n");
        // Report the failure through the return value. A bare `throw;` with no
        // exception in flight calls std::terminate rather than signalling the caller.
        return AVERROR(ENOMEM);
    }

    AVHWFramesContext* frames_ctx = (AVHWFramesContext*) (hw_frames_ref->data);
    frames_ctx->format = AV_PIX_FMT_D3D11;
    frames_ctx->sw_format = AV_PIX_FMT_NV12;
    frames_ctx->width = STREAM_WIDTH;
    frames_ctx->height = STREAM_HEIGHT;
    //frames_ctx->initial_pool_size = 20;

    int err = av_hwframe_ctx_init(hw_frames_ref);
    if (err < 0) {
        fprintf(stderr, "Failed to initialize hw frame context. Error code: %s\n", av_err2str(err));
        av_buffer_unref(&hw_frames_ref);
        return err;
    }

    // The codec context keeps its own reference; the local one is dropped either way.
    ctx->hw_frames_ctx = av_buffer_ref(hw_frames_ref);
    if (!ctx->hw_frames_ctx)
        err = AVERROR(ENOMEM);
    av_buffer_unref(&hw_frames_ref);
    return err;
}
/* Add an output stream. */
/**
 * Add an output stream.
 *
 * Picks an encoder (h264_nvenc when ENABLE_NVENC is set and available, otherwise the
 * default encoder for codec_id), creates the stream and its codec context, and fills
 * in bit rate, resolution, time base, GOP size and pixel format. When NVENC is used
 * together with USE_D3D11, a d3d11va device and hardware frames context are created
 * so that frames can be submitted as AV_PIX_FMT_D3D11.
 *
 * Any failure is reported on stderr and terminates the process with exit(1).
 *
 * @param ost       output stream state; ost->st and ost->enc are set here
 * @param oc        output format context the stream is added to
 * @param codec     out: the encoder that was selected
 * @param codec_id  codec to encode with (fallback lookup and logging)
 * @param width     currently unused; STREAM_WIDTH is applied instead
 * @param height    currently unused; STREAM_HEIGHT is applied instead
 */
static void add_video_stream(
    OutputStream* ost,
    AVFormatContext* oc,
    const AVCodec** codec,
    enum AVCodecID codec_id,
    int width,
    int height
) {
    AVCodecContext* c;
    bool nvenc = false;
    /* find the encoder */
    if (ENABLE_NVENC) {
        printf("Getting nvenc encoder\n");
        *codec = avcodec_find_encoder_by_name("h264_nvenc");
        nvenc = true;
    }
    if (!ENABLE_NVENC || *codec == NULL) {
        printf("Getting standard encoder\n");
        // Store the fallback encoder; previously the lookup result was discarded,
        // so a missing NVENC encoder always ended in "Could not find encoder".
        *codec = avcodec_find_encoder(codec_id);
        nvenc = false;
    }
    if (!(*codec)) {
        fprintf(stderr, "Could not find encoder for '%s'\n",
                avcodec_get_name(codec_id));
        exit(1);
    }
    ost->st = avformat_new_stream(oc, NULL);
    if (!ost->st) {
        fprintf(stderr, "Could not allocate stream\n");
        exit(1);
    }
    ost->st->id = oc->nb_streams - 1;
    c = avcodec_alloc_context3(*codec);
    if (!c) {
        fprintf(stderr, "Could not alloc an encoding context\n");
        exit(1);
    }
    ost->enc = c;
    printf("Using video codec %s\n", avcodec_get_name(codec_id));
    c->codec_id = codec_id;
    c->bit_rate = 4000000;
    /* Resolution must be a multiple of two. */
    c->width = STREAM_WIDTH;
    c->height = STREAM_HEIGHT;
    /* timebase: This is the fundamental unit of time (in seconds) in terms
     * of which frame timestamps are represented. For fixed-fps content,
     * timebase should be 1/framerate and timestamp increments should be
     * identical to 1. */
    ost->st->time_base = {1, STREAM_FRAME_RATE};
    c->time_base = ost->st->time_base;
    c->gop_size = 12; /* emit one intra frame every twelve frames at most */
    if (nvenc && USE_D3D11) {
        const std::string hw_device_name = "d3d11va";
        AVHWDeviceType device_type = av_hwdevice_find_type_by_name(hw_device_name.c_str());
        // set up hw device context
        AVBufferRef *hw_device_ctx = NULL;
        // const char* device = "0"; // Default GPU (may be integrated in the case of switchable graphics!)
        const char* device = "1";
        int ret = av_hwdevice_ctx_create(&hw_device_ctx, device_type, device, nullptr, 0);
        if (ret < 0) {
            fprintf(stderr, "Could not create hwdevice context; %s", av_err2str(ret));
            // Without a device there is nothing valid to build the frames context on.
            exit(1);
        }
        if (set_hwframe_ctx(c, hw_device_ctx) < 0) {
            fprintf(stderr, "Could not set up hw frames context\n");
            exit(1);
        }
        // av_hwframe_ctx_alloc() takes its own reference to the device, so the
        // reference created above can be released here.
        av_buffer_unref(&hw_device_ctx);
        c->pix_fmt = AV_PIX_FMT_D3D11;
    } else if (nvenc && !USE_D3D11)
        c->pix_fmt = AV_PIX_FMT_BGR0;
    else
        c->pix_fmt = STREAM_PIX_FMT;
    if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
        /* just for testing, we also add B-frames */
        c->max_b_frames = 2;
    }
    if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
        /* Needed to avoid using macroblocks in which some coeffs overflow.
         * This does not happen with normal video, it just happens here as
         * the motion of the chroma plane does not match the luma plane. */
        c->mb_decision = 2;
    }
    /* Some formats want stream headers to be separate. */
    if (oc->oformat->flags & AVFMT_GLOBALHEADER)
        c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}

Resampling audio using libswresample, leaves small amount of noise after resampling

I'm trying to resample audio from 44 kHz to 48 kHz and I'm getting a small, light noise after resampling, as if someone is gently ticking the mic. This happens both ways: from 48 kHz to 44 kHz and vice versa.
I've read that this can happen because the SwrContext still has some data left and that I should flush the context before resampling the next frame. Although this helps a little (less noticeable noise), it's still present.
I've tried using FFmpeg resample filter instead, but the output is just loud incoherent noise. I'm pretty sure that libswresample should not output any noise on resampling which means that I just don't know how to use it well and I'm missing some options.
This is the code for resampler.
/**
 * Resample one decoded audio frame to SAMPLE_RATE / stereo / out_sample_fmt using
 * libswresample and copy the converted samples into out_buf.
 *
 * @param videoState           player state; the audio codec context is read from it
 * @param decoded_audio_frame  decoded input frame
 * @param out_sample_fmt       desired output sample format
 * @param out_buf              destination buffer; its capacity is not checked here
 * @return number of bytes copied into out_buf, or -1 on any failure
 */
int ResampleFrame(VideoState * videoState, AVFrame *decoded_audio_frame, enum AVSampleFormat out_sample_fmt, uint8_t * out_buf)
{
int in_sample_rate = videoState->audio->ptrAudioCodecCtx_->sample_rate;
int out_sample_rate = SAMPLE_RATE;
// get an instance of the AudioResamplingState struct, create if NULL
// NOTE(review): getAudioResampling() is defined elsewhere; whether it returns a fresh or a cached state is not visible here.
AudioResamplingState* arState = getAudioResampling(videoState->audio->ptrAudioCodecCtx_->channel_layout);
if (!arState->swr_ctx)
{
printf("swr_alloc error.\n");
return -1;
}
// get input audio channels: use the codec's layout when it agrees with the channel
// count, otherwise fall back to the default layout for that count
arState->in_channel_layout = (videoState->audio->ptrAudioCodecCtx_->channels ==
av_get_channel_layout_nb_channels(videoState->audio->ptrAudioCodecCtx_->channel_layout)) ?
videoState->audio->ptrAudioCodecCtx_->channel_layout :
av_get_default_channel_layout(videoState->audio->ptrAudioCodecCtx_->channels);
// check input audio channels correctly retrieved
if (arState->in_channel_layout <= 0)
{
printf("in_channel_layout error.\n");
return -1;
}
arState->out_channel_layout = AV_CH_LAYOUT_STEREO;
// retrieve number of audio samples (per channel)
arState->in_nb_samples = decoded_audio_frame->nb_samples;
if (arState->in_nb_samples <= 0)
{
printf("in_nb_samples error.\n");
return -1;
}
// Set SwrContext input parameters for resampling
av_opt_set_int(arState->swr_ctx, "in_channel_layout", arState->in_channel_layout, 0);
av_opt_set_int(arState->swr_ctx, "in_sample_rate", in_sample_rate, 0);
av_opt_set_sample_fmt(arState->swr_ctx, "in_sample_fmt", videoState->audio->ptrAudioCodecCtx_->sample_fmt, 0);
// Set SwrContext output parameters for resampling
av_opt_set_int(arState->swr_ctx, "out_channel_layout", arState->out_channel_layout, 0);
av_opt_set_int(arState->swr_ctx, "out_sample_rate", out_sample_rate, 0);
av_opt_set_sample_fmt(arState->swr_ctx, "out_sample_fmt", out_sample_fmt, 0);
// initialize SWR context after user parameters have been set
// NOTE(review): the options are set and swr_init() is called on every invocation, i.e. once per frame -- confirm this is intended.
int ret = swr_init(arState->swr_ctx);
if (ret < 0)
{
printf("Failed to initialize the resampling context.\n");
return -1;
}
// retrieve output samples number taking into account the progressive delay
int64_t delay = swr_get_delay(arState->swr_ctx, videoState->audio->ptrAudioCodecCtx_->sample_rate) + arState->in_nb_samples;
arState->out_nb_samples = av_rescale_rnd(delay, out_sample_rate, in_sample_rate, AV_ROUND_UP );
// check output samples number was correctly rescaled
if (arState->out_nb_samples <= 0)
{
printf("av_rescale_rnd error\n");
return -1;
}
// get number of output audio channels
arState->out_nb_channels = av_get_channel_layout_nb_channels(arState->out_channel_layout);
// allocate data pointers array for arState->resampled_data and fill data
// pointers and linesize accordingly
// check memory allocation for the resampled data was successful
ret = av_samples_alloc_array_and_samples(&arState->resampled_data, &arState->out_linesize, arState->out_nb_channels, arState->out_nb_samples, out_sample_fmt, 0);
if (ret < 0)
{
printf("av_samples_alloc_array_and_samples() error: Could not allocate destination samples.\n");
return -1;
}
// This condition is always true here: a null swr_ctx already returned at the top.
if (arState->swr_ctx)
{
// do the actual audio data resampling
// check audio conversion was successful
int ret_num_samples = swr_convert(arState->swr_ctx,arState->resampled_data,arState->out_nb_samples,(const uint8_t**)decoded_audio_frame->data, decoded_audio_frame->nb_samples);
//int ret_num_samples = swr_convert_frame(arState->swr_ctx,arState->resampled_data,arState->out_nb_samples,(const uint8_t**)decoded_audio_frame->data, decoded_audio_frame->nb_samples);
if (ret_num_samples < 0)
{
// NOTE(review): this return (and the one below) leaves resampled_data allocated.
printf("swr_convert_error.\n");
return -1;
}
// get the required buffer size for the given audio parameters
// check audio buffer size
arState->resampled_data_size = av_samples_get_buffer_size(&arState->out_linesize, arState->out_nb_channels,ret_num_samples,out_sample_fmt,1);
if (arState->resampled_data_size < 0)
{
printf("av_samples_get_buffer_size error.\n");
return -1;
}
} else {
printf("swr_ctx null error.\n");
return -1;
}
// copy the resampled data to the output buffer (only plane 0 is copied)
memcpy(out_buf, arState->resampled_data[0], arState->resampled_data_size);
// flush the swr context
// NOTE(review): the flushed samples land in resampled_data, which is freed right below,
// and `delayed` is never read -- any samples drained here are discarded.
int delayed = swr_convert(arState->swr_ctx,arState->resampled_data,arState->out_nb_samples,NULL,0);
// release the sample buffer first, then the pointer array
if (arState->resampled_data)
{
av_freep(&arState->resampled_data[0]);
}
av_freep(&arState->resampled_data);
arState->resampled_data = NULL;
int ret_data_size = arState->resampled_data_size;
return ret_data_size;
}
I also tried using the filter as shown here, but my output is just noise.
This is my filter code
/**
 * Resample one decoded frame by pushing it through the audio filter graph
 * (buffersrc_ctx1 -> buffersink_ctx1) and copying the filtered samples into out_buf.
 *
 * @param frame    decoded input frame
 * @param out_buf  destination buffer; its capacity is not checked here
 * @return number of bytes copied, or 0 when the frame could not be pushed or no
 *         filtered frame could be pulled
 */
int ResampleFrame(AVFrame *frame, uint8_t *out_buf)
{
/* Push the decoded frame into the filtergraph */
qint32 ret;
ret = av_buffersrc_add_frame_flags(buffersrc_ctx1, frame, AV_BUFFERSRC_FLAG_KEEP_REF);
if (ret < 0)
{
printf("ResampleFrame: Error adding frame to buffer\n");
// Drop the input frame's references and report "no data" (0)
av_frame_unref(frame);
return 0;
}
//printf("resampling\n");
AVFrame *resampled_frame = av_frame_alloc();
/* Pull filtered frames from the filtergraph */
ret = av_buffersink_get_frame(buffersink_ctx1, resampled_frame);
/* Set the timestamp on the resampled frame */
// NOTE(review): pts is read before `ret` is checked.
resampled_frame->best_effort_timestamp = resampled_frame->pts;
if (ret < 0)
{
av_frame_unref(frame);
av_frame_unref(resampled_frame);
return 0;
}
// Size of the packed output: 2 channels of S16 are hard-coded here.
int buffer_size = av_samples_get_buffer_size(NULL, 2,resampled_frame->nb_samples,AV_SAMPLE_FMT_S16,1);
// NOTE(review): the copy source is `resampled_frame->data` (the array of plane pointers),
// not `resampled_frame->data[0]` (the sample bytes) -- confirm this is intended.
memcpy(out_buf,resampled_frame->data,buffer_size);
//av_frame_unref(frame);
// NOTE(review): resampled_frame is only unref'ed on every path; no av_frame_free() is visible in this function.
av_frame_unref(resampled_frame);
return buffer_size;
}
QString filter_description1 = "aresample=48000,aformat=sample_fmts=s16:channel_layouts=stereo,asetnsamples=n=1024:p=0";
/**
 * Build and configure the audio filter graph described by filter_description1.
 *
 * Creates an "abuffer" source matching the input stream's codec parameters and an
 * "abuffersink" restricted to S16 / stereo / 48000 Hz, links them through the parsed
 * filter description and configures the graph. filterGraphInitialized_ is set only
 * when every step succeeded.
 *
 * @param inputStream  stream whose codec parameters describe the incoming audio
 * @return 0 on success, a negative value on failure
 */
int InitAudioFilter(AVStream *inputStream)
{
    char args[512];
    int ret = 0;
    const AVFilter *buffersrc = avfilter_get_by_name("abuffer");
    const AVFilter *buffersink = avfilter_get_by_name("abuffersink");
    AVFilterInOut *outputs = avfilter_inout_alloc();
    AVFilterInOut *inputs = avfilter_inout_alloc();
    filter_graph = avfilter_graph_alloc();
    const enum AVSampleFormat out_sample_fmts[] = {AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE};
    const int64_t out_channel_layouts[] = {AV_CH_LAYOUT_STEREO, -1};
    const int out_sample_rates[] = {48000, -1};

    // Single-pass block: every failure breaks out to the shared cleanup below, so the
    // AVFilterInOut lists are released on all paths.
    do {
        if (!outputs || !inputs || !filter_graph)
        {
            printf("InitAudioFilter: Could not allocate filter graph\n");
            ret = AVERROR(ENOMEM);
            break;
        }
        snprintf(args, sizeof(args), "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%" PRIx64,
                 inputStream->codec->time_base.num, inputStream->codec->time_base.den,
                 inputStream->codec->sample_rate,
                 av_get_sample_fmt_name(inputStream->codec->sample_fmt),
                 inputStream->codec->channel_layout);
        ret = avfilter_graph_create_filter(&buffersrc_ctx1, buffersrc, "in", args, NULL, filter_graph);
        if (ret < 0)
        {
            printf("InitAudioFilter: Unable to create buffersrc\n");
            ret = -1; // this path has always reported -1 rather than the library code
            break;
        }
        ret = avfilter_graph_create_filter(&buffersink_ctx1, buffersink, "out", NULL, NULL, filter_graph);
        if (ret < 0)
        {
            printf("InitAudioFilter: Unable to create buffersink\n");
            break;
        }
        // set opt SAMPLE FORMATS
        ret = av_opt_set_int_list(buffersink_ctx1, "sample_fmts", out_sample_fmts, -1, AV_OPT_SEARCH_CHILDREN);
        if (ret < 0)
        {
            printf("InitAudioFilter: Cannot set output sample format\n");
            break;
        }
        // set opt CHANNEL LAYOUTS
        ret = av_opt_set_int_list(buffersink_ctx1, "channel_layouts", out_channel_layouts, -1, AV_OPT_SEARCH_CHILDREN);
        if (ret < 0)
        {
            printf("InitAudioFilter: Cannot set output channel layout\n");
            break;
        }
        // set opt OUT SAMPLE RATES
        ret = av_opt_set_int_list(buffersink_ctx1, "sample_rates", out_sample_rates, -1, AV_OPT_SEARCH_CHILDREN);
        if (ret < 0)
        {
            printf("InitAudioFilter: Cannot set output sample rate\n");
            break;
        }
        /* Endpoints for the filter graph: the source's output feeds the description's "in" pad. */
        outputs -> name = av_strdup("in");
        outputs -> filter_ctx = buffersrc_ctx1;
        outputs -> pad_idx = 0;
        outputs -> next = NULL;
        /* Endpoints for the filter graph: the description's "out" pad feeds the sink's input. */
        inputs -> name = av_strdup("out");
        inputs -> filter_ctx = buffersink_ctx1;
        inputs -> pad_idx = 0;
        inputs -> next = NULL;
        if ((ret = avfilter_graph_parse_ptr(filter_graph, filter_description1.toStdString().c_str(), &inputs, &outputs, NULL)) < 0)
        {
            printf("InitAudioFilter: Could not add the filter to graph\n");
            break; // do not go on to configure a graph that failed to parse
        }
        if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0)
        {
            printf("InitAudioFilter: Could not configure the graph\n");
            break; // the sink has no negotiated link to report on
        }
        /* Print summary of the sink buffer
         * Note: args buffer is reused to store channel layout string */
        AVFilterLink *outlink = buffersink_ctx1->inputs[0];
        av_get_channel_layout_string(args, sizeof(args), -1, outlink->channel_layout);
        QString str = args;
        printf("Output: srate:%dHz fmt:%s chlayout: %s\n", (int) outlink->sample_rate,
               av_get_sample_fmt_name((AVSampleFormat) outlink->format),
               str.toStdString().c_str());
        filterGraphInitialized_ = true;
        ret = 0;
    } while (0);

    // avfilter_graph_parse_ptr() leaves any unlinked endpoints for the caller to free;
    // avfilter_inout_free() accepts NULL lists.
    avfilter_inout_free(&inputs);
    avfilter_inout_free(&outputs);
    return ret;
}
And since I don't have much experience with filters or audio for that matter, I'm also probably missing something here. But Can't figure out what.
Thanks

ffmpeg memory leak in the avcodec_open2 method

I've developed an application which handles live video stream. The problem is that it should run as a service and over time I am noticing some memory increase. When I check the application with valgrind - it did not find any leak related issues.
So I've checked it with Google's profiling tools. This is the result (subtracting one of the first dumps from the latest) after an approximately 6-hour run:
30.0 35.7% 35.7% 30.0 35.7% av_malloc
28.9 34.4% 70.2% 28.9 34.4% av_reallocp
24.5 29.2% 99.4% 24.5 29.2% x264_malloc
When I check the memory on the graph I see, that these allocations are related to avcodec_open2. The client code is:
// Encode all buffered images into one output file while holding the encoder mutex:
// start the encoder, feed every image with its index as pts, then finish.
g_EncoderMutex.lock();
ffmpeg_encoder_start(OutFileName.c_str(), AV_CODEC_ID_H264, m_FPS, width, height);
for (pts = 0; pts < VideoImages.size(); pts++) {
    m_frame->pts = pts;
    ffmpeg_encoder_encode_frame(VideoImages[pts].RGBimage[0]);
}
ffmpeg_encoder_finish();
g_EncoderMutex.unlock();
The ffmpeg_encoder_start method is:
void VideoEncoder::ffmpeg_encoder_start(const char *filename, int codec_id, int fps, int width, int height)
{
int ret;
m_FPS=fps;
AVOutputFormat * fmt = av_guess_format(filename, NULL, NULL);
m_oc = NULL;
avformat_alloc_output_context2(&m_oc, NULL, NULL, filename);
m_stream = avformat_new_stream(m_oc, 0);
AVCodec *codec=NULL;
codec = avcodec_find_encoder(codec_id);
if (!codec)
{
fprintf(stderr, "Codec not found\n");
return; //-1
}
m_c=m_stream->codec;
avcodec_get_context_defaults3(m_c, codec);
m_c->bit_rate = 400000;
m_c->width = width;
m_c->height = height;
m_c->time_base.num = 1;
m_c->time_base.den = m_FPS;
m_c->gop_size = 10;
m_c->max_b_frames = 1;
m_c->pix_fmt = AV_PIX_FMT_YUV420P;
if (codec_id == AV_CODEC_ID_H264)
av_opt_set(m_c->priv_data, "preset", "ultrafast", 0);
if (m_oc->oformat->flags & AVFMT_GLOBALHEADER)
m_c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
avcodec_open2( m_c, codec, NULL );
m_stream->time_base=(AVRational){1, m_FPS};
if (avio_open(&m_oc->pb, filename, AVIO_FLAG_WRITE) < 0)
{
printf( "Could not open '%s'\n", filename);
exit(1);
}
avformat_write_header(m_oc, NULL);
m_frame = av_frame_alloc();
if (!m_frame) {
printf( "Could not allocate video frame\n");
exit(1);
}
m_frame->format = m_c->pix_fmt;
m_frame->width = m_c->width;
m_frame->height = m_c->height;
ret = av_image_alloc(m_frame->data, m_frame->linesize, m_c->width, m_c->height, m_c->pix_fmt, 32);
if (ret < 0) {
printf("Could not allocate raw picture buffer\n");
exit(1);
}
}
The ffmpeg_encoder_encode_frame is:
/**
 * Convert one RGB image into m_frame, encode it and, if the encoder produced a
 * packet, write that packet to the output file.
 *
 * An encoding error terminates the process with exit(1).
 *
 * @param rgb  source image, passed to ffmpeg_encoder_set_frame_yuv_from_rgb()
 */
void VideoEncoder::ffmpeg_encoder_encode_frame(uint8_t *rgb)
{
    ffmpeg_encoder_set_frame_yuv_from_rgb(rgb);

    // Hand the encoder an empty packet so that it allocates the payload itself.
    av_init_packet(&m_pkt);
    m_pkt.size = 0;
    m_pkt.data = NULL;

    int has_packet;
    const int encode_status = avcodec_encode_video2(m_c, &m_pkt, m_frame, &has_packet);
    if (encode_status < 0) {
        printf("Error encoding frame\n");
        exit(1);
    }

    // The encoder may buffer input; nothing to write until it emits a packet.
    if (!has_packet)
        return;

    // Timestamps come out in 1/m_FPS units and must match the stream's time base.
    av_packet_rescale_ts(&m_pkt, (AVRational){1, m_FPS}, m_stream->time_base);
    m_pkt.stream_index = m_stream->index;
    av_interleaved_write_frame(m_oc, &m_pkt);
    av_packet_unref(&m_pkt);
}
ffmpeg_encoder_finish code is:
/**
 * Drain the encoder, finalize the output file and release per-clip resources.
 *
 * Passing a NULL frame to the encoder repeatedly pulls out any delayed packets;
 * each one is rescaled and written. Afterwards the trailer is written, the file is
 * closed, and the format context, frame buffer, frame, packet and scaler context
 * are released. An encoding error terminates the process with exit(1).
 */
void VideoEncoder::ffmpeg_encoder_finish(void)
{
int got_output, ret;
// Flush loop: runs until the encoder reports that no further packet is available.
do {
ret = avcodec_encode_video2(m_c, &m_pkt, NULL, &got_output);
if (ret < 0) {
printf( "Error encoding frame\n");
exit(1);
}
if (got_output) {
av_packet_rescale_ts(&m_pkt,
(AVRational){1, m_FPS}, m_stream->time_base);
m_pkt.stream_index = m_stream->index;
// The write result is stored in a shadowing local and not checked.
int ret = av_interleaved_write_frame(m_oc, &m_pkt);
av_packet_unref(&m_pkt);
}
} while (got_output);
av_write_trailer(m_oc);
avio_closep(&m_oc->pb);
// NOTE(review): no avcodec_close()/avcodec_free_context() call on m_c is visible in
// this function before the format context is freed -- confirm the codec is closed elsewhere.
avformat_free_context(m_oc);
// Release the picture buffer held in data[0] before freeing the frame itself.
av_freep(&m_frame->data[0]);
av_frame_free(&m_frame);
av_packet_unref(&m_pkt);
// m_sws_context is freed but not reset to NULL here.
sws_freeContext(m_sws_context);
}
This code runs multiple times in the loop.
So my question is - what am I doing wrong? maybe ffmpeg is using some kind of internal buffering? If so, how to disable it? Because such an increase in memory usage is unacceptable at all.
You didn't close encoder context. Add avcodec_close(m_c) to ffmpeg_encoder_finish().
See ffmpeg.org
User is required to call avcodec_close() and avformat_free_context() to clean up the allocation by avformat_new_stream().
Plus I don't see how m_c is allocated. Usually it is allocated with avcodec_alloc_context and must be deallocated with av_free (after closing of course).
Don't use valgrind to check memory leaks for your own projects, use sanitizers, with these you can pin point the source of the leak. Check this out: Multi-Threaded Video Decoder Leaks Memory
Hope that helps.
It's sufficient to call 'avcodec_free_context(m_c)'; this procedure calls 'avcodec_close' and also de-allocates 'extradata' (if it was allocated) and 'subtitle_header' (if it was allocated).

Audio/Video encoding with ffmpeg

Audio/Video encoding with ffmpeg:
I am trying to create an avi file with encoded video and audio, using ffmpeg.
First, I create the file:
//define BITRATE 10000000
//define GOP 300
//define FPS 60
//define VIDEOTYPE "avi"
if (!encoder_->createFile(QFileInfo(*(videoFile_.data())).absoluteFilePath(), targetRect.width(), targetRect.height(), BITRATE*(1000 / FPS), GOP, 1000))
The buffers are initialized as:
// Output buffer for encoded audio: sized as 44100 * 0.005 * 16.
audio_outbuf_size = 44100 * 0.005 * 16; //5ms of audio should be encoded, each time this function is called
audio_outbuf = new uint8_t[audio_outbuf_size];
// Output buffer for encoded video: three bytes per pixel of the frame.
outbuf_size = getWidth()*getHeight() * 3;
outbuf = new uint8_t[outbuf_size];
Then add audio and video streams (audio: CODEC_ID_PCM_S16LE, 16000 kb/s and 44100 Hz, video: PIX_FMT_YUV420P)
/**
 * Add an audio stream to the output context, open its encoder, allocate the audio
 * buffers, open the output file and write the container header.
 *
 * Every failure prints a message and returns early.
 *
 * @param fileName  path of the output file to open for writing
 * @param codec_id  audio codec to encode with
 */
void MediaMuxer::addAudioStream(QString fileName, ffmpeg::CodecID codec_id)
{
    // find the audio encoder first: its supported sample formats are read below,
    // so a missing encoder has to be rejected before it is dereferenced
    ffmpeg::AVCodec *encoder = avcodec_find_encoder(codec_id);
    if (!encoder)
    {
        printf("codec not found\n");
        return;
    }
    // Add the audio stream
    pAudioStream_ = ffmpeg::av_new_stream(pOutputFormatCtx_, 0);
    if (!pAudioStream_) {
        printf("Could not allocate stream\n");
        return;
    }
    pAudioCodecCtx_ = pAudioStream_->codec;
    pAudioCodecCtx_->codec_id = codec_id;
    pAudioCodecCtx_->codec_type = ffmpeg::AVMEDIA_TYPE_AUDIO;
    // Prefer the encoder's first supported sample format; fall back to S16 when the
    // encoder does not publish a list.
    pAudioCodecCtx_->sample_fmt = encoder->sample_fmts ? encoder->sample_fmts[0]
                                                       : ffmpeg::AV_SAMPLE_FMT_S16;
    pAudioCodecCtx_->bit_rate = 16000;
    //pAudioCodecCtx_->bit_rate = 64000;
    pAudioCodecCtx_->sample_rate = N;
    pAudioCodecCtx_->channels = 1;
    pAudioCodecCtx_->time_base.den = FPS;
    pAudioCodecCtx_->time_base.num = 1;
    avcodec_thread_init(pAudioCodecCtx_, 10);
    // some formats want stream headers to be separate
    if (pOutputFormatCtx_->oformat->flags & AVFMT_GLOBALHEADER)
        pAudioCodecCtx_->flags |= CODEC_FLAG_GLOBAL_HEADER;
    if (av_set_parameters(pOutputFormatCtx_, NULL) < 0)
    {
        printf("Invalid output format parameters\n");
        return;
    }
    //ffmpeg::dump_format(pOutputFormatCtx_, 0, fileName.toStdString().c_str(), 1);
    // The context's codec_id equals codec_id, so the encoder found above is the one to open.
    pAudioCodec_ = encoder;
    // open the codec
    if (avcodec_open(pAudioCodecCtx_, pAudioCodec_) < 0)
    {
        printf("could not open codec\n");
        return;
    }
    // Allocate memory for output
    if (!initAudioOutputBuf())
    {
        printf("Can't allocate memory for audio output bitstream\n");
        return;
    }
    // Allocate the audio frame
    if (!initAudioFrame())
    {
        printf("Can't init audio frame\n");
        return;
    }
    // NOTE(review): addVideoStream() also opens the file and writes the header --
    // confirm that only one of the two is meant to do so.
    if (url_fopen(&pOutputFormatCtx_->pb, fileName.toStdString().c_str(), URL_WRONLY) < 0)
    {
        printf("Could not open '%s'\n", fileName.toStdString().c_str());
        return;
    }
    av_write_header(pOutputFormatCtx_);
}
/**
 * Add a video stream to the output context, open its encoder, allocate the video
 * buffers, open the output file and write the container header.
 *
 * Every failure prints a message and returns early.
 *
 * @param fileName  path of the output file to open for writing
 */
void MediaMuxer::addVideoStream(QString fileName)
{
// Add the video stream
pVideoStream_ = ffmpeg::av_new_stream(pOutputFormatCtx_, 0);
if (!pVideoStream_)
{
printf("Could not allocate stream\n");
return;
}
// Configure the stream's codec context; the codec is the output format's default video codec.
pVideoCodecCtx_ = pVideoStream_->codec;
pVideoCodecCtx_->codec_id = pOutputFormat_->video_codec;
pVideoCodecCtx_->codec_type = ffmpeg::AVMEDIA_TYPE_VIDEO;
pVideoCodecCtx_->bit_rate = Bitrate;
pVideoCodecCtx_->width = getWidth();
pVideoCodecCtx_->height = getHeight();
// Time base of 1/FPS
pVideoCodecCtx_->time_base.den = FPS;
pVideoCodecCtx_->time_base.num = 1;
pVideoCodecCtx_->gop_size = Gop;
pVideoCodecCtx_->pix_fmt = ffmpeg::PIX_FMT_YUV420P;
avcodec_thread_init(pVideoCodecCtx_, 10);
// some formats want stream headers to be separate
if (pOutputFormatCtx_->oformat->flags & AVFMT_GLOBALHEADER)
pVideoCodecCtx_->flags |= CODEC_FLAG_GLOBAL_HEADER;
if (av_set_parameters(pOutputFormatCtx_, NULL) < 0)
{
printf("Invalid output format parameters\n");
return;
}
//ffmpeg::dump_format(pOutputFormatCtx_, 0, fileName.toStdString().c_str(), 1);
// open_video
// find the video encoder
pVideoCodec_ = avcodec_find_encoder(pVideoCodecCtx_->codec_id);
if (!pVideoCodec_)
{
printf("codec not found\n");
return;
}
// open the codec
if (avcodec_open(pVideoCodecCtx_, pVideoCodec_) < 0)
{
printf("could not open codec\n");
return;
}
// Allocate memory for output
if (!initOutputBuf())
{
printf("Can't allocate memory for output bitstream\n");
return;
}
// Allocate the YUV frame
if (!initFrame())
{
printf("Can't init frame\n");
return;
}
// NOTE(review): addAudioStream() also opens the file and writes the header --
// confirm that only one of the two is meant to do so.
if (url_fopen(&pOutputFormatCtx_->pb, fileName.toStdString().c_str(), URL_WRONLY) < 0)
{
printf("Could not open '%s'\n", fileName.toStdString().c_str());
return;
}
av_write_header(pOutputFormatCtx_);
}
Finally, I call alternatively encodeVideo/encodeAudio to encode video and PCM audio frames at specific recording times(pts):
int MediaMuxer::encodeVideo(const QImage &img, unsigned pts)
{
convertImage_sws(img); // SWS conversion
pVideoCodecCtx_->coded_frame->pts = pts; // Set the time stamp
int out_size = ffmpeg::avcodec_encode_video(pVideoCodecCtx_, outbuf, outbuf_size, ppicture);
pVideoCodecCtx_->coded_frame->pts = pts; // Set the time stamp
if (out_size > 0)
{
ffmpeg::av_init_packet(&pkt);
if (pVideoCodecCtx_->coded_frame->pts != (0x8000000000000000LL))
pkt.pts = av_rescale_q(pVideoCodecCtx_->coded_frame->pts, pVideoCodecCtx_->time_base, pVideoStream_->time_base);
if (pVideoCodecCtx_->coded_frame->key_frame)
pkt.flags |= AV_PKT_FLAG_KEY;
pkt.stream_index = pVideoStream_->index;
pkt.data = outbuf;
pkt.size = out_size;
int ret = ffmpeg::av_interleaved_write_frame(pOutputFormatCtx_, &pkt);
if (ret<0)
return -1;
}
return out_size;
}
/**
 * Generate a short 1 kHz sine burst, encode it and write the resulting packet to
 * the output.
 *
 * @param pts  timestamp to stamp on the coded frame
 * @return the encoder's output size (which may be zero or negative when nothing was
 *         produced), or -1 when writing the packet failed
 */
int MediaMuxer::encodeAudio(unsigned pts)
{
    pAudioCodecCtx_->coded_frame->pts = pts; // Set the time stamp
    // simple sound encoding
    int16_t samples[220] = { 0 }; // buffer
    int n; // buffer index
    double Fs = 44100.0; // sampling frequency
    // Generate audio data
    for (n = 0; n < 220; ++n) //220 samples (44100*.005sec as the interval between 2 video frames is 10ms)
        samples[n] = 16383.0 * sin(n*1000.0*2.0*M_PI / Fs); //sine wav
    int out_size = ffmpeg::avcodec_encode_audio(pAudioCodecCtx_, audio_outbuf, audio_outbuf_size, (const short*)samples);
    pAudioCodecCtx_->coded_frame->pts = pts; // Set the time stamp
    if (out_size>0)
    {
        // Packet wrapping the bytes the encoder wrote into audio_outbuf
        ffmpeg::AVPacket pkt = { 0 };
        av_init_packet(&pkt);
        if (pAudioCodecCtx_->coded_frame->pts != (0x8000000000000000LL))
            pkt.pts = av_rescale_q(pAudioCodecCtx_->coded_frame->pts, pAudioCodecCtx_->time_base, pAudioStream_->time_base);
        if (pAudioCodecCtx_->coded_frame->key_frame)
            pkt.flags |= AV_PKT_FLAG_KEY;
        pkt.stream_index = pAudioStream_->index;
        pkt.data = audio_outbuf;
        pkt.size = out_size;
        int ret = av_interleaved_write_frame(pOutputFormatCtx_, &pkt);
        if (ret<0)
            return -1;
        av_free_packet(&pkt);
    }
    //end simple sound encoding
    // Report the encoder's result, as encodeVideo() does. The local packet is out of
    // scope here, so `pkt.size` would have referred to a different `pkt`.
    return out_size;
}
The result is a nice video with some audio behind it: either a beeping sound at regular intervals that ends much earlier than the video, or a longer continuous sound that is also shorter than the video.
I want to generate a beeping sound each time the function encodeAudio() is called - at non-regular intervals. I have tried to modify the sampling rate, the buffer size, the pkt size and the number of samples but without any success. I also tried to set the pts at different times but it did not get me where I want to be. Could someone please help?

waveOutWrite compatible with ASIO?

I am writing an application where I get sound data using low-latency ASIO card. The low-latency means that I get only 128 samples per batch, for 48k sample rate. From the ASIO card, I get raw samples in 32-bit signed integer range.
Now I want to listen to the sound coming through the ASIO card, not on the ASIO card itself but on the default output device in Windows. I am using waveOutWrite, set up with WAVE_FORMAT_PCM and the same characteristics as the ASIO input, and I call it every time I get a new 128-sample batch. Because the WAV format does not allow for 32-bit integer samples, I reduce them to 16 bits.
HWAVEOUT waveOut;
void startListening(){
WAVEFORMATEX format;
format.wFormatTag = WAVE_FORMAT_PCM;
format.nChannels = 1;
format.nSamplesPerSec = sampleRate;
format.nAvgBytesPerSec = sampleRate * 2;
format.nBlockAlign = 2;
format.wBitsPerSample = 16;
format.cbSize = 0;
MMRESULT result = waveOutOpen(waveOut, WAVE_MAPPER, &format, 0, 0, CALLBACK_NULL);
if(result != MMSYSERR_NOERROR){
return;
}
}
// Work item passed to the listen() thread for one batch of samples.
struct ListenInfo{
    short *buffer;      // 16-bit samples; allocated as new short[length] by newListeningData()
    int length;         // number of samples in `buffer`
    HWAVEOUT waveOut;   // output device handle the batch is written to
};
void newListeningData(void *buffer, int length){
ListenInfo *listenInfo = new ListenInfo();
listenInfo->buffer = new short[length];
listenInfo->length = length;
listenInfo->waveOut = *waveOut;
if(bitrate == 32){
int *bufferInt = (int *)buffer;
for(int i = 0; i < length; i++){
listenInfo->buffer[i] = (bufferInt[i]);
}
CreateThread(NULL, 0, &(listen), listenInfo, 0, NULL);
}
else if(bitrate == 16){
memcpy(listenInfo->buffer, (short *)buffer, length * 2);
CreateThread(NULL, 0, &(listen), listenInfo, 0, NULL);
}
else{
printf("%d: Bitrate is not 16 or 32!\n", index);
}
}
// Thread entry point: plays one batch of 16-bit samples on the batch's output
// device, waits until the device has finished with the buffer, then frees the
// ListenInfo and its sample buffer.
//
// lpParameter: pointer to a ListenInfo allocated by newListeningData().
// Always returns 0.
DWORD WINAPI listen(__in LPVOID lpParameter){
    ListenInfo *info = (ListenInfo *)lpParameter;

    WAVEHDR header;
    memset(&header, 0, sizeof(WAVEHDR));
    // dwBufferLength is measured in bytes, while info->length counts samples.
    header.dwBufferLength = (DWORD)(info->length * sizeof(short));
    header.lpData = (char *)(info->buffer);

    MMRESULT result = waveOutPrepareHeader(info->waveOut, &header, sizeof(WAVEHDR));
    if(result == MMSYSERR_NOERROR){
        result = waveOutWrite(info->waveOut, &header, sizeof(WAVEHDR));
        // The buffer must stay alive until the header can be unprepared.
        while(waveOutUnprepareHeader(info->waveOut, &header, sizeof(WAVEHDR)) == WAVERR_STILLPLAYING){
            Sleep(10);
        }
    }

    delete[] info->buffer;
    delete info;
    return 0;
}
The problem is that I can hear only severe clipping and squeaking. The sound is distorted beyond recognition. I know it is not a synchronization error, because I also save the samples into a wav file with the same characteristics and the sound is distorted in the same way.
How can I convert signed 32-bit samples into something that waveOutWrite can play?
The problem was caused by the fact that I was using a different bitrate than I had been led to believe. When I modified the WAVEFORMATEX with the correct values, it worked!