I'm trying to use the C++ API of FFmpeg (version 20150526) under Windows, with the prebuilt binaries, to decode an H.264 video file (*.ts).
I've written a very simple program that automatically detects the required codec from the file itself (and it is AV_CODEC_ID_H264, as expected).
Then I re-open the video file in read-binary mode, read a fixed-size buffer of bytes from it in a while-loop, and feed the bytes to the decoder until the end of file. However, when I call avcodec_decode_video2, a large number of errors occur, like the following:
[h264 @ 008df020] top block unavailable for requested intra mode at 34 0
[h264 @ 008df020] error while decoding MB 34 0, bytestream 3152
[h264 @ 008df020] decode_slice_header error
Sometimes avcodec_decode_video2 sets got_picture_ptr to 1, so I expect to get a good frame. Instead, although all the computations succeed, when I view the decoded frame (using OpenCV only for visualization purposes) I see a gray image with some artifacts.
If I use the same code to decode an *.avi file, it works fine.
Reading the FFmpeg examples, I did not find a solution to my problem. I also implemented the solution proposed in the similar question "FFmpeg c++ H264 decoding error", but it did not work.
Does anyone know where the error is?
Thank you in advance for any reply!
The code is the following [EDIT: code updated to include the parser management]:
#include <iostream>
#include <iomanip>
#include <string>
#include <sstream>
#include <opencv2/opencv.hpp>
#ifdef __cplusplus
extern "C"
{
#endif // __cplusplus
#include <libavcodec/avcodec.h>
#include <libavdevice/avdevice.h>
#include <libavfilter/avfilter.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavutil/avutil.h>
#include <libpostproc/postprocess.h>
#include <libswresample/swresample.h>
#include <libswscale/swscale.h>
#ifdef __cplusplus
} // end extern "C".
#endif // __cplusplus
#define INBUF_SIZE 4096
int main()
{
AVCodec* l_pCodec;
AVCodecContext* l_pAVCodecContext;
SwsContext* l_pSWSContext;
AVFormatContext* l_pAVFormatContext;
AVFrame* l_pAVFrame;
AVFrame* l_pAVFrameBGR;
AVPacket l_AVPacket;
AVPacket l_AVPacket_out;
AVStream* l_pStream;
AVCodecParserContext* l_pParser;
FILE* l_pFile_in;
FILE* l_pFile_out;
std::string l_sFile;
int l_iResult;
int l_iFrameCount;
int l_iGotFrame;
int l_iBufLength;
int l_iParsedBytes;
int l_iPts;
int l_iDts;
int l_iPos;
int l_iSize;
int l_iDecodedBytes;
uint8_t l_auiInBuf[INBUF_SIZE + FF_INPUT_BUFFER_PADDING_SIZE];
uint8_t* l_pData;
cv::Mat l_cvmImage;
l_pCodec = NULL;
l_pAVCodecContext = NULL;
l_pSWSContext = NULL;
l_pAVFormatContext = NULL;
l_pAVFrame = NULL;
l_pAVFrameBGR = NULL;
l_pParser = NULL;
l_pStream = NULL;
l_pFile_in = NULL;
l_pFile_out = NULL;
l_iPts = 0;
l_iDts = 0;
l_iPos = 0;
l_pData = NULL;
l_sFile = "myvideo.ts";
avdevice_register_all();
avfilter_register_all();
avcodec_register_all();
av_register_all();
avformat_network_init();
l_pAVFormatContext = avformat_alloc_context();
l_iResult = avformat_open_input(&l_pAVFormatContext,
l_sFile.c_str(),
NULL,
NULL);
if (l_iResult >= 0)
{
l_iResult = avformat_find_stream_info(l_pAVFormatContext, NULL);
if (l_iResult >= 0)
{
for (int i=0; i<l_pAVFormatContext->nb_streams; i++)
{
if (l_pAVFormatContext->streams[i]->codec->codec_type ==
AVMEDIA_TYPE_VIDEO)
{
l_pCodec = avcodec_find_decoder(
l_pAVFormatContext->streams[i]->codec->codec_id);
l_pStream = l_pAVFormatContext->streams[i];
}
}
}
}
av_init_packet(&l_AVPacket);
av_init_packet(&l_AVPacket_out);
memset(l_auiInBuf + INBUF_SIZE, 0, FF_INPUT_BUFFER_PADDING_SIZE);
if (l_pCodec)
{
l_pAVCodecContext = avcodec_alloc_context3(l_pCodec);
l_pParser = av_parser_init(l_pAVCodecContext->codec_id);
if (l_pParser)
{
av_register_codec_parser(l_pParser->parser);
}
if (l_pAVCodecContext)
{
if (l_pCodec->capabilities & CODEC_CAP_TRUNCATED)
{
l_pAVCodecContext->flags |= CODEC_FLAG_TRUNCATED;
}
l_iResult = avcodec_open2(l_pAVCodecContext, l_pCodec, NULL);
if (l_iResult >= 0)
{
l_pFile_in = fopen(l_sFile.c_str(), "rb");
if (l_pFile_in)
{
l_pAVFrame = av_frame_alloc();
l_pAVFrameBGR = av_frame_alloc();
if (l_pAVFrame)
{
l_iFrameCount = 0;
avcodec_get_frame_defaults(l_pAVFrame);
while (1)
{
l_iBufLength = fread(l_auiInBuf,
1,
INBUF_SIZE,
l_pFile_in);
if (l_iBufLength == 0)
{
break;
}
else
{
l_pData = l_auiInBuf;
l_iSize = l_iBufLength;
while (l_iSize > 0)
{
if (l_pParser)
{
l_iParsedBytes = av_parser_parse2(
l_pParser,
l_pAVCodecContext,
&l_AVPacket_out.data,
&l_AVPacket_out.size,
l_pData,
l_iSize,
l_AVPacket.pts,
l_AVPacket.dts,
AV_NOPTS_VALUE);
if (l_iParsedBytes <= 0)
{
break;
}
l_AVPacket.pts = l_AVPacket.dts = AV_NOPTS_VALUE;
l_AVPacket.pos = -1;
}
else
{
l_AVPacket_out.data = l_pData;
l_AVPacket_out.size = l_iSize;
}
l_iDecodedBytes =
avcodec_decode_video2(
l_pAVCodecContext,
l_pAVFrame,
&l_iGotFrame,
&l_AVPacket_out);
if (l_iDecodedBytes >= 0)
{
if (l_iGotFrame)
{
l_pSWSContext = sws_getContext(
l_pAVCodecContext->width,
l_pAVCodecContext->height,
l_pAVCodecContext->pix_fmt,
l_pAVCodecContext->width,
l_pAVCodecContext->height,
AV_PIX_FMT_BGR24,
SWS_BICUBIC,
NULL,
NULL,
NULL);
if (l_pSWSContext)
{
l_iResult = avpicture_alloc(
reinterpret_cast<AVPicture*>(l_pAVFrameBGR),
AV_PIX_FMT_BGR24,
l_pAVFrame->width,
l_pAVFrame->height);
l_iResult = sws_scale(
l_pSWSContext,
l_pAVFrame->data,
l_pAVFrame->linesize,
0,
l_pAVCodecContext->height,
l_pAVFrameBGR->data,
l_pAVFrameBGR->linesize);
if (l_iResult > 0)
{
l_cvmImage = cv::Mat(
l_pAVFrame->height,
l_pAVFrame->width,
CV_8UC3,
l_pAVFrameBGR->data[0],
l_pAVFrameBGR->linesize[0]);
if (l_cvmImage.empty() == false)
{
cv::imshow("image", l_cvmImage);
cv::waitKey(10);
}
}
}
l_iFrameCount++;
}
}
else
{
break;
}
l_pData += l_iParsedBytes;
l_iSize -= l_iParsedBytes;
}
}
} // end while(1).
}
fclose(l_pFile_in);
}
}
}
}
}
EDIT: The following is the final code that solves my problem, thanks to the suggestions of Ronald.
#include <iostream>
#include <iomanip>
#include <string>
#include <sstream>
#include <opencv2/opencv.hpp>
#ifdef __cplusplus
extern "C"
{
#endif // __cplusplus
#include <libavcodec/avcodec.h>
#include <libavdevice/avdevice.h>
#include <libavfilter/avfilter.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavutil/avutil.h>
#include <libpostproc/postprocess.h>
#include <libswresample/swresample.h>
#include <libswscale/swscale.h>
#ifdef __cplusplus
} // end extern "C".
#endif // __cplusplus
int main()
{
AVCodec* l_pCodec;
AVCodecContext* l_pAVCodecContext;
SwsContext* l_pSWSContext;
AVFormatContext* l_pAVFormatContext;
AVFrame* l_pAVFrame;
AVFrame* l_pAVFrameBGR;
AVPacket l_AVPacket;
std::string l_sFile;
uint8_t* l_puiBuffer;
int l_iResult;
int l_iFrameCount;
int l_iGotFrame;
int l_iDecodedBytes;
int l_iVideoStreamIdx;
int l_iNumBytes;
cv::Mat l_cvmImage;
l_pCodec = NULL;
l_pAVCodecContext = NULL;
l_pSWSContext = NULL;
l_pAVFormatContext = NULL;
l_pAVFrame = NULL;
l_pAVFrameBGR = NULL;
l_puiBuffer = NULL;
l_sFile = "myvideo.ts";
av_register_all();
l_iResult = avformat_open_input(&l_pAVFormatContext,
l_sFile.c_str(),
NULL,
NULL);
if (l_iResult >= 0)
{
l_iResult = avformat_find_stream_info(l_pAVFormatContext, NULL);
if (l_iResult >= 0)
{
for (int i=0; i<l_pAVFormatContext->nb_streams; i++)
{
if (l_pAVFormatContext->streams[i]->codec->codec_type ==
AVMEDIA_TYPE_VIDEO)
{
l_iVideoStreamIdx = i;
l_pAVCodecContext =
l_pAVFormatContext->streams[l_iVideoStreamIdx]->codec;
if (l_pAVCodecContext)
{
l_pCodec = avcodec_find_decoder(l_pAVCodecContext->codec_id);
}
break;
}
}
}
}
if (l_pCodec && l_pAVCodecContext)
{
l_iResult = avcodec_open2(l_pAVCodecContext, l_pCodec, NULL);
if (l_iResult >= 0)
{
l_pAVFrame = av_frame_alloc();
l_pAVFrameBGR = av_frame_alloc();
l_iNumBytes = avpicture_get_size(PIX_FMT_BGR24,
l_pAVCodecContext->width,
l_pAVCodecContext->height);
l_puiBuffer = (uint8_t *)av_malloc(l_iNumBytes*sizeof(uint8_t));
avpicture_fill((AVPicture *)l_pAVFrameBGR,
l_puiBuffer,
PIX_FMT_BGR24, // must match avpicture_get_size() above and the BGR24 sws output
l_pAVCodecContext->width,
l_pAVCodecContext->height);
l_pSWSContext = sws_getContext(
l_pAVCodecContext->width,
l_pAVCodecContext->height,
l_pAVCodecContext->pix_fmt,
l_pAVCodecContext->width,
l_pAVCodecContext->height,
AV_PIX_FMT_BGR24,
SWS_BICUBIC,
NULL,
NULL,
NULL);
while (av_read_frame(l_pAVFormatContext, &l_AVPacket) >= 0)
{
if (l_AVPacket.stream_index == l_iVideoStreamIdx)
{
l_iDecodedBytes = avcodec_decode_video2(
l_pAVCodecContext,
l_pAVFrame,
&l_iGotFrame,
&l_AVPacket);
if (l_iGotFrame)
{
if (l_pSWSContext)
{
l_iResult = sws_scale(
l_pSWSContext,
l_pAVFrame->data,
l_pAVFrame->linesize,
0,
l_pAVCodecContext->height,
l_pAVFrameBGR->data,
l_pAVFrameBGR->linesize);
if (l_iResult > 0)
{
l_cvmImage = cv::Mat(
l_pAVFrame->height,
l_pAVFrame->width,
CV_8UC3,
l_pAVFrameBGR->data[0],
l_pAVFrameBGR->linesize[0]);
if (l_cvmImage.empty() == false)
{
cv::imshow("image", l_cvmImage);
cv::waitKey(1);
}
}
}
l_iFrameCount++;
}
}
}
}
}
}
You're never actually using the l_pParser object; in other words, you're not using an H.264 parser, you're just sending raw file data into the decoder without proper NAL packetization. Please read the frame parsing API docs to figure out how to use the parser.
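For reference, here is a minimal sketch of that parsing loop, modeled on FFmpeg's decoding example and written against the same 2015-era API the question uses; the variable names (codec_ctx, parser, inbuf, bytes_read, frame) are illustrative, not taken from the code above:
// Assumes codec_ctx was opened with avcodec_open2() for AV_CODEC_ID_H264
// and parser = av_parser_init(AV_CODEC_ID_H264) succeeded.
uint8_t* cur = inbuf; // raw bytes just fread() from the file
int remaining = bytes_read;
while (remaining > 0)
{
    AVPacket pkt;
    av_init_packet(&pkt);
    // The parser consumes raw bytes and emits whole, NAL-aligned packets.
    int used = av_parser_parse2(parser, codec_ctx,
                                &pkt.data, &pkt.size,
                                cur, remaining,
                                AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
    if (used < 0)
        break;
    cur += used;
    remaining -= used;
    if (pkt.size > 0) // only decode once a complete packet has been assembled
    {
        int got_frame = 0;
        avcodec_decode_video2(codec_ctx, frame, &got_frame, &pkt);
        // ... convert/display the frame here if got_frame is set ...
    }
}
Note that for an MPEG-TS input the simpler route, used in the final code above, is to let libavformat demux the file with av_read_frame(), which already yields properly packetized AVPackets.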
Related
I need to implement screen recording in my software, but I have a problem getting gdigrab to work on Windows 10. This code prints
"can't find input device." (av_find_input_format("gdigrab") returns NULL).
If I try to record the desktop with ffmpeg.exe using ffmpeg -f gdigrab -i desktop out.avi, the desktop is recorded correctly. (A diagnostic sketch follows the code below.)
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
extern "C"
{
#include "ffmpeg-x86-4.4/include/libavcodec/avcodec.h"
#include "ffmpeg-x86-4.4/include/libavformat/avformat.h"
#include "ffmpeg-x86-4.4/include/libswscale/swscale.h"
#include "ffmpeg-x86-4.4/include/libavdevice/avdevice.h"
#include "ffmpeg-x86-4.4/include/libavutil/imgutils.h"
#include "ffmpeg-x86-4.4/include/libavutil/dict.h"
#include "SDL2/include/SDL.h"
}
#define OUTPUT_YUV420P 1
#define OUTPUT_H264 1
int main(int argc, char* argv[])
{
AVFormatContext* pFormatCtx = NULL; // must be NULL (or allocated) before avformat_open_input()
AVStream* videoStream;
AVCodecContext* pCodecCtx;
AVCodec* pCodec;
AVFrame* pFrame, * pFrameYUV;
AVPacket* pPacket;
SwsContext* pImgConvertCtx;
int videoIndex = -1;
unsigned int i = 0;
SDL_Window* screen;
SDL_Renderer* sdlRenderer;
SDL_Texture* sdlTexture;
SDL_Rect sdlRect;
int screen_w = 0;
int screen_h = 0;
printf("Starting...\n");
//AVInputFormat* p = NULL;
//register device
avdevice_register_all();
//use gdigrab
AVInputFormat* ifmt = av_find_input_format("gdigrab");
if (!ifmt)
{
printf("can't find input device.\n");
return -1;
}
AVDictionary* options = NULL;
if (avformat_open_input(&pFormatCtx, "desktop", ifmt, &options) != 0)
{
printf("can't open input stream.\n");
return -1;
}
if (avformat_find_stream_info(pFormatCtx, NULL) < 0)
{
printf("can't find stream information.\n");
return -1;
}
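One diagnostic sketch that may help narrow this down (assuming the FFmpeg 4.4 binaries from the include paths above): enumerate the input video devices that the linked libavdevice build actually provides. If "gdigrab" never appears, those DLLs were configured without it, even though the separately installed ffmpeg.exe supports it:
#include <cstdio>
extern "C"
{
#include "ffmpeg-x86-4.4/include/libavdevice/avdevice.h"
}
// Lists every input video device compiled into the linked libavdevice.
// "gdigrab" must appear here for av_find_input_format("gdigrab") to succeed.
int main()
{
    avdevice_register_all();
    AVInputFormat* dev = NULL;
    while ((dev = av_input_video_device_next(dev)) != NULL)
        printf("input video device: %s\n", dev->name);
    return 0;
}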
I'm so close to having this working, but playing with the output sample format or the codec context doesn't seem to solve it, and I don't know where to go from here.
#include <iostream>
#include <SFML/Audio.hpp>
#include "MyAudioStream.h"
extern "C"
{
#include <libavutil/opt.h>
#include <libavutil/avutil.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/audio_fifo.h>
#include <libswresample/swresample.h>
}
void setupInput(AVFormatContext *input_format_context, AVCodecContext **input_codec_context, const char *streamURL)
{
// av_find_input_format("mp3");
avformat_open_input(&input_format_context, streamURL, NULL, NULL);
avformat_find_stream_info(input_format_context, NULL);
AVDictionary *metadata = input_format_context->metadata;
AVDictionaryEntry *name = av_dict_get(metadata, "icy-name", NULL, 0);
if (name != NULL)
{
std::cout << name->value << std::endl;
}
AVDictionaryEntry *title = av_dict_get(metadata, "StreamTitle", NULL, 0);
if (title != NULL)
{
std::cout << title->value << std::endl;
}
AVStream *stream = input_format_context->streams[0];
AVCodecParameters *codec_params = stream->codecpar;
AVCodec *codec = avcodec_find_decoder(codec_params->codec_id);
*input_codec_context = avcodec_alloc_context3(codec);
avcodec_parameters_to_context(*input_codec_context, codec_params);
avcodec_open2(*input_codec_context, codec, NULL);
}
void setupOutput(AVCodecContext *input_codec_context, AVCodecContext **output_codec_context)
{
AVCodec *output_codec = avcodec_find_encoder(AV_CODEC_ID_PCM_S16LE); // AV_CODEC_ID_PCM_S16LE ?? AV_CODEC_ID_PCM_S16BE
*output_codec_context = avcodec_alloc_context3(output_codec);
(*output_codec_context)->channels = 2;
(*output_codec_context)->channel_layout = av_get_default_channel_layout(2);
(*output_codec_context)->sample_rate = input_codec_context->sample_rate;
(*output_codec_context)->sample_fmt = output_codec->sample_fmts[0]; // AV_SAMPLE_FMT_S16 ??
avcodec_open2(*output_codec_context, output_codec, NULL);
}
void setupResampler(AVCodecContext *input_codec_context, AVCodecContext *output_codec_context, SwrContext **resample_context)
{
*resample_context = swr_alloc_set_opts(
*resample_context,
output_codec_context->channel_layout,
output_codec_context->sample_fmt,
output_codec_context->sample_rate,
input_codec_context->channel_layout,
input_codec_context->sample_fmt,
input_codec_context->sample_rate,
0, NULL);
swr_init(*resample_context);
}
MyAudioStream::MyAudioStream()
{
input_format_context = avformat_alloc_context();
resample_context = swr_alloc();
}
MyAudioStream::~MyAudioStream()
{
// clean up
avformat_close_input(&input_format_context);
avformat_free_context(input_format_context);
}
void MyAudioStream::load(const char *streamURL)
{
setupInput(input_format_context, &input_codec_context, streamURL);
setupOutput(input_codec_context, &output_codec_context);
setupResampler(input_codec_context, output_codec_context, &resample_context);
initialize(output_codec_context->channels, output_codec_context->sample_rate);
}
bool MyAudioStream::onGetData(Chunk &data)
{
// init
AVFrame *input_frame = av_frame_alloc();
AVPacket *input_packet = av_packet_alloc();
input_packet->data = NULL;
input_packet->size = 0;
// read
av_read_frame(input_format_context, input_packet);
avcodec_send_packet(input_codec_context, input_packet);
avcodec_receive_frame(input_codec_context, input_frame);
// convert
uint8_t *converted_input_samples = (uint8_t *)calloc(output_codec_context->channels, sizeof(*converted_input_samples));
av_samples_alloc(&converted_input_samples, NULL, output_codec_context->channels, input_frame->nb_samples, output_codec_context->sample_fmt, 0);
swr_convert(resample_context, &converted_input_samples, input_frame->nb_samples, (const uint8_t **)input_frame->extended_data, input_frame->nb_samples);
data.sampleCount = input_frame->nb_samples;
data.samples = (sf::Int16 *)converted_input_samples;
// av_freep(&converted_input_samples[0]);
// free(converted_input_samples);
av_packet_free(&input_packet);
av_frame_free(&input_frame);
return true;
}
void MyAudioStream::onSeek(sf::Time timeOffset)
{
// no op
}
sf::Int64 MyAudioStream::onLoop()
{
// no loop
return -1;
}
Called with
#include <iostream>
#include "./MyAudioStream.h"
extern "C"
{
#include <libavutil/opt.h>
#include <libavutil/avutil.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
}
const char *streamURL = "http://s5radio.ponyvillelive.com:8026/stream.mp3";
int main(int, char **)
{
MyAudioStream myStream;
myStream.load(streamURL);
std::cout << "Hello, world!" << std::endl;
myStream.play();
while (myStream.getStatus() == MyAudioStream::Playing)
{
sf::sleep(sf::seconds(0.1f));
}
return 0;
}
I solved it. The count returned by swr_get_out_samples seems to be per channel. I multiplied by 2 when setting sf::SoundStream::Chunk::sampleCount, as in:
data.sampleCount = out_samples * 2;
and that works.
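For context, a small sketch of that accounting (variable names from the question's code; SFML's Chunk::sampleCount counts interleaved samples across all channels, while swr_get_out_samples and swr_convert count samples per channel):
// Upper bound on output samples *per channel* for this input frame.
int out_samples = swr_get_out_samples(resample_context, input_frame->nb_samples);
// SFML expects the total number of interleaved int16 samples,
// i.e. the per-channel count multiplied by the channel count (2 here).
data.sampleCount = out_samples * output_codec_context->channels;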
I am trying to write a simple audio-capturing application in C++, and I found that capturing audio with C++ is a very hard job: I couldn't find more than a few libraries, and I ended up with IMediaObject. Everything works fine on some other PCs, but on mine it can't record audio. When I call IMediaObject::ProcessOutput, it returns S_FALSE. I don't know what's wrong with my PC. I am using Visual Studio 2019.
Can anyone give me a clue as to what is wrong here?
For your understanding, I am sharing my main code here:
#include <windows.h>
#include <dmo.h>
#include <Mmsystem.h>
#include <objbase.h>
#include <mediaobj.h>
#include <uuids.h>
#include <propidl.h>
#include <wmcodecdsp.h>
#include <atlbase.h>
#include <ATLComCli.h>
#include <audioclient.h>
#include <MMDeviceApi.h>
#include <AudioEngineEndPoint.h>
#include <DeviceTopology.h>
#include <propkey.h>
#include <strsafe.h>
#include <conio.h>
#include "AecKsBinder.h"
#include "mediabuf.h"
#define SAFE_ARRAYDELETE(p) {if (p) delete[] (p); (p) = NULL;}
#define SAFE_RELEASE(p) {if (NULL != p) {(p)->Release(); (p) = NULL;}}
#define VBFALSE (VARIANT_BOOL)0
#define VBTRUE (VARIANT_BOOL)-1
#define STREAM_BUFFER_LENGTH 0.1f //streaming buffer is 0.1 second long.
#define CHECK_RET(hr, message) if (FAILED(hr)) { puts(message); goto exit;}
#define CHECKHR(x) hr = x; if (FAILED(hr)) {printf("%d: %08X\n", __LINE__, hr); goto exit;}
#define CHECK_ALLOC(pb, message) if (NULL == pb) { puts(message); goto exit;}
class CStaticMediaBuffer : public CBaseMediaBuffer {
public:
STDMETHODIMP_(ULONG) AddRef() { return 2; }
STDMETHODIMP_(ULONG) Release() { return 1; }
void Init(BYTE* pData, ULONG ulSize, ULONG ulData) {
m_pData = pData;
m_ulSize = ulSize;
m_ulData = ulData;
}
};
void OutputUsage();
int __cdecl _tmain()
//int __cdecl _tmain()
{
HRESULT hr = S_OK;
CoInitialize(NULL);
IMediaObject* pDMO = NULL;
IPropertyStore* pPS = NULL;
CStaticMediaBuffer outputBuffer;
DMO_OUTPUT_DATA_BUFFER OutputBufferStruct = { 0 };
OutputBufferStruct.pBuffer = &outputBuffer;
DMO_MEDIA_TYPE mt = { 0 };
ULONG cbProduced = 0;
DWORD dwStatus;
// Parameters to config DMO
int iSystemMode = MODE_NOT_SET; // AEC-MicArray DMO system mode
int iOutFileIdx = -1; // argument index for output file name
int iMicDevIdx = -2; // microphone device index
int iSpkDevIdx = -2; // speaker device index
BOOL bFeatrModeOn = 0; // turn feature mode on/off
BOOL bNoiseSup = 1; // turn noise suppression on/off
BOOL bAGC = 0; // turn digital auto gain control on/off
BOOL bCntrClip = 0; // turn center clipping on/off
// control how long the Demo runs
int iDuration = 60; // seconds
int cTtlToGo = 0;
FILE* pfMicOutPCM; // dump output signal using PCM format
DWORD cOutputBufLen = 0;
BYTE* pbOutputBuffer = NULL;
UINT uCapDevCount = 0;
UINT uRenDevCount = 0;
char pcScanBuf[256] = { 0 };
WAVEFORMATEX wfxOut = { WAVE_FORMAT_PCM, 1, 22050, 44100, 2, 16, 0 };
AUDIO_DEVICE_INFO* pCaptureDeviceInfo = NULL, * pRenderDeviceInfo = NULL;
int i;
iMicDevIdx = 0;
iSpkDevIdx = 0;
iSystemMode = 0;
bFeatrModeOn = 1;
bNoiseSup = 1;
bAGC = 1;
bCntrClip = 1;
HANDLE currThread;
HANDLE currProcess;
BOOL iRet;
currProcess = GetCurrentProcess();
currThread = GetCurrentThread();
iRet = SetPriorityClass(currProcess, HIGH_PRIORITY_CLASS);
if (0 == iRet)
{
// call getLastError.
puts("failed to set process priority\n");
goto exit;
}
// DMO initialization
CHECKHR(CoCreateInstance(CLSID_CWMAudioAEC, NULL, CLSCTX_INPROC_SERVER, IID_IMediaObject, (void**)&pDMO));
CHECKHR(pDMO->QueryInterface(IID_IPropertyStore, (void**)&pPS));
// Select capture device
hr = GetCaptureDeviceNum(uCapDevCount);
CHECK_RET(hr, "GetCaptureDeviceNum failed");
pCaptureDeviceInfo = new AUDIO_DEVICE_INFO[uCapDevCount];
hr = EnumCaptureDevice(uCapDevCount, pCaptureDeviceInfo);
CHECK_RET(hr, "EnumCaptureDevice failed");
printf("\nSystem has totally %d capture devices\n", uCapDevCount);
for (i = 0; i < (int)uCapDevCount; i++)
{
_tprintf(_T("Device %d is %s"), i, pCaptureDeviceInfo[i].szDeviceName);
if (pCaptureDeviceInfo[i].bIsMicArrayDevice)
_tprintf(_T(" -- Mic Array Device \n"));
else
_tprintf(_T("\n"));
}
if (iMicDevIdx < -1 || iMicDevIdx >= (int)uCapDevCount)
{
do {
printf("Select device ");
scanf_s("%255s", pcScanBuf, 255);
iMicDevIdx = atoi(pcScanBuf);
if (iMicDevIdx < -1 || iMicDevIdx >= (int)uCapDevCount)
printf("Invalid Capture Device ID \n");
else
break;
} while (1);
}
if (iMicDevIdx == -1)
_tprintf(_T("\n Default device will be used for capturing \n"));
else
_tprintf(_T("\n %s is selected for capturing\n"), pCaptureDeviceInfo[iMicDevIdx].szDeviceName);
SAFE_ARRAYDELETE(pCaptureDeviceInfo);
// Select render device
if (iSystemMode == SINGLE_CHANNEL_AEC ||
iSystemMode == ADAPTIVE_ARRAY_AND_AEC ||
iSystemMode == OPTIBEAM_ARRAY_AND_AEC)
{
hr = GetRenderDeviceNum(uRenDevCount);
CHECK_RET(hr, "GetRenderDeviceNum failed");
pRenderDeviceInfo = new AUDIO_DEVICE_INFO[uRenDevCount];
hr = EnumRenderDevice(uRenDevCount, pRenderDeviceInfo);
CHECK_RET(hr, "EnumRenderDevice failed");
printf("\nSystem has totally %d render devices\n", uRenDevCount);
for (i = 0; i < (int)uRenDevCount; i++)
{
_tprintf(_T("Device %d is %s \n"), i, pRenderDeviceInfo[i].szDeviceName);
}
if (iSpkDevIdx < -1 || iSpkDevIdx >= (int)uRenDevCount)
{
do {
printf("Select device ");
scanf_s("%255s", pcScanBuf, 255);
iSpkDevIdx = atoi(pcScanBuf);
if (iSpkDevIdx < -1 || iSpkDevIdx >= (int)uRenDevCount)
printf("Invalid Render Device ID \n");
else
break;
} while (1);
}
if (iSpkDevIdx == -1)
_tprintf(_T("\n Default device will be used for rendering \n"));
else
_tprintf(_T("\n %s is selected for rendering \n"), pRenderDeviceInfo[iSpkDevIdx].szDeviceName);
}
else {
iSpkDevIdx = -1;
}
SAFE_ARRAYDELETE(pRenderDeviceInfo);
TCHAR* fileName;
fileName = (TCHAR*)"test.raw";
// --- PREPARE OUTPUT --- //
if (NULL != _tfopen_s(&pfMicOutPCM, fileName, _T("wb")))
{
puts("cannot open file for output.\n");
goto exit;
}
// Set AEC mode and other parameters
// Not all user changeable options are given in this sample code.
// Please refer to readme.txt for more options.
// Set AEC-MicArray DMO system mode.
// This must be set for the DMO to work properly
puts("\nAEC settings:");
PROPVARIANT pvSysMode;
PropVariantInit(&pvSysMode);
pvSysMode.vt = VT_I4;
pvSysMode.lVal = (LONG)(iSystemMode);
CHECKHR(pPS->SetValue(MFPKEY_WMAAECMA_SYSTEM_MODE, pvSysMode));
CHECKHR(pPS->GetValue(MFPKEY_WMAAECMA_SYSTEM_MODE, &pvSysMode));
printf("%20s %5d \n", "System Mode is", pvSysMode.lVal);
PropVariantClear(&pvSysMode);
// Tell DMO which capture and render device to use
// This is optional. If not specified, default devices will be used
if (iMicDevIdx >= 0 || iSpkDevIdx >= 0)
{
PROPVARIANT pvDeviceId;
PropVariantInit(&pvDeviceId);
pvDeviceId.vt = VT_I4;
pvDeviceId.lVal = (unsigned long)(iSpkDevIdx << 16) + (unsigned long)(0x0000ffff & iMicDevIdx);
CHECKHR(pPS->SetValue(MFPKEY_WMAAECMA_DEVICE_INDEXES, pvDeviceId));
CHECKHR(pPS->GetValue(MFPKEY_WMAAECMA_DEVICE_INDEXES, &pvDeviceId));
PropVariantClear(&pvDeviceId);
}
if (bFeatrModeOn)
{
// Turn on feature modes
PROPVARIANT pvFeatrModeOn;
PropVariantInit(&pvFeatrModeOn);
pvFeatrModeOn.vt = VT_BOOL;
pvFeatrModeOn.boolVal = bFeatrModeOn ? VBTRUE : VBFALSE;
CHECKHR(pPS->SetValue(MFPKEY_WMAAECMA_FEATURE_MODE, pvFeatrModeOn));
CHECKHR(pPS->GetValue(MFPKEY_WMAAECMA_FEATURE_MODE, &pvFeatrModeOn));
printf("%20s %5d \n", "Feature Mode is", pvFeatrModeOn.boolVal);
PropVariantClear(&pvFeatrModeOn);
// Turn on/off noise suppression
PROPVARIANT pvNoiseSup;
PropVariantInit(&pvNoiseSup);
pvNoiseSup.vt = VT_I4;
pvNoiseSup.lVal = (LONG)bNoiseSup;
CHECKHR(pPS->SetValue(MFPKEY_WMAAECMA_FEATR_NS, pvNoiseSup));
CHECKHR(pPS->GetValue(MFPKEY_WMAAECMA_FEATR_NS, &pvNoiseSup));
printf("%20s %5d \n", "Noise suppresion is", pvNoiseSup.lVal);
PropVariantClear(&pvNoiseSup);
// Turn on/off AGC
PROPVARIANT pvAGC;
PropVariantInit(&pvAGC);
pvAGC.vt = VT_BOOL;
pvAGC.boolVal = bAGC ? VBTRUE : VBFALSE;
CHECKHR(pPS->SetValue(MFPKEY_WMAAECMA_FEATR_AGC, pvAGC));
CHECKHR(pPS->GetValue(MFPKEY_WMAAECMA_FEATR_AGC, &pvAGC));
printf("%20s %5d \n", "AGC is", pvAGC.boolVal);
PropVariantClear(&pvAGC);
// Turn on/off center clip
PROPVARIANT pvCntrClip;
PropVariantInit(&pvCntrClip);
pvCntrClip.vt = VT_BOOL;
pvCntrClip.boolVal = bCntrClip ? VBTRUE : VBFALSE;
CHECKHR(pPS->SetValue(MFPKEY_WMAAECMA_FEATR_CENTER_CLIP, pvCntrClip));
CHECKHR(pPS->GetValue(MFPKEY_WMAAECMA_FEATR_CENTER_CLIP, &pvCntrClip));
printf("%20s %5d \n", "Center clip is", (BOOL)pvCntrClip.boolVal);
PropVariantClear(&pvCntrClip);
}
// Set DMO output format
hr = MoInitMediaType(&mt, sizeof(WAVEFORMATEX));
CHECK_RET(hr, "MoInitMediaType failed");
mt.majortype = MEDIATYPE_Audio;
mt.subtype = MEDIASUBTYPE_PCM;
mt.lSampleSize = 0;
mt.bFixedSizeSamples = TRUE;
mt.bTemporalCompression = FALSE;
mt.formattype = FORMAT_WaveFormatEx;
memcpy(mt.pbFormat, &wfxOut, sizeof(WAVEFORMATEX));
hr = pDMO->SetOutputType(0, &mt, 0);
CHECK_RET(hr, "SetOutputType failed");
MoFreeMediaType(&mt);
// Allocate streaming resources. This step is optional. If it is not called here, it
// will be called when first time ProcessInput() is called. However, if you want to
// get the actual frame size being used, it should be called explicitly here.
hr = pDMO->AllocateStreamingResources();
CHECK_RET(hr, "AllocateStreamingResources failed");
// Get the actual frame size being used in the DMO (optional; do as you need).
int iFrameSize;
PROPVARIANT pvFrameSize;
PropVariantInit(&pvFrameSize);
CHECKHR(pPS->GetValue(MFPKEY_WMAAECMA_FEATR_FRAME_SIZE, &pvFrameSize));
iFrameSize = pvFrameSize.lVal;
PropVariantClear(&pvFrameSize);
// allocate output buffer
cOutputBufLen = wfxOut.nSamplesPerSec * wfxOut.nBlockAlign;
pbOutputBuffer = new BYTE[cOutputBufLen];
CHECK_ALLOC(pbOutputBuffer, "out of memory.\n");
// number of frames to play
cTtlToGo = iDuration * 100;
// main loop to get mic output from the DMO
puts("\nAEC-MicArray is running ... Press \"s\" to stop");
while (1)
{
Sleep(10); //sleep 10ms
if (cTtlToGo-- <= 0)
break;
do {
outputBuffer.Init((byte*)pbOutputBuffer, cOutputBufLen, 0);
OutputBufferStruct.dwStatus = 0;
hr = pDMO->ProcessOutput(0, 1, &OutputBufferStruct, &dwStatus);
CHECK_RET(hr, "ProcessOutput failed");
if (hr == S_FALSE) {
cbProduced = 0;
}
else {
hr = outputBuffer.GetBufferAndLength(NULL, &cbProduced);
CHECK_RET(hr, "GetBufferAndLength failed");
}
// dump output data into a file with PCM format.
if (fwrite(pbOutputBuffer, 1, cbProduced, pfMicOutPCM) != cbProduced)
{
puts("write error");
goto exit;
}
} while (OutputBufferStruct.dwStatus & DMO_OUTPUT_DATA_BUFFERF_INCOMPLETE);
// check keyboard input to stop
if (_kbhit())
{
int ch = _getch();
if (ch == 's' || ch == 'S')
break;
}
}
exit:
SAFE_ARRAYDELETE(pbOutputBuffer);
SAFE_ARRAYDELETE(pCaptureDeviceInfo);
SAFE_ARRAYDELETE(pRenderDeviceInfo);
SAFE_RELEASE(pDMO);
SAFE_RELEASE(pPS);
CoUninitialize();
return hr;
}
void OutputUsage()
{
printf("MFWMAAEC (Aec-MicArray DMO) Demo. \n");
printf("Copyright (c) 2004-2006, Microsoft Corporation. All rights reserved. \n\n");
printf("Usage: AecSDKDemo.exe -out mic_out.pcm -mod 0 [-feat 1] [-ns 1] [-agc 0] \n");
printf(" [-cntrclip 0] [-micdev 0] [-spkdev 0] [-duration 60]\n");
return;
}
NOTE: My main concern is to record audio and use the native noise and echo canceler. If there is any suggestion regarding this, I would very much appreciate it. Thanks.
I'm having a hard time figuring out where to find information about this.
I'm building a simple recorder to learn about this video-compression universe, and I'm facing some weird behavior.
First I need to explain the scenario...
It's very simple: every time I call av_read_frame( input_context, input_packet ) I save the pts into the last_pts variable...
So...
What's bothering me is the fact that in about 10% of my calls to av_read_frame I get
input_packet.pts < last_pts
resulting in an error message from the encoder when I feed it the frame...
With that in mind, I decided to just drop those frames when it happens...
But I don't think it is a good idea to just drop frames: if they are in the stream, they must be needed somehow...
So... what should I do when last_pts > current_pts?
I will paste the test code I'm using, which captures video from the webcam and saves it to an mp4 file with the libx264 encoder:
#include <QCoreApplication>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libavutil/avutil.h>
#include <libavdevice/avdevice.h>
}
#include <QTime>
#include <QThread>
#include <QDebug>
#define SM_DEBUG
static const double max_fps = 30;
static const double min_loop_duration = 1000 / max_fps;
static const double max_duration = 5; // in seconds
static void sleep_if_needed(const int &elapsed) {
int sleep_duration = min_loop_duration - elapsed;
if (sleep_duration > 0) {
QThread::msleep(sleep_duration);
}
}
#ifdef SM_DEBUG
static void log_packet(const AVPacket *pkt,
const AVRational &time_base,
int is_input=0)
{
qDebug() << ((is_input) ? QString(">>") : QString("<<")) << "Size:" << QString::number(pkt->size) <<
"pts:" << QString::number(pkt->pts) <<
"pts_time:" << QString::number(av_q2d(time_base) * pkt->pts) <<
"dts:" << QString::number(pkt->dts) <<
"dts_time:" << QString::number(av_q2d(time_base) * pkt->dts);
}
#endif
int main()
{
int input_w, input_h, output_w = 640, output_h = 480;
av_register_all();
avdevice_register_all();
avcodec_register_all();
#ifdef SM_DEBUG
av_log_set_level(AV_LOG_DEBUG);
#else
av_log_set_level(AV_LOG_ERROR);
#endif
AVFormatContext *ic;
AVFormatContext *oc;
AVInputFormat *ifmt;
AVDictionary *opts = 0;
AVCodecContext* dec_ctx;
AVCodecContext* enc_ctx;
AVCodec *dec;
AVCodec *enc;
AVStream* ist;
AVStream* ost;
ifmt = av_find_input_format("v4l2");
av_dict_set(&opts, "tune", "zerolatency", AV_DICT_APPEND);
ic = avformat_alloc_context();
ic->flags |= AVFMT_FLAG_NONBLOCK;
avformat_open_input(&ic, "/dev/video0", ifmt, &opts);
avformat_find_stream_info(ic, NULL);
av_dump_format(ic, 0, ic->filename, 0);
AVFrame *frame;
AVFrame *tmp_frame;
ist = ic->streams[0];
dec_ctx = ist->codec;
input_w = dec_ctx->width;
input_h = dec_ctx->height;
dec_ctx->flags |= CODEC_FLAG_LOW_DELAY;
dec = avcodec_find_decoder(dec_ctx->codec_id);
av_format_set_video_codec(ic, dec);
avcodec_open2(dec_ctx, dec, NULL);
// output
avformat_alloc_output_context2(&oc, NULL, "MP4", "/home/poste9/grava.mp4");
enc = avcodec_find_encoder(AV_CODEC_ID_H264);
ost = avformat_new_stream(oc, enc);
enc_ctx = ost->codec;
enc_ctx->codec_id = AV_CODEC_ID_H264;
enc_ctx->width = output_w;
enc_ctx->height = output_h;
ost->time_base.num = ist->time_base.num;
ost->time_base.den = ist->time_base.den;
enc_ctx->time_base = ost->time_base;
enc_ctx->gop_size = 250;
enc_ctx->keyint_min = 25;
enc_ctx->qmax = 51;
enc_ctx->qmin = 30;
enc_ctx->pix_fmt = AV_PIX_FMT_YUV422P;
enc_ctx->max_b_frames = 6;
enc_ctx->flags |= CODEC_FLAG_GLOBAL_HEADER;
enc_ctx->flags |= CODEC_FLAG_LOW_DELAY;
avcodec_open2(enc_ctx, enc, NULL);
avio_open2(&oc->pb, oc->filename, AVIO_FLAG_WRITE,
&oc->interrupt_callback, NULL);
av_dump_format(oc, 0, oc->filename, 1);
avformat_write_header(oc, NULL);
struct SwsContext *sws_ctx;
sws_ctx = sws_getContext(input_w, input_h,
dec_ctx->pix_fmt,
output_w, output_h, enc_ctx->pix_fmt,
SWS_BICUBIC, NULL, NULL, NULL);
frame = av_frame_alloc();
tmp_frame = av_frame_alloc();
frame->format = enc_ctx->pix_fmt;
frame->width = output_w;
frame->height = output_h;
frame->pts = AV_NOPTS_VALUE;
av_frame_get_buffer(frame, 32);
av_frame_make_writable(frame);
int got_picture=0;
int got_packet=0;
double recording_duration = 0;
QTime timer;
AVPacket pkt_out;
av_init_packet(&pkt_out);
timer.start();
bool started_recording = false;
int64_t start_time = 0;
int64_t last_pts = INT64_MIN;
while(1) {
timer.restart();
AVPacket pkt_in;
av_read_frame(ic, &pkt_in);
if (pkt_in.size == 0) {
sleep_if_needed(timer.elapsed());
continue;
}
avcodec_decode_video2(dec_ctx, tmp_frame, &got_picture, &pkt_in);
#ifdef SM_DEBUG
log_packet(&pkt_in, ist->time_base, 1);
#endif
if (!started_recording) {
start_time = pkt_in.dts;
started_recording = true;
}
if (pkt_in.pts < last_pts) {
sleep_if_needed(timer.elapsed());
continue;
}
last_pts = pkt_in.pts;
frame->pts = (pkt_in.dts - start_time);
if (!got_picture) {
av_free_packet(&pkt_in);
sleep_if_needed(timer.elapsed());
continue;
} else {
sws_scale(sws_ctx, tmp_frame->data, tmp_frame->linesize,
0, input_h, frame->data, frame->linesize);
av_free_packet(&pkt_in);
}
av_init_packet(&pkt_out);
avcodec_encode_video2(enc_ctx, &pkt_out, frame, &got_packet);
if (got_packet) {
if (pkt_out.pts < pkt_out.dts) {
pkt_out.dts = pkt_out.pts;
}
pkt_out.stream_index = 0;
recording_duration = pkt_out.pts * av_q2d(ost->time_base);
#ifdef SM_DEBUG
log_packet(&pkt_out, ost->time_base, 0);
#endif
av_interleaved_write_frame(oc, &pkt_out);
av_free_packet(&pkt_out);
}
if (recording_duration >= max_duration) {
break;
} else {
sleep_if_needed(timer.elapsed());
}
}
av_write_trailer(oc);
av_dict_free(&opts);
av_frame_free(&frame);
av_frame_free(&tmp_frame);
sws_freeContext(sws_ctx);
avcodec_close(dec_ctx);
avcodec_close(enc_ctx);
avio_close(oc->pb);
avformat_free_context(oc);
avformat_close_input(&ic);
return 0;
}
These frames are B-frames. B-frames are stored in the stream in decoding order, not presentation order. If you look at the DTS, it will probably look OK. It is the job of the decoder to reorder frames into presentation order after they are decoded.
EDIT: To fix your code, use the PTS from the decoded frame, not from the packet.
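A minimal sketch of that fix against the loop above (av_frame_get_best_effort_timestamp is the era-appropriate accessor; variable names are taken from the question's code):
// Use the decoded frame's timestamp (presentation order) instead of the
// demuxed packet's pts/dts (decode order, which jumps backward at B-frames).
avcodec_decode_video2(dec_ctx, tmp_frame, &got_picture, &pkt_in);
if (got_picture)
{
    frame->pts = av_frame_get_best_effort_timestamp(tmp_frame) - start_time;
}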
When I decode frames from an avi file, encode them with x264, and save them to an mp4 file, the fps of the output file is always 12,800, so the file plays back far too fast. But when I save the H.264-encoded frames in avi format instead of mp4, the fps is 25, as I wanted.
What could be the problem?
Here is the code I wrote in VS2010:
#include "stdafx.h"
#include "inttypes.h"
extern "C" {
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libavutil/avutil.h"
#include <libswscale/swscale.h>
#include <libavutil/opt.h>
#include <libswscale/swscale.h>
#include <libavutil/imgutils.h>
}
#include <iostream>
using namespace std;
int main(int argc, char* argv[])
{
const char* inFileName = "C:\\000227_C1_GAME.avi";
const char* outFileName = "c:\\test.avi";
const char* outFileType = "avi";
av_register_all();
AVFormatContext* inContainer = NULL;
if(avformat_open_input(&inContainer, inFileName, NULL, NULL) < 0)
exit(1);
if(avformat_find_stream_info(inContainer, NULL) < 0)
exit(1);
// Find video stream
int videoStreamIndex = -1;
for (unsigned int i = 0; i < inContainer->nb_streams; ++i)
{
if (inContainer->streams[i] && inContainer->streams[i]->codec &&
inContainer->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
{
videoStreamIndex = i;
break;
}
}
if (videoStreamIndex == -1) exit(1);
AVFormatContext* outContainer = NULL;
if(avformat_alloc_output_context2(&outContainer, NULL, outFileType, outFileName) < 0)
exit(1);
// ----------------------------
// Decoder
// ----------------------------
AVStream const *const inStream = inContainer->streams[videoStreamIndex];
AVCodec *const decoder = avcodec_find_decoder(inStream->codec->codec_id);
if(!decoder)
exit(1);
if(avcodec_open2(inStream->codec, decoder, NULL) < 0)
exit(1);
// ----------------------------
// Encoder
// ----------------------------
AVCodec *encoder = avcodec_find_encoder(AV_CODEC_ID_H264);
if(!encoder)
exit(1);
AVStream *outStream = avformat_new_stream(outContainer, encoder);
if(!outStream)
exit(1);
avcodec_get_context_defaults3(outStream->codec, encoder);
// Construct encoder
if(outContainer->oformat->flags & AVFMT_GLOBALHEADER)
outStream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
outStream->codec->coder_type = AVMEDIA_TYPE_VIDEO;
outStream->codec->pix_fmt = AV_PIX_FMT_YUV420P;
outStream->codec->width = inStream->codec->width;
outStream->codec->height = inStream->codec->height;
outStream->codec->codec_id = encoder->id;
outStream->codec->bit_rate = 500000;
//outStream->codec->rc_min_rate = 600000;
//outStream->codec->rc_max_rate = 800000;
outStream->codec->time_base.den = 25;
outStream->codec->time_base.num = 1;
outStream->codec->gop_size = 250; // Keyframe interval (= GOP length). Determines the maximum distance between I-frames
outStream->codec->keyint_min = 25; // minimum GOP size
outStream->codec->max_b_frames = 3;//16; // maximum number of B-frames between non-B-frames
outStream->codec->b_frame_strategy = 1; // decides the best number of B-frames to use. Default mode in x264.
outStream->codec->scenechange_threshold = 40;
outStream->codec->refs = 6; // ability to reference frames other than the one immediately prior to the current frame; specifies how many references can be used.
outStream->codec->qmin = 0;//10;
outStream->codec->qmax = 69;//51;
outStream->codec->qcompress = 0.6;
outStream->codec->max_qdiff = 4;
outStream->codec->i_quant_factor = 1.4;//0.71;
outStream->codec->refs=1;//3;
outStream->codec->chromaoffset = -2;
outStream->codec->thread_count = 1;
outStream->codec->trellis = 1;
outStream->codec->me_range = 16;
outStream->codec->me_method = ME_HEX; //hex
outStream->codec->flags2 |= CODEC_FLAG2_FAST;
outStream->codec->coder_type = 1;
if(outStream->codec->codec_id == AV_CODEC_ID_H264)
{
av_opt_set(outStream->codec->priv_data, "preset", "slow", 0);
}
// Open encoder
if(avcodec_open2(outStream->codec, encoder, NULL) < 0)
exit(1);
// Open output container
if(avio_open(&outContainer->pb, outFileName, AVIO_FLAG_WRITE) < 0)
exit(1);
//close_o
AVFrame *decodedFrame = avcodec_alloc_frame();
if(!decodedFrame)
exit(1);
AVFrame *encodeFrame = avcodec_alloc_frame();
if(!encodeFrame)
exit(1);
encodeFrame->format = outStream->codec->pix_fmt;
encodeFrame->width = outStream->codec->width;
encodeFrame->height = outStream->codec->height;
if(av_image_alloc(encodeFrame->data, encodeFrame->linesize,
outStream->codec->width, outStream->codec->height,
outStream->codec->pix_fmt, 1) < 0)
exit(1);
av_dump_format(inContainer, 0, inFileName,0);
//Write header to ouput container
avformat_write_header(outContainer, NULL);
AVPacket decodePacket, encodedPacket;
int got_frame, len;
while(av_read_frame(inContainer, &decodePacket)>=0)
{
if (decodePacket.stream_index == videoStreamIndex)
{
len = avcodec_decode_video2(inStream->codec, decodedFrame, &got_frame, &decodePacket);
if(len < 0)
exit(1);
if(got_frame)
{
av_init_packet(&encodedPacket);
encodedPacket.data = NULL;
encodedPacket.size = 0;
if(avcodec_encode_video2(outStream->codec, &encodedPacket, decodedFrame, &got_frame) < 0)
exit(1);
if(got_frame)
{
if (outStream->codec->coded_frame->key_frame)
encodedPacket.flags |= AV_PKT_FLAG_KEY;
encodedPacket.stream_index = outStream->index;
if(av_interleaved_write_frame(outContainer, &encodedPacket) < 0)
exit(1);
av_free_packet(&encodedPacket);
}
}
}
av_free_packet(&decodePacket);
}
av_write_trailer(outContainer);
avio_close(outContainer->pb);
avcodec_free_frame(&encodeFrame);
avcodec_free_frame(&decodedFrame);
avformat_free_context(outContainer);
av_close_input_file(inContainer);
return 0;
}
The problem was with the PTS and DTS of the packet. Before writing the packet to the output (i.e., before the av_interleaved_write_frame call), set the PTS and DTS like this:
if (encodedPacket.pts != AV_NOPTS_VALUE)
encodedPacket.pts = av_rescale_q(encodedPacket.pts, outStream->codec->time_base, outStream->time_base);
if (encodedPacket.dts != AV_NOPTS_VALUE)
encodedPacket.dts = av_rescale_q(encodedPacket.dts, outStream->codec->time_base, outStream->time_base);
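Equivalently, newer FFmpeg releases (2.4 and later, if I remember correctly) provide a single helper that rescales pts, dts, and duration in one call:
av_packet_rescale_ts(&encodedPacket, outStream->codec->time_base, outStream->time_base);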