Ffmpeg decode H264 video from RTP stream very slow - c++

I have H264 RTP stream over network and I need to decode it. I use libx264 to encode stream and ffmpeg to decode. When I connect to server using VLC, it correct play video without any problem. But when I connect to server using my application I have a long period, when widget, which draw video from this stream, draw only one image.
I check log file and found, that avcodec_decode_video2() set got_image into 1 very rarely! Decoder give me new decoded frame average every 1-2 seconds, but on the server I have 12-15 fps on encoder!
What the reasons of this delays on decoder and how I can fix its?
avcodec_register_all();
av_init_packet(&m_packet);
m_decoder = avcodec_find_decoder(CODEC_ID_H264);
if (!m_decoder)
{
QString str = QString("Can't find H264 decoder!");
emit criticalError(str);
}
m_decoderContext = avcodec_alloc_context3(m_decoder);
m_decoderContext->flags |= CODEC_FLAG_LOW_DELAY;
m_decoderContext->flags2 |= CODEC_FLAG2_CHUNKS;
AVDictionary* dictionary = nullptr;
if (avcodec_open2(m_decoderContext, m_decoder, &dictionary) < 0)
{
QString str = QString("Failed to open decoder!");
emit criticalError(str);
}
qDebug() << "H264 Decoder successfully opened";
m_picture = avcodec_alloc_frame();
...
while(m_packet.size > 0)
{
int got_picture;
int len = avcodec_decode_video2(m_decoderContext, m_picture, &got_picture, &m_packet);
if (len < 0)
{
QString err("Decoding error");
qDebug() << err;
//emit criticalError(err);
return false;
}
if (got_picture)
{
//create from frame QImage and send it into GUI thread
qDebug() << "H264Decoder: frame decoded!";
I try to change some options of m_decoderContext (i.e. thread_count) but this not changing anything.

Related

How to decode MJPEG with FFmpeg

I am trying to decode a MJPEG stream with libav. The stream comes from V4L2 driver. When working with high resolutions the following code works fine. However, when using low resolutions (For instance 320x190) it produces strange artefacts
void V4L2::compressedCapturingThread(){
const AVCodec *codec=avcodec_find_decoder(m_currVidMode.codec.toAVCodecID());
if(!codec)
return; //Something went horribly wrong
//Allocate the context
AVCodecContext* codecCtx=avcodec_alloc_context3(codec);
if(!codecCtx){
return; //Error
}
codecCtx->width=m_currVidMode.res.width;
codecCtx->height=m_currVidMode.res.height;
//Open the context
if (avcodec_open2(codecCtx, codec, nullptr) < 0) {
avcodec_free_context(&codecCtx);
return;
}
//Allocate space for the packet
AVPacket *pkt=av_packet_alloc();
if(!pkt){
avcodec_free_context(&codecCtx);
return;
}
AVFrame* decodedFrame=av_frame_alloc();
if(!decodedFrame){
avcodec_free_context(&codecCtx);
avcodec_free_context(&codecCtx);
return;
}
Graphics::Uploader uplo;
v4l2_buffer buf;
int ret;
Utils::ImageBuffer decodedImgBuf(
Utils::ImageAttributes(
m_currVidMode.res,
m_currVidMode.pixFmt
), (u_int8_t*)nullptr
);
//Main loop
while(!m_threadExit){
reqBuffer(&buf);
if(!buf.bytesused){
continue;
}
//Create the packet with the given data
u_int8_t* bufData=(u_int8_t*)av_malloc(buf.bytesused);
memcpy(bufData, m_buffers[buf.index].buffer, buf.bytesused); //copy the data
av_packet_from_data (pkt, bufData, buf.bytesused);
freeBuffer(&buf); //V4L2 buffer no longer needed
//Try to decode the packet
ret=avcodec_send_packet(codecCtx, pkt);
av_packet_unref(pkt);
if(ret<0){
continue;
}
ret = avcodec_receive_frame(codecCtx, decodedFrame);
if(ret<0){
continue;
}
memcpy(decodedImgBuf.data, decodedFrame->data, sizeof(decodedImgBuf.data)); //Copy plane pointers
if(decodedImgBuf.att.pixFmt == Utils::PixelFormats::NONE){
decodedImgBuf.att.pixFmt=Utils::PixelFormat(codecCtx->pix_fmt);
//Change deprecated formats
if(decodedImgBuf.att.pixFmt == Utils::PixelFormats::YUVJ420P)
decodedImgBuf.att.pixFmt = Utils::PixelFormats::YUV420P;
else if(decodedImgBuf.att.pixFmt == Utils::PixelFormats::YUVJ422P)
decodedImgBuf.att.pixFmt = Utils::PixelFormats::YUV422P;
else if(decodedImgBuf.att.pixFmt == Utils::PixelFormats::YUVJ440P)
decodedImgBuf.att.pixFmt = Utils::PixelFormats::YUV440P;
else if(decodedImgBuf.att.pixFmt == Utils::PixelFormats::YUVJ444P)
decodedImgBuf.att.pixFmt = Utils::PixelFormats::YUV444P;
}
std::unique_ptr<const Graphics::Frame> frame;
{
Graphics::UniqueContext ctx(Graphics::Context::getAvalibleCtx());
frame=uplo.getFrame(decodedImgBuf);
}
Stream::AsyncSource<Graphics::Frame>::push(std::move(frame));
}
//Free everything
avcodec_free_context(&codecCtx);
av_packet_free(&pkt);
av_frame_free(&decodedFrame);
}
If I try to read the AVFrame's contents to disk just after avcodec_receive_frame() I get the mentioned "strange results" (I see it by uploading it to rawpixels.net ), so the problem is not after this line. If I save the pkt 's data to disk as JPEG just before freeBuffer() the image can be seen properly. I'll attach some pictures
V4L2 configured at 1280x720
V4L2 configured at 320x190
The complete code can be found at:
https://github.com/oierlauzi/zuazo
https://github.com/oierlauzi/zuazo/blob/master/src/Zuazo/Sources/V4L2.cpp
Edit 1:
I forgot to mention, codecCtx->pix_fmt has the value of AL_PIX_FMT_YUVJ422P (given by libav)

Live555 truncates encoded data of FFMpeg

I am trying to stream H264 based data using Live555 over RTSP.
I am capturing data using V4L2, and then encodes it using FFMPEG and then passing data to Live555's DeviceSource file, in that I using H264VideoStreamFramer class,
Below is my codec settings to configure AVCodecContext of encoder,
codec = avcodec_find_encoder_by_name(CODEC_NAME);
if (!codec) {
cerr << "Codec " << codec_name << " not found\n";
exit(1);
}
c = avcodec_alloc_context3(codec);
if (!c) {
cerr << "Could not allocate video codec context\n";
exit(1);
}
pkt = av_packet_alloc();
if (!pkt)
exit(1);
/* put sample parameters */
c->bit_rate = 400000;
/* resolution must be a multiple of two */
c->width = PIC_HEIGHT;
c->height = PIC_WIDTH;
/* frames per second */
c->time_base = (AVRational){1, FPS};
c->framerate = (AVRational){FPS, 1};
c->gop_size = 10;
c->max_b_frames = 1;
c->pix_fmt = AV_PIX_FMT_YUV420P;
c->rtp_payload_size = 30000;
if (codec->id == AV_CODEC_ID_H264)
av_opt_set(c->priv_data, "preset", "fast", 0);
av_opt_set_int(c->priv_data, "slice-max-size", 30000, 0);
/* open it */
ret = avcodec_open2(c, codec, NULL);
if (ret < 0) {
cerr << "Could not open codec\n";
exit(1);
}
And I am getting encoded data using avcodec_receive_packet() function. which will return AVPacket.
And I am passing AVPacket's data into DeviceSource file below is code snippet of my Live555 code:
void DeviceSource::deliverFrame() {
if (!isCurrentlyAwaitingData()) return; // we're not ready for the data yet
u_int8_t* newFrameDataStart = (u_int8_t*) pkt->data;
unsigned newFrameSize = pkt->size; //%%% TO BE WRITTEN %%%
// Deliver the data here:
if (newFrameSize > fMaxSize) { // Condition becomes true many times
fFrameSize = fMaxSize;
fNumTruncatedBytes = newFrameSize - fMaxSize;
} else {
fFrameSize = newFrameSize;
}
gettimeofday(&fPresentationTime, NULL); // If you have a more accurate time - e.g., from an encoder - then use that instead.
// If the device is *not* a 'live source' (e.g., it comes instead from a file or buffer), then set "fDurationInMicroseconds" here.
memmove(fTo, newFrameDataStart, fFrameSize);
}
But here, sometimes my packet's size is getting more than fMaxSize value and as per LIVE555 logic it will truncate frame data, so that sometimes I am getting bad frames on my VLC,
From Live555 forum, I get to know that encoder should not send packet whose size is more than fMaxSize value, so my question is:
How to restrict encoder to limit size of packet?
Thanks in Advance,
Harshil
You can increase the maximum allowed sample size by changing "maxSize" in the OutPacketBuffer class in MediaSink.cpp. This worked for me. There are cases we may require high-quality video to be streamed, I don't think we will always be able to restrict the encoder to not to produce samples of size more than a particular value which would result in video quality issues. In fact, the samples are fragmented by the UDP sink live555 to match the default MTU (1500), so increasing the max sample size limit has no side effects.

C++ ffmpeg video missing frames and won't play in Quicktime

I wrote some C++ code that uses ffmpeg to encode a video. I'm having two strange issues:
The final video is always missing 1 frame. That is, if I have it encode 10 frames the final video only has 9 (at least that's what ffprobe -show_frames -pretty $VIDEO | grep -F '[FRAME]' | wc -l tells me.
The final video plays fine in some players (mpv and vlc) but not in Quicktime. Quicktime just shows a completely black screen.
My code is roughly this (modified a bit to remove types that are unique to our code base):
First, I open the video file, write the headers and initialize things:
template <class PtrT>
using UniquePtrWithDeleteFunction = std::unique_ptr<PtrT, std::function<void (PtrT*)>>;
std::unique_ptr<FfmpegEncodingFrameSink> FfmpegEncodingFrameSink::Create(
const std::string& dest_url) {
AVFormatContext* tmp_format_ctxt;
auto alloc_format_res = avformat_alloc_output_context2(&tmp_format_ctxt, nullptr, "mp4", dest_url.c_str());
if (alloc_format_res < 0) {
throw FfmpegException("Error opening output file.");
}
auto format_ctxt = UniquePtrWithDeleteFunction<AVFormatContext>(
tmp_format_ctxt, CloseAvFormatContext);
AVStream* out_stream_video = avformat_new_stream(format_ctxt.get(), nullptr);
if (out_stream_video == nullptr) {
throw FfmpegException("Could not create outputstream");
}
auto codec_context = GetCodecContext(options);
out_stream_video->time_base = codec_context->time_base;
auto ret = avcodec_parameters_from_context(out_stream_video->codecpar, codec_context.get());
if (ret < 0) {
throw FfmpegException("Failed to copy encoder parameters to outputstream");
}
if (!(format_ctxt->oformat->flags & AVFMT_NOFILE)) {
ret = avio_open(&format_ctxt->pb, dest_url.c_str(), AVIO_FLAG_WRITE);
if (ret < 0) {
throw VideoDecodeException("Could not open output file: " + dest_url);
}
}
ret = avformat_init_output(format_ctxt.get(), nullptr);
if (ret < 0) {
throw FfmpegException("Unable to initialize the codec.");
}
ret = avformat_write_header(format_ctxt.get(), nullptr);
if (ret < 0) {
throw FfmpegException("Error occurred writing format header");
}
return std::unique_ptr<FfmpegEncodingFrameSink>(
new FfmpegEncodingFrameSink(std::move(format_ctxt), std::move(codec_context)));
}
Then, every time I get a new frame to encode I pass it to this function (the frames are being decoded via ffmpeg from another mp4 file which Quicktime plays just fine):
// If frame == nullptr then we're done and we're just flushing the encoder
// otherwise encode an actual frame
void FfmpegEncodingFrameSink::EncodeAndWriteFrame(
const AVFrame* frame) {
auto ret = avcodec_send_frame(codec_ctxt_.get(), frame);
if (ret < 0) {
throw FfmpegException("Error encoding the frame.");
}
AVPacket enc_packet;
enc_packet.data = nullptr;
enc_packet.size = 0;
av_init_packet(&enc_packet);
do {
ret = avcodec_receive_packet(codec_ctxt_.get(), &enc_packet);
if (ret == AVERROR(EAGAIN)) {
CHECK(frame != nullptr);
break;
} else if (ret == AVERROR_EOF) {
CHECK(frame == nullptr);
break;
} else if (ret < 0) {
throw FfmpegException("Error putting the encoded frame into the packet.");
}
assert(ret == 0);
enc_packet.stream_index = 0;
LOG(INFO) << "Writing packet to stream.";
av_interleaved_write_frame(format_ctxt_.get(), &enc_packet);
av_packet_unref(&enc_packet);
} while (ret == 0);
}
Finally, in my destructor I close everything up like so:
FfmpegEncodingFrameSink::~FfmpegEncodingFrameSink() {
// Pass a nullptr to EncodeAndWriteFrame so it flushes the encoder
EncodeAndWriteFrame(nullptr);
// write mp4 trailer
av_write_trailer(format_ctxt_.get());
}
If I run this passing n frames to EncodeAndWriteFrame line LOG(INFO) << "Writing packet to stream."; gets run n times indicating the n packets were written to the stream. But ffprobe always shows only n - 1 frames int he video. And the final video doesn't play on quicktime.
What am I doing wrong??
Sorry for the delay but as i just had the same problem and noticed that this question deserves an answer, here how i solved this.
Up in front, the Problem only occured for me when using mov, mp4, 3gp as format. It worked frame accurate when using e.g. avi format. When i wrote uncompressed video frames to the container, i saw that the avi and mov had the same count of frames stored but the mov obviously had some problem in it's header.
Counting the number of frames in the mov using header metadata showed one frame is missing:
ffprobe -v error -count_frames -select_streams v:0 -show_entries stream=nb_read_frames -of default=nokey=1:noprint_wrappers=1 c:\temp\myinput.mov
While ignoring the index showed the correct number of frames:
-ignore_editlist 1
The solution for me was, set the timebase to the AVStream->CodeContext of the video stream.
The code above attempts to do this in this line:
out_stream_video->time_base = codec_context->time_base;
But the problem is that the posted code above does not expose the function GetCodecContext so we do not know if the time_base is correctly set for "codec_context". So it is my believe that the author's problem was that his function GetCodecContext did not set the time_base correctly.

Using libavformat to mux H.264 frames into RTP

I have an encoder that produces a series of H.264 I-frames and P-frames. I'm trying to use libavformat to mux and transmit these frames over RTP, but I'm stuck.
My program sends RTP data, but the RTP timestamp increments by 1 each successive frame, instead of 90000/fps. It also doesn't look like it's doing the proper framing for H.264 NAL, since I can't decode the stream as H.264 in Wireshark.
I suspect that I'm not setting up the codec information properly, but it appears in many places in the output format context, so it's unclear what exactly needs to be setup. The examples seem to all copy codec context info from encoders, which isn't my use case.
This is what I'm trying:
int main() {
AVFormatContext context = avformat_alloc_context();
if (!context) {
printf("avformat_alloc_context failed\n");
return;
}
AVOutputFormat *format = av_guess_format("rtp", NULL, NULL);
if (!format) {
printf("av_guess_format failed\n");
return;
}
context->oformat = format;
snprintf(context->filename, sizeof(context->filename), "rtp://%s:%d", "192.168.2.16", 10000);
if (avio_open(&(context->pb), context->filename, AVIO_FLAG_READ_WRITE) < 0) {
printf("avio_open failed\n");
return;
}
stream = avformat_new_stream(context, NULL);
if (!stream) {
printf("avformat_new_stream failed\n");
return;
}
stream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
stream->codecpar->codec_id = AV_CODEC_ID_H264;
stream->codecpar->width = 1920;
stream->codecpar->height = 1080;
avformat_write_header(context, NULL);
...
write packets
...
}
Example write packet:
int write_packet(uint8_t *data, int size) {
AVPacket p;
av_init_packet(&p);
p.data = buffer;
p.size = size;
p.stream_index = stream->index;
av_interleaved_write_frame(context, &p);
}
I've even went so far to build in libx264, find the encoder, and copy the codec context info from there into the stream codecpar, with the same result. My goal is to build without libx264, and any other libs that aren't required, but it isn't clear whether libx264 is required for defaults such as time base.
How can the libavformat RTP muxer be initialized to properly send H.264 frames over RTCP+RTP?

FFmpeg + OpenAL - playback streaming sound from video won't work

I am decoding an OGG video (theora & vorbis as codecs) and want to show it on the screen (using Ogre 3D) while playing its sound. I can decode the image stream just fine and the video plays perfectly with the correct frame rate, etc.
However, I cannot get the sound to play at all with OpenAL.
Edit: I managed to make the playing sound resemble the actual audio in the video at least somewhat. Updated sample code.
Edit 2: I was able to get "almost" correct sound now. I had to set OpenAL to use AL_FORMAT_STEREO_FLOAT32 (after initializing the extension) instead of just STEREO16. Now the sound is "only" extremely high pitched and stuttering, but at the correct speed.
Here is how I decode audio packets (in a background thread, the equivalent works just fine for the image stream of the video file):
//------------------------------------------------------------------------------
int decodeAudioPacket( AVPacket& p_packet, AVCodecContext* p_audioCodecContext, AVFrame* p_frame,
FFmpegVideoPlayer* p_player, VideoInfo& p_videoInfo)
{
// Decode audio frame
int got_frame = 0;
int decoded = avcodec_decode_audio4(p_audioCodecContext, p_frame, &got_frame, &p_packet);
if (decoded < 0)
{
p_videoInfo.error = "Error decoding audio frame.";
return decoded;
}
// Frame is complete, store it in audio frame queue
if (got_frame)
{
int bufferSize = av_samples_get_buffer_size(NULL, p_audioCodecContext->channels, p_frame->nb_samples,
p_audioCodecContext->sample_fmt, 0);
int64_t duration = p_frame->pkt_duration;
int64_t dts = p_frame->pkt_dts;
if (staticOgreLog)
{
staticOgreLog->logMessage("Audio frame bufferSize / duration / dts: "
+ boost::lexical_cast<std::string>(bufferSize) + " / "
+ boost::lexical_cast<std::string>(duration) + " / "
+ boost::lexical_cast<std::string>(dts), Ogre::LML_NORMAL);
}
// Create the audio frame
AudioFrame* frame = new AudioFrame();
frame->dataSize = bufferSize;
frame->data = new uint8_t[bufferSize];
if (p_frame->channels == 2)
{
memcpy(frame->data, p_frame->data[0], bufferSize >> 1);
memcpy(frame->data + (bufferSize >> 1), p_frame->data[1], bufferSize >> 1);
}
else
{
memcpy(frame->data, p_frame->data, bufferSize);
}
double timeBase = ((double)p_audioCodecContext->time_base.num) / (double)p_audioCodecContext->time_base.den;
frame->lifeTime = duration * timeBase;
p_player->addAudioFrame(frame);
}
return decoded;
}
So, as you can see, I decode the frame, memcpy it to my own struct, AudioFrame. Now, when the sound is played, I use these audio frame like this:
int numBuffers = 4;
ALuint buffers[4];
alGenBuffers(numBuffers, buffers);
ALenum success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error on alGenBuffers : " + Ogre::StringConverter::toString(success) + alGetString(success));
return;
}
// Fill a number of data buffers with audio from the stream
std::vector<AudioFrame*> audioBuffers;
std::vector<unsigned int> audioBufferSizes;
unsigned int numReturned = FFMPEG_PLAYER->getDecodedAudioFrames(numBuffers, audioBuffers, audioBufferSizes);
// Assign the data buffers to the OpenAL buffers
for (unsigned int i = 0; i < numReturned; ++i)
{
alBufferData(buffers[i], _streamingFormat, audioBuffers[i]->data, audioBufferSizes[i], _streamingFrequency);
success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error on alBufferData : " + Ogre::StringConverter::toString(success) + alGetString(success)
+ " size: " + Ogre::StringConverter::toString(audioBufferSizes[i]));
return;
}
}
// Queue the buffers into OpenAL
alSourceQueueBuffers(_source, numReturned, buffers);
success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error queuing streaming buffers: " + Ogre::StringConverter::toString(success) + alGetString(success));
return;
}
}
alSourcePlay(_source);
The format and frequency I give to OpenAL are AL_FORMAT_STEREO_FLOAT32 (it is a stereo sound stream, and I did initialize the FLOAT32 extension) and 48000 (which is the sample rate of the AVCodecContext of the audio stream).
And during playback, I do the following to refill OpenAL's buffers:
ALint numBuffersProcessed;
// Check if OpenAL is done with any of the queued buffers
alGetSourcei(_source, AL_BUFFERS_PROCESSED, &numBuffersProcessed);
if(numBuffersProcessed <= 0)
return;
// Fill a number of data buffers with audio from the stream
std::vector<AudiFrame*> audioBuffers;
std::vector<unsigned int> audioBufferSizes;
unsigned int numFilled = FFMPEG_PLAYER->getDecodedAudioFrames(numBuffersProcessed, audioBuffers, audioBufferSizes);
// Assign the data buffers to the OpenAL buffers
ALuint buffer;
for (unsigned int i = 0; i < numFilled; ++i)
{
// Pop the oldest queued buffer from the source,
// fill it with the new data, then re-queue it
alSourceUnqueueBuffers(_source, 1, &buffer);
ALenum success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error Unqueuing streaming buffers: " + Ogre::StringConverter::toString(success));
return;
}
alBufferData(buffer, _streamingFormat, audioBuffers[i]->data, audioBufferSizes[i], _streamingFrequency);
success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error on re- alBufferData: " + Ogre::StringConverter::toString(success));
return;
}
alSourceQueueBuffers(_source, 1, &buffer);
success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error re-queuing streaming buffers: " + Ogre::StringConverter::toString(success) + " "
+ alGetString(success));
return;
}
}
// Make sure the source is still playing,
// and restart it if needed.
ALint playStatus;
alGetSourcei(_source, AL_SOURCE_STATE, &playStatus);
if(playStatus != AL_PLAYING)
alSourcePlay(_source);
As you can see, I do quite heavy error checking. But I do not get any errors, neither from OpenAL nor from FFmpeg.
Edit: What I hear somewhat resembles the actual audio from the video, but VERY high pitched and stuttering VERY much. Also, it seems to be playing on top of TV noise. Very strange. Plus, it is playing much slower than the correct audio would.
Edit: 2 After using AL_FORMAT_STEREO_FLOAT32, the sound plays at the correct speed, but is still very high pitched and stuttering (though less than before).
The video itself is not broken, it can be played fine on any player. OpenAL can also play *.way files just fine in the same application, so it is also working.
Any ideas what could be wrong here or how to do this correctly?
My only guess is that somehow, FFmpeg's decode function does not produce data OpenGL can read. But this is as far as the FFmpeg decode example goes, so I don't know what's missing. As I understand it, the decode_audio4 function decodes the frame to raw data. And OpenAL should be able to work with RAW data (or rather, doesn't work with anything else).
So, I finally figured out how to do it. Gee, what a mess. It was a hint from a user on the libav-users mailing list that put me on the correct path.
Here are my mistakes:
Using the wrong format in the alBufferData function. I used AL_FORMAT_STEREO16 (as that is what every single streaming example with OpenAL uses). I should have used AL_FORMAT_STEREO_FLOAT32, as the video I stream is Ogg and vorbis is stored in floating points. And using swr_convert to convert from AV_SAMPLE_FMT_FLTP to AV_SAMPLE_FMT_S16 just crashes. No idea why.
Not using swr_convert to convert the decoded audio frame to the target format. After I was trying to use swr_convert to convert from FLTP to S16, and it would simply crash without a reason given, I assumed it was broken. But after figuring out my first mistake, I tried again, converting from FLTP to FLT (non-planar) and then it worked! So OpenAL uses interleaved format, not planar. Good to know.
So here is the decodeAudioPacket function that is working for me with Ogg video, vorbis audio stream:
int decodeAudioPacket( AVPacket& p_packet, AVCodecContext* p_audioCodecContext, AVFrame* p_frame,
SwrContext* p_swrContext, uint8_t** p_destBuffer, int p_destLinesize,
FFmpegVideoPlayer* p_player, VideoInfo& p_videoInfo)
{
// Decode audio frame
int got_frame = 0;
int decoded = avcodec_decode_audio4(p_audioCodecContext, p_frame, &got_frame, &p_packet);
if (decoded < 0)
{
p_videoInfo.error = "Error decoding audio frame.";
return decoded;
}
if(decoded <= p_packet.size)
{
/* Move the unread data to the front and clear the end bits */
int remaining = p_packet.size - decoded;
memmove(p_packet.data, &p_packet.data[decoded], remaining);
av_shrink_packet(&p_packet, remaining);
}
// Frame is complete, store it in audio frame queue
if (got_frame)
{
int outputSamples = swr_convert(p_swrContext,
p_destBuffer, p_destLinesize,
(const uint8_t**)p_frame->extended_data, p_frame->nb_samples);
int bufferSize = av_get_bytes_per_sample(AV_SAMPLE_FMT_FLT) * p_videoInfo.audioNumChannels
* outputSamples;
int64_t duration = p_frame->pkt_duration;
int64_t dts = p_frame->pkt_dts;
if (staticOgreLog)
{
staticOgreLog->logMessage("Audio frame bufferSize / duration / dts: "
+ boost::lexical_cast<std::string>(bufferSize) + " / "
+ boost::lexical_cast<std::string>(duration) + " / "
+ boost::lexical_cast<std::string>(dts), Ogre::LML_NORMAL);
}
// Create the audio frame
AudioFrame* frame = new AudioFrame();
frame->dataSize = bufferSize;
frame->data = new uint8_t[bufferSize];
memcpy(frame->data, p_destBuffer[0], bufferSize);
double timeBase = ((double)p_audioCodecContext->time_base.num) / (double)p_audioCodecContext->time_base.den;
frame->lifeTime = duration * timeBase;
p_player->addAudioFrame(frame);
}
return decoded;
}
And here is how I initialize the context and the destination buffer:
// Initialize SWR context
SwrContext* swrContext = swr_alloc_set_opts(NULL,
audioCodecContext->channel_layout, AV_SAMPLE_FMT_FLT, audioCodecContext->sample_rate,
audioCodecContext->channel_layout, audioCodecContext->sample_fmt, audioCodecContext->sample_rate,
0, NULL);
int result = swr_init(swrContext);
// Create destination sample buffer
uint8_t** destBuffer = NULL;
int destBufferLinesize;
av_samples_alloc_array_and_samples( &destBuffer,
&destBufferLinesize,
videoInfo.audioNumChannels,
2048,
AV_SAMPLE_FMT_FLT,
0);