C++ ffmpeg extract audio to mp3 (demuxing) - c++

I am trying to write a C++ program that allows me to extract the audio from a video file to an mp3 file. I searched the internet and stackoverflow, but couldn't get it to work.
The library I chose is ffmpeg and I have to do it in C/C++. This is what I have so far.
// Step 1 - Register all formats and codecs
avcodec_register_all();
av_register_all();
AVFormatContext* fmtCtx = avformat_alloc_context();
// Step 2 - Open input file, and allocate format context
if(avformat_open_input(&fmtCtx, filePath.toLocal8Bit().data(), NULL, NULL) < 0)
qDebug() << "Error while opening " << filePath;
// Step 3 - Retrieve stream information
if(avformat_find_stream_info(fmtCtx, NULL) < 0)
qDebug() << "Error while finding stream info";
// Step 4 - Find the audiostream
audioStreamIdx = -1;
for(uint i=0; i<fmtCtx->nb_streams; i++) {
if(fmtCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
audioStreamIdx = i;
break;
}
}
if(audioStreamIdx != -1) {
// Step 5
AVCodec *aCodec = avcodec_find_decoder(AV_CODEC_ID_MP3);
AVCodecContext *audioDecCtx = avcodec_alloc_context3(aCodec);
avcodec_open2(audioDecCtx, aCodec, NULL);
// Step 6
AVPacket pkt;
AVFrame *frame = av_frame_alloc();
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
int got_packet = 0;
while(av_read_frame(fmtCtx, &pkt) == 0) {
int got_frame = 0;
int ret = avcodec_decode_audio4(audioDecCtx, frame, &got_frame, &pkt);
if(got_frame) {
qDebug() << "got frame";
}
}
av_free_packet(&pkt);
}
avformat_close_input(&fmtCtx);
But the error I get when executing avcodec_decode_audio4() is "[mp3 # 825fc30] Header missing".
Thanks in advance!
[EDIT]
I found out that the audio of the video was not MP3 but AAC. So I changed Step 5 to the following lines of code
// Step 5
// Reuse the codec context attached to the selected audio stream of the input.
AVCodecContext *audioDecCtx = fmtCtx->streams[audioStreamIdx]->codec;
// Look the decoder up from the stream's own codec id instead of assuming MP3.
AVCodec *aCodec = avcodec_find_decoder(audioDecCtx->codec_id);
// Open the decoder. NOTE(review): neither aCodec nor the return value is checked here.
avcodec_open2(audioDecCtx, aCodec, NULL);
Now it outputs "got frame" and the avcodec_decode_audio4() returns the number of bytes it decoded.
Now I have to write the audio to a file, preferably an MP3 file. I found out that I have to do it with the function avcodec_encode_audio2(), but some extra help on how to use it is more than welcome!

Related

Decoding MediaRecorder produced webm stream

I am trying to decode a video stream from the browser using the ffmpeg API. The stream is produced by the webcam and recorded with MediaRecorder as webm format. What I ultimately need is a vector of opencv cv::Mat objects for further processing.
I have written a C++ webserver using the uWebsocket library. The video stream is sent via websocket from the browser to the server once per second. On the server, I append the received data to my custom buffer and decode it with the ffmpeg API.
If I just save the data on the disk and later I play it with a media player, it works fine. So, whatever the browser sends is a valid video.
I do not think I correctly understand how the custom I/O should behave with network streaming, as nothing seems to be working.
The custom buffer:
// Byte store shared between the network side and the custom read callback.
struct Buffer
{
    // Raw bytes accumulated so far; new chunks are appended at the end.
    std::vector<uint8_t> data;
    // Index into `data` of the next byte the read callback will hand out.
    int currentPos{0};
};
The readAVBuffer method for custom IO
int MediaDecoder::readAVBuffer(void* opaque, uint8_t* buf, int buf_size)
{
MediaDecoder::Buffer* mbuf = (MediaDecoder::Buffer*)opaque;
int count = 0;
for(int i=0;i<buf_size;i++)
{
int index = i + mbuf->currentPos;
if(index >= (int)mbuf->data.size())
{
break;
}
count++;
buf[i] = mbuf->data.at(index);
}
if(count > 0) mbuf->currentPos+=count;
std::cout << "read : "<<count<<" "<<mbuf->currentPos<<", buff size:"<<mbuf->data.size() << std::endl;
if(count <= 0) return AVERROR(EAGAIN); //is this error that should be returned? It cannot be EOF since we're not done yet, most likely
return count;
}
The big decode method, that's supposed to return whatever frames it could read
// Appends `length` bytes from `data` to the internal buffer and decodes every
// packet that can currently be read from it.
//
// FFmpeg state (custom I/O, demuxer, decoder, scaler, frames) is created lazily
// on the first call that has at least 1 MB of unread data and is kept in class
// members for later calls.
//
// Returns the frames produced by readFrame() during this call; the vector is
// empty when nothing could be decoded or when initialisation failed.
std::vector<cv::Mat> MediaDecoder::decode(const char* data, size_t length)
{
    std::vector<cv::Mat> frames;
    //add data to the buffer (one bulk insert instead of a push_back per byte)
    buf.data.insert(buf.data.end(), data, data + length);
    //do not invoke the decoders until we have 1MB of data
    if(((buf.data.size() - buf.currentPos) < 1*1024*1024) && !initializedCodecs) return frames;
    std::cout << "decoding data length "<<length<<std::endl;
    if(!initializedCodecs) //initialize ffmpeg objects. Custom I/O, format, decoder, etc.
    {
        //these are just members of the class
        avioCtxPtr = std::unique_ptr<AVIOContext,avio_context_deleter>(
            avio_alloc_context((uint8_t*)av_malloc(4096),4096,0,&buf,&readAVBuffer,nullptr,nullptr),
            avio_context_deleter());
        if(!avioCtxPtr)
        {
            std::cerr << "Could not create IO buffer" << std::endl;
            return frames;
        }
        fmt_ctx = std::unique_ptr<AVFormatContext,avformat_context_deleter>(avformat_alloc_context(),
            avformat_context_deleter());
        if(!fmt_ctx)
        {
            std::cerr << "Could not allocate format context" << std::endl;
            return frames;
        }
        fmt_ctx->pb = avioCtxPtr.get();
        fmt_ctx->flags |= AVFMT_FLAG_CUSTOM_IO ;
        //fmt_ctx->max_analyze_duration = 2 * AV_TIME_BASE; // read 2 seconds of data
        {
            // avformat_open_input() frees a user-supplied context on failure, so
            // ownership is released first to avoid a double free by the deleter.
            AVFormatContext *fmtCtxRaw = fmt_ctx.release();
            if (avformat_open_input(&fmtCtxRaw, "", nullptr, nullptr) < 0) {
                std::cerr << "Could not open movie" << std::endl;
                return frames;
            }
            fmt_ctx.reset(fmtCtxRaw);
        }
        if (avformat_find_stream_info(fmt_ctx.get(), nullptr) < 0) {
            std::cerr << "Could not find stream information" << std::endl;
            return frames;
        }
        if((video_stream_idx = av_find_best_stream(fmt_ctx.get(), AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0)) < 0)
        {
            std::cerr << "Could not find video stream" << std::endl;
            return frames;
        }
        AVStream *video_stream = fmt_ctx->streams[video_stream_idx];
        AVCodec *dec = avcodec_find_decoder(video_stream->codecpar->codec_id);
        if (!dec)
        {
            std::cerr << "Failed to find a decoder for the video stream" << std::endl;
            return frames;
        }
        video_dec_ctx = std::unique_ptr<AVCodecContext,avcodec_context_deleter> (avcodec_alloc_context3(dec),
            avcodec_context_deleter());
        if (!video_dec_ctx)
        {
            std::cerr << "Failed to allocate the video codec context" << std::endl;
            return frames;
        }
        if (avcodec_parameters_to_context(video_dec_ctx.get(),video_stream->codecpar) < 0)
        {
            std::cerr << "Failed to copy codec parameters to the decoder context" << std::endl;
            return frames;
        }
        video_dec_ctx->thread_count = 1;
        /* video_dec_ctx->max_b_frames = 0;
        video_dec_ctx->frame_skip_threshold = 10;*/
        AVDictionary *opts = nullptr;
        av_dict_set(&opts, "refcounted_frames", "1", 0);
        av_dict_set(&opts, "deadline", "1", 0);
        av_dict_set(&opts, "auto-alt-ref", "0", 0);
        av_dict_set(&opts, "lag-in-frames", "1", 0);
        av_dict_set(&opts, "rc_lookahead", "1", 0);
        av_dict_set(&opts, "drop_frame", "1", 0);
        av_dict_set(&opts, "error-resilient", "1", 0);
        int width = video_dec_ctx->width;
        videoHeight = video_dec_ctx->height;
        const int openResult = avcodec_open2(video_dec_ctx.get(), dec, &opts);
        // Options the decoder did not consume stay in the dictionary; free them either way.
        av_dict_free(&opts);
        if(openResult < 0)
        {
            std::cerr << "Failed to open the video codec context" << std::endl;
            return frames;
        }
        AVPixelFormat pFormat = AV_PIX_FMT_BGR24;
        img_convert_ctx = std::unique_ptr<SwsContext,swscontext_deleter>(sws_getContext(width, videoHeight,
            video_dec_ctx->pix_fmt, width, videoHeight, pFormat,
            SWS_BICUBIC, nullptr, nullptr,nullptr),swscontext_deleter());
        if(!img_convert_ctx)
        {
            std::cerr << "Could not create the scaling context" << std::endl;
            return frames;
        }
        frame = std::unique_ptr<AVFrame,avframe_deleter>(av_frame_alloc(),avframe_deleter());
        frameRGB = std::unique_ptr<AVFrame,avframe_deleter>(av_frame_alloc(),avframe_deleter());
        if(!frame || !frameRGB)
        {
            std::cerr << "Could not allocate frames" << std::endl;
            return frames;
        }
        // Let the frame own its pixel buffer. The previous code pointed
        // frameRGB->data at a block-local unique_ptr buffer that was released
        // when this initialisation block ended, leaving the frame dangling.
        // 32 is the same line alignment that was used before
        // (https://stackoverflow.com/questions/35678041/what-is-linesize-alignment-meaning).
        frameRGB->format = pFormat;
        frameRGB->width = width;
        frameRGB->height = videoHeight;
        if(av_frame_get_buffer(frameRGB.get(), 32) < 0)
        {
            std::cerr << "Could not allocate the BGR image buffer" << std::endl;
            return frames;
        }
        initializedCodecs = true;
    }
    AVPacket pkt;
    av_init_packet(&pkt);
    pkt.data = nullptr;
    pkt.size = 0;
    int read_frame_return = 0;
    while ( (read_frame_return=av_read_frame(fmt_ctx.get(), &pkt)) >= 0)
    {
        readFrame(&frames,&pkt,video_dec_ctx.get(),frame.get(),img_convert_ctx.get(),
            videoHeight,frameRGB.get());
        // Release the packet's payload; harmless if readFrame() already did so.
        av_packet_unref(&pkt);
        //if(cancelled) break;
    }
    //flush
    // readFrame(&frames,nullptr,video_dec_ctx.get(),frame.get(),
    // img_convert_ctx.get(),videoHeight,frameRGB.get());
    // Clear the end-of-data state so a later call can try reading again.
    avioCtxPtr->eof_reached = 0;
    avioCtxPtr->error = 0;
    if(frames.empty())
    {
        std::cout << "buffer pos: "<<buf.currentPos<<", buff size:"<<buf.data.size()
            <<",read_frame_return:"<<read_frame_return<< std::endl;
    }
    return frames;
}
What I would expect is a continuous extraction of cv::Mat frames as I feed it more and more data. What actually happens is that after the buffer has been fully read I see:
[matroska,webm # 0x507b450] Read error at pos. 1278266 (0x13813a)
[matroska,webm # 0x507b450] Seek to desired resync point failed. Seeking to earliest point available instead.
And then no more bytes are read from the buffer even if later I increase the size of it.
There is something terribly wrong I'm doing here and I don't understand what.
What I ended up doing was to do the reading of the incoming data and actual decoding in a different thread. The read method, however, will just block if there are no more bytes available, waiting until anything is coming.
When new bytes are arriving, they're added to the buffer and the conditional_variable signals the waiting thread to wake up and start reading data again from the buffer.
It works well enough.

What to pass to avcodec_decode_video2 for H.264 Transport Stream?

I want to decode H.264 video from a collection of MPEG-2 Transport Stream packets but I am not clear what to pass to avcodec_decode_video2
The documentation says to pass "the input AVPacket containing the input buffer."
But what should be in the input buffer?
A PES packet will be spread across the payload portion of several TS packets, with NALU(s) inside the PES. So pass a TS fragment? The entire PES? PES payload only?
This Sample Code mentions:
BUT some other codecs (msmpeg4, mpeg4) are inherently frame based, so
you must call them with all the data for one frame exactly. You must
also initialize 'width' and 'height' before initializing them.
But I can find no info on what "all the data" means...
Passing a fragment of a TS packet payload is not working:
// Wrap the caller's input bytes in a packet; only the pointer and size are
// stored, the bytes themselves are not copied.
AVPacket avDecPkt;
av_init_packet(&avDecPkt);
avDecPkt.data = inbuf_ptr;
avDecPkt.size = esBufSize;
// Hand the packet to the decoder; got_picture receives the decoder's
// "a picture was produced" flag.
len = avcodec_decode_video2(mpDecoderContext, mpFrameDec, &got_picture, &avDecPkt);
// A negative return value is treated as a decode error: log it together with
// the TS packet number, frame counter and FFmpeg's error text, then give up.
if (len < 0)
{
printf(" TS PKT #%.0f. Error decoding frame #%04d [rc=%d '%s']\n",
tsPacket.pktNum, mDecodedFrameNum, len, av_make_error_string(errMsg, 128, len));
return;
}
output
[h264 # 0x81cd2a0] no frame!
TS PKT #2973. Error decoding frame #0001 [rc=-1094995529 'Invalid data found when processing input']
EDIT
Using the excellent hints from WLGfx, I made this simple program to try decoding TS packets. As input, I prepared a file containing only TS packets from the video PID.
It feels close but I don't know how to set up the FormatContext. The code below segfaults at av_read_frame() (and internally at ret = s->iformat->read_packet(s, pkt)). s->iformat is zero.
Suggestions?
EDIT II - Sorry, I forgot to post the source code.
EDIT III - Sample code updated to simulate reading the TS packet queue.
/*
* Test program for video decoder
*/
#include <stdio.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
extern "C" {
#ifdef __cplusplus
#define __STDC_CONSTANT_MACROS
#ifdef _STDINT_H
#undef _STDINT_H
#endif
#include <stdint.h>
#endif
}
extern "C" {
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libavutil/imgutils.h"
#include "libavutil/opt.h"
}
// Test harness that owns an FFmpeg format context, an H.264 decoder and one
// reusable output frame. All members are raw pointers set by the constructor.
// NOTE(review): no destructor is declared, so nothing visible here releases them.
class VideoDecoder
{
public:
// Sets up the format context, the decoder and the output frame.
VideoDecoder();
// Reads a packet through the format context into inTsPacket and decodes it.
// Returns false when reading or decoding fails.
bool rcvTsPacket(AVPacket &inTsPacket);
private:
AVCodec *mpDecoder;               // decoder looked up by codec id
AVCodecContext *mpDecoderContext; // decoder state
AVFrame *mpDecodedFrame;          // destination for decoded pictures
AVFormatContext *mpFmtContext;    // demuxer context used by av_read_frame()
};
// Allocates the format context, looks up and opens the H.264 decoder and
// allocates the frame that receives decoded pictures. Any failure terminates
// the process via exit().
VideoDecoder::VideoDecoder()
{
    av_register_all();
    // FORMAT CONTEXT SETUP
    mpFmtContext = avformat_alloc_context();
    if (!mpFmtContext)
    {
        printf("Could not allocate format context\n");
        exit(12);
    }
    // AVFMT_NOFILE is a flag of AVInputFormat/AVOutputFormat, not of
    // AVFormatContext::flags, so it is no longer assigned here.
    // NOTE: the context is only allocated, never opened. av_read_frame() needs
    // a context opened with avformat_open_input() (for in-memory TS data, with a
    // custom AVIOContext assigned to ->pb first); until then ->iformat is NULL.
    // DECODER SETUP
    mpDecoder = avcodec_find_decoder(AV_CODEC_ID_H264);
    if (!mpDecoder)
    {
        printf("Could not load decoder\n");
        exit(11);
    }
    // Pass the codec so the context gets that codec's defaults and private data.
    mpDecoderContext = avcodec_alloc_context3(mpDecoder);
    if (!mpDecoderContext || avcodec_open2(mpDecoderContext, mpDecoder, NULL) < 0)
    {
        printf("Cannot open decoder context\n");
        exit(1);
    }
    mpDecodedFrame = av_frame_alloc();
    if (!mpDecodedFrame)
    {
        printf("Could not allocate frame\n");
        exit(13);
    }
}
// Reads the next packet from the format context into inTsPkt and feeds it to
// the decoder.
//
// inTsPkt is overwritten by av_read_frame() and released again before
// returning, so the caller may reuse the same AVPacket object.
//
// Returns true when a packet was read and the decoder accepted it, false
// otherwise (including when the format context has never been opened).
bool
VideoDecoder::rcvTsPacket(AVPacket &inTsPkt)
{
    // av_read_frame() dereferences ->iformat, which is NULL until the context
    // has been opened with avformat_open_input(); refuse instead of crashing.
    if (!mpFmtContext || !mpFmtContext->iformat)
    {
        printf("Error in av_read_frame(): format context is not opened\n");
        return false;
    }
    if ((av_read_frame(mpFmtContext, &inTsPkt)) < 0)
    {
        printf("Error in av_read_frame()\n");
        return false;
    }
    // success. Decode the packet. `got` starts at 0 so it is never read
    // uninitialised if the decoder returns early.
    int got = 0;
    const int len = avcodec_decode_video2(mpDecoderContext, mpDecodedFrame, &got, &inTsPkt);
    if (got)
        printf("GOT A DECODED FRAME\n");
    // Release the payload allocated by av_read_frame().
    av_packet_unref(&inTsPkt);
    return len >= 0;
}
// Test driver: reads the file named by argv[1] in 188-byte chunks and passes
// each complete chunk to VideoDecoder::rcvTsPacket().
// Exit codes: 1 = wrong usage, 2 = input file could not be opened, 0 otherwise.
int
main(int argc, char **argv)
{
    if (argc != 2)
    {
        printf("Usage: %s tsInFile\n", argv[0]);
        exit(1);
    }
    // Binary mode: the input is raw packet data, not text.
    FILE *tsInFile = fopen(argv[1], "rb");
    if (!tsInFile)
    {
        perror("Could not open TS input file");
        exit(2);
    }
    unsigned int tsPktNum = 0;
    uint8_t tsBuffer[256];
    AVPacket tsPkt;
    av_init_packet(&tsPkt);
    VideoDecoder vDecoder;
    // Loop on the fread() result rather than feof(): feof() only becomes true
    // after a read has already failed, which made the old loop process one
    // extra, stale packet at the end of the file.
    while (fread(tsBuffer, 188, 1, tsInFile) == 1)
    {
        tsPktNum++;
        tsPkt.size = 188;
        tsPkt.data = tsBuffer;
        vDecoder.rcvTsPacket(tsPkt);
    }
    if (ferror(tsInFile))
        perror("Error while reading TS input file");
    fclose(tsInFile);
    return 0;
}
I've got some code snippets that might help you out as I've been working with MPEG-TS also.
Starting with my packet thread which checks each packet against the stream ID's which I've already found and got the codec contexts:
void *FFMPEG::thread_packet_function(void *arg) {
FFMPEG *ffmpeg = (FFMPEG*)arg;
for (int c = 0; c < MAX_PACKETS; c++)
ffmpeg->free_packets[c] = &ffmpeg->packet_list[c];
ffmpeg->packet_pos = MAX_PACKETS;
Audio.start_decoding();
Video.start_decoding();
Subtitle.start_decoding();
while (!ffmpeg->thread_quit) {
if (ffmpeg->packet_pos != 0 &&
Audio.okay_add_packet() &&
Video.okay_add_packet() &&
Subtitle.okay_add_packet()) {
pthread_mutex_lock(&ffmpeg->packet_mutex); // get free packet
AVPacket *pkt = ffmpeg->free_packets[--ffmpeg->packet_pos]; // pre decrement
pthread_mutex_unlock(&ffmpeg->packet_mutex);
if ((av_read_frame(ffmpeg->fContext, pkt)) >= 0) { // success
int id = pkt->stream_index;
if (id == ffmpeg->aud_stream.stream_id) Audio.add_packet(pkt);
else if (id == ffmpeg->vid_stream.stream_id) Video.add_packet(pkt);
else if (id == ffmpeg->sub_stream.stream_id) Subtitle.add_packet(pkt);
else { // unknown packet
av_packet_unref(pkt);
pthread_mutex_lock(&ffmpeg->packet_mutex); // put packet back
ffmpeg->free_packets[ffmpeg->packet_pos++] = pkt;
pthread_mutex_unlock(&ffmpeg->packet_mutex);
//LOGI("Dumping unknown packet, id %d", id);
}
} else {
av_packet_unref(pkt);
pthread_mutex_lock(&ffmpeg->packet_mutex); // put packet back
ffmpeg->free_packets[ffmpeg->packet_pos++] = pkt;
pthread_mutex_unlock(&ffmpeg->packet_mutex);
//LOGI("No packet read");
}
} else { // buffers full so yield
//LOGI("Packet reader on hold: Audio-%d, Video-%d, Subtitle-%d",
// Audio.packet_pos, Video.packet_pos, Subtitle.packet_pos);
usleep(1000);
//sched_yield();
}
}
return 0;
}
The decoders for audio, video and subtitles each have their own thread, which receives the packets from the above thread in ring buffers. I've had to separate the decoders into their own threads because CPU usage was increasing when I started using the deinterlace filter.
My video decoder reads the packets from the buffers and when it has finished with the packet sends it back to be unref'd and can be used again. Balancing the packet buffers doesn't take that much time once everything is running.
Here's the snippet from my video decoder:
// Thread entry point for the video decoder. `arg` is the VideoManager.
// Repeatedly takes the oldest queued packet, decodes it, returns the packet via
// ffmpeg.finished_with_packet() and stores each resulting picture with
// add_av_frame(). When INTERLACE_ALL or INTERLACE_HALF is defined, interlaced
// pictures are first pushed through the filter graph.
// NOTE(review): when the queue is empty the loop spins without sleeping.
void *VideoManager::decoder(void *arg) {
    LOGI("Video decoder started");
    VideoManager *mgr = (VideoManager *)arg;
    while (!ffmpeg.thread_quit) {
        pthread_mutex_lock(&mgr->packet_mutex);
        if (mgr->packet_pos == 0) {
            pthread_mutex_unlock(&mgr->packet_mutex);
            continue;
        }
        // fetch first packet to decode
        AVPacket *pkt = mgr->packets[0];
        // shift list down one
        for (int c = 1; c < mgr->packet_pos; c++) {
            mgr->packets[c-1] = mgr->packets[c];
        }
        mgr->packet_pos--;
        pthread_mutex_unlock(&mgr->packet_mutex); // finished with packets array
        // Start at 0 so the flag is never read uninitialised if decoding fails early.
        int got = 0;
        AVFrame *frame = ffmpeg.vid_stream.frame;
        avcodec_decode_video2(ffmpeg.vid_stream.context, frame, &got, pkt);
        ffmpeg.finished_with_packet(pkt);
        //LOGI("decoded video packet");
        if (!got) continue;
        // The INTERLACE_ALL and INTERLACE_HALF paths were identical, so they share one branch.
#if defined(INTERLACE_ALL) || defined(INTERLACE_HALF)
        if (!frame->interlaced_frame) mgr->add_av_frame(frame, 0);
        else {
            if (!mgr->filter_initialised) mgr->init_filter_graph(frame);
            av_buffersrc_add_frame_flags(mgr->filter_src_ctx, frame, AV_BUFFERSRC_FLAG_KEEP_REF);
            int c = 0;
            while (true) {
                AVFrame *filter_frame = ffmpeg.vid_stream.filter_frame;
                int result = av_buffersink_get_frame(mgr->filter_sink_ctx, filter_frame);
                // AVERROR(EAGAIN) and AVERROR_EOF are both negative, so one test
                // covers them (AVERROR(AVERROR_EOF) would negate the code twice).
                if (result < 0)
                    break;
                mgr->add_av_frame(filter_frame, c++);
                av_frame_unref(filter_frame);
            }
            //LOGI("Interlaced %d frames, decode %d, playback %d", c, mgr->decode_pos, mgr->playback_pos);
        }
#else
        mgr->add_av_frame(frame, 0);
#endif
    }
    LOGI("Video decoder ended");
    // A function returning void* must return a value; flowing off the end is undefined behaviour.
    return 0;
}
As you can see, I'm using a mutex when passing packets back and forth.
Once a frame has been got I just copy the YUV buffers from the frame for later use into another buffer list. I don't convert the YUV, I use a shader which converts the YUV to RGB on the GPU.
The next snippet adds my decoded frame to my buffer list. This may help understand how to deal with the data.
// Copies the three planes of `frame` into the buffer slot at decode_pos and
// advances decode_pos. The Y plane is copied as linesize[0] * height bytes and
// the U and V planes as linesize[1] * height / 2 bytes each. field_num offsets
// the stored frame time by that many codec time-base units.
// The frame is dropped (with a log line) when the slot cannot be used.
void VideoManager::add_av_frame(AVFrame *frame, int field_num) {
    const int y_linesize = frame->linesize[0];
    const int u_linesize = frame->linesize[1];
    const int hgt = frame->height;
    const int y_buffsize = y_linesize * hgt;
    const int u_buffsize = u_linesize * hgt / 2;
    const int buffsize = y_buffsize + 2 * u_buffsize;
    VideoBuffer *slot = &buffers[decode_pos];

    if (ffmpeg.is_network && playback_pos == decode_pos) { // patched 25/10/16 wlgfx
        // Network streams overwrite the slot instead of dropping the frame.
        slot->used = false;
        if (!slot->data) {
            slot->data = (char*)mem.alloc(buffsize);
            if (!slot->data) {
                LOGI("Dropped frame, allocation error");
                return;
            }
        }
    } else {
        if (playback_pos == decode_pos) {
            LOGI("Dropped frame, ran out of decoder frame buffers");
            return;
        }
        if (!slot->data) {
            slot->data = (char*)mem.alloc(buffsize);
            if (!slot->data) {
                LOGI("Dropped frame, allocation error.");
                return;
            }
        }
    }

    // Plane layout inside the slot: Y, then U, then V.
    slot->y_frame = slot->data;
    slot->u_frame = slot->y_frame + y_buffsize;
    slot->v_frame = slot->y_frame + y_buffsize + u_buffsize;
    slot->wid = frame->width;
    slot->hgt = hgt;
    slot->y_linesize = y_linesize;
    slot->u_linesize = u_linesize;

    int64_t pts = av_frame_get_best_effort_timestamp(frame);
    slot->pts = pts;
    slot->buffer_size = buffsize;
    double field_add = av_q2d(ffmpeg.vid_stream.context->time_base) * field_num;
    slot->frame_time = av_q2d(ts_stream) * pts + field_add;

    memcpy(slot->y_frame, frame->data[0], (size_t) (slot->y_linesize * slot->hgt));
    memcpy(slot->u_frame, frame->data[1], (size_t) (slot->u_linesize * slot->hgt / 2));
    memcpy(slot->v_frame, frame->data[2], (size_t) (slot->u_linesize * slot->hgt / 2));
    slot->used = true;

    // Advance to the next slot, wrapping at MAX_VID_BUFFERS.
    decode_pos = (decode_pos + 1) % MAX_VID_BUFFERS;
    //if (field_num == 0) LOGI("Video %.2f, %d - %d",
    // slot->frame_time - Audio.pts_start_time, decode_pos, playback_pos);
}
If there's anything else that I may be able to help with just give me a shout. :-)
EDIT:
The snippet how I open my video stream context which automatically determines the codec, whether it is h264, mpeg2, or another:
// Selects the best video stream of fContext, opens its decoder and allocates
// the two frames used for decoding and filtering.
// On any failure vid_stream.stream_id is left at -1, which the rest of the
// class treats as "no video".
void FFMPEG::open_video_stream() {
    // av_find_best_stream() reports failure with negative AVERROR codes
    // (stream not found, decoder not found), not specifically -1, so any
    // negative result must be rejected before it is used as an array index.
    const int best = av_find_best_stream(fContext, AVMEDIA_TYPE_VIDEO,
                                         -1, -1, &vid_stream.codec, 0);
    if (best < 0) {
        vid_stream.stream_id = -1;
        return;
    }
    vid_stream.stream_id = best;
    vid_stream.context = fContext->streams[vid_stream.stream_id]->codec;
    if (!vid_stream.codec || avcodec_open2(vid_stream.context,
                                           vid_stream.codec, NULL) < 0) {
        vid_stream.stream_id = -1;
        return;
    }
    vid_stream.frame = av_frame_alloc();
    vid_stream.filter_frame = av_frame_alloc();
}
EDIT2:
This is how I've opened the input stream, whether it be file or URL. The AVFormatContext is the main context for the stream.
// Opens the input at url_ (file or network URL), opens the audio, video and
// subtitle decoders and starts the packet-reading thread.
//
// xtrim, ytrim and gain are stored for later use (gain becomes Audio.volume).
// Returns true when the stream is running. Returns false when url_ is NULL;
// on any later failure stop_stream() is called before returning false.
bool FFMPEG::start_stream(char *url_, float xtrim, float ytrim, int gain) {
    aud_stream.stream_id = -1;
    vid_stream.stream_id = -1;
    sub_stream.stream_id = -1;
    // Constructing a std::string from a null pointer is undefined behaviour.
    if (!url_) return false;
    this->url = url_;
    this->xtrim = xtrim;
    this->ytrim = ytrim;
    Audio.volume = gain;
    Audio.init();
    Video.init();
    fContext = avformat_alloc_context();
    if ((avformat_open_input(&fContext, url_, NULL, NULL)) != 0) {
        stop_stream();
        return false;
    }
    if ((avformat_find_stream_info(fContext, NULL)) < 0) {
        stop_stream();
        return false;
    }
    // network stream will overwrite packets if buffer is full
    is_network = url.compare(0, 4, "udp:") == 0 ||
                 url.compare(0, 4, "rtp:") == 0 ||
                 url.compare(0, 5, "rtsp:") == 0 ||
                 url.compare(0, 5, "http:") == 0; // added for wifi broadcasting ability
    // determine if stream is audio only.
    // The length check matters: url.size() - 4 wraps around for URLs shorter
    // than four characters and substr() would then throw std::out_of_range.
    is_mp3 = url.size() >= 4 && url.compare(url.size() - 4, 4, ".mp3") == 0;
    LOGI("Stream: %s", url_);
    if (!open_audio_stream()) {
        stop_stream();
        return false;
    }
    if (is_mp3) {
        vid_stream.stream_id = -1;
        sub_stream.stream_id = -1;
    } else {
        open_video_stream();
        open_subtitle_stream();
        if (vid_stream.stream_id == -1) { // switch to audio only
            close_subtitle_stream();
            is_mp3 = true;
        }
    }
    LOGI("Audio: %d, Video: %d, Subtitle: %d",
         aud_stream.stream_id,
         vid_stream.stream_id,
         sub_stream.stream_id);
    if (aud_stream.stream_id != -1) {
        LOGD("Audio stream time_base {%d, %d}",
             aud_stream.context->time_base.num,
             aud_stream.context->time_base.den);
    }
    if (vid_stream.stream_id != -1) {
        LOGD("Video stream time_base {%d, %d}",
             vid_stream.context->time_base.num,
             vid_stream.context->time_base.den);
    }
    LOGI("Starting packet and decode threads");
    thread_quit = false;
    // Without the reader thread nothing would ever be decoded, so treat a
    // failed thread start like any other start-up failure.
    if (pthread_create(&thread_packet, NULL, &FFMPEG::thread_packet_function, this) != 0) {
        thread_quit = true;
        stop_stream();
        return false;
    }
    Display.set_overlay_timout(3.0);
    return true;
}
EDIT: (constructing an AVPacket)
Construct an AVPacket to send to the decoder...
// Build a packet that refers to an existing 188-byte buffer; only the pointer
// and size are stored, so the buffer must stay valid while the packet is in use.
AVPacket packet;
av_init_packet(&packet);
packet.data = myTSpacketdata; // pointer to the TS packet
packet.size = 188;
You should be able to reuse the packet. And it might need unref'ing.
You must first use the avformat library (av_read_frame) to get the compressed frames out of the file. Then you can decode them using avcodec_decode_video2. Look at this tutorial: http://dranger.com/ffmpeg/

ffmpeg memory increase when Playing a UDP-MJPEG-Stream

I'm reading a UDP MJPEG stream with the ffmpeg API. When I read and display the stream on an ARM processor, I have two problems:
1- The application is too slow and there is a big delay between the network camera and the displayed video.
2- The memory usage increases every time I call the function av_read_frame().
The Source code
const char *cam1_url = "udp://192.168.1.1:1234";
AVCodec *pCodec;
AVFrame *pFrame, *pFrameRGB;
AVCodecContext *pCodecCon;
AVDictionary *pUdpStreamOptions = NULL;
AVInputFormat *pMjpegFormat = av_find_input_format("mjpeg");
av_dict_set(&pUdpStreamOptions, "fifo_size", "5000000", 0);
av_register_all();
avdevice_register_all();
avcodec_register_all();
avformat_network_init();
AVFormatContext *pFormatCont = avformat_alloc_context();
if(avformat_open_input(&pFormatCont,cam1_url,pMjpegFormat,&pUdpStreamOptions) < 0)
{
cout << "!! Error !! - avformat_open_input(): failed to open input URL" << endl;
}
if(avformat_find_stream_info(pFormatCont,NULL) < 0)
{
cout << "!! Error !! - avformat_find_stream_info(), Failed to retrieve stream info" << endl;
}
av_dump_format(pFormatCont, 0, cam1_url, 0);
int videoStream;
for(int i=0; i< pFormatCont->nb_streams; i++)
{
if(pFormatCont->streams[i]->codec->codec_type==AVMEDIA_TYPE_VIDEO)
{
videoStream=i;
cout << " videoStream = " << videoStream << endl;
}
}
pCodecCon = pFormatCont->streams[videoStream]->codec;
pCodec = avcodec_find_decoder(pCodecCon->codec_id);
if(NULL == pCodec)
{
cout << "couldnt find codec" << endl;
return EXIT_FAILURE;
}
if(avcodec_open2(pCodecCon,pCodec,NULL) < 0)
{
cout << "!! Error !! - in avcodec_open2()" << endl;
return EXIT_FAILURE;
}
uint8_t *frameBuffer;
int numRxBytes = 0;
AVPixelFormat pFormat =AV_PIX_FMT_BGR24;
int width_rgb = (int)((float)pCodecCon->width);
int height_rgb = (int)((float)pCodecCon->height);
numRxBytes = avpicture_get_size(pFormat,width_rgb,height_rgb);
frameBuffer = (uint8_t *) av_malloc(numRxBytes*sizeof(uint8_t));
avpicture_fill((AVPicture *) pFrameRGB, frameBuffer, pFormat,width_rgb,height_rgb);
AVPacket rx_pkt; // received packet
int frameFinished = 0;
struct SwsContext *imgConvertCtx;
av_init_packet(&rx_pkt);
while(av_read_frame(pFormatCont, &rx_pkt) >= 0)
{
if(rx_pkt.stream_index == videoStream)
{
av_frame_free(&pFrame);
pFrame = av_frame_alloc();
av_frame_free(&pFrameRGB);
pFrameRGB = av_frame_alloc();
avcodec_decode_video2(pCodecCon, pFrame, &frameFinished,&rx_pkt);
if(frameFinished)
{
imgConvertCtx = sws_getCachedContext(NULL, pFrame->width,pFrame->height, AV_PIX_FMT_YUVJ420P,width_rgb,height_rgb,AV_PIX_FMT_BGR24, SWS_BICUBIC, NULL, NULL,NULL);
sws_scale(imgConvertCtx, ((AVPicture*)pFrame)->data, ((AVPicture*)pFrame)->linesize, 0, pCodecCon->height, ((AVPicture *)pFrameRGB)->data, ((AVPicture *)pFrameRGB)->linesize);
av_frame_unref(pFrame);
av_frame_unref(pFrameRGB);
}
}
av_free_packet(&rx_pkt);
av_packet_unref(&rx_pkt);
}
//cvDestroyWindow("Cam1Video");
av_free_packet(&rx_pkt);
avcodec_close(pCodecCon);
av_free(pFrame);
av_free(pFrameRGB);
avformat_close_input(&pFormatCont);
I have read that the reason could be that the ffmpeg libraries keep the incoming frames in a cache, but the ARM processor isn't fast enough to process them. After about 4 minutes the system crashes.
How can I solve this problem?
One option could be to tell ffmpeg to act as a frame grabber, i.e. to read frames in real time, with the flag "-re". How can I set this flag in the C++ source code? Or can anybody help me solve the problem another way?
Thank you very much.

How to extract elementary video from mp4 using ffmpeg programmatically?

I started learning ffmpeg a few weeks ago. At the moment I am able to transcode any video to mp4 using the h264/AVC codec. The main scheme is something like this:
-open input
-demux
-decode
-encode
-mux
The actual code is below:
#include <iostream>
#include <math.h>
extern "C" {
#
ifndef __STDC_CONSTANT_MACROS# undef main /* Prevents SDL from overriding main() */ # define __STDC_CONSTANT_MACROS# endif
# pragma comment(lib, "avcodec.lib")# pragma comment(lib, "avformat.lib")# pragma comment(lib, "swscale.lib")# pragma comment(lib, "avutil.lib")
#include <libavcodec\avcodec.h>
#include <libavformat\avformat.h>
#include <libswscale\swscale.h>
#include <libavutil\mem.h>
#include <libavutil/opt.h>
#include <libavutil\channel_layout.h>
#include <libavutil\common.h>
#include <libavutil\imgutils.h>
#include <libavutil\mathematics.h>
#include <libavutil\samplefmt.h>
}
using namespace std;
// Opens `codec` on the codec context that belongs to stream `st`.
// `oc` is accepted for symmetry with the other helpers but is not used.
// A failure is only reported on stdout; the function returns nothing.
void open_video(AVFormatContext * oc, AVCodec * codec, AVStream * st) {
    (void) oc;
    // "st - > codec" in the original was a garbled "->" and did not compile.
    AVCodecContext * c = st->codec;
    /*open codec */
    cout << "probably starts here" << endl;
    const int ret = avcodec_open2(c, codec, NULL);
    cout << "and ends here" << endl;
    if (ret < 0) {
        cout << ("Could not open video codec") << endl;
    }
}
/* Adds a new video stream to an output file and fills in its encoder settings.
@param oc       Format context that the new stream is added to.
@param codec    Out-parameter; receives the encoder that was found (H.264).
@param codec_id Codec id suggested by the output format; used for the error
                message and for the codec-specific tweaks below.
@param chWidth  Width of the encoded picture in pixels.
@param chHeight Height of the encoded picture in pixels.
@param fps      Frames per second; the time base is set to 1/fps.
@return The new stream, or NULL when no encoder was found or the stream could
        not be allocated.
*/
AVStream * addStream(AVFormatContext * oc, AVCodec ** codec, enum AVCodecID codec_id, int chWidth, int chHeight, int fps) {
    //find encoder of the stream, it passes this information to #codec, later on
    //it will be used in encoding the video # avcodec_encode_video2 in loop.
    *codec = avcodec_find_encoder(AV_CODEC_ID_H264);
    if (!(*codec)) {
        cout << "ERROR CAN NOT FIND ENCODER! ERROR! ERROR! AVCODEC_FIND_ENCODER FAILED !!!1 "
            "" << endl;
        printf("Could not find encoder for ' %s ' ", avcodec_get_name(codec_id));
        // Continuing would dereference a NULL encoder below.
        return NULL;
    }
    //create a new stream with the found codec inside oc(AVFormatContext).
    AVStream * st = avformat_new_stream(oc, *codec);
    if (!st) {
        cout << " Cannot allocate stream " << endl;
        return NULL;
    }
    //Setting the stream id.
    //Since, there can be other streams in this AVFormatContext,
    //we should find the first non used index. And this is oc->nb_streams(number of streams) - 1
    st->id = oc->nb_streams - 1;
    AVCodecContext * c = st->codec;
    //setting the stream's codec's properties.
    // The context must name the encoder that was actually looked up above;
    // a different id makes avcodec_open2() reject the codec/context pair.
    c->codec_id = (*codec)->id;
    c->bit_rate = 4000000;
    c->width = chWidth;
    c->height = chHeight;
    c->time_base.den = fps;
    c->time_base.num = 1;
    c->gop_size = 12;
    c->pix_fmt = AV_PIX_FMT_YUV420P;
    if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
        /* just for testing, we also add B frames */
        c->max_b_frames = 2;
    }
    if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
        /* Needed to avoid using macroblocks in which some coeffs overflow.
         * This does not happen with normal video, it just happens here as
         * the motion of the chroma plane does not match the luma plane. */
        c->mb_decision = 2;
    }
    /* Some formats want stream headers to be separate. */
    if (oc->oformat->flags & AVFMT_GLOBALHEADER)
        c->flags |= CODEC_FLAG_GLOBAL_HEADER;
    //returning our lovely new brand stream.
    return st;
}
int changeResolution(string source, int format) {
//Data members
struct SwsContext * sws_ctx = NULL;
AVFrame * pFrame = NULL;
AVFrame * outFrame = NULL;
AVPacket packet;
uint8_t * buffer = NULL;
uint8_t endcode[] = {
0,
0,
1,
0xb7
};
AVDictionary * optionsDict = NULL;
AVFormatContext * pFormatCtx = NULL;
AVFormatContext * outputContext = NULL;
AVCodecContext * pCodecCtx;
AVCodec * pCodec;
AVCodec * codec;
AVCodec * videoCodec;
AVOutputFormat * fmt;
AVStream * video_stream;
int changeWidth;
int changeHeight;
int frameFinished;
int numBytes;
int fps;
int lock = 0;
//Register all codecs & other important stuff. Vital!..
av_register_all();
//Selects the desired resolution.
if (format == 0) {
changeWidth = 320;
changeHeight = 180;
} else if (format == 1) {
changeWidth = 640;
changeHeight = 480;
} else if (format == 2) {
changeWidth = 960;
changeHeight = 540;
} else if (format == 3) {
changeWidth = 1024;
changeHeight = 768;
} else {
changeWidth = 1280;
changeHeight = 720;
}
// Open video file
int aaa;
aaa = avformat_open_input( & pFormatCtx, source.c_str(), NULL, NULL);
if (aaa != 0) {
cout << " cannot open input file \n" << endl;
cout << "aaa = " << aaa << endl;
return -1; // Couldn't open file
}
// Retrieve stream information
if (av_find_stream_info(pFormatCtx) < 0)
return -1; // Couldn't find stream information
//just checking duration casually for no reason
/*int64_t duration = pFormatCtx->duration;
cout << "the duration is " << duration << " " << endl;*/
//this writes the info about the file
av_dump_format(pFormatCtx, 0, 0, 0);
cin >> lock;
// Find the first video stream
int videoStream = -1;
int i;
for (i = 0; i < 3; i++)
if (pFormatCtx - > streams[i] - > codec - > codec_type == AVMEDIA_TYPE_VIDEO) {
videoStream = i;
cout << " lel \n ";
break;
}
if (videoStream == -1)
return -1; // Didn't find a video stream
// Get a pointer to the codec context for the video stream
pCodecCtx = pFormatCtx - > streams[videoStream] - > codec;
fps = pCodecCtx - > time_base.den;
//Find the decoder of the input file, for the video stream
pCodec = avcodec_find_decoder(pCodecCtx - > codec_id);
if (pCodec == NULL) {
fprintf(stderr, "Unsupported codec!\n");
return -1; // Codec not found
}
// Open codec, you must open it first, in order to use it.
if (avcodec_open2(pCodecCtx, pCodec, & optionsDict) < 0)
return -1; // Could not open codec
// Allocate video frame ( pFrame for taking the packets into, outFrame for processed frames to packet.)
pFrame = avcodec_alloc_frame();
outFrame = avcodec_alloc_frame();
i = 0;
int ret;
int video_frame_count = 0;
//Initiate the outFrame set the buffer & fill the properties
numBytes = avpicture_get_size(PIX_FMT_YUV420P, changeWidth, changeHeight);
buffer = (uint8_t * ) av_malloc(numBytes * sizeof(uint8_t));
avpicture_fill((AVPicture * ) outFrame, buffer, PIX_FMT_YUV420P, changeWidth, changeHeight);
int pp;
int frameNo = 0;
//allocate the outputContext, it will be the AVFormatContext of our output file.
//It will try to find the format by giving the file name.
avformat_alloc_output_context2( & outputContext, NULL, NULL, "myoutput.mp4");
//Cant find the file extension, using MPEG as default.
if (!outputContext) {
printf("Could not deduce output format from file extension: using MPEG.\n");
avformat_alloc_output_context2( & outputContext, NULL, "mpeg", "myoutput.mp4");
}
//Still cant set file extension, exit.
if (!outputContext) {
return 1;
}
//set AVOutputFormat fmt to our outputContext's format.
fmt = outputContext - > oformat;
video_stream = NULL;
//If fmt has a valid codec_id, create a new video stream.
//This function will set the streams codec & codecs desired properties.
//Stream's codec will be passed to videoCodec for later usage.
if (fmt - > video_codec != AV_CODEC_ID_NONE)
video_stream = addStream(outputContext, & videoCodec, fmt - > video_codec, changeWidth, changeHeight, fps);
//open the video using videoCodec. by avcodec_open2() i.e open the codec.
if (video_stream)
open_video(outputContext, videoCodec, video_stream);
//Creating our new output file.
if (!(fmt - > flags & AVFMT_NOFILE)) {
ret = avio_open( & outputContext - > pb, "toBeStreamed.264", AVIO_FLAG_WRITE);
if (ret < 0) {
cout << " cant open file " << endl;
return 1;
}
}
//Writing the header of format context.
//ret = avformat_write_header(outputContext, NULL);
if (ret >= 0) {
cout << "writing header success !!!" << endl;
}
//Start reading packages from input file.
while (av_read_frame(pFormatCtx, & packet) >= 0) {
// Is this a packet from the video stream?
if (packet.stream_index == videoStream) {
// Decode video package into frames
ret = avcodec_decode_video2(pCodecCtx, pFrame, & frameFinished, & packet);
if (ret < 0) {
printf(" Error decoding frame !!..");
return ret;
}
if (frameFinished) {
printf("video_frame n:%d coded_n:%d\n", video_frame_count++, pFrame - > coded_picture_number);
}
av_free_packet( & packet);
//do stuff with frame, in this case we are changing the resolution.
static struct SwsContext * img_convert_ctx_in = NULL;
if (img_convert_ctx_in == NULL) {
img_convert_ctx_in = sws_getContext(pCodecCtx - > width,
pCodecCtx - > height,
pCodecCtx - > pix_fmt,
changeWidth,
changeHeight,
PIX_FMT_YUV420P,
SWS_BICUBIC,
NULL,
NULL,
NULL);
}
//scale the frames
sws_scale(img_convert_ctx_in,
pFrame - > data,
pFrame - > linesize,
0,
pCodecCtx - > height,
outFrame - > data,
outFrame - > linesize);
//initiate the pts value
if (frameNo == 0)
outFrame - > pts = 0;
//calculate the pts value & set it.
outFrame - > pts += av_rescale_q(1, video_stream - > codec - > time_base, video_stream - > time_base);
//encode frames into packages. Package passed in #packet.
if (avcodec_encode_video2(outputContext - > streams[0] - > codec, & packet, outFrame, & pp) < 0)
cout << "Encoding frames into packages, failed. " << endl;
frameNo++;
//write the packages into file, resulting in creating a video file.
av_interleaved_write_frame(outputContext, & packet);
}
}
av_free_packet( & packet);
//av_write_trailer(outputContext);
avio_close(outputContext - > pb);
// Free the RGB image
av_free(buffer);
av_free(outFrame);
// Free the YUV frame
av_free(pFrame);
// Close the codec
avcodec_close(video_stream - > codec);
avcodec_close(pCodecCtx);
// Close the video file
avformat_close_input( & pFormatCtx);
return 0;
}
at the end of the process I get my desired file with desired codec & container & resolution.
My problem is that, in one part of our project, I need to write elementary video streams to a file, such as example.264. However, I cannot add a stream without creating an AVFormatContext, and I cannot create an AVFormatContext because .264 files do not have a container; as far as I know, they are just raw video.
I have tried the approach in decoding_encoding.c, which uses fwrite. However, that example is for the MPEG-2 codec, and when I try to adapt that code to the H.264/AVC codec, I get a "floating point division by zero" error from mediainfo; moreover, some of the properties of the video are not shown (such as FPS, playtime and quality factor). I think it has to do with the "endcode" that the example adds at the end, which is specific to MPEG-2 ( uint8_t endcode[] = { 0, 0, 1, 0xb7 }; ).
Anyway, I would love to get a starting point for this task. I have managed to come this far by using internet resources (which are quite few and outdated for ffmpeg), but now I'm a little stuck.

Audio output with video processing with opencv

I am processing video with OpenCV, but at the same time I need to play the audio and have simple control over it, such as its volume or the current frame number.
I think I should create a parallel process with ffmpeg, but I don't know how to do so. Can you explain what to do?
Or do you know another solution?
I think ffmpeg should be used to play audio and SDL for video in this case.
After opening the file with OpenCV and processing the frame, you can use OpenCV -> SDL to display it while retrieving the audio frames through ffmpeg and playing them with SDL.
Here is a nice collection of ffmpeg/SDL tutorials!
I also found a nice post that shows how to capture frames from a video file using ffmpeg, store them in OpenCV cv::Mat and display the result in a OpenCV window. But this way you can't play audio since OpenCV doesn't deal with that.
You might be interested in reading this post as well: How to avoid a growing delay with ffmpeg between sound and raw video data ?
EDIT:
I spent the last 4 hours coding a prototype to demonstrate how it's done. This demo reads video frames through OpenCV (so you can process them) and audio through ffmpeg, and SDL is used to play both! There are 2 limitations in this demo you must be aware of: 1 - it assumes you are working with an OpenCV image packed as BGR (24 bits), and 2 - audio and video are not synchronized! Yes, I have left some work for you to do (yeeeey). But don't panic, page 6 has some ideas!
It's important to sync audio and video because you will be doing some processing on the frames, and that will certainly make the video and audio go out of sync real fast since they are being played independently of each other.
The ffmpeg tutorials I suggested above are very very important to understand the code, a lot of code from this demo came from there. They show how to deal with SDL, and how to read packets of audio/video streams.
#include <highgui.h>
#include <cv.h>
extern "C"
{
#include <SDL.h>
#include <SDL_thread.h>
#include <avcodec.h>
#include <avformat.h>
}
#include <iostream>
#include <stdio.h>
//#include <malloc.h>
using namespace cv;
#define SDL_AUDIO_BUFFER_SIZE 1024
// Singly linked FIFO of AVPackets.
// packet_queue_put() appends at last_pkt and signals `cond`;
// packet_queue_get() removes from first_pkt and may wait on `cond`.
// Both take `mutex` around every access to the list and counters.
struct PacketQueue
{
    AVPacketList *first_pkt;   // head of the list (next packet to hand out)
    AVPacketList *last_pkt;    // tail of the list (most recently queued packet)
    int nb_packets;            // number of packets currently queued
    int size;                  // sum of pkt.size over the queued packets
    SDL_mutex *mutex;          // protects all fields above
    SDL_cond *cond;            // signalled whenever a packet is queued
};
// Queue of audio packets: filled by main() via packet_queue_put() and
// drained by audio_decode_frame() via packet_queue_get().
PacketQueue audioq;
// Index of the first audio stream found by setup_ffmpeg(); -1 until found.
int audioStream = -1;
// Index of the first video stream found by setup_ffmpeg(); -1 until found.
int videoStream = -1;
// Abort flag: when non-zero, packet_queue_get() and audio_decode_frame() return -1.
int quit = 0;
// Display surface; created on first use by show_frame() through SDL_SetVideoMode().
SDL_Surface* screen = NULL;
// Passed to SDL_FreeSurface() in main(); nothing in the visible code assigns it.
SDL_Surface* surface = NULL;
// Demuxer context for the input file; opened in setup_ffmpeg().
AVFormatContext* pFormatCtx = NULL;
// Codec context of the selected audio stream; set in setup_ffmpeg().
AVCodecContext* aCodecCtx = NULL;
// Codec context of the selected video stream; set in setup_ffmpeg().
AVCodecContext* pCodecCtx = NULL;
/**
 * Displays one OpenCV frame in the SDL window.
 *
 * The SDL video mode is created lazily from the size of the first frame
 * passed in; the process exits with status 1 if that fails. The frame's
 * pixel buffer is wrapped (not copied) in a temporary SDL surface, blitted
 * onto the screen, and the wrapper is released before returning.
 *
 * @param img frame to display; assumed to be packed BGR, 8 bits per channel.
 */
void show_frame(IplImage* img)
{
    if (!screen)
    {
        screen = SDL_SetVideoMode(img->width, img->height, 0, 0);
        if (!screen)
        {
            fprintf(stderr, "SDL: could not set video mode - exiting\n");
            exit(1);
        }
    }

    // Assuming IplImage packed as BGR 24bits
    SDL_Surface* frame_surface = SDL_CreateRGBSurfaceFrom((void*)img->imageData,
                                                          img->width,
                                                          img->height,
                                                          img->depth * img->nChannels,
                                                          img->widthStep,
                                                          0xff0000, 0x00ff00, 0x0000ff, 0);
    if (!frame_surface)
    {
        // Skip this frame rather than blitting from a NULL surface.
        fprintf(stderr, "SDL: could not create surface for frame - %s\n", SDL_GetError());
        return;
    }

    SDL_BlitSurface(frame_surface, 0, screen, 0);
    SDL_Flip(screen);

    // The wrapper was previously leaked on every frame. It only borrows
    // img->imageData, so freeing it does not touch the OpenCV pixel buffer.
    SDL_FreeSurface(frame_surface);
}
// Resets every field of *q to zero, then creates the mutex and condition
// variable that packet_queue_put()/packet_queue_get() rely on.
void packet_queue_init(PacketQueue *q)
{
    memset(q, 0, sizeof(*q));

    q->mutex = SDL_CreateMutex();
    q->cond  = SDL_CreateCond();
}
// Appends *pkt to the tail of the queue and wakes one waiter.
//
// The packet is first passed through av_dup_packet(); a list node is then
// allocated with malloc() (released with free() in packet_queue_get()) and
// receives a shallow copy of *pkt.
//
// Returns 0 on success, -1 if av_dup_packet() or the node allocation fails.
int packet_queue_put(PacketQueue *q, AVPacket *pkt)
{
    if (av_dup_packet(pkt) < 0)
        return -1;

    AVPacketList *node = (AVPacketList*) malloc(sizeof(AVPacketList));
    if (node == NULL)
        return -1;

    node->pkt = *pkt;
    node->next = NULL;

    SDL_LockMutex(q->mutex);

    // Link the node after the current tail, or make it the head if empty.
    if (q->last_pkt)
        q->last_pkt->next = node;
    else
        q->first_pkt = node;
    q->last_pkt = node;

    ++q->nb_packets;
    q->size += node->pkt.size;

    SDL_CondSignal(q->cond);
    SDL_UnlockMutex(q->mutex);

    return 0;
}
// Removes the packet at the head of the queue and copies it into *pkt.
//
// Returns  1 when a packet was dequeued,
//          0 when the queue is empty and `block` is zero,
//         -1 when the global `quit` flag is set.
// With a non-zero `block` the call waits on q->cond until one of the above
// conditions is met.
static int packet_queue_get(PacketQueue *q, AVPacket *pkt, int block)
{
    int result;

    SDL_LockMutex(q->mutex);
    while (true)
    {
        if (quit)
        {
            result = -1;
            break;
        }

        AVPacketList *head = q->first_pkt;
        if (head != NULL)
        {
            // Unlink the head; an emptied list has no tail either.
            q->first_pkt = head->next;
            if (q->first_pkt == NULL)
                q->last_pkt = NULL;

            --q->nb_packets;
            q->size -= head->pkt.size;

            *pkt = head->pkt;
            free(head);   // node was allocated with malloc() in packet_queue_put()
            result = 1;
            break;
        }

        if (!block)
        {
            result = 0;
            break;
        }

        // Releases the mutex while sleeping and re-acquires it on wake-up.
        SDL_CondWait(q->cond, q->mutex);
    }
    SDL_UnlockMutex(q->mutex);

    return result;
}
// Decodes audio from the global `audioq` into audio_buf.
//
// The packet being consumed is kept in function-local statics, so a packet
// that needs several decode calls is resumed on the next invocation.
//
// Returns the number of bytes written to audio_buf (always > 0), or -1 when
// `quit` is set or packet_queue_get() reports failure.
int audio_decode_frame(AVCodecContext *aCodecCtx, uint8_t *audio_buf, int buf_size)
{
    static AVPacket pkt;
    static uint8_t *audio_pkt_data = NULL;
    static int audio_pkt_size = 0;

    while (true)
    {
        // Drain whatever is left of the current packet.
        while (audio_pkt_size > 0)
        {
            int produced = buf_size;
            const int consumed = avcodec_decode_audio2(aCodecCtx, (int16_t*)audio_buf, &produced,
                                                       audio_pkt_data, audio_pkt_size);
            if (consumed < 0)
            {
                /* if error, skip frame */
                audio_pkt_size = 0;
                break;
            }

            audio_pkt_data += consumed;
            audio_pkt_size -= consumed;

            /* We have data, return it and come back for more later */
            if (produced > 0)
                return produced;

            /* No data yet, keep feeding the decoder */
        }

        // Current packet exhausted (or skipped): release it and fetch the next.
        if (pkt.data)
            av_free_packet(&pkt);

        if (quit)
            return -1;

        if (packet_queue_get(&audioq, &pkt, 1) < 0)
            return -1;

        audio_pkt_data = pkt.data;
        audio_pkt_size = pkt.size;
    }
}
void audio_callback(void *userdata, Uint8 *stream, int len)
{
AVCodecContext *aCodecCtx = (AVCodecContext *)userdata;
int len1, audio_size;
static uint8_t audio_buf[(AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) / 2];
static unsigned int audio_buf_size = 0;
static unsigned int audio_buf_index = 0;
while (len > 0)
{
if (audio_buf_index >= audio_buf_size)
{
/* We have already sent all our data; get more */
audio_size = audio_decode_frame(aCodecCtx, audio_buf, sizeof(audio_buf));
if(audio_size < 0)
{
/* If error, output silence */
audio_buf_size = 1024; // arbitrary?
memset(audio_buf, 0, audio_buf_size);
}
else
{
audio_buf_size = audio_size;
}
audio_buf_index = 0;
}
len1 = audio_buf_size - audio_buf_index;
if (len1 > len)
len1 = len;
memcpy(stream, (uint8_t *)audio_buf + audio_buf_index, len1);
len -= len1;
stream += len1;
audio_buf_index += len1;
}
}
/**
 * Opens `filename` with FFmpeg and prepares audio playback through SDL.
 *
 * On return the globals pFormatCtx, videoStream, audioStream, aCodecCtx and
 * pCodecCtx are set, both decoders are open, the audio packet queue is
 * initialised and the SDL audio device is unpaused.
 *
 * Any failure (file cannot be opened, no stream info, missing audio or video
 * stream, unsupported codec, SDL audio device cannot be opened, decoder
 * cannot be opened) prints a message to stderr and terminates the process
 * with exit(-1).
 *
 * @param filename path of the media file to open.
 */
void setup_ffmpeg(char* filename)
{
    if (av_open_input_file(&pFormatCtx, filename, NULL, 0, NULL) != 0)
    {
        fprintf(stderr, "FFmpeg failed to open file %s!\n", filename);
        exit(-1);
    }
    if (av_find_stream_info(pFormatCtx) < 0)
    {
        fprintf(stderr, "FFmpeg failed to retrieve stream info!\n");
        exit(-1);
    }

    // Dump information about file onto standard error
    dump_format(pFormatCtx, 0, filename, 0);

    // Remember the first video stream and the first audio stream.
    // nb_streams is unsigned, so the index is unsigned too.
    for (unsigned int i = 0; i < pFormatCtx->nb_streams; i++)
    {
        if (pFormatCtx->streams[i]->codec->codec_type == CODEC_TYPE_VIDEO && videoStream < 0)
        {
            videoStream = (int)i;
        }
        if (pFormatCtx->streams[i]->codec->codec_type == CODEC_TYPE_AUDIO && audioStream < 0)
        {
            audioStream = (int)i;
        }
    }
    if (videoStream == -1)
    {
        fprintf(stderr, "No video stream found in %s!\n", filename);
        exit(-1);
    }
    if (audioStream == -1)
    {
        fprintf(stderr, "No audio stream found in %s!\n", filename);
        exit(-1);
    }

    // Get a pointer to the codec context for the audio stream
    aCodecCtx = pFormatCtx->streams[audioStream]->codec;

    // Set audio settings from codec info
    SDL_AudioSpec wanted_spec;
    wanted_spec.freq = aCodecCtx->sample_rate;
    wanted_spec.format = AUDIO_S16SYS;
    wanted_spec.channels = aCodecCtx->channels;
    wanted_spec.silence = 0;
    wanted_spec.samples = SDL_AUDIO_BUFFER_SIZE;
    wanted_spec.callback = audio_callback;
    wanted_spec.userdata = aCodecCtx;

    SDL_AudioSpec spec;
    if (SDL_OpenAudio(&wanted_spec, &spec) < 0)
    {
        fprintf(stderr, "SDL_OpenAudio: %s\n", SDL_GetError());
        exit(-1);
    }

    AVCodec* aCodec = avcodec_find_decoder(aCodecCtx->codec_id);
    if (!aCodec)
    {
        fprintf(stderr, "Unsupported codec!\n");
        exit(-1);
    }
    // The result used to be ignored here; an unopened decoder must not be
    // handed to the audio callback, so fail the same way the video path does.
    if (avcodec_open(aCodecCtx, aCodec) < 0)
    {
        fprintf(stderr, "Could not open audio codec!\n");
        exit(-1);
    }

    // The queue must exist before the callback starts pulling from it.
    packet_queue_init(&audioq);
    SDL_PauseAudio(0);

    // Get a pointer to the codec context for the video stream
    pCodecCtx = pFormatCtx->streams[videoStream]->codec;

    // Find the decoder for the video stream
    AVCodec* pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
    if (pCodec == NULL)
    {
        fprintf(stderr, "Unsupported codec!\n");
        exit(-1); // Codec not found
    }

    // Open codec
    if (avcodec_open(pCodecCtx, pCodec) < 0)
    {
        fprintf(stderr, "Unsupported codec!\n");
        exit(-1); // Could not open codec
    }
}
int main(int argc, char* argv[])
{
if (argc < 2)
{
std::cout << "Usage: " << argv[0] << " <video>" << std::endl;
return -1;
}
av_register_all();
// Init SDL
if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER))
{
fprintf(stderr, "Could not initialize SDL - %s\n", SDL_GetError());
return -1;
}
// Init ffmpeg and setup some SDL stuff related to Audio
setup_ffmpeg(argv[1]);
VideoCapture cap(argv[1]); // open the default camera
if (!cap.isOpened()) // check if we succeeded
{
std::cout << "Failed to load file!" << std::endl;
return -1;
}
AVPacket packet;
while (av_read_frame(pFormatCtx, &packet) >= 0)
{
if (packet.stream_index == videoStream)
{
// Actually this is were SYNC between audio/video would happen.
// Right now I assume that every VIDEO packet contains an entire video frame, and that's not true. A video frame can be made by multiple packets!
// But for the time being, assume 1 video frame == 1 video packet,
// so instead of reading the frame through ffmpeg, I read it through OpenCV.
Mat frame;
cap >> frame; // get a new frame from camera
// do some processing on the frame, either as a Mat or as IplImage.
// For educational purposes, applying a lame grayscale conversion
IplImage ipl_frame = frame;
for (int i = 0; i < ipl_frame.width * ipl_frame.height * ipl_frame.nChannels; i += ipl_frame.nChannels)
{
ipl_frame.imageData[i] = (ipl_frame.imageData[i] + ipl_frame.imageData[i+1] + ipl_frame.imageData[i+2])/3; //B
ipl_frame.imageData[i+1] = (ipl_frame.imageData[i] + ipl_frame.imageData[i+1] + ipl_frame.imageData[i+2])/3; //G
ipl_frame.imageData[i+2] = (ipl_frame.imageData[i] + ipl_frame.imageData[i+1] + ipl_frame.imageData[i+2])/3; //R
}
// Display it on SDL window
show_frame(&ipl_frame);
av_free_packet(&packet);
}
else if (packet.stream_index == audioStream)
{
packet_queue_put(&audioq, &packet);
}
else
{
av_free_packet(&packet);
}
SDL_Event event;
SDL_PollEvent(&event);
switch (event.type)
{
case SDL_QUIT:
SDL_FreeSurface(surface);
SDL_Quit();
break;
default:
break;
}
}
// the camera will be deinitialized automatically in VideoCapture destructor
// Close the codec
avcodec_close(pCodecCtx);
// Close the video file
av_close_input_file(pFormatCtx);
return 0;
}
On my Mac I compiled it with:
g++ ffmpeg_snd.cpp -o ffmpeg_snd -D_GNU_SOURCE=1 -D_THREAD_SAFE -I/usr/local/include/opencv -I/usr/local/include -I/usr/local/include/SDL -Wl,-framework,Cocoa -L/usr/local/lib -lopencv_core -lopencv_imgproc -lopencv_highgui -lopencv_ml -lopencv_video -lopencv_features2d -lopencv_calib3d -lopencv_objdetect -lopencv_contrib -lopencv_legacy -lopencv_flann -lSDLmain -lSDL -L/usr/local/lib -lavfilter -lavcodec -lavformat -I/usr/local/Cellar/ffmpeg/HEAD/include/libavcodec -I/usr/local/Cellar/ffmpeg/HEAD/include/libavformat