I am processing video with OpenCV, but at the same time I need to play the audio and have simple control over it, like the volume or the current frame number.
I think I should create a parallel process with ffmpeg, but I don't know how to do so. Can you explain what to do?
Or do you know another solution?
I think ffmpeg should be used to decode the audio and SDL to play it, with SDL also displaying the video in this case.
After opening the file with OpenCV and processing a frame, you can hand it from OpenCV to SDL for display while retrieving the audio packets through ffmpeg and playing them with SDL.
Here is a nice collection of ffmpeg/SDL tutorials!
I also found a nice post that shows how to capture frames from a video file using ffmpeg, store them in an OpenCV cv::Mat and display the result in an OpenCV window. But that way you can't play audio, since OpenCV doesn't deal with it.
You might be interested in reading this post as well: How to avoid a growing delay with ffmpeg between sound and raw video data ?
EDIT:
I spent the last 4hrs coding a prototype to demonstrate how it's done. This demo reads video frames through OpenCV (so you can process them) and audio through ffmpeg, and SDL is used to play both! There are 2 limitations in this demo you must be aware of: 1 - it assumes you are working with an OpenCV image packed as BGR (24 bits), and 2 - audio and video are not synced! Yes, I left some work for you to do (yeeeey). But don't panic, page 6 of the tutorials has some ideas!
It's important to sync audio and video because you will be doing some processing on the frames, and that will certainly make the video and audio go out of sync very fast, since they are played independently of each other.
The ffmpeg tutorials I suggested above are very important for understanding the code; a lot of this demo came from there. They show how to deal with SDL and how to read packets from audio/video streams.
#include <highgui.h>
#include <cv.h>
extern "C"
{
#include <SDL.h>
#include <SDL_thread.h>
#include <avcodec.h>
#include <avformat.h>
}
#include <iostream>
#include <stdio.h>
//#include <malloc.h>
using namespace cv;
#define SDL_AUDIO_BUFFER_SIZE 1024
typedef struct PacketQueue
{
AVPacketList *first_pkt, *last_pkt;
int nb_packets;
int size;
SDL_mutex *mutex;
SDL_cond *cond;
} PacketQueue;
PacketQueue audioq;
int audioStream = -1;
int videoStream = -1;
int quit = 0;
SDL_Surface* screen = NULL;
SDL_Surface* surface = NULL;
AVFormatContext* pFormatCtx = NULL;
AVCodecContext* aCodecCtx = NULL;
AVCodecContext* pCodecCtx = NULL;
void show_frame(IplImage* img)
{
if (!screen)
{
screen = SDL_SetVideoMode(img->width, img->height, 0, 0);
if (!screen)
{
fprintf(stderr, "SDL: could not set video mode - exiting\n");
exit(1);
}
}
// Assuming IplImage packed as BGR 24bits
SDL_Surface* surface = SDL_CreateRGBSurfaceFrom((void*)img->imageData,
img->width,
img->height,
img->depth * img->nChannels,
img->widthStep,
0xff0000, 0x00ff00, 0x0000ff, 0
);
SDL_BlitSurface(surface, 0, screen, 0);
SDL_FreeSurface(surface); // release the per-frame wrapper, or it leaks on every frame
SDL_Flip(screen);
}
void packet_queue_init(PacketQueue *q)
{
memset(q, 0, sizeof(PacketQueue));
q->mutex = SDL_CreateMutex();
q->cond = SDL_CreateCond();
}
int packet_queue_put(PacketQueue *q, AVPacket *pkt)
{
AVPacketList *pkt1;
if (av_dup_packet(pkt) < 0)
{
return -1;
}
//pkt1 = (AVPacketList*) av_malloc(sizeof(AVPacketList));
pkt1 = (AVPacketList*) malloc(sizeof(AVPacketList));
if (!pkt1) return -1;
pkt1->pkt = *pkt;
pkt1->next = NULL;
SDL_LockMutex(q->mutex);
if (!q->last_pkt)
q->first_pkt = pkt1;
else
q->last_pkt->next = pkt1;
q->last_pkt = pkt1;
q->nb_packets++;
q->size += pkt1->pkt.size;
SDL_CondSignal(q->cond);
SDL_UnlockMutex(q->mutex);
return 0;
}
static int packet_queue_get(PacketQueue *q, AVPacket *pkt, int block)
{
AVPacketList *pkt1;
int ret;
SDL_LockMutex(q->mutex);
for (;;)
{
if( quit)
{
ret = -1;
break;
}
pkt1 = q->first_pkt;
if (pkt1)
{
q->first_pkt = pkt1->next;
if (!q->first_pkt)
q->last_pkt = NULL;
q->nb_packets--;
q->size -= pkt1->pkt.size;
*pkt = pkt1->pkt;
//av_free(pkt1);
free(pkt1);
ret = 1;
break;
}
else if (!block)
{
ret = 0;
break;
}
else
{
SDL_CondWait(q->cond, q->mutex);
}
}
SDL_UnlockMutex(q->mutex);
return ret;
}
int audio_decode_frame(AVCodecContext *aCodecCtx, uint8_t *audio_buf, int buf_size)
{
static AVPacket pkt;
static uint8_t *audio_pkt_data = NULL;
static int audio_pkt_size = 0;
int len1, data_size;
for (;;)
{
while (audio_pkt_size > 0)
{
data_size = buf_size;
len1 = avcodec_decode_audio2(aCodecCtx, (int16_t*)audio_buf, &data_size,
audio_pkt_data, audio_pkt_size);
if (len1 < 0)
{
/* if error, skip frame */
audio_pkt_size = 0;
break;
}
audio_pkt_data += len1;
audio_pkt_size -= len1;
if (data_size <= 0)
{
/* No data yet, get more frames */
continue;
}
/* We have data, return it and come back for more later */
return data_size;
}
if (pkt.data)
av_free_packet(&pkt);
if (quit) return -1;
if (packet_queue_get(&audioq, &pkt, 1) < 0) return -1;
audio_pkt_data = pkt.data;
audio_pkt_size = pkt.size;
}
}
void audio_callback(void *userdata, Uint8 *stream, int len)
{
AVCodecContext *aCodecCtx = (AVCodecContext *)userdata;
int len1, audio_size;
static uint8_t audio_buf[(AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) / 2];
static unsigned int audio_buf_size = 0;
static unsigned int audio_buf_index = 0;
while (len > 0)
{
if (audio_buf_index >= audio_buf_size)
{
/* We have already sent all our data; get more */
audio_size = audio_decode_frame(aCodecCtx, audio_buf, sizeof(audio_buf));
if(audio_size < 0)
{
/* If error, output silence */
audio_buf_size = 1024; // arbitrary?
memset(audio_buf, 0, audio_buf_size);
}
else
{
audio_buf_size = audio_size;
}
audio_buf_index = 0;
}
len1 = audio_buf_size - audio_buf_index;
if (len1 > len)
len1 = len;
memcpy(stream, (uint8_t *)audio_buf + audio_buf_index, len1);
len -= len1;
stream += len1;
audio_buf_index += len1;
}
}
void setup_ffmpeg(char* filename)
{
if (av_open_input_file(&pFormatCtx, filename, NULL, 0, NULL) != 0)
{
fprintf(stderr, "FFmpeg failed to open file %s!\n", filename);
exit(-1);
}
if (av_find_stream_info(pFormatCtx) < 0)
{
fprintf(stderr, "FFmpeg failed to retrieve stream info!\n");
exit(-1);
}
// Dump information about file onto standard error
dump_format(pFormatCtx, 0, filename, 0);
// Find the first video stream
for (unsigned int i = 0; i < pFormatCtx->nb_streams; i++)
{
if (pFormatCtx->streams[i]->codec->codec_type == CODEC_TYPE_VIDEO && videoStream < 0)
{
videoStream = i;
}
if (pFormatCtx->streams[i]->codec->codec_type == CODEC_TYPE_AUDIO && audioStream < 0)
{
audioStream = i;
}
}
if (videoStream == -1)
{
fprintf(stderr, "No video stream found in %s!\n", filename);
exit(-1);
}
if (audioStream == -1)
{
fprintf(stderr, "No audio stream found in %s!\n", filename);
exit(-1);
}
// Get a pointer to the codec context for the audio stream
aCodecCtx = pFormatCtx->streams[audioStream]->codec;
// Set audio settings from codec info
SDL_AudioSpec wanted_spec;
wanted_spec.freq = aCodecCtx->sample_rate;
wanted_spec.format = AUDIO_S16SYS;
wanted_spec.channels = aCodecCtx->channels;
wanted_spec.silence = 0;
wanted_spec.samples = SDL_AUDIO_BUFFER_SIZE;
wanted_spec.callback = audio_callback;
wanted_spec.userdata = aCodecCtx;
SDL_AudioSpec spec;
if (SDL_OpenAudio(&wanted_spec, &spec) < 0)
{
fprintf(stderr, "SDL_OpenAudio: %s\n", SDL_GetError());
exit(-1);
}
AVCodec* aCodec = avcodec_find_decoder(aCodecCtx->codec_id);
if (!aCodec)
{
fprintf(stderr, "Unsupported codec!\n");
exit(-1);
}
avcodec_open(aCodecCtx, aCodec);
// audio_st = pFormatCtx->streams[index]
packet_queue_init(&audioq);
SDL_PauseAudio(0);
// Get a pointer to the codec context for the video stream
pCodecCtx = pFormatCtx->streams[videoStream]->codec;
// Find the decoder for the video stream
AVCodec* pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
if (pCodec == NULL)
{
fprintf(stderr, "Unsupported codec!\n");
exit(-1); // Codec not found
}
// Open codec
if (avcodec_open(pCodecCtx, pCodec) < 0)
{
fprintf(stderr, "Unsupported codec!\n");
exit(-1); // Could not open codec
}
}
int main(int argc, char* argv[])
{
if (argc < 2)
{
std::cout << "Usage: " << argv[0] << " <video>" << std::endl;
return -1;
}
av_register_all();
// Init SDL
if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER))
{
fprintf(stderr, "Could not initialize SDL - %s\n", SDL_GetError());
return -1;
}
// Init ffmpeg and setup some SDL stuff related to Audio
setup_ffmpeg(argv[1]);
VideoCapture cap(argv[1]); // open the video file
if (!cap.isOpened()) // check if we succeeded
{
std::cout << "Failed to load file!" << std::endl;
return -1;
}
AVPacket packet;
while (av_read_frame(pFormatCtx, &packet) >= 0)
{
if (packet.stream_index == videoStream)
{
// Actually this is where SYNC between audio/video would happen.
// Right now I assume that every VIDEO packet contains an entire video frame, and that's not true. A video frame can be made of multiple packets!
// But for the time being, assume 1 video frame == 1 video packet,
// so instead of reading the frame through ffmpeg, I read it through OpenCV.
Mat frame;
cap >> frame; // get a new frame from the file
// do some processing on the frame, either as a Mat or as IplImage.
// For educational purposes, applying a lame grayscale conversion
IplImage ipl_frame = frame;
for (int i = 0; i < ipl_frame.width * ipl_frame.height * ipl_frame.nChannels; i += ipl_frame.nChannels)
{
// compute the average once so all three channels get the same value
int gray = ((unsigned char)ipl_frame.imageData[i] +
(unsigned char)ipl_frame.imageData[i+1] +
(unsigned char)ipl_frame.imageData[i+2]) / 3;
ipl_frame.imageData[i] = (char)gray; //B
ipl_frame.imageData[i+1] = (char)gray; //G
ipl_frame.imageData[i+2] = (char)gray; //R
}
// Display it on SDL window
show_frame(&ipl_frame);
av_free_packet(&packet);
}
else if (packet.stream_index == audioStream)
{
packet_queue_put(&audioq, &packet);
}
else
{
av_free_packet(&packet);
}
SDL_Event event;
SDL_PollEvent(&event);
switch (event.type)
{
case SDL_QUIT:
quit = 1; // let the audio side know we are shutting down
SDL_Quit();
exit(0);
break;
default:
break;
}
}
// the video file will be released automatically in the VideoCapture destructor
// Close the codec
avcodec_close(pCodecCtx);
// Close the video file
av_close_input_file(pFormatCtx);
return 0;
}
On my Mac I compiled it with:
g++ ffmpeg_snd.cpp -o ffmpeg_snd -D_GNU_SOURCE=1 -D_THREAD_SAFE -I/usr/local/include/opencv -I/usr/local/include -I/usr/local/include/SDL -Wl,-framework,Cocoa -L/usr/local/lib -lopencv_core -lopencv_imgproc -lopencv_highgui -lopencv_ml -lopencv_video -lopencv_features2d -lopencv_calib3d -lopencv_objdetect -lopencv_contrib -lopencv_legacy -lopencv_flann -lSDLmain -lSDL -L/usr/local/lib -lavfilter -lavcodec -lavformat -I/usr/local/Cellar/ffmpeg/HEAD/include/libavcodec -I/usr/local/Cellar/ffmpeg/HEAD/include/libavformat
Related
I am new to ffmpeg & video encoding. After looking through some related questions on this page, I found this post, which is very useful for understanding the overall ffmpeg process.
However, my work does not only manipulate the Mat frame; after extracting important information from the video (edges, the position of each edge block, the type of each edge block, the block number, motion vectors), I have to encode it and send it to the client. I tried to find example code for this part but it seems nobody has done it before.
My problem is how to encode this additional information along with the video frames and send both to the client. I read about Huffman coding, which allows lossless compression, but is it possible to encode the edge & motion data with Huffman coding while encoding the video frames using ffmpeg? I'm experimenting with the UDP protocol.
I cannot find any information about this.
I read about metadata & side information in ffmpeg, but that's not what I want to do.
I hope you can give me some advice or a direction to research in this area, so I can understand it and try to implement it. If there is any example code for this case, I would be very grateful if you shared it.
Thank you so much.
Below is the encoder part on the server side:
int encode(Mat& input_frame, EncodedCallback callback, void* userdata = nullptr) {
AVPacket pkt;
/* encode one frame */
av_init_packet(&pkt);
pkt.data = NULL; // packet data will be allocated by the encoder
pkt.size = 0;
int size = 0;
fflush(stdout);
cvtFrame2AVFrameYUV420(input_frame, &frame);
static int time;
frame->pts = time++;
/* encode the image */
ret = avcodec_send_frame(c, frame);
if (ret < 0) {
fprintf(stderr, "Error avcodec_send_frame\n");
exit(1);
}
nbFramesEncoded++;
ret = avcodec_receive_packet(c, &pkt);
if (!isFirstFrameEmmited) {
nbNeededFramesInBuffer++;
printf("nbNeededFramesInBuffer: %d\n", nbNeededFramesInBuffer);
}
if (ret < 0) {
if (ret == AVERROR(EAGAIN)) {
//output is not available, we must send more input
} else {
fprintf(stderr, "Error avcodec_receive_packet %d\n", ret);
exit(1);
}
} else {
if (callback) {
callback(pkt, userdata);
}
size = pkt.size + 4;
av_packet_unref(&pkt);
}
return size;
}
Below is the code that handles frame processing (at present we detect motion and send the changed blocks to the client):
void updateFrame(Mat& frame) {
//Get all Streams ready
bool isReady = true;
if (!frameStreamer->encoder->isFirstFrameEmmited) {
frameStreamer->sendFrame(frame);
isReady = false;
}
for (int yidx = 0; yidx < gridSize.height; yidx++) {
for (int xidx = 0; xidx < gridSize.width; xidx++) {
StreamPtr& stream = streamGrid[yidx][xidx];
if (!stream->encoder->isFirstFrameEmmited) {
Mat block = frame(stream->irect);
stream->sendFrame(block);
isReady = false;
}
}
}
if (isReady == false) {
return;
}
if (pGray.empty()) {
frameStreamer->sendFrame(frame);
frameStreamer->sendFrame(frame);
cvtColor(frame, pGray, CV_BGR2GRAY);
return;
}
//Motion Detection
Mat gray;
cvtColor(frame, gray, CV_BGR2GRAY);
Mat diff;
absdiff(gray, pGray, diff);
threshold(diff, diff, NOISE_THRESHOLD, 255, CV_THRESH_BINARY);
if (HEAT_IMAGE) {
gray.copyTo(diff, diff);
imshow("Gray", gray);
threshold(diff, diff, HEAT_THRESH, 255, CV_THRESH_TOZERO);
}
if (USE_MORPH_NOISE) {
Morph_Noise(diff);
}
Mat motionImg = Mat::zeros(frameSize, CV_8UC3);
//Block Classification
int nbModifiedBlocks = 0;
for (int yidx = 0; yidx < gridSize.height; yidx++) {
for (int xidx = 0; xidx < gridSize.width; xidx++) {
Rect irect(xidx * blockSize.width, yidx * blockSize.height,
blockSize.width, blockSize.height);
int blockDiff = sum(diff(irect))[0];
if (blockDiff > BLOCK_THRESHOLD * 255) {
this->blockCls.at<uchar>(yidx, xidx) = MODI_BLOCK;
nbModifiedBlocks++;
} else {
this->blockCls.at<uchar>(yidx, xidx) = SKIP_BLOCK;
}
}
}
//Send
if (nbModifiedBlocks > this->nbBlocksThresh) {
nbSentBytes += this->frameStreamer->sendFrame(frame);
} else {
for (int yidx = 0; yidx < gridSize.height; yidx++) {
for (int xidx = 0; xidx < gridSize.width; xidx++) {
uchar cls = this->blockCls.at<uchar>(yidx, xidx);
StreamPtr& stream = streamGrid[yidx][xidx];
bool send = false;
if (cls == MODI_BLOCK) {
if (DEBUG_NETWORK) {
printf("Normal (%d, %d): ", xidx, yidx);
}
send = true;
stream->encoder->nbFramesBuffered = stream->encoder->nbNeededFramesInBuffer;
rectangle(motionImg, stream->irect, Scalar(0, 0, 255), CV_FILLED);
} else if (stream->encoder->nbFramesBuffered > 0) {
if (DEBUG_NETWORK) {
printf("Extra (%d, %d): ", xidx, yidx);
}
send = true;
stream->encoder->nbFramesBuffered--;
stream->encoder->nbFlushFrames++;
rectangle(motionImg, stream->irect, Scalar(0, 255, 0), CV_FILLED);
}
if (send) {
Mat block = frame(stream->irect);
nbSentBytes += stream->sendFrame(block);
gray(stream->irect).copyTo(pGray(stream->irect));
}
}
}
}
}
I may not quite get what you're asking, but I think you're asking about adding an additional stream to your output that can carry some custom data. This is how you add a data stream; add this after the point where you add the video stream.
AVCodecParameters *par;
AVStream *st;
st = avformat_new_stream(ctx->oc, NULL); // ctx->oc is the same AVFormatContext *oc you used when adding the video stream
if (st == NULL)
{
fprintf(stderr, "Error: avformat_new_stream() failed.\n");
exit(EXIT_FAILURE);
}
par = st->codecpar;
par->codec_id = AV_CODEC_ID_TEXT; // raw UTF-8 text
par->codec_type = AVMEDIA_TYPE_DATA; // Opaque data information usually continuous
After that you can put your data into pkt.data and send it with av_interleaved_write_frame after the associated video frame has been written.
One of the things I'm trying to achieve is parallel encoding via FFmpeg's C API. This looks to work out of the box quite nicely; however, I've changed the goal posts slightly:
In an existing application, I already have a thread pool at hand. Instead of using another thread pool via FFmpeg, I would like to reuse the existing thread pool in my application. Having studied the latest FFmpeg trunk docs, it very much looks possible.
Using some FFmpeg sample code, I've created a sample application to demonstrate what I'm trying to achieve (see below). The sample app generates a video-only mpeg2 ts using the mp2v codec.
The problem I'm experiencing is that the custom 'thread_execute' or 'thread_execute2' functions are never invoked. This is despite the fact that the codec appears to indicate that threading is supported. Please be aware that I have not plumbed in the thread pool just yet; my first goal is for it to call the custom function pointers.
I've tried to get assistance on the FFmpeg mailing lists but to no avail.
#include <iostream>
#include <thread>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <cstring>
#include <future>
extern "C"
{
#include <libavutil/avassert.h>
#include <libavutil/channel_layout.h>
#include <libavutil/opt.h>
#include <libavutil/timestamp.h>
#include <libavformat/avformat.h>
//#include <libswscale/swscale.h>
#include <libswresample/swresample.h>
}
#define STREAM_DURATION 1000.0
#define STREAM_FRAME_RATE 25 /* 25 images/s */
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P /* default pix_fmt */
#define SCALE_FLAGS SWS_BICUBIC
// a wrapper around a single output AVStream
typedef struct OutputStream {
AVStream *st;
AVCodecContext *enc;
/* pts of the next frame that will be generated */
int64_t next_pts;
int samples_count;
AVFrame *frame;
AVFrame *tmp_frame;
float t, tincr, tincr2;
struct SwsContext *sws_ctx;
struct SwrContext *swr_ctx;
} OutputStream;
/////////////////////////////////////////////////////////////////////////////
// The ffmpeg variation raises compiler warnings.
char *cb_av_ts2str(char *buf, int64_t ts)
{
std::memset(buf,0,AV_TS_MAX_STRING_SIZE);
return av_ts_make_string(buf,ts);
}
/////////////////////////////////////////////////////////////////////////////
// The ffmpeg variation raises compiler warnings.
char *cb_av_ts2timestr(char *buf, int64_t ts, AVRational *tb)
{
std::memset(buf,0,AV_TS_MAX_STRING_SIZE);
return av_ts_make_time_string(buf,ts,tb);
}
/////////////////////////////////////////////////////////////////////////////
// The ffmpeg variation raises compiler warnings.
char *cb_av_err2str(char *errbuf, size_t errbuf_size, int errnum)
{
std::memset(errbuf,0,errbuf_size);
return av_make_error_string(errbuf,errbuf_size,errnum);
}
int thread_execute(AVCodecContext* s, int (*func)(AVCodecContext *c2, void *arg2), void* arg, int* ret, int count, int size)
{
// Do it all serially for now
std::cout << "thread_execute" << std::endl;
for (int k = 0; k < count; ++k)
{
ret[k] = func(s, arg);
}
return 0;
}
int thread_execute2(AVCodecContext* s, int (*func)(AVCodecContext* c2, void* arg2, int, int), void* arg, int* ret, int count)
{
// Do it all serially for now
std::cout << "thread_execute2" << std::endl;
for (int k = 0; k < count; ++k)
{
ret[k] = func(s, arg, k, count);
}
return 0;
}
static void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt)
{
char s[AV_TS_MAX_STRING_SIZE];
AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
printf("pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
cb_av_ts2str(s,pkt->pts), cb_av_ts2timestr(s,pkt->pts, time_base),
cb_av_ts2str(s,pkt->dts), cb_av_ts2timestr(s,pkt->dts, time_base),
cb_av_ts2str(s,pkt->duration), cb_av_ts2timestr(s,pkt->duration, time_base),
pkt->stream_index);
}
static int write_frame(AVFormatContext *fmt_ctx, const AVRational *time_base, AVStream *st, AVPacket *pkt)
{
/* rescale output packet timestamp values from codec to stream timebase */
av_packet_rescale_ts(pkt, *time_base, st->time_base);
pkt->stream_index = st->index;
/* Write the compressed frame to the media file. */
log_packet(fmt_ctx, pkt);
return av_interleaved_write_frame(fmt_ctx, pkt);
}
/* Add an output stream. */
static void add_stream(OutputStream *ost, AVFormatContext *oc,
AVCodec **codec,
enum AVCodecID codec_id)
{
AVCodecContext *c;
int i;
/* find the encoder */
*codec = avcodec_find_encoder(codec_id);
if (!(*codec)) {
fprintf(stderr, "Could not find encoder for '%s'\n",
avcodec_get_name(codec_id));
exit(1);
}
ost->st = avformat_new_stream(oc, NULL);
if (!ost->st) {
fprintf(stderr, "Could not allocate stream\n");
exit(1);
}
ost->st->id = oc->nb_streams-1;
c = avcodec_alloc_context3(*codec);
if (!c) {
fprintf(stderr, "Could not alloc an encoding context\n");
exit(1);
}
ost->enc = c;
switch ((*codec)->type)
{
case AVMEDIA_TYPE_AUDIO:
c->sample_fmt = (*codec)->sample_fmts ?
(*codec)->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
c->bit_rate = 64000;
c->sample_rate = 44100;
if ((*codec)->supported_samplerates) {
c->sample_rate = (*codec)->supported_samplerates[0];
for (i = 0; (*codec)->supported_samplerates[i]; i++) {
if ((*codec)->supported_samplerates[i] == 44100)
c->sample_rate = 44100;
}
}
c->channel_layout = AV_CH_LAYOUT_STEREO;
if ((*codec)->channel_layouts) {
c->channel_layout = (*codec)->channel_layouts[0];
for (i = 0; (*codec)->channel_layouts[i]; i++) {
if ((*codec)->channel_layouts[i] == AV_CH_LAYOUT_STEREO)
c->channel_layout = AV_CH_LAYOUT_STEREO;
}
}
c->channels = av_get_channel_layout_nb_channels(c->channel_layout);
ost->st->time_base = (AVRational){ 1, c->sample_rate };
break;
case AVMEDIA_TYPE_VIDEO:
c->codec_id = codec_id;
c->bit_rate = 400000;
/* Resolution must be a multiple of two. */
c->width = 352;
c->height = 288;
/* timebase: This is the fundamental unit of time (in seconds) in terms
* of which frame timestamps are represented. For fixed-fps content,
* timebase should be 1/framerate and timestamp increments should be
* identical to 1. */
ost->st->time_base = (AVRational){ 1, STREAM_FRAME_RATE };
c->time_base = ost->st->time_base;
c->gop_size = 12; /* emit one intra frame every twelve frames at most */
c->pix_fmt = STREAM_PIX_FMT;
if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
/* just for testing, we also add B-frames */
c->max_b_frames = 2;
}
if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
/* Needed to avoid using macroblocks in which some coeffs overflow.
* This does not happen with normal video, it just happens here as
* the motion of the chroma plane does not match the luma plane. */
c->mb_decision = 2;
}
break;
default:
break;
}
if (c->codec->capabilities & AV_CODEC_CAP_FRAME_THREADS ||
c->codec->capabilities & AV_CODEC_CAP_SLICE_THREADS)
{
if (c->codec->capabilities & AV_CODEC_CAP_FRAME_THREADS)
{
c->thread_type = FF_THREAD_FRAME;
}
if (c->codec->capabilities & AV_CODEC_CAP_SLICE_THREADS)
{
c->thread_type = FF_THREAD_SLICE;
}
c->execute = &thread_execute;
c->execute2 = &thread_execute2;
c->thread_count = 4;
// NOTE: Testing opaque.
c->opaque = (void*)0xff;
}
/* Some formats want stream headers to be separate. */
if (oc->oformat->flags & AVFMT_GLOBALHEADER)
c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
/**************************************************************/
/* video output */
static AVFrame *alloc_picture(enum AVPixelFormat pix_fmt, int width, int height)
{
AVFrame *picture;
int ret;
picture = av_frame_alloc();
if (!picture)
return NULL;
picture->format = pix_fmt;
picture->width = width;
picture->height = height;
/* allocate the buffers for the frame data */
ret = av_frame_get_buffer(picture, 32);
if (ret < 0) {
fprintf(stderr, "Could not allocate frame data.\n");
exit(1);
}
return picture;
}
static void open_video(AVFormatContext *oc, AVCodec *codec, OutputStream *ost, AVDictionary *opt_arg)
{
int ret;
AVCodecContext *c = ost->enc;
//AVDictionary *opt = NULL;
//av_dict_copy(&opt, opt_arg, 0);
/* open the codec */
ret = avcodec_open2(c, codec, NULL);
//av_dict_free(&opt);
if (ret < 0) {
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Could not open video codec: %s\n", cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
exit(1);
}
/* allocate and init a re-usable frame */
ost->frame = alloc_picture(c->pix_fmt, c->width, c->height);
if (!ost->frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
/* If the output format is not YUV420P, then a temporary YUV420P
* picture is needed too. It is then converted to the required
* output format. */
ost->tmp_frame = NULL;
if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
ost->tmp_frame = alloc_picture(AV_PIX_FMT_YUV420P, c->width, c->height);
if (!ost->tmp_frame) {
fprintf(stderr, "Could not allocate temporary picture\n");
exit(1);
}
}
/* copy the stream parameters to the muxer */
ret = avcodec_parameters_from_context(ost->st->codecpar, c);
if (ret < 0) {
fprintf(stderr, "Could not copy the stream parameters\n");
exit(1);
}
}
/* Prepare a dummy image. */
static void fill_yuv_image(AVFrame *pict, int frame_index,
int width, int height)
{
int x, y, i;
i = frame_index;
/* Y */
for (y = 0; y < height; y++)
for (x = 0; x < width; x++)
pict->data[0][y * pict->linesize[0] + x] = x + y + i * 3;
/* Cb and Cr */
for (y = 0; y < height / 2; y++) {
for (x = 0; x < width / 2; x++) {
pict->data[1][y * pict->linesize[1] + x] = 128 + y + i * 2;
pict->data[2][y * pict->linesize[2] + x] = 64 + x + i * 5;
}
}
}
static AVFrame *get_video_frame(OutputStream *ost)
{
AVCodecContext *c = ost->enc;
/* check if we want to generate more frames */
if (av_compare_ts(ost->next_pts, c->time_base,
STREAM_DURATION, (AVRational){ 1, 1 }) >= 0)
return NULL;
/* when we pass a frame to the encoder, it may keep a reference to it
* internally; make sure we do not overwrite it here */
if (av_frame_make_writable(ost->frame) < 0)
exit(1);
if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
/* as we only generate a YUV420P picture, we must convert it
* to the codec pixel format if needed */
/*if (!ost->sws_ctx) {
ost->sws_ctx = sws_getContext(c->width, c->height,
AV_PIX_FMT_YUV420P,
c->width, c->height,
c->pix_fmt,
SCALE_FLAGS, NULL, NULL, NULL);
if (!ost->sws_ctx) {
fprintf(stderr,
"Could not initialize the conversion context\n");
exit(1);
}
}
fill_yuv_image(ost->tmp_frame, ost->next_pts, c->width, c->height);
sws_scale(ost->sws_ctx,
(const uint8_t * const *)ost->tmp_frame->data, ost->tmp_frame->linesize,
0, c->height, ost->frame->data, ost->frame->linesize);*/
} else {
fill_yuv_image(ost->frame, ost->next_pts, c->width, c->height);
}
ost->frame->pts = ost->next_pts++;
return ost->frame;
}
/*
* encode one video frame and send it to the muxer
* return 1 when encoding is finished, 0 otherwise
*/
static int write_video_frame(AVFormatContext *oc, OutputStream *ost)
{
int ret;
AVCodecContext *c;
AVFrame *frame;
int got_packet = 0;
AVPacket pkt = { 0 };
c = ost->enc;
frame = get_video_frame(ost);
if (frame)
{
ret = avcodec_send_frame(ost->enc, frame);
if (ret < 0)
{
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Error encoding video frame: %s\n", cb_av_err2str(s, AV_ERROR_MAX_STRING_SIZE, ret));
exit(1);
}
}
av_init_packet(&pkt);
ret = avcodec_receive_packet(ost->enc,&pkt);
if (ret < 0)
{
if (ret == AVERROR(EAGAIN)) { ret = 0; }
else
{
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Error receiving packet: %s\n", cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
exit(1);
}
}
else
{
got_packet = 1;
ret = write_frame(oc, &c->time_base, ost->st, &pkt);
}
if (ret < 0) {
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Error while writing video frame: %s\n", cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
exit(1);
}
return (frame || got_packet) ? 0 : 1;
}
static void close_stream(AVFormatContext *oc, OutputStream *ost)
{
avcodec_free_context(&ost->enc);
av_frame_free(&ost->frame);
av_frame_free(&ost->tmp_frame);
//sws_freeContext(ost->sws_ctx);
//swr_free(&ost->swr_ctx);
}
/**************************************************************/
/* media file output */
int main(int argc, char **argv)
{
OutputStream video_st = { 0 }, audio_st = { 0 };
const char *filename;
AVOutputFormat *fmt;
AVFormatContext *oc;
AVCodec /**audio_codec,*/ *video_codec;
int ret;
int have_video = 0, have_audio = 0;
int encode_video = 0, encode_audio = 0;
AVDictionary *opt = NULL;
int i;
/* Initialize libavcodec, and register all codecs and formats. */
av_register_all();
avformat_network_init();
if (argc < 2) {
printf("usage: %s output_file\n"
"API example program to output a media file with libavformat.\n"
"This program generates a synthetic audio and video stream, encodes and\n"
"muxes them into a file named output_file.\n"
"The output format is automatically guessed according to the file extension.\n"
"Raw images can also be output by using '%%d' in the filename.\n"
"\n", argv[0]);
return 1;
}
filename = argv[1];
for (i = 2; i+1 < argc; i+=2) {
if (!strcmp(argv[i], "-flags") || !strcmp(argv[i], "-fflags"))
av_dict_set(&opt, argv[i]+1, argv[i+1], 0);
}
const char *pfilename = filename;
/* allocate the output media context */
avformat_alloc_output_context2(&oc, NULL, "mpegts", pfilename);
if (!oc) {
printf("Could not deduce output format from file extension: using MPEG.\n");
avformat_alloc_output_context2(&oc, NULL, "mpeg", pfilename);
}
if (!oc)
return 1;
fmt = oc->oformat;
/* Add the audio and video streams using the default format codecs
* and initialize the codecs. */
if (fmt->video_codec != AV_CODEC_ID_NONE) {
add_stream(&video_st, oc, &video_codec, fmt->video_codec);
have_video = 1;
encode_video = 1;
}
/*if (fmt->audio_codec != AV_CODEC_ID_NONE) {
add_stream(&audio_st, oc, &audio_codec, fmt->audio_codec);
have_audio = 1;
encode_audio = 1;
}*/
/* Now that all the parameters are set, we can open the audio and
* video codecs and allocate the necessary encode buffers. */
if (have_video)
open_video(oc, video_codec, &video_st, opt);
//if (have_audio)
// open_audio(oc, audio_codec, &audio_st, opt);
av_dump_format(oc, 0, pfilename, 1);
/* open the output file, if needed */
if (!(fmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&oc->pb, pfilename, AVIO_FLAG_WRITE);
if (ret < 0) {
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Could not open '%s': %s\n", pfilename,
cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
return 1;
}
}
/* Write the stream header, if any. */
ret = avformat_write_header(oc, &opt);
if (ret < 0) {
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Error occurred when opening output file: %s\n",
cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
return 1;
}
while (encode_video || encode_audio) {
/* select the stream to encode */
if (encode_video &&
(!encode_audio || av_compare_ts(video_st.next_pts, video_st.enc->time_base,
audio_st.next_pts, audio_st.enc->time_base) <= 0)) {
encode_video = !write_video_frame(oc, &video_st);
} else {
//encode_audio = !write_audio_frame(oc, &audio_st);
}
//std::this_thread::sleep_for(std::chrono::milliseconds(35));
}
/* Write the trailer, if any. The trailer must be written before you
* close the CodecContexts open when you wrote the header; otherwise
* av_write_trailer() may try to use memory that was freed on
* av_codec_close(). */
av_write_trailer(oc);
/* Close each codec. */
if (have_video)
close_stream(oc, &video_st);
if (have_audio)
close_stream(oc, &audio_st);
if (!(fmt->flags & AVFMT_NOFILE))
/* Close the output file. */
avio_closep(&oc->pb);
/* free the stream */
avformat_free_context(oc);
return 0;
}
//
Environment:
Ubuntu Zesty (17.04)
FFmpeg version 3.2.4 (via package manager)
gcc 6.3 (C++)
You have to do the following:
1. Call avcodec_alloc_context3(...). This call will set the default execute and execute2 functions in the new context.
2. Set c->thread_count = number_of_threads_in_your_thread_pool().
3. Call avcodec_open2(...).
4. Set c->execute and c->execute2 to point to your functions.
5. Call ff_thread_free(c). This function isn't exposed in libavcodec's headers, but you can add the following line:
extern "C" void ff_thread_free(AVCodecContext *s);
The drawback is that libavcodec will create its internal thread pool during the avcodec_open2(...) call, and that pool will be deleted by the ff_thread_free() call.
The internal thread pool is very efficient, but it's not good if you plan to do parallel encoding of multiple video feeds; in that case libavcodec will create a separate thread pool for each video feed being encoded.
I want to decode H.264 video from a collection of MPEG-2 Transport Stream packets, but I am not clear on what to pass to avcodec_decode_video2.
The documentation says to pass "the input AVPacket containing the input buffer."
But what should be in the input buffer?
A PES packet will be spread across the payload portions of several TS packets, with NALU(s) inside the PES. So should I pass a TS fragment? The entire PES? Only the PES payload?
This Sample Code mentions:
BUT some other codecs (msmpeg4, mpeg4) are inherently frame based, so
you must call them with all the data for one frame exactly. You must
also initialize 'width' and 'height' before initializing them.
But I can find no info on what "all the data" means...
Passing a fragment of a TS packet payload is not working:
AVPacket avDecPkt;
av_init_packet(&avDecPkt);
avDecPkt.data = inbuf_ptr;
avDecPkt.size = esBufSize;
len = avcodec_decode_video2(mpDecoderContext, mpFrameDec, &got_picture, &avDecPkt);
if (len < 0)
{
printf(" TS PKT #%.0f. Error decoding frame #%04d [rc=%d '%s']\n",
tsPacket.pktNum, mDecodedFrameNum, len, av_make_error_string(errMsg, 128, len));
return;
}
output
[h264 # 0x81cd2a0] no frame!
TS PKT #2973. Error decoding frame #0001 [rc=-1094995529 'Invalid data found when processing input']
EDIT
Using the excellent hints from WLGfx, I made this simple program to try decoding TS packets. As input, I prepared a file containing only TS packets from the video PID.
It feels close but I don't know how to set up the FormatContext. The code below segfaults at av_read_frame() (and internally at ret = s->iformat->read_packet(s, pkt)). s->iformat is zero.
Suggestions?
EDIT II - Sorry, forgot to post the source code
EDIT III - Sample code updated to simulate reading the TS packet queue
/*
* Test program for video decoder
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
extern "C" {
#ifdef __cplusplus
#define __STDC_CONSTANT_MACROS
#ifdef _STDINT_H
#undef _STDINT_H
#endif
#include <stdint.h>
#endif
}
extern "C" {
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libavutil/imgutils.h"
#include "libavutil/opt.h"
}
class VideoDecoder
{
public:
VideoDecoder();
bool rcvTsPacket(AVPacket &inTsPacket);
private:
AVCodec *mpDecoder;
AVCodecContext *mpDecoderContext;
AVFrame *mpDecodedFrame;
AVFormatContext *mpFmtContext;
};
VideoDecoder::VideoDecoder()
{
av_register_all();
// FORMAT CONTEXT SETUP
mpFmtContext = avformat_alloc_context();
mpFmtContext->flags = AVFMT_NOFILE;
// ????? WHAT ELSE ???? //
// DECODER SETUP
mpDecoder = avcodec_find_decoder(AV_CODEC_ID_H264);
if (!mpDecoder)
{
printf("Could not load decoder\n");
exit(11);
}
mpDecoderContext = avcodec_alloc_context3(NULL);
if (avcodec_open2(mpDecoderContext, mpDecoder, NULL) < 0)
{
printf("Cannot open decoder context\n");
exit(1);
}
mpDecodedFrame = av_frame_alloc();
}
bool
VideoDecoder::rcvTsPacket(AVPacket &inTsPkt)
{
bool ret = true;
if ((av_read_frame(mpFmtContext, &inTsPkt)) < 0)
{
printf("Error in av_read_frame()\n");
ret = false;
}
else
{
// success. Decode the TS packet
int got;
int len = avcodec_decode_video2(mpDecoderContext, mpDecodedFrame, &got, &inTsPkt);
if (len < 0)
ret = false;
if (got)
printf("GOT A DECODED FRAME\n");
}
return ret;
}
int
main(int argc, char **argv)
{
if (argc != 2)
{
printf("Usage: %s tsInFile\n", argv[0]);
exit(1);
}
FILE *tsInFile = fopen(argv[1], "r");
if (!tsInFile)
{
perror("Could not open TS input file");
exit(2);
}
unsigned int tsPktNum = 0;
uint8_t tsBuffer[256];
AVPacket tsPkt;
av_init_packet(&tsPkt);
VideoDecoder vDecoder;
while (!feof(tsInFile))
{
tsPktNum++;
tsPkt.size = 188;
tsPkt.data = tsBuffer;
fread(tsPkt.data, 188, 1, tsInFile);
vDecoder.rcvTsPacket(tsPkt);
}
}
I've got some code snippets that might help you out as I've been working with MPEG-TS also.
Starting with my packet thread, which checks each packet against the stream IDs I've already found (and for which I've got the codec contexts):
void *FFMPEG::thread_packet_function(void *arg) {
FFMPEG *ffmpeg = (FFMPEG*)arg;
for (int c = 0; c < MAX_PACKETS; c++)
ffmpeg->free_packets[c] = &ffmpeg->packet_list[c];
ffmpeg->packet_pos = MAX_PACKETS;
Audio.start_decoding();
Video.start_decoding();
Subtitle.start_decoding();
while (!ffmpeg->thread_quit) {
if (ffmpeg->packet_pos != 0 &&
Audio.okay_add_packet() &&
Video.okay_add_packet() &&
Subtitle.okay_add_packet()) {
pthread_mutex_lock(&ffmpeg->packet_mutex); // get free packet
AVPacket *pkt = ffmpeg->free_packets[--ffmpeg->packet_pos]; // pre decrement
pthread_mutex_unlock(&ffmpeg->packet_mutex);
if ((av_read_frame(ffmpeg->fContext, pkt)) >= 0) { // success
int id = pkt->stream_index;
if (id == ffmpeg->aud_stream.stream_id) Audio.add_packet(pkt);
else if (id == ffmpeg->vid_stream.stream_id) Video.add_packet(pkt);
else if (id == ffmpeg->sub_stream.stream_id) Subtitle.add_packet(pkt);
else { // unknown packet
av_packet_unref(pkt);
pthread_mutex_lock(&ffmpeg->packet_mutex); // put packet back
ffmpeg->free_packets[ffmpeg->packet_pos++] = pkt;
pthread_mutex_unlock(&ffmpeg->packet_mutex);
//LOGI("Dumping unknown packet, id %d", id);
}
} else {
av_packet_unref(pkt);
pthread_mutex_lock(&ffmpeg->packet_mutex); // put packet back
ffmpeg->free_packets[ffmpeg->packet_pos++] = pkt;
pthread_mutex_unlock(&ffmpeg->packet_mutex);
//LOGI("No packet read");
}
} else { // buffers full so yield
//LOGI("Packet reader on hold: Audio-%d, Video-%d, Subtitle-%d",
// Audio.packet_pos, Video.packet_pos, Subtitle.packet_pos);
usleep(1000);
//sched_yield();
}
}
return 0;
}
Each decoder for audio, video and subtitles has its own thread, which receives the packets from the above thread via ring buffers. I've had to separate the decoders into their own threads because CPU usage was increasing when I started using the deinterlace filter.
My video decoder reads the packets from the buffers, and when it has finished with a packet it sends it back to be unref'd so it can be used again. Balancing the packet buffers doesn't take that much time once everything is running.
Here's the snippet from my video decoder:
void *VideoManager::decoder(void *arg) {
LOGI("Video decoder started");
VideoManager *mgr = (VideoManager *)arg;
while (!ffmpeg.thread_quit) {
pthread_mutex_lock(&mgr->packet_mutex);
if (mgr->packet_pos != 0) {
// fetch first packet to decode
AVPacket *pkt = mgr->packets[0];
// shift list down one
for (int c = 1; c < mgr->packet_pos; c++) {
mgr->packets[c-1] = mgr->packets[c];
}
mgr->packet_pos--;
pthread_mutex_unlock(&mgr->packet_mutex); // finished with packets array
int got;
AVFrame *frame = ffmpeg.vid_stream.frame;
avcodec_decode_video2(ffmpeg.vid_stream.context, frame, &got, pkt);
ffmpeg.finished_with_packet(pkt);
if (got) {
#ifdef INTERLACE_ALL
if (!frame->interlaced_frame) mgr->add_av_frame(frame, 0);
else {
if (!mgr->filter_initialised) mgr->init_filter_graph(frame);
av_buffersrc_add_frame_flags(mgr->filter_src_ctx, frame, AV_BUFFERSRC_FLAG_KEEP_REF);
int c = 0;
while (true) {
AVFrame *filter_frame = ffmpeg.vid_stream.filter_frame;
int result = av_buffersink_get_frame(mgr->filter_sink_ctx, filter_frame);
if (result == AVERROR(EAGAIN) ||
result == AVERROR_EOF ||
result < 0)
break;
mgr->add_av_frame(filter_frame, c++);
av_frame_unref(filter_frame);
}
//LOGI("Interlaced %d frames, decode %d, playback %d", c, mgr->decode_pos, mgr->playback_pos);
}
#elif defined(INTERLACE_HALF)
if (!frame->interlaced_frame) mgr->add_av_frame(frame, 0);
else {
if (!mgr->filter_initialised) mgr->init_filter_graph(frame);
av_buffersrc_add_frame_flags(mgr->filter_src_ctx, frame, AV_BUFFERSRC_FLAG_KEEP_REF);
int c = 0;
while (true) {
AVFrame *filter_frame = ffmpeg.vid_stream.filter_frame;
int result = av_buffersink_get_frame(mgr->filter_sink_ctx, filter_frame);
if (result == AVERROR(EAGAIN) ||
result == AVERROR_EOF ||
result < 0)
break;
mgr->add_av_frame(filter_frame, c++);
av_frame_unref(filter_frame);
}
//LOGI("Interlaced %d frames, decode %d, playback %d", c, mgr->decode_pos, mgr->playback_pos);
}
#else
mgr->add_av_frame(frame, 0);
#endif
}
//LOGI("decoded video packet");
} else {
pthread_mutex_unlock(&mgr->packet_mutex);
}
}
LOGI("Video decoder ended");
}
As you can see, I'm using a mutex when passing packets back and forth.
Once a frame has been obtained, I just copy the YUV buffers from the frame into another buffer list for later use. I don't convert the YUV on the CPU; I use a shader which converts the YUV to RGB on the GPU.
The next snippet adds my decoded frame to my buffer list. This may help you understand how to deal with the data.
void VideoManager::add_av_frame(AVFrame *frame, int field_num) {
int y_linesize = frame->linesize[0];
int u_linesize = frame->linesize[1];
int hgt = frame->height;
int y_buffsize = y_linesize * hgt;
int u_buffsize = u_linesize * hgt / 2;
int buffsize = y_buffsize + u_buffsize + u_buffsize;
VideoBuffer *buffer = &buffers[decode_pos];
if (ffmpeg.is_network && playback_pos == decode_pos) { // patched 25/10/16 wlgfx
buffer->used = false;
if (!buffer->data) buffer->data = (char*)mem.alloc(buffsize);
if (!buffer->data) {
LOGI("Dropped frame, allocation error");
return;
}
} else if (playback_pos == decode_pos) {
LOGI("Dropped frame, ran out of decoder frame buffers");
return;
} else if (!buffer->data) {
buffer->data = (char*)mem.alloc(buffsize);
if (!buffer->data) {
LOGI("Dropped frame, allocation error.");
return;
}
}
buffer->y_frame = buffer->data;
buffer->u_frame = buffer->y_frame + y_buffsize;
buffer->v_frame = buffer->y_frame + y_buffsize + u_buffsize;
buffer->wid = frame->width;
buffer->hgt = hgt;
buffer->y_linesize = y_linesize;
buffer->u_linesize = u_linesize;
int64_t pts = av_frame_get_best_effort_timestamp(frame);
buffer->pts = pts;
buffer->buffer_size = buffsize;
double field_add = av_q2d(ffmpeg.vid_stream.context->time_base) * field_num;
buffer->frame_time = av_q2d(ts_stream) * pts + field_add;
memcpy(buffer->y_frame, frame->data[0], (size_t) (buffer->y_linesize * buffer->hgt));
memcpy(buffer->u_frame, frame->data[1], (size_t) (buffer->u_linesize * buffer->hgt / 2));
memcpy(buffer->v_frame, frame->data[2], (size_t) (buffer->u_linesize * buffer->hgt / 2));
buffer->used = true;
decode_pos = (decode_pos + 1) % MAX_VID_BUFFERS;
//if (field_num == 0) LOGI("Video %.2f, %d - %d",
// buffer->frame_time - Audio.pts_start_time, decode_pos, playback_pos);
}
If there's anything else that I may be able to help with just give me a shout. :-)
EDIT:
Here is the snippet showing how I open my video stream context, which automatically determines the codec, whether it is h264, mpeg2, or another:
void FFMPEG::open_video_stream() {
vid_stream.stream_id = av_find_best_stream(fContext, AVMEDIA_TYPE_VIDEO,
-1, -1, &vid_stream.codec, 0);
if (vid_stream.stream_id == -1) return;
vid_stream.context = fContext->streams[vid_stream.stream_id]->codec;
if (!vid_stream.codec || avcodec_open2(vid_stream.context,
vid_stream.codec, NULL) < 0) {
vid_stream.stream_id = -1;
return;
}
vid_stream.frame = av_frame_alloc();
vid_stream.filter_frame = av_frame_alloc();
}
EDIT2:
This is how I've opened the input stream, whether it's a file or a URL. The AVFormatContext is the main context for the stream.
bool FFMPEG::start_stream(char *url_, float xtrim, float ytrim, int gain) {
aud_stream.stream_id = -1;
vid_stream.stream_id = -1;
sub_stream.stream_id = -1;
this->url = url_;
this->xtrim = xtrim;
this->ytrim = ytrim;
Audio.volume = gain;
Audio.init();
Video.init();
fContext = avformat_alloc_context();
if ((avformat_open_input(&fContext, url_, NULL, NULL)) != 0) {
stop_stream();
return false;
}
if ((avformat_find_stream_info(fContext, NULL)) < 0) {
stop_stream();
return false;
}
// network stream will overwrite packets if buffer is full
is_network = url.substr(0, 4) == "udp:" ||
url.substr(0, 4) == "rtp:" ||
url.substr(0, 5) == "rtsp:" ||
url.substr(0, 5) == "http:"; // added for wifi broadcasting ability
// determine if stream is audio only
is_mp3 = url.substr(url.size() - 4) == ".mp3";
LOGI("Stream: %s", url_);
if (!open_audio_stream()) {
stop_stream();
return false;
}
if (is_mp3) {
vid_stream.stream_id = -1;
sub_stream.stream_id = -1;
} else {
open_video_stream();
open_subtitle_stream();
if (vid_stream.stream_id == -1) { // switch to audio only
close_subtitle_stream();
is_mp3 = true;
}
}
LOGI("Audio: %d, Video: %d, Subtitle: %d",
aud_stream.stream_id,
vid_stream.stream_id,
sub_stream.stream_id);
if (aud_stream.stream_id != -1) {
LOGD("Audio stream time_base {%d, %d}",
aud_stream.context->time_base.num,
aud_stream.context->time_base.den);
}
if (vid_stream.stream_id != -1) {
LOGD("Video stream time_base {%d, %d}",
vid_stream.context->time_base.num,
vid_stream.context->time_base.den);
}
LOGI("Starting packet and decode threads");
thread_quit = false;
pthread_create(&thread_packet, NULL, &FFMPEG::thread_packet_function, this);
Display.set_overlay_timout(3.0);
return true;
}
EDIT: (constructing an AVPacket)
Construct an AVPacket to send to the decoder...
AVPacket packet;
av_init_packet(&packet);
packet.data = myTSpacketdata; // pointer to the TS packet
packet.size = 188;
You should be able to reuse the packet, and it might need unref'ing.
You must first use the libavformat library to demux the compressed frames out of the file. Then you can decode them using avcodec_decode_video2. Look at this tutorial: http://dranger.com/ffmpeg/
This is my first question, so I hope I did it correctly. If not, please let me know so I can fix it.
I'm trying to convert a short (10 secs) mp4 video file into a gif using the ffmpeg libraries (I'm pretty new to ffmpeg). The program works pretty well converting to gif, but sometimes it randomly crashes.
This is the version of the ffmpeg libraries I'm using:
libavutil 54. 27.100
libavcodec 56. 41.100
libavformat 56. 36.100
libavdevice 56. 4.100
libavfilter 5. 16.101
libavresample 2. 1. 0
libswscale 3. 1.101
libswresample 1. 2.100
libpostproc 53. 3.100
I'm using a 1920x1080p video, so in order to generate the gif I'm doing a pixel format conversion from AV_PIX_FMT_YUV420P to AV_PIX_FMT_RGB8, with a resize from the initial resolution to 432x240.
Here is the code:
int VideoManager::loadVideo(QString filename, bool showInfo)
{
if(avformat_open_input(&iFmtCtx, filename.toStdString().c_str(), 0, 0) < 0)
{
qDebug() << "Could not open input file " << filename;
closeInput();
return -1;
}
if (avformat_find_stream_info(iFmtCtx, 0) < 0)
{
qDebug() << "Failed to retrieve input stream information";
closeInput();
return -2;
}
videoStreamIndex = -1;
for(unsigned int i = 0; i < iFmtCtx->nb_streams; ++i)
if(iFmtCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
{
videoStreamIndex = i;
break;
}
if(videoStreamIndex == -1)
{
qDebug() << "Didn't find any video stream!";
closeInput();
return -3;
}
iCodecCtx = iFmtCtx->streams[videoStreamIndex]->codec;
iCodec = avcodec_find_decoder(iCodecCtx->codec_id);
if(iCodec == NULL) // Codec not found
{
qDebug() << "Codec not found!";
closeInput();
return -4;
}
if(avcodec_open2(iCodecCtx, iCodec, NULL) < 0)
{
qDebug() << "Could not open codec!";
closeInput();
return -1;
}
if(showInfo)
av_dump_format(iFmtCtx, 0, filename.toStdString().c_str(), 0);
return 0;
}
void VideoManager::generateGif(QString filename)
{
int ret, frameCount = 0;
AVPacket packet;
packet.data = NULL;
packet.size = 0;
AVFrame *frame = NULL;
unsigned int stream_index;
int got_frame;
gifHeight = iFmtCtx->streams[videoStreamIndex]->codec->height;
gifWidth = iFmtCtx->streams[videoStreamIndex]->codec->width;
if(gifHeight > MAX_GIF_HEIGHT || gifWidth > MAX_GIF_WIDTH)
{
if(gifHeight > gifWidth)
{
gifWidth = (float)gifWidth * ( (float)MAX_GIF_HEIGHT / (float)gifHeight );
gifHeight = MAX_GIF_HEIGHT;
}
else
{
gifHeight = (float)gifHeight * ( (float)MAX_GIF_WIDTH / (float)gifWidth );
gifWidth = MAX_GIF_WIDTH;
}
}
if(openOutputFile(filename.toStdString().c_str()) < 0)
{
qDebug() << "Error openning output file: " << filename;
return;
}
while (1) {
int ret = av_read_frame(iFmtCtx, &packet);
if (ret < 0)
{
if(ret != AVERROR_EOF)
qDebug() << "Error reading frame: " << ret;
break;
}
stream_index = packet.stream_index;
if(stream_index == videoStreamIndex)
{
frame = av_frame_alloc();
if (!frame) {
qDebug() << "Error allocating frame";
break;
}
av_packet_rescale_ts(&packet,
iFmtCtx->streams[stream_index]->time_base,
iFmtCtx->streams[stream_index]->codec->time_base);
ret = avcodec_decode_video2(iFmtCtx->streams[stream_index]->codec, frame,
&got_frame, &packet);
if (ret < 0) {
qDebug() << "Decoding failed";
break;
}
if(got_frame)
{
qDebug() << ++frameCount;
nframes++;
frame->pts = av_frame_get_best_effort_timestamp(frame);
////////////////////////////////////////////////////////////////////////////////
/// Pixel format convertion and resize
////////////////////////////////////////////////////////////////////////////////
uint8_t *out_buffer = NULL;
SwsContext *img_convert_ctx = NULL;
AVFrame *pFrameRGB = av_frame_alloc();
if(pFrameRGB == NULL)
{
qDebug() << "Error allocating frameRGB";
break;
}
AVPixelFormat pixFmt;
switch (iFmtCtx->streams[stream_index]->codec->pix_fmt)
{
case AV_PIX_FMT_YUVJ420P : pixFmt = AV_PIX_FMT_YUV420P; break;
case AV_PIX_FMT_YUVJ422P : pixFmt = AV_PIX_FMT_YUV422P; break;
case AV_PIX_FMT_YUVJ444P : pixFmt = AV_PIX_FMT_YUV444P; break;
case AV_PIX_FMT_YUVJ440P : pixFmt = AV_PIX_FMT_YUV440P; break;
default:
pixFmt = iFmtCtx->streams[stream_index]->codec->pix_fmt;
}
out_buffer = (uint8_t*)av_malloc( avpicture_get_size( AV_PIX_FMT_RGB8,
gifWidth,
gifHeight ));
if(!out_buffer)
{
qDebug() << "Error alocatting out_buffer!";
}
avpicture_fill((AVPicture *)pFrameRGB, out_buffer, AV_PIX_FMT_RGB8,
gifWidth,
gifHeight);
img_convert_ctx = sws_getContext( iFmtCtx->streams[stream_index]->codec->width,
iFmtCtx->streams[stream_index]->codec->height,
pixFmt,
gifWidth,
gifHeight,
AV_PIX_FMT_RGB8,
SWS_ERROR_DIFFUSION, NULL, NULL, NULL );
if(!img_convert_ctx)
{
qDebug() << "error getting sws context";
}
sws_scale( img_convert_ctx, (const uint8_t* const*)frame->data,
frame->linesize, 0,
iFmtCtx->streams[stream_index]->codec->height,
pFrameRGB->data,
pFrameRGB->linesize );
pFrameRGB->format = AV_PIX_FMT_RGB8;
pFrameRGB->pts = frame->pts;
pFrameRGB->best_effort_timestamp = frame->best_effort_timestamp;
pFrameRGB->width = gifWidth;
pFrameRGB->height = gifHeight;
pFrameRGB->pkt_dts = frame->pkt_dts;
pFrameRGB->pkt_pts = frame->pkt_pts;
pFrameRGB->pkt_duration = frame->pkt_duration;
pFrameRGB->pkt_pos = frame->pkt_pos;
pFrameRGB->pkt_size = frame->pkt_size;
pFrameRGB->interlaced_frame = frame->interlaced_frame;
////////////////////////////////////////////////////////////////////////////////
ret = encodeAndWriteFrame(pFrameRGB, stream_index, NULL);
//av_frame_free(&frame);
//av_free(out_buffer);
//sws_freeContext(img_convert_ctx);
if (ret < 0)
{
qDebug() << "Error encoding and writting frame";
//av_free_packet(&packet);
closeOutput();
}
}
else {
//av_frame_free(&frame);
}
}
av_free_packet(&packet);
}
ret = flushEncoder(videoStreamIndex);
if (ret < 0)
{
qDebug() << "Flushing encoder failed";
}
av_write_trailer(oFmtCtx);
//av_free_packet(&packet);
//av_frame_free(&frame);
closeOutput();
}
void VideoManager::closeOutput()
{
if (oFmtCtx && oFmtCtx->nb_streams > 0 && oFmtCtx->streams[0] && oFmtCtx->streams[0]->codec)
avcodec_close(oFmtCtx->streams[0]->codec);
if (oFmtCtx && oFmt && !(oFmt->flags & AVFMT_NOFILE))
avio_closep(&oFmtCtx->pb);
avformat_free_context(oFmtCtx);
}
int VideoManager::openOutputFile(const char *filename)
{
AVStream *out_stream;
AVStream *in_stream;
AVCodecContext *dec_ctx, *enc_ctx;
AVCodec *encoder;
int ret;
oFmtCtx = NULL;
avformat_alloc_output_context2(&oFmtCtx, NULL, NULL, filename);
if (!oFmtCtx) {
qDebug() << "Could not create output context";
return AVERROR_UNKNOWN;
}
oFmt = oFmtCtx->oformat;
out_stream = avformat_new_stream(oFmtCtx, NULL);
if (!out_stream) {
qDebug() << "Failed allocating output stream";
return AVERROR_UNKNOWN;
}
in_stream = iFmtCtx->streams[videoStreamIndex];
dec_ctx = in_stream->codec;
enc_ctx = out_stream->codec;
encoder = avcodec_find_encoder(AV_CODEC_ID_GIF);
if (!encoder) {
qDebug() << "FATAL!: Necessary encoder not found";
return AVERROR_INVALIDDATA;
}
enc_ctx->height = gifHeight;
enc_ctx->width = gifWidth;
enc_ctx->sample_aspect_ratio = dec_ctx->sample_aspect_ratio;
enc_ctx->pix_fmt = AV_PIX_FMT_RGB8;
enc_ctx->time_base = dec_ctx->time_base;
ret = avcodec_open2(enc_ctx, encoder, NULL);
if (ret < 0) {
qDebug() << "Cannot open video encoder for gif";
return ret;
}
if (oFmt->flags & AVFMT_GLOBALHEADER)
enc_ctx->flags |= CODEC_FLAG_GLOBAL_HEADER;
if (!(oFmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&oFmtCtx->pb, filename, AVIO_FLAG_WRITE);
if (ret < 0) {
qDebug() << "Could not open output file " << filename;
return ret;
}
}
ret = avformat_write_header(oFmtCtx, NULL);
if (ret < 0) {
qDebug() << "Error occurred when opening output file";
return ret;
}
return 0;
}
int VideoManager::encodeAndWriteFrame(AVFrame *frame, unsigned int stream_index, int *got_frame) {
int ret;
int got_frame_local;
AVPacket enc_pkt;
if (!got_frame)
got_frame = &got_frame_local;
enc_pkt.data = NULL;
enc_pkt.size = 0;
av_init_packet(&enc_pkt);
ret = avcodec_encode_video2(oFmtCtx->streams[stream_index]->codec, &enc_pkt,
frame, got_frame);
//av_frame_free(&frame);
if (ret < 0)
return ret;
if (!(*got_frame))
return 0;
enc_pkt.stream_index = stream_index;
av_packet_rescale_ts(&enc_pkt,
oFmtCtx->streams[stream_index]->codec->time_base,
oFmtCtx->streams[stream_index]->time_base);
ret = av_interleaved_write_frame(oFmtCtx, &enc_pkt);
return ret;
}
int VideoManager::flushEncoder(unsigned int stream_index)
{
int ret;
int got_frame;
if (!(oFmtCtx->streams[stream_index]->codec->codec->capabilities &
CODEC_CAP_DELAY))
return 0;
while (1) {
ret = encodeAndWriteFrame(NULL, stream_index, &got_frame);
if (ret < 0)
break;
if (!got_frame)
return 0;
}
return ret;
}
I know there are a lot of memory leaks. I deleted/commented out most of the free functions intentionally because I thought they were the problem.
I'm using Qt Creator, so when I debug the program this is the output (the crash backtrace):
Level Function Line
0 av_image_copy 303
1 frame_copy_video 650
2 av_frame_copy 687
3 av_frame_ref 384
4 gif_encode_frame 307
5 avcodec_encode_video2 2191
6 VideoManager::encodeAndWriteFrame 813
7 VideoManager::generateGif 375
8 qMain 31
9 WinMain*16 112
10 main
I've checked whether there is a specific frame the program crashes at, but it's a random frame too.
Any idea what I'm doing wrong? Any help would be much appreciated.
EDIT:
After a few days of pain, suffering and frustration I decided to write the whole code from scratch. Both times I started from this example and modified it to work as described above, and it works perfectly now :D! The only error I could find in the old code (posted above) is that when accessing the video stream of the output file I used videoStreamIndex, but that index belongs to the video stream of the input file. Sometimes the two indices happen to match and sometimes they don't. But that doesn't explain why it crashed randomly; if that were the cause, it should have crashed every time I ran the code with the same video. So there are probably more errors in that code.
Note that I've not tested whether fixing that error in the code above actually solves the crashes.
I think you may have your parameters mixed up. According to what I'm reading from the documentation, avcodec_decode_video2's prototype looks like:
int avcodec_decode_video2 (AVCodecContext * avctx,
AVFrame * picture,
int * got_picture_ptr,
const AVPacket * avpkt)
And it is called with:
ret = avcodec_encode_video2(oFmtCtx->streams[stream_index]->codec, // Dunno.
&enc_pkt, //AVPacket * should be AVFrame *
frame, //AVFrame * Should be int *
got_frame); // int * should be AVPacket *
When I decode frames from an avi file and then encode them with x264 and save them to an mp4 file, the fps of the output file is always 12,800, so the file plays very fast. But when I save the h264-encoded frames in avi format rather than mp4, the fps is 25, as I wanted.
What could be the problem?
Here is the code I wrote in VS2010:
#include "stdafx.h"
#include "inttypes.h"
extern "C" {
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libavutil/avutil.h"
#include <libswscale/swscale.h>
#include <libavutil/opt.h>
#include <libswscale/swscale.h>
#include <libavutil/imgutils.h>
}
#include <iostream>
using namespace std;
int main(int argc, char* argv[])
{
const char* inFileName = "C:\\000227_C1_GAME.avi";
const char* outFileName = "c:\\test.avi";
const char* outFileType = "avi";
av_register_all();
AVFormatContext* inContainer = NULL;
if(avformat_open_input(&inContainer, inFileName, NULL, NULL) < 0)
exit(1);
if(avformat_find_stream_info(inContainer, NULL) < 0)
exit(1);
// Find video stream
int videoStreamIndex = -1;
for (unsigned int i = 0; i < inContainer->nb_streams; ++i)
{
if (inContainer->streams[i] && inContainer->streams[i]->codec &&
inContainer->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
{
videoStreamIndex = i;
break;
}
}
if (videoStreamIndex == -1) exit(1);
AVFormatContext* outContainer = NULL;
if(avformat_alloc_output_context2(&outContainer, NULL, outFileType, outFileName) < 0)
exit(1);
// ----------------------------
// Decoder
// ----------------------------
AVStream const *const inStream = inContainer->streams[videoStreamIndex];
AVCodec *const decoder = avcodec_find_decoder(inStream->codec->codec_id);
if(!decoder)
exit(1);
if(avcodec_open2(inStream->codec, decoder, NULL) < 0)
exit(1);
// ----------------------------
// Encoder
// ----------------------------
AVCodec *encoder = avcodec_find_encoder(AV_CODEC_ID_H264);
if(!encoder)
exit(1);
AVStream *outStream = avformat_new_stream(outContainer, encoder);
if(!outStream)
exit(1);
avcodec_get_context_defaults3(outStream->codec, encoder);
// Construct encoder
if(outContainer->oformat->flags & AVFMT_GLOBALHEADER)
outStream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
outStream->codec->coder_type = AVMEDIA_TYPE_VIDEO;
outStream->codec->pix_fmt = AV_PIX_FMT_YUV420P;
outStream->codec->width = inStream->codec->width;
outStream->codec->height = inStream->codec->height;
outStream->codec->codec_id = encoder->id;
outStream->codec->bit_rate = 500000;
//outStream->codec->rc_min_rate = 600000;
//outStream->codec->rc_max_rate = 800000;
outStream->codec->time_base.den = 25;
outStream->codec->time_base.num = 1;
outStream->codec->gop_size = 250; // Keyframe interval (= GOP length). Determines the maximum distance between I-frames
outStream->codec->keyint_min = 25; // minimum GOP size
outStream->codec->max_b_frames = 3;//16; // maximum number of B-frames between non-B-frames
outStream->codec->b_frame_strategy = 1; // decides the best number of B-frames to use. Default mode in x264.
outStream->codec->scenechange_threshold = 40;
outStream->codec->refs = 6; // ability to reference frames other than the one immediately prior to the current frame; specifies how many references can be used
outStream->codec->qmin = 0;//10;
outStream->codec->qmax = 69;//51;
outStream->codec->qcompress = 0.6;
outStream->codec->max_qdiff = 4;
outStream->codec->i_quant_factor = 1.4;//0.71;
outStream->codec->refs=1;//3;
outStream->codec->chromaoffset = -2;
outStream->codec->thread_count = 1;
outStream->codec->trellis = 1;
outStream->codec->me_range = 16;
outStream->codec->me_method = ME_HEX; //hex
outStream->codec->flags2 |= CODEC_FLAG2_FAST;
outStream->codec->coder_type = 1;
if(outStream->codec->codec_id == AV_CODEC_ID_H264)
{
av_opt_set(outStream->codec->priv_data, "preset", "slow", 0);
}
// Open encoder
if(avcodec_open2(outStream->codec, encoder, NULL) < 0)
exit(1);
// Open output container
if(avio_open(&outContainer->pb, outFileName, AVIO_FLAG_WRITE) < 0)
exit(1);
//close_o
AVFrame *decodedFrame = avcodec_alloc_frame();
if(!decodedFrame)
exit(1);
AVFrame *encodeFrame = avcodec_alloc_frame();
if(!encodeFrame)
exit(1);
encodeFrame->format = outStream->codec->pix_fmt;
encodeFrame->width = outStream->codec->width;
encodeFrame->height = outStream->codec->height;
if(av_image_alloc(encodeFrame->data, encodeFrame->linesize,
outStream->codec->width, outStream->codec->height,
outStream->codec->pix_fmt, 1) < 0)
exit(1);
av_dump_format(inContainer, 0, inFileName,0);
//Write header to ouput container
avformat_write_header(outContainer, NULL);
AVPacket decodePacket, encodedPacket;
int got_frame, len;
while(av_read_frame(inContainer, &decodePacket)>=0)
{
if (decodePacket.stream_index == videoStreamIndex)
{
len = avcodec_decode_video2(inStream->codec, decodedFrame, &got_frame, &decodePacket);
if(len < 0)
exit(1);
if(got_frame)
{
av_init_packet(&encodedPacket);
encodedPacket.data = NULL;
encodedPacket.size = 0;
if(avcodec_encode_video2(outStream->codec, &encodedPacket, decodedFrame, &got_frame) < 0)
exit(1);
if(got_frame)
{
if (outStream->codec->coded_frame->key_frame)
encodedPacket.flags |= AV_PKT_FLAG_KEY;
encodedPacket.stream_index = outStream->index;
if(av_interleaved_write_frame(outContainer, &encodedPacket) < 0)
exit(1);
av_free_packet(&encodedPacket);
}
}
}
av_free_packet(&decodePacket);
}
av_write_trailer(outContainer);
avio_close(outContainer->pb);
avcodec_free_frame(&encodeFrame);
avcodec_free_frame(&decodedFrame);
avformat_free_context(outContainer);
av_close_input_file(inContainer);
return 0;
}
The problem was with the PTS and DTS of the packet. Before writing the packet to the output (before the av_interleaved_write_frame call), set the PTS and DTS like this:
if (encodedPacket.pts != AV_NOPTS_VALUE)
encodedPacket.pts = av_rescale_q(encodedPacket.pts, outStream->codec->time_base, outStream->time_base);
if (encodedPacket.dts != AV_NOPTS_VALUE)
encodedPacket.dts = av_rescale_q(encodedPacket.dts, outStream->codec->time_base, outStream->time_base);