I writting a software for my classes that receive a h264 stream from a drone and i need to convert the video stream to opencv Mat.
I have no trouble to receive the frame and if i save it to a .264 file i can read the output with VLC.
The drone is sending IDR-Frame and P-Frame , since i don't need to see the video stream juste some frame of it, i was thinking of only using the IDR-Frame to get the image but i have trouble to understand how to use libavcodec from FFMPEG, how can i create a AVFrame from my IDR-Frame and how to convert it to cv::Mat after.
i have try the following code, i have store the full frame in .raw file and i try to decode them , but i get read error when i try to parse the packet to a frame, i don't thing a initialize the buffer of ARPacket the right way :
AVFormatContext* fc = 0;
int vi = -1; // vi veut dire video index
inline cv::Mat avframe_to_mat(const AVFrame* avframe)
{
AVFrame dst;
cv::Mat m;
memset(&dst, 0, sizeof(dst));
int w = avframe->width, h = avframe->height;
m = cv::Mat(h, w, CV_8UC3);
dst.data[0] = (uint8_t*)m.data;
avpicture_fill((AVPicture*)&dst, dst.data[0], AV_PIX_FMT_BGR24, w, h);
struct SwsContext *convert_ctx = NULL;
enum AVPixelFormat src_pixfmt = AV_PIX_FMT_BGR24;
enum AVPixelFormat dst_pixfmt = AV_PIX_FMT_BGR24;
convert_ctx = sws_getContext(w, h, src_pixfmt, w, h, dst_pixfmt, SWS_FAST_BILINEAR, NULL, NULL, NULL);
sws_scale(convert_ctx, avframe->data, avframe->linesize, 0, h, dst.data, dst.linesize);
sws_freeContext(convert_ctx);
return m;
}
inline bool init_stream(unsigned char* data, int len)
{
const char* file = "test.avi";
const AVCodecID codec_id = AV_CODEC_ID_H264;
AVCodec* codec = avcodec_find_encoder(codec_id);
// Crée le container pour le stream
fc = avformat_alloc_context();
/*
AVOutputFormat *of = av_guess_format(0, file, 0);
fc = avformat_alloc_context();
fc->oformat = of;
strcpy(fc->filename, file);
*/
int br = 1000000;
int w = 640;
int h = 360;
int fps = 24;
// ajoute un stream video
AVStream* pst = avformat_new_stream(fc, codec); // Pourquoi je passe pas le codec ici ?
vi = pst->index;
codec_context = avcodec_alloc_context3(codec);
codec_context->bit_rate = br;
codec_context->width = w;
codec_context->height = h;
codec_context->time_base = {1,fps};
codec_context->gop_size = 10; // Emit one intra frame every ten frames
codec_context->max_b_frames = 1;
codec_context->pix_fmt = AV_PIX_FMT_YUV420P;
// Vu quon n'est en h264
av_opt_set(codec_context->priv_data, "preset", "slow", 0);
// Ouvre notre codec
if(avcodec_open2(codec_context, codec,nullptr) < 0)
{
cerr << "Impossible d'ouvrir le codec" << endl;
return false;
}
if (!(fc->oformat->flags & AVFMT_NOFILE))
avio_open(&fc->pb, fc->filename,0);
// avformat_write_header(fc,nullptr);
return true;
}
inline void append_stream(uint8_t* data, int len)
{
if( 0 > vi)
{
cerr << "video index is less than 0" << endl;
return;
}
AVStream* pst = fc->streams[vi];
AVPacket pkt;
// Init un nouveau packet
av_init_packet(&pkt);
pkt.flags |= AV_PKT_FLAG_KEY;
pkt.data = data;
pkt.stream_index = pst->index;
pkt.size = len;
pkt.dts = AV_NOPTS_VALUE;
pkt.pts = AV_NOPTS_VALUE;
// ERROR accessing location
av_interleaved_write_frame(fc, &pkt);
}
inline void execute_staging_test(const fs::path& folder, int nbr_trame)
{
fs::path file_name = folder / "stream.bin";
if(!fs::exists(file_name))
{
cerr << "The file " << file_name.string() << " does not exists" << endl;
return;
}
avcodec_register_all();
av_log_set_level(AV_LOG_DEBUG);
int length = 0;
char* buffer;
for(int i = 0; i < nbr_trame;i++)
{
fs::path file = std::to_string(i) + ".raw";
file = folder / file;
cout << "Got frame on " << file.string() << endl;
ifstream ifs(file, ofstream::binary);
// Get la longeur du fichier pour savoir le buffer a prendre
ifs.seekg(0, ios::end);
length = ifs.tellg();
ifs.seekg(0, ios::beg);
if (length == 0) {
std::cerr << "No data in file " << file << std::endl;
return;
}
buffer = new char[length];
std::cout << "File " << file << " length is " << length << std::endl;
ifs.read(buffer, length);
cv::VideoWriter vw;
int codec = cv::VideoWriter::fourcc('X', '2', '6', '4');
if(!fc)
{
if(!init_stream((unsigned char*)buffer, length))
{
return;
}
}
if(fc)
{
append_stream((unsigned char*)buffer, length);
}
}
}
Thanks you very mutch if you can help me , i'm a novice in c++ and i have never deal with video stream. If you want to see the full code its host on github repo to this project
One of the things I'm trying to achieve is parallel encoding via FFmpeg's c API. This looks to work out of the box quite nicely; however, I've changed the goal posts slightly:
In an existing application, I already have a thread pool at hand. Instead of using another thread pool via FFmpeg, I would like reuse the existing thread pool in my application. Having studied the latest FFmpeg trunk docs, it very much looks possible.
Using some FFmpeg sample code, I've created a sample application to demonstrate what I'm trying to achieve (see below). The sample app generates a video-only mpeg2 ts using the mp2v codec.
The problem I'm experiencing is that the custom 'thread_execute' or 'thread_execute2' are never invoked. This is despite the fact that the codec appears to indicate that threading is supported. Please be aware that I have not yet plumbed in the thread pool just yet. My first goal is for it to call the custom function pointer.
I've tried to get assistance on the FFmpeg mailing lists but to no avail.
#include <iostream>
#include <thread>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <cstring>
#include <future>
extern "C"
{
#include <libavutil/avassert.h>
#include <libavutil/channel_layout.h>
#include <libavutil/opt.h>
#include <libavutil/timestamp.h>
#include <libavformat/avformat.h>
//#include <libswscale/swscale.h>
#include <libswresample/swresample.h>
}
#define STREAM_DURATION 1000.0
#define STREAM_FRAME_RATE 25 /* 25 images/s */
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P /* default pix_fmt */
#define SCALE_FLAGS SWS_BICUBIC
// a wrapper around a single output AVStream
typedef struct OutputStream {
AVStream *st;
AVCodecContext *enc;
/* pts of the next frame that will be generated */
int64_t next_pts;
int samples_count;
AVFrame *frame;
AVFrame *tmp_frame;
float t, tincr, tincr2;
struct SwsContext *sws_ctx;
struct SwrContext *swr_ctx;
} OutputStream;
/////////////////////////////////////////////////////////////////////////////
// The ffmpeg variation raises compiler warnings.
char *cb_av_ts2str(char *buf, int64_t ts)
{
std::memset(buf,0,AV_TS_MAX_STRING_SIZE);
return av_ts_make_string(buf,ts);
}
/////////////////////////////////////////////////////////////////////////////
// The ffmpeg variation raises compiler warnings.
char *cb_av_ts2timestr(char *buf, int64_t ts, AVRational *tb)
{
std::memset(buf,0,sizeof(AV_TS_MAX_STRING_SIZE));
return av_ts_make_time_string(buf,ts,tb);
}
/////////////////////////////////////////////////////////////////////////////
// The ffmpeg variation raises compiler warnings.
char *cb_av_err2str(char *errbuf, size_t errbuf_size, int errnum)
{
std::memset(errbuf,0,errbuf_size);
return av_make_error_string(errbuf,errbuf_size,errnum);
}
int thread_execute(AVCodecContext* s, int (*func)(AVCodecContext *c2, void *arg2), void* arg, int* ret, int count, int size)
{
// Do it all serially for now
std::cout << "thread_execute" << std::endl;
for (int k = 0; k < count; ++k)
{
ret[k] = func(s, arg);
}
return 0;
}
int thread_execute2(AVCodecContext* s, int (*func)(AVCodecContext* c2, void* arg2, int, int), void* arg, int* ret, int count)
{
// Do it all serially for now
std::cout << "thread_execute2" << std::endl;
for (int k = 0; k < count; ++k)
{
ret[k] = func(s, arg, k, count);
}
return 0;
}
static void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt)
{
char s[AV_TS_MAX_STRING_SIZE];
AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
printf("pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
cb_av_ts2str(s,pkt->pts), cb_av_ts2timestr(s,pkt->pts, time_base),
cb_av_ts2str(s,pkt->dts), cb_av_ts2timestr(s,pkt->dts, time_base),
cb_av_ts2str(s,pkt->duration), cb_av_ts2timestr(s,pkt->duration, time_base),
pkt->stream_index);
}
static int write_frame(AVFormatContext *fmt_ctx, const AVRational *time_base, AVStream *st, AVPacket *pkt)
{
/* rescale output packet timestamp values from codec to stream timebase */
av_packet_rescale_ts(pkt, *time_base, st->time_base);
pkt->stream_index = st->index;
/* Write the compressed frame to the media file. */
log_packet(fmt_ctx, pkt);
return av_interleaved_write_frame(fmt_ctx, pkt);
}
/* Add an output stream. */
static void add_stream(OutputStream *ost, AVFormatContext *oc,
AVCodec **codec,
enum AVCodecID codec_id)
{
AVCodecContext *c;
int i;
/* find the encoder */
*codec = avcodec_find_encoder(codec_id);
if (!(*codec)) {
fprintf(stderr, "Could not find encoder for '%s'\n",
avcodec_get_name(codec_id));
exit(1);
}
ost->st = avformat_new_stream(oc, NULL);
if (!ost->st) {
fprintf(stderr, "Could not allocate stream\n");
exit(1);
}
ost->st->id = oc->nb_streams-1;
c = avcodec_alloc_context3(*codec);
if (!c) {
fprintf(stderr, "Could not alloc an encoding context\n");
exit(1);
}
ost->enc = c;
switch ((*codec)->type)
{
case AVMEDIA_TYPE_AUDIO:
c->sample_fmt = (*codec)->sample_fmts ?
(*codec)->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
c->bit_rate = 64000;
c->sample_rate = 44100;
if ((*codec)->supported_samplerates) {
c->sample_rate = (*codec)->supported_samplerates[0];
for (i = 0; (*codec)->supported_samplerates[i]; i++) {
if ((*codec)->supported_samplerates[i] == 44100)
c->sample_rate = 44100;
}
}
c->channels = av_get_channel_layout_nb_channels(c->channel_layout);
c->channel_layout = AV_CH_LAYOUT_STEREO;
if ((*codec)->channel_layouts) {
c->channel_layout = (*codec)->channel_layouts[0];
for (i = 0; (*codec)->channel_layouts[i]; i++) {
if ((*codec)->channel_layouts[i] == AV_CH_LAYOUT_STEREO)
c->channel_layout = AV_CH_LAYOUT_STEREO;
}
}
c->channels = av_get_channel_layout_nb_channels(c->channel_layout);
ost->st->time_base = (AVRational){ 1, c->sample_rate };
break;
case AVMEDIA_TYPE_VIDEO:
c->codec_id = codec_id;
c->bit_rate = 400000;
/* Resolution must be a multiple of two. */
c->width = 352;
c->height = 288;
/* timebase: This is the fundamental unit of time (in seconds) in terms
* of which frame timestamps are represented. For fixed-fps content,
* timebase should be 1/framerate and timestamp increments should be
* identical to 1. */
ost->st->time_base = (AVRational){ 1, STREAM_FRAME_RATE };
c->time_base = ost->st->time_base;
c->gop_size = 12; /* emit one intra frame every twelve frames at most */
c->pix_fmt = STREAM_PIX_FMT;
if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
/* just for testing, we also add B-frames */
c->max_b_frames = 2;
}
if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
/* Needed to avoid using macroblocks in which some coeffs overflow.
* This does not happen with normal video, it just happens here as
* the motion of the chroma plane does not match the luma plane. */
c->mb_decision = 2;
}
break;
default:
break;
}
if (c->codec->capabilities & AV_CODEC_CAP_FRAME_THREADS ||
c->codec->capabilities & AV_CODEC_CAP_SLICE_THREADS)
{
if (c->codec->capabilities & AV_CODEC_CAP_FRAME_THREADS)
{
c->thread_type = FF_THREAD_FRAME;
}
if (c->codec->capabilities & AV_CODEC_CAP_SLICE_THREADS)
{
c->thread_type = FF_THREAD_SLICE;
}
c->execute = &thread_execute;
c->execute2 = &thread_execute2;
c->thread_count = 4;
// NOTE: Testing opaque.
c->opaque = (void*)0xff;
}
/* Some formats want stream headers to be separate. */
if (oc->oformat->flags & AVFMT_GLOBALHEADER)
c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
/**************************************************************/
/* video output */
static AVFrame *alloc_picture(enum AVPixelFormat pix_fmt, int width, int height)
{
AVFrame *picture;
int ret;
picture = av_frame_alloc();
if (!picture)
return NULL;
picture->format = pix_fmt;
picture->width = width;
picture->height = height;
/* allocate the buffers for the frame data */
ret = av_frame_get_buffer(picture, 32);
if (ret < 0) {
fprintf(stderr, "Could not allocate frame data.\n");
exit(1);
}
return picture;
}
static void open_video(AVFormatContext *oc, AVCodec *codec, OutputStream *ost, AVDictionary *opt_arg)
{
int ret;
AVCodecContext *c = ost->enc;
//AVDictionary *opt = NULL;
//av_dict_copy(&opt, opt_arg, 0);
/* open the codec */
ret = avcodec_open2(c, codec, NULL);
//av_dict_free(&opt);
if (ret < 0) {
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Could not open video codec: %s\n", cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
exit(1);
}
/* allocate and init a re-usable frame */
ost->frame = alloc_picture(c->pix_fmt, c->width, c->height);
if (!ost->frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
/* If the output format is not YUV420P, then a temporary YUV420P
* picture is needed too. It is then converted to the required
* output format. */
ost->tmp_frame = NULL;
if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
ost->tmp_frame = alloc_picture(AV_PIX_FMT_YUV420P, c->width, c->height);
if (!ost->tmp_frame) {
fprintf(stderr, "Could not allocate temporary picture\n");
exit(1);
}
}
/* copy the stream parameters to the muxer */
ret = avcodec_parameters_from_context(ost->st->codecpar, c);
if (ret < 0) {
fprintf(stderr, "Could not copy the stream parameters\n");
exit(1);
}
}
/* Prepare a dummy image. */
static void fill_yuv_image(AVFrame *pict, int frame_index,
int width, int height)
{
int x, y, i;
i = frame_index;
/* Y */
for (y = 0; y < height; y++)
for (x = 0; x < width; x++)
pict->data[0][y * pict->linesize[0] + x] = x + y + i * 3;
/* Cb and Cr */
for (y = 0; y < height / 2; y++) {
for (x = 0; x < width / 2; x++) {
pict->data[1][y * pict->linesize[1] + x] = 128 + y + i * 2;
pict->data[2][y * pict->linesize[2] + x] = 64 + x + i * 5;
}
}
}
static AVFrame *get_video_frame(OutputStream *ost)
{
AVCodecContext *c = ost->enc;
/* check if we want to generate more frames */
if (av_compare_ts(ost->next_pts, c->time_base,
STREAM_DURATION, (AVRational){ 1, 1 }) >= 0)
return NULL;
/* when we pass a frame to the encoder, it may keep a reference to it
* internally; make sure we do not overwrite it here */
if (av_frame_make_writable(ost->frame) < 0)
exit(1);
if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
/* as we only generate a YUV420P picture, we must convert it
* to the codec pixel format if needed */
/*if (!ost->sws_ctx) {
ost->sws_ctx = sws_getContext(c->width, c->height,
AV_PIX_FMT_YUV420P,
c->width, c->height,
c->pix_fmt,
SCALE_FLAGS, NULL, NULL, NULL);
if (!ost->sws_ctx) {
fprintf(stderr,
"Could not initialize the conversion context\n");
exit(1);
}
}
fill_yuv_image(ost->tmp_frame, ost->next_pts, c->width, c->height);
sws_scale(ost->sws_ctx,
(const uint8_t * const *)ost->tmp_frame->data, ost->tmp_frame->linesize,
0, c->height, ost->frame->data, ost->frame->linesize);*/
} else {
fill_yuv_image(ost->frame, ost->next_pts, c->width, c->height);
}
ost->frame->pts = ost->next_pts++;
return ost->frame;
}
/*
* encode one video frame and send it to the muxer
* return 1 when encoding is finished, 0 otherwise
*/
static int write_video_frame(AVFormatContext *oc, OutputStream *ost)
{
int ret;
AVCodecContext *c;
AVFrame *frame;
int got_packet = 0;
AVPacket pkt = { 0 };
c = ost->enc;
frame = get_video_frame(ost);
if (frame)
{
ret = avcodec_send_frame(ost->enc, frame);
if (ret < 0)
{
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Error encoding video frame: %s\n", cb_av_err2str(s, AV_ERROR_MAX_STRING_SIZE, ret));
exit(1);
}
}
av_init_packet(&pkt);
ret = avcodec_receive_packet(ost->enc,&pkt);
if (ret < 0)
{
if (ret == AVERROR(EAGAIN)) { ret = 0; }
else
{
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Error receiving packet: %s\n", cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
exit(1);
}
}
else
{
got_packet = 1;
ret = write_frame(oc, &c->time_base, ost->st, &pkt);
}
if (ret < 0) {
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Error while writing video frame: %s\n", cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
exit(1);
}
return (frame || got_packet) ? 0 : 1;
}
static void close_stream(AVFormatContext *oc, OutputStream *ost)
{
avcodec_free_context(&ost->enc);
av_frame_free(&ost->frame);
av_frame_free(&ost->tmp_frame);
//sws_freeContext(ost->sws_ctx);
//swr_free(&ost->swr_ctx);
}
/**************************************************************/
/* media file output */
int main(int argc, char **argv)
{
OutputStream video_st = { 0 }, audio_st = { 0 };
const char *filename;
AVOutputFormat *fmt;
AVFormatContext *oc;
AVCodec /**audio_codec,*/ *video_codec;
int ret;
int have_video = 0, have_audio = 0;
int encode_video = 0, encode_audio = 0;
AVDictionary *opt = NULL;
int i;
/* Initialize libavcodec, and register all codecs and formats. */
av_register_all();
avformat_network_init();
if (argc < 2) {
printf("usage: %s output_file\n"
"API example program to output a media file with libavformat.\n"
"This program generates a synthetic audio and video stream, encodes and\n"
"muxes them into a file named output_file.\n"
"The output format is automatically guessed according to the file extension.\n"
"Raw images can also be output by using '%%d' in the filename.\n"
"\n", argv[0]);
return 1;
}
filename = argv[1];
for (i = 2; i+1 < argc; i+=2) {
if (!strcmp(argv[i], "-flags") || !strcmp(argv[i], "-fflags"))
av_dict_set(&opt, argv[i]+1, argv[i+1], 0);
}
const char *pfilename = filename;
/* allocate the output media context */
avformat_alloc_output_context2(&oc, NULL, "mpegts", pfilename);
if (!oc) {
printf("Could not deduce output format from file extension: using MPEG.\n");
avformat_alloc_output_context2(&oc, NULL, "mpeg", pfilename);
}
if (!oc)
return 1;
fmt = oc->oformat;
/* Add the audio and video streams using the default format codecs
* and initialize the codecs. */
if (fmt->video_codec != AV_CODEC_ID_NONE) {
add_stream(&video_st, oc, &video_codec, fmt->video_codec);
have_video = 1;
encode_video = 1;
}
/*if (fmt->audio_codec != AV_CODEC_ID_NONE) {
add_stream(&audio_st, oc, &audio_codec, fmt->audio_codec);
have_audio = 1;
encode_audio = 1;
}*/
/* Now that all the parameters are set, we can open the audio and
* video codecs and allocate the necessary encode buffers. */
if (have_video)
open_video(oc, video_codec, &video_st, opt);
//if (have_audio)
// open_audio(oc, audio_codec, &audio_st, opt);
av_dump_format(oc, 0, pfilename, 1);
/* open the output file, if needed */
if (!(fmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&oc->pb, pfilename, AVIO_FLAG_WRITE);
if (ret < 0) {
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Could not open '%s': %s\n", pfilename,
cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
return 1;
}
}
/* Write the stream header, if any. */
ret = avformat_write_header(oc, &opt);
if (ret < 0) {
char s[AV_ERROR_MAX_STRING_SIZE];
fprintf(stderr, "Error occurred when opening output file: %s\n",
cb_av_err2str(s,AV_ERROR_MAX_STRING_SIZE,ret));
return 1;
}
while (encode_video || encode_audio) {
/* select the stream to encode */
if (encode_video &&
(!encode_audio || av_compare_ts(video_st.next_pts, video_st.enc->time_base,
audio_st.next_pts, audio_st.enc->time_base) <= 0)) {
encode_video = !write_video_frame(oc, &video_st);
} else {
//encode_audio = !write_audio_frame(oc, &audio_st);
}
//std::this_thread::sleep_for(std::chrono::milliseconds(35));
}
/* Write the trailer, if any. The trailer must be written before you
* close the CodecContexts open when you wrote the header; otherwise
* av_write_trailer() may try to use memory that was freed on
* av_codec_close(). */
av_write_trailer(oc);
/* Close each codec. */
if (have_video)
close_stream(oc, &video_st);
if (have_audio)
close_stream(oc, &audio_st);
if (!(fmt->flags & AVFMT_NOFILE))
/* Close the output file. */
avio_closep(&oc->pb);
/* free the stream */
avformat_free_context(oc);
return 0;
}
//
Environment:
Ubuntu Zesty (17.04)
FFmpeg version 3.2.4 (via package manager)
gcc 6.3 (C++)
You have to do following:
call avcodec_alloc_context3(...). This call will set default execute and execute2 functions in new context
set c->thread_count = number_of_threads_in_your_thread_pool()
call avcodec_open2(...).
set c->execute and c->execute2 to point to your functions
call ff_thread_free(c). This function isnt exposed in libavcodec headers but you can add following line:
extern "C" void ff_thread_free(AVCodecContext *s);
Drawback is that libavcodec will create internal thread pool after avcodec_open2(...) call, and that pool will be deleted in ff_thread_free() call.
Internal thread pool is very efficient, but its not good if you plan to do parallel encoding of multiple video feeds. In that case libavcodec will create separate thread pool for each encoding video feed.
I have a question regarding buffer read with ffmpeg.
Idea is as it follows: an outside module (can not change it) is providing me video stream in chunks of data and it is giving me input data and it's size in bytes ("framfunction" function input parameters). I have to copy input data to a buffer and read it with ffmpeg (Zeranoe) and extract video frames. Each time I receive new data, my function "framfunction" will be called. All unprocessed data from a first run will be moved at the beginning of the buffer followed by a new data on the second run and so on. It is essentially based on source and Dranger tutorials. My current attempt is like this and just look at comments (I've left only ones regarding current buffer function) in code to get a picture what I want to do(I know it is messy and it works - sort of; skipping some frames. Any suggestions around ffmpeg code and buffer design are welcome):
#include <iostream>
#include <string>
extern "C"
{
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
#include <libswscale/swscale.h>
#include <libavformat/avio.h>
#include <libavutil/file.h>
}
struct buffer_data {
uint8_t *ptr;
size_t size;
};
static int read_packet(void *opaque, uint8_t *buf, int buf_size)
{
struct buffer_data *bd = (struct buffer_data *)opaque;
buf_size = FFMIN(buf_size, bd->size);
memcpy(buf, bd->ptr, buf_size);
bd->ptr += buf_size;
bd->size -= buf_size;
return buf_size;
}
class videoclass
{
private:
uint8_t* inputdatabuffer;
size_t offset;
public:
videoclass();
~videoclass();
int framfunction(uint8_t* inputbytes, int inputbytessize);
};
videoclass::videoclass()
: inputdatabuffer(nullptr)
, offset(0)
{
inputdatabuffer = new uint8_t[8388608]; //buffer where the input data will be stored
}
videoclass::~videoclass()
{
delete[] inputdatabuffer;
}
int videoclass::framfunction(uint8_t* inputbytes, int inputbytessize)
{
int i, videoStream, numBytes, frameFinished;
AVFormatContext *pFormatCtx = NULL;
AVCodecContext *pCodecCtx = NULL;
AVIOContext *avio_ctx = NULL;
AVCodec *pCodec = NULL;
AVFrame *pFrame = NULL;
AVFrame *pFrameRGB = NULL;
AVPacket packet;
uint8_t *buffer = NULL;
uint8_t *avio_ctx_buffer = NULL;
size_t avio_ctx_buffer_size = 4096;
size_t bytes_processed = 0;
struct buffer_data bd = { 0 };
//if (av_file_map("sample.ts", &inputbytes, &inputbytessize, 0, NULL) < 0)//
// return -1;
memcpy(inputdatabuffer + offset, inputbytes, inputbytessize);//copy new data to buffer inputdatabuffer with offset calculated at the end of previous function run. In other words - cope new data after unprocessed data from a previous call
offset += inputbytessize; //total number of bytes in buffer. Size of an unprocessed data from the last run + size of new data (inputbytessize)
bd.ptr = inputdatabuffer;
bd.size = offset;
if (!(pFormatCtx = avformat_alloc_context()))
return -1;
avio_ctx_buffer = (uint8_t *)av_malloc(avio_ctx_buffer_size);
avio_ctx = avio_alloc_context(avio_ctx_buffer, avio_ctx_buffer_size,0, &bd, &read_packet, NULL, NULL);
pFormatCtx->pb = avio_ctx;
av_register_all();
avcodec_register_all();
pFrame = av_frame_alloc();
pFrameRGB = av_frame_alloc();
if (avformat_open_input(&pFormatCtx, NULL, NULL, NULL) != 0)
return -2;
if (avformat_find_stream_info(pFormatCtx, NULL) < 0)
return -3;
videoStream = -1;
for (i = 0; i < pFormatCtx->nb_streams; i++)
if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
videoStream = i;
break;
}
if (videoStream == -1)
return -4;
pCodecCtx = pFormatCtx->streams[videoStream]->codec;
pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
if (pCodec == NULL){
std::cout << "Unsupported codec" << std::endl;
return -5;
}
if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0)
return -6;
numBytes = avpicture_get_size(PIX_FMT_BGR24, pCodecCtx->width, pCodecCtx->height);
buffer = (uint8_t *)av_malloc(numBytes*sizeof(uint8_t));
avpicture_fill((AVPicture *)pFrameRGB, buffer, PIX_FMT_BGR24, pCodecCtx->width, pCodecCtx->height);
while (av_read_frame(pFormatCtx, &packet) >= 0){
if (packet.stream_index == videoStream){
avcodec_decode_video2(pCodecCtx, pFrame, &frameFinished, &packet);
if (frameFinished){
std::cout << "Yaay, frame found" << std::endl;
}
}
av_free_packet(&packet);
bytes_processed = (size_t)pFormatCtx->pb->pos; //data which is processed so far (x bytes out of inputbytessize)??????????????????????????????
}
offset -= bytes_processed; //size of unprocessed data
av_free(buffer);
av_free(pFrameRGB);
av_free(pFrame);
avcodec_close(pCodecCtx);
av_freep(&avio_ctx->buffer);
av_freep(&avio_ctx);
avformat_close_input(&pFormatCtx);
memmove(inputdatabuffer, inputdatabuffer + bytes_processed, offset);//move unprocessed data to begining of the main buffer
return 0;
}
Call of my function would be something like this
WHILE(VIDEO_INPUT)
{
READ VIDEO DATA FROM INPUT BUFFER
STORE DATA FROM INPUT BUFFER AND SIZE OP THAT DATA TO VARIABLES NEW_DATA AND NEW_DATA_SIZE
CALL FUNCTION FRAMMUNCTION AND PASS NEW_DATA AND NEW_DATA_FUNCTION
DO OTHER THINGS
}
What I would like to know is what is exact size of unprocessed data. Comments in code are showing my attempt but I think it is not good enough so I need some help with that issue.
EDIT: magic question is how to get correct "bytes_processed" size. I've also made an pdf with explanation how my buffer should work pdf file
Thanks
I want to decode a stream of AAC frames continuously, one frame at a time.
I went through the ffmpeg examples (The correct answer doesn't need to make use of ffmpeg necessarily), and I only found examples using complete AAC files and batch algorithms. But I want to decode a continuous AAC stream. How can I do this?
UPDATE: Following the comments and Decode AAC to PCM with ffmpeg on android , I was able to decode to PCM using ffmpeg, however the output is very metallic and noisy. What am I doing wrong here when calling this method for each AAC frame:
...
/*loop that receives frame in buffer*/
while(1){
/*receive frame*/
input = receive_one_buffer();
/*decode frame*/
decodeBuffer(input,strlen(input),Outfile);
}
...
/*decode frame*/
void decodeBuffer(char * input, int numBytes, ofstream& Outfile) {
/*"input" contains one AAC-LC frame*/
//copy bytes from buffer
uint8_t inputBytes[numBytes + FF_INPUT_BUFFER_PADDING_SIZE];
memset(inputBytes, 0, numBytes + FF_INPUT_BUFFER_PADDING_SIZE);
memcpy(inputBytes, input, numBytes);
av_register_all();
AVCodec *codec = avcodec_find_decoder(CODEC_ID_AAC);
AVCodecContext *avCtx = avcodec_alloc_context();
avCtx->channels = 1;
avCtx->sample_rate = 44100;
//the input buffer
AVPacket avPacket;
av_init_packet(&avPacket);
avPacket.size = numBytes; //input buffer size
avPacket.data = inputBytes; // the input buffer
int outSize;
int len;
uint8_t *outbuf = static_cast<uint8_t *>(malloc(AVCODEC_MAX_AUDIO_FRAME_SIZE));
while (avPacket.size > 0) {
outSize = AVCODEC_MAX_AUDIO_FRAME_SIZE;
len = avcodec_decode_audio3(avCtx, (short *) outbuf, &outSize,
&avPacket);
Outfile.write((char*)outbuf, outSize);
avPacket.size -= len;
avPacket.data += len;
}
av_free_packet(&avPacket);
avcodec_close(avCtx);
//av_free(avCtx);
return;
}
You have to keep the decoder alive between subsequent decode calls.
The AAC decoder must decode the previous buffer to be correctly "primed".
Please check for details:
https://developer.apple.com/library/mac/technotes/tn2258/_index.html
The following code assumes that the "ReceiveBuffer" function returns exactly one
complete AAC access unit.
(BTW: you can't use strlen on a binary buffer; you'll get the distance to the first zero and not the buffer length)
#include <iostream>
#include <fstream>
#include "libavcodec\avcodec.h"
#include "libavformat\avformat.h"
#include "libavdevice\avdevice.h"
#include "libavfilter\avfilter.h"
AVCodecContext * CreateContext()
{
av_register_all();
AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_AAC);
AVCodecContext *avCtx = avcodec_alloc_context3(codec);
return avCtx;
}
int32_t DecodeBuffer
(
std::ostream & output,
uint8_t * pInput,
uint32_t cbInputSize,
AVCodecContext * pAVContext
)
{
int32_t cbDecoded = 0;
//the input buffer
AVPacket avPacket;
av_init_packet(&avPacket);
avPacket.size = cbInputSize; //input buffer size
avPacket.data = pInput; // the input bufferra
AVFrame * pDecodedFrame = av_frame_alloc();
int nGotFrame = 0;
cbDecoded = avcodec_decode_audio4( pAVContext,
pDecodedFrame,
& nGotFrame,
& avPacket);
int data_size = av_samples_get_buffer_size( NULL,
pAVContext->channels,
pDecodedFrame->nb_samples,
pAVContext->sample_fmt,
1);
output.write((const char*)pDecodedFrame->data[0],data_size);
av_frame_free(&pDecodedFrame);
return cbDecoded;
}
uint8_t * ReceiveBuffer( uint32_t * cbBufferSize)
{
// TODO implement
return NULL;
}
int main
(
int argc,
char *argv[]
)
{
int nResult = 0;
AVCodecContext * pAVContext = CreateContext();
std::ofstream myOutputFile("audio.pcm",std::ios::binary);
while(1)
{
uint32_t cbBufferSize = 0;
uint8_t *pCompressedAudio = ReceiveBuffer( &cbBufferSize);
if(cbBufferSize && pCompressedAudio)
{
DecodeBuffer( myOutputFile,
pCompressedAudio,
cbBufferSize,
pAVContext);
}
else
{
break;
}
}
avcodec_close(pAVContext);
av_free(pAVContext);
return nResult;
}
When I decode frames from avi file and then decode them in x264 and save to mp4 file, the fps of the output file is always 12,800. Therefore the file is played very fast. But, when I save the encoded in h264 frames in avi format and not mp4, so the fps is as I wanted - 25.
What could be the problem?
Here the code I wrote in VS2010:
#include "stdafx.h"
#include "inttypes.h"
extern "C" {
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libavutil/avutil.h"
#include <libswscale/swscale.h>
#include <libavutil/opt.h>
#include <libswscale/swscale.h>
#include <libavutil/imgutils.h>
}
#include <iostream>
using namespace std;
int main(int argc, char* argv[])
{
const char* inFileName = "C:\\000227_C1_GAME.avi";
const char* outFileName = "c:\\test.avi";
const char* outFileType = "avi";
av_register_all();
AVFormatContext* inContainer = NULL;
if(avformat_open_input(&inContainer, inFileName, NULL, NULL) < 0)
exit(1);
if(avformat_find_stream_info(inContainer, NULL) < 0)
exit(1);
// Find video stream
int videoStreamIndex = -1;
for (unsigned int i = 0; i < inContainer->nb_streams; ++i)
{
if (inContainer->streams[i] && inContainer->streams[i]->codec &&
inContainer->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
{
videoStreamIndex = i;
break;
}
}
if (videoStreamIndex == -1) exit(1);
AVFormatContext* outContainer = NULL;
if(avformat_alloc_output_context2(&outContainer, NULL, outFileType, outFileName) < 0)
exit(1);
// ----------------------------
// Decoder
// ----------------------------
AVStream const *const inStream = inContainer->streams[videoStreamIndex];
AVCodec *const decoder = avcodec_find_decoder(inStream->codec->codec_id);
if(!decoder)
exit(1);
if(avcodec_open2(inStream->codec, decoder, NULL) < 0)
exit(1);
// ----------------------------
// Encoder
// ----------------------------
AVCodec *encoder = avcodec_find_encoder(AV_CODEC_ID_H264);
if(!encoder)
exit(1);
AVStream *outStream = avformat_new_stream(outContainer, encoder);
if(!outStream)
exit(1);
avcodec_get_context_defaults3(outStream->codec, encoder);
// Construct encoder
if(outContainer->oformat->flags & AVFMT_GLOBALHEADER)
outStream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
outStream->codec->coder_type = AVMEDIA_TYPE_VIDEO;
outStream->codec->pix_fmt = AV_PIX_FMT_YUV420P;
outStream->codec->width = inStream->codec->width;
outStream->codec->height = inStream->codec->height;
outStream->codec->codec_id = encoder->id;
outStream->codec->bit_rate = 500000;
//outStream->codec->rc_min_rate = 600000;
//outStream->codec->rc_max_rate = 800000;
outStream->codec->time_base.den = 25;
outStream->codec->time_base.num = 1;
outStream->codec->gop_size = 250; // Keyframe interval(=GOP length). Determines maximum distance distance between I-frames
outStream->codec->keyint_min = 25; // minimum GOP size
outStream->codec->max_b_frames = 3;//16; // maximum number of B-frames between non-B-frames
outStream->codec->b_frame_strategy = 1; // decides the best number of B-frames to use. Default mode in x264.
outStream->codec->scenechange_threshold = 40;
outStream->codec->refs = 6; // abillity to reference frames other than the one immediately prior to the current frame. specify how many references can be used.
outStream->codec->qmin = 0;//10;
outStream->codec->qmax = 69;//51;
outStream->codec->qcompress = 0.6;
outStream->codec->max_qdiff = 4;
outStream->codec->i_quant_factor = 1.4;//0.71;
outStream->codec->refs=1;//3;
outStream->codec->chromaoffset = -2;
outStream->codec->thread_count = 1;
outStream->codec->trellis = 1;
outStream->codec->me_range = 16;
outStream->codec->me_method = ME_HEX; //hex
outStream->codec->flags2 |= CODEC_FLAG2_FAST;
outStream->codec->coder_type = 1;
if(outStream->codec->codec_id == AV_CODEC_ID_H264)
{
av_opt_set(outStream->codec->priv_data, "preset", "slow", 0);
}
// Open encoder
if(avcodec_open2(outStream->codec, encoder, NULL) < 0)
exit(1);
// Open output container
if(avio_open(&outContainer->pb, outFileName, AVIO_FLAG_WRITE) < 0)
exit(1);
//close_o
AVFrame *decodedFrame = avcodec_alloc_frame();
if(!decodedFrame)
exit(1);
AVFrame *encodeFrame = avcodec_alloc_frame();
if(!encodeFrame)
exit(1);
encodeFrame->format = outStream->codec->pix_fmt;
encodeFrame->width = outStream->codec->width;
encodeFrame->height = outStream->codec->height;
if(av_image_alloc(encodeFrame->data, encodeFrame->linesize,
outStream->codec->width, outStream->codec->height,
outStream->codec->pix_fmt, 1) < 0)
exit(1);
av_dump_format(inContainer, 0, inFileName,0);
//Write header to ouput container
avformat_write_header(outContainer, NULL);
AVPacket decodePacket, encodedPacket;
int got_frame, len;
while(av_read_frame(inContainer, &decodePacket)>=0)
{
if (decodePacket.stream_index == videoStreamIndex)
{
len = avcodec_decode_video2(inStream->codec, decodedFrame, &got_frame, &decodePacket);
if(len < 0)
exit(1);
if(got_frame)
{
av_init_packet(&encodedPacket);
encodedPacket.data = NULL;
encodedPacket.size = 0;
if(avcodec_encode_video2(outStream->codec, &encodedPacket, decodedFrame, &got_frame) < 0)
exit(1);
if(got_frame)
{
if (outStream->codec->coded_frame->key_frame)
encodedPacket.flags |= AV_PKT_FLAG_KEY;
encodedPacket.stream_index = outStream->index;
if(av_interleaved_write_frame(outContainer, &encodedPacket) < 0)
exit(1);
av_free_packet(&encodedPacket);
}
}
}
av_free_packet(&decodePacket);
}
av_write_trailer(outContainer);
avio_close(outContainer->pb);
avcodec_free_frame(&encodeFrame);
avcodec_free_frame(&decodedFrame);
avformat_free_context(outContainer);
av_close_input_file(inContainer);
return 0;
}
The problem was with PTS and DTS of the packet. Before writing the packet to output( before av_interleaved_write_frame command) set PTS and DTS like this
if (encodedPacket.pts != AV_NOPTS_VALUE)
encodedPacket.pts = av_rescale_q(encodedPacket.pts, outStream->codec->time_base, outStream->time_base);
if (encodedPacket.dts != AV_NOPTS_VALUE)
encodedPacket.dts = av_rescale_q(encodedPacket.dts, outStream->codec->time_base, outStream->time_base);