After experimenting with the examples in the FFmpeg documentation, I was finally able to create a short program that extracts every nth frame from a video. However, the output files it produces are huge, over 15 MB per image. How can I change this to produce lower-quality images?
The result I am trying to get is done easily on the command line with:
ffmpeg -i [input video] -vf "select=not(mod(n\,10))" -fps_mode vfr img_%03d.jpg
For a video with about 500 frames, this creates 50 images that are only about 800 KB each; how would I be able to mimic this in my program?
My code consists of opening the input file, decoding the packets, then saving the frames:
#include <cstdio>
#include <cstdlib>
#include <iostream>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/opt.h>
#include <libswscale/swscale.h>
}
static AVFormatContext *fmt_ctx;
static AVCodecContext *dec_ctx;
static int video_stream_index = -1;
// OPEN THE INPUT FILE
static int open_input_file(const char *filename) {
// INIT VARS AND FFMPEG OBJECTS
int ret;
const AVCodec *dec;
// OPEN INPUT FILE
if((ret = avformat_open_input(&fmt_ctx, filename, NULL, NULL)) < 0) {
printf("ERROR: failed to open input file\n");
return ret;
}
// FIND STREAM INFO BASED ON INPUT FILE
if((ret = avformat_find_stream_info(fmt_ctx, NULL)) < 0) {
printf("ERROR: failed to find stream information\n");
return ret;
}
// FIND THE BEST VIDEO STREAM FOR THE INPUT FILE
ret = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &dec, 0);
if(ret < 0) {
printf("ERROR: failed to find a video stream in the input file\n");
return ret;
}
video_stream_index = ret;
// ALLOCATE THE DECODING CONTEXT FOR THE INPUT FILE
dec_ctx = avcodec_alloc_context3(dec);
if(!dec_ctx) {
printf("ERROR: failed to allocate decoding context\n");
// CAN NOT ALLOCATE MEMORY ERROR
return AVERROR(ENOMEM);
}
avcodec_parameters_to_context(dec_ctx, fmt_ctx->streams[video_stream_index]->codecpar);
// INIT THE VIDEO DECODER
if((ret = avcodec_open2(dec_ctx, dec, NULL)) < 0) {
printf("ERROR: failed to open video decoder\n");
return ret;
}
return 0;
}
// SAVE THE FILE
static void save(unsigned char *buf, int wrap, int x_size, int y_size, char *file_name) {
// INIT THE EMPTY FILE
FILE *file;
// OPEN AND WRITE THE IMAGE FILE
file = fopen(file_name, "wb");
fprintf(file, "P6\n%d %d\n%d\n", x_size, y_size, 255);
for(int i = 0; i < y_size; i++) {
fwrite(buf + i * wrap, 1, x_size * 3, file);
}
fclose(file);
}
// DECODE FRAME AND CONVERT IT TO AN RGB IMAGE
static void decode(AVCodecContext *cxt, AVFrame *frame, AVPacket *pkt,
const char *out_file_name, const char *file_ext, int mod=1) {
// INIT A BLANK CHAR TO HOLD THE FILE NAME AND AN EMPTY INT TO HOLD FUNCTION RETURN VALUES
char buf[1024];
int ret;
// SEND PACKET TO DECODER
ret = avcodec_send_packet(cxt, pkt);
if(ret < 0) {
printf("ERROR: error sending packet for decoding\n");
exit(1);
}
// CREATE A SCALAR CONTEXT FOR CONVERSION
SwsContext *sws_ctx = sws_getContext(dec_ctx->width, dec_ctx->height, dec_ctx->pix_fmt, dec_ctx->width,
dec_ctx->height, AV_PIX_FMT_RGB24, SWS_BICUBIC, NULL, NULL, NULL);
// CREATE A NEW RGB FRAME FOR CONVERSION
AVFrame* rgb_frame = av_frame_alloc();
rgb_frame->format = AV_PIX_FMT_RGB24;
rgb_frame->width = dec_ctx->width;
rgb_frame->height = dec_ctx->height;
// ALLOCATE A NEW BUFFER FOR THE RGB CONVERSION FRAME
av_frame_get_buffer(rgb_frame, 0);
// WHILE RETURN COMES BACK OKAY (FUNCTION RETURNS >= 0)...
while(ret >= 0) {
// GET FRAME BACK FROM DECODER
ret = avcodec_receive_frame(cxt, frame);
// IF "RESOURCE TEMP NOT AVAILABLE" OR "END OF FILE" ERROR...
if(ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
break;
} else if(ret < 0) {
printf("ERROR: error during decoding\n");
exit(1);
}
// IF THE FRAME NUMBER IS A MULTIPLE OF MOD (EVERY MOD-TH FRAME)...
if(cxt->frame_number % mod == 0){
// OUTPUT WHICH FRAME IS BEING SAVED
printf("saving frame %03d\n", cxt->frame_number);
// REMOVES TEMPORARY BUFFERED DATA
fflush(stdout);
// SCALE (CONVERT) THE OLD FRAME TO THE NEW RGB FRAME
sws_scale(sws_ctx, frame->data, frame->linesize, 0, frame->height,
rgb_frame->data, rgb_frame->linesize);
// SET "BUF" TO THE OUTPUT FILE PATH (SAVES TO "out_file_name_###.file_ext")
snprintf(buf, sizeof(buf), "%s_%03d.%s", out_file_name, cxt->frame_number, file_ext);
// SAVE THE FRAME
save(rgb_frame->data[0], rgb_frame->linesize[0], rgb_frame->width, rgb_frame->height, buf);
}
}
// FREE THE SCALER CONTEXT AND RGB FRAME BEFORE RETURNING (THEY WERE LEAKED BEFORE)
sws_freeContext(sws_ctx);
av_frame_free(&rgb_frame);
}
int main() {
// SIMULATE COMMAND LINE ARGUMENTS
char argv0[] = "test";
char argv1[] = "/User/Desktop/frames/test_video.mov";
char *argv[] = {argv0, argv1, nullptr};
// INIT VARS AND FFMPEG OBJECTS
int ret;
AVPacket *packet;
AVFrame *frame;
// ALLOCATE FRAME AND PACKET
frame = av_frame_alloc();
packet = av_packet_alloc();
if (!frame || !packet) {
fprintf(stderr, "Could not allocate frame or packet\n");
exit(1);
}
// IF FILE DOESN'T OPEN, GO TO THE END
if((ret = open_input_file(argv[1])) < 0) {
goto end;
}
// READ ALL THE PACKETS - simple
while(av_read_frame(fmt_ctx, packet) >= 0) {
// IF PACKET INDEX MATCHES VIDEO INDEX...
if (packet->stream_index == video_stream_index) {
// SEND PACKET TO THE DECODER and SAVE
std::string name = "/User/Desktop/frames/img";
std::string ext = "bmp";
decode(dec_ctx, frame, packet, name.c_str(), ext.c_str(), 5);
}
// UNREFERENCE THE PACKET
av_packet_unref(packet);
}
// END MARKER
end:
avcodec_free_context(&dec_ctx);
avformat_close_input(&fmt_ctx);
av_frame_free(&frame);
av_packet_free(&packet);
// FINAL ERROR CATCH
if (ret < 0 && ret != AVERROR_EOF) {
fprintf(stderr, "Error occurred: %s\n", av_err2str(ret));
exit(1);
}
exit(0);
}
I am not sure how to go about producing images that are much smaller in size like the ones produced on the command line. I have a feeling that this is possible somehow during the conversion to RGB or the saving of the file but I can't seem to figure out how.
Also, is there any way that I could go about this much more efficiently? On the command line, this finishes very quickly (no more than a second or two for a 9 sec. movie at ~60 fps).
The command-line version compresses each frame into a JPEG file, hence the size is very small. Your code, on the other hand, writes the raw RGB values directly into a file (regardless of the file extension). The size of each image is then height × width × 3 bytes, which is very big: a 1920×1080 frame, for example, is already about 6 MB uncompressed.
Solution: adjust your save function to also compress the image.
Code example from GitHub - save_frame_as_jpeg.c:
int save_frame_as_jpeg(AVCodecContext *pCodecCtx, AVFrame *pFrame, int FrameNo)
{
AVCodec *jpegCodec = avcodec_find_encoder(AV_CODEC_ID_JPEG2000);
if (!jpegCodec) { return -1; }
AVCodecContext *jpegContext = avcodec_alloc_context3(jpegCodec);
if (!jpegContext) { return -1; }
jpegContext->pix_fmt = pCodecCtx->pix_fmt;
jpegContext->height = pFrame->height;
jpegContext->width = pFrame->width;
jpegContext->time_base = (AVRational){1, 25}; /* avcodec_open2() rejects encoders without a time base */
if (avcodec_open2(jpegContext, jpegCodec, NULL) < 0)
{ return -1; }
FILE *JPEGFile;
char JPEGFName[256];
AVPacket packet = {.data = NULL, .size = 0};
av_init_packet(&packet);
int gotFrame;
if (avcodec_encode_video2(jpegContext, &packet, pFrame, &gotFrame) < 0)
{ return -1; }
sprintf(JPEGFName, "dvr-%06d.jpg", FrameNo);
JPEGFile = fopen(JPEGFName, "wb");
fwrite(packet.data, 1, packet.size, JPEGFile);
fclose(JPEGFile);
av_free_packet(&packet);
avcodec_close(jpegContext);
return 0;
}
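Note that avcodec_encode_video2, av_init_packet, and av_free_packet have since been deprecated. Below is a minimal sketch of the same idea against the newer send/receive API, using the MJPEG encoder, which is what actually produces baseline .jpg files (AV_CODEC_ID_JPEG2000 above emits JPEG 2000 codestreams despite the .jpg file name). It assumes the decoded frames are 4:2:0, and the qscale value of 5 is an arbitrary choice you can tune (1 = best quality, 31 = smallest files):
// Hedged sketch (FFmpeg 4.x+): encode one decoded AVFrame to a .jpg file.
static int encode_frame_to_jpeg(const AVFrame *frame, int frame_no)
{
    const AVCodec *codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
    if (!codec)
        return -1;
    AVCodecContext *ctx = avcodec_alloc_context3(codec);
    AVPacket *pkt = av_packet_alloc();
    int ret = (ctx && pkt) ? 0 : AVERROR(ENOMEM);
    if (ret >= 0) {
        ctx->width = frame->width;
        ctx->height = frame->height;
        ctx->pix_fmt = AV_PIX_FMT_YUVJ420P;     // MJPEG's full-range 4:2:0 format
        ctx->time_base = (AVRational){1, 25};   // required by avcodec_open2()
        ctx->flags |= AV_CODEC_FLAG_QSCALE;     // fixed quality instead of bit rate
        ctx->global_quality = FF_QP2LAMBDA * 5; // qscale 5; lower = higher quality
        ret = avcodec_open2(ctx, codec, NULL);
    }
    if (ret >= 0)
        ret = avcodec_send_frame(ctx, frame);
    if (ret >= 0)
        ret = avcodec_receive_packet(ctx, pkt);
    if (ret >= 0) {
        char name[64];
        snprintf(name, sizeof(name), "img_%03d.jpg", frame_no);
        FILE *f = fopen(name, "wb");
        if (f) { fwrite(pkt->data, 1, pkt->size, f); fclose(f); }
    }
    av_packet_free(&pkt);
    avcodec_free_context(&ctx);
    return ret < 0 ? ret : 0;
}
Because this hands the decoder's YUV frame straight to the JPEG encoder, it also skips the RGB conversion entirely, which should help with the speed concern.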
I have a program that reads an OpenGL window and encodes the read data as a video. Through a series of experiments I have learned that the bit format of my GLFW window is 8:8:8, as returned by glfwGetVideoMode(monitor). So I use this function to read the window:
glReadPixels(0, 0,gl_width, gl_height,GL_RGBA, GL_UNSIGNED_BYTE, (GLvoid*) Buffer);
and I simply encode it in the AV_PIX_FMT_YUV420P format.
Under normal circumstances this method works just fine. However, when I actually run the program, the output I get, as opposed to what I can see in the glfw window, is really low resolution and a bit pixelated.
Here is what my GLFW window looks like:
Now, this is what I want it to look like. It looks just fine in the OpenGL window, and I encode it directly without altering Buffer.
And here is what the encoded result, test.mp4, looks like when I play it with mplayer or similar software:
It is a lot more blurry and pixelated compared to the GLFW window. With some experimentation, and following an answer to another question I asked, I used avcodec_find_best_pix_fmt_of_list((*codec)->pix_fmts, AV_PIX_FMT_RGBA, 1, &ret) and it returned 13, which led me to believe that AV_PIX_FMT_YUVJ422P is the best option for this conversion to avoid a blurry/pixelated result. However, no matter which format I pass, every single one gives an error except AV_PIX_FMT_YUV420P. The error is:
[mpeg4 @ 0x558e74f47900] Specified pixel format yuvj422p is invalid or not supported
I have no idea why this is happening, as the format is bound to a define and it is changed throughout the entire program when I change the define.
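As a sanity check, you can print every pixel format an encoder actually advertises by walking its pix_fmts array. A small self-contained sketch, querying the mpeg4 encoder named in the error message:
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/pixdesc.h>
}
#include <cstdio>

int main() {
    const AVCodec *codec = avcodec_find_encoder(AV_CODEC_ID_MPEG4);
    if (!codec || !codec->pix_fmts)
        return 1;
    // The pix_fmts array is terminated by AV_PIX_FMT_NONE (-1).
    for (const enum AVPixelFormat *p = codec->pix_fmts; *p != AV_PIX_FMT_NONE; ++p)
        printf("%d: %s\n", *p, av_get_pix_fmt_name(*p));
    return 0;
}
On a typical build this prints only yuv420p for mpeg4, which would explain why every STREAM_PIX_FMT other than AV_PIX_FMT_YUV420P is rejected; if avcodec_find_best_pix_fmt_of_list returned yuvj422p, the list it searched was presumably not the mpeg4 encoder's.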
Here is my encoder so far (I have trimmed some parts):
video_encoder.cpp:
int video_encoder::write_frame(AVFormatContext *fmt_ctx, AVCodecContext *c,
AVStream *st, AVFrame *frame, AVPacket *pkt)
{
int ret;
// Conditional jump or move depends on uninitialised value
// Use of uninitialised value of size 8
// send the frame to the encoder
// Error is about c.
ret = avcodec_send_frame(c, frame);
if (ret < 0) {
std::cout << "Error sending a frame to the encoder: " << ret << std::endl;
exit(1);
}
while (ret >= 0) {
ret = avcodec_receive_packet(c, pkt);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
break;
else if (ret < 0) {
std::cout << "Error encoding a frame: " << ret << std::endl;
exit(1);
}
/* rescale output packet timestamp values from codec to stream timebase */
av_packet_rescale_ts(pkt, c->time_base, st->time_base);
pkt->stream_index = st->index;
/* Write the compressed frame to the media file. */
//log_packet(fmt_ctx, pkt);
//std::cout << "Packet: " << pkt << std::endl;
ret = av_interleaved_write_frame(fmt_ctx, pkt);
/* pkt is now blank (av_interleaved_write_frame() takes ownership of
* its contents and resets pkt), so that no unreferencing is necessary.
* This would be different if one used av_write_frame(). */
if (ret < 0) {
std::cout << "Error while writing output packet: " << ret << std::endl;
exit(1);
}
}
return ret == AVERROR_EOF ? 1 : 0;
}
/* Add an output stream. */
void video_encoder::add_stream(OutputStream *ost, AVFormatContext *oc,
const AVCodec **codec,
enum AVCodecID codec_id)
{
AVCodecContext *c;
int i;
/* find the encoder */
*codec = avcodec_find_encoder(codec_id);
if (!(*codec)) {
fprintf(stderr, "Could not find encoder for '%s'\n",
avcodec_get_name(codec_id));
exit(1);
}
ost->tmp_pkt = av_packet_alloc();
if (!ost->tmp_pkt) {
fprintf(stderr, "Could not allocate AVPacket\n");
exit(1);
}
ost->st = avformat_new_stream(oc, NULL);
if (!ost->st) {
fprintf(stderr, "Could not allocate stream\n");
exit(1);
}
ost->st->id = oc->nb_streams-1;
c = avcodec_alloc_context3(*codec);
if (!c) {
fprintf(stderr, "Could not alloc an encoding context\n");
exit(1);
}
ost->enc = c;
switch ((*codec)->type) {
case AVMEDIA_TYPE_AUDIO:
...
case AVMEDIA_TYPE_VIDEO:
c->codec_id = codec_id;
c->bit_rate = 10000;
/* Resolution must be a multiple of two. */
c->width = width;
c->height = height;
/* timebase: This is the fundamental unit of time (in seconds) in terms
* of which frame timestamps are represented. For fixed-fps content,
* timebase should be 1/framerate and timestamp increments should be
* identical to 1. */
ost->st->time_base = (AVRational){ 1, STREAM_FRAME_RATE }; // *frame_rate
c->time_base = ost->st->time_base;
c->gop_size = 7; /* emit one intra frame every seven frames at most */
c->pix_fmt = STREAM_PIX_FMT;
//if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO)
// c->max_b_frames = 2;
if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
/* Needed to avoid using macroblocks in which some coeffs overflow.
* This does not happen with normal video, it just happens here as
* the motion of the chroma plane does not match the luma plane. */
c->mb_decision = 2;
}
if ((*codec)->pix_fmts){
//c->pix_fmt = (*codec)->pix_fmts[0];
std::cout << "NEW FORMAT : " << c->pix_fmt << std::endl;
}
int ret;
avcodec_find_best_pix_fmt_of_list((*codec)->pix_fmts, AV_PIX_FMT_RGBA, 1, &ret);
std::cout << "Desired format is: " << ret << std::endl;
break;
}
/* Some formats want stream headers to be separate. */
if (oc->oformat->flags & AVFMT_GLOBALHEADER)
c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
/**************************************************************/
/* video output */
AVFrame* video_encoder::alloc_picture(enum AVPixelFormat pix_fmt, int width, int height)
{
AVFrame *picture;
int ret;
picture = av_frame_alloc();
if (!picture)
return NULL;
picture->format = pix_fmt;
picture->width = width;
picture->height = height;
/* allocate the buffers for the frame data */
ret = av_frame_get_buffer(picture, 0);
if (ret < 0) {
fprintf(stderr, "Could not allocate frame data.\n");
exit(1);
}
return picture;
}
void video_encoder::open_video(AVFormatContext *oc, const AVCodec *codec,
OutputStream *ost, AVDictionary *opt_arg)
{
int ret;
AVCodecContext *c = ost->enc;
AVDictionary *opt = NULL;
av_dict_copy(&opt, opt_arg, 0);
/* open the codec */
ret = avcodec_open2(c, codec, &opt);
av_dict_free(&opt);
if (ret < 0) {
fprintf(stderr, "Could not open video codec: %s\n", ret);
exit(1);
}
/* allocate and init a re-usable frame */
ost->frame = alloc_picture(c->pix_fmt, c->width, c->height);
if (!ost->frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
/* If the output format is not YUV420P, then a temporary YUV420P
* picture is needed too. It is then converted to the required
* output format. */
ost->tmp_frame = NULL;
/* copy the stream parameters to the muxer */
ret = avcodec_parameters_from_context(ost->st->codecpar, c);
if (ret < 0) {
fprintf(stderr, "Could not copy the stream parameters\n");
exit(1);
}
}
void video_encoder::set_frame_yuv_from_rgb(AVFrame *frame, struct SwsContext **sws_context) {
const int in_linesize[1] = { 4 * width };
//uint8_t* dest[4] = { rgb_data, NULL, NULL, NULL };
// Reuse one cached scaler: the original version allocated a fresh SwsContext
// on every frame and leaked it, since the pointer was passed by value.
*sws_context = sws_getCachedContext(*sws_context,
width, height, AV_PIX_FMT_RGBA,
width, height, STREAM_PIX_FMT,
SCALE_FLAGS, 0, 0, 0);
sws_scale(*sws_context, (const uint8_t * const *)&rgb_data, in_linesize, 0,
height, frame->data, frame->linesize);
}
AVFrame* video_encoder::get_video_frame(OutputStream *ost)
{
AVCodecContext *c = ost->enc;
/* check if we want to generate more frames */
if (av_compare_ts(ost->next_pts, c->time_base,
(float) STREAM_DURATION / 1000, (AVRational){ 1, 1 }) > 0)
return NULL;
/* when we pass a frame to the encoder, it may keep a reference to it
* internally; make sure we do not overwrite it here */
if (av_frame_make_writable(ost->frame) < 0)
exit(1);
set_frame_yuv_from_rgb(ost->frame, &ost->sws_ctx);
ost->frame->pts = ost->next_pts++;
return ost->frame;
}
/*
* encode one video frame and send it to the muxer
* return 1 when encoding is finished, 0 otherwise
*/
int video_encoder::write_video_frame(AVFormatContext *oc, OutputStream *ost)
{
return write_frame(oc, ost->enc, ost->st, get_video_frame(ost), ost->tmp_pkt);
}
void video_encoder::close_stream(AVFormatContext *oc, OutputStream *ost)
{
avcodec_free_context(&ost->enc);
av_frame_free(&ost->frame);
av_frame_free(&ost->tmp_frame);
av_packet_free(&ost->tmp_pkt);
sws_freeContext(ost->sws_ctx);
//swr_free(&ost->swr_ctx);
}
/**************************************************************/
/* media file output */
void video_encoder::set_encode_framebuffer(uint8_t* data, bool audio_only)
{
rgb_data = data;
}
video_encoder::~video_encoder()
{
av_write_trailer(enc_inf.oc);
/* Close each codec. */
if (enc_inf.have_video)
close_stream(enc_inf.oc, &enc_inf.video_st);
if (!(enc_inf.fmt->flags & AVFMT_NOFILE))
/* Close the output file. */
avio_closep(&enc_inf.oc->pb);
/* free the stream */
avformat_free_context(enc_inf.oc);
std::cout << "Done, closing." << std::endl;
}
bool video_encoder::encode_one_frame()
{
if (enc_inf.encode_video || enc_inf.encode_audio) {
/* select the stream to encode */
if (enc_inf.encode_video &&
(!enc_inf.encode_audio || av_compare_ts(enc_inf.video_st.next_pts, enc_inf.video_st.enc->time_base,
enc_inf.audio_st.next_pts, enc_inf.audio_st.enc->time_base) <= 0)) {
enc_inf.encode_video = !write_video_frame(enc_inf.oc, &enc_inf.video_st);
return true;
}
}
return false;
}
video_encoder::video_encoder(int w, int h, float fps, unsigned int duration)
:width(w), height(h), STREAM_FRAME_RATE(fps), STREAM_DURATION(duration)
{
//std::filesystem::create_directory("media");
//std::string as_str = "./output/" + std::string(getenv ("OUTPUT_UUID")) + ".mp4";
std::string as_str = "./output/video.mp4";
char* filename = const_cast<char*>(as_str.c_str());
enc_inf.video_st = (struct OutputStream) { 0 };
enc_inf.audio_st = (struct OutputStream) { 0 }; // the original comma expression zeroed only audio_st
enc_inf.video_st.next_pts = 1;
enc_inf.audio_st.next_pts = 1;
enc_inf.encode_audio = 0;
enc_inf.encode_video = 0; // likewise, the comma expression set only encode_video
int ret;
int i;
//rgb_data = (uint8_t*)malloc( 48 * sizeof(uint8_t) );
/* allocate the output media context */
avformat_alloc_output_context2(&enc_inf.oc, NULL, NULL, filename);
if (!enc_inf.oc) {
//VI_ERROR("Could not deduce output format from file extension: using MPEG.\n");
avformat_alloc_output_context2(&enc_inf.oc, NULL, "mpeg", filename);
}
if (!enc_inf.oc)
std::cout << "FAILED" << std::endl;
//return 1;
enc_inf.fmt = enc_inf.oc->oformat;
/* Add the audio and video streams using the default format codecs
* and initialize the codecs. */
if (enc_inf.fmt->video_codec != AV_CODEC_ID_NONE) {
add_stream(&enc_inf.video_st, enc_inf.oc, &video_codec, enc_inf.fmt->video_codec);
enc_inf.have_video = 1;
enc_inf.encode_video = 1;
}
/* Now that all the parameters are set, we can open the audio and
* video codecs and allocate the necessary encode buffers. */
if (enc_inf.have_video)
open_video(enc_inf.oc, video_codec, &enc_inf.video_st, opt);
/* open the output file, if needed */
if (!(enc_inf.fmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&enc_inf.oc->pb, filename, AVIO_FLAG_WRITE);
if (ret < 0) {
//VI_ERROR("Could not open '%s': %s\n", filename, ret);
//return 1;
}
}
/* Write the stream header, if any. */
ret = avformat_write_header(enc_inf.oc, &opt);
if (ret < 0) {
VI_ERROR("Error occurred when opening output file:");
//return 1;
}
//return 0;
}
video_encoder.h:
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P /* default pix_fmt */
#define SCALE_FLAGS SWS_SPLINE
/* The output bit rate in bit/s */
#define OUTPUT_BIT_RATE 96000
/* The number of output channels */
#define OUTPUT_CHANNELS 2
typedef struct OutputStream {
AVStream *st;
AVCodecContext *enc;
/* pts of the next frame that will be generated */
int64_t next_pts;
int samples_count;
AVFrame *frame;
AVFrame *tmp_frame;
AVPacket *tmp_pkt;
float t, tincr, tincr2;
struct SwsContext *sws_ctx;
struct SwrContext *swr_ctx;
} OutputStream;
typedef struct {
OutputStream video_st, audio_st;
const AVOutputFormat *fmt;
AVFormatContext *oc;
int have_video, have_audio, encode_video, encode_audio;
std::string name;
} encode_info;
Again, changing STREAM_PIX_FMT to anything other than AV_PIX_FMT_YUV420P causes the program to give the error.
What is the cause of this, and how can I fix it? Also, am I on the right track for fixing the pixelation problem? I'm using Ubuntu.
I am writing images to an AVStream, and after that I read an mp3 file and write it to a different AVStream. The problem is that the audio stream is a bit shorter than the video stream, so if I add more images and another audio file, the audio is no longer in sync with the video. My idea was to write silent audio data to the audio stream before writing the next audio file to it, but I cannot figure out how to write the silent data.
I found this post, but I don't know how to calculate the packet size or how to write the packet to the audio stream.
This was my most "successful" approach so far, but the result (audioTest(0xff).mp4) is far from silent.
/* set up the audio convert context */
libffmpeg::SwrContext* audioConvertContext = libffmpeg::swr_alloc();
libffmpeg::av_opt_set_int(audioConvertContext, "in_channel_count", data->audioCodecContext->channels, 0);
libffmpeg::av_opt_set_int(audioConvertContext, "out_channel_count", data->audioCodecContext->channels, 0);
libffmpeg::av_opt_set_int(audioConvertContext, "in_channel_layout", data->audioCodecContext->channel_layout, 0);
libffmpeg::av_opt_set_int(audioConvertContext, "out_channel_layout", data->audioCodecContext->channel_layout, 0);
libffmpeg::av_opt_set_int(audioConvertContext, "in_sample_rate", data->audioCodecContext->sample_rate, 0);
libffmpeg::av_opt_set_int(audioConvertContext, "out_sample_rate", data->audioCodecContext->sample_rate, 0);
libffmpeg::av_opt_set_sample_fmt(audioConvertContext, "in_sample_fmt", libffmpeg::AV_SAMPLE_FMT_S16, 0);
libffmpeg::av_opt_set_sample_fmt(audioConvertContext, "out_sample_fmt", data->audioCodecContext->sample_fmt, 0);
int ret = libffmpeg::swr_init(audioConvertContext);
if (ret < 0)
{
Helper::ThrowError("Failed to allocate audio reformat context.", ret);
}
/* set up silent frame */
libffmpeg::AVFrame* silentFrame = libffmpeg::av_frame_alloc();
if (!silentFrame)
{
Helper::ThrowError("Failed to allocate audio encode frame.");
}
silentFrame->nb_samples = data->audioCodecContext->frame_size;
silentFrame->format = data->audioCodecContext->sample_fmt;
silentFrame->channel_layout = data->audioCodecContext->channel_layout;
silentFrame->channels = data->audioCodecContext->channels;
silentFrame->sample_rate = data->audioCodecContext->sample_rate;
/* alloc the frame buffer */
ret = libffmpeg::av_frame_get_buffer(silentFrame, 0);
if (ret < 0)
{
Helper::ThrowError("Could not allocate audio data buffers.");
}
int got_output;
int samples_count = 0; /* was read uninitialized when computing the first pts */
double duration = 4 * (double)data->audioStream->time_base.den / (double)data->audioStream->time_base.num;
while (av_stream_get_end_pts(data->audioStream) < duration)
{
libffmpeg::AVPacket pkt;
libffmpeg::av_init_packet(&pkt);
ret = libffmpeg::av_frame_make_writable(silentFrame);
if (ret < 0)
{
Helper::ThrowError("Could not make frame writable.");
}
for (int j = 0; j < data->audioCodecContext->frame_size; j++)
{
silentFrame->data[0][2 * j] = 0xff;
for (int k = 1; k < data->audioCodecContext->channels; k++)
{
silentFrame->data[0][2 * j + k] = silentFrame->data[0][2 * j];
}
}
int dst_nb_samples = libffmpeg::av_rescale_rnd(
libffmpeg::swr_get_delay(audioConvertContext, data->audioCodecContext->sample_rate) + silentFrame->nb_samples,
data->audioCodecContext->sample_rate, data->audioCodecContext->sample_rate,
libffmpeg::AV_ROUND_UP);
ret = libffmpeg::swr_convert(
audioConvertContext,
silentFrame->data, dst_nb_samples,
(const libffmpeg::uint8_t * *) & silentFrame->data,
silentFrame->nb_samples);
if (ret < 0)
{
Helper::ThrowError("Error while converting audio frame.", ret);
}
silentFrame->pts = libffmpeg::av_rescale_q(samples_count, libffmpeg::AVRational{ 1, data->audioCodecContext->sample_rate }, data->audioCodecContext->time_base);
samples_count += dst_nb_samples;
ret = libffmpeg::avcodec_encode_audio2(data->audioCodecContext, &pkt, silentFrame, &got_output);
if (ret < 0)
{
Helper::ThrowError("Error while encoding audio frame.", ret);
}
if (got_output)
{
pkt.stream_index = data->audioStream->index;
if (ret = av_write_frame(data->formatContext, &pkt))
{
Helper::ThrowError("Error while writing audio frame.", ret);
}
libffmpeg::av_packet_unref(&pkt);
}
}
libffmpeg::av_frame_free(&silentFrame);
The mistake was how I wrote to the array. I am not that used to C++, so my solution may be a bit messy, but at least it works now.
/* set up the audio convert context */
libffmpeg::SwrContext* audioConvertContext = libffmpeg::swr_alloc();
libffmpeg::av_opt_set_int(audioConvertContext, "in_channel_count", data->audioCodecContext->channels, 0);
libffmpeg::av_opt_set_int(audioConvertContext, "out_channel_count", data->audioCodecContext->channels, 0);
libffmpeg::av_opt_set_int(audioConvertContext, "in_channel_layout", data->audioCodecContext->channel_layout, 0);
libffmpeg::av_opt_set_int(audioConvertContext, "out_channel_layout", data->audioCodecContext->channel_layout, 0);
libffmpeg::av_opt_set_int(audioConvertContext, "in_sample_rate", data->audioCodecContext->sample_rate, 0);
libffmpeg::av_opt_set_int(audioConvertContext, "out_sample_rate", data->audioCodecContext->sample_rate, 0);
libffmpeg::av_opt_set_sample_fmt(audioConvertContext, "in_sample_fmt", libffmpeg::AV_SAMPLE_FMT_S16, 0);
libffmpeg::av_opt_set_sample_fmt(audioConvertContext, "out_sample_fmt", data->audioCodecContext->sample_fmt, 0);
int ret = libffmpeg::swr_init(audioConvertContext);
if (ret < 0)
{
Helper::ThrowError("Failed to allocate audio reformat context.", ret);
}
/* set up silent frame */
libffmpeg::AVFrame* silentFrame = libffmpeg::av_frame_alloc();
if (!silentFrame)
{
Helper::ThrowError("Failed to allocate audio encode frame.");
}
silentFrame->nb_samples = data->audioCodecContext->frame_size;
silentFrame->format = data->audioCodecContext->sample_fmt;
silentFrame->channel_layout = data->audioCodecContext->channel_layout;
silentFrame->channels = data->audioCodecContext->channels;
silentFrame->sample_rate = data->audioCodecContext->sample_rate;
/* alloc the frame buffer */
ret = libffmpeg::av_frame_get_buffer(silentFrame, 0);
if (ret < 0)
{
Helper::ThrowError("Could not allocate audio data buffers.");
}
libffmpeg::AVPacket* pkt = libffmpeg::av_packet_alloc();
if (!pkt)
{
Helper::ThrowError("could not allocate the packet.");
}
void* buffer = malloc(data->audioCodecContext->frame_size * data->audioCodecContext->channels * 16);
for (int i = 0; i < data->audioCodecContext->frame_size * data->audioCodecContext->channels * 2; i++)
{
*((int*)buffer + i) = 0x0;
}
int got_output;
int samples_count = 0; /* was read uninitialized when computing the first pts */
double duration = 4 * (double)data->audioStream->time_base.den / (double)data->audioStream->time_base.num;
while (av_stream_get_end_pts(data->audioStream) < duration)
{
libffmpeg::AVPacket pkt;
libffmpeg::av_init_packet(&pkt);
ret = libffmpeg::av_frame_make_writable(silentFrame);
if (ret < 0)
{
Helper::ThrowError("Could not make frame writable.");
}
silentFrame->data[0] = (libffmpeg::uint8_t*) buffer;
int dst_nb_samples = libffmpeg::av_rescale_rnd(
libffmpeg::swr_get_delay(audioConvertContext, data->audioCodecContext->sample_rate) + silentFrame->nb_samples,
data->audioCodecContext->sample_rate, data->audioCodecContext->sample_rate,
libffmpeg::AV_ROUND_UP);
ret = libffmpeg::swr_convert(
audioConvertContext,
silentFrame->data, dst_nb_samples,
(const libffmpeg::uint8_t * *) & silentFrame->data,
silentFrame->nb_samples);
if (ret < 0)
{
Helper::ThrowError("Error while converting audio frame.", ret);
}
silentFrame->pts = libffmpeg::av_rescale_q(samples_count, libffmpeg::AVRational{ 1, data->audioCodecContext->sample_rate }, data->audioCodecContext->time_base);
samples_count += dst_nb_samples;
ret = libffmpeg::avcodec_encode_audio2(data->audioCodecContext, &pkt, silentFrame, &got_output);
if (ret < 0)
{
Helper::ThrowError("Error while encoding audio frame.", ret);
}
if (got_output)
{
pkt.stream_index = data->audioStream->index;
if (ret = av_write_frame(data->formatContext, &pkt))
{
Helper::ThrowError("Error while writing audio frame.", ret);
}
libffmpeg::av_packet_unref(&pkt);
}
}
free(buffer);
libffmpeg::av_frame_free(&silentFrame);
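As an aside, libavutil ships a helper that writes correct silence for any sample format, which could replace the hand-filled buffer above entirely. A hedged sketch (written without the libffmpeg namespace wrapper used above):
extern "C" {
#include <libavutil/frame.h>
#include <libavutil/samplefmt.h>
}

// Fill a frame with true silence; av_samples_set_silence() knows the correct
// "zero" value per sample format (e.g. 0x80 for U8, 0x00 for S16 and FLT).
static int fill_with_silence(AVFrame *frame, int nb_channels)
{
    int ret = av_frame_make_writable(frame);
    if (ret < 0)
        return ret;
    return av_samples_set_silence(frame->data, 0, frame->nb_samples,
                                  nb_channels,
                                  (enum AVSampleFormat)frame->format);
}
With the frame silenced in the encoder's own sample format like this, the swr_convert step should become unnecessary as well.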
I am streaming AAC audio over the network, and I need to use ffmpeg to decode the stream. I have tried it locally and everything works fine, but over the network I am not sure how to initialize my AVFormatContext.
I have had a look at the av_probe_input_buffer* and av_probe_input_format* functions, but it doesn't look like they are suited to what I want to do. My AVFormatContext is always incomplete, and I cannot find an audio stream, which prevents me from getting a codec context and initializing my decoder.
The problematic piece of code looks more or less like this:
AVFormatContext *pFormatCtx = avformat_alloc_context();
AVFrame *pFrame = av_frame_alloc();
AVPacket *packet = (AVPacket *)av_malloc(sizeof(AVPacket));
av_init_packet(packet);
packet->buf = NULL;
packet->data = NULL;
pFormatCtx->flags |= AVFMT_FLAG_CUSTOM_IO;
// Read 10 packets to give ffmpeg some hint about the data
for (int i = 0; i < 10; i++) {
uint32_t packet_size;
fread(&packet_size, 1, sizeof(packet_size), f);
uint8_t *pdata = (uint8_t*)malloc(packet_size);
int len = fread(pdata, 1, packet_size, f);
AVProbeData probeData;
probeData.buf = pdata;
probeData.buf_size = packet_size - 1;
probeData.filename = "";
pFormatCtx->iformat = av_probe_input_format(&probeData, 1);
}
// This is working, no error here
if (avformat_find_stream_info(pFormatCtx, NULL) < 0){
printf("Error finding stream info!");
}
int audioStream = -1;
for (int i = 0; i < pFormatCtx->nb_streams; i++)
if(pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO){
audioStream = i;
break;
}
// I get this error, so actually no audio stream is detected
if (audioStream == -1){
printf("Didn't find a audio stream.\n");
return -1;
}
printf("Audio stream found at index %d\n", audioStream);
// I do not get here, because an audio stream is not detected.
AVCodecContext *pCodecCtx = pFormatCtx->streams[audioStream]->codec;
// This is where I want to be!
AVCodec *pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
if (pCodec == NULL) {
printf("Codec not found.\n");
return -1;
}
// Open codec
if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0) {
printf("Could not open codec.\n");
return -1;
}
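For a network source, the usual pattern is custom IO: wrap your receive function in an AVIOContext, attach it to the AVFormatContext, and let avformat_open_input do the probing itself instead of assigning pFormatCtx->iformat by hand. A hedged sketch of that pattern; my_network_read is a hypothetical placeholder for your socket code, and ADTS framing is assumed so the aac demuxer can identify the stream:
extern "C" {
#include <libavformat/avformat.h>
}

// Hypothetical helper standing in for your network receive code.
extern int my_network_read(void *opaque, uint8_t *buf, int buf_size);

static int read_cb(void *opaque, uint8_t *buf, int buf_size)
{
    int n = my_network_read(opaque, buf, buf_size);
    return n > 0 ? n : AVERROR_EOF; // 0 bytes must be reported as an error
}

static AVFormatContext *open_network_stream(void *opaque)
{
    const int io_buf_size = 4096;
    uint8_t *io_buf = (uint8_t *)av_malloc(io_buf_size);
    AVIOContext *avio = avio_alloc_context(io_buf, io_buf_size, 0 /* read-only */,
                                           opaque, read_cb, NULL, NULL);
    AVFormatContext *fmt = avformat_alloc_context();
    fmt->pb = avio;
    fmt->flags |= AVFMT_FLAG_CUSTOM_IO;
    // Probing reads through the callback; with ADTS headers in the stream the
    // aac demuxer is found and find_stream_info can fill in the audio stream.
    if (avformat_open_input(&fmt, NULL, NULL, NULL) < 0)
        return NULL;
    if (avformat_find_stream_info(fmt, NULL) < 0)
        return NULL;
    return fmt;
}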
I want to apply processing to a video clip with a sound track, extract and process it frame by frame, and write the result to an output file. The number of frames, the frame size, and the speed remain unchanged in the output clip. I also want to keep the same audio track as in the source.
I can read the clip, decode frames, and process them using OpenCV. Audio packets also write fine. I'm stuck on forming the output video stream.
The minimal runnable code I have for now (sorry it's not so short, but I can't make it any shorter):
extern "C" {
#include <libavutil/timestamp.h>
#include <libavformat/avformat.h>
#include "libavcodec/avcodec.h"
#include <libavutil/opt.h>
#include <libavdevice/avdevice.h>
#include <libswscale/swscale.h>
}
#include "opencv2/opencv.hpp"
#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(55,28,1)
#define av_frame_alloc avcodec_alloc_frame
#endif
using namespace std;
using namespace cv;
static void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt, const char *tag)
{
AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
char buf1[AV_TS_MAX_STRING_SIZE] = { 0 };
av_ts_make_string(buf1, pkt->pts);
char buf2[AV_TS_MAX_STRING_SIZE] = { 0 };
av_ts_make_string(buf2, pkt->dts);
char buf3[AV_TS_MAX_STRING_SIZE] = { 0 };
av_ts_make_string(buf3, pkt->duration);
char buf4[AV_TS_MAX_STRING_SIZE] = { 0 };
av_ts_make_time_string(buf4, pkt->pts, time_base);
char buf5[AV_TS_MAX_STRING_SIZE] = { 0 };
av_ts_make_time_string(buf5, pkt->dts, time_base);
char buf6[AV_TS_MAX_STRING_SIZE] = { 0 };
av_ts_make_time_string(buf6, pkt->duration, time_base);
printf("pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
buf1, buf4,
buf2, buf5,
buf3, buf6,
pkt->stream_index);
}
int main(int argc, char **argv)
{
AVOutputFormat *ofmt = NULL;
AVFormatContext *ifmt_ctx = NULL, *ofmt_ctx = NULL;
AVPacket pkt;
AVFrame *pFrame = NULL;
AVFrame *pFrameRGB = NULL;
int frameFinished = 0;
pFrame = av_frame_alloc();
pFrameRGB = av_frame_alloc();
const char *in_filename, *out_filename;
int ret, i;
in_filename = "../../TestClips/Audio Video Sync Test.mp4";
out_filename = "out.mp4";
// Initialize FFMPEG
av_register_all();
// Get input file format context
if ((ret = avformat_open_input(&ifmt_ctx, in_filename, 0, 0)) < 0)
{
fprintf(stderr, "Could not open input file '%s'", in_filename);
goto end;
}
// Extract streams description
if ((ret = avformat_find_stream_info(ifmt_ctx, 0)) < 0)
{
fprintf(stderr, "Failed to retrieve input stream information");
goto end;
}
// Print detailed information about the input or output format,
// such as duration, bitrate, streams, container, programs, metadata, side data, codec and time base.
av_dump_format(ifmt_ctx, 0, in_filename, 0);
// Allocate an AVFormatContext for an output format.
avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, out_filename);
if (!ofmt_ctx)
{
fprintf(stderr, "Could not create output context\n");
ret = AVERROR_UNKNOWN;
goto end;
}
// The output container format.
ofmt = ofmt_ctx->oformat;
// Allocating output streams
for (i = 0; i < ifmt_ctx->nb_streams; i++)
{
AVStream *in_stream = ifmt_ctx->streams[i];
AVStream *out_stream = avformat_new_stream(ofmt_ctx, in_stream->codec->codec);
if (!out_stream)
{
fprintf(stderr, "Failed allocating output stream\n");
ret = AVERROR_UNKNOWN;
goto end;
}
ret = avcodec_copy_context(out_stream->codec, in_stream->codec);
if (ret < 0)
{
fprintf(stderr, "Failed to copy context from input to output stream codec context\n");
goto end;
}
out_stream->codec->codec_tag = 0;
if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
{
out_stream->codec->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
}
// Show output format info
av_dump_format(ofmt_ctx, 0, out_filename, 1);
// Open output file
if (!(ofmt->flags & AVFMT_NOFILE))
{
ret = avio_open(&ofmt_ctx->pb, out_filename, AVIO_FLAG_WRITE);
if (ret < 0)
{
fprintf(stderr, "Could not open output file '%s'", out_filename);
goto end;
}
}
// Write output file header
ret = avformat_write_header(ofmt_ctx, NULL);
if (ret < 0)
{
fprintf(stderr, "Error occurred when opening output file\n");
goto end;
}
// Search for input video codec info
AVCodec *in_codec = nullptr;
AVCodecContext* avctx = nullptr;
int video_stream_index = -1;
for (int i = 0; i < ifmt_ctx->nb_streams; i++)
{
if (ifmt_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
{
video_stream_index = i;
avctx = ifmt_ctx->streams[i]->codec;
in_codec = avcodec_find_decoder(avctx->codec_id);
if (!in_codec)
{
fprintf(stderr, "in codec not found\n");
exit(1);
}
break;
}
}
// Search for output video codec info
AVCodec *out_codec = nullptr;
AVCodecContext* o_avctx = nullptr;
int o_video_stream_index = -1;
for (int i = 0; i < ofmt_ctx->nb_streams; i++)
{
if (ofmt_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
{
o_video_stream_index = i;
o_avctx = ofmt_ctx->streams[i]->codec;
out_codec = avcodec_find_encoder(o_avctx->codec_id);
if (!out_codec)
{
fprintf(stderr, "out codec not found\n");
exit(1);
}
break;
}
}
// openCV pixel format
AVPixelFormat pFormat = AV_PIX_FMT_RGB24;
// Data size
int numBytes = avpicture_get_size(pFormat, avctx->width, avctx->height);
// allocate buffer
uint8_t *buffer = (uint8_t *)av_malloc(numBytes * sizeof(uint8_t));
// fill frame structure
avpicture_fill((AVPicture *)pFrameRGB, buffer, pFormat, avctx->width, avctx->height);
// frame area
int y_size = avctx->width * avctx->height;
// Open input codec
avcodec_open2(avctx, in_codec, NULL);
// Main loop
while (1)
{
AVStream *in_stream, *out_stream;
ret = av_read_frame(ifmt_ctx, &pkt);
if (ret < 0)
{
break;
}
in_stream = ifmt_ctx->streams[pkt.stream_index];
out_stream = ofmt_ctx->streams[pkt.stream_index];
log_packet(ifmt_ctx, &pkt, "in");
// copy packet
pkt.pts = av_rescale_q_rnd(pkt.pts, in_stream->time_base, out_stream->time_base, AVRounding(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
pkt.dts = av_rescale_q_rnd(pkt.dts, in_stream->time_base, out_stream->time_base, AVRounding(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
pkt.duration = av_rescale_q(pkt.duration, in_stream->time_base, out_stream->time_base);
pkt.pos = -1;
log_packet(ofmt_ctx, &pkt, "out");
if (pkt.stream_index == video_stream_index)
{
avcodec_decode_video2(avctx, pFrame, &frameFinished, &pkt);
if (frameFinished)
{
struct SwsContext *img_convert_ctx;
img_convert_ctx = sws_getCachedContext(NULL,
avctx->width,
avctx->height,
avctx->pix_fmt,
avctx->width,
avctx->height,
AV_PIX_FMT_BGR24,
SWS_BICUBIC,
NULL,
NULL,
NULL);
sws_scale(img_convert_ctx,
((AVPicture*)pFrame)->data,
((AVPicture*)pFrame)->linesize,
0,
avctx->height,
((AVPicture *)pFrameRGB)->data,
((AVPicture *)pFrameRGB)->linesize);
sws_freeContext(img_convert_ctx);
// Do some image processing
cv::Mat img(pFrame->height, pFrame->width, CV_8UC3, pFrameRGB->data[0],false);
cv::GaussianBlur(img,img,Size(5,5),3);
cv::imshow("Display", img);
cv::waitKey(5);
// --------------------------------
// Transform back to initial format
// --------------------------------
img_convert_ctx = sws_getCachedContext(NULL,
avctx->width,
avctx->height,
AV_PIX_FMT_BGR24,
avctx->width,
avctx->height,
avctx->pix_fmt,
SWS_BICUBIC,
NULL,
NULL,
NULL);
sws_scale(img_convert_ctx,
((AVPicture*)pFrameRGB)->data,
((AVPicture*)pFrameRGB)->linesize,
0,
avctx->height,
((AVPicture *)pFrame)->data,
((AVPicture *)pFrame)->linesize);
// --------------------------------------------
// Something must be here
// --------------------------------------------
//
// Write video frame (How to write frame to output stream ?)
//
// --------------------------------------------
sws_freeContext(img_convert_ctx);
}
}
else // write sound frame
{
ret = av_interleaved_write_frame(ofmt_ctx, &pkt);
}
if (ret < 0)
{
fprintf(stderr, "Error muxing packet\n");
break;
}
// Decrease packet ref counter
av_packet_unref(&pkt);
}
av_write_trailer(ofmt_ctx);
end:
avformat_close_input(&ifmt_ctx);
// close output
if (ofmt_ctx && !(ofmt->flags & AVFMT_NOFILE))
{
avio_closep(&ofmt_ctx->pb);
}
avformat_free_context(ofmt_ctx);
if (ret < 0 && ret != AVERROR_EOF)
{
char buf_err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
av_make_error_string(buf_err, AV_ERROR_MAX_STRING_SIZE, ret);
fprintf(stderr, "Error occurred: %s\n", buf_err);
return 1;
}
avcodec_close(avctx);
av_free(pFrame);
av_free(pFrameRGB);
return 0;
}
Your original code segfaults in my case. Initializing the output codec context seems to fix it. The code below works for me but I didn't test the OpenCV stuff as I don't have the lib installed.
Get the codec context:
// Search for output video codec info
AVCodec *out_codec = NULL;
AVCodecContext* o_avctx = NULL;
int o_video_stream_index = -1;
for (int i = 0; i < ofmt_ctx->nb_streams; i++)
{
if (ofmt_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
{
o_video_stream_index = i;
out_codec = avcodec_find_encoder(ofmt_ctx->streams[i]->codec->codec_id);
o_avctx = avcodec_alloc_context3(out_codec);
o_avctx->height = avctx->height;
o_avctx->width = avctx->width;
o_avctx->sample_aspect_ratio = avctx->sample_aspect_ratio;
if (out_codec->pix_fmts)
o_avctx->pix_fmt = out_codec->pix_fmts[0];
else
o_avctx->pix_fmt = avctx->pix_fmt;
o_avctx->time_base = avctx->time_base;
avcodec_open2(o_avctx, out_codec, NULL);
}
}
Encode and write:
// Main loop
while (1)
{
...
if (pkt.stream_index == video_stream_index)
{
avcodec_decode_video2(avctx, pFrame, &frameFinished, &pkt);
if (frameFinished)
{
...
// --------------------------------------------
// Something must be here
// --------------------------------------------
int got_packet = 0;
AVPacket enc_pkt = { 0 };
av_init_packet(&enc_pkt);
avcodec_encode_video2(o_avctx, &enc_pkt, pFrame, &got_packet);
av_interleaved_write_frame(ofmt_ctx, &enc_pkt);
....
}
}
You should assign the processed frame's packet information to your original packet and then pass it to av_interleaved_write_frame.
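Concretely, a hedged sketch (enc_pkt, o_avctx, and o_video_stream_index are the names from the answer code above) of stamping the encoded packet before muxing it:
// Give the fresh packet its stream index, then rescale the timestamps the
// encoder produced (codec time base) into the muxer stream's time base.
enc_pkt.stream_index = o_video_stream_index;
av_packet_rescale_ts(&enc_pkt, o_avctx->time_base,
                     ofmt_ctx->streams[o_video_stream_index]->time_base);
av_interleaved_write_frame(ofmt_ctx, &enc_pkt);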