Related
I'm trying to decode my Hevc file in c++ with using FFmpeg. I used Hevc decoder and try to save the frames in ppm format(Almost the whole source code comes from FFmpeg example [decode_video.c] https://ffmpeg.org/doxygen/trunk/decode_video_8c-example.html, what's new is the conversion from yuv to rgb). My Hevc file has 677 frames, which i checked with ffprobe in command window. But i any got 676 frames with my project. Also i have checked with other Hevc files, the results are same, i got always one frame less.
I also tried another FFmpeg example [demuxing_decoding.c] (https://ffmpeg.org/doxygen/trunk/demuxing_decoding_8c-example.html), the result is same, one frame less...
That seems to just happy with H265 and H264 files, is it a bug of FFmpeg?
Can anybody help me, i post my code here. Sorry, don't know how to attach my project and test files. Thanks a lot!
Best regards,
Ivan
#include <iostream>
extern "C"
{
#include "../Headers/libavcodec/avcodec.h"
#include "../Headers/libavformat/avformat.h"
#include "../Headers/libswscale/swscale.h"
}
#define INBUF_SIZE 4096
//Save RGB image as PPM file format
static void ppm_save(char* filename, AVFrame* frame)
{
FILE* file;
int i;
fopen_s(&file, filename, "wb");
fprintf(file, "P6\n%d %d\n%d\n", frame->width, frame->height, 255);
for (i = 0; i < frame->height; i++)
fwrite(frame->data[0] + i * frame->linesize[0], 1, frame->width * 3, file);
fclose(file);
}
void decode(AVCodecContext* dec_ctx, AVFrame* frame, AVPacket* pkt, const char* outfilePrefix)
{
char buf[1024];
int ret;
ret = avcodec_send_packet(dec_ctx, pkt);
if (ret < 0) {
fprintf(stderr, "Error sending a packet for decoding\n");
exit(1);
}
int sts;
////////////////////////////////////////////////////////////////////////////
//Create SWS Context for converting from decode pixel format (like YUV420) to RGB
struct SwsContext* sws_ctx = NULL;
sws_ctx = sws_getContext(dec_ctx->width,
dec_ctx->height,
dec_ctx->pix_fmt,
dec_ctx->width,
dec_ctx->height,
AV_PIX_FMT_RGB24,
SWS_BICUBIC,
NULL,
NULL,
NULL);
if (sws_ctx == nullptr)
{
return; //Error!
}
//Allocate frame for storing image converted to RGB.
AVFrame* pRGBFrame = av_frame_alloc();
pRGBFrame->format = AV_PIX_FMT_RGB24;
pRGBFrame->width = dec_ctx->width;
pRGBFrame->height = dec_ctx->height;
sts = av_frame_get_buffer(pRGBFrame, 0);
if (sts < 0)
{
goto free;
//return; //Error!
}
while (ret >= 0)
{
ret = avcodec_receive_frame(dec_ctx, frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
goto free;
//return;
else if (ret < 0) {
fprintf(stderr, "Error during decoding\n");
exit(1);
}
printf("saving frame %3d\n", dec_ctx->frame_number);//
fflush(stdout);
//////////////////////////////////////////////////////////////////////////
//Convert from input format (e.g YUV420) to RGB and save to PPM:
sts = sws_scale(sws_ctx, //struct SwsContext* c,
frame->data, //const uint8_t* const srcSlice[],
frame->linesize, //const int srcStride[],
0, //int srcSliceY,
frame->height, //int srcSliceH,
pRGBFrame->data, //uint8_t* const dst[],
pRGBFrame->linesize); //const int dstStride[]);
snprintf(buf, sizeof(buf), "%s-%d.ppm", outfilePrefix, dec_ctx->frame_number);
ppm_save(buf, pRGBFrame);
}
free:
//Free
////////////////////////////////////////////////////////////////////////////
sws_freeContext(sws_ctx);
av_frame_free(&pRGBFrame);
}
int main()
{
const char* filename, * outfilePrefix, * seqfilename;
const AVCodec* codec;
AVCodecParserContext* parser;
AVCodecContext* codecContext = NULL;
FILE* file;
AVFrame* frame;
uint8_t inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
uint8_t* data;
size_t data_size;
int ret;
AVPacket* pkt;
#ifdef _DEBUG
filename = "D:\\TestFiles\\sample_1280x720.hevc";
outfilePrefix = "D:\\TestFiles\\sample_1280x720_output\\output";
#else
if (argc <= 2) {
fprintf(stderr, "Usage: %s <input file> <output file>\n"
"And check your input file is encoded by mpeg1video please.\n", argv[0]);
exit(0);
}
filename = argv[1];
outfilePrefix = argv[2];
#endif
pkt = av_packet_alloc();
if (!pkt)
exit(1);
/* set end of buffer to 0 (this ensures that no overreading happens for damaged MPEG streams) */
memset(inbuf + INBUF_SIZE, 0, AV_INPUT_BUFFER_PADDING_SIZE);
/* find the HEVC video decoder */
codec = avcodec_find_decoder(AV_CODEC_ID_HEVC);
if (!codec) {
fprintf(stderr, "Codec not found\n");
exit(1);
}
parser = av_parser_init(codec->id);
if (!parser) {
fprintf(stderr, "parser not found\n");
exit(1);
}
codecContext = avcodec_alloc_context3(codec);
if (!codecContext) {
fprintf(stderr, "Could not allocate video codec context\n");
exit(1);
}
/* For some codecs, such as msmpeg4 and mpeg4, width and height
MUST be initialized there because this information is not
available in the bitstream. */
/* open it */
if (avcodec_open2(codecContext, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
exit(1);
}
fopen_s(&file, filename, "rb");
if (!file) {
fprintf(stderr, "Could not open %s\n", filename);
exit(1);
}
frame = av_frame_alloc();
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
while (!feof(file)) {
/* read raw data from the input file */
data_size = fread(inbuf, 1, INBUF_SIZE, file);
if (!data_size)
break;
/* use the parser to split the data into frames */
data = inbuf;
while (data_size > 0)
{
ret = av_parser_parse2(parser, codecContext, &pkt->data, &pkt->size,
data, data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
if (ret < 0) {
fprintf(stderr, "Error while parsing\n");
exit(1);
}
data += ret;
data_size -= ret;
if (pkt->size)
decode(codecContext, frame, pkt, outfilePrefix);
}
}
/* flush the decoder */
decode(codecContext, frame, NULL, outfilePrefix);
fclose(file);
av_parser_close(parser);
avcodec_free_context(&codecContext);
av_frame_free(&frame);
av_packet_free(&pkt);
}
the problem is that you're not calling av_parser_parse2() with data_size=0 to signal EOF. See the API docs:
buf_size: input length, to signal EOF, this should be 0 (so that the last frame can be output).
Without that call, one frame will be cached in the parser, and that's the one missing in your output.
[edit]
To be clear, I acknowledge that you copied the example code in the API docs correctly:
[..]
while(in_len){
len = av_parser_parse2(myparser, AVCodecContext, &data, &size,
in_data, in_len,
pts, dts, pos);
[..]
However, that code is unfortunately incomplete. If you look at the relevant usage of that code in demux.c, you'll see that explicit flush is required:
[..]
1134 while (size > 0 || (flush && got_output)) {
1135 int64_t next_pts = pkt->pts;
1136 int64_t next_dts = pkt->dts;
1137 int len;
1138
1139 len = av_parser_parse2(sti->parser, sti->avctx,
1140 &out_pkt->data, &out_pkt->size, data, size,
1141 pkt->pts, pkt->dts, pkt->pos);
[..]
In my project, i'd like to save one of the frames from Hevc file. I'm using FFmpeg in source code to decode the Hevc file and get AVFrame and AVCodecContext.
What i need is to save the frame as picture(with full colors).
I have tried to save it as *.pgm file, so the picture is just grey, which not really i need.
Any suggesstion? Thanks!
void HevcDecoder::Images_Save(char* filename, AVFrame *frame)
{
FILE* file;
int i;
fopen_s(&file, filename, "wb");
fprintf(file, "P5\n%d %d\n%d\n", frame->width, frame->height, 255);
for (i = 0; i < frame->height; i++)
fwrite(frame->data[0] + i * frame->linesize[0], 1, frame->width, file);
fclose(file);
}
void HevcDecoder::Decode(AVCodecContext* dec_ctx, AVFrame* frame, AVPacket* pkt, const char* filename)
{
char buf[1024];
int ret;
ret = avcodec_send_packet(dec_ctx, pkt);
if (ret < 0) {
fprintf(stderr, "Error sending a packet for decoding\n");
exit(1);
}
while (ret >= 0) {
ret = avcodec_receive_frame(dec_ctx, frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
return;
else if (ret < 0) {
fprintf(stderr, "Error during decoding\n");
exit(1);
}
printf("saving frame %3d\n", dec_ctx->frame_number);
fflush(stdout);
/* the picture is allocated by the decoder. no need to
free it */
snprintf(buf, sizeof(buf), "%s-%d.pgm", filename, dec_ctx->frame_number);
Images_Save(buf, frame/*, dec_ctx*/);
}
}
Converting raw HEVC file to sequence of images image using FFmpeg CLI, is simple.
Assume input.265 is the input file (raw HEVC video stream):
Converting to PNG images:
ffmpeg -i input.265 %05d.png
Converting to PPM images:
ffmpeg -i input.265 %05d.ppm
In case the input video uses MP4 container and you want JPEG images:
ffmpeg -i input.265 %05d.jpg
Using FFmpeg C interface (Libav):
For making things reproducible, start by creating an input video file using FFmpeg CLI:
ffmpeg -y -f lavfi -i testsrc=size=192x108:rate=1:duration=10 -vcodec libx265 -pix_fmt yuv420p input.265
The above command creates HEVC (H.265) encoded stream - 10 frames with resolution 192x108 and pixel format YUV420 (synthetic pattern).
The encoded stream is raw video stream (without container).
Note:
RAW HEVC (H.265) video stream is not commonly used file format.
Usually the stream is wrapped by container (like MP4 / MKV / AVI...).
We use the raw video stream for educational purposes - the code used for decoding is simpler.
Saving the images as color images:
The code sample reuses the code from the this post.
PGM is a grayscale format, for equivalent color format we may use PPM format.
We may use SWS Scale to convert the format from YUV420 to RGB.
We can use the code sample from this post
Here is the code sample:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libswscale/swscale.h>
}
#define INBUF_SIZE 1024
//static void pgm_save(unsigned char* buf, int wrap, int xsize, int ysize, char* filename)
//{
// FILE* f;
// int i;
//
// f = fopen(filename, "wb");
// fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
// for (i = 0; i < ysize; i++)
// fwrite(buf + i * wrap, 1, xsize, f);
// fclose(f);
//}
//Save RGB image as PPM file format
static void ppm_save(unsigned char* buf, int wrap, int xsize, int ysize, char* filename)
{
FILE* f;
int i;
f = fopen(filename, "wb");
fprintf(f, "P6\n%d %d\n%d\n", xsize, ysize, 255);
for (i = 0; i < ysize; i++)
{
fwrite(buf + i * wrap, 1, xsize*3, f);
}
fclose(f);
}
static void decode(AVCodecContext* dec_ctx, AVFrame* frame, AVPacket* pkt, const char* filename)
{
struct SwsContext* sws_ctx = NULL;
char buf[1024];
int ret;
int sts;
ret = avcodec_send_packet(dec_ctx, pkt);
if (ret < 0)
{
fprintf(stderr, "Error sending a packet for decoding\n");
exit(1);
}
//Create SWS Context for converting from decode pixel format (like YUV420) to RGB
////////////////////////////////////////////////////////////////////////////
sws_ctx = sws_getContext(dec_ctx->width,
dec_ctx->height,
dec_ctx->pix_fmt,
dec_ctx->width,
dec_ctx->height,
AV_PIX_FMT_RGB24,
SWS_BICUBIC,
NULL,
NULL,
NULL);
if (sws_ctx == nullptr)
{
return; //Error!
}
////////////////////////////////////////////////////////////////////////////
//Allocate frame for storing image converted to RGB.
////////////////////////////////////////////////////////////////////////////
AVFrame* pRGBFrame = av_frame_alloc();
pRGBFrame->format = AV_PIX_FMT_RGB24;
pRGBFrame->width = dec_ctx->width;
pRGBFrame->height = dec_ctx->height;
sts = av_frame_get_buffer(pRGBFrame, 0);
if (sts < 0)
{
return; //Error!
}
////////////////////////////////////////////////////////////////////////////
while (ret >= 0)
{
ret = avcodec_receive_frame(dec_ctx, frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
{
return;
}
else if (ret < 0)
{
fprintf(stderr, "Error during decoding\n");
exit(1);
}
printf("saving frame %3d\n", dec_ctx->frame_number);
fflush(stdout);
/* the picture is allocated by the decoder. no need to
free it */
//snprintf(buf, sizeof(buf), "%s_%03d.pgm", filename, dec_ctx->frame_number);
//pgm_save(frame->data[0], frame->linesize[0],
// frame->width, frame->height, buf);
//Convert from input format (e.g YUV420) to RGB and save to PPM:
////////////////////////////////////////////////////////////////////////////
sts = sws_scale(sws_ctx, //struct SwsContext* c,
frame->data, //const uint8_t* const srcSlice[],
frame->linesize, //const int srcStride[],
0, //int srcSliceY,
frame->height, //int srcSliceH,
pRGBFrame->data, //uint8_t* const dst[],
pRGBFrame->linesize); //const int dstStride[]);
if (sts != frame->height)
{
return; //Error!
}
snprintf(buf, sizeof(buf), "%s_%03d.ppm", filename, dec_ctx->frame_number);
ppm_save(pRGBFrame->data[0], pRGBFrame->linesize[0], pRGBFrame->width, pRGBFrame->height, buf);
////////////////////////////////////////////////////////////////////////////
}
//Free
sws_freeContext(sws_ctx);
av_frame_free(&pRGBFrame);
}
int main(int argc, char** argv)
{
const char* filename, * outfilename;
const AVCodec* codec;
AVCodecParserContext* parser;
AVCodecContext* c = NULL;
FILE* f;
AVFrame* frame;
uint8_t inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
uint8_t* data;
size_t data_size;
int ret;
AVPacket* pkt;
filename = argv[1];
outfilename = argv[2];
pkt = av_packet_alloc();
if (!pkt)
{
exit(1);
}
//memset(inbuf + INBUF_SIZE, 0, AV_INPUT_BUFFER_PADDING_SIZE);
memset(inbuf, 0, sizeof(inbuf));
codec = avcodec_find_decoder(AV_CODEC_ID_HEVC);
if (!codec)
{
fprintf(stderr, "Codec not found\n");
exit(1);
}
parser = av_parser_init(codec->id);
if (!parser)
{
fprintf(stderr, "parser not found\n");
exit(1);
}
c = avcodec_alloc_context3(codec);
if (!c)
{
fprintf(stderr, "Could not allocate video codec context\n");
exit(1);
}
if (avcodec_open2(c, codec, NULL) < 0)
{
fprintf(stderr, "Could not open codec\n");
exit(1);
}
f = fopen(filename, "rb");
if (!f)
{
fprintf(stderr, "Could not open %s\n", filename);
exit(1);
}
frame = av_frame_alloc();
if (!frame)
{
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
while (!feof(f))
{
/* read raw data from the input file */
data_size = fread(inbuf, 1, INBUF_SIZE, f);
if (!data_size)
{
break;
}
/* use the parser to split the data into frames */
data = inbuf;
while (data_size > 0)
{
ret = av_parser_parse2(parser, c, &pkt->data, &pkt->size, data, (int)data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
if (ret < 0)
{
fprintf(stderr, "Error while parsing\n");
exit(1);
}
data += ret;
data_size -= ret;
if (pkt->data)
{
printf("NICE\n");
decode(c, frame, pkt, outfilename);
}
}
}
/* flush the decoder */
decode(c, frame, NULL, outfilename);
fclose(f);
av_parser_close(parser);
avcodec_free_context(&c);
av_frame_free(&frame);
av_packet_free(&pkt);
return 0;
}
Showing images using OpenCV:
One of the simplest ways to show an image is using OpenCV library.
Setting up a project that uses both FFmpeg and OpenCV for the first time may be challenging.
We need the image to be in BGR format.
For showing the image, use: cv::imshow followed by cv::waitKey.
Code sample:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//Use OpenCV for showing the inage
#include <opencv2/opencv.hpp>
#include <opencv2/highgui.hpp>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libswscale/swscale.h>
}
#define INBUF_SIZE 1024
//static void pgm_save(unsigned char* buf, int wrap, int xsize, int ysize, char* filename)
//{
// FILE* f;
// int i;
//
// f = fopen(filename, "wb");
// fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
// for (i = 0; i < ysize; i++)
// fwrite(buf + i * wrap, 1, xsize, f);
// fclose(f);
//}
//Save RGB image as PPM file format
//static void ppm_save(unsigned char* buf, int wrap, int xsize, int ysize, char* filename)
//{
// FILE* f;
// int i;
//
// f = fopen(filename, "wb");
// fprintf(f, "P6\n%d %d\n%d\n", xsize, ysize, 255);
//
// for (i = 0; i < ysize; i++)
// {
// fwrite(buf + i * wrap, 1, xsize*3, f);
// }
//
// fclose(f);
//}
static void decode(AVCodecContext* dec_ctx, AVFrame* frame, AVPacket* pkt, const char* filename)
{
struct SwsContext* sws_ctx = NULL;
char filename_buf[1024];
int ret;
int sts;
ret = avcodec_send_packet(dec_ctx, pkt);
if (ret < 0)
{
fprintf(stderr, "Error sending a packet for decoding\n");
exit(1);
}
//Create SWS Context for converting from decode pixel format (like YUV420) to BGR
////////////////////////////////////////////////////////////////////////////
sws_ctx = sws_getContext(dec_ctx->width,
dec_ctx->height,
dec_ctx->pix_fmt,
dec_ctx->width,
dec_ctx->height,
AV_PIX_FMT_BGR24, //For OpenCV, we want BGR pixel format.
SWS_BICUBIC,
NULL,
NULL,
NULL);
if (sws_ctx == nullptr)
{
return; //Error!
}
////////////////////////////////////////////////////////////////////////////
//Allocate frame for storing image converted to RGB.
////////////////////////////////////////////////////////////////////////////
AVFrame* pBGRFrame = av_frame_alloc();
pBGRFrame->format = AV_PIX_FMT_BGR24;
pBGRFrame->width = dec_ctx->width;
pBGRFrame->height = dec_ctx->height;
sts = av_frame_get_buffer(pBGRFrame, 0);
if (sts < 0)
{
return; //Error!
}
////////////////////////////////////////////////////////////////////////////
while (ret >= 0)
{
ret = avcodec_receive_frame(dec_ctx, frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
{
return;
}
else if (ret < 0)
{
fprintf(stderr, "Error during decoding\n");
exit(1);
}
printf("saving frame %3d\n", dec_ctx->frame_number);
fflush(stdout);
/* the picture is allocated by the decoder. no need to
free it */
//snprintf(buf, sizeof(buf), "%s_%03d.pgm", filename, dec_ctx->frame_number);
//pgm_save(frame->data[0], frame->linesize[0],
// frame->width, frame->height, buf);
//Convert from input format (e.g YUV420) to BGR:
////////////////////////////////////////////////////////////////////////////
sts = sws_scale(sws_ctx, //struct SwsContext* c,
frame->data, //const uint8_t* const srcSlice[],
frame->linesize, //const int srcStride[],
0, //int srcSliceY,
frame->height, //int srcSliceH,
pBGRFrame->data, //uint8_t* const dst[],
pBGRFrame->linesize); //const int dstStride[]);
if (sts != frame->height)
{
return; //Error!
}
snprintf(filename_buf, sizeof(filename_buf), "%s_%03d.jpg", filename, dec_ctx->frame_number);
//ppm_save(pBGRFrame->data[0], pBGRFrame->linesize[0], pBGRFrame->width, pBGRFrame->height, buf);
////////////////////////////////////////////////////////////////////////////
//Use OpenCV for showing the image (and save the image in JPEG format):
////////////////////////////////////////////////////////////////////////////
cv::Mat img = cv::Mat(pBGRFrame->height, pBGRFrame->width, CV_8UC3, pBGRFrame->data[0], pBGRFrame->linesize[0]); //cv::Mat is OpenCV "thin image wrapper".
cv::imshow("img", img);
cv::waitKey(100); //Wait 100msec (relativly long time - for testing).
//Save the inage in JPEG format using OpenCV
cv::imwrite(filename_buf, img);
////////////////////////////////////////////////////////////////////////////
}
//Free
sws_freeContext(sws_ctx);
av_frame_free(&pBGRFrame);
}
int main(int argc, char** argv)
{
const char* filename, * outfilename;
const AVCodec* codec;
AVCodecParserContext* parser;
AVCodecContext* c = NULL;
FILE* f;
AVFrame* frame;
uint8_t inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
uint8_t* data;
size_t data_size;
int ret;
AVPacket* pkt;
filename = argv[1];
outfilename = argv[2];
pkt = av_packet_alloc();
if (!pkt)
{
exit(1);
}
//memset(inbuf + INBUF_SIZE, 0, AV_INPUT_BUFFER_PADDING_SIZE);
memset(inbuf, 0, sizeof(inbuf));
codec = avcodec_find_decoder(AV_CODEC_ID_HEVC);
if (!codec)
{
fprintf(stderr, "Codec not found\n");
exit(1);
}
parser = av_parser_init(codec->id);
if (!parser)
{
fprintf(stderr, "parser not found\n");
exit(1);
}
c = avcodec_alloc_context3(codec);
if (!c)
{
fprintf(stderr, "Could not allocate video codec context\n");
exit(1);
}
if (avcodec_open2(c, codec, NULL) < 0)
{
fprintf(stderr, "Could not open codec\n");
exit(1);
}
f = fopen(filename, "rb");
if (!f)
{
fprintf(stderr, "Could not open %s\n", filename);
exit(1);
}
frame = av_frame_alloc();
if (!frame)
{
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
while (!feof(f))
{
/* read raw data from the input file */
data_size = fread(inbuf, 1, INBUF_SIZE, f);
if (!data_size)
{
break;
}
/* use the parser to split the data into frames */
data = inbuf;
while (data_size > 0)
{
ret = av_parser_parse2(parser, c, &pkt->data, &pkt->size, data, (int)data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
if (ret < 0)
{
fprintf(stderr, "Error while parsing\n");
exit(1);
}
data += ret;
data_size -= ret;
if (pkt->data)
{
printf("NICE\n");
decode(c, frame, pkt, outfilename);
}
}
}
/* flush the decoder */
decode(c, frame, NULL, outfilename);
fclose(f);
av_parser_close(parser);
avcodec_free_context(&c);
av_frame_free(&frame);
av_packet_free(&pkt);
return 0;
}
Sample output:
output_001.jpg:
output_002.jpg:
output_003.jpg:
I'm trying to direct the output from opengl into a mp4 file.
Currently, I'm getting the error "Invalid input" from the call avcodec_send_frame(c, frame). Why am I getting this error?
class VideoCapture2
{
public:
VideoCapture2(const char *filename, unsigned int width, unsigned int height, int framerate, unsigned int bitrate){
avformat_alloc_output_context2(&avFormatContext, NULL, NULL, filename);
if (!avFormatContext) {
printf("Could not deduce output format from file extension: using MPEG.\n");
avformat_alloc_output_context2(&avFormatContext, NULL, "mpeg", filename);
}
if (!avFormatContext)
exit(1);
avOutputFormat = avFormatContext->oformat;
// Video Stream
/* find the encoder */
AVCodecID codec_id = AV_CODEC_ID_H264;
codec = avcodec_find_encoder(codec_id);
if (!codec) {
fprintf(stderr, "Could not find encoder for '%s'\n",
avcodec_get_name(codec_id));
exit(1);
}
pkt = av_packet_alloc();
if (!pkt) {
fprintf(stderr, "Could not allocate AVPacket\n");
exit(1);
}
avStream = avformat_new_stream(avFormatContext, NULL);
if (!avStream) {
fprintf(stderr, "Could not allocate stream\n");
exit(1);
}
avStream->id = avFormatContext->nb_streams-1;
codec_ctx = avcodec_alloc_context3(codec);
if (!codec_ctx) {
fprintf(stderr, "Could not alloc an encoding context\n");
exit(1);
}
codec_ctx->codec_id = codec_id;
/* put sample parameters */
codec_ctx->bit_rate = bitrate;
/* resolution must be a multiple of two */
if(width % 2 != 0)
throw std::invalid_argument( "The width must be devisible by two" );
if(height % 2 != 0)
throw std::invalid_argument( "The height must be devisible by two" );
codec_ctx->width = width;
codec_ctx->height = height;
/* frames per second */
codec_ctx->framerate = (AVRational){framerate, 1};
/* timebase: This is the fundamental unit of time (in seconds) in terms
* of which frame timestamps are represented. For fixed-fps content,
* timebase should be 1/framerate and timestamp increments should be
* identical to 1. */
avStream->time_base = (AVRational){ 1, framerate };
codec_ctx->time_base = avStream->time_base;
codec_ctx->gop_size = 10; /* emit one intra frame every twelve frames at most */
codec_ctx->pix_fmt = AV_PIX_FMT_YUV420P;
/* Some formats want stream headers to be separate. */
if (avOutputFormat->flags & AVFMT_GLOBALHEADER)
codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
frame = alloc_frame(codec_ctx->pix_fmt, codec_ctx->width, codec_ctx->height);
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
/* copy the stream parameters to the muxer */
ret = avcodec_parameters_from_context(avStream->codecpar, codec_ctx);
if (ret < 0) {
fprintf(stderr, "Could not copy the stream parameters\n");
exit(1);
}
// Color format Conversion
sws = sws_getContext( codec_ctx->width
, codec_ctx->height
, AV_PIX_FMT_RGB32
, codec_ctx->width
, codec_ctx->height
, AV_PIX_FMT_YUV420P
, SWS_FAST_BILINEAR // Change this???
, 0, 0, 0);
// Check output file
av_dump_format(avFormatContext, 0, filename, 1);
/* open the output file, if needed */
if (!(avOutputFormat->flags & AVFMT_NOFILE)) {
ret = avio_open(&avFormatContext->pb, filename, AVIO_FLAG_WRITE);
if (ret < 0) {
fprintf(stderr, "Could not open '%s': %s\n", filename,
av_err2str(ret));
exit(1);
}
}
/* Write the stream header, if any. */
ret = avformat_write_header(avFormatContext, &avDict);
if (ret < 0) {
fprintf(stderr, "Error occurred when opening output file: %s\n",
av_err2str(ret));
exit(1);
}
}
void addFrame(){
fflush(stdout);
/* Make sure the frame data is writable.
On the first round, the frame is fresh from av_frame_get_buffer()
and therefore we know it is writable.
But on the next rounds, encode() will have called
avcodec_send_frame(), and the codec may have kept a reference to
the frame in its internal structures, that makes the frame
unwritable.
av_frame_make_writable() checks that and allocates a new buffer
for the frame only if necessary.
*/
ret = av_frame_make_writable(frame);
if (ret < 0){
fprintf(stderr, "Could not make the frame writable\n");
exit(1); // Wait... you should throw error instead!
}
size_t nvals = 4 * codec_ctx->width * codec_ctx->height; //GL_BGRA
pixels = (GLubyte *) realloc(pixels, nvals * sizeof(GLubyte)); // I don't think I need to do this every time since the size is constant
glReadPixels(0, 0, codec_ctx->width, codec_ctx->height, GL_BGRA, GL_UNSIGNED_BYTE, pixels);
// CONVERT TO YUV AND ENCODE
ret = av_image_alloc(frame->data, frame->linesize, codec_ctx->width, codec_ctx->height, AV_PIX_FMT_YUV420P, 32);
if (ret < 0){
fprintf(stderr, "Could not allocate the image\n");
exit(1); // Wait... you should throw error instead!
}
// Compensate for OpenGL y-axis pointing upwards and ffmpeg y-axis pointing downwards
uint8_t *in_data[1] = {(uint8_t *) pixels + (codec_ctx->height-1)*codec_ctx->width*4}; // address of the last line
int in_linesize[1] = {- codec_ctx->width * 4}; // negative stride
sws_scale(sws, in_data, in_linesize, 0, codec_ctx->height, frame->data, frame->linesize);
frame->pts = frame_order;
frame_order++;
/* encode the image */
write_frame(avFormatContext, codec_ctx, avStream, frame, pkt);
}
void close()
{
write_frame(avFormatContext, codec_ctx, avStream, NULL, pkt);
av_write_trailer(avFormatContext);
avcodec_free_context(&codec_ctx);
av_frame_free(&frame);
sws_freeContext(sws);
if (!(avFormatContext->oformat->flags & AVFMT_NOFILE))
/* Close the output file. */
avio_closep(&avFormatContext->pb);
avformat_free_context(avFormatContext);
}
private:
AVOutputFormat *avOutputFormat;
AVFormatContext* avFormatContext = NULL;
AVStream* avStream;
AVDictionary *avDict = NULL; // "create" an empty dictionary
GLubyte *pixels = NULL;
struct SwsContext *sws;
const AVCodec *codec;
AVCodecContext *codec_ctx= NULL;
// Should be ref counted??? https://ffmpeg.org/doxygen/3.3/group__lavc__encdec.html
AVFrame *frame;
AVPacket *pkt;
//
int frame_order, ret;
int write_frame(AVFormatContext *fmt_ctx, AVCodecContext *c,
AVStream *st, AVFrame *frame, AVPacket *pkt)
{
int ret;
// ERROR OCCURS HERE
ret = avcodec_send_frame(c, frame);
// ERROR OCCURS HERE
if (ret < 0) {
fprintf(stderr, "Error sending a frame to the encoder: %s\n",
av_err2str(ret));
exit(1);
}
while (ret >= 0) {
ret = avcodec_receive_packet(c, pkt);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
break;
else if (ret < 0) {
fprintf(stderr, "Error encoding a frame: %s\n", av_err2str(ret));
exit(1);
}
/* rescale output packet timestamp values from codec to stream timebase */
av_packet_rescale_ts(pkt, c->time_base, st->time_base);
pkt->stream_index = st->index;
/* Write the compressed frame to the media file. */
log_packet(fmt_ctx, pkt);
ret = av_interleaved_write_frame(fmt_ctx, pkt);
/* pkt is now blank (av_interleaved_write_frame() takes ownership of
* its contents and resets pkt), so that no unreferencing is necessary.
* This would be different if one used av_write_frame(). */
if (ret < 0) {
fprintf(stderr, "Error while writing output packet: %s\n", av_err2str(ret));
exit(1);
}
}
return ret == AVERROR_EOF ? 1 : 0;
}
void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt)
{
AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
printf("pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),
av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),
av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),
pkt->stream_index);
}
AVFrame *alloc_frame(enum AVPixelFormat pix_fmt, int width, int height)
{
AVFrame *frame;
int ret;
frame = av_frame_alloc();
if (!frame)
return NULL;
frame->format = pix_fmt;
frame->width = width;
frame->height = height;
/* allocate the buffers for the frame data */
ret = av_frame_get_buffer(frame, 0);
if (ret < 0) {
fprintf(stderr, "Could not allocate frame data.\n");
exit(1);
}
return frame;
}
};
The issue turned out to be that I had missed calling avcodec_open2(..) now it works :)
/* open the codec */
AVDictionary *opt = NULL;
av_dict_copy(&opt, avDict, 0);
ret = avcodec_open2(codec_ctx, codec, &opt);
av_dict_free(&opt);
if (ret < 0) {
fprintf(stderr, "Could not open video codec: %s\n", av_err2str(ret));
exit(1);
}
// Then allocate frame...
frame = alloc_frame(codec_ctx->pix_fmt, codec_ctx->width, codec_ctx->height);
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
For reference, this is the full code (but the video quality is shit so you will have to tune that yourself)
#ifndef VIDEO_CAPTURE2_H
#define VIDEO_CAPTURE2_H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "../include/glad/glad.h"
#include "finite_math.hpp"
#include <stdexcept>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libswscale/swscale.h>
#include <libavutil/opt.h>
#include <libavutil/imgutils.h>
#include <libavformat/avformat.h>
#include <libavutil/opt.h>
#include <libavutil/mathematics.h>
#include <libavutil/timestamp.h>
}
// These exist to patch three functions for which gcc gets compiler errors
#ifdef av_err2str
#undef av_err2str
#include <string>
av_always_inline std::string av_err2string(int errnum) {
char str[AV_ERROR_MAX_STRING_SIZE];
return av_make_error_string(str, AV_ERROR_MAX_STRING_SIZE, errnum);
}
#define av_err2str(err) av_err2string(err).c_str()
#endif
#ifdef av_ts2str
#undef av_ts2str
#include <string>
av_always_inline std::string av_ts2string(int ts) {
char str[AV_TS_MAX_STRING_SIZE];
return av_ts_make_string(str, ts);
}
#define av_ts2str(ts) av_ts2string(ts).c_str()
#endif
#ifdef av_ts2timestr
#undef av_ts2timestr
#include <string>
av_always_inline std::string av_ts2timestring(int ts, AVRational *tb) {
char str[AV_TS_MAX_STRING_SIZE];
return av_ts_make_time_string(str, ts, tb);
}
#define av_ts2timestr(ts, tb) av_ts2timestring(ts, tb).c_str()
#endif
class VideoCapture2
{
public:
VideoCapture2(const char *filename, unsigned int width, unsigned int height, int framerate, unsigned int bitrate){
avformat_alloc_output_context2(&avFormatContext, NULL, NULL, filename);
if (!avFormatContext) {
printf("Could not deduce output format from file extension: using MPEG.\n");
avformat_alloc_output_context2(&avFormatContext, NULL, "mpeg", filename);
}
if (!avFormatContext)
exit(1);
avOutputFormat = avFormatContext->oformat;
// Video Stream
/* find the mpeg1video encoder */
/* find the encoder */
AVCodecID codec_id = AV_CODEC_ID_H264;
codec = avcodec_find_encoder(codec_id);
if (!codec) {
fprintf(stderr, "Could not find encoder for '%s'\n",
avcodec_get_name(codec_id));
exit(1);
}
pkt = av_packet_alloc();
if (!pkt) {
fprintf(stderr, "Could not allocate AVPacket\n");
exit(1);
}
avStream = avformat_new_stream(avFormatContext, NULL);
if (!avStream) {
fprintf(stderr, "Could not allocate stream\n");
exit(1);
}
avStream->id = avFormatContext->nb_streams-1;
codec_ctx = avcodec_alloc_context3(codec);
if (!codec_ctx) {
fprintf(stderr, "Could not alloc an encoding context\n");
exit(1);
}
codec_ctx->codec_id = codec_id;
/* put sample parameters */
codec_ctx->bit_rate = bitrate;
/* resolution must be a multiple of two */
if(width % 2 != 0)
throw std::invalid_argument( "The width must be devisible by two" );
if(height % 2 != 0)
throw std::invalid_argument( "The height must be devisible by two" );
codec_ctx->width = width;
codec_ctx->height = height;
/* frames per second */
codec_ctx->framerate = (AVRational){framerate, 1};
/* timebase: This is the fundamental unit of time (in seconds) in terms
* of which frame timestamps are represented. For fixed-fps content,
* timebase should be 1/framerate and timestamp increments should be
* identical to 1. */
avStream->time_base = (AVRational){ 1, framerate };
codec_ctx->time_base = avStream->time_base;
codec_ctx->gop_size = 10; /* emit one intra frame every twelve frames at most */
codec_ctx->pix_fmt = AV_PIX_FMT_YUV420P;
/* Some formats want stream headers to be separate. */
if (avOutputFormat->flags & AVFMT_GLOBALHEADER)
codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
/* open the codec */
AVDictionary *opt = NULL;
av_dict_copy(&opt, avDict, 0);
ret = avcodec_open2(codec_ctx, codec, &opt);
av_dict_free(&opt);
if (ret < 0) {
fprintf(stderr, "Could not open video codec: %s\n", av_err2str(ret));
exit(1);
}
frame = alloc_frame(codec_ctx->pix_fmt, codec_ctx->width, codec_ctx->height);
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
/* copy the stream parameters to the muxer */
ret = avcodec_parameters_from_context(avStream->codecpar, codec_ctx);
if (ret < 0) {
fprintf(stderr, "Could not copy the stream parameters\n");
exit(1);
}
// Color fromat COnversion
sws = sws_getContext( codec_ctx->width
, codec_ctx->height
, AV_PIX_FMT_RGB32
, codec_ctx->width
, codec_ctx->height
, AV_PIX_FMT_YUV420P
, SWS_FAST_BILINEAR // Change this???
, 0, 0, 0);
// Check output file
av_dump_format(avFormatContext, 0, filename, 1);
/* open the output file, if needed */
if (!(avOutputFormat->flags & AVFMT_NOFILE)) {
ret = avio_open(&avFormatContext->pb, filename, AVIO_FLAG_WRITE);
if (ret < 0) {
fprintf(stderr, "Could not open '%s': %s\n", filename,
av_err2str(ret));
exit(1);
}
}
/* Write the stream header, if any. */
ret = avformat_write_header(avFormatContext, &avDict);
if (ret < 0) {
fprintf(stderr, "Error occurred when opening output file: %s\n",
av_err2str(ret));
exit(1);
}
}
void addFrame(){
fflush(stdout);
/* Make sure the frame data is writable.
On the first round, the frame is fresh from av_frame_get_buffer()
and therefore we know it is writable.
But on the next rounds, encode() will have called
avcodec_send_frame(), and the codec may have kept a reference to
the frame in its internal structures, that makes the frame
unwritable.
av_frame_make_writable() checks that and allocates a new buffer
for the frame only if necessary.
*/
ret = av_frame_make_writable(frame);
if (ret < 0){
fprintf(stderr, "Could not make the frame writable\n");
exit(1); // Wait... you should throw error instead!
}
size_t nvals = 4 * codec_ctx->width * codec_ctx->height; //GL_BGRA
pixels = (GLubyte *) realloc(pixels, nvals * sizeof(GLubyte)); // I don't think I need to do this every time since the size is constant
glReadPixels(0, 0, codec_ctx->width, codec_ctx->height, GL_BGRA, GL_UNSIGNED_BYTE, pixels);
// CONVERT TO YUV AND ENCODE
ret = av_image_alloc(frame->data, frame->linesize, codec_ctx->width, codec_ctx->height, AV_PIX_FMT_YUV420P, 32);
if (ret < 0){
fprintf(stderr, "Could not allocate the image\n");
exit(1); // Wait... you should throw error instead!
}
// Compensate for OpenGL y-axis pointing upwards and ffmpeg y-axis pointing downwards
uint8_t *in_data[1] = {(uint8_t *) pixels + (codec_ctx->height-1)*codec_ctx->width*4}; // address of the last line
int in_linesize[1] = {- codec_ctx->width * 4}; // negative stride
sws_scale(sws, in_data, in_linesize, 0, codec_ctx->height, frame->data, frame->linesize);
frame->pts = frame_order;
frame_order++;
/* encode the image */
write_frame(avFormatContext, codec_ctx, avStream, frame, pkt);
}
void close()
{
write_frame(avFormatContext, codec_ctx, avStream, NULL, pkt);
av_write_trailer(avFormatContext);
avcodec_free_context(&codec_ctx);
av_frame_free(&frame);
sws_freeContext(sws);
if (!(avFormatContext->oformat->flags & AVFMT_NOFILE))
/* Close the output file. */
avio_closep(&avFormatContext->pb);
avformat_free_context(avFormatContext);
}
private:
AVOutputFormat *avOutputFormat;
AVFormatContext* avFormatContext = NULL;
AVStream* avStream;
AVDictionary *avDict = NULL; // "create" an empty dictionary
GLubyte *pixels = NULL;
struct SwsContext *sws;
const AVCodec *codec;
AVCodecContext *codec_ctx= NULL;
// Should be ref counted??? https://ffmpeg.org/doxygen/3.3/group__lavc__encdec.html
AVFrame *frame;
AVPacket *pkt;
//
int frame_order, ret;
int write_frame(AVFormatContext *fmt_ctx, AVCodecContext *c,
AVStream *st, AVFrame *frame, AVPacket *pkt)
{
int ret;
// send the frame to the encoder
ret = avcodec_send_frame(c, frame);
if (ret < 0) {
fprintf(stderr, "Error sending a frame to the encoder: %s\n",
av_err2str(ret));
exit(1);
}
while (ret >= 0) {
ret = avcodec_receive_packet(c, pkt);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
break;
else if (ret < 0) {
fprintf(stderr, "Error encoding a frame: %s\n", av_err2str(ret));
exit(1);
}
/* rescale output packet timestamp values from codec to stream timebase */
av_packet_rescale_ts(pkt, c->time_base, st->time_base);
pkt->stream_index = st->index;
/* Write the compressed frame to the media file. */
log_packet(fmt_ctx, pkt);
ret = av_interleaved_write_frame(fmt_ctx, pkt);
/* pkt is now blank (av_interleaved_write_frame() takes ownership of
* its contents and resets pkt), so that no unreferencing is necessary.
* This would be different if one used av_write_frame(). */
if (ret < 0) {
fprintf(stderr, "Error while writing output packet: %s\n", av_err2str(ret));
exit(1);
}
}
return ret == AVERROR_EOF ? 1 : 0;
}
void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt)
{
AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
printf("pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),
av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),
av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),
pkt->stream_index);
}
AVFrame *alloc_frame(enum AVPixelFormat pix_fmt, int width, int height)
{
AVFrame *frame;
int ret;
frame = av_frame_alloc();
if (!frame)
return NULL;
frame->format = pix_fmt;
frame->width = width;
frame->height = height;
/* allocate the buffers for the frame data */
ret = av_frame_get_buffer(frame, 0);
if (ret < 0) {
fprintf(stderr, "Could not allocate frame data.\n");
exit(1);
}
return frame;
}
};
#endif
I am trying to write c++ code on how to extract audio from mp4 format file. I have compiled the examples in ffmpeg library and tried to run the demuxing_decoding.c file. The problem is that on running the code, it starts to decode way more than the actual file size and decodes wrong codec format (the decoded files cant be run).
here is the demuxing_decoding.c that I am trying to run:
#include <math.h>
#include <libavutil/opt.h>
#include <libavcodec/avcodec.h>
#include <libavutil/channel_layout.h>
#include <libavutil/common.h>
#include <libavutil/imgutils.h>
#include <libavutil/mathematics.h>
#include <libavutil/samplefmt.h>
#define INBUF_SIZE 4096
#define AUDIO_INBUF_SIZE 20480
#define AUDIO_REFILL_THRESH 4096
/* check that a given sample format is supported by the encoder */
static int check_sample_fmt(AVCodec *codec, enum AVSampleFormat sample_fmt)
{
const enum AVSampleFormat *p = codec->sample_fmts;
while (*p != AV_SAMPLE_FMT_NONE) {
if (*p == sample_fmt)
return 1;
p++;
}
return 0;
}
/* just pick the highest supported samplerate */
static int select_sample_rate(AVCodec *codec)
{
const int *p;
int best_samplerate = 0;
if (!codec->supported_samplerates)
return 44100;
p = codec->supported_samplerates;
while (*p) {
best_samplerate = FFMAX(*p, best_samplerate);
p++;
}
return best_samplerate;
}
/* select layout with the highest channel count */
static int select_channel_layout(AVCodec *codec)
{
const uint64_t *p;
uint64_t best_ch_layout = 0;
int best_nb_channels = 0;
if (!codec->channel_layouts)
return AV_CH_LAYOUT_STEREO;
p = codec->channel_layouts;
while (*p) {
int nb_channels = av_get_channel_layout_nb_channels(*p);
if (nb_channels > best_nb_channels) {
best_ch_layout = *p;
best_nb_channels = nb_channels;
}
p++;
}
return best_ch_layout;
}
/*
* Audio encoding example
*/
static void audio_encode_example(const char *filename)
{
AVCodec *codec;
AVCodecContext *c= NULL;
AVFrame *frame;
AVPacket pkt;
int i, j, k, ret, got_output;
int buffer_size;
FILE *f;
uint16_t *samples;
float t, tincr;
printf("Encode audio file %s\n", filename);
/* find the MP2 encoder */
codec = avcodec_find_encoder(AV_CODEC_ID_MP2);
if (!codec) {
fprintf(stderr, "Codec not found\n");
exit(1);
}
c = avcodec_alloc_context3(codec);
if (!c) {
fprintf(stderr, "Could not allocate audio codec context\n");
exit(1);
}
/* put sample parameters */
c->bit_rate = 64000;
/* check that the encoder supports s16 pcm input */
c->sample_fmt = AV_SAMPLE_FMT_S16;
if (!check_sample_fmt(codec, c->sample_fmt)) {
fprintf(stderr, "Encoder does not support sample format %s",
av_get_sample_fmt_name(c->sample_fmt));
exit(1);
}
/* select other audio parameters supported by the encoder */
c->sample_rate = select_sample_rate(codec);
c->channel_layout = select_channel_layout(codec);
c->channels = av_get_channel_layout_nb_channels(c->channel_layout);
/* open it */
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
exit(1);
}
f = fopen(filename, "wb");
if (!f) {
fprintf(stderr, "Could not open %s\n", filename);
exit(1);
}
/* frame containing input raw audio */
frame = av_frame_alloc();
if (!frame) {
fprintf(stderr, "Could not allocate audio frame\n");
exit(1);
}
frame->nb_samples = c->frame_size;
frame->format = c->sample_fmt;
frame->channel_layout = c->channel_layout;
/* the codec gives us the frame size, in samples,
* we calculate the size of the samples buffer in bytes */
buffer_size = av_samples_get_buffer_size(NULL, c->channels, c->frame_size,
c->sample_fmt, 0);
if (buffer_size < 0) {
fprintf(stderr, "Could not get sample buffer size\n");
exit(1);
}
samples = av_malloc(buffer_size);
if (!samples) {
fprintf(stderr, "Could not allocate %d bytes for samples buffer\n",
buffer_size);
exit(1);
}
/* setup the data pointers in the AVFrame */
ret = avcodec_fill_audio_frame(frame, c->channels, c->sample_fmt,
(const uint8_t*)samples, buffer_size, 0);
if (ret < 0) {
fprintf(stderr, "Could not setup audio frame\n");
exit(1);
}
/* encode a single tone sound */
t = 0;
tincr = 2 * M_PI * 440.0 / c->sample_rate;
for (i = 0; i < 200; i++) {
av_init_packet(&pkt);
pkt.data = NULL; // packet data will be allocated by the encoder
pkt.size = 0;
for (j = 0; j < c->frame_size; j++) {
samples[2*j] = (int)(sin(t) * 10000);
for (k = 1; k < c->channels; k++)
samples[2*j + k] = samples[2*j];
t += tincr;
}
/* encode the samples */
ret = avcodec_encode_audio2(c, &pkt, frame, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding audio frame\n");
exit(1);
}
if (got_output) {
fwrite(pkt.data, 1, pkt.size, f);
av_free_packet(&pkt);
}
}
/* get the delayed frames */
for (got_output = 1; got_output; i++) {
ret = avcodec_encode_audio2(c, &pkt, NULL, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding frame\n");
exit(1);
}
if (got_output) {
fwrite(pkt.data, 1, pkt.size, f);
av_free_packet(&pkt);
}
}
fclose(f);
av_freep(&samples);
av_frame_free(&frame);
avcodec_close(c);
av_free(c);
}
/*
* Audio decoding.
*/
static void audio_decode_example(const char *outfilename, const char *filename)
{
AVCodec *codec;
AVCodecContext *c= NULL;
int len;
FILE *f, *outfile;
uint8_t inbuf[AUDIO_INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
AVPacket avpkt;
AVFrame *decoded_frame = NULL;
av_init_packet(&avpkt);
printf("Decode audio file %s to %s\n", filename, outfilename);
/* find the mpeg audio decoder */
codec = avcodec_find_decoder(AV_CODEC_ID_MP2);
if (!codec) {
fprintf(stderr, "Codec not found\n");
exit(1);
}
c = avcodec_alloc_context3(codec);
if (!c) {
fprintf(stderr, "Could not allocate audio codec context\n");
exit(1);
}
/* open it */
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
exit(1);
}
f = fopen(filename, "rb");
if (!f) {
fprintf(stderr, "Could not open %s\n", filename);
exit(1);
}
outfile = fopen(outfilename, "wb");
if (!outfile) {
av_free(c);
exit(1);
}
/* decode until eof */
avpkt.data = inbuf;
avpkt.size = fread(inbuf, 1, AUDIO_INBUF_SIZE, f);
while (avpkt.size > 0) {
int i, ch;
int got_frame = 0;
if (!decoded_frame) {
if (!(decoded_frame = av_frame_alloc())) {
fprintf(stderr, "Could not allocate audio frame\n");
exit(1);
}
}
len = avcodec_decode_audio4(c, decoded_frame, &got_frame, &avpkt);
if (len < 0) {
fprintf(stderr, "Error while decoding\n");
exit(1);
}
if (got_frame) {
/* if a frame has been decoded, output it */
int data_size = av_get_bytes_per_sample(c->sample_fmt);
if (data_size < 0) {
/* This should not occur, checking just for paranoia */
fprintf(stderr, "Failed to calculate data size\n");
exit(1);
}
for (i=0; i<decoded_frame->nb_samples; i++)
for (ch=0; ch<c->channels; ch++)
fwrite(decoded_frame->data[ch] + data_size*i, 1, data_size, outfile);
}
avpkt.size -= len;
avpkt.data += len;
avpkt.dts =
avpkt.pts = AV_NOPTS_VALUE;
if (avpkt.size < AUDIO_REFILL_THRESH) {
/* Refill the input buffer, to avoid trying to decode
* incomplete frames. Instead of this, one could also use
* a parser, or use a proper container format through
* libavformat. */
memmove(inbuf, avpkt.data, avpkt.size);
avpkt.data = inbuf;
len = fread(avpkt.data + avpkt.size, 1,
AUDIO_INBUF_SIZE - avpkt.size, f);
if (len > 0)
avpkt.size += len;
}
}
fclose(outfile);
fclose(f);
avcodec_close(c);
av_free(c);
av_frame_free(&decoded_frame);
}
/*
* Video encoding example
*/
static void video_encode_example(const char *filename, int codec_id)
{
AVCodec *codec;
AVCodecContext *c= NULL;
int i, ret, x, y, got_output;
FILE *f;
AVFrame *frame;
AVPacket pkt;
uint8_t endcode[] = { 0, 0, 1, 0xb7 };
printf("Encode video file %s\n", filename);
/* find the mpeg1 video encoder */
codec = avcodec_find_encoder(codec_id);
if (!codec) {
fprintf(stderr, "Codec not found\n");
exit(1);
}
c = avcodec_alloc_context3(codec);
if (!c) {
fprintf(stderr, "Could not allocate video codec context\n");
exit(1);
}
/* put sample parameters */
c->bit_rate = 400000;
/* resolution must be a multiple of two */
c->width = 352;
c->height = 288;
/* frames per second */
c->time_base = (AVRational){1,25};
/* emit one intra frame every ten frames
* check frame pict_type before passing frame
* to encoder, if frame->pict_type is AV_PICTURE_TYPE_I
* then gop_size is ignored and the output of encoder
* will always be I frame irrespective to gop_size
*/
c->gop_size = 10;
c->max_b_frames = 1;
c->pix_fmt = AV_PIX_FMT_YUV420P;
if (codec_id == AV_CODEC_ID_H264)
av_opt_set(c->priv_data, "preset", "slow", 0);
/* open it */
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
exit(1);
}
f = fopen(filename, "wb");
if (!f) {
fprintf(stderr, "Could not open %s\n", filename);
exit(1);
}
frame = av_frame_alloc();
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
frame->format = c->pix_fmt;
frame->width = c->width;
frame->height = c->height;
/* the image can be allocated by any means and av_image_alloc() is
* just the most convenient way if av_malloc() is to be used */
ret = av_image_alloc(frame->data, frame->linesize, c->width, c->height,
c->pix_fmt, 32);
if (ret < 0) {
fprintf(stderr, "Could not allocate raw picture buffer\n");
exit(1);
}
/* encode 1 second of video */
for (i = 0; i < 25; i++) {
av_init_packet(&pkt);
pkt.data = NULL; // packet data will be allocated by the encoder
pkt.size = 0;
fflush(stdout);
/* prepare a dummy image */
/* Y */
for (y = 0; y < c->height; y++) {
for (x = 0; x < c->width; x++) {
frame->data[0][y * frame->linesize[0] + x] = x + y + i * 3;
}
}
/* Cb and Cr */
for (y = 0; y < c->height/2; y++) {
for (x = 0; x < c->width/2; x++) {
frame->data[1][y * frame->linesize[1] + x] = 128 + y + i * 2;
frame->data[2][y * frame->linesize[2] + x] = 64 + x + i * 5;
}
}
frame->pts = i;
/* encode the image */
ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding frame\n");
exit(1);
}
if (got_output) {
printf("Write frame %3d (size=%5d)\n", i, pkt.size);
fwrite(pkt.data, 1, pkt.size, f);
av_free_packet(&pkt);
}
}
/* get the delayed frames */
for (got_output = 1; got_output; i++) {
fflush(stdout);
ret = avcodec_encode_video2(c, &pkt, NULL, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding frame\n");
exit(1);
}
if (got_output) {
printf("Write frame %3d (size=%5d)\n", i, pkt.size);
fwrite(pkt.data, 1, pkt.size, f);
av_free_packet(&pkt);
}
}
/* add sequence end code to have a real mpeg file */
fwrite(endcode, 1, sizeof(endcode), f);
fclose(f);
avcodec_close(c);
av_free(c);
av_freep(&frame->data[0]);
av_frame_free(&frame);
printf("\n");
}
/*
* Video decoding example
*/
static void pgm_save(unsigned char *buf, int wrap, int xsize, int ysize,
char *filename)
{
FILE *f;
int i;
f = fopen(filename,"w");
fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
for (i = 0; i < ysize; i++)
fwrite(buf + i * wrap, 1, xsize, f);
fclose(f);
}
static int decode_write_frame(const char *outfilename, AVCodecContext *avctx,
AVFrame *frame, int *frame_count, AVPacket *pkt, int last)
{
int len, got_frame;
char buf[1024];
len = avcodec_decode_video2(avctx, frame, &got_frame, pkt);
if (len < 0) {
fprintf(stderr, "Error while decoding frame %d\n", *frame_count);
return len;
}
if (got_frame) {
printf("Saving %sframe %3d\n", last ? "last " : "", *frame_count);
fflush(stdout);
/* the picture is allocated by the decoder, no need to free it */
snprintf(buf, sizeof(buf), outfilename, *frame_count);
pgm_save(frame->data[0], frame->linesize[0],
frame->width, frame->height, buf);
(*frame_count)++;
}
if (pkt->data) {
pkt->size -= len;
pkt->data += len;
}
return 0;
}
static void video_decode_example(const char *outfilename, const char *filename)
{
AVCodec *codec;
AVCodecContext *c= NULL;
int frame_count;
FILE *f;
AVFrame *frame;
uint8_t inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
AVPacket avpkt;
av_init_packet(&avpkt);
/* set end of buffer to 0 (this ensures that no overreading happens for damaged mpeg streams) */
memset(inbuf + INBUF_SIZE, 0, AV_INPUT_BUFFER_PADDING_SIZE);
printf("Decode video file %s to %s\n", filename, outfilename);
/* find the mpeg1 video decoder */
codec = avcodec_find_decoder(AV_CODEC_ID_MPEG1VIDEO);
if (!codec) {
fprintf(stderr, "Codec not found\n");
exit(1);
}
c = avcodec_alloc_context3(codec);
if (!c) {
fprintf(stderr, "Could not allocate video codec context\n");
exit(1);
}
if (codec->capabilities & AV_CODEC_CAP_TRUNCATED)
c->flags |= AV_CODEC_FLAG_TRUNCATED; // we do not send complete frames
/* For some codecs, such as msmpeg4 and mpeg4, width and height
MUST be initialized there because this information is not
available in the bitstream. */
/* open it */
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
exit(1);
}
f = fopen(filename, "rb");
if (!f) {
fprintf(stderr, "Could not open %s\n", filename);
exit(1);
}
frame = av_frame_alloc();
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
frame_count = 0;
for (;;) {
avpkt.size = fread(inbuf, 1, INBUF_SIZE, f);
if (avpkt.size == 0)
break;
/* NOTE1: some codecs are stream based (mpegvideo, mpegaudio)
and this is the only method to use them because you cannot
know the compressed data size before analysing it.
BUT some other codecs (msmpeg4, mpeg4) are inherently frame
based, so you must call them with all the data for one
frame exactly. You must also initialize 'width' and
'height' before initializing them. */
/* NOTE2: some codecs allow the raw parameters (frame size,
sample rate) to be changed at any frame. We handle this, so
you should also take care of it */
/* here, we use a stream based decoder (mpeg1video), so we
feed decoder and see if it could decode a frame */
avpkt.data = inbuf;
while (avpkt.size > 0)
if (decode_write_frame(outfilename, c, frame, &frame_count, &avpkt, 0) < 0)
exit(1);
}
/* some codecs, such as MPEG, transmit the I and P frame with a
latency of one frame. You must do the following to have a
chance to get the last frame of the video */
avpkt.data = NULL;
avpkt.size = 0;
decode_write_frame(outfilename, c, frame, &frame_count, &avpkt, 1);
fclose(f);
avcodec_close(c);
av_free(c);
av_frame_free(&frame);
printf("\n");
}
int main(int argc, char **argv)
{
const char *output_type;
/* register all the codecs */
avcodec_register_all();
if (argc < 2) {
printf("usage: %s output_type\n"
"API example program to decode/encode a media stream with libavcodec.\n"
"This program generates a synthetic stream and encodes it to a file\n"
"named test.h264, test.mp2 or test.mpg depending on output_type.\n"
"The encoded stream is then decoded and written to a raw data output.\n"
"output_type must be chosen between 'h264', 'mp2', 'mpg'.\n",
argv[0]);
return 1;
}
output_type = argv[1];
if (!strcmp(output_type, "h264")) {
video_encode_example("test.h264", AV_CODEC_ID_H264);
} else if (!strcmp(output_type, "mp2")) {
audio_encode_example("test.mp2");
audio_decode_example("test.pcm", "test.mp2");
} else if (!strcmp(output_type, "mpg")) {
video_encode_example("test.mpg", AV_CODEC_ID_MPEG1VIDEO);
video_decode_example("test%02d.pgm", "test.mpg");
} else {
fprintf(stderr, "Invalid output type '%s', choose between 'h264', 'mp2', or 'mpg'\n",
output_type);
return 1;
}
return 0;
}
What could be wrong?
The command I used is:
./demuxing_decoding /home/cortana/Burn.mp4 /home/cortana/Desktop/Burn.mp4 /home/cortana/Desktop/Burn.aac
I have a program which capture video from webcam, encode with ffmpeg, encoded packet then write to buffer. At the receiver side, read from buffer decode with ffmpeg and play.
Now I merge sender and receiver in one program for testing. It works fine with AV_CODEC_ID_MPEG1VIDEO, but when I change the ffmpeg codec to AV_CODEC_ID_H264, at the decoding progress, it shows error:
The whole program is here FYI, I made a loop to let the whole progress run twice.
What is the cause of the error, is there anything special for H264? Thanks in advance!
#include <math.h>
extern "C" {
#include <libavutil/opt.h>
#include <libavcodec/avcodec.h>
#include <libavutil/channel_layout.h>
#include <libavutil/common.h>
#include <libavutil/imgutils.h>
#include <libavutil/mathematics.h>
#include <libavutil/samplefmt.h>
#include <libswscale/swscale.h>
#include "v4l2.h"
}
#include "opencv2/highgui/highgui.hpp"
#include <iostream>
using namespace cv;
using namespace std;
#define INBUF_SIZE 4096
static uint8_t inbuf[INBUF_SIZE + FF_INPUT_BUFFER_PADDING_SIZE];
static AVCodec *codec;
static AVCodecContext *c= NULL;
static int ret, got_output;
static int frame_count;
static FILE *f;
static AVPacket pkt;
static AVFrame *frame;
static AVFrame *frameDecode;
static AVFrame *framergb;
static uint8_t endcode[] = { 0, 0, 1, 0xb7 };
static AVPacket avpkt;
int totalSize=0;
#define SUBSITY 3
static int decode_write_frame(AVCodecContext *avctx,
AVFrame *frame, int *frame_count, AVPacket *pkt, int last)
{
int len, got_frame;
char buf[1024];
struct SwsContext *convert_ctx;
Mat m;
AVFrame dst;
len = avcodec_decode_video2(avctx, frame, &got_frame, pkt);
if (len < 0) {
fprintf(stderr, "Error while decoding frame %d\n", *frame_count);
return len;
}
if (got_frame) {
printf("Saving %s frame %3d\n", last ? "last " : "", *frame_count);
fflush(stdout);
int w = avctx->width;
int h = avctx->height;
/*convert AVFrame to opencv Mat frame*/
m = cv::Mat(h, w, CV_8UC3);
dst.data[0] = (uint8_t *)m.data;
avpicture_fill( (AVPicture *)&dst, dst.data[0], PIX_FMT_BGR24, w, h);
enum PixelFormat src_pixfmt = (enum PixelFormat)frame->format;
enum PixelFormat dst_pixfmt = PIX_FMT_BGR24;
convert_ctx = sws_getContext(w, h, src_pixfmt, w, h, dst_pixfmt,
SWS_FAST_BILINEAR, NULL, NULL, NULL);
if(convert_ctx == NULL) {
fprintf(stderr, "Cannot initialize the conversion context!\n");
exit(1);
}
sws_scale(convert_ctx, frame->data, frame->linesize, 0, h,
dst.data, dst.linesize);
imshow("MyVideo", m);
//video.write(m);
waitKey(10); //wait next frame time
(*frame_count)++;
}
if (pkt->data) {
pkt->size -= len;
pkt->data += len;
}
return 0;
}
static void video_decode_example(char *inbufout)
{
int bytes;
uint8_t *buffer;
av_init_packet(&avpkt);
memset(inbuf + INBUF_SIZE, 0, FF_INPUT_BUFFER_PADDING_SIZE);
codec = avcodec_find_decoder(AV_CODEC_ID_H264);
if (!codec) {
fprintf(stderr, "Codec not found\n");
exit(1);
}
c = avcodec_alloc_context3(codec);
if (!c) {
fprintf(stderr, "Could not allocate video codec context\n");
exit(1);
}
if(codec->capabilities&CODEC_CAP_TRUNCATED)
c->flags|= CODEC_FLAG_TRUNCATED; /* we do not send complete frames */
/* open it */
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
exit(1);
}
frameDecode = avcodec_alloc_frame();
if (!frameDecode) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
bytes=avpicture_get_size(PIX_FMT_RGB24, CAMER_WIDTH, CAMER_HEIGHT);
buffer=(uint8_t *)av_malloc(bytes*sizeof(uint8_t));
avpicture_fill((AVPicture *)framergb, buffer, PIX_FMT_RGB24,
CAMER_WIDTH, CAMER_HEIGHT);*/
frame_count = 0;
namedWindow("MyVideo",CV_WINDOW_AUTOSIZE); //create a window called "MyVideo"
int size1=0;
for(;;) {
memcpy(inbuf,inbufout+size1,INBUF_SIZE);
size1+=INBUF_SIZE;
if (size1>(totalSize-INBUF_SIZE))
break;
avpkt.size=INBUF_SIZE;
avpkt.data = inbuf;
/*frame by frame process*/
while (avpkt.size > 0)
if (decode_write_frame(c, frameDecode, &frame_count, &avpkt, 0) < 0)
exit(1);
}
avpkt.data = NULL;
avpkt.size = 0;
decode_write_frame(c, frameDecode, &frame_count, &avpkt, 1);
}
static void init_video_encode(const char *filename, AVCodecID codec_id, int max_f)
{
printf("Encode video file %s\n", filename);
/* find the mpeg1 video encoder */
codec = avcodec_find_encoder(codec_id);
if (!codec) {
fprintf(stderr, "Codec not found\n");
exit(1);
}
c = avcodec_alloc_context3(codec);
if (!c) {
fprintf(stderr, "Could not allocate video codec context\n");
exit(1);
}
/* put sample parameters */
c->bit_rate = 400000;
/* resolution must be a multiple of two */
c->width = 640;
c->height = 480;
/* frames per second */
c->time_base= (AVRational){1,25};
c->gop_size = 10; /* emit one intra frame every ten frames */
c->max_b_frames=max_f;
c->pix_fmt = AV_PIX_FMT_YUV420P;
if(codec_id == AV_CODEC_ID_H264)
av_opt_set(c->priv_data, "preset", "slow", 0);
/* open it */
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
exit(1);
}
frame = avcodec_alloc_frame();
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
frame->format = c->pix_fmt;
frame->width = c->width;
frame->height = c->height;
ret = av_image_alloc(frame->data, frame->linesize, c->width, c->height,
c->pix_fmt, 32);
/* get the delayed frames */
if (ret < 0) {
fprintf(stderr, "Could not allocate raw picture buffer\n");
exit(1);
}
printf("\n");
}
int video_encode(int frameNo,char *inbufout)
{
static int count = 0;
static int i = 0;
/* encode 1 frame of video */
av_init_packet(&pkt);
pkt.data = NULL; // packet data will be allocated by the encoder
pkt.size = 0;
//cout<<"\nBefore YUV\n";
if(count == 0)
read_yuv420(frame->data[0]);
count ++;
if(count == SUBSITY) {
count = 0;
}
frame->pts = i++;
/* encode the image */
ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding frame\n");
return -1;
}
if (got_output) {
printf("Write frame %3d (size=%5d)\n", i, pkt.size);
memcpy(inbufout+totalSize,pkt.data,pkt.size);
totalSize+=pkt.size;
fwrite(pkt.data, 1, pkt.size, f);
av_free_packet(&pkt);
}
return 0;
}
void cancle_encode(void)
{
fclose(f);
avcodec_close(c);
av_free(c);
av_freep(&frame->data[0]);
avcodec_free_frame(&frame);
}
int main(int argc, char **argv)
{
int i;
char inbufout[25*50*(INBUF_SIZE + FF_INPUT_BUFFER_PADDING_SIZE)];
if(init_v4l2() < 0) {
printf("can't open camera\n");
return 0;
}
/* register all the codecs */
avcodec_register_all();
for(int j=0;j<2;j++){
//init_video_encode("test.mpg", AV_CODEC_ID_MPEG1VIDEO, 15);
init_video_encode("test.mpg", AV_CODEC_ID_H264, 15);
//for(i = 0;i< 10*15;i++ ) {
for(i = 0;i< 25*10;i++ ) {
if(video_encode(i,inbufout) < 0)
return 0;
}
cout<<"\n"<<totalSize<<"\n"<<endl;
video_decode_example(inbufout);
cancle_encode();
totalSize=0;
}
exit_v4l2();
return 0;
}
You need to include a parser. The ffmpeg mpeg1/2 decoders happen to work fine without a parser, but h264/mpeg4/vp9 need a parser, or you'll get errors like the above.
Note that if you use libavformat for demuxing and call avformat_read_frame(), it will automatically parse for you, but since you're doing buffer management yourself, you need to include the parser yourself also.