Related
I'm trying to decode my HEVC file in C++ using FFmpeg. I use the HEVC decoder and try to save the frames in PPM format (almost the whole source code comes from the FFmpeg example [decode_video.c] https://ffmpeg.org/doxygen/trunk/decode_video_8c-example.html; what's new is the conversion from YUV to RGB). My HEVC file has 677 frames, which I checked with ffprobe in a command window. But I only got 676 frames with my project. I have also checked with other HEVC files and the results are the same: I always get one frame less.
I also tried another FFmpeg example [demuxing_decoding.c] (https://ffmpeg.org/doxygen/trunk/demuxing_decoding_8c-example.html), the result is same, one frame less...
That seems to happen only with H.265 and H.264 files. Is it a bug in FFmpeg?
Can anybody help me, i post my code here. Sorry, don't know how to attach my project and test files. Thanks a lot!
Best regards,
Ivan
#include <iostream>
extern "C"
{
#include "../Headers/libavcodec/avcodec.h"
#include "../Headers/libavformat/avformat.h"
#include "../Headers/libswscale/swscale.h"
}
#define INBUF_SIZE 4096
//Save RGB image as PPM file format
//Save an RGB24 frame as a binary PPM (P6) file.
//  filename: path of the file to create or overwrite
//  frame:    frame whose data[0] holds packed RGB rows, linesize[0] bytes apart
//If the file cannot be opened an error is printed and nothing is written.
static void ppm_save(char* filename, AVFrame* frame)
{
    FILE* file = NULL;
    int i;
    //fopen_s leaves the handle NULL on failure; writing through it would crash
    if (fopen_s(&file, filename, "wb") != 0 || !file) {
        fprintf(stderr, "Could not open %s\n", filename);
        return;
    }
    fprintf(file, "P6\n%d %d\n%d\n", frame->width, frame->height, 255);
    //Write row by row: only width * 3 bytes of each linesize[0]-byte row are pixels
    for (i = 0; i < frame->height; i++)
        fwrite(frame->data[0] + i * frame->linesize[0], 1, frame->width * 3, file);
    fclose(file);
}
//Send one packet to the decoder and save every frame it returns as
//"<outfilePrefix>-<frame number>.ppm".
//  dec_ctx:       opened decoder context
//  frame:         scratch frame that receives each decoded picture
//  pkt:           packet to decode, or NULL to flush the decoder
//  outfilePrefix: prefix of the generated PPM file names
//Decoder errors terminate the process; a conversion set-up failure is
//reported and the remaining frames of this call are skipped.
void decode(AVCodecContext* dec_ctx, AVFrame* frame, AVPacket* pkt, const char* outfilePrefix)
{
    char buf[1024];
    struct SwsContext* sws_ctx = NULL;
    AVFrame* pRGBFrame = NULL;
    int ret;
    int sts;

    ret = avcodec_send_packet(dec_ctx, pkt);
    if (ret < 0) {
        fprintf(stderr, "Error sending a packet for decoding\n");
        exit(1);
    }

    while (ret >= 0)
    {
        ret = avcodec_receive_frame(dec_ctx, frame);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
            break;
        else if (ret < 0) {
            fprintf(stderr, "Error during decoding\n");
            exit(1);
        }

        //Set up the YUV -> RGB conversion only once a frame really exists and
        //take size/format from the frame itself: the context fields may not
        //be known yet when the first packets are sent.
        //NOTE(review): assumes all frames returned by one call share the same
        //size and pixel format - confirm for streams that change resolution.
        if (!sws_ctx) {
            sws_ctx = sws_getContext(frame->width,
                frame->height,
                (enum AVPixelFormat)frame->format,
                frame->width,
                frame->height,
                AV_PIX_FMT_RGB24,
                SWS_BICUBIC,
                NULL,
                NULL,
                NULL);
            if (!sws_ctx) {
                fprintf(stderr, "Could not create the RGB conversion context\n");
                break;
            }

            //Allocate frame for storing image converted to RGB.
            pRGBFrame = av_frame_alloc();
            if (!pRGBFrame) {
                fprintf(stderr, "Could not allocate the RGB frame\n");
                break;
            }
            pRGBFrame->format = AV_PIX_FMT_RGB24;
            pRGBFrame->width = frame->width;
            pRGBFrame->height = frame->height;
            sts = av_frame_get_buffer(pRGBFrame, 0);
            if (sts < 0) {
                fprintf(stderr, "Could not allocate the RGB frame buffer\n");
                break;
            }
        }

        printf("saving frame %3d\n", dec_ctx->frame_number);
        fflush(stdout);

        //Convert from input format (e.g YUV420) to RGB and save to PPM:
        sts = sws_scale(sws_ctx,      //struct SwsContext* c,
            frame->data,              //const uint8_t* const srcSlice[],
            frame->linesize,          //const int srcStride[],
            0,                        //int srcSliceY,
            frame->height,            //int srcSliceH,
            pRGBFrame->data,          //uint8_t* const dst[],
            pRGBFrame->linesize);     //const int dstStride[]);
        snprintf(buf, sizeof(buf), "%s-%d.ppm", outfilePrefix, dec_ctx->frame_number);
        ppm_save(buf, pRGBFrame);
    }

    //Both calls accept NULL, so this is safe when no frame was produced
    sws_freeContext(sws_ctx);
    av_frame_free(&pRGBFrame);
}
//Decode a raw HEVC elementary stream and write every decoded frame as a PPM
//image named "<output prefix>-<frame number>.ppm".
//In _DEBUG builds the input and output paths are hard-coded; otherwise they
//are taken from argv[1] and argv[2].
int main(int argc, char** argv)
{
    const char* filename, * outfilePrefix;
    const AVCodec* codec;
    AVCodecParserContext* parser;
    AVCodecContext* codecContext = NULL;
    FILE* file;
    AVFrame* frame;
    uint8_t inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
    uint8_t* data;
    size_t data_size;
    int ret;
    int eof;
    AVPacket* pkt;
#ifdef _DEBUG
    (void)argc;
    (void)argv;
    filename = "D:\\TestFiles\\sample_1280x720.hevc";
    outfilePrefix = "D:\\TestFiles\\sample_1280x720_output\\output";
#else
    if (argc <= 2) {
        fprintf(stderr, "Usage: %s <input file> <output file>\n"
            "And check your input file is encoded by HEVC please.\n", argv[0]);
        exit(0);
    }
    filename = argv[1];
    outfilePrefix = argv[2];
#endif
    pkt = av_packet_alloc();
    if (!pkt)
        exit(1);
    /* set end of buffer to 0 (this ensures that no overreading happens for damaged streams) */
    memset(inbuf + INBUF_SIZE, 0, AV_INPUT_BUFFER_PADDING_SIZE);
    /* find the HEVC video decoder */
    codec = avcodec_find_decoder(AV_CODEC_ID_HEVC);
    if (!codec) {
        fprintf(stderr, "Codec not found\n");
        exit(1);
    }
    parser = av_parser_init(codec->id);
    if (!parser) {
        fprintf(stderr, "parser not found\n");
        exit(1);
    }
    codecContext = avcodec_alloc_context3(codec);
    if (!codecContext) {
        fprintf(stderr, "Could not allocate video codec context\n");
        exit(1);
    }
    /* For some codecs, such as msmpeg4 and mpeg4, width and height
    MUST be initialized there because this information is not
    available in the bitstream. */
    /* open it */
    if (avcodec_open2(codecContext, codec, NULL) < 0) {
        fprintf(stderr, "Could not open codec\n");
        exit(1);
    }
    fopen_s(&file, filename, "rb");
    if (!file) {
        fprintf(stderr, "Could not open %s\n", filename);
        exit(1);
    }
    frame = av_frame_alloc();
    if (!frame) {
        fprintf(stderr, "Could not allocate video frame\n");
        exit(1);
    }
    do {
        /* read raw data from the input file */
        data_size = fread(inbuf, 1, INBUF_SIZE, file);
        if (ferror(file))
            break;
        eof = !data_size;
        /* use the parser to split the data into frames */
        data = inbuf;
        /* At end of file the parser is called once more with a size of 0:
           that signals EOF and makes it hand out the last frame it still
           holds. Without this call the output is one frame short. */
        while (data_size > 0 || eof)
        {
            ret = av_parser_parse2(parser, codecContext, &pkt->data, &pkt->size,
                data, (int)data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
            if (ret < 0) {
                fprintf(stderr, "Error while parsing\n");
                exit(1);
            }
            data += ret;
            data_size -= ret;
            if (pkt->size)
                decode(codecContext, frame, pkt, outfilePrefix);
            else if (eof)
                break; /* parser is drained */
        }
    } while (!eof);
    /* flush the decoder */
    decode(codecContext, frame, NULL, outfilePrefix);
    fclose(file);
    av_parser_close(parser);
    avcodec_free_context(&codecContext);
    av_frame_free(&frame);
    av_packet_free(&pkt);
    return 0;
}
The problem is that you're not calling av_parser_parse2() with data_size=0 to signal EOF. See the API docs:
buf_size: input length, to signal EOF, this should be 0 (so that the last frame can be output).
Without that call, one frame will be cached in the parser, and that's the one missing in your output.
[edit]
To be clear, I acknowledge that you copied the example code in the API docs correctly:
[..]
while(in_len){
len = av_parser_parse2(myparser, AVCodecContext, &data, &size,
in_data, in_len,
pts, dts, pos);
[..]
However, that code is unfortunately incomplete. If you look at the relevant usage of that code in demux.c, you'll see that explicit flush is required:
[..]
1134 while (size > 0 || (flush && got_output)) {
1135 int64_t next_pts = pkt->pts;
1136 int64_t next_dts = pkt->dts;
1137 int len;
1138
1139 len = av_parser_parse2(sti->parser, sti->avctx,
1140 &out_pkt->data, &out_pkt->size, data, size,
1141 pkt->pts, pkt->dts, pkt->pos);
[..]
I'm trying to direct the output from opengl into a mp4 file.
Currently, I'm getting the error "Invalid input" from the call avcodec_send_frame(c, frame). Why am I getting this error?
class VideoCapture2
{
public:
VideoCapture2(const char *filename, unsigned int width, unsigned int height, int framerate, unsigned int bitrate){
avformat_alloc_output_context2(&avFormatContext, NULL, NULL, filename);
if (!avFormatContext) {
printf("Could not deduce output format from file extension: using MPEG.\n");
avformat_alloc_output_context2(&avFormatContext, NULL, "mpeg", filename);
}
if (!avFormatContext)
exit(1);
avOutputFormat = avFormatContext->oformat;
// Video Stream
/* find the encoder */
AVCodecID codec_id = AV_CODEC_ID_H264;
codec = avcodec_find_encoder(codec_id);
if (!codec) {
fprintf(stderr, "Could not find encoder for '%s'\n",
avcodec_get_name(codec_id));
exit(1);
}
pkt = av_packet_alloc();
if (!pkt) {
fprintf(stderr, "Could not allocate AVPacket\n");
exit(1);
}
avStream = avformat_new_stream(avFormatContext, NULL);
if (!avStream) {
fprintf(stderr, "Could not allocate stream\n");
exit(1);
}
avStream->id = avFormatContext->nb_streams-1;
codec_ctx = avcodec_alloc_context3(codec);
if (!codec_ctx) {
fprintf(stderr, "Could not alloc an encoding context\n");
exit(1);
}
codec_ctx->codec_id = codec_id;
/* put sample parameters */
codec_ctx->bit_rate = bitrate;
/* resolution must be a multiple of two */
if(width % 2 != 0)
throw std::invalid_argument( "The width must be devisible by two" );
if(height % 2 != 0)
throw std::invalid_argument( "The height must be devisible by two" );
codec_ctx->width = width;
codec_ctx->height = height;
/* frames per second */
codec_ctx->framerate = (AVRational){framerate, 1};
/* timebase: This is the fundamental unit of time (in seconds) in terms
* of which frame timestamps are represented. For fixed-fps content,
* timebase should be 1/framerate and timestamp increments should be
* identical to 1. */
avStream->time_base = (AVRational){ 1, framerate };
codec_ctx->time_base = avStream->time_base;
codec_ctx->gop_size = 10; /* emit one intra frame every twelve frames at most */
codec_ctx->pix_fmt = AV_PIX_FMT_YUV420P;
/* Some formats want stream headers to be separate. */
if (avOutputFormat->flags & AVFMT_GLOBALHEADER)
codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
frame = alloc_frame(codec_ctx->pix_fmt, codec_ctx->width, codec_ctx->height);
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
/* copy the stream parameters to the muxer */
ret = avcodec_parameters_from_context(avStream->codecpar, codec_ctx);
if (ret < 0) {
fprintf(stderr, "Could not copy the stream parameters\n");
exit(1);
}
// Color format Conversion
sws = sws_getContext( codec_ctx->width
, codec_ctx->height
, AV_PIX_FMT_RGB32
, codec_ctx->width
, codec_ctx->height
, AV_PIX_FMT_YUV420P
, SWS_FAST_BILINEAR // Change this???
, 0, 0, 0);
// Check output file
av_dump_format(avFormatContext, 0, filename, 1);
/* open the output file, if needed */
if (!(avOutputFormat->flags & AVFMT_NOFILE)) {
ret = avio_open(&avFormatContext->pb, filename, AVIO_FLAG_WRITE);
if (ret < 0) {
fprintf(stderr, "Could not open '%s': %s\n", filename,
av_err2str(ret));
exit(1);
}
}
/* Write the stream header, if any. */
ret = avformat_write_header(avFormatContext, &avDict);
if (ret < 0) {
fprintf(stderr, "Error occurred when opening output file: %s\n",
av_err2str(ret));
exit(1);
}
}
void addFrame(){
fflush(stdout);
/* Make sure the frame data is writable.
On the first round, the frame is fresh from av_frame_get_buffer()
and therefore we know it is writable.
But on the next rounds, encode() will have called
avcodec_send_frame(), and the codec may have kept a reference to
the frame in its internal structures, that makes the frame
unwritable.
av_frame_make_writable() checks that and allocates a new buffer
for the frame only if necessary.
*/
ret = av_frame_make_writable(frame);
if (ret < 0){
fprintf(stderr, "Could not make the frame writable\n");
exit(1); // Wait... you should throw error instead!
}
size_t nvals = 4 * codec_ctx->width * codec_ctx->height; //GL_BGRA
pixels = (GLubyte *) realloc(pixels, nvals * sizeof(GLubyte)); // I don't think I need to do this every time since the size is constant
glReadPixels(0, 0, codec_ctx->width, codec_ctx->height, GL_BGRA, GL_UNSIGNED_BYTE, pixels);
// CONVERT TO YUV AND ENCODE
ret = av_image_alloc(frame->data, frame->linesize, codec_ctx->width, codec_ctx->height, AV_PIX_FMT_YUV420P, 32);
if (ret < 0){
fprintf(stderr, "Could not allocate the image\n");
exit(1); // Wait... you should throw error instead!
}
// Compensate for OpenGL y-axis pointing upwards and ffmpeg y-axis pointing downwards
uint8_t *in_data[1] = {(uint8_t *) pixels + (codec_ctx->height-1)*codec_ctx->width*4}; // address of the last line
int in_linesize[1] = {- codec_ctx->width * 4}; // negative stride
sws_scale(sws, in_data, in_linesize, 0, codec_ctx->height, frame->data, frame->linesize);
frame->pts = frame_order;
frame_order++;
/* encode the image */
write_frame(avFormatContext, codec_ctx, avStream, frame, pkt);
}
void close()
{
write_frame(avFormatContext, codec_ctx, avStream, NULL, pkt);
av_write_trailer(avFormatContext);
avcodec_free_context(&codec_ctx);
av_frame_free(&frame);
sws_freeContext(sws);
if (!(avFormatContext->oformat->flags & AVFMT_NOFILE))
/* Close the output file. */
avio_closep(&avFormatContext->pb);
avformat_free_context(avFormatContext);
}
private:
AVOutputFormat *avOutputFormat;
AVFormatContext* avFormatContext = NULL;
AVStream* avStream;
AVDictionary *avDict = NULL; // "create" an empty dictionary
GLubyte *pixels = NULL;
struct SwsContext *sws;
const AVCodec *codec;
AVCodecContext *codec_ctx= NULL;
// Should be ref counted??? https://ffmpeg.org/doxygen/3.3/group__lavc__encdec.html
AVFrame *frame;
AVPacket *pkt;
//
int frame_order, ret;
int write_frame(AVFormatContext *fmt_ctx, AVCodecContext *c,
AVStream *st, AVFrame *frame, AVPacket *pkt)
{
int ret;
// ERROR OCCURS HERE
ret = avcodec_send_frame(c, frame);
// ERROR OCCURS HERE
if (ret < 0) {
fprintf(stderr, "Error sending a frame to the encoder: %s\n",
av_err2str(ret));
exit(1);
}
while (ret >= 0) {
ret = avcodec_receive_packet(c, pkt);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
break;
else if (ret < 0) {
fprintf(stderr, "Error encoding a frame: %s\n", av_err2str(ret));
exit(1);
}
/* rescale output packet timestamp values from codec to stream timebase */
av_packet_rescale_ts(pkt, c->time_base, st->time_base);
pkt->stream_index = st->index;
/* Write the compressed frame to the media file. */
log_packet(fmt_ctx, pkt);
ret = av_interleaved_write_frame(fmt_ctx, pkt);
/* pkt is now blank (av_interleaved_write_frame() takes ownership of
* its contents and resets pkt), so that no unreferencing is necessary.
* This would be different if one used av_write_frame(). */
if (ret < 0) {
fprintf(stderr, "Error while writing output packet: %s\n", av_err2str(ret));
exit(1);
}
}
return ret == AVERROR_EOF ? 1 : 0;
}
void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt)
{
AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
printf("pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),
av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),
av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),
pkt->stream_index);
}
AVFrame *alloc_frame(enum AVPixelFormat pix_fmt, int width, int height)
{
AVFrame *frame;
int ret;
frame = av_frame_alloc();
if (!frame)
return NULL;
frame->format = pix_fmt;
frame->width = width;
frame->height = height;
/* allocate the buffers for the frame data */
ret = av_frame_get_buffer(frame, 0);
if (ret < 0) {
fprintf(stderr, "Could not allocate frame data.\n");
exit(1);
}
return frame;
}
};
The issue turned out to be that I had missed calling avcodec_open2(..) now it works :)
/* Open the encoder; it receives its own copy of the option dictionary */
AVDictionary *codec_opts = NULL;
av_dict_copy(&codec_opts, avDict, 0);
ret = avcodec_open2(codec_ctx, codec, &codec_opts);
av_dict_free(&codec_opts);
if (ret < 0) {
    fprintf(stderr, "Could not open video codec: %s\n", av_err2str(ret));
    exit(1);
}
// Only after the codec is open: allocate the frame that is reused for encoding
frame = alloc_frame(codec_ctx->pix_fmt, codec_ctx->width, codec_ctx->height);
if (frame == NULL) {
    fprintf(stderr, "Could not allocate video frame\n");
    exit(1);
}
For reference, this is the full code (the video quality is poor, so you will have to tune the encoder settings yourself):
#ifndef VIDEO_CAPTURE2_H
#define VIDEO_CAPTURE2_H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "../include/glad/glad.h"
#include "finite_math.hpp"
#include <stdexcept>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libswscale/swscale.h>
#include <libavutil/opt.h>
#include <libavutil/imgutils.h>
#include <libavformat/avformat.h>
#include <libavutil/opt.h>
#include <libavutil/mathematics.h>
#include <libavutil/timestamp.h>
}
// These exist to patch three functions for which gcc gets compiler errors
#ifdef av_err2str
#undef av_err2str
#include <string>
// Returns FFmpeg's description of the error code errnum as a std::string.
av_always_inline std::string av_err2string(int errnum) {
    char buffer[AV_ERROR_MAX_STRING_SIZE];
    av_make_error_string(buffer, sizeof(buffer), errnum);
    return std::string(buffer);
}
#define av_err2str(err) av_err2string(err).c_str()
#endif
#ifdef av_ts2str
#undef av_ts2str
#include <string>
// Formats a timestamp as a std::string via av_ts_make_string().
// Takes int64_t because packet timestamps (pts/dts/duration) are 64-bit;
// an int parameter would silently truncate large values.
av_always_inline std::string av_ts2string(int64_t ts) {
    char str[AV_TS_MAX_STRING_SIZE];
    return av_ts_make_string(str, ts);
}
#define av_ts2str(ts) av_ts2string(ts).c_str()
#endif
#ifdef av_ts2timestr
#undef av_ts2timestr
#include <string>
// Formats a timestamp, expressed in time base tb, as a std::string via
// av_ts_make_time_string().
// Takes int64_t because packet timestamps are 64-bit; an int parameter would
// silently truncate large values.
av_always_inline std::string av_ts2timestring(int64_t ts, AVRational *tb) {
    char str[AV_TS_MAX_STRING_SIZE];
    return av_ts_make_time_string(str, ts, tb);
}
#define av_ts2timestr(ts, tb) av_ts2timestring(ts, tb).c_str()
#endif
class VideoCapture2
{
public:
VideoCapture2(const char *filename, unsigned int width, unsigned int height, int framerate, unsigned int bitrate){
avformat_alloc_output_context2(&avFormatContext, NULL, NULL, filename);
if (!avFormatContext) {
printf("Could not deduce output format from file extension: using MPEG.\n");
avformat_alloc_output_context2(&avFormatContext, NULL, "mpeg", filename);
}
if (!avFormatContext)
exit(1);
avOutputFormat = avFormatContext->oformat;
// Video Stream
/* find the mpeg1video encoder */
/* find the encoder */
AVCodecID codec_id = AV_CODEC_ID_H264;
codec = avcodec_find_encoder(codec_id);
if (!codec) {
fprintf(stderr, "Could not find encoder for '%s'\n",
avcodec_get_name(codec_id));
exit(1);
}
pkt = av_packet_alloc();
if (!pkt) {
fprintf(stderr, "Could not allocate AVPacket\n");
exit(1);
}
avStream = avformat_new_stream(avFormatContext, NULL);
if (!avStream) {
fprintf(stderr, "Could not allocate stream\n");
exit(1);
}
avStream->id = avFormatContext->nb_streams-1;
codec_ctx = avcodec_alloc_context3(codec);
if (!codec_ctx) {
fprintf(stderr, "Could not alloc an encoding context\n");
exit(1);
}
codec_ctx->codec_id = codec_id;
/* put sample parameters */
codec_ctx->bit_rate = bitrate;
/* resolution must be a multiple of two */
if(width % 2 != 0)
throw std::invalid_argument( "The width must be devisible by two" );
if(height % 2 != 0)
throw std::invalid_argument( "The height must be devisible by two" );
codec_ctx->width = width;
codec_ctx->height = height;
/* frames per second */
codec_ctx->framerate = (AVRational){framerate, 1};
/* timebase: This is the fundamental unit of time (in seconds) in terms
* of which frame timestamps are represented. For fixed-fps content,
* timebase should be 1/framerate and timestamp increments should be
* identical to 1. */
avStream->time_base = (AVRational){ 1, framerate };
codec_ctx->time_base = avStream->time_base;
codec_ctx->gop_size = 10; /* emit one intra frame every twelve frames at most */
codec_ctx->pix_fmt = AV_PIX_FMT_YUV420P;
/* Some formats want stream headers to be separate. */
if (avOutputFormat->flags & AVFMT_GLOBALHEADER)
codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
/* open the codec */
AVDictionary *opt = NULL;
av_dict_copy(&opt, avDict, 0);
ret = avcodec_open2(codec_ctx, codec, &opt);
av_dict_free(&opt);
if (ret < 0) {
fprintf(stderr, "Could not open video codec: %s\n", av_err2str(ret));
exit(1);
}
frame = alloc_frame(codec_ctx->pix_fmt, codec_ctx->width, codec_ctx->height);
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
/* copy the stream parameters to the muxer */
ret = avcodec_parameters_from_context(avStream->codecpar, codec_ctx);
if (ret < 0) {
fprintf(stderr, "Could not copy the stream parameters\n");
exit(1);
}
// Color fromat COnversion
sws = sws_getContext( codec_ctx->width
, codec_ctx->height
, AV_PIX_FMT_RGB32
, codec_ctx->width
, codec_ctx->height
, AV_PIX_FMT_YUV420P
, SWS_FAST_BILINEAR // Change this???
, 0, 0, 0);
// Check output file
av_dump_format(avFormatContext, 0, filename, 1);
/* open the output file, if needed */
if (!(avOutputFormat->flags & AVFMT_NOFILE)) {
ret = avio_open(&avFormatContext->pb, filename, AVIO_FLAG_WRITE);
if (ret < 0) {
fprintf(stderr, "Could not open '%s': %s\n", filename,
av_err2str(ret));
exit(1);
}
}
/* Write the stream header, if any. */
ret = avformat_write_header(avFormatContext, &avDict);
if (ret < 0) {
fprintf(stderr, "Error occurred when opening output file: %s\n",
av_err2str(ret));
exit(1);
}
}
void addFrame(){
fflush(stdout);
/* Make sure the frame data is writable.
On the first round, the frame is fresh from av_frame_get_buffer()
and therefore we know it is writable.
But on the next rounds, encode() will have called
avcodec_send_frame(), and the codec may have kept a reference to
the frame in its internal structures, that makes the frame
unwritable.
av_frame_make_writable() checks that and allocates a new buffer
for the frame only if necessary.
*/
ret = av_frame_make_writable(frame);
if (ret < 0){
fprintf(stderr, "Could not make the frame writable\n");
exit(1); // Wait... you should throw error instead!
}
size_t nvals = 4 * codec_ctx->width * codec_ctx->height; //GL_BGRA
pixels = (GLubyte *) realloc(pixels, nvals * sizeof(GLubyte)); // I don't think I need to do this every time since the size is constant
glReadPixels(0, 0, codec_ctx->width, codec_ctx->height, GL_BGRA, GL_UNSIGNED_BYTE, pixels);
// CONVERT TO YUV AND ENCODE
ret = av_image_alloc(frame->data, frame->linesize, codec_ctx->width, codec_ctx->height, AV_PIX_FMT_YUV420P, 32);
if (ret < 0){
fprintf(stderr, "Could not allocate the image\n");
exit(1); // Wait... you should throw error instead!
}
// Compensate for OpenGL y-axis pointing upwards and ffmpeg y-axis pointing downwards
uint8_t *in_data[1] = {(uint8_t *) pixels + (codec_ctx->height-1)*codec_ctx->width*4}; // address of the last line
int in_linesize[1] = {- codec_ctx->width * 4}; // negative stride
sws_scale(sws, in_data, in_linesize, 0, codec_ctx->height, frame->data, frame->linesize);
frame->pts = frame_order;
frame_order++;
/* encode the image */
write_frame(avFormatContext, codec_ctx, avStream, frame, pkt);
}
void close()
{
write_frame(avFormatContext, codec_ctx, avStream, NULL, pkt);
av_write_trailer(avFormatContext);
avcodec_free_context(&codec_ctx);
av_frame_free(&frame);
sws_freeContext(sws);
if (!(avFormatContext->oformat->flags & AVFMT_NOFILE))
/* Close the output file. */
avio_closep(&avFormatContext->pb);
avformat_free_context(avFormatContext);
}
private:
AVOutputFormat *avOutputFormat;
AVFormatContext* avFormatContext = NULL;
AVStream* avStream;
AVDictionary *avDict = NULL; // "create" an empty dictionary
GLubyte *pixels = NULL;
struct SwsContext *sws;
const AVCodec *codec;
AVCodecContext *codec_ctx= NULL;
// Should be ref counted??? https://ffmpeg.org/doxygen/3.3/group__lavc__encdec.html
AVFrame *frame;
AVPacket *pkt;
//
int frame_order, ret;
int write_frame(AVFormatContext *fmt_ctx, AVCodecContext *c,
AVStream *st, AVFrame *frame, AVPacket *pkt)
{
int ret;
// send the frame to the encoder
ret = avcodec_send_frame(c, frame);
if (ret < 0) {
fprintf(stderr, "Error sending a frame to the encoder: %s\n",
av_err2str(ret));
exit(1);
}
while (ret >= 0) {
ret = avcodec_receive_packet(c, pkt);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
break;
else if (ret < 0) {
fprintf(stderr, "Error encoding a frame: %s\n", av_err2str(ret));
exit(1);
}
/* rescale output packet timestamp values from codec to stream timebase */
av_packet_rescale_ts(pkt, c->time_base, st->time_base);
pkt->stream_index = st->index;
/* Write the compressed frame to the media file. */
log_packet(fmt_ctx, pkt);
ret = av_interleaved_write_frame(fmt_ctx, pkt);
/* pkt is now blank (av_interleaved_write_frame() takes ownership of
* its contents and resets pkt), so that no unreferencing is necessary.
* This would be different if one used av_write_frame(). */
if (ret < 0) {
fprintf(stderr, "Error while writing output packet: %s\n", av_err2str(ret));
exit(1);
}
}
return ret == AVERROR_EOF ? 1 : 0;
}
void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt)
{
AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
printf("pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),
av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),
av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),
pkt->stream_index);
}
AVFrame *alloc_frame(enum AVPixelFormat pix_fmt, int width, int height)
{
AVFrame *frame;
int ret;
frame = av_frame_alloc();
if (!frame)
return NULL;
frame->format = pix_fmt;
frame->width = width;
frame->height = height;
/* allocate the buffers for the frame data */
ret = av_frame_get_buffer(frame, 0);
if (ret < 0) {
fprintf(stderr, "Could not allocate frame data.\n");
exit(1);
}
return frame;
}
};
#endif
I am facing an issue opening a raw H.264 stream of 8 MP resolution, served over TCP from an Android device, in a Qt application. To open the stream in ffplay, I give the following command in a terminal and it is able to play it:
ffplay -f h264 -codec:v h264 -probesize 32M <tcp://ipaddress:port>
But when I try to open the stream in Qt Application, avformat_open_input() gives error Invalid data found while processing input.
Below is the code I am using in Qt Application:
av_register_all();
avcodec_register_all();
avformat_network_init();
AVFormatContext *refrenceFormatCtx = NULL;
SwsContext *img_convert_ctx;
AVIOContext *avio_ctx = NULL;
int video_stream_index = 0;
AVCodecContext* codec_ctx = NULL;
AVSampleFormat *fmt = NULL;
char errorsdef[100];
AVDictionary *options = NULL;
av_dict_set(&options, "video_size","3264x2448",0);
av_dict_set(&options,"pixel_format","yuv420p",0);
av_dict_set(&options, "f", "h264", 0);
av_dict_set(&options, "codec:v", "h264", 0);
av_dict_set(&options, "codec:a", "aac", 0);
av_dict_set(&options, "probesize", "32M", 0);
int err = avformat_open_input(&refrenceFormatCtx,"tcp://192.168.42.129:2226", NULL, &options);
av_strerror(err,errorsdef,100);
qDebug() << "OPening Stream error: "<< err << " "<< errorsdef;
if(err<0)
abort();
av_dict_free(&options);
Is this the correct way to set the options for avformat_open_input()? Are the parameters I set correct?
I got the answer for my above asked question. Code for the issue and getting rgb frames from raw H.264 Frame for 8MP resolution is as follows:
// Opens a raw H.264 stream over TCP, decodes it and emits every picture as a
// packed RGB24 buffer through imageQueued().
avcodec_register_all();
av_register_all();
avformat_network_init();
AVDictionary *options = NULL;
AVFormatContext *refrenceFormatCtx = NULL;
// Raw H.264 cannot be recognised from the URL, so the demuxer is named explicitly
AVInputFormat *fmts = av_find_input_format("h264");
char errorsdef[100];
AVCodecContext* codec_ctx = NULL;
int video_stream_index = 0;
SwsContext *img_convert_ctx = NULL;
AVFrame* picture_yuv = NULL;
AVFrame* picture_rgb = NULL;
// Initialised to NULL so that the cleanup at the end is safe when no frame was decoded
uint8_t* picture_buffer_rgb = NULL;
uint8_t *rgb_image_data = NULL;
int sizeofrgbpicture = 0;
int initialize_rgb_requirements=1;
picture_yuv = av_frame_alloc();
av_dict_set(&options, "flags", "bicubic", 0);
// The format context does not exist yet (it is created by
// avformat_open_input), so av_opt_set() on it would do nothing. The probe
// size therefore goes into the dictionary; the input format is already given
// by fmts.
av_dict_set(&options, "probesize", "32M", 0);
// Open video file
int err = avformat_open_input(&refrenceFormatCtx,"tcp://192.168.42.129:2226", fmts, &options);
// Whatever is still in the dictionary was not consumed by libavformat
if (options) {
    int dict_count = av_dict_count(options);
    qDebug() << "dict_count " << dict_count;
}
av_strerror(err,errorsdef,100);
qDebug() << "OPening Stream error: "<< err << " "<< errorsdef;
if (refrenceFormatCtx!=NULL){
    err = avformat_find_stream_info(refrenceFormatCtx, &options);
    if( err< 0){
        av_strerror(err,errorsdef,100);
        qDebug() << "Not able to find stream: "<< err << " "<< errorsdef;
    }
}else{
    qDebug() << "referencecontext null";
    exit(1);
}
//search video stream
for (int i = 0; i < (int)refrenceFormatCtx->nb_streams; i++) {
    AVStream* s = refrenceFormatCtx->streams[i];
    if (s->codec == NULL){
        continue;
    }
    codec_ctx = (s->codec);
    if (codec_ctx->codec_type == AVMEDIA_TYPE_VIDEO){
        video_stream_index = i;
    }
}
AVPacket packet;
av_init_packet(&packet);
//open output file
AVFormatContext* output_ctx = avformat_alloc_context();
AVStream* stream = NULL;
//start reading packets from stream and emit data pointer to slot
av_read_play(refrenceFormatCtx); //play RTSP
avcodec_copy_context(codec_ctx, refrenceFormatCtx->streams[video_stream_index]->codec);
if (avcodec_open2(codec_ctx, avcodec_find_decoder(AV_CODEC_ID_H264), NULL) < 0){
    qDebug() << "avcodec_open2 null";
}
while (av_read_frame(refrenceFormatCtx, &packet) >= 0) {
    if (packet.stream_index == video_stream_index) { //packet is video
        if (stream == NULL) { //create stream in file
            stream = avformat_new_stream(output_ctx, refrenceFormatCtx->streams[video_stream_index]->codec->codec);
            avcodec_copy_context(stream->codec, refrenceFormatCtx->streams[video_stream_index]->codec);
            stream->sample_aspect_ratio = refrenceFormatCtx->streams[video_stream_index]->codec->sample_aspect_ratio;
        }
        int check = 0;
        packet.stream_index = stream->id;
        int result = avcodec_decode_video2(codec_ctx, picture_yuv, &check, &packet);
        // One unref releases the packet; the deprecated av_free_packet() call was redundant
        av_packet_unref(&packet);
        if(result <= 0 || check == 0){
            continue;
        }
        if(initialize_rgb_requirements)
        {
            sizeofrgbpicture = avpicture_get_size(AV_PIX_FMT_RGB24, codec_ctx->width, codec_ctx->height);
            picture_rgb = av_frame_alloc();
            picture_buffer_rgb = (uint8_t*) (av_malloc(sizeofrgbpicture));
            avpicture_fill((AVPicture *) picture_rgb, picture_buffer_rgb, AV_PIX_FMT_RGB24, codec_ctx->width, codec_ctx->height);
            img_convert_ctx = sws_getContext(codec_ctx->width, codec_ctx->height, AV_PIX_FMT_YUV420P, codec_ctx->width, codec_ctx->height, AV_PIX_FMT_RGB24, SWS_BICUBIC, NULL, NULL, NULL);
            initialize_rgb_requirements=0;
        }
        int height = 0;
        if(picture_yuv->data != NULL)
        {
            height = sws_scale(img_convert_ctx, ((AVPicture*)picture_yuv)->data, ((AVPicture*)picture_yuv)->linesize, 0, codec_ctx->height, ((AVPicture*)picture_rgb)->data,((AVPicture*)picture_rgb)->linesize);
        }
        // A fresh buffer per frame: it is handed over to whoever receives imageQueued()
        rgb_image_data = (uint8_t *)malloc(sizeofrgbpicture * sizeof(uint8_t));
        if (rgb_image_data == NULL) {
            qDebug() << "could not allocate rgb image buffer";
            continue;
        }
        int ret = avpicture_layout((AVPicture *)picture_rgb, AV_PIX_FMT_RGB24, codec_ctx->width, codec_ctx->height, rgb_image_data, sizeofrgbpicture);
        emit imageQueued(rgb_image_data, codec_ctx->width,codec_ctx->height);
    } else {
        // Packets of other streams must be released as well
        av_packet_unref(&packet);
    }
    msleep(1);
}
// av_freep() expects the ADDRESS of the pointer; passing the buffer itself
// makes it free whatever the first bytes of the picture happen to contain
av_freep(&picture_buffer_rgb);
av_frame_free(&picture_rgb);
av_frame_free(&picture_yuv);
sws_freeContext(img_convert_ctx);
avio_close(output_ctx->pb);
avformat_free_context(output_ctx);
avformat_close_input(&refrenceFormatCtx);
I learned that for a raw H.264 stream we have to tell FFmpeg that the format is h264. For that I used AVInputFormat; to set other options like the video codec and probesize, I used av_opt_set(). To set the default flags in FFmpeg, I used av_dict_set(). I emit the data pointer to my required slot. If anyone wants to create a file from it, a .ppm file can be generated by writing the data behind this pointer to a file.
I have a program which capture video from webcam, encode with ffmpeg, encoded packet then write to buffer. At the receiver side, read from buffer decode with ffmpeg and play.
Now I have merged the sender and the receiver into one program for testing. It works fine with AV_CODEC_ID_MPEG1VIDEO, but when I change the FFmpeg codec to AV_CODEC_ID_H264, the decoding process shows an error:
The whole program is here, FYI. I made a loop to let the whole process run twice.
What is the cause of the error, is there anything special for H264? Thanks in advance!
#include <math.h>
extern "C" {
#include <libavutil/opt.h>
#include <libavcodec/avcodec.h>
#include <libavutil/channel_layout.h>
#include <libavutil/common.h>
#include <libavutil/imgutils.h>
#include <libavutil/mathematics.h>
#include <libavutil/samplefmt.h>
#include <libswscale/swscale.h>
#include "v4l2.h"
}
#include "opencv2/highgui/highgui.hpp"
#include <iostream>
using namespace cv;
using namespace std;
// Payload size of inbuf; the FFmpeg padding bytes are added on top of it.
#define INBUF_SIZE 4096
// Input buffer; the padding area after INBUF_SIZE is zeroed in video_decode_example().
static uint8_t inbuf[INBUF_SIZE + FF_INPUT_BUFFER_PADDING_SIZE];
// Codec and codec context shared through file scope; video_decode_example()
// assigns them for H.264 decoding.
static AVCodec *codec;
static AVCodecContext *c= NULL;
// NOTE(review): the users of the following variables are outside the visible
// part of the file - presumably encoder-side state; confirm.
static int ret, got_output;
static int frame_count;
static FILE *f;
static AVPacket pkt;
static AVFrame *frame;
// Frame allocated in video_decode_example() for the decoder.
static AVFrame *frameDecode;
// Frame that video_decode_example() fills with an RGB24 buffer.
static AVFrame *framergb;
// NOTE(review): looks like the MPEG sequence end code - confirm against its use.
static uint8_t endcode[] = { 0, 0, 1, 0xb7 };
// Packet initialised in video_decode_example().
static AVPacket avpkt;
int totalSize=0;
// NOTE(review): meaning not visible in this part of the file.
#define SUBSITY 3
/*
 * Decodes one packet and, if it produced a picture, converts it to BGR24 and
 * shows it in the OpenCV window "MyVideo".
 *
 *   avctx        opened decoder context
 *   frame        frame that receives the decoded picture
 *   frame_count  counter of displayed frames, incremented for each picture
 *   pkt          packet to decode; its data/size are advanced by the number
 *                of bytes the decoder consumed
 *   last         non-zero when called for the last frame (only affects the log text)
 *
 * Returns 0 on success or the negative decoder error code.
 */
static int decode_write_frame(AVCodecContext *avctx,
                              AVFrame *frame, int *frame_count, AVPacket *pkt, int last)
{
    int len, got_frame;
    struct SwsContext *convert_ctx;
    Mat m;
    AVFrame dst;
    len = avcodec_decode_video2(avctx, frame, &got_frame, pkt);
    if (len < 0) {
        fprintf(stderr, "Error while decoding frame %d\n", *frame_count);
        return len;
    }
    if (got_frame) {
        printf("Saving %s frame %3d\n", last ? "last " : "", *frame_count);
        fflush(stdout);
        int w = avctx->width;
        int h = avctx->height;
        /*convert AVFrame to opencv Mat frame*/
        m = cv::Mat(h, w, CV_8UC3);
        dst.data[0] = (uint8_t *)m.data;
        /* let dst describe the Mat's pixel memory so sws_scale writes straight into it */
        avpicture_fill( (AVPicture *)&dst, dst.data[0], PIX_FMT_BGR24, w, h);
        enum PixelFormat src_pixfmt = (enum PixelFormat)frame->format;
        enum PixelFormat dst_pixfmt = PIX_FMT_BGR24;
        convert_ctx = sws_getContext(w, h, src_pixfmt, w, h, dst_pixfmt,
                                     SWS_FAST_BILINEAR, NULL, NULL, NULL);
        if(convert_ctx == NULL) {
            fprintf(stderr, "Cannot initialize the conversion context!\n");
            exit(1);
        }
        sws_scale(convert_ctx, frame->data, frame->linesize, 0, h,
                  dst.data, dst.linesize);
        /* the context is created per frame, so it must be released per frame */
        sws_freeContext(convert_ctx);
        imshow("MyVideo", m);
        //video.write(m);
        waitKey(10); //wait next frame time
        (*frame_count)++;
    }
    if (pkt->data) {
        pkt->size -= len;
        pkt->data += len;
    }
    return 0;
}
static void video_decode_example(char *inbufout)
{
int bytes;
uint8_t *buffer;
av_init_packet(&avpkt);
memset(inbuf + INBUF_SIZE, 0, FF_INPUT_BUFFER_PADDING_SIZE);
codec = avcodec_find_decoder(AV_CODEC_ID_H264);
if (!codec) {
fprintf(stderr, "Codec not found\n");
exit(1);
}
c = avcodec_alloc_context3(codec);
if (!c) {
fprintf(stderr, "Could not allocate video codec context\n");
exit(1);
}
if(codec->capabilities&CODEC_CAP_TRUNCATED)
c->flags|= CODEC_FLAG_TRUNCATED; /* we do not send complete frames */
/* open it */
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
exit(1);
}
frameDecode = avcodec_alloc_frame();
if (!frameDecode) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
bytes=avpicture_get_size(PIX_FMT_RGB24, CAMER_WIDTH, CAMER_HEIGHT);
buffer=(uint8_t *)av_malloc(bytes*sizeof(uint8_t));
avpicture_fill((AVPicture *)framergb, buffer, PIX_FMT_RGB24,
CAMER_WIDTH, CAMER_HEIGHT);*/
frame_count = 0;
namedWindow("MyVideo",CV_WINDOW_AUTOSIZE); //create a window called "MyVideo"
int size1=0;
for(;;) {
memcpy(inbuf,inbufout+size1,INBUF_SIZE);
size1+=INBUF_SIZE;
if (size1>(totalSize-INBUF_SIZE))
break;
avpkt.size=INBUF_SIZE;
avpkt.data = inbuf;
/*frame by frame process*/
while (avpkt.size > 0)
if (decode_write_frame(c, frameDecode, &frame_count, &avpkt, 0) < 0)
exit(1);
}
avpkt.data = NULL;
avpkt.size = 0;
decode_write_frame(c, frameDecode, &frame_count, &avpkt, 1);
}
static void init_video_encode(const char *filename, AVCodecID codec_id, int max_f)
{
printf("Encode video file %s\n", filename);
/* find the mpeg1 video encoder */
codec = avcodec_find_encoder(codec_id);
if (!codec) {
fprintf(stderr, "Codec not found\n");
exit(1);
}
c = avcodec_alloc_context3(codec);
if (!c) {
fprintf(stderr, "Could not allocate video codec context\n");
exit(1);
}
/* put sample parameters */
c->bit_rate = 400000;
/* resolution must be a multiple of two */
c->width = 640;
c->height = 480;
/* frames per second */
c->time_base= (AVRational){1,25};
c->gop_size = 10; /* emit one intra frame every ten frames */
c->max_b_frames=max_f;
c->pix_fmt = AV_PIX_FMT_YUV420P;
if(codec_id == AV_CODEC_ID_H264)
av_opt_set(c->priv_data, "preset", "slow", 0);
/* open it */
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
exit(1);
}
frame = avcodec_alloc_frame();
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
frame->format = c->pix_fmt;
frame->width = c->width;
frame->height = c->height;
ret = av_image_alloc(frame->data, frame->linesize, c->width, c->height,
c->pix_fmt, 32);
/* get the delayed frames */
if (ret < 0) {
fprintf(stderr, "Could not allocate raw picture buffer\n");
exit(1);
}
printf("\n");
}
/**
 * Encode one picture and append the resulting packet, if any, to inbufout.
 *
 * A new camera picture is read into `frame` once every SUBSITY calls; the
 * calls in between encode the previous picture again. Each produced packet
 * is copied to inbufout at offset totalSize, totalSize is advanced, and the
 * packet is also written to `f` when that file is open.
 *
 * @param frameNo   unused; pts comes from an internal counter
 * @param inbufout  destination buffer; the caller must make it large enough
 *                  for all packets, since no capacity is passed in
 * @return 0 on success, -1 if the encoder reported an error
 */
int video_encode(int frameNo,char *inbufout)
{
    static int count = 0;
    static int i = 0;

    (void)frameNo;
    /* encode 1 frame of video */
    av_init_packet(&pkt);
    pkt.data = NULL; // packet data will be allocated by the encoder
    pkt.size = 0;
    if (count == 0)
        read_yuv420(frame->data[0]);
    count++;
    if (count == SUBSITY)
        count = 0;
    frame->pts = i++;
    /* encode the image */
    ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
    if (ret < 0) {
        fprintf(stderr, "Error encoding frame\n");
        return -1;
    }
    if (got_output) {
        printf("Write frame %3d (size=%5d)\n", i, pkt.size);
        memcpy(inbufout + totalSize, pkt.data, pkt.size);
        totalSize += pkt.size;
        /* f may be NULL when no output file was opened; passing a NULL
         * stream to fwrite() is undefined behaviour, so skip the file copy. */
        if (f && fwrite(pkt.data, 1, pkt.size, f) != (size_t)pkt.size)
            fprintf(stderr, "Short write to output file\n");
        av_free_packet(&pkt);
    }
    return 0;
}
/**
 * Release the file-scope encoding state: output file `f`, codec context `c`
 * and the picture `frame` with its pixel buffer.
 *
 * Pointers are reset after release so a later call, or another encode pass,
 * never touches a closed file or a freed context.
 */
void cancle_encode(void)
{
    /* f is NULL when no output file was opened; fclose(NULL) is undefined. */
    if (f) {
        fclose(f);
        f = NULL;
    }
    avcodec_close(c);
    av_free(c);
    c = NULL;
    if (frame) {
        av_freep(&frame->data[0]);
        avcodec_free_frame(&frame);
    }
}
/**
 * Test driver: twice in a row, capture and encode 250 pictures into a memory
 * buffer, then decode and display that buffer.
 *
 * @return 0 on success, 1 if the camera cannot be opened or encoding fails
 */
int main(int argc, char **argv)
{
    /* Roughly 5 MB: kept in static storage because an automatic array of this
     * size can exceed the default stack limit. */
    static char inbufout[25*50*(INBUF_SIZE + FF_INPUT_BUFFER_PADDING_SIZE)];
    int status = 0;

    (void)argc;
    (void)argv;
    if (init_v4l2() < 0) {
        printf("can't open camera\n");
        return 1;
    }
    /* register all the codecs */
    avcodec_register_all();
    for (int j = 0; j < 2 && status == 0; j++) {
        /* AV_CODEC_ID_MPEG1VIDEO can be passed here instead of H264. */
        init_video_encode("test.mpg", AV_CODEC_ID_H264, 15);
        for (int i = 0; i < 25*10; i++) {
            if (video_encode(i, inbufout) < 0) {
                status = 1;
                break;
            }
        }
        if (status == 0) {
            cout<<"\n"<<totalSize<<"\n"<<endl;
            video_decode_example(inbufout);
        }
        /* Runs on the failure path as well so the encoder is always released. */
        cancle_encode();
        totalSize = 0;
    }
    /* Always release the camera, including after an encode failure. */
    exit_v4l2();
    return status;
}
You need to include a parser. The ffmpeg mpeg1/2 decoders happen to work fine without a parser, but h264/mpeg4/vp9 need a parser, or you'll get errors like the above.
Note that if you use libavformat for demuxing and call av_read_frame(), it will automatically parse for you, but since you're doing buffer management yourself, you need to include the parser yourself also.
I am currently working on a project which decodes received frames using ffmpeg. After decoding, I want to convert the AVFrame to an OpenCV Mat so that I can display it with the imshow function.
What I have is the byte stream; I read it into a buffer and decode it to an AVFrame:
f = fopen(filename, "rb");
if (!f) {
fprintf(stderr, "Could not open %s\n", filename);
exit(1);
}
frame = avcodec_alloc_frame();
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
framergb = avcodec_alloc_frame();
if (!framergb) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
bytes=avpicture_get_size(PIX_FMT_RGB24, CAMER_WIDTH, CAMER_HEIGHT);
buffer=(uint8_t *)av_malloc(bytes*sizeof(uint8_t));
avpicture_fill((AVPicture *)framergb, buffer, PIX_FMT_RGB24,
CAMER_WIDTH, CAMER_HEIGHT);
frame_count = 0;
for(;;) {
avpkt.size = fread(inbuf, 1, INBUF_SIZE, f);
if (avpkt.size == 0)
break;
avpkt.data = inbuf;
while (avpkt.size > 0)
if (decode_write_frame(outfilename, c, frame, &frame_count, &avpkt, 0) < 0)
exit(1);
}
avpkt.data = NULL;
avpkt.size = 0;
decode_write_frame(outfilename, c, frame, &frame_count, &avpkt, 1);
and decode_write_frame is defined like this:
static int decode_write_frame(const char *outfilename, AVCodecContext *avctx,AVFrame *frame, int *frame_count, AVPacket *pkt, int last)
{
int len, got_frame;
char buf[1024];
struct SwsContext *convert_ctx;
len = avcodec_decode_video2(avctx, frame, &got_frame, pkt);
if (len < 0) {
fprintf(stderr, "Error while decoding frame %d\n", *frame_count);
return len;
}
if (got_frame) {
printf("Saving %sframe %3d\n", last ? "last " : "", *frame_count);
fflush(stdout);
int w = avctx->width;
int h = avctx->height;
convert_ctx = sws_getContext(w, h, avctx->pix_fmt,
w, h, PIX_FMT_RGB24, SWS_BICUBIC,
NULL, NULL, NULL);
if(convert_ctx == NULL) {
fprintf(stderr, "Cannot initialize the conversion context!\n");
exit(1);
}
sws_scale(convert_ctx, frame->data,
frame->linesize, 0,
h,
framergb->data, framergb->linesize);
/* the picture is allocated by the decoder, no need to free it */
snprintf(buf, sizeof(buf), outfilename, *frame_count);
bmp_save(framergb->data[0], framergb->linesize[0],
avctx->width, avctx->height, buf);
(*frame_count)++;
}
if (pkt->data) {
pkt->size -= len;
pkt->data += len;
}
return 0;
}
Here bmp_save() is defined by the original code author to convert an AVFrame to a BMP picture. I want to modify this so that the AVFrame is converted to an OpenCV Mat instead. How should I do this conversion?
Thanks in advance.
Using the appropriate Mat constructor, replace the bmp_save line by:
Mat mat(avctx->height, avctx->width, CV_8UC3, framergb->data[0], framergb->linesize[0]);
imshow("frame", mat);
waitKey(10);
Also replace the PIX_FMT_RGB24 flag in sws_getContext by PIX_FMT_BGR24, because OpenCV uses the BGR format internally.
Thank you for your answer. I also solved it this way:
Say AVFrame *frame is the original ffmpeg frame ready to be converted:
Mat m;
AVFrame dst;
int w = frame->width;
int h = frame->height;
m = cv::Mat(h, w, CV_8UC3);
dst.data[0] = (uint8_t *)m.data;
avpicture_fill( (AVPicture *)&dst, dst.data[0], PIX_FMT_BGR24, w, h);
enum PixelFormat src_pixfmt = (enum PixelFormat)frame->format;
enum PixelFormat dst_pixfmt = PIX_FMT_BGR24;
convert_ctx = sws_getContext(w, h, src_pixfmt, w, h, dst_pixfmt,
SWS_FAST_BILINEAR, NULL, NULL, NULL);
if(convert_ctx == NULL) {
fprintf(stderr, "Cannot initialize the conversion context!\n");
exit(1);
}
sws_scale(convert_ctx, frame->data, frame->linesize, 0, h,
dst.data, dst.linesize);
imshow("MyVideo", m);
waitKey(30);
Worked nicely!