Decoding m4a and dumping PCM data gives back noise

Decoding m4a and dumping PCM data gives back noise - c++

I'm using the code below (modified from the examples given in libavcodec) to decode audio files
/*
 * Decode the best audio stream of the input file (argv[1]) and append the
 * decoded bytes of every frame to the output file (argv[2]).
 *
 * NOTE(review): only plane 0 (frame->extended_data[0]) is written, and
 * frame->linesize[0] is used as the byte count. The samples are dumped in the
 * decoder's own sample format with no conversion; for a planar format this is
 * presumably just the first channel -- confirm this is what the consumer of
 * the dump expects.
 *
 * Returns 0 on success and 1 if writing the output failed; setup failures
 * terminate the process with a non-zero exit status.
 */
int main(int argc, char **argv)
{
    if (argc < 3) {
        fprintf(stderr, "Usage: %s <input file> <output file>\n",
                argc > 0 ? argv[0] : "decode_audio");
        exit(1);
    }

    av_register_all();
    avcodec_register_all();

    char *filename = argv[1];
    char *outfilename = argv[2];
    AVCodec *codec = NULL;
    AVCodecContext *c = NULL;
    AVFormatContext *format_context = NULL;
    AVPacket avpkt;

    AVFrame *frame = av_frame_alloc();
    if (!frame) {
        fprintf(stderr, "Could not allocate audio frame\n");
        exit(1);
    }

    printf("Decode audio file %s to %s\n", filename, outfilename);
    FILE *outfile = fopen(outfilename, "wb");
    if (!outfile) {
        fprintf(stderr, "Could not write to %s\n", outfilename);
        exit(1);
    }

    /* A failed open leaves format_context NULL, so it must be caught here. */
    if (avformat_open_input(&format_context, filename, NULL, NULL) < 0) {
        fprintf(stderr, "Could not open input file %s\n", filename);
        exit(-1);
    }
    printf("Opened format input\n");

    if (avformat_find_stream_info(format_context, NULL) < 0) {
        fprintf(stderr, "Cannot find stream info\n");
        avformat_close_input(&format_context);
        exit(-1);
    }

    int audio_stream_idx = av_find_best_stream(format_context, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0);
    if (audio_stream_idx < 0) {
        fprintf(stderr,"Couldn't find stream information\n");
        avformat_close_input(&format_context);
        exit(-1);
    }

    // Get a pointer to the codec context for the audio stream
    c = format_context->streams[audio_stream_idx]->codec;
    av_opt_set_int(c, "refcounted_frames", 1, 0);
    if (avcodec_open2(c, codec, NULL) < 0) {
        fprintf(stderr, "Could not open codec\n");
        exit(-1);
    }

    // read the audio frames
    int write_failed = 0;
    while (!write_failed && av_read_frame(format_context, &avpkt) >= 0) {
        if (avpkt.stream_index == audio_stream_idx) {
            int got_frame = 0;

            /* refcounted_frames is enabled above, so the previous frame's
               buffers belong to us and must be released before reuse. */
            av_frame_unref(frame);

            if (avcodec_decode_audio4(c, frame, &got_frame, &avpkt) < 0) {
                /* Skip the broken packet, but still release it below. */
                fprintf(stderr, "Error decoding audio\n");
            } else if (got_frame) {
                // write to disk
                size_t want = (size_t)frame->linesize[0];
                if (fwrite(frame->extended_data[0], 1, want, outfile) != want) {
                    fprintf(stderr, "Could not write to %s\n", outfilename);
                    write_failed = 1;
                }
            }
        }
        av_free_packet(&avpkt);
    }

    fclose(outfile);
    printf("Finished\n");

    avcodec_close(c);
    avformat_close_input(&format_context);
    av_frame_free(&frame);
    return write_failed ? 1 : 0;
}
I tried .mp3 and .m4a files; .mp3 files work fine, but .m4a files do not. Any help?

Most AAC files decode to planar floating-point samples, i.e. AV_SAMPLE_FMT_FLTP, while libraries such as libao can only play integer samples, e.g. AV_SAMPLE_FMT_S16.
As a result, you need to use libavresample to resample the audio and convert it to the appropriate sample format.

Related

How parse and decode H264 file with libav/ffmpeg?

Following the official documentation, I am trying to decode my test.mp4 with AV_CODEC_ID_H264.
Of course I can do this with av_read_frame(), but how do I do it with av_parser_parse2()?
The problem occurs in avcodec_send_packet(...), inside decode_nal_units(...), at ff_h2645_packet_split(...) [h264dec.c].
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
}
//#define INBUF_SIZE 4096
#define INBUF_SIZE 256000
void decode(AVCodecContext* dec_ctx, AVFrame* frame, AVPacket* pkt, const char* filename);
/*
 * Read the input file in INBUF_SIZE chunks, split the bytes into H.264
 * packets with av_parser_parse2() and hand every packet to decode().
 *
 * NOTE(review): the hard-coded input is an .mp4 file, yet its bytes are fed
 * to the parser exactly as read from disk. The parser presumably expects a
 * raw H.264 elementary stream, so a container file would have to be demuxed
 * first (av_read_frame) -- confirm against the FFmpeg parser documentation.
 *
 * Returns 0 on success; any failure terminates the process with exit(1).
 */
int main(int argc, char** argv)
{
    const char* filename = "D:\\test.mp4";
    const AVCodec* codec;
    AVCodecParserContext* parser;
    AVCodecContext* c = NULL;
    FILE* f;
    AVFrame* frame;
    uint8_t inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
    uint8_t* data;
    size_t data_size;
    int ret;
    int eof;
    AVPacket* pkt;

    pkt = av_packet_alloc();
    if (!pkt)
        exit(1);

    /* set end of buffer to 0 (this ensures that no overreading happens for damaged MPEG streams) */
    memset(inbuf + INBUF_SIZE, 0, AV_INPUT_BUFFER_PADDING_SIZE);

    /* find the H.264 video decoder */
    codec = avcodec_find_decoder(AV_CODEC_ID_H264);
    if (!codec) {
        fprintf(stderr, "Codec not found\n");
        exit(1);
    }

    /* PARSER_FLAG_COMPLETE_FRAMES is deliberately not set: the fread() chunks
       below end at arbitrary byte offsets, so the parser has to locate the
       frame boundaries itself. */
    parser = av_parser_init(codec->id);
    if (!parser) {
        fprintf(stderr, "parser not found\n");
        exit(1);
    }

    c = avcodec_alloc_context3(codec);
    if (!c) {
        fprintf(stderr, "Could not allocate video codec context\n");
        exit(1);
    }

    /* open it */
    if (avcodec_open2(c, codec, NULL) < 0) {
        fprintf(stderr, "Could not open codec\n");
        exit(1);
    }

    f = fopen(filename, "rb");
    if (!f) {
        fprintf(stderr, "Could not open %s\n", filename);
        exit(1);
    }

    frame = av_frame_alloc();
    if (!frame) {
        fprintf(stderr, "Could not allocate video frame\n");
        exit(1);
    }

    // ---- Use parser to get packets ----
    do {
        /* read raw data from the input file */
        data_size = fread(inbuf, 1, INBUF_SIZE, f);
        if (ferror(f))
            break;
        eof = !data_size;

        /* use the parser to split the data into frames; at end of file the
           parser is called once more with size 0 so that it releases the
           frame it is still holding back */
        data = inbuf;
        while (data_size > 0 || eof) {
            ret = av_parser_parse2(parser, c, &pkt->data, &pkt->size, data, (int)data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
            if (ret < 0) {
                fprintf(stderr, "Error while parsing\n");
                exit(1);
            }
            data += ret;
            data_size -= ret;
            if (pkt->size)
                decode(c, frame, pkt, filename);
            else if (eof)
                break;
        }
    } while (!eof);

    /* flush the decoder */
    decode(c, frame, NULL, filename);

    fclose(f);
    av_parser_close(parser);
    avcodec_free_context(&c);
    av_frame_free(&frame);
    av_packet_free(&pkt);
    return 0;
}
/*
 * Send one packet to the decoder and drain every frame that becomes available.
 *
 * dec_ctx  - opened decoder context
 * frame    - caller-owned frame that receives each decoded picture
 * pkt      - packet to decode; main() passes NULL to flush the decoder
 * filename - not used by this function
 *
 * Returns when the decoder reports EAGAIN or EOF; any other decoder error
 * terminates the process with exit(1).
 */
void decode(AVCodecContext* dec_ctx, AVFrame* frame, AVPacket* pkt, const char* filename)
{
    int ret = avcodec_send_packet(dec_ctx, pkt);
    if (ret < 0) {
        // Report the library's own description of the failure instead of
        // computing it and throwing it away.
        char errbuf[255] = { 0 };
        av_make_error_string(errbuf, sizeof(errbuf), ret);
        fprintf(stderr, "Error sending a packet for decoding: %s\n", errbuf);
        exit(1);
    }

    while (ret >= 0) {
        ret = avcodec_receive_frame(dec_ctx, frame);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
            return;
        if (ret < 0) {
            fprintf(stderr, "Error during decoding\n");
            exit(1);
        }

        printf("saving frame %3d\n", dec_ctx->frame_number);
        fflush(stdout);
        /* the picture is allocated by the decoder. no need to
           free it */
        // handle frame ...
    }
}

Decoding HEVC file in C++ with FFmpeg missing one frame

I'm trying to decode my HEVC file in C++ using FFmpeg. I use the HEVC decoder and try to save the frames in PPM format (almost all of the source code comes from the FFmpeg example [decode_video.c] https://ffmpeg.org/doxygen/trunk/decode_video_8c-example.html; the only new part is the conversion from YUV to RGB). My HEVC file has 677 frames, which I checked with ffprobe on the command line, but I only get 676 frames with my project. I have also checked other HEVC files and the result is the same: I always get one frame less.
I also tried another FFmpeg example [demuxing_decoding.c] (https://ffmpeg.org/doxygen/trunk/demuxing_decoding_8c-example.html), the result is same, one frame less...
That seems to happen only with H.265 and H.264 files; is it a bug in FFmpeg?
Can anybody help me, i post my code here. Sorry, don't know how to attach my project and test files. Thanks a lot!
Best regards,
Ivan
#include <iostream>
extern "C"
{
#include "../Headers/libavcodec/avcodec.h"
#include "../Headers/libavformat/avformat.h"
#include "../Headers/libswscale/swscale.h"
}
#define INBUF_SIZE 4096
// Save an RGB image as a binary PPM ("P6") file.
//
// filename - path of the file to create
// frame    - frame whose plane 0 is written row by row, frame->width * 3
//            bytes per row (assumes packed 24-bit RGB -- the caller in this
//            file converts to AV_PIX_FMT_RGB24 before calling)
//
// If the file cannot be opened an error is printed and nothing is written.
static void ppm_save(char* filename, AVFrame* frame)
{
    FILE* file = nullptr;

    if (fopen_s(&file, filename, "wb") != 0 || file == nullptr) {
        fprintf(stderr, "Could not open %s\n", filename);
        return;
    }

    fprintf(file, "P6\n%d %d\n%d\n", frame->width, frame->height, 255);
    for (int i = 0; i < frame->height; i++)
        fwrite(frame->data[0] + i * frame->linesize[0], 1, frame->width * 3, file);

    fclose(file);
}
/*
 * Send one packet to the decoder, convert every frame that comes out to RGB24
 * and save it as "<outfilePrefix>-<frame number>.ppm".
 *
 * dec_ctx       - opened decoder context
 * frame         - caller-owned frame that receives each decoded picture
 * pkt           - packet to decode; main() passes NULL to flush the decoder
 * outfilePrefix - prefix of the generated file names
 *
 * The scaler context and the RGB frame are created for this call only and are
 * released before returning. Decoder errors terminate the process with exit(1).
 */
void decode(AVCodecContext* dec_ctx, AVFrame* frame, AVPacket* pkt, const char* outfilePrefix)
{
    char buf[1024];

    int ret = avcodec_send_packet(dec_ctx, pkt);
    if (ret < 0) {
        fprintf(stderr, "Error sending a packet for decoding\n");
        exit(1);
    }

    //Create SWS Context for converting from decode pixel format (like YUV420) to RGB
    struct SwsContext* sws_ctx = sws_getContext(dec_ctx->width,
                                                dec_ctx->height,
                                                dec_ctx->pix_fmt,
                                                dec_ctx->width,
                                                dec_ctx->height,
                                                AV_PIX_FMT_RGB24,
                                                SWS_BICUBIC,
                                                NULL,
                                                NULL,
                                                NULL);
    if (sws_ctx == nullptr)
    {
        return; //Error!
    }

    //Allocate frame for storing image converted to RGB.
    AVFrame* pRGBFrame = av_frame_alloc();
    if (pRGBFrame == nullptr)
    {
        sws_freeContext(sws_ctx);
        return; //Error!
    }
    pRGBFrame->format = AV_PIX_FMT_RGB24;
    pRGBFrame->width = dec_ctx->width;
    pRGBFrame->height = dec_ctx->height;

    if (av_frame_get_buffer(pRGBFrame, 0) >= 0)
    {
        while (ret >= 0)
        {
            ret = avcodec_receive_frame(dec_ctx, frame);
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
                break;
            if (ret < 0) {
                fprintf(stderr, "Error during decoding\n");
                exit(1);
            }

            printf("saving frame %3d\n", dec_ctx->frame_number);//
            fflush(stdout);

            //Convert from input format (e.g YUV420) to RGB and save to PPM:
            int rows = sws_scale(sws_ctx,              //struct SwsContext* c,
                                 frame->data,          //const uint8_t* const srcSlice[],
                                 frame->linesize,      //const int srcStride[],
                                 0,                    //int srcSliceY,
                                 frame->height,        //int srcSliceH,
                                 pRGBFrame->data,      //uint8_t* const dst[],
                                 pRGBFrame->linesize); //const int dstStride[]);
            if (rows != frame->height)
            {
                // Do not save a picture that was not fully converted, but keep
                // draining the decoder so later frames are not held back.
                fprintf(stderr, "Could not convert frame %d to RGB\n", dec_ctx->frame_number);
                continue;
            }

            snprintf(buf, sizeof(buf), "%s-%d.ppm", outfilePrefix, dec_ctx->frame_number);
            ppm_save(buf, pRGBFrame);
        }
    }

    //Free
    sws_freeContext(sws_ctx);
    av_frame_free(&pRGBFrame);
}
/*
 * Read a raw HEVC stream in INBUF_SIZE chunks, split it into packets with
 * av_parser_parse2() and pass every packet to decode().
 *
 * In _DEBUG builds the input file and output prefix are hard-coded; otherwise
 * they are taken from argv[1] and argv[2].
 *
 * Returns 0 on success; failures terminate the process via exit().
 */
int main(int argc, char** argv)
{
    const char* filename;
    const char* outfilePrefix;
    const AVCodec* codec;
    AVCodecParserContext* parser;
    AVCodecContext* codecContext = NULL;
    FILE* file = NULL;
    AVFrame* frame;
    uint8_t inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
    uint8_t* data;
    size_t data_size;
    int ret;
    int eof;
    AVPacket* pkt;

#ifdef _DEBUG
    (void)argc;
    (void)argv;
    filename = "D:\\TestFiles\\sample_1280x720.hevc";
    outfilePrefix = "D:\\TestFiles\\sample_1280x720_output\\output";
#else
    if (argc <= 2) {
        fprintf(stderr, "Usage: %s <input file> <output file>\n"
                "And check your input file is encoded by hevc please.\n", argv[0]);
        exit(0);
    }
    filename = argv[1];
    outfilePrefix = argv[2];
#endif

    pkt = av_packet_alloc();
    if (!pkt)
        exit(1);

    /* set end of buffer to 0 (this ensures that no overreading happens for damaged MPEG streams) */
    memset(inbuf + INBUF_SIZE, 0, AV_INPUT_BUFFER_PADDING_SIZE);

    /* find the HEVC video decoder */
    codec = avcodec_find_decoder(AV_CODEC_ID_HEVC);
    if (!codec) {
        fprintf(stderr, "Codec not found\n");
        exit(1);
    }

    parser = av_parser_init(codec->id);
    if (!parser) {
        fprintf(stderr, "parser not found\n");
        exit(1);
    }

    codecContext = avcodec_alloc_context3(codec);
    if (!codecContext) {
        fprintf(stderr, "Could not allocate video codec context\n");
        exit(1);
    }

    /* open it */
    if (avcodec_open2(codecContext, codec, NULL) < 0) {
        fprintf(stderr, "Could not open codec\n");
        exit(1);
    }

    fopen_s(&file, filename, "rb");
    if (!file) {
        fprintf(stderr, "Could not open %s\n", filename);
        exit(1);
    }

    frame = av_frame_alloc();
    if (!frame) {
        fprintf(stderr, "Could not allocate video frame\n");
        exit(1);
    }

    do {
        /* read raw data from the input file */
        data_size = fread(inbuf, 1, INBUF_SIZE, file);
        if (ferror(file))
            break;
        eof = !data_size;

        /* use the parser to split the data into frames; at end of file the
           parser is called once more with size 0, which makes it hand out
           the last frame it is still caching */
        data = inbuf;
        while (data_size > 0 || eof)
        {
            ret = av_parser_parse2(parser, codecContext, &pkt->data, &pkt->size,
                                   data, (int)data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
            if (ret < 0) {
                fprintf(stderr, "Error while parsing\n");
                exit(1);
            }
            data += ret;
            data_size -= ret;
            if (pkt->size)
                decode(codecContext, frame, pkt, outfilePrefix);
            else if (eof)
                break;
        }
    } while (!eof);

    /* flush the decoder */
    decode(codecContext, frame, NULL, outfilePrefix);

    fclose(file);
    av_parser_close(parser);
    avcodec_free_context(&codecContext);
    av_frame_free(&frame);
    av_packet_free(&pkt);
    return 0;
}

The problem is that you're not calling av_parser_parse2() with data_size=0 to signal EOF. See the API docs:
buf_size: input length, to signal EOF, this should be 0 (so that the last frame can be output).
Without that call, one frame will be cached in the parser, and that's the one missing in your output.
[edit]
To be clear, I acknowledge that you copied the example code in the API docs correctly:
[..]
while(in_len){
len = av_parser_parse2(myparser, AVCodecContext, &data, &size,
in_data, in_len,
pts, dts, pos);
[..]
However, that code is unfortunately incomplete. If you look at the relevant usage of that code in demux.c, you'll see that explicit flush is required:
[..]
1134 while (size > 0 || (flush && got_output)) {
1135 int64_t next_pts = pkt->pts;
1136 int64_t next_dts = pkt->dts;
1137 int len;
1138
1139 len = av_parser_parse2(sti->parser, sti->avctx,
1140 &out_pkt->data, &out_pkt->size, data, size,
1141 pkt->pts, pkt->dts, pkt->pos);
[..]

How to save AVFrame as image in C++ using FFmpeg

In my project, I'd like to save one of the frames from an HEVC file. I'm using FFmpeg in my source code to decode the HEVC file and get the AVFrame and AVCodecContext.
What I need is to save the frame as a picture (in full color).
I have tried saving it as a *.pgm file, but then the picture is just gray, which is not really what I need.
Any suggestions? Thanks!
// Write plane 0 of a frame as a binary PGM ("P5") grayscale image.
//
// filename - path of the file to create
// frame    - decoded frame; frame->width bytes are written from each of the
//            frame->height rows of data[0]
//
// If the file cannot be opened an error is printed and nothing is written.
void HevcDecoder::Images_Save(char* filename, AVFrame *frame)
{
    FILE* file = nullptr;

    if (fopen_s(&file, filename, "wb") != 0 || file == nullptr) {
        fprintf(stderr, "Could not open %s\n", filename);
        return;
    }

    fprintf(file, "P5\n%d %d\n%d\n", frame->width, frame->height, 255);
    for (int i = 0; i < frame->height; i++)
        fwrite(frame->data[0] + i * frame->linesize[0], 1, frame->width, file);

    fclose(file);
}
// Feed one packet to the decoder, then save every frame it yields as
// "<filename>-<frame number>.pgm" through Images_Save().
//
// Returns once the decoder reports EAGAIN or EOF. A failure to send the
// packet, or any other decoder error, ends the process with exit(1).
void HevcDecoder::Decode(AVCodecContext* dec_ctx, AVFrame* frame, AVPacket* pkt, const char* filename)
{
    if (avcodec_send_packet(dec_ctx, pkt) < 0) {
        fprintf(stderr, "Error sending a packet for decoding\n");
        exit(1);
    }

    char outPath[1024];
    for (;;) {
        const int status = avcodec_receive_frame(dec_ctx, frame);

        // Nothing more to fetch for this packet (or the stream has ended).
        if (status == AVERROR(EAGAIN) || status == AVERROR_EOF) {
            return;
        }
        if (status < 0) {
            fprintf(stderr, "Error during decoding\n");
            exit(1);
        }

        printf("saving frame %3d\n", dec_ctx->frame_number);
        fflush(stdout);

        // The decoder owns the picture buffers, so there is nothing to free here.
        snprintf(outPath, sizeof(outPath), "%s-%d.pgm", filename, dec_ctx->frame_number);
        Images_Save(outPath, frame/*, dec_ctx*/);
    }
}

Converting a raw HEVC file to a sequence of images using the FFmpeg CLI is simple.
Assume input.265 is the input file (raw HEVC video stream):
Converting to PNG images:
ffmpeg -i input.265 %05d.png
Converting to PPM images:
ffmpeg -i input.265 %05d.ppm
In case the input video uses MP4 container and you want JPEG images:
ffmpeg -i input.mp4 %05d.jpg
Using FFmpeg C interface (Libav):
For making things reproducible, start by creating an input video file using FFmpeg CLI:
ffmpeg -y -f lavfi -i testsrc=size=192x108:rate=1:duration=10 -vcodec libx265 -pix_fmt yuv420p input.265
The above command creates HEVC (H.265) encoded stream - 10 frames with resolution 192x108 and pixel format YUV420 (synthetic pattern).
The encoded stream is raw video stream (without container).
Note:
RAW HEVC (H.265) video stream is not commonly used file format.
Usually the stream is wrapped by container (like MP4 / MKV / AVI...).
We use the raw video stream for educational purposes - the code used for decoding is simpler.
Saving the images as color images:
The code sample reuses the code from this post.
PGM is a grayscale format, for equivalent color format we may use PPM format.
We may use SWS Scale to convert the format from YUV420 to RGB.
We can use the code sample from this post
Here is the code sample:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libswscale/swscale.h>
}
#define INBUF_SIZE 1024
//static void pgm_save(unsigned char* buf, int wrap, int xsize, int ysize, char* filename)
//{
// FILE* f;
// int i;
//
// f = fopen(filename, "wb");
// fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
// for (i = 0; i < ysize; i++)
// fwrite(buf + i * wrap, 1, xsize, f);
// fclose(f);
//}
// Save a packed 24-bit RGB image as a binary PPM ("P6") file.
//
// buf      - first byte of the top image row
// wrap     - distance in bytes between the starts of consecutive rows
// xsize    - image width in pixels (xsize * 3 bytes are written per row)
// ysize    - image height in rows
// filename - path of the file to create
//
// If the file cannot be opened, or a row cannot be written completely, an
// error is printed to stderr and the function returns.
static void ppm_save(unsigned char* buf, int wrap, int xsize, int ysize, char* filename)
{
    FILE* f = fopen(filename, "wb");
    if (!f)
    {
        fprintf(stderr, "Could not open %s\n", filename);
        return;
    }

    fprintf(f, "P6\n%d %d\n%d\n", xsize, ysize, 255);

    const size_t row_bytes = (size_t)xsize * 3;
    for (int i = 0; i < ysize; i++)
    {
        if (fwrite(buf + i * wrap, 1, row_bytes, f) != row_bytes)
        {
            fprintf(stderr, "Could not write to %s\n", filename);
            break;
        }
    }

    fclose(f);
}
/*
 * Send one packet to the decoder, convert every frame that comes out to RGB24
 * and save it as "<filename>_<frame number>.ppm".
 *
 * dec_ctx  - opened decoder context
 * frame    - caller-owned frame that receives each decoded picture
 * pkt      - packet to decode; main() passes NULL to flush the decoder
 * filename - prefix of the generated file names
 *
 * The scaler context and the RGB frame are created for this call only. Every
 * path out of the receive loop goes through the cleanup at the bottom, so
 * neither of them is leaked. Decoder errors terminate the process with exit(1).
 */
static void decode(AVCodecContext* dec_ctx, AVFrame* frame, AVPacket* pkt, const char* filename)
{
    char buf[1024];

    int ret = avcodec_send_packet(dec_ctx, pkt);
    if (ret < 0)
    {
        fprintf(stderr, "Error sending a packet for decoding\n");
        exit(1);
    }

    //Create SWS Context for converting from decode pixel format (like YUV420) to RGB
    struct SwsContext* sws_ctx = sws_getContext(dec_ctx->width,
                                                dec_ctx->height,
                                                dec_ctx->pix_fmt,
                                                dec_ctx->width,
                                                dec_ctx->height,
                                                AV_PIX_FMT_RGB24,
                                                SWS_BICUBIC,
                                                NULL,
                                                NULL,
                                                NULL);
    if (sws_ctx == nullptr)
    {
        return; //Error!
    }

    //Allocate frame for storing image converted to RGB.
    AVFrame* pRGBFrame = av_frame_alloc();
    if (pRGBFrame == nullptr)
    {
        sws_freeContext(sws_ctx);
        return; //Error!
    }
    pRGBFrame->format = AV_PIX_FMT_RGB24;
    pRGBFrame->width = dec_ctx->width;
    pRGBFrame->height = dec_ctx->height;

    if (av_frame_get_buffer(pRGBFrame, 0) >= 0)
    {
        while (ret >= 0)
        {
            ret = avcodec_receive_frame(dec_ctx, frame);
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
            {
                break; //No more frames for now - fall through to the cleanup.
            }
            if (ret < 0)
            {
                fprintf(stderr, "Error during decoding\n");
                exit(1);
            }

            printf("saving frame %3d\n", dec_ctx->frame_number);
            fflush(stdout);

            //Convert from input format (e.g YUV420) to RGB and save to PPM:
            int sts = sws_scale(sws_ctx,              //struct SwsContext* c,
                                frame->data,          //const uint8_t* const srcSlice[],
                                frame->linesize,      //const int srcStride[],
                                0,                    //int srcSliceY,
                                frame->height,        //int srcSliceH,
                                pRGBFrame->data,      //uint8_t* const dst[],
                                pRGBFrame->linesize); //const int dstStride[]);
            if (sts != frame->height)
            {
                break; //Error!
            }

            snprintf(buf, sizeof(buf), "%s_%03d.ppm", filename, dec_ctx->frame_number);
            ppm_save(pRGBFrame->data[0], pRGBFrame->linesize[0], pRGBFrame->width, pRGBFrame->height, buf);
        }
    }

    //Free
    sws_freeContext(sws_ctx);
    av_frame_free(&pRGBFrame);
}
/*
 * Read the raw HEVC stream named by argv[1] in INBUF_SIZE chunks, split it
 * into packets with av_parser_parse2() and pass every packet to decode(),
 * using argv[2] as the output file name prefix.
 *
 * Returns 0 on success; failures terminate the process with exit(1).
 */
int main(int argc, char** argv)
{
    const char* filename;
    const char* outfilename;
    const AVCodec* codec;
    AVCodecParserContext* parser;
    AVCodecContext* c = NULL;
    FILE* f;
    AVFrame* frame;
    uint8_t inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
    uint8_t* data;
    size_t data_size;
    int ret;
    int eof;
    AVPacket* pkt;

    if (argc <= 2)
    {
        fprintf(stderr, "Usage: %s <input file> <output file prefix>\n", argv[0]);
        exit(1);
    }
    filename = argv[1];
    outfilename = argv[2];

    pkt = av_packet_alloc();
    if (!pkt)
    {
        exit(1);
    }

    //Zero the whole buffer, including the padding bytes after INBUF_SIZE.
    memset(inbuf, 0, sizeof(inbuf));

    codec = avcodec_find_decoder(AV_CODEC_ID_HEVC);
    if (!codec)
    {
        fprintf(stderr, "Codec not found\n");
        exit(1);
    }

    parser = av_parser_init(codec->id);
    if (!parser)
    {
        fprintf(stderr, "parser not found\n");
        exit(1);
    }

    c = avcodec_alloc_context3(codec);
    if (!c)
    {
        fprintf(stderr, "Could not allocate video codec context\n");
        exit(1);
    }

    if (avcodec_open2(c, codec, NULL) < 0)
    {
        fprintf(stderr, "Could not open codec\n");
        exit(1);
    }

    f = fopen(filename, "rb");
    if (!f)
    {
        fprintf(stderr, "Could not open %s\n", filename);
        exit(1);
    }

    frame = av_frame_alloc();
    if (!frame)
    {
        fprintf(stderr, "Could not allocate video frame\n");
        exit(1);
    }

    do
    {
        /* read raw data from the input file */
        data_size = fread(inbuf, 1, INBUF_SIZE, f);
        if (ferror(f))
        {
            break;
        }
        eof = !data_size;

        /* use the parser to split the data into frames; at end of file the
           parser is called once more with size 0, which makes it hand out
           the last frame it is still caching */
        data = inbuf;
        while (data_size > 0 || eof)
        {
            ret = av_parser_parse2(parser, c, &pkt->data, &pkt->size, data, (int)data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
            if (ret < 0)
            {
                fprintf(stderr, "Error while parsing\n");
                exit(1);
            }
            data += ret;
            data_size -= ret;
            if (pkt->size)
            {
                printf("NICE\n");
                decode(c, frame, pkt, outfilename);
            }
            else if (eof)
            {
                break;
            }
        }
    } while (!eof);

    /* flush the decoder */
    decode(c, frame, NULL, outfilename);

    fclose(f);
    av_parser_close(parser);
    avcodec_free_context(&c);
    av_frame_free(&frame);
    av_packet_free(&pkt);
    return 0;
}
Showing images using OpenCV:
One of the simplest ways to show an image is using OpenCV library.
Setting up a project that uses both FFmpeg and OpenCV for the first time may be challenging.
We need the image to be in BGR format.
For showing the image, use: cv::imshow followed by cv::waitKey.
Code sample:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//Use OpenCV for showing the image
#include <opencv2/opencv.hpp>
#include <opencv2/highgui.hpp>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libswscale/swscale.h>
}
#define INBUF_SIZE 1024
//static void pgm_save(unsigned char* buf, int wrap, int xsize, int ysize, char* filename)
//{
// FILE* f;
// int i;
//
// f = fopen(filename, "wb");
// fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
// for (i = 0; i < ysize; i++)
// fwrite(buf + i * wrap, 1, xsize, f);
// fclose(f);
//}
//Save RGB image as PPM file format
//static void ppm_save(unsigned char* buf, int wrap, int xsize, int ysize, char* filename)
//{
// FILE* f;
// int i;
//
// f = fopen(filename, "wb");
// fprintf(f, "P6\n%d %d\n%d\n", xsize, ysize, 255);
//
// for (i = 0; i < ysize; i++)
// {
// fwrite(buf + i * wrap, 1, xsize*3, f);
// }
//
// fclose(f);
//}
/*
 * Send one packet to the decoder, convert every frame that comes out to BGR24,
 * show it in an OpenCV window and save it as "<filename>_<frame number>.jpg".
 *
 * dec_ctx  - opened decoder context
 * frame    - caller-owned frame that receives each decoded picture
 * pkt      - packet to decode; main() passes NULL to flush the decoder
 * filename - prefix of the generated file names
 *
 * The scaler context and the BGR frame are created for this call only. Every
 * path out of the receive loop goes through the cleanup at the bottom, so
 * neither of them is leaked. Decoder errors terminate the process with exit(1).
 */
static void decode(AVCodecContext* dec_ctx, AVFrame* frame, AVPacket* pkt, const char* filename)
{
    char filename_buf[1024];

    int ret = avcodec_send_packet(dec_ctx, pkt);
    if (ret < 0)
    {
        fprintf(stderr, "Error sending a packet for decoding\n");
        exit(1);
    }

    //Create SWS Context for converting from decode pixel format (like YUV420) to BGR
    struct SwsContext* sws_ctx = sws_getContext(dec_ctx->width,
                                                dec_ctx->height,
                                                dec_ctx->pix_fmt,
                                                dec_ctx->width,
                                                dec_ctx->height,
                                                AV_PIX_FMT_BGR24, //For OpenCV, we want BGR pixel format.
                                                SWS_BICUBIC,
                                                NULL,
                                                NULL,
                                                NULL);
    if (sws_ctx == nullptr)
    {
        return; //Error!
    }

    //Allocate frame for storing image converted to BGR.
    AVFrame* pBGRFrame = av_frame_alloc();
    if (pBGRFrame == nullptr)
    {
        sws_freeContext(sws_ctx);
        return; //Error!
    }
    pBGRFrame->format = AV_PIX_FMT_BGR24;
    pBGRFrame->width = dec_ctx->width;
    pBGRFrame->height = dec_ctx->height;

    if (av_frame_get_buffer(pBGRFrame, 0) >= 0)
    {
        while (ret >= 0)
        {
            ret = avcodec_receive_frame(dec_ctx, frame);
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
            {
                break; //No more frames for now - fall through to the cleanup.
            }
            if (ret < 0)
            {
                fprintf(stderr, "Error during decoding\n");
                exit(1);
            }

            printf("saving frame %3d\n", dec_ctx->frame_number);
            fflush(stdout);

            //Convert from input format (e.g YUV420) to BGR:
            int sts = sws_scale(sws_ctx,              //struct SwsContext* c,
                                frame->data,          //const uint8_t* const srcSlice[],
                                frame->linesize,      //const int srcStride[],
                                0,                    //int srcSliceY,
                                frame->height,        //int srcSliceH,
                                pBGRFrame->data,      //uint8_t* const dst[],
                                pBGRFrame->linesize); //const int dstStride[]);
            if (sts != frame->height)
            {
                break; //Error!
            }

            snprintf(filename_buf, sizeof(filename_buf), "%s_%03d.jpg", filename, dec_ctx->frame_number);

            //Use OpenCV for showing the image (and save the image in JPEG format):
            //cv::Mat only wraps the BGR frame's pixel buffer here; it does not copy it.
            cv::Mat img = cv::Mat(pBGRFrame->height, pBGRFrame->width, CV_8UC3, pBGRFrame->data[0], pBGRFrame->linesize[0]);
            cv::imshow("img", img);
            cv::waitKey(100); //Wait 100msec (relatively long time - for testing).

            //Save the image in JPEG format using OpenCV
            cv::imwrite(filename_buf, img);
        }
    }

    //Free
    sws_freeContext(sws_ctx);
    av_frame_free(&pBGRFrame);
}
/*
 * Read the raw HEVC stream named by argv[1] in INBUF_SIZE chunks, split it
 * into packets with av_parser_parse2() and pass every packet to decode(),
 * using argv[2] as the output file name prefix.
 *
 * Returns 0 on success; failures terminate the process with exit(1).
 */
int main(int argc, char** argv)
{
    const char* filename;
    const char* outfilename;
    const AVCodec* codec;
    AVCodecParserContext* parser;
    AVCodecContext* c = NULL;
    FILE* f;
    AVFrame* frame;
    uint8_t inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
    uint8_t* data;
    size_t data_size;
    int ret;
    int eof;
    AVPacket* pkt;

    if (argc <= 2)
    {
        fprintf(stderr, "Usage: %s <input file> <output file prefix>\n", argv[0]);
        exit(1);
    }
    filename = argv[1];
    outfilename = argv[2];

    pkt = av_packet_alloc();
    if (!pkt)
    {
        exit(1);
    }

    //Zero the whole buffer, including the padding bytes after INBUF_SIZE.
    memset(inbuf, 0, sizeof(inbuf));

    codec = avcodec_find_decoder(AV_CODEC_ID_HEVC);
    if (!codec)
    {
        fprintf(stderr, "Codec not found\n");
        exit(1);
    }

    parser = av_parser_init(codec->id);
    if (!parser)
    {
        fprintf(stderr, "parser not found\n");
        exit(1);
    }

    c = avcodec_alloc_context3(codec);
    if (!c)
    {
        fprintf(stderr, "Could not allocate video codec context\n");
        exit(1);
    }

    if (avcodec_open2(c, codec, NULL) < 0)
    {
        fprintf(stderr, "Could not open codec\n");
        exit(1);
    }

    f = fopen(filename, "rb");
    if (!f)
    {
        fprintf(stderr, "Could not open %s\n", filename);
        exit(1);
    }

    frame = av_frame_alloc();
    if (!frame)
    {
        fprintf(stderr, "Could not allocate video frame\n");
        exit(1);
    }

    do
    {
        /* read raw data from the input file */
        data_size = fread(inbuf, 1, INBUF_SIZE, f);
        if (ferror(f))
        {
            break;
        }
        eof = !data_size;

        /* use the parser to split the data into frames; at end of file the
           parser is called once more with size 0, which makes it hand out
           the last frame it is still caching */
        data = inbuf;
        while (data_size > 0 || eof)
        {
            ret = av_parser_parse2(parser, c, &pkt->data, &pkt->size, data, (int)data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
            if (ret < 0)
            {
                fprintf(stderr, "Error while parsing\n");
                exit(1);
            }
            data += ret;
            data_size -= ret;
            if (pkt->size)
            {
                printf("NICE\n");
                decode(c, frame, pkt, outfilename);
            }
            else if (eof)
            {
                break;
            }
        }
    } while (!eof);

    /* flush the decoder */
    decode(c, frame, NULL, outfilename);

    fclose(f);
    av_parser_close(parser);
    avcodec_free_context(&c);
    av_frame_free(&frame);
    av_packet_free(&pkt);
    return 0;
}
Sample output:
output_001.jpg:
output_002.jpg:
output_003.jpg:

Extracting the h264 part of a video file (demuxing)

I am trying to demux a video file into its video part (h264, mpeg4, h265, vp8, etc.), its audio part (mp3, aac, ac3, etc.) and its subtitle part (srt) using FFmpeg in C++.
The audio part came out all right and played on all the media players I have, and so did the subtitle part. The video part, however, was written WITHOUT any error into a .h264 file, but when I use ffprobe to check it or ffplay to play it, it always gives the error "Invalid data found when processing input".
The code below
/* Separate a media file into audio, video and subtitle files (demuxing, complex) */
//TODO: mute error when subtitle is not present
#define __STDC_CONSTANT_MACROS
extern "C"
{
#include "libavformat/avformat.h"
}
int main()
{
//Input AVFormatContext and Output AVFormatContext
AVOutputFormat *ofmt_a = NULL, *ofmt_v = NULL, *ofmt_s = NULL;
AVFormatContext *ifmt_ctx = NULL, *ofmt_ctx_a = NULL, *ofmt_ctx_v = NULL, *ofmt_ctx_s = NULL;
AVPacket pkt;
int ret, i;
int videoindex=-1, audioindex=-1, srtindex=-1;
int frame_index=0;
//Input file URL
const char *in_filename = "sample.mp4";
//Output file URL
const char *out_filename_v = "sample.h264";
const char *out_filename_a = "sample.mp3";
const char *out_filename_s = "sample.srt";
av_register_all();
//Input
if ((ret = avformat_open_input(&ifmt_ctx, in_filename, 0, 0)) < 0) {
printf( "Could not open input file.");
goto end;
}
if ((ret = avformat_find_stream_info(ifmt_ctx, 0)) < 0) {
printf( "Failed to retrieve input stream information");
goto end;
}
//Output
avformat_alloc_output_context2(&ofmt_ctx_v, NULL, NULL, out_filename_v);
if (!ofmt_ctx_v) {
printf( "Could not create output context\n");
ret = AVERROR_UNKNOWN;
goto end;
}
ofmt_v = ofmt_ctx_v->oformat;
avformat_alloc_output_context2(&ofmt_ctx_a, NULL, NULL, out_filename_a);
if (!ofmt_ctx_a) {
printf( "Could not create output context\n");
ret = AVERROR_UNKNOWN;
goto end;
}
ofmt_a = ofmt_ctx_a->oformat;
avformat_alloc_output_context2(&ofmt_ctx_s, NULL, NULL, out_filename_s);
if (!ofmt_ctx_a) {
printf( "Could not create output context\n");
ret = AVERROR_UNKNOWN;
goto end;
}
ofmt_s = ofmt_ctx_s->oformat;
for (i = 0; i < ifmt_ctx->nb_streams; i++) {
//Create output AVStream according to input AVStream
AVFormatContext *ofmt_ctx;
AVStream *in_stream = ifmt_ctx->streams[i];
AVStream *out_stream = NULL;
if(ifmt_ctx->streams[i]->codec->codec_type==AVMEDIA_TYPE_VIDEO){
videoindex=i;
out_stream=avformat_new_stream(ofmt_ctx_v, in_stream->codec->codec);
ofmt_ctx=ofmt_ctx_v;
}
else if(ifmt_ctx->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO){
audioindex=i;
out_stream=avformat_new_stream(ofmt_ctx_a, in_stream->codec->codec);
ofmt_ctx=ofmt_ctx_a;
}
else if(ifmt_ctx->streams[i]->codec->codec_type==AVMEDIA_TYPE_SUBTITLE){
srtindex=i;
out_stream=avformat_new_stream(ofmt_ctx_s, in_stream->codec->codec);
ofmt_ctx=ofmt_ctx_s;
}
else{
break;
}
if (!out_stream) {
printf( "Failed allocating output stream\n");
ret = AVERROR_UNKNOWN;
goto end;
}
//Copy the settings of AVCodecContext
if (avcodec_copy_context(out_stream->codec, in_stream->codec) < 0) {
printf( "Failed to copy context from input to output stream codec context\n");
goto end;
}
out_stream->codec->codec_tag = 0;
if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
out_stream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
}
//Dump Format------------------
printf("\n==============Input Video=============\n");
av_dump_format(ifmt_ctx, 0, in_filename, 0);
printf("\n==============Output Video============\n");
av_dump_format(ofmt_ctx_v, 0, out_filename_v, 1);
printf("\n==============Output Audio============\n");
av_dump_format(ofmt_ctx_a, 0, out_filename_a, 1);
/*printf("\n==============Output Subtitle============\n");
av_dump_format(ofmt_ctx_s, 0, out_filename_s, 1);*/
printf("\n======================================\n");
//Open output file
if (!(ofmt_v->flags & AVFMT_NOFILE)) {
if (avio_open(&ofmt_ctx_v->pb, out_filename_v, AVIO_FLAG_WRITE) < 0) {
printf( "Could not open output file '%s'", out_filename_v);
goto end;
}
}
if (!(ofmt_a->flags & AVFMT_NOFILE)) {
if (avio_open(&ofmt_ctx_a->pb, out_filename_a, AVIO_FLAG_WRITE) < 0) {
printf( "Could not open output file '%s'", out_filename_a);
goto end;
}
}
if (!(ofmt_a->flags & AVFMT_NOFILE)) {
if (avio_open(&ofmt_ctx_s->pb, out_filename_s, AVIO_FLAG_WRITE) < 0) {
printf( "Could not open output file '%s'", out_filename_s);
goto end;
}
}
//Write file header
if (avformat_write_header(ofmt_ctx_v, NULL) < 0) {
printf( "Error occurred when opening video output file\n");
goto end;
}
system("pause");
if (avformat_write_header(ofmt_ctx_a, NULL) < 0) {
printf( "Error occurred when opening audio output file\n");
goto end;
}
if (avformat_write_header(ofmt_ctx_s, NULL) < 0) {
printf( "Error occurred when opening audio output file\n");
goto end;
}
AVBitStreamFilterContext* h264bsfc = av_bitstream_filter_init("h264_mp4toannexb");
while (1) {
AVFormatContext *ofmt_ctx;
AVStream *in_stream, *out_stream;
//Get an AVPacket
if (av_read_frame(ifmt_ctx, &pkt) < 0)
break;
in_stream = ifmt_ctx->streams[pkt.stream_index];
if(pkt.stream_index==videoindex){
out_stream = ofmt_ctx_v->streams[0];
ofmt_ctx=ofmt_ctx_v;
printf("Write Video Packet. size:%d\tpts:%lld\n",pkt.size,pkt.pts);
av_bitstream_filter_filter(h264bsfc, in_stream->codec, NULL, &pkt.data, &pkt.size, pkt.data, pkt.size, 0);
}else if(pkt.stream_index==audioindex){
out_stream = ofmt_ctx_a->streams[0];
ofmt_ctx=ofmt_ctx_a;
printf("Write Audio Packet. size:%d\tpts:%lld\n",pkt.size,pkt.pts);
}
else if(pkt.stream_index==srtindex){
out_stream = ofmt_ctx_s->streams[0];
ofmt_ctx=ofmt_ctx_s;
printf("Write Subtitle Packet. size:%d\tpts:%lld\n",pkt.size,pkt.pts);
}
else{
continue;
}
//Convert PTS/DTS
pkt.pts = av_rescale_q_rnd(pkt.pts, in_stream->time_base, out_stream->time_base, (AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
pkt.dts = av_rescale_q_rnd(pkt.dts, in_stream->time_base, out_stream->time_base, (AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
pkt.duration = av_rescale_q(pkt.duration, in_stream->time_base, out_stream->time_base);
pkt.pos = -1;
pkt.stream_index=0;
//Write
if (av_interleaved_write_frame(ofmt_ctx, &pkt) < 0) {
printf( "Error muxing packet\n");
break;
}
//printf("Write %8d frames to output file\n",frame_index);
av_free_packet(&pkt);
frame_index++;
}
av_bitstream_filter_close(h264bsfc);
//Write file trailer
av_write_trailer(ofmt_ctx_a);
av_write_trailer(ofmt_ctx_v);
av_write_trailer(ofmt_ctx_s);
end:
avformat_close_input(&ifmt_ctx);
/* close output */
if (ofmt_ctx_a && !(ofmt_a->flags & AVFMT_NOFILE))
avio_close(ofmt_ctx_a->pb);
if (ofmt_ctx_v && !(ofmt_v->flags & AVFMT_NOFILE))
avio_close(ofmt_ctx_v->pb);
if (ofmt_ctx_s && !(ofmt_s->flags & AVFMT_NOFILE))
avio_close(ofmt_ctx_s->pb);
avformat_free_context(ofmt_ctx_a);
avformat_free_context(ofmt_ctx_v);
avformat_free_context(ofmt_ctx_s);
system("pause");
if (ret < 0 && ret != AVERROR_EOF) {
printf( "Error occurred.\n");
return -1;
}
return 0;
}
EDIT 1
Screen shot of resultant h264 file
EDIT 2
I think the "error" has to do with FFMPEG's "Using AVStream.codec.time_base as a timebase hint to the muxer is deprecated. Set AVStream.time_base instead" error.
I reverted to an older version of FFMPEG and, with the same code, the resultant h264 file was ok!

You need to convert the H.264 bitstream from length-prefixed mode to start-code-prefixed mode. This is required by some streaming formats, typically the MPEG-2 transport stream format ("mpegts").
Take a look at https://www.ffmpeg.org/ffmpeg-bitstream-filters.html#h264_005fmp4toannexb
Look at lines from 402 to 424 and from 842 to 843.
https://www.ffmpeg.org/doxygen/0.7/crystalhd_8c-source.html
I used it to extract h264 from mp4.
// Run the "h264_mp4toannexb" bitstream filter on the first H.264 AVPacket
// read from the input. Elided setup is marked with "//...".
AVFormatContext *ifmt_ctx = NULL;
//...
//... //init input
//...
AVPacket *firstPacket;
//...
//... //get packet from stream
//...
// Receive the filtered data pointer and its size from the filter call below.
uint8_t *dummy_p = NULL;
int dummy_int = 0;
// The FFmpeg function is av_bitstream_filter_init (the "av_" prefix is required).
AVBitStreamFilterContext *filter = av_bitstream_filter_init("h264_mp4toannexb");
if (!filter)
{
    printf("Can't open filter\n");
    exit(1);
}
ret = av_bitstream_filter_filter(filter, ifmt_ctx->streams[videoindex]->codec, NULL,
                                 &dummy_p, &dummy_int,
                                 firstPacket->data, firstPacket->size, 0);
if( ret < 0 )
{
    printf("Can't filter\n");
    exit(1);
}
// Write dummy_int bytes starting at dummy_p to the file as the first packet.
// NOTE(review): per the av_bitstream_filter_filter documentation a positive
// return value means dummy_p is a newly allocated buffer the caller must
// release with av_free() once written - confirm against the FFmpeg version in use.

Wma decoding with ffmpeg

I am new to ffmpeg and I tried using api-example.c to decode wma files. However when I run the program, it gave me an error saying
"frame_len overflow". Does anyone know how to fix this error?
Here is my code:
extern "C" {
#include <avcodec.h>
#include "../libavcodec/avcodec.h"
#include <avformat.h>
}
#include <iostream>
#include <assert.h>
#include <windows.h>
#include <mmsystem.h>
#define INBUF_SIZE 4096
#define AUDIO_INBUF_SIZE 20480
#define AUDIO_REFILL_THRESH 4096
int main(int argc, char *argv[]) {
avcodec_init();
avcodec_register_all();
//avdevice_register_all();
av_register_all();
AVCodec *codec;
AVCodecContext *c= NULL;
AVCodec *ocodec;
AVCodecContext *oc= NULL;
int out_size, len,out_size2;
FILE *f, *outfile;
uint8_t *outbuf;
uint8_t inbuf[AUDIO_INBUF_SIZE + FF_INPUT_BUFFER_PADDING_SIZE];
AVPacket avpkt;
char* outfilename="test.wma";
char* filename="Beethoven's.wma";
AVFormatContext *pFormatCtx;
WAVEFORMATEX* wfx=new WAVEFORMATEX;
int ret;
ret=av_open_input_file(&pFormatCtx, filename, NULL, 0, NULL);
if(ret!=0)
{
std::cout<<"cannot open file!"<<std::endl;
exit(1);
}
if(av_find_stream_info(pFormatCtx)<0)
{
std::cout<<"cannot find stream!"<<std::endl;
exit(1);
}
int audioStream;
AVCodecContext *pCodecCtx;
// Find the first video stream
audioStream=-1;
for(int i=0; i<pFormatCtx->nb_streams; i++)
if(pFormatCtx->streams[i]->codec->codec_type==CODEC_TYPE_AUDIO)
{
audioStream=i;
break;
}
if(audioStream==-1)
{
std::cout<<"cannot find audio!"<<std::endl;
}
// Get a pointer to the codec context for the audio stream
pCodecCtx=pFormatCtx->streams[audioStream]->codec;
av_init_packet(&avpkt);
printf("Audio decoding\n");
/* find the suitable audio decoder */
codec = avcodec_find_decoder(pCodecCtx->codec_id);
if (!codec) {
fprintf(stderr, "codec not found\n");
exit(1);
}
if(codec->capabilities & CODEC_CAP_TRUNCATED)
pCodecCtx->flags|=CODEC_FLAG_TRUNCATED;
//open the codec (for decoding)
int test = avcodec_open(pCodecCtx, codec);
if (test < 0) {
fprintf(stderr, "could not open codec\n");
exit(1);
}
//find mp3 encoder
ocodec = avcodec_find_encoder(CODEC_ID_MP3);
if (!ocodec) {
fprintf(stderr, "codec not found\n");
exit(1);
}
//allocate context
oc= avcodec_alloc_context();
/* put sample parameters */
oc->bit_rate = 64000;
oc->sample_rate = 44100;
oc->channels = 1;
/* open it */
if (avcodec_open(oc, ocodec) < 0) {
fprintf(stderr, "could not open encoding codec\n");
exit(1);
}
//buffer
outbuf = (uint8_t*)malloc(AVCODEC_MAX_AUDIO_FRAME_SIZE);
//open inputfile
f = fopen(filename, "rb");
if (!f) {
fprintf(stderr, "could not open %s\n", filename);
exit(1);
}
//open outputfile
outfile = fopen(outfilename, "wb");
if (!outfile) {
av_free(c);
exit(1);
}
/* decode until eof */
avpkt.data = inbuf;
avpkt.size = fread(inbuf, 1, AUDIO_INBUF_SIZE, f);
//while there is still data
while (avpkt.size > 0) {
std::cout<<"decoding..."<<std::endl;
out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
//decode
len = avcodec_decode_audio3(pCodecCtx, (short *)outbuf, &out_size, &avpkt);
if (len < 0) {
fprintf(stderr, "Error while decoding\n");
exit(1);
}
if (out_size > 0) {
/* if a frame has been decoded, output it */
std::cout<<"1 frame decoded!"<<std::endl;
out_size2 = avcodec_encode_audio(oc, outbuf, out_size, (short*)outbuf);
fwrite(outbuf, 1, out_size2, outfile);
}
//subtract data from whatever decode function returns
avpkt.size -= len;
avpkt.data += len;
if (avpkt.size < AUDIO_REFILL_THRESH) {
/* Refill the input buffer, to avoid trying to decode
* incomplete frames. Instead of this, one could also use
* a parser, or use a proper container format through
* libavformat. */
memmove(inbuf, avpkt.data, avpkt.size);
avpkt.data = inbuf;
len = fread(avpkt.data + avpkt.size, 1,
AUDIO_INBUF_SIZE - avpkt.size, f);
if (len > 0)
avpkt.size += len;
}
}
fclose(outfile);
fclose(f);
free(outbuf);
avcodec_close(c);
av_free(c);
}
I have been stuck on this for quite a long time. Please help me.
Does anyone know what's wrong with my code?
Thanks,
Izak

Use debug messages to determine the point of failure.
Though I am of the strong opinion that this error occurs while encoding, because you are using the same buffer and respective buffer size.

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

Decoding m4a and dumping PCM data gives back noise - c++

Most aac files decode to the FLOAT format, i.e. AV_SAMPLE_FMT_FLTP, while libraries such as libao are only able to play audio in integer format, i.e. AV_SAMPLE_FMT_S16. As a result, you'd need to use libavresample to resample the music and convert the format as appropriate.

Related

How parse and decode H264 file with libav/ffmpeg?

Decoding HEVC file in C++ with FFmpeg missing one frame

How to save AVFrame as image in C++ using FFmpeg

Extracting the h264 part of a video file (demuxing)

Wma decoding with ffmpeg

Categories

Resources