FFMPEG, change default audio track with code

FFMPEG, change default audio track with code - c++

I have an MKV video and want to change only its default audio track, without remuxing, encoding/decoding, or any other processing. How can this be done in code?
// Opens the container at videoPath, reads its stream information and walks the
// streams looking for audio tracks. The action to take on each audio track is
// still an open question in this snippet.
AVFormatContext *pFormatCtx;
// NOTE(review): av_register_all()/avcodec_register_all() are deprecated no-ops in
// recent FFmpeg releases -- confirm against the FFmpeg version in use.
av_register_all();
avcodec_register_all();
avformat_network_init();
pFormatCtx = avformat_alloc_context();
// [videoPath UTF8String] is an Objective-C message send; videoPath is declared
// outside this snippet (presumably an NSString -- TODO confirm).
if (avformat_open_input(&pFormatCtx, [videoPath UTF8String], NULL, NULL) != 0) {
avformat_close_input(&pFormatCtx);
return;
}
if (avformat_find_stream_info(pFormatCtx, NULL) < 0) {
avformat_close_input(&pFormatCtx);
return;
}
// NOTE(review): i is a signed int compared against the unsigned nb_streams field.
for (int i = 0; i < pFormatCtx->nb_streams; i++) {
if (pFormatCtx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
// At this point the audio track's information is available.
// What to do next?
}
}
avformat_close_input(&pFormatCtx);

Related

FFMPEG C++ Non monotonically increasing dts to muxer

I have two inputs:
Stream with audio and video
Stream with only video
I want to generate an output stream that mixes the audio from the first stream with the video from the second stream.
But when I generate the output, the muxer reports that I am passing it non-monotonically increasing DTS values. I think I am messing up the DTS.
Please help!!
Here is my code:
#include <iostream>
//Linux...
#ifdef __cplusplus
extern "C"
{
#endif
#include <libavformat/avformat.h>
#include <libavutil/mathematics.h>
#include <libavutil/time.h>
#ifdef __cplusplus
}
#endif
using namespace std;
int main(int argc, char* argv[])
{
AVOutputFormat* ofmt = NULL;
//Input AVFormatContext and Output AVFormatContext
AVFormatContext *audio_video_ifmt_ctx = NULL,
* only_video_ifmt_ctx = NULL,
* ofmt_ctx = NULL;
AVPacket audio_video_pkt, only_video_pkt;
const char *audio_video_in_filename, * only_video_in_filename, * out_filename;
int ret, i;
int audio_video_audioindex = -1;
int audio_video_videoindex = -1;
int only_video_videoindex = -1;
int audio_video_dataindex = -1;
int only_video_dataindex = -1;
int audio_video_frame_index = 0, only_video_frame_index = 0;
audio_video_in_filename = "rtmp://localhost/live/STREAM_NAME.flv";
only_video_in_filename = "rtmp://localhost/live/STREAM_NAME2.flv";
out_filename = "rtmp://localhost/live/STREAM_NAME3.flv";
//Network
avformat_network_init();
//Input ONLY VIDEO
if ((ret = avformat_open_input(&only_video_ifmt_ctx, only_video_in_filename, 0, 0)) < 0) {
printf("Could not open audio and video input file.");
return 0;
}
if ((ret = avformat_find_stream_info(only_video_ifmt_ctx, 0)) < 0) {
printf("Failed to retrieve audio and video input stream information");
return 0;
}
//Input AUDIO AND VIDEO
if ((ret = avformat_open_input(&audio_video_ifmt_ctx, audio_video_in_filename, 0, 0)) < 0) {
printf("Could not open only video input file.");
return 0;
}
if ((ret = avformat_find_stream_info(audio_video_ifmt_ctx, 0)) < 0) {
printf("Failed to retrieve only video input stream information");
return 0;
}
//Search audio_video stream index
for (i = 0; i < audio_video_ifmt_ctx->nb_streams; i++) {
if (audio_video_ifmt_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
audio_video_videoindex = i;
}
if (audio_video_ifmt_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
audio_video_audioindex = i;
}
if (audio_video_ifmt_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_DATA) {
audio_video_dataindex = i;
}
}
//Search only_video stream index
for (i = 0; i < only_video_ifmt_ctx->nb_streams; i++) {
if (only_video_ifmt_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
only_video_videoindex = i;
}
else if(audio_video_ifmt_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_DATA) {
only_video_dataindex = i;
}
}
av_dump_format(audio_video_ifmt_ctx, 0, audio_video_in_filename, 0);
av_dump_format(only_video_ifmt_ctx, 0, only_video_in_filename, 0);
//Allocate output stream context
avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, out_filename); //RTMP
if (!ofmt_ctx) {
printf("Could not create output context\n");
ret = AVERROR_UNKNOWN;
return 0;
}
ofmt = ofmt_ctx->oformat;
//Create Output Data and Audio Stream
for (i = 0; i < audio_video_ifmt_ctx->nb_streams; i++) {
AVStream* video_in_stream;
//Create output AVStream according to input AVStream
if (i == audio_video_dataindex) {
video_in_stream = audio_video_ifmt_ctx->streams[audio_video_dataindex];
}
else if (i == audio_video_audioindex) {
video_in_stream = audio_video_ifmt_ctx->streams[audio_video_audioindex];
}
else {
video_in_stream = NULL;
continue;
}
//AVStream *out_stream = avformat_new_stream(ofmt_ctx, in_stream->codec->codec);
AVCodec* codec = avcodec_find_decoder(video_in_stream->codecpar->codec_id);
AVStream* out_stream = avformat_new_stream(ofmt_ctx, codec);
if (!out_stream) {
printf("Failed allocating output stream\n");
ret = AVERROR_UNKNOWN;
return 0;
}
AVCodecContext* p_codec_ctx = avcodec_alloc_context3(codec);
ret = avcodec_parameters_to_context(p_codec_ctx, video_in_stream->codecpar);
//Copy the settings of AVCodecContext
if (ret < 0) {
printf("Failed to copy context from input to output stream codec context\n");
return 0;
}
p_codec_ctx->codec_tag = 0;
if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
p_codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
ret = avcodec_parameters_from_context(out_stream->codecpar, p_codec_ctx);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "eno:[%d] error to paramters codec paramter \n", ret);
}
}
//Create Output Video Stream
for (i = 0; i < only_video_ifmt_ctx->nb_streams; i++) {
AVStream* video_in_stream;
//Create output AVStream according to input AVStream
if (i == only_video_dataindex) {
continue;
video_in_stream = only_video_ifmt_ctx->streams[only_video_dataindex];
}
else if (i == only_video_videoindex) {
video_in_stream = only_video_ifmt_ctx->streams[only_video_videoindex];
}
else {
video_in_stream = NULL;
continue;
}
//AVStream *out_stream = avformat_new_stream(ofmt_ctx, in_stream->codec->codec);
AVCodec* codec = avcodec_find_decoder(video_in_stream->codecpar->codec_id);
AVStream* out_stream = avformat_new_stream(ofmt_ctx, codec);
if (!out_stream) {
printf("Failed allocating output stream\n");
ret = AVERROR_UNKNOWN;
return 0;
}
AVCodecContext* p_codec_ctx = avcodec_alloc_context3(codec);
ret = avcodec_parameters_to_context(p_codec_ctx, video_in_stream->codecpar);
//Copy the settings of AVCodecContext
if (ret < 0) {
printf("Failed to copy context from input to output stream codec context\n");
return 0;
}
p_codec_ctx->codec_tag = 0;
if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
p_codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
ret = avcodec_parameters_from_context(out_stream->codecpar, p_codec_ctx);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "eno:[%d] error to paramters codec paramter \n", ret);
}
}
//Dump Format------------------
av_dump_format(ofmt_ctx, 0, out_filename, 1);
//Open output URL
if (!(ofmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&ofmt_ctx->pb, out_filename, AVIO_FLAG_WRITE);
if (ret < 0) {
printf("Could not open output URL '%s'", out_filename);
goto end;
}
}
//Write output file header
ret = avformat_write_header(ofmt_ctx, NULL);
if (ret < 0) {
printf("Error occurred when opening output URL\n");
goto end;
}
//Pull stream
while (1)
{
AVStream *audio_video_in_stream, * only_video_in_stream, * out_stream;
ret = av_read_frame(audio_video_ifmt_ctx, &audio_video_pkt);
ret = av_read_frame(only_video_ifmt_ctx, &only_video_pkt);
if (ret < 0)
break;
audio_video_in_stream = audio_video_ifmt_ctx->streams[audio_video_pkt.stream_index];
only_video_in_stream = only_video_ifmt_ctx->streams[only_video_pkt.stream_index];
out_stream = NULL;
///// TREAT AUDIO PACKETS /////
if (audio_video_pkt.stream_index == audio_video_dataindex) {
out_stream = ofmt_ctx->streams[0];
}
if (audio_video_pkt.stream_index == audio_video_audioindex) {
out_stream = ofmt_ctx->streams[1];
}
//Convert PTS/DTS
if (out_stream != NULL) {
audio_video_pkt.pts = av_rescale_q_rnd(audio_video_pkt.pts, audio_video_in_stream->time_base, out_stream->time_base, (AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
audio_video_pkt.dts = av_rescale_q_rnd(audio_video_pkt.dts, audio_video_in_stream->time_base, out_stream->time_base, (AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
audio_video_pkt.duration = av_rescale_q(audio_video_pkt.duration, audio_video_in_stream->time_base, out_stream->time_base);
audio_video_pkt.pos = -1;
}
//Writing audio stream
if (out_stream != NULL) {
ret = av_interleaved_write_frame(ofmt_ctx, &audio_video_pkt);
}
///// TREAT VIDEO PACKETS /////
if (only_video_pkt.stream_index == only_video_videoindex) {
out_stream = ofmt_ctx->streams[2];
}
//Convert PTS/DTS
if (out_stream != NULL) {
only_video_pkt.pts = av_rescale_q_rnd(only_video_pkt.pts, only_video_in_stream->time_base, out_stream->time_base, (AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
only_video_pkt.dts = av_rescale_q_rnd(only_video_pkt.dts, only_video_in_stream->time_base, out_stream->time_base, (AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
only_video_pkt.duration = av_rescale_q(only_video_pkt.duration, only_video_in_stream->time_base, out_stream->time_base);
only_video_pkt.pos = -1;
}
//Writing video
if (out_stream != NULL) {
ret = av_interleaved_write_frame(ofmt_ctx, &only_video_pkt);
}
//Desreferenciamos el paquete
av_packet_unref(&audio_video_pkt);
av_packet_unref(&only_video_pkt);
if (ret < 0) {
printf("Error muxing packet\n");
break;
}
}
//Write file trailer
av_write_trailer(ofmt_ctx);
end:
avformat_close_input(&audio_video_ifmt_ctx);
avformat_close_input(&only_video_ifmt_ctx);
/* close output */
if (ofmt_ctx && !(ofmt->flags & AVFMT_NOFILE))
avio_close(ofmt_ctx->pb);
avformat_free_context(ofmt_ctx);
if (ret < 0 && ret != AVERROR_EOF) {
printf("Error occurred.\n");
return -1;
}
return 0;
}
Here is the output:
Input #0, flv, from 'rtmp://localhost/live/STREAM_NAME.flv':
Metadata:
fileSize : 0
audiochannels : 2
2.1 : false
3.1 : false
4.0 : false
4.1 : false
5.1 : false
7.1 : false
encoder : obs-output module (libobs version 25.0.4)
Duration: 00:00:00.00, start: 2910.332000, bitrate: N/A
Stream #0:0: Data: none
Stream #0:1: Audio: aac (LC), 44100 Hz, stereo, fltp, 163 kb/s
Stream #0:2: Video: h264 (High), yuv420p(progressive), 1920x1080, 2560 kb/s, 23.98 fps, 23.98 tbr, 1k tbn, 47.95 tbc
Input #0, flv, from 'rtmp://localhost/live/STREAM_NAME2.flv':
Metadata:
encoder : Lavf58.65.101
Duration: 00:00:00.00, start: 2726.365000, bitrate: N/A
Stream #0:0: Data: none
Stream #0:1: Video: h264 (High), yuv420p(progressive), 1920x1080, 52 fps, 52 tbr, 1k tbn, 104 tbc
Output #0, flv, to 'rtmp://localhost/live/STREAM_NAME3.flv':
Stream #0:0: Data: none
Stream #0:1: Audio: aac (LC), 44100 Hz, stereo, fltp, 163 kb/s
Stream #0:2: Video: h264 (High), yuv420p(progressive), 1920x1080, q=2-31
[flv # 0000014F2CDB9500] Application provided invalid, non monotonically increasing dts to muxer in stream 1: 2910332 >= 2726346
Error muxing packet
[flv # 0000014F2CDB9500] Failed to update header with correct duration.
[flv # 0000014F2CDB9500] Failed to update header with correct filesize.
Error occurred.

Initialize AVFormatContext from buffer data

I am streaming AAC audio over network, and I need to use ffmpeg to decode the stream. I have tried in local and everything works fine, but over the network I am not sure how to initialize my AVFormatContext.
I have had a look at the functions av_probe_input_buffer* and av_probe_input_format* but it doesn't look like these functions are suited for what I want to do. My AVFormatContext is always incomplete, and I cannot find an audio stream, which prevents me from getting a codec context and initializing my decoder.
The problematic piece of code looks more or less like this:
// Attempts to set up an AAC decoder for data read from `f` by probing the
// first packets for an input format and then asking libavformat for stream
// information. `f` is declared outside this snippet.
AVFormatContext *pFormatCtx = avformat_alloc_context();
AVFrame *pFrame = av_frame_alloc();
AVPacket *packet = (AVPacket *)av_malloc(sizeof(AVPacket));
av_init_packet(packet);
packet->buf = NULL;
packet->data = NULL;
// NOTE(review): the flag announces caller-supplied I/O, but no AVIOContext is
// assigned to pFormatCtx->pb and avformat_open_input() is never called in the
// code shown -- that is presumably why no streams are created; confirm.
pFormatCtx->flags |= AVFMT_FLAG_CUSTOM_IO;
// Read 10 packets to give ffmpeg some hint about the data
for (int i = 0; i < 10; i++) {
// Each packet is preceded by its size, stored as a 32-bit value.
uint32_t packet_size;
// NOTE(review): the results of both fread() calls and of malloc() are not
// checked, and `len` is never used afterwards.
fread(&packet_size, 1, sizeof(packet_size), f);
uint8_t *pdata = (uint8_t*)malloc(packet_size);
int len = fread(pdata, 1, packet_size, f);
AVProbeData probeData;
probeData.buf = pdata;
// NOTE(review): AVProbeData documents that the buffer needs
// AVPROBE_PADDING_SIZE zeroed bytes after buf_size -- verify; only one byte is
// held back here and nothing is zeroed.
probeData.buf_size = packet_size - 1;
probeData.filename = "";
// Each iteration overwrites the format detected by the previous one, and
// pdata is never freed in the code shown.
pFormatCtx->iformat = av_probe_input_format(&probeData, 1);
}
// This is working, no error here
if (avformat_find_stream_info(pFormatCtx, NULL) < 0){
printf("Error finding stream info!");
}
// Pick the first stream whose codec type is audio.
int audioStream = -1;
for (int i = 0; i < pFormatCtx->nb_streams; i++)
if(pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO){
audioStream = i;
break;
}
// I get this error, so actually no audio stream is detected
if (audioStream == -1){
printf("Didn't find a audio stream.\n");
return -1;
}
printf("Audio stream found at index %d\n", audioStream);
// I do not get here, because an audio stream is not detected.
AVCodecContext *pCodecCtx = pFormatCtx->streams[audioStream]->codec;
// This is where I want to be!
AVCodec *pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
if (pCodec == NULL) {
printf("Codec not found.\n");
return -1;
}
// Open codec
if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0) {
printf("Could not open codec.\n");
return -1;
}

FFMpeg: extracting audio frames with avcodec creates noise

In my Android app, I implemented the FFMpeg library, and try to use it so I can extract audio samples from an audio file on the fly.
Here is what I did (I simplified the code here so it's easier to read):
// File-scope decoding state shared by FFMpegPlayer::createFFmpeg() (which sets
// it up) and FFMpegPlayer::getPcmFloat() (which consumes it).
AVPacket packet;                      // packet most recently read from the file
AVCodecContext *codecContext = NULL;  // audio decoder context
AVFormatContext *formatContext;       // demuxer context of the opened file
AVFrame *frame = NULL;                // receives decoded audio frames
SwrContext *swrContext;               // resampler producing AV_SAMPLE_FMT_FLT stereo
int audio_stream_index = -1;          // index of the selected audio stream
int ret;                              // result of the last decoder call in getPcmFloat()
uint8_t *localBuffer;                 // output buffer handed to swr_convert()
int FFMpegPlayer::createFFmpeg(const char *filename)
{
int ret;
AVCodec *dec;
frame = av_frame_alloc();
av_register_all();
avformat_open_input(&formatContext, filename, NULL, NULL))
avformat_find_stream_info(formatContext, NULL))
// select the audio stream
audio_stream_index = av_find_best_stream(formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, &dec, 0);
// create decoding context
codecContext = avcodec_alloc_context3(dec);
avcodec_parameters_to_context(codecContext, formatContext->streams[audio_stream_index]->codecpar);
av_opt_set_int(codecContext, "refcounted_frames", 1, 0);
// init the audio decoder
avcodec_open2(codecContext, dec, NULL))
swrContext = swr_alloc();
// we assume here that the audio file is a 44100 Hz stereo audio file
localBuffer = (uint8_t *) av_malloc(44100 * 2);
swr_alloc_set_opts(swrContext, AV_CH_LAYOUT_STEREO, AV_SAMPLE_FMT_FLT, codecContext->sample_rate,
codecContext->channel_layout, codecContext->sample_fmt, codecContext->sample_rate, 0,
NULL);
swr_init(swrContext);
return 0;
}
void FFMpegPlayer::getPcmFloat(float *buffer)
{
// init :
int i, ch, dataSize;
bool extraxted = false;
float sample = 0;
// extract :
while (av_read_frame(formatContext, &packet) >= 0 && !extraxted)
{
if (packet.stream_index == audio_stream_index)
{
// send the packet with the compressed data to the decoder
ret = avcodec_send_packet(codecContext, &packet);
// read all the output frames (in general there may be any number of them
while (ret >= 0)
{
ret = avcodec_receive_frame(codecContext, frame);
if (ret == AVERROR(EAGAIN))
{
LOGW("AVERROR(EAGAIN)\n");
break;
}
else if (ret == AVERROR_EOF)
{
LOGW("AVERROR_EOF\n");
break;
}
dataSize = av_get_bytes_per_sample(codecContext->sample_fmt);
swr_convert(swrContext, &localBuffer, 44100 * 2, (const uint8_t **) frame->data, frame->nb_samples);
int a = 0;
for (i = 0; i < frame->nb_samples; i++)
{
for (ch = 0; ch < codecContext->channels; ch++)
{
memcpy(&sample, &localBuffer[(codecContext->channels * i + ch) * dataSize], dataSize);
buffer[a] = sample;
a++;
}
}
// exit extract:
extraxted = true;
}
}
}
}
Anytime I need audio samples, I call the getPcmFloat() function.
Thanks to that code, I can clearly listen to the audio file.
The problem is: I have some crackling in the sound, and I have no idea where it comes from, or how to fix it.
Does anyone know how to get the exact frames without glitches?
Thanks for your help.

FFMPEG. Read frame, process it, put it to output video. Copy sound stream unchanged

I want to apply processing to a video clip with sound track, extract and process frame by frame and write result to output file. Number of frames, size of frame and speed remains unchanged in output clip. Also I want to keep the same audio track as I have in source.
I can read the clip, decode the frames, and process them using OpenCV. Audio packets are also written fine. I'm stuck on forming the output video stream.
The minimal runnable code I have for now (sorry it is not very short, but I can't make it any shorter):
extern "C" {
#include <libavutil/timestamp.h>
#include <libavformat/avformat.h>
#include "libavcodec/avcodec.h"
#include <libavutil/opt.h>
#include <libavdevice/avdevice.h>
#include <libswscale/swscale.h>
}
#include "opencv2/opencv.hpp"
#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(55,28,1)
#define av_frame_alloc avcodec_alloc_frame
#endif
using namespace std;
using namespace cv;
static void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt, const char *tag)
{
AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
char buf1[AV_TS_MAX_STRING_SIZE] = { 0 };
av_ts_make_string(buf1, pkt->pts);
char buf2[AV_TS_MAX_STRING_SIZE] = { 0 };
av_ts_make_string(buf1, pkt->dts);
char buf3[AV_TS_MAX_STRING_SIZE] = { 0 };
av_ts_make_string(buf1, pkt->duration);
char buf4[AV_TS_MAX_STRING_SIZE] = { 0 };
av_ts_make_time_string(buf1, pkt->pts, time_base);
char buf5[AV_TS_MAX_STRING_SIZE] = { 0 };
av_ts_make_time_string(buf1, pkt->dts, time_base);
char buf6[AV_TS_MAX_STRING_SIZE] = { 0 };
av_ts_make_time_string(buf1, pkt->duration, time_base);
printf("pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
buf1, buf4,
buf2, buf5,
buf3, buf6,
pkt->stream_index);
}
int main(int argc, char **argv)
{
AVOutputFormat *ofmt = NULL;
AVFormatContext *ifmt_ctx = NULL, *ofmt_ctx = NULL;
AVPacket pkt;
AVFrame *pFrame = NULL;
AVFrame *pFrameRGB = NULL;
int frameFinished = 0;
pFrame = av_frame_alloc();
pFrameRGB = av_frame_alloc();
const char *in_filename, *out_filename;
int ret, i;
in_filename = "../../TestClips/Audio Video Sync Test.mp4";
out_filename = "out.mp4";
// Initialize FFMPEG
av_register_all();
// Get input file format context
if ((ret = avformat_open_input(&ifmt_ctx, in_filename, 0, 0)) < 0)
{
fprintf(stderr, "Could not open input file '%s'", in_filename);
goto end;
}
// Extract streams description
if ((ret = avformat_find_stream_info(ifmt_ctx, 0)) < 0)
{
fprintf(stderr, "Failed to retrieve input stream information");
goto end;
}
// Print detailed information about the input or output format,
// such as duration, bitrate, streams, container, programs, metadata, side data, codec and time base.
av_dump_format(ifmt_ctx, 0, in_filename, 0);
// Allocate an AVFormatContext for an output format.
avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, out_filename);
if (!ofmt_ctx)
{
fprintf(stderr, "Could not create output context\n");
ret = AVERROR_UNKNOWN;
goto end;
}
// The output container format.
ofmt = ofmt_ctx->oformat;
// Allocating output streams
for (i = 0; i < ifmt_ctx->nb_streams; i++)
{
AVStream *in_stream = ifmt_ctx->streams[i];
AVStream *out_stream = avformat_new_stream(ofmt_ctx, in_stream->codec->codec);
if (!out_stream)
{
fprintf(stderr, "Failed allocating output stream\n");
ret = AVERROR_UNKNOWN;
goto end;
}
ret = avcodec_copy_context(out_stream->codec, in_stream->codec);
if (ret < 0)
{
fprintf(stderr, "Failed to copy context from input to output stream codec context\n");
goto end;
}
out_stream->codec->codec_tag = 0;
if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
{
out_stream->codec->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
}
// Show output format info
av_dump_format(ofmt_ctx, 0, out_filename, 1);
// Open output file
if (!(ofmt->flags & AVFMT_NOFILE))
{
ret = avio_open(&ofmt_ctx->pb, out_filename, AVIO_FLAG_WRITE);
if (ret < 0)
{
fprintf(stderr, "Could not open output file '%s'", out_filename);
goto end;
}
}
// Write output file header
ret = avformat_write_header(ofmt_ctx, NULL);
if (ret < 0)
{
fprintf(stderr, "Error occurred when opening output file\n");
goto end;
}
// Search for input video codec info
AVCodec *in_codec = nullptr;
AVCodecContext* avctx = nullptr;
int video_stream_index = -1;
for (int i = 0; i < ifmt_ctx->nb_streams; i++)
{
if (ifmt_ctx->streams[i]->codec->coder_type == AVMEDIA_TYPE_VIDEO)
{
video_stream_index = i;
avctx = ifmt_ctx->streams[i]->codec;
in_codec = avcodec_find_decoder(avctx->codec_id);
if (!in_codec)
{
fprintf(stderr, "in codec not found\n");
exit(1);
}
break;
}
}
// Search for output video codec info
AVCodec *out_codec = nullptr;
AVCodecContext* o_avctx = nullptr;
int o_video_stream_index = -1;
for (int i = 0; i < ofmt_ctx->nb_streams; i++)
{
if (ofmt_ctx->streams[i]->codec->coder_type == AVMEDIA_TYPE_VIDEO)
{
o_video_stream_index = i;
o_avctx = ofmt_ctx->streams[i]->codec;
out_codec = avcodec_find_encoder(o_avctx->codec_id);
if (!out_codec)
{
fprintf(stderr, "out codec not found\n");
exit(1);
}
break;
}
}
// openCV pixel format
AVPixelFormat pFormat = AV_PIX_FMT_RGB24;
// Data size
int numBytes = avpicture_get_size(pFormat, avctx->width, avctx->height);
// allocate buffer
uint8_t *buffer = (uint8_t *)av_malloc(numBytes * sizeof(uint8_t));
// fill frame structure
avpicture_fill((AVPicture *)pFrameRGB, buffer, pFormat, avctx->width, avctx->height);
// frame area
int y_size = avctx->width * avctx->height;
// Open input codec
avcodec_open2(avctx, in_codec, NULL);
// Main loop
while (1)
{
AVStream *in_stream, *out_stream;
ret = av_read_frame(ifmt_ctx, &pkt);
if (ret < 0)
{
break;
}
in_stream = ifmt_ctx->streams[pkt.stream_index];
out_stream = ofmt_ctx->streams[pkt.stream_index];
log_packet(ifmt_ctx, &pkt, "in");
// copy packet
pkt.pts = av_rescale_q_rnd(pkt.pts, in_stream->time_base, out_stream->time_base, AVRounding(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
pkt.dts = av_rescale_q_rnd(pkt.dts, in_stream->time_base, out_stream->time_base, AVRounding(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
pkt.duration = av_rescale_q(pkt.duration, in_stream->time_base, out_stream->time_base);
pkt.pos = -1;
log_packet(ofmt_ctx, &pkt, "out");
if (pkt.stream_index == video_stream_index)
{
avcodec_decode_video2(avctx, pFrame, &frameFinished, &pkt);
if (frameFinished)
{
struct SwsContext *img_convert_ctx;
img_convert_ctx = sws_getCachedContext(NULL,
avctx->width,
avctx->height,
avctx->pix_fmt,
avctx->width,
avctx->height,
AV_PIX_FMT_BGR24,
SWS_BICUBIC,
NULL,
NULL,
NULL);
sws_scale(img_convert_ctx,
((AVPicture*)pFrame)->data,
((AVPicture*)pFrame)->linesize,
0,
avctx->height,
((AVPicture *)pFrameRGB)->data,
((AVPicture *)pFrameRGB)->linesize);
sws_freeContext(img_convert_ctx);
// Do some image processing
cv::Mat img(pFrame->height, pFrame->width, CV_8UC3, pFrameRGB->data[0],false);
cv::GaussianBlur(img,img,Size(5,5),3);
cv::imshow("Display", img);
cv::waitKey(5);
// --------------------------------
// Transform back to initial format
// --------------------------------
img_convert_ctx = sws_getCachedContext(NULL,
avctx->width,
avctx->height,
AV_PIX_FMT_BGR24,
avctx->width,
avctx->height,
avctx->pix_fmt,
SWS_BICUBIC,
NULL,
NULL,
NULL);
sws_scale(img_convert_ctx,
((AVPicture*)pFrameRGB)->data,
((AVPicture*)pFrameRGB)->linesize,
0,
avctx->height,
((AVPicture *)pFrame)->data,
((AVPicture *)pFrame)->linesize);
// --------------------------------------------
// Something must be here
// --------------------------------------------
//
// Write fideo frame (How to write frame to output stream ?)
//
// --------------------------------------------
sws_freeContext(img_convert_ctx);
}
}
else // write sound frame
{
ret = av_interleaved_write_frame(ofmt_ctx, &pkt);
}
if (ret < 0)
{
fprintf(stderr, "Error muxing packet\n");
break;
}
// Decrease packet ref counter
av_packet_unref(&pkt);
}
av_write_trailer(ofmt_ctx);
end:
avformat_close_input(&ifmt_ctx);
// close output
if (ofmt_ctx && !(ofmt->flags & AVFMT_NOFILE))
{
avio_closep(&ofmt_ctx->pb);
}
avformat_free_context(ofmt_ctx);
if (ret < 0 && ret != AVERROR_EOF)
{
char buf_err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
av_make_error_string(buf_err, AV_ERROR_MAX_STRING_SIZE, ret);
fprintf(stderr, "Error occurred: %s\n", buf_err);
return 1;
}
avcodec_close(avctx);
av_free(pFrame);
av_free(pFrameRGB);
return 0;
}

Your original code segfaults in my case. Initializing the output codec context seems to fix it. The code below works for me but I didn't test the OpenCV stuff as I don't have the lib installed.
Get the codec context:
// Search for output video codec info and open an encoder for the first video
// stream of the output, configured from the input decoder context `avctx`.
AVCodec *out_codec = NULL;
AVCodecContext* o_avctx = NULL;
int o_video_stream_index = -1;
for (unsigned int i = 0; i < ofmt_ctx->nb_streams; i++)
{
    // The media type lives in codec_type; coder_type is an unrelated encoder option.
    if (ofmt_ctx->streams[i]->codec->codec_type != AVMEDIA_TYPE_VIDEO)
    {
        continue;
    }
    o_video_stream_index = (int)i;
    out_codec = avcodec_find_encoder(ofmt_ctx->streams[i]->codec->codec_id);
    if (!out_codec)
    {
        fprintf(stderr, "out codec not found\n");
        exit(1);
    }
    o_avctx = avcodec_alloc_context3(out_codec);
    if (!o_avctx)
    {
        fprintf(stderr, "could not allocate output codec context\n");
        exit(1);
    }
    o_avctx->height = avctx->height;
    o_avctx->width = avctx->width;
    o_avctx->sample_aspect_ratio = avctx->sample_aspect_ratio;
    // Prefer the first pixel format the encoder advertises, else keep the input's
    if (out_codec->pix_fmts)
        o_avctx->pix_fmt = out_codec->pix_fmts[0];
    else
        o_avctx->pix_fmt = avctx->pix_fmt;
    o_avctx->time_base = avctx->time_base;
    if (avcodec_open2(o_avctx, out_codec, NULL) < 0)
    {
        fprintf(stderr, "could not open output codec\n");
        exit(1);
    }
    // Only one encoder is kept; stopping here avoids leaking a context for
    // every further video stream.
    break;
}
Encode and write:
// Main loop (excerpt: "..." marks code that is unchanged from the question)
while (1)
{
    ...
    if (pkt.stream_index == video_stream_index)
    {
        avcodec_decode_video2(avctx, pFrame, &frameFinished, &pkt);
        if (frameFinished)
        {
            ...
            // --------------------------------------------
            // Something must be here
            // --------------------------------------------
            int got_packet = 0;
            AVPacket enc_pkt = { 0 };
            av_init_packet(&enc_pkt);
            // The encoder may buffer frames: only write when it actually
            // produced a packet, and only when encoding succeeded.
            if (avcodec_encode_video2(o_avctx, &enc_pkt, pFrame, &got_packet) >= 0 && got_packet)
            {
                // A fresh packet has stream_index 0; point it at the output video stream.
                enc_pkt.stream_index = o_video_stream_index;
                // NOTE(review): the packet timestamps are in the encoder's time
                // base and presumably still need rescaling to the output
                // stream's time base -- confirm.
                ret = av_interleaved_write_frame(ofmt_ctx, &enc_pkt);
            }
            ....
        }
    }

You should copy the packet information (timestamps, duration and stream index) of the original packet to the packet produced from the processed frame, and then pass that packet to av_interleaved_write_frame.

Extracting the h264 part of a video file (demuxing)

I am trying to demux a video file into the video part (h264, mpeg4, h265, vp8, etc) and the audio part (mp3, aac, ac3, etc) and the subtitle part (srt) using ffmpeg in c++.
The audio part came out all right and played in all the media players I have, and so did the subtitle part. The video part, however, was written WITHOUT any error to a .h264 file, but when I use ffprobe to check it or ffplay to play it, it always gives the error "Invalid data found when processing input".
The code below
/* Separate a media file into audio, video and subtitle files (demuxing, complex) */
//TODO: mute error when subtitle is not present
#define __STDC_CONSTANT_MACROS
extern "C"
{
#include "libavformat/avformat.h"
}
int main()
{
//Input AVFormatContext and Output AVFormatContext
AVOutputFormat *ofmt_a = NULL, *ofmt_v = NULL, *ofmt_s = NULL;
AVFormatContext *ifmt_ctx = NULL, *ofmt_ctx_a = NULL, *ofmt_ctx_v = NULL, *ofmt_ctx_s = NULL;
AVPacket pkt;
int ret, i;
int videoindex=-1, audioindex=-1, srtindex=-1;
int frame_index=0;
//Input file URL
const char *in_filename = "sample.mp4";
//Output file URL
const char *out_filename_v = "sample.h264";
const char *out_filename_a = "sample.mp3";
const char *out_filename_s = "sample.srt";
av_register_all();
//Input
if ((ret = avformat_open_input(&ifmt_ctx, in_filename, 0, 0)) < 0) {
printf( "Could not open input file.");
goto end;
}
if ((ret = avformat_find_stream_info(ifmt_ctx, 0)) < 0) {
printf( "Failed to retrieve input stream information");
goto end;
}
//Output
avformat_alloc_output_context2(&ofmt_ctx_v, NULL, NULL, out_filename_v);
if (!ofmt_ctx_v) {
printf( "Could not create output context\n");
ret = AVERROR_UNKNOWN;
goto end;
}
ofmt_v = ofmt_ctx_v->oformat;
avformat_alloc_output_context2(&ofmt_ctx_a, NULL, NULL, out_filename_a);
if (!ofmt_ctx_a) {
printf( "Could not create output context\n");
ret = AVERROR_UNKNOWN;
goto end;
}
ofmt_a = ofmt_ctx_a->oformat;
avformat_alloc_output_context2(&ofmt_ctx_s, NULL, NULL, out_filename_s);
if (!ofmt_ctx_a) {
printf( "Could not create output context\n");
ret = AVERROR_UNKNOWN;
goto end;
}
ofmt_s = ofmt_ctx_s->oformat;
for (i = 0; i < ifmt_ctx->nb_streams; i++) {
//Create output AVStream according to input AVStream
AVFormatContext *ofmt_ctx;
AVStream *in_stream = ifmt_ctx->streams[i];
AVStream *out_stream = NULL;
if(ifmt_ctx->streams[i]->codec->codec_type==AVMEDIA_TYPE_VIDEO){
videoindex=i;
out_stream=avformat_new_stream(ofmt_ctx_v, in_stream->codec->codec);
ofmt_ctx=ofmt_ctx_v;
}
else if(ifmt_ctx->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO){
audioindex=i;
out_stream=avformat_new_stream(ofmt_ctx_a, in_stream->codec->codec);
ofmt_ctx=ofmt_ctx_a;
}
else if(ifmt_ctx->streams[i]->codec->codec_type==AVMEDIA_TYPE_SUBTITLE){
srtindex=i;
out_stream=avformat_new_stream(ofmt_ctx_s, in_stream->codec->codec);
ofmt_ctx=ofmt_ctx_s;
}
else{
break;
}
if (!out_stream) {
printf( "Failed allocating output stream\n");
ret = AVERROR_UNKNOWN;
goto end;
}
//Copy the settings of AVCodecContext
if (avcodec_copy_context(out_stream->codec, in_stream->codec) < 0) {
printf( "Failed to copy context from input to output stream codec context\n");
goto end;
}
out_stream->codec->codec_tag = 0;
if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
out_stream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
}
//Dump Format------------------
printf("\n==============Input Video=============\n");
av_dump_format(ifmt_ctx, 0, in_filename, 0);
printf("\n==============Output Video============\n");
av_dump_format(ofmt_ctx_v, 0, out_filename_v, 1);
printf("\n==============Output Audio============\n");
av_dump_format(ofmt_ctx_a, 0, out_filename_a, 1);
/*printf("\n==============Output Subtitle============\n");
av_dump_format(ofmt_ctx_s, 0, out_filename_s, 1);*/
printf("\n======================================\n");
//Open output file
if (!(ofmt_v->flags & AVFMT_NOFILE)) {
if (avio_open(&ofmt_ctx_v->pb, out_filename_v, AVIO_FLAG_WRITE) < 0) {
printf( "Could not open output file '%s'", out_filename_v);
goto end;
}
}
if (!(ofmt_a->flags & AVFMT_NOFILE)) {
if (avio_open(&ofmt_ctx_a->pb, out_filename_a, AVIO_FLAG_WRITE) < 0) {
printf( "Could not open output file '%s'", out_filename_a);
goto end;
}
}
if (!(ofmt_a->flags & AVFMT_NOFILE)) {
if (avio_open(&ofmt_ctx_s->pb, out_filename_s, AVIO_FLAG_WRITE) < 0) {
printf( "Could not open output file '%s'", out_filename_s);
goto end;
}
}
//Write file header
if (avformat_write_header(ofmt_ctx_v, NULL) < 0) {
printf( "Error occurred when opening video output file\n");
goto end;
}
system("pause");
if (avformat_write_header(ofmt_ctx_a, NULL) < 0) {
printf( "Error occurred when opening audio output file\n");
goto end;
}
if (avformat_write_header(ofmt_ctx_s, NULL) < 0) {
printf( "Error occurred when opening audio output file\n");
goto end;
}
AVBitStreamFilterContext* h264bsfc = av_bitstream_filter_init("h264_mp4toannexb");
while (1) {
AVFormatContext *ofmt_ctx;
AVStream *in_stream, *out_stream;
//Get an AVPacket
if (av_read_frame(ifmt_ctx, &pkt) < 0)
break;
in_stream = ifmt_ctx->streams[pkt.stream_index];
if(pkt.stream_index==videoindex){
out_stream = ofmt_ctx_v->streams[0];
ofmt_ctx=ofmt_ctx_v;
printf("Write Video Packet. size:%d\tpts:%lld\n",pkt.size,pkt.pts);
av_bitstream_filter_filter(h264bsfc, in_stream->codec, NULL, &pkt.data, &pkt.size, pkt.data, pkt.size, 0);
}else if(pkt.stream_index==audioindex){
out_stream = ofmt_ctx_a->streams[0];
ofmt_ctx=ofmt_ctx_a;
printf("Write Audio Packet. size:%d\tpts:%lld\n",pkt.size,pkt.pts);
}
else if(pkt.stream_index==srtindex){
out_stream = ofmt_ctx_s->streams[0];
ofmt_ctx=ofmt_ctx_s;
printf("Write Subtitle Packet. size:%d\tpts:%lld\n",pkt.size,pkt.pts);
}
else{
continue;
}
//Convert PTS/DTS
pkt.pts = av_rescale_q_rnd(pkt.pts, in_stream->time_base, out_stream->time_base, (AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
pkt.dts = av_rescale_q_rnd(pkt.dts, in_stream->time_base, out_stream->time_base, (AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
pkt.duration = av_rescale_q(pkt.duration, in_stream->time_base, out_stream->time_base);
pkt.pos = -1;
pkt.stream_index=0;
//Write
if (av_interleaved_write_frame(ofmt_ctx, &pkt) < 0) {
printf( "Error muxing packet\n");
break;
}
//printf("Write %8d frames to output file\n",frame_index);
av_free_packet(&pkt);
frame_index++;
}
av_bitstream_filter_close(h264bsfc);
//Write file trailer
av_write_trailer(ofmt_ctx_a);
av_write_trailer(ofmt_ctx_v);
av_write_trailer(ofmt_ctx_s);
end:
avformat_close_input(&ifmt_ctx);
/* close output */
if (ofmt_ctx_a && !(ofmt_a->flags & AVFMT_NOFILE))
avio_close(ofmt_ctx_a->pb);
if (ofmt_ctx_v && !(ofmt_v->flags & AVFMT_NOFILE))
avio_close(ofmt_ctx_v->pb);
if (ofmt_ctx_s && !(ofmt_s->flags & AVFMT_NOFILE))
avio_close(ofmt_ctx_s->pb);
avformat_free_context(ofmt_ctx_a);
avformat_free_context(ofmt_ctx_v);
avformat_free_context(ofmt_ctx_s);
system("pause");
if (ret < 0 && ret != AVERROR_EOF) {
printf( "Error occurred.\n");
return -1;
}
return 0;
}
EDIT 1
Screen shot of resultant h264 file
EDIT 2
I think the "error" has to do with FFMPEG's "Using AVStream.codec.time_base as a timebase hint to the muxer is deprecated. Set AVStream.time_base instead" error.
I reverted to an older version of FFMPEG and, with the same code, the resultant h264 file was ok!

You need to convert the H.264 bitstream from length-prefixed mode to start-code-prefixed mode. This is required by some streaming formats, typically the MPEG-2 transport stream format ("mpegts").
Take a look at https://www.ffmpeg.org/ffmpeg-bitstream-filters.html#h264_005fmp4toannexb
Look at lines from 402 to 424 and from 842 to 843.
https://www.ffmpeg.org/doxygen/0.7/crystalhd_8c-source.html
I used it, to extract h264 from mp4.
//Use this filter on your first h264 input AVPacket
AVFormatContext *ifmt_ctx = NULL;
//...
//... //init input
//...
AVPacket *firstPacket;
//...
//... //get packet from stream
//...
// Receive the filtered data and its size from the filter
uint8_t *dummy_p = NULL;
int dummy_int = 0;
AVBitStreamFilterContext *filter = av_bitstream_filter_init("h264_mp4toannexb");
if (!filter)
{
    printf("Can't open filter\n");
    exit(1);
}
ret = av_bitstream_filter_filter(filter, ifmt_ctx->streams[videoindex]->codec, NULL,
                                 &dummy_p, &dummy_int,
                                 firstPacket->data, firstPacket->size, 0);
if( ret < 0 )
{
    printf("Can't filter\n");
    exit(1);
}
// use dummy_p (dummy_int bytes) to write to file, as first packet

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

FFMPEG, change default audio track with code - c++

Related

FFMPEG C++ Non monotonically increasing dts to muxer

Initialize AVFormatContext from buffer data

FFMpeg: extracting audio frames with avcodec creates noise

FFMPEG. Read frame, process it, put it to output video. Copy sound stream unchanged

Extracting the h264 part of a video file (demuxing)

Categories

Resources