I would like to save thumbnails, as JPEGs, from an H.264 stream that I'm turning into FFmpeg AVPackets.
I start with an H.264 AVPacket (an I-frame) and decode it into an AVFrame using avcodec_send_packet/avcodec_receive_frame. Now I'm trying to go from the AVFrame back to an AVPacket using avcodec_send_frame/avcodec_receive_packet.
I can convert to PNG instead of JPG, though the output looks like three separate frames squeezed side by side into one. I wonder whether one frame is R, the next G, and the last B; I'm not sure, but clearly I'm doing something wrong there. I figured it might be the PNG encoder and I don't need it anyway, so let's get JPG working first. But the JPG path is outputting unopenable files.
Any advice?
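One hedged aside before the code: FFmpeg decoders pick their own output pix_fmt (a decoded H.264 frame normally arrives as planar YUV420P), so setting AV_PIX_FMT_RGB24 on the decoder context does not convert anything, and planar YUV interpreted as packed RGB renders exactly like three squeezed frames side by side. The usual fix is an explicit libswscale conversion; here is a minimal sketch with illustrative names (not code from this post):
#include <libswscale/swscale.h>
#include <libavutil/frame.h>

static AVFrame* convert_pix_fmt(const AVFrame* src, enum AVPixelFormat dst_fmt)
{
    AVFrame* dst = av_frame_alloc();
    if (!dst)
        return NULL;
    dst->format = dst_fmt;
    dst->width = src->width;
    dst->height = src->height;
    if (av_frame_get_buffer(dst, 0) < 0) {
        av_frame_free(&dst);
        return NULL;
    }
    struct SwsContext* sws = sws_getContext(src->width, src->height,
                                            (enum AVPixelFormat)src->format,
                                            dst->width, dst->height, dst_fmt,
                                            SWS_BILINEAR, NULL, NULL, NULL);
    if (!sws) {
        av_frame_free(&dst);
        return NULL;
    }
    sws_scale(sws, (const uint8_t* const*)src->data, src->linesize,
              0, src->height, dst->data, dst->linesize);
    sws_freeContext(sws);
    return dst;
}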
Here is my code:
int output_thumbnails(AVPacket* video_packet)
{
char png_file_name[max_chars_per_filename];
char thumbnail_id_char[10];
_itoa_s(thumbnail_id, thumbnail_id_char, 10);
strcpy_s(png_file_name, max_chars_per_filename, time_stamped_filepath);
strcat_s(png_file_name, max_chars_per_filename, time_stamped_filename);
strcat_s(png_file_name, max_chars_per_filename, thumbnail_id_char);
strcat_s(png_file_name, max_chars_per_filename, ".jpg");
thumbnail_id++;
int error_code = send_AVPacket_to_videocard(video_packet, av_codec_context_RTSP);
//if (error_code == AVERROR_EOF)
//{
// // error_code = videocard_to_PNG(png_file_name, av_codec_context_RTSP, av_codec_RTSP);
//}
if (error_code == AVERROR(EAGAIN)) //send packets to videocard until function returns EAGAIN
{
error_code = videocard_to_PNG(png_file_name, av_codec_context_RTSP);
//EAGAIN means that the video card buffer is ready to have the png pulled off of it
if (error_code == AVERROR_EOF)
{
// error_code = videocard_to_PNG(png_file_name, av_codec_context_RTSP, av_codec_RTSP);
}
else if (error_code == AVERROR(EAGAIN))
{
}
else
{
deal_with_av_errors(error_code, __LINE__, __FILE__);
}
}
else
{
deal_with_av_errors(error_code, __LINE__, __FILE__);
}
return 0;
}
VideoThumbnailGenerator.cpp:
#include "VideoThumbnailGenerator.h"
bool decoder_context_created = false;
bool encoder_context_created = false;
AVCodecContext* h264_decoder_codec_ctx;
AVCodecContext* thumbnail_encoder_codec_ctx;
int send_AVPacket_to_videocard(AVPacket* packet, AVCodecContext* codec_ctx)
{
if(!decoder_context_created)
{
AVCodec* h264_codec = avcodec_find_decoder(codec_ctx->codec_id);
h264_decoder_codec_ctx = avcodec_alloc_context3(h264_codec);
h264_decoder_codec_ctx->width = codec_ctx->width;
h264_decoder_codec_ctx->height = codec_ctx->height;
h264_decoder_codec_ctx->pix_fmt = AV_PIX_FMT_RGB24;
h264_decoder_codec_ctx->codec_type = AVMEDIA_TYPE_VIDEO;
h264_decoder_codec_ctx->skip_frame = AVDISCARD_NONINTRA;//AVDISCARD_NONREF;//AVDISCARD_NONINTRA;
h264_decoder_codec_ctx->time_base.num = 1;
h264_decoder_codec_ctx->time_base.den = 30;
h264_decoder_codec_ctx->extradata = codec_ctx->extradata;
h264_decoder_codec_ctx->extradata_size = codec_ctx->extradata_size;
if (!h264_codec) {
return -1;
}
int error_code = avcodec_open2(h264_decoder_codec_ctx, h264_codec, NULL);
if (error_code < 0)
{
return error_code;
}
decoder_context_created = true;
}
//use hardware decoding to decode video frame
int error_code = avcodec_send_packet(h264_decoder_codec_ctx, packet);
if(error_code == AVERROR(EAGAIN))
{
return AVERROR(EAGAIN);
}
if(error_code<0)
{
printf("Error: Could not send packet to video card");
return error_code;
}
return 0;
}
int videocard_to_PNG(char *png_file_path, AVCodecContext* codec_ctx)
{
if (!encoder_context_created)
{
//AVCodec* thumbnail_codec = avcodec_find_encoder(AV_CODEC_ID_PNG);
AVCodec* thumbnail_codec = avcodec_find_encoder(AV_CODEC_ID_JPEG2000);
thumbnail_encoder_codec_ctx = avcodec_alloc_context3(thumbnail_codec);
thumbnail_encoder_codec_ctx->width = 128;
thumbnail_encoder_codec_ctx->height = (int)(((float)codec_ctx->height/(float)codec_ctx->width) * 128);
thumbnail_encoder_codec_ctx->pix_fmt = AV_PIX_FMT_RGB24; //AV_PIX_FMT_YUVJ420P
thumbnail_encoder_codec_ctx->codec_type = AVMEDIA_TYPE_VIDEO;
thumbnail_encoder_codec_ctx->time_base.num = 1;
thumbnail_encoder_codec_ctx->time_base.den = 30;
bool thread_check = thumbnail_encoder_codec_ctx->thread_type & FF_THREAD_FRAME;
bool frame_threads_check = thumbnail_encoder_codec_ctx->codec->capabilities & AV_CODEC_CAP_FRAME_THREADS;
if (!thumbnail_codec) {
return -1;
}
int error_code = avcodec_open2(thumbnail_encoder_codec_ctx, thumbnail_codec, NULL);
if (error_code < 0)
{
return error_code;
}
encoder_context_created = true;
}
AVFrame* thumbnail_frame = av_frame_alloc();
AVPacket* thumbnail_packet = av_packet_alloc();
//av_init_packet(png_packet);
int error_code = avcodec_receive_frame(h264_decoder_codec_ctx, thumbnail_frame);
//check for errors everytime
//note EAGAIN errors won't get here since they won't get past while
if (error_code < 0 && error_code != AVERROR(EAGAIN))
{
printf("Error: Could not get frame from video card");
return error_code;
}
//empty buffer if there are any more frames to pull (there shouldn't be)
//while(error_code != AVERROR(EAGAIN))
//{
// //check for errors everytime
// //note EAGAIN errors won't get here since they won't get past while
// if (error_code < 0)
// {
// printf("Error: Could not get frame from video card");
// return error_code;
// }
//
// error_code = avcodec_receive_frame(h264_decoder_codec_ctx, png_frame);
//}
//now we convert back to AVPacket, this time one holding PNG info, so we can store to file
error_code = avcodec_send_frame(thumbnail_encoder_codec_ctx, thumbnail_frame);
if (error_code >= 0) {
error_code = avcodec_receive_packet(thumbnail_encoder_codec_ctx, thumbnail_packet);
FILE* out_PNG;
errno_t err = fopen_s(&out_PNG, png_file_path, "wb");
if (err == 0) {
fwrite(thumbnail_packet->data, thumbnail_packet->size, 1, out_PNG);
fclose(out_PNG);
}
}
return error_code;
}
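For reference, a minimal sketch (not the code above) of encoding a decoded AVFrame straight to a .jpg file: it uses AV_CODEC_ID_MJPEG, which writes ordinary JFIF JPEGs, unlike AV_CODEC_ID_JPEG2000, and it assumes the frame is already AV_PIX_FMT_YUVJ420P (convert first if it isn't). The function name is illustrative:
#include <libavcodec/avcodec.h>
#include <stdio.h>

int save_frame_as_jpeg(const AVFrame* frame, const char* path)
{
    /* sketch only: assumes frame->format is AV_PIX_FMT_YUVJ420P */
    const AVCodec* codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
    if (!codec)
        return -1;
    AVCodecContext* ctx = avcodec_alloc_context3(codec);
    if (!ctx)
        return -1;
    ctx->width = frame->width;
    ctx->height = frame->height;
    ctx->pix_fmt = AV_PIX_FMT_YUVJ420P;
    ctx->time_base = (AVRational){1, 30};
    int ret = avcodec_open2(ctx, codec, NULL);
    if (ret >= 0)
        ret = avcodec_send_frame(ctx, frame);
    if (ret >= 0) {
        AVPacket* pkt = av_packet_alloc();
        ret = avcodec_receive_packet(ctx, pkt);
        if (ret >= 0) {
            FILE* f = fopen(path, "wb");
            if (f) {
                fwrite(pkt->data, 1, pkt->size, f);
                fclose(f);
            }
        }
        av_packet_free(&pkt);
    }
    avcodec_free_context(&ctx);
    return ret;
}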
My ultimate goal is to split multi-channel WAV files into single mono ones. After a few days of experiments, my plan is this sequence:
Decode the audio file into a frame.
Convert the interleaved frame into a planar one (in order to separate the data buffer into multiple buffers).
Grab the planar frame buffers and encode each of them into a new file (a sketch of this step follows below).
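As a hedged sketch of that last step (using the same AVChannelLayout API as the code further below; names illustrative): one plane of an AV_SAMPLE_FMT_FLTP frame is itself a plain AV_SAMPLE_FMT_FLT mono buffer, so each channel can be copied into its own mono frame:
extern "C" {
#include <libavutil/frame.h>
#include <libavutil/channel_layout.h>
}
#include <cstring>

AVFrame* plane_as_mono_frame(const AVFrame* planar, int ch)
{
    // sketch only: planar is assumed to be AV_SAMPLE_FMT_FLTP
    AVFrame* mono = av_frame_alloc();
    if (!mono) return nullptr;
    mono->format = AV_SAMPLE_FMT_FLT;  // one plane of FLTP is plain FLT
    mono->sample_rate = planar->sample_rate;
    mono->nb_samples = planar->nb_samples;
    av_channel_layout_default(&mono->ch_layout, 1);  // 1-channel layout
    if (av_frame_get_buffer(mono, 0) < 0) {
        av_frame_free(&mono);
        return nullptr;
    }
    memcpy(mono->data[0], planar->extended_data[ch],
           planar->nb_samples * sizeof(float));
    return mono;
}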
So far I'm stuck trying to convert a WAV file from interleaved to planar and rewrite the WAV file.
edit:
I've turned on Guard Malloc, and apparently the error is within the convert function.
Here's the code:
AVCodecContext* initializeAndOpenCodecContext(AVFormatContext* formatContext, AVStream* stream){
// grab our stream, most audio files only have one anyway
const AVCodec* decoder = avcodec_find_decoder(stream->codecpar->codec_id);
if (!decoder){
std::cout << "no decoder, can't go ahead!\n";
return nullptr;
}
AVCodecContext* codecContext = avcodec_alloc_context3(decoder);
avcodec_parameters_to_context(codecContext, stream->codecpar);
int err = avcodec_open2(codecContext, decoder, nullptr);
if (err < 0){
std::cout << "couldn't open codex!\n";
}
return codecContext;
}
void initialiseResampler(SwrContext* resampler, AVFrame* inputFrame, AVFrame* outputFrame){
av_opt_set_chlayout(resampler, "in_channel_layout", &inputFrame->ch_layout, 0);
av_opt_set_chlayout(resampler, "out_channel_layout", &outputFrame->ch_layout, 0);
av_opt_set_int(resampler, "in_sample_fmt", inputFrame->format, 0);
av_opt_set_int(resampler, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
av_opt_set_int(resampler, "in_sample_rate", inputFrame->sample_rate, 0);
av_opt_set_int(resampler, "out_sample_rate", outputFrame->sample_rate, 0);
}
AVFrame* initialisePlanarFrame(AVFrame* frameToInit, AVFrame* inputFrame){
//AVFrame *planar_frame = av_frame_alloc();
frameToInit->nb_samples = inputFrame->nb_samples;
frameToInit->ch_layout = inputFrame->ch_layout;
frameToInit->format = AV_SAMPLE_FMT_FLTP;
frameToInit->sample_rate = inputFrame->sample_rate;
return frameToInit;
}
int main() {
AVCodecContext *codingContext= NULL;
const AVCodec *codec;
codec = avcodec_find_encoder(AV_CODEC_ID_PCM_F32LE);
codingContext = avcodec_alloc_context3(codec);
codingContext->bit_rate = 16000;
codingContext->sample_fmt = AV_SAMPLE_FMT_FLT;
codingContext->sample_rate = 48000;
codingContext->ch_layout.nb_channels = 2;
codingContext->ch_layout.order = (AVChannelOrder)0;
uint8_t **buffer_ = NULL;
AVFrame* planar_frame = NULL;
// open input
AVFormatContext* formatContext = nullptr;
int err = avformat_open_input(&formatContext, "/Users/tonytorm/Desktop/drum kits/DECAP - Drums That Knock Vol. 9/Kicks/Brash Full Metal Kick.wav", nullptr, nullptr);
if (err < 0){
fprintf(stderr, "Unable to open file!\n");
return -1;
}
// find audio stream
err = avformat_find_stream_info(formatContext, nullptr);
if (err < 0){
fprintf(stderr, "Unable to retrieve stream info!\n");
return -1;
}
int index = av_find_best_stream(formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, nullptr, 0);
if (index < 0){
std::cout<< "coudn't find audio stream in this file" << '\n';
}
AVStream* stream = formatContext->streams[index];
auto fileName = "/Users/tonytorm/Desktop/newFile.wav";
FILE* newFile = fopen(fileName, "w+");
// find right codec and open it
if (auto openCodecContext = initializeAndOpenCodecContext(formatContext, stream)){
AVPacket* packet = av_packet_alloc();
AVFrame* frame = av_frame_alloc();
AVFrame* planar_frame = av_frame_alloc();
SwrContext *avr = swr_alloc(); //audio resampling context
AVChannelLayout monoChannelLayout{(AVChannelOrder)0};
monoChannelLayout.nb_channels = 2;
while (!av_read_frame(formatContext, packet)){
if (packet->stream_index != stream->index) continue; // we only care about audio
int ret = avcodec_send_packet(openCodecContext, packet);
if ( ret < 0) {
if (ret != AVERROR(EAGAIN)){ // if error is actual error not EAGAIN
std::cout << "can't do shit\n";
return -1;
}
}
while (int bret = avcodec_receive_frame(openCodecContext, frame) == 0){
initialisePlanarFrame(planar_frame, frame);
int buffer_size_in = av_samples_get_buffer_size(nullptr,
frame->ch_layout.nb_channels,
frame->nb_samples,
(AVSampleFormat)frame->format,
0);
int buffer_size_out = buffer_size_in/frame->ch_layout.nb_channels;
//planar_frame->linesize[0] = buffer_size_out;
int ret = av_samples_alloc(planar_frame->data,
NULL,
planar_frame->ch_layout.nb_channels,
planar_frame->nb_samples,
AV_SAMPLE_FMT_FLTP,
0);
initialiseResampler(avr, frame, planar_frame);
if (int errRet = swr_init(avr) < 0) {
fprintf(stderr, "Failed to initialize the resampling context\n");
}
if (ret < 0){
char error_message[AV_ERROR_MAX_STRING_SIZE];
av_strerror(ret, error_message, AV_ERROR_MAX_STRING_SIZE);
fprintf(stderr, "Error allocating sample buffer: %s\n", error_message);
return -1;
}
int samples_converted = swr_convert(avr,
planar_frame->data,
buffer_size_out,
(const uint8_t **)frame->data,
buffer_size_in);
if (samples_converted < 0) {
// handle error
std::cout << "error in conversion\n";
return -1;
}
if (avcodec_open2(codingContext, codec, NULL) < 0) {
std::cout << "can't encode!\n";
return -1;
}
AVPacket* nu_packet = av_packet_alloc();
while (int copy = avcodec_send_frame(codingContext, planar_frame) != 0){
if (copy == AVERROR(EAGAIN) || copy == AVERROR_EOF){
std::cout << "can't encode file\n";
return -1;
}
if (avcodec_receive_packet(codingContext, nu_packet) >=0){
fwrite(nu_packet->data, 4, nu_packet->size, newFile);
//av_write_frame(avc, nu_packet);
}
}
av_freep(planar_frame->data);
av_frame_unref(frame);
av_frame_unref(planar_frame);
}
// av_packet_free(&packet);
// av_packet_free(&nu_packet);
}
swr_free(&avr);
avcodec_free_context(&codingContext);
}
fclose(newFile);
}
I know I should write a header to the new WAV file, but for now I'm just trying to write the raw audio data. I always get the same error, but in different parts of the code (randomly); sometimes the code even runs to completion, writing the raw audio data but filling it with some rubbish as well, and I end up with a data file three times the size of the original. Sometimes I end up with a slightly smaller file (I guess the raw audio without the headers). The results are basically random.
Here are some of the functions that trigger the error:
int ret = av_samples_alloc(); // (this is the most common one)
swr_convert()
av_freep();
The error is:
main(64155,0x101b5d5c0) malloc: Incorrect checksum for freed object 0x106802600: probably modified after being freed.
Corrupt value: 0x0
main(64155,0x101b5d5c0) malloc: *** set a breakpoint in malloc_error_break to debug
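One thing worth pinning down with a sketch before debugging further: swr_convert() takes sample counts per channel in its third and fifth arguments, not byte sizes like the buffer_size_in/buffer_size_out values above, and av_frame_get_buffer() can size the planar buffers instead of av_samples_alloc(). A minimal, hedged version of the conversion step (illustrative names; swr is assumed already configured and initialized):
int convert_to_planar(SwrContext* swr, const AVFrame* in, AVFrame* out)
{
    // sketch only: out is assumed to be a freshly allocated, empty AVFrame
    av_channel_layout_copy(&out->ch_layout, &in->ch_layout);
    out->sample_rate = in->sample_rate;
    out->format = AV_SAMPLE_FMT_FLTP;
    out->nb_samples = in->nb_samples;
    int ret = av_frame_get_buffer(out, 0);  // allocates out->data[] planes
    if (ret < 0)
        return ret;
    // third and fifth arguments are samples per channel, not bytes
    return swr_convert(swr, out->data, out->nb_samples,
                       (const uint8_t**)in->data, in->nb_samples);
}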
I have a set of tiny cameras, which are streaming H264 encoded video over TCP. I need to somehow connect to them on-demand based on user actions in the browser and display the live stream to them.
I've been searching all over the Internet for how this can be achieved, but without success.
The closest I got to a result was writing a small program using libav and C++ to connect to the video stream, save it as motion JPEG, and then use mjpg_streamer to display the result as a live stream. But this solution is overly complicated, and my program crashes with errors like:
Failed to decode av_out_packet: Operation now in progress
Or
Failed to read av_frame
Here's the piece of code I use to decode the stream.
void decode_stream(const char * address, int threadIdx, const char * output_dir) {
std::cout << "Started decoding thread ID: " << std::this_thread::get_id() << " TID: " << threadIdx << std::endl;
AVFormatContext *av_format_ctx = avformat_alloc_context();
// register timeout callback
auto * ith = new ffmpeg_interrupt_handler(default_timeout * 10);
av_format_ctx->interrupt_callback.opaque = (void *)ith;
av_format_ctx->interrupt_callback.callback = &ffmpeg_interrupt_handler::check_interrupt;
AVInputFormat *av_input_fmt = av_find_input_format("h264");
if (avformat_open_input(&av_format_ctx, address, av_input_fmt, nullptr) != 0) {
avformat_close_input(&av_format_ctx);
perror("Could not open input context");
exit(EXIT_FAILURE);
}
int video_stream_index = -1;
AVCodec* av_codec;
AVCodecParameters * av_codec_params;
//find valid video stream
for (int i = 0; i < av_format_ctx->nb_streams; ++i) {
av_codec_params = av_format_ctx->streams[i]->codecpar;
av_codec = avcodec_find_decoder(av_codec_params->codec_id);
if (!av_codec) {
perror("Could not find coded decoder");
continue;
}
if (av_codec_params->codec_type == AVMEDIA_TYPE_VIDEO) {
video_stream_index = i;
break;
}
}
if (video_stream_index == -1) {
perror("Could find valid video stream.");
exit(EXIT_FAILURE);
}
//allocate codec context
AVCodecContext * av_codec_ctx = avcodec_alloc_context3(av_codec);
if (!av_codec_ctx) {
perror("Could not create AVCodec Context\n");
exit(EXIT_FAILURE);
}
if (avcodec_parameters_to_context(av_codec_ctx, av_codec_params) < 0) {
perror("Could not initialize AVCodec Context\n");
exit(EXIT_FAILURE);
}
if (avcodec_open2(av_codec_ctx, av_codec, nullptr) < 0) {
perror("Could not open AVCodec\n");
exit(EXIT_FAILURE);
}
AVFrame* av_frame = av_frame_alloc();
if (!av_frame) {
perror("Could not allocate AVFrame");
exit(EXIT_FAILURE);
}
AVPacket *av_packet = av_packet_alloc();
if (!av_packet) {
perror("Could not allocate AVFrame");
exit(EXIT_FAILURE);
}
AVCodec *av_out_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
if (!av_out_codec) {
perror("Could not find MJPEG codec");
exit(EXIT_FAILURE);
}
AVCodecContext *av_out_codec_ctx = avcodec_alloc_context3(av_out_codec);
if (!av_out_codec_ctx) {
perror("Could not allocate output context");
exit(EXIT_FAILURE);
}
av_out_codec_ctx->width = 1280;
av_out_codec_ctx->height = 720;
av_out_codec_ctx->pix_fmt = AV_PIX_FMT_YUVJ420P;
av_out_codec_ctx->time_base = (AVRational){5, AVFMT_VARIABLE_FPS};
if (avcodec_open2(av_out_codec_ctx, av_out_codec, nullptr) < 0) {
perror("Could not open output codec");
exit(EXIT_FAILURE);
}
AVPacket *av_out_packet = av_packet_alloc();
std::string output_filename = output_dir;
if (! fs::exists(output_dir)) {
fs::create_directory(output_dir);
} else if ( fs::exists(output_dir) && ! fs::is_directory(output_dir)) {
perror("Target output is not a directory!");
exit(EXIT_FAILURE);
}
std::string output_final_dir = output_dir;
output_final_dir += "stream_" + std::to_string(threadIdx);
if (! fs::exists(output_final_dir)) {
fs::create_directory(output_final_dir);
}
output_filename += "stream_" + std::to_string(threadIdx) + "/stream_" + std::to_string(threadIdx) + ".jpg";
int response;
FILE *JPEGFile = nullptr;
ith->reset(default_timeout);
while (av_read_frame(av_format_ctx, av_packet) >= 0) {
if (av_packet->stream_index == video_stream_index) {
response = avcodec_send_packet(av_codec_ctx, av_packet);
if (response < 0) {
perror("Failed to decode av_out_packet");
exit(EXIT_FAILURE);
}
response = avcodec_receive_frame(av_codec_ctx, av_frame);
if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {
continue;
} else if (response < 0) {
perror("Failed to decode av_out_packet");
exit(EXIT_FAILURE);
}
if (av_frame->format != AV_PIX_FMT_YUV420P) {
printf("Generated file may not be a grayscale\n");
}
// send frame to encode into out format
avcodec_send_frame(av_out_codec_ctx, av_frame);
// receive encoded out data
avcodec_receive_packet(av_out_codec_ctx, av_out_packet);
// open output
JPEGFile = fopen(output_filename.c_str(), "wb");
if (JPEGFile == nullptr) {
perror("Could not open output file");
break;
}
// write to output
fwrite(av_out_packet->data, 1, av_out_packet->size, JPEGFile);
// close output
if (! fclose(JPEGFile)) {
JPEGFile = nullptr;
}
// unref out packet
av_packet_unref(av_out_packet);
av_packet_unref(av_packet);
// reset packet timeout
ith->reset(default_timeout);
}
}
if (JPEGFile != nullptr) {
fclose(JPEGFile);
JPEGFile = nullptr;
}
std::cout << "Exiting thread: " << threadIdx << std::endl;
should_stop_thread[threadIdx] = true;
av_packet_free(&av_out_packet);
avcodec_close(av_out_codec_ctx);
av_frame_free(&av_frame);
av_packet_free(&av_packet);
avformat_close_input(&av_format_ctx);
avformat_free_context(av_format_ctx);
avcodec_free_context(&av_codec_ctx);
}
Anyway, if there is a simpler solution that I am missing, I am open to it. The delay between the real stream and the displayed video is critical for me and cannot be more than one second.
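One hedged observation on the loop above: the avcodec_send_frame()/avcodec_receive_packet() return values are ignored before the fwrite(), so a stale or empty packet can be written when the encoder has nothing ready. The usual checked pattern looks roughly like this (illustrative name):
int encode_one_frame(AVCodecContext* enc, AVFrame* frame, AVPacket* pkt)
{
    int ret = avcodec_send_frame(enc, frame);
    if (ret < 0)
        return ret;
    ret = avcodec_receive_packet(enc, pkt);
    /* AVERROR(EAGAIN)/AVERROR_EOF mean "no packet this time", not failure;
       only write pkt->data when ret == 0 */
    return ret;
}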
In my Android app I implemented the FFmpeg library and am trying to use it to extract audio samples from an audio file on the fly.
Here is what I did (I simplified the code here so it's easier to read):
AVPacket packet;
AVCodecContext *codecContext = NULL;
AVFormatContext *formatContext;
AVFrame *frame = NULL;
SwrContext *swrContext;
int audio_stream_index = -1;
int ret;
uint8_t *localBuffer;
int FFMpegPlayer::createFFmpeg(const char *filename)
{
int ret;
AVCodec *dec;
frame = av_frame_alloc();
av_register_all();
avformat_open_input(&formatContext, filename, NULL, NULL);
avformat_find_stream_info(formatContext, NULL);
// select the audio stream
audio_stream_index = av_find_best_stream(formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, &dec, 0);
// create decoding context
codecContext = avcodec_alloc_context3(dec);
avcodec_parameters_to_context(codecContext, formatContext->streams[audio_stream_index]->codecpar);
av_opt_set_int(codecContext, "refcounted_frames", 1, 0);
// init the audio decoder
avcodec_open2(codecContext, dec, NULL);
swrContext = swr_alloc();
// we assume here that the audio file is a 44100 Hz stereo audio file
localBuffer = (uint8_t *) av_malloc(44100 * 2);
swr_alloc_set_opts(swrContext, AV_CH_LAYOUT_STEREO, AV_SAMPLE_FMT_FLT, codecContext->sample_rate,
codecContext->channel_layout, codecContext->sample_fmt, codecContext->sample_rate, 0,
NULL);
swr_init(swrContext);
return 0;
}
void FFMpegPlayer::getPcmFloat(float *buffer)
{
// init :
int i, ch, dataSize;
bool extracted = false;
float sample = 0;
// extract :
while (av_read_frame(formatContext, &packet) >= 0 && !extracted)
{
if (packet.stream_index == audio_stream_index)
{
// send the packet with the compressed data to the decoder
ret = avcodec_send_packet(codecContext, &packet);
// read all the output frames (in general there may be any number of them)
while (ret >= 0)
{
ret = avcodec_receive_frame(codecContext, frame);
if (ret == AVERROR(EAGAIN))
{
LOGW("AVERROR(EAGAIN)\n");
break;
}
else if (ret == AVERROR_EOF)
{
LOGW("AVERROR_EOF\n");
break;
}
dataSize = av_get_bytes_per_sample(codecContext->sample_fmt);
swr_convert(swrContext, &localBuffer, 44100 * 2, (const uint8_t **) frame->data, frame->nb_samples);
int a = 0;
for (i = 0; i < frame->nb_samples; i++)
{
for (ch = 0; ch < codecContext->channels; ch++)
{
memcpy(&sample, &localBuffer[(codecContext->channels * i + ch) * dataSize], dataSize);
buffer[a] = sample;
a++;
}
}
// exit extract:
extracted = true;
}
}
}
}
Anytime I need audio samples, I call the getPcmFloat() function.
Thanks to that code, I can listen to the audio file clearly.
The problem is: I have some crackling in the sound, and I have no idea where it comes from or how to fix it.
Does anyone know how to get the exact frames without glitches?
Thanks for your help.
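A hedged pointer on the buffer math in code shaped like this: 44100 stereo float samples occupy 44100 * 2 * sizeof(float) bytes, which av_samples_get_buffer_size() computes, and swr_convert()'s out_count argument counts samples per channel, not bytes. A sizing sketch (values illustrative):
// sketch only: sizing an interleaved stereo float buffer by sample count
int max_samples = 44100;
int buf_size = av_samples_get_buffer_size(NULL, 2, max_samples,
                                          AV_SAMPLE_FMT_FLT, 0);
// buf_size == 44100 * 2 * 4 bytes for stereo float
uint8_t* buf = (uint8_t*) av_malloc(buf_size);
// out_count below is samples per channel, not a byte count:
int got = swr_convert(swrContext, &buf, max_samples,
                      (const uint8_t**) frame->data, frame->nb_samples);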
I have a streaming device that streams MPEG-TS video and audio. I am trying to capture those streams and save them into multiple .ts files using FFmpeg's HLS support.
So, I have been successful in capturing and saving the streams into a single .ts file. It seems that when I set the output file to .m3u8, ffmpeg automatically chooses the HLS muxer. But doing so, I get a floating point exception.
Here is my simple code...
static int ipcam_streaming_main_configure_input_parameters(const char *p_ifilename, AVFormatContext **ppx_ifmt_ctx)
{
AVStream *px_istream = NULL;
AVCodecContext *px_icodec_ctx = NULL;
int ret = -1;
unsigned int i = 0;
enum AVCodecID input_codec_id = AV_CODEC_ID_NONE;
AVCodec *p_decoder = NULL;
if (avformat_open_input(ppx_ifmt_ctx, p_ifilename, NULL, NULL) < 0)
{
printf("%s(): avformat_open_input() failed\n", __FUNCTION__);
}
else if (avformat_find_stream_info(*ppx_ifmt_ctx, NULL) < 0)
{
printf("%s(): avformat_find_stream_info() failed\n", __FUNCTION__);
}
else
{
/* find the input streams to be remuxed */
for (i = 0; i < (*ppx_ifmt_ctx)->nb_streams; i++)
{
/* get the stream, codec context for the stream */
px_istream = (*ppx_ifmt_ctx)->streams[i];
px_icodec_ctx = px_istream->codec;
if ((AVMEDIA_TYPE_VIDEO == px_icodec_ctx->codec_type)
|| (AVMEDIA_TYPE_AUDIO == px_icodec_ctx->codec_type))
{
/* get the codec_id for the audio/video stream */
input_codec_id = px_icodec_ctx->codec_id;
/* get the decoder for the input codec id */
p_decoder = avcodec_find_decoder(px_icodec_ctx->codec_id);
/* Open decoder for the input codec audio/video */
ret = avcodec_open2(px_icodec_ctx,
p_decoder,
NULL);
if (ret < 0)
{
printf("%s(): avcodec_open2() failed\n", __FUNCTION__);
}
else
{
printf("Input stream type <%d> with codec_id <%d> found and decoder opened\n", px_icodec_ctx->codec_type, input_codec_id);
}
}
}
}
/* dump the data into stdout */
av_dump_format(*ppx_ifmt_ctx, 0, p_ifilename, 0);
return ret;
}
static int ipcam_streaming_main_configure_output_parameters(const char *p_ofilename,
AVFormatContext *px_ifmt_ctx,
AVFormatContext **ppx_ofmt_ctx)
{
AVStream *px_ostream = NULL;
AVStream *px_istream = NULL;
AVCodecContext *px_dec_ctx = NULL;
AVCodecContext *px_enc_ctx = NULL;
int ret = -1;
unsigned int i = 0;
if ((NULL == p_ofilename) || (NULL == px_ifmt_ctx) || (NULL == ppx_ofmt_ctx))
{
printf("%s(): NULL arg(s) <%p, %p, %p>", __FUNCTION__, p_ofilename, px_ifmt_ctx, ppx_ofmt_ctx);
return -1;
}
/* remove the output file if already exists */
remove(p_ofilename);
/* allocate the output format context */
if (avformat_alloc_output_context2(ppx_ofmt_ctx, NULL, NULL, p_ofilename) < 0)
{
printf("%s(): avformat_alloc_output_context2() failed\n", __FUNCTION__);
}
else
{
for (i = 0; i < px_ifmt_ctx->nb_streams; i++)
{
if ((AVMEDIA_TYPE_VIDEO == px_ifmt_ctx->streams[i]->codec->codec_type)
|| (AVMEDIA_TYPE_AUDIO == px_ifmt_ctx->streams[i]->codec->codec_type))
{
printf("Stream <%d> is type <%d>: Adding to output stream\n", i, px_ifmt_ctx->streams[i]->codec->codec_type);
/* create a new output stream */
px_ostream = avformat_new_stream(*ppx_ofmt_ctx, NULL);
if (NULL == px_ostream)
{
printf("%s(): avformat_new_stream() failed\n", __FUNCTION__);
}
else
{
px_istream = px_ifmt_ctx->streams[i];
px_dec_ctx = px_istream->codec;
px_enc_ctx = px_ostream->codec;
/* Since, we do not need to encode the video stream, it is just remuxing
just copying the input codec context to output is sufficient */
ret = avcodec_copy_context((*ppx_ofmt_ctx)->streams[i]->codec,
px_ifmt_ctx->streams[i]->codec);
if ((*ppx_ofmt_ctx)->oformat->flags & AVFMT_GLOBALHEADER)
{
px_enc_ctx->flags |= CODEC_FLAG_GLOBAL_HEADER;
}
}
}
else
{
printf("Stream <%d> is Unknown: Ignore it \n", i);
}
}
/* dump the output media file into stdout */
av_dump_format(*ppx_ofmt_ctx, 0, p_ofilename, 1);
if (0 == ((*ppx_ofmt_ctx)->oformat->flags & AVFMT_NOFILE))
{
/* open the output media file so that we can write the data into it */
ret = avio_open(&(*ppx_ofmt_ctx)->pb, p_ofilename, AVIO_FLAG_WRITE);
if (ret < 0)
{
printf("%s(): avio_open() failed\n", __FUNCTION__);
}
else
{
/* init muxer, write output file header */
ret = avformat_write_header((*ppx_ofmt_ctx), NULL);
if (ret < 0)
{
printf("%s(): avformat_write_header() failed\n", __FUNCTION__);
}
}
}
}
return ret;
}
int main(int argnum, char **argv)
{
AVFormatContext *px_ifmt_ctx = NULL;
AVFormatContext *px_ofmt_ctx = NULL;
AVPacket packet = {0};
enum AVMediaType type = AVMEDIA_TYPE_UNKNOWN;
unsigned int stream_index = -1;
unsigned int i = 0;
int ret = -1;
if (argnum != 3)
{
printf("Please enough number of parameters\n");
return -1;
}
/* register all the services required */
av_register_all();
avformat_network_init();
if (0 != ipcam_streaming_main_configure_input_parameters(argv[1],
&px_ifmt_ctx))
{
printf("%s(): ipcam_streaming_main_configure_iput_parameters() failed\n", __FUNCTION__);
}
else if (0 != ipcam_streaming_main_configure_output_parameters(argv[2],
px_ifmt_ctx,
&px_ofmt_ctx))
{
printf("%s(): ipcam_streaming_main_configure_output_parameters() failed\n", __FUNCTION__);
}
else
{
printf("Input and output configuration done successfully: Now reading packets\n");
while (true)
{
if ((ret = av_read_frame(px_ifmt_ctx, &packet)) < 0)
{
printf("av_read_frame() failed with error <%d>: Exit\n", ret);
break;
}
/* get the stream index and codec type of the packet read */
stream_index = packet.stream_index;
type = px_ifmt_ctx->streams[stream_index]->codec->codec_type;
/* remux only if the type is video, otherwise ignore it */
if ((AVMEDIA_TYPE_VIDEO == type)
|| (AVMEDIA_TYPE_AUDIO == type))
{
printf("Remuxing the stream type <%d>, frame with stream index <%d>\n", type, stream_index);
/* remux this frame without reencoding */
packet.dts = av_rescale_q_rnd(packet.dts,
px_ifmt_ctx->streams[stream_index]->time_base,
px_ofmt_ctx->streams[stream_index]->time_base,
AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX);
packet.pts = av_rescale_q_rnd(packet.pts,
px_ifmt_ctx->streams[stream_index]->time_base,
px_ofmt_ctx->streams[stream_index]->time_base,
AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX);
/* write the frame into the output media file */
ret = av_write_frame(px_ofmt_ctx, &packet);
if (ret < 0)
{
printf("Ignoring video packet stream index <%d>\n", packet.stream_index);
}
/* free the packet for next use */
av_free_packet(&packet);
}
else
{
printf("Ignoring stream index <%d>, type <%d>\n", packet.stream_index, type);
}
}
}
/* write the trailer */
av_write_trailer(px_ofmt_ctx);
av_free_packet(&packet);
for (i = 0; i < px_ifmt_ctx->nb_streams; i++)
{
/* close the input codec that has been opened */
avcodec_close(px_ifmt_ctx->streams[i]->codec);
if ((NULL != px_ofmt_ctx) && (px_ofmt_ctx->nb_streams > i) &&
(NULL != px_ofmt_ctx->streams[i]) && ( NULL != px_ofmt_ctx->streams[i]->codec))
{
/* close the output code */
avcodec_close(px_ofmt_ctx->streams[i]->codec);
}
}
/* close the input */
avformat_close_input(&px_ifmt_ctx);
if ((NULL != px_ofmt_ctx) && (0 == (px_ofmt_ctx->oformat->flags & AVFMT_NOFILE)))
{
/* close the output context */
avio_close(px_ofmt_ctx->pb);
}
/* free the output context */
avformat_free_context(px_ofmt_ctx);
return ret;
}
So, if I pass the output filename as a .m3u8 file, it gives a floating point exception.
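For reference, a hedged sketch of selecting and configuring the HLS muxer explicitly rather than relying on the .m3u8 extension; hls_time and hls_list_size are real options of FFmpeg's hls muxer, and the values here are illustrative:
AVFormatContext* ofmt_ctx = NULL;
/* ask for the hls muxer by name instead of by output extension */
int ret = avformat_alloc_output_context2(&ofmt_ctx, NULL, "hls", "out.m3u8");
if (ret < 0)
    printf("avformat_alloc_output_context2() failed\n");
AVDictionary* opts = NULL;
av_dict_set(&opts, "hls_time", "4", 0);       /* target segment length in seconds */
av_dict_set(&opts, "hls_list_size", "5", 0);  /* playlist window size */
/* ... create the output streams as above, then: */
ret = avformat_write_header(ofmt_ctx, &opts);
av_dict_free(&opts);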
My goal is to process the audio and video of an MPEG-2 file independently while keeping both streams synchronized. The duration of the video is one or two minutes at most.
First, following this post, "opencv for reading videos (and do the process), ffmpeg for audio, and SDL used to play both" sounds perfect. I have made some modifications to the code to account for recent ffmpeg naming changes. Compilation with cmake on a 64-bit machine is fine. I get an error "Unsupported codec [3]" when opening the codec.
The code follows.
Second, I am looking for code that handles the synchronization of both streams.
#include "opencv/highgui.h"
#include "opencv/cv.h"
#ifndef INT64_C
#define INT64_C(c) (c ## LL)
#define UINT64_C(c) (c ## ULL)
#endif
extern "C"{
#include <SDL/SDL.h>
#include <SDL/SDL_thread.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
}
#include <iostream>
#include <stdio.h>
#include <malloc.h>
using namespace cv;
#define SDL_AUDIO_BUFFER_SIZE 1024
typedef struct PacketQueue
{
AVPacketList *first_pkt, *last_pkt;
int nb_packets;
int size;
SDL_mutex *mutex;
SDL_cond *cond;
} PacketQueue;
PacketQueue audioq;
int audioStream = -1;
int videoStream = -1;
int quit = 0;
SDL_Surface* screen = NULL;
SDL_Surface* surface = NULL;
AVFormatContext* pFormatCtx = NULL;
AVCodecContext* aCodecCtx = NULL;
AVCodecContext* pCodecCtx = NULL;
void show_frame(IplImage* img){
if (!screen){
screen = SDL_SetVideoMode(img->width, img->height, 0, 0);
if (!screen){
fprintf(stderr, "SDL: could not set video mode - exiting\n");
exit(1);
}
}
// Assuming IplImage packed as BGR 24bits
SDL_Surface* surface = SDL_CreateRGBSurfaceFrom((void*)img->imageData,
img->width,
img->height,
img->depth * img->nChannels,
img->widthStep,
0xff0000, 0x00ff00, 0x0000ff, 0
);
SDL_BlitSurface(surface, 0, screen, 0);
SDL_Flip(screen);
}
void packet_queue_init(PacketQueue *q){
memset(q, 0, sizeof(PacketQueue));
q->mutex = SDL_CreateMutex();
q->cond = SDL_CreateCond();
}
int packet_queue_put(PacketQueue *q, AVPacket *pkt){
AVPacketList *pkt1;
if (av_dup_packet(pkt) < 0){
return -1;
}
pkt1 = (AVPacketList*) av_malloc(sizeof(AVPacketList));
//pkt1 = (AVPacketList*) malloc(sizeof(AVPacketList));
if (!pkt1) return -1;
pkt1->pkt = *pkt;
pkt1->next = NULL;
SDL_LockMutex(q->mutex);
if (!q->last_pkt)
q->first_pkt = pkt1;
else
q->last_pkt->next = pkt1;
q->last_pkt = pkt1;
q->nb_packets++;
q->size += pkt1->pkt.size;
SDL_CondSignal(q->cond);
SDL_UnlockMutex(q->mutex);
return 0;
}
static int packet_queue_get(PacketQueue *q, AVPacket *pkt, int block){
AVPacketList *pkt1;
int ret;
SDL_LockMutex(q->mutex);
for (;;){
if( quit){
ret = -1;
break;
}
pkt1 = q->first_pkt;
if (pkt1){
q->first_pkt = pkt1->next;
if (!q->first_pkt)
q->last_pkt = NULL;
q->nb_packets--;
q->size -= pkt1->pkt.size;
*pkt = pkt1->pkt;
av_free(pkt1);
//free(pkt1);
ret = 1;
break;
}
else if (!block){
ret = 0;
break;
}
else{
SDL_CondWait(q->cond, q->mutex);
}
}
SDL_UnlockMutex(q->mutex);
return ret;
}
int audio_decode_frame(AVCodecContext *aCodecCtx, uint8_t *audio_buf, int buf_size){
static AVPacket pkt;
static uint8_t *audio_pkt_data = NULL;
static int audio_pkt_size = 0;
int len1, data_size;
for (;;){
while (audio_pkt_size > 0){
data_size = buf_size;
len1 = avcodec_decode_audio3(aCodecCtx, (int16_t*)audio_buf, &data_size, &pkt);
if (len1 < 0){
// if error, skip frame
audio_pkt_size = 0;
break;
}
audio_pkt_data += len1;
audio_pkt_size -= len1;
if (data_size <= 0){
// No data yet, get more frames
continue;
}
// We have data, return it and come back for more later
return data_size;
}
if (pkt.data)
av_free_packet(&pkt);
if (quit) return -1;
if (packet_queue_get(&audioq, &pkt, 1) < 0) return -1;
audio_pkt_data = pkt.data;
audio_pkt_size = pkt.size;
}
}
void audio_callback(void *userdata, Uint8 *stream, int len){
AVCodecContext *aCodecCtx = (AVCodecContext *)userdata;
int len1, audio_size;
static uint8_t audio_buf[(AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) / 2];
static unsigned int audio_buf_size = 0;
static unsigned int audio_buf_index = 0;
while (len > 0){
if (audio_buf_index >= audio_buf_size){
// We have already sent all our data; get more
audio_size = audio_decode_frame(aCodecCtx, audio_buf, sizeof(audio_buf));
if(audio_size < 0){
// If error, output silence
audio_buf_size = 1024; // arbitrary?
memset(audio_buf, 0, audio_buf_size);
}
else{
audio_buf_size = audio_size;
}
audio_buf_index = 0;
}
len1 = audio_buf_size - audio_buf_index;
if (len1 > len)
len1 = len;
memcpy(stream, (uint8_t *)audio_buf + audio_buf_index, len1);
len -= len1;
stream += len1;
audio_buf_index += len1;
}
}
void setup_ffmpeg(char* filename)
{
if (avformat_open_input(&pFormatCtx, filename, NULL, NULL) != 0){
fprintf(stderr, "FFmpeg failed to open file %s!\n", filename);
exit(-1);
}
if (av_find_stream_info(pFormatCtx) < 0){
fprintf(stderr, "FFmpeg failed to retrieve stream info!\n");
exit(-1);
}
// Dump information about file onto standard error
av_dump_format(pFormatCtx, 0, filename, 0);
// Find the first video stream
int i = 0;
for (; i < pFormatCtx->nb_streams; i++){
if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO && videoStream < 0){
videoStream = i;
}
if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO && audioStream < 0){
audioStream = i;
}
}
if (videoStream == -1){
fprintf(stderr, "No video stream found in %s!\n", filename);
exit(-1);
}
if (audioStream == -1){
fprintf(stderr, "No audio stream found in %s!\n", filename);
exit(-1);
}
// Get a pointer to the codec context for the audio stream
aCodecCtx = pFormatCtx->streams[audioStream]->codec;
// Set audio settings from codec info
SDL_AudioSpec wanted_spec;
wanted_spec.freq = aCodecCtx->sample_rate;
wanted_spec.format = AUDIO_S16SYS;
wanted_spec.channels = aCodecCtx->channels;
wanted_spec.silence = 0;
wanted_spec.samples = SDL_AUDIO_BUFFER_SIZE;
wanted_spec.callback = audio_callback;
wanted_spec.userdata = aCodecCtx;
SDL_AudioSpec spec;
if (SDL_OpenAudio(&wanted_spec, &spec) < 0){
fprintf(stderr, "SDL_OpenAudio: %s\n", SDL_GetError());
exit(-1);
}
AVCodec* aCodec = avcodec_find_decoder(aCodecCtx->codec_id);
if (!aCodec){
fprintf(stderr, "Unsupported codec [1]!\n");
exit(-1);
}
avcodec_open(aCodecCtx, aCodec);
// audio_st = pFormatCtx->streams[index]
packet_queue_init(&audioq);
SDL_PauseAudio(0);
// Get a pointer to the codec context for the video stream
pCodecCtx = pFormatCtx->streams[videoStream]->codec;
// Find the decoder for the video stream
AVCodec* pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
if (pCodec == NULL){
fprintf(stderr, "Unsupported codec [2]!\n");
exit(-1); // Codec not found
}
// Open codec
if (avcodec_open(pCodecCtx, pCodec) < 0){
fprintf(stderr, "Unsupported codec [3]!\n");
exit(-1); // Could not open codec
}
}
int main(int argc, char* argv[])
{
if (argc < 2){
std::cout << "Usage: " << argv[0] << " <video>" << std::endl;
return -1;
}
av_register_all();
// Init SDL
if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER))
{
fprintf(stderr, "Could not initialize SDL - %s\n", SDL_GetError());
return -1;
}
// Init ffmpeg and setup some SDL stuff related to Audio
setup_ffmpeg(argv[1]);
VideoCapture cap(argv[1]);
if (!cap.isOpened()){
std::cout << "Failed to load file!" << std::endl;
return -1;
}
AVPacket packet;
while (av_read_frame(pFormatCtx, &packet) >= 0)
{
if (packet.stream_index == videoStream)
{
// Actually this is were SYNC between audio/video would happen.
// Right now I assume that every VIDEO packet contains an entire video frame, and that's not true. A video frame can be made by multiple packets!
// But for the time being, assume 1 video frame == 1 video packet,
// so instead of reading the frame through ffmpeg, I read it through OpenCV.
Mat frame;
cap >> frame; // get a new frame from camera
// do some processing on the frame, either as a Mat or as IplImage.
// For educational purposes, applying a lame grayscale conversion
IplImage ipl_frame = frame;
for (int i = 0; i < ipl_frame.width * ipl_frame.height * ipl_frame.nChannels; i += ipl_frame.nChannels)
{
ipl_frame.imageData[i] = (ipl_frame.imageData[i] + ipl_frame.imageData[i+1] + ipl_frame.imageData[i+2])/3; //B
ipl_frame.imageData[i+1] = (ipl_frame.imageData[i] + ipl_frame.imageData[i+1] + ipl_frame.imageData[i+2])/3; //G
ipl_frame.imageData[i+2] = (ipl_frame.imageData[i] + ipl_frame.imageData[i+1] + ipl_frame.imageData[i+2])/3; //R
}
// Display it on SDL window
show_frame(&ipl_frame);
av_free_packet(&packet);
}
else if (packet.stream_index == audioStream)
{
packet_queue_put(&audioq, &packet);
}
else
{
av_free_packet(&packet);
}
SDL_Event event;
SDL_PollEvent(&event);
switch (event.type)
{
case SDL_QUIT:
SDL_FreeSurface(surface);
SDL_Quit();
break;
default:
break;
}
}
// the camera will be deinitialized automatically in VideoCapture destructor
// Close the codec
avcodec_close(pCodecCtx);
// Close the video file
av_close_input_file(pFormatCtx);
return 0;
}
I solved the "Unsupported codec" error. Replace AVMEDIA_TYPE_VIDEO with AVMEDIA_TYPE_AUDIO in the following line:
if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO && audioStream < 0){
audioStream = i;
}
The grayscale conversion is not correct because of OpenCV's uchar pixel format.
I still have the synchronization not working... any help?
Thanks
See here for syncing:
http://dranger.com/ffmpeg/tutorial06.html
and here:
http://dranger.com/ffmpeg/tutorial07.html
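The core idea in those two tutorials, as a hedged sketch against the code above: convert each video packet's PTS to seconds with the stream's time_base, compare it against a master clock driven by the audio callback, and delay presentation accordingly. get_audio_clock() is hypothetical here, standing in for the tutorial's audio clock, and real code must also handle AV_NOPTS_VALUE:
// sketch only: PTS-based video delay against an audio master clock
double pts_seconds = packet.pts
                     * av_q2d(pFormatCtx->streams[videoStream]->time_base);
double master_clock = get_audio_clock();   // hypothetical audio-clock getter
double delay = pts_seconds - master_clock;
if (delay > 0)
    SDL_Delay((Uint32)(delay * 1000));     // wait until the frame is due
show_frame(&ipl_frame);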