libav c++ raw h264 TCP Stream to opencv Mat - c++

I writting a software for my classes that receive a h264 stream from a drone and i need to convert the video stream to opencv Mat.
I have no trouble to receive the frame and if i save it to a .264 file i can read the output with VLC.
The drone is sending IDR-Frame and P-Frame , since i don't need to see the video stream juste some frame of it, i was thinking of only using the IDR-Frame to get the image but i have trouble to understand how to use libavcodec from FFMPEG, how can i create a AVFrame from my IDR-Frame and how to convert it to cv::Mat after.
i have try the following code, i have store the full frame in .raw file and i try to decode them , but i get read error when i try to parse the packet to a frame, i don't thing a initialize the buffer of ARPacket the right way :
AVFormatContext* fc = 0;
int vi = -1; // vi veut dire video index
inline cv::Mat avframe_to_mat(const AVFrame* avframe)
{
AVFrame dst;
cv::Mat m;
memset(&dst, 0, sizeof(dst));
int w = avframe->width, h = avframe->height;
m = cv::Mat(h, w, CV_8UC3);
dst.data[0] = (uint8_t*)m.data;
avpicture_fill((AVPicture*)&dst, dst.data[0], AV_PIX_FMT_BGR24, w, h);
struct SwsContext *convert_ctx = NULL;
enum AVPixelFormat src_pixfmt = AV_PIX_FMT_BGR24;
enum AVPixelFormat dst_pixfmt = AV_PIX_FMT_BGR24;
convert_ctx = sws_getContext(w, h, src_pixfmt, w, h, dst_pixfmt, SWS_FAST_BILINEAR, NULL, NULL, NULL);
sws_scale(convert_ctx, avframe->data, avframe->linesize, 0, h, dst.data, dst.linesize);
sws_freeContext(convert_ctx);
return m;
}
inline bool init_stream(unsigned char* data, int len)
{
const char* file = "test.avi";
const AVCodecID codec_id = AV_CODEC_ID_H264;
AVCodec* codec = avcodec_find_encoder(codec_id);
// Crée le container pour le stream
fc = avformat_alloc_context();
/*
AVOutputFormat *of = av_guess_format(0, file, 0);
fc = avformat_alloc_context();
fc->oformat = of;
strcpy(fc->filename, file);
*/
int br = 1000000;
int w = 640;
int h = 360;
int fps = 24;
// ajoute un stream video
AVStream* pst = avformat_new_stream(fc, codec); // Pourquoi je passe pas le codec ici ?
vi = pst->index;
codec_context = avcodec_alloc_context3(codec);
codec_context->bit_rate = br;
codec_context->width = w;
codec_context->height = h;
codec_context->time_base = {1,fps};
codec_context->gop_size = 10; // Emit one intra frame every ten frames
codec_context->max_b_frames = 1;
codec_context->pix_fmt = AV_PIX_FMT_YUV420P;
// Vu quon n'est en h264
av_opt_set(codec_context->priv_data, "preset", "slow", 0);
// Ouvre notre codec
if(avcodec_open2(codec_context, codec,nullptr) < 0)
{
cerr << "Impossible d'ouvrir le codec" << endl;
return false;
}
if (!(fc->oformat->flags & AVFMT_NOFILE))
avio_open(&fc->pb, fc->filename,0);
// avformat_write_header(fc,nullptr);
return true;
}
inline void append_stream(uint8_t* data, int len)
{
if( 0 > vi)
{
cerr << "video index is less than 0" << endl;
return;
}
AVStream* pst = fc->streams[vi];
AVPacket pkt;
// Init un nouveau packet
av_init_packet(&pkt);
pkt.flags |= AV_PKT_FLAG_KEY;
pkt.data = data;
pkt.stream_index = pst->index;
pkt.size = len;
pkt.dts = AV_NOPTS_VALUE;
pkt.pts = AV_NOPTS_VALUE;
// ERROR accessing location
av_interleaved_write_frame(fc, &pkt);
}
inline void execute_staging_test(const fs::path& folder, int nbr_trame)
{
fs::path file_name = folder / "stream.bin";
if(!fs::exists(file_name))
{
cerr << "The file " << file_name.string() << " does not exists" << endl;
return;
}
avcodec_register_all();
av_log_set_level(AV_LOG_DEBUG);
int length = 0;
char* buffer;
for(int i = 0; i < nbr_trame;i++)
{
fs::path file = std::to_string(i) + ".raw";
file = folder / file;
cout << "Got frame on " << file.string() << endl;
ifstream ifs(file, ofstream::binary);
// Get la longeur du fichier pour savoir le buffer a prendre
ifs.seekg(0, ios::end);
length = ifs.tellg();
ifs.seekg(0, ios::beg);
if (length == 0) {
std::cerr << "No data in file " << file << std::endl;
return;
}
buffer = new char[length];
std::cout << "File " << file << " length is " << length << std::endl;
ifs.read(buffer, length);
cv::VideoWriter vw;
int codec = cv::VideoWriter::fourcc('X', '2', '6', '4');
if(!fc)
{
if(!init_stream((unsigned char*)buffer, length))
{
return;
}
}
if(fc)
{
append_stream((unsigned char*)buffer, length);
}
}
}
Thanks you very mutch if you can help me , i'm a novice in c++ and i have never deal with video stream. If you want to see the full code its host on github repo to this project

Related

Decoding MediaRecorder produced webm stream

I am trying to decode a video stream from the browser using the ffmpeg API. The stream is produced by the webcam and recorded with MediaRecorder as webm format. What I ultimately need is a vector of opencv cv::Mat objects for further processing.
I have written a C++ webserver using the uWebsocket library. The video stream is sent via websocket from the browser to the server once per second. On the server, I append the received data to my custom buffer and decode it with the ffmpeg API.
If I just save the data on the disk and later I play it with a media player, it works fine. So, whatever the browser sends is a valid video.
I do not think that I correctly understand how should the custom IO behave with network streaming as nothing seems to be working.
The custom buffer:
struct Buffer
{
std::vector<uint8_t> data;
int currentPos = 0;
};
The readAVBuffer method for custom IO
int MediaDecoder::readAVBuffer(void* opaque, uint8_t* buf, int buf_size)
{
MediaDecoder::Buffer* mbuf = (MediaDecoder::Buffer*)opaque;
int count = 0;
for(int i=0;i<buf_size;i++)
{
int index = i + mbuf->currentPos;
if(index >= (int)mbuf->data.size())
{
break;
}
count++;
buf[i] = mbuf->data.at(index);
}
if(count > 0) mbuf->currentPos+=count;
std::cout << "read : "<<count<<" "<<mbuf->currentPos<<", buff size:"<<mbuf->data.size() << std::endl;
if(count <= 0) return AVERROR(EAGAIN); //is this error that should be returned? It cannot be EOF since we're not done yet, most likely
return count;
}
The big decode method, that's supposed to return whatever frames it could read
std::vector<cv::Mat> MediaDecoder::decode(const char* data, size_t length)
{
std::vector<cv::Mat> frames;
//add data to the buffer
for(size_t i=0;i<length;i++) {
buf.data.push_back(data[i]);
}
//do not invoke the decoders until we have 1MB of data
if(((buf.data.size() - buf.currentPos) < 1*1024*1024) && !initializedCodecs) return frames;
std::cout << "decoding data length "<<length<<std::endl;
if(!initializedCodecs) //initialize ffmpeg objects. Custom I/O, format, decoder, etc.
{
//these are just members of the class
avioCtxPtr = std::unique_ptr<AVIOContext,avio_context_deleter>(
avio_alloc_context((uint8_t*)av_malloc(4096),4096,0,&buf,&readAVBuffer,nullptr,nullptr),
avio_context_deleter());
if(!avioCtxPtr)
{
std::cerr << "Could not create IO buffer" << std::endl;
return frames;
}
fmt_ctx = std::unique_ptr<AVFormatContext,avformat_context_deleter>(avformat_alloc_context(),
avformat_context_deleter());
fmt_ctx->pb = avioCtxPtr.get();
fmt_ctx->flags |= AVFMT_FLAG_CUSTOM_IO ;
//fmt_ctx->max_analyze_duration = 2 * AV_TIME_BASE; // read 2 seconds of data
{
AVFormatContext *fmtCtxRaw = fmt_ctx.get();
if (avformat_open_input(&fmtCtxRaw, "", nullptr, nullptr) < 0) {
std::cerr << "Could not open movie" << std::endl;
return frames;
}
}
if (avformat_find_stream_info(fmt_ctx.get(), nullptr) < 0) {
std::cerr << "Could not find stream information" << std::endl;
return frames;
}
if((video_stream_idx = av_find_best_stream(fmt_ctx.get(), AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0)) < 0)
{
std::cerr << "Could not find video stream" << std::endl;
return frames;
}
AVStream *video_stream = fmt_ctx->streams[video_stream_idx];
AVCodec *dec = avcodec_find_decoder(video_stream->codecpar->codec_id);
video_dec_ctx = std::unique_ptr<AVCodecContext,avcodec_context_deleter> (avcodec_alloc_context3(dec),
avcodec_context_deleter());
if (!video_dec_ctx)
{
std::cerr << "Failed to allocate the video codec context" << std::endl;
return frames;
}
avcodec_parameters_to_context(video_dec_ctx.get(),video_stream->codecpar);
video_dec_ctx->thread_count = 1;
/* video_dec_ctx->max_b_frames = 0;
video_dec_ctx->frame_skip_threshold = 10;*/
AVDictionary *opts = nullptr;
av_dict_set(&opts, "refcounted_frames", "1", 0);
av_dict_set(&opts, "deadline", "1", 0);
av_dict_set(&opts, "auto-alt-ref", "0", 0);
av_dict_set(&opts, "lag-in-frames", "1", 0);
av_dict_set(&opts, "rc_lookahead", "1", 0);
av_dict_set(&opts, "drop_frame", "1", 0);
av_dict_set(&opts, "error-resilient", "1", 0);
int width = video_dec_ctx->width;
videoHeight = video_dec_ctx->height;
if(avcodec_open2(video_dec_ctx.get(), dec, &opts) < 0)
{
std::cerr << "Failed to open the video codec context" << std::endl;
return frames;
}
AVPixelFormat pFormat = AV_PIX_FMT_BGR24;
img_convert_ctx = std::unique_ptr<SwsContext,swscontext_deleter>(sws_getContext(width, videoHeight,
video_dec_ctx->pix_fmt, width, videoHeight, pFormat,
SWS_BICUBIC, nullptr, nullptr,nullptr),swscontext_deleter());
frame = std::unique_ptr<AVFrame,avframe_deleter>(av_frame_alloc(),avframe_deleter());
frameRGB = std::unique_ptr<AVFrame,avframe_deleter>(av_frame_alloc(),avframe_deleter());
int numBytes = av_image_get_buffer_size(pFormat, width, videoHeight,32 /*https://stackoverflow.com/questions/35678041/what-is-linesize-alignment-meaning*/);
std::unique_ptr<uint8_t,avbuffer_deleter> imageBuffer((uint8_t *) av_malloc(numBytes*sizeof(uint8_t)),avbuffer_deleter());
av_image_fill_arrays(frameRGB->data,frameRGB->linesize,imageBuffer.get(),pFormat,width,videoHeight,32);
frameRGB->width = width;
frameRGB->height = videoHeight;
initializedCodecs = true;
}
AVPacket pkt;
av_init_packet(&pkt);
pkt.data = nullptr;
pkt.size = 0;
int read_frame_return = 0;
while ( (read_frame_return=av_read_frame(fmt_ctx.get(), &pkt)) >= 0)
{
readFrame(&frames,&pkt,video_dec_ctx.get(),frame.get(),img_convert_ctx.get(),
videoHeight,frameRGB.get());
//if(cancelled) break;
}
avioCtxPtr->eof_reached = 0;
avioCtxPtr->error = 0;
//flush
// readFrame(frames.get(),nullptr,video_dec_ctx.get(),frame.get(),
// img_convert_ctx.get(),videoHeight,frameRGB.get());
avioCtxPtr->eof_reached = 0;
avioCtxPtr->error = 0;
if(frames->size() <= 0)
{
std::cout << "buffer pos: "<<buf.currentPos<<", buff size:"<<buf.data.size()
<<",read_frame_return:"<<read_frame_return<< std::endl;
}
return frames;
}
What I would expect to happen would be for a continuous extraction of cv::Mat frames as I feed it more and more data. What actually happens is that after the the buffer is fully read I see:
[matroska,webm # 0x507b450] Read error at pos. 1278266 (0x13813a)
[matroska,webm # 0x507b450] Seek to desired resync point failed. Seeking to earliest point available instead.
And then no more bytes are read from the buffer even if later I increase the size of it.
There is something terribly wrong I'm doing here and I don't understand what.
What I ended up doing was to do the reading of the incoming data and actual decoding in a different thread. The read method, however, will just block if there are no more bytes available, waiting until anything is coming.
When new bytes are arriving, they're added to the buffer and the conditional_variable signals the waiting thread to wake up and start reading data again from the buffer.
It works well enough.

ffmpeg memory increase when Playing a UDP-MJPEG-Stream

im reading a udp-mjpeg-stream with the ffmpeg-API. When i read and display the Stream with an ARM-Processor i have 2 Problems:
1- The Applikation is too slow and there is a big delay between network cam and displayed video.
2- the memory usage increases every time when i call the function av_read_frame().
The Source code
const char *cam1_url = "udp://192.168.1.1:1234";
AVCodec *pCodec;
AVFrame *pFrame, *pFrameRGB;
AVCodecContext *pCodecCon;
AVDictionary *pUdpStreamOptions = NULL;
AVInputFormat *pMjpegFormat = av_find_input_format("mjpeg");
av_dict_set(&pUdpStreamOptions, "fifo_size", "5000000", 0);
av_register_all();
avdevice_register_all();
avcodec_register_all();
avformat_network_init();
AVFormatContext *pFormatCont = avformat_alloc_context();
if(avformat_open_input(&pFormatCont,cam1_url,pMjpegFormat,&pUdpStreamOptions) < 0)
{
cout << "!! Error !! - avformat_open_input(): failed to open input URL" << endl;
}
if(avformat_find_stream_info(pFormatCont,NULL) < 0)
{
cout << "!! Error !! - avformat_find_stream_info(), Failed to retrieve stream info" << endl;
}
av_dump_format(pFormatCont, 0, cam1_url, 0);
int videoStream;
for(int i=0; i< pFormatCont->nb_streams; i++)
{
if(pFormatCont->streams[i]->codec->codec_type==AVMEDIA_TYPE_VIDEO)
{
videoStream=i;
cout << " videoStream = " << videoStream << endl;
}
}
pCodecCon = pFormatCont->streams[videoStream]->codec;
pCodec = avcodec_find_decoder(pCodecCon->codec_id);
if(NULL == pCodec)
{
cout << "couldnt find codec" << endl;
return EXIT_FAILURE;
}
if(avcodec_open2(pCodecCon,pCodec,NULL) < 0)
{
cout << "!! Error !! - in avcodec_open2()" << endl;
return EXIT_FAILURE;
}
uint8_t *frameBuffer;
int numRxBytes = 0;
AVPixelFormat pFormat =AV_PIX_FMT_BGR24;
int width_rgb = (int)((float)pCodecCon->width);
int height_rgb = (int)((float)pCodecCon->height);
numRxBytes = avpicture_get_size(pFormat,width_rgb,height_rgb);
frameBuffer = (uint8_t *) av_malloc(numRxBytes*sizeof(uint8_t));
avpicture_fill((AVPicture *) pFrameRGB, frameBuffer, pFormat,width_rgb,height_rgb);
AVPacket rx_pkt; // received packet
int frameFinished = 0;
struct SwsContext *imgConvertCtx;
av_init_packet(&rx_pkt);
while(av_read_frame(pFormatCont, &rx_pkt) >= 0)
{
if(rx_pkt.stream_index == videoStream)
{
av_frame_free(&pFrame);
pFrame = av_frame_alloc();
av_frame_free(&pFrameRGB);
pFrameRGB = av_frame_alloc();
avcodec_decode_video2(pCodecCon, pFrame, &frameFinished,&rx_pkt);
if(frameFinished)
{
imgConvertCtx = sws_getCachedContext(NULL, pFrame->width,pFrame->height, AV_PIX_FMT_YUVJ420P,width_rgb,height_rgb,AV_PIX_FMT_BGR24, SWS_BICUBIC, NULL, NULL,NULL);
sws_scale(imgConvertCtx, ((AVPicture*)pFrame)->data, ((AVPicture*)pFrame)->linesize, 0, pCodecCon->height, ((AVPicture *)pFrameRGB)->data, ((AVPicture *)pFrameRGB)->linesize);
av_frame_unref(pFrame);
av_frame_unref(pFrameRGB);
}
}
av_free_packet(&rx_pkt);
av_packet_unref(&rx_pkt);
}
//cvDestroyWindow("Cam1Video");
av_free_packet(&rx_pkt);
avcodec_close(pCodecCon);
av_free(pFrame);
av_free(pFrameRGB);
avformat_close_input(&pFormatCont);
I have read, the reason could be that the ffmpeg-Libs saves the incomming frames in the cache but the arm-processor isnt fast enough to process them. After like 4 minutes the system craches.
How could i solve the Problem.
one option could be to tell ffmpeg to act as frame grabber, also to read frames in real time, with the flag "-re". How can i set this Flag in the c++ source code. Or can anybody help me to solve that Problem.
Thank you very much

Random segmentation fault with avcodec_encode_video2()

this is my first question so i hope i did it correctly. If not, please let me know to fix it.
I'm trying to convert a short (10 secs) mp4 video file into a gif using ffmpeg libraries (I'm pretty new using ffmpeg). The program works pretty well converting to gif, but some times it randomly crash.
This is the version of the ffmpeg libraries I'm using:
libavutil 54. 27.100
libavcodec 56. 41.100
libavformat 56. 36.100
libavdevice 56. 4.100
libavfilter 5. 16.101
libavresample 2. 1. 0
libswscale 3. 1.101
libswresample 1. 2.100
libpostproc 53. 3.100
I'm using a 1920x1080p video, so in order to generate the gif I'm doing a pixel format convertion, from AV_PIX_FMT_YUV420P to AV_PIX_FMT_RGB8 with a resizing from the initial resolution to 432x240p.
Here is the code:
int VideoManager::loadVideo(QString filename, bool showInfo)
{
if(avformat_open_input(&iFmtCtx, filename.toStdString().c_str(), 0, 0) < 0)
{
qDebug() << "Could not open input file " << filename;
closeInput();
return -1;
}
if (avformat_find_stream_info(iFmtCtx, 0) < 0)
{
qDebug() << "Failed to retrieve input stream information";
closeInput();
return -2;
}
videoStreamIndex = -1;
for(unsigned int i = 0; i < iFmtCtx->nb_streams; ++i)
if(iFmtCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
{
videoStreamIndex = i;
break;
}
if(videoStreamIndex == -1)
{
qDebug() << "Didn't find any video stream!";
closeInput();
return -3;
}
iCodecCtx = iFmtCtx->streams[videoStreamIndex]->codec;
iCodec = avcodec_find_decoder(iCodecCtx->codec_id);
if(iCodec == NULL) // Codec not found
{
qDebug() << "Codec not found!";
closeInput();
return -4;
}
if(avcodec_open2(iCodecCtx, iCodec, NULL) < 0)
{
qDebug() << "Could not open codec!";
closeInput();
return -1;
}
if(showInfo)
av_dump_format(iFmtCtx, 0, filename.toStdString().c_str(), 0);
return 0;
}
void VideoManager::generateGif(QString filename)
{
int ret, frameCount = 0;
AVPacket packet;
packet.data = NULL;
packet.size = 0;
AVFrame *frame = NULL;
unsigned int stream_index;
int got_frame;
gifHeight = iFmtCtx->streams[videoStreamIndex]->codec->height;
gifWidth = iFmtCtx->streams[videoStreamIndex]->codec->width;
if(gifHeight > MAX_GIF_HEIGHT || gifWidth > MAX_GIF_WIDTH)
{
if(gifHeight > gifWidth)
{
gifWidth = (float)gifWidth * ( (float)MAX_GIF_HEIGHT / (float)gifHeight );
gifHeight = MAX_GIF_HEIGHT;
}
else
{
gifHeight = (float)gifHeight * ( (float)MAX_GIF_WIDTH / (float)gifWidth );
gifWidth = MAX_GIF_WIDTH;
}
}
if(openOutputFile(filename.toStdString().c_str()) < 0)
{
qDebug() << "Error openning output file: " << filename;
return;
}
while (1) {
int ret = av_read_frame(iFmtCtx, &packet);
if (ret < 0)
{
if(ret != AVERROR_EOF)
qDebug() << "Error reading frame: " << ret;
break;
}
stream_index = packet.stream_index;
if(stream_index == videoStreamIndex)
{
frame = av_frame_alloc();
if (!frame) {
qDebug() << "Error allocating frame";
break;
}
av_packet_rescale_ts(&packet,
iFmtCtx->streams[stream_index]->time_base,
iFmtCtx->streams[stream_index]->codec->time_base);
ret = avcodec_decode_video2(iFmtCtx->streams[stream_index]->codec, frame,
&got_frame, &packet);
if (ret < 0) {
qDebug() << "Decoding failed";
break;
}
if(got_frame)
{
qDebug() << ++frameCount;
nframes++;
frame->pts = av_frame_get_best_effort_timestamp(frame);
////////////////////////////////////////////////////////////////////////////////
/// Pixel format convertion and resize
////////////////////////////////////////////////////////////////////////////////
uint8_t *out_buffer = NULL;
SwsContext *img_convert_ctx = NULL;
AVFrame *pFrameRGB = av_frame_alloc();
if(pFrameRGB == NULL)
{
qDebug() << "Error allocating frameRGB";
break;
}
AVPixelFormat pixFmt;
switch (iFmtCtx->streams[stream_index]->codec->pix_fmt)
{
case AV_PIX_FMT_YUVJ420P : pixFmt = AV_PIX_FMT_YUV420P; break;
case AV_PIX_FMT_YUVJ422P : pixFmt = AV_PIX_FMT_YUV422P; break;
case AV_PIX_FMT_YUVJ444P : pixFmt = AV_PIX_FMT_YUV444P; break;
case AV_PIX_FMT_YUVJ440P : pixFmt = AV_PIX_FMT_YUV440P; break;
default:
pixFmt = iFmtCtx->streams[stream_index]->codec->pix_fmt;
}
out_buffer = (uint8_t*)av_malloc( avpicture_get_size( AV_PIX_FMT_RGB8,
gifWidth,
gifHeight ));
if(!out_buffer)
{
qDebug() << "Error alocatting out_buffer!";
}
avpicture_fill((AVPicture *)pFrameRGB, out_buffer, AV_PIX_FMT_RGB8,
gifWidth,
gifHeight);
img_convert_ctx = sws_getContext( iFmtCtx->streams[stream_index]->codec->width,
iFmtCtx->streams[stream_index]->codec->height,
pixFmt,
gifWidth,
gifHeight,
AV_PIX_FMT_RGB8,
SWS_ERROR_DIFFUSION, NULL, NULL, NULL );
if(!img_convert_ctx)
{
qDebug() << "error getting sws context";
}
sws_scale( img_convert_ctx, (const uint8_t* const*)frame->data,
frame->linesize, 0,
iFmtCtx->streams[stream_index]->codec->height,
pFrameRGB->data,
pFrameRGB->linesize );
pFrameRGB->format = AV_PIX_FMT_RGB8;
pFrameRGB->pts = frame->pts;
pFrameRGB->best_effort_timestamp = frame->best_effort_timestamp;
pFrameRGB->width = gifWidth;
pFrameRGB->height = gifHeight;
pFrameRGB->pkt_dts = frame->pkt_dts;
pFrameRGB->pkt_pts = frame->pkt_pts;
pFrameRGB->pkt_duration = frame->pkt_duration;
pFrameRGB->pkt_pos = frame->pkt_pos;
pFrameRGB->pkt_size = frame->pkt_size;
pFrameRGB->interlaced_frame = frame->interlaced_frame;
////////////////////////////////////////////////////////////////////////////////
ret = encodeAndWriteFrame(pFrameRGB, stream_index, NULL);
//av_frame_free(&frame);
//av_free(out_buffer);
//sws_freeContext(img_convert_ctx);
if (ret < 0)
{
qDebug() << "Error encoding and writting frame";
//av_free_packet(&packet);
closeOutput();
}
}
else {
//av_frame_free(&frame);
}
}
av_free_packet(&packet);
}
ret = flushEncoder(videoStreamIndex);
if (ret < 0)
{
qDebug() << "Flushing encoder failed";
}
av_write_trailer(oFmtCtx);
//av_free_packet(&packet);
//av_frame_free(&frame);
closeOutput();
}
void VideoManager::closeOutput()
{
if (oFmtCtx && oFmtCtx->nb_streams > 0 && oFmtCtx->streams[0] && oFmtCtx->streams[0]->codec)
avcodec_close(oFmtCtx->streams[0]->codec);
if (oFmtCtx && oFmt && !(oFmt->flags & AVFMT_NOFILE))
avio_closep(&oFmtCtx->pb);
avformat_free_context(oFmtCtx);
}
int VideoManager::openOutputFile(const char *filename)
{
AVStream *out_stream;
AVStream *in_stream;
AVCodecContext *dec_ctx, *enc_ctx;
AVCodec *encoder;
int ret;
oFmtCtx = NULL;
avformat_alloc_output_context2(&oFmtCtx, NULL, NULL, filename);
if (!oFmtCtx) {
qDebug() << "Could not create output context";
return AVERROR_UNKNOWN;
}
oFmt = oFmtCtx->oformat;
out_stream = avformat_new_stream(oFmtCtx, NULL);
if (!out_stream) {
qDebug() << "Failed allocating output stream";
return AVERROR_UNKNOWN;
}
in_stream = iFmtCtx->streams[videoStreamIndex];
dec_ctx = in_stream->codec;
enc_ctx = out_stream->codec;
encoder = avcodec_find_encoder(AV_CODEC_ID_GIF);
if (!encoder) {
qDebug() << "FATAL!: Necessary encoder not found";
return AVERROR_INVALIDDATA;
}
enc_ctx->height = gifHeight;
enc_ctx->width = gifWidth;
enc_ctx->sample_aspect_ratio = dec_ctx->sample_aspect_ratio;
enc_ctx->pix_fmt = AV_PIX_FMT_RGB8;
enc_ctx->time_base = dec_ctx->time_base;
ret = avcodec_open2(enc_ctx, encoder, NULL);
if (ret < 0) {
qDebug() << "Cannot open video encoder for gif";
return ret;
}
if (oFmt->flags & AVFMT_GLOBALHEADER)
enc_ctx->flags |= CODEC_FLAG_GLOBAL_HEADER;
if (!(oFmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&oFmtCtx->pb, filename, AVIO_FLAG_WRITE);
if (ret < 0) {
qDebug() << "Could not open output file " << filename;
return ret;
}
}
ret = avformat_write_header(oFmtCtx, NULL);
if (ret < 0) {
qDebug() << "Error occurred when opening output file";
return ret;
}
return 0;
}
int VideoManager::encodeAndWriteFrame(AVFrame *frame, unsigned int stream_index, int *got_frame) {
int ret;
int got_frame_local;
AVPacket enc_pkt;
if (!got_frame)
got_frame = &got_frame_local;
enc_pkt.data = NULL;
enc_pkt.size = 0;
av_init_packet(&enc_pkt);
ret = avcodec_encode_video2(oFmtCtx->streams[stream_index]->codec, &enc_pkt,
frame, got_frame);
//av_frame_free(&frame);
if (ret < 0)
return ret;
if (!(*got_frame))
return 0;
enc_pkt.stream_index = stream_index;
av_packet_rescale_ts(&enc_pkt,
oFmtCtx->streams[stream_index]->codec->time_base,
oFmtCtx->streams[stream_index]->time_base);
ret = av_interleaved_write_frame(oFmtCtx, &enc_pkt);
return ret;
}
int VideoManager::flushEncoder(unsigned int stream_index)
{
int ret;
int got_frame;
if (!(oFmtCtx->streams[stream_index]->codec->codec->capabilities &
CODEC_CAP_DELAY))
return 0;
while (1) {
ret = encodeAndWriteFrame(NULL, stream_index, &got_frame);
if (ret < 0)
break;
if (!got_frame)
return 0;
}
return ret;
}
I know there are a lot of memory leaks. I deleted/commented most of the free functions intentionality because i thought that was the problem.
I'm using Qtcreator, so when i debug the programs this is the output:
Level Function Line
0 av_image_copy 303
1 frame_copy_video 650
2 av_frame_copy 687
3 av_frame_ref 384
4 gif_encode_frame 307
5 avcodec_encode_video2 2191
6 VideoManager::encodeAndWriteFrame 813
7 VideoManager::generateGif 375
8 qMain 31
9 WinMain*16 112
10 main
I've checked if there is a specific frame the program crash at, but it's a random frame too.
Any idea of what i'm doing wrong? Any help would be very appreciated.
EDIT:
After a few days of pain, suffering and frustation I decided to write the whole code from scratch. Both times i started from this example and modified it in order to works as I described before. And it works perfectly now :D! The only error i could find in the old code (posted before) is when i try to access to the video stream in the output file I used videoStreamIndex, but that index is from the video stream in the input file. Some times it could be the same index and some times not. But it doesn't explain why it crashed randomly. If that was the reason of the crash, it should crash every time i ran the code with the same video. So probably, there are more errors in that code.
Notice that i've not tested if fixing that error in the code above actually solve the crashing problems.
I think you may have your parameters mixed up. According to what I'm reading from the documentation avcodec_decode_video2's prototype looks like:
int avcodec_decode_video2 (AVCodecContext * avctx,
AVFrame * picture,
int * got_picture_ptr,
const AVPacket * avpkt)
And is called with:
ret = avcodec_encode_video2(oFmtCtx->streams[stream_index]->codec, // Dunno.
&enc_pkt, //AVPacket * should be AVFrame *
frame, //AVFrame * Should be int *
got_frame); // int * should be AVPacket *

How to extract elementary video from mp4 using ffmpeg programmatically?

I have started learning ffmpeg few weaks ago. At the moment I am able to transcode any video to mp4 using h264/AVC codec. The main scheme is something like that:
-open input
-demux
-decode
-encode
-mux
The actual code is below:
#include <iostream>
#include <math.h>
extern "C" {
#
ifndef __STDC_CONSTANT_MACROS# undef main /* Prevents SDL from overriding main() */ # define __STDC_CONSTANT_MACROS# endif
# pragma comment(lib, "avcodec.lib")# pragma comment(lib, "avformat.lib")# pragma comment(lib, "swscale.lib")# pragma comment(lib, "avutil.lib")
#include <libavcodec\avcodec.h>
#include <libavformat\avformat.h>
#include <libswscale\swscale.h>
#include <libavutil\mem.h>
#include <libavutil/opt.h>
#include <libavutil\channel_layout.h>
#include <libavutil\common.h>
#include <libavutil\imgutils.h>
#include <libavutil\mathematics.h>
#include <libavutil\samplefmt.h>
}
using namespace std;
void open_video(AVFormatContext * oc, AVCodec * codec, AVStream * st) {
int ret;
AVCodecContext * c;
c = st - > codec;
/*open codec */
cout << "probably starts here" << endl;
ret = avcodec_open2(c, codec, NULL);
cout << "and ends here" << endl;
if (ret < 0) {
cout << ("Could not open video codec") << endl;
}
}
/*This function will add a new stream to our file.
#param
oc -> Format context that the new stream will be added.
codec -> codec of the stream, this will be passed.
codec_id ->
chWidth->
chHeight->
*/
AVStream * addStream(AVFormatContext * oc, AVCodec ** codec, enum AVCodecID codec_id, int chWidth, int chHeight, int fps) {
AVCodecContext * c;
AVStream * st;
//find encoder of the stream, it passes this information to #codec, later on
//it will be used in encoding the video # avcodec_encode_video2 in loop.
* codec = avcodec_find_encoder(AV_CODEC_ID_H264);
if (( * codec) == NULL)
cout << "ERROR CAN NOT FIND ENCODER! ERROR! ERROR! AVCODEC_FIND_ENCODER FAILED !!!1 "
"" << endl;
if (!( * codec))
printf("Could not find encoder for ' %s ' ", avcodec_get_name(codec_id));
//create a new stream with the found codec inside oc(AVFormatContext).
st = avformat_new_stream(oc, * codec);
if (!st)
cout << " Cannot allocate stream " << endl;
//Setting the stream id.
//Since, there can be other streams in this AVFormatContext,
//we should find the first non used index. And this is oc->nb_streams(number of streams) - 1
st - > id = oc - > nb_streams - 1;
c = st - > codec;
//setting the stream's codec's properties.
c - > codec_id = codec_id;
c - > bit_rate = 4000000;
c - > width = chWidth;
c - > height = chHeight;
c - > time_base.den = fps;
//fps;
c - > time_base.num = 1;
c - > gop_size = 12;
c - > pix_fmt = AV_PIX_FMT_YUV420P;
if (c - > codec_id == AV_CODEC_ID_MPEG2VIDEO) {
/* just for testing, we also add B frames */
c - > max_b_frames = 2;
}
if (c - > codec_id == AV_CODEC_ID_MPEG1VIDEO) {
/* Needed to avoid using macroblocks in which some coeffs overflow.
* This does not happen with normal video, it just happens here as
* the motion of the chroma plane does not match the luma plane. */
c - > mb_decision = 2;
}
/* Some formats want stream headers to be separate. */
if (oc - > oformat - > flags & AVFMT_GLOBALHEADER)
c - > flags |= CODEC_FLAG_GLOBAL_HEADER;
//returning our lovely new brand stream.
return st;
}
int changeResolution(string source, int format) {
//Data members
struct SwsContext * sws_ctx = NULL;
AVFrame * pFrame = NULL;
AVFrame * outFrame = NULL;
AVPacket packet;
uint8_t * buffer = NULL;
uint8_t endcode[] = {
0,
0,
1,
0xb7
};
AVDictionary * optionsDict = NULL;
AVFormatContext * pFormatCtx = NULL;
AVFormatContext * outputContext = NULL;
AVCodecContext * pCodecCtx;
AVCodec * pCodec;
AVCodec * codec;
AVCodec * videoCodec;
AVOutputFormat * fmt;
AVStream * video_stream;
int changeWidth;
int changeHeight;
int frameFinished;
int numBytes;
int fps;
int lock = 0;
//Register all codecs & other important stuff. Vital!..
av_register_all();
//Selects the desired resolution.
if (format == 0) {
changeWidth = 320;
changeHeight = 180;
} else if (format == 1) {
changeWidth = 640;
changeHeight = 480;
} else if (format == 2) {
changeWidth = 960;
changeHeight = 540;
} else if (format == 3) {
changeWidth = 1024;
changeHeight = 768;
} else {
changeWidth = 1280;
changeHeight = 720;
}
// Open video file
int aaa;
aaa = avformat_open_input( & pFormatCtx, source.c_str(), NULL, NULL);
if (aaa != 0) {
cout << " cannot open input file \n" << endl;
cout << "aaa = " << aaa << endl;
return -1; // Couldn't open file
}
// Retrieve stream information
if (av_find_stream_info(pFormatCtx) < 0)
return -1; // Couldn't find stream information
//just checking duration casually for no reason
/*int64_t duration = pFormatCtx->duration;
cout << "the duration is " << duration << " " << endl;*/
//this writes the info about the file
av_dump_format(pFormatCtx, 0, 0, 0);
cin >> lock;
// Find the first video stream
int videoStream = -1;
int i;
for (i = 0; i < 3; i++)
if (pFormatCtx - > streams[i] - > codec - > codec_type == AVMEDIA_TYPE_VIDEO) {
videoStream = i;
cout << " lel \n ";
break;
}
if (videoStream == -1)
return -1; // Didn't find a video stream
// Get a pointer to the codec context for the video stream
pCodecCtx = pFormatCtx - > streams[videoStream] - > codec;
fps = pCodecCtx - > time_base.den;
//Find the decoder of the input file, for the video stream
pCodec = avcodec_find_decoder(pCodecCtx - > codec_id);
if (pCodec == NULL) {
fprintf(stderr, "Unsupported codec!\n");
return -1; // Codec not found
}
// Open codec, you must open it first, in order to use it.
if (avcodec_open2(pCodecCtx, pCodec, & optionsDict) < 0)
return -1; // Could not open codec
// Allocate video frame ( pFrame for taking the packets into, outFrame for processed frames to packet.)
pFrame = avcodec_alloc_frame();
outFrame = avcodec_alloc_frame();
i = 0;
int ret;
int video_frame_count = 0;
//Initiate the outFrame set the buffer & fill the properties
numBytes = avpicture_get_size(PIX_FMT_YUV420P, changeWidth, changeHeight);
buffer = (uint8_t * ) av_malloc(numBytes * sizeof(uint8_t));
avpicture_fill((AVPicture * ) outFrame, buffer, PIX_FMT_YUV420P, changeWidth, changeHeight);
int pp;
int frameNo = 0;
//allocate the outputContext, it will be the AVFormatContext of our output file.
//It will try to find the format by giving the file name.
avformat_alloc_output_context2( & outputContext, NULL, NULL, "myoutput.mp4");
//Cant find the file extension, using MPEG as default.
if (!outputContext) {
printf("Could not deduce output format from file extension: using MPEG.\n");
avformat_alloc_output_context2( & outputContext, NULL, "mpeg", "myoutput.mp4");
}
//Still cant set file extension, exit.
if (!outputContext) {
return 1;
}
//set AVOutputFormat fmt to our outputContext's format.
fmt = outputContext - > oformat;
video_stream = NULL;
//If fmt has a valid codec_id, create a new video stream.
//This function will set the streams codec & codecs desired properties.
//Stream's codec will be passed to videoCodec for later usage.
if (fmt - > video_codec != AV_CODEC_ID_NONE)
video_stream = addStream(outputContext, & videoCodec, fmt - > video_codec, changeWidth, changeHeight, fps);
//open the video using videoCodec. by avcodec_open2() i.e open the codec.
if (video_stream)
open_video(outputContext, videoCodec, video_stream);
//Creating our new output file.
if (!(fmt - > flags & AVFMT_NOFILE)) {
ret = avio_open( & outputContext - > pb, "toBeStreamed.264", AVIO_FLAG_WRITE);
if (ret < 0) {
cout << " cant open file " << endl;
return 1;
}
}
//Writing the header of format context.
//ret = avformat_write_header(outputContext, NULL);
if (ret >= 0) {
cout << "writing header success !!!" << endl;
}
//Start reading packages from input file.
while (av_read_frame(pFormatCtx, & packet) >= 0) {
// Is this a packet from the video stream?
if (packet.stream_index == videoStream) {
// Decode video package into frames
ret = avcodec_decode_video2(pCodecCtx, pFrame, & frameFinished, & packet);
if (ret < 0) {
printf(" Error decoding frame !!..");
return ret;
}
if (frameFinished) {
printf("video_frame n:%d coded_n:%d\n", video_frame_count++, pFrame - > coded_picture_number);
}
av_free_packet( & packet);
//do stuff with frame, in this case we are changing the resolution.
static struct SwsContext * img_convert_ctx_in = NULL;
if (img_convert_ctx_in == NULL) {
img_convert_ctx_in = sws_getContext(pCodecCtx - > width,
pCodecCtx - > height,
pCodecCtx - > pix_fmt,
changeWidth,
changeHeight,
PIX_FMT_YUV420P,
SWS_BICUBIC,
NULL,
NULL,
NULL);
}
//scale the frames
sws_scale(img_convert_ctx_in,
pFrame - > data,
pFrame - > linesize,
0,
pCodecCtx - > height,
outFrame - > data,
outFrame - > linesize);
//initiate the pts value
if (frameNo == 0)
outFrame - > pts = 0;
//calculate the pts value & set it.
outFrame - > pts += av_rescale_q(1, video_stream - > codec - > time_base, video_stream - > time_base);
//encode frames into packages. Package passed in #packet.
if (avcodec_encode_video2(outputContext - > streams[0] - > codec, & packet, outFrame, & pp) < 0)
cout << "Encoding frames into packages, failed. " << endl;
frameNo++;
//write the packages into file, resulting in creating a video file.
av_interleaved_write_frame(outputContext, & packet);
}
}
av_free_packet( & packet);
//av_write_trailer(outputContext);
avio_close(outputContext - > pb);
// Free the RGB image
av_free(buffer);
av_free(outFrame);
// Free the YUV frame
av_free(pFrame);
// Close the codec
avcodec_close(video_stream - > codec);
avcodec_close(pCodecCtx);
// Close the video file
avformat_close_input( & pFormatCtx);
return 0;
}
at the end of the process I get my desired file with desired codec & container & resolution.
My problem is, in a part of our project I need to get elementary video streams IN file. Such as example.264. However I can not add a stream without creating an AVFormatContext. I can not create an AVFormatContext because 264 files does not have a container,they are just raw video?, as far as I know.
I have tried the way in decoding_encoding.c which uses fwrite. However that example was for mpeg-2 codec and when I try to adapt that code to H264/AVC codec, I got "floating point division by zero" error from mediainfo and moreover, some of the properties of the video was not showing (such as FPS & playtime & quality factor). I think it has to do with the "endcode" the example adds at the end of the code. It is for mpeg-2. ( uint8_t endcode[] = { 0, 0, 1, 0xb7 }; )
Anyway, I would love to get a startpoint for this task. I have managed to come this far by using internet resources ( quite few & outdated for ffmpeg) but now I'm stuck a little.

using libvo-aacenc with libav in C++ program

I'm making a program that records and encodes this recording to aac.I made a function for linux and libfaac that does this job. Now I need to make this program for windows.I know i need to use libvo-aacenc and i don't know what to change in my code.Could you tell me what should I do? Here's my code.
static void encodeAac( const char *infilename,const char *filename)
{
AVCodec *codec;
AVCodecContext *c = NULL;
int frame_size, i, j, out_size, outbuf_size;
FILE *f,*fin;
SAMPLE *samples;
float t, tincr;
uint8_t *outbuf;
avcodec_register_all(); //Load all codecs
av_register_all();
codec = avcodec_find_encoder(AV_CODEC_ID_AAC); //Search for AAC codec
if (!codec) {
error("Codec not found");
}
c = avcodec_alloc_context();
c->bit_rate = 64000;
c->sample_fmt = AV_SAMPLE_FMT_S16;
c->sample_rate = SAMPLE_RATE;
c->channels = NUM_CHANNELS;
c->time_base.num= 1;
c->time_base.den= SAMPLE_RATE;
c->profile= FF_PROFILE_AAC_MAIN;
if (avcodec_open(c, codec) < 0) {
error(add("","",avcodec_open(c, codec)).c_str());
exit(1);
}
f = fopen(filename, "wb");
fin=fopen(infilename,"rb");
if (!fin) {
error("could not open temporary file");
}
if (!f) {
error("could not open output file");
}
std::cout << c->frame_size*c->channels << std::endl;
samples = new SAMPLE[c->frame_size*c->channels];
outbuf = new uint8_t[FRAMES_PER_BUFFER * NUM_CHANNELS];
while(fread(samples,sizeof(SAMPLE),c->frame_size*c->channels,fin)){
out_size=avcodec_encode_audio(c,outbuf,FRAMES_PER_BUFFER * NUM_CHANNELS,samples);
fwrite(outbuf,sizeof(uint8_t),out_size,f);
}
for(int i=1;i<=4;i++){ //For buffer flushing
out_size=avcodec_encode_audio(c,outbuf,FRAMES_PER_BUFFER * NUM_CHANNELS,NULL);
fwrite(outbuf,sizeof(uint8_t),out_size,f);
}
fclose(f);
delete outbuf;
delete samples;
avcodec_close(c);
av_free(c);
}