I am using libx264 compiled from source. It was configured to get both .dll and .lib by this command
./configure --disable-cli --enable-shared --extra-ldflags=-Wl,--output-def=libx264.def`
I am using the libx264 API in my screen-sharing program with the preset - "veryfast", tune - "zerolatency", profile - "high" and also the following settings.
param.i_csp = X264_CSP_BGRA;
param.i_threads = 1;
param.i_width = width;
param.i_height = height;
param.i_fps_num = fps;
param.i_fps_den = 1;
param.rc.i_bitrate = bitrate;
param.rc.i_rc_method = X264_RC_ABR;
param.rc.b_filler = true;
param.rc.f_rf_constant = (float)0;
param.rc.i_vbv_max_bitrate = param.rc.i_bitrate;
param.rc.i_vbv_buffer_size = param.rc.i_bitrate;
param.b_repeat_headers = 0;
param.b_annexb = 1;
For these settings the program works fine. I specified it as single threaded by setting param.i_threads = 1.
If this is removed, x264 defaults to using multiple threads and sets param.i_threads as 1.5x of number of cores in the CPU automatically. This will give faster performance than running in single thread.
But when I remove the param.i_threads = 1 to make it multi-threaded, the generated output is fully grey. I cannot see any output when I view the live stream with VLC or some times I can view a weird output.
I am using this bitmap image as an example (https://imgur.com/a/l8LCd1l). Only this same image is being encoded multiple times. When it is saved into .h264 video, it is viewable clearly. But when the encoded payload is sent through rtmp, the live stream produces very bad and weird output (or sometimes no output). This is the weird output which im seeing most of the time for this image: https://imgur.com/a/VdyX1Zm
This is the full example code in which I am both streaming and writing video file of the same picture. This is using the srs librtmp library. There is no error but the stream has weird output.
In this code if you set add param.i_threads = 1; then only the output stream will be viewable. The problem is that it should be viewable in both single-threaded and multi-threaded encoding.
#include <iostream>
#include <stdio.h>
#include <sstream>
#include <x264.h>
#include "srs_librtmp.h"
#pragma comment(lib, "C:/Softwares/x264/libx264.lib")
using namespace std;
int check_ret(int ret);
int main()
{
int dts = 0;
x264_param_t param;
x264_t* h;
x264_nal_t* nals;
int i_nal;
int pts = 0;
int i_frame_size;
x264_picture_t picIn;
x264_picture_t picOut;
x264_param_default_preset(¶m, "veryfast", "zerolatency");
//x264 settings
param.i_csp = X264_CSP_BGRA;
param.i_width = 1920;
param.i_height = 1080;
param.i_fps_num = 30;
param.i_fps_den = 1;
param.rc.i_bitrate = 2500;
param.rc.i_rc_method = X264_RC_ABR;
param.rc.b_filler = true;
param.rc.f_rf_constant = (float)0;
param.rc.i_vbv_max_bitrate = param.rc.i_bitrate;
param.rc.i_vbv_buffer_size = param.rc.i_bitrate;
param.b_repeat_headers = 0;
param.b_annexb = 1;
x264_param_apply_profile(¶m, "high");
h = x264_encoder_open(¶m);
//allocate picture
x264_picture_alloc(&picIn, param.i_csp, param.i_width, param.i_height);
//picture settings
picIn.img.i_plane = 1;
picIn.img.i_stride[0] = 4 * param.i_width;
picIn.i_type = X264_TYPE_AUTO;
int header_size = x264_encoder_headers(h, &nals, &i_nal);
FILE* fptr;
fopen_s(&fptr, "example1.h264", "wb");
// write sps and pps in the video file
fwrite(nals->p_payload, header_size, 1, fptr);
int size = 1920 * 1080 * 4;
char* bmp = new char[size];
FILE* bitptr;
errno_t err = fopen_s(&bitptr, "flower.bmp", "rb");
fseek(bitptr, 54, SEEK_SET);
fread(bmp, size, 1, bitptr);
fclose(bitptr);
srs_rtmp_t rtmp = srs_rtmp_create("127.0.0.1:1935/live/test");
if (srs_rtmp_handshake(rtmp) != 0)
{
std::cout << "Simple handshake failed.";
return -1;
}
std::cout << "Handshake completed successfully.\n";
if (srs_rtmp_connect_app(rtmp) != 0) {
std::cout << "Connecting to host failed.";
return -1;
}
std::cout << "Connected to host successfully.\n";
if (srs_rtmp_publish_stream(rtmp) != 0) {
std::cout << "Publish signal failed.";
}
std::cout << "Publish signal success\n";
// write sps and pps in the live stream
int ret = srs_h264_write_raw_frames(rtmp, reinterpret_cast<char*>(nals->p_payload), header_size, 0, 0);
ret = check_ret(ret);
if (!ret)
return -1;
std::cout << "SPS and PPS sent.\n";
// main loop
std::cout << "Now streaming and encoding\n";
int i = 1800;
while (i--)
{
picIn.img.plane[0] = reinterpret_cast<uint8_t*>(bmp);
picIn.i_pts = pts++;
i_frame_size = x264_encoder_encode(h, &nals, &i_nal, &picIn, &picOut);
if (i_frame_size)
{
for (int j = 0; j < i_nal; j++)
{
x264_nal_t* nal = nals + j;
// write data in the video file
fwrite(nal->p_payload, nal->i_payload, 1, fptr);
// write data in the live stream
ret = srs_h264_write_raw_frames(rtmp, reinterpret_cast<char*>(nal->p_payload), nal->i_payload, dts, dts);
ret = check_ret(ret);
if (!ret)
{
return -1;
}
}
}
else
{
std::cout << "i_frame_size = 0 (encoder failed)\n";
}
dts += 33;
}
while (x264_encoder_delayed_frames(h))
{
i_frame_size = x264_encoder_encode(h, &nals, &i_nal, NULL, &picOut);
if (i_frame_size)
{
fwrite(nals->p_payload, i_frame_size, 1, fptr);
}
}
std::cout << "\nAll done\n";
std::cout << "Output video is example1.h264 and it is viewable in VLC";
return 0;
}
int check_ret(int ret)
{
if (ret != 0) {
if (srs_h264_is_dvbsp_error(ret)) {
srs_human_trace("ignoring drop video error, code=%d", ret);
}
else if (srs_h264_is_duplicated_sps_error(ret)) {
srs_human_trace("ignoring duplicated sps, code=%d", ret);
}
else if (srs_h264_is_duplicated_pps_error(ret)) {
srs_human_trace("ignoring duplicated pps, code=%d", ret);
}
else {
srs_human_trace("sending h264 raw data failed. ret=%d", ret);
return 0;
}
}
return 1;
}
If you would like to download the original flower.bmp file, here is the link: https://gofile.io/d/w2kX56
This error can be reproduced in any other bmp file also.
Please tell me what is causing this problem when multi-threading is enabled. Am I setting wrong values? Is the code in which I am streaming the encoded data wrong?
Related
I am trying to decode a video stream from the browser using the ffmpeg API. The stream is produced by the webcam and recorded with MediaRecorder as webm format. What I ultimately need is a vector of opencv cv::Mat objects for further processing.
I have written a C++ webserver using the uWebsocket library. The video stream is sent via websocket from the browser to the server once per second. On the server, I append the received data to my custom buffer and decode it with the ffmpeg API.
If I just save the data on the disk and later I play it with a media player, it works fine. So, whatever the browser sends is a valid video.
I do not think that I correctly understand how should the custom IO behave with network streaming as nothing seems to be working.
The custom buffer:
struct Buffer
{
std::vector<uint8_t> data;
int currentPos = 0;
};
The readAVBuffer method for custom IO
int MediaDecoder::readAVBuffer(void* opaque, uint8_t* buf, int buf_size)
{
MediaDecoder::Buffer* mbuf = (MediaDecoder::Buffer*)opaque;
int count = 0;
for(int i=0;i<buf_size;i++)
{
int index = i + mbuf->currentPos;
if(index >= (int)mbuf->data.size())
{
break;
}
count++;
buf[i] = mbuf->data.at(index);
}
if(count > 0) mbuf->currentPos+=count;
std::cout << "read : "<<count<<" "<<mbuf->currentPos<<", buff size:"<<mbuf->data.size() << std::endl;
if(count <= 0) return AVERROR(EAGAIN); //is this error that should be returned? It cannot be EOF since we're not done yet, most likely
return count;
}
The big decode method, that's supposed to return whatever frames it could read
std::vector<cv::Mat> MediaDecoder::decode(const char* data, size_t length)
{
std::vector<cv::Mat> frames;
//add data to the buffer
for(size_t i=0;i<length;i++) {
buf.data.push_back(data[i]);
}
//do not invoke the decoders until we have 1MB of data
if(((buf.data.size() - buf.currentPos) < 1*1024*1024) && !initializedCodecs) return frames;
std::cout << "decoding data length "<<length<<std::endl;
if(!initializedCodecs) //initialize ffmpeg objects. Custom I/O, format, decoder, etc.
{
//these are just members of the class
avioCtxPtr = std::unique_ptr<AVIOContext,avio_context_deleter>(
avio_alloc_context((uint8_t*)av_malloc(4096),4096,0,&buf,&readAVBuffer,nullptr,nullptr),
avio_context_deleter());
if(!avioCtxPtr)
{
std::cerr << "Could not create IO buffer" << std::endl;
return frames;
}
fmt_ctx = std::unique_ptr<AVFormatContext,avformat_context_deleter>(avformat_alloc_context(),
avformat_context_deleter());
fmt_ctx->pb = avioCtxPtr.get();
fmt_ctx->flags |= AVFMT_FLAG_CUSTOM_IO ;
//fmt_ctx->max_analyze_duration = 2 * AV_TIME_BASE; // read 2 seconds of data
{
AVFormatContext *fmtCtxRaw = fmt_ctx.get();
if (avformat_open_input(&fmtCtxRaw, "", nullptr, nullptr) < 0) {
std::cerr << "Could not open movie" << std::endl;
return frames;
}
}
if (avformat_find_stream_info(fmt_ctx.get(), nullptr) < 0) {
std::cerr << "Could not find stream information" << std::endl;
return frames;
}
if((video_stream_idx = av_find_best_stream(fmt_ctx.get(), AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0)) < 0)
{
std::cerr << "Could not find video stream" << std::endl;
return frames;
}
AVStream *video_stream = fmt_ctx->streams[video_stream_idx];
AVCodec *dec = avcodec_find_decoder(video_stream->codecpar->codec_id);
video_dec_ctx = std::unique_ptr<AVCodecContext,avcodec_context_deleter> (avcodec_alloc_context3(dec),
avcodec_context_deleter());
if (!video_dec_ctx)
{
std::cerr << "Failed to allocate the video codec context" << std::endl;
return frames;
}
avcodec_parameters_to_context(video_dec_ctx.get(),video_stream->codecpar);
video_dec_ctx->thread_count = 1;
/* video_dec_ctx->max_b_frames = 0;
video_dec_ctx->frame_skip_threshold = 10;*/
AVDictionary *opts = nullptr;
av_dict_set(&opts, "refcounted_frames", "1", 0);
av_dict_set(&opts, "deadline", "1", 0);
av_dict_set(&opts, "auto-alt-ref", "0", 0);
av_dict_set(&opts, "lag-in-frames", "1", 0);
av_dict_set(&opts, "rc_lookahead", "1", 0);
av_dict_set(&opts, "drop_frame", "1", 0);
av_dict_set(&opts, "error-resilient", "1", 0);
int width = video_dec_ctx->width;
videoHeight = video_dec_ctx->height;
if(avcodec_open2(video_dec_ctx.get(), dec, &opts) < 0)
{
std::cerr << "Failed to open the video codec context" << std::endl;
return frames;
}
AVPixelFormat pFormat = AV_PIX_FMT_BGR24;
img_convert_ctx = std::unique_ptr<SwsContext,swscontext_deleter>(sws_getContext(width, videoHeight,
video_dec_ctx->pix_fmt, width, videoHeight, pFormat,
SWS_BICUBIC, nullptr, nullptr,nullptr),swscontext_deleter());
frame = std::unique_ptr<AVFrame,avframe_deleter>(av_frame_alloc(),avframe_deleter());
frameRGB = std::unique_ptr<AVFrame,avframe_deleter>(av_frame_alloc(),avframe_deleter());
int numBytes = av_image_get_buffer_size(pFormat, width, videoHeight,32 /*https://stackoverflow.com/questions/35678041/what-is-linesize-alignment-meaning*/);
std::unique_ptr<uint8_t,avbuffer_deleter> imageBuffer((uint8_t *) av_malloc(numBytes*sizeof(uint8_t)),avbuffer_deleter());
av_image_fill_arrays(frameRGB->data,frameRGB->linesize,imageBuffer.get(),pFormat,width,videoHeight,32);
frameRGB->width = width;
frameRGB->height = videoHeight;
initializedCodecs = true;
}
AVPacket pkt;
av_init_packet(&pkt);
pkt.data = nullptr;
pkt.size = 0;
int read_frame_return = 0;
while ( (read_frame_return=av_read_frame(fmt_ctx.get(), &pkt)) >= 0)
{
readFrame(&frames,&pkt,video_dec_ctx.get(),frame.get(),img_convert_ctx.get(),
videoHeight,frameRGB.get());
//if(cancelled) break;
}
avioCtxPtr->eof_reached = 0;
avioCtxPtr->error = 0;
//flush
// readFrame(frames.get(),nullptr,video_dec_ctx.get(),frame.get(),
// img_convert_ctx.get(),videoHeight,frameRGB.get());
avioCtxPtr->eof_reached = 0;
avioCtxPtr->error = 0;
if(frames->size() <= 0)
{
std::cout << "buffer pos: "<<buf.currentPos<<", buff size:"<<buf.data.size()
<<",read_frame_return:"<<read_frame_return<< std::endl;
}
return frames;
}
What I would expect to happen would be for a continuous extraction of cv::Mat frames as I feed it more and more data. What actually happens is that after the the buffer is fully read I see:
[matroska,webm # 0x507b450] Read error at pos. 1278266 (0x13813a)
[matroska,webm # 0x507b450] Seek to desired resync point failed. Seeking to earliest point available instead.
And then no more bytes are read from the buffer even if later I increase the size of it.
There is something terribly wrong I'm doing here and I don't understand what.
What I ended up doing was to do the reading of the incoming data and actual decoding in a different thread. The read method, however, will just block if there are no more bytes available, waiting until anything is coming.
When new bytes are arriving, they're added to the buffer and the conditional_variable signals the waiting thread to wake up and start reading data again from the buffer.
It works well enough.
In a project I'm working on, I'd like to use stb_vorbis to stream audio from an ogg file. However, after implementing the processes to do this, I found the audio was crackly. I feel the problem may be similar to this question, but I can't see where a problem could be.
Here is my code:
#include <SDL2/SDL.h>
#include <stb/vorbis.h>
#include <iostream>
void sdlAudioCallback(void* userData, Uint8* stream, int len)
{
stb_vorbis* myVorbis = (stb_vorbis*) userData;
SDL_memset(stream, 0, len);
stb_vorbis_get_frame_short_interleaved(myVorbis, 2, (short*) stream, len/sizeof(short));
}
int main()
{
if (SDL_Init(SDL_INIT_AUDIO) != 0)
return -1;
int error = 0;
stb_vorbis_alloc* alloc;
stb_vorbis* vorbis = stb_vorbis_open_filename("res/thingy.ogg", &error, alloc);
if (error != 0)
return -2;
stb_vorbis_info info = stb_vorbis_get_info(vorbis);
SDL_AudioSpec spec;
spec.freq = info.sample_rate;
spec.format = AUDIO_S16;
spec.channels = 2;
spec.samples = 1024;
spec.callback = sdlAudioCallback;
spec.userdata = vorbis;
if (SDL_OpenAudio(&spec, NULL) < 0) {
std::cout << "failed to open audio device: " << SDL_GetError() << std::endl;
SDL_Quit();
return -3;
}
SDL_PauseAudio(0);
SDL_Delay(5000);
}
More information:
thingy.ogg is from Sean Barrett's samples
building with g++ on an OSX machine
I want to decode H.264 video from a collection of MPEG-2 Transport Stream packets but I am not clear what to pass to avcodec_decode_video2
The documentation says to pass "the input AVPacket containing the input buffer."
But what should be in the input buffer?
A PES packet will be spread across the payload portion of several TS packets, with NALU(s) inside the PES. So pass a TS fragment? The entire PES? PES payload only?
This Sample Code mentions:
BUT some other codecs (msmpeg4, mpeg4) are inherently frame based, so
you must call them with all the data for one frame exactly. You must
also initialize 'width' and 'height' before initializing them.
But I can find no info on what "all the data" means...
Passing a fragment of a TS packet payload is not working:
AVPacket avDecPkt;
av_init_packet(&avDecPkt);
avDecPkt.data = inbuf_ptr;
avDecPkt.size = esBufSize;
len = avcodec_decode_video2(mpDecoderContext, mpFrameDec, &got_picture, &avDecPkt);
if (len < 0)
{
printf(" TS PKT #%.0f. Error decoding frame #%04d [rc=%d '%s']\n",
tsPacket.pktNum, mDecodedFrameNum, len, av_make_error_string(errMsg, 128, len));
return;
}
output
[h264 # 0x81cd2a0] no frame!
TS PKT #2973. Error decoding frame #0001 [rc=-1094995529 'Invalid data found when processing input']
EDIT
Using the excellent hits from WLGfx, I made this simple program to try decoding TS packets. As input, I prepared a file containing only TS packets from the Video PID.
It feels close but I don't know how to set up the FormatContext. The code below segfaults at av_read_frame() (and internally at ret = s->iformat->read_packet(s, pkt)). s->iformat is zero.
Suggestions?
EDIT II - Sorry, for got post source code **
**EDIT III - Sample code updated to simulate reading TS PKT Queue
/*
* Test program for video decoder
*/
#include <stdio.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
extern "C" {
#ifdef __cplusplus
#define __STDC_CONSTANT_MACROS
#ifdef _STDINT_H
#undef _STDINT_H
#endif
#include <stdint.h>
#endif
}
extern "C" {
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libavutil/imgutils.h"
#include "libavutil/opt.h"
}
class VideoDecoder
{
public:
VideoDecoder();
bool rcvTsPacket(AVPacket &inTsPacket);
private:
AVCodec *mpDecoder;
AVCodecContext *mpDecoderContext;
AVFrame *mpDecodedFrame;
AVFormatContext *mpFmtContext;
};
VideoDecoder::VideoDecoder()
{
av_register_all();
// FORMAT CONTEXT SETUP
mpFmtContext = avformat_alloc_context();
mpFmtContext->flags = AVFMT_NOFILE;
// ????? WHAT ELSE ???? //
// DECODER SETUP
mpDecoder = avcodec_find_decoder(AV_CODEC_ID_H264);
if (!mpDecoder)
{
printf("Could not load decoder\n");
exit(11);
}
mpDecoderContext = avcodec_alloc_context3(NULL);
if (avcodec_open2(mpDecoderContext, mpDecoder, NULL) < 0)
{
printf("Cannot open decoder context\n");
exit(1);
}
mpDecodedFrame = av_frame_alloc();
}
bool
VideoDecoder::rcvTsPacket(AVPacket &inTsPkt)
{
bool ret = true;
if ((av_read_frame(mpFmtContext, &inTsPkt)) < 0)
{
printf("Error in av_read_frame()\n");
ret = false;
}
else
{
// success. Decode the TS packet
int got;
int len = avcodec_decode_video2(mpDecoderContext, mpDecodedFrame, &got, &inTsPkt);
if (len < 0)
ret = false;
if (got)
printf("GOT A DECODED FRAME\n");
}
return ret;
}
int
main(int argc, char **argv)
{
if (argc != 2)
{
printf("Usage: %s tsInFile\n", argv[0]);
exit(1);
}
FILE *tsInFile = fopen(argv[1], "r");
if (!tsInFile)
{
perror("Could not open TS input file");
exit(2);
}
unsigned int tsPktNum = 0;
uint8_t tsBuffer[256];
AVPacket tsPkt;
av_init_packet(&tsPkt);
VideoDecoder vDecoder;
while (!feof(tsInFile))
{
tsPktNum++;
tsPkt.size = 188;
tsPkt.data = tsBuffer;
fread(tsPkt.data, 188, 1, tsInFile);
vDecoder.rcvTsPacket(tsPkt);
}
}
I've got some code snippets that might help you out as I've been working with MPEG-TS also.
Starting with my packet thread which checks each packet against the stream ID's which I've already found and got the codec contexts:
void *FFMPEG::thread_packet_function(void *arg) {
FFMPEG *ffmpeg = (FFMPEG*)arg;
for (int c = 0; c < MAX_PACKETS; c++)
ffmpeg->free_packets[c] = &ffmpeg->packet_list[c];
ffmpeg->packet_pos = MAX_PACKETS;
Audio.start_decoding();
Video.start_decoding();
Subtitle.start_decoding();
while (!ffmpeg->thread_quit) {
if (ffmpeg->packet_pos != 0 &&
Audio.okay_add_packet() &&
Video.okay_add_packet() &&
Subtitle.okay_add_packet()) {
pthread_mutex_lock(&ffmpeg->packet_mutex); // get free packet
AVPacket *pkt = ffmpeg->free_packets[--ffmpeg->packet_pos]; // pre decrement
pthread_mutex_unlock(&ffmpeg->packet_mutex);
if ((av_read_frame(ffmpeg->fContext, pkt)) >= 0) { // success
int id = pkt->stream_index;
if (id == ffmpeg->aud_stream.stream_id) Audio.add_packet(pkt);
else if (id == ffmpeg->vid_stream.stream_id) Video.add_packet(pkt);
else if (id == ffmpeg->sub_stream.stream_id) Subtitle.add_packet(pkt);
else { // unknown packet
av_packet_unref(pkt);
pthread_mutex_lock(&ffmpeg->packet_mutex); // put packet back
ffmpeg->free_packets[ffmpeg->packet_pos++] = pkt;
pthread_mutex_unlock(&ffmpeg->packet_mutex);
//LOGI("Dumping unknown packet, id %d", id);
}
} else {
av_packet_unref(pkt);
pthread_mutex_lock(&ffmpeg->packet_mutex); // put packet back
ffmpeg->free_packets[ffmpeg->packet_pos++] = pkt;
pthread_mutex_unlock(&ffmpeg->packet_mutex);
//LOGI("No packet read");
}
} else { // buffers full so yield
//LOGI("Packet reader on hold: Audio-%d, Video-%d, Subtitle-%d",
// Audio.packet_pos, Video.packet_pos, Subtitle.packet_pos);
usleep(1000);
//sched_yield();
}
}
return 0;
}
Each decoder for audio, video and subtitles have their own threads which receive the packets from the above thread in ring buffers. I've had to separate the decoders into their own threads because CPU usage was increasing when I started using the deinterlace filter.
My video decoder reads the packets from the buffers and when it has finished with the packet sends it back to be unref'd and can be used again. Balancing the packet buffers doesn't take that much time once everything is running.
Here's the snipped from my video decoder:
void *VideoManager::decoder(void *arg) {
LOGI("Video decoder started");
VideoManager *mgr = (VideoManager *)arg;
while (!ffmpeg.thread_quit) {
pthread_mutex_lock(&mgr->packet_mutex);
if (mgr->packet_pos != 0) {
// fetch first packet to decode
AVPacket *pkt = mgr->packets[0];
// shift list down one
for (int c = 1; c < mgr->packet_pos; c++) {
mgr->packets[c-1] = mgr->packets[c];
}
mgr->packet_pos--;
pthread_mutex_unlock(&mgr->packet_mutex); // finished with packets array
int got;
AVFrame *frame = ffmpeg.vid_stream.frame;
avcodec_decode_video2(ffmpeg.vid_stream.context, frame, &got, pkt);
ffmpeg.finished_with_packet(pkt);
if (got) {
#ifdef INTERLACE_ALL
if (!frame->interlaced_frame) mgr->add_av_frame(frame, 0);
else {
if (!mgr->filter_initialised) mgr->init_filter_graph(frame);
av_buffersrc_add_frame_flags(mgr->filter_src_ctx, frame, AV_BUFFERSRC_FLAG_KEEP_REF);
int c = 0;
while (true) {
AVFrame *filter_frame = ffmpeg.vid_stream.filter_frame;
int result = av_buffersink_get_frame(mgr->filter_sink_ctx, filter_frame);
if (result == AVERROR(EAGAIN) ||
result == AVERROR(AVERROR_EOF) ||
result < 0)
break;
mgr->add_av_frame(filter_frame, c++);
av_frame_unref(filter_frame);
}
//LOGI("Interlaced %d frames, decode %d, playback %d", c, mgr->decode_pos, mgr->playback_pos);
}
#elif defined(INTERLACE_HALF)
if (!frame->interlaced_frame) mgr->add_av_frame(frame, 0);
else {
if (!mgr->filter_initialised) mgr->init_filter_graph(frame);
av_buffersrc_add_frame_flags(mgr->filter_src_ctx, frame, AV_BUFFERSRC_FLAG_KEEP_REF);
int c = 0;
while (true) {
AVFrame *filter_frame = ffmpeg.vid_stream.filter_frame;
int result = av_buffersink_get_frame(mgr->filter_sink_ctx, filter_frame);
if (result == AVERROR(EAGAIN) ||
result == AVERROR(AVERROR_EOF) ||
result < 0)
break;
mgr->add_av_frame(filter_frame, c++);
av_frame_unref(filter_frame);
}
//LOGI("Interlaced %d frames, decode %d, playback %d", c, mgr->decode_pos, mgr->playback_pos);
}
#else
mgr->add_av_frame(frame, 0);
#endif
}
//LOGI("decoded video packet");
} else {
pthread_mutex_unlock(&mgr->packet_mutex);
}
}
LOGI("Video decoder ended");
}
As you can see, I'm using a mutex when passing packets back and forth.
Once a frame has been got I just copy the YUV buffers from the frame for later use into another buffer list. I don't convert the YUV, I use a shader which converts the YUV to RGB on the GPU.
The next snippet adds my decoded frame to my buffer list. This may help understand how to deal with the data.
void VideoManager::add_av_frame(AVFrame *frame, int field_num) {
int y_linesize = frame->linesize[0];
int u_linesize = frame->linesize[1];
int hgt = frame->height;
int y_buffsize = y_linesize * hgt;
int u_buffsize = u_linesize * hgt / 2;
int buffsize = y_buffsize + u_buffsize + u_buffsize;
VideoBuffer *buffer = &buffers[decode_pos];
if (ffmpeg.is_network && playback_pos == decode_pos) { // patched 25/10/16 wlgfx
buffer->used = false;
if (!buffer->data) buffer->data = (char*)mem.alloc(buffsize);
if (!buffer->data) {
LOGI("Dropped frame, allocation error");
return;
}
} else if (playback_pos == decode_pos) {
LOGI("Dropped frame, ran out of decoder frame buffers");
return;
} else if (!buffer->data) {
buffer->data = (char*)mem.alloc(buffsize);
if (!buffer->data) {
LOGI("Dropped frame, allocation error.");
return;
}
}
buffer->y_frame = buffer->data;
buffer->u_frame = buffer->y_frame + y_buffsize;
buffer->v_frame = buffer->y_frame + y_buffsize + u_buffsize;
buffer->wid = frame->width;
buffer->hgt = hgt;
buffer->y_linesize = y_linesize;
buffer->u_linesize = u_linesize;
int64_t pts = av_frame_get_best_effort_timestamp(frame);
buffer->pts = pts;
buffer->buffer_size = buffsize;
double field_add = av_q2d(ffmpeg.vid_stream.context->time_base) * field_num;
buffer->frame_time = av_q2d(ts_stream) * pts + field_add;
memcpy(buffer->y_frame, frame->data[0], (size_t) (buffer->y_linesize * buffer->hgt));
memcpy(buffer->u_frame, frame->data[1], (size_t) (buffer->u_linesize * buffer->hgt / 2));
memcpy(buffer->v_frame, frame->data[2], (size_t) (buffer->u_linesize * buffer->hgt / 2));
buffer->used = true;
decode_pos = (++decode_pos) % MAX_VID_BUFFERS;
//if (field_num == 0) LOGI("Video %.2f, %d - %d",
// buffer->frame_time - Audio.pts_start_time, decode_pos, playback_pos);
}
If there's anything else that I may be able to help with just give me a shout. :-)
EDIT:
The snippet how I open my video stream context which automatically determines the codec, whether it is h264, mpeg2, or another:
void FFMPEG::open_video_stream() {
vid_stream.stream_id = av_find_best_stream(fContext, AVMEDIA_TYPE_VIDEO,
-1, -1, &vid_stream.codec, 0);
if (vid_stream.stream_id == -1) return;
vid_stream.context = fContext->streams[vid_stream.stream_id]->codec;
if (!vid_stream.codec || avcodec_open2(vid_stream.context,
vid_stream.codec, NULL) < 0) {
vid_stream.stream_id = -1;
return;
}
vid_stream.frame = av_frame_alloc();
vid_stream.filter_frame = av_frame_alloc();
}
EDIT2:
This is how I've opened the input stream, whether it be file or URL. The AVFormatContext is the main context for the stream.
bool FFMPEG::start_stream(char *url_, float xtrim, float ytrim, int gain) {
aud_stream.stream_id = -1;
vid_stream.stream_id = -1;
sub_stream.stream_id = -1;
this->url = url_;
this->xtrim = xtrim;
this->ytrim = ytrim;
Audio.volume = gain;
Audio.init();
Video.init();
fContext = avformat_alloc_context();
if ((avformat_open_input(&fContext, url_, NULL, NULL)) != 0) {
stop_stream();
return false;
}
if ((avformat_find_stream_info(fContext, NULL)) < 0) {
stop_stream();
return false;
}
// network stream will overwrite packets if buffer is full
is_network = url.substr(0, 4) == "udp:" ||
url.substr(0, 4) == "rtp:" ||
url.substr(0, 5) == "rtsp:" ||
url.substr(0, 5) == "http:"; // added for wifi broadcasting ability
// determine if stream is audio only
is_mp3 = url.substr(url.size() - 4) == ".mp3";
LOGI("Stream: %s", url_);
if (!open_audio_stream()) {
stop_stream();
return false;
}
if (is_mp3) {
vid_stream.stream_id = -1;
sub_stream.stream_id = -1;
} else {
open_video_stream();
open_subtitle_stream();
if (vid_stream.stream_id == -1) { // switch to audio only
close_subtitle_stream();
is_mp3 = true;
}
}
LOGI("Audio: %d, Video: %d, Subtitle: %d",
aud_stream.stream_id,
vid_stream.stream_id,
sub_stream.stream_id);
if (aud_stream.stream_id != -1) {
LOGD("Audio stream time_base {%d, %d}",
aud_stream.context->time_base.num,
aud_stream.context->time_base.den);
}
if (vid_stream.stream_id != -1) {
LOGD("Video stream time_base {%d, %d}",
vid_stream.context->time_base.num,
vid_stream.context->time_base.den);
}
LOGI("Starting packet and decode threads");
thread_quit = false;
pthread_create(&thread_packet, NULL, &FFMPEG::thread_packet_function, this);
Display.set_overlay_timout(3.0);
return true;
}
EDIT: (constructing an AVPacket)
Construct an AVPacket to send to the decoder...
AVPacket packet;
av_init_packet(&packet);
packet.data = myTSpacketdata; // pointer to the TS packet
packet.size = 188;
You should be able to reuse the packet. And it might need unref'ing.
You must first use the avcodec library to get the compressed frames out of the file. Then you can decode them using avcodec_decode_video2. look at this tutorial http://dranger.com/ffmpeg/
im reading a udp-mjpeg-stream with the ffmpeg-API. When i read and display the Stream with an ARM-Processor i have 2 Problems:
1- The Applikation is too slow and there is a big delay between network cam and displayed video.
2- the memory usage increases every time when i call the function av_read_frame().
The Source code
const char *cam1_url = "udp://192.168.1.1:1234";
AVCodec *pCodec;
AVFrame *pFrame, *pFrameRGB;
AVCodecContext *pCodecCon;
AVDictionary *pUdpStreamOptions = NULL;
AVInputFormat *pMjpegFormat = av_find_input_format("mjpeg");
av_dict_set(&pUdpStreamOptions, "fifo_size", "5000000", 0);
av_register_all();
avdevice_register_all();
avcodec_register_all();
avformat_network_init();
AVFormatContext *pFormatCont = avformat_alloc_context();
if(avformat_open_input(&pFormatCont,cam1_url,pMjpegFormat,&pUdpStreamOptions) < 0)
{
cout << "!! Error !! - avformat_open_input(): failed to open input URL" << endl;
}
if(avformat_find_stream_info(pFormatCont,NULL) < 0)
{
cout << "!! Error !! - avformat_find_stream_info(), Failed to retrieve stream info" << endl;
}
av_dump_format(pFormatCont, 0, cam1_url, 0);
int videoStream;
for(int i=0; i< pFormatCont->nb_streams; i++)
{
if(pFormatCont->streams[i]->codec->codec_type==AVMEDIA_TYPE_VIDEO)
{
videoStream=i;
cout << " videoStream = " << videoStream << endl;
}
}
pCodecCon = pFormatCont->streams[videoStream]->codec;
pCodec = avcodec_find_decoder(pCodecCon->codec_id);
if(NULL == pCodec)
{
cout << "couldnt find codec" << endl;
return EXIT_FAILURE;
}
if(avcodec_open2(pCodecCon,pCodec,NULL) < 0)
{
cout << "!! Error !! - in avcodec_open2()" << endl;
return EXIT_FAILURE;
}
uint8_t *frameBuffer;
int numRxBytes = 0;
AVPixelFormat pFormat =AV_PIX_FMT_BGR24;
int width_rgb = (int)((float)pCodecCon->width);
int height_rgb = (int)((float)pCodecCon->height);
numRxBytes = avpicture_get_size(pFormat,width_rgb,height_rgb);
frameBuffer = (uint8_t *) av_malloc(numRxBytes*sizeof(uint8_t));
avpicture_fill((AVPicture *) pFrameRGB, frameBuffer, pFormat,width_rgb,height_rgb);
AVPacket rx_pkt; // received packet
int frameFinished = 0;
struct SwsContext *imgConvertCtx;
av_init_packet(&rx_pkt);
while(av_read_frame(pFormatCont, &rx_pkt) >= 0)
{
if(rx_pkt.stream_index == videoStream)
{
av_frame_free(&pFrame);
pFrame = av_frame_alloc();
av_frame_free(&pFrameRGB);
pFrameRGB = av_frame_alloc();
avcodec_decode_video2(pCodecCon, pFrame, &frameFinished,&rx_pkt);
if(frameFinished)
{
imgConvertCtx = sws_getCachedContext(NULL, pFrame->width,pFrame->height, AV_PIX_FMT_YUVJ420P,width_rgb,height_rgb,AV_PIX_FMT_BGR24, SWS_BICUBIC, NULL, NULL,NULL);
sws_scale(imgConvertCtx, ((AVPicture*)pFrame)->data, ((AVPicture*)pFrame)->linesize, 0, pCodecCon->height, ((AVPicture *)pFrameRGB)->data, ((AVPicture *)pFrameRGB)->linesize);
av_frame_unref(pFrame);
av_frame_unref(pFrameRGB);
}
}
av_free_packet(&rx_pkt);
av_packet_unref(&rx_pkt);
}
//cvDestroyWindow("Cam1Video");
av_free_packet(&rx_pkt);
avcodec_close(pCodecCon);
av_free(pFrame);
av_free(pFrameRGB);
avformat_close_input(&pFormatCont);
I have read, the reason could be that the ffmpeg-Libs saves the incomming frames in the cache but the arm-processor isnt fast enough to process them. After like 4 minutes the system craches.
How could i solve the Problem.
one option could be to tell ffmpeg to act as frame grabber, also to read frames in real time, with the flag "-re". How can i set this Flag in the c++ source code. Or can anybody help me to solve that Problem.
Thank you very much
this is my first question so i hope i did it correctly. If not, please let me know to fix it.
I'm trying to convert a short (10 secs) mp4 video file into a gif using ffmpeg libraries (I'm pretty new using ffmpeg). The program works pretty well converting to gif, but some times it randomly crash.
This is the version of the ffmpeg libraries I'm using:
libavutil 54. 27.100
libavcodec 56. 41.100
libavformat 56. 36.100
libavdevice 56. 4.100
libavfilter 5. 16.101
libavresample 2. 1. 0
libswscale 3. 1.101
libswresample 1. 2.100
libpostproc 53. 3.100
I'm using a 1920x1080p video, so in order to generate the gif I'm doing a pixel format convertion, from AV_PIX_FMT_YUV420P to AV_PIX_FMT_RGB8 with a resizing from the initial resolution to 432x240p.
Here is the code:
int VideoManager::loadVideo(QString filename, bool showInfo)
{
if(avformat_open_input(&iFmtCtx, filename.toStdString().c_str(), 0, 0) < 0)
{
qDebug() << "Could not open input file " << filename;
closeInput();
return -1;
}
if (avformat_find_stream_info(iFmtCtx, 0) < 0)
{
qDebug() << "Failed to retrieve input stream information";
closeInput();
return -2;
}
videoStreamIndex = -1;
for(unsigned int i = 0; i < iFmtCtx->nb_streams; ++i)
if(iFmtCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
{
videoStreamIndex = i;
break;
}
if(videoStreamIndex == -1)
{
qDebug() << "Didn't find any video stream!";
closeInput();
return -3;
}
iCodecCtx = iFmtCtx->streams[videoStreamIndex]->codec;
iCodec = avcodec_find_decoder(iCodecCtx->codec_id);
if(iCodec == NULL) // Codec not found
{
qDebug() << "Codec not found!";
closeInput();
return -4;
}
if(avcodec_open2(iCodecCtx, iCodec, NULL) < 0)
{
qDebug() << "Could not open codec!";
closeInput();
return -1;
}
if(showInfo)
av_dump_format(iFmtCtx, 0, filename.toStdString().c_str(), 0);
return 0;
}
void VideoManager::generateGif(QString filename)
{
int ret, frameCount = 0;
AVPacket packet;
packet.data = NULL;
packet.size = 0;
AVFrame *frame = NULL;
unsigned int stream_index;
int got_frame;
gifHeight = iFmtCtx->streams[videoStreamIndex]->codec->height;
gifWidth = iFmtCtx->streams[videoStreamIndex]->codec->width;
if(gifHeight > MAX_GIF_HEIGHT || gifWidth > MAX_GIF_WIDTH)
{
if(gifHeight > gifWidth)
{
gifWidth = (float)gifWidth * ( (float)MAX_GIF_HEIGHT / (float)gifHeight );
gifHeight = MAX_GIF_HEIGHT;
}
else
{
gifHeight = (float)gifHeight * ( (float)MAX_GIF_WIDTH / (float)gifWidth );
gifWidth = MAX_GIF_WIDTH;
}
}
if(openOutputFile(filename.toStdString().c_str()) < 0)
{
qDebug() << "Error openning output file: " << filename;
return;
}
while (1) {
int ret = av_read_frame(iFmtCtx, &packet);
if (ret < 0)
{
if(ret != AVERROR_EOF)
qDebug() << "Error reading frame: " << ret;
break;
}
stream_index = packet.stream_index;
if(stream_index == videoStreamIndex)
{
frame = av_frame_alloc();
if (!frame) {
qDebug() << "Error allocating frame";
break;
}
av_packet_rescale_ts(&packet,
iFmtCtx->streams[stream_index]->time_base,
iFmtCtx->streams[stream_index]->codec->time_base);
ret = avcodec_decode_video2(iFmtCtx->streams[stream_index]->codec, frame,
&got_frame, &packet);
if (ret < 0) {
qDebug() << "Decoding failed";
break;
}
if(got_frame)
{
qDebug() << ++frameCount;
nframes++;
frame->pts = av_frame_get_best_effort_timestamp(frame);
////////////////////////////////////////////////////////////////////////////////
/// Pixel format convertion and resize
////////////////////////////////////////////////////////////////////////////////
uint8_t *out_buffer = NULL;
SwsContext *img_convert_ctx = NULL;
AVFrame *pFrameRGB = av_frame_alloc();
if(pFrameRGB == NULL)
{
qDebug() << "Error allocating frameRGB";
break;
}
AVPixelFormat pixFmt;
switch (iFmtCtx->streams[stream_index]->codec->pix_fmt)
{
case AV_PIX_FMT_YUVJ420P : pixFmt = AV_PIX_FMT_YUV420P; break;
case AV_PIX_FMT_YUVJ422P : pixFmt = AV_PIX_FMT_YUV422P; break;
case AV_PIX_FMT_YUVJ444P : pixFmt = AV_PIX_FMT_YUV444P; break;
case AV_PIX_FMT_YUVJ440P : pixFmt = AV_PIX_FMT_YUV440P; break;
default:
pixFmt = iFmtCtx->streams[stream_index]->codec->pix_fmt;
}
out_buffer = (uint8_t*)av_malloc( avpicture_get_size( AV_PIX_FMT_RGB8,
gifWidth,
gifHeight ));
if(!out_buffer)
{
qDebug() << "Error alocatting out_buffer!";
}
avpicture_fill((AVPicture *)pFrameRGB, out_buffer, AV_PIX_FMT_RGB8,
gifWidth,
gifHeight);
img_convert_ctx = sws_getContext( iFmtCtx->streams[stream_index]->codec->width,
iFmtCtx->streams[stream_index]->codec->height,
pixFmt,
gifWidth,
gifHeight,
AV_PIX_FMT_RGB8,
SWS_ERROR_DIFFUSION, NULL, NULL, NULL );
if(!img_convert_ctx)
{
qDebug() << "error getting sws context";
}
sws_scale( img_convert_ctx, (const uint8_t* const*)frame->data,
frame->linesize, 0,
iFmtCtx->streams[stream_index]->codec->height,
pFrameRGB->data,
pFrameRGB->linesize );
pFrameRGB->format = AV_PIX_FMT_RGB8;
pFrameRGB->pts = frame->pts;
pFrameRGB->best_effort_timestamp = frame->best_effort_timestamp;
pFrameRGB->width = gifWidth;
pFrameRGB->height = gifHeight;
pFrameRGB->pkt_dts = frame->pkt_dts;
pFrameRGB->pkt_pts = frame->pkt_pts;
pFrameRGB->pkt_duration = frame->pkt_duration;
pFrameRGB->pkt_pos = frame->pkt_pos;
pFrameRGB->pkt_size = frame->pkt_size;
pFrameRGB->interlaced_frame = frame->interlaced_frame;
////////////////////////////////////////////////////////////////////////////////
ret = encodeAndWriteFrame(pFrameRGB, stream_index, NULL);
//av_frame_free(&frame);
//av_free(out_buffer);
//sws_freeContext(img_convert_ctx);
if (ret < 0)
{
qDebug() << "Error encoding and writting frame";
//av_free_packet(&packet);
closeOutput();
}
}
else {
//av_frame_free(&frame);
}
}
av_free_packet(&packet);
}
ret = flushEncoder(videoStreamIndex);
if (ret < 0)
{
qDebug() << "Flushing encoder failed";
}
av_write_trailer(oFmtCtx);
//av_free_packet(&packet);
//av_frame_free(&frame);
closeOutput();
}
void VideoManager::closeOutput()
{
if (oFmtCtx && oFmtCtx->nb_streams > 0 && oFmtCtx->streams[0] && oFmtCtx->streams[0]->codec)
avcodec_close(oFmtCtx->streams[0]->codec);
if (oFmtCtx && oFmt && !(oFmt->flags & AVFMT_NOFILE))
avio_closep(&oFmtCtx->pb);
avformat_free_context(oFmtCtx);
}
int VideoManager::openOutputFile(const char *filename)
{
AVStream *out_stream;
AVStream *in_stream;
AVCodecContext *dec_ctx, *enc_ctx;
AVCodec *encoder;
int ret;
oFmtCtx = NULL;
avformat_alloc_output_context2(&oFmtCtx, NULL, NULL, filename);
if (!oFmtCtx) {
qDebug() << "Could not create output context";
return AVERROR_UNKNOWN;
}
oFmt = oFmtCtx->oformat;
out_stream = avformat_new_stream(oFmtCtx, NULL);
if (!out_stream) {
qDebug() << "Failed allocating output stream";
return AVERROR_UNKNOWN;
}
in_stream = iFmtCtx->streams[videoStreamIndex];
dec_ctx = in_stream->codec;
enc_ctx = out_stream->codec;
encoder = avcodec_find_encoder(AV_CODEC_ID_GIF);
if (!encoder) {
qDebug() << "FATAL!: Necessary encoder not found";
return AVERROR_INVALIDDATA;
}
enc_ctx->height = gifHeight;
enc_ctx->width = gifWidth;
enc_ctx->sample_aspect_ratio = dec_ctx->sample_aspect_ratio;
enc_ctx->pix_fmt = AV_PIX_FMT_RGB8;
enc_ctx->time_base = dec_ctx->time_base;
ret = avcodec_open2(enc_ctx, encoder, NULL);
if (ret < 0) {
qDebug() << "Cannot open video encoder for gif";
return ret;
}
if (oFmt->flags & AVFMT_GLOBALHEADER)
enc_ctx->flags |= CODEC_FLAG_GLOBAL_HEADER;
if (!(oFmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&oFmtCtx->pb, filename, AVIO_FLAG_WRITE);
if (ret < 0) {
qDebug() << "Could not open output file " << filename;
return ret;
}
}
ret = avformat_write_header(oFmtCtx, NULL);
if (ret < 0) {
qDebug() << "Error occurred when opening output file";
return ret;
}
return 0;
}
int VideoManager::encodeAndWriteFrame(AVFrame *frame, unsigned int stream_index, int *got_frame) {
int ret;
int got_frame_local;
AVPacket enc_pkt;
if (!got_frame)
got_frame = &got_frame_local;
enc_pkt.data = NULL;
enc_pkt.size = 0;
av_init_packet(&enc_pkt);
ret = avcodec_encode_video2(oFmtCtx->streams[stream_index]->codec, &enc_pkt,
frame, got_frame);
//av_frame_free(&frame);
if (ret < 0)
return ret;
if (!(*got_frame))
return 0;
enc_pkt.stream_index = stream_index;
av_packet_rescale_ts(&enc_pkt,
oFmtCtx->streams[stream_index]->codec->time_base,
oFmtCtx->streams[stream_index]->time_base);
ret = av_interleaved_write_frame(oFmtCtx, &enc_pkt);
return ret;
}
int VideoManager::flushEncoder(unsigned int stream_index)
{
int ret;
int got_frame;
if (!(oFmtCtx->streams[stream_index]->codec->codec->capabilities &
CODEC_CAP_DELAY))
return 0;
while (1) {
ret = encodeAndWriteFrame(NULL, stream_index, &got_frame);
if (ret < 0)
break;
if (!got_frame)
return 0;
}
return ret;
}
I know there are a lot of memory leaks. I deleted/commented most of the free functions intentionality because i thought that was the problem.
I'm using Qtcreator, so when i debug the programs this is the output:
Level Function Line
0 av_image_copy 303
1 frame_copy_video 650
2 av_frame_copy 687
3 av_frame_ref 384
4 gif_encode_frame 307
5 avcodec_encode_video2 2191
6 VideoManager::encodeAndWriteFrame 813
7 VideoManager::generateGif 375
8 qMain 31
9 WinMain*16 112
10 main
I've checked if there is a specific frame the program crash at, but it's a random frame too.
Any idea of what i'm doing wrong? Any help would be very appreciated.
EDIT:
After a few days of pain, suffering and frustation I decided to write the whole code from scratch. Both times i started from this example and modified it in order to works as I described before. And it works perfectly now :D! The only error i could find in the old code (posted before) is when i try to access to the video stream in the output file I used videoStreamIndex, but that index is from the video stream in the input file. Some times it could be the same index and some times not. But it doesn't explain why it crashed randomly. If that was the reason of the crash, it should crash every time i ran the code with the same video. So probably, there are more errors in that code.
Notice that i've not tested if fixing that error in the code above actually solve the crashing problems.
I think you may have your parameters mixed up. According to what I'm reading from the documentation avcodec_decode_video2's prototype looks like:
int avcodec_decode_video2 (AVCodecContext * avctx,
AVFrame * picture,
int * got_picture_ptr,
const AVPacket * avpkt)
And is called with:
ret = avcodec_encode_video2(oFmtCtx->streams[stream_index]->codec, // Dunno.
&enc_pkt, //AVPacket * should be AVFrame *
frame, //AVFrame * Should be int *
got_frame); // int * should be AVPacket *