Fragmented MP4 - problem playing in browser - C++

I'm trying to create a fragmented MP4 from raw H264 video data so I can play it in an internet browser's player. My goal is to build a live-streaming system, where a media server would send fragmented MP4 pieces to the browser. The server would buffer input data from a Raspberry Pi camera, which sends the video as H264 frames. It would then mux that video data and make it available to the client. The browser would play the media data (muxed by the server and sent e.g. through a websocket) using Media Source Extensions.
For test purposes I wrote the following pieces of code (using many examples I found on the internet):
C++ application using avcodec which muxes raw H264 video to fragmented MP4 and saves it to a file:
#define READBUFSIZE 4096
#define IOBUFSIZE 4096
#define ERRMSGSIZE 128
#include <cstdint>
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
extern "C"
{
#include <libavformat/avformat.h>
#include <libavutil/error.h>
#include <libavutil/opt.h>
}
enum NalType : uint8_t
{
//NALs containing stream metadata
SEQ_PARAM_SET = 0x7,
PIC_PARAM_SET = 0x8
};
std::vector<uint8_t> outputData;
int mediaMuxCallback(void *opaque, uint8_t *buf, int bufSize)
{
outputData.insert(outputData.end(), buf, buf + bufSize);
return bufSize;
}
std::string getAvErrorString(int errNr)
{
char errMsg[ERRMSGSIZE];
av_strerror(errNr, errMsg, ERRMSGSIZE);
return std::string(errMsg);
}
int main(int argc, char **argv)
{
if(argc < 2)
{
std::cout << "Missing file name" << std::endl;
return 1;
}
std::fstream file(argv[1], std::ios::in | std::ios::binary);
if(!file.is_open())
{
std::cout << "Couldn't open file " << argv[1] << std::endl;
return 2;
}
std::vector<uint8_t> inputMediaData;
do
{
char buf[READBUFSIZE];
file.read(buf, READBUFSIZE);
int size = file.gcount();
if(size > 0)
inputMediaData.insert(inputMediaData.end(), buf, buf + size);
} while(!file.eof());
file.close();
//Initialize avcodec
av_register_all();
uint8_t *ioBuffer;
AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264);
AVCodecContext *codecCtxt = avcodec_alloc_context3(codec);
AVCodecParserContext *parserCtxt = av_parser_init(AV_CODEC_ID_H264);
AVOutputFormat *outputFormat = av_guess_format("mp4", nullptr, nullptr);
AVFormatContext *formatCtxt;
AVIOContext *ioCtxt;
AVStream *videoStream;
int res = avformat_alloc_output_context2(&formatCtxt, outputFormat, nullptr, nullptr);
if(res < 0)
{
std::cout << "Couldn't initialize format context; the error was: " << getAvErrorString(res) << std::endl;
return 3;
}
if((videoStream = avformat_new_stream( formatCtxt, avcodec_find_encoder(formatCtxt->oformat->video_codec) )) == nullptr)
{
std::cout << "Couldn't initialize video stream" << std::endl;
return 4;
}
else if(!codec)
{
std::cout << "Couldn't initialize codec" << std::endl;
return 5;
}
else if(codecCtxt == nullptr)
{
std::cout << "Couldn't initialize codec context" << std::endl;
return 6;
}
else if(parserCtxt == nullptr)
{
std::cout << "Couldn't initialize parser context" << std::endl;
return 7;
}
else if((ioBuffer = (uint8_t*)av_malloc(IOBUFSIZE)) == nullptr)
{
std::cout << "Couldn't allocate I/O buffer" << std::endl;
return 8;
}
else if((ioCtxt = avio_alloc_context(ioBuffer, IOBUFSIZE, 1, nullptr, nullptr, mediaMuxCallback, nullptr)) == nullptr)
{
std::cout << "Couldn't initialize I/O context" << std::endl;
return 9;
}
//Set video stream data
videoStream->id = formatCtxt->nb_streams - 1;
videoStream->codec->width = 1280;
videoStream->codec->height = 720;
videoStream->time_base.den = 60; //FPS
videoStream->time_base.num = 1;
videoStream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
formatCtxt->pb = ioCtxt;
//Retrieve SPS and PPS for codec extdata
const uint32_t synchMarker = 0x01000000;
unsigned int i = 0;
int spsStart = -1, ppsStart = -1;
uint16_t spsSize = 0, ppsSize = 0;
while(spsSize == 0 || ppsSize == 0)
{
uint32_t *curr = (uint32_t*)(inputMediaData.data() + i);
if(*curr == synchMarker)
{
unsigned int currentNalStart = i;
i += sizeof(uint32_t);
uint8_t nalType = inputMediaData.data()[i] & 0x1F;
if(nalType == SEQ_PARAM_SET)
spsStart = currentNalStart;
else if(nalType == PIC_PARAM_SET)
ppsStart = currentNalStart;
if(spsStart >= 0 && spsSize == 0 && spsStart != i)
spsSize = currentNalStart - spsStart;
else if(ppsStart >= 0 && ppsSize == 0 && ppsStart != i)
ppsSize = currentNalStart - ppsStart;
}
++i;
}
videoStream->codec->extradata = inputMediaData.data() + spsStart;
videoStream->codec->extradata_size = ppsStart + ppsSize;
//Write main header
AVDictionary *options = nullptr;
av_dict_set(&options, "movflags", "frag_custom+empty_moov", 0);
res = avformat_write_header(formatCtxt, &options);
if(res < 0)
{
std::cout << "Couldn't write container main header; the error was: " << getAvErrorString(res) << std::endl;
return 10;
}
//Retrieve frames from input video and wrap them in container
int currentInputIndex = 0;
int framesInSecond = 0;
while(currentInputIndex < inputMediaData.size())
{
uint8_t *frameBuffer;
int frameSize;
res = av_parser_parse2(parserCtxt, codecCtxt, &frameBuffer, &frameSize, inputMediaData.data() + currentInputIndex,
inputMediaData.size() - currentInputIndex, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
if(frameSize == 0) //No more frames while some data still remains (is that even possible?)
{
std::cout << "Some data left unparsed: " << std::to_string(inputMediaData.size() - currentInputIndex) << std::endl;
break;
}
//Prepare packet with video frame to be dumped into container
AVPacket packet;
av_init_packet(&packet);
packet.data = frameBuffer;
packet.size = frameSize;
packet.stream_index = videoStream->index;
currentInputIndex += frameSize;
//Write packet to the video stream
res = av_write_frame(formatCtxt, &packet);
if(res < 0)
{
std::cout << "Couldn't write packet with video frame; the error was: " << getAvErrorString(res) << std::endl;
return 11;
}
if(++framesInSecond == 60) //We want 1 segment per second
{
framesInSecond = 0;
res = av_write_frame(formatCtxt, nullptr); //Flush segment
}
}
res = av_write_frame(formatCtxt, nullptr); //Flush if something has been left
//Write media data in container to file
file.open("my_mp4.mp4", std::ios::out | std::ios::binary);
if(!file.is_open())
{
std::cout << "Couldn't open output file " << std::endl;
return 12;
}
file.write((char*)outputData.data(), outputData.size());
if(file.fail())
{
std::cout << "Couldn't write to file" << std::endl;
return 13;
}
std::cout << "Media file muxed successfully" << std::endl;
return 0;
}
(I hardcoded a few values, such as the video dimensions and framerate, but as I said this is just test code.)
Simple HTML webpage using MSE to play my fragmented MP4
<!DOCTYPE html>
<html>
<head>
<title>Stream test</title>
</head>
<body>
<video width="1280" height="720" controls>
</video>
</body>
<script>
var vidElement = document.querySelector('video');
if (window.MediaSource) {
var mediaSource = new MediaSource();
vidElement.src = URL.createObjectURL(mediaSource);
mediaSource.addEventListener('sourceopen', sourceOpen);
} else {
console.log("The Media Source Extensions API is not supported.")
}
function sourceOpen(e) {
URL.revokeObjectURL(vidElement.src);
var mime = 'video/mp4; codecs="avc1.640028"';
var mediaSource = e.target;
var sourceBuffer = mediaSource.addSourceBuffer(mime);
var videoUrl = 'my_mp4.mp4';
fetch(videoUrl)
.then(function(response) {
return response.arrayBuffer();
})
.then(function(arrayBuffer) {
sourceBuffer.addEventListener('updateend', function(e) {
if (!sourceBuffer.updating && mediaSource.readyState === 'open') {
mediaSource.endOfStream();
}
});
sourceBuffer.appendBuffer(arrayBuffer);
});
}
</script>
</html>
The output MP4 file generated by my C++ application can be played e.g. in MPC, but it doesn't play in any web browser I tested it with. It also doesn't have any duration (MPC keeps showing 00:00).
To compare with the output MP4 I got from my C++ application described above, I also used FFMPEG to create a fragmented MP4 file from the same source file with the raw H264 stream. I used the following command:
ffmpeg -r 60 -i input.h264 -c:v copy -f mp4 -movflags empty_moov+default_base_moof+frag_keyframe test.mp4
The file generated by FFMPEG plays correctly in every web browser I used for tests. It also has a correct duration (but it also has a trailing atom, which wouldn't be present in my live stream anyway; and since I need a live stream, it won't have any fixed duration in the first place).
The MP4 atoms for both files look very similar (they have an identical avcc section for sure). What's interesting (though I'm not sure whether it matters), both files have a different NAL format than the input file: the RPI camera produces the video stream in Annex-B format, while the output MP4 files contain NALs in AVCC format... or at least that's how it looks when I compare the mdat atoms with the input H264 data.
I assume there is some field (or a few fields) I need to set for avcodec to make it produce a video stream that browser players can properly decode and play. But which field(s) do I need to set? Or maybe the problem lies somewhere else? I've run out of ideas.
EDIT 1:
As suggested, I investigated the binary content of both MP4 files (generated by my app and by the FFMPEG tool) with a hex editor. What I can confirm:
both files have an identical avcc section (they match perfectly and are in AVCC format; I analyzed it byte after byte and there's no mistake about it)
both files have NALs in AVCC format (I looked closely at the mdat atoms and they don't differ between the two MP4 files)
So I guess there's nothing wrong with the extradata creation in my code - avcodec takes care of it properly, even if I just feed it with SPS and PPS NALs. It converts them by itself, so there's no need for me to do it by hand. Still, my original problem remains.
EDIT 2: I achieved partial success - the MP4 generated by my app now plays in Firefox. I added this line to the code (along with the rest of the stream initialization):
videoStream->codec->time_base = videoStream->time_base;
So now this section of my code looks like this:
//Set video stream data
videoStream->id = formatCtxt->nb_streams - 1;
videoStream->codec->width = 1280;
videoStream->codec->height = 720;
videoStream->time_base.den = 60; //FPS
videoStream->time_base.num = 1;
videoStream->codec->time_base = videoStream->time_base;
videoStream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
formatCtxt->pb = ioCtxt;

I finally found the solution. My MP4 now plays in Chrome (while still playing in other tested browsers).
In Chrome, chrome://media-internals/ shows MSE logs (of a sort). When I looked there, I found a few of the following warnings for my test player:
ISO-BMFF container metadata for video frame indicates that the frame is not a keyframe, but the video frame contents indicate the opposite.
That made me think, and encouraged me to set AV_PKT_FLAG_KEY on packets containing keyframes. I added the following code to the section that fills the AVPacket structure:
//Check if keyframe field needs to be set
int allowedNalsCount = 3; //In one packet there would be at most three NALs: SPS, PPS and video frame
packet.flags = 0;
for(int i = 0; i < frameSize && allowedNalsCount > 0; ++i)
{
uint32_t *curr = (uint32_t*)(frameBuffer + i);
if(*curr == synchMarker)
{
uint8_t nalType = frameBuffer[i + sizeof(uint32_t)] & 0x1F;
if(nalType == KEYFRAME)
{
std::cout << "Keyframe detected at frame nr " << framesTotal << std::endl;
packet.flags = AV_PKT_FLAG_KEY;
break;
}
else
i += sizeof(uint32_t) + 1; //We parsed this already, no point in doing it again
--allowedNalsCount;
}
}
The KEYFRAME constant turns out to be 0x5 in my case (coded slice of an IDR picture).
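For reference, a minimal sketch of how that constant could extend the NalType enum defined at the top of the code above (0x5 is the H264 NAL type for a coded slice of an IDR picture):
enum NalType : uint8_t
{
    //NALs containing stream metadata
    SEQ_PARAM_SET = 0x7,
    PIC_PARAM_SET = 0x8,
    //Coded slice of an IDR picture - used here as the keyframe marker
    KEYFRAME = 0x5
};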

MP4 atoms for both files look very similar (they have identical avcc
section for sure)
Double-check that; the code supplied suggests otherwise to me.
What's interesting (but not sure if it's of any importance), both
files have different NALs format than input file (RPI camera produces
video stream in Annex-B format, while output MP4 files contain NALs in
AVCC format... or at least it looks like it's the case when I compare
mdat atoms with input H264 data).
It is very important: MP4 will not work with Annex-B.
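For background (as the asker's EDIT 1 confirms, libavformat already performed this conversion for them): going from Annex-B to the AVCC sample layout just means replacing each start code with a 4-byte big-endian length prefix for the NAL that follows. A rough sketch, assuming 4-byte start codes only and no validation:
#include <cstdint>
#include <vector>

// Replace each 0x00000001 start code with a 4-byte big-endian NAL length.
// Sketch only: assumes 4-byte start codes and performs no error checking.
std::vector<uint8_t> annexbToAvcc(const std::vector<uint8_t> &annexb)
{
    std::vector<uint8_t> avcc;
    size_t pos = 0;
    while (pos + 4 <= annexb.size())
    {
        size_t nalStart = pos + 4;              // skip the start code
        size_t next = nalStart;
        // Find the next start code (or the end of the buffer)
        while (next + 4 <= annexb.size() &&
               !(annexb[next] == 0 && annexb[next + 1] == 0 &&
                 annexb[next + 2] == 0 && annexb[next + 3] == 1))
            ++next;
        if (next + 4 > annexb.size())
            next = annexb.size();
        uint32_t nalSize = static_cast<uint32_t>(next - nalStart);
        avcc.push_back(uint8_t(nalSize >> 24));
        avcc.push_back(uint8_t(nalSize >> 16));
        avcc.push_back(uint8_t(nalSize >> 8));
        avcc.push_back(uint8_t(nalSize));
        avcc.insert(avcc.end(), annexb.begin() + nalStart, annexb.begin() + next);
        pos = next;
    }
    return avcc;
}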

You need to fill extradata with an AVC Decoder Configuration Record, not just the SPS/PPS.
Here's what the record should look like:
[AVCDCR - diagram of the AVC Decoder Configuration Record layout]
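A minimal sketch of building such a record from one SPS and one PPS NAL (each given without its start code), following the AVCDecoderConfigurationRecord layout from ISO/IEC 14496-15. Whether you have to do this by hand depends on your libavformat version; as the asker's EDIT 1 suggests, newer builds can derive it from raw SPS/PPS themselves:
#include <cstdint>
#include <vector>

// Build an AVCDecoderConfigurationRecord (the payload of the avcC box) from a
// single SPS and PPS NAL, each given without start code. Assumes sps has at
// least 4 bytes (NAL header + profile, compatibility, level).
std::vector<uint8_t> buildAvcC(const std::vector<uint8_t> &sps,
                               const std::vector<uint8_t> &pps)
{
    std::vector<uint8_t> avcc;
    avcc.push_back(1);              // configurationVersion
    avcc.push_back(sps[1]);         // AVCProfileIndication
    avcc.push_back(sps[2]);         // profile_compatibility
    avcc.push_back(sps[3]);         // AVCLevelIndication
    avcc.push_back(0xFF);           // 6 reserved bits + lengthSizeMinusOne = 3 (4-byte lengths)
    avcc.push_back(0xE1);           // 3 reserved bits + numOfSequenceParameterSets = 1
    avcc.push_back(uint8_t(sps.size() >> 8));
    avcc.push_back(uint8_t(sps.size() & 0xFF));
    avcc.insert(avcc.end(), sps.begin(), sps.end());
    avcc.push_back(1);              // numOfPictureParameterSets
    avcc.push_back(uint8_t(pps.size() >> 8));
    avcc.push_back(uint8_t(pps.size() & 0xFF));
    avcc.insert(avcc.end(), pps.begin(), pps.end());
    return avcc;
}
The resulting buffer is what would go into the codec extradata (copied into an av_malloc'ed buffer) so the muxer can emit it as the avcC box.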

We can find this explanation in the Chrome media source code (https://chromium.googlesource.com/chromium/src/+/refs/heads/master/media/formats/mp4/mp4_stream_parser.cc#799):
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Use |analysis.is_keyframe|, if it was actually determined, for logging
// if the analysis mismatches the container's keyframe metadata for
// |frame_buf|.
if (analysis.is_keyframe.has_value() &&
is_keyframe != analysis.is_keyframe.value()) {
LIMITED_MEDIA_LOG(DEBUG, media_log_, num_video_keyframe_mismatches_,
kMaxVideoKeyframeMismatchLogs)
<< "ISO-BMFF container metadata for video frame indicates that the "
"frame is "
<< (is_keyframe ? "" : "not ")
<< "a keyframe, but the video frame contents indicate the "
"opposite.";
// As of September 2018, it appears that all of Edge, Firefox, Safari
// work with content that marks non-avc-keyframes as a keyframe in the
// container. Encoders/muxers/old streams still exist that produce
// all-keyframe mp4 video tracks, though many of the coded frames are
// not keyframes (likely workaround due to the impact on low-latency
// live streams until https://crbug.com/229412 was fixed). We'll trust
// the AVC frame's keyframe-ness over the mp4 container's metadata if
// they mismatch. If other out-of-order codecs in mp4 (e.g. HEVC, DV)
// implement keyframe analysis in their frame_bitstream_converter, we'll
// similarly trust that analysis instead of the mp4.
is_keyframe = analysis.is_keyframe.value();
}
As the code comment shows, Chrome trusts the AVC frame's keyframe-ness over the MP4 container's metadata. So the NALU type in H264/HEVC matters more than the MP4 container's sdtp and trun box descriptions.

Related

C++ ffmpeg video missing frames and won't play in Quicktime

I wrote some C++ code that uses ffmpeg to encode a video. I'm having two strange issues:
The final video is always missing 1 frame. That is, if I have it encode 10 frames, the final video only has 9 (at least that's what ffprobe -show_frames -pretty $VIDEO | grep -F '[FRAME]' | wc -l tells me).
The final video plays fine in some players (mpv and vlc) but not in Quicktime. Quicktime just shows a completely black screen.
My code is roughly this (modified a bit to remove types that are unique to our code base):
First, I open the video file, write the headers and initialize things:
template <class PtrT>
using UniquePtrWithDeleteFunction = std::unique_ptr<PtrT, std::function<void (PtrT*)>>;
std::unique_ptr<FfmpegEncodingFrameSink> FfmpegEncodingFrameSink::Create(
const std::string& dest_url) {
AVFormatContext* tmp_format_ctxt;
auto alloc_format_res = avformat_alloc_output_context2(&tmp_format_ctxt, nullptr, "mp4", dest_url.c_str());
if (alloc_format_res < 0) {
throw FfmpegException("Error opening output file.");
}
auto format_ctxt = UniquePtrWithDeleteFunction<AVFormatContext>(
tmp_format_ctxt, CloseAvFormatContext);
AVStream* out_stream_video = avformat_new_stream(format_ctxt.get(), nullptr);
if (out_stream_video == nullptr) {
throw FfmpegException("Could not create outputstream");
}
auto codec_context = GetCodecContext(options);
out_stream_video->time_base = codec_context->time_base;
auto ret = avcodec_parameters_from_context(out_stream_video->codecpar, codec_context.get());
if (ret < 0) {
throw FfmpegException("Failed to copy encoder parameters to outputstream");
}
if (!(format_ctxt->oformat->flags & AVFMT_NOFILE)) {
ret = avio_open(&format_ctxt->pb, dest_url.c_str(), AVIO_FLAG_WRITE);
if (ret < 0) {
throw VideoDecodeException("Could not open output file: " + dest_url);
}
}
ret = avformat_init_output(format_ctxt.get(), nullptr);
if (ret < 0) {
throw FfmpegException("Unable to initialize the codec.");
}
ret = avformat_write_header(format_ctxt.get(), nullptr);
if (ret < 0) {
throw FfmpegException("Error occurred writing format header");
}
return std::unique_ptr<FfmpegEncodingFrameSink>(
new FfmpegEncodingFrameSink(std::move(format_ctxt), std::move(codec_context)));
}
Then, every time I get a new frame to encode I pass it to this function (the frames are being decoded via ffmpeg from another mp4 file which Quicktime plays just fine):
// If frame == nullptr then we're done and we're just flushing the encoder
// otherwise encode an actual frame
void FfmpegEncodingFrameSink::EncodeAndWriteFrame(
const AVFrame* frame) {
auto ret = avcodec_send_frame(codec_ctxt_.get(), frame);
if (ret < 0) {
throw FfmpegException("Error encoding the frame.");
}
AVPacket enc_packet;
enc_packet.data = nullptr;
enc_packet.size = 0;
av_init_packet(&enc_packet);
do {
ret = avcodec_receive_packet(codec_ctxt_.get(), &enc_packet);
if (ret == AVERROR(EAGAIN)) {
CHECK(frame != nullptr);
break;
} else if (ret == AVERROR_EOF) {
CHECK(frame == nullptr);
break;
} else if (ret < 0) {
throw FfmpegException("Error putting the encoded frame into the packet.");
}
assert(ret == 0);
enc_packet.stream_index = 0;
LOG(INFO) << "Writing packet to stream.";
av_interleaved_write_frame(format_ctxt_.get(), &enc_packet);
av_packet_unref(&enc_packet);
} while (ret == 0);
}
Finally, in my destructor I close everything up like so:
FfmpegEncodingFrameSink::~FfmpegEncodingFrameSink() {
// Pass a nullptr to EncodeAndWriteFrame so it flushes the encoder
EncodeAndWriteFrame(nullptr);
// write mp4 trailer
av_write_trailer(format_ctxt_.get());
}
If I run this passing n frames to EncodeAndWriteFrame, the line LOG(INFO) << "Writing packet to stream."; gets run n times, indicating that n packets were written to the stream. But ffprobe always shows only n - 1 frames in the video. And the final video doesn't play in Quicktime.
What am I doing wrong??
Sorry for the delay, but as I just had the same problem and noticed that this question deserves an answer, here is how I solved it.
Up front: the problem only occurred for me when using mov, mp4 or 3gp as the format. It worked frame-accurately when using e.g. the avi format. When I wrote uncompressed video frames to the container, I saw that the avi and mov had the same count of frames stored, but the mov obviously had some problem in its header.
Counting the number of frames in the mov using header metadata showed that one frame was missing:
ffprobe -v error -count_frames -select_streams v:0 -show_entries stream=nb_read_frames -of default=nokey=1:noprint_wrappers=1 c:\temp\myinput.mov
While ignoring the edit list (adding -ignore_editlist 1) showed the correct number of frames.
The solution for me was to set the time base on the video stream's codec context (AVCodecContext).
The code above attempts to do this in this line:
out_stream_video->time_base = codec_context->time_base;
But the problem is that the posted code does not show the GetCodecContext function, so we don't know whether time_base is correctly set on codec_context. So it is my belief that the author's problem was that his GetCodecContext did not set the time_base correctly.
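To illustrate, a hypothetical sketch of a GetCodecContext-style factory that does set time_base before the stream copies it; the codec choice and the 30 fps value are placeholders, not the poster's actual code:
extern "C"
{
#include <libavcodec/avcodec.h>
}
#include <stdexcept>

// Hypothetical encoder-context factory; the key lines are the time_base setup.
AVCodecContext* MakeCodecContext()
{
    const AVCodec* codec = avcodec_find_encoder(AV_CODEC_ID_H264);
    AVCodecContext* ctx = avcodec_alloc_context3(codec);
    ctx->width = 1920;                    // placeholder dimensions
    ctx->height = 1080;
    ctx->pix_fmt = AV_PIX_FMT_YUV420P;
    ctx->time_base = AVRational{1, 30};   // one tick per frame at 30 fps
    ctx->framerate = AVRational{30, 1};
    if (avcodec_open2(ctx, codec, nullptr) < 0)
        throw std::runtime_error("avcodec_open2 failed");
    return ctx;
}
With a context like that, the line out_stream_video->time_base = codec_context->time_base from the question copies a meaningful value into the stream.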

FFMPEG RTSP stream to MPEG4/H264 file using libx264

Heyo folks,
I'm attempting to transcode/remux an RTSP stream in H264 format into an MPEG4 container, containing just the H264 video stream. Basically, webcam output into an MP4 container.
I can get a poorly coded MP4 produced, using this code:
// Variables here for demo
AVFormatContext * video_file_output_format = nullptr;
AVFormatContext * rtsp_format_context = nullptr;
AVCodecContext * video_file_codec_context = nullptr;
AVCodecContext * rtsp_vidstream_codec_context = nullptr;
AVPacket packet = {0};
AVStream * video_file_stream = nullptr;
AVCodec * rtsp_decoder_codec = nullptr;
int errorNum = 0, video_stream_index = 0;
std::string outputMP4file = "D:\\somemp4file.mp4";
// begin
AVDictionary * opts = nullptr;
av_dict_set(&opts, "rtsp_transport", "tcp", 0);
if ((errorNum = avformat_open_input(&rtsp_format_context, uriANSI.c_str(), NULL, &opts)) < 0) {
errOut << "Connection failed: avformat_open_input failed with error " << errorNum << ":\r\n" << ErrorRead(errorNum);
TacticalAbort();
return;
}
rtsp_format_context->max_analyze_duration = 50000;
if ((errorNum = avformat_find_stream_info(rtsp_format_context, NULL)) < 0) {
errOut << "Connection failed: avformat_find_stream_info failed with error " << errorNum << ":\r\n" << ErrorRead(errorNum);
TacticalAbort();
return;
}
video_stream_index = errorNum = av_find_best_stream(rtsp_format_context, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
if (video_stream_index < 0) {
errOut << "Connection in unexpected state; made a connection, but there was no video stream.\r\n"
"Attempts to find a video stream resulted in error " << errorNum << ": " << ErrorRead(errorNum);
TacticalAbort();
return;
}
rtsp_vidstream_codec_context = rtsp_format_context->streams[video_stream_index]->codec;
av_init_packet(&packet);
if (!(video_file_output_format = av_guess_format(NULL, outputMP4file.c_str(), NULL))) {
TacticalAbort();
throw std::exception("av_guess_format");
}
if (!(rtsp_decoder_codec = avcodec_find_decoder(rtsp_vidstream_codec_context->codec_id))) {
errOut << "Connection failed: connected, but avcodec_find_decoder returned null.\r\n"
"Couldn't find codec with an AV_CODEC_ID value of " << rtsp_vidstream_codec_context->codec_id << ".";
TacticalAbort();
return;
}
video_file_format_context = avformat_alloc_context();
video_file_format_context->oformat = video_file_output_format;
if (strcpy_s(video_file_format_context->filename, sizeof(video_file_format_context->filename), outputMP4file.c_str())) {
errOut << "Couldn't open video file: strcpy_s failed with error " << errno << ".";
std::string log = errOut.str();
TacticalAbort();
throw std::exception("strcpy_s");
}
if (!(video_file_encoder_codec = avcodec_find_encoder(video_file_output_format->video_codec))) {
TacticalAbort();
throw std::exception("avcodec_find_encoder");
}
// MARKER ONE
if (!outputMP4file.empty() &&
!(video_file_output_format->flags & AVFMT_NOFILE) &&
(errorNum = avio_open2(&video_file_format_context->pb, outputMP4file.c_str(), AVIO_FLAG_WRITE, nullptr, &opts)) < 0) {
errOut << "Couldn't open video file \"" << outputMP4file << "\" for writing : avio_open2 failed with error " << errorNum << ": " << ErrorRead(errorNum);
TacticalAbort();
return;
}
// Create stream in MP4 file
if (!(video_file_stream = avformat_new_stream(video_file_format_context, video_file_encoder_codec))) {
TacticalAbort();
return;
}
AVCodecContext * video_file_codec_context = video_file_stream->codec;
// MARKER TWO
// error -22/-21 in avio_open2 if this is skipped
if ((errorNum = avcodec_copy_context(video_file_codec_context, rtsp_vidstream_codec_context)) != 0) {
TacticalAbort();
throw std::exception("avcodec_copy_context");
}
//video_file_codec_context->codec_tag = 0;
/*
// MARKER 3 - is this not needed? Examples suggest not.
if ((errorNum = avcodec_open2(video_file_codec_context, video_file_encoder_codec, &opts)) < 0)
{
errOut << "Couldn't open video file codec context: avcodec_open2 failed with error " << errorNum << ": " << ErrorRead(errorNum);
std::string log = errOut.str();
TacticalAbort();
throw std::exception("avcodec_open2, video file");
}*/
//video_file_format_context->flags |= AVFMT_FLAG_GENPTS;
if (video_file_format_context->oformat->flags & AVFMT_GLOBALHEADER)
{
video_file_codec_context->flags |= CODEC_FLAG_GLOBAL_HEADER;
}
if ((errorNum = avformat_write_header(video_file_format_context, &opts)) < 0) {
errOut << "Couldn't open video file: avformat_write_header failed with error " << errorNum << ":\r\n" << ErrorRead(errorNum);
std::string log = errOut.str();
TacticalAbort();
return;
}
However, there are several issues:
I can't pass any x264 options to the output file. The output H264 matches the input H264's profile/level - switching cameras to a different model switches H264 level.
The timing of the output file is off, noticeably.
The duration of the output file is off, massively. A few seconds of footage becomes hours, although playtime doesn't match. (FWIW, I'm using VLC to play them.)
Passing x264 options
If I manually increment PTS per packet, and set DTS equal to PTS, it plays too fast, ~2-3 seconds' worth of footage in one second playtime, and duration is hours long. The footage also blurs past several seconds, about 10 seconds' footage in a second.
If I let FFMPEG decide (with or without GENPTS flag), the file has a variable frame rate (probably as expected), but it plays the whole file in an instant and has a long duration too (over forty hours for a few seconds). The duration isn't "real", as the file plays in an instant.
At Marker One, I try to set the profile by passing options to avio_open2. The options are simply ignored by libx264. I've tried:
av_dict_set(&opts, "vprofile", "main", 0);
av_dict_set(&opts, "profile", "main", 0); // error, missing '('
// FF_PROFILE_H264_MAIN equals 77, so I also tried
av_dict_set(&opts, "vprofile", "77", 0);
av_dict_set(&opts, "profile", "77", 0);
It does seem to read the profile setting, but it doesn't use it. At Marker Two, I tried to set it after avio_open2, before avformat_write_header.
// I tried all 4 av_dict_set from earlier, passing it to avformat_write_header.
// None had any effect, they weren't consumed.
av_opt_set(video_file_codec_context, "profile", "77", 0);
av_opt_set(video_file_codec_context, "profile", "main", 0);
video_file_codec_context->profile = FF_PROFILE_H264_MAIN;
av_opt_set(video_file_codec_context->priv_data, "profile", "77", 0);
av_opt_set(video_file_codec_context->priv_data, "profile", "main", 0);
Messing with priv_data made the program unstable, but I was trying anything at that point.
I'd like to solve issue 1 with passing settings, since I imagine it'd bottleneck any attempt to solve issues 2 or 3.
I've been fiddling with this for the better part of a month now. I've been through dozens of documentation, Q&As, examples. It doesn't help that quite a few are outdated.
Any help would be appreciated.
Cheers
Okay, firstly, I wasn't using ffmpeg, but a fork of ffmpeg called libav. Not to be confused: ffmpeg is the more recent project, and libav was used in some distributions of Linux.
Compiling for Visual Studio
Once I upgraded to the main branch, I had to compile it manually again, since I was using it in Visual Studio and the only static libraries available are for G++, so linking doesn't work nicely.
The official guide is https://trac.ffmpeg.org/wiki/CompilationGuide/MSVC.
First, compiling as such works fine:
Ensure VS is in PATH. Your PATH should read in this order:
C:\Program Files (x86)\Microsoft Visual Studio XX.0\VC\bin
D:\MinGW\msys64\mingw32\bin
D:\MinGW\msys64\usr\bin
D:\MinGW\bin
Then run Visual Studio x86 Native Tools prompt. Should be in your Start Menu.
In the CMD, run
(your path to MinGW)\msys64\msys2_shell.cmd -full-path
In the created MinGW window, run:
$ cd /your dev path/
$ git clone https://git.ffmpeg.org/ffmpeg.git ffmpeg
After about five minutes you'll have the FFMPEG source in the subfolder ffmpeg.
Access the source via:
$ cd ffmpeg
Then run:
$ which link
If it doesn't print the VS path from PATH, but usr/link or usr/bin/link instead, rename it as follows:
$ mv /usr/bin/link.exe /usr/bin/msys-link.exe
If it does, skip the $ mv step.
Finally, run this command:
$ ./configure --toolchain=msvc and whatever other cmdlines you want
(you can see commandlines via ./configure --help)
It may appear inactive for a long time. Once it's done you'll get a couple pages of output.
Then run:
$ make
$ make install
Note for static builds (configure with --enable-static): although you get Windows static lib files, they are produced with the *.a extension. Just rename them to .lib.
(you can use cmd: ren *.a *.lib)
Using FFMPEG
To just copy from the FFMPEG RTSP input to a file, using the source profile, level etc., just:
read a network frame: av_read_frame(rtsp_format_context)
pass it to the MP4: av_write_frame(video_file_format_context)
You don't need to open an AVCodecContext, a decoder or an encoder; just avformat_open_input, and the video file's AVFormatContext and AVIOContext.
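A rough sketch of that copy loop, reusing the variable names from the question and assuming the output header has already been written (error handling and audio streams omitted):
// Stream-copy loop: read compressed packets from the RTSP input and write
// them into the MP4 output, rescaling timestamps between the two streams'
// time bases. Sketch only.
AVPacket packet;
av_init_packet(&packet);
while (av_read_frame(rtsp_format_context, &packet) >= 0)
{
    if (packet.stream_index == video_stream_index)
    {
        av_packet_rescale_ts(&packet,
                             rtsp_format_context->streams[video_stream_index]->time_base,
                             video_file_stream->time_base);
        packet.stream_index = video_file_stream->index;
        av_write_frame(video_file_format_context, &packet);
    }
    av_packet_unref(&packet);
}
av_write_trailer(video_file_format_context);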
If you want to re-encode, you have to (a rough sketch follows this list):
read a network frame: av_read_frame(rtsp_format_context)
pass the packet to the decoder: avcodec_send_packet(rtsp_decoder_context)
read frames from the decoder (in a loop): avcodec_receive_frame(rtsp_decoder_context)
send each decoded frame to the encoder: avcodec_send_frame(video_file_encoder_context)
read packets from the encoder (in a loop): avcodec_receive_packet(video_file_encoder_context)
send each encoded packet to the output video: av_write_frame(video_file_format_context)
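And a matching sketch of that decode/encode loop, using the context names from the steps above; error handling and end-of-stream draining (sending nullptr once the input runs dry) are left out for brevity:
// Decode -> encode -> write, following the steps above. Sketch only.
AVPacket inPacket, outPacket;
av_init_packet(&inPacket);
av_init_packet(&outPacket);
AVFrame *frame = av_frame_alloc();
while (av_read_frame(rtsp_format_context, &inPacket) >= 0)
{
    if (inPacket.stream_index == video_stream_index &&
        avcodec_send_packet(rtsp_decoder_context, &inPacket) >= 0)
    {
        while (avcodec_receive_frame(rtsp_decoder_context, frame) >= 0)
        {
            avcodec_send_frame(video_file_encoder_context, frame);
            while (avcodec_receive_packet(video_file_encoder_context, &outPacket) >= 0)
            {
                outPacket.stream_index = video_file_stream->index;
                av_packet_rescale_ts(&outPacket,
                                     video_file_encoder_context->time_base,
                                     video_file_stream->time_base);
                av_interleaved_write_frame(video_file_format_context, &outPacket);
                av_packet_unref(&outPacket);
            }
        }
    }
    av_packet_unref(&inPacket);
}
av_frame_free(&frame);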
Some gotchas
Copy out the width, height, and pixel format manually. For H264 it's YUV420P.
As an example, for level 3.1, profile high:
AVCodecParameters * video_file_codec_params = video_file_stream->codecpar;
video_file_codec_params->profile = FF_PROFILE_H264_HIGH;
video_file_codec_params->format = AV_PIX_FMT_YUV420P;
video_file_codec_params->level = 31;
video_file_codec_params->width = rtsp_vidstream->codecpar->width;
video_file_codec_params->height = rtsp_vidstream->codecpar->height;
libx264 accepts a H264 preset via the opts parameter in avcodec_open2. Example for "veryfast" preset:
AVDictionary * mydict = nullptr;
av_dict_set(&mydict, "preset", "veryfast", 0);
avcodec_open2(video_file_encoder_context, video_file_encoder_codec, &mydict)
// avcodec_open2 returns < 0 for errors.
// Recognised options will be removed from the mydict variable.
// If all are recognised, mydict will be NULL.
Output timing is a volatile thing. Use this before avcodec_open2:
video_file_stream->avg_frame_rate = rtsp_vidstream->avg_frame_rate;
video_file_stream->r_frame_rate = rtsp_vidstream->r_frame_rate;
video_file_stream->time_base = rtsp_vidstream->time_base;
video_file_encoder_context->time_base = rtsp_vidstream_codec_context->time_base;
// Decreasing GOP size for more seek positions doesn't end well.
// libx264 forces the new GOP size.
video_file_encoder_context->gop_size = rtsp_vidstream_codec_context->gop_size;
if ((errorNum = avcodec_open2(video_file_encoder_context,...)) < 0) {
// an error...
}
H264 may write at double speed to the file, so playback is doubly fast. To change this, set the encoded packets' timing manually:
packet->pts = packet->dts = frameNum++;
av_packet_rescale_ts(packet, video_file_encoder_context->time_base, video_file_stream->time_base);
packet->pts *= 2;
packet->dts *= 2;
av_interleaved_write_frame(video_file_format_context, packet)
// av_interleaved_write_frame returns < 0 for errors.
Note we switch av_write_frame to av_interleaved_write_frame, and set both PTS and DTS. frameNum should be an int64_t, and should start from 0 (although that's not required).
Also note the av_packet_rescale_ts call's parameters are the video file encoder context's time base and the video file stream's time base - RTSP isn't involved.
VLC media player won't play H.264 streams encoded with an FPS of 4 or lower.
So if your RTSP streams are showing the first decoded frame and never progressing, or showing pure green until the video ends, make sure your FPS is high enough. (that's VLC v2.2.4)

Is it possible to get good FPS using Raspberry Pi camera v4l2 in c++?

I'm trying to stream video on a Raspberry Pi using the official V4L2 driver with the Raspberry Pi camera, from C++ on raspbian (2015-02 release), and I'm having low FPS issues.
Currently I'm just creating a window and copying the buffer to the screen (which takes about 30ms) whereas the select() takes about 140ms (for a total of 5-6 fps). I also tried sleeping for 100ms and it decreases the select() time by a similar amount (resulting in the same fps). CPU load is about 5-15%.
I also tried changing the driver fps from console (or system()) but it only works downwards (for example, if I set the driver fps to 1fps, I'll get 1fps but if I set it to 90fps I still get 5-6fps, even though the driver confirms setting it to 90fps).
Also, when querying FPS modes for the used resolution I get 90fps.
I included the parts of the code related to V4L2 (code omitted between different parts) :
//////////////////
// Open device
//////////////////
mFD = open(mDevName, O_RDWR | O_NONBLOCK, 0);
if (mFD == -1) ErrnoExit("Open device failed");
//////////////////
// Setup format
//////////////////
struct v4l2_format fmt;
memset(&fmt, 0, sizeof(fmt));
fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_YUYV;
fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
Xioctl(VIDIOC_G_FMT, &fmt);
mImgWidth = fmt.fmt.pix.width;
mImgHeight = fmt.fmt.pix.height;
cout << "width=" << mImgWidth << " height=" << mImgHeight << "\nbytesperline=" << fmt.fmt.pix.bytesperline << " sizeimage=" << fmt.fmt.pix.sizeimage << "\n";
// For some reason querying the format always sets pixelformat to JPEG
// no matter the input, so set it back to YUYV
fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_YUYV;
if (Xioctl(VIDIOC_S_FMT, &fmt) == -1)
{
cout << "Set video format failed : " << strerror(errno) << "\n";
}
//////////////////
// Setup streaming
//////////////////
struct v4l2_requestbuffers req;
memset(&req, 0, sizeof(req));
req.count = 20;
req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
req.memory = V4L2_MEMORY_MMAP;
if (-1 == Xioctl(VIDIOC_REQBUFS, &req))
{
ErrnoExit("Reqbufs");
}
if (req.count < 2)
throw "Not enough buffer memory !";
mNBuffers = req.count;
mBuffers = new CBuffer[mNBuffers];
if (!mBuffers) throw "Out of memory !";
for (unsigned int i = 0; i < mNBuffers; i++)
{
struct v4l2_buffer buf;
memset(&buf, 0, sizeof(buf));
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
buf.memory = V4L2_MEMORY_MMAP;
buf.index = i;
if (-1 == Xioctl(VIDIOC_QUERYBUF, &buf))
ErrnoExit("Querybuf");
mBuffers[i].mLength = buf.length;
mBuffers[i].pStart = mmap(NULL, buf.length, PROT_READ | PROT_WRITE, MAP_SHARED, mFD, buf.m.offset);
if (mBuffers[i].pStart == MAP_FAILED)
ErrnoExit("mmap");
}
//////////////////
// Start streaming
//////////////////
unsigned int i;
enum v4l2_buf_type type;
struct v4l2_buffer buf;
for (i = 0; i < mNBuffers; i++)
{
memset(&buf, 0, sizeof(buf));
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
buf.memory = V4L2_MEMORY_MMAP;
buf.index = i;
if (-1 == Xioctl(VIDIOC_QBUF, &buf))
ErrnoExit("QBUF");
}
type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
if (-1==Xioctl(VIDIOC_STREAMON, &type))
ErrnoExit("STREAMON");
And the last two parts in the main loop :
//////////////////
// Get frame
//////////////////
FD_ZERO(&fds);
FD_SET(mFD, &fds);
tv.tv_sec = 3;
tv.tv_usec = 0;
struct timespec t0, t1;
clock_gettime(CLOCK_REALTIME, &t0);
// This line takes about 140ms which I don't get
r = select(mFD + 1, &fds, NULL, NULL, &tv);
clock_gettime(CLOCK_REALTIME, &t1);
cout << "select time : " << ((float)(t1.tv_sec - t0.tv_sec))*1000.0f + ((float)(t1.tv_nsec - t0.tv_nsec))/1000000.0f << "\n";
if (-1 == r)
{
if (EINTR == errno)
continue;
ErrnoExit("select");
}
if (r == 0)
throw "Select timeout\n";
// Read the frame
//~ struct v4l2_buffer buf;
memset(&mCurBuf, 0, sizeof(mCurBuf));
mCurBuf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
mCurBuf.memory = V4L2_MEMORY_MMAP;
// DQBUF about 2ms
if (-1 == Xioctl(VIDIOC_DQBUF, &mCurBuf))
{
if (errno == EAGAIN) continue;
ErrnoExit("DQBUF");
}
clock_gettime(CLOCK_REALTIME, &mCaptureTime);
// Manage frame in mBuffers[buf.index]
mCurBufIndex = mCurBuf.index;
break;
}
//////////////////
// Release frame
//////////////////
if (-1 == Xioctl(VIDIOC_QBUF, &mCurBuf))
ErrnoExit("VIDIOC_QBUF during mainloop");
I have been looking at the various methods of using the picamera and I'm hardly an expert, but it would seem that the default camera settings are what's holding you back. There are many modes and switches. I don't know yet whether they are exposed through ioctls or how; I just started. But I had to use a program called v4l2-ctl to get things ready for the mode I wanted. A deep look at that source and some code lifting should let you achieve greatness. Oh, and I doubt the select call is an issue; it's simply waiting on the descriptor, which is slow to become readable. Depending on the mode there can be a mandatory wait for auto-exposure, etc.
Edit: I meant to say "a default setting" as you've changed some. There are also rules not codified in the driver.
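Not from the original answer, but for completeness: the frame interval can also be requested from C++ through the standard VIDIOC_S_PARM ioctl. A minimal sketch (the driver is free to clamp or ignore the request, so check what it writes back):
#include <cstring>
#include <linux/videodev2.h>
#include <sys/ioctl.h>

// Ask the driver for a given capture frame rate by setting the time-per-frame.
// Sketch only: drivers may adjust the interval to what they actually support.
bool RequestFps(int fd, unsigned int fps)
{
    struct v4l2_streamparm parm;
    memset(&parm, 0, sizeof(parm));
    parm.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    parm.parm.capture.timeperframe.numerator = 1;
    parm.parm.capture.timeperframe.denominator = fps;
    if (ioctl(fd, VIDIOC_S_PARM, &parm) == -1)
        return false;
    // The driver reports the interval it actually applied.
    return parm.parm.capture.timeperframe.denominator == fps;
}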
The pixel format matters. I encountered a similar low-fps problem, and I spent some time testing with my programs in Go and C++ using the V4L2 API. What I found is that the RPi Cam Module has good acceleration with the H.264/MJPG pixel formats: I can easily get 60fps at 640*480, the same as with non-compressed formats like YUYV/RGB. However, JPEG runs very slowly; I can only get 4fps even at 320*240. I also found the current draw is higher (>700mA) with JPEG compared to 500mA with H.264/MJPG.

FFmpeg + OpenAL - playback streaming sound from video won't work

I am decoding an OGG video (theora & vorbis as codecs) and want to show it on the screen (using Ogre 3D) while playing its sound. I can decode the image stream just fine and the video plays perfectly with the correct frame rate, etc.
However, I cannot get the sound to play at all with OpenAL.
Edit: I managed to make the playing sound resemble the actual audio in the video at least somewhat. Updated sample code.
Edit 2: I was able to get "almost" correct sound now. I had to set OpenAL to use AL_FORMAT_STEREO_FLOAT32 (after initializing the extension) instead of just STEREO16. Now the sound is "only" extremely high pitched and stuttering, but at the correct speed.
Here is how I decode audio packets (in a background thread, the equivalent works just fine for the image stream of the video file):
//------------------------------------------------------------------------------
int decodeAudioPacket( AVPacket& p_packet, AVCodecContext* p_audioCodecContext, AVFrame* p_frame,
FFmpegVideoPlayer* p_player, VideoInfo& p_videoInfo)
{
// Decode audio frame
int got_frame = 0;
int decoded = avcodec_decode_audio4(p_audioCodecContext, p_frame, &got_frame, &p_packet);
if (decoded < 0)
{
p_videoInfo.error = "Error decoding audio frame.";
return decoded;
}
// Frame is complete, store it in audio frame queue
if (got_frame)
{
int bufferSize = av_samples_get_buffer_size(NULL, p_audioCodecContext->channels, p_frame->nb_samples,
p_audioCodecContext->sample_fmt, 0);
int64_t duration = p_frame->pkt_duration;
int64_t dts = p_frame->pkt_dts;
if (staticOgreLog)
{
staticOgreLog->logMessage("Audio frame bufferSize / duration / dts: "
+ boost::lexical_cast<std::string>(bufferSize) + " / "
+ boost::lexical_cast<std::string>(duration) + " / "
+ boost::lexical_cast<std::string>(dts), Ogre::LML_NORMAL);
}
// Create the audio frame
AudioFrame* frame = new AudioFrame();
frame->dataSize = bufferSize;
frame->data = new uint8_t[bufferSize];
if (p_frame->channels == 2)
{
memcpy(frame->data, p_frame->data[0], bufferSize >> 1);
memcpy(frame->data + (bufferSize >> 1), p_frame->data[1], bufferSize >> 1);
}
else
{
memcpy(frame->data, p_frame->data, bufferSize);
}
double timeBase = ((double)p_audioCodecContext->time_base.num) / (double)p_audioCodecContext->time_base.den;
frame->lifeTime = duration * timeBase;
p_player->addAudioFrame(frame);
}
return decoded;
}
So, as you can see, I decode the frame, memcpy it to my own struct, AudioFrame. Now, when the sound is played, I use these audio frame like this:
int numBuffers = 4;
ALuint buffers[4];
alGenBuffers(numBuffers, buffers);
ALenum success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error on alGenBuffers : " + Ogre::StringConverter::toString(success) + alGetString(success));
return;
}
// Fill a number of data buffers with audio from the stream
std::vector<AudioFrame*> audioBuffers;
std::vector<unsigned int> audioBufferSizes;
unsigned int numReturned = FFMPEG_PLAYER->getDecodedAudioFrames(numBuffers, audioBuffers, audioBufferSizes);
// Assign the data buffers to the OpenAL buffers
for (unsigned int i = 0; i < numReturned; ++i)
{
alBufferData(buffers[i], _streamingFormat, audioBuffers[i]->data, audioBufferSizes[i], _streamingFrequency);
success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error on alBufferData : " + Ogre::StringConverter::toString(success) + alGetString(success)
+ " size: " + Ogre::StringConverter::toString(audioBufferSizes[i]));
return;
}
}
// Queue the buffers into OpenAL
alSourceQueueBuffers(_source, numReturned, buffers);
success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error queuing streaming buffers: " + Ogre::StringConverter::toString(success) + alGetString(success));
return;
}
}
alSourcePlay(_source);
The format and frequency I give to OpenAL are AL_FORMAT_STEREO_FLOAT32 (it is a stereo sound stream, and I did initialize the FLOAT32 extension) and 48000 (which is the sample rate of the AVCodecContext of the audio stream).
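For reference, a small sketch of how that extension check and format lookup might be done (assuming the standard AL_EXT_float32 extension string, and falling back to 16-bit stereo otherwise):
#include <AL/al.h>

// Resolve the streaming format: prefer 32-bit float stereo if the extension
// is available, otherwise fall back to 16-bit stereo. Sketch only.
ALenum pickStreamingFormat()
{
    if (alIsExtensionPresent("AL_EXT_float32"))
        return alGetEnumValue("AL_FORMAT_STEREO_FLOAT32");
    return AL_FORMAT_STEREO16;
}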
And during playback, I do the following to refill OpenAL's buffers:
ALint numBuffersProcessed;
// Check if OpenAL is done with any of the queued buffers
alGetSourcei(_source, AL_BUFFERS_PROCESSED, &numBuffersProcessed);
if(numBuffersProcessed <= 0)
return;
// Fill a number of data buffers with audio from the stream
std::vector<AudiFrame*> audioBuffers;
std::vector<unsigned int> audioBufferSizes;
unsigned int numFilled = FFMPEG_PLAYER->getDecodedAudioFrames(numBuffersProcessed, audioBuffers, audioBufferSizes);
// Assign the data buffers to the OpenAL buffers
ALuint buffer;
for (unsigned int i = 0; i < numFilled; ++i)
{
// Pop the oldest queued buffer from the source,
// fill it with the new data, then re-queue it
alSourceUnqueueBuffers(_source, 1, &buffer);
ALenum success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error Unqueuing streaming buffers: " + Ogre::StringConverter::toString(success));
return;
}
alBufferData(buffer, _streamingFormat, audioBuffers[i]->data, audioBufferSizes[i], _streamingFrequency);
success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error on re- alBufferData: " + Ogre::StringConverter::toString(success));
return;
}
alSourceQueueBuffers(_source, 1, &buffer);
success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error re-queuing streaming buffers: " + Ogre::StringConverter::toString(success) + " "
+ alGetString(success));
return;
}
}
// Make sure the source is still playing,
// and restart it if needed.
ALint playStatus;
alGetSourcei(_source, AL_SOURCE_STATE, &playStatus);
if(playStatus != AL_PLAYING)
alSourcePlay(_source);
As you can see, I do quite heavy error checking. But I do not get any errors, neither from OpenAL nor from FFmpeg.
Edit: What I hear somewhat resembles the actual audio from the video, but VERY high pitched and stuttering VERY much. Also, it seems to be playing on top of TV noise. Very strange. Plus, it is playing much slower than the correct audio would.
Edit: 2 After using AL_FORMAT_STEREO_FLOAT32, the sound plays at the correct speed, but is still very high pitched and stuttering (though less than before).
The video itself is not broken, it can be played fine in any player. OpenAL can also play *.wav files just fine in the same application, so it is also working.
Any ideas what could be wrong here or how to do this correctly?
My only guess is that somehow, FFmpeg's decode function does not produce data OpenAL can read. But this is as far as the FFmpeg decode example goes, so I don't know what's missing. As I understand it, the decode_audio4 function decodes the frame to raw data. And OpenAL should be able to work with raw data (or rather, doesn't work with anything else).
So, I finally figured out how to do it. Gee, what a mess. It was a hint from a user on the libav-users mailing list that put me on the correct path.
Here are my mistakes:
Using the wrong format in the alBufferData function. I used AL_FORMAT_STEREO16 (as that is what every single streaming example with OpenAL uses). I should have used AL_FORMAT_STEREO_FLOAT32, as the video I stream is Ogg and vorbis is stored in floating points. And using swr_convert to convert from AV_SAMPLE_FMT_FLTP to AV_SAMPLE_FMT_S16 just crashes. No idea why.
Not using swr_convert to convert the decoded audio frame to the target format. After I tried to use swr_convert to convert from FLTP to S16 and it simply crashed without a reason given, I assumed it was broken. But after figuring out my first mistake, I tried again, converting from FLTP to FLT (non-planar), and then it worked! So OpenAL uses an interleaved format, not planar. Good to know.
So here is the decodeAudioPacket function that is working for me with Ogg video, vorbis audio stream:
int decodeAudioPacket( AVPacket& p_packet, AVCodecContext* p_audioCodecContext, AVFrame* p_frame,
SwrContext* p_swrContext, uint8_t** p_destBuffer, int p_destLinesize,
FFmpegVideoPlayer* p_player, VideoInfo& p_videoInfo)
{
// Decode audio frame
int got_frame = 0;
int decoded = avcodec_decode_audio4(p_audioCodecContext, p_frame, &got_frame, &p_packet);
if (decoded < 0)
{
p_videoInfo.error = "Error decoding audio frame.";
return decoded;
}
if(decoded <= p_packet.size)
{
/* Move the unread data to the front and clear the end bits */
int remaining = p_packet.size - decoded;
memmove(p_packet.data, &p_packet.data[decoded], remaining);
av_shrink_packet(&p_packet, remaining);
}
// Frame is complete, store it in audio frame queue
if (got_frame)
{
int outputSamples = swr_convert(p_swrContext,
p_destBuffer, p_destLinesize,
(const uint8_t**)p_frame->extended_data, p_frame->nb_samples);
int bufferSize = av_get_bytes_per_sample(AV_SAMPLE_FMT_FLT) * p_videoInfo.audioNumChannels
* outputSamples;
int64_t duration = p_frame->pkt_duration;
int64_t dts = p_frame->pkt_dts;
if (staticOgreLog)
{
staticOgreLog->logMessage("Audio frame bufferSize / duration / dts: "
+ boost::lexical_cast<std::string>(bufferSize) + " / "
+ boost::lexical_cast<std::string>(duration) + " / "
+ boost::lexical_cast<std::string>(dts), Ogre::LML_NORMAL);
}
// Create the audio frame
AudioFrame* frame = new AudioFrame();
frame->dataSize = bufferSize;
frame->data = new uint8_t[bufferSize];
memcpy(frame->data, p_destBuffer[0], bufferSize);
double timeBase = ((double)p_audioCodecContext->time_base.num) / (double)p_audioCodecContext->time_base.den;
frame->lifeTime = duration * timeBase;
p_player->addAudioFrame(frame);
}
return decoded;
}
And here is how I initialize the context and the destination buffer:
// Initialize SWR context
SwrContext* swrContext = swr_alloc_set_opts(NULL,
audioCodecContext->channel_layout, AV_SAMPLE_FMT_FLT, audioCodecContext->sample_rate,
audioCodecContext->channel_layout, audioCodecContext->sample_fmt, audioCodecContext->sample_rate,
0, NULL);
int result = swr_init(swrContext);
// Create destination sample buffer
uint8_t** destBuffer = NULL;
int destBufferLinesize;
av_samples_alloc_array_and_samples( &destBuffer,
&destBufferLinesize,
videoInfo.audioNumChannels,
2048,
AV_SAMPLE_FMT_FLT,
0);

How to write bitmaps as frames to Ogg Theora in C\C++?

How to write bitmaps as frames to Ogg Theora in C\C++?
Some examples with source would be great!
The entire solution is a little lengthy to post here as a code sample, but if you download libtheora from Xiph.org, there is an example called png2theora. All of the library functions I am about to mention can be found in the documentation on Xiph.org for theora and ogg.
Call th_info_init() to initialise a th_info structure, then set up your output parameters by assigning the appropriate members in it.
Use that structure in a call to th_encode_alloc() to get an encoder context
Initialise an ogg stream, with ogg_stream_init()
Initialise a blank th_comment structure using th_comment_init
Iterate through the following:
Call th_encode_flushheader with the encoder context, the blank comment structure and an ogg_packet.
Send the resulting packet to the ogg stream with ogg_stream_packetin()
Until th_encode_flushheader returns 0 (or an error code)
Now, repeatedly call ogg_stream_pageout(), each time writing page.header and then page.body to an output file, until it returns 0. Then call ogg_stream_flush and write the resulting page to the file.
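A condensed sketch of that header-writing sequence, reusing the encoderContext, theoraStreamState and outputFd names from the frame-writing code below (error handling omitted):
th_comment comment;
th_comment_init(&comment);
ogg_packet headerPacket;
ogg_page page;
// th_encode_flushheader returns a positive value while header packets remain
// and 0 once all headers have been emitted.
while (th_encode_flushheader(encoderContext, &comment, &headerPacket) > 0)
    ogg_stream_packetin(&theoraStreamState, &headerPacket);
// Write out complete pages, then flush so the headers end on a page boundary
// before the first video frame.
while (ogg_stream_pageout(&theoraStreamState, &page) > 0)
{
    write(outputFd, page.header, page.header_len);
    write(outputFd, page.body, page.body_len);
}
while (ogg_stream_flush(&theoraStreamState, &page) > 0)
{
    write(outputFd, page.header, page.header_len);
    write(outputFd, page.body, page.body_len);
}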
You can now write frames to the encoder. Here is how I did it:
int theora_write_frame(int outputFd, unsigned long w, unsigned long h, unsigned char *yuv_y, unsigned char *yuv_u, unsigned char *yuv_v, int last)
{
th_ycbcr_buffer ycbcr;
ogg_packet op;
ogg_page og;
unsigned long yuv_w;
unsigned long yuv_h;
/* Must hold: yuv_w >= w */
yuv_w = (w + 15) & ~15;
/* Must hold: yuv_h >= h */
yuv_h = (h + 15) & ~15;
//Fill out the ycbcr buffer
ycbcr[0].width = yuv_w;
ycbcr[0].height = yuv_h;
ycbcr[0].stride = yuv_w;
ycbcr[1].width = yuv_w;
ycbcr[1].stride = ycbcr[1].width;
ycbcr[1].height = yuv_h;
ycbcr[2].width = ycbcr[1].width;
ycbcr[2].stride = ycbcr[1].stride;
ycbcr[2].height = ycbcr[1].height;
if(encoderInfo->pixel_fmt == TH_PF_420)
{
//Chroma is decimated by 2 in both directions
ycbcr[1].width = yuv_w >> 1;
ycbcr[2].width = yuv_w >> 1;
ycbcr[1].height = yuv_h >> 1;
ycbcr[2].height = yuv_h >> 1;
}else if(encoderInfo->pixel_fmt == TH_PF_422)
{
ycbcr[1].width = yuv_w >> 1;
ycbcr[2].width = yuv_w >> 1;
}else if(encoderInfo->pixel_fmt != TH_PF_422)
{
//Then we have an unknown pixel format
//We don't know how long the arrays are!
fprintf(stderr, "[theora_write_frame] Unknown pixel format in writeFrame!\n");
return -1;
}
ycbcr[0].data = yuv_y;
ycbcr[1].data = yuv_u;
ycbcr[2].data = yuv_v;
/* Theora is a one-frame-in,one-frame-out system; submit a frame
for compression and pull out the packet */
if(th_encode_ycbcr_in(encoderContext, ycbcr)) {
fprintf(stderr, "[theora_write_frame] Error: could not encode frame\n");
return -1;
}
if(!th_encode_packetout(encoderContext, last, &op)) {
fprintf(stderr, "[theora_write_frame] Error: could not read packets\n");
return -1;
}
ogg_stream_packetin(&theoraStreamState, &op);
ssize_t bytesWritten = 0;
int pagesOut = 0;
while(ogg_stream_pageout(&theoraStreamState, &og)) {
pagesOut ++;
bytesWritten = write(outputFd, og.header, og.header_len);
if(bytesWritten != og.header_len)
{
fprintf(stderr, "[theora_write_frame] Error: Could not write to file\n");
return -1;
}
bytesWritten = write(outputFd, og.body, og.body_len);
if(bytesWritten != og.body_len)
{
bytesWritten = fprintf(stderr, "[theora_write_frame] Error: Could not write to file\n");
return -1;
}
}
return pagesOut;
}
Where encoderInfo is the th_info structure used to initialise the encoder (static in the data section for me).
On your last frame, setting the last flag when calling th_encode_packetout() will make sure the stream terminates properly.
Once you're done, just make sure to clean up (closing fds mainly). th_info_clear() will clear the th_info structure, and th_encode_free() will free your encoder context.
Obviously, you'll need to convert your bitmap into YUV planes before you can pass them to theora_write_frame().
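As a starting point, here is a rough per-pixel RGB-to-Y'CbCr sketch using the common BT.601 coefficients; an actual converter for TH_PF_420 also has to average the chroma over 2x2 blocks:
#include <algorithm>
#include <cstdint>

// Convert one 8-bit RGB pixel to Y'CbCr (BT.601, full range). Sketch only.
void rgbToYcbcr(uint8_t r, uint8_t g, uint8_t b,
                uint8_t &y, uint8_t &cb, uint8_t &cr)
{
    int yv  = static_cast<int>( 0.299 * r + 0.587 * g + 0.114 * b + 0.5);
    int cbv = static_cast<int>(-0.169 * r - 0.331 * g + 0.500 * b + 128.5);
    int crv = static_cast<int>( 0.500 * r - 0.419 * g - 0.081 * b + 128.5);
    y  = static_cast<uint8_t>(std::clamp(yv,  0, 255));
    cb = static_cast<uint8_t>(std::clamp(cbv, 0, 255));
    cr = static_cast<uint8_t>(std::clamp(crv, 0, 255));
}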
Hope this is of some help. Good luck!
Here's the libtheora API and example code.
Here's a micro-howto that shows how to use the theora binaries. As the encoder reads raw, uncompressed 'yuv4mpeg' data for video, you could use that from your app too, by piping the video frames to the encoder.