Gstreamer gst_buffer_pool_acquire_buffer function is slow on ARM - gstreamer

My Gstreamer version is 1.17, cross compiled using instructions from here.
Here is my gstreamer pipeline,
appsrc name=framesrc0 do-timestamp=true format=time ! video/x-raw,width=640,height=480,framerate=30/1,format=NV12 ! queue ! x264enc ! queue ! h264parse ! mpegtsmux ! filesink name=mysink location=./myfile.ts
I feed NV12 frames to appsrc using the below function (6404801.5 = 460800 bytes)
bool BelGst::FeedData0(uint8_t *buf, uint32_t len)
{
GstFlowReturn ret;
GstBuffer *buffer;
GstMapInfo info;
timespec ts_beg, ts_end;
uint32_t time_ms;
clock_gettime(CLOCK_MONOTONIC, &ts_beg);
ret = gst_buffer_pool_acquire_buffer (pool0, &buffer, NULL);
if (G_UNLIKELY (ret != GST_FLOW_OK)) {
cout << "BufferPool pool0 failed" << endl;
return FALSE;
}
clock_gettime(CLOCK_MONOTONIC, &ts_end);
time_ms = (ts_end.tv_sec - ts_beg.tv_sec)*1000 + (ts_end.tv_nsec - ts_beg.tv_nsec) / 1e6;
cout << "Buffer pool acquire time = " << time_ms << "ms" << endl;
/* Set its timestamp and duration */
GST_BUFFER_TIMESTAMP(buffer) = timestamp0;
GST_BUFFER_DURATION(buffer) = gst_util_uint64_scale(1, GST_SECOND, 30);
GST_BUFFER_OFFSET(buffer) = offset0++;
timestamp0 += GST_BUFFER_DURATION(buffer);
gst_buffer_map(buffer, &info, GST_MAP_WRITE);
memcpy(info.data, buf, len);
gst_buffer_unmap(buffer, &info);
g_signal_emit_by_name(app_source0, "push-buffer", buffer, &ret);
gst_buffer_unref(buffer);
return TRUE;
}
I've setup the buffer pool as shown below,
void BufferPoolSetup(GstBufferPool *&pool)
{
GstStructure *config;
int size, min, max;
GstCaps *caps;
pool = gst_buffer_pool_new();
/* get config structure */
config = gst_buffer_pool_get_config(pool);
size = 640*480*1.5;
min = 1;
max = 4;
caps = gst_caps_from_string("video/x-raw");
/* set caps, size, minimum and maximum buffers in the pool */
gst_buffer_pool_config_set_params (config, caps, size, min, max);
gst_caps_unref(caps);
gst_buffer_pool_set_config (pool, config);
/* and activate */
gst_buffer_pool_set_active (pool, TRUE);
return;
}
When I run the pipeline, I see that the function gst_buffer_pool_acquire_buffer is taking somewhere between 20ms to 60ms. Could someone point if there is something wrong in my approach? Am I missing something?

Related

GStreamer sendonly to multiple WebRTC clients

I've been trying to setup a simple sendonly WebRTC client with GStreamer but I'm having issues with getting the actual video to display on the WebRTC receiver side. I am new to both GStreamer and WebRTC.
I'm using the examples from https://gitlab.freedesktop.org/gstreamer/gst-examples/-/tree/master/webrtc to try and come up with a combination of certain parts. I've had 1:1 communication working but I wanted to introduce the rooms so I can have more clients viewing the "view-only" stream from GStreamer.
My current code is based on the multiparty-sendrecv example where I swapped out the audio for video. Furthermore, I'm using a modified version of the signalling server and a modified version of the javascript webrtc client. If necessary I could provide code for all of the above, but to keep things simple I won't. This is because I don't think the problem lies in either the signalling server or webrtc client, because the ICE candidates have been successfully negotiated along with the SDP offer & answer according to chrome://webrtc-internals/. See the image below.
In order to figure out what's going on I've exported a graph that shows the GStreamer pipeline after a user has joined the room and was added to the pipeline. See graph below.
As far as I can tell I should be receiving video data on my frontend, but I'm not. I've had a single weird case where the videotestsrc did show up, but I haven't been able to reproduce it. But because of this, it makes me think that the pipeline itself isn't neccesarily wrong, but perhaps we're dealing with some kind of race condition.
I've added the modified example of multiparty-sendrecv below, please take a look at it. Most of the methods have purposely been left out due to Stackoverflow's character limit.
Main functions
static void
handle_media_stream(GstPad* pad, GstElement* pipe, const char* convert_name,
const char* sink_name)
{
GstPad* qpad;
GstElement* q, * conv, * sink;
GstPadLinkReturn ret;
q = gst_element_factory_make("queue", NULL);
g_assert_nonnull(q);
conv = gst_element_factory_make(convert_name, NULL);
g_assert_nonnull(conv);
sink = gst_element_factory_make(sink_name, NULL);
g_assert_nonnull(sink);
gst_bin_add_many(GST_BIN(pipe), q, conv, sink, NULL);
gst_element_sync_state_with_parent(q);
gst_element_sync_state_with_parent(conv);
gst_element_sync_state_with_parent(sink);
gst_element_link_many(q, conv, sink, NULL);
qpad = gst_element_get_static_pad(q, "sink");
ret = gst_pad_link(pad, qpad);
g_assert_cmpint(ret, == , GST_PAD_LINK_OK);
}
static void
on_incoming_decodebin_stream(GstElement* decodebin, GstPad* pad,
GstElement* pipe)
{
GstCaps* caps;
const gchar* name;
if (!gst_pad_has_current_caps(pad)) {
g_printerr("Pad '%s' has no caps, can't do anything, ignoring\n",
GST_PAD_NAME(pad));
return;
}
caps = gst_pad_get_current_caps(pad);
name = gst_structure_get_name(gst_caps_get_structure(caps, 0));
if (g_str_has_prefix(name, "video")) {
handle_media_stream(pad, pipe, "videoconvert", "autovideosink");
}
else if (g_str_has_prefix(name, "audio")) {
handle_media_stream(pad, pipe, "audioconvert", "autoaudiosink");
}
else {
g_printerr("Unknown pad %s, ignoring", GST_PAD_NAME(pad));
}
}
static void
on_incoming_stream(GstElement* webrtc, GstPad* pad, GstElement* pipe)
{
GstElement* decodebin;
GstPad* sinkpad;
if (GST_PAD_DIRECTION(pad) != GST_PAD_SRC)
return;
decodebin = gst_element_factory_make("decodebin", NULL);
g_signal_connect(decodebin, "pad-added",
G_CALLBACK(on_incoming_decodebin_stream), pipe);
gst_bin_add(GST_BIN(pipe), decodebin);
gst_element_sync_state_with_parent(decodebin);
sinkpad = gst_element_get_static_pad(decodebin, "sink");
gst_pad_link(pad, sinkpad);
gst_object_unref(sinkpad);
}
static void
add_peer_to_pipeline(const gchar* peer_id, gboolean offer)
{
int ret;
gchar* tmp;
GstElement* tee, * webrtc, * q;
GstPad* srcpad, * sinkpad;
tmp = g_strdup_printf("queue-%s", peer_id);
q = gst_element_factory_make("queue", tmp);
g_free(tmp);
webrtc = gst_element_factory_make("webrtcbin", peer_id);
g_object_set(webrtc, "bundle-policy", GST_WEBRTC_BUNDLE_POLICY_MAX_BUNDLE, NULL);
gst_bin_add_many(GST_BIN(pipeline), q, webrtc, NULL);
srcpad = gst_element_get_static_pad(q, "src");
g_assert_nonnull(srcpad);
sinkpad = gst_element_get_request_pad(webrtc, "sink_%u");
g_assert_nonnull(sinkpad);
ret = gst_pad_link(srcpad, sinkpad);
g_assert_cmpint(ret, == , GST_PAD_LINK_OK);
gst_object_unref(srcpad);
gst_object_unref(sinkpad);
tee = gst_bin_get_by_name(GST_BIN(pipeline), "videotee");
g_assert_nonnull(tee);
srcpad = gst_element_get_request_pad(tee, "src_%u");
g_assert_nonnull(srcpad);
gst_object_unref(tee);
sinkpad = gst_element_get_static_pad(q, "sink");
g_assert_nonnull(sinkpad);
ret = gst_pad_link(srcpad, sinkpad);
g_assert_cmpint(ret, == , GST_PAD_LINK_OK);
gst_object_unref(srcpad);
gst_object_unref(sinkpad);
/* This is the gstwebrtc entry point where we create the offer and so on. It
* will be called when the pipeline goes to PLAYING.
* XXX: We must connect this after webrtcbin has been linked to a source via
* get_request_pad() and before we go from NULL->READY otherwise webrtcbin
* will create an SDP offer with no media lines in it. */
if (offer)
g_signal_connect(webrtc, "on-negotiation-needed",
G_CALLBACK(on_negotiation_needed), (gpointer)peer_id);
/* We need to transmit this ICE candidate to the browser via the websockets
* signalling server. Incoming ice candidates from the browser need to be
* added by us too, see on_server_message() */
g_signal_connect(webrtc, "on-ice-candidate",
G_CALLBACK(send_ice_candidate_message), (gpointer)peer_id);
/* Incoming streams will be exposed via this signal */
g_signal_connect(webrtc, "pad-added", G_CALLBACK(on_incoming_stream),
pipeline);
/* Set to pipeline branch to PLAYING */
ret = gst_element_sync_state_with_parent(q);
g_assert_true(ret);
ret = gst_element_sync_state_with_parent(webrtc);
g_assert_true(ret);
GST_DEBUG_BIN_TO_DOT_FILE(GST_BIN(pipeline), GST_DEBUG_GRAPH_SHOW_ALL, "pipeline");
}
static gboolean
start_pipeline(void)
{
GstStateChangeReturn ret;
GError* error = NULL;
/* NOTE: webrtcbin currently does not support dynamic addition/removal of
* streams, so we use a separate webrtcbin for each peer, but all of them are
* inside the same pipeline. We start by connecting it to a fakesink so that
* we can preroll early. */
/*pipeline = gst_parse_launch("tee name=videotee ! queue ! fakesink "
"videotestsrc is-live=true pattern=ball ! videoconvert ! queue ! vp8enc deadline=1 ! rtpvp8pay ! "
"queue ! " RTP_CAPS_VP8 "96 ! videotee. ", &error);*/
pipeline = gst_parse_launch("tee name=videotee ! queue ! fakesink "
"videotestsrc is-live=true pattern=ball ! videoconvert ! queue ! vp8enc deadline=1 ! rtpvp8pay ! "
"queue ! " RTP_CAPS_VP8 "96 ! videotee. ", &error);
if (error) {
g_printerr("Failed to parse launch: %s\n", error->message);
g_error_free(error);
goto err;
}
g_print("Starting pipeline, not transmitting yet\n");
ret = gst_element_set_state(GST_ELEMENT(pipeline), GST_STATE_PLAYING);
if (ret == GST_STATE_CHANGE_FAILURE)
goto err;
return TRUE;
err:
g_print("State change failure\n");
if (pipeline)
g_clear_object(&pipeline);
return FALSE;
}
/*
* When we join a room, we are responsible for calling by starting negotiation
* with each peer in it by sending an SDP offer and ICE candidates.
*/
static void
do_join_room(const gchar* text)
{
gint ii, len;
gchar** peer_ids;
if (app_state != ROOM_JOINING) {
cleanup_and_quit_loop("ERROR: Received ROOM_OK when not calling",
ROOM_JOIN_ERROR);
return;
}
app_state = ROOM_JOINED;
g_print("Room joined\n");
/* Start recording, but not transmitting */
if (!start_pipeline()) {
cleanup_and_quit_loop("ERROR: Failed to start pipeline", ROOM_CALL_ERROR);
return;
}
peer_ids = g_strsplit(text, " ", -1);
g_assert_cmpstr(peer_ids[0], == , "ROOM_OK");
len = g_strv_length(peer_ids);
/* There are peers in the room already. We need to start negotiation
* (exchange SDP and ICE candidates) and transmission of media. */
if (len > 1 && strlen(peer_ids[1]) > 0) {
g_print("Found %i peers already in room\n", len - 1);
app_state = ROOM_CALL_OFFERING;
for (ii = 1; ii < len; ii++) {
gchar* peer_id = g_strdup(peer_ids[ii]);
g_print("Negotiating with peer %s\n", peer_id);
/* This might fail asynchronously */
call_peer(peer_id);
peers = g_list_prepend(peers, peer_id);
}
}
g_strfreev(peer_ids);
return;
}
int
main(int argc, char* argv[])
{
GOptionContext* context;
GError* error = NULL;
context = g_option_context_new("- gstreamer webrtc sendrecv demo");
g_option_context_add_main_entries(context, entries, NULL);
g_option_context_add_group(context, gst_init_get_option_group());
if (!g_option_context_parse(context, &argc, &argv, &error)) {
g_printerr("Error initializing: %s\n", error->message);
return -1;
}
if (!check_plugins())
return -1;
if (!room_id) {
g_printerr("--room-id is a required argument\n");
return -1;
}
if (!local_id)
local_id = g_strdup_printf("%s-%i", g_get_user_name(),
g_random_int_range(10, 10000));
/* Sanitize by removing whitespace, modifies string in-place */
g_strdelimit(local_id, " \t\n\r", '-');
g_print("Our local id is %s\n", local_id);
if (!server_url)
server_url = g_strdup(default_server_url);
/* Don't use strict ssl when running a localhost server, because
* it's probably a test server with a self-signed certificate */
{
GstUri* uri = gst_uri_from_string(server_url);
if (g_strcmp0("localhost", gst_uri_get_host(uri)) == 0 ||
g_strcmp0("127.0.0.1", gst_uri_get_host(uri)) == 0)
strict_ssl = FALSE;
gst_uri_unref(uri);
}
loop = g_main_loop_new(NULL, FALSE);
connect_to_websocket_server_async();
g_main_loop_run(loop);
gst_element_set_state(GST_ELEMENT(pipeline), GST_STATE_NULL);
g_print("Pipeline stopped\n");
gst_object_unref(pipeline);
g_free(server_url);
g_free(local_id);
g_free(room_id);
return 0;
}

FFMPEG buffer underflow

I'm recording a video with FFMPEG and getting some wierd message in the process
[mpeg # 01011c80] packet too large, ignoring buffer limits to mux it
[mpeg # 01011c80] buffer underflow st=0 bufi=236198 size=412405
[mpeg # 01011c80] buffer underflow st=0 bufi=238239 size=412405
and I have no idea how to deal with it. Here's my code for adding frames
void ofxFFMPEGVideoWriter::addFrame(const uint8_t* pixels)
{
memcpy(picture_rgb24->data[0], pixels, size);
sws_scale(swsContext, picture_rgb24->data, picture_rgb24->linesize, 0, codecContext->height, picture->data, picture->linesize);
AVPacket packet = { 0 };
int got_packet;
av_init_packet(&packet);
int ret = avcodec_encode_video2(codecContext, &packet, picture, &got_packet);
if (ret < 0) qDebug() << "Error encoding video frame: " << ret;
if (!ret && got_packet && packet.size)
{
packet.stream_index = videoStream->index;
ret = av_interleaved_write_frame(formatContext, &packet);
}
picture->pts += av_rescale_q(1, videoStream->codec->time_base, videoStream->time_base);
}
The file itself seems to be fine, and readable, but that message is really bugging me. Does anybody know how to fix it?

FFMPEG I/O output buffer

I'm currently having issues trying to encapsulate raw H264 nal packets into a mp4 container. Instead of writing them to disk however, I want to have the result stored in memory. I followed this approach Raw H264 frames in mpegts container using libavcodec but haven't been successful so far.
First, is this the right way to write to memory? I have a small struct in my header
struct IOOutput {
uint8_t* outBuffer;
int bytesSet;
};
where I initialize the buffer and bytesset. I then initialize my AVIOContext variable
AVIOContext* pIOCtx = avio_alloc_context(pBuffer, iBufSize, 1, outptr, NULL, write_packet, NULL);
where outptr is a void pointer to IOOutput output, and write_packet looks like the following
int write_packet (void *opaque, uint8_t *buf, int buf_size) {
IOOutput* out = reinterpret_cast<IOOutput*>(opaque);
memcpy(out->outBuffer+out->bytesSet, buf, buf_size);
out->bytesSet+=buf_size;
return buf_size;
}
I then set
fc->pb = pIOCtx;
fc->flags = AVFMT_FLAG_CUSTOM_IO;
on my AVFormatContext *fc variable.
Then, whenever I encode the nal packets I have from a frame, I write them to the AVFormatContext via av_interleaved_write_frame and then get the mp4 contents via
void getBufferContent(char* buffer) {
memcpy(buffer, output.outBuffer, output.bytesSet);
output.bytesSet=0;
}
and thus reset the variable bytesSet, so during the next writing operation bytes will be inserted at the start of the buffer. Is there a better way to do this? Is this actually a valid way to do it? Does FFMPEG do any reading operation if I only do call av_interleaved_write_frame and avformat_write_header in order to add packets?
Thank you very much in advance!
EDIT
Here is the code regarding the muxing process - in my encode Function I have something like
int frame_size = x264_encoder_encode(obj->mEncoder, &obj->nals, &obj->i_nals, obj->pic_in, obj->pic_out);
int total_size=0;
for(int i=0; i<obj->i_nals;i++)
{
if ( !obj->fc ) {
obj->create( obj->nals[i].p_payload, obj->nals[i].i_payload );
}
if ( obj->fc ) {
obj->write_frame( obj->nals[i].p_payload, obj->nals[i].i_payload);
}
}
// Here I get the output values
int currentBufferSize = obj->output.bytesSet;
char* mem = new char[currentBufferSize];
obj->getBufferContent(mem);
And the create and write functions look like this
int create(void *p, int len) {
AVOutputFormat *of = av_guess_format( "mp4", 0, 0 );
fc = avformat_alloc_context();
// Add video stream
AVStream *pst = av_new_stream( fc, 0 );
vi = pst->index;
void* outptr = (void*) &output;
// Create Buffer
pIOCtx = avio_alloc_context(pBuffer, iBufSize, 1, outptr, NULL, write_packet, NULL);
fc->oformat = of;
fc->pb = pIOCtx;
fc->flags = AVFMT_FLAG_CUSTOM_IO;
pcc = pst->codec;
AVCodec c= {0};
c.type= AVMEDIA_TYPE_VIDEO;
avcodec_get_context_defaults3( pcc, &c );
pcc->codec_type = AVMEDIA_TYPE_VIDEO;
pcc->codec_id = codec_id;
pcc->bit_rate = br;
pcc->width = w;
pcc->height = h;
pcc->time_base.num = 1;
pcc->time_base.den = fps;
}
void write_frame( const void* p, int len ) {
AVStream *pst = fc->streams[ vi ];
// Init packet
AVPacket pkt;
av_init_packet( &pkt );
pkt.flags |= ( 0 >= getVopType( p, len ) ) ? AV_PKT_FLAG_KEY : 0;
pkt.stream_index = pst->index;
pkt.data = (uint8_t*)p;
pkt.size = len;
pkt.dts = AV_NOPTS_VALUE;
pkt.pts = AV_NOPTS_VALUE;
av_interleaved_write_frame( fc, &pkt );
}
See the AVFormatContext.pb documentation. You set it correctly, but you shouldn't touch AVFormatContext.flags. Also, make sure you set it before calling avformat_write_header().
When you say "it doesn't work", what exactly doesn't work? Is the callback not invoked? Is the data in it not of the expected type/format? Something else? If all you want to do is write raw nal packets, then you could just take encoded data directly from the encoder (in the AVPacket), that's the raw nal data. If you use libx264's api directly, it even gives you each nal individually so you don't need to parse it.

Gstreamer. Write appsink to filesink

I have written a code for appsrc to appsink and it works. I see the actual buffer. It's encoded in H264(vpuenc=avc). Now I want to save it in a file(filesink). How I approach it?
app:
int main(int argc, char *argv[]) {
gst_init (NULL, NULL);
GstElement *pipeline, *sink;
gchar *descr;
GError *error = NULL;
GstAppSink *appsink;
descr = g_strdup_printf (
"mfw_v4lsrc device=/dev/video1 capture_mode=0 ! " // grab from mipi camera
"ffmpegcolorspace ! vpuenc codec=avc ! "
"appsink name=sink"
);
pipeline = gst_parse_launch (descr, &error);
if (error != NULL) {
g_print ("could not construct pipeline: %s\n", error->message);
g_error_free (error);
exit (-1);
}
gst_element_set_state(pipeline, GST_STATE_PAUSED);
sink = gst_bin_get_by_name (GST_BIN (pipeline), "sink");
appsink = (GstAppSink *) sink;
gst_app_sink_set_max_buffers ( appsink, 2); // limit number of buffers queued
gst_app_sink_set_drop( appsink, true ); // drop old buffers in queue when full
gst_element_set_state (pipeline, GST_STATE_PLAYING);
int i = 0;
while( !gst_app_sink_is_eos(appsink) )
{
GstBuffer *buffer = gst_app_sink_pull_buffer(appsink);
uint8_t* data = (uint8_t*)GST_BUFFER_DATA(buffer);
uint32_t size = GST_BUFFER_SIZE(buffer);
gst_buffer_unref(buffer);
}
return 0; }
If as mentioned in the comments, what you actually want to know is how to do a network video stream in GStreamer, you should probably close this question because you're on the wrong path. You don't need to use an appsink or filesink for that. What you'll want to investigate are the GStreamer elements related to RTP, RTSP, RTMP, MPEGTS, or even MJPEGs (if your image size is small enough).
Here are two basic send/receive video stream pipelines:
gst-launch-0.10 v4l2src ! ffmpegcolorspace ! videoscale ! video/x-raw-yuv,width=640,height=480 ! vpuenc ! h264parse ! rtph264pay ! udpsink host=localhost port=5555
gst-launch-0.10 udpsrc port=5555 ! application/x-rtp,encoding-name=H264,payload=96 ! rtph264depay ! h264parse ! ffdec_h264 ! videoconvert ! ximagesink
In this situation you don't write your own while loop. You register callbacks and wait for buffers (GStreamer 0.10) to arrive. If you're using GStreamer 1.0, you use samples instead of buffers. Samples are a huge pain in the ass compared to buffers but oh well.
Register the callback:
GstAppSinkCallbacks* appsink_callbacks = (GstAppSinkCallbacks*)malloc(sizeof(GstAppSinkCallbacks));
appsink_callbacks->eos = NULL;
appsink_callbacks->new_preroll = NULL;
appsink_callbacks->new_sample = app_sink_new_sample;
gst_app_sink_set_callbacks(GST_APP_SINK(appsink), appsink_callbacks, (gpointer)pointer_to_data_passed_to_the_callback, free);
And your callback:
GstFlowReturn app_sink_new_sample(GstAppSink *sink, gpointer user_data) {
prog_data* pd = (prog_data*)user_data;
GstSample* sample = gst_app_sink_pull_sample(sink);
if(sample == NULL) {
return GST_FLOW_ERROR;
}
GstBuffer* buffer = gst_sample_get_buffer(src);
GstMemory* memory = gst_buffer_get_all_memory(buffer);
GstMapInfo map_info;
if(! gst_memory_map(memory, &map_info, GST_MAP_READ)) {
gst_memory_unref(memory);
gst_sample_unref(sample);
return GST_FLOW_ERROR;
}
//render using map_info.data
gst_memory_unmap(memory, &map_info);
gst_memory_unref(memory);
gst_sample_unref(sample);
return GST_FLOW_OK;
}
You can keep your while loop as it is--using gst_app_sink_is_eos()--but make sure to put a sleep in it. Most of the time I use something like the following instead:
GMainLoop* loop = g_main_loop_new(NULL, FALSE);
g_main_loop_run(loop);
g_main_loop_unref(loop);
Note: Unless you need to do something special with the data you can use the "filesink" element directly.
Simpler option would be write to the file directly in the appsink itself ie when you get a callback when the buffer is done write to the file and make sure you close it on eos.
Hope that helps.

FFmpeg + OpenAL - playback streaming sound from video won't work

I am decoding an OGG video (theora & vorbis as codecs) and want to show it on the screen (using Ogre 3D) while playing its sound. I can decode the image stream just fine and the video plays perfectly with the correct frame rate, etc.
However, I cannot get the sound to play at all with OpenAL.
Edit: I managed to make the playing sound resemble the actual audio in the video at least somewhat. Updated sample code.
Edit 2: I was able to get "almost" correct sound now. I had to set OpenAL to use AL_FORMAT_STEREO_FLOAT32 (after initializing the extension) instead of just STEREO16. Now the sound is "only" extremely high pitched and stuttering, but at the correct speed.
Here is how I decode audio packets (in a background thread, the equivalent works just fine for the image stream of the video file):
//------------------------------------------------------------------------------
int decodeAudioPacket( AVPacket& p_packet, AVCodecContext* p_audioCodecContext, AVFrame* p_frame,
FFmpegVideoPlayer* p_player, VideoInfo& p_videoInfo)
{
// Decode audio frame
int got_frame = 0;
int decoded = avcodec_decode_audio4(p_audioCodecContext, p_frame, &got_frame, &p_packet);
if (decoded < 0)
{
p_videoInfo.error = "Error decoding audio frame.";
return decoded;
}
// Frame is complete, store it in audio frame queue
if (got_frame)
{
int bufferSize = av_samples_get_buffer_size(NULL, p_audioCodecContext->channels, p_frame->nb_samples,
p_audioCodecContext->sample_fmt, 0);
int64_t duration = p_frame->pkt_duration;
int64_t dts = p_frame->pkt_dts;
if (staticOgreLog)
{
staticOgreLog->logMessage("Audio frame bufferSize / duration / dts: "
+ boost::lexical_cast<std::string>(bufferSize) + " / "
+ boost::lexical_cast<std::string>(duration) + " / "
+ boost::lexical_cast<std::string>(dts), Ogre::LML_NORMAL);
}
// Create the audio frame
AudioFrame* frame = new AudioFrame();
frame->dataSize = bufferSize;
frame->data = new uint8_t[bufferSize];
if (p_frame->channels == 2)
{
memcpy(frame->data, p_frame->data[0], bufferSize >> 1);
memcpy(frame->data + (bufferSize >> 1), p_frame->data[1], bufferSize >> 1);
}
else
{
memcpy(frame->data, p_frame->data, bufferSize);
}
double timeBase = ((double)p_audioCodecContext->time_base.num) / (double)p_audioCodecContext->time_base.den;
frame->lifeTime = duration * timeBase;
p_player->addAudioFrame(frame);
}
return decoded;
}
So, as you can see, I decode the frame, memcpy it to my own struct, AudioFrame. Now, when the sound is played, I use these audio frame like this:
int numBuffers = 4;
ALuint buffers[4];
alGenBuffers(numBuffers, buffers);
ALenum success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error on alGenBuffers : " + Ogre::StringConverter::toString(success) + alGetString(success));
return;
}
// Fill a number of data buffers with audio from the stream
std::vector<AudioFrame*> audioBuffers;
std::vector<unsigned int> audioBufferSizes;
unsigned int numReturned = FFMPEG_PLAYER->getDecodedAudioFrames(numBuffers, audioBuffers, audioBufferSizes);
// Assign the data buffers to the OpenAL buffers
for (unsigned int i = 0; i < numReturned; ++i)
{
alBufferData(buffers[i], _streamingFormat, audioBuffers[i]->data, audioBufferSizes[i], _streamingFrequency);
success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error on alBufferData : " + Ogre::StringConverter::toString(success) + alGetString(success)
+ " size: " + Ogre::StringConverter::toString(audioBufferSizes[i]));
return;
}
}
// Queue the buffers into OpenAL
alSourceQueueBuffers(_source, numReturned, buffers);
success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error queuing streaming buffers: " + Ogre::StringConverter::toString(success) + alGetString(success));
return;
}
}
alSourcePlay(_source);
The format and frequency I give to OpenAL are AL_FORMAT_STEREO_FLOAT32 (it is a stereo sound stream, and I did initialize the FLOAT32 extension) and 48000 (which is the sample rate of the AVCodecContext of the audio stream).
And during playback, I do the following to refill OpenAL's buffers:
ALint numBuffersProcessed;
// Check if OpenAL is done with any of the queued buffers
alGetSourcei(_source, AL_BUFFERS_PROCESSED, &numBuffersProcessed);
if(numBuffersProcessed <= 0)
return;
// Fill a number of data buffers with audio from the stream
std::vector<AudiFrame*> audioBuffers;
std::vector<unsigned int> audioBufferSizes;
unsigned int numFilled = FFMPEG_PLAYER->getDecodedAudioFrames(numBuffersProcessed, audioBuffers, audioBufferSizes);
// Assign the data buffers to the OpenAL buffers
ALuint buffer;
for (unsigned int i = 0; i < numFilled; ++i)
{
// Pop the oldest queued buffer from the source,
// fill it with the new data, then re-queue it
alSourceUnqueueBuffers(_source, 1, &buffer);
ALenum success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error Unqueuing streaming buffers: " + Ogre::StringConverter::toString(success));
return;
}
alBufferData(buffer, _streamingFormat, audioBuffers[i]->data, audioBufferSizes[i], _streamingFrequency);
success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error on re- alBufferData: " + Ogre::StringConverter::toString(success));
return;
}
alSourceQueueBuffers(_source, 1, &buffer);
success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error re-queuing streaming buffers: " + Ogre::StringConverter::toString(success) + " "
+ alGetString(success));
return;
}
}
// Make sure the source is still playing,
// and restart it if needed.
ALint playStatus;
alGetSourcei(_source, AL_SOURCE_STATE, &playStatus);
if(playStatus != AL_PLAYING)
alSourcePlay(_source);
As you can see, I do quite heavy error checking. But I do not get any errors, neither from OpenAL nor from FFmpeg.
Edit: What I hear somewhat resembles the actual audio from the video, but VERY high pitched and stuttering VERY much. Also, it seems to be playing on top of TV noise. Very strange. Plus, it is playing much slower than the correct audio would.
Edit: 2 After using AL_FORMAT_STEREO_FLOAT32, the sound plays at the correct speed, but is still very high pitched and stuttering (though less than before).
The video itself is not broken, it can be played fine on any player. OpenAL can also play *.way files just fine in the same application, so it is also working.
Any ideas what could be wrong here or how to do this correctly?
My only guess is that somehow, FFmpeg's decode function does not produce data OpenGL can read. But this is as far as the FFmpeg decode example goes, so I don't know what's missing. As I understand it, the decode_audio4 function decodes the frame to raw data. And OpenAL should be able to work with RAW data (or rather, doesn't work with anything else).
So, I finally figured out how to do it. Gee, what a mess. It was a hint from a user on the libav-users mailing list that put me on the correct path.
Here are my mistakes:
Using the wrong format in the alBufferData function. I used AL_FORMAT_STEREO16 (as that is what every single streaming example with OpenAL uses). I should have used AL_FORMAT_STEREO_FLOAT32, as the video I stream is Ogg and vorbis is stored in floating points. And using swr_convert to convert from AV_SAMPLE_FMT_FLTP to AV_SAMPLE_FMT_S16 just crashes. No idea why.
Not using swr_convert to convert the decoded audio frame to the target format. After I was trying to use swr_convert to convert from FLTP to S16, and it would simply crash without a reason given, I assumed it was broken. But after figuring out my first mistake, I tried again, converting from FLTP to FLT (non-planar) and then it worked! So OpenAL uses interleaved format, not planar. Good to know.
So here is the decodeAudioPacket function that is working for me with Ogg video, vorbis audio stream:
int decodeAudioPacket( AVPacket& p_packet, AVCodecContext* p_audioCodecContext, AVFrame* p_frame,
SwrContext* p_swrContext, uint8_t** p_destBuffer, int p_destLinesize,
FFmpegVideoPlayer* p_player, VideoInfo& p_videoInfo)
{
// Decode audio frame
int got_frame = 0;
int decoded = avcodec_decode_audio4(p_audioCodecContext, p_frame, &got_frame, &p_packet);
if (decoded < 0)
{
p_videoInfo.error = "Error decoding audio frame.";
return decoded;
}
if(decoded <= p_packet.size)
{
/* Move the unread data to the front and clear the end bits */
int remaining = p_packet.size - decoded;
memmove(p_packet.data, &p_packet.data[decoded], remaining);
av_shrink_packet(&p_packet, remaining);
}
// Frame is complete, store it in audio frame queue
if (got_frame)
{
int outputSamples = swr_convert(p_swrContext,
p_destBuffer, p_destLinesize,
(const uint8_t**)p_frame->extended_data, p_frame->nb_samples);
int bufferSize = av_get_bytes_per_sample(AV_SAMPLE_FMT_FLT) * p_videoInfo.audioNumChannels
* outputSamples;
int64_t duration = p_frame->pkt_duration;
int64_t dts = p_frame->pkt_dts;
if (staticOgreLog)
{
staticOgreLog->logMessage("Audio frame bufferSize / duration / dts: "
+ boost::lexical_cast<std::string>(bufferSize) + " / "
+ boost::lexical_cast<std::string>(duration) + " / "
+ boost::lexical_cast<std::string>(dts), Ogre::LML_NORMAL);
}
// Create the audio frame
AudioFrame* frame = new AudioFrame();
frame->dataSize = bufferSize;
frame->data = new uint8_t[bufferSize];
memcpy(frame->data, p_destBuffer[0], bufferSize);
double timeBase = ((double)p_audioCodecContext->time_base.num) / (double)p_audioCodecContext->time_base.den;
frame->lifeTime = duration * timeBase;
p_player->addAudioFrame(frame);
}
return decoded;
}
And here is how I initialize the context and the destination buffer:
// Initialize SWR context
SwrContext* swrContext = swr_alloc_set_opts(NULL,
audioCodecContext->channel_layout, AV_SAMPLE_FMT_FLT, audioCodecContext->sample_rate,
audioCodecContext->channel_layout, audioCodecContext->sample_fmt, audioCodecContext->sample_rate,
0, NULL);
int result = swr_init(swrContext);
// Create destination sample buffer
uint8_t** destBuffer = NULL;
int destBufferLinesize;
av_samples_alloc_array_and_samples( &destBuffer,
&destBufferLinesize,
videoInfo.audioNumChannels,
2048,
AV_SAMPLE_FMT_FLT,
0);