Oboe Async Audio Extraction - c++

I am trying to build a NDK based c++ low latancy audio player which will encounter three operations for multiple audios.
Play from assets.
Stream from an online source.
Play from local device storage.
From one of the Oboe samples provided by Google, I added another function to the class NDKExtractor.cpp to extract a URL based audio and render it to audio device while reading from source at the same time.
int32_t NDKExtractor::decode(char *file, uint8_t *targetData, AudioProperties targetProperties) {
LOGD("Using NDK decoder: %s",file);
// Extract the audio frames
AMediaExtractor *extractor = AMediaExtractor_new();
//using this method instead of AMediaExtractor_setDataSourceFd() as used for asset files in the rythem game example
media_status_t amresult = AMediaExtractor_setDataSource(extractor, file);
if (amresult != AMEDIA_OK) {
LOGE("Error setting extractor data source, err %d", amresult);
return 0;
}
// Specify our desired output format by creating it from our source
AMediaFormat *format = AMediaExtractor_getTrackFormat(extractor, 0);
int32_t sampleRate;
if (AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_SAMPLE_RATE, &sampleRate)) {
LOGD("Source sample rate %d", sampleRate);
if (sampleRate != targetProperties.sampleRate) {
LOGE("Input (%d) and output (%d) sample rates do not match. "
"NDK decoder does not support resampling.",
sampleRate,
targetProperties.sampleRate);
return 0;
}
} else {
LOGE("Failed to get sample rate");
return 0;
};
int32_t channelCount;
if (AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &channelCount)) {
LOGD("Got channel count %d", channelCount);
if (channelCount != targetProperties.channelCount) {
LOGE("NDK decoder does not support different "
"input (%d) and output (%d) channel counts",
channelCount,
targetProperties.channelCount);
}
} else {
LOGE("Failed to get channel count");
return 0;
}
const char *formatStr = AMediaFormat_toString(format);
LOGD("Output format %s", formatStr);
const char *mimeType;
if (AMediaFormat_getString(format, AMEDIAFORMAT_KEY_MIME, &mimeType)) {
LOGD("Got mime type %s", mimeType);
} else {
LOGE("Failed to get mime type");
return 0;
}
// Obtain the correct decoder
AMediaCodec *codec = nullptr;
AMediaExtractor_selectTrack(extractor, 0);
codec = AMediaCodec_createDecoderByType(mimeType);
AMediaCodec_configure(codec, format, nullptr, nullptr, 0);
AMediaCodec_start(codec);
// DECODE
bool isExtracting = true;
bool isDecoding = true;
int32_t bytesWritten = 0;
while (isExtracting || isDecoding) {
if (isExtracting) {
// Obtain the index of the next available input buffer
ssize_t inputIndex = AMediaCodec_dequeueInputBuffer(codec, 2000);
//LOGV("Got input buffer %d", inputIndex);
// The input index acts as a status if its negative
if (inputIndex < 0) {
if (inputIndex == AMEDIACODEC_INFO_TRY_AGAIN_LATER) {
// LOGV("Codec.dequeueInputBuffer try again later");
} else {
LOGE("Codec.dequeueInputBuffer unknown error status");
}
} else {
// Obtain the actual buffer and read the encoded data into it
size_t inputSize;
uint8_t *inputBuffer = AMediaCodec_getInputBuffer(codec, inputIndex,
&inputSize);
//LOGV("Sample size is: %d", inputSize);
ssize_t sampleSize = AMediaExtractor_readSampleData(extractor, inputBuffer,
inputSize);
auto presentationTimeUs = AMediaExtractor_getSampleTime(extractor);
if (sampleSize > 0) {
// Enqueue the encoded data
AMediaCodec_queueInputBuffer(codec, inputIndex, 0, sampleSize,
presentationTimeUs,
0);
AMediaExtractor_advance(extractor);
} else {
LOGD("End of extractor data stream");
isExtracting = false;
// We need to tell the codec that we've reached the end of the stream
AMediaCodec_queueInputBuffer(codec, inputIndex, 0, 0,
presentationTimeUs,
AMEDIACODEC_BUFFER_FLAG_END_OF_STREAM);
}
}
}
if (isDecoding) {
// Dequeue the decoded data
AMediaCodecBufferInfo info;
ssize_t outputIndex = AMediaCodec_dequeueOutputBuffer(codec, &info, 0);
if (outputIndex >= 0) {
// Check whether this is set earlier
if (info.flags & AMEDIACODEC_BUFFER_FLAG_END_OF_STREAM) {
LOGD("Reached end of decoding stream");
isDecoding = false;
} else {
// Valid index, acquire buffer
size_t outputSize;
uint8_t *outputBuffer = AMediaCodec_getOutputBuffer(codec, outputIndex,
&outputSize);
/*LOGV("Got output buffer index %d, buffer size: %d, info size: %d writing to pcm index %d",
outputIndex,
outputSize,
info.size,
m_writeIndex);*/
// copy the data out of the buffer
memcpy(targetData + bytesWritten, outputBuffer, info.size);
bytesWritten += info.size;
AMediaCodec_releaseOutputBuffer(codec, outputIndex, false);
}
} else {
// The outputIndex doubles as a status return if its value is < 0
switch (outputIndex) {
case AMEDIACODEC_INFO_TRY_AGAIN_LATER:
LOGD("dequeueOutputBuffer: try again later");
break;
case AMEDIACODEC_INFO_OUTPUT_BUFFERS_CHANGED:
LOGD("dequeueOutputBuffer: output buffers changed");
break;
case AMEDIACODEC_INFO_OUTPUT_FORMAT_CHANGED:
LOGD("dequeueOutputBuffer: output outputFormat changed");
format = AMediaCodec_getOutputFormat(codec);
LOGD("outputFormat changed to: %s", AMediaFormat_toString(format));
break;
}
}
}
}
// Clean up
AMediaFormat_delete(format);
AMediaCodec_delete(codec);
AMediaExtractor_delete(extractor);
return bytesWritten;
}
Now the problem i am facing is that this code it first extracts all the audio data saves it into a buffer which then becomes part of AFileDataSource which i derived from DataSource class in the same sample.
And after its done extracting the whole file it plays by calling the onAudioReady() for Oboe AudioStreamBuilder.
What I need is to play as it streams the chunk of audio buffer.
Optional Query: Also aside from the question it blocks the UI even though i created a foreground service to communicate with the NDK functions to execute this code. Any thoughts on this?

You probably solved this already, but for future readers...
You need a FIFO buffer to store the decoded audio. You can use the Oboe's FIFO buffer e.g. oboe::FifoBuffer.
You can have a low/high watermark for the buffer and a state machine, so you start decoding when the buffer is almost empty and you stop decoding when it's full (you'll figure out the other states that you need).
As a side note, I implemented such player only to find at some later time, that the AAC codec is broken on some devices (Xiaomi and Amazon come to mind), so I had to throw away the AMediaCodec/AMediaExtractor parts and use an AAC library instead.

You have to implement a ringBuffer (or use the one implemented in the oboe example LockFreeQueue.h) and copy the data on buffers that you send on the ringbuffer from the extracting thread. On the other end of the RingBuffer, the audio thread will get that data from the queue and copy it to the audio buffer. This will happen on onAudioReady(oboe::AudioStream *oboeStream, void *audioData, int32_t numFrames) callback that you have to implement in your class (look oboe docs). Be sure to follow all the good practices on the Audio thread (don't allocate/deallocate memory there, no mutexes and no file I/O etc.)
Optional query: A service doesn't run in a separate thread, so obviously if you call it from UI thread it blocks the UI. Look at other types of services, there you can have IntentService or a service with a Messenger that will launch a separate thread on Java, or you can create threads in C++ side using std::thread

Related

Sound playback using FFmpeg and libsoundio in c++

I am trying to make a video player desktop application in c++ using primarily FFmpeg and Qt6. As of for now, I can decode and play video frames correctly at the right speed, that is not a problem. I am now trying to get to playback audio, which is much harder than I expected it to be. I am using libsoundio for my audio library but the documentation is really poor and there are not many examples/tutorials on it. I am also a beginner when it comes to audio programming, although I understand the basics. First off, if anyone can recommend an audio library for this type of job let me know, but I would like to use open source libraries. Anyways, here is how I decode my audio data with FFmpeg. I'm not sure if I am doing it correctly as I could barely find documentation on that as well...
I have a struct that contains all the information which is initiated through a function:
struct VideoReader
{
bool valid;
int width, height;
int video_stream_index;
int audio_stream_index;
AVRational time_base;
AVFormatContext* av_format_ctx;
AVCodecContext* av_vi_codec_ctx;
AVCodecContext* av_au_codec_ctx;
AVPacket* packet;
AVFrame* frame;
SwsContext* sws_ctx;
SwrContext* swr_ctx;
};
The function that initiates it is quite long and is not necessary to share but it populates all those values except for the sws_ctx and the swr_ctx.
Here is how I decode packets, this function is simplified, I left the video decoding out of it, ill take care of syncing once I can properly playback audio:
bool video_reader_read_au_frame(VideoReader *video_reader, unsigned char **frame_buffer)
{
// Unpack video_reader
auto& av_format_ctx = video_reader->av_format_ctx;
auto& av_codec_ctx = video_reader->av_au_codec_ctx;
auto& av_packet = video_reader->packet;
auto& av_frame = video_reader->frame;
auto& swr_ctx = video_reader->swr_ctx;
int& audio_stream_index = video_reader->audio_stream_index;
// Decode the video frame data
int response;
while (av_read_frame(av_format_ctx, av_packet) >= 0)
{
last_frame = false;
if (av_packet->stream_index != audio_stream_index)
{
av_packet_unref(av_packet);
continue;
}
response = avcodec_send_packet(av_codec_ctx, av_packet);
if (response < 0)
{
Logger::error("Could not decode packet.");
return false;
}
response = avcodec_receive_frame(av_codec_ctx, av_frame);
if (response == AVERROR(EAGAIN) || response == AVERROR_EOF)
{
av_packet_unref(av_packet);
continue;
}
else if (response < 0)
{
Logger::error("Could not decode packet.");
return false;
}
av_packet_unref(av_packet);
break;
}
// Initialize SwrContext
if (!swr_ctx) {
swr_ctx = swr_alloc_set_opts(nullptr,
av_codec_ctx->channel_layout, AV_SAMPLE_FMT_FLT,
av_codec_ctx->sample_rate, av_codec_ctx->channel_layout,
av_codec_ctx->sample_fmt, av_codec_ctx->sample_rate,
0, nullptr);
if (!swr_ctx)
{
Logger::error("Could not create SwrContext.");
return false;
}
if (swr_init(swr_ctx) < 0)
{
Logger::error("Could not initialize SwrContext.");
return false;
}
}
const int MAX_BUFFER_SIZE = av_samples_get_buffer_size(nullptr, av_frame->channels, av_frame->nb_samples, AV_SAMPLE_FMT_FLT, 1);
*frame_buffer = (unsigned char*)av_malloc(MAX_BUFFER_SIZE);
swr_convert(swr_ctx, frame_buffer, av_frame->nb_samples,
(const unsigned char**)av_frame->data, av_frame->nb_samples);
av_frame_unref(av_frame);
return true;
}
Here is how I would normally call this function:
VideoReader vr{};
if(!video_reader_open(&vr, "C:/Path/to/file.mp4"))
{
Logger::error("Could not initialize VideoReader.");
return 1;
}
unsigned char* buffer;
if(!video_reader_read_au_frame(&vr, &buffer))
{
Logger::error("Could not read audio data.");
return 1;
}
play_audio(&buffer); <-- Find a way to play audio once buffer has data in it
video_reader_close(&vr);
return 0;
Obviously I will loop over video_reader_read_au_frame(&vr, &buffer) to playback the whole video.
I believe my code puts the samples from the decoded frame in buffer, but I am really not sure.. I am unsure as well if I need to convert to AV_SAMPLE_FMT_FLT audio format or something else or just leave it as it is. For libsoundio, I kind of understand this example: http://libsound.io/ but I'm not sure I fully understand how this library works, especially the callback function. I know I have to pass buffer in outstream->userdata as a void pointer, but I don't know how to use it in the callback function. Any help or guidance would be greatly appreciated. Note that later on in this project I might want to send this data over a network to play the video on another computer in sync.

How to use deviceRequest of IOUSBHostDevice / IOUSBHostInterface?

I'm currently experimenting with Apple's I/O Kit to develop a kernel module. I'm trying to create a device driver for a USB Wireless Card (TP-Link WN722N-v1) using IOUSBHostDevice.
I managed to get the kext loaded and it probes correctly with the card, but I need to send the firmware to the card, which I am trying to do in probe().
I don't have the perfect knowledge of how everything goes, but it seems that I need to send a request to the configuration endpoint 0, but the ways I have tried I get kernel panic, and the other examples I have are using an obsolete DeviceRequest with no exchangeable arguments with the new deviceRequest.
I have a reference for writing the firmware from github project "BrcmPatchRAM" but all the forks use old IOUSBInterface instead of IOUSBHostInterface.
I have search this site for similar questions but found none helpful in this matter, I also searched Apple developer website documentation and it references also the old ways of DeviceRequest.
Which is the right way of re-writing this for example:
IOReturn USBInterfaceShim::hciCommand(void* command, UInt16 length)
{
IOUSBDevRequest request =
{
.bmRequestType = USBmakebmRequestType(kUSBOut, kUSBClass, kUSBDevice),
.bRequest = 0,
.wValue = 0,
.wIndex = 0,
.wLength = length,
.pData = command
};
return m_pInterface->DeviceRequest(&request);
}
Using some other implementation of this function gives me an error when loading the driver and tells me that I am sending the request before calling prepare, but I did called prepare before sending the request.
Here is a section of my messy code:
<pre>IOReturn AirPort_Atheros9271::pipeCommand(UInt8 requestType, UInt8 command, UInt16 address, IOMemoryDescriptor *buffer)
{
DeviceRequest request;
request.bmRequestType = requestType;
request.bRequest = command;
request.wValue = address;
request.wIndex = 0;
request.wLength = buffer->getLength();
uint32_t bytesTransferred;
return fInterface->deviceRequest(request, buffer, bytesTransferred, kUSBHostStandardRequestCompletionTimeout);
}
IOReturn AirPort_Atheros9271::pipeCommand(UInt8 requestType, UInt8 command, UInt16 address, void *buffer, UInt16 length)
{
DeviceRequest request;
request.bmRequestType = requestType;
request.bRequest = command;
request.wValue = address;
request.wIndex = 0;
request.wLength = length;
uint32_t bytesTransferred;
return fInterface->deviceRequest(request, buffer, bytesTransferred, kUSBHostStandardRequestCompletionTimeout);
}
bool TL_WN722N::performUpgrade()
{
IOLog("TL_WN722N::[%04x:%04x]: Performing firmware upgrade.\n", fVendorId, fProductId);
OSData *firmwareData = OSData::withBytes(ar9271_fw, ar9271_fw_len);
IOMemoryDescriptor *buffer = IOMemoryDescriptor::withAddress((void*)firmwareData->getBytesNoCopy(), firmwareData->getLength(), kIODirectionIn);
bool success = true;
IOLog("TL_WN722N::[%04x:%04x]: I have firmwareData and created the buffer\n", fVendorId, fProductId);
IOLockLock(fCompletionLock);
IOReturn result;
if (buffer!=NULL)
{
IOLog("TL_WN722N::[%04x:%04x]: Buffer is not null, now calling prepare()\n", fVendorId, fProductId);
if ((result = buffer->prepare(kIODirectionNone)) == kIOReturnSuccess)
{
IOLog("TL_WN722N::[%04x:%04x]: prepare() called and pass! Piping I - writing firmware\n", fVendorId, fProductId);
IOLog("TL_WN722N::[%04x:%04x]: Resultado de prepare: Result: %d\n", fVendorId, fProductId,result);
IOLog("TL_WN722N::[%04x:%04x]: About to excecute pipeCommand\n", fVendorId, fProductId);
result = pipeCommand(0x40, FIRMWARE_DOWNLOAD, AR9271_FIRMWARE >> 8, buffer); //TODO: EXPLOTION LINE
if (result>0) {
IOLog("TL_WN722N::[%04x:%04x]: Resultado de pipeCommand: %d\n", fVendorId, fProductId,result);
}
if (result != kIOReturnSuccess)
IOLog("TL_WN722N::[%04x:%04x]: Unable to write the firmware (0x%08x).\n", fVendorId, fProductId, result);
else
{
if ((result = pipeCommand(0x40, FIRMWARE_DOWNLOAD_COMP, AR9271_FIRMWARE_TEXT >> 8, NULL, 0)) != kIOReturnSuccess)
IOLog("TL_WN722N::[%04x:%04x]: Unable to write the firmware complete sequence (0x%08x).\n", fVendorId, fProductId, result);
else
{
IOLog("TL_WN722N::[%04x:%04x]: Success in writing the firmware sequence.\n", fVendorId, fProductId);
success = true;
}
}
}
else
IOLog("TL_WN722N::[%04x:%04x]: Failed to prepare write memory buffer (0x%08x).\n", fVendorId, fProductId, result);
if ((result = buffer->complete()) != kIOReturnSuccess)
IOLog("TL_WN722N::[%04x:%04x]: Failed to complete write memory buffer (0x%08x).\n", fVendorId, fProductId, result);
}
else
IOLog("TL_WN722N::[%04x:%04x]: Unable to allocate write memory buffer.\n", fVendorId, fProductId);
IOLockUnlock(fCompletionLock);
OSSafeReleaseNULL(buffer);
OSSafeReleaseNULL(firmwareData);
return success;
}</pre>
Putting together a few pieces here:
Using some other implementation of this function gives me an error when loading the driver and tells me that I am sending the request before calling prepare, but I did called prepare before sending the request.
This gives us a pretty big clue that your issue is with the memory buffer. So let's see where the creation and prepare() are happening:
IOMemoryDescriptor *buffer = IOMemoryDescriptor::withAddress(
(void*)firmwareData->getBytesNoCopy(), firmwareData->getLength(),
kIODirectionIn);
// ^^^^^^^^^^^^^^^^
So you're creating an input (read) memory descriptor.
if ((result = buffer->prepare(kIODirectionNone)) == kIOReturnSuccess)
kIODirectionNone just uses the creation direction, so you're preparing for reading data from the device.
And then, the I/O:
result = pipeCommand(0x40, FIRMWARE_DOWNLOAD, AR9271_FIRMWARE >> 8, buffer); //TODO: EXPLOTION LINE
// --------------------------^^^^
This means your bmRequestType is 0x40, and therefore does not have the 0x80 bit set. This means it's host-to-device, i.e. an output/write.
So you've got a direction mismatch: you're preparing some memory for reading from the USB device and then try to use it to write to the device. This won't work.
A few more comments:
An OSData isn't really an ideal candidate for I/O buffer allocation. If you want to allocate system memory for I/O, use an IOBufferMemoryDescriptor. If ar9271_fw is a statically allocated array, you can also simply wrap it using IOMemoryDescriptor::withAddress() - this avoids the copy that OSData::withBytes() performs.
You're performing I/O while holding the IOLockLock(fCompletionLock); lock. This isn't a great idea even when launching async I/O, but it looks like you're using the blocking version of deviceRequest? That's almost certainly not what you want: the function call can literally take seconds to complete or fail. You should not be holding locks for that long.

Figuring out a race condition

I am building a screen recorder, I am using ffmpeg to make the video out from frames I get from Google Chrome. I get green screen in the output video. I think there is a race condition in the threads since I am not allowed to use main thread to do the processing. here how the code look like
This function works each time I get a new frame, I suspect the functions avpicture_fill & vpx_codec_get_cx_data are being rewritten before write_ivf_frame_header & WriteFile are done.
I am thinking of creating a queue where this function push the object pp::VideoFrame then another thread with mutex will dequeue and do the processing below.
What is the best solution for this problem? and what is the optimal way of debugging it
void EncoderInstance::OnGetFrame(int32_t result, pp::VideoFrame frame) {
if (result != PP_OK)
return;
const uint8_t* data = static_cast<const uint8_t*>(frame.GetDataBuffer());
pp::Size size;
frame.GetSize(&size);
uint32_t buffersize = frame.GetDataBufferSize();
if (is_recording_) {
vpx_codec_iter_t iter = NULL;
const vpx_codec_cx_pkt_t *pkt;
// copy the pixels into our "raw input" container.
int bytes_filled = avpicture_fill(&pic_raw, data, AV_PIX_FMT_YUV420P, out_width, out_height);
if(!bytes_filled) {
Logger::Log("Cannot fill the raw input buffer");
return;
}
if(vpx_codec_encode(&codec, &raw, frame_cnt, 1, flags, VPX_DL_REALTIME))
die_codec(&codec, "Failed to encode frame");
while( (pkt = vpx_codec_get_cx_data(&codec, &iter)) ) {
switch(pkt->kind) {
case VPX_CODEC_CX_FRAME_PKT:
glb_app_thread.message_loop().PostWork(callback_factory_.NewCallback(&EncoderInstance::write_ivf_frame_header, pkt));
glb_app_thread.message_loop().PostWork(callback_factory_.NewCallback(&EncoderInstance::WriteFile, pkt));
break;
default:break;
}
}
frame_cnt++;
}
video_track_.RecycleFrame(frame);
if (need_config_) {
ConfigureTrack();
need_config_ = false;
} else {
video_track_.GetFrame(
callback_factory_.NewCallbackWithOutput(
&EncoderInstance::OnGetFrame));
}
}

SDL2 & SMPEG2 - Empty sound buffer trying to read a MP3

I'm trying to load a MP3 in a buffer using the SMPEG2 library, which comes with the SDL2. Every SMPEG function calls returns without error, but when I'm done, the sound buffer is full of zeros.
Here's the code :
bool LoadMP3(char* filename)
{
bool success = false;
const Uint32 Mp3ChunkLen = 4096;
SMPEG* mp3;
SMPEG_Info infoMP3;
Uint8 * ChunkBuffer;
Uint32 MP3Length = 0;
// Allocate a chunk buffer
ChunkBuffer = (Uint8*)malloc(Mp3ChunkLen);
SDL_RWops *mp3File = SDL_RWFromFile(filename, "rb");
if (mp3File != NULL)
{
mp3 = SMPEG_new_rwops(mp3File, &infoMP3, 1, 0);
if(mp3 != NULL)
{
if(infoMP3.has_audio)
{
Uint32 readLen;
// Inform the MP3 of the output audio specifications
SMPEG_actualSpec(mp3, &asDeviceSpecs); // static SDL_AudioSpec asDeviceSpecs; containing valid values after a call to SDL_OpenAudioDevice
// Enable the audio and disable the video.
SMPEG_enableaudio(mp3, 1);
SMPEG_enablevideo(mp3, 0);
// Play the MP3 once to get the size of the needed finale buffer
SMPEG_play(mp3);
while ((readLen = SMPEG_playAudio(mp3, ChunkBuffer, Mp3ChunkLen)) > 0)
{
MP3Length += readLen;
}
SMPEG_stop(mp3);
if(MP3Length > 0)
{
// Reallocate the buffer with the new length (if needed)
if (MP3Length != Mp3ChunkLen)
{
ChunkBuffer = (Uint8*)realloc(ChunkBuffer, MP3Length);
}
// Replay the entire MP3 into the new ChunkBuffer.
SMPEG_rewind(mp3);
SMPEG_play(mp3);
bool readBackSuccess = (MP3Length == SMPEG_playAudio(mp3, ChunkBuffer, MP3Length));
SMPEG_stop(mp3);
if(readBackSuccess)
{
// !!! Here, ChunkBuffer contains only zeros !!!
success = true;
}
}
}
SMPEG_delete(mp3);
mp3 = NULL;
}
SDL_RWclose(mp3File);
mp3File = NULL;
}
free(ChunkBuffer);
return success;
}
The code's widely based on SDL_Mixer, which I cannot use for my projet, based on its limitations.
I know Ogg Vorbis would be a better choice of file format, but I'm porting a very old project, and it worked entirely with MP3s.
I'm sure the sound system is initialized correctly because I can play WAV files just fine. It's intialized with a frequency of 44100, 2 channels, 1024 samples, and the AUDIO_S16SYS format (the latter which is, as I understood from the SMPEG source, mandatory).
I've calculated the anticipated buffer size, based on the bitrate, the amount of data in the MP3 and the OpenAudioDevice audio specs, and everything is consistent.
I cannot figure why everything but the buffer data seems to be working.
UPDATE #1
Still trying to figure out what's wrong, I thought the support for MP3 might not be working, so I created the following function :
SMPEG *mpeg;
SMPEG_Info info;
mpeg = SMPEG_new(filename,&info, 1);
SMPEG_play(mpeg);
do { SDL_Delay(50); } while(SMPEG_status(mpeg) == SMPEG_PLAYING);
SMPEG_delete(mpeg);
The MP3 played. So, the decoding should actually be working. But that's not what I need ; I really need the sound buffer data so I can send it to my mixer.
After much tinkering, research and digging through the SMPEG source code, I realized that I had to pass 1 as the SDLAudio parameter to SMPEG_new_rwops function.
The comment found in smpeg.h is misleading :
The sdl_audio parameter indicates if SMPEG should initialize the SDL audio subsystem. If not, you will have to use the SMPEG_playaudio() function below to extract the decoded data.
Since the audio subsystem was already initialized and I was using the SMPEG_playaudio() function, I had no reason to think I needed this parameter to be non-zero. In SMPEG, this parameter triggers the audio decompression at opening time, but even though I called SMPEG_enableaudio(mp3, 1); the data is never reparsed. This might be a bug/a shady feature.
I had another problem with the freesrc parameter which needed to be 0, since I freed the SDL_RWops object myself.
For future reference, once ChunkBuffer has the MP3 data, it needs to pass through SDL_BuildAudioCVT/SDL_ConvertAudio if it's to be played through an already opened audio device.
The final working code is :
// bool ReadMP3ToBuffer(char* filename)
bool success = false;
const Uint32 Mp3ChunkLen = 4096;
SDL_AudioSpec mp3Specs;
SMPEG* mp3;
SMPEG_Info infoMP3;
Uint8 * ChunkBuffer;
Uint32 MP3Length = 0;
// Allocate a chunk buffer
ChunkBuffer = (Uint8*)malloc(Mp3ChunkLen);
memset(ChunkBuffer, 0, Mp3ChunkLen);
SDL_RWops *mp3File = SDL_RWFromFile(filename, "rb"); // filename is a char* passed to the function.
if (mp3File != NULL)
{
mp3 = SMPEG_new_rwops(mp3File, &infoMP3, 0, 1);
if(mp3 != NULL)
{
if(infoMP3.has_audio)
{
Uint32 readLen;
// Get the MP3 audio specs for later conversion
SMPEG_wantedSpec(mp3, &mp3Specs);
SMPEG_enablevideo(mp3, 0);
// Play the MP3 once to get the size of the needed buffer in relation with the audio specs
SMPEG_play(mp3);
while ((readLen = SMPEG_playAudio(mp3, ChunkBuffer, Mp3ChunkLen)) > 0)
{
MP3Length += readLen;
}
SMPEG_stop(mp3);
if(MP3Length > 0)
{
// Reallocate the buffer with the new length (if needed)
if (MP3Length != Mp3ChunkLen)
{
ChunkBuffer = (Uint8*)realloc(ChunkBuffer, MP3Length);
memset(ChunkBuffer, 0, MP3Length);
}
// Replay the entire MP3 into the new ChunkBuffer.
SMPEG_rewind(mp3);
SMPEG_play(mp3);
bool readBackSuccess = (MP3Length == SMPEG_playAudio(mp3, ChunkBuffer, MP3Length));
SMPEG_stop(mp3);
if(readBackSuccess)
{
SDL_AudioCVT convertedSound;
// NOTE : static SDL_AudioSpec asDeviceSpecs; containing valid values after a call to SDL_OpenAudioDevice
if(SDL_BuildAudioCVT(&convertedSound, mp3Specs.format, mp3Specs.channels, mp3Specs.freq, asDeviceSpecs.format, asDeviceSpecs.channels, asDeviceSpecs.freq) >= 0)
{
Uint32 newBufferLen = MP3Length*convertedSound.len_mult;
// Make sure the audio length is a multiple of a sample size to avoid sound clicking
int sampleSize = ((asDeviceSpecs.format & 0xFF)/8)*asDeviceSpecs.channels;
newBufferLen &= ~(sampleSize-1);
// Allocate the new buffer and proceed with the actual conversion.
convertedSound.buf = (Uint8*)malloc(newBufferLen);
memcpy(convertedSound.buf, ChunkBuffer, MP3Length);
convertedSound.len = MP3Length;
if(SDL_ConvertAudio(&convertedSound) == 0)
{
// Save convertedSound.buf and convertedSound.len_cvt for future use in your mixer code.
// Dont forget to free convertedSound.buf once it's not used anymore.
success = true;
}
}
}
}
}
SMPEG_delete(mp3);
mp3 = NULL;
}
SDL_RWclose(mp3File);
mp3File = NULL;
}
free(ChunkBuffer);
return success;
NOTE : Some MP3 files I tried lost a few milliseconds and cutoff too early during playback when I resampled them with this code. Some others didn't. I could reproduce the same behaviour in Audacity, so I'm not sure what's going on. There may still have a bug with my code, a bug in SMPEG, or it maybe a known issue with the MP3 format itself. If someone can provide and explanation in the comments, that would be great!

FFmpeg + OpenAL - playback streaming sound from video won't work

I am decoding an OGG video (theora & vorbis as codecs) and want to show it on the screen (using Ogre 3D) while playing its sound. I can decode the image stream just fine and the video plays perfectly with the correct frame rate, etc.
However, I cannot get the sound to play at all with OpenAL.
Edit: I managed to make the playing sound resemble the actual audio in the video at least somewhat. Updated sample code.
Edit 2: I was able to get "almost" correct sound now. I had to set OpenAL to use AL_FORMAT_STEREO_FLOAT32 (after initializing the extension) instead of just STEREO16. Now the sound is "only" extremely high pitched and stuttering, but at the correct speed.
Here is how I decode audio packets (in a background thread, the equivalent works just fine for the image stream of the video file):
//------------------------------------------------------------------------------
int decodeAudioPacket( AVPacket& p_packet, AVCodecContext* p_audioCodecContext, AVFrame* p_frame,
FFmpegVideoPlayer* p_player, VideoInfo& p_videoInfo)
{
// Decode audio frame
int got_frame = 0;
int decoded = avcodec_decode_audio4(p_audioCodecContext, p_frame, &got_frame, &p_packet);
if (decoded < 0)
{
p_videoInfo.error = "Error decoding audio frame.";
return decoded;
}
// Frame is complete, store it in audio frame queue
if (got_frame)
{
int bufferSize = av_samples_get_buffer_size(NULL, p_audioCodecContext->channels, p_frame->nb_samples,
p_audioCodecContext->sample_fmt, 0);
int64_t duration = p_frame->pkt_duration;
int64_t dts = p_frame->pkt_dts;
if (staticOgreLog)
{
staticOgreLog->logMessage("Audio frame bufferSize / duration / dts: "
+ boost::lexical_cast<std::string>(bufferSize) + " / "
+ boost::lexical_cast<std::string>(duration) + " / "
+ boost::lexical_cast<std::string>(dts), Ogre::LML_NORMAL);
}
// Create the audio frame
AudioFrame* frame = new AudioFrame();
frame->dataSize = bufferSize;
frame->data = new uint8_t[bufferSize];
if (p_frame->channels == 2)
{
memcpy(frame->data, p_frame->data[0], bufferSize >> 1);
memcpy(frame->data + (bufferSize >> 1), p_frame->data[1], bufferSize >> 1);
}
else
{
memcpy(frame->data, p_frame->data, bufferSize);
}
double timeBase = ((double)p_audioCodecContext->time_base.num) / (double)p_audioCodecContext->time_base.den;
frame->lifeTime = duration * timeBase;
p_player->addAudioFrame(frame);
}
return decoded;
}
So, as you can see, I decode the frame, memcpy it to my own struct, AudioFrame. Now, when the sound is played, I use these audio frame like this:
int numBuffers = 4;
ALuint buffers[4];
alGenBuffers(numBuffers, buffers);
ALenum success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error on alGenBuffers : " + Ogre::StringConverter::toString(success) + alGetString(success));
return;
}
// Fill a number of data buffers with audio from the stream
std::vector<AudioFrame*> audioBuffers;
std::vector<unsigned int> audioBufferSizes;
unsigned int numReturned = FFMPEG_PLAYER->getDecodedAudioFrames(numBuffers, audioBuffers, audioBufferSizes);
// Assign the data buffers to the OpenAL buffers
for (unsigned int i = 0; i < numReturned; ++i)
{
alBufferData(buffers[i], _streamingFormat, audioBuffers[i]->data, audioBufferSizes[i], _streamingFrequency);
success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error on alBufferData : " + Ogre::StringConverter::toString(success) + alGetString(success)
+ " size: " + Ogre::StringConverter::toString(audioBufferSizes[i]));
return;
}
}
// Queue the buffers into OpenAL
alSourceQueueBuffers(_source, numReturned, buffers);
success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error queuing streaming buffers: " + Ogre::StringConverter::toString(success) + alGetString(success));
return;
}
}
alSourcePlay(_source);
The format and frequency I give to OpenAL are AL_FORMAT_STEREO_FLOAT32 (it is a stereo sound stream, and I did initialize the FLOAT32 extension) and 48000 (which is the sample rate of the AVCodecContext of the audio stream).
And during playback, I do the following to refill OpenAL's buffers:
ALint numBuffersProcessed;
// Check if OpenAL is done with any of the queued buffers
alGetSourcei(_source, AL_BUFFERS_PROCESSED, &numBuffersProcessed);
if(numBuffersProcessed <= 0)
return;
// Fill a number of data buffers with audio from the stream
std::vector<AudiFrame*> audioBuffers;
std::vector<unsigned int> audioBufferSizes;
unsigned int numFilled = FFMPEG_PLAYER->getDecodedAudioFrames(numBuffersProcessed, audioBuffers, audioBufferSizes);
// Assign the data buffers to the OpenAL buffers
ALuint buffer;
for (unsigned int i = 0; i < numFilled; ++i)
{
// Pop the oldest queued buffer from the source,
// fill it with the new data, then re-queue it
alSourceUnqueueBuffers(_source, 1, &buffer);
ALenum success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error Unqueuing streaming buffers: " + Ogre::StringConverter::toString(success));
return;
}
alBufferData(buffer, _streamingFormat, audioBuffers[i]->data, audioBufferSizes[i], _streamingFrequency);
success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error on re- alBufferData: " + Ogre::StringConverter::toString(success));
return;
}
alSourceQueueBuffers(_source, 1, &buffer);
success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error re-queuing streaming buffers: " + Ogre::StringConverter::toString(success) + " "
+ alGetString(success));
return;
}
}
// Make sure the source is still playing,
// and restart it if needed.
ALint playStatus;
alGetSourcei(_source, AL_SOURCE_STATE, &playStatus);
if(playStatus != AL_PLAYING)
alSourcePlay(_source);
As you can see, I do quite heavy error checking. But I do not get any errors, neither from OpenAL nor from FFmpeg.
Edit: What I hear somewhat resembles the actual audio from the video, but VERY high pitched and stuttering VERY much. Also, it seems to be playing on top of TV noise. Very strange. Plus, it is playing much slower than the correct audio would.
Edit: 2 After using AL_FORMAT_STEREO_FLOAT32, the sound plays at the correct speed, but is still very high pitched and stuttering (though less than before).
The video itself is not broken, it can be played fine on any player. OpenAL can also play *.way files just fine in the same application, so it is also working.
Any ideas what could be wrong here or how to do this correctly?
My only guess is that somehow, FFmpeg's decode function does not produce data OpenGL can read. But this is as far as the FFmpeg decode example goes, so I don't know what's missing. As I understand it, the decode_audio4 function decodes the frame to raw data. And OpenAL should be able to work with RAW data (or rather, doesn't work with anything else).
So, I finally figured out how to do it. Gee, what a mess. It was a hint from a user on the libav-users mailing list that put me on the correct path.
Here are my mistakes:
Using the wrong format in the alBufferData function. I used AL_FORMAT_STEREO16 (as that is what every single streaming example with OpenAL uses). I should have used AL_FORMAT_STEREO_FLOAT32, as the video I stream is Ogg and vorbis is stored in floating points. And using swr_convert to convert from AV_SAMPLE_FMT_FLTP to AV_SAMPLE_FMT_S16 just crashes. No idea why.
Not using swr_convert to convert the decoded audio frame to the target format. After I was trying to use swr_convert to convert from FLTP to S16, and it would simply crash without a reason given, I assumed it was broken. But after figuring out my first mistake, I tried again, converting from FLTP to FLT (non-planar) and then it worked! So OpenAL uses interleaved format, not planar. Good to know.
So here is the decodeAudioPacket function that is working for me with Ogg video, vorbis audio stream:
int decodeAudioPacket( AVPacket& p_packet, AVCodecContext* p_audioCodecContext, AVFrame* p_frame,
SwrContext* p_swrContext, uint8_t** p_destBuffer, int p_destLinesize,
FFmpegVideoPlayer* p_player, VideoInfo& p_videoInfo)
{
// Decode audio frame
int got_frame = 0;
int decoded = avcodec_decode_audio4(p_audioCodecContext, p_frame, &got_frame, &p_packet);
if (decoded < 0)
{
p_videoInfo.error = "Error decoding audio frame.";
return decoded;
}
if(decoded <= p_packet.size)
{
/* Move the unread data to the front and clear the end bits */
int remaining = p_packet.size - decoded;
memmove(p_packet.data, &p_packet.data[decoded], remaining);
av_shrink_packet(&p_packet, remaining);
}
// Frame is complete, store it in audio frame queue
if (got_frame)
{
int outputSamples = swr_convert(p_swrContext,
p_destBuffer, p_destLinesize,
(const uint8_t**)p_frame->extended_data, p_frame->nb_samples);
int bufferSize = av_get_bytes_per_sample(AV_SAMPLE_FMT_FLT) * p_videoInfo.audioNumChannels
* outputSamples;
int64_t duration = p_frame->pkt_duration;
int64_t dts = p_frame->pkt_dts;
if (staticOgreLog)
{
staticOgreLog->logMessage("Audio frame bufferSize / duration / dts: "
+ boost::lexical_cast<std::string>(bufferSize) + " / "
+ boost::lexical_cast<std::string>(duration) + " / "
+ boost::lexical_cast<std::string>(dts), Ogre::LML_NORMAL);
}
// Create the audio frame
AudioFrame* frame = new AudioFrame();
frame->dataSize = bufferSize;
frame->data = new uint8_t[bufferSize];
memcpy(frame->data, p_destBuffer[0], bufferSize);
double timeBase = ((double)p_audioCodecContext->time_base.num) / (double)p_audioCodecContext->time_base.den;
frame->lifeTime = duration * timeBase;
p_player->addAudioFrame(frame);
}
return decoded;
}
And here is how I initialize the context and the destination buffer:
// Initialize SWR context
SwrContext* swrContext = swr_alloc_set_opts(NULL,
audioCodecContext->channel_layout, AV_SAMPLE_FMT_FLT, audioCodecContext->sample_rate,
audioCodecContext->channel_layout, audioCodecContext->sample_fmt, audioCodecContext->sample_rate,
0, NULL);
int result = swr_init(swrContext);
// Create destination sample buffer
uint8_t** destBuffer = NULL;
int destBufferLinesize;
av_samples_alloc_array_and_samples( &destBuffer,
&destBufferLinesize,
videoInfo.audioNumChannels,
2048,
AV_SAMPLE_FMT_FLT,
0);