I'm currently using these settings with OpenAL and recording from a Mic:
BUFFERSIZE 4410
FREQ 22050 // Sample rate
CAP_SIZE 10000 // How much to capture at a time (affects latency)
AL_FORMAT_MONO16
Is it possible to go lower in recording quality? I've tried reducing the sample rate but the end result is a faster playback speed.
Alright, so this is some of the most hacky code I've ever written, and I truly hope no one in their right mind ever uses it in production... just sooooo many bad things.
But to answer your question: I've been able to get the quality down to 8-bit mono recording at 11025 Hz. However, everything I've recorded from my mic comes with a significant amount of static, and I'm not entirely sure why. I've generated 8-bit Karplus-Strong string plucks that sound fantastic, so it could just be my recording device.
#include <AL/al.h>
#include <AL/alc.h>
#include <conio.h>
#include <stdio.h>
#include <string.h>
#include <vector>
#include <time.h>

// Busy-wait "sleep" - burns CPU, but avoids platform-specific sleep calls
void sleep( clock_t wait )
{
    clock_t goal = wait + clock();
    while( goal > clock() )
        ;
}

#define BUFFERSIZE 4410
const int SRATE = 11025;

int main()
{
    std::vector<ALchar> vBuffer;
    ALCdevice *pDevice = NULL;
    ALCcontext *pContext = NULL;
    ALCdevice *pCaptureDevice;
    const ALCchar *szDefaultCaptureDevice;
    ALint iSamplesAvailable;
    ALchar Buffer[BUFFERSIZE];
    ALint iDataSize = 0;

    // NOTE : This code does NOT set up the Wave Device's Audio Mixer to select
    // a recording input or a recording level.
    pDevice = alcOpenDevice(NULL);
    pContext = alcCreateContext(pDevice, NULL);
    alcMakeContextCurrent(pContext);

    printf("Capture Application\n");
    if (pDevice == NULL)
    {
        printf("Failed to initialize OpenAL\n");
        // Shutdown code goes here
        return 0;
    }

    // Check for Capture Extension support
    pContext = alcGetCurrentContext();
    pDevice = alcGetContextsDevice(pContext);
    if (alcIsExtensionPresent(pDevice, "ALC_EXT_CAPTURE") == AL_FALSE)
    {
        printf("Failed to detect Capture Extension\n");
        // Shutdown code goes here
        return 0;
    }

    // Get list of available Capture Devices
    const ALchar *pDeviceList = alcGetString(NULL, ALC_CAPTURE_DEVICE_SPECIFIER);
    if (pDeviceList)
    {
        printf("\nAvailable Capture Devices are:-\n");
        while (*pDeviceList)
        {
            printf("%s\n", pDeviceList);
            pDeviceList += strlen(pDeviceList) + 1;
        }
    }

    // Get the name of the 'default' capture device
    szDefaultCaptureDevice = alcGetString(NULL, ALC_CAPTURE_DEFAULT_DEVICE_SPECIFIER);
    printf("\nDefault Capture Device is '%s'\n\n", szDefaultCaptureDevice);

    pCaptureDevice = alcCaptureOpenDevice(szDefaultCaptureDevice, SRATE, AL_FORMAT_MONO8, BUFFERSIZE);
    if (pCaptureDevice)
    {
        printf("Opened '%s' Capture Device\n\n", alcGetString(pCaptureDevice, ALC_CAPTURE_DEVICE_SPECIFIER));

        // Start audio capture
        alcCaptureStart(pCaptureDevice);

        // Wait for any key to get pressed before exiting
        while (!_kbhit())
        {
            // Release some CPU time ...
            sleep(1);

            // Find out how many samples have been captured
            alcGetIntegerv(pCaptureDevice, ALC_CAPTURE_SAMPLES, 1, &iSamplesAvailable);
            printf("Samples available : %d\r", iSamplesAvailable);

            // When we have enough data to fill half our BUFFERSIZE byte buffer,
            // grab the samples. With AL_FORMAT_MONO8 each sample is one byte,
            // so BUFFERSIZE / 2 samples occupy BUFFERSIZE / 2 bytes.
            if (iSamplesAvailable > (BUFFERSIZE / 2))
            {
                // Consume Samples
                alcCaptureSamples(pCaptureDevice, Buffer, BUFFERSIZE / 2);

                // Write the audio data to a file
                //fwrite(Buffer, BUFFERSIZE, 1, pFile);
                for (int i = 0; i < BUFFERSIZE / 2; i++)
                    vBuffer.push_back(Buffer[i]);

                // Record total amount of data recorded
                iDataSize += BUFFERSIZE / 2;
            }
        }

        // Stop capture
        alcCaptureStop(pCaptureDevice);

        // Check if any samples haven't been consumed yet
        alcGetIntegerv(pCaptureDevice, ALC_CAPTURE_SAMPLES, 1, &iSamplesAvailable);
        while (iSamplesAvailable)
        {
            if (iSamplesAvailable > (BUFFERSIZE / 2))
            {
                alcCaptureSamples(pCaptureDevice, Buffer, BUFFERSIZE / 2);
                for (int i = 0; i < BUFFERSIZE / 2; i++)
                    vBuffer.push_back(Buffer[i]);
                iSamplesAvailable -= (BUFFERSIZE / 2);
                iDataSize += BUFFERSIZE / 2; // one byte per MONO8 sample
            }
            else
            {
                // Consume whatever is left - only copy the samples actually captured
                alcCaptureSamples(pCaptureDevice, Buffer, iSamplesAvailable);
                for (int i = 0; i < iSamplesAvailable; i++)
                    vBuffer.push_back(Buffer[i]);
                iDataSize += iSamplesAvailable;
                iSamplesAvailable = 0;
            }
        }
        alcCaptureCloseDevice(pCaptureDevice);
    }

    // TODO :: Make less hacky
    ALuint bufferID; // The OpenAL sound buffer ID
    ALuint sourceID; // The OpenAL sound source

    // Create sound buffer and source
    alGenBuffers(1, &bufferID);
    alGenSources(1, &sourceID);
    alListener3f(AL_POSITION, 0.0f, 0.0f, 0.0f);
    alSource3f(sourceID, AL_POSITION, 0.0f, 0.0f, 0.0f);

    alBufferData(bufferID, AL_FORMAT_MONO8, &vBuffer[0], static_cast<ALsizei>(vBuffer.size()), SRATE);

    // Attach sound buffer to source
    alSourcei(sourceID, AL_BUFFER, bufferID);

    // Finally, play the sound!!!
    alSourcePlay(sourceID);

    printf("Press any key to continue...");
    getchar();
    return 0;
}
As you can see from:
alBufferData(bufferID, AL_FORMAT_MONO8, &vBuffer[0], static_cast<ALsizei>(vBuffer.size()), SRATE);
the buffer really is submitted as 8-bit mono at 11025 Hz, so lower-quality recording is possible. I'm okay throwing this example out there as demonstration code, but I wouldn't ever use it in production.
I'm not sure, but for me FREQ is the output frequency, not the sample rate. My configuration defines:
define sampling-rate 48000
See this link: http://supertux.lethargik.org/wiki/OpenAL_Configuration
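If you happen to be on OpenAL Soft rather than the implementation that wiki covers, the analogous knob is (as far as I know) the frequency key in its config file; a minimal sketch, assuming a per-user config:
# ~/.alsoftrc (alsoft.ini on Windows) - OpenAL Soft output configuration
[general]
# Output mixing frequency in Hz. Note this affects playback/mixing only;
# the capture rate is still whatever you pass to alcCaptureOpenDevice.
frequency = 48000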
I'm using IMFSourceReader to continuously buffer 1 second portions of audio files from disk. I'm unable to accurately seek M4A audio data (AAC encoded) and this results in a discontinuous audio stream.
I'm aware that the data returned by IMFSourceReader::ReadSample() is usually offset by a few hundred frames into the past relative to the position set with IMFSourceReader::SetCurrentPosition(). However, even accounting for this offset I'm unable to create a continuous, glitch-free stream (see the readCall == 0 condition).
I am able to accurately seek portions of WAV files (uncompressed) so my offset calculation appears to be correct.
My question is whether the Media Foundation library is able to accurately seek/read portions of AAC encoded M4A files (or any compressed audio for that matter)?
Here's the code. inStartFrame is the sample frame I'm trying to read. The output format is configured as 32-bit floating-point data (see the final function). To trim it down a little I've removed some error checks and cleanup, e.g. end-of-file handling.
bool WindowsM4AReader::read(float** outBuffer, int inNumChannels, int64_t inStartFrame, int64_t inNumFramesToRead)
{
    int64_t hnsToRequest = SampleFrameToHNS(inStartFrame);
    int64_t frameRequested = HNSToSampleFrame(hnsToRequest);

    PROPVARIANT positionProp;
    positionProp.vt = VT_I8;
    positionProp.hVal.QuadPart = hnsToRequest;
    HRESULT hr = mReader->SetCurrentPosition(GUID_NULL, positionProp);
    mReader->Flush(0);

    IMFSample* pSample = nullptr;
    int bytesPerFrame = sizeof(float) * mNumChannels;
    int64_t totalFramesWritten = 0;
    int64_t remainingFrames = inNumFramesToRead;
    int readCall = 0;

    bool quit = false;
    while (!quit) {
        DWORD streamIndex = 0;
        DWORD flags = 0;
        LONGLONG llTimeStamp = 0;
        hr = mReader->ReadSample(
            MF_SOURCE_READER_FIRST_AUDIO_STREAM, // Stream index.
            0,                                   // Flags.
            &streamIndex,                        // Receives the actual stream index.
            &flags,                              // Receives status flags.
            &llTimeStamp,                        // Receives the time stamp.
            &pSample                             // Receives the sample or NULL.
        );

        // On the first read, work out how far before the requested position
        // the decoder actually started.
        int64_t frameOffset = 0;
        if (readCall == 0) {
            int64_t hnsOffset = hnsToRequest - llTimeStamp;
            frameOffset = HNSToSampleFrame(hnsOffset);
        }
        ++readCall;

        if (pSample) {
            IMFMediaBuffer* decodedBuffer = nullptr;
            pSample->ConvertToContiguousBuffer(&decodedBuffer);

            BYTE* rawBuffer = nullptr;
            DWORD maxLength = 0;
            DWORD bufferLengthInBytes = 0;
            decodedBuffer->Lock(&rawBuffer, &maxLength, &bufferLengthInBytes);

            int64_t availableFrames = bufferLengthInBytes / bytesPerFrame;
            availableFrames -= frameOffset;
            int64_t framesToCopy = min(availableFrames, remainingFrames);

            // Copy to outBuffer, de-interleaving into one array per channel
            float* floatBuffer = (float*)rawBuffer;
            float* offsetBuffer = &floatBuffer[frameOffset * mNumChannels];
            for (int channel = 0; channel < mNumChannels; ++channel) {
                for (int64_t frame = 0; frame < framesToCopy; ++frame) {
                    float sampleValue = offsetBuffer[frame * mNumChannels + channel];
                    outBuffer[channel][totalFramesWritten + frame] = sampleValue;
                }
            }

            decodedBuffer->Unlock();

            totalFramesWritten += framesToCopy;
            remainingFrames -= framesToCopy;

            if (totalFramesWritten >= inNumFramesToRead)
                quit = true;
        }
    }

    return true; // the original snippet was missing a return value
}
LONGLONG WindowsM4AReader::SampleFrameToHNS(int64_t inFrame)
{
    // 1 HNS (hundred-nanosecond unit) = 1e-7 s, so one second = 10,000,000 HNS
    return inFrame * (10000000.0 / mSampleRate);
}

int64_t WindowsM4AReader::HNSToSampleFrame(LONGLONG inHNS)
{
    return inHNS / 10000000.0 * mSampleRate;
}
bool WindowsM4AReader::ConfigureAsFloatDecoder()
{
    IMFMediaType* outputType = nullptr;
    HRESULT hr = MFCreateMediaType(&outputType);

    UINT32 bitsPerSample = sizeof(float) * 8;
    UINT32 blockAlign = mNumChannels * (bitsPerSample / 8);
    UINT32 bytesPerSecond = blockAlign * (UINT32)mSampleRate;

    hr = outputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
    hr = outputType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_Float);
    hr = outputType->SetUINT32(MF_MT_AUDIO_PREFER_WAVEFORMATEX, TRUE);
    hr = outputType->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, (UINT32)mNumChannels);
    hr = outputType->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, (UINT32)mSampleRate);
    hr = outputType->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, blockAlign);
    hr = outputType->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, bytesPerSecond);
    hr = outputType->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, bitsPerSample);
    hr = outputType->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, TRUE);

    DWORD streamIndex = 0;
    hr = mReader->SetCurrentMediaType(streamIndex, NULL, outputType);
    return true;
}
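As a sanity check on the unit conversions above: at 44100 Hz, sample frame 44100 maps to 44100 * (10000000.0 / 44100) = 10,000,000 HNS, exactly one second, and dividing back recovers the frame. For context, mReader would be an IMFSourceReader created roughly like the sketch below; this helper is my assumption, not part of the original post (the creation code was trimmed out):
#include <mfapi.h>
#include <mfreadwrite.h>
#pragma comment(lib, "mfplat.lib")
#pragma comment(lib, "mfreadwrite.lib")

// Hypothetical helper - the original post does not show how mReader is created.
IMFSourceReader* CreateAudioReader(const wchar_t* path)
{
    MFStartup(MF_VERSION);                            // once per process
    IMFSourceReader* reader = nullptr;
    MFCreateSourceReaderFromURL(path, NULL, &reader); // error checks omitted
    return reader;                                    // NULL on failure
}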
If you are using the AAC decoder provided by Microsoft (AAC Decoder) and the MPEG-4 File Source, then yes, I can confirm: you can't seek audio frames with the same precision as in a WAV file.
I'll have to run more tests, but I think it's possible to find a workaround in your case.
EDIT
I've made a program to check the seek position with the SourceReader:
github mofo7777
Under Stackoverflow > AudioSourceReaderSeek
The WAV format is perfect at seeking, MP3 is good, and M4A is not really good.
But that M4A file was encoded with VLC. I then encoded an M4A file using the Media Foundation encoder, and the result is better when seeking in this file (about like MP3).
So I would say the encoder matters for seeking.
It would be interesting to test different audio formats with different encoders.
Also, there is the IMFSeekInfo interface.
I can't test this interface because I'm on Windows 7 and it's for Win8+. It would be interesting for someone to test it.
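One workaround in that spirit (my sketch, not mofo7777's code): seek well before the target, then read and discard decoded samples until the timestamps catch up with the position you actually want. It only uses IMFSourceReader calls that already appear above; the one-second back-off is an assumption you would tune:
// Hypothetical sketch: coarse seek + decode-and-discard to reach an exact position.
bool SeekAccurately(IMFSourceReader* reader, LONGLONG targetHNS)
{
    PROPVARIANT pos;
    pos.vt = VT_I8;
    // Back off one second (10,000,000 HNS) so the decoder has room to converge.
    pos.hVal.QuadPart = targetHNS > 10000000 ? targetHNS - 10000000 : 0;
    if (FAILED(reader->SetCurrentPosition(GUID_NULL, pos)))
        return false;

    for (;;) {
        DWORD flags = 0;
        LONGLONG ts = 0;
        IMFSample* sample = nullptr;
        if (FAILED(reader->ReadSample(MF_SOURCE_READER_FIRST_AUDIO_STREAM,
                                      0, nullptr, &flags, &ts, &sample)))
            return false;
        if (flags & MF_SOURCE_READERF_ENDOFSTREAM)
            return false;
        if (!sample)
            continue;

        LONGLONG dur = 0;
        sample->GetSampleDuration(&dur);
        bool containsTarget = (ts + dur) > targetHNS;
        sample->Release(); // a real version would keep this sample and trim it
        if (containsTarget)
            return true;   // subsequent reads continue from here
    }
}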
I'm trying to encode webcam frames with libx264 in real time, and I'm facing one problem: the resulting video length is exactly what I set, but the camera sometimes lags and the real capture time is longer than the video length. As a result, the picture in the video changes too fast. I think this is due to the constant FPS in the x264 settings, so I need to make it dynamic somehow. Is that possible? If I'm wrong about the FPS, what do I need to do to synchronize capturing and writing?
Also, I would like to know the optimal encoder parameters for streaming over the internet and for recording to disk (the client streams from a camera or screen, and the server records).
Here is my code (the console log screenshot from the original post is omitted):
#include <stdint.h>
#include <stdio.h>
#include <windows.h>   // GetTickCount
#include "stringf.h"
#include "Capture.h"
#include "x264.h"

int main( int argc, char **argv ){
    Camera instance;
    if(!instance.Enable(0)){printf("Camera not available\n");return 1;}

    // Initializing metrics and buffer of frame
    unsigned int width, height, size = instance.GetMetrics(width, height);
    unsigned char *data = (unsigned char *)malloc(size);

    // Setting encoder (I'm not sure about all parameters)
    x264_param_t param;
    x264_param_default_preset(&param, "ultrafast", "zerolatency");
    param.i_threads = 1;
    param.i_width = width;
    param.i_height = height;
    param.i_fps_num = 20;
    param.i_fps_den = 1;
    // Intra refresh:
    param.i_keyint_max = 8;
    param.b_intra_refresh = 1;
    // Rate control:
    param.rc.i_rc_method = X264_RC_CRF;
    param.rc.f_rf_constant = 25;
    param.rc.f_rf_constant_max = 35;
    // For streaming:
    param.b_repeat_headers = 1;
    param.b_annexb = 1;
    x264_param_apply_profile(&param, "baseline");
    x264_t* encoder = x264_encoder_open(&param);

    int seconds, expected_time, operation_start, i_nals, frame_size, frames_count;
    expected_time = 1000/param.i_fps_num;
    operation_start = 0;
    seconds = 1;
    frames_count = param.i_fps_num * seconds;
    int *Timings = new int[frames_count];

    x264_picture_t pic_in, pic_out;
    x264_nal_t* nals;
    x264_picture_alloc(&pic_in, X264_CSP_I420, param.i_width, param.i_height);

    // Capture-Encode-Write loop
    for(int i = 0; i < frames_count; i++){
        operation_start = GetTickCount();
        size = instance.GrabBGR(&data);
        instance.BGRtoI420(data, &pic_in.img.plane[0], &pic_in.img.plane[1], &pic_in.img.plane[2], param.i_width, param.i_height);
        frame_size = x264_encoder_encode(encoder, &nals, &i_nals, &pic_in, &pic_out);
        if( frame_size > 0){
            stringf::WriteBufferToFile("test.h264",std::string(reinterpret_cast<char*>(nals->p_payload), frame_size),1);
        }
        Timings[i] = GetTickCount() - operation_start;
    }
    while( x264_encoder_delayed_frames( encoder ) ){ // Flush delayed frames
        frame_size = x264_encoder_encode(encoder, &nals, &i_nals, NULL, &pic_out);
        if( frame_size > 0 ){stringf::WriteBufferToFile("test.h264",std::string(reinterpret_cast<char*>(nals->p_payload), frame_size),1);}
    }

    unsigned int total_time = 0;
    printf("Expected operation time was %d ms per frame at %u FPS\n",expected_time, param.i_fps_num);
    for(int i = 0; i < frames_count; i++){
        total_time += Timings[i];
        printf("Frame %u takes %d ms\n",(i+1), Timings[i]);
    }
    printf("Record takes %u ms\n",total_time);

    delete[] Timings;
    free(data);
    x264_encoder_close( encoder );
    x264_picture_clean( &pic_in );
    return 0;
}
The capture takes 1453 ms, yet the output file plays for exactly 1 second.
So, in general, the video length should match the capture time, not what the encoder "wants". How can I do that?
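For what it's worth, one direction to explore (a sketch of the idea, not a verified fix): x264 accepts variable-frame-rate input if you enable b_vfr_input and give every picture a timestamp via pic_in.i_pts, expressed in the timebase you configure, so frames get spaced by real capture time instead of a fixed FPS. Note that a raw Annex B test.h264 dump has no container to carry those timestamps; you would need to mux into MP4/MKV to see the effect on playback. Only the deltas against the setup above are shown, and capture_start_ms is a hypothetical variable holding GetTickCount() at the start of capture:
// Sketch: timestamp-driven (VFR) encoding instead of a fixed frame rate.
param.b_vfr_input = 1;      // input timestamps are meaningful
param.i_timebase_num = 1;   // i_pts will be expressed in milliseconds
param.i_timebase_den = 1000;

// Inside the capture loop, after grabbing a frame:
pic_in.i_pts = GetTickCount() - capture_start_ms;
frame_size = x264_encoder_encode(encoder, &nals, &i_nals, &pic_in, &pic_out);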
I've created 2 functions:
- One that records the microphone
- One that plays the sound of the microphone
It records the microphone for 3 seconds
#include <iostream>
#include <Windows.h>
#include <vector>
using namespace std;

#pragma comment(lib, "winmm.lib")

short int waveIn[44100 * 3];

void PlayRecord();

void StartRecord()
{
    const int NUMPTS = 44100 * 3; // 3 seconds
    int sampleRate = 44100;
    // 'short int' is a 16-bit type; I request 16-bit samples below
    // for 8-bit capture, you'd use 'unsigned char' or 'BYTE' 8-bit types

    HWAVEIN hWaveIn;
    MMRESULT result;

    WAVEFORMATEX pFormat;
    pFormat.wFormatTag = WAVE_FORMAT_PCM;   // simple, uncompressed format
    pFormat.nChannels = 1;                  // 1=mono, 2=stereo
    pFormat.nSamplesPerSec = sampleRate;    // 44100
    pFormat.nAvgBytesPerSec = sampleRate*2; // = nSamplesPerSec * nChannels * wBitsPerSample/8
    pFormat.nBlockAlign = 2;                // = nChannels * wBitsPerSample/8
    pFormat.wBitsPerSample = 16;            // 16 for high quality, 8 for telephone-grade
    pFormat.cbSize = 0;

    // Specify recording parameters
    result = waveInOpen(&hWaveIn, WAVE_MAPPER, &pFormat, 0L, 0L, WAVE_FORMAT_DIRECT);

    WAVEHDR WaveInHdr;
    // Set up and prepare header for input
    WaveInHdr.lpData = (LPSTR)waveIn;
    WaveInHdr.dwBufferLength = NUMPTS*2;
    WaveInHdr.dwBytesRecorded = 0;
    WaveInHdr.dwUser = 0L;
    WaveInHdr.dwFlags = 0L;
    WaveInHdr.dwLoops = 0L;
    waveInPrepareHeader(hWaveIn, &WaveInHdr, sizeof(WAVEHDR));

    // Insert a wave input buffer
    result = waveInAddBuffer(hWaveIn, &WaveInHdr, sizeof(WAVEHDR));

    // Commence sampling input
    result = waveInStart(hWaveIn);

    cout << "recording..." << endl;
    Sleep(3 * 1000);
    // Wait until finished recording
    waveInClose(hWaveIn);

    PlayRecord();
}

void PlayRecord()
{
    const int NUMPTS = 44100 * 3; // 3 seconds
    int sampleRate = 44100;
    // 'short int' is a 16-bit type; I request 16-bit samples below
    // for 8-bit capture, you'd use 'unsigned char' or 'BYTE' 8-bit types

    HWAVEIN hWaveIn;

    WAVEFORMATEX pFormat;
    pFormat.wFormatTag = WAVE_FORMAT_PCM;   // simple, uncompressed format
    pFormat.nChannels = 1;                  // 1=mono, 2=stereo
    pFormat.nSamplesPerSec = sampleRate;    // 44100
    pFormat.nAvgBytesPerSec = sampleRate*2; // = nSamplesPerSec * nChannels * wBitsPerSample/8
    pFormat.nBlockAlign = 2;                // = nChannels * wBitsPerSample/8
    pFormat.wBitsPerSample = 16;            // 16 for high quality, 8 for telephone-grade
    pFormat.cbSize = 0;

    // Specify recording parameters
    waveInOpen(&hWaveIn, WAVE_MAPPER, &pFormat, 0L, 0L, WAVE_FORMAT_DIRECT);

    WAVEHDR WaveInHdr;
    // Set up and prepare header for input
    WaveInHdr.lpData = (LPSTR)waveIn;
    WaveInHdr.dwBufferLength = NUMPTS*2;
    WaveInHdr.dwBytesRecorded = 0;
    WaveInHdr.dwUser = 0L;
    WaveInHdr.dwFlags = 0L;
    WaveInHdr.dwLoops = 0L;
    waveInPrepareHeader(hWaveIn, &WaveInHdr, sizeof(WAVEHDR));

    HWAVEOUT hWaveOut;
    cout << "playing..." << endl;
    waveOutOpen(&hWaveOut, WAVE_MAPPER, &pFormat, 0, 0, WAVE_FORMAT_DIRECT);
    waveOutWrite(hWaveOut, &WaveInHdr, sizeof(WaveInHdr)); // Playing the data
    Sleep(3 * 1000); // Sleep for as long as there was recorded

    waveInClose(hWaveIn);
    waveOutClose(hWaveOut);
}

int main()
{
    StartRecord();
    return 0;
}
How can I change my StartRecord function (and I guess my PlayRecord function as well) to make it record until there's no input from the microphone?
(So far, those 2 functions are working perfectly: record the microphone for 3 seconds, then play the recording)...
Thanks!
Edit: by no sound, I mean the sound level is too low or something (meaning the person probably isn't speaking)...
Because sound is a wave, it oscillates between high and low pressures. This waveform is usually recorded as positive and negative numbers, with zero being the neutral pressure. If you take the absolute value of the signal and keep a running average it should be sufficient.
The average should be taken over a long enough period that you account for the appropriate amount of silence. A very cheap way to keep an estimate of the running average is like this:
const double threshold = 50;    // Whatever threshold you need
const int max_samples = 10000;  // The representative running average size
double average = 0;             // The running average
int sample_count = 0;           // When we are building the average

while( sample_count < max_samples || average > threshold ) {
    // New sample arrives, stored in 'sample'

    // Adjust the running absolute average
    if( sample_count < max_samples ) sample_count++;
    average *= double(sample_count - 1) / sample_count;
    average += std::abs(sample) / sample_count;
}
The larger max_samples is, the slower average will respond to the signal. After the sound stops, it will slowly trail off. However, it will be slow to rise again too. This would be fine for reasonably continuous sound.
With something like speech, which can have short or long pauses, you may want to use an impulse-based approach. You can just define the number of samples of 'silence' that you expect, and reset it whenever you receive an impulse that exceeds the threshold. Using the running average above with a much shorter window size will give you a simple way of detecting an impulse. Then you just need to count...
const int max_samples = 100;            // Smaller window size for impulse
const int max_silence_samples = 10000;  // Maximum samples below threshold
int silence = 0;                        // Number of samples below threshold

while( silence < max_silence_samples ) {
    // Compute running average as before
    //...

    // Check for silence. If there's a signal, reset the counter.
    if( average > threshold ) silence = 0;
    else ++silence;
}
Adjusting threshold and max_samples will control the sensitivity to pops and clicks, while max_silence_samples gives you control over how much silence is allowed before you stop recording.
There are undoubtedly more technical ways to achieve your goals, but it's always good to try the simple one first. See how you go with this.
I suggest you do it via DirectShow. You should create an instance of the microphone capture filter, a SampleGrabber, an audio encoder, and a file writer. Your graph should look like this:
Microphone -> SampleGrabber -> Audio Encoder -> File Writer
Every sample passes through the SampleGrabber, so you can read all raw samples and decide whether to continue recording or not. This is the best way to both record the audio and inspect its content.
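A rough skeleton of that graph in code, heavily abridged (my sketch; the microphone enumeration, encoder choice, and ISampleGrabberCB implementation are left as comments because they depend on your setup, and the SampleGrabber lives in the deprecated qedit.h header):
#include <dshow.h>
#pragma comment(lib, "strmiids.lib")

int main()
{
    CoInitialize(NULL);

    IGraphBuilder* graph = NULL;
    ICaptureGraphBuilder2* builder = NULL;
    CoCreateInstance(CLSID_FilterGraph, NULL, CLSCTX_INPROC_SERVER,
                     IID_IGraphBuilder, (void**)&graph);
    CoCreateInstance(CLSID_CaptureGraphBuilder2, NULL, CLSCTX_INPROC_SERVER,
                     IID_ICaptureGraphBuilder2, (void**)&builder);
    builder->SetFiltergraph(graph);

    // 1. Microphone: enumerate CLSID_AudioInputDeviceCategory with
    //    ICreateDevEnum and add the chosen capture filter to the graph.
    // 2. SampleGrabber: CoCreateInstance(CLSID_SampleGrabber, ...) from qedit.h,
    //    add it, then call ISampleGrabber::SetCallback() with your
    //    ISampleGrabberCB implementation - its BufferCB sees every raw sample,
    //    which is where you decide whether to keep recording.
    // 3. Encoder + file writer: add both filters, then connect the chain, e.g.
    //    builder->RenderStream(NULL, &MEDIATYPE_Audio, micFilter, grabber, writer);
    // 4. Run the graph via IMediaControl::Run().

    builder->Release();
    graph->Release();
    CoUninitialize();
    return 0;
}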
I need to play a stream in OpenAL, but I don't understand what I need to do with the buffers and the source. My pseudocode:
FirstTime = true;
while (true)
{
    if (!FirstTime)
    {
        alSourceUnqueueBuffers(alSource, 1, &unbuf);
    }
    // get buffer to play in boost::array buf (882 elements) (MONO16).
    if (NumberOfSampleSet >= 3)
    {
        alBufferData(alSampleSet[NumberOfSampleSet], AL_FORMAT_MONO16, buf.data(), buf.size(), 44100);
        alSourceQueueBuffers(alSource, 1, &alSampleSet[NumberOfSampleSet++]);
        if (NumberOfSampleSet == 4)
        {
            FirstTime = false;
            NumberOfSampleSet = 0;
        }
    }
    alSourcePlay(alSource);
}
What am I doing wrong? All I hear from the speakers is repeated clicking. Please tell me what I need to do with the buffers to play my sound.
4 buffers (882 bytes each) and a 44kHz source give only (4 * 882 / (2 * 44100)) = 0.04 seconds of playback - that's just a "click".
To produce longer sounds you should load more data (though two buffers are usually sufficient).
Imagine you have a 100 MB uncompressed .wav file. Just read, say, 22050 samples (that is 44100 bytes of data) and enqueue them in the OpenAL queue associated with the source. Then read another 22050 samples into the second buffer and enqueue them as well. Then just switch buffers (like you do now at NumberOfSampleSet == 4) and repeat until the file is finished.
If you want a pure sine wave of, e.g., 440 Hz, then using the same 22050-sample buffers, just fill them with the values of the sine wave:
const int BufferSize = 22050;
const int NumSamples = 44100;
const float PI = 3.14159265f;  // added: not defined in the original snippet
const float volume = 30000.0f; // added: near full scale for signed 16-bit

// phase offset to avoid "clicks" between buffers
int LastOffset = 0;

const float Omega = 440.0f;
for(int i = 0 ; i < BufferSize ; i++)
{
    float t = ( 2.0f * PI * Omega * ( i + LastOffset ) ) / static_cast<float>( NumSamples );
    short VV = (short)(volume * sin(t));
    // 16-bit sample: 2 bytes, little-endian
    buffers[CurrentBuffer][i * 2 + 0] = VV & 0xFF;
    buffers[CurrentBuffer][i * 2 + 1] = VV >> 8;
}
LastOffset += BufferSize;
// Omega is an integer, so NumSamples samples span a whole number of cycles
// and the phase stays continuous after wrapping
LastOffset %= NumSamples;
EDIT1:
To process something in real time (with severe latency, unfortunately) you have to create the buffers, push some initial data, and then check how much data OpenAL needs:
void StreamBuffer( ALuint BufferID )
{
    // Get sound into the buffer somehow - load from file, read from an input
    // channel (queue), generate, etc.
    // Do the custom sound processing here in buffers[CurrentBuffer].

    // Submit more data to OpenAL
    alBufferData( BufferID, Format, buffers[CurrentBuffer].data(), buffers[CurrentBuffer].size(), SamplesPerSec );
}

int main()
{
    ....
    ALuint FBufferID[2];
    alGenBuffers( 2, &FBufferID[0] );

    StreamBuffer( FBufferID[0] );
    StreamBuffer( FBufferID[1] );

    alSourceQueueBuffers( FSourceID, 2, &FBufferID[0] );

    while(true)
    {
        // Check how much data is processed in OpenAL's internal queue
        ALint Processed;
        alGetSourcei( FSourceID, AL_BUFFERS_PROCESSED, &Processed );

        // Add more buffers while we need them
        while ( Processed-- )
        {
            ALuint BufID;
            alSourceUnqueueBuffers( FSourceID, 1, &BufID );
            StreamBuffer( BufID );
            alSourceQueueBuffers( FSourceID, 1, &BufID );
        }
    }
    ....
}
I am having an issue again with ffmpeg. I'm a newbie with ffmpeg, and I can't find a good up-to-date tutorial...
This time, when I play a video with ffmpeg, it plays too fast; ffmpeg is ignoring the FPS. I don't want to handle that with a thread sleep, because the videos have different FPS values.
I created a thread; here you can find the loop:
AVPacket framepacket;

while(av_read_frame(formatContext,&framepacket)>= 0){
    pausecontrol.lock();
    // Is it a video or audio frame?
    if(framepacket.stream_index==gotVideoCodec){
        int framereaded;
        // Video? Ok
        avcodec_decode_video2(videoCodecContext,videoFrame,&framereaded,&framepacket);
        // Yeah, did we get it?
        if(framereaded && doit){
            AVRational millisecondbase = {1,1000};
            int f_number = framepacket.dts;
            int f_time = av_rescale_q(framepacket.dts,formatContext->streams[gotVideoCodec]->time_base,millisecondbase);
            currentTime=f_time;
            currentFrameNumber=f_number;
            int stWidth = videoCodecContext->width;
            int stHeight = videoCodecContext->height;
            SwsContext *ctx = sws_getContext(stWidth, stHeight, videoCodecContext->pix_fmt, stWidth,
                                             stHeight, PIX_FMT_RGB24, SWS_BICUBIC, NULL, NULL, NULL);
            if(ctx!=0){
                sws_scale(ctx,videoFrame->data,videoFrame->linesize,0,videoCodecContext->height,videoFrameRGB->data,videoFrameRGB->linesize);
                QImage framecapsule=QImage(stWidth,stHeight,QImage::Format_RGB888);
                for(int y=0;y<stHeight;y++){
                    memcpy(framecapsule.scanLine(y),videoFrameRGB->data[0]+y*videoFrameRGB->linesize[0],stWidth*3);
                }
                emit newFrameReady(framecapsule);
                sws_freeContext(ctx);
            }
        }
    }
    if(framepacket.stream_index==gotAudioCodec){
        // Audio? Ok
    }
    pausecontrol.unlock();
    av_free_packet(&framepacket);
}
Any idea?
The simplest solution is to use a delay based on the FPS value
firstFrame = true;
for(;;)
{
    // decoding, color conversion, etc.

    if (!firstFrame)
    {
        const double frameDuration = 1000.0 / frameRate;
        duration_t actualDelay = get_local_time() - lastTime;
        if (frameDuration > actualDelay)
            sleep(frameDuration - actualDelay);
    }
    else
        firstFrame = false;

    emit newFrameReady(framecapsule);
    lastTime = get_local_time();
}
get_local_time() and duration_t are placeholders for whatever clock and duration types you use.
A more accurate method is to use the time stamp of each frame, but the idea is the same.
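A minimal sketch of that more accurate variant, reusing the f_time value the question's loop already computes with av_rescale_q (the clock and sleep helpers are the same placeholders as above):
// Sketch: schedule each frame by its stream timestamp instead of a fixed delay.
// f_time is the packet DTS rescaled to milliseconds, as in the question's code.
int64_t startWallClock = -1; // wall-clock time when playback started, in ms
int64_t firstPts = -1;       // timestamp of the first displayed frame, in ms

// ... after decoding a frame with timestamp f_time:
if (firstPts < 0) {
    firstPts = f_time;
    startWallClock = get_local_time();
}
// When this frame is due, relative to the first frame:
int64_t due = startWallClock + (f_time - firstPts);
int64_t now = get_local_time();
if (due > now)
    sleep(due - now); // wait until the frame's presentation time
emit newFrameReady(framecapsule);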