I'm currently trying to take in sound and feed it back to the speakers. I'm using the openFrameworks library, which makes this fairly simple.
I'm using this class
http://www.openframeworks.cc/documentation?detail=ofSoundStream
The setup function is
ofSoundStreamSetup(int nOutputs, int nInputs, ofSimpleApp * OFSA, int sampleRate, int bufferSize, int nBuffers)
and I am using
ofSoundStreamSetup(1, 1, this, 44100, 512, 4)
My header info is
float buffer1[1000000];
float buffer2[1000000];
float* readPointer;
float* writePointer;
int readp;
int writep;
I've got two functions:
audioReceived(float * input, int bufferSize, int nChannels)
{
    if (writep < 10)
    {
        for (int i = 0; i < bufferSize; i++)
        {
            writePointer[writep*i] = input[i];
        }
        writep++;
        if (writep >= 10)
        {
            writep = 0;
        }
    }
}
audioRequested(float * output, int bufferSize, int numChannels)
{
    if (writep > 0)
    {
        for (int i = 0; i < bufferSize; i++)
        {
            output[i] = readPointer[readp * i];
        }
        readp++;
        if (readp >= 10)
        {
            readp = 0;
        }
    }
}
This is working, but the quality is poppy and crackly. I think I may have to implement a proper circular buffer, or double buffering, but I'm not sure.
Can anyone point me in the right direction for getting the audio to sound good, using as simple a method as possible?
I would definitely suggest double buffering. Otherwise a buffer becomes available at the same moment you want to fill one, and you can end up writing into a buffer that is still being played.
In general: when audio is received, write it into buffer 1; when audio is requested, hand out buffer 2. The next time audio is received, write into buffer 2, and when the next request arrives, hand out buffer 1. And so on.
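A minimal sketch of that ping-pong scheme, adapted to the callbacks above (testApp, the 512-sample buffers, and the file-scope pointers are illustrative, and locking between the two callbacks is omitted for brevity):

float bufferA[512];
float bufferB[512];
float* captureBuffer = bufferA; // audioReceived fills this one
float* playBuffer    = bufferB; // audioRequested reads this one

void testApp::audioReceived(float * input, int bufferSize, int nChannels)
{
    // Copy the newest input block into the capture buffer.
    for (int i = 0; i < bufferSize; i++)
        captureBuffer[i] = input[i];
    // Swap roles: the block just filled becomes the next playback block.
    float* tmp = captureBuffer;
    captureBuffer = playBuffer;
    playBuffer = tmp;
}

void testApp::audioRequested(float * output, int bufferSize, int nChannels)
{
    // Play the most recently completed block, never the one being filled.
    for (int i = 0; i < bufferSize; i++)
        output[i] = playBuffer[i];
}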
I am using Superpowered for various real-time FX, and they all work very straightforwardly. The pitch shifting, however, is a whole other story; I think that's because it's based on the time-stretching algorithm, which has to deal with output that changes in time, and that's a lot more complex than applying FX like EQ or reverb. However, I'm only interested in changing the pitch of my mic input.
I looked at the only example I could find on GitHub and slightly adapted it to fit my work:
static bool audioProcessing(void *clientdata,
                            float **buffers,
                            unsigned int inputChannels,
                            unsigned int outputChannels,
                            unsigned int numberOfSamples,
                            unsigned int samplerate,
                            uint64_t hostTime) {
    __unsafe_unretained Superpowered *self = (__bridge Superpowered *)clientdata;

    SuperpoweredAudiobufferlistElement inputBuffer;
    inputBuffer.startSample = 0;
    inputBuffer.samplesUsed = 0;
    inputBuffer.endSample = self->timeStretcher->numberOfInputSamplesNeeded;
    inputBuffer.buffers[0] = SuperpoweredAudiobufferPool::getBuffer(self->timeStretcher->numberOfInputSamplesNeeded * 8 + 64);
    inputBuffer.buffers[1] = inputBuffer.buffers[2] = inputBuffer.buffers[3] = NULL;

    self->outputBuffers->clear();
    self->timeStretcher->process(&inputBuffer, self->outputBuffers);

    int samples = self->timeStretcher->numberOfInputSamplesNeeded;
    float *timeStretchedAudio = (float *)self->outputBuffers->nextSliceItem(&samples);
    if (timeStretchedAudio != 0) {
        SuperpoweredDeInterleave(timeStretchedAudio, buffers[0], buffers[1], numberOfSamples);
    }
    //self->outputBuffers->rewindSlice();
    return true;
}
I have removed most of the code that I thought wasn't necessary; for example, there was a while loop that seemed to deal with time-stretch scenarios, and I'm just outputting the same duration as I input.
Some observations:
- If I don't clear the outputBuffers, my memory usage goes through the roof.
- If I use self->outputBuffers->rewindSlice();, the app goes silent, probably meaning the buffers are getting overwritten with silence.
- If I do not use self->outputBuffers->rewindSlice();, I can hear my own voice coming back, but timeStretchedAudio is always 0 except the very first time.
I finally got it working:
static bool audioProcessing(void *clientdata,
                            float **buffers,
                            unsigned int inputChannels,
                            unsigned int outputChannels,
                            unsigned int numberOfSamples,
                            unsigned int samplerate,
                            uint64_t hostTime) {
    __unsafe_unretained Superpowered *self = (__bridge Superpowered *)clientdata;
    //timeStretching->setRateAndPitchShift(realTimeRate, realTimePitch);

    SuperpoweredAudiobufferlistElement inputBuffer;
    inputBuffer.startSample = 0;
    inputBuffer.samplesUsed = 0;
    inputBuffer.endSample = numberOfSamples;
    inputBuffer.buffers[0] = SuperpoweredAudiobufferPool::getBuffer((unsigned int)(numberOfSamples * 8 + 64));
    inputBuffer.buffers[1] = inputBuffer.buffers[2] = inputBuffer.buffers[3] = NULL;

    // Interleave the two input channels into the time stretcher's input buffer.
    SuperpoweredInterleave(buffers[0], buffers[1], (float *)inputBuffer.buffers[0], numberOfSamples);
    //SuperpoweredShortIntToFloat(audioInputOutput, (float *)inputBuffer.buffers[0], (unsigned int) numberOfSamples);

    self->timeStretcher->process(&inputBuffer, self->outputBuffers);

    // Do we have some output?
    if (self->outputBuffers->makeSlice(0, self->outputBuffers->sampleLength)) {
        while (true) { // Iterate on every output slice.
            // Get a pointer to the output samples.
            int numSamples = 0;
            float *timeStretchedAudio = (float *)self->outputBuffers->nextSliceItem(&numSamples);
            if (!timeStretchedAudio || *timeStretchedAudio == 0) {
                break;
            }
            // De-interleave the time-stretched audio into the two output channels.
            //SuperpoweredFloatToShortInt(timeStretchedAudio, audioInputOutput,
            //                            (unsigned int) numSamples);
            SuperpoweredDeInterleave(timeStretchedAudio, buffers[0], buffers[1], numSamples);
            self->recorder->process(timeStretchedAudio, numSamples);
            // Write the audio to disk.
            //fwrite(audioInputOutput, 1, numSamples * 4, fd);
        }
        // Clear the output buffer list.
        self->outputBuffers->clear();
        // If we have enough samples in the fifo output buffer, pass them to the audio output.
        //SuperpoweredFloatToShortInt((float *)inputBuffer.buffers[0], audioInputOutput, (unsigned int) numberOfSamples);
    }
    return true;
}
I am not sure if changing the rate also works, but I don't care for this application. YMMV.
Implement the part marked with TODO; that's the point where you need to provide input for the timeStretcher. Also take care to keep the output separate from the input, because the output could be written before the input is consumed.
I am using the FFmpeg library to decode and (potentially) modify some audio.
I managed to use the following functions to iterate through all frames of the audio file:
avformat_open_input // Obtains formatContext
avformat_find_stream_info
av_find_best_stream // The argument AVMEDIA_TYPE_AUDIO is fed in to find the audio stream
avcodec_open2 // Obtains codecContext
av_init_packet
// The following is used to loop through the frames
av_read_frame
avcodec_decode_audio4
In the end, I have these three values available on each iteration
int dataSize; // return value of avcodec_decode_audio4
AVFrame* frame;
AVCodecContext* codecContext; // Codec context of the best stream
I supposed that a loop like this could be used to iterate over all samples:
for (int i = 0; i < frame->nb_samples; ++i)
{
    // Bytes/Sample is known to be 4
    // Extracts audio from Channel 1. There are in total 2 channels.
    int* sample = (int*)frame->data[0] + dataSize * i;
    // Now *sample is accessible
}
However, when I plotted the data using gnuplot, I did not get a waveform as expected, and some of the values reached the limit of 32-bit integers (the audio stream is not silent in the first few seconds).
I suppose that some form of quantisation is going on to prevent the data from being interpreted mathematically. What should I do to de-quantise this?
for (int i = 0; i < frame->nb_samples; ++i)
{
    // Bytes/Sample is known to be 4
    // Extracts audio from Channel 1. There are in total 2 channels.
    int* sample = (int*)frame->data[0] + dataSize * i;
    // Now *sample is accessible
}
Well... No. So, first of all, we'll need to know the data type. Check frame->format. It's an enum AVSampleFormat, most likely flt, fltp, s16 or s16p.
So, how do you interpret frame->data[] given the format? Well, first, is it planar or not? If it's planar, it means each channel is in frame->data[n], where n is the channel number. frame->channels is the number of channels. If it's not planar, it means all data is interleaved (per sample) in frame->data[0].
Second, what is the storage type? If it's s16/s16p, it's int16_t *. If it's flt/fltp, it's float *. So the correct interpretation for fltp would be:
for (int c = 0; c < frame->channels; c++) {
    float *samples = (float *)frame->data[c];
    for (int i = 0; i < frame->nb_samples; i++) {
        float sample = samples[i];
        // now this sample is accessible, it's in the range [-1.0, 1.0]
    }
}
Whereas for s16, it would be:
int16_t *samples = (int16_t *)frame->data[0];
for (int c = 0; c < frame->channels; c++) {
    for (int i = 0; i < frame->nb_samples; i++) {
        int sample = samples[i * frame->channels + c];
        // now this sample is accessible, it's in the range [-32768,32767]
    }
}
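If you need one access path for several formats, a rough sketch could look like this (av_sample_fmt_is_planar and av_get_packed_sample_fmt are standard libavutil calls; only the four formats above are handled, everything else is left out):

#include <libavutil/samplefmt.h>

// Illustrative helper: read one sample as a float, whatever the layout.
float get_sample(const AVFrame *frame, int channel, int index)
{
    enum AVSampleFormat fmt = (enum AVSampleFormat)frame->format;
    int planar = av_sample_fmt_is_planar(fmt);

    // Planar: one plane per channel in data[channel].
    // Interleaved: all channels woven together in data[0].
    const uint8_t *plane = planar ? frame->data[channel] : frame->data[0];
    int pos = planar ? index : index * frame->channels + channel;

    switch (av_get_packed_sample_fmt(fmt)) {
    case AV_SAMPLE_FMT_FLT:
        return ((const float *)plane)[pos];
    case AV_SAMPLE_FMT_S16:
        return ((const int16_t *)plane)[pos] / 32768.0f;
    default:
        return 0.0f; // other formats not handled in this sketch
    }
}

Note that for planar audio with more than eight channels, FFmpeg stores the extra planes in frame->extended_data rather than frame->data.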
I'm trying to re-sample captured 2-channel/48 kHz/32-bit audio to 1-channel/8 kHz/32-bit using libsamplerate in a Windows Phone project using WASAPI.
I need to get 160 frames from 960 original frames by re-sampling. After capturing audio using the GetBuffer method, I send the captured BYTE array of 7680 bytes to the method below:
void BackEndAudio::ChangeSampleRate(BYTE* buf)
{
    int er2;
    st = src_new(2, 1, &er2);
    //SRC_DATA sd defined before
    sd = new SRC_DATA;
    BYTE *onechbuf = new BYTE[3840];
    int outputIndex = 0;
    //convert Stereo to Mono
    for (int n = 0; n < 7680; n += 8)
    {
        onechbuf[outputIndex++] = buf[n];
        onechbuf[outputIndex++] = buf[n+1];
        onechbuf[outputIndex++] = buf[n+2];
        onechbuf[outputIndex++] = buf[n+3];
    }
    float *res1 = new float[960];
    res1 = (float *)onechbuf;
    float *res2 = new float[160];
    //change samplerate
    sd->data_in = res1;
    sd->data_out = res2;
    sd->input_frames = 960;
    sd->output_frames = 160;
    sd->src_ratio = (double)1/6;
    sd->end_of_input = 1;
    int er = src_process(st, sd);
    transportController->WriteAudio((BYTE *)res2, 640);
    delete[] onechbuf;
    src_delete(st);
    delete sd;
}
src_process returns no error, sd->input_frames_used is set to 960, and sd->output_frames_gen is set to 159, but the rendered output is only noise.
I use the code in a real-time VoIP app.
What could be the source of the problem?
I found the problem. I shouldn't create a new SRC_STATE object and delete it on each call of my function with st = src_new(2, 1, &er2); and src_delete(st); calling them once is enough for the whole audio re-sampling session. Also, there is no need to use a pointer for the SRC_DATA. I modified my code as below and it works fine now.
void BackEndAudio::ChangeSampleRate(BYTE* buf)
{
    BYTE *onechbuf = new BYTE[3840];
    int outputIndex = 0;
    //convert Stereo to Mono
    for (int n = 0; n < 7680; n += 8)
    {
        onechbuf[outputIndex++] = buf[n];
        onechbuf[outputIndex++] = buf[n+1];
        onechbuf[outputIndex++] = buf[n+2];
        onechbuf[outputIndex++] = buf[n+3];
    }
    float *out = new float[160];
    //change samplerate
    sd.data_in = (float *)onechbuf;
    sd.data_out = out;
    sd.input_frames = 960;
    sd.output_frames = 160;
    sd.src_ratio = (double)1/6;
    sd.end_of_input = 0;
    int er = src_process(st, &sd);
    delete[] onechbuf; // release the temporary mono buffer
}
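For reference, the matching one-time setup and teardown could look roughly like this (a minimal sketch; SRC_SINC_FASTEST is the named constant for converter type 2 used in the original src_new(2, 1, &er2) call, and the member names are illustrative):

// In the class: st and sd are now members, created once.
SRC_STATE *st;
SRC_DATA sd;

// Once, e.g. in the constructor or an init method:
int error = 0;
st = src_new(SRC_SINC_FASTEST, 1, &error); // converter type 2, 1 channel

// ...ChangeSampleRate() then runs once per captured buffer...

// Once, on shutdown:
src_delete(st);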
I have a method which calculates an integral image (description here) commonly used in computer vision applications.
float *Integral(unsigned char *grayscaleSource, int height, int width, int widthStep)
{
    // convert the image to single channel 32f
    unsigned char *img = grayscaleSource;

    // set up variables for data access
    int step = widthStep/sizeof(float);
    uint8_t *data = (uint8_t *)img;
    float *i_data = (float *)malloc(height * width * sizeof(float));

    // first row only
    float rs = 0.0f;
    for(int j=0; j<width; j++)
    {
        rs += (float)data[j];
        i_data[j] = rs;
    }
    // remaining cells are sum above and to the left
    for(int i=1; i<height; ++i)
    {
        rs = 0.0f;
        for(int j=0; j<width; ++j)
        {
            rs += data[i*step+j];
            i_data[i*step+j] = rs + i_data[(i-1)*step+j];
        }
    }
    // return the integral image
    return i_data;
}
I am trying to make it as fast as possible. It seems like this should be able to take advantage of Apple's Accelerate.framework, or perhaps ARM's NEON intrinsics, but I can't see exactly how. That nested loop seems potentially quite slow (for real-time applications, at least).
Does anyone think this is possible to speed up using these or any other techniques?
You can certainly vectorize the row by row summation. That is vDSP_vadd(). The horizontal direction is vDSP_vrsum().
If you want to write your own vector code, the horizontal sum might be sped up by something like psadbw, but that is Intel. Also, take a look at prefix sum algorithms, which are famously parallelizable.
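A rough sketch of the vDSP version (assuming iOS/OS X with Accelerate, and a tightly packed image, i.e. widthStep == width; note that vDSP_vrsum leaves element 0 at zero, hence the vDSP_vsadd fix-up):

#include <Accelerate/Accelerate.h>
#include <stdlib.h>

float *IntegralVDSP(const unsigned char *src, int height, int width)
{
    float *out  = (float *)malloc((size_t)height * width * sizeof(float));
    float *rowf = (float *)malloc((size_t)width * sizeof(float));
    const float one = 1.0f;

    for (int i = 0; i < height; i++) {
        const unsigned char *srcRow = src + (size_t)i * width;
        float *dstRow = out + (size_t)i * width;

        // Convert the 8-bit row to float.
        vDSP_vfltu8(srcRow, 1, rowf, 1, (vDSP_Length)width);

        // Horizontal running sum; add the first sample back in,
        // since vDSP_vrsum's output starts at zero.
        vDSP_vrsum(rowf, 1, &one, dstRow, 1, (vDSP_Length)width);
        vDSP_vsadd(dstRow, 1, &rowf[0], dstRow, 1, (vDSP_Length)width);

        // Vertical step: add the previous output row element-wise.
        if (i > 0)
            vDSP_vadd(dstRow, 1, dstRow - width, 1, dstRow, 1, (vDSP_Length)width);
    }
    free(rowf);
    return out;
}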
So, I'm writing (or have written) a contour mapping application for mapping power frequency in North America, and it works really well... on Linux. I run it on a loop to update a BMP map file, which I'm eventually going to output on a website. It can run and update itself in about 3 seconds, which is great for me. The problem came when I tried to port the application to Windows. I moved the code into Visual Studio 2012, linked the libraries, and compiled; I had to make it ignore a few warnings about float-to-integer conversion, but I got it working.
Then I ran it, and it didn't seem to do anything. But after adding a few output statements, I realized it was doing something; it was just doing it probably 100x slower than the Linux executable! Admittedly, the code is pretty intensive (around 400,000 iterations), but this is just ridiculous.
I saw a lot of other topics about VS running slow in debug mode, but even after I compile and run an executable, it's still just as slow.
Here's the relevant code; let me know if you have any ideas. Some of the functions you won't recognize, either because they aren't relevant to performance (I only call them once, and I know they aren't the source of the speed problem) or because they come from the EasyBMP library I'm using for image manipulation. Right now I have it set to output a 100x100 image, but originally I was outputting an 800x500 pixel image:
float get_value(int x, int y, int dataNum, vector<vector<float> > data)
{
    vector<float> distance;
    float value = 0; float distanceTotal = 0;
    for(int i=0; i<dataNum; i++)
    {
        distance.push_back(sqrt(pow(data[0][i]-x,2) + pow(data[1][i]-y,2)));
        if(distance[i] < 2)
            return 0;
        distance[i] = 1/pow(distance[i],3);
        distanceTotal += distance[i];
    }
    for(int i=0; i<dataNum; i++)
    {
        value += distance[i]/distanceTotal*data[2][i];
    }
    return value;
}

int _tmain(int argc, _TCHAR* argv[])
{
    //set image attributes
    int height = 100; int width = 100; int colorScale = 5*255; string dataFile = "data2.txt";
    //get data and colormap
    vector<vector<float> > data = getData(dataFile, width, height);
    vector<RGBApixel> colorMap = makeColorMap(colorScale);
    int dataNum = data[0].size();
    pair<float,float> range = make_pair(*min_element(data[2].begin(),data[2].end()),*max_element(data[2].begin(),data[2].end()));
    //make image
    BMP newMap;
    newMap.SetBitDepth(16);
    newMap.SetSize(width,height);
    for(int x=0; x<width; x++)
    {
        for(int y=0; y<height; y++)
        {
            //for debug purposes
            cout << x << " " << y << endl;
            float value = get_value(x,y,dataNum,data);
            //get color value based on data value
            int colorValue = floor((value-range.first)/(range.second-range.first)*colorScale);
            //handle border cases
            if(colorValue < 0)
                colorValue = 0;
            else if(colorValue > colorScale-1)
                colorValue = colorScale-1;
            newMap.SetPixel(x,y,colorMap[colorValue]);
        }
    }
    newMap.WriteToFile("map.bmp");
    return 0;
}
Any thoughts?
Thanks!
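One thing worth checking in the code above (an observation, not a confirmed cause): get_value takes data by value, so the entire vector<vector<float> > is copied on every one of the width*height calls, and the cout in the inner loop is also expensive on a Windows console. Passing by const reference removes the copies; the body needs no other change:

float get_value(int x, int y, int dataNum, const vector<vector<float> > &data)
{
    // ...body exactly as above...
}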