I am trying to convert the audio from my iPhone mic to MP3 in real time.
I have it set up as such:
let format = AVAudioFormat(commonFormat: AVAudioCommonFormat.pcmFormatInt16,
                           sampleRate: 44100.0,
                           channels: 1,
                           interleaved: true)
mic.avAudioUnitOrNode.installTap(onBus: 0,
                                 bufferSize: AVAudioFrameCount((format?.sampleRate)!),
                                 format: format,
                                 block: { (buffer: AVAudioPCMBuffer!, time: AVAudioTime!) -> Void in
    let audioBuffer: AVAudioBuffer = buffer
    self.audioProcessor?.processBuffer(audioBuffer.mutableAudioBufferList)
})
- (void)processBuffer:(AudioBufferList *)audioBufferList
{
    const int PCM_SIZE = 8192;
    const int MP3_SIZE = 8192;
    short int pcm_buffer[PCM_SIZE * 2];
    unsigned char mp3_buffer[MP3_SIZE];
    int write = lame_encode_buffer_interleaved(mLame, pcm_buffer, (int*)audioBufferList->mBuffers[0].mData, mp3_buffer, MP3_SIZE);
    //some other stuff
}
but I am getting a crash as soon as I get to the encoding portion.
EDIT:
I got it to stop crashing, but the audio quality is pretty harsh:
int size = audioBufferList->mBuffers[0].mDataByteSize / 2;
unsigned char mp3_buffer[size * 4];
int write = lame_encode_buffer(mLame, (short int *)audioBufferList->mBuffers[0].mData, (short int *)audioBufferList->mBuffers[0].mData, size, mp3_buffer, size * 4);
It turned out there was a mismatch between the sample rate of the source audio and the rate the encoder was configured for.
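For anyone hitting the same thing, the fix boils down to initializing LAME with the rate the tap actually delivers. A minimal sketch of the encoder setup (my own, assuming the 44.1 kHz mono format above and the mLame handle from the question):

lame_t mLame = lame_init();
lame_set_in_samplerate(mLame, 44100); // must equal the tap's sampleRate, or the output sounds wrong
lame_set_num_channels(mLame, 1);      // mono, matching the tap format
lame_set_brate(mLame, 128);           // output bitrate in kbps
lame_set_quality(mLame, 2);           // 2 = high quality, slower encode
lame_init_params(mLame);              // apply the settings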
I am currently trying to learn audio programming. My goal is to open a WAV file, extract everything, and play the samples with RtAudio.
I made a WaveLoader class which lets me extract the samples and metadata. I used this guide to do that, and I checked that everything is correct with 010 Editor. Here is a snapshot of 010 Editor showing the structure and data.
And this is how I store the raw samples inside the WaveLoader class:
data = new short[wave_data.payloadSize]; // allocates payloadSize shorts (twice the bytes actually read below)
if (!fread(data, 1, wave_data.payloadSize, sound_file))
{
    throw ("Could not read wav data");
}
If I print out each sample I get: 1, -3, 4, -5 ... which seems OK.
The problem is that I am not sure how I can play them. This is what I've done:
/*
 * Using RtAudio to play samples
 */
bool Player::Play()
{
    ShowDevices();
    rt.showWarnings(true);
    RtAudio::StreamParameters oParameters; //, iParameters;
    oParameters.deviceId = rt.getDefaultOutputDevice();
    oParameters.firstChannel = 0;
    oParameters.nChannels = mAudio.channels;
    //iParameters.deviceId = rt.getDefaultInputDevice();
    //iParameters.nChannels = 2;
    unsigned int sampleRate = mAudio.sampleRate;
    // Use a buffer of 512 frames; the callback must supply 512 frames each time
    unsigned int nBufferFrames = 512;
    RtAudio::StreamOptions options;
    options.flags = RTAUDIO_SCHEDULE_REALTIME;
    options.flags |= RTAUDIO_NONINTERLEAVED; // |= so the first flag is kept; note options is never passed to openStream() below
    //&iParameters, NULL, RTAUDIO_FLOAT64, sampleRate, &bufferFrames, &mCallback, (void *)&rawData
    try {
        rt.openStream(&oParameters, NULL, RTAUDIO_SINT16, sampleRate, &nBufferFrames, &mCallback, (void*)&mAudio);
        rt.startStream();
    }
    catch (RtAudioError& e) {
        std::cout << e.getMessage() << std::endl;
        return false;
    }
    return true;
}
/*
 * RtAudio Callback
 */
int mCallback(void *outputBuffer, void *inputBuffer, unsigned int nBufferFrames, double streamTime, RtAudioStreamStatus status, void *userData)
{
    short *out = static_cast<short*>(outputBuffer);
    auto *data = static_cast<Player::AUDIO_DATA*>(userData);
    // First time the callback is called data->ptr is 0, so the offset is 0.
    // Second time data->ptr is 1, so offset = nBufferFrames (512) * 1 = 512.
    unsigned int offset = nBufferFrames * data->ptr++;
    // If the offset has passed the end of our data, we are done
    if (offset >= data->dataSize) return 1;
    printf("Offset: %i\n", offset);
    // Starting at the offset, loop nBufferFrames (512) times, copying one
    // 16-bit sample per iteration into the output buffer
    for (unsigned int i = offset; i < offset + nBufferFrames; ++i)
    {
        short sample = data->rawData[i]; // get raw sample from our struct
        *out++ = sample;                 // pass to output buffer for playback
        printf("Current sample value: %i\n", sample); // shows 1, -3, 4, -5; matches 010 Editor
    }
    printf("Current time: %f\n", streamTime);
    return 0;
}
Inside the callback function, when I print out the sample values I get exactly what 010 Editor shows. Why isn't RtAudio playing them? What is wrong here? Do I need to normalize the sample values to between -1 and 1?
Edit:
The wav file I am trying to play:
Chunksize: 16
Format: 1
Channel: 1
SampleRate: 48000
ByteRate: 96000
BlockAlign: 2
BitPerSample: 16
Size of raw samples total: 2217044 bytes
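(Sanity check: 2217044 bytes at 2 bytes per sample is 1108522 mono samples, which at 48000 samples per second is roughly 23 seconds of audio.)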
For some reason it works when I pass input parameters to openStream():
RtAudio::StreamParameters oParameters, iParameters;
oParameters.deviceId = rt.getDefaultOutputDevice();
oParameters.firstChannel = 0;
oParameters.nChannels = mAudio.channels;
iParameters.deviceId = rt.getDefaultInputDevice();
iParameters.nChannels = 1;
unsigned int sampleRate = mAudio.sampleRate;
// Use a buffer of 512 frames; the callback must supply 512 frames each time
unsigned int nBufferFrames = 512;
RtAudio::StreamOptions options;
options.flags = RTAUDIO_SCHEDULE_REALTIME;
options.flags |= RTAUDIO_NONINTERLEAVED; // |= so the first flag is kept; options is still never passed to openStream()
//&iParameters, NULL, RTAUDIO_FLOAT64, sampleRate, &bufferFrames, &mCallback, (void *)&rawData
try {
    rt.openStream(&oParameters, &iParameters, RTAUDIO_SINT16, sampleRate, &nBufferFrames, &mCallback, (void*)&mAudio);
    rt.startStream();
}
catch (RtAudioError& e) {
    std::cout << e.getMessage() << std::endl;
    return false;
}
return true;
It was so random: I was trying to play back my mic, left the input parameters in, and my WAV file was suddenly playing. Is this a bug?
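It shouldn't be; output-only streams are a normal use of RtAudio. For reference, here is a minimal output-only sketch (my own, reusing the names above) with the StreamOptions actually passed to openStream(), which the snippets above construct but never use:

RtAudio::StreamParameters oParameters;
oParameters.deviceId = rt.getDefaultOutputDevice();
oParameters.firstChannel = 0;
oParameters.nChannels = mAudio.channels;
RtAudio::StreamOptions options;
options.flags = RTAUDIO_SCHEDULE_REALTIME; // no RTAUDIO_NONINTERLEAVED: the WAV samples are interleaved
unsigned int nBufferFrames = 512;
try {
    rt.openStream(&oParameters, NULL, RTAUDIO_SINT16, mAudio.sampleRate,
                  &nBufferFrames, &mCallback, (void*)&mAudio, &options);
    rt.startStream();
}
catch (RtAudioError& e) {
    std::cout << e.getMessage() << std::endl;
}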
I'm attempting to use the SoundTouch C++ library for audio speed and pitch changes in an Android app. I have successfully pushed a Java byte[] array (from a .wav) through JNI, returned it, and played it back with an AudioTrack.
The next step is attempting to push a sample byte[] through the SoundTouch pipeline. I have dissected the source of the SoundStretch console program included with the library and have attempted to adapt it. I am using a stereo, 16-bit source for testing purposes.
With my current temporary setup I am ignoring the RIFF header and converting it along with the .wav data, because the Java AudioTrack object does not need to read the header; it just plays raw PCM. Playing the raw byte[] without sending it through SoundTouch just results in a small click where the header is.
After sending it through the SoundTouch pipeline, I am getting white noise where the beginning of the audio is supposed to be. I assume the problem is at the end of my write() function, where I am casting shorts to signed chars. Here, the console app writes to a file instead of pushing to a vector:
int res = (int)fwrite(temp, 1, numBytes, fptr);
I have read the documentation for fwrite, but I don't know enough about bit twiddling or audio processing to know what to do here to correctly get this information into a char[] instead of writing it to a file. I know I am losing information with the cast, but I am unsure of how to correct it.
In case anyone is extra motivated, the SoundStretch source can be found here: http://www.surina.net/soundtouch/sourcecode.html
extern "C" DLL_PUBLIC jbyteArray
Java_net_surina_soundtouch_SoundTouch_getMutatedBytes
(JNIEnv *env, jobject thiz, jbyteArray input, jint length)
{
const int BUFF_SIZE = 2048000;
SoundTouch soundTouch;
jboolean isCopy;
jbyte* ar = env->GetByteArrayElements(input, &isCopy);
signed char* cBufferIn = (signed char*)ar;
SAMPLETYPE* fBufferIn = new SAMPLETYPE[length];
vector<signed char> fBufferOut;
//converts the chars to floats per the SoundTouch console app.
convertInput16(cBufferIn, fBufferIn, length);
//channels, sampling rate, speed, pitch change
setup(&soundTouch, 2, 44100, 1.0, 0);
//transform floats from fBufferIn to fBufferout
process(&soundTouch, fBufferIn, fBufferOut, BUFF_SIZE);
signed char* res = &fBufferOut[0];
jbyteArray result = env->NewByteArray(length);
env->SetByteArrayRegion(result, 0, fBufferOut.size(), res);
LOGV("fBufferOut Size: %d", fBufferOut.size());
delete[] fBufferIn;
return result;
}
process():
static void process(SoundTouch* soundTouch, SAMPLETYPE* fBufferIn, vector<signed char>& fBufferOut, int BUFF_SIZE)
{
    int nSamples = BUFF_SIZE / 2;        // 2 bytes per sample, using 16-bit samples for testing
    int buffSizeSamples = BUFF_SIZE / 2; // 2-channel stereo
    soundTouch->putSamples(fBufferIn, nSamples);
    do
    {
        nSamples = soundTouch->receiveSamples(fBufferIn, buffSizeSamples);
        write(fBufferIn, fBufferOut, nSamples / 2); // 2 channels
    } while (nSamples != 0);
    soundTouch->flush();
    do
    {
        nSamples = soundTouch->receiveSamples(fBufferIn, buffSizeSamples);
        write(fBufferIn, fBufferOut, nSamples / 2);
        LOGV("NUMBER OF SAMPLES: %d", nSamples);
    } while (nSamples != 0);
}
write():
static void write(const float *bufferIn, vector<signed char>& bufferOut, int numElems)
{
    int numBytes;
    int bytesPerSample;
    if (numElems == 0) return;
    bytesPerSample = 16 / 8; // 16-bit test sample / bits in a byte
    numBytes = numElems * bytesPerSample;
    short *temp = (short*)getConvBuffer(numBytes);
    switch (bytesPerSample)
    {
    case 2: // 16-bit encoding per the SoundStretch console app
    {
        short *temp2 = (short *)temp;
        for (int i = 0; i < numElems; i++)
        {
            short value = (short)saturate(bufferIn[i] * 32768.0f, -32768.0f, 32767.0f); // magic to me
            temp2[i] = value; // works for little endian only
        }
        break;
    }
    default:
        assert(false);
    }
    for (int i = 0; i < numElems; ++i)
    {
        bufferOut.push_back((signed char)temp[i]); // I think my problem is here
    }
    delete[] temp;
    //bytesWritten += numBytes;
}
I just needed to get all the bits into the char[] by pushing both bytes of each 16-bit sample, low byte first (little-endian), instead of truncating each short to a single byte:
for (int i = 0; i < numElems; ++i)
{
    bufferOut.push_back(temp[i] & 0xff);
    bufferOut.push_back((temp[i] >> 8) & 0xff);
}
I'm working on a VOIP client using PortAudio and Opus.
I read from the microphone one frame at a time, then:
-encode each frame with Opus and put it in a list
-pop the first element from the list and decode it
-play it back with PortAudio
If I do the same thing without encoding, my sound works great. But when I use Opus my sound is bad; I can't understand the voice (which is bad for a VOIP client).
HandlerOpus::HandlerOpus(int sample_rate, int num_channels)
{
    this->num_channels = num_channels;
    this->enc = opus_encoder_create(sample_rate, num_channels, OPUS_APPLICATION_VOIP, &this->error);
    this->dec = opus_decoder_create(sample_rate, num_channels, &this->error);
    opus_int32 rate;
    opus_encoder_ctl(enc, OPUS_GET_BANDWIDTH(&rate));
    this->encoded_data_size = rate;
}

HandlerOpus::~HandlerOpus(void)
{
    opus_encoder_destroy(this->enc);
    opus_decoder_destroy(this->dec);
}

unsigned char *HandlerOpus::encodeFrame(const float *frame, int frame_size)
{
    unsigned char *compressed_buffer;
    int ret;
    compressed_buffer = new (unsigned char[this->encoded_data_size]);
    ret = opus_encode_float(this->enc, frame, frame_size, compressed_buffer, this->encoded_data_size);
    return (compressed_buffer);
}

float *HandlerOpus::decodeFrame(const unsigned char *data, int frame_size)
{
    int ret;
    float *frame = new (float[frame_size * this->num_channels]);
    opus_packet_get_nb_channels(data);
    ret = opus_decode_float(this->dec, data, this->encoded_data_size, frame, frame_size, 0);
    return (frame);
}
I can't change the library; I have to use Opus.
The sample rate is 48000 and the frames per buffer is 480, and I have tried both mono and stereo.
What am I doing wrong?
I solved the problem myself by changing the config: the sample rate to 24000, with the frames per buffer still 480.
It's 6 years later, but I'm gonna post an answer for future googlers like me:
I had a very similar problem and fixed it by changing the PortAudio sample type to paInt32 and switching from opus_decode_float to plain opus_decode.
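To make that concrete, here is a minimal 16-bit round trip (my own sketch, not the poster's code, assuming 48 kHz mono and 480-frame buffers as in the question). The key detail is that the byte count returned by opus_encode is what must be passed to opus_decode, rather than a fixed encoded_data_size:

#include <opus/opus.h>

opus_int16 pcm_in[480], pcm_out[480];
unsigned char packet[4000]; // 4000 bytes is the recommended maximum packet size

int err = 0;
OpusEncoder *enc = opus_encoder_create(48000, 1, OPUS_APPLICATION_VOIP, &err);
OpusDecoder *dec = opus_decoder_create(48000, 1, &err);

// Encode one 10 ms frame (480 samples at 48 kHz)...
opus_int32 nbytes = opus_encode(enc, pcm_in, 480, packet, sizeof(packet));
// ...and decode it back, passing the actual encoded length.
int nsamples = opus_decode(dec, packet, nbytes, pcm_out, 480, 0);

opus_encoder_destroy(enc);
opus_decoder_destroy(dec);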
How to get data samples from QAudioInput
I found this code in the Qt audioinput example:
void InputTest::readMore()
{
    if (!m_audioInput)
        return;
    qint64 len = m_audioInput->bytesReady();
    if (len > 4096)
        len = 4096;
    qint64 l = m_input->read(m_buffer.data(), len);
    if (l > 0) {
        m_audioInfo->write(m_buffer.constData(), l);
    }
}
I understood that m_buffer contains the audio data samples,
but my audio processing library expects short samples.
How can I convert this to a short pointer?
My audio library function looks like this:
putSample(short *Sample, int numberOfSample)
I can get the number of samples from:
Q_ASSERT(m_format.sampleSize() % 8 == 0);
const int channelBytes = m_format.sampleSize() / 8;
const int sampleBytes = m_format.channels() * channelBytes;
Q_ASSERT(len % sampleBytes == 0);
const int numSamples = len / sampleBytes;
This page indicates read() is expecting a char* to store the data in. If you have set up the format of the audio device properly the data will indeed be 'segmented' as shorts in the char array and you can simply cast the char* to a short* before passing it to your library.
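A minimal sketch of what that looks like inside readMore(), assuming the device was set up for 16-bit samples (putSample() is the library function from the question):

qint64 l = m_input->read(m_buffer.data(), len);
if (l > 0) {
    // Each 16-bit sample occupies two bytes of the char buffer; for
    // multi-channel audio, divide by sampleBytes as computed above instead.
    const int numSamples = int(l / sizeof(short));
    putSample(reinterpret_cast<short*>(m_buffer.data()), numSamples);
}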
I have a really simple program that adds an audio stream to an AVI file with a pre-existing video stream.
The issue is that the resulting file contains a video stream but there does not appear to be any data in the stream.
The audio file is read by SDKwavefile from the DirectX samples.
AVIFileInit();
PAVIFILE avi;
AVIFileOpen(&avi, argv[1], OF_WRITE, NULL);
CWaveFile wav;
wav.Open(argv[2], NULL, WAVEFILE_READ);
WAVEFORMATEX *wavFormat = wav.GetFormat();
PAVISTREAM audioStream;
AVIFileCreateStream(avi, &audioStream, &audioInfo);
AVISTREAMINFO audioInfo;
memset(&audioInfo, 0, sizeof(AVISTREAMINFO));
audioInfo.fccType = streamtypeAUDIO;
audioInfo.dwScale = wavFormat->nBlockAlign;
audioInfo.dwRate = wavFormat->nSamplesPerSec * wavFormat->nBlockAlign;
audioInfo.dwSampleSize = wavFormat->nBlockAlign;
audioInfo.dwQuality = (DWORD)-1;
AVIStreamSetFormat(audioStream, 0, wavFormat, sizeof(WAVEFORMATEX));
BYTE *data = (BYTE *)malloc(wav.GetSize());
DWORD sizeRead;
wav.Read(data, wav.GetSize(), &sizeRead);
AVIStreamWrite(audioStream, 0, (wav.GetSize() * 8) / wavFormat->wBitsPerSample, data, wav.GetSize(), 0, NULL, NULL);
AVIStreamRelease(audioStream);
free(data);
wav.Close();
AVIFileRelease(avi);
AVIFileExit();
(Also, I know I shouldn't be using VFW anymore but that decision goes way above my head. And I know I'm not checking the results of anything, that can come later.)
Thanks.
I tried to use this to add a .wav to an existing .avi (although I had a class CWaveSoundRead).
If you check the return codes, you get to AVIStreamWrite() which returns 0x80044065, which turns out to be AVIERR_UNSUPPORTED.
In hindsight, I'd say you called AVIFileCreateStream() before you filled in the AVISTREAMINFO object. Actually, now that I see it, it's hard to imagine your code compiling as-is, since audioInfo is defined AFTER AVIFileCreateStream!
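In other words, fill in the AVISTREAMINFO first and create the stream afterwards, reusing the question's own variables:

AVISTREAMINFO audioInfo;
memset(&audioInfo, 0, sizeof(AVISTREAMINFO));
audioInfo.fccType = streamtypeAUDIO;
audioInfo.dwScale = wavFormat->nBlockAlign;
audioInfo.dwRate = wavFormat->nSamplesPerSec * wavFormat->nBlockAlign;
audioInfo.dwSampleSize = wavFormat->nBlockAlign;
audioInfo.dwQuality = (DWORD)-1;
// Only now, with the description filled in, create the stream:
PAVISTREAM audioStream;
AVIFileCreateStream(avi, &audioStream, &audioInfo);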
Here's something I did, although it still gets the audio stream length wrong:
struct FmtChunk {
    char id[4];                     //="fmt "
    unsigned long size;             //=16 or 0x28
    short wFormatTag;               //=WAVE_FORMAT_PCM=1
    unsigned short wChannels;       //=1 or 2 for mono or stereo
    unsigned long dwSamplesPerSec;  //=11025 or 22050 or 44100
    unsigned long dwAvgBytesPerSec; //=wBlockAlign * dwSamplesPerSec
    unsigned short wBlockAlign;     //=wChannels * (wBitsPerSample==8?1:2)
    unsigned short wBitsPerSample;  //=8 or 16, for bits per sample
};

struct DataChunk {
    char id[4];            //="data"
    unsigned long size;    //=datsize, size of the following array
    unsigned char data[1]; //=the raw data goes here
};

struct WavChunk {
    char id[4];         //="RIFF"
    unsigned long size; //=datsize+8+16+4
    char type[4];       //="WAVE"
};
bool Q_AVI_AddWav(cstring fnameVideo, cstring fnameAudio)
// Adds a .wav file to an existing .avi (with video stream)
{
    IAVIStream* m_pStreamAudio = 0;
    HRESULT hr;
    AVIFileInit();
    PAVIFILE avi;
    hr = AVIFileOpen(&avi, fnameVideo, OF_WRITE, NULL);
    CHECK(hr, "AVIFileOpen");

    WavChunk wav;
    FmtChunk fmt;
    DataChunk dat;

    // Read wav file
    FILE *fr;
    int pos;
    fr = qfopen(fnameAudio, "rb");
    // Read header
    fread(&wav, 1, sizeof(wav), fr);
    // Read 'fmt' chunk; may be 16 or 40 in length
    pos = ftell(fr);
    fread(&fmt, 1, sizeof(fmt), fr);
    if (fmt.size == 40) fseek(fr, 40 - 16, SEEK_CUR); // skip rest of fmt
    // else it's ok
    // Read data specs
    fread(&dat, sizeof(dat), 1, fr);
    char *buf = new char[dat.size];
    qdbg("Wav data %d bytes\n", dat.size);
    fread(buf, 1, dat.size, fr);
    qfclose(fr);

    // Set wave format info
    WAVEFORMATEX wfx;
    wfx.wFormatTag = fmt.wFormatTag;
    wfx.cbSize = 0;
    wfx.nAvgBytesPerSec = fmt.dwAvgBytesPerSec;
    wfx.nBlockAlign = fmt.wBlockAlign;
    wfx.nChannels = fmt.wChannels;
    wfx.nSamplesPerSec = fmt.dwSamplesPerSec;
    wfx.wBitsPerSample = fmt.wBitsPerSample;

    // Create audio stream
    AVISTREAMINFO ahdr;
    ZeroMemory(&ahdr, sizeof(ahdr));
    ahdr.fccType = streamtypeAUDIO;
    ahdr.dwScale = wfx.nBlockAlign;
    ahdr.dwRate = wfx.nSamplesPerSec * wfx.nBlockAlign;
    ahdr.dwSampleSize = wfx.nBlockAlign;
    ahdr.dwQuality = (DWORD)-1;
    hr = AVIFileCreateStream(avi, &m_pStreamAudio, &ahdr);
    CHECK(hr, "AVIFileCreateStream");
    if (hr != AVIERR_OK) { if (buf) QDELETE_ARRAY(buf); /*delete[] buf;*/ return false; }
    hr = AVIStreamSetFormat(m_pStreamAudio, 0, &wfx, sizeof(WAVEFORMATEX));
    CHECK(hr, "AVIStreamSetFormat");
    if (hr != AVIERR_OK) { if (buf) QDELETE_ARRAY(buf); /*delete[] buf;*/ return false; }

    // Write audio stream
    unsigned long numbytes = dat.size;
    unsigned long numsamps = fmt.wChannels * numbytes * 8 / wfx.wBitsPerSample;
    hr = AVIStreamWrite(m_pStreamAudio, 0, numsamps, buf, numbytes, 0, 0, 0);
    CHECK(hr, "AVIStreamWrite");
    qdbg("Write numsamps %d, numbytes %d\n", numsamps, numbytes);
    QDELETE_ARRAY(buf); //if(buf)delete[] buf;

    // Release audio stream
    AVIStreamRelease(m_pStreamAudio);
    // Close AVI
    hr = AVIFileRelease(avi);
    CHECK(hr, "AVIFileRelease");
    // Close VFW
    AVIFileExit();
    return hr == AVIERR_OK;
}