Direct Sound: How do I read captured data from a small buffer? - c++

I'm trying to capture waveforms of floating point PCM data from a microphone. The application is only asking for a small number of samples each cycle (For 20'000Hz and a frame size of 0.003s, it would ask for 60 samples)
I would like to set the buffer size depending on how many ms the app is interested in but it seems that dwBufferBytes has to be a certain size. Instead, I set it to nAvgBytesPerSec and only lock/copy 60 samples each time (even though much more data would be available to read)
Is this a valid approach or is there a different way to throttle the sound driver? Is there a way to reduce the size of the buffer to only give me as much data as the app is requesting? I don't want to get a ton of sound
data if the application only wants 60 values.
Using this approach, I certainly will run into problems if the buffer catches up with my (slow) read cursor.
unsigned short channelNum = 2;
unsigned short bitsPerSample = 32;
unsigned long sampleRate = 20000;
unsigned short blockAlign = (channelNum * bitsPerSample) / 8;
unsigned long avgBytesPerSec = sampleRate * blockAlign;
WAVEFORMATEX wfx = { WAVE_FORMAT_IEEE_FLOAT, channelNum, sampleRate, avgBytesPerSec, blockAlign, bitsPerSample, 0 };
unsigned int mSampleBufferSize = 60; // 1400
DSCBUFFERDESC bufferDesc;
bufferDesc.dwSize = sizeof(DSCBUFFERDESC);
bufferDesc.dwFlags = 0;
bufferDesc.dwBufferBytes = wfx.nAvgBytesPerSec;
bufferDesc.dwReserved = 0;
bufferDesc.lpwfxFormat = &wfx;
bufferDesc.dwFXCount = 0;
bufferDesc.lpDSCFXDesc = NULL;
IDirectSoundCaptureBuffer *buffer = 0;
bool bufferRunning = false;
if (directSound && capture)
{
hr = capture->CreateCaptureBuffer(&bufferDesc, &buffer, NULL);
if (FAILED(hr))
std::cout << "SampleThread() -- Error creating DirectSoundCaptureBuffer " << endl;
else
{
hr = buffer->Start(DSCBSTART_LOOPING);
if (SUCCEEDED(hr)) {
bufferRunning = true;
}
}
}
void* primaryBuffer = NULL;
unsigned long primaryBufferSizeBytes = 0;
void* secondaryBuffer = NULL;
unsigned long secondaryBufferSize = 0;
bool mStopExecution = false;
unsigned long lastReadPosition = 0;
if (directSound && capture && buffer)
{
while (!mStopExecution)
{
DWORD readPos;
WORD remainingSize = 0;
DWORD capturePos;
hr = buffer->GetCurrentPosition(&capturePos, &readPos);
if (FAILED(hr))
{
cout << "SampleThread() -- Error GetCurrentPosition" << endl;
return 0;
}
buffer->Lock(lastReadPos, mSampleBufferSize, &primaryBuffer, &primaryBufferSizeBytes, &secondaryBuffer, &secondaryBufferSize, NULL);
memcpy(mBuffer, (float*)primaryBuffer, primaryBufferSizeBytes / sizeof(float));
// .... copy secondary buffer
hr = buffer->Unlock(primaryBuffer, primaryBufferSizeBytes, secondaryBuffer, secondaryBufferSize);
lastReadPosition = (lastReadPosition + mSampleBufferSize) % bufferDesc.dwBufferBytes;
}
}

Related

How to make a thread stop excution (eg: std::this_thread::sleep_for) for an accturate interval

I am currently making a small discord bot that can play music to improve my skill. That's why i don't use any discord lib.
I want the music as smooth as possible, but when i played some piece of music, the music produced is very choppy.
here is my code:
concurrency::task<void> play(std::string id) {
auto shared_token = std::make_shared<concurrency::cancellation_token*>(&p_token);
auto shared_running = std::make_shared<bool*>(&running);
return concurrency::create_task([this, id, shared_token] {
audio* source = new audio(id); // create a s16le binary stream using FFMPEG
speak(); // sending speak packet
printf("creating opus encoder\n");
const unsigned short FRAME_MILLIS = 20;
const unsigned short FRAME_SIZE = 960;
const unsigned short SAMPLE_RATE = 48000;
const unsigned short CHANNELS = 2;
const unsigned int BITRATE = 64000;
#define MAX_PACKET_SIZE FRAME_SIZE * 5
int error;
OpusEncoder* encoder = opus_encoder_create(SAMPLE_RATE, CHANNELS, OPUS_APPLICATION_AUDIO, &error);
if (error < 0) {
throw "failed to create opus encoder: " + std::string(opus_strerror(error));
}
error = opus_encoder_ctl(encoder, OPUS_SET_BITRATE(BITRATE));
if (error < 0) {
throw "failed to set bitrate for opus encoder: " + std::string(opus_strerror(error));
}
if (sodium_init() == -1) {
throw "libsodium initialisation failed";
}
int num_opus_bytes;
unsigned char* pcm_data = new unsigned char[FRAME_SIZE * CHANNELS * 2];
opus_int16* in_data;
std::vector<unsigned char> opus_data(MAX_PACKET_SIZE);
class timer_event {
bool is_set = false;
public:
bool get_is_set() { return is_set; };
void set() { is_set = true; };
void unset() { is_set = false; };
};
timer_event* run_timer = new timer_event();
run_timer->set();
//this is the send loop
concurrency::create_task([run_timer, this, shared_token] {
while (run_timer->get_is_set()) {
speak();
int i = 0;
while (i < 15) {
utils::sleep(1000);
if (run_timer->get_is_set() == false) {
std::cout << "Stop sending speak packet due to turn off\n";
concurrency::cancel_current_task();
return;
}
if ((*shared_token)->is_canceled()) {
std::cout << "Stop sending speak packet due to cancel\n";
concurrency::cancel_current_task();
return;
}
}
}});
std::deque<std::string>* buffer = new std::deque<std::string>();
auto timer = concurrency::create_task([run_timer, this, buffer, FRAME_MILLIS, shared_token] {
while (run_timer->get_is_set() || buffer->size() > 0) {
utils::sleep(5 * FRAME_MILLIS); //std::this_thread::sleep_for
int loop = 0;
int sent = 0;
auto start = boost::chrono::high_resolution_clock::now();
while (buffer->size() > 0) {
if (udpclient.send(buffer->front()) != 0) { //send frame
//udpclient.send ~ winsock sendto
std::cout << "Stop sendding voice data due to udp error\n";
return;
}
buffer->pop_front();
if ((*shared_token)->is_canceled()) {
std::cout << "Stop sending voice data due to cancel\n";
concurrency::cancel_current_task();
}
sent++; //count sent frame
//calculate next time point we should (in theory) send next frame and store in *delay*
long long next_time = (long long)(sent+1) * (long long)(FRAME_MILLIS) * 1000 ;
auto now = boost::chrono::high_resolution_clock::now();
long long mcs_elapsed = (boost::chrono::duration_cast<boost::chrono::microseconds>(now - start)).count(); // elapsed time from start loop
long long delay = std::max((long long)0, (next_time - mcs_elapsed));
//wait for next time point
boost::asio::deadline_timer timer(context_io);
timer.expires_from_now(boost::posix_time::microseconds(delay));
timer.wait();
}
}
});
unsigned short _sequence = 0;
unsigned int _timestamp = 0;
while (1) {
if (buffer->size() >= 50) {
utils::sleep(FRAME_MILLIS);
}
if (source->read((char*)pcm_data, FRAME_SIZE * CHANNELS * 2) != true)
break;
if ((*shared_token)->is_canceled()) {
std::cout << "Stop encoding due to cancel\n";
break;
}
in_data = (opus_int16*)pcm_data;
num_opus_bytes = opus_encode(encoder, in_data, FRAME_SIZE, opus_data.data(), MAX_PACKET_SIZE);
if (num_opus_bytes <= 0) {
throw "failed to encode frame: " + std::string(opus_strerror(num_opus_bytes));
}
opus_data.resize(num_opus_bytes);
std::vector<unsigned char> packet(12 + opus_data.size() + crypto_secretbox_MACBYTES);
packet[0] = 0x80; //Type
packet[1] = 0x78; //Version
packet[2] = _sequence >> 8; //Sequence
packet[3] = (unsigned char)_sequence;
packet[4] = _timestamp >> 24; //Timestamp
packet[5] = _timestamp >> 16;
packet[6] = _timestamp >> 8;
packet[7] = _timestamp;
packet[8] = (unsigned char)(ssrc >> 24); //SSRC
packet[9] = (unsigned char)(ssrc >> 16);
packet[10] = (unsigned char)(ssrc >> 8);
packet[11] = (unsigned char)ssrc;
_sequence++;
_timestamp += SAMPLE_RATE / 1000 * FRAME_MILLIS; //48000Hz / 1000 * 20(ms)
unsigned char nonce[crypto_secretbox_NONCEBYTES];
memset(nonce, 0, crypto_secretbox_NONCEBYTES);
for (int i = 0; i < 12; i++) {
nonce[i] = packet[i];
}
crypto_secretbox_easy(packet.data() + 12, opus_data.data(), opus_data.size(), nonce, key.data());
packet.resize(12 + opus_data.size() + crypto_secretbox_MACBYTES);
std::string msg;
msg.resize(packet.size(), '\0');
for (unsigned int i = 0; i < packet.size(); i++) {
msg[i] = packet[i];
}
buffer->push_back(msg);
}
run_timer->unset();
timer.wait();
unspeak();
delete run_timer;
delete buffer;
opus_encoder_destroy(encoder);
delete[] pcm_data;
});
}
There are 3 possible causes:
I send packet late so server-end buffer run out, so the sound produced has some silence between each each 2 packets. Maybe the timer is not accurate so the sound is out of sync.
The encode process is wrong which causes lost data somehow.
Bad network (i have tested an open source bot written on java, it worked so i can assume that my network is good enough)
So i post this question, hope someone has experienced this situation show me what wrong and what should i do to correct it.
I figured out the problem myself. I want to post solution here for someone who need.
The problem is the timer is unstable so it's usually sleep more than it should, so it makes the music broken.
I changed it to an accurate sleep function which i found somewhere on the internet(i don't remember the source, sorry for that, if you know it please credit it bellow).
Function source code:
#include <math.h>
#include <chrono>
#include <window.h>
static void timerSleep(double seconds) {
using namespace std::chrono;
static HANDLE timer = CreateWaitableTimer(NULL, FALSE, NULL);
static double estimate = 5e-3;
static double mean = 5e-3;
static double m2 = 0;
static int64_t count = 1;
while (seconds - estimate > 1e-7) {
double toWait = seconds - estimate;
LARGE_INTEGER due;
due.QuadPart = -int64_t(toWait * 1e7);
auto start = high_resolution_clock::now();
SetWaitableTimerEx(timer, &due, 0, NULL, NULL, NULL, 0);
WaitForSingleObject(timer, INFINITE);
auto end = high_resolution_clock::now();
double observed = (end - start).count() / 1e9;
seconds -= observed;
++count;
double error = observed - toWait;
double delta = error - mean;
mean += delta / count;
m2 += delta * (error - mean);
double stddev = sqrt(m2 / (count - 1));
estimate = mean + stddev;
}
// spin lock
auto start = high_resolution_clock::now();
while ((high_resolution_clock::now() - start).count() / 1e9 < seconds);
}
Thank you for your support!

Filling audio endpoint buffer provided by WASAPI not playing sound

I am trying to play noise through the default audio endpoint renderer using the WASPAI interface. I am trying to use the code provided by Microsoft on this page: https://learn.microsoft.com/en-us/windows/win32/coreaudio/rendering-a-stream. I want to write a class that can generate noise for this code sample.
I have tried writing signed and unsigned integer values to the buffer of the default audio endpoint renderer, and see that values are being written to the buffer, but there is no sound playing.
To start, I made a header with the needed methods, and a random number generator.
#pragma once
// RNG
#include <random>
template <typename T>
class Random {
public:
Random(T low, T high) : mLow(low), mHigh(high), function(std::mt19937_64(__rdtsc())) {};
T operator()() {
signed __int64 f = function();
return ((f % ((signed __int64) mHigh + (signed __int64) mLow)) + (signed __int64) mLow); }
private:
T mLow;
T mHigh;
std::mt19937_64 function;
};
class Noise_Gen {
public:
Noise_Gen() : nChannels(NULL), nSamplesPerSec(NULL), nAvgBytesPerSec(NULL), nByteAlign(NULL), wBitsPerSample(NULL),
wValidBitsPerSample(NULL), wSamplesPerBlock(NULL), dwChannelMask(NULL), rd(NULL) {};
~Noise_Gen() {
if(rd != NULL) {
delete rd;
}
};
HRESULT SetFormat(WAVEFORMATEX*);
HRESULT LoadData(UINT32 bufferFrameCount, BYTE* pData, DWORD* flags);
private:
void* rd;
// WAVEFORMATEX
WORD nChannels;
DWORD nSamplesPerSec;
DWORD nAvgBytesPerSec;
WORD nByteAlign;
WORD wBitsPerSample;
// WAVEFORMATEXTENSIBLE
WORD wValidBitsPerSample;
WORD wSamplesPerBlock;
DWORD dwChannelMask;
};
Then I added the definitions:
// WASAPI
#include <Audiopolicy.h>
#include <Audioclient.h>
#include <time.h>
#include "Noise_Gen.h"
HRESULT Noise_Gen::SetFormat(WAVEFORMATEX* format) {
nChannels = format->nChannels;
nSamplesPerSec = format->nSamplesPerSec;
nAvgBytesPerSec = format->nAvgBytesPerSec;
nByteAlign = format->nBlockAlign;
wBitsPerSample = format->wBitsPerSample;
WORD wFormatTag = format->wFormatTag;
if(wFormatTag == WAVE_FORMAT_EXTENSIBLE) {
WAVEFORMATEXTENSIBLE* pWFE = reinterpret_cast<WAVEFORMATEXTENSIBLE*>(format);
wValidBitsPerSample = pWFE->Samples.wValidBitsPerSample;
wSamplesPerBlock = pWFE->Samples.wSamplesPerBlock;
dwChannelMask = pWFE->dwChannelMask;
} else {
wValidBitsPerSample = wBitsPerSample;
}
double amplitude = std::pow(2.0, wValidBitsPerSample) - 1;
switch(wBitsPerSample / 8) {
case(1):
rd = new Random<unsigned __int8>(0.0, amplitude);
break;
case(2):
rd = new Random<unsigned __int16>(0.0, amplitude);
break;
case(3):
rd = new Random<unsigned __int32>(0.0, amplitude);
break;
case(4):
rd = new Random<signed __int32>(-amplitude, amplitude);
break;
case(5):
rd = new Random<unsigned __int64>(0.0, amplitude);
break;
case(6):
rd = new Random<unsigned __int64>(0.0, amplitude);
break;
case(7):
rd = new Random<unsigned __int64>(0.0, amplitude);
break;
case(8):
rd = new Random<unsigned __int64>(0.0, amplitude);
break;
default:
return E_NOTIMPL;
}
return S_OK;
}
// (The size of an audio frame = nChannels * wBitsPerSample)
HRESULT Noise_Gen::LoadData(UINT32 bufferFrameCount, BYTE* pData, DWORD* flags) {
for(UINT32 i = 0; i < nChannels *bufferFrameCount; i++) {
switch(wBitsPerSample / 8) {
case(1):
pData[i] = (((Random<unsigned __int8>*)rd)->operator()());
break;
case(2):{
unsigned __int16* pData2 = (unsigned __int16*) pData;
pData2[i] = (((Random<unsigned __int16>*)rd)->operator()());
break;
}
case(3): {
__int32 data = ((Random<unsigned __int32>*)rd)->operator()();
unsigned char* cp = (unsigned char*) (&data);
pData[(3 * i)] = cp[0];
pData[1 + (3 * i)] = cp[1];
pData[2 + (3 * i)] = cp[2];
break;
}
case(4):{
signed __int32* pData2 = (signed __int32*) pData;
pData2[i] = (((Random<signed __int32>*)rd)->operator()());
break;
}
case(5): {
__int64 data = ((Random<unsigned __int64>*)rd)->operator()();
unsigned char* cp = (unsigned char*) &data;
pData[(5 * i)] = cp[0];
pData[1 + (5 * i)] = cp[1];
pData[2 + (5 * i)] = cp[2];
pData[3 + (5 * i)] = cp[3];
pData[4 + (5 * i)] = cp[4];
break;
}
case(6): {
__int64 data = ((Random<unsigned __int64>*)rd)->operator()();
unsigned char* cp = (unsigned char*) &data;
pData[(6 * i)] = cp[0];
pData[1 + (6 * i)] = cp[1];
pData[2 + (6 * i)] = cp[2];
pData[3 + (6 * i)] = cp[3];
pData[4 + (6 * i)] = cp[4];
pData[5 + (6 * i)] = cp[5];
break;
}
case(7): {
__int64 data = ((Random<unsigned __int64>*)rd)->operator()();
unsigned char* cp = (unsigned char*) &data;
pData[(7 * i)] = cp[0];
pData[1 + (7 * i)] = cp[1];
pData[2 + (7 * i)] = cp[2];
pData[3 + (7 * i)] = cp[3];
pData[4 + (7 * i)] = cp[4];
pData[5 + (7 * i)] = cp[5];
pData[6 + (7 * i)] = cp[6];
break;
}
case(8): {
unsigned __int64* pData2 = (unsigned __int64*) pData;
pData2[i] = (((Random<unsigned __int64>*)rd)->operator()());
break;
}
default:
// For stopping playback
(*flags) = AUDCLNT_BUFFERFLAGS_SILENT;
return E_NOTIMPL;
}
}
return S_OK;
}
Then I added my class to the template provided by Microsoft and printed the default audio endpoint renderer to the console.
#include <InitGuid.h>
#include <iostream>
#include <Windows.h>
#include <dshow.h>
// Windows multimedia device
#include <Mmdeviceapi.h>
#include <Functiondiscoverykeys_devpkey.h>
// WASAPI
#include <Audiopolicy.h>
#include <Audioclient.h>
#include "Noise_Gen.h"
//-----------------------------------------------------------
// Play an audio stream on the default audio rendering
// device. The PlayAudioStream function allocates a shared
// buffer big enough to hold one second of PCM audio data.
// The function uses this buffer to stream data to the
// rendering device. The inner loop runs every 1/2 second.
//-----------------------------------------------------------
// REFERENCE_TIME time units per second and per millisecond
#define REFTIMES_PER_SEC 10000000
#define REFTIMES_PER_MILLISEC 10000
#define EXIT_ON_ERROR(hres) \
if (FAILED(hres)) { goto Exit; }
#define SAFE_RELEASE(punk) \
if ((punk) != NULL) \
{ (punk)->Release(); (punk) = NULL; }
const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator);
const IID IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator);
const IID IID_IAudioClient = __uuidof(IAudioClient);
const IID IID_IAudioRenderClient = __uuidof(IAudioRenderClient);
HRESULT PlayAudioStream(Noise_Gen* pMySource) {
HRESULT hr;
REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
REFERENCE_TIME hnsActualDuration;
IMMDeviceEnumerator* pEnumerator = NULL;
IMMDevice* pDevice = NULL;
IAudioClient* pAudioClient = NULL;
IAudioRenderClient* pRenderClient = NULL;
WAVEFORMATEX* pwfx = NULL;
UINT32 bufferFrameCount;
UINT32 numFramesAvailable;
UINT32 numFramesPadding;
BYTE* pData;
DWORD flags = 0;
IPropertyStore* pPropertyStore = NULL;
PROPVARIANT name;
hr = CoCreateInstance(CLSID_MMDeviceEnumerator, NULL,
CLSCTX_ALL, IID_IMMDeviceEnumerator,
(void**) &pEnumerator);
EXIT_ON_ERROR(hr);
hr = pEnumerator->GetDefaultAudioEndpoint(
eRender, eConsole, &pDevice);
hr = pDevice->OpenPropertyStore(STGM_READ, &pPropertyStore);
PropVariantInit(&name);
hr = pPropertyStore->GetValue(PKEY_Device_FriendlyName, &name);
printf("%S", name.pwszVal);
printf("\n");
EXIT_ON_ERROR(hr);
hr = pDevice->Activate(IID_IAudioClient, CLSCTX_ALL,
NULL, (void**) &pAudioClient);
EXIT_ON_ERROR(hr);
hr = pAudioClient->GetMixFormat(&pwfx);
EXIT_ON_ERROR(hr);
hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED,
0, hnsRequestedDuration,
0, pwfx, NULL);
EXIT_ON_ERROR(hr);
// Tell the audio source which format to use.
hr = pMySource->SetFormat(pwfx);
EXIT_ON_ERROR(hr);
// Get the actual size of the allocated buffer.
hr = pAudioClient->GetBufferSize(&bufferFrameCount);
EXIT_ON_ERROR(hr);
hr = pAudioClient->GetService(IID_IAudioRenderClient,
(void**) &pRenderClient);
EXIT_ON_ERROR(hr);
// Grab the entire buffer for the initial fill operation.
hr = pRenderClient->GetBuffer(bufferFrameCount, &pData);
EXIT_ON_ERROR(hr);
// Load the initial data into the shared buffer.
hr = pMySource->LoadData(bufferFrameCount, pData, &flags);
EXIT_ON_ERROR(hr);
hr = pRenderClient->ReleaseBuffer(bufferFrameCount, flags);
EXIT_ON_ERROR(hr);
// Calculate the actual duration of the allocated buffer.
hnsActualDuration = (double) REFTIMES_PER_SEC * bufferFrameCount / pwfx->nSamplesPerSec;
hr = pAudioClient->Start(); // Start playing.
EXIT_ON_ERROR(hr);
// Each loop fills about half of the shared buffer.
while(flags != AUDCLNT_BUFFERFLAGS_SILENT) {
// Sleep for half the buffer duration.
Sleep((DWORD) (hnsActualDuration / REFTIMES_PER_MILLISEC / 2));
// See how much buffer space is available.
hr = pAudioClient->GetCurrentPadding(&numFramesPadding);
EXIT_ON_ERROR(hr);
numFramesAvailable = bufferFrameCount - numFramesPadding;
// Grab all the available space in the shared buffer.
hr = pRenderClient->GetBuffer(numFramesAvailable, &pData);
EXIT_ON_ERROR(hr);
// Get next 1/2-second of data from the audio source.
hr = pMySource->LoadData(numFramesAvailable, pData, &flags);
EXIT_ON_ERROR(hr);
hr = pRenderClient->ReleaseBuffer(numFramesAvailable, flags);
EXIT_ON_ERROR(hr);
}
// Wait for last data in buffer to play before stopping.
Sleep((DWORD) (hnsActualDuration / REFTIMES_PER_MILLISEC / 2));
hr = pAudioClient->Stop(); // Stop playing.
EXIT_ON_ERROR(hr);
Exit:
CoTaskMemFree(pwfx);
SAFE_RELEASE(pEnumerator);
SAFE_RELEASE(pDevice);
SAFE_RELEASE(pAudioClient);
SAFE_RELEASE(pRenderClient);
return hr;
}
int main() {
HRESULT hr = CoInitialize(nullptr);
if(FAILED(hr)) { return hr; }
Noise_Gen* ng = new Noise_Gen();
PlayAudioStream(ng);
delete ng;
CoUninitialize();
}
The default audio endpoint renderer on my system uses 32 bit values, so the code started by writing unsigned 32 bit values to the buffer. I then tried to use signed values, which can be seen in the code above. No sound was played in both these cases. I checked the contents of the buffer while debugging and they do change. I printed the default audio endpoint renderer to the console, and it is my system's speaker. Windows even shows my app in the Volume mixer, but there is no sound showing even with the volume all the way up. I then checked the sleep time to be sure it was sleeping so the system had access to the buffer, and it does sleep for 500ms between writes to the buffer.
Update: I found out I am using the KSDATAFORMAT_SUBTYPE_IEEE_FLOAT subformat and have tried feeding the buffer floats in the -amplitude to amplitude range, the the 0 to amplitude range, the -1 to 1 range, and the 0 to 1 range.
What am I missing?
Your random number distribution code does not work correctly for floating point formats (which is basically always going to be the mix format in shared mode as far as I know).
It's wrong even for integers. I assume you meant to write
((f % ((signed __int64) mHigh - (signed __int64) mLow)) + (signed __int64) mLow);
(note the minus),
but you should not use raw modulus anyway because it's slightly biased.
For floating point formats you always use the -1 to 1 range.
I have adapted your code to use std::uniform_real_distribution and I get noise playing on my speakers.
#include <cstdio>
#include <Windows.h>
// Windows multimedia device
#include <Mmdeviceapi.h>
#include <Functiondiscoverykeys_devpkey.h>
// WASAPI
#include <Audiopolicy.h>
#include <Audioclient.h>
#include <random>
class Noise_Gen {
public:
Noise_Gen() : format(), engine(__rdtsc()), float_dist(-1.f, 1.f) {};
void SetFormat(WAVEFORMATEX* wfex) {
if(wfex->wFormatTag == WAVE_FORMAT_EXTENSIBLE) {
format = *reinterpret_cast<WAVEFORMATEXTENSIBLE*>(wfex);
} else {
format.Format = *wfex;
format.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE;
INIT_WAVEFORMATEX_GUID(&format.SubFormat, wfex->wFormatTag);
format.Samples.wValidBitsPerSample = format.Format.wBitsPerSample;
format.dwChannelMask = 0;
}
}
// (The size of an audio frame = nChannels * wBitsPerSample)
void FillBuffer(UINT32 bufferFrameCount, BYTE* pData, DWORD* flags) {
const UINT16 formatTag = EXTRACT_WAVEFORMATEX_ID(&format.SubFormat);
if(formatTag == WAVE_FORMAT_IEEE_FLOAT) {
float* fData = (float*)pData;
for(UINT32 i = 0; i < format.Format.nChannels * bufferFrameCount; i++) {
fData[i] = float_dist(engine);
}
} else if(formatTag == WAVE_FORMAT_PCM) {
using rndT = decltype(engine)::result_type;
UINT32 iterations = format.Format.nBlockAlign * bufferFrameCount / sizeof(rndT);
UINT32 leftoverBytes = format.Format.nBlockAlign * bufferFrameCount % sizeof(rndT);
rndT* iData = (rndT*)pData;
UINT32 i = 0;
for(; i < iterations; i++) {
iData[i] = engine();
}
if(leftoverBytes != 0) {
rndT lastRnd = engine();
BYTE* pLastBytes = pData + i * sizeof(rndT);
for(UINT32 j = 0; j < leftoverBytes; ++j) {
pLastBytes[j] = lastRnd >> (j * 8) & 0xFF;
}
}
} else {
//memset(pData, 0, wfex.Format.nBlockAlign * bufferFrameCount);
*flags = AUDCLNT_BUFFERFLAGS_SILENT;
}
}
private:
WAVEFORMATEXTENSIBLE format;
std::mt19937_64 engine;
std::uniform_real_distribution<float> float_dist;
};
// REFERENCE_TIME time units per second and per millisecond
#define REFTIMES_PER_SEC 10000000ll
#define REFTIMES_PER_MILLISEC 10000
#define EXIT_ON_ERROR(hres) \
if (FAILED(hres)) { goto Exit; }
#define SAFE_RELEASE(punk) \
if ((punk) != NULL) \
{ (punk)->Release(); (punk) = NULL; }
HRESULT PlayAudioStream(Noise_Gen* pMySource) {
HRESULT hr;
REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
REFERENCE_TIME hnsActualDuration;
IMMDeviceEnumerator* pEnumerator = NULL;
IPropertyStore* pPropertyStore = NULL;
IMMDevice* pDevice = NULL;
IAudioClient* pAudioClient = NULL;
IAudioRenderClient* pRenderClient = NULL;
WAVEFORMATEX* pwfx = NULL;
UINT32 bufferFrameCount;
BYTE* pData;
DWORD flags = 0;
PROPVARIANT name;
hr = CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL,
CLSCTX_ALL, IID_PPV_ARGS(&pEnumerator));
EXIT_ON_ERROR(hr);
hr = pEnumerator->GetDefaultAudioEndpoint(
eRender, eConsole, &pDevice);
EXIT_ON_ERROR(hr);
hr = pDevice->OpenPropertyStore(STGM_READ, &pPropertyStore);
EXIT_ON_ERROR(hr);
PropVariantInit(&name);
hr = pPropertyStore->GetValue(PKEY_Device_FriendlyName, &name);
EXIT_ON_ERROR(hr);
printf("%S", name.pwszVal);
printf("\n");
hr = pDevice->Activate(__uuidof(pAudioClient), CLSCTX_ALL,
NULL, (void**) &pAudioClient);
EXIT_ON_ERROR(hr);
hr = pAudioClient->GetMixFormat(&pwfx);
EXIT_ON_ERROR(hr);
hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED,
0, hnsRequestedDuration,
0, pwfx, NULL);
EXIT_ON_ERROR(hr);
// Tell the audio source which format to use.
pMySource->SetFormat(pwfx);
// Get the actual size of the allocated buffer.
hr = pAudioClient->GetBufferSize(&bufferFrameCount);
EXIT_ON_ERROR(hr);
hr = pAudioClient->GetService(IID_PPV_ARGS(&pRenderClient));
EXIT_ON_ERROR(hr);
// Grab the entire buffer for the initial fill operation.
hr = pRenderClient->GetBuffer(bufferFrameCount, &pData);
EXIT_ON_ERROR(hr);
// Load the initial data into the shared buffer.
pMySource->FillBuffer(bufferFrameCount, pData, &flags);
hr = pRenderClient->ReleaseBuffer(bufferFrameCount, flags);
EXIT_ON_ERROR(hr);
// Calculate the actual duration of the allocated buffer.
hnsActualDuration = REFTIMES_PER_SEC * bufferFrameCount / pwfx->nSamplesPerSec;
hr = pAudioClient->Start(); // Start playing.
EXIT_ON_ERROR(hr);
// Each loop fills about half of the shared buffer.
DWORD sleepTime;
while(flags != AUDCLNT_BUFFERFLAGS_SILENT) {
// Sleep for half the buffer duration.
sleepTime = (DWORD) (hnsActualDuration / REFTIMES_PER_MILLISEC / 2);
if(sleepTime != 0)
Sleep(sleepTime);
// See how much buffer space is available.
UINT32 numFramesPadding;
hr = pAudioClient->GetCurrentPadding(&numFramesPadding);
EXIT_ON_ERROR(hr);
UINT32 numFramesAvailable = bufferFrameCount - numFramesPadding;
// Grab all the available space in the shared buffer.
hr = pRenderClient->GetBuffer(numFramesAvailable, &pData);
EXIT_ON_ERROR(hr);
// Get next 1/2-second of data from the audio source.
pMySource->FillBuffer(numFramesAvailable, pData, &flags);
hr = pRenderClient->ReleaseBuffer(numFramesAvailable, flags);
EXIT_ON_ERROR(hr);
}
// Wait for last data in buffer to play before stopping.
sleepTime = (DWORD) (hnsActualDuration / REFTIMES_PER_MILLISEC / 2);
if(sleepTime != 0)
Sleep(sleepTime);
hr = pAudioClient->Stop(); // Stop playing.
EXIT_ON_ERROR(hr);
Exit:
CoTaskMemFree(pwfx);
SAFE_RELEASE(pRenderClient);
SAFE_RELEASE(pAudioClient);
SAFE_RELEASE(pDevice);
SAFE_RELEASE(pPropertyStore); // you forgot to free the property store
SAFE_RELEASE(pEnumerator);
return hr;
}
int main() {
HRESULT hr = CoInitialize(nullptr);
if(FAILED(hr)) { return hr; }
Noise_Gen ng;
PlayAudioStream(&ng);
CoUninitialize();
}

SDL Audio Pitch - Playback Rate

My goal is to connect the RPM of an engine to the pitch of an sound. I am using SDL as my audio Backend.
So my idea was to sample from the wave buffer quicker than normal. So by trail and error I am now able to pitch my engine sound "step by step".
Question #1
If I change this part from:
audioBuff += 1 + pitch * 2;
to
audioBuff += 2
I get just noise. Why? Does this have to do with stereo channels?
Question #2
How can I make this a linear pitch? Currently it's a "stepping" pitch.
Here is the full code:
#include "SDL2/SDL.h"
#include <iostream>
void audioCallback(void* userdata, Uint8 *stream, int len);
Uint8 *audioBuff = nullptr;
Uint8 *audioBuffEnd = nullptr;
Uint32 audioLen = 0;
bool quit = false;
Uint16 pitch = 0;
int main()
{
if(SDL_Init(SDL_INIT_AUDIO) < 0)
return -1;
Uint32 wavLen = 0;
Uint8 *wavBuff = nullptr;
SDL_AudioSpec wavSpec;
if(SDL_LoadWAV("test.wav", &wavSpec, &wavBuff, &wavLen) == nullptr)
{
return 1;
}
wavSpec.callback = audioCallback;
wavSpec.userdata = nullptr;
wavSpec.format = AUDIO_S16;
wavSpec.samples = 2048;
audioBuff = wavBuff;
audioBuffEnd = &wavBuff[wavLen];
audioLen = wavLen;
if( SDL_OpenAudio(&wavSpec, NULL) < 0)
{
fprintf(stderr, "Could not open audio: %s\n", SDL_GetError());
return 1;
}
SDL_PauseAudio(0);
while(!quit)
{
SDL_Delay(500);
pitch ++;
}
SDL_CloseAudio();
SDL_FreeWAV(wavBuff);
return 0;
}
Uint32 sampleIndex = 0;
void audioCallback(void* userdata, Uint8 *stream, int len)
{
Uint32 length = (Uint32)len;
length = (length > audioLen ? audioLen : length);
for(Uint32 i = 0; i < length; i++)
{
if(audioBuff > audioBuffEnd)
{
quit = true;
return;
}
// why pitch * 2?
// how to get a smooth pitch?
stream[i] = audioBuff[0];
audioBuff += 1 + pitch * 2;
fprintf(stdout, "pitch: %u\n", pitch);
}
}
You're setting the audio format to AUDIO_S16, which is "Signed 16-bit little-endian samples". Each sample is two bytes, with the first byte being the LSB. When you read the data in audioCallback, you're reading it as bytes (8 bits), then passing those bytes back to something expecting 16 bits. You're getting noise because of this, and when you use audioBuff +=2; you're always reading the LSB of the audio sample, which essentially is noise when used that way.
You should consistently use either 16 bit or 8 bit samples.

C++: Convert float[] to unsigned char* or BYTE*

I'm developing a project where I need to convert PCM 16-bits 2 channels sound into a IEEE Float 32-bits 2 channels.
To do this I'm using the following code:
void CAudioConverter::ConvI16ToF32(BYTE* pcmFrom, BYTE* floatTo, int length)
{
short* src = reinterpret_cast<short*>(pcmFrom);
float* dst = reinterpret_cast<float*>(floatTo);
for (int n = 0; n < length; n++)
{
dst[n] = static_cast<float>(src[n]) / 32768.0f;
}
}
I have initialized the variable __pcm32_bytesPerFrame with:
WAVEFORMATEX* closestFormat;
ws->default_pb_dev->GetMixFormat(&closestFormat);
__pcm32_bytesPerFrame = closestFormat->nAvgBytesPerSec * (prm->samples_per_frame * 1000 / (prm->clock_rate * closestFormat->nChannels)) / 1000;
strm->pb_max_frame_count is:
hr = ws->default_pb_dev->GetBufferSize(&ws->pb_max_frame_count);
I have a while loop in a dedicated thread the does something like:
hr = strm->default_pb_dev->GetCurrentPadding(&padding);
incoming_frame = __pcm32_bytesPerFrame / 4;
frame_to_render = strm->pb_max_frame_count - padding;
if (frame_to_render >= incoming_frame)
{
frame_to_render = incoming_frame;
} else {
/* Don't get new frame because there's no space */
frame_to_render = 0;
}
if (frame_to_render > 0)
{
pjmedia_frame frame;
hr = strm->pb_client->GetBuffer(frame_to_render, &cur_pb_buf);
if (FAILED(hr)) {
continue;
}
void* destBuffer = (void*)malloc(strm->bytes_per_frame*frame_to_render*sizeof(pj_uint16_t));
if (strm->fmt_id == PJMEDIA_FORMAT_L16) {
/* PCM mode */
frame.type = PJMEDIA_FRAME_TYPE_AUDIO;
frame.size = strm->bytes_per_frame;
frame.timestamp.u64 = strm->pb_timestamp.u64;
frame.bit_info = 0;
frame.buf = destBuffer;
}
status = (*strm->pb_cb)(strm->user_data, &frame);
CAudioConverter* conv = new CAudioConverter();
conv->ConvI16ToF32((BYTE*)destBuffer, cur_pb_buf, frame_to_render);
hr = strm->pb_client->ReleaseBuffer(frame_to_render, 0);
(...)
But, to send the sound to the WASAPI capture buffer I need a BYTE*.
How can I fill my 'floatTo' argument?
Any ideas?
Thanks
What about this:
void CAudioConverter::ConvI16ToF32(BYTE* pcmFrom, BYTE* floatTo, int length)
{
short* src = reinterpret_cast<short*>(pcmFrom);
float* dst = reinterpret_cast<float*>(floatTo);
for (int n = 0; n < length; n++)
{
dst[n] = static_cast<float>(src[n]) / 32768.0f;
}
}
Additionally make sure length indicates the number of elments in pcmFrom and floatTo, and not the number of bytes allocated. In you case pcmFrom should have allocated length*2 bytes and floatTo needs room for length*4 bytes.

Examples or tutorials of using libjpeg-turbo's TurboJPEG

The instructions for libjpeg-turbo here describes the TurboJPEG API thus: "This API wraps libjpeg-turbo and provides an easy-to-use interface for compressing and decompressing JPEG images in memory". Great, but are there some solid examples of using this API available? Just looking to decompress a fairly vanilla jpeg in memory.
I've found a few bits such as https://github.com/erlyvideo/jpeg/blob/master/c_src/jpeg.c, which appears to be using the TurboJPEG API, but are there any more solid/varied examples?
The source for libjpeg-turbo is well documented, so that does help.
Ok, I know that you did already solve your problem, but as some people, just like me, could be searching some simple example I will share what I created.
It is an example, compressing and decompressing an RGB image. Otherwise I think that the API documentation of TurboJPEG is quite easy to understand!
Compression:
#include <turbojpeg.h>
const int JPEG_QUALITY = 75;
const int COLOR_COMPONENTS = 3;
int _width = 1920;
int _height = 1080;
long unsigned int _jpegSize = 0;
unsigned char* _compressedImage = NULL; //!< Memory is allocated by tjCompress2 if _jpegSize == 0
unsigned char buffer[_width*_height*COLOR_COMPONENTS]; //!< Contains the uncompressed image
tjhandle _jpegCompressor = tjInitCompress();
tjCompress2(_jpegCompressor, buffer, _width, 0, _height, TJPF_RGB,
&_compressedImage, &_jpegSize, TJSAMP_444, JPEG_QUALITY,
TJFLAG_FASTDCT);
tjDestroy(_jpegCompressor);
//to free the memory allocated by TurboJPEG (either by tjAlloc(),
//or by the Compress/Decompress) after you are done working on it:
tjFree(&_compressedImage);
After that you have the compressed image in _compressedImage.
To decompress you have to do the following:
Decompression:
#include <turbojpeg.h>
long unsigned int _jpegSize; //!< _jpegSize from above
unsigned char* _compressedImage; //!< _compressedImage from above
int jpegSubsamp, width, height;
unsigned char buffer[width*height*COLOR_COMPONENTS]; //!< will contain the decompressed image
tjhandle _jpegDecompressor = tjInitDecompress();
tjDecompressHeader2(_jpegDecompressor, _compressedImage, _jpegSize, &width, &height, &jpegSubsamp);
tjDecompress2(_jpegDecompressor, _compressedImage, _jpegSize, buffer, width, 0/*pitch*/, height, TJPF_RGB, TJFLAG_FASTDCT);
tjDestroy(_jpegDecompressor);
Some random thoughts:
I just came back over this as I am writing my bachelor thesis, and I noticed that if you run the compression in a loop it is preferable to store the biggest size of the JPEG buffer to not have to allocate a new one every turn. Basically, instead of doing:
long unsigned int _jpegSize = 0;
tjCompress2(_jpegCompressor, buffer, _width, 0, _height, TJPF_RGB,
&_compressedImage, &_jpegSize, TJSAMP_444, JPEG_QUALITY,
TJFLAG_FASTDCT);
we would add an object variable, holding the size of the allocated memory long unsigned int _jpegBufferSize = 0; and before every compression round we would set the jpegSize back to that value:
long unsigned int jpegSize = _jpegBufferSize;
tjCompress2(_jpegCompressor, buffer, _width, 0, _height, TJPF_RGB,
&_compressedImage, &jpegSize, TJSAMP_444, JPEG_QUALITY,
TJFLAG_FASTDCT);
_jpegBufferSize = _jpegBufferSize >= jpegSize? _jpegBufferSize : jpegSize;
after the compression one would compare the memory size with the actual jpegSize and set it to the jpegSize if it is higher than the previous memory size.
I ended up using below code as a working example for both JPEG encoding and decoding. Best example that I can find, it's self-contained that initializes a dummy image and output the encoded image to a local file.
Below code is NOT my own, credit goes to https://sourceforge.net/p/libjpeg-turbo/discussion/1086868/thread/e402d36f/#8722 . Posting it here again to help anyone finds it's difficult to get libjpeg turbo working.
#include "turbojpeg.h"
#include <iostream>
#include <string.h>
#include <errno.h>
using namespace std;
int main(void)
{
unsigned char *srcBuf; //passed in as a param containing pixel data in RGB pixel interleaved format
tjhandle handle = tjInitCompress();
if(handle == NULL)
{
const char *err = (const char *) tjGetErrorStr();
cerr << "TJ Error: " << err << " UNABLE TO INIT TJ Compressor Object\n";
return -1;
}
int jpegQual =92;
int width = 128;
int height = 128;
int nbands = 3;
int flags = 0;
unsigned char* jpegBuf = NULL;
int pitch = width * nbands;
int pixelFormat = TJPF_GRAY;
int jpegSubsamp = TJSAMP_GRAY;
if(nbands == 3)
{
pixelFormat = TJPF_RGB;
jpegSubsamp = TJSAMP_411;
}
unsigned long jpegSize = 0;
srcBuf = new unsigned char[width * height * nbands];
for(int j = 0; j < height; j++)
{
for(int i = 0; i < width; i++)
{
srcBuf[(j * width + i) * nbands + 0] = (i) % 256;
srcBuf[(j * width + i) * nbands + 1] = (j) % 256;
srcBuf[(j * width + i) * nbands + 2] = (j + i) % 256;
}
}
int tj_stat = tjCompress2( handle, srcBuf, width, pitch, height,
pixelFormat, &(jpegBuf), &jpegSize, jpegSubsamp, jpegQual, flags);
if(tj_stat != 0)
{
const char *err = (const char *) tjGetErrorStr();
cerr << "TurboJPEG Error: " << err << " UNABLE TO COMPRESS JPEG IMAGE\n";
tjDestroy(handle);
handle = NULL;
return -1;
}
FILE *file = fopen("out.jpg", "wb");
if (!file) {
cerr << "Could not open JPEG file: " << strerror(errno);
return -1;
}
if (fwrite(jpegBuf, jpegSize, 1, file) < 1) {
cerr << "Could not write JPEG file: " << strerror(errno);
return -1;
}
fclose(file);
//write out the compress date to the image file
//cleanup
int tjstat = tjDestroy(handle); //should deallocate data buffer
handle = 0;
}
In the end I used a combination of random code found on the internet (e.g. https://github.com/erlyvideo/jpeg/blob/master/c_src/jpeg.c) and the .c and header files for libjeg-turbo, which are well documented.
This official API is a good information source aswell.
Here's a fragment of code what I use to load jpeg's from memory. Maybe it will require a bit of fixing, because I extracted it from different files in my project. It will load both - grayscale and rgb images (bpp will be set either to 1 or to 3).
struct Image
{
int bpp;
int width;
int height;
unsigned char* data;
};
struct jerror_mgr
{
jpeg_error_mgr base;
jmp_buf jmp;
};
METHODDEF(void) jerror_exit(j_common_ptr jinfo)
{
jerror_mgr* err = (jerror_mgr*)jinfo->err;
longjmp(err->jmp, 1);
}
METHODDEF(void) joutput_message(j_common_ptr)
{
}
bool Image_LoadJpeg(Image* image, unsigned char* img_data, unsigned int img_size)
{
jpeg_decompress_struct jinfo;
jerror_mgr jerr;
jinfo.err = jpeg_std_error(&jerr.base);
jerr.base.error_exit = jerror_exit;
jerr.base.output_message = joutput_message;
jpeg_create_decompress(&jinfo);
image->data = NULL;
if (setjmp(jerr.jmp)) goto bail;
jpeg_mem_src(&jinfo, img_data, img_size);
if (jpeg_read_header(&jinfo, TRUE) != JPEG_HEADER_OK) goto bail;
jinfo.dct_method = JDCT_FLOAT; // change this to JDCT_ISLOW on Android/iOS
if (!jpeg_start_decompress(&jinfo)) goto bail;
if (jinfo.num_components != 1 && jinfo.num_components != 3) goto bail;
image->data = new (std::nothrow) unsigned char [jinfo.output_width * jinfo.output_height * jinfo.output_components];
if (!image->data) goto bail;
{
JSAMPROW ptr = image->data;
while (jinfo.output_scanline < jinfo.output_height)
{
if (jpeg_read_scanlines(&jinfo, &ptr, 1) != 1) goto bail;
ptr += jinfo.output_width * jinfo.output_components;
}
}
if (!jpeg_finish_decompress(&jinfo)) goto bail;
image->bpp = jinfo.output_components;
image->width = jinfo.output_width;
image->height = jinfo.output_height;
jpeg_destroy_decompress(&jinfo);
return true;
bail:
jpeg_destroy_decompress(&jinfo);
if (image->data) delete [] data;
return false;
}