Filling audio endpoint buffer provided by WASAPI not playing sound - c++

I am trying to play noise through the default audio endpoint renderer using the WASAPI interface. I am trying to use the code provided by Microsoft on this page: https://learn.microsoft.com/en-us/windows/win32/coreaudio/rendering-a-stream. I want to write a class that can generate noise for this code sample.
I have tried writing signed and unsigned integer values to the buffer of the default audio endpoint renderer, and see that values are being written to the buffer, but there is no sound playing.
To start, I made a header with the needed methods, and a random number generator.
#pragma once
// RNG
#include <random>
/// Uniform random integer source over the closed range [low, high].
///
/// T is expected to be an integral type of at most 64 bits. The values are
/// produced by a 64-bit Mersenne Twister seeded from std::random_device.
template <typename T>
class Random {
public:
    /// @param low   One bound of the range (inclusive).
    /// @param high  The other bound (inclusive). If the bounds are passed in
    ///              reverse order they are swapped, so the range is never empty.
    Random(T low, T high)
        : mLow(high < low ? high : low),
          mHigh(high < low ? low : high),
          function(std::random_device{}()) {}

    /// @return The next value, uniformly distributed in [mLow, mHigh].
    T operator()() {
        // Work in unsigned 64-bit arithmetic so that the span (high - low) is
        // computed correctly for signed and unsigned T alike: the subtraction
        // wraps modulo 2^64, which yields the true distance between the bounds.
        const unsigned long long base = static_cast<unsigned long long>(mLow);
        const unsigned long long span = static_cast<unsigned long long>(mHigh) - base;
        // uniform_int_distribution avoids the bias (and the negative
        // remainders) of a raw `%` on the engine output.
        std::uniform_int_distribution<unsigned long long> pick(0, span);
        // Adding the offset wraps back into T's value range on conversion.
        return static_cast<T>(base + pick(function));
    }

private:
    T mLow;                    // inclusive lower bound
    T mHigh;                   // inclusive upper bound
    std::mt19937_64 function;  // underlying random engine
};
/// Noise source for the WASAPI render loop.
///
/// SetFormat() records the stream format and allocates a Random<T> generator
/// (stored type-erased in `rd`); LoadData() fills an endpoint buffer from it.
class Noise_Gen {
public:
    Noise_Gen()
        : rd(nullptr),
          nChannels(0),
          nSamplesPerSec(0),
          nAvgBytesPerSec(0),
          nByteAlign(0),
          wBitsPerSample(0),
          wValidBitsPerSample(0),
          wSamplesPerBlock(0),
          dwChannelMask(0) {}

    ~Noise_Gen() {
        // `rd` is a void*, so a delete-expression on it is not valid C++
        // (the pointee type is unknown). Release the storage through the
        // global deallocation function instead; this matches the plain
        // `new Random<T>(...)` used to allocate it. No destructor is run,
        // which is acceptable because Random<T> owns no resources.
        ::operator delete(rd);
    }

    // The generator is owned through a raw pointer; copying would lead to a
    // double free, so copying is disabled.
    Noise_Gen(const Noise_Gen&) = delete;
    Noise_Gen& operator=(const Noise_Gen&) = delete;

    /// Records the fields of `format` and creates the matching generator.
    HRESULT SetFormat(WAVEFORMATEX*);
    /// Fills `pData` with `bufferFrameCount` frames of generated samples.
    HRESULT LoadData(UINT32 bufferFrameCount, BYTE* pData, DWORD* flags);

private:
    void* rd;  // type-erased Random<T>*, allocated by SetFormat()
    // WAVEFORMATEX
    WORD nChannels;
    DWORD nSamplesPerSec;
    DWORD nAvgBytesPerSec;
    WORD nByteAlign;
    WORD wBitsPerSample;
    // WAVEFORMATEXTENSIBLE
    WORD wValidBitsPerSample;
    WORD wSamplesPerBlock;
    DWORD dwChannelMask;
};
Then I added the definitions:
// WASAPI
#include <Audiopolicy.h>
#include <Audioclient.h>
#include <time.h>
#include "Noise_Gen.h"
// Copies the stream description out of `format` and allocates the random
// generator whose integer width matches the container size of one sample.
//
// Returns S_OK when a generator was created, E_NOTIMPL when the container
// size (wBitsPerSample / 8) is outside 1..8 bytes.
//
// NOTE(review): only wFormatTag/wBitsPerSample are inspected; the SubFormat
// GUID of an extensible format is never read, so integer generators are
// created regardless of whether the stream carries integer or float samples.
HRESULT Noise_Gen::SetFormat(WAVEFORMATEX* format) {
    // Base WAVEFORMATEX fields.
    nChannels = format->nChannels;
    nSamplesPerSec = format->nSamplesPerSec;
    nAvgBytesPerSec = format->nAvgBytesPerSec;
    nByteAlign = format->nBlockAlign;
    wBitsPerSample = format->wBitsPerSample;

    // Extended fields, when present; otherwise every container bit is valid.
    if (format->wFormatTag != WAVE_FORMAT_EXTENSIBLE) {
        wValidBitsPerSample = wBitsPerSample;
    } else {
        const WAVEFORMATEXTENSIBLE* ext = reinterpret_cast<WAVEFORMATEXTENSIBLE*>(format);
        wValidBitsPerSample = ext->Samples.wValidBitsPerSample;
        wSamplesPerBlock = ext->Samples.wSamplesPerBlock;
        dwChannelMask = ext->dwChannelMask;
    }

    // Largest value representable in the valid bits.
    const double amplitude = std::pow(2.0, wValidBitsPerSample) - 1;

    const int bytesPerSample = wBitsPerSample / 8;
    if (bytesPerSample == 1) {
        rd = new Random<unsigned __int8>(0.0, amplitude);
    } else if (bytesPerSample == 2) {
        rd = new Random<unsigned __int16>(0.0, amplitude);
    } else if (bytesPerSample == 3) {
        rd = new Random<unsigned __int32>(0.0, amplitude);
    } else if (bytesPerSample == 4) {
        rd = new Random<signed __int32>(-amplitude, amplitude);
    } else if (bytesPerSample >= 5 && bytesPerSample <= 8) {
        // 5- to 8-byte containers all draw from a 64-bit generator.
        rd = new Random<unsigned __int64>(0.0, amplitude);
    } else {
        return E_NOTIMPL;
    }
    return S_OK;
}
// (The size of an audio frame = nChannels * wBitsPerSample)
// Writes nChannels * bufferFrameCount generated samples into pData.
//
// The width of each written sample is wBitsPerSample / 8 bytes; for the odd
// widths (3, 5, 6, 7) the low-order bytes of a wider random value are copied.
// For an unsupported width the SILENT buffer flag is stored in *flags and
// E_NOTIMPL is returned (the caller uses that flag to stop playback).
HRESULT Noise_Gen::LoadData(UINT32 bufferFrameCount, BYTE* pData, DWORD* flags) {
    const int bytesPerSample = wBitsPerSample / 8;
    const UINT32 totalSamples = nChannels * bufferFrameCount;

    for (UINT32 n = 0; n < totalSamples; ++n) {
        switch (bytesPerSample) {
        case 1:
            pData[n] = (*static_cast<Random<unsigned __int8>*>(rd))();
            break;

        case 2:
            reinterpret_cast<unsigned __int16*>(pData)[n] =
                (*static_cast<Random<unsigned __int16>*>(rd))();
            break;

        case 3: {
            // Take the three low-order bytes of a 32-bit draw.
            __int32 word = (*static_cast<Random<unsigned __int32>*>(rd))();
            const unsigned char* bytes = reinterpret_cast<const unsigned char*>(&word);
            for (UINT32 b = 0; b < 3; ++b) {
                pData[b + (3 * n)] = bytes[b];
            }
            break;
        }

        case 4:
            reinterpret_cast<signed __int32*>(pData)[n] =
                (*static_cast<Random<signed __int32>*>(rd))();
            break;

        case 5:
        case 6:
        case 7: {
            // Take the bytesPerSample low-order bytes of a 64-bit draw.
            __int64 word = (*static_cast<Random<unsigned __int64>*>(rd))();
            const unsigned char* bytes = reinterpret_cast<const unsigned char*>(&word);
            for (UINT32 b = 0; b < static_cast<UINT32>(bytesPerSample); ++b) {
                pData[b + (bytesPerSample * n)] = bytes[b];
            }
            break;
        }

        case 8:
            reinterpret_cast<unsigned __int64*>(pData)[n] =
                (*static_cast<Random<unsigned __int64>*>(rd))();
            break;

        default:
            // For stopping playback
            *flags = AUDCLNT_BUFFERFLAGS_SILENT;
            return E_NOTIMPL;
        }
    }
    return S_OK;
}
Then I added my class to the template provided by Microsoft and printed the default audio endpoint renderer to the console.
#include <InitGuid.h>
#include <iostream>
#include <Windows.h>
#include <dshow.h>
// Windows multimedia device
#include <Mmdeviceapi.h>
#include <Functiondiscoverykeys_devpkey.h>
// WASAPI
#include <Audiopolicy.h>
#include <Audioclient.h>
#include "Noise_Gen.h"
//-----------------------------------------------------------
// Play an audio stream on the default audio rendering
// device. The PlayAudioStream function allocates a shared
// buffer big enough to hold one second of PCM audio data.
// The function uses this buffer to stream data to the
// rendering device. The inner loop runs every 1/2 second.
//-----------------------------------------------------------
// REFERENCE_TIME time units per second and per millisecond
// Number of REFERENCE_TIME units in one second / one millisecond.
#define REFTIMES_PER_SEC 10000000
#define REFTIMES_PER_MILLISEC 10000
// Jumps to the enclosing function's `Exit:` label when `hres` is a failure
// HRESULT. The function using it must define that label.
#define EXIT_ON_ERROR(hres) \
if (FAILED(hres)) { goto Exit; }
// Releases a COM interface pointer if it is non-null and resets it to NULL.
#define SAFE_RELEASE(punk) \
if ((punk) != NULL) \
{ (punk)->Release(); (punk) = NULL; }
// Class and interface identifiers passed to CoCreateInstance / Activate /
// GetService in PlayAudioStream.
const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator);
const IID IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator);
const IID IID_IAudioClient = __uuidof(IAudioClient);
const IID IID_IAudioRenderClient = __uuidof(IAudioRenderClient);
// Plays the samples produced by pMySource on the default render endpoint in
// shared mode, using the endpoint's mix format.
//
// The function prints the endpoint's friendly name, requests a one-second
// buffer, fills it completely once, then tops it up every half buffer
// duration until the source sets AUDCLNT_BUFFERFLAGS_SILENT in `flags`.
//
// Returns the first failing HRESULT, or the result of IAudioClient::Stop().
HRESULT PlayAudioStream(Noise_Gen* pMySource) {
    HRESULT hr;
    REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
    REFERENCE_TIME hnsActualDuration;
    IMMDeviceEnumerator* pEnumerator = NULL;
    IMMDevice* pDevice = NULL;
    IAudioClient* pAudioClient = NULL;
    IAudioRenderClient* pRenderClient = NULL;
    WAVEFORMATEX* pwfx = NULL;
    UINT32 bufferFrameCount;
    UINT32 numFramesAvailable;
    UINT32 numFramesPadding;
    BYTE* pData;
    DWORD flags = 0;
    IPropertyStore* pPropertyStore = NULL;
    PROPVARIANT name;
    // Initialise before the first possible jump to Exit so that the
    // PropVariantClear there is always operating on a valid PROPVARIANT.
    PropVariantInit(&name);

    hr = CoCreateInstance(CLSID_MMDeviceEnumerator, NULL,
                          CLSCTX_ALL, IID_IMMDeviceEnumerator,
                          (void**) &pEnumerator);
    EXIT_ON_ERROR(hr);

    hr = pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice);
    EXIT_ON_ERROR(hr);  // pDevice is NULL on failure; do not dereference it

    hr = pDevice->OpenPropertyStore(STGM_READ, &pPropertyStore);
    EXIT_ON_ERROR(hr);

    hr = pPropertyStore->GetValue(PKEY_Device_FriendlyName, &name);
    EXIT_ON_ERROR(hr);
    // GetValue can succeed with an empty variant; only print a real string.
    if (name.vt == VT_LPWSTR) {
        printf("%S", name.pwszVal);
    }
    printf("\n");

    hr = pDevice->Activate(IID_IAudioClient, CLSCTX_ALL,
                           NULL, (void**) &pAudioClient);
    EXIT_ON_ERROR(hr);

    hr = pAudioClient->GetMixFormat(&pwfx);
    EXIT_ON_ERROR(hr);

    hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED,
                                  0, hnsRequestedDuration,
                                  0, pwfx, NULL);
    EXIT_ON_ERROR(hr);

    // Tell the audio source which format to use.
    hr = pMySource->SetFormat(pwfx);
    EXIT_ON_ERROR(hr);

    // Get the actual size of the allocated buffer.
    hr = pAudioClient->GetBufferSize(&bufferFrameCount);
    EXIT_ON_ERROR(hr);

    hr = pAudioClient->GetService(IID_IAudioRenderClient,
                                  (void**) &pRenderClient);
    EXIT_ON_ERROR(hr);

    // Grab the entire buffer for the initial fill operation.
    hr = pRenderClient->GetBuffer(bufferFrameCount, &pData);
    EXIT_ON_ERROR(hr);

    // Load the initial data into the shared buffer.
    hr = pMySource->LoadData(bufferFrameCount, pData, &flags);
    EXIT_ON_ERROR(hr);

    hr = pRenderClient->ReleaseBuffer(bufferFrameCount, flags);
    EXIT_ON_ERROR(hr);

    // Calculate the actual duration of the allocated buffer.
    hnsActualDuration = (REFERENCE_TIME) ((double) REFTIMES_PER_SEC *
                                          bufferFrameCount / pwfx->nSamplesPerSec);

    hr = pAudioClient->Start();  // Start playing.
    EXIT_ON_ERROR(hr);

    // Each loop fills about half of the shared buffer.
    while (flags != AUDCLNT_BUFFERFLAGS_SILENT) {
        // Sleep for half the buffer duration.
        Sleep((DWORD) (hnsActualDuration / REFTIMES_PER_MILLISEC / 2));

        // See how much buffer space is available.
        hr = pAudioClient->GetCurrentPadding(&numFramesPadding);
        EXIT_ON_ERROR(hr);
        numFramesAvailable = bufferFrameCount - numFramesPadding;

        // Grab all the available space in the shared buffer.
        hr = pRenderClient->GetBuffer(numFramesAvailable, &pData);
        EXIT_ON_ERROR(hr);

        // Get next 1/2-second of data from the audio source.
        hr = pMySource->LoadData(numFramesAvailable, pData, &flags);
        EXIT_ON_ERROR(hr);

        hr = pRenderClient->ReleaseBuffer(numFramesAvailable, flags);
        EXIT_ON_ERROR(hr);
    }

    // Wait for last data in buffer to play before stopping.
    Sleep((DWORD) (hnsActualDuration / REFTIMES_PER_MILLISEC / 2));
    hr = pAudioClient->Stop();  // Stop playing.
    EXIT_ON_ERROR(hr);

Exit:
    PropVariantClear(&name);  // frees the friendly-name string, if any
    CoTaskMemFree(pwfx);
    SAFE_RELEASE(pRenderClient);
    SAFE_RELEASE(pAudioClient);
    SAFE_RELEASE(pPropertyStore);
    SAFE_RELEASE(pDevice);
    SAFE_RELEASE(pEnumerator);
    return hr;
}
// Initialises COM, plays generated noise on the default render endpoint and
// reports failure through the process exit code (the failing HRESULT), or 0
// on success.
int main() {
    HRESULT hr = CoInitialize(nullptr);
    if (FAILED(hr)) { return hr; }
    {
        // Automatic storage: no manual delete, and the generator is destroyed
        // before COM is uninitialised.
        Noise_Gen ng;
        hr = PlayAudioStream(&ng);
    }
    CoUninitialize();
    return FAILED(hr) ? hr : 0;
}
The default audio endpoint renderer on my system uses 32 bit values, so the code started by writing unsigned 32 bit values to the buffer. I then tried to use signed values, which can be seen in the code above. No sound was played in both these cases. I checked the contents of the buffer while debugging and they do change. I printed the default audio endpoint renderer to the console, and it is my system's speaker. Windows even shows my app in the Volume mixer, but there is no sound showing even with the volume all the way up. I then checked the sleep time to be sure it was sleeping so the system had access to the buffer, and it does sleep for 500ms between writes to the buffer.
Update: I found out I am using the KSDATAFORMAT_SUBTYPE_IEEE_FLOAT subformat and have tried feeding the buffer floats in the -amplitude to amplitude range, the 0 to amplitude range, the -1 to 1 range, and the 0 to 1 range.
What am I missing?

Your random number distribution code does not work correctly for floating point formats (which is basically always going to be the mix format in shared mode as far as I know).
It's wrong even for integers. I assume you meant to write
((f % ((signed __int64) mHigh - (signed __int64) mLow)) + (signed __int64) mLow);
(note the minus),
but you should not use raw modulus anyway because it's slightly biased.
For floating point formats you always use the -1 to 1 range.
I have adapted your code to use std::uniform_real_distribution and I get noise playing on my speakers.
#include <cstdio>
#include <Windows.h>
// Windows multimedia device
#include <Mmdeviceapi.h>
#include <Functiondiscoverykeys_devpkey.h>
// WASAPI
#include <Audiopolicy.h>
#include <Audioclient.h>
#include <random>
class Noise_Gen {
public:
Noise_Gen() : format(), engine(__rdtsc()), float_dist(-1.f, 1.f) {};
void SetFormat(WAVEFORMATEX* wfex) {
if(wfex->wFormatTag == WAVE_FORMAT_EXTENSIBLE) {
format = *reinterpret_cast<WAVEFORMATEXTENSIBLE*>(wfex);
} else {
format.Format = *wfex;
format.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE;
INIT_WAVEFORMATEX_GUID(&format.SubFormat, wfex->wFormatTag);
format.Samples.wValidBitsPerSample = format.Format.wBitsPerSample;
format.dwChannelMask = 0;
}
}
// (The size of an audio frame = nChannels * wBitsPerSample)
void FillBuffer(UINT32 bufferFrameCount, BYTE* pData, DWORD* flags) {
const UINT16 formatTag = EXTRACT_WAVEFORMATEX_ID(&format.SubFormat);
if(formatTag == WAVE_FORMAT_IEEE_FLOAT) {
float* fData = (float*)pData;
for(UINT32 i = 0; i < format.Format.nChannels * bufferFrameCount; i++) {
fData[i] = float_dist(engine);
}
} else if(formatTag == WAVE_FORMAT_PCM) {
using rndT = decltype(engine)::result_type;
UINT32 iterations = format.Format.nBlockAlign * bufferFrameCount / sizeof(rndT);
UINT32 leftoverBytes = format.Format.nBlockAlign * bufferFrameCount % sizeof(rndT);
rndT* iData = (rndT*)pData;
UINT32 i = 0;
for(; i < iterations; i++) {
iData[i] = engine();
}
if(leftoverBytes != 0) {
rndT lastRnd = engine();
BYTE* pLastBytes = pData + i * sizeof(rndT);
for(UINT32 j = 0; j < leftoverBytes; ++j) {
pLastBytes[j] = lastRnd >> (j * 8) & 0xFF;
}
}
} else {
//memset(pData, 0, wfex.Format.nBlockAlign * bufferFrameCount);
*flags = AUDCLNT_BUFFERFLAGS_SILENT;
}
}
private:
WAVEFORMATEXTENSIBLE format;
std::mt19937_64 engine;
std::uniform_real_distribution<float> float_dist;
};
// REFERENCE_TIME time units per second and per millisecond
// Number of REFERENCE_TIME units in one second / one millisecond. The `ll`
// suffix makes arithmetic using REFTIMES_PER_SEC 64-bit.
#define REFTIMES_PER_SEC 10000000ll
#define REFTIMES_PER_MILLISEC 10000
// Jumps to the enclosing function's `Exit:` label when `hres` is a failure
// HRESULT. The function using it must define that label.
#define EXIT_ON_ERROR(hres) \
if (FAILED(hres)) { goto Exit; }
// Releases a COM interface pointer if it is non-null and resets it to NULL.
#define SAFE_RELEASE(punk) \
if ((punk) != NULL) \
{ (punk)->Release(); (punk) = NULL; }
// Plays the noise produced by pMySource on the default render endpoint in
// shared mode, using the endpoint's mix format.
//
// The endpoint's friendly name is printed, a one-second buffer is requested
// and filled once, and the loop then refills the free part of the buffer
// every half buffer duration until the source reports
// AUDCLNT_BUFFERFLAGS_SILENT.
//
// Returns the first failing HRESULT, or the result of IAudioClient::Stop().
HRESULT PlayAudioStream(Noise_Gen* pMySource) {
    HRESULT hr;
    REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
    REFERENCE_TIME hnsActualDuration;
    IMMDeviceEnumerator* pEnumerator = NULL;
    IPropertyStore* pPropertyStore = NULL;
    IMMDevice* pDevice = NULL;
    IAudioClient* pAudioClient = NULL;
    IAudioRenderClient* pRenderClient = NULL;
    WAVEFORMATEX* pwfx = NULL;
    UINT32 bufferFrameCount;
    BYTE* pData;
    DWORD flags = 0;
    PROPVARIANT name;
    // Initialise before the first possible jump to Exit so that the
    // PropVariantClear there always sees a valid PROPVARIANT.
    PropVariantInit(&name);

    hr = CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL,
                          CLSCTX_ALL, IID_PPV_ARGS(&pEnumerator));
    EXIT_ON_ERROR(hr);

    hr = pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice);
    EXIT_ON_ERROR(hr);

    hr = pDevice->OpenPropertyStore(STGM_READ, &pPropertyStore);
    EXIT_ON_ERROR(hr);

    hr = pPropertyStore->GetValue(PKEY_Device_FriendlyName, &name);
    EXIT_ON_ERROR(hr);
    // GetValue can succeed with an empty variant; only print a real string.
    if (name.vt == VT_LPWSTR) {
        printf("%S", name.pwszVal);
    }
    printf("\n");

    hr = pDevice->Activate(__uuidof(IAudioClient), CLSCTX_ALL,
                           NULL, reinterpret_cast<void**>(&pAudioClient));
    EXIT_ON_ERROR(hr);

    hr = pAudioClient->GetMixFormat(&pwfx);
    EXIT_ON_ERROR(hr);

    hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED,
                                  0, hnsRequestedDuration,
                                  0, pwfx, NULL);
    EXIT_ON_ERROR(hr);

    // Tell the audio source which format to use.
    pMySource->SetFormat(pwfx);

    // Get the actual size of the allocated buffer.
    hr = pAudioClient->GetBufferSize(&bufferFrameCount);
    EXIT_ON_ERROR(hr);

    hr = pAudioClient->GetService(IID_PPV_ARGS(&pRenderClient));
    EXIT_ON_ERROR(hr);

    // Grab the entire buffer for the initial fill operation.
    hr = pRenderClient->GetBuffer(bufferFrameCount, &pData);
    EXIT_ON_ERROR(hr);

    // Load the initial data into the shared buffer.
    pMySource->FillBuffer(bufferFrameCount, pData, &flags);
    hr = pRenderClient->ReleaseBuffer(bufferFrameCount, flags);
    EXIT_ON_ERROR(hr);

    // Calculate the actual duration of the allocated buffer.
    hnsActualDuration = REFTIMES_PER_SEC * bufferFrameCount / pwfx->nSamplesPerSec;

    hr = pAudioClient->Start();  // Start playing.
    EXIT_ON_ERROR(hr);

    // Each loop fills about half of the shared buffer.
    DWORD sleepTime;
    while (flags != AUDCLNT_BUFFERFLAGS_SILENT) {
        // Sleep for half the buffer duration.
        sleepTime = (DWORD) (hnsActualDuration / REFTIMES_PER_MILLISEC / 2);
        if (sleepTime != 0)
            Sleep(sleepTime);

        // See how much buffer space is available.
        UINT32 numFramesPadding;
        hr = pAudioClient->GetCurrentPadding(&numFramesPadding);
        EXIT_ON_ERROR(hr);
        UINT32 numFramesAvailable = bufferFrameCount - numFramesPadding;

        // Grab all the available space in the shared buffer.
        hr = pRenderClient->GetBuffer(numFramesAvailable, &pData);
        EXIT_ON_ERROR(hr);

        // Get next 1/2-second of data from the audio source.
        pMySource->FillBuffer(numFramesAvailable, pData, &flags);
        hr = pRenderClient->ReleaseBuffer(numFramesAvailable, flags);
        EXIT_ON_ERROR(hr);
    }

    // Wait for last data in buffer to play before stopping.
    sleepTime = (DWORD) (hnsActualDuration / REFTIMES_PER_MILLISEC / 2);
    if (sleepTime != 0)
        Sleep(sleepTime);
    hr = pAudioClient->Stop();  // Stop playing.
    EXIT_ON_ERROR(hr);

Exit:
    PropVariantClear(&name);  // frees the friendly-name string, if any
    CoTaskMemFree(pwfx);
    SAFE_RELEASE(pRenderClient);
    SAFE_RELEASE(pAudioClient);
    SAFE_RELEASE(pDevice);
    SAFE_RELEASE(pPropertyStore);  // the property store must be released too
    SAFE_RELEASE(pEnumerator);
    return hr;
}
// Initialises COM, plays noise on the default render endpoint and reports a
// playback failure through the exit code (the failing HRESULT), or 0 on
// success.
int main() {
    HRESULT hr = CoInitialize(nullptr);
    if (FAILED(hr)) { return hr; }
    Noise_Gen ng;
    hr = PlayAudioStream(&ng);
    CoUninitialize();
    return FAILED(hr) ? hr : 0;
}

Related

IMediaObject.ProcessOutput returning S_FALSE

I am trying to write a simple audio capturing application in C++. I found that capturing audio with a C++ library is very hard; I could find only a few libraries, and I ended up using IMediaObject. Everything works on some other PCs, but on my PC it can't record audio. When I call IMediaObject::ProcessOutput, it returns S_FALSE. I don't know what's wrong with my PC. I am using Visual Studio 2019.
Can anyone give me any clue ? what is wrong here ?
For your understanding I am sharing my main code here
#include <windows.h>
#include <dmo.h>
#include <Mmsystem.h>
#include <objbase.h>
#include <mediaobj.h>
#include <uuids.h>
#include <propidl.h>
#include <wmcodecdsp.h>
#include <atlbase.h>
#include <ATLComCli.h>
#include <audioclient.h>
#include <MMDeviceApi.h>
#include <AudioEngineEndPoint.h>
#include <DeviceTopology.h>
#include <propkey.h>
#include <strsafe.h>
#include <conio.h>
#include "AecKsBinder.h"
#include "mediabuf.h"
// delete[]s an array pointer if non-null and resets it to NULL.
#define SAFE_ARRAYDELETE(p) {if (p) delete[] (p); (p) = NULL;}
// Releases a COM interface pointer if non-null and resets it to NULL.
#define SAFE_RELEASE(p) {if (NULL != p) {(p)->Release(); (p) = NULL;}}
// VARIANT_BOOL false (0) and true (-1) values.
#define VBFALSE (VARIANT_BOOL)0
#define VBTRUE (VARIANT_BOOL)-1
#define STREAM_BUFFER_LENGTH 0.1f //streaming buffer is 0.1 second long.
// Prints `message` and jumps to the `exit:` label when `hr` is a failure code.
// Success codes such as S_FALSE do not trigger it.
#define CHECK_RET(hr, message) if (FAILED(hr)) { puts(message); goto exit;}
// Evaluates `x` into the local `hr`; on failure prints the source line and
// the HRESULT, then jumps to `exit:`. Requires a local variable named `hr`.
#define CHECKHR(x) hr = x; if (FAILED(hr)) {printf("%d: %08X\n", __LINE__, hr); goto exit;}
// Prints `message` and jumps to `exit:` when the pointer `pb` is NULL.
#define CHECK_ALLOC(pb, message) if (NULL == pb) { puts(message); goto exit;}
// Media buffer wrapper around caller-owned memory.
//
// AddRef()/Release() return fixed values and never touch a reference count,
// so Release() can never destroy the object -- presumably so that an
// instance with automatic storage can be handed to the DMO (TODO confirm
// against CBaseMediaBuffer).
class CStaticMediaBuffer : public CBaseMediaBuffer {
public:
    STDMETHODIMP_(ULONG) AddRef()
    {
        return 2;
    }

    STDMETHODIMP_(ULONG) Release()
    {
        return 1;
    }

    // Points the buffer at pData: ulSize is the capacity in bytes and ulData
    // the number of bytes currently stored.
    void Init(BYTE* pData, ULONG ulSize, ULONG ulData)
    {
        m_ulData = ulData;
        m_ulSize = ulSize;
        m_pData = pData;
    }
};
void OutputUsage();
// Captures microphone audio through the AEC-MicArray DMO (CLSID_CWMAudioAEC)
// and dumps the processed 16-bit mono 22.05 kHz PCM to "test.raw".
//
// Steps: raise process priority, create the DMO, enumerate/select capture
// and (for AEC modes) render devices, configure the DMO through its property
// store, set the output media type, then poll ProcessOutput every 10 ms for
// iDuration seconds or until 's' is pressed.
//
// Returns the last HRESULT.
int __cdecl _tmain()
{
    HRESULT hr = S_OK;
    CoInitialize(NULL);
    IMediaObject* pDMO = NULL;
    IPropertyStore* pPS = NULL;
    CStaticMediaBuffer outputBuffer;
    DMO_OUTPUT_DATA_BUFFER OutputBufferStruct = { 0 };
    OutputBufferStruct.pBuffer = &outputBuffer;
    DMO_MEDIA_TYPE mt = { 0 };
    ULONG cbProduced = 0;
    DWORD dwStatus;

    // Parameters to config DMO
    int iSystemMode = MODE_NOT_SET;  // AEC-MicArray DMO system mode
    int iOutFileIdx = -1;            // argument index for output file name
    int iMicDevIdx = -2;             // microphone device index
    int iSpkDevIdx = -2;             // speaker device index
    BOOL bFeatrModeOn = 0;           // turn feature mode on/off
    BOOL bNoiseSup = 1;              // turn noise suppression on/off
    BOOL bAGC = 0;                   // turn digital auto gain control on/off
    BOOL bCntrClip = 0;              // turn center clipping on/off

    // control how long the Demo runs
    int iDuration = 60;  // seconds
    int cTtlToGo = 0;

    // Initialised so the cleanup code can tell whether the file was opened.
    FILE* pfMicOutPCM = NULL;  // dump output signal using PCM format
    DWORD cOutputBufLen = 0;
    BYTE* pbOutputBuffer = NULL;
    UINT uCapDevCount = 0;
    UINT uRenDevCount = 0;
    char pcScanBuf[256] = { 0 };
    WAVEFORMATEX wfxOut = { WAVE_FORMAT_PCM, 1, 22050, 44100, 2, 16, 0 };
    AUDIO_DEVICE_INFO* pCaptureDeviceInfo = NULL, * pRenderDeviceInfo = NULL;
    int i;

    iMicDevIdx = 0;
    iSpkDevIdx = 0;
    iSystemMode = 0;
    bFeatrModeOn = 1;
    bNoiseSup = 1;
    bAGC = 1;
    bCntrClip = 1;

    HANDLE currThread;
    HANDLE currProcess;
    BOOL iRet;
    currProcess = GetCurrentProcess();
    currThread = GetCurrentThread();
    iRet = SetPriorityClass(currProcess, HIGH_PRIORITY_CLASS);
    if (0 == iRet)
    {
        // call getLastError.
        puts("failed to set process priority\n");
        goto exit;
    }

    // DMO initialization
    CHECKHR(CoCreateInstance(CLSID_CWMAudioAEC, NULL, CLSCTX_INPROC_SERVER, IID_IMediaObject, (void**)&pDMO));
    CHECKHR(pDMO->QueryInterface(IID_IPropertyStore, (void**)&pPS));

    // Select capture device
    hr = GetCaptureDeviceNum(uCapDevCount);
    CHECK_RET(hr, "GetCaptureDeviceNum failed");
    pCaptureDeviceInfo = new AUDIO_DEVICE_INFO[uCapDevCount];
    hr = EnumCaptureDevice(uCapDevCount, pCaptureDeviceInfo);
    CHECK_RET(hr, "EnumCaptureDevice failed");
    printf("\nSystem has totally %d capture devices\n", uCapDevCount);
    for (i = 0; i < (int)uCapDevCount; i++)
    {
        _tprintf(_T("Device %d is %s"), i, pCaptureDeviceInfo[i].szDeviceName);
        if (pCaptureDeviceInfo[i].bIsMicArrayDevice)
            _tprintf(_T(" -- Mic Array Device \n"));
        else
            _tprintf(_T("\n"));
    }
    if (iMicDevIdx < -1 || iMicDevIdx >= (int)uCapDevCount)
    {
        do {
            printf("Select device ");
            scanf_s("%255s", pcScanBuf, 255);
            iMicDevIdx = atoi(pcScanBuf);
            if (iMicDevIdx < -1 || iMicDevIdx >= (int)uCapDevCount)
                printf("Invalid Capture Device ID \n");
            else
                break;
        } while (1);
    }
    if (iMicDevIdx == -1)
        _tprintf(_T("\n Default device will be used for capturing \n"));
    else
        _tprintf(_T("\n %s is selected for capturing\n"), pCaptureDeviceInfo[iMicDevIdx].szDeviceName);
    SAFE_ARRAYDELETE(pCaptureDeviceInfo);

    // Select render device
    if (iSystemMode == SINGLE_CHANNEL_AEC ||
        iSystemMode == ADAPTIVE_ARRAY_AND_AEC ||
        iSystemMode == OPTIBEAM_ARRAY_AND_AEC)
    {
        hr = GetRenderDeviceNum(uRenDevCount);
        CHECK_RET(hr, "GetRenderDeviceNum failed");
        pRenderDeviceInfo = new AUDIO_DEVICE_INFO[uRenDevCount];
        hr = EnumRenderDevice(uRenDevCount, pRenderDeviceInfo);
        CHECK_RET(hr, "EnumRenderDevice failed");
        printf("\nSystem has totally %d render devices\n", uRenDevCount);
        for (i = 0; i < (int)uRenDevCount; i++)
        {
            _tprintf(_T("Device %d is %s \n"), i, pRenderDeviceInfo[i].szDeviceName);
        }
        if (iSpkDevIdx < -1 || iSpkDevIdx >= (int)uRenDevCount)
        {
            do {
                printf("Select device ");
                scanf_s("%255s", pcScanBuf, 255);
                iSpkDevIdx = atoi(pcScanBuf);
                if (iSpkDevIdx < -1 || iSpkDevIdx >= (int)uRenDevCount)
                    printf("Invalid Render Device ID \n");
                else
                    break;
            } while (1);
        }
        if (iSpkDevIdx == -1)
            _tprintf(_T("\n Default device will be used for rendering \n"));
        else
            _tprintf(_T("\n %s is selected for rendering \n"), pRenderDeviceInfo[iSpkDevIdx].szDeviceName);
    }
    else {
        iSpkDevIdx = -1;
    }
    SAFE_ARRAYDELETE(pRenderDeviceInfo);

    // Use _T() so the literal matches TCHAR in both ANSI and Unicode builds;
    // casting a narrow literal to TCHAR* yields a garbage name under UNICODE.
    // (Declared without an initializer because `goto exit` jumps over it.)
    const TCHAR* fileName;
    fileName = _T("test.raw");

    // --- PREPARE OUTPUT --- //
    if (0 != _tfopen_s(&pfMicOutPCM, fileName, _T("wb")))
    {
        puts("cannot open file for output.\n");
        goto exit;
    }

    // Set AEC mode and other parameters
    // Not all user changeable options are given in this sample code.
    // Please refer to readme.txt for more options.
    // Set AEC-MicArray DMO system mode.
    // This must be set for the DMO to work properly
    puts("\nAEC settings:");
    PROPVARIANT pvSysMode;
    PropVariantInit(&pvSysMode);
    pvSysMode.vt = VT_I4;
    pvSysMode.lVal = (LONG)(iSystemMode);
    CHECKHR(pPS->SetValue(MFPKEY_WMAAECMA_SYSTEM_MODE, pvSysMode));
    CHECKHR(pPS->GetValue(MFPKEY_WMAAECMA_SYSTEM_MODE, &pvSysMode));
    printf("%20s %5d \n", "System Mode is", pvSysMode.lVal);
    PropVariantClear(&pvSysMode);

    // Tell DMO which capture and render device to use
    // This is optional. If not specified, default devices will be used
    if (iMicDevIdx >= 0 || iSpkDevIdx >= 0)
    {
        PROPVARIANT pvDeviceId;
        PropVariantInit(&pvDeviceId);
        pvDeviceId.vt = VT_I4;
        // High word: render device index; low word: capture device index.
        pvDeviceId.lVal = (unsigned long)(iSpkDevIdx << 16) + (unsigned long)(0x0000ffff & iMicDevIdx);
        CHECKHR(pPS->SetValue(MFPKEY_WMAAECMA_DEVICE_INDEXES, pvDeviceId));
        CHECKHR(pPS->GetValue(MFPKEY_WMAAECMA_DEVICE_INDEXES, &pvDeviceId));
        PropVariantClear(&pvDeviceId);
    }

    if (bFeatrModeOn)
    {
        // Turn on feature modes
        PROPVARIANT pvFeatrModeOn;
        PropVariantInit(&pvFeatrModeOn);
        pvFeatrModeOn.vt = VT_BOOL;
        pvFeatrModeOn.boolVal = bFeatrModeOn ? VBTRUE : VBFALSE;
        CHECKHR(pPS->SetValue(MFPKEY_WMAAECMA_FEATURE_MODE, pvFeatrModeOn));
        CHECKHR(pPS->GetValue(MFPKEY_WMAAECMA_FEATURE_MODE, &pvFeatrModeOn));
        printf("%20s %5d \n", "Feature Mode is", pvFeatrModeOn.boolVal);
        PropVariantClear(&pvFeatrModeOn);

        // Turn on/off noise suppression
        PROPVARIANT pvNoiseSup;
        PropVariantInit(&pvNoiseSup);
        pvNoiseSup.vt = VT_I4;
        pvNoiseSup.lVal = (LONG)bNoiseSup;
        CHECKHR(pPS->SetValue(MFPKEY_WMAAECMA_FEATR_NS, pvNoiseSup));
        CHECKHR(pPS->GetValue(MFPKEY_WMAAECMA_FEATR_NS, &pvNoiseSup));
        printf("%20s %5d \n", "Noise suppresion is", pvNoiseSup.lVal);
        PropVariantClear(&pvNoiseSup);

        // Turn on/off AGC
        PROPVARIANT pvAGC;
        PropVariantInit(&pvAGC);
        pvAGC.vt = VT_BOOL;
        pvAGC.boolVal = bAGC ? VBTRUE : VBFALSE;
        CHECKHR(pPS->SetValue(MFPKEY_WMAAECMA_FEATR_AGC, pvAGC));
        CHECKHR(pPS->GetValue(MFPKEY_WMAAECMA_FEATR_AGC, &pvAGC));
        printf("%20s %5d \n", "AGC is", pvAGC.boolVal);
        PropVariantClear(&pvAGC);

        // Turn on/off center clip
        PROPVARIANT pvCntrClip;
        PropVariantInit(&pvCntrClip);
        pvCntrClip.vt = VT_BOOL;
        pvCntrClip.boolVal = bCntrClip ? VBTRUE : VBFALSE;
        CHECKHR(pPS->SetValue(MFPKEY_WMAAECMA_FEATR_CENTER_CLIP, pvCntrClip));
        CHECKHR(pPS->GetValue(MFPKEY_WMAAECMA_FEATR_CENTER_CLIP, &pvCntrClip));
        printf("%20s %5d \n", "Center clip is", (BOOL)pvCntrClip.boolVal);
        PropVariantClear(&pvCntrClip);
    }

    // Set DMO output format
    hr = MoInitMediaType(&mt, sizeof(WAVEFORMATEX));
    CHECK_RET(hr, "MoInitMediaType failed");
    mt.majortype = MEDIATYPE_Audio;
    mt.subtype = MEDIASUBTYPE_PCM;
    mt.lSampleSize = 0;
    mt.bFixedSizeSamples = TRUE;
    mt.bTemporalCompression = FALSE;
    mt.formattype = FORMAT_WaveFormatEx;
    memcpy(mt.pbFormat, &wfxOut, sizeof(WAVEFORMATEX));
    hr = pDMO->SetOutputType(0, &mt, 0);
    // Free the format block before checking the result so it is not leaked
    // when SetOutputType fails and control jumps to exit.
    MoFreeMediaType(&mt);
    CHECK_RET(hr, "SetOutputType failed");

    // Allocate streaming resources. This step is optional. If it is not called here, it
    // will be called when first time ProcessInput() is called. However, if you want to
    // get the actual frame size being used, it should be called explicitly here.
    hr = pDMO->AllocateStreamingResources();
    CHECK_RET(hr, "AllocateStreamingResources failed");

    // Get actually frame size being used in the DMO. (optional, do as you need)
    int iFrameSize;
    PROPVARIANT pvFrameSize;
    PropVariantInit(&pvFrameSize);
    CHECKHR(pPS->GetValue(MFPKEY_WMAAECMA_FEATR_FRAME_SIZE, &pvFrameSize));
    iFrameSize = pvFrameSize.lVal;
    PropVariantClear(&pvFrameSize);

    // allocate output buffer (one second of output audio)
    cOutputBufLen = wfxOut.nSamplesPerSec * wfxOut.nBlockAlign;
    pbOutputBuffer = new BYTE[cOutputBufLen];
    CHECK_ALLOC(pbOutputBuffer, "out of memory.\n");

    // number of 10 ms polling iterations to run
    cTtlToGo = iDuration * 100;

    // main loop to get mic output from the DMO
    puts("\nAEC-MicArray is running ... Press \"s\" to stop");
    while (1)
    {
        Sleep(10);  //sleep 10ms
        if (cTtlToGo-- <= 0)
            break;
        do {
            outputBuffer.Init((byte*)pbOutputBuffer, cOutputBufLen, 0);
            OutputBufferStruct.dwStatus = 0;
            hr = pDMO->ProcessOutput(0, 1, &OutputBufferStruct, &dwStatus);
            CHECK_RET(hr, "ProcessOutput failed");
            // S_FALSE is a success code meaning no output was produced.
            if (hr == S_FALSE) {
                cbProduced = 0;
            }
            else {
                hr = outputBuffer.GetBufferAndLength(NULL, &cbProduced);
                CHECK_RET(hr, "GetBufferAndLength failed");
            }
            // dump output data into a file with PCM format.
            if (fwrite(pbOutputBuffer, 1, cbProduced, pfMicOutPCM) != cbProduced)
            {
                puts("write error");
                goto exit;
            }
        } while (OutputBufferStruct.dwStatus & DMO_OUTPUT_DATA_BUFFERF_INCOMPLETE);

        // check keyboard input to stop
        if (_kbhit())
        {
            int ch = _getch();
            if (ch == 's' || ch == 'S')
                break;
        }
    }

exit:
    // Close (and thereby flush) the output file on every path.
    if (pfMicOutPCM != NULL)
    {
        fclose(pfMicOutPCM);
        pfMicOutPCM = NULL;
    }
    SAFE_ARRAYDELETE(pbOutputBuffer);
    SAFE_ARRAYDELETE(pCaptureDeviceInfo);
    SAFE_ARRAYDELETE(pRenderDeviceInfo);
    SAFE_RELEASE(pDMO);
    SAFE_RELEASE(pPS);
    CoUninitialize();
    return hr;
}
// Prints the demo banner and the command-line usage summary to stdout.
void OutputUsage()
{
    static const char* const kUsageLines[] = {
        "MFWMAAEC (Aec-MicArray DMO) Demo. \n",
        "Copyright (c) 2004-2006, Microsoft Corporation. All rights reserved. \n\n",
        "Usage: AecSDKDemo.exe -out mic_out.pcm -mod 0 [-feat 1] [-ns 1] [-agc 0] \n",
        " [-cntrclip 0] [-micdev 0] [-spkdev 0] [-duration 60]\n",
    };
    const unsigned lineCount = sizeof(kUsageLines) / sizeof(kUsageLines[0]);
    // None of the lines contains a conversion specifier, so writing them
    // verbatim produces the same output as formatting them.
    for (unsigned idx = 0; idx < lineCount; ++idx) {
        fputs(kUsageLines[idx], stdout);
    }
}
NOTE: My main concern is to record audio and use the native noise and echo canceller. If there are any suggestions regarding this, I would very much appreciate them. Thanks.

Trying to create a wav file with 'Capturing a Stream" from Win32 WASAPI C++

I'm having trouble attempting to interpret (BYTE* pData) audio data that the getBuffer function returns. I'm attempting to write captured audio from my mic into a wav file I myself am constructing all for the purpose of better understanding audio devices, audio data, and audio formats.
Below is all code most of which is used from or a reference from the Windows doc: https://learn.microsoft.com/en-us/windows/win32/coreaudio/capturing-a-stream. Trying to keep things as simple as possible nothing fancy. This code captures a few second of mic audio which you can hear distorted and really static-y. Is the distortion due to how I'm writing the pData contents into the file?
Main.cpp
Note - please ignore the 'cout' all over the place, only for debugging
#pragma once
#include "MyAudioSink.h"
#include <windows.h>
// REFERENCE_TIME time units per second and per millisecond
// Number of REFERENCE_TIME units in one second / one millisecond.
#define REFTIMES_PER_SEC 10000000
#define REFTIMES_PER_MILLISEC 10000
// Jumps to the enclosing function's `Exit:` label when `hres` is a failure
// HRESULT. The function using it must define that label.
#define EXIT_ON_ERROR(hres) \
if (FAILED(hres)) { goto Exit; }
// Releases a COM interface pointer if it is non-null and resets it to NULL.
#define SAFE_RELEASE(punk) \
if ((punk) != NULL) \
{ (punk)->Release(); (punk) = NULL; }
// Class and interface identifiers passed to CoCreateInstance / Activate /
// GetService in RecordAudioStream.
const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator);
const IID IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator);
const IID IID_IAudioClient = __uuidof(IAudioClient);
const IID IID_IAudioCaptureClient = __uuidof(IAudioCaptureClient);
HRESULT RecordAudioStream(MyAudioSink * pMySink);
int main() {
HRESULT hr;
hr = CoInitialize(nullptr);
//declare MyAudioSink object
MyAudioSink pMySink;
hr = RecordAudioStream(&pMySink);
cout << "done";
}
//-----------------------------------------------------------
// Record an audio stream from the default audio capture
// device. The RecordAudioStream function allocates a shared
// buffer big enough to hold one second of PCM audio data.
// The function uses this buffer to stream data from the
// capture device. The main loop runs every 1/2 second.
//
// pMySink receives the stream format (SetFormat), is asked to
// create its output file (_Initialize_File), gets every captured
// packet (CopyData) and finalizes the file (_File_WrapUp).
// Recording continues until CopyData sets bDone.
//
// Returns the first failing HRESULT, otherwise the result of
// IAudioClient::Stop(). The "testN" prints are debug progress
// markers.
//-----------------------------------------------------------
HRESULT RecordAudioStream(MyAudioSink* pMySink)
{
HRESULT hr;
REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
REFERENCE_TIME hnsActualDuration;
UINT32 bufferFrameCount;
UINT32 numFramesAvailable;
IMMDeviceEnumerator* pEnumerator = NULL;
IMMDevice* pDevice = NULL;
IAudioClient* pAudioClient = NULL;
IAudioCaptureClient* pCaptureClient = NULL;
WAVEFORMATEX* pwfx = NULL;
UINT32 packetLength = 0;
BOOL bDone = FALSE;
BYTE* pData;
DWORD flags;
cout << "starting...";
// Create the device enumerator.
hr = CoCreateInstance(
CLSID_MMDeviceEnumerator, NULL,
CLSCTX_ALL, IID_IMMDeviceEnumerator,
(void**)&pEnumerator);
cout << "test1" ;
EXIT_ON_ERROR(hr)
// Pick the default capture endpoint for the console role.
hr = pEnumerator->GetDefaultAudioEndpoint(
eCapture, eConsole, &pDevice);
cout << "test2" ;
EXIT_ON_ERROR(hr)
hr = pDevice->Activate(
IID_IAudioClient, CLSCTX_ALL,
NULL, (void**)&pAudioClient);
cout << "test3" ;
EXIT_ON_ERROR(hr)
// The stream is opened with the mix format reported here; pwfx is freed
// with CoTaskMemFree at Exit.
hr = pAudioClient->GetMixFormat(&pwfx);
cout << "test4" ;
EXIT_ON_ERROR(hr)
// Shared mode, no stream flags, one-second requested buffer.
hr = pAudioClient->Initialize(
AUDCLNT_SHAREMODE_SHARED,
0,
hnsRequestedDuration,
0,
pwfx,
NULL);
cout << "test5" ;
EXIT_ON_ERROR(hr)
// Get the size of the allocated buffer.
hr = pAudioClient->GetBufferSize(&bufferFrameCount);
cout << "test6" ;
EXIT_ON_ERROR(hr)
hr = pAudioClient->GetService(
IID_IAudioCaptureClient,
(void**)&pCaptureClient);
cout << "test7" ;
EXIT_ON_ERROR(hr)
// Calculate the actual duration of the allocated buffer.
hnsActualDuration = (double)REFTIMES_PER_SEC *
bufferFrameCount / pwfx->nSamplesPerSec;
// Notify the audio sink which format to use.
hr = pMySink->SetFormat(pwfx);
cout << "test8" ;
EXIT_ON_ERROR(hr)
//initialize the wav file with the specifications set by SetFormat
hr = pMySink->_Initialize_File();
cout << "test9" ;
EXIT_ON_ERROR(hr)
hr = pAudioClient->Start(); // Start recording.
cout << "test10" ;
EXIT_ON_ERROR(hr)
cout << "about to run while...";
// Each loop fills about half of the shared buffer.
while (bDone == FALSE)
{
// Sleep for half the buffer duration.
Sleep(hnsActualDuration / REFTIMES_PER_MILLISEC / 2);
hr = pCaptureClient->GetNextPacketSize(&packetLength);
EXIT_ON_ERROR(hr)
// Drain every packet that became available during the sleep.
while (packetLength != 0)
{
// Get the available data in the shared buffer.
hr = pCaptureClient->GetBuffer(
&pData,
&numFramesAvailable,
&flags, NULL, NULL);
EXIT_ON_ERROR(hr)
if (flags & AUDCLNT_BUFFERFLAGS_SILENT)
{
cout << "silent";
pData = NULL; // Tell CopyData to write silence.
}
// Copy the available capture data to the audio sink.
// NOTE(review): pData is in the mix format obtained above; the sink must
// interpret it accordingly.
hr = pMySink->CopyData(
pData, numFramesAvailable, &bDone);
EXIT_ON_ERROR(hr)
hr = pCaptureClient->ReleaseBuffer(numFramesAvailable);
EXIT_ON_ERROR(hr)
hr = pCaptureClient->GetNextPacketSize(&packetLength);
EXIT_ON_ERROR(hr)
}
}
// Let the sink finalize its file before the stream is stopped.
hr = pMySink->_File_WrapUp();
EXIT_ON_ERROR(hr)
hr = pAudioClient->Stop(); // Stop recording.
EXIT_ON_ERROR(hr)
// Common cleanup for both the success and the failure paths.
Exit:
CoTaskMemFree(pwfx);
SAFE_RELEASE(pEnumerator)
SAFE_RELEASE(pDevice)
SAFE_RELEASE(pAudioClient)
SAFE_RELEASE(pCaptureClient)
return hr;
}
MyAudioSink.cpp
Note ** - this is where the issue is. You may notice that a UDF called 'write_word' is what initializes the wav file with all of the audio format parameters, however, I haven't been able to figure out how to use this function to write the pData contents, so tried to use the ostream write function instead which yielded the best results so far (hearing my voice) but it sounds extremely static-y and distorted.
#pragma once
#include "MyAudioSink.h"
#include <string.h>
namespace little_endian_io
{
    /// Writes the lowest `size` bytes of `value` to `outs`, least-significant
    /// byte first (the byte order used by RIFF/WAVE headers).
    ///
    /// @param outs   Destination stream; should be opened in binary mode.
    /// @param value  Integer to serialise.
    /// @param size   Number of bytes to emit; defaults to the width of `Word`.
    /// @return `outs`, so calls can be chained.
    template <typename Word>
    std::ostream& write_word(std::ostream& outs, Word value, unsigned size = sizeof(Word))
    {
        while (size != 0)
        {
            const char lowByte = static_cast<char>(value & 0xFF);
            outs.put(lowByte);
            value >>= 8;   // expose the next more-significant byte
            --size;
        }
        return outs;
    }
}
using namespace little_endian_io;
/// Opens "example.wav" and writes a provisional RIFF/WAVE header using the
/// format fields previously stored by SetFormat(). The two chunk-size fields
/// are written as "----" placeholders and patched later by _File_WrapUp().
///
/// @return S_OK on success; E_FAIL if the file cannot be opened or the header
///         cannot be written.
HRESULT MyAudioSink::_Initialize_File() {
    cout << "initializing file";
    // prepare our wav file
    mainFile.open("example.wav", ios::out | ios::binary);
    if (!mainFile.is_open()) {
        // Without this check every later write fails silently and the caller
        // believes a recording was produced.
        return E_FAIL;
    }
    // Write the file headers and sound format
    mainFile << "RIFF----WAVEfmt "; // (chunk size to be filled in later)
    write_word(mainFile, 16, 4); // no extension data
    // NOTE(review): the format tag is hard-coded to 1 (integer PCM). If the
    // format passed to SetFormat() is float or WAVE_FORMAT_EXTENSIBLE the
    // header will not describe the sample data that CopyData() writes --
    // confirm against the capture format.
    write_word(mainFile, 1, 2); // PCM - integer samples
    write_word(mainFile, nChannels, 2); // channel count
    write_word(mainFile, nSamplesPerSec, 4); // samples per second (Hz)
    write_word(mainFile, nAvgBytesPerSec, 4); // (Sample Rate * BitsPerSample * Channels) / 8
    write_word(mainFile, nBlockAlign, 2); // bytes per frame (one sample for every channel)
    write_word(mainFile, wBitsPerSample, 2); // number of bits per sample (use a multiple of 8)
    // Write the data chunk header, remembering where it starts so that
    // _File_WrapUp() can patch its size field.
    data_chunk_pos = mainFile.tellp();
    mainFile << "data----"; // (chunk size to be filled in later)..
    //start by setting our complete variable to False, main func will turn to true
    bComplete = false;
    //testing: counts CopyData() calls
    test = 0;
    return mainFile.good() ? S_OK : E_FAIL;
}
/// Remembers the stream format so that the WAV header can be written later.
///
/// @param pwfx  Format description to copy from; must not be null.
/// @return Always S_OK.
HRESULT MyAudioSink::SetFormat(WAVEFORMATEX* pwfx) {
    const WAVEFORMATEX& format = *pwfx;

    // Copy every base WAVEFORMATEX field into the matching member.
    wFormatTag      = format.wFormatTag;
    nChannels       = format.nChannels;
    nSamplesPerSec  = format.nSamplesPerSec;
    nAvgBytesPerSec = format.nAvgBytesPerSec;
    nBlockAlign     = format.nBlockAlign;
    wBitsPerSample  = format.wBitsPerSample;
    cbSize          = format.cbSize;

    return S_OK;
}
/// Appends one captured packet to the WAV file.
///
/// @param pData               Packet data, `numFramesAvailable * nBlockAlign`
///                            bytes long, or NULL to request silence.
/// @param numFramesAvailable  Number of audio frames in the packet.
/// @param bDone               Set to TRUE once the sink wants capture to stop.
/// @return S_OK on success, E_FAIL if writing to the file failed.
HRESULT MyAudioSink::CopyData(BYTE* pData, UINT32 numFramesAvailable, BOOL* bDone) {
    const std::streamsize byteCount =
        static_cast<std::streamsize>(numFramesAvailable) * nBlockAlign;

    if (pData != NULL) {
        // The frames are contiguous, so one write covers the whole packet.
        mainFile.write(reinterpret_cast<const char*>(pData), byteCount);
    } else {
        // The capture loop passes NULL for packets flagged silent; emit zero
        // bytes so the timeline stays intact instead of reading from a null
        // pointer.
        // NOTE(review): zero is silence for float and for signed PCM; 8-bit
        // unsigned PCM would need 0x80 -- confirm the formats in use.
        for (std::streamsize i = 0; i < byteCount; ++i) {
            mainFile.put('\0');
        }
    }

    //test: stop after a fixed number of packets
    test++;
    if (test >= nBlockAlign * 120) bComplete = true;
    //check if our main function is done to finish capture
    if (bComplete && bDone != NULL) *bDone = TRUE;

    return mainFile.good() ? S_OK : E_FAIL;
}
/// Patches the two size placeholders left by _Initialize_File() and closes
/// the file.
///
/// @return Always S_OK.
HRESULT MyAudioSink::_File_WrapUp() {
    // (We'll need the final file size to fix the chunk sizes above)
    file_length = mainFile.tellp();
    // Fix the data chunk header to contain the data size. The payload begins
    // after the 4-byte "data" tag and the 4-byte size field, so its length is
    // file_length - (data_chunk_pos + 8). The field is exactly 4 bytes wide;
    // writing sizeof(size_t) bytes here would overwrite the first samples on
    // 64-bit builds.
    mainFile.seekp(data_chunk_pos + 4);
    write_word(mainFile, file_length - (data_chunk_pos + 8), 4);
    // Fix the file header to contain the proper RIFF chunk size, which is (file size - 8) bytes
    mainFile.seekp(0 + 4);
    write_word(mainFile, file_length - 8, 4);
    mainFile.close();
    cout << "finalized file";
    return S_OK;
}
MyAudioSink.h
#pragma once
//
#include <audioclient.h>
#include <Mmdeviceapi.h>
#include <fstream>
#include <iostream>
#include <cmath>
using namespace std;
// Receives captured audio packets and stores them in a WAV file.
// Call order used by the capture loop: SetFormat(), _Initialize_File(),
// CopyData() once per packet, then _File_WrapUp().
class MyAudioSink
{
private:
// Stream offset of the "data" chunk header, recorded by _Initialize_File().
size_t data_chunk_pos;
// Final stream position, recorded by _File_WrapUp().
size_t file_length;
// Output stream for the WAV file.
ofstream mainFile;
//sample format (copied field-by-field from WAVEFORMATEX in SetFormat())
WORD wFormatTag;
WORD nChannels;
DWORD nSamplesPerSec;
DWORD nAvgBytesPerSec;
WORD nBlockAlign;
WORD wBitsPerSample;
WORD cbSize;
// Counts CopyData() calls; used as a temporary stop condition.
int test;
public:
// Set to true to make the next CopyData() call report completion.
bool bComplete;
// Opens the output file and writes the provisional header.
HRESULT _Initialize_File();
// Stores the format fields used for the header.
HRESULT SetFormat(WAVEFORMATEX* pwfx);
// Appends one packet; sets *bDone once bComplete is true.
HRESULT CopyData(BYTE* pData, UINT32 numFramesAvailable, BOOL* bDone);
// Patches the header size fields and closes the file.
HRESULT _File_WrapUp();
};
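I suspect the problem is that your program only handles the plain PCM format, not the extensible format. When the device's mix format is extensible (or float), the header you write no longer matches the sample data, so the file does not follow the WAVE specification.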
Add this code to confirm :
// Query the shared-mode mix format and print which format family it uses.
pAudioClient->GetMixFormat(&pwfx);
switch (pwfx->wFormatTag)
{
case WAVE_FORMAT_PCM:
    cout << "WAVE_FORMAT_PCM";
    break;
case WAVE_FORMAT_IEEE_FLOAT:
    cout << "WAVE_FORMAT_IEEE_FLOAT";
    break;
case WAVE_FORMAT_EXTENSIBLE:
{
    cout << "WAVE_FORMAT_EXTENSIBLE";
    // For the extensible layout the actual sample encoding is carried by
    // the SubFormat GUID rather than by wFormatTag.
    const GUID& subFormat =
        reinterpret_cast<WAVEFORMATEXTENSIBLE *>(pwfx)->SubFormat;
    if (subFormat == KSDATAFORMAT_SUBTYPE_PCM)
        cout << "KSDATAFORMAT_SUBTYPE_PCM";
    else if (subFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)
        cout << "KSDATAFORMAT_SUBTYPE_IEEE_FLOAT";
    break;
}
}
I think the most common case will be WAVE_FORMAT_EXTENSIBLE and KSDATAFORMAT_SUBTYPE_IEEE_FLOAT ...
EDIT
I've made a quick sample here : WasapiCapture

rapid TS fragment ffmpeg decoding - memory leak

Environment:
Ubuntu 16.04 (x64)
C++
ffmpeg
Use-case
Multiple MPEG-TS fragments are rapidly decoded ( numerous every sec )
The format of the TS fragments is dynamic and can't be known ahead of time
The first A/V frames of each fragment are needed to be extracted
Problem statement
The code below successfully decodes A/V, BUT has a huge memory leak (MBytes/sec)
According to the docs seems all memory is freed as it should ( does it... ? )
Why do I get this huge mem leak, what am I missing in the following code snap ?
// Describes the format of a decoded frame handed to the ProcessTS callback.
// eType selects which member of `format` is meaningful.
struct MEDIA_TYPE {
// Stream kind (audio or video), copied from the codec context.
ffmpeg::AVMediaType eType;
union {
// Valid when eType is AVMEDIA_TYPE_VIDEO.
struct {
ffmpeg::AVPixelFormat colorspace;
int width, height;
// Frames per second, computed as framerate.num / framerate.den.
float fFPS;
} video;
// Valid when eType is AVMEDIA_TYPE_AUDIO; extends WAVEFORMATEX.
struct : WAVEFORMATEX {
// AVFrame::format of the decoded frame, narrowed to short.
short sSampleFormat;
} audio;
} format;
};
// One decoded frame as passed to the ProcessTS callback. The pointers are
// borrowed from the decoder's AVFrame; ProcessTS frees that AVFrame after the
// callback returns.
struct FRAME {
// NOTE(review): presumably the maximum plane count plus a terminator; the
// identifier looks like a misspelling of MAX_PLANES -- kept as is in case
// other code references it.
enum { MAX_PALNES = 3 + 1 };
// Index of the stream the frame belongs to.
int iStrmId;
int64_t pts; // Duration in 90Khz clock resolution
uint8_t** ppData; // Null terminated
int32_t* pStride;// Zero terminated
};
/// Demuxes an in-memory MPEG-TS fragment, decodes its audio/video packets and
/// hands every decoded frame to `cb`.
///
/// @param op      Decode mode. With DECODE_FIRST_FRAME_OF_EACH_STREAM only the
///                first packet of each stream is decoded and the loop ends
///                with S_FALSE once every stream has been seen.
/// @param pTS     Start of the TS data. Only read; the caller keeps ownership.
/// @param uiBytes Size of the data; rounded down to a multiple of 188.
/// @param cb      Invoked per decoded frame. Returning false stops decoding
///                and makes the function return S_FALSE.
/// @param pCbCtx  Opaque pointer forwarded to `cb`.
/// @return S_FALSE when decoding stopped early as described above,
///         E_OUTOFMEMORY on allocation failure, E_FAIL when the input cannot
///         be opened, a codec cannot be opened, decoding fails, or
///         av_read_frame() reports an error (which includes end of data).
HRESULT ProcessTS(IN Operation op, IN uint8_t* pTS, IN uint32_t uiBytes, bool(*cb)(IN const MEDIA_TYPE& mt, IN FRAME& frame, IN PVOID pCtx), IN PVOID pCbCtx)
{
    uiBytes -= uiBytes % 188;// align to 188 packet size
    struct CONTEXT {
        uint8_t* pTS;
        uint32_t uiBytes;
        int32_t iPos;
    } ctx = { pTS, uiBytes, 0 };
    LOGTRACE(TSDecoder, "ProcessTS(%d, 0x%.8x, %d, 0x%.8x, 0x%.8x), this=0x%.8x\r\n", (int)op, pTS, uiBytes, cb, pCbCtx, this);

    // avio_alloc_context() requires a buffer obtained from av_malloc(), and
    // libavformat is allowed to free and replace it. Handing it the caller's
    // memory (and later zeroing AVIOContext::buffer) therefore both risks
    // freeing foreign memory and leaks every replacement buffer. Use a private
    // I/O buffer and release whatever buffer is current at the end.
    const int kIoBufferBytes = 188 * 128;
    uint8_t* pIoBuffer = (uint8_t*)ffmpeg::av_malloc(kIoBufferBytes);
    if (0 == pIoBuffer)
        return E_OUTOFMEMORY;

    ffmpeg::AVFormatContext* pFmtCtx = 0;
    if (0 == (pFmtCtx = ffmpeg::avformat_alloc_context())) {
        ffmpeg::av_free(pIoBuffer);
        return E_OUTOFMEMORY;
    }

    ffmpeg::AVIOContext* pIoCtx = ffmpeg::avio_alloc_context(pIoBuffer, kIoBufferBytes, 0, &ctx
        , [](void *opaque, uint8_t *buf, int buf_size)->int {
            // Read callback: copy at most buf_size bytes from the current
            // position. Returns 0 once the data is exhausted.
            auto pCtx = (CONTEXT*)opaque;
            int size = (int)pCtx->uiBytes - pCtx->iPos;
            if (size > buf_size)
                size = buf_size;
            if (size <= 0)
                return 0;
            memcpy(buf, pCtx->pTS + pCtx->iPos, size);
            pCtx->iPos += size;
            return size;
        }
        , 0
        , [](void* opaque, int64_t offset, int whence)->int64_t {
            // Seek callback over the in-memory data.
            auto pCtx = (CONTEXT*)opaque;
            int64_t newPos = 0;
            switch (whence)
            {
            case SEEK_SET:
                newPos = offset;
                break;
            case SEEK_CUR:
                newPos = pCtx->iPos + offset;
                break;
            case SEEK_END:
                // Offsets are relative to the end, so they are added.
                newPos = (int64_t)pCtx->uiBytes + offset;
                break;
            case AVSEEK_SIZE:
                return pCtx->uiBytes;
            default:
                return -1;
            }
            if (newPos < 0 || newPos > (int64_t)pCtx->uiBytes)
                return -1;// refuse to leave the data range
            pCtx->iPos = (int32_t)newPos;
            return newPos;
        });
    if (0 == pIoCtx) {
        ffmpeg::av_free(pIoBuffer);
        ffmpeg::avformat_free_context(pFmtCtx);
        return E_OUTOFMEMORY;
    }
    pFmtCtx->pb = pIoCtx;

    int iRet = ffmpeg::avformat_open_input(&pFmtCtx, "fakevideo.ts", m_pInputFmt, 0);
    if (ERROR_SUCCESS != iRet) {
        pFmtCtx = 0;// a user-supplied AVFormatContext will be freed on failure.
        // The custom I/O context is still ours to release.
        ffmpeg::av_freep(&pIoCtx->buffer);
        ffmpeg::av_free(pIoCtx);
        assert(false);
        return E_FAIL;
    }

    struct DecodeContext {
        ffmpeg::AVStream* pStream;
        ffmpeg::AVCodec* pDecoder;
        int iFramesProcessed;
    };
    HRESULT hr = S_OK;
    int iStreamsProcessed = 0;
    bool bVideoFound = false;
    int64_t ptsLast = 0;
    int64_t dtsLast = 0;
    // Number of streams known after opening; the per-stream table below is
    // sized for exactly this many entries.
    const unsigned int nStreams = pFmtCtx->nb_streams;
    auto pContext = (DecodeContext*)alloca(sizeof(DecodeContext) * nStreams);
    for (unsigned int i = 0; i < nStreams; i++) {
        assert(pFmtCtx->streams[i]->index == i);
        pContext[i].pStream = pFmtCtx->streams[i];
        pContext[i].pDecoder = ffmpeg::avcodec_find_decoder(pFmtCtx->streams[i]->codec->codec_id);
        pContext[i].iFramesProcessed= 0;
        if (0 == pContext[i].pDecoder)
            continue;
        if ((iRet = ffmpeg::avcodec_open2(pFmtCtx->streams[i]->codec, pContext[i].pDecoder, NULL)) < 0) {
            _ASSERT(FALSE);
            hr = E_FAIL;
            goto ErrExit;
        }
    }

    while (S_OK == hr) {
        // Declared before any 'goto Next' so that no jump bypasses an
        // initialisation.
        ffmpeg::AVFrame* pFrame = 0;
        int bGotFrame = false;
        MEDIA_TYPE mt;
        ffmpeg::AVPacket pkt;
        ffmpeg::av_init_packet(&pkt);
        if (ERROR_SUCCESS != (iRet = ffmpeg::av_read_frame(pFmtCtx, &pkt))) {
            hr = E_FAIL;
            break;
        }
        if ((0 == dtsLast) && (0 != pkt.dts))
            dtsLast = pkt.dts;
        if ((0 == ptsLast) && (0 != pkt.pts))
            ptsLast = pkt.pts;
        // Streams can appear while reading; packets of streams that have no
        // entry in pContext are skipped instead of indexing past the table.
        if (pkt.stream_index < 0 || (unsigned int)pkt.stream_index >= nStreams) {
            ffmpeg::av_free_packet(&pkt);
            continue;
        }
        DecodeContext& ctx = pContext[pkt.stream_index];
        if (Operation::DECODE_FIRST_FRAME_OF_EACH_STREAM == op) {
            if ((unsigned int)iStreamsProcessed == nStreams) {
                hr = S_FALSE;
                goto Next;
            }
            if (ctx.iFramesProcessed > 0)
                goto Next;
            iStreamsProcessed++;
        }
        if (0 == ctx.pDecoder)
            goto Next;
        if (0 == (pFrame = ffmpeg::av_frame_alloc())) {
            hr = E_OUTOFMEMORY;
            goto Next;
        }
        LOGTRACE(TSDecoder, "ProcessTS(%d, 0x%.8x, %d, 0x%.8x, 0x%.8x), this=0x%.8x, decode, S:%d, T:%d\r\n", (int)op, pTS, uiBytes, cb, pCbCtx, this, pkt.stream_index, ctx.pStream->codec->codec_type);
        memset(&mt, 0, sizeof(mt));
        mt.eType = ctx.pStream->codec->codec_type;
        switch (mt.eType) {
        case ffmpeg::AVMediaType::AVMEDIA_TYPE_AUDIO:
            if ((iRet = ffmpeg::avcodec_decode_audio4(ctx.pStream->codec, pFrame, &bGotFrame, &pkt)) < 0) {
                hr = E_FAIL;
                goto Next;
            }
            _ASSERT(pkt.size == iRet);
            // FFMPEG AAC decoder oddity, first call to 'avcodec_decode_audio4' results mute audio where the second result the expected audio
            bGotFrame = false;
            if ((iRet = ffmpeg::avcodec_decode_audio4(ctx.pStream->codec, pFrame, &bGotFrame, &pkt)) < 0) {
                hr = E_FAIL;
                goto Next;
            }
            _ASSERT(pkt.size == iRet);
            if (false == bGotFrame)
                goto Next;
            mt.format.audio.nChannels = ctx.pStream->codec->channels;
            mt.format.audio.nSamplesPerSec = ctx.pStream->codec->sample_rate;
            mt.format.audio.wBitsPerSample = ffmpeg::av_get_bytes_per_sample(ctx.pStream->codec->sample_fmt) * 8;
            mt.format.audio.nBlockAlign = mt.format.audio.nChannels * mt.format.audio.wBitsPerSample / 8;
            mt.format.audio.sSampleFormat = (short)pFrame->format;
            break;
        case ffmpeg::AVMediaType::AVMEDIA_TYPE_VIDEO:
            if ((iRet = ffmpeg::avcodec_decode_video2(ctx.pStream->codec, pFrame, &bGotFrame, &pkt)) < 0) {
                hr = E_FAIL;
                break;
            }
            if (false == bGotFrame)
                goto Next;
            assert(ffmpeg::AVPixelFormat::AV_PIX_FMT_YUV420P == ctx.pStream->codec->pix_fmt);// Thats is the only color space currently supported
            mt.format.video.width = ctx.pStream->codec->width;
            mt.format.video.height = ctx.pStream->codec->height;
            mt.format.video.colorspace = ctx.pStream->codec->pix_fmt;
            mt.format.video.fFPS = (float)ctx.pStream->codec->framerate.num / ctx.pStream->codec->framerate.den;
            bVideoFound = true;
            break;
        default:
            goto Next;
        }
        ctx.iFramesProcessed++;
        {
            // Prefer the first non-zero pts seen; fall back to the first dts.
            FRAME f = { ctx.pStream->index, ((0 == ptsLast) ? dtsLast : ptsLast), (uint8_t**)pFrame->data, (int32_t*)pFrame->linesize };
            if ((iRet > 0) && (false == cb(mt, f, pCbCtx)))
                hr = S_FALSE;// Breaks the loop
        }
    Next:
        ffmpeg::av_free_packet(&pkt);
        if (0 != pFrame) {
            ffmpeg::av_frame_free(&pFrame);
            pFrame = 0;
        }
    }

ErrExit:
    for (unsigned int i = 0; i < pFmtCtx->nb_streams; i++)
        ffmpeg::avcodec_close(pFmtCtx->streams[i]->codec);
    // Detach the custom I/O context before closing so that it is released
    // exactly once, below.
    pFmtCtx->pb = 0;
    ffmpeg::avformat_close_input(&pFmtCtx);
    // Free the buffer currently attached to the I/O context (libavformat may
    // have replaced the one allocated above), then the context itself.
    ffmpeg::av_freep(&pIoCtx->buffer);
    ffmpeg::av_free(pIoCtx);
    return hr;
}
You need to unref the packets before reusing them. And there's no need to allocate and deallocate them all the time.
Here's how I do it which might help you:
// Initialise a packet queue
std::list<AVPacket *> packets;
...
for (int c = 0; c < MAX_PACKETS; c++) {
ff->packets.push_back(av_packet_alloc());
}
while (!quit) {
... get packet from queue
int err = av_read_frame(ff->context, packet);
... process packet (audio, video, etc)
av_packet_unref(packet); // add back to queue for reuse
}
// Release packets
while (ff->packets.size()) { // free packets
AVPacket *packet = ff->packets.front();
av_packet_free(&packet);
ff->packets.pop_front();
}
In your code you've freed a packet which wasn't allocated in the first place.

Direct Sound: How do I read captured data from a small buffer?

I'm trying to capture waveforms of floating point PCM data from a microphone. The application is only asking for a small number of samples each cycle (For 20'000Hz and a frame size of 0.003s, it would ask for 60 samples)
I would like to set the buffer size depending on how many ms the app is interested in but it seems that dwBufferBytes has to be a certain size. Instead, I set it to nAvgBytesPerSec and only lock/copy 60 samples each time (even though much more data would be available to read)
Is this a valid approach or is there a different way to throttle the sound driver? Is there a way to reduce the size of the buffer to only give me as much data as the app is requesting? I don't want to get a ton of sound
data if the application only wants 60 values.
Using this approach, I certainly will run into problems if the buffer catches up with my (slow) read cursor.
// Capture format: stereo, 32-bit IEEE float, 20 kHz.
unsigned short channelNum = 2;
unsigned short bitsPerSample = 32;
unsigned long sampleRate = 20000;
unsigned short blockAlign = (channelNum * bitsPerSample) / 8;
unsigned long avgBytesPerSec = sampleRate * blockAlign;
WAVEFORMATEX wfx = { WAVE_FORMAT_IEEE_FLOAT, channelNum, sampleRate, avgBytesPerSec, blockAlign, bitsPerSample, 0 };
// Size of the chunk requested from the capture buffer per iteration.
// NOTE(review): IDirectSoundCaptureBuffer::Lock takes this as a BYTE count.
// 60 is not a multiple of blockAlign (8), so successive chunks start in the
// middle of a frame; if 60 *samples* were intended, this should be
// 60 * sizeof(float) and mBuffer sized to match -- confirm against the
// consumer of mBuffer.
unsigned int mSampleBufferSize = 60; // 1400
// One second of audio in the circular capture buffer.
DSCBUFFERDESC bufferDesc;
bufferDesc.dwSize = sizeof(DSCBUFFERDESC);
bufferDesc.dwFlags = 0;
bufferDesc.dwBufferBytes = wfx.nAvgBytesPerSec;
bufferDesc.dwReserved = 0;
bufferDesc.lpwfxFormat = &wfx;
bufferDesc.dwFXCount = 0;
bufferDesc.lpDSCFXDesc = NULL;
IDirectSoundCaptureBuffer *buffer = 0;
bool bufferRunning = false;
if (directSound && capture)
{
    hr = capture->CreateCaptureBuffer(&bufferDesc, &buffer, NULL);
    if (FAILED(hr))
        std::cout << "SampleThread() -- Error creating DirectSoundCaptureBuffer " << endl;
    else
    {
        hr = buffer->Start(DSCBSTART_LOOPING);
        if (SUCCEEDED(hr)) {
            bufferRunning = true;
        }
    }
}
void* primaryBuffer = NULL;
unsigned long primaryBufferSizeBytes = 0;
void* secondaryBuffer = NULL;
unsigned long secondaryBufferSize = 0;
bool mStopExecution = false;
// Byte offset in the circular buffer of the next chunk to hand out.
unsigned long lastReadPosition = 0;
if (directSound && capture && buffer)
{
    while (!mStopExecution)
    {
        DWORD readPos = 0;
        DWORD capturePos = 0;
        hr = buffer->GetCurrentPosition(&capturePos, &readPos);
        if (FAILED(hr))
        {
            cout << "SampleThread() -- Error GetCurrentPosition" << endl;
            return 0;
        }
        // Only data up to the read cursor is safe to lock. Compute how many
        // bytes lie between our own cursor and the read cursor, taking the
        // wrap-around of the circular buffer into account.
        const DWORD available =
            (readPos + bufferDesc.dwBufferBytes - lastReadPosition) % bufferDesc.dwBufferBytes;
        if (available < mSampleBufferSize)
        {
            Sleep(1); // not enough new data yet; do not spin
            continue;
        }
        hr = buffer->Lock(lastReadPosition, mSampleBufferSize, &primaryBuffer, &primaryBufferSizeBytes, &secondaryBuffer, &secondaryBufferSize, 0);
        if (FAILED(hr))
        {
            cout << "SampleThread() -- Error Lock" << endl;
            return 0;
        }
        // The sizes returned by Lock are byte counts, so they are passed to
        // memcpy unchanged.
        memcpy(mBuffer, primaryBuffer, primaryBufferSizeBytes);
        if (secondaryBuffer != NULL && secondaryBufferSize > 0)
        {
            // The locked region wrapped past the end of the buffer; the
            // remainder continues at the start of the buffer.
            memcpy((char*)mBuffer + primaryBufferSizeBytes, secondaryBuffer, secondaryBufferSize);
        }
        hr = buffer->Unlock(primaryBuffer, primaryBufferSizeBytes, secondaryBuffer, secondaryBufferSize);
        lastReadPosition = (lastReadPosition + mSampleBufferSize) % bufferDesc.dwBufferBytes;
    }
}

0x00000010 Error reading characters of string. PROPVARIANT structure

Here's how I obtain the PROPVARIANT structure with WASAPI API related functions:
//Pointer for stored audio stream
IAudioClient *iac = NULL;
//Endpoint device selection
IMMDeviceEnumerator *pEnumerator = NULL;
IMMDevice *pDevice;
IMMDeviceCollection *pCollection = NULL;
CoInitialize(NULL);
hr = CoCreateInstance(
CLSID_MMDeviceEnumerator, NULL,
CLSCTX_ALL, IID_IMMDeviceEnumerator,
(void**)&pEnumerator);
hr = pEnumerator->EnumAudioEndpoints(eRender, DEVICE_STATE_ACTIVE, &pCollection);
//Create vector of IMMDevices
UINT endpointCount = NULL;
(*pCollection).GetCount(&endpointCount);
std::vector<IMMDevice**> IMMDevicePP; //IMMDevice seems to contain all endpoint devices, so why have a collection here?
for (UINT i = 0; i < (endpointCount); i++)
{
IMMDevice* pp = NULL;
(*pCollection).Item(i, &pp);
IMMDevicePP.assign(1, &pp);
}
UINT IMMDeviceCount = IMMDevicePP.size();
//Enumerate Properties of IMMDevices
std::vector<IPropertyStore*> IMMDeviceProperties;
for (int k = 0; k < IMMDeviceCount; k++) {
IPropertyStore* prop = NULL;
(**IMMDevicePP[k]).OpenPropertyStore(STGM_READ, &prop);
IMMDeviceProperties.assign(1, prop);
}
UINT PropertyStoreCount = IMMDeviceProperties.size();
//Find name property of device
std::vector<PROPVARIANT*> properties;
for (int i = 0; i < PropertyStoreCount; i++) {
DWORD propCount = 1;
HRESULT countResult = (*IMMDeviceProperties[i]).GetCount(&propCount);
if (countResult == S_OK) { }
else {
int x = 5;
}
for (int p = 0; p < propCount; p++) {
PROPERTYKEY key;
HRESULT keyResult = (*IMMDeviceProperties[i]).GetAt(p, &key);
HRESULT getAT;
PROPVARIANT propVari;
HRESULT propVariResult = (*IMMDeviceProperties[i]).GetValue(key, &propVari);
propVari.vt = VT_LPWSTR;
LPWSTR test = propVari.pwszVal;
//char pwszValTest;
//strcpy(&pwszValTest, propVari.pwszVal);
//WCHAR friendlyName = *propVari.pwszVal;
properties.assign(1, &propVari);
}
}
All HRESULT's return S_OK.
The resulting PROPVARIANT struct looks correct at first glance. However, when inspecting it further with VS's watch window, all of the string-type members show the error in the title of this question. So when I attempt to retrieve the name of my audio endpoint device, which is contained in the pwszVal member of my PROPVARIANT struct, like so:
LPWSTR test = propVari.pwszVal;
I am unable to retrieve the desired data. I have tried copying the string with various converter methods to no avail. I know this error is on a ton of questions but I can't seem to crack this error.
Here's the doc for PROPVARIANT and its corresponding properties:
http://msdn.microsoft.com/en-us/library/windows/desktop/aa380072(v=vs.85).aspx
In this documentation it states that "PROPVARIANT member vt is set to VT_LPWSTR" VT_LPWSTR is an enum type and corresponds to the value 31. Whereas VT_BLOB corresponds to the value 65. My vt member is being set to VT_BLOB or 65 instead of 31 or VT_LPWSTR. Why is this so? This is contradictory to the value stated in this documentation:
http://msdn.microsoft.com/en-us/library/windows/desktop/dd370812(v=vs.85).aspx
Manually setting the vt member also does not change/fix the string reading error:
propVari.vt = VT_LPWSTR;
The PKEY_Device_FriendlyName is what I'm essentially after. Any help/tips is much appreciated.
You are not filling your vectors correctly. You are storing memory addresses of local variables, not the actual items that variables refer to.
And worse, you are using std::vector::assign() to add items. assign() replaces the entire contents of a vector with the specified value. If you have multiple devices in a collection, you will not end up with a vector of multiple devices. You should be using push_back() instead of assign().
You are making those mistakes with all of your vectors.
On a side note, you should use the -> operator instead of using (*). when calling methods of the objects. It will make the code cleaner and easier to read.
Try this instead:
//Endpoint device selection
IMMDeviceEnumerator *pEnumerator = NULL;
IMMDeviceCollection *pCollection = NULL;
CoInitialize(NULL);
hr = CoCreateInstance(
    CLSID_MMDeviceEnumerator, NULL,
    CLSCTX_ALL, IID_IMMDeviceEnumerator,
    (void**)&pEnumerator);
if (SUCCEEDED(hr)) {
    hr = pEnumerator->EnumAudioEndpoints(eRender, DEVICE_STATE_ACTIVE, &pCollection);
    pEnumerator->Release();
}
//Create vector of IMMDevices
// The vector must not be named 'IMMDevice': that would hide the interface
// type and 'IMMDevice *pDevice' below would stop being a declaration.
std::vector<IMMDevice*> IMMDevices;
UINT endpointCount = 0;
if (SUCCEEDED(hr) && pCollection != NULL) {
    hr = pCollection->GetCount(&endpointCount);
    if (hr == S_OK) {
        IMMDevices.reserve(endpointCount);
        for (UINT i = 0; i < endpointCount; ++i) {
            IMMDevice *pDevice = NULL;
            hr = pCollection->Item(i, &pDevice);
            if (hr == S_OK) {
                IMMDevices.push_back(pDevice);
            }
        }
    }
    // The devices hold their own references; the collection is no longer
    // needed.
    pCollection->Release();
}
UINT IMMDeviceCount = IMMDevices.size();
//Enumerate Properties of IMMDevices
std::vector<IPropertyStore*> IMMDeviceProperties;
IMMDeviceProperties.reserve(IMMDeviceCount);
for (UINT k = 0; k < IMMDeviceCount; k++) {
    IPropertyStore* prop = NULL;
    hr = IMMDevices[k]->OpenPropertyStore(STGM_READ, &prop);
    if (hr == S_OK) {
        IMMDeviceProperties.push_back(prop);
    }
}
UINT PropertyStoreCount = IMMDeviceProperties.size();
//Find name property of devices
std::vector<std::wstring> MMDeviceFriendlyNames;
MMDeviceFriendlyNames.reserve(IMMDeviceCount);
for (UINT i = 0; i < PropertyStoreCount; i++) {
    PROPVARIANT propVari;
    PropVariantInit(&propVari);
    hr = IMMDeviceProperties[i]->GetValue(PKEY_Device_FriendlyName, &propVari);
    // Only read pwszVal when the variant really holds a wide string.
    if (hr == S_OK && propVari.vt == VT_LPWSTR && propVari.pwszVal != NULL) {
        MMDeviceFriendlyNames.push_back(propVari.pwszVal);
    }
    // Clearing an initialised variant is always safe and releases any value
    // that was returned.
    PropVariantClear(&propVari);
}
// use vectors as needed...
for (UINT i = 0; i < PropertyStoreCount; ++i) {
    IMMDeviceProperties[i]->Release();
}
for (UINT i = 0; i < IMMDeviceCount; ++i) {
    IMMDevices[i]->Release();
}
The following code, based upon yours but without the obfuscating vectors, appears to work fine. Running it, I get "FriendlyName: Speakers / HP (IDT High Definition Audio CODEC)", which seems correct for this laptop.
When working with COM and without some kind of smart pointer, be really careful to release all the pointers. And always check all the results. COM calls can fail for all kinds of reasons.
#define WINVER _WIN32_WINNT_VISTA
#define WIN32_LEAN_AND_MEAN
#define UNICODE
#define STRICT
#include <windows.h>
#include <ole2.h>
#include <mmdeviceapi.h>
#include <propsys.h>
#include <propvarutil.h>
#include <stdio.h>
#include <Functiondiscoverykeys_devpkey.h>
#pragma comment(lib, "ole32")
#pragma comment(lib, "propsys")
const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator);
const IID IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator);
// Prints the friendly name of one audio endpoint to stdout.
// Returns the failing HRESULT if the property store cannot be opened or the
// value cannot be read, and E_UNEXPECTED if the value is not a string.
static HRESULT
DumpDeviceProperties(IMMDevice *pDevice)
{
    IPropertyStore *pProps = NULL;
    HRESULT result = pDevice->OpenPropertyStore(STGM_READ, &pProps);
    if (FAILED(result))
        return result;

    PROPVARIANT friendlyName;
    PropVariantInit(&friendlyName);
    result = pProps->GetValue(PKEY_Device_FriendlyName, &friendlyName);
    if (SUCCEEDED(result))
    {
        if (!IsPropVariantString(friendlyName))
            result = E_UNEXPECTED;
        else
            wprintf(L"FriendlyName: %s\n", PropVariantToStringWithDefault(friendlyName, L"(missing)"));
    }

    // Release the value and the store on every path that opened the store.
    PropVariantClear(&friendlyName);
    pProps->Release();
    return result;
}
int
wmain(int argc, WCHAR *argv[])
{
HRESULT hr = CoInitializeEx(0, COINIT_APARTMENTTHREADED);
if (SUCCEEDED(hr))
{
IMMDeviceEnumerator *pEnumerator = NULL;
hr = CoCreateInstance(CLSID_MMDeviceEnumerator, NULL, CLSCTX_ALL, IID_IMMDeviceEnumerator, reinterpret_cast<void **>(&pEnumerator));
if (SUCCEEDED(hr))
{
IMMDeviceCollection *pCollection = NULL;
hr = pEnumerator->EnumAudioEndpoints(eRender, DEVICE_STATE_ACTIVE, &pCollection);
if (SUCCEEDED(hr))
{
UINT cEndpoints = 0;
hr = pCollection->GetCount(&cEndpoints);
if (SUCCEEDED(hr))
{
for (UINT n = 0; SUCCEEDED(hr) && n < cEndpoints; ++n)
{
IMMDevice *pDevice = NULL;
hr = pCollection->Item(n, &pDevice);
if (SUCCEEDED(hr))
{
hr = DumpDeviceProperties(pDevice);
pDevice->Release();
}
}
}
pCollection->Release();
}
pEnumerator->Release();
}
CoUninitialize();
}
return SUCCEEDED(hr) ? 0 : 1;
}
Compiled using: cl -nologo -MDd -Zi -W3 -Od lsdevices.cpp with MSVC 2013.
Here is the code I ended up with, for people who wander onto this page like I did:
You also need this policyconfig.h file
#include <windows.h>
#include <ole2.h>
#include <ShellAPI.h>
#include <olectl.h>
#include <mmdeviceapi.h>
#include <propsys.h>
#include <propvarutil.h>
#include <stdio.h>
#include <Functiondiscoverykeys_devpkey.h>
#include <sstream>
#include <iostream>
#include <string>
#include <vector>
#include <atlstr.h>
#include <atlcore.h>
#include "Policyconfig.h"
#include "Propidl.h"
#include "Functiondiscoverykeys_devpkey.h"
#pragma comment(lib, "ole32")
#pragma comment(lib, "propsys")
using namespace std;
const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator);
const IID IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator);
// Plain record describing one audio endpoint, filled in by
// getDeviceProperty() and EnumAudioDevices().
class DeviceProps {
public:
// Endpoint id string as returned by IMMDevice::GetId, narrowed to std::string.
string id;
// Value of PKEY_Device_FriendlyName.
string name;
// Part of PKEY_DeviceClass_IconPath before the comma.
string dll;
// Integer parsed from the part of the icon path after the comma.
int iconID = 0;
// True when this endpoint's id equals the default endpoint's id.
bool isActive = false;
};
// Reads the friendly name and the icon path of `pDevice` into `output`.
//
// The icon path has the form "<module>,<resource id>"; the module goes to
// output->dll and the number to output->iconID. If the path has no comma or
// the id is not numeric, output->iconID keeps its previous value.
//
// Returns E_POINTER for null arguments, the failing HRESULT if the property
// store cannot be opened, and otherwise the outcome of the icon-path lookup
// (E_UNEXPECTED if that value is not a string). As before, a problem with the
// friendly name alone is not reflected in the return value.
static HRESULT getDeviceProperty(IMMDevice* pDevice, DeviceProps* output)
{
    if (pDevice == NULL || output == NULL)
        return E_POINTER;

    IPropertyStore* pStore = NULL;
    HRESULT hr = pDevice->OpenPropertyStore(STGM_READ, &pStore);
    if (SUCCEEDED(hr))
    {
        PROPVARIANT prop;
        PropVariantInit(&prop);
        hr = pStore->GetValue(PKEY_Device_FriendlyName, &prop);
        if (SUCCEEDED(hr))
        {
            if (IsPropVariantString(prop))
            {
                std::wstring wstr(PropVariantToStringWithDefault(prop, L"missing"));
                // Element-wise narrowing; non-ASCII characters are not preserved.
                output->name.assign(wstr.begin(), wstr.end());
            }
            else
                hr = E_UNEXPECTED;
        }
        // Release the first value before the variant is reused, otherwise the
        // string it owns is leaked.
        PropVariantClear(&prop);

        hr = pStore->GetValue(PKEY_DeviceClass_IconPath, &prop);
        if (SUCCEEDED(hr))
        {
            if (IsPropVariantString(prop))
            {
                std::wstring propW(PropVariantToStringWithDefault(prop, L"missing,0"));
                std::string cPropValue(propW.begin(), propW.end());
                const std::string::size_type comma = cPropValue.find(',');
                output->dll = cPropValue.substr(0, comma);
                if (comma != std::string::npos)
                {
                    try
                    {
                        // stoi stops at the first non-digit, so any trailing
                        // text after the number is ignored.
                        output->iconID = stoi(cPropValue.substr(comma + 1));
                    }
                    catch (const std::exception&)
                    {
                        // Not a number: keep the existing icon id.
                    }
                }
            }
            else
                hr = E_UNEXPECTED;
        }
        PropVariantClear(&prop);
        pStore->Release();
    }
    return hr;
}
// Returns one DeviceProps per active endpoint of the given data flow and
// marks the one whose id matches the default multimedia endpoint.
//
// COM is initialised here and intentionally left initialised (the matching
// CoUninitialize is commented out), so later COM calls on this thread keep
// working. Enumeration stops after the first device for which a COM call or
// getDeviceProperty() fails; that device is still included in the result.
std::vector<DeviceProps> EnumAudioDevices(EDataFlow deviceType = eRender)
{
    std::vector<DeviceProps> output;

    // Reads an endpoint id into a std::string and frees the COM-allocated
    // buffer. Returns an empty string if the id cannot be obtained.
    auto readId = [](IMMDevice* pEndpoint) -> std::string {
        std::string result;
        LPWSTR rawId = NULL;
        if (SUCCEEDED(pEndpoint->GetId(&rawId)) && rawId != NULL)
        {
            std::wstring wide(rawId);
            result.assign(wide.begin(), wide.end());
            CoTaskMemFree(rawId); // GetId allocates; the caller must free
        }
        return result;
    };

    HRESULT hr = CoInitializeEx(0, COINIT_APARTMENTTHREADED);
    if (SUCCEEDED(hr))
    {
        IMMDeviceEnumerator* pEnumerator = NULL;
        hr = CoCreateInstance(CLSID_MMDeviceEnumerator, NULL, CLSCTX_ALL, IID_IMMDeviceEnumerator, reinterpret_cast<void**>(&pEnumerator));
        if (SUCCEEDED(hr))
        {
            // There may be no default endpoint at all; in that case no device
            // is marked active.
            std::string activeId;
            IMMDevice* pActive = NULL;
            if (SUCCEEDED(pEnumerator->GetDefaultAudioEndpoint(deviceType, eMultimedia, &pActive)) && pActive != NULL)
            {
                activeId = readId(pActive);
                pActive->Release();
            }

            IMMDeviceCollection* pCollection = NULL;
            hr = pEnumerator->EnumAudioEndpoints(deviceType, DEVICE_STATE_ACTIVE, &pCollection);
            if (SUCCEEDED(hr))
            {
                UINT cEndpoints = 0;
                hr = pCollection->GetCount(&cEndpoints);
                if (SUCCEEDED(hr))
                {
                    output.reserve(cEndpoints);
                    for (UINT n = 0; SUCCEEDED(hr) && n < cEndpoints; ++n)
                    {
                        IMMDevice* pDevice = NULL;
                        hr = pCollection->Item(n, &pDevice);
                        if (SUCCEEDED(hr))
                        {
                            DeviceProps device;
                            hr = getDeviceProperty(pDevice, &device);
                            device.id = readId(pDevice);
                            if (!device.id.empty() && device.id == activeId)
                                device.isActive = true;
                            output.push_back(device);
                            pDevice->Release();
                        }
                    }
                }
                pCollection->Release();
            }
            pEnumerator->Release();
        }
        //CoUninitialize();
    }
    return output;
}
// Makes the endpoint with the given id the default for the console,
// multimedia and communications roles.
//
// All three roles are attempted even if one fails. Returns the HRESULT of
// creating the policy-config object if that fails, otherwise the first
// failing SetDefaultEndpoint result, or the success code if all succeeded.
static HRESULT setDefaultDevice(string id)
{
    std::wstring devID(id.begin(), id.end());
    IPolicyConfigVista* pPolicyConfig = NULL;
    HRESULT hr = CoCreateInstance(__uuidof(CPolicyConfigVistaClient),
        NULL, CLSCTX_ALL, __uuidof(IPolicyConfigVista), (LPVOID*)&pPolicyConfig);
    if (SUCCEEDED(hr))
    {
        const ERole roles[] = { eConsole, eMultimedia, eCommunications };
        for (ERole role : roles)
        {
            const HRESULT roleResult = pPolicyConfig->SetDefaultEndpoint(devID.c_str(), role);
            // Remember the first failure instead of letting a later success
            // overwrite it.
            if (FAILED(roleResult) && SUCCEEDED(hr))
                hr = roleResult;
        }
        pPolicyConfig->Release();
    }
    return hr;
}
// Cycles the default endpoint: selects the device that follows the currently
// active one in enumeration order, wrapping to the first device when the
// active one is last or when no device is marked active.
//
// Returns 1 if a device was selected successfully, 0 if there are no devices
// or the switch failed.
static int switchDefaultDevice(EDataFlow deviceType = eRender)
{
    const std::vector<DeviceProps> devices = EnumAudioDevices(deviceType);
    if (devices.empty())
        return 0;

    // Index of the device to activate; stays 0 when nothing is active.
    std::size_t next = 0;
    for (std::size_t i = 0; i < devices.size(); ++i)
    {
        if (devices[i].isActive)
        {
            next = (i + 1) % devices.size();
            break;
        }
    }

    return SUCCEEDED(setDefaultDevice(devices[next].id)) ? 1 : 0;
}
int wmain(int argc, WCHAR* argv[])
{
std::vector<DeviceProps> result = EnumAudioDevices(eRender);
for (const auto& device : result)
{
std::cout << (device.isActive ? "ACTIVE:" : "") << "Name: " << device.name << " DLL: " << device.dll << " (#" << device.iconID << ")" << "\n" << device.id << "\n";
}
switchDefaultDevice(eRender);
}