No Sound When Playing Back PCM-Decoded Audio - c++

I am reading AAC audio frames, which I then decode to PCM with Media Foundation, and am trying to play back through WASAPI. Specifically: 48,000 Hz, 2 channels, 16-bit. I am able to decode the frames, write them to a file full.pcm, and then open and play that PCM file successfully in Audacity. However, my code to play back through the device speakers gives me nothing. The device I am trying to play through is the default render endpoint, which is my DAC. I am not getting any bad HRESULTs from any of the WASAPI-related code, so I'm confused. WASAPI is new to me though, so maybe there is something obvious I am missing.
#include "AudioDecoder.h"
#include <vector>
#include <__msvc_chrono.hpp>
#include <string>
#include <fstream>
#include <cassert>
#include <filesystem>
#include <mmdeviceapi.h>
#include <endpointvolume.h>
#include <functiondiscoverykeys.h>
#include <audioclient.h>
int fps_counter = 0;
int frame_index = 0;
IAudioClient* audio_client;
IAudioRenderClient* render_client = nullptr;
int setup_audio_playback()
{
HRESULT hr = S_OK;
IMMDeviceEnumerator* pEnumerator = nullptr;
IMMDevice* pDevice = nullptr;
ATLENSURE_SUCCEEDED(CoCreateInstance(__uuidof(MMDeviceEnumerator), nullptr, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), (void**)&pEnumerator));
ATLENSURE_SUCCEEDED(pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice));
IPropertyStore* ips;
ATLENSURE_SUCCEEDED(pDevice->OpenPropertyStore(STGM_READ, &ips));
PROPVARIANT varName;
// Initialize container for property value.
PropVariantInit(&varName);
ATLENSURE_SUCCEEDED(ips->GetValue(PKEY_Device_FriendlyName, &varName));
std::wcout << L"Device name: " << varName.pwszVal << std::endl;
ATLENSURE_SUCCEEDED(pDevice->Activate(__uuidof(IAudioClient), CLSCTX_ALL, nullptr, (void**)&audio_client));
WAVEFORMATEX* format;
ATLENSURE_SUCCEEDED(audio_client->GetMixFormat(&format));
ATLENSURE_SUCCEEDED(audio_client->Initialize(AUDCLNT_SHAREMODE_SHARED, 0, 10000000, 0, format, NULL));
uint32_t bufferFrameCount;
ATLENSURE_SUCCEEDED(audio_client->GetBufferSize(&bufferFrameCount));
ATLENSURE_SUCCEEDED(audio_client->GetService(__uuidof(IAudioRenderClient), (void**)&render_client));
ATLENSURE_SUCCEEDED(audio_client->Start());
return hr;
}
int main()
{
HRESULT hr = S_OK;
std::ofstream fout_all_frames_pcm;
std::filesystem::remove(std::filesystem::current_path() / "full.pcm");
fout_all_frames_pcm.open("full.pcm", std::ios::binary | std::ios::out);
if (FAILED(hr = CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED)))
return hr;
if (FAILED(hr = MFStartup(MF_VERSION)))
return hr;
setup_audio_playback();
AudioDecoder* ad = new AudioDecoder();
std::vector<uint8_t> data;
while (true)
{
std::chrono::time_point<std::chrono::steady_clock> iteration_time = std::chrono::high_resolution_clock::now();
// Read frame data
std::ifstream fin("Encoded Audio Frames\\frame" + std::to_string(frame_index) + ".aac", std::ios::binary | std::ios::in);
if (fin.fail())
{
//throw std::runtime_error("Invalid file path specified");
break;
}
// Get file length
fin.seekg(0, std::ios::end);
size_t const length = fin.tellg();
fin.seekg(0, std::ios::beg);
if (length > data.size())
{
static size_t constexpr const granularity = 64 << 10;
data.resize((length + (granularity - 1)) & ~(granularity - 1));
assert(length <= data.size());
}
// Copy frame data from file to array;
fin.read(reinterpret_cast<char*>(data.data()), length);
fin.close();
CComPtr<IMFSample> pcm_sample;
while (!ad->decode_sync(data.data(), length, &pcm_sample))
{
if (pcm_sample == nullptr) // This will happen if the color converter isn't able to produce output, so we will continue in that case
continue;
CComPtr<IMFMediaBuffer> buffer;
if (FAILED(hr = pcm_sample->ConvertToContiguousBuffer(&buffer)))
return hr;
unsigned char* datas;
DWORD length;
if (FAILED(hr = buffer->GetCurrentLength(&length)))
return hr;
if (FAILED(hr = buffer->Lock(&datas, nullptr, &length)))
return hr;
fout_all_frames_pcm.write((char*)datas, length);
// Does nothing
//Sleep(120);
// Grab all the available space in the shared buffer.
uint8_t* pData;
ATLENSURE_SUCCEEDED(render_client->GetBuffer(1, &pData));
memcpy(pData, datas, length);
DWORD flags = 0;
ATLENSURE_SUCCEEDED(render_client->ReleaseBuffer(1, flags));
pcm_sample.Release();
}
frame_index++;
}
audio_client->Stop();
return 0;
}

Doing
render_client->GetBuffer(1, ...
will not give you any stable behavior, because you are submitting data frame by frame: literally one PCM frame out of your 48,000 frames per second. The code is likely broken beyond this, too, because you are losing most of the data: you get much more than that from the decoder and feed just one frame of it to the device.
You would want to check this article for the part where the code identifies how many frames GetBuffer can carry, and then loop, filling those buffers accurately until you have consumed your IMFSample data.
How large are the buffers you obtain with GetBuffer? For 10 ms buffers, which are pretty typical, at a 48 kHz sampling rate you would have 480 frames per buffer. With stereo 16-bit PCM a frame is four bytes, so you would be delivering around 2 KB of data on every GetBuffer/ReleaseBuffer iteration.
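For illustration, here is what that loop could look like: a minimal sketch, not production code, reusing the audio_client / render_client globals from the question. It assumes the decoded PCM already matches the format passed to Initialize; note that GetMixFormat often reports 32-bit float rather than 16-bit PCM, so a format mismatch between the decoder output and the shared-mode stream is another thing to verify.
HRESULT submit_pcm(const BYTE* data, DWORD bytes, DWORD bytesPerFrame, UINT32 bufferFrameCount)
{
    UINT32 framesLeft = bytes / bytesPerFrame;
    while (framesLeft > 0)
    {
        // How much of the shared buffer is still queued for the device?
        UINT32 padding = 0;
        HRESULT hr = audio_client->GetCurrentPadding(&padding);
        if (FAILED(hr)) return hr;
        UINT32 framesFree = bufferFrameCount - padding;
        if (framesFree == 0) { Sleep(5); continue; } // buffer full, let the device drain
        UINT32 framesToWrite = framesFree < framesLeft ? framesFree : framesLeft;
        BYTE* dst = nullptr;
        hr = render_client->GetBuffer(framesToWrite, &dst);
        if (FAILED(hr)) return hr;
        memcpy(dst, data, framesToWrite * bytesPerFrame);
        hr = render_client->ReleaseBuffer(framesToWrite, 0);
        if (FAILED(hr)) return hr;
        data += framesToWrite * bytesPerFrame;
        framesLeft -= framesToWrite;
    }
    return S_OK;
}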

Related

Bits Per Sample / Pixel libtiff vs WIC

TIFF *TiffImage;
uint16 photo, bps, spp, fillorder;
uint32 width,height;
unsigned long stripSize;
unsigned long imageOffset, result;
int stripMax, stripCount;
unsigned char *buffer, tempbyte;
unsigned short *buffer16;
unsigned int *buffer32;
unsigned long bufferSize, count;
bool success = true;
int shiftCount = 0;
//read image to InData
const char *InFileName = fileName.c_str();
if((TiffImage = TIFFOpen(InFileName, "r")) == NULL){
ErrMsg("Could not open incoming image\n");
return false;
}
// Check that it is of a type that we support
if(TIFFGetField(TiffImage, TIFFTAG_BITSPERSAMPLE, &bps) == 0) {
ErrMsg("Either undefined or unsupported number of bits per sample\n");
return false;
}
TBitPrecision bitPrecision = (TBitPrecision)bps;
char* imageDesc = NULL;
TIFFGetField(TiffImage, TIFFTAG_IMAGEDESCRIPTION, &imageDesc);
// Get actual bit precision for CP Images
if (bps > 8 && bps <= 16)
{
if (GetCpTiffTag(imageDesc, CP_TIFFTAG_BITPRECISION, (uint32*)&bitPrecision) == true)
{
shiftCount = 16 - bitPrecision;
}
}
In my libtiff implementation I have used 12-bit-per-pixel images and also 10 bpp; it is very easy to set this info in libtiff.
I can't find a similar way to do so in WIC.
uint16_t photo, bps, spp, fillorder;
uint32_t width,height;
unsigned long stripSize;
unsigned long imageOffset, result;
int stripMax, stripCount;
unsigned char *buffer, tempbyte;
unsigned short *buffer16;
unsigned int *buffer32;
unsigned long bufferSize, count;
bool success = true;
int shiftCount = 0;
//read image to InData
const char *InFileName = fileName.c_str();
IWICImagingFactory* piFactory = NULL;
// Create WIC factory
HRESULT hr = CoCreateInstance(
CLSID_WICImagingFactory,
NULL,
CLSCTX_INPROC_SERVER,
IID_PPV_ARGS(&piFactory)
);
// Create a decoder
IWICBitmapDecoder *pIDecoder = NULL;
IWICBitmapFrameDecode *pIDecoderFrame = NULL;
std::wstring ws;
ws.assign(fileName.begin(), fileName.end());
// get temporary LPCWSTR (pretty safe)
LPCWSTR pcwstr = ws.c_str();
hr = piFactory->CreateDecoderFromFilename(
pcwstr, // Image to be decoded
NULL, // Do not prefer a particular vendor
GENERIC_READ, // Desired read access to the file
WICDecodeMetadataCacheOnDemand, // Cache metadata when needed
&pIDecoder // Pointer to the decoder
);
// Retrieve the first bitmap frame.
if (SUCCEEDED(hr))
{
hr = pIDecoder->GetFrame(0, &pIDecoderFrame);
}
else
{
ErrMsg("Could not open incoming image\n");
}
return true;
Am I supposed to use EXIF or XMP? How do I find the equivalents of the TIFF tags for WIC?
The DirectXTex library has lots of examples of using WIC from C++.
You need something like:
using Microsoft::WRL::ComPtr;

ComPtr<IWICMetadataQueryReader> metareader;
hr = pIDecoderFrame->GetMetadataQueryReader(metareader.GetAddressOf());
if (SUCCEEDED(hr))
{
    PROPVARIANT value;
    PropVariantInit(&value);
    if (SUCCEEDED(metareader->GetMetadataByName(L"/ifd/{ushort=258}", &value))
        && value.vt == VT_UI2)
    {
        // BitsPerSample is in value.uiVal
    }
    PropVariantClear(&value);
}
You should get in the habit of using a smart-pointer like ComPtr rather than using raw interface pointers to keep the ref counts correct.
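For example, the decoder objects from the question could be held like this (a sketch using the same WIC calls as above):
#include <wrl/client.h>
using Microsoft::WRL::ComPtr;

// ComPtr calls Release() automatically when it goes out of scope,
// so no manual cleanup is needed on any exit path.
ComPtr<IWICImagingFactory> factory;
HRESULT hr = CoCreateInstance(CLSID_WICImagingFactory, NULL,
                              CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&factory));
ComPtr<IWICBitmapDecoder> decoder;
if (SUCCEEDED(hr))
    hr = factory->CreateDecoderFromFilename(pcwstr, NULL, GENERIC_READ,
                                            WICDecodeMetadataCacheOnDemand,
                                            decoder.GetAddressOf());
ComPtr<IWICBitmapFrameDecode> frame;
if (SUCCEEDED(hr))
    hr = decoder->GetFrame(0, frame.GetAddressOf());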

DirectSound API explanation

As a college project we have to develop a server-client music streaming application using the DirectSound API. However, due to the lack of information, guides, or tutorials online, the only source I can gather info from is the piece of code provided below (which was the only thing the lecturer provided). Can anyone help me understand the general purpose of these functions and the order they should be called in?
Thanks in advance.
IDirectSound8 * directSound = nullptr;
IDirectSoundBuffer * primaryBuffer = nullptr;
IDirectSoundBuffer8 * secondaryBuffer = nullptr;
BYTE * dataBuffer = nullptr;
DWORD dataBufferSize;
DWORD averageBytesPerSecond;
// Search the file for the chunk we want
// Returns the size of the chunk and its location in the file
HRESULT FindChunk(HANDLE fileHandle, FOURCC fourcc, DWORD & chunkSize, DWORD & chunkDataPosition)
{
HRESULT hr = S_OK;
DWORD chunkType;
DWORD chunkDataSize;
DWORD riffDataSize = 0;
DWORD fileType;
DWORD bytesRead = 0;
DWORD offset = 0;
if (SetFilePointer(fileHandle, 0, NULL, FILE_BEGIN) == INVALID_SET_FILE_POINTER)
{
return HRESULT_FROM_WIN32(GetLastError());
}
while (hr == S_OK)
{
if (ReadFile(fileHandle, &chunkType, sizeof(DWORD), &bytesRead, NULL) == 0)
{
hr = HRESULT_FROM_WIN32(GetLastError());
}
if (ReadFile(fileHandle, &chunkDataSize, sizeof(DWORD), &bytesRead, NULL) == 0)
{
hr = HRESULT_FROM_WIN32(GetLastError());
}
switch (chunkType)
{
case fourccRIFF:
riffDataSize = chunkDataSize;
chunkDataSize = 4;
if (ReadFile(fileHandle, &fileType, sizeof(DWORD), &bytesRead, NULL) == 0)
{
hr = HRESULT_FROM_WIN32(GetLastError());
}
break;
default:
if (SetFilePointer(fileHandle, chunkDataSize, NULL, FILE_CURRENT) == INVALID_SET_FILE_POINTER)
{
return HRESULT_FROM_WIN32(GetLastError());
}
}
offset += sizeof(DWORD) * 2;
if (chunkType == fourcc)
{
chunkSize = chunkDataSize;
chunkDataPosition = offset;
return S_OK;
}
offset += chunkDataSize;
if (bytesRead >= riffDataSize)
{
return S_FALSE;
}
}
return S_OK;
}
// Read a chunk of data of the specified size from the file at the specified location into the supplied buffer
HRESULT ReadChunkData(HANDLE fileHandle, void * buffer, DWORD buffersize, DWORD bufferoffset)
{
HRESULT hr = S_OK;
DWORD bytesRead;
if (SetFilePointer(fileHandle, bufferoffset, NULL, FILE_BEGIN) == INVALID_SET_FILE_POINTER)
{
return HRESULT_FROM_WIN32(GetLastError());
}
if (ReadFile(fileHandle, buffer, buffersize, &bytesRead, NULL) == 0)
{
hr = HRESULT_FROM_WIN32(GetLastError());
}
return hr;
}
bool Initialise()
{
HRESULT result;
DSBUFFERDESC bufferDesc;
WAVEFORMATEX waveFormat;
// Initialize the direct sound interface pointer for the default sound device.
result = DirectSoundCreate8(NULL, &directSound, NULL);
if (FAILED(result))
{
return false;
}
// Set the cooperative level to priority so the format of the primary sound buffer can be modified.
// We use the handle of the desktop window since we are a console application. If you do write a
// graphical application, you should use the HWnd of the graphical application.
result = directSound->SetCooperativeLevel(GetDesktopWindow(), DSSCL_PRIORITY);
if (FAILED(result))
{
return false;
}
// Setup the primary buffer description.
bufferDesc.dwSize = sizeof(DSBUFFERDESC);
bufferDesc.dwFlags = DSBCAPS_PRIMARYBUFFER | DSBCAPS_CTRLVOLUME;
bufferDesc.dwBufferBytes = 0;
bufferDesc.dwReserved = 0;
bufferDesc.lpwfxFormat = NULL;
bufferDesc.guid3DAlgorithm = GUID_NULL;
// Get control of the primary sound buffer on the default sound device.
result = directSound->CreateSoundBuffer(&bufferDesc, &primaryBuffer, NULL);
if (FAILED(result))
{
return false;
}
// Setup the format of the primary sound bufffer.
// In this case it is a .WAV file recorded at 44,100 samples per second in 16-bit stereo (CD audio format).
// Really, we should set this up from the wave file format loaded from the file.
waveFormat.wFormatTag = WAVE_FORMAT_PCM;
waveFormat.nSamplesPerSec = 44100;
waveFormat.wBitsPerSample = 16;
waveFormat.nChannels = 2;
waveFormat.nBlockAlign = (waveFormat.wBitsPerSample / 8) * waveFormat.nChannels;
waveFormat.nAvgBytesPerSec = waveFormat.nSamplesPerSec * waveFormat.nBlockAlign;
waveFormat.cbSize = 0;
// Set the primary buffer to be the wave format specified.
result = primaryBuffer->SetFormat(&waveFormat);
if (FAILED(result))
{
return false;
}
return true;
}
void Shutdown()
{
// Destroy the data buffer
if (dataBuffer != nullptr)
{
delete[] dataBuffer;
dataBuffer = nullptr;
}
// Release the primary sound buffer pointer.
if (primaryBuffer != nullptr)
{
primaryBuffer->Release();
primaryBuffer = nullptr;
}
// Release the direct sound interface pointer.
if (directSound != nullptr)
{
directSound->Release();
directSound = nullptr;
}
}
// Load the wave file into memory and setup the secondary buffer.
bool LoadWaveFile(TCHAR * filename)
{
WAVEFORMATEXTENSIBLE wfx = { 0 };
WAVEFORMATEX waveFormat;
DSBUFFERDESC bufferDesc;
HRESULT result;
IDirectSoundBuffer * tempBuffer;
DWORD chunkSize;
DWORD chunkPosition;
DWORD filetype;
HRESULT hr = S_OK;
// Open the wave file
HANDLE fileHandle = CreateFile(filename, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, NULL);
if (fileHandle == INVALID_HANDLE_VALUE)
{
return false;
}
if (SetFilePointer(fileHandle, 0, NULL, FILE_BEGIN) == INVALID_SET_FILE_POINTER)
{
return false;
}
// Make sure we have a RIFF wave file
FindChunk(fileHandle, fourccRIFF, chunkSize, chunkPosition);
ReadChunkData(fileHandle, &filetype, sizeof(DWORD), chunkPosition);
if (filetype != fourccWAVE)
{
return false;
}
// Locate the 'fmt ' chunk, and copy its contents into a WAVEFORMATEXTENSIBLE structure.
FindChunk(fileHandle, fourccFMT, chunkSize, chunkPosition);
ReadChunkData(fileHandle, &wfx, chunkSize, chunkPosition);
// Find the audio data chunk
FindChunk(fileHandle, fourccDATA, chunkSize, chunkPosition);
dataBufferSize = chunkSize;
// Read the audio data from the 'data' chunk. This is the data that needs to be copied into
// the secondary buffer for playing
dataBuffer = new BYTE[dataBufferSize];
ReadChunkData(fileHandle, dataBuffer, dataBufferSize, chunkPosition);
CloseHandle(fileHandle);
// Set the wave format of the secondary buffer that this wave file will be loaded onto.
// The value of wfx.Format.nAvgBytesPerSec will be very useful to you since it gives you
// an approximate value for how many bytes it takes to hold one second of audio data.
waveFormat.wFormatTag = wfx.Format.wFormatTag;
waveFormat.nSamplesPerSec = wfx.Format.nSamplesPerSec;
waveFormat.wBitsPerSample = wfx.Format.wBitsPerSample;
waveFormat.nChannels = wfx.Format.nChannels;
waveFormat.nBlockAlign = wfx.Format.nBlockAlign;
waveFormat.nAvgBytesPerSec = wfx.Format.nAvgBytesPerSec;
waveFormat.cbSize = 0;
// Set the buffer description of the secondary sound buffer that the wave file will be loaded onto.
// In this example, we setup a buffer the same size as that of the audio data. For the assignment,
// your secondary buffer should only be large enough to hold approximately four seconds of data.
bufferDesc.dwSize = sizeof(DSBUFFERDESC);
bufferDesc.dwFlags = DSBCAPS_CTRLVOLUME | DSBCAPS_GLOBALFOCUS | DSBCAPS_CTRLPOSITIONNOTIFY;
bufferDesc.dwBufferBytes = dataBufferSize;
bufferDesc.dwReserved = 0;
bufferDesc.lpwfxFormat = &waveFormat;
bufferDesc.guid3DAlgorithm = GUID_NULL;
// Create a temporary sound buffer with the specific buffer settings.
result = directSound->CreateSoundBuffer(&bufferDesc, &tempBuffer, NULL);
if (FAILED(result))
{
return false;
}
// Test the buffer format against the direct sound 8 interface and create the secondary buffer.
result = tempBuffer->QueryInterface(IID_IDirectSoundBuffer8, (void**)&secondaryBuffer);
if (FAILED(result))
{
return false;
}
// Release the temporary buffer.
tempBuffer->Release();
tempBuffer = nullptr;
return true;
}
void ReleaseSecondaryBuffer()
{
// Release the secondary sound buffer.
if (secondaryBuffer != nullptr)
{
(secondaryBuffer)->Release();
secondaryBuffer = nullptr;
}
}
bool PlayWaveFile()
{
HRESULT result;
unsigned char * bufferPtr1;
unsigned long bufferSize1;
unsigned char * bufferPtr2;
unsigned long bufferSize2;
BYTE * dataBufferPtr = dataBuffer;
DWORD soundBytesOutput = 0;
bool fillFirstHalf = true;
LPDIRECTSOUNDNOTIFY8 directSoundNotify;
DSBPOSITIONNOTIFY positionNotify[2];
// Set position of playback at the beginning of the sound buffer.
result = secondaryBuffer->SetCurrentPosition(0);
if (FAILED(result))
{
return false;
}
// Set volume of the buffer to 100%.
result = secondaryBuffer->SetVolume(DSBVOLUME_MAX);
if (FAILED(result))
{
return false;
}
// Create an event for notification that playing has stopped. This is only useful
// when your audio file fits in the entire secondary buffer (as in this example).
// For the assignment, you are going to need notifications when the playback has reached the
// first quarter of the buffer or the third quarter of the buffer so that you know when
// you should copy more data into the secondary buffer.
HANDLE playEventHandles[1];
playEventHandles[0] = CreateEvent(NULL, FALSE, FALSE, NULL);
result = secondaryBuffer->QueryInterface(IID_IDirectSoundNotify8, (LPVOID*)&directSoundNotify);
if (FAILED(result))
{
return false;
}
// This notification is used to indicate that we have finished playing the buffer of audio. In
// the assignment, you will need two different notifications as mentioned above.
positionNotify[0].dwOffset = DSBPN_OFFSETSTOP;
positionNotify[0].hEventNotify = playEventHandles[0];
directSoundNotify->SetNotificationPositions(1, positionNotify);
directSoundNotify->Release();
// Now we can fill our secondary buffer and play it. In the assignment, you will not be able to fill
// the buffer all at once since the secondary buffer will not be large enough. Instead, you will need to
// loop through the data that you have retrieved from the server, filling different sections of the
// secondary buffer as you receive notifications.
// Lock the first part of the secondary buffer to write wave data into it. In this case, we lock the entire
// buffer, but for the assignment, you will only want to lock the half of the buffer that is not being played.
// You will definitely want to look up the methods for the IDirectSoundBuffer8 interface to see what these
// methods do and what the parameters are used for.
result = secondaryBuffer->Lock(0, dataBufferSize, (void**)&bufferPtr1, (DWORD*)&bufferSize1, (void**)&bufferPtr2, (DWORD*)&bufferSize2, 0);
if (FAILED(result))
{
return false;
}
// Copy the wave data into the buffer. If you need to insert some silence into the buffer, insert values of 0.
memcpy(bufferPtr1, dataBuffer, bufferSize1);
if (bufferPtr2 != NULL)
{
memcpy(bufferPtr2, dataBuffer, bufferSize2);
}
// Unlock the secondary buffer after the data has been written to it.
result = secondaryBuffer->Unlock((void*)bufferPtr1, bufferSize1, (void*)bufferPtr2, bufferSize2);
if (FAILED(result))
{
return false;
}
// Play the contents of the secondary sound buffer. If you want play to go back to the start of the buffer
// again, set the last parameter to DSBPLAY_LOOPING instead of 0. If play is already in progress, then
// play will just continue.
result = secondaryBuffer->Play(0, 0, 0);
if (FAILED(result))
{
return false;
}
// Wait for notifications. In this case, we only have one notification so we could use WaitForSingleObject,
// but for the assignment you will need more than one notification, so you will need WaitForMultipleObjects
result = WaitForMultipleObjects(1, playEventHandles, FALSE, INFINITE);
// In this case, we have been notified that playback has finished so we can just finish. In the assignment,
// you should use the appropriate notification to determine which part of the secondary buffer needs to be
// filled and handle it accordingly.
CloseHandle(playEventHandles[0]);
return true;
}
DirectSound is deprecated. See below for recommended replacements.
Documentation can be found on Microsoft Docs. The last time samples for DirectSound were shipped was in the legacy DirectX SDK (November 2007) release which is why you are having a hard time finding them. You can find them on GitHub. The headers and link libraries for DirectSound are in the Windows SDK.
Recommendations
For 'real-time mixing and effects' often used in games, the modern replacement is XAudio2. XAudio 2.9 is included in Windows 10, and is available through a simple side-by-side redistribution model for Windows 7, Windows 8.0, and Windows 8.1. Documentation can be found here, samples can be found here, and the redist can be found here. You may also want to take a look at DirectX Tool Kit for Audio.
For other audio output and input, see the Windows Core Audio APIs (WASAPI), which are supported on Windows Vista, Windows 7, Windows 8.0, Windows 8.1, and Windows 10. Documentation can be found here. Some samples can be found on GitHub in Xbox-ATG-Samples and Windows-universal-samples; while these are all UWP samples, the API also supports Win32 desktop.
There's also a new Microsoft Spatial Sounds API on Windows 10 (a.k.a. Windows Sonic). Documentation can be found here. Samples can be found on GitHub in Xbox-ATG-Samples.
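To give a flavour of the XAudio2 path, startup is only a few calls (a minimal sketch; error handling and voice configuration elided):
#include <xaudio2.h>
#include <wrl/client.h>
using Microsoft::WRL::ComPtr;

// Create the engine and a mastering voice targeting the default device.
ComPtr<IXAudio2> xaudio;
HRESULT hr = XAudio2Create(xaudio.GetAddressOf(), 0, XAUDIO2_DEFAULT_PROCESSOR);
IXAudio2MasteringVoice* masteringVoice = nullptr;
if (SUCCEEDED(hr))
    hr = xaudio->CreateMasteringVoice(&masteringVoice);
// Audio is then played by creating source voices (CreateSourceVoice)
// and queueing buffers on them with SubmitSourceBuffer.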

How to record continuous raw audio data into a circular buffer with C++ on Windows 10?

Since Windows Multimedia turned out to be utterly incapable of recording continuous audio, I got the hint to use Windows Core Audio. There is sort of a manual here, but I can't figure out how to write the loads of overhead code to get the recording working. Can anyone provide a complete, minimal implementation of continuous audio recording to a circular buffer?
So far I am stuck at the code below not getting past the line pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice); because pEnumerator remains nullptr.
#define VC_EXTRALEAN
#define _USE_MATH_DEFINES
#include <Windows.h>
#include <Audioclient.h>
#include <Mmdeviceapi.h>
#define REFTIMES_PER_SEC 10000000
#define REFTIMES_PER_MILLISEC 10000
int main() {
REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
UINT32 bufferFrameCount;
UINT32 numFramesAvailable;
IMMDeviceEnumerator* pEnumerator = NULL;
IMMDevice* pDevice = NULL;
IAudioClient* pAudioClient = NULL;
IAudioCaptureClient* pCaptureClient = NULL;
WAVEFORMATEX* pwfx = NULL;
UINT32 packetLength = 0;
BYTE* pData;
DWORD flags;
CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), (void**)&pEnumerator);
pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice);
pDevice->Activate(__uuidof(IAudioClient), CLSCTX_ALL, NULL, (void**)&pAudioClient);
pAudioClient->GetMixFormat(&pwfx);
pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK, hnsRequestedDuration, 0, pwfx, NULL);
pAudioClient->GetBufferSize(&bufferFrameCount); // Get the size of the allocated buffer.
pAudioClient->GetService(__uuidof(IAudioCaptureClient), (void**)&pCaptureClient);
// Calculate the actual duration of the allocated buffer.
REFERENCE_TIME hnsActualDuration = (double)REFTIMES_PER_SEC* bufferFrameCount / pwfx->nSamplesPerSec;
pAudioClient->Start(); // Start recording.
// Each loop fills about half of the shared buffer.
while(true) {
// Sleep for half the buffer duration.
Sleep(hnsActualDuration/REFTIMES_PER_MILLISEC/2);
pCaptureClient->GetNextPacketSize(&packetLength);
while(packetLength != 0) {
// Get the available data in the shared buffer.
pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL);
if(flags&AUDCLNT_BUFFERFLAGS_SILENT) {
pData = NULL; // Tell CopyData to write silence.
}
// Copy the available capture data to the audio sink.
//hr = pMySink->CopyData(pData, numFramesAvailable, &bDone);
pCaptureClient->ReleaseBuffer(numFramesAvailable);
pCaptureClient->GetNextPacketSize(&packetLength);
}
}
pAudioClient->Stop();
return 0;
}
EDIT (24.07.2021):
Here is an update of my code for troubleshooting:
#define VC_EXTRALEAN
#define _USE_MATH_DEFINES
#include <Windows.h>
#include <Audioclient.h>
#include <Mmdeviceapi.h>
#include <chrono>
class Clock {
private:
typedef chrono::high_resolution_clock clock;
chrono::time_point<clock> t;
public:
Clock() { start(); }
void start() { t = clock::now(); }
double stop() const { return chrono::duration_cast<chrono::duration<double>>(clock::now()-t).count(); }
};
const uint base = 4096;
const uint sample_rate = 48000; // must be supported by microphone
const uint sample_size = 1*base; // must be a power of 2
const uint bandwidth = 5000; // must be <= sample_rate/2
float* wave = new float[sample_size]; // circular buffer
void fill(float* const wave, const float* const buffer, int offset) {
for(int i=sample_size; i>=offset; i--) {
wave[i] = wave[i-offset];
}
for(int i=0; i<offset; i++) {
const uint p = offset-1-i;
wave[i] = 0.5f*(buffer[2*p]+buffer[2*p+1]); // left and right channels
}
}
int main() {
for(uint i=0; i<sample_size; i++) wave[i] = 0.0f;
Clock clock;
#define REFTIMES_PER_SEC 10000000
#define REFTIMES_PER_MILLISEC 10000
REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
UINT32 bufferFrameCount;
UINT32 numFramesAvailable;
IMMDeviceEnumerator* pEnumerator = NULL;
IMMDevice* pDevice = NULL;
IAudioClient* pAudioClient = NULL;
IAudioCaptureClient* pCaptureClient = NULL;
WAVEFORMATEX* pwfx = NULL;
UINT32 packetLength = 0;
BYTE* pData;
DWORD flags;
CoInitializeEx(NULL, COINIT_MULTITHREADED);
CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), (void**)&pEnumerator);
pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice);
pDevice->Activate(__uuidof(IAudioClient), CLSCTX_ALL, NULL, (void**)&pAudioClient);
pAudioClient->GetMixFormat(&pwfx);
println(pwfx->wFormatTag);// 65534
println(WAVE_FORMAT_PCM);// 1
println(pwfx->nChannels);// 2
println((uint)pwfx->nSamplesPerSec);// 48000
println(pwfx->wBitsPerSample);// 32
println(pwfx->nBlockAlign);// 8
println(pwfx->wBitsPerSample*pwfx->nChannels/8);// 8
println((uint)pwfx->nAvgBytesPerSec);// 384000
println((uint)(pwfx->nBlockAlign*pwfx->nSamplesPerSec*pwfx->nChannels));// 768000
println(pwfx->cbSize);// 22
pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK, hnsRequestedDuration, 0, pwfx, NULL);
pAudioClient->GetBufferSize(&bufferFrameCount); // Get the size of the allocated buffer.
pAudioClient->GetService(__uuidof(IAudioCaptureClient), (void**)&pCaptureClient);
// Calculate the actual duration of the allocated buffer.
//REFERENCE_TIME hnsActualDuration = (double)REFTIMES_PER_SEC* bufferFrameCount / pwfx->nSamplesPerSec;
pAudioClient->Start(); // Start recording.
while(running) {
pCaptureClient->GetNextPacketSize(&packetLength); // packetLength and numFramesAvailable are either 0 or 480
pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL);
const int offset = (uint)numFramesAvailable;
if(offset>0) {
fill(wave, (float*)pData, offset); // here I add pData to the circular buffer "wave"
}
while(packetLength != 0) {
pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL); // Get the available data in the shared buffer.
if(flags&AUDCLNT_BUFFERFLAGS_SILENT) {
pData = NULL; // Tell CopyData to write silence.
}
pCaptureClient->ReleaseBuffer(numFramesAvailable);
pCaptureClient->GetNextPacketSize(&packetLength);
}
sleep(1.0/120.0-clock.stop());
clock.start();
}
pAudioClient->Stop();
}
You're not calling CoInitializeEx, so all COM calls will fail.
You should also be testing all calls to see if they return an error.
To address the questions posed in the comments:
I believe that if you want to operate the endpoint in shared mode then you have to use the parameters returned by GetMixFormat. This means that:
you are limited to the one sample rate (unless you write code to perform a conversion, which is a non-trivial task)
if you want the samples as floats, you will have to convert them yourself
To write code that runs on all machines, you must cater for whatever the mix format throws at you. This might be:
16 bit integers
24 bit integers (nBlockAlign = 3)
24 bit integers in 32 bit containers (nBlockAlign = 4)
32 bit integers
32 bit floating point (rare)
64 bit floating point (unheard of, in my experience)
The samples will be in the native byte order of the machine your code is running on, and are interleaved.
So, switch on the various parameters in pwfx and write the relevant code for each sample format you want to support.
Assuming you want your floats to be normalised to -1 .. +1, and 2-channel input data, you might do this for 16 bit integers, for example:
const int16_t *inbuf = (const int16_t *) pData;
float *outbuf = ...;
for (int i = 0; i < numFramesAvailable * 2; ++i)
{
    int16_t sample = *inbuf++;
    *outbuf++ = (float) (sample * (1.0 / 32767));
}
Note that I avoid a (slow) floating point division by multiplying by the reciprocal (the compiler will pre-calculate 1.0 / 32767).
I'll leave the rest to you.
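As an illustration of one of the trickier cases in that list (an addition, not part of the original answer): packed 24-bit integers, three bytes per channel sample, might be handled like this, assuming little-endian data and 2 channels:
const uint8_t *inbuf = (const uint8_t *) pData;
float *outbuf = ...; // as in the 16-bit example
for (int i = 0; i < numFramesAvailable * 2; ++i)
{
    // Assemble the 24-bit little-endian value...
    int32_t sample = inbuf[0] | (inbuf[1] << 8) | (inbuf[2] << 16);
    // ...then sign-extend it to 32 bits.
    if (sample & 0x800000)
        sample |= ~0xFFFFFF;
    inbuf += 3;
    *outbuf++ = (float) (sample * (1.0 / 8388607)); // 8388607 = 2^23 - 1
}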
You could use this audio library instead. It's way easier to get up and running than trying to interface with the platform-specific SDKs:
http://www.music.mcgill.ca/~gary/rtaudio/recording.html
Also, while removing the sleep might not help in your example, you should never call sleep, lock a mutex, or allocate memory during audio processing. The delay introduced by those is completely arbitrary compared to the short buffer times, so it will always create problems for you.
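To make "no locks, no allocation" concrete, here is one common shape for a single-producer/single-consumer circular buffer (a sketch, not from the answer above; the capacity must be a power of two):
#include <atomic>
#include <cstddef>

// The capture thread calls push(), a consumer thread calls pop();
// neither ever blocks, locks, or allocates.
template <size_t N> // N must be a power of two
struct RingBuffer {
    float data[N];
    std::atomic<size_t> writePos{0};
    std::atomic<size_t> readPos{0};

    void push(const float* src, size_t count) {
        size_t w = writePos.load(std::memory_order_relaxed);
        size_t r = readPos.load(std::memory_order_acquire);
        size_t freeSpace = N - (w - r);
        if (count > freeSpace) count = freeSpace; // overflow: drop samples
        for (size_t i = 0; i < count; i++)
            data[(w + i) & (N - 1)] = src[i];
        writePos.store(w + count, std::memory_order_release);
    }

    size_t pop(float* dst, size_t count) {
        size_t r = readPos.load(std::memory_order_relaxed);
        size_t w = writePos.load(std::memory_order_acquire);
        size_t avail = w - r;
        if (count > avail) count = avail;
        for (size_t i = 0; i < count; i++)
            dst[i] = data[(r + i) & (N - 1)];
        readPos.store(r + count, std::memory_order_release);
        return count;
    }
};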

Programmatically read fMP4 using ffmpeg

All of my attempts to parse fmp4 have failed with the following errors:
avcodec_send_packet returned -1094995529 (AVERROR_INVALIDDATA) and printed the following errors:
[h264 @ 0x105001600] No start code is found.
[h264 @ 0x105001600] Error splitting the input into NAL units.
What did I do?
Downloaded a perfectly playable H.264 file and fragmented it using the following command:
ffmpeg -i long.mp4 -an -sn -vcodec libx264 -force_key_frames "expr:gte(t,n_forced*4)" -sc_threshold 0 -r 25 -f hls -hls_time 4 -hls_list_size 99999 -start_number 1 -hls_segment_type fmp4 -hls_fmp4_init_filename init.mp4 -t 30 -threads 0 big_bunny.m3u8
and used the following AVIO read example
#include <fstream>
#include <iterator>
#include <algorithm>
#include <vector>
#include <cassert>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
}
std::vector<unsigned char> b1;
std::vector<unsigned char> b2;
bool initialized = false;
int bytesread = 0;
static int read_packet(void* opaque, uint8_t* buf, int buf_size)
{
if (b1.size() && !initialized) {
size_t bytesToRead = std::min(buf_size, (int)b1.size());
::memcpy(buf, b1.data(), bytesToRead);
bytesread += bytesToRead;
if (bytesread >= b1.size()) {
initialized = true;
bytesread = 0;
}
return bytesToRead;
}
::memcpy(buf, b2.data() + bytesread, buf_size);
bytesread += buf_size;
return buf_size;
}
int main(int argc, char** argv)
{
AVFormatContext* fmt_ctx = NULL;
AVIOContext* avio_ctx = NULL;
uint8_t *buffer = NULL, *buffer2 = nullptr, *avio_ctx_buffer = NULL;
size_t buffer_size, buffer_size2, avio_ctx_buffer_size = 4096;
char* input_filename = NULL;
int ret = 0;
std::ifstream input1("/Users/x/Downloads/fmp4/init.mp4", std::ios::binary);
std::ifstream input2("/Users/x/Downloads/fmp4/big_bunny1.m4s", std::ios::binary);
b1 = std::vector<unsigned char>((std::istreambuf_iterator<char>(input1)), std::istreambuf_iterator<char>());
b2 = std::vector<unsigned char>((std::istreambuf_iterator<char>(input2)), std::istreambuf_iterator<char>());
avio_ctx_buffer = (uint8_t*)av_malloc(avio_ctx_buffer_size);
fmt_ctx = avformat_alloc_context();
avio_ctx = avio_alloc_context(avio_ctx_buffer, avio_ctx_buffer_size, 0, nullptr, &read_packet, NULL, NULL);
fmt_ctx->pb = avio_ctx;
AVDictionary* opts = NULL;
// av_dict_set(&opts, "movflags", "frag_keyframe+empty_moov", 0);
ret = avformat_open_input(&fmt_ctx, NULL, NULL, &opts);
ret = avformat_find_stream_info(fmt_ctx, NULL);
AVCodec* decoder = nullptr;
decoder = avcodec_find_decoder(fmt_ctx->streams[0]->codecpar->codec_id);
AVCodecContext* decoderCtx = avcodec_alloc_context3(decoder);
ret = avcodec_open2(decoderCtx, decoder, nullptr);
AVPacket pkt;
av_init_packet(&pkt);
AVFrame* frame = av_frame_alloc();
while (true) {
ret = av_read_frame(fmt_ctx, &pkt);
ret = avcodec_send_packet(decoderCtx, &pkt);
if (ret != 0)
assert(false);
for (;;) {
ret = avcodec_receive_frame(decoderCtx, frame);
if (ret < 0) {
break;
}
int g;
g = 0;
}
}
return 0;
}
I'm not even sure that this is the right way to process fMP4 types, but for the clarity of this example, I simply loaded the init file into the first buffer and the actual media segment into the second buffer, and switch between the buffers according to buf_size's value.
I don't think the demuxer gets all the needed info in the case where you use avio_alloc_context (see Opening a media file), but this is a wild guess: your output shows no errors from the mp4 demuxer, only from the H.264 bitstream parser, so I lean towards the probed file format being raw H.264, though it is hard to tell. If I am mistaken, note that you have not provided a seek callback, and demuxing an mp4 certainly requires seeking to jump around the boxes and then to the sample data. So I'd first try to provide a seek callback and see whether it gets called. But from the example it is not clear why you even need the I/O callbacks, since the data reside in a file, not in memory, so I'd suggest trying the standard avformat_open_input way first.
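If you do keep the custom I/O path, the seek callback is the last argument of avio_alloc_context; over the two in-memory buffers from the question it might look roughly like this (a hypothetical sketch; read_packet would then have to honour the same pos variable instead of its own counters):
static int64_t pos = 0; // global read position over b1 followed by b2

static int64_t seek_cb(void* opaque, int64_t offset, int whence)
{
    const int64_t total = (int64_t)(b1.size() + b2.size());
    switch (whence) {
    case SEEK_SET: pos = offset; break;
    case SEEK_CUR: pos += offset; break;
    case SEEK_END: pos = total + offset; break;
    case AVSEEK_SIZE: return total; // ffmpeg may ask for the stream size
    default: return -1;
    }
    if (pos < 0 || pos > total) return -1;
    return pos;
}

// registered as:
// avio_ctx = avio_alloc_context(avio_ctx_buffer, avio_ctx_buffer_size, 0,
//                               nullptr, &read_packet, NULL, &seek_cb);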

Serialize: CArchive a CImage

I am currently trying to figure out how to properly store a CImage file within a CArchive (JPEG). My current approach to this is the following (pseudo) code:
BOOL CPicture::Serialize(CArchive &ar)
{
IStream *pStream = NULL;
HRESULT hr;
CImage *img = GetImage();
if (ar.IsLoading())
{
HGLOBAL hMem = GlobalAlloc(GMEM_FIXED, 54262);
hr = CreateStreamOnHGlobal(hMem, FALSE, &pStream);
if(SUCCEEDED(hr))
{
ar.Read(pStream, 54262);
img->Load(pStream);
pStream->Release();
GlobalUnlock(hMem);
GlobalFree(hMem);
}
}
else
{
hr = CreateStreamOnHGlobal(0, TRUE, &pStream);
if (SUCCEEDED(hr))
{
hr = img->Save(pStream, Gdiplus::ImageFormatJPEG);
if (SUCCEEDED(hr))
ar.Write(pStream, 54262);
}
}
...
I am just now getting back into C++ and have only done a little with it in the past. Any help would be very much appreciated.
Thank you very much in advance.
I'm not an expert on IStream, but I think you may not be using it correctly. The following code seems to work. It currently archives in PNG format, but that can easily be changed by passing a different format to Save, e.g. Gdiplus::ImageFormatJPEG instead of Gdiplus::ImageFormatPNG.
It owes a lot to the tutorial "Embracing IStream as just a stream of bytes" by S. Arman on CodeProject.
void ImageArchive(CImage *pImage, CArchive &ar)
{
HRESULT hr;
if (ar.IsStoring())
{
// create a stream
IStream *pStream = SHCreateMemStream(NULL, 0);
ASSERT(pStream != NULL);
if (pStream == NULL)
return;
// write the image to a stream rather than file (the stream in this case is just a chunk of memory automatically allocated by the stream itself)
pImage->Save(pStream, Gdiplus::ImageFormatPNG); // Note: IStream will automatically grow up as necessary.
// find the size of memory written (i.e. the image file size)
STATSTG statsg;
hr = pStream->Stat(&statsg, STATFLAG_NONAME);
ASSERT(hr == S_OK);
ASSERT(statsg.cbSize.QuadPart < ULONG_MAX);
ULONG nImgBytes = ULONG(statsg.cbSize.QuadPart); // any image that can be displayed had better not have more than MAX_ULONG bytes
// go to the start of the stream
LARGE_INTEGER seekPos;
seekPos.QuadPart = 0;
hr = pStream->Seek(seekPos, STREAM_SEEK_SET, NULL);
ASSERT(hr == S_OK);
// get data in stream into a standard byte array
char *bytes = new char[nImgBytes];
ULONG nRead;
hr = pStream->Read(bytes, nImgBytes, &nRead); // read the data from the stream into normal memory. nRead should be equal to statsg.cbSize.QuadPart.
ASSERT(hr == S_OK);
ASSERT(nImgBytes == nRead);
// write data to archive and finish
ar << nRead; // need to save the amount of memory needed for the file, since we will need to read this amount later
ar.Write(bytes, nRead); // write the data to the archive file from the stream memory
pStream->Release();
delete[] bytes;
}
else
{
// get the data from the archive
ULONG nBytes;
ar >> nBytes;
char *bytes = new char[nBytes]; // ordinary memory to hold data from archive file
UINT nBytesRead = ar.Read(bytes, nBytes); // read the data from the archive file
ASSERT(nBytesRead == UINT(nBytes));
// make the stream
IStream *pStream = SHCreateMemStream(NULL, 0);
ASSERT(pStream != NULL);
if (pStream == NULL)
return;
// put the archive data into the stream
ULONG nBytesWritten;
pStream->Write(bytes, nBytes, &nBytesWritten);
ASSERT(nBytes == nBytesWritten);
if (nBytes != nBytesWritten)
return;
// go to the start of the stream
LARGE_INTEGER seekPos;
seekPos.QuadPart = 0;
hr = pStream->Seek(seekPos, STREAM_SEEK_SET, NULL);
ASSERT(hr == S_OK);
// load the stream into CImage and finish
pImage->Load(pStream); // pass the archive data to CImage
pStream->Release();
delete[] bytes;
}
}
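Hooked into the question's class, the helper might be used like this (a sketch; GetImage() is the accessor from the original post):
BOOL CPicture::Serialize(CArchive &ar)
{
    // ImageArchive branches on ar.IsStoring() internally, so the save
    // and load paths both go through the same call.
    ImageArchive(GetImage(), ar);
    return TRUE;
}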