How to use IAudioClient3 (WASAPI) with Real-Time Work Queue API - c++

I'm working on a lowest-possible latency MIDI synthetizer software. I'm aware of ASIO and other alternatives, but as they have apparently made significant improvements to the WASAPI stack (in shared mode, at least), I'm curious to try it out. I first wrote a simple event-driven version of program, but as that's not the recommended way to do low-latency audio on Windows 10 (according to the docs), I'm trying to migrate to the Real-Time Work Queue API.
The documentation on Low Latency Audio states that it is recommended to use the Real-Time Work Queue API or MFCreateMFByteStreamOnStreamEx with WASAPI in order for the OS to manage work items in a way that will avoid interference from non-audio subsystems. This seems like a good idea, but the latter option seems to require some managed code (demonstrated in this WindowsAudioSession example), which I know nothing about and would preferably avoid (also the header Robytestream.h which has defs for the IRandomAccessStream isn't found on my system either).
The RTWQ example included in the docs is incomplete (doesn't compile as such), and I have made the necessary additions to make it compilable:
class my_rtqueue : IRtwqAsyncCallback {
public:
IRtwqAsyncResult* pAsyncResult;
RTWQWORKITEM_KEY workItemKey;
DWORD WorkQueueId;
STDMETHODIMP GetParameters(DWORD* pdwFlags, DWORD* pdwQueue)
{
HRESULT hr = S_OK;
*pdwFlags = 0;
*pdwQueue = WorkQueueId;
return hr;
}
//-------------------------------------------------------
STDMETHODIMP Invoke(IRtwqAsyncResult* pAsyncResult)
{
HRESULT hr = S_OK;
IUnknown* pState = NULL;
WCHAR className[20];
DWORD bufferLength = 20;
DWORD taskID = 0;
LONG priority = 0;
BYTE* pData;
hr = render_info.renderclient->GetBuffer(render_info.buffer_framecount, &pData);
ERROR_EXIT(hr);
update_buffer((unsigned short*)pData, render_info.framesize_bytes / (2*sizeof(unsigned short))); // 2 channels, sizeof(unsigned short) == 2
hr = render_info.renderclient->ReleaseBuffer(render_info.buffer_framecount, 0);
ERROR_EXIT(hr);
return S_OK;
}
STDMETHODIMP QueryInterface(const IID &riid, void **ppvObject) {
return 0;
}
ULONG AddRef() {
return 0;
}
ULONG Release() {
return 0;
}
HRESULT queue(HANDLE event) {
HRESULT hr;
hr = RtwqPutWaitingWorkItem(event, 1, this->pAsyncResult, &this->workItemKey);
return hr;
}
my_rtqueue() : workItemKey(0) {
HRESULT hr = S_OK;
IRtwqAsyncCallback* callback = NULL;
DWORD taskId = 0;
WorkQueueId = RTWQ_MULTITHREADED_WORKQUEUE;
//WorkQueueId = RTWQ_STANDARD_WORKQUEUE;
hr = RtwqLockSharedWorkQueue(L"Pro Audio", 0, &taskId, &WorkQueueId);
ERROR_THROW(hr);
hr = RtwqCreateAsyncResult(NULL, reinterpret_cast<IRtwqAsyncCallback*>(this), NULL, &pAsyncResult);
ERROR_THROW(hr);
}
int stop() {
HRESULT hr;
if (pAsyncResult)
pAsyncResult->Release();
if (0xFFFFFFFF != this->WorkQueueId) {
hr = RtwqUnlockWorkQueue(this->WorkQueueId);
if (FAILED(hr)) {
printf("Failed with RtwqUnlockWorkQueue 0x%x\n", hr);
return 0;
}
}
return 1;
}
};
And so, the actual WASAPI code (HRESULT error checking is omitted for clarity):
void thread_main(LPVOID param) {
HRESULT hr;
REFERENCE_TIME hnsRequestedDuration = 0;
IMMDeviceEnumerator* pEnumerator = NULL;
IMMDevice* pDevice = NULL;
IAudioClient3* pAudioClient = NULL;
IAudioRenderClient* pRenderClient = NULL;
WAVEFORMATEX* pwfx = NULL;
HANDLE hEvent = NULL;
HANDLE hTask = NULL;
UINT32 bufferFrameCount;
BYTE* pData;
DWORD flags = 0;
hr = RtwqStartup();
// also, hr is checked for errors every step of the way
hr = CoInitialize(NULL);
hr = CoCreateInstance(
CLSID_MMDeviceEnumerator, NULL,
CLSCTX_ALL, IID_IMMDeviceEnumerator,
(void**)&pEnumerator);
hr = pEnumerator->GetDefaultAudioEndpoint(
eRender, eConsole, &pDevice);
hr = pDevice->Activate(
IID_IAudioClient, CLSCTX_ALL,
NULL, (void**)&pAudioClient);
WAVEFORMATEX wave_format = {};
wave_format.wFormatTag = WAVE_FORMAT_PCM;
wave_format.nChannels = 2;
wave_format.nSamplesPerSec = 48000;
wave_format.nAvgBytesPerSec = 48000 * 2 * 16 / 8;
wave_format.nBlockAlign = 2 * 16 / 8;
wave_format.wBitsPerSample = 16;
UINT32 DP, FP, MINP, MAXP;
hr = pAudioClient->GetSharedModeEnginePeriod(&wave_format, &DP, &FP, &MINP, &MAXP);
printf("DefaultPeriod: %u, Fundamental period: %u, min_period: %u, max_period: %u\n", DP, FP, MINP, MAXP);
hr = pAudioClient->InitializeSharedAudioStream(AUDCLNT_STREAMFLAGS_EVENTCALLBACK, MINP, &wave_format, 0);
my_rtqueue* workqueue = NULL;
try {
workqueue = new my_rtqueue();
}
catch (...) {
hr = E_ABORT;
ERROR_EXIT(hr);
}
hr = pAudioClient->GetBufferSize(&bufferFrameCount);
PWAVEFORMATEX wf = &wave_format;
UINT32 current_period;
pAudioClient->GetCurrentSharedModeEnginePeriod(&wf, &current_period);
INT32 FrameSize_bytes = bufferFrameCount * wave_format.nChannels * wave_format.wBitsPerSample / 8;
printf("bufferFrameCount: %u, FrameSize_bytes: %d, current_period: %u\n", bufferFrameCount, FrameSize_bytes, current_period);
hr = pAudioClient->GetService(
IID_IAudioRenderClient,
(void**)&pRenderClient);
render_info.framesize_bytes = FrameSize_bytes;
render_info.buffer_framecount = bufferFrameCount;
render_info.renderclient = pRenderClient;
hEvent = CreateEvent(nullptr, false, false, nullptr);
if (hEvent == INVALID_HANDLE_VALUE) { ERROR_EXIT(0); }
hr = pAudioClient->SetEventHandle(hEvent);
const size_t num_samples = FrameSize_bytes / sizeof(unsigned short);
DWORD taskIndex = 0;
hTask = AvSetMmThreadCharacteristics(TEXT("Pro Audio"), &taskIndex);
if (hTask == NULL) {
hr = E_FAIL;
}
hr = pAudioClient->Start(); // Start playing.
running = 1;
while (running) {
workqueue->queue(hEvent);
}
workqueue->stop();
hr = RtwqShutdown();
delete workqueue;
running = 0;
return 1;
}
This seems to kind of work (ie. audio is being output), but on every other invocation of my_rtqueue::Invoke(), IAudioRenderClient::GetBuffer() returns a HRESULT of 0x88890006 (-> AUDCLNT_E_BUFFER_TOO_LARGE), and the actual audio output is certainly not what I intend it to be.
What issues are there with my code? Is this the right way to use RTWQ with WASAPI?

Turns out there were a number of issues with my code, none of which had really anything to do with Rtwq. The biggest issue was me assuming that the shared mode audio stream was using 16-bit integer samples, when in reality my audio was setup for 32-bit float format (WAVE_FORMAT_IEEE_FLOAT). The currently active shared mode format, period etc. should be fetched like this:
WAVEFORMATEX *wavefmt = NULL;
UINT32 current_period = 0;
hr = pAudioClient->GetCurrentSharedModeEnginePeriod((WAVEFORMATEX**)&wavefmt, &current_period);
wavefmt now contains the output format info of the current shared mode. If the wFormatTag field is equal to WAVE_FORMAT_EXTENSIBLE, one needs to cast WAVEFORMATEX to WAVEFORMATEXTENSIBLE to see what the actual format is. After this, one needs to fetch the minimum period supported by the current setup, like so:
UINT32 DP, FP, MINP, MAXP;
hr = pAudioClient->GetSharedModeEnginePeriod(wavefmt, &DP, &FP, &MINP, &MAXP);
and then initialize the audio stream with the new InitializeSharedAudioStream function:
hr = pAudioClient->InitializeSharedAudioStream(AUDCLNT_STREAMFLAGS_EVENTCALLBACK, MINP, wavefmt, NULL);
... get the buffer's actual size:
hr = pAudioClient->GetBufferSize(&render_info.buffer_framecount);
and use GetCurrentPadding in the Get/ReleaseBuffer logic:
UINT32 pad = 0;
hr = render_info.audioclient->GetCurrentPadding(&pad);
int actual_size = (render_info.buffer_framecount - pad);
hr = render_info.renderclient->GetBuffer(actual_size, &pData);
if (SUCCEEDED(hr)) {
update_buffer((float*)pData, actual_size);
hr = render_info.renderclient->ReleaseBuffer(actual_size, 0);
ERROR_EXIT(hr);
}
The documentation for IAudioClient::Initialize states the following about shared mode streams (I assume it also applies to the new IAudioClient3):
Each time the thread awakens, it should call IAudioClient::GetCurrentPadding to determine how much data to write to a rendering buffer or read from a capture buffer. In contrast to the two buffers that the Initialize method allocates for an exclusive-mode stream that uses event-driven buffering, a shared-mode stream requires a single buffer.
Using GetCurrentPadding solves the problem with AUDCLNT_E_BUFFER_TOO_LARGE, and feeding the buffer with 32-bit float samples instead of 16-bit integers makes the output sound fine on my system (although the effect was quite funky!).
If someone comes up with better/more correct ways to use the Rtwq API, I would love to hear them.

Related

AvSetMmThreadCharacteristicsW for UWP

I'm working on a WASAPI UWP audio application with cpp/winrt which needs to take audio from an input and send it to an output after being processed.
I want to set my audio thread characteristics with AvSetMmThreadCharacteristicsW(L"Pro Audio", &taskIndex), but I just noticed this function (and most of avrt.h) is limited to WINAPI_PARTITION_DESKTOP and WINAPI_PARTITION_GAMES.
I think I need this because when my code is integrated into my UWP app, the audio input is full of discontinuity, and I have no issue in my test code which uses the avrt API.
Is there another way to configure my thread for audio processing?
Edit: here is my test program https://github.com/loics2/test-wasapi. The interesting part happens in the AudioStream class. I can't share my UWP app, but I can copy as is these classes into a Windows Runtime Component.
Edit 2: here's the audio thread code :
void AudioStream::StreamWorker()
{
WAVEFORMATEX* captureFormat = nullptr;
WAVEFORMATEX* renderFormat = nullptr;
RingBuffer<float> captureBuffer;
RingBuffer<float> renderBuffer;
BYTE* streamBuffer = nullptr;
unsigned int streamBufferSize = 0;
unsigned int bufferFrameCount = 0;
unsigned int numFramesPadding = 0;
unsigned int inputBufferSize = 0;
unsigned int outputBufferSize = 0;
DWORD captureFlags = 0;
winrt::hresult hr = S_OK;
// m_inputClient is a winrt::com_ptr<IAudioClient3>
if (m_inputClient) {
hr = m_inputClient->GetMixFormat(&captureFormat);
// m_audioCaptureClient is a winrt::com_ptr<IAudioCaptureClient>
if (!m_audioCaptureClient) {
hr = m_inputClient->Initialize(
AUDCLNT_SHAREMODE_SHARED,
AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
0,
0,
captureFormat,
nullptr);
hr = m_inputClient->GetService(__uuidof(IAudioCaptureClient), m_audioCaptureClient.put_void());
hr = m_inputClient->SetEventHandle(m_inputReadyEvent.get());
hr = m_inputClient->Reset();
hr = m_inputClient->Start();
}
}
hr = m_inputClient->GetBufferSize(&inputBufferSize);
// multiplying the buffer size by the number of channels
inputBufferSize *= 2;
// m_outputClient is a winrt::com_ptr<IAudioClient3>
if (m_outputClient) {
hr = m_outputClient->GetMixFormat(&renderFormat);
// m_audioRenderClientis a winrt::com_ptr<IAudioRenderClient>
if (!m_audioRenderClient) {
hr = m_outputClient->Initialize(
AUDCLNT_SHAREMODE_SHARED,
AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
0,
0,
captureFormat,
nullptr);
hr = m_outputClient->GetService(__uuidof(IAudioRenderClient), m_audioRenderClient.put_void());
hr = m_outputClient->SetEventHandle(m_outputReadyEvent.get());
hr = m_outputClient->Reset();
hr = m_outputClient->Start();
}
}
hr = m_outputClient->GetBufferSize(&outputBufferSize);
// multiplying the buffer size by the number of channels
outputBufferSize *= 2;
while (m_isRunning)
{
// ===== INPUT =====
// waiting for the capture event
WaitForSingleObject(m_inputReadyEvent.get(), INFINITE);
// getting the input buffer data
hr = m_audioCaptureClient->GetNextPacketSize(&bufferFrameCount);
while (SUCCEEDED(hr) && bufferFrameCount > 0) {
m_audioCaptureClient->GetBuffer(&streamBuffer, &bufferFrameCount, &captureFlags, nullptr, nullptr);
if (bufferFrameCount != 0) {
captureBuffer.write(reinterpret_cast<float*>(streamBuffer), bufferFrameCount * 2);
hr = m_audioCaptureClient->ReleaseBuffer(bufferFrameCount);
if (FAILED(hr)) {
m_audioCaptureClient->ReleaseBuffer(0);
}
}
else
{
m_audioCaptureClient->ReleaseBuffer(0);
}
hr = m_audioCaptureClient->GetNextPacketSize(&bufferFrameCount);
}
// ===== CALLBACK =====
auto size = captureBuffer.size();
float* userInputData = (float*)calloc(size, sizeof(float));
float* userOutputData = (float*)calloc(size, sizeof(float));
captureBuffer.read(userInputData, size);
OnData(userInputData, userOutputData, size / 2, 2, 48000);
renderBuffer.write(userOutputData, size);
free(userInputData);
free(userOutputData);
// ===== OUTPUT =====
// waiting for the render event
WaitForSingleObject(m_outputReadyEvent.get(), INFINITE);
// getting information about the output buffer
hr = m_outputClient->GetBufferSize(&bufferFrameCount);
hr = m_outputClient->GetCurrentPadding(&numFramesPadding);
// adjust the frame count with the padding
bufferFrameCount -= numFramesPadding;
if (bufferFrameCount != 0) {
hr = m_audioRenderClient->GetBuffer(bufferFrameCount, &streamBuffer);
auto count = (bufferFrameCount * 2);
if (renderBuffer.read(reinterpret_cast<float*>(streamBuffer), count) < count) {
// captureBuffer is not full enough, we should fill the remainder with 0
}
hr = m_audioRenderClient->ReleaseBuffer(bufferFrameCount, 0);
if (FAILED(hr)) {
m_audioRenderClient->ReleaseBuffer(0, 0);
}
}
else
{
m_audioRenderClient->ReleaseBuffer(0, 0);
}
}
exit:
// Cleanup code
}
I removed the error handling code for clarity, most of it is :
if (FAILED(hr))
goto exit;
#IInspectable was right, there's something wrong with my code : the audio processing is done by a library which then calls callbacks with some results.
In my callback, I try to raise a winrt::event, but it sometimes takes more than 50ms. When it happens, it blocks the audio thread, and creates discontinuity...

Playing audio through WASAPI - IAudioRenderClient and getting static

I'm trying to learn how to play audio through my default playback device using WASAPI. My test is capturing audio from my default device and sending it to the playback device to render so that I can hear myself talk through my speakers.
It appears that the data I'm getting from my capture device is working fine, but when I send it to the IAudioRenderClient buffer, sometimes I get just pure static and sometimes I just get silence.
If you think you need to see my capture device logic, just ask. I haven't posted it since it appears to me to be a playback device issue.
My devices are working properly in other apps. So it's not a device problem.
Can anyone tell me what I'm doing wrong?
Playback Device Init
#define REFTIMES_PER_SEC 48000
bool AudioManager::InitPlaybackDevice()
{
HRESULT hr;
WAVEFORMATEX* pWfx = nullptr;
BYTE* pData = nullptr;
UINT32 nNumFramesPadding = 0;
// Query the Enumerator for the default playback device
hr = m_pEnumerator->GetDefaultAudioEndpoint(eRender, eCommunications, &m_pPlaybackDevice);
EXIT_ON_ERROR_SHUTDOWN(hr);
hr = m_pPlaybackDevice->Activate(IID_IAudioClient, CLSCTX_ALL, NULL, (void**)&m_pAudioRenderClient);
EXIT_ON_ERROR_SHUTDOWN(hr);
hr = m_pAudioRenderClient->GetMixFormat(&pWfx);
EXIT_ON_ERROR_SHUTDOWN(hr);
hr = m_pAudioRenderClient->Initialize(AUDCLNT_SHAREMODE_SHARED, 0, REFTIMES_PER_SEC, 0, pWfx, NULL);
EXIT_ON_ERROR_SHUTDOWN(hr);
hr = m_pAudioRenderClient->GetBufferSize(&m_nPlaybackBufferFrameCount);
EXIT_ON_ERROR_SHUTDOWN(hr);
hr = m_pAudioRenderClient->GetService(IID_IAudioRenderClient, (void**)&m_pAudioRenderClientObj);
EXIT_ON_ERROR_SHUTDOWN(hr);
hr = m_pAudioRenderClient->GetCurrentPadding(&nNumFramesPadding);
EXIT_ON_ERROR_SHUTDOWN(hr);
hr = m_pAudioRenderClientObj->GetBuffer(m_nPlaybackBufferFrameCount - nNumFramesPadding, &pData);
EXIT_ON_ERROR_SHUTDOWN(hr);
hr = m_pAudioRenderClientObj->ReleaseBuffer(m_nPlaybackBufferFrameCount - nNumFramesPadding, AUDCLNT_BUFFERFLAGS_SILENT);
EXIT_ON_ERROR_SHUTDOWN(hr);
hr = m_pAudioRenderClient->Start();
EXIT_ON_ERROR(hr);
return true;
}
Sending audio data to the playback device
bool AudioManager::SendSampleToPlaybackDevice(BYTE * i_pData, UINT32 i_nSize)
{
if (m_pAudioRenderClientObj != nullptr)
{
HRESULT hr;
BYTE* pData = nullptr;
UINT32 nNumFramesPadding = 0;
int nNumFrames = 0;
hr = m_pAudioRenderClient->GetCurrentPadding(&nNumFramesPadding);
EXIT_ON_ERROR(hr);
nNumFrames = min(i_nSize, m_nPlaybackBufferFrameCount - nNumFramesPadding);
hr = m_pAudioRenderClientObj->GetBuffer(nNumFrames, &pData);
EXIT_ON_ERROR(hr);
if(pData != nullptr)
memcpy(pData, i_pData, nNumFrames);
hr = m_pAudioRenderClientObj->ReleaseBuffer(nNumFrames, 0);
EXIT_ON_ERROR(hr);
return true;
}
return false;
}

Why MediaFoundation InitializeSinkWriter (SetInputMediaType) only accepts WMV3 format?

Taken from the MSDN help pages, InitializeSinkWriter works fine so long as the video encoding and video input format is WMV3/RGB32, however if I change it to WMV1, MPEG2, etc. then SetInputMediaType fails.
AFAIK I have WMV1 installed as a codec according to Sherlock the Codec Detective program.
Here is the code that causes the issue:
(to find the problem code, search for "problem" in source comments, there is a lot of boiler plate code that is irrelevant)
// Format constants
const UINT32 VIDEO_WIDTH = 640;
const UINT32 VIDEO_HEIGHT = 480;
const UINT32 VIDEO_FPS = 30;
const UINT64 VIDEO_FRAME_DURATION = 10 * 1000 * 1000 / VIDEO_FPS;
const UINT32 VIDEO_BIT_RATE = 800000;
const GUID VIDEO_ENCODING_FORMAT = MFVideoFormat_WMV1 ; // problem here, must be WMV3
const GUID VIDEO_INPUT_FORMAT = MFVideoFormat_WMV3 ; // problem here if not wmv3 too
const UINT32 VIDEO_PELS = VIDEO_WIDTH * VIDEO_HEIGHT;
const UINT32 VIDEO_FRAME_COUNT = 20 * VIDEO_FPS;
HRESULT InitializeSinkWriter(IMFSinkWriter **ppWriter, DWORD *pStreamIndex)
{
*ppWriter = NULL;
*pStreamIndex = NULL;
IMFSinkWriter *pSinkWriter = NULL;
IMFMediaType *pMediaTypeOut = NULL;
IMFMediaType *pMediaTypeIn = NULL;
DWORD streamIndex;
HRESULT hr = MFCreateSinkWriterFromURL(L"output.wmv", NULL, NULL, &pSinkWriter);
// Set the output media type.
if (SUCCEEDED(hr))
{
hr = MFCreateMediaType(&pMediaTypeOut);
}
if (SUCCEEDED(hr))
{
hr = pMediaTypeOut->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
}
if (SUCCEEDED(hr))
{
hr = pMediaTypeOut->SetGUID(MF_MT_SUBTYPE, VIDEO_ENCODING_FORMAT);
}
if (SUCCEEDED(hr))
{
hr = pMediaTypeOut->SetUINT32(MF_MT_AVG_BITRATE, VIDEO_BIT_RATE);
}
if (SUCCEEDED(hr))
{
hr = pMediaTypeOut->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
}
if (SUCCEEDED(hr))
{
hr = MFSetAttributeSize(pMediaTypeOut, MF_MT_FRAME_SIZE, VIDEO_WIDTH, VIDEO_HEIGHT);
}
if (SUCCEEDED(hr))
{
hr = MFSetAttributeRatio(pMediaTypeOut, MF_MT_FRAME_RATE, VIDEO_FPS, 1);
}
if (SUCCEEDED(hr))
{
hr = MFSetAttributeRatio(pMediaTypeOut, MF_MT_PIXEL_ASPECT_RATIO, 1, 1);
}
if (SUCCEEDED(hr))
{
hr = pSinkWriter->AddStream(pMediaTypeOut, &streamIndex);
}
// Set the input media type.
if (SUCCEEDED(hr))
{
hr = MFCreateMediaType(&pMediaTypeIn);
}
if (SUCCEEDED(hr))
{
hr = pMediaTypeIn->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
}
if (SUCCEEDED(hr))
{
hr = pMediaTypeIn->SetGUID(MF_MT_SUBTYPE, VIDEO_INPUT_FORMAT);
}
if (SUCCEEDED(hr))
{
hr = pMediaTypeIn->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
}
if (SUCCEEDED(hr))
{
hr = MFSetAttributeSize(pMediaTypeIn, MF_MT_FRAME_SIZE, VIDEO_WIDTH, VIDEO_HEIGHT);
}
if (SUCCEEDED(hr))
{
hr = MFSetAttributeRatio(pMediaTypeIn, MF_MT_FRAME_RATE, VIDEO_FPS, 1);
}
if (SUCCEEDED(hr))
{
hr = MFSetAttributeRatio(pMediaTypeIn, MF_MT_PIXEL_ASPECT_RATIO, 1, 1);
}
if (SUCCEEDED(hr))
{
// Problem here! Codec issue with wmv1, mpeg, etc.
hr = pSinkWriter->SetInputMediaType(streamIndex, pMediaTypeIn, NULL);
}
else {
puts("setattributeratio failed");
}
// Tell the sink writer to start accepting data.
if (SUCCEEDED(hr))
{
hr = pSinkWriter->BeginWriting();
}
else {
puts("setinputmediatype failed"); // <-- HR result problem here
}
// Return the pointer to the caller.
if (SUCCEEDED(hr))
{
*ppWriter = pSinkWriter;
(*ppWriter)->AddRef();
*pStreamIndex = streamIndex;
}
else {
puts("beginwriting failed");
}
SafeRelease(&pSinkWriter);
SafeRelease(&pMediaTypeOut);
SafeRelease(&pMediaTypeIn);
return hr;
}
Initialize sink writer is called with this code:
void main()
{
DWORD streamidx = 0;
const WCHAR *SAMPLE_FILE = L"sample.wmv";
IMFSourceReader *pReader = NULL;
IMFSinkWriter *pWriter = NULL;
puts("Initializing...");
HRESULT hr = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED);
if (SUCCEEDED(hr))
{
hr = MFStartup(MF_VERSION);
if (SUCCEEDED(hr))
{
// problem here !
hr = InitializeSinkWriter(&pWriter, &streamidx);
if (SUCCEEDED(hr))
{
// more code would go here...
}
else {
puts("InitializeSinkWriter failed"); // this is called
}
SafeRelease(&pWriter);
MFShutdown();
}
CoUninitialize();
}
puts("Finished...");
}
This is a standard Windows 7 computer I am using, so, if it only accepts WMV3 as the encoder or input type, does it mean I have to install codecs? This seems absurd since popular formats like WMV1 and MPEG should already be installed, and Sherlock codec detective says they are
There is no support for codecs you are trying in Windows Media Foundation (even though some third party software could report availability of other codecs for other APIs).
See:
Supported Media Formats in Media Foundation - Video Codecs - Encoder column in the table under Video Codecs
Windows Media Video 9 Encoder - Output Fomats - there is no WMV1 there

Capture a frame from video using directshow filters in C++

I have taken a code from the net to capture a frame from a video file and modified to capture all frames and store it as bmp images.
HRESULT GrabVideoBitmap(PCWSTR pszVideoFile)
{
IGraphBuilder *pGraph = NULL;
IMediaControl *pControl = NULL;
IMediaEventEx *pEvent = NULL;
IBaseFilter *pGrabberF = NULL;
ISampleGrabber *pGrabber = NULL;
IBaseFilter *pSourceF = NULL;
IEnumPins *pEnum = NULL;
IPin *pPin = NULL;
IBaseFilter *pNullF = NULL;
long evCode;
wchar_t temp[10];
wchar_t framename[50] = IMAGE_FILE_PATH; // L"D:\\sampleframe";
BYTE *pBuffer = NULL;
HRESULT hr = CoInitialize(NULL);
if (FAILED(hr))
return 0;
hr = CoCreateInstance(CLSID_FilterGraph, NULL, CLSCTX_INPROC_SERVER,
IID_PPV_ARGS(&pGraph));
hr = pGraph->QueryInterface(IID_PPV_ARGS(&pControl));
hr = pGraph->QueryInterface(IID_PPV_ARGS(&pEvent));
// Create the Sample Grabber filter.
hr = CoCreateInstance(CLSID_SampleGrabber, NULL, CLSCTX_INPROC_SERVER,
IID_PPV_ARGS(&pGrabberF));
hr = pGraph->AddFilter(pGrabberF, L"Sample Grabber");
hr = pGrabberF->QueryInterface(IID_PPV_ARGS(&pGrabber));
// Displays the metadata of the file
DisplayFileInfo((wchar_t*)pszVideoFile); // to display video information
AM_MEDIA_TYPE mt;
ZeroMemory(&mt, sizeof(mt));
mt.majortype = MEDIATYPE_Video;
mt.subtype = MEDIASUBTYPE_RGB24;
hr = pGrabber->SetMediaType(&mt);
hr = pGraph->AddSourceFilter(pszVideoFile, L"Source", &pSourceF);
hr = pSourceF->EnumPins(&pEnum);
while (S_OK == pEnum->Next(1, &pPin, NULL))
{
hr = ConnectFilters(pGraph, pPin, pGrabberF);
SafeRelease(&pPin);
if (SUCCEEDED(hr))
{
break;
}
}
hr = CoCreateInstance(CLSID_NullRenderer, NULL, CLSCTX_INPROC_SERVER,
IID_PPV_ARGS(&pNullF));
hr = pGraph->AddFilter(pNullF, L"Null Filter");
hr = ConnectFilters(pGraph, pGrabberF, pNullF);
hr = pGrabber->SetOneShot(TRUE);
hr = pGrabber->SetBufferSamples(TRUE);
hr = pControl->Run();
hr = pEvent->WaitForCompletion(INFINITE, &evCode);
for (int i = 0; i < 10; i++)
{
// Find the required buffer size.
long cbBuffer;
hr = pGrabber->GetCurrentBuffer(&cbBuffer, NULL);
pBuffer = (BYTE*)CoTaskMemAlloc(cbBuffer);
hr = pGrabber->GetCurrentBuffer(&cbBuffer, (long*)pBuffer);
hr = pGrabber->GetConnectedMediaType(&mt);
// Examine the format block.
if ((mt.formattype == FORMAT_VideoInfo) &&
(mt.cbFormat >= sizeof(VIDEOINFOHEADER)) &&
(mt.pbFormat != NULL))
{
swprintf(temp, 5, L"%d", i);
wcscat_s(framename, temp);
wcscat_s(framename, L".bmp");
VIDEOINFOHEADER *pVih = (VIDEOINFOHEADER*)mt.pbFormat;
hr = WriteBitmap((PCWSTR)framename, &pVih->bmiHeader,
mt.cbFormat - SIZE_PREHEADER, pBuffer, cbBuffer);
wcscpy_s(framename, IMAGE_FILE_PATH);
}
else
{
// Invalid format.
hr = VFW_E_INVALIDMEDIATYPE;
}
FreeMediaType(mt);
}
done:
CoTaskMemFree(pBuffer);
SafeRelease(&pPin);
SafeRelease(&pEnum);
SafeRelease(&pNullF);
SafeRelease(&pSourceF);
SafeRelease(&pGrabber);
SafeRelease(&pGrabberF);
SafeRelease(&pControl);
SafeRelease(&pEvent);
SafeRelease(&pGraph);
return hr;
}
The input video file has 132 frames.
But only 68 images are generated.
Also last frame of the video is captured for the last 38 images.
I think the directshow graph is running continuously and WriteBitmap() is missing frames.
How to get the control in directX to capture one frame and write it to bmp file and capture the next frame and thus capture all the frames as bmp images.
Thanks
Arun
Your approach is wrong. Currently, you set the sample grabber to one shot and after that you wait for the graph completion. This way it only works for capturing a single frame. You need to capture the frames inside the ISampleGrabberCB callback of your pGrabber. You need to implement ISampleGrabberCB interface and use ISampleGrabber::SetCallback on your pGrabber filter to point it to your implementation. After that you can capture the frames inside either SampleCB or BufferCB methods. http://www.infognition.com/blog/2013/accessing_raw_video_in_directshow.html

cannot readsample from IMFSource in synchronous mode

I am having trouble with a video recording application that I am writing using Microsoft Media Foundation.
Specifically, the read/write function (which I put on a loop that lives on it's own thread) doesn't make it pass the call to ReadSample:
HRESULT WinCapture::rwFunction(void) {
HRESULT hr;
DWORD streamIndex, flags;
LONGLONG llTimeStamp;
IMFSample *pSample = NULL;
EnterCriticalSection(&m_critsec);
// Read another sample.
hr = m_pReader->ReadSample(
(DWORD)MF_SOURCE_READER_FIRST_VIDEO_STREAM,
0,
&streamIndex, // actual
&flags,//NULL, // flags
&llTimeStamp,//NULL, // timestamp
&pSample // sample
);
if (FAILED(hr)) { goto done; }
hr = m_pWriter->WriteSample(0, pSample);
goto done;
done:
return hr;
SafeRelease(&pSample);
LeaveCriticalSection(&m_critsec);
}
The value of hr is an exception code: 0xc00d3704 so the code snippet skips the call to WriteSample.
It is a lot of steps, but I am fairly certain that I am setting up m_pReader (type IMFSource *) correctly.
HRESULT WinCapture::OpenMediaSource(IMFMediaSource *pSource)
{
HRESULT hr = S_OK;
IMFAttributes *pAttributes = NULL;
hr = MFCreateAttributes(&pAttributes, 2);
// use a callback
//if (SUCCEEDED(hr))
//{
// hr = pAttributes->SetUnknown(MF_SOURCE_READER_ASYNC_CALLBACK, this);
//}
// set the desired format type
DWORD dwFormatIndex = (DWORD)formatIdx;
IMFPresentationDescriptor *pPD = NULL;
IMFStreamDescriptor *pSD = NULL;
IMFMediaTypeHandler *pHandler = NULL;
IMFMediaType *pType = NULL;
// create the source reader
if (SUCCEEDED(hr))
{
hr = MFCreateSourceReaderFromMediaSource(
pSource,
pAttributes,
&m_pReader
);
}
// steps to set the selected format type
hr = pSource->CreatePresentationDescriptor(&pPD);
if (FAILED(hr))
{
goto done;
}
BOOL fSelected;
hr = pPD->GetStreamDescriptorByIndex(0, &fSelected, &pSD);
if (FAILED(hr))
{
goto done;
}
hr = pSD->GetMediaTypeHandler(&pHandler);
if (FAILED(hr))
{
goto done;
}
hr = pHandler->GetMediaTypeByIndex(dwFormatIndex, &pType);
if (FAILED(hr))
{
goto done;
}
hr = pHandler->SetCurrentMediaType(pType);
{
goto done;
}
hr = m_pReader->SetCurrentMediaType(
(DWORD)MF_SOURCE_READER_FIRST_VIDEO_STREAM,
NULL,
pType
);
// set to maximum framerate?
hr = pHandler->GetCurrentMediaType(&pType);
if (FAILED(hr))
{
goto done;
}
// Get the maximum frame rate for the selected capture format.
// Note: To get the minimum frame rate, use the
// MF_MT_FRAME_RATE_RANGE_MIN attribute instead.
PROPVARIANT var;
if (SUCCEEDED(pType->GetItem(MF_MT_FRAME_RATE_RANGE_MAX, &var)))
{
hr = pType->SetItem(MF_MT_FRAME_RATE, var);
PropVariantClear(&var);
if (FAILED(hr))
{
goto done;
}
hr = pHandler->SetCurrentMediaType(pType);
{
goto done;
}
hr = m_pReader->SetCurrentMediaType(
(DWORD)MF_SOURCE_READER_FIRST_VIDEO_STREAM,
NULL,
pType
);
}
goto done;
done:
SafeRelease(&pPD);
SafeRelease(&pSD);
SafeRelease(&pHandler);
SafeRelease(&pType);
SafeRelease(&pAttributes);
return hr;
}
This code is all copied from Microsoft documentation pages and the SDK sample code. The variable formatIdx is 0, I get it from enumerating the camera formats and choosing the first one.
UPDATE
I have rewritten this program so that it uses callbacks instead of a blocking read/write function and I have exactly the same issue.
Here I get the device and initiate the callback method:
HRESULT WinCapture::initCapture(const WCHAR *pwszFileName, IMFMediaSource *pSource) {
HRESULT hr = S_OK;
EncodingParameters params;
params.subtype = MFVideoFormat_WMV3; // TODO, paramterize this
params.bitrate = TARGET_BIT_RATE;
m_llBaseTime = 0;
IMFMediaType *pType = NULL;
DWORD sink_stream = 0;
EnterCriticalSection(&m_critsec);
hr = m_ppDevices[selectedDevice]->ActivateObject(IID_PPV_ARGS(&pSource));
//m_bIsCapturing = false; // this is set externally here
if (SUCCEEDED(hr))
hr = OpenMediaSource(pSource); // also creates the reader
if (SUCCEEDED(hr))
{
hr = m_pReader->GetCurrentMediaType(
(DWORD)MF_SOURCE_READER_FIRST_VIDEO_STREAM,
&pType
);
}
// Create the sink writer
if (SUCCEEDED(hr))
{
hr = MFCreateSinkWriterFromURL(
pwszFileName,
NULL,
NULL,
&m_pWriter
);
}
if (SUCCEEDED(hr))
hr = ConfigureEncoder(params, pType, m_pWriter, &sink_stream);
// kick off the recording
if (SUCCEEDED(hr))
{
m_llBaseTime = 0;
m_bIsCapturing = TRUE;
hr = m_pReader->ReadSample(
(DWORD)MF_SOURCE_READER_FIRST_VIDEO_STREAM,
0,
NULL,
NULL,
NULL,
NULL
);
}
SafeRelease(&pType);
SafeRelease(&pSource);
pType = NULL;
LeaveCriticalSection(&m_critsec);
return hr;
}
The OpenMediaSource method is here:
HRESULT WinCapture::OpenMediaSource(IMFMediaSource *pSource)
{
HRESULT hr = S_OK;
IMFAttributes *pAttributes = NULL;
hr = MFCreateAttributes(&pAttributes, 2);
// use a callback
if (SUCCEEDED(hr))
{
hr = pAttributes->SetUnknown(MF_SOURCE_READER_ASYNC_CALLBACK, this);
}
// set the desired format type
DWORD dwFormatIndex = (DWORD)formatIdx;
IMFPresentationDescriptor *pPD = NULL;
IMFStreamDescriptor *pSD = NULL;
IMFMediaTypeHandler *pHandler = NULL;
IMFMediaType *pType = NULL;
// create the source reader
if (SUCCEEDED(hr))
{
hr = MFCreateSourceReaderFromMediaSource(
pSource,
pAttributes,
&m_pReader
);
}
// steps to set the selected format type
if (SUCCEEDED(hr)) hr = pSource->CreatePresentationDescriptor(&pPD);
if (FAILED(hr))
{
goto done;
}
BOOL fSelected;
hr = pPD->GetStreamDescriptorByIndex(0, &fSelected, &pSD);
if (FAILED(hr))
{
goto done;
}
hr = pSD->GetMediaTypeHandler(&pHandler);
if (FAILED(hr))
{
goto done;
}
hr = pHandler->GetMediaTypeByIndex(dwFormatIndex, &pType);
if (FAILED(hr))
{
goto done;
}
hr = pHandler->SetCurrentMediaType(pType);
if (FAILED(hr))
{
goto done;
}
// get available framerates
hr = MFGetAttributeRatio(pType, MF_MT_FRAME_RATE, &frameRate, &denominator);
std::cout << "frameRate " << frameRate << " denominator " << denominator << std::endl;
hr = m_pReader->SetCurrentMediaType(
(DWORD)MF_SOURCE_READER_FIRST_VIDEO_STREAM,
NULL,
pType
);
// set to maximum framerate?
hr = pHandler->GetCurrentMediaType(&pType);
if (FAILED(hr))
{
goto done;
}
goto done;
done:
SafeRelease(&pPD);
SafeRelease(&pSD);
SafeRelease(&pHandler);
SafeRelease(&pType);
SafeRelease(&pAttributes);
return hr;
}
Here, formatIdx is a field of this class that get sets by the user via the GUI. I leave it 0 in order to test. So, I don't think I am missing any steps to get the camera going, but maybe I am.
When I inspect what applications are using the webcam (using this method) after the call to ActivateObject, I see that my application is using the webcam as expected. But, when I enter the callback routine, I see there are two instances of my application using the webcam. This is the same using a blocking method.
I don't know if that is good or bad, but when I enter my callback method:
HRESULT WinCapture::OnReadSample(
HRESULT hrStatus,
DWORD /*dwStreamIndex*/,
DWORD /*dwStreamFlags*/,
LONGLONG llTimeStamp,
IMFSample *pSample // Can be NULL
)
{
EnterCriticalSection(&m_critsec);
if (!IsCapturing() || m_bIsCapturing == false)
{
LeaveCriticalSection(&m_critsec);
return S_OK;
}
HRESULT hr = S_OK;
if (FAILED(hrStatus))
{
hr = hrStatus;
goto done;
}
if (pSample)
{
if (m_bFirstSample)
{
m_llBaseTime = llTimeStamp;
m_bFirstSample = FALSE;
}
// rebase the time stamp
llTimeStamp -= m_llBaseTime;
hr = pSample->SetSampleTime(llTimeStamp);
if (FAILED(hr)) { goto done; }
hr = m_pWriter->WriteSample(0, pSample);
if (FAILED(hr)) { goto done; }
}
// Read another sample.
hr = m_pReader->ReadSample(
(DWORD)MF_SOURCE_READER_FIRST_VIDEO_STREAM,
0,
NULL, // actual
NULL, // flags
NULL, // timestamp
NULL // sample
);
done:
if (FAILED(hr))
{
//NotifyError(hr);
}
LeaveCriticalSection(&m_critsec);
return hr;
}
hrStatus is the 0x00d3704 error I was getting before, and the callback goes straight to done thus killing the callbacks.
Finally, I should say that I am modeling (read, 'copying') my code from the example MFCaptureToFile in the Windows SDK Samples and that doesn't work either. Although, there I get this weird negative number for the failed HRESULT: -1072875772.
If you have got error [0xC00D3704] - it means that source does not initialized. Such error can be caused by mistake of initialization, busy camera by another application (process) or unsupport of the camera by UVC driver(old cameras support DirectShow driver with partly compatibleness with UVC. It is possible read some general info from old cameras via UVC as friendly name, symbolic link. However, old cameras support DirectShow models - PUSH, while camera pushes bytes into the pipeline, while Media Foundation PULL data - sends special signal and wait data).
For checking your code I would like advise to research articles about capturing video from web cam on site "CodeProject" - search "videoInput" name.