Split Contiguous Buffer into RGB Channels (IMFMediaBuffer) - C++

I'm trying to split up a contiguous buffer into 3 byte channels (RGB). Here is my actual workflow to get the buffer filled with an image:
Set up a Source Reader (MFVideoFormat_RGB32)
Receive the video format information
Read the first image and convert it to a contiguous buffer...
In addition to that, here is the code:
HRESULT hr = S_OK;
IMFAttributes *attributes = NULL;
SafeRelease(&_sourcereader);
hr = MFCreateAttributes(&attributes, 1);
if (FAILED(hr)) {
// TODO: set error
return false;
}
hr = attributes->SetUINT32(MF_SOURCE_READER_ENABLE_VIDEO_PROCESSING, true);
if (FAILED(hr)) {
// TODO: set error
return false;
}
// conversion from QString to const WCHAR*
// note: keep the std::wstring alive; calling c_str() on the temporary returned by
// toStdWString() would leave the pointer dangling
const std::wstring wfilenameStr = filename.toStdWString();
const WCHAR* wfilename = wfilenameStr.c_str();
// create source reader from file with attributes
hr = MFCreateSourceReaderFromURL(wfilename, attributes, &_sourcereader);
if (FAILED(hr)) {
// TODO: set error
return false;
}
// configure sourcereader for progressive RGB32 frames
IMFMediaType *mediatype = NULL;
hr = MFCreateMediaType(&mediatype);
if (SUCCEEDED(hr))
{
hr = mediatype->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
}
if (SUCCEEDED(hr))
{
hr = mediatype->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_RGB32);
}
if (SUCCEEDED(hr))
{
hr = _sourcereader->SetCurrentMediaType(
(DWORD)MF_SOURCE_READER_FIRST_VIDEO_STREAM,
NULL, mediatype);
}
// Ensure the stream is selected.
if (SUCCEEDED(hr))
{
hr = _sourcereader->SetStreamSelection(
(DWORD)MF_SOURCE_READER_FIRST_VIDEO_STREAM, TRUE);
}
if (FAILED(hr)) {
// TODO: Error log for failed configuration
std::cout << "(ConfigureSourceReader) Configuration failed" << std::endl;
return false;
}
//------------------------------------------------------------------
//---------------------- Get Video Format Infos --------------------
//------------------------------------------------------------------
GUID subtype = { 0 };
// Get the media type from the stream.
hr = _sourcereader->GetCurrentMediaType(
(DWORD)MF_SOURCE_READER_FIRST_VIDEO_STREAM, &mediatype );
// Make sure it is a video format.
hr = mediatype->GetGUID(MF_MT_SUBTYPE, &subtype);
if (subtype != MFVideoFormat_RGB32)
{
hr = E_UNEXPECTED;
// TODO: Error log message
SafeRelease(&mediatype);
return false;
}
//------------------------------------------------------------------
// Get the width and height
UINT32 width = 0, height = 0;
hr = MFGetAttributeSize(mediatype, MF_MT_FRAME_SIZE, &width, &height);
if (FAILED(hr))
{
// TODO: Error log message
SafeRelease(&mediatype);
return false;
}
//assign dimensions to VideoInfo
_videoinfo.imageHeight = height; _videoinfo.imageWidth = width;
//std::cout << "(GetVideoFormat) width: " << width << ", height: " << height << std::endl;
//------------------------------------------------------------------
//get framerate
UINT32 framerate_num = 0, framerate_denom = 0;
hr = MFGetAttributeRatio(mediatype, MF_MT_FRAME_RATE, &framerate_num, &framerate_denom);
if (FAILED(hr))
{
// TODO: Error log message
SafeRelease(&mediatype);
return false;
}
//set frame rate in struct
_videoinfo.fps = framerate_num / framerate_denom; // TODO: integer division truncates (e.g. 30000/1001 -> 29); check for valid fps 24, 25, 30 ...
//------------------------------------------------------------------
// Get length
LONGLONG length = 0;
PROPVARIANT var;
PropVariantInit(&var);
hr = _sourcereader->GetPresentationAttribute((DWORD)MF_SOURCE_READER_MEDIASOURCE,
MF_PD_DURATION,
&var
);
if (SUCCEEDED(hr)) {
assert(var.vt == VT_UI8);
length = var.hVal.QuadPart;
} else {
// TODO : erro log msg
return false;
}
//Get total framenumber and length: save to info struct
_videoinfo.noofFrames = length / 10000000 * this->getFrameRate(); // duration is in 100-ns units, so divide by 10,000,000 to get seconds
_videoinfo.duration = length;
//------------------------------------------------------------------
// Get the stride to find out if the bitmap is top-down or bottom-up.
LONG lStride = 0;
lStride = (LONG)MFGetAttributeUINT32(mediatype, MF_MT_DEFAULT_STRIDE, 1);
_videoinfo.stride = lStride;
_videoinfo.bTopDown = (lStride > 0);
//------------------------------------------------------------------
SafeRelease(&mediatype);
// return true and flag if initialization went well
_bInitialized = true;
return true;
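A side note on the stride lookup above: MF_MT_DEFAULT_STRIDE is an optional attribute, so defaulting to 1 can silently give the wrong stride. A common fallback (a sketch only, not part of the original code; it reuses the subtype and width already queried above) is to compute it from the format:
// Sketch: fall back to computing the default stride when the attribute is missing.
LONG stride = 0;
if (FAILED(mediatype->GetUINT32(MF_MT_DEFAULT_STRIDE, (UINT32*)&stride)))
{
    // Derive the stride from the subtype (RGB32 here) and the frame width.
    if (SUCCEEDED(MFGetStrideForBitmapInfoHeader(subtype.Data1, width, &stride)))
    {
        // A negative stride means the image is stored bottom-up.
    }
}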
After that I call a function to read a single frame (at the moment the first one).
HRESULT hr = S_OK;
IMFSample *pSample = NULL;
IMFMediaBuffer *buffer = NULL;
DWORD streamIndex, flags;
LONGLONG llTimeStamp;
// Read Sample (RGB32)
hr = _sourcereader->ReadSample (
(DWORD) MF_SOURCE_READER_FIRST_VIDEO_STREAM,
0,
&streamIndex,
&flags,
&llTimeStamp,
&pSample);
if (FAILED (hr)) {
// TODO handle fail case
}
//convert sample data to buffer
hr = pSample->ConvertToContiguousBuffer(&buffer);
if (FAILED (hr)) {
// TODO handle fail case
}
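Before calling ConvertToContiguousBuffer it is also worth checking the output of ReadSample, since the call can succeed without delivering a sample. A small sketch of that check:
// Sketch: ReadSample can return S_OK with no sample (gap, stream tick, or end of stream).
if (flags & MF_SOURCE_READERF_ENDOFSTREAM)
{
    // No more frames in the stream.
}
if (flags & MF_SOURCE_READERF_CURRENTMEDIATYPECHANGED)
{
    // The format changed; re-query width/height/stride before using the sample.
}
if (pSample == NULL)
{
    // Nothing to process on this call.
    return false;
}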
I know that by calling buffer->Lock(&pixels, NULL, &nPixels) I can get the BYTE stream stored in pixels. In my case I create a custom image with the height and width obtained from the source reader (first function). From the empty image I can get an empty color matrix which has to be filled with the following function: Color(byte red, byte green, byte blue)
I don't know how to split my RGB32 BYTE array into the single channels to fill my image. Maybe it is a silly question, but I am relatively new to this area...

For MFVideoFormat_RGB32 the per-pixel byte order in memory is:
B = Blue
G = Green
R = Red
A = Alpha (unused padding for RGB32)
BGRABGRA...
A very simple pseudo-code example of extracting the channels is shown below. Note that only the first width * 4 bytes of each row are pixel data; anything beyond that, up to stride, is row padding.
for (int row = 0; row < height; row++) {
    for (int col = 0; col < width * 4; col += 4) {
        blueBuf[bIndex++]         = sample[row * stride + col];
        greenBuf[gIndex++]        = sample[row * stride + col + 1];
        redBuf[rIndex++]          = sample[row * stride + col + 2];
        transparencyBuf[tIndex++] = sample[row * stride + col + 3];
    }
}
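For completeness, here is a sketch that ties this to the locked IMFMediaBuffer from the question. It assumes the buffer, width, height and stride obtained above, handles a bottom-up frame via the stride sign, and leaves a placeholder where your Color(red, green, blue) call would go:
// Sketch: split the locked RGB32 data into separate channel buffers (needs <vector>).
BYTE* pixels = NULL;
DWORD maxLength = 0, currentLength = 0;
HRESULT hrLock = buffer->Lock(&pixels, &maxLength, &currentLength);
if (SUCCEEDED(hrLock))
{
    const LONG absStride = (stride > 0) ? stride : -stride;
    std::vector<BYTE> red(width * height), green(width * height), blue(width * height);
    for (UINT32 y = 0; y < height; ++y)
    {
        // For a bottom-up frame (negative stride) the bottom display row comes first in memory.
        const BYTE* row = (stride > 0)
            ? pixels + y * absStride
            : pixels + (height - 1 - y) * absStride;
        for (UINT32 x = 0; x < width; ++x)
        {
            const BYTE* px = row + x * 4;        // RGB32 in memory: B, G, R, X
            blue [y * width + x] = px[0];
            green[y * width + x] = px[1];
            red  [y * width + x] = px[2];
            // e.g. image.setColor(x, y, Color(px[2], px[1], px[0]));  // your image/Color API
        }
    }
    buffer->Unlock();
}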

Related

Capture from desktop (VFR) to mp4 - how to deal with frame rate?

I'm putting frames from the Desktop Duplication API through a Media Foundation H264 encoder transform (in this case, NVIDIA's hardware accelerated encoder) and then into a SinkWriter to put the frames into an MP4 container. This whole process works quite well and is extremely fast.
The problem
The video is in slow motion, so to speak. I have the frame rate set everywhere to 60 fps, and the sample times and durations are based on that 60 fps. What is happening is that I am providing far more than 60 frames per second to the SinkWriter. I don't quite know how the MP4 format and video players work, but I assume the player simply uses the frame duration (16.7 ms for 60 fps), and since there are far more frames than 60 per second, the video appears slowed down.
I've had a very hard time finding out how to 'properly' limit the frames being provided to the encoder/sink writer. If I simply Sleep for 15ms or so, the video appears fine but I realize that's not the way to do it - it was just for testing and to confirm my theory. I've tried using the Frame Rate Converter DSP but it gives me an E_UNEXPECTED because I don't think it expects a 'live' source.
Essentially I think I need to do a variable frame rate to constant frame rate conversion.
My question
How would you normally deal with this issue? How do you do this? Are there ways to do it with a 'live' source in Media Foundation? Or is a manual implementation required (e.g. calculating, dropping frames if faster or duplicating them if slower, etc)?
Code provided below;
#define WIN32_LEAN_AND_MEAN
#include <iostream>
#include <mfapi.h>
#include <d3d11.h>
#include <d3d11_4.h>
#include <dxgi1_5.h>
#include <atlcomcli.h>
#include <mftransform.h>
#include <cassert>
#include <mfidl.h>
#include <mfreadwrite.h>
#include <wmcodecdsp.h>
#include <evr.h>
void SetupEncoder(ID3D11Device*, IMFTransform**);
void SetupFrameRateConverter(ID3D11Device*, IMFTransform**);
void SetupSinkWriter(IMFSinkWriter**);
void InitializeBuffer(IMFTransform*, MFT_OUTPUT_DATA_BUFFER*, UINT32);
int main()
{
SetProcessDpiAwarenessContext(DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2);
IDXGIFactory1* dxgiFactory;
auto hr = CreateDXGIFactory1(__uuidof(IDXGIFactory1), (void**)(&dxgiFactory));
IDXGIAdapter* adapter = NULL;
dxgiFactory->EnumAdapters(0, &adapter);
DXGI_ADAPTER_DESC desc = {};
adapter->GetDesc(&desc);
printf("GPU %d: %S (Vendor %04x Device %04x)\n", 0, desc.Description, desc.VendorId, desc.DeviceId);
IDXGIOutput* output;
adapter->EnumOutputs(1, &output);
DXGI_OUTPUT_DESC outputDesc = {};
output->GetDesc(&outputDesc);
printf("Output %S\n", outputDesc.DeviceName);
IDXGIOutput5* dxgiOutput5;
output->QueryInterface(&dxgiOutput5);
// Set up D3D11
D3D_FEATURE_LEVEL featureLevels[] =
{
D3D_FEATURE_LEVEL_11_1,
D3D_FEATURE_LEVEL_11_0,
D3D_FEATURE_LEVEL_10_1,
D3D_FEATURE_LEVEL_10_0,
};
ID3D11Device* device;
D3D_FEATURE_LEVEL levelChosen;
ID3D11DeviceContext* deviceContext;
auto result = D3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, NULL, D3D11_CREATE_DEVICE_BGRA_SUPPORT, featureLevels, _countof(featureLevels), D3D11_SDK_VERSION, &device, &levelChosen, &deviceContext);
ID3D11Multithread* multithread;
device->QueryInterface(&multithread);
multithread->SetMultithreadProtected(true);
// Set up output duplication
DXGI_FORMAT formats[] =
{
DXGI_FORMAT_B8G8R8A8_UNORM
};
IDXGIOutputDuplication* duplication;
result = dxgiOutput5->DuplicateOutput1(device, 0, _countof(formats), formats, &duplication);
IMFTransform* encoder, * fpsConverter;
IMFSinkWriter* sinkWriter;
SetupEncoder(device, &encoder);
SetupFrameRateConverter(device, &fpsConverter);
SetupSinkWriter(&sinkWriter);
// Allocate buffers
IMFMediaType* outputType;
fpsConverter->GetOutputCurrentType(0, &outputType);
MFT_OUTPUT_DATA_BUFFER buffer;
DWORD status;
UINT32 uiFrameSize = 0;
hr = outputType->GetUINT32(MF_MT_SAMPLE_SIZE, &uiFrameSize);
InitializeBuffer(fpsConverter, &buffer, uiFrameSize);
// Event generator for async MFT
IMFMediaEventGenerator* eventGenerator;
encoder->QueryInterface(&eventGenerator);
// Start up
result = encoder->ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH, NULL);
result = encoder->ProcessMessage(MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, NULL);
result = encoder->ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, NULL);
long startTime = 0;
long frameDuration = (long)((1 / 60.f) * 10000000);
UINT frameCounter = 0;
while (frameCounter < 1000)
{
IMFMediaEvent* mediaEvent;
eventGenerator->GetEvent(0, &mediaEvent);
MediaEventType eventType;
mediaEvent->GetType(&eventType);
if (eventType == METransformNeedInput)
{
// Grab frame first
DXGI_OUTDUPL_FRAME_INFO frameInfo;
IDXGIResource* screenResource;
duplication->AcquireNextFrame(10000, &frameInfo, &screenResource);
ID3D11Texture2D* texture;
screenResource->QueryInterface(&texture);
// Verify correct screen for now
D3D11_TEXTURE2D_DESC d;
texture->GetDesc(&d);
assert(d.Width == 1920);
// Create sample for it
IMFSample* sample;
IMFMediaBuffer* mediaBuffer;
result = MFCreateVideoSampleFromSurface(NULL, &sample);
result = MFCreateDXGISurfaceBuffer(IID_ID3D11Texture2D, texture, 0, TRUE, &mediaBuffer);
result = sample->AddBuffer(mediaBuffer);
////////////////////////
// Does not work, E_UNEXPECTED
// Put it through the FPS converter
/*result = fpsConverter->ProcessInput(0, sample, 0);
if (FAILED(result))
break;
result = fpsConverter->ProcessOutput(0, 1, &buffer, &status);*/
///////////////////////
sample->SetSampleDuration(frameDuration);
sample->SetSampleTime(startTime);
startTime += frameDuration;
result = encoder->ProcessInput(0, sample, 0);
sample->Release();
mediaBuffer->Release();
++frameCounter;
// Important, do not forget to release frame
duplication->ReleaseFrame();
}
else if (eventType == METransformHaveOutput)
{
MFT_OUTPUT_DATA_BUFFER encodingOutputBuffer;
encodingOutputBuffer.dwStreamID = 0;
encodingOutputBuffer.pSample = nullptr;
encodingOutputBuffer.dwStatus = 0;
encodingOutputBuffer.pEvents = nullptr;
result = encoder->ProcessOutput(0, 1, &encodingOutputBuffer, 0);
// Now write to sink
sinkWriter->WriteSample(0, encodingOutputBuffer.pSample);
if (encodingOutputBuffer.pSample)
encodingOutputBuffer.pSample->Release();
if (encodingOutputBuffer.pEvents)
encodingOutputBuffer.pEvents->Release();
}
}
encoder->ProcessMessage(MFT_MESSAGE_NOTIFY_END_OF_STREAM, NULL);
encoder->ProcessMessage(MFT_MESSAGE_NOTIFY_END_STREAMING, NULL);
encoder->ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH, NULL);
result = sinkWriter->Finalize();
sinkWriter->Release();
duplication->Release();
adapter->Release();
device->Release();
}
void SetupEncoder(ID3D11Device* device, IMFTransform** encoderOut)
{
MFStartup(MF_VERSION, MFSTARTUP_FULL);
IMFAttributes* ptr = NULL;
MFCreateAttributes(&ptr, 0);
UINT token;
IMFDXGIDeviceManager* deviceManager;
MFCreateDXGIDeviceManager(&token, &deviceManager);
deviceManager->ResetDevice(device, token);
MFT_REGISTER_TYPE_INFO outputType;
outputType.guidMajorType = MFMediaType_Video;
outputType.guidSubtype = MFVideoFormat_H264;
IMFActivate** activates = NULL;
UINT count = 0;
MFTEnumEx(MFT_CATEGORY_VIDEO_ENCODER, MFT_ENUM_FLAG_HARDWARE | MFT_ENUM_FLAG_SORTANDFILTER, NULL, &outputType, &activates, &count);
IMFTransform* encoder;
activates[0]->ActivateObject(IID_PPV_ARGS(&encoder));
// Release the rest
for (UINT32 i = 0; i < count; i++)
{
activates[i]->Release();
}
IMFAttributes* attribs;
encoder->GetAttributes(&attribs);
// Required
attribs->SetUINT32(MF_TRANSFORM_ASYNC_UNLOCK, 1);
attribs->SetUINT32(MF_LOW_LATENCY, 1);
LPWSTR friendlyName = 0;
UINT friendlyNameLength;
attribs->GetAllocatedString(MFT_FRIENDLY_NAME_Attribute, &friendlyName, &friendlyNameLength);
printf("Using encoder %S", friendlyName);
auto result = encoder->ProcessMessage(MFT_MESSAGE_SET_D3D_MANAGER, reinterpret_cast<ULONG_PTR>(deviceManager));
DWORD inputStreamId, outputStreamId;
encoder->GetStreamIDs(1, &inputStreamId, 1, &outputStreamId);
// Set up output media type
IMFMediaType* mediaType;
MFCreateMediaType(&mediaType);
mediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
mediaType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264);
mediaType->SetUINT32(MF_MT_AVG_BITRATE, 10240000);
mediaType->SetUINT32(MF_MT_INTERLACE_MODE, 2);
mediaType->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, 1);
MFSetAttributeSize(mediaType, MF_MT_FRAME_SIZE, 1920, 1080);
MFSetAttributeRatio(mediaType, MF_MT_FRAME_RATE, 60000, 1001);
result = encoder->SetOutputType(outputStreamId, mediaType, 0);
// Set up input media type
IMFMediaType* suggestedInputType;
result = encoder->GetInputAvailableType(inputStreamId, 0, &suggestedInputType);
suggestedInputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
suggestedInputType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_NV12);
MFSetAttributeSize(suggestedInputType, MF_MT_FRAME_SIZE, 1920, 1080);
MFSetAttributeRatio(suggestedInputType, MF_MT_FRAME_RATE, 60000, 1001);
result = encoder->SetInputType(inputStreamId, suggestedInputType, 0);
*encoderOut = encoder;
}
void SetupFrameRateConverter(ID3D11Device* device, IMFTransform** fpsConverterTransformOut)
{
// Set up DSP
IMFTransform* fpsConverter;
CoCreateInstance(CLSID_CFrameRateConvertDmo, NULL, CLSCTX_INPROC_SERVER, IID_IMFTransform, reinterpret_cast<void**>(&fpsConverter));
// Set up fps input type
IMFMediaType* mediaType;
MFCreateMediaType(&mediaType);
UINT32 imageSize;
MFCalculateImageSize(MFVideoFormat_ARGB32, 1920, 1080, &imageSize);
mediaType->SetUINT32(MF_MT_SAMPLE_SIZE, imageSize);
mediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
mediaType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_ARGB32);
MFSetAttributeSize(mediaType, MF_MT_FRAME_SIZE, 1920, 1080);
auto result = fpsConverter->SetInputType(0, mediaType, 0);
// Set up fps output type
MFSetAttributeRatio(mediaType, MF_MT_FRAME_RATE, 60000, 1001);
result = fpsConverter->SetOutputType(0, mediaType, 0);
// Start up FPS
fpsConverter->ProcessMessage(MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, NULL);
fpsConverter->ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, NULL);
*fpsConverterTransformOut = fpsConverter;
}
void SetupSinkWriter(IMFSinkWriter** sinkWriterOut)
{
IMFAttributes* attribs;
MFCreateAttributes(&attribs, 0);
attribs->SetUINT32(MF_LOW_LATENCY, 1);
attribs->SetUINT32(MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, 1);
attribs->SetGUID(MF_TRANSCODE_CONTAINERTYPE, MFTranscodeContainerType_MPEG4);
attribs->SetUINT32(MF_SINK_WRITER_DISABLE_THROTTLING, 1);
IMFSinkWriter* sinkWriter;
MFCreateSinkWriterFromURL(L"output.mp4", NULL, attribs, &sinkWriter);
// Set up input type
IMFMediaType* mediaType;
MFCreateMediaType(&mediaType);
mediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
mediaType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264);
mediaType->SetUINT32(MF_MT_AVG_BITRATE, 10240000);
MFSetAttributeSize(mediaType, MF_MT_FRAME_SIZE, 1920, 1080);
MFSetAttributeRatio(mediaType, MF_MT_FRAME_RATE, 60000, 1001);
MFSetAttributeRatio(mediaType, MF_MT_PIXEL_ASPECT_RATIO, 1, 1);
mediaType->SetUINT32(MF_MT_INTERLACE_MODE, 2);
mediaType->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, 0);
DWORD streamIndex;
auto result = sinkWriter->AddStream(mediaType, &streamIndex);
result = sinkWriter->SetInputMediaType(streamIndex, mediaType, NULL);
sinkWriter->BeginWriting();
*sinkWriterOut = sinkWriter;
}
void InitializeBuffer(IMFTransform* transform, MFT_OUTPUT_DATA_BUFFER* buffer, const UINT32 frameSize)
{
MFT_OUTPUT_STREAM_INFO outputStreamInfo;
DWORD outputStreamId = 0;
ZeroMemory(&outputStreamInfo, sizeof(outputStreamInfo));
ZeroMemory(buffer, sizeof(*buffer));
auto hr = transform->GetOutputStreamInfo(outputStreamId, &outputStreamInfo);
if (SUCCEEDED(hr))
{
if ((outputStreamInfo.dwFlags & MFT_OUTPUT_STREAM_PROVIDES_SAMPLES) == 0 &&
(outputStreamInfo.dwFlags & MFT_OUTPUT_STREAM_CAN_PROVIDE_SAMPLES) == 0) {
IMFSample* pOutputSample = NULL;
IMFMediaBuffer* pMediaBuffer = NULL;
hr = MFCreateSample(&pOutputSample);
if (SUCCEEDED(hr)) {
hr = MFCreateMemoryBuffer(frameSize, &pMediaBuffer);
}
if (SUCCEEDED(hr)) {
hr = pOutputSample->AddBuffer(pMediaBuffer);
}
if (SUCCEEDED(hr)) {
buffer->pSample = pOutputSample;
buffer->pSample->AddRef();
}
pMediaBuffer->Release();
pOutputSample->Release();
}
else
{
std::cout << "Stream provides samples";
}
}
}
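For reference, here is a minimal sketch of the manual VFR-to-CFR pacing described in the question: drop frames that arrive before the next 60 fps slot, and duplicate the previous frame when slots were missed. It works from the QPC timestamp Desktop Duplication reports in DXGI_OUTDUPL_FRAME_INFO; the helper itself is hypothetical, not part of the code above.
// Sketch: decide how many 60 fps samples to emit for a newly captured frame.
// captureTime100ns = frameInfo.LastPresentTime.QuadPart * 10,000,000 / QueryPerformanceFrequency;
// nextSampleTime is the running constant-frame-rate clock, both in 100-ns units.
const LONGLONG kFrameDuration100ns = 10000000 / 60;   // one 60 fps slot (~16.7 ms)

int SamplesToEmit(LONGLONG captureTime100ns, LONGLONG& nextSampleTime)
{
    // Frame arrived before the next CFR slot: drop it (emit nothing for it).
    if (captureTime100ns < nextSampleTime)
        return 0;

    // Emit one sample per elapsed slot; a count above one means the previous
    // frame should be re-submitted to fill the slots that had no new capture.
    int samples = 0;
    while (nextSampleTime <= captureTime100ns)
    {
        nextSampleTime += kFrameDuration100ns;
        ++samples;
    }
    return samples;
}
In the capture loop this would replace the unconditional ProcessInput call: skip the frame when the helper returns 0, and for larger counts submit the frame (re-submitting the previous texture for the extra slots) with sample times advancing by kFrameDuration100ns each time.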

AvSetMmThreadCharacteristicsW for UWP

I'm working on a WASAPI UWP audio application with cpp/winrt which needs to take audio from an input and send it to an output after being processed.
I want to set my audio thread characteristics with AvSetMmThreadCharacteristicsW(L"Pro Audio", &taskIndex), but I just noticed this function (and most of avrt.h) is limited to WINAPI_PARTITION_DESKTOP and WINAPI_PARTITION_GAMES.
I think I need this because when my code is integrated into my UWP app, the audio input is full of discontinuities, and I have no issue in my test code, which uses the avrt API.
Is there another way to configure my thread for audio processing?
Edit: here is my test program https://github.com/loics2/test-wasapi. The interesting part happens in the AudioStream class. I can't share my UWP app, but I can copy these classes as-is into a Windows Runtime Component.
Edit 2: here's the audio thread code :
void AudioStream::StreamWorker()
{
WAVEFORMATEX* captureFormat = nullptr;
WAVEFORMATEX* renderFormat = nullptr;
RingBuffer<float> captureBuffer;
RingBuffer<float> renderBuffer;
BYTE* streamBuffer = nullptr;
unsigned int streamBufferSize = 0;
unsigned int bufferFrameCount = 0;
unsigned int numFramesPadding = 0;
unsigned int inputBufferSize = 0;
unsigned int outputBufferSize = 0;
DWORD captureFlags = 0;
winrt::hresult hr = S_OK;
// m_inputClient is a winrt::com_ptr<IAudioClient3>
if (m_inputClient) {
hr = m_inputClient->GetMixFormat(&captureFormat);
// m_audioCaptureClient is a winrt::com_ptr<IAudioCaptureClient>
if (!m_audioCaptureClient) {
hr = m_inputClient->Initialize(
AUDCLNT_SHAREMODE_SHARED,
AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
0,
0,
captureFormat,
nullptr);
hr = m_inputClient->GetService(__uuidof(IAudioCaptureClient), m_audioCaptureClient.put_void());
hr = m_inputClient->SetEventHandle(m_inputReadyEvent.get());
hr = m_inputClient->Reset();
hr = m_inputClient->Start();
}
}
hr = m_inputClient->GetBufferSize(&inputBufferSize);
// multiplying the buffer size by the number of channels
inputBufferSize *= 2;
// m_outputClient is a winrt::com_ptr<IAudioClient3>
if (m_outputClient) {
hr = m_outputClient->GetMixFormat(&renderFormat);
// m_audioRenderClient is a winrt::com_ptr<IAudioRenderClient>
if (!m_audioRenderClient) {
hr = m_outputClient->Initialize(
AUDCLNT_SHAREMODE_SHARED,
AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
0,
0,
captureFormat,
nullptr);
hr = m_outputClient->GetService(__uuidof(IAudioRenderClient), m_audioRenderClient.put_void());
hr = m_outputClient->SetEventHandle(m_outputReadyEvent.get());
hr = m_outputClient->Reset();
hr = m_outputClient->Start();
}
}
hr = m_outputClient->GetBufferSize(&outputBufferSize);
// multiplying the buffer size by the number of channels
outputBufferSize *= 2;
while (m_isRunning)
{
// ===== INPUT =====
// waiting for the capture event
WaitForSingleObject(m_inputReadyEvent.get(), INFINITE);
// getting the input buffer data
hr = m_audioCaptureClient->GetNextPacketSize(&bufferFrameCount);
while (SUCCEEDED(hr) && bufferFrameCount > 0) {
m_audioCaptureClient->GetBuffer(&streamBuffer, &bufferFrameCount, &captureFlags, nullptr, nullptr);
if (bufferFrameCount != 0) {
captureBuffer.write(reinterpret_cast<float*>(streamBuffer), bufferFrameCount * 2);
hr = m_audioCaptureClient->ReleaseBuffer(bufferFrameCount);
if (FAILED(hr)) {
m_audioCaptureClient->ReleaseBuffer(0);
}
}
else
{
m_audioCaptureClient->ReleaseBuffer(0);
}
hr = m_audioCaptureClient->GetNextPacketSize(&bufferFrameCount);
}
// ===== CALLBACK =====
auto size = captureBuffer.size();
float* userInputData = (float*)calloc(size, sizeof(float));
float* userOutputData = (float*)calloc(size, sizeof(float));
captureBuffer.read(userInputData, size);
OnData(userInputData, userOutputData, size / 2, 2, 48000);
renderBuffer.write(userOutputData, size);
free(userInputData);
free(userOutputData);
// ===== OUTPUT =====
// waiting for the render event
WaitForSingleObject(m_outputReadyEvent.get(), INFINITE);
// getting information about the output buffer
hr = m_outputClient->GetBufferSize(&bufferFrameCount);
hr = m_outputClient->GetCurrentPadding(&numFramesPadding);
// adjust the frame count with the padding
bufferFrameCount -= numFramesPadding;
if (bufferFrameCount != 0) {
hr = m_audioRenderClient->GetBuffer(bufferFrameCount, &streamBuffer);
auto count = (bufferFrameCount * 2);
if (renderBuffer.read(reinterpret_cast<float*>(streamBuffer), count) < count) {
// captureBuffer is not full enough, we should fill the remainder with 0
}
hr = m_audioRenderClient->ReleaseBuffer(bufferFrameCount, 0);
if (FAILED(hr)) {
m_audioRenderClient->ReleaseBuffer(0, 0);
}
}
else
{
m_audioRenderClient->ReleaseBuffer(0, 0);
}
}
exit:
// Cleanup code
}
I removed the error handling code for clarity; most of it is:
if (FAILED(hr))
goto exit;
@IInspectable was right, there was something wrong with my code: the audio processing is done by a library which then calls callbacks with some results.
In my callback, I try to raise a winrt::event, but it sometimes takes more than 50 ms. When that happens, it blocks the audio thread and creates the discontinuities...
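For anyone hitting the same symptom: the fix that follows from this is to keep the capture/render thread free of anything that can block for tens of milliseconds. One way, shown as a sketch with hypothetical names (not the actual app code; a lock-free ring buffer would be even safer than the mutex used here for brevity), is to hand results to a queue that a separate worker thread drains and raises the winrt::event from:
// Sketch: the audio thread only pushes processed blocks; a worker thread pops them
// and does the slow work (raising winrt::event, UI updates, logging).
#include <condition_variable>
#include <mutex>
#include <queue>
#include <vector>

struct ResultQueue
{
    void Push(std::vector<float> block)               // called from the audio thread
    {
        {
            std::lock_guard<std::mutex> lock(m_mutex);
            m_queue.push(std::move(block));
        }
        m_cv.notify_one();
    }

    bool Pop(std::vector<float>& block)               // called from the worker thread
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_cv.wait(lock, [this] { return !m_queue.empty() || m_done; });
        if (m_queue.empty())
            return false;                             // m_done was set during shutdown
        block = std::move(m_queue.front());
        m_queue.pop();
        return true;
    }

    std::mutex m_mutex;
    std::condition_variable m_cv;
    std::queue<std::vector<float>> m_queue;
    bool m_done = false;                              // set (under the lock) on shutdown
};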

Extract dirty rects RGB pixel buffer data DirectX

I'm using Desktop Duplication from Windows API.
Here is the code to access next frame and get the rectangle of pixels that have change from previous frame.
//
// Get next frame and write it into Data
//
_Success_(*Timeout == false && return == DUPL_RETURN_SUCCESS)
DUPL_RETURN DUPLICATIONMANAGER::GetFrame(_Out_ FRAME_DATA* Data, _Out_ bool* Timeout)
{
IDXGIResource* DesktopResource = nullptr;
DXGI_OUTDUPL_FRAME_INFO FrameInfo;
// Get new frame
HRESULT hr = m_DeskDupl->AcquireNextFrame(10000, &FrameInfo, &DesktopResource);
if (hr == DXGI_ERROR_WAIT_TIMEOUT)
{
*Timeout = true;
return DUPL_RETURN_SUCCESS;
}
*Timeout = false;
if (FAILED(hr))
{
return ProcessFailure(m_Device, L"Failed to acquire next frame in DUPLICATIONMANAGER", L"Error", hr, FrameInfoExpectedErrors);
}
// If still holding old frame, destroy it
if (m_AcquiredDesktopImage)
{
m_AcquiredDesktopImage->Release();
m_AcquiredDesktopImage = nullptr;
}
// QI for IDXGIResource
hr = DesktopResource->QueryInterface(__uuidof(ID3D11Texture2D), reinterpret_cast<void **>(&m_AcquiredDesktopImage));
DesktopResource->Release();
DesktopResource = nullptr;
if (FAILED(hr))
{
return ProcessFailure(nullptr, L"Failed to QI for ID3D11Texture2D from acquired IDXGIResource in DUPLICATIONMANAGER", L"Error", hr);
}
// Get metadata
if (FrameInfo.TotalMetadataBufferSize)
{
// Old buffer too small
if (FrameInfo.TotalMetadataBufferSize > m_MetaDataSize)
{
if (m_MetaDataBuffer)
{
delete [] m_MetaDataBuffer;
m_MetaDataBuffer = nullptr;
}
m_MetaDataBuffer = new (std::nothrow) BYTE[FrameInfo.TotalMetadataBufferSize];
if (!m_MetaDataBuffer)
{
m_MetaDataSize = 0;
Data->MoveCount = 0;
Data->DirtyCount = 0;
return ProcessFailure(nullptr, L"Failed to allocate memory for metadata in DUPLICATIONMANAGER", L"Error", E_OUTOFMEMORY);
}
m_MetaDataSize = FrameInfo.TotalMetadataBufferSize;
}
UINT BufSize = FrameInfo.TotalMetadataBufferSize;
// Get move rectangles
hr = m_DeskDupl->GetFrameMoveRects(BufSize, reinterpret_cast<DXGI_OUTDUPL_MOVE_RECT*>(m_MetaDataBuffer), &BufSize);
if (FAILED(hr))
{
Data->MoveCount = 0;
Data->DirtyCount = 0;
return ProcessFailure(nullptr, L"Failed to get frame move rects in DUPLICATIONMANAGER", L"Error", hr, FrameInfoExpectedErrors);
}
Data->MoveCount = BufSize / sizeof(DXGI_OUTDUPL_MOVE_RECT);
BYTE* DirtyRects = m_MetaDataBuffer + BufSize;
BufSize = FrameInfo.TotalMetadataBufferSize - BufSize;
// Get dirty rectangles
hr = m_DeskDupl->GetFrameDirtyRects(BufSize, reinterpret_cast<RECT*>(DirtyRects), &BufSize);
if (FAILED(hr))
{
Data->MoveCount = 0;
Data->DirtyCount = 0;
return ProcessFailure(nullptr, L"Failed to get frame dirty rects in DUPLICATIONMANAGER", L"Error", hr, FrameInfoExpectedErrors);
}
Data->DirtyCount = BufSize / sizeof(RECT);
Data->MetaData = m_MetaDataBuffer;
}
Data->Frame = m_AcquiredDesktopImage;
Data->FrameInfo = FrameInfo;
//Here I would like to access pixel data from Data->Frame. A buffer of RGBA pixel
return DUPL_RETURN_SUCCESS;
}
Here is Frame_Data structure
typedef struct _FRAME_DATA
{
ID3D11Texture2D* Frame;
DXGI_OUTDUPL_FRAME_INFO FrameInfo;
_Field_size_bytes_((MoveCount * sizeof(DXGI_OUTDUPL_MOVE_RECT)) + (DirtyCount * sizeof(RECT))) BYTE* MetaData;
UINT DirtyCount;
UINT MoveCount;
} FRAME_DATA;
Is it possible to access pixel buffer data that have been modified from Data->Frame
Here is my code to access data :
BYTE* DISPLAYMANAGER::GetImageData(ID3D11Texture2D* texture2D, D3D11_TEXTURE2D_DESC Desc)
{
if (texture2D != NULL)
{
D3D11_TEXTURE2D_DESC description;
texture2D->GetDesc(&description);
description.BindFlags = 0;
description.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
description.Usage = D3D11_USAGE_STAGING;
description.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
ID3D11Texture2D* texTemp = NULL;
HRESULT hr = m_Device->CreateTexture2D(&description, NULL, &texTemp);
if (FAILED(hr))
{
if (texTemp)
{
texTemp->Release();
texTemp = NULL;
}
return NULL;
}
m_DeviceContext->CopyResource(texTemp, texture2D);
D3D11_MAPPED_SUBRESOURCE mapped;
unsigned int subresource = D3D11CalcSubresource(0, 0, 0);
hr = m_DeviceContext->Map(texTemp, subresource, D3D11_MAP_READ_WRITE, 0, &mapped);
if (FAILED(hr))
{
texTemp->Release();
texTemp = NULL;
return NULL;
}
unsigned char *captureData = new unsigned char[Desc.Width * Desc.Height * 4];
RtlZeroMemory(captureData, Desc.Width * Desc.Height * 4);
const int pitch = mapped.RowPitch;
unsigned char *source = static_cast<unsigned char*>(mapped.pData);
unsigned char *dest = captureData;
for (int i = 0; i < Desc.Height; i++) {
memcpy(captureData, source, Desc.Width * 4);
source += pitch;
captureData += Desc.Width * 4;
}
for (int i = 0; i < Desc.Width * Desc.Height * 4; i++) {
//trace(L"Pixel[%d] = %x\n", i, dest[i]);
}
m_DeviceContext->Unmap(texTemp, 0);
return dest;
}
else
return NULL;
}
Thank you for your help!
The textures you obtain via the duplication API are not necessarily CPU-accessible for individual pixel access. To read the texture data, you might need to create a mappable staging texture and copy the obtained texture into it. Mapping that staging texture then gives you a pointer to the actual data. Note that this is, in general, not a performance-friendly operation.
You will find related information in other answers as well:
How to work with pixels using Direct2D:
For those times when you absolutely have to do CPU pixel manipulation but still want a substantial degree of acceleration, you can manage your own mappable D3D11 textures. For example, you can use staging textures if you want to asynchronously manipulate your texture resources from the CPU.
Transferring textures across adapters in DirectX 11:
... copies it to a staging resource (created on the same device) using ID3D11DeviceContext::CopyResource. I then map that staging resource with Read...
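If only the changed pixels are needed, the staging copy can also be limited to the dirty rectangles instead of the whole frame. Here is a sketch (it assumes a staging texture of the same size and format created with D3D11_USAGE_STAGING and D3D11_CPU_ACCESS_READ, plus the FRAME_DATA layout from the question) using ID3D11DeviceContext::CopySubresourceRegion:
// Sketch: copy only the dirty rectangles of the acquired frame into a staging texture,
// then map the staging texture to read those regions on the CPU.
void CopyDirtyRects(ID3D11DeviceContext* context, FRAME_DATA* data, ID3D11Texture2D* staging)
{
    // Dirty rects follow the move rects in the metadata buffer (same layout as in GetFrame).
    RECT* dirtyRects = reinterpret_cast<RECT*>(
        data->MetaData + data->MoveCount * sizeof(DXGI_OUTDUPL_MOVE_RECT));

    for (UINT i = 0; i < data->DirtyCount; ++i)
    {
        const RECT& r = dirtyRects[i];
        D3D11_BOX box;
        box.left = r.left;   box.top = r.top;       box.front = 0;
        box.right = r.right; box.bottom = r.bottom; box.back = 1;

        // Copy the dirty region to the same position in the staging texture.
        context->CopySubresourceRegion(staging, 0, r.left, r.top, 0, data->Frame, 0, &box);
    }

    D3D11_MAPPED_SUBRESOURCE mapped;
    if (SUCCEEDED(context->Map(staging, 0, D3D11_MAP_READ, 0, &mapped)))
    {
        // mapped.pData + y * mapped.RowPitch + x * 4 is the BGRA pixel at (x, y).
        context->Unmap(staging, 0);
    }
}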

Video captured by Media Foundation is vertically mirrored

I'm using Media Foundation IMFSourceReaderCallback implementation for grabbing video frames from the camera, and then OpenCV imshow to present the frames in a loop.
However I get the frames vertically flipped...
Is this a bug? Should I set some attribute to avoid this?
Here is my code:
Initialization:
IMFAttributes* pDeviceAttrs, *pReaderAttrs;
hr = MFCreateAttributes(&pDeviceAttrs, 1);
if (FAILED(hr)) goto Exit;
hr = pDeviceAttrs->SetGUID(MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE, MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_GUID);
if (FAILED(hr)) goto Exit;
//...
// Correct source provider is activated through ActivateObject
//
hr = MFCreateAttributes(&pReaderAttrs, 2);
if (FAILED(hr)) goto Exit;
pReaderAttrs->SetUnknown(MF_SOURCE_READER_ASYNC_CALLBACK,(IUnknown*)this);
pReaderAttrs->SetUINT32(MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, TRUE);
hr = MFCreateSourceReaderFromMediaSource(pMediaSource, pReaderAttrs, &m_pReader);
if (FAILED(hr)) goto Exit;
// Correct profile is set
OnReadSample implementation:
HRESULT hr = S_OK;
LONG defaultStride = 0;
LONG stride = 0;
BYTE *pBuffer = NULL;
EnterCriticalSection(&m_critSec);
if (NULL != pSample)
{
IMFMediaBuffer* pMediaBuffer;
DWORD dataSize = 0;
// In case of a single buffer, no copy would happen
hr = pSample->ConvertToContiguousBuffer(&pMediaBuffer);
if (FAILED(hr)) goto Cleanup;
pMediaBuffer->GetCurrentLength(&dataSize);
hr = pMediaBuffer->Lock(&pBuffer, &dataSize, &dataSize);
if (FAILED(hr)) goto Cleanup;
// todo: use a backbuffer to avoid sync issues
if (NULL == m_pLatestFrame) m_pLatestFrame = (BYTE*)malloc(dataSize);
memcpy(m_pLatestFrame, pBuffer, dataSize);
++m_frameNumber;
pMediaBuffer->Unlock();
pMediaBuffer->Release();
}
Cleanup:
LeaveCriticalSection(&m_critSec);
// Async ReadFrame for the next buffer:
hr = m_pReader->ReadSample(
(DWORD)MF_SOURCE_READER_FIRST_VIDEO_STREAM,
0,
NULL, // actual
NULL, // flags
NULL, // timestamp
NULL // sample
);
return hr;
Conversion to cv::image:
void SourceReaderImpl::GetLatestFrame(BYTE** ppLatestFrame)
{
EnterCriticalSection(&m_critSec);
*ppLatestFrame = m_pLatestFrame;
LeaveCriticalSection(&m_critSec);
}
void* CameraWrapperImpl::getLatestFrame()
{
BYTE* pLatestFrame = NULL;
m_pMfReader->GetLatestFrame(&pLatestFrame);
return pLatestFrame;
}
void Player::Present()
{
//...
color = cv::Mat(colorSize,
CV_8UC3,
static_cast<unsigned char*>(m_pColorCameraImpl->getLatestFrame()));
cv::imshow("color", color);
}
Any idea?
Thanks in advance!
A bottom-up bitmap is stored with the last scan line first, so the image appears upside down. The easiest solution is to call cv::flip:
void Player::Present()
{
//...
color = cv::Mat(colorSize,
CV_8UC3,
static_cast<unsigned char*>(m_pColorCameraImpl->getLatestFrame()));
cv::Mat corrected;
cv::flip(color, corrected, 0);
cv::imshow("corrected", corrected);
}
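Alternatively, the extra flip can be avoided at copy time: if the media type reports a negative default stride (MF_MT_DEFAULT_STRIDE), the frame is stored bottom-up and the rows can simply be copied in reverse order when grabbing the buffer. A sketch of that row copy, assuming a 32-bit RGB format and the width/height of the capture profile:
// Sketch: copy a possibly bottom-up frame into a top-down destination row by row.
// 'stride' is the MF_MT_DEFAULT_STRIDE value; a negative value means bottom-up storage.
void CopyTopDown(const BYTE* src, BYTE* dst, LONG stride, UINT32 width, UINT32 height)
{
    const LONG absStride = (stride > 0) ? stride : -stride;
    const UINT32 rowBytes = width * 4;               // 4 bytes per pixel for a 32-bit RGB format
    for (UINT32 y = 0; y < height; ++y)
    {
        // For bottom-up sources the last display row is first in memory, so read backwards.
        const BYTE* srcRow = (stride > 0)
            ? src + y * absStride
            : src + (height - 1 - y) * absStride;
        memcpy(dst + y * rowBytes, srcRow, rowBytes);
    }
}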

Empty BYTE vector from Wasapi stream capture session

My main goal is to capture 2 audio streams, store them as a vector<BYTE>, and then come up with a congruence algorithm to check for equality. Right now I am only capturing one stream, however every value in the stream is 0 ('\0'). Why am I getting null values for all elements in my BYTE vector?
void AudioDeviceOperator::TakeInput(AudioStreamModel* m)
{
HRESULT hr;
IAudioClient *iac = NULL;
IAudioCaptureClient *pCaptureClient = NULL;
WAVEFORMATEX *mixFormat;
UINT32 bufferFrameCount;
HRESULT de;
de = AudioDeviceEnumerator -> GetDefaultAudioEndpoint(eCapture, eConsole, &SelectedAudioDeviceModel->AudioDevice);
hr = SelectedAudioDeviceModel->AudioDevice -> Activate(IID_IAudioClient, CLSCTX_ALL, NULL, (void**)&iac);
REFERENCE_TIME bufferDuration = 0; //default to min
REFERENCE_TIME periodicity = 0;
GUID trashGuid;
HRESULT tg = CoCreateGuid(&trashGuid);
LPCGUID AudioSessionGuid = &trashGuid;
GUID guid2 = *AudioSessionGuid;
HRESULT guidError = UuidCreate(&guid2); //project -> properties -> Linker -> Command Line -> Rpctr4.lib
iac->GetMixFormat(&mixFormat);
m->StreamFormat = *mixFormat;
if (SUCCEEDED(guidError)) {
cout << "\n" << "Initializing audio stream..";
hr = iac->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_CROSSPROCESS, bufferDuration, periodicity, mixFormat, AudioSessionGuid);
cout << hr;
hr = iac->GetBufferSize(&bufferFrameCount);
cout << hr;
iac->GetService(IID_IAudioCaptureClient, (void**)&pCaptureClient);
// Calculate the actual duration of the allocated buffer.
double hnsActualDuration = (double)REFTIMES_PER_SEC * bufferFrameCount / mixFormat-> nSamplesPerSec;
bool recordAudio = TRUE;
BYTE *sData;
UINT32 numFramesAvailable = 0;
DWORD flags;
UINT32 packetLength = 0;
int numOfPackets = 0;
iac->Start();
while (recordAudio == TRUE)
{
hr = pCaptureClient->GetNextPacketSize(&packetLength);
while (packetLength != 0) {
hr = pCaptureClient->GetBuffer(&sData, &numFramesAvailable, &flags, NULL, NULL);
if (sData != NULL) {
m->Stream.push_back((*sData)); //here is where I write to the vector
numOfPackets++;
}
if (numOfPackets == 100) { // just getting 100 packets for testing
recordAudio = FALSE;
break;
}
}
hr = pCaptureClient->ReleaseBuffer(numFramesAvailable);
}
}
else
cout << "AudioSessionGuidError";
CoTaskMemFree(iac);
AudioDeviceEnumerator->Release();
//pCaptureClient->Release(); // ReleaseBuffer seems to release the capture client interface as well.
};
When the audio session starts I make sure to make some noise. With the vector values the way they are, I have nothing to compare. I'm also assuming that rendering that byte vector with an IAudioRenderClient will produce nothing, but that is my next plan of action. Any ideas?
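One likely cause of the zeros: the loop stores only a single byte per GetBuffer call (*sData is just the first byte of the packet, which for float silence is 0), never releases the buffer inside the inner loop, and never re-queries the packet size. Here is a sketch of the inner loop that copies whole packets and checks the silence flag (names follow the question's code; the bytes-per-frame count comes from the mix format):
// Sketch: copy every byte of each capture packet, release the buffer every time,
// and re-query the packet size so the loop actually advances.
const WORD blockAlign = mixFormat->nBlockAlign;      // bytes per frame across all channels

hr = pCaptureClient->GetNextPacketSize(&packetLength);
while (SUCCEEDED(hr) && packetLength != 0)
{
    hr = pCaptureClient->GetBuffer(&sData, &numFramesAvailable, &flags, NULL, NULL);
    if (FAILED(hr))
        break;

    if (flags & AUDCLNT_BUFFERFLAGS_SILENT)
    {
        // The endpoint delivered silence; the buffer contents are not meaningful.
    }
    else
    {
        const size_t bytes = numFramesAvailable * blockAlign;
        m->Stream.insert(m->Stream.end(), sData, sData + bytes);  // whole packet, not one byte
    }

    hr = pCaptureClient->ReleaseBuffer(numFramesAvailable);
    if (FAILED(hr))
        break;

    if (++numOfPackets == 100)                       // stop after 100 packets, as before
    {
        recordAudio = FALSE;
        break;
    }

    hr = pCaptureClient->GetNextPacketSize(&packetLength);
}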