Capture from desktop (VFR) to mp4 - how to deal with frame rate? - c++

I'm putting frames from the Desktop Duplication API through a Media Foundation H264 encoder transform (in this case, NVIDIA's hardware-accelerated encoder) and then into a SinkWriter to put the frames into an MP4 container. This whole process works quite well and is extremely fast.
The problem
The video plays in slow motion, so to speak. I have the frame rate set to 60 fps everywhere, and the sample times and durations are based on that 60 fps. What is happening is that I am providing far more than 60 frames per second to the SinkWriter. I don't know the internals of the MP4 format or of video players, but I assume playback simply honors each sample's duration (16.7 ms at 60 fps), and since there are far more frames than real time allows for, the result appears slowed down.
I've had a very hard time finding out how to properly limit the frames being fed to the encoder/sink writer. If I simply Sleep for about 15 ms per frame, the video looks fine, but I realize that is not the right way to do it; it was just for testing and to confirm my theory. I've also tried the Frame Rate Converter DSP, but it returns E_UNEXPECTED, presumably because it doesn't expect a 'live' source.
Essentially I think I need to do a variable frame rate to constant frame rate conversion.
My question
How is this normally handled? Are there ways to do it with a 'live' source in Media Foundation, or is a manual implementation required (e.g. tracking elapsed time, dropping frames when the source runs fast and duplicating them when it runs slow)?
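Here is roughly what I imagine the manual approach would look like (just a sketch of my idea, not working code; SubmitToEncoder is a stand-in for the ProcessInput call in my code below, and an exact 60 fps target is assumed):
#include <windows.h>
#include <mfidl.h>

// Stand-in for feeding the encoder (ProcessInput in the code below).
void SubmitToEncoder(IMFSample* sample, LONGLONG time, LONGLONG duration);

const LONGLONG kFrameDuration = 10000000 / 60; // 100 ns units

// Called once per captured desktop frame with the elapsed time on a
// monotonic (e.g. QueryPerformanceCounter-based) clock.
void PaceFrame(IMFSample* sample, LONGLONG elapsedHns,
               LONGLONG& nextFrameTime, IMFSample*& lastSample)
{
    // Source running fast: this CFR slot is already filled, drop the frame.
    if (elapsedHns < nextFrameTime)
        return;
    // Source ran slow: fill the skipped slots by duplicating the last frame.
    while (lastSample != NULL && elapsedHns >= nextFrameTime + kFrameDuration)
    {
        SubmitToEncoder(lastSample, nextFrameTime, kFrameDuration);
        nextFrameTime += kFrameDuration;
    }
    SubmitToEncoder(sample, nextFrameTime, kFrameDuration);
    nextFrameTime += kFrameDuration;
    lastSample = sample; // AddRef/Release bookkeeping omitted for brevity
}
Alternatively, since MP4 stores an explicit time and duration per sample, it might be enough to keep the stream VFR and stamp each sample with its actual capture time (e.g. derived from DXGI_OUTDUPL_FRAME_INFO::LastPresentTime) rather than forcing a constant rate.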
Code provided below:
#define WIN32_LEAN_AND_MEAN
#include <iostream>
#include <mfapi.h>
#include <d3d11.h>
#include <d3d11_4.h>
#include <dxgi1_5.h>
#include <atlcomcli.h>
#include <mftransform.h>
#include <cassert>
#include <mfidl.h>
#include <mfreadwrite.h>
#include <wmcodecdsp.h>
#include <evr.h>
void SetupEncoder(ID3D11Device*, IMFTransform**);
void SetupFrameRateConverter(ID3D11Device*, IMFTransform**);
void SetupSinkWriter(IMFSinkWriter**);
void InitializeBuffer(IMFTransform*, MFT_OUTPUT_DATA_BUFFER*, UINT32);
int main()
{
SetProcessDpiAwarenessContext(DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2);
CoInitializeEx(NULL, COINIT_MULTITHREADED); // required for the CoCreateInstance call in SetupFrameRateConverter
IDXGIFactory1* dxgiFactory;
auto hr = CreateDXGIFactory1(__uuidof(IDXGIFactory1), (void**)(&dxgiFactory));
IDXGIAdapter* adapter = NULL;
dxgiFactory->EnumAdapters(0, &adapter);
DXGI_ADAPTER_DESC desc = {};
adapter->GetDesc(&desc);
printf("GPU %d: %S (Vendor %04x Device %04x)\n", 0, desc.Description, desc.VendorId, desc.DeviceId);
IDXGIOutput* output;
adapter->EnumOutputs(1, &output);
DXGI_OUTPUT_DESC outputDesc = {};
output->GetDesc(&outputDesc);
printf("Output %S\n", outputDesc.DeviceName);
IDXGIOutput5* dxgiOutput5;
output->QueryInterface(&dxgiOutput5);
// Set up D3D11
D3D_FEATURE_LEVEL featureLevels[] =
{
D3D_FEATURE_LEVEL_11_1,
D3D_FEATURE_LEVEL_11_0,
D3D_FEATURE_LEVEL_10_1,
D3D_FEATURE_LEVEL_10_0,
};
ID3D11Device* device;
D3D_FEATURE_LEVEL levelChosen;
ID3D11DeviceContext* deviceContext;
auto result = D3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, NULL, D3D11_CREATE_DEVICE_BGRA_SUPPORT, featureLevels, _countof(featureLevels), D3D11_SDK_VERSION, &device, &levelChosen, &deviceContext);
ID3D11Multithread* multithread;
device->QueryInterface(&multithread);
multithread->SetMultithreadProtected(true);
// Set up output duplication
DXGI_FORMAT formats[] =
{
DXGI_FORMAT_B8G8R8A8_UNORM
};
IDXGIOutputDuplication* duplication;
result = dxgiOutput5->DuplicateOutput1(device, 0, _countof(formats), formats, &duplication);
IMFTransform* encoder, * fpsConverter;
IMFSinkWriter* sinkWriter;
SetupEncoder(device, &encoder);
SetupFrameRateConverter(device, &fpsConverter);
SetupSinkWriter(&sinkWriter);
// Allocate buffers
IMFMediaType* outputType;
fpsConverter->GetOutputCurrentType(0, &outputType);
MFT_OUTPUT_DATA_BUFFER buffer;
DWORD status;
UINT32 uiFrameSize = 0;
hr = outputType->GetUINT32(MF_MT_SAMPLE_SIZE, &uiFrameSize);
InitializeBuffer(fpsConverter, &buffer, uiFrameSize);
// Event generator for async MFT
IMFMediaEventGenerator* eventGenerator;
encoder->QueryInterface(&eventGenerator);
// Start up
result = encoder->ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH, NULL);
result = encoder->ProcessMessage(MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, NULL);
result = encoder->ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, NULL);
LONGLONG startTime = 0;
// 100 ns units; note this assumes exactly 60 fps, while the media types below are set to 60000/1001 (59.94 fps)
LONGLONG frameDuration = 10000000 / 60;
UINT frameCounter = 0;
while (frameCounter < 1000)
{
IMFMediaEvent* mediaEvent;
eventGenerator->GetEvent(0, &mediaEvent);
MediaEventType eventType;
mediaEvent->GetType(&eventType);
if (eventType == METransformNeedInput)
{
// Grab frame first
DXGI_OUTDUPL_FRAME_INFO frameInfo;
IDXGIResource* screenResource;
duplication->AcquireNextFrame(10000, &frameInfo, &screenResource);
ID3D11Texture2D* texture;
screenResource->QueryInterface(&texture);
// Verify correct screen for now
D3D11_TEXTURE2D_DESC d;
texture->GetDesc(&d);
assert(d.Width == 1920);
// Create sample for it
IMFSample* sample;
IMFMediaBuffer* mediaBuffer;
result = MFCreateVideoSampleFromSurface(NULL, &sample);
result = MFCreateDXGISurfaceBuffer(IID_ID3D11Texture2D, texture, 0, TRUE, &mediaBuffer);
result = sample->AddBuffer(mediaBuffer);
////////////////////////
// Does not work, E_UNEXPECTED
// Put it through the FPS converter
/*result = fpsConverter->ProcessInput(0, sample, 0);
if (FAILED(result))
break;
result = fpsConverter->ProcessOutput(0, 1, &buffer, &status);*/
///////////////////////
sample->SetSampleDuration(frameDuration);
sample->SetSampleTime(startTime);
startTime += frameDuration;
result = encoder->ProcessInput(0, sample, 0);
sample->Release();
mediaBuffer->Release();
++frameCounter;
// Important, do not forget to release frame
duplication->ReleaseFrame();
}
else if (eventType == METransformHaveOutput)
{
MFT_OUTPUT_DATA_BUFFER encodingOutputBuffer;
encodingOutputBuffer.dwStreamID = 0;
encodingOutputBuffer.pSample = nullptr;
encodingOutputBuffer.dwStatus = 0;
encodingOutputBuffer.pEvents = nullptr;
result = encoder->ProcessOutput(0, 1, &encodingOutputBuffer, &status); // pdwStatus must not be NULL
// Now write to sink
sinkWriter->WriteSample(0, encodingOutputBuffer.pSample);
if (encodingOutputBuffer.pSample)
encodingOutputBuffer.pSample->Release();
if (encodingOutputBuffer.pEvents)
encodingOutputBuffer.pEvents->Release();
}
}
encoder->ProcessMessage(MFT_MESSAGE_NOTIFY_END_OF_STREAM, NULL);
encoder->ProcessMessage(MFT_MESSAGE_NOTIFY_END_STREAMING, NULL);
encoder->ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH, NULL);
result = sinkWriter->Finalize();
sinkWriter->Release();
duplication->Release();
adapter->Release();
device->Release();
}
void SetupEncoder(ID3D11Device* device, IMFTransform** encoderOut)
{
MFStartup(MF_VERSION, MFSTARTUP_FULL);
IMFAttributes* ptr = NULL;
MFCreateAttributes(&ptr, 0);
UINT token;
IMFDXGIDeviceManager* deviceManager;
MFCreateDXGIDeviceManager(&token, &deviceManager);
deviceManager->ResetDevice(device, token);
MFT_REGISTER_TYPE_INFO outputType;
outputType.guidMajorType = MFMediaType_Video;
outputType.guidSubtype = MFVideoFormat_H264;
IMFActivate** activates = NULL;
UINT count = 0;
MFTEnumEx(MFT_CATEGORY_VIDEO_ENCODER, MFT_ENUM_FLAG_HARDWARE | MFT_ENUM_FLAG_SORTANDFILTER, NULL, &outputType, &activates, &count);
IMFTransform* encoder;
activates[0]->ActivateObject(IID_PPV_ARGS(&encoder));
// Release the rest
for (UINT32 i = 0; i < count; i++)
{
activates[i]->Release();
}
IMFAttributes* attribs;
encoder->GetAttributes(&attribs);
// Required
attribs->SetUINT32(MF_TRANSFORM_ASYNC_UNLOCK, 1);
attribs->SetUINT32(MF_LOW_LATENCY, 1);
LPWSTR friendlyName = 0;
UINT friendlyNameLength;
attribs->GetAllocatedString(MFT_FRIENDLY_NAME_Attribute, &friendlyName, &friendlyNameLength);
printf("Using encoder %S", friendlyName);
auto result = encoder->ProcessMessage(MFT_MESSAGE_SET_D3D_MANAGER, reinterpret_cast<ULONG_PTR>(deviceManager));
DWORD inputStreamId, outputStreamId;
encoder->GetStreamIDs(1, &inputStreamId, 1, &outputStreamId);
// Set up output media type
IMFMediaType* mediaType;
MFCreateMediaType(&mediaType);
mediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
mediaType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264);
mediaType->SetUINT32(MF_MT_AVG_BITRATE, 10240000);
mediaType->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
mediaType->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, 1);
MFSetAttributeSize(mediaType, MF_MT_FRAME_SIZE, 1920, 1080);
MFSetAttributeRatio(mediaType, MF_MT_FRAME_RATE, 60000, 1001);
result = encoder->SetOutputType(outputStreamId, mediaType, 0);
// Set up input media type
IMFMediaType* suggestedInputType;
result = encoder->GetInputAvailableType(inputStreamId, 0, &suggestedInputType);
suggestedInputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
suggestedInputType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_NV12);
MFSetAttributeSize(suggestedInputType, MF_MT_FRAME_SIZE, 1920, 1080);
MFSetAttributeRatio(suggestedInputType, MF_MT_FRAME_RATE, 60000, 1001);
result = encoder->SetInputType(inputStreamId, suggestedInputType, 0);
*encoderOut = encoder;
}
void SetupFrameRateConverter(ID3D11Device* device, IMFTransform** fpsConverterTransformOut)
{
// Set up DSP
IMFTransform* fpsConverter;
CoCreateInstance(CLSID_CFrameRateConvertDmo, NULL, CLSCTX_INPROC_SERVER, IID_IMFTransform, reinterpret_cast<void**>(&fpsConverter));
// Set up fps input type
IMFMediaType* mediaType;
MFCreateMediaType(&mediaType);
UINT32 imageSize;
MFCalculateImageSize(MFVideoFormat_ARGB32, 1920, 1080, &imageSize);
mediaType->SetUINT32(MF_MT_SAMPLE_SIZE, imageSize);
mediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
mediaType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_ARGB32);
MFSetAttributeSize(mediaType, MF_MT_FRAME_SIZE, 1920, 1080);
auto result = fpsConverter->SetInputType(0, mediaType, 0);
// Set up fps output type
MFSetAttributeRatio(mediaType, MF_MT_FRAME_RATE, 60000, 1001);
result = fpsConverter->SetOutputType(0, mediaType, 0);
// Start up FPS
fpsConverter->ProcessMessage(MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, NULL);
fpsConverter->ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, NULL);
*fpsConverterTransformOut = fpsConverter;
}
void SetupSinkWriter(IMFSinkWriter** sinkWriterOut)
{
IMFAttributes* attribs;
MFCreateAttributes(&attribs, 0);
attribs->SetUINT32(MF_LOW_LATENCY, 1);
attribs->SetUINT32(MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, 1);
attribs->SetGUID(MF_TRANSCODE_CONTAINERTYPE, MFTranscodeContainerType_MPEG4);
attribs->SetUINT32(MF_SINK_WRITER_DISABLE_THROTTLING, 1);
IMFSinkWriter* sinkWriter;
MFCreateSinkWriterFromURL(L"output.mp4", NULL, attribs, &sinkWriter);
// Set up input type
IMFMediaType* mediaType;
MFCreateMediaType(&mediaType);
mediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
mediaType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264);
mediaType->SetUINT32(MF_MT_AVG_BITRATE, 10240000);
MFSetAttributeSize(mediaType, MF_MT_FRAME_SIZE, 1920, 1080);
MFSetAttributeRatio(mediaType, MF_MT_FRAME_RATE, 60000, 1001);
MFSetAttributeRatio(mediaType, MF_MT_PIXEL_ASPECT_RATIO, 1, 1);
mediaType->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
mediaType->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, 0);
DWORD streamIndex;
auto result = sinkWriter->AddStream(mediaType, &streamIndex);
result = sinkWriter->SetInputMediaType(streamIndex, mediaType, NULL);
sinkWriter->BeginWriting();
*sinkWriterOut = sinkWriter;
}
void InitializeBuffer(IMFTransform* transform, MFT_OUTPUT_DATA_BUFFER* buffer, const UINT32 frameSize)
{
MFT_OUTPUT_STREAM_INFO outputStreamInfo;
DWORD outputStreamId = 0;
ZeroMemory(&outputStreamInfo, sizeof(outputStreamInfo));
ZeroMemory(buffer, sizeof(*buffer));
auto hr = transform->GetOutputStreamInfo(outputStreamId, &outputStreamInfo);
if (SUCCEEDED(hr))
{
if ((outputStreamInfo.dwFlags & MFT_OUTPUT_STREAM_PROVIDES_SAMPLES) == 0 &&
(outputStreamInfo.dwFlags & MFT_OUTPUT_STREAM_CAN_PROVIDE_SAMPLES) == 0) {
IMFSample* pOutputSample = NULL;
IMFMediaBuffer* pMediaBuffer = NULL;
hr = MFCreateSample(&pOutputSample);
if (SUCCEEDED(hr)) {
hr = MFCreateMemoryBuffer(frameSize, &pMediaBuffer);
}
if (SUCCEEDED(hr)) {
hr = pOutputSample->AddBuffer(pMediaBuffer);
}
if (SUCCEEDED(hr)) {
buffer->pSample = pOutputSample;
buffer->pSample->AddRef();
}
pMediaBuffer->Release();
pOutputSample->Release();
}
else
{
std::cout << "Stream provides samples";
}
}
}
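As an aside, I'm aware the sink writer could own the encoder entirely instead of me driving the MFT by hand (SetupSinkWriter above already sets MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS): declare H264 as the stream's target type and an uncompressed type as the input, and the writer inserts the encoder itself. A sketch of that variant (it would simplify the pipeline, though as far as I can tell it wouldn't by itself solve the pacing problem, since I would still be stamping the sample times):
// Sketch: let the sink writer create and drive the H264 encoder itself.
void SetupSinkWriterWithEncoding(IMFSinkWriter* sinkWriter, DWORD* streamIndexOut)
{
    // Target (file) type: H264, as in SetupSinkWriter above.
    IMFMediaType* outType;
    MFCreateMediaType(&outType);
    outType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
    outType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264);
    outType->SetUINT32(MF_MT_AVG_BITRATE, 10240000);
    outType->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
    MFSetAttributeSize(outType, MF_MT_FRAME_SIZE, 1920, 1080);
    MFSetAttributeRatio(outType, MF_MT_FRAME_RATE, 60000, 1001);
    DWORD streamIndex;
    sinkWriter->AddStream(outType, &streamIndex);
    // Input type: the uncompressed frames actually handed in. Whether a
    // BGRA-to-NV12 converter gets inserted automatically depends on the
    // transforms available on the system.
    IMFMediaType* inType;
    MFCreateMediaType(&inType);
    inType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
    inType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_RGB32);
    inType->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
    MFSetAttributeSize(inType, MF_MT_FRAME_SIZE, 1920, 1080);
    MFSetAttributeRatio(inType, MF_MT_FRAME_RATE, 60000, 1001);
    sinkWriter->SetInputMediaType(streamIndex, inType, NULL);
    *streamIndexOut = streamIndex;
    sinkWriter->BeginWriting();
}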

Related

AvSetMmThreadCharacteristicsW for UWP

I'm working on a WASAPI UWP audio application with cpp/winrt which needs to take audio from an input and send it to an output after processing it.
I want to set my audio thread characteristics with AvSetMmThreadCharacteristicsW(L"Pro Audio", &taskIndex), but I just noticed this function (and most of avrt.h) is limited to WINAPI_PARTITION_DESKTOP and WINAPI_PARTITION_GAMES.
I think I need this because when my code is integrated into my UWP app, the audio input is full of discontinuities, while I have no such issue in my test code, which uses the avrt API.
Is there another way to configure my thread for audio processing?
Edit: here is my test program https://github.com/loics2/test-wasapi. The interesting part happens in the AudioStream class. I can't share my UWP app, but I can copy these classes as-is into a Windows Runtime Component.
Edit 2: here's the audio thread code:
void AudioStream::StreamWorker()
{
WAVEFORMATEX* captureFormat = nullptr;
WAVEFORMATEX* renderFormat = nullptr;
RingBuffer<float> captureBuffer;
RingBuffer<float> renderBuffer;
BYTE* streamBuffer = nullptr;
unsigned int streamBufferSize = 0;
unsigned int bufferFrameCount = 0;
unsigned int numFramesPadding = 0;
unsigned int inputBufferSize = 0;
unsigned int outputBufferSize = 0;
DWORD captureFlags = 0;
winrt::hresult hr = S_OK;
// m_inputClient is a winrt::com_ptr<IAudioClient3>
if (m_inputClient) {
hr = m_inputClient->GetMixFormat(&captureFormat);
// m_audioCaptureClient is a winrt::com_ptr<IAudioCaptureClient>
if (!m_audioCaptureClient) {
hr = m_inputClient->Initialize(
AUDCLNT_SHAREMODE_SHARED,
AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
0,
0,
captureFormat,
nullptr);
hr = m_inputClient->GetService(__uuidof(IAudioCaptureClient), m_audioCaptureClient.put_void());
hr = m_inputClient->SetEventHandle(m_inputReadyEvent.get());
hr = m_inputClient->Reset();
hr = m_inputClient->Start();
}
}
hr = m_inputClient->GetBufferSize(&inputBufferSize);
// multiplying the buffer size by the number of channels
inputBufferSize *= 2;
// m_outputClient is a winrt::com_ptr<IAudioClient3>
if (m_outputClient) {
hr = m_outputClient->GetMixFormat(&renderFormat);
// m_audioRenderClientis a winrt::com_ptr<IAudioRenderClient>
if (!m_audioRenderClient) {
hr = m_outputClient->Initialize(
AUDCLNT_SHAREMODE_SHARED,
AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
0,
0,
captureFormat,
nullptr);
hr = m_outputClient->GetService(__uuidof(IAudioRenderClient), m_audioRenderClient.put_void());
hr = m_outputClient->SetEventHandle(m_outputReadyEvent.get());
hr = m_outputClient->Reset();
hr = m_outputClient->Start();
}
}
hr = m_outputClient->GetBufferSize(&outputBufferSize);
// multiplying the buffer size by the number of channels
outputBufferSize *= 2;
while (m_isRunning)
{
// ===== INPUT =====
// waiting for the capture event
WaitForSingleObject(m_inputReadyEvent.get(), INFINITE);
// getting the input buffer data
hr = m_audioCaptureClient->GetNextPacketSize(&bufferFrameCount);
while (SUCCEEDED(hr) && bufferFrameCount > 0) {
m_audioCaptureClient->GetBuffer(&streamBuffer, &bufferFrameCount, &captureFlags, nullptr, nullptr);
if (bufferFrameCount != 0) {
captureBuffer.write(reinterpret_cast<float*>(streamBuffer), bufferFrameCount * 2);
hr = m_audioCaptureClient->ReleaseBuffer(bufferFrameCount);
if (FAILED(hr)) {
m_audioCaptureClient->ReleaseBuffer(0);
}
}
else
{
m_audioCaptureClient->ReleaseBuffer(0);
}
hr = m_audioCaptureClient->GetNextPacketSize(&bufferFrameCount);
}
// ===== CALLBACK =====
auto size = captureBuffer.size();
float* userInputData = (float*)calloc(size, sizeof(float));
float* userOutputData = (float*)calloc(size, sizeof(float));
captureBuffer.read(userInputData, size);
OnData(userInputData, userOutputData, size / 2, 2, 48000);
renderBuffer.write(userOutputData, size);
free(userInputData);
free(userOutputData);
// ===== OUTPUT =====
// waiting for the render event
WaitForSingleObject(m_outputReadyEvent.get(), INFINITE);
// getting information about the output buffer
hr = m_outputClient->GetBufferSize(&bufferFrameCount);
hr = m_outputClient->GetCurrentPadding(&numFramesPadding);
// adjust the frame count with the padding
bufferFrameCount -= numFramesPadding;
if (bufferFrameCount != 0) {
hr = m_audioRenderClient->GetBuffer(bufferFrameCount, &streamBuffer);
auto count = (bufferFrameCount * 2);
if (renderBuffer.read(reinterpret_cast<float*>(streamBuffer), count) < count) {
// captureBuffer is not full enough, we should fill the remainder with 0
}
hr = m_audioRenderClient->ReleaseBuffer(bufferFrameCount, 0);
if (FAILED(hr)) {
m_audioRenderClient->ReleaseBuffer(0, 0);
}
}
else
{
m_audioRenderClient->ReleaseBuffer(0, 0);
}
}
exit:
// Cleanup code
}
I removed the error handling code for clarity; most of it is:
if (FAILED(hr))
goto exit;
@IInspectable was right, there was something wrong with my code: the audio processing is done by a library which then calls callbacks with some results.
In my callback, I try to raise a winrt::event, but it sometimes takes more than 50 ms. When that happens, it blocks the audio thread and creates the discontinuity...
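For anyone hitting the same thing, the shape of the fix is to get that slow work off the audio thread. A sketch of one such hand-off (the queue below is illustrative, not the actual project code): the callback pushes the results and returns immediately, and a worker thread raises the event.
#include <condition_variable>
#include <mutex>
#include <queue>
#include <vector>

// Illustrative hand-off between the audio thread and a worker thread.
struct ResultQueue
{
    std::queue<std::vector<float>> items;
    std::mutex mtx;
    std::condition_variable cv;
    bool done = false;

    // Audio thread: a short lock and a notify, never blocks on listeners.
    void Push(std::vector<float> data)
    {
        { std::lock_guard<std::mutex> lock(mtx); items.push(std::move(data)); }
        cv.notify_one();
    }

    // Worker thread: raise the (potentially slow) winrt::event here.
    template <typename F>
    void Drain(F&& raiseEvent)
    {
        std::unique_lock<std::mutex> lock(mtx);
        while (!done || !items.empty())
        {
            cv.wait(lock, [this] { return done || !items.empty(); });
            while (!items.empty())
            {
                auto data = std::move(items.front());
                items.pop();
                lock.unlock();
                raiseEvent(data); // may take 50 ms; the audio thread is unaffected
                lock.lock();
            }
        }
    }
};
The audio thread then only pays for the push, no matter how long the event handlers take.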

How to increase mp3 decoding quality (Media Foundation)?

I have a .wav file that I need to convert to .mp3; to do this I am using Media Foundation. This is the approach I use:
#include "TV_AudioEncoderMF.h"
#include <windows.h>
#include <windowsx.h>
#include <atlstr.h>
#include <comdef.h>
#include <exception>
#include <mfapi.h>
#include <mfplay.h>
#include <mfreadwrite.h>
#include <mmdeviceapi.h>
#include <Audioclient.h>
#include <mferror.h>
#include <Wmcodecdsp.h>
#pragma comment(lib, "mf.lib")
#pragma comment(lib, "mfplat.lib")
#pragma comment(lib, "mfplay.lib")
#pragma comment(lib, "mfreadwrite.lib")
#pragma comment(lib, "mfuuid.lib")
#pragma comment(lib, "wmcodecdspuuid")
TV_AudioEncoderMF::TV_AudioEncoderMF()
{
}
TV_AudioEncoderMF::~TV_AudioEncoderMF()
{
}
template <class T> void SafeRelease(T **ppT)
{
if (*ppT)
{
(*ppT)->Release();
*ppT = nullptr;
}
}
HRESULT TV_AudioEncoderMF::GetOutputMediaTypes(
GUID cAudioFormat,
UINT32 cSampleRate,
UINT32 cBitPerSample,
UINT32 cChannels,
IMFMediaType **ppType
)
{
// Enumerate all codecs except for codecs with field-of-use restrictions.
// Sort the results.
DWORD dwFlags =
(MFT_ENUM_FLAG_ALL & (~MFT_ENUM_FLAG_FIELDOFUSE)) |
MFT_ENUM_FLAG_SORTANDFILTER;
IMFCollection *pAvailableTypes = NULL; // List of audio media types.
IMFMediaType *pAudioType = NULL; // Corresponding codec.
HRESULT hr = MFTranscodeGetAudioOutputAvailableTypes(
cAudioFormat,
dwFlags,
NULL,
&pAvailableTypes
);
// Get the element count.
DWORD dwMTCount;
hr = pAvailableTypes->GetElementCount(&dwMTCount);
// Iterate through the results and check for the corresponding codec.
for (DWORD i = 0; i < dwMTCount; i++)
{
hr = pAvailableTypes->GetElement(i, (IUnknown**)&pAudioType);
GUID majorType;
hr = pAudioType->GetMajorType(&majorType);
GUID subType;
hr = pAudioType->GetGUID(MF_MT_SUBTYPE, &subType);
if (majorType != MFMediaType_Audio || subType != MFAudioFormat_FLAC)
{
continue;
}
UINT32 sampleRate = NULL;
hr = pAudioType->GetUINT32(
MF_MT_AUDIO_SAMPLES_PER_SECOND,
&sampleRate
);
UINT32 bitRate = NULL;
hr = pAudioType->GetUINT32(
MF_MT_AUDIO_BITS_PER_SAMPLE,
&bitRate
);
UINT32 channels = NULL;
hr = pAudioType->GetUINT32(
MF_MT_AUDIO_NUM_CHANNELS,
&channels
);
if (sampleRate == cSampleRate
&& bitRate == cBitPerSample
&& channels == cChannels)
{
// Found the codec.
// Jump out!
break;
}
}
// Add the media type to the caller
*ppType = pAudioType;
(*ppType)->AddRef();
SafeRelease(&pAudioType);
return hr;
}
void TV_AudioEncoderMF::decode()
{
HRESULT hr = S_OK;
// Initialize com interface
CoInitializeEx(0, COINIT_MULTITHREADED);
// Start media foundation
MFStartup(MF_VERSION);
IMFMediaType *pInputType = NULL;
IMFSourceReader *pSourceReader = NULL;
IMFMediaType *pOuputMediaType = NULL;
IMFSinkWriter *pSinkWriter = NULL;
// Create source reader
hr = MFCreateSourceReaderFromURL(
L"D:\\buffer\\del\\out\\test.wav",
NULL,
&pSourceReader
);
// Create sink writer
hr = MFCreateSinkWriterFromURL(
L"D:\\buffer\\del\\out\\test_out.mp3",
NULL,
NULL,
&pSinkWriter
);
// Get media type from source reader
hr = pSourceReader->GetCurrentMediaType(
MF_SOURCE_READER_FIRST_AUDIO_STREAM,
&pInputType
);
// Get sample rate, bit rate and channels
UINT32 sampleRate = NULL;
hr = pInputType->GetUINT32(
MF_MT_AUDIO_SAMPLES_PER_SECOND,
&sampleRate
);
UINT32 bitRate = NULL;
hr = pInputType->GetUINT32(
MF_MT_AUDIO_BITS_PER_SAMPLE,
&bitRate
);
UINT32 channels = NULL;
hr = pInputType->GetUINT32(
MF_MT_AUDIO_NUM_CHANNELS,
&channels
);
// Try to find a media type that is fitting.
hr = GetOutputMediaTypes(
MFAudioFormat_MP3,
sampleRate,
bitRate,
channels,
&pOuputMediaType);
DWORD dwWriterStreamIndex = -1;
// Add the stream
hr = pSinkWriter->AddStream(
pOuputMediaType,
&dwWriterStreamIndex
);
// Set input media type
hr = pSinkWriter->SetInputMediaType(
dwWriterStreamIndex,
pInputType,
NULL
);
// Tell the sink writer to accept data
hr = pSinkWriter->BeginWriting();
// Forever alone loop
while (true)
{
DWORD nStreamIndex, nStreamFlags;
LONGLONG nTime;
IMFSample *pSample;
// Read through the samples until...
hr = pSourceReader->ReadSample(
MF_SOURCE_READER_FIRST_AUDIO_STREAM,
0,
&nStreamIndex,
&nStreamFlags,
&nTime,
&pSample);
if (pSample)
{
hr = pSinkWriter->WriteSample(
dwWriterStreamIndex,
pSample
);
}
// ... we are at the end of the stream...
if (nStreamFlags & MF_SOURCE_READERF_ENDOFSTREAM)
{
// ... and jump out.
break;
}
}
// Call finalize to finish writing.
hr = pSinkWriter->Finalize();
// Done :D
}
The problem is that there is a big difference in audio quality: the original .wav sounds good when played back in the standard Windows players, but the compressed .mp3 sounds as if the voice had been recorded on a very low-quality recorder.
What could the problem be here? I don't see any way to set the output quality, something like setOutQualityInPersent(100).
EDIT
void co_AudioEncoderMF::decode()
{
// ... identical to TV_AudioEncoderMF::decode() above, except for these
// two lines, inserted right after the GetOutputMediaTypes(...) call:
bitRate = bitRate + 2; <------- This line
pOuputMediaType->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, bitRate); <------- This line
// ...
}
EDIT2
There are 2 files - the result and the original: https://drive.google.com/drive/folders/1yzB2u0TvMSnwsTpYnDDPFBDkTB75ZFwM?usp=sharing
This part is just broken:
// Try to find a media type that is fitting.
hr = GetOutputMediaTypes(
MFAudioFormat_MP3,
sampleRate,
bitRate,
channels,
&pOuputMediaType);
bitRate = bitRate + 2; <------- This line
pOuputMediaType->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, bitRate); <------- This line
To get you back on track, replace the fragment above with:
MFCreateMediaType(&pOuputMediaType);
pOuputMediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
pOuputMediaType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_MP3);
pOuputMediaType->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, 128000 / 8);
pOuputMediaType->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, channels);
pOuputMediaType->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, sampleRate);
and you'll start getting proper MP3.
Note that the attributes above are taken directly from the documentation: MP3 Audio Encoder. In your application you will need to make sure that the target values remain valid and match the documented options. You might need to resample the audio, for example.
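If you want to stay close to the source's bitrate rather than hard-coding 128 kbps, a small helper along these lines picks the nearest standard MP3 bitrate (illustrative only; the documented MF_MT_AUDIO_AVG_BYTES_PER_SECOND values on the MP3 Audio Encoder page are authoritative):
#include <cstdint>
#include <cstdlib>

// Standard MPEG-1 Layer 3 bitrates, in bits per second.
uint32_t NearestMp3BytesPerSecond(uint32_t requestedBitsPerSecond)
{
    static const uint32_t kBitrates[] =
        { 64000, 96000, 112000, 128000, 160000, 192000, 256000, 320000 };
    uint32_t best = kBitrates[0];
    for (uint32_t b : kBitrates)
    {
        long d = labs((long)b - (long)requestedBitsPerSecond);
        if (d < labs((long)best - (long)requestedBitsPerSecond))
            best = b;
    }
    return best / 8; // MF_MT_AUDIO_AVG_BYTES_PER_SECOND is in bytes
}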

C++ Capture Full screen in real time

I'm developing a piece of software that should capture what is happening on the screen so I can do some processing on it. One of the requirements is that the software has to run at least at 30 FPS.
I've tried several options and I'm going to show you two of them, but neither fully meets my needs:
(1) Using DirectX - The problem with this approach is that I cannot run it at 30 FPS (I get between 15 and 20):
DirectXScreenCapturer.h:
#include "IScreenCapturer.h"
#include <Wincodec.h> // we use WIC for saving images
#include <d3d9.h> // DirectX 9 header
#pragma comment(lib, "d3d9.lib") // link to DirectX 9 library
class DirectXScreenCapturer : public IScreenCapturer
{
public:
DirectXScreenCapturer();
~DirectXScreenCapturer();
virtual bool CaptureScreen(cv::Mat&) override;
private:
IDirect3D9* _d3d;
IDirect3DDevice9* _device;
IDirect3DSurface9* _surface;
cv::Mat _screen;
};
DirectXScreenCapturer.cpp:
#include "DirectXScreenCapturer.h"
DirectXScreenCapturer::DirectXScreenCapturer() : _d3d(NULL), _device(NULL), _surface(NULL)
{
HRESULT hr = S_OK;
D3DPRESENT_PARAMETERS parameters = { 0 };
D3DDISPLAYMODE mode;
D3DLOCKED_RECT rc;
LPBYTE *shots = nullptr;
// init D3D and get screen size
_d3d = Direct3DCreate9(D3D_SDK_VERSION);
_d3d->GetAdapterDisplayMode(D3DADAPTER_DEFAULT, &mode);
parameters.Windowed = TRUE;
parameters.BackBufferCount = 1;
parameters.BackBufferHeight = mode.Height;
parameters.BackBufferWidth = mode.Width;
parameters.SwapEffect = D3DSWAPEFFECT_DISCARD;
parameters.hDeviceWindow = NULL;
// create device & capture surface
hr = _d3d->CreateDevice(D3DADAPTER_DEFAULT, _D3DDEVTYPE::D3DDEVTYPE_HAL, NULL, D3DCREATE_HARDWARE_VERTEXPROCESSING | D3DCREATE_DISABLE_PSGP_THREADING | D3DCREATE_PUREDEVICE, &parameters, &_device);
hr = _device->CreateOffscreenPlainSurface(mode.Width, mode.Height, D3DFMT_A8R8G8B8, D3DPOOL_SYSTEMMEM, &_surface, nullptr);
// compute the required buffer size
hr = _surface->LockRect(&rc, NULL, 0);
hr = _surface->UnlockRect();
// allocate screenshots buffers
_screen = cv::Mat(mode.Height, mode.Width, CV_8UC4, rc.pBits);
}
DirectXScreenCapturer::~DirectXScreenCapturer()
{
}
bool DirectXScreenCapturer::CaptureScreen(cv::Mat& result)
{
_device->GetFrontBufferData(0, _surface);
result = _screen;
return true;
}
(2) I've tried DXGI with DirectX. This solution works really well in real time, but it fails to capture the screen when another application is running in full screen, so the software doesn't work if I'm watching movies, playing games, etc.:
DXGIScreenCapturer.h
#include "IScreenCapturer.h"
#include <DXGI1_2.h>
#pragma comment(lib, "Dxgi.lib")
#include <D3D11.h>
#pragma comment(lib, "D3D11.lib")
class DXGIScreenCapturer : public IScreenCapturer
{
public:
DXGIScreenCapturer();
~DXGIScreenCapturer();
bool Init();
virtual bool CaptureScreen(cv::Mat&) override;
private:
ID3D11Device* _lDevice;
ID3D11DeviceContext* _lImmediateContext;
IDXGIOutputDuplication* _lDeskDupl;
ID3D11Texture2D* _lAcquiredDesktopImage;
DXGI_OUTPUT_DESC _lOutputDesc;
DXGI_OUTDUPL_DESC _lOutputDuplDesc;
D3D11_MAPPED_SUBRESOURCE _resource;
ID3D11Texture2D* currTexture;
cv::Mat _result;
};
DXGIScreenCapturer.cpp
#include "DXGIScreenCapturer.h"
DXGIScreenCapturer::DXGIScreenCapturer()
{
Init();
}
DXGIScreenCapturer::~DXGIScreenCapturer()
{
}
bool DXGIScreenCapturer::Init() {
// Feature levels supported
D3D_FEATURE_LEVEL gFeatureLevels[] = {
D3D_FEATURE_LEVEL_11_0,
D3D_FEATURE_LEVEL_10_1,
D3D_FEATURE_LEVEL_10_0,
D3D_FEATURE_LEVEL_9_1
};
UINT gNumFeatureLevels = ARRAYSIZE(gFeatureLevels);
D3D_FEATURE_LEVEL lFeatureLevel;
HRESULT hr(E_FAIL);
hr = D3D11CreateDevice(nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, D3D11_CREATE_DEVICE_FLAG::D3D11_CREATE_DEVICE_SINGLETHREADED, gFeatureLevels, gNumFeatureLevels, D3D11_SDK_VERSION, &_lDevice, &lFeatureLevel, &_lImmediateContext);
if (FAILED(hr))
return false;
if (!_lDevice)
return false;
// Get DXGI device
IDXGIDevice* lDxgiDevice;
hr = _lDevice->QueryInterface(__uuidof(IDXGIDevice), reinterpret_cast<void**>(&lDxgiDevice));
if (FAILED(hr))
return false;
// Get DXGI adapter
IDXGIAdapter* lDxgiAdapter;
hr = lDxgiDevice->GetParent(__uuidof(IDXGIAdapter), reinterpret_cast<void**>(&lDxgiAdapter));
lDxgiDevice->Release();
lDxgiDevice = nullptr;
if (FAILED(hr))
return false;
UINT Output = 0;
// Get output
IDXGIOutput* lDxgiOutput;
hr = lDxgiAdapter->EnumOutputs(Output, &lDxgiOutput);
if (FAILED(hr))
return false;
lDxgiAdapter->Release();
lDxgiAdapter = nullptr;
hr = lDxgiOutput->GetDesc(&_lOutputDesc);
if (FAILED(hr))
return false;
// QI for Output 1
IDXGIOutput1* lDxgiOutput1;
hr = lDxgiOutput->QueryInterface(__uuidof(lDxgiOutput1), reinterpret_cast<void**>(&lDxgiOutput1));
lDxgiOutput->Release();
lDxgiOutput = nullptr;
if (FAILED(hr))
return false;
// Create desktop duplication
hr = lDxgiOutput1->DuplicateOutput(_lDevice, &_lDeskDupl);
if (FAILED(hr))
return false;
lDxgiOutput1->Release();
lDxgiOutput1 = nullptr;
// Create GUI drawing texture
_lDeskDupl->GetDesc(&_lOutputDuplDesc);
// Create CPU access texture
D3D11_TEXTURE2D_DESC desc;
desc.Width = _lOutputDuplDesc.ModeDesc.Width;
desc.Height = _lOutputDuplDesc.ModeDesc.Height;
desc.Format = _lOutputDuplDesc.ModeDesc.Format;
desc.ArraySize = 1;
desc.BindFlags = 0;
desc.MiscFlags = 0;
desc.SampleDesc.Count = 1;
desc.SampleDesc.Quality = 0;
desc.MipLevels = 1;
desc.CPUAccessFlags = D3D11_CPU_ACCESS_FLAG::D3D11_CPU_ACCESS_READ;
desc.Usage = D3D11_USAGE::D3D11_USAGE_STAGING;
hr = _lDevice->CreateTexture2D(&desc, NULL, &currTexture);
if (!currTexture)
{
hr = _lDeskDupl->ReleaseFrame();
return false;
}
while (!CaptureScreen(_result));
_result = cv::Mat(desc.Height, desc.Width, CV_8UC4, _resource.pData);
return true;
}
bool DXGIScreenCapturer::CaptureScreen(cv::Mat& output)
{
HRESULT hr(E_FAIL);
IDXGIResource* lDesktopResource = nullptr;
DXGI_OUTDUPL_FRAME_INFO lFrameInfo;
hr = _lDeskDupl->AcquireNextFrame(999, &lFrameInfo, &lDesktopResource);
if (FAILED(hr))
return false;
if (lFrameInfo.LastPresentTime.HighPart == 0) // not interested in just mouse updates, which can happen much faster than 60fps if you really shake the mouse
{
hr = _lDeskDupl->ReleaseFrame();
return false;
}
// QI for ID3D11Texture2D
hr = lDesktopResource->QueryInterface(__uuidof(ID3D11Texture2D), reinterpret_cast<void **>(&_lAcquiredDesktopImage));
lDesktopResource->Release();
lDesktopResource = nullptr;
if (FAILED(hr))
{
hr = _lDeskDupl->ReleaseFrame();
return false;
}
_lImmediateContext->CopyResource(currTexture, _lAcquiredDesktopImage);
UINT subresource = D3D11CalcSubresource(0, 0, 0);
_lImmediateContext->Map(currTexture, subresource, D3D11_MAP_READ, 0, &_resource);
_lImmediateContext->Unmap(currTexture, 0);
hr = _lDeskDupl->ReleaseFrame();
output = _result;
return true;
}
Please note that IScreenCapturer is just an interface to quickly swap implementations, and also note that the returned result is a cv::Mat object (OpenCV does the rest of the processing).
Any help on this issue? I'm still trying to figure out a solution that can capture the screen at 30 FPS or more, and that can capture the whole screen even when an app is running in full screen.
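For context, this is the sort of fixed-rate loop I have in mind around the capturer (just a sketch; ProcessFrame stands in for my processing, and AcquireNextFrame would need a short timeout instead of 999 ms so that a quiet desktop doesn't stall the loop):
#include <chrono>
#include <thread>
#include <opencv2/core.hpp>

void ProcessFrame(const cv::Mat&); // stand-in for the real processing

void CaptureLoop(DXGIScreenCapturer& capturer, const bool& running)
{
    using clock = std::chrono::steady_clock;
    const auto frameInterval = std::chrono::milliseconds(33); // ~30 FPS
    cv::Mat frame;
    auto next = clock::now();
    while (running)
    {
        // CaptureScreen returns false when nothing changed on screen;
        // in that case `frame` still holds the previous image and is reused.
        capturer.CaptureScreen(frame);
        if (!frame.empty())
            ProcessFrame(frame);
        next += frameInterval;
        std::this_thread::sleep_until(next);
    }
}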

Loopback is saved in buffer, how to write to disk as wav?

The code below can be copy-pasted into an empty project as main.cpp; it saves the loopback capture to a buffer, but I don't know how to write that buffer to a file. You can see that I set the format up in SetFormat, and then write to the buffer in CopyData.
Complete C++ beginner here; I was able to mash this up. How can I write the buffer to a file?
#include <mmdeviceapi.h>
#include <audioclient.h>
#include "debug.h"
#include <comdef.h>
#define UNICODE
//-----------------------------------------------------------
// Record an audio stream from the default audio capture
// device. The RecordAudioStream function allocates a shared
// buffer big enough to hold one second of PCM audio data.
// The function uses this buffer to stream data from the
// capture device. The main loop runs every 1/2 second.
//-----------------------------------------------------------
// REFERENCE_TIME time units per second and per millisecond
#define REFTIMES_PER_SEC 10000000
#define REFTIMES_PER_MILLISEC 10000
// function strof(jsint) { var prim = jsint >>> 0; return prim.toString(16) }
#define EXIT_ON_ERROR(hres, title) \
if (FAILED(hres)) { \
_com_error hres_str(hres); \
debug_log("exit due to error on title", title, "hres:", hres, "hres_str:", hres_str.ErrorMessage()); \
goto Exit; \
}
#define SAFE_RELEASE(punk) \
if ((punk) != NULL) \
{ (punk)->Release(); (punk) = NULL; }
const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator);
const IID IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator);
const IID IID_IAudioClient = __uuidof(IAudioClient);
const IID IID_IAudioCaptureClient = __uuidof(IAudioCaptureClient);
PWAVEFORMATEX m_pwfx;
BYTE* recordBuffer;
UINT32 recordBufferBytes = 0; // capacity in bytes
UINT32 recordOffset = 0; // bytes captured so far
HRESULT SetFormat(WAVEFORMATEX *pwfx, UINT32 bufferSize) {
m_pwfx = pwfx;
recordBufferBytes = bufferSize * pwfx->nBlockAlign;
recordBuffer = new BYTE[recordBufferBytes];
debug_log("Record buffer set to:", recordBufferBytes, "bytes");
return 0;
}
HRESULT CopyData(BYTE *pData, UINT32 numFramesAvailable, BOOL *bDone) {
// pData is NULL when the silence flag is set; skip those packets.
// Append at an offset - a memcpy to the start would just keep
// overwriting the buffer with the newest packet.
UINT32 bytes = numFramesAvailable * m_pwfx->nBlockAlign;
if (pData != NULL && recordOffset + bytes <= recordBufferBytes) {
memcpy(recordBuffer + recordOffset, pData, bytes);
recordOffset += bytes;
}
return 0;
}
HRESULT RecordAudioStream() {
HRESULT hr;
REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
REFERENCE_TIME hnsActualDuration;
UINT32 bufferFrameCount;
UINT32 numFramesAvailable;
IMMDeviceEnumerator *pEnumerator = NULL;
IMMDevice *pDevice = NULL;
IAudioClient *pAudioClient = NULL;
IAudioCaptureClient *pCaptureClient = NULL;
WAVEFORMATEX *pwfx = NULL;
UINT32 packetLength = 0;
BOOL bDone = FALSE;
BYTE *pData;
DWORD flags;
hr = CoInitialize(NULL);
EXIT_ON_ERROR(hr, "CoInitialize");
hr = CoCreateInstance(CLSID_MMDeviceEnumerator, NULL, CLSCTX_ALL, IID_IMMDeviceEnumerator, (void**)&pEnumerator);
EXIT_ON_ERROR(hr, "CoCreateInstance");
hr = pEnumerator->GetDefaultAudioEndpoint(eCapture, eConsole, &pDevice);
EXIT_ON_ERROR(hr, "GetDefaultAudioEndpoint");
hr = pDevice->Activate(IID_IAudioClient, CLSCTX_ALL, NULL, (void**)&pAudioClient);
EXIT_ON_ERROR(hr, "Activate");
hr = pAudioClient->GetMixFormat(&pwfx);
EXIT_ON_ERROR(hr, "GetMixFormat");
hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, 0, hnsRequestedDuration, 0, pwfx, NULL);
EXIT_ON_ERROR(hr, "Initialize");
// Get the size of the allocated buffer.
hr = pAudioClient->GetBufferSize(&bufferFrameCount);
EXIT_ON_ERROR(hr, "GetBufferSize");
hr = pAudioClient->GetService(IID_IAudioCaptureClient, (void**)&pCaptureClient);
EXIT_ON_ERROR(hr, "GetService");
// Notify the audio sink which format to use.
hr = SetFormat(pwfx, bufferFrameCount);
EXIT_ON_ERROR(hr, "SetFormat");
// Calculate the actual duration of the allocated buffer.
hnsActualDuration = (double)REFTIMES_PER_SEC * bufferFrameCount / pwfx->nSamplesPerSec;
hr = pAudioClient->Start(); // Start recording.
EXIT_ON_ERROR(hr, "Start");
// Each loop fills about half of the shared buffer.
while (bDone == FALSE) {
debug_log("at top of loop");
// Sleep for half the buffer duration.
Sleep(hnsActualDuration / REFTIMES_PER_MILLISEC / 2);
hr = pCaptureClient->GetNextPacketSize(&packetLength);
EXIT_ON_ERROR(hr, "GetNextPacketSize");
while (packetLength != 0 && bDone == FALSE) {
// Get the available data in the shared buffer.
hr = pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL);
EXIT_ON_ERROR(hr, "GetBuffer");
if (flags & AUDCLNT_BUFFERFLAGS_SILENT) {
pData = NULL; // Tell CopyData to write silence.
}
// Copy the available capture data to the audio sink.
hr = CopyData(pData, numFramesAvailable, &bDone);
EXIT_ON_ERROR(hr, "CopyData");
hr = pCaptureClient->ReleaseBuffer(numFramesAvailable);
EXIT_ON_ERROR(hr, "ReleaseBuffer");
hr = pCaptureClient->GetNextPacketSize(&packetLength);
EXIT_ON_ERROR(hr, "GetNextPacketSize 2");
}
}
hr = pAudioClient->Stop(); // Stop recording.
EXIT_ON_ERROR(hr, "Stop");
Exit:
CoTaskMemFree(pwfx);
SAFE_RELEASE(pEnumerator);
SAFE_RELEASE(pDevice);
SAFE_RELEASE(pAudioClient);
SAFE_RELEASE(pCaptureClient);
CoUninitialize();
return hr;
}
int main() {
RecordAudioStream();
}
debug.h - in case you want to copy paste the above
#include <utility>
#include <fstream>
#include <string>
template <typename HeadType> bool
debug_log_rec(std::ostream& out, HeadType&& head) {
out << head;
out << std::endl;
return true;
}
template <typename HeadType, typename... TailTypes> bool
debug_log_rec(std::ostream& out, HeadType&& head, TailTypes&&... tails) {
out << head;
out << " ";
debug_log_rec(out, std::forward<TailTypes>(tails)...);
return true;
}
template <typename... ArgTypes> bool
debug_log(ArgTypes&&... args) {
std::fstream fs;
fs.open("C:\\Users\\Mercurius\\Desktop\\log.txt", std::fstream::app);
debug_log_rec(fs, std::forward<ArgTypes>(args)...);
fs.close();
return true;
}
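A WAV file is just a small RIFF header followed by the raw sample data, so the captured buffer can be written out using the WAVEFORMATEX that GetMixFormat returned. A minimal sketch (the file name is illustrative):
#include <windows.h>
#include <mmreg.h>
#include <cstdio>

// Writes one "fmt " chunk (the WAVEFORMATEX as-is, including any
// extension) and one "data" chunk. Works for PCM and IEEE-float formats.
bool WriteWavFile(const wchar_t* path, const WAVEFORMATEX* wfx,
                  const BYTE* data, DWORD dataBytes)
{
    FILE* f = _wfopen(path, L"wb");
    if (f == NULL) return false;
    DWORD fmtSize = sizeof(WAVEFORMATEX) + wfx->cbSize;
    DWORD riffSize = 4 + (8 + fmtSize) + (8 + dataBytes);
    fwrite("RIFF", 1, 4, f); fwrite(&riffSize, 4, 1, f);
    fwrite("WAVE", 1, 4, f);
    fwrite("fmt ", 1, 4, f); fwrite(&fmtSize, 4, 1, f);
    fwrite(wfx, 1, fmtSize, f);
    fwrite("data", 1, 4, f); fwrite(&dataBytes, 4, 1, f);
    fwrite(data, 1, dataBytes, f);
    fclose(f);
    return true;
}
With the buffer above, calling WriteWavFile(L"capture.wav", m_pwfx, recordBuffer, recordOffset) after pAudioClient->Stop() should produce a playable file.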

Video captured by Media Foundation is vertically mirrored

I'm using a Media Foundation IMFSourceReaderCallback implementation for grabbing video frames from the camera, and then OpenCV imshow to present the frames in a loop.
However, I get the frames vertically flipped...
Is this a bug? Should I set some attribute to avoid this?
Here is my code:
Initialization:
IMFAttributes* pDeviceAttrs, *pReaderAttrs;
hr = MFCreateAttributes(&pDeviceAttrs, 1);
if (FAILED(hr)) goto Exit;
hr = pDeviceAttrs->SetGUID(MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE, MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_GUID);
if (FAILED(hr)) goto Exit;
//...
// Correct source provider is activated through ActivateObject
//
hr = MFCreateAttributes(&pReaderAttrs, 2);
if (FAILED(hr)) goto Exit;
pReaderAttrs->SetUnknown(MF_SOURCE_READER_ASYNC_CALLBACK,(IUnknown*)this);
pReaderAttrs->SetUINT32(MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, TRUE);
hr = MFCreateSourceReaderFromMediaSource(pMediaSource, pReaderAttrs, &m_pReader);
if (FAILED(hr)) goto Exit;
// Correct profile is set
OnReadSample implementation:
HRESULT hr = S_OK;
LONG defaultStride = 0;
LONG stride = 0;
BYTE *pBuffer = NULL;
EnterCriticalSection(&m_critSec);
if (NULL != pSample)
{
IMFMediaBuffer* pMediaBuffer;
DWORD dataSize = 0;
// In case of a single buffer, no copy would happen
hr = pSample->ConvertToContiguousBuffer(&pMediaBuffer);
if (FAILED(hr)) goto Cleanup;
pMediaBuffer->GetCurrentLength(&dataSize);
hr = pMediaBuffer->Lock(&pBuffer, &dataSize, &dataSize);
if (FAILED(hr)) goto Cleanup;
// todo: use a backbuffer to avoid sync issues
if (NULL == m_pLatestFrame) m_pLatestFrame = (BYTE*)malloc(dataSize);
memcpy(m_pLatestFrame, pBuffer, dataSize);
++m_frameNumber;
pMediaBuffer->Unlock();
pMediaBuffer->Release();
}
Cleanup:
LeaveCriticalSection(&m_critSec);
// Async ReadFrame for the next buffer:
hr = m_pReader->ReadSample(
(DWORD)MF_SOURCE_READER_FIRST_VIDEO_STREAM,
0,
NULL, // actual
NULL, // flags
NULL, // timestamp
NULL // sample
);
return hr;
Conversion to cv::image:
void SourceReaderImpl::GetLatestFrame(BYTE** ppLatestFrame)
{
EnterCriticalSection(&m_critSec);
*ppLatestFrame = m_pLatestFrame;
LeaveCriticalSection(&m_critSec);
}
void* CameraWrapperImpl::getLatestFrame()
{
BYTE* pLatestFrame = NULL;
m_pMfReader->GetLatestFrame(&pLatestFrame);
return pLatestFrame;
}
void Player::Present()
{
//...
color = cv::Mat(colorSize,
CV_8UC3,
static_cast<unsigned char*>(m_pColorCameraImpl->getLatestFrame()));
cv::imshow("color", color);
}
Any idea?
Thanks in advance!
A bitmap is stored with the last scan line first, so the image will appear upside down. The easiest solution is to call cv::flip:
void Player::Present()
{
//...
color = cv::Mat(colorSize,
CV_8UC3,
static_cast<unsigned char*>(m_pColorCameraImpl->getLatestFrame()));
cv::Mat corrected;
cv::flip(color, corrected, 0);
cv::imshow("color", corrected);
}
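As an alternative that avoids the extra pass: Media Foundation exposes the orientation through the stride. A negative MF_MT_DEFAULT_STRIDE on the media type means the frame is stored bottom-up (MFGetStrideForBitmapInfoHeader can compute the default when the attribute is missing), so the per-frame copy in OnReadSample can write the scan lines top-down as it goes. A sketch, assuming the stride has already been obtained from the media type:
#include <windows.h>
#include <cstring>

// Copy a frame into a top-down buffer; handles both stride signs.
void CopyTopDown(const BYTE* src, BYTE* dst, LONG stride, DWORD height)
{
    LONG rowBytes = stride < 0 ? -stride : stride;
    if (stride < 0)
        src += (SIZE_T)rowBytes * (height - 1); // start at the last scan line
    for (DWORD y = 0; y < height; ++y)
    {
        memcpy(dst + (SIZE_T)y * rowBytes, src, rowBytes);
        src += stride; // walks upward when the stride is negative
    }
}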