DirectX11 DXGI Capture Screen when in full screen - c++

I'm currently trying to develop a personal project where I will light up LEDs according to what's happening on the screen of my computer.
I've tried several solutions to capture my screen, and DirectX11 with DXGI is the fastest way I found to have a good FPS Rate.
My only issue is the following: When in full screen (for example, watching Netflix trogh Win10 App or playing any game in fullscreen), it seems that nothing is captured. I have two functions (one to set up and another one to capture a frame:
Setup Up Function:
bool DXGIScreenCapturer::Init() {
int lresult(-1);
D3D_FEATURE_LEVEL lFeatureLevel;
HRESULT hr(E_FAIL);
hr = D3D11CreateDevice(
nullptr,
D3D_DRIVER_TYPE_HARDWARE,
nullptr,
0,
gFeatureLevels,
gNumFeatureLevels,
D3D11_SDK_VERSION,
&_lDevice,
&lFeatureLevel,
&_lImmediateContext);
if (FAILED(hr))
return false;
if (!_lDevice)
return false;
// Get DXGI device
ComPtr<IDXGIDevice> lDxgiDevice;
hr = _lDevice.As(&lDxgiDevice);
if (FAILED(hr))
return false;
// Get DXGI adapter
ComPtr<IDXGIAdapter> lDxgiAdapter;
hr = lDxgiDevice->GetParent(__uuidof(IDXGIAdapter), &lDxgiAdapter);
if (FAILED(hr))
return false;
lDxgiDevice.Reset();
UINT Output = 0;
// Get output
ComPtr<IDXGIOutput> lDxgiOutput;
hr = lDxgiAdapter->EnumOutputs(Output, &lDxgiOutput);
if (FAILED(hr))
return false;
lDxgiAdapter.Reset();
hr = lDxgiOutput->GetDesc(&_lOutputDesc);
if (FAILED(hr))
return false;
// QI for Output 1
ComPtr<IDXGIOutput1> lDxgiOutput1;
hr = lDxgiOutput.As(&lDxgiOutput1);
if (FAILED(hr))
return false;
lDxgiOutput.Reset();
// Create desktop duplication
hr = lDxgiOutput1->DuplicateOutput(_lDevice.Get(), &_lDeskDupl);
if (FAILED(hr))
return false;
lDxgiOutput1.Reset();
// Create GUI drawing texture
_lDeskDupl->GetDesc(&_lOutputDuplDesc);
// Create CPU access texture
_desc.Width = _lOutputDuplDesc.ModeDesc.Width;
_desc.Height = _lOutputDuplDesc.ModeDesc.Height;
_desc.Format = _lOutputDuplDesc.ModeDesc.Format;
std::cout << _desc.Width << "x" << _desc.Height << "\n\n\n";
_desc.ArraySize = 1;
_desc.BindFlags = 0;
_desc.MiscFlags = 0;
_desc.SampleDesc.Count = 1;
_desc.SampleDesc.Quality = 0;
_desc.MipLevels = 1;
_desc.CPUAccessFlags = D3D11_CPU_ACCESS_FLAG::D3D11_CPU_ACCESS_READ;
_desc.Usage = D3D11_USAGE::D3D11_USAGE_STAGING;
while (!CaptureScreen(_result));
_result = cv::Mat(_desc.Height, _desc.Width, CV_8UC4, _resource.pData);
return true;
}
And the capture function:
bool DXGIScreenCapturer::CaptureScreen(cv::Mat& output)
{
HRESULT hr(E_FAIL);
ComPtr<IDXGIResource> lDesktopResource = nullptr;
DXGI_OUTDUPL_FRAME_INFO lFrameInfo;
ID3D11Texture2D* currTexture = NULL;
hr = _lDeskDupl->AcquireNextFrame(999, &lFrameInfo, &lDesktopResource);
if (FAILED(hr))
return false;
if (lFrameInfo.LastPresentTime.HighPart == 0) // not interested in just mouse updates, which can happen much faster than 60fps if you really shake the mouse
{
hr = _lDeskDupl->ReleaseFrame();
return false;
}
//int accum_frames = lFrameInfo.AccumulatedFrames;
//if (accum_frames > 1) {// && current_frame != 1) {
// // TOO MANY OF THESE is the problem
// // especially after having to wait >17ms in AcquireNextFrame()
//}
// QI for ID3D11Texture2D
hr = lDesktopResource.As(&_lAcquiredDesktopImage);
// Copy image into a newly created CPU access texture
hr = _lDevice->CreateTexture2D(&_desc, NULL, &currTexture);
if (FAILED(hr))
{
hr = _lDeskDupl->ReleaseFrame();
return false;
}
if (!currTexture)
{
hr = _lDeskDupl->ReleaseFrame();
return false;
}
_lImmediateContext->CopyResource(currTexture, _lAcquiredDesktopImage.Get());
UINT subresource = D3D11CalcSubresource(0, 0, 0);
_lImmediateContext->Map(currTexture, subresource, D3D11_MAP_READ, 0, &_resource);
_lImmediateContext->Unmap(currTexture, 0);
currTexture->Release();
hr = _lDeskDupl->ReleaseFrame();
output = _result;
return true;
}

Related

How to use IDXGIOutputDuplication to capture multiple screens?

Is there a way to screen capture multi monitor setups with windows duplication api (IDXIOutputDuplication) rather than gdi? Windows duplication is now the preferred (faster and more efficient) way of screen capturing on Windows machines. This is a question on the Microsoft forum but the link to the code is broken and I can't find it anywhere on their website 1. Examples of multi monitor screen casting with GDI can be found but they use a separate API 2,3.
I have successfully captured one monitor in C++ using the links for Windows duplication api found here 4. The problem is that the api requires an adapter and an output. I could possibly create two threads and run the windows api on each for separate adapters. I was just wondering if there was a more elegant/better performance solution.
P.S. I have had trouble posting questions on StackOverflow in the past. Before closing this question, please let me know how I can change the content to better suit the rules:)
In response to the request for code, here is a snippet from the header file
ID3D11Device* D3DDevice = nullptr;
ID3D11DeviceContext* D3DDeviceContext = nullptr;
IDXGIOutputDuplication* DeskDupl = nullptr;
DXGI_OUTPUT_DESC OutputDesc;
from the cpp file (there are two parts, initializing and capuring):
From the initializing function:
// Create device
for (size_t i = 0; i < numDriverTypes; i++) {
hr = D3D11CreateDevice(nullptr, driverTypes[i], nullptr, 0, featureLevels, (UINT) numFeatureLevels,
D3D11_SDK_VERSION, &D3DDevice, &featureLevel, &D3DDeviceContext);
if (SUCCEEDED(hr))
break;
}
if (FAILED(hr))
return tsf::fmt("D3D11CreateDevice failed: %v", hr);
// Initialize the Desktop Duplication system
//m_OutputNumber = Output;
// Get DXGI device
IDXGIDevice* dxgiDevice = nullptr;
hr = D3DDevice->QueryInterface(__uuidof(IDXGIDevice), (void**) &dxgiDevice);
if (FAILED(hr))
return tsf::fmt("D3DDevice->QueryInterface failed: %v", hr);
// Get DXGI adapter
IDXGIAdapter* dxgiAdapter = nullptr;
hr = dxgiDevice->GetParent(__uuidof(IDXGIAdapter), (void**) &dxgiAdapter);
dxgiDevice->Release();
dxgiDevice = nullptr;
if (FAILED(hr)) {
return tsf::fmt("dxgiDevice->GetParent failed: %v", hr);
//return ProcessFailure(m_Device, L"Failed to get parent DXGI Adapter", L"Error", hr, SystemTransitionsExpectedErrors);
}
// Get output
IDXGIOutput* dxgiOutput = nullptr;
hr = dxgiAdapter->EnumOutputs(OutputNumber, &dxgiOutput);
dxgiAdapter->Release();
dxgiAdapter = nullptr;
if (FAILED(hr)) {
return tsf::fmt("dxgiAdapter->EnumOutputs failed: %v", hr);
//return ProcessFailure(m_Device, L"Failed to get specified output in DUPLICATIONMANAGER", L"Error", hr, EnumOutputsExpectedErrors);
}
dxgiOutput->GetDesc(&OutputDesc);
// QI for Output 1
IDXGIOutput1* dxgiOutput1 = nullptr;
hr = dxgiOutput->QueryInterface(__uuidof(dxgiOutput1), (void**) &dxgiOutput1);
dxgiOutput->Release();
dxgiOutput = nullptr;
if (FAILED(hr))
return tsf::fmt("dxgiOutput->QueryInterface failed: %v", hr);
// Create desktop duplication
hr = dxgiOutput1->DuplicateOutput(D3DDevice, &DeskDupl);
dxgiOutput1->Release();
dxgiOutput1 = nullptr;
if (FAILED(hr)) {
if (hr == DXGI_ERROR_NOT_CURRENTLY_AVAILABLE) {
//MessageBoxW(nullptr, L"There is already the maximum number of applications using the Desktop Duplication API running, please close one of those applications and then try again.", L"Error", MB_OK);
return "Too many desktop recorders already active";
}
return tsf::fmt("DuplicateOutput failed: %v", hr);
//return ProcessFailure(m_Device, L"Failed to get duplicate output in DUPLICATIONMANAGER", L"Error", hr, CreateDuplicationExpectedErrors);
}
From the capturing function:
IDXGIResource* deskRes = nullptr;
DXGI_OUTDUPL_FRAME_INFO frameInfo;
hr = DeskDupl->AcquireNextFrame(0, &frameInfo, &deskRes);
if (hr == DXGI_ERROR_WAIT_TIMEOUT) {
// nothing to see here
return false;
}
if (FAILED(hr)) {
// perhaps shutdown and reinitialize
auto msg = tsf::fmt("Acquire failed: %x\n", hr);
OutputDebugStringA(msg.c_str());
return false;
}
HaveFrameLock = true;
ID3D11Texture2D* gpuTex = nullptr;
hr = deskRes->QueryInterface(__uuidof(ID3D11Texture2D), (void**) &gpuTex);
deskRes->Release();
deskRes = nullptr;
if (FAILED(hr)) {
// not expected
return false;
}
bool ok = true;
D3D11_TEXTURE2D_DESC desc;
gpuTex->GetDesc(&desc);
desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE | D3D11_CPU_ACCESS_READ;
desc.Usage = D3D11_USAGE_STAGING;
desc.BindFlags = 0;
desc.MiscFlags = 0; // D3D11_RESOURCE_MISC_GDI_COMPATIBLE ?
ID3D11Texture2D* cpuTex = nullptr;
hr = D3DDevice->CreateTexture2D(&desc, nullptr, &cpuTex);
if (SUCCEEDED(hr)) {
D3DDeviceContext->CopyResource(cpuTex, gpuTex);
} else {
// not expected
ok = false;
}
//UINT subresource = D3D11CalcSubresource(0, 0, 0);
D3D11_MAPPED_SUBRESOURCE sr;
hr = D3DDeviceContext->Map(cpuTex, 0, D3D11_MAP_READ, 0, &sr);
DeskDupl is tied to a specific adapter and output.
You will need to obtain an IDXGIOutputDuplication interface for each IDXGIOutput you wish to have duplicated. Whether those outputs are on one adapter or multiple doesn't matter. I can't speak to whether you will need to run each duplication loop on a dedicated thread.

C++ Capture Full screen in real time

I'm developing a software that should capture what is happening on the screen to make some processing. One of the requirements is that the software has to run at least 30FPS.
I've tried several options and I'm going to shopw you two, But none of them fully works to my needs:
(1) Using Direct X - The problem with this approach is that I cannot run it at 30 FPS (I get between 15 and 20):
DirectXScreenCapturer.h:
#include "IScreenCapturer.h"
#include <Wincodec.h> // we use WIC for saving images
#include <d3d9.h> // DirectX 9 header
#pragma comment(lib, "d3d9.lib") // link to DirectX 9 library
class DirectXScreenCapturer : public IScreenCapturer
{
public:
DirectXScreenCapturer();
~DirectXScreenCapturer();
virtual bool CaptureScreen(cv::Mat&) override;
private:
IDirect3D9* _d3d;
IDirect3DDevice9* _device;
IDirect3DSurface9* _surface;
cv::Mat _screen;
};
DirectXScreenCapturer.cpp:
#include "DirectXScreenCapturer.h"
DirectXScreenCapturer::DirectXScreenCapturer() : _d3d(NULL), _device(NULL), _surface(NULL)
{
HRESULT hr = S_OK;
D3DPRESENT_PARAMETERS parameters = { 0 };
D3DDISPLAYMODE mode;
D3DLOCKED_RECT rc;
LPBYTE *shots = nullptr;
// init D3D and get screen size
_d3d = Direct3DCreate9(D3D_SDK_VERSION);
_d3d->GetAdapterDisplayMode(D3DADAPTER_DEFAULT, &mode);
parameters.Windowed = TRUE;
parameters.BackBufferCount = 1;
parameters.BackBufferHeight = mode.Height;
parameters.BackBufferWidth = mode.Width;
parameters.SwapEffect = D3DSWAPEFFECT_DISCARD;
parameters.hDeviceWindow = NULL;
// create device & capture surface
hr = _d3d->CreateDevice(D3DADAPTER_DEFAULT, _D3DDEVTYPE::D3DDEVTYPE_HAL, NULL, D3DCREATE_HARDWARE_VERTEXPROCESSING | D3DCREATE_DISABLE_PSGP_THREADING | D3DCREATE_PUREDEVICE, &parameters, &_device);
hr = _device->CreateOffscreenPlainSurface(mode.Width, mode.Height, D3DFMT_A8R8G8B8, D3DPOOL_SYSTEMMEM, &_surface, nullptr);
// compute the required buffer size
hr = _surface->LockRect(&rc, NULL, 0);
hr = _surface->UnlockRect();
// allocate screenshots buffers
_screen = cv::Mat(mode.Height, mode.Width, CV_8UC4, rc.pBits);
}
DirectXScreenCapturer::~DirectXScreenCapturer()
{
}
bool DirectXScreenCapturer::CaptureScreen(cv::Mat& result)
{
_device->GetFrontBufferData(0, _surface);
result = _screen;
return true;
}
(2) I've tried DXGI with Direct X. This solution works really well in Real Time, but fails to capture the screen when another application is running in full screen, so the software doesn't work if I'm watching movies, playign games, etc:
DXGIScreenCapturer.h
#include "IScreenCapturer.h"
#include <DXGI1_2.h>
#pragma comment(lib, "Dxgi.lib")
#include <D3D11.h>
#pragma comment(lib, "D3D11.lib")
class DXGIScreenCapturer : public IScreenCapturer
{
public:
DXGIScreenCapturer();
~DXGIScreenCapturer();
bool Init();
virtual bool CaptureScreen(cv::Mat&) override;
private:
ID3D11Device* _lDevice;
ID3D11DeviceContext* _lImmediateContext;
IDXGIOutputDuplication* _lDeskDupl;
ID3D11Texture2D* _lAcquiredDesktopImage;
DXGI_OUTPUT_DESC _lOutputDesc;
DXGI_OUTDUPL_DESC _lOutputDuplDesc;
D3D11_MAPPED_SUBRESOURCE _resource;
ID3D11Texture2D* currTexture;
cv::Mat _result;
};
DXGIScreenCapturer.cpp
#include "DXGIScreenCapturer.h"
DXGIScreenCapturer::DXGIScreenCapturer()
{
Init();
}
DXGIScreenCapturer::~DXGIScreenCapturer()
{
}
bool DXGIScreenCapturer::Init() {
// Feature levels supported
D3D_FEATURE_LEVEL gFeatureLevels[] = {
D3D_FEATURE_LEVEL_11_0,
D3D_FEATURE_LEVEL_10_1,
D3D_FEATURE_LEVEL_10_0,
D3D_FEATURE_LEVEL_9_1
};
UINT gNumFeatureLevels = ARRAYSIZE(gFeatureLevels);
D3D_FEATURE_LEVEL lFeatureLevel;
HRESULT hr(E_FAIL);
hr = D3D11CreateDevice(nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, D3D11_CREATE_DEVICE_FLAG::D3D11_CREATE_DEVICE_SINGLETHREADED, gFeatureLevels, gNumFeatureLevels, D3D11_SDK_VERSION, &_lDevice, &lFeatureLevel, &_lImmediateContext);
if (FAILED(hr))
return false;
if (!_lDevice)
return false;
// Get DXGI device
IDXGIDevice* lDxgiDevice;
hr = _lDevice->QueryInterface(__uuidof(IDXGIDevice), reinterpret_cast<void**>(&lDxgiDevice));
if (FAILED(hr))
return false;
// Get DXGI adapter
IDXGIAdapter* lDxgiAdapter;
hr = lDxgiDevice->GetParent(__uuidof(IDXGIAdapter), reinterpret_cast<void**>(&lDxgiAdapter));
lDxgiDevice->Release();
lDxgiDevice = nullptr;
if (FAILED(hr))
return false;
UINT Output = 0;
// Get output
IDXGIOutput* lDxgiOutput;
hr = lDxgiAdapter->EnumOutputs(Output, &lDxgiOutput);
if (FAILED(hr))
return false;
lDxgiAdapter->Release();
lDxgiAdapter = nullptr;
hr = lDxgiOutput->GetDesc(&_lOutputDesc);
if (FAILED(hr))
return false;
// QI for Output 1
IDXGIOutput1* lDxgiOutput1;
hr = lDxgiOutput->QueryInterface(__uuidof(lDxgiOutput1), reinterpret_cast<void**>(&lDxgiOutput1));
lDxgiOutput->Release();
lDxgiOutput = nullptr;
if (FAILED(hr))
return false;
// Create desktop duplication
hr = lDxgiOutput1->DuplicateOutput(_lDevice, &_lDeskDupl);
if (FAILED(hr))
return false;
lDxgiOutput1->Release();
lDxgiOutput1 = nullptr;
// Create GUI drawing texture
_lDeskDupl->GetDesc(&_lOutputDuplDesc);
// Create CPU access texture
D3D11_TEXTURE2D_DESC desc;
desc.Width = _lOutputDuplDesc.ModeDesc.Width;
desc.Height = _lOutputDuplDesc.ModeDesc.Height;
desc.Format = _lOutputDuplDesc.ModeDesc.Format;
desc.ArraySize = 1;
desc.BindFlags = 0;
desc.MiscFlags = 0;
desc.SampleDesc.Count = 1;
desc.SampleDesc.Quality = 0;
desc.MipLevels = 1;
desc.CPUAccessFlags = D3D11_CPU_ACCESS_FLAG::D3D11_CPU_ACCESS_READ;
desc.Usage = D3D11_USAGE::D3D11_USAGE_STAGING;
hr = _lDevice->CreateTexture2D(&desc, NULL, &currTexture);
if (!currTexture)
{
hr = _lDeskDupl->ReleaseFrame();
return false;
}
while (!CaptureScreen(_result));
_result = cv::Mat(desc.Height, desc.Width, CV_8UC4, _resource.pData);
return true;
}
bool DXGIScreenCapturer::CaptureScreen(cv::Mat& output)
{
HRESULT hr(E_FAIL);
IDXGIResource* lDesktopResource = nullptr;
DXGI_OUTDUPL_FRAME_INFO lFrameInfo;
hr = _lDeskDupl->AcquireNextFrame(999, &lFrameInfo, &lDesktopResource);
if (FAILED(hr))
return false;
if (lFrameInfo.LastPresentTime.HighPart == 0) // not interested in just mouse updates, which can happen much faster than 60fps if you really shake the mouse
{
hr = _lDeskDupl->ReleaseFrame();
return false;
}
// QI for ID3D11Texture2D
hr = lDesktopResource->QueryInterface(__uuidof(ID3D11Texture2D), reinterpret_cast<void **>(&_lAcquiredDesktopImage));
lDesktopResource->Release();
lDesktopResource = nullptr;
if (FAILED(hr))
{
hr = _lDeskDupl->ReleaseFrame();
return false;
}
_lImmediateContext->CopyResource(currTexture, _lAcquiredDesktopImage);
UINT subresource = D3D11CalcSubresource(0, 0, 0);
_lImmediateContext->Map(currTexture, subresource, D3D11_MAP_READ, 0, &_resource);
_lImmediateContext->Unmap(currTexture, 0);
hr = _lDeskDupl->ReleaseFrame();
output = _result;
return true;
}
Please note that IScreenCapturer is just and interface to quickly swap implementations and also note that the return result is a cv::Mat object (OpenCV to do the rest of processing).
Any help on this issue? I'm still trying to figure out a solution that can capture the screen at least 30 FPS and that can capture the whole screen even when an app is running at full screen.

cannot readsample from IMFSource in synchronous mode

I am having trouble with a video recording application that I am writing using Microsoft Media Foundation.
Specifically, the read/write function (which I put on a loop that lives on it's own thread) doesn't make it pass the call to ReadSample:
HRESULT WinCapture::rwFunction(void) {
HRESULT hr;
DWORD streamIndex, flags;
LONGLONG llTimeStamp;
IMFSample *pSample = NULL;
EnterCriticalSection(&m_critsec);
// Read another sample.
hr = m_pReader->ReadSample(
(DWORD)MF_SOURCE_READER_FIRST_VIDEO_STREAM,
0,
&streamIndex, // actual
&flags,//NULL, // flags
&llTimeStamp,//NULL, // timestamp
&pSample // sample
);
if (FAILED(hr)) { goto done; }
hr = m_pWriter->WriteSample(0, pSample);
goto done;
done:
return hr;
SafeRelease(&pSample);
LeaveCriticalSection(&m_critsec);
}
The value of hr is an exception code: 0xc00d3704 so the code snippet skips the call to WriteSample.
It is a lot of steps, but I am fairly certain that I am setting up m_pReader (type IMFSource *) correctly.
HRESULT WinCapture::OpenMediaSource(IMFMediaSource *pSource)
{
HRESULT hr = S_OK;
IMFAttributes *pAttributes = NULL;
hr = MFCreateAttributes(&pAttributes, 2);
// use a callback
//if (SUCCEEDED(hr))
//{
// hr = pAttributes->SetUnknown(MF_SOURCE_READER_ASYNC_CALLBACK, this);
//}
// set the desired format type
DWORD dwFormatIndex = (DWORD)formatIdx;
IMFPresentationDescriptor *pPD = NULL;
IMFStreamDescriptor *pSD = NULL;
IMFMediaTypeHandler *pHandler = NULL;
IMFMediaType *pType = NULL;
// create the source reader
if (SUCCEEDED(hr))
{
hr = MFCreateSourceReaderFromMediaSource(
pSource,
pAttributes,
&m_pReader
);
}
// steps to set the selected format type
hr = pSource->CreatePresentationDescriptor(&pPD);
if (FAILED(hr))
{
goto done;
}
BOOL fSelected;
hr = pPD->GetStreamDescriptorByIndex(0, &fSelected, &pSD);
if (FAILED(hr))
{
goto done;
}
hr = pSD->GetMediaTypeHandler(&pHandler);
if (FAILED(hr))
{
goto done;
}
hr = pHandler->GetMediaTypeByIndex(dwFormatIndex, &pType);
if (FAILED(hr))
{
goto done;
}
hr = pHandler->SetCurrentMediaType(pType);
{
goto done;
}
hr = m_pReader->SetCurrentMediaType(
(DWORD)MF_SOURCE_READER_FIRST_VIDEO_STREAM,
NULL,
pType
);
// set to maximum framerate?
hr = pHandler->GetCurrentMediaType(&pType);
if (FAILED(hr))
{
goto done;
}
// Get the maximum frame rate for the selected capture format.
// Note: To get the minimum frame rate, use the
// MF_MT_FRAME_RATE_RANGE_MIN attribute instead.
PROPVARIANT var;
if (SUCCEEDED(pType->GetItem(MF_MT_FRAME_RATE_RANGE_MAX, &var)))
{
hr = pType->SetItem(MF_MT_FRAME_RATE, var);
PropVariantClear(&var);
if (FAILED(hr))
{
goto done;
}
hr = pHandler->SetCurrentMediaType(pType);
{
goto done;
}
hr = m_pReader->SetCurrentMediaType(
(DWORD)MF_SOURCE_READER_FIRST_VIDEO_STREAM,
NULL,
pType
);
}
goto done;
done:
SafeRelease(&pPD);
SafeRelease(&pSD);
SafeRelease(&pHandler);
SafeRelease(&pType);
SafeRelease(&pAttributes);
return hr;
}
This code is all copied from Microsoft documentation pages and the SDK sample code. The variable formatIdx is 0, I get it from enumerating the camera formats and choosing the first one.
UPDATE
I have rewritten this program so that it uses callbacks instead of a blocking read/write function and I have exactly the same issue.
Here I get the device and initiate the callback method:
HRESULT WinCapture::initCapture(const WCHAR *pwszFileName, IMFMediaSource *pSource) {
HRESULT hr = S_OK;
EncodingParameters params;
params.subtype = MFVideoFormat_WMV3; // TODO, paramterize this
params.bitrate = TARGET_BIT_RATE;
m_llBaseTime = 0;
IMFMediaType *pType = NULL;
DWORD sink_stream = 0;
EnterCriticalSection(&m_critsec);
hr = m_ppDevices[selectedDevice]->ActivateObject(IID_PPV_ARGS(&pSource));
//m_bIsCapturing = false; // this is set externally here
if (SUCCEEDED(hr))
hr = OpenMediaSource(pSource); // also creates the reader
if (SUCCEEDED(hr))
{
hr = m_pReader->GetCurrentMediaType(
(DWORD)MF_SOURCE_READER_FIRST_VIDEO_STREAM,
&pType
);
}
// Create the sink writer
if (SUCCEEDED(hr))
{
hr = MFCreateSinkWriterFromURL(
pwszFileName,
NULL,
NULL,
&m_pWriter
);
}
if (SUCCEEDED(hr))
hr = ConfigureEncoder(params, pType, m_pWriter, &sink_stream);
// kick off the recording
if (SUCCEEDED(hr))
{
m_llBaseTime = 0;
m_bIsCapturing = TRUE;
hr = m_pReader->ReadSample(
(DWORD)MF_SOURCE_READER_FIRST_VIDEO_STREAM,
0,
NULL,
NULL,
NULL,
NULL
);
}
SafeRelease(&pType);
SafeRelease(&pSource);
pType = NULL;
LeaveCriticalSection(&m_critsec);
return hr;
}
The OpenMediaSource method is here:
HRESULT WinCapture::OpenMediaSource(IMFMediaSource *pSource)
{
HRESULT hr = S_OK;
IMFAttributes *pAttributes = NULL;
hr = MFCreateAttributes(&pAttributes, 2);
// use a callback
if (SUCCEEDED(hr))
{
hr = pAttributes->SetUnknown(MF_SOURCE_READER_ASYNC_CALLBACK, this);
}
// set the desired format type
DWORD dwFormatIndex = (DWORD)formatIdx;
IMFPresentationDescriptor *pPD = NULL;
IMFStreamDescriptor *pSD = NULL;
IMFMediaTypeHandler *pHandler = NULL;
IMFMediaType *pType = NULL;
// create the source reader
if (SUCCEEDED(hr))
{
hr = MFCreateSourceReaderFromMediaSource(
pSource,
pAttributes,
&m_pReader
);
}
// steps to set the selected format type
if (SUCCEEDED(hr)) hr = pSource->CreatePresentationDescriptor(&pPD);
if (FAILED(hr))
{
goto done;
}
BOOL fSelected;
hr = pPD->GetStreamDescriptorByIndex(0, &fSelected, &pSD);
if (FAILED(hr))
{
goto done;
}
hr = pSD->GetMediaTypeHandler(&pHandler);
if (FAILED(hr))
{
goto done;
}
hr = pHandler->GetMediaTypeByIndex(dwFormatIndex, &pType);
if (FAILED(hr))
{
goto done;
}
hr = pHandler->SetCurrentMediaType(pType);
if (FAILED(hr))
{
goto done;
}
// get available framerates
hr = MFGetAttributeRatio(pType, MF_MT_FRAME_RATE, &frameRate, &denominator);
std::cout << "frameRate " << frameRate << " denominator " << denominator << std::endl;
hr = m_pReader->SetCurrentMediaType(
(DWORD)MF_SOURCE_READER_FIRST_VIDEO_STREAM,
NULL,
pType
);
// set to maximum framerate?
hr = pHandler->GetCurrentMediaType(&pType);
if (FAILED(hr))
{
goto done;
}
goto done;
done:
SafeRelease(&pPD);
SafeRelease(&pSD);
SafeRelease(&pHandler);
SafeRelease(&pType);
SafeRelease(&pAttributes);
return hr;
}
Here, formatIdx is a field of this class that get sets by the user via the GUI. I leave it 0 in order to test. So, I don't think I am missing any steps to get the camera going, but maybe I am.
When I inspect what applications are using the webcam (using this method) after the call to ActivateObject, I see that my application is using the webcam as expected. But, when I enter the callback routine, I see there are two instances of my application using the webcam. This is the same using a blocking method.
I don't know if that is good or bad, but when I enter my callback method:
HRESULT WinCapture::OnReadSample(
HRESULT hrStatus,
DWORD /*dwStreamIndex*/,
DWORD /*dwStreamFlags*/,
LONGLONG llTimeStamp,
IMFSample *pSample // Can be NULL
)
{
EnterCriticalSection(&m_critsec);
if (!IsCapturing() || m_bIsCapturing == false)
{
LeaveCriticalSection(&m_critsec);
return S_OK;
}
HRESULT hr = S_OK;
if (FAILED(hrStatus))
{
hr = hrStatus;
goto done;
}
if (pSample)
{
if (m_bFirstSample)
{
m_llBaseTime = llTimeStamp;
m_bFirstSample = FALSE;
}
// rebase the time stamp
llTimeStamp -= m_llBaseTime;
hr = pSample->SetSampleTime(llTimeStamp);
if (FAILED(hr)) { goto done; }
hr = m_pWriter->WriteSample(0, pSample);
if (FAILED(hr)) { goto done; }
}
// Read another sample.
hr = m_pReader->ReadSample(
(DWORD)MF_SOURCE_READER_FIRST_VIDEO_STREAM,
0,
NULL, // actual
NULL, // flags
NULL, // timestamp
NULL // sample
);
done:
if (FAILED(hr))
{
//NotifyError(hr);
}
LeaveCriticalSection(&m_critsec);
return hr;
}
hrStatus is the 0x00d3704 error I was getting before, and the callback goes straight to done thus killing the callbacks.
Finally, I should say that I am modeling (read, 'copying') my code from the example MFCaptureToFile in the Windows SDK Samples and that doesn't work either. Although, there I get this weird negative number for the failed HRESULT: -1072875772.
If you have got error [0xC00D3704] - it means that source does not initialized. Such error can be caused by mistake of initialization, busy camera by another application (process) or unsupport of the camera by UVC driver(old cameras support DirectShow driver with partly compatibleness with UVC. It is possible read some general info from old cameras via UVC as friendly name, symbolic link. However, old cameras support DirectShow models - PUSH, while camera pushes bytes into the pipeline, while Media Foundation PULL data - sends special signal and wait data).
For checking your code I would like advise to research articles about capturing video from web cam on site "CodeProject" - search "videoInput" name.

Media Foundation - How to select input from crossbar?

Good Evening
I'm trying to put together a little video capture DLL using Media Foundation which is largely adapted from the SimpleCapture and CaptureEngine SDK samples. The whole thing works great when I the capture source is my inbuilt webcam.
The problem is when I try using an external USB frame grabber with multiple inputs (s-video, composite & stereo audio) there is no preview stream. I have got a function that uses the DShow interface which seems to successfully change the selected input on the cross bar (verified by looking at the options in AMCAP after running function) However I have not been able to get the device to preview at all.
Is there a Media Foundation way of selecting inputs from the crossbar? Or if not, can you suggest what may be wrong with the crossbar input selection code below:
HRESULT CaptureManager::doSelectInputUsingCrossbar(std::wstring deviceName, long input)
{
IGraphBuilder *pGraph = NULL;
ICaptureGraphBuilder2 *pBuilder = NULL;
IBaseFilter* pSrc = NULL;
ICreateDevEnum *pDevEnum = NULL;
IEnumMoniker *pClassEnum = NULL;
IMoniker *pMoniker = NULL;
bool bCameraFound = false;
IPropertyBag *pPropBag = NULL;
bool crossbarSet = false;
HRESULT hr = S_OK;
if (!(input == PhysConn_Video_Composite || input == PhysConn_Video_SVideo))
{
return S_FALSE;
}
// Create the Filter Graph Manager.
hr = CoCreateInstance(CLSID_FilterGraph, NULL,
CLSCTX_INPROC_SERVER, IID_IGraphBuilder, (void **)&pGraph);
if (SUCCEEDED(hr))
{
// Create the Capture Graph Builder.
hr = CoCreateInstance(CLSID_CaptureGraphBuilder2, NULL,
CLSCTX_INPROC_SERVER, IID_ICaptureGraphBuilder2,
(void **)&pBuilder);
if (SUCCEEDED(hr))
{
pBuilder->SetFiltergraph(pGraph);
}
else return hr;
}
else return hr;
//////////////////////////
// chooses the default camera filter
hr = CoCreateInstance(CLSID_SystemDeviceEnum, NULL, CLSCTX_INPROC,
IID_ICreateDevEnum, (void **)&pDevEnum);
if (FAILED(hr))
{
return E_FAIL;
}
// Create an enumerator for video capture devices.
if (FAILED(pDevEnum->CreateClassEnumerator(CLSID_VideoInputDeviceCategory, &pClassEnum, 0)))
{
return E_FAIL;
}
if (pClassEnum == NULL)
{
CheckHR(hr, "Class enumerator is null - no input devices detected?");
pDevEnum->Release();
return E_FAIL;
}
while (!bCameraFound && (pClassEnum->Next(1, &pMoniker, NULL) == S_OK))
{
HRESULT hr = pMoniker->BindToStorage(0, 0, IID_IPropertyBag, (void**)(&pPropBag));
if (FAILED(hr))
{
pMoniker->Release();
continue; // Skip this one, maybe the next one will work.
}
// Find the description or friendly name.
VARIANT varName;
VariantInit(&varName);
hr = pPropBag->Read(L"Description", &varName, 0);
if (FAILED(hr))
{
hr = pPropBag->Read(L"FriendlyName", &varName, 0);
}
if (SUCCEEDED(hr))
{
if (0 == wcscmp(varName.bstrVal, deviceName.c_str()))
{
pMoniker->BindToObject(0, 0, IID_IBaseFilter, (void**)&pSrc);
bCameraFound = true;
break;
}//if
VariantClear(&varName);
}
pPropBag->Release();
pMoniker->Release();
}//while
pClassEnum->Release();
pDevEnum->Release();
if (!bCameraFound)//else
{
CheckHR(hr, "Error: Get device Moniker, No device found");
goto done;
}
hr = pGraph->AddFilter(pSrc, L"video capture adapter");
if (FAILED(hr))
{
CheckHR(hr, "Can't add capture device to graph");
goto done;
}
///////////////
IAMCrossbar *pxBar = NULL;
hr = pBuilder->FindInterface(&PIN_CATEGORY_CAPTURE, &MEDIATYPE_Interleaved, pSrc, IID_IAMCrossbar, (void **)&pxBar);
if (FAILED(hr))
{
hr = pBuilder->FindInterface(&PIN_CATEGORY_CAPTURE, &MEDIATYPE_Video, pSrc, IID_IAMCrossbar, (void **)&pxBar);
if (FAILED(hr))
{
CheckHR(hr, "Failed to get crossbar filter");
goto done;
}
}//if
else
{
CheckHR(hr, "Failed to get crossbar (capture)");
goto done;
}
LONG lInpin, lOutpin;
hr = pxBar->get_PinCounts(&lOutpin, &lInpin);
BOOL IPin = TRUE; LONG pIndex = 0, pRIndex = 0, pType = 0;
while (pIndex < lInpin)
{
hr = pxBar->get_CrossbarPinInfo(IPin, pIndex, &pRIndex, &pType);
if (pType == input)
{
break;
}
pIndex++;
}
BOOL OPin = FALSE; LONG pOIndex = 0, pORIndex = 0, pOType = 0;
while (pOIndex < lOutpin)
{
hr = pxBar->get_CrossbarPinInfo(OPin, pOIndex, &pORIndex, &pOType);
if (pOType == PhysConn_Video_VideoDecoder)
{
break;
}
pIndex++;
}
hr = pxBar->Route(pOIndex, pIndex);
done:
SafeRelease(&pPropBag);
SafeRelease(&pMoniker);
SafeRelease(&pxBar);
SafeRelease(&pGraph);
SafeRelease(&pBuilder);
SafeRelease(&pSrc);
return hr;
}
USB frame grabber is not supported by Microsoft Media Foundation. Microsoft Media Foundation has supporting of live sources via USB Video Class devices driver. The modern web cameras support such driver, but USB frame grabber does not.
USB frame grabber support of DirectShow filters which based on PUSH model processing pipeline - sources send media samples into the media graph, while Microsoft Media Foundation uses PULL model processing pipeline - sinks send request for the new media samples and sources "listen" for such request.
With best.

DirectShow: webcam preview and image capture

After looking at a very similar question and seeing almost identical code, I've decided to ask this question separately. I want to show a video preview of the webcam's video stream to the default window DirectShow uses, and I also want the ability to "take a picture" of the video stream at any given moment.
I started with the DirectShow examples on MSDN, as well as the AMCap sample code, and have something I believe should should the preview part, but does not. I've found no examples of grabbing an image from the video stream except using SampleGrabber, which is deprecated and therefore I am trying not to use it.
Below is my code, line for line. Note that most of the code in EnumerateCameras is commented out. That code would've been for attaching to another window, which I don't want to do. In the MSDN documentation, it explicitly states that the VMR_7 creates its own window to display the video stream. I get no errors in my app, but this window never appears.
My question then is this: What am I doing wrong? Alternatively, if you know of a simple example of what I am trying to do, link me to it. AMCap is not a simple example, for reference.
NOTE: InitalizeVMR is for running in windowless state, which is my ultimate goal (integrating into a DirectX game). For now, however, i just want it to run in the simplest mode possible.
EDIT: The first portion of this question, that is previewing the camera stream, is solved. I am now just looking for an alternative to the deprecated SampleGrabber class so I can snap a photo at any moment and save it to a file.
EDIT: After looking for almost an hour on google, the general concensus seems to be that you HAVE to use ISampleGrabber. Please let me know if you find anything different.
Testing code (main.cpp):
CWebcam* camera = new CWebcam();
HRESULT hr = CoInitializeEx(NULL, COINIT_MULTITHREADED);
MessageBox(NULL, L"text", L"caption", NULL);
if (SUCCEEDED(hr))
{
camera->Create();
camera->EnumerateCameras();
camera->StartCamera();
}
int d;
cin >> d;
Webcam.cpp:
#include "Webcam.h"
CWebcam::CWebcam() {
HRESULT hr = CoInitializeEx(NULL, COINIT_MULTITHREADED);
//m_pTexInst = nullptr;
//m_pTexRes = nullptr;
}
CWebcam::~CWebcam() {
CoUninitialize();
m_pDeviceMonikers->Release();
m_pMediaController->Release();
}
BOOL CWebcam::Create() {
InitCaptureGraphBuilder(&m_pFilterGraph, &m_pCaptureGraph);
hr = m_pFilterGraph->QueryInterface(IID_IMediaControl, (void **)&m_pMediaController);
return TRUE;
}
void CWebcam::Destroy() {
}
void CWebcam::EnumerateCameras() {
HRESULT hr = EnumerateDevices(CLSID_VideoInputDeviceCategory, &m_pDeviceMonikers);
if (SUCCEEDED(hr))
{
//DisplayDeviceInformation(m_pDeviceMonikers);
//m_pDeviceMonikers->Release();
IMoniker *pMoniker = NULL;
if(m_pDeviceMonikers->Next(1, &pMoniker, NULL) == S_OK)
{
hr = pMoniker->BindToObject(0, 0, IID_IBaseFilter, (void**)&m_pCameraFilter);
if (SUCCEEDED(hr))
{
hr = m_pFilterGraph->AddFilter(m_pCameraFilter, L"Capture Filter");
}
}
// connect the output pin to the video renderer
if(SUCCEEDED(hr))
{
hr = m_pCaptureGraph->RenderStream(&PIN_CATEGORY_PREVIEW, &MEDIATYPE_Video,
m_pCameraFilter, NULL, NULL);
}
//InitializeVMR(hwnd, m_pFilterGraph, &m_pVMRControl, 1, FALSE);
//get the video window that will be displayed from the filter graph
IVideoWindow *pVideoWindow = NULL;
hr = m_pFilterGraph->QueryInterface(IID_IVideoWindow, (void **)&pVideoWindow);
/*if(hr != NOERROR)
{
printf("This graph cannot preview properly");
}
else
{
//get the video stream configurations
hr = m_pCaptureGraph->FindInterface(&PIN_CATEGORY_CAPTURE,
&MEDIATYPE_Video, m_pCameraFilter,
IID_IAMStreamConfig, (void **)&m_pVideoStreamConfig);
//Find out if this is a DV stream
AM_MEDIA_TYPE *pMediaTypeDV;
//fake window handle
HWND window = NULL;
if(m_pVideoStreamConfig && SUCCEEDED(m_pVideoStreamConfig->GetFormat(&pMediaTypeDV)))
{
if(pMediaTypeDV->formattype == FORMAT_DvInfo)
{
// in this case we want to set the size of the parent window to that of
// current DV resolution.
// We get that resolution from the IVideoWindow.
IBasicVideo* pBasivVideo;
// If we got here, gcap.pVW is not NULL
//ASSERT(pVideoWindow != NULL);
hr = pVideoWindow->QueryInterface(IID_IBasicVideo, (void**)&pBasivVideo);
/*if(SUCCEEDED(hr))
{
HRESULT hr1, hr2;
long lWidth, lHeight;
hr1 = pBasivVideo->get_VideoHeight(&lHeight);
hr2 = pBasivVideo->get_VideoWidth(&lWidth);
if(SUCCEEDED(hr1) && SUCCEEDED(hr2))
{
ResizeWindow(lWidth, abs(lHeight));
}
}
}
}
RECT rc;
pVideoWindow->put_Owner((OAHWND)window); // We own the window now
pVideoWindow->put_WindowStyle(WS_CHILD); // you are now a child
GetClientRect(window, &rc);
pVideoWindow->SetWindowPosition(0, 0, rc.right, rc.bottom); // be this big
pVideoWindow->put_Visible(OATRUE);
}*/
}
}
BOOL CWebcam::StartCamera() {
if(m_bIsStreaming == FALSE)
{
m_bIsStreaming = TRUE;
hr = m_pMediaController->Run();
if(FAILED(hr))
{
// stop parts that ran
m_pMediaController->Stop();
return FALSE;
}
return TRUE;
}
return FALSE;
}
void CWebcam::EndCamera() {
if(m_bIsStreaming)
{
hr = m_pMediaController->Stop();
m_bIsStreaming = FALSE;
//invalidate client rect as well so that it must redraw
}
}
BOOL CWebcam::CaptureToTexture() {
return TRUE;
}
HRESULT CWebcam::InitCaptureGraphBuilder(
IGraphBuilder **ppGraph, // Receives the pointer.
ICaptureGraphBuilder2 **ppBuild // Receives the pointer.
)
{
if (!ppGraph || !ppBuild)
{
return E_POINTER;
}
IGraphBuilder *pGraph = NULL;
ICaptureGraphBuilder2 *pBuild = NULL;
// Create the Capture Graph Builder.
HRESULT hr = CoCreateInstance(CLSID_CaptureGraphBuilder2, NULL,
CLSCTX_INPROC_SERVER, IID_ICaptureGraphBuilder2, (void**)&pBuild );
if (SUCCEEDED(hr))
{
// Create the Filter Graph Manager.
hr = CoCreateInstance(CLSID_FilterGraph, 0, CLSCTX_INPROC_SERVER,
IID_IGraphBuilder, (void**)&pGraph);
if (SUCCEEDED(hr))
{
// Initialize the Capture Graph Builder.
pBuild->SetFiltergraph(pGraph);
// Return both interface pointers to the caller.
*ppBuild = pBuild;
*ppGraph = pGraph; // The caller must release both interfaces.
return S_OK;
}
else
{
pBuild->Release();
}
}
return hr; // Failed
}
HRESULT CWebcam::EnumerateDevices(REFGUID category, IEnumMoniker **ppEnum)
{
// Create the System Device Enumerator.
ICreateDevEnum *pSystemDeviceEnumerator;
HRESULT hr = CoCreateInstance(CLSID_SystemDeviceEnum, NULL,
CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&pSystemDeviceEnumerator));
if (SUCCEEDED(hr))
{
// Create an enumerator for the category.
hr = pSystemDeviceEnumerator->CreateClassEnumerator(category, ppEnum, 0);
if (hr == S_FALSE)
{
hr = VFW_E_NOT_FOUND; // The category is empty. Treat as an error.
}
pSystemDeviceEnumerator->Release();
}
return hr;
}
void CWebcam::DisplayDeviceInformation(IEnumMoniker *pEnum)
{
IMoniker *pMoniker = NULL;
int counter = 0;
while (pEnum->Next(1, &pMoniker, NULL) == S_OK)
{
IPropertyBag *pPropBag;
HRESULT hr = pMoniker->BindToStorage(0, 0, IID_PPV_ARGS(&pPropBag));
if (FAILED(hr))
{
pMoniker->Release();
continue;
}
VARIANT var;
VariantInit(&var);
// Get description or friendly name.
hr = pPropBag->Read(L"Description", &var, 0);
if (FAILED(hr))
{
hr = pPropBag->Read(L"FriendlyName", &var, 0);
}
if (SUCCEEDED(hr))
{
printf("%d: %S\n", counter, var.bstrVal);
VariantClear(&var);
}
hr = pPropBag->Write(L"FriendlyName", &var);
// WaveInID applies only to audio capture devices.
hr = pPropBag->Read(L"WaveInID", &var, 0);
if (SUCCEEDED(hr))
{
printf("%d: WaveIn ID: %d\n", counter, var.lVal);
VariantClear(&var);
}
hr = pPropBag->Read(L"DevicePath", &var, 0);
if (SUCCEEDED(hr))
{
// The device path is not intended for display.
printf("%d: Device path: %S\n", counter, var.bstrVal);
VariantClear(&var);
}
pPropBag->Release();
pMoniker->Release();
counter++;
}
}
HRESULT CWebcam::InitializeVMR(
HWND hwndApp, // Application window.
IGraphBuilder* pFG, // Pointer to the Filter Graph Manager.
IVMRWindowlessControl** ppWc, // Receives the interface.
DWORD dwNumStreams, // Number of streams to use.
BOOL fBlendAppImage // Are we alpha-blending a bitmap?
)
{
IBaseFilter* pVmr = NULL;
IVMRWindowlessControl* pWc = NULL;
*ppWc = NULL;
// Create the VMR and add it to the filter graph.
HRESULT hr = CoCreateInstance(CLSID_VideoMixingRenderer, NULL,
CLSCTX_INPROC, IID_IBaseFilter, (void**)&pVmr);
if (FAILED(hr))
{
return hr;
}
hr = pFG->AddFilter(pVmr, L"Video Mixing Renderer");
if (FAILED(hr))
{
pVmr->Release();
return hr;
}
// Set the rendering mode and number of streams.
IVMRFilterConfig* pConfig;
hr = pVmr->QueryInterface(IID_IVMRFilterConfig, (void**)&pConfig);
if (SUCCEEDED(hr))
{
pConfig->SetRenderingMode(VMRMode_Windowless);
// Set the VMR-7 to mixing mode if you want more than one video
// stream, or you want to mix a static bitmap over the video.
// (The VMR-9 defaults to mixing mode with four inputs.)
if (dwNumStreams > 1 || fBlendAppImage)
{
pConfig->SetNumberOfStreams(dwNumStreams);
}
pConfig->Release();
hr = pVmr->QueryInterface(IID_IVMRWindowlessControl, (void**)&pWc);
if (SUCCEEDED(hr))
{
pWc->SetVideoClippingWindow(hwndApp);
*ppWc = pWc; // The caller must release this interface.
}
}
pVmr->Release();
// Now the VMR can be connected to other filters.
return hr;
}
In windowless mode VMR would not create separate window. Since you started initialization for widnowless mode, you have to follow SetVideoClippingWindow with IVMRWindowlessControl::SetVideoPosition call to provide position within the window, see VMR Windowless Mode on MSDN.
Another sample code snippet for you: http://www.assembla.com/code/roatl-utilities/subversion/nodes/trunk/FullScreenWindowlessVmrSample01/MainDialog.h#ln188