Implement 'IMFTransform' to encode or decode H264 or AAC - C++

Can the IMFTransform interface be implemented to encode or decode H264 or AAC data, or should I use FFmpeg or OpenH264?

When you encode or decode media, IMFTransform is the interface codecs expose in the Media Foundation API. That is, you don't implement it yourself - you take advantage of the existing codec implementations available to you (you implement it only when you want to extend the API and supply an additional codec).
Stock Windows provides you with:
AAC Decoder - CLSID_CMSAACDecMFT
AAC Encoder - CLSID_AACMFTEncoder
H.264 Video Decoder - CLSID_CMSH264DecoderMFT, which leverages DXVA hardware-assisted decoding wherever applicable
H.264 Video Encoder - CLSID_CMSH264EncoderMFT, a software (fallback) encoder
Additional hardware-accelerated encoders might be provided with hardware drivers. All of the above are exposed as IMFTransform and can be consumed directly or through higher-level Media Foundation APIs.
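As an aside, you don't have to hard-code a CLSID: the transforms can also be discovered at run time. Below is a minimal sketch (an illustration, not part of the original answer) that enumerates H.264 encoder MFTs with MFTEnumEx and activates the first match as an IMFTransform; error handling is trimmed for brevity.
#include <mfapi.h>
#include <mftransform.h>
#pragma comment(lib, "mfplat")
#pragma comment(lib, "mfuuid")

// Hypothetical helper: enumerate registered H.264 encoder MFTs and
// activate the first one returned as an IMFTransform.
IMFTransform* CreateH264EncoderTransform()
{
    MFT_REGISTER_TYPE_INFO outInfo = { MFMediaType_Video, MFVideoFormat_H264 };
    IMFActivate **ppActivate = NULL;
    UINT32 count = 0;
    IMFTransform *pEncoder = NULL;

    HRESULT hr = MFTEnumEx(
        MFT_CATEGORY_VIDEO_ENCODER,
        MFT_ENUM_FLAG_SYNCMFT | MFT_ENUM_FLAG_ASYNCMFT |
        MFT_ENUM_FLAG_HARDWARE | MFT_ENUM_FLAG_SORTANDFILTER,
        NULL,       // Any input type.
        &outInfo,   // H.264 output.
        &ppActivate,
        &count);
    if (SUCCEEDED(hr) && count > 0)
    {
        // Activate the first (highest merit) match.
        ppActivate[0]->ActivateObject(IID_PPV_ARGS(&pEncoder));
    }
    for (UINT32 i = 0; i < count; i++)
    {
        ppActivate[i]->Release();
    }
    CoTaskMemFree(ppActivate);
    return pEncoder; // NULL if no encoder was found.
}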

You can use the IMFTransform interface to decode and encode H264 and AAC. Refer to CLSID_CMSH264DecoderMFT and CLSID_CMSAACDecMFT to decode H264 and AAC, and to CLSID_CMSH264EncoderMFT and CLSID_AACMFTEncoder to encode H264 and AAC.
Encoder example: initialise the encoder.
IUnknown *_transformUnk;
IMFTransform *_encoder;

HRESULT MediaEncoder::InitialiseEncoder(EncoderType encoder)
{
    HRESULT hr = S_OK;

    // Has the encoder been initialised?
    if (!_isOpen)
    {
        _encoderType = encoder;

        // Initialise COM.
        CoInitializeEx(NULL, COINIT_APARTMENTTHREADED);

        // Create a new close event handle.
        _hCloseEvent = CreateEvent(NULL, FALSE, FALSE, NULL);

        // If the event was not created.
        if (_hCloseEvent == NULL)
        {
            // Get the result value.
            hr = __HRESULT_FROM_WIN32(GetLastError());
        }

        // If the close event was created successfully.
        if (SUCCEEDED(hr))
        {
            // Start up the Media Foundation platform.
            hr = MFStartup(MF_VERSION);
            _isOpen = true;
        }

        if (SUCCEEDED(hr))
        {
            // Select the encoder.
            switch (encoder)
            {
            case Nequeo::Media::Foundation::EncoderType::H264:
                // Create the H264 encoder.
                hr = CreateEncoder(CLSID_CMSH264EncoderMFT);
                break;

            case Nequeo::Media::Foundation::EncoderType::AAC:
                // Create the AAC encoder.
                hr = CreateEncoder(CLSID_AACMFTEncoder);
                break;

            case Nequeo::Media::Foundation::EncoderType::MP3:
                // Create the MP3 encoder.
                hr = CreateEncoder(CLSID_MP3ACMCodecWrapper);
                break;

            default:
                hr = ((HRESULT)-1L);
                break;
            }
        }

        if (SUCCEEDED(hr))
        {
            // Query for the IMFTransform interface.
            hr = _transformUnk->QueryInterface(IID_PPV_ARGS(&_encoder));

            // The encoder has been created.
            _created = true;
        }
    }

    // Return the result.
    return hr;
}

HRESULT MediaEncoder::CreateEncoder(const CLSID encoder)
{
    HRESULT hr = S_OK;

    // Create the encoder.
    hr = CoCreateInstance(encoder, NULL, CLSCTX_INPROC_SERVER, IID_IUnknown, (void**)&_transformUnk);

    // Return the result.
    return hr;
}
Decoder example: initialise the decoder.
IUnknown *_transformUnk;
IMFTransform *_decoder;

HRESULT MediaDecoder::InitialiseDecoder(DecoderType decoder)
{
    HRESULT hr = S_OK;

    // Has the decoder been initialised?
    if (!_isOpen)
    {
        _decoderType = decoder;

        // Initialise COM.
        CoInitializeEx(NULL, COINIT_APARTMENTTHREADED);

        // Create a new close event handle.
        _hCloseEvent = CreateEvent(NULL, FALSE, FALSE, NULL);

        // If the event was not created.
        if (_hCloseEvent == NULL)
        {
            // Get the result value.
            hr = __HRESULT_FROM_WIN32(GetLastError());
        }

        // If the close event was created successfully.
        if (SUCCEEDED(hr))
        {
            // Start up the Media Foundation platform.
            hr = MFStartup(MF_VERSION);
            _isOpen = true;
        }

        if (SUCCEEDED(hr))
        {
            // Select the decoder.
            switch (decoder)
            {
            case Nequeo::Media::Foundation::DecoderType::H264:
                // Create the H264 decoder.
                hr = CreateDecoder(CLSID_CMSH264DecoderMFT);
                break;

            case Nequeo::Media::Foundation::DecoderType::AAC:
                // Create the AAC decoder.
                hr = CreateDecoder(CLSID_CMSAACDecMFT);
                break;

            case Nequeo::Media::Foundation::DecoderType::MP3:
                // Create the MP3 decoder.
                hr = CreateDecoder(CLSID_CMP3DecMediaObject);
                break;

            case Nequeo::Media::Foundation::DecoderType::MPEG4:
                // Create the MPEG4 decoder.
                hr = CreateDecoder(CLSID_CMpeg4sDecMFT);
                break;

            default:
                hr = ((HRESULT)-1L);
                break;
            }
        }

        if (SUCCEEDED(hr))
        {
            // Query for the IMFTransform interface.
            hr = _transformUnk->QueryInterface(IID_PPV_ARGS(&_decoder));

            // The decoder has been created.
            _created = true;
        }
    }

    // Return the result.
    return hr;
}

HRESULT MediaDecoder::CreateDecoder(const CLSID decoder)
{
    HRESULT hr = S_OK;

    // Create the decoder.
    hr = CoCreateInstance(decoder, NULL, CLSCTX_INPROC_SERVER, IID_IUnknown, (void**)&_transformUnk);

    // Return the result.
    return hr;
}
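Neither snippet above shows the data path itself, so here is a hedged sketch of the processing loop: once the media types are negotiated, samples go in through IMFTransform::ProcessInput and come out through IMFTransform::ProcessOutput. The helper name and buffer handling are illustrative assumptions for a synchronous MFT that does not allocate its own output samples; real code must also handle stream-change events.
// Hedged sketch (not from either answer above): push one sample through
// a synchronous MFT and pull the result. Assumes media types are already
// set and that the MFT does not allocate its own output samples.
HRESULT ProcessSample(IMFTransform *pTransform, IMFSample *pInput, IMFSample **ppOutput)
{
    *ppOutput = NULL;

    // Feed one sample into the transform.
    HRESULT hr = pTransform->ProcessInput(0, pInput, 0);
    if (FAILED(hr)) return hr;

    // Allocate an output sample sized from the stream info.
    MFT_OUTPUT_STREAM_INFO info = { 0 };
    IMFSample *pSample = NULL;
    IMFMediaBuffer *pBuffer = NULL;
    hr = pTransform->GetOutputStreamInfo(0, &info);
    if (SUCCEEDED(hr)) hr = MFCreateSample(&pSample);
    if (SUCCEEDED(hr)) hr = MFCreateMemoryBuffer(info.cbSize, &pBuffer);
    if (SUCCEEDED(hr)) hr = pSample->AddBuffer(pBuffer);

    // Pull the encoded/decoded data out of the transform.
    if (SUCCEEDED(hr))
    {
        MFT_OUTPUT_DATA_BUFFER output = { 0 };
        output.pSample = pSample;
        DWORD dwStatus = 0;
        hr = pTransform->ProcessOutput(0, 1, &output, &dwStatus);
        if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT)
        {
            // Not an error: the MFT simply needs more input before
            // it can produce output.
            hr = S_OK;
        }
        else if (SUCCEEDED(hr))
        {
            *ppOutput = pSample;
            (*ppOutput)->AddRef();
        }
    }

    if (pSample) pSample->Release();
    if (pBuffer) pBuffer->Release();
    return hr;
}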

Related

How to use the sink writer to write raw PCM data to an AAC file; the ultimate goal is to use the sink writer to combine RGB data and PCM data into an overall video file

I have been able to use the sink writer to read RGB32 data from a file and generate an MP4 video; please refer to the example in the official documentation: https://learn.microsoft.com/zh-cn/windows/win32/medfound/tutorial--using-the-sink-writer-to-encode-video
Now I want to do something similar: write PCM-generated data to an AAC or MP3 file, but there were some problems with the code:
HRESULT InitializeSinkWriter(IMFSinkWriter** ppWriter, DWORD* pStreamIndex)
{
    HRESULT hr = S_OK;

    *ppWriter = NULL;
    *pStreamIndex = NULL;

    DWORD streamIndex;
    IMFSinkWriter* pSinkWriter = NULL;
    IMFMediaType* pMediaTypeOut = NULL;
    IMFMediaType* pMediaTypeIn = NULL;

    // Set the input media type.
    if (SUCCEEDED(hr))
    {
        hr = MFCreateMediaType(&pMediaTypeIn);
    }
    if (SUCCEEDED(hr))
    {
        hr = pMediaTypeIn->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
    }
    if (SUCCEEDED(hr))
    {
        hr = pMediaTypeIn->SetGUID(MF_MT_SUBTYPE, _recorderConf.audioInPutFormat);
    }
    if (SUCCEEDED(hr))
    {
        hr = pMediaTypeIn->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, _recorderConf.audioChannelNum);
    }
    if (SUCCEEDED(hr))
    {
        hr = pMediaTypeIn->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, _recorderConf.audioSamplePerSec);
    }
    if (SUCCEEDED(hr))
    {
        hr = pMediaTypeIn->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, _recorderConf.audioBitPerSample);
    }
    if (SUCCEEDED(hr))
    {
        hr = pMediaTypeIn->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, _recorderConf.audioBytesPerSecond);
    }
    if (SUCCEEDED(hr))
    {
        hr = pMediaTypeIn->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, _recorderConf.audioSampleIndependent);
    }
    if (SUCCEEDED(hr))
    {
        hr = pSinkWriter->SetInputMediaType(streamIndex, pMediaTypeIn, NULL);
    }
}
hr = pSinkWriter->SetInputMediaType(streamIndex, pMediaTypeIn, NULL);
The hr return value for this line is:
0xC00D36B4: "The data specified for the media type is invalid, inconsistent, or not supported by this object."
I want to know what went wrong and what is the right way to do it.

DirectShow - Cannot set still pin using SetMode

I am trying to get an image from the still pin of my camera using DirectShow (the framework the camera maker has suggested using).
I've been following the Microsoft DirectShow documentation at https://learn.microsoft.com/en-gb/windows/desktop/DirectShow/capturing-an-image-from-a-still-image-pin and although I can find a still pin using the ICaptureGraphBuilder2::FindPin method and have confirmed that the pin exists on the camera using the GraphEdit app, I've hit a brick wall in using the IAMVideoControl::SetMode method to switch the pin into still mode.
The code that I'm using is below:
HRESULT hr = CoInitializeEx(NULL, COINIT::COINIT_MULTITHREADED);
if (FAILED(hr))
    Console::WriteLine("Initialise Failed");
else
    Console::WriteLine("Initialise Success");

// Initialise the graph builder.
ICaptureGraphBuilder2 *pBuild;
IGraphBuilder *pGraph;
hr = CoCreateInstance(CLSID_CaptureGraphBuilder2, NULL,
    CLSCTX_INPROC_SERVER, IID_ICaptureGraphBuilder2, (void **)&pBuild);
if (FAILED(hr))
    Console::WriteLine("Graph builder failed");
else
    Console::WriteLine("Graph builder success");

// Create the filter graph.
hr = CoCreateInstance(CLSID_FilterGraph, 0, CLSCTX_INPROC_SERVER,
    IID_IGraphBuilder, (void**)&pGraph);
if (SUCCEEDED(hr))
{
    pBuild->SetFiltergraph(pGraph);
    Console::WriteLine("Set filter graph success");
}
else
{
    pBuild->Release();
    Console::WriteLine("Set filter graph failed");
}

// Create the System Device Enumerator.
ICreateDevEnum *pDevEnum;
IEnumMoniker *pDevicesInfo;
HRESULT hr2 = CoCreateInstance(CLSID_SystemDeviceEnum, NULL,
    CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&pDevEnum));
if (SUCCEEDED(hr2))
{
    Console::WriteLine("Device enum builder success");

    // Create an enumerator for the video category.
    hr = pDevEnum->CreateClassEnumerator(CLSID_VideoInputDeviceCategory, &pDevicesInfo, 0);
    if (hr == S_FALSE)
        Console::WriteLine("Retrieve video input devices failed");
    else
        Console::WriteLine("Retrieve video input devices success");
    pDevEnum->Release();
}

// Loop over the video devices and find the See3CAM_130.
IMoniker *pMoniker = NULL;
while (pDevicesInfo->Next(1, &pMoniker, NULL) == S_OK)
{
    IPropertyBag *pPropBag;
    HRESULT hr = pMoniker->BindToStorage(0, 0, IID_PPV_ARGS(&pPropBag));
    if (FAILED(hr))
    {
        pMoniker->Release();
        continue;
    }

    VARIANT var;
    VariantInit(&var);

    // Get the description or friendly name.
    hr = pPropBag->Read(L"FriendlyName", &var, 0);
    if (SUCCEEDED(hr))
    {
        Console::WriteLine("Found video device");

        // If this is the correct device, break out of the loop.
        if (0 == wcscmp(var.bstrVal, L"See3CAM_130"))
        {
            Console::WriteLine("See3CAM_130");
            break;
        }
        else
        {
            Console::WriteLine("Comparison failed");
        }
        printf("%S\n", var.bstrVal);
        VariantClear(&var);
    }
    else
    {
        Console::WriteLine("Vid Device failed");
    }
}

// Create the filter for the device.
IBaseFilter *pCap = NULL;
hr = pMoniker->BindToObject(0, 0, IID_IBaseFilter, (void**)&pCap);
if (SUCCEEDED(hr))
{
    Console::WriteLine("Filter created");
    hr = pGraph->AddFilter(pCap, L"Capture Filter");
    if (SUCCEEDED(hr))
    {
        Console::WriteLine("Capture filter added to filter graph");
    }
    else
    {
        Console::WriteLine("Capture filter NOT added to filter graph");
    }
}
else
    Console::WriteLine("Filter failed");

// Render the capture pin first before attempting to set the still pin
// (setting the still pin fails with or without this step).
hr = pBuild->RenderStream(
    &PIN_CATEGORY_CAPTURE,  // Pin category.
    &MEDIATYPE_Video,       // Media type.
    pCap,                   // Capture filter.
    NULL,                   // Intermediate filter (optional).
    NULL);                  // Mux or file sink filter.
if (SUCCEEDED(hr))
    Console::WriteLine("Stream Render success");
else
    Console::WriteLine("Stream Render failed");

// Create a control to run the graph.
IMediaControl *pControl;
hr = pGraph->QueryInterface(IID_IMediaControl, (void **)&pControl);
if (FAILED(hr))
    Console::WriteLine("Graph initialised - Failed");
else
    Console::WriteLine("Graph initialised - Success");

// Run the graph.
while (1)
{
    hr = pControl->Run();
    if (FAILED(hr))
        Console::WriteLine("Graph run - Failed");
    else
    {
        Console::WriteLine("Graph run - Success");
        break;
    }
}

// Find the still pin.
IAMVideoControl *pAMVidControl = NULL;
hr = pCap->QueryInterface(IID_IAMVideoControl, (void**)&pAMVidControl);
if (SUCCEEDED(hr))
{
    Console::WriteLine("Video Control Initialised - Success");

    IPin *pPin = NULL;
    hr = pBuild->FindPin(
        pCap,                 // Filter.
        PINDIR_OUTPUT,        // Look for an output pin.
        &PIN_CATEGORY_STILL,  // Pin category.
        NULL,                 // Media type (don't care).
        FALSE,                // Pin must be unconnected.
        0,                    // Get the 0'th pin.
        &pPin                 // Receives a pointer to the pin.
    );
    if (SUCCEEDED(hr))
    {
        Console::WriteLine("Find pin - Success");
        hr = pAMVidControl->SetMode(pPin, VideoControlFlag_Trigger);
        if (FAILED(hr))
            Console::WriteLine("Pin Mode Set - Failed");
        else
            Console::WriteLine("Pin Mode Set - Success");
        pPin->Release();
    }
    else
        Console::WriteLine("Find pin - Failed");
}
else
    Console::WriteLine("Video Control Initialised - Failed");
Everything succeeds until I try to use the SetMode method, and I've no clue as to why.
I'm still a junior developer and quite inexperienced in C++, so I suspect I've missed a step somewhere. I've considered using the DirectShow.NET wrapper, but I don't want to waste the time I've spent getting this far.
Any hints on why the IAMVideoControl::SetMode method fails in my code?
Note: The HRESULT error is ERROR_GEN_FAILURE ("May be used to indicate that the device has stopped responding (hung) or a general failure has occurred on the device. The device may need to be manually reset.")
I have seen your query, and you have to do a lot of work to capture a still frame.
After you enumerate the still pin, you should connect a Sample Grabber and a Null Renderer to the camera's still pin.
After building and running the graph, you should trigger still mode, and the frame will be received in the grabber.
Then you can do whatever you want with the frame; see the sketch below.
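To make that suggestion concrete, below is a hedged sketch (not the poster's code) of wiring the still pin into a Sample Grabber and Null Renderer before running the graph. It reuses pGraph, pBuild and pCap from the question and assumes the deprecated qedit.h Sample Grabber interfaces are available (newer SDKs no longer ship that header).
// Hypothetical continuation of the code above: route the still pin through
// a Sample Grabber and a Null Renderer, buffer the frames, and only then
// trigger the still pin.
IBaseFilter *pGrabberF = NULL;
ISampleGrabber *pGrabber = NULL;
IBaseFilter *pNullF = NULL;

hr = CoCreateInstance(CLSID_SampleGrabber, NULL, CLSCTX_INPROC_SERVER,
    IID_IBaseFilter, (void**)&pGrabberF);
if (SUCCEEDED(hr))
    hr = pGraph->AddFilter(pGrabberF, L"Sample Grabber");
if (SUCCEEDED(hr))
    hr = pGrabberF->QueryInterface(IID_ISampleGrabber, (void**)&pGrabber);
if (SUCCEEDED(hr))
{
    // Ask the grabber for an uncompressed RGB24 video format.
    AM_MEDIA_TYPE mt;
    ZeroMemory(&mt, sizeof(mt));
    mt.majortype = MEDIATYPE_Video;
    mt.subtype = MEDIASUBTYPE_RGB24;
    hr = pGrabber->SetMediaType(&mt);
}
if (SUCCEEDED(hr))
    hr = CoCreateInstance(CLSID_NullRenderer, NULL, CLSCTX_INPROC_SERVER,
        IID_IBaseFilter, (void**)&pNullF);
if (SUCCEEDED(hr))
    hr = pGraph->AddFilter(pNullF, L"Null Renderer");

// Connect: still pin -> Sample Grabber -> Null Renderer.
if (SUCCEEDED(hr))
    hr = pBuild->RenderStream(&PIN_CATEGORY_STILL, &MEDIATYPE_Video,
        pCap, pGrabberF, pNullF);

// Keep a copy of each frame so it can be fetched after the trigger.
if (SUCCEEDED(hr))
    hr = pGrabber->SetBufferSamples(TRUE);

// Run the graph, then trigger the still pin as in the original code:
// pAMVidControl->SetMode(pPin, VideoControlFlag_Trigger);
// and finally fetch the image with pGrabber->GetCurrentBuffer(...).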

How do I encode raw 48khz/32bits PCM to FLAC using Microsoft Media Foundation?

I created a SinkWriter that is able to encode video and audio using Microsoft's Media Foundation platform.
Video is working fine so far, but I have some trouble with the audio.
My PCM source has a sample rate of 48828 Hz, 32 bits per sample, and is mono.
Everything works well so far except for FLAC.
For instance, the MP3 output works more or less, but has the wrong format. According to MSDN (MP3 Audio Encoder), the MP3 encoder only supports 16 bits per sample as input. My PCM source, as described above, has 32 bits per sample.
However, the MP3 export still works, because the MF platform seems to have some kind of fallback and uses MPEG Audio Layer 1/2 (mpga) with 2 channels, 32 kHz and a bitrate of 320 kb/s.
Things start to get weird when I set MF_MT_SUBTYPE to MFAudioFormat_FLAC. The export works too, but the quality of the audio is awful. There's a lot of noise, but I am able to recognize the audio. According to VLC, the FLAC file has a sample rate of 44.1 kHz, 8 bits per sample, and is mono.
Does this mean the FLAC codec isn't able to work with the PCM I provide?
Has anyone had the same problem and was able to fix it?
Update
After doing some more research into this problem, it seems that my PCM audio with a resolution of 32 bits is too high. So currently I am trying to convert the 32-bit PCM to 24-bit for FLAC and 16-bit for MP3, but with no luck so far. I'll keep you updated if I make progress.
--------
Update 2
I've created a minimal example app that shows the problem I am facing.
It reads the 48 kHz 32-bit wave file and tries to encode it to FLAC.
When executing the hr = pSinkWriter->BeginWriting(); command I get the error 0xC00D36B4, which means "The data specified for the media type is invalid, inconsistent, or not supported by this object."
What am I doing wrong here?
#include "stdafx.h"
#include <windows.h>
#include <windowsx.h>
#include <comdef.h>
#include <mfapi.h>
#include <mfidl.h>
#include <mfreadwrite.h>
#include <Mferror.h>
#pragma comment(lib, "ole32")
#pragma comment(lib, "mfplat")
#pragma comment(lib, "mfreadwrite")
#pragma comment(lib, "mfuuid")
using namespace System;
int main(array<System::String ^> ^args)
{
HRESULT hr = CoInitializeEx(0, COINIT_MULTITHREADED);
hr = MFStartup(MF_VERSION);
IMFMediaType *pMediaType;
IMFMediaType *pMediaTypeOut;
IMFSourceReader *pSourceReader;
IMFAttributes *pAttributes;
IMFSinkWriter *pSinkWriter;
hr = MFCreateSourceReaderFromURL(
L"C:\\Temp\\48khz32bit.wav",
NULL,
&pSourceReader
);
hr = MFCreateAttributes(&pAttributes, 1);
hr = pAttributes->SetGUID(
MF_TRANSCODE_CONTAINERTYPE,
MFTranscodeContainerType_WAVE
);
hr = MFCreateSinkWriterFromURL(
L"C:\\Temp\\foo.flac",
NULL,
pAttributes,
&pSinkWriter
);
hr = pSourceReader->GetCurrentMediaType(
MF_SOURCE_READER_FIRST_AUDIO_STREAM,
&pMediaType);
hr = MFCreateMediaType(&pMediaTypeOut);
hr = pMediaTypeOut->SetGUID(
MF_MT_MAJOR_TYPE,
MFMediaType_Audio
);
hr = pMediaTypeOut->SetGUID(
MF_MT_SUBTYPE,
MFAudioFormat_FLAC
);
hr = pMediaTypeOut->SetUINT32(
MF_MT_AUDIO_SAMPLES_PER_SECOND,
48000
);
hr = pMediaTypeOut->SetUINT32(
MF_MT_AUDIO_NUM_CHANNELS,
1
);
hr = pMediaTypeOut->SetUINT32(
MF_MT_AUDIO_BITS_PER_SAMPLE,
32
);
hr = pMediaTypeOut->SetUINT32(
MF_MT_AUDIO_AVG_BYTES_PER_SECOND,
(((32 + 7) / 8) * 1) * 48000
);
hr = pMediaTypeOut->SetUINT32(
MF_MT_AUDIO_BLOCK_ALIGNMENT,
((32 + 7) / 8) * 1
);
DWORD nWriterStreamIndex = -1;
hr = pSinkWriter->AddStream(pMediaTypeOut, &nWriterStreamIndex);
hr = pSinkWriter->BeginWriting();
_com_error err(hr);
LPCTSTR errMsg = err.ErrorMessage();
for (;;)
{
DWORD nStreamIndex, nStreamFlags;
LONGLONG nTime;
IMFSample *pSample;
hr = pSourceReader->ReadSample(
MF_SOURCE_READER_FIRST_AUDIO_STREAM,
0,
&nStreamIndex,
&nStreamFlags,
&nTime,
&pSample);
if (pSample)
{
OutputDebugString(L"Write sample...\n");
hr = pSinkWriter->WriteSample(
nWriterStreamIndex,
pSample
);
}
if (nStreamFlags & MF_SOURCE_READERF_ENDOFSTREAM)
{
break;
}
}
hr = pSinkWriter->Finalize();
return 0;
}
--------
Update 3
I added the solution as an answer.
--------
Initialize SinkWriter
HRESULT SinkWriter::InitializeSinkWriter(IMFSinkWriter **ppWriter, DWORD *pStreamIndex, DWORD *pAudioStreamIndex, LPCWSTR filename)
{
    *ppWriter = NULL;
    *pStreamIndex = NULL;
    *pAudioStreamIndex = NULL;

    IMFSinkWriter *pSinkWriter = NULL;

    // Attributes
    IMFAttributes *pAttributes;
    HRESULT hr = S_OK;

    DX::ThrowIfFailed(MFCreateAttributes(&pAttributes, 3));

#if defined(ENABLE_HW_ACCELERATION)
    CComPtr<ID3D11Device> device;
    D3D_FEATURE_LEVEL levels[] = { D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0 };

#if defined(ENABLE_HW_DRIVER)
    DX::ThrowIfFailed(
        D3D11CreateDevice(
            nullptr,
            D3D_DRIVER_TYPE_HARDWARE,
            nullptr,
            (0 * D3D11_CREATE_DEVICE_SINGLETHREADED) | D3D11_CREATE_DEVICE_VIDEO_SUPPORT,
            levels,
            ARRAYSIZE(levels),
            D3D11_SDK_VERSION,
            &device,
            nullptr,
            nullptr
        )
    );
    const CComQIPtr<ID3D10Multithread> pMultithread = device;
    pMultithread->SetMultithreadProtected(TRUE);
#else
    DX::ThrowIfFailed(
        D3D11CreateDevice(
            nullptr,
            D3D_DRIVER_TYPE_NULL,
            nullptr,
            D3D11_CREATE_DEVICE_SINGLETHREADED,
            levels,
            ARRAYSIZE(levels),
            D3D11_SDK_VERSION,
            &device,
            nullptr,
            nullptr)
    );
#endif

    UINT token;
    CComPtr<IMFDXGIDeviceManager> pManager;
    DX::ThrowIfFailed(MFCreateDXGIDeviceManager(&token, &pManager));
    DX::ThrowIfFailed(pManager->ResetDevice(device, token));

    DX::ThrowIfFailed(pAttributes->SetUnknown(MF_SOURCE_READER_D3D_MANAGER, pManager));
    DX::ThrowIfFailed(pAttributes->SetUINT32(MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, TRUE));
#if (WINVER >= 0x0602)
    DX::ThrowIfFailed(pAttributes->SetUINT32(MF_SOURCE_READER_ENABLE_ADVANCED_VIDEO_PROCESSING, TRUE));
#endif
#else
    DX::ThrowIfFailed(pAttributes->SetUINT32(MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, TRUE));
    DX::ThrowIfFailed(pAttributes->SetUINT32(MF_SOURCE_READER_ENABLE_VIDEO_PROCESSING, TRUE));
#endif

    DX::ThrowIfFailed(MFCreateSinkWriterFromURL(filename, NULL, pAttributes, &pSinkWriter));

    if (m_vFormat != VideoFormat::SWFV_NONE)
    {
        DX::ThrowIfFailed(InitializeVideoCodec(pSinkWriter, pStreamIndex));
    }
    if (m_audFormat != AudioFormat::SWAF_NONE)
    {
        DX::ThrowIfFailed(InitializeAudioCodec(pSinkWriter, pAudioStreamIndex));
    }

    // Tell the sink writer to start accepting data.
    DX::ThrowIfFailed(pSinkWriter->BeginWriting());

    // Return the pointer to the caller.
    *ppWriter = pSinkWriter;
    (*ppWriter)->AddRef();

    SAFE_RELEASE(pSinkWriter);
    return hr;
}
Initialize Audio Codec
HRESULT SinkWriter::InitializeAudioCodec(IMFSinkWriter *pSinkWriter, DWORD *pStreamIndex)
{
    // Audio media types
    IMFMediaType *pAudioTypeOut = NULL;
    IMFMediaType *pAudioTypeIn = NULL;
    DWORD audioStreamIndex;
    HRESULT hr = S_OK;

    // Set the output audio type.
    DX::ThrowIfFailed(MFCreateMediaType(&pAudioTypeOut));
    DX::ThrowIfFailed(pAudioTypeOut->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio));
    DX::ThrowIfFailed(pAudioTypeOut->SetGUID(MF_MT_SUBTYPE, AUDIO_SUBTYPE));
    DX::ThrowIfFailed(pSinkWriter->AddStream(pAudioTypeOut, &audioStreamIndex));

    // Set the input audio type.
    DX::ThrowIfFailed(MFCreateMediaType(&pAudioTypeIn));
    DX::ThrowIfFailed(pAudioTypeIn->SetGUID(MF_MT_MAJOR_TYPE, AUDIO_MAJOR_TYPE));
    DX::ThrowIfFailed(pAudioTypeIn->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_PCM));
    DX::ThrowIfFailed(pAudioTypeIn->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, AUDIO_NUM_CHANNELS));
    DX::ThrowIfFailed(pAudioTypeIn->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, AUDIO_BITS_PER_SAMPLE));
    DX::ThrowIfFailed(pAudioTypeIn->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, AUDIO_BLOCK_ALIGNMENT));
    DX::ThrowIfFailed(pAudioTypeIn->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, AUDIO_SAMPLES_PER_SECOND));
    DX::ThrowIfFailed(pAudioTypeIn->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, AUDIO_AVG_BYTES_PER_SECOND));
    DX::ThrowIfFailed(pSinkWriter->SetInputMediaType(audioStreamIndex, pAudioTypeIn, NULL));

    *pStreamIndex = audioStreamIndex;

    SAFE_RELEASE(pAudioTypeOut);
    SAFE_RELEASE(pAudioTypeIn);
    return hr;
}
Push audio data
HRESULT SinkWriter::PushAudio(UINT32* data)
{
    HRESULT hr = S_FALSE;
    if (m_isInitializing)
    {
        return hr;
    }

    IMFSample *pSample = NULL;
    IMFMediaBuffer *pBuffer = NULL;
    BYTE *pMem = NULL;
    size_t cbBuffer = m_bufferLength * sizeof(short);

    // Create a new memory buffer.
    hr = MFCreateMemoryBuffer(cbBuffer, &pBuffer);

    // Lock the buffer and copy the audio frame into the buffer.
    if (SUCCEEDED(hr))
    {
        hr = pBuffer->Lock(&pMem, NULL, NULL);
    }
    if (SUCCEEDED(hr))
    {
        CopyMemory(pMem, data, cbBuffer);
    }

    if (m_vFormat == VideoFormat::SWFV_NONE && m_audFormat == AudioFormat::SWAF_WAV)
    {
        // Write the raw PCM straight to the byte stream while the buffer
        // is still locked (pMem is only valid between Lock and Unlock).
        DWORD cbWritten = 0;
        if (SUCCEEDED(hr))
        {
            hr = m_pByteStream->Write(pMem, cbBuffer, &cbWritten);
        }
        if (SUCCEEDED(hr))
        {
            m_cbWrittenByteStream += cbWritten;
        }
        if (pBuffer)
        {
            pBuffer->Unlock();
        }
        SAFE_RELEASE(pBuffer);
    }
    else
    {
        if (pBuffer)
        {
            pBuffer->Unlock();
        }

        // Set the data length of the buffer.
        if (SUCCEEDED(hr))
        {
            hr = pBuffer->SetCurrentLength(cbBuffer);
        }

        // Create a media sample and add the buffer to the sample.
        if (SUCCEEDED(hr))
        {
            hr = MFCreateSample(&pSample);
        }
        if (SUCCEEDED(hr))
        {
            hr = pSample->AddBuffer(pBuffer);
        }

        // Set the timestamp and the duration.
        if (SUCCEEDED(hr))
        {
            hr = pSample->SetSampleTime(m_cbRtStartVideo);
        }
        if (SUCCEEDED(hr))
        {
            hr = pSample->SetSampleDuration(m_cbRtDurationVideo);
        }

        // Send the sample to the sink writer.
        if (SUCCEEDED(hr))
        {
            hr = m_pSinkWriter->WriteSample(m_audioStreamIndex, pSample);
        }
        /*if (SUCCEEDED(hr))
        {
            m_cbRtStartAudio += m_cbRtDurationAudio;
        }*/

        SAFE_RELEASE(pSample);
        SAFE_RELEASE(pBuffer);
    }
    return hr;
}
So, Microsoft introduced a FLAC Media Foundation Transform (MFT) encoder, CLSID_CMSFLACEncMFT, in Windows 10, but the codec remains undocumented at the moment.
Supported Media Formats in Media Foundation is similarly out of date and does not reflect the presence of the recent additions.
I am not aware of any comment on this, and my opinion is that the codec was added for internal use, but the implementation is merely a standard Media Foundation component without licensing restrictions, so the codec is unrestricted too by, for example, field-of-use limitations.
This stock codec seems to be limited to 8-, 16- and 24-bit PCM input options (that is, not 32 bits/sample - you need to convert accordingly). The codec can accept up to 8 channels and a flexible sample rate (48828 Hz is okay).
Even though the codec (transform) seems to be working, if you want to produce a file, you also need a suitable container format (multiplexer) which is compatible with MFAudioFormat_FLAC (the identifier has 7 results on Google Search at the moment of this post, which basically means no one is even aware of the codec). The outdated documentation does not reflect actual support for FLAC in the stock media sinks.
I borrowed a custom media sink that writes a raw MFT output payload into a file, and such FLAC output is playable, as the FLAC frames contain the information necessary to parse the bitstream for playback.
For reference, the file itself is: 20180224-175524.flac.
An obvious candidate among the stock media sinks, the WAVE Media Sink, is unable to accept FLAC input. Even though it potentially could, the implementation is presumably limited to simpler audio formats.
The AVI media sink might possibly take FLAC audio, but it seems to be impossible to create an audio-only AVI.
Among the other media sinks there is, however, one which can process FLAC: the MPEG-4 File Sink. Again, despite the outdated documentation, this media sink takes FLAC input, so you should be able to create .MP4 files with a FLAC audio track.
Sample file: 20180224-184012.mp4 ("FLAC (framed)").
To sum it up:
The FLAC encoder MFT is present in Windows 10 and is available for use; it lacks proper documentation though
One needs to take care of converting the input to a compatible format (there is no direct support for 32-bit PCM)
It is possible to manage the MFT directly and consume the MFT output, thereby obtaining a FLAC bitstream
Alternatively, it is possible to use the stock MP4 media sink to produce output with a FLAC audio track
Alternatively, it is possible to develop a custom media sink and consume the FLAC bitstream from an upstream encoder connection
Potentially, the codec is compatible with the Transcode API; however, the restrictions above apply. The container type needs to be MFTranscodeContainerType_MPEG4 in particular.
The codec is apparently compatible with the Media Session API, and presumably it is good for use with the Sink Writer API too.
In your code, as you attempt to use the Sink Writer API, you should similarly have MP4 output, with the input possibly converted to a compatible format in your code (compatible PCM, or compatible FLAC with the encoder MFT managed on your side). Knowing that the MP4 media sink is overall capable of creating a FLAC audio track, you should be able to debug the fine details in your code and fit the components to work together.
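As a hedged illustration of that MP4 route (the attribute usage and format values below are assumptions consistent with the description above, not a verified recipe; error handling is compressed):
// Sketch: create a sink writer that targets the stock MPEG-4 file sink
// and declare a FLAC output stream on it. The file name, 48000 Hz, mono
// and 24 bits are illustrative assumptions.
IMFAttributes *pAttr = NULL;
HRESULT hr = MFCreateAttributes(&pAttr, 1);
if (SUCCEEDED(hr))
    hr = pAttr->SetGUID(MF_TRANSCODE_CONTAINERTYPE, MFTranscodeContainerType_MPEG4);

IMFSinkWriter *pWriter = NULL;
if (SUCCEEDED(hr))
    hr = MFCreateSinkWriterFromURL(L"output.mp4", NULL, pAttr, &pWriter);

IMFMediaType *pOut = NULL;
if (SUCCEEDED(hr))
    hr = MFCreateMediaType(&pOut);
if (SUCCEEDED(hr))
    hr = pOut->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
if (SUCCEEDED(hr))
    hr = pOut->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_FLAC);
if (SUCCEEDED(hr))
    hr = pOut->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, 48000);
if (SUCCEEDED(hr))
    hr = pOut->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, 1);
if (SUCCEEDED(hr))
    hr = pOut->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, 24); // 32-bit PCM input is not supported.

DWORD dwStream = 0;
if (SUCCEEDED(hr))
    hr = pWriter->AddStream(pOut, &dwStream);
// ...then SetInputMediaType with a compatible PCM type and BeginWriting.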
Finally I was able to solve the problem. It wasn't that hard, to be honest. But that is always the case once you know how to achieve something ;).
I created a copy-and-paste example below to give an idea of how to implement FLAC encoding with Microsoft Media Foundation.
The missing piece of the puzzle was MFTranscodeGetAudioOutputAvailableTypes. This function lists all the available output formats from an audio encoder.
If you are not sure which MFTs the operating system supports, you can call the MFTEnumEx function first. This gives you a list of all the available MFTs. In my case, on Windows 10, there is a FLAC MFT that is described like this:
Name: Microsoft FLAC Audio Encoder MFT
Input Types: 1 items:
Audio-PCM
Class identifier: 128509e9-c44e-45dc-95e9-c255b8f466a6
Output Types: 1 items:
Audio-0000f1ac-0000-0010-8000-00aa00389b71
Transform Flags: 1
Transform Category: Audio Encoder
So the next thing I did was to create the source reader and get the current media type. The important values for me are the sample rate, bits per sample and channel count.
Then I created a GetOutputMediaTypes function that takes the requested audio format, sample rate, bits per sample, channel count and a reference to the IMFMediaType.
The MFTranscodeGetAudioOutputAvailableTypes function returns all available types for the MFAudioFormat_FLAC GUID.
After getting the count of the available media types with hr = pAvailableTypes->GetElementCount(&dwMTCount); I am able to iterate through them and check whether a type supports my request. If that's the case, I return the media type.
The last part is the easiest one.
First, add the output media type to the sink writer to get the stream index.
DWORD dwWriterStreamIndex = -1;

// Add the stream.
hr = pSinkWriter->AddStream(
    pOuputMediaType,
    &dwWriterStreamIndex
);
Then set the input type and call pSinkWriter->BeginWriting(); so the sink writer starts accepting data.
// Set the input media type.
hr = pSinkWriter->SetInputMediaType(
    dwWriterStreamIndex,
    pInputType,
    NULL
);

// Tell the sink writer to accept data.
hr = pSinkWriter->BeginWriting();
If the output and input media types are set correctly, BeginWriting should return S_OK as the HRESULT.
We should get no error, because we are using a media type provided by the MFTranscodeGetAudioOutputAvailableTypes function.
The last step is to read all samples from the source reader and write them through the sink writer into the FLAC container.
Done :)
I hope I could help with this answer.
Also thanks to Roman R.
Update
This sample only works with PCM audio formats from 4 to 24 bits per sample. If you want to encode 32-bit PCM audio, you have to convert it down first and then encode it.
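A hedged sketch of such a conversion for integer PCM (this helper is illustrative and not part of the sample): it keeps the three most significant bytes of each 32-bit sample; float input and dithering are out of scope.
#include <cstdint>
#include <vector>

// Illustrative helper (not part of the sample): convert 32-bit integer PCM
// to packed little-endian 24-bit PCM by keeping the three most significant
// bytes of each sample.
std::vector<uint8_t> ConvertPcm32To24(const int32_t *src, size_t sampleCount)
{
    std::vector<uint8_t> dst(sampleCount * 3);
    for (size_t i = 0; i < sampleCount; i++)
    {
        uint32_t s = static_cast<uint32_t>(src[i]);
        dst[i * 3 + 0] = static_cast<uint8_t>(s >> 8);   // Low byte of the 24-bit sample.
        dst[i * 3 + 1] = static_cast<uint8_t>(s >> 16);  // Middle byte.
        dst[i * 3 + 2] = static_cast<uint8_t>(s >> 24);  // High (sign) byte.
    }
    return dst;
}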
--------
Here's the minimal example app.
#include <windows.h>
#include <windowsx.h>
#include <atlstr.h>
#include <comdef.h>
#include <exception>
#include <mfapi.h>
#include <mfplay.h>
#include <mfreadwrite.h>
#include <mmdeviceapi.h>
#include <Audioclient.h>
#include <mferror.h>
#include <Wmcodecdsp.h>

#pragma comment(lib, "mf.lib")
#pragma comment(lib, "mfplat.lib")
#pragma comment(lib, "mfplay.lib")
#pragma comment(lib, "mfreadwrite.lib")
#pragma comment(lib, "mfuuid.lib")
#pragma comment(lib, "wmcodecdspuuid")

inline void ThrowIfFailed(HRESULT hr)
{
    if (FAILED(hr))
    {
        // Get the error message.
        _com_error err(hr);
        LPCTSTR errMsg = err.ErrorMessage();
        OutputDebugString(L"################################## ERROR ##################################\n");
        OutputDebugString(errMsg);
        OutputDebugString(L"\n################################## ----- ##################################\n");
        CStringA sb(errMsg);
        // Set a breakpoint on this line to catch DirectX API errors.
        throw std::exception(sb);
    }
}

template <class T> void SafeRelease(T **ppT)
{
    if (*ppT)
    {
        (*ppT)->Release();
        *ppT = nullptr;
    }
}

using namespace System;

HRESULT GetOutputMediaTypes(
    GUID cAudioFormat,
    UINT32 cSampleRate,
    UINT32 cBitPerSample,
    UINT32 cChannels,
    IMFMediaType **ppType
)
{
    // Enumerate all codecs except for codecs with field-of-use restrictions.
    // Sort the results.
    DWORD dwFlags =
        (MFT_ENUM_FLAG_ALL & (~MFT_ENUM_FLAG_FIELDOFUSE)) |
        MFT_ENUM_FLAG_SORTANDFILTER;

    IMFCollection *pAvailableTypes = NULL; // List of audio media types.
    IMFMediaType *pAudioType = NULL;       // Corresponding codec.

    HRESULT hr = MFTranscodeGetAudioOutputAvailableTypes(
        cAudioFormat,
        dwFlags,
        NULL,
        &pAvailableTypes
    );

    // Get the element count.
    DWORD dwMTCount;
    hr = pAvailableTypes->GetElementCount(&dwMTCount);

    // Iterate through the results and check for the corresponding codec.
    for (DWORD i = 0; i < dwMTCount; i++)
    {
        hr = pAvailableTypes->GetElement(i, (IUnknown**)&pAudioType);

        GUID majorType;
        hr = pAudioType->GetMajorType(&majorType);

        GUID subType;
        hr = pAudioType->GetGUID(MF_MT_SUBTYPE, &subType);

        if (majorType != MFMediaType_Audio || subType != MFAudioFormat_FLAC)
        {
            continue;
        }

        UINT32 sampleRate = NULL;
        hr = pAudioType->GetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, &sampleRate);

        UINT32 bitRate = NULL;
        hr = pAudioType->GetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, &bitRate);

        UINT32 channels = NULL;
        hr = pAudioType->GetUINT32(MF_MT_AUDIO_NUM_CHANNELS, &channels);

        if (sampleRate == cSampleRate
            && bitRate == cBitPerSample
            && channels == cChannels)
        {
            // Found the codec.
            // Jump out!
            break;
        }
    }

    // Return the media type to the caller.
    *ppType = pAudioType;
    (*ppType)->AddRef();

    SafeRelease(&pAudioType);
    return hr;
}

int main(array<System::String ^> ^args)
{
    HRESULT hr = S_OK;

    // Initialize the COM library.
    ThrowIfFailed(CoInitializeEx(0, COINIT_MULTITHREADED));

    // Start Media Foundation.
    ThrowIfFailed(MFStartup(MF_VERSION));

    IMFMediaType *pInputType = NULL;
    IMFSourceReader *pSourceReader = NULL;
    IMFMediaType *pOuputMediaType = NULL;
    IMFSinkWriter *pSinkWriter = NULL;

    // Create the source reader.
    hr = MFCreateSourceReaderFromURL(
        L"C:\\Temp\\48khz24bit.wav",
        NULL,
        &pSourceReader
    );

    // Create the sink writer.
    hr = MFCreateSinkWriterFromURL(
        L"C:\\Temp\\foo.flac",
        NULL,
        NULL,
        &pSinkWriter
    );

    // Get the media type from the source reader.
    hr = pSourceReader->GetCurrentMediaType(
        MF_SOURCE_READER_FIRST_AUDIO_STREAM,
        &pInputType
    );

    // Get the sample rate, bits per sample and channel count.
    UINT32 sampleRate = NULL;
    hr = pInputType->GetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, &sampleRate);

    UINT32 bitRate = NULL;
    hr = pInputType->GetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, &bitRate);

    UINT32 channels = NULL;
    hr = pInputType->GetUINT32(MF_MT_AUDIO_NUM_CHANNELS, &channels);

    // Try to find a fitting output media type.
    hr = GetOutputMediaTypes(
        MFAudioFormat_FLAC,
        sampleRate,
        bitRate,
        channels,
        &pOuputMediaType);

    DWORD dwWriterStreamIndex = -1;

    // Add the stream.
    hr = pSinkWriter->AddStream(pOuputMediaType, &dwWriterStreamIndex);

    // Set the input media type.
    hr = pSinkWriter->SetInputMediaType(dwWriterStreamIndex, pInputType, NULL);

    // Tell the sink writer to accept data.
    hr = pSinkWriter->BeginWriting();

    // Forever-alone loop.
    for (;;)
    {
        DWORD nStreamIndex, nStreamFlags;
        LONGLONG nTime;
        IMFSample *pSample;

        // Read through the samples until...
        hr = pSourceReader->ReadSample(
            MF_SOURCE_READER_FIRST_AUDIO_STREAM,
            0,
            &nStreamIndex,
            &nStreamFlags,
            &nTime,
            &pSample);
        if (pSample)
        {
            OutputDebugString(L"Write sample...\n");
            hr = pSinkWriter->WriteSample(dwWriterStreamIndex, pSample);
        }
        // ... we are at the end of the stream...
        if (nStreamFlags & MF_SOURCE_READERF_ENDOFSTREAM)
        {
            // ... and jump out.
            break;
        }
    }

    // Call Finalize to finish writing.
    hr = pSinkWriter->Finalize();

    // Done :D
    return 0;
}

Windows Media Foundation How to use IMFMediaSink::AddStreamSink

I am implementing a sample application using Windows Media Foundation.
I have created an example application as described in the link below:
https://msdn.microsoft.com/en-us/library/windows/desktop/ms703190(v=vs.85).aspx
In the above example I have added two video streams using MFCreateAggregateSource.
In the EVR renderer I can hear the audio of both videos, but I can see only one video: the reference stream, i.e. whichever is loaded first.
As per the link below,
https://msdn.microsoft.com/en-us/library/windows/desktop/aa965265(v=vs.85).aspx
the EVR media sink initially has one stream sink, which corresponds to the reference stream. To add new stream sinks, call IMFMediaSink::AddStreamSink.
In my application I am using MFCreateVideoRendererActivate.
How can I use IMFMediaSink::AddStreamSink to add streams to my EVR, so that I can see two video streams playing in one renderer?
Update
I have modified the example code below and added code to add a video stream:
HRESULT CreateMediaSinkActivate(
    IMFStreamDescriptor *pSourceSD, // Pointer to the stream descriptor.
    DWORD iStream,
    HWND hVideoWindow,              // Handle to the video clipping window.
    IMFActivate **ppActivate
)
{
    IMFMediaTypeHandler *pHandler = NULL;
    IMFActivate *pActivate = NULL;

    // Get the media type handler for the stream.
    HRESULT hr = pSourceSD->GetMediaTypeHandler(&pHandler);
    if (FAILED(hr))
    {
        goto done;
    }

    // Get the major media type.
    GUID guidMajorType;
    hr = pHandler->GetMajorType(&guidMajorType);
    if (FAILED(hr))
    {
        goto done;
    }

    // Create an IMFActivate object for the renderer, based on the media type.
    if (MFMediaType_Audio == guidMajorType)
    {
        // Create the audio renderer.
        hr = MFCreateAudioRendererActivate(&pActivate);
    }
    else if (MFMediaType_Video == guidMajorType) // Added this else-if case.
    {
        // Create the video renderer.
        hr = MFCreateVideoRendererActivate(hVideoWindow, &pActivate);

        IMFMediaSink* pVideoSink = NULL;
        HRESULT hrMS = pActivate->ActivateObject(IID_IMFMediaSink, (void**)&pVideoSink);
        if (SUCCEEDED(hrMS))
        {
            IMFStreamSink* pStreamSink = NULL;
            hrMS = pVideoSink->AddStreamSink(iStream, NULL, &pStreamSink);
            if (SUCCEEDED(hrMS))
            {
                DWORD dwID = 10;
                hrMS = pStreamSink->GetIdentifier(&dwID);
                if (SUCCEEDED(hrMS))
                {
                    printf("\n%d", dwID);
                    SafeRelease(&pStreamSink);
                }
            }
        }
    }
    else
    {
        // Unknown stream type.
        hr = E_FAIL;
        // Optionally, you could deselect this stream instead of failing.
    }
    if (FAILED(hr))
    {
        goto done;
    }

    // Return the IMFActivate pointer to the caller.
    *ppActivate = pActivate;
    (*ppActivate)->AddRef();

done:
    SafeRelease(&pHandler);
    SafeRelease(&pActivate);
    return hr;
}
But the problem is that I am still not able to see two video streams in the video window.

Media Foundation - How to select input from crossbar?

Good evening.
I'm trying to put together a little video capture DLL using Media Foundation which is largely adapted from the SimpleCapture and CaptureEngine SDK samples. The whole thing works great when the capture source is my built-in webcam.
The problem is that when I try using an external USB frame grabber with multiple inputs (S-Video, composite & stereo audio), there is no preview stream. I have a function that uses the DirectShow interfaces which seems to successfully change the selected input on the crossbar (verified by looking at the options in AMCAP after running the function). However, I have not been able to get the device to preview at all.
Is there a Media Foundation way of selecting inputs from the crossbar? Or if not, can you suggest what may be wrong with the crossbar input selection code below?
HRESULT CaptureManager::doSelectInputUsingCrossbar(std::wstring deviceName, long input)
{
    IGraphBuilder *pGraph = NULL;
    ICaptureGraphBuilder2 *pBuilder = NULL;
    IBaseFilter* pSrc = NULL;
    ICreateDevEnum *pDevEnum = NULL;
    IEnumMoniker *pClassEnum = NULL;
    IMoniker *pMoniker = NULL;
    bool bCameraFound = false;
    IPropertyBag *pPropBag = NULL;
    bool crossbarSet = false;
    HRESULT hr = S_OK;

    if (!(input == PhysConn_Video_Composite || input == PhysConn_Video_SVideo))
    {
        return S_FALSE;
    }

    // Create the Filter Graph Manager.
    hr = CoCreateInstance(CLSID_FilterGraph, NULL,
        CLSCTX_INPROC_SERVER, IID_IGraphBuilder, (void **)&pGraph);
    if (SUCCEEDED(hr))
    {
        // Create the Capture Graph Builder.
        hr = CoCreateInstance(CLSID_CaptureGraphBuilder2, NULL,
            CLSCTX_INPROC_SERVER, IID_ICaptureGraphBuilder2,
            (void **)&pBuilder);
        if (SUCCEEDED(hr))
        {
            pBuilder->SetFiltergraph(pGraph);
        }
        else return hr;
    }
    else return hr;

    // Choose the camera filter by name.
    hr = CoCreateInstance(CLSID_SystemDeviceEnum, NULL, CLSCTX_INPROC,
        IID_ICreateDevEnum, (void **)&pDevEnum);
    if (FAILED(hr))
    {
        return E_FAIL;
    }

    // Create an enumerator for video capture devices.
    if (FAILED(pDevEnum->CreateClassEnumerator(CLSID_VideoInputDeviceCategory, &pClassEnum, 0)))
    {
        return E_FAIL;
    }
    if (pClassEnum == NULL)
    {
        CheckHR(hr, "Class enumerator is null - no input devices detected?");
        pDevEnum->Release();
        return E_FAIL;
    }

    while (!bCameraFound && (pClassEnum->Next(1, &pMoniker, NULL) == S_OK))
    {
        HRESULT hr = pMoniker->BindToStorage(0, 0, IID_IPropertyBag, (void**)(&pPropBag));
        if (FAILED(hr))
        {
            pMoniker->Release();
            continue; // Skip this one, maybe the next one will work.
        }

        // Find the description or friendly name.
        VARIANT varName;
        VariantInit(&varName);
        hr = pPropBag->Read(L"Description", &varName, 0);
        if (FAILED(hr))
        {
            hr = pPropBag->Read(L"FriendlyName", &varName, 0);
        }
        if (SUCCEEDED(hr))
        {
            if (0 == wcscmp(varName.bstrVal, deviceName.c_str()))
            {
                pMoniker->BindToObject(0, 0, IID_IBaseFilter, (void**)&pSrc);
                bCameraFound = true;
                break;
            }
            VariantClear(&varName);
        }
        pPropBag->Release();
        pMoniker->Release();
    }
    pClassEnum->Release();
    pDevEnum->Release();

    if (!bCameraFound)
    {
        CheckHR(hr, "Error: Get device Moniker, No device found");
        goto done;
    }

    hr = pGraph->AddFilter(pSrc, L"video capture adapter");
    if (FAILED(hr))
    {
        CheckHR(hr, "Can't add capture device to graph");
        goto done;
    }

    // Find the crossbar; success on the interleaved lookup falls
    // through to the routing code below.
    IAMCrossbar *pxBar = NULL;
    hr = pBuilder->FindInterface(&PIN_CATEGORY_CAPTURE, &MEDIATYPE_Interleaved, pSrc, IID_IAMCrossbar, (void **)&pxBar);
    if (FAILED(hr))
    {
        hr = pBuilder->FindInterface(&PIN_CATEGORY_CAPTURE, &MEDIATYPE_Video, pSrc, IID_IAMCrossbar, (void **)&pxBar);
        if (FAILED(hr))
        {
            CheckHR(hr, "Failed to get crossbar filter");
            goto done;
        }
    }

    LONG lInpin, lOutpin;
    hr = pxBar->get_PinCounts(&lOutpin, &lInpin);

    // Find the input pin with the requested physical connector type.
    BOOL IPin = TRUE; LONG pIndex = 0, pRIndex = 0, pType = 0;
    while (pIndex < lInpin)
    {
        hr = pxBar->get_CrossbarPinInfo(IPin, pIndex, &pRIndex, &pType);
        if (pType == input)
        {
            break;
        }
        pIndex++;
    }

    // Find the video decoder output pin.
    BOOL OPin = FALSE; LONG pOIndex = 0, pORIndex = 0, pOType = 0;
    while (pOIndex < lOutpin)
    {
        hr = pxBar->get_CrossbarPinInfo(OPin, pOIndex, &pORIndex, &pOType);
        if (pOType == PhysConn_Video_VideoDecoder)
        {
            break;
        }
        pOIndex++;
    }

    // Route the selected input pin to the video decoder output pin.
    hr = pxBar->Route(pOIndex, pIndex);

done:
    SafeRelease(&pPropBag);
    SafeRelease(&pMoniker);
    SafeRelease(&pxBar);
    SafeRelease(&pGraph);
    SafeRelease(&pBuilder);
    SafeRelease(&pSrc);
    return hr;
}
USB frame grabbers are not supported by Microsoft Media Foundation. Media Foundation supports live sources via the USB Video Class (UVC) device driver; modern web cameras ship with such a driver, but USB frame grabbers do not.
USB frame grabbers ship with DirectShow filters based on a push-model processing pipeline, in which sources send media samples into the graph, while Microsoft Media Foundation uses a pull-model processing pipeline, in which sinks request new media samples and sources listen for such requests.