How to increase mp3 decoding quality (Media Foundation)? - c++

I have a .wav file that I need to convert to .mp3, and I am using Media Foundation to do it. This is the approach I use:
#include "TV_AudioEncoderMF.h"
#include <windows.h>
#include <windowsx.h>
#include <atlstr.h>
#include <comdef.h>
#include <exception>
#include <mfapi.h>
#include <mfplay.h>
#include <mfreadwrite.h>
#include <mmdeviceapi.h>
#include <Audioclient.h>
#include <mferror.h>
#include <Wmcodecdsp.h>
#pragma comment(lib, "mf.lib")
#pragma comment(lib, "mfplat.lib")
#pragma comment(lib, "mfplay.lib")
#pragma comment(lib, "mfreadwrite.lib")
#pragma comment(lib, "mfuuid.lib")
#pragma comment(lib, "wmcodecdspuuid")
// Construction performs no work of its own.
TV_AudioEncoderMF::TV_AudioEncoderMF() = default;
// Destruction performs no work of its own.
TV_AudioEncoderMF::~TV_AudioEncoderMF() = default;
// Releases the interface *ppT points at (if any) and resets the caller's
// pointer to nullptr so it cannot be released twice.
template <class T> void SafeRelease(T **ppT)
{
    T *held = *ppT;
    if (held == nullptr)
    {
        return; // nothing to release
    }
    held->Release();
    *ppT = nullptr;
}
/**
 * Looks up an encoder output media type for the requested audio format.
 *
 * Asks MFTranscodeGetAudioOutputAvailableTypes for every output type the
 * installed encoders offer for cAudioFormat and returns the first one (in the
 * sorted enumeration order) that agrees with the requested sample rate,
 * channel count and bits per sample.
 *
 * @param cAudioFormat  Audio subtype to encode to (e.g. MFAudioFormat_MP3).
 * @param cSampleRate   Required MF_MT_AUDIO_SAMPLES_PER_SECOND.
 * @param cBitPerSample Required MF_MT_AUDIO_BITS_PER_SAMPLE. A candidate type
 *                      that does not carry this attribute at all is treated as
 *                      compatible.
 * @param cChannels     Required MF_MT_AUDIO_NUM_CHANNELS.
 * @param ppType        Receives the matching type with one reference owned by
 *                      the caller; set to NULL when nothing matches.
 * @return S_OK on a match, MF_E_NOT_FOUND when no enumerated type matches,
 *         E_POINTER when ppType is NULL, otherwise the failing HRESULT.
 */
HRESULT TV_AudioEncoderMF::GetOutputMediaTypes(
    GUID cAudioFormat,
    UINT32 cSampleRate,
    UINT32 cBitPerSample,
    UINT32 cChannels,
    IMFMediaType **ppType
)
{
    if (ppType == NULL)
    {
        return E_POINTER;
    }
    *ppType = NULL;

    // Enumerate all codecs except for codecs with field-of-use restrictions.
    // Sort the results.
    const DWORD dwFlags =
        (MFT_ENUM_FLAG_ALL & (~MFT_ENUM_FLAG_FIELDOFUSE)) |
        MFT_ENUM_FLAG_SORTANDFILTER;

    IMFCollection *pAvailableTypes = NULL; // List of audio media types.
    HRESULT hr = MFTranscodeGetAudioOutputAvailableTypes(
        cAudioFormat,
        dwFlags,
        NULL,
        &pAvailableTypes
    );

    // Get the element count.
    DWORD dwMTCount = 0;
    if (SUCCEEDED(hr))
    {
        hr = pAvailableTypes->GetElementCount(&dwMTCount);
    }

    // Iterate through the results until one candidate fits the request.
    IMFMediaType *pMatch = NULL;
    for (DWORD i = 0; SUCCEEDED(hr) && pMatch == NULL && i < dwMTCount; i++)
    {
        IMFMediaType *pAudioType = NULL;
        hr = pAvailableTypes->GetElement(i, (IUnknown**)&pAudioType);
        if (FAILED(hr))
        {
            break;
        }

        GUID majorType = { 0 };
        GUID subType = { 0 };
        UINT32 sampleRate = 0;
        UINT32 channels = 0;

        // Compare against the format the caller asked for, not a fixed codec.
        bool matches =
            SUCCEEDED(pAudioType->GetMajorType(&majorType)) &&
            majorType == MFMediaType_Audio &&
            SUCCEEDED(pAudioType->GetGUID(MF_MT_SUBTYPE, &subType)) &&
            subType == cAudioFormat &&
            SUCCEEDED(pAudioType->GetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, &sampleRate)) &&
            sampleRate == cSampleRate &&
            SUCCEEDED(pAudioType->GetUINT32(MF_MT_AUDIO_NUM_CHANNELS, &channels)) &&
            channels == cChannels;

        if (matches)
        {
            // Compressed output types need not carry a bits-per-sample value;
            // only reject the candidate when it states one that disagrees.
            UINT32 bitsPerSample = 0;
            const HRESULT hrBits = pAudioType->GetUINT32(
                MF_MT_AUDIO_BITS_PER_SAMPLE,
                &bitsPerSample
            );
            matches = (hrBits == MF_E_ATTRIBUTENOTFOUND) ||
                (SUCCEEDED(hrBits) && bitsPerSample == cBitPerSample);
        }

        if (matches)
        {
            // Found the codec: keep the reference GetElement handed out.
            pMatch = pAudioType;
        }
        else
        {
            SafeRelease(&pAudioType);
        }
    }

    if (SUCCEEDED(hr) && pMatch == NULL)
    {
        hr = MF_E_NOT_FOUND;
    }

    // Hand the reference over to the caller (NULL when nothing matched).
    *ppType = pMatch;
    SafeRelease(&pAvailableTypes);
    return hr;
}
void TV_AudioEncoderMF::decode()
{
HRESULT hr = S_OK;
// Initialize com interface
CoInitializeEx(0, COINIT_MULTITHREADED);
// Start media foundation
MFStartup(MF_VERSION);
IMFMediaType *pInputType = NULL;
IMFSourceReader *pSourceReader = NULL;
IMFMediaType *pOuputMediaType = NULL;
IMFSinkWriter *pSinkWriter = NULL;
// Create source reader
hr = MFCreateSourceReaderFromURL(
L"D:\\buffer\\del\\out\\test.wav",
NULL,
&pSourceReader
);
// Create sink writer
hr = MFCreateSinkWriterFromURL(
L"D:\\buffer\\del\\out\\test_out.mp3",
NULL,
NULL,
&pSinkWriter
);
// Get media type from source reader
hr = pSourceReader->GetCurrentMediaType(
MF_SOURCE_READER_FIRST_AUDIO_STREAM,
&pInputType
);
// Get sample rate, bit rate and channels
UINT32 sampleRate = NULL;
hr = pInputType->GetUINT32(
MF_MT_AUDIO_SAMPLES_PER_SECOND,
&sampleRate
);
UINT32 bitRate = NULL;
hr = pInputType->GetUINT32(
MF_MT_AUDIO_BITS_PER_SAMPLE,
&bitRate
);
UINT32 channels = NULL;
hr = pInputType->GetUINT32(
MF_MT_AUDIO_NUM_CHANNELS,
&channels
);
// Try to find a media type that is fitting.
hr = GetOutputMediaTypes(
MFAudioFormat_MP3,
sampleRate,
bitRate,
channels,
&pOuputMediaType);
DWORD dwWriterStreamIndex = -1;
// Add the stream
hr = pSinkWriter->AddStream(
pOuputMediaType,
&dwWriterStreamIndex
);
// Set input media type
hr = pSinkWriter->SetInputMediaType(
dwWriterStreamIndex,
pInputType,
NULL
);
// Tell the sink writer to accept data
hr = pSinkWriter->BeginWriting();
// Forever alone loop
while (true)
{
DWORD nStreamIndex, nStreamFlags;
LONGLONG nTime;
IMFSample *pSample;
// Read through the samples until...
hr = pSourceReader->ReadSample(
MF_SOURCE_READER_FIRST_AUDIO_STREAM,
0,
&nStreamIndex,
&nStreamFlags,
&nTime,
&pSample);
if (pSample)
{
hr = pSinkWriter->WriteSample(
dwWriterStreamIndex,
pSample
);
}
// ... we are at the end of the stream...
if (nStreamFlags & MF_SOURCE_READERF_ENDOFSTREAM)
{
// ... and jump out.
break;
}
}
// Call finalize to finish writing.
hr = pSinkWriter->Finalize();
// Done :D
}
The problem is that there is a big difference in audio quality: when I play back the .wav file (with the standard Windows players) it sounds good, but when I play back the compressed .mp3 file it sounds like a voice recorded on a very low-quality recorder.
What could be the problem here? I don't see any way to set the output quality, something like setOutQualityInPercent(100).
EDIT
// Second attempt from the question: the same reader -> sink-writer pipeline as
// before, except that the bits-per-sample value read from the input is raised
// by 2 and written onto the output media type before the stream is added.
// NOTE: the "<------- This line" markers below are annotations from the post
// highlighting the changed lines; they are not C++.
void co_AudioEncoderMF::decode()
{
HRESULT hr = S_OK;
// Initialize com interface
CoInitializeEx(0, COINIT_MULTITHREADED);
// Start media foundation
MFStartup(MF_VERSION);
IMFMediaType *pInputType = NULL;
IMFSourceReader *pSourceReader = NULL;
IMFMediaType *pOuputMediaType = NULL;
IMFSinkWriter *pSinkWriter = NULL;
// Create source reader
hr = MFCreateSourceReaderFromURL(
L"D:\\buffer\\del\\out\\test.wav",
NULL,
&pSourceReader
);
// Create sink writer
hr = MFCreateSinkWriterFromURL(
L"D:\\buffer\\del\\out\\test_out.mp3",
NULL,
NULL,
&pSinkWriter
);
// Get media type from source reader
hr = pSourceReader->GetCurrentMediaType(
MF_SOURCE_READER_FIRST_AUDIO_STREAM,
&pInputType
);
// Get sample rate, bit rate and channels
// (the variable named bitRate actually holds MF_MT_AUDIO_BITS_PER_SAMPLE)
UINT32 sampleRate = NULL;
hr = pInputType->GetUINT32(
MF_MT_AUDIO_SAMPLES_PER_SECOND,
&sampleRate
);
UINT32 bitRate = NULL;
hr = pInputType->GetUINT32(
MF_MT_AUDIO_BITS_PER_SAMPLE,
&bitRate
);
UINT32 channels = NULL;
hr = pInputType->GetUINT32(
MF_MT_AUDIO_NUM_CHANNELS,
&channels
);
// Try to find a media type that is fitting.
hr = GetOutputMediaTypes(
MFAudioFormat_MP3,
sampleRate,
bitRate,
channels,
&pOuputMediaType);
// The asker's experiment: overwrite bits-per-sample on the returned output type.
bitRate = bitRate + 2; <------- This line
pOuputMediaType->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, bitRate); <------- This line
DWORD dwWriterStreamIndex = -1;
// Add the stream
hr = pSinkWriter->AddStream(
pOuputMediaType,
&dwWriterStreamIndex
);
// Set input media type
hr = pSinkWriter->SetInputMediaType(
dwWriterStreamIndex,
pInputType,
NULL
);
// Tell the sink writer to accept data
hr = pSinkWriter->BeginWriting();
// Forever alone loop
while (true)
{
DWORD nStreamIndex, nStreamFlags;
LONGLONG nTime;
IMFSample *pSample;
// Read through the samples until...
hr = pSourceReader->ReadSample(
MF_SOURCE_READER_FIRST_AUDIO_STREAM,
0,
&nStreamIndex,
&nStreamFlags,
&nTime,
&pSample);
// NOTE(review): pSample is never released after being written.
if (pSample)
{
hr = pSinkWriter->WriteSample(
dwWriterStreamIndex,
pSample
);
}
// ... we are at the end of the stream...
if (nStreamFlags & MF_SOURCE_READERF_ENDOFSTREAM)
{
// ... and jump out.
break;
}
}
// Call finalize to finish writing.
hr = pSinkWriter->Finalize();
// Done :D
}
EDIT2
There are 2 files - https://drive.google.com/drive/folders/1yzB2u0TvMSnwsTpYnDDPFBDkTB75ZFwM?usp=sharing
Result and orig

This part is just broken:
// Try to find a media type that is fitting.
hr = GetOutputMediaTypes(
MFAudioFormat_MP3,
sampleRate,
bitRate,
channels,
&pOuputMediaType);
bitRate = bitRate + 2; <------- This line
pOuputMediaType->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, bitRate); <------- This line
To get you back on track, replace the fragment above with:
// Build the MP3 output type by hand instead of picking one from the enumeration.
MFCreateMediaType(&pOuputMediaType);
pOuputMediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
pOuputMediaType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_MP3);
// 128000 bits per second expressed in bytes per second.
pOuputMediaType->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, 128000 / 8);
// Channel count and sample rate are carried over from the input type read earlier.
pOuputMediaType->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, channels);
pOuputMediaType->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, sampleRate);
and you'll start getting proper MP3.
Note that the attributes above are taken directly from documentation: MP3 Audio Encoder. In your application you will need to make sure that target values remain valid and match the documented options. You might need to resample audio, for example.

Related

How do I encode raw 48khz/32bits PCM to FLAC using Microsoft Media Foundation?

I created a SinkWriter that is able to encode video and audio using Microsoft's Media Foundation Platform.
Video is working fine so far but I have some troubles with audio only.
My PCM source has a sample rate of 48828hz, 32 bits per sample and is mono.
Everything is working well so far except for FLAC.
For instance, the MP3 output more or less works but has the wrong format. According to MSDN (MP3 Audio Encoder), the MP3 encoder only supports 16 bits per sample as input. My PCM source, as described above, has 32 bits per sample.
However the export with MP3 is working cause the MF Platform seems like to have some kind of fallback and is using the MPEG Audio layer 1/2 (mpga) with 2 Channels, 32khz and a bitrate of 320kb/s.
Things start to get weird when I set MF_MT_SUBTYPE to MFAudioFormat_FLAC. The export works too, but the audio quality is awful. There is a lot of noise, although I can still recognize the audio. According to VLC, the FLAC file has a sample rate of 44.1 kHz, 8 bits per sample, and is mono.
Does this mean the FLAC codec isn't able to work with the PCM I provide?
Has anyone had the same problem and was able to fix it?
Update
After doing some more research about this problem it seems like that my PCM Audio with a resolution of 32 Bit is too high. So currently I am trying to convert the 32 Bit PCM to 24 Bit for FLAC and 16 Bit for MP3 but with no luck so far. I keep you updated if I make some progress.
--------
Update 2
I've created a minimal example app that shows the problem I am facing.
It reads the 48khz32bit wave file and tries to encode it to flac.
When executing the hr = pSinkWriter->BeginWriting(); command I get the error 0xc00d36b4, which means "The data specified for the media type is invalid, inconsistent, or not supported by this object."
What am I doing wrong here?
#include "stdafx.h"
#include <windows.h>
#include <windowsx.h>
#include <comdef.h>
#include <mfapi.h>
#include <mfidl.h>
#include <mfreadwrite.h>
#include <Mferror.h>
#pragma comment(lib, "ole32")
#pragma comment(lib, "mfplat")
#pragma comment(lib, "mfreadwrite")
#pragma comment(lib, "mfuuid")
using namespace System;
// Minimal repro from the question: reads a 48 kHz / 32-bit WAV file and tries
// to write it through a sink writer whose output type is declared as FLAC.
// Per the question, BeginWriting() fails here with 0xc00d36b4.
int main(array<System::String ^> ^args)
{
HRESULT hr = CoInitializeEx(0, COINIT_MULTITHREADED);
hr = MFStartup(MF_VERSION);
IMFMediaType *pMediaType;
IMFMediaType *pMediaTypeOut;
IMFSourceReader *pSourceReader;
IMFAttributes *pAttributes;
IMFSinkWriter *pSinkWriter;
hr = MFCreateSourceReaderFromURL(
L"C:\\Temp\\48khz32bit.wav",
NULL,
&pSourceReader
);
// The sink writer is asked for a WAVE container even though the stream
// added below is FLAC.
hr = MFCreateAttributes(&pAttributes, 1);
hr = pAttributes->SetGUID(
MF_TRANSCODE_CONTAINERTYPE,
MFTranscodeContainerType_WAVE
);
hr = MFCreateSinkWriterFromURL(
L"C:\\Temp\\foo.flac",
NULL,
pAttributes,
&pSinkWriter
);
// The reader's current type is fetched but never passed to the sink writer:
// no SetInputMediaType call is made anywhere in this function.
hr = pSourceReader->GetCurrentMediaType(
MF_SOURCE_READER_FIRST_AUDIO_STREAM,
&pMediaType);
// Hand-built output type: FLAC, 48000 Hz, mono, 32 bits per sample.
hr = MFCreateMediaType(&pMediaTypeOut);
hr = pMediaTypeOut->SetGUID(
MF_MT_MAJOR_TYPE,
MFMediaType_Audio
);
hr = pMediaTypeOut->SetGUID(
MF_MT_SUBTYPE,
MFAudioFormat_FLAC
);
hr = pMediaTypeOut->SetUINT32(
MF_MT_AUDIO_SAMPLES_PER_SECOND,
48000
);
hr = pMediaTypeOut->SetUINT32(
MF_MT_AUDIO_NUM_CHANNELS,
1
);
hr = pMediaTypeOut->SetUINT32(
MF_MT_AUDIO_BITS_PER_SAMPLE,
32
);
// Bytes per second = bytes per sample (rounded up) * channels * sample rate.
hr = pMediaTypeOut->SetUINT32(
MF_MT_AUDIO_AVG_BYTES_PER_SECOND,
(((32 + 7) / 8) * 1) * 48000
);
// Block alignment = bytes per sample (rounded up) * channels.
hr = pMediaTypeOut->SetUINT32(
MF_MT_AUDIO_BLOCK_ALIGNMENT,
((32 + 7) / 8) * 1
);
DWORD nWriterStreamIndex = -1;
hr = pSinkWriter->AddStream(pMediaTypeOut, &nWriterStreamIndex);
hr = pSinkWriter->BeginWriting();
// Translate the BeginWriting result into a readable message for inspection.
_com_error err(hr);
LPCTSTR errMsg = err.ErrorMessage();
// The copy loop runs regardless of whether BeginWriting succeeded.
for (;;)
{
DWORD nStreamIndex, nStreamFlags;
LONGLONG nTime;
IMFSample *pSample;
hr = pSourceReader->ReadSample(
MF_SOURCE_READER_FIRST_AUDIO_STREAM,
0,
&nStreamIndex,
&nStreamFlags,
&nTime,
&pSample);
// NOTE(review): pSample is never released after being written.
if (pSample)
{
OutputDebugString(L"Write sample...\n");
hr = pSinkWriter->WriteSample(
nWriterStreamIndex,
pSample
);
}
if (nStreamFlags & MF_SOURCE_READERF_ENDOFSTREAM)
{
break;
}
}
hr = pSinkWriter->Finalize();
return 0;
}
--------
Update 3
I added the solution as answer.
--------
Initialize SinkWriter
/**
 * Creates a sink writer for `filename`, adds the configured video and/or
 * audio streams, and puts the writer into the writing state.
 *
 * @param ppWriter          Receives the sink writer with one reference owned
 *                          by the caller.
 * @param pStreamIndex      Receives the video stream index (left at 0 when
 *                          m_vFormat is SWFV_NONE).
 * @param pAudioStreamIndex Receives the audio stream index (left at 0 when
 *                          m_audFormat is SWAF_NONE).
 * @param filename          Output URL passed to MFCreateSinkWriterFromURL.
 * @return S_OK; failures are reported by DX::ThrowIfFailed throwing. The
 *         attribute store and the partially built writer are released before
 *         the exception propagates.
 */
HRESULT SinkWriter::InitializeSinkWriter(IMFSinkWriter **ppWriter, DWORD *pStreamIndex, DWORD *pAudioStreamIndex, LPCWSTR filename)
{
    *ppWriter = NULL;
    *pStreamIndex = 0;
    *pAudioStreamIndex = 0;
    IMFSinkWriter *pSinkWriter = NULL;
    // Attributes
    IMFAttributes *pAttributes = NULL;
    HRESULT hr = S_OK;
    try
    {
        DX::ThrowIfFailed(
            MFCreateAttributes(
                &pAttributes,
                3
            )
        );
#if defined(ENABLE_HW_ACCELERATION)
        CComPtr<ID3D11Device> device;
        D3D_FEATURE_LEVEL levels[] = { D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0 };
#if defined(ENABLE_HW_DRIVER)
        DX::ThrowIfFailed(
            D3D11CreateDevice(
                nullptr,
                D3D_DRIVER_TYPE_HARDWARE,
                nullptr,
                (0 * D3D11_CREATE_DEVICE_SINGLETHREADED) | D3D11_CREATE_DEVICE_VIDEO_SUPPORT,
                levels,
                ARRAYSIZE(levels),
                D3D11_SDK_VERSION,
                &device,
                nullptr,
                nullptr
            )
        );
        const CComQIPtr<ID3D10Multithread> pMultithread = device;
        pMultithread->SetMultithreadProtected(TRUE);
#else
        DX::ThrowIfFailed(
            D3D11CreateDevice(
                nullptr,
                D3D_DRIVER_TYPE_NULL,
                nullptr,
                D3D11_CREATE_DEVICE_SINGLETHREADED,
                levels,
                ARRAYSIZE(levels),
                D3D11_SDK_VERSION,
                &device,
                nullptr,
                nullptr)
        );
#endif
        UINT token;
        CComPtr<IMFDXGIDeviceManager> pManager;
        DX::ThrowIfFailed(
            MFCreateDXGIDeviceManager(
                &token,
                &pManager
            )
        );
        DX::ThrowIfFailed(
            pManager->ResetDevice(
                device,
                token
            )
        );
        // NOTE(review): this is the source-reader attribute key although the
        // store is handed to a sink writer; presumably
        // MF_SINK_WRITER_D3D_MANAGER was intended — confirm before changing.
        DX::ThrowIfFailed(
            pAttributes->SetUnknown(
                MF_SOURCE_READER_D3D_MANAGER,
                pManager
            )
        );
        DX::ThrowIfFailed(
            pAttributes->SetUINT32(
                MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS,
                TRUE
            )
        );
#if (WINVER >= 0x0602)
        DX::ThrowIfFailed(
            pAttributes->SetUINT32(
                MF_SOURCE_READER_ENABLE_ADVANCED_VIDEO_PROCESSING,
                TRUE
            )
        );
#endif
#else
        DX::ThrowIfFailed(
            pAttributes->SetUINT32(
                MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS,
                TRUE
            )
        );
        DX::ThrowIfFailed(
            pAttributes->SetUINT32(
                MF_SOURCE_READER_ENABLE_VIDEO_PROCESSING,
                TRUE
            )
        );
#endif
        DX::ThrowIfFailed(
            MFCreateSinkWriterFromURL(
                filename,
                NULL,
                pAttributes,
                &pSinkWriter
            )
        );
        if (m_vFormat != VideoFormat::SWFV_NONE)
        {
            DX::ThrowIfFailed(
                InitializeVideoCodec(
                    pSinkWriter,
                    pStreamIndex
                )
            );
        }
        if (m_audFormat != AudioFormat::SWAF_NONE)
        {
            DX::ThrowIfFailed(
                InitializeAudioCodec(
                    pSinkWriter,
                    pAudioStreamIndex
                )
            );
        }
        // Tell the sink writer to start accepting data.
        DX::ThrowIfFailed(
            pSinkWriter->BeginWriting()
        );
    }
    catch (...)
    {
        // Do not leak the attribute store or a half-configured writer when
        // any of the steps above throws.
        SAFE_RELEASE(pAttributes);
        SAFE_RELEASE(pSinkWriter);
        throw;
    }
    // The attribute store is only needed while the writer is being set up.
    SAFE_RELEASE(pAttributes);
    // Return the pointer to the caller, handing over the local reference.
    *ppWriter = pSinkWriter;
    return hr;
}
Initialize Audio Codec
/**
 * Adds an audio stream to the sink writer and declares the PCM input format
 * that will be fed to it.
 *
 * The output type carries only the major type and AUDIO_SUBTYPE; the input
 * type is PCM described by the AUDIO_* constants.
 *
 * @param pSinkWriter  Sink writer to add the audio stream to.
 * @param pStreamIndex Receives the index of the added audio stream.
 * @return S_OK; failures are reported by DX::ThrowIfFailed throwing. Both
 *         media types are released on every path, including when it throws.
 */
HRESULT SinkWriter::InitializeAudioCodec(IMFSinkWriter *pSinkWriter, DWORD *pStreamIndex)
{
    // Audio media types
    IMFMediaType *pAudioTypeOut = NULL;
    IMFMediaType *pAudioTypeIn = NULL;
    DWORD audioStreamIndex;
    HRESULT hr = S_OK;
    try
    {
        // Set the output audio type.
        // NOTE(review): no sample rate / channel / bitrate attributes are set
        // on the output type, so the encoder configuration is left to the
        // sink writer — confirm this is intended.
        DX::ThrowIfFailed(
            MFCreateMediaType(
                &pAudioTypeOut
            )
        );
        DX::ThrowIfFailed(
            pAudioTypeOut->SetGUID(
                MF_MT_MAJOR_TYPE,
                MFMediaType_Audio
            )
        );
        DX::ThrowIfFailed(
            pAudioTypeOut->SetGUID(
                MF_MT_SUBTYPE,
                AUDIO_SUBTYPE
            )
        );
        DX::ThrowIfFailed(
            pSinkWriter->AddStream(
                pAudioTypeOut,
                &audioStreamIndex
            )
        );
        // Set the input audio type
        DX::ThrowIfFailed(
            MFCreateMediaType(
                &pAudioTypeIn
            )
        );
        DX::ThrowIfFailed(
            pAudioTypeIn->SetGUID(
                MF_MT_MAJOR_TYPE,
                AUDIO_MAJOR_TYPE
            )
        );
        DX::ThrowIfFailed(
            pAudioTypeIn->SetGUID(
                MF_MT_SUBTYPE,
                MFAudioFormat_PCM
            )
        );
        DX::ThrowIfFailed(
            pAudioTypeIn->SetUINT32(
                MF_MT_AUDIO_NUM_CHANNELS,
                AUDIO_NUM_CHANNELS
            )
        );
        DX::ThrowIfFailed(
            pAudioTypeIn->SetUINT32(
                MF_MT_AUDIO_BITS_PER_SAMPLE,
                AUDIO_BITS_PER_SAMPLE
            )
        );
        DX::ThrowIfFailed(
            pAudioTypeIn->SetUINT32(
                MF_MT_AUDIO_BLOCK_ALIGNMENT,
                AUDIO_BLOCK_ALIGNMENT
            )
        );
        DX::ThrowIfFailed(
            pAudioTypeIn->SetUINT32(
                MF_MT_AUDIO_SAMPLES_PER_SECOND,
                AUDIO_SAMPLES_PER_SECOND
            )
        );
        DX::ThrowIfFailed(
            pAudioTypeIn->SetUINT32(
                MF_MT_AUDIO_AVG_BYTES_PER_SECOND,
                AUDIO_AVG_BYTES_PER_SECOND
            )
        );
        DX::ThrowIfFailed(
            pSinkWriter->SetInputMediaType(
                audioStreamIndex,
                pAudioTypeIn,
                NULL
            )
        );
    }
    catch (...)
    {
        // Release whatever was created before the failing step, then let the
        // exception continue to the caller.
        SAFE_RELEASE(pAudioTypeOut);
        SAFE_RELEASE(pAudioTypeIn);
        throw;
    }
    *pStreamIndex = audioStreamIndex;
    SAFE_RELEASE(pAudioTypeOut);
    SAFE_RELEASE(pAudioTypeIn);
    return hr;
}
Push audio data
/**
 * Pushes one block of audio data to the output.
 *
 * Copies m_bufferLength * sizeof(short) bytes from `data` into a Media
 * Foundation memory buffer. For audio-only WAV output the bytes are written
 * straight to m_pByteStream; otherwise the buffer is wrapped in a sample and
 * sent to the sink writer's audio stream.
 *
 * @param data Source audio bytes; must not be NULL.
 * @return S_FALSE while the writer is still initializing, E_POINTER when
 *         `data` is NULL, otherwise the HRESULT of the first failing step
 *         (or of the last step on success).
 */
HRESULT SinkWriter::PushAudio(UINT32* data)
{
    HRESULT hr = S_FALSE;
    if (m_isInitializing)
    {
        return hr;
    }
    if (data == NULL)
    {
        return E_POINTER;
    }
    IMFSample *pSample = NULL;
    IMFMediaBuffer *pBuffer = NULL;
    BYTE *pMem = NULL;
    bool locked = false;
    size_t cbBuffer = m_bufferLength * sizeof(short);
    const bool rawWaveOutput =
        (m_vFormat == VideoFormat::SWFV_NONE && m_audFormat == AudioFormat::SWAF_WAV);
    // Create a new memory buffer.
    hr = MFCreateMemoryBuffer(cbBuffer, &pBuffer);
    // Lock the buffer and copy the audio frame to the buffer.
    if (SUCCEEDED(hr))
    {
        hr = pBuffer->Lock(&pMem, NULL, NULL);
        locked = SUCCEEDED(hr);
    }
    if (SUCCEEDED(hr))
    {
        CopyMemory(pMem, data, cbBuffer);
    }
    if (rawWaveOutput)
    {
        // pMem is only valid while the buffer is locked, so the byte stream
        // write has to happen before Unlock().
        DWORD cbWritten = 0;
        if (SUCCEEDED(hr))
        {
            hr = m_pByteStream->Write(pMem, cbBuffer, &cbWritten);
        }
        if (SUCCEEDED(hr))
        {
            m_cbWrittenByteStream += cbWritten;
        }
    }
    // Only unlock a buffer that was actually locked.
    if (locked)
    {
        pBuffer->Unlock();
    }
    if (!rawWaveOutput)
    {
        // Set the data length of the buffer.
        if (SUCCEEDED(hr))
        {
            hr = pBuffer->SetCurrentLength(cbBuffer);
        }
        // Create media sample and add the buffer to the sample.
        if (SUCCEEDED(hr))
        {
            hr = MFCreateSample(&pSample);
        }
        if (SUCCEEDED(hr))
        {
            hr = pSample->AddBuffer(pBuffer);
        }
        // Set the timestamp and the duration.
        // NOTE(review): the audio sample is stamped with the *video* start
        // time and duration members; confirm this is intended.
        if (SUCCEEDED(hr))
        {
            hr = pSample->SetSampleTime(m_cbRtStartVideo);
        }
        if (SUCCEEDED(hr))
        {
            hr = pSample->SetSampleDuration(m_cbRtDurationVideo);
        }
        // Send the sample to the Sink Writer
        if (SUCCEEDED(hr))
        {
            hr = m_pSinkWriter->WriteSample(m_audioStreamIndex, pSample);
        }
    }
    // Released on both paths so the WAV branch no longer leaks the buffer.
    SAFE_RELEASE(pSample);
    SAFE_RELEASE(pBuffer);
    return hr;
}
So, Microsoft introduced a FLAC Media Foundation Transform (MFT) Encoder CLSID_CMSFLACEncMFT in Windows 10, but the codec remains undocumented at the moment.
Supported Media Formats in Media Foundation is similarly out of date and does not reflect presence of recent additions.
I am not aware of any comment on this, and my opinion is that the codec is added for internal use but the implementation is merely a standard Media Foundation components without licensing restrictions, so the codecs are unrestricted too by, for example, field of use limitations.
This stock codec seems to be limited to 8, 16 and 24 bit PCM input options (that is, not 32 bits/sample - you need to resample respectively). The codec is capable to accept up to 8 channels and flexible samples per second rate (48828 Hz is okay).
Even though the codec (transform) seems to be working, if you want to produce a file you also need a suitable container format (multiplexer) that is compatible with MFAudioFormat_FLAC (the identifier has 7 results on Google Search at the moment of this post, which basically means no one is even aware of the codec). The outdated documentation does not reflect the actual support for FLAC in the stock media sinks.
I borrowed a custom media sink that writes a raw MFT output payload into a file, and such FLAC output is playable as the FLAC frames contain necessary information to parse the bitstream for playback.
For the reference, the file itself is: 20180224-175524.flac.
An obvious candidate among stock media sinks WAVE Media Sink is unable to accept FLAC input. Nevertheless it potentially could, the implementation is presumably limited to simpler audio formats.
AVI media sink might possibly take FLAC audio, but it seems to be impossible to create an audio only AVI.
Among other media sink there is however a media sink which can process FLAC: MPEG-4 File Sink. Again, despite the outdated documentation, the media sink takes FLAC input, so you should be able to create .MP4 files with FLAC audio track.
Sample file: 20180224-184012.mp4. "FLAC (framed)"
To sum it up:
FLAC encoder MFT is present in Windows 10 and is available for use; lacks proper documentation though
One needs to take care of conversion of input to compatible format (no direct support for 32-bit PCM)
It is possible to manage MFT directly and consume MFT output, then obtain FLAC bitstream
Alternatively, it is possible to use stock MP4 media sink to produce output with FLAC audio track
Alternatively, it is possible to develop a custom media sink and consume FLAC bitstream from upstream encoder connection
Potentially, the codec is compatible with Transcode API, however the restrictions above apply. The container type needs to be MFTranscodeContainerType_MPEG4 in particular.
The codec is apparently compatible with Media Session API, presumably it is good for use with Sink Writer API either.
In your code as you attempt to use Sink Writer API you should similarly either have MP4 output with input possibly converted to compatible format in your code (compatible PCM or compatible FLAC with encoder MFT managed on your side). Knowing that MP4 media sink overall is capable to create FLAC audio track you should be able to debug fine details in your code and fit the components to work together.
Finally I was able to solve the problem. It wasn't that hard to be honest. But that is always the case if you know how to achieve something ;).
I created a copy and paste example below to give an idea how to implement FLAC encoding with Microsoft Media Foundation.
The missing piece of the puzzle was the MFTranscodeGetAudioOutputAvailableTypes. This function lists all the available output formats from an audio encoder.
If you are not sure what MFTs are supported by the operation system you can call MFTEnumEx function first. This gives you a list of all the available MFTs. In my case with windows 10 there's the FLAC MFT that is defined like this.
Name: Microsoft FLAC Audio Encoder MFT
Input Types: 1 items:
Audio-PCM
Class identifier: 128509e9-c44e-45dc-95e9-c255b8f466a6
Output Types: 1 items:
Audio-0000f1ac-0000-0010-8000-00aa00389b71
Transform Flags: 1
Transform Category: Audio Encoder
So the next thing I did was to create the source reader and get the current media type. The important values for me are sample rate, bit rate and channels.
Then I created a GetOutputMediaTypes function that needs the requested audio format, sample rate, bit rate, channels and a reference to the IMFMediaType.
The MFTranscodeGetAudioOutputAvailableTypes function returns all available types for the MFAudioFormat_flac GUID.
After getting the count of the available media types with hr = pAvailableTypes->GetElementCount(&dwMTCount); I am able to iterate through them and check if a type is supporting my request. If that's the case I return the media type.
The last part is the easiest one.
First add the output media type to the sinkwriter to get the stream index.
DWORD dwWriterStreamIndex = -1;
// Add the stream
hr = pSinkWriter->AddStream(
pOuputMediaType,
&dwWriterStreamIndex
);
Then set the input type and call pSinkWriter->BeginWriting(); so the sinkwriter starts to accepting data.
// Set input media type
hr = pSinkWriter->SetInputMediaType(
dwWriterStreamIndex,
pInputType,
NULL
);
// Tell the sink writer to accept data
hr = pSinkWriter->BeginWriting();
If the output and input media type is correctly set, BeginWriting should return 0 as HRESULT.
We should get no error because we are using the media type the function MFTranscodeGetAudioOutputAvailableTypes is providing.
The last step is to read all samples from the source reader and write it through the sinkwriter into the flac container.
Done :)
I hope I could help with this answer.
Also thanks to Roman R.
Update
This sample only works with Audio-PCM formats from 4 bits to 24 bits. If you want to encode 32-bit Audio-PCM, you have to convert it to a supported bit depth first and then encode it.
--------
Here's the minimal example app.
#include <windows.h>
#include <windowsx.h>
#include <atlstr.h>
#include <comdef.h>
#include <exception>
#include <mfapi.h>
#include <mfplay.h>
#include <mfreadwrite.h>
#include <mmdeviceapi.h>
#include <Audioclient.h>
#include <mferror.h>
#include <Wmcodecdsp.h>
#pragma comment(lib, "mf.lib")
#pragma comment(lib, "mfplat.lib")
#pragma comment(lib, "mfplay.lib")
#pragma comment(lib, "mfreadwrite.lib")
#pragma comment(lib, "mfuuid.lib")
#pragma comment(lib, "wmcodecdspuuid")
// Turns a failed HRESULT into a C++ exception: the system error text is
// written to the debugger output and then thrown as std::exception.
// Successful HRESULTs pass through untouched.
inline void ThrowIfFailed(HRESULT hr)
{
    if (SUCCEEDED(hr))
    {
        return;
    }

    // Look up the human-readable description of the failure.
    _com_error failure(hr);
    LPCTSTR description = failure.ErrorMessage();

    OutputDebugString(L"################################## ERROR ##################################\n");
    OutputDebugString(description);
    OutputDebugString(L"\n################################## ----- ##################################\n");

    // Narrow copy of the message for the exception payload.
    CStringA narrowDescription(description);
    // Set a breakpoint on this line to catch DirectX API errors
    throw std::exception(narrowDescription);
}
// Drops the reference held through *ppT, if there is one, and clears the
// caller's pointer afterwards.
template <class T> void SafeRelease(T **ppT)
{
    T *&slot = *ppT;
    if (!slot)
    {
        return; // already empty
    }
    slot->Release();
    slot = nullptr;
}
using namespace System;
/**
 * Looks up an encoder output media type for the requested audio format.
 *
 * Asks MFTranscodeGetAudioOutputAvailableTypes for every output type the
 * installed encoders offer for cAudioFormat and returns the first one (in the
 * sorted enumeration order) that agrees with the requested sample rate,
 * channel count and bits per sample.
 *
 * @param cAudioFormat  Audio subtype to encode to (e.g. MFAudioFormat_FLAC).
 * @param cSampleRate   Required MF_MT_AUDIO_SAMPLES_PER_SECOND.
 * @param cBitPerSample Required MF_MT_AUDIO_BITS_PER_SAMPLE. A candidate type
 *                      that does not carry this attribute at all is treated as
 *                      compatible.
 * @param cChannels     Required MF_MT_AUDIO_NUM_CHANNELS.
 * @param ppType        Receives the matching type with one reference owned by
 *                      the caller; set to NULL when nothing matches.
 * @return S_OK on a match, MF_E_NOT_FOUND when no enumerated type matches,
 *         E_POINTER when ppType is NULL, otherwise the failing HRESULT.
 */
HRESULT GetOutputMediaTypes(
    GUID cAudioFormat,
    UINT32 cSampleRate,
    UINT32 cBitPerSample,
    UINT32 cChannels,
    IMFMediaType **ppType
)
{
    if (ppType == NULL)
    {
        return E_POINTER;
    }
    *ppType = NULL;

    // Enumerate all codecs except for codecs with field-of-use restrictions.
    // Sort the results.
    const DWORD dwFlags =
        (MFT_ENUM_FLAG_ALL & (~MFT_ENUM_FLAG_FIELDOFUSE)) |
        MFT_ENUM_FLAG_SORTANDFILTER;

    IMFCollection *pAvailableTypes = NULL; // List of audio media types.
    HRESULT hr = MFTranscodeGetAudioOutputAvailableTypes(
        cAudioFormat,
        dwFlags,
        NULL,
        &pAvailableTypes
    );

    // Get the element count.
    DWORD dwMTCount = 0;
    if (SUCCEEDED(hr))
    {
        hr = pAvailableTypes->GetElementCount(&dwMTCount);
    }

    // Iterate through the results until one candidate fits the request.
    IMFMediaType *pMatch = NULL;
    for (DWORD i = 0; SUCCEEDED(hr) && pMatch == NULL && i < dwMTCount; i++)
    {
        IMFMediaType *pAudioType = NULL;
        hr = pAvailableTypes->GetElement(i, (IUnknown**)&pAudioType);
        if (FAILED(hr))
        {
            break;
        }

        GUID majorType = { 0 };
        GUID subType = { 0 };
        UINT32 sampleRate = 0;
        UINT32 channels = 0;

        // Compare against the format the caller asked for, not a fixed codec.
        bool matches =
            SUCCEEDED(pAudioType->GetMajorType(&majorType)) &&
            majorType == MFMediaType_Audio &&
            SUCCEEDED(pAudioType->GetGUID(MF_MT_SUBTYPE, &subType)) &&
            subType == cAudioFormat &&
            SUCCEEDED(pAudioType->GetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, &sampleRate)) &&
            sampleRate == cSampleRate &&
            SUCCEEDED(pAudioType->GetUINT32(MF_MT_AUDIO_NUM_CHANNELS, &channels)) &&
            channels == cChannels;

        if (matches)
        {
            // Compressed output types need not carry a bits-per-sample value;
            // only reject the candidate when it states one that disagrees.
            UINT32 bitsPerSample = 0;
            const HRESULT hrBits = pAudioType->GetUINT32(
                MF_MT_AUDIO_BITS_PER_SAMPLE,
                &bitsPerSample
            );
            matches = (hrBits == MF_E_ATTRIBUTENOTFOUND) ||
                (SUCCEEDED(hrBits) && bitsPerSample == cBitPerSample);
        }

        if (matches)
        {
            // Found the codec: keep the reference GetElement handed out.
            pMatch = pAudioType;
        }
        else
        {
            SafeRelease(&pAudioType);
        }
    }

    if (SUCCEEDED(hr) && pMatch == NULL)
    {
        hr = MF_E_NOT_FOUND;
    }

    // Hand the reference over to the caller (NULL when nothing matched).
    *ppType = pMatch;
    SafeRelease(&pAvailableTypes);
    return hr;
}
/**
 * Minimal example: transcodes C:\Temp\48khz24bit.wav into C:\Temp\foo.flac
 * with a Media Foundation source reader and sink writer.
 *
 * COM / Media Foundation start-up failures throw via ThrowIfFailed. Every
 * later step runs only if the previous ones succeeded, and all COM objects
 * and samples are released before returning.
 *
 * @return 0 when the whole transcode succeeded, 1 otherwise.
 */
int main(array<System::String ^> ^args)
{
    HRESULT hr = S_OK;
    // Initialize com interface
    ThrowIfFailed(
        CoInitializeEx(0, COINIT_MULTITHREADED)
    );
    // Start media foundation
    ThrowIfFailed(
        MFStartup(MF_VERSION)
    );
    IMFMediaType *pInputType = NULL;
    IMFSourceReader *pSourceReader = NULL;
    IMFMediaType *pOuputMediaType = NULL;
    IMFSinkWriter *pSinkWriter = NULL;

    // Create source reader
    hr = MFCreateSourceReaderFromURL(
        L"C:\\Temp\\48khz24bit.wav",
        NULL,
        &pSourceReader
    );
    // Create sink writer
    if (SUCCEEDED(hr))
    {
        hr = MFCreateSinkWriterFromURL(
            L"C:\\Temp\\foo.flac",
            NULL,
            NULL,
            &pSinkWriter
        );
    }
    // Get media type from source reader
    if (SUCCEEDED(hr))
    {
        hr = pSourceReader->GetCurrentMediaType(
            MF_SOURCE_READER_FIRST_AUDIO_STREAM,
            &pInputType
        );
    }

    // Get sample rate, bits per sample and channels of the input
    UINT32 sampleRate = 0;
    UINT32 bitRate = 0;
    UINT32 channels = 0;
    if (SUCCEEDED(hr))
    {
        hr = pInputType->GetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, &sampleRate);
    }
    if (SUCCEEDED(hr))
    {
        hr = pInputType->GetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, &bitRate);
    }
    if (SUCCEEDED(hr))
    {
        hr = pInputType->GetUINT32(MF_MT_AUDIO_NUM_CHANNELS, &channels);
    }

    // Try to find a media type that is fitting.
    if (SUCCEEDED(hr))
    {
        hr = GetOutputMediaTypes(
            MFAudioFormat_FLAC,
            sampleRate,
            bitRate,
            channels,
            &pOuputMediaType);
    }

    DWORD dwWriterStreamIndex = -1;
    // Add the stream
    if (SUCCEEDED(hr))
    {
        hr = pSinkWriter->AddStream(
            pOuputMediaType,
            &dwWriterStreamIndex
        );
    }
    // Set input media type
    if (SUCCEEDED(hr))
    {
        hr = pSinkWriter->SetInputMediaType(
            dwWriterStreamIndex,
            pInputType,
            NULL
        );
    }
    // Tell the sink writer to accept data
    bool writing = false;
    if (SUCCEEDED(hr))
    {
        hr = pSinkWriter->BeginWriting();
        writing = SUCCEEDED(hr);
    }

    // Pump samples from the reader into the writer until end of stream or
    // the first failure.
    while (SUCCEEDED(hr))
    {
        DWORD nStreamIndex = 0;
        DWORD nStreamFlags = 0;
        LONGLONG nTime = 0;
        IMFSample *pSample = NULL;
        hr = pSourceReader->ReadSample(
            MF_SOURCE_READER_FIRST_AUDIO_STREAM,
            0,
            &nStreamIndex,
            &nStreamFlags,
            &nTime,
            &pSample);
        if (SUCCEEDED(hr) && pSample)
        {
            OutputDebugString(L"Write sample...\n");
            hr = pSinkWriter->WriteSample(
                dwWriterStreamIndex,
                pSample
            );
        }
        // ReadSample hands out an owned reference; drop it once written.
        SafeRelease(&pSample);
        // ... we are at the end of the stream...
        if (nStreamFlags & MF_SOURCE_READERF_ENDOFSTREAM)
        {
            // ... and jump out.
            break;
        }
    }

    // Call finalize to finish writing, keeping the first error if there was one.
    if (writing)
    {
        const HRESULT hrFinalize = pSinkWriter->Finalize();
        if (SUCCEEDED(hr))
        {
            hr = hrFinalize;
        }
    }

    SafeRelease(&pOuputMediaType);
    SafeRelease(&pInputType);
    SafeRelease(&pSinkWriter);
    SafeRelease(&pSourceReader);
    MFShutdown();
    CoUninitialize();
    // Done :D
    return SUCCEEDED(hr) ? 0 : 1;
}

Why MediaFoundation InitializeSinkWriter (SetInputMediaType) only accepts WMV3 format?

Taken from the MSDN help pages, InitializeSinkWriter works fine so long as the video encoding and video input format is WMV3/RGB32, however if I change it to WMV1, MPEG2, etc. then SetInputMediaType fails.
AFAIK I have WMV1 installed as a codec according to Sherlock the Codec Detective program.
Here is the code that causes the issue:
(to find the problem code, search for "problem" in source comments, there is a lot of boiler plate code that is irrelevant)
// Format constants
// Frame geometry and rate of the generated video.
constexpr UINT32 VIDEO_WIDTH = 640;
constexpr UINT32 VIDEO_HEIGHT = 480;
constexpr UINT32 VIDEO_FPS = 30;
// Duration of a single frame (presumably in 100-nanosecond units: 10,000,000 per second — confirm).
constexpr UINT64 VIDEO_FRAME_DURATION = 10 * 1000 * 1000 / VIDEO_FPS;
// Average bitrate requested on the output media type.
constexpr UINT32 VIDEO_BIT_RATE = 800000;
// Subtypes for the encoded output stream and for the frames fed to the writer.
const GUID VIDEO_ENCODING_FORMAT = MFVideoFormat_WMV1; // problem here, must be WMV3
const GUID VIDEO_INPUT_FORMAT = MFVideoFormat_WMV3; // problem here if not wmv3 too
// Pixels per frame, and the number of frames in 20 seconds of video.
constexpr UINT32 VIDEO_PELS = VIDEO_WIDTH * VIDEO_HEIGHT;
constexpr UINT32 VIDEO_FRAME_COUNT = 20 * VIDEO_FPS;
// Creates a sink writer for "output.wmv", describes the encoded output stream
// and the incoming frame format, and starts writing. On success *ppWriter
// receives an owned reference and *pStreamIndex the stream index; on failure
// both keep their cleared values. A diagnostic line is printed for every stage
// that was skipped or failed.
HRESULT InitializeSinkWriter(IMFSinkWriter **ppWriter, DWORD *pStreamIndex)
{
    *ppWriter = NULL;
    *pStreamIndex = 0;

    IMFSinkWriter *writer = NULL;
    IMFMediaType *encodedType = NULL;
    IMFMediaType *frameType = NULL;
    DWORD index;

    HRESULT hr = MFCreateSinkWriterFromURL(L"output.wmv", NULL, NULL, &writer);

    // Set the output media type.
    if (SUCCEEDED(hr)) hr = MFCreateMediaType(&encodedType);
    if (SUCCEEDED(hr)) hr = encodedType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
    if (SUCCEEDED(hr)) hr = encodedType->SetGUID(MF_MT_SUBTYPE, VIDEO_ENCODING_FORMAT);
    if (SUCCEEDED(hr)) hr = encodedType->SetUINT32(MF_MT_AVG_BITRATE, VIDEO_BIT_RATE);
    if (SUCCEEDED(hr)) hr = encodedType->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
    if (SUCCEEDED(hr)) hr = MFSetAttributeSize(encodedType, MF_MT_FRAME_SIZE, VIDEO_WIDTH, VIDEO_HEIGHT);
    if (SUCCEEDED(hr)) hr = MFSetAttributeRatio(encodedType, MF_MT_FRAME_RATE, VIDEO_FPS, 1);
    if (SUCCEEDED(hr)) hr = MFSetAttributeRatio(encodedType, MF_MT_PIXEL_ASPECT_RATIO, 1, 1);
    if (SUCCEEDED(hr)) hr = writer->AddStream(encodedType, &index);

    // Set the input media type.
    if (SUCCEEDED(hr)) hr = MFCreateMediaType(&frameType);
    if (SUCCEEDED(hr)) hr = frameType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
    if (SUCCEEDED(hr)) hr = frameType->SetGUID(MF_MT_SUBTYPE, VIDEO_INPUT_FORMAT);
    if (SUCCEEDED(hr)) hr = frameType->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
    if (SUCCEEDED(hr)) hr = MFSetAttributeSize(frameType, MF_MT_FRAME_SIZE, VIDEO_WIDTH, VIDEO_HEIGHT);
    if (SUCCEEDED(hr)) hr = MFSetAttributeRatio(frameType, MF_MT_FRAME_RATE, VIDEO_FPS, 1);
    if (SUCCEEDED(hr)) hr = MFSetAttributeRatio(frameType, MF_MT_PIXEL_ASPECT_RATIO, 1, 1);

    if (FAILED(hr))
    {
        puts("setattributeratio failed");
    }
    else
    {
        // Problem here! Codec issue with wmv1, mpeg, etc.
        hr = writer->SetInputMediaType(index, frameType, NULL);
    }

    if (FAILED(hr))
    {
        puts("setinputmediatype failed"); // <-- HR result problem here
    }
    else
    {
        // Tell the sink writer to start accepting data.
        hr = writer->BeginWriting();
    }

    if (FAILED(hr))
    {
        puts("beginwriting failed");
    }
    else
    {
        // Return the pointer to the caller.
        *ppWriter = writer;
        (*ppWriter)->AddRef();
        *pStreamIndex = index;
    }

    SafeRelease(&writer);
    SafeRelease(&encodedType);
    SafeRelease(&frameType);
    return hr;
}
Initialize sink writer is called with this code:
// Entry point of the repro: initializes COM and Media Foundation, tries to
// build the sink writer, then tears everything down again.
// Returns 0 when every step succeeded and 1 otherwise.
int main()
{
    DWORD streamidx = 0;
    IMFSinkWriter *pWriter = NULL;
    puts("Initializing...");
    HRESULT hr = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED);
    if (SUCCEEDED(hr))
    {
        hr = MFStartup(MF_VERSION);
        if (SUCCEEDED(hr))
        {
            // problem here !
            hr = InitializeSinkWriter(&pWriter, &streamidx);
            if (SUCCEEDED(hr))
            {
                // more code would go here...
            }
            else
            {
                puts("InitializeSinkWriter failed"); // this is called
            }
            SafeRelease(&pWriter);
            MFShutdown();
        }
        CoUninitialize();
    }
    puts("Finished...");
    return SUCCEEDED(hr) ? 0 : 1;
}
This is a standard Windows 7 computer, so if it only accepts WMV3 as the encoder or input type, does that mean I have to install codecs? That seems absurd, since popular formats like WMV1 and MPEG should already be installed, and Sherlock codec detective says they are.
Windows Media Foundation does not support the codecs you are trying to use (even though some third-party software may report that those codecs are available through other APIs).
See:
Supported Media Formats in Media Foundation - Video Codecs - Encoder column in the table under Video Codecs
Windows Media Video 9 Encoder - Output Formats - there is no WMV1 there

DirectShow can't start capture twice

I am trying to follow through the DirectShow examples on the windows dev center to make my own application that can capture screen and audio to video: Capturing Video to an AVI File
The first time capture starts everything is OK, but the second time nothing happens and the video file does not appear. Is it possible that I forgot to uninitialize something?
UPDATE
The problem does not seem to be a missing release. The second time the stream is written, the file 1.avi is created but it is empty, and when pMediaControl->Stop(); is called the file is automatically deleted.
UPDATE2
At the second time I found that:
hr = pBuild->RenderStream(
&PIN_CATEGORY_CAPTURE, // Pin category.
&MEDIATYPE_Video, // Media type.
pCap, // Capture filter.
NULL, // Intermediate filter (optional).
pMux); // Mux or file sink filter.
returns E_INVALIDARG. So I added (using this):
if (a == 1) {
CComPtr<IPin> sourcePin;
CComPtr<IPin> dumpPin;
sourcePin = GetPin(pMux, PINDIR_OUTPUT);
dumpPin = GetPin(pCap, PINDIR_INPUT);
hr = ppGraph->Connect(sourcePin, dumpPin);
}
And I found that the second time the dumpPin value is NULL. The hr = ppGraph->AddFilter(pCap, L"Capture Filter"); call runs OK. Where can I dig next to find the error?
(code is updated)
My code:
#include "stdafx.h"
#include <iostream>
#include <windows.h>
#include <dshow.h>
#include <atlbase.h>
#include <dshow.h>
#include <vector>
#include <string>
#pragma comment(lib, "strmiids")
// Returns the first pin of pFilter whose direction equals PinDir.
// Returns NULL when pFilter is NULL, its pins cannot be enumerated, or no pin
// has the requested direction.
// The returned pin still holds the reference obtained from the enumerator, so
// the caller must Release() it.
IPin *GetPin(IBaseFilter *pFilter, PIN_DIRECTION PinDir)
{
    if (!pFilter)
    {
        return nullptr;
    }
    IEnumPins *pEnum = nullptr;
    // Without this check a failed EnumPins left pEnum uninitialized and the
    // loop below dereferenced garbage.
    if (FAILED(pFilter->EnumPins(&pEnum)) || !pEnum)
    {
        return nullptr;
    }
    IPin *pFound = nullptr;
    IPin *pPin = nullptr;
    while (!pFound && pEnum->Next(1, &pPin, nullptr) == S_OK)
    {
        PIN_DIRECTION PinDirThis;
        if (SUCCEEDED(pPin->QueryDirection(&PinDirThis)) && PinDirThis == PinDir)
        {
            pFound = pPin; // keep the enumerator's reference for the caller
        }
        else
        {
            pPin->Release();
        }
    }
    pEnum->Release();
    return pFound;
}
// Creates the system device enumerator and asks it for a moniker enumerator
// over `category`, which is stored in *ppEnum.
// An empty category (CreateClassEnumerator returning S_FALSE) is reported as
// the error VFW_E_NOT_FOUND; every other HRESULT is passed through unchanged.
HRESULT EnumerateDevices(REFGUID category, IEnumMoniker **ppEnum)
{
    ICreateDevEnum *pSysDevEnum;
    const HRESULT hrCreate = CoCreateInstance(CLSID_SystemDeviceEnum, NULL,
        CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&pSysDevEnum));
    if (!SUCCEEDED(hrCreate))
    {
        return hrCreate;
    }
    const HRESULT hrEnum = pSysDevEnum->CreateClassEnumerator(category, ppEnum, 0);
    pSysDevEnum->Release();
    // S_FALSE means "no devices in this category"; callers treat that as failure.
    return (hrEnum == S_FALSE) ? VFW_E_NOT_FOUND : hrEnum;
}
// Creates a Capture Graph Builder and a Filter Graph Manager and attaches the
// graph to the builder.
//   ppGraph - receives the filter graph (caller must Release()).
//   ppBuild - receives the capture graph builder (caller must Release()).
// Returns E_POINTER for NULL arguments, S_OK on success, otherwise the failing
// HRESULT. On failure both out-parameters are NULL and nothing is leaked.
HRESULT InitCaptureGraphBuilder(
    IGraphBuilder **ppGraph, // Receives the pointer.
    ICaptureGraphBuilder2 **ppBuild // Receives the pointer.
)
{
    if (!ppGraph || !ppBuild)
    {
        return E_POINTER;
    }
    // Give callers that ignore the HRESULT a defined value to look at.
    *ppGraph = NULL;
    *ppBuild = NULL;

    IGraphBuilder *pGraph = NULL;
    ICaptureGraphBuilder2 *pBuild = NULL;
    // Create the Capture Graph Builder.
    HRESULT hr = CoCreateInstance(CLSID_CaptureGraphBuilder2, NULL,
        CLSCTX_INPROC_SERVER, IID_ICaptureGraphBuilder2, (void**)&pBuild);
    if (SUCCEEDED(hr))
    {
        // Create the Filter Graph Manager.
        hr = CoCreateInstance(CLSID_FilterGraph, 0, CLSCTX_INPROC_SERVER,
            IID_IGraphBuilder, (void**)&pGraph);
    }
    if (SUCCEEDED(hr))
    {
        // Initialize the Capture Graph Builder; a builder without a graph is
        // useless, so this result is no longer ignored.
        hr = pBuild->SetFiltergraph(pGraph);
    }
    if (SUCCEEDED(hr))
    {
        // Return both interface pointers to the caller.
        *ppBuild = pBuild;
        *ppGraph = pGraph; // The caller must release both interfaces.
        return S_OK;
    }
    // Failed: drop whatever was created so far.
    if (pGraph)
    {
        pGraph->Release();
    }
    if (pBuild)
    {
        pBuild->Release();
    }
    return hr;
}
// Per-device capture state gathered by DisplayDeviceInformation. Each member
// is a raw COM interface pointer that DisplayDeviceInformation releases after
// the graph has been stopped.
struct Capture {
// Property bag obtained from the device moniker via BindToStorage.
IPropertyBag *pPropBag;
// Filter graph returned by InitCaptureGraphBuilder; Run/Stop go through it.
IGraphBuilder *ppGraph;
// Video capture filter bound from the device moniker via BindToObject.
IBaseFilter *pCap;
// Capture graph builder returned by InitCaptureGraphBuilder.
ICaptureGraphBuilder2 *pBuild;
};
// For every device moniker in pEnum: prints its device path, builds a capture
// graph that writes video to C:\a\<a>.avi, runs all graphs for five seconds,
// then stops them and releases every interface.
//   pEnum - moniker enumerator for the device category (not released here).
//   a     - run index, used only to build the output file name.
//
// Reference counting is balanced on every path. Leftover references keep the
// filter graph, and with it the capture device, alive, which can make a later
// capture on the same device fail. Devices whose graph cannot be built are
// reported and skipped instead of being run with uninitialized pointers.
//
// The former `a == 1` experiment was removed: it tried to connect the mux
// output pin to an input pin of the capture filter, GetPin returned NULL for
// that input pin, and the pin references it obtained were leaked.
void DisplayDeviceInformation(IEnumMoniker *pEnum,int a)
{
    if (!pEnum)
    {
        return;
    }
    std::vector<Capture> captures;
    IMoniker *pRawMoniker = nullptr;
    while (pEnum->Next(1, &pRawMoniker, nullptr) == S_OK)
    {
        // Take ownership so the moniker is released on every path.
        CComPtr<IMoniker> moniker;
        moniker.Attach(pRawMoniker);
        pRawMoniker = nullptr;

        CComPtr<IPropertyBag> propBag;
        HRESULT hr = moniker->BindToStorage(0, 0, IID_PPV_ARGS(&propBag));
        if (FAILED(hr))
        {
            continue;
        }
        VARIANT var;
        VariantInit(&var);
        hr = propBag->Read(L"DevicePath", &var, 0);
        if (SUCCEEDED(hr))
        {
            // The device path is not intended for display.
            printf("Device path: %S\n", var.bstrVal);
            VariantClear(&var);
        }

        const std::wstring name = std::wstring(L"C:\\a\\") + std::to_wstring(a) + std::wstring(L".avi");

        CComPtr<IGraphBuilder> graph;
        CComPtr<ICaptureGraphBuilder2> build; // Capture Graph Builder
        CComPtr<IBaseFilter> cap;             // Video capture filter.
        CComPtr<IBaseFilter> mux;             // AVI mux created by SetOutputFileName.

        hr = InitCaptureGraphBuilder(&graph, &build);
        if (SUCCEEDED(hr))
        {
            hr = moniker->BindToObject(0, 0, IID_IBaseFilter, (void**)&cap);
        }
        if (SUCCEEDED(hr))
        {
            hr = graph->AddFilter(cap, L"Capture Filter");
        }
        if (SUCCEEDED(hr))
        {
            hr = build->SetOutputFileName(
                &MEDIASUBTYPE_Avi, // Specifies AVI for the target file.
                name.c_str(),      // File name.
                &mux,              // Receives a pointer to the mux.
                nullptr);          // (Optional) Receives a pointer to the file sink.
        }
        if (SUCCEEDED(hr))
        {
            hr = build->RenderStream(
                &PIN_CATEGORY_CAPTURE, // Pin category.
                &MEDIATYPE_Video,      // Media type.
                cap,                   // Capture filter.
                nullptr,               // Intermediate filter (optional).
                mux);                  // Mux or file sink filter.
        }
        if (FAILED(hr))
        {
            printf("Capture graph setup failed: 0x%08lX\n", static_cast<unsigned long>(hr));
            continue; // smart pointers release everything created so far
        }

        // Optional mux tuning; the mux is still referenced here. It used to
        // be released before these calls and then released a second time.
        CComPtr<IConfigAviMux> configMux;
        if (SUCCEEDED(mux->QueryInterface(IID_IConfigAviMux, (void**)&configMux)))
        {
            configMux->SetMasterStream(0);
        }
        CComPtr<IConfigInterleaving> interleave;
        if (SUCCEEDED(mux->QueryInterface(IID_IConfigInterleaving, (void**)&interleave)))
        {
            interleave->put_Mode(INTERLEAVE_CAPTURE);
        }

        // Hand the references over to the Capture record; they are released
        // in the stop loop below.
        Capture capt;
        capt.ppGraph = graph.Detach();
        capt.pPropBag = propBag.Detach();
        capt.pCap = cap.Detach();
        capt.pBuild = build.Detach();
        captures.push_back(capt);
    }

    for (const Capture &cap : captures)
    {
        CComPtr<IMediaControl> control; // released at end of iteration
        if (SUCCEEDED(cap.ppGraph->QueryInterface(IID_PPV_ARGS(&control))))
        {
            control->Run();
        }
    }
    Sleep(5000);
    for (const Capture &cap : captures)
    {
        {
            CComPtr<IMediaControl> control;
            if (SUCCEEDED(cap.ppGraph->QueryInterface(IID_PPV_ARGS(&control))))
            {
                control->Stop();
            }
        } // control is released before the graph itself
        cap.pCap->Release();
        cap.ppGraph->Release();
        cap.pBuild->Release();
        cap.pPropBag->Release();
    }
}
int _tmain(int argc, _TCHAR* argv[])
{
HRESULT hr = CoInitializeEx(NULL, COINIT_MULTITHREADED);
for (int a = 0; a <= 1; a++) {
if (SUCCEEDED(hr))
{
IEnumMoniker *pEnum;
hr = EnumerateDevices(CLSID_VideoInputDeviceCategory, &pEnum);
if (SUCCEEDED(hr))
{
DisplayDeviceInformation(pEnum,a);
pEnum->Release();
}
}
}
if (SUCCEEDED(hr))
{
CoUninitialize();
}
int i;
std::cin >> i;
return 0;
}
You might need to Release ppGraph, pBuild, pMediaControl and pCap at the end of DisplayDeviceInformation function and pMux at the end of the cycle. It will be better to use some sort of smart pointers instead.
I didn't figure out how to resolve this, so I just spawned an external process instead.

Capture a frame from video using directshow filters in C++

I took some code from the net that captures a frame from a video file and modified it to capture all frames and store them as BMP images.
// Builds a DirectShow graph (source -> Sample Grabber -> Null Renderer) for
// pszVideoFile, runs it to completion and writes the grabber's buffered
// sample to ten numbered .bmp files whose names start with IMAGE_FILE_PATH.
// Returns the first failing HRESULT, or the result of the last WriteBitmap.
//
// Every COM call is checked before its result is used, the sample buffer is
// freed on each iteration, and CoInitialize is balanced by CoUninitialize.
//
// NOTE(review): the grabber is configured with SetOneShot(TRUE) and read only
// after WaitForCompletion, so every iteration of the save loop reads whatever
// single sample the grabber currently buffers. Capturing every frame requires
// an ISampleGrabberCB callback installed with ISampleGrabber::SetCallback.
HRESULT GrabVideoBitmap(PCWSTR pszVideoFile)
{
    IGraphBuilder *pGraph = NULL;
    IMediaControl *pControl = NULL;
    IMediaEventEx *pEvent = NULL;
    IBaseFilter *pGrabberF = NULL;
    ISampleGrabber *pGrabber = NULL;
    IBaseFilter *pSourceF = NULL;
    IEnumPins *pEnum = NULL;
    IPin *pPin = NULL;
    IBaseFilter *pNullF = NULL;
    long evCode = 0;
    wchar_t temp[10];
    wchar_t framename[50] = IMAGE_FILE_PATH; // L"D:\\sampleframe";
    BYTE *pBuffer = NULL;
    AM_MEDIA_TYPE mt;
    ZeroMemory(&mt, sizeof(mt));

    HRESULT hr = CoInitialize(NULL);
    if (FAILED(hr))
        return hr; // was `return 0`, which reported S_OK on failure

    hr = CoCreateInstance(CLSID_FilterGraph, NULL, CLSCTX_INPROC_SERVER,
        IID_PPV_ARGS(&pGraph));
    if (FAILED(hr)) goto done;
    hr = pGraph->QueryInterface(IID_PPV_ARGS(&pControl));
    if (FAILED(hr)) goto done;
    hr = pGraph->QueryInterface(IID_PPV_ARGS(&pEvent));
    if (FAILED(hr)) goto done;

    // Create the Sample Grabber filter.
    hr = CoCreateInstance(CLSID_SampleGrabber, NULL, CLSCTX_INPROC_SERVER,
        IID_PPV_ARGS(&pGrabberF));
    if (FAILED(hr)) goto done;
    hr = pGraph->AddFilter(pGrabberF, L"Sample Grabber");
    if (FAILED(hr)) goto done;
    hr = pGrabberF->QueryInterface(IID_PPV_ARGS(&pGrabber));
    if (FAILED(hr)) goto done;

    // Displays the metadata of the file
    DisplayFileInfo((wchar_t*)pszVideoFile); // to display video information

    // Ask the grabber for uncompressed 24-bit RGB video.
    mt.majortype = MEDIATYPE_Video;
    mt.subtype = MEDIASUBTYPE_RGB24;
    hr = pGrabber->SetMediaType(&mt);
    if (FAILED(hr)) goto done;

    hr = pGraph->AddSourceFilter(pszVideoFile, L"Source", &pSourceF);
    if (FAILED(hr)) goto done;
    hr = pSourceF->EnumPins(&pEnum);
    if (FAILED(hr)) goto done;

    // Try each source pin until one connects to the grabber.
    hr = E_FAIL; // stays a failure if the source exposes no pins at all
    while (S_OK == pEnum->Next(1, &pPin, NULL))
    {
        hr = ConnectFilters(pGraph, pPin, pGrabberF);
        SafeRelease(&pPin);
        if (SUCCEEDED(hr))
        {
            break;
        }
    }
    if (FAILED(hr)) goto done;

    hr = CoCreateInstance(CLSID_NullRenderer, NULL, CLSCTX_INPROC_SERVER,
        IID_PPV_ARGS(&pNullF));
    if (FAILED(hr)) goto done;
    hr = pGraph->AddFilter(pNullF, L"Null Filter");
    if (FAILED(hr)) goto done;
    hr = ConnectFilters(pGraph, pGrabberF, pNullF);
    if (FAILED(hr)) goto done;

    hr = pGrabber->SetOneShot(TRUE);
    if (FAILED(hr)) goto done;
    hr = pGrabber->SetBufferSamples(TRUE);
    if (FAILED(hr)) goto done;
    hr = pControl->Run();
    if (FAILED(hr)) goto done;
    hr = pEvent->WaitForCompletion(INFINITE, &evCode);
    if (FAILED(hr)) goto done;

    for (int i = 0; i < 10 && SUCCEEDED(hr); i++)
    {
        // Find the required buffer size.
        long cbBuffer = 0;
        hr = pGrabber->GetCurrentBuffer(&cbBuffer, NULL);
        if (FAILED(hr))
        {
            break;
        }
        pBuffer = (BYTE*)CoTaskMemAlloc(cbBuffer);
        if (!pBuffer)
        {
            hr = E_OUTOFMEMORY;
            break;
        }
        hr = pGrabber->GetCurrentBuffer(&cbBuffer, (long*)pBuffer);
        if (SUCCEEDED(hr))
        {
            hr = pGrabber->GetConnectedMediaType(&mt);
        }
        if (SUCCEEDED(hr))
        {
            // Examine the format block.
            if ((mt.formattype == FORMAT_VideoInfo) &&
                (mt.cbFormat >= sizeof(VIDEOINFOHEADER)) &&
                (mt.pbFormat != NULL))
            {
                // framename = IMAGE_FILE_PATH + <i> + ".bmp"
                swprintf(temp, 5, L"%d", i);
                wcscat_s(framename, temp);
                wcscat_s(framename, L".bmp");
                VIDEOINFOHEADER *pVih = (VIDEOINFOHEADER*)mt.pbFormat;
                hr = WriteBitmap((PCWSTR)framename, &pVih->bmiHeader,
                    mt.cbFormat - SIZE_PREHEADER, pBuffer, cbBuffer);
                // Reset the name to the bare prefix for the next iteration.
                wcscpy_s(framename, IMAGE_FILE_PATH);
            }
            else
            {
                // Invalid format.
                hr = VFW_E_INVALIDMEDIATYPE;
            }
            FreeMediaType(mt);
        }
        // Free this iteration's buffer; it used to be overwritten and leaked.
        CoTaskMemFree(pBuffer);
        pBuffer = NULL;
    }

done:
    CoTaskMemFree(pBuffer);
    SafeRelease(&pPin);
    SafeRelease(&pEnum);
    SafeRelease(&pNullF);
    SafeRelease(&pSourceF);
    SafeRelease(&pGrabber);
    SafeRelease(&pGrabberF);
    SafeRelease(&pControl);
    SafeRelease(&pEvent);
    SafeRelease(&pGraph);
    // Balance the successful CoInitialize above, after all interfaces are gone.
    CoUninitialize();
    return hr;
}
The input video file has 132 frames.
But only 68 images are generated.
Also last frame of the video is captured for the last 38 images.
I think the directshow graph is running continuously and WriteBitmap() is missing frames.
How can I control DirectShow so that it captures one frame, writes it to a BMP file, then captures the next frame, and so on, so that all the frames are saved as BMP images?
Thanks
Arun
Your approach is wrong. Currently, you set the sample grabber to one shot and after that you wait for the graph completion. This way it only works for capturing a single frame. You need to capture the frames inside the ISampleGrabberCB callback of your pGrabber. You need to implement ISampleGrabberCB interface and use ISampleGrabber::SetCallback on your pGrabber filter to point it to your implementation. After that you can capture the frames inside either SampleCB or BufferCB methods. http://www.infognition.com/blog/2013/accessing_raw_video_in_directshow.html

Loopback is saved in buffer, how to write to disk as wav?

You can copy and paste the code below into an empty project as main.cpp; it saves the captured audio to a buffer, but I don't know how to write that buffer to a file. In SetFormat I set the buffer up, and in CopyData I write to it.
Complete C++ beginner here, I was able to mash this up. How can I write the buffer to file?
#include <mmdeviceapi.h>
#include <audioclient.h>
#include "debug.h"
#include <comdef.h>
#define UNICODE
//-----------------------------------------------------------
// Record an audio stream from the default audio capture
// device. The RecordAudioStream function allocates a shared
// buffer big enough to hold one second of PCM audio data.
// The function uses this buffer to stream data from the
// capture device. The main loop runs every 1/2 second.
//-----------------------------------------------------------
// REFERENCE_TIME time units per second and per millisecond
#define REFTIMES_PER_SEC 10000000
#define REFTIMES_PER_MILLISEC 10000
// function strof(jsint) { var prim = jsint >>> 0; return prim.toString(16) }
// Logs the failed step (`title`) with its HRESULT and jumps to the enclosing
// function's `Exit:` label. Wrapped in do/while(0) so the macro behaves as a
// single statement, e.g. after an unbraced if/else.
#define EXIT_ON_ERROR(hres, title) \
    do { \
        if (FAILED(hres)) { \
            _com_error hres_str(hres); \
            debug_log("exit due to error on title", title, "hres:", hres, "hres_str:", hres_str.ErrorMessage()); \
            goto Exit; \
        } \
    } while (0)
// Releases a COM interface pointer if it is set and resets it to NULL.
#define SAFE_RELEASE(punk) \
    do { \
        if ((punk) != NULL) \
        { (punk)->Release(); (punk) = NULL; } \
    } while (0)
const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator);
const IID IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator);
const IID IID_IAudioClient = __uuidof(IAudioClient);
const IID IID_IAudioCaptureClient = __uuidof(IAudioCaptureClient);
// Capture format recorded by SetFormat; the pointer is not owned here.
PWAVEFORMATEX m_pwfx = NULL;
// Staging buffer allocated by SetFormat and filled by CopyData.
byte* recordBuffer = NULL;
// Remembers the capture format and (re)allocates the global record buffer.
//   pwfx       - stream format; the pointer is stored in m_pwfx, not copied.
//   bufferSize - buffer capacity in frames; the allocation is
//                bufferSize * pwfx->nBlockAlign bytes.
// Returns E_POINTER when pwfx is NULL, S_OK otherwise.
HRESULT SetFormat(WAVEFORMATEX *pwfx, UINT32 bufferSize) {
    if (pwfx == NULL) {
        return E_POINTER;
    }
    const size_t byteCount = static_cast<size_t>(bufferSize) * pwfx->nBlockAlign;
    // Drop a buffer from an earlier call so repeated calls do not leak.
    delete[] recordBuffer;
    recordBuffer = new byte[byteCount];
    m_pwfx = pwfx;
    debug_log("Record buffer set to:", byteCount, "bytes");
    return S_OK;
}
// Copies one captured packet into the global record buffer and logs its
// first numFramesAvailable bytes.
//   pData              - packet data, or NULL for a silent packet (the caller
//                        passes NULL when AUDCLNT_BUFFERFLAGS_SILENT is set);
//                        silence is stored as zero bytes.
//   numFramesAvailable - packet length in frames of m_pwfx->nBlockAlign bytes.
//   bDone              - not modified here, so the caller's loop keeps running.
// Returns E_POINTER when SetFormat has not prepared the buffer, else S_OK.
HRESULT CopyData(BYTE *pData, UINT32 numFramesAvailable, BOOL *bDone) {
    if (recordBuffer == NULL || m_pwfx == NULL) {
        return E_POINTER;
    }
    const size_t byteCount = static_cast<size_t>(numFramesAvailable) * m_pwfx->nBlockAlign;
    if (pData != NULL) {
        memcpy(recordBuffer, pData, byteCount);
    } else {
        // Silent packet: there is no source data to read, so write zeros.
        memset(recordBuffer, 0, byteCount);
    }
    // Log from the copy so the silent case is safe as well.
    for (size_t i = 0; i < numFramesAvailable; i++) {
        debug_log((short)recordBuffer[i]);
    }
    return S_OK;
}
// Captures audio from the default capture endpoint in shared mode using the
// device mix format and hands every packet to CopyData.
// The loop runs until CopyData sets *bDone or a call fails; the failing
// HRESULT (or the result of Stop) is returned.
// All failures jump to Exit, which releases whatever was acquired.
HRESULT RecordAudioStream() {
    HRESULT hr;
    REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
    REFERENCE_TIME hnsActualDuration = 0;
    UINT32 bufferFrameCount = 0;
    UINT32 numFramesAvailable = 0;
    IMMDeviceEnumerator *pEnumerator = NULL;
    IMMDevice *pDevice = NULL;
    IAudioClient *pAudioClient = NULL;
    IAudioCaptureClient *pCaptureClient = NULL;
    WAVEFORMATEX *pwfx = NULL;
    UINT32 packetLength = 0;
    BOOL bDone = FALSE;
    BYTE *pData = NULL;
    DWORD flags = 0;
    // Set only after CoInitialize succeeds so Exit does not unbalance COM.
    bool comInitialized = false;

    hr = CoInitialize(NULL);
    EXIT_ON_ERROR(hr, "CoInitialize");
    comInitialized = true;
    hr = CoCreateInstance(CLSID_MMDeviceEnumerator, NULL, CLSCTX_ALL, IID_IMMDeviceEnumerator, (void**)&pEnumerator);
    EXIT_ON_ERROR(hr, "CoCreateInstance");
    hr = pEnumerator->GetDefaultAudioEndpoint(eCapture, eConsole, &pDevice);
    EXIT_ON_ERROR(hr, "GetDefaultAudioEndpoint");
    hr = pDevice->Activate(IID_IAudioClient, CLSCTX_ALL, NULL, (void**)&pAudioClient);
    EXIT_ON_ERROR(hr, "Activate");
    hr = pAudioClient->GetMixFormat(&pwfx);
    EXIT_ON_ERROR(hr, "GetMixFormat");
    hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, 0, hnsRequestedDuration, 0, pwfx, NULL);
    EXIT_ON_ERROR(hr, "Initialize");
    // Get the size of the allocated buffer.
    hr = pAudioClient->GetBufferSize(&bufferFrameCount);
    EXIT_ON_ERROR(hr, "GetBufferSize");
    hr = pAudioClient->GetService(IID_IAudioCaptureClient, (void**)&pCaptureClient);
    EXIT_ON_ERROR(hr, "GetService");
    // Notify the audio sink which format to use.
    hr = SetFormat(pwfx, bufferFrameCount);
    EXIT_ON_ERROR(hr, "SetFormat");
    // Calculate the actual duration of the allocated buffer.
    hnsActualDuration = static_cast<REFERENCE_TIME>(
        (double)REFTIMES_PER_SEC * bufferFrameCount / pwfx->nSamplesPerSec);
    hr = pAudioClient->Start(); // Start recording.
    EXIT_ON_ERROR(hr, "Start");
    // Each loop fills about half of the shared buffer.
    while (bDone == FALSE) {
        debug_log("at top of loop");
        // Sleep for half the buffer duration.
        Sleep(static_cast<DWORD>(hnsActualDuration / REFTIMES_PER_MILLISEC / 2));
        hr = pCaptureClient->GetNextPacketSize(&packetLength);
        EXIT_ON_ERROR(hr, "GetNextPacketSize");
        while (packetLength != 0 && bDone == FALSE) {
            // Get the available data in the shared buffer.
            hr = pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL);
            EXIT_ON_ERROR(hr, "GetBuffer");
            if (flags & AUDCLNT_BUFFERFLAGS_SILENT) {
                pData = NULL; // Tell CopyData to write silence.
            }
            // Copy the available capture data to the audio sink.
            hr = CopyData(pData, numFramesAvailable, &bDone);
            EXIT_ON_ERROR(hr, "CopyData");
            hr = pCaptureClient->ReleaseBuffer(numFramesAvailable);
            EXIT_ON_ERROR(hr, "ReleaseBuffer");
            hr = pCaptureClient->GetNextPacketSize(&packetLength);
            EXIT_ON_ERROR(hr, "GetNextPacketSize 2");
        }
    }
    hr = pAudioClient->Stop(); // Stop recording.
    EXIT_ON_ERROR(hr, "Stop");
Exit:
    CoTaskMemFree(pwfx);
    SAFE_RELEASE(pEnumerator);
    SAFE_RELEASE(pDevice);
    SAFE_RELEASE(pAudioClient);
    SAFE_RELEASE(pCaptureClient);
    if (comInitialized) {
        CoUninitialize();
    }
    return hr;
}
// Runs one capture session. The exit code is 0 on success and 1 when
// RecordAudioStream returns a failure HRESULT (failure codes are negative).
int main() {
    return RecordAudioStream() < 0 ? 1 : 0;
}
debug.h - in case you want to copy paste the above
#include <utility>
#include <fstream>
#include <string>
// Terminal case: writes the last value followed by a newline and flushes.
template <typename HeadType> bool
debug_log_rec(std::ostream& out, HeadType&& head) {
    out << head << std::endl;
    return true;
}
// Recursive case: writes the first value and a single space, then hands the
// remaining values on. The result is always true.
template <typename HeadType, typename... TailTypes> bool
debug_log_rec(std::ostream& out, HeadType&& head, TailTypes&&... tails) {
    out << head << " ";
    return debug_log_rec(out, std::forward<TailTypes>(tails)...);
}
// Appends one line to the log file, made of all arguments separated by single
// spaces (formatted by debug_log_rec).
// Returns false when the log file cannot be opened or written instead of
// silently reporting success; returns true otherwise.
// The file is opened and closed on every call.
template <typename... ArgTypes> bool
debug_log(ArgTypes&&... args) {
    std::ofstream log("C:\\Users\\Mercurius\\Desktop\\log.txt", std::ios::app);
    if (!log.is_open()) {
        return false;
    }
    debug_log_rec(log, std::forward<ArgTypes>(args)...);
    return log.good(); // the stream closes itself when it goes out of scope
}