Why is my NVEnc image coming out pixelated? - c++

I have been working on an NVEnc project but the images seem to come out blurry no matter what I do in the setup if I am using CBR as my rate control mode. I am adapting the code from the NVidia sample here: https://github.com/NVIDIA/video-sdk-samples/blob/master/nvEncBroadcastSample/nvEnc/nvCodec/nvEncoder/NvEncoder.cpp
I am using CBR as my rate control mode as suggested in this table from the nvidia docs.
m_encoder->SetEncoderParams(&m_encInitParams, m_codecId, NV_ENC_PRESET_LOW_LATENCY_HQ_GUID);
[...]
// Fills *pIntializeParams and the NV_ENC_CONFIG it points to for the given
// codec and preset.
//
// Steps, in order: zero both structs, set resolution (from m_width/m_height)
// and a fixed 60/1 frame rate, copy the preset's default config over
// encodeConfig, then override GOP, rate-control and codec-specific fields.
//
// pIntializeParams  caller-owned; must be non-null with a non-null encodeConfig.
// codecGuid         codec to encode with (H.264 and HEVC get extra settings).
// presetGuid        preset whose defaults seed encodeConfig.
void NvEncoder::SetEncoderParams(NV_ENC_INITIALIZE_PARAMS* pIntializeParams, GUID codecGuid, GUID presetGuid)
{
if (!m_encoder)
{
NVENC_THROW_ERROR("Encoder Initialization failed", NV_ENC_ERR_NO_ENCODE_DEVICE);
// presumably unreachable if NVENC_THROW_ERROR throws — TODO confirm
return;
}
if (pIntializeParams == nullptr || pIntializeParams->encodeConfig == nullptr)
{
NVENC_THROW_ERROR("pInitializeParams and pInitializeParams->encodeConfig can't be NULL", NV_ENC_ERR_INVALID_PTR);
}
memset(pIntializeParams->encodeConfig, 0, sizeof(NV_ENC_CONFIG));
// Save the config pointer: the memset below wipes the whole params struct,
// including its encodeConfig member, which is restored right after.
auto pEncodeConfig = pIntializeParams->encodeConfig;
memset(pIntializeParams, 0, sizeof(NV_ENC_INITIALIZE_PARAMS));
pIntializeParams->encodeConfig = pEncodeConfig;
pIntializeParams->encodeConfig->version = NV_ENC_CONFIG_VER;
pIntializeParams->version = NV_ENC_INITIALIZE_PARAMS_VER;
pIntializeParams->encodeGUID = codecGuid;
pIntializeParams->presetGUID = presetGuid;
pIntializeParams->encodeWidth = m_width;
pIntializeParams->encodeHeight = m_height;
pIntializeParams->darWidth = m_width;
pIntializeParams->darHeight = m_height;
pIntializeParams->maxEncodeWidth = m_width;
pIntializeParams->maxEncodeHeight = m_height;
// Frame rate is hard-coded to 60/1.
pIntializeParams->frameRateNum = 60;
pIntializeParams->frameRateDen = 1;
pIntializeParams->enablePTD = 1;
pIntializeParams->reportSliceOffsets = 1;
pIntializeParams->enableSubFrameWrite = 0;
pIntializeParams->enableMEOnlyMode = m_motionEstimationOnly;
pIntializeParams->enableWeightedPrediction = 1;
#if defined(_WIN32)
pIntializeParams->enableEncodeAsync = true;
#endif
// Fetch the preset defaults and copy them over the whole encodeConfig.
// NOTE(review): the return status of nvEncGetEncodePresetConfig is not checked.
NV_ENC_PRESET_CONFIG presetConfig = { NV_ENC_PRESET_CONFIG_VER, { NV_ENC_CONFIG_VER } };
m_nvenc.nvEncGetEncodePresetConfig(m_encoder, codecGuid, presetGuid, &presetConfig);
memcpy(pIntializeParams->encodeConfig, &presetConfig.presetCfg, sizeof(NV_ENC_CONFIG));
// Everything below overrides the preset defaults copied above.
pIntializeParams->encodeConfig->frameIntervalP = 1;
pIntializeParams->encodeConfig->gopLength = NVENC_INFINITE_GOPLENGTH;
pIntializeParams->encodeConfig->rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ;
// NOTE(review): INT16_MAX is 32767. If averageBitRate is in bits per second
// (as the NVENC documentation describes), this is far too low for video and
// would explain heavily pixelated output under CBR — confirm and raise.
pIntializeParams->encodeConfig->rcParams.averageBitRate = INT16_MAX;
//pIntializeParams->encodeConfig->rcParams.maxBitRate = 99999999999;
pIntializeParams->encodeConfig->rcParams.zeroReorderDelay = 1;
if (pIntializeParams->encodeGUID == NV_ENC_CODEC_H264_GUID)
{
pIntializeParams->encodeConfig->encodeCodecConfig.h264Config.idrPeriod = NVENC_INFINITE_GOPLENGTH; //pIntializeParams->encodeConfig->gopLength;
// presumably sliceMode 1 means "limit slices by byte size", with
// sliceModeData as that limit — TODO confirm against the NVENC headers.
pIntializeParams->encodeConfig->encodeCodecConfig.h264Config.sliceMode = 1;
pIntializeParams->encodeConfig->encodeCodecConfig.h264Config.sliceModeData = INT16_MAX - 50;
pIntializeParams->encodeConfig->encodeCodecConfig.h264Config.repeatSPSPPS = 1;
}
else if (pIntializeParams->encodeGUID == NV_ENC_CODEC_HEVC_GUID)
{
pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig.pixelBitDepthMinus8 = 0;
pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig.idrPeriod = pIntializeParams->encodeConfig->gopLength;
}
}
These settings are generating images that look like this:
I'd really appreciate any pointers as to why my images are coming out so heavily pixelated when the settings appear to be correct AFAICT.

You are setting the average bitrate to 32767 (INT16_MAX), which is far too low for video:
pIntializeParams->encodeConfig->rcParams.averageBitRate = INT16_MAX;
I'd use 5000000 for HD and 20000000 for 4k as average bitrate.
FYI: NV_ENC_RC_PARAMS::averageBitRate is a 32 bit unsigned integer (uint32_t)

Related

Xaudio2 pop sound when changing buffer or looping

I have a simple program that plays a sine wave.
At the end of the buffer I get a pop sound.
If I try to loop I get the pop sound between each loop.
If I alternate between buffers I get the pop sound.
// Pairs an XAudio2 buffer descriptor with the sample memory it describes.
struct win32_audio_buffer
{
// Descriptor handed to SubmitSourceBuffer; filled in by Win32CreateAudioBuffer.
XAUDIO2_BUFFER XAudioBuffer = {};
// 16-bit sample storage; allocated in Win32CreateAudioBuffer.
int16 *Memory;
};
// Playback parameters shared by the XAudio2 setup, allocation and fill code.
struct win32_audio_setteings
{
// Used as WAVEFORMATEX::nSamplesPerSec.
int32 SampleRate = 44100;
// NOTE(review): Win32Playback divides the sample index by this value, so it
// acts as a period in samples rather than a frequency in Hz — confirm intent.
int32 ToneHz = 200;
// Used as WAVEFORMATEX::nChannels.
int32 Channels = 2;
// Multiplied by SampleRate to get the buffer length (presumably seconds).
int32 LoopTime = 10;
// Number of per-channel samples in one buffer (441000 with the defaults).
int32 TotalSamples = SampleRate * LoopTime;
};
// File-level audio state used by the Win32* helpers below.
win32_audio_setteings AudioSetteings;
win32_audio_buffer MainAudioBuffer;
win32_audio_buffer SecondaryAudioBuffer;
// Source voice created by Win32InitXaudio2; buffers are submitted to it.
IXAudio2SourceVoice* pSourceVoice;
// Creates the XAudio2 engine, a mastering voice and the global source voice
// (pSourceVoice) for 16-bit PCM using the channel count and sample rate from
// AudioSetteings.
// NOTE(review): the results of XAudio2Create, CreateMasteringVoice and
// CreateSourceVoice are not checked, and pXAudio2 / pMasterVoice are locals
// that are never released or stored.
internal void Win32InitXaudio2()
{
WAVEFORMATEX WaveFormat = {};
WaveFormat.wFormatTag = WAVE_FORMAT_PCM;
WaveFormat.nChannels = AudioSetteings.Channels;
WaveFormat.nSamplesPerSec = AudioSetteings.SampleRate;
WaveFormat.wBitsPerSample = 16;
// Bytes per frame (one sample for every channel).
WaveFormat.nBlockAlign = (WaveFormat.nChannels * WaveFormat.wBitsPerSample) / 8;
WaveFormat.nAvgBytesPerSec = WaveFormat.nSamplesPerSec * WaveFormat.nBlockAlign;
WaveFormat.cbSize = 0;
IXAudio2* pXAudio2;
IXAudio2MasteringVoice* pMasterVoice;
XAudio2Create(&pXAudio2);
pXAudio2->CreateMasteringVoice(&pMasterVoice);
pXAudio2->CreateSourceVoice(&pSourceVoice, &WaveFormat);
}
// Allocates sample memory for one buffer and fills in its XAUDIO2_BUFFER
// descriptor so that the whole allocation is played once.
//DOC: AudioBytes - Size of the audio data
//DOC: pAudioData - The buffer start location (Needs to be type cast into BYTE pointer)
internal void Win32CreateAudioBuffer(win32_audio_buffer *AudioBuffer)
{
// bytes = bytes-per-sample * channels * samples-per-second * LoopTime
int32 Size = (int16)sizeof(int16) * AudioSetteings.Channels * AudioSetteings.SampleRate * AudioSetteings.LoopTime;
// NOTE(review): the VirtualAlloc result is not checked for failure.
AudioBuffer->Memory = (int16 *)VirtualAlloc(0, Size, MEM_COMMIT|MEM_RESERVE, PAGE_READWRITE);
AudioBuffer->XAudioBuffer.AudioBytes = Size;
AudioBuffer->XAudioBuffer.pAudioData = (BYTE *) AudioBuffer->Memory;
//AudioBuffer->XAudioBuffer.Flags = XAUDIO2_END_OF_STREAM;
AudioBuffer->XAudioBuffer.PlayBegin = 0;
AudioBuffer->XAudioBuffer.PlayLength = AudioSetteings.TotalSamples;
//AudioBuffer->XAudioBuffer.LoopCount = 10;
}
// Fills AudioBuffer->Memory with a sine tone (same value written to two
// interleaved channels, amplitude 4000) and submits the buffer to the global
// source voice.
internal void Win32Playback(win32_audio_buffer *AudioBuffer)
{
    // Write cursor over the interleaved sample pairs.
    int16 *Out = AudioBuffer->Memory;
    int32 Frame = 0;
    while (Frame < AudioSetteings.TotalSamples)
    {
        real32 Phase = Frame * 2.0f * Pi32 / AudioSetteings.ToneHz;
        int16 Amplitude = (int16)(4000 * sinf(Phase));
        Out[0] = Amplitude;
        Out[1] = Amplitude;
        Out += 2;
        ++Frame;
    }
    pSourceVoice->SubmitSourceBuffer(&AudioBuffer->XAudioBuffer);
}
// Start-up sequence: create the voice, allocate and fill one buffer, submit
// it, then start playback. The secondary buffer is currently disabled.
Win32InitXaudio2();
Win32CreateAudioBuffer(&MainAudioBuffer);
//Win32CreateAudioBuffer(&SecondaryAudioBuffer);
Win32Playback(&MainAudioBuffer);
//Win32Playback(&SecondaryAudioBuffer);
pSourceVoice->Start(0);
I have posted the relevant code here and it just play one sine buffer.
I tried alternating buffers and starting and ending on a zero-crossing.
I had a similar problem.
Maybe it will help someone.
The problem is in allocating more memory for audio than needed.
So I tried something like this and found the problem (this is not a solution, I am just showing how I found the problem! If it does not help in your case, then the problem is probably somewhere else):
// Diagnostic only: report 100 fewer bytes than the source claims, to test
// whether the reported size runs past the real audio data.
// XAUDIO2_BUFFER m_xaudio2Buffer...
m_xaudio2Buffer.pAudioData = source->m_data;
m_xaudio2Buffer.AudioBytes = source->m_dataSize - 100; // -100 and `pop` sound is gone
m_xaudio2Buffer.Flags = XAUDIO2_END_OF_STREAM;

How to make OpenVR multiview rendering to work?

I'm optimizing my already working VR rendering by implementing multiview rendering into my custom C++ engine, but can't get the other eye to render. Here's my vertex shader:
// Vertex positions are fetched from a buffer by vertex index instead of
// arriving as vertex attributes.
layout(set=0, binding=2) Buffer<float3> positions : register(b2);
// Unlit vertex shader: looks up UV and position by vertexId and transforms the
// position with the local-to-clip matrix selected by viewId.
// `uvs` and `data` are declared outside this snippet.
VSOutput unlitVS( uint vertexId : SV_VertexID, uint viewId : SV_ViewID )
{
VSOutput vsOut;
vsOut.uv = uvs[ vertexId ];
vsOut.pos = mul( data.localToClip[ viewId ], float4( positions[ vertexId ], 1 ) );
return vsOut;
}
If I use viewId to select a transformation matrix, I'm getting an error:
ERROR: Vertex shader consumes input at location 0 but not provided
If I don't use viewId my other eye doesn't render at all. Here's how I setup my framebuffer, it has 2 layers and after rendering the left eye, I copy the second layer to the right eye.
// Colour target for multiview rendering: a 2D image with two array layers,
// usable as colour attachment, sampled image and transfer source.
VkImageCreateInfo imageCreateInfo = {};
imageCreateInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
imageCreateInfo.pNext = nullptr;
imageCreateInfo.imageType = VK_IMAGE_TYPE_2D;
imageCreateInfo.extent.width = width;
imageCreateInfo.extent.height = height;
imageCreateInfo.extent.depth = 1;
imageCreateInfo.mipLevels = 1;
// Two layers: the view mask used with this target enables views 0 and 1.
imageCreateInfo.arrayLayers = 2;
imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
imageCreateInfo.format = VK_FORMAT_R8G8B8A8_SRGB;
imageCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
// Any sampleCount other than 4 falls back to single-sampled.
imageCreateInfo.samples = sampleCount == 4 ? VK_SAMPLE_COUNT_4_BIT : VK_SAMPLE_COUNT_1_BIT;
imageCreateInfo.usage = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
imageCreateInfo.flags = 0;
// NOTE(review): this result is not checked in the visible snippet.
VkResult result = vkCreateImage( dev, &imageCreateInfo, nullptr, &outFramebufferDesc.image );
...
// Creates a single-subpass render pass with multiview enabled for views 0 and
// 1, then a framebuffer with one colour and one depth/stencil attachment.
// Bit mask that specifies which view rendering is broadcast to.
// 0011 = Broadcast to first and second view (layer)
const uint32_t viewMask = 0b00000011;
// Bit mask that specifies correlation between views
// An implementation may use this for optimizations (concurrent render)
const uint32_t correlationMask = 0b00000011;
VkRenderPassMultiviewCreateInfo renderPassMultiviewCI{};
renderPassMultiviewCI.sType = VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO;
renderPassMultiviewCI.subpassCount = 1;
renderPassMultiviewCI.pViewMasks = &viewMask;
renderPassMultiviewCI.correlationMaskCount = 1;
renderPassMultiviewCI.pCorrelationMasks = &correlationMask;
VkRenderPassCreateInfo renderPassCreateInfo = {};
renderPassCreateInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
renderPassCreateInfo.flags = 0;
// Chain the multiview description into the render pass.
renderPassCreateInfo.pNext = &renderPassMultiviewCI;
renderPassCreateInfo.attachmentCount = 2;
renderPassCreateInfo.pAttachments = &attachmentDescs[ 0 ];
renderPassCreateInfo.subpassCount = 1;
renderPassCreateInfo.pSubpasses = &subPassCreateInfo;
renderPassCreateInfo.dependencyCount = 0;
renderPassCreateInfo.pDependencies = nullptr;
result = vkCreateRenderPass( dev, &renderPassCreateInfo, nullptr, &outFramebufferDesc.renderPass );
assert( result == VK_SUCCESS );
VkImageView attachments[ 2 ] = { outFramebufferDesc.imageView, outFramebufferDesc.depthStencilImageView };
VkFramebufferCreateInfo framebufferCreateInfo = {};
framebufferCreateInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
framebufferCreateInfo.pNext = nullptr;
framebufferCreateInfo.renderPass = outFramebufferDesc.renderPass;
framebufferCreateInfo.attachmentCount = 2;
framebufferCreateInfo.pAttachments = &attachments[ 0 ];
framebufferCreateInfo.width = width;
framebufferCreateInfo.height = height;
// NOTE(review): the Vulkan spec requires layers == 1 when the render pass
// uses multiview; the attached image views must then cover both array layers
// (their creation is not shown here) — confirm.
framebufferCreateInfo.layers = 1;
// NOTE(review): this result is not checked in the visible snippet.
result = vkCreateFramebuffer( dev, &framebufferCreateInfo, nullptr, &outFramebufferDesc.framebuffer );
...
// After rendering the left eye:
// Copies array layer 1 of the multiview colour image into layer 0 of
// device.fbDesc.imageCopy. Offsets, mip level and the destination base layer
// stay zero from the {} initialisation.
VkImageCopy region = {};
region.extent.width = device.width;
region.extent.height = device.height;
region.extent.depth = 1;
region.srcSubresource.baseArrayLayer = 1;
region.srcSubresource.layerCount = 1;
region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
region.dstSubresource.layerCount = 1;
region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
// Assumes both images are already in the transfer layouts named here; the
// barriers are not part of this snippet.
vkCmdCopyImage( gCurrentDrawCommandBuffer, device.fbDesc.image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, device.fbDesc.imageCopy, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region );
Here's my VkPipelineVertexInputStateCreateInfo, I'm using programmable vertex pulling:
// Empty vertex input state: no bindings or attributes are declared, because
// vertex data is fetched in the shader by vertex index.
VkPipelineVertexInputStateCreateInfo inputState = {};
inputState.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
How can I fix the error and get right eye working? I'm using HTC Vive, OpenVR 1.7.15, AMD R9 Nano, Vulkan 1.1 on Vulkan SDK 1.1.126.0, Windows 10. I have enabled device extension VK_KHR_MULTIVIEW_EXTENSION_NAME.

YCbCr Sampler in Vulkan

I've been trying to sample a YCbCr image in Vulkan but I keep getting incorrect results, and I was hoping someone might be able to spot my mistake.
I have a NV12 YCbCr image which I want to render onto two triangles forming a quad. If i understand correctly, the VkFormat that corresponds to NV12 is VK_FORMAT_G8_B8R8_2PLANE_420_UNORM. Below is the code that I would expect to work, but I'll try to explain what I'm trying to do as well:
Create a VkSampler with a VkSamplerYcbcrConversion (with the correct format) in pNext
Read NV12 data into staging buffer
Create VkImage with the correct format and specify that the planes are disjoint
Get memory requirements (and offset for plane 1) for each plane (0 and 1)
Allocate device local memory for the image data
Bind each plane to the correct location in memory
Copy staging buffer to image memory
Create VkImageView with the same format as the VkImage and the same VkSamplerYcbcrConversionInfo as the VkSampler in pNext.
Code:
// Objects shared by the YCbCr setup code below.
VkSamplerYcbcrConversion ycbcr_sampler_conversion;
// Chained into both the sampler and the image view create infos.
VkSamplerYcbcrConversionInfo ycbcr_info;
VkSampler ycbcr_sampler;
VkImage image;
// One allocation backing both planes of `image`.
VkDeviceMemory image_memory;
// Offsets of each plane inside image_memory.
VkDeviceSize memory_offset_plane0, memory_offset_plane1;
VkImageView image_view;
// On-disk layouts that ReadYCbCrFile understands.
enum YCbCrStorageFormat
{
NV12
};
// Reads a whole raw YCbCr file into a newly allocated byte array.
//
// filename              path of the file to read (opened in binary mode).
// storage_format        on-disk layout; only NV12 is handled.
// vulkan_format         must be VK_FORMAT_G8_B8R8_2PLANE_420_UNORM for NV12;
//                       any other value terminates the process via exit(1).
// buffer_size           out: number of bytes read, 0 on failure.
// buffer_offset_plane1  out: byte offset of the interleaved chroma plane,
//                       i.e. two thirds of the file size for NV12.
// buffer_offset_plane2  out: always 0 (unused for NV12).
//
// Returns the data, which the caller must release with delete[], or nullptr
// when the file cannot be opened or read or the storage format is unknown.
unsigned char* ReadYCbCrFile(const std::string& filename, YCbCrStorageFormat storage_format, VkFormat vulkan_format, uint32_t* buffer_size, uint32_t* buffer_offset_plane1, uint32_t* buffer_offset_plane2)
{
    // Give every output a defined value on all paths, including failures.
    *buffer_size = 0;
    *buffer_offset_plane1 = 0;
    *buffer_offset_plane2 = 0;

    // std::ios::ate positions the stream at the end so tellg() yields the size.
    std::ifstream file(filename.c_str(), std::ios::in | std::ios::binary | std::ios::ate);
    if (!file.is_open())
    {
        ELOG("Failed to open YCbCr image");
        return nullptr;
    }
    const std::streamoff file_size = file.tellg();
    if (file_size < 0)
    {
        ELOG("Failed to open YCbCr image");
        return nullptr;
    }
    file.seekg(0);

    unsigned char* data = nullptr;
    switch (storage_format)
    {
    case NV12:
    {
        if (vulkan_format != VK_FORMAT_G8_B8R8_2PLANE_420_UNORM)
        {
            ILOG("A 1:1 relationship doesn't exist between NV12 and 420, exiting");
            exit(1);
        }
        *buffer_size = static_cast<uint32_t>(file_size);
        // NV12: full-resolution luma plane followed by half-size interleaved
        // chroma, so luma takes two thirds of the file.
        *buffer_offset_plane1 = (*buffer_size / 3) * 2;
        *buffer_offset_plane2 = 0; //Not used
        data = new unsigned char[*buffer_size];
        file.read(reinterpret_cast<char*>(data), *buffer_size);
        if (!file)
        {
            // Short or failed read: do not hand back partially filled data.
            ELOG("Failed to read YCbCr image");
            delete[] data;
            data = nullptr;
            *buffer_size = 0;
            *buffer_offset_plane1 = 0;
        }
        break;
    }
    default:
        ELOG("A YCbCr storage format is required");
        break;
    }
    return data;
}
// Creates the YCbCr conversion object and a sampler that references it.
// The chroma location is picked from what the format reports as supported.
VkFormatProperties format_properties;
vkGetPhysicalDeviceFormatProperties(physical_device, VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, &format_properties);
// NOTE(review): only optimalTilingFeatures is inspected here — confirm this
// matches the tiling the image is actually created with.
bool cosited = false, midpoint = false;
if (format_properties.optimalTilingFeatures & VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT)
{
cosited = true;
}
else if (format_properties.optimalTilingFeatures & VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT)
{
midpoint = true;
}
// Only logs; execution continues and the midpoint branch below is used.
if (!cosited && !midpoint)
{
ELOG("Nither VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT nor VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT is supported for VK_FORMAT_G8_B8R8_2PLANE_420_UNORM");
}
VkSamplerYcbcrConversionCreateInfo conversion_info = {};
conversion_info.sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO;
conversion_info.pNext = NULL;
conversion_info.format = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
conversion_info.ycbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709;
conversion_info.ycbcrRange = VK_SAMPLER_YCBCR_RANGE_ITU_FULL;
conversion_info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
conversion_info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
conversion_info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
conversion_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
if (cosited)
{
conversion_info.xChromaOffset = VK_CHROMA_LOCATION_COSITED_EVEN;
conversion_info.yChromaOffset = VK_CHROMA_LOCATION_COSITED_EVEN;
}
else
{
conversion_info.xChromaOffset = VK_CHROMA_LOCATION_MIDPOINT;
conversion_info.yChromaOffset = VK_CHROMA_LOCATION_MIDPOINT;
}
conversion_info.chromaFilter = VK_FILTER_LINEAR;
conversion_info.forceExplicitReconstruction = VK_FALSE;
VkResult res = vkCreateSamplerYcbcrConversion(logical_device, &conversion_info, NULL, &ycbcr_sampler_conversion);
CHECK_VK_RESULT(res, "Failed to create YCbCr conversion sampler");
ILOG("Successfully created YCbCr conversion");
// Shared info struct: chained into the sampler here and into the image view
// create info elsewhere, so both refer to the same conversion.
ycbcr_info.sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO;
ycbcr_info.pNext = NULL;
ycbcr_info.conversion = ycbcr_sampler_conversion;
VkSamplerCreateInfo sampler_info = {};
sampler_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
sampler_info.pNext = &ycbcr_info;
sampler_info.flags = 0;
sampler_info.magFilter = VK_FILTER_LINEAR;
sampler_info.minFilter = VK_FILTER_LINEAR;
sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
sampler_info.mipLodBias = 0.0f;
sampler_info.anisotropyEnable = VK_FALSE;
//sampler_info.maxAnisotropy IGNORED
sampler_info.compareEnable = VK_FALSE;
//sampler_info.compareOp = IGNORED
sampler_info.minLod = 0.0f;
sampler_info.maxLod = 1.0f;
sampler_info.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
sampler_info.unnormalizedCoordinates = VK_FALSE;
res = vkCreateSampler(logical_device, &sampler_info, NULL, &ycbcr_sampler);
CHECK_VK_RESULT(res, "Failed to create YUV sampler");
ILOG("Successfully created sampler with YCbCr in pNext");
// Loads the NV12 file from disk and copies it into a host-visible staging
// buffer; the CPU-side copy is released once the staging buffer is filled.
std::string filename = "tree_nv12_1920x1080.yuv";
uint32_t width = 1920, height = 1080;
VkFormat format = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
uint32_t buffer_size, buffer_offset_plane1, buffer_offset_plane2;
unsigned char* ycbcr_data = ReadYCbCrFile(filename, NV12, VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, &buffer_size, &buffer_offset_plane1, &buffer_offset_plane2);
//Load image into staging buffer
VkDeviceMemory stage_buffer_memory;
VkBuffer stage_buffer = create_vk_buffer(buffer_size, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stage_buffer_memory);
void* stage_memory_ptr;
// NOTE(review): the vkMapMemory result is not checked.
vkMapMemory(logical_device, stage_buffer_memory, 0, buffer_size, 0, &stage_memory_ptr);
memcpy(stage_memory_ptr, ycbcr_data, buffer_size);
vkUnmapMemory(logical_device, stage_buffer_memory);
delete[] ycbcr_data;
//Create image
// Two-plane 4:2:0 image whose planes are bound to memory separately
// (VK_IMAGE_CREATE_DISJOINT_BIT). It is filled by a buffer copy and then
// sampled in a shader.
VkImageCreateInfo img_info = {};
img_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
img_info.flags = VK_IMAGE_CREATE_DISJOINT_BIT;
img_info.imageType = VK_IMAGE_TYPE_2D;
img_info.extent.width = width;
img_info.extent.height = height;
img_info.extent.depth = 1;
img_info.mipLevels = 1;
img_info.arrayLayers = 1;
img_info.format = format;
img_info.tiling = VK_IMAGE_TILING_LINEAR;//VK_IMAGE_TILING_OPTIMAL;
img_info.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
// Image usage takes VkImageUsageFlagBits. The original used
// VK_BUFFER_USAGE_TRANSFER_DST_BIT, a buffer flag that only worked because
// it happens to share the same numeric value.
img_info.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
img_info.samples = VK_SAMPLE_COUNT_1_BIT;
img_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
VkResult result = vkCreateImage(logical_device, &img_info, NULL, &image);
CHECK_VK_RESULT(result, "vkCreateImage failed to create image handle");
ILOG("Image created!");
//Get memory requirements for each plane and combine
//Plane 0
VkImagePlaneMemoryRequirementsInfo image_plane_info = {};
image_plane_info.sType = VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO;
image_plane_info.pNext = NULL;
image_plane_info.planeAspect = VK_IMAGE_ASPECT_PLANE_0_BIT;
VkImageMemoryRequirementsInfo2 image_info2 = {};
image_info2.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2;
image_info2.pNext = &image_plane_info;
image_info2.image = image;
VkImagePlaneMemoryRequirementsInfo memory_plane_requirements = {};
memory_plane_requirements.sType = VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO;
memory_plane_requirements.pNext = NULL;
memory_plane_requirements.planeAspect = VK_IMAGE_ASPECT_PLANE_0_BIT;
VkMemoryRequirements2 memory_requirements2 = {};
memory_requirements2.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2;
memory_requirements2.pNext = &memory_plane_requirements;
vkGetImageMemoryRequirements2(logical_device, &image_info2, &memory_requirements2);
VkDeviceSize image_size = memory_requirements2.memoryRequirements.size;
uint32_t image_bits = memory_requirements2.memoryRequirements.memoryTypeBits;
//Set offsets
memory_offset_plane0 = 0;
memory_offset_plane1 = image_size;
//Plane 1
image_plane_info.planeAspect = VK_IMAGE_ASPECT_PLANE_1_BIT;
memory_plane_requirements.planeAspect = VK_IMAGE_ASPECT_PLANE_1_BIT;
vkGetImageMemoryRequirements2(logical_device, &image_info2, &memory_requirements2);
image_size += memory_requirements2.memoryRequirements.size;
image_bits = image_bits | memory_requirements2.memoryRequirements.memoryTypeBits;
//Allocate image memory
VkMemoryAllocateInfo allocate_info = {};
allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
allocate_info.allocationSize = image_size;
allocate_info.memoryTypeIndex = get_device_memory_type(image_bits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
result = vkAllocateMemory(logical_device, &allocate_info, NULL, &image_memory);
CHECK_VK_RESULT(result, "vkAllocateMemory failed to allocate image memory");
//Bind each image plane to memory
// One VkBindImageMemoryInfo per plane; the plane is selected by the
// VkBindImagePlaneMemoryInfo chained through pNext. Both entries bind into the
// same allocation (image_memory) at their respective offsets.
std::vector<VkBindImageMemoryInfo> bind_image_memory_infos(2);
//Plane 0
VkBindImagePlaneMemoryInfo bind_image_plane0_info = {};
bind_image_plane0_info.sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
bind_image_plane0_info.pNext = NULL;
bind_image_plane0_info.planeAspect = VK_IMAGE_ASPECT_PLANE_0_BIT;
VkBindImageMemoryInfo& bind_image_memory_plane0_info = bind_image_memory_infos[0];
bind_image_memory_plane0_info.sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
bind_image_memory_plane0_info.pNext = &bind_image_plane0_info;
bind_image_memory_plane0_info.image = image;
bind_image_memory_plane0_info.memory = image_memory;
bind_image_memory_plane0_info.memoryOffset = memory_offset_plane0;
//Plane 1
VkBindImagePlaneMemoryInfo bind_image_plane1_info = {};
bind_image_plane1_info.sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
bind_image_plane1_info.pNext = NULL;
bind_image_plane1_info.planeAspect = VK_IMAGE_ASPECT_PLANE_1_BIT;
VkBindImageMemoryInfo& bind_image_memory_plane1_info = bind_image_memory_infos[1];
bind_image_memory_plane1_info.sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
bind_image_memory_plane1_info.pNext = &bind_image_plane1_info;
bind_image_memory_plane1_info.image = image;
bind_image_memory_plane1_info.memory = image_memory;
bind_image_memory_plane1_info.memoryOffset = memory_offset_plane1;
// NOTE(review): the VkResult of vkBindImageMemory2 is not checked.
vkBindImageMemory2(logical_device, bind_image_memory_infos.size(), bind_image_memory_infos.data());
// Uploads both planes from the staging buffer with one copy command, then
// moves the image to the shader-read layout.
// NOTE(review): this call goes through `context.` while the matching call
// after the copy is unqualified — confirm both resolve to the same helper.
context.transition_vk_image_layout(image, format, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_PREINITIALIZED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
//Copy staging buffer to device local buffer
VkCommandBuffer tmp_cmd_buffer = begin_tmp_vk_cmd_buffer();
std::vector<VkBufferImageCopy> plane_regions(2);
// Region 0: plane 0 at full resolution, read from the start of the buffer.
plane_regions[0].bufferOffset = 0;
plane_regions[0].bufferRowLength = 0;
plane_regions[0].bufferImageHeight = 0;
plane_regions[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_PLANE_0_BIT;
plane_regions[0].imageSubresource.mipLevel = 0;
plane_regions[0].imageSubresource.baseArrayLayer = 0;
plane_regions[0].imageSubresource.layerCount = 1;
plane_regions[0].imageOffset = { 0, 0, 0 };
plane_regions[0].imageExtent = { width, height, 1 };
// Region 1: plane 1 at half resolution in both dimensions, read from the
// offset reported by ReadYCbCrFile.
plane_regions[1].bufferOffset = buffer_offset_plane1;
plane_regions[1].bufferRowLength = 0;
plane_regions[1].bufferImageHeight = 0;
plane_regions[1].imageSubresource.aspectMask = VK_IMAGE_ASPECT_PLANE_1_BIT;
plane_regions[1].imageSubresource.mipLevel = 0;
plane_regions[1].imageSubresource.baseArrayLayer = 0;
plane_regions[1].imageSubresource.layerCount = 1;
plane_regions[1].imageOffset = { 0, 0, 0 };
plane_regions[1].imageExtent = { width / 2, height / 2, 1 };
vkCmdCopyBufferToImage(tmp_cmd_buffer, stage_buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, plane_regions.size(), plane_regions.data());
end_tmp_vk_cmd_buffer(tmp_cmd_buffer); //Submit and waits
// The staging resources are released once the copy has completed.
vkFreeMemory(logical_device, stage_buffer_memory, NULL);
vkDestroyBuffer(logical_device, stage_buffer, NULL);
transition_vk_image_layout(image, format, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
// Creates the image view used for sampling. The same
// VkSamplerYcbcrConversionInfo that was chained into the sampler is chained
// here so that view and sampler agree on the conversion.
VkImageViewCreateInfo image_view_info = {};
image_view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
image_view_info.pNext = &ycbcr_info;
image_view_info.flags = 0;
image_view_info.image = image;
image_view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
image_view_info.format = format;
image_view_info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
image_view_info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
image_view_info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
image_view_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
image_view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
image_view_info.subresourceRange.baseMipLevel = 0;
image_view_info.subresourceRange.levelCount = 1;
image_view_info.subresourceRange.baseArrayLayer = 0;
image_view_info.subresourceRange.layerCount = 1;
// The original passed `&.image_view`, which does not compile; the handle is
// written to the file-level image_view variable.
// NOTE(review): if this runs in the same scope as the earlier `VkResult res`
// declaration, drop the type here and assign to the existing variable.
VkResult res = vkCreateImageView(logical_device, &image_view_info, NULL, &image_view);
CHECK_VK_RESULT(res, "Failed to create image view");
ILOG("Successfully created image, allocated image memory and created image view");
I receive one validation error: vkCmdCopyBufferToImage() parameter, VkImageAspect pRegions->imageSubresource.aspectMask, is an unrecognized enumerator, but from inspecting the validation code, it seems that it's just a bit outdated and this shouldn't be an issue.
The rest of the code just sets up regular descriptor layouts/pools and allocated and updates accordingly (I've verified with a regular RGB texture).
The fragment shader is as follows:
// Derive texture coordinates from the fragment position, assuming a 1024x1024
// target and flipping the vertical axis, then sample the converted colour.
vec2 uv = vec2(gl_FragCoord.x / 1024.0, 1.0 - (gl_FragCoord.y / 1024.0));
out_color = vec4(texture(ycbcr_image, uv).rgb, 1.0f);
When I run my program I only get the red component (the image is essentially a greyscale image). From a little testing, it seems that the VkSamplerYcbcrConversion setup has no effect, as removing it from both VkSamplerCreateInfo.pNext and VkImageViewCreateInfo.pNext doesn't change anything.
I've also looked here, Khronos YCbCr tests, but I can't find any real mistake.
Solution: according to the spec, sec. 12.1, Conversion must be fixed at pipeline creation time, through use of a combined image sampler with an immutable sampler in VkDescriptorSetLayoutBinding.
By adding the ycbcr_sampler to pImmutableSamplers when setting up the descriptor set layout binding it now works:
// Combined image sampler binding for the fragment stage. The YCbCr sampler is
// supplied as an immutable sampler, which fixes the conversion when the
// layout (and therefore the pipeline) is created.
VkDescriptorSetLayoutBinding image_binding = {};
image_binding.binding = 0;
image_binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
image_binding.descriptorCount = 1;
image_binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
image_binding.pImmutableSamplers = &ycbcr_sampler;

Problems with fcvDrawContouru8 for 64bit systems

I am using fastcv v1.7 to develop an image processing algorithm, a part of the process includes finding contours from an image, selecting a choice few contours among them and then drawing those contours only.
This code block runs smoothly on 32-bit systems and produces the expected output, but on 64-bit systems the same code crashes unexpectedly during the loop that calls fcvDrawContouru8. The crash is unpredictable: sometimes the loop iterates 2 or 3 times and sometimes it crashes on the first iteration. I can't work out whether the problem is with memory allocation on 64-bit or with fastcv itself. Any suggestions will be helpful.
// Finds contours in `textureless`, keeps those whose polygon area is at least
// min_textureless_area, and draws each kept contour into dist_fcv using its
// 1-based index among the kept contours as the colour.

// Destination image for the drawing pass, cleared to zero.
uint8_t* dist_fcv = (uint8_t*)fcvMemAlloc(dist_8u.cols*dist_8u.rows*OPT_CV_ELEM_SIZE(OPT_CV_8UC1), FCV_ALIGN);
memset(dist_fcv, 0, dist_8u.cols*dist_8u.rows*OPT_CV_ELEM_SIZE(OPT_CV_8UC1));
uint32_t maxNumContours = MAX_CNT;
uint32_t sizeOfpBuffer = 0;
uint32_t maxPoints= ((2*dist_8u.cols) + (2 * dist_8u.rows));
uint32_t pNumContours = 0;
uint32_t pNumContourPoints[MAX_CNT] = {0};
uint32_t** pContourStartPointsfind = (uint32_t**)fcvMemAlloc(MAX_CNT*2*sizeof(uint32_t*),16);
// Two coordinates per point, maxPoints points per contour, MAX_CNT contours.
sizeOfpBuffer = (MAX_CNT * 2 * maxPoints * sizeof(uint32_t));
uint32_t *pPointBuffer=(uint32_t *)malloc(sizeOfpBuffer);
memset(pPointBuffer,0,sizeOfpBuffer);
int32_t hierarchy[MAX_CNT][4];
void* cHandle = fcvFindContoursAllocate(dist_8u.cols);
fcvFindContoursExternalu8(textureless.data.ptr,
dist_8u.cols,
dist_8u.rows,
dist_8u.cols,
maxNumContours,
&pNumContours,
pNumContourPoints,
pContourStartPointsfind,
pPointBuffer,
sizeOfpBuffer,
hierarchy,
cHandle);
// Number of contours that pass the area filter.
size_t n_TL = 0;
uint32_t** pContourStartPointsdraw = (uint32_t**)fcvMemAlloc(MAX_CNT*2*sizeof(uint32_t*),16);
uint32_t pNumDrawContourPoints[MAX_CNT] = {0};
// Compacted copy of the points of the kept contours only.
uint32_t* dPointBuffer=(uint32_t *)malloc(sizeOfpBuffer);
uint32_t* start_contour = pPointBuffer;
uint32_t* start_contour_dPoint = dPointBuffer;
uint32_t** draw_ptr = pContourStartPointsdraw;
// Filter pass. This walks pPointBuffer assuming contours are stored back to
// back, two uint32_t values per point.
for (size_t i = 0; i < pNumContours; i++)
{
int points_per_contour = pNumContourPoints[i];
double area = polyArea(start_contour,points_per_contour*2);
if(area < min_textureless_area)
{
start_contour = start_contour + points_per_contour*2;
continue;
}
// Record the start of this contour inside dPointBuffer, which is the buffer
// later handed to fcvDrawContouru8. The original stored the pointer into
// pPointBuffer instead, so the start pointers and the point buffer passed
// to the draw call referred to two different allocations.
*(draw_ptr) = start_contour_dPoint;
pNumDrawContourPoints[n_TL] = pNumContourPoints[i];
memcpy(start_contour_dPoint,start_contour,points_per_contour*2*sizeof(uint32_t));
start_contour_dPoint = start_contour_dPoint + points_per_contour*2;
start_contour = start_contour + points_per_contour*2;
n_TL++;
draw_ptr++;
}
uint32_t* holeflag = (uint32_t*)malloc(pNumContours*sizeof(uint32_t));
memset(holeflag,0,pNumContours*sizeof(uint32_t));
uint32_t bufferSize = 0;
// Draw pass: one call per kept contour.
start_contour_dPoint = dPointBuffer;
draw_ptr = pContourStartPointsdraw;
// size_t index avoids the signed/unsigned comparison against n_TL.
for(size_t i = 0; i < n_TL; i++)
{
int points_per_contour = pNumDrawContourPoints[i];
bufferSize = points_per_contour*2*sizeof(uint32_t);
// NOTE(review): `hierarchy` still describes the unfiltered contour set from
// the find call, while only one contour is passed here; if the library
// follows hierarchy links it may index past the single start pointer —
// confirm against the FastCV documentation.
// NOTE(review): the colour argument i+1 presumably narrows to 8 bits, so it
// would wrap after 255 kept contours — confirm.
fcvDrawContouru8(dist_fcv,
dist_8u.cols,
dist_8u.rows,
dist_8u.cols,
1,
holeflag,
&pNumDrawContourPoints[i],
(const uint32_t ** __restrict)(draw_ptr),
bufferSize,
start_contour_dPoint,
hierarchy,
1,1,i+1,0); // the original was missing this terminating semicolon
start_contour_dPoint = start_contour_dPoint + points_per_contour*2;
draw_ptr++;
}
// NOTE(review): dPointBuffer, holeflag and pContourStartPointsdraw are not
// released in this snippet; free them once they are no longer needed.
free(pPointBuffer);
fcvFindContoursDelete(cHandle);
fcvMemFree(pContourStartPointsfind);

RGB to x264 : Strange color render

i'm trying to make a video from an OpenGl context.
I'm using glReadPixels; to be sure the RGB buffer data is OK, I save it into a BMP file, which I can read correctly.
My .h264 video is encoded but there are some artefact and i don't understand why.
I tried a lot of different parameters for the x264_param_t, but nothing gave a better result!
Bitmap saved (OpenGL real data) : Bitmap from OpenGl (1mo)
Raw h264 with error : Raw h264 video (1mo)
OpenGl ReadPixel :
// Reads the front buffer back as tightly packed 3-byte BGR pixels.
// NOTE(review): the buffer is sized from ClientWidth/ClientHeight while the
// read uses ldwidth/ldheight — confirm they are equal. Also confirm the GL
// pack alignment, since rows may be padded when width*3 is not a multiple of
// the alignment.
int nSize = ClientHeight * ClientWidth * 3;
GLubyte *inBuff = new GLubyte[nSize];
glReadBuffer(GL_FRONT);
glReadPixels(0, 0, ldwidth, ldheight, GL_BGR, GL_UNSIGNED_BYTE, inBuff);
The params define :
// Configures x264 for BGR input at mWidth x mHeight and mFps frames per
// second, opens the encoder and allocates the input and output pictures.
x264_param_default(&mX264_param_t);
x264_param_default_preset(&mX264_param_t, "placebo", "film");
mX264_param_t.i_csp = X264_CSP_BGR;
mX264_param_t.i_threads = 6;
mX264_param_t.i_width = mWidth;
mX264_param_t.i_height = mHeight;
mX264_param_t.i_fps_num = mFps;
mX264_param_t.i_fps_den = 1;
// Intra refres:
// Maximum keyframe interval of mFps frames.
mX264_param_t.i_keyint_max = mFps;
mX264_param_t.b_intra_refresh = 1;
//Rate control:
mX264_param_t.rc.i_rc_method = X264_RC_CRF;
mX264_param_t.rc.f_rf_constant = 25;
mX264_param_t.rc.f_rf_constant_max = 35;
int bps = 5000;
// NOTE(review): i_bitrate is set although the method is CRF — confirm whether
// it has any effect in this mode.
mX264_param_t.rc.i_bitrate = bps;
mX264_param_t.rc.i_vbv_max_bitrate = bps;
mX264_param_t.i_bframe = 2;
mX264_param_t.i_keyint_min = mFps / 4;
//For streaming:
mX264_param_t.b_repeat_headers = 1;
mX264_param_t.b_annexb = 1;
mX264_param_t.i_log_level = X264_LOG_DEBUG;
// NOTE(review): the return value is ignored; confirm "baseline" is compatible
// with the B-frame and colour-space settings above.
x264_param_apply_profile(&mX264_param_t, "baseline");
mpEncoder = x264_encoder_open(&mX264_param_t);
// Read back the parameters the encoder actually uses.
x264_encoder_parameters(mpEncoder, &mX264_param_t);
mpPictureOut = new x264_picture_t();
mpPictureIn = new x264_picture_t();
x264_picture_alloc(mpPictureIn, X264_CSP_BGR | X264_CSP_VFLIP, mWidth, mHeight);
Then the encoding loop :
// Encodes one frame: points the input picture at the caller's BGR data,
// encodes it and, if any output was produced, writes it to the file.
mpPictureIn->img.i_csp = X264_CSP_BGR;
mpPictureIn->img.i_plane = 1;
// One packed plane, 3 bytes per pixel.
mpPictureIn->img.i_stride[0] = 3 * mWidth;
// Replaces the plane pointer set up by x264_picture_alloc with caller data.
mpPictureIn->img.plane[0] = rgbframe;
mpPictureIn->i_pts = mFrameCount;
mpPictureIn->i_type = X264_TYPE_AUTO;
mpPictureOut->i_pts = mFrameCount;
int i_nals;
x264_nal_t* nals;
int frame_size = x264_encoder_encode(mpEncoder, &nals, &i_nals, mpPictureIn, mpPictureOut);
// The frame counter (and therefore the next pts) only advances when the
// encoder returned output.
if(frame_size > 0)
{
// Writes frame_size bytes starting at the first NAL payload; presumably this
// relies on x264 storing all NAL payloads of one call contiguously — confirm.
mpFileOut->write_frame(nals[0].p_payload, frame_size, mpPictureOut);
mFrameCount++;
}
The write frame :
// Writes i_size bytes starting at p_nalu to mFileHandle as a single item.
// Returns i_size when fwrite reports the item as written, otherwise -1.
// p_picture is not used.
int TVideoFileWriter::write_frame(uint8_t *p_nalu, int i_size, x264_picture_t *p_picture)
{
    const size_t items_written = fwrite(p_nalu, i_size, 1, mFileHandle);
    return (items_written != 0) ? i_size : -1;
}
You opened your output file in text mode (and not binary mode), so all 0x0A bytes were replaced with 0x0D 0x0A bytes.
Here is your output with this replace reverted: out_fixed.h264
And it plays fine.