I'm having a segmentation fault on png_read_image() and I can't figure out why.
Here's the code:
/*
Initializing pngReadStruct & pngInfoStruct...
*/
// Getting image's width & height
png_uint_32 imgWidth = png_get_image_width(pngReadStruct, pngInfoStruct);
png_uint_32 imgHeight = png_get_image_height(pngReadStruct, pngInfoStruct);
// Getting bits per channel (not per pixel)
png_uint_32 bitDepth = png_get_bit_depth(pngReadStruct, pngInfoStruct);
// Getting number of channels
png_uint_32 channels = png_get_channels(pngReadStruct, pngInfoStruct);
// Getting color type (RGB, RGBA, luminance, alpha, palette, etc)
png_uint_32 colorType = png_get_color_type(pngReadStruct, pngInfoStruct);
// Refining color type (if colored or grayscale)
switch (colorType) {
    case PNG_COLOR_TYPE_PALETTE:
        png_set_palette_to_rgb(pngReadStruct);
        // If RGB image, setting channel number to 3
        channels = 3;
        break;
    case PNG_COLOR_TYPE_GRAY:
        if (bitDepth < 8)
            png_set_expand_gray_1_2_4_to_8(pngReadStruct);
        // Updating bit depth info
        bitDepth = 8;
        break;
    default:
        break;
}
// Adding full alpha channel to the image if it possesses transparency
if (png_get_valid(pngReadStruct, pngInfoStruct, PNG_INFO_tRNS)) {
    png_set_tRNS_to_alpha(pngReadStruct);
    channels += 1;
}
// Defining an array to contain image's rows of pixels
std::vector<png_bytep> rowPtrs(imgHeight);
// Defining an array to contain image's pixels (data's type is 'std::unique_ptr<char[]>')
data = std::make_unique<char[]>(imgWidth * imgHeight * bitDepth * channels / 8);
const unsigned long int rowLength = imgWidth * bitDepth * channels / 8;
// Adding every pixel into previously allocated rows
for (unsigned int i = 0; i < imgHeight; ++i) {
    // Preparing the rows to handle image's data
    rowPtrs[i] = (png_bytep)&data + ((imgHeight - i - 1) * rowLength);
}
// Recovering image data
png_read_image(pngReadStruct, rowPtrs.data()); // /!\ Segfault here
png_destroy_read_struct(&pngReadStruct, static_cast<png_infopp>(0), static_cast<png_infopp>(0));
Every characteristic read from the file seems fine to me, and the code worked without error just a while ago; it is probably a silly mistake I made while refactoring.
Thanks for the help; feel free to ask about anything I might have missed, and sorry for the long code!
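For reference, one refactoring slip that fits this crash: since data is a std::unique_ptr<char[]>, (png_bytep)&data points at the smart-pointer object itself rather than at the pixel buffer it owns (data.get()), and libpng also expects png_read_update_info() after the png_set_* transforms so the row size matches what it will actually write. A minimal sketch of the usual row-pointer setup under those assumptions (not a drop-in fix, just the shape of it):
// Sketch only: 'data' is assumed to be the std::unique_ptr<char[]> member,
// allocated with png_get_rowbytes(...) * imgHeight bytes after the update below.
png_read_update_info(pngReadStruct, pngInfoStruct);        // apply the png_set_* transforms
const std::size_t rowBytes = png_get_rowbytes(pngReadStruct, pngInfoStruct);
std::vector<png_bytep> rowPtrs(imgHeight);
for (png_uint_32 i = 0; i < imgHeight; ++i) {
    // Point each row at the managed buffer, not at the unique_ptr object
    rowPtrs[i] = reinterpret_cast<png_bytep>(data.get()) + (imgHeight - i - 1) * rowBytes;
}
png_read_image(pngReadStruct, rowPtrs.data());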
I have input from a captured camera frame as a CMSampleBufferRef and I need to get the raw pixels, preferably as a C-type uint8_t[].
I also need to find the color scheme of the input image.
I know how to convert the CMSampleBufferRef to a UIImage and then to NSData in PNG format, but I don't know how to get the raw pixels from there. Perhaps I could get them directly from the CMSampleBufferRef/CIImage?
This code shows the need and the missing bits.
Any thoughts where to start?
int convertCMSampleBufferToPixelArray (CMSampleBufferRef sampleBuffer)
{
    // inputs
    CVImageBufferRef imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
    CIImage *ciImage = [CIImage imageWithCVPixelBuffer:imageBuffer];
    CIContext *imgContext = [CIContext new];
    CGImageRef cgImage = [imgContext createCGImage:ciImage fromRect:ciImage.extent];
    UIImage *uiImage = [UIImage imageWithCGImage:cgImage];
    NSData *nsData = UIImagePNGRepresentation(uiImage);

    // Need to fill this gap
    uint8_t* data = XXXXXXXXXXXXXXXX;
    ImageFormat format = XXXXXXXXXXXXXXXX; // one of: GRAY8, RGB_888, YV12, BGRA_8888, ARGB_8888

    // sample showing expected data values
    // this routine converts the image data to gray
    //
    int width = uiImage.size.width;
    int height = uiImage.size.height;
    const int size = width * height;
    std::unique_ptr<uint8_t[]> new_data(new uint8_t[size]);
    for (int i = 0; i < size; ++i) {
        new_data[i] = uint8_t(data[i * 3] * 0.299f + data[i * 3 + 1] * 0.587f +
                              data[i * 3 + 2] * 0.114f + 0.5f);
    }
    return 1;
}
Here are some pointers you can use to search for more info. It's nicely documented, so you shouldn't have an issue.
int convertCMSampleBufferToPixelArray (CMSampleBufferRef sampleBuffer) {
    CVImageBufferRef imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
    if (imageBuffer == NULL) {
        return -1;
    }

    // Lock and get the address of the image buffer
    CVPixelBufferLockBaseAddress(imageBuffer, 0);
    uint8_t* data = (uint8_t*)CVPixelBufferGetBaseAddress(imageBuffer);

    // Get size
    size_t width = CVPixelBufferGetWidth(imageBuffer);
    size_t height = CVPixelBufferGetHeight(imageBuffer);

    // Get bytes per row
    size_t bytesPerRow = CVPixelBufferGetBytesPerRow(imageBuffer);

    // At `data` you have bytesPerRow * height bytes of image data
    // To get pixel info you can call CVPixelBufferGetPixelFormatType, ...
    // you can call CVImageBufferGetColorSpace and inspect it, ...

    // When you're done, unlock the base address
    CVPixelBufferUnlockBaseAddress(imageBuffer, 0);
    return 0;
}
There are a couple of things you should be aware of.
The first one is that the buffer can be planar. Check CVPixelBufferIsPlanar, CVPixelBufferGetPlaneCount, CVPixelBufferGetBytesPerRowOfPlane, etc.
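For planar buffers (e.g. the YUV formats), each plane has its own base address, dimensions, and stride. A minimal sketch, assuming the same locked imageBuffer as above:
if (CVPixelBufferIsPlanar(imageBuffer)) {
    size_t planeCount = CVPixelBufferGetPlaneCount(imageBuffer);
    for (size_t plane = 0; plane < planeCount; plane++) {
        // Each plane has its own base address, size, and stride
        uint8_t* planeData = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(imageBuffer, plane);
        size_t planeWidth = CVPixelBufferGetWidthOfPlane(imageBuffer, plane);
        size_t planeHeight = CVPixelBufferGetHeightOfPlane(imageBuffer, plane);
        size_t planeBytesPerRow = CVPixelBufferGetBytesPerRowOfPlane(imageBuffer, plane);
        // Walk planeData row by row, planeBytesPerRow bytes apart
    }
}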
The second one is that you have to calculate the pixel size based on CVPixelBufferGetPixelFormatType. Something like:
OSType pixelFormat = CVPixelBufferGetPixelFormatType(imageBuffer);
size_t pixelSize;
switch (pixelFormat) {
    case kCVPixelFormatType_32BGRA:
    case kCVPixelFormatType_32ARGB:
    case kCVPixelFormatType_32ABGR:
    case kCVPixelFormatType_32RGBA:
        pixelSize = 4;
        break;
    // + other cases
}
Let's say that the buffer is not planar and:
CVPixelBufferGetWidth returns 200 (pixels)
Your pixelSize is 4 (calculated bytes per row is 200 * 4 = 800)
CVPixelBufferGetBytesPerRow can return anything >= 800
In other words, the pointer you have does not point to tightly packed pixel rows. If you need the row data, you have to do something like this:
uint8_t* data = (uint8_t*)CVPixelBufferGetBaseAddress(imageBuffer);

// Get size
size_t width = CVPixelBufferGetWidth(imageBuffer);
size_t height = CVPixelBufferGetHeight(imageBuffer);
size_t pixelSize = 4; // Let's pretend it's the calculated pixel size
size_t realRowSize = width * pixelSize;
size_t bytesPerRow = CVPixelBufferGetBytesPerRow(imageBuffer);

for (size_t row = 0; row < height; row++) {
    // bytesPerRow acts like an offset where the next row starts
    // bytesPerRow can be >= realRowSize
    uint8_t *rowData = data + row * bytesPerRow;
    // realRowSize = how many bytes are available for this row
    // copy them somewhere
}
If you'd like a contiguous buffer, you have to allocate one and copy the row data into it. How many bytes to allocate? CVPixelBufferGetDataSize.
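As a minimal sketch of that copy (the author suggests sizing with CVPixelBufferGetDataSize, which includes the row padding; the sketch below packs the rows tightly instead, and assumes the data, height, realRowSize, and bytesPerRow variables from above plus <vector> and <cstring>):
// Tightly packed destination: realRowSize bytes per row, no padding
std::vector<uint8_t> packed(realRowSize * height);
for (size_t row = 0; row < height; row++) {
    const uint8_t* srcRow = data + row * bytesPerRow;     // padded source row
    uint8_t* dstRow = packed.data() + row * realRowSize;  // packed destination row
    memcpy(dstRow, srcRow, realRowSize);
}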
I need to create a bitmap from an array of pixels for a raycaster I'm working on in Direct2D. However, I'm having trouble understanding how to use the CreateBitmap function. Specifically, I'm not sure what the srcData parameter is supposed to be. I'm pretty sure/hoping it's a pointer to an array of pixels, but I'm not sure how to set up that array. What kind of array is it supposed to be? What data type? Etc.
Here's what I've tried:
int width = 400, height = 400;
D2D1::ColorF * arr = (D2D1::ColorF*)calloc(width * height * 4, sizeof(D2D1::ColorF));
for (int i = 0; i < width * height * 4; i++) { arr[i] = D2D1::ColorF(0.0f, 1.0f, 0.0f); }
// Create the bitmap and draw it on the screen
ID2D1Bitmap * bmp;
HRESULT hr;
hr = renderTarget->CreateBitmap(
D2D1::SizeU(width, height),
arr,
width * sizeof(int) * 4,
D2D1::BitmapProperties(),
&bmp);
if (hr != S_OK) { return; } // I've tested and found that hr does not equal S_OK
// Draw the bitmap...
What should the second and third lines look like? Is there anything else I'm doing incorrectly?
Syntax:
HRESULT CreateBitmap(
    D2D1_SIZE_U size,
    const void *srcData,
    UINT32 pitch,
    const D2D1_BITMAP_PROPERTIES &bitmapProperties,
    ID2D1Bitmap **bitmap
);
Your code:
hr = renderTarget->CreateBitmap(
    D2D1::SizeU(width, height),
    arr,                      // <<--- Wrong, see (a) below
    width * sizeof(int) * 4,  // <<--- Close but wrong, see (b) below
    D2D1::BitmapProperties(), // <<--- Wrong, see (c) below
    &bmp);
(a) - you are supposed to provide an array of pixel data here, where the layout depends on the format of the bitmap. Note that this parameter is optional and you can create a bitmap without initial data. The pixels are not D2D1::ColorF structures; they could be 4-byte RGBA data if you request the respective bitmap format, see (c) below.
(b) - this is the distance between rows in bytes; if your pixels are supposed to be 32-bit values, you would normally want width * 4 here.
(c) - this requests DXGI_FORMAT_UNKNOWN / D2D1_ALPHA_MODE_UNKNOWN and results in a bitmap creation error. You need a real format here, such as DXGI_FORMAT_B8G8R8A8_UNORM (see Pixel Formats and also Supported Pixel Formats and Alpha Modes).
The first link above shows exactly how bytes in memory map to pixel colors, and you are supposed to prepare your data accordingly.
UPD
With DXGI_FORMAT_B8G8R8A8_UNORM, your initialization data looks like this:
UINT8* Data = (UINT8*)malloc(Height * Width * 4);
for (UINT Y = 0; Y < Height; Y++)
    for (UINT X = 0; X < Width; X++)
    {
        UINT8* PixelData = Data + ((Y * Width) + X) * 4;
        PixelData[0] = unsigned integer blue in range 0..255;
        PixelData[1] = unsigned integer green in range 0..255;
        PixelData[2] = unsigned integer red in range 0..255;
        PixelData[3] = 255; // alpha
    }
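Putting (b) and (c) together, a sketch of what the corrected call could look like, assuming the Data buffer filled above (the alpha mode here is just one reasonable choice):
ID2D1Bitmap *bmp = nullptr;
HRESULT hr = renderTarget->CreateBitmap(
    D2D1::SizeU(Width, Height),
    Data,                                   // (a) raw BGRA bytes, 4 bytes per pixel
    Width * 4,                              // (b) pitch: bytes from one row to the next
    D2D1::BitmapProperties(
        D2D1::PixelFormat(DXGI_FORMAT_B8G8R8A8_UNORM, D2D1_ALPHA_MODE_IGNORE)), // (c) a real format
    &bmp);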
Has anyone ever integrated FreeType with DirectX 11 for font rendering? The only article I seem to find is DirectX 11 Font Rendering. I can't seem to match the correct DXGI_FORMAT for rendering the grayscale bitmap that FreeType creates for a glyph.
There are three ways to handle greyscale textures in Direct3D 11:
Option (1): You can use an RGB format and replicate the channels. For example, you'd use DXGI_FORMAT_R8G8B8A8_UNORM and set R, G, B each to the single monochrome channel and A to fully opaque (0xFF). You can handle Monochrome + Alpha (2-channel) data the same way.
This conversion is supported when loading .DDS luminance formats (D3DFMT_L8, D3DFMT_L8A8) by the DirectXTex library and the texconv command-line tool with the -xlum switch.
This makes the texture up to 4 times larger in memory, but it integrates easily with standard shaders.
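A minimal sketch of that channel replication on the CPU, assuming an 8-bit greyscale glyph bitmap of the kind FreeType produces (width, height, pitch, and src here are placeholders for the glyph's dimensions, row stride, and pixel pointer; <vector> and <cstdint> are assumed):
// Expand an 8-bit greyscale bitmap into RGBA8 (R = G = B = grey, A = opaque)
std::vector<uint8_t> rgba(width * height * 4);
for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x) {
        uint8_t grey = src[y * pitch + x];
        uint8_t* dst = &rgba[(y * width + x) * 4];
        dst[0] = grey;  // R
        dst[1] = grey;  // G
        dst[2] = grey;  // B
        dst[3] = 0xFF;  // A, fully opaque
    }
}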
Option (2): You keep the monochrome texture as a single channel using DXGI_FORMAT_R8_UNORM as your format. You then render using a custom shader which replicates the red channel to RGB at runtime.
This is in fact what the tutorial blog post you linked to is doing:
///////// PIXEL SHADER
float4 main(float2 uv : TEXCOORD0) : SV_Target0
{
    return float4(Decal.Sample(Bilinear, uv).rrr, 1.f);
}
For Monochrome + Alpha (2-channel) you'd use DXGI_FORMAT_R8G8_UNORM and then your custom shader would use .rrrg as the swizzle.
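Creating such a single-channel texture from a FreeType glyph could look roughly like this in C++, assuming an ID3D11Device* named device and an FT_Bitmap named ftBitmap rendered with FT_RENDER_MODE_NORMAL (8-bit greyscale, positive pitch):
// One byte per pixel in the red channel; the shader swizzles .rrr at sample time
D3D11_TEXTURE2D_DESC desc = {};
desc.Width = ftBitmap.width;
desc.Height = ftBitmap.rows;
desc.MipLevels = 1;
desc.ArraySize = 1;
desc.Format = DXGI_FORMAT_R8_UNORM;
desc.SampleDesc.Count = 1;
desc.Usage = D3D11_USAGE_IMMUTABLE;
desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;

D3D11_SUBRESOURCE_DATA initData = {};
initData.pSysMem = ftBitmap.buffer;      // greyscale coverage values
initData.SysMemPitch = ftBitmap.pitch;   // FreeType's row stride in bytes

ID3D11Texture2D* glyphTexture = nullptr;
HRESULT hr = device->CreateTexture2D(&desc, &initData, &glyphTexture);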
Option (3): You can compress the monochrome data to the DXGI_FORMAT_BC2_UNORM format using a custom encoder. This is implemented in the DirectX Tool Kit's MakeSpriteFont tool when using /TextureFormat:CompressedMono
// CompressBlock (16 pixels (4x4 block) stored as 16 bytes)
long alphaBits = 0;
int rgbBits = 0;
int pixelCount = 0;

for (int y = 0; y < 4; y++)
{
    for (int x = 0; x < 4; x++)
    {
        long alpha;
        int rgb;

        // This is the single monochrome channel
        int value = bitmapData[blockX + x, blockY + y];

        if (options.NoPremultiply)
        {
            // If we are not premultiplied, RGB is always white and we have 4 bit alpha.
            alpha = value >> 4;
            rgb = 0;
        }
        else
        {
            // For premultiplied encoding, quantize the source value to 2 bit precision.
            if (value < 256 / 6)
            {
                alpha = 0;
                rgb = 1;
            }
            else if (value < 256 / 2)
            {
                alpha = 5;
                rgb = 3;
            }
            else if (value < 256 * 5 / 6)
            {
                alpha = 10;
                rgb = 2;
            }
            else
            {
                alpha = 15;
                rgb = 0;
            }
        }

        // Add this pixel to the alpha and RGB bit masks.
        alphaBits |= alpha << (pixelCount * 4);
        rgbBits |= rgb << (pixelCount * 2);

        pixelCount++;
    }
}

// The resulting BC2 block is:
//   uint64_t = alphaBits
//   uint16_t = 0xFFFF
//   uint16_t = 0x0
//   uint32_t = rgbBits
The resulting texture is then rendered using a standard alpha-blending shader. Since it uses 1 byte per pixel, this is effectively the same size as if you were using DXGI_FORMAT_R8_UNORM.
This technique does not work for 2-channel data, but works great for alpha-blended monochrome images like font glyphs.
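If it helps to see that block layout concretely, here is a hedged C++ sketch of packing one 16-byte BC2 block as described in the comment above (PackBC2Block is a made-up helper name; alphaBits and rgbBits come from the loop, the fixed endpoints encode white and black, and a little-endian target is assumed):
#include <cstdint>
#include <cstring>

// Pack one BC2 block: 8 bytes of 4-bit alphas, then two 16-bit color
// endpoints (white, black) and 32 bits of 2-bit color indices.
void PackBC2Block(uint64_t alphaBits, uint32_t rgbBits, uint8_t out[16])
{
    const uint16_t color0 = 0xFFFF; // white endpoint
    const uint16_t color1 = 0x0000; // black endpoint
    std::memcpy(out + 0,  &alphaBits, 8);
    std::memcpy(out + 8,  &color0,    2);
    std::memcpy(out + 10, &color1,    2);
    std::memcpy(out + 12, &rgbBits,   4);
}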
I use the following C++ code to read out the depth information from the Kinect:
BYTE * rgbrun = m_depthRGBX;
const USHORT * pBufferRun = (const USHORT *)LockedRect.pBits;

// end pixel is start + width*height - 1
const USHORT * pBufferEnd = pBufferRun + (Width * Height);

// process data for display in main window.
while ( pBufferRun < pBufferEnd )
{
    // discard the portion of the depth that contains only the player index
    USHORT depth = NuiDepthPixelToDepth(*pBufferRun);
    BYTE intensity = static_cast<BYTE>(depth % 256);

    // Write out blue byte
    *(rgbrun++) = intensity;
    // Write out green byte
    *(rgbrun++) = intensity;
    // Write out red byte
    *(rgbrun++) = intensity;

    ++rgbrun;
    ++pBufferRun;
}
What I'd like to know is, what is the easiest way to implement frame flipping (horizontal & vertical)? I couldn't find any function in the kinect SDK, but maybe I missed it?
EDIT1: I'd like to avoid using any external libraries, so any solution that explains the depth data layout and how to invert rows/columns is highly appreciated.
So, you're using a standard 16bpp single channel depth map with player data. This is a nice easy format to work with. An image buffer is arranged row-wise, and each pixel in the image data has the bottom 3 bits set to the player ID and the top 13 bits set to depth data.
Here's a quick'n'dirty way to read each row in reverse and write it out to an RGBWhatever image, with a simple depth visualisation that's a little nicer to look at than the wrapping output you currently use.
BYTE * rgbrun = m_depthRGBX;
const USHORT * pBufferRun = (const USHORT *)LockedRect.pBits;

for (unsigned int y = 0; y < Height; y++)
{
    for (unsigned int x = 0; x < Width; x++)
    {
        // shift off the player bits
        USHORT depthIn = pBufferRun[(y * Width) + (Width - 1 - x)] >> 3;

        // valid depth is (generally) in the range 0 to 4095.
        // here's a simple visualisation to do a greyscale mapping, with white
        // being closest. Set 0 (invalid pixel) to black.
        BYTE intensity =
            depthIn == 0 || depthIn > 4095 ?
                0 : 255 - (BYTE)(((float)depthIn / 4095.0f) * 255.0f);

        *(rgbrun++) = intensity;
        *(rgbrun++) = intensity;
        *(rgbrun++) = intensity;
        ++rgbrun;
    }
}
Code untested, E&OE, etc ;-)
It is possible to parallelise the outer loop if, instead of using a single rgbrun pointer, you compute a pointer to the beginning of the current output row and write to that instead.
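Vertical flipping only changes which source row feeds which destination row. A hedged sketch along the same lines, reusing the variables above and assuming rgbrun has been reset to the start of m_depthRGBX (combine both index swaps for a 180° rotation):
// Vertical flip: walk destination rows top to bottom, read source rows bottom to top
for (unsigned int y = 0; y < Height; y++)
{
    const USHORT * srcRow = pBufferRun + (Height - 1 - y) * Width;
    for (unsigned int x = 0; x < Width; x++)
    {
        USHORT depthIn = srcRow[x] >> 3;   // shift off the player bits
        BYTE intensity =
            depthIn == 0 || depthIn > 4095 ?
                0 : 255 - (BYTE)(((float)depthIn / 4095.0f) * 255.0f);
        *(rgbrun++) = intensity;   // blue
        *(rgbrun++) = intensity;   // green
        *(rgbrun++) = intensity;   // red
        ++rgbrun;                  // skip the fourth byte
    }
}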
I'm trying to get the Kinect depth camera pixels to overlay onto the RGB camera. I am using the C++ Kinect 1.0 SDK with an Xbox Kinect, OpenCV and trying to use the new "NuiImageGetColorPixelCoordinateFrameFromDepthPixelFrameAtResolution" method.
I have watched the image render itself in slow motion, and it looks as if pixels are being drawn multiple times in the one frame. It first draws itself from the top and left borders, then it gets to a point (you can see a 45 degree angle in there) where it starts drawing strangely.
I have been trying to base my code off of the C# code written by Adam Smith at the MSDN forums but no dice. I have stripped out the overlay stuff and just want to draw the depth normalized depth pixels where it "should" be in the RGB image.
The image on the left is what I'm getting when trying to fit the depth image to RGB space, and the image on the right is the "raw" depth image as I like to see it. I was hoping my method would create an image similar to the one on the right, with slight distortions.
This is the code and object definitions that I have at the moment:
// From initialization
INuiSensor *m_pNuiInstance;
NUI_IMAGE_RESOLUTION m_nuiResolution = NUI_IMAGE_RESOLUTION_640x480;
HANDLE m_pDepthStreamHandle;
IplImage *m_pIplDepthFrame;
IplImage *m_pIplFittedDepthFrame;
m_pIplDepthFrame = cvCreateImage(cvSize(640, 480), 8, 1);
m_pIplFittedDepthFrame = cvCreateImage(cvSize(640, 480), 8, 1);
// Method
IplImage *Kinect::GetRGBFittedDepthFrame() {
    static long *pMappedBits = NULL;
    if (!pMappedBits) {
        pMappedBits = new long[640*480*2];
    }

    NUI_IMAGE_FRAME pNuiFrame;
    NUI_LOCKED_RECT lockedRect;

    HRESULT hr = m_pNuiInstance->NuiImageStreamGetNextFrame(m_pDepthStreamHandle, 0, &pNuiFrame);
    if (FAILED(hr)) {
        // return the older frame
        return m_pIplFittedDepthFrame;
    }

    bool hasPlayerData = HasSkeletalEngine(m_pNuiInstance);
    INuiFrameTexture *pTexture = pNuiFrame.pFrameTexture;
    pTexture->LockRect(0, &lockedRect, NULL, 0);

    if (lockedRect.Pitch != 0) {
        cvZero(m_pIplFittedDepthFrame);

        hr = m_pNuiInstance->NuiImageGetColorPixelCoordinateFrameFromDepthPixelFrameAtResolution(
            m_nuiResolution,
            NUI_IMAGE_RESOLUTION_640x480,
            640 * 480, /* size is previous */ (unsigned short*) lockedRect.pBits,
            (640 * 480) * 2, /* size is previous */ pMappedBits);

        if (FAILED(hr)) {
            return m_pIplFittedDepthFrame;
        }

        for (int i = 0; i < lockedRect.size; i++) {
            unsigned char* pBuf = (unsigned char*) lockedRect.pBits + i;
            unsigned short* pBufS = (unsigned short*) pBuf;
            unsigned short depth = hasPlayerData ? ((*pBufS) & 0xfff8) >> 3 : ((*pBufS) & 0xffff);
            unsigned char intensity = depth > 0 ? 255 - (unsigned char) (256 * depth / 0x0fff) : 0;
            long
                x = pMappedBits[i],     // tried with *(pMappedBits + (i * 2)),
                y = pMappedBits[i + 1]; // tried with *(pMappedBits + (i * 2) + 1);
            if (x >= 0 && x < m_pIplFittedDepthFrame->width && y >= 0 && y < m_pIplFittedDepthFrame->height) {
                m_pIplFittedDepthFrame->imageData[x + y * m_pIplFittedDepthFrame->widthStep] = intensity;
            }
        }
    }

    pTexture->UnlockRect(0);
    m_pNuiInstance->NuiImageStreamReleaseFrame(m_pDepthStreamHandle, &pNuiFrame);
    return(m_pIplFittedDepthFrame);
}
Thanks
I have found that the problem was that the loop
for (int i = 0; i < lockedRect.size; i++) {
    // code
}
was iterating on a per-byte basis, not on a per-short (2-byte) basis. Since lockedRect.size returns the number of bytes, the fix was simply changing the increment to i += 2; even better is changing it to sizeof(short), like so:
for (int i = 0; i < lockedRect.size; i += sizeof(short)) {
    // code
}
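A consolidated sketch of the corrected loop, reusing the variables from GetRGBFittedDepthFrame above (nothing else changes: the depth buffer is still read as shorts, and pMappedBits still holds one x/y pair per depth pixel):
for (int i = 0; i < lockedRect.size; i += sizeof(short)) {
    unsigned char* pBuf = (unsigned char*) lockedRect.pBits + i;
    unsigned short* pBufS = (unsigned short*) pBuf;
    unsigned short depth = hasPlayerData ? ((*pBufS) & 0xfff8) >> 3 : ((*pBufS) & 0xffff);
    unsigned char intensity = depth > 0 ? 255 - (unsigned char) (256 * depth / 0x0fff) : 0;

    // i counts bytes, so i is also 2 * (pixel index); the coordinate pair for
    // this pixel therefore sits at pMappedBits[i] and pMappedBits[i + 1]
    long x = pMappedBits[i];
    long y = pMappedBits[i + 1];

    if (x >= 0 && x < m_pIplFittedDepthFrame->width && y >= 0 && y < m_pIplFittedDepthFrame->height) {
        m_pIplFittedDepthFrame->imageData[x + y * m_pIplFittedDepthFrame->widthStep] = intensity;
    }
}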