C/CUDA: Only every fourth element in CudaArray can be indexed

C/CUDA: Only every fourth element in CudaArray can be indexed - c++

This is my first post, so I am thrilled to get some new insights and enlarge my knowledge. Currently I am working on a C-project where a binary raw file with 3d-data is loaded, processed in CUDA and saved in a new binary raw file.
This is based on the simpleTexture3D project from CUDA Samples:
This is my cpp
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
// includes, cuda
#include <vector_types.h>
#include <driver_functions.h>
#include <cuda_runtime.h>
// CUDA utilities and system includes
#include <helper_cuda.h>
#include <helper_functions.h>
#include <vector_types.h>
typedef unsigned int uint;
typedef unsigned char uchar;
const char *sSDKsample = "simpleTexture3D";
const char *volumeFilename = "Bucky.raw";
const cudaExtent volumeSize = make_cudaExtent(32, 32, 32);
const uint width = 64, height = 64, depth=64;
//const char *volumeFilename = "TestOCT.raw";
//const cudaExtent volumeSize = make_cudaExtent(1024, 512, 512);
//
//const uint width = 1024, height = 512, depth=512;
const dim3 blockSize(8, 8, 8);
const dim3 gridSize(width / blockSize.x, height / blockSize.y, depth / blockSize.z);
uint *d_output = NULL;
int *pArgc = NULL;
char **pArgv = NULL;
extern "C" void cleanup();
extern "C" void initCuda(const uchar *h_volume, cudaExtent volumeSize);
extern "C" void render_kernel(dim3 gridSize, dim3 blockSize, uint *d_output, uint imageW, uint imageH, uint imageD);
void loadVolumeData(char *exec_path);
// render image using CUDA
void render()
{
// call CUDA kernel
render_kernel(gridSize, blockSize, d_output, width, height, depth);
getLastCudaError("render_kernel failed");
}
void cleanup()
{
// cudaDeviceReset causes the driver to clean up all state. While
// not mandatory in normal operation, it is good practice. It is also
// needed to ensure correct operation when the application is being
// profiled. Calling cudaDeviceReset causes all profile data to be
// flushed before the application exits
checkCudaErrors(cudaDeviceReset());
}
// Load raw data from disk
uchar *loadRawFile(const char *filename, size_t size)
{
FILE *fp = fopen(filename, "rb");
if (!fp)
{
fprintf(stderr, "Error opening file '%s'\n", filename);
return 0;
}
uchar *data = (uchar *) malloc(size);
size_t read = fread(data, 1, size, fp);
fclose(fp);
printf("Read '%s', %lu bytes\n", filename, read);
return data;
}
// write raw data to disk
int writeRawFile(const char *filename, uchar *data, size_t size)
{
int returnState=0;
// cut file extension from filename
char *a=strdup(filename); //via strdup you dumb a const char to char, you must free it yourself
int len = strlen(a);
a[len-4] = '\0'; //deletes '.raw'
//printf("%s\n",a);
char b[50];
sprintf(b, "_%dx%dx%d_out.raw", width, height, depth);
//char b[]="_out.raw"; //Add suffix out to filename
char buffer[256]; // <- danger, only storage for 256 characters.
strncpy(buffer, a, sizeof(buffer));
strncat(buffer, b, sizeof(buffer));
free(a);
FILE *fp = fopen(buffer, "wb"); //Open or create file for writing as binary, all existing data is cleared
if (!fp)
{
fprintf(stderr, "Error opening or creating file '%s'\n", buffer);
return 0;
}
size_t write = fwrite(data, 1, size, fp);
fclose(fp);
if (write==size)
{
printf("Wrote %lu bytes to '%s'\n", write, buffer);
return 0;
}
else
{
printf("Error writing data to file '%s'\n", buffer);
return 1;
}
}
// General initialization call for CUDA Device
int chooseCudaDevice(int argc, char **argv)
{
int result = 0;
result = findCudaDevice(argc, (const char **)argv);
return result;
}
void runAutoTest(char *exec_path, char *PathToFile)
{
// set path
char *path;
if (PathToFile == NULL)
{
path = sdkFindFilePath(volumeFilename, exec_path);
}
else
{
path = PathToFile;
}
if (path == NULL)
{
fprintf(stderr, "Error unable to find 3D Volume file: '%s'\n", volumeFilename);
exit(EXIT_FAILURE);
}
// Allocate output memory
checkCudaErrors(cudaMalloc((void **)&d_output, width*height*depth*sizeof(uchar)));
// zero out the output array with cudaMemset
cudaMemset(d_output, 0, width*height*depth*sizeof(uchar));
// render the volumeData
render_kernel(gridSize, blockSize, d_output, width, height, depth);
checkCudaErrors(cudaDeviceSynchronize());
getLastCudaError("render_kernel failed");
uchar *h_output = (uchar*)malloc(width*height*depth);
checkCudaErrors(cudaMemcpy(h_output, d_output, width*height*depth*sizeof(uchar), cudaMemcpyDeviceToHost));
int wState=writeRawFile(path,h_output,width*height*depth);
checkCudaErrors(cudaFree(d_output));
free(h_output);
// cudaDeviceReset causes the driver to clean up all state. While
// not mandatory in normal operation, it is good practice. It is also
// needed to ensure correct operation when the application is being
// profiled. Calling cudaDeviceReset causes all profile data to be
// flushed before the application exits
cudaDeviceReset();
//exit(bTestResult ? EXIT_SUCCESS : EXIT_FAILURE);
}
void loadVolumeData(char *exec_path, char *PathToFile)
{
char *path;
// load volume data
if (PathToFile == NULL)
{
path = sdkFindFilePath(volumeFilename, exec_path);
}
else
{
path = PathToFile;
}
if (path == NULL)
{
fprintf(stderr, "Error unable to find 3D Volume file: '%s'\n", volumeFilename);
exit(EXIT_FAILURE);
}
size_t size = volumeSize.width*volumeSize.height*volumeSize.depth;
uchar *h_volume = loadRawFile(path, size);
//int wState=writeRawFile(path,h_volume,size);
initCuda(h_volume, volumeSize);
free(h_volume);
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char **argv)
{
pArgc = &argc;
pArgv = argv;
char *image_file = NULL;
printf("%s Starting...\n\n", sSDKsample);
if (checkCmdLineFlag(argc, (const char **)argv, "file")) //Note cmd line argument is -file "PathToFile/File.raw"
{ // for example -file "C:\ProgramData\NVIDIA Corporation\CUDA Samples\v7.0\2_Graphics\simpleTexture3D_FanBeamCorr\data\TestOCT_Kopie.raw"
getCmdLineArgumentString(argc, (const char **)argv, "file", &image_file);
}
if (image_file)
{
chooseCudaDevice(argc, argv);
loadVolumeData(argv[0],image_file);
runAutoTest(argv[0],image_file);
}
else
{
// use command-line specified CUDA device, otherwise use device with highest Gflops/s
chooseCudaDevice(argc, argv);
loadVolumeData(argv[0],NULL);
runAutoTest(argv[0],NULL);
}
printf("I am finished...\n"
"Can I get some ice cream please\n");
exit(EXIT_SUCCESS);
}
And this is my .cu
#ifndef _SIMPLETEXTURE3D_KERNEL_CU_
#define _SIMPLETEXTURE3D_KERNEL_CU_
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <helper_cuda.h>
#include <helper_math.h>
typedef unsigned int uint;
typedef unsigned char uchar;
texture<uchar, 3, cudaReadModeNormalizedFloat> tex; // 3D texture
cudaArray *d_volumeArray = 0;
__global__ void
d_render(uint *d_output, uint imageW, uint imageH, uint imageD)
{
uint x = __umul24(blockIdx.x, blockDim.x) + threadIdx.x;
uint y = __umul24(blockIdx.y, blockDim.y) + threadIdx.y;
uint z = __umul24(blockIdx.z, blockDim.z) + threadIdx.z;
// float u = x / (float) imageW;
// float v = y / (float) imageH;
//float w = z / (float) imageD;
// // read from 3D texture
// float voxel = tex3D(tex, u, v, w);
uint ps=__umul24(imageW,imageH);
if ((x < imageW) && (y < imageH) && (z < imageD))
{
// write output color
uint i = __umul24(z,ps) +__umul24(y, imageW) + x;
d_output[1] = (uchar) 255;//+0*voxel*255;
}
}
extern "C"
void initCuda(const uchar *h_volume, cudaExtent volumeSize)
{
// create 3D array
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<uchar>();
checkCudaErrors(cudaMalloc3DArray(&d_volumeArray, &channelDesc, volumeSize));
// copy data to 3D array
cudaMemcpy3DParms copyParams = {0};
copyParams.srcPtr = make_cudaPitchedPtr((void *)h_volume, volumeSize.width*sizeof(uchar), volumeSize.width, volumeSize.height);
copyParams.dstArray = d_volumeArray;
copyParams.extent = volumeSize;
copyParams.kind = cudaMemcpyHostToDevice;
checkCudaErrors(cudaMemcpy3D(&copyParams));
// set texture parameters
tex.normalized = true; // access with normalized texture coordinates
tex.filterMode = cudaFilterModeLinear; // linear interpolation
tex.addressMode[0] = cudaAddressModeBorder; // wrap texture coordinates
tex.addressMode[1] = cudaAddressModeBorder;
tex.addressMode[2] = cudaAddressModeBorder;
// bind array to 3D texture
checkCudaErrors(cudaBindTextureToArray(tex, d_volumeArray, channelDesc));
}
extern "C"
void render_kernel(dim3 gridSize, dim3 blockSize, uint *d_output, uint imageW, uint imageH, uint imageD)
{
d_render<<<gridSize, blockSize>>>(d_output, imageW, imageH, imageD);
}
#endif // #ifndef _SIMPLETEXTURE3D_KERNEL_CU_
As you can see, currently, I set all values to zero except the index = 1, which is set to 255. Yet when I now open the image stack in Fiji, I see that the fourth pixel on the first slide is white. If I use index=i instead, I get white vertical lines across the image stack periodically every four columns. Generally spoken, it seems that only every fourth element is beeing indexed in the CudaArray. So I am wondering if there is somekind of error here resulting from sizeof(uchar)=1 and sizeof(uint)=4. There would obviously be the factor 4 :)
I am eager to here from you experts
Cheers Mika

I figured it out by myself. The kernel works with uint* d_output while the copy to the host is written into a uchar* h_output
uchar *h_output = (uchar*)malloc(width*height*depth);
checkCudaErrors(cudaMemcpy(h_output, d_output, width*height*depth*sizeof(uchar), cudaMemcpyDeviceToHost));
This led to this strange behavior

Related

How to create a bitmap/image from Scan0?

I'm having trouble porting the following C# code to C++:
protected override void OnPaint(CefBrowser browser, CefPaintElementType type, CefRectangle[] dirtyRects
, System.IntPtr buffer, int width, int height)
{
if (isPainting == true)
return;
isPainting = true;
// Save the provided buffer (a bitmap image) as a PNG.
using (System.Drawing.Bitmap bitmap = new System.Drawing.Bitmap(width, height, width * 4, System.Drawing.Imaging.PixelFormat.Format32bppRgb, buffer))
{
bitmap.Save(#"LastOnPaint.png", System.Drawing.Imaging.ImageFormat.Png);
} // End Using bitmap
}
What it does:
Create an image from a WebSite/SVG as rendered by the latest version of Chromium embedded and save it as a file.
So this is the corresponding render-handler in C++:
void RenderHandler::OnPaint(
CefRefPtr<CefBrowser> browser,
CefRenderHandler::PaintElementType type,
const CefRenderHandler::RectList& dirtyRects,
const void* buffer, int width, int height
) {
// size_t len = sizeof(buffer) / sizeof(void*);
// printf("buffer length: %zu\n", len); // 1...
// Array size is probably: width*height * 4;
}
So I was looking into what C# does in the bitmap-constructor, which is the following:
public Bitmap(int width, int height, int stride, PixelFormat format, IntPtr scan0)
{
IntPtr bitmap = IntPtr.Zero;
int status = Gdip.GdipCreateBitmapFromScan0(width, height, stride, unchecked((int)format), new HandleRef(null, scan0), out bitmap);
Gdip.CheckStatus(status);
SetNativeImage(bitmap);
}
internal void SetNativeImage(IntPtr handle) {
if (handle == IntPtr.Zero)
throw new ArgumentException(SR.GetString(SR.NativeHandle0), "handle");
nativeImage = handle;
}
Which traces to
internal const string Gdiplus = "gdiplus.dll";
[DllImport(ExternDll.Gdiplus, SetLastError=true, ExactSpelling=true, CharSet=System.Runtime.InteropServices.CharSet.Unicode)] // 3 = Unicode
[ResourceExposure(ResourceScope.Machine)]
internal static extern int GdipCreateBitmapFromScan0(int width, int height, int stride, int format, HandleRef scan0, out IntPtr bitmap);
So I thought I could just call GdipCreateBitmapFromScan0 in gdibitmapflat and be almost finished
GpStatus WINGDIPAPI GdipCreateBitmapFromScan0(INT width
, INT height, INT stride, PixelFormat format
, BYTE* scan0, GpBitmap** bitmap)
So I gathered the necessary header-files for GDI, which was a horrible experience
#ifndef __BITMAPHELPER_H__
#define __BITMAPHELPER_H__
// #define WIN32_LEAN_AND_MEAN
#pragma warning(disable:4458)
#include <Windows.h>
#include <ObjIdl.h>
#include <minmax.h>
#include <gdiplus.h>
#include <wingdi.h>
#include <gdiplusbitmap.h>
#include <gdiplusflat.h>
using namespace Gdiplus;
#pragma comment (lib,"gdiplus.lib")
#pragma warning(default:4458)
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cstdint>
#include <cstdbool>
#include <algorithm>
#include <memory>
And thought this would about do it
#include "BitmapHelper.h"
static void Test()
{
GpBitmap *bitmap = NULL;
GdipCreateBitmapFromScan0(100, 100, 0, PixelFormat32bppARGB, NULL, &bitmap); // create a bitmap object with specified width/height/color
// GpGraphics *graph;
// Image * syntaxTest = NULL;
//syntaxTest->FromFile(TEXT("d:\\abc.jpg"), true); // create an image object
// Bitmap::FromBITMAPINFO
// GpImage *image = NULL;
// Gdiplus::Image()
Bitmap *bmp = NULL;
// GdipLoadImageFromFile(TEXT("d:\\abc.jpg"), &image); // create an image object
// GdipGetImageGraphicsContext(bitmap, &graph); // create a graphic object via bitmap object
// GdipDrawImageI(graph, image, 100, 100); // draw image to this graphic object, it can be done
}
However, it turns out the compiler doesn't know GdipCreateBitmapFromScan0, although it's definitely inside #include <gdiplusflat.h>...
How to create a bitmap/image from Scan0 ?
Note:
While I am at it, I don't want to resort to C++.NET, and ideally not to the WinAPI either; because i'd like it to work on Linux too. And not to a monstrous dependency like SDL either.
So far, it looks like my possible alternatives are using this code:
https://codereview.stackexchange.com/questions/196084/read-and-write-bmp-file-in-c
which means I have to create the bitmap header myselfs.
Or I could use some code from ImageIO.
I can't quite belive that creating a simple bitmap on even a single operating-system is that hard...
Is there really no better (and portable) way to create a simple bitmap from a trivial array of pixel colors ?
And why does the compiler not find GdipCreateBitmapFromScan0 ?
If I had used LoadLibrary and GetProcAddress to invoke it instead of f*ing windows header files, I'd be about finished by now...
And why does #include <gdiplus.h> not include its own dependencies ?

Your looking at the internals of .NET have led you toward using a function that's not part of the documented, public interface of GDI+. It looks to me like that's the real cause of most of your problems.
What I think you probably want to do is start by creating a GdiPlus::Bitmap object from your pixels. It has a constructor that looks like it'll directly accept your data.
Once you've created the Bitmap object, you call its Save member function. Bitmap is publicly derived from Image, so you're basically dealing with the normal Image::Save to generate a PNG.
If you want to eliminate the dependency on Windows code, you might consider using (for one obvious possibility) libpng instead. This gives you quite a lot more control over the process, at the expense of being quite a bit more work to use (depending on what you want to do, probably on the order of a half dozen to a dozen lines of code rather than one or two).

So, after having done this in both GDI+ and raw C, I can safely say that it's actually faster, and not to mention considerably less problematic and less google-intensive just doing the image-handling without GDI/GDI+. Whoever implemented GDI+ has a major brain damage.
Since I haven't yet handled transparency properly, and not yet incorporated lodepng, I've added GDI+ as an optional extra option, for the time being.
// A program to read, write, and crop BMP image files.
#include "Bmp.h"
// Make a copy of a string on the heap.
// - Postcondition: the caller is responsible to free
// the memory for the string.
char *_string_duplicate(const char *string)
{
char *copy = (char*)malloc(sizeof(*copy) * (strlen(string) + 1));
if (copy == NULL)
{
// return "Not enough memory for error message";
const char* error_message = "Not enough memory for error message";
size_t len = strlen(error_message);
char* error = (char*)malloc(len * sizeof(char) + 1);
strcpy(error, error_message);
return error;
}
strcpy(copy, string);
return copy;
}
// Check condition and set error message.
bool _check(bool condition, char **error, const char *error_message)
{
bool is_valid = true;
if (!condition)
{
is_valid = false;
if (*error == NULL) // to avoid memory leaks
{
*error = _string_duplicate(error_message);
}
}
return is_valid;
}
// Write an image to an already open file.
// - Postcondition: it is the caller's responsibility to free the memory
// for the error message.
// - Return: true if and only if the operation succeeded.
bool write_bmp(FILE *fp, BMPImage *image, char **error)
{
// Write header
rewind(fp);
size_t num_read = fwrite(&image->header, sizeof(image->header), 1, fp);
if (!_check(num_read == 1, error, "Cannot write image"))
{
return false;
}
// Write image data
num_read = fwrite(image->data, image->header.image_size_bytes, 1, fp);
if (!_check(num_read == 1, error, "Cannot write image"))
{
return false;
}
return true;
}
// Free all memory referred to by the given BMPImage.
void free_bmp(BMPImage *image)
{
free(image->data);
free(image);
}
// Open file. In case of error, print message and exit.
FILE *_open_file(const char *filename, const char *mode)
{
FILE *fp = fopen(filename, mode);
if (fp == NULL)
{
fprintf(stderr, "Could not open file %s\n", filename);
exit(EXIT_FAILURE);
}
return fp;
}
// Close file and release memory.void _clean_up(FILE *fp, BMPImage *image, char **error)
void _clean_up(FILE *fp, BMPImage *image, char **error)
{
if (fp != NULL)
{
fclose(fp);
}
free_bmp(image);
free(*error);
}
// Print error message and clean up resources.
void _handle_error(char **error, FILE *fp, BMPImage *image)
{
fprintf(stderr, "ERROR: %s\n", *error);
_clean_up(fp, image, error);
exit(EXIT_FAILURE);
}
void write_image(const char *filename, BMPImage *image, char **error)
{
FILE *output_ptr = _open_file(filename, "wb");
if (!write_bmp(output_ptr, image, error))
{
_handle_error(error, output_ptr, image);
}
fflush(output_ptr);
fclose(output_ptr);
_clean_up(output_ptr, image, error);
}
// Return the size of an image row in bytes.
// - Precondition: the header must have the width of the image in pixels.
uint32_t computeImageSize(BMPHeader *bmp_header)
{
uint32_t bytes_per_pixel = bmp_header->bits_per_pixel / BITS_PER_BYTE;
uint32_t bytes_per_row_without_padding = bmp_header->width_px * bytes_per_pixel;
uint32_t padding = (4 - (bmp_header->width_px * bytes_per_pixel) % 4) % 4;
uint32_t row_size_bytes = bytes_per_row_without_padding + padding;
return row_size_bytes * bmp_header->height_px;
}
#ifdef USE_GDI
#pragma warning(disable:4189)
int GetEncoderClsid(const WCHAR* format, CLSID* pClsid)
{
UINT num = 0; // number of image encoders
UINT size = 0; // size of the image encoder array in bytes
Gdiplus::ImageCodecInfo* pImageCodecInfo = NULL;
Gdiplus::GetImageEncodersSize(&num, &size);
if (size == 0)
return -1; // Failure
pImageCodecInfo = (Gdiplus::ImageCodecInfo*)(malloc(size));
if (pImageCodecInfo == NULL)
return -1; // Failure
Gdiplus::GetImageEncoders(num, size, pImageCodecInfo);
for (UINT j = 0; j < num; ++j)
{
if (wcscmp(pImageCodecInfo[j].MimeType, format) == 0)
{
*pClsid = pImageCodecInfo[j].Clsid;
free(pImageCodecInfo);
return j; // Success
} // if (wcscmp(pImageCodecInfo[j].MimeType, format) == 0)
} // Next j
free(pImageCodecInfo);
return -1; // Failure
}
// https://github.com/lvandeve/lodepng
static bool notInitialized = true;
void WriteBitmapToFile(const char *filename, int width, int height, const void* buffer)
{
// HRESULT hr = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED | COINIT_DISABLE_OLE1DDE);
if (notInitialized)
{
// https://learn.microsoft.com/en-us/windows/desktop/api/gdiplusinit/nf-gdiplusinit-gdiplusstartup
Gdiplus::GdiplusStartupInput gdiplusStartupInput;
ULONG_PTR gdiplusToken;
Gdiplus::Status isOk = Gdiplus::GdiplusStartup(&gdiplusToken, &gdiplusStartupInput, NULL);
if (isOk != Gdiplus::Status::Ok)
{
printf("Failed on GdiplusStartup\n");
}
notInitialized = false;
// defer
// GdiplusShutdown(gdiplusToken);
} // End if (notInitialized)
// https://learn.microsoft.com/en-us/windows/desktop/gdiplus/-gdiplus-constant-image-pixel-format-constants
Gdiplus::Bitmap* myBitmap = new Gdiplus::Bitmap(width, height, width*4, PixelFormat32bppARGB, (BYTE*)buffer);
// myBitmap->RotateFlip(Gdiplus::Rotate180FlipY);
CLSID pngClsid;
// int result = GetEncoderClsid(L"image/tiff", &tiffClsid);
int result = GetEncoderClsid(L"image/png", &pngClsid);
printf("End GetEncoderClsid:\n");
if (result == -1)
printf("Error: GetEncoderClsid\n");
// throw std::runtime_error("Bitmap::Save");
// if (Ok != myBitmap->Save(L"D\foobartest.png", &pngClsid)) printf("Error: Bitmap::Save");
// WTF ? I guess a standard C/C++-stream would have been too simple ?
IStream* oStream = nullptr;
if (CreateStreamOnHGlobal(NULL, TRUE, (LPSTREAM*)&oStream) != S_OK)
printf("Error on creating an empty IStream\n");
Gdiplus::EncoderParameters encoderParameters;
encoderParameters.Count = 1;
encoderParameters.Parameter[0].Guid = Gdiplus::EncoderQuality;
encoderParameters.Parameter[0].Type = Gdiplus::EncoderParameterValueTypeLong;
encoderParameters.Parameter[0].NumberOfValues = 1;
ULONG quality = 100;
encoderParameters.Parameter[0].Value = &quality;
// https://learn.microsoft.com/en-us/windows/desktop/api/gdiplusheaders/nf-gdiplusheaders-image-save(inistream_inconstclsid_inconstencoderparameters)
if (Gdiplus::Status::Ok != myBitmap->Save(oStream, &pngClsid, &encoderParameters))
printf("Error: Bitmap::Save\n");
// throw std::runtime_error("Bitmap::Save");
ULARGE_INTEGER ulnSize;
LARGE_INTEGER lnOffset;
lnOffset.QuadPart = 0;
oStream->Seek(lnOffset, STREAM_SEEK_END, &ulnSize);
oStream->Seek(lnOffset, STREAM_SEEK_SET, NULL);
uint8_t *pBuff = new uint8_t[(unsigned int)ulnSize.QuadPart];
ULONG ulBytesRead;
oStream->Read(pBuff, (ULONG)ulnSize.QuadPart, &ulBytesRead);
FILE *output_ptr = _open_file(filename, "wb");
fwrite((void*)pBuff, sizeof(uint8_t), (unsigned int)ulnSize.QuadPart, output_ptr);
fflush(output_ptr);
fclose(output_ptr);
oStream->Release();
delete pBuff;
delete myBitmap;
// https://renenyffenegger.ch/notes/development/Base64/Encoding-and-decoding-base-64-with-cpp
// std::string rotated_string = base64_encode((const unsigned char*)pBuff, ulnSize.QuadPart);
}
#pragma warning(default:4189)
#else
// TODO: PNG-Encoder
// https://github.com/lvandeve/lodepng
// https://lodev.org/lodepng/
BMPImage * CreateBitmapFromScan0(int32_t w, int32_t h, uint8_t* scan0)
{
BMPImage *new_image = (BMPImage *)malloc(sizeof(*new_image));
BMPHeader *header = (BMPHeader *)malloc(sizeof(*header));
new_image->header = *header;
new_image->header.type = MAGIC_VALUE;
new_image->header.bits_per_pixel = BITS_PER_PIXEL;
new_image->header.width_px = w;
new_image->header.height_px = h;
new_image->header.image_size_bytes = computeImageSize(&new_image->header);
new_image->header.size = BMP_HEADER_SIZE + new_image->header.image_size_bytes;
new_image->header.dib_header_size = DIB_HEADER_SIZE;
new_image->header.offset = (uint32_t) sizeof(BMPHeader);
new_image->header.num_planes = 1;
new_image->header.compression = 0;
new_image->header.reserved1 = 0;
new_image->header.reserved2 = 0;
new_image->header.num_colors = 0;
new_image->header.important_colors = 0;
new_image->header.x_resolution_ppm = 3780; // image->header.x_resolution_ppm;
new_image->header.y_resolution_ppm = 3780; // image->header.y_resolution_ppm;
new_image->data = (uint8_t*)malloc(sizeof(*new_image->data) * new_image->header.image_size_bytes);
memcpy(new_image->data, scan0, new_image->header.image_size_bytes);
return new_image;
}
void WriteBitmapToFile(const char *filename, int width, int height, const void* buffer)
{
BMPImage * image = CreateBitmapFromScan0((int32_t)width, (int32_t)height, (uint8_t*)buffer);
char *error = NULL;
write_image(filename, image, &error);
}
#endif
Header:
#ifndef BITMAPLION_BITMAPINFORMATION_H
#define BITMAPLION_BITMAPINFORMATION_H
#ifdef __cplusplus
// #include <iostream>
// #include <fstream>
#include <cstdio>
#include <cstdlib>
#include <cstdint>
#include <cstring>
#else
#include <stdio.h>
#include <stdlib.h> // for malloc
#include <stdint.h>
#include <stdbool.h>
#include <string.h> // for strlen, strcopy
#endif
#ifdef __linux__
//linux specific code goes here
#elif _WIN32
// windows specific code goes here
#pragma warning(disable:4458)
#include <Windows.h>
#include <ObjIdl.h>
#include <minmax.h>
#include <gdiplus.h>
// #include <gdiplusheaders.h>
// #include <wingdi.h>
// #include <gdiplusbitmap.h>
// #include <gdiplusflat.h>
// #include <Gdipluspixelformats.h>
#pragma comment (lib,"gdiplus.lib")
// using namespace Gdiplus;
#pragma warning(default:4458)
#else
#endif
#define BMP_HEADER_SIZE 54
#define DIB_HEADER_SIZE 40
// Correct values for the header
#define MAGIC_VALUE 0x4D42
#define NUM_PLANE 1
#define COMPRESSION 0
#define NUM_COLORS 0
#define IMPORTANT_COLORS 0
#define BITS_PER_BYTE 8
// #define BITS_PER_PIXEL 24
#define BITS_PER_PIXEL 32
#ifdef _MSC_VER
#pragma pack(push) // save the original data alignment
#pragma pack(1) // Set data alignment to 1 byte boundary
#endif
typedef struct
#ifndef _MSC_VER
__attribute__((packed))
#endif
{
uint16_t type; // Magic identifier: 0x4d42
uint32_t size; // File size in bytes
uint16_t reserved1; // Not used
uint16_t reserved2; // Not used
uint32_t offset; // Offset to image data in bytes from beginning of file
uint32_t dib_header_size; // DIB Header size in bytes
int32_t width_px; // Width of the image
int32_t height_px; // Height of image
uint16_t num_planes; // Number of color planes
uint16_t bits_per_pixel; // Bits per pixel
uint32_t compression; // Compression type
uint32_t image_size_bytes; // Image size in bytes
int32_t x_resolution_ppm; // Pixels per meter
int32_t y_resolution_ppm; // Pixels per meter
uint32_t num_colors; // Number of colors
uint32_t important_colors; // Important colors
} BMPHeader;
#ifdef _MSC_VER
#pragma pack(pop) // restore the previous pack setting
#endif
typedef struct {
BMPHeader header;
// unsigned char* data;
// It is more informative and will force a necessary compiler error
// on a rare machine with 16-bit char.
uint8_t* data;
} BMPImage;
// #define USE_GDI true
#ifndef USE_GDI
BMPImage * CreateBitmapFromScan0(int32_t w, int32_t h, uint8_t* scan0);
#endif
void WriteBitmapToFile(const char *filename, int width, int height, const void* buffer);
#endif //BITMAPLION_BITMAPINFORMATION_H

Corrupted heap while display a BMP image on console

I have a exercise. It says, that the C program should be able to read the information of a bitmap file and after that it should display the picture on console.
I have already written a code but when it does not work correctly.
When I debugged the code it looks like the heap is corrupted. I thinks I have a known glitch/mistake in ScanPixelline function.
I don't know how to fix it. Can someone help me to check it?
I am relatively new to C programming.
#include "stdafx.h"
#include <conio.h>
#include <stdio.h>
#include <stdlib.h>
#include "stdint.h"
#include "windows.h"
#pragma pack(1)
struct BMP
{
char Type[2]; //File type. Set to "BM".
int32_t Size; //Size in BYTES of the file.
int16_t Reserved1; //Reserved. Set to zero.
int16_t Reserved2; //Reserved. Set to zero.
int32_t OffSet; //Offset to the data.
int32_t headsize; //Size of rest of header. Set to 40.
int32_t Width; //Width of bitmap in pixels.
int32_t Height; // Height of bitmap in pixels.
int16_t Planes; //Number of Planes. Set to 1.
int16_t BitsPerPixel; //Number of Bits per pixels.
int32_t Compression; //Compression. Usually set to 0.
int32_t SizeImage; //Size in bytes of the bitmap.
int32_t XPixelsPreMeter; //Horizontal pixels per meter.
int32_t YPixelsPreMeter; //Vertical pixels per meter.
int32_t ColorsUsed; //Number of colors used.
int32_t ColorsImportant; //Number of "important" colors.
};
struct Color
{
unsigned char B;
unsigned char G;
unsigned char R;
};
struct ColorTable
{
Color *colors;
unsigned long length;
};
struct PixelArray
{
Color **pixels;
unsigned long rowCount;
unsigned long columnCount;
};
void readBMP(char *File_Name, BMP &a)
{
FILE *p = fopen(File_Name, "rb");
if (p == NULL)
{
printf("Can't open file!");
fclose(p);
return;
}
else
{
fread(&a, sizeof(BMP), 1, p);
}
fclose(p);
}
void Get_Inf(BMP a)
{
if (a.Type[0] != 'B' || a.Type[1] != 'M')
{
printf("This is not a BMP file");
}
else
{
printf("This is a BMP file\n");
printf("The size of this file is %lu bytes\n", a.Size);
printf("The witdth of this image is %lu pixels\n", a.Width);
printf("The height of this image is %lu pixels\n", a.Height);
printf("The number of bits per pixels in this image is %u\n", a.BitsPerPixel);
}
}
void scanBmpPixelLine(Color *&line, unsigned long length)
{
FILE *pointer_ = fopen("test.bmp", "rb");
line = new Color[length];
fread(line, sizeof(Color), sizeof(Color)*length, pointer_);
fclose(pointer_);
//file.read((char *)line, length * sizeof(Color));
}
void skipBmpPadding(char count)
{
FILE *pointer__ = fopen("test.bmp", "rb");
if (count == 0)
{
fclose(pointer__);
return;
}
char padding[3];
fread(&padding, sizeof(char), count, pointer__);
fclose(pointer__);
//file.read((char *)&padding, count);
}
void ReadPixelArray(BMP a, PixelArray &data)
{
FILE *pointer = fopen("test.bmp", "rb");
data.rowCount = a.Height;
data.columnCount = a.Width;
data.pixels = new Color*[data.rowCount];
char paddingCount = (4 - (a.Width * (a.BitsPerPixel / 8) % 4)) % 4;
fseek(pointer, 54, SEEK_SET);
for (int i = 0; i < data.rowCount; i++)
{
scanBmpPixelLine(data.pixels[data.rowCount - i - 1], a.Width);
skipBmpPadding(paddingCount);
}
}
void drawBmp(BMP a, PixelArray data)
{
HWND console = GetConsoleWindow();
HDC hdc = GetDC(console);
for (int i = 0; i < a.Height; i++)
for (int j = 0; j < a.Width; j++)
{
Color pixel = data.pixels[i][j];
SetPixel(hdc, j, i, RGB(pixel.R, pixel.G, pixel.B));
}
ReleaseDC(console, hdc);
}
void releaseBmpPixelArray(PixelArray data)
{
for (int i = 0; i < data.rowCount; i++)
delete[]data.pixels[i];
delete[]data.pixels;
}
int main()
{
char file_name[] = "test.bmp";
BMP a;
PixelArray data;
readBMP(file_name, a);
Get_Inf(a);
ReadPixelArray(a, data);
drawBmp(a, data);
releaseBmpPixelArray(data);
}

This function:
void scanBmpPixelLine(Color *&line, unsigned long length)
{
FILE *pointer_ = fopen("test.bmp", "rb");
line = new Color[length];
fread(line, sizeof(Color), sizeof(Color)*length, pointer_);
fclose(pointer_);
//file.read((char *)line, length * sizeof(Color));
}
For starters, the intent of the function appears to be to read one line of pixel data from the file. But instead, it's re-opening the file and reading from the beginning (where the header bytes are). I'm not sure if you are aware of that...
But the crash is a result of this line:
fread(line, sizeof(Color), sizeof(Color)*length, pointer_);
The second parameter, sizeof(Color), is the size of each element. The third parameter is the number of elements to read. The total bytes read from the file will be the multiplication of the second parameter by the third parameter. So you've redundantly multiplied by sizeof(Color) one too many times. The result is that it will overwrite the line buffer.
To fix, it should be:
fread(line, sizeof(Color), length, pointer_);
You probably want to pass the FILE* pointer obtained from your ReadPixelArray function into this function instead of re-opening the file for every line.
Another code review comment. You should just read the entire file into memory instead of redundantly opening and closing the file for each operation. Then parse the header and set a pointer to the first "line" after the header.

AccessViolationException when using C++ DLL from C#

I have a C++ DLL for use from C#. I have a function which takes a string passed to it, and I have those set on the C++ function parameters as const char * like so:
int __stdcall extract_all_frames(const char* szDestination, float scaleFactor)
The main body of this function is copied directly from a working FFmpeg example function so I'm almost certain the problem isn't there. I feel like the problem is in this modification I made to it:
//Open file
char szFilename[32];
sprintf_s(szFilename, sizeof(szFilename), "frame%d.ppm\0", iFrame);
// JOIN szFILENAME AND szDESTINATION
std::string buffer(szDestination, sizeof(szDestination));
buffer.append("\\");
buffer.append(szDestination);
Which is supposed to be a concatenated path and directory. I then pass buffer.c_str() into fopen_s(), which takes const char * not std::string. Whenever calling this function from C#, I get the following exception:
A first chance exception of type 'System.AccessViolationException' occurred in XRF FFmpeg Invoke Test.exe
Additional information: Attempted to read or write protected memory. This is often an indication that other memory is corrupt.
This is the complete code:
#include "stdafx.h"
#pragma comment (lib, "avcodec.lib")
#pragma comment (lib, "avformat.lib")
#pragma comment (lib, "avutil.lib")
#pragma comment (lib, "swscale.lib")
extern "C"
{
#include <libavcodec\avcodec.h>
#include <libavformat\avformat.h>
#include <libavutil\avutil.h>
#include <libswscale\swscale.h>
}
#include <string>
#include "Xrf.FFmpeg.hpp"
void save_frame(AVFrame* pFrame, int iFrame, const char* szDestination)
{
//Open file
char szFilename[32];
sprintf_s(szFilename, sizeof(szFilename), "frame%d.ppm\0", iFrame);
// JOIN szFILENAME AND szDESTINATION
std::string buffer(szDestination, sizeof(szDestination));
buffer.append("\\");
buffer.append(szDestination);
FILE* pFile;
errno_t openError = fopen_s(&pFile, buffer.c_str(), "wb");
if (pFile == NULL)
{
return;
}
//Write header
int width = pFrame->width;
int height = pFrame->height;
fprintf(pFile, "P6\n%d %d\n255\n", width, height);
//Write pixel data
for (int y = 0; y < height; y++)
{
fwrite(pFrame->data[0] + y * pFrame->linesize[0], 1, width * 3, pFile);
}
// Close file
fclose(pFile);
}
int __stdcall extract_all_frames(const char* szPath, const char* szDestination, float scaleFactor)
{
// Check if scaleFactor is valid
if ((scaleFactor != 0.f) &&
(scaleFactor > 3.f))
{
fprintf(stderr, "Xrf: Scale factor '%f' out of bounds!\nMust be greater than 0, and less then or equal to 3.0.\n", scaleFactor);
return -1;
}
// Register all formats and codecs
av_register_all();
AVFormatContext* pFormatCtx;
if (avformat_open_input(&pFormatCtx, szPath, nullptr, nullptr) != 0)
{
fprintf(stderr, "libavformat: Couldn't open file '%s'!\n", szPath);
return -1;
}
// Retrieve stream information
if (avformat_find_stream_info(pFormatCtx, nullptr) < 0)
{
fprintf(stderr, "libavformat: Unable to find stream information!\n");
return -1;
}
// Dump information about file onto standard error
av_dump_format(pFormatCtx, 0, szPath, 0);
// Find the first video stream
size_t i;
int videoStream = -1;
for (i = 0; i < pFormatCtx->nb_streams; i++)
{
if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
{
videoStream = i;
break;
}
}
if (videoStream == -1)
{
fprintf(stderr, "libavformat: No video stream found!\n");
return -1;
}
// Get a pointer to the codec context for the video stream
AVCodecContext* pCodecCtx = pFormatCtx->streams[videoStream]->codec;
// Scale the frame
int scaleHeight = static_cast<int>(floor(pCodecCtx->height * scaleFactor));
int scaleWidth = static_cast<int>(floor(pCodecCtx->width * scaleFactor));
//Check if frame sizes are valid (not 0, because that's dumb)
if (scaleHeight == 0 || scaleWidth == 0)
{
fprintf(stderr, "Xrf: Scale factor caused a zero value in either width or height!\n");
return -1;
}
// Find the decoder for the video stream
AVCodec* pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
if (pCodec == NULL)
{
fprintf(stderr, "libavcodec: Unsupported codec!\n");
return -1; // Codec not found
}
// Open codec
AVDictionary* optionsDict = nullptr;
if (avcodec_open2(pCodecCtx, pCodec, &optionsDict) < 0)
{
fprintf(stderr, "libavcodec: Couldn't open codec '%s'!\n", pCodec->long_name);
return -1;
}
// Allocate video frame
AVFrame* pFrame = av_frame_alloc();
// Allocate an AVFrame structure
AVFrame* pFrameRGB = av_frame_alloc();
if (pFrameRGB == NULL)
{
fprintf(stderr, "libavformat: Unable to allocate a YUV->RGB resampling AVFrame!\n");
return -1;
}
// Determine required buffer size and allocate buffer
int numBytes = avpicture_get_size(PIX_FMT_RGB24, scaleWidth, scaleHeight);
uint8_t* buffer = static_cast <uint8_t *> (av_malloc(numBytes * sizeof(uint8_t)));
struct SwsContext* sws_ctx = sws_getContext(pCodecCtx->width,
pCodecCtx->height,
pCodecCtx->pix_fmt,
scaleWidth,
scaleHeight,
PIX_FMT_RGB24,
SWS_BILINEAR,
nullptr, nullptr, nullptr);
// Assign appropriate parts of buffer to image planes in pFrameRGB
// Note that pFrameRGB is an AVFrame, but AVFrame is a superset
// of AVPicture
avpicture_fill(reinterpret_cast <AVPicture *> (pFrameRGB),
buffer,
PIX_FMT_RGB24,
scaleWidth,
scaleHeight);
// Read frames and save first five frames to disk
AVPacket packet;
int frameFinished;
while (av_read_frame(pFormatCtx, &packet) >= 0)
{
// Is this a packet from the video stream?
if (packet.stream_index == videoStream)
{
// Decode video frame
avcodec_decode_video2(pCodecCtx, pFrame, &frameFinished, &packet);
// Did we get a video frame?
if (frameFinished)
{
// Convert the image from its native format to RGB
sws_scale(sws_ctx,
static_cast <uint8_t const * const *> (pFrame->data),
pFrame->linesize,
0,
pCodecCtx->height,
pFrameRGB->data,
pFrameRGB->linesize);
// Save the frame to disk
if (++i <= 5)
{
save_frame(pFrameRGB, i, szDestination);
}
}
}
// Free the packet that was allocated by av_read_frame
av_free_packet(&packet);
}
av_free(buffer); // Free the RGB image
av_free(pFrameRGB);
av_free(pFrame); // Free the YUV frame
avcodec_close(pCodecCtx); // Close the codec
avformat_close_input(&pFormatCtx); // Close the video file
return 0;
}
I don't know if the error is in my modification (most likely, I'm extremely new to C++), or the other code, as the exception only throws on the invocation line in C#, not the actual C++ line causing the problem.

This is wrong:
std::string buffer(szDestination, sizeof(szDestination));
szDestination is a pointer, thus sizeof(szDestination) will return the pointer size, in bytes, not the number of characters.
If szDestination is a null terminated string, use strlen or similar function to determine the number of characters. If it isn't null terminated, then you need to pass the number of bytes to copy as a parameter.
The better thing to do is when your DLL function is called:
int __stdcall extract_all_frames(const char* szPath, const char* szDestination, float scaleFactor)
take those pointers and immediately assign them to std::string. Then drop all usage of char* or const char* from there. There is no need for your helper functions to deal with "dumb" character pointers.
Example:
int __stdcall extract_all_frames(const char* szPath, const char* szDestination, float scaleFactor)
{
std::string sPath = szPath;
std::string sDestination = sDestination;
// From here, use sPath and sDestination
//...
}
// redefinition of save_frame
//...
void save_frame(AVFrame* pFrame, int iFrame, const std::string& szDestination)
{
//Open file
std::string buffer = "frame" + to_string(iFrame) + ".ppm\0";
buffer.append("\\");
buffer.append(szDestination);
//...
}

RGB to greyscale conversion using CUDA

So I am trying to write a program that turns RGB images to greyscale.
I got the idea from the Udacity problem set. The problem is that when I write out the kernel in the Udacity web environment, it says my code works, however, when I try to do it locally on my computer, I get no errors, but my image instead of coming out greyscale, comes out completely grey. It looks like one grey box the dimensions of the image I loaded. Can you help me find the error in my code, I've compared it with the Udacity version and I can't seem to find it.
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <string>
#include <cuda.h>
#include <stdio.h>
#include <opencv\cv.h>
#include <opencv\highgui.h>
#include <iostream>
#define CUDA_ERROR_CHECK
#define CudaSafeCall( err ) __cudaSafeCall( err, __FILE__, __LINE__ )
#define CudaCheckError() __cudaCheckError( __FILE__, __LINE__ )
inline void __cudaSafeCall(cudaError err, const char *file, const int line)
{
#ifdef CUDA_ERROR_CHECK
if (cudaSuccess != err)
{
fprintf(stderr, "cudaSafeCall() failed at %s:%i : %s\n",
file, line, cudaGetErrorString(err));
exit(-1);
}
#endif
return;
}
inline void __cudaCheckError(const char *file, const int line)
{
#ifdef CUDA_ERROR_CHECK
cudaError err = cudaGetLastError();
if (cudaSuccess != err)
{
fprintf(stderr, "cudaCheckError() failed at %s:%i : %s\n",
file, line, cudaGetErrorString(err));
exit(-1);
}
err = cudaDeviceSynchronize();
if (cudaSuccess != err)
{
fprintf(stderr, "cudaCheckError() with sync failed at %s:%i : %s\n",
file, line, cudaGetErrorString(err));
exit(-1);
}
#endif
return;
}
__global__ void rgb_2_grey(uchar* const greyImage, const uchar4* const rgbImage, int rows, int columns)
{
int rgb_x = blockIdx.x * blockDim.x + threadIdx.x; //x coordinate of pixel
int rgb_y = blockIdx.y * blockDim.y + threadIdx.y; //y coordinate of pixel
if ((rgb_x >= columns) && (rgb_y >= rows)) {
return;
}
int rgb_ab = rgb_y*columns + rgb_x; //absolute pixel position
uchar4 rgb_Img = rgbImage[rgb_ab];
greyImage[rgb_ab] = uchar((float(rgb_Img.x))*0.299f + (float(rgb_Img.y))*0.587f + (float(rgb_Img.z))*0.114f);
}
using namespace cv;
using namespace std;
void Proc_Img(uchar4** h_RGBImage, uchar** h_greyImage, uchar4 **d_RGBImage, uchar** d_greyImage);
void RGB_2_Greyscale(uchar* const d_greyImage, uchar4* const d_RGBImage, size_t num_Rows, size_t num_Cols);
void Save_Img();
Mat img_RGB;
Mat img_Grey;
uchar4 *d_rgbImg;
uchar *d_greyImg;
int main()
{
uchar4* h_rgbImg;
//uchar4* d_rgbImge=0;
uchar* h_greyImg;
//uchar* d_greyImge=0;
Proc_Img(&h_rgbImg, &h_greyImg, &d_rgbImg, &d_greyImg);
RGB_2_Greyscale(d_greyImg, d_rgbImg, img_RGB.rows, img_RGB.cols);
Save_Img();
return 0;
}
void Proc_Img(uchar4** h_RGBImage, uchar** h_greyImage, uchar4 **d_RGBImage, uchar** d_greyImage){
cudaFree(0);
CudaCheckError();
//loads image into a matrix object along with the colors in BGR format (must convert to rgb).
Mat img = imread("C:\\Users\\Austin\\Pictures\\wallpapers\\IMG_3581.JPG", CV_LOAD_IMAGE_COLOR);
if (img.empty()){
cerr << "couldnt open file dumbas..." << "C:\\Users\\Austin\\Pictures\\wallpapers\\IMG_3581.JPG" << endl;
exit(1);
}
//converts color type from BGR to RGB
cvtColor(img, img_RGB, CV_BGR2RGBA);
//allocate memory for new greyscale image.
//img.rows returns the range of pixels in y, img.cols returns range of pixels in x
//CV_8UC1 means 8 bit unsigned(non-negative) single channel of color, aka greyscale.
//all three of the parameters allow the create function in the Mat class to determine how much memory to allocate
img_Grey.create(img.rows, img.cols, CV_8UC1);
//creates rgb and greyscale image arrays
*h_RGBImage = (uchar4*)img_RGB.ptr<uchar>(0); //.ptr is a method in the mat class that returns a pointer to the first element of the matrix.
*h_greyImage = (uchar*)img_Grey.ptr<uchar>(0); //this is just like a regular array/pointer mem address to first element of the array. This is templated
//in this case the compiler runs the function for returning pointer of type unsigned char. for rgb image it is
//cast to uchar4 struct to hold r,g, and b values.
const size_t num_pix = (img_RGB.rows) * (img_RGB.cols); //amount of pixels
//allocate memory on gpu
cudaMalloc(d_RGBImage, sizeof(uchar4) * num_pix); //bites of 1 uchar4 times # of pixels gives number of bites necessary for array
CudaCheckError();
cudaMalloc(d_greyImage, sizeof(uchar) * num_pix);//bites of uchar times # pixels gives number of bites necessary for array
CudaCheckError();
cudaMemset(*d_greyImage, 0, sizeof(uchar) * num_pix);
CudaCheckError();
//copy array into allocated space
cudaMemcpy(*d_RGBImage, *h_RGBImage, sizeof(uchar4)*num_pix, cudaMemcpyHostToDevice);
CudaCheckError();
d_rgbImg = *d_RGBImage;
d_greyImg = *d_greyImage;
}
void RGB_2_Greyscale(uchar* const d_greyImage, uchar4* const d_RGBImage, size_t num_Rows, size_t num_Cols){
const int BS = 16;
const dim3 blockSize(BS, BS);
const dim3 gridSize((num_Cols / BS) + 1, (num_Rows / BS) + 1);
rgb_2_grey <<<gridSize, blockSize>>>(d_greyImage, d_RGBImage, num_Rows, num_Cols);
cudaDeviceSynchronize(); CudaCheckError();
}
void Save_Img(){
const size_t num_pix = (img_RGB.rows) * (img_RGB.cols);
cudaMemcpy(img_Grey.ptr<uchar>(0), d_greyImg, sizeof(uchar)*num_pix, cudaMemcpyDeviceToHost);
CudaCheckError();
imwrite("C:\\Users\\Austin\\Pictures\\wallpapers\\IMG_3581GR.JPG", img_Grey);
cudaFree(d_rgbImg);
cudaFree(d_greyImg);
}
EDIT: I realized that the local var in my main is the same name as the global var, I have edited the code here, now I get the error from visual studio that the
variable d_rgbIme is being used without being initialized
when I have already initialized it above. If I set them equal to zero I get a CUDA error saying
an illegal memory access was encountered
I tried running cuda-memcheck, but then I get the error that i could not run the file...

I have found the error thanks to one of the comments by Robert Crovella, he has been very helpful with this! it is in my kernel the if statement should read if ((rgb_x >= columns) || (rgb_y >= rows)) {

I was working on the same problem in JCUDA. See if you can use any part of this solution:
//Read Height and Width of image in Height & Width variables
int Width = image.getWidth();
int Height = image.getHeight();
int N = Height * Width;
int[] grayScale = new int[N];
//Allocate separate arrays to store Alpha, Red, Green and
//Blue values for every pixel
int[] redHost = new int[N];
int[] greenHost = new int[N];
int[] blueHost = new int[N];
int[] alphaHost = new int[N];
for(int i=0; i<Height; i++)
{
for(int j=0; j<Width; j++)
{
int pixel = image.getRGB(j, i);
//Read the ARGB data
alphaHost[i*Width+j] = (pixel >> 24) & 0xff;
redHost[i*Width+j] = (pixel >> 16) & 0xff;
greenHost[i*Width+j] = (pixel >> 8) & 0xff;
blueHost[i*Width+j] = (pixel) & 0xff;
}
}
/* Following are the CUDA Kernel parameters*/
Pointer kernelParameters = Pointer.to(
Pointer.to(new int[]{N}), //Total size of each array W * H
Pointer.to(redDev), // Pointer to redArray on device
Pointer.to(greenDev), // Pointer to greenArray on device
Pointer.to(blueDev), // Pointer to blueArray on device
Pointer.to(Output)); //Pointer to output array
/*Following is my RGBToGrayScale.cu..i.e. CUDA Kernel */
__global__ void RGBtoGrayScale(int N, int *red, int *green, int *blue, int *Output)
{
int id = blockIdx.x * blockDim.x + threadIdx.x;
if(id<N)
{
Output[id] = (red[id]*0.2989) + (green[id]*0.587) + (blue[id]*0.114);
}
}
/* Get the output data back to Host memory */
cuMemcpyDtoH(Pointer.to(grayScale), Output, N * Sizeof.INT);
/* Write the image with the new RBG values*/
BufferedImage im = new BufferedImage(Width,Height,BufferedImage.TYPE_BYTE_GRAY);
WritableRaster raster = im.getRaster();
for(int i=0;i<Height;i++)
{
for(int j=0;j<Width;j++)
{
raster.setSample(j, i, 0, grayScale[i*Width+j]);
}
}
try
{
ImageIO.write(im,"JPEG",new File("glpattern.jpeg"));
} catch (IOException e)
{
e.printStackTrace();
}

Examples or tutorials of using libjpeg-turbo's TurboJPEG

The instructions for libjpeg-turbo here describes the TurboJPEG API thus: "This API wraps libjpeg-turbo and provides an easy-to-use interface for compressing and decompressing JPEG images in memory". Great, but are there some solid examples of using this API available? Just looking to decompress a fairly vanilla jpeg in memory.
I've found a few bits such as https://github.com/erlyvideo/jpeg/blob/master/c_src/jpeg.c, which appears to be using the TurboJPEG API, but are there any more solid/varied examples?
The source for libjpeg-turbo is well documented, so that does help.

Ok, I know that you did already solve your problem, but as some people, just like me, could be searching some simple example I will share what I created.
It is an example, compressing and decompressing an RGB image. Otherwise I think that the API documentation of TurboJPEG is quite easy to understand!
Compression:
#include <turbojpeg.h>
const int JPEG_QUALITY = 75;
const int COLOR_COMPONENTS = 3;
int _width = 1920;
int _height = 1080;
long unsigned int _jpegSize = 0;
unsigned char* _compressedImage = NULL; //!< Memory is allocated by tjCompress2 if _jpegSize == 0
unsigned char buffer[_width*_height*COLOR_COMPONENTS]; //!< Contains the uncompressed image
tjhandle _jpegCompressor = tjInitCompress();
tjCompress2(_jpegCompressor, buffer, _width, 0, _height, TJPF_RGB,
&_compressedImage, &_jpegSize, TJSAMP_444, JPEG_QUALITY,
TJFLAG_FASTDCT);
tjDestroy(_jpegCompressor);
//to free the memory allocated by TurboJPEG (either by tjAlloc(),
//or by the Compress/Decompress) after you are done working on it:
tjFree(&_compressedImage);
After that you have the compressed image in _compressedImage.
To decompress you have to do the following:
Decompression:
#include <turbojpeg.h>
long unsigned int _jpegSize; //!< _jpegSize from above
unsigned char* _compressedImage; //!< _compressedImage from above
int jpegSubsamp, width, height;
unsigned char buffer[width*height*COLOR_COMPONENTS]; //!< will contain the decompressed image
tjhandle _jpegDecompressor = tjInitDecompress();
tjDecompressHeader2(_jpegDecompressor, _compressedImage, _jpegSize, &width, &height, &jpegSubsamp);
tjDecompress2(_jpegDecompressor, _compressedImage, _jpegSize, buffer, width, 0/*pitch*/, height, TJPF_RGB, TJFLAG_FASTDCT);
tjDestroy(_jpegDecompressor);
Some random thoughts:
I just came back over this as I am writing my bachelor thesis, and I noticed that if you run the compression in a loop it is preferable to store the biggest size of the JPEG buffer to not have to allocate a new one every turn. Basically, instead of doing:
long unsigned int _jpegSize = 0;
tjCompress2(_jpegCompressor, buffer, _width, 0, _height, TJPF_RGB,
&_compressedImage, &_jpegSize, TJSAMP_444, JPEG_QUALITY,
TJFLAG_FASTDCT);
we would add an object variable, holding the size of the allocated memory long unsigned int _jpegBufferSize = 0; and before every compression round we would set the jpegSize back to that value:
long unsigned int jpegSize = _jpegBufferSize;
tjCompress2(_jpegCompressor, buffer, _width, 0, _height, TJPF_RGB,
&_compressedImage, &jpegSize, TJSAMP_444, JPEG_QUALITY,
TJFLAG_FASTDCT);
_jpegBufferSize = _jpegBufferSize >= jpegSize? _jpegBufferSize : jpegSize;
after the compression one would compare the memory size with the actual jpegSize and set it to the jpegSize if it is higher than the previous memory size.

I ended up using below code as a working example for both JPEG encoding and decoding. Best example that I can find, it's self-contained that initializes a dummy image and output the encoded image to a local file.
Below code is NOT my own, credit goes to https://sourceforge.net/p/libjpeg-turbo/discussion/1086868/thread/e402d36f/#8722 . Posting it here again to help anyone finds it's difficult to get libjpeg turbo working.
#include "turbojpeg.h"
#include <iostream>
#include <string.h>
#include <errno.h>
using namespace std;
int main(void)
{
unsigned char *srcBuf; //passed in as a param containing pixel data in RGB pixel interleaved format
tjhandle handle = tjInitCompress();
if(handle == NULL)
{
const char *err = (const char *) tjGetErrorStr();
cerr << "TJ Error: " << err << " UNABLE TO INIT TJ Compressor Object\n";
return -1;
}
int jpegQual =92;
int width = 128;
int height = 128;
int nbands = 3;
int flags = 0;
unsigned char* jpegBuf = NULL;
int pitch = width * nbands;
int pixelFormat = TJPF_GRAY;
int jpegSubsamp = TJSAMP_GRAY;
if(nbands == 3)
{
pixelFormat = TJPF_RGB;
jpegSubsamp = TJSAMP_411;
}
unsigned long jpegSize = 0;
srcBuf = new unsigned char[width * height * nbands];
for(int j = 0; j < height; j++)
{
for(int i = 0; i < width; i++)
{
srcBuf[(j * width + i) * nbands + 0] = (i) % 256;
srcBuf[(j * width + i) * nbands + 1] = (j) % 256;
srcBuf[(j * width + i) * nbands + 2] = (j + i) % 256;
}
}
int tj_stat = tjCompress2( handle, srcBuf, width, pitch, height,
pixelFormat, &(jpegBuf), &jpegSize, jpegSubsamp, jpegQual, flags);
if(tj_stat != 0)
{
const char *err = (const char *) tjGetErrorStr();
cerr << "TurboJPEG Error: " << err << " UNABLE TO COMPRESS JPEG IMAGE\n";
tjDestroy(handle);
handle = NULL;
return -1;
}
FILE *file = fopen("out.jpg", "wb");
if (!file) {
cerr << "Could not open JPEG file: " << strerror(errno);
return -1;
}
if (fwrite(jpegBuf, jpegSize, 1, file) < 1) {
cerr << "Could not write JPEG file: " << strerror(errno);
return -1;
}
fclose(file);
//write out the compress date to the image file
//cleanup
int tjstat = tjDestroy(handle); //should deallocate data buffer
handle = 0;
}

In the end I used a combination of random code found on the internet (e.g. https://github.com/erlyvideo/jpeg/blob/master/c_src/jpeg.c) and the .c and header files for libjeg-turbo, which are well documented.
This official API is a good information source aswell.

Here's a fragment of code what I use to load jpeg's from memory. Maybe it will require a bit of fixing, because I extracted it from different files in my project. It will load both - grayscale and rgb images (bpp will be set either to 1 or to 3).
struct Image
{
int bpp;
int width;
int height;
unsigned char* data;
};
struct jerror_mgr
{
jpeg_error_mgr base;
jmp_buf jmp;
};
METHODDEF(void) jerror_exit(j_common_ptr jinfo)
{
jerror_mgr* err = (jerror_mgr*)jinfo->err;
longjmp(err->jmp, 1);
}
METHODDEF(void) joutput_message(j_common_ptr)
{
}
bool Image_LoadJpeg(Image* image, unsigned char* img_data, unsigned int img_size)
{
jpeg_decompress_struct jinfo;
jerror_mgr jerr;
jinfo.err = jpeg_std_error(&jerr.base);
jerr.base.error_exit = jerror_exit;
jerr.base.output_message = joutput_message;
jpeg_create_decompress(&jinfo);
image->data = NULL;
if (setjmp(jerr.jmp)) goto bail;
jpeg_mem_src(&jinfo, img_data, img_size);
if (jpeg_read_header(&jinfo, TRUE) != JPEG_HEADER_OK) goto bail;
jinfo.dct_method = JDCT_FLOAT; // change this to JDCT_ISLOW on Android/iOS
if (!jpeg_start_decompress(&jinfo)) goto bail;
if (jinfo.num_components != 1 && jinfo.num_components != 3) goto bail;
image->data = new (std::nothrow) unsigned char [jinfo.output_width * jinfo.output_height * jinfo.output_components];
if (!image->data) goto bail;
{
JSAMPROW ptr = image->data;
while (jinfo.output_scanline < jinfo.output_height)
{
if (jpeg_read_scanlines(&jinfo, &ptr, 1) != 1) goto bail;
ptr += jinfo.output_width * jinfo.output_components;
}
}
if (!jpeg_finish_decompress(&jinfo)) goto bail;
image->bpp = jinfo.output_components;
image->width = jinfo.output_width;
image->height = jinfo.output_height;
jpeg_destroy_decompress(&jinfo);
return true;
bail:
jpeg_destroy_decompress(&jinfo);
if (image->data) delete [] data;
return false;
}

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

C/CUDA: Only every fourth element in CudaArray can be indexed - c++

Related

How to create a bitmap/image from Scan0?

Corrupted heap while display a BMP image on console

AccessViolationException when using C++ DLL from C#

RGB to greyscale conversion using CUDA

Examples or tutorials of using libjpeg-turbo's TurboJPEG

Categories

Resources