I have a binary data file that contains 2d and 3d coordinates in such order:
uint32 numberOfUVvectors;
2Dvec uv[numberOfUVvectors];
uint32 numberOfPositionVectors;
3Dvec position[numberOfPositionVectors];
uint32 numberOfNormalVectors;
3Dvec normal[numberOfNormalVectors];
2Dvec and 3Dvec are structs composed from 2 and 3 floats respectively.
At first, I read all these values using the "usual" way:
in.read(reinterpret_cast<char *>(&num2d), sizeof(uint32));
2Dvectors.reserve(num2d); // It's for an std::vector<2DVec> 2Dvectors();
for (int i = 0; i < num2d; i++){
2Dvec 2Dvector;
in.read(reinterpret_cast<char *>(&2Dvector), sizeof(2DVec));
2Dvectors.push_back(2Dvector);
}
It worked fine, but it was painfully slow (there can be more than 200k entries in a file and with so many read calls, the hdd access became a bottleneck). I decided to read the entire file into a buffer at once:
in.seekg (0, in.end);
int length = in.tellg();
in.seekg (0, in.beg);
char * buffer = new char [length];
is.read (buffer,length);
The reading is way faster now, but here's the question: how to parse that char buffer back into integers and structs?
To answer your specific question:
unsigned char * pbuffer = (unsigned char *)buffer;
uint32 num2d = *((uint32 *)pbuffer);
pbuffer += sizeof(uint32);
if(num2d)
{
2Dvec * p2Dvec = (2Dvec *)pbuffer;
2Dvectors.assign(p2Dvec, p2Dvec + num2d);
pbuffer += (num2d * sizeof(2Dvec));
}
uint32 numpos = *((uint32 *)pbuffer);
pbuffer += sizeof(uint32);
if(numpos)
{
3Dvec * p3Dvec = (3Dvec *)pbuffer;
Pos3Dvectors.assign(p3Dvec, p3Dvec + numpos);
pbuffer += (numpos * sizeof(3Dvec));
}
uint32 numnorm = *((uint32 *)pbuffer);
pbuffer += sizeof(uint32);
if(numnorm)
{
3Dvec * p3Dvec = (3Dvec *)pbuffer;
Normal3Dvectors.assign(p3Dvec, p3Dvec + numnorm);
pbuffer += (numnorm * sizeof(3Dvec));
}
// do not forget to release the allocated buffer
A an even faster way would be:
in.read(reinterpret_cast<char *>(&num2d), sizeof(uint32));
if(num2d)
{
2Dvectors.resize(num2d);
2Dvec * p2Dvec = &2Dvectors[0];
in.read(reinterpret_cast<char *>(&p2Dvec), num2d * sizeof(2Dvec));
}
//repeat for position & normal vectors
Use memcpy with the appropriate sizes and start values
or cast the values (example):
#include <iostream>
void copy_array(void *a, void const *b, std::size_t size, int amount)
{
std::size_t bytes = size * amount;
for (int i = 0; i < bytes; ++i)
reinterpret_cast<char *>(a)[i] = static_cast<char const *>(b)[i];
}
int main()
{
int a[10], b[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
copy_array(a, b, sizeof(b[0]), 10);
for (int i = 0; i < 10; ++i)
std::cout << a[i] << ' ';
}
Related
I know that 'Nearest' method of image resizing is the fastest method.
Nevertheless I search way to speed up it.
Evident step is a precalculate indices:
void CalcIndex(int sizeS, int sizeD, int colors, int* idx)
{
float scale = (float)sizeS / sizeD;
for (size_t i = 0; i < sizeD; ++i)
{
int index = (int)::floor((i + 0.5f) * scale)
idx[i] = Min(Max(index, 0), sizeS - 1) * colors;
}
}
template<int colors> inline void CopyPixel(const uint8_t* src, uint8_t* dst)
{
for (int i = 0; i < colors; ++i)
dst[i] = src[i];
}
template<int colors> void Resize(const uint8_t* src, int srcW, int srcH,
uint8_t* dst, int dstW, int dstH)
{
int idxY[dstH], idxX[dstW];//pre-calculated indices (see CalcIndex).
for (int dy = 0; dy < dstH; dy++)
{
const uint8_t * srcY = src + idxY[dy] * srcW * colors;
for (int dx = 0, offset = 0; dx < dstW; dx++, offset += colors)
CopyPixel<N>(srcY + idxX[dx], dst + offset);
dst += dstW * colors;
}
}
Are the next optimization steps exist? For example with using SIMD or some other optimization technic.
P.S. Especially I am interesting in optimization of RGB (Colors = 3).
And if I use current code I see that ARGB image (Colors = 4) is processing faster for 50% then RGB despite that it bigger for 30%.
The speed problem in (SIMD-based) resize algorithms comes from the mismatch of indexing input and output elements. When e.g. the resize factor is 6/5, one needs to consume 6 pixels and write 5. OTOH SIMD register width of 16 bytes maps to either 16 grayscale elements, 4 RGBA-elements or 5.33 RGB-elements.
My experience is that a sufficiently good performance (maybe not optimal, but often beating opencv and other freely available implementations) comes when trying to write 2-4 SIMD registers worth of data at a time, reading the required number of linear bytes from the input + some, and using pshufb in x86 SSSE3 or vtbl in Neon to gather load from the registers -- never from memory. One needs of course a fast mechanism to either calculate the LUT indices inline, or to precalculate the indices, which are shared between different output rows.
One should prepare to have several inner kernels depending on the input/output ratio of the (horizontal) resolution.
RGBrgbRGBrgbRGBr|gbRGBrgb .... <- input
^ where to load next 32 bytes of input
RGBRGBrgbRGBrgbr|gbRGBrgbRGBRGBrg| <- 32 output bytes, from
0000000000000000|0000001111111111| <- high bit of index
0120123456789ab9|abcdef0123423456| <- low 4 bits of index
Notice, that one can handle with the LUT method all channel counts
// inner kernel for downsampling between 1x and almost 2x*
// - we need to read max 32 elements and write 16
void process_row_ds(uint8_t const *input, uint8_t const *indices,
int const *advances, uint8_t *output, int out_width) {
do {
auto a = load16_bytes(input);
auto b = load16_bytes(input + 16);
auto c = load16_bytes(indices);
a = lut32(a,b,c); // get 16 bytes out of 32
store16_bytes(output, a);
output += 16;
input += *advances++;
} while (out_width--); // multiples of 16...
}
// inner kernel for upsampling between 1x and inf
void process_row_us(uint8_t const *input, uint8_t const *indices,
int const *advances, uint8_t *output, int out_width) {
do {
auto a = load16_bytes(input);
auto c = load16_bytes(indices);
a = lut16(a, c); // get 16 bytes out of 16
store16_bytes(output, a);
output += 16;
input += *advances++;
} while (out_width--);
}
I would also encourage to use some elementary filtering for downsampling, such as gaussian binomial kernels (1 1, 1 2 1, 1 3 3 1, 1 4 6 4 1, ...) along with hierarchical downsampling in addition to (at least) bilinear interpolation. It's of course possible that the application will tolerate aliasing artifacts -- the cost AFAIK is often not that large, especially given that otherwise the algorithm will be memory bound.
I think that using of _mm256_i32gather_epi32 (AVX2) can give some performance gain for resizing in case of 32 bit pixels:
inline void Gather32bit(const uint8_t * src, const int* idx, uint8_t* dst)
{
__m256i _idx = _mm256_loadu_si256((__m256i*)idx);
__m256i val = _mm256_i32gather_epi32((int*)src, _idx, 1);
_mm256_storeu_si256((__m256i*)dst, val);
}
template<> void Resize<4>(const uint8_t* src, int srcW, int srcH,
uint8_t* dst, int dstW, int dstH)
{
int idxY[dstH], idxX[dstW];//pre-calculated indices.
size_t dstW8 = dstW & (8 - 1);
for (int dy = 0; dy < dstH; dy++)
{
const uint8_t * srcY = src + idxY[dy] * srcW * 4;
int dx = 0, offset = 0;
for (; dx < dstW8; dx += 8, offset += 8*4)
Gather32bit(srcY, idxX + dx,dst + offset);
for (; dx < dstW; dx++, offset += 4)
CopyPixel<N>(srcY + idxX[dx], dst + offset);
dst += dstW * 4;
}
}
P.S. After some modification this method can be applied to RGB24:
const __m256i K8_SHUFFLE = _mm256_setr_epi8(
0x0, 0x1, 0x2, 0x4, 0x5, 0x6, 0x8, 0x9, 0xA, 0xC, 0xD, 0xE, -1, -1, -1, -1,
0x0, 0x1, 0x2, 0x4, 0x5, 0x6, 0x8, 0x9, 0xA, 0xC, 0xD, 0xE, -1, -1, -1, -1);
const __m256i K32_PERMUTE = _mm256_setr_epi32(0x0, 0x1, 0x2, 0x4, 0x5, 0x6, -1, -1);
inline void Gather24bit(const uint8_t * src, const int* idx, uint8_t* dst)
{
__m256i _idx = _mm256_loadu_si256((__m256i*)idx);
__m256i bgrx = _mm256_i32gather_epi32((int*)src, _idx, 1);
__m256i bgr = _mm256_permutevar8x32_epi32(
_mm256_shuffle_epi8(bgrx, K8_SHUFFLE), K32_PERMUTE);
_mm256_storeu_si256((__m256i*)dst, bgr);
}
template<> void Resize<3>(const uint8_t* src, int srcW, int srcH,
uint8_t* dst, int dstW, int dstH)
{
int idxY[dstH], idxX[dstW];//pre-calculated indices.
size_t dstW8 = dstW & (8 - 1);
for (int dy = 0; dy < dstH; dy++)
{
const uint8_t * srcY = src + idxY[dy] * srcW * 3;
int dx = 0, offset = 0;
for (; dx < dstW8; dx += 8, offset += 8*3)
Gather24bit(srcY, idxX + dx,dst + offset);
for (; dx < dstW; dx++, offset += 3)
CopyPixel<3>(srcY + idxX[dx], dst + offset);
dst += dstW * 3;
}
}
Note that if srcW < dstW then method of #Aki-Suihkonen is faster.
It’s possible to use SIMD, and I’m pretty sure it will help, unfortunately it’s relatively hard. Below is a simplified example which only supports image enlargements but not shrinking.
Still, I hope it might be useful as a starting point.
Both MSVC and GCC compile the hot loop in LineResize::apply method into 11 instructions. I think 11 instructions for 16 bytes should be faster than your version.
#include <stdint.h>
#include <emmintrin.h>
#include <tmmintrin.h>
#include <vector>
#include <array>
#include <assert.h>
#include <stdio.h>
// Implements nearest neighbor resize method for RGB24 or BGR24 bitmaps
class LineResize
{
// Each mask produces up to 16 output bytes.
// For enlargement exactly 16, for shrinking up to 16, possibly even 0.
std::vector<__m128i> masks;
// Length is the same as masks.
// For enlargement, the values contain source pointer offsets in bytes.
// For shrinking, the values contain destination pointer offsets in bytes.
std::vector<uint8_t> offsets;
// True if this class will enlarge images, false if it will shrink the width of the images.
bool enlargement;
void resizeFields( size_t vectors )
{
masks.resize( vectors, _mm_set1_epi32( -1 ) );
offsets.resize( vectors, 0 );
}
public:
// Compile the shuffle table. The arguments are line widths in pixels.
LineResize( size_t source, size_t dest );
// Apply the algorithm to a single line of the image.
void apply( uint8_t* rdi, const uint8_t* rsi ) const;
};
LineResize::LineResize( size_t source, size_t dest )
{
const size_t sourceBytes = source * 3;
const size_t destBytes = dest * 3;
assert( sourceBytes >= 16 );
assert( destBytes >= 16 );
// Possible to do much faster without any integer divides.
// Optimizing this sample for simplicity.
if( sourceBytes < destBytes )
{
// Enlarging the image, each SIMD vector consumes <16 input bytes, produces exactly 16 output bytes
enlargement = true;
resizeFields( ( destBytes + 15 ) / 16 );
int8_t* pMasks = (int8_t*)masks.data();
uint8_t* const pOffsets = offsets.data();
int sourceOffset = 0;
const size_t countVectors = masks.size();
for( size_t i = 0; i < countVectors; i++ )
{
const int destSlice = (int)i * 16;
std::array<int, 16> lanes;
int lane;
for( lane = 0; lane < 16; lane++ )
{
const int destByte = destSlice + lane; // output byte index
const int destPixel = destByte / 3; // output pixel index
const int channel = destByte % 3; // output byte within pixel
const int sourcePixel = destPixel * (int)source / (int)dest; // input pixel
const int sourceByte = sourcePixel * 3 + channel; // input byte
if( destByte < (int)destBytes )
lanes[ lane ] = sourceByte;
else
{
// Destination offset out of range, i.e. the last SIMD vector
break;
}
}
// Produce the offset
if( i == 0 )
assert( lanes[ 0 ] == 0 );
else
{
const int off = lanes[ 0 ] - sourceOffset;
assert( off >= 0 && off <= 16 );
pOffsets[ i - 1 ] = (uint8_t)off;
sourceOffset = lanes[ 0 ];
}
// Produce the masks
for( int j = 0; j < lane; j++ )
pMasks[ j ] = (int8_t)( lanes[ j ] - sourceOffset );
// The masks are initialized with _mm_set1_epi32( -1 ) = all bits set,
// no need to handle remainder for the last vector.
pMasks += 16;
}
}
else
{
// Shrinking the image, each SIMD vector consumes 16 input bytes, produces <16 output bytes
enlargement = false;
resizeFields( ( sourceBytes + 15 ) / 16 );
// Not implemented, but the same idea works fine for this too.
// The only difference, instead of using offsets bytes for source offsets, use it for destination offsets.
assert( false );
}
}
void LineResize::apply( uint8_t * rdi, const uint8_t * rsi ) const
{
const __m128i* pm = masks.data();
const __m128i* const pmEnd = pm + masks.size();
const uint8_t* po = offsets.data();
__m128i mask, source;
if( enlargement )
{
// One iteration of the loop produces 16 output bytes
// In MSVC results in 11 instructions for 16 output bytes.
while( pm < pmEnd )
{
mask = _mm_load_si128( pm );
pm++;
source = _mm_loadu_si128( ( const __m128i * )( rsi ) );
rsi += *po;
po++;
_mm_storeu_si128( ( __m128i * )rdi, _mm_shuffle_epi8( source, mask ) );
rdi += 16;
}
}
else
{
// One iteration of the loop consumes 16 input bytes
while( pm < pmEnd )
{
mask = _mm_load_si128( pm );
pm++;
source = _mm_loadu_si128( ( const __m128i * )( rsi ) );
rsi += 16;
_mm_storeu_si128( ( __m128i * )rdi, _mm_shuffle_epi8( source, mask ) );
rdi += *po;
po++;
}
}
}
// Utility method to print RGB pixel values from the vector
static void printPixels( const std::vector<uint8_t>&vec )
{
assert( !vec.empty() );
assert( 0 == ( vec.size() % 3 ) );
const uint8_t* rsi = vec.data();
const uint8_t* const rsiEnd = rsi + vec.size();
while( rsi < rsiEnd )
{
const uint32_t r = rsi[ 0 ];
const uint32_t g = rsi[ 1 ];
const uint32_t b = rsi[ 2 ];
rsi += 3;
const uint32_t res = ( r << 16 ) | ( g << 8 ) | b;
printf( "%06X ", res );
}
printf( "\n" );
}
// A triviual test to resize 24 pixels -> 32 pixels
int main()
{
constexpr int sourceLength = 24;
constexpr int destLength = 32;
// Initialize sample input with 24 RGB pixels
std::vector<uint8_t> input( sourceLength * 3 );
for( size_t i = 0; i < input.size(); i++ )
input[ i ] = (uint8_t)i;
printf( "Input: " );
printPixels( input );
// That special handling of the last pixels of last line is missing from this example.
static_assert( 0 == destLength % 16 );
LineResize resizer( sourceLength, destLength );
std::vector<uint8_t> result( destLength * 3 );
resizer.apply( result.data(), input.data() );
printf( "Output: " );
printPixels( result );
return 0;
}
The code ignores alignment issues. For production, you’d want another method for the last line of the image which doesn’t run to the end, instead handles the last few pixels with scalar code.
The code contains more memory references in the hot loop. However, the two vectors in that class are not too long, for 4k images the size is about 12kb, should fit in L1D cache and stay there.
If you have AVX2, will probably improve things further. For enlarging images, use _mm256_inserti128_si256, the vinserti128 instruction can load 16 bytes from memory into high half of the vector. Similarly, for downsizing images, use _mm256_extracti128_si256, the instruction has an option to use memory destination.
I am trying to take a bitmap image and get the RGB values of the pixels. What I currently have will open the bitmap file and read the pixel data:
#define _CRT_SECURE_NO_DEPRECATE
#include "findColor.h"
#include <vector>
#include <iostream>
int findColor(std::string path) {
std::vector<std::string> averageColor; //Will hold the average hex color of each image in order.
std::string currentImage;
currentImage = path + std::to_string(i) + ".btm";
FILE* f = fopen(currentImage.c_str(), "rb");
unsigned char info[54]; //Bitmap header is 54 bytes
fread(info, sizeof(unsigned char), 54, f); //reading the header
// extract image height and width from header
int width, height;
memcpy(&width, info + 18, sizeof(int));
memcpy(&height, info + 22, sizeof(int));
int heightSign = 1;
if (height < 0) {
heightSign = -1;
}
int size = 3 * width * height; //size of image in bytes. 3 bytes per pixel.
unsigned char* data = new unsigned char[size]; // allocate 3 bytes per pixel
fread(data, sizeof(unsigned char), size, f); // read the rest of the data at once
fclose(f); //close image.
for (i = 0; i < size; i += 3) //Flip the image data? It is stored as BGR flipping it to RGB?
{
unsigned char tmp = data[i-33];
data[i] = data[i + 2];
data[i + 2] = tmp;
}
return 0;
}
I really don't know where to go from here. Any responses will be appreciated.
Long ago I wrote a function in C to write a bmp-header. I used binary operators to parse the uint_16's and uint_32's directly into a char[54] array, which took care of endianess and portability.
I've been learning C++ for a while now, and am now trying to rewrite that function in C++-Style using std::ofstream instead of FILE*. The c++ code I produced looks bad and doesn't deal with endianess. Is there a better, or more proper, way to do the C++-header? (preferably one that is portable and deals with endianess)
It currently looks like this:
void writeHeader(int width, int height, std::ofstream file)
{
char bmpSign[2] = {'B', 'M'};
uint filesize = width*height*3+54;
uint reserved = 0;
uint headersize = 54;
uint infoHeader = 40;
ushort colors = 1;
ushort bitsPerPixel = 24;
uint compression = 0;
uint imgSize = width*height*3;
uint xPels = 0;
uint yPels = 0;
uint usedColors = 0;
uint impColors = 0;
file.write(bmpSign, 2*sizeof(char));
file.write(reinterpret_cast<char*>(filesize), sizeof(int));
file.write(reinterpret_cast<char*>(reserved), sizeof(int));
file.write(reinterpret_cast<char*>(headersize), sizeof(int));
file.write(reinterpret_cast<char*>(infoHeader), sizeof(int));
file.write(reinterpret_cast<char*>(width), sizeof(int));
file.write(reinterpret_cast<char*>(height), sizeof(int));
file.write(reinterpret_cast<char*>(colors), sizeof(ushort));
file.write(reinterpret_cast<char*>(bitsPerPixel), sizeof(ushort));
file.write(reinterpret_cast<char*>(compression), sizeof(int));
file.write(reinterpret_cast<char*>(imgSize), sizeof(int));
file.write(reinterpret_cast<char*>(xPels), sizeof(int));
file.write(reinterpret_cast<char*>(yPels), sizeof(int));
file.write(reinterpret_cast<char*>(usedColors), sizeof(int));
file.write(reinterpret_cast<char*>(impColors), sizeof(int));
}
Thanks!
The C-Version of the header:
void parseInt16(uint16_t mem_head, uint8_t file_head[54], int offset)
{
file_head[offset] = (mem_head & 0x00ff);
file_head[offset + 1] = (mem_head & 0xff00) >> 8;
}
void parseInt32(uint32_t mem_head, uint8_t file_head[54], int offset)
{
file_head[offset] = (mem_head & 0x000000ff);
file_head[offset + 1] = (mem_head & 0x0000ff00) >> 8;
file_head[offset + 2] = (mem_head & 0x00ff0000) >> 16;
file_head[offset + 3] = (mem_head & 0xff000000) >> 24;
}
void writeHeader(int width, int height, FILE* bitmap_destination)
{
uint8_t bmp_header[54];
bmp_header[0] = 'B'; bmp_header[1] = 'M';
//parsing the 32 or 16 bit numbers into byte form, little-endian.
//function syntax: (value, Array of Bytes, Offset)
parseInt32(width * height * 3 + 54, bmp_header, 2); //filesize
parseInt32(0, bmp_header, 6); //reserved by software (0)
parseInt32(54, bmp_header, 10 ); //headerOffset / Header size
parseInt32(40, bmp_header, 14 ); //infoHeaderSize
parseInt32(width, bmp_header, 18); //Width in pixels
parseInt32(height, bmp_header, 22); //Height in pixels
parseInt16(1, bmp_header, 26); //Colors (1 per standard)
parseInt16(24, bmp_header, 28); //bitsPerPixel (3 bytes/p)
parseInt32(0, bmp_header, 30); //Compression (0 = none)
parseInt32(width * height * 3, bmp_header, 34); //Image Size(bytes)
parseInt32(0, bmp_header, 38); //xPelsPerMeter
parseInt32(0, bmp_header, 42); //yPelsPerMeter
parseInt32(0, bmp_header, 46); //Colors used (0 for all)
parseInt32(0, bmp_header, 50); //important Colors (0 for all)
fwrite(bmp_header, 1, 54, bitmap_destination); //writing the header to disk
}
The code that you have for dealing with endianness of a platform should be reused. That code is independent of whether you use FILE* or std::fstream to write the data out.
The only line you need to change is
fwrite(bmp_header, 1, 54, bitmap_destination);
That can be replaced by:
file.write(reinterpret_cast<char*>(bmp_header), 54);
So, as the title states, I'm having trouble exporting a .bmp (24-bit bmp) with C++. I am doing it as a school project type thing, and I need some help. To learn how .BMPs work I looked at the wikipedia page, and I got some help from here, but I still can't figure it out. Here is what I have:
//Export the map as a .bmp
void PixelMap::exportMap(const char* fileName)
{
//Size of the file in bytes
int fileSize = 54 + (3 * width * height);
//The sections of the file
unsigned char generalHeader[14] = {'B','M',0,0, 0,0,0,0, 0,0,54,0, 0,0};
unsigned char DIBHeader[40] = {40,0,0,0, 0,0,0,0, 0,0,0,0, 1,0,24,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0};
unsigned char pixelArray[] = "";
//Set the binary portion of the generalHeader, mainly just file size
generalHeader[2] = (unsigned char)(fileSize);
generalHeader[3] = (unsigned char)(fileSize << 8);
generalHeader[4] = (unsigned char)(fileSize << 16);
generalHeader[5] = (unsigned char)(fileSize << 24);
//The binary variable portion of the DIB header
DIBHeader[4] = (unsigned char)(width);
DIBHeader[5] = (unsigned char)(width << 8);
DIBHeader[6] = (unsigned char)(width << 16);
DIBHeader[7] = (unsigned char)(width << 24);
DIBHeader[8] = (unsigned char)(height);
DIBHeader[9] = (unsigned char)(height << 8);
DIBHeader[10] = (unsigned char)(height << 16);
DIBHeader[11] = (unsigned char)(height << 24);
int picSize = 3 * width * height;
DIBHeader[20] = (unsigned char)(picSize);
DIBHeader[21] = (unsigned char)(picSize << 8);
DIBHeader[22] = (unsigned char)(picSize << 16);
DIBHeader[23] = (unsigned char)(picSize << 24);
//Loop through all width and height places to add all pixels
int counter = 0;
for(short j = height; j >= 0; j--)
{
for(short i = 0; i < width; i++)
{
//Add all 3 RGB values
pixelArray[counter] = pixelColour[i, j].red;
counter++;
pixelArray[counter] = pixelColour[i, j].green;
counter++;
pixelArray[counter] = pixelColour[i, j].blue;
counter++;
}
}
//Open it
ofstream fileWorking(fileName);
//Write the sections
fileWorking << generalHeader;
fileWorking << DIBHeader;
fileWorking << pixelArray;
//NO MEMORY LEAKS 4 ME
fileWorking.close();
}
This is part of a class called 'PixelMap,' basically a frame buffer or surface. The PixelMap has the variables 'width,' 'height,' and the struct array 'pixelColour.' (The struct containing 3 chars called 'red' 'green' and 'blue') If you would like to see the class, here it is. (It's just a skeleton, trying to get the .bmp down first)
//This is a pixel map, mainly for exporting BMPs
class PixelMap
{
public:
//The standard pixel variables
int width;
int height;
Colour pixelColour[];
//The constructor will set said variables
PixelMap(int Width, int Height);
//Manipulate pixels
void setPixel(int X, int Y, char r, char g, char b);
//Export the map
void exportMap(const char* fileName);
};
(Colour is the struct)
So my problem here is that when I try to run this, I get this:
So pixelArray, the array of colours to be exported gets corrupted. I assume this has to do with not being properly given a size, but I try to assign it's proper value (3 * width * height (3 being RGB)) but it says that it needs to be a constant value.
Any help with this issue is greatly appreciated!
Instead of
unsigned char pixelArray[] = "";
you could use:
std::vector<unsigned char> pixelArray(3*width*height,0);
This declares a vector with 3*width*height elements, initialized to 0. You can access the elements using the same syntax you've used for the array version (except, as pointed out in comments, you'll have to take care to write the binary values correctly to the output file).
I am currently using this approach to copy some byte values over:
for (int i = 0; i < (iLen + 1); i++)
{
*(pBuffer + i) = Image.pVid[i];
}
I would like to ask if there is a way to copy these values over in one go, perhaps by using memcopy to gain more speed.
The entire code is:
extern "C" __declspec(dllexport) int __stdcall GetCameraImage(BYTE pBuffer[], int Type, int uWidth, int uHeight)
{
CameraImage Image;
int ret;
Image.pVid = (unsigned int*)malloc(4 * uWidth*uHeight);
ret = stGetCameraImage(&Image, 1, uWidth, uHeight);
if (ret == ERR_SUCCESS)
{
int iLen = (4 * uWidth * uHeight);
for (int i = 0; i < (iLen + 1); i++)
{
*(pBuffer + i) = Image.pVid[i];
}
////print(“ImageType = %d, width = %d, height = %d”, Image.Type, Image.Width,
//// Image.Height);
////print(“First Pixel : B = %d, G = %d, R = %d”, Image.pVid[0], Image.pVid[1],
//// Image.pVid[2]);
////print(“Second Pixel : B = %d, G = %d, R = %d”, Image.pVid[4], Image.pVid[5],
//// Image.pVid[6]);
}
free(Image.pVid);
return ret;
}
Edit:
*pVid is this:
unsigned int *pVid; // pointer to image data (Format RGB32...)
The way your code is currently written, each assignment in your loop will overflow and give you some garbage value in pBuffer because you're trying to assign an unsigned int to a BYTE. On top of that, you will run off the end of the Image.pVid array because i is counting bytes, not unsigned ints
You could fix your code by doing this:
*(pBuffer + i) = ((BYTE*)Image.pVid)[i];
But that is pretty inefficient. Better to move whole words at a time, or you could just use memcpy instead:
memcpy(pBuffer,Image.pVid,iLen) //pBuffer must be at least iLen bytes long