I have 2 pyrDown implementation with SSE2 and AVX instructions set. They are differ and AVX implementation get wrong image result. Also AVX implementation is slower that SSE2 impl. It's strange. Whats wrong with AVX implementation and how it make faster?
// SSE2 implementation
static __inline __m128i average2RowsSingle(const uint8_t* __restrict__ src, size_t srcStep) {
__m128i v0 = _mm_load_si128((const __m128i *)src);
__m128i v1 = _mm_load_si128((const __m128i *)&src[srcStep]);
return _mm_avg_epu8(v0, v1);
}
// SSSE3 version
// I used `__restrict__` to give the compiler more flexibility in unrolling
void average2Rows(const uint8_t* __restrict__ src,
uint8_t*__restrict__ dst,
size_t srcStep,
size_t size)
{
const __m128i vk1 = _mm_set1_epi8(1);
const __m128i add2 = _mm_set1_epi16(2);
size_t dstsize = size/2;
for (size_t i = 0; i < dstsize - 15; i += 16)
{
const size_t ii = i*2;
// based on https://stackoverflow.com/a/45564565/820795
__m128i left = average2RowsSingle(src+ii, srcStep);
__m128i right = average2RowsSingle(src+ii+16, srcStep);
__m128i w0 = _mm_maddubs_epi16(left, vk1); // unpack and horizontal add
__m128i w1 = _mm_maddubs_epi16(right, vk1);
w0 = _mm_srli_epi16(w0, 1); // divide by 2
w1 = _mm_srli_epi16(w1, 1);
w0 = _mm_packus_epi16(w0, w1); // pack
_mm_storeu_si128((__m128i *)&dst[i], w0);
}
}
// AVX implementation
static __m256i average2RowsSingle(const uint8_t* __restrict__ src, size_t srcStep) {
auto v0 = _mm256_load_si256((const __m256i*)src);
auto v1 = _mm256_load_si256((const __m256i*)&src[srcStep]);
return _mm256_avg_epu8(v0, v1);
}
void average2Rows(const uint8_t* __restrict__ src,
uint8_t*__restrict__ dst,
size_t srcStep,
size_t size) {
const __m128i vk1 = _mm_set1_epi8(1);
size_t dstsize = size/2;
const signed char o = -1; // make shuffle zero
const __m256i vec_r_i16 = _mm256_set_epi8(o,30, o,28, o,26, o,24, o,22, o,20, o,18, o,16,
o,14, o,12, o,10, o, 8, o, 6, o, 4, o, 2, o, 0);
const __m256i vec_l_i16 = _mm256_set_epi8(o,31, o,29, o,27, o,25, o,23, o,21, o,19, o,17,
o,15, o,13, o,11, o, 9, o, 7, o, 5, o, 3, o, 1);
for (size_t i = 0; i < dstsize - 31; i += 32)
{
const size_t ii = i * 2;
auto left = average2RowsSingle(src + ii, srcStep);
auto right = average2RowsSingle(src + ii + 32, srcStep);
auto w0 = _mm256_shuffle_epi8(left, vec_r_i16);
auto w1 = _mm256_shuffle_epi8(left, vec_l_i16);
left = _mm256_srli_epi16(_mm256_add_epi16(w0, w1), 1);
w0 = _mm256_shuffle_epi8(right, vec_r_i16);
w1 = _mm256_shuffle_epi8(right, vec_l_i16);
right = _mm256_srli_epi16(_mm256_add_epi16(w0, w1), 1);
left = _mm256_packus_epi16(left, right);
_mm256_storeu_si256((__m256i *) &dst[i], left);
}
}
Wrong result after AVX implementation:
With help of #chtz I come up to this code:
inline __m256i average2RowsSingle(const uint8_t* __restrict__ src, size_t srcStep) {
auto v0 = _mm256_loadu_si256((const __m256i *)src);
auto v1 = _mm256_loadu_si256((const __m256i *)&src[srcStep]);
return _mm256_avg_epu8(v0, v1);
}
void average2Rows(const uint8_t* __restrict__ src,
uint8_t*__restrict__ dst,
size_t srcStep,
size_t size) {
const auto vk1 = _mm256_set1_epi8(1);
const size_t dstSize = size/2;
for (size_t i = 0; i < dstSize - 31; i += 32)
{
const size_t ii = i * 2;
// based on https://stackoverflow.com/a/45564565/820795
auto left = average2RowsSingle(src + ii, srcStep);
auto right = average2RowsSingle(src + ii + 32, srcStep);
auto w0 = _mm256_maddubs_epi16(left, vk1); // unpack and horizontal add
auto w1 = _mm256_maddubs_epi16(right, vk1);
w0 = _mm256_srli_epi16(w0, 1); // divide by 2
w1 = _mm256_srli_epi16(w1, 1);
w0 = _mm256_packus_epi16(w0, w1); // pack
w0 = _mm256_permute4x64_epi64(w0, 0xd8); // shuffle to get correct order
_mm256_storeu_si256((__m256i *)&dst[i], w0);
}
}
Result image:
In my algorithm, I need to create an information output. I need to write a boolean matrix into a bmp file.
It must be a monocromic image, where pixels are white if the matrix on such element is true.
Main problem is the bmp header and how to write this.
See if this works for you...
In this code, I had 3 2-dimensional arrays, called red,green and blue. Each one was of size [width][height], and each element corresponded to a pixel - I hope this makes sense!
FILE *f;
unsigned char *img = NULL;
int filesize = 54 + 3*w*h; //w is your image width, h is image height, both int
img = (unsigned char *)malloc(3*w*h);
memset(img,0,3*w*h);
for(int i=0; i<w; i++)
{
for(int j=0; j<h; j++)
{
x=i; y=(h-1)-j;
r = red[i][j]*255;
g = green[i][j]*255;
b = blue[i][j]*255;
if (r > 255) r=255;
if (g > 255) g=255;
if (b > 255) b=255;
img[(x+y*w)*3+2] = (unsigned char)(r);
img[(x+y*w)*3+1] = (unsigned char)(g);
img[(x+y*w)*3+0] = (unsigned char)(b);
}
}
unsigned char bmpfileheader[14] = {'B','M', 0,0,0,0, 0,0, 0,0, 54,0,0,0};
unsigned char bmpinfoheader[40] = {40,0,0,0, 0,0,0,0, 0,0,0,0, 1,0, 24,0};
unsigned char bmppad[3] = {0,0,0};
bmpfileheader[ 2] = (unsigned char)(filesize );
bmpfileheader[ 3] = (unsigned char)(filesize>> 8);
bmpfileheader[ 4] = (unsigned char)(filesize>>16);
bmpfileheader[ 5] = (unsigned char)(filesize>>24);
bmpinfoheader[ 4] = (unsigned char)( w );
bmpinfoheader[ 5] = (unsigned char)( w>> 8);
bmpinfoheader[ 6] = (unsigned char)( w>>16);
bmpinfoheader[ 7] = (unsigned char)( w>>24);
bmpinfoheader[ 8] = (unsigned char)( h );
bmpinfoheader[ 9] = (unsigned char)( h>> 8);
bmpinfoheader[10] = (unsigned char)( h>>16);
bmpinfoheader[11] = (unsigned char)( h>>24);
f = fopen("img.bmp","wb");
fwrite(bmpfileheader,1,14,f);
fwrite(bmpinfoheader,1,40,f);
for(int i=0; i<h; i++)
{
fwrite(img+(w*(h-i-1)*3),3,w,f);
fwrite(bmppad,1,(4-(w*3)%4)%4,f);
}
free(img);
fclose(f);
Clean C Code for Bitmap (BMP) Image Generation
This code does not use any library other than stdio.h. So, it can be easily incorporated in other languages of C-Family, like- C++, C#, Java.
#include <stdio.h>
const int BYTES_PER_PIXEL = 3; /// red, green, & blue
const int FILE_HEADER_SIZE = 14;
const int INFO_HEADER_SIZE = 40;
void generateBitmapImage(unsigned char* image, int height, int width, char* imageFileName);
unsigned char* createBitmapFileHeader(int height, int stride);
unsigned char* createBitmapInfoHeader(int height, int width);
int main ()
{
int height = 361;
int width = 867;
unsigned char image[height][width][BYTES_PER_PIXEL];
char* imageFileName = (char*) "bitmapImage.bmp";
int i, j;
for (i = 0; i < height; i++) {
for (j = 0; j < width; j++) {
image[i][j][2] = (unsigned char) ( i * 255 / height ); ///red
image[i][j][1] = (unsigned char) ( j * 255 / width ); ///green
image[i][j][0] = (unsigned char) ( (i+j) * 255 / (height+width) ); ///blue
}
}
generateBitmapImage((unsigned char*) image, height, width, imageFileName);
printf("Image generated!!");
}
void generateBitmapImage (unsigned char* image, int height, int width, char* imageFileName)
{
int widthInBytes = width * BYTES_PER_PIXEL;
unsigned char padding[3] = {0, 0, 0};
int paddingSize = (4 - (widthInBytes) % 4) % 4;
int stride = (widthInBytes) + paddingSize;
FILE* imageFile = fopen(imageFileName, "wb");
unsigned char* fileHeader = createBitmapFileHeader(height, stride);
fwrite(fileHeader, 1, FILE_HEADER_SIZE, imageFile);
unsigned char* infoHeader = createBitmapInfoHeader(height, width);
fwrite(infoHeader, 1, INFO_HEADER_SIZE, imageFile);
int i;
for (i = 0; i < height; i++) {
fwrite(image + (i*widthInBytes), BYTES_PER_PIXEL, width, imageFile);
fwrite(padding, 1, paddingSize, imageFile);
}
fclose(imageFile);
}
unsigned char* createBitmapFileHeader (int height, int stride)
{
int fileSize = FILE_HEADER_SIZE + INFO_HEADER_SIZE + (stride * height);
static unsigned char fileHeader[] = {
0,0, /// signature
0,0,0,0, /// image file size in bytes
0,0,0,0, /// reserved
0,0,0,0, /// start of pixel array
};
fileHeader[ 0] = (unsigned char)('B');
fileHeader[ 1] = (unsigned char)('M');
fileHeader[ 2] = (unsigned char)(fileSize );
fileHeader[ 3] = (unsigned char)(fileSize >> 8);
fileHeader[ 4] = (unsigned char)(fileSize >> 16);
fileHeader[ 5] = (unsigned char)(fileSize >> 24);
fileHeader[10] = (unsigned char)(FILE_HEADER_SIZE + INFO_HEADER_SIZE);
return fileHeader;
}
unsigned char* createBitmapInfoHeader (int height, int width)
{
static unsigned char infoHeader[] = {
0,0,0,0, /// header size
0,0,0,0, /// image width
0,0,0,0, /// image height
0,0, /// number of color planes
0,0, /// bits per pixel
0,0,0,0, /// compression
0,0,0,0, /// image size
0,0,0,0, /// horizontal resolution
0,0,0,0, /// vertical resolution
0,0,0,0, /// colors in color table
0,0,0,0, /// important color count
};
infoHeader[ 0] = (unsigned char)(INFO_HEADER_SIZE);
infoHeader[ 4] = (unsigned char)(width );
infoHeader[ 5] = (unsigned char)(width >> 8);
infoHeader[ 6] = (unsigned char)(width >> 16);
infoHeader[ 7] = (unsigned char)(width >> 24);
infoHeader[ 8] = (unsigned char)(height );
infoHeader[ 9] = (unsigned char)(height >> 8);
infoHeader[10] = (unsigned char)(height >> 16);
infoHeader[11] = (unsigned char)(height >> 24);
infoHeader[12] = (unsigned char)(1);
infoHeader[14] = (unsigned char)(BYTES_PER_PIXEL*8);
return infoHeader;
}
Without the use of any other library you can look at the BMP file format. I've implemented it in the past and it can be done without too much work.
Bitmap-File Structures
Each bitmap file contains a
bitmap-file header, a
bitmap-information header, a color
table, and an array of bytes that
defines the bitmap bits. The file has
the following form:
BITMAPFILEHEADER bmfh;
BITMAPINFOHEADER bmih;
RGBQUAD aColors[];
BYTE aBitmapBits[];
... see the file format for more details
this is a example code copied from
https://en.wikipedia.org/wiki/User:Evercat/Buddhabrot.c
void drawbmp (char * filename) {
unsigned int headers[13];
FILE * outfile;
int extrabytes;
int paddedsize;
int x; int y; int n;
int red, green, blue;
extrabytes = 4 - ((WIDTH * 3) % 4); // How many bytes of padding to add to each
// horizontal line - the size of which must
// be a multiple of 4 bytes.
if (extrabytes == 4)
extrabytes = 0;
paddedsize = ((WIDTH * 3) + extrabytes) * HEIGHT;
// Headers...
// Note that the "BM" identifier in bytes 0 and 1 is NOT included in these "headers".
headers[0] = paddedsize + 54; // bfSize (whole file size)
headers[1] = 0; // bfReserved (both)
headers[2] = 54; // bfOffbits
headers[3] = 40; // biSize
headers[4] = WIDTH; // biWidth
headers[5] = HEIGHT; // biHeight
// Would have biPlanes and biBitCount in position 6, but they're shorts.
// It's easier to write them out separately (see below) than pretend
// they're a single int, especially with endian issues...
headers[7] = 0; // biCompression
headers[8] = paddedsize; // biSizeImage
headers[9] = 0; // biXPelsPerMeter
headers[10] = 0; // biYPelsPerMeter
headers[11] = 0; // biClrUsed
headers[12] = 0; // biClrImportant
outfile = fopen(filename, "wb");
//
// Headers begin...
// When printing ints and shorts, we write out 1 character at a time to avoid endian issues.
//
fprintf(outfile, "BM");
for (n = 0; n <= 5; n++)
{
fprintf(outfile, "%c", headers[n] & 0x000000FF);
fprintf(outfile, "%c", (headers[n] & 0x0000FF00) >> 8);
fprintf(outfile, "%c", (headers[n] & 0x00FF0000) >> 16);
fprintf(outfile, "%c", (headers[n] & (unsigned int) 0xFF000000) >> 24);
}
// These next 4 characters are for the biPlanes and biBitCount fields.
fprintf(outfile, "%c", 1);
fprintf(outfile, "%c", 0);
fprintf(outfile, "%c", 24);
fprintf(outfile, "%c", 0);
for (n = 7; n <= 12; n++)
{
fprintf(outfile, "%c", headers[n] & 0x000000FF);
fprintf(outfile, "%c", (headers[n] & 0x0000FF00) >> 8);
fprintf(outfile, "%c", (headers[n] & 0x00FF0000) >> 16);
fprintf(outfile, "%c", (headers[n] & (unsigned int) 0xFF000000) >> 24);
}
//
// Headers done, now write the data...
//
for (y = HEIGHT - 1; y >= 0; y--) // BMP image format is written from bottom to top...
{
for (x = 0; x <= WIDTH - 1; x++)
{
red = reduce(redcount[x][y] + COLOUR_OFFSET) * red_multiplier;
green = reduce(greencount[x][y] + COLOUR_OFFSET) * green_multiplier;
blue = reduce(bluecount[x][y] + COLOUR_OFFSET) * blue_multiplier;
if (red > 255) red = 255; if (red < 0) red = 0;
if (green > 255) green = 255; if (green < 0) green = 0;
if (blue > 255) blue = 255; if (blue < 0) blue = 0;
// Also, it's written in (b,g,r) format...
fprintf(outfile, "%c", blue);
fprintf(outfile, "%c", green);
fprintf(outfile, "%c", red);
}
if (extrabytes) // See above - BMP lines must be of lengths divisible by 4.
{
for (n = 1; n <= extrabytes; n++)
{
fprintf(outfile, "%c", 0);
}
}
}
fclose(outfile);
return;
}
drawbmp(filename);
Here is a C++ variant of the code that works for me. Note I had to change the size computation to account for the line padding.
// mimeType = "image/bmp";
unsigned char file[14] = {
'B','M', // magic
0,0,0,0, // size in bytes
0,0, // app data
0,0, // app data
40+14,0,0,0 // start of data offset
};
unsigned char info[40] = {
40,0,0,0, // info hd size
0,0,0,0, // width
0,0,0,0, // heigth
1,0, // number color planes
24,0, // bits per pixel
0,0,0,0, // compression is none
0,0,0,0, // image bits size
0x13,0x0B,0,0, // horz resoluition in pixel / m
0x13,0x0B,0,0, // vert resolutions (0x03C3 = 96 dpi, 0x0B13 = 72 dpi)
0,0,0,0, // #colors in pallete
0,0,0,0, // #important colors
};
int w=waterfallWidth;
int h=waterfallHeight;
int padSize = (4-(w*3)%4)%4;
int sizeData = w*h*3 + h*padSize;
int sizeAll = sizeData + sizeof(file) + sizeof(info);
file[ 2] = (unsigned char)( sizeAll );
file[ 3] = (unsigned char)( sizeAll>> 8);
file[ 4] = (unsigned char)( sizeAll>>16);
file[ 5] = (unsigned char)( sizeAll>>24);
info[ 4] = (unsigned char)( w );
info[ 5] = (unsigned char)( w>> 8);
info[ 6] = (unsigned char)( w>>16);
info[ 7] = (unsigned char)( w>>24);
info[ 8] = (unsigned char)( h );
info[ 9] = (unsigned char)( h>> 8);
info[10] = (unsigned char)( h>>16);
info[11] = (unsigned char)( h>>24);
info[20] = (unsigned char)( sizeData );
info[21] = (unsigned char)( sizeData>> 8);
info[22] = (unsigned char)( sizeData>>16);
info[23] = (unsigned char)( sizeData>>24);
stream.write( (char*)file, sizeof(file) );
stream.write( (char*)info, sizeof(info) );
unsigned char pad[3] = {0,0,0};
for ( int y=0; y<h; y++ )
{
for ( int x=0; x<w; x++ )
{
long red = lround( 255.0 * waterfall[x][y] );
if ( red < 0 ) red=0;
if ( red > 255 ) red=255;
long green = red;
long blue = red;
unsigned char pixel[3];
pixel[0] = blue;
pixel[1] = green;
pixel[2] = red;
stream.write( (char*)pixel, 3 );
}
stream.write( (char*)pad, padSize );
}
Note that the lines are saved from down to up and not the other way around.
Additionally, the scanlines must have a byte-length of multiples of four, you should insert fill bytes at the end of the lines to ensure this.
I just wanted to share an improved version of Minhas Kamal's code because although it worked well enough for most applications, I had a few issues with it still. Two highly important things to remember:
The code (at the time of writing) calls free() on two static arrays. This will cause your program to crash. So I commented out those lines.
NEVER assume that your pixel data's pitch is always (Width*BytesPerPixel). It's best to let the user specify the pitch value. Example: when manipulating resources in Direct3D, the RowPitch is never guaranteed to be an even multiple of the byte depth being used. This can cause errors in your generated bitmaps (especially at odd resolutions such as 1366x768).
Below, you can see my revisions to his code:
const int bytesPerPixel = 4; /// red, green, blue
const int fileHeaderSize = 14;
const int infoHeaderSize = 40;
void generateBitmapImage(unsigned char *image, int height, int width, int pitch, const char* imageFileName);
unsigned char* createBitmapFileHeader(int height, int width, int pitch, int paddingSize);
unsigned char* createBitmapInfoHeader(int height, int width);
void generateBitmapImage(unsigned char *image, int height, int width, int pitch, const char* imageFileName) {
unsigned char padding[3] = { 0, 0, 0 };
int paddingSize = (4 - (/*width*bytesPerPixel*/ pitch) % 4) % 4;
unsigned char* fileHeader = createBitmapFileHeader(height, width, pitch, paddingSize);
unsigned char* infoHeader = createBitmapInfoHeader(height, width);
FILE* imageFile = fopen(imageFileName, "wb");
fwrite(fileHeader, 1, fileHeaderSize, imageFile);
fwrite(infoHeader, 1, infoHeaderSize, imageFile);
int i;
for (i = 0; i < height; i++) {
fwrite(image + (i*pitch /*width*bytesPerPixel*/), bytesPerPixel, width, imageFile);
fwrite(padding, 1, paddingSize, imageFile);
}
fclose(imageFile);
//free(fileHeader);
//free(infoHeader);
}
unsigned char* createBitmapFileHeader(int height, int width, int pitch, int paddingSize) {
int fileSize = fileHeaderSize + infoHeaderSize + (/*bytesPerPixel*width*/pitch + paddingSize) * height;
static unsigned char fileHeader[] = {
0,0, /// signature
0,0,0,0, /// image file size in bytes
0,0,0,0, /// reserved
0,0,0,0, /// start of pixel array
};
fileHeader[0] = (unsigned char)('B');
fileHeader[1] = (unsigned char)('M');
fileHeader[2] = (unsigned char)(fileSize);
fileHeader[3] = (unsigned char)(fileSize >> 8);
fileHeader[4] = (unsigned char)(fileSize >> 16);
fileHeader[5] = (unsigned char)(fileSize >> 24);
fileHeader[10] = (unsigned char)(fileHeaderSize + infoHeaderSize);
return fileHeader;
}
unsigned char* createBitmapInfoHeader(int height, int width) {
static unsigned char infoHeader[] = {
0,0,0,0, /// header size
0,0,0,0, /// image width
0,0,0,0, /// image height
0,0, /// number of color planes
0,0, /// bits per pixel
0,0,0,0, /// compression
0,0,0,0, /// image size
0,0,0,0, /// horizontal resolution
0,0,0,0, /// vertical resolution
0,0,0,0, /// colors in color table
0,0,0,0, /// important color count
};
infoHeader[0] = (unsigned char)(infoHeaderSize);
infoHeader[4] = (unsigned char)(width);
infoHeader[5] = (unsigned char)(width >> 8);
infoHeader[6] = (unsigned char)(width >> 16);
infoHeader[7] = (unsigned char)(width >> 24);
infoHeader[8] = (unsigned char)(height);
infoHeader[9] = (unsigned char)(height >> 8);
infoHeader[10] = (unsigned char)(height >> 16);
infoHeader[11] = (unsigned char)(height >> 24);
infoHeader[12] = (unsigned char)(1);
infoHeader[14] = (unsigned char)(bytesPerPixel * 8);
return infoHeader;
}
I edited ralf's htp code so that it would compile (on gcc, running ubuntu 16.04 lts). It was just a matter of initializing the variables.
int w = 100; /* Put here what ever width you want */
int h = 100; /* Put here what ever height you want */
int red[w][h];
int green[w][h];
int blue[w][h];
FILE *f;
unsigned char *img = NULL;
int filesize = 54 + 3*w*h; //w is your image width, h is image height, both int
if( img )
free( img );
img = (unsigned char *)malloc(3*w*h);
memset(img,0,sizeof(img));
int x;
int y;
int r;
int g;
int b;
for(int i=0; i<w; i++)
{
for(int j=0; j<h; j++)
{
x=i; y=(h-1)-j;
r = red[i][j]*255;
g = green[i][j]*255;
b = blue[i][j]*255;
if (r > 255) r=255;
if (g > 255) g=255;
if (b > 255) b=255;
img[(x+y*w)*3+2] = (unsigned char)(r);
img[(x+y*w)*3+1] = (unsigned char)(g);
img[(x+y*w)*3+0] = (unsigned char)(b);
}
}
unsigned char bmpfileheader[14] = {'B','M', 0,0,0,0, 0,0, 0,0, 54,0,0,0};
unsigned char bmpinfoheader[40] = {40,0,0,0, 0,0,0,0, 0,0,0,0, 1,0, 24,0};
unsigned char bmppad[3] = {0,0,0};
bmpfileheader[ 2] = (unsigned char)(filesize );
bmpfileheader[ 3] = (unsigned char)(filesize>> 8);
bmpfileheader[ 4] = (unsigned char)(filesize>>16);
bmpfileheader[ 5] = (unsigned char)(filesize>>24);
bmpinfoheader[ 4] = (unsigned char)( w );
bmpinfoheader[ 5] = (unsigned char)( w>> 8);
bmpinfoheader[ 6] = (unsigned char)( w>>16);
bmpinfoheader[ 7] = (unsigned char)( w>>24);
bmpinfoheader[ 8] = (unsigned char)( h );
bmpinfoheader[ 9] = (unsigned char)( h>> 8);
bmpinfoheader[10] = (unsigned char)( h>>16);
bmpinfoheader[11] = (unsigned char)( h>>24);
f = fopen("img.bmp","wb");
fwrite(bmpfileheader,1,14,f);
fwrite(bmpinfoheader,1,40,f);
for(int i=0; i<h; i++)
{
fwrite(img+(w*(h-i-1)*3),3,w,f);
fwrite(bmppad,1,(4-(w*3)%4)%4,f);
}
fclose(f);
The best bitmap encoder is the one you do not write yourself. The file format is a lot more involved, than one might expect. This is evidenced by the fact, that all proposed answers do not create a monochrome (1bpp) bitmap, but rather write out 24bpp files, that happen to only use 2 colors.
The following is a Windows-only solution, using the Windows Imaging Component. It doesn't rely on any external/3rd party libraries, other than what ships with Windows.
Like every C++ program, we need to include several header files. And link to Windowscodecs.lib while we're at it:
#include <Windows.h>
#include <comdef.h>
#include <comip.h>
#include <comutil.h>
#include <wincodec.h>
#include <vector>
#pragma comment(lib, "Windowscodecs.lib")
Next up, we declare our container (a vector, of vectors! Of bool!), and a few smart pointers for convenience:
using _com_util::CheckError;
using container = std::vector<std::vector<bool>>;
_COM_SMARTPTR_TYPEDEF(IWICImagingFactory, __uuidof(IWICImagingFactory));
_COM_SMARTPTR_TYPEDEF(IWICBitmapEncoder, __uuidof(IWICBitmapEncoder));
_COM_SMARTPTR_TYPEDEF(IWICBitmapFrameEncode, __uuidof(IWICBitmapFrameEncode));
_COM_SMARTPTR_TYPEDEF(IWICStream, __uuidof(IWICStream));
_COM_SMARTPTR_TYPEDEF(IWICPalette, __uuidof(IWICPalette));
With that all settled, we can jump right into the implementation. There's a bit of setup required to get a factory, an encoder, a frame, and get everything prepared:
void write_bitmap(wchar_t const* pathname, container const& data)
{
// Create factory
IWICImagingFactoryPtr sp_factory { nullptr };
CheckError(sp_factory.CreateInstance(CLSID_WICImagingFactory, nullptr,
CLSCTX_INPROC_SERVER));
// Create encoder
IWICBitmapEncoderPtr sp_encoder { nullptr };
CheckError(sp_factory->CreateEncoder(GUID_ContainerFormatBmp, nullptr, &sp_encoder));
// Create stream
IWICStreamPtr sp_stream { nullptr };
CheckError(sp_factory->CreateStream(&sp_stream));
CheckError(sp_stream->InitializeFromFilename(pathname, GENERIC_WRITE));
// Initialize encoder with stream
CheckError(sp_encoder->Initialize(sp_stream, WICBitmapEncoderNoCache));
// Create new frame
IWICBitmapFrameEncodePtr sp_frame { nullptr };
IPropertyBag2Ptr sp_properties { nullptr };
CheckError(sp_encoder->CreateNewFrame(&sp_frame, &sp_properties));
// Initialize frame with default properties
CheckError(sp_frame->Initialize(sp_properties));
// Set pixel format
// SetPixelFormat() requires a pointer to non-const
auto pf { GUID_WICPixelFormat1bppIndexed };
CheckError(sp_frame->SetPixelFormat(&pf));
if (!::IsEqualGUID(pf, GUID_WICPixelFormat1bppIndexed))
{
// Report unsupported pixel format
CheckError(WINCODEC_ERR_UNSUPPORTEDPIXELFORMAT);
}
// Set size derived from data argument
auto const width { static_cast<UINT>(data.size()) };
auto const height { static_cast<UINT>(data[0].size()) };
CheckError(sp_frame->SetSize(width, height));
// Set palette on frame. This is required since we use an indexed pixel format.
// Only GIF files support global palettes, so make sure to set it on the frame
// rather than the encoder.
IWICPalettePtr sp_palette { nullptr };
CheckError(sp_factory->CreatePalette(&sp_palette));
CheckError(sp_palette->InitializePredefined(WICBitmapPaletteTypeFixedBW, FALSE));
CheckError(sp_frame->SetPalette(sp_palette));
At that point everything is set up, and we have a frame to dump our data into. For 1bpp files, every byte stores the information of 8 pixels. The left-most pixel is stored in the MSB, with pixels following all the way down to the right-most pixel stored in the LSB.
The code isn't entirely important; you'll be replacing that with whatever suits your needs, when you replace the data layout of your input anyway:
// Write data to frame
auto const stride { (width * 1 + 7) / 8 };
auto const size { height * stride };
std::vector<unsigned char> buffer(size, 127u);
// Convert data to match required layout. Each byte stores 8 pixels, with the
// MSB being the leftmost, the LSB the right-most.
for (size_t x { 0 }; x < data.size(); ++x)
{
for (size_t y { 0 }; y < data[x].size(); ++y)
{
auto shift { x % 8 };
auto mask { 0x80 >> shift };
auto bit { mask * data[x][y] };
auto& value { buffer[y * stride + x / 8] };
value &= ~mask;
value |= bit;
}
}
CheckError(sp_frame->WritePixels(height, stride,
static_cast<UINT>(buffer.size()), buffer.data()));
What's left is to commit the changes to the frame and the encoder, which will ultimately write the image file to disk:
// Commit frame
CheckError(sp_frame->Commit());
// Commit image
CheckError(sp_encoder->Commit());
}
This is a test program, writing out an image to a file passed as the first command-line argument:
#include <iostream>
int wmain(int argc, wchar_t* argv[])
try
{
if (argc != 2)
{
return -1;
}
CheckError(::CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED));
// Create 64x64 matrix
container data(64, std::vector<bool>(64, false));
// Fill with arrow pointing towards the upper left
for (size_t i { 0 }; i < data.size(); ++i)
{
data[0][i] = true;
data[i][0] = true;
data[i][i] = true;
}
::write_bitmap(argv[1], data);
::CoUninitialize();
}
catch (_com_error const& e)
{
std::wcout << L"Error!\n" << L" Message: " << e.ErrorMessage() << std::endl;
}
It produces the following 64x64 image (true 1bpp, 4096 pixels, 574 bytes in size):
If you get strange colors switches in the middle of your image using the above C++ function. Be sure to open the outstream in binary mode:
imgFile.open(filename, std::ios_base::out | std::ios_base::binary);
Otherwise windows inserts unwanted characters in the middle of your file! (been banging my head on this issue for hours)
See related question here: Why does ofstream insert a 0x0D byte before 0x0A?
Here's a simple c++ bmp image file class.
class bmp_img {
public:
constexpr static int header_size = 14;
constexpr static int info_header_size = 40;
constexpr static size_t bytes_per_pixel = 3;
bmp_img(size_t width, size_t height) :
image_px_width{ width }, image_px_height{ height }, row_width{ image_px_width * bytes_per_pixel },
row_padding{ (4 - row_width % 4) % 4 }, row_stride{ row_width + row_padding }, file_size{ header_size + info_header_size + (image_px_height * row_stride) },
image(image_px_height, std::vector<unsigned char>(row_width))
{
//header file type
file_header[0] = 'B';
file_header[1] = 'M';
//header file size info
file_header[2] = static_cast<unsigned char>(file_size);
file_header[3] = static_cast<unsigned char>(file_size >> 8);
file_header[4] = static_cast<unsigned char>(file_size >> 16);
file_header[5] = static_cast<unsigned char>(file_size >> 24);
//header offset to pixel data
file_header[10] = header_size + info_header_size;
//info header size
info_header[0] = info_header_size;
//info header image width
info_header[4] = static_cast<unsigned char>(image_px_width);
info_header[5] = static_cast<unsigned char>(image_px_width >> 8);
info_header[6] = static_cast<unsigned char>(image_px_width >> 16);
info_header[7] = static_cast<unsigned char>(image_px_width >> 24);
//info header image height
info_header[8] = static_cast<unsigned char>(image_px_height);
info_header[9] = static_cast<unsigned char>(image_px_height >> 8);
info_header[10] = static_cast<unsigned char>(image_px_height >> 16);
info_header[11] = static_cast<unsigned char>(image_px_height >> 24);
//info header planes
info_header[12] = 1;
//info header bits per pixel
info_header[14] = 8 * bytes_per_pixel;
}
size_t width() const {
return image_px_width;
}
size_t height() const {
return image_px_height;
}
void set_pixel(size_t x, size_t y, int r, int g, int b) {
image[y][x * bytes_per_pixel + 2] = r;
image[y][x * bytes_per_pixel + 1] = g;
image[y][x * bytes_per_pixel + 0] = b;
}
void fill(int r, int g, int b) {
for (int y = 0; y < image_px_height; ++y) {
for (int x = 0; x < image_px_width; ++x) {
set_pixel(x, y, r, g, b);
}
}
}
void write_to_file(const char* file_name) const {
std::ofstream img_file(file_name, std::ios_base::binary | std::ios_base::out);
img_file.write((char*)file_header, header_size);
img_file.write((char*)info_header, info_header_size);
std::vector<char> allignment(row_padding);
for (int y = image_px_height - 1; y >= 0; --y) {
img_file.write((char*)image[y].data(), row_width);
img_file.write(allignment.data(), row_padding);
}
img_file.close();
}
private:
size_t image_px_width;
size_t image_px_height;
size_t row_width;
size_t row_padding;
size_t row_stride;
size_t file_size;
unsigned char file_header[header_size] = { 0 };
unsigned char info_header[info_header_size] = { 0 };
std::vector<std::vector<unsigned char>> image;
};
C++ answer, flexible API, assumes little-endian system to code-golf it a bit. Note this uses the bmp native y-axis (0 at the bottom).
#include <vector>
#include <fstream>
struct image
{
image(int width, int height)
: w(width), h(height), rgb(w * h * 3)
{}
uint8_t & r(int x, int y) { return rgb[(x + y*w)*3 + 2]; }
uint8_t & g(int x, int y) { return rgb[(x + y*w)*3 + 1]; }
uint8_t & b(int x, int y) { return rgb[(x + y*w)*3 + 0]; }
int w, h;
std::vector<uint8_t> rgb;
};
template<class Stream>
Stream & operator<<(Stream & out, image const& img)
{
uint32_t w = img.w, h = img.h;
uint32_t pad = w * -3 & 3;
uint32_t total = 54 + 3*w*h + pad*h;
uint32_t head[13] = {total, 0, 54, 40, w, h, (24<<16)|1};
char const* rgb = (char const*)img.rgb.data();
out.write("BM", 2);
out.write((char*)head, 52);
for(uint32_t i=0 ; i<h ; i++)
{ out.write(rgb + (3 * w * i), 3 * w);
out.write((char*)&pad, pad);
}
return out;
}
int main()
{
image img(100, 100);
for(int x=0 ; x<100 ; x++)
{ for(int y=0 ; y<100 ; y++)
{ img.r(x,y) = x;
img.g(x,y) = y;
img.b(x,y) = 100-x;
}
}
std::ofstream("/tmp/out.bmp") << img;
}
This code uses some newer C++ features. I've used it to create 8bit and 24bit bmp files. It only writes bmp files, one day we may read them too!
I didn't like all the shifting and error proneess for endian safety.
It could use lots more comments but the code is pretty straight forward. The supposedly run-time detection of endianness results in code being optimized away on all the compilers I tested (a while ago).
endian_type.h >> Endian safe POD type.
#ifndef ENDIAN_TYPE_H
#define ENDIAN_TYPE_H
#include <algorithm>
#include <type_traits>
namespace endian_type {
template <typename T, bool store_as_big_endian>
struct EndianType {
using value_type = T;
static_assert(std::is_fundamental_v<value_type>,
"EndianType works for fundamental data types");
EndianType() = default;
EndianType(const value_type& value)
: value{ convert_to(value) } {}
struct TypeAsBytes {
unsigned char value[sizeof(value_type)];
};
static constexpr bool is_big_endian() {
union { int ival; char cval; } uval;
uval.ival = 1;
return 0 == uval.cval;
}
static TypeAsBytes convert_to(const value_type& ivalue) {
TypeAsBytes ovalue;
const unsigned char* p_ivalue = (const unsigned char*)&ivalue;
if (store_as_big_endian != is_big_endian()) {
std::reverse_copy(p_ivalue, p_ivalue + sizeof(value_type), ovalue.value);
} else {
std::copy(p_ivalue, p_ivalue + sizeof(value_type), ovalue.value);
}
return ovalue;
}
static value_type convert_from(const TypeAsBytes& ivalue) {
value_type ovalue;
unsigned char* p_ovalue = (unsigned char*) &ovalue;
const unsigned char* p_ivalue = (const unsigned char*)&ivalue;
if (store_as_big_endian != is_big_endian()) {
std::reverse_copy(p_ivalue, p_ivalue + sizeof(value_type), p_ovalue);
}
else {
std::copy(p_ivalue, p_ivalue + sizeof(value_type), p_ovalue);
}
return ovalue;
}
value_type get() const {
return convert_from(value);
}
EndianType& set(const value_type& ivalue) {
value = convert_to(ivalue);
return *this;
}
operator value_type() const {
return get();
}
EndianType& operator=(const value_type& ivalue) {
set(ivalue);
return *this;
}
private:
TypeAsBytes value;
};
template <typename T>
using BigEndian = EndianType<T, true>;
template <typename T>
using LittleEndian = EndianType<T, false>;
} // namespace endian_type
#endif // ENDIAN_TYPE_H
The following contains the write_bmp functions.
bmp_writer.h >> the BMP writer header
#ifndef BMP_WRITER
#define BMP_WRITER
#include "endian_type.h"
#include <cctype>
#include <vector>
#include <fstream>
namespace bmp_writer {
template <typename T>
using LittleEndian = endian_type::LittleEndian<T>;
struct Header {
char magic[2]{ 'B', 'M' };
LittleEndian<std::uint32_t> size;
LittleEndian<std::uint16_t> app_data1;
LittleEndian<std::uint16_t> app_data2;
LittleEndian<std::uint32_t> offset;
};
struct Info {
LittleEndian<std::uint32_t> info_size{ 40 };
LittleEndian<std::uint32_t> width;
LittleEndian<std::uint32_t> height;
LittleEndian<std::uint16_t> count_colour_planes{ 1 };
LittleEndian<std::uint16_t> bits_per_pixel;
LittleEndian<std::uint32_t> compression{};
LittleEndian<std::uint32_t> image_bytes_size;
LittleEndian<std::uint32_t> resolution_horizontal{ 2835 };
LittleEndian<std::uint32_t> resolution_vertical{ 2835 };
LittleEndian<std::uint32_t> count_pallete_entries{ 0 };
LittleEndian<std::uint32_t> important_colours{ 0 };
};
template <std::size_t count>
class Palette {
public:
static constexpr std::uint32_t NUM_CHANNELS = 4;
using Entry = std::uint8_t[NUM_CHANNELS];
private:
Palette() {
for (auto i = 0; i < count; ++i) {
auto& entry = table[i];
for (auto j = 0; j < NUM_CHANNELS - 1; ++j) {
entry[j] = i;
}
}
}
Palette(const Palette&) = delete;
Palette(const Palette&&) = delete;
Palette& operator=(const Palette&) = delete;
Palette& operator=(const Palette&&) = delete;
public:
static const Palette& get() {
static const Palette palette;
return palette;
}
Entry table[count];
};
static_assert(sizeof(Info) == 40, "");
template <typename T>
void write_bmp(
std::ofstream& out,
std::uint32_t width,
std::uint32_t height,
std::uint16_t count_colour_planes,
const T* data,
std::uint32_t data_size
) {
auto& palette = Palette<256>::get();
Header header;
Info info;
info.width = width;
info.height = height;
//info.count_colour_planes = count_colour_planes;
const std::uint32_t t_per_pixel = data_size / (width * height);
info.bits_per_pixel = std::uint16_t(sizeof(T) * 8 * t_per_pixel);
const std::uint32_t row_len = width * sizeof(T) * t_per_pixel;
// Round row up to next multiple of 4.
const std::uint32_t padded_row_len = (row_len + 3) & ~3u;
const std::uint32_t data_size_bytes = padded_row_len * height;
info.image_bytes_size = data_size_bytes;
if (count_colour_planes == 1) {
header.offset = sizeof(Info) + sizeof(Header) + sizeof(palette);
} else {
header.offset = sizeof(Info) + sizeof(Header);
}
header.size = header.offset + height * padded_row_len;
out.write(reinterpret_cast<const char*>(&header), sizeof(header));
out.write(reinterpret_cast<const char*>(&info), sizeof(info));
if (count_colour_planes == 1) {
out.write(reinterpret_cast<const char*>(&palette), sizeof(palette));
}
const char padding[3] = {};
for (int i = height; i > 0;) {
--i;
const char* p_row =
reinterpret_cast<const char*>(data + i * width);
out.write(p_row, row_len);
if (padded_row_len != row_len) {
out.write(padding, padded_row_len - row_len);
}
}
};
template <typename T>
void write_bmp(
std::ofstream& out,
std::uint32_t width,
std::uint32_t height,
std::uint16_t count_colour_planes,
const std::vector<T>& data
) {
write_bmp(out, width, height, count_colour_planes,
&*data.cbegin(), data.size());
}
template <typename T>
void write_bmp(
const std::string& outfilename,
std::uint32_t width,
std::uint32_t height,
std::uint16_t count_colour_planes,
const std::vector<T>& data
) {
std::ofstream out{ outfilename, std::ios_base::binary };
if (!out) {
throw std::runtime_error("Failed to open: " + outfilename);
}
write_bmp(out, width, height, count_colour_planes,
&*data.begin(), static_cast<std::uint32_t>(data.size()));
out.close();
}
} // namespace
#endif // BMP_WRITER
And an example of use:
#include "bmp_writer.h"
struct PixelType {
PixelType(std::uint8_t r, std::uint8_t g, std::uint8_t b)
: c{ b, g, r } {}
PixelType(std::uint32_t c)
: c{ (c >> 16) & 0xffu, (c >> 8) & 0xffu, c & 0xffu } {}
PixelType() = default;
std::uint8_t c[3] = {};
};
void bmp_writer_test1() {
const int size_x = 20;
const int size_y = 10;
std::vector<PixelType> data(size_x * size_y);
// Write some pixels.
data[2] = PixelType(0xff0000); // red
data[10] = PixelType(0x00ff00); // green
bmp_writer::write_bmp(
"test_bmp_writer1.bmp",
std::uint32_t(size_x),
std::uint32_t(size_y),
std::uint16_t(sizeof(PixelType)),
data
);
}
void bmp_writer_test2() {
const int size_x = 20;
const int size_y = 10;
PixelType data[size_x * size_y];
// Write some pixels.
data[15] = PixelType(0xff, 0, 0); // red
data[17] = PixelType(0, 0xff, 0); // green
std::ofstream out{ "test_bmp_writer2.bmp", std::ios_base::binary };
if (!out) {
throw std::runtime_error("Failed to open: " "test_bmp_writer2.bmp");
}
bmp_writer::write_bmp(
out,
std::uint32_t(size_x),
std::uint32_t(size_y),
std::uint16_t(sizeof(PixelType)),
data,
sizeof(data) / sizeof PixelType
);
}