Save RGB image as PPM format - C++

I am trying to create an RGB image in C++ without using any image libraries such as OpenCV.
First I created a grayscale image and drew a rectangle into it, passing parameters to a function: the starting point, width, height, and so on. This code works fine for the grayscale case, but when I increase the number of color channels to 3 (RGB) and set the red, green, and blue values, it no longer works. That is my problem.
How can I get it to work correctly?
x => x coordinate of the starting point
y => y coordinate of the starting point
width => rectangle width
height => rectangle height
value => RGB or grayscale color value
My code:
Image::Image(int width, int height, int n_channels, int step)
{
    cout << "Image constructor is running!" << endl;
    m_width = width;
    m_height = height;
    m_n_channels = n_channels;
    m_step = m_width*m_n_channels;
    if (m_step < step)
        m_step = step;
    m_data = new uchar[m_step*height];
}
Image* Image::new_gray(int width, int height)
{
    cout << "New gray image is creating!" << endl;
    return new Image(width, height, 1);
}

Image* Image::new_rgb(int width, int height)
{
    cout << "New RGB image is creating!" << endl;
    return new Image(width, height, 3);
}
void Image::set_rect(int x, int y, int width, int height, uchar value)
{
    if (x < 0) {
        width += x;
        x = 0;
    }
    if (y < 0) {
        height += y;
        y = 0;
    }
    for (int j = y; j < y+height; ++j) {
        if (j >= m_height)
            break;
        uchar* row_data = data(j);
        for (int i = x; i < x+width; ++i) {
            if (i >= m_width)
                break;
            for (int c = 0; c < m_n_channels; ++c)
                if (c == 0) {
                    row_data[i*m_n_channels + c] = value;
                } else if (c == 1) {
                    row_data[i*m_n_channels + c] = value;
                } else if (c == 2) {
                    row_data[i*m_n_channels + c] = value;
                }
        }
    }
}
bool Image::write_pnm(const std::string& filename) const
{
    if (m_n_channels != 1) {
        const string magic_head = "P6";
        ofstream fout;
        string extended_name = filename + ".ppm";
        fout.open(extended_name.c_str(), ios::out | ios::binary);
        fout << magic_head << "\n";
        fout << m_width << " " << m_height << " 255\n";
        for (int y = 0; y < m_height; ++y) {
            const uchar *row_data = data(y);
            cout << reinterpret_cast<const char*>(row_data);
            fout.write(reinterpret_cast<const char*>(row_data), m_width*sizeof(uchar));
        }
        fout.close();
        return true;
    }
    const string magic_head = "P5";
    ofstream fout;
    string extended_name = filename + ".pgm";
    fout.open(extended_name.c_str(), ios::out | ios::binary);
    fout << magic_head << "\n";
    fout << m_width << " " << m_height << " 255\n";
    for (int y = 0; y < m_height; ++y) {
        const uchar *row_data = data(y);
        fout.write(reinterpret_cast<const char*>(row_data), m_width*sizeof(uchar));
    }
    fout.close();
    return true;
}
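(For completeness: the data(j) accessor is not shown in the question. Given the constructor above, it is presumably a row accessor along these lines, with m_step acting as the row stride in bytes:

uchar* Image::data(int y) { return m_data + y * m_step; }             // hypothetical: row y starts at byte y*m_step
const uchar* Image::data(int y) const { return m_data + y * m_step; } // const overload for const member functions
)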
My main function:
#include <cstdlib>
#include <iostream>
#include "image.h"

using std::cout;
using std::endl;
using ceng391::Image;

int main(int argc, char** argv)
{
    Image* gray = Image::new_gray(128, 128);
    cout << "(" << gray->w() << "x" << gray->h() << ") channels: "
         << gray->n_ch() << " step: " << gray->step() << endl;
    gray->set_zero();
    gray->set_rect(32, 32, 64, 64, 255);
    gray->write_pnm("/tmp/test_image");

    Image* rgb_image = Image::new_rgb(128, 128);
    cout << "(" << rgb_image->w() << "x" << rgb_image->h() << ") channels: "
         << rgb_image->n_ch() << " step: " << rgb_image->step() << endl;
    rgb_image->set_zero_rgb();
    rgb_image->set_rect(32, 32, 64, 64, 150);
    rgb_image->write_pnm("/tmp/test_image_rgb");

    delete gray;
    delete rgb_image;
    return EXIT_SUCCESS;
}

This code works for grayscale images because a grayscale row contains exactly as many bytes as the image has pixels per row:
fout.write(reinterpret_cast<const char*>(row_data), m_width*sizeof(uchar));
But an RGB image has three times as many bytes per row: one pixel is represented by 3 channels, so the stream size must be multiplied by 3 (R, G, B channels) per pixel:
fout.write(reinterpret_cast<const char*>(row_data), m_width*sizeof(uchar)*3);
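More generally, a PNM row contains m_width * m_n_channels bytes, so the two branches of write_pnm can share one write loop. A minimal sketch, assuming the same member names, using-declarations, and data(y) row accessor as in the question:

bool Image::write_pnm(const std::string& filename) const
{
    // P5 = binary grayscale (PGM), P6 = binary RGB (PPM).
    const bool is_rgb = (m_n_channels == 3);
    const string extended_name = filename + (is_rgb ? ".ppm" : ".pgm");
    ofstream fout(extended_name.c_str(), ios::out | ios::binary);
    if (!fout)
        return false;
    fout << (is_rgb ? "P6" : "P5") << "\n";
    fout << m_width << " " << m_height << " 255\n";
    for (int y = 0; y < m_height; ++y) {
        // One row is width * n_channels bytes: 1 byte per pixel for
        // grayscale, 3 bytes (R, G, B) per pixel for RGB.
        fout.write(reinterpret_cast<const char*>(data(y)),
                   m_width * m_n_channels);
    }
    return fout.good();
}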

Related

C++ Load WebP file into Array

I am attempting to load a WebP image file into an array, but the WebPGetInfo function does not seem to work as advertised: the width and height variables are never updated to the dimensions of the input image.
Below is the code I have for saving an array as a WebP image, which does work, and the code for loading a WebP image into an array, which does not. Comments mark prior unsuccessful attempts as well as next steps.
If someone could let me know either what the error is or whether there is a better function to use, that would be greatly appreciated.
#include "bitmap.hh"
#include <webp/decode.h>
#include <webp/encode.h>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <random>
#include "webp/mux_types.h"
using namespace std;
bitmap::bitmap(uint32_t w, uint32_t h, uint32_t color)
: w(w), h(h), rgb(new uint32_t[w * h]) {
for (uint32_t i = 0; i < w * h; i++) rgb[i] = color;
}
bitmap::~bitmap() { delete[] rgb; }
void bitmap::clear(uint32_t color) {
for (uint32_t i = 0; i < w * h; i++) rgb[i] = color;
}
void bitmap::save(const char filename[]) {
uint8_t* out;
size_t s = WebPEncodeRGBA((uint8_t*)rgb, w, h, 4 * w, 100, &out);
ofstream f(filename, ios::binary);
f.write((char*)out, s);
WebPFree(out);
}
void bitmap::load(const char filename[]) {
// int s = WebPGetInfo(rgb, w * h * 4, w, h, out); potentially?
cout << '\n';
cout << "load function" << '\n';
// https://www.tutorialspoint.com/how-can-i-get-a-file-s-size-in-cplusplus
ifstream in_file(filename, ios::binary);
in_file.seekg(0, ios::end);
int file_size = in_file.tellg();
cout << "Size of the file is"
<< " " << file_size << " "
<< "bytes" << '\n';
uint8_t* out = new uint8_t[file_size];
in_file.read((char*)out, file_size);
ofstream out_file("out_test.webp", ios::binary);
out_file.write((char*)out, file_size);
//
// skip the header
// https://developers.google.com/speed/webp/docs/riff_container
// header is presumed to be 12 bytes
uint8_t skip = 12;
// https://developers.google.com/speed/webp/docs/api
// attempts to get the width and height of the input image, but the cout
// shows that WebPGetInfo does not work as expected
// attempted using WebPDecodeRGBA but no luck there either
int width = 0;
int height = 0;
int s = WebPGetInfo(&out[skip], file_size, &width, &height);
// use WebPDecodeRGBA maybe?
cout << file_size << ' ' << width << ' ' << height << ' ' << s << '\n';
// prints out the elements in the out array, but most entries are blank or
// garbage, with no elements being from "RIFF" item in header as expected
cout << "printing array" << '\n';
for (int i = 0; i < 20; i++) {
cout << "i = " << i << ' ' << out[i] << '\n';
}
// The width and height had not been updating properly. However, the following steps are
// what the code should do if the functions above worked as expected:
/*
w = width;
h = height;
uint8_t* pixels_split = WebPDecodeRGBA(&out[skip], file_size, &width, &height);
uint32_t pixels_merge[file_size];
for (int i = 0; i < file_size; i+=4) {
uint8_t r = pixels_split[i];
uint8_t g = pixels_split[i+1];
uint8_t b = pixels_split[i+2];
uint8_t T = pixels_split[i+3];
pixels_merge[i] = r *(256*256*256) + g *(256*256) + b *256 + T;
}
rgb = pixels_merge;
(do something to alter the image)
bitmap::save("test_out.webp");
*/
}
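For what it's worth, WebPGetInfo validates the container header itself, so it must be handed the complete file contents, RIFF header included; skipping the first 12 bytes makes that validation fail, the function returns 0, and width/height are left untouched. A minimal sketch of the usual call sequence (the file name is hypothetical):

#include <webp/decode.h>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <vector>

int main() {
    // Read the whole .webp file into memory.
    std::ifstream in("test.webp", std::ios::binary);
    std::vector<uint8_t> data((std::istreambuf_iterator<char>(in)),
                              std::istreambuf_iterator<char>());
    int width = 0, height = 0;
    // Pass the buffer from byte 0 -- do NOT skip the 12-byte RIFF header.
    if (!WebPGetInfo(data.data(), data.size(), &width, &height)) {
        std::cerr << "not a valid WebP file\n";
        return 1;
    }
    std::cout << width << " x " << height << '\n';
    // WebPDecodeRGBA likewise takes the full buffer; it returns a malloc'd
    // width*height*4 RGBA array that must be released with WebPFree().
    uint8_t* rgba = WebPDecodeRGBA(data.data(), data.size(), &width, &height);
    // ... repack rgba into uint32_t pixels here ...
    WebPFree(rgba);
    return 0;
}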

How to collect the 4 bytes of data per pixel in a BMP, fast and efficiently, and assemble them into a struct?

I have found this Loading a tga/bmp file in C++/OpenGL; it works flawlessly and I would like to use it in my project. However, my project requires an RGB struct to modify the pixel buffer:
glDrawPixels(win_height, win_width, GL_RGB, GL_FLOAT, myBuffer);
so it needs some changes to work in my project.
typedef union PixelInfo
{
std::uint32_t Colour;
struct
{
float b, g, r, a;
};
} *PPixelInfo;
class BMP
{
private:
std::uint32_t width, height;
std::uint16_t BitsPerPixel;
std::vector<std::uint8_t> Pixels;
public:
BMP(const char* FilePath);
std::vector<std::uint8_t> GetPixels() const {return this->Pixels;}
std::uint32_t GetWidth() const {return this->width;}
std::uint32_t GetHeight() const {return this->height;}
bool HasAlphaChannel() {return BitsPerPixel == 32;}
};
BMP::BMP(const char* FilePath)
{
std::fstream hFile(FilePath, std::ios::in | std::ios::binary);
if (!hFile.is_open()) throw std::invalid_argument("Error: File Not Found.");
hFile.seekg(0, std::ios::end);
std::size_t Length = hFile.tellg();
hFile.seekg(0, std::ios::beg);
std::vector<std::uint8_t> FileInfo(Length);
hFile.read(reinterpret_cast<char*>(FileInfo.data()), 54);
if(FileInfo[0] != 'B' && FileInfo[1] != 'M')
{
hFile.close();
throw std::invalid_argument("Error: Invalid File Format. Bitmap Required.");
}
if (FileInfo[28] != 24 && FileInfo[28] != 32)
{
hFile.close();
throw std::invalid_argument("Error: Invalid File Format. 24 or 32 bit Image Required.");
}
BitsPerPixel = FileInfo[28];
width = FileInfo[18] + (FileInfo[19] << 8);
height = FileInfo[22] + (FileInfo[23] << 8);
std::uint32_t PixelsOffset = FileInfo[10] + (FileInfo[11] << 8);
std::uint32_t size = ((width * BitsPerPixel + 31) / 32) * 4 * height;
Pixels.resize(size);
hFile.seekg (PixelsOffset, std::ios::beg);
hFile.read(reinterpret_cast<char*>(Pixels.data()), size);
hFile.close();
}
int main()
{
BMP info = BMP("Fixedsys16x28.bmp");
//info.
std::cout << "getwidth: " << info.GetWidth() << endl;
std::cout << "getheight: " << info.GetHeight()<< endl;
// for(auto i : info.GetPixels()){
// //float i_float = float(i);
// //float res = i_float / 15.0;
// //std::cout << std::hex << (int)i << " " << endl;
// std::cout << setprecision(2) << fixed << (float) i / 255 << " " << endl;
// }
std::cout << endl;
std::cout << "size: " << info.GetPixels().size() << endl;
std::cout << "alpha: " << info.HasAlphaChannel() << endl;
std::cout << setprecision(2) << fixed;
for(int i = 0; i < static_cast<int>(info.GetPixels().size()); i++){
for(int j = 0; j < 3; j++){
if(j == 0){
std::cout << (float) info.GetPixels()[i]/ 255 << " ";
}
else if(j == 1){
std::cout << (float) info.GetPixels()[i] / 255 << " ";
}
else{
std::cout << (float) info.GetPixels()[i] / 255;
}
}
std::cout << endl;
}
getchar();
GLuint texture = 0;
glGenTextures(1, &texture);
glBindTexture(GL_TEXTURE_2D, texture);
glTexImage2D(GL_TEXTURE_2D, 0, info.HasAlphaChannel() ? GL_RGBA : GL_RGB, info.GetWidth(), info.GetWidth(), 0, info.HasAlphaChannel() ? GL_BGRA : GL_BGR, GL_UNSIGNED_BYTE, info.GetPixels().data());
}
How do I change this line of code
hFile.read(reinterpret_cast<char*>(Pixels.data()), size);
so that the vector gets filled with ...
typedef union PixelInfo
{
std::uint32_t Colour;
struct
{
float B, G, R, A;
};
} *PPixelInfo;
collections (after changing the setup of the vector, of course, or using another one)?
What kind of method/function could replace Pixels.data(), so that it collects these 4 bytes and assembles them into the PixelInfo struct? Pixels.data() basically provides a pointer to the pixel vector.
What would a method that performs this task look like? It should collect 4 bytes of data, format them accordingly, and put them into a vector<PixelInfo> while converting "std::uint8_t" into float, fast and efficiently. It should also take "info.HasAlphaChannel()" into consideration.
The accompanying image is 512×84, and the pixel vector size is 129024 (43008 pixels × 3 bytes = 129024).
I made this loop over the pixel vector:
std::cout << setprecision(2) << fixed;
for(int i = 0; i < static_cast<int>(info.GetPixels().size()); i++){
for(int j = 0; j < 3; j++){
if(j == 0){
std::cout << (float) info.GetPixels()[i]/ 255 << " ";
}
else if(j == 1){
std::cout << (float) info.GetPixels()[i] / 255 << " ";
}
else{
std::cout << (float) info.GetPixels()[i] / 255;
}
}
std::cout << endl;
}
It seems to have worked correctly; even the last line contains 3 "bytes" of data.
However, this next routine takes about 20 seconds for a 400 x 400 px image:
vector<PixelInfo> pixelStore (info->GetPixels().size() / 3);
int counter = 0;
for(int i = 0; i < size; i+=3){
PixelInfo temp;
temp.r = 0;
temp.g = 0;
temp.b = 0;
for(int j = 0; j < 3; j++){
if(j == 0){
temp.r = info->GetPixels()[i + j];
}
else if(j == 1){
temp.g = info->GetPixels()[i + j];
}
else{
temp.b = info->GetPixels()[i + j];
}
if(j == 2){
//pixelStore.push_back(temp);
pixelStore[counter] = temp;
counter++;
}
}
}
Your code shows std::vector<std::uint8_t> Pixels; which is a sequence of 8-bit pixels, I suppose. You want a vector of PixelInfo instead? Now, does that structure match the format of the bytes in the file? If so, then what's the problem?
If the bytes in the file match the layout of your structure (including the machine's endianness!), just read them the same way:
vector<PixelInfo> Pixels;
size_t pixels_to_read = ....??? ;
Pixels.resize(pixels_to_read);
hFile.read (reinterpret_cast<char*>(Pixels.data()), pixels_to_read*sizeof(PixelInfo));
Update:
collect 4 bytes of data and format them accordingly and put them in a vector while converting "std::uint8_t" into float. fast and efficiently.
uint8_t buf[4]; // std::byte has no implicit conversion to float, so use uint8_t
hFile.read(reinterpret_cast<char*>(buf), 4); // read 4 bytes from the file
PixelInfo px;
px.R = buf[0]; // the 8-bit integer converts to float on assignment
px.G = buf[1];
px.B = buf[2];
px.A = buf[3];
Note that your PixelInfo union doesn't make sense -- how can the 32-bit colour share its layout with four 32-bit float components, which occupy 16 bytes? Furthermore, I don't think that definition is legal C++, as the standard has no anonymous structs; if it compiles, it must be a compiler extension.
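If a union view is wanted at all, the overlapping members must occupy the same bytes, e.g. four 8-bit channels against one 32-bit word. A sketch (and even then, reading the member you did not last write is formally type-punning, so memcpy is the safer tool):

#include <cstdint>

// 4 x 1 byte == sizeof(uint32_t), so the two views genuinely overlap.
union Pixel32 {
    std::uint32_t colour;
    std::uint8_t  bgra[4]; // BMP byte order: B, G, R, A
};
static_assert(sizeof(Pixel32) == 4, "views must overlap exactly");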
As for doing it fast:
Read a lot at a time rather than one pixel. A whole row is good, if it's not practical to read the entire file into memory. Then you just step along the buffer you loaded.
You can hope that the 4 individual statements get auto-vectorized by the optimizer. If you put a tight and simple loop around just that part (to process a whole row), you might get the compiler to vectorize it.
The fact that you have to write 4 separate statements, because the left-hand side uses different names rather than an index, should not affect the overall speed.
20 seconds for a 400×400 pixel image? I would think it would be near-instantaneous. I have experience loading/converting/decoding graphics files, having written a library for DOS and bare metal (16-bit x86 code) back in the early '90s. Try benchmarking code that only does the conversion listed above, to see whether that is actually your bottleneck. Look at the generated code (in the debugger, or using Compiler Explorer) to see if it's vectorizing. Look into calling the vector intrinsics directly, if your compiler has them available. And see if you're using all the right compiler optimization flags.
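As a rough sketch of such a conversion loop, using a plain struct of floats instead of the union (the names are mine): note, too, that GetPixels() in the posted class returns the vector by value, so calling it inside nested loops copies the whole 129 KB buffer on every access, which by itself can account for the 20 seconds. Fetch it once and index the copy:

#include <cstdint>
#include <vector>

struct PixelF { float b, g, r, a; }; // one float per channel

// Convert a packed BGR(A) byte buffer to float pixels in [0, 1].
std::vector<PixelF> to_float_pixels(const std::vector<std::uint8_t>& bytes,
                                    bool has_alpha)
{
    const std::size_t bpp = has_alpha ? 4 : 3; // bytes per source pixel
    std::vector<PixelF> out(bytes.size() / bpp);
    for (std::size_t i = 0; i < out.size(); ++i) {
        const std::uint8_t* p = &bytes[i * bpp]; // BMP stores B, G, R(, A)
        out[i].b = p[0] / 255.0f;
        out[i].g = p[1] / 255.0f;
        out[i].r = p[2] / 255.0f;
        out[i].a = has_alpha ? p[3] / 255.0f : 1.0f;
    }
    return out;
}

// Usage: one copy up front, then a tight loop over it.
// std::vector<std::uint8_t> bytes = info.GetPixels();
// std::vector<PixelF> pixels = to_float_pixels(bytes, info.HasAlphaChannel());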

Why is the Right Side of my Image Being Wrapped Around?

Essentially, I'm making a class that takes a BMP file in its constructor. Following THIS POST, I get all of the header data out of the way and then read the RGB data into a vector. I then calculate the intensities as 0.25R + 0.5G + 0.25B and write these numbers into a space-separated file, line by line. Using GNUPlot to open and plot that file (the original above, my result below) gives me this result.
original
distortion
As you can see, the right side of the image is consistently wrapped around further and further as the image is written to file (or somewhere earlier in the process). I've pasted the code below; any help?
std::vector<char> MImage::readBMP(std::string const file){
static constexpr size_t HEADER_SIZE = 54;
std::ifstream bmp(file, std::ios::binary);
std::array<char, HEADER_SIZE> header;
bmp.read(header.data(), header.size());
auto fileSize = *reinterpret_cast<uint32_t*>(&header[2]);
auto dataOffset = *reinterpret_cast<uint32_t*>(&header[10]);
auto width = *reinterpret_cast<uint32_t*>(&header[18]);
auto height = *reinterpret_cast<uint32_t*>(&header[22]);
auto depth = *reinterpret_cast<uint16_t*>(&header[28]);
/*
std::cout << "fileSize: " << fileSize << std::endl;
std::cout << "dataOffset: " << dataOffset << std::endl;
std::cout << "width: " << width << std::endl;
std::cout << "height: " << height << std::endl;
std::cout << "depth: " << depth << "-bit" << std::endl;
*/
std::vector<char> img(dataOffset - HEADER_SIZE);
//bmp.read(img.data(), img.size());
auto dataSize = ((width * 3 + 3) & (~3)) * height;
img.resize(dataSize);
bmp.read(img.data(), img.size());
char temp = 0;
for (int i = dataSize - 4; i >= 0; i -= 3)
{
temp = img[i];
img[i] = img[i + 2];
img[i + 2] = temp;
}
// Convert to intensity
int k = 0;
int size = (int)img.size();
for (int j = 0; k+2 < size; j++)
{
//0.25B + 0.5G + 0.25R
img[j] = ((abs(img[k]) >> 2) + (abs(img[k + 1]) >> 1) + (abs(img[k + 2]) >> 2));
//OutputDebugStringA((to_string(img[j]) + "\n").c_str());
k += 3;
}
img.resize(dataSize / 3);
//OutputDebugStringA((to_string(img.size()) + "\n").c_str());
int a, b, c = 0;
//Testing #img data
ofstream TestPic;
TestPic.open("testpic.txt");
for (a = 0; a < HEIGHT; a++) {
for (b = 0; b < WIDTH; b++) {
TestPic << (int)img[c];
if (b < WIDTH-1) {
TestPic << " ";
}
c++;
}
TestPic << "\n";
}
TestPic.close();
return img;
}
GNUPlot command: plot [0:630] [0:354] 'testpic.txt' matrix with image pixels
The problem you are seeing is caused by improper data alignment. Each scanline of a .bmp file must have a byte size divisible by 4. You are calculating the input data size correctly with this line:
auto dataSize = ((width * 3 + 3) & (~3)) * height;
However, while converting the img array you never compensate for, or throw away, the padding at the end of each scanline.
The best advice is either to use a standard bmp loader/converter like the one in the STB libraries (the stb_image.h file) or, if you want to do it yourself, to allocate a second array (img2 or something like that) and iterate over the img array scanline by scanline, writing the grayscale values to img2, as sketched below.
By the way, the answer you mentioned contains a solution for this exact problem (it is called the "padding fix").
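A sketch of that scanline-by-scanline fix, assuming 24-bit data and the width/height fields already parsed in readBMP:

#include <algorithm>
#include <cstdint>
#include <vector>

// Each stored BMP scanline is rounded up to a multiple of 4 bytes, but only
// width*3 of those bytes are pixel data; copy the payload rows into a
// tightly packed buffer and run the intensity conversion on that instead.
std::vector<char> strip_padding(const std::vector<char>& img,
                                std::uint32_t width, std::uint32_t height)
{
    const std::size_t row_padded = (width * 3 + 3) & ~std::size_t(3);
    const std::size_t row_packed = width * 3;
    std::vector<char> packed(row_packed * height);
    for (std::uint32_t y = 0; y < height; ++y)
        std::copy_n(img.begin() + y * row_padded, row_packed,
                    packed.begin() + y * row_packed);
    return packed;
}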

Caffe forward net in a for loop not working

I am currently trying to write a C++ wrapper for PSPNet's prediction (originally in Matlab). PSPNet runs on Caffe.
Situation: I have a trained Caffe model and would like to implement this wrapper to run segmentation on a given input. In this case my crop_size is smaller than the original image size, so the image is manually cropped into multiple 425x425 "frames" that are pre-processed and fed forward through the Caffe net in a for loop.
Problem: However, the net seems to run forward only once despite being in a for loop, as suggested by its processing time and output; see below.
This is the incomplete code I am currently working on:
#define USE_OPENCV 1
#define trimapSize 1
#define Debug 0
#include <caffe/caffe.hpp>
#include "Header.h"
#include "caffe/data_reader.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/blob.hpp"
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif // USE_OPENCV
#include <algorithm>
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <chrono> //Just for time measurement
#include <cmath>
#include <array>
#include <iostream>
#include <fstream>
#ifdef USE_OPENCV
using namespace caffe; // NOLINT(build/namespaces)
using std::string;
class Classifier {
public:
Classifier(const string& model_file,
const string& trained_file);
cv::Mat Predict(const cv::Mat& img);
private:
void SetMean(int weight, int heigh);
void WrapInputLayer(std::vector<cv::Mat>* input_channels);
cv::Mat Visualization(Blob<float>* output_layer);
cv::Mat Preprocess(const cv::Mat& img_scale, int ori_rows, int ori_cols, std::vector<cv::Mat>* input_channels);
private:
shared_ptr<Net<float> > net_;
cv::Size input_geometry_;
int num_channels_;
cv::Mat mean_;
};
Classifier::Classifier(const string& model_file,
const string& trained_file) {
Caffe::set_mode(Caffe::GPU);
/* Load the network. */
net_.reset(new Net<float>(model_file, TEST));
net_->CopyTrainedLayersFrom(trained_file);
CHECK_EQ(net_->num_inputs(), 1) << "Network should have exactly one input.";
CHECK_EQ(net_->num_outputs(), 2) << "Network should have exactly one output.";
Blob<float>* input_layer = net_->input_blobs()[0];
num_channels_ = input_layer->channels();
CHECK(num_channels_ == 3 || num_channels_ == 1)
<< "Input layer should have 1 or 3 channels.";
input_geometry_ = cv::Size(input_layer->width(), input_layer->height());
}
/* Create the mean file in binaryproto format. */
void Classifier::SetMean(int weight, int heigh) {
mean_ = cv::Mat(heigh, weight, CV_32FC3);
mean_ = cv::Scalar(94.6744, 88.8887, 100.5404);//RGB
}
cv::Mat Classifier::Predict(const cv::Mat& img) {
cv::Mat originalTmp = img.clone();
Blob<float>* input_layer = net_->input_blobs()[0];
input_layer->Reshape(1, num_channels_,
input_geometry_.height, input_geometry_.width);
std::cout << "input_geometry_.height = " << input_geometry_.height << "input_geometry_.width = "<< input_geometry_.width << std::endl;
/* Forward dimension change to all layers. */
net_->Reshape();
std::vector<cv::Mat> input_channels;
WrapInputLayer(&input_channels);
/*-----------------------------FOR MULTI-SCALE PROCESSING--------------------------*/
int base_size = 0;
int ori_rows = img.rows;
int ori_cols = img.cols;
float scale_array [1] = {1};
// float scale_array = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
std::cout << "ori_rows = " << ori_rows << "\t ori_cols = " << ori_cols << std::endl;
cv::Mat data_all = cv::Mat::zeros(cv::Size(425, 425), CV_32FC3);
if (ori_rows > ori_cols) {
base_size = ori_rows;
}
else base_size = ori_cols;
std::cout << "base_size = " << base_size << std::endl;
std::cout << "size of array = " << (sizeof(scale_array)/sizeof(*scale_array)) << std::endl;
for (int i=0; i < (sizeof(scale_array)/sizeof(*scale_array)); i++){
int long_size = base_size * scale_array[i] + 1;
int new_rows = long_size;
int new_cols = long_size;
std::cout << "BEFORE new rows = " << new_rows << "\t new cols = " << new_cols << std::endl;
if (ori_rows > ori_cols){
new_cols = round(long_size/ori_rows*ori_cols);
}
else {new_rows = round(long_size/ori_cols*ori_rows);}
std::cout << "AFTER new rows = " << new_rows << "\t new cols = " << new_cols << std::endl;
cv::Mat img_scale;
cv::resize(img, img_scale, cv::Size(new_cols, new_rows), 0, 0, CV_INTER_LINEAR);
std::cout << "img_scale height: " << img_scale.rows << "\t width = " << img_scale.cols << std::endl;
cv::imshow("img_scale",img_scale);
cv::waitKey(0);
data_all = data_all + Preprocess(img_scale, ori_rows, ori_cols, &input_channels);
std::cout << "ok! DONE PREPROCESS!" << std::endl;
}
return data_all;
}
cv::Mat Classifier::Preprocess(const cv::Mat& img_scale, int ori_rows, int ori_cols, std::vector<cv::Mat>* input_channels)
{
int crop_size = 425;
int new_rows = img_scale.rows;
int new_cols = img_scale.cols;
cv::Mat data_output = cv::Mat::zeros(cv::Size(ori_cols, ori_rows), CV_32FC3);
int long_size = new_rows;
cv::Mat img_processed;
if (new_cols > new_rows){
long_size = new_cols;
}
if (long_size <= crop_size){
// img_processed = Preprocess(img_scale, &input_channels);
//RUN CAFFE --- NOT YET DONE ---
std::cout << "OK!" << std::endl;
}
else {
float stride_rate = 2.0/3.0;
std::cout << "stride_rate = " << stride_rate << std::endl;
int stride = ceil(crop_size*stride_rate);
std::cout << "stride = " << stride << std::endl;
cv::Mat img_pad = img_scale;
int pad_rows = img_pad.rows;
int pad_cols = img_pad.cols;
int h_grid = ceil((pad_rows - crop_size)/stride) + 1;
int w_grid = ceil((pad_cols - crop_size)/stride) + 1;
cv::Mat img_sub;
cv::Mat data_scale = cv::Mat::zeros(cv::Size(pad_cols, pad_cols), CV_32FC3);
for(int grid_yidx = 1; grid_yidx <= h_grid; grid_yidx++){
for (int grid_xidx = 1; grid_xidx <= w_grid; grid_xidx++){
int s_x = (grid_xidx-1)*stride+1;
int s_y = (grid_yidx-1)*stride+1;
int e_x = std::min(s_x + crop_size -1, pad_cols);
int e_y = std::min(s_y + crop_size -1, pad_rows);
s_x = e_x - crop_size + 1;
s_y = e_y - crop_size + 1;
/* Cropping image */
img_pad(cv::Rect(s_x,s_y,crop_size,crop_size)).copyTo(img_sub);
cv::Mat sample;
if (img_sub.channels() == 3 && num_channels_ == 1)
cv::cvtColor(img_sub, sample, cv::COLOR_BGR2GRAY);
else if (img_sub.channels() == 4 && num_channels_ == 1)
cv::cvtColor(img_sub, sample, cv::COLOR_BGRA2GRAY);
else if (img_sub.channels() == 4 && num_channels_ == 3)
cv::cvtColor(img_sub, sample, cv::COLOR_BGRA2BGR);
else if (img_sub.channels() == 1 && num_channels_ == 3)
cv::cvtColor(img_sub, sample, cv::COLOR_GRAY2BGR);
else
sample = img_sub;
cv::Mat sample_float;
if (num_channels_ == 3)
sample.convertTo(sample_float, CV_32FC3);
else
sample.convertTo(sample_float, CV_32FC1);
SetMean(sample.rows, sample.cols);
cv::imshow("sample_float", sample_float);
cv::cvtColor(sample_float, sample_float, cv::COLOR_BGRA2RGB);
sample_float = sample_float.t();
cv::Mat sample_normalized(sample_float.size(),sample_float.type());
cv::subtract(sample_float.clone(), mean_, sample_normalized);
cv::Mat sample_temp;
sample_normalized.convertTo(sample_temp, CV_32FC3, 255);
cv::imwrite("/home/sgp1053c/Desktop/PSPNET-cudnn5_wrapper/wrapper/sample_normalized.png", sample_temp);
cv::imshow("sample_normalized", sample_normalized);
cv::waitKey(0);
/* This operation will write the separate BGR planes directly to the
* input layer of the network because it is wrapped by the cv::Mat
* objects in input_channels. */
img_processed = sample_normalized.t();
cv::split(img_processed, *input_channels);
CHECK(reinterpret_cast<float*>(input_channels->at(0).data)
== net_->input_blobs()[0]->cpu_data())
<< "Input channels are not wrapping the input layer of the network.";
img_processed.convertTo(sample_temp, CV_32FC3, 255);
cv::imwrite("/home/sgp1053c/Desktop/PSPNET-cudnn5_wrapper/wrapper/img_processed.png", sample_temp);
cv::imshow("img_normalised",img_processed);
cv::waitKey();
std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); //Just for time measurement
// float loss = 0.0;
// net_->Forward(&loss);
net_->Forward();
std::chrono::steady_clock::time_point end= std::chrono::steady_clock::now();
std::cout << "Processing time = " << (std::chrono::duration_cast<std::chrono::microseconds>(end - begin).count())/1000000.0 << " sec" <<std::endl; //Just for time measurement
/* Copy the output layer to a std::vector */
Blob<float>* output_layer = net_->output_blobs()[0];
cv::Mat segment = Visualization(output_layer);
cv::imwrite("/home/sgp1053c/Desktop/PSPNET-cudnn5_wrapper/wrapper/segment.png", segment);
}
}
}
return (img_processed);
}
struct RGB {
int R;
int G;
int B;
};
vector<RGB> get_palette(int nClass)
{
vector<RGB> listPlalette;
RGB rgb0;
rgb0.R = 0;
rgb0.G = 0;
rgb0.B = 0;
listPlalette.push_back(rgb0);
for (int i = 1; i < nClass; i++)
{
RGB rgb;
rgb.R = i*50;
rgb.G = i*50 + i;
rgb.B = 255-i*20;
listPlalette.push_back(rgb);
}
return listPlalette;
}
cv::Mat Classifier::Visualization(Blob<float>* output_layer) {
std::vector<cv::Mat> input_channels;
int H = output_layer->height();
int W = output_layer->width();
// int N = output_layer->num(); //Batch Size
int C = output_layer->channels(); //Number of classes
int index = 0;
#ifdef CPU_ONLY
const float* output_data = output_layer->cpu_data();
#else
const float* output_data = output_layer->cpu_data();
#endif // !CPU_ONLY
cv::Mat class_each_row(C, W*H, CV_32F);
for (int i = 0; i < C; i++) {
for (int j = 0; j < (W*H); j++) {
class_each_row.at<float>(i, j) = output_data[index];
index = index + 1;
}
}
class_each_row = class_each_row.t();
//==================================CONVERT INTO LABELS==================================//
float maxValue = 0;
int* labelIndex = (int*)malloc(W*H * sizeof(int));
int indexX = 0;
for (int i = 0; i < class_each_row.rows; i++) {
maxValue = -999999999999;
indexX = 0;
for (int k = 0; k < C; k++)
{
float dataM = class_each_row.at<float>(i, k);
if (dataM > maxValue) {
maxValue = dataM;
indexX = k;
}
}
labelIndex[i] = indexX;
}
cv::Mat labelTmp(W, H, CV_8UC3);
uchar* dataLabelTmp = labelTmp.data;
vector<RGB> listPalette = get_palette(21);
for (int i = 0; i < H; i++)
{
for (int j = 0; j < W; j++)
{
RGB rgb = listPalette[labelIndex[(i*W + j)]];
dataLabelTmp[3 * (i*W + j)] = rgb.B;
dataLabelTmp[3 * (i*W + j) + 1] = rgb.G;
dataLabelTmp[3 * (i*W + j) + 2] = rgb.R;
}
}
cv::imshow( "Display window", labelTmp);
cv::waitKey(0);
free(labelIndex);
labelIndex = NULL;
return labelTmp;
}
/* Wrap the input layer of the network in separate cv::Mat objects
* (one per channel). This way we save one memcpy operation and we
* don't need to rely on cudaMemcpy2D. The last preprocessing
* operation will write the separate channels directly to the input
* layer. */
void Classifier::WrapInputLayer(std::vector<cv::Mat>* input_channels) {
Blob<float>* input_layer = net_->input_blobs()[0];
int width = input_layer->width();
int height = input_layer->height();
float* input_data = input_layer->mutable_cpu_data();
for (int i = 0; i < input_layer->channels(); ++i) {
cv::Mat channel(height, width, CV_32FC1, input_data);
input_channels->push_back(channel);
input_data += width * height;
}
}
int main(int argc, char** argv) {
if (argc != 4) {
std::cerr << "Usage: " << argv[0]
<< " \ndeploy.prototxt \nnetwork.caffemodel"
<< " \nimg.jpg" << " \ncamvid12.png (for example: /SegNet-Tutorial/Scripts/camvid12.png)" << std::endl;
return 1;
}
::google::InitGoogleLogging(argv[0]);
string model_file = argv[1];
string trained_file = argv[2]; //for visualization
Classifier classifier(model_file, trained_file);
string file = argv[3];
std::cout << "---------- Semantic Segmentation for "
<< file << " ----------" << std::endl;
cv::Mat img = cv::imread(file, 1);
CHECK(!img.empty()) << "Unable to decode image " << file;
cv::Mat prediction;
classifier.Predict(img);
}
#else
int main(int argc, char** argv) {
LOG(FATAL) << "This example requires OpenCV; compile with USE_OPENCV.";
}
#endif //USE_OPENCV
To clarify: the for loop refers to the one in Preprocess, specifically this portion:
for(int grid_yidx = 1; grid_yidx <= h_grid; grid_yidx++){
for (int grid_xidx = 1; grid_xidx <= w_grid; grid_xidx++){
int s_x = (grid_xidx-1)*stride+1;
int s_y = (grid_yidx-1)*stride+1;
int e_x = std::min(s_x + crop_size -1, pad_cols);
int e_y = std::min(s_y + crop_size -1, pad_rows);
s_x = e_x - crop_size + 1;
s_y = e_y - crop_size + 1;
/* Cropping image */
img_pad(cv::Rect(s_x,s_y,crop_size,crop_size)).copyTo(img_sub);
cv::Mat sample;
if (img_sub.channels() == 3 && num_channels_ == 1)
cv::cvtColor(img_sub, sample, cv::COLOR_BGR2GRAY);
else if (img_sub.channels() == 4 && num_channels_ == 1)
cv::cvtColor(img_sub, sample, cv::COLOR_BGRA2GRAY);
else if (img_sub.channels() == 4 && num_channels_ == 3)
cv::cvtColor(img_sub, sample, cv::COLOR_BGRA2BGR);
else if (img_sub.channels() == 1 && num_channels_ == 3)
cv::cvtColor(img_sub, sample, cv::COLOR_GRAY2BGR);
else
sample = img_sub;
cv::Mat sample_float;
if (num_channels_ == 3)
sample.convertTo(sample_float, CV_32FC3);
else
sample.convertTo(sample_float, CV_32FC1);
SetMean(sample.rows, sample.cols);
cv::imshow("sample_float", sample_float);
cv::cvtColor(sample_float, sample_float, cv::COLOR_BGRA2RGB);
sample_float = sample_float.t();
cv::Mat sample_normalized(sample_float.size(),sample_float.type());
cv::subtract(sample_float.clone(), mean_, sample_normalized);
cv::Mat sample_temp;
sample_normalized.convertTo(sample_temp, CV_32FC3, 255);
cv::imwrite("/home/sgp1053c/Desktop/PSPNET-cudnn5_wrapper/wrapper/sample_normalized.png", sample_temp);
cv::imshow("sample_normalized", sample_normalized);
cv::waitKey(0);
/* This operation will write the separate BGR planes directly to the
* input layer of the network because it is wrapped by the cv::Mat
* objects in input_channels. */
img_processed = sample_normalized.t();
cv::split(img_processed, *input_channels);
CHECK(reinterpret_cast<float*>(input_channels->at(0).data)
== net_->input_blobs()[0]->cpu_data())
<< "Input channels are not wrapping the input layer of the network.";
img_processed.convertTo(sample_temp, CV_32FC3, 255);
cv::imwrite("/home/sgp1053c/Desktop/PSPNET-cudnn5_wrapper/wrapper/img_processed.png", sample_temp);
cv::imshow("img_normalised",img_processed);
cv::waitKey();
std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); //Just for time measurement
// float loss = 0.0;
// net_->Forward(&loss);
net_->Forward();
std::chrono::steady_clock::time_point end= std::chrono::steady_clock::now();
std::cout << "Processing time = " << (std::chrono::duration_cast<std::chrono::microseconds>(end - begin).count())/1000000.0 << " sec" <<std::endl; //Just for time measurement
/* Copy the output layer to a std::vector */
Blob<float>* output_layer = net_->output_blobs()[0];
cv::Mat segment = Visualization(output_layer);
cv::imwrite("/home/sgp1053c/Desktop/PSPNET-cudnn5_wrapper/wrapper/segment.png", segment);
}
}
Original image: Original Image (without pre-processing)
Input: Input (first cropped frame)
Output: Output of the first cropped frame
Time taken for forwarding: Time taken
Every following cropped frame gives exactly the same output.
P.S.: If I move the code below to the end of the Predict function and return segment instead, it works, but then only the last cropped frame is segmented.
std::chrono::steady_clock::time_point begin =
std::chrono::steady_clock::now(); //Just for time measurement
// float loss = 0.0;
// net_->Forward(&loss);
net_->Forward();
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
std::cout << "Processing time = " << (std::chrono::duration_cast<std::chrono::microseconds>(end - begin).count())/1000000.0 << " sec" <<std::endl; //Just for time measurement
/* Copy the output layer to a std::vector */
Blob<float>* output_layer = net_->output_blobs()[0];
cv::Mat segment = Visualization(output_layer);
cv::imwrite("/home/sgp1053c/Desktop/PSPNET-cudnn5_wrapper/wrapper/segment.png", segment);
Input: Input (last cropped frame of the pre-processed image)
Output: Output of the last cropped frame
Any help will be appreciated, thank you!
This issue is solved by re-wrapping the input channels every time the input blob changes, so that each crop is actually fed forward.
Thus the function
WrapInputLayer(input_channels);
should be called inside the double for loop, as sketched below.
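Schematically, each crop iteration then looks like this (a sketch against the Preprocess code above, where input_channels is the std::vector<cv::Mat>* parameter; the cropping and normalization steps are elided):

for (int grid_yidx = 1; grid_yidx <= h_grid; grid_yidx++) {
    for (int grid_xidx = 1; grid_xidx <= w_grid; grid_xidx++) {
        // ... crop img_sub and normalize it into img_processed as before ...

        // Re-wrap the net's input blob on every iteration so that the
        // cv::Mat planes in input_channels point at the blob's current data.
        input_channels->clear();
        WrapInputLayer(input_channels);

        cv::split(img_processed, *input_channels);
        net_->Forward();

        Blob<float>* output_layer = net_->output_blobs()[0];
        cv::Mat segment = Visualization(output_layer);
        // ... save/show segment as before ...
    }
}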

Image processing: luminance weighted 2

I would like to weight luminance values on a new image.
I have an image (5px.jpg) of 5 pixels with these luminances: 50, 100, 150, 200, 250.
I have a vector of coefficients.
I created a new Mat Z which combines the luminance of 5px.jpg with the coefficients.
So, my first luminance value is 50 (lum[0]=50) and I want it applied to the first 5.1 pixels of my matrix (coef[0]=5.1). To do that, I need to weight the 6th pixel with the first and second luminance values: in my case, the luminance of my 6th pixel will be 95, because (0.1*50)+(0.9*100)=95.
And so on...
But I do not know why my code does not work.
I have already asked a similar question for a vector here, and now I am trying to adapt it to an image.
My input picture:
My output:
#define MPI 3.14159265358979323846264338327950288419716939937510
#define RAD2DEG (180./MPI)
#include "opencv2/core/core.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <opencv2/opencv.hpp>
#include <iostream>
#include <cmath>
#include <math.h>
#include <string.h>
using namespace cv;
using namespace std;
int main()
{
Mat image = imread("5px.jpg", 1);
if (image.empty())
{
cout << "Couldn't load " << image << endl;
}
else
{
cout << "Image upload, go" << endl;
}
namedWindow("ImageIn", CV_WINDOW_AUTOSIZE);
imshow("ImageIn", image);
Mat imgGrayScale;
cvtColor(image, imgGrayScale, CV_BGR2GRAY);
float *deltaP = new float[imgGrayScale.cols];
float *angle = new float[imgGrayScale.cols];
float *coeff = new float[imgGrayScale.cols];
int col;
for (col = 0; col < imgGrayScale.cols; ++col)
{
//cout << "position x = " << col << endl;
deltaP[col] = imgGrayScale.at<uchar>(0, col);
//cout << "luminance = " << deltaP[col] << endl;
angle[col] = acos(deltaP[col] / 255);
//cout << "angle =" << angle[col] << endl;
coeff[col] = (1 / cos(angle[col]));
cout << "coeff = " << coeff[col] << endl;
}
int width = imgGrayScale.size().width;
int height = imgGrayScale.size().height;
int width2 = width * 5;
int idx_coef = 0;
Mat Z = Mat::zeros(height, width2, CV_8UC1);
//for (int r = 0; r < imgGrayScale.rows; r++)
//{
//cout << "Saut de ligne " << endl << endl << endl;
for (int t = 0; t < imgGrayScale.cols; t++)
{
//cout << "Saut de colonne " << endl;
// Attribue le coeff à une variable
int c = int(coeff[idx_coef]);
//cout << "x" << t << endl;
for (int i = 0; i < c; ++i)
{
Z.at<uchar>(0, c) = imgGrayScale.at<uchar>(0, t);
}
float alpha = fmod(coeff[idx_coef], 1.f);
float beta = 1.f - alpha;
Z.at<uchar>(0, c + 1) = (alpha * imgGrayScale.at<uchar>(0, t) + beta * imgGrayScale.at<uchar>(0, t + 1));
idx_coef++;
coeff[idx_coef] = coeff[idx_coef] - beta;
if (idx_coef >= width - 1)
{
int cc = int(coeff[idx_coef]);
for (int i = 0; i < cc; ++i)
{
Z.at<uchar>(0, c) = imgGrayScale.at<uchar>(0, t);
}
idx_coef = 0;
break;
}
}
//}
namedWindow("m", CV_WINDOW_AUTOSIZE);
imshow("m", Z);
imwrite("lumianacetest.jpg", Z);
int t = waitKey();
if ((char)t == 27)
return 0;
}
You messed up the indices while accessing the matrix Z. You shouldn't access Z at column c; you need to access the current column (as a vector::push_back would do). So keep the current column index in a variable, here idx_z, and increment it every time you write to Z.
Your Z is CV_8U, so you lose accuracy, since your values are float. You can create Z as CV_32F instead and, if you need the values in CV_8U format to save the image, convert to CV_8U at the end.
The last columns of Z won't be set to any value by the main loop (so I initialized them with value 0). If you want them to hold the last value of imgGrayScale instead, use the line marked with a comment in the code below.
Here is the code:
#define MPI 3.14159265358979323846264338327950288419716939937510
#define RAD2DEG (180./MPI)
#include <opencv2/opencv.hpp>
#include <vector>

using namespace cv;
using namespace std;

int main()
{
    Mat1b imgGrayScale = (Mat1b(2, 5) << 50, 100, 150, 200, 250,
                                         50, 100, 150, 200, 250);

    vector<float> deltaP(imgGrayScale.cols);
    vector<float> angle(imgGrayScale.cols);
    vector<float> coeff(imgGrayScale.cols);

    int col;
    for (col = 0; col < imgGrayScale.cols; ++col)
    {
        //cout << "position x = " << col << endl;
        deltaP[col] = imgGrayScale.at<uchar>(0, col);
        //cout << "luminance = " << deltaP[col] << endl;
        angle[col] = acos(deltaP[col] / 255);
        //cout << "angle =" << angle[col] << endl;
        coeff[col] = (1 / cos(angle[col]));
        cout << "coeff = " << coeff[col] << endl;
    }

    int width = imgGrayScale.size().width;
    int height = imgGrayScale.size().height;
    int width2 = width * 5;

    Mat1f Z(height, width2, 0.f);
    for (int r = 0; r < imgGrayScale.rows; r++)
    {
        int idx_lum = 0;
        int idx_coef = 0;
        int idx_z = 0;
        vector<float> coef = coeff;

        // Set all values in Z to the last value in imgGrayScale
        Z.row(r) = imgGrayScale(r, imgGrayScale.cols - 1);

        while (true)
        {
            int c = int(coef[idx_coef]);
            for (int i = 0; i < c; ++i)
            {
                Z(r, idx_z++) = imgGrayScale(r, idx_lum);
            }
            float alpha = fmod(coef[idx_coef], 1.f);
            float beta = 1.f - alpha;
            Z(r, idx_z++) = (alpha * imgGrayScale(r, idx_lum) + beta * imgGrayScale(r, idx_lum + 1));
            idx_coef++;
            idx_lum++;
            coef[idx_coef] = coef[idx_coef] - beta;
            if (idx_lum >= imgGrayScale.cols - 1 || idx_coef >= coef.size() - 1)
            {
                int cc = int(coef[idx_coef]);
                for (int i = 0; i < cc; ++i)
                {
                    Z(r, idx_z++) = imgGrayScale(r, idx_lum);
                }
                idx_coef = 0;
                break;
            }
        }
    }

    Mat1b ZZ;
    Z.convertTo(ZZ, CV_8U);

    cout << "Float values:" << endl;
    cout << Z << endl << endl;
    cout << "Uchar values:" << endl;
    cout << ZZ << endl << endl;

    namedWindow("m", CV_WINDOW_AUTOSIZE);
    imshow("m", Z);
    imwrite("lumianacetest.png", ZZ);
    waitKey();

    return 0;
}