I am looking for the fastest way of converting a 3-channel RGB frame to a 1-channel image in OpenCV. But I need to concatenate all three colors (R, G, B) of each pixel into one 32-bit value.
For example, every pixel should look like:
pixel[0:31] = 01001001 11110000 11111111 00000000
The first 8 bits are the red value of the frame (at the same position in the frame), the second 8 bits are the green value, the third 8 bits are the blue value, and the last 8 bits are unused.
I tried this:
for (y = 100; y < 500; y++) {
    for (x = 100; x < 500; x++) {
        int pixel = ((edges.at<Vec3b>(y, x)[0]) << 16) |
                    ((edges.at<Vec3b>(y, x)[1]) << 8) |
                    (edges.at<Vec3b>(y, x)[2]);
    }
}
But this is too slow, because I need to go through every pixel in the frame.
Thanks
I tried some variants:
Method 0a: as in the question, @Sveva
Method 0b: as in the question, with the for loops inverted, @Miki
Method 1: querying the value from the Mat only once, using indices, @Miki
Method 2: querying the value from the Mat using pointers, @Miki
Method 3: converting to BGRA and using a single memcpy, @RyanP. NOTE: works only if the Mat isContinuous().
Results (time in milliseconds)
isContinuous? 1
Method 0a: 113.704 0x1020300
Method 0b: 20.0975 0x1020300
Method 1: 20.1939 0x1020300
Method 2: 15.7434 0x1020300
Method 3: 22.5592 0xff030201
Considerations
Inverting the for loops gives a major speedup, because OpenCV Mats are stored in row-major order.
The fastest method is Method 2, using pointers. Method 1 is slightly slower, but probably more readable.
Method 3 is quite fast, but a single memcpy works only if the matrix isContinuous(). If that's not the case, you need to loop over the rows and memcpy each row separately, and this is going to be (only a little) slower.
NOTE
OpenCV stores BGR values (not RGB). Methods 0a, 0b, 1 and 2 output the values as B G R 0. You just need to swap indices 0 and 2 to get R G B 0. For Method 3, you need to call cvtColor with the parameter COLOR_BGR2RGBA instead.
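For illustration, here is a sketch of that variant (not part of the benchmark below). Note that the int value you read back, e.g. 0xff030201, additionally depends on the machine's endianness, since memcpy preserves byte order rather than integer value:
// Sketch: Method 3 with R G B A byte order and a row-by-row fallback
// for non-continuous matrices (e.g. submatrix views). Assumes sizeof(int) == 4.
Mat4b rgba;
cvtColor(img, rgba, COLOR_BGR2RGBA);     // bytes per pixel in memory: R G B A
vector<int> v(rgba.rows * rgba.cols, 0);
if (rgba.isContinuous()) {
    memcpy(v.data(), rgba.data, rgba.rows * rgba.cols * sizeof(int));
} else {
    for (int r = 0; r < rgba.rows; ++r)  // copy one row at a time
        memcpy(v.data() + r * rgba.cols, rgba.ptr(r), rgba.cols * sizeof(int));
}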
Code
#include <opencv2/opencv.hpp>
#include <vector>
#include <iostream>
using namespace std;
using namespace cv;
int main()
{
    // Test Image
    Mat3b img(2000, 3000, Vec3b(1, 2, 3));

    cout << "isContinuous? " << img.isContinuous() << endl;

    // Method 0a: method from question. Credits to @Sveva
    double tic0a = double(getTickCount());
    vector<int> v0a(img.rows * img.cols, 0);
    for (int c = 0; c < img.cols; ++c)
    {
        for (int r = 0; r < img.rows; ++r)
        {
            v0a[r*img.cols + c] = ((img.at<Vec3b>(r, c)[0]) << 24) |
                                  ((img.at<Vec3b>(r, c)[1]) << 16) |
                                  ((img.at<Vec3b>(r, c)[2]) << 8);
        }
    }
    double toc0a = (double(getTickCount()) - tic0a) * 1000. / getTickFrequency();
    cout << "Method 0a: " << toc0a << "\t\t";
    cout << "0x" << hex << v0a[0] << endl;
    // Method 0b: method from question, loops inverted
    double tic0b = double(getTickCount());
    vector<int> v0b(img.rows * img.cols, 0);
    for (int r = 0; r < img.rows; ++r)
    {
        for (int c = 0; c < img.cols; ++c)
        {
            v0b[r*img.cols + c] = ((img.at<Vec3b>(r, c)[0]) << 24) |
                                  ((img.at<Vec3b>(r, c)[1]) << 16) |
                                  ((img.at<Vec3b>(r, c)[2]) << 8);
        }
    }
    double toc0b = (double(getTickCount()) - tic0b) * 1000. / getTickFrequency();
    cout << "Method 0b: " << toc0b << "\t\t";
    cout << "0x" << hex << v0b[0] << endl;
    // Method 1: custom loop with indices
    double tic1 = double(getTickCount());
    vector<int> v1(img.rows * img.cols, 0);
    for (int r = 0; r < img.rows; ++r)
    {
        for (int c = 0; c < img.cols; ++c)
        {
            const Vec3b& b = img(r, c);
            v1[r*img.cols + c] = (b[0] << 24) | (b[1] << 16) | (b[2] << 8);
        }
    }
    double toc1 = (double(getTickCount()) - tic1) * 1000. / getTickFrequency();
    cout << "Method 1: " << toc1 << "\t\t";
    cout << "0x" << hex << v1[0] << endl;
    // Method 2: custom loop with pointers
    double tic2 = double(getTickCount());
    vector<int> v2(img.rows * img.cols, 0);
    for (int r = 0; r < img.rows; ++r)
    {
        uchar* p = img.ptr<uchar>(r);
        for (int c = 0; c < img.cols; ++c)
        {
            int val = ((*p) << 24); ++p;
            val |= ((*p) << 16); ++p;
            val |= ((*p) << 8); ++p;
            v2[r*img.cols + c] = val;
        }
    }
    double toc2 = (double(getTickCount()) - tic2) * 1000. / getTickFrequency();
    cout << "Method 2: " << toc2 << "\t\t";
    cout << "0x" << hex << v2[0] << endl;
    // Method 3: using BGRA conversion. Credits to @RyanP
    // NOTE: works only if img.isContinuous()
    double tic3 = double(getTickCount());
    Mat4b rgba3;
    cvtColor(img, rgba3, COLOR_BGR2BGRA);
    vector<int> v3(img.rows * img.cols, 0);
    memcpy(v3.data(), rgba3.data, img.rows * img.cols * sizeof(int));
    double toc3 = (double(getTickCount()) - tic3) * 1000. / getTickFrequency();
    cout << "Method 3: " << toc3 << "\t\t";
    cout << "0x" << hex << v3[0] << endl;

    int dummy;
    cin >> dummy;

    return 0;
}
Use the split and merge channel functions.
They look complicated, but are a lot easier than doing it a pixel at a time.
See stackoverflow.com/questions/14582082/merging-channels-in-opencv for a sample.
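A minimal sketch of that idea (illustrative only, not taken from the linked answer): split the planes, merge them together with a zero plane into a 4-channel image, and reinterpret each pixel's 4 bytes as one 32-bit value.
#include <opencv2/opencv.hpp>
#include <vector>
using namespace cv;

int main()
{
    Mat3b img(480, 640, Vec3b(1, 2, 3));           // B=1, G=2, R=3

    std::vector<Mat> ch;
    split(img, ch);                                // ch[0]=B, ch[1]=G, ch[2]=R
    ch.push_back(Mat::zeros(img.size(), CV_8UC1)); // zero plane for the unused byte

    Mat bgr0;
    merge(ch, bgr0);                               // 4 interleaved bytes per pixel: B G R 0

    // Reinterpret each pixel's 4 bytes as one 32-bit value (no copy; bgr0 owns the data).
    Mat packed(bgr0.rows, bgr0.cols, CV_32SC1, bgr0.data, bgr0.step);
    return 0;
}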
Related
Essentially, I'm making a class that takes a BMP file in the constructor. Following THIS POST, I read past the header data and then read the RGB data into a vector. I then calculate the intensities as 0.25R + 0.5G + 0.25B. I write these numbers to a space-separated file, line by line. The original is above and my result below; using gnuplot to open and plot the image gives me this result.
original
distortion
As you can see, the right side of the image is consistently wrapped around further and further as the image is written to file (or somewhere before this process). I've pasted the code below; any help?
std::vector<char> MImage::readBMP(std::string const file)
{
    static constexpr size_t HEADER_SIZE = 54;

    std::ifstream bmp(file, std::ios::binary);
    std::array<char, HEADER_SIZE> header;
    bmp.read(header.data(), header.size());

    auto fileSize = *reinterpret_cast<uint32_t*>(&header[2]);
    auto dataOffset = *reinterpret_cast<uint32_t*>(&header[10]);
    auto width = *reinterpret_cast<uint32_t*>(&header[18]);
    auto height = *reinterpret_cast<uint32_t*>(&header[22]);
    auto depth = *reinterpret_cast<uint16_t*>(&header[28]);

    /*
    std::cout << "fileSize: " << fileSize << std::endl;
    std::cout << "dataOffset: " << dataOffset << std::endl;
    std::cout << "width: " << width << std::endl;
    std::cout << "height: " << height << std::endl;
    std::cout << "depth: " << depth << "-bit" << std::endl;
    */

    std::vector<char> img(dataOffset - HEADER_SIZE);
    //bmp.read(img.data(), img.size());

    auto dataSize = ((width * 3 + 3) & (~3)) * height;
    img.resize(dataSize);
    bmp.read(img.data(), img.size());

    // Swap B and R bytes (BMP stores BGR)
    char temp = 0;
    for (int i = dataSize - 4; i >= 0; i -= 3)
    {
        temp = img[i];
        img[i] = img[i + 2];
        img[i + 2] = temp;
    }

    // Convert to intensity
    int k = 0;
    int size = (int)img.size();
    for (int j = 0; k + 2 < size; j++)
    {
        // 0.25B + 0.5G + 0.25R
        img[j] = ((abs(img[k]) >> 2) + (abs(img[k + 1]) >> 1) + (abs(img[k + 2]) >> 2));
        //OutputDebugStringA((to_string(img[j]) + "\n").c_str());
        k += 3;
    }
    img.resize(dataSize / 3);
    //OutputDebugStringA((to_string(img.size()) + "\n").c_str());

    // Testing: write img data to a space-separated text file
    int a, b, c = 0;
    ofstream TestPic;
    TestPic.open("testpic.txt");
    for (a = 0; a < HEIGHT; a++) {
        for (b = 0; b < WIDTH; b++) {
            TestPic << (int)img[c];
            if (b < WIDTH - 1) {
                TestPic << " ";
            }
            c++;
        }
        TestPic << "\n";
    }
    TestPic.close();
    return img;
}
GNUPlot command: plot [0:630] [0:354] 'testpic.txt' matrix with image pixels
The problem you are seeing is caused by improper data alignment. Each scanline of a .bmp file must have a byte size divisible by 4. You are calculating the input data size correctly with this line:
auto dataSize = ((width * 3 + 3) & (~3)) * height;
However, while converting the img array you do not compensate for, or throw away, the padding at the end of each scanline.
The best advice is either to use a standard BMP loader/converter like the one in the STB libraries (the stb_image.h file) or, if you want to do it yourself, to allocate another array (img2 or something like that) and iterate over the img array scanline by scanline, writing the greyscale values to img2.
By the way, the answer you mentioned contains a solution for your exact problem (it is called the "padding fix").
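A hedged sketch of that scanline-by-scanline conversion (names are illustrative; img is assumed to hold the already BGR-swapped pixel data, and BMP scanlines are typically stored bottom-up, so flip the rows if orientation matters):
const size_t stride = (width * 3 + 3) & ~3u;   // padded byte width of one scanline
std::vector<char> img2(width * height);        // tightly packed greyscale output
for (uint32_t row = 0; row < height; ++row) {
    const unsigned char* src =
        reinterpret_cast<const unsigned char*>(img.data()) + row * stride;
    char* dst = img2.data() + row * width;
    for (uint32_t col = 0; col < width; ++col) {
        // 0.25R + 0.5G + 0.25B, ignoring the padding bytes at the end of the scanline
        dst[col] = static_cast<char>((src[col * 3] >> 2) +
                                     (src[col * 3 + 1] >> 1) +
                                     (src[col * 3 + 2] >> 2));
    }
}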
I used the code below to extract each HSV value from an image and to print each value on the screen.
Mat image_HSV;
cvtColor(ori_image, image_HSV, CV_BGR2HSV);

Mat mask;
inRange(image_HSV, Scalar(100, 0, 0), Scalar(100, 255, 255), mask);
image_HSV.setTo(Scalar(0, 0, 0), mask);

int h = 0;
int s = 0;
int v = 0;
int col = image_HSV.cols;
int row = image_HSV.rows;
int corow = col * row; // image's total pixel count

for (int i = 0; i < image_HSV.cols; i++) {     // image row pixel
    for (int j = 0; j < image_HSV.rows; j++) { // image col pixel
        Vec3b hsv = image_HSV.at<Vec3b>(i, j);
        h += hsv.val[0];
        s += hsv.val[1];
        v += hsv.val[2];
        if (hsv[0] != 100) {
            hsv[0] = 0;
            hsv[1] = 0;
            hsv[2] = 0;
        }
    }
}

cout << "H: " << h / corow << "% \n";
cout << "S: " << s / corow << "% \n";
cout << "V: " << v / corow << "% \n";

waitKey(0);
return 0;
I used an all-red image this time, whose RGB values were 255, 0, 0.
However, I get some strange results from this code.
As far as I know, the H, S, and V value ranges are 0-360, 0-100, and 0-100, respectively.
Furthermore, I also followed the post linked below, but I still have trouble getting the right values.
OpenCV (C++) - Set HSV values of a pixel
I still don't know how to fix it.
Any help would be greatly appreciated! Thanks!
I am trying to create an RGB image in C++, without using any image libraries like OpenCV.
First, I created a grayscale image. I want to draw a rectangle into the image, passing parameters to the function such as the starting point, width, height, etc. This code works well for the grayscale case, but when I increase the number of color channels to 3 for RGB and set the red, green and blue values, it does not work. That is my problem.
How can I get it to work correctly?
x => starting point where x coordinate
y => starting point where y coordinate
width => rectangle width
height => rectangle height
value => RGB or Grayscale color value
My code:
Image::Image(int width, int height, int n_channels, int step)
{
    cout << "Image constructor is running!" << endl;
    m_width = width;
    m_height = height;
    m_n_channels = n_channels;
    m_step = m_width * m_n_channels;
    if (m_step < step)
        m_step = step;
    m_data = new uchar[m_step * height];
}

Image* Image::new_gray(int width, int height)
{
    cout << "New gray image is creating!" << endl;
    return new Image(width, height, 1);
}

Image* Image::new_rgb(int width, int height)
{
    cout << "New RGB image is creating!" << endl;
    return new Image(width, height, 3);
}

void Image::set_rect(int x, int y, int width, int height, uchar value)
{
    if (x < 0) {
        width += x;
        x = 0;
    }
    if (y < 0) {
        height += y;
        y = 0;
    }
    for (int j = y; j < y + height; ++j) {
        if (j >= m_height)
            break;
        uchar* row_data = data(j);
        for (int i = x; i < x + width; ++i) {
            if (i >= m_width)
                break;
            for (int c = 0; c < m_n_channels; ++c)
                if (c == 0) {
                    row_data[i*m_n_channels + c] = value;
                } else if (c == 1) {
                    row_data[i*m_n_channels + c] = value;
                } else if (c == 2) {
                    row_data[i*m_n_channels + c] = value;
                }
        }
    }
}

bool Image::write_pnm(const std::string& filename) const
{
    if (m_n_channels != 1) {
        const string magic_head = "P6";
        ofstream fout;
        string extended_name = filename + ".ppm";
        fout.open(extended_name.c_str(), ios::out | ios::binary);
        fout << magic_head << "\n";
        fout << m_width << " " << m_height << " 255\n";
        for (int y = 0; y < m_height; ++y) {
            const uchar *row_data = data(y);
            cout << reinterpret_cast<const char*>(row_data);
            fout.write(reinterpret_cast<const char*>(row_data), m_width*sizeof(uchar));
        }
        fout.close();
        return true;
    }

    const string magic_head = "P5";
    ofstream fout;
    string extended_name = filename + ".pgm";
    fout.open(extended_name.c_str(), ios::out | ios::binary);
    fout << magic_head << "\n";
    fout << m_width << " " << m_height << " 255\n";
    for (int y = 0; y < m_height; ++y) {
        const uchar *row_data = data(y);
        fout.write(reinterpret_cast<const char*>(row_data), m_width*sizeof(uchar));
    }
    fout.close();
    return true;
}
My main function
#include <cstdlib>
#include <iostream>
#include "image.h"

using std::cout;
using std::endl;
using ceng391::Image;

int main(int argc, char** argv)
{
    Image* gray = Image::new_gray(128, 128);
    cout << "(" << gray->w() << "x" << gray->h() << ") channels: "
         << gray->n_ch() << " step: " << gray->step() << endl;
    gray->set_zero();
    gray->set_rect(32, 32, 64, 64, 255);
    gray->write_pnm("/tmp/test_image");

    Image* rgb_image = Image::new_rgb(128, 128);
    cout << "(" << rgb_image->w() << "x" << rgb_image->h() << ") channels: "
         << rgb_image->n_ch() << " step: " << rgb_image->step() << endl;
    rgb_image->set_zero_rgb();
    rgb_image->set_rect(32, 32, 64, 64, 150);
    rgb_image->write_pnm("/tmp/test_image_rgb");

    delete gray;
    delete rgb_image;
    return EXIT_SUCCESS;
}
This code works for grayscale images because a grayscale row has the same number of bytes as pixels in the image width.
fout.write(reinterpret_cast<const char*>(row_data), m_width*sizeof(uchar));
But for RGB images, the number of bytes per row is 3 times larger: one pixel is represented by 3 channels, so you need to multiply the stream size by 3 (the R, G, B channels) per pixel.
fout.write(reinterpret_cast<const char*>(row_data), m_width*sizeof(uchar)*3);
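In context, the P6 branch's write loop would become something like this (a sketch; writing row by row as here also stays correct if m_step includes extra padding):
for (int y = 0; y < m_height; ++y) {
    const uchar *row_data = data(y);
    // 3 bytes (R, G, B) per pixel in the P6 body
    fout.write(reinterpret_cast<const char*>(row_data), m_width*sizeof(uchar)*3);
}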
I would like to apply weighted luminance values to a new image.
I have an image (5px.jpg) of 5 pixels with these luminances: 50, 100, 150, 200, 250.
I have a vector of coefficients.
I created a new Mat Z which combines the luminances of 5px.jpg with the coefficients.
So, my first luminance value is 50 (lum[0] = 50) and I want it applied to the first 5.1 pixels of my matrix (coef[0] = 5.1). To do that, I need to weight the 6th pixel with the first and the second luminance values. In my case, the luminance of my 6th pixel will be 95, because (0.1*50) + (0.9*100) = 95.
And so on...
But I do not know why my code does not work.
I already asked a similar question for a vector here, and now I'm trying to adapt it to an image.
My input picture:
My output:
#define MPI 3.14159265358979323846264338327950288419716939937510
#define RAD2DEG (180./MPI)

#include "opencv2/core/core.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <opencv2/opencv.hpp>
#include <iostream>
#include <cmath>
#include <math.h>
#include <string.h>

using namespace cv;
using namespace std;

int main()
{
    Mat image = imread("5px.jpg", 1);
    if (image.empty())
    {
        cout << "Couldn't load " << image << endl;
    }
    else
    {
        cout << "Image upload, go" << endl;
    }
    namedWindow("ImageIn", CV_WINDOW_AUTOSIZE);
    imshow("ImageIn", image);

    Mat imgGrayScale;
    cvtColor(image, imgGrayScale, CV_BGR2GRAY);

    float *deltaP = new float[imgGrayScale.cols];
    float *angle = new float[imgGrayScale.cols];
    float *coeff = new float[imgGrayScale.cols];
    int col;
    for (col = 0; col < imgGrayScale.cols; ++col)
    {
        //cout << "position x = " << col << endl;
        deltaP[col] = imgGrayScale.at<uchar>(0, col);
        //cout << "luminance = " << deltaP[col] << endl;
        angle[col] = acos(deltaP[col] / 255);
        //cout << "angle =" << angle[col] << endl;
        coeff[col] = (1 / cos(angle[col]));
        cout << "coeff = " << coeff[col] << endl;
    }

    int width = imgGrayScale.size().width;
    int height = imgGrayScale.size().height;
    int width2 = width * 5;
    int idx_coef = 0;

    Mat Z = Mat::zeros(height, width2, CV_8UC1);

    //for (int r = 0; r < imgGrayScale.rows; r++)
    //{
        //cout << "Row break" << endl << endl << endl;
        for (int t = 0; t < imgGrayScale.cols; t++)
        {
            //cout << "Column break" << endl;
            // Assign the coefficient to a variable
            int c = int(coeff[idx_coef]);
            //cout << "x" << t << endl;
            for (int i = 0; i < c; ++i)
            {
                Z.at<uchar>(0, c) = imgGrayScale.at<uchar>(0, t);
            }
            float alpha = fmod(coeff[idx_coef], 1.f);
            float beta = 1.f - alpha;
            Z.at<uchar>(0, c + 1) = (alpha * imgGrayScale.at<uchar>(0, t) + beta * imgGrayScale.at<uchar>(0, t + 1));
            idx_coef++;
            coeff[idx_coef] = coeff[idx_coef] - beta;
            if (idx_coef >= width - 1)
            {
                int cc = int(coeff[idx_coef]);
                for (int i = 0; i < cc; ++i)
                {
                    Z.at<uchar>(0, c) = imgGrayScale.at<uchar>(0, t);
                }
                idx_coef = 0;
                break;
            }
        }
    //}

    namedWindow("m", CV_WINDOW_AUTOSIZE);
    imshow("m", Z);
    imwrite("lumianacetest.jpg", Z);

    int t = waitKey();
    if ((char)t == 27)
        return 0;
}
You messed up the indices when accessing the matrix Z. You shouldn't access Z at column c; you need to access the current column (as a vector::push_back would do). So keep the current column index in a variable, here idx_z, and increment it every time you access Z.
Also, your Z is CV_8U, so you lose accuracy, since your values are float. You can create Z as CV_32F and, if you need the values in CV_8U format to save the image, convert to CV_8U later if needed.
The last columns of Z won't be set to any value (so I initialized them with value 0). If you need them to hold the last value of imgGrayScale, just uncomment the relevant part of the code.
Here is the code:
#define MPI 3.14159265358979323846264338327950288419716939937510
#define RAD2DEG (180./MPI)

#include <opencv2/opencv.hpp>
#include <vector>

using namespace cv;
using namespace std;

int main()
{
    Mat1b imgGrayScale = (Mat1b(2, 5) << 50, 100, 150, 200, 250,
                                         50, 100, 150, 200, 250);

    vector<float> deltaP(imgGrayScale.cols);
    vector<float> angle(imgGrayScale.cols);
    vector<float> coeff(imgGrayScale.cols);

    int col;
    for (col = 0; col < imgGrayScale.cols; ++col)
    {
        //cout << "position x = " << col << endl;
        deltaP[col] = imgGrayScale.at<uchar>(0, col);
        //cout << "luminance = " << deltaP[col] << endl;
        angle[col] = acos(deltaP[col] / 255);
        //cout << "angle =" << angle[col] << endl;
        coeff[col] = (1 / cos(angle[col]));
        cout << "coeff = " << coeff[col] << endl;
    }

    int width = imgGrayScale.size().width;
    int height = imgGrayScale.size().height;
    int width2 = width * 5;

    Mat1f Z(height, width2, 0.f);
    for (int r = 0; r < imgGrayScale.rows; r++)
    {
        int idx_lum = 0;
        int idx_coef = 0;
        int idx_z = 0;

        vector<float> coef = coeff;

        // Set all values in Z to the last value in imgGrayScale
        Z.row(r) = imgGrayScale(r, imgGrayScale.cols - 1);

        while (true)
        {
            int c = int(coef[idx_coef]);
            for (int i = 0; i < c; ++i)
            {
                Z(r, idx_z++) = imgGrayScale(r, idx_lum);
            }
            float alpha = fmod(coef[idx_coef], 1.f);
            float beta = 1.f - alpha;

            Z(r, idx_z++) = (alpha * imgGrayScale(r, idx_lum) + beta * imgGrayScale(r, idx_lum + 1));

            idx_coef++;
            idx_lum++;

            coef[idx_coef] = coef[idx_coef] - beta;

            if (idx_lum >= imgGrayScale.cols - 1 || idx_coef >= coef.size() - 1)
            {
                int cc = int(coef[idx_coef]);
                for (int i = 0; i < cc; ++i)
                {
                    Z(r, idx_z++) = imgGrayScale(r, idx_lum);
                }
                idx_coef = 0;
                break;
            }
        }
    }

    Mat1b ZZ;
    Z.convertTo(ZZ, CV_8U);

    cout << "Float values:" << endl;
    cout << Z << endl << endl;
    cout << "Uchar values:" << endl;
    cout << ZZ << endl << endl;

    namedWindow("m", CV_WINDOW_AUTOSIZE);
    imshow("m", Z);
    imwrite("lumianacetest.png", ZZ);

    waitKey();
    return 0;
}
I made a simple perceptron in C++ to study AI, and even following a book (pt_br) I could not make my perceptron return the expected result. I tried to debug and find the error, but I didn't succeed.
My algorithm's AND gate results (A and B = Y):
0 && 0 = 0
0 && 1 = 1
1 && 0 = 1
1 && 1 = 1
Basically it's working as an OR gate, or randomly.
I tried to jump ahead to Peter Norvig and Russell's book, but it goes over this quickly and doesn't explain perceptron training in depth.
I really want to learn every inch of this content, so I don't want to jump to the multilayer perceptron without making the simple one work. Can you help?
The following code is the minimal code for operation, with some explanations:
Threshold (signal) function:
int signal(float &sin){
    if(sin < 0)
        return 0;
    if(sin > 1)
        return 1;
    return round(sin);
}
Perceptron struct (w holds the weights):
struct perceptron{
    float w[3];
};
Perceptron training:
perceptron startTraining(){
    //-- Random number generator
    long int t = static_cast<long int>(time(NULL));
    std::mt19937 gen;
    gen.seed(std::random_device()() + t);
    std::uniform_real_distribution<float> dist(0.0, 1.0);
    //--

    //-- Samples (-1 | x | y)
    float t0[][3] = {{-1,0,0},
                     {-1,0,1},
                     {-1,1,0},
                     {-1,1,1}};
    //-- Expected results
    short d [] = {0,0,0,1};

    perceptron per;
    per.w[0] = dist(gen);
    per.w[1] = dist(gen);
    per.w[2] = dist(gen);

    //-- Print the random numbers
    cout << "INIT " << "W0: " << per.w[0] << " W1: " << per.w[1] << " W2: " << per.w[2] << endl;

    const float n = 0.1;   // Learning rate ɳ
    int saida = 0;         // Output Y
    long int epo = 0;      // Simple counter
    bool erro = true;      // Loop control

    while(erro){
        erro = false;
        for (int amost = 0; amost < 4; ++amost) {            // Repeat for the number of samples x0=-1, x1, x2
            float u = 0;                                     // Variable for the summation
            for (int entrad = 0; entrad < 3; ++entrad) {     // Repeat for every synaptic weight W0=θ, W1, W2
                u = u + (per.w[entrad] * t0[amost][entrad]); // U <- Weights * Inputs
            }
            // u = u - per.w[0]; // some references say to take θ and subtract it from U; I tried, but without success

            saida = signal(u);                               // returns 1 or 0
            cout << d[amost] << " <- expected | found -> " << saida << endl;

            if(saida != d[amost]){                           // if the output is not equal to the expected value
                for (int ajust = 0; ajust < 3; ++ajust) {
                    per.w[ajust] = per.w[ajust] + n * (d[amost] - saida) * t0[amost][ajust]; // W <- W + ɳ * ((d - y) x) where
                    erro = true;                             // W: weights, ɳ: learning rate
                }                                            // d: desired outputs, y: outputs
            }                                                // x: samples
            epo++;
        }
    }
    cout << "Epochs (loops): " << epo << endl;
    return per;
}
Main with testing part:
int main()
{
    perceptron per = startTraining();
    cout << "end" << endl;
    cout << "W0: " << per.w[0] << " W1: " << per.w[1] << " W2: " << per.w[2] << endl;

    while(true){
        int x, y;
        cin >> x >> y;
        float u = 0;
        u = (per.w[1] * x);
        u = u + (per.w[2] * y);
        //u = u - per.w[0];
        cout << signal(u) << endl;
    }
    return 0;
}
In your main(), re-enable the line you commented out. Alternatively, you could write it like this to make it more illuminating:
float u = 0.0f;
u += (per.w[0] * float(-1));
u += (per.w[1] * float(x));
u += (per.w[2] * float(y));
The thing is that you trained the perceptron with three inputs, the first being hard-wired to -1 (making the first weight w[0] act as a constant "bias"). Accordingly, in your training function, u is the sum of all THREE of those weight-input products.
However, in the main() you posted, you omit w[0] completely, thus producing a wrong result.
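As a further (purely illustrative) refactoring, you could route both training and testing through one evaluation helper, so the bias input can never be forgotten:
// Sketch: evaluate the perceptron on inputs (x, y), always including
// the hard-wired bias input of -1 that training uses as t0[amost][0].
int evaluate(perceptron &per, float x, float y){
    float u = per.w[0] * -1.0f   // bias term (x0 = -1)
            + per.w[1] * x
            + per.w[2] * y;
    return signal(u);            // same thresholding as in training
}
The testing loop in main() then reduces to cout << evaluate(per, x, y) << endl;.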