Photo is here:
I want to see this white man in green when I left-click while holding the [CTRL] key. The code looks at the 8-neighbour points of the clicked pixel and recurses into them.
The code works correctly on a small matrix, but with this photo I get a memory error. I still want to see the man in green; how can I fix the code?
#include <iostream>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>

using namespace cv;
using namespace std;

int n = 0;

void yap(int i, int j, Mat* res) {
    ostringstream t;
    Mat* rgb = (Mat*) res;
    Mat res1;
    res1 = *rgb;
    t << "fark_binary" << n << ".jpg";
    imwrite(t.str(), res1);
    if (res1.at<Vec3b>(i, j)[0] >= 200) {
        cout << j << "\n" << i;
        res1.at<Vec3b>(i, j)[0] = 0;
        res1.at<Vec3b>(i, j)[1] = 255;
        res1.at<Vec3b>(i, j)[2] = 0;
        cout << "yapıldı"; // Turkish: "done"
    }
    if (res1.at<Vec3b>(i-1, j-1)[0] >= 200) {
        yap(i-1, j-1, &res1);
        res1.at<Vec3b>(i-1, j-1)[0] = 0;
        res1.at<Vec3b>(i-3, j-3)[1] = 255;
        res1.at<Vec3b>(i-3, j-3)[2] = 0;
    }
    if (res1.at<Vec3b>(i-1, j)[0] >= 200) {
        yap(i-1, j, &res1);
        res1.at<Vec3b>(i-1, j)[0] = 0;
        res1.at<Vec3b>(i-1, j)[1] = 255;
        res1.at<Vec3b>(i-1, j)[2] = 0;
    }
    if (res1.at<Vec3b>(i-1, j+1)[0] >= 200) {
        res1.at<Vec3b>(i-1, j+1)[0] = 0;
        res1.at<Vec3b>(i-1, j+1)[1] = 255;
        res1.at<Vec3b>(i-1, j+1)[2] = 0;
    }
    if (res1.at<Vec3b>(i, j-1)[0] >= 200) {
        yap(i, j-1, &res1);
        res1.at<Vec3b>(i, j-1)[0] = 0;
        res1.at<Vec3b>(i, j-1)[1] = 255;
        res1.at<Vec3b>(i, j-1)[2] = 0;
    }
    if (res1.at<Vec3b>(i, j+1)[0] >= 200) {
        yap(i, j+1, &res1);
        res1.at<Vec3b>(i, j+1)[0] = 0;
        res1.at<Vec3b>(i, j+1)[1] = 255;
        res1.at<Vec3b>(i, j+1)[2] = 0;
    }
    if (res1.at<Vec3b>(i+1, j-1)[0] >= 200) {
        yap(i+1, j-1, &res1);
        res1.at<Vec3b>(i+1, j-1)[0] = 0;
        res1.at<Vec3b>(i+1, j-1)[1] = 255;
        res1.at<Vec3b>(i+1, j-1)[2] = 0;
    }
    if (res1.at<Vec3b>(i+1, j)[0] >= 200) {
        yap(i+1, j, &res1);
        res1.at<Vec3b>(i+1, j)[0] = 0;
        res1.at<Vec3b>(i+1, j)[1] = 255;
        res1.at<Vec3b>(i+1, j)[2] = 0;
    }
    if (res1.at<Vec3b>(i+1, j+1)[0] >= 200) {
        yap(i+1, j+1, &res1);
        res1.at<Vec3b>(i+1, j+1)[0] = 0;
        res1.at<Vec3b>(i+1, j+1)[1] = 255;
        res1.at<Vec3b>(i+1, j+1)[2] = 0;
    }
}

void mouseTikla(int evt, int x, int y, int flags, void* param)
{
    Vec3b color;
    Mat* rgb = (Mat*) param;
    Mat p;
    p = *rgb;
    if (flags == (CV_EVENT_LBUTTONDOWN + CV_EVENT_FLAG_CTRLKEY))
    {
        yap(y, x, &p);
        cout << x << y;
    }
}

int main() {
    Mat res;
    res = imread("C:/Users/giray/Desktop/27.jpg");
    int i;
    int j;
    //res1.data[res.channels()*(res.cols*(i)+(j))];
    namedWindow("Secim", 1);
    setMouseCallback("Secim", mouseTikla, &res);
    imshow("Secim", res);
    waitKey(0);
    return 0;
}
This code is so wrong in a thousand ways...
Never use recursion if you can do the job without it.
Why did you create so many pointers to that single image?
Formatting helps, really.
The constant memory reallocation is just insane here.
Here's the better version of your code:
#include <iostream>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <queue>

using namespace cv;
using namespace std;

void yap(int i, int j, Mat* res) {
    queue<pair<int, int>> q;
    q.push(pair<int, int>(i, j));
    while (!q.empty())
    {
        pair<int, int> p = q.front();
        q.pop();
        if ((p.first < 0) ||
            (p.second < 0) ||
            (p.first >= res->rows) ||
            (p.second >= res->cols))
            continue;
        if (res->at<Vec3b>(p.first, p.second)[0] > 200)
        {
            res->at<Vec3b>(p.first, p.second) = {0, 255, 0};
            q.push(pair<int, int>(p.first - 1, p.second - 1));
            q.push(pair<int, int>(p.first + 1, p.second + 1));
            q.push(pair<int, int>(p.first - 1, p.second + 1));
            q.push(pair<int, int>(p.first + 1, p.second - 1));
            q.push(pair<int, int>(p.first + 1, p.second));
            q.push(pair<int, int>(p.first - 1, p.second));
            q.push(pair<int, int>(p.first, p.second + 1));
            q.push(pair<int, int>(p.first, p.second - 1));
        }
    }
}

void mouseTikla(int evt, int x, int y, int flags, void* param)
{
    Mat* rgb = (Mat*) param;
    if (evt == CV_EVENT_LBUTTONDOWN)
    {
        yap(y, x, rgb);
        imshow("Secim", *rgb);
    }
}

int main() {
    Mat res;
    res = imread("put_your_path_here");
    namedWindow("Secim", 1);
    setMouseCallback("Secim", mouseTikla, &res);
    imshow("Secim", res);
    waitKey(0);
    return 0;
}
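For the record, OpenCV also ships a ready-made flood fill, so the whole BFS can collapse into one call. A minimal sketch, assuming the selection rule is "blue channel >= 200 around the clicked seed" (the loDiff/upDiff values below only approximate that rule and may need tuning):

#include <opencv2/imgproc/imgproc.hpp>

using namespace cv;

void yapFloodFill(int i, int j, Mat* res)
{
    // 8-connected fill around the clicked pixel; every pixel within the
    // given range of the seed value is recolored green in place
    floodFill(*res, Point(j, i), Scalar(0, 255, 0), nullptr,
              Scalar(55, 255, 255),   // loDiff: lets blue go down to ~200
              Scalar(255, 255, 255),  // upDiff: anything brighter qualifies
              8 | FLOODFILL_FIXED_RANGE);
}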
Related
I'm trying to use an "example" code for C++ on Qt. In this example, there's a function get_top_n from tflite::label_image, in tensorflow/lite/examples/label_image/get_top_n.h, but Qt Creator doesn't find the function.
Error: main.cpp:104 (and 107): error: no matching function for call to 'get_top_n'
What am I doing wrong here?
#include <fstream>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/string_util.h"
#include "tensorflow/lite/examples/label_image/get_top_n.h"
#include "tensorflow/lite/model.h"

std::vector<std::string> load_labels(std::string labels_file)
{
    std::ifstream file(labels_file.c_str());
    if (!file.is_open())
    {
        fprintf(stderr, "unable to open label file\n");
        exit(-1);
    }
    std::string label_str;
    std::vector<std::string> labels;
    while (std::getline(file, label_str))
    {
        if (label_str.size() > 0)
            labels.push_back(label_str);
    }
    file.close();
    return labels;
}

int main(int argc, char *argv[])
{
    // Get Model label and input image
    if (argc != 4)
    {
        fprintf(stderr, "TfliteClassification.exe modelfile labels image\n");
        exit(-1);
    }
    const char *modelFileName = argv[1];
    const char *labelFile = argv[2];
    const char *imageFile = argv[3];

    // Load Model
    auto model = tflite::FlatBufferModel::BuildFromFile(modelFileName);
    if (model == nullptr)
    {
        fprintf(stderr, "failed to load model\n");
        exit(-1);
    }

    // Initiate Interpreter
    std::unique_ptr<tflite::Interpreter> interpreter;
    tflite::ops::builtin::BuiltinOpResolver resolver;
    tflite::InterpreterBuilder(*model, resolver)(&interpreter);
    if (interpreter == nullptr)
    {
        fprintf(stderr, "Failed to initiate the interpreter\n");
        exit(-1);
    }
    if (interpreter->AllocateTensors() != kTfLiteOk)
    {
        fprintf(stderr, "Failed to allocate tensor\n");
        exit(-1);
    }

    // Configure the interpreter
    interpreter->SetAllowFp16PrecisionForFp32(true);
    interpreter->SetNumThreads(1);

    // Get Input Tensor Dimensions
    int input = interpreter->inputs()[0];
    auto height = interpreter->tensor(input)->dims->data[1];
    auto width = interpreter->tensor(input)->dims->data[2];
    auto channels = interpreter->tensor(input)->dims->data[3];

    // Load Input Image
    cv::Mat image;
    auto frame = cv::imread(imageFile);
    if (frame.empty())
    {
        fprintf(stderr, "Failed to load image\n");
        exit(-1);
    }

    // Copy image to input tensor
    cv::resize(frame, image, cv::Size(width, height), cv::INTER_NEAREST);
    memcpy(interpreter->typed_input_tensor<unsigned char>(0), image.data,
           image.total() * image.elemSize());

    // Inference
    std::chrono::steady_clock::time_point start, end;
    start = std::chrono::steady_clock::now();
    interpreter->Invoke();
    end = std::chrono::steady_clock::now();
    auto inference_time =
        std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();

    // Get Output
    int output = interpreter->outputs()[0];
    TfLiteIntArray *output_dims = interpreter->tensor(output)->dims;
    auto output_size = output_dims->data[output_dims->size - 1];
    std::vector<std::pair<float, int>> top_results;
    float threshold = 0.01f;
    switch (interpreter->tensor(output)->type)
    {
    case kTfLiteInt32:
        tflite::label_image::get_top_n<float>(interpreter->typed_output_tensor<float>(0),
                                              output_size, 1, threshold, &top_results, kTfLiteFloat32);
        break;
    case kTfLiteUInt8:
        tflite::label_image::get_top_n<uint8_t>(interpreter->typed_output_tensor<uint8_t>(0),
                                                output_size, 1, threshold, &top_results, kTfLiteUInt8);
        break;
    default:
        fprintf(stderr, "cannot handle output type\n");
        exit(-1);
    }

    // Print inference ms in input image
    cv::putText(frame, "Inference Time in ms: " + std::to_string(inference_time),
                cv::Point(10, 30), cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(0, 0, 255), 2);

    // Load Labels
    auto labels = load_labels(labelFile);

    // Print labels with confidence in input image
    for (const auto &result : top_results)
    {
        const float confidence = result.first;
        const int index = result.second;
        std::string output_txt = "Label :" + labels[index] + " Confidence : " + std::to_string(confidence);
        cv::putText(frame, output_txt, cv::Point(10, 60), cv::FONT_HERSHEY_SIMPLEX, 0.8,
                    cv::Scalar(0, 0, 255), 2);
    }

    // Display image
    cv::imshow("Output", frame);
    cv::waitKey(0);
    return 0;
}
The lines affected:
104: tflite::label_image::get_top_n<float>(interpreter->typed_output_tensor<float>(0), output_size, 1, threshold, &top_results, kTfLiteFloat32);
107: tflite::label_image::get_top_n<uint8_t>(interpreter->typed_output_tensor<uint8_t>(0), output_size, 1, threshold, &top_results, kTfLiteUInt8);
Content of tensorflow/lite/examples/label_image/get_top_n.h:
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. ...*/
#ifndef TENSORFLOW_LITE_EXAMPLES_LABEL_IMAGE_GET_TOP_N_H_
#define TENSORFLOW_LITE_EXAMPLES_LABEL_IMAGE_GET_TOP_N_H_

#include "tensorflow/lite/examples/label_image/get_top_n_impl.h"

namespace tflite {
namespace label_image {

template <class T>
void get_top_n(T* prediction, int prediction_size, size_t num_results,
               float threshold, std::vector<std::pair<float, int>>* top_results,
               TfLiteType input_type);

// explicit instantiation so that we can use them otherwhere
template void get_top_n<float>(float*, int, size_t, float,
                               std::vector<std::pair<float, int>>*, TfLiteType);
template void get_top_n<int8_t>(int8_t*, int, size_t, float,
                                std::vector<std::pair<float, int>>*, TfLiteType);
template void get_top_n<uint8_t>(uint8_t*, int, size_t, float,
                                 std::vector<std::pair<float, int>>*, TfLiteType);

}  // namespace label_image
}  // namespace tflite

#endif  // TENSORFLOW_LITE_EXAMPLES_LABEL_IMAGE_GET_TOP_N_H_
Content of tensorflow/lite/examples/label_image/get_top_n_impl.h:
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. ...*/
#ifndef TENSORFLOW_LITE_EXAMPLES_LABEL_IMAGE_GET_TOP_N_IMPL_H_
#define TENSORFLOW_LITE_EXAMPLES_LABEL_IMAGE_GET_TOP_N_IMPL_H_

#include <algorithm>
#include <functional>
#include <queue>

#include "tensorflow/lite/c/common.h"

namespace tflite {
namespace label_image {

extern bool input_floating;

// Returns the top N confidence values over threshold in the provided vector,
// sorted by confidence in descending order.
template <class T>
void get_top_n(T* prediction, int prediction_size, size_t num_results,
               float threshold, std::vector<std::pair<float, int>>* top_results,
               TfLiteType input_type) {
  // Will contain top N results in ascending order.
  std::priority_queue<std::pair<float, int>, std::vector<std::pair<float, int>>,
                      std::greater<std::pair<float, int>>>
      top_result_pq;
  const long count = prediction_size;  // NOLINT(runtime/int)
  float value = 0.0;
  for (int i = 0; i < count; ++i) {
    switch (input_type) {
      case kTfLiteFloat32:
        value = prediction[i];
        break;
      case kTfLiteInt8:
        value = (prediction[i] + 128) / 256.0;
        break;
      case kTfLiteUInt8:
        value = prediction[i] / 255.0;
        break;
      default:
        break;
    }
    // Only add it if it beats the threshold and has a chance at being in
    // the top N.
    if (value < threshold) {
      continue;
    }
    top_result_pq.push(std::pair<float, int>(value, i));
    // If at capacity, kick the smallest value out.
    if (top_result_pq.size() > num_results) {
      top_result_pq.pop();
    }
  }
  // Copy to output vector and reverse into descending order.
  while (!top_result_pq.empty()) {
    top_results->push_back(top_result_pq.top());
    top_result_pq.pop();
  }
  std::reverse(top_results->begin(), top_results->end());
}

}  // namespace label_image
}  // namespace tflite

#endif  // TENSORFLOW_LITE_EXAMPLES_LABEL_IMAGE_GET_TOP_N_IMPL_H_
Github link
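As an aside on the mechanism quoted above: get_top_n.h both includes get_top_n_impl.h and explicitly instantiates the float/int8_t/uint8_t versions, so the template should be fully visible to callers. A minimal, hypothetical reduction of that definition-plus-explicit-instantiation pattern, which compiles on its own:

#include <cstddef>
#include <utility>
#include <vector>

// trivial stand-in definition; only the signature matters for the caller
template <class T>
void get_top_n(T* prediction, int prediction_size, std::size_t num_results,
               float threshold, std::vector<std::pair<float, int>>* top_results) {
    for (int i = 0; i < prediction_size && top_results->size() < num_results; ++i)
        if (prediction[i] >= threshold)
            top_results->push_back({static_cast<float>(prediction[i]), i});
}

// explicit instantiation, mirroring the TF Lite header
template void get_top_n<float>(float*, int, std::size_t, float,
                               std::vector<std::pair<float, int>>*);

int main() {
    float buf[4] = {0.1f, 0.7f, 0.15f, 0.05f};
    std::vector<std::pair<float, int>> results;
    get_top_n<float>(buf, 4, 1, 0.01f, &results);  // resolves fine
    return 0;
}

If the same call only fails under Qt Creator, one thing worth checking (an assumption, not something the question states) is whether the project's include path resolves to an older TensorFlow checkout whose get_top_n has a different parameter list; older versions ended in a bool input_floating instead of a TfLiteType.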
How to fix the following error?
OpenCV: terminate handler is called! The last OpenCV error is:
OpenCV(4.5.5) Error: Assertion failed (trackbar) in getTrackbarPos,
file C:\opencv-4.5.5\sources\modules\highgui\src\window.cpp, line 862
#include <opencv2/opencv.hpp>
#include <cstdlib>
#include <vector>

using namespace cv;
using namespace std;

template <typename T>
T PosToActual(T min, T max, int pos, int TICKS)
{
    return min + (max - min) * pos / TICKS;
}

void Refresh(int, void *data)
{
    Mat *original = (Mat *)data;
    Mat modified;
    vector<Mat> channels;
    split(*original, channels);
    int b = cv::getTrackbarPos("Blue", "Modified");
    int g = cv::getTrackbarPos("Green", "Modified");
    int r = cv::getTrackbarPos("Red", "Modified");
    channels[0] *= PosToActual<float>(0, 1, b, 10);
    channels[1] *= PosToActual<float>(0, 1, g, 10);
    channels[2] *= PosToActual<float>(0, 1, r, 10);
    merge(channels, modified);
    imshow("Modified", modified);
}

int main()
{
    const string filename = "family.jpg";
    Mat original = imread(filename);
    // Mat modified = Mat::zeros(original.size(), original.type());
    cv::namedWindow("Modified", WINDOW_NORMAL);
    int bFactor = 5;
    int gFactor = 5;
    int rFactor = 5;
    createTrackbar("Blue", "Modified", &bFactor, 10, Refresh, &original);
    createTrackbar("Green", "Modified", &gFactor, 10, Refresh, &original);
    createTrackbar("Red", "Modified", &rFactor, 10, Refresh, &original);
    Refresh(0, &original);
    waitKey();
}
Edit:
This issue only occurs with the prebuilt OpenCV for Windows versions
4.5.3
4.5.4
4.5.5
It works just fine with version 4.5.2. I have not checked versions lower than 4.5.2, because I am not interested in using them.
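A guess at what changed, given the version sensitivity above: in the affected builds the trackbar callback appears to fire during createTrackbar itself, at which point "Green" and "Red" do not exist yet, so getTrackbarPos asserts. A minimal sketch that sidesteps getTrackbarPos entirely by reading the trackbar-bound variables (RefreshData is a hypothetical helper struct, not an OpenCV type):

#include <opencv2/opencv.hpp>
#include <vector>

// bundles everything the callback needs, so it never queries a trackbar
struct RefreshData
{
    cv::Mat original;
    int b = 5, g = 5, r = 5;  // bound to the trackbars, range 0..10
};

void Refresh(int, void *data)
{
    RefreshData *d = (RefreshData *)data;
    std::vector<cv::Mat> channels;
    cv::split(d->original, channels);
    channels[0] *= d->b / 10.0f;  // the bound ints already hold the positions
    channels[1] *= d->g / 10.0f;
    channels[2] *= d->r / 10.0f;
    cv::Mat modified;
    cv::merge(channels, modified);
    cv::imshow("Modified", modified);
}

int main()
{
    RefreshData data;
    data.original = cv::imread("family.jpg");
    cv::namedWindow("Modified", cv::WINDOW_NORMAL);
    cv::createTrackbar("Blue", "Modified", &data.b, 10, Refresh, &data);
    cv::createTrackbar("Green", "Modified", &data.g, 10, Refresh, &data);
    cv::createTrackbar("Red", "Modified", &data.r, 10, Refresh, &data);
    Refresh(0, &data);
    cv::waitKey();
}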
I'm new to C++, and I need to use the list container for my 3D marker-based watershed function, but I get strange bugs when I use it. May I know what's wrong with my code?
Thank you so much!
I used a vector of lists to save the wait-to-search pixel indexes. I declare the variable this way (GVInt32 is int32_t):
vector<list<GVInt32>> toSearchList;
And I use two kinds of operations on the lists:
Add a new wait-to-search index at the end of a list:
toSearchList[cnt].push_back(newidx);
Remove a searched element from the middle of a list (it is a list<GVInt32>::iterator):
it = toSearchList[cnt].erase(it);
But I get two kinds of errors:
malloc(): memory corruption when I do
toSearchList[cnt].push_back(newidx);
I get inaccessible elements at the end of the list when I inspect the variable in the debugger:
[Not accessible elements][1]
https://i.stack.imgur.com/flJg3.png
The IDE is Qt Creator 4.15.2. The system is Ubuntu 18.04.
The full code:
watershed_wz.cpp:
#include "watershed_wz.h"
WaterShed_WZ::WaterShed_WZ()
{
}
array<GVInt32, 6> WaterShed_WZ::getNeighbor_WZ(const GVInt32 idx, const GVInt32 width, const GVInt32 height, const GVInt32 thick) {
GVInt32 SLICE = width * height;
GVInt32 z = idx / SLICE;
GVInt32 y = (idx%SLICE) / width;
GVInt32 x = idx % width;
array<GVInt32, 6> nIndex;
nIndex[0] = (x == 0) ? -1 : (idx - 1);
nIndex[1] = ((x + 1) == width) ? -1 : (idx + 1);
nIndex[2] = (y == 0) ? -1 : (idx - width);
nIndex[3] = ((y + 1) == height) ? -1 : (idx + width);
nIndex[4] = (z == 0) ? -1 : (idx - SLICE);
nIndex[5] = ((z + 1) == thick) ? -1 : (idx + SLICE);
return nIndex;
}
void WaterShed_WZ::Watershed3D_WZ(
const Mat im,
const GVInt32 width,
const GVInt32 height,
const GVInt32 thick,
GVInt32* label,
const vector<vector<GVInt32>> marker)
{
//<Parameter>
//<image> the image for watershed
//<width> the width of the image
//<height> the height of the image
//<thick> the thick of the image
//<label> the map to save result. need to allocate memory before use watershed
//<marker> the marker's index
// const GVByte* image=im.data;
auto t0 = chrono::high_resolution_clock::now();
QTextStream out(stdout);
// const GVInt32 SZ_slice = width * height;
// const GVInt32 SZ = SZ_slice * thick;
const GVInt32 markerNum = marker.size();
// create toSearchList. Saved pixel connected to labeled pixels and wait to search
vector<list<GVInt32>> toSearchList;
toSearchList.resize(markerNum);
// set label to INIT (unsearched)
// ::memset(label, -1, sizeof(GVInt32) * SZ);
// initialize
array<GVInt32, 6> nIdx;
for (size_t i = 0; i < markerNum; i++)
{
for (GVInt32 idx : marker[i])
{
// initialize label (which can be considered as a map of pointer to labelBar)
label[idx] = i + 1;
nIdx = getNeighbor_WZ(idx, width, height, thick);
for (GVInt32 newidx : nIdx)
{
if (newidx != -1)
{
if (label[newidx] == -1) {
toSearchList[i].push_back(newidx);
label[newidx] = -2;
}
}
}
}
}
//watershed
GVByte h;
GVInt32 idx;
for (int h_cnt = 0; h_cnt < (1+(int)GV_BYTE_MAX); h_cnt++) // water height
{
h = (GVByte)h_cnt;
for (GVInt32 cnt = 0; cnt < markerNum; cnt++) { // for each marker
list<GVInt32>::iterator it = toSearchList[cnt].begin();
while (!toSearchList[cnt].empty())
{
// for each pixel connected to the cnt-th labeled region
idx = *it;
// if this pixel is higher than water, ignore it
if (im.at<unsigned char>(idx) > h)
{
++it;
if(it == toSearchList[cnt].end())
{
break;
}
else
{
continue;
}
}
// this pixel is lower than water, assign it
label[idx] = cnt + 1;
// L.at<int>(idx)=cnt + 1;
// add new neighbor
nIdx = getNeighbor_WZ(idx, width, height, thick);
for (GVInt32 newidx : nIdx)
{
if (newidx != -1)
{
if (label[newidx]== -1) {
toSearchList[cnt].push_back(newidx);
label[newidx] = -2;
// L.at<int>(newidx)=-2;
}
}
}
// erase searched pixel
it = toSearchList[cnt].erase(it);
if(it == toSearchList[cnt].end())
{
break;
}
else
{
continue;
}
}
}
}
auto t1 = chrono::high_resolution_clock::now();
auto dt = 1.e-9 * chrono::duration_cast<std::chrono::nanoseconds>(t1 - t0).count();
out << "Watershed used " << dt << " seconds.\n\n" << Qt::endl;
}
watershed_wz.h:
#ifndef WATERSHED_WZ_H
#define WATERSHED_WZ_H

#define _USE_MATH_DEFINES

#include <vector>
#include <array>
#include <list>
#include <opencv2/core.hpp>      // basic building blocks of opencv
#include <opencv2/imgcodecs.hpp> // image io
#include <opencv2/highgui.hpp>   // image display
#include <QDebug>
#include <QTextStream>
#include <chrono>

using namespace std;
using namespace cv;

typedef unsigned char GVByte;
typedef int32_t GVInt32;
//typedef uint32_t GVInt32U;

const GVByte GV_BYTE_MAX = UCHAR_MAX;

class WaterShed_WZ
{
public:
    WaterShed_WZ();
    static array<GVInt32, 6> getNeighbor_WZ(const GVInt32 idx, const GVInt32 width,
                                            const GVInt32 height, const GVInt32 thick);
    static void Watershed3D_WZ(
        const Mat im,
        const GVInt32 width,
        const GVInt32 height,
        const GVInt32 thick,
        GVInt32* label,
        const vector<vector<GVInt32>> marker);
};

#endif // WATERSHED_WZ_H
Try deleting the [cnt] index everywhere, like this:
toSearchList[cnt].end() --> toSearchList.end()
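As an aside on the erase pattern itself, the conventional way to erase while walking a std::list is to let the iterator returned by erase drive the loop, which removes the separate end-check/break bookkeeping. A minimal sketch (shouldErase is a hypothetical stand-in for the "pixel is below the water" test, not taken from the code above):

#include <list>

// walk a list, erasing some elements in place; erase() returns the iterator
// to the element after the erased one, so the loop stays valid throughout
template <class Pred>
void eraseMatching(std::list<int>& lst, Pred shouldErase)
{
    for (auto it = lst.begin(); it != lst.end(); /* advance inside */) {
        if (shouldErase(*it))
            it = lst.erase(it);  // next valid position
        else
            ++it;                // keep element, move on
    }
}

Note that push_back never invalidates std::list iterators, so appending inside such a loop is fine; a malloc(): memory corruption abort therefore usually points at heap damage done elsewhere (for example, an out-of-bounds write through a raw pointer such as label) rather than at the list operations themselves.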
I am new to the OpenCV world and neural networks, but I have some coding experience in C++/Java.
I created my first ANN MLP and taught it XOR:
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ml/ml.hpp>
#include <iostream>
#include <iomanip>

using namespace cv;
using namespace ml;
using namespace std;

void print(Mat& mat, int prec)
{
    for (int i = 0; i < mat.size().height; i++)
    {
        cout << "[";
        for (int j = 0; j < mat.size().width; j++)
        {
            cout << fixed << setw(2) << setprecision(prec) << mat.at<float>(i, j);
            if (j != mat.size().width - 1)
                cout << ", ";
            else
                cout << "]" << endl;
        }
    }
}

int main()
{
    const int hiddenLayerSize = 4;
    float inputTrainingDataArray[4][2] = {
        { 0.0, 0.0 },
        { 0.0, 1.0 },
        { 1.0, 0.0 },
        { 1.0, 1.0 }
    };
    Mat inputTrainingData = Mat(4, 2, CV_32F, inputTrainingDataArray);
    float outputTrainingDataArray[4][1] = {
        { 0.0 },
        { 1.0 },
        { 1.0 },
        { 0.0 }
    };
    Mat outputTrainingData = Mat(4, 1, CV_32F, outputTrainingDataArray);

    Ptr<ANN_MLP> mlp = ANN_MLP::create();
    Mat layersSize = Mat(3, 1, CV_16U);
    layersSize.row(0) = Scalar(inputTrainingData.cols);
    layersSize.row(1) = Scalar(hiddenLayerSize);
    layersSize.row(2) = Scalar(outputTrainingData.cols);
    mlp->setLayerSizes(layersSize);
    mlp->setActivationFunction(ANN_MLP::ActivationFunctions::SIGMOID_SYM);

    TermCriteria termCrit = TermCriteria(
        TermCriteria::Type::COUNT + TermCriteria::Type::EPS,
        100000000,
        0.000000000000000001
    );
    mlp->setTermCriteria(termCrit);
    mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP);

    Ptr<TrainData> trainingData = TrainData::create(
        inputTrainingData,
        SampleTypes::ROW_SAMPLE,
        outputTrainingData
    );
    mlp->train(trainingData
        /*, ANN_MLP::TrainFlags::UPDATE_WEIGHTS
           + ANN_MLP::TrainFlags::NO_INPUT_SCALE
           + ANN_MLP::TrainFlags::NO_OUTPUT_SCALE*/
    );

    for (int i = 0; i < inputTrainingData.rows; i++) {
        Mat sample = Mat(1, inputTrainingData.cols, CV_32F, inputTrainingDataArray[i]);
        Mat result;
        mlp->predict(sample, result);
        cout << sample << " -> "; // << result << endl;
        print(result, 0);
        cout << endl;
    }
    return 0;
}
It works very well for this simple problem; I also taught this network the 1-10 to binary conversion.
But I need to use the MLP for simple image classification of road signs. I wrote the code for loading the training images and preparing the matrices for learning, but I'm not able to train the network: it "learns" in one second even with 1,000,000 iterations, and it produces garbage results, the same for all inputs!
Here are my test images and the source code:
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ml/ml.hpp>
#include <iostream>
#include <chrono>
#include <memory>
#include <iomanip>
#include <climits>
#include <Windows.h>

using namespace cv;
using namespace ml;
using namespace std;
using namespace chrono;

const int WIDTH_SIZE = 50;
const int HEIGHT_SIZE = (int)(WIDTH_SIZE * sqrt(3)) / 2;
const int IMAGE_DATA_SIZE = WIDTH_SIZE * HEIGHT_SIZE;

void print(Mat& mat, int prec)
{
    for (int i = 0; i < mat.size().height; i++)
    {
        cout << "[ ";
        for (int j = 0; j < mat.size().width; j++)
        {
            cout << fixed << setw(2) << setprecision(prec) << mat.at<float>(i, j);
            if (j != mat.size().width - 1)
                cout << ", ";
            else
                cout << " ]" << endl;
        }
    }
}

bool loadImage(string imagePath, Mat& outputImage)
{
    // load image in grayscale
    Mat image = imread(imagePath, IMREAD_GRAYSCALE);
    Mat temp;
    // check for invalid input
    if (image.empty()) {
        cout << "Could not open or find the image" << std::endl;
        return false;
    }
    // resize the image
    Size size(WIDTH_SIZE, HEIGHT_SIZE);
    resize(image, temp, size, 0, 0, CV_INTER_AREA);
    // convert to float 1-channel
    temp.convertTo(outputImage, CV_32FC1, 1.0 / 255.0);
    return true;
}

vector<string> getFilesNamesInFolder(string folder)
{
    vector<string> names;
    char search_path[200];
    sprintf(search_path, "%s/*.*", folder.c_str());
    WIN32_FIND_DATA fd;
    HANDLE hFind = ::FindFirstFile(search_path, &fd);
    if (hFind != INVALID_HANDLE_VALUE) {
        do {
            // read all (real) files in current folder,
            // skipping the two default folders . and ..
            if (!(fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
                names.push_back(fd.cFileName);
            }
        } while (::FindNextFile(hFind, &fd));
        ::FindClose(hFind);
    }
    return names;
}

class Sign {
public:
    enum class Category { A = 'A', B = 'B', C = 'C', D = 'D' };
    Mat image;
    Category category;
    int number;
    Sign(Mat& image, string name) : image(image) {
        category = static_cast<Category>(name.at(0));
        number = stoi(name.substr(2, name.length()));
    };
};

vector<Sign> loadSignsFromFolder(String folderName) {
    vector<Sign> roadSigns;
    for (string fileName : getFilesNamesInFolder(folderName)) {
        Mat image;
        loadImage(folderName + fileName, image);
        roadSigns.emplace_back(image, fileName.substr(0, (fileName.length() - 4))); // cut .png
    }
    return roadSigns;
}

void showSignsInWindows(vector<Sign> roadSigns) {
    for (Sign sign : roadSigns) {
        String windowName = "Sign " + to_string(sign.number);
        namedWindow(windowName, WINDOW_AUTOSIZE);
        imshow(windowName, sign.image);
    }
    waitKey(0);
}

Mat getInputDataFromSignsVector(vector<Sign> roadSigns) {
    Mat roadSignsImageData;
    for (Sign sign : roadSigns) {
        Mat signImageDataInOneRow = sign.image.reshape(0, 1);
        roadSignsImageData.push_back(signImageDataInOneRow);
    }
    return roadSignsImageData;
}

Mat getOutputDataFromSignsVector(vector<Sign> roadSigns) {
    int signsCount = (int) roadSigns.size();
    int signsVectorSize = signsCount + 1;
    Mat roadSignsData(0, signsVectorSize, CV_32FC1);
    int i = 1;
    for (Sign sign : roadSigns) {
        vector<float> outputTraningVector(signsVectorSize);
        fill(outputTraningVector.begin(), outputTraningVector.end(), -1.0);
        outputTraningVector[i++] = 1.0;
        Mat tempMatrix(outputTraningVector, false);
        roadSignsData.push_back(tempMatrix.reshape(0, 1));
    }
    return roadSignsData;
}

int main(int argc, char* argv[])
{
    if (argc != 2) {
        cout << " Usage: display_image ImageToLoadAndDisplay" << endl;
        return -1;
    }
    const int hiddenLayerSize = 500;
    vector<Sign> roadSigns = loadSignsFromFolder("../../../Znaki/A/");
    Mat inputTrainingData = getInputDataFromSignsVector(roadSigns);
    Mat outputTrainingData = getOutputDataFromSignsVector(roadSigns);

    Ptr<ANN_MLP> mlp = ANN_MLP::create();
    Mat layersSize = Mat(3, 1, CV_16U);
    layersSize.row(0) = Scalar(inputTrainingData.cols);
    layersSize.row(1) = Scalar(hiddenLayerSize);
    layersSize.row(2) = Scalar(outputTrainingData.cols);
    mlp->setLayerSizes(layersSize);
    mlp->setActivationFunction(ANN_MLP::ActivationFunctions::SIGMOID_SYM, 1.0, 1.0);
    mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP, 0.05, 0.05);
    //mlp->setTrainMethod(ANN_MLP::TrainingMethods::RPROP);

    TermCriteria termCrit = TermCriteria(
        TermCriteria::Type::MAX_ITER //| TermCriteria::Type::EPS,
        , 100 //(int) INT_MAX
        , 0.000001
    );
    mlp->setTermCriteria(termCrit);

    Ptr<TrainData> trainingData = TrainData::create(
        inputTrainingData,
        SampleTypes::ROW_SAMPLE,
        outputTrainingData
    );

    auto start = system_clock::now();
    mlp->train(trainingData
        //, //ANN_MLP::TrainFlags::UPDATE_WEIGHTS
        , ANN_MLP::TrainFlags::NO_INPUT_SCALE
        + ANN_MLP::TrainFlags::NO_OUTPUT_SCALE
    );
    auto duration = duration_cast<milliseconds>(system_clock::now() - start);
    cout << "Training time: " << duration.count() << "ms" << endl;

    for (int i = 0; i < inputTrainingData.rows; i++) {
        Mat result;
        //mlp->predict(inputTrainingData.row(i), result);
        mlp->predict(roadSigns[i].image.reshape(0, 1), result);
        //cout << result << endl;
        print(result, 2);
    }
    //showSignsInWindows(roadSigns);
    return 0;
}
What is wrong with this code, that XOR works but images don't? I checked the input and output matrices and they're correct... Could somebody also explain when I should use ANN_MLP::TrainFlags::NO_INPUT_SCALE and ANN_MLP::TrainFlags::NO_OUTPUT_SCALE, and what values I should use for the setActivationFunction and setTrainMethod parameters?
Thanks!
There was a problem with the backprop weight-scale parameter: it was too big, and the ANN couldn't learn more difficult things.
I changed the line to mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP, 0.0001); and the hidden layer size to 100 (to speed up the learning), and now it's working!
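In code, the two changes described above look roughly like this (the values are the ones quoted; everything else stays as in the listing):

const int hiddenLayerSize = 100;  // was 500; a smaller hidden layer trains faster

// second argument is the backprop weight-gradient scale (learning rate);
// 0.05 was too large for the image data and kept the net from converging
mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP, 0.0001);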
I have the following code. I used it after applying the connected-components code; I get the wanted results from connected components, but I'm now trying to use the code below to remove labels whose pixel count is very small or very large, based on a threshold. Every time I debug it I get an exception, so could anybody help me, please?
This is the connected-components code:
#include <stdio.h>
#include <math.h>
#include <cv.h>
#include <highgui.h>
#define PIXEL(img,x,y) (img)->imageData[(x)*img->widthStep+(y)]
#include <iostream>

int main()
{
    IplImage *GrayImg, *outImg;
    GrayImg = cvLoadImage("Gray_Image.jpg", 0);
    if (!GrayImg) {
        printf("Could not load image file: %s\n");
        cvWaitKey(0);
        exit(0);
    }
    cvThreshold(GrayImg, GrayImg, 180, 255, CV_THRESH_BINARY | CV_THRESH_OTSU); // converting image to binary

    int height, width, channels;
    height = GrayImg->height;
    width = GrayImg->width;
    channels = GrayImg->nChannels;
    printf("Processing a %dx%d image with %d channels \n", height, width, channels);

    outImg = cvCreateImage(cvSize(width, height), IPL_DEPTH_8U, 1);
    cvZero(outImg);
    //cvCopyImage(GrayImg,OutImg);

    int i, j, v;
    int top, left;
    int topL, leftL;
    int label = 0;
    int eq[10000][2];
    int eqlength = 1;

    cvNamedWindow("Gray", CV_WINDOW_AUTOSIZE);
    cvShowImage("Gray", GrayImg);

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++) {
            v = (unsigned char) PIXEL(GrayImg, i, j); // cvGet2D(GrayImg,i,j).val[0];
            //unsigned char v1 = PIXEL(GrayImg,i,j);
            //int v2 = PIXEL(GrayImg,i,j);
            //int v3 = v1;
            if (v == 255) {
                if (i > 0) top = cvGet2D(GrayImg, i-1, j).val[0];
                else top = 0;
                if (j > 0) left = cvGet2D(GrayImg, i, j-1).val[0];
                else left = 0;
                if (top == 0 && left == 0)
                {
                    PIXEL(outImg, i, j) = label;
                    label += 1;
                }
                else if (top == 255 && left == 0) {
                    PIXEL(outImg, i, j) = PIXEL(outImg, i-1, j);
                }
                else if (top == 0 && left == 255) {
                    PIXEL(outImg, i, j) = PIXEL(outImg, i, j-1);
                }
                else if (top == 255 && left == 255) {
                    PIXEL(outImg, i, j) = PIXEL(outImg, i, j-1);
                    if (PIXEL(outImg, i-1, j) != PIXEL(outImg, i, j-1)) {
                        if (eq[eqlength-1][0] == PIXEL(outImg, i-1, j) && eq[eqlength-1][1] == PIXEL(outImg, i, j-1)) ;
                        else
                        {
                            eq[eqlength][0] = PIXEL(outImg, i-1, j);
                            eq[eqlength][1] = PIXEL(outImg, i, j-1);
                            eqlength++;
                        }
                    }
                }
            }
        }
        //cvWaitKey(0);
        //cvShowImage("Out", outImg);
    }

    int e;
    for (i = 0; i < height; i++)
        for (j = 0; j < width; j++)
            if (PIXEL(outImg, i, j) != 0)
                for (e = 1; e < eqlength; e++)
                    if (PIXEL(outImg, i, j) == eq[e][0])
                        PIXEL(outImg, i, j) = eq[e][1];

    cvNamedWindow("Out", CV_WINDOW_AUTOSIZE);
    cvShowImage("Out", outImg);
And this is the code I added to remove the unwanted labels:
    int* arr = new int [N];
    //cout<<endl<<arr[0];
    for (i = 0; i < height; i++)
    {
        for (j = 0; j < width; j++)
        {
            id = (unsigned char) PIXEL(outImg, i, j);
            arr[id]++;
        }
    }
    for (i = 0; i <= N; i++)
    {
        if (arr[i] < 5 || arr[i] > 50)
            arr[i] = 0;
    }
    for (i = 0; i < outImg->height; i++)
    {
        for (j = 0; j < outImg->width; j++)
        {
            id = (unsigned char) PIXEL(outImg, i, j);
            //id=cvGet2D(outImg,i,j).val[0];
            if (arr[id] == 0)
                PIXEL(outImg, i, j) = 0;
            else
                PIXEL(outImg, i, j) = 255;
        }
    }
    delete [] arr;

    cvNamedWindow("CC", CV_WINDOW_AUTOSIZE);
    cvShowImage("CC", outImg);
    cvWaitKey(0);

    /* Free memory and get out */
    cvDestroyWindow("Gray");
    cvReleaseImage(&GrayImg);
    return 0;
}
You are exceeding the bounds of your array arr. Indexes are 0-based in C++, so when allocating an array of size N, your maximum valid index is N-1.
int* arr = new int [N];
...
for (i = 0; i <= N; i++) // accessing element N of arr, which does not exist; use '<'
{
    if (arr[i] < 5 || arr[i] > 50)
        arr[i] = 0;
}
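A sketch of the counting pass with the bound fixed, reusing the question's PIXEL macro and N. Note also (an assumption about a second source of the exception, not something stated above) that new int[N] leaves the counters uninitialized, so they must be zeroed before the arr[id]++ pass; a std::vector handles both:

#include <vector>

std::vector<int> arr(N, 0);  // zero-initialized histogram of label sizes
for (i = 0; i < height; i++)
    for (j = 0; j < width; j++)
        arr[(unsigned char) PIXEL(outImg, i, j)]++;

for (i = 0; i < N; i++)      // i < N, never i <= N
    if (arr[i] < 5 || arr[i] > 50)
        arr[i] = 0;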