I'm a newbie in Visual Odometry and am following a tutorial on solving VO using PnP. However, when I run the program, I get the following error:
terminate called after throwing an instance of 'cv::Exception'
what(): OpenCV(4.3.0) /home/wctu/opencv-4.3.0/modules/calib3d/src/solvepnp.cpp:754: error: (-215:Assertion failed) ( (npoints >= 4) || (npoints == 3 && flags == SOLVEPNP_ITERATIVE && useExtrinsicGuess) ) && npoints == std::max(ipoints.checkVector(2, CV_32F), ipoints.checkVector(2, CV_64F)) in function 'solvePnPGeneric'
My code is below:
string datas[2266];
string str1;
std::getline(file, str1);
datas[0] = str1;
for(int i = 1; !file.eof(); i++)
{
string str;
std::getline(file, str);
datas[i] = str;
if(str.empty()) break;
if(str.at(0) == '#') continue; /* comment */
cout << datas[i-1] << endl << datas[i] << endl;
Mat image, depth, image1, depth1;
string rgbFilename1 = datas[i-1].substr(timestampLength + 1, rgbPathLehgth );
string timestap1 = datas[i-1].substr(0, timestampLength);
string depthFilename1 = datas[i-1].substr(2*timestampLength + rgbPathLehgth + 3, depthPathLehgth );
image1 = imread(dirname + rgbFilename1);
depth1 = imread(dirname + depthFilename1, -1);
string rgbFilename = str.substr(timestampLength + 1, rgbPathLehgth );
string timestap = str.substr(0, timestampLength);
string depthFilename = str.substr(2*timestampLength + rgbPathLehgth + 3, depthPathLehgth );
image = imread(dirname + rgbFilename);
depth = imread(dirname + depthFilename, -1);
CV_Assert(!image.empty());
CV_Assert(!depth.empty());
CV_Assert(depth.type() == CV_16UC1);
cout << i << " " << rgbFilename << " " << depthFilename << endl;
std::vector<KeyPoint> keypoints_1, keypoints_2;
vector<DMatch> matches;
find_feature_matches(image1, image, keypoints_1, keypoints_2, matches);
cout << "一共找到了" << matches.size() << "组匹配点" << endl;
// // 建立3D点
//Mat d1 = imread(depth1, IMREAD_UNCHANGED); // 深度图为16位无符号数,单通道图像
Mat K = (Mat_<double>(3, 3) << 525.0f, 0, 319.5f, 0, 525.0f, 239.5f, 0, 0, 1);
vector<Point3f> pts_3d;
vector<Point2f> pts_2d;
for (DMatch m:matches) {
ushort d = depth1.ptr<unsigned short>(int(keypoints_1[m.queryIdx].pt.y))[int(keypoints_1[m.queryIdx].pt.x)];
if (d == 0) // bad depth
continue;
float dd = d / 5000.0;
Point2d p1 = pixel2cam(keypoints_1[m.queryIdx].pt, K);
pts_3d.push_back(Point3f(p1.x * dd, p1.y * dd, dd));
pts_2d.push_back(keypoints_2[m.trainIdx].pt);
}
cout << pts_3d[0] << " " << pts_2d[0] << endl;
cout << "3d-2d pairs: " << pts_3d.size() << " " << pts_2d.size() << endl;
chrono::steady_clock::time_point t1 = chrono::steady_clock::now();
Mat r, t;
solvePnP(pts_3d, pts_2d, K, Mat(), r, t, false); // call OpenCV's PnP solver; EPNP, DLS and other methods can also be selected
Mat R;
cv::Rodrigues(r, R); // r is in rotation-vector form; convert it to a matrix with the Rodrigues formula
chrono::steady_clock::time_point t2 = chrono::steady_clock::now();
chrono::duration<double> time_used = chrono::duration_cast<chrono::duration<double>>(t2 - t1);
cout << "solve pnp in opencv cost time: " << time_used.count() << " seconds." << endl;
argv[1] is the text file that associates each RGB image with its depth image; its format is shown below:
1311877977.445420 rgb/1311877977.445420.png 1311877977.431871 depth/1311877977.431871.png
I've searched for solutions online and tried everything, but still to no avail.
I'd really appreciate your help; thanks in advance.
Update:
The inputs that cause the exception are below; there are only three pairs:
[0.94783, -1.70307, 7.3738] [383.4, 121.828]
[0.170393, -0.170453, 1.3256] [379.817, 186.325]
[0.610124, -0.161545, 3.4604] [403.108, 223.949]
The OpenCV function cv::solvePnP checks internally whether the input data you supplied actually makes sense and matches the documentation (that is what the assertion does). In your case the check fails and it therefore throws an error message:
terminate called after throwing an instance of 'cv::Exception'
what(): OpenCV(4.3.0) /home/wctu/opencv-4.3.0/modules/calib3d/src/solvepnp.cpp:754: error:
(-215:Assertion failed)
( (npoints >= 4) || (npoints == 3 && flags == SOLVEPNP_ITERATIVE && useExtrinsicGuess) ) &&
npoints == std::max(ipoints.checkVector(2, CV_32F), ipoints.checkVector(2, CV_64F)) in function 'solvePnPGeneric'
So either the dimensions of the inputs are not right or the data you are supplying is not appropriate. The error is expressed in terms of the function's input arguments, so you will have to look at the corresponding documentation of cv::solvePnP.
bool cv::solvePnP(InputArray objectPoints,
InputArray imagePoints,
InputArray cameraMatrix,
InputArray distCoeffs,
OutputArray rvec,
OutputArray tvec,
bool useExtrinsicGuess = false,
int flags = SOLVEPNP_ITERATIVE
)
Comparing your input arguments to the ones given above, you will see that you set useExtrinsicGuess to false and did not supply flags, which defaults to SOLVEPNP_ITERATIVE. This already tells you that your error isn't caused by (npoints == 3 && flags == SOLVEPNP_ITERATIVE && useExtrinsicGuess) (as useExtrinsicGuess is set to false) but instead by (npoints >= 4).
Opening the corresponding source-code file on Github or in your source-code folder you will actually see that npoints is defined as
int npoints = std::max(opoints.checkVector(3, CV_32F), opoints.checkVector(3, CV_64F));
Now we have to figure out what checkVector does (see e.g. here): it checks the channels and depth of the matrix and returns -1 if the requirement is not satisfied; otherwise it returns the number of elements in the matrix (note that an element may have multiple channels).
This means your code is failing either because the supplied input format for the two data types is not correct or npoints is smaller than 4.
If you look at the documentation again, it tells you that objectPoints expects an "Array of object points in the object coordinate space, Nx3 1-channel or 1xN/Nx1 3-channel, where N is the number of points. vector<Point3d> can be also passed here", while imagePoints expects an "Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel, where N is the number of points".
This is clearly fulfilled by the pts_3d and pts_2d that you pass, as they are a std::vector<Point3f> and a std::vector<Point2f> respectively. This means the only logical reason left is that pts_3d and/or pts_2d actually have fewer than 4 entries, which is too few for the default SOLVEPNP_ITERATIVE without an extrinsic guess (and your update indeed shows only 3 pairs). This means insufficient feature matches were found between the supplied images in the previous step! Check your input files again and potentially try different ones.
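If it helps, a minimal guard before the call (a sketch reusing the pts_3d, pts_2d, K, r and t from your own code) would be to skip frames that yield too few correspondences:
// Skip frames that don't produce enough 3D-2D correspondences for solvePnP.
if (pts_3d.size() < 4) {
cout << "only " << pts_3d.size() << " 3d-2d pairs, skipping this frame" << endl;
continue; // stay inside the per-frame loop and move on to the next pair
}
solvePnP(pts_3d, pts_2d, K, Mat(), r, t, false); // npoints >= 4 is now guaranteed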
I'm using OpenCV 4 on Ubuntu 20.04 LTS on WSL + XServer for the GUI.
I want to create custom convolutional filter kernels and apply them to my image. This is the code I've written for it:
cv::Mat filter2D(cv::Mat input, cv::Mat filter)
{
using namespace cv;
Mat dst = input.clone();
//cout << " filter data successfully found. Rows:" << filter.rows << " cols:" << filter.cols << " channels:" << filter.channels() << "\n";
//cout << " input data successfully found. Rows:" << input.rows << " cols:" << input.cols << " channels:" << input.channels() << "\n";
for (int i = 0-(filter.rows/2);i<input.rows-(filter.rows/2);i++)
{
for (int j = 0-(filter.cols/2);j<input.cols-(filter.cols/2);j++)
{ //adding k and l to i and j will make up the difference and allow us to process the whole image
float filtertotal = 0;
for (int k = 0; k < filter.rows;k++)
{
for (int l = 0; l < filter.rows;l++)
{
if(i+k >= 0 && i+k < input.rows && j+l >= 0 && j+l < input.cols)
{ //don't try to process pixels off the edge of the map
float a = input.at<uchar>(i+k,j+l);
float b = filter.at<float>(k,l);
float product = a * b;
filtertotal += product;
}
}
}
//filter all proccessed for this pixel, write it to dst
dst.at<uchar>(i+(filter.rows/2),j+(filter.cols/2)) = filtertotal;
}
}
return dst;
}
int main(int argc, char** argv)
{
// Declare variables
cv::Mat_<float> src;
const char* window_name = "filter2D Demo";
// Loads an image
src = cv::imread("fapan.png", cv::IMREAD_GRAYSCALE ); // Load an image
if( src.empty() )
{
printf(" Error opening image\n");
return EXIT_FAILURE;
}
static float x[3][3] = {
{-1, -1, -1},
{-1, 8, -1},
{-1, -1, -1}
};
cv::Mat kernel(3,3, CV_16FC1, x);
// Apply filter
filter2D(src, kernel);
cv::imshow( window_name, src );
cv::waitKey(0);
return EXIT_SUCCESS;
}
The problem is that the output image looks like this.
As you can see, not only are the edges white, but the inside is white too.
The input image:
The output you have posted for the input code is correct, as you are applying a normal filter to an image.
It may cause a little blurring or sharpening, but it will never cause it to completely detect edges.
In order to detect only the edges in the image, you must apply a Laplacian along a certain direction.
https://www.l3harrisgeospatial.com/docs/LaplacianFilters.html#:~:text=A%20Laplacian%20filter%20is%20an,an%20edge%20or%20continuous%20progression. (a link with some info)
It is the derivative of the image, so it only detects the change.
I recommend you try this in the MATLAB Image Processing Toolbox.
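If you would rather stay in OpenCV, a minimal sketch (reusing the question's "fapan.png"; cv::Laplacian and cv::convertScaleAbs are the relevant functions) could look like this:
// Load the image in grayscale and apply the Laplacian operator.
cv::Mat src = cv::imread("fapan.png", cv::IMREAD_GRAYSCALE);
cv::Mat lap, edges;
cv::Laplacian(src, lap, CV_16S, 3); // use a signed depth so negative responses are kept
cv::convertScaleAbs(lap, edges); // take absolute values and convert back to 8-bit for display
cv::imshow("laplacian edges", edges);
cv::waitKey(0);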
I am using OpenCV 3.1 w/ contrib in C++. I'm trying to recreate the program outlined in "Learning Image Processing with OpenCV", pp. 152-161. I copied the code line by line but am getting the following error.
//-Create a blender-//S10
Ptr<Blender> blender = Blender::createDefault(blend_type,false);
Size dst_sz = resultRoi(corners, sizes).size();
float blend_width = sqrt(static_cast<float>(dst_sz.area()))* blend_strength / 100.f;
if(blend_width < 1.f){
blender = Blender::createDefault(Blender::NO,false);
}
else if(blend_type == Blender::MULTI_BAND){
MultiBandBlender* mb = dynamic_cast<MultiBandBlender*>(blender.get());
mb->setNumBands(static_cast<int>(ceil(log(blend_width)/log(2.))-1.));
cout << "Multi-band blender, number of bands: " << mb->numBands() << endl;
}
else if(blend_type == Blender::FEATHER){
FeatherBlender* fb = dynamic_cast<FeatherBlender*>(blender.get());
fb->setSharpness(1.f/blend_width);
cout << "Feather blender, sharpness: " << fb->sharpness() << endl;
}
blender->prepare(corners,sizes);
//-Compositing step-//S11
cout << "Composting..." << endl;
t = getTickCount();
Mat img_warped, img_warped_s;
Mat dilated_mask, seam_mask, mask, mask_warped;
for(int img_idx = 0; img_idx < num_images; img_idx++){
cout << "Compositing image #" << indices[img_idx]+1 << endl;
//-Read image and resize it if necessary-//S11.1
full_img = imread(img_names[img_idx]);
if(abs(scale - 1)> 1e-1){
resize(full_img, img, Size(),scale,scale);
}
else{
img = full_img;
}
full_img.release();
Size img_size = img.size();
Mat K;
cameras[img_idx].K().convertTo(K, CV_32F);
//-Warp the current image-//S11.2
warper->warp(img,K,cameras[img_idx].R,INTER_LINEAR,BORDER_REFLECT,img_warped);
//Warp the current image mask
mask.create(img_size, CV_8U);
mask.setTo(Scalar::all(255));
warper->warp(mask,K,cameras[img_idx].R,INTER_NEAREST,BORDER_CONSTANT,mask_warped);
//-Compenstae exposure error step-//S11.3
compensator->apply(img_idx,corners[img_idx],img_warped,mask_warped);
img_warped.convertTo(img_warped, CV_16S);
img_warped.release();
img.release();
mask.release();
dilate(masks_warped[img_idx], dilated_mask, Mat());
resize(dilated_mask, seam_mask, mask_warped.size());
mask_warped = seam_mask & mask_warped;
//-Blending images step-//S11.4
blender->feed(img_warped_s,mask_warped,corners[img_idx]);
}
The issue is occurring on the final line saying:
error: (-215) img.type() == CV_16SC3 || img.type() == CV_8UC3 in
function feed
As the Blender::feed function takes an img, mask, and tl, I assumed the issue was with the img_warped variable and tried converting it to type CV_16SC3 and CV_8UC3, but that didn't work. Any help is appreciated.
As stated in the comments, I wasn't populating img_warped_s.
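For anyone hitting the same assertion, the missing step looks roughly like this (a sketch based on that comment thread; Blender::feed expects CV_16SC3, so the warped image has to be converted into img_warped_s before feeding):
// Populate img_warped_s with a 16-bit signed copy of the warped image,
// then feed that (instead of the never-filled Mat) to the blender.
img_warped.convertTo(img_warped_s, CV_16S);
blender->feed(img_warped_s, mask_warped, corners[img_idx]);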
I have implemented a neural network using the OpenCV ANN library. I am a newbie in this field and I learned everything about it online (mostly StackOverflow).
I am using this ANN for number plate detection. I did the segmentation part using the OpenCV image processing library and it is working well. It performs character segmentation and hands the result to the NN part of the project. The NN is going to recognize the number plate.
I have sample images of 20x30, therefore I have 600 neurons in the input layer. As there are 36 possibilities (0-9, A-Z) I have 36 output neurons. I kept 100 neurons in the hidden layer. The predict function of OpenCV is giving me the same output for every segmented image. That output also contains some large negative values (< -1). I have used cv::ml::ANN_MLP::SIGMOID_SYM as the activation function.
Please don't mind that there is a lot of code commented out (I am doing trial and error).
I need to find out what is the output of predict function. Thank you for your help.
#include <opencv2/opencv.hpp>
int inputLayerSize = 1;
int outputLayerSize = 1;
int numSamples = 2;
Mat layers = Mat(3, 1, CV_32S);
layers.row(0) =Scalar(600) ;
layers.row(1) = Scalar(20);
layers.row(2) = Scalar(36);
vector<int> layerSizes = { 600,100,36 };
Ptr<ml::ANN_MLP> nnPtr = ml::ANN_MLP::create();
vector <int> n;
//nnPtr->setLayerSizes(3);
nnPtr->setLayerSizes(layers);
nnPtr->setTrainMethod(ml::ANN_MLP::BACKPROP);
nnPtr->setTermCriteria(TermCriteria(cv::TermCriteria::COUNT | cv::TermCriteria::EPS, 1000, 0.00001f));
nnPtr->setActivationFunction(cv::ml::ANN_MLP::SIGMOID_SYM, 1, 1);
nnPtr->setBackpropWeightScale(0.5f);
nnPtr->setBackpropMomentumScale(0.5f);
/*CvANN_MLP_TrainParams params = CvANN_MLP_TrainParams(
// terminate the training after either 1000
// iterations or a very small change in the
// network wieghts below the specified value
cvTermCriteria(CV_TERMCRIT_ITER + CV_TERMCRIT_EPS, 1000, 0.000001),
// use backpropogation for training
CvANN_MLP_TrainParams::BACKPROP,
// co-efficents for backpropogation training
// (refer to manual)
0.1,
0.1);*/
/* Mat samples(Size(inputLayerSize, numSamples), CV_32F);
samples.at<float>(Point(0, 0)) = 0.1f;
samples.at<float>(Point(0, 1)) = 0.2f;
Mat responses(Size(outputLayerSize, numSamples), CV_32F);
responses.at<float>(Point(0, 0)) = 0.2f;
responses.at<float>(Point(0, 1)) = 0.4f;
*/
//reading chaos image
// we will read the classification numbers into this variable as though it is a vector
// close the traning images file
/*vector<int> layerInfo;
layerInfo=nnPtr->get;
for (int i = 0; i < layerInfo.size(); i++) {
cout << "size of 0" <<layerInfo[i] << endl;
}*/
cv::imshow("chaos", matTrainingImagesAsFlattenedFloats);
// cout <<abc << endl;
matTrainingImagesAsFlattenedFloats.convertTo(matTrainingImagesAsFlattenedFloats, CV_32F);
//matClassificationInts.reshape(1, 496);
matClassificationInts.convertTo(matClassificationInts, CV_32F);
matSamples.convertTo(matSamples, CV_32F);
std::cout << matClassificationInts.rows << " " << matClassificationInts.cols << " ";
std::cout << matTrainingImagesAsFlattenedFloats.rows << " " << matTrainingImagesAsFlattenedFloats.cols << " ";
std::cout << matSamples.rows << " " << matSamples.cols;
imshow("Samples", matSamples);
imshow("chaos", matTrainingImagesAsFlattenedFloats);
Ptr<ml::TrainData> trainData = ml::TrainData::create(matTrainingImagesAsFlattenedFloats, ml::SampleTypes::ROW_SAMPLE, matSamples);
nnPtr->train(trainData);
bool m = nnPtr->isTrained();
if (m)
std::cout << "training complete\n\n";
// cv::Mat matCurrentChar = Mat(cv::Size(matTrainingImagesAsFlattenedFloats.cols, matTrainingImagesAsFlattenedFloats.rows), CV_32F);
// cout << "samples:\n" << samples << endl;
//cout << "\nresponses:\n" << responses << endl;
/* if (!nnPtr->train(trainData))
return 1;*/
/* cout << "\nweights[0]:\n" << nnPtr->getWeights(0) << endl;
cout << "\nweights[1]:\n" << nnPtr->getWeights(1) << endl;
cout << "\nweights[2]:\n" << nnPtr->getWeights(2) << endl;
cout << "\nweights[3]:\n" << nnPtr->getWeights(3) << endl;*/
//predicting
std::vector <cv::String> filename;
cv::String folder = "./plate/";
cv::glob(folder, filename);
if (filename.empty()) { // if unable to open image
std::cout << "error: image not read from file\n\n"; // show error message on command line
return(0); // and exit program
}
String strFinalString;
for (int i = 0; i < filename.size(); i++) {
cv::Mat matTestingNumbers = cv::imread(filename[i]);
cv::Mat matGrayscale; //
cv::Mat matBlurred; // declare more image variables
cv::Mat matThresh; //
cv::Mat matThreshCopy;
cv::Mat matCanny;
//
cv::cvtColor(matTestingNumbers, matGrayscale, CV_BGR2GRAY); // convert to grayscale
matThresh = cv::Mat(cv::Size(matGrayscale.cols, matGrayscale.rows), CV_8UC1);
for (int i = 0; i < matGrayscale.cols; i++) {
for (int j = 0; j < matGrayscale.rows; j++) {
if (matGrayscale.at<uchar>(j, i) <= 130) {
matThresh.at<uchar>(j, i) = 255;
}
else {
matThresh.at<uchar>(j, i) = 0;
}
}
}
// blur
cv::GaussianBlur(matThresh, // input image
matBlurred, // output image
cv::Size(5, 5), // smoothing window width and height in pixels
0); // sigma value, determines how much the image will be blurred, zero makes function choose the sigma value
// filter image from grayscale to black and white
/* cv::adaptiveThreshold(matBlurred, // input image
matThresh, // output image
255, // make pixels that pass the threshold full white
cv::ADAPTIVE_THRESH_GAUSSIAN_C, // use gaussian rather than mean, seems to give better results
cv::THRESH_BINARY_INV, // invert so foreground will be white, background will be black
11, // size of a pixel neighborhood used to calculate threshold value
2); */ // constant subtracted from the mean or weighted mean
// cv::imshow("thresh" + std::to_string(i), matThresh);
matThreshCopy = matThresh.clone();
std::vector<std::vector<cv::Point> > ptContours; // declare a vector for the contours
std::vector<cv::Vec4i> v4iHierarchy;// make a copy of the thresh image, this in necessary b/c findContours modifies the image
cv::Canny(matBlurred, matCanny, 20, 40, 3);
/*std::vector<std::vector<cv::Point> > ptContours; // declare a vector for the contours
std::vector<cv::Vec4i> v4iHierarchy; // declare a vector for the hierarchy (we won't use this in this program but this may be helpful for reference)
cv::findContours(matThreshCopy, // input image, make sure to use a copy since the function will modify this image in the course of finding contours
ptContours, // output contours
v4iHierarchy, // output hierarchy
cv::RETR_EXTERNAL, // retrieve the outermost contours only
cv::CHAIN_APPROX_SIMPLE); // compress horizontal, vertical, and diagonal segments and leave only their end points
/*std::vector<std::vector<cv::Point> > contours_poly(ptContours.size());
std::vector<cv::Rect> boundRect(ptContours.size());
for (int i = 0; i < ptContours.size(); i++)
{
approxPolyDP(cv::Mat(ptContours[i]), contours_poly[i], 3, true);
boundRect[i] = cv::boundingRect(cv::Mat(contours_poly[i]));
}*/
/*for (int i = 0; i < ptContours.size(); i++) { // for each contour
ContourWithData contourWithData; // instantiate a contour with data object
contourWithData.ptContour = ptContours[i]; // assign contour to contour with data
contourWithData.boundingRect = cv::boundingRect(contourWithData.ptContour); // get the bounding rect
contourWithData.fltArea = cv::contourArea(contourWithData.ptContour); // calculate the contour area
allContoursWithData.push_back(contourWithData); // add contour with data object to list of all contours with data
}
for (int i = 0; i < allContoursWithData.size(); i++) { // for all contours
if (allContoursWithData[i].checkIfContourIsValid()) { // check if valid
validContoursWithData.push_back(allContoursWithData[i]); // if so, append to valid contour list
}
}
//sort contours from left to right
std::sort(validContoursWithData.begin(), validContoursWithData.end(), ContourWithData::sortByBoundingRectXPosition);
// std::string strFinalString; // declare final string, this will have the final number sequence by the end of the program
*/
/*for (int i = 0; i < validContoursWithData.size(); i++) { // for each contour
// draw a green rect around the current char
cv::rectangle(matTestingNumbers, // draw rectangle on original image
validContoursWithData[i].boundingRect, // rect to draw
cv::Scalar(0, 255, 0), // green
2); // thickness
cv::Mat matROI = matThresh(validContoursWithData[i].boundingRect); // get ROI image of bounding rect
cv::Mat matROIResized;
cv::resize(matROI, matROIResized, cv::Size(RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT)); // resize image, this will be more consistent for recognition and storage
*/
cv::Mat matROIFloat;
cv::resize(matThresh, matThresh, cv::Size(RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT));
matThresh.convertTo(matROIFloat, CV_32FC1, 1.0 / 255.0); // convert Mat to float, necessary for call to find_nearest
cv::Mat matROIFlattenedFloat = matROIFloat.reshape(1, 1);
cv::Point maxLoc = { 0,0 };
cv::Point minLoc;
cv::Mat output = cv::Mat(cv::Size(36, 1), CV_32F);
vector<float>output2;
// cv::Mat output2 = cv::Mat(cv::Size(36, 1), CV_32F);
nnPtr->predict(matROIFlattenedFloat, output2);
// float max = output.at<float>(0, 0);
int fo = 0;
float m = output2[0];
imshow("predicted input", matROIFlattenedFloat);
// float b = output.at<float>(0, 0);
// cout <<"\n output0,0:"<<b<<endl;
// minMaxLoc(output, 0, 0, &minLoc, &maxLoc, Mat());
// cout << "\noutput:\n" << maxLoc.x << endl;
for (int j = 1; j < 36; j++) {
float value =output2[j];
if (value > m) {
m = value;
fo = j;
}
}
float * p = 0;
p = &m;
cout << "j value in output " << fo << " Max value " << p << endl;
//imshow("output image" + to_string(i), output);
// cout << "\noutput:\n" << minLoc.x << endl;
//float fltCurrentChar = (float)maxLoc.x;
output.release();
m = 0;
fo = 0;
}
// strFinalString = strFinalString + char(int(fltCurrentChar)); // append current char to full string
// cv::imshow("Predict output", output);
/*cv::Point maxLoc = {0,0};
Mat output=Mat (cv::Size(matSamples.cols,matSamples.rows),CV_32F);
nnPtr->predict(matTrainingImagesAsFlattenedFloats, output);
minMaxLoc(output, 0, 0, 0, &maxLoc, 0);
cout << "\noutput:\n" << maxLoc.x << endl;*/
// getchar();
/*for (int i = 0; i < 10;i++) {
for (int j = 0; j < 36; j++) {
if (matCurrentChar.at<float>(i, j) >= 0.6) {
cout << " "<<j<<" ";
}
}
}*/
waitKey(0);
return(0);
}
void gen() {
std::string dir, filepath;
int num, imgArea, minArea;
int pos = 0;
bool f = true;
struct stat filestat;
cv::Mat imgTrainingNumbers;
cv::Mat imgGrayscale;
cv::Mat imgBlurred;
cv::Mat imgThresh;
cv::Mat imgThreshCopy;
cv::Mat matROIResized=cv::Mat (cv::Size(RESIZED_IMAGE_WIDTH,RESIZED_IMAGE_HEIGHT),CV_8UC1);
cv::Mat matROI;
std::vector <cv::String> filename;
std::vector<std::vector<cv::Point> > ptContours;
std::vector<cv::Vec4i> v4iHierarchy;
int count = 0, contoursCount = 0;
matSamples = cv::Mat(cv::Size(36, 496), CV_32FC1);
matTrainingImagesAsFlattenedFloats = cv::Mat(cv::Size(600, 496), CV_32FC1);
for (int j = 0; j <= 35; j++) {
int tmp = j;
cv::String folder = "./Training Data/" + std::to_string(tmp);
cv::glob(folder, filename);
for (int k = 0; k < filename.size(); k++) {
count++;
// If the file is a directory (or is in some way invalid) we'll skip it
// if (stat(filepath.c_str(), &filestat)) continue;
//if (S_ISDIR(filestat.st_mode)) continue;
imgTrainingNumbers = cv::imread(filename[k]);
imgArea = imgTrainingNumbers.cols*imgTrainingNumbers.rows;
// read in training numbers image
minArea = imgArea * 50 / 100;
if (imgTrainingNumbers.empty()) {
std::cout << "error: image not read from file\n\n";
//return(0);
}
cv::cvtColor(imgTrainingNumbers, imgGrayscale, CV_BGR2GRAY);
//cv::equalizeHist(imgGrayscale, imgGrayscale);
imgThresh = cv::Mat(cv::Size(imgGrayscale.cols, imgGrayscale.rows), CV_8UC1);
/*cv::adaptiveThreshold(imgGrayscale,
imgThresh,
255,
cv::ADAPTIVE_THRESH_GAUSSIAN_C,
cv::THRESH_BINARY_INV,
3,
0);
*/
for (int i = 0; i < imgGrayscale.cols; i++) {
for (int j = 0; j < imgGrayscale.rows; j++) {
if (imgGrayscale.at<uchar>(j, i) <= 130) {
imgThresh.at<uchar>(j, i) = 255;
}
else {
imgThresh.at<uchar>(j, i) = 0;
}
}
}
// cv::imshow("imgThresh"+std::to_string(count), imgThresh);
imgThreshCopy = imgThresh.clone();
cv::GaussianBlur(imgThreshCopy,
imgBlurred,
cv::Size(5, 5),
0);
cv::Mat imgCanny;
// cv::Canny(imgBlurred,imgCanny,20,40,3);
cv::findContours(imgBlurred,
ptContours,
v4iHierarchy,
cv::RETR_EXTERNAL,
cv::CHAIN_APPROX_SIMPLE);
for (int i = 0; i < ptContours.size(); i++) {
if (cv::contourArea(ptContours[i]) > MIN_CONTOUR_AREA) {
contoursCount++;
cv::Rect boundingRect = cv::boundingRect(ptContours[i]);
cv::rectangle(imgTrainingNumbers, boundingRect, cv::Scalar(0, 0, 255), 2); // draw red rectangle around each contour as we ask user for input
matROI = imgThreshCopy(boundingRect); // get ROI image of bounding rect
std::string path = "./" + std::to_string(contoursCount) + ".JPG";
cv::imwrite(path, matROI);
// cv::imshow("matROI" + std::to_string(count), matROI);
cv::resize(matROI, matROIResized, cv::Size(RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT)); // resize image, this will be more consistent for recognition and storage
std::cout << filename[k] << " " << contoursCount << "\n";
//cv::imshow("matROI", matROI);
//cv::imshow("matROIResized"+std::to_string(count), matROIResized);
// cv::imshow("imgTrainingNumbers" + std::to_string(contoursCount), imgTrainingNumbers);
int intChar;
if (j<10)
intChar = j + 48;
else {
intChar = j + 55;
}
/*if (intChar == 27) { // if esc key was pressed
return(0); // exit program
}*/
// if (std::find(intValidChars.begin(), intValidChars.end(), intChar) != intValidChars.end()) { // else if the char is in the list of chars we are looking for . . .
// append classification char to integer list of chars
cv::Mat matImageFloat;
matROIResized.convertTo(matImageFloat,CV_32FC1);// now add the training image (some conversion is necessary first) . . .
//matROIResized.convertTo(matImageFloat, CV_32FC1); // convert Mat to float
cv::Mat matImageFlattenedFloat = matImageFloat.reshape(1, 1);
//matTrainingImagesAsFlattenedFloats.push_back(matImageFlattenedFloat);// flatten
try {
//matTrainingImagesAsFlattenedFloats.push_back(matImageFlattenedFloat);
std::cout << matTrainingImagesAsFlattenedFloats.rows << " " << matTrainingImagesAsFlattenedFloats.cols;
//unsigned char* re;
int ii = 0; // Current column in training_mat
for (int i = 0; i<matImageFloat.rows; i++) {
for (int j = 0; j < matImageFloat.cols; j++) {
matTrainingImagesAsFlattenedFloats.at<float>(contoursCount-1, ii++) = matImageFloat.at<float>(i,j);
}
}
}
catch (std::exception &exc) {
f = false;
exc.what();
}
if (f) {
matClassificationInts.push_back((float)intChar);
matSamples.at<float>(contoursCount-1, j) = 1.0;
}
f = true;
// add to Mat as though it was a vector, this is necessary due to the
// data types that KNearest.train accepts
} // end if
//} // end if
} // end for
}//end i
}//end j
}
Output of predict function
Unfortunately, I don't have the necessary time to really review the code, but I can say off the top that to train a model that performs well for prediction with 36 classes, you will need several things:
A large number of good quality images. Ideally, you'd want thousands of images for each class. Of course, you can see somewhat decent results with less than that, but if you only have a few images per class, it's never going to be able to generalize adequately.
You need a model that is large and sophisticated enough to provide the necessary expressiveness to solve the problem. For a problem like this, a plain old multi-layer perceptron with one hidden layer with 100 units may not be enough. This is actually a problem that would benefit from using a Convolutional Neural Net (CNN) with a couple layers just to extract useful features first. But assuming you don't want to go down that path, you may at least want to tweak the size of your hidden layer.
To even get to a point where the training process converges, you will probably need to experiment and crucially, you need an effective way to test the accuracy of the ANN after each experiment. Ideally, you want to observe the loss as the training is proceeding, but I'm not sure whether that's possible using OpenCV's ML functionality. At a minimum, you should fully expect to have to play around with the various so-called "hyper-parameters" and run many experiments before you have a reasonable model.
Anyway, the most important thing is to make sure you have a solid mechanism for validating the accuracy of the model after training. If you aren't already doing so, set aside some images as a separate test set, and after each experiment, use the trained ANN to predict each test image to see the accuracy.
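As a rough sketch of what such a check could look like with OpenCV's ML module (testData and testLabels are hypothetical Mats holding the held-out samples and their true class indices; nnPtr is the trained network from the question):
// testData: CV_32F, one flattened 20x30 sample (600 floats) per row.
// testLabels: CV_32S, N x 1, the true class index (0-35) per row.
Mat outputs;
nnPtr->predict(testData, outputs); // one row of 36 output activations per sample
int correct = 0;
for (int r = 0; r < outputs.rows; r++) {
Point maxLoc;
minMaxLoc(outputs.row(r), 0, 0, 0, &maxLoc); // index of the strongest activation
if (maxLoc.x == testLabels.at<int>(r, 0)) correct++;
}
cout << "test accuracy: " << double(correct) / outputs.rows << endl;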
One final general note: what you're trying to do is complex. You will save yourself a huge number of headaches if you take the time early and often to refactor your code. No matter how many experiments you run, if there's some defect causing (for example) your training data to be fundamentally different in some way than your test data, you will never see good results.
Good luck!
EDIT: I should also point out that seeing the same result for every input image is a classic sign that training failed. Unfortunately, there are many reasons why that might happen and it will be very difficult for anyone to isolate that for you without some cleaner code and access to your image data.
I have solved the issue of not getting the correct output from predict. The issue was caused by the input training Mat (i.e. matTrainingImagesAsFlattenedFloats) having values of 255.0 for white pixels. This happened because I hadn't used convertTo() properly. You need to call convertTo(outputImage, CV_32FC1, 1.0 / 255.0), which converts pixel values of 255.0 to 1.0; after that I get the correct output.
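In code, that scaling step looks roughly like this (a sketch using the question's matTrainingImagesAsFlattenedFloats):
// Convert to 32-bit float and scale 0-255 pixel values into the 0-1 range,
// so a white pixel becomes 1.0 instead of 255.0 before training.
matTrainingImagesAsFlattenedFloats.convertTo(matTrainingImagesAsFlattenedFloats, CV_32FC1, 1.0 / 255.0);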
Thank you for all the help.
This is too broad to be a single question. Sorry for the bad news. I tried this over and over and couldn't find a solution. I recommend that you implement a simple AND, OR or XOR first, just to make sure that the learning part is working and that you are getting better results the more passes you do. I also suggest trying the hyperbolic tangent as the transfer function instead of the sigmoid. And good luck!
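If it helps, a tiny self-contained XOR sanity check with the same API might look like this (a sketch, not taken from the question's code; the variable names are made up):
// Train a 2-4-1 MLP on XOR to confirm the training loop itself works.
float in[4][2] = { {0,0}, {0,1}, {1,0}, {1,1} };
float out[4][1] = { {0}, {1}, {1}, {0} };
Mat inputs(4, 2, CV_32F, in), targets(4, 1, CV_32F, out);
Mat layers = (Mat_<int>(3, 1) << 2, 4, 1);
Ptr<ml::ANN_MLP> xorNet = ml::ANN_MLP::create();
xorNet->setLayerSizes(layers);
xorNet->setActivationFunction(ml::ANN_MLP::SIGMOID_SYM, 1, 1);
xorNet->setTrainMethod(ml::ANN_MLP::BACKPROP, 0.1, 0.1);
xorNet->setTermCriteria(TermCriteria(TermCriteria::COUNT | TermCriteria::EPS, 10000, 1e-6));
xorNet->train(ml::TrainData::create(inputs, ml::ROW_SAMPLE, targets));
Mat pred;
xorNet->predict(inputs, pred); // expect values close to 0, 1, 1, 0
cout << pred << endl;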
Here is some of my own posts that might help you:
Exact results as yours: HERE
Some codes: HERE
I don't want to say this, but several professors I have met said backpropagation just doesn't work, and they had (and I have had) to implement their own method of teaching the network.
I have the following code. I read a block of pixels from the image and I would like to get the value of every block (a 16*16 array).
However, I get this error:
OpenCV Error: Assertion failed (dims <= 2 && data && (unsigned)i0 < (unsigned)size.p[0] && (unsigned)(i1*DataType<_Tp>::channels) < (unsigned)(size.p[1]*channels()) && ((((sizeof(size_t)<<28)|0x8442211)
((DataType<_Tp>::depth) & ((1 << 3) - 1))*4) & 15) == elemSize1()) in unknown function, file C:\opencv231\build\include\opencv2/core/mat.hpp, line 537
What should I change so that I can run my code ?
enum Color {White, Black};
Color checkBlock(Mat& img, int& i, int& j, double& T)
{
unsigned int Sum=0;
for(int k=0;k<16;k++)
for(int l=0;l<16;l++)
Sum += img.at<unsigned char>(i+k,j+l);
double Average = Sum/256;
std::cout << Average << std::endl;
return (Average > T) ? (White) : (Black);
}
void main()
{
Mat img = imread("Frame.jpg",0);
namedWindow( "Display window", CV_NORMAL );// Create a window for display.
if(!img.data)
std::cout << "error";
// STEPS TO CONVERT TO BINARY IMAGE
// LOAD THE IMAGE
cv::Mat imageMat = cv::imread("Frame.jpg", CV_LOAD_IMAGE_COLOR);
cv::Mat grayscaleMat (imageMat.size(), CV_8U);
//Convert BGR to Gray
cv::cvtColor(imageMat, grayscaleMat, CV_BGR2GRAY );
//Binary image
cv::Mat binaryMat(grayscaleMat.size(), grayscaleMat.type());
//Apply thresholding
cv::threshold(grayscaleMat, binaryMat, 100, 255, cv::THRESH_BINARY);
//Show the results
// cv::namedWindow("Output",CV_NORMAL);
//cv::imshow("Output", binaryMat);
// cv::waitKey(0);
double minVal, maxVal;
minMaxLoc(img,&minVal,&maxVal,NULL,NULL);
double Threshold = 0.5 * (minVal + maxVal);
int i=4,j=4;
Size s = img.size();
Color old_c, new_c;
// define the position wher i will begin to read the first row from the image
for (j=16*55;j<=s.height;j=j+16)
for(i=0;i<=s.width;i=i+16)
{
Point x(i,j);
Point y(i+16,j+16);
//std::cout << x << " " << y << std::endl;
rectangle(img, x, y, Scalar(255,0,0),3);
Color c = checkBlock(img,i,j,Threshold);
}
In this line you are using i to index a row:
Sum += img.at<unsigned char>(i+k,j+l);
But here, where i comes from, it is clearly the index of a col.
for(i=0;i<=s.width;i=i+16)
So that first line should be:
Sum += img.at<unsigned char>(j+l, i+k);
Just to be clear, the arguments to at are (row, col), while the parameters for Point are (x, y), which is a bit of a trap.
Also
for (j=16*55;j<=s.height;j=j+16)
for(i=0;i<=s.width;i=i+16)
...
Point y(i+16,j+16);
should be
for (j = 16 * 55; j < s.height - 15 ; j = j + 16)
for(i = 0; i < s.width - 15; i = i + 16)
...
Point y(i + 15, j + 15);
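Putting those corrections together, a fixed checkBlock might look roughly like this (a sketch, keeping the question's variable names; i is the column/x index and j the row/y index):
Color checkBlock(Mat& img, int& i, int& j, double& T)
{
unsigned int Sum = 0;
for (int k = 0; k < 16; k++)
for (int l = 0; l < 16; l++)
Sum += img.at<unsigned char>(j + l, i + k); // at<>(row, col), i.e. (y, x)
double Average = Sum / 256.0; // 256.0 avoids integer division
std::cout << Average << std::endl;
return (Average > T) ? White : Black;
}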
The imread() must include the full path of your file, and be sure to use double backslashes, e.g.
cv::Mat imageMat = cv::imread("C:\\Folder\\Frame.jpg", CV_LOAD_IMAGE_COLOR);
It is not necessary to specify the dimension or type of grayscaleMat or binaryMat, as the opencv functions will prepare them for you.
In your cvtColor function, it should be CV_RGB2GRAY not CV_BGR2GRAY.
In your threshold function, it should not be cv::THRESH_BINARY but rather CV_THRESH_BINARY.
Hope this helps.
I did look for the source code of filter2D but could not find it. Neither could Visual C++.
Are there any experts on the filter2D algorithm here? I know how it's supposed to work but not how it actually works. I made my own filter2d() function to test things, and the results are substantially different from OpenCV's filter2D(). Here's my code:
Mat myfilter2d(Mat input, Mat filter){
Mat dst = input.clone();
cout << " filter data successfully found. Rows:" << filter.rows << " cols:" << filter.cols << " channels:" << filter.channels() << "\n";
cout << " input data successfully found. Rows:" << input.rows << " cols:" << input.cols << " channels:" << input.channels() << "\n";
for (int i = 0-(filter.rows/2);i<input.rows-(filter.rows/2);i++){
for (int j = 0-(filter.cols/2);j<input.cols-(filter.cols/2);j++){ //adding k and l to i and j will make up the difference and allow us to process the whole image
float filtertotal = 0;
for (int k = 0; k < filter.rows;k++){
for (int l = 0; l < filter.rows;l++){
if(i+k >= 0 && i+k < input.rows && j+l >= 0 && j+l < input.cols){ //don't try to process pixels off the edge of the map
float a = input.at<uchar>(i+k,j+l);
float b = filter.at<float>(k,l);
float product = a * b;
filtertotal += product;
}
}
}
//filter all proccessed for this pixel, write it to dst
dst.at<uchar>(i+(filter.rows/2),j+(filter.cols/2)) = filtertotal;
}
}
return dst;
}
Anybody see anything wrong with my implementation? (besides being slow)
Here is my execution:
cvtColor(src,src_grey,CV_BGR2GRAY);
Mat dst = myfilter2d(src_grey,filter);
imshow("myfilter2d",dst);
filter2D(src_grey,dst2,-1,filter);
imshow("filter2d",dst2);
Here is my kernel:
float megapixelarray[basesize][basesize] = {
{1,1,-1,1,1},
{1,1,-1,1,1},
{1,1,1,1,1},
{1,1,-1,1,1},
{1,1,-1,1,1}
};
And here are the (substantially different) results:
Thoughts, anyone?
EDIT: Thanks to Brian's answer I added this code:
//normalize the kernel so its sum = 1
Scalar mysum = sum(dst);
dst = dst / mysum[0]; //make sure it's not 0
dst = dst * -1; //show negative
and filter2D worked better. Certain filters give an exact match, while other filters, like the Sobel, fail miserably.
I'm getting close to the actual algorithm, but not there yet. Anyone else with any ideas?
I think the issue is probably one of scale: if your input image is an 8-bit image, most of the time the convolution will produce a value that overflows the maximum value 255.
In your implementation it looks like you are getting the wrapped-around value, but most OpenCV functions handle overflow by capping to the maximum (or minimum) value. That explains why most of the output of OpenCV's function is white, and also why you are getting concentric shapes in your output too.
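For illustration, OpenCV's saturating conversion behaves like this (a tiny sketch, not taken from the question's code):
// cv::saturate_cast clamps out-of-range values instead of wrapping around.
uchar capped = cv::saturate_cast<uchar>(300); // 255, whereas (uchar)300 would wrap to 44
uchar floored = cv::saturate_cast<uchar>(-20); // 0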
To account for this, normalize your megapixelarray filter by dividing every value by the entire sum of the filter (i.e. make sure that the sum of the filter values is 1):
For example, instead of this filter (sum = 10):
1 1 1
1 2 1
1 1 1
Try this filter (sum = 1):
0.1 0.1 0.1
0.1 0.2 0.1
0.1 0.1 0.1
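In code, that normalization might look like this (a sketch reusing the question's megapixelarray and basesize):
// Wrap the raw array in a float Mat and divide by its sum so the kernel sums to 1.
Mat filter(basesize, basesize, CV_32F, megapixelarray);
Scalar kernelSum = sum(filter);
if (kernelSum[0] != 0)
filter = filter / kernelSum[0];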
Here is my solution for creating the filter2D manually:
#include <iostream>
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
using namespace cv;
using namespace std;
int main(int argc, const char * argv[]) {
Mat img;
Mat img_conv;
Mat my_kernel;
Mat my_conv;
// Controlling if the image is loaded correctly
img = imread("my_image.jpg",CV_LOAD_IMAGE_COLOR);
if(! img.data )
{
cout << "Could not open or find the image" << std::endl ;
return -1;
}
imshow("original image", img);
img.convertTo(img, CV_64FC3);
int kernel_size; // permitted sizes: 3, 5, 7, 9 etc
cout << "Select the size of kernel (it should be an odd number from 3 onwards): \n" << endl;
cin >> kernel_size;
// Defining the kernel here
int selection;
cout << "Select the type of kernel:\n" << "1. Identity Operator \n2. Mean Filter \n3. Spatial shift \n4. Sharpening\n-> ";
cin >> selection;
switch (selection){
case 1:
my_kernel = (Mat_<double>(kernel_size,kernel_size) << 0, 0, 0, 0, 1, 0, 0, 0, 0);
break;
case 2:
my_kernel = (Mat_<double>(kernel_size,kernel_size) << 1, 1, 1, 1, 1, 1, 1, 1, 1) / ( kernel_size * kernel_size);
break;
case 3:
my_kernel = (Mat_<double>(kernel_size,kernel_size) << 0, 0, 0, 0, 0, 1, 0, 0, 0);
break;
case 4:
my_kernel = (Mat_<double>(kernel_size,kernel_size) << -1, -1, -1, -1, 17, -1, -1, -1, -1) / ( kernel_size * kernel_size);
break;
default:
cerr << "Invalid selection";
return 1;
break;
}
cout << "my kernel:\n "<<my_kernel << endl;
// Adding a border of zeros around the original image, to avoid border problems during the convolution
img_conv = Mat(img.rows + my_kernel.rows - 1, img.cols + my_kernel.cols - 1, CV_64FC3, CV_RGB(0,0,0));
for (int x=0; x<img.rows; x++) {
for (int y=0; y<img.cols; y++) {
img_conv.at<Vec3d>(x+1,y+1)[0] = img.at<Vec3d>(x,y)[0];
img_conv.at<Vec3d>(x+1,y+1)[1] = img.at<Vec3d>(x,y)[1];
img_conv.at<Vec3d>(x+1,y+1)[2] = img.at<Vec3d>(x,y)[2];
}
}
//Performing the convolution
my_conv = Mat(img.rows, img.cols, CV_64FC3, CV_RGB(0,0,0));
for (int x=(my_kernel.rows-1)/2; x<img_conv.rows-((my_kernel.rows-1)/2); x++) {
for (int y=(my_kernel.cols-1)/2; y<img_conv.cols-((my_kernel.cols-1)/2); y++) {
double comp_1=0;
double comp_2=0;
double comp_3=0;
for (int u=-(my_kernel.rows-1)/2; u<=(my_kernel.rows-1)/2; u++) {
for (int v=-(my_kernel.cols-1)/2; v<=(my_kernel.cols-1)/2; v++) {
comp_1 = comp_1 + ( img_conv.at<Vec3d>(x+u,y+v)[0] * my_kernel.at<double>(u + ((my_kernel.rows-1)/2) ,v + ((my_kernel.cols-1)/2)));
comp_2 = comp_2 + ( img_conv.at<Vec3d>(x+u,y+v)[1] * my_kernel.at<double>(u + ((my_kernel.rows-1)/2),v + ((my_kernel.cols-1)/2)));
comp_3 = comp_3 + ( img_conv.at<Vec3d>(x+u,y+v)[2] * my_kernel.at<double>(u + ((my_kernel.rows-1)/2),v + ((my_kernel.cols-1)/2)));
}
}
my_conv.at<Vec3d>(x-((my_kernel.rows-1)/2),y-(my_kernel.cols-1)/2)[0] = comp_1;
my_conv.at<Vec3d>(x-((my_kernel.rows-1)/2),y-(my_kernel.cols-1)/2)[1] = comp_2;
my_conv.at<Vec3d>(x-((my_kernel.rows-1)/2),y-(my_kernel.cols-1)/2)[2] = comp_3;
}
}
my_conv.convertTo(my_conv, CV_8UC3);
imshow("convolution - manual", my_conv);
// Performing the filtering using the OpenCV functions
Mat dst;
filter2D(img, dst, -1 , my_kernel, Point( -1, -1 ), 0, BORDER_DEFAULT );
dst.convertTo(dst, CV_8UC3);
imshow("convlution - opencv", dst);
waitKey();
return 0;
}