I'm currently developing a program for a robot with a camera attached to the end effector.
The goal is to calculate where the robot's TCP appears in the camera frame. I'm using OpenCV in C++.
The robot is a UR5 from Universal Robots.
My plan:
collect multiple (8) data sets:
robot pose (XYZ in meters, directly from the robot controller)
robot rotation (rx ry rz in radians, directly from the robot controller)
take a picture of calibration pattern for each step
run calibrateCamera over the set of pictures to get tvec and rvec for every step
run calibrateHandEye
for t_gripper2base I use the robot pose
for R_gripper2base I use the robot rotation
for t_target2cam I use tvec from calibrateCamera
for R_target2cam I use rvec from calibrateCamera
I seem to get correct values (I measured the distance from the camera to the TCP, and t_cam2gripper seems to be correct).
Translation vector target to cam:
[-0.0001052803107026547;
-0.0780872727019615;
-0.1243323507069755]
Rotation matrix target to cam:
[0.9999922523048892, 0.002655868335207422, -0.002905459271957709;
-0.001229768871633805, 0.9119334002787367, 0.4103363999508009;
0.003739384804660116, -0.4103296477461107, 0.9119296010009958]
The formula to transform the point from TCP coordinates to the image should look like this:
(u,v,w) = C * T * (X,Y,Z,1)
But after the division by w my values are still way off (they should be around (600, 600)):
Actual image position vector homogenized:
[1778.542462313536;
-1626.72483032188;
1]
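Written out with OpenCV types, the formula above would look roughly like this (only a sketch: C stands for the 3x3 camera matrix, T for the 4x4 transform into the camera frame, and distortion is ignored):
// Sketch of (u, v, w) = C * T * (X, Y, Z, 1), followed by the division by w.
double X = 0.0, Y = 0.0, Z = 0.0;                          // point to project, in TCP coordinates
cv::Mat X_hom = (cv::Mat_<double>(4, 1) << X, Y, Z, 1.0);  // homogeneous point
cv::Mat p_cam = T * X_hom;                                 // 4x1 point in camera coordinates
cv::Mat uvw = C * p_cam.rowRange(0, 3);                    // 3x1 homogeneous pixel coordinates
double u = uvw.at<double>(0) / uvw.at<double>(2);
double v = uvw.at<double>(1) / uvw.at<double>(2);
My full program: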
#include <QCoreApplication>
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
int main(int argc, char *argv[])
{
QCoreApplication a(argc, argv);
Mat defaultCameraMatrix = (Mat_<double>(3, 3));
Mat defaultDistortion = (Mat_<double>(1, 5));
defaultCameraMatrix.at<double>(0, 0) = 1739.3749; // default values from previous intrinsic camera calibration
defaultCameraMatrix.at<double>(0, 1) = 0;
defaultCameraMatrix.at<double>(0, 2) = 639.5;
defaultCameraMatrix.at<double>(1, 0) = 0;
defaultCameraMatrix.at<double>(1, 1) = 1739.3749;
defaultCameraMatrix.at<double>(1, 2) = 479.5;
defaultCameraMatrix.at<double>(2, 0) = 0;
defaultCameraMatrix.at<double>(2, 1) = 0;
defaultCameraMatrix.at<double>(2, 2) = 1;
defaultDistortion.at<double>(0, 0) = -0.165909;
defaultDistortion.at<double>(0, 1) = 0.303675;
defaultDistortion.at<double>(0, 2) = 0.0;
defaultDistortion.at<double>(0, 3) = 0.0;
defaultDistortion.at<double>(0, 4) = 0.0;
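// Robot poses (gripper -> base): one translation and one rotation vector per calibration image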
vector<Mat> R_gripper2base, t_gripper2base, R_target2cam, t_target2cam;
Mat actualRobotPos1 = (Mat_<double>(3, 1)),
actualRobotPos2 = (Mat_<double>(3, 1)),
actualRobotPos3 = (Mat_<double>(3, 1)),
actualRobotPos4 = (Mat_<double>(3, 1)),
actualRobotPos5 = (Mat_<double>(3, 1)),
actualRobotPos6 = (Mat_<double>(3, 1)),
actualRobotPos7 = (Mat_<double>(3, 1)),
actualRobotPos8 = (Mat_<double>(3, 1));
actualRobotPos1.at<double>(0,0) = -0.193139;
actualRobotPos1.at<double>(1,0) = 0.463823;
actualRobotPos1.at<double>(2,0) = -0.025;
t_gripper2base.push_back(actualRobotPos1);
actualRobotPos2.at<double>(0,0) = -0.193139;
actualRobotPos2.at<double>(1,0) = 0.463823;
actualRobotPos2.at<double>(2,0) = -0.025;
t_gripper2base.push_back(actualRobotPos2);
actualRobotPos3.at<double>(0,0) = -0.21273;
actualRobotPos3.at<double>(1,0) = 0.4426;
actualRobotPos3.at<double>(2,0) = -0.0288;
t_gripper2base.push_back(actualRobotPos3);
actualRobotPos4.at<double>(0,0) = -0.17213;
actualRobotPos4.at<double>(1,0) = 0.4103;
actualRobotPos4.at<double>(2,0) = 0.014;
t_gripper2base.push_back(actualRobotPos4);
actualRobotPos5.at<double>(0,0) = -0.13724;
actualRobotPos5.at<double>(1,0) = 0.45;
actualRobotPos5.at<double>(2,0) = 0.02978;
t_gripper2base.push_back(actualRobotPos5);
actualRobotPos6.at<double>(0,0) = -0.1655;
actualRobotPos6.at<double>(1,0) = 0.478;
actualRobotPos6.at<double>(2,0) = -0.0211;
t_gripper2base.push_back(actualRobotPos6);
actualRobotPos7.at<double>(0,0) = -0.17018;
actualRobotPos7.at<double>(1,0) = 0.46458;
actualRobotPos7.at<double>(2,0) = -0.03761;
t_gripper2base.push_back(actualRobotPos7);
actualRobotPos8.at<double>(0,0) = -0.193139;
actualRobotPos8.at<double>(1,0) = 0.463823;
actualRobotPos8.at<double>(2,0) = 0.025;
t_gripper2base.push_back(actualRobotPos8);
Mat actualRobotRotVec1 = (Mat_<double>(3, 1)),
actualRobotRotVec2 = (Mat_<double>(3, 1)),
actualRobotRotVec3 = (Mat_<double>(3, 1)),
actualRobotRotVec4 = (Mat_<double>(3, 1)),
actualRobotRotVec5 = (Mat_<double>(3, 1)),
actualRobotRotVec6 = (Mat_<double>(3, 1)),
actualRobotRotVec7 = (Mat_<double>(3, 1)),
actualRobotRotVec8 = (Mat_<double>(3, 1));
actualRobotRotVec1.at<double>(0,0) = -3.14159;
actualRobotRotVec1.at<double>(1,0) = 0.00;
actualRobotRotVec1.at<double>(2,0) = 0.00719124;
R_gripper2base.push_back(actualRobotRotVec1);
actualRobotRotVec2.at<double>(0,0) = -2.06;
actualRobotRotVec2.at<double>(1,0) = -2.36;
actualRobotRotVec2.at<double>(2,0) = 0.03;
R_gripper2base.push_back(actualRobotRotVec2);
actualRobotRotVec3.at<double>(0,0) = 2.39;
actualRobotRotVec3.at<double>(1,0) = 1.86;
actualRobotRotVec3.at<double>(2,0) = 0.49;
R_gripper2base.push_back(actualRobotRotVec3);
actualRobotRotVec4.at<double>(0,0) = -2.66;
actualRobotRotVec4.at<double>(1,0) = 0.08;
actualRobotRotVec4.at<double>(2,0) = 0.09;
R_gripper2base.push_back(actualRobotRotVec4);
actualRobotRotVec5.at<double>(0,0) = -2.84;
actualRobotRotVec5.at<double>(1,0) = 0.19;
actualRobotRotVec5.at<double>(2,0) = 0.69;
R_gripper2base.push_back(actualRobotRotVec5);
actualRobotRotVec6.at<double>(0,0) = 2.1;
actualRobotRotVec6.at<double>(1,0) = -2.34;
actualRobotRotVec6.at<double>(2,0) = -0.02;
R_gripper2base.push_back(actualRobotRotVec6);
actualRobotRotVec7.at<double>(0,0) = 1.66;
actualRobotRotVec7.at<double>(1,0) = -2.53;
actualRobotRotVec7.at<double>(2,0) = -0.23;
R_gripper2base.push_back(actualRobotRotVec7);
actualRobotRotVec8.at<double>(0,0) = -3.14159;
actualRobotRotVec8.at<double>(1,0) = 0.00;
actualRobotRotVec8.at<double>(2,0) = 0.00719124;
R_gripper2base.push_back(actualRobotRotVec8);
// for(int i = 0; i < t_gripper2base.size(); i++)
// {
// cout << t_gripper2base[i] << endl << endl;
// }
// for(int i = 0; i < R_gripper2base.size(); i++)
// {
// cout << R_gripper2base[i] << endl << endl;
// }
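// Detect the chessboard in every calibration image and collect the 2D/3D correspondences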
vector<String> fileNames;
glob("PATH*.png", fileNames, false); // directory of images
vector<vector<Point2f>> corners(fileNames.size());
Mat chessboardImg, chessboardImgGray;
vector<Point3f> objp;
vector<vector<Point3f>> worldCoordinates;
int checkerBoard[2] = {9,6};
double fieldSize = 0.008;
Mat cameraMatrixHandEye, distortionHandEye;
vector<Mat> rvecs, tvecs;
for(int i = 1; i < checkerBoard[1]; i++){
for(int j = 1; j < checkerBoard[0]; j++){
objp.push_back(Point3f(j*fieldSize, i*fieldSize, 0));
}
}
for(int i = 0; i < 8; i++)
{
chessboardImg = imread(fileNames[i]);
cvtColor(chessboardImg, chessboardImgGray, COLOR_BGR2GRAY);
bool patternFound = findChessboardCorners(chessboardImgGray, Size(8,5), corners[i], CALIB_CB_ADAPTIVE_THRESH + CALIB_CB_NORMALIZE_IMAGE);
if(patternFound)
{
cornerSubPix(chessboardImgGray, corners[i],Size(11,11), Size(-1,-1), TermCriteria(TermCriteria::EPS + TermCriteria::MAX_ITER, 30, 0.1));
drawChessboardCorners(chessboardImg, Size(8,5), corners[i], patternFound);
worldCoordinates.push_back(objp);
}
//***** Check loaded images and detected chessboard *****
//imshow("source", chessboardImgGray);
//imshow("chess", chessboardImg);
//waitKey(0);
//*******************************************************
}
float reprojectionError = calibrateCamera(worldCoordinates, corners, Size(1280,960), cameraMatrixHandEye, distortionHandEye, rvecs, tvecs, CALIB_FIX_ASPECT_RATIO + CALIB_FIX_K3 +
CALIB_ZERO_TANGENT_DIST + CALIB_FIX_PRINCIPAL_POINT);
//***** Check camera calibration results *****
//cout << "Reprojection Error CHE: " << endl << reprojectionError << endl << endl;
//cout << "Camera Matrix CHE: " << endl << cameraMatrixHandEye << endl << endl;
//cout << "Distortion CHE: " << endl << distortionHandEye << endl << endl;
//for(int i = 0; i < numberOfPoses; i++)
//{
// cout << "No. " << i+1 << " Target translation: " << endl << tvecs[i] << endl << endl;
// cout << "No. " << i+1 << " Target rotation: " << endl << rvecs[i] << endl << endl;
//}
//********************************************/
for(int i = 0; i < rvecs.size(); i++)
{
t_target2cam.emplace_back(tvecs[i]);
R_target2cam.emplace_back(rvecs[i]);
}
// for(int i = 0; i < t_target2cam.size(); i++)
// {
// cout << t_target2cam[i] << endl << endl;
// }
// for(int i = 0; i < R_target2cam.size(); i++)
// {
// cout << R_target2cam[i] << endl << endl;
// }
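// Hand-eye calibration: estimate the camera -> gripper transform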
Mat R_cam2gripper;
Mat t_cam2gripper = (Mat_<double>(3, 1));
calibrateHandEye(R_gripper2base, t_gripper2base, R_target2cam, t_target2cam, R_cam2gripper, t_cam2gripper);
cout << t_cam2gripper << endl << endl;
cout << R_cam2gripper << endl << endl;
Mat transformationMat4x4 = (Mat_<double>(4, 4));
Mat transformationMatInv4x4 = (Mat_<double>(4, 4));
Mat R_cam2gripperInv = (Mat_<double>(3, 3));
Mat t_cam2gripperInv = (Mat_<double>(3, 1));
transformationMat4x4.at<double>(0, 0) = R_cam2gripper.at<double>(0, 0);
transformationMat4x4.at<double>(0, 1) = R_cam2gripper.at<double>(0, 1);
transformationMat4x4.at<double>(0, 2) = R_cam2gripper.at<double>(0, 2);
transformationMat4x4.at<double>(0, 3) = t_cam2gripper.at<double>(0, 0);
transformationMat4x4.at<double>(1, 0) = R_cam2gripper.at<double>(1, 0);
transformationMat4x4.at<double>(1, 1) = R_cam2gripper.at<double>(1, 1);
transformationMat4x4.at<double>(1, 2) = R_cam2gripper.at<double>(1, 2);
transformationMat4x4.at<double>(1, 3) = t_cam2gripper.at<double>(1, 0);
transformationMat4x4.at<double>(2, 0) = R_cam2gripper.at<double>(2, 0);
transformationMat4x4.at<double>(2, 1) = R_cam2gripper.at<double>(2, 1);
transformationMat4x4.at<double>(2, 2) = R_cam2gripper.at<double>(2, 2);
transformationMat4x4.at<double>(2, 3) = t_cam2gripper.at<double>(2, 0);
transformationMat4x4.at<double>(3, 0) = 0;
transformationMat4x4.at<double>(3, 1) = 0;
transformationMat4x4.at<double>(3, 2) = 0;
transformationMat4x4.at<double>(3, 3) = 1;
transformationMatInv4x4 = transformationMat4x4.inv();
R_cam2gripperInv.at<double>(0,0) = transformationMatInv4x4.at<double>(0,0);
R_cam2gripperInv.at<double>(0,1) = transformationMatInv4x4.at<double>(0,1);
R_cam2gripperInv.at<double>(0,2) = transformationMatInv4x4.at<double>(0,2);
R_cam2gripperInv.at<double>(1,0) = transformationMatInv4x4.at<double>(1,0);
R_cam2gripperInv.at<double>(1,1) = transformationMatInv4x4.at<double>(1,1);
R_cam2gripperInv.at<double>(1,2) = transformationMatInv4x4.at<double>(1,2);
R_cam2gripperInv.at<double>(2,0) = transformationMatInv4x4.at<double>(2,0);
R_cam2gripperInv.at<double>(2,1) = transformationMatInv4x4.at<double>(2,1);
R_cam2gripperInv.at<double>(2,2) = transformationMatInv4x4.at<double>(2,2);
t_cam2gripperInv.at<double>(0,0) = transformationMatInv4x4.at<double>(0,3);
t_cam2gripperInv.at<double>(1,0) = transformationMatInv4x4.at<double>(1,3);
t_cam2gripperInv.at<double>(2,0) = transformationMatInv4x4.at<double>(2,3);
cout << transformationMatInv4x4 << endl << endl;
cout << t_cam2gripperInv << endl << endl;
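// Project the TCP (the origin of the TCP frame) into the image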
Point3f objectPoints1, objectPoints2;
vector<Point3f> objectPoints;
objectPoints1.x = 0; //TCP in TCP-Coordinates
objectPoints1.y = 0;
objectPoints1.z = 0;
objectPoints.push_back(objectPoints1);
vector<Point2f> imagePoints;
projectPoints(objectPoints, R_cam2gripperInv, t_cam2gripperInv, defaultCameraMatrix, defaultDistortion, imagePoints);
cout << imagePoints[0] << endl << endl;
return a.exec();
}
You need to use solvePnP to get rvec and tvec for each image separately; then you will have a list of rvecs and tvecs whose length equals the number of images. To get a similar list of rvecs and tvecs for the gripper-to-base transformation, you need to derive the R and t matrices from the robot's forward kinematics, which take the rotation angles as input. Then, for each pose, you feed the rotation angle data into the R and t matrices to get an rvec and tvec per pose, and build lists of the same length. Then you input them to the calibrateHandEye function.
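A rough sketch of that per-image step, reusing the names from the question's code (objp, corners, defaultCameraMatrix, defaultDistortion); this is only an illustration of the suggestion, not code verified against the setup above:
// One solvePnP call per calibration image: target (chessboard) -> camera pose.
std::vector<cv::Mat> R_target2cam, t_target2cam;
for (size_t i = 0; i < corners.size(); ++i)
{
    cv::Mat rvec, tvec;
    cv::solvePnP(objp, corners[i], defaultCameraMatrix, defaultDistortion, rvec, tvec);
    R_target2cam.push_back(rvec);   // calibrateHandEye also accepts 3x1 rotation vectors
    t_target2cam.push_back(tvec);
}
// R_target2cam / t_target2cam now have one entry per image, matching the length of the
// R_gripper2base / t_gripper2base lists built from the robot poses.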
I finally resolved the problem.
The calibration was correct, but there were two mistakes in my approach:
To find my TCP I used the TCP coordinates from the robot controller. Instead, I had to use (0,0,0), which is the TCP expressed in the TCP coordinate system.
The second mistake was to use the transformation matrix built directly from the R and t returned by calibrateHandEye. Instead, I had to use the inverse of that transformation matrix.
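For reference, a minimal sketch of the corrected projection with those two fixes, reusing the names from the code above (inverting the rigid transform in closed form, [R | t]^-1 = [R^T | -R^T t], is just a shortcut for building and inverting the 4x4 matrix):
// Invert the cam -> gripper transform returned by calibrateHandEye.
cv::Mat R_gripper2cam = R_cam2gripper.t();
cv::Mat t_gripper2cam = -(R_gripper2cam * t_cam2gripper);
// Project the TCP, i.e. the point (0, 0, 0) expressed in the TCP frame.
cv::Mat rvec;
cv::Rodrigues(R_gripper2cam, rvec);
std::vector<cv::Point3f> tcp{ cv::Point3f(0.f, 0.f, 0.f) };
std::vector<cv::Point2f> pixel;
cv::projectPoints(tcp, rvec, t_gripper2cam, defaultCameraMatrix, defaultDistortion, pixel);
// pixel[0] is where the TCP should appear in the image.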
I use OpenCV's panorama stitching algorithm to stitch 2 or 3 images into one new result image.
I have coordinates of points in each source image. I need to calculate the new coordinates of these points in the result image.
I describe the algorithm below. My code is similar to the "stitching_detailed" sample from OpenCV (branch 3.4). A result_mask of type Mat is produced; maybe it is the solution, but I don't know how to use it. I found a related question here, but not about stitching.
Any idea?
Here is the algorithm (for detailed code: stitching_detailed.cpp):
Find features for each image:
Ptr<FeaturesFinder> finder = makePtr<SurfFeaturesFinder>();
vector<ImageFeatures> features(num_images);
for (int i = 0; i < num_images; ++i)
{
(*finder)(images[i], features[i]);
}
Make pairwise_matches:
vector<MatchesInfo> pairwise_matches;
Ptr<FeaturesMatcher> matcher = makePtr<BestOf2NearestMatcher>(false, match_conf);
(*matcher)(features, pairwise_matches);
Reorder the images:
vector<int> indices = leaveBiggestComponent(features, pairwise_matches, conf_thresh);
// here some code to reorder 'images'
Estimate camera parameters from homographies:
vector<CameraParams> cameras;
Ptr<Estimator> estimator = makePtr<HomographyBasedEstimator>();
(*estimator)(features, pairwise_matches, cameras);
Convert to CV_32F:
for (size_t i = 0; i < cameras.size(); ++i)
{
Mat R;
cameras[i].R.convertTo(R, CV_32F);
cameras[i].R = R;
}
Execute a BundleAdjuster:
Ptr<detail::BundleAdjusterBase> adjuster = makePtr<detail::BundleAdjusterRay>();
adjuster->setConfThresh(conf_thresh);
adjuster->setRefinementMask(refine_mask);
(*adjuster)(features, pairwise_matches, cameras);
Compute a value for warped_image_scale:
for (int i = 0; i < cameras.size(); ++i)
focals.push_back(cameras[i].focal);
float warped_image_scale = static_cast<float>(focals[focals.size() / 2 - 1] + focals[focals.size() / 2]) * 0.5f;
Do wave correction:
vector<Mat> rmats;
for (size_t i = 0; i < cameras.size(); ++i)
rmats.push_back(cameras[i].R.clone());
waveCorrect(rmats, wave_correct);
for (size_t i = 0; i < cameras.size(); ++i)
cameras[i].R = rmats[i];
Create a warper:
Ptr<WarperCreator> warper_creator = makePtr<cv::SphericalWarper>();
Ptr<RotationWarper> warper = warper_creator->create(static_cast<float>(warped_image_scale * seam_work_aspect));
Create a blender and feed it:
Ptr<Blender> blender;
for (size_t i = 0; i < cameras.size(); ++i)
{
full_img = input_imgs[img_idx];
if (!is_compose_scale_set)
{
is_compose_scale_set = true;
compose_scale = /* … */
}
if (abs(compose_scale - 1) > 1e-1)
resize(full_img, img, Size(), compose_scale, compose_scale, INTER_LINEAR_EXACT);
else
img = full_img;
// Warp the current image
warper->warp(img, K, cameras[img_idx].R, INTER_LINEAR, BORDER_REFLECT, img_warped);
// Warp the current image mask
mask.create(img_size, CV_8U);
mask.setTo(Scalar::all(255));
warper->warp(mask, K, cameras[img_idx].R, INTER_NEAREST, BORDER_CONSTANT, mask_warped);
// Compensate exposure
compensator->apply(img_idx, corners[img_idx], img_warped, mask_warped);
dilate(masks_warped[img_idx], dilated_mask, Mat());
resize(dilated_mask, seam_mask, mask_warped.size(), 0, 0, INTER_LINEAR_EXACT);
mask_warped = seam_mask & mask_warped;
if (!blender)
{
blender = Blender::createDefault(blend_type, try_gpu);
Size dst_sz = resultRoi(corners, sizes).size();
float blend_width = sqrt(static_cast<float>(dst_sz.area())) * blend_strength / 100.f;
MultiBandBlender *mb = dynamic_cast<MultiBandBlender *>(blender.get());
mb->setNumBands(static_cast<int>(ceil(log(blend_width) / log(2.)) - 1.));
blender->prepare(corners, sizes);
}
// Blend the current image
blender->feed(img_warped_s, mask_warped, corners[i]);
}
Then, use the blender:
Mat result, result_mask;
blender->blend(result, result_mask);
// The result image is in 'result'
When I was a schoolboy, I found opencv/samples/cpp/stitching_detailed.cpp in the OpenCV samples folder. At that time, my programming skills were very poor, and I couldn't understand it even though I racked my brains. This question attracted my attention and aroused my memory. After a whole night of hard work and debugging, I finally got it.
Basic steps:
Given the three images: blue.png, green.png, and red.png
We can get the stitching result (result.png) using stitching_detailed.cpp.
blender->blend(result, result_mask);
imwrite("result.png", result);
imwrite("result_mask.png", result_mask);
I choose the centers of the three images, calculate the corresponding (warped) coordinates on the stitched image, and draw them as solid circles, as follows:
Warping images (auxiliary)...
Compensating exposure...
Blending ...
Warp each center point, and draw solid circle.
[408, 204] => [532, 224]
[408, 204] => [359, 301]
[408, 204] => [727, 320]
Check `result.png`, `result_mask.png` and `result2.png`!
Done!
This is the function calcWarpedPoint I wrote to calculate the warped point on the stitched image:
cv::Point2f calcWarpedPoint(
const cv::Point2f& pt,
InputArray K, // Camera K parameter
InputArray R, // Camera R parameter
Ptr<RotationWarper> warper, // The Rotation Warper
const std::vector<cv::Point> &corners,
const std::vector<cv::Size> &sizes)
{
// Calculate the warped point using the camera parameters.
cv::Point2f dst = warper->warpPoint(pt, K, R);
// Calculate the stitching image roi using corners and sizes.
// the corners and sizes have already been calculated.
cv::Point2f tl = cv::detail::resultRoi(corners, sizes).tl();
// Finally adjust the warped point to the stitched image.
return cv::Point2f(dst.x - tl.x, dst.y - tl.y);
}
This is an example code snippet:
std::cout << "\nWarp each center point, and draw solid circle.\n";
std::vector<cv::Scalar> colors = { {255,0,0}, {0, 255, 0}, {0, 0, 255} };
for (int idx = 0; idx < img_names.size(); ++idx) {
img = cv::imread(img_names[idx]);
Mat K;
cameras[idx].K().convertTo(K, CV_32F);
Mat R = cameras[idx].R;
cv::Point2f cpt = cv::Point2f(img.cols / 2, img.rows / 2);
cv::Point pt = calcWarpedPoint(cpt, K, R, warper, corners, sizes);
cv::circle(result, pt, 5, colors[idx], -1, cv::LINE_AA);
std::cout << cpt << " => " << pt << std::endl;
}
std::cout << "\nCheck `result.png`, `result_mask.png` and `result2.png`!\n";
imwrite("result2.png", result);
The full code:
/*
* Author : Kinght-金(https://stackoverflow.com/users/3547485/)
* Created : 2019/03/01 23:00 (CST)
* Finished : 2019/03/01 07:50 (CST)
*
* Modified on opencv401/samples/cpp/stitching_detailed.cpp
* From https://github.com/opencv/opencv/blob/4.0.1/samples/cpp/stitching_detailed.cpp
*
*
* Description: A simple opencv(4.0.1) image stitching code for Stack Overflow answers.
* For https://stackoverflow.com/questions/54904718/compute-coordinates-from-source-images-after-stitching/54953792#comment96681412_54953792
*
*/
#include <iostream>
#include <fstream>
#include <string>
#include "opencv2/opencv_modules.hpp"
#include <opencv2/core/utility.hpp>
#include "opencv2/imgcodecs.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/stitching/detail/autocalib.hpp"
#include "opencv2/stitching/detail/blenders.hpp"
#include "opencv2/stitching/detail/camera.hpp"
#include "opencv2/stitching/detail/exposure_compensate.hpp"
#include "opencv2/stitching/detail/matchers.hpp"
#include "opencv2/stitching/detail/motion_estimators.hpp"
#include "opencv2/stitching/detail/seam_finders.hpp"
#include "opencv2/stitching/detail/warpers.hpp"
#include "opencv2/stitching/warpers.hpp"
using namespace std;
using namespace cv;
using namespace cv::detail;
//! img_names are the input image (full) paths
// You can download them using the links in the answer.
//! Blue: https://i.stack.imgur.com/Yz3U1.png
//! Green: https://i.stack.imgur.com/AbUTH.png
//! Red: https://i.stack.imgur.com/9wcGc.png
vector<String> img_names = {"D:/stitching/blue.png", "D:/stitching/green.png", "D:/stitching/red.png"};
//! The function to calculate the warped point on the stitching image.
cv::Point2f calcWarpedPoint(
const cv::Point2f& pt,
InputArray K, // Camera K parameter
InputArray R, // Camera R parameter
Ptr<RotationWarper> warper, // The Rotation Warper
const std::vector<cv::Point> &corners,
const std::vector<cv::Size> &sizes)
{
// Calculate the warped point
cv::Point2f dst = warper->warpPoint(pt, K, R);
// Calculate the stitching image roi using corners and sizes,
// the corners and sizes have already been calculated.
cv::Point2f tl = cv::detail::resultRoi(corners, sizes).tl();
// Finally adjust the warped point
return cv::Point2f(dst.x - tl.x, dst.y - tl.y);
}
int main(int argc, char* argv[])
{
double work_megapix = 0.6;
double seam_megapix = 0.1;
double compose_megapix = -1;
float conf_thresh = 1.f;
float match_conf = 0.3f;
float blend_strength = 5;
// Check if have enough images
int num_images = static_cast<int>(img_names.size());
if (num_images < 2)
{
std::cout << "Need more images\n";
return -1;
}
double work_scale = 1, seam_scale = 1, compose_scale = 1;
bool is_work_scale_set = false, is_seam_scale_set = false, is_compose_scale_set = false;
//(1) Create the feature finder
Ptr<Feature2D> finder = ORB::create();
// (2) Read the images, resize them appropriately, and compute image feature descriptors
Mat full_img, img;
vector<ImageFeatures> features(num_images);
vector<Mat> images(num_images);
vector<Size> full_img_sizes(num_images);
double seam_work_aspect = 1;
for (int i = 0; i < num_images; ++i)
{
full_img = imread(img_names[i]);
full_img_sizes[i] = full_img.size();
if (full_img.empty())
{
cout << "Can't open image " << img_names[i] << std::endl;
return -1;
}
if (!is_work_scale_set)
{
work_scale = min(1.0, sqrt(work_megapix * 1e6 / full_img.size().area()));
is_work_scale_set = true;
}
resize(full_img, img, Size(), work_scale, work_scale, INTER_LINEAR_EXACT);
if (!is_seam_scale_set)
{
seam_scale = min(1.0, sqrt(seam_megapix * 1e6 / full_img.size().area()));
seam_work_aspect = seam_scale / work_scale;
is_seam_scale_set = true;
}
computeImageFeatures(finder, img, features[i]);
features[i].img_idx = i;
std::cout << "Features in image #" << i + 1 << ": " << features[i].keypoints.size() << std::endl;
resize(full_img, img, Size(), seam_scale, seam_scale, INTER_LINEAR_EXACT);
images[i] = img.clone();
}
full_img.release();
img.release();
// (3) Create the feature matcher and compute pairwise match information
vector<MatchesInfo> pairwise_matches;
Ptr<FeaturesMatcher> matcher = makePtr<BestOf2NearestMatcher>(false, match_conf);
(*matcher)(features, pairwise_matches);
matcher->collectGarbage();
//! (4) Remove outliers and keep only the largest confident component
// Leave only images we are sure are from the same panorama
vector<int> indices = leaveBiggestComponent(features, pairwise_matches, conf_thresh);
vector<Mat> img_subset;
vector<String> img_names_subset;
vector<Size> full_img_sizes_subset;
for (size_t i = 0; i < indices.size(); ++i)
{
img_names_subset.push_back(img_names[indices[i]]);
img_subset.push_back(images[indices[i]]);
full_img_sizes_subset.push_back(full_img_sizes[indices[i]]);
}
images = img_subset;
img_names = img_names_subset;
full_img_sizes = full_img_sizes_subset;
// Check if we still have enough images
num_images = static_cast<int>(img_names.size());
if (num_images < 2)
{
std::cout << "Need more images\n";
return -1;
}
//!(5) Estimate homography
Ptr<Estimator> estimator = makePtr<HomographyBasedEstimator>();
vector<CameraParams> cameras;
if (!(*estimator)(features, pairwise_matches, cameras))
{
cout << "Homography estimation failed.\n";
return -1;
}
for (size_t i = 0; i < cameras.size(); ++i)
{
Mat R;
cameras[i].R.convertTo(R, CV_32F);
cameras[i].R = R;
std::cout << "\nInitial camera intrinsics #" << indices[i] + 1 << ":\nK:\n" << cameras[i].K() << "\nR:\n" << cameras[i].R << std::endl;
}
//(6) Create the bundle adjuster
Ptr<detail::BundleAdjusterBase> adjuster = makePtr<detail::BundleAdjusterRay>();
adjuster->setConfThresh(conf_thresh);
Mat_<uchar> refine_mask = Mat::zeros(3, 3, CV_8U);
refine_mask(0, 0) = 1;
refine_mask(0, 1) = 1;
refine_mask(0, 2) = 1;
refine_mask(1, 1) = 1;
refine_mask(1, 2) = 1;
adjuster->setRefinementMask(refine_mask);
if (!(*adjuster)(features, pairwise_matches, cameras))
{
cout << "Camera parameters adjusting failed.\n";
return -1;
}
// Find median focal length
vector<double> focals;
for (size_t i = 0; i < cameras.size(); ++i)
{
focals.push_back(cameras[i].focal);
}
sort(focals.begin(), focals.end());
float warped_image_scale;
if (focals.size() % 2 == 1)
warped_image_scale = static_cast<float>(focals[focals.size() / 2]);
else
warped_image_scale = static_cast<float>(focals[focals.size() / 2 - 1] + focals[focals.size() / 2]) * 0.5f;
std::cout << "\nWarping images (auxiliary)... \n";
vector<Point> corners(num_images);
vector<UMat> masks_warped(num_images);
vector<UMat> images_warped(num_images);
vector<Size> sizes(num_images);
vector<UMat> masks(num_images);
// Prepare image masks
for (int i = 0; i < num_images; ++i)
{
masks[i].create(images[i].size(), CV_8U);
masks[i].setTo(Scalar::all(255));
}
// Warp images and their masks
Ptr<WarperCreator> warper_creator = makePtr<cv::CylindricalWarper>();
if (!warper_creator)
{
cout << "Can't create the warper \n";
return 1;
}
//! Create RotationWarper
Ptr<RotationWarper> warper = warper_creator->create(static_cast<float>(warped_image_scale * seam_work_aspect));
//! Calculate warped corners/sizes/mask
for (int i = 0; i < num_images; ++i)
{
Mat_<float> K;
cameras[i].K().convertTo(K, CV_32F);
float swa = (float)seam_work_aspect;
K(0, 0) *= swa; K(0, 2) *= swa;
K(1, 1) *= swa; K(1, 2) *= swa;
corners[i] = warper->warp(images[i], K, cameras[i].R, INTER_LINEAR, BORDER_REFLECT, images_warped[i]);
sizes[i] = images_warped[i].size();
warper->warp(masks[i], K, cameras[i].R, INTER_NEAREST, BORDER_CONSTANT, masks_warped[i]);
}
vector<UMat> images_warped_f(num_images);
for (int i = 0; i < num_images; ++i)
images_warped[i].convertTo(images_warped_f[i], CV_32F);
std::cout << "Compensating exposure... \n";
//! Compute exposure and compensate the images to reduce brightness differences
Ptr<ExposureCompensator> compensator = ExposureCompensator::createDefault(ExposureCompensator::GAIN_BLOCKS);
if (dynamic_cast<BlocksCompensator*>(compensator.get()))
{
BlocksCompensator* bcompensator = dynamic_cast<BlocksCompensator*>(compensator.get());
bcompensator->setNrFeeds(1);
bcompensator->setNrGainsFilteringIterations(2);
bcompensator->setBlockSize(32, 32);
}
compensator->feed(corners, images_warped, masks_warped);
Ptr<SeamFinder> seam_finder = makePtr<detail::GraphCutSeamFinder>(GraphCutSeamFinderBase::COST_COLOR);
seam_finder->find(images_warped_f, corners, masks_warped);
// Release unused memory
images.clear();
images_warped.clear();
images_warped_f.clear();
masks.clear();
Mat img_warped, img_warped_s;
Mat dilated_mask, seam_mask, mask, mask_warped;
Ptr<Blender> blender;
double compose_work_aspect = 1;
for (int img_idx = 0; img_idx < num_images; ++img_idx)
{
// Read image and resize it if necessary
full_img = imread(img_names[img_idx]);
if (!is_compose_scale_set)
{
is_compose_scale_set = true;
compose_work_aspect = compose_scale / work_scale;
// Update warped image scale
warped_image_scale *= static_cast<float>(compose_work_aspect);
warper = warper_creator->create(warped_image_scale);
// Update corners and sizes
for (int i = 0; i < num_images; ++i)
{
cameras[i].focal *= compose_work_aspect;
cameras[i].ppx *= compose_work_aspect;
cameras[i].ppy *= compose_work_aspect;
Size sz = full_img_sizes[i];
if (std::abs(compose_scale - 1) > 1e-1)
{
sz.width = cvRound(full_img_sizes[i].width * compose_scale);
sz.height = cvRound(full_img_sizes[i].height * compose_scale);
}
Mat K;
cameras[i].K().convertTo(K, CV_32F);
Rect roi = warper->warpRoi(sz, K, cameras[i].R);
corners[i] = roi.tl();
sizes[i] = roi.size();
}
}
if (abs(compose_scale - 1) > 1e-1)
resize(full_img, img, Size(), compose_scale, compose_scale, INTER_LINEAR_EXACT);
else
img = full_img;
full_img.release();
Size img_size = img.size();
Mat K, R;
cameras[img_idx].K().convertTo(K, CV_32F);
R = cameras[img_idx].R;
// Warp the current image : img => img_warped
warper->warp(img, K, cameras[img_idx].R, INTER_LINEAR, BORDER_REFLECT, img_warped);
// Warp the current image mask
mask.create(img_size, CV_8U);
mask.setTo(Scalar::all(255));
warper->warp(mask, K, cameras[img_idx].R, INTER_NEAREST, BORDER_CONSTANT, mask_warped);
compensator->apply(img_idx, corners[img_idx], img_warped, mask_warped);
img_warped.convertTo(img_warped_s, CV_16S);
img_warped.release();
img.release();
mask.release();
dilate(masks_warped[img_idx], dilated_mask, Mat());
resize(dilated_mask, seam_mask, mask_warped.size(), 0, 0, INTER_LINEAR_EXACT);
mask_warped = seam_mask & mask_warped;
if (!blender)
{
blender = Blender::createDefault(Blender::MULTI_BAND, false);
Size dst_sz = resultRoi(corners, sizes).size();
float blend_width = sqrt(static_cast<float>(dst_sz.area())) * blend_strength / 100.f;
if (blend_width < 1.f){
blender = Blender::createDefault(Blender::NO, false);
}
else
{
MultiBandBlender* mb = dynamic_cast<MultiBandBlender*>(blender.get());
mb->setNumBands(static_cast<int>(ceil(log(blend_width) / log(2.)) - 1.));
}
blender->prepare(corners, sizes);
}
blender->feed(img_warped_s, mask_warped, corners[img_idx]);
}
/* ===========================================================================*/
// Blend image
std::cout << "\nBlending ...\n";
Mat result, result_mask;
blender->blend(result, result_mask);
imwrite("result.png", result);
imwrite("result_mask.png", result_mask);
std::cout << "\nWarp each center point, and draw solid circle.\n";
std::vector<cv::Scalar> colors = { {255,0,0}, {0, 255, 0}, {0, 0, 255} };
for (int idx = 0; idx < img_names.size(); ++idx) {
img = cv::imread(img_names[idx]);
Mat K;
cameras[idx].K().convertTo(K, CV_32F);
Mat R = cameras[idx].R;
cv::Point2f cpt = cv::Point2f(img.cols / 2, img.rows / 2);
cv::Point pt = calcWarpedPoint(cpt, K, R, warper, corners, sizes);
cv::circle(result, pt, 5, colors[idx], -1, cv::LINE_AA);
std::cout << cpt << " => " << pt << std::endl;
}
std::cout << "\nCheck `result.png`, `result_mask.png` and `result2.png`!\n";
imwrite("result2.png", result);
std::cout << "\nDone!\n";
/* ===========================================================================*/
return 0;
}
Some links that may be useful:
stitching_detailed.cpp : https://github.com/opencv/opencv/blob/4.0.1/samples/cpp/stitching_detailed.cpp
warper->warp(), warpPoint(), warpRoi() https://github.com/opencv/opencv/blob/master/modules/stitching/src/warpers.cpp#L153
resultRoi() https://github.com/opencv/opencv/blob/master/modules/stitching/src/util.cpp#L116
Other links that may be interesting:
Converting opencv remap code from c++ to python
Split text lines in scanned document
How do I use the relationships between Flann matches to determine a sensible homography?
I am currently trying to write a C++ wrapper for PSPNet's prediction (originally in Matlab). PSPNet runs on Caffe.
Situation: I have a trained Caffe model and would like to implement this wrapper to produce the segmentation result for a given input. In this case, my crop_size is smaller than the original image size. Thus, the image is manually cropped into multiple 425x425 "frames" which are pre-processed and fed forward into the Caffe net in a for loop.
Problem: However, the net seems to run forward only once despite being inside a for loop, as suggested by its processing time and output; see below.
This is the incomplete code I am currently trying to work on:
#define USE_OPENCV 1
#define trimapSize 1
#define Debug 0
#include <caffe/caffe.hpp>
#include "Header.h"
#include "caffe/data_reader.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/blob.hpp"
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif // USE_OPENCV
#include <algorithm>
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <chrono> //Just for time measurement
#include <cmath>
#include <array>
#include <iostream>
#include <fstream>
#ifdef USE_OPENCV
using namespace caffe; // NOLINT(build/namespaces)
using std::string;
class Classifier {
public:
Classifier(const string& model_file,
const string& trained_file);
cv::Mat Predict(const cv::Mat& img);
private:
void SetMean(int weight, int heigh);
void WrapInputLayer(std::vector<cv::Mat>* input_channels);
cv::Mat Visualization(Blob<float>* output_layer);
cv::Mat Preprocess(const cv::Mat& img_scale, int ori_rows, int ori_cols, std::vector<cv::Mat>* input_channels);
private:
shared_ptr<Net<float> > net_;
cv::Size input_geometry_;
int num_channels_;
cv::Mat mean_;
};
Classifier::Classifier(const string& model_file,
const string& trained_file) {
Caffe::set_mode(Caffe::GPU);
/* Load the network. */
net_.reset(new Net<float>(model_file, TEST));
net_->CopyTrainedLayersFrom(trained_file);
CHECK_EQ(net_->num_inputs(), 1) << "Network should have exactly one input.";
CHECK_EQ(net_->num_outputs(), 2) << "Network should have exactly one output.";
Blob<float>* input_layer = net_->input_blobs()[0];
num_channels_ = input_layer->channels();
CHECK(num_channels_ == 3 || num_channels_ == 1)
<< "Input layer should have 1 or 3 channels.";
input_geometry_ = cv::Size(input_layer->width(), input_layer->height());
}
/* Create the mean file in binaryproto format. */
void Classifier::SetMean(int weight, int heigh) {
mean_ = cv::Mat(heigh, weight, CV_32FC3);
mean_ = cv::Scalar(94.6744, 88.8887, 100.5404);//RGB
}
cv::Mat Classifier::Predict(const cv::Mat& img) {
cv::Mat originalTmp = img.clone();
Blob<float>* input_layer = net_->input_blobs()[0];
input_layer->Reshape(1, num_channels_,
input_geometry_.height, input_geometry_.width);
std::cout << "input_geometry_.height = " << input_geometry_.height << "input_geometry_.width = "<< input_geometry_.width << std::endl;
/* Forward dimension change to all layers. */
net_->Reshape();
std::vector<cv::Mat> input_channels;
WrapInputLayer(&input_channels);
/*-----------------------------FOR MULTI-SCALE PROCESSING--------------------------*/
int base_size = 0;
int ori_rows = img.rows;
int ori_cols = img.cols;
float scale_array [1] = {1};
// float scale_array = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
std::cout << "ori_rows = " << ori_rows << "\t ori_cols = " << ori_cols << std::endl;
cv::Mat data_all = cv::Mat::zeros(cv::Size(425, 425), CV_32FC3);
if (ori_rows > ori_cols) {
base_size = ori_rows;
}
else base_size = ori_cols;
std::cout << "base_size = " << base_size << std::endl;
std::cout << "size of array = " << (sizeof(scale_array)/sizeof(*scale_array)) << std::endl;
for (int i=0; i < (sizeof(scale_array)/sizeof(*scale_array)); i++){
int long_size = base_size * scale_array[i] + 1;
int new_rows = long_size;
int new_cols = long_size;
std::cout << "BEFORE new rows = " << new_rows << "\t new cols = " << new_cols << std::endl;
if (ori_rows > ori_cols){
new_cols = round(long_size/ori_rows*ori_cols);
}
else {new_rows = round(long_size/ori_cols*ori_rows);}
std::cout << "AFTER new rows = " << new_rows << "\t new cols = " << new_cols << std::endl;
cv::Mat img_scale;
cv::resize(img, img_scale, cv::Size(new_cols, new_rows), 0, 0, CV_INTER_LINEAR);
std::cout << "img_scale height: " << img_scale.rows << "\t width = " << img_scale.cols << std::endl;
cv::imshow("img_scale",img_scale);
cv::waitKey(0);
data_all = data_all + Preprocess(img_scale, ori_rows, ori_cols, &input_channels);
std::cout << "ok! DONE PREPROCESS!" << std::endl;
}
return data_all;
}
cv::Mat Classifier::Preprocess(const cv::Mat& img_scale, int ori_rows, int ori_cols, std::vector<cv::Mat>* input_channels)
{
int crop_size = 425;
int new_rows = img_scale.rows;
int new_cols = img_scale.cols;
cv::Mat data_output = cv::Mat::zeros(cv::Size(ori_cols, ori_rows), CV_32FC3);
int long_size = new_rows;
cv::Mat img_processed;
if (new_cols > new_rows){
long_size = new_cols;
}
if (long_size <= crop_size){
// img_processed = Preprocess(img_scale, &input_channels);
//RUN CAFFE --- NOT YET DONE ---
std::cout << "OK!" << std::endl;
}
else {
float stride_rate = 2.0/3.0;
std::cout << "stride_rate = " << stride_rate << std::endl;
int stride = ceil(crop_size*stride_rate);
std::cout << "stride = " << stride << std::endl;
cv::Mat img_pad = img_scale;
int pad_rows = img_pad.rows;
int pad_cols = img_pad.cols;
int h_grid = ceil((pad_rows - crop_size)/stride) + 1;
int w_grid = ceil((pad_cols - crop_size)/stride) + 1;
cv::Mat img_sub;
cv::Mat data_scale = cv::Mat::zeros(cv::Size(pad_cols, pad_cols), CV_32FC3);
for(int grid_yidx = 1; grid_yidx <= h_grid; grid_yidx++){
for (int grid_xidx = 1; grid_xidx <= w_grid; grid_xidx++){
int s_x = (grid_xidx-1)*stride+1;
int s_y = (grid_yidx-1)*stride+1;
int e_x = std::min(s_x + crop_size -1, pad_cols);
int e_y = std::min(s_y + crop_size -1, pad_rows);
s_x = e_x - crop_size + 1;
s_y = e_y - crop_size + 1;
/* Cropping image */
img_pad(cv::Rect(s_x,s_y,crop_size,crop_size)).copyTo(img_sub);
cv::Mat sample;
if (img_sub.channels() == 3 && num_channels_ == 1)
cv::cvtColor(img_sub, sample, cv::COLOR_BGR2GRAY);
else if (img_sub.channels() == 4 && num_channels_ == 1)
cv::cvtColor(img_sub, sample, cv::COLOR_BGRA2GRAY);
else if (img_sub.channels() == 4 && num_channels_ == 3)
cv::cvtColor(img_sub, sample, cv::COLOR_BGRA2BGR);
else if (img_sub.channels() == 1 && num_channels_ == 3)
cv::cvtColor(img_sub, sample, cv::COLOR_GRAY2BGR);
else
sample = img_sub;
cv::Mat sample_float;
if (num_channels_ == 3)
sample.convertTo(sample_float, CV_32FC3);
else
sample.convertTo(sample_float, CV_32FC1);
SetMean(sample.rows, sample.cols);
cv::imshow("sample_float", sample_float);
cv::cvtColor(sample_float, sample_float, cv::COLOR_BGRA2RGB);
sample_float = sample_float.t();
cv::Mat sample_normalized(sample_float.size(),sample_float.type());
cv::subtract(sample_float.clone(), mean_, sample_normalized);
cv::Mat sample_temp;
sample_normalized.convertTo(sample_temp, CV_32FC3, 255);
cv::imwrite("/home/sgp1053c/Desktop/PSPNET-cudnn5_wrapper/wrapper/sample_normalized.png", sample_temp);
cv::imshow("sample_normalized", sample_normalized);
cv::waitKey(0);
/* This operation will write the separate BGR planes directly to the
* input layer of the network because it is wrapped by the cv::Mat
* objects in input_channels. */
img_processed = sample_normalized.t();
cv::split(img_processed, *input_channels);
CHECK(reinterpret_cast<float*>(input_channels->at(0).data)
== net_->input_blobs()[0]->cpu_data())
<< "Input channels are not wrapping the input layer of the network.";
img_processed.convertTo(sample_temp, CV_32FC3, 255);
cv::imwrite("/home/sgp1053c/Desktop/PSPNET-cudnn5_wrapper/wrapper/img_processed.png", sample_temp);
cv::imshow("img_normalised",img_processed);
cv::waitKey();
std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); //Just for time measurement
// float loss = 0.0;
// net_->Forward(&loss);
net_->Forward();
std::chrono::steady_clock::time_point end= std::chrono::steady_clock::now();
std::cout << "Processing time = " << (std::chrono::duration_cast<std::chrono::microseconds>(end - begin).count())/1000000.0 << " sec" <<std::endl; //Just for time measurement
/* Copy the output layer to a std::vector */
Blob<float>* output_layer = net_->output_blobs()[0];
cv::Mat segment = Visualization(output_layer);
cv::imwrite("/home/sgp1053c/Desktop/PSPNET-cudnn5_wrapper/wrapper/segment.png", segment);
}
}
}
return (img_processed);
}
struct RGB {
int R;
int G;
int B;
};
vector<RGB> get_palette(int nClass)
{
vector<RGB> listPlalette;
RGB rgb0;
rgb0.R = 0;
rgb0.G = 0;
rgb0.B = 0;
listPlalette.push_back(rgb0);
for (int i = 1; i < nClass; i++)
{
RGB rgb;
rgb.R = i*50;
rgb.G = i*50 + i;
rgb.B = 255-i*20;
listPlalette.push_back(rgb);
}
return listPlalette;
}
cv::Mat Classifier::Visualization(Blob<float>* output_layer) {
std::vector<cv::Mat> input_channels;
int H = output_layer->height();
int W = output_layer->width();
// int N = output_layer->num(); //Batch Size
int C = output_layer->channels(); //Number of classes
int index = 0;
#ifdef CPU_ONLY
const float* output_data = output_layer->cpu_data();
#else
const float* output_data = output_layer->cpu_data();
#endif // !CPU_ONLY
cv::Mat class_each_row(C, W*H, CV_32F);
for (int i = 0; i < C; i++) {
for (int j = 0; j < (W*H); j++) {
class_each_row.at<float>(i, j) = output_data[index];
index = index + 1;
}
}
class_each_row = class_each_row.t();
//==================================CONVERT INTO LABELS==================================//
float maxValue = 0;
int* labelIndex = (int*)malloc(W*H * sizeof(int));
int indexX = 0;
for (int i = 0; i < class_each_row.rows; i++) {
maxValue = -999999999999;
indexX = 0;
for (int k = 0; k < C; k++)
{
float dataM = class_each_row.at<float>(i, k);
if (dataM > maxValue) {
maxValue = dataM;
indexX = k;
}
}
labelIndex[i] = indexX;
}
cv::Mat labelTmp(W, H, CV_8UC3);
uchar* dataLabelTmp = labelTmp.data;
vector<RGB> listPalette = get_palette(21);
for (int i = 0; i < H; i++)
{
for (int j = 0; j < W; j++)
{
RGB rgb = listPalette[labelIndex[(i*W + j)]];
dataLabelTmp[3 * (i*W + j)] = rgb.B;
dataLabelTmp[3 * (i*W + j) + 1] = rgb.G;
dataLabelTmp[3 * (i*W + j) + 2] = rgb.R;
}
}
cv::imshow( "Display window", labelTmp);
cv::waitKey(0);
free(labelIndex);
labelIndex = NULL;
return labelTmp;
}
/* Wrap the input layer of the network in separate cv::Mat objects
* (one per channel). This way we save one memcpy operation and we
* don't need to rely on cudaMemcpy2D. The last preprocessing
* operation will write the separate channels directly to the input
* layer. */
void Classifier::WrapInputLayer(std::vector<cv::Mat>* input_channels) {
Blob<float>* input_layer = net_->input_blobs()[0];
int width = input_layer->width();
int height = input_layer->height();
float* input_data = input_layer->mutable_cpu_data();
for (int i = 0; i < input_layer->channels(); ++i) {
cv::Mat channel(height, width, CV_32FC1, input_data);
input_channels->push_back(channel);
input_data += width * height;
}
}
int main(int argc, char** argv) {
if (argc != 4) {
std::cerr << "Usage: " << argv[0]
<< " \ndeploy.prototxt \nnetwork.caffemodel"
<< " \nimg.jpg" << " \ncamvid12.png (for example: /SegNet-Tutorial/Scripts/camvid12.png)" << std::endl;
return 1;
}
::google::InitGoogleLogging(argv[0]);
string model_file = argv[1];
string trained_file = argv[2]; //for visualization
Classifier classifier(model_file, trained_file);
string file = argv[3];
std::cout << "---------- Semantic Segmentation for "
<< file << " ----------" << std::endl;
cv::Mat img = cv::imread(file, 1);
CHECK(!img.empty()) << "Unable to decode image " << file;
cv::Mat prediction;
classifier.Predict(img);
}
#else
int main(int argc, char** argv) {
LOG(FATAL) << "This example requires OpenCV; compile with USE_OPENCV.";
}
#endif //USE_OPENCV
To clarify: the for loop refers to the one in Preprocess, specifically this portion:
for(int grid_yidx = 1; grid_yidx <= h_grid; grid_yidx++){
for (int grid_xidx = 1; grid_xidx <= w_grid; grid_xidx++){
int s_x = (grid_xidx-1)*stride+1;
int s_y = (grid_yidx-1)*stride+1;
int e_x = std::min(s_x + crop_size -1, pad_cols);
int e_y = std::min(s_y + crop_size -1, pad_rows);
s_x = e_x - crop_size + 1;
s_y = e_y - crop_size + 1;
/* Cropping image */
img_pad(cv::Rect(s_x,s_y,crop_size,crop_size)).copyTo(img_sub);
cv::Mat sample;
if (img_sub.channels() == 3 && num_channels_ == 1)
cv::cvtColor(img_sub, sample, cv::COLOR_BGR2GRAY);
else if (img_sub.channels() == 4 && num_channels_ == 1)
cv::cvtColor(img_sub, sample, cv::COLOR_BGRA2GRAY);
else if (img_sub.channels() == 4 && num_channels_ == 3)
cv::cvtColor(img_sub, sample, cv::COLOR_BGRA2BGR);
else if (img_sub.channels() == 1 && num_channels_ == 3)
cv::cvtColor(img_sub, sample, cv::COLOR_GRAY2BGR);
else
sample = img_sub;
cv::Mat sample_float;
if (num_channels_ == 3)
sample.convertTo(sample_float, CV_32FC3);
else
sample.convertTo(sample_float, CV_32FC1);
SetMean(sample.rows, sample.cols);
cv::imshow("sample_float", sample_float);
cv::cvtColor(sample_float, sample_float, cv::COLOR_BGRA2RGB);
sample_float = sample_float.t();
cv::Mat sample_normalized(sample_float.size(),sample_float.type());
cv::subtract(sample_float.clone(), mean_, sample_normalized);
cv::Mat sample_temp;
sample_normalized.convertTo(sample_temp, CV_32FC3, 255);
cv::imwrite("/home/sgp1053c/Desktop/PSPNET-cudnn5_wrapper/wrapper/sample_normalized.png", sample_temp);
cv::imshow("sample_normalized", sample_normalized);
cv::waitKey(0);
/* This operation will write the separate BGR planes directly to the
* input layer of the network because it is wrapped by the cv::Mat
* objects in input_channels. */
img_processed = sample_normalized.t();
cv::split(img_processed, *input_channels);
CHECK(reinterpret_cast<float*>(input_channels->at(0).data)
== net_->input_blobs()[0]->cpu_data())
<< "Input channels are not wrapping the input layer of the network.";
img_processed.convertTo(sample_temp, CV_32FC3, 255);
cv::imwrite("/home/sgp1053c/Desktop/PSPNET-cudnn5_wrapper/wrapper/img_processed.png", sample_temp);
cv::imshow("img_normalised",img_processed);
cv::waitKey();
std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); //Just for time measurement
// float loss = 0.0;
// net_->Forward(&loss);
net_->Forward();
std::chrono::steady_clock::time_point end= std::chrono::steady_clock::now();
std::cout << "Processing time = " << (std::chrono::duration_cast<std::chrono::microseconds>(end - begin).count())/1000000.0 << " sec" <<std::endl; //Just for time measurement
/* Copy the output layer to a std::vector */
Blob<float>* output_layer = net_->output_blobs()[0];
cv::Mat segment = Visualization(output_layer);
cv::imwrite("/home/sgp1053c/Desktop/PSPNET-cudnn5_wrapper/wrapper/segment.png", segment);
}
}
Original image (without pre-processing): [image]
Input (first cropped frame): [image]
Output of the first cropped frame: [image]
Time taken for forwarding: [screenshot]
The following cropped frames give the same output throughout.
P.S.: If I move the code below to the end of the Predict function and return segment instead, it works, but then only the last cropped frame is segmented.
std::chrono::steady_clock::time_point begin =
std::chrono::steady_clock::now(); //Just for time measurement
// float loss = 0.0;
// net_->Forward(&loss);
net_->Forward();
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
std::cout << "Processing time = " << (std::chrono::duration_cast<std::chrono::microseconds>(end - begin).count())/1000000.0 << " sec" <<std::endl; //Just for time measurement
/* Copy the output layer to a std::vector */
Blob<float>* output_layer = net_->output_blobs()[0];
cv::Mat segment = Visualization(output_layer);
cv::imwrite("/home/sgp1053c/Desktop/PSPNET-cudnn5_wrapper/wrapper/segment.png", segment);`
Input (last cropped frame of the pre-processed image): [image]
Output of the last cropped frame: [image]
Any help will be appreciated, thank youuuuu!!!
This issue is solved by re-wrapping the input channels each time the input changes, so that the input is fed forward correctly.
Thus the function:
WrapInputLayer(input_channels);
should be called in the double for loop.
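A rough sketch of where that call would go, assuming it is placed at the start of each crop iteration inside Classifier::Preprocess (the clear() is my own addition, so the re-wrapped Mats replace the stale ones instead of being appended to the vector):
// Inside the double for loop, before writing the crop into the network input:
input_channels->clear();                    // drop the Mats wrapping the previous blob buffer
WrapInputLayer(input_channels);             // re-wrap the current input blob memory
img_processed = sample_normalized.t();
cv::split(img_processed, *input_channels);  // writes the planes straight into the blob
net_->Forward();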
I'm having a problem writing an OpenCV program to project a 3D point. I seem to be running into this problem when using OpenCV's projectPoints function.
Here is the error I got:
OpenCV Error: Assertion failed (mtype == type0 || (CV_MAT_CN(mtype) == CV_MAT_CN(type0) && ((1 << type0) & fixedDepthMask) != 0)) in create, file /home/daniel/Comp4102/opencv/modules/core/src/matrix.cpp, line 2375
terminate called after throwing an instance of 'cv::Exception'
what(): /home/daniel/Comp4102/opencv/modules/core/src/matrix.cpp:2375: error: (-215) mtype == type0 || (CV_MAT_CN(mtype) == CV_MAT_CN(type0) && ((1 << type0) & fixedDepthMask) != 0) in function create
And here is the code that I wrote:
#include "opencv2/core/core.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/calib3d/calib3d.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <iostream>
#include <string>
std::vector<cv::Point3d> set3DPoints();
int main( int argc, char* argv[]) {
// Setting given variables.
double f = 500;
double sx = 1;
double sy = 1;
double ox = 320;
double oy = 240;
std::vector<cv::Point3d> objectPoints = set3DPoints();
cv::Mat Xw(1,3,cv::DataType<double>::type);
Xw.at<double>(0,0) = 150;
Xw.at<double>(0,1) = 200;
Xw.at<double>(0,2) = 350;
// Create the K matrix.
cv::Mat K(3,3,cv::DataType<double>::type);
K.at<double>(0,0) = -f/sx;
K.at<double>(1,0) = 0;
K.at<double>(2,0) = ox;
K.at<double>(0,1) = 0;
K.at<double>(1,1) = -f/sy;
K.at<double>(2,1) = oy;
K.at<double>(0,2) = 0;
K.at<double>(1,2) = 0;
K.at<double>(2,2) = 1;
// Creating the Rotation Matrix
cv::Mat R(3,3,cv::DataType<double>::type);
R.at<double>(0,0) = 1;
R.at<double>(1,0) = 0;
R.at<double>(2,0) = 0;
R.at<double>(0,1) = 0;
R.at<double>(1,1) = 1;
R.at<double>(2,1) = 0;
R.at<double>(0,2) = 0;
R.at<double>(1,2) = 0;
R.at<double>(2,2) = 1;
// Creating the Translation vector
cv::Mat T(3,1,cv::DataType<double>::type);
T.at<double>(0) = -70;
T.at<double>(1) = -95;
T.at<double>(2) = -120;
std::cout << "K: " << "\n" << K << "\n";
std::cout << "R: " << "\n" << R << "\n";
std::cout << "T: " << "\n" << T << "\n";
// Create zero distortion
cv::Mat distCoeffs(4,1,cv::DataType<double>::type);
distCoeffs.at<double>(0) = 0;
distCoeffs.at<double>(1) = 0;
distCoeffs.at<double>(2) = 0;
distCoeffs.at<double>(3) = 0;
// Creating Rodrigues rotation matrix
cv::Mat rvecR(3,1,cv::DataType<double>::type);
cv::Rodrigues(R,rvecR);
std::vector<cv::Point2f> projectedPoints;
cv::projectPoints(objectPoints, rvecR, T, K, distCoeffs, projectedPoints);
for(unsigned int i=0; i<projectedPoints.size(); i++){
std::cout << "Image point: " << objectPoints[i] << " Projected to " << projectedPoints[i] << "\n";
}
return 0;
}
std::vector<cv::Point3d> set3DPoints() {
std::vector<cv::Point3d> points;
double x,y,z;
x=150;
y=200;
z=350;
points.push_back(cv::Point3d(x,y,z));
return points;
}
The projectPoints function needs the objectPoints and imagePoints arguments to have the same depth, while you're passing objectPoints as Point3d and imagePoints as Point2f.
The error is telling you that the two types are different: double != float.
Simply declare projectedPoints as Point2d, so that it has the same depth as Point3d:
std::vector<cv::Point2d> projectedPoints;
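For context, the assertion most likely comes from the output array creation: with CV_64F object points, projectPoints creates the output as CV_64FC2, which a fixed-type std::vector<cv::Point2f> (CV_32FC2) cannot hold. Only the declaration needs to change; the call itself stays the same:
std::vector<cv::Point2d> projectedPoints;  // double depth, matching the Point3d object points
cv::projectPoints(objectPoints, rvecR, T, K, distCoeffs, projectedPoints);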
I would like to weight luminance values onto a new image.
I have an image (5px.jpg) of 5 pixels with these luminance values: 50, 100, 150, 200, 250.
I have a vector of coefficients.
I created a new Mat Z which combines the luminance of 5px.jpg and the coefficients.
So, my first luminance value is 50 (lum[0]=50) and I want it to be applied to the first 5.1 pixels (coef[0]=5.1) of my matrix. To do that, I need to weight the 6th pixel with the first and the second luminance values. In my case, the luminance of my 6th pixel will be 95 because (0.1*50)+(0.9*100)=95.
And so on...
But I do not know why my code does not work.
I had already asked a similar question for a vector here, and now I'm trying to adapt it to an image.
My input picture: [image]
My output: [image]
#define MPI 3.14159265358979323846264338327950288419716939937510
#define RAD2DEG (180./MPI)
#include "opencv2/core/core.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <opencv2/opencv.hpp>
#include <iostream>
#include <cmath>
#include <math.h>
#include <string.h>
using namespace cv;
using namespace std;
int main()
{
Mat image = imread("5px.jpg", 1);
if (image.empty())
{
cout << "Couldn't load " << image << endl;
}
else
{
cout << "Image upload, go" << endl;
}
namedWindow("ImageIn", CV_WINDOW_AUTOSIZE);
imshow("ImageIn", image);
Mat imgGrayScale;
cvtColor(image, imgGrayScale, CV_BGR2GRAY);
float *deltaP = new float[imgGrayScale.cols];
float *angle = new float[imgGrayScale.cols];
float *coeff = new float[imgGrayScale.cols];
int col;
for (col = 0; col < imgGrayScale.cols; ++col)
{
//cout << "position x = " << col << endl;
deltaP[col] = imgGrayScale.at<uchar>(0, col);
//cout << "luminance = " << deltaP[col] << endl;
angle[col] = acos(deltaP[col] / 255);
//cout << "angle =" << angle[col] << endl;
coeff[col] = (1 / cos(angle[col]));
cout << "coeff = " << coeff[col] << endl;
}
int width = imgGrayScale.size().width;
int height = imgGrayScale.size().height;
int width2 = width * 5;
int idx_coef = 0;
Mat Z = Mat::zeros(height, width2, CV_8UC1);
//for (int r = 0; r < imgGrayScale.rows; r++)
//{
//cout << "Saut de ligne " << endl << endl << endl;
for (int t = 0; t < imgGrayScale.cols; t++)
{
//cout << "Saut de colonne " << endl;
// Assign the coefficient to a variable
int c = int(coeff[idx_coef]);
//cout << "x" << t << endl;
for (int i = 0; i < c; ++i)
{
Z.at<uchar>(0, c) = imgGrayScale.at<uchar>(0, t);
}
float alpha = fmod(coeff[idx_coef], 1.f);
float beta = 1.f - alpha;
Z.at<uchar>(0, c + 1) = (alpha * imgGrayScale.at<uchar>(0, t) + beta * imgGrayScale.at<uchar>(0, t + 1));
idx_coef++;
coeff[idx_coef] = coeff[idx_coef] - beta;
if (idx_coef >= width - 1)
{
int cc = int(coeff[idx_coef]);
for (int i = 0; i < cc; ++i)
{
Z.at<uchar>(0, c) = imgGrayScale.at<uchar>(0, t);
}
idx_coef = 0;
break;
}
}
//}
namedWindow("m", CV_WINDOW_AUTOSIZE);
imshow("m", Z);
imwrite("lumianacetest.jpg", Z);
int t = waitKey();
if ((char)t == 27)
return 0;
}
You messed up the indices while accessing the matrix Z. You shouldn't access Z at column c; you need to access the current column (as a vector::push_back would do). So you can keep the current column index in a variable, here idx_z, and increment it every time you access Z.
Here your Z is CV_8U, so you lose accuracy since your values are float. You can create Z as CV_32F, and if you need to store values in CV_8U format to save the image, you can convert it to CV_8U later.
The last columns of Z won't be set to any value (so I initialized them with value 0). If you need them to have the last value as in imgGrayScale, just uncomment the relevant part of the code.
Here the code:
#define MPI 3.14159265358979323846264338327950288419716939937510
#define RAD2DEG (180./MPI)
#include <opencv2/opencv.hpp>
#include <vector>
using namespace cv;
using namespace std;
int main()
{
Mat1b imgGrayScale = (Mat1b(2, 5) << 50, 100, 150, 200, 250,
50, 100, 150, 200, 250);
vector<float> deltaP(imgGrayScale.cols);
vector<float> angle(imgGrayScale.cols);
vector<float> coeff(imgGrayScale.cols);
int col;
for (col = 0; col < imgGrayScale.cols; ++col)
{
//cout << "position x = " << col << endl;
deltaP[col] = imgGrayScale.at<uchar>(0, col);
//cout << "luminance = " << deltaP[col] << endl;
angle[col] = acos(deltaP[col] / 255);
//cout << "angle =" << angle[col] << endl;
coeff[col] = (1 / cos(angle[col]));
cout << "coeff = " << coeff[col] << endl;
}
int width = imgGrayScale.size().width;
int height = imgGrayScale.size().height;
int width2 = width * 5;
Mat1f Z(height, width2, 0.f);
for (int r = 0; r < imgGrayScale.rows; r++)
{
int idx_lum = 0;
int idx_coef = 0;
int idx_z = 0;
vector<float> coef = coeff;
// Set all values in Z to the last value in imgGrayScale
Z.row(r) = imgGrayScale(r, imgGrayScale.cols-1);
while (true)
{
int c = int(coef[idx_coef]);
for (int i = 0; i < c; ++i)
{
Z(r, idx_z++) = imgGrayScale(r, idx_lum);
}
float alpha = fmod(coef[idx_coef], 1.f);
float beta = 1.f - alpha;
Z(r, idx_z++) = (alpha * imgGrayScale(r, idx_lum) + beta * imgGrayScale(r, idx_lum + 1));
idx_coef++;
idx_lum++;
coef[idx_coef] = coef[idx_coef] - beta;
if (idx_lum >= imgGrayScale.cols - 1 || idx_coef >= coef.size() - 1)
{
int cc = int(coef[idx_coef]);
for (int i = 0; i < cc; ++i)
{
Z(r, idx_z++) = imgGrayScale(r, idx_lum);
}
idx_coef = 0;
break;
}
}
}
Mat1b ZZ;
Z.convertTo(ZZ, CV_8U);
cout << "Float values:" << endl;
cout << Z << endl << endl;
cout << "Uchar values:" << endl;
cout << ZZ << endl << endl;
namedWindow("m", CV_WINDOW_AUTOSIZE);
imshow("m", Z);
imwrite("lumianacetest.png", ZZ);
waitKey();
return 0;
}
I'm currently trying to implement an example of OpenCV's projectPoints method. The idea behind this method is: taking as input a set of 3D points, the translation/rotation vectors of a given camera, and its distortion coefficients, it outputs the corresponding 2D points in the image plane.
The source of code is as follows:
#include "opencv2/core/core.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/calib3d/calib3d.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <iostream>
#include <string>
std::vector<cv::Point3d> Generate3DPoints();
int main(int argc, char* argv[])
{
// Read 3D points
std::vector<cv::Point3d> objectPoints = Generate3DPoints();
std::vector<cv::Point2d> imagePoints;
cv::Mat intrisicMat(3, 3, cv::DataType<double>::type); // Intrisic matrix
intrisicMat.at<double>(0, 0) = 1.6415318549788924e+003;
intrisicMat.at<double>(1, 0) = 0;
intrisicMat.at<double>(2, 0) = 0;
intrisicMat.at<double>(0, 1) = 0;
intrisicMat.at<double>(1, 1) = 1.7067753507885654e+003;
intrisicMat.at<double>(2, 1) = 0;
intrisicMat.at<double>(0, 2) = 5.3262822453148601e+002;
intrisicMat.at<double>(1, 2) = 3.8095355839052968e+002;
intrisicMat.at<double>(2, 2) = 1;
cv::Mat rVec(3, 1, cv::DataType<double>::type); // Rotation vector
rVec.at<double>(0) = -3.9277902400761393e-002;
rVec.at<double>(1) = 3.7803824407602084e-002;
rVec.at<double>(2) = 2.6445674487856268e-002;
cv::Mat tVec(3, 1, cv::DataType<double>::type); // Translation vector
tVec.at<double>(0) = 2.1158489381208221e+000;
tVec.at<double>(1) = -7.6847683212704716e+000;
tVec.at<double>(2) = 2.6169795190294256e+001;
cv::Mat distCoeffs(5, 1, cv::DataType<double>::type); // Distortion vector
distCoeffs.at<double>(0) = -7.9134632415085826e-001;
distCoeffs.at<double>(1) = 1.5623584435644169e+000;
distCoeffs.at<double>(2) = -3.3916502741726508e-002;
distCoeffs.at<double>(3) = -1.3921577146136694e-002;
distCoeffs.at<double>(4) = 1.1430734623697941e+002;
std::cout << "Intrisic matrix: " << intrisicMat << std::endl << std::endl;
std::cout << "Rotation vector: " << rVec << std::endl << std::endl;
std::cout << "Translation vector: " << tVec << std::endl << std::endl;
std::cout << "Distortion coef: " << distCoeffs << std::endl << std::endl;
std::vector<cv::Point2f> projectedPoints;
cv::projectPoints(objectPoints, rVec, tVec, intrisicMat, distCoeffs, projectedPoints);
/*for (unsigned int i = 0; i < projectedPoints.size(); ++i)
{
std::cout << "Image point: " << imagePoints[i] << " Projected to " << projectedPoints[i] << std::endl;
}*/
std::cout << "Press any key to exit.";
std::cin.ignore();
std::cin.get();
return 0;
}
std::vector<cv::Point3d> Generate3DPoints()
{
std::vector<cv::Point3d> points;
double x, y, z;
x = .5; y = .5; z = -.5;
points.push_back(cv::Point3d(x, y, z));
x = .5; y = .5; z = .5;
points.push_back(cv::Point3d(x, y, z));
x = -.5; y = .5; z = .5;
points.push_back(cv::Point3d(x, y, z));
x = -.5; y = .5; z = -.5;
points.push_back(cv::Point3d(x, y, z));
x = .5; y = -.5; z = -.5;
points.push_back(cv::Point3d(x, y, z));
x = -.5; y = -.5; z = -.5;
points.push_back(cv::Point3d(x, y, z));
x = -.5; y = -.5; z = .5;
points.push_back(cv::Point3d(x, y, z));
for(unsigned int i = 0; i < points.size(); ++i)
{
std::cout << points[i] << std::endl << std::endl;
}
return points;
}
The application crashes when I try to run the projectPoints method and I have no idea why. Any help would be greatly appreciated.
It seems to be complaining about the type of the output vector of points. Try to replace your call to projectPoints:
cv::projectPoints(objectPoints, rVec, tVec, intrisicMat, distCoeffs, projectedPoints);
by this call:
cv::projectPoints(objectPoints, rVec, tVec, intrisicMat, distCoeffs, imagePoints);
This uses the variable of type std::vector<cv::Point2d> instead of std::vector<cv::Point2f>.
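If you would rather keep float image points, an equivalent alternative (just an option, not what was asked) is to store the 3D points as Point3f so that the input and output depths match:
std::vector<cv::Point3f> objectPointsF;
objectPointsF.reserve(objectPoints.size());
for (const cv::Point3d& p : objectPoints)
    objectPointsF.push_back(cv::Point3f((float)p.x, (float)p.y, (float)p.z));  // narrow double -> float
std::vector<cv::Point2f> projectedPoints2f;
cv::projectPoints(objectPointsF, rVec, tVec, intrisicMat, distCoeffs, projectedPoints2f);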