caffe forward net in a for loop not working - c++

I am currently trying to write a c++ wrapper for PSPNet's prediction (originally in Matlab). PSPNet runs on Caffe.
Situation: I have a trained caffe model, and would like to implement this wrapper to run the segmentation result when given an input. In this case, my crop_size is smaller than it's original size. Thus, it is being cropped manually to multiple 425x425 "frames" and fed forward into caffe net after the pre-processes in a for-loop.
Problem: However, net seems to only be running forward once despite being in a for loop. Supported by its processing time and output, refer below.
This is the incomplete code I am currently trying to work on:
#define USE_OPENCV 1
#define trimapSize 1
#define Debug 0
#include <caffe/caffe.hpp>
#include "Header.h"
#include "caffe/data_reader.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/blob.hpp"
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif // USE_OPENCV
#include <algorithm>
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <chrono> //Just for time measurement
#include <cmath>
#include <array>
#include <iostream>
#include <fstream>
#ifdef USE_OPENCV
using namespace caffe; // NOLINT(build/namespaces)
using std::string;
class Classifier {
public:
Classifier(const string& model_file,
const string& trained_file);
cv::Mat Predict(const cv::Mat& img);
private:
void SetMean(int weight, int heigh);
void WrapInputLayer(std::vector<cv::Mat>* input_channels);
cv::Mat Visualization(Blob<float>* output_layer);
cv::Mat Preprocess(const cv::Mat& img_scale, int ori_rows, int ori_cols, std::vector<cv::Mat>* input_channels);
private:
shared_ptr<Net<float> > net_;
cv::Size input_geometry_;
int num_channels_;
cv::Mat mean_;
};
Classifier::Classifier(const string& model_file,
const string& trained_file) {
Caffe::set_mode(Caffe::GPU);
/* Load the network. */
net_.reset(new Net<float>(model_file, TEST));
net_->CopyTrainedLayersFrom(trained_file);
CHECK_EQ(net_->num_inputs(), 1) << "Network should have exactly one input.";
CHECK_EQ(net_->num_outputs(), 2) << "Network should have exactly one output.";
Blob<float>* input_layer = net_->input_blobs()[0];
num_channels_ = input_layer->channels();
CHECK(num_channels_ == 3 || num_channels_ == 1)
<< "Input layer should have 1 or 3 channels.";
input_geometry_ = cv::Size(input_layer->width(), input_layer->height());
}
/* Create the mean file in binaryproto format. */
void Classifier::SetMean(int weight, int heigh) {
mean_ = cv::Mat(heigh, weight, CV_32FC3);
mean_ = cv::Scalar(94.6744, 88.8887, 100.5404);//RGB
}
cv::Mat Classifier::Predict(const cv::Mat& img) {
cv::Mat originalTmp = img.clone();
Blob<float>* input_layer = net_->input_blobs()[0];
input_layer->Reshape(1, num_channels_,
input_geometry_.height, input_geometry_.width);
std::cout << "input_geometry_.height = " << input_geometry_.height << "input_geometry_.width = "<< input_geometry_.width << std::endl;
/* Forward dimension change to all layers. */
net_->Reshape();
std::vector<cv::Mat> input_channels;
WrapInputLayer(&input_channels);
/*-----------------------------FOR MULTI-SCALE PROCESSING--------------------------*/
int base_size = 0;
int ori_rows = img.rows;
int ori_cols = img.cols;
float scale_array [1] = {1};
// float scale_array = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
std::cout << "ori_rows = " << ori_rows << "\t ori_cols = " << ori_cols << std::endl;
cv::Mat data_all = cv::Mat::zeros(cv::Size(425, 425), CV_32FC3);
if (ori_rows > ori_cols) {
base_size = ori_rows;
}
else base_size = ori_cols;
std::cout << "base_size = " << base_size << std::endl;
std::cout << "size of array = " << (sizeof(scale_array)/sizeof(*scale_array)) << std::endl;
for (int i=0; i < (sizeof(scale_array)/sizeof(*scale_array)); i++){
int long_size = base_size * scale_array[i] + 1;
int new_rows = long_size;
int new_cols = long_size;
std::cout << "BEFORE new rows = " << new_rows << "\t new cols = " << new_cols << std::endl;
if (ori_rows > ori_cols){
new_cols = round(long_size/ori_rows*ori_cols);
}
else {new_rows = round(long_size/ori_cols*ori_rows);}
std::cout << "AFTER new rows = " << new_rows << "\t new cols = " << new_cols << std::endl;
cv::Mat img_scale;
cv::resize(img, img_scale, cv::Size(new_cols, new_rows), 0, 0, CV_INTER_LINEAR);
std::cout << "img_scale height: " << img_scale.rows << "\t width = " << img_scale.cols << std::endl;
cv::imshow("img_scale",img_scale);
cv::waitKey(0);
data_all = data_all + Preprocess(img_scale, ori_rows, ori_cols, &input_channels);
std::cout << "ok! DONE PREPROCESS!" << std::endl;
}
return data_all;
}
cv::Mat Classifier::Preprocess(const cv::Mat& img_scale, int ori_rows, int ori_cols, std::vector<cv::Mat>* input_channels)
{
int crop_size = 425;
int new_rows = img_scale.rows;
int new_cols = img_scale.cols;
cv::Mat data_output = cv::Mat::zeros(cv::Size(ori_cols, ori_rows), CV_32FC3);
int long_size = new_rows;
cv::Mat img_processed;
if (new_cols > new_rows){
long_size = new_cols;
}
if (long_size <= crop_size){
// img_processed = Preprocess(img_scale, &input_channels);
//RUN CAFFE --- NOT YET DONE ---
std::cout << "OK!" << std::endl;
}
else {
float stride_rate = 2.0/3.0;
std::cout << "stride_rate = " << stride_rate << std::endl;
int stride = ceil(crop_size*stride_rate);
std::cout << "stride = " << stride << std::endl;
cv::Mat img_pad = img_scale;
int pad_rows = img_pad.rows;
int pad_cols = img_pad.cols;
int h_grid = ceil((pad_rows - crop_size)/stride) + 1;
int w_grid = ceil((pad_cols - crop_size)/stride) + 1;
cv::Mat img_sub;
cv::Mat data_scale = cv::Mat::zeros(cv::Size(pad_cols, pad_cols), CV_32FC3);
for(int grid_yidx = 1; grid_yidx <= h_grid; grid_yidx++){
for (int grid_xidx = 1; grid_xidx <= w_grid; grid_xidx++){
int s_x = (grid_xidx-1)*stride+1;
int s_y = (grid_yidx-1)*stride+1;
int e_x = std::min(s_x + crop_size -1, pad_cols);
int e_y = std::min(s_y + crop_size -1, pad_rows);
s_x = e_x - crop_size + 1;
s_y = e_y - crop_size + 1;
/* Cropping image */
img_pad(cv::Rect(s_x,s_y,crop_size,crop_size)).copyTo(img_sub);
cv::Mat sample;
if (img_sub.channels() == 3 && num_channels_ == 1)
cv::cvtColor(img_sub, sample, cv::COLOR_BGR2GRAY);
else if (img_sub.channels() == 4 && num_channels_ == 1)
cv::cvtColor(img_sub, sample, cv::COLOR_BGRA2GRAY);
else if (img_sub.channels() == 4 && num_channels_ == 3)
cv::cvtColor(img_sub, sample, cv::COLOR_BGRA2BGR);
else if (img_sub.channels() == 1 && num_channels_ == 3)
cv::cvtColor(img_sub, sample, cv::COLOR_GRAY2BGR);
else
sample = img_sub;
cv::Mat sample_float;
if (num_channels_ == 3)
sample.convertTo(sample_float, CV_32FC3);
else
sample.convertTo(sample_float, CV_32FC1);
SetMean(sample.rows, sample.cols);
cv::imshow("sample_float", sample_float);
cv::cvtColor(sample_float, sample_float, cv::COLOR_BGRA2RGB);
sample_float = sample_float.t();
cv::Mat sample_normalized(sample_float.size(),sample_float.type());
cv::subtract(sample_float.clone(), mean_, sample_normalized);
cv::Mat sample_temp;
sample_normalized.convertTo(sample_temp, CV_32FC3, 255);
cv::imwrite("/home/sgp1053c/Desktop/PSPNET-cudnn5_wrapper/wrapper/sample_normalized.png", sample_temp);
cv::imshow("sample_normalized", sample_normalized);
cv::waitKey(0);
/* This operation will write the separate BGR planes directly to the
* input layer of the network because it is wrapped by the cv::Mat
* objects in input_channels. */
img_processed = sample_normalized.t();
cv::split(img_processed, *input_channels);
CHECK(reinterpret_cast<float*>(input_channels->at(0).data)
== net_->input_blobs()[0]->cpu_data())
<< "Input channels are not wrapping the input layer of the network.";
img_processed.convertTo(sample_temp, CV_32FC3, 255);
cv::imwrite("/home/sgp1053c/Desktop/PSPNET-cudnn5_wrapper/wrapper/img_processed.png", sample_temp);
cv::imshow("img_normalised",img_processed);
cv::waitKey();
std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); //Just for time measurement
// float loss = 0.0;
// net_->Forward(&loss);
net_->Forward();
std::chrono::steady_clock::time_point end= std::chrono::steady_clock::now();
std::cout << "Processing time = " << (std::chrono::duration_cast<std::chrono::microseconds>(end - begin).count())/1000000.0 << " sec" <<std::endl; //Just for time measurement
/* Copy the output layer to a std::vector */
Blob<float>* output_layer = net_->output_blobs()[0];
cv::Mat segment = Visualization(output_layer);
cv::imwrite("/home/sgp1053c/Desktop/PSPNET-cudnn5_wrapper/wrapper/segment.png", segment);
}
}
}
return (img_processed);
}
struct RGB {
int R;
int G;
int B;
};
vector<RGB> get_palette(int nClass)
{
vector<RGB> listPlalette;
RGB rgb0;
rgb0.R = 0;
rgb0.G = 0;
rgb0.B = 0;
listPlalette.push_back(rgb0);
for (int i = 1; i < nClass; i++)
{
RGB rgb;
rgb.R = i*50;
rgb.G = i*50 + i;
rgb.B = 255-i*20;
listPlalette.push_back(rgb);
}
return listPlalette;
}
cv::Mat Classifier::Visualization(Blob<float>* output_layer) {
std::vector<cv::Mat> input_channels;
int H = output_layer->height();
int W = output_layer->width();
// int N = output_layer->num(); //Batch Size
int C = output_layer->channels(); //Number of classes
int index = 0;
#ifdef CPU_ONLY
const float* output_data = output_layer->cpu_data();
#else
const float* output_data = output_layer->cpu_data();
#endif // !CPU_ONLY
cv::Mat class_each_row(C, W*H, CV_32F);
for (int i = 0; i < C; i++) {
for (int j = 0; j < (W*H); j++) {
class_each_row.at<float>(i, j) = output_data[index];
index = index + 1;
}
}
class_each_row = class_each_row.t();
//==================================CONVERT INTO LABELS==================================//
float maxValue = 0;
int* labelIndex = (int*)malloc(W*H * sizeof(int));
int indexX = 0;
for (int i = 0; i < class_each_row.rows; i++) {
maxValue = -999999999999;
indexX = 0;
for (int k = 0; k < C; k++)
{
float dataM = class_each_row.at<float>(i, k);
if (dataM > maxValue) {
maxValue = dataM;
indexX = k;
}
}
labelIndex[i] = indexX;
}
cv::Mat labelTmp(W, H, CV_8UC3);
uchar* dataLabelTmp = labelTmp.data;
vector<RGB> listPalette = get_palette(21);
for (int i = 0; i < H; i++)
{
for (int j = 0; j < W; j++)
{
RGB rgb = listPalette[labelIndex[(i*W + j)]];
dataLabelTmp[3 * (i*W + j)] = rgb.B;
dataLabelTmp[3 * (i*W + j) + 1] = rgb.G;
dataLabelTmp[3 * (i*W + j) + 2] = rgb.R;
}
}
cv::imshow( "Display window", labelTmp);
cv::waitKey(0);
free(labelIndex);
labelIndex = NULL;
return labelTmp;
}
/* Wrap the input layer of the network in separate cv::Mat objects
* (one per channel). This way we save one memcpy operation and we
* don't need to rely on cudaMemcpy2D. The last preprocessing
* operation will write the separate channels directly to the input
* layer. */
void Classifier::WrapInputLayer(std::vector<cv::Mat>* input_channels) {
Blob<float>* input_layer = net_->input_blobs()[0];
int width = input_layer->width();
int height = input_layer->height();
float* input_data = input_layer->mutable_cpu_data();
for (int i = 0; i < input_layer->channels(); ++i) {
cv::Mat channel(height, width, CV_32FC1, input_data);
input_channels->push_back(channel);
input_data += width * height;
}
}
int main(int argc, char** argv) {
if (argc != 4) {
std::cerr << "Usage: " << argv[0]
<< " \ndeploy.prototxt \nnetwork.caffemodel"
<< " \nimg.jpg" << " \ncamvid12.png (for example: /SegNet-Tutorial/Scripts/camvid12.png)" << std::endl;
return 1;
}
::google::InitGoogleLogging(argv[0]);
string model_file = argv[1];
string trained_file = argv[2]; //for visualization
Classifier classifier(model_file, trained_file);
string file = argv[3];
std::cout << "---------- Semantic Segmentation for "
<< file << " ----------" << std::endl;
cv::Mat img = cv::imread(file, 1);
CHECK(!img.empty()) << "Unable to decode image " << file;
cv::Mat prediction;
classifier.Predict(img);
}
#else
int main(int argc, char** argv) {
LOG(FATAL) << "This example requires OpenCV; compile with USE_OPENCV.";
}
#endif //USE_OPENCV
To clarify: The for-loop refers to the one in pre-process: specifically this portion:
for(int grid_yidx = 1; grid_yidx <= h_grid; grid_yidx++){
for (int grid_xidx = 1; grid_xidx <= w_grid; grid_xidx++){
int s_x = (grid_xidx-1)*stride+1;
int s_y = (grid_yidx-1)*stride+1;
int e_x = std::min(s_x + crop_size -1, pad_cols);
int e_y = std::min(s_y + crop_size -1, pad_rows);
s_x = e_x - crop_size + 1;
s_y = e_y - crop_size + 1;
/* Cropping image */
img_pad(cv::Rect(s_x,s_y,crop_size,crop_size)).copyTo(img_sub);
cv::Mat sample;
if (img_sub.channels() == 3 && num_channels_ == 1)
cv::cvtColor(img_sub, sample, cv::COLOR_BGR2GRAY);
else if (img_sub.channels() == 4 && num_channels_ == 1)
cv::cvtColor(img_sub, sample, cv::COLOR_BGRA2GRAY);
else if (img_sub.channels() == 4 && num_channels_ == 3)
cv::cvtColor(img_sub, sample, cv::COLOR_BGRA2BGR);
else if (img_sub.channels() == 1 && num_channels_ == 3)
cv::cvtColor(img_sub, sample, cv::COLOR_GRAY2BGR);
else
sample = img_sub;
cv::Mat sample_float;
if (num_channels_ == 3)
sample.convertTo(sample_float, CV_32FC3);
else
sample.convertTo(sample_float, CV_32FC1);
SetMean(sample.rows, sample.cols);
cv::imshow("sample_float", sample_float);
cv::cvtColor(sample_float, sample_float, cv::COLOR_BGRA2RGB);
sample_float = sample_float.t();
cv::Mat sample_normalized(sample_float.size(),sample_float.type());
cv::subtract(sample_float.clone(), mean_, sample_normalized);
cv::Mat sample_temp;
sample_normalized.convertTo(sample_temp, CV_32FC3, 255);
cv::imwrite("/home/sgp1053c/Desktop/PSPNET-cudnn5_wrapper/wrapper/sample_normalized.png", sample_temp);
cv::imshow("sample_normalized", sample_normalized);
cv::waitKey(0);
/* This operation will write the separate BGR planes directly to the
* input layer of the network because it is wrapped by the cv::Mat
* objects in input_channels. */
img_processed = sample_normalized.t();
cv::split(img_processed, *input_channels);
CHECK(reinterpret_cast<float*>(input_channels->at(0).data)
== net_->input_blobs()[0]->cpu_data())
<< "Input channels are not wrapping the input layer of the network.";
img_processed.convertTo(sample_temp, CV_32FC3, 255);
cv::imwrite("/home/sgp1053c/Desktop/PSPNET-cudnn5_wrapper/wrapper/img_processed.png", sample_temp);
cv::imshow("img_normalised",img_processed);
cv::waitKey();
std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); //Just for time measurement
// float loss = 0.0;
// net_->Forward(&loss);
net_->Forward();
std::chrono::steady_clock::time_point end= std::chrono::steady_clock::now();
std::cout << "Processing time = " << (std::chrono::duration_cast<std::chrono::microseconds>(end - begin).count())/1000000.0 << " sec" <<std::endl; //Just for time measurement
/* Copy the output layer to a std::vector */
Blob<float>* output_layer = net_->output_blobs()[0];
cv::Mat segment = Visualization(output_layer);
cv::imwrite("/home/sgp1053c/Desktop/PSPNET-cudnn5_wrapper/wrapper/segment.png", segment);
}
}
Original Image:Original Image (Without pre-processing)
Input: Input (first cropped frame)
Output: Output of the first cropped frame
Time taken for forwarding: Time taken
Following cropped frame gives the same output through out.
P/s: If i shift the code below to the end of predict function and return segment instead, it will work well. But only the last cropped frame will be segmented.
std::chrono::steady_clock::time_point begin =
std::chrono::steady_clock::now(); //Just for time measurement
// float loss = 0.0;
// net_->Forward(&loss);
net_->Forward();
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
std::cout << "Processing time = " << (std::chrono::duration_cast<std::chrono::microseconds>(end - begin).count())/1000000.0 << " sec" <<std::endl; //Just for time measurement
/* Copy the output layer to a std::vector */
Blob<float>* output_layer = net_->output_blobs()[0];
cv::Mat segment = Visualization(output_layer);
cv::imwrite("/home/sgp1053c/Desktop/PSPNET-cudnn5_wrapper/wrapper/segment.png", segment);`
input: Input (Last cropped frame of pre-processed image)
output: Output of the last cropped frame
Any help will be appreciated, thank youuuuu!!!

This issue is solved by wrapping the input channel each time it is changed so that the input will be fed forward correctly.
Thus the function:
WrapInputLayer(input_channels);
should be called in the double for loop.

Related

Issues in eliminate multiple bounding box in YoloV5 using C++ for custom model

I tried to perform object detection using the yolov5 model with c++. I have a custom-trained yolov5 model which is working perfectly fine in python but my whole complete setup is in c++ thereby I have to switch. So I have converted the yolov5s model into ONNX format and tried to run it as by "https://github.com/doleron/yolov4-opencv-cpp-python"1. Unfortunately, I'm getting multiple bounding boxes in the top left corner as in the image.
I don't know how to eliminate this kind of error, but when I used the inbuilt pre-train yolov5s model the c++ code is detecting and worked perfectly. Similarly, when I used the custom-trained model in python it's working perfectly.
Here is my c++ code for object detection using c++
#include <fstream>
#include <opencv2/opencv.hpp>
std::vector<std::string> load_class_list()
{
std::vector<std::string> class_list;
std::ifstream ifs("config_files/classes.txt");
std::string line;
while (getline(ifs, line))
{
class_list.push_back(line);
}
return class_list;
}
void load_net(cv::dnn::Net &net, bool is_cuda)
{
auto result = cv::dnn::readNet("config_files/yolov5s_custom.onnx");
if (is_cuda)
{
std::cout << "Attempty to use CUDA\n";
result.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
result.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA_FP16);
}
else
{
std::cout << "Running on CPU\n";
result.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
result.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
}
net = result;
}
const std::vector<cv::Scalar> colors = {cv::Scalar(255, 255, 0), cv::Scalar(0, 255, 0), cv::Scalar(0, 255, 255), cv::Scalar(255, 0, 0)};
const float INPUT_WIDTH = 640.0;
const float INPUT_HEIGHT = 640.0;
const float SCORE_THRESHOLD = 0.2;
const float NMS_THRESHOLD = 0.4;
const float CONFIDENCE_THRESHOLD = 0.4;
struct Detection
{
int class_id;
float confidence;
cv::Rect box;
};
cv::Mat format_yolov5(const cv::Mat &source) {
int col = source.cols;
int row = source.rows;
int _max = MAX(col, row);
cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);
source.copyTo(result(cv::Rect(0, 0, col, row)));
return result;
}
void detect(cv::Mat &image, cv::dnn::Net &net, std::vector<Detection> &output, const std::vector<std::string> &className) {
cv::Mat blob;
auto input_image = format_yolov5(image);
cv::dnn::blobFromImage(input_image, blob, 1./255., cv::Size(INPUT_WIDTH, INPUT_HEIGHT), cv::Scalar(), true, false);
net.setInput(blob);
std::vector<cv::Mat> outputs;
net.forward(outputs, net.getUnconnectedOutLayersNames());
float x_factor = input_image.cols / INPUT_WIDTH;
float y_factor = input_image.rows / INPUT_HEIGHT;
float *data = (float *)outputs[0].data;
const int dimensions = 85;
const int rows = 25200;
std::vector<int> class_ids;
std::vector<float> confidences;
std::vector<cv::Rect> boxes;
for (int i = 0; i < rows; ++i) {
float confidence = data[4];
if (confidence >= CONFIDENCE_THRESHOLD) {
float * classes_scores = data + 5;
cv::Mat scores(1, className.size(), CV_32FC1, classes_scores);
cv::Point class_id;
double max_class_score;
minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
if (max_class_score > SCORE_THRESHOLD) {
confidences.push_back(confidence);
class_ids.push_back(class_id.x);
float x = data[0];
float y = data[1];
float w = data[2];
float h = data[3];
int left = int((x - 0.5 * w) * x_factor);
int top = int((y - 0.5 * h) * y_factor);
int width = int(w * x_factor);
int height = int(h * y_factor);
boxes.push_back(cv::Rect(left, top, width, height));
}
}
data += 85;
}
std::vector<int> nms_result;
cv::dnn::NMSBoxes(boxes, confidences, SCORE_THRESHOLD, NMS_THRESHOLD, nms_result);
for (int i = 0; i < nms_result.size(); i++) {
int idx = nms_result[i];
Detection result;
result.class_id = class_ids[idx];
result.confidence = confidences[idx];
result.box = boxes[idx];
output.push_back(result);
}
}
int main(int argc, char **argv)
{
std::vector<std::string> class_list = load_class_list();
cv::Mat frame;
cv::VideoCapture capture("sample.mp4");
if (!capture.isOpened())
{
std::cerr << "Error opening video file\n";
return -1;
}
bool is_cuda = argc > 1 && strcmp(argv[1], "cuda") == 0;
cv::dnn::Net net;
load_net(net, is_cuda);
auto start = std::chrono::high_resolution_clock::now();
int frame_count = 0;
float fps = -1;
int total_frames = 0;
while (true)
{
capture.read(frame);
if (frame.empty())
{
std::cout << "End of stream\n";
break;
}
std::vector<Detection> output;
detect(frame, net, output, class_list);
frame_count++;
total_frames++;
int detections = output.size();
for (int i = 0; i < detections; ++i)
{
auto detection = output[i];
auto box = detection.box;
auto classId = detection.class_id;
const auto color = colors[classId % colors.size()];
cv::rectangle(frame, box, color, 3);
cv::rectangle(frame, cv::Point(box.x, box.y - 20), cv::Point(box.x + box.width, box.y), color, cv::FILLED);
cv::putText(frame, class_list[classId].c_str(), cv::Point(box.x, box.y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
}
if (frame_count >= 30)
{
auto end = std::chrono::high_resolution_clock::now();
fps = frame_count * 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
frame_count = 0;
start = std::chrono::high_resolution_clock::now();
}
if (fps > 0)
{
std::ostringstream fps_label;
fps_label << std::fixed << std::setprecision(2);
fps_label << "FPS: " << fps;
std::string fps_label_str = fps_label.str();
cv::putText(frame, fps_label_str.c_str(), cv::Point(10, 25), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2);
}
cv::imshow("output", frame);
if (cv::waitKey(1) != -1)
{
capture.release();
std::cout << "finished by user\n";
break;
}
}
std::cout << "Total frames: " << total_frames << "\n";
return 0;
}
Kindly guide me on how to eliminate these multiple boxes on the output video stream.

fisheyestereocalibration opencv 4.5.5 crashes

i’m desperatly trying to get my fisheye stereo camera working. I've got 2 210° cameras.
So far i’ve tried this code, it doesn’t work with his or mine images.
Whatever i do, it always crashes at cv::fisheye::stereoCalibrate with the error: OpenCV(4.5.5) Error: Assertion failed (abs_max < threshold) in cv::fisheye::stereoCalibrate, file ...\opencv-4.5.5\modules\calib3d\src\fisheye.cpp, line 1051. It tells me the error is in fisheye.cpp # line 1051. I put a breakpoint there to check if the condition comes true... in the first 2 loops abs_max<threshold is true, after the third loop it's false. Why does my code crash then?
I’ve tried different PCs with different openCV configurations (always 4.5.5).
I’ve tried to import stereo-parameters from MATLAB, that crashes as well, no matter if i use cv::fisheye:stereoCalibrate or cv::stereoCalibrate.
i appreciate your help :)
The Code:
#include <opencv2/core/core.hpp>
#include <opencv2/calib3d/calib3d.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <stdio.h>
#include <filesystem>
#include <string>
#include <vector>
#include <string>
#include <algorithm>
#include <iostream>
#include <iterator>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
using namespace std;
using namespace cv;
static int print_help(char** argv)
{
cout <<
" Given a list of chessboard images, the number of corners (nx, ny)\n"
" on the chessboards, and a flag: useCalibrated for \n"
" calibrated (0) or\n"
" uncalibrated \n"
" (1: use stereoCalibrate(), 2: compute fundamental\n"
" matrix separately) stereo. \n"
" Calibrate the cameras and display the\n"
" rectified results along with the computed disparity images. \n" << endl;
cout << "Usage:\n " << argv[0] << " -w=<board_width default=9> -h=<board_height default=6> -s=<square_size default=1.0> <image list XML/YML file default=stereo_calib.xml>\n" << endl;
return 0;
}
static void StereoCalib(const vector<string>& imagelist, Size boardSize, float squareSize, bool displayCorners = false, bool useCalibrated = true, bool showRectified = true)
{
if (imagelist.size() % 2 != 0)
{
cout << "Error: the image list contains odd (non-even) number of elements\n";
return;
}
const int maxScale = 2;
// ARRAY AND VECTOR STORAGE:
vector<vector<Vec2f> > imagePoints[2];
vector<vector<Vec3f> > objectPoints;
Size imageSize;
int i, j, k, nimages = (int)imagelist.size() / 2;
imagePoints[0].resize(nimages);
imagePoints[1].resize(nimages);
vector<string> goodImageList;
for (i = j = 0; i < nimages; i++)
{
for (k = 0; k < 2; k++)
{
const string& filename = imagelist[i * 2 + k];
Mat img = imread(filename, 0);
if (img.empty())
break;
if (imageSize == Size())
imageSize = img.size();
else if (img.size() != imageSize)
{
cout << "The image " << filename << " has the size different from the first image size. Skipping the pair\n";
break;
}
bool found = false;
vector<Vec2f>& corners = imagePoints[k][j];
for (int scale = 1; scale <= maxScale; scale++)
{
Mat timg;
if (scale == 1)
timg = img;
else
resize(img, timg, Size(), scale, scale, INTER_LINEAR_EXACT);
found = findChessboardCorners(timg, boardSize, corners,
CALIB_CB_ADAPTIVE_THRESH | CALIB_CB_NORMALIZE_IMAGE);
if (found)
{
if (scale > 1)
{
Mat cornersMat(corners);
cornersMat *= 1. / scale;
}
break;
}
}
if (displayCorners)
{
cout << filename << endl;
Mat cimg, cimg1;
cvtColor(img, cimg, COLOR_GRAY2BGR);
drawChessboardCorners(cimg, boardSize, corners, found);
double sf = 640. / MAX(img.rows, img.cols);
resize(cimg, cimg1, Size(), sf, sf, INTER_LINEAR_EXACT);
imshow("corners", cimg1);
char c = (char)waitKey(500);
if (c == 27 || c == 'q' || c == 'Q') //Allow ESC to quit
exit(-1);
}
else
putchar('.');
if (!found)
break;
cornerSubPix(img, corners, Size(11, 11), Size(-1, -1),
TermCriteria(TermCriteria::COUNT + TermCriteria::EPS,
30, 0.01));
}
if (k == 2)
{
goodImageList.push_back(imagelist[i * 2]);
goodImageList.push_back(imagelist[i * 2 + 1]);
j++;
}
}
cout << j << " pairs have been successfully detected.\n";
nimages = j;
if (nimages < 2)
{
cout << "Error: too little pairs to run the calibration\n";
return;
}
imagePoints[0].resize(nimages);
imagePoints[1].resize(nimages);
objectPoints.resize(nimages);
for (i = 0; i < nimages; i++)
{
for (j = 0; j < boardSize.height; j++)
for (k = 0; k < boardSize.width; k++)
objectPoints[i].push_back(Point3f(k * squareSize, j * squareSize, 0));
}
/*TEST mit richtigem Format an Vector Punkte*/
cv::FileStorage fs("omni_calib_data.xml", cv::FileStorage::READ);
std::vector<cv::Mat> objectPoints1;
std::vector<cv::Mat> imagePoints1;
cv::Size imgSize;
fs["objectPoints"] >> objectPoints1;
fs["imagePoints"] >> imagePoints1;
fs["imageSize"] >> imgSize;
cout << "Running stereo calibration ...\n";
Mat testL = imread("C:/Users/Moritz/Documents/Flexybot/Code/fisheyeStereoCalibration/imgs/left2.jpg"),
testR = imread("C:/Users/Moritz/Documents/Flexybot/Code/fisheyeStereoCalibration/imgs/right2.jpg");
printf("Starting Calibration\n");
cv::Matx33d K1, K2, R;
cv::Vec3d T;
cv::Vec4d D1, D2;
int flag = 0;
flag |= cv::fisheye::CALIB_RECOMPUTE_EXTRINSIC;
flag |= cv::fisheye::CALIB_CHECK_COND;
flag |= cv::fisheye::CALIB_FIX_SKEW;
//flag |= cv::fisheye::CALIB_FIX_K2;
//flag |= cv::fisheye::CALIB_FIX_K3;
//flag |= cv::fisheye::CALIB_FIX_K4;
cv::fisheye::stereoCalibrate(objectPoints, imagePoints[0], imagePoints[1],
K1, D1, K2, D2, testR.size(), R, T,0,
cv::TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 100, DBL_EPSILON));
cv::FileStorage fs1("stereo_params.xml", cv::FileStorage::WRITE);
fs1 << "K1" << Mat(K1);
fs1 << "K2" << Mat(K2);
fs1 << "D1" << D1;
fs1 << "D2" << D2;
fs1 << "R" << Mat(R);
fs1 << "T" << T;
printf("Done Calibration\n");
printf("Starting Rectification\n");
cv::Mat R1, R2, P1, P2, Q;
cv::fisheye::stereoRectify(K1, D1, K2, D2, testR.size(), R, T, R1, R2, P1, P2,
Q, CALIB_ZERO_DISPARITY, testR.size(), 0.0, 1.1);
fs1 << "R1" << R1;
fs1 << "R2" << R2;
fs1 << "P1" << P1;
fs1 << "P2" << P2;
fs1 << "Q" << Q;
printf("Done Rectification\n");
Mat rL, rR, tL, tR, kL, dL, kR, dR,pL,pR;
fisheye::calibrate(objectPoints, imagePoints[0], testL.size(), kL, dL, rL, tL, fisheye::CALIB_ZERO_DISPARITY);
fisheye::calibrate(objectPoints, imagePoints[1], testR.size(), kR, dR, rR, tR, fisheye::CALIB_ZERO_DISPARITY);
fisheye::estimateNewCameraMatrixForUndistortRectify(kL, dL, testL.size(), Matx33d::eye(), pL, 1);
fisheye::estimateNewCameraMatrixForUndistortRectify(kR, dR, testR.size(), Matx33d::eye(), pR, 1);
cv::fisheye::undistortImage(testL, testL, kL, dL,pL);
cv::fisheye::undistortImage(testR, testR, kR, dR,pR);
imshow("Links", testL);
imshow("Rechts", testR);
waitKey(0);
return;
}
static bool readStringList(const string& filename, vector<string>& l)
{
l.resize(0);
FileStorage fs(filename, FileStorage::READ);
if (!fs.isOpened())
return false;
FileNode n = fs.getFirstTopLevelNode();
if (n.type() != FileNode::SEQ)
return false;
FileNodeIterator it = n.begin(), it_end = n.end();
for (; it != it_end; ++it)
l.push_back((string)*it);
return true;
}
int main(int argc, char** argv)
{
Size boardSize;
string imagelistfn;
bool showRectified;
cv::CommandLineParser parser(argc, argv, "{w|9|}{h|6|}{s|20.0|}{nr||}{help||}{#input|stereo_calib.xml|}");
if (parser.has("help"))
return print_help(argv);
showRectified = !parser.has("nr");
imagelistfn = samples::findFile(parser.get<string>("#input"));
boardSize.width = parser.get<int>("w");
boardSize.height = parser.get<int>("h");
float squareSize = parser.get<float>("s");
if (!parser.check())
{
parser.printErrors();
return 1;
}
vector<string> imagelist;
bool ok = readStringList(imagelistfn, imagelist);
if (!ok || imagelist.empty())
{
cout << "can not open " << imagelistfn << " or the string list is empty" << endl;
return print_help(argv);
}
StereoCalib(imagelist, boardSize, squareSize, true, true, showRectified);
return 0;
}

How to find robot TCP in image with CalibrateHandEye

I'm currently developing a programm for a robot with a camera attached to the end affector.
The goal is to calculate where the robots TCP appears in the camera frame. I'm using opencv in c++.
The robot is a UR5 from Universal Robots.
My plan:
collect multiple (8) data-sets:
robot pose (XYZ in meters, directly from the robot controller)
robot rotation (rx ry rz in radians, directly from the robot controller)
take a picture of calibration pattern for each step
run calibrateCamera over the set of pictures to get tvec and rvec for every step
run calibrateHandEye
for t_gripper2base i use the robot pose
for R_gripper2base i use the robot rotation
for t_target2cam i use tvec from calibrateCamera
for R_target2cam i use rvec from calibrateCamera
I seem to get correct values (I measured the distance from cam to TCP and the t_cam2gripper seems to be correct.
Translation vector target to cam:
[-0.0001052803107026547;
-0.0780872727019615;
-0.1243323507069755]
Rotation matrix target to cam:
[0.9999922523048892, 0.002655868335207422, -0.002905459271957709;
-0.001229768871633805, 0.9119334002787367, 0.4103363999508009;
0.003739384804660116, -0.4103296477461107, 0.9119296010009958]
The formula to transform the point from TCP coordinates to the image should look like this:
(u,v,w) = C * T * (X,Y,Z,1)
But after the division by w my values are still way off (should be around (600,600)
Actual image position vector homogenized:
[1778.542462313536;
-1626.72483032188;
1]
#include <QCoreApplication>
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
int main(int argc, char *argv[])
{
QCoreApplication a(argc, argv);
Mat defaultCameraMatrix = (Mat_<double>(3, 3));
Mat defaultDistortion = (Mat_<double>(1, 5));
defaultCameraMatrix.at<double>(0, 0) = 1739.3749; // default values from previous intrinsic camera calibration
defaultCameraMatrix.at<double>(0, 1) = 0;
defaultCameraMatrix.at<double>(0, 2) = 639.5;
defaultCameraMatrix.at<double>(1, 0) = 0;
defaultCameraMatrix.at<double>(1, 1) = 1739.3749;
defaultCameraMatrix.at<double>(1, 2) = 479.5;
defaultCameraMatrix.at<double>(2, 0) = 0;
defaultCameraMatrix.at<double>(2, 1) = 0;
defaultCameraMatrix.at<double>(2, 2) = 1;
defaultDistortion.at<double>(0, 0) = -0.165909;
defaultDistortion.at<double>(0, 1) = 0.303675;
defaultDistortion.at<double>(0, 2) = 0.0;
defaultDistortion.at<double>(0, 3) = 0.0;
defaultDistortion.at<double>(0, 4) = 0.0;
vector<Mat> R_gripper2base, t_gripper2base, R_target2cam, t_target2cam;
Mat actualRobotPos1 = (Mat_<double>(3, 1)),
actualRobotPos2 = (Mat_<double>(3, 1)),
actualRobotPos3 = (Mat_<double>(3, 1)),
actualRobotPos4 = (Mat_<double>(3, 1)),
actualRobotPos5 = (Mat_<double>(3, 1)),
actualRobotPos6 = (Mat_<double>(3, 1)),
actualRobotPos7 = (Mat_<double>(3, 1)),
actualRobotPos8 = (Mat_<double>(3, 1));
actualRobotPos1.at<double>(0,0) = -0.193139;
actualRobotPos1.at<double>(1,0) = 0.463823;
actualRobotPos1.at<double>(2,0) = -0.025;
t_gripper2base.push_back(actualRobotPos1);
actualRobotPos2.at<double>(0,0) = -0.193139;
actualRobotPos2.at<double>(1,0) = 0.463823;
actualRobotPos2.at<double>(2,0) = -0.025;
t_gripper2base.push_back(actualRobotPos2);
actualRobotPos3.at<double>(0,0) = -0.21273;
actualRobotPos3.at<double>(1,0) = 0.4426;
actualRobotPos3.at<double>(2,0) = -0.0288;
t_gripper2base.push_back(actualRobotPos3);
actualRobotPos4.at<double>(0,0) = -0.17213;
actualRobotPos4.at<double>(1,0) = 0.4103;
actualRobotPos4.at<double>(2,0) = 0.014;
t_gripper2base.push_back(actualRobotPos4);
actualRobotPos5.at<double>(0,0) = -0.13724;
actualRobotPos5.at<double>(1,0) = 0.45;
actualRobotPos5.at<double>(2,0) = 0.02978;
t_gripper2base.push_back(actualRobotPos5);
actualRobotPos6.at<double>(0,0) = -0.1655;
actualRobotPos6.at<double>(1,0) = 0.478;
actualRobotPos6.at<double>(2,0) = -0.0211;
t_gripper2base.push_back(actualRobotPos6);
actualRobotPos7.at<double>(0,0) = -0.17018;
actualRobotPos7.at<double>(1,0) = 0.46458;
actualRobotPos7.at<double>(2,0) = -0.03761;
t_gripper2base.push_back(actualRobotPos7);
actualRobotPos8.at<double>(0,0) = -0.193139;
actualRobotPos8.at<double>(1,0) = 0.463823;
actualRobotPos8.at<double>(2,0) = 0.025;
t_gripper2base.push_back(actualRobotPos8);
Mat actualRobotRotVec1 = (Mat_<double>(3, 1)),
actualRobotRotVec2 = (Mat_<double>(3, 1)),
actualRobotRotVec3 = (Mat_<double>(3, 1)),
actualRobotRotVec4 = (Mat_<double>(3, 1)),
actualRobotRotVec5 = (Mat_<double>(3, 1)),
actualRobotRotVec6 = (Mat_<double>(3, 1)),
actualRobotRotVec7 = (Mat_<double>(3, 1)),
actualRobotRotVec8 = (Mat_<double>(3, 1));
actualRobotRotVec1.at<double>(0,0) = -3.14159;
actualRobotRotVec1.at<double>(1,0) = 0.00;
actualRobotRotVec1.at<double>(2,0) = 0.00719124;
R_gripper2base.push_back(actualRobotRotVec1);
actualRobotRotVec2.at<double>(0,0) = -2.06;
actualRobotRotVec2.at<double>(1,0) = -2.36;
actualRobotRotVec2.at<double>(2,0) = 0.03;
R_gripper2base.push_back(actualRobotRotVec2);
actualRobotRotVec3.at<double>(0,0) = 2.39;
actualRobotRotVec3.at<double>(1,0) = 1.86;
actualRobotRotVec3.at<double>(2,0) = 0.49;
R_gripper2base.push_back(actualRobotRotVec3);
actualRobotRotVec4.at<double>(0,0) = -2.66;
actualRobotRotVec4.at<double>(1,0) = 0.08;
actualRobotRotVec4.at<double>(2,0) = 0.09;
R_gripper2base.push_back(actualRobotRotVec4);
actualRobotRotVec5.at<double>(0,0) = -2.84;
actualRobotRotVec5.at<double>(1,0) = 0.19;
actualRobotRotVec5.at<double>(2,0) = 0.69;
R_gripper2base.push_back(actualRobotRotVec5);
actualRobotRotVec6.at<double>(0,0) = 2.1;
actualRobotRotVec6.at<double>(1,0) = -2.34;
actualRobotRotVec6.at<double>(2,0) = -0.02;
R_gripper2base.push_back(actualRobotRotVec6);
actualRobotRotVec7.at<double>(0,0) = 1.66;
actualRobotRotVec7.at<double>(1,0) = -2.53;
actualRobotRotVec7.at<double>(2,0) = -0.23;
R_gripper2base.push_back(actualRobotRotVec7);
actualRobotRotVec8.at<double>(0,0) = -3.14159;
actualRobotRotVec8.at<double>(1,0) = 0.00;
actualRobotRotVec8.at<double>(2,0) = 0.00719124;
R_gripper2base.push_back(actualRobotRotVec8);
// for(int i = 0; i < t_gripper2base.size(); i++)
// {
// cout << t_gripper2base[i] << endl << endl;
// }
// for(int i = 0; i < R_gripper2base.size(); i++)
// {
// cout << R_gripper2base[i] << endl << endl;
// }
vector<String> fileNames;
glob("PATH*.png", fileNames, false); // directory of images
vector<vector<Point2f>> corners(fileNames.size());
Mat chessboardImg, chessboardImgGray;
vector<Point3f> objp;
vector<vector<Point3f>> worldCoordinates;
int checkerBoard[2] = {9,6};
double fieldSize = 0.008;
Mat cameraMatrixHandEye, distortionHandEye;
vector<Mat> rvecs, tvecs;
for(int i = 1; i < checkerBoard[1]; i++){
for(int j = 1; j < checkerBoard[0]; j++){
objp.push_back(Point3f(j*fieldSize, i*fieldSize, 0));
}
}
for(int i = 0; i < 8; i++)
{
chessboardImg = imread(fileNames[i]);
cvtColor(chessboardImg, chessboardImgGray, COLOR_BGR2GRAY);
bool patternFound = findChessboardCorners(chessboardImgGray, Size(8,5), corners[i], CALIB_CB_ADAPTIVE_THRESH + CALIB_CB_NORMALIZE_IMAGE);
if(patternFound)
{
cornerSubPix(chessboardImgGray, corners[i],Size(11,11), Size(-1,-1), TermCriteria(TermCriteria::EPS + TermCriteria::MAX_ITER, 30, 0.1));
drawChessboardCorners(chessboardImg, Size(8,5), corners[i], patternFound);
worldCoordinates.push_back(objp);
}
//***** Check loaded images and detected chessboard *****
//imshow("source", chessboardImgGray);
//imshow("chess", chessboardImg);
//waitKey(0);
//*******************************************************
}
float reprojectionError = calibrateCamera(worldCoordinates, corners, Size(1280,960), cameraMatrixHandEye, distortionHandEye, rvecs, tvecs, CALIB_FIX_ASPECT_RATIO + CALIB_FIX_K3 +
CALIB_ZERO_TANGENT_DIST + CALIB_FIX_PRINCIPAL_POINT);
//***** Check camera calibration results *****
//cout << "Reprojection Error CHE: " << endl << reprojectionError << endl << endl;
//cout << "Camera Matrix CHE: " << endl << cameraMatrixHandEye << endl << endl;
//cout << "Distortion CHE: " << endl << distortionHandEye << endl << endl;
//for(int i = 0; i < numberOfPoses; i++)
//{
// cout << "No. " << i+1 << " Target translation: " << endl << tvecs[i] << endl << endl;
// cout << "No. " << i+1 << " Target rotation: " << endl << rvecs[i] << endl << endl;
//}
//********************************************/
for(int i = 0; i < rvecs.size(); i++)
{
t_target2cam.emplace_back(tvecs[i]);
R_target2cam.emplace_back(rvecs[i]);
}
// for(int i = 0; i < t_target2cam.size(); i++)
// {
// cout << t_target2cam[i] << endl << endl;
// }
// for(int i = 0; i < R_target2cam.size(); i++)
// {
// cout << R_target2cam[i] << endl << endl;
// }
Mat R_cam2gripper;
Mat t_cam2gripper = (Mat_<double>(3, 1));
calibrateHandEye(R_gripper2base, t_gripper2base, R_target2cam, t_target2cam, R_cam2gripper, t_cam2gripper);
cout << t_cam2gripper << endl << endl;
cout << R_cam2gripper << endl << endl;
Mat transformationMat4x4 = (Mat_<double>(4, 4));
Mat transformationMatInv4x4 = (Mat_<double>(4, 4));
Mat R_cam2gripperInv = (Mat_<double>(3, 3));
Mat t_cam2gripperInv = (Mat_<double>(3, 1));
transformationMat4x4.at<double>(0, 0) = R_cam2gripper.at<double>(0, 0);
transformationMat4x4.at<double>(0, 1) = R_cam2gripper.at<double>(0, 1);
transformationMat4x4.at<double>(0, 2) = R_cam2gripper.at<double>(0, 2);
transformationMat4x4.at<double>(0, 3) = t_cam2gripper.at<double>(0, 0);
transformationMat4x4.at<double>(1, 0) = R_cam2gripper.at<double>(1, 0);
transformationMat4x4.at<double>(1, 1) = R_cam2gripper.at<double>(1, 1);
transformationMat4x4.at<double>(1, 2) = R_cam2gripper.at<double>(1, 2);
transformationMat4x4.at<double>(1, 3) = t_cam2gripper.at<double>(1, 0);
transformationMat4x4.at<double>(2, 0) = R_cam2gripper.at<double>(2, 0);
transformationMat4x4.at<double>(2, 1) = R_cam2gripper.at<double>(2, 1);
transformationMat4x4.at<double>(2, 2) = R_cam2gripper.at<double>(2, 2);
transformationMat4x4.at<double>(2, 3) = t_cam2gripper.at<double>(2, 0);
transformationMat4x4.at<double>(3, 0) = 0;
transformationMat4x4.at<double>(3, 1) = 0;
transformationMat4x4.at<double>(3, 2) = 0;
transformationMat4x4.at<double>(3, 3) = 1;
transformationMatInv4x4 = transformationMat4x4.inv();
R_cam2gripperInv.at<double>(0,0) = transformationMatInv4x4.at<double>(0,0);
R_cam2gripperInv.at<double>(0,1) = transformationMatInv4x4.at<double>(0,1);
R_cam2gripperInv.at<double>(0,2) = transformationMatInv4x4.at<double>(0,2);
R_cam2gripperInv.at<double>(1,0) = transformationMatInv4x4.at<double>(1,0);
R_cam2gripperInv.at<double>(1,1) = transformationMatInv4x4.at<double>(1,1);
R_cam2gripperInv.at<double>(1,2) = transformationMatInv4x4.at<double>(1,2);
R_cam2gripperInv.at<double>(2,0) = transformationMatInv4x4.at<double>(2,0);
R_cam2gripperInv.at<double>(2,1) = transformationMatInv4x4.at<double>(2,1);
R_cam2gripperInv.at<double>(2,2) = transformationMatInv4x4.at<double>(2,2);
t_cam2gripperInv.at<double>(0,0) = transformationMatInv4x4.at<double>(0,3);
t_cam2gripperInv.at<double>(1,0) = transformationMatInv4x4.at<double>(1,3);
t_cam2gripperInv.at<double>(2,0) = transformationMatInv4x4.at<double>(2,3);
cout << transformationMatInv4x4 << endl << endl;
cout << t_cam2gripperInv << endl << endl;
Point3f objectPoints1, objectPoints2;
vector<Point3f> objectPoints;
objectPoints1.x = 0; //TCP in TCP-Coordinates
objectPoints1.y = 0;
objectPoints1.z = 0;
objectPoints.push_back(objectPoints1);
vector<Point2f> imagePoints;
projectPoints(objectPoints, R_cam2gripperInv, t_cam2gripperInv, defaultCameraMatrix, defaultDistortion, imagePoints);
cout << imagePoints[0] << endl << endl;
return a.exec();
}
`
you need to use solvepnp to get rvec and tvec for each image separately and then you will have a list of rvecs and tvecs. list length equals no of images. To get a similar list of rvec and tvec for Gripper_to_base transformation, you need to derive the R and T matrices based on robot dynamics which take rotation angle as input. Then for each pose you need to input the rotation angle data to R,T matrices to get rvec and tvec for each pose and make list of same length. Then you input them to calibrateHandeye function.
I could finally resolve the problem.
The calibration was correct, but there were two mistakes in my approach:
To find my TCP I used the TCP-Coordinates from the robot control. Instead I had to use (0,0,0), which is the TCP in the TCP-Coordinate-System.
The second mistake was to use the transformation matrix built from R and t out of calibrateHandEye. Instead I had to use the inverse transformation matrix.

OpenCV 3d point projection

I'm having a problem righting an OpenCV program to project a 3d point. I seem to be running into this problem when using the projectPoints function of OpenCV.
Here is the error I got:
OpenCV Error: Assertion failed (mtype == type0 || (CV_MAT_CN(mtype) == CV_MAT_CN(type0) && ((1 << type0) & fixedDepthMask) != 0)) in create, file /home/daniel/Comp4102/opencv/modules/core/src/matrix.cpp, line 2375
terminate called after throwing an instance of 'cv::Exception'
what(): /home/daniel/Comp4102/opencv/modules/core/src/matrix.cpp:2375: error: (-215) mtype == type0 || (CV_MAT_CN(mtype) == CV_MAT_CN(type0) && ((1 << type0) & fixedDepthMask) != 0) in function create
And here is the code that I wrote:
#include "opencv2/core/core.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/calib3d/calib3d.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <iostream>
#include <string>
std::vector<cv::Point3d> set3DPoints();
int main( int argc, char* argv[]) {
// Setting given variables.
double f = 500;
double sx = 1;
double sy = 1;
double ox = 320;
double oy = 240;
std::vector<cv::Point3d> objectPoints = set3DPoints();
cv::Mat Xw(1,3,cv::DataType<double>::type);
Xw.at<double>(0,0) = 150;
Xw.at<double>(0,1) = 200;
Xw.at<double>(0,2) = 350;
// Create the K matrix.
cv::Mat K(3,3,cv::DataType<double>::type);
K.at<double>(0,0) = -f/sx;
K.at<double>(1,0) = 0;
K.at<double>(2,0) = ox;
K.at<double>(0,1) = 0;
K.at<double>(1,1) = -f/sy;
K.at<double>(2,1) = oy;
K.at<double>(0,2) = 0;
K.at<double>(1,2) = 0;
K.at<double>(2,2) = 1;
// Creating the Rotation Matrix
cv::Mat R(3,3,cv::DataType<double>::type);
R.at<double>(0,0) = 1;
R.at<double>(1,0) = 0;
R.at<double>(2,0) = 0;
R.at<double>(0,1) = 0;
R.at<double>(1,1) = 1;
R.at<double>(2,1) = 0;
R.at<double>(0,2) = 0;
R.at<double>(1,2) = 0;
R.at<double>(2,2) = 1;
// Creating the Translation vector
cv::Mat T(3,1,cv::DataType<double>::type);
T.at<double>(0) = -70;
T.at<double>(1) = -95;
T.at<double>(2) = -120;
std::cout << "K: " << "\n" << K << "\n";
std::cout << "R: " << "\n" << R << "\n";
std::cout << "T: " << "\n" << T << "\n";
// Create zero distortion
cv::Mat distCoeffs(4,1,cv::DataType<double>::type);
distCoeffs.at<double>(0) = 0;
distCoeffs.at<double>(1) = 0;
distCoeffs.at<double>(2) = 0;
distCoeffs.at<double>(3) = 0;
// Creating Rodrigues rotation matrix
cv::Mat rvecR(3,1,cv::DataType<double>::type);
cv::Rodrigues(R,rvecR);
std::vector<cv::Point2f> projectedPoints;
cv::projectPoints(objectPoints, rvecR, T, K, distCoeffs, projectedPoints);
for(unsigned int i=0; i<projectedPoints.size(); i++){
std::cout << "Image point: " << objectPoints[i] << " Projected to " << projectedPoints[i] << "\n";
}
return 0;
}
std::vector<cv::Point3d> set3DPoints() {
std::vector<cv::Point3d> points;
double x,y,z;
x=150;
y=200;
z=350;
points.push_back(cv::Point3d(x,y,z));
return points;
}
The function projectPoints needs the arguments objectPoints and imagePoints of the same type, while you're passing objectPoints as Point3d, and imagePoints as Point2f.
The error is telling you that the two types are different: double != float.
Simply declare projectedPoints as Point2d, so that it has the same type as Point3d:
std::vector<cv::Point2d> projectedPoints;

Image processing : luminance weighted 2

I would like to weigh values of luminance on a new image.
I have an image (5px.jpg) of 5 pixels with these luminance :50,100,150,200,250.
I have a vector of coefficient.
I created a new Mat Z which combine luminance of 5px.jpg and the coefficient.
So, my first value of luminance is 50 (lum[0]=50) and I want it to be applied on the 5.1 (coef[0]=5.1) first pixel of my matrix. To do that, I need to weight the 6th pixel with the first and the second value of luminance. In my case,the luminance of my 6th pixel will be 95 because (0.1*50)+(0.9*100)=95
And so on...
But I do not know why my code does not works.
I had already asked a similar question for a vector here and now, I'm try to adapt to an image.
My picture in input :
My output :
#define MPI 3.14159265358979323846264338327950288419716939937510
#define RAD2DEG (180./MPI)
#include "opencv2/core/core.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <opencv2/opencv.hpp>
#include <iostream>
#include <cmath>
#include <math.h>
#include <string.h>
using namespace cv;
using namespace std;
int main()
{
Mat image = imread("5px.jpg", 1);
if (image.empty())
{
cout << "Couldn't load " << image << endl;
}
else
{
cout << "Image upload, go" << endl;
}
namedWindow("ImageIn", CV_WINDOW_AUTOSIZE);
imshow("ImageIn", image);
Mat imgGrayScale;
cvtColor(image, imgGrayScale, CV_BGR2GRAY);
float *deltaP = new float[imgGrayScale.cols];
float *angle = new float[imgGrayScale.cols];
float *coeff = new float[imgGrayScale.cols];
int col;
for (col = 0; col < imgGrayScale.cols; ++col)
{
//cout << "position x = " << col << endl;
deltaP[col] = imgGrayScale.at<uchar>(0, col);
//cout << "luminance = " << deltaP[col] << endl;
angle[col] = acos(deltaP[col] / 255);
//cout << "angle =" << angle[col] << endl;
coeff[col] = (1 / cos(angle[col]));
cout << "coeff = " << coeff[col] << endl;
}
int width = imgGrayScale.size().width;
int height = imgGrayScale.size().height;
int width2 = width * 5;
int idx_coef = 0;
Mat Z = Mat::zeros(height, width2, CV_8UC1);
//for (int r = 0; r < imgGrayScale.rows; r++)
//{
//cout << "Saut de ligne " << endl << endl << endl;
for (int t = 0; t < imgGrayScale.cols; t++)
{
//cout << "Saut de colonne " << endl;
// Attribue le coeff à une variable
int c = int(coeff[idx_coef]);
//cout << "x" << t << endl;
for (int i = 0; i < c; ++i)
{
Z.at<uchar>(0, c) = imgGrayScale.at<uchar>(0, t);
}
float alpha = fmod(coeff[idx_coef], 1.f);
float beta = 1.f - alpha;
Z.at<uchar>(0, c + 1) = (alpha * imgGrayScale.at<uchar>(0, t) + beta * imgGrayScale.at<uchar>(0, t + 1));
idx_coef++;
coeff[idx_coef] = coeff[idx_coef] - beta;
if (idx_coef >= width - 1)
{
int cc = int(coeff[idx_coef]);
for (int i = 0; i < cc; ++i)
{
Z.at<uchar>(0, c) = imgGrayScale.at<uchar>(0, t);
}
idx_coef = 0;
break;
}
}
//}
namedWindow("m", CV_WINDOW_AUTOSIZE);
imshow("m", Z);
imwrite("lumianacetest.jpg", Z);
int t = waitKey();
if ((char)t == 27)
return 0;
}
You messed up with the indices while accessing the matrix Z. You shoudn't access Z at column c, but you need access the current column (as a vector::push_back would do). So you can keep the current index column in a variable, here idx_z, and increment it every time you access Z
Here your Z is CV_8U, so you lose accuracy since your values are float. You can create Z as CV_32F, and if you need to store values in CV_8U format to save the image, you can convert to CV_8U later, eventually.
The last columns of Z won't be set to any value (so I initialized them with value 0). If you need them to have the last value as in the imgGrayScale, just decomment the relevant part of the code.
Here the code:
#define MPI 3.14159265358979323846264338327950288419716939937510
#define RAD2DEG (180./MPI)
#include <opencv2\opencv.hpp>
#include <vector>
using namespace cv;
using namespace std;
int main()
{
Mat1b imgGrayScale = (Mat1b(2, 5) << 50, 100, 150, 200, 250,
50, 100, 150, 200, 250);
vector<float> deltaP(imgGrayScale.cols);
vector<float> angle(imgGrayScale.cols);
vector<float> coeff(imgGrayScale.cols);
int col;
for (col = 0; col < imgGrayScale.cols; ++col)
{
//cout << "position x = " << col << endl;
deltaP[col] = imgGrayScale.at<uchar>(0, col);
//cout << "luminance = " << deltaP[col] << endl;
angle[col] = acos(deltaP[col] / 255);
//cout << "angle =" << angle[col] << endl;
coeff[col] = (1 / cos(angle[col]));
cout << "coeff = " << coeff[col] << endl;
}
int width = imgGrayScale.size().width;
int height = imgGrayScale.size().height;
int width2 = width * 5;
Mat1f Z(height, width2, 0.f);
for (int r = 0; r < imgGrayScale.rows; r++)
{
int idx_lum = 0;
int idx_coef = 0;
int idx_z = 0;
vector<float> coef = coeff;
// Set all values in Z to the last value in imgGrayScale
Z.row(r) = imgGrayScale(r, imgGrayScale.cols-1);
while (true)
{
int c = int(coef[idx_coef]);
for (int i = 0; i < c; ++i)
{
Z(r, idx_z++) = imgGrayScale(r, idx_lum);
}
float alpha = fmod(coef[idx_coef], 1.f);
float beta = 1.f - alpha;
Z(r, idx_z++) = (alpha * imgGrayScale(r, idx_lum) + beta * imgGrayScale(r, idx_lum + 1));
idx_coef++;
idx_lum++;
coef[idx_coef] = coef[idx_coef] - beta;
if (idx_lum >= imgGrayScale.cols - 1 || idx_coef >= coef.size() - 1)
{
int cc = int(coef[idx_coef]);
for (int i = 0; i < cc; ++i)
{
Z(r, idx_z++) = imgGrayScale(r, idx_lum);
}
idx_coef = 0;
break;
}
}
}
Mat1b ZZ;
Z.convertTo(ZZ, CV_8U);
cout << "Float values:" << endl;
cout << Z << endl << endl;
cout << "Uchar values:" << endl;
cout << ZZ << endl << endl;
namedWindow("m", CV_WINDOW_AUTOSIZE);
imshow("m", Z);
imwrite("lumianacetest.png", ZZ);
waitKey();
return 0;
}