After upgrading from OpenCV 3.2 to OpenCV 3.4.1 I get a segmentation fault when running my program.
Full source code below (I'm not a C++ expert).
Running the program: ./txtbin input.png output.png
Hope someone can help :)
error
> Error: signal 11:
> #1 ./txtbin(_Z6bTracei+0x1c) [0x5596628d8e68]
> #2 /lib/x86_64-linux-gnu/libc.so.6(+0x33060) [0x7fa1b8481060]
> #3 /usr/local/lib/libopencv_world.so.3.4(+0xa3f7da) [0x7fa1b9ac97da]
> #4 /usr/local/lib/libopencv_world.so.3.4(+0xa2782d) [0x7fa1b9ab182d]
> #5 /usr/local/lib/libopencv_world.so.3.4(_ZN2cv7imwriteERKNS_6StringERKNS_11_InputArrayERKSt6vectorIiSaIiEE+0x8e) [0x7fa1b9ab4a4e]
> #6 ./txtbin(_ZN6Txtbin8binarizeEv+0x12d) [0x5596628d67c3]
> #7 ./txtbin(_ZN6Txtbin3runEv+0x698) [0x5596628d8c10]
> #8 ./txtbin(main+0x3fa) [0x5596628d92db]
> #9 /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf1) [0x7fa1b846e2e1]
input.png
txtbin.cpp
/*
* Compile
* # g++ -rdynamic -std=c++11 txtbin.cpp -o txtbin `pkg-config opencv --cflags --libs`
*
* Get opencv version
* # pkg-config --modversion opencv
*/
#include <signal.h>
#include "../backtrace/backtrace.h"
#include "txtbin.hpp"
Backtrace b;
void bTrace(int sig){
b.trace(sig);
}
void usage(const std::string VERSION){
std::cout << "txtbin: " << VERSION << "\nOpenCV: " << CV_VERSION << "\n\nUsage: txtbin input [options] output\n"
"Options:\n"
"\t-w <number> -- Set max width (keeps aspect ratio)\n"
"\t-h <number> -- Set max height (keeps aspect ratio)\n"
"\t-c -- Crop text contours\n"
"\t-r -- Rotate contents (deskew)\n"
"\t-m <number> -- Set margins (%)\n"
"\t-b <number> -- Set blockside (pixel)\n"
"\t Default: 9 pixel\n"
"\t-t <number> -- Set threshold (%)\n"
"\t Default: 85 %\n"
"\t-v -- Verbose\n" << std::endl;
}
int main(int argc, char* argv[]){
signal(SIGSEGV, &bTrace);
Txtbin a;
try{
// Parse arguments
for(int i = 1; i < argc; i++){
std::string arg = std::string(argv[i]);
if(i == 1){
a.set_input(arg);
}
else if(arg == "-w"){
a.set_width(atoi(argv[++i]));
}
else if(arg == "-h"){
a.set_height(atoi(argv[++i]));
}
else if(arg == "-c"){
a.set_crop(true);
}
else if(arg == "-r"){
a.set_rotate(true);
}
else if(arg == "-m"){
a.set_margin(atoi(argv[++i]));
}
else if(arg == "-b"){
a.set_blockside(atoi(argv[++i]));
}
else if(arg == "-t"){
a.set_threshold(atoi(argv[++i]));
}
else if(arg == "-v"){
a.set_verbose(true);
}
else if(i == argc - 1){
a.set_output(arg);
}
else{
throw std::runtime_error("Argument '"+arg+"' is invalid");
}
}
a.run();
}
catch(std::exception& e){
std::cerr << "Error: " << e.what() << "\n" << std::endl;
usage(a.VERSION);
return 1;
}
return 0;
}
txtbin.h
struct textblock{
int left = 0;
int top = 0;
int right = 0;
int bottom = 0;
};
class Txtbin{
private:
std::string input = "";
std::string output = "output.png";
int max_width = 0;
int max_height = 0;
bool is_crop = false;
bool is_rotate = false;
float margin = 0;
int blockside = 9; // set greater for larger fonts in image and vice versa
bool is_verbose = false;
float contrast = 0.01; // set smaller for lower contrast image
float thresholding = 0.85;
void binarize ();
cv::Mat src;
cv::Mat calc_block_mean_variance(cv::Mat& img);
textblock detect_text_block (bool test_output);
void downsize ();
void crop (textblock coords);
void error (const std::string& s);
public:
Txtbin();
const std::string VERSION = "0.3.6";
void set_input (const std::string& s);
void set_output (const std::string& s);
void set_height (int h);
void set_width (int w);
void set_crop (bool c);
void set_rotate (bool r);
void set_margin (float m);
void set_blockside (int b);
void set_threshold (int t);
void set_verbose (bool v);
void run ();
};
txtbin.hpp
#include <iostream>
#include <fstream>
#include <chrono>
#include <boost/algorithm/string.hpp>
#include "/usr/local/include/opencv2/opencv.hpp"
#include "txtbin.h"
Txtbin::Txtbin(){}
void Txtbin::set_input(const std::string& s){
input = s;
}
void Txtbin::set_output(const std::string& s){
output = s;
}
void Txtbin::set_height(int h){
max_height = h;
}
void Txtbin::set_width(int w){
max_width = w;
}
void Txtbin::set_crop(bool c){
is_crop = c;
}
void Txtbin::set_rotate(bool r){
is_rotate = r;
}
void Txtbin::set_margin(float m){
margin = m;
}
void Txtbin::set_blockside(int b){
blockside = b;
}
void Txtbin::set_threshold(int t){
thresholding = t / 100.0f;
}
void Txtbin::set_verbose(bool v){
is_verbose = v;
}
void Txtbin::error(const std::string& s){
throw std::runtime_error(s);
}
void Txtbin::binarize(){
src.convertTo(src, CV_32FC1, 1.0 / 255.0);
cv::Mat res = calc_block_mean_variance(src);
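// note: res * 255 below produces a cv::MatExpr rather than a cv::Mat (see the discussion after the code)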
imwrite(output, res * 255);
}
void Txtbin::crop(textblock coords){
if(coords.left < coords.right && coords.top < coords.bottom){
if(coords.left < 0){
coords.left = 0;
}
int crop_width = coords.right - coords.left;
int trim_width = coords.left + crop_width - src.cols;
if(trim_width > 0){
crop_width -= trim_width;
}
if(coords.top < 0){
coords.top = 0;
}
int crop_height = coords.bottom - coords.top;
int trim_height = coords.top + crop_height - src.rows;
if(trim_height > 0){
crop_height -= trim_height;
}
cv::Rect cut_rect = cv::Rect(coords.left, coords.top, crop_width, crop_height);
src = src(cut_rect);
}
else{
std::cout << "Warning: Invalid text block coordinates. Cropping is omitted!" << std::endl;
}
}
void Txtbin::downsize(){
float
width = src.cols,
height = src.rows,
scale = 0;
bool resized = false;
if(max_width > 0 && width > max_width){
scale = width / max_width;
width /= scale;
height /= scale;
resized = true;
}
if(max_height > 0 && height > max_height){
scale = height / max_height;
width /= scale;
height /= scale;
resized = true;
}
if(resized){
resize(src, src, cv::Size(round(width), round(height)));
}
}
textblock Txtbin::detect_text_block(bool test_output){
cv::Mat img = src.clone();
// downsample image and use it for processing
int multiplier = 2;
pyrDown(img, img);
textblock block;
block.left = img.cols;
block.top = img.rows;
int
rect_bottom,
rect_right;
// morphological gradient
cv::Mat grad;
cv::Mat morphKernel = getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(3, 3));
morphologyEx(img, grad, cv::MORPH_GRADIENT, morphKernel);
// binarize
cv::Mat bw;
threshold(grad, bw, 0.0, 255.0, cv::THRESH_BINARY | cv::THRESH_OTSU);
// connect horizontally oriented regions
cv::Mat connected;
morphKernel = getStructuringElement(cv::MORPH_RECT, cv::Size(9, 1));
morphologyEx(bw, connected, cv::MORPH_CLOSE, morphKernel);
// find contours
cv::Mat mask = cv::Mat::zeros(bw.size(), CV_8UC1);
std::vector<std::vector<cv::Point> > contours;
std::vector<cv::Vec4i> hierarchy;
findContours(connected, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, cv::Point(0, 0));
cv::Scalar color = cv::Scalar(0, 255, 0);
cv::Scalar color2 = cv::Scalar(0, 0, 255);
int thickness = 2;
if(test_output){
cv::cvtColor(img, img, CV_GRAY2BGR);
}
// filter contours
if(!hierarchy.empty()){
for(int idx = 0; idx >= 0; idx = hierarchy[idx][0]){
cv::Rect rect = boundingRect(contours[idx]);
cv::Mat maskROI(mask, rect);
maskROI = cv::Scalar(0, 0, 0);
// fill the contour
drawContours(mask, contours, idx, cv::Scalar(255, 255, 255), CV_FILLED);
// ratio of non-zero pixels in the filled region
double r = (double)countNonZero(maskROI) / (rect.width * rect.height);
// assume at least 25% of the area is filled if it contains text
if (r > 0.25 &&
(rect.height > 8 && rect.width > 8) // constraints on region size
// these two conditions alone are not very robust. better to use something
//like the number of significant peaks in a horizontal projection as a third condition
){
if(test_output){
rectangle(img, rect, color, thickness);
}
//rectangle(src, cv::Rect(rect.x * multiplier, rect.y * multiplier, rect.width * multiplier, rect.height * multiplier), color, thickness);
if(rect.y < block.top){
block.top = rect.y;
}
rect_bottom = rect.y + rect.height;
if(rect_bottom > block.bottom){
block.bottom = rect_bottom;
}
if(rect.x < block.left){
block.left = rect.x;
}
rect_right = rect.x + rect.width;
if(rect_right > block.right){
block.right = rect_right;
}
}
}
}
if(test_output){
rectangle(img, cv::Point(block.left, block.top), cv::Point(block.right, block.bottom), color2, thickness);
imwrite("test_text_contours.jpg", img);
}
//rectangle(src, cv::Point(block.left * multiplier, block.top * multiplier), cv::Point(block.right * multiplier, block.bottom * multiplier), color2, thickness);
block.left *= multiplier;
block.top *= multiplier;
block.right *= multiplier;
block.bottom *= multiplier;
return block;
}
cv::Mat Txtbin::calc_block_mean_variance(cv::Mat& img){
cv::Mat res;
cv::Mat I;
img.convertTo(I, CV_32FC1);
res = cv::Mat::zeros(img.rows / blockside, img.cols / blockside, CV_32FC1);
cv::Mat inpaintmask;
cv::Mat patch;
cv::Mat small_img;
cv::Scalar m, s;
for(int i = 0; i < img.rows - blockside; i += blockside){
for(int j = 0; j < img.cols - blockside; j += blockside){
patch = I(cv::Range(i, i + blockside + 1), cv::Range(j, j + blockside + 1));
meanStdDev(patch, m, s);
if(s[0] > contrast){
res.at<float>(i / blockside, j / blockside) = m[0];
}
else{
res.at<float>(i / blockside, j / blockside) = 0;
}
}
}
resize(I, small_img, res.size());
threshold(res, inpaintmask, 0.02, 1.0, cv::THRESH_BINARY);
cv::Mat inpainted;
small_img.convertTo(small_img, CV_8UC1, 255);
inpaintmask.convertTo(inpaintmask, CV_8UC1);
inpaint(small_img, inpaintmask, inpainted, 5, cv::INPAINT_TELEA);
resize(inpainted, res, img.size());
res.convertTo(res, CV_32FC1, 1.0 / 255.0);
res = 1.0 - res;
res = img + res;
threshold(res, res, thresholding, 1, cv::THRESH_BINARY);
return res;
}
void Txtbin::run(){
// Return error if input file is not defined
if(input == ""){
error("Input file not defined");
}
// Return error if input file is not found
else{
std::ifstream stream(input.c_str());
if(!stream.good()){
error("Input file not found");
}
}
// Return error if output file is not defined
if(output == ""){
error("Output file not defined");
}
// Return error if output file is not PNG
else{
std::string output_lc = output;
boost::to_lower(output_lc);
if(output_lc.substr(output_lc.find_last_of(".") + 1) != "png"){
error("Output file must be PNG");
}
}
bool test_output = false;
auto start = std::chrono::high_resolution_clock::now();
src = cv::imread(input, CV_LOAD_IMAGE_GRAYSCALE);
if(is_crop || is_verbose){
textblock coords = detect_text_block(test_output);
if(is_verbose){
std::cout << "Image dimensions: " << src.cols << " x " << src.rows << std::endl;
std::cout << "Text block coordinates: Left|Top " << coords.left << "," << coords.top << " Right|Bottom " << coords.right << "," << coords.bottom << std::endl;
}
if(is_crop){
crop(coords);
}
}
if(margin){
int border = src.cols * margin / 100;
copyMakeBorder(src, src, border, border, border, border, cv::BORDER_CONSTANT, cv::Scalar(255, 255, 255));
}
downsize();
binarize();
auto elapsed = std::chrono::high_resolution_clock::now() - start;
if(is_verbose){
std::cout << "Execution time: " << (std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count() / 1000) << " ms" << std::endl;
}
}
backtrace.h
#include <stdio.h>
#include <execinfo.h>
#include <stdlib.h>
class Backtrace {
public:
Backtrace();
void trace(int sig);
};
Backtrace::Backtrace(){}
void Backtrace::trace(int sig){
void *trace[10];
char **messages = (char **)NULL;
int i, trace_size = 0;
trace_size = backtrace(trace, 10);
messages = backtrace_symbols(trace, trace_size);
fprintf(stderr, "Error: signal %d:\n", sig);
for(i=1; i<trace_size; ++i){
fprintf(stderr, "#%d %s\n", i, messages[i]);
}
exit(1);
}
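The frames printed by Backtrace::trace are mangled C++ symbols; for example, _ZN6Txtbin8binarizeEv in the trace above is Txtbin::binarize(). A minimal sketch for demangling them, assuming libstdc++ and its <cxxabi.h>:
#include <cxxabi.h>
#include <cstdlib>
#include <string>
std::string demangle(const char* name){
    int status = 0;
    char* d = abi::__cxa_demangle(name, NULL, NULL, &status);
    std::string out = (status == 0 && d != NULL) ? d : name;
    free(d);
    return out;
}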
The segfault does not occur with the following implementation of Txtbin::binarize.
void Txtbin::binarize(){
src.convertTo(src, CV_32FC1, 1.0 / 255.0);
cv::Mat res = calc_block_mean_variance(src) * 255;
imwrite(output, res);
}
With the prior definition of Txtbin::binarize, cv::imwrite sees the type as _InputArray::EXPR rather than as _InputArray::MAT, and passes an uninitialized value of img_vec to cv::imwrite_. The segfault results from an attempt to dereference that uninitialized value. The _InputArray::EXPR value comes from the return value of MatExpr operator * (a cv::MatExpr) being passed to _InputArray::_InputArray.
With the modified definition of Txtbin::binarize the return value of MatExpr operator * is passed to MatExpr::operator Mat() const, which passes it to MatOp_AddEx::assign, and the value passed to _InputArray::_InputArray is a cv::Mat rather than a cv::MatExpr. In short, it seems the assignment of the return value of MatExpr operator * prevents an incorrect type conversion (cv::imwrite takes a cv::InputArray, not a cv::Mat, as its second parameter).
It appears that newer versions of OpenCV are not susceptible to this crash. Prior to this commit, cv::imwrite called the getMat method, and this commit restored that call as the default behavior (the else clause), as a replacement for an overly specific conditional. I confirmed that recompiling OpenCV with that change to cv::imwrite prevents the crash even with the prior version of Txtbin::binarize (the one in the question), and that the return value of MatExpr operator * is then passed to MatExpr::operator Mat() const, as in the modified definition of Txtbin::binarize, due to the assignment.
In short, you can work around issue 15545, or you can install its fix.
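Equivalently, the conversion can be forced at the call site without recompiling OpenCV. A minimal sketch, assuming the Txtbin class from the question:
void Txtbin::binarize(){
    src.convertTo(src, CV_32FC1, 1.0 / 255.0);
    cv::Mat res = calc_block_mean_variance(src);
    // cv::Mat(MatExpr) invokes MatExpr::operator Mat(), so cv::imwrite receives
    // an _InputArray::MAT instead of an _InputArray::EXPR
    imwrite(output, cv::Mat(res * 255));
}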
Related
I tried to perform object detection using a YOLOv5 model with C++. I have a custom-trained YOLOv5 model which works perfectly fine in Python, but my whole setup is in C++, so I have to switch. I converted the yolov5s model into ONNX format and tried to run it as shown in https://github.com/doleron/yolov4-opencv-cpp-python. Unfortunately, I'm getting multiple bounding boxes in the top left corner as in the image.
I don't know how to eliminate this kind of error, but when I used the built-in pre-trained yolov5s model, the C++ code detected objects and worked perfectly. Similarly, when I used the custom-trained model in Python, it worked perfectly.
Here is my C++ code for object detection:
#include <fstream>
#include <iostream>
#include <sstream>
#include <iomanip>
#include <chrono>
#include <cstring>
#include <opencv2/opencv.hpp>
std::vector<std::string> load_class_list()
{
std::vector<std::string> class_list;
std::ifstream ifs("config_files/classes.txt");
std::string line;
while (getline(ifs, line))
{
class_list.push_back(line);
}
return class_list;
}
void load_net(cv::dnn::Net &net, bool is_cuda)
{
auto result = cv::dnn::readNet("config_files/yolov5s_custom.onnx");
if (is_cuda)
{
std::cout << "Attempty to use CUDA\n";
result.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
result.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA_FP16);
}
else
{
std::cout << "Running on CPU\n";
result.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
result.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
}
net = result;
}
const std::vector<cv::Scalar> colors = {cv::Scalar(255, 255, 0), cv::Scalar(0, 255, 0), cv::Scalar(0, 255, 255), cv::Scalar(255, 0, 0)};
const float INPUT_WIDTH = 640.0;
const float INPUT_HEIGHT = 640.0;
const float SCORE_THRESHOLD = 0.2;
const float NMS_THRESHOLD = 0.4;
const float CONFIDENCE_THRESHOLD = 0.4;
struct Detection
{
int class_id;
float confidence;
cv::Rect box;
};
cv::Mat format_yolov5(const cv::Mat &source) {
int col = source.cols;
int row = source.rows;
int _max = MAX(col, row);
cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);
source.copyTo(result(cv::Rect(0, 0, col, row)));
return result;
}
void detect(cv::Mat &image, cv::dnn::Net &net, std::vector<Detection> &output, const std::vector<std::string> &className) {
cv::Mat blob;
auto input_image = format_yolov5(image);
cv::dnn::blobFromImage(input_image, blob, 1./255., cv::Size(INPUT_WIDTH, INPUT_HEIGHT), cv::Scalar(), true, false);
net.setInput(blob);
std::vector<cv::Mat> outputs;
net.forward(outputs, net.getUnconnectedOutLayersNames());
float x_factor = input_image.cols / INPUT_WIDTH;
float y_factor = input_image.rows / INPUT_HEIGHT;
float *data = (float *)outputs[0].data;
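// 85 = 5 box values (x, y, w, h, objectness) + 80 class scores (COCO);
// 25200 = number of predictions YOLOv5 produces for a 640x640 input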
const int dimensions = 85;
const int rows = 25200;
std::vector<int> class_ids;
std::vector<float> confidences;
std::vector<cv::Rect> boxes;
for (int i = 0; i < rows; ++i) {
float confidence = data[4];
if (confidence >= CONFIDENCE_THRESHOLD) {
float * classes_scores = data + 5;
cv::Mat scores(1, className.size(), CV_32FC1, classes_scores);
cv::Point class_id;
double max_class_score;
minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
if (max_class_score > SCORE_THRESHOLD) {
confidences.push_back(confidence);
class_ids.push_back(class_id.x);
float x = data[0];
float y = data[1];
float w = data[2];
float h = data[3];
int left = int((x - 0.5 * w) * x_factor);
int top = int((y - 0.5 * h) * y_factor);
int width = int(w * x_factor);
int height = int(h * y_factor);
boxes.push_back(cv::Rect(left, top, width, height));
}
}
data += dimensions;
}
std::vector<int> nms_result;
cv::dnn::NMSBoxes(boxes, confidences, SCORE_THRESHOLD, NMS_THRESHOLD, nms_result);
for (int i = 0; i < nms_result.size(); i++) {
int idx = nms_result[i];
Detection result;
result.class_id = class_ids[idx];
result.confidence = confidences[idx];
result.box = boxes[idx];
output.push_back(result);
}
}
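// Note (an assumption about the custom model, not a confirmed fix): each YOLOv5 output row
// holds x, y, w, h, objectness and then one score per class, so the row stride is
// 5 + class count. The hard-coded 85 assumes the 80 COCO classes; a custom model with a
// different number of classes needs the stride derived from it, e.g.:
//
//     const int dimensions = 5 + (int)className.size();
//     ...
//     data += dimensions;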
int main(int argc, char **argv)
{
std::vector<std::string> class_list = load_class_list();
cv::Mat frame;
cv::VideoCapture capture("sample.mp4");
if (!capture.isOpened())
{
std::cerr << "Error opening video file\n";
return -1;
}
bool is_cuda = argc > 1 && strcmp(argv[1], "cuda") == 0;
cv::dnn::Net net;
load_net(net, is_cuda);
auto start = std::chrono::high_resolution_clock::now();
int frame_count = 0;
float fps = -1;
int total_frames = 0;
while (true)
{
capture.read(frame);
if (frame.empty())
{
std::cout << "End of stream\n";
break;
}
std::vector<Detection> output;
detect(frame, net, output, class_list);
frame_count++;
total_frames++;
int detections = output.size();
for (int i = 0; i < detections; ++i)
{
auto detection = output[i];
auto box = detection.box;
auto classId = detection.class_id;
const auto color = colors[classId % colors.size()];
cv::rectangle(frame, box, color, 3);
cv::rectangle(frame, cv::Point(box.x, box.y - 20), cv::Point(box.x + box.width, box.y), color, cv::FILLED);
cv::putText(frame, class_list[classId].c_str(), cv::Point(box.x, box.y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
}
if (frame_count >= 30)
{
auto end = std::chrono::high_resolution_clock::now();
fps = frame_count * 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
frame_count = 0;
start = std::chrono::high_resolution_clock::now();
}
if (fps > 0)
{
std::ostringstream fps_label;
fps_label << std::fixed << std::setprecision(2);
fps_label << "FPS: " << fps;
std::string fps_label_str = fps_label.str();
cv::putText(frame, fps_label_str.c_str(), cv::Point(10, 25), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2);
}
cv::imshow("output", frame);
if (cv::waitKey(1) != -1)
{
capture.release();
std::cout << "finished by user\n";
break;
}
}
std::cout << "Total frames: " << total_frames << "\n";
return 0;
}
Kindly guide me on how to eliminate these multiple boxes on the output video stream.
I use the panorama stitching algorithm from OpenCV in order to stitch 2 or 3 images into one new result image.
I have coordinates of points in each source image. I need to calculate what are the new coordinates for these points in the result image.
I describe the algorithm below. My code is similar to the sample "stitching_detailed" from OpenCV (branch 3.4). A result_mask of type Mat is produced; maybe it is the solution? But I don't know how to use it. I found a related question here but not on stitching.
Any idea?
Here is the algorithm (for detailed code: stitching_detailed.cpp):
Find features for each image:
Ptr<FeaturesFinder> finder = makePtr<SurfFeaturesFinder>()
vector<ImageFeatures> features(num_images);
for (int i = 0; i < num_images; ++i)
{
(*finder)(images[i], features[i]);
}
Make pairwise_matches:
vector<MatchesInfo> pairwise_matches;
Ptr<FeaturesMatcher> matcher = makePtr<BestOf2NearestMatcher>(false, match_conf);
(*matcher)(features, pairwise_matches);
Reorder the images:
vector<int> indices = leaveBiggestComponent(features, pairwise_matches, conf_thresh);
// here some code to reorder 'images'
Estimate an homography in cameras:
vector<CameraParams> cameras;
Ptr<Estimator> estimator = makePtr<HomographyBasedEstimator>();
(*estimator)(features, pairwise_matches, cameras);
Convert to CV_32F:
for (size_t i = 0; i < cameras.size(); ++i)
{
Mat R;
cameras[i].R.convertTo(R, CV_32F);
cameras[i].R = R;
}
Execute a BundleAdjuster:
Ptr<detail::BundleAdjusterBase> adjuster = makePtr<detail::BundleAdjusterRay>();
adjuster->setConfThresh(conf_thresh);
adjuster->setRefinementMask(refine_mask);
(*adjuster)(features, pairwise_matches, cameras);
Compute a value for warped_image_scale:
vector<double> focals;
for (size_t i = 0; i < cameras.size(); ++i)
    focals.push_back(cameras[i].focal);
sort(focals.begin(), focals.end());
float warped_image_scale = static_cast<float>(focals[focals.size() / 2 - 1] + focals[focals.size() / 2]) * 0.5f;
Do wave correction:
vector<Mat> rmats;
for (size_t i = 0; i < cameras.size(); ++i)
rmats.push_back(cameras[i].R.clone());
waveCorrect(rmats, wave_correct);
for (size_t i = 0; i < cameras.size(); ++i)
cameras[i].R = rmats[i];
Create a warper:
Ptr<WarperCreator> warper_creator = makePtr<cv::SphericalWarper>();
Ptr<RotationWarper> warper = warper_creator->create(static_cast<float>(warped_image_scale * seam_work_aspect));
Create a blender and feed it:
Ptr<Blender> blender;
for (size_t img_idx = 0; img_idx < cameras.size(); ++img_idx)
{
full_img = input_imgs[img_idx];
if (!is_compose_scale_set)
{
is_compose_scale_set = true;
compose_scale = /* … */
}
if (abs(compose_scale - 1) > 1e-1)
resize(full_img, img, Size(), compose_scale, compose_scale, INTER_LINEAR_EXACT);
else
img = full_img;
// Warp the current image
warper->warp(img, K, cameras[img_idx].R, INTER_LINEAR, BORDER_REFLECT, img_warped);
// Warp the current image mask
mask.create(img_size, CV_8U);
mask.setTo(Scalar::all(255));
warper->warp(mask, K, cameras[img_idx].R, INTER_NEAREST, BORDER_CONSTANT, mask_warped);
// Compensate exposure
compensator->apply(img_idx, corners[img_idx], img_warped, mask_warped);
dilate(masks_warped[img_idx], dilated_mask, Mat());
resize(dilated_mask, seam_mask, mask_warped.size(), 0, 0, INTER_LINEAR_EXACT);
mask_warped = seam_mask & mask_warped;
if (!blender)
{
blender = Blender::createDefault(blend_type, try_gpu);
Size dst_sz = resultRoi(corners, sizes).size();
float blend_width = sqrt(static_cast<float>(dst_sz.area())) * blend_strength / 100.f;
MultiBandBlender *mb = dynamic_cast<MultiBandBlender *>(blender.get());
mb->setNumBands(static_cast<int>(ceil(log(blend_width) / log(2.)) - 1.));
blender->prepare(corners, sizes);
}
// Blend the current image
blender->feed(img_warped_s, mask_warped, corners[img_idx]);
}
Then, use the blender:
Mat result, result_mask;
blender->blend(result, result_mask);
// The result image is in 'result'
When I was a schoolboy, I found opencv/samples/cpp/stitching_detailed.cpp in the OpenCV samples folder. At that time, my programming skills were very poor, and I couldn't understand it even though I racked my brains. This question attracted my attention and stirred my memory. After a whole night of hard work and debugging, I finally got it.
Basic steps:
Given the three images: blue.png, green.png, and red.png
We can get the stitching result (result.png) using stitching_detailed.cpp.
blender->blend(result, result_mask);
imwrite("result.png", result);
imwrite("result_mask.png", result_mask);
I choose the centers of the three images, calculate the corresponding (warped) coordinates on the stitched image, and draw them as solid circles as follows:
Warping images (auxiliary)...
Compensating exposure...
Blending ...
Warp each center point, and draw solid circle.
[408, 204] => [532, 224]
[408, 204] => [359, 301]
[408, 204] => [727, 320]
Check `result.png`, `result_mask.png` and `result2.png`!
Done!
This is the function calcWarpedPoint I wrote to calculate the warped point on the stitching image:
cv::Point2f calcWarpedPoint(
const cv::Point2f& pt,
InputArray K, // Camera K parameter
InputArray R, // Camera R parameter
Ptr<RotationWarper> warper, // The Rotation Warper
const std::vector<cv::Point> &corners,
const std::vector<cv::Size> &sizes)
{
// Calculate the warped point using the camera parameters.
cv::Point2f dst = warper->warpPoint(pt, K, R);
// Calculate the stitching image roi using corners and sizes.
// the corners and sizes have already been calculated.
cv::Point2f tl = cv::detail::resultRoi(corners, sizes).tl();
// Finally adjust the warped point to the stitching image.
return cv::Point2f(dst.x - tl.x, dst.y - tl.y);
}
Here is an example code snippet:
std::cout << "\nWarp each center point, and draw solid circle.\n";
std::vector<cv::Scalar> colors = { {255,0,0}, {0, 255, 0}, {0, 0, 255} };
for (int idx = 0; idx < img_names.size(); ++idx) {
img = cv::imread(img_names[idx]);
Mat K;
cameras[idx].K().convertTo(K, CV_32F);
Mat R = cameras[idx].R;
cv::Point2f cpt = cv::Point2f(img.cols / 2, img.rows / 2);
cv::Point pt = calcWarpedPoint(cpt, K, R, warper, corners, sizes);
cv::circle(result, pt, 5, colors[idx], -1, cv::LINE_AA);
std::cout << cpt << " => " << pt << std::endl;
}
std::cout << "\nCheck `result.png`, `result_mask.png` and `result2.png`!\n";
imwrite("result2.png", result);
The full code:
/*
* Author : Kinght-金(https://stackoverflow.com/users/3547485/)
* Created : 2019/03/01 23:00 (CST)
* Finished : 2019/03/01 07:50 (CST)
*
* Modified on opencv401/samples/cpp/stitching_detailed.cpp
* From https://github.com/opencv/opencv/blob/4.0.1/samples/cpp/stitching_detailed.cpp
*
*
* Description: A simple opencv(4.0.1) image stitching code for Stack Overflow answers.
* For https://stackoverflow.com/questions/54904718/compute-coordinates-from-source-images-after-stitching/54953792#comment96681412_54953792
*
*/
#include <iostream>
#include <fstream>
#include <string>
#include "opencv2/opencv_modules.hpp"
#include <opencv2/core/utility.hpp>
#include "opencv2/imgcodecs.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/stitching/detail/autocalib.hpp"
#include "opencv2/stitching/detail/blenders.hpp"
#include "opencv2/stitching/detail/camera.hpp"
#include "opencv2/stitching/detail/exposure_compensate.hpp"
#include "opencv2/stitching/detail/matchers.hpp"
#include "opencv2/stitching/detail/motion_estimators.hpp"
#include "opencv2/stitching/detail/seam_finders.hpp"
#include "opencv2/stitching/detail/warpers.hpp"
#include "opencv2/stitching/warpers.hpp"
using namespace std;
using namespace cv;
using namespace cv::detail;
//! img_names are the input image (full) paths
// You can download them using the links below.
//! Blue: https://i.stack.imgur.com/Yz3U1.png
//! Green: https://i.stack.imgur.com/AbUTH.png
//! Red: https://i.stack.imgur.com/9wcGc.png
vector<String> img_names = {"D:/stitching/blue.png", "D:/stitching/green.png", "D:/stitching/red.png"};
//! The function to calculate the warped point on the stitching image.
cv::Point2f calcWarpedPoint(
const cv::Point2f& pt,
InputArray K, // Camera K parameter
InputArray R, // Camera R parameter
Ptr<RotationWarper> warper, // The Rotation Warper
const std::vector<cv::Point> &corners,
const std::vector<cv::Size> &sizes)
{
// Calculate the warped point
cv::Point2f dst = warper->warpPoint(pt, K, R);
// Calculate the stitching image roi using corners and sizes,
// the corners and sizes have already been calculated.
cv::Point2f tl = cv::detail::resultRoi(corners, sizes).tl();
// Finally adjust the warped point
return cv::Point2f(dst.x - tl.x, dst.y - tl.y);
}
int main(int argc, char* argv[])
{
double work_megapix = 0.6;
double seam_megapix = 0.1;
double compose_megapix = -1;
float conf_thresh = 1.f;
float match_conf = 0.3f;
float blend_strength = 5;
// Check if have enough images
int num_images = static_cast<int>(img_names.size());
if (num_images < 2)
{
std::cout << "Need more images\n";
return -1;
}
double work_scale = 1, seam_scale = 1, compose_scale = 1;
bool is_work_scale_set = false, is_seam_scale_set = false, is_compose_scale_set = false;
// (1) Create the feature finder
Ptr<Feature2D> finder = ORB::create();
// (2) Read the images, scale them appropriately, and compute their feature descriptors
Mat full_img, img;
vector<ImageFeatures> features(num_images);
vector<Mat> images(num_images);
vector<Size> full_img_sizes(num_images);
double seam_work_aspect = 1;
for (int i = 0; i < num_images; ++i)
{
full_img = imread(img_names[i]);
full_img_sizes[i] = full_img.size();
if (full_img.empty())
{
cout << "Can't open image " << img_names[i] << std::endl;
return -1;
}
if (!is_work_scale_set)
{
work_scale = min(1.0, sqrt(work_megapix * 1e6 / full_img.size().area()));
is_work_scale_set = true;
}
resize(full_img, img, Size(), work_scale, work_scale, INTER_LINEAR_EXACT);
if (!is_seam_scale_set)
{
seam_scale = min(1.0, sqrt(seam_megapix * 1e6 / full_img.size().area()));
seam_work_aspect = seam_scale / work_scale;
is_seam_scale_set = true;
}
computeImageFeatures(finder, img, features[i]);
features[i].img_idx = i;
std::cout << "Features in image #" << i + 1 << ": " << features[i].keypoints.size() << std::endl;
resize(full_img, img, Size(), seam_scale, seam_scale, INTER_LINEAR_EXACT);
images[i] = img.clone();
}
full_img.release();
img.release();
// (3) Create the image feature matcher and compute the match info
vector<MatchesInfo> pairwise_matches;
Ptr<FeaturesMatcher> matcher = makePtr<BestOf2NearestMatcher>(false, match_conf);
(*matcher)(features, pairwise_matches);
matcher->collectGarbage();
//! (4) Remove outliers and keep only the most confident large component
// Leave only images we are sure are from the same panorama
vector<int> indices = leaveBiggestComponent(features, pairwise_matches, conf_thresh);
vector<Mat> img_subset;
vector<String> img_names_subset;
vector<Size> full_img_sizes_subset;
for (size_t i = 0; i < indices.size(); ++i)
{
img_names_subset.push_back(img_names[indices[i]]);
img_subset.push_back(images[indices[i]]);
full_img_sizes_subset.push_back(full_img_sizes[indices[i]]);
}
images = img_subset;
img_names = img_names_subset;
full_img_sizes = full_img_sizes_subset;
// Check if we still have enough images
num_images = static_cast<int>(img_names.size());
if (num_images < 2)
{
std::cout << "Need more images\n";
return -1;
}
//! (5) Estimate the homography
Ptr<Estimator> estimator = makePtr<HomographyBasedEstimator>();
vector<CameraParams> cameras;
if (!(*estimator)(features, pairwise_matches, cameras))
{
cout << "Homography estimation failed.\n";
return -1;
}
for (size_t i = 0; i < cameras.size(); ++i)
{
Mat R;
cameras[i].R.convertTo(R, CV_32F);
cameras[i].R = R;
std::cout << "\nInitial camera intrinsics #" << indices[i] + 1 << ":\nK:\n" << cameras[i].K() << "\nR:\n" << cameras[i].R << std::endl;
}
// (6) Create the bundle adjuster
Ptr<detail::BundleAdjusterBase> adjuster = makePtr<detail::BundleAdjusterRay>();
adjuster->setConfThresh(conf_thresh);
Mat_<uchar> refine_mask = Mat::zeros(3, 3, CV_8U);
refine_mask(0, 0) = 1;
refine_mask(0, 1) = 1;
refine_mask(0, 2) = 1;
refine_mask(1, 1) = 1;
refine_mask(1, 2) = 1;
adjuster->setRefinementMask(refine_mask);
if (!(*adjuster)(features, pairwise_matches, cameras))
{
cout << "Camera parameters adjusting failed.\n";
return -1;
}
// Find median focal length
vector<double> focals;
for (size_t i = 0; i < cameras.size(); ++i)
{
focals.push_back(cameras[i].focal);
}
sort(focals.begin(), focals.end());
float warped_image_scale;
if (focals.size() % 2 == 1)
warped_image_scale = static_cast<float>(focals[focals.size() / 2]);
else
warped_image_scale = static_cast<float>(focals[focals.size() / 2 - 1] + focals[focals.size() / 2]) * 0.5f;
std::cout << "\nWarping images (auxiliary)... \n";
vector<Point> corners(num_images);
vector<UMat> masks_warped(num_images);
vector<UMat> images_warped(num_images);
vector<Size> sizes(num_images);
vector<UMat> masks(num_images);
// Prepare image masks
for (int i = 0; i < num_images; ++i)
{
masks[i].create(images[i].size(), CV_8U);
masks[i].setTo(Scalar::all(255));
}
// Warp images and their masks
Ptr<WarperCreator> warper_creator = makePtr<cv::CylindricalWarper>();
if (!warper_creator)
{
cout << "Can't create the warper \n";
return 1;
}
//! Create RotationWarper
Ptr<RotationWarper> warper = warper_creator->create(static_cast<float>(warped_image_scale * seam_work_aspect));
//! Calculate warped corners/sizes/mask
for (int i = 0; i < num_images; ++i)
{
Mat_<float> K;
cameras[i].K().convertTo(K, CV_32F);
float swa = (float)seam_work_aspect;
K(0, 0) *= swa; K(0, 2) *= swa;
K(1, 1) *= swa; K(1, 2) *= swa;
corners[i] = warper->warp(images[i], K, cameras[i].R, INTER_LINEAR, BORDER_REFLECT, images_warped[i]);
sizes[i] = images_warped[i].size();
warper->warp(masks[i], K, cameras[i].R, INTER_NEAREST, BORDER_CONSTANT, masks_warped[i]);
}
vector<UMat> images_warped_f(num_images);
for (int i = 0; i < num_images; ++i)
images_warped[i].convertTo(images_warped_f[i], CV_32F);
std::cout << "Compensating exposure... \n";
//! Estimate exposure and compensate it to reduce brightness differences between images
Ptr<ExposureCompensator> compensator = ExposureCompensator::createDefault(ExposureCompensator::GAIN_BLOCKS);
if (dynamic_cast<BlocksCompensator*>(compensator.get()))
{
BlocksCompensator* bcompensator = dynamic_cast<BlocksCompensator*>(compensator.get());
bcompensator->setNrFeeds(1);
bcompensator->setNrGainsFilteringIterations(2);
bcompensator->setBlockSize(32, 32);
}
compensator->feed(corners, images_warped, masks_warped);
Ptr<SeamFinder> seam_finder = makePtr<detail::GraphCutSeamFinder>(GraphCutSeamFinderBase::COST_COLOR);
seam_finder->find(images_warped_f, corners, masks_warped);
// Release unused memory
images.clear();
images_warped.clear();
images_warped_f.clear();
masks.clear();
Mat img_warped, img_warped_s;
Mat dilated_mask, seam_mask, mask, mask_warped;
Ptr<Blender> blender;
double compose_work_aspect = 1;
for (int img_idx = 0; img_idx < num_images; ++img_idx)
{
// Read image and resize it if necessary
full_img = imread(img_names[img_idx]);
if (!is_compose_scale_set)
{
is_compose_scale_set = true;
compose_work_aspect = compose_scale / work_scale;
// Update warped image scale
warped_image_scale *= static_cast<float>(compose_work_aspect);
warper = warper_creator->create(warped_image_scale);
// Update corners and sizes
for (int i = 0; i < num_images; ++i)
{
cameras[i].focal *= compose_work_aspect;
cameras[i].ppx *= compose_work_aspect;
cameras[i].ppy *= compose_work_aspect;
Size sz = full_img_sizes[i];
if (std::abs(compose_scale - 1) > 1e-1)
{
sz.width = cvRound(full_img_sizes[i].width * compose_scale);
sz.height = cvRound(full_img_sizes[i].height * compose_scale);
}
Mat K;
cameras[i].K().convertTo(K, CV_32F);
Rect roi = warper->warpRoi(sz, K, cameras[i].R);
corners[i] = roi.tl();
sizes[i] = roi.size();
}
}
if (abs(compose_scale - 1) > 1e-1)
resize(full_img, img, Size(), compose_scale, compose_scale, INTER_LINEAR_EXACT);
else
img = full_img;
full_img.release();
Size img_size = img.size();
Mat K, R;
cameras[img_idx].K().convertTo(K, CV_32F);
R = cameras[img_idx].R;
// Warp the current image : img => img_warped
warper->warp(img, K, cameras[img_idx].R, INTER_LINEAR, BORDER_REFLECT, img_warped);
// Warp the current image mask
mask.create(img_size, CV_8U);
mask.setTo(Scalar::all(255));
warper->warp(mask, K, cameras[img_idx].R, INTER_NEAREST, BORDER_CONSTANT, mask_warped);
compensator->apply(img_idx, corners[img_idx], img_warped, mask_warped);
img_warped.convertTo(img_warped_s, CV_16S);
img_warped.release();
img.release();
mask.release();
dilate(masks_warped[img_idx], dilated_mask, Mat());
resize(dilated_mask, seam_mask, mask_warped.size(), 0, 0, INTER_LINEAR_EXACT);
mask_warped = seam_mask & mask_warped;
if (!blender)
{
blender = Blender::createDefault(Blender::MULTI_BAND, false);
Size dst_sz = resultRoi(corners, sizes).size();
float blend_width = sqrt(static_cast<float>(dst_sz.area())) * blend_strength / 100.f;
if (blend_width < 1.f){
blender = Blender::createDefault(Blender::NO, false);
}
else
{
MultiBandBlender* mb = dynamic_cast<MultiBandBlender*>(blender.get());
mb->setNumBands(static_cast<int>(ceil(log(blend_width) / log(2.)) - 1.));
}
blender->prepare(corners, sizes);
}
blender->feed(img_warped_s, mask_warped, corners[img_idx]);
}
/* ===========================================================================*/
// Blend image
std::cout << "\nBlending ...\n";
Mat result, result_mask;
blender->blend(result, result_mask);
imwrite("result.png", result);
imwrite("result_mask.png", result_mask);
std::cout << "\nWarp each center point, and draw solid circle.\n";
std::vector<cv::Scalar> colors = { {255,0,0}, {0, 255, 0}, {0, 0, 255} };
for (int idx = 0; idx < img_names.size(); ++idx) {
img = cv::imread(img_names[idx]);
Mat K;
cameras[idx].K().convertTo(K, CV_32F);
Mat R = cameras[idx].R;
cv::Point2f cpt = cv::Point2f(img.cols / 2, img.rows / 2);
cv::Point pt = calcWarpedPoint(cpt, K, R, warper, corners, sizes);
cv::circle(result, pt, 5, colors[idx], -1, cv::LINE_AA);
std::cout << cpt << " => " << pt << std::endl;
}
std::cout << "\nCheck `result.png`, `result_mask.png` and `result2.png`!\n";
imwrite("result2.png", result);
std::cout << "\nDone!\n";
/* ===========================================================================*/
return 0;
}
Some links that may be useful:
stitching_detailed.cpp : https://github.com/opencv/opencv/blob/4.0.1/samples/cpp/stitching_detailed.cpp
warper->warp(), warpPoint(), warpRoi(): https://github.com/opencv/opencv/blob/master/modules/stitching/src/warpers.cpp#L153
resultRoi(): https://github.com/opencv/opencv/blob/master/modules/stitching/src/util.cpp#L116
Other links that may be interesting:
Converting opencv remap code from c++ to python
Split text lines in scanned document
How do I use the relationships between Flann matches to determine a sensible homography?
#include "stdafx.h"
#include <iostream>
#include <stdio.h>
#include <vector>
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/features2d/features2d.hpp"
#include <iomanip>
#include <fstream>
using namespace std;
using namespace cv;
/// Global Variables
int DELAY_CAPTION = 1500;
int DELAY_BLUR = 100;
int MAX_KERNEL_LENGTH = 31;
Mat src; Mat dst_inv,dst,dest[12],dog[8],grad_x,grad_x_2,grad_x_2_1,temp,p1,p2,x_max,hess[5];
char window_name[] = "Smoothing Demo";
/// Function headers
int display_caption(const char* caption);
int display_dst(int delay);
double k,v;
int width,height,m = 0,m1 = 0,width2,height2;
int sum1,r = 10,g = 0,c = 0;
/**
* @function main
*/
int main(void)
{
namedWindow(window_name, WINDOW_AUTOSIZE);
ofstream outputfile;
outputfile.open("data4.txt");
/// Load the source image
src = imread("C:/Users/Adithyaanirudhha/Documents/Visual Studio 2015/Projects/ConsoleApplication2/pa.jpg", 1);
if (display_caption("Original Image") != 0) { return 0; }
dst = src.clone();
width = src.size().width;
height = src.size().height;
Size size(height,width);
if (display_dst(DELAY_CAPTION) != 0) { return 0; }
cvtColor(src, src, CV_RGB2GRAY);
/// Applying Gaussian blur
//for(int j=0;j<4;j++)
//{
//resize(src, src, size / 2);
k = sqrt(2.0); // note: "2 ^ (1 / 2)" is XOR with integer division, not a square root
for(int i=0;i<3;i++)
{
//if (display_caption("Gaussian Blur") != 0) { return 0; }
GaussianBlur(src, dst, Size(), 1.6*k, 1.6*k);
if (display_dst(DELAY_BLUR*10) != 0) { return 0; }
k = k * k;
dst.copyTo(dest[m]);
//dest[m] = dst;
m++;
}
//}
Mat dog_inv;
for (int n = 0; n < 2; n++)
{
if(m1 != 3 && m1 != 6 && m1 != 9) // '||' here would make the condition always true
subtract(dest[m1 + 1], dest[m1], dog[n],noArray(),-1);
}
imshow(window_name, dog[1]);
for (int i = 0; i < 2; i++)
{
Sobel(dog[1], grad_x, CV_16S, 1, 0, 3, 1, 0, BORDER_DEFAULT);
convertScaleAbs(grad_x, grad_x);
transpose(grad_x, temp);
Sobel(dog[1], grad_x_2, CV_16S, 2, 0, 3, 1, 0, BORDER_DEFAULT);
Mat tmp;
dog[1].convertTo(tmp, CV_32F);
convertScaleAbs(grad_x_2, grad_x_2);
c = invert(tmp,dog_inv, DECOMP_SVD);
//imshow(window_name,src);
Sobel(dog[1], grad_x_2_1, CV_16S, 2, 0, 3, 1, 0, BORDER_DEFAULT);
convertScaleAbs(grad_x_2_1, grad_x_2_1);
//imshow(window_name,src);
//grad_x_2_1 = grad_x_2_1.inv(CV_32F);
multiply(grad_x_2_1,grad_x,x_max, 1, 1);
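// note: multiply() is element-wise and requires both inputs to have identical sizes;
// temp holds a transpose, so for a non-square image the two multiply() calls below
// fail inside cv::arithm_op with the size-mismatch error quoted underneath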
multiply(temp, x_max, p1, 1, 1);
transpose(x_max,temp);
multiply(temp, grad_x_2, p2, 1, 1);
multiply(p2, x_max, p2, 1, 1);
imshow(window_name, dog[1]);
/*for (int y = 0; y < dog[i].rows; y++)
{
for (int x = 0; x < dog[i].cols; x++)
{
dog[i].at<Vec3b>(y, x) = (-1 * (p1.at<Vec3b>(y, x)) + 0.5*(p2.at<Vec3b>(y, x)) + dog[i].at<Vec3b>(y, x));
}
}*/
//imshow(window_name, dog[1]);
//imshow(window_name, src);
/* for (i = 0; i<2; i++)
{
v = determinant(dog[i]);
}
for (int h = 0; h < dog[1].rows; h++)
{
sum1 = dog[1].at<uchar>(h, h);
}
}
if ((sum1 ^ 2) / v >((r + 1) ^ 2) / r)
{
hess[g] = dog[1];
}
*/
/// Wait until user press a key
//display_caption("End: Press a key!");
//imshow(window_name, dog[1]);
//waitKey(0);
//outputfile << dog[0] << endl;
//cout << "M = " << endl << " " << dest[0] << endl << endl;
}
waitKey(0);
return 0;
}
/**
* @function display_caption
*/
int display_caption(const char* caption)
{
dst = Mat::zeros(src.size(), src.type());
putText(dst, caption,
Point(src.cols / 4, src.rows / 2),
FONT_HERSHEY_COMPLEX, 1, Scalar(255, 255, 255));
imshow(window_name, dst);
int c = waitKey(DELAY_CAPTION);
if (c >= 0) { return -1; }
return 0;
}
/**
* @function display_dst
*/
int display_dst(int delay)
{
imshow(window_name, dst);
int c = waitKey(delay);
if (c >= 0) { return -1; }
return 0;
}
Error is:
***** VIDEOINPUT LIBRARY - 0.1995 - TFW07 *****
OpenCV Error: Sizes of input arguments do not match (The operation is neither 'array op array' (where arrays have the same size and the same number of channels), nor 'array op scalar', nor 'scalar op array') in cv::arithm_op, file C:\buildslave64\win64_amdocl\master_PackSlave-win64-vc14-shared\opencv\modules\core\src\arithm.cpp, line 639
Press any key to continue . . .
Please help, thank you :)
I want to apply a segmentation algorithm (watershed by markers). I have a problem with this code: it works only with one image (lena.jpg).
I want to use it with MRI images, but it's not working.
I think the problem is with the conversion between RGB and grayscale images.
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <cstdio>
#include <iostream>
using namespace cv;
using namespace std;
static void help()
{
cout << "\nThis program demonstrates the famous watershed segmentation algorithm in OpenCV: watershed()\n"
"Usage:\n"
"./watershed [image_name -- default is ../data/fruits.jpg]\n" << endl;
cout << "Hot keys: \n"
"\tESC - quit the program\n"
"\tr - restore the original image\n"
"\tw or SPACE - run watershed segmentation algorithm\n"
"\t\t(before running it, *roughly* mark the areas to segment on the image)\n"
"\t (before that, roughly outline several markers on the image)\n";
}
Mat markerMask, img;
Point prevPt(-1, -1);
static void onMouse(int event, int x, int y, int flags, void*)
{
if (x < 0 || x >= img.cols || y < 0 || y >= img.rows)
return;
if (event == EVENT_LBUTTONUP || !(flags & EVENT_FLAG_LBUTTON))
prevPt = Point(-1, -1);
else if (event == EVENT_LBUTTONDOWN)
prevPt = Point(x, y);
else if (event == EVENT_MOUSEMOVE && (flags & EVENT_FLAG_LBUTTON))
{
Point pt(x, y);
if (prevPt.x < 0)
prevPt = pt;
line(markerMask, prevPt, pt, Scalar::all(255), 5, 8, 0);
line(img, prevPt, pt, Scalar::all(255), 5, 8, 0);
prevPt = pt;
imshow("image", img);
}
}
int main(int argc, char** argv)
{
char* filename = argc >= 2 ? argv[1] : (char*)"samah.png";
Mat img0 = imread(filename, 1), imgGray;
if (img0.empty())
{
cout << "Couldn'g open image " << filename << ". Usage: watershed <image_name>\n";
//system("wait");
return 1;
}
help();
namedWindow("image", 1);
img0.copyTo(img);
cvtColor(img, markerMask, COLOR_BGR2GRAY);
cvtColor(markerMask, imgGray, COLOR_GRAY2BGR);
markerMask = Scalar::all(0);
imshow("image", img);
setMouseCallback("image", onMouse, 0);
for (;;)
{
int c = waitKey(0);
if ((char)c == 27)
break;
if ((char)c == 'r')
{
markerMask = Scalar::all(0);
img0.copyTo(img);
imshow("image", img);
}
if ((char)c == 'w' || (char)c == ' ')
{
int i, j, compCount = 0;
vector<vector<Point> > contours;
vector<Vec4i> hierarchy;
findContours(markerMask, contours, hierarchy, RETR_CCOMP, CHAIN_APPROX_SIMPLE);
if (contours.empty())
continue;
Mat markers(markerMask.size(), CV_32S);
markers = Scalar::all(0);
int idx = 0;
for (; idx >= 0; idx = hierarchy[idx][0], compCount++)
drawContours(markers, contours, idx, Scalar::all(compCount + 1), -1, 8, hierarchy, INT_MAX);
if (compCount == 0)
continue;
vector<Vec3b> colorTab;
for (i = 0; i < compCount; i++)
{
int b = theRNG().uniform(0, 255);
int g = theRNG().uniform(0, 255);
int r = theRNG().uniform(0, 255);
colorTab.push_back(Vec3b((uchar)b, (uchar)g, (uchar)r));
}
double t = (double)getTickCount();
watershed(img0, markers);
t = (double)getTickCount() - t;
printf("execution time = %gms\n", t*1000. / getTickFrequency());
Mat wshed(markers.size(), CV_8UC3);
// paint the watershed image
for (i = 0; i < markers.rows; i++)
for (j = 0; j < markers.cols; j++)
{
int index = markers.at<int>(i, j);
if (index == -1)
wshed.at<Vec3b>(i, j) = Vec3b(255, 255, 255);
else if (index <= 0 || index > compCount)
wshed.at<Vec3b>(i, j) = Vec3b(0, 0, 0);
else
wshed.at<Vec3b>(i, j) = colorTab[index - 1];
}
wshed = wshed*0.5 + imgGray*0.5;
imshow("watershed transform", wshed);
}
}
system("wait");
return 0;
}
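One thing worth checking (a guess, since the failing MRI input is not shown): cv::watershed requires an 8-bit, 3-channel input, and imread(filename, 1) converts 16-bit data to 8-bit by keeping only the high byte, which can leave a 12-bit MRI slice almost black. A sketch of loading a slice unchanged and converting it explicitly, with a hypothetical file name irm.png:
cv::Mat irm = cv::imread("irm.png", cv::IMREAD_UNCHANGED); // may load as 16-bit, single-channel
cv::Mat img8 = irm, img0;
if (irm.depth() != CV_8U)
{
    double minV, maxV;
    cv::minMaxLoc(irm, &minV, &maxV); // assumes a single-channel slice
    irm.convertTo(img8, CV_8U, 255.0 / (maxV - minV), -minV * 255.0 / (maxV - minV)); // stretch to 0..255
}
if (img8.channels() == 1)
    cv::cvtColor(img8, img0, cv::COLOR_GRAY2BGR); // watershed needs CV_8UC3
else
    img0 = img8;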
@robot_sherrick answered this question for me; this is a follow-up question to his answer.
cv::SimpleBlobDetector in OpenCV 2.4 looks very exciting, but I am not sure I can make it work for more detailed data extraction.
I have the following concerns:
if this only returns the center of the blob, I can't have an entire, labelled Mat, can I?
how can I access the features of the detected blobs like area, convexity, color and so on?
can I display an exact segmentation with this? (like with, say, watershed)
So the code should look something like this:
cv::Mat inputImg = imread(image_file_name, CV_LOAD_IMAGE_COLOR); // Read a file
cv::SimpleBlobDetector::Params params;
params.minDistBetweenBlobs = 10.0; // minimum 10 pixels between blobs
params.filterByArea = true; // filter my blobs by area of blob
params.minArea = 20.0; // min 20 pixels squared
params.maxArea = 500.0; // max 500 pixels squared
SimpleBlobDetector myBlobDetector(params);
std::vector<cv::KeyPoint> myBlobs;
myBlobDetector.detect(inputImg, myBlobs);
If you then want to have these keypoints highlighted on your image:
cv::Mat blobImg;
cv::drawKeypoints(inputImg, myBlobs, blobImg);
cv::imshow("Blobs", blobImg);
To access the info in the keypoints, you then just access each element like so:
for(std::vector<cv::KeyPoint>::iterator blobIterator = myBlobs.begin(); blobIterator != myBlobs.end(); blobIterator++){
std::cout << "size of blob is: " << blobIterator->size << std::endl;
std::cout << "point is at: " << blobIterator->pt.x << " " << blobIterator->pt.y << std::endl;
}
Note: this has not been compiled and may have typos.
Here is a version that will allow you to get the last contours back, via the getContours() method. They will match up by index to the keypoints.
class BetterBlobDetector : public cv::SimpleBlobDetector
{
public:
BetterBlobDetector(const cv::SimpleBlobDetector::Params ¶meters = cv::SimpleBlobDetector::Params());
const std::vector < std::vector<cv::Point> > getContours();
protected:
virtual void detectImpl( const cv::Mat& image, std::vector<cv::KeyPoint>& keypoints, const cv::Mat& mask=cv::Mat()) const;
virtual void findBlobs(const cv::Mat &image, const cv::Mat &binaryImage,
std::vector<Center> &centers, std::vector < std::vector<cv::Point> >&contours) const;
};
Then cpp
using namespace cv;
BetterBlobDetector::BetterBlobDetector(const SimpleBlobDetector::Params ¶meters)
{
}
void BetterBlobDetector::findBlobs(const cv::Mat &image, const cv::Mat &binaryImage,
vector<Center> &centers, std::vector < std::vector<cv::Point> >&curContours) const
{
(void)image;
centers.clear();
curContours.clear();
std::vector < std::vector<cv::Point> >contours;
Mat tmpBinaryImage = binaryImage.clone();
findContours(tmpBinaryImage, contours, CV_RETR_LIST, CV_CHAIN_APPROX_NONE);
for (size_t contourIdx = 0; contourIdx < contours.size(); contourIdx++)
{
Center center;
center.confidence = 1;
Moments moms = moments(Mat(contours[contourIdx]));
if (params.filterByArea)
{
double area = moms.m00;
if (area < params.minArea || area >= params.maxArea)
continue;
}
if (params.filterByCircularity)
{
double area = moms.m00;
double perimeter = arcLength(Mat(contours[contourIdx]), true);
double ratio = 4 * CV_PI * area / (perimeter * perimeter);
if (ratio < params.minCircularity || ratio >= params.maxCircularity)
continue;
}
if (params.filterByInertia)
{
double denominator = sqrt(pow(2 * moms.mu11, 2) + pow(moms.mu20 - moms.mu02, 2));
const double eps = 1e-2;
double ratio;
if (denominator > eps)
{
double cosmin = (moms.mu20 - moms.mu02) / denominator;
double sinmin = 2 * moms.mu11 / denominator;
double cosmax = -cosmin;
double sinmax = -sinmin;
double imin = 0.5 * (moms.mu20 + moms.mu02) - 0.5 * (moms.mu20 - moms.mu02) * cosmin - moms.mu11 * sinmin;
double imax = 0.5 * (moms.mu20 + moms.mu02) - 0.5 * (moms.mu20 - moms.mu02) * cosmax - moms.mu11 * sinmax;
ratio = imin / imax;
}
else
{
ratio = 1;
}
if (ratio < params.minInertiaRatio || ratio >= params.maxInertiaRatio)
continue;
center.confidence = ratio * ratio;
}
if (params.filterByConvexity)
{
vector < Point > hull;
convexHull(Mat(contours[contourIdx]), hull);
double area = contourArea(Mat(contours[contourIdx]));
double hullArea = contourArea(Mat(hull));
double ratio = area / hullArea;
if (ratio < params.minConvexity || ratio >= params.maxConvexity)
continue;
}
center.location = Point2d(moms.m10 / moms.m00, moms.m01 / moms.m00);
if (params.filterByColor)
{
if (binaryImage.at<uchar> (cvRound(center.location.y), cvRound(center.location.x)) != params.blobColor)
continue;
}
//compute blob radius
{
vector<double> dists;
for (size_t pointIdx = 0; pointIdx < contours[contourIdx].size(); pointIdx++)
{
Point2d pt = contours[contourIdx][pointIdx];
dists.push_back(norm(center.location - pt));
}
std::sort(dists.begin(), dists.end());
center.radius = (dists[(dists.size() - 1) / 2] + dists[dists.size() / 2]) / 2.;
}
centers.push_back(center);
curContours.push_back(contours[contourIdx]);
}
}
static std::vector < std::vector<cv::Point> > _contours;
const std::vector < std::vector<cv::Point> > BetterBlobDetector::getContours() {
return _contours;
}
void BetterBlobDetector::detectImpl(const cv::Mat& image, std::vector<cv::KeyPoint>& keypoints, const cv::Mat&) const
{
//TODO: support mask
_contours.clear();
keypoints.clear();
Mat grayscaleImage;
if (image.channels() == 3)
cvtColor(image, grayscaleImage, CV_BGR2GRAY);
else
grayscaleImage = image;
vector < vector<Center> > centers;
vector < vector<cv::Point> >contours;
for (double thresh = params.minThreshold; thresh < params.maxThreshold; thresh += params.thresholdStep)
{
Mat binarizedImage;
threshold(grayscaleImage, binarizedImage, thresh, 255, THRESH_BINARY);
vector < Center > curCenters;
vector < vector<cv::Point> >curContours, newContours;
findBlobs(grayscaleImage, binarizedImage, curCenters, curContours);
vector < vector<Center> > newCenters;
for (size_t i = 0; i < curCenters.size(); i++)
{
bool isNew = true;
for (size_t j = 0; j < centers.size(); j++)
{
double dist = norm(centers[j][ centers[j].size() / 2 ].location - curCenters[i].location);
isNew = dist >= params.minDistBetweenBlobs && dist >= centers[j][ centers[j].size() / 2 ].radius && dist >= curCenters[i].radius;
if (!isNew)
{
centers[j].push_back(curCenters[i]);
size_t k = centers[j].size() - 1;
while( k > 0 && centers[j][k].radius < centers[j][k-1].radius )
{
centers[j][k] = centers[j][k-1];
k--;
}
centers[j][k] = curCenters[i];
break;
}
}
if (isNew)
{
newCenters.push_back(vector<Center> (1, curCenters[i]));
newContours.push_back(curContours[i]);
//centers.push_back(vector<Center> (1, curCenters[i]));
}
}
std::copy(newCenters.begin(), newCenters.end(), std::back_inserter(centers));
std::copy(newContours.begin(), newContours.end(), std::back_inserter(contours));
}
for (size_t i = 0; i < centers.size(); i++)
{
if (centers[i].size() < params.minRepeatability)
continue;
Point2d sumPoint(0, 0);
double normalizer = 0;
for (size_t j = 0; j < centers[i].size(); j++)
{
sumPoint += centers[i][j].confidence * centers[i][j].location;
normalizer += centers[i][j].confidence;
}
sumPoint *= (1. / normalizer);
KeyPoint kpt(sumPoint, (float)(centers[i][centers[i].size() / 2].radius));
keypoints.push_back(kpt);
_contours.push_back(contours[i]);
}
}
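Note: a minimal usage sketch (assuming OpenCV 2.4 and the inputImg/params objects from the first snippet), showing how the contours answer the original concerns about area, convexity and exact segmentation:
BetterBlobDetector detector(params);
std::vector<cv::KeyPoint> keypoints;
detector.detect(inputImg, keypoints);
// contours match the keypoints by index
std::vector<std::vector<cv::Point> > contours = detector.getContours();
for (size_t i = 0; i < keypoints.size() && i < contours.size(); ++i){
    double area = cv::contourArea(contours[i]); // blob area in pixels
    std::vector<cv::Point> hull;
    cv::convexHull(contours[i], hull);
    double convexity = area / cv::contourArea(hull); // convexity ratio
    cv::drawContours(inputImg, contours, (int)i, cv::Scalar(0, 255, 0)); // exact outline
    std::cout << "blob " << i << ": area=" << area << " convexity=" << convexity << std::endl;
}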
// Access SimpleBlobDetector data for video
#include "opencv2/imgproc/imgproc.hpp" //
#include "opencv2/highgui/highgui.hpp"
#include <iostream>
#include <math.h>
#include <vector>
#include <fstream>
#include <string>
#include <sstream>
#include <algorithm>
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/features2d/features2d.hpp"
using namespace cv;
using namespace std;
int main(int argc, char *argv[])
{
const char* fileName ="C:/Users/DAGLI/Desktop/videos/new/m3.avi";
VideoCapture cap(fileName); //
if(!cap.isOpened()) //
{
cout << "Couldn't open Video " << fileName << "\n";
return -1;
}
for(;;) // infinite loop over the video frames
{
Mat frame,labelImg;
cap >> frame;
if(frame.empty()) break;
//imshow("main",frame);
Mat frame_gray;
cvtColor(frame,frame_gray,CV_RGB2GRAY);
//////////////////////////////////////////////////////////////////////////
// convert binary_image
Mat binaryx;
threshold(frame_gray,binaryx,120,255,CV_THRESH_BINARY);
Mat src, gray, thresh, binary;
Mat out;
vector<KeyPoint> keyPoints;
SimpleBlobDetector::Params params;
params.minThreshold = 120;
params.maxThreshold = 255;
params.thresholdStep = 100;
params.minArea = 20;
params.minConvexity = 0.3;
params.minInertiaRatio = 0.01;
params.maxArea = 1000;
params.maxConvexity = 10;
params.filterByColor = false;
params.filterByCircularity = false;
src = binaryx.clone();
SimpleBlobDetector blobDetector( params );
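// note: create("SimpleBlob") is a static factory that returns a new detector;
// its result is discarded here, so this call has no effect on blobDetector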
blobDetector.create("SimpleBlob");
blobDetector.detect( src, keyPoints );
drawKeypoints( src, keyPoints, out, CV_RGB(255,0,0), DrawMatchesFlags::DEFAULT);
cv::Mat blobImg;
cv::drawKeypoints(frame, keyPoints, blobImg);
cv::imshow("Blobs", blobImg);
for(int i=0; i<keyPoints.size(); i++){
//circle(out, keyPoints[i].pt, 20, cvScalar(255,0,0), 10);
//cout<<keyPoints[i].response<<endl;
//cout<<keyPoints[i].angle<<endl;
//cout<<keyPoints[i].size()<<endl;
cout<<keyPoints[i].pt.x<<endl;
cout<<keyPoints[i].pt.y<<endl;
}
imshow( "out", out );
if ((cvWaitKey(40)&0xff)==27) break; // break when Esc is pressed
}
system("pause");
}