Related
I have a 3D depth camera placed above three moving belt lanes and I'm trying to rotate the depth image (or the point cloud) so that the three lanes match the camera's angle. I'm not experienced at all with point clouds, but after some research I've tried to do the following:
Acquire an XYZ cartesian image from the sensor which I turn into a point cloud vector.
Define three points on the point cloud, one on each of the three lanes.
Fit a plane through them by finding the plane coefficients.
Finding the cross product between the plane and the z_normal, and then finding the angle of
rotation.
Use the Eigen library to transform the PCL cloud and turn it back into an openCV Mat.
For whatever reason, I always end up with a bad image with max-int values on one side and zeros on the other. I'm not certain anymore if there's something wrong with the code or if the method above is incorrect to start with.
My code so far:
// helper functions
pcl::PointCloud<pcl::PointXYZ>::Ptr MatToPcl(cv::Mat xyzMat);
cv::Mat PclToMat(pcl::PointCloud<pcl::PointXYZ>::Ptr point_cloud_ptr);
void colorize(cv::Mat& src, cv::Mat& dst);
void clip(cv::Mat& m, const uint16_t lowerBound, const uint16_t upperBound);
while(1)
{
// camera framegrabber object to capture an image
fg->SWTrigger();
if (!fg->WaitForFrame(im.get(), 2000))
{
throw;
}
// openCV Mat declerations
cv::Mat zDepth, zDepthColor;
cv::Mat xyz = im->XYZImage();
vector<cv::Mat> channels(3);
cv::split(xyz, channels);
zDepth = channels[0];
cv::imwrite("xyzMat.png", xyz);
cv::imwrite("depthImage.png", zDepth);
clip(zDepth, 1250, 1400);
colorise(zDepth, zDepthColor);
cv::imwrite("depthColored.png", zDepthColor);
// specify a 3D point on each lane
cv::Point3i p1, p2, p3;
p1.x = w / 4;
p1.y = 24;
p1.z = zDepth.at<uint16_t>(p1.x, p1.y);
p2.x = w / 2;
p2.y = 70;
p2.z = zDepth.at<uint16_t>(p2.x, p2.y);
p3.x = int(w * 0.75);
p3.y = 114;
p3.z = zDepth.at<uint16_t>(p3.x, p3.y);
auto cross = (p2 - p1).cross(p3 - p1);
// transform Mats to point clouds
pcl::PointCloud<pcl::PointXYZ>::Ptr floor_plane, xyzCentered;
floor_plane = MatToPcl(zDepth);
Eigen::Matrix<float, 1, 3> floor_plane_normal_vector, xy_plane_normal_vector, rotation_vector;
floor_plane_normal_vector[0] = cross.x;
floor_plane_normal_vector[1] = cross.y;
floor_plane_normal_vector[2] = cross.z;
// specify the z normal from the xy-plane
xy_plane_normal_vector[0] = 0.0;
xy_plane_normal_vector[1] = 0.0;
xy_plane_normal_vector[2] = 1.0;
// cross product and normalize vector
rotation_vector = xy_plane_normal_vector.cross(floor_plane_normal_vector);
rotation_vector.normalized();
// angle of rotation
float theta = -atan2(rotation_vector.norm(), xy_plane_normal_vector.dot(floor_plane_normal_vector));
// transform plane according to angle
Eigen::Affine3f transform_2 = Eigen::Affine3f::Identity();
transform_2.translation() << 0, 0, 30;
transform_2.rotate(Eigen::AngleAxisf(theta, rotation_vector));
pcl::transformPointCloud(*floor_plane, *xyzCentered, transform_2);
// Pointcloud to Mat again
cv::Mat xyzRot = PclToMat(xyzCentered);
// clipLow and clipHigh values obtained from trackbars
clip(xyzRot, clipLow, clipHigh);
cv::Mat xyzRotColor;
colorize(xyzRot, xyzRotColor)
cv::imshow("result", xyzRotColor);
cv::waitKey(1);
}
pcl::PointCloud<pcl::PointXYZ>::Ptr MatToPcl(cv::Mat xyzMat)
{
/*
* Function: Get from a Mat to pcl pointcloud datatype
* In: cv::Mat
* Out: pcl::PointCloud
*/
//char pr=100, pg=100, pb=100;
pcl::PointCloud<pcl::PointXYZ>::Ptr point_cloud_ptr(new pcl::PointCloud<pcl::PointXYZ>);;
vector<cv::Mat> channels(3);
cv::split(xyzMat, channels);
for (int i = 0; i < ifmXYZ.rows; i++)
{
for (int j = 0; j < ifmXYZ.cols; j++)
{
pcl::PointXYZ point;
point.x = channels[0].at<short>(i,j);
point.y = channels[1].at<short>(i, j);
point.z = channels[2].at<short>(i, j);
// when color needs to be added:
//uint32_t rgb = (static_cast<uint32_t>(pr) << 16 | static_cast<uint32_t>(pg) << 8 | static_cast<uint32_t>(pb));
//point.rgb = *reinterpret_cast<float*>(&rgb);
point_cloud_ptr->points.push_back(point);
}
}
point_cloud_ptr->width = (int)point_cloud_ptr->points.size();
/*point_cloud_ptr->height = 1;*/
return point_cloud_ptr;
}
// convert PCL to cv::Mat, taking only the depth values at z.
cv::Mat PclToMat(pcl::PointCloud<pcl::PointXYZ>::Ptr point_cloud_ptr)
{
cv::Mat depth_image;
if (!depth_image.empty())
depth_image.release();
depth_image.create(132, 176, CV_32F);
int count = 0;
for (int i = 0; i < 132; i++)
{
for (int j = 0; j < 176; j++)
{
depth_image.at<float>(i, j) = point_cloud_ptr->points.at(count++).z;
}
}
depth_image.convertTo(depth_image, CV_16UC1);
return depth_image;
}
/*
* For display purposes with cv::imshow, will convert a 16bit depth image to 8bit 3 channel colored image
* thanks to fmw42 for the function at https://stackoverflow.com/a/67678634/13184944
*/
void colorize(cv::Mat& src, cv::Mat& dst)
{
// stretch the image by rescaling intensity within the output 8-bit range
double oldMin;
double oldMax;
cv::Point minLoc;
cv::Point maxLoc;
cv::minMaxLoc(src, &oldMin, &oldMax, &minLoc, &maxLoc);
double oldRange = oldMax - oldMin;
double newMin = 0.0;
double newMax = 255.0;
double newRange = newMax - newMin;
//cout << oldMin << ' ' << oldMax << ' ' << oldRange << '\n';
// clip the values of the image to the required range
clip(src, oldMin, oldMax);
//TODO: Look at difference between OpenCV normalization and skimage
normalize(src, dst, 0, 255, NORM_MINMAX, CV_8UC1);
//img = (img - cv::Scalar(oldMin)) / (cv::Scalar(oldRange));
//img = (img * cv::Scalar(newRange)) + cv::Scalar(newMin);
cv::Mat channels[3] = { dst, dst, dst };
cv::merge(channels, 3, dst);
cv::Mat C(1, 6, CV_8UC(3));
cv::Vec3b color1 = { 0, 0, 255 };
cv::Vec3b color2 = { 0, 165, 255 };
cv::Vec3b color3 = { 0, 255, 255 };
cv::Vec3b color4 = { 255, 255, 0 };
cv::Vec3b color5 = { 255, 0, 0 };
cv::Vec3b color6 = { 128, 64, 64 };
C.at<cv::Vec3b>(0, 0) = color1;
C.at<cv::Vec3b>(0, 1) = color2;
C.at<cv::Vec3b>(0, 2) = color3;
C.at<cv::Vec3b>(0, 3) = color4;
C.at<cv::Vec3b>(0, 4) = color5;
C.at<cv::Vec3b>(0, 5) = color6;
cv::Mat lut;
cv::resize(C, lut, cv::Size(256, 1), 0.0, 0.0, cv::INTER_LINEAR);
//cout << lut.size << '\n';
cv::LUT(dst, lut, dst);
return;
}
void clip(cv::Mat& m, const uint16_t lowerBound, const uint16_t upperBound)
{
m.setTo(lowerBound, m < lowerBound);
m.setTo(upperBound, m > upperBound);
return;
}
Apologies if this is really basic or something is obviously wrong but I feel stuck here. I also tried segmentation with ransac but the it never aligns the plane in the way I wanted.
Thanks!
Edit: Updated the code to include additional steps and functions. Only the camera initialization is skipped.
The clip and colorize functions aid in displaying the 16bit depth image. My end goal here is to be able to use trackbars with clip(zImg, low, high) where the three lanes will always be vertically aligns (as in, change color at the same rate) as I change the clip values.
download link with image files: link
Colorized depth image:
I want to apply unsharp mask like Adobe Photoshop,
I know this answer, but it's not as sharp as Photoshop.
Photoshop has 3 parameters in Smart Sharpen dialog: Amount, Radius, Reduce Noise; I want to implement all of them.
This is the code I wrote, according to various sources in SO.
But the result is good in some stages ("blurred", "unsharpMask", "highContrast"), but in the last stage ("retval") the result is not good.
Where am I wrong, what should I improve?
Is it possible to improve the following algorithm in terms of performance?
#include "opencv2/opencv.hpp"
#include "fstream"
#include "iostream"
#include <chrono>
using namespace std;
using namespace cv;
// from https://docs.opencv.org/3.4/d3/dc1/tutorial_basic_linear_transform.html
void increaseContrast(Mat img, Mat* dst, int amountPercent)
{
*dst = img.clone();
double alpha = amountPercent / 100.0;
*dst *= alpha;
}
// from https://stackoverflow.com/a/596243/7206675
float luminanceAsPercent(Vec3b color)
{
return (0.2126 * color[2]) + (0.7152 * color[1]) + (0.0722 * color[0]);
}
// from https://stackoverflow.com/a/2938365/7206675
Mat usm(Mat original, int radius, int amountPercent, int threshold)
{
// copy original for our return value
Mat retval = original.clone();
// create the blurred copy
Mat blurred;
cv::GaussianBlur(original, blurred, cv::Size(0, 0), radius);
cv::imshow("blurred", blurred);
waitKey();
// subtract blurred from original, pixel-by-pixel to make unsharp mask
Mat unsharpMask;
cv::subtract(original, blurred, unsharpMask);
cv::imshow("unsharpMask", unsharpMask);
waitKey();
Mat highContrast;
increaseContrast(original, &highContrast, amountPercent);
cv::imshow("highContrast", highContrast);
waitKey();
// assuming row-major ordering
for (int row = 0; row < original.rows; row++)
{
for (int col = 0; col < original.cols; col++)
{
Vec3b origColor = original.at<Vec3b>(row, col);
Vec3b contrastColor = highContrast.at<Vec3b>(row, col);
Vec3b difference = contrastColor - origColor;
float percent = luminanceAsPercent(unsharpMask.at<Vec3b>(row, col));
Vec3b delta = difference * percent;
if (*(uchar*)&delta > threshold) {
retval.at<Vec3b>(row, col) += delta;
//retval.at<Vec3b>(row, col) = contrastColor;
}
}
}
return retval;
}
int main(int argc, char* argv[])
{
if (argc < 2) exit(1);
Mat mat = imread(argv[1]);
mat = usm(mat, 4, 110, 66);
imshow("usm", mat);
waitKey();
//imwrite("USM.png", mat);
}
Original Image:
Blurred stage - Seemingly good:
UnsharpMask stage - Seemingly good:
HighContrast stage - Seemingly good:
Result stage of my code - Looks bad!
Result From Photoshop - Excellent!
First of all, judging by the artefacts that Photoshop left on the borders of the petals, I'd say that it applies the mask by using a weighted sum between the original image and the mask, as in the answer you tried first.
I modified your code to implement this scheme and I tried to tweak the parameters to get as close as the Photoshop result, but I couldn't without creating a lot of noise. I wouldn't try to guess what Photoshop is exactly doing (I would definitely like to know), however I discovered that it is fairly reproducible by applying some filter on the mask to reduce the noise. The algorithm scheme would be:
blurred = blur(image, Radius)
mask = image - blurred
mask = some_filter(mask)
sharpened = (mask < Threshold) ? image : image - Amount * mask
I implemented this and tried using basic filters (median blur, mean filter, etc) on the mask and this is the kind of result I can get:
which is a bit noisier than the Photoshop image but, in my opinion, close enough to what you wanted.
On another note, it will of course depend on the usage you have for your filter, but I think that the settings you used in Photoshop are too strong (you have big overshoots near petals borders). This is sufficient to have a nice image at the naked eye, with limited overshoot:
Finally, here is the code I used to generate the two images above:
#include <opencv2/opencv.hpp>
#include <iostream>
using namespace std;
using namespace cv;
Mat usm(Mat original, float radius, float amount, float threshold)
{
// work using floating point images to avoid overflows
cv::Mat input;
original.convertTo(input, CV_32FC3);
// copy original for our return value
Mat retbuf = input.clone();
// create the blurred copy
Mat blurred;
cv::GaussianBlur(input, blurred, cv::Size(0, 0), radius);
// subtract blurred from original, pixel-by-pixel to make unsharp mask
Mat unsharpMask;
cv::subtract(input, blurred, unsharpMask);
// --- filter on the mask ---
//cv::medianBlur(unsharpMask, unsharpMask, 3);
cv::blur(unsharpMask, unsharpMask, {3,3});
// --- end filter ---
// apply mask to image
for (int row = 0; row < original.rows; row++)
{
for (int col = 0; col < original.cols; col++)
{
Vec3f origColor = input.at<Vec3f>(row, col);
Vec3f difference = unsharpMask.at<Vec3f>(row, col);
if(cv::norm(difference) >= threshold) {
retbuf.at<Vec3f>(row, col) = origColor + amount * difference;
}
}
}
// convert back to unsigned char
cv::Mat ret;
retbuf.convertTo(ret, CV_8UC3);
return ret;
}
int main(int argc, char* argv[])
{
if (argc < 3) exit(1);
Mat original = imread(argv[1]);
Mat expected = imread(argv[2]);
// closer to Photoshop
Mat current = usm(original, 0.8, 12., 1.);
// better settings (in my opinion)
//Mat current = usm(original, 2., 1., 3.);
cv::imwrite("current.png", current);
// comparison plot
cv::Rect crop(127, 505, 163, 120);
cv::Mat crops[3];
cv::resize(original(crop), crops[0], {0,0}, 4, 4, cv::INTER_NEAREST);
cv::resize(expected(crop), crops[1], {0,0}, 4, 4, cv::INTER_NEAREST);
cv::resize( current(crop), crops[2], {0,0}, 4, 4, cv::INTER_NEAREST);
char const* texts[] = {"original", "photoshop", "current"};
cv::Mat plot = cv::Mat::zeros(120 * 4, 163 * 4 * 3, CV_8UC3);
for(int i = 0; i < 3; ++i) {
cv::Rect region(163 * 4 * i, 0, 163 * 4, 120 * 4);
crops[i].copyTo(plot(region));
cv::putText(plot, texts[i], region.tl() + cv::Point{5,40},
cv::FONT_HERSHEY_SIMPLEX, 1.5, CV_RGB(255, 0, 0), 2.0);
}
cv::imwrite("plot.png", plot);
}
Here's my attempt at 'smart' unsharp masking. Result isn't very good, but I'm posting anyway. Wikipedia article on unsharp masking has details about smart sharpening.
Several things I did differently:
Convert BGR to Lab color space and apply the enhancements to the brightness channel
Use an edge map to apply enhancement to the edge regions
Original:
Enhanced: sigma=2 amount=3 low=0.3 high=.8 w=2
Edge map: low=0.3 high=.8 w=2
#include "opencv2/core.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/highgui.hpp"
#include <cstring>
cv::Mat not_so_smart_sharpen(
const cv::Mat& bgr,
double sigma,
double amount,
double canny_low_threshold_weight,
double canny_high_threshold_weight,
int edge_weight)
{
cv::Mat enhanced_bgr, lab, enhanced_lab, channel[3], blurred, difference, bw, kernel, edges;
// convert to Lab
cv::cvtColor(bgr, lab, cv::ColorConversionCodes::COLOR_BGR2Lab);
// perform the enhancement on the brightness component
cv::split(lab, channel);
cv::Mat& brightness = channel[0];
// smoothing for unsharp masking
cv::GaussianBlur(brightness, blurred, cv::Size(0, 0), sigma);
difference = brightness - blurred;
// calculate an edge map. I'll use Otsu threshold as the basis
double thresh = cv::threshold(brightness, bw, 0, 255, cv::ThresholdTypes::THRESH_BINARY | cv::ThresholdTypes::THRESH_OTSU);
cv::Canny(brightness, edges, thresh * canny_low_threshold_weight, thresh * canny_high_threshold_weight);
// control edge thickness. use edge_weight=0 to use Canny edges unaltered
cv::dilate(edges, edges, kernel, cv::Point(-1, -1), edge_weight);
// unsharp masking on the edges
cv::add(brightness, difference * amount, brightness, edges);
// use the enhanced brightness channel
cv::merge(channel, 3, enhanced_lab);
// convert to BGR
cv::cvtColor(enhanced_lab, enhanced_bgr, cv::ColorConversionCodes::COLOR_Lab2BGR);
// cv::imshow("edges", edges);
// cv::imshow("difference", difference * amount);
// cv::imshow("original", bgr);
// cv::imshow("enhanced", enhanced_bgr);
// cv::waitKey(0);
return enhanced_bgr;
}
int main(int argc, char *argv[])
{
double sigma = std::stod(argv[1]);
double amount = std::stod(argv[2]);
double low = std::stod(argv[3]);
double high = std::stod(argv[4]);
int w = std::stoi(argv[5]);
cv::Mat bgr = cv::imread("flower.jpg");
cv::Mat enhanced = not_so_smart_sharpen(bgr, sigma, amount, low, high, w);
cv::imshow("original", bgr);
cv::imshow("enhanced", enhanced);
cv::waitKey(0);
return 0;
}
I wrote a program that uses the openCV and boost::filesystem libraries, and the program crops images to fit the object in the image. (Photoshop has already been used to replace most of the backgrounds with white). However, I have thousands and thousands of pictures that I need to sort through. I already know how to use the filesystem library and have no issue traversing the system's directories. However, how do I detect images that have a non-white background (missed in the photoshop process)? This incorrect crop is formatted to have a margin and have a 1:1 aspect ratio, but it still has the odd grayish background. The image should end up looking like this correct crop. So, how do I determine if the image has a background like the incorrect crop?
could you try the code below
( to test the code you should create a directory c:/cropping and some subdirs on it. and put some images in the dirs you created.)
hope it will be helpful
#include "opencv2/imgproc.hpp"
#include "opencv2/highgui.hpp"
#include <iostream>
using namespace cv;
using namespace std;
vector<Rect> divideHW(Mat src, int dim, double threshold1, double threshold2)
{
Mat gray, reduced, canny;
if (src.channels() == 1)
{
gray = src;
}
if (src.channels() == 3)
{
Laplacian(src, gray, CV_8UC1);
cvtColor(gray, gray, COLOR_BGR2GRAY);
imshow("sobel", gray);
}
reduce(gray, reduced, dim, REDUCE_AVG);
Canny(reduced, canny, threshold1, threshold2);
vector<Point> pts;
findNonZero(canny, pts);
vector<Rect> rects;
Rect rect(0, 0, gray.cols, gray.rows);
if (!pts.size())
{
rects.push_back(rect);
}
int ref_x = 0;
int ref_y = 0;
for (size_t i = 0; i< pts.size(); i++)
{
if (dim)
{
rect.height = pts[i].y - ref_y;
rects.push_back(rect);
rect.y = pts[i].y;
ref_y = rect.y;
if (i == pts.size() - 1)
{
rect.height = gray.rows - pts[i].y;
rects.push_back(rect);
}
}
else
{
rect.width = pts[i].x - ref_x;
rects.push_back(rect);
rect.x = pts[i].x;
ref_x = rect.x;
if (i == pts.size() - 1)
{
rect.width = gray.cols - pts[i].x;
rects.push_back(rect);
}
}
}
return rects;
}
int main( int argc, char** argv )
{
int wait_time = 0; // set this value > 0 for not waiting
vector<String> filenames;
String folder = "c:/cropping/*.*"; // you can change this value or set it by argv[1]
glob(folder, filenames, true);
for (size_t i = 0; i < filenames.size(); ++i)
{
Mat src = imread(filenames[i]);
if (src.data)
{
vector<Rect> rects = divideHW(src, 0, 0, 0);
if (rects.size() < 3) continue;
Rect border;
border.x = rects[0].width;
border.width = src.cols - rects[rects.size() - 1].width - border.x;
rects = divideHW(src, 1, 0, 20);
if (rects.size() < 3) continue;
border.y = rects[0].height;
border.height = src.rows - rects[rects.size() - 1].height - border.y;
Mat cropped = src(border).clone();
src(border).setTo(Scalar(255, 255, 255));
Scalar _mean = mean(src);
int mean_total = _mean[0] + _mean[1] + _mean[2];
if (mean_total > 763)
{
imwrite(filenames[i] + ".jpg", cropped);
imshow("cropped", cropped);
waitKey(wait_time);
}
}
}
return 0;
}
I that you can compute the gradient of an ROI of your image (all rows in column 10 to 15 for exemple).
Then you compute the energy of your gradient (sum of all pixels of the gradient image).
If the energy is very low, you have an uniform background (you can't know the background color with this algorithm). Else you have a textured backgroud.
This is a first approach. You can found in OpenCV all the functions required to do that.
A second approach :
If you are sure that your background is white, you can get the ROI of the first approach, then iterate over all pixels, and check for its color. If there are more than "n" pixels with a different color than "255,255,255", you can mark your image as "non white Background".
I've written a piece of code to take my camera feed, split it into a grid (like a chess board) and evaluate each square for colour.
The code i currently have looks like this:
using namespace std;
using namespace cv;
//Standard Dilate and erode functions to improve white/black areas in Binary Image
// Pointer &thresh used so it affects threshImg so it can be used in tracking.
void morphOps(Mat &thresh){
//Increases size of black to remove unwanted white specks outside of object
Mat erodeElement = getStructuringElement( MORPH_RECT,Size(3,3));
//Increases white-area size to remove holes in object
Mat dilateElement = getStructuringElement( MORPH_RECT,Size(8,8));
erode(thresh,thresh,erodeElement);
erode(thresh,thresh,erodeElement);
dilate(thresh,thresh,dilateElement);
dilate(thresh,thresh,dilateElement);
}
//Tracking for the Filtered Object
void trackFilteredObject(int noteNum, string colourtype, Mat &thresh ,Mat HSVImage, Mat &cam){
vector<Brick> Bricks;
Mat temp;
thresh.copyTo(temp);
threshold(temp, thresh, 120, 255, 3); //3 = Threshold to Zero
int whitePixs = countNonZero(thresh);
int cols = thresh.cols;
int rows = thresh.rows;
int imgSize = (rows*cols)/0.75;
if(whitePixs > imgSize){
Brick Brick;
Brick.setColour(colourtype);
Brick.setnoteNum(noteNum);
Bricks.push_back(Brick);
}
int main(int argc, char* argv[])
{
/// Create a window
namedWindow("window", CV_WINDOW_AUTOSIZE );
while(1){
//initialtes camera, sets capture resolution
VideoCapture capture;
capture.open(1);
capture.set(CV_CAP_PROP_FPS, 30);
capture.set(CV_CAP_PROP_FRAME_WIDTH,640);
capture.set(CV_CAP_PROP_FRAME_HEIGHT,480);
Mat cam;
// Saves camera image to Matrix "cam"
capture.read(cam);
//Sets Widths and Heights based on camera resolution (cam.cols/cam.rows retrieves this)
int Width = cam.cols;
int gridWidth = Width/16;
int Height = cam.rows;
int gridHeight = Height/16;
//Splits image into 256 squares going left to right through rows and descending vertically. (16 squares per row for 4/4 pattern)
Mat BigImage;
Mat HSVImage;
// Converts cam to HSV pallete
cvtColor(cam, HSVImage, COLOR_BGR2HSV);
Size smallSize(gridWidth,gridHeight);
std::vector<Mat> smallImages;
for (int y = 0; y < HSVImage.rows; y += smallSize.height)
{
for (int x = 0; x < HSVImage.cols; x += smallSize.width)
{
cv::Rect rect = cv::Rect(x,y, smallSize.width, smallSize.height);
//Saves the matrix to vector
smallImages.push_back(cv::Mat(HSVImage, rect));
}
}
for (int i = 0; i < smallImages.size(); i++){
Mat HSV;
smallImages.at(i).copyTo(HSV);
int noteNum = i;
Mat threshImg;
inRange(HSV,Scalar(0,0,0),Scalar(255,255,255),threshImg);
morphOps(threshImg); //erodes image
string colour = "Red";
trackFilteredObject(noteNum,colour,threshImg,HSV,cam);
inRange(HSV,Scalar(0,0,0),Scalar(255,255,255),threshImg);
morphOps(threshImg); // threshold = mat after erosion/dilation
colour = "yellow";
trackFilteredObject(noteNum,colour,threshImg,HSV,cam);
inRange(HSV,Scalar(0,0,0),Scalar(255,255,255),threshImg);
morphOps(threshImg);
colour = "Black";
trackFilteredObject(noteNum,colour,threshImg,HSV,cam);
inRange(HSV,Scalar(0,0,0),Scalar(255,255,255),threshImg);
morphOps(threshImg); // threshold = mat after erosion/dilation
colour = "White";
trackFilteredObject(noteNum,colour,threshImg,HSV,cam);
inRange(HSV,Scalar(0,0,0),Scalar(255,255,255),threshImg);
morphOps(threshImg); // threshold = mat after erosion/dilation
colour = "Green";
trackFilteredObject(noteNum,colour,threshImg,HSV,cam);
}
imshow("window", cam);
}
return 0;
}
At the moment the code takes quite a long time to execute a full loop (about 1.5 seconds) but i ideally need it to run as close to real time as possible for a music application.
Could anyone suggest why it takes so long to execute? Is there a better way to evaluate the colour of each square?
My class is as follows:
//Brick.h
#include <string>
using namespace std;
class Brick{
public:
Brick(void);
~Brick(void);
string getColour();
void setColour(string whatColour);
int getnoteNum();
void setnoteNum(int whatnoteNum);
private:
int noteNum;
string colour;
};
///
Brick.cpp
#include <stdio.h>
#include <Brick.h>
Brick::Brick(void){
}
Brick::~Brick(void){
}
// get/set Colour
////////////////////////////////
string Brick::getColour(){
return Brick::colour;
}
void Brick::setColour(string whatColour){
Brick::colour = whatColour;
}
// get/set Note Number
////////////////////////////////
int Brick::getnoteNum(){
return Brick::noteNum;
}
void Brick::setnoteNum(int whatnoteNum){
Brick::noteNum = whatnoteNum;
}
I will be so grateful to anyone who replies!
Thank you.
Try hard to not use erode and dilate. These operations are extremely time intensive. I'm quite confident that they are the bottleneck in your program.
There are some measures you can take:
Downscaling(or downsampling) the image. Ideally, you want the downscaled image's pixel to be of the same order of magnitude of a grid square's size.
Remove dilate and erode.
Off-topic: Bugfix. Fix the inRange() parameters used. Consult the HSV color space diagram and normalize to your space. e.g. extracting "green pixels" would correspond to inRange(HSV,Scalar(80f*255/360,0.3*255,0.3*255),Scalar(160f*255/360,255,255),threshImg);
I am trying to calculate the skew of text in an image so I can correct it for the best OCR results.
Currently this is the function I am using:
double compute_skew(Mat &img)
{
// Binarize
cv::threshold(img, img, 225, 255, cv::THRESH_BINARY);
// Invert colors
cv::bitwise_not(img, img);
cv::Mat element = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(5, 3));
cv::erode(img, img, element);
std::vector<cv::Point> points;
cv::Mat_<uchar>::iterator it = img.begin<uchar>();
cv::Mat_<uchar>::iterator end = img.end<uchar>();
for (; it != end; ++it)
if (*it)
points.push_back(it.pos());
cv::RotatedRect box = cv::minAreaRect(cv::Mat(points));
double angle = box.angle;
if (angle < -45.)
angle += 90.;
cv::Point2f vertices[4];
box.points(vertices);
for(int i = 0; i < 4; ++i)
cv::line(img, vertices[i], vertices[(i + 1) % 4], cv::Scalar(255, 0, 0), 1, CV_AA);
return angle;
}
When I look at then angle in debug I get 0.000000
However when I give it this image I get proper results of a skew of about 16 degrees:
How can I properly detect the skew in the first image?
there are a few other ways to get the skew degree, 1) by hough transform 2) by horizontal projection profile. rotate the image in different angle bins and calculate horizontal projection. the angle with the greatest horizontal histogram value is the deskewed angle.
i have provided below implementation of 1). i believe this to be superior to the boxing method you are using because it requires that you completely clean the image of any noise,which just isnt possible in most of the time.
you should know that the method doesnt work well if there's too much noise. you can reduce noise in different ways depending on what type of "line" you want to treat as the most dominant in the image. i have provided two methods for this. be sure to play with parameters and threshold etc.
results (all run using preprocess2, all run using same parameter set)
code
#include <opencv2/opencv.hpp>
using namespace cv;
using namespace std;
void hough_transform(Mat& im,Mat& orig,double* skew)
{
double max_r=sqrt(pow(.5*im.cols,2)+pow(.5*im.rows,2));
int angleBins = 180;
Mat acc = Mat::zeros(Size(2*max_r,angleBins),CV_32SC1);
int cenx = im.cols/2;
int ceny = im.rows/2;
for(int x=1;x<im.cols-1;x++)
{
for(int y=1;y<im.rows-1;y++)
{
if(im.at<uchar>(y,x)==255)
{
for(int t=0;t<angleBins;t++)
{
double r =(x-cenx)*cos((double)t/angleBins*CV_PI)+(y-ceny)*sin((double)t /angleBins*CV_PI);
r+=max_r;
acc.at<int>(t,int(r))++;
}
}
}
}
Mat thresh;
normalize(acc,acc,255,0,NORM_MINMAX);
convertScaleAbs(acc,acc);
/*debug
Mat cmap;
applyColorMap(acc,cmap,COLORMAP_JET);
imshow("cmap",cmap);
imshow("acc",acc);*/
Point maxLoc;
minMaxLoc(acc,0,0,0,&maxLoc);
double theta = (double)maxLoc.y/angleBins*CV_PI;
double rho = maxLoc.x-max_r;
if(abs(sin(theta))<0.000001)//check vertical
{
//when vertical, line equation becomes
//x = rho
double m = -cos(theta)/sin(theta);
Point2d p1 = Point2d(rho+im.cols/2,0);
Point2d p2 = Point2d(rho+im.cols/2,im.rows);
line(orig,p1,p2,Scalar(0,0,255),1);
*skew=90;
cout<<"skew angle "<<" 90"<<endl;
}else
{
//convert normal form back to slope intercept form
//y = mx + b
double m = -cos(theta)/sin(theta);
double b = rho/sin(theta)+im.rows/2.-m*im.cols/2.;
Point2d p1 = Point2d(0,b);
Point2d p2 = Point2d(im.cols,im.cols*m+b);
line(orig,p1,p2,Scalar(0,0,255),1);
double skewangle;
skewangle= p1.x-p2.x>0? (atan2(p1.y-p2.y,p1.x-p2.x)*180./CV_PI):(atan2(p2.y-p1.y,p2. x-p1.x)*180./CV_PI);
*skew=skewangle;
cout<<"skew angle "<<skewangle<<endl;
}
imshow("orig",orig);
}
Mat preprocess1(Mat& im)
{
Mat ret = Mat::zeros(im.size(),CV_32SC1);
for(int x=1;x<im.cols-1;x++)
{
for(int y=1;y<im.rows-1;y++)
{
int gy = (im.at<uchar>(y-1,x+1)-im.at<uchar>(y-1,x-1))
+2*(im.at<uchar>(y,x+1)-im.at<uchar>(y,x-1))
+(im.at<uchar>(y+1,x+1)-im.at<uchar>(y+1,x-1));
int gx = (im.at<uchar>(y+1,x-1) -im.at<uchar>(y-1,x-1))
+2*(im.at<uchar>(y+1,x)-im.at<uchar>(y-1,x))
+(im.at<uchar>(y+1,x+1)-im.at<uchar>(y-1,x+1));
int g2 = (gy*gy + gx*gx);
ret.at<int>(y,x)=g2;
}
}
normalize(ret,ret,255,0,NORM_MINMAX);
ret.convertTo(ret,CV_8UC1);
threshold(ret,ret,50,255,THRESH_BINARY);
return ret;
}
Mat preprocess2(Mat& im)
{
// 1) assume white on black and does local thresholding
// 2) only allow voting top is white and buttom is black(buttom text line)
Mat thresh;
//thresh=255-im;
thresh=im.clone();
adaptiveThreshold(thresh,thresh,255,CV_ADAPTIVE_THRESH_GAUSSIAN_C,THRESH_BINARY,15,-2);
Mat ret = Mat::zeros(im.size(),CV_8UC1);
for(int x=1;x<thresh.cols-1;x++)
{
for(int y=1;y<thresh.rows-1;y++)
{
bool toprowblack = thresh.at<uchar>(y-1,x)==0 || thresh.at<uchar>(y-1,x-1)==0 || thresh.at<uchar>(y-1,x+1)==0;
bool belowrowblack = thresh.at<uchar>(y+1,x)==0 || thresh.at<uchar>(y+1, x-1)==0 || thresh.at<uchar>(y+1,x+1)==0;
uchar pix=thresh.at<uchar>(y,x);
if((!toprowblack && pix==255 && belowrowblack))
{
ret.at<uchar>(y,x) = 255;
}
}
}
return ret;
}
Mat rot(Mat& im,double thetaRad)
{
cv::Mat rotated;
double rskew = thetaRad* CV_PI/180;
double nw = abs(sin(thetaRad))*im.rows+abs(cos(thetaRad))*im.cols;
double nh = abs(cos(thetaRad))*im.rows+abs(sin(thetaRad))*im.cols;
cv::Mat rot_mat = cv::getRotationMatrix2D(Point2d(nw*.5,nh*.5), thetaRad*180/CV_PI, 1);
Mat pos = Mat::zeros(Size(1,3),CV_64FC1);
pos.at<double>(0)=(nw-im.cols)*.5;
pos.at<double>(1)=(nh-im.rows)*.5;
Mat res = rot_mat*pos;
rot_mat.at<double>(0,2) += res.at<double>(0);
rot_mat.at<double>(1,2) += res.at<double>(1);
cv::warpAffine(im, rotated, rot_mat,Size(nw,nh), cv::INTER_LANCZOS4);
return rotated;
}
int main(int argc, char** argv)
{
string src="C:/data/skew.png";
Mat im= imread(src);
Mat gray;
cvtColor(im,gray,CV_BGR2GRAY);
Mat preprocessed = preprocess2(gray);
imshow("preprocessed2",preprocessed);
double skew;
hough_transform(preprocessed,im,&skew);
Mat rotated = rot(im,skew* CV_PI/180);
imshow("corrected",rotated);
waitKey(0);
return 0;
}
the approach you posted has its own "ideal binarization" assumption. the threshold value directly affects the process. utilize otsu threshold, or think about DFT for a generic solution.
otsu trial:
int main()
{
Mat input = imread("your text");
cvtColor(input, input, CV_BGR2GRAY);
Mat img;
cv::threshold(input, img, 100, 255, cv::THRESH_OTSU);
cv::bitwise_not(img, img);
imshow("img ", img);
waitKey(0);
vector<Point> points;
findNonZero(img, points);
cv::RotatedRect box = cv::minAreaRect(points);
double angle = box.angle;
if (angle < -45.)
angle += 90.;
cv::Point2f vertices[4];
box.points(vertices);
for(int i = 0; i < 4; ++i)
cv::line(img, vertices[i], vertices[(i + 1) % 4], cv::Scalar(255, 0, 0));
imshow("img ", img);
waitKey(0);
return 0;
}