Related
Nvidia's cuDNN for deep learning has a rather interesting format for images called CHW. I have a cv::Mat img; that I want to convert to a one-dimensional vector of floats. The problem that I'm having is that the format of the 1D vector for CHW is (RR...R, GG..G,BB..B).
So I'm curious as to how I can extract the channel values for each pixel and order them for this format.
I faced the same problem and solved it this way:
#include <opencv2/opencv.hpp>
/**
 * Flatten an interleaved (HWC) image into a planar (CHW) row vector.
 *
 * The image is split into its channels, each channel is reshaped into a
 * single row, and the rows of channels 0, 1 and 2 are concatenated in that
 * order.
 *
 * @param image Input image with at least three channels; channels beyond the
 *              third are ignored.
 * @return Single-channel matrix with one row and 3 * rows * cols columns.
 * @throws cv::Exception if the image is empty or has fewer than three channels.
 */
cv::Mat hwc2chw(const cv::Mat &image){
    // The fixed indices 0..2 below would run past the end of the channel
    // vector for empty or 1-/2-channel input, so reject those up front.
    CV_Assert(!image.empty() && image.channels() >= 3);

    std::vector<cv::Mat> rgb_images;
    cv::split(image, rgb_images);

    // Stretch one-channel images to vector
    cv::Mat m_flat_r = rgb_images[0].reshape(1,1);
    cv::Mat m_flat_g = rgb_images[1].reshape(1,1);
    cv::Mat m_flat_b = rgb_images[2].reshape(1,1);

    // Now we can rearrange channels if need
    cv::Mat matArray[] = { m_flat_r, m_flat_g, m_flat_b};

    cv::Mat flat_image;
    // Concatenate three vectors to one
    cv::hconcat( matArray, 3, flat_image );
    return flat_image;
}
P.S. If the input image is not in RGB channel order, you can change the order of the channels on the line that creates matArray.
Use cv::dnn::blobFromImage:
cv::Mat bgr_image = cv::imread(imageFileName);
cv::Mat chw_image = cv::dnn::blobFromImage
(
bgr_image,
1.0, // scale factor
cv::Size(), // spatial size for output image
cv::Scalar(), // mean
true, // swapRB: BGR to RGB
false, // crop
CV_32F // Depth of output blob. Choose CV_32F or CV_8U.
);
const float* data = reinterpret_cast<const float*>(chw_image.data);
int data_length = 1 * 3 * bgr_image.rows * bgr_image.cols;
You can either iterate over the image manually and copy the values into the right place, or you can use something like cv::extractChannel to copy the channels one by one like so:
#include <opencv2/opencv.hpp>
int main()
{
//create dummy 3 channel float image
cv::Mat sourceRGB(cv::Size(100,100),CV_32FC3);
auto size = sourceRGB.size();
for (int y = 0; y < size.height; ++y)
{
for (int x = 0; x < size.width; ++x)
{
float* pxl = sourceRGB.ptr<float>(x, y);
*pxl = x / 100.0f;
*(pxl+1) = y / 100.0f;
*(pxl + 2) = (y / 100.0f) * (x / 100.0f);
}
}
cv::imshow("test", sourceRGB);
cv::waitKey(0);
//create single image with all 3 channels one after the other
cv::Size newsize(size.width,size.height*3);
cv::Mat destination(newsize,CV_32FC1);
//copy the channels from the source image to the destination
for (int i = 0; i < sourceRGB.channels(); ++i)
{
cv::extractChannel(
sourceRGB,
cv::Mat(
size.height,
size.width,
CV_32FC1,
&(destination.at<float>(size.height*size.width*i))),
i);
}
cv::imshow("test", destination);
cv::waitKey(0);
return 0;
}
I am building a scanner feature for my app and binarize the photo of the document with OpenCV:
// convert to greyscale (cvtColor allocates its own output, so no pre-allocation is needed)
cv::Mat converted, blurred, blackAndWhite;
cv::cvtColor(inputMatrix, converted, CV_BGR2GRAY );
// remove noise
cv::GaussianBlur(converted, blurred, cv::Size(3,3), 0);
// adaptive threshold: read the blurred image (blackAndWhite is still empty at
// this point) and write the binarized result into blackAndWhite
cv::adaptiveThreshold(blurred, blackAndWhite, 255, cv::ADAPTIVE_THRESH_GAUSSIAN_C, cv::THRESH_BINARY, 15 , 9);
The result is okay, but scans from different scanner apps are much better. Especially very small, tiny sized text is much better:
Processed with opencv
Scanned With DropBox
What can I do, to improve my result?
Maybe the apps are using anti-aliasing to make their binarized output look nicer. To obtain a similar effect, I first tried binarizing the image, but the result didn't look very nice with all the jagged edges. Then I applied pyramid upsampling followed by downsampling to the result, and the output was better.
I didn't use adaptive thresholding however. I segmented the text-like regions and processed those regions only, then pasted them to form the final images. It is a kind of local thresholding using the Otsu method or the k-means (using combinations of thr_roi_otsu, thr_roi_kmeans and proc_parts in the code). Below are some results.
Apply Otsu threshold to all text regions, then upsample followed by downsample:
Some text:
Full image:
Upsample input image, apply Otsu threshold to individual text regions, downsample the result:
Some text:
Full image:
/*
Binarize the pixels of im selected by mask using one Otsu threshold computed
from those pixels only. Pixels outside the mask are white (255) in the result.
*/
Mat thr_roi_otsu(Mat& mask, Mat& im)
{
    // white canvas of the same size as the input
    Mat result(im.size(), CV_8U, Scalar(255));

    // gather the gray levels under the mask, scanning row by row
    vector<unsigned char> samples(countNonZero(mask));
    int next = 0;
    for (int row = 0; row < mask.rows; row++)
    {
        for (int col = 0; col < mask.cols; col++)
        {
            if (!mask.at<unsigned char>(row, col))
            {
                continue;
            }
            samples[next] = im.at<unsigned char>(row, col);
            next++;
        }
    }

    // binarize the gathered samples in place
    threshold(samples, samples, 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);

    // write the samples back in the same scan order
    next = 0;
    for (int row = 0; row < mask.rows; row++)
    {
        for (int col = 0; col < mask.cols; col++)
        {
            if (!mask.at<unsigned char>(row, col))
            {
                continue;
            }
            result.at<unsigned char>(row, col) = samples[next];
            next++;
        }
    }
    return result;
}
/*
apply k-means (k = 2) to the gray levels of the region in mask

Pixels of im under the mask are clustered by gray level; pixels in the
cluster with the lower center become 0, the others 255. Pixels outside the
mask are 255 in the result. When the mask selects fewer than two pixels
there is nothing to cluster and an all-white image is returned.
*/
Mat thr_roi_kmeans(Mat& mask, Mat& im)
{
    Mat bw = Mat::ones(im.size(), CV_8U) * 255;
    const int k = 2;

    // collect the masked gray levels as floats, scanning row by row
    vector<float> pixels;
    pixels.reserve(countNonZero(mask));
    for (int r = 0; r < mask.rows; r++)
    {
        for (int c = 0; c < mask.cols; c++)
        {
            if (mask.at<unsigned char>(r, c))
            {
                pixels.push_back((float)im.at<unsigned char>(r, c));
            }
        }
    }

    // kmeans needs at least k samples, and &pixels[0] below must not be
    // taken on an empty vector
    if (pixels.size() < (size_t)k)
    {
        return bw;
    }

    // cluster pixels by gray level
    Mat data((int)pixels.size(), 1, CV_32FC1, &pixels[0]);
    vector<float> centers;
    vector<int> labels(pixels.size());
    kmeans(data, k, labels, TermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 1.0), k, KMEANS_PP_CENTERS, centers);

    // examine cluster centers to see which pixels are dark:
    // label0 is the label of the cluster with the lower center
    int label0 = centers[0] > centers[1] ? 1 : 0;

    // paste pixels back in the same scan order used to collect them
    int index = 0;
    for (int r = 0; r < mask.rows; r++)
    {
        for (int c = 0; c < mask.cols; c++)
        {
            if (mask.at<unsigned char>(r, c))
            {
                bw.at<unsigned char>(r, c) = labels[index++] != label0 ? 255 : 0;
            }
        }
    }
    return bw;
}
/*
apply procfn to each connected component in the mask,
then paste the results to form the final image

For every top-level contour found in the mask, procfn is called with the
mask and image cropped to that contour's bounding rectangle, and its result
is copied into the same rectangle of the output. Areas not covered by any
rectangle stay white (255). If the mask contains no contours the all-white
image is returned.
*/
Mat proc_parts(Mat& mask, Mat& im, Mat (procfn)(Mat&, Mat&))
{
    // work on a copy so the caller's mask is not touched by findContours
    Mat tmp = mask.clone();
    vector<vector<Point>> contours;
    vector<Vec4i> hierarchy;
    findContours(tmp, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0));

    Mat byparts = Mat::ones(im.size(), CV_8U) * 255;

    // the loop below starts by reading hierarchy[0], which does not exist
    // when nothing was found
    if (contours.empty())
    {
        return byparts;
    }

    // hierarchy[idx][0] is the next contour on the same level (-1 at the end),
    // so this walks the top-level contours only
    for(int idx = 0; idx >= 0; idx = hierarchy[idx][0])
    {
        Rect rect = boundingRect(contours[idx]);
        Mat msk = mask(rect);
        Mat img = im(rect);
        // process the rect
        Mat roi = procfn(msk, img);
        // paste it to the final image
        roi.copyTo(byparts(rect));
    }
    return byparts;
}
/*
Driver: load "1.jpg" as grayscale, build a mask of text-like regions and run
every combination of {Otsu, k-means} x {whole mask, per connected component}
x {threshold at 1x then smooth, threshold at 2x then downsample}.
Returns 0 on success and 1 when the input image cannot be read.
*/
int _tmain(int argc, _TCHAR* argv[])
{
    Mat im = imread("1.jpg", 0);
    // imread returns an empty matrix on failure; the calls below require a
    // non-empty image
    if (im.empty())
    {
        return 1;
    }

    // detect text regions
    Mat morph;
    Mat kernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
    morphologyEx(im, morph, CV_MOP_GRADIENT, kernel, Point(-1, -1), 1);
    // prepare a mask for text regions
    Mat bw;
    threshold(morph, bw, 0, 255, THRESH_BINARY | THRESH_OTSU);
    morphologyEx(bw, bw, CV_MOP_DILATE, kernel, Point(-1, -1), 10);

    // 2x versions of the mask and the image for the "upsample first" variants
    Mat bw2x, im2x;
    pyrUp(bw, bw2x);
    pyrUp(im, im2x);

    // apply Otsu threshold to all text regions, then upsample followed by downsample
    Mat otsu1x = thr_roi_otsu(bw, im);
    pyrUp(otsu1x, otsu1x);
    pyrDown(otsu1x, otsu1x);

    // apply k-means to all text regions, then upsample followed by downsample
    Mat kmeans1x = thr_roi_kmeans(bw, im);
    pyrUp(kmeans1x, kmeans1x);
    pyrDown(kmeans1x, kmeans1x);

    // upsample input image, apply Otsu threshold to all text regions, downsample the result
    Mat otsu2x = thr_roi_otsu(bw2x, im2x);
    pyrDown(otsu2x, otsu2x);

    // upsample input image, apply k-means to all text regions, downsample the result
    Mat kmeans2x = thr_roi_kmeans(bw2x, im2x);
    pyrDown(kmeans2x, kmeans2x);

    // apply Otsu threshold to individual text regions, then upsample followed by downsample
    Mat otsuparts1x = proc_parts(bw, im, thr_roi_otsu);
    pyrUp(otsuparts1x, otsuparts1x);
    pyrDown(otsuparts1x, otsuparts1x);

    // apply k-means to individual text regions, then upsample followed by downsample
    Mat kmeansparts1x = proc_parts(bw, im, thr_roi_kmeans);
    pyrUp(kmeansparts1x, kmeansparts1x);
    pyrDown(kmeansparts1x, kmeansparts1x);

    // upsample input image, apply Otsu threshold to individual text regions, downsample the result
    Mat otsuparts2x = proc_parts(bw2x, im2x, thr_roi_otsu);
    pyrDown(otsuparts2x, otsuparts2x);

    // upsample input image, apply k-means to individual text regions, downsample the result
    Mat kmeansparts2x = proc_parts(bw2x, im2x, thr_roi_kmeans);
    pyrDown(kmeansparts2x, kmeansparts2x);

    return 0;
}
I need to get the contour of a hand from an image. I usually process the image in 4 steps:
Convert the raw image from 3 channels to a single gray channel:
// Convert BGR to gray in place: the same Mat is passed as source and destination.
cvtColor(sourceGrayImage, sourceGrayImage, COLOR_BGR2GRAY);
Use a Gaussian blur to filter the gray image:
// Smooth in place with a 3x3 Gaussian kernel; the 0 lets OpenCV derive sigma from the kernel size.
GaussianBlur(sourceGrayImage, sourceGrayImage, Size(3,3), 0);
Binarize the gray image: I split the image by height into several strips (8 in the code below) and threshold each strip separately:
// We split the source picture into binaryImageSectionCount (here 8) strips by height,
// threshold every strip on its own,
// and finally combine the strips again into binaryImage.
// (C++ has no implicit int, so the constant needs an explicit type.)
const int binaryImageSectionCount = 8;
/*
Binarize grayImage strip by strip and show the result.

The image is cut into binaryImageSectionCount horizontal strips; each strip
gets its own Otsu threshold and is written to the same rows of binaryImage.
The last strip also takes the rows left over by the integer division.

grayImage   - input; Otsu thresholding expects an 8-bit single-channel image.
binaryImage - output; (re)allocated here to the size of grayImage as CV_8UC1
              unless the caller already passed a matrix of that shape.

Side effect: displays the result in a window and blocks until a key is pressed.
*/
void GetBinaryImage(Mat &grayImage, Mat &binaryImage)
{
    // Make sure the destination can receive the strips. Mat::create is a
    // no-op when the matrix already has this size and type.
    binaryImage.create(grayImage.size(), CV_8UC1);

    // get every partial gray image's height
    int partImageHeight = grayImage.rows / binaryImageSectionCount;
    for (int i = 0; i < binaryImageSectionCount; i++)
    {
        const int top = i * partImageHeight;
        // every strip is partImageHeight rows high, except the last one,
        // which extends to the bottom of the image
        const int height = (i != binaryImageSectionCount - 1)
                               ? partImageHeight
                               : grayImage.rows - top;
        if (height <= 0)
        {
            // image has fewer rows than sections: this strip is empty
            continue;
        }
        Rect partialRect(0, top, grayImage.cols, height);

        // threshold the strip directly from a view of the source (no copy needed)
        Mat partialBinaryImage;
        threshold(grayImage(partialRect), partialBinaryImage, 0, 255, THRESH_OTSU);

        // combine partial binary image into the full result
        partialBinaryImage.copyTo(binaryImage(partialRect));
    }
    imshow("result binary image.", binaryImage);
    waitKey(0);
    return;
}
Use findContours to get the contour with the biggest area:
// Extract only the outermost contours of the binary image; each contour is stored as a vector of points.
vector<vector<Point> > contours;
findContours(binaryImage, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE);
normally it works well,
but for some low-quality gray images it does not work, like the one below:
the complete code is here:
#include <opencv2/imgproc/imgproc.hpp>
#include<opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>
using namespace std;
using namespace cv;
// We split the source picture into binaryImageSectionCount (here 8) strips by height,
// threshold every strip on its own,
// and finally combine the strips again into binaryImage.
// (C++ has no implicit int, so the constant needs an explicit type.)
const int binaryImageSectionCount = 8;
/*
Binarize grayImage strip by strip and show the result.

The image is cut into binaryImageSectionCount horizontal strips; each strip
gets its own Otsu threshold and is written to the same rows of binaryImage.
The last strip also takes the rows left over by the integer division.

grayImage   - input; Otsu thresholding expects an 8-bit single-channel image.
binaryImage - output; (re)allocated here to the size of grayImage as CV_8UC1
              unless the caller already passed a matrix of that shape.

Side effect: displays the result in a window and blocks until a key is pressed.
*/
void GetBinaryImage(Mat &grayImage, Mat &binaryImage)
{
    // Make sure the destination can receive the strips. Mat::create is a
    // no-op when the matrix already has this size and type.
    binaryImage.create(grayImage.size(), CV_8UC1);

    // get every partial gray image's height
    int partImageHeight = grayImage.rows / binaryImageSectionCount;
    for (int i = 0; i < binaryImageSectionCount; i++)
    {
        const int top = i * partImageHeight;
        // every strip is partImageHeight rows high, except the last one,
        // which extends to the bottom of the image
        const int height = (i != binaryImageSectionCount - 1)
                               ? partImageHeight
                               : grayImage.rows - top;
        if (height <= 0)
        {
            // image has fewer rows than sections: this strip is empty
            continue;
        }
        Rect partialRect(0, top, grayImage.cols, height);

        // threshold the strip directly from a view of the source (no copy needed)
        Mat partialBinaryImage;
        threshold(grayImage(partialRect), partialBinaryImage, 0, 255, THRESH_OTSU);

        // combine partial binary image into the full result
        partialBinaryImage.copyTo(binaryImage(partialRect));
    }
    imshow("result binary image.", binaryImage);
    waitKey(0);
    return;
}
/*
Load a picture, binarize it strip by strip with GetBinaryImage, and display
the outline of the external contour that encloses the largest area.
Returns 0 on success and 1 when the image cannot be read.
*/
int main(int argc, char* argv[])   // standard main takes char*, not _TCHAR*
{
    // get image path
    string imgPath("C:\\Users\\Alfred\\Desktop\\gray.bmp");

    // read image; imread returns an empty matrix on failure
    Mat src = imread(imgPath);
    if (src.empty())
    {
        return 1;
    }
    imshow("Source", src);

    //medianBlur(src, src, 7);
    cvtColor(src, src, COLOR_BGR2GRAY);
    imshow("gray", src);

    // do filter
    GaussianBlur(src, src, Size(3,3), 0);

    // binary image
    Mat threshold_output(src.rows, src.cols, CV_8UC1, Scalar(0, 0, 0));
    GetBinaryImage(src, threshold_output);
    imshow("binaryImage", threshold_output);

    // get biggest contour
    vector<vector<Point> > contours;
    findContours(threshold_output,contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE);
    int biggestContourIndex = 0;
    // contourArea returns a double; keeping the running maximum in an int
    // would truncate it and could let a smaller contour win the comparison
    double maxContourArea = -1.0;
    for (size_t i = 0; i < contours.size(); i++)
    {
        const double area = contourArea(contours[i]);
        if (area > maxContourArea)
        {
            maxContourArea = area;
            biggestContourIndex = (int)i;
        }
    }

    // show biggest contour (the canvas stays black when no contour was found)
    Mat biggestContour(threshold_output.rows, threshold_output.cols, CV_8UC1, Scalar(0, 0, 0));
    if (!contours.empty())
    {
        drawContours(biggestContour, contours, biggestContourIndex, cv::Scalar(255,255,255), 2, 8, vector<Vec4i>(), 0, Point());
    }
    imshow("maxContour", biggestContour);
    waitKey(0);
    return 0;
}
Could anybody please help me get a better hand contour result?
thanks!!!
I have a code snippet in Python; you can follow the same approach in C++:
# Read the image at path x as a colour image (flag 1) and display it.
img = cv2.imread(x, 1)
cv2.imshow("img",img)
# Convert to a single gray channel; equalizeHist below works on this gray image.
imgray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
cv2.imshow("gray",imgray)
# Global histogram equalization of the gray image.
equ = cv2.equalizeHist(imgray)
cv2.imshow('equ', equ)
# Alternative, left disabled: contrast limited adaptive histogram equalization (CLAHE).
#clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8))
#cl2 = clahe.apply(imgray)
#cv2.imshow('clahe2', cl2)
This is the result I obtained:
If your image is really bad, you could try the code that I commented out, which uses contrast limited adaptive histogram equalization.
I have been trying hard to get an undistorted image without interpolation, but when I execute the code below I get a weird image. I am using the function initUndistortRectifyMap, which gives mapx and mapy of type CV_16SC2; then, using the convertMaps function, I convert mapx and mapy to type CV_32FC1. I have been trying to find the reason but could not find anything helpful.
The distorted image
image after applying undistort without interpolation
int main()
{
Mat Cam1MatrixParam, Cam1Distortion;
Mat cf1;
cf1=imread("cam1.distort1.jpg", CV_LOAD_IMAGE_COLOR);
Size imagesize = cf1.size();
FileStorage fs1("cameracalibration.xml", FileStorage::READ);
fs1["camera_matrix"] >> Cam1MatrixParam;
fs1["distortion_coefficients"] >> Cam1Distortion;
Mat R = Mat::eye(3, 3, CV_32F) * 1;
int width = cf1.cols;
int height = cf1.rows;
Mat undistorted = Mat(height, width, CV_8UC3);
Mat mapx = Mat(height, width, CV_32FC1);
Mat mapy = Mat(height, width, CV_32FC1);
initUndistortRectifyMap(Cam1MatrixParam, Cam1Distortion, Cam1MatrixParam, R, imagesize, CV_16SC2, mapx, mapy);
convertMaps(mapx, mapy, mapx, mapy, CV_32FC1, false);
for (int j = 0; j < height; j++)
{
for ( int i = 0; i < width; i++)
{
undistorted.at<uchar>(mapy.at<float>(j, i), mapx.at<float>(j, i)) = cf1.at<uchar>(j, i);
}
}
imwrite("cam1.undistortimage.png", undistorted);
}
image with this version of code
// NOTE(review): the template arguments of at<>() were missing from this line; restored assuming the uchar/float types used in the code above - confirm.
undistorted.at<uchar>(j, i) = cf1.at<uchar>(mapy.at<float>(j, i), mapx.at<float>(j, i));
image with undistort function(remap with nearest interpolation)
It looks like instead of undoing the distortion it applies it once more.
mapx and mapy map from the display coordinates to the photo coordinates.
// Output pixel (j, i) is read from the photo at row mapy(j, i), column mapx(j, i); the float coordinates are converted to int implicitly.
undistorted.at<cv::Vec3b>(j, i) = distort.at<cv::Vec3b>(mapy.at<float>(j, i), mapx.at<float>(j, i));
You can interpret this code as: for each display coordinate {j, i} find its corresponding (distorted) coordinate in the photo and then copy the pixel.
you are using color images (cv::Vec3b) so try instead:
// Same write direction as the question's loop, but copying whole 3-byte pixels (Vec3b) instead of single bytes.
undistorted.at<cv::Vec3b>(mapy.at<float>(j, i), mapx.at<float>(j, i)) = cf1.at<cv::Vec3b>(j, i);
Maybe combine this with the answer of Maxim Egorushkin if the undistort map is the reverse mapping.
I just realised that there is nothing on the web, after much searching about how to access a pixel's intensity value in OpenCv. A grayscale image.
Most online searches are about how to access BGR values of a colour image, like this one: Accessing certain pixel RGB value in openCV
image.at<> is basically for 3 channels, namely BGR. Out of curiosity, is there another similar method in OpenCV for accessing a certain pixel value of a grayscale image?
You can use image.at<uchar>(j,i) to access a pixel value of a grayscale image.
cv::Mat::at<>() function is for every type of image, whether it is a single channel image or multi-channel image. The type of value returned just depends on the template argument provided to the function.
The value of grayscale image can be accessed like this:
// For an 8-bit grayscale image: the template argument must match the element type of the image.
// Arguments are given as (row, column).
unsigned char value = image.at<unsigned char>(row, column);
Make sure to return the correct data type depending on the image type (8u, 16u, 32f etc.).
For IplImage* image, you can use
// CV_IMAGE_ELEM takes the row (y) before the column (x).
uchar intensity = CV_IMAGE_ELEM(image, uchar, y, x);
For Mat image, you can use
// Mat::at takes the row (y) before the column (x).
uchar intensity = image.at<uchar>(y, x);
at(y,x)]++;
for(int i = 0; i < 256; i++)
cout<<histogram[i]<<" ";
// draw the histograms
int hist_w = 512; int hist_h = 400;
int bin_w = cvRound((double) hist_w/256);
Mat histImage(hist_h, hist_w, CV_8UC1, Scalar(255, 255, 255));
// find the maximum intensity element from histogram
int max = histogram[0];
for(int i = 1; i < 256; i++){
if(max < histogram[i]){
max = histogram[i];
}
}
// normalize the histogram between 0 and histImage.rows
for(int i = 0; i < 255; i++){
histogram[i] = ((double)histogram[i]/max)*histImage.rows;
}
// draw the intensity line for histogram
for(int i = 0; i < 255; i++)
{
line(histImage, Point(bin_w*(i), hist_h),
Point(bin_w*(i), hist_h - histogram[i]),
Scalar(0,0,0), 1, 8, 0);
}
// display histogram
namedWindow("Intensity Histogram", CV_WINDOW_AUTOSIZE);
imshow("Intensity Histogram", histImage);
namedWindow("Image", CV_WINDOW_AUTOSIZE);
imshow("Image", image);
waitKey();
return 0;
}