I'm trying to perform orientation estimation on an input image in OpenCV. I used the Sobel function to get the gradients of the image, and another function called calculateOrientations, which I found on the internet, to calculate the orientations.
The code is as follows:
void computeGradient(cv::Mat inputImg)
{
    // Gradient X
    cv::Sobel(inputImg, grad_x, CV_16S, 1, 0, 5, 1, 0, cv::BORDER_DEFAULT);
    cv::convertScaleAbs(grad_x, abs_grad_x);
    // Gradient Y
    cv::Sobel(inputImg, grad_y, CV_16S, 0, 1, 5, 1, 0, cv::BORDER_DEFAULT);
    cv::convertScaleAbs(grad_y, abs_grad_y);
    // convert from CV_8U to CV_32F
    abs_grad_x.convertTo(abs_grad_x2, CV_32F, 1. / 255);
    abs_grad_y.convertTo(abs_grad_y2, CV_32F, 1. / 255);
    // calculate orientations
    calculateOrientations(abs_grad_x2, abs_grad_y2);
}
void calculateOrientations(cv::Mat gradientX, cv::Mat gradientY)
{
    // Create container element
    orientation = cv::Mat(gradientX.rows, gradientX.cols, CV_32F);
    // Calculate orientations of gradients --> in degrees
    // Loop over all matrix values and calculate the corresponding orientation
    for (int i = 0; i < gradientX.rows; i++){
        for (int j = 0; j < gradientX.cols; j++){
            // Retrieve a single value
            float valueX = gradientX.at<float>(i, j);
            float valueY = gradientY.at<float>(i, j);
            // Calculate the corresponding single direction, done by applying the arctangent function
            float result = cv::fastAtan2(valueX, valueY);
            // Store in orientation matrix element
            orientation.at<float>(i, j) = result;
        }
    }
}
Now, I need to make sure whether the obtained orientation is correct or not. For that I want to draw arrows for each block of size 5x5 on the orientation matrix. Could someone advise me on how to draw arrows on this? Thank you.
The simplest way in OpenCV to distinguish direction is to draw a little circle or square at the start or end point of the line. There is no built-in function for arrows, AFAIK. If you need an arrow you have to write it yourself (it is simple but takes time too). I once did it this way (not OpenCV, but I hope you can convert it):
double arrow_pos = 0.5; // 0.5 = at the center of line
double len = sqrt((x2-x1)*(x2-x1)+(y2-y1)*(y2-y1));
double co = (x2-x1)/len, si = (y2-y1)/len; // line coordinates are (x1,y1)-(x2,y2)
double const l = 15, sz = linewidth*2; // l - arrow length
double x0 = x2 - len*arrow_pos*co;
double y0 = y2 - len*arrow_pos*si;
double x = x2 - (l+len*arrow_pos)*co;
double y = y2 - (l+len*arrow_pos)*si;
TPoint tp[4] = {TPoint(x+sz*si, y-sz*co), TPoint(x0, y0), TPoint(x-sz*si, y+sz*co), TPoint(x+l*0.3*co, y+0.3*l*si)};
Polygon(tp, 3);
Canvas->Polyline(tp, 2);
UPDATE: the arrowedLine(...) function has been available since OpenCV 2.4.10 and 3.0.
The easiest way to draw an arrow in OpenCV is:
arrowedLine(img, pointStart, pointFinish, colorScalar, thickness, line_type, shift, tipLength);
thickness, line_type, shift and tipLength already have default values, so they can be omitted.
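For the original question (one arrow per 5x5 block of the orientation matrix), a minimal sketch using arrowedLine could look like the following; vis is an assumed BGR visualisation image of the same size, and the angles are taken in degrees as produced by fastAtan2:
// Sketch: draw one arrow per 5x5 block of the orientation matrix (degrees).
// `vis` is an assumed BGR image to draw on; the names are illustrative only.
for (int i = 0; i + 5 <= orientation.rows; i += 5) {
    for (int j = 0; j + 5 <= orientation.cols; j += 5) {
        float angleDeg = orientation.at<float>(i + 2, j + 2);  // angle at the block centre
        double angleRad = angleDeg * CV_PI / 180.0;
        cv::Point center(j + 2, i + 2);
        cv::Point tip(center.x + cvRound(10 * std::cos(angleRad)),
                      center.y - cvRound(10 * std::sin(angleRad)));  // image y axis points down
        cv::arrowedLine(vis, center, tip, cv::Scalar(0, 0, 255), 1, cv::LINE_AA, 0, 0.3);
    }
}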
Basically I want to do the same thing as this function here:
https://docs.opencv.org/master/d9/d0c/group__calib3d.html#ga13f7e34de8fa516a686a56af1196247f
However, the parameter description states that this algorithm assumes that the matched feature points of the two images are from cameras with the same camera matrix.
What if I have matched features, but images from two different cameras with different camera matrices? How can I calculate the 5-Point essential matrix then?
Additional question:
If I have calculated the essential matrix can I just use it as parameter in the method computeCorrespondEpilines() instead of the fundamental matrix, assuming images are already rectified?
In OpenCV there is no standard function for calculating the essential matrix using two different camera matrices, but it is very easy to implement yourself.
You can add a function to five-point.cpp and recompile OpenCV.
I just added an overloaded function cv::findEssentialMat with an additional parameter for the second camera matrix.
cv::Mat cv::findEssentialMat(InputArray _points1, InputArray _points2,
                             InputArray _cameraMatrix1, InputArray _cameraMatrix2,
                             int method, double prob, double threshold, OutputArray _mask)
{
    CV_INSTRUMENT_REGION();

    Mat points1, points2, cameraMatrix1, cameraMatrix2;
    _points1.getMat().convertTo(points1, CV_64F);
    _points2.getMat().convertTo(points2, CV_64F);
    _cameraMatrix1.getMat().convertTo(cameraMatrix1, CV_64F);
    _cameraMatrix2.getMat().convertTo(cameraMatrix2, CV_64F);

    int npoints = points1.checkVector(2);
    CV_Assert(npoints >= 0 && points2.checkVector(2) == npoints &&
              points1.type() == points2.type());

    CV_Assert(cameraMatrix1.rows == 3 && cameraMatrix1.cols == 3 && cameraMatrix1.channels() == 1);
    CV_Assert(cameraMatrix2.rows == 3 && cameraMatrix2.cols == 3 && cameraMatrix2.channels() == 1);

    if (points1.channels() > 1)
    {
        points1 = points1.reshape(1, npoints);
        points2 = points2.reshape(1, npoints);
    }

    double fx1 = cameraMatrix1.at<double>(0, 0);
    double fy1 = cameraMatrix1.at<double>(1, 1);
    double cx1 = cameraMatrix1.at<double>(0, 2);
    double cy1 = cameraMatrix1.at<double>(1, 2);

    double fx2 = cameraMatrix2.at<double>(0, 0);
    double fy2 = cameraMatrix2.at<double>(1, 1);
    double cx2 = cameraMatrix2.at<double>(0, 2);
    double cy2 = cameraMatrix2.at<double>(1, 2);

    points1.col(0) = (points1.col(0) - cx1) / fx1;
    points2.col(0) = (points2.col(0) - cx2) / fx2;
    points1.col(1) = (points1.col(1) - cy1) / fy1;
    points2.col(1) = (points2.col(1) - cy2) / fy2;

    // Reshape data to fit opencv ransac function
    points1 = points1.reshape(2, npoints);
    points2 = points2.reshape(2, npoints);

    threshold /= (fx1 + fy1) / 2;

    Mat E;
    if (method == RANSAC)
        createRANSACPointSetRegistrator(makePtr<EMEstimatorCallback>(), 5, threshold, prob)->run(points1, points2, E, _mask);
    else
        createLMeDSPointSetRegistrator(makePtr<EMEstimatorCallback>(), 5, prob)->run(points1, points2, E, _mask);

    return E;
}
Then you have to add the function declaration to calib3d.hpp, recompile and reinstall your OpenCV version.
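If you prefer not to patch and recompile OpenCV, a possible workaround (a sketch, untested) is to normalize each point set with its own camera matrix using cv::undistortPoints and then call the stock cv::findEssentialMat with an identity camera matrix; the pixel threshold has to be rescaled to normalized units, just as the patched function above does:
// Sketch: two different camera matrices without recompiling OpenCV.
// pts1/pts2 are the matched points, K1/K2 the camera matrices (assumed already filled).
std::vector<cv::Point2f> pts1n, pts2n;
cv::undistortPoints(pts1, pts1n, K1, cv::noArray());  // output is in normalized coordinates
cv::undistortPoints(pts2, pts2n, K2, cv::noArray());
double f1 = 0.5 * (K1.at<double>(0, 0) + K1.at<double>(1, 1));
cv::Mat mask;
cv::Mat E = cv::findEssentialMat(pts1n, pts2n, cv::Mat::eye(3, 3, CV_64F),
                                 cv::RANSAC, 0.999,
                                 1.0 / f1,  // 1-pixel threshold rescaled to normalized units
                                 mask);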
Additional question: If I have calculated the essential matrix can I just use it as parameter in the method computeCorrespondEpilines() instead of the fundamental matrix, assuming images are already rectified?
Yes, I think this should work.
I am trying to implement the snake algorithm for active contours using C++ and OpenCV 3. I am working with the version that uses gradient descent. As a base test I am trying to draw the contour of a lip. This is the base image.
This is the evolution of the contour without external forces (alpha = 0.001, beta = 3, step-size=0.3).
When I add the external force, this is the result.
As the external force I have used just edge detection with the Sobel derivative.
This is the code I use for points update.
array<Mat, 2> edges = edgeMatrices(croppedImage);
const float ALPHA = 0.001, BETA = 3, GAMMA = 0.3, // Gamma is step size.
            a = GAMMA * ALPHA, b = GAMMA * BETA;
const uint16_t CYCLES = 1000;
const float p = b, q = -a - 4 * b, r = 1 + 2 * a + 6 * b;
Mat pMatrix = pentadiagonalMatrix(POINTS_NUM, p, q, r).inv();
for (uint16_t i = 0; i < CYCLES; ++i) {
    // Extract the x and y derivatives for current points.
    auto externalForces = external(edges, x, y);
    x = pMatrix * (x + GAMMA * externalForces[0]);
    y = pMatrix * (y + GAMMA * externalForces[1]);
    // Draw the points.
    if (i % 200 == 0 && i > 0)
        drawPoints(croppedImage, x, y, { 0.2f * i, 0.2f * i, 0 });
}
This is the code for computing the derivatives.
array<Mat, 2> edgeMatrices(Mat &img) {
    // Convert image to grayscale.
    Mat gray;
    cvtColor(img, gray, COLOR_BGR2GRAY);
    // Apply Sobel filter.
    Mat grad_x, grad_y, blurred_x, blurred_y;
    int scale = 1;
    int delta = 0;
    int ddepth = CV_16S;
    int kernSize = 3;
    Sobel(gray, grad_x, ddepth, 1, 0, kernSize, scale, delta, BORDER_DEFAULT);
    Sobel(gray, grad_y, ddepth, 0, 1, kernSize, scale, delta, BORDER_DEFAULT);
    GaussianBlur(grad_x, blurred_x, Size(5, 5), 30);
    GaussianBlur(grad_y, blurred_y, Size(5, 5), 30);
    return { blurred_x, blurred_y };
}
array<Mat, 2> external(array<Mat, 2> &edgeMat, Mat &x, Mat &y) {
    array<Mat, 2> ext;
    ext[0] = { Size{ 1, POINTS_NUM }, CV_32FC1 };
    ext[1] = { Size{ 1, POINTS_NUM }, CV_32FC1 };
    for (size_t i = 0; i < POINTS_NUM; ++i) {
        ext[0].at<float>(0, i) = -edgeMat[0].at<short>(y.at<float>(0, i), x.at<float>(0, i));
        ext[1].at<float>(0, i) = -edgeMat[1].at<short>(y.at<float>(0, i), x.at<float>(0, i));
    }
    return ext;
}
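pentadiagonalMatrix is not shown above; for reference, a typical construction of the implicit internal-energy matrix for a closed contour, with p, q and r as defined above, is sketched below (an assumption about the original helper, not necessarily identical to it):
Mat pentadiagonalMatrix(int n, float p, float q, float r) {
    // Cyclic pentadiagonal matrix: r on the diagonal, q on the +/-1 bands,
    // p on the +/-2 bands, wrapping around because the contour is closed.
    Mat A = Mat::zeros(n, n, CV_32FC1);
    for (int i = 0; i < n; ++i) {
        A.at<float>(i, i) = r;
        A.at<float>(i, (i + 1) % n) = q;
        A.at<float>(i, (i + n - 1) % n) = q;
        A.at<float>(i, (i + 2) % n) = p;
        A.at<float>(i, (i + n - 2) % n) = p;
    }
    return A;
}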
As you can see, the contour points converge in a very strange way and not towards the edge of the lip (which is the result I would expect).
I am not able to tell whether this is an implementation error, a parameter-tuning problem, or just normal behaviour and I have misunderstood something about the algorithm.
I have some doubts about the derivative matrices; I think they should be regularized in some way, but I am not sure which is the right one. Can someone help me?
The only implementations I have found are of the greedy method.
I just implemented bicubic interpolation for resizing images.
I have a 6x6 pixel test image (grayscale); its columns are black and white (3 of each).
I am comparing the results of my code with the results from the tool ffmpeg, and they are not correct. I cannot understand why; I think I may be calculating the neighbourhood of pixels wrong, or maybe the distance of the resized pixel to the original ones.
Can someone look into my code (I will simplify it for better reading) and tell me where the error is?
// Iterate through each line
for (int lin = 0; lin < dstHeight; lin++) {
    // Original coordinates
    float linInOriginal = (lin - 0.5) / scaleHeightRatio;
    // Calculate original pixels coordinates to interpolate
    int linTopFurther = clamp(floor(linInOriginal) - 1, 0, srcHeight - 1);
    int linTop = clamp(floor(linInOriginal), 0, srcHeight - 1);
    int linBottom = clamp(ceil(linInOriginal), 0, srcHeight - 1);
    int linBottomFurther = clamp(ceil(linInOriginal) + 1, 0, srcHeight - 1);
    // Calculate distance to the top left pixel
    float linDist = linInOriginal - floor(linInOriginal);
    // Iterate through each column
    for (int col = 0; col < dstWidth; col++) {
        // Original coordinates
        float colInOriginal = (col - 0.5) / scaleWidthRatio;
        // Calculate original pixels coordinates to interpolate
        int colLeftFurther = clamp(floor(colInOriginal) - 1, 0, srcWidth - 1);
        int colLeft = clamp(floor(colInOriginal), 0, srcWidth - 1);
        int colRight = clamp(ceil(colInOriginal), 0, srcWidth - 1);
        int colRightFurther = clamp(ceil(colInOriginal) + 1, 0, srcWidth - 1);
        // Calculate distance to the top left pixel
        float colDist = colInOriginal - floor(colInOriginal);
        // Gets the original pixels values
        // 1st row
        uint8_t p00 = srcSlice[0][linTopFurther * srcWidth + colLeftFurther];
        // ...
        // 2nd row
        uint8_t p01 = srcSlice[0][linTop * srcWidth + colLeftFurther];
        // ...
        // 3rd row
        // ...
        // 4th row
        // ...
        // Bicubic interpolation operation
        // Y
        float value = cubicInterpolate(
            cubicInterpolate(static_cast<float>(p00), static_cast<float>(p10), static_cast<float>(p20), static_cast<float>(p30), colDist),
            cubicInterpolate(static_cast<float>(p01), static_cast<float>(p11), static_cast<float>(p21), static_cast<float>(p31), colDist),
            cubicInterpolate(static_cast<float>(p02), static_cast<float>(p12), static_cast<float>(p22), static_cast<float>(p32), colDist),
            cubicInterpolate(static_cast<float>(p03), static_cast<float>(p13), static_cast<float>(p23), static_cast<float>(p33), colDist),
            linDist);
        dstSlice[0][lin * dstWidth + col] = double2uint8_t(clamp(value, 0.0f, 255.0f));
    }
}
I was forgetting to set the values of the second-degree terms of the interpolation matrix. They were left at 0, so the resulting interpolation resembled bilinear interpolation.
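For reference, the cubicInterpolate helper is not shown in the question; one common 1-D cubic kernel (Catmull-Rom) that matches the call signature, given only as an illustration (the fixed version builds a coefficient matrix instead, as the answer above indicates), is:
// Catmull-Rom cubic interpolation between p1 and p2, with p0 and p3 as support points;
// t is the fractional distance from p1 (colDist / linDist above).
float cubicInterpolate(float p0, float p1, float p2, float p3, float t) {
    return p1 + 0.5f * t * (p2 - p0 +
           t * (2.0f * p0 - 5.0f * p1 + 4.0f * p2 - p3 +
           t * (3.0f * (p1 - p2) + p3 - p0)));
}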
I am detecting objects using TensorFlow in C++. It works well and I want to draw the boxes to get some visual feedback.
There is the operation tensorflow::ops::DrawBoundingBoxes that would let me do this but the problem is :
I don't understand what the input values should be for the boxes. What does this mean :
boxes: 3-D with shape [batch, num_bounding_boxes, 4] containing bounding boxes.
I could not find an example that uses this operation in C++ anywhere, almost as if this op does not exist.
Is there an example somewhere in C++ where this op is used? It sounds like a basic thing to do for a tutorial or for debugging.
In case you are still interested in this question, I have written my own implementation of this operation using basic OpenCV methods. It also supports captioning the boxes with the corresponding class labels.
/** Draw bounding box and add caption to the image.
 *  Boolean flag _scaled_ shows if the passed coordinates are in relative units (true by default in TensorFlow detection).
 */
void drawBoundingBoxOnImage(Mat &image, double yMin, double xMin, double yMax, double xMax, double score, string label, bool scaled=true) {
    cv::Point tl, br;
    if (scaled) {
        tl = cv::Point((int) (xMin * image.cols), (int) (yMin * image.rows));
        br = cv::Point((int) (xMax * image.cols), (int) (yMax * image.rows));
    } else {
        tl = cv::Point((int) xMin, (int) yMin);
        br = cv::Point((int) xMax, (int) yMax);
    }
    cv::rectangle(image, tl, br, cv::Scalar(0, 255, 255), 1);
    // Truncate the score to 3 decimals
    float scoreRounded = floorf(score * 1000) / 1000;
    string scoreString = to_string(scoreRounded).substr(0, 5);
    string caption = label + " (" + scoreString + ")";
    // Adding caption of type "LABEL (X.XXX)" to the top-left corner of the bounding box
    int fontCoeff = 12;
    cv::Point brRect = cv::Point(tl.x + caption.length() * fontCoeff / 1.6, tl.y + fontCoeff);
    cv::rectangle(image, tl, brRect, cv::Scalar(0, 255, 255), -1);
    cv::Point textCorner = cv::Point(tl.x, tl.y + fontCoeff * 0.9);
    cv::putText(image, caption, textCorner, FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 0, 0));
}

/** Draw bounding boxes and add captions to the image.
 *  A box is drawn only if the corresponding score is higher than _threshold_.
 */
void drawBoundingBoxesOnImage(Mat &image,
                              tensorflow::TTypes<float>::Flat scores,
                              tensorflow::TTypes<float>::Flat classes,
                              tensorflow::TTypes<float,3>::Tensor boxes,
                              map<int, string> labelsMap, double threshold=0.5) {
    for (int j = 0; j < scores.size(); j++)
        if (scores(j) > threshold)
            drawBoundingBoxOnImage(image, boxes(0,j,0), boxes(0,j,1), boxes(0,j,2), boxes(0,j,3), scores(j), labelsMap[classes(j)]);
}
The complete example is here.
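As a hypothetical call site for the functions above, assuming the output tensors of the detection graph were fetched into a vector named outputs in the order scores, classes, boxes (the names and order depend on your graph):
// Hypothetical usage; `outputs` is assumed to hold the fetched tensors in the
// order {detection_scores, detection_classes, detection_boxes}.
tensorflow::TTypes<float>::Flat scores = outputs[0].flat<float>();
tensorflow::TTypes<float>::Flat classes = outputs[1].flat<float>();
tensorflow::TTypes<float, 3>::Tensor boxes = outputs[2].tensor<float, 3>();
drawBoundingBoxesOnImage(frame, scores, classes, boxes, labelsMap, 0.5);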
Here is a small usage example in Python that draws 2 rectangles on image.png and saves it as output.png; I believe it should help you:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import numpy as np
import PIL.Image as pimg
if __name__ == '__main__':
    image = tf.convert_to_tensor(np.array(pimg.open('image.png'), np.float), tf.float32)
    bbox = tf.convert_to_tensor([[0.1, 0.1, 0.4, 0.4], [0.5, 0.5, 0.6, 0.7]])
    with tf.Session() as s:
        s.run(tf.global_variables_initializer())
        output = s.run(tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), tf.expand_dims(bbox, 0)))
        pimg.fromarray(np.uint8(output[0])).save('output.png')
boxes is an array of arrays of rectangles, one array per image, where each rectangle is defined by four normalized floats [min_y, min_x, max_y, max_x].
I managed to get it working using this.
Too bad you cannot really control how the box looks, nor add other information like the score or the label text.
Status CreateBoxedTensor(Tensor &input_image, Tensor &input_boxes, Tensor *output) {
    auto root = Scope::NewRootScope();
    // First op converts the uint8 image tensor to float, which the drawing op requires
    Input imgin(input_image);
    auto cast_op = Cast(root, imgin, DT_FLOAT);
    // Next one is the drawing itself
    Input boxin(input_boxes);
    auto draw_op = DrawBoundingBoxes(root, cast_op, boxin);
    // And we convert back to uint8 because an RGB image is what we want in the end
    auto cast_back_op = Cast(root, draw_op, DT_UINT8);
    ClientSession session(root);
    std::vector<Tensor> out_tensors;
    TF_RETURN_IF_ERROR(session.Run({cast_back_op}, &out_tensors));
    *output = out_tensors[0];
    return Status::OK();
}
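A hypothetical call site for the function above, assuming imageTensor and boxesTensor already hold the image and the [batch, num_bounding_boxes, 4] boxes:
// Hypothetical usage of CreateBoxedTensor above.
Tensor boxedImage;
TF_CHECK_OK(CreateBoxedTensor(imageTensor, boxesTensor, &boxedImage));
// boxedImage now holds the uint8 image with the boxes drawn in.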
If I have a texture, is it then possible to generate a normal-map for this texture, so it can be used for bump-mapping?
Or how are normal maps usually made?
Yes. Well, sort of. Normal maps can be accurately made from height-maps. Generally, you can also put a regular texture through and get decent results. Keep in mind there are other methods of making a normal map, such as taking a high-resolution model, making it low resolution, then doing ray casting to see what the normal should be for the low-resolution model to simulate the higher one.
For height-map to normal-map, you can use the Sobel Operator. This operator can be run in the x-direction, telling you the x-component of the normal, and then the y-direction, telling you the y-component. You can calculate z with 1.0 / strength where strength is the emphasis or "deepness" of the normal map. Then, take that x, y, and z, throw them into a vector, normalize it, and you have your normal at that point. Encode it into the pixel and you're done.
Here's some older, incomplete code that demonstrates this:
// pretend types, something like this
struct pixel
{
    uint8_t red;
    uint8_t green;
    uint8_t blue;
};

struct vector3d; // a 3-vector with doubles
struct texture;  // a 2d array of pixels

// determine intensity of pixel, from 0 - 1
const double intensity(const pixel& pPixel)
{
    const double r = static_cast<double>(pPixel.red);
    const double g = static_cast<double>(pPixel.green);
    const double b = static_cast<double>(pPixel.blue);
    const double average = (r + g + b) / 3.0;
    return average / 255.0;
}

const int clamp(int pX, int pMax)
{
    if (pX > pMax)
    {
        return pMax;
    }
    else if (pX < 0)
    {
        return 0;
    }
    else
    {
        return pX;
    }
}

// transform -1 - 1 to 0 - 255
const uint8_t map_component(double pX)
{
    return (pX + 1.0) * (255.0 / 2.0);
}

texture normal_from_height(const texture& pTexture, double pStrength = 2.0)
{
    // assume square texture, not necessarily true in real code
    texture result(pTexture.size(), pTexture.size());
    const int textureSize = static_cast<int>(pTexture.size());
    // use int (not size_t) so that row - 1 and column - 1 cannot wrap around at 0
    for (int row = 0; row < textureSize; ++row)
    {
        for (int column = 0; column < textureSize; ++column)
        {
            // surrounding pixels, clamped to the last valid index
            const pixel topLeft = pTexture(clamp(row - 1, textureSize - 1), clamp(column - 1, textureSize - 1));
            const pixel top = pTexture(clamp(row - 1, textureSize - 1), clamp(column, textureSize - 1));
            const pixel topRight = pTexture(clamp(row - 1, textureSize - 1), clamp(column + 1, textureSize - 1));
            const pixel right = pTexture(clamp(row, textureSize - 1), clamp(column + 1, textureSize - 1));
            const pixel bottomRight = pTexture(clamp(row + 1, textureSize - 1), clamp(column + 1, textureSize - 1));
            const pixel bottom = pTexture(clamp(row + 1, textureSize - 1), clamp(column, textureSize - 1));
            const pixel bottomLeft = pTexture(clamp(row + 1, textureSize - 1), clamp(column - 1, textureSize - 1));
            const pixel left = pTexture(clamp(row, textureSize - 1), clamp(column - 1, textureSize - 1));

            // their intensities
            const double tl = intensity(topLeft);
            const double t = intensity(top);
            const double tr = intensity(topRight);
            const double r = intensity(right);
            const double br = intensity(bottomRight);
            const double b = intensity(bottom);
            const double bl = intensity(bottomLeft);
            const double l = intensity(left);

            // sobel filter
            const double dX = (tr + 2.0 * r + br) - (tl + 2.0 * l + bl);
            const double dY = (bl + 2.0 * b + br) - (tl + 2.0 * t + tr);
            const double dZ = 1.0 / pStrength;

            math::vector3d v(dX, dY, dZ);
            v.normalize();

            // convert to rgb
            result(row, column) = pixel(map_component(v.x), map_component(v.y), map_component(v.z));
        }
    }

    return result;
}
There are probably many ways to generate a normal map, but like others said, you can do it from a height map, and 3D packages like XSI/3ds Max/Blender (any of them) can output one for you as an image.
You can then output an RGB image with the NVIDIA plugin for Photoshop, use an algorithm to convert it, or you might be able to output it directly from those 3D packages with third-party plugins.
Be aware that in some cases you might need to invert channels (R, G or B) of the generated normal map.
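For instance, flipping the green (Y) channel of an 8-bit normal map with OpenCV could look like this (a sketch; which channel needs inverting depends on the tool and the engine's convention):
// Sketch: invert the green channel of an 8-bit, 3-channel normal map.
cv::Mat normalMap = cv::imread("normal.png");  // assumed file name
std::vector<cv::Mat> channels;
cv::split(normalMap, channels);
channels[1] = 255 - channels[1];               // BGR order: index 1 is green
cv::merge(channels, normalMap);
cv::imwrite("normal_flipped.png", normalMap);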
Here are some resource links with examples and more complete explanations:
http://developer.nvidia.com/object/photoshop_dds_plugins.html
http://en.wikipedia.org/wiki/Normal_mapping
http://www.vrgeo.org/fileadmin/VRGeo/Bilder/VRGeo_Papers/jgt2002normalmaps.pdf
I don't think normal maps are generated from a texture; they are generated from a model.
Just as texturing allows you to define complex colour detail with minimal polys (as opposed to using millions of polys and vertex colours alone to define the colour of your mesh), a normal map allows you to define complex normal detail with minimal polys.
I believe normal maps are usually generated from a higher-res mesh and then used with a low-res mesh.
I'm sure 3D tools, such as 3ds Max or Maya, as well as more specific tools, will do this for you. Unlike textures, I don't think they are usually done by hand.
But they are generated from the mesh, not the texture.
I suggest starting with OpenCV, due to its richness in algorithms. Here's one I wrote that iteratively blurs the normal map and weights the blurred versions into the overall value, essentially creating more of a topological map.
#define ROW_PTR(img, y) ((uchar*)((img).data + (img).step * (y)))

cv::Mat normalMap(const cv::Mat& bwTexture, double pStrength)
{
    // assume square texture, not necessarily true in real code
    double scale = 1.0;
    int delta = 127;

    cv::Mat sobelZ, sobelX, sobelY;
    cv::Sobel(bwTexture, sobelX, CV_8U, 1, 0, 13, scale, delta, cv::BORDER_DEFAULT);
    cv::Sobel(bwTexture, sobelY, CV_8U, 0, 1, 13, scale, delta, cv::BORDER_DEFAULT);

    sobelZ = cv::Mat(bwTexture.rows, bwTexture.cols, CV_8UC1);
    for (int y = 0; y < bwTexture.rows; y++) {
        const uchar *sobelXPtr = ROW_PTR(sobelX, y);
        const uchar *sobelYPtr = ROW_PTR(sobelY, y);
        uchar *sobelZPtr = ROW_PTR(sobelZ, y);
        for (int x = 0; x < bwTexture.cols; x++) {
            double Gx = double(sobelXPtr[x]) / 255.0;
            double Gy = double(sobelYPtr[x]) / 255.0;
            double Gz = pStrength * sqrt(Gx * Gx + Gy * Gy);
            uchar value = uchar(Gz * 255.0);
            sobelZPtr[x] = value;
        }
    }

    std::vector<cv::Mat> planes;
    planes.push_back(sobelX);
    planes.push_back(sobelY);
    planes.push_back(sobelZ);

    cv::Mat normalMap;
    cv::merge(planes, normalMap);

    cv::Mat originalNormalMap = normalMap.clone();
    cv::Mat normalMapBlurred;

    for (int i = 0; i < 3; i++) {
        cv::GaussianBlur(normalMap, normalMapBlurred, cv::Size(13, 13), 5, 5);
        addWeighted(normalMap, 0.4, normalMapBlurred, 0.6, 0, normalMap);
    }

    addWeighted(originalNormalMap, 0.3, normalMapBlurred, 0.7, 0, normalMap);
    return normalMap;
}