Explain working of compareX in qsort() library function - c++

I was searching for closest-pair code and I found this, which uses the qsort() library function. I basically didn't get the concept of how its compare parameter works. An explanation related to this particular code would be most appreciated. Thanks.
#include <float.h>
#include <math.h>
#include <stdlib.h>
#include <cmath>
#include <iostream>
#include <vector>
using namespace std;
// A point in the 2D plane with integer coordinates.
struct Point
{
    int x;  // horizontal coordinate
    int y;  // vertical coordinate
};
/* Following two functions are needed for library function qsort().
Refer: http://www.cplusplus.com/reference/clibrary/cstdlib/qsort/ */
// Needed to sort array of points according to X coordinate
int compareX(const void* a, const void* b)
{
Point *p1 = (Point *)a, *p2 = (Point *)b;
return (p1->x - p2->x);
}
// Needed to sort array of points according to Y coordinate
int compareY(const void* a, const void* b)
{
Point *p1 = (Point *)a, *p2 = (Point *)b;
return (p1->y - p2->y);
}
// Returns the Euclidean distance between two points.
// p1, p2: the points to measure between.
float dist(Point p1, Point p2)
{
    // Widen to double before subtracting and squaring; doing this in int
    // overflows for coordinates that are far apart.
    const double dx = static_cast<double>(p1.x) - p2.x;
    const double dy = static_cast<double>(p1.y) - p2.y;
    return static_cast<float>(sqrt(dx * dx + dy * dy));
}
// Exhaustively checks every pair among the n points in P[] and returns the
// smallest pairwise distance. Returns FLT_MAX when there is no pair (n < 2).
float bruteForce(Point P[], int n)
{
    float best = FLT_MAX;
    for (int a = 0; a < n; ++a)
    {
        for (int b = a + 1; b < n; ++b)
        {
            const float d = dist(P[a], P[b]);
            if (d < best)
                best = d;
        }
    }
    return best;
}
// Returns the smaller of two float values; returns y when neither is smaller.
float min(float x, float y)
{
    if (x < y)
        return x;
    return y;
}
// Returns the smallest distance between any two points of strip[], or d when
// no pair is closer than d.
// strip: candidate points, expected to be sorted by y coordinate.
// size:  number of points in strip[].
// d:     upper bound on the distance being searched for.
float stripClosest(Point strip[], int size, float d)
{
    float best = d; // start from the known upper bound
    for (int i = 0; i < size; ++i)
    {
        for (int j = i + 1; j < size; ++j)
        {
            // With y-sorted input, once the y gap reaches the current best no
            // later point can improve on it, so stop scanning for this i.
            if (!((strip[j].y - strip[i].y) < best))
                break;
            const float candidate = dist(strip[i], strip[j]);
            if (candidate < best)
                best = candidate;
        }
    }
    return best;
}
// Recursive helper that returns the smallest distance among n points.
// Px: the points sorted by x coordinate.
// Py: n points sorted by y coordinate, carrying the same x values as Px.
// n:  number of points in each array.
float closestUtil(Point Px[], Point Py[], int n)
{
    // Small inputs are solved directly.
    if (n <= 3)
        return bruteForce(Px, n);

    // Split Px at its middle index: Px[0..mid) is the left half and
    // Px[mid..n) the right half. midPoint lies on the dividing vertical line.
    const int mid = n / 2;
    const Point midPoint = Px[mid];

    // Points exactly on the dividing line may sit in either half of Px.
    // Since Px is x-sorted, the ones in the left half form the trailing run of
    // Px[0..mid) whose x equals midPoint.x. Counting them lets the y-sorted
    // split below yield exactly mid and n - mid points, matching the sizes
    // handed to the recursive calls even when x coordinates repeat.
    int leftTies = 0;
    for (int i = mid - 1; i >= 0 && Px[i].x == midPoint.x; --i)
        ++leftTies;

    std::vector<Point> Pyl(mid);     // y sorted points for the left half
    std::vector<Point> Pyr(n - mid); // y sorted points for the right half
    int li = 0, ri = 0;              // fill positions in Pyl and Pyr
    for (int i = 0; i < n; i++)
    {
        bool goesLeft = Py[i].x < midPoint.x;
        if (!goesLeft && Py[i].x == midPoint.x && leftTies > 0)
        {
            goesLeft = true;
            --leftTies;
        }
        // Defensive: never write past either half, even if the caller passed
        // arrays that do not describe the same x coordinates.
        if (li >= mid)
            goesLeft = false;
        else if (ri >= n - mid)
            goesLeft = true;

        if (goesLeft)
            Pyl[li++] = Py[i];
        else
            Pyr[ri++] = Py[i];
    }

    // Smallest distance within each half, then the smaller of the two.
    const float dl = closestUtil(Px, Pyl.data(), mid);
    const float dr = closestUtil(Px + mid, Pyr.data(), n - mid);
    const float d = min(dl, dr);

    // Collect, in y order, the points closer than d to the dividing line;
    // only those can form a cross-half pair that beats d.
    std::vector<Point> strip;
    strip.reserve(n);
    for (int i = 0; i < n; i++)
        if (fabs(static_cast<double>(Py[i].x) - midPoint.x) < d)
            strip.push_back(Py[i]);

    // The answer is the smaller of d and the best pair found in the strip.
    return min(d, stripClosest(strip.data(), static_cast<int>(strip.size()), d));
}
// Returns the smallest distance between any two of the n points in P[].
// P is left unmodified; sorted copies are made for the recursive helper.
// Returns FLT_MAX when there are fewer than two points.
float closest(Point P[], int n)
{
    // No pair exists, which is the value bruteForce() reports for such input.
    if (n < 2)
        return FLT_MAX;

    // Heap-backed copies replace the original variable-length arrays, which
    // are not standard C++.
    std::vector<Point> Px(P, P + n);
    std::vector<Point> Py(P, P + n);
    qsort(Px.data(), n, sizeof(Point), compareX);
    qsort(Py.data(), n, sizeof(Point), compareY);

    // Use recursive function closestUtil() to find the smallest distance
    return closestUtil(Px.data(), Py.data(), n);
}
// Demo driver: runs closest() on a fixed set of six points and prints the result.
int main()
{
    Point P[] = {{2, 3}, {12, 30}, {40, 50}, {5, 1}, {12, 10}, {3, 4}};
    const int n = sizeof(P) / sizeof(P[0]);
    const float answer = closest(P, n);
    cout << "The smallest distance is " << answer;
    return 0;
}

The last parameter of qsort is a pointer to a function with a specific signature: it must take two const void* pointers and return an int that indicates which of the two passed items is smaller, or whether the two items are equal. The specifics are here, but generally a positive result indicates that the second item is smaller, a negative result indicates that the first item is smaller, and zero indicates equality.
The implementation of compareX
// qsort() comparator: orders two Point elements by their x coordinate.
// The sign of the result tells qsort which element comes first.
int compareX(const void* a, const void* b)
{
// qsort passes untyped pointers to the elements; view them as Points.
Point *p1 = (Point *)a, *p2 = (Point *)b;
// Negative when p1's x is smaller, zero when equal, positive when larger.
// NOTE(review): the int subtraction can overflow when the operands have
// large magnitude and opposite signs.
return (p1->x - p2->x);
}
follows the general pattern for comparison functions. First, it converts the void* pointer to the Point type, because it "knows" that it is used together with an array of Point structures. Then it subtracts the x coordinates of the two points:
p1->x - p2->x
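Note that the result of the subtraction is going to be positive if the second point's x is smaller, negative when the second point's x is greater, and zero when the two xs are the same. This is precisely what qsort wants the cmp function to do, so the subtraction operation fulfills the contract of the comparison function. (One caveat: this holds only as long as the subtraction cannot overflow int; for coordinates of very large magnitude and opposite sign, compare the values with < and > instead of subtracting.)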

Related

KNN search, growing set, arbitrary norm

Suppose the following problem in E3 with an arbitrary norm. For example, L1 norm is used (Hamming, Karlsruhe, geodesic, ..., are also applicable).
namespace {
// A point in 3D space stored as an (x, y, z) tuple of doubles.
typedef boost::tuple<double, double, double> Point;

// Returns the L1 distance between p1 and p2: the sum of the absolute
// differences of their three components.
double nL1(const Point& p1, const Point& p2) { //L1 norm, example
    const double dx = p1.get<0>() - p2.get<0>();
    const double dy = p1.get<1>() - p2.get<1>();
    const double dz = p1.get<2>() - p2.get<2>();
    // std::fabs always works on floating point; an unqualified abs() may
    // resolve to the int overload and silently truncate the differences.
    return std::fabs(dx) + std::fabs(dy) + std::fabs(dz);
}
}
There are two sets A, B. A is initialized by n random points, where n is relatively large (n = 1*10^7 - 1*10^9), B is empty; see the sample code:
// Sample driver: fills A with n random points; B starts empty.
int main() {
using namespace std;
using namespace boost::lambda;
// A: the input point set. B: the set that grows as points are accepted.
vector<Point> A, B;
int n = 10000000;
for (int i = 0; i < n; i++) //Create random points
A.push_back(boost::make_tuple(rand(), rand(), rand()));
Initially, we put a random point from A to B. In the simplified example, the first point A[0] is used:
B.push_back(A[0]);
Subsequently, for i = 1:n we repeat these steps:
Find the nearest point in B to A[i] according to a given norm
B * = argmin(|A[i]-B|)
If |B* - A[i]| < eps, then B.push_back(A[i]). In other words, add sufficiently close A[i] to B (Here, eps = 1.0*10^4 is used).
For the nearest search, I am using the std::partial_sort.
// Number of nearest neighbours moved to the front of B on each query.
const int k = 1;
for (int i = 1; i < A.size(); i++) {
// Reorder B so that its first k elements are the ones closest to A[i] under nL1.
partial_sort(B.begin(), B.begin() + k, B.end(), bind(less<double>(), bind(nL1, _1, A[i]), bind(nL1, _2, A[i])));
// Accept A[i] into B when its nearest neighbour in B is closer than the threshold.
if (nL1(*B.begin(), A[i]) < 1e4) B.push_back(A[i]); //Some threshold, eps=1.0*10^4
}
}
B is continuously growing and the search becomes more expensive... Being repeated in the loop, it is too slow, even for small sets (n=1*10^6)... Here, the partial sort is inefficient.
Are there significant improvements in speed? Of course, a naive approach can be used (but it is not faster).
How to speed up nn-search?
Another problem appears when also the second nearest points is required...
Current k-nn search libraries can not be used because of an arbitrary norm (the problem can be solved on the sphere). I tried to use nano-flann, but it does not support some specific norms...

How to judge that the vertices's order of polygon is clockwise or counterclockwise?

The concrete question is:
n lines, each line containing two integers. The i-th line contains xi, yi — the i-th vertex of the polygon in clockwise or counterclockwise order. Note that it is possible that more than two vertices appear in a side, such as the follow picture:
Now you need to judge that the vertices's order of polygon is clockwise or counterclockwise?
c++ code is:
// A 2D integer vector/vertex, plus the global vertex buffer node[].
struct Node
{
    int x, y;

    // Component-wise difference: this minus node.
    Node operator-(Node node) const
    {
        return Node{x - node.x, y - node.y};
    }

    // 2D cross product of this vector with node.
    int operator*(Node node) const
    {
        const int forward = x * node.y;
        const int backward = y * node.x;
        return forward - backward;
    }
} node[1000];
// Read the n polygon vertices into node[0..n-1].
for (int i = 0; i < n; i++)
scanf("%d %d", &node[i].x, &node[i].y);
// Running sum of the cross products of consecutive vertices; by the shoelace
// formula this is twice the polygon's signed area.
int tmp = 0;
// Copy the first vertex after the last one so the final edge wraps around.
// NOTE(review): this writes node[n], so n must be smaller than the array size.
node[n].x = node[0].x, node[n].y = node[0].y;
for (int i = 0; i < n; i++)
tmp += (node[i] * node[i + 1]);
// A positive signed area means the vertices are listed counterclockwise.
if (tmp > 0)
it is counterclockwise order;
But I don't understand the code, who can prove it?
The shoelace formula will give the oriented area of any polygon. By examining its sign, you can therefore determine the orientation. The code you have does compute twice the area, but as the sign is all that matters, this is irrelevant.

C++ calculate difference between two elements in a vector

I have a 2 vectors of size 4 to store coordinates of a shape (square/rectangle). 1st vector is for x and 2nd for y. To find out the area of the shape, I need the difference in their length. How do I find the difference between 2 elements within the same vector? Using square as an example:
vector<int> x(4);
vector<int> y(4);
// Computes the square's area from the x and y coordinate vectors, passes it
// to setArea(), and returns it.
double Square::computeArea()
{
int length;
double area;
// Two consecutive corners that share an x value form a vertical edge, so the
// side length is the difference of their y values (which may be negative).
if (x[0] == x[1]) //x coordinates are the same
{
length = y[0] - y[1]; //difference in y coordinates to find the length, need help here
}
// NOTE(review): the condition below is missing its closing parenthesis.
else if (x[1] == x[2]
{
length = y[1] - y[2];
}
else if ... //repeat
// Squaring removes the sign of length.
area = length * length;
if (area < 0) { area = -area; }
// NOTE(review): the call below is missing its terminating semicolon.
setArea(area)
return area;
}
If your rectangle has edges which are parallel to the axis, and the points are ordered clockwise (or counterclockwise), you can simply use the first and third element of the arrays:
// Edge lengths of the rectangle, measured between corners 0 and 2.
int yedge, xedge;
xedge = abs(x[0] - x[2]);
// A zero horizontal extent yields an area of zero.
if ( xedge == 0 ) //beware, this check works well only for ints!
return area = 0.0;
else yedge = abs(y[0] - y[2]);
// Area is the product of the horizontal and vertical edge lengths.
return area = xedge * yedge;
If you have more general convex quadrilaterals use something like this:
// Coordinate differences of vertices 1, 2 and 3 relative to vertex 0.
int dx20 = x[2] - x[0];
int dy10 = y[1] - y[0];
int dy20 = y[2] - y[0];
int dx10 = x[1] - x[0];
int dy30 = y[3] - y[0];
int dx30 = x[3] - x[0];
// Half the absolute cross product of (P2-P0) and (P1-P0): area of triangle 0-1-2.
area = 0.5*abs(dx20*dy10-dy20*dx10);
// Add the area of triangle 0-2-3, computed the same way.
area += 0.5*abs(dx20*dy30-dy20*dx30);
The beauty of C++ and OOP is that you can think more in terms of the problem than how to program it.
If I were in your place I would use std::pair to save the coordinates.
And have a class representing the rectangle.
I am using the distance between points 1 and 2 as the length, and the distance between points 1 and 4 as the width. It may not be the correct approach in all cases, but it should show you how to go about programming your function.
using namespace std;
// Rectangle described by a list of coordinate pairs.
class Rectangle
{
    // Stored coordinate pairs; computeArea() reads entries 0, 1 and 3.
    vector<pair<double, double>> coordinates;

public:
    // Constructs a rectangle from the given coordinate pairs.
    Rectangle(vector<pair<double, double>> neWcoordinates);

    // Returns the product of two edge lengths measured from entry 0.
    double computeArea();
};
double Rectangle::computeArea()
{
double length = sqrt(pow(coordinates[0].first-coordinates[1].first,2)+pow(coordinates[0].second-coordinates[1].second,2)
);
double width = sqrt(pow(coordinates[0].first-coordinates[3].first,2)+pow(coordinates[0].second-coordinates[3].second,2));
return length*width;
}

Find Middle points(Computational geometry c++)

This program should read for input an integer N then the x and y coordinates of the N points
and return the number of points that are the middle points of any two other points in the set.
First the program stores the points in an array, then we loop through the points and calculate the distance between points[i] and every other point. We sort the points according to that distance; then, if we find that any two points have the same distance, we check whether points[i] is aligned with them, and if that is the case we store points[i] in the middles list.
We then get rid of doubles in the list and return the size of the list.
I submitted my solution and it doesn't work for all the cases. Please help:
#include <iostream>
#include <cmath>
#include <algorithm>
#include <list>
#include <stdio.h>
using namespace std;
// An integer 2D point plus a scratch field holding a distance value.
struct Point
{
    int x, y;      // coordinates
    int distance;  // distance value assigned by the caller
};
bool PointSort(Point a,Point b);
bool colinear(Point a,Point b,Point c);
bool same_point (Point first, Point second);
// Reads N and then N (x, y) pairs from stdin and prints the number of entries
// collected in the middles list: points that are equidistant from, and
// collinear with, two other points of the set.
int main()
{
    list<Point> middles;
    int N;
    // Without a readable, positive count there is nothing to examine.
    if (scanf("%d", &N) != 1 || N <= 0)
    {
        cout << 0;
        return 0;
    }
    Point points[N];
    Point points2[N];
    for (int i = 0; i < N; i++)
    {
        scanf("%d", &points[i].x);
        scanf("%d", &points[i].y);
        points[i].distance = 0; // avoid copying an uninitialised field below
    }
    for (int i = 0; i < N; i++)
    {
        // Work on a fresh copy, tagging every point with its squared
        // distance from the candidate midpoint points[i].
        for (int j = 0; j < N; j++)
        {
            points2[j] = points[j];
            points2[j].distance = (points[i].x - points2[j].x) * (points[i].x - points2[j].x)
                                + (points[i].y - points2[j].y) * (points[i].y - points2[j].y);
        }
        sort(points2, points2 + N, &PointSort);
        for (int j = 0; j < N; j++)
        {
            // Walk the run of points sharing points2[j]'s distance. The
            // k < N bound keeps the scan inside the array.
            for (int k = j + 1; k < N && points2[j].distance == points2[k].distance; k++)
            {
                // Record the candidate itself (points[i]); after sorting,
                // points2[i] is an unrelated point.
                if (colinear(points[i], points2[j], points2[k]))
                    middles.push_back(points[i]);
            }
        }
    }
    // Entries for the same candidate are adjacent, so unique() collapses them.
    middles.unique(same_point);
    cout << middles.size();
}
// Ordering predicate for sort(): true when a's distance is smaller than b's.
bool PointSort(Point a, Point b)
{
    return b.distance > a.distance;
}
// Returns true when the triangle a, b, c has zero area, i.e. the three
// points lie on one line (or coincide).
bool colinear(Point a, Point b, Point c)
{
    // Twice the signed area of triangle a-b-c.
    const int twiceArea = a.x*(b.y-c.y)+b.x*(c.y-a.y)+c.x*(a.y-b.y);
    return twiceArea / 2.0 == 0.0;
}
// Equality predicate for list::unique(): true when both coordinates match.
bool same_point (Point first, Point second)
{
    if (first.x != second.x)
        return false;
    return first.y == second.y;
}
You actually don't need to calculate distances to check if something is the midpoint. The coordinates of the midpoint between A and B is M=(A+B)/2. Or, to keep everything as an integer, A+B=2M where M is the midpoint. Here's a pseudocode solution for the problem:
for ( A=0; A<N-1; A++ ) {
for ( B=A+1; B<N; B++ ) {
M2 = A+B;
for ( C=0; C<N; C++ ) {
if ( C*2 == M2 ) {
// C is the midpoint of A and B
}
}
}
}
I see the following potential problems with your code:
Your code computes the distance squared (not the distance as stated) between pairs of points. Since the calculation is being done using integer arithmetic, there's a chance of arithmetic overflow.
Your code removes all midpoints found with duplicated x and y coordinates. But, is this what the problem statement requires? If duplicate points actually appear in the input stream, and happen to be midpoints of some other points, should the second and all subsequent duplicates be ignored? Also, if a point is duplicated three (or more) times in the input stream, how many midpoints does that count as? You should carefully check the problem statement to see how duplicates in the input stream should be counted and follow the requirements precisely.
Your check for collinearity looks wrong. You appear to be trying to take a 2d cross of (points[i] - points2[j]) with (points[i] - points2[k]), but this is not the correct way to do it. Here is how to take a 2d cross:
// Returns the 2D cross product (a - mid) x (c - mid).
int cross2d(Point a, Point mid, Point c)
{
    // u = a - mid, v = c - mid; the cross product is u.x * v.y - u.y * v.x.
    const int ux = a.x - mid.x;
    const int uy = a.y - mid.y;
    const int vx = c.x - mid.x;
    const int vy = c.y - mid.y;
    return ux * vy - uy * vx;
}
// Returns true when a, mid and c are collinear. Degenerate input, where
// a == mid or mid == c, also counts as collinear.
bool collinear(Point a, Point mid, Point c)
{
    const int turn = cross2d(a, mid, c);
    return turn == 0;
}
Again, integer overflow is a potential problem for point triplets with large coordinates that are nearly perpendicular. And if you were not trying to take a 2d cross, what were you trying to do?
You're trying create an O(n-squared) algorithm by sorting the points by distance from some prospective midpoint. That's creditable, but since your code isn't working I would start by creating a naive O(n-cubed) algorithm that solves the problem straightforwardly. Then you can use that to unit-test your improved n-squared algorithm.
Adding some spacing into your mathematical expressions makes them easier to read.
So, to start you off, here's the naive n-cubed algorithm. Note that I am preserving duplicates in the input stream while avoiding double-counting of points that are midpoints of multiple pairs of points:
#include <iostream>
#include <cmath>
#include <algorithm>
#include <list>
#include <stdio.h>
using namespace std;
// An integer 2D point tagged with an identifier.
struct Point
{
    int x, y;  // coordinates
    int id;    // identifier assigned by the caller
};
bool is_middle(Point a, Point middle, Point c);
bool same_point_id(Point first, Point second);
int main()
{
list<Point> middles;
int N;
scanf("%d", &N);
// https://stackoverflow.com/questions/25437597/find-middle-pointscomputational-geometry-c
// This program should read for input an integer N then the x and y coordinates of the N points
// and return the number of points that are the middle points of any two other points in the set.
Point *points = new Point[N];
for(int i=0;i<N;i++)
{
scanf("%d", &points[i].x);
scanf("%d", &points[i].y);
points[i].id = i;
}
for(int i=0; i<N-2; i++)
{
for(int j=i+1; j<N-1; j++)
{
for(int k=j+1; k<N; k++)
{
// Check the problem requirement to determine how to count sets of three identical points in the input stream.
if (is_middle(points[i], points[j], points[k]))
middles.push_back(points[j]);
if (is_middle(points[j], points[k], points[i]))
middles.push_back(points[k]);
if (is_middle(points[k], points[i], points[j]))
middles.push_back(points[i]);
}
}
}
// Prevent the same input point from being counted multiple times.
middles.unique(same_point_id);
cout<<middles.size();
delete [] points;
}
// Returns true when mid is the midpoint of a and c, i.e. the step from a to c
// is exactly twice the step from a to mid on both axes.
bool is_middle(Point a, Point mid, Point c)
{
    return (a.x - c.x == 2*(a.x - mid.x))
        && (a.y - c.y == 2*(a.y - mid.y));
}
// Equality predicate for list::unique(): true when both points carry the same id.
bool same_point_id(Point first, Point second)
{
    const bool sameId = (first.id == second.id);
    return sameId;
}
Update: If you do need an n-squared algorithm, then sorting potential endpoints by distance squared from the midpoint isn't a bad idea. If you want to avoid potential arithmetic overflows, you can calculate the distance squared in 64-bit long long ints:
// Returns the squared Euclidean distance between a and b, computed in
// long long arithmetic.
long long distance_squared(Point a, Point b)
{
    const long long dx = static_cast<long long>(a.x) - static_cast<long long>(b.x);
    const long long dy = static_cast<long long>(a.y) - static_cast<long long>(b.y);
    const long long sum = dx*dx + dy*dy;
    return sum;
}
On most platforms these will have more bits than a regular int -- and certainly not fewer.

Best way to iterate through a Matrix of coordinates in MATLAB?

I am attempting to translate the C++ code given here to MATLAB:
// Implementation of Andrew's monotone chain 2D convex hull algorithm.
// Asymptotic complexity: O(n log n).
// Practical performance: 0.5-1.0 seconds for n=1000000 on a 1GHz machine.
#include <algorithm>
#include <vector>
using namespace std;
typedef int coord_t; // coordinate type
typedef long long coord2_t; // must be big enough to hold 2*max(|coordinate|)^2
// A 2D point that sorts lexicographically: by x first, then by y.
struct Point {
    coord_t x, y;

    // Returns true when this point precedes p in (x, y) lexicographic order.
    bool operator <(const Point &p) const {
        if (x != p.x)
            return x < p.x;
        return y < p.y;
    }
};
// 2D cross product of OA and OB vectors, i.e. z-component of their 3D cross product.
// Returns a positive value, if OAB makes a counter-clockwise turn,
// negative for clockwise turn, and zero if the points are collinear.
coord2_t cross(const Point &O, const Point &A, const Point &B)
{
    // Widen to coord2_t before subtracting and multiplying. Evaluating the
    // products in coord_t and converting afterwards overflows for large
    // coordinates and can flip the sign of the result.
    const coord2_t ax = static_cast<coord2_t>(A.x) - O.x;
    const coord2_t ay = static_cast<coord2_t>(A.y) - O.y;
    const coord2_t bx = static_cast<coord2_t>(B.x) - O.x;
    const coord2_t by = static_cast<coord2_t>(B.y) - O.y;
    return ax * by - ay * bx;
}
// Builds the convex hull of P with Andrew's monotone chain and returns its
// points in counter-clockwise order.
// Note: the last point in the returned list is the same as the first one.
vector<Point> convex_hull(vector<Point> P)
{
    const int n = P.size();
    int k = 0;              // number of hull points stored so far
    vector<Point> H(2*n);   // room for both chains

    // Lexicographic order lets each chain be built in a single sweep.
    sort(P.begin(), P.end());

    // Lower hull: sweep left to right, dropping points that do not make a
    // counter-clockwise turn.
    for (int i = 0; i < n; i++) {
        for (; k >= 2 && cross(H[k-2], H[k-1], P[i]) <= 0; k--) {
        }
        H[k] = P[i];
        k++;
    }

    // Upper hull: sweep right to left; t protects the lower hull from popping.
    const int t = k+1;
    for (int i = n-2; i >= 0; i--) {
        for (; k >= t && cross(H[k-2], H[k-1], P[i]) <= 0; k--) {
        }
        H[k] = P[i];
        k++;
    }

    H.resize(k);
    return H;
}
I am having some trouble because in the C++ program, iterating through the points is easier. I wish to do the same in MATLAB but want to do that taking one point (both x and y coordinates) at a time instead of one particular value at a given index at a time.
To generate the matrix of coordinates I am using the following as of now -
x = randi(1000,100,1);
y = randi(1000,100,1);
points = [x,y];
Iteration is often unnecessary in Matlab. Given your vectors x and y I think that the C++ code translates to
convhull(x,y)
in Matlab. No (programmer-written) iteration, not much else either.
If you have a matrix C columns by 2 rows M, you just do. (where row1 =x and row2 = y)
M=[x;y]
FOR point = drange(M)
//code
end