Sort detected text bounding box coordinates by order of appearance in the image - computer-vision

I have used a text detection model which gives the bounding box coordinates. I converted the polygons to rectangles for cropping the text areas in the image. The resulting bounding boxes are shuffled and I could not sort them. As I understand it, the boxes are sorted on the basis of Y3. But when curved text is present in the same line, as in the image below, the order gets shuffled, and I need to sort it before passing it to the text extraction model.
Image with polygon coordinates
Converted the polygons to rectangles for cropping text areas
The same image with the converted rectangular bounding boxes
import cv2
import pandas as pd

img_name = 'rre7'
orig = cv2.imread('CRAFT-pytorch/test/' + str(img_name) + '.jpg')
colnames = ['x1', 'y1', 'x2', 'y2', 'x3', 'y3', 'x4', 'y4']
df = pd.read_csv('result/res_' + str(img_name) + '.txt', header=None,
                 delimiter=',', names=colnames)

# convert each polygon to its axis-aligned bounding rectangle
rect = []
boxes = df.values
for i, (x1, y1, x2, y2, x3, y3, x4, y4) in enumerate(boxes):
    startX = min([x1, x2, x3, x4])
    startY = min([y1, y2, y3, y4])
    endX = max([x1, x2, x3, x4])
    endY = max([y1, y2, y3, y4])
    rect.append([startX, startY, endX, endY])

# sort top-to-bottom by startY
rect.sort(key=lambda b: b[1])
print("After sorting")
print('\n')

# initially the line bottom is set to the bottom of the first rect
# (rect entries are [startX, startY, endX, endY], so the bottom is index 3)
line_bottom = rect[0][3]
line_begin_idx = 0
for i in range(len(rect)):
    # when a new box's top is below the current line's bottom,
    # it's a new line
    if rect[i][1] > line_bottom:
        # sort the previous line by x
        rect[line_begin_idx:i] = sorted(rect[line_begin_idx:i], key=lambda b: b[0])
        line_begin_idx = i
    # regardless of whether it's a new line or not,
    # always update the line bottom
    line_bottom = max(rect[i][3], line_bottom)
# sort the last line
rect[line_begin_idx:] = sorted(rect[line_begin_idx:], key=lambda b: b[0])

# crop and save each box in the sorted order
for i, (startX, startY, endX, endY) in enumerate(rect):
    roi = orig[startY:endY, startX:endX]
    cv2.imwrite('gray/' + str(img_name) + '_' + str(i + 1) + '.jpg', roi)
In this case, the polygon bounding box coordinates with the detected text are:
146,36,354,34,354,82,146,84 "Australian"
273,78,434,151,411,201,250,129 "Collection"
146,97,250,97,250,150,146,150 "vine"
77,166,131,126,154,158,99,197 "Old"
242,215,361,241,354,273,235,248 "Valley"
140,247,224,219,234,250,150,277 "Eden"
194,298,306,296,307,324,194,325 "Shiraz"
232,406,363,402,364,421,233,426 "Vintage"
152,402,216,405,215,425,151,422 "2008"
124,470,209,480,207,500,122,490 "South"
227,481,387,472,389,494,228,503 "Australia"
222,562,312,564,311,585,222,583 "Gibson"
198,564,217,564,217,584,198,584 "by"
386,570,421,570,421,600,386,600 "750 ml"
But the expected output is that I need the coordinates sorted in the following order of text appearance: Australian -> Old -> vine -> Collection -> Eden -> Valley -> Shiraz -> 2008 -> Vintage -> South -> Australia -> by -> Gibson -> 750 ml.
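One possible fix (a sketch of my own, not from the original post; the helper name sort_reading_order is made up): instead of comparing each box's top against a single running line bottom, assign a box to a line when its vertical center falls inside that line's current top/bottom span. This tolerates slanted words like "Old" and "Collection" that start above or below their neighbours.
def sort_reading_order(rects):
    # group boxes into lines by vertical-center overlap, then read each
    # line left to right; rects are [startX, startY, endX, endY]
    rects = sorted(rects, key=lambda b: (b[1] + b[3]) / 2.0)
    lines = []
    for box in rects:
        cy = (box[1] + box[3]) / 2.0  # vertical center of this box
        for line in lines:
            top = min(b[1] for b in line)
            bottom = max(b[3] for b in line)
            if top <= cy <= bottom:   # center falls inside the line's span
                line.append(box)
                break
        else:
            lines.append([box])       # no line matched: start a new one
    return [b for line in lines for b in sorted(line, key=lambda b: b[0])]
Tracing the coordinates above by hand, this groups Old, vine, and Collection into one line and yields the expected reading order.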

Related

OpenCV Python: How to warpPerspective a large image based on transform inferred from small region

I am using cv2.getPerspectiveTransform() and cv2.warpPerspective() to warp an image, following Adrian Rosebrock's blog: https://www.pyimagesearch.com/2014/08...
However, in my case I have an image where I can only select the region B to be warped, but I need to warp (to a top-down view) the whole larger image A.
Can the parameters of the perspective transform inferred from the smaller region B be applied to the full image A? Is that possible?
Here is one way to demonstrate that the matrix from the red square applies to the whole image in Python OpenCV.
Here I rectify the quadrilateral into a rectangle on the basis of its top and left dimensions.
Input:
import numpy as np
import cv2
import math
# read input
img = cv2.imread("red_quadrilateral.png")
hh, ww = img.shape[:2]
# specify input coordinates for corners of red quadrilateral in order TL, TR, BR, BL as x,y
input = np.float32([[136,113], [206,130], [173,207], [132,196]])
# get top and left dimensions and set to output dimensions of red rectangle
width = round(math.hypot(input[0,0]-input[1,0], input[0,1]-input[1,1]))
height = round(math.hypot(input[0,0]-input[3,0], input[0,1]-input[3,1]))
print("width:",width, "height:",height)
# set upper left coordinates for output rectangle
x = input[0,0]
y = input[0,1]
# specify output coordinates for corners of red quadrilateral in order TL, TR, BR, BL as x,y
output = np.float32([[x,y], [x+width-1,y], [x+width-1,y+height-1], [x,y+height-1]])
# compute perspective matrix
matrix = cv2.getPerspectiveTransform(input,output)
print(matrix)
# do perspective transformation setting area outside input to black
# Note that output size is the same as the input image size
imgOutput = cv2.warpPerspective(img, matrix, (ww,hh), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=(0,0,0))
# save the warped output
cv2.imwrite("red_quadrilateral_warped.jpg", imgOutput)
# show the result
cv2.imshow("result", imgOutput)
cv2.waitKey(0)
cv2.destroyAllWindows()
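As a quick sanity check that the same matrix is valid everywhere in A, you can map a few arbitrary points through it with cv2.perspectiveTransform (a small sketch; the test-point values below are made up):
# map arbitrary test points, including ones outside the red quadrilateral,
# through the same matrix to see where they land in the warped image
pts = np.float32([[[0, 0]], [[ww - 1, hh - 1]], [[300, 50]]])
print(cv2.perspectiveTransform(pts, matrix))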

Algorithm that finds the "mean positioned" white pixel in a column and repeats the process for each column

I am attempting to create an algorithm that locates the white pixels in a column of a binary image, adds up the y co-ordinates of each white pixel in that column, and divides this sum by the number of white pixels in the column, in order to get the "mean/middle positioned" white pixel of the column. This returns an (x, y) co-ordinate that can be plotted. The process repeats for each column in the image, and each time sy is reset to 0.
The end goal is that instead of having lines that are multiple pixels thick/wide, as shown in the image's numpy array, I have lines that are just one pixel wide whilst maintaining the original shape. I planned on doing this by selecting the "mean positioned" white pixel in each column, and then using those pixels to obtain x and y co-ordinates to plot.
Here is what I have
sx = x = img.shape[1]
sy = 0
whitec = cv2.countNonZero(img.shape[1])
arrayOfMeanY = []  # array to place (x,y) co-ordinates in
# Select column to iterate
for x in range(img.shape[1]):
    # iterating through individual items in the column
    for y in range(img.shape[0]):
        # Checking for white pixels
        pixel = img[x,y]
        if pixel == 255:
            # Then we check the y values of the white pixels in the column and add them all up
            sy = sy+y
            whitec += 1
    # Doing the calculation for the mean and putting it into the meanY list
    sy = sy/whitec
    y = sy
    print img[x,y]
    array.append(y)
    cv2.waitKey(0)
    # reset sy to 0 for the next column
    sy = 0
My issue is I receive this error when I run the code:
File "<ipython-input-6-e4c2225ff632>", line 27, in <module>
whitec = cv2.countNonZero(img.shape[1]) #n= number of white pixels
in the column
TypeError: src is not a numpy array, neither a scalar
How do I rectify this issue, and once it is rectified, will my code do what I described above?
No need for loops here. With numpy you hardly ever need to loop over individual pixels.
Instead, create a function which takes the mean of the locations of the non-zero pixels for each column (I converted to np.intp to index the image; you could just cast with int(), but np.intp is what Numpy uses for indexing arrays, so it's slightly more appropriate).
def avgWhiteLocOverCol(col):
    return np.intp(np.mean(np.where(col)))
Then you can simply apply the function along all columns with np.apply_along_axis().
avgRows = np.apply_along_axis(avgWhiteLocOverCol, 0, img)
For example, let's create an image with white pixels on the middle row and on the diagonal:
import numpy as np
import cv2
img = np.eye(500)*255
img[249,:] = 255
cv2.imshow('',img)
cv2.waitKey(0)
Then we can apply the function over each column, which should give a line with half the slope:
def avgWhiteLocOverCol(col):
    return int(np.mean(np.where(col)))
avgRows = np.apply_along_axis(avgWhiteLocOverCol, 0, img)
avgIndImg = np.zeros_like(img)
avgIndImg[avgRows,range(img.shape[1])] = 255
cv2.imshow('',avgIndImg)
cv2.waitKey(0)
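For completeness, the same per-column mean can also be computed fully vectorized, without apply_along_axis (a sketch assuming img is a 2-D array whose white pixels are non-zero; note that in both versions a column with no white pixels needs special handling):
rows = np.arange(img.shape[0])[:, None]        # row indices as a column vector
mask = img > 0                                 # white-pixel mask
counts = np.maximum(mask.sum(axis=0), 1)       # white pixels per column (avoid /0)
avgRows = (mask * rows).sum(axis=0) // counts  # mean white row index per column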

The result of a density contour plot with histogram2d and matplotlib

I am trying to plot a 2D contour density map using histogram2d. I turned the histogram output into a contour plot and plotted my data with contourf, but I didn't like the result, since it gives me a map with a huge rectangle in the middle.
Here's the code I'm using:
db = 1
lon_bins = np.linspace(min(lons)-db, max(lons)+db, int((max(lons)-min(lons))*100))
lat_bins = np.linspace(min(lats)-db, max(lats)+db, int((max(lats)-min(lats))*100))
h, xedges, yedges = np.histogram2d(lats, lons, [lat_bins, lon_bins])
yi, xi = m(*np.meshgrid(lon_bins, lat_bins))
g = np.zeros(xi.shape)
g[:-1,:-1] = h
g[-1] = g[0]      # copy the top row to the bottom
g[:,-1] = g[:,0]  # copy the left column to the right
print g.shape, yi.shape, xi.shape
cs = m.contourf(yi, xi, g, cmap='Dark2')
cbar = plt.colorbar(cs, orientation='horizontal')
cbar.set_label('la densite des impacts foudre', size=18)
plt.gcf().set_size_inches(15,15)
plt.show()
And here's the result I got.
So my request is: how can I get a nicer plot? I don't want that rectangle in the middle; I want my result to be smoother... any ideas?
I found the answer to my request. In order to get rid of that rectangle, I added this to my code:
g[g==0.0] = np.nan
This means the bins with a density equal to 0 don't appear on the plot, and it's working fine.
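If the contours themselves also need to be smoother, one option beyond that fix (my own suggestion, not part of the original answer) is to blur the binned counts with scipy.ndimage.gaussian_filter first, and then mask the (near-)empty bins:
from scipy.ndimage import gaussian_filter
g = gaussian_filter(g, sigma=2)  # sigma controls how much the counts are smeared
g[g < 0.01] = np.nan             # then hide the (near-)empty bins as before
cs = m.contourf(yi, xi, g, cmap='Dark2')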

How to create a grid on tkinter in Python?

I managed to create a function that, given a radius, a starting point, and a number of points, will create a big circle and, within this circle, 4 small circles.
I want to add a grid to the background that shows the X and Y axes in Tkinter, with lines every 100 pixels starting from the top left. The coordinate origin should be the top left corner.
For example, if the screen is 300x300, then the window will have 3 lines on its X axis (at 100, 200 and 300) going from left to right, and the same from top to bottom.
A grid as a coordinate system.
Here is an example of how I create a normal line. I use a line class which contains two points, a start point and an end point:
import Tkinter

rootWindow = Tkinter.Tk()
rootFrame = Tkinter.Frame(rootWindow, width=1000, height=800, bg="white")
rootFrame.pack()
canvas = Tkinter.Canvas(rootFrame, width=1000, height=800, bg="white")
canvas.pack()

def draw_line(l):
    "Draw a line with its two end points"
    draw_point(l.p1)
    draw_point(l.p2)
    # now draw the line segment
    x1 = l.p1.x
    y1 = l.p1.y
    x2 = l.p2.x
    y2 = l.p2.y
    id = canvas.create_line(x1, y1, x2, y2, width=2, fill="blue")
    return id
This will create a grid on the canvas for you:
import tkinter as tk

def create_grid(event=None):
    w = c.winfo_width()   # Get current width of canvas
    h = c.winfo_height()  # Get current height of canvas
    c.delete('grid_line') # Will only remove the grid_line
    # Creates all vertical lines at intervals of 100
    for i in range(0, w, 100):
        c.create_line([(i, 0), (i, h)], tag='grid_line')
    # Creates all horizontal lines at intervals of 100
    for i in range(0, h, 100):
        c.create_line([(0, i), (w, i)], tag='grid_line')

root = tk.Tk()
c = tk.Canvas(root, height=1000, width=1000, bg='white')
c.pack(fill=tk.BOTH, expand=True)
c.bind('<Configure>', create_grid)
root.mainloop()
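Since the question also asks for a readable coordinate system, a small addition of my own (reusing the same grid_line tag) labels each grid line; this would go inside create_grid after the two loops:
# label each grid line so the canvas reads as a coordinate system
for i in range(100, w, 100):
    c.create_text(i, 10, text=str(i), tag='grid_line')   # x-axis labels along the top
for i in range(100, h, 100):
    c.create_text(15, i, text=str(i), tag='grid_line')   # y-axis labels along the left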

Intersection-over-union between two detections

I was reading through the paper by Ferrari et al., specifically the "Affinity Measures" section. I understood that Ferrari et al. try to obtain affinity by:
Location affinity - using area of intersection-over-union between two detections
Appearance affinity - using Euclidean distances between Histograms
KLT point affinity measure
However, I have 2 main problems:
I cannot understand what is actually meant by intersection-over-union between 2 detections, and how to calculate it.
I tried a slightly different appearance affinity measure. I transformed the RGB detection into HSV, concatenating the Hue and Saturation into one vector, and used it to compare with other detections. However, this technique failed, as a detection of a bag had a better similarity score than a detection of the same person's head (with a different orientation).
Any suggestions or solutions to my problems described above? Thank you and your help is very much appreciated.
Try Intersection over Union.
Intersection over Union is an evaluation metric used to measure the accuracy of an object detector on a particular dataset.
More formally, in order to apply Intersection over Union to evaluate an (arbitrary) object detector we need:
The ground-truth bounding boxes (i.e., the hand labeled bounding boxes from the testing set that specify where in the image our object is).
The predicted bounding boxes from our model.
Below I have included a visual example of a ground-truth bounding box versus a predicted bounding box:
The predicted bounding box is drawn in red while the ground-truth (i.e., hand labeled) bounding box is drawn in green.
In the figure above we can see that our object detector has detected the presence of a stop sign in an image.
Computing Intersection over Union can therefore be determined via: IoU = Area of Overlap / Area of Union.
As long as we have these two sets of bounding boxes we can apply Intersection over Union.
Here is the Python code
# import the necessary packages
from collections import namedtuple
import numpy as np
import cv2

# define the `Detection` object
Detection = namedtuple("Detection", ["image_path", "gt", "pred"])

def bb_intersection_over_union(boxA, boxB):
    # determine the (x, y)-coordinates of the intersection rectangle
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])
    # compute the area of intersection rectangle
    interArea = (xB - xA) * (yB - yA)
    # compute the area of both the prediction and ground-truth
    # rectangles
    boxAArea = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    boxBArea = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    # compute the intersection over union by taking the intersection
    # area and dividing it by the sum of prediction + ground-truth
    # areas - the intersection area
    iou = interArea / float(boxAArea + boxBArea - interArea)
    # return the intersection over union value
    return iou
The gt and pred fields are:
gt : The ground-truth bounding box.
pred : The predicted bounding box from our model.
For more information, you can read this post.
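A quick usage sketch with made-up (x1, y1, x2, y2) boxes, just to show the call shape:
boxA = [39, 63, 203, 112]  # hypothetical ground-truth box
boxB = [54, 66, 198, 114]  # hypothetical predicted box
print(bb_intersection_over_union(boxA, boxB))  # ~0.80 for this pair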
1) You have two overlapping bounding boxes. You compute the intersection of the boxes, which is the area of the overlap. You compute the union of the overlapping boxes, which is the sum of the areas of the entire boxes minus the area of the overlap. Then you divide the intersection by the union. There is a function for that in MATLAB's Computer Vision System Toolbox called bboxOverlapRatio.
2) Generally, you don't want to concatenate the color channels. What you want instead, is a 3D histogram, where the dimensions are H, S, and V.
The current answers already explain the question clearly, so here I provide a slightly better version of IoU in Python that doesn't break when two bounding boxes don't intersect.
import numpy as np

def IoU(box1: np.ndarray, box2: np.ndarray):
    """
    calculate intersection over union cover percent
    :param box1: box1 with shape (N,4) or (N,2,2) or (2,2) or (4,). first shape is preferred
    :param box2: box2 with shape (N,4) or (N,2,2) or (2,2) or (4,). first shape is preferred
    :return: IoU ratio if intersect, else 0
    """
    # first unify all boxes to shape (N,4)
    if box1.shape[-1] == 2 or len(box1.shape) == 1:
        box1 = box1.reshape(1, 4) if len(box1.shape) <= 2 else box1.reshape(box1.shape[0], 4)
    if box2.shape[-1] == 2 or len(box2.shape) == 1:
        box2 = box2.reshape(1, 4) if len(box2.shape) <= 2 else box2.reshape(box2.shape[0], 4)
    point_num = max(box1.shape[0], box2.shape[0])
    b1p1, b1p2, b2p1, b2p2 = box1[:, :2], box1[:, 2:], box2[:, :2], box2[:, 2:]
    # mask that eliminates non-intersecting pairs
    base_mat = np.ones(shape=(point_num,))
    base_mat *= np.all(np.greater(b1p2 - b2p1, 0), axis=1)
    base_mat *= np.all(np.greater(b2p2 - b1p1, 0), axis=1)
    # intersection area
    intersect_area = np.prod(np.minimum(b2p2, b1p2) - np.maximum(b1p1, b2p1), axis=1)
    # union area
    union_area = np.prod(b1p2 - b1p1, axis=1) + np.prod(b2p2 - b2p1, axis=1) - intersect_area
    # IoU
    intersect_ratio = intersect_area / union_area
    return base_mat * intersect_ratio
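A minimal usage sketch, assuming (x1, y1, x2, y2) corner order:
b1 = np.array([[0, 0, 10, 10]])
b2 = np.array([[5, 5, 15, 15]])
print(IoU(b1, b2))   # intersection 25, union 175 -> [0.14285714]
b3 = np.array([[20, 20, 30, 30]])
print(IoU(b1, b3))   # disjoint boxes -> [0.] instead of an error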
Here's yet another solution I implemented that works for me.
Borrowed heavily from PyImageSearch
import numpy as np

def bbox_intersects(bbox_a, bbox_b):
    if bbox_b['x0'] >= bbox_a['x0'] and bbox_b['x0'] <= bbox_a['x1'] and \
            bbox_b['y0'] >= bbox_a['y0'] and bbox_b['y0'] <= bbox_a['y1']:
        # top-left of b within a
        return True
    elif bbox_b['x1'] >= bbox_a['x0'] and bbox_b['x1'] <= bbox_a['x1'] and \
            bbox_b['y1'] >= bbox_a['y0'] and bbox_b['y1'] <= bbox_a['y1']:
        # bottom-right of b within a
        return True
    elif bbox_a['x0'] >= bbox_b['x0'] and bbox_a['x0'] <= bbox_b['x1'] and \
            bbox_a['y0'] >= bbox_b['y0'] and bbox_a['y0'] <= bbox_b['y1']:
        # top-left of a within b
        return True
    elif bbox_a['x1'] >= bbox_b['x0'] and bbox_a['x1'] <= bbox_b['x1'] and \
            bbox_a['y1'] >= bbox_b['y0'] and bbox_a['y1'] <= bbox_b['y1']:
        # bottom-right of a within b
        return True
    return False

def bbox_area(x0, y0, x1, y1):
    return (x1 - x0) * (y1 - y0)

def get_bbox_iou(bbox_a, bbox_b):
    if bbox_intersects(bbox_a, bbox_b):
        x_left = max(bbox_a['x0'], bbox_b['x0'])
        x_right = min(bbox_a['x1'], bbox_b['x1'])
        y_top = max(bbox_a['y0'], bbox_b['y0'])
        y_bottom = min(bbox_a['y1'], bbox_b['y1'])
        inter_area = bbox_area(x0=x_left, x1=x_right, y0=y_top, y1=y_bottom)
        bbox_a_area = bbox_area(**bbox_a)
        bbox_b_area = bbox_area(**bbox_b)
        return inter_area / float(bbox_a_area + bbox_b_area - inter_area)
    else:
        return 0
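A short usage sketch with hypothetical dict-style boxes:
box_a = {'x0': 0, 'y0': 0, 'x1': 10, 'y1': 10}
box_b = {'x0': 5, 'y0': 5, 'x1': 15, 'y1': 15}
print(get_bbox_iou(box_a, box_b))  # 25 / 175 ≈ 0.143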