How can I extract object segmentations from the coco dataset? - computer-vision

From the MSCOCO dataset segmentation annotations, how can I extract just the segmented objects themselves? For example, given an image of a person standing with a house in the background, how can I extract just the person themselves?

If your data is already in FiftyOne, then you can write a simple function using OpenCV and Numpy to crop the segmentations in your FiftyOne labels. It could look something like this:
import os
import cv2
import numpy as np
import fiftyone as fo
import fiftyone.zoo as foz
from fiftyone import ViewField as F
def extract_classwise_instances(samples, output_dir, label_field, ext=".png"):
    """Crop every segmented instance out of its image and save it with alpha.

    For each detection in ``sample[label_field].detections`` the image region
    covered by the instance mask is cut out, the mask is appended as an alpha
    channel (255 inside the object, 0 outside) and the result is written to
    ``<output_dir>/<label>/<detection id><ext>``.

    Args:
        samples: collection providing ``iter_samples(progress=True)``; every
            sample must expose ``filepath`` and support ``sample[label_field]``.
        output_dir: directory under which one sub-directory per label is made.
        label_field: name of the sample field holding the detections.
        ext: file extension of the written crops.  Transparency is only kept
            by formats that can store an alpha channel.
    """
    print("Extracted object instances...")
    for sample in samples.iter_samples(progress=True):
        img = cv2.imread(sample.filepath)
        if img is None:
            # cv2.imread signals an unreadable/missing file by returning None.
            continue

        labels = sample[label_field]
        if labels is None:
            continue

        img_h, img_w = img.shape[:2]
        for det in labels.detections:
            mask = det.mask
            if mask is None:
                # Detection carries a box but no instance mask.
                continue

            # bounding_box is relative [x, y, w, h]; only the top-left corner
            # is needed because the mask itself gives the pixel size.
            rel_x, rel_y = det.bounding_box[:2]
            x = int(rel_x * img_w)
            y = int(rel_y * img_h)
            h, w = mask.shape

            crop = img[y:y + h, x:x + w, :]
            crop_h, crop_w = crop.shape[:2]
            if crop_h == 0 or crop_w == 0:
                continue

            # Slicing clips at the image border, so trim the mask to the crop
            # actually obtained; otherwise concatenate fails on a shape
            # mismatch.
            alpha = mask[:crop_h, :crop_w].astype(np.uint8) * 255
            alpha = np.expand_dims(alpha, 2)
            rgba = np.concatenate((crop, alpha), axis=2)

            label_dir = os.path.join(output_dir, det.label)
            os.makedirs(label_dir, exist_ok=True)
            cv2.imwrite(os.path.join(label_dir, det.id + ext), rgba)
# Destination for the cropped instances (one sub-directory per class).
output_dir = "/tmp/coco-segmentations"
os.makedirs(output_dir, exist_ok=True)

# Only "person" instances are requested, stored under "ground_truth".
classes = ["person"]
label_field = "ground_truth"

# A small slice of the COCO-2017 validation split with segmentation labels.
dataset = foz.load_zoo_dataset(
    "coco-2017",
    split="validation",
    label_types=["segmentations"],
    classes=classes,
    max_samples=20,
    label_field=label_field,
    dataset_name=fo.get_default_dataset_name(),
)

# Keep only labels of the requested classes before extracting.
wanted_labels = F("label").is_in(classes)
view = dataset.filter_labels(label_field, wanted_labels)

extract_classwise_instances(view, output_dir, label_field)

Related

How to fix the reprojection from EASE-2 grid product SMAP to geographic coordinates?

I have been working with SMAP satellite data, especially soil moisture and soil properties.
I followed the idea of using GDAL to solve everything, and made something similar to what is published in Link to first approach to download SMAP data.
Modifying the code and testing:
import os
import h5py
import numpy as np
from osgeo import gdal, gdal_array, osr
# The file to download:
# https://n5eil01u.ecs.nsidc.org/SMAP/SPL4SMAU.003/2017.08.01/SMAP_L4_SM_aup_20170801T030000_Vv3030_001.h5
path = "/path/to/data"

# Copy the soil-moisture grid and the per-cell coordinates into memory, then
# close the HDF5 file; os.path.join supplies the separator that plain string
# concatenation with `path` was missing.
h5_path = os.path.join(path, "SMAP_L4_SM_aup_20170801T030000_Vv3030_001.h5")
with h5py.File(h5_path, 'r') as h5File:
    np_data = np.array(h5File.get('Analysis_Data/sm_rootzone_analysis'))
    np_lat = np.array(h5File.get("cell_lat"))
    np_lon = np.array(h5File.get("cell_lon"))

# Extent of the grid taken from the coordinate arrays.
xmin = np_lon.min()
xmax = np_lon.max()
ymin = np_lat.min()
ymax = np_lat.max()

# Pixel size along each axis.
nrows, ncols = np_data.shape
xres = (xmax - xmin) / float(ncols)
yres = (ymax - ymin) / float(nrows)

# GDAL order: (x origin, pixel width, 0, y origin, 0, -pixel height).
# The height term must use yres, not xres.
# NOTE(review): the raster is tagged EPSG:4326 with a geotransform in degrees;
# that is only valid if the grid is regular in lat/lon - confirm against the
# product's native projection.
geotransform = (xmin, xres, 0, ymax, 0, -yres)

dataFileOutput = os.path.join(path, "sm_rootzone_analysis.tif")
output_raster = gdal.GetDriverByName('GTiff').Create(dataFileOutput, ncols, nrows, 1, gdal.GDT_Float32)  # Open the file
output_raster.SetGeoTransform(geotransform)
srs = osr.SpatialReference()
srs.ImportFromEPSG(4326)
output_raster.SetProjection(srs.ExportToWkt())
output_raster.GetRasterBand(1).WriteArray(np_data)  # Writes my array to the raster
# Releasing the dataset object is what makes GDAL flush and close the file.
del output_raster
So, using this approach, the result is a global map with many projection problems, as shown for example in the image below, which was produced by the Python code above.
For comparison with correct data, the same image was extracted from the h5 file using NASA's HEG software.
If the data is really in the EASE2 Global grid, you shouldn't assign EPSG:4326 as the coordinate system together with lat/lon degrees in the geotransform.
If you convert the lat/lon coordinates to the EASE2 Grid at 9km, your geotransform should be something like:
geotransform = (-17367530.44516138, 9000, 0, 7314540.79258289, 0, -9000.0)
and the srs:
srs.ImportFromEPSG(6933)

matplotlib legend at the bottom of the figure with twinx

I am trying to draw a legend under two plots (created using twinx). I want the legend drawn at the bottom, center-aligned, with 4 columns. So far no success. How can I position the legend with respect to the entire plot, not just a single axis object? Any help?
import matplotlib.pyplot as plt;
import numpy as np;
from matplotlib import rc;
# Two data sets over the same x values: blue squares on the left axis, red
# triangles on the right (twinx) axis, one combined legend, saved as a PDF.
filename = 'ml.pdf'
fig, ax1 = plt.subplots(frameon=False)
rc('mathtext', default='regular')
rc('lines', lw=2.6)
rc('lines', mew=2.4)
rc('text', usetex=True)

x = np.array([5, 10, 20, 50])

# --- left axis ---------------------------------------------------------
dp_g = np.array([23.43, 29.93, 36.50, 46.07])
mr_g = np.array([25.33, 31.83, 38.39, 47.75])
md_g = np.array([24.94, 31.33, 37.80, 47.10])
sb_g = np.array([27.01, 34.86, 43.18, 54.35])

# TeX commands need a literal backslash: it is doubled where the label also
# contains a real "\n" line break, and raw strings are used elsewhere.
# Unescaped, '\times' would be read by Python as TAB + "imes".
lns1 = ax1.plot(x, dp_g, 'bs:', label="MD\n($\\lambda$=.8)")
lns2 = ax1.plot(x, mr_g, 'bs--', label="MR\n($\\lambda$=.1)")
lns3 = ax1.plot(x, md_g, 'bs-.', label='MD')
lns4 = ax1.plot(x, sb_g, 'bs-', label="SB\n($\\gamma$=.1)")

ax1.set_ylabel(r'CG ($\times$ 100)', color='b', size=14)
ax1.set_ylim([20, 57])
ax1.set_xlim([4, 51])
ax1.set_xticks(x)
ax1.tick_params(axis='y', which=u'both', length=0, labelsize=14, colors='b')
ax1.tick_params(axis='x', which=u'both', length=0, labelsize=14)

# --- right axis --------------------------------------------------------
ax2 = ax1.twinx()
dp_d = np.array([18.84, 19.55, 20.09, 20.08])
mr_d = np.array([19.42, 19.73, 20.06, 20.04])
md_d = np.array([19.02, 19.75, 20.28, 20.29])
sb_d = np.array([20.81, 19.77, 19.20, 19.03])

lns6 = ax2.plot(x, dp_d, 'rv:', label="MD\n($\\lambda$=.8)")
lns7 = ax2.plot(x, mr_d, 'rv--', label="MR\n($\\lambda$=.1)")
lns8 = ax2.plot(x, md_d, 'rv-.', label='MD')
lns9 = ax2.plot(x, sb_d, 'rv-', label="SB\n($\\gamma$=.1)")

# Lines from both axes are gathered so a single legend can list all of them.
lns = lns1 + lns2 + lns3 + lns4 + lns6 + lns7 + lns8 + lns9
labs = [l.get_label() for l in lns]

ax2.set_ylabel(r'LD ($\times$ 100)', color='r', size=14)
ax2.set_ylim([15, 23])
ax2.set_xlim([4, 51])
ax2.set_xticks(x)
ax2.tick_params(axis='y', which=u'both', length=0, labelsize=14, colors='r')
ax2.tick_params(axis='x', which=u'both', length=0, labelsize=14)
ax1.set_xlabel(r'\# of items', size=14)

# Broken line left over from a removed legend call (a syntax error on its
# own), kept here only as a comment:
# borderaxespad=2.5, ncol = 1, fontsize='11.5');
lgd = ax1.legend(lns, labs, bbox_to_anchor=(1.01,1.0), loc='lower center', borderaxespad=2.5, ncol = 4, fontsize='14')
# The legend is passed as an extra artist so the tight bounding box includes it.
fig.savefig(filename, format='pdf', transparent=True, bbox_extra_artists=(lgd,), bbox_inches='tight')
Apart from the broken line borderaxespad=2.5, ncol = 1, fontsize='11.5');, I believe what you want to do is to just remove the bbox_to_anchor=(1.01, 1.0) from the legend-definition. Doing so will put the legend at the bottom center of the plot (however the legend is very wide so it will span the entire width of the plot).

Fitting an Image to a ROI

I have an ROI and an image. I have to fill the ROI with the image that I have. The image should scale according to the ROI shape and size and should fill the entire ROI without repeating the image. How can I achieve this using opencv? Is there any method in opencv to achieve this?
Suppose this white section is my ROI and
this is my input image
Is there any solution using imageMagick???
Finding optimal fit of one shape inside another is not trivial, but if you can settle for suboptimal result you can do the following:
import cv2
import numpy as np
from matplotlib import pyplot as plt
# Fit an ellipse to the first contour of the background (ROI) image.
# NOTE(review): bg_img and fruit_alpha are not defined in the code shown.
# NOTE(review): the two-value unpacking matches OpenCV builds whose
# findContours returns (contours, hierarchy); OpenCV 3.x returns three values.
bg_contours, bg_hierarchy = cv2.findContours(bg_img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
bg_contour = bg_contours[0]
bg_ellipse = cv2.fitEllipse(bg_contour)
# Same for the fruit's alpha channel, using the convex hull of its first contour.
p_contours, p_hierarchy = cv2.findContours(fruit_alpha, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
pear_hull = cv2.convexHull(p_contours[0])
pear_ellipse = cv2.fitEllipse(pear_hull)
# fitEllipse returns (centre, axis lengths, angle): index [1] holds the two
# axis lengths and [0] the centre. The smaller of the per-axis ratios is used
# as the scale.
min_ratio = min(bg_ellipse[1][0] / pear_ellipse[1][0], bg_ellipse[1][1] / pear_ellipse[1][1])
# Shift that moves the scaled fruit-ellipse centre onto the background-ellipse centre.
x_shift = bg_ellipse[0][0] - pear_ellipse[0][0] * min_ratio
y_shift = bg_ellipse[0][1] - pear_ellipse[0][1] * min_ratio
(Heuristic) Resize the fruit contour, start with an initial guess based on the ellipses, refine using the contour (this can be improved but it is a non trivial optimization problem, you can look more here):
# First guess: the fruit contour scaled by the ellipse-based ratio, with
# coordinates truncated to int.
# NOTE(review): max_c_ix is not defined in the code shown - presumably the
# index of the fruit contour to use; confirm against the omitted code.
r_contour = np.array([[[int(j) for j in i[0]]] for i in min_ratio * p_contours[max_c_ix]])
# Smallest distance GetMinDist reports between the shifted contour and the
# background contour.
min_dist, bad_pt = GetMinDist(outer_contour=bg_contour, inner_contour=r_contour, offset=(int(x_shift), int(y_shift)))
# Correct the scale by that distance relative to the larger ellipse axis.
mask_size = max(bg_ellipse[1][0], bg_ellipse[1][1])
scale = min_ratio * (mask_size + min_dist) / mask_size
# Contour re-scaled with the refined factor.
r_contour = np.array([[[int(j) for j in i[0]]] for i in scale * p_contours[max_c_ix]])
Combine the images using the alpha channel:
# Paste the scaled fruit onto the background at the computed integer offset.
# NOTE(review): bg and fruit_rgb are not defined in the code shown.
combined = CombineImages(bg, fruit_rgb, fruit_alpha, scale, (int(x_shift), int(y_shift)))
Utility functions:
def GetMinDist(outer_contour, inner_contour, offset):
    """Find the inner-contour point with the smallest signed distance to the outer contour.

    Every point of ``inner_contour`` is shifted by ``offset`` and measured
    against ``outer_contour`` with ``cv2.pointPolygonTest`` (distance mode:
    positive inside, negative outside).

    Args:
        outer_contour: contour the points are tested against.
        inner_contour: sequence of points, each indexed as ``pt[0][0]`` (x)
            and ``pt[0][1]`` (y).
        offset: ``(dx, dy)`` shift applied to every inner point.

    Returns:
        ``(min_dist, bad_pt)``: the smallest distance found and the shifted
        point that produced it.  For an empty ``inner_contour`` the initial
        values ``(10000, (0, 0))`` are returned.
    """
    dx, dy = int(offset[0]), int(offset[1])
    min_dist = 10000
    bad_pt = (0, 0)
    for i_pt in inner_contour:
        pt = (i_pt[0][0] + dx, i_pt[0][1] + dy)
        # Contour coordinates are NumPy scalars; some OpenCV builds refuse to
        # parse those as a point, so plain Python floats are handed over.
        dst = cv2.pointPolygonTest(outer_contour, (float(pt[0]), float(pt[1])), True)
        if dst < min_dist:
            min_dist, bad_pt = dst, pt
    return min_dist, bad_pt
def CombineImages(mask_img, fruit_img, fruit_alpha, scale, offset):
    """Paste the scaled fruit onto a copy of ``mask_img`` wherever its alpha is > 0.

    Args:
        mask_img: background image, indexed as (row, column, channel).
        fruit_img: image to paste, same channel layout as ``mask_img``.
        fruit_alpha: single-channel alpha of ``fruit_img``.
        scale: resize factor applied to both the fruit and its alpha.
        offset: ``(x, y)`` of the top-left corner of the paste region.
            The scaled fruit is assumed to fit inside ``mask_img`` at this
            position with non-negative coordinates.

    Returns:
        A new image; ``mask_img`` itself is left untouched.
    """
    mask_height, mask_width = mask_img.shape[:2]
    f_height, f_width = fruit_img.shape[:2]

    new_size = (int(f_width * scale), int(f_height * scale))
    r_fruit = cv2.resize(fruit_img, new_size)
    r_alpha = cv2.resize(fruit_alpha, new_size)
    height, width = r_fruit.shape[:2]

    # Full-size canvases so fruit pixels and alpha share the background's
    # coordinate system.
    resized_fruit = np.zeros_like(mask_img)
    resized_alpha = np.zeros((mask_height, mask_width), fruit_alpha.dtype)

    x_from, y_from = offset[0], offset[1]
    resized_fruit[y_from:y_from + height, x_from:x_from + width, :] = r_fruit
    resized_alpha[y_from:y_from + height, x_from:x_from + width] = r_alpha

    # One boolean-mask assignment replaces the per-pixel Python double loop.
    combined_img = np.copy(mask_img)
    visible = resized_alpha > 0
    combined_img[visible] = resized_fruit[visible]
    return combined_img
I hope that helps.
(I omitted parts of the code that do not contribute to the understanding of the flow)

graphical user interface for real time chatbox

I'm trying to create a chat box, using tkinter, for real time chatting. But there is a problem in the GUI part (below) where I am getting this error:
NameError: global name 'action' is not defined
My code:
from Tkinter import *
from PIL import ImageTk,Image
class LoginFrame(Frame):
    """Chat window with a conversation log on top and a message entry box below."""

    def action(self, event):
        """Handle <Return> in the entry box: hand its text to the log and clear it.

        Being a method taking ``self``, it can be bound as ``self.action`` and
        can reach the widgets, which are stored on the instance rather than
        looked up as globals.
        """
        self.EntryBox.config(state=NORMAL)
        EntryText = self.EntryBox.get("0.0", END)
        # LoadMyEntry is not defined in this class; it must be provided by the
        # surrounding module.
        LoadMyEntry(self.ChatLog, EntryText)
        self.EntryBox.delete("0.0", END)

    def __init__(self, parent):
        """Build the widgets inside ``parent`` and centre the window on screen."""
        Frame.__init__(self, parent, background=("lavender blush"))
        self.parent = parent
        self.parent.title("Lets Gossip")
        self.pack(fill=BOTH, expand=1)

        # Centre a 400x500 window on the screen.
        w = 400
        h = 500
        sw = self.parent.winfo_screenwidth()
        sh = self.parent.winfo_screenheight()
        x = (sw - w) // 2
        y = (sh - h) // 2
        self.parent.geometry('%dx%d+%d+%d' % (w, h, x, y))

        # Conversation log with its scrollbar.
        self.ChatLog = Text(self, bd=0, bg="white", height="8", width="50", font="Arial",)
        scrollbar = Scrollbar(self, command=self.ChatLog.yview, cursor="heart")
        self.ChatLog['yscrollcommand'] = scrollbar.set

        # Message entry box; its scrollbar scrolls the entry box itself.
        self.EntryBox = Text(self, bd=0, bg="white", width="29", height="5", font="Arial")
        self.EntryBox.bind("<Return>", self.action)
        scrollbar1 = Scrollbar(self, command=self.EntryBox.yview, cursor="heart")
        self.EntryBox['yscrollcommand'] = scrollbar1.set

        scrollbar.place(x=376, y=6, height=386)
        self.ChatLog.place(x=6, y=6, height=386, width=370)
        scrollbar1.place(x=376, y=401, height=90)
        self.EntryBox.place(x=6, y=401, height=90, width=370)
# Create the top-level window, build the chat frame inside it and hand
# control to Tk's event loop (blocks until the window is closed).
root = Tk()
lf = LoginFrame(root)
root.mainloop()

face recognition on raspberry pi create LBP

I just finished a working face-recognition program using Python on an Ubuntu system.
But when I move the work to a Raspberry Pi, it gives this error.
This is the full error:
AttributeError: 'module' object has no attribute 'createLBPHFaceRecognizer'
What is the solution?
Thank you.
import cv2
import sys
import cv
import glob
import numpy as np
import os
# Module-level state shared by the functions below.
labeltest=[]   # label strings read from csv.ext
Images=[]      # grayscale training images, parallel to labeltest
Len=0          # set further down from the number of stored .pgm files


def _create_lbph_recognizer(*args):
    """Create an LBPH face recognizer with whichever factory this OpenCV offers.

    OpenCV 2.4 exposes ``cv2.createLBPHFaceRecognizer``; later releases moved
    it into the contrib ``cv2.face`` module, first under the same name and
    then as ``LBPHFaceRecognizer_create``.  Positional arguments are passed
    through unchanged.

    Raises:
        AttributeError: if none of the factories is available.
    """
    if hasattr(cv2, "createLBPHFaceRecognizer"):
        return cv2.createLBPHFaceRecognizer(*args)
    face_module = getattr(cv2, "face", None)
    for factory_name in ("LBPHFaceRecognizer_create", "createLBPHFaceRecognizer"):
        factory = getattr(face_module, factory_name, None)
        if factory is not None:
            return factory(*args)
    raise AttributeError(
        "this OpenCV build has no LBPH face recognizer; "
        "the cv2.face (opencv-contrib) module is required"
    )


# Arguments: radius, neighbors, grid_x, grid_y, threshold.
model = _create_lbph_recognizer(1,8,8,8,70.0)
Labels=[]      # person names, indexed by numeric label
textsay=""     # last name that was spoken
# *********** Read *****************\\
def read():
    """Reload the training set from csv.ext and retrain the recognizer.

    Each line of csv.ext has the form ``<image path>%<numeric label>``.  The
    module-level ``labeltest`` and ``Images`` lists are refilled, the model is
    trained on them and then saved to and reloaded from mezo.xml.
    """
    # Empty the shared lists in place; without this every call would append
    # the whole file again and train on duplicated samples.
    del labeltest[:]
    del Images[:]

    with open("csv.ext") as f:
        for line in f:
            if "%" not in line:
                # Blank or malformed line: nothing to split into path/label.
                continue
            arr = line.split("%", 2)
            labeltest.append(arr[1])
            Images.append(cv2.imread(arr[0], cv2.IMREAD_GRAYSCALE))

    # int() tolerates the trailing newline kept in each label string.
    label = [int(value) for value in labeltest]
    print(label)

    model.train(np.asarray(Images), np.asarray(label))
    model.save("mezo.xml")
    # NOTE(review): newer OpenCV face modules expose `read` instead of `load`;
    # use whichever this build provides.
    reload_model = getattr(model, "load", None) or model.read
    reload_model("mezo.xml")
# //*********** Read *****************
def writetofile(key):
    """Append ``key`` as one line to the training index file csv.ext."""
    # The with-block closes (and therefore flushes) the file right away, so a
    # later reader sees the new line.
    with open("csv.ext", "a+") as fo:
        fo.write(key)
        fo.write("\n")
def searchName(key):
    """Return the zero-based line index of ``key`` in Names.txt, or -1.

    The comparison ignores case.  Every name inspected is printed, as before.
    """
    wanted = key.lower()
    with open("Names.txt", "r") as names_file:
        for index, line in enumerate(names_file):
            name = line.split("\n")[0]
            print(name)
            if wanted == name.lower():
                return index
    return -1
def readName():
    """Reload the module-level ``Labels`` list from Names.txt (one name per line)."""
    with open("Names.txt", "r") as names_file:
        # Drop the line terminator so names can be spoken and drawn cleanly.
        names = [line.rstrip("\r\n") for line in names_file]
    # Replace the contents in place: appending on every call duplicated the
    # names and shifted the label -> name mapping.
    Labels[:] = names
    print(Labels)
def AddName(key):
    """Append ``key`` to Names.txt and refresh the in-memory name list."""
    # The file must be closed before readName() reopens it, otherwise the new
    # name may still sit in the write buffer and be missed.
    with open("Names.txt", "a+") as fo:
        fo.write(key)
        fo.write("\n")
    readName()
# *********** Add *****************\\
def Add(faces,gray):
    """Store each detected face as a new training image, then retrain.

    For every ``(x, y, w, h)`` box in ``faces`` the region is cut from
    ``gray``, resized to 92x112, saved as a numbered .pgm file and registered
    in csv.ext under the label of the name typed by the user (a new name is
    added to Names.txt first).

    Args:
        faces: iterable of ``(x, y, w, h)`` face boxes.
        gray: grayscale frame the boxes refer to.
    """
    # Number new files after those already on disk.  The count is taken now,
    # not from the start-up value, so a second call in the same session does
    # not overwrite images written by the first.
    count = len(glob.glob("/home/mohammad/Desktop/traning/*.pgm")) - 1 + 100
    for (x, y, w, h) in faces:
        filename = "/home/mohammad/Desktop/traning/%03d"%count +".pgm"
        f = gray[y:y+h, x:x+w]
        f = cv2.resize(f, (92,112), interpolation=cv2.INTER_LANCZOS4)

        newName = raw_input("Enter the Name : ")
        index = searchName(newName)
        if index == -1:
            # Unknown person: the new label is the next free index.
            index = len(Labels)
            AddName(newName)

        # csv.ext line format: <image path>%<label index>
        writetofile(filename + "%" + str(index))
        cv2.imwrite(filename, f)
        count += 1
    read()
# //*********** Add *****************
import subprocess

# Number of training images already on disk.
path=glob.glob("/home/mohammad/Desktop/traning/*.pgm")
Len=len(path)-1
cascPath = "haarcascade_frontalface_default.xml"
faceCascade = cv2.CascadeClassifier(cascPath)
count=0
video_capture = cv2.VideoCapture(0)
read()
readName()

# The cascade flag lives in different places depending on the OpenCV version.
haar_flags = getattr(cv2, "CASCADE_SCALE_IMAGE", None)
if haar_flags is None:
    haar_flags = cv2.cv.CV_HAAR_SCALE_IMAGE

try:
    while True:
        # Capture frame-by-frame
        ret, frame = video_capture.read()
        if not ret:
            # No frame delivered (camera missing or unplugged).
            print("Could not read a frame from the camera")
            break
        cv2.waitKey(10)
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = faceCascade.detectMultiScale(
            frame,
            scaleFactor=1.1,
            minNeighbors=5,
            minSize=(30, 30),
            flags=haar_flags
        )
        # Draw a rectangle around the faces
        for (x, y, w, h) in faces:
            cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
            f = gray[y:y+h, x:x+w]
            f = cv2.resize(f, (92,112), interpolation=cv2.INTER_LANCZOS4)
            cv2.imwrite("11.pgm", f)
            label, confidence = model.predict(f)

            # Older recognizers expose the threshold via getDouble, newer
            # ones via getThreshold.
            if hasattr(model, "getThreshold"):
                threshold = model.getThreshold()
            else:
                threshold = model.getDouble("threshold")
            print("Threshold :  %s" % threshold)

            # Only act on labels that map to a known name.
            if -1 < label < len(Labels):
                name = Labels[label]
                if name != textsay:
                    # Names are typed in by users, so they go to espeak as an
                    # argument list, never through a shell string.
                    try:
                        with open(os.devnull, "w") as devnull:
                            subprocess.call(["espeak", name], stderr=devnull)
                    except OSError:
                        # Speech is best-effort; keep recognising without it.
                        pass
                    textsay = name
                font = cv2.FONT_HERSHEY_SIMPLEX
                cv2.putText(frame, name, (x,y-10), font, 1.0, (255,255,255))
                print("\n" + str(name) + " | " + str(confidence))
        # Display the resulting frame
        cv2.imshow('Video', frame)
        k = cv2.waitKey(5) & 0xFF
        if k == 97:
            # 'a': add the faces of the current frame to the training set
            Add(faces, gray)
        if k == 27:
            # Esc: leave the loop
            break
finally:
    # Free the camera and close the preview window on any exit path.
    video_capture.release()
    cv2.destroyAllWindows()