Region growing with the watershed transform - computer-vision

I am trying out the code by adfoucart for Region growing with the watershed transform but I ran into some errors when identifying the markers for the image.
from skimage.filters import rank,gaussian
from skimage.morphology import disk
from skimage.feature import peak_local_max
def get_markers(img2, indices=False):
im_ = gaussian(img2, sigma=4)
gradr = rank.gradient(im_[:,:,0],disk(5)).astype('int')
gradg = rank.gradient(im_[:,:,1],disk(5)).astype('int')
gradb = rank.gradient(im_[:,:,2],disk(5)).astype('int')
grad = gradr+gradg+gradb
return peak_local_max(grad.max()-grad,threshold_rel=0.5, min_distance=60,indices=indices),grad
markers,grad = get_markers(img2, True)
plt.figure()
plt.imshow(grad, cmap=plt.cm.gray)
plt.plot(markers[:,1],markers[:,0],'b+')
plt.show()
and I am receiving this error.
IndexError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_17316/2204442073.py in <module>
12 return peak_local_max(grad.max()-grad,threshold_rel=0.5, min_distance=60,indices=indices),grad
13
---> 14 markers,grad = get_markers(img2, True)
15 plt.figure()
16 plt.imshow(grad, cmap=plt.cm.gray)
~\AppData\Local\Temp/ipykernel_17316/2204442073.py in get_markers(img2, indices)
5 def get_markers(img2, indices=False):
6 im_ = gaussian(img2, sigma=4)
----> 7 gradr = rank.gradient(im_[:,:,0],disk(5)).astype('int')
8 gradg = rank.gradient(im_[:,:,1],disk(5)).astype('int')
9 gradb = rank.gradient(im_[:,:,2],disk(5)).astype('int')
IndexError: too many indices for array: array is 2-dimensional, but 3 were indexed
Any help will be appreciated thanj you!

You are probably trying to run the code on a grayscale image, which will only have 2 dimensions (height and width), while the code was written expecting an RGB image with 3 dimensions (height, width and color channel).
On a grayscale image, the lines:
gradr = rank.gradient(im_[:,:,0],disk(5)).astype('int')
gradg = rank.gradient(im_[:,:,1],disk(5)).astype('int')
gradb = rank.gradient(im_[:,:,2],disk(5)).astype('int')
grad = gradr+gradg+gradb
Could be simply replaced by:
grad = rank.gradient(im_, disk(5))

Related

List format error using matlotlib linecollection

I have a list (coordpairs) that I am trying to use as the basis for plotting using LineCollection. The list is derived from a Pandas data frame. I am having trouble getting the list in the right format, despite what is admittedly a clear error code. Trimmed data frame contents, code, and error are below. Thank you for any help.
Part of the Data Frame
RUP_ID Vert_ID Longitude Latitude
1 1 -116.316961 34.750178
1 2 -116.316819 34.750006
2 1 -116.316752 34.749938
2 2 -116.31662 34.749787
10 1 -116.317165 34.754078
10 2 -116.317277 34.751492
10 3 -116.317206 34.751273
10 4 -116.317009 34.75074
10 5 -116.316799 34.750489
11 1 -116.316044 34.760377
11 2 -116.317105 34.755674
11 3 -116.317165 34.754078
Code
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
fig = plt.figure()
ax1 = plt.subplot2grid((2, 2), (0, 0), rowspan=2, colspan=1)
for ii in range(1,len(mydf)):
temp = mydf.loc[mydf.RUP_ID == ii]
df_line = temp.sort_values(by='Vert_ID', ascending=True)
del temp
lat = df_line.Latitude
lon = df_line.Longitude
lat = lat.tolist()
long = long.tolist()
coordpairs = zip(lat, long)
lc = LineCollection(coordpairs, colors='r') # this is line 112 in the error
ax1.add_collection(lc)
# note I also tried:
# import numpy as np
# coordpairs2 = np.vstack([np.array(u) for u in set([tuple(p) for p in coordpairs])])
# lc = LineCollection(coordpairs2, colors='r')
# and received the same plotting error
Error/Outputs
C:\apath\python.exe C:/mypath/myscript.py
Traceback (most recent call last):
File "C:/mypath/myscript.py", line 112, in <module>
lc = LineCollection(coordpairs, colors='r') # this is line 112 in the error
File "C:\apath\lib\site-packages\matplotlib\collections.py", line 1149, in __init__
self.set_segments(segments)
File "C:\apath\lib\site-packages\matplotlib\collections.py", line 1164, in set_segments
self._paths = [mpath.Path(_seg) for _seg in _segments]
File "C:\apath\lib\site-packages\matplotlib\path.py", line 141, in __init__
raise ValueError(msg)
ValueError: 'vertices' must be a 2D list or array with shape Nx2
Process finished with exit code 1
You would want to create one single LineCollection, with several lines, one per RUP_ID value from the first dataframe column. That means you best loop over the unique values of that column (not over every row!) and append the coordinates to a list. Use that list as the input to LineCollection.
u = """RUP_ID Vert_ID Longitude Latitude
1 1 -116.316961 34.750178
1 2 -116.316819 34.750006
2 1 -116.316752 34.749938
2 2 -116.31662 34.749787
10 1 -116.317165 34.754078
10 2 -116.317277 34.751492
10 3 -116.317206 34.751273
10 4 -116.317009 34.75074
10 5 -116.316799 34.750489
11 1 -116.316044 34.760377
11 2 -116.317105 34.755674
11 3 -116.317165 34.754078"""
import io
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
df = pd.read_csv(io.StringIO(u), sep="\s+")
verts = []
for (RUP_ID, grp) in df.groupby("RUP_ID"):
df_line = grp.sort_values(by='Vert_ID', ascending=True)
lat = df_line.Latitude
lon = df_line.Longitude
verts.append(list(zip(lon, lat)))
lc = LineCollection(verts, color='r')
fig, ax = plt.subplots()
ax.add_collection(lc)
ax.autoscale()
plt.show()

Why doesn't an array made from a PIL draw.text() image show properly in Matplotlib?

I'd like to understand why, when I convert the PIL image imageRGB to a float array arrayRGB_f and use matplotlib's imshow() without a cmap it looks either black, or strange and unreadable, even though PIL's imageRGB.show() looks fine, and each of the individual r, g, b channels shown with cmap='gray' look okay as well.
I have workarounds, but I just don't understand why this happens.
matplotlib.__version__ returns '2.0.2' and I'm using MacOS with an Anaconda installation.
See this answer for more on the conversion of a ttf rendering to a 1bit.
fyi the output of the print statements are:
float64 (41, 101, 3)
int64 (41, 101, 3)
int64 (41, 101)
int64 (41, 101)
fontname = 'default'
imageRGB.show()
plt.imshow()
fontname = 'Arial Unicode.ttf'
imageRGB.show()
plt.imshow()
font = ImageFont.truetype(fontname, 20)
imageRGB.show()
plt.imshow()
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import matplotlib.pyplot as plt
# fontname = 'Arial Unicode.ttf'
fontname = 'default'
if fontname == 'default':
font = ImageFont.load_default()
else:
font = ImageFont.truetype(fontname, 12)
string = "Hello " + fontname[:6]
ww, hh = 101, 41
threshold = 80 # https://stackoverflow.com/a/47546095/3904031
imageRGB = Image.new('RGB', (ww, hh))
draw = ImageDraw.Draw(imageRGB)
image8bit = draw.text((10, 12), string, font=font,
fill=(255, 255, 255, 255)) # R, G, B alpha
image8bit = imageRGB.convert("L")
image1bit = image8bit.point(lambda x: 0 if x < threshold else 1, mode='1') # https://stackoverflow.com/a/47546095/3904031
arrayRGB = np.array(list(imageRGB.getdata())).reshape(hh, ww, 3)
arrayRGB_f = arrayRGB.astype(float)
array8bit = np.array(list(image8bit.getdata())).reshape(hh, ww)
array1bit = np.array(list(image1bit.getdata())).reshape(hh, ww)
for a in (arrayRGB_f, arrayRGB, array8bit, array1bit):
print a.dtype, a.shape
imageRGB.show()
if True:
plt.figure()
a = arrayRGB_f
plt.subplot(2, 2, 1)
plt.imshow(a) # , interpolation='nearest', cmap='gray',
for i in range(3):
plt.subplot(2, 2, 2+i)
plt.imshow(a[:, :, i], cmap='gray')
plt.suptitle('arrayRGB_f, fontname = ' + fontname)
plt.show()
I can't find an ideal duplicate so I'll post an answer.
As #ImportanceOfBeingErnest mentions when .imshow() is given an n x m x 3 or n x m x 4 array, it is expecting a normalized array between 0.0 and 1.0.
Best way to do this is:
arrayRGB_f = arrayRGB.astype(float)/255.
though this seems to work as well:
arrayRGB_f = arrayRGB.astype(float)
arrayRGB_f = arrayRGB_f / arrayRGB_f.max()
For longer discussions, see this and this.

SciPy/Numpy's Pooling/Convolution faster than Tensorflow's Convolution/Pooling?

I am trying to use GPUs to accelerate convolution and pooling operations in my neural network application(Spiking networks). I wrote a small script to see how much speedup I can get by using Tensorflow. Surprisingly, SciPy/Numpy does better. In my application, all the inputs(images) are stored on the disk but for an example, I created a randomly initialized image of size 27x27 and weights kernel of size 5x5x30, i made sure that I am not transferring anything from CPU to GPU and I also increased the input image size to 270x270 and the weights kernel to 7x7x30, still I don't see any improvement. I made sure that all the TF methods are in fact being executed on my GPUs by setting
sess =tf.Session(config=tf.ConfigProto(log_device_placement=True))
I have access to 2 GPUs(Tesla K20m) on a cluster.
Here's my code:
import tensorflow as tf
import numpy as np
from scipy import signal
import time
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
image_size = 27
kernel_size = 5
nofMaps = 30
def convolution(Image, weights):
in_channels = 1 # 1 because our image has 1 units in the -z direction.
out_channels = weights.shape[-1]
strides_1d = [1, 1, 1, 1]
#in_2d = tf.constant(Image, dtype=tf.float32)
in_2d = Image
#filter_3d = tf.constant(weights, dtype=tf.float32)
filter_3d =weights
in_width = int(in_2d.shape[0])
in_height = int(in_2d.shape[1])
filter_width = int(filter_3d.shape[0])
filter_height = int(filter_3d.shape[1])
input_4d = tf.reshape(in_2d, [1, in_height, in_width, in_channels])
kernel_4d = tf.reshape(filter_3d, [filter_height, filter_width, in_channels, out_channels])
inter = tf.nn.conv2d(input_4d, kernel_4d, strides=strides_1d, padding='VALID')
output_3d = tf.squeeze(inter)
output_3d= sess.run(output_3d)
return output_3d
def pooling(Image):
in_channels = Image.shape[-1]
Image_3d = tf.constant(Image, dtype = tf.float32)
in_width = int(Image.shape[0])
in_height = int(Image.shape[1])
Image_4d = tf.reshape(Image_3d,[1,in_width,in_height,in_channels])
pooled_pots4d = tf.layers.max_pooling2d(inputs=Image_4d, pool_size=[2, 2], strides=2)
pooled_pots3d = tf.squeeze(pooled_pots4d)
return sess.run(pooled_pots3d)
t1 = time.time()
#with tf.device('/device:GPU:1'):
Image = tf.random_uniform([image_size, image_size], name='Image')
weights = tf.random_uniform([kernel_size,kernel_size,nofMaps], name='Weights')
conv_result = convolution(Image,weights)
pool_result = pooling(conv_result)
print('Time taken:{}'.format(time.time()-t1))
#with tf.device('/device:CPU:0'):
print('Pool_result shape:{}'.format(pool_result.shape))
#print('first map of pool result:\n',pool_result[:,:,0])
def scipy_convolution(Image,weights):
instant_conv1_pots = np.zeros((image_size-kernel_size+1,image_size-kernel_size+1,nofMaps))
for i in range(weights.shape[-1]):
instant_conv1_pots[:,:,i]=signal.correlate(Image,weights[:,:,i],mode='valid',method='fft')
return instant_conv1_pots
def scipy_pooling(conv1_spikes):
'''
Reshape splitting each of the two axes into two each such that the
latter of the split axes is of the same length as the block size.
This would give us a 4D array. Then, perform maximum finding along those
latter axes, which would be the second and fourth axes in that 4D array.
https://stackoverflow.com/questions/41813722/numpy-array-reshaped-but-how-to-change-axis-for-pooling
'''
if(conv1_spikes.shape[0]%2!=0): #if array is odd size then omit the last row and col
conv1_spikes = conv1_spikes[0:-1,0:-1,:]
else:
conv1_spikes = conv1_spikes
m,n = conv1_spikes[:,:,0].shape
o = conv1_spikes.shape[-1]
pool1_spikes = np.zeros((m/2,n/2,o))
for i in range(o):
pool1_spikes[:,:,i]=conv1_spikes[:,:,i].reshape(m/2,2,n/2,2).max(axis=(1,3))
return pool1_spikes
t1 = time.time()
Image = np.random.rand(image_size,image_size)
weights = np.random.rand(kernel_size,kernel_size,nofMaps)
conv_result = scipy_convolution(Image,weights)
pool_result = scipy_pooling(conv_result)
print('Time taken:{}'.format(time.time()-t1))
print('Pool_result shape:{}'.format(pool_result.shape))
#print('first map of pool result:\n',pool_result[:,:,0])
~
Results are as follows:
Time taken:0.746644973755
Pool_result shape:(11, 11, 30)
Time taken:0.0127348899841
Pool_result shape:(11, 11, 30)
With suggestions from the commenter, I set image_size=270 and enclosed both convolution and pool functions in a for loop, now, TF performs better than SciPy note that I am using tf.nn.conv2d and NOT the tf.layers.conv2d. I also set the parameter use_cudnn_on_gpu=True in tf.nn.conv2d but that didn't hurt or help.
Here's the code:
import tensorflow as tf
import numpy as np
from scipy import signal
import time
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
image_size = 270
kernel_size = 5
nofMaps = 30
def convolution(Image, weights):
in_channels = 1 # 1 because our image has 1 units in the -z direction.
out_channels = weights.shape[-1]
strides_1d = [1, 1, 1, 1]
#in_2d = tf.constant(Image, dtype=tf.float32)
in_2d = Image
#filter_3d = tf.constant(weights, dtype=tf.float32)
filter_3d =weights
in_width = int(in_2d.shape[0])
in_height = int(in_2d.shape[1])
filter_width = int(filter_3d.shape[0])
filter_height = int(filter_3d.shape[1])
input_4d = tf.reshape(in_2d, [1, in_height, in_width, in_channels])
kernel_4d = tf.reshape(filter_3d, [filter_height, filter_width, in_channels, out_channels])
inter = tf.nn.conv2d(input_4d, kernel_4d, strides=strides_1d, padding='VALID',use_cudnn_on_gpu=True)
output_3d = tf.squeeze(inter)
#t1 = time.time()
output_3d= sess.run(output_3d)
#print('TF Time for Conv:{}'.format(time.time()-t1))
return output_3d
def pooling(Image):
in_channels = Image.shape[-1]
Image_3d = tf.constant(Image, dtype = tf.float32)
in_width = int(Image.shape[0])
in_height = int(Image.shape[1])
Image_4d = tf.reshape(Image_3d,[1,in_width,in_height,in_channels])
pooled_pots4d = tf.layers.max_pooling2d(inputs=Image_4d, pool_size=[2, 2], strides=2)
pooled_pots3d = tf.squeeze(pooled_pots4d)
#t1 = time.time()
pool_res = sess.run(pooled_pots3d)
#print('TF Time for Pool:{}'.format(time.time()-t1))
return pool_res
#with tf.device('/device:GPU:1'):
Image = tf.random_uniform([image_size, image_size], name='Image')
weights = tf.random_uniform([kernel_size,kernel_size,nofMaps], name='Weights')
#init = tf.global_variables_initializer
#sess.run(init)
t1 = time.time()
for i in range(150):
#t1 = time.time()
conv_result = convolution(Image,weights)
pool_result = pooling(conv_result)
#print('TF Time taken:{}'.format(time.time()-t1))
print('TF Time taken:{}'.format(time.time()-t1))
#with tf.device('/device:CPU:0'):
print('TF Pool_result shape:{}'.format(pool_result.shape))
#print('first map of pool result:\n',pool_result[:,:,0])
def scipy_convolution(Image,weights):
instant_conv1_pots = np.zeros((image_size-kernel_size+1,image_size-kernel_size+1,nofMaps))
for i in range(weights.shape[-1]):
instant_conv1_pots[:,:,i]=signal.correlate(Image,weights[:,:,i],mode='valid',method='fft')
return instant_conv1_pots
def scipy_pooling(conv1_spikes):
'''
Reshape splitting each of the two axes into two each such that the
latter of the split axes is of the same length as the block size.
This would give us a 4D array. Then, perform maximum finding along those
latter axes, which would be the second and fourth axes in that 4D array.
https://stackoverflow.com/questions/41813722/numpy-array-reshaped-but-how-to-change-axis-for-pooling
'''
if(conv1_spikes.shape[0]%2!=0): #if array is odd size then omit the last row and col
conv1_spikes = conv1_spikes[0:-1,0:-1,:]
else:
conv1_spikes = conv1_spikes
m,n = conv1_spikes[:,:,0].shape
o = conv1_spikes.shape[-1]
pool1_spikes = np.zeros((m/2,n/2,o))
for i in range(o):
pool1_spikes[:,:,i]=conv1_spikes[:,:,i].reshape(m/2,2,n/2,2).max(axis=(1,3))
return pool1_spikes
Image = np.random.rand(image_size,image_size)
weights = np.random.rand(kernel_size,kernel_size,nofMaps)
t1 = time.time()
for i in range(150):
conv_result = scipy_convolution(Image,weights)
pool_result = scipy_pooling(conv_result)
print('Scipy Time taken:{}'.format(time.time()-t1))
print('Scipy Pool_result shape:{}'.format(pool_result.shape))
#print('first map of pool result:\n',pool_result[:,:,0])
Here are results:
image_size = 27x27
kernel_size = 5x5x30
iterations = 150
TF Time taken:11.0800771713
TF Pool_result shape:(11, 11, 30)
Scipy Time taken:1.4141368866
Scipy Pool_result shape:(11, 11, 30)
image_size = 270x270
kernel_size = 5x5x30
iterations = 150
TF Time taken:26.2359631062
TF Pool_result shape:(133, 133, 30)
Scipy Time taken:31.6651778221
Scipy Pool_result shape:(11, 11, 30)
image_size = 500x500
kernel_size = 5x5x30
iterations = 150
TF Time taken:89.7967050076
TF Pool_result shape:(248, 248, 30)
Scipy Time taken:143.391746044
Scipy Pool_result shape:(248, 248, 30)
In the 2nd case you can see that I got about 18% reduction in time.
In the 3rd case you can see that I goto about 38% reduction in time.

Dataset creator with OpenCV and Python error

OS : Ubuntu 17.10
I am trying this code to create a dataset on face detection using Python2.7 and Open CV (installed with pip)
import cv2
import numpy as np
cam = cv2.VideoCapture(0)
detector = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
Id = raw_input('enter your id')
sampleNum = 0
while True:
ret, img = cam.read()
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = detector.detectMultiScale(gray, 1.3, 5)
for (x, y, w, h) in faces:
cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
#incrementing sample number
sampleNum = sampleNum+1
#saving the captured face in the dataset folder
cv2.imwrite("dataSet/User."+Id +'.'+ str(sampleNum) + ".jpg", gray[y:y+h,x:x+w])
cv2.imshow('frame', img)
#wait for 100 miliseconds
if cv2.waitKey(100) & 0xFF == ord('q'):break
# break if the sample number is morethan 20
elif sampleNum > 20: break
cam.release()
cv2.destroyAllWindows()
But I am getting following error
Traceback (most recent call last):
File "/home/anushi/face/datasetCreator.py", line 10, in <module>
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
error: /io/opencv/modules/imgproc/src/color.cpp:10638: error: (-215) scn == 3 || scn == 4 in function cvtColor
As the comments correctly mention, one possible cause is that the image is empty (not captured properly). Another possibility is that the image is not a color image.
You can add
cv2.imshow('frame', img)
Before
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
and see what the captured image looks like.
The rest of the code looks fine.

Interpolating 3d data at a single point in space (Python 2.7)

I have a point cloud in 4 dimensions, where each point in the cloud has a location and a value (x,y,z,Value). In addition, I have a 'special' point, S0, within the 3d point cloud; I've used this example to find the closest 10 points in the cloud, relative to S0. Now, I have a numpy array for each of the 10 closest points and their values. How can I interpolate these 10 points, to find the interpolated value at point S0? Example code is shown below:
import numpy as np
import matplotlib.pyplot as plt
numpoints = 20
linexs = 320
lineys = 40
linezs = 60
linexe = 20
lineye = 20
lineze = 0
# Create vectors of points
xpts = np.linspace(linexs, linexe, numpoints)
ypts = np.linspace(lineys, lineye, numpoints)
zpts = np.linspace(linezs, lineze, numpoints)
lin = np.dstack((xpts,ypts,zpts))
# Image line of points
fig = plt.figure()
ax = fig.add_subplot(211, projection='3d')
ax.set_xlim(0,365); ax.set_ylim(-85, 85); ax.set_zlim(0, 100)
ax.plot_wireframe(xpts, ypts, zpts)
ax.view_init(elev=12, azim=78)
def randrange(n, vmin, vmax):
return (vmax - vmin)*np.random.rand(n) + vmin
n = 10
for n in range(21):
xs = randrange(n, 0, 350)
ys = randrange(n, -75, 75)
zs = randrange(n, 0, 100)
ax.scatter(xs, ys, zs)
dat = np.dstack((xs,ys,zs))
ax.set_xlabel('X Label')
ax.set_xlim(0,350)
ax.set_ylabel('Y Label')
ax.set_ylim(-75,75)
ax.set_zlabel('Z Label')
ax.set_zlim(0,100)
ax = fig.add_subplot(212, projection='3d')
ax.set_xlim(0,365); ax.set_ylim(-85, 85); ax.set_zlim(0, 100)
ax.plot_wireframe(xpts,ypts,zpts)
ax.view_init(elev=12, azim=78)
plt.show()
dist = []
# Calculate distance from first point to all other points in cloud
for l in range(len(xpts)):
aaa = lin[0][0]-dat
dist.append(np.sqrt(aaa[0][l][0]**2+aaa[0][l][1]**2+aaa[0][l][2]**2))
full = np.dstack((dat,dist))
aaa = full[0][full[0][:,3].argsort()]
print(aaa[0:10])
A basic example. Note that the meshgrid is not needed for the interpolation, but only to make a fast ufunc to generate an example function A=f(x,y,z), here A=x+y+z.
from scipy.interpolate import interpn
import numpy as np
#make up a regular 3d grid
X=np.linspace(-5,5,11)
Y=np.linspace(-5,5,11)
Z=np.linspace(-5,5,11)
xv,yv,zv = np.meshgrid(X,Y,Z)
# make up a function
# see http://docs.scipy.org/doc/numpy/reference/ufuncs.html
A = np.add(xv,np.add(yv,zv))
#this one is easy enough for us to know what to expect at (.5,.5,.5)
# usage : interpn(points, values, xi, method='linear', bounds_error=True, fill_value=nan)
interpn((X,Y,Z),A,[0.5,0.5,0.5])
Output:
array([ 1.5])
If you pass in an array of points of interest, it will give you multiple answers.