Image segmentation using k-means - python-2.7

I'm trying to use the k-means algorithm for an image segmentation task. The problem is that my program does not segment the image.
Would you please help me find the error in my code?
I have fixed the number of clusters to 32.
I have used the following data structures:
3 arrays bleu, vert, rouge to store the RGB values of each pixel
3 arrays cluster_bleu, cluster_vert, cluster_rouge to store the RGB value of each cluster
an array groupe, where groupe[i,0] = k maps pixel i to cluster k
import cv2
import numpy
import random
def main():
    MAX_LARGEUR = 400
    MAX_HAUTEUR = 400
    K = 32  # the K parameter of the algorithm
    imagecolor = cv2.imread('perr.jpg')
    if imagecolor.shape[0] > MAX_LARGEUR or imagecolor.shape[1] > MAX_HAUTEUR:
        factor1 = float(MAX_LARGEUR) / imagecolor.shape[0]
        factor2 = float(MAX_HAUTEUR) / imagecolor.shape[1]
        factor = min(factor1, factor2)
        imagecolor = cv2.resize(imagecolor, None, fx=factor, fy=factor, interpolation=cv2.INTER_AREA)
    nb_pixels = imagecolor.shape[0] * imagecolor.shape[1]
    bleu = imagecolor[:, :, 0].reshape(nb_pixels, 1)
    vert = imagecolor[:, :, 1].reshape(nb_pixels, 1)
    rouge = imagecolor[:, :, 2].reshape(nb_pixels, 1)
    cluster_bleu = numpy.zeros(K)
    cluster_vert = numpy.zeros(K)
    cluster_rouge = numpy.zeros(K)
    groupe = numpy.zeros((nb_pixels, 1))
    for i in range(0, K):
        groupe[i, 0] = i
    for i in range(K, nb_pixels):
        groupe[i, 0] = random.randint(0, K - 1)
    condition = False

    def etape1(indices, i):
        s = indices.size
        rouge_s = 0
        vert_s = 0
        bleu_s = 0
        # compute the barycentre of the cluster's points
        if s == 0:
            cluster_rouge[i] = 0
            cluster_vert[i] = 0
            cluster_bleu[i] = 0
        if s >= 1:
            for j in range(0, s):
                rouge_s = rouge_s + rouge[indices[j]]
                vert_s = vert_s + vert[indices[j]]
                bleu_s = bleu_s + bleu[indices[j]]
            # update the clusters
            cluster_rouge[i] = rouge_s / s
            cluster_vert[i] = vert_s / s
            cluster_bleu[i] = bleu_s / s

    iteration = 0
    oldGroupe = numpy.copy(groupe)
    while condition == False:
        for i in range(0, K):
            indices = numpy.where(groupe == i)[0]
            etape1(indices, i)
        for i in range(0, nb_pixels):
            minimum = 10000
            dist = 0
            index = -1
            for j in range(0, K):
                dist = (cluster_rouge[j] - rouge[i])**2 + (cluster_vert[j] - vert[i])**2 + (cluster_bleu[j] - bleu[i])**2
                if dist <= minimum:
                    minimum = dist
                    index = j
            groupe[i, 0] = index
        condition = numpy.all(groupe == oldGroupe)
        oldGroupe = numpy.copy(groupe)
    groupe = numpy.reshape(groupe, (imagecolor.shape[0], imagecolor.shape[1]))
    for i in range(0, imagecolor.shape[0]):
        for j in range(0, imagecolor.shape[1]):
            imagecolor[i, j, 0] = cluster_bleu[groupe[i, j]]
            imagecolor[i, j, 1] = cluster_vert[groupe[i, j]]
            imagecolor[i, j, 2] = cluster_rouge[groupe[i, j]]
    cv2.namedWindow("sortie")
    cv2.imshow("sortie", imagecolor)
    key = cv2.waitKey(0)

if __name__ == "__main__":
    main()

The problem is the assignment oldGroupe = groupe, which does not copy the array; it only creates another name (oldGroupe) that refers to the same data as groupe. Thus, when you change groupe you also change oldGroupe, and condition is always True.
What you want is to create a copy of the data in groupe with oldGroupe = numpy.copy(groupe).
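A minimal sketch of the difference, using a small dummy array instead of the image data:
import numpy

groupe = numpy.zeros((4, 1))
alias = groupe              # no copy: both names point to the same buffer
copie = numpy.copy(groupe)  # independent copy of the data

groupe[0, 0] = 7
print(numpy.all(alias == groupe))  # True  -- the alias changed along with groupe
print(numpy.all(copie == groupe))  # False -- the copy kept the old values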

Related

Implementation of Karger's Algorithm in Python Taking too Long

I am wondering if you can help me understand where the critical flaw may be in my attempt at implementing Karger's algorithm in Python. My program appears to take far too long to run, and my computer struggles when I run it on large sets of vertices. The purpose of the program is to output the minimum cut of the graph.
from random import choice
from statistics import mode
import math

fhand = open("mincuts.txt", "r")
vertices = fhand.readlines()

d = {}
for index, line in enumerate(vertices):
    d["{0}".format(index + 1)] = line.split()

def randy(graph, x):
    # pick a random vertex different from x
    y = str(choice(list(graph)))
    if x == y:
        y = randy(graph, x)
    return y

count = 0

def contract(graph):
    global count
    if len(graph) == 2:
        a = list(graph.keys())[0]
        b = list(graph.keys())[1]
        for i in range(1, len(graph[a])):
            if graph[a][i] in graph[b]:
                count = count + 1
        #print(graph)
        return
    x = str(choice(list(graph)))
    y = randy(graph, x)
    #print(x)
    #print(y)
    graph[x] = graph[x] + graph[y]
    graph.pop(y)
    # remove self loops
    for key in graph:
        # method to remove duplicate entries in the arrays of the vertices. Source: www.w3schools.com
        graph[key] = list(dict.fromkeys(graph[key]))
    contract(graph)

N = len(d)
runs = int(N*N*(math.log(N)))
outcomes = []
for i in range(runs):
    e = d.copy()
    count = 0
    contract(e)
    outcomes.append(count)

print(outcomes)
# returns most common minimum cut value
print(mode(outcomes))
Below is a link to the graph I am running in mincuts.txt:
https://github.com/BigSoundCode/Misc-Algorithm-Implementations/blob/main/mincuts.txt
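For a sense of scale, it may help to work out how many contraction runs the N*N*log(N) schedule implies. A rough back-of-the-envelope check (assuming, purely for illustration, a graph of about 200 vertices; the actual size of mincuts.txt is not stated here):
import math

N = 200  # hypothetical vertex count
runs = int(N * N * math.log(N))
print(runs)  # 211932 -- and each run contracts the graph all the way from N down to 2 vertices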

keras custom activation to drop under certain conditions

I am trying to drop (zero out) the values less than 1 and greater than -1 in my custom activation, as shown below.
def ScoreActivationFromSigmoid(x, target_min=1, target_max=9):
    condition = K.tf.logical_and(K.tf.less(x, 1), K.tf.greater(x, -1))
    case_true = K.tf.reshape(K.tf.zeros([x.shape[1] * x.shape[2]], tf.float32), shape=(K.tf.shape(x)[0], x.shape[1], x.shape[2]))
    case_false = x
    changed_x = K.tf.where(condition, case_true, case_false)
    activated_x = K.sigmoid(changed_x)
    score = activated_x * (target_max - target_min) + target_min
    return score
The data has 3 dimensions: batch_size x sequence_length x number of features.
But I got this error:
InvalidArgumentError: Inputs to operation activation_51/Select of type Select must have the same size and shape. Input 0: [1028,300,64] != input 1: [1,300,64]
[[{{node activation_51/Select}} = Select[T=DT_FLOAT, _class=["loc:#training_88/Adam/gradients/activation_51/Select_grad/Select_1"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](activation_51/LogicalAnd, activation_51/Reshape, dense_243/add)]]
[[{{node metrics_92/acc/Mean_1/_9371}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_473_metrics_92/acc/Mean_1", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
I understand what the problem is: the custom activation function cannot find the proper batch size of the inputs. But I don't know how to control that.
Can anyone fix this, or suggest another method to replace some of the element values under certain conditions?
The error message I got when running your code is:
ValueError: Cannot reshape a tensor with 19200 elements to shape
[1028,300,64] (19737600 elements) for 'Reshape_8' (op: 'Reshape') with
input shapes: [19200], [3] and with input tensors computed as partial
shapes: input[1] = [1028,300,64].
The problem is that you cannot reshape a tensor of shape [x.shape[1] * x.shape[2]] (300 * 64 = 19,200 elements) into (K.tf.shape(x)[0], x.shape[1], x.shape[2]) (1028 * 300 * 64 = 19,737,600 elements), because their element counts differ.
So the solution is simply to create the zero tensor with the right shape.
This line:
case_true = K.tf.reshape(K.tf.zeros([x.shape[1] * x.shape[2]], tf.float32), shape=(K.tf.shape(x)[0], x.shape[1], x.shape[2]))
should be replaced with:
case_true = K.tf.reshape(K.tf.zeros([x.shape[0] * x.shape[1] * x.shape[2]], K.tf.float32), shape=(K.tf.shape(x)[0], x.shape[1], x.shape[2]))
or using K.tf.zeros_like:
case_true = K.tf.zeros_like(x)
Workable code:
import keras.backend as K
import numpy as np

def ScoreActivationFromSigmoid(x, target_min=1, target_max=9):
    condition = K.tf.logical_and(K.tf.less(x, 1), K.tf.greater(x, -1))
    case_true = K.tf.zeros_like(x)
    case_false = x
    changed_x = K.tf.where(condition, case_true, case_false)
    activated_x = K.tf.sigmoid(changed_x)
    score = activated_x * (target_max - target_min) + target_min
    return score

with K.tf.Session() as sess:
    x = K.tf.placeholder(K.tf.float32, shape=(1028, 300, 64), name='x')
    score = sess.run(ScoreActivationFromSigmoid(x), feed_dict={'x:0': np.random.randn(1028, 300, 64)})
    print(score)
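If you are on a newer TensorFlow where the K.tf alias and sessions no longer exist, a rough TF2-style sketch of the same idea (eager execution assumed; the function name and the random test input are mine, not from the original code):
import tensorflow as tf

def score_activation_from_sigmoid(x, target_min=1.0, target_max=9.0):
    # zero out values strictly between -1 and 1, then rescale the sigmoid output
    condition = tf.logical_and(tf.less(x, 1.0), tf.greater(x, -1.0))
    changed_x = tf.where(condition, tf.zeros_like(x), x)
    return tf.sigmoid(changed_x) * (target_max - target_min) + target_min

x = tf.random.normal((4, 300, 64))
print(score_activation_from_sigmoid(x).shape)  # (4, 300, 64)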

Mismatch in y-axis scale in one or more of the subplots using pyplot in python

I'm trying to plot and compare the frequency spectra of two .wav files. I wrote the following in Python for that:
import pylab
import time
from scipy import fft, arange
from numpy import linspace
from scipy.io.wavfile import read
import gc
import sys

params = {'figure.figsize': (20, 15)}
pylab.rcParams.update(params)

def plotSpec(y, Fs):
    n = len(y)  # signal length
    k = arange(n)
    T = n / Fs
    frq = k / T  # two sides frequency range
    frq = frq[range(n / 2)]  # one side frequency range
    ff_valu = fft(y) / n  # fft computing and normalization
    ff_valu = ff_valu[range(n / 2)]
    pylab.plot(frq, abs(ff_valu), 'r')  # plotting the spectrum
    pylab.tick_params(axis='x', labelsize=8)
    pylab.tick_params(axis='y', labelsize=8)
    pylab.tick_params()
    pylab.xticks(rotation=45)
    pylab.xlabel('Frequency')
    pylab.ylabel('Power')
    del frq, ff_valu, n, k, T, y
    gc.collect()
    return

def graph_plot(in_file, graph_loc, output_folder, count, func_type):
    graph_loc = int(graph_loc)
    rate = 0
    data = 0
    rate, data = read(in_file)
    dlen = len(data)
    print "dlen=", dlen
    lungime = dlen
    timp = dlen / rate
    print "timp=", timp
    t = linspace(0, timp, dlen)
    pylab.subplot(3, 2, graph_loc)
    pylab.plot(t, data)
    fl = in_file.split('/')
    file_name = fl[len(fl) - 1]
    pylab.title(file_name)
    pylab.tick_params(axis='x', labelsize=8)
    pylab.tick_params(axis='y', labelsize=8)
    pylab.xticks(rotation=45)
    pylab.xlabel('Time')
    pylab.ylabel('Numerical level')
    pylab.subplot(3, 2, graph_loc + 2)
    plotSpec(data, rate)
    pylab.subplot(3, 2, graph_loc + 4)
    if rate == 16000:
        frq = 16
    else:
        frq = 8
    pylab.specgram(data, NFFT=128, noverlap=0, Fs=frq)
    pylab.tick_params(axis='x', labelsize=8)
    pylab.tick_params(axis='y', labelsize=8)
    pylab.xticks(rotation=45)
    pylab.xlabel('Time')
    pylab.ylabel('Frequency')
    if graph_loc == 2:
        name = in_file.split("/")
        lnth = len(name)
        name = in_file.split("/")[lnth - 1].split(".")[0]
        print "File=", name
        if func_type == 'a':
            save_file = output_folder + 'RESULT_' + name + '.png'
        else:
            save_file = output_folder + 'RESULT_graph.png'
        pylab.savefig(save_file)
        pylab.gcf()
        pylab.gca()
        pylab.close('all')
    del in_file, graph_loc, output_folder, count, t, rate, data, dlen, timp
    gc.get_referrers()
    gc.collect()

def result_plot(orig_file, rec_file, output_folder, seq):
    graph_loc = 1
    graph_plot(orig_file, graph_loc, output_folder, seq, 'a')
    graph_loc = 2
    graph_plot(rec_file, graph_loc, output_folder, seq, 'a')
    sys.exit()

save_file = "~/Documents/Output/"
o_file = '~/Documents/audio/orig_8sec.wav'
#o_file = '~/Documents/audio/orig_4sec.wav'
r_file = '~/Documents/audio/rec_8sec.wav'
#r_file = '~/Documents/audio/rec_4sec.wav'

print 10*"#"+"Start"+10*"#"
result_plot(o_file, r_file, save_file, 'a')
print 10*"#"+"End"+10*"#"
pylab.close('all')
With the above code, I see that the y-axis scales come out different:
It clearly shows automatically assigned scales. With this, any amplification or attenuation with respect to the original file is difficult to see unless the reader looks up the actual values.
Since I cannot really predict the maximum amplitude of either file when I use multiple samples, how can I set the y-axes of the paired subplots to the maximum of either one, so that the scale is the same and amplification is more obvious?
I am adding the explanation you asked for in the comments above as an answer below. The idea is to selectively modify the x-axis limits for some particular subplots:
import matplotlib.pyplot as plt
import numpy as np

fig, axes = plt.subplots(2, 3, figsize=(16, 8))
x = np.linspace(0, 2*np.pi, 100)
y = np.sin(x)

for i, row in enumerate(axes):
    for j, col in enumerate(row):
        col.plot(x, y)
        col.set_title("Title here", fontsize=18)
        if i == 1 and (j == 1 or j == 2):
            col.set_xlim(0, np.pi)

plt.tight_layout()
Output
An alternative to setting the limits yourself is to create the figure and axes first using
fig, axes = plt.subplots(3, 2)
This has the optional arguments sharex and sharey. From the docs:
sharex, sharey : bool or {'none', 'all', 'row', 'col'}, default: False
Controls sharing of properties among x (sharex) or y (sharey) axes:
True or 'all': x- or y-axis will be shared among all subplots.
False or 'none': each subplot x- or y-axis will be independent.
'row': each subplot row will share an x- or y-axis.
'col': each subplot column will share an x- or y-axis.
Therefore, we can make sure the rows share the same x axis values as each other by using the argument sharex="row":
fig, axes = plt.subplots(3, 2, sharex="row")
If you want the y-axis to be shared you can use sharey="row" instead or as well.
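For instance, a minimal sketch of the 3x2 layout from the question, with made-up sine data standing in for the .wav samples:
import matplotlib.pyplot as plt
import numpy as np

# each row compares the "original" and "recorded" signal, so sharing the
# y-axis per row puts both plots of a pair on the same scale
fig, axes = plt.subplots(3, 2, sharex="row", sharey="row")

t = np.linspace(0, 1, 500)
axes[0, 0].plot(t, np.sin(2 * np.pi * 5 * t))        # original
axes[0, 1].plot(t, 0.3 * np.sin(2 * np.pi * 5 * t))  # attenuated recording

plt.tight_layout()
plt.show()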
Taking cues from the other answers, I made it work in the following way:
import matplotlib.pyplot as pl
import time
from scipy import fft, arange
from numpy import linspace
from scipy.io.wavfile import read
import gc
import sys

def plotWavAmplLev(in_file, sub_graph):
    print "Printing Signal graph (amplitude vs seconds)...."
    rate, data = read(in_file)
    dlen = len(data)
    timp = dlen / rate
    t = linspace(0, timp, dlen)
    sub_graph.plot(t, data)
    fl = in_file.split('/')
    file_name = fl[len(fl) - 1]
    sub_graph.set_title(file_name)
    sub_graph.tick_params(axis='x', labelsize=10)
    sub_graph.tick_params(axis='y', labelsize=10)
    sub_graph.set_xlabel('Time')
    sub_graph.set_ylabel('Numerical level')

def plotSpectralDensity(y, fs, sub_graph):
    print "Printing Power Spectral Density (dB vs Hz)...."
    n = len(y)  # signal length
    k = arange(n)
    T = n / fs
    frq = k / T  # two sides frequency range
    frq = frq[range(n / 2)]  # one side frequency range
    ff_valu = fft(y) / n  # fft computing and normalization
    ff_valu = ff_valu[range(n / 2)]
    sub_graph.plot(frq, abs(ff_valu), 'r')  # plotting the spectrum
    sub_graph.tick_params(axis='x', labelsize=10)
    sub_graph.tick_params(axis='y', labelsize=10)
    sub_graph.tick_params()
    sub_graph.set_xlabel('Frequency')
    sub_graph.set_ylabel('Power')
    del frq, ff_valu, n, k, T, y
    gc.collect()
    return

def plotSpectrogram(rate, data, sub_graph):
    print "Plotting Spectrogram (kHz vs seconds)...."
    if rate == 16000:
        frq = 16
    else:
        frq = 8
    sub_graph.specgram(data, NFFT=128, noverlap=0, Fs=frq)
    sub_graph.tick_params(axis='x', labelsize=10)
    sub_graph.tick_params(axis='y', labelsize=10)
    sub_graph.set_xlabel('Time')
    sub_graph.set_ylabel('Frequency')

def graph_plot(in_file_list, output_folder, func_type):
    orig_file = in_file_list[0]
    rec_file = in_file_list[1]
    g_index = 1
    g_rows = 3
    g_cols = 2
    fig, axes = pl.subplots(g_rows, g_cols, figsize=(20, 15), sharex="row", sharey="row")
    for i, row in enumerate(axes):
        for j, col in enumerate(row):
            if i == 0:
                if j == 0:
                    print "Source file waveform is being plotted...."
                    rate, data = read(orig_file)
                    plotWavAmplLev(orig_file, col)
                    continue
                elif j == 1:
                    print "Recorded file waveform is being plotted...."
                    rate, data = read(rec_file)
                    plotWavAmplLev(rec_file, col)
                    continue
            elif i == 1:
                if j == 0:
                    print "Source file PSD is being plotted...."
                    rate, data = read(orig_file)
                    plotSpectralDensity(data, rate, col)
                    continue
                elif j == 1:
                    print "Recorded file PSD is being plotted...."
                    rate, data = read(rec_file)
                    plotSpectralDensity(data, rate, col)
                    continue
            elif i == 2:
                if j == 0:
                    print "Source file Spectrogram is being plotted...."
                    rate, data = read(orig_file)
                    plotSpectrogram(rate, data, col)
                    continue
                elif j == 1:
                    print "Recorded file Spectrogram is being plotted...."
                    rate, data = read(rec_file)
                    plotSpectrogram(rate, data, col)
                    continue
    pl.tight_layout()
    name = in_file_list[1].split("/")
    lnth = len(name)
    name = in_file_list[1].split("/")[lnth - 1].split(".")[0]
    print "File=", name
    if func_type == 'a':
        save_file = output_folder + 'RESULT_' + name + '.png'
    else:
        save_file = output_folder + 'RESULT_graph.png'
    pl.savefig(save_file)
    pl.gcf()
    pl.gca()
    pl.close('all')
    del in_file_list, output_folder, rate, data
    gc.get_referrers()
    gc.collect()

def result_plot(orig_file, rec_file, output_folder, seq):
    flist = [orig_file, rec_file]
    graph_plot(flist, output_folder, 'a')

s_file = "/<path>/Output/"
#o_file = '/<path>/short_orig.wav'
o_file = '/<path>/orig.wav'
#r_file = '/<path>/short_rec.wav'
r_file = '/<path>/rec.wav'

print 10*"#"+"Start"+10*"#"
result_plot(o_file, r_file, s_file, 'a')
print 10*"#"+"End"+10*"#"
pl.close('all')
Now, I got the y-axis scales fixed and get the output as follows:
This makes comparison a lot easier now.

How to do softmax for pixelwise classification

My goal is to do greyscale image segmentation using pixelwise classification, so I have two labels, 0 and 1. I made a network in PyTorch which looks like the following.
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.up = nn.Upsample(scale_factor=2, mode='nearest')
        self.conv11 = nn.Conv2d(1, 128, kernel_size=3, padding=1)
        self.conv12 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv13 = nn.Conv2d(256, 2, kernel_size=3, padding=1)

    def forward(self, x):
        in_size = x.size(0)
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = F.relu(self.conv13(x))
        x = F.softmax(x, 2)
        return x
In the last layer I designed conv13 so that it produces 2 channels, one for each class.
Since I was using softmax, I was expecting that the values at the same index on the 2 separate channels would sum to 1.
For example, assume the output image is (2 {channels}, 4, 4). Then I was expecting that
image[channel 1][0][0] + image[channel 2][0][0] = 1
But the output I get is 0.0015, which is not even close to 1. How can I use softmax to predict channelwise?
To check this I used the following code:
for batch, data in enumerate(trainloader, 0):
    inputs, labels = data
    inputs, labels = inputs.to(device), labels.to(device)
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = rmse(outputs, labels)
    loss.backward()
    optimizer.step()
    running_loss += loss.item()
    predicted = outputs.data
    predicted = predicted.to('cpu')
    predicted_img = predicted.numpy()
    predicted_img = np.reshape(predicted_img, (2, 4, 4))
    print(predicted_img[0])
    print(predicted_img[1])
Those prints showed this
[[**0.2762002** 0.13305853 0.2510342 0.23114938]
[0.26812425 0.28500515 0.05682982 0.15851443]
[0.1640967 0.5409352 0.43547812 0.44782472]
[0.29157883 0.0410011 0.2566578 0.16251141]]
[[**0.23052207** 0.868455 0.43436486 0.0684725 ]
[0.18001427 0.02341573 0.0727293 0.2525512 ]
[0.06587404 0.04974682 0.3773188 0.6559266 ]
[0.5235896 0.05838248 0.11558701 0.02304965]]
It is clear that the corresponding elements do not sum up to 1, e.g.
0.2762002 (index 0, 0) + 0.23052207 (index 0, 0) != 1
How can I fix it?
Please check the marked line in my code below; basically your dimension for softmax was wrong. The output of conv13 has shape (batch, channel, height, width), so the two class scores live along dimension 1, not dimension 2.
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.up = nn.Upsample(scale_factor=2, mode='nearest')
        self.conv11 = nn.Conv2d(1, 128, kernel_size=3, padding=1)
        self.conv12 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv13 = nn.Conv2d(256, 2, kernel_size=3, padding=1)

    def forward(self, x):
        in_size = x.size(0)
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = F.relu(self.conv13(x))
        x = F.softmax(x, 1)  # this line is changed: softmax over the channel dimension
        return x

net = Net()
inputs = torch.rand(1, 1, 4, 4)
out = net(Variable(inputs))
print(out)
out.sum(dim=1)
Hope that helps.
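As a quick sanity check (a small sketch reusing out from the snippet above), the per-pixel channel sums should now all be 1:
sums = out.sum(dim=1)  # shape (1, 4, 4): one sum per pixel
print(torch.allclose(sums, torch.ones_like(sums)))  # True -- the two channel values at each pixel sum to 1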

Summing the indexes from a generated list of arrays

Hello, I have a list of arrays generated from the function defined below. I am wondering if there is a way to sum up the same index in each array in the list, giving me only 1 array?
import numpy as np

Tsp = np.linspace(3500, 40000, 3)
wcm = np.linspace(100, 10000, 5)

def blackbody(T, wcm):
    k = 1.38*10**-16.0  # erg/K
    h = 6.625*10**-27.0  # erg*s
    c = 3*10.0**10.0  # cm/s
    bbtop = (2.0*h*c**2.0)
    bbbot = (wcm**5.0)*(np.exp((h*c)/(wcm*k*T)) - 1)
    bbs = bbtop/bbbot
    return bbs

outflux = [blackbody(T_i, wcm) for T_i in Tsp]
Change the definition to:
def blackbody(T, wcm):
    k = 1.38*10**-16.0  # erg/K
    h = 6.625*10**-27.0  # erg*s
    c = 3*10.0**10.0  # cm/s
    bbtop = (2.0*h*c**2.0)
    T = np.atleast_1d(T)  # so you can still pass a single number if desired
    bbbot = (wcm**5.0)*(np.exp((h*c)/(wcm*k*T[:, None])) - 1)  # changed T to T[:, None]
    bbs = bbtop/bbbot
    return bbs
Now you can call it as:
blackbody(Tsp, wcm)
Double check that they are equal:
looped = np.array([blackbody(T_i, wcm) for T_i in Tsp])
broadcast = blackbody(Tsp, wcm)
print np.allclose(looped,broadcast)
True
Now that you have a single array you can sum on the axis you need using np.sum:
data = blackbody(Tsp, wcm)
data
[[ 2.89799404e-10 6.59157826e-16 4.45587348e-17 9.03800033e-18
2.89799993e-18]
[ 1.80089940e-09 4.09619532e-15 2.76900716e-16 5.61647169e-17
1.80089999e-17]
[ 3.31199940e-09 7.53323285e-15 5.09242710e-16 1.03291433e-16
3.31200005e-17]]
np.sum(data,axis=1)
[ 2.89800119e-10 1.80090385e-09 3.31200758e-09]
np.sum(data,axis=0)
[ 5.40269821e-09 1.22885860e-14 8.30702161e-16 1.68494151e-16
5.40270004e-17]
The data is laid out along both axes, but I'm not sure which one you want from your question: axis=1 sums over the wavelengths (one value per temperature), while axis=0 sums over the temperatures (one array with the same length as wcm).
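For completeness, a minimal sketch showing that the original list of arrays from the question can also be summed directly, without changing blackbody at all (this assumes the original, unmodified blackbody, where each call returns a plain length-5 array):
Tsp = np.linspace(3500, 40000, 3)
wcm = np.linspace(100, 10000, 5)
outflux = [blackbody(T_i, wcm) for T_i in Tsp]

# np.sum stacks the list into a (3, 5) array and sums down the columns,
# i.e. it adds up the same index of every array in the list
summed = np.sum(outflux, axis=0)
print summed  # one array of length 5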