I have the folowing custom loss:
def Loss(y_true,y_pred):
y_pred = relu(y_pred)
z = k.maximum(y_true, y_pred)
y_pred_negativo = Lambda(lambda x: -x)(y_pred)
w = k.abs(add([y_true, y_pred_negativo]))
if k.sum(z) == 0.0:
erro = 0.0
elif k.sum(y_true) == 0.0 and k.sum(z) != 0:
erro = 100
else:
erro = (k.sum(w)/k.sum(z))*100.0
return erro
However, as you can see, I'm mixing numpy with tensor conditional. Therefore, I have to write this conditional in a tensor format.
if k.sum(z) == 0.0:
erro = 0.0
elif k.sum(y_true) == 0.0 and k.sum(z) != 0:
erro = 100
else:
erro = (k.sum(w)/k.sum(z))*100.0
I know how to do it for if else format, but not for this much of the conditions. Thanks!
Here comes my own definition of conditional statement in terms of keras.
def energia_perdida_tensorial(y_true,y_pred):
y_pred = relu(y_pred)
z = k.maximum(y_true, y_pred)
y_pred_negativo = Lambda(lambda x: -x)(y_pred)
w = k.abs(add([y_true, y_pred_negativo]))
erro = k.switch(k.equal(k.sum(z), 0.0), lambda: 0.0, lambda: (k.sum(w)/k.sum(z))*100.0)
erro = k.switch(k.all([k.equal(k.sum(y_true), 0), k.greater(k.sum(z), 0)], axis=0), lambda: 100.0, lambda: erro)
return erro
If is there anything wrong or a more elegant way of defining it, please make your contribuition.
Related
I am trying to write my own custom loss function that is based on the false positive and negative rates. I made a dummy code so you can check the first 2 defenitions as well. I added the rest, so you can see how it is implemented. However, still somewhere the gradient turns out to be zero. What is now the step where the gradient turns zero, or how can I check this? Please I would like to know how I can fix this :).
I tried providing you with more information so you can play around as well, but if you miss anything please do let me know!
The gradient stays True during every step. However, still during the training of the model the loss is not updated, therefore the NN does not train.
y = Variable(torch.tensor((0, 0, 0, 1, 1,1), dtype=torch.float), requires_grad = True)
y_pred = Variable(torch.tensor((0.333, 0.2, 0.01, 0.99, 0.49, 0.51), dtype=torch.float), requires_grad = True)
x = Variable(torch.tensor((0, 0, 0, 1, 1,1), dtype=torch.float), requires_grad = True)
x_pred = Variable(torch.tensor((0.55, 0.25, 0.01, 0.99, 0.65, 0.51), dtype=torch.float), requires_grad = True)
def binary_y_pred(y_pred):
y_pred.register_hook(lambda grad: print(grad))
y_pred = y_pred+torch.tensor(0.5, requires_grad=True, dtype=torch.float)
y_pred = y_pred.pow(5) # this is my way working around using torch.where()
y_pred = y_pred.pow(10)
y_pred = y_pred.pow(15)
m = nn.Sigmoid()
y_pred = m(y_pred)
y_pred = y_pred-torch.tensor(0.5, requires_grad=True, dtype=torch.float)
y_pred = y_pred*2
y_pred.register_hook(lambda grad: print(grad))
return y_pred
def confusion_matrix(y_pred, y):
TP = torch.sum(y*y_pred)
TN = torch.sum((1-y)*(1-y_pred))
FP = torch.sum((1-y)*y_pred)
FN = torch.sum(y*(1-y_pred))
k_eps = torch.tensor(1e-12, requires_grad=True, dtype=torch.float)
FN_rate = FN/(TP + FN + k_eps)
FP_rate = FP/(TN + FP + k_eps)
return FN_rate, FP_rate
def dif_rate(FN_rate_y, FN_rate_x):
dif = (FN_rate_y - FN_rate_x).pow(2)
return dif
def custom_loss_function(y_pred, y, x_pred, x):
y_pred = binary_y_pred(y_pred)
FN_rate_y, FP_rate_y = confusion_matrix(y_pred, y)
x_pred= binary_y_pred(x_pred)
FN_rate_x, FP_rate_x = confusion_matrix(x_pred, x)
FN_dif = dif_rate(FN_rate_y, FN_rate_x)
FP_dif = dif_rate(FP_rate_y, FP_rate_x)
cost = FN_dif+FP_dif
return cost
# I added the rest so you can see how it is implemented, but this peace does not fully run well! If you want this part to run as well, I can add more code.
class FeedforwardNeuralNetModel(nn.Module):
def __init__(self, input_dim, hidden_dim, output_dim):
super(FeedforwardNeuralNetModel, self).__init__()
self.fc1 = nn.Linear(input_dim, hidden_dim)
self.relu1 = nn.ReLU()
self.fc2 = nn.Linear(hidden_dim, output_dim)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
out = self.fc1(x)
out = self.relu1(out)
out = self.fc2(out)
out = self.sigmoid(out)
return out
model = FeedforwardNeuralNetModel(input_dim, hidden_dim, output_dim)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, betas=[0.9, 0.99], amsgrad=True)
criterion = torch.nn.BCELoss(weight=None, size_average=None, reduce=None, reduction='mean')
for epoch in range(num_epochs):
train_err = 0
for i, (samples, truths) in enumerate(train_loader):
samples = Variable(samples)
truths = Variable(truths)
optimizer.zero_grad() # Reset gradients
outputs = model(samples) # Do the forward pass
loss2 = criterion(outputs, truths) # Calculate loss
samples_y = Variable(samples_y)
samples_x = Variable(samples_x)
y_pred = model(samples_y)
y = Variable(y, requires_grad=True)
x_pred = model(samples_x)
x= Variable(x, requires_grad=True)
cost = custom_loss_function(y_pred, y, x_pred, x)
loss = loss2*0+cost #checking only if cost works.
loss.backward()
optimizer.step()
train_err += loss.item()
train_loss.append(train_err)
I expect the model to update during training. There is no error message.
With your definitions:TP+FN=y and TN+FP=1-y. Then you'll get FN_rate=1-y_pred and FP_rate=y_pred. Your cost is then FN_rate+FP_rate=1, the gradient of which is 0.
You can check this by hand or using a library for symbolic mathematics (e.g., SymPy):
from sympy import symbols
y, y_pred = symbols("y y_pred")
TP = y * y_pred
TN = (1-y)*(1-y_pred)
FP = (1-y)*y_pred
FN = y*(1-y_pred)
# let's ignore the eps for now
FN_rate = FN/(TP + FN)
FP_rate = FP/(TN + FP)
cost = FN_rate + FP_rate
from sympy import simplify
print(simplify(cost))
# output: 1
I use odeint function to solve a coupled differential equations system and plot one of the variables (theta_i) after the system is solved. My variable (theta_i) comes from the equation:
theta_i = np.arctan2(g1,g2)
where g1 ang g2 are variables calculated in the same function. The results have to be between -pi and pi and they are supposed to look like this (plot from matlab simulation):
However, when I try to plot theta_i after odeint has finished I get this(plot from my python code):
which is really weird. When I print the values of theta_i right after its calcumation (still inside the function) they look correct (between -0.2 and 0.5), so it has to be something with the result's storing and my implementation of odeint. All the other variables that come from the odeint solution are correct. I searched similar posts but nobody had the same problem with me. What might be the problem here? I am new to python and I use python 2.7.12. Thank you in advance.
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
added_mass_x = 0.03 # kg
added_mass_y = 0.04
mb = 0.3 # kg
m1 = mb-added_mass_x
m2 = mb-added_mass_y
l1 = 0.07 # m
l2 = 0.05 # m
J = 0.00050797 # kgm^2
Sa = 0.0110 # m^2
Cd = 2.44
Cl = 3.41
Kd = 0.000655 # kgm^2
r = 1000 # kg/m^3
c1 = 0.5*r*Sa*Cd
c2 = 0.5*r*Sa*Cl
c3 = 0.5*mb*(l1**2)
c4 = Kd/J
c5 = (1/(2*J))*(l1**2)*mb*l2
c6 = (1/(3*J))*(l1**3)*mb
theta_0 = 10*(np.pi/180) # rad
theta_A = 20*(np.pi/180) # rad
f = 2 # Hz
t = np.linspace(0,100,8000) # s
def direct(u,t):
vcx = u[0]
vcy = u[1]
wz = u[2]
psi = u[3]
x = u[4]
y = u[5]
vcx_i = u[6]
vcy_i = u[7]
psi_i = u[8]
wz_i = u[9]
theta_i = u[10]
theta_deg_i = u[11]
# Subsystem 1
omega = 2*np.pi*f # rad/s
theta = theta_0 + theta_A*np.sin(omega*t) # rad
theta_deg = (theta*180)/np.pi # deg
thetadotdot = -(omega**2)*theta_A*np.sin(omega*t) # rad/s^2
# Subsystem 2
vcxdot = (m2/m1)*vcy*wz-(c1/m1)*vcx*np.sqrt((vcx**2)+(vcy**2))+(c2/m1)*vcy*np.sqrt((vcx**2)+(vcy**2))*np.arctan2(vcy,vcx)-(c3/m1)*thetadotdot*np.sin(theta)
vcydot = -(m1/m2)*vcx*wz-(c1/m2)*vcy*np.sqrt((vcx**2)+(vcy**2))-(c2/m2)*vcx*np.sqrt((vcx**2)+(vcy**2))*np.arctan2(vcy,vcx)+(c3/m2)*thetadotdot*np.cos(theta)
wzdot = ((m1-m2)/J)*vcx*vcy-c4*wz*wz*np.sign(wz)-c5*thetadotdot*np.cos(theta)-c6*thetadotdot
psidot = wz
# Subsystem 3
xdotdot = vcxdot*np.cos(psi)-vcx*np.sin(psi)*wz+vcydot*np.sin(psi)+vcy*np.cos(psi)*wz # m/s^2
ydotdot = -vcxdot*np.sin(psi)-vcx*np.cos(psi)*wz+vcydot*np.cos(psi)-vcy*np.sin(psi)*wz # m/s^2
xdot = vcx*np.cos(psi)+vcy*np.sin(psi) # m/s
ydot = -vcx*np.sin(psi)+vcy*np.cos(psi) # m/s
# Subsystem 4
vcx_i = xdot*np.cos(psi_i)-ydot*np.sin(psi_i)
vcy_i = ydot*np.cos(psi_i)+xdot*np.sin(psi_i)
psidot_i = wz_i
vcxdot_i = xdotdot*np.cos(psi_i)-xdot*np.sin(psi_i)*psidot_i-ydotdot*np.sin(psi_i)-ydot*np.cos(psi_i)*psidot_i
vcydot_i = ydotdot*np.cos(psi_i)-ydot*np.sin(psi_i)*psidot_i+xdotdot*np.sin(psi_i)+xdot*np.cos(psi_i)*psidot_i
g1 = -(m1/c3)*vcxdot_i+(m2/c3)*vcy_i*wz_i-(c1/c3)*vcx_i*np.sqrt((vcx_i**2)+(vcy_i**2))+(c2/c3)*vcy_i*np.sqrt((vcx_i**2)+(vcy_i**2))*np.arctan2(vcy_i,vcx_i)
g2 = (m2/c3)*vcydot_i+(m1/c3)*vcx_i*wz_i+(c1/c3)*vcy_i*np.sqrt((vcx_i**2)+(vcy_i**2))+(c2/c3)*vcx_i*np.sqrt((vcx_i**2)+(vcy_i**2))*np.arctan2(vcy_i,vcx_i)
A = 12*np.sin(2*np.pi*f*t+np.pi) # eksiswsi tail_frequency apo simulink
if A>=0.1:
wzdot_i = ((m1-m2)/J)*vcx_i*vcy_i-c4*wz_i**2*np.sign(wz_i)-c5*g2-c6*np.sqrt((g1**2)+(g2**2))
elif A<-0.1:
wzdot_i = ((m1-m2)/J)*vcx_i*vcy_i-c4*wz_i**2*np.sign(wz_i)-c5*g2+c6*np.sqrt((g1**2)+(g2**2))
else:
wzdot_i = ((m1-m2)/J)*vcx_i*vcy_i-c4*wz_i**2*np.sign(wz)-c5*g2
if g2>0:
theta_i = np.arctan2(g1,g2)
elif g2<0 and g1>=0:
theta_i = np.arctan2(g1,g2)-np.pi
elif g2<0 and g1<0:
theta_i = np.arctan2(g1,g2)+np.pi
elif g2==0 and g1>0:
theta_i = -np.pi/2
elif g2==0 and g1<0:
theta_i = np.pi/2
elif g1==0 and g2==0:
theta_i = 0
theta_deg_i = (theta_i*180)/np.pi
#print theta_deg_i
return [vcxdot, vcydot, wzdot, psidot, xdot, ydot, vcxdot_i, vcydot_i, psidot_i, wzdot_i, theta_i, theta_deg_i]
# arxikes synthikes
vcx_0 = 0.1257
vcy_0 = 0
wz_0 = 0
psi_0 = 0
x_0 = 0
y_0 = 0
vcx_i_0 = 0.1257
vcy_i_0 = 0
psi_i_0 = 0
wz_i_0 = 0
theta_i_0 = 0.1745
theta_deg_i_0 = 9.866
u0 = [vcx_0, vcy_0, wz_0, psi_0, x_0, y_0, vcx_i_0, vcy_i_0, psi_i_0, wz_i_0, theta_i_0, theta_deg_i_0]
u = odeint(direct, u0, t, tfirst=False)
vcx = u[:,0]
vcy = u[:,1]
wz = u[:,2]
psi = u[:,3]
x = u[:,4]
y = u[:,5]
vcx_i = u[:,6]
vcy_i = u[:,7]
psi_i = u[:,8]
wz_i = u[:,9]
theta_i = u[:,10]
theta_deg_i = u[:,11]
print theta_i
plt.figure(17)
plt.plot(t,theta_i,'r-',linewidth=1,label='theta_i')
plt.xlabel('t [s]')
plt.title('theta_i [rad] (Main body CF)')
plt.legend()
plt.show()
The problem as you stated is that theta_i is not part of the gradient. When you formulate your direct, it should be of the form:
def direct(vector, t):
return vector_dot
The quickest and dirtiest solution (without cleaning the code) is to use the function you already defined:
theta_i = [direct(u_i, t_i)[10] for t_i, u_i in zip(t, u)]
I used a a shorter interval: t = np.linspace(0,10,8000). It yielded this:
EDIT: How to remove your theta from the integrator:
def direct(u, t):
# your original function as it is
def direct2(u,t):
return direct(u,t)[:9]
#now integrate the second function
u = odeint(direct2, u0, t)
I'm new to TF and ML.
Details about data: Features(x) - (70 x 70 x 70) tensor for each sample, y - a float for each sample.
TFRecords created with the following code:
def convert_to_tf_records():
def _bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def _float64_feature(value):
return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))
tfrecords_filename = 'A-100-h2-h2o.tfrecords'
writer = tf.python_io.TFRecordWriter(tfrecords_filename)
# Get data from db for now.
db = connect('results-60-70.db')
data = db.select(selection='Ti')
i = 0
for row in data:
desc = np.array(json.loads(row.descriptor), dtype=np.float32)
print(desc.shape)
be = float(row.binding_energy) * 23 # Convert to Kcal/mol ?
desc = desc.flatten()
desc = desc.tostring()
example = tf.train.Example(features=tf.train.Features(feature={'voxel_grid': _bytes_feature(desc), 'binding_energy': _float64_feature(be)}))
writer.write(example.SerializeToString())
i += 1
if i >= 10:
break
Input function:
def my_input_function(fname, perform_shuffle=False, repeat_count=None):
def _parse_elements(example):
features = tf.parse_single_example(example, features={'voxel_grid': tf.FixedLenFeature([], tf.string), 'binding_energy': tf.FixedLenFeature([], tf.float32)})
vg = tf.decode_raw(features['voxel_grid'], tf.float32)
vg = tf.reshape(vg, [70, 70, 70])
vg = tf.convert_to_tensor(vg, dtype=tf.float32)
vg = {'voxel_grid': vg}
e = tf.cast(features['binding_energy'], tf.float32)
return vg, e
def input_function():
dataset = tf.data.TFRecordDataset(fname).map(_parse_elements)
dataset = dataset.repeat(repeat_count)
dataset = dataset.batch(5)
dataset = dataset.prefetch(1)
if perform_shuffle:
dataset.shuffle(20)
iterator = dataset.make_one_shot_iterator()
batch_features, batch_labels = iterator.get_next()
return batch_features, batch_labels
return input_function
Model function:
def my_model_function(features, labels, mode):
if mode == tf.estimator.ModeKeys.PREDICT:
tf.logging.info("my_model_fn: PREDICT, {}".format(mode))
elif mode == tf.estimator.ModeKeys.EVAL:
tf.logging.info("my_model_fn: EVAL, {}".format(mode))
elif mode == tf.estimator.ModeKeys.TRAIN:
tf.logging.info("my_model_fn: TRAIN, {}".format(mode))
feature_columns = [tf.feature_column.numeric_column('voxel_grid', shape=(70, 70, 70), dtype=tf.float32)]
# Create the layer of input
input_layer = tf.feature_column.input_layer(features, feature_columns)
input_layer = tf.reshape(input_layer, [-1, 70, 70, 70, 1])
# Convolution layers
conv1 = tf.layers.conv3d(inputs=input_layer, strides=(2, 2, 2), filters=32, kernel_size=(7, 7, 7))
conv2 = tf.layers.conv3d(inputs=conv1, strides=(2, 2, 2), filters=32, kernel_size=(7, 7, 7))
pool3 = tf.layers.max_pooling3d(inputs=conv2, pool_size=[2, 2, 2], strides=2)
flat = tf.layers.flatten(pool3)
dense1 = tf.layers.dense(inputs=flat, units=10, activation=tf.nn.relu)
dense2 = tf.layers.dense(inputs=dense1, units=10, activation=tf.nn.relu)
output = tf.layers.dense(inputs=dense2, units=1)
predictions = {'binding_energy': output}
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
# Calculate loss
loss = tf.losses.mean_squared_error(labels=labels, predictions=predictions)
if mode == tf.estimator.ModeKeys.TRAIN:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
# Add evaluation metrics
eval_metric_ops = {"mse": tf.metrics.mean_squared_error(labels=labels, predictions=predictions['binding_energy'])}
if mode == tf.estimator.ModeKeys.EVAL:
return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
When calling model.train using
model = tf.estimator.Estimator(model_fn=my_model_function, model_dir='./model_dir')
model.train(input_fn=my_input_function('A-100-h2-h2o.tfrecords'), steps=100)
I get the following error.
TypeError: Failed to convert object of type to Tensor.
Found it!
changing
# Calculate loss
loss = tf.losses.mean_squared_error(labels=labels, predictions=predictions)
to
# Calculate loss
loss = tf.losses.mean_squared_error(labels=labels, predictions=predictions['binding_energy'])
solves the issue.
When Running the below code, I receive an AssertionError in the Main Function, assert len(args) > 1. Any idea where in the code the issue occurs?
K-Means clustering implementation
import numpy as np
from math import sqrt
import csv
import sys
====
Define a function that computes the distance between two data points
GAP = 2
MIN_VAL = 1000000
def get_distance(point1, point2):
dis = sqrt(pow(point1[0] - point2[0],2) + pow(point1[1] - point2[1],2))
return dis
====
Define a function that reads data in from the csv
def csvreader(data_file):
sampleData = []
global Countries
with open(data_file, 'r') as csvfile:
read_data = csv.reader(csvfile, delimiter=' ', quotechar='|')
for row in read_data:
print ', '.join(row)
if read_data <> None:
for f in read_data:
values = f.split(",")
if values[0] <> 'Countries':
sampleData.append([values[1],values[2]])
return sampleData
====
Write the initialisation procedure
def cluster_dis(centroid, cluster):
dis = 0.0
for point in cluster:
dis += get_distance(centroid, point)
return dis
def update_centroids(centroids, cluster_id, cluster):
x, y = 0.0, 0.0
length = len(cluster)
if length == 0:
return
for item in cluster:
x += item[0]
y += item[1]
centroids[cluster_id] = (x / length, y / length)
====
Implement the k-means algorithm, using appropriate looping
def kmeans(data, k):
assert k <= len(data)
seed_ids = np.random.randint(0, len(data), k)
centroids = [data[idx] for idx in seed_ids]
clusters = [[] for _ in xrange(k)]
cluster_idx = [-1] * len(data)
pre_dis = 0
while True:
for point_id, point in enumerate(data):
min_distance, tmp_id = MIN_VAL, -1
for seed_id, seed in enumerate(centroids):
distance = get_distance(seed, point)
if distance < min_distance:
min_distance = distance
tmp_id = seed_id
if cluster_idx[point_id] != -1:
dex = clusters[cluster_idx[point_id]].index(point)
del clusters[cluster_idx[point_id]][dex]
clusters[tmp_id].append(point)
cluster_idx[point_id] = tmp_id
now_dis = 0.0
for cluster_id, cluster in enumerate(clusters):
now_dis += cluster_dis(centroids[cluster_id], cluster)
update_centroids(centroids, cluster_id, cluster)
delta_dis = now_dis - pre_dis
pre_dis = now_dis
if delta_dis < GAP:
break
print(centroids)
print(clusters)
return centroids, clusters
def main():
args = sys.argv[1:]
assert len(args) > 1
data_file, k = args[0], int(args[1])
data = csvreader(data_file)
kmeans(data, k)
if __name__ == '__main__':
main()
I've been trying to implement an homomorphic filter in frequency domain on both MATLAB and Python using OpenCV2 and NumPy, the MATLAB code gives the expected answer but the Python does not, the resulting image is very weird. I've tested all variables and came to the conclusion the only point there is a difference is the IFFT. On MATLAB, the results can be applied normally to the exp function and return the filtered original image expected, but the values of Python ifft are very different. I happened to see other posts with similar problems, but no satisfactory answer (perhaps i'm just bad at searching too...).
The MATLAB code
function [ img_r ] = homomorphic( img, D0, n )
[N, M] = size(img);
img_bk = double(img);
img_bk = log(img_bk+1);
img_freq = fftshift(fft2(img_bk));
magA = uint8(10*log(1+abs(img_freq)));
cu = M/2;
cv = N/2;
Hf = zeros(N,M);
for v = 1:N
dv = v - cv;
for u = 1:M
du = u - cu;
D = sqrt(du*du + dv*dv);
num = 1;
if D > 0
den = 1+((D0/D)^(2*n));
else
den = 0; %to replace +inf
end
if den ~= 0
H = num/den;
else
H = 0;
end
img_freq(v,u) = H*img_freq(v,u);
end
end
magB = uint8(10*log(1+abs(img_freq)));
img_r = (ifft2(ifftshift(img_freq)));
img_r = exp(img_r);
img_r = uint8(img_r);
and the Python code (might have some bugs but overall works)
import numpy as np
import cv2
def homomorphic(img, D0, n=2):
[N,M] = img.shape
img_bk = np.log(1 + np.float64(img))
img_freq = np.fft.fftshift(np.fft.fft2(img_bk))
cu = M/2.0
cv = N/2.0
for v in range(N):
dv = v - cv
for u in range(M):
du = u - cu
D = np.sqrt(du*du + dv*dv)
if D != 0:
a = 1.0 + (D0/D)**(2*n)
H = 1/a
else:
print D
H = 0
img_freq[v][u] = H*img_freq[v][u]
img_r = np.abs(np.fft.ifft2(np.fft.ifftshift(img_freq)))
eimg = np.exp(img_r)
eimg = np.uint8(eimg)
return eimg
I really don't get it, why the results are so different? Does anyone have any idea?