I am new to TensorFlow and trying to understand it. I managed to create a one-layer model, but now I would like to add two more layers. How can I make my train function work? I would like to train it with hundreds of X and Y values. I defined all the values I need (the weight and bias of each layer), but I don't understand how to use them in my train function. And once it is trained, how can I use it, like I do in the last part of the code?
import tensorflow as tf
import numpy as np

print("TensorFlow version: {}".format(tf.__version__))
print("Eager execution: {}".format(tf.executing_eagerly()))
# use float32 so the data matches the dtype of the tf.Variable weights below
x = np.array([
    [10, 10, 30, 20],
], dtype=np.float32)
y = np.array([[10, 1, 1, 1]], dtype=np.float32)
class Model(object):
    def __init__(self, x, y):
        # get random values.
        self.W = tf.Variable(tf.random.normal((len(x), len(x[0]))))
        self.b = tf.Variable(tf.random.normal((len(y),)))
        self.W1 = tf.Variable(tf.random.normal((len(x), len(x[0]))))
        self.b1 = tf.Variable(tf.random.normal((len(y),)))
        self.W2 = tf.Variable(tf.random.normal((len(x), len(x[0]))))
        self.b2 = tf.Variable(tf.random.normal((len(y),)))

    def __call__(self, x):
        out1 = tf.multiply(x, self.W) + self.b
        out2 = tf.multiply(out1, self.W1) + self.b1
        last_layer = tf.multiply(out2, self.W2) + self.b2
        # Input_Layer = self.W * x + self.b
        return last_layer
def loss(predicted_y, desired_y):
    return tf.reduce_sum(tf.square(predicted_y - desired_y))

optimizer = tf.optimizers.Adam(0.1)

def train(model, inputs, outputs):
    with tf.GradientTape() as t:
        current_loss = loss(model(inputs), outputs)
    grads = t.gradient(current_loss, [model.W, model.b])
    optimizer.apply_gradients(zip(grads, [model.W, model.b]))
    print(current_loss)
model = Model(x, y)

for i in range(10000):
    train(model, x, y)

for i in range(3):
    # read four values from the user; convert them to float32 so they match the weights
    InputX = np.array([
        [float(input()), float(input()), float(input()), float(input())],
    ], dtype=np.float32)
    returning = tf.math.multiply(
        InputX, model.W, name=None
    )
    print("I think that output can be:", returning)
Just add new variables to the list:
grads = t.gradient(current_loss, [model.W, model.b, model.W1, model.b1, model.W2, model.b2])
optimizer.apply_gradients(zip(grads, [model.W, model.b, model.W1, model.b1, model.W2, model.b2]))
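For completeness, a minimal sketch of what the full train step and a prediction could look like once all six variables are included; it reuses the Model, loss, and optimizer defined in the question, and the example input values are made up:

def train(model, inputs, outputs):
    with tf.GradientTape() as t:
        current_loss = loss(model(inputs), outputs)
    # differentiate the loss with respect to every layer's weights and biases
    variables = [model.W, model.b, model.W1, model.b1, model.W2, model.b2]
    grads = t.gradient(current_loss, variables)
    optimizer.apply_gradients(zip(grads, variables))
    print(current_loss)

# once trained, run new input through the whole model (all three layers)
InputX = np.array([[10.0, 10.0, 30.0, 20.0]], dtype=np.float32)
prediction = model(InputX)
print("I think that output can be:", prediction)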
I want to get all integer solutions within a limited time. Is that possible?
This is a linear integer constraint-satisfaction problem, which can be solved efficiently by OR-Tools' CP-SAT solver. I've modified their example to solve your problem in Python:
from ortools.sat.python import cp_model


class VarArraySolutionPrinter(cp_model.CpSolverSolutionCallback):
    """Print intermediate solutions."""

    def __init__(self, variables):
        cp_model.CpSolverSolutionCallback.__init__(self)
        self.__variables = variables
        self.__solution_count = 0

    def on_solution_callback(self):
        self.__solution_count += 1
        for v in self.__variables:
            print('%s=%i' % (v, self.Value(v)), end=' ')
        print()

    def solution_count(self):
        return self.__solution_count


def SearchForAllSolutionsSampleSat():
    """Showcases calling the solver to search for all solutions."""
    # Creates the model.
    model = cp_model.CpModel()

    p = [1, 2, 3, 4]
    ceq = 30
    cgeq = 2
    N = len(p)

    # Creates the variables.
    x = [model.NewIntVar(0, 100, f'x{i}') for i in range(N)]

    # Create the constraints.
    model.Add(sum([xi*pi for xi, pi in zip(x, p)]) == ceq)
    model.Add(sum(x) >= cgeq)

    # Create a solver and solve.
    solver = cp_model.CpSolver()
    solution_printer = VarArraySolutionPrinter(x)
    status = solver.SearchForAllSolutions(model, solution_printer)

    print('Status = %s' % solver.StatusName(status))
    print('Number of solutions found: %i' % solution_printer.solution_count())


SearchForAllSolutionsSampleSat()
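Since the question also mentions a limited time budget, CP-SAT can be told to stop after a fixed amount of wall-clock time through its solver parameters. A small sketch of that change inside the function above (the 10-second limit is just an example value):

solver = cp_model.CpSolver()
# stop searching after at most 10 seconds of wall-clock time
solver.parameters.max_time_in_seconds = 10.0
solution_printer = VarArraySolutionPrinter(x)
status = solver.SearchForAllSolutions(model, solution_printer)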
I am trying to write my own custom loss function based on the false positive and false negative rates. I made a dummy example so you can check the first two definitions, and I added the rest so you can see how it is implemented. However, somewhere the gradient still turns out to be zero. At which step does the gradient become zero, and how can I check this? I would like to know how I can fix it :).
I tried to provide enough information so you can play around with it as well, but if anything is missing, please let me know!
requires_grad stays True at every step. Still, during training the loss is not updated, so the network does not train.
import torch
import torch.nn as nn
from torch.autograd import Variable

y = Variable(torch.tensor((0, 0, 0, 1, 1, 1), dtype=torch.float), requires_grad=True)
y_pred = Variable(torch.tensor((0.333, 0.2, 0.01, 0.99, 0.49, 0.51), dtype=torch.float), requires_grad=True)
x = Variable(torch.tensor((0, 0, 0, 1, 1, 1), dtype=torch.float), requires_grad=True)
x_pred = Variable(torch.tensor((0.55, 0.25, 0.01, 0.99, 0.65, 0.51), dtype=torch.float), requires_grad=True)
def binary_y_pred(y_pred):
    y_pred.register_hook(lambda grad: print(grad))
    y_pred = y_pred + torch.tensor(0.5, requires_grad=True, dtype=torch.float)
    y_pred = y_pred.pow(5)  # this is my way working around using torch.where()
    y_pred = y_pred.pow(10)
    y_pred = y_pred.pow(15)
    m = nn.Sigmoid()
    y_pred = m(y_pred)
    y_pred = y_pred - torch.tensor(0.5, requires_grad=True, dtype=torch.float)
    y_pred = y_pred * 2
    y_pred.register_hook(lambda grad: print(grad))
    return y_pred

def confusion_matrix(y_pred, y):
    TP = torch.sum(y * y_pred)
    TN = torch.sum((1 - y) * (1 - y_pred))
    FP = torch.sum((1 - y) * y_pred)
    FN = torch.sum(y * (1 - y_pred))

    k_eps = torch.tensor(1e-12, requires_grad=True, dtype=torch.float)
    FN_rate = FN / (TP + FN + k_eps)
    FP_rate = FP / (TN + FP + k_eps)
    return FN_rate, FP_rate

def dif_rate(FN_rate_y, FN_rate_x):
    dif = (FN_rate_y - FN_rate_x).pow(2)
    return dif

def custom_loss_function(y_pred, y, x_pred, x):
    y_pred = binary_y_pred(y_pred)
    FN_rate_y, FP_rate_y = confusion_matrix(y_pred, y)
    x_pred = binary_y_pred(x_pred)
    FN_rate_x, FP_rate_x = confusion_matrix(x_pred, x)

    FN_dif = dif_rate(FN_rate_y, FN_rate_x)
    FP_dif = dif_rate(FP_rate_y, FP_rate_x)
    cost = FN_dif + FP_dif
    return cost
# I added the rest so you can see how it is implemented, but this piece does not fully run as-is! If you want this part to run as well, I can add more code.
class FeedforwardNeuralNetModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(FeedforwardNeuralNetModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.fc2(out)
        out = self.sigmoid(out)
        return out
model = FeedforwardNeuralNetModel(input_dim, hidden_dim, output_dim)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, betas=[0.9, 0.99], amsgrad=True)
criterion = torch.nn.BCELoss(weight=None, size_average=None, reduce=None, reduction='mean')

for epoch in range(num_epochs):
    train_err = 0
    for i, (samples, truths) in enumerate(train_loader):
        samples = Variable(samples)
        truths = Variable(truths)
        optimizer.zero_grad()               # Reset gradients
        outputs = model(samples)            # Do the forward pass
        loss2 = criterion(outputs, truths)  # Calculate loss

        samples_y = Variable(samples_y)
        samples_x = Variable(samples_x)

        y_pred = model(samples_y)
        y = Variable(y, requires_grad=True)
        x_pred = model(samples_x)
        x = Variable(x, requires_grad=True)

        cost = custom_loss_function(y_pred, y, x_pred, x)
        loss = loss2 * 0 + cost  # checking only if cost works.

        loss.backward()
        optimizer.step()
        train_err += loss.item()
    train_loss.append(train_err)
I expect the model to update during training. There is no error message.
With your definitions, TP + FN = y and TN + FP = 1 - y. Then you get FN_rate = 1 - y_pred and FP_rate = y_pred. Your cost is therefore FN_rate + FP_rate = 1, the gradient of which is 0.
You can check this by hand or using a library for symbolic mathematics (e.g., SymPy):
from sympy import symbols
y, y_pred = symbols("y y_pred")
TP = y * y_pred
TN = (1-y)*(1-y_pred)
FP = (1-y)*y_pred
FN = y*(1-y_pred)
# let's ignore the eps for now
FN_rate = FN/(TP + FN)
FP_rate = FP/(TN + FP)
cost = FN_rate + FP_rate
from sympy import simplify
print(simplify(cost))
# output: 1
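The same symbolic check can also confirm that the derivative with respect to y_pred is zero, which is why no useful gradient ever reaches the network:

from sympy import diff, simplify

# derivative of the (unsimplified) cost with respect to the prediction
print(simplify(diff(cost, y_pred)))
# output: 0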
I want to test a customized objective function for LightGBM in multi-class classification.
I have specified the parameter num_class=3. However, the error "Number of classes must be 1 for non-multiclass training" is thrown.
I am using Python 3.6 and lightgbm version 0.2.
# iris data
from sklearn import datasets
import lightgbm as lgb
import numpy as np

iris = datasets.load_iris()
X = iris['data']
y = iris['target']

# construct train-test
num_train = int(X.shape[0] / 3 * 2)
idx = np.random.permutation(X.shape[0])
x_train = X[idx[:num_train]]
x_test = X[idx[num_train:]]
y_train = y[idx[:num_train]]
y_test = y[idx[num_train:]]

# softmax function
def softmax(x):
    '''
    input x: an np.array of n_sample * n_class
    return : an np.array of n_sample * n_class (probabilities)
    '''
    x = np.where(x > 100, 100, x)
    x = np.exp(x)
    return x / np.reshape(np.sum(x, 1), [x.shape[0], 1])

# objective function
def objective(y_true, y_pred):
    '''
    input:
        y_true: np.array of size (n_sample,)
        y_pred: np.array of size (n_sample, n_class)
    '''
    y_pred = softmax(y_pred)
    temp = np.zeros_like(y_pred)
    temp[range(y_pred.shape[0]), y_true] = 1
    gradient = y_pred - temp
    hessian = y_pred * (1 - y_pred)
    return [gradient, hessian]

# lightgbm model
model = lgb.LGBMClassifier(n_estimators=10000,
                           num_classes=3,
                           objective=objective,
                           nthread=4)

model.fit(x_train, y_train,
          eval_metric='multi_logloss',
          eval_set=[(x_test, y_test), (x_train, y_train)],
          eval_names=['valid', 'train'],
          early_stopping_rounds=200, verbose=100)
Let me answer my own question.
The arguments in the objective function should be:
y_true of size [n_sample, ]
y_pred of size [n_sample * n_class, ] instead of [n_sample, n_class]
To be more specific, y_pred should be like
y_pred = [first_class, first_class,..., second_class, second_class,..., third_class, third_class,...]
Moreover, gradient and hessian should be grouped in the same way.
def objective(y_true, y_pred):
    '''
    input:
        y_true: np.array of size [n_sample,]
        y_pred: np.array of size [n_sample * n_class, ]
    return:
        gradient and hessian should have exactly the same form as y_pred
    '''
    # reshape the flat predictions back to (n_sample, n_class), column-major
    y_pred = np.reshape(y_pred, [len(y_true), 3], order='F')
    y_pred = softmax(y_pred)
    temp = np.zeros_like(y_pred)
    temp[range(y_pred.shape[0]), y_true] = 1
    gradient = y_pred - temp
    hessian = y_pred * (1 - y_pred)
    return [gradient.ravel(order='F'), hessian.ravel(order='F')]
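To make the column-major ('F' order) layout concrete, here is a small, self-contained NumPy illustration; the 2-sample, 3-class numbers are made up:

import numpy as np

# rows = samples, columns = classes
scores = np.array([[0.1, 0.2, 0.7],
                   [0.5, 0.3, 0.2]])

# order='F' stacks column by column: all first-class scores,
# then all second-class scores, then all third-class scores
flat = scores.ravel(order='F')
print(flat)  # [0.1 0.5 0.2 0.3 0.7 0.2]

# reshaping back with order='F' recovers the (n_sample, n_class) layout
print(np.reshape(flat, [2, 3], order='F'))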
I have been trying to run this script, but I keep getting an indentation error at the end of the backprop(x, y) function. I would really appreciate any help!
import cPickle
import gzip

def load_data():
    f = gzip.open('mnist.pkl.gz', 'rb')
    training_data, validation_data, test_data = cPickle.load(f)
    f.close()
    return (training_data, validation_data, test_data)
import numpy as np

class Network(object):
    def __init__(self, layers):
        self.layers = layers
        self.biases = [np.random.randn(y, 1) for y
                       in layers[1:]]
        self.weights = [np.transpose(np.random.randn(x, y))
                        for x, y
                        in zip(layers[:-1], layers[1:])]
        self.num_layers = len(layers)

    def backprop(self, x, y):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # feedforward
        activation = x
        activations = [x]  # list to store all the activations, layer by layer
        zs = []  # list to store all the z vectors, layer by layer
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        # backward pass
        delta = self.cost_derivative(activations[-1], y) * \
            sigmoid_prime(zs[-1])  # set first delta
        nabla_b[-1] = delta  # set last dC/db to delta vector
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # calculate nabla_b, nabla_w for the rest of the layers
        for l in xrange(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
        # this is where python says there is an indent error!
        return (nabla_b, nabla_w)
The problem was fixed by selecting the "edit" drop-down menu of Notepad++, choosing Blank Operations, and finally, clicking 'TAB to spaces'; obviously, this should be done after selecting the portion of code that's triggering the error.
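If you would rather check this from the command line than in an editor, Python's standard-library tabnanny module flags ambiguous mixes of tabs and spaces (the file name here is just a placeholder):

python -m tabnanny -v your_script.py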
I'm trying to implement a Gaussian Mixture Model with the Expectation–Maximization algorithm, and I get this error.
This is the Gaussian Mixture model that I used:
class GaussianMixture:
    "Model mixture of two univariate Gaussians and their EM estimation"

    def __init__(self, data, mu_min=min(data), mu_max=max(data), sigma_min=.1, sigma_max=1, mix=.5):
        self.data = data
        # init with multiple gaussians
        self.one = Gaussian(uniform(mu_min, mu_max),
                            uniform(sigma_min, sigma_max))
        self.two = Gaussian(uniform(mu_min, mu_max),
                            uniform(sigma_min, sigma_max))
        # as well as how much to mix them
        self.mix = mix

    def Estep(self):
        "Perform an E(stimation)-step, freshening up self.loglike in the process"
        # compute weights
        self.loglike = 0.  # = log(p = 1)
        for datum in self.data:
            # unnormalized weights
            wp1 = self.one.pdf(datum) * self.mix
            wp2 = self.two.pdf(datum) * (1. - self.mix)
            # compute denominator
            den = wp1 + wp2
            # normalize
            wp1 /= den
            wp2 /= den
            # add into loglike
            self.loglike += log(wp1 + wp2)
            # yield weight tuple
            yield (wp1, wp2)

    def Mstep(self, weights):
        "Perform an M(aximization)-step"
        # compute denominators
        (left, rigt) = zip(*weights)
        one_den = sum(left)
        two_den = sum(rigt)
        # compute new means
        self.one.mu = sum(w * d / one_den for (w, d) in zip(left, data))
        self.two.mu = sum(w * d / two_den for (w, d) in zip(rigt, data))
        # compute new sigmas
        self.one.sigma = sqrt(sum(w * ((d - self.one.mu) ** 2)
                                  for (w, d) in zip(left, data)) / one_den)
        self.two.sigma = sqrt(sum(w * ((d - self.two.mu) ** 2)
                                  for (w, d) in zip(rigt, data)) / two_den)
        # compute new mix
        self.mix = one_den / len(data)

    def iterate(self, N=1, verbose=False):
        "Perform N iterations, then compute log-likelihood"

    def pdf(self, x):
        return (self.mix) * self.one.pdf(x) + (1 - self.mix) * self.two.pdf(x)

    def __repr__(self):
        return 'GaussianMixture({0}, {1}, mix={2.03})'.format(self.one,
                                                              self.two,
                                                              self.mix)

    def __str__(self):
        return 'Mixture: {0}, {1}, mix={2:.03})'.format(self.one,
                                                        self.two,
                                                        self.mix)
And then, while training, I get that error in the conditional statement.
# Check out the fitting process
n_iterations = 5
best_mix = None
best_loglike = float('-inf')
mix = GaussianMixture(data)
for _ in range(n_iterations):
    try:
        # train!
        mix.iterate(verbose=True)
        if mix.loglike > best_loglike:
            best_loglike = mix.loglike
            best_mix = mix
    except (ZeroDivisionError, ValueError, RuntimeWarning):
        # Catch division errors from bad starts, and just throw them out...
        pass
Any ideas why I have the following error?
AttributeError: GaussianMixture instance has no attribute 'loglike'
The loglike attribute is only created when you call the Estep method.
def Estep(self):
    "Perform an E(stimation)-step, freshening up self.loglike in the process"
    # compute weights
    self.loglike = 0.  # = log(p = 1)
You didn't call Estep between creating the GaussianMixture instance and accessing mix.loglike:
mix = GaussianMixture(data)
for _ in range(n_iterations):
    try:
        # train!
        mix.iterate(verbose=True)
        if mix.loglike > best_loglike:
And the iterate method is empty (it looks like you forgot some code here).
def iterate(self, N=1, verbose=False):
    "Perform N iterations, then compute log-likelihood"
So, there'll be no loglike attribute set on the mix instance by the time you do if mix.loglike. Hence, the AttributeError.
You need to do one of the following:
- call the Estep method before reading loglike (since you set self.loglike there), for example from inside iterate as sketched below, or
- define a loglike attribute in __init__.
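For the first option, here is one way the missing iterate body could look. This is only a sketch built around the Estep and Mstep methods shown in the question, and the verbose printing format is my own assumption:

def iterate(self, N=1, verbose=False):
    "Perform N iterations, then compute log-likelihood"
    for i in range(1, N + 1):
        # Estep is a generator and sets self.loglike as a side effect;
        # materialize it so Mstep can consume the weight tuples
        self.Mstep(list(self.Estep()))
        if verbose:
            print('{0:2} {1}'.format(i, self))
    # run one more E-step so self.loglike reflects the final parameters
    for _ in self.Estep():
        pass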