assign on a tf.Variable tensor slice - python-2.7

I am trying to do the following:
state[0,:] = state[0,:].assign( 0.9*prev_state + 0.1*( tf.matmul(inputs, weights) + biases ) )
for i in xrange(1,BATCH_SIZE):
    state[i,:] = state[i,:].assign( 0.9*state[i-1,:] + 0.1*( tf.matmul(inputs, weights) + biases ) )
prev_state = prev_state.assign( state[BATCH_SIZE-1,:] )
with
state = tf.Variable(tf.zeros([BATCH_SIZE, HIDDEN_1]), name='inner_state')
prev_state = tf.Variable(tf.zeros([HIDDEN_1]), name='previous_inner_state')
As a follow-up to this question: I get an error that a Tensor does not have an assign method.
What is the correct way to call the assign method on a slice of a Variable tensor?
Full current code:
import tensorflow as tf
import math
import numpy as np
INPUTS = 10
HIDDEN_1 = 20
BATCH_SIZE = 3
def create_graph(inputs, state, prev_state):
    with tf.name_scope('h1'):
        weights = tf.Variable(
            tf.truncated_normal([INPUTS, HIDDEN_1],
                                stddev=1.0 / math.sqrt(float(INPUTS))),
            name='weights')
        biases = tf.Variable(tf.zeros([HIDDEN_1]), name='biases')
        updated_state = tf.scatter_update(state, [0], 0.9 * prev_state + 0.1 * (tf.matmul(inputs[0,:], weights) + biases))
        for i in xrange(1, BATCH_SIZE):
            updated_state = tf.scatter_update(
                updated_state, [i], 0.9 * updated_state[i-1, :] + 0.1 * (tf.matmul(inputs[i,:], weights) + biases))
        prev_state = prev_state.assign(updated_state[BATCH_SIZE-1, :])
        output = tf.nn.relu(updated_state)
        return output
def data_iter():
    while True:
        idxs = np.random.rand(BATCH_SIZE, INPUTS)
        yield idxs
with tf.Graph().as_default():
    inputs = tf.placeholder(tf.float32, shape=(BATCH_SIZE, INPUTS))
    state = tf.Variable(tf.zeros([BATCH_SIZE, HIDDEN_1]), name='inner_state')
    prev_state = tf.Variable(tf.zeros([HIDDEN_1]), name='previous_inner_state')
    output = create_graph(inputs, state, prev_state)
    sess = tf.Session()
    # Run the Op to initialize the variables.
    init = tf.initialize_all_variables()
    sess.run(init)
    iter_ = data_iter()
    for i in xrange(0, 2):
        print("iteration: ", i)
        input_data = iter_.next()
        out = sess.run(output, feed_dict={inputs: input_data})

TensorFlow Variable objects have limited support for updating slices, via the tf.scatter_update(), tf.scatter_add(), and tf.scatter_sub() ops. Each of these ops takes a variable, a vector of slice indices (indices into the 0th dimension of the variable, identifying the contiguous slices to mutate), and a tensor of new values to write to the variable at those slice indices.
To update a single row of the variable, you can use tf.scatter_update(). For example, to update the 0th row of state, you would do:
updated_state = tf.scatter_update(
    state, [0], 0.9 * prev_state + 0.1 * (tf.matmul(inputs, weights) + biases))
To chain multiple updates, you can use the mutable updated_state tensor that is returned from tf.scatter_update():
for i in xrange(1, BATCH_SIZE):
    updated_state = tf.scatter_update(
        updated_state, [i], 0.9 * updated_state[i-1, :] + ...)
prev_state = prev_state.assign(updated_state[BATCH_SIZE-1, :])
Finally, you can evaluate the resulting updated_state.op to apply all of the updates to state:
sess.run(updated_state.op) # or `sess.run(updated_state)` to fetch the result
PS. You might find it more efficient to use tf.scan() to compute the intermediate states, and just materialize prev_state in a variable.
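For illustration, here is a minimal sketch of what the tf.scan() approach could look like. The names step, states and update_prev are mine (not from the original code), and weights/biases refer to the same variables defined in create_graph:
def step(prev, x):
    # x is one row of `inputs`; expand it so tf.matmul sees a 1 x INPUTS matrix
    return 0.9 * prev + 0.1 * (tf.matmul(tf.expand_dims(x, 0), weights)[0, :] + biases)

states = tf.scan(step, inputs, initializer=prev_state)   # shape [BATCH_SIZE, HIDDEN_1]
update_prev = prev_state.assign(states[BATCH_SIZE - 1, :])
output = tf.nn.relu(states)
This keeps only prev_state as a mutable variable; the per-batch intermediate states are ordinary tensors recomputed on each run.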

Related

How to set parameters for lightgbm when using customized objective function for multi-class classification?

I want to test a customized objective function for lightgbm in multi-class classification.
I have specified the parameter "num_class=3".
However, an error: "
Number of classes must be 1 for non-multiclass training" is thrown
I am using python 3.6 and lightgbm version 0.2
# iris data
from sklearn import datasets
import lightgbm as lgb
import numpy as np

iris = datasets.load_iris()
X = iris['data']
y = iris['target']

# construct train-test
num_train = int(X.shape[0] / 3 * 2)
idx = np.random.permutation(X.shape[0])
x_train = X[idx[:num_train]]
x_test = X[idx[num_train:]]
y_train = y[idx[:num_train]]
y_test = y[idx[num_train:]]

# softmax function
def softmax(x):
    '''
    input x: an np.array of n_sample * n_class
    return : an np.array of n_sample * n_class (probabilities)
    '''
    x = np.where(x > 100, 100, x)
    x = np.exp(x)
    return x / np.reshape(np.sum(x, 1), [x.shape[0], 1])

# objective function
def objective(y_true, y_pred):
    '''
    input:
        y_true: np.array of size (n_sample,)
        y_pred: np.array of size (n_sample, n_class)
    '''
    y_pred = softmax(y_pred)
    temp = np.zeros_like(y_pred)
    temp[range(y_pred.shape[0]), y_true] = 1
    gradient = y_pred - temp
    hessian = y_pred * (1 - y_pred)
    return [gradient, hessian]

# lightgbm model
model = lgb.LGBMClassifier(n_estimators=10000,
                           num_classes=3,
                           objective=objective,
                           nthread=4)
model.fit(x_train, y_train,
          eval_metric='multi_logloss',
          eval_set=[(x_test, y_test), (x_train, y_train)],
          eval_names=['valid', 'train'],
          early_stopping_rounds=200, verbose=100)
Let me answer my own question.
The arguments in the objective function should be:
y_true of size [n_sample, ]
y_pred of size [n_sample * n_class, ] instead of [n_sample, n_class]
To be more specific, y_pred should be like
y_pred = [first_class, first_class,..., second_class, second_class,..., third_class, third_class,...]
Moreover, gradient and hessian should be grouped in the same way.
def objective(y_true, y_pred):
    '''
    input:
        y_true: np.array of size [n_sample,]
        y_pred: np.array of size [n_sample * n_class, ]
    return:
        gradient and hessian should have exactly the same form as y_pred
    '''
    y_pred = np.reshape(y_pred, [num_train, 3], order='F')
    y_pred = softmax(y_pred)
    temp = np.zeros_like(y_pred)
    temp[range(y_pred.shape[0]), y_true] = 1
    gradient = y_pred - temp
    hessian = y_pred * (1 - y_pred)
    return [gradient.ravel(order='F'), hessian.ravel(order='F')]
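For intuition, here is a tiny sketch (the values are made up, not from the question) of how this column-major flattening relates to the usual (n_sample, n_class) layout, using numpy as np as above:
scores = np.array([[1., 2., 3.],
                   [4., 5., 6.]])           # shape (n_sample=2, n_class=3)
flat = scores.ravel(order='F')              # array([1., 4., 2., 5., 3., 6.])
back = np.reshape(flat, [2, 3], order='F')  # recovers the original 2 x 3 matrix
This is exactly the "first_class, first_class, ..., second_class, ..." ordering described above, and why the gradient and hessian must be raveled with order='F' before being returned.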

keras custom activation to drop under certain conditions

I am trying to zero out the values that are less than 1 and greater than -1 in my custom activation, like below.
def ScoreActivationFromSigmoid(x, target_min=1, target_max=9):
    condition = K.tf.logical_and(K.tf.less(x, 1), K.tf.greater(x, -1))
    case_true = K.tf.reshape(K.tf.zeros([x.shape[1] * x.shape[2]], tf.float32), shape=(K.tf.shape(x)[0], x.shape[1], x.shape[2]))
    case_false = x
    changed_x = K.tf.where(condition, case_true, case_false)
    activated_x = K.sigmoid(changed_x)
    score = activated_x * (target_max - target_min) + target_min
    return score
The input has 3 dimensions: batch_size x sequence_length x number of features.
But I got this error
InvalidArgumentError: Inputs to operation activation_51/Select of type Select must have the same size and shape. Input 0: [1028,300,64] != input 1: [1,300,64]
[[{{node activation_51/Select}} = Select[T=DT_FLOAT, _class=["loc:#training_88/Adam/gradients/activation_51/Select_grad/Select_1"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](activation_51/LogicalAnd, activation_51/Reshape, dense_243/add)]]
[[{{node metrics_92/acc/Mean_1/_9371}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_473_metrics_92/acc/Mean_1", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
I understand what the problem is: the custom activation function cannot determine the proper batch size of the inputs. But I don't know how to control that.
Can anyone fix this, or suggest another way to replace some element values under certain conditions?
The error message I got when running your code is:
ValueError: Cannot reshape a tensor with 19200 elements to shape
[1028,300,64] (19737600 elements) for 'Reshape_8' (op: 'Reshape') with
input shapes: [19200], [3] and with input tensors computed as partial
shapes: input[1] = [1028,300,64].
And the problem should be that you cannot reshape a tensor of shape [x.shape[1] * x.shape[2]] to (K.tf.shape(x)[0], x.shape[1], x.shape[2]). This is because their element counts are different.
So the solution is simply to create a zero tensor of the right shape.
This line:
case_true = K.tf.reshape(K.tf.zeros([x.shape[1] * x.shape[2]], tf.float32), shape=(K.tf.shape(x)[0], x.shape[1], x.shape[2]))
should be replaced with:
case_true = K.tf.reshape(K.tf.zeros([x.shape[0] * x.shape[1] * x.shape[2]], K.tf.float32), shape=(K.tf.shape(x)[0], x.shape[1], x.shape[2]))
or using K.tf.zeros_like:
case_true = K.tf.zeros_like(x)
Workable code:
import keras.backend as K
import numpy as np
def ScoreActivationFromSigmoid(x, target_min=1, target_max=9) :
condition = K.tf.logical_and(K.tf.less(x, 1), K.tf.greater(x, -1))
case_true = K.tf.zeros_like(x)
case_false = x
changed_x = K.tf.where(condition, case_true, case_false)
activated_x = K.tf.sigmoid(changed_x)
score = activated_x * (target_max - target_min) + target_min
return score
with K.tf.Session() as sess:
x = K.tf.placeholder(K.tf.float32, shape=(1028, 300, 64), name='x')
score = sess.run(ScoreActivationFromSigmoid(x), feed_dict={'x:0':np.random.randn(1028, 300, 64)})
print(score)
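As an alternative, here is a backend-agnostic sketch of my own (not from the original answer) that avoids reaching into K.tf entirely, using a mask-and-multiply formulation instead of tf.where:
import keras.backend as K

def score_activation_masked(x, target_min=1, target_max=9):
    # inside is 1.0 where -1 < x < 1, else 0.0
    inside = K.cast(K.greater(x, -1), K.floatx()) * K.cast(K.less(x, 1), K.floatx())
    changed_x = x * (1 - inside)          # zero out the masked entries
    activated_x = K.sigmoid(changed_x)
    return activated_x * (target_max - target_min) + target_min
Because it only uses keras.backend ops, the shape of the batch dimension never has to be known when building the graph.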

AttributeError: GaussianMixture instance has no attribute 'loglike'

I'm trying to implement a Gaussian Mixture Model with the Expectation–Maximization algorithm, and I get this error.
This is the Gaussian Mixture model that I used:
class GaussianMixture:
    "Model mixture of two univariate Gaussians and their EM estimation"

    def __init__(self, data, mu_min=min(data), mu_max=max(data), sigma_min=.1, sigma_max=1, mix=.5):
        self.data = data
        # init with multiple gaussians
        self.one = Gaussian(uniform(mu_min, mu_max),
                            uniform(sigma_min, sigma_max))
        self.two = Gaussian(uniform(mu_min, mu_max),
                            uniform(sigma_min, sigma_max))
        # as well as how much to mix them
        self.mix = mix

    def Estep(self):
        "Perform an E(stimation)-step, freshening up self.loglike in the process"
        # compute weights
        self.loglike = 0.  # = log(p = 1)
        for datum in self.data:
            # unnormalized weights
            wp1 = self.one.pdf(datum) * self.mix
            wp2 = self.two.pdf(datum) * (1. - self.mix)
            # compute denominator
            den = wp1 + wp2
            # normalize
            wp1 /= den
            wp2 /= den
            # add into loglike
            self.loglike += log(wp1 + wp2)
            # yield weight tuple
            yield (wp1, wp2)

    def Mstep(self, weights):
        "Perform an M(aximization)-step"
        # compute denominators
        (left, rigt) = zip(*weights)
        one_den = sum(left)
        two_den = sum(rigt)
        # compute new means
        self.one.mu = sum(w * d / one_den for (w, d) in zip(left, data))
        self.two.mu = sum(w * d / two_den for (w, d) in zip(rigt, data))
        # compute new sigmas
        self.one.sigma = sqrt(sum(w * ((d - self.one.mu) ** 2)
                                  for (w, d) in zip(left, data)) / one_den)
        self.two.sigma = sqrt(sum(w * ((d - self.two.mu) ** 2)
                                  for (w, d) in zip(rigt, data)) / two_den)
        # compute new mix
        self.mix = one_den / len(data)

    def iterate(self, N=1, verbose=False):
        "Perform N iterations, then compute log-likelihood"

    def pdf(self, x):
        return (self.mix) * self.one.pdf(x) + (1 - self.mix) * self.two.pdf(x)

    def __repr__(self):
        return 'GaussianMixture({0}, {1}, mix={2.03})'.format(self.one,
                                                              self.two,
                                                              self.mix)

    def __str__(self):
        return 'Mixture: {0}, {1}, mix={2:.03})'.format(self.one,
                                                        self.two,
                                                        self.mix)
And then, while training, I get that error in the conditional statement.
# Check out the fitting process
n_iterations = 5
best_mix = None
best_loglike = float('-inf')
mix = GaussianMixture(data)
for _ in range(n_iterations):
    try:
        # train!
        mix.iterate(verbose=True)
        if mix.loglike > best_loglike:
            best_loglike = mix.loglike
            best_mix = mix
    except (ZeroDivisionError, ValueError, RuntimeWarning):
        # Catch division errors from bad starts, and just throw them out...
        pass
Any ideas why I have the following error?
AttributeError: GaussianMixture instance has no attribute 'loglike'
The loglike attribute is only created when you call the Estep method.
def Estep(self):
    "Perform an E(stimation)-step, freshening up self.loglike in the process"
    # compute weights
    self.loglike = 0.  # = log(p = 1)
You didn't call Estep between creating the GaussianMixture instance and accessing mix.loglike:
mix = GaussianMixture(data)
for _ in range(n_iterations):
    try:
        # train!
        mix.iterate(verbose=True)
        if mix.loglike > best_loglike:
And the iterate method is empty (it looks like you forgot some code there); a sketch of what it could contain is given after the suggestions below.
def iterate(self, N=1, verbose=False):
    "Perform N iterations, then compute log-likelihood"
So, there'll be no loglike attribute set on the mix instance by the time you do if mix.loglike. Hence, the AttributeError.
You need to do one of the following:
Call the Estep method (since you set self.loglike there)
Define a loglike attribute in __init__
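For example, a minimal sketch of what the missing iterate body could look like (this is my guess at the intent, not your original code), which keeps loglike fresh by running E- and M-steps in turn:
def iterate(self, N=1, verbose=False):
    "Perform N iterations, then compute log-likelihood"
    for i in range(N):
        # Estep is a generator, so materialize the weights before the M-step
        weights = list(self.Estep())
        self.Mstep(weights)
        if verbose:
            print('{0:2} {1}'.format(i + 1, self))
    # a final E-step refreshes self.loglike for the updated parameters
    list(self.Estep())
With something like this in place, mix.loglike exists by the time the comparison runs.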

2 layer NN weights not updating

I have a fairly simple NN that has 1 hidden layer.
However, the weights don't seem to be updating. Or perhaps they are, but the variable values don't change?
Either way, my accuracy is 0.1 and it doesn't change no matter how I change the learning rate or the activation function. Not sure what is wrong. Any ideas?
I've posted the entire code, correctly formatted, so you can copy-paste it and run it directly on your local machines.
from tensorflow.examples.tutorials.mnist import input_data
import math
import numpy as np
import tensorflow as tf

# one hot option returns binarized labels.
mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)
# model parameters
x = tf.placeholder(tf.float32, [784, None], name='x')
# weights
W1 = tf.Variable(tf.truncated_normal([25, 784], stddev=1.0/math.sqrt(784)), name='W')
W2 = tf.Variable(tf.truncated_normal([25, 25], stddev=1.0/math.sqrt(25)), name='W')
W3 = tf.Variable(tf.truncated_normal([10, 25], stddev=1.0/math.sqrt(25)), name='W')
# bias units
b1 = tf.Variable(tf.zeros([25,1]), name='b1')
b2 = tf.Variable(tf.zeros([25,1]), name='b2')
b3 = tf.Variable(tf.zeros([10,1]), name='b3')
# NN architecture
hidden1 = tf.nn.relu(tf.matmul(W1, x, name='hidden1') + b1, name='hidden1_out')
# hidden2 = tf.nn.sigmoid(tf.matmul(W2, hidden1, name='hidden2')+b2, name='hidden2_out')
y = tf.matmul(W3, hidden1, name='y') + b3
y_ = tf.placeholder(tf.float32, [10, None], name='y_')
# Create the model
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, y_))
train_step = tf.train.GradientDescentOptimizer(2).minimize(cross_entropy)
sess = tf.Session()
summary_writer = tf.train.SummaryWriter('log_simple_graph', sess.graph)
init = tf.global_variables_initializer()
sess.run(init)
# Train
for i in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    summary = sess.run(train_step, feed_dict={x: np.transpose(batch_xs), y_: np.transpose(batch_ys)})
    if summary is not None:
        summary_writer.add_event(summary)
# Test trained model
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={x: np.transpose(mnist.test.images), y_: np.transpose(mnist.test.labels)}))
The reason you are consistently getting 0.1 accuracy is mainly the order of the dimensions of the input placeholder and of the weights that follow it. The learning rate is another factor: if it is very high, the gradients oscillate and never settle into a minimum.
TensorFlow takes the number of instances (the batch size) as the first dimension of a placeholder. So the code which declares the input x
x = tf.placeholder(tf.float32, [784, None],name='x')
should be declared as
x = tf.placeholder(tf.float32, [None, 784],name='x')
Consequently, W1 should be declared as
W1 = tf.Variable(tf.truncated_normal([784, 25],stddev= 1.0/math.sqrt(784)),name='W')
and so on. Even the bias variables should be declared to match this layout (that's how TensorFlow expects them :) ).
For example
b1 = tf.Variable(tf.zeros([25]),name='b1')
b2 = tf.Variable(tf.zeros([25]),name='b2')
b3 = tf.Variable(tf.zeros([10]),name='b3')
I'm putting the corrected full code below for your reference. I achieved an accuracy of 0.9262 with this :D
from tensorflow.examples.tutorials.mnist import input_data
import math
import numpy as np
import tensorflow as tf
# one hot option returns binarized labels.
mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)
# model parameters
x = tf.placeholder(tf.float32, [None, 784],name='x')
# weights
W1 = tf.Variable(tf.truncated_normal([784, 25],stddev= 1.0/math.sqrt(784)),name='W')
W2 = tf.Variable(tf.truncated_normal([25, 25],stddev=1.0/math.sqrt(25)),name='W')
W3 = tf.Variable(tf.truncated_normal([25, 10],stddev=1.0/math.sqrt(25)),name='W')
# bias units
b1 = tf.Variable(tf.zeros([25]),name='b1')
b2 = tf.Variable(tf.zeros([25]),name='b2')
b3 = tf.Variable(tf.zeros([10]),name='b3')
# NN architecture
hidden1 = tf.nn.relu(tf.matmul(x, W1,name='hidden1')+b1, name='hidden1_out')
# hidden2 = tf.nn.sigmoid(tf.matmul(W2, hidden1, name='hidden2')+b2, name='hidden2_out')
y = tf.matmul(hidden1, W3,name='y') + b3
y_ = tf.placeholder(tf.float32, [None, 10],name='y_')
# Create the model
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, y_))
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(cross_entropy)
sess = tf.Session()
summary_writer = tf.train.SummaryWriter('log_simple_graph', sess.graph)
init = tf.initialize_all_variables()
sess.run(init)
for i in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    summary = sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
    if summary is not None:
        summary_writer.add_event(summary)
# Test trained model
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
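A side note in case you are on a newer TensorFlow release (the code above targets the old 0.x API): tf.nn.softmax_cross_entropy_with_logits later started requiring keyword arguments, so the loss line would become:
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
tf.train.SummaryWriter and tf.initialize_all_variables were also renamed (tf.summary.FileWriter and tf.global_variables_initializer respectively); the training logic itself is unchanged.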

generate N random numbers from a skew normal distribution using numpy

I need a function in python to return N random numbers from a skew normal distribution. The skew needs to be taken as a parameter.
e.g. my current use is
x = numpy.random.randn(1000)
and the ideal function would be e.g.
x = randn_skew(1000, skew=0.7)
The solution needs to work with Python 2.7 and numpy 1.9.
A similar answer is here: skew normal distribution in scipy. However, this generates a PDF, not random numbers.
I start by generating the PDF curves for reference:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

NUM_SAMPLES = 100000
SKEW_PARAMS = [-3, 0]

def skew_norm_pdf(x, e=0, w=1, a=0):
    # adapted from:
    # http://stackoverflow.com/questions/5884768/skew-normal-distribution-in-scipy
    t = (x-e) / w
    return 2.0 / w * stats.norm.pdf(t) * stats.norm.cdf(a*t)

# generate the skew normal PDF for reference:
location = 0.0
scale = 1.0
x = np.linspace(-5, 5, 100)
plt.subplots(figsize=(12, 4))
for alpha_skew in SKEW_PARAMS:
    p = skew_norm_pdf(x, location, scale, alpha_skew)
    # n.b. note that alpha is a parameter that controls skew, but the 'skewness'
    # as measured will be different. see the wikipedia page:
    # https://en.wikipedia.org/wiki/Skew_normal_distribution
    plt.plot(x, p)
Next I found a VB implementation of sampling random numbers from the skew normal distribution and converted it to python:
# literal adaption from:
# http://stackoverflow.com/questions/4643285/how-to-generate-random-numbers-that-follow-skew-normal-distribution-in-matlab
# original at:
# http://www.ozgrid.com/forum/showthread.php?t=108175
def rand_skew_norm(fAlpha, fLocation, fScale):
    sigma = fAlpha / np.sqrt(1.0 + fAlpha**2)
    afRN = np.random.randn(2)
    u0 = afRN[0]
    v = afRN[1]
    u1 = sigma*u0 + np.sqrt(1.0 - sigma**2) * v
    if u0 >= 0:
        return u1*fScale + fLocation
    return (-u1)*fScale + fLocation

def randn_skew(N, skew=0.0):
    return [rand_skew_norm(skew, 0, 1) for x in range(N)]

# lets check they at least visually match the PDF:
plt.subplots(figsize=(12, 4))
for alpha_skew in SKEW_PARAMS:
    p = randn_skew(NUM_SAMPLES, alpha_skew)
    sns.distplot(p)
And then wrote a quick version which (without extensive testing) appears to be correct:
def randn_skew_fast(N, alpha=0.0, loc=0.0, scale=1.0):
    sigma = alpha / np.sqrt(1.0 + alpha**2)
    u0 = np.random.randn(N)
    v = np.random.randn(N)
    u1 = (sigma*u0 + np.sqrt(1.0 - sigma**2)*v) * scale
    u1[u0 < 0] *= -1
    u1 = u1 + loc
    return u1

# lets check again
plt.subplots(figsize=(12, 4))
for alpha_skew in SKEW_PARAMS:
    p = randn_skew_fast(NUM_SAMPLES, alpha_skew)
    sns.distplot(p)
from scipy.stats import skewnorm
a = 10
data = skewnorm.rvs(a, size=1000)
Here, a is the skewness parameter; for details see:
https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.skewnorm.html
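If you also need to shift and spread the samples, skewnorm.rvs accepts the standard scipy loc and scale keywords; the values below are just an example:
from scipy.stats import skewnorm
data = skewnorm.rvs(10, loc=2.0, scale=3.0, size=1000)  # skew a=10, centered near 2, scaled by 3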
Adapted from the rsnorm function in the fGarch R package:
import numpy

def random_snorm(n, mean=0, sd=1, xi=1.5):
    def random_snorm_aux(n, xi):
        weight = xi/(xi + 1/xi)
        z = numpy.random.uniform(-weight, 1-weight, n)
        xi_ = xi**numpy.sign(z)
        random = -numpy.absolute(numpy.random.normal(0, 1, n))/xi_ * numpy.sign(z)
        m1 = 2/numpy.sqrt(2 * numpy.pi)
        mu = m1 * (xi - 1/xi)
        sigma = numpy.sqrt((1 - m1**2) * (xi**2 + 1/xi**2) + 2 * m1**2 - 1)
        return (random - mu)/sigma
    return random_snorm_aux(n, xi) * sd + mean
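A quick usage sketch matching the shape of the call the question asked for (the parameter values are just examples):
samples = random_snorm(1000, mean=0.0, sd=1.0, xi=1.5)  # 1000 draws; in this parameterization xi = 1 should give no skew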