The following two programs perform a simple Bayesian inference in Python using PyMC3. While the first one, for an exponential model, compiles and runs perfectly fine, the second one, for a simple ODE model, gives an error. I do not understand why one works and the other does not. Please help.
Code #1
import numpy as np
import pymc3 as pm
def f(a, b, x, c):
    """Evaluate the exponential model ``a * exp(b * x) + c``.

    Only arithmetic operators and ``np.exp`` are applied to the arguments,
    so ``x`` may be a scalar or a NumPy array (the result has the same
    shape as ``x``).
    """
    return a * np.exp(b * x) + c
# Generate synthetic observations of the exponential model and add noise.
a, b = 5, 0.2
xdata = np.linspace(0, 10, 21)
ydata = f(a, b, xdata, 0.5)
yerror = 5 * np.random.rand(len(xdata))
# The noise is drawn with standard deviation sqrt(yerror).
ydata += np.random.normal(0.0, np.sqrt(yerror))

model = pm.Model()
with model:
    # Uniform priors bracketing the values used to generate the data.
    # Dividing by 2.0 keeps the bound at a/2 even under Python 2, where
    # 5 / 2 would truncate to 2.
    alpha = pm.Uniform('alpha', lower=a / 2.0, upper=2 * a)
    beta = pm.Uniform('beta', lower=b / 2.0, upper=2 * b)
    mu = f(alpha, beta, xdata, 0.5)
    # NOTE(review): the likelihood uses sd=yerror although the noise above
    # was generated with std sqrt(yerror) -- confirm which is intended.
    Y_obs = pm.Normal('Y_obs', mu=mu, sd=yerror, observed=ydata)
    # `chains` is the current name of the keyword formerly spelled `nchains`.
    trace = pm.sample(100, tune=50, chains=1)
Code #2
import numpy as np
import pymc3 as pm
def solver(I, a, T, dt):
    """Solve u'=-a*u, u(0)=I, for t in (0,T] with steps of dt.

    Uses the explicit recurrence ``u[n+1] = (1 - a*dt) * u[n]`` and returns
    the ``N + 1`` values ``u[0..N]`` as a 1-D NumPy float array, where
    ``N = round(T / dt)``.

    ``a`` must be a plain number: every step is stored into a NumPy float
    array, so a non-numeric ``a`` cannot be assigned and the store raises
    ``ValueError: setting an array element with a sequence``.
    """
    dt = float(dt)            # avoid integer division
    N = int(round(T / dt))    # no of time intervals
    print(N)                  # function-call form works on Python 2 and 3
    u = np.zeros(N + 1)       # array of u[n] values
    u[0] = I                  # assign initial condition
    for n in range(N):        # n=0,1,...,N-1
        u[n + 1] = (1 - a * dt) * u[n]
    return u
# Generating data
ydata = solver(1, 1.7, 10, 0.1)
# One error value per observation (sized from ydata instead of a literal 101).
yerror = 5 * np.random.rand(len(ydata))
ydata += np.random.normal(0.0, np.sqrt(yerror))

model = pm.Model()
with model:
    alpha = pm.Uniform('alpha', lower=1.0, upper=2.5)
    # NOTE: alpha is a PyMC3 variable, not a number.  solver() stores each
    # step into a NumPy array, so this call is where the reported
    # "ValueError: setting an array element with a sequence" is raised.
    mu = solver(1, alpha, 10, 0.1)
    Y_obs = pm.Normal('Y_obs', mu=mu, sd=yerror, observed=ydata)
    # `chains` is the current name of the keyword formerly spelled `nchains`.
    trace = pm.sample(100, chains=1)
The error is
Traceback (most recent call last):
File "1.py", line 27, in <module>
mu = solver(1,alpha,10,0.1)
File "1.py", line 16, in solver
u[n+1] = (1 - a*dt)*u[n]
ValueError: setting an array element with a sequence.
Please help.
The error is in this line:
mu = solver(1,alpha,10,0.1)
You are trying to pass alpha as a value, but alpha is a pymc3 distribution. The function solver only works when you provide a number in the second argument.
The code #1 works because this function
def f(a, b, x, c):
    """Evaluate ``a * exp(b * x) + c`` using only arithmetic and ``np.exp``."""
    return a * np.exp(b * x) + c
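never stores its result in a NumPy array: it only applies arithmetic and np.exp to its arguments, which also works when those arguments are PyMC3 variables.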
Related
I am trying to use pymc3.DiscreteUniform as an index for a numpy 1D array
This worked with pymc (v2), but I am transitioning to pymc3, and code that worked under pymc doesn't work under pymc3.
import numpy as np
import pymc3 as pm

# nDens and box are defined elsewhere in the asker's program.
d0 = pm.DiscreteUniform('d0', lower=0, upper=nDens - 1, testval=nDens // 2)
pred = np.zeros(len(box.match), np.float64)
for iwvl, amatch in enumerate(box.match):
    # d0 is a PyMC3 variable, not an integer; using it as an index here is
    # what produces the IndexError quoted below.
    pred[iwvl] += amatch['intensitySum'][d0]
I get the following error message:
IndexError: only integers, slices (:), ellipsis (...), numpy.newaxis (None) and integer or boolean arrays are valid indices
I have found something that works, but it involves going into theano and theano.tensor.
import numpy as np
import theano
from theano import tensor
import pymc3 as pm

# emLog, nDens, Dindex and box are defined elsewhere in the author's program.
with pm.Model() as model:
    em0 = pm.Normal('em0', mu=emLog, sigma=0.2)
    d0 = pm.DiscreteUniform('d0', lower=0, upper=nDens - 1, testval=Dindex)
    boundNormal = pm.Bound(pm.Normal, lower=0.0)
    wght = boundNormal('wght', mu=0.2, sigma=0.1)

    # Table of predictions: one row per density index, one column per line.
    pred = np.zeros((nDens, len(box.match)), np.float64)
    for iwvl, amatch in enumerate(box.match):
        pred[0:, iwvl] += amatch['intensitySum']

    # Index a theano shared copy of the table with the random variable
    # instead of indexing the NumPy array directly.
    xpred = theano.shared(pred, name='p0')
    idx = tensor.as_tensor_variable(d0)
    predicted = xpred[idx] * 10.**em0

    nObs = len(box.match)
    intensity = np.zeros(nObs, np.float64)
    for iwvl in range(nObs):
        intensity[iwvl] = box.match[iwvl]['obsIntensity']

    sigma = 0.2  # not used below; the likelihood scales wght by intensity
    Y_obs = pm.Normal('Y_obs', mu=predicted, sigma=wght * intensity,
                      observed=intensity)
    trace = pm.sample(tune=20000, draws=100000, target_accept=0.85)
After that you can work with the trace.
It is even possible to make sigma a pm variable.
I'm trying to implement a simple Bayesian inference using an ODE model. I want to use the NUTS algorithm to sample, but it gives me an initialization error. I do not know much about PyMC3 as I'm new to it. Please take a look and tell me what is wrong.
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import odeint
import seaborn
import pymc3 as pm
import theano.tensor as T
from theano.compile.ops import as_op
#Actual Solution of the Differential Equation(Used to generate data)
def actual(a, b, x):
    """Closed-form solution of ``y' = a*x - b*y`` with ``y(0) = 1``.

    ``x`` may be a scalar or a NumPy array; ``b`` must be non-zero because
    the expression divides by ``b**2``.
    """
    Y = np.exp(-b*x)*(a*np.exp(b*x)*(b*x-1)+a+b**2)/b**2
    return Y
#Method For Solving the ODE
def lv(xdata, a=5.0, b=0.2):
    """Integrate ``y' = a*x - b*y`` from ``y(0) = 1`` over the grid ``xdata``.

    Returns the array produced by ``odeint``, which has one row per entry
    of ``xdata`` and a single column.
    """
    def dy_dx(y, x):
        return a*x - b*y

    y0 = 1.0
    # Only the solution array is needed, so the diagnostic dictionary is not
    # requested (it was previously bound to a name shadowing builtin `dict`).
    return odeint(dy_dx, y0, xdata)
# Generate synthetic data for Bayesian inference from the closed-form
# solution evaluated at the "true" parameters a0, b0.
a0, b0 = 5, 0.2
xdata = np.linspace(0, 21, 100)
ydata = actual(a0,b0,xdata)
# Add noise to the ydata points: yerror holds one value per point, and the
# noise is drawn with standard deviation sqrt(yerror).
yerror = 10*np.random.rand(len(xdata))
ydata += np.random.normal(0.0, np.sqrt(yerror))
# Flatten so the observations are a 1-D vector.
ydata = np.ravel(ydata)
# Wrap the NumPy/SciPy solver as a Theano op so that it can be called with
# PyMC3 variables: two double scalars in, one double vector out.
# NOTE(review): an op created with as_op presumably provides no gradient,
# which would explain why NUTS initialisation falls back -- confirm against
# the PyMC3 documentation.
@as_op(itypes=[T.dscalar, T.dscalar], otypes=[T.dvector])
def func(al, be):
    """Solve the ODE on the module-level grid ``xdata`` for parameters al, be."""
    Q = lv(xdata, a=al, b=be)
    return np.ravel(Q)
# Number of Samples and Initial Conditions
nsample = 5000
y0 = 1.0

# Model for Bayesian Inference
model = pm.Model()
with model:
    # Priors for unknown model parameters.  Dividing by 2.0 keeps the bounds
    # at +/- 50% even under Python 2, where 5 / 2 would truncate to 2.
    alpha = pm.Uniform('alpha', lower=a0 / 2.0, upper=a0 + a0 / 2.0)
    beta = pm.Uniform('beta', lower=b0 / 2.0, upper=b0 + b0 / 2.0)
    # Expected value of outcome
    mu = func(alpha, beta)
    # Likelihood (sampling distribution) of observations
    Y_obs = pm.Normal('Y_obs', mu=mu, sd=yerror, observed=ydata)
    # `chains` is the current name of the keyword formerly spelled `nchains`.
    trace = pm.sample(nsample, chains=1)

pm.traceplot(trace)
plt.show()
The error that I get is
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Initializing NUTS failed. Falling back to elementwise auto-assignment.
Any help would be really appreciated
I'm trying to recreate results from Bayesian Data Analysis Third Edition.
Chapter 5, Section 3 concerns tumors in rats. A hierarchical model is fit, and the hyperprior used is not one of the densities included in pymc3.
The hyperprior is a*b*(a+b)^-2.5. Here is my attempt using pymc3.
import pymc3 as pm
with pm.Model() as model:
    def ab_dist(x):
        """Log-density of the hyperprior a*b*(a+b)^-2.5, up to a constant."""
        a = x[0]
        b = x[1]
        # log(a * b * (a + b)**-2.5) = log(a) + log(b) - 2.5 * log(a + b)
        # NOTE(review): the logs require a > 0 and b > 0; nothing visible
        # here restricts 'ab' to positive values -- confirm.
        return pm.math.log(a) + pm.math.log(b) - 2.5 * pm.math.log(a + b)

    ab = pm.DensityDist('ab', ab_dist, shape=2, testval=[2, 2])
    a = ab[0]
    b = ab[1]
    # n and y (trial counts and observed counts) are defined elsewhere.
    theta = pm.Beta('theta', alpha=a, beta=b)
    Y = pm.Binomial('y', n=n, p=theta, observed=y)
At this stage, I am returned an error
ValueError: Input dimension mis-match. (input[0].shape[0] = 71, input[1].shape[0] = 20000)
What have I done wrong? Have I correctly implemented the density?
I have a fairly simple NN that has 1 hidden layer.
However, the weights don't seem to be updating. Or perhaps they are but the variable values don't change ?
Either way, my accuracy is 0.1 and it doesn't change no matter how I change the learning rate or the activation function. I'm not sure what is wrong. Any ideas?
I've posted the entire code, correctly formatted, so you can copy-paste it directly and run it on your local machines.
from tensorflow.examples.tutorials.mnist import input_data
import math
import numpy as np
import tensorflow as tf
# one hot option returns binarized labels.
mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)
# model parameters (features along axis 0, examples along axis 1)
x = tf.placeholder(tf.float32, [784, None], name='x')
# weights
W1 = tf.Variable(tf.truncated_normal([25, 784], stddev=1.0/math.sqrt(784)), name='W')
W2 = tf.Variable(tf.truncated_normal([25, 25], stddev=1.0/math.sqrt(25)), name='W')
W3 = tf.Variable(tf.truncated_normal([10, 25], stddev=1.0/math.sqrt(25)), name='W')
# bias units
b1 = tf.Variable(tf.zeros([25, 1]), name='b1')
b2 = tf.Variable(tf.zeros([25, 1]), name='b2')
b3 = tf.Variable(tf.zeros([10, 1]), name='b3')
# NN architecture
hidden1 = tf.nn.relu(tf.matmul(W1, x, name='hidden1') + b1, name='hidden1_out')
# hidden2 = tf.nn.sigmoid(tf.matmul(W2, hidden1, name='hidden2')+b2, name='hidden2_out')
y = tf.matmul(W3, hidden1, name='y') + b3
y_ = tf.placeholder(tf.float32, [10, None], name='y_')
# Create the model
# NOTE(review): y and y_ are laid out [10, batch] here, so the last axis is
# the batch axis rather than the class axis; the loss below and the
# tf.argmax(..., 1) calls further down presumably act on that last axis --
# confirm against the TensorFlow documentation.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
train_step = tf.train.GradientDescentOptimizer(2).minimize(cross_entropy)
sess = tf.Session()
summary_writer = tf.train.SummaryWriter('log_simple_graph', sess.graph)
init = tf.global_variables_initializer()
sess.run(init)
# Train
for i in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    # Batches are transposed to match the [features, batch] placeholders.
    summary = sess.run(train_step, feed_dict={x: np.transpose(batch_xs),
                                              y_: np.transpose(batch_ys)})
    if summary is not None:
        summary_writer.add_event(summary)
# Test trained model
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={x: np.transpose(mnist.test.images),
                                    y_: np.transpose(mnist.test.labels)}))
The reason why you are getting 0.1 accuracy consistently is mainly due to the order of dimensions of the input placeholder and the weights following it. Learning rate is another factor. If the learning rate is very high, the gradient would be oscillating and will not reach any minima.
Tensorflow takes the number of instances(batches) as the first index value of placeholder. So the code which declares input x
x = tf.placeholder(tf.float32, [784, None],name='x')
should be declared as
x = tf.placeholder(tf.float32, [None, 784],name='x')
Consequently, W1 should be declared as
W1 = tf.Variable(tf.truncated_normal([784, 25],stddev= 1.0/math.sqrt(784)),name='W')
and so on. Even the bias variables should be declared in the transposed sense. (That's how TensorFlow takes it :) )
For example
b1 = tf.Variable(tf.zeros([25]),name='b1')
b2 = tf.Variable(tf.zeros([25]),name='b2')
b3 = tf.Variable(tf.zeros([10]),name='b3')
I'm putting the corrected full code below for your reference. I achieved an accuracy of 0.9262 with this :D
from tensorflow.examples.tutorials.mnist import input_data
import math
import numpy as np
import tensorflow as tf
# one hot option returns binarized labels.
mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)
# model parameters (examples along axis 0, features along axis 1)
x = tf.placeholder(tf.float32, [None, 784], name='x')
# weights
W1 = tf.Variable(tf.truncated_normal([784, 25], stddev=1.0/math.sqrt(784)), name='W')
W2 = tf.Variable(tf.truncated_normal([25, 25], stddev=1.0/math.sqrt(25)), name='W')
W3 = tf.Variable(tf.truncated_normal([25, 10], stddev=1.0/math.sqrt(25)), name='W')
# bias units
b1 = tf.Variable(tf.zeros([25]), name='b1')
b2 = tf.Variable(tf.zeros([25]), name='b2')
b3 = tf.Variable(tf.zeros([10]), name='b3')
# NN architecture
hidden1 = tf.nn.relu(tf.matmul(x, W1, name='hidden1') + b1, name='hidden1_out')
# hidden2 = tf.nn.sigmoid(tf.matmul(W2, hidden1, name='hidden2')+b2, name='hidden2_out')
y = tf.matmul(hidden1, W3, name='y') + b3
y_ = tf.placeholder(tf.float32, [None, 10], name='y_')
# Create the model
# Keyword arguments make the logits/labels roles explicit.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(cross_entropy)
sess = tf.Session()
summary_writer = tf.train.SummaryWriter('log_simple_graph', sess.graph)
# Same initializer call as the question's code.
init = tf.global_variables_initializer()
sess.run(init)
for i in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    summary = sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
    if summary is not None:
        summary_writer.add_event(summary)
# Test trained model
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
I am trying to implement a logistic classifier using Python. The goal is to train the algorithm to identify digits 0-9 using the MNIST handwritten digits data set. However, fmin_cg seems to be changing the dimensions of my input arguments. I have tried reshaping the arguments inside cost() and gradient() with no luck; just more errors.
from scipy.io import loadmat
from numpy import shape, zeros, ones, dot, hstack, vstack, log, transpose, kron
from scipy.special import expit as sigmoid
import scipy.optimize
def cost(theta, X, y):
    """Logistic-regression cost of ``theta`` on examples ``X`` with labels ``y``.

    Reads the number of examples from the module-level ``m``.

    NOTE: the value is returned as computed by the dot products, without
    flattening; the reply below the traceback reports that this is a 2-D
    array, which fmin_cg does not accept.
    """
    h = sigmoid(X.dot(theta))
    pos_class = y.T.dot(log(h))
    neg_class = (1.0 - y).T.dot(log(1.0 - h))
    return (-1.0 / m) * (pos_class + neg_class)
def gradient(theta, X, y):
    """Gradient of the logistic-regression cost with respect to ``theta``.

    Reads the number of examples from the module-level ``m``.

    NOTE: the result is not flattened; the traceback in this post shows
    fmin_cg receiving a (401, 5000) array from this function.
    """
    h = sigmoid(X.dot(theta))
    return (1.0 / m) * X.T.dot(h - y)
def one_vs_all(X, y, theta):
    """Train a binary classifier for the label 9.0 and print fmin_cg's result[1].

    Reads the number of examples from the module-level ``m``.
    """
    # add x1 feature, x1 = 1, to each example set
    X = hstack((ones((m, 1)), X))
    # train the classifier for digit 9.0 (boolean mask turned into 0/1)
    temp_y = (y == 9.0) + 0
    result = scipy.optimize.fmin_cg(cost, fprime=gradient, x0=theta,
                                    args=(X, temp_y), maxiter=50,
                                    disp=False, full_output=True)
    # With full_output=True the second element is the minimum cost found.
    print(result[1])
# Load data from Matlab file
data = loadmat('data.mat')
X,y = data['X'],data['y']
# m (rows of X) and n (columns of X) are module-level values; cost(),
# gradient() and one_vs_all() read m as a global.
m,n = shape(X)
# One parameter per column of X plus one for the constant column that
# one_vs_all() prepends.
theta = zeros((n+1, 1))
one_vs_all(X, y, theta)
The error I receive:
Traceback (most recent call last):
File "/Users/jkarimi91/Documents/Digit Recognizer/Digit_Recognizer.py", line 36, in <module>
one_vs_all(X, y, theta)
File "/Users/jkarimi91/Documents/Digit Recognizer/Digit_Recognizer.py", line 26, in one_vs_all
args=(X, temp_y), maxiter=50, disp=False, full_output=True )
File "/anaconda/lib/python2.7/site-packages/scipy/optimize/optimize.py", line 1092, in fmin_cg
res = _minimize_cg(f, x0, args, fprime, callback=callback, **opts)
File "/anaconda/lib/python2.7/site-packages/scipy/optimize/optimize.py", line 1156, in _minimize_cg
deltak = numpy.dot(gfk, gfk)
ValueError: shapes (401,5000) and (401,5000) not aligned: 5000 (dim 1) != 401 (dim 0)
[Finished in 1.0s with exit code 1]
With the current code, the cost & gradient function are each returning a 2-D array. For fmin_cg to function properly, these functions must each return a 1-D array (as noted by the documentation).
I know this might be a little late, but this is supposed to work.
In your gradient function I got several memory errors, so I changed the code a little and added regularization. Check it out:
def gradients(theta, X, y, Lambda):
    """Regularized logistic-regression gradient, returned as a 1-D array.

    ``theta`` may be passed flat; it is reshaped to a column with one entry
    per column of ``X``.  ``y`` is expected as a column so that ``h - y``
    lines up element-wise.  The first parameter (the constant term) is
    excluded from the regularization term.
    """
    # Function-scope import: the file's numpy import list has no array/reshape.
    from numpy import array

    m, n = shape(X)
    # array() copies, so zeroing the first entry below cannot leak back into
    # the caller's theta (a reshape of the input would share its memory).
    theta = array(theta, dtype=float).reshape(n, 1)
    h = sigmoid(X.dot(theta)) - y
    theta[0, 0] = 0
    # float(m) avoids integer division of Lambda / m under Python 2.
    grad = X.T.dot(h) / float(m) + (Lambda / float(m)) * theta
    return grad.ravel()