How to apply the multiplication rule when selecting two items without replacement? - sympy

I'm experimenting with sympy to reproduce an example where a box has three marbles:
Red
White
Blue
Two marbles will be drawn at random without replacement.
Q: What is the chance of drawing the Red marble and then the White marble?
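For reference, the multiplication-rule calculation I'm trying to reproduce is P(R then W) = P(R) * P(W | R) = 1/3 * 1/2 = 1/6.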
I have been able to calculate this with the multiplication rule by hard-coding P() instances that wrap the distribution before the first marble is selected and then the distribution before the second marble is selected:
from sympy.stats import DiscreteUniform, density, P
from sympy import symbols, Eq
# Coloured marbles
R, W, B = symbols('R W B')
# Select first marble without replacement
PFirstSelection = P(Eq(DiscreteUniform('FirstSelection', (R, W, B)), R))
# Select second marble - Red is no longer available because it was selected without replacement
PSecondSelection = P(Eq(DiscreteUniform('SecondSelection', (W, B)), W))
print(PFirstSelection)
# 1/3
print(PSecondSelection)
# 1/2
# Multiplication rule
print(PFirstSelection * PSecondSelection)
# 1/6
Is there a better way that I can achieve this with sympy?

In this case you'd be better off using combination functions.
DiscreteUniform doesn't seem designed to have its elements changed after creation.
from sympy.functions.combinatorial.numbers import nC, nP
print(1 / nP(3, 2)) # 1/6
If you don't care about order,
print(nP(2, 2) / nP(3, 2)) # 1/3
Edited (and also modified for Python 3).
For picking N out of M things, you can simply do it like below:
from sympy.functions.combinatorial.numbers import nC, nP
def pickProb(candidates, picks, ordered=False):
    picks_num = len(picks)
    numerator = nP(picks_num, picks_num) if ordered else 1
    denominator = nP(len(candidates), picks_num)
    return numerator / denominator
print(pickProb('RWB', 'RW')) # 1/6
print(pickProb('RWBrwba', 'Ra')) # 1/42
print(pickProb('RWBrwba', 'RWa')) # 1/210
print(pickProb('RWBrwba', 'RWa', ordered=True)) # 1/35
And combination functions can also handle duplicates, like 'R', 'R', 'W', 'B'.
from operator import mul
from functools import reduce  # needed in Python 3, where reduce is no longer a builtin
from sympy.functions.combinatorial.numbers import nC, nP

def pickProb(candidates, picks):
    picks_num = len(picks)
    c_counts = {}
    for c in candidates:
        c_counts[c] = c_counts[c] + 1 if c in c_counts else 1
    p_counts = {}
    for p in picks:
        p_counts[p] = p_counts[p] + 1 if p in p_counts else 1
    combinations = reduce(mul, [nP(c_counts[x], p_counts[x]) for x in p_counts.keys()], 1)
    denominator = nP(len(candidates), picks_num) / combinations
    return 1 / denominator
print(pickProb('RWBra', 'RWa')) # 1/60
print(pickProb('RRRWa', 'RWa')) # 1/20
print(pickProb('RRRWa', 'RRa')) # 1/10
But DiscreteUniform cannot, because this case is not "uniform".
from sympy.stats import DiscreteUniform, density, P, Hypergeometric
from sympy import Symbol, Eq
deck = DiscreteUniform('M', 'RRWB')
print(density(deck).dict) # {W: 1/4, R: 1/4, B: 1/4}
print(P(Eq(deck, Symbol('R')))) # 1/4
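For the duplicate case, note that the import above already includes Hypergeometric, which is the sympy.stats distribution meant for exactly this draw-without-replacement situation when you only care about how many marbles of a given colour are drawn. A small sketch of my own (assuming the parameter order Hypergeometric(name, total, marked, draws); check it against your sympy version):
from sympy import Eq
from sympy.stats import Hypergeometric, P

# 'RRWB' box: 4 marbles in total, 2 of them red, 2 draws without replacement
X = Hypergeometric('X', 4, 2, 2)  # X = number of reds among the 2 draws
print(P(Eq(X, 2)))  # both draws red: 1/6
print(P(Eq(X, 1)))  # exactly one red: 2/3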

I think you're using sympy correctly, but you can improve the way you use Python (e.g., more generic, more functional, no hardcoding).
For instance:
from sympy.stats import DiscreteUniform, density, P
from sympy import symbols, Eq
from itertools import accumulate
def ToSet(value):
    return set(value.split(' '))

def ProbaOfPick(pickSet, fromSet, operationTag):
    return P(Eq(DiscreteUniform(operationTag, symbols(fromSet)), symbols(pickSet)))

def PickWithoutReplacement(allset, picklist, probaFunc):
    currentSet = allset
    probaSeq = []
    operationSeq = []
    for pick in picklist:
        operationTag = "picking: " + pick
        newP = probaFunc(pick, currentSet, operationTag)
        operationSeq.append(operationTag + " from " + str(currentSet))
        probaSeq.append(newP)
        currentSet -= {pick}  # remove the picked element (set(pick) would split multi-character names like 'G2')
    return (operationSeq, probaSeq)
allset = ToSet('R W B Y Ma G1 G2')
picks = 'R', 'W', 'G2'
operationSeq, probaSeq = PickWithoutReplacement(allset, picks, ProbaOfPick)
probas = list(accumulate(probaSeq, lambda a, b: a*b))
for op in operationSeq:
    print(op)
print(probas)
Also, you can change the uniform distribution to anything non-uniform.
EDIT: dependency injection (ProbaOfPick -> probaFunc) added.
This code is only a starter.
Result:
picking: R from {'G2', 'Ma', 'Y', 'B', 'R', 'G1', 'W'}
picking: W from {'G2', 'Ma', 'Y', 'B', 'G1', 'W'}
picking: G2 from {'G2', 'Ma', 'Y', 'B', 'G1'}
[1/7, 1/42, 1/210]
Next steps: allow picking more than one item at each step, allow a non-uniform probability distribution, etc.
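One hedged sketch of that non-uniform extension (the weights dict and ProbaOfWeightedPick are my own names, not from the code above): replace DiscreteUniform with a FiniteRV whose probabilities are renormalised over the colours still in the box, and inject it as probaFunc into PickWithoutReplacement.
from sympy import Eq, Rational, Symbol
from sympy.stats import FiniteRV, P

# hypothetical marble counts per colour
weights = {'R': 2, 'W': 1, 'B': 1}

def ProbaOfWeightedPick(pickSet, fromSet, operationTag):
    # renormalise the weights over whatever is still available in the box
    total = sum(weights[c] for c in fromSet)
    dist = {Symbol(c): Rational(weights[c], total) for c in fromSet}
    return P(Eq(FiniteRV(operationTag, dist), Symbol(pickSet)))

print(ProbaOfWeightedPick('R', {'R', 'W', 'B'}, 'first pick'))  # 1/2 with these weights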

How could I generate random coefficients for polynomials using Sum( f(x), (x,0,b) )?

from sympy import Sum, Eq
from sympy.abc import n, x
import random

def polynomial(x):
    i = 0
    def random_value(i):
        return random.choice([i for i in range(-10, 10) if i not in [0]])
    eq = Sum(random_value(i)*x**n, (n, 0, random_value(i)))
    display(Eq(eq, eq.doit(), evaluate=False))

polynomial(x)
polynomial(x)
With this code, the coefficients are always the same.
Also, I am not sure whether the algebraic evaluations are correct for b < 0.
One way is to use IndexedBase to generate symbolic-placeholder coefficients, and then substitute them with numerical coefficients.
from sympy import Sum, Eq, Matrix, IndexedBase
from sympy.abc import n, x
import random

def polynomial(x):
    # n will go from zero to this positive value
    to = random.randint(0, 10)
    # generate random coefficients
    # It is important for them to be a sympy Matrix or Tuple,
    # otherwise the substitution (later step) won't work
    coeff = Matrix([random.randint(-10, 10) for i in range(to + 1)])
    c = IndexedBase("c")
    eq = Sum(c[n]*x**n, (n, 0, to)).doit()
    eq = eq.subs(c, coeff)
    return eq

display(polynomial(x))
display(polynomial(x))
Another way is to avoid using Sum, relying instead on list-comprehension syntax and the builtin sum:
def polynomial(x):
    to = random.randint(0, 10)
    coeff = [random.randint(-10, 10) for i in range(to + 1)]
    return sum([c * x**n for c, n in zip(coeff, range(to + 1))])

display(polynomial(x))
display(polynomial(x))
You can pass a list of coefficients (with highest order coefficient first and constant last) directly to Poly and then convert that to an expression:
>>> from sympy import Poly
>>> from sympy.abc import x
>>> Poly([1,2,3,4], x)
Poly(x**3 + 2*x**2 + 3*x + 4, x, domain='ZZ')
>>> _.as_expr()
x**3 + 2*x**2 + 3*x + 4
>>> from random import randint, choice
>>> Poly([choice((-1,1))*randint(1,10) for i in range(randint(0, 10))], x).as_expr()
-3*x**4 + 3*x**3 - x**2 - 6*x + 2

TypeError: only length-1 arrays can be converted to Python scalars, python2.7

I searched for this issue and found several questions with the same error but with different code and different causes, so I was hesitant to post my issue here. After reading most of the answers, I didn't find a solution to my problem.
The original and full code is here.
chapter6.py:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt

from datasets import gtsrb
from classifiers import MultiClassSVM


def main():
    strategies = ['one-vs-one', 'one-vs-all']
    features = [None, 'gray', 'rgb', 'hsv', 'surf', 'hog']
    accuracy = np.zeros((2, len(features)))
    precision = np.zeros((2, len(features)))
    recall = np.zeros((2, len(features)))
    for f in xrange(len(features)):
        print "feature", features[f]
        (X_train, y_train), (X_test, y_test) = gtsrb.load_data(
            "datasets/gtsrb_training",
            feature=features[f],
            test_split=0.2,
            seed=42)
        # convert to numpy
        X_train = np.squeeze(np.array(X_train)).astype(np.float32)
        y_train = np.array(y_train)
        X_test = np.squeeze(np.array(X_test)).astype(np.float32)
        y_test = np.array(y_test)
        # find all class labels
        labels = np.unique(np.hstack((y_train, y_test)))
        for s in xrange(len(strategies)):
            print " - strategy", strategies[s]
            # set up SVMs
            MCS = MultiClassSVM(len(labels), strategies[s])
            # training phase
            print " - train"
            MCS.fit(X_train, y_train)
            # test phase
            print " - test"
            acc, prec, rec = MCS.evaluate(X_test, y_test)
            accuracy[s, f] = acc
            precision[s, f] = np.mean(prec)
            recall[s, f] = np.mean(rec)
            print " - accuracy: ", acc
            print " - mean precision: ", np.mean(prec)
            print " - mean recall: ", np.mean(rec)
    # plot results as stacked bar plot
    f, ax = plt.subplots(2)
    for s in xrange(len(strategies)):
        x = np.arange(len(features))
        ax[s].bar(x - 0.2, accuracy[s, :], width=0.2, color='b',
                  hatch='/', align='center')
        ax[s].bar(x, precision[s, :], width=0.2, color='r', hatch='\\',
                  align='center')
        ax[s].bar(x + 0.2, recall[s, :], width=0.2, color='g', hatch='x',
                  align='center')
        ax[s].axis([-0.5, len(features) + 0.5, 0, 1.5])
        ax[s].legend(('Accuracy', 'Precision', 'Recall'), loc=2, ncol=3,
                     mode='expand')
        ax[s].set_xticks(np.arange(len(features)))
        ax[s].set_xticklabels(features)
        ax[s].set_title(strategies[s])
    plt.show()


if __name__ == '__main__':
    main()
classifiers.py:
import cv2
import numpy as np

from abc import ABCMeta, abstractmethod
from matplotlib import pyplot as plt

__author__ = "Michael Beyeler"
__license__ = "GNU GPL 3.0 or later"


class Classifier:
    """
    Abstract base class for all classifiers

    A classifier needs to implement at least two methods:
    - fit: A method to train the classifier by fitting the model to
      the data.
    - evaluate: A method to test the classifier by predicting labels of
      some test data based on the trained model.

    A classifier also needs to specify a classification strategy via
    setting self.mode to either "one-vs-all" or "one-vs-one".
    The one-vs-all strategy involves training a single classifier per
    class, with the samples of that class as positive samples and all
    other samples as negatives.
    The one-vs-one strategy involves training a single classifier per
    class pair, with the samples of the first class as positive samples
    and the samples of the second class as negative samples.

    This class also provides methods to calculate accuracy, precision,
    recall, and the confusion matrix.
    """
    __metaclass__ = ABCMeta

    #abstractmethod
    def fit(self, X_train, y_train):
        pass

    #abstractmethod
    def evaluate(self, X_test, y_test, visualize=False):
        pass
    def _accuracy(self, y_test, Y_vote):
        """Calculates accuracy

        This method calculates the accuracy based on a vector of
        ground-truth labels (y_test) and a 2D voting matrix (Y_vote) of
        size (len(y_test), num_classes).

        :param y_test: vector of ground-truth labels
        :param Y_vote: 2D voting matrix (rows=samples, cols=class votes)
        :returns: accuracy e[0,1]
        """
        # predicted classes
        y_hat = np.argmax(Y_vote, axis=1)
        # all cases where predicted class was correct
        mask = y_hat == y_test
        return np.float32(np.count_nonzero(mask)) / len(y_test)
    def _precision(self, y_test, Y_vote):
        """Calculates precision

        This method calculates precision extended to multi-class
        classification by help of a confusion matrix.

        :param y_test: vector of ground-truth labels
        :param Y_vote: 2D voting matrix (rows=samples, cols=class votes)
        :returns: precision e[0,1]
        """
        # predicted classes
        y_hat = np.argmax(Y_vote, axis=1)
        if self.mode == "one-vs-one":
            # need confusion matrix
            conf = self._confusion(y_test, Y_vote)
            # consider each class separately
            prec = np.zeros(self.num_classes)
            for c in xrange(self.num_classes):
                # true positives: label is c, classifier predicted c
                tp = conf[c, c]
                # false positives: label is c, classifier predicted not c
                fp = np.sum(conf[:, c]) - conf[c, c]
                if tp + fp != 0:
                    prec[c] = tp * 1. / (tp + fp)
        elif self.mode == "one-vs-all":
            # consider each class separately
            prec = np.zeros(self.num_classes)
            for c in xrange(self.num_classes):
                # true positives: label is c, classifier predicted c
                tp = np.count_nonzero((y_test == c) * (y_hat == c))
                # false positives: label is c, classifier predicted not c
                fp = np.count_nonzero((y_test == c) * (y_hat != c))
                if tp + fp != 0:
                    prec[c] = tp * 1. / (tp + fp)
        return prec
    def _recall(self, y_test, Y_vote):
        """Calculates recall

        This method calculates recall extended to multi-class
        classification by help of a confusion matrix.

        :param y_test: vector of ground-truth labels
        :param Y_vote: 2D voting matrix (rows=samples, cols=class votes)
        :returns: recall e[0,1]
        """
        # predicted classes
        y_hat = np.argmax(Y_vote, axis=1)
        if self.mode == "one-vs-one":
            # need confusion matrix
            conf = self._confusion(y_test, Y_vote)
            # consider each class separately
            recall = np.zeros(self.num_classes)
            for c in xrange(self.num_classes):
                # true positives: label is c, classifier predicted c
                tp = conf[c, c]
                # false negatives: label is not c, classifier predicted c
                fn = np.sum(conf[c, :]) - conf[c, c]
                if tp + fn != 0:
                    recall[c] = tp * 1. / (tp + fn)
        elif self.mode == "one-vs-all":
            # consider each class separately
            recall = np.zeros(self.num_classes)
            for c in xrange(self.num_classes):
                # true positives: label is c, classifier predicted c
                tp = np.count_nonzero((y_test == c) * (y_hat == c))
                # false negatives: label is not c, classifier predicted c
                fn = np.count_nonzero((y_test != c) * (y_hat == c))
                if tp + fn != 0:
                    recall[c] = tp * 1. / (tp + fn)
        return recall
    def _confusion(self, y_test, Y_vote):
        """Calculates confusion matrix

        This method calculates the confusion matrix based on a vector of
        ground-truth labels (y-test) and a 2D voting matrix (Y_vote) of
        size (len(y_test), num_classes).
        Matrix element conf[r,c] will contain the number of samples that
        were predicted to have label r but have ground-truth label c.

        :param y_test: vector of ground-truth labels
        :param Y_vote: 2D voting matrix (rows=samples, cols=class votes)
        :returns: confusion matrix
        """
        y_hat = np.argmax(Y_vote, axis=1)
        conf = np.zeros((self.num_classes, self.num_classes)).astype(np.int32)
        for c_true in xrange(self.num_classes):
            # looking at all samples of a given class, c_true
            # how many were classified as c_true? how many as others?
            for c_pred in xrange(self.num_classes):
                y_this = np.where((y_test == c_true) * (y_hat == c_pred))
                conf[c_pred, c_true] = np.count_nonzero(y_this)
        return conf

class MultiClassSVM(Classifier):
    """
    Multi-class classification using Support Vector Machines (SVMs)

    This class implements an SVM for multi-class classification. Whereas
    some classifiers naturally permit the use of more than two classes
    (such as neural networks), SVMs are binary in nature.
    However, we can turn SVMs into multinomial classifiers using at least
    two different strategies:
    * one-vs-all: A single classifier is trained per class, with the
                  samples of that class as positives (label 1) and all
                  others as negatives (label 0).
    * one-vs-one: For k classes, k*(k-1)/2 classifiers are trained for each
                  pair of classes, with the samples of the one class as
                  positives (label 1) and samples of the other class as
                  negatives (label 0).
    Each classifier then votes for a particular class label, and the final
    decision (classification) is based on a majority vote.
    """
    def __init__(self, num_classes, mode="one-vs-all", params=None):
        """
        The constructor makes sure the correct number of classifiers is
        initialized, depending on the mode ("one-vs-all" or "one-vs-one").

        :param num_classes: The number of classes in the data.
        :param mode: Which classification mode to use.
                     "one-vs-all": single classifier per class
                     "one-vs-one": single classifier per class pair
                     Default: "one-vs-all"
        :param params: SVM training parameters.
                       For now, default values are used for all SVMs.
                       Hyperparameter exploration can be achieved by
                       embedding the MultiClassSVM process flow in a
                       for-loop that classifies the data with
                       different parameter values, then pick the
                       values that yield the best accuracy.
                       Default: None
        """
        self.num_classes = num_classes
        self.mode = mode
        self.params = params or dict()
        # initialize correct number of classifiers
        self.classifiers = []
        if mode == "one-vs-one":
            # k classes: need k*(k-1)/2 classifiers
            for _ in xrange(num_classes*(num_classes - 1) / 2):
                self.classifiers.append(cv2.ml.SVM_create())
        elif mode == "one-vs-all":
            # k classes: need k classifiers
            for _ in xrange(num_classes):
                self.classifiers.append(cv2.ml.SVM_create())
        else:
            print "Unknown mode ", mode
    def fit(self, X_train, y_train, params=None):
        """Fits the model to training data

        This method trains the classifier on data (X_train) using either
        the "one-vs-one" or "one-vs-all" strategy.

        :param X_train: input data (rows=samples, cols=features)
        :param y_train: vector of class labels
        :param params: dict to specify training options for cv2.SVM.train
                       leave blank to use the parameters passed to the
                       constructor
        """
        if params is None:
            params = self.params
        if self.mode == "one-vs-one":
            svm_id = 0
            for c1 in xrange(self.num_classes):
                for c2 in xrange(c1 + 1, self.num_classes):
                    # indices where class labels are either `c1` or `c2`
                    data_id = np.where((y_train == c1) + (y_train == c2))[0]
                    # set class label to 1 where class is `c1`, else 0
                    y_train_bin = np.where(y_train[data_id] == c1, 1,
                                           0).flatten()
                    self.classifiers[svm_id].train(X_train[data_id, :],
                                                   y_train_bin,
                                                   params=self.params)
                    svm_id += 1
        elif self.mode == "one-vs-all":
            for c in xrange(self.num_classes):
                # train c-th SVM on class c vs. all other classes
                # set class label to 1 where class==c, else 0
                y_train_bin = np.where(y_train == c, 1, 0).flatten()
                # train SVM
                self.classifiers[c].train(X_train, y_train_bin,
                                          params=self.params)
    def evaluate(self, X_test, y_test, visualize=False):
        """Evaluates the model on test data

        This method evaluates the classifier's performance on test data
        (X_test) using either the "one-vs-one" or "one-vs-all" strategy.

        :param X_test: input data (rows=samples, cols=features)
        :param y_test: vector of class labels
        :param visualize: flag whether to plot the results (True) or not
                          (False)
        :returns: accuracy, precision, recall
        """
        # prepare Y_vote: for each sample, count how many times we voted
        # for each class
        Y_vote = np.zeros((len(y_test), self.num_classes))
        if self.mode == "one-vs-one":
            svm_id = 0
            for c1 in xrange(self.num_classes):
                for c2 in xrange(c1 + 1, self.num_classes):
                    data_id = np.where((y_test == c1) + (y_test == c2))[0]
                    X_test_id = X_test[data_id, :]
                    y_test_id = y_test[data_id]
                    # set class label to 1 where class==c1, else 0
                    # y_test_bin = np.where(y_test_id==c1,1,0).reshape(-1,1)
                    # predict labels
                    y_hat = self.classifiers[svm_id].predict_all(X_test_id)
                    for i in xrange(len(y_hat)):
                        if y_hat[i] == 1:
                            Y_vote[data_id[i], c1] += 1
                        elif y_hat[i] == 0:
                            Y_vote[data_id[i], c2] += 1
                        else:
                            print "y_hat[", i, "] = ", y_hat[i]
                    # we vote for c1 where y_hat is 1, and for c2 where y_hat
                    # is 0 np.where serves as the inner index into the data_id
                    # array, which in turn serves as index into the results
                    # array
                    # Y_vote[data_id[np.where(y_hat == 1)[0]], c1] += 1
                    # Y_vote[data_id[np.where(y_hat == 0)[0]], c2] += 1
                    svm_id += 1
        elif self.mode == "one-vs-all":
            for c in xrange(self.num_classes):
                # set class label to 1 where class==c, else 0
                # predict class labels
                # y_test_bin = np.where(y_test==c,1,0).reshape(-1,1)
                # predict labels
                y_hat = self.classifiers[c].predict_all(X_test)
                # we vote for c where y_hat is 1
                if np.any(y_hat):
                    Y_vote[np.where(y_hat == 1)[0], c] += 1
        # with this voting scheme it's possible to end up with samples
        # that have no label at all...in this case, pick a class at
        # random...
        no_label = np.where(np.sum(Y_vote, axis=1) == 0)[0]
        Y_vote[no_label, np.random.randint(self.num_classes,
                                           size=len(no_label))] = 1
        accuracy = self._accuracy(y_test, Y_vote)
        precision = self._precision(y_test, Y_vote)
        recall = self._recall(y_test, Y_vote)
        return accuracy, precision, recall
When running chapter6.py, the output is:
feature None
- strategy one-vs-one
- train
Traceback (most recent call last):
File "/home/redhwan/Downloads/opencv-python-blueprints-master/chapter6/chapter6.py", line 77, in <module>
main()
File "/home/redhwan/Downloads/opencv-python-blueprints-master/chapter6/chapter6.py", line 44, in main
MCS.fit(X_train, y_train)
File "/home/redhwan/Downloads/opencv-python-blueprints-master/chapter6/classifiers.py", line 258, in fit
params=self.params)
TypeError: only length-1 arrays can be converted to Python scalars
Please help me or give me a suggestion.
Thank you in advance!
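Not part of the original post, but some context on where the error likely comes from: cv2.ml.SVM_create() is the OpenCV 3 API, whose train() takes (samples, layout, responses) and accepts no params keyword, whereas the call in classifiers.py (train(X_train[data_id, :], y_train_bin, params=self.params)) follows the old cv2.SVM interface; the label array then appears to land in the slot where the integer layout flag is expected, which would produce exactly the "only length-1 arrays" message. A minimal, self-contained sketch of the OpenCV 3 style call, with placeholder data and parameters rather than the book's settings:
import cv2
import numpy as np

# toy data just to make the snippet runnable
X = np.random.rand(20, 5).astype(np.float32)
y = np.random.randint(0, 2, 20).astype(np.int32)

svm = cv2.ml.SVM_create()
svm.setType(cv2.ml.SVM_C_SVC)       # placeholder parameters; the params dict
svm.setKernel(cv2.ml.SVM_LINEAR)    # from the book would be mapped onto these
svm.setC(1.0)                       # setter calls instead of a keyword argument
svm.train(X, cv2.ml.ROW_SAMPLE, y)  # OpenCV 3 signature: samples, layout, responses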

Shifting scatter points for each array for delta x

I'm trying to sort out a plot which at the moment looks like this:
I'm trying to figure out how to apply a different dx shift to each dataset so that the LAST item of the series (in this case q) remains at the center, while the first 8 items (from a to h) are shifted to the left and the last 8 (from g to p) to the right, each by a different offset.
I have two arrays (M and M1) of shape (12, 17) for each item within a loop, so that shape corresponds to one color of the scatter points.
import matplotlib.cm as cm
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

fig, ax0 = plt.subplots(nrows=1, ncols=1)
months_expanded = np.zeros((12, 17))
months = np.arange(1, 13)
M = np.random.rand(12, 17)
M1 = np.random.rand(12, 17)
datalist = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q']

for idx, sub in enumerate(datalist):
    for i in range(0, 12):
        months_expanded[i] = np.repeat(months[i], 17)
    difference = abs(M - M1)
    ax0.scatter(months_expanded[:, idx], difference[:, idx], label=sub)

colormap = plt.cm.gist_ncar
colorst = [colormap(i) for i in np.linspace(0, 0.9, len(ax0.collections))]
for t, j1 in enumerate(ax0.collections):
    j1.set_color(colorst[t])

ax0.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax0.yaxis.grid(True)
ax0.xaxis.grid(True)
matplotlib.rcParams.update({'font.size': 30})
fig.set_size_inches(20, 20)
fig.savefig(outfile, bbox_inches='tight')
You can predefine an offset and create a set of x-values that include the offset. See the below minimal example that illustrates this point:
import numpy as np
import matplotlib.pyplot as plt
N = 5 # This would be 17 in your case
# Create offsets
dx = np.linspace(-0.5, 0.5, N)
# Put last data set in centre
dx[N//2:] += (dx[1]-dx[0])
dx[-1] = 0.0
dx_expanded = np.repeat(dx, 12).reshape(N, 12)
# X-values to be shifted
months = np.arange(1, 13)
months_expanded = np.tile(months, N).reshape(N, 12)
months_shifted = months_expanded + dx_expanded
# Y-values
M = np.random.rand(12,N)
M = np.linspace(1, 5, 12)
M = np.tile(M, N).reshape(N,12)
# Plot results
for idx in range(N):
    plt.scatter(months_shifted[idx], M[idx])
plt.show()
Result:

Python: Data fitting with scipy.optimize.curve_fit with sigma = 0

I'm trying to fit a curve with scipy.optimize.curve_fit and it works pretty well so far, except when a value in my sigma array is zero. I understand that the algorithm can't handle this, since it divides by zero in that case. From the scipy documentation:
sigma : None or M-length sequence, optional
If not None, the uncertainties in the ydata array. These are used as weights in the least-squares problem i.e. minimising np.sum( ((f(xdata, *popt) - ydata) / sigma)**2 ) If None, the uncertainties are assumed to be 1.
Here's what my code looks like:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
x = [0.125, 0.375, 0.625, 0.875, 1.125, 1.375, 1.625, 1.875, 2.125, 2.375, 2.625, 2.875, 3.125, 3.375, 3.625, 3.875, 4.125, 4.375]
y_para = [0, 0, 0.0414, 0.2164, 0.2616, 0.4254, 0.5698, 0.5921, 0.6286, 0.6452, 0.5879, 0.6032, 0.6667, 0.6325, 0.7629, 0.7164, 0.7091, 0.7887]
err = [0, 0, 0.0391, 0.0331, 0.0943, 0.0631, 0.1219, 0.1063, 0.0912, 0.0516, 0.0365, 0.0327, 0.0227, 0.103, 0.1344, 0.0697, 0.0114, 0.0465]
def logistic_growth(x, A1, A2, x_0, p):
    return A2 + (A1-A2)/(1+(x/x_0)**p)
x_plot = np.linspace(0, 4.5, 100)
bounds_para = ([0.,0,-np.inf,-np.inf],[0.0000000001, 1,np.inf,np.inf])
paras, paras_cov = curve_fit(logistic_growth, x, y_para, bounds = bounds_para, sigma = err, absolute_sigma=True)
para_curve = logistic_growth(x_plot, *paras)
plt.figure()
plt.errorbar(x,y_para, err, color = 'b', fmt = 'o', label = "Data")
plt.plot(x_plot, para_curve, color = 'b', label = "Fit")
plt.show()
Executing this without the sigma option in curve_fit works fine, but including it raises:
ValueError: Residuals are not finite in the initial point.
This results from the zeros in the err array.
Does anyone know a way to work around this?
Why not just drop the variable? If it has zero variance it cannot contribute in any meaningful way to your analysis.
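Reading that suggestion as "drop the data points whose uncertainty is zero", a minimal sketch (reusing x, y_para, err, logistic_growth and bounds_para from the question):
import numpy as np
from scipy.optimize import curve_fit

# mask out the points with zero uncertainty before fitting
x_arr, y_arr, err_arr = np.asarray(x), np.asarray(y_para), np.asarray(err)
mask = err_arr > 0
paras, paras_cov = curve_fit(logistic_growth, x_arr[mask], y_arr[mask],
                             bounds=bounds_para, sigma=err_arr[mask],
                             absolute_sigma=True)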
This is what the scipy doc says about the curve_fit sigma parameter: 'These are used as weights in the least-squares problem ...' Then, in my opinion, they should be inverse to the errors. Here's what I suggest.
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
x = [0.125, 0.375, 0.625, 0.875, 1.125, 1.375, 1.625, 1.875, 2.125, 2.375, 2.625, 2.875, 3.125, 3.375, 3.625, 3.875, 4.125, 4.375]
y_para = [0, 0, 0.0414, 0.2164, 0.2616, 0.4254, 0.5698, 0.5921, 0.6286, 0.6452, 0.5879, 0.6032, 0.6667, 0.6325, 0.7629, 0.7164, 0.7091, 0.7887]
err = [0, 0, 0.0391, 0.0331, 0.0943, 0.0631, 0.1219, 0.1063, 0.0912, 0.0516, 0.0365, 0.0327, 0.0227, 0.103, 0.1344, 0.0697, 0.0114, 0.0465]
weights = [1/max(_,0.001) for _ in err]
print (weights)
def logistic_growth(x, A1, A2, x_0, p):
    return A2 + (A1-A2)/(1+(x/x_0)**p)

x_plot = np.linspace(0, 4.5, 100)
bounds_para = ([0., 0, -np.inf, -np.inf], [0.0000000001, 1, np.inf, np.inf])
paras, paras_cov = curve_fit(logistic_growth, x, y_para, bounds=bounds_para,
                             absolute_sigma=True,
                             sigma=weights)
para_curve = logistic_growth(x_plot, *paras)
plt.figure()
plt.errorbar(x,y_para, err, color = 'b', fmt = 'o', label = "Data")
plt.plot(x_plot, para_curve, color = 'b', label = "Fit")
plt.show()
This results in the following plot, where those initial data points are made to lie very close to the fitted line.
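Another workaround sketch, not from either answer and based only on the documentation excerpt quoted in the question (sigma divides the residuals): keep the measured errors as sigma but clip the zeros up to a small positive floor. The floor of 1e-3 here is an arbitrary choice; the other names come from the question's code.
import numpy as np
from scipy.optimize import curve_fit

err_floored = np.clip(np.asarray(err, dtype=float), 1e-3, None)
paras, paras_cov = curve_fit(logistic_growth, x, y_para, bounds=bounds_para,
                             sigma=err_floored, absolute_sigma=True)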

Interpolating 3d data at a single point in space (Python 2.7)

I have a point cloud in 4 dimensions, where each point in the cloud has a location and a value (x,y,z,Value). In addition, I have a 'special' point, S0, within the 3d point cloud; I've used this example to find the closest 10 points in the cloud, relative to S0. Now, I have a numpy array for each of the 10 closest points and their values. How can I interpolate these 10 points, to find the interpolated value at point S0? Example code is shown below:
import numpy as np
import matplotlib.pyplot as plt

numpoints = 20
linexs = 320
lineys = 40
linezs = 60
linexe = 20
lineye = 20
lineze = 0

# Create vectors of points
xpts = np.linspace(linexs, linexe, numpoints)
ypts = np.linspace(lineys, lineye, numpoints)
zpts = np.linspace(linezs, lineze, numpoints)
lin = np.dstack((xpts, ypts, zpts))

# Image line of points
fig = plt.figure()
ax = fig.add_subplot(211, projection='3d')
ax.set_xlim(0, 365); ax.set_ylim(-85, 85); ax.set_zlim(0, 100)
ax.plot_wireframe(xpts, ypts, zpts)
ax.view_init(elev=12, azim=78)

def randrange(n, vmin, vmax):
    return (vmax - vmin)*np.random.rand(n) + vmin

n = 10
for n in range(21):
    xs = randrange(n, 0, 350)
    ys = randrange(n, -75, 75)
    zs = randrange(n, 0, 100)
    ax.scatter(xs, ys, zs)
dat = np.dstack((xs, ys, zs))

ax.set_xlabel('X Label')
ax.set_xlim(0, 350)
ax.set_ylabel('Y Label')
ax.set_ylim(-75, 75)
ax.set_zlabel('Z Label')
ax.set_zlim(0, 100)

ax = fig.add_subplot(212, projection='3d')
ax.set_xlim(0, 365); ax.set_ylim(-85, 85); ax.set_zlim(0, 100)
ax.plot_wireframe(xpts, ypts, zpts)
ax.view_init(elev=12, azim=78)
plt.show()

dist = []
# Calculate distance from first point to all other points in cloud
for l in range(len(xpts)):
    aaa = lin[0][0]-dat
    dist.append(np.sqrt(aaa[0][l][0]**2+aaa[0][l][1]**2+aaa[0][l][2]**2))
full = np.dstack((dat, dist))
aaa = full[0][full[0][:, 3].argsort()]
print(aaa[0:10])
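(Side note, not from the original post: for scattered samples like the 10 nearest neighbours computed above, scipy.interpolate.griddata can interpolate directly at an arbitrary query point. A minimal sketch with stand-in data; pts, vals and s0 are hypothetical names for the neighbour locations, their values, and the special point S0.)
import numpy as np
from scipy.interpolate import griddata

pts = np.random.rand(10, 3) * 100     # stand-in for the 10 closest (x, y, z) points
vals = pts.sum(axis=1)                # stand-in values at those points
s0 = np.array([[50.0, 50.0, 50.0]])   # the query point S0

# linear interpolation only works if S0 lies inside the convex hull of pts
# (it returns nan otherwise), so fall back to nearest-neighbour in that case
v = griddata(pts, vals, s0, method='linear')
if not np.isfinite(v).all():
    v = griddata(pts, vals, s0, method='nearest')
print(v)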
A basic example. Note that the meshgrid is not needed for the interpolation, but only to make a fast ufunc to generate an example function A=f(x,y,z), here A=x+y+z.
from scipy.interpolate import interpn
import numpy as np
#make up a regular 3d grid
X=np.linspace(-5,5,11)
Y=np.linspace(-5,5,11)
Z=np.linspace(-5,5,11)
xv,yv,zv = np.meshgrid(X,Y,Z)
# make up a function
# see http://docs.scipy.org/doc/numpy/reference/ufuncs.html
A = np.add(xv,np.add(yv,zv))
#this one is easy enough for us to know what to expect at (.5,.5,.5)
# usage : interpn(points, values, xi, method='linear', bounds_error=True, fill_value=nan)
interpn((X,Y,Z),A,[0.5,0.5,0.5])
Output:
array([ 1.5])
If you pass in an array of points of interest, it will give you multiple answers.
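For example (my own illustration, reusing the grid X, Y, Z and the array A defined above):
pts = [[0.5, 0.5, 0.5],
       [1.0, 1.0, 1.0],
       [2.5, -1.0, 0.0]]
print(interpn((X, Y, Z), A, pts))  # -> [ 1.5  3.   1.5]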