Sympy has Die and Coin objects which are really useful for understanding Probability concepts. E.g.
from sympy.stats import P, Die, density
from sympy import Eq
D = Die('D', 6)
density(D).dict
# {1: 1/6, 2: 1/6, 3: 1/6, 4: 1/6, 5: 1/6, 6: 1/6}
P(D > 3)
# 1/2
I would like to be able to do similar calculations with a deck of cards, e.g.
deck = DeckOfCards('Cards')
P(Eq(deck, 'AceOfSpades'))
# 1/52
How can I do this with sympy? I guess one option would be to
deck = DiscreteUniform('Cards', list(range(52)))
def lookup_card_id(name):
# encode the list, e.g.
# 0 = Ace of Clubs
# 1 = 2 of Clubs
# 2 = 3 of Clubs
# and so on ...
return card_id
P(Eq(deck, lookup_card_id('AceOfClubs'))
# 1/52
This will get me so far, but it won’t support lookups like:
P(Eq(deck, lookup_card_id('Ace'))
# 1/13
Unfortunately, DiscreteUniform does not seem to be able to handle membership ("is one of") tests such as
P(Eq(deck, [0, 1, 2]))
or
P(deck in [0, 1, 2])
But you can do it by yourself.
from sympy import Eq
from sympy.stats import P, DiscreteUniform
suits = ('Clubs', 'Diamonds', 'Hearts', 'Spades')
numbers = ('Ace', 'Two', 'Three', 'Four', 'Five', 'Six', 'Seven', 'Eight', 'Nine', 'Ten', 'Jack', 'Queen', 'King')
colors = {
'Black': ['Clubs', 'Spades'],
'Red': ['Diamonds', 'Hearts']
}
def get_card_name(suit, number):
    """Build the lookup key for one card, e.g. ('Spades', 'Ace') -> 'AceOfSpades'."""
    template = '{number}Of{suit}'
    return template.format(suit=suit, number=number)
# Map every card name to a unique id in 1..52: suits are laid out in blocks
# of 13, and the rank supplies the 1-based offset inside its suit's block.
cards = {
    get_card_name(suit, number): suit_index * 13 + (rank_index + 1)
    for suit_index, suit in enumerate(suits)
    for rank_index, number in enumerate(numbers)
}
def flatten(list):
    """Concatenate a sequence of sequences into one flat list.

    Only one level of nesting is removed. An empty input yields ``[]``.

    The parameter keeps its original name (which shadows the builtin) so
    that existing keyword callers keep working; the builtin is not needed
    inside this function.
    """
    # A comprehension avoids `reduce`, which is not a builtin on Python 3,
    # and runs in linear time instead of re-copying the accumulator.
    return [item for sublist in list for item in sublist]
def lookup_card_id(names):
    """Resolve a card description to the list of matching card ids.

    ``names`` may be a single card name ('AceOfClubs'), a suit ('Clubs'),
    a rank ('Ace'), a colour ('Black'), or a list/tuple mixing any of
    those. Groups are expanded recursively until only single card names
    remain, which are then looked up in ``cards``.
    """
    if isinstance(names, (list, tuple)):
        resolved = [lookup_card_id(entry) for entry in names]
        return flatten(resolved)
    if names in suits:
        # A suit stands for all thirteen ranks of that suit.
        return lookup_card_id([get_card_name(names, rank) for rank in numbers])
    if names in numbers:
        # A rank stands for that rank in each of the four suits.
        return lookup_card_id([get_card_name(suit, names) for suit in suits])
    if names in colors:
        # A colour stands for its two suits.
        return lookup_card_id(colors[names])
    return [cards[names]]
def prob_pick_card(name):
    """Return the probability of drawing a card that matches ``name``.

    ``name`` is resolved to card ids by ``lookup_card_id``; the result is
    the sum of the single-card probabilities under a uniform 52-card deck.
    """
    matching_ids = lookup_card_id(name)
    deck = DiscreteUniform('Cards', list(range(1, 53)))
    return sum(P(Eq(deck, card_id)) for card_id in matching_ids)
print(prob_pick_card('AceOfClubs')) # 1/52
print(prob_pick_card('Ace')) # 1/13
print(prob_pick_card('Clubs')) # 1/4
print(prob_pick_card('Black')) # 1/2
Related
I have a table, represented by an np.array like the following:
A = [[12,412,42,54],
[144,2,42,4],
[2,43,22,10]]
And a list that contains the desired starting point of each row in A:
L=[0,2,1]
The desired output would be:
B = [[12,412,42,54],
[42,4,np.nan,np.nan],
[43,22,10,np.nan]]
Edit
I prefer to avoid using a for-loop for obvious reasons.
Try comparing L with the column indices, then use boolean-mask indexing to read and assign the items:
# Work on an ndarray so that boolean-mask indexing is available.
A = np.array(A)
n_cols = A.shape[1]
# keep[i, j] is True where column j lies at or after row i's start index.
keep = np.arange(n_cols) >= np.asarray(L).reshape(-1, 1)
out = np.full(A.shape, np.nan)
# Mirroring the mask left-aligns the kept values; the remainder stays NaN.
out[keep[:, ::-1]] = A[keep]
print(out)
Output:
[[ 12. 412. 42. 54.]
[ 42. 4. nan nan]
[ 43. 22. 10. nan]]
My guess would be that a vectorized approach for this would be less efficient than explicit looping, because the result is fundamentally a jagged array, which NumPy does not support well.
However, a loop-based solution is simple and, if needed, can be made faster with Numba's nb.njit():
import numpy as np
import numba as nb
@nb.njit
def jag_nb(arr, starts, empty=np.nan):
    """Left-align each row of ``arr`` from its own starting column.

    Row ``i`` of the result holds ``arr[i, starts[i]:]`` followed by
    ``empty`` padding, so the output has the same shape as ``arr``.

    Parameters
    ----------
    arr : 2-D array
    starts : sequence with one start column per row of ``arr``
    empty : fill value for the vacated trailing positions
    """
    result = np.full(arr.shape, empty)
    for i, x in enumerate(starts):
        if x != 0:
            result[i, :-x] = arr[i, x:]
        else:
            # `:-0` would be an empty slice, so an unshifted row is copied whole.
            result[i, :] = arr[i, :]
    return result
A = np.array([[12, 412, 42, 54], [144, 2, 42, 4], [2, 43, 22, 10]])
L = np.array([0, 2, 1])
# The function defined for this example is `jag_nb`; `jag` does not exist.
jag_nb(A, L)
# array([[ 12., 412., 42., 54.],
# [ 42., 4., nan, nan],
# [ 43., 22., 10., nan]])
Compared to the pure NumPy vectorized approach proposed in @QuangHoang's answer:
def jag_np(arr, starts, empty=np.nan):
    """Left-align each row of ``arr`` from its own starting column (vectorized).

    Row ``i`` of the result holds ``arr[i, starts[i]:]`` followed by
    ``empty`` padding, so the output has the same shape as ``arr``.

    Parameters
    ----------
    arr : 2-D array
    starts : 1-D sequence with one start column per row of ``arr``
    empty : fill value for the vacated trailing positions
    """
    starts = np.asarray(starts)
    # The mask compares against COLUMN indices, so it must span the number
    # of columns (using the row count only works for square input).
    n_cols = arr.shape[1]
    keep = np.arange(n_cols) >= starts[:, None]
    result = np.full(arr.shape, empty)
    # Each row keeps as many cells as it fills; mirroring the mask moves
    # those cells to the left edge of the row.
    result[keep[:, ::-1]] = arr[keep]
    return result
The Numba based approach is noticeably faster, as shown with the following benchmarks:
import pandas as pd
import matplotlib.pyplot as plt
def benchmark(
    funcs,
    ii=range(4, 10, 1),
    is_equal=lambda x, y: np.allclose(x, y, equal_nan=True),
    seed=0,
    unit="ms",
    verbose=True,
    use_str=True
):
    """Time each function in ``funcs`` on random square inputs of growing size.

    For every ``i`` in ``ii`` a ``2**i x 2**i`` random array and a vector of
    random start columns are generated; every function is called as
    ``func(arr, starts)``, checked against the result of ``funcs[0]`` with
    ``is_equal``, and timed (best of 8 repeats of 64 calls).

    Parameters
    ----------
    funcs : sequence of callables taking ``(arr, starts)``
    ii : iterable of exponents; the input size is ``2 ** i``
    is_equal : predicate comparing the reference result with a candidate
    seed : seed passed to ``np.random.seed``
    unit : one of "s", "ms", "µs", "ns"; only affects the printed report
    verbose : print one report line per function and size
    use_str : unused; kept so existing calls remain valid

    Returns
    -------
    (timings, labels) : ``timings`` maps the size ``n`` to a list with one
        best per-call time in seconds per function (``None`` where the
        result did not match the reference); ``labels`` holds the function
        names in the same order.
    """
    # Local import: the stdlib timer replaces the IPython-only `%timeit`
    # magic, so the function also works in a plain Python interpreter.
    import timeit

    n_calls, n_repeats = 64, 8
    labels = [func.__name__ for func in funcs]
    units = {"s": 0, "ms": 3, "µs": 6, "ns": 9}
    assert unit in units
    np.random.seed(seed)
    timings = {}
    for i in ii:
        m = n = 2 ** i
        if verbose:
            print(f"i={i}, n={n}")
        arr = np.random.random((m, n))
        starts = np.random.randint(0, n, m)
        base = funcs[0](arr, starts)
        timings[n] = []
        base_timing = None
        for func in funcs:
            res = func(arr, starts)
            is_good = is_equal(base, res)
            runs = timeit.repeat(
                lambda: func(arr, starts), number=n_calls, repeat=n_repeats)
            timing = min(runs) / n_calls
            if base_timing is None:
                # Keep the reference timing separately so the speed-up column
                # still works when a stored entry is None.
                base_timing = timing
            timings[n].append(timing if is_good else None)
            if verbose:
                print(
                    f"{func.__name__:>24}"
                    f" {is_good!s:5}"
                    f" {timing * (10 ** units[unit]):10.3f} {unit}"
                    f" {base_timing / timing:5.1f}x")
    return timings, labels
def plot(timings, labels, title=None, xlabel="Input Size / #", unit="ms"):
    """Draw three side-by-side views of benchmark timings.

    ``timings`` and ``labels`` are the values returned by ``benchmark``.
    The panels show the absolute best timing (scaled to ``unit``), the
    timing relative to the first function in percent, and the speed gain
    over the first function.
    """
    n_panels = 3
    fig, axs = plt.subplots(1, n_panels, figsize=(8 * n_panels, 6 * 1), squeeze=False)
    absolute_ax, relative_ax, gain_ax = axs[0, 0], axs[0, 1], axs[0, 2]
    exponents = {"s": 0, "ms": 3, "µs": 6, "ns": 9}
    # One row per input size, one column per benchmarked function.
    df = pd.DataFrame(data=timings, index=labels).transpose()
    # Column vector of the reference timings, broadcast against every column.
    base = df[[labels[0]]].to_numpy()
    scaled = df * 10 ** exponents[unit]
    scaled.plot(marker="o", xlabel=xlabel, ylabel=f"Best timing / {unit}", ax=absolute_ax)
    relative = df / base * 100
    relative.plot(marker='o', xlabel=xlabel, ylabel='Relative speed / %', logx=True, ax=relative_ax)
    gain = base / df
    gain.plot(marker='o', xlabel=xlabel, ylabel='Speed Gain / x', ax=gain_ax)
    if title:
        fig.suptitle(title)
    fig.patch.set_facecolor('white')
funcs = jag_np, jag_nb
timings, labels = benchmark(funcs, ii=range(4, 11))
plot(timings, labels, unit="ms")
I wanna get all integer solutions in a limited time, is it possible?
This is a linear, integer constraint satisfaction problem, which can be solved efficiently by OR Tools' CP-SAT. I've modified their example to solve your problem in Python:
from ortools.sat.python import cp_model
class VarArraySolutionPrinter(cp_model.CpSolverSolutionCallback):
    """Solution callback that prints every solution and counts them."""

    def __init__(self, variables):
        super().__init__()
        self.__variables = variables
        self.__solution_count = 0

    def on_solution_callback(self):
        """Print the current value of each tracked variable on one line."""
        self.__solution_count += 1
        for variable in self.__variables:
            print('%s=%i' % (variable, self.Value(variable)), end=' ')
        print()

    def solution_count(self):
        """Return how many solutions have been reported so far."""
        return self.__solution_count
def SearchForAllSolutionsSampleSat():
    """Enumerate every integer solution of a small linear system.

    The model has one variable per coefficient in ``p``, each in 0..100,
    subject to ``sum(p[i] * x[i]) == 30`` and ``sum(x) >= 2``. Every
    solution is printed, followed by the solver status and the total count.
    """
    model = cp_model.CpModel()

    coefficients = [1, 2, 3, 4]
    weighted_total = 30
    minimum_sum = 2

    # One bounded integer variable per coefficient.
    variables = [
        model.NewIntVar(0, 100, f'x{i}') for i in range(len(coefficients))
    ]

    # Equality on the weighted sum, lower bound on the plain sum.
    weighted_terms = [var * coef for var, coef in zip(variables, coefficients)]
    model.Add(sum(weighted_terms) == weighted_total)
    model.Add(sum(variables) >= minimum_sum)

    # Solve, reporting each solution through the callback.
    solver = cp_model.CpSolver()
    printer = VarArraySolutionPrinter(variables)
    status = solver.SearchForAllSolutions(model, printer)

    print('Status = %s' % solver.StatusName(status))
    print('Number of solutions found: %i' % printer.solution_count())
SearchForAllSolutionsSampleSat()
I'm experimenting with sympy to reproduce an example where a box has three marbles:
Red
White
Blue
Two marbles will be drawn at random without replacement.
Q: What is the chance of drawing the Red marble and then the White marble?
I have been able to calculate this using the multiplication rule by hard-coding P() instances wrapping the initial distribution before the first marble is selected and then the distribution before the second marble is selected:
from sympy.stats import DiscreteUniform, density, P
from sympy import symbols, Eq
# Coloured marbles
R, W, B = symbols('R W B')
# Select first marble without replacement
PFirstSelection = P(Eq(DiscreteUniform('FirstSeletion', (R, W, B)), R))
# Select second marble - Red is not longer available because it was selected without replacement
PSecondSelection = P(Eq(DiscreteUniform('SecondSelection', (W, B) ), W))
print(PFirstSelection)
# 1/3
print(PSecondSelection)
# 1/2
# Multiplication rule
print(PFirstSelection * PSecondSelection)
# 1/6
Is there a better way that I can achieve this with sympy?
In this case you would be better off using the combinatorial functions.
DiscreteUniform does not seem to be designed for changing its elements after creation.
from sympy.functions.combinatorial.numbers import nC, nP
print(1 / nP(3, 2)) # 1/6
If you don't care about order,
print(nP(2, 2) / nP(3, 2)) # 1/3
Edited. (and also modified for python3)
For N of M things, you can simply do like below
from sympy.functions.combinatorial.numbers import nC, nP
def pickProb(candidates, picks, ordered=False):
    """Return the probability of drawing ``picks`` from ``candidates``.

    With ``k = len(picks)`` and ``n = len(candidates)`` the result is
    ``1 / nP(n, k)`` by default and ``nP(k, k) / nP(n, k)`` when
    ``ordered`` is true. Items are assumed to be distinct.

    NOTE(review): the ``ordered=True`` branch multiplies by the number of
    arrangements of the picks, i.e. it gives the chance of drawing them in
    any order - confirm the flag name matches the intended meaning.
    """
    k = len(picks)
    arrangements = nP(len(candidates), k)
    if ordered:
        return nP(k, k) / arrangements
    return 1 / arrangements
print(pickProb('RWB', 'RW')) # 1/6
print(pickProb('RWBrwba', 'Ra')) # 1/42
print(pickProb('RWBrwba', 'RWa')) # 1/210
print(pickProb('RWBrwba', 'RWa', ordered=True)) # 1/35
And combination functions can also handle duplicates, like 'R', 'R', 'W', 'B'.
from operator import mul
from sympy.functions.combinatorial.numbers import nC, nP
def pickProb(candidates, picks):
    """Return the probability of drawing ``picks`` in order when ``candidates`` may repeat.

    Both arguments are iterables of hashable items (e.g. strings of
    one-letter labels). For every distinct picked item the number of ways
    to take its required copies from the available copies is multiplied
    together and divided by the number of ordered draws of that length.

    Raises ``KeyError`` if a picked item does not occur in ``candidates``.
    """
    # `reduce` is a builtin only on Python 2; import it explicitly so the
    # function also runs on Python 3.
    from functools import reduce

    picks_num = len(picks)
    c_counts = {}
    for c in candidates:
        c_counts[c] = c_counts.get(c, 0) + 1
    p_counts = {}
    for p in picks:
        p_counts[p] = p_counts.get(p, 0) + 1
    # Ways to choose the needed copies of each distinct picked item.
    combinations = reduce(
        mul, [nP(c_counts[x], p_counts[x]) for x in p_counts], 1)
    denominator = nP(len(candidates), picks_num) / combinations
    return 1 / denominator
print(pickProb('RWBra', 'RWa')) # 1/60
print(pickProb('RRRWa', 'RWa')) # 1/20
print(pickProb('RRRWa', 'RRa')) # 1/10
But DiscreteUniform cannot, because this case is not "uniform".
from sympy.stats import DiscreteUniform, density, P, Hypergeometric
from sympy import Symbol, Eq
deck = DiscreteUniform('M', 'RRWB')
print(density(deck).dict) # {W: 1/4, R: 1/4, B: 1/4}
print(P(Eq(deck, Symbol('R')))) # 1/4
I think you're using sympy correctly, but you can improve the way you use Python (e.g., more generic, more functional, no hardcoding).
For instance:
from sympy.stats import DiscreteUniform, density, P
from sympy import symbols, Eq
from itertools import accumulate
def ToSet(value):
    """Split a space-separated string into the set of its tokens."""
    tokens = value.split(' ')
    return {token for token in tokens}
def ProbaOfPick(pickSet, fromSet, operationTag):
    """Return P(draw == pickSet) for a uniform draw over ``fromSet``.

    ``fromSet`` and ``pickSet`` are turned into sympy symbols, and
    ``operationTag`` names the random variable.
    """
    pool = symbols(fromSet)
    draw = DiscreteUniform(operationTag, pool)
    target = symbols(pickSet)
    return P(Eq(draw, target))
def PickWithoutReplacement(allset, picklist, probaFunc):
    """Compute the per-step probabilities of a sequence of draws without replacement.

    Parameters
    ----------
    allset : set of item names available before the first draw
    picklist : iterable of item names, in draw order
    probaFunc : callable ``(pick, currentSet, operationTag)`` returning the
        probability of drawing ``pick`` from ``currentSet``

    Returns
    -------
    (operationSeq, probaSeq) : a description of each draw and the matching
        probability, both in draw order.
    """
    # Work on a copy so the caller's set is left untouched.
    currentSet = set(allset)
    probaSeq = []
    operationSeq = []
    for pick in picklist:
        operationTag = "picking: " + pick
        newP = probaFunc(pick, currentSet, operationTag)
        operationSeq.append(operationTag + " from " + str(currentSet))
        probaSeq.append(newP)
        # Remove the picked item as a whole; `set(pick)` would split a
        # multi-character name such as 'G2' into single characters.
        currentSet.discard(pick)
    return (operationSeq, probaSeq)
allset = ToSet('R W B Y Ma G1 G2')
picks = 'R', 'W', 'G2'
operationSeq, probaSeq = PickWithoutReplacement(allset, picks, ProbaOfPick)
probas = list(accumulate(probaSeq, lambda a, b: a*b))
for op in operationSeq:
print(op)
print(probas)
You can also change the uniform distribution to any non-uniform one.
EDIT: dependency injection (ProbaOfPick -> probaFunc) added.
This code is only a starter.
Result:
picking: R from {'G2', 'Ma', 'Y', 'B', 'R', 'G1', 'W'}
picking: W from {'G2', 'Ma', 'Y', 'B', 'G1', 'W'}
picking: G2 from {'G2', 'Ma', 'Y', 'B', 'G1'}
[1/7, 1/42, 1/210]
Next steps: allow to pick more than 1 each step, allow non uniform probability distribution, etc
I have a script as follows:
import numpy as np
import pandas as pd
import pdb
# conventions: W = fitness, A = affinity ; sex: 1=M, 0=F; alien: 1=alien,
# 0=native
# pop array order: W, A, sex, alien
def mkpop(n):
    """Create an initial population of ``n`` native individuals.

    Returns an ``(n, 4)`` array whose columns are fitness W (all 1),
    affinity A (normal around 1 with sd 0.1, negatives set to 0), sex
    (random 0 or 1) and the alien flag (all 0).
    """
    affinity = np.random.normal(1, 0.1, size=n)
    affinity[affinity < 0] = 0
    sex = np.random.randint(0, 2, n)
    fitness = np.repeat(a=1, repeats=n)
    alien = np.repeat(a=False, repeats=n)
    # Stack as rows, then flip so that each individual is one row.
    return np.array([fitness, affinity, sex, alien]).T
def migrate(pop, n=10, gParams=[1, 0.1]):
    """Append ``n`` alien immigrants to ``pop`` and return the new array.

    Immigrant fitness is gamma-distributed with shape ``gParams[0]`` and
    scale ``gParams[1]``; affinity is 1, sex is random 0 or 1, and the
    alien flag is set. ``pop`` itself is not modified.
    """
    gamma_shape, gamma_scale = gParams[0], gParams[1]
    fitness = np.random.gamma(shape=gamma_shape, scale=gamma_scale, size=n)
    # sex: 0 is female
    sex = np.random.randint(0, 2, n)
    affinity = np.repeat(1, n)
    # alien flag: 0 is native, 1 is alien
    alien = np.repeat(True, n)
    newcomers = np.array([fitness, affinity, sex, alien]).T
    return np.vstack((pop, newcomers))
def mate(pop):
    """Pair every female with a randomly chosen male and return the offspring.

    ``pop`` is an array with columns W (fitness), A (affinity), sex
    (0 = female, 1 = male) and alien (0 = native, 1 = alien).

    Returns an ``(k, 4)`` array with the same column layout: each offspring
    carries the mid-parent W and A of its pair, a random sex, and an alien
    flag of 0. ``k`` may be 0.
    """
    # split into male and female
    f = pop[pop[:, 2] == 0]
    m = pop[pop[:, 2] == 1]

    # Weight matrices (rows: males, columns: females) for native and alien
    # males, each scaled by the female's affinity.
    native_male = m[:, 3] == 0
    naPdMat = np.outer(native_male, f[:, 1])
    alPdMat = np.outer(m[:, 3], f[:, 1])
    pdMat = alPdMat + naPdMat
    # Normalise every column so it is a probability distribution over males.
    # NOTE(review): as written every male gets the same weight within a
    # column, so the choice is uniform - confirm this is intended.
    pMat = pdMat / np.sum(pdMat, axis=0)

    def choice(x):
        """Draw one row index according to the probabilities in ``x``."""
        return np.random.choice(a=len(x), p=x)

    # One chosen male per female (per column of pMat).
    mCh = m[np.apply_along_axis(choice, 0, pMat), :]
    WMid = (f[:, 0] + mCh[:, 0]) / 2
    AMid = (f[:, 1] + mCh[:, 1]) / 2

    # Pairs from the same group, and alien-female x native-male pairs,
    # get fitness 1; the remaining pairs keep the mid-parent fitness.
    reset_fitness = (
        (f[:, 3] == mCh[:, 3]) |
        ((f[:, 3] == 1) & (mCh[:, 3] == 0))
    )
    WMid[reset_fitness] = 1

    # Number of offspring per pair is Poisson-distributed with lambda = 2W.
    # Built as a list (not a lazy `map`) so it has a length on Python 3.
    nOff = np.asarray(
        [np.random.poisson(lam=w) for w in 2 * WMid], dtype=int)

    # Replicate each pair's mid-parent values once per offspring. A single
    # `np.repeat` replaces the row-by-row vstack and yields an empty (0, 2)
    # array when no pair has offspring.
    midVals = np.array([WMid, AMid]).T
    offspring = np.repeat(midVals, nOff, axis=0)

    sex = np.random.randint(0, 2, len(offspring))
    alien = np.repeat(0, len(offspring))
    otherStats = np.array([sex, alien]).T
    return np.hstack([offspring, otherStats])
def sim(nInit, nGen=100, nAlien=10, gParams=[1, 0.1]):
    """Run the migration/mating simulation and collect per-generation statistics.

    Parameters
    ----------
    nInit : size of the initial native population
    nGen : number of generations to simulate
    nAlien : number of immigrants added each generation
    gParams : gamma shape and scale for immigrant fitness (see ``migrate``)

    Returns
    -------
    pandas.DataFrame with one row per generation and the columns
    'gen', 'W', 'WMean', 'AMean', 'WVar', 'AVar'.
    """
    columns = ['gen', 'W', 'WMean', 'AMean', 'WVar', 'AVar']
    # The population must be created by calling mkpop, not by binding the
    # function object itself.
    pop = mkpop(nInit)
    rows = []
    for gen in range(nGen):
        # NOTE(review): `pop` is never replaced by the offspring, so it only
        # grows by immigrants each generation - confirm this is intended.
        pop = migrate(pop, nAlien, gParams)
        offspring = mate(pop)
        # Column order of the offspring array is W, A, sex, alien.
        var = np.var(offspring, axis=0)
        mean = np.mean(offspring, axis=0)
        N = len(offspring)
        # float() keeps the ratio fractional on Python 2 as well.
        W = float(N) / nInit
        rows.append({
            'gen': gen,
            'W': W,
            'WMean': mean[0],
            'AMean': mean[1],
            'WVar': var[0],
            'AVar': var[1],
        })
        print(N, gen)
    return pd.DataFrame(rows, columns=columns)
# Single-argument print calls behave the same on Python 2 and Python 3.
print(mkpop(100))
print(mate(mkpop(100)))
#
sim(100, 100, 10, [1, 0.1])
Running this script outputs NameError: name 'sim' is not defined. It is apparent from the commands before the final one that all the other functions defined within this script work without a hitch. I'm not sure what is going on here, and there is probably some very easy fix that I'm overlooking. Ctags recognizes this function just fine. It's entirely possible that sim() doesn't actually work yet, as I haven't been able to debug it.
Your sim function is defined inside the scope of the mate function, so it is invisible from the global scope. You need to fix the indentation of the sim function so that it is defined at the top level.
In my homework, the user is supposed to enter a number, and the program should display the factorials, the Fibonacci series and all cubed numbers up to the number entered, in Python. I cannot figure out where the problem is.
#!/Python27/python
def factorial(n):
    """Return n! recursively; any n below 1 (the base case) yields 1."""
    return 1 if n < 1 else n * factorial(n - 1)
def fact(n):
    """Print 1!, 2!, ..., n!, one value per line."""
    running = 1
    for i in range(1, n + 1):
        # Extend the previous factorial instead of recomputing it from scratch.
        running *= i
        # Function-call form of print works on both Python 2 and Python 3.
        print("%d" % running)
def fib(n):
    """Print the Fibonacci numbers that are strictly below n, one per line."""
    a, b = 0, 1
    while b < n:
        # Function-call form of print works on both Python 2 and Python 3.
        print(b)
        a, b = b, a + b
def cube(n):
    """Return n raised to the third power, computed as n * n * n."""
    squared = n * n
    return squared * n
def cubes(n):
    """Print the cubes of 1, 2, ..., n, one value per line."""
    for i in range(1, n + 1):
        # Function-call form of print works on both Python 2 and Python 3.
        print("%d" % cube(i))
def main():
    """Read a number and print the factorials, Fibonacci series and cubes up to it."""
    # The closing parenthesis of int(...) was missing, which made the whole
    # script a SyntaxError.
    # NOTE: on Python 2, input() evaluates the typed text; raw_input() is
    # the safe way to read a string there.
    nr = int(input("Enter a number: "))
    # The bare factorial(nr) call was dropped: its result was discarded and
    # fact() already prints every factorial up to nr.
    fact(nr)
    # fib() was defined but never called, so no Fibonacci output appeared.
    fib(nr)
    cubes(nr)
main()
The problem arises from you not having enough brackets:
def main():
nr = int(input("Enter a number: "))
...
You forgot the closing bracket for int()
To display the output in a table, I would return a list from each function and then, in main, do something like:
import itertools
print "Factorial up to {n}\tFibonacci of 1 to {n}\tCubes of 1 to {n}".format(n = nr)
print '\n'.join('\t'.join(map(str, seq)) for seq in itertools.izip_longest(factorial(nr), fib(nr), cubes(nr), fillvalue=''))
Now if each of the functions (respectively) returned the following lists:
>>> factorial(nr)=> [1, 2, 3, 4]
>>> fib(nr)=> [3, 4, 5, 6, 7]
>>> cubes(nr)=> [7, 453, 23, 676]
Using the above method will yield output like this:
Factorial up to -inf Fibonacci of 1 to -inf Cubes of 1 to -inf
1 3 7
2 4 453
3 5 23
4 6 676
7
It doesn't quite look like a table, but if you stuff more tab characters into the output, you should get something looking closer to table format