Binomial iterated expectation in Cython - python-2.7

I am brand new to Cython. How to convert the Python function called Values below to Cython? With factors=2 and i=60 this takes 2.8 secs on my big Linux box. The goal is sub 1 sec with factors=2 and i=360.
Here's the code. Thanks!
import numpy as np
import itertools
class Numeraire:
def __init__(self, rate):
self.rate = rate
def __call__(self, timenext, time, state):
return np.exp(-self.rate*(timenext - time))
def Values(values, i1, i0=0, numeraire=Numeraire(0.)):
factors=len(values.shape)
norm=0.5**factors
for i in np.arange(i1-1, i0-1, -1):
for j in itertools.product(np.arange(i+1), repeat=factors):
value = 0.
for k in itertools.product(np.arange(2), repeat=factors):
value += values[tuple(np.array(j) + np.array(k))]
values[j] = value*norm*numeraire(i+1, i, j)
return values
factors = 2
i = 60
values = np.ones([i+1]*factors)
Values(values, i, numeraire=Numeraire(0.05/12))
print values[(0,)*factors], np.exp(-0.05/12*i)

Here's my latest answer (no Cython!), which runs in 125 msec for the factor=2, i=360 case.
import numpy as np
import itertools
slices = (slice(None, -1, None), slice(1, None, None))
def Expectation(values, numeraire, i, i0=0):
def Values(values, i):
factors = values.ndim
expect = np.zeros((i,)*factors)
for j in itertools.product(slices, repeat=factors):
expect += values[j]
return expect*0.5**factors*numeraire(i, i-1)
return reduce(Values, range(i, i0, -1), values)
class Numeraire:
def __init__(self, factors, rate=0):
self.factors = factors
self.rate = rate
def __call__(self, timenext, time):
return np.full((time+1,)*factors, np.exp(-self.rate*(timenext - time)))
factors = 2
i = 360
values, numeraire = np.ones((i+1,)*factors), Numeraire(factors, 0.05/12)
%timeit Expectation(values, numeraire, i)
Expectation(values, numeraire, i)[(0,)*factors], np.exp(-0.05/12*i)

Before using Cython, you should optimize your code with Numpy. Here, vectorizing the third and second inner for loops, yields a x40 speed-up,
In [1]: import numpy as np
...: import itertools
...:
...: # define Numaire and Values functions from the question above
...:
...: def Values2(values, i1, i0=0, numeraire=Numeraire(0.)):
...: factors=len(values.shape)
...: norm=0.5**factors
...: k = np.array(list(itertools.product(np.arange(2), repeat=factors)))
...: for i in np.arange(i1-1, i0-1, -1):
...: j = np.array(list(itertools.product(np.arange(i+1), repeat=factors)))
...: mask_all = j[:,:,np.newaxis] + k.T[np.newaxis, :, :]
...: mask_x, mask_y = np.swapaxes(mask_all, 2, 1).reshape(-1, 2).T
...:
...: values_tmp = values[mask_x, mask_y].reshape((j.shape[0], k.shape[0]))
...: values_tmp = values_tmp.sum(axis=1)
...: values[j[:,0], j[:,1]] = values_tmp*norm*numeraire(i+1, i, j)
...: return values
...:
...: factors = 2
...: i = 60
...: values = lambda : np.ones([i+1]*factors)
...: print values()[(0,)*factors], np.exp(-0.05/12*i)
...:
...: res = Values(values(), i, numeraire=Numeraire(0.05/12))
...: res2 = Values2(values(), i, numeraire=Numeraire(0.05/12))
...: np.testing.assert_allclose(res, res2)
...:
...: %timeit Values(values(), i, numeraire=Numeraire(0.05/12))
...: %timeit Values2(values(), i, numeraire=Numeraire(0.05/12))
...:
1.0 0.778800783071
1 loops, best of 3: 1.26 s per loop
10 loops, best of 3: 31.8 ms per loop
The next step would be to replace the line,
j = np.array(list(itertools.product(np.arange(i+1), repeat=factors)
with it's Numpy equivalent, taken from this answer (not very pretty),
def itertools_product_numpy(some_list, some_length):
return some_list[np.rollaxis(
np.indices((len(some_list),) * some_length), 0, some_length + 1)
.reshape(-1, some_length)]
k = itertools_product_numpy(np.arange(i+1), factors)
this result in an overall x160 speed up and the code runs in 1.2 second on my laptop for i=360 and factors = 2.
In this last version, I don't think that you will get much speed up, if you port it to Cython, since there is just one loop remaining and it has only ~360 iterations. Rather, some fine-tuned Python/Numpy optimizations should be performed to get a further speed increase.
Alternatively, you can try applying Cython to your original implementation. However because it is based on itertools.product, which is slow when called repeatedly in a loop, Cython will not help there.

Related

trim np arrays according to a list of starting points

I have a table, represented by an np.array like the following:
A = [[12,412,42,54],
[144,2,42,4],
[2,43,22,10]]
And a list that contains the desired starting point of each row in A:
L=[0,2,1]
The desired output would be:
B = [[12,412,42,54],
[42,4,np.nan,np.nan],
[43,22,10,np.nan]]
Edit
I prefer to avoid using a for-loop for obvious reasons.
Try compare the L with column index, then use boolean set/get items:
# convert A to numpy array for advanced indexing
A = np.array(A)
ll = A.shape[1]
keep = np.arange(ll) >= np.array(L)[:,None]
out = np.full(A.shape, np.nan)
out[keep[:,::-1]] = A[keep]
print(out)
Output:
[[ 12. 412. 42. 54.]
[ 42. 4. nan nan]
[ 43. 22. 10. nan]]
My guess would be that a vectorized approach for this would be less efficient than explicit looping, because the result is fundamentally a jagged array, which NumPy does not support well.
However, a loop-based solution is simple, that can be made faster with Numba's nb.njit(), if needed.:
import numpy as np
import numba as nb
#nb.njit
def jag_nb(arr, starts, empty=np.nan):
result = np.full(arr.shape, empty)
for i, x in enumerate(starts):
if x != 0:
result[i, :-x] = arr[i, x:]
else:
result[i, :] = arr[i, :]
return result
A = np.array([[12,412,42,54], [144,2,42,4], [2,43,22,10]])
L = np.array([0,2,1])
jag(A, L)
# array([[ 12., 412., 42., 54.],
# [ 42., 4., nan, nan],
# [ 43., 22., 10., nan]])
Compared to the pure NumPy vectorized approach proposed in #QuangHoang's answer:
def jag_np(arr, starts, empty=np.nan):
m, _ = arr.shape
keep = np.arange(m) >= starts[:, None]
result = np.full(arr.shape, np.nan)
result[keep[:, ::-1]] = arr[keep]
return result
The Numba based approach is noticeably faster, as shown with the following benchmarks:
import pandas as pd
import matplotlib.pyplot as plt
def benchmark(
funcs,
ii=range(4, 10, 1),
is_equal=lambda x, y: np.allclose(x, y, equal_nan=True),
seed=0,
unit="ms",
verbose=True,
use_str=True
):
labels = [func.__name__ for func in funcs]
units = {"s": 0, "ms": 3, "µs": 6, "ns": 9}
assert unit in units
np.random.seed(seed)
timings = {}
for i in ii:
m = n = 2 ** i
if verbose:
print(f"i={i}, n={n}")
arr = np.random.random((m, n))
starts = np.random.randint(0, n, m)
base = funcs[0](arr, starts)
timings[n] = []
for func in funcs:
res = func(arr, starts)
is_good = is_equal(base, res)
timed = %timeit -n 64 -r 8 -q -o func(arr, starts)
timing = timed.best
timings[n].append(timing if is_good else None)
if verbose:
print(
f"{func.__name__:>24}"
f" {is_good!s:5}"
f" {timing * (10 ** units[unit]):10.3f} {unit}"
f" {timings[n][0] / timing:5.1f}x")
return timings, labels
def plot(timings, labels, title=None, xlabel="Input Size / #", unit="ms"):
n_rows = 1
n_cols = 3
fig, axs = plt.subplots(n_rows, n_cols, figsize=(8 * n_cols, 6 * n_rows), squeeze=False)
units = {"s": 0, "ms": 3, "µs": 6, "ns": 9}
df = pd.DataFrame(data=timings, index=labels).transpose()
base = df[[labels[0]]].to_numpy()
(df * 10 ** units[unit]).plot(marker="o", xlabel=xlabel, ylabel=f"Best timing / {unit}", ax=axs[0, 0])
(df / base * 100).plot(marker='o', xlabel=xlabel, ylabel='Relative speed / %', logx=True, ax=axs[0, 1])
(base / df).plot(marker='o', xlabel=xlabel, ylabel='Speed Gain / x', ax=axs[0, 2])
if title:
fig.suptitle(title)
fig.patch.set_facecolor('white')
funcs = jag_np, jag_nb
timings, labels = benchmark(funcs, ii=range(4, 11))
plot(timings, labels, unit="ms")

Is it possible to find all integer solutions?

I wanna get all integer solutions in a limited time, is it possible?
This is a linear, integer constraint satisfaction problem, which can be solved efficiently by OR Tools' CP-SAT. I've modified their example to solve your problem in Python:
from ortools.sat.python import cp_model
class VarArraySolutionPrinter(cp_model.CpSolverSolutionCallback):
"""Print intermediate solutions."""
def __init__(self, variables):
cp_model.CpSolverSolutionCallback.__init__(self)
self.__variables = variables
self.__solution_count = 0
def on_solution_callback(self):
self.__solution_count += 1
for v in self.__variables:
print('%s=%i' % (v, self.Value(v)), end=' ')
print()
def solution_count(self):
return self.__solution_count
def SearchForAllSolutionsSampleSat():
"""Showcases calling the solver to search for all solutions."""
# Creates the model.
model = cp_model.CpModel()
p = [1, 2, 3, 4]
ceq = 30
cgeq = 2
N = len(p)
# Creates the variables
x = [model.NewIntVar(0, 100, f'x{i}') for i in range(N)]
# Create the constraints.
model.Add(sum([xi*pi for xi, pi in zip(x, p)]) == ceq)
model.Add(sum(x) >= cgeq)
# Create a solver and solve.
solver = cp_model.CpSolver()
solution_printer = VarArraySolutionPrinter(x)
status = solver.SearchForAllSolutions(model, solution_printer)
print('Status = %s' % solver.StatusName(status))
print('Number of solutions found: %i' % solution_printer.solution_count())
SearchForAllSolutionsSampleSat()

write a recursive function to find hermite polynomials

# I have the recursive relationship of the Hermite Polynomials:
Hn+1(x)=2xHn(x)−2nHn−1(x), n≥1,
H0(x)=1, H1(x)=2x.
I need to write def hermite(x,n) for any hermite polynomial Hn(x) using python 2.7
and make a plot of H5(x) on the interval x∈[−1,1].
Recursion is trivial here since the formula gives it. Just a small trap: you compute Hn(x), not Hn+1(x) so substract 1 to all n occurrences:
def hermite(x,n):
if n==0:
return 1
elif n==1:
return 2*x
else:
return 2*x*hermite(x,n-1)-2*(n-1)*hermite(x,n-2)
small test:
for i in range(0,5):
print(hermite(1,i))
1
2
2
-4
-20
import math
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import hermite
def HERMITE(X,N):
HER = hermite(N)
sn = HER(X)
return sn
xvals = np.linspace(-1.0,1.0,1000)
for n in np.arange(0,7,1):
sol = HERMITE(xvals,n)
plt.plot(xvals,sol,"-.",label = "n = " + str(n),linewidth=2)
plt.xticks(fontsize=14,fontweight="bold")
plt.yticks(fontsize=14,fontweight="bold")
plt.grid()
plt.legend()
plt.show()
import math
import numpy as np
import matplotlib.pyplot as plt
def HER(x,n):
if n==0:
return 1.0 + 0.0*x
elif n==1:
return 2.0*x
else:
return 2.0*x*HER(x,n-1) -2.0*(n-1)*HER(x,n-2)
xvals = np.linspace(-np.pi,np.pi,1000)
for N in np.arange(0,7,1):
sol = HER(xvals,N)
plt.plot(xvals,sol,label = "n = " + str(N))
plt.xticks(fontsize=14,fontweight="bold")
plt.yticks(fontsize=14,fontweight="bold")
plt.grid()
plt.legend()
plt.show()
#Code is working perfectly fine
Feel free to ask any question...

Filling Value of a Pandas Data Frame From a Large DB Query (Python)

I am running a snippet of code that queries a database and then fills in a pandas dataframe with a value of 1 if that tuple is present in the query. it does this by running the query then iterates over the tuples and fills in the dataframe. However, the query returns almost 8 million rows of data.
My question is if anyone knows how to speed up a process like this. Here is the code below:
user_age = pd.read_sql_query(sql_age, datastore, index_col=['userid']).age.astype(np.int, copy=False)
x = pd.DataFrame(0, index=user_age.index, columns=range(366), dtype=np.int8)
for r in pd.read_sql_query(sql_active, datastore, chunksize=50000):
for userid, day in r.itertuples(index=False):
x.at[userid, day] = 1
Thank you in advance!
You could save some time by replacing the Python loop
for userid, day in r.itertuples(index=False):
x.at[userid, day] = 1
with a NumPy array assignment using "advanced integer indexing":
x[npidx[r['userid']], r['day']] = 1
On a 80000-row DataFrame, using_numpy (below) is about 6x faster:
In [7]: %timeit orig()
1 loop, best of 3: 984 ms per loop
In [8]: %timeit using_numpy()
10 loops, best of 3: 162 ms per loop
import numpy as np
import pandas as pd
def mock_read_sql_query():
np.random.seed(2016)
for arr in np.array_split(index, N//M):
size = len(arr)
df = pd.DataFrame({'userid':arr , 'day':np.random.randint(366, size=size)})
df = df[['userid', 'day']]
yield df
N, M = 8*10**4, 5*10**2
index = np.arange(N)
np.random.shuffle(index)
columns = range(366)
def using_numpy():
npidx = np.empty_like(index)
npidx[index] = np.arange(len(index))
x = np.zeros((len(index), len(columns)), dtype=np.int8)
for r in mock_read_sql_query():
x[npidx[r['userid']], r['day']] = 1
x = pd.DataFrame(x, columns=columns, index=index)
return x
def orig():
x = pd.DataFrame(0, index=index, columns=columns, dtype=np.int8)
for r in mock_read_sql_query():
for userid, day in r.itertuples(index=False):
x.at[userid, day] = 1
return x
expected = orig()
result = using_numpy()
expected_index, expected_col = np.where(expected)
result_index, result_col = np.where(result)
assert np.equal(expected_index, result_index).all()
assert np.equal(expected_col, result_col).all()

Construct a matrix in openCV based on a given matrix

I am writing some code using the OpenCV library in Python. In the process, I need to construct a matrix based on another matrix given. Now my code looks like the following:
for x in range(0, width):
for y in range(0, height):
if I_mat[x][y]>=0 and I_mat[x][y]<=c_low:
w_mat[x][y] = float(I_mat[x][y])/c_low
elif I_mat[x][y]>c_low and I_mat[x][y]<c_high:
w_mat[x][y] = 1
else:
w_mat[x][y] = float((255-I_mat[x][y]))/float((255-c_high))
where, I_mat is the input matrix and w_mat is the matrix I am going to construct. Since the input matrix is quite large, this algorithm is quite slow. I wonder if there are any other methods to construct w_mat more efficiently. Thank a lot!
(It is not necessary to show the solution in Python.)
edit:you might want to use numba
import numpy as np
import timeit
from numba import void,jit
c_low = .3
c_high = .6
def func(val):
if val>=0 and val<=c_low:
return float(val)/c_low
elif val>c_low and val<c_high:
return 1.
else:
return (255.-val)/(255.-c_high)
def npvectorize():
global w_mat
vfunc = np.vectorize(func)
w_mat = vfunc(I_mat)
def orig():
for x in range(I_mat.shape[0]):
for y in range(I_mat.shape[1]):
if I_mat[x][y]>=0 and I_mat[x][y]<=c_low:
w_mat[x][y] = float(I_mat[x][y])/c_low
elif I_mat[x][y]>c_low and I_mat[x][y]<c_high:
w_mat[x][y] = 1
else:
w_mat[x][y] = float((255-I_mat[x][y]))/float((255-c_high))
I_mat = np.array(np.random.random((1000,1000)), dtype = np.float)
w_mat = np.empty_like(I_mat)
fast = jit(void(),nopython=True)(orig)
print timeit.Timer(fast).timeit(1)
print timeit.Timer(npvectorize).timeit(1)
print timeit.Timer(orig).timeit(1)
output:
0.0352660446331
0.472590475098
4.78634474265