I have a table, represented by an np.array like the following:
A = [[12,412,42,54],
[144,2,42,4],
[2,43,22,10]]
And a list that contains the desired starting point of each row in A:
L=[0,2,1]
The desired output would be:
B = [[12,412,42,54],
[42,4,np.nan,np.nan],
[43,22,10,np.nan]]
Edit
I prefer to avoid using a for-loop for obvious reasons.
Try compare the L with column index, then use boolean set/get items:
# convert A to numpy array for advanced indexing
A = np.array(A)
ll = A.shape[1]
keep = np.arange(ll) >= np.array(L)[:,None]
out = np.full(A.shape, np.nan)
out[keep[:,::-1]] = A[keep]
print(out)
Output:
[[ 12. 412. 42. 54.]
[ 42. 4. nan nan]
[ 43. 22. 10. nan]]
My guess would be that a vectorized approach for this would be less efficient than explicit looping, because the result is fundamentally a jagged array, which NumPy does not support well.
However, a loop-based solution is simple, that can be made faster with Numba's nb.njit(), if needed.:
import numpy as np
import numba as nb
#nb.njit
def jag_nb(arr, starts, empty=np.nan):
result = np.full(arr.shape, empty)
for i, x in enumerate(starts):
if x != 0:
result[i, :-x] = arr[i, x:]
else:
result[i, :] = arr[i, :]
return result
A = np.array([[12,412,42,54], [144,2,42,4], [2,43,22,10]])
L = np.array([0,2,1])
jag(A, L)
# array([[ 12., 412., 42., 54.],
# [ 42., 4., nan, nan],
# [ 43., 22., 10., nan]])
Compared to the pure NumPy vectorized approach proposed in #QuangHoang's answer:
def jag_np(arr, starts, empty=np.nan):
m, _ = arr.shape
keep = np.arange(m) >= starts[:, None]
result = np.full(arr.shape, np.nan)
result[keep[:, ::-1]] = arr[keep]
return result
The Numba based approach is noticeably faster, as shown with the following benchmarks:
import pandas as pd
import matplotlib.pyplot as plt
def benchmark(
funcs,
ii=range(4, 10, 1),
is_equal=lambda x, y: np.allclose(x, y, equal_nan=True),
seed=0,
unit="ms",
verbose=True,
use_str=True
):
labels = [func.__name__ for func in funcs]
units = {"s": 0, "ms": 3, "µs": 6, "ns": 9}
assert unit in units
np.random.seed(seed)
timings = {}
for i in ii:
m = n = 2 ** i
if verbose:
print(f"i={i}, n={n}")
arr = np.random.random((m, n))
starts = np.random.randint(0, n, m)
base = funcs[0](arr, starts)
timings[n] = []
for func in funcs:
res = func(arr, starts)
is_good = is_equal(base, res)
timed = %timeit -n 64 -r 8 -q -o func(arr, starts)
timing = timed.best
timings[n].append(timing if is_good else None)
if verbose:
print(
f"{func.__name__:>24}"
f" {is_good!s:5}"
f" {timing * (10 ** units[unit]):10.3f} {unit}"
f" {timings[n][0] / timing:5.1f}x")
return timings, labels
def plot(timings, labels, title=None, xlabel="Input Size / #", unit="ms"):
n_rows = 1
n_cols = 3
fig, axs = plt.subplots(n_rows, n_cols, figsize=(8 * n_cols, 6 * n_rows), squeeze=False)
units = {"s": 0, "ms": 3, "µs": 6, "ns": 9}
df = pd.DataFrame(data=timings, index=labels).transpose()
base = df[[labels[0]]].to_numpy()
(df * 10 ** units[unit]).plot(marker="o", xlabel=xlabel, ylabel=f"Best timing / {unit}", ax=axs[0, 0])
(df / base * 100).plot(marker='o', xlabel=xlabel, ylabel='Relative speed / %', logx=True, ax=axs[0, 1])
(base / df).plot(marker='o', xlabel=xlabel, ylabel='Speed Gain / x', ax=axs[0, 2])
if title:
fig.suptitle(title)
fig.patch.set_facecolor('white')
funcs = jag_np, jag_nb
timings, labels = benchmark(funcs, ii=range(4, 11))
plot(timings, labels, unit="ms")
I am trying to write the basic Newton's method without pre-built solvers.
This is the function:
## definition of variables
x_1, x_2 = sym.symbols("x_1 x_2")
a_T=np.array([[0.3],[0.6],[0.2]])
b_T=np.array([5,26,3])
c_T=np.array([40,1,10])
u= x_1-0.8
v= x_2-(a_T[0]+a_T[1]*u**2*(1-u)**(1/2)-a_T[2]*u)
alpha= -b_T[0]+(b_T[1]*u**2)*(1+u)**(1/2)+(b_T[2])*u
beta= c_T[0]*v**2*(1-c_T[1]*v)/(1+c_T[2]*u**2)
## function
f = alpha**(-beta)
I calculated the gradient and the Hessian and defined the other parameters:
## gradient
gradient_cal = sym.Matrix(1,2,sym.derive_by_array(f, (x_1, x_2)))
## hessian
hessian_cal = sym.Matrix(2, 2, sym.derive_by_array(gradient_cal, (x_1, x_2)))
# initial guess
x_A= Matrix([[1],[0.5]])
xk = x_A
#tolerance
epsilon= 1e-10
#maximum iterations
max_iter=100
And the function itself:
def newton(gradient_cal,hessian_cal,xk,epsilon,max_iter):
for k in range(0,max_iter):
fxk = gradient_cal.evalf(subs={x_1:xk[0], x_2:xk[1]})
if fxk.norm() < epsilon:
print('Found solution after',k,'iterations.')
return xk
Dfxk = hessian_cal.evalf(subs={x_1: xk[0], x_2: xk[1]})
if Dfxk == 0:
print('Zero derivative. No solution found.')
return None
A=hessian_cal.evalf(subs={x_1: xk[0], x_2: xk[1]})
B=gradient_cal.evalf(subs={x_1: xk[0], x_2: xk[1]})
pk= (A.inv().dot(B))
xk = np.subtract(xk, pk)
print('Exceeded maximum iterations. No solution found.')
return None
approx = newton(gradient_cal,hessian_cal,x_A,epsilon,max_iter)
print(approx)
The following error shows up:
TypeError: Shape should contain integers only.
I checked it and saw that the Hessian contains "I" values. Therefore I am not sure if the calculations of the gradient and the Hessian are correct.
Does anyone have a better solution to calculate the gradient and the Hessian for such a complex function?
The jacobian-batteries are already included in SymPy:
>>> from sympy.abc import x, y
>>> f = x/y + x*y**2
>>> Matrix([f]).jacobian((x,y))
Matrix([[y**2 + 1/y, 2*x*y - x/y**2]])
>>> _.jacobian((x,y)) # Hessian
Matrix([
[ 0, 2*y - 1/y**2],
[2*y - 1/y**2, 2*x + 2*x/y**3]])
So you could try
x_1, x_2 = sym.symbols("x_1 x_2")
xx = x_1, x_2
a_T=[0.3,0.6,0.2]
b_T=[5,26,3]
c_T=[40,1,10]
u= x_1-0.8
v= x_2-(a_T[0]+a_T[1]*u**2*(1-u)**(1/2)-a_T[2]*u)
alpha= -b_T[0]+(b_T[1]*u**2)*(1+u)**(1/2)+(b_T[2])*u
beta= c_T[0]*v**2*(1-c_T[1]*v)/(1+c_T[2]*u**2)
## function
f = alpha**(-beta)
jac = Matrix([f]).jacobian(xx)
hes = jac.jacobian(xx)
Here is my original function.
def f(n):
if n<0:
print("Error.Bad input")
elif n==0:
return 1
elif n==1:
return 1
else:
return f(n-1)+f(n-2)
Is it possible to modify Fibonacci Python function so that not only does it have to calculate f(n) but also it prints f(0), f(1), f(2),....,f(n) in the function?
Instead of using a recursive function you could do this:
def f(n):
if n < 0:
print("Error.Bad input")
elif n <= 1:
return 1
else:
result = [1, 2]
print(1)
print(2)
for i in range(n - 2):
result = [result[1], result[0] + result[1]]
print(result[1])
return result[1]
def fib(n):
pred, curr = 0, 1
k = 1
while (k<n):
pred, curr = curr, curr + pred
print(curr)
k = k + 1
Probably the optimal "pythonic" way to create a sequence (a list) of fibonacci numbers of arbitraly size n is to define a function which does so, than to call that function with size as an input argument. For example:
def fibonacci_sequence(n):
x = [0, 1]
[x.append(x[-1] + x[-2]) for i in range(n - len(x))]
return x
fibonacci_sequence(15)
The result is:
[0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377]
Bottom-up approach. Complexity is O(n):
def fib(n):
if n in (1, 2):
return 1
i = z = 1
for _ in range(3, n+1):
z, i = i + z, z
return z
I have a list of integers which I need to parse into a string of ranges.
For example:
[0, 1, 2, 3] -> "0-3"
[0, 1, 2, 4, 8] -> "0-2,4,8"
And so on.
I'm still learning more pythonic ways of handling lists, and this one is a bit difficult for me. My latest thought was to create a list of lists which keeps track of paired numbers:
[ [0, 3], [4, 4], [5, 9], [20, 20] ]
I could then iterate across this structure, printing each sub-list as either a range, or a single value.
I don't like doing this in two iterations, but I can't seem to keep track of each number within each iteration. My thought would be to do something like this:
Here's my most recent attempt. It works, but I'm not fully satisfied; I keep thinking there's a more elegant solution which completely escapes me. The string-handling iteration isn't the nicest, I know -- it's pretty early in the morning for me :)
def createRangeString(zones):
rangeIdx = 0
ranges = [[zones[0], zones[0]]]
for zone in list(zones):
if ranges[rangeIdx][1] in (zone, zone-1):
ranges[rangeIdx][1] = zone
else:
ranges.append([zone, zone])
rangeIdx += 1
rangeStr = ""
for range in ranges:
if range[0] != range[1]:
rangeStr = "%s,%d-%d" % (rangeStr, range[0], range[1])
else:
rangeStr = "%s,%d" % (rangeStr, range[0])
return rangeStr[1:]
Is there a straightforward way I can merge this into a single iteration? What else could I do to make it more Pythonic?
>>> from itertools import count, groupby
>>> L=[1, 2, 3, 4, 6, 7, 8, 9, 12, 13, 19, 20, 22, 23, 40, 44]
>>> G=(list(x) for _,x in groupby(L, lambda x,c=count(): next(c)-x))
>>> print ",".join("-".join(map(str,(g[0],g[-1])[:len(g)])) for g in G)
1-4,6-9,12-13,19-20,22-23,40,44
The idea here is to pair each element with count(). Then the difference between the value and count() is constant for consecutive values. groupby() does the rest of the work
As Jeff suggests, an alternative to count() is to use enumerate(). This adds some extra cruft that needs to be stripped out in the print statement
G=(list(x) for _,x in groupby(enumerate(L), lambda (i,x):i-x))
print ",".join("-".join(map(str,(g[0][1],g[-1][1])[:len(g)])) for g in G)
Update: for the sample list given here, the version with enumerate runs about 5% slower than the version using count() on my computer
Whether this is pythonic is up for debate. But it is very compact. The real meat is in the Rangify() function. There's still room for improvement if you want efficiency or Pythonism.
def CreateRangeString(zones):
#assuming sorted and distinct
deltas = [a-b for a, b in zip(zones[1:], zones[:-1])]
deltas.append(-1)
def Rangify((b, p), (z, d)):
if p is not None:
if d == 1: return (b, p)
b.append('%d-%d'%(p,z))
return (b, None)
else:
if d == 1: return (b, z)
b.append(str(z))
return (b, None)
return ','.join(reduce(Rangify, zip(zones, deltas), ([], None))[0])
To describe the parameters:
deltas is the distance to the next value (inspired from an answer here on SO)
Rangify() does the reduction on these parameters
b - base or accumulator
p - previous start range
z - zone number
d - delta
To concatenate strings you should use ','.join. This removes the 2nd loop.
def createRangeString(zones):
rangeIdx = 0
ranges = [[zones[0], zones[0]]]
for zone in list(zones):
if ranges[rangeIdx][1] in (zone, zone-1):
ranges[rangeIdx][1] = zone
else:
ranges.append([zone, zone])
rangeIdx += 1
return ','.join(
map(
lambda p: '%s-%s'%tuple(p) if p[0] != p[1] else str(p[0]),
ranges
)
)
Although I prefer a more generic approach:
from itertools import groupby
# auxiliary functor to allow groupby to compare by adjacent elements.
class cmp_to_groupby_key(object):
def __init__(self, f):
self.f = f
self.uninitialized = True
def __call__(self, newv):
if self.uninitialized or not self.f(self.oldv, newv):
self.curkey = newv
self.uninitialized = False
self.oldv = newv
return self.curkey
# returns the first and last element of an iterable with O(1) memory.
def first_and_last(iterable):
first = next(iterable)
last = first
for i in iterable:
last = i
return (first, last)
# convert groups into list of range strings
def create_range_string_from_groups(groups):
for _, g in groups:
first, last = first_and_last(g)
if first != last:
yield "{0}-{1}".format(first, last)
else:
yield str(first)
def create_range_string(zones):
groups = groupby(zones, cmp_to_groupby_key(lambda a,b: b-a<=1))
return ','.join(create_range_string_from_groups(groups))
assert create_range_string([0,1,2,3]) == '0-3'
assert create_range_string([0, 1, 2, 4, 8]) == '0-2,4,8'
assert create_range_string([1,2,3,4,6,7,8,9,12,13,19,20,22,22,22,23,40,44]) == '1-4,6-9,12-13,19-20,22-23,40,44'
This is more verbose, mainly because I have used generic functions that I have and that are minor variations of itertools functions and recipes:
from itertools import tee, izip_longest
def pairwise_longest(iterable):
"variation of pairwise in http://docs.python.org/library/itertools.html#recipes"
a, b = tee(iterable)
next(b, None)
return izip_longest(a, b)
def takeuntil(predicate, iterable):
"""returns all elements before and including the one for which the predicate is true
variation of http://docs.python.org/library/itertools.html#itertools.takewhile"""
for x in iterable:
yield x
if predicate(x):
break
def get_range(it):
"gets a range from a pairwise iterator"
rng = list(takeuntil(lambda (a,b): (b is None) or (b-a>1), it))
if rng:
b, e = rng[0][0], rng[-1][0]
return "%d-%d" % (b,e) if b != e else "%d" % b
def create_ranges(zones):
it = pairwise_longest(zones)
return ",".join(iter(lambda:get_range(it),None))
k=[0,1,2,4,5,7,9,12,13,14,15]
print create_ranges(k) #0-2,4-5,7,9,12-15
def createRangeString(zones):
"""Create a string with integer ranges in the format of '%d-%d'
>>> createRangeString([0, 1, 2, 4, 8])
"0-2,4,8"
>>> createRangeString([1,2,3,4,6,7,8,9,12,13,19,20,22,22,22,23,40,44])
"1-4,6-9,12-13,19-20,22-23,40,44"
"""
buffer = []
try:
st = ed = zones[0]
for i in zones[1:]:
delta = i - ed
if delta == 1: ed = i
elif not (delta == 0):
buffer.append((st, ed))
st = ed = i
else: buffer.append((st, ed))
except IndexError:
pass
return ','.join(
"%d" % st if st==ed else "%d-%d" % (st, ed)
for st, ed in buffer)
Here is my solution. You need to keep track of various pieces of information while you iterate through the list and create the result - this screams generator to me. So here goes:
def rangeStr(start, end):
'''convert two integers into a range start-end, or a single value if they are the same'''
return str(start) if start == end else "%s-%s" %(start, end)
def makeRange(seq):
'''take a sequence of ints and return a sequence
of strings with the ranges
'''
# make sure that seq is an iterator
seq = iter(seq)
start = seq.next()
current = start
for val in seq:
current += 1
if val != current:
yield rangeStr(start, current-1)
start = current = val
# make sure the last range is included in the output
yield rangeStr(start, current)
def stringifyRanges(seq):
return ','.join(makeRange(seq))
>>> l = [1,2,3, 7,8,9, 11, 20,21,22,23]
>>> l2 = [1,2,3, 7,8,9, 11, 20,21,22,23, 30]
>>> stringifyRanges(l)
'1-3,7-9,11,20-23'
>>> stringifyRanges(l2)
'1-3,7-9,11,20-23,30'
My version will work correctly if given an empty list, which I think some of the others will not.
>>> stringifyRanges( [] )
''
makeRanges will work on any iterator that returns integers and lazily returns a sequence of strings so can be used on infinite sequences.
edit: I have updated the code to handle single numbers that are not part of a range.
edit2: refactored out rangeStr to remove duplication.
how about this mess...
def rangefy(mylist):
mylist, mystr, start = mylist + [None], "", 0
for i, v in enumerate(mylist[:-1]):
if mylist[i+1] != v + 1:
mystr += ["%d,"%v,"%d-%d,"%(start,v)][start!=v]
start = mylist[i+1]
return mystr[:-1]