Python - Error using linprog ("Invalid input for linprog: provide a 3 x 2 array for bounds, not a 2 x 3 array") - linear-programming

I am trying to use linprog in Python to solve this problem:
# Minimize = (0.035*x1) + (0.015*x2) + (0.025*x3)
# x1+x2+x3=1.2
# 0<=x1<=0.7
# 0<=x2<=0.3
# 0<=x3<=0.5
from scipy.optimize import linprog

c = [0.035, 0.015, 0.025]  # objective function coefficients
A_eq = [[1, 1, 1]]
b = [1.2]
lb = (0, 0, 0)
up = (0.7, 0.3, 0.5)
linprog(c, A_ub=None, b_ub=None, A_eq=A_eq, b_eq=b, bounds=[lb, up],
        method='interior-point', callback=None, options=None, x0=None)
However, I am getting the error shown in the title. Could you help me with that?
Thanks a lot!

You should define the bounds correctly: one (min, max) pair per variable, in the same order as the coefficients. Passing bounds=[lb, up] hands linprog two rows of three values (a 2 x 3 array), while it expects three (min, max) pairs (3 x 2). In this case, each variable is bounded between zero and some upper limit:
# Minimize = (0.035*x1) + (0.015*x2) + (0.025*x3)
# x1+x2+x3=1.2
# 0<=x1<=0.7
# 0<=x2<=0.3
# 0<=x3<=0.5
from scipy.optimize import linprog

c = [0.035, 0.015, 0.025]  # objective function coefficients
A_eq = [[1, 1, 1]]
b = [1.2]
x1_b = (0, 0.7)
x2_b = (0, 0.3)
x3_b = (0, 0.5)
linprog(c, A_ub=None, b_ub=None, A_eq=A_eq, b_eq=b, bounds=[x1_b, x2_b, x3_b],
        method='interior-point', callback=None, options=None, x0=None)
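As a rough check (a sketch; res.x and res.fun are standard attributes of the OptimizeResult that linprog returns, but the numbers below are what I would expect, not verified output):
res = linprog(c, A_eq=A_eq, b_eq=b, bounds=[x1_b, x2_b, x3_b],
              method='interior-point')
print(res.x)    # should be close to [0.4, 0.3, 0.5]
print(res.fun)  # should be close to 0.031
The cheapest variables are saturated first: x2 and x3 sit at their upper bounds, and the remaining 0.4 goes to x1.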

Related

trim np arrays according to a list of starting points

I have a table, represented by an np.array like the following:
A = [[12, 412, 42, 54],
     [144, 2, 42, 4],
     [2, 43, 22, 10]]
And a list that contains the desired starting point of each row in A:
L = [0, 2, 1]
The desired output would be:
B = [[12, 412, 42, 54],
     [42, 4, np.nan, np.nan],
     [43, 22, 10, np.nan]]
Edit
I prefer to avoid using a for-loop for obvious reasons.
Try comparing L with the column indices, then use boolean masks to get/set items:
import numpy as np

# convert A to a numpy array for advanced indexing
A = np.array(A)
ll = A.shape[1]
# keep[i, j] is True where column j is at or past the starting point L[i]
keep = np.arange(ll) >= np.array(L)[:, None]
out = np.full(A.shape, np.nan)
# reversing each row of the mask left-aligns the kept values
out[keep[:, ::-1]] = A[keep]
print(out)
Output:
[[ 12. 412.  42.  54.]
 [ 42.   4.  nan  nan]
 [ 43.  22.  10.  nan]]
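To see the mask construction in isolation (a small sketch using the example's values):
keep = np.arange(4) >= np.array([0, 2, 1])[:, None]
# array([[ True,  True,  True,  True],
#        [False, False,  True,  True],
#        [False,  True,  True,  True]])
Each row of keep marks the values to take from A, and the row-reversed mask marks where to put them in out.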
My guess would be that a vectorized approach for this would be less efficient than explicit looping, because the result is fundamentally a jagged array, which NumPy does not support well.
However, a simple loop-based solution can be written, and it can be made faster with Numba's nb.njit() if needed:
import numpy as np
import numba as nb

@nb.njit
def jag_nb(arr, starts, empty=np.nan):
    result = np.full(arr.shape, empty)
    for i, x in enumerate(starts):
        if x != 0:
            result[i, :-x] = arr[i, x:]
        else:
            result[i, :] = arr[i, :]
    return result

A = np.array([[12, 412, 42, 54], [144, 2, 42, 4], [2, 43, 22, 10]])
L = np.array([0, 2, 1])
jag_nb(A, L)
# array([[ 12., 412.,  42.,  54.],
#        [ 42.,   4.,  nan,  nan],
#        [ 43.,  22.,  10.,  nan]])
Compared to the pure NumPy vectorized approach proposed in @QuangHoang's answer:
def jag_np(arr, starts, empty=np.nan):
    _, n = arr.shape  # number of columns
    keep = np.arange(n) >= starts[:, None]
    result = np.full(arr.shape, empty)
    result[keep[:, ::-1]] = arr[keep]
    return result
The Numba-based approach is noticeably faster, as shown with the following benchmarks:
import pandas as pd
import matplotlib.pyplot as plt

def benchmark(
    funcs,
    ii=range(4, 10, 1),
    is_equal=lambda x, y: np.allclose(x, y, equal_nan=True),
    seed=0,
    unit="ms",
    verbose=True,
    use_str=True
):
    labels = [func.__name__ for func in funcs]
    units = {"s": 0, "ms": 3, "µs": 6, "ns": 9}
    assert unit in units
    np.random.seed(seed)
    timings = {}
    for i in ii:
        m = n = 2 ** i
        if verbose:
            print(f"i={i}, n={n}")
        arr = np.random.random((m, n))
        starts = np.random.randint(0, n, m)
        base = funcs[0](arr, starts)
        timings[n] = []
        for func in funcs:
            res = func(arr, starts)
            is_good = is_equal(base, res)
            # %timeit is IPython magic; run this inside a Jupyter/IPython session
            timed = %timeit -n 64 -r 8 -q -o func(arr, starts)
            timing = timed.best
            timings[n].append(timing if is_good else None)
            if verbose:
                print(
                    f"{func.__name__:>24}"
                    f" {is_good!s:5}"
                    f" {timing * (10 ** units[unit]):10.3f} {unit}"
                    f" {timings[n][0] / timing:5.1f}x")
    return timings, labels

def plot(timings, labels, title=None, xlabel="Input Size / #", unit="ms"):
    n_rows = 1
    n_cols = 3
    fig, axs = plt.subplots(n_rows, n_cols, figsize=(8 * n_cols, 6 * n_rows), squeeze=False)
    units = {"s": 0, "ms": 3, "µs": 6, "ns": 9}
    df = pd.DataFrame(data=timings, index=labels).transpose()
    base = df[[labels[0]]].to_numpy()
    (df * 10 ** units[unit]).plot(marker="o", xlabel=xlabel, ylabel=f"Best timing / {unit}", ax=axs[0, 0])
    (df / base * 100).plot(marker="o", xlabel=xlabel, ylabel="Relative speed / %", logx=True, ax=axs[0, 1])
    (base / df).plot(marker="o", xlabel=xlabel, ylabel="Speed Gain / x", ax=axs[0, 2])
    if title:
        fig.suptitle(title)
    fig.patch.set_facecolor("white")

funcs = jag_np, jag_nb
timings, labels = benchmark(funcs, ii=range(4, 11))
plot(timings, labels, unit="ms")

How to set parameters for lightgbm when using customized objective function for multi-class classification?

I want to test a customized objective function for lightgbm in multi-class classification.
I have specified the parameter "num_class=3".
However, an error is thrown: "Number of classes must be 1 for non-multiclass training".
I am using Python 3.6 and lightgbm version 0.2.
# iris data
from sklearn import datasets
import lightgbm as lgb
import numpy as np

iris = datasets.load_iris()
X = iris['data']
y = iris['target']

# construct train-test split
num_train = int(X.shape[0] / 3 * 2)
idx = np.random.permutation(X.shape[0])
x_train = X[idx[:num_train]]
x_test = X[idx[num_train:]]
y_train = y[idx[:num_train]]
y_test = y[idx[num_train:]]

# softmax function
def softmax(x):
    '''
    input x: an np.array of n_sample * n_class
    return : an np.array of n_sample * n_class (probabilities)
    '''
    x = np.where(x > 100, 100, x)
    x = np.exp(x)
    return x / np.reshape(np.sum(x, 1), [x.shape[0], 1])

# objective function
def objective(y_true, y_pred):
    '''
    input:
        y_true: np.array of size (n_sample,)
        y_pred: np.array of size (n_sample, n_class)
    '''
    y_pred = softmax(y_pred)
    temp = np.zeros_like(y_pred)
    temp[range(y_pred.shape[0]), y_true] = 1
    gradient = y_pred - temp
    hessian = y_pred * (1 - y_pred)
    return [gradient, hessian]

# lightgbm model
model = lgb.LGBMClassifier(n_estimators=10000,
                           num_classes=3,
                           objective=objective,
                           nthread=4)
model.fit(x_train, y_train,
          eval_metric='multi_logloss',
          eval_set=[(x_test, y_test), (x_train, y_train)],
          eval_names=['valid', 'train'],
          early_stopping_rounds=200, verbose=100)
Let me answer my own question.
The arguments in the objective function should be:
y_true of size [n_sample, ]
y_pred of size [n_sample * n_class, ] instead of [n_sample, n_class]
To be more specific, y_pred should be like
y_pred = [first_class, first_class,..., second_class, second_class,..., third_class, third_class,...]
Moreover, gradient and hessian should be grouped in the same way.
def objective(y_true, y_pred):
    '''
    input:
        y_true: np.array of size [n_sample,]
        y_pred: np.array of size [n_sample * n_class, ]
    return:
        gradient and hessian should have exactly the same form as y_pred
    '''
    # num_train is the number of training samples (a global here);
    # order='F' undoes the class-by-class grouping of y_pred
    y_pred = np.reshape(y_pred, [num_train, 3], order='F')
    y_pred = softmax(y_pred)
    temp = np.zeros_like(y_pred)
    temp[range(y_pred.shape[0]), y_true] = 1
    gradient = y_pred - temp
    hessian = y_pred * (1 - y_pred)
    return [gradient.ravel(order='F'), hessian.ravel(order='F')]
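A small illustration of how order='F' regroups the flat predictions (a sketch with made-up scores for 2 samples and 3 classes):
import numpy as np

# flat layout: [s0_c0, s1_c0, s0_c1, s1_c1, s0_c2, s1_c2]
y_pred_flat = np.array([0.1, 0.2, 0.7, 0.3, 0.5, 0.9])
print(np.reshape(y_pred_flat, [2, 3], order='F'))
# [[0.1 0.7 0.5]
#  [0.2 0.3 0.9]]
Fortran order fills column by column, which matches the class-by-class grouping described above.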

ValueError on tensorflow while_loop shape invariants

import tensorflow as tf

cluster_size = tf.constant(6)  # size of the cluster
m = tf.constant(6)  # number of contigs (column size)
n = tf.constant(3)  # number of points in a single contig (column size)
contigs_index = tf.reshape(tf.range(0, m, 1, dtype=tf.int32), [1, -1])
contigs = tf.constant(
    [[1.1, 2.2, 3.3], [6.6, 5.5, 4.4], [7.7, 8.8, 9.9], [11.1, 22.2, 33.3],
     [66.6, 55.5, 44.4], [77.7, 88.8, 99.9]])

# pad zero to the right till fixed length
def rpad_with_zero(points):
    points = tf.slice(tf.pad(points, tf.reshape(tf.concat(
        [tf.zeros([1, 2], tf.int32), tf.add(
            tf.zeros([1, 2], tf.int32),
            tf.subtract(cluster_size, tf.size(points)))], 0), [2, -1]), "CONSTANT"),
        (0, tf.subtract(cluster_size, tf.size(points))),
        (1, cluster_size))
    return points

# calculate Pearson correlation coefficient r value
def calculate_pcc(row, contigs):
    r = tf.divide(tf.subtract(
        tf.multiply(tf.to_float(n), tf.reduce_sum(tf.multiply(row, contigs), 1)),
        tf.multiply(tf.reduce_sum(row, 1), tf.reduce_sum(contigs, 1))),
        tf.multiply(
            tf.sqrt(tf.subtract(
                tf.multiply(tf.to_float(n), tf.reduce_sum(tf.square(row), 1)),
                tf.square(tf.reduce_sum(row, 1)))),
            tf.sqrt(tf.subtract(tf.multiply(
                tf.to_float(n), tf.reduce_sum(tf.square(contigs), 1)),
                tf.square(tf.reduce_sum(contigs, 1))))))
    return r

# slice the first row from contigs
row = tf.slice(contigs, (0, 0), (1, 3))
# calculate pcc
r = calculate_pcc(row, contigs)
# cluster member indices whose r value is greater than 0.90, cast to int32
members0_index = tf.cast(tf.reshape(tf.where(tf.greater(r, 0.90)), [1, -1]),
                         tf.int32)
# members = index <intersection> members, padding the member indices with
# zeros at the right, to keep the fixed cluster length
members0_index = rpad_with_zero(
    tf.reshape(tf.sets.set_intersection(contigs_index, members0_index).values,
               [1, -1]))
# update index with the remaining element indices from contigs, and pad
contigs_index = rpad_with_zero(
    tf.reshape(tf.sets.set_difference(contigs_index, members0_index).values,
               [1, -1]))

# def condition(contigs, contigs_index, members0_index):
def condition(contigs_index, members0_index):
    return tf.greater(tf.count_nonzero(contigs_index),
                      0)  # iterate until there is a contig

# def body(contigs, contigs_index, members0_index):
def body(contigs_index, members0_index):
    i = tf.reshape(tf.slice(contigs_index, [0, 0], [1, 1]),
                   [])  # the first element in contigs_index
    row = tf.slice(contigs, (i, 0),
                   (1, 3))  # slice the ith contig from contigs
    r = calculate_pcc(row, contigs)
    members_index = tf.cast(tf.reshape(tf.where(tf.greater(r, 0.90)), [1, -1]),
                            tf.int32)
    members_index = rpad_with_zero(rpad_with_zero(
        tf.reshape(tf.sets.set_intersection(contigs_index, members_index).values,
                   [1, -1])))
    members0_index = tf.concat([members0_index, members_index], 0)
    contigs_index = rpad_with_zero(
        tf.reshape(tf.sets.set_difference(contigs_index, members_index).values,
                   [1, -1]))
    # return [contigs, contigs_index, members0_index]
    return [contigs_index, members0_index]

sess = tf.Session()
sess.run(tf.while_loop(condition, body,
                       # loop_vars=[contigs, contigs_index, members0_index],
                       loop_vars=[contigs_index, members0_index],
                       # shape_invariants=[contigs.get_shape(), contigs_index.get_shape(),
                       #                   tf.TensorShape([None, 6])]
                       shape_invariants=[contigs_index.get_shape(), tf.TensorShape([None, 6])]))
The error is:
ValueError: The shape for while_12/Merge:0 is not an invariant for the
loop. It enters the loop with shape (1, 6), but has shape (?, ?) after
one iteration. Provide shape invariants using either the
shape_invariants argument of tf.while_loop or set_shape() on the
loop variables.
It seems the variable contigs_index is responsible, but I really don't know why! I unfolded the loop and executed each statement, but could not find any shape mismatch!
shape_invariants=[contigs_index.get_shape(), tf.TensorShape([None, 6])] should become shape_invariants=[tf.TensorShape([None, None]), tf.TensorShape([None, 6])], to allow for shape changes of the contigs_index variable (in the rpad_with_zero call).
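Concretely, only the shape_invariants argument changes (a sketch of just the final call):
sess.run(tf.while_loop(condition, body,
                       loop_vars=[contigs_index, members0_index],
                       shape_invariants=[tf.TensorShape([None, None]),
                                         tf.TensorShape([None, 6])]))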

Calculate two dimensional pairwise distance on a large numpy three dimensional array

I have a numpy array of 3 million points in the form of [pt_id, x, y, z]. The goal is to return all pairs of points whose Euclidean distance lies between two numbers, min_d and max_d.
The Euclidean distance is computed on x and y only, not on z. However, I'd like to preserve the array with pt_id_from, pt_id_to, distance attributes.
I'm using scipy's pdist to calculate the distances:
import numpy as np
import scipy.spatial.distance

coords_arr = np.array([['pt1', 2452130.000, 7278106.000, 25.000],
                       ['pt2', 2479539.000, 7287455.000, 4.900],
                       ['pt3', 2479626.000, 7287458.000, 10.000],
                       ['pt4', 2484097.000, 7292784.000, 8.800],
                       ['pt5', 2484106.000, 7293079.000, 7.300],
                       ['pt6', 2484095.000, 7292891.000, 11.100]])
dists = scipy.spatial.distance.pdist(coords_arr[:, 1:3], 'euclidean')
np.savetxt('test.out', scipy.spatial.distance.squareform(dists), delimiter=',')
What should I do to return an array of form: [pt_id_from, pt_id_to, distance]?
You simply create a new array from the data by looping through all the possible combinations. The itertools module is excellent for this.
import itertools

n = coords_arr.shape[0]  # number of points
D = scipy.spatial.distance.squareform(dists)  # full distance matrix
data = []
for i, j in itertools.combinations(range(n), 2):
    pt_a = coords_arr[i, 0]
    pt_b = coords_arr[j, 0]
    d_ab = D[i, j]
    data.append([pt_a, pt_b, d_ab])
result_arr = np.array(data)
If memory is a problem, you might want to change the distance lookup from using the huge matrix D to looking up the value directly in dists using the i and j indices.
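For example (a sketch; condensed_index is a hypothetical helper derived from the documented layout of pdist's condensed output):
def condensed_index(n, i, j):
    # position of pair (i, j), with i < j, in the condensed distance vector
    return n * i - i * (i + 1) // 2 + (j - i - 1)

d_ab = dists[condensed_index(n, i, j)]  # same value as D[i, j]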
Well, ['pt1', 'pt2', distance_as_number] is not exactly possible. The closest you can get with mixed datatypes is a structured array, but then you can't do things like result[:2, 0]. You'll have to index field names and array indices separately, like result[['a','b']][0].
Here is my solution:
import numpy as np
import scipy.spatial.distance

coords_arr = np.array([['pt1', 2452130.000, 7278106.000, 25.000],
                       ['pt2', 2479539.000, 7287455.000, 4.900],
                       ['pt3', 2479626.000, 7287458.000, 10.000],
                       ['pt4', 2484097.000, 7292784.000, 8.800],
                       ['pt5', 2484106.000, 7293079.000, 7.300],
                       ['pt6', 2484095.000, 7292891.000, 11.100]])
dists = scipy.spatial.distance.pdist(coords_arr[:, 1:3], 'euclidean')

# Create a shortcut for `coords_arr.shape[0]`, which is
# the total number of points, hence `n`.
n = coords_arr.shape[0]

# `a` and `b` contain the indices of the points used to compute the
# distances in dists. In this example:
# a = [0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4]
# b = [1, 2, 3, 4, 5, 2, 3, 4, 5, 3, 4, 5, 4, 5, 5]
a = np.arange(n).repeat(np.arange(n - 1, -1, -1))
b = np.hstack([range(x, n) for x in range(1, n)])

min_d = 1000
max_d = 10000

# Find out which distances are in range.
in_range = np.less_equal(min_d, dists) & np.less_equal(dists, max_d)

# Define the datatype of the structured array which will be the result.
dtype = [('a', '<f8', (3,)), ('b', '<f8', (3,)), ('dist', '<f8')]

# Create an empty array. We fill it later because it makes the code cleaner.
# Its size is given by the sum over `in_range`, which is possible
# since True and False are equivalent to 1 and 0.
result = np.empty(np.sum(in_range), dtype=dtype)

# Fill the resulting array.
result['a'] = coords_arr[a[in_range], 1:4]
result['b'] = coords_arr[b[in_range], 1:4]
result['dist'] = dists[in_range]
print(result)

# In case you don't want a structured array at all, this is what you can do:
result = np.hstack([coords_arr[a[in_range], 1:],
                    coords_arr[b[in_range], 1:],
                    dists[in_range, None]]).astype('<f8')
print(result)
The structured array:
[([2479539.0, 7287455.0, 4.9], [2484097.0, 7292784.0, 8.8], 7012.389393067102)
([2479539.0, 7287455.0, 4.9], [2484106.0, 7293079.0, 7.3], 7244.7819152821985)
([2479539.0, 7287455.0, 4.9], [2484095.0, 7292891.0, 11.1], 7092.75912462844)
([2479626.0, 7287458.0, 10.0], [2484097.0, 7292784.0, 8.8], 6953.856268287403)
([2479626.0, 7287458.0, 10.0], [2484106.0, 7293079.0, 7.3], 7187.909362255481)
([2479626.0, 7287458.0, 10.0], [2484095.0, 7292891.0, 11.1], 7034.873843929257)]
The ndarray:
[[2479539.0, 7287455.0, 4.9, 2484097.0, 7292784.0, 8.8, 7012.3893],
[2479539.0, 7287455.0, 4.9, 2484106.0, 7293079.0, 7.3, 7244.7819],
[2479539.0, 7287455.0, 4.9, 2484095.0, 7292891.0, 11.1, 7092.7591],
[2479626.0, 7287458.0, 10.0, 2484097.0, 7292784.0, 8.8, 6953.8562],
[2479626.0, 7287458.0, 10.0, 2484106.0, 7293079.0, 7.3, 7187.9093],
[2479626.0, 7287458.0, 10.0, 2484095.0, 7292891.0, 11.1, 7034.8738]]
You can use np.where to get the coordinates of distances within a range, then generate a new list in your format, filtering out duplicate pairs. Like this:
>>> import scipy.spatial.distance
>>> import numpy as np
>>> coords_arr = np.array([['pt1', 2452130.000, 7278106.000, 25.000],
... ['pt2', 2479539.000, 7287455.000, 4.900],
... ['pt3', 2479626.000, 7287458.000, 10.000],
... ['pt4', 2484097.000, 7292784.000, 8.800],
... ['pt5', 2484106.000, 7293079.000, 7.300],
... ['pt6', 2484095.000, 7292891.000, 11.100]])
>>>
>>> dists = scipy.spatial.distance.pdist(coords_arr[:,1:3], 'euclidean')
>>> dists = scipy.spatial.distance.squareform(dists)
>>> x, y = np.where((dists >= 8000) & (dists <= 30000))
>>> [(coords_arr[x[i]][0], coords_arr[y[i]][0], dists[y[i]][x[i]]) for i in range(len(x)) if x[i] < y[i]]
[('pt1', 'pt2', 28959.576688895162), ('pt1', 'pt3', 29042.897927032005)]
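A variant of the same idea (a sketch; np.triu with k=1 zeroes out the diagonal and lower triangle, so the x[i] < y[i] filter becomes unnecessary):
>>> mask = (dists >= 8000) & (dists <= 30000)
>>> x, y = np.where(np.triu(mask, k=1))
>>> [(coords_arr[i][0], coords_arr[j][0], dists[i][j]) for i, j in zip(x, y)]
[('pt1', 'pt2', 28959.576688895162), ('pt1', 'pt3', 29042.897927032005)]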

defined function not found

I have a script as follows:
import numpy as np
import pandas as pd
import pdb

# conventions: W = fitness, A = affinity ; sex: 1=M, 0=F; alien: 1=alien,
# 0=native
# pop array order: W, A, sex, alien

def mkpop(n):
    W = np.repeat(a=1, repeats=n)
    A = np.random.normal(1, 0.1, size=n)
    A[A < 0] = 0
    alien = np.repeat(a=False, repeats=n)
    sex = np.random.randint(0, 2, n)
    pop = np.array([W, A, sex, alien])
    pop = np.transpose(pop)
    return pop

def migrate(pop, n=10, gParams=[1, 0.1]):
    W = np.random.gamma(shape=gParams[0], scale=gParams[1], size=n)
    A = np.repeat(1, n)
    # 0 is native; 1 is alien
    alien = np.repeat(True, n)
    # 0 is female
    sex = np.random.randint(0, 2, n)
    popAlien = np.array([W, A, sex, alien])
    popAlien = np.transpose(popAlien)
    pop = np.vstack((pop, popAlien))
    return pop

def mate(pop):
    # split into male and female
    f = pop[pop[:, 2] == 0]
    m = pop[pop[:, 2] == 1]
    # create transition matrices for native and alien mates
    # mate with native = m.!alien.transpose * f.alien
    # negate alien
    naLog = list(np.asarray(m[:, 3]) == False)
    naPdMat = np.outer(naLog, f[:, 1])
    # mate with alien = m.alien.transpose * affinity
    alPdMat = np.outer(m[:, 3], f[:, 1])
    # add transition matrices for probability density matrix
    pdMat = alPdMat + naPdMat
    # transition matrix is equal to the pd matrix / column sums
    colSums = np.sum(pdMat, axis=0)
    pMat = pdMat / colSums
    # select mates
    def choice(x):
        ch = np.random.choice(a=range(0, len(x)), p=x)
        return ch
    mCh = np.apply_along_axis(choice, 0, pMat)
    mCh = m[mCh, :]
    WMid = (f[:, 0] + mCh[:, 0]) / 2
    AMid = (f[:, 1] + mCh[:, 1]) / 2
    # assign fitness based on group affiliation; only native/alien matings have
    # modified fitness
    # reassign fitness and affinity based on group id and midparent vals
    W1 = np.where(
        (f[:, 3] == mCh[:, 3]) |
        ((f[:, 3] == 1) & (mCh[:, 3] == 0))
    )
    WMid[W1] = 1
    # number of offspring is a Poisson-distributed variable with lambda=2W
    nOff = map(lambda x: np.random.poisson(lam=x), 2 * WMid)
    # generate offspring
    # expand list of nOff to numbers of offspring per pair
    # realized offspring is index positions of W and A vals to be replicated
    # for offspring
    # this can be rewritten to return a matrix of the appropriate length. This
    # should work
    midVals = np.array([WMid, AMid]).T
    realOff = np.array([0, 0])
    for i in range(0, len(nOff)):
        sibs = np.repeat([np.array(midVals[i])], [nOff[i]], axis=0)
        realOff = np.vstack((realOff, sibs))
    offspring = np.delete(realOff, 0, 0)
    sex = np.random.randint(0, 2, len(offspring))
    alien = np.repeat(0, len(offspring))
    otherStats = np.array([sex, alien]).T
    offspring = np.hstack([offspring, otherStats])
    return offspring  # should return offspring

    def sim(nInit, nGen=100, nAlien=10, gParams=[1, 0.1]):
        gen = 0
        pop = mkpop
        stats = pd.DataFrame(columns=('gen', 'W', 'WMean', 'AMean', 'WVar', 'AVar'))
        while gen < nGen:
            pop = migrate(pop, nAlien, gParams)
            offspring = mate(pop)
            var = np.var(offspring, axis=0)
            mean = np.mean(offspring, axis=0)
            N = len(offspring)
            W = N / nInit
            genStats = N.append(W, gen, mean, var)
            stats = stats.append(genStats)
            print(N, gen)
            gen = gen + 1
        return stats

print mkpop(100)
print mate(mkpop(100))
#
sim(100, 100, 10, [1, 0.1])
Running this script outputs NameError: name 'sim' is not defined. It is apparent from the commands before the final one that all the other functions defined within this script work without a hitch. I'm not sure what is going on here, and there is probably some very easy fix that I'm overlooking. Ctags recognizes this function just fine. It's entirely possible that sim() doesn't actually work yet, as I haven't been able to debug it.
Your sim function is defined in mate's function scope, so it is invisible to the global scope. You need to fix the indentation so that sim is defined at the module level.
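A minimal sketch of the same scoping issue (hypothetical names):
def outer():
    def inner():  # defined only inside outer's scope
        return 42
    return 0

inner()  # NameError: name 'inner' is not defined
De-indenting sim so that def sim ... starts at column zero puts it back in the module scope, where the call at the bottom of the script can find it.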