Unable to interpolate data using Rbf in Scipy - python-2.7

I tried to interpolate the data using Rbf.
import numpy as np, matplotlib.pyplot as plt
from scipy.interpolate import Rbf
x = np.array([110, 112, 114, 115, 119, 120, 122, 124]).astype(float)
y = np.array([60, 61, 63, 67, 68, 70, 75, 81]).astype(float)
d = np.array([4, 6, 5, 3, 2, 1, 7, 9]).astype(float)
ulx, lrx = np.min(x), np.max(x)
uly, lry = np.max(y), np.min(y)
xi = np.linspace(ulx, lrx, 4)
yi = np.linspace(uly, lry, 4)
rbfi = Rbf(x, y, d)
di = rbfi(xi, yi)
plt.imshow(di)
plt.show()
However, I got:
TypeError: Invalid dimensions for image data
How is the solution?

With your original data (from the SO with griddata), this works:
Clean up the x,y, removing the outliers:
yreg=y.reshape(15,15)[:,[0]].repeat(15,1).flatten()
xreg=x.reshape(15,15)[[0],:].repeat(15,0).flatten()
ulx, lrx = np.min(xreg), np.max(xreg)
uly, lry = np.max(yreg), np.min(yreg)
N = 20
xi = np.linspace(ulx, lrx, N)
yi = np.linspace(uly, lry, N)
# grided_data = interpolate.griddata((xreg, yreg), z, (xi.reshape(1,-1), yi.reshape(-1,1)),
method='nearest',fill_value=0)
I don't think Rbf (and similar interpolators) handle broadcasting like griddata does. So I have to construct 2 vectors defining all the interpolation points.
yyi=np.repeat(yi,N)
xxi=np.repeat(xi[None,:],N,0).flatten()
rbfi=interpolate.Rbf(xreg,yreg,z,function='linear')
zzi=rbfi(xxi,yyi).reshape(N,N)
Timing wise, Rbf is noticeably slower than griddata.
With xreg, yreg, the interpolation results (for both methods) look similar to the image of z.reshape(15,15) - a square with 2 square plateaus in the lower left corner.

Related

Plotting a 3d line intersecting a surface in mplot3d (matplotlib python)

I am trying to plot a surface and a line which is passing through it. I want to have a plot where the portion of the line which is behind the surface, is hidden.
I tried this in matplotlib but the portion of the line behind the surface is also visible.
Line intersecting a surface
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
x = np.arange(0,10,1)
y = np.arange(0,10,1)
z = np.arange(0,10,1)
X, Y = np.meshgrid(x,y)
Z= np.ones((len(x),len(x)))*5
fig = plt.figure()
ax1 = fig.gca(projection='3d')
ax1.plot_surface(X, Y, Z, color='red', edgecolor='black')
ax1.plot(x,y,z,'-',color='black',linewidth=4)
plt.show()
In matplotlib, there is a concept of the zorder. Objects with a higher zorder are plotted in a layer on top of objects with a lower zorder, as per the docs. By default, the patch has a higher zorder than the line, which is why your red surface appears to block out the line. Here I have created a new set of coordinates for the background and foreground parts of the line, by selecting indices where z <= 5 or z >= 5 respectively. Then I plot these two sets of points separately, setting the zorder for all three - the surface and both of the lines.
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
x = np.arange(0, 10, 1)
y = np.arange(0, 10, 1)
z = np.arange(0, 10, 1)
x_background = x[z <= 5]
y_background = y[z <= 5]
z_background = z[z <= 5]
x_foreground = x[z >= 5]
y_foreground = y[z >= 5]
z_foreground = z[z >= 5]
X, Y = np.meshgrid(x, y)
Z = np.ones((len(x), len(x))) * 5
fig = plt.figure()
ax1 = fig.gca(projection='3d')
ax1.plot_surface(X, Y, Z, color='red', edgecolor='black', zorder=1)
ax1.plot(
z_background, z_background, z_background, '-', color='black', linewidth=4,
zorder=2)
ax1.plot(
z_foreground, z_foreground, z_foreground, '-', color='black', linewidth=4,
zorder=3)
plt.show()
Hope this helps!

Index is out of bounds with axis? What does that mean?

I have this code that takes my arrays, x1,y1,z1, vx1, vy1,vz1, and operates on them (this is the bulk of the code), and at the end I'm left with new arrays x2,y2,z2, vx2,vy2,vz2. What I want to do is the whole code, updating x1 with x2, y1 with y2, and so on....
However, when I set x1=x2... at the end of my code, I get this error message:
Traceback (most recent call last):
File "myfile.py", line 33, in <module>
File "myfile.py", line 30, in do_work
M[xn,step] = ((mass1[xn]*mass1[step+1]*((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.)))/ (abs((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.))**2.+(.2)**2 )**(3))
IndexError: index 999 is out of bounds for axis 0 with size 999
and I cannot figure out why. I don't understand why my code won't work for my new arrays x2,y2,z2,... etc. (I know my function is kind of a mess, but I'm afraid the problem might be in there and so that's why I'm posting it as-is)
import matplotlib.pyplot as plt
import numpy as np
import time
import itertools
start_time = time.time()
G=1
dt=.01
n1 = np.loadtxt('homo_sph_N1000_R3_v1.dat',usecols=(0),skiprows=0)
mass1= np.loadtxt('homo_sph_N1000_R3_v1.dat',usecols=(1),skiprows=0)
x1=np.loadtxt('homo_sph_N1000_R3_v1.dat',usecols=(2),skiprows=0)
y1=np.loadtxt('homo_sph_N1000_R3_v1.dat',usecols=(3),skiprows=0)
z1=np.loadtxt('homo_sph_N1000_R3_v1.dat',usecols=(4),skiprows=0)
vx1=np.loadtxt('homo_sph_N1000_R3_v1.dat',usecols=(5),skiprows=0)
vy1=np.loadtxt('homo_sph_N1000_R3_v1.dat',usecols=(6),skiprows=0)
vz1=np.loadtxt('homo_sph_N1000_R3_v1.dat',usecols=(7),skiprows=0)
npoints=len(n1)-1
M = np.zeros((npoints,npoints))
for timestep in xrange(0,2):
def do_work(xn, step):
#This is where I begin operating on intial arrays
M[xn,step] = ((mass1[xn]*mass1[step+1]*((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.)))/ (abs((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.))**2.+(.2)**2 )**(3))
[do_work(xn, step) for (xn,step) in itertools.product(xrange(0,npoints), xrange(0,npoints))]
a=[np.sum(arr) for arr in M]
a = np.array(a)
vxx = np.array(vx1)
vyy=np.array(vy1)
vzz=np.array(vz1)
vx=vxx[0:npoints]
vy=vyy[0:npoints]
vz=vzz[0:npoints]
vx2 = vx + (a +a)/2 * dt
vy2 = vy + (a +a)/2 * dt
vz2 = vz + (a+a)/2 * dt
xx = np.array(x1)
yy = np.array(y1)
zz = np.array(z1)
x=xx[0:npoints]
y=yy[0:npoints]
z=zz[0:npoints]
#x2,y2,z2.... are new arrays
x2= np.array((x+vx2*dt) + (a*dt**2)/2)
y2= np.array((y+vy2*dt) + (a*dt**2)/2)
z2= np.array((z+vz2*dt) + (a*dt**2)/2)
#I set x1....=x2... so this whole thing will loop using the new array values
x1=x2
y1=y2
z1=z2
subtract 1 from npoints
[do_work(xn, step) for (xn,step) in itertools.product(xrange(0,npoints-1), xrange(0,npoints-1))]

Surface Plotting on Python 2.7 with pyplot

I am new to Python. I have been trying to plot a data file that contains 3 columns and 1024 data points. While running the code the following error arises:
Traceback (most recent call last):
File "plot-data.py", line 27, in <module>
linewidth=0, antialiased=False)
File "/home/ritajit/.local/lib/python2.7/site-packages/mpl_toolkits/mplot3d/axes3d.py", line 1624, in plot_surface
X, Y, Z = np.broadcast_arrays(X, Y, Z)
File "/home/ritajit/.local/lib/python2.7/site-packages/numpy/lib/stride_tricks.py", line 249, in broadcast_arrays
shape = _broadcast_shape(*args)
File "/home/ritajit/.local/lib/python2.7/site-packages/numpy /lib/stride_tricks.py", line 184, in _broadcast_shape
b = np.broadcast(*args[:32])
ValueError: shape mismatch: objects cannot be broadcast to a single shape
My code looks like this
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.mlab import griddata
import matplotlib.cm as cm
from pylab import rcParams
rcParams['figure.figsize'] = 9, 9
## 3D surface_plot
fig = plt.figure()
axes = fig.add_subplot(111, projection='3d') #gca = get current axis
data = np.loadtxt('2D-data.txt')
x = data[:,0]
y = data[:,1]
z = data[:,2]
xi = np.unique(x)
yi = np.unique(y)
xv, yv = np.meshgrid(x,y)
Z = griddata(x, y, z, xi, yi, interp='linear')
# surface_plot with color grading and color bar
p = axes.plot_surface(xv,yv,Z, rstride=4, cstride=4, cmap=cm.RdBu,
linewidth=0, antialiased=False)
fig.colorbar(p, shrink=0.5)
axes.set_xlabel('$x$',fontsize=15)
axes.set_ylabel('$y$',fontsize=15)
axes.set_zlabel('$z$',fontsize=15)
plt.tight_layout()
fig.savefig("surface.pdf")
plt.show()
I am unable to work this through.
What wrong am I doing?
Is there any other way to plot 3d datafile?
A few lines from my data file:
1 2 1.30884
2 2 1.30925
3 2 1.30974
4 2 1.30841
5 2 1.30864
6 2 1.30795
The 1st,2nd,3rd columns are x,y,z respectively
Three main issues here:
You need to meshgrid the unique values, not the original ones
xi = np.unique(x)
yi = np.unique(y)
xv, yv = np.meshgrid(xi,yi)
You need to interpolate on the gridded values
griddata(x, y, z, xv, yv)
You need to plot Z, not z
p = axes.plot_surface(xv,yv,Z)
In total it looks like you could achieve pretty much the same by reshaping the data columns (but the small data excerpt is not enough to judge on this).
Last, matplotlib.mlab.griddata will be deprecated in the next version. As an alternative consider scipy.interpolate.griddata. Also have a look at the Contour plot of irregularly spaced data example.

Calculate gradient for only part of a shared variable array

I want to do the following:
import theano, numpy, theano.tensor as T
a = T.fvector('a')
w = theano.shared(numpy.array([1, 2, 3, 4], dtype=theano.config.floatX))
w_sub = w[1]
b = T.sum(a * w)
grad = T.grad(b, w_sub)
Here, w_sub is for example w[1] but I do not want to explicitly write out b in function of w_sub. Despite going through this and other related issues I can't solve it.
This is just to show you my problem. Actually, what I really want to do is a sparse convolution with Lasagne. The zero entries in the weight matrix do not need to be updated and therefore there is no need to calculate the gradient for these entries of w.
This is now the complete error message:
Traceback (most recent call last):
File "D:/Jeroen/Project_Lasagne_General/test_script.py", line 9, in <module>
grad = T.grad(b, w_sub)
File "C:\Anaconda2\lib\site-packages\theano\gradient.py", line 545, in grad
handle_disconnected(elem)
File "C:\Anaconda2\lib\site-packages\theano\gradient.py", line 532, in handle_disconnected
raise DisconnectedInputError(message)
theano.gradient.DisconnectedInputError: grad method was asked to compute the gradient with respect to a variable that is not part of the computational graph of the cost, or is used only by a non-differentiable operator: Subtensor{int64}.0
Backtrace when the node is created:
File "D:/Jeroen/Project_Lasagne_General/test_script.py", line 6, in <module>
w_sub = w[1]
When theano compiles the graph, it only sees the variables as explicitly defined in the graph. In your example, w_sub is not explicitly used in the computation of b and therefore is not part of the computation graph.
Using theano printing library with the following code, you can see on this
graph vizualization that indeed w_sub is not part of the graph of b.
import theano
import theano.tensor as T
import numpy
import theano.d3viz as d3v
a = T.fvector('a')
w = theano.shared(numpy.array([1, 2, 3, 4], dtype=theano.config.floatX))
w_sub = w[1]
b = T.sum(a * w)
o = b, w_sub
d3v.d3viz(o, 'b.html')
To fix the problem, you need to explicitly use w_sub in the computation of b.
Then you will be able to compute the gradients of b wrt w_sub and update the values of the shared variable as in the following example :
import theano
import theano.tensor as T
import numpy
a = T.fvector('a')
w = theano.shared(numpy.array([1, 2, 3, 4], dtype=theano.config.floatX))
w_sub = w[1]
b = T.sum(a * w_sub)
grad = T.grad(b, w_sub)
updates = [(w, T.inc_subtensor(w_sub, -0.1*grad))]
f = theano.function([a], b, updates=updates, allow_input_downcast=True)
f(numpy.arange(10))

Cubic spline / curve fitting

I need to determine parameters of Illumintaion change, which is defined by this continuous piece-wise polynomial C(t), where f(t) is is a cubic curve defined by the two boundary points (t1,c) and (t2,0), also f'(t1)=0 and f'(t2)=0.
Original Paper: Texture-Consistent Shadow Removal
Intensity curve is sampled from the normal on boundary of shadow and it looks like this:
Each row is sample, displaying illumintaion change.So X is number of column and Y is intensity of pixel.
I have my real data like this (one sample avaraged from all samples):
At all I have N samples and I need to determine parameters (c,t1,t2)
How can I do it?
I tried to do it by solving linear equation in Matlab:
avr_curve is average curve, obtained by averaging over all samples.
f(x)= x^3+a2*x^2+a1*x1+a0 is cubic function
%t1,t2 selected by hand
t1= 10;
t2= 15;
offset=10;
avr_curve= [41, 40, 40, 41, 41, 42, 42, 43, 43, 43, 51, 76, 98, 104, 104, 103, 104, 105, 105, 107, 105];
%gradx= convn(avr_curve,[-1 1],'same');
A= zeros(2*offset+1,3);
%b= zeros(2*offset+1,1);
b= avr_curve';
%for i= 1:2*offset+1
for i=t1:t2
i
x= i-offset-1
A(i,1)= x^2; %a2
A(i,2)= x; %a1
A(i,3)= 1; %a0
b(i,1)= b(i,1)-x^3;
end
u= A\b;
figure,plot(avr_curve(t1:t2))
%estimated cubic curve
for i= 1:2*offset+1
x= i-offset-1;
fx(i)=x^3+u(1)*x^2+u(2)*x+u(3);
end
figure,plot(fx(t1:t2))
part of avr_curve on [t1 t2]
cubic curve that I got (don't looks like avr_curve)
so what I'm doing wrong?
UPDATE:
Seems my error was due that I model cubic polynomial using 3 variables like this:
f(x)= x^3+a2*x^2+a1*x1+a0 - 3 variables
but then I use 4 variables everything seems ok:
f(x)= a3*x^3+a2*x^2+a1*x1+a0 - 4 variables
Here is the code in Matlab:
%defined by hand
t1= 10;
t2= 14;
avr_curve= [41, 40, 40, 41, 41, 42, 42, 43, 43, 43, 51, 76, 98, 104, 104, 103, 104, 105, 105, 107, 105];
x= [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21];
%x= [-10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; %real x axis
%%%model 1
%%f(x)= x^3+a2*x^2+a1*x1+a0 - 3 variables
%A= zeros(4,3);
%b= [43 104]';
%%cubic equation at t1
%A(1,1)= t1^2; %a2
%A(1,2)= t1; %a1
%A(1,3)= 1; %a0
%b(1,1)= b(1,1)-t1^3;
%%cubic equation at t2
%A(2,1)= t2^2; %a2
%A(2,2)= t2; %a1
%A(2,3)= 1; %a0
%b(2,1)= b(2,1)-t1^3;
%%1st derivative at t1
%A(3,1)= 2*t1; %a2
%A(3,2)= 1; %a1
%A(3,3)= 0; %a0
%b(3,1)= -3*t1^2;
%%1st derivative at t2
%A(4,1)= 2*t2; %a2
%A(4,2)= 1; %a1
%A(4,3)= 0; %a0
%b(4,1)= -3*t2^2;
%model 2
%f(x)= a3*x^3+a2*x^2+a1*x1+a0 - 4 variables
A= zeros(4,4);
b= [43 104]';
%cubic equation at t1
A(1,1)= t1^3; %a3
A(1,2)= t1^2; %a2
A(1,3)= t1; %a1
A(1,4)= 1; %a0
b(1,1)= b(1,1);
%cubic equation at t2
A(2,1)= t2^3; %a3
A(2,2)= t2^2; %a2
A(2,3)= t2; %a1
A(2,4)= 1; %a0
b(2,1)= b(2,1);
%1st derivative at t1
A(3,1)= 3*t1^2; %a3
A(3,2)= 2*t1; %a2
A(3,3)= 1; %a1
A(3,4)= 0; %a0
b(3,1)= 0;
%1st derivative at t2
A(4,1)= 3*t2^2; %a3
A(4,2)= 2*t2; %a2
A(4,3)= 1; %a1
A(4,4)= 0; %a0
b(4,1)= 0;
size(A)
size(b)
u= A\b;
u
%estimated cubic curve
%dx=[1:21]; % global view
dx=t1-1:t2+1; % local view in [t1 t2]
for x= dx
%fx(x)=x^3+u(1)*x^2+u(2)*x+u(3); % model 1
fx(x)= u(1)*x^3+u(2)*x^2+u(3)*x+u(4); % model 2
end
err= 0;
for x= dx
err= err+(fx(x)-avr_curve(x))^2;
end
err
figure,plot(dx,avr_curve(dx),dx,fx(dx))
spline on interval [t1-1 t2+1]
and on full interval
Disclaimer
I cannot give any guarantees on the correctness of the code or methods given below, always use your critical sense before using any of that.
0. Define the problem
You have this piecewise defined function
Where f(t) is a cubic function, in order to uniquely identify it, the following additional conditions are given
You want to recover the best values of the parameters t1, t2 and sigma that minimize the error with a given set of points.
This is essentially a curve fitting in the least squares sense.
1 Parametrize the f(t) cubic function
In order to compute the error between a candidate Cl(t) function and the set of points we need to compute f(t), its general form (being a cubic) is
So it seems that we have four additional parameters to consider. Indeed this parameters are totally defined by the free three parameters t1, t2 and sigma.
It is important to not confuse the free parameters with the dependent ones.
Given the additional conditions on f(t) we can set up this linear system
Which has one solution (as expected) given by
This tell us how to compute the parameters of the cubic given the three free parameters.
This way Cl(t) is completely determined, now it's time to find the best candidate.
2 Minimize the error
I would normally go for the least squares now.
Since this is not a linear function, there is no closed form for computing sigma, t1 and t2.
There are however numerical methods, like the Gauss-Newton one.
However one way or another it is required to compute the partial derivatives with respect of the three parameters.
I don't know how to compute the derivative with respect of a separation parameter like t1.
I've searched MathSE and found this question that address the same problem, however nobody answered.
Without the partial derivatives the least squares methods are over.
So I take a more practical road and implemented a brute force function in C that try every possible triplet of parameter and return the best match.
3 The brute force function
Given the nature of the problem, this turned out to be O(n^2) in the number of sample.
The algorithm proceeds as follow: Divide the sample set in three parts, the first one is the part of point before t1, the second one of the points between t1 and t2 and the last one of the points after t2.
The first part only is used to compute sigma, sigma is simply the arithmetic average of the points in part 1.
t1 and t2 are computed through a cycle, t1 is set to every possible point in the original points set, starting from the second and going forward.
For every choice of t1, t2 is set to every possible point after t1.
At each iteration an error is computed and if it is the minimum ever seen, the parameters used are saved.
The error is computer as the absolute value of residuals since the absolute value should be fast (surely faster than square) and it fits the purpose of a metric.
4 The code
#include <stdio.h>
#include <math.h>
float point_on_curve(float sigma, float t1, float t2, float t)
{
float a,b,c,d, K;
if (t <= t1)
return sigma;
if (t >= t2)
return 0;
K = (t1-t2)*(t1-t2)*(t1-t2);
a = -2*sigma/K;
b = 3*sigma*(t1+t2)/K;
c = -6*sigma*t1*t2/K;
d = sigma*t2*t2*(3*t1-t2)/K;
return a*t*t*t + b*t*t + c*t + d;
}
float compute_error(float sigma, float t1, float t2, int s, int dx, int* data, int len)
{
float error=0;
unsigned int i;
for (i = 0; i < len; i++)
error += fabs(point_on_curve(sigma, t1, t2, s+i*dx)- data[i]);
return error;
}
/*
* s is the starting time of the samples set
* dx is the separation in time between two sample (a.k.a. sampling period)
* data is the array of samples
* len is the number of samples
* sigma, t1, t2 are pointers to output parameters computed
*
* return 1 if not enough (3) samples, 0 if everything went ok.
*/
int curve_fit(int s, int dx, int* data, unsigned int len, float* sigma, float* t1, float* t2)
{
float l_sigma = 0;
float l_t1, l_t2;
float sum = 0;
float min_error, cur_error;
char error_valid = 0;
unsigned int i, j;
if (len < 3)
return 1;
for (i = 0; i < len; i++)
{
/* Compute sigma as the average of points <= i */
sum += data[i];
l_sigma = sum/(i+1);
/* Set t1 as the point i+1 */
l_t1 = s+(i+1)*dx;
for (j = i+2; j < len; j++)
{
/* Set t2 as the points i+2, i+3, i+4, ... */
l_t2 = s+j*dx;
/* Compute the error */
cur_error = compute_error(l_sigma, l_t1, l_t2, s, dx, data, len);
if (cur_error < min_error || !error_valid)
{
error_valid = 1;
min_error = cur_error;
*sigma = l_sigma;
*t1 = l_t1;
*t2 = l_t2;
}
}
}
return 0;
}
int main()
{
float sigma, t1, t2;
int data[]={41, 40, 40, 41, 41, 42, 42, 43, 43, 43, 51, 76, 98, 104, 104, 103, 104, 105, 105, 107, 105};
unsigned int len = sizeof(data)/sizeof(int);
unsigned int i;
for (i = 0; i < len; i++)
data[i] -= 107; /* Subtract the max */
if (curve_fit(1,1,data, len, &sigma, &t1, &t2))
printf("Not enough data!\n");
else
printf("Parameters: sigma = %.3f, t1 = %.3f, t2 = %.3f\n", sigma, t1, t2);
return 0;
}
Please note that the Cl(t) was defined as having 0 as its right limit, so the code assume this is the case.
This is why the max value (107) is subtracted from every sample, I have worked with the definition of Cl(t) given at the beginning and only late noted that the sample were biased.
By now I'm not going to adapt the code, you can easily add another parameter in the problem and redo the steps from 1 if needed, or simply translate the samples using the maximum value.
The output of the code is
Parameters: sigma = -65.556, t1 = 10.000, t2 = 14.000
Which match the points set given, considering that it is vertically translated by -107.