tf.data.TFRecordDataset.shard affecting accuracy_baseline (TensorFlow, Python 2.7)

I noticed that when I sharded my datasets, the accuracy_baseline stayed exactly the same throughout the training session. However, once I removed the sharding step, the accuracy_baseline fluctuated.
Does anyone have any insight into why sharding causes the accuracy baseline to stay constant? Below is the function I use.
Thanks
def input_fn(filenames, train, batch_size=5, buffer_size=10):
    epoch = None
    if train != True:
        epoch = 1
    if run_config.task_type == "ps":
        worker_num = None
        worker_index = None
    elif run_config.task_type == "master":
        worker_num = run_config._num_worker_replicas
        worker_index = 0
    else:
        worker_num = run_config._num_worker_replicas
        worker_index = run_config.task_id + 1
    d = tf.data.TFRecordDataset(filenames=filenames)
    d = d.shard(worker_num, worker_index)
    d = d.repeat(epoch)
    d = d.shuffle(buffer_size)
    d = d.map(parse)
    d = d.batch(batch_size)
    d = d.prefetch(1)
    iterator = d.make_one_shot_iterator()
    X, label = iterator.get_next()
    return X, label  # an Estimator input_fn must return features and labels; assumed missing from the paste
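For context, this is roughly how such an input_fn is wired into a TF 1.x Estimator (a sketch; estimator, train_files, and eval_files are assumed names, not from the post):

train_spec = tf.estimator.TrainSpec(
    input_fn=lambda: input_fn(train_files, train=True), max_steps=100000)
eval_spec = tf.estimator.EvalSpec(
    input_fn=lambda: input_fn(eval_files, train=False))
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)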

Related

Redis session does not store variables when modified in thread - Flask

I have a thread running inside a route, and the thread's job is to do some expensive work and store variables; then I need to use these variables in another Flask route.
When I pass the session variable (Redis-backed) into the thread function in order to add the data and extract it later, it does not contain the variables that I stored in it.
In contrast, when I declare a global_dict and pass it to the thread function instead of session, the code works great.
Since the thread function can be used by multiple users simultaneously, storing the data in a global_dict is not good practice.
Why does using session in my code not work?
In the following code, if I replace global_dict with session, I am not able to access the data in the /result route.
Per doc:
"Redis can handle up to 2^32 keys, and was tested in practice to handle at least 250 million keys per instance.
Every hash, list, set, and sorted set, can hold 2^32 elements.
In other words your limit is likely the available memory in your system."
P.S. Sorry for the long code blocks.
@app.route("/build", methods=["GET", "POST"])
@login_required
def build():
    if request.method == "POST":
        global th
        global finished
        finished = False

        @copy_current_request_context
        def operation(global_dict):
            global finished
            symbols = request.form.get("symbols")
            mc.set("symbols", symbols)
            if contains_multiple_words(symbols) == False:
                flash("The app purpose is to optimize a portfolio given a list of stocks. Please enter a list of stocks separated by a new row.")
                return redirect("/build")
            Build(session["user_id"], symbols.upper(), request.form.get("start"), request.form.get("end"), request.form.get("funds"), request.form.get("short"), request.form.get("volatility"), request.form.get("gamma"), request.form.get("return"))
            db.session.commit()
            try:
                df = yf.download(symbols, start=request.form.get("start"), end=request.form.get("end"), auto_adjust=False, prepost=False, threads=True, proxy=None)["Adj Close"].dropna(axis=1, how='all')
                failed = list(shared._ERRORS.keys())
                df = df.replace(0, np.nan)
                try:
                    global_dict['listofna'] = df.columns[df.isna().iloc[-2]].tolist() + failed
                except IndexError:
                    flash("Please enter valid stocks from Yahoo Finance.")
                    return redirect("/build")
                df = df.loc[:, df.iloc[-2, :].notna()]
            except ValueError:
                flash("Please enter valid symbols (taken from Yahoo Finance)")
                return redirect("/build")

            def enter_sql_data(app, df, nasdaq_exchange_info, Stocks):
                for ticker in df.columns:
                    ticker = ticker.upper()
                    if any(sublist[1] == ticker in sublist for sublist in nasdaq_exchange_info) is False:
                        ticker_ln = yf.Ticker(ticker).stats()["price"].get('longName')
                        if not ticker_ln:
                            ticker_ln = ticker
                        ticker_list = [ticker_ln, ticker]
                        with app.app_context():
                            new_stock = Stocks(ticker, ticker_ln)
                            db.session.add(new_stock)
                            db.session.commit()
                        nasdaq_exchange_info.extend([ticker_list])

            global nasdaq_exchange_info
            app1 = app._get_current_object()
            p1 = Process(target=enter_sql_data, args=[app1, df, nasdaq_exchange_info, Stocks])
            p1.start()
            prices = df.copy()
            fig = px.line(prices, x=prices.index, y=prices.columns, title='Price Graph')
            fig = fig.update_xaxes(rangeslider_visible=True)
            fig.update_layout(width=1350, height=900)
            global_dict['plot_json'] = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder)
            exp_cov = risk_models.exp_cov(prices, frequency=252)
            # plotting the covariance matrix
            heat = go.Heatmap(
                z=risk_models.cov_to_corr(exp_cov),
                x=exp_cov.columns.values,
                y=exp_cov.columns.values,
                zmin=0,  # sets the lower bound of the color domain
                zmax=1,
                xgap=1,  # sets the horizontal gap (in pixels) between bricks
                ygap=1,
                colorscale='RdBu'
            )
            title = 'Covariance matrix'
            layout = go.Layout(
                title_text=title,
                title_x=0.5,
                width=800,
                height=800,
                xaxis_showgrid=False,
                yaxis_showgrid=False,
                yaxis_autorange='reversed'
            )
            fig1 = go.Figure(data=[heat], layout=layout)
            fig1.update_layout(width=500, height=500)
            global_dict['plot_json1'] = json.dumps(fig1, cls=plotly.utils.PlotlyJSONEncoder)
            S = risk_models.CovarianceShrinkage(prices).ledoit_wolf()
            heat = go.Heatmap(
                z=risk_models.cov_to_corr(S),
                x=S.columns.values,
                y=S.columns.values,
                zmin=0,
                zmax=1,
                xgap=1,
                ygap=1,
                colorscale='RdBu'
            )
            title = 'Ledoit-Wolf shrinkage'
            layout = go.Layout(
                title_text=title,
                title_x=0.5,
                width=800,
                height=800,
                xaxis_showgrid=False,
                yaxis_showgrid=False,
                yaxis_autorange='reversed'
            )
            fig2 = go.Figure(data=[heat], layout=layout)
            fig2.update_layout(width=500, height=500)
            global_dict['plot_json2'] = json.dumps(fig2, cls=plotly.utils.PlotlyJSONEncoder)
            # Section 2 - Return estimation
            # It is often a bad idea to estimate expected returns with a simple estimate like the mean
            # of past returns. Research suggests you may be better off not providing expected returns
            # at all; you can then just find the min_volatility() portfolio or use HRP.
            mu = pypfopt.expected_returns.capm_return(prices)
            fig3 = px.bar(mu, orientation='h')
            fig3.update_layout(width=700, height=500)
            global_dict['plot_json3'] = json.dumps(fig3, cls=plotly.utils.PlotlyJSONEncoder)
            # Using risk models optimized on the efficient frontier to reduce to minimum volatility,
            # good for cryptocurrencies - not implemented in the website now.
            ef = EfficientFrontier(None, S)
            try:
                ef.min_volatility()
                weights = ef.clean_weights()
                nu = pd.Series(weights)
                fig4 = px.bar(nu, orientation='h')
                fig4.update_layout(width=700, height=500)
                global_dict['plot_json4'] = json.dumps(fig4, cls=plotly.utils.PlotlyJSONEncoder)
                av = ef.portfolio_performance()[1]
                global_dict['av'] = round(av, 3) * 1
                # if we want to buy the portfolio mentioned above
                df = df.iloc[[-1]]
                for col in df.columns:
                    if col.endswith(".L"):
                        df.loc[:, col] = df.loc[:, col] * GBPtoUSD()
                try:
                    latest_prices = df.iloc[-1]
                except IndexError:
                    flash("There is an issue with the Yahoo API, please try again later")
                    return redirect("/")
                # prices as of the day you are allocating
                if float(request.form.get("funds")) <= 0 or float(request.form.get("funds")) == " ":
                    flash("Amount needs to be a positive number")
                    return redirect("/build")
                if float(request.form.get("funds")) < float(latest_prices.min()):
                    flash("Amount is not high enough to cover the lowest priced stock")
                    return redirect("/build")
                try:
                    da = DiscreteAllocation(weights, latest_prices, total_portfolio_value=float(request.form.get("funds")))
                except TypeError:
                    delisted = df.columns[df.isna().any()].tolist()
                    delisted = ", ".join(delisted)
                    flash("Can't get latest prices for the following stock/s, please remove to continue: %s" % delisted)
                    return redirect("/build")
                alloc, global_dict['leftover'] = da.lp_portfolio()
                global_dict['alloc'] = alloc
                global_dict['latest_prices'] = latest_prices
            except ValueError:
                pass
            # Maximise return for a given risk, with L2 regularisation
            try:
                ef = EfficientFrontier(mu, S)
                ef.add_objective(objective_functions.L2_reg, gamma=float(request.form.get("gamma")))  # gamma is the tuning parameter
                ef.efficient_risk(int(request.form.get("volatility")) / 100)
                weights = ef.clean_weights()
                su = pd.DataFrame([weights])
                fig5 = px.pie(su, values=weights.values(), names=su.columns)
                fig5.update_traces(textposition='inside')
                fig5.update_layout(width=500, height=500, uniformtext_minsize=12, uniformtext_mode='hide')
                global_dict['plot_json5'] = json.dumps(fig5, cls=plotly.utils.PlotlyJSONEncoder)
                global_dict['perf'] = ef.portfolio_performance()
            except Exception as e:
                flash(str(e))
                return redirect("/build")
            # if we want to buy the portfolio mentioned above
            for col in df.columns:
                if col.endswith(".L"):
                    df.loc[:, col] = df.loc[:, col] * GBPtoUSD()
            latest_prices1 = df.iloc[-1]  # prices as of the day you are allocating
            if float(request.form.get("funds")) <= 0 or float(request.form.get("funds")) == " ":
                flash("Amount needs to be a positive number")
                return redirect("/build")
            if float(request.form.get("funds")) < float(latest_prices.min()):
                flash("Amount is not high enough to cover the lowest priced stock")
                return redirect("/build")
            da = DiscreteAllocation(weights, latest_prices, total_portfolio_value=float(request.form.get("funds")))
            alloc1, global_dict['leftover1'] = da.lp_portfolio()
            global_dict['alloc1'] = alloc1
            global_dict['latest_prices1'] = latest_prices1
            # Efficient semi-variance optimization
            returns = pypfopt.expected_returns.returns_from_prices(prices)
            returns = returns.dropna()
            es = EfficientSemivariance(mu, returns)
            try:
                es.efficient_return(float(request.form.get("return")) / 100)
            except ValueError as e:
                flash(str(e))
                return redirect("/build")
            global_dict['perf2'] = es.portfolio_performance()
            weights = es.clean_weights()
            # if we want to buy the portfolio mentioned above
            for col in df.columns:
                if col.endswith(".L"):
                    df.loc[:, col] = df.loc[:, col] * GBPtoUSD()
            latest_prices2 = df.iloc[-1]  # prices as of the day you are allocating
            if float(request.form.get("funds")) <= 0 or float(request.form.get("funds")) == " ":
                flash("Amount needs to be a positive number")
                return redirect("/build")
            if float(request.form.get("funds")) < float(latest_prices.min()):
                flash("Amount is not high enough to cover the lowest priced stock")
                return redirect("/build")
            da = DiscreteAllocation(weights, latest_prices, total_portfolio_value=float(request.form.get("funds")))
            alloc2, global_dict['leftover2'] = da.lp_portfolio()
            global_dict['alloc2'] = alloc2
            global_dict['latest_prices2'] = latest_prices2
            mc.delete("symbols")
            global_dict['ret'] = float(request.form.get("return"))
            global_dict['gamma'] = request.form.get("gamma")
            global_dict['volatility'] = request.form.get("volatility")
            finished = True

        global global_dict
        th = Thread(target=operation, args=[global_dict])
        th.start()
        return render_template("loading.html")
    else:
        if mc.get("symbols"):
            cached_symbols = mc.get("symbols")
        else:
            cached_symbols = ''
        availableCash = db.session.query(Users.cash).filter_by(id=session["user_id"]).first().cash
        return render_template("build.html", availableCash=round(availableCash, 4), GBP=GBPtoUSD(), nasdaq_exchange_info=nasdaq_exchange_info, cached_symbols=cached_symbols, top_50_crypto=top_50_crypto, top_world_stocks=top_world_stocks, top_US_stocks=top_US_stocks, top_div=top_div)

@app.route('/result')
def result():
    return render_template("built.html", av=global_dict['av'], leftover=global_dict['leftover'], alloc=global_dict['alloc'], ret=global_dict['ret'], gamma=global_dict['gamma'], volatility=global_dict['volatility'], perf=global_dict['perf'], perf2=global_dict['perf2'], alloc1=global_dict['alloc1'], alloc2=global_dict['alloc2'], plot_json=global_dict['plot_json'], plot_json1=global_dict['plot_json1'], plot_json2=global_dict['plot_json2'], plot_json3=global_dict['plot_json3'], plot_json4=global_dict['plot_json4'], plot_json5=global_dict['plot_json5'], leftover1=global_dict['leftover1'], leftover2=global_dict['leftover2'], listofna=(', '.join(global_dict['listofna'])))
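One hedged observation: Flask's session is persisted when the request that created it finishes, so writes made from a background thread after loading.html has already been returned are typically lost, even with a Redis-backed session. A common workaround is to write the results straight to the cache under a per-user key instead of going through session (a sketch; it assumes the mc client from the code above and JSON-serializable results):

import json

def operation(user_id):
    results = {}
    # ... expensive work fills the `results` dict ...
    mc.set("results:%s" % user_id, json.dumps(results))

@app.route('/result')
def result():
    data = json.loads(mc.get("results:%s" % session["user_id"]))
    return render_template("built.html", **data)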

My neural network takes too much time to train one epoch

I am training a neural network to classify traffic signs, but it takes too much time to train a single epoch, maybe 30+ minutes. I have set the batch size to 64 and the learning rate to 0.002. The input is 20x20 pixels with 3 channels, and the model summary shows that it is training 173,931 parameters. Is that too many, or is it reasonable?
Here is the network architecture
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary

class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        # Convolutional layers
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        # Max pooling layer
        self.pool = nn.MaxPool2d(2, 2)
        # Linear fully connected layers
        self.fc1 = nn.Linear(32 * 5 * 5, 200)
        self.fc2 = nn.Linear(200, 43)
        # Dropout
        self.dropout = nn.Dropout(p=0.25)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # 20x20 -> 10x10
        x = self.pool(F.relu(self.conv2(x)))  # 10x10 -> 5x5
        x = x.view(-1, 32 * 5 * 5)
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x
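As a sanity check on the parameter count (my own arithmetic, not from the summary output): conv1 has 3*16*3*3 + 16 = 448 parameters, conv2 has 16*32*3*3 + 32 = 4,640, fc1 has 800*200 + 200 = 160,200, and fc2 has 200*43 + 43 = 8,643, which sums to 173,931. That is a small network by modern standards, so the parameter count alone is unlikely to explain a 30-minute epoch.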
Here is the optimizer instance
import torch
import torch.optim as optim

gpu_available = torch.cuda.is_available()  # assumed flag, used by the training loop below
model = Network().cuda() if gpu_available else Network()  # assumed: the original never shows `model` being created
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.002)  # renamed from `optim`, which shadowed the module
Here is the training code
import numpy as np

epochs = 20
valid_loss_min = np.Inf
print("Training the network")
for epoch in range(1, epochs + 1):
    train_loss = 0
    valid_loss = 0
    model.train()
    for data, target in train_data:
        if gpu_available:
            data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * data.size(0)
    #########################
    ###### Validate #########
    model.eval()
    with torch.no_grad():  # no gradients needed during validation
        for data, target in valid_data:
            if gpu_available:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            loss = criterion(output, target)
            valid_loss += loss.item() * data.size(0)
    train_loss = train_loss / len(train_data.dataset)
    valid_loss = valid_loss / len(valid_data.dataset)  # the original divided `train`, a typo
    print("Epoch {}.....Train Loss = {:.6f}....Valid Loss = {:.6f}".format(epoch, train_loss, valid_loss))
    if valid_loss < valid_loss_min:
        torch.save(model.state_dict(), 'model_traffic.pt')
        print("Valid Loss min {:.6f} >>> {:.6f}".format(valid_loss_min, valid_loss))
        valid_loss_min = valid_loss  # update the running minimum so the comparison keeps working
I am using a GPU through Google Colab.
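A quick way to tell whether the epoch time goes into data loading or into the model is to time an empty pass over the loader (a diagnostic sketch; it assumes train_data is an iterable DataLoader as used above):

import time

start = time.time()
for data, target in train_data:
    pass  # iterate the loader without any model work
print("one pass over the data took %.1f s" % (time.time() - start))

If this alone accounts for most of the epoch, the bottleneck is the input pipeline (for example the DataLoader's num_workers setting) rather than the network.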

Custom Loss Function becomes zero when backpropagated

I am trying to write my own custom loss function based on the false positive and false negative rates. I made dummy code so you can check the first two definitions, and I added the rest so you can see how it is implemented. However, somewhere the gradient turns out to be zero. At which step does the gradient turn zero, and how can I check this? I would like to know how I can fix it :).
I tried to provide enough information for you to play around with as well, but if you miss anything please let me know!
requires_grad stays True at every step. Still, the loss does not update during training, and therefore the NN does not train.
import torch
import torch.nn as nn
from torch.autograd import Variable

y = Variable(torch.tensor((0, 0, 0, 1, 1, 1), dtype=torch.float), requires_grad=True)
y_pred = Variable(torch.tensor((0.333, 0.2, 0.01, 0.99, 0.49, 0.51), dtype=torch.float), requires_grad=True)
x = Variable(torch.tensor((0, 0, 0, 1, 1, 1), dtype=torch.float), requires_grad=True)
x_pred = Variable(torch.tensor((0.55, 0.25, 0.01, 0.99, 0.65, 0.51), dtype=torch.float), requires_grad=True)

def binary_y_pred(y_pred):
    y_pred.register_hook(lambda grad: print(grad))
    y_pred = y_pred + torch.tensor(0.5, requires_grad=True, dtype=torch.float)
    y_pred = y_pred.pow(5)  # this is my way of working around using torch.where()
    y_pred = y_pred.pow(10)
    y_pred = y_pred.pow(15)
    m = nn.Sigmoid()
    y_pred = m(y_pred)
    y_pred = y_pred - torch.tensor(0.5, requires_grad=True, dtype=torch.float)
    y_pred = y_pred * 2
    y_pred.register_hook(lambda grad: print(grad))
    return y_pred

def confusion_matrix(y_pred, y):
    TP = torch.sum(y * y_pred)
    TN = torch.sum((1 - y) * (1 - y_pred))
    FP = torch.sum((1 - y) * y_pred)
    FN = torch.sum(y * (1 - y_pred))
    k_eps = torch.tensor(1e-12, requires_grad=True, dtype=torch.float)
    FN_rate = FN / (TP + FN + k_eps)
    FP_rate = FP / (TN + FP + k_eps)
    return FN_rate, FP_rate

def dif_rate(FN_rate_y, FN_rate_x):
    dif = (FN_rate_y - FN_rate_x).pow(2)
    return dif

def custom_loss_function(y_pred, y, x_pred, x):
    y_pred = binary_y_pred(y_pred)
    FN_rate_y, FP_rate_y = confusion_matrix(y_pred, y)
    x_pred = binary_y_pred(x_pred)
    FN_rate_x, FP_rate_x = confusion_matrix(x_pred, x)
    FN_dif = dif_rate(FN_rate_y, FN_rate_x)
    FP_dif = dif_rate(FP_rate_y, FP_rate_x)
    cost = FN_dif + FP_dif
    return cost
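For what it's worth, evaluating the loss on the dummy tensors above already shows the symptom (a quick check I added, not from the original post; the pow(5).pow(10).pow(15) chain pushes the sigmoid far into saturation, so the gradients that flow back are numerically zero):

cost = custom_loss_function(y_pred, y, x_pred, x)
cost.backward()
print(y_pred.grad)  # (near-)zero values everywhere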
# I added the rest so you can see how it is implemented, but this piece does not fully run as-is! If you want this part to run as well, I can add more code.
class FeedforwardNeuralNetModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(FeedforwardNeuralNetModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.fc2(out)
        out = self.sigmoid(out)
        return out

model = FeedforwardNeuralNetModel(input_dim, hidden_dim, output_dim)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, betas=[0.9, 0.99], amsgrad=True)
criterion = torch.nn.BCELoss(weight=None, size_average=None, reduce=None, reduction='mean')

for epoch in range(num_epochs):
    train_err = 0
    for i, (samples, truths) in enumerate(train_loader):
        samples = Variable(samples)
        truths = Variable(truths)
        optimizer.zero_grad()               # reset gradients
        outputs = model(samples)            # forward pass
        loss2 = criterion(outputs, truths)  # calculate loss
        samples_y = Variable(samples_y)
        samples_x = Variable(samples_x)
        y_pred = model(samples_y)
        y = Variable(y, requires_grad=True)
        x_pred = model(samples_x)
        x = Variable(x, requires_grad=True)
        cost = custom_loss_function(y_pred, y, x_pred, x)
        loss = loss2 * 0 + cost             # checking only whether cost works
        loss.backward()
        optimizer.step()
        train_err += loss.item()
    train_loss.append(train_err)
I expect the model to update during training. There is no error message.
With your definitions: TP + FN = y and TN + FP = 1 - y. Then you get FN_rate = 1 - y_pred and FP_rate = y_pred. Your cost is then FN_rate + FP_rate = 1, the gradient of which is 0.
You can check this by hand or using a library for symbolic mathematics (e.g., SymPy):
from sympy import symbols
y, y_pred = symbols("y y_pred")
TP = y * y_pred
TN = (1-y)*(1-y_pred)
FP = (1-y)*y_pred
FN = y*(1-y_pred)
# let's ignore the eps for now
FN_rate = FN/(TP + FN)
FP_rate = FP/(TN + FP)
cost = FN_rate + FP_rate
from sympy import simplify
print(simplify(cost))
# output: 1
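Following on from the snippet above, you can also ask SymPy for the derivative directly to confirm the zero gradient:

from sympy import diff
print(simplify(diff(cost, y_pred)))
# output: 0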

How to get the right demand in the newsvendor model?

What I'm trying to accomplish is the newsvendor problem, where the program is supposed to run and give me the demand level that gives me the best chance of turning a profit, as per this link.
The issue is that when I run the code below, it prints an extra demand value in addition to the right one.
q = {5: 0.2, 6: 0.25, 7: 0.3, 8: 0.25}
w = 55
p = 80
s = 40
cul5 = 0.2
cul6 = 0.25
cul7 = 0.3
cul8 = 0.25
overage = w - s
underage = p - w
crit = overage / float((underage) + (overage))  # it's better to use floats within the parentheses in Python 2

cumul_q = {}
cumulativevalue = 0
for key, value in sorted(q.iteritems()):
    cumulativevalue = cumulativevalue + value
    # print key, value
    cumul_q[key] = cumulativevalue
# print cumul_q

previous_key = None
for key, value in sorted(cumul_q.iteritems(), reverse=True):
    cumulprob = 1 - value
    cumulprob1 = float(cumulprob)
    if crit < cumulprob1:
        continue
    elif crit > cumulprob1:
        print key
        previous_key = key
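For comparison, a minimal sketch of the textbook critical-fractile rule (my rewrite, runnable on both Python 2 and 3): it uses the underage ratio Cu/(Cu+Co) directly and stops at the first demand level whose cumulative probability reaches it, which avoids printing the extra key:

q = {5: 0.2, 6: 0.25, 7: 0.3, 8: 0.25}
w, p, s = 55, 80, 40
underage = p - w                             # profit lost per unit of unmet demand
overage = w - s                              # loss per unsold unit
crit = underage / float(underage + overage)  # 25/40 = 0.625

cumulative = 0
for demand, prob in sorted(q.items()):
    cumulative += prob
    if cumulative >= crit:                   # smallest q with F(q) >= critical ratio
        print(demand)                        # 7 for this data
        break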

Reformulating the AMPL car example

I am trying to migrate the AMPL car problem that ships as an example in the Ipopt source tarball. I am having problems with the end condition (reaching a point with zero speed at the final time) and with the cost function (minimizing the final time).
Can someone help me revise the following model?
# min tf
# dx/dt = v
# dv/dt = a - R*v^2
# x(0) = 0; x(tf) = 100
# v(0) = 0; v(tf) = 0
# -3 <= a <= 1 (a is the control variable)
#!Python3.5
from pyomo.environ import *
from pyomo.dae import *

N = 20
T = 10
L = 100

m = ConcreteModel()

# Parameters
m.R = Param(initialize=0.001)

# Variables
def x_init(m, i):
    return i * L / N

m.t = ContinuousSet(bounds=(0, 1000))
m.x = Var(m.t, bounds=(0, None), initialize=x_init)
m.v = Var(m.t, bounds=(0, None), initialize=L/T)
m.a = Var(m.t, bounds=(-3.0, 1.0), initialize=0)

# Derivatives
m.dxdt = DerivativeVar(m.x, wrt=m.t)
m.dvdt = DerivativeVar(m.v, wrt=m.t)

# Objectives
m.obj = Objective(expr=m.t[N])

# DAE
def _ode1(m, i):
    if i == 0:
        return Constraint.Skip
    return m.dxdt[i] == m.v[i]
m.ode1 = Constraint(m.t, rule=_ode1)

def _ode2(m, i):
    if i == 0:
        return Constraint.Skip
    return m.dvdt[i] == m.a[i] - m.R * m.v[i]**2
m.ode2 = Constraint(m.t, rule=_ode2)

# Constraints
def _init(m):
    yield m.x[0] == 0
    yield m.v[0] == 0
    yield ConstraintList.End
m.init = ConstraintList(rule=_init)

'''
def _end(m, i):
    if i == N:
        return m.x[i] == L and m.v[i] == 0
    return Constraint.Skip
m.end = ConstraintList(rule=_end)
'''

# Discretize
discretizer = TransformationFactory('dae.finite_difference')
discretizer.apply_to(m, nfe=N, wrt=m.t, scheme='BACKWARD')

# Solve
solver = SolverFactory('ipopt', executable='C:\\EXTERNOS\\COIN-OR\\win32-msvc12\\bin\\ipopt')
results = solver.solve(m, tee=True)
Currently, a ContinuousSet in Pyomo has to be bounded. This means that in order to solve a minimum time optimal control problem using this tool, the problem must be reformulated to remove the time scaling from the ContinuousSet. In addition, you have to introduce an extra variable to represent the final time. I've added an example to the Pyomo github repository showing how this can be done for your problem.
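Roughly, the reformulation looks like the following (an untested sketch in the spirit of that example, not the repository code itself): time is scaled to a fixed interval [0, 1], a variable tf multiplies the right-hand side of both ODEs, and tf becomes the objective.

from pyomo.environ import (ConcreteModel, Param, Var, Objective,
                           Constraint, TransformationFactory, SolverFactory)
from pyomo.dae import ContinuousSet, DerivativeVar

m = ConcreteModel()
m.R = Param(initialize=0.001)
m.tau = ContinuousSet(bounds=(0, 1))         # scaled time
m.tf = Var(bounds=(0, None), initialize=10)  # free final time
m.x = Var(m.tau, bounds=(0, None))
m.v = Var(m.tau, bounds=(0, None))
m.a = Var(m.tau, bounds=(-3.0, 1.0), initialize=0)
m.dxdtau = DerivativeVar(m.x, wrt=m.tau)
m.dvdtau = DerivativeVar(m.v, wrt=m.tau)

m.obj = Objective(expr=m.tf)                 # minimize the final time

def _ode1(m, i):
    if i == 0:
        return Constraint.Skip
    return m.dxdtau[i] == m.tf * m.v[i]      # dx/dtau = tf * v
m.ode1 = Constraint(m.tau, rule=_ode1)

def _ode2(m, i):
    if i == 0:
        return Constraint.Skip
    return m.dvdtau[i] == m.tf * (m.a[i] - m.R * m.v[i]**2)
m.ode2 = Constraint(m.tau, rule=_ode2)

# boundary conditions: start at rest at x=0, end at rest at x=100
m.x[0].fix(0)
m.v[0].fix(0)
m.x[1].fix(100)
m.v[1].fix(0)

TransformationFactory('dae.finite_difference').apply_to(
    m, nfe=20, wrt=m.tau, scheme='BACKWARD')
SolverFactory('ipopt').solve(m, tee=True)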