AWS Lambda : Unexpected Multiple Invocation Issue (no errors) - amazon-web-services

from binance import Client
import os
def get_position_amt(bot, symbol):
get_position = bot.futures_position_information(symbol=symbol)
amt = float(get_position[0]['positionAmt'])
return amt
def close_all_position(bot, symbol):
print('close_all_position')
amt = get_position_amt(bot, symbol)
if amt > 0:
close_order = bot.futures_create_order(
symbol=symbol, side="SELL", type="MARKET", quantity=amt
)
print(close_order)
elif amt < 0:
close_order = bot.futures_create_order(
symbol=symbol, side="BUY", type="MARKET", quantity=abs(amt)
)
print(close_order)
else:
print("no position : amt is zero")
def lambda_handler(event, context):
bot = Client(api_key=os.environ.get('api_key'), api_secret=os.environ.get('api_sc'))
data = eval(event['body'])
side=data.get('side')
symbol=data.get('market')
amt = get_position_amt(bot, symbol)
if side == "BUY":
if amt < 0:
close_all_position(bot, symbol)
elif side == "SELL":
if amt > 0:
close_all_position(bot, symbol)
ord_type = data.get('ord_type')
if ord_type == 'limit':
order = bot.futures_create_order(
symbol=data.get('market'), side=data.get('side'), type="LIMIT", timeInForce='GTC', quantity=float(data.get('volume')), price=float(data.get('price'))
)
print(order)
elif ord_type == 'market':
order = bot.futures_create_order(
symbol=data.get('market'), side=data.get('side'), type="MARKET", quantity=float(data.get('volume'))
)
print(order)
elif ord_type == 'close':
close_all_position(bot, data.get('market'))
else:
raise ValueError
I need only 1 request but this invoke 4 different requests.
4 request IDs are all different so this means it's not an error.
I tried changing time limit and number of retrys on configuration but nothing happened.
I would appreciate it if anyone know how to handle this problem.

Related

Redis session does not store variables when modified in thread - Flask

I have a thread that is running inside a route where the thread job is to do some expensive work, store variables and than I'll need to use these variables in another flask route.
When I am using the session variable (Redis) as a parameter in the thread function in order to add the data and extract it later it does not find the variables that I have stored in it.
In contrast, when I declare a global_dict and pass it to the thread function instead of session, the code works great.
As the thread function can be used by multiple users simultaneously, storing it in a global_dict is not a good practice.
Why using session in my code does not work?
In the following code, if I replace global_dict with session I won't be able to access it in the /result route.
Per doc:
"Redis can handle up to 2^32 keys, and was tested in practice to handle at least 250 million keys per instance.
Every hash, list, set, and sorted set, can hold 2^32 elements.
In other words your limit is likely the available memory in your system."
P.S Sorry for the long code blocks.
#app.route("/build",methods=["GET", "POST"])
#login_required
def build():
if request.method == "POST":
global th
global finished
finished= False
#copy_current_request_context
def operation(global_dict):
global finished
symbols = request.form.get("symbols")
mc.set("symbols", symbols)
if contains_multiple_words(symbols) == False:
flash("The app purpose is to optimize a portfolio given a list of stocks. Please enter a list of stocks seperated by a new row.")
return redirect("/build")
Build(session["user_id"], symbols.upper(), request.form.get("start"), request.form.get("end"), request.form.get("funds"), request.form.get("short"), request.form.get("volatility"), request.form.get("gamma"), request.form.get("return"))
db.session.commit()
try:
df = yf.download(symbols, start=request.form.get("start"), end=request.form.get("end"), auto_adjust = False, prepost = False, threads = True, proxy = None)["Adj Close"].dropna(axis=1, how='all')
failed=(list(shared._ERRORS.keys()))
df = df.replace(0, np.nan)
try:
global_dict['listofna']=df.columns[df.isna().iloc[-2]].tolist()+failed
except IndexError:
flash("Please enter valid stocks from Yahoo Finance.")
return redirect("/build")
df = df.loc[:,df.iloc[-2,:].notna()]
except ValueError:
flash("Please enter a valid symbols (taken from Yahoo Finance)")
return redirect("/build")
def enter_sql_data(app, df, nasdaq_exchange_info, Stocks):
for ticker in df.columns:
ticker=ticker.upper()
if any(sublist[1]==ticker in sublist for sublist in nasdaq_exchange_info) is False:
ticker_ln = yf.Ticker(ticker).stats()["price"].get('longName')
if not ticker_ln:
ticker_ln = ticker
ticker_list=[ticker_ln, ticker]
with app.app_context():
new_stock=Stocks(ticker, ticker_ln)
db.session.add(new_stock)
db.session.commit()
nasdaq_exchange_info.extend([ticker_list])
global nasdaq_exchange_info
app1 = app._get_current_object()
p1 = Process(target=enter_sql_data, args=[app1, df, nasdaq_exchange_info, Stocks])
p1.start()
prices = df.copy()
fig = px.line(prices, x=prices.index, y=prices.columns, title='Price Graph')
fig = fig.update_xaxes(rangeslider_visible=True)
fig.update_layout(width=1350, height=900)
global_dict['plot_json'] = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder)
exp_cov = risk_models.exp_cov(prices, frequency=252)
#plotting the covariance matrix
heat = go.Heatmap(
z = risk_models.cov_to_corr(exp_cov),
x = exp_cov.columns.values,
y = exp_cov.columns.values,
zmin = 0, # Sets the lower bound of the color domain
zmax = 1,
xgap = 1, # Sets the horizontal gap (in pixels) between bricks
ygap = 1,
colorscale = 'RdBu'
)
title = 'Covariance matrix'
layout = go.Layout(
title_text=title,
title_x=0.5,
width=800,
height=800,
xaxis_showgrid=False,
yaxis_showgrid=False,
yaxis_autorange='reversed'
)
fig1=go.Figure(data=[heat], layout=layout)
fig1.update_layout(width=500, height=500)
global_dict['plot_json1'] = json.dumps(fig1, cls=plotly.utils.PlotlyJSONEncoder)
S = risk_models.CovarianceShrinkage(prices).ledoit_wolf()
heat = go.Heatmap(
z = risk_models.cov_to_corr(S),
x = S.columns.values,
y = S.columns.values,
zmin = 0, # Sets the lower bound of the color domain
zmax = 1,
xgap = 1, # Sets the horizontal gap (in pixels) between bricks
ygap = 1,
colorscale = 'RdBu'
)
title = 'Ledoit-Wolf shrinkage'
layout = go.Layout(
title_text=title,
title_x=0.5,
width=800,
height=800,
xaxis_showgrid=False,
yaxis_showgrid=False,
yaxis_autorange='reversed'
)
fig2=go.Figure(data=[heat], layout=layout)
fig2.update_layout(width=500, height=500)
global_dict['plot_json2'] = json.dumps(fig2, cls=plotly.utils.PlotlyJSONEncoder)
#Section 2 -Return estimation
#it is often a bad idea to provide returns using a simple estimate like the mean of past returns. Research suggests that better off not providing expected returns – you can then just find the min_volatility() portfolio or use HRP.
mu = pypfopt.expected_returns.capm_return(prices)
fig3 = px.bar(mu, orientation='h')
fig3.update_layout(width=700, height=500)
global_dict['plot_json3'] = json.dumps(fig3, cls=plotly.utils.PlotlyJSONEncoder)
#using risk models optimized for the Efficient frontier to reduce to min volitility, good for crypto currencies - not implemented in the website now.
ef = EfficientFrontier(None, S)
try:
ef.min_volatility()
weights = ef.clean_weights()
nu = pd.Series(weights)
fig4 = px.bar(nu, orientation='h')
fig4.update_layout(width=700, height=500)
global_dict['plot_json4'] = json.dumps(fig4, cls=plotly.utils.PlotlyJSONEncoder)
av=ef.portfolio_performance()[1]
global_dict['av']=round(av, 3)*1
#if we want to buy the portfolio mentioned above
df = df.iloc[[-1]]
for col in df.columns:
if col.endswith(".L"):
df.loc[:,col] = df.loc[:,col]*GBPtoUSD()
try:
latest_prices = df.iloc[-1]
except IndexError:
flash("There is an issue with Yahoo API please try again later")
return redirect("/")
# prices as of the day you are allocating
if float(request.form.get("funds")) <= 0 or float(request.form.get("funds")) == " ":
flash("Amount need to be a positive number")
return redirect("/build")
if float(request.form.get("funds")) < float(latest_prices.min()):
flash("Amount is not high enough to cover the lowest priced stock")
return redirect("/build")
try:
da = DiscreteAllocation(weights, latest_prices, total_portfolio_value=float(request.form.get("funds")))
except TypeError:
delisted=df.columns[df.isna().any()].tolist()
delisted= ", ".join(delisted)
flash("Can't get latest prices for the following stock/s, please remove to contiue : %s" % delisted)
return redirect("/build")
alloc, global_dict['leftover'] = da.lp_portfolio()
global_dict['alloc']=alloc
global_dict['latest_prices']=latest_prices
except ValueError:
pass
#Maximise return for a given risk, with L2 regularisation
try:
ef = EfficientFrontier(mu, S)
ef.add_objective(objective_functions.L2_reg, gamma=(float(request.form.get("gamma")))) # gamme is the tuning parameter
ef.efficient_risk(int(request.form.get("volatility"))/100)
weights = ef.clean_weights()
su = pd.DataFrame([weights])
fig5 = px.pie(su, values=weights.values(), names=su.columns)
fig5.update_traces(textposition='inside')
fig5.update_layout(width=500, height=500, uniformtext_minsize=12, uniformtext_mode='hide')
global_dict['plot_json5'] = json.dumps(fig5, cls=plotly.utils.PlotlyJSONEncoder)
global_dict['perf'] =ef.portfolio_performance()
except Exception as e:
flash(str(e))
return redirect("/build")
#if we want to buy the portfolio mentioned above
for col in df.columns:
if col.endswith(".L"):
df.loc[:,col] = df.loc[:,col]*GBPtoUSD()
latest_prices1 = df.iloc[-1] # prices as of the day you are allocating
if float(request.form.get("funds")) <= 0 or float(request.form.get("funds")) == " ":
flash("Amount need to be a positive number")
return redirect("/build")
if float(request.form.get("funds")) < float(latest_prices.min()):
flash("Amount is not high enough to cover the lowest priced stock")
return redirect("/build")
da = DiscreteAllocation(weights, latest_prices, total_portfolio_value=float(request.form.get("funds")))
alloc1, global_dict['leftover1'] = da.lp_portfolio()
global_dict['alloc1']=alloc1
global_dict['latest_prices1']=latest_prices1
#Efficient semi-variance optimization
returns = pypfopt.expected_returns.returns_from_prices(prices)
returns = returns.dropna()
es = EfficientSemivariance(mu, returns)
try:
es.efficient_return(float(request.form.get("return"))/100)
except ValueError as e:
flash(str(e))
return redirect("/build")
global_dict['perf2']=es.portfolio_performance()
weights = es.clean_weights()
#if we want to buy the portfolio mentioned above
for col in df.columns:
if col.endswith(".L"):
df.loc[:,col] = df.loc[:,col]*GBPtoUSD()
latest_prices2 = df.iloc[-1] # prices as of the day you are allocating
if float(request.form.get("funds")) <= 0 or float(request.form.get("funds")) == " ":
flash("Amount need to be a positive number")
return redirect("/build")
if float(request.form.get("funds")) < float(latest_prices.min()):
flash("Amount is not high enough to cover the lowest priced stock")
return redirect("/build")
da = DiscreteAllocation(weights, latest_prices, total_portfolio_value=float(request.form.get("funds")))
alloc2, global_dict['leftover2'] = da.lp_portfolio()
global_dict['alloc2']=alloc2
global_dict['latest_prices2']=latest_prices2
mc.delete("symbols")
global_dict['ret']=float(request.form.get("return"))
global_dict['gamma']=request.form.get("gamma")
global_dict['volatility']=request.form.get("volatility")
finished = True
global global_dict
th = Thread(target=operation, args=[global_dict])
th.start()
return render_template("loading.html")
else:
if mc.get("symbols"):
cached_symbols=mc.get("symbols")
else:
cached_symbols=''
availableCash=db.session.query(Users.cash).filter_by(id=session["user_id"]).first().cash
return render_template("build.html", availableCash=round(availableCash, 4), GBP=GBPtoUSD(), nasdaq_exchange_info=nasdaq_exchange_info, cached_symbols=cached_symbols, top_50_crypto=top_50_crypto, top_world_stocks=top_world_stocks, top_US_stocks=top_US_stocks, top_div=top_div)
app.route('/result')
def result():
return render_template("built.html",av=global_dict['av'], leftover=global_dict['leftover'], alloc=global_dict['alloc'], ret=global_dict['ret'],gamma=global_dict['gamma'],volatility=global_dict['volatility'],perf=global_dict['perf'], perf2=global_dict['perf2'], alloc1=global_dict['alloc1'], alloc2=global_dict['alloc2'], plot_json=global_dict['plot_json'], plot_json1=global_dict['plot_json1'], plot_json2=global_dict['plot_json2'], plot_json3=global_dict['plot_json3'], plot_json4=global_dict['plot_json4'], plot_json5=global_dict['plot_json5'], leftover1=global_dict['leftover1'], leftover2=global_dict['leftover2'],listofna=(', '.join(global_dict['listofna'])))

ZeroDivisionError: float division by zero even after using python future module

I am facing an error while running a github code. I think the code is perfect, But i think am facing some dependency issues. Can anyone tell me what could possible be the reason behind this error. I am using python 2.7.
from __future__ import division, print_function
.
.
def time_step(self, xt):
xt = np.reshape(xt, newshape=self.dimensions)
ret_val = 0.
self.buffer.append(xt)
self.present.time_step(xt)
if self.t >= self.buffer_len:
pst_xt = self.buffer[0]
self.past.time_step(pst_xt)
if self.t >= self.present.theta + self.past.theta:
ret_val = self.comparison_function(self.present, self.past,
self.present.alpha)
self.ma_window.append(ret_val)
if self.t % self.ma_recalc_delay == 0:
self.anomaly_mean = bn.nanmean(self.ma_window)
self.anomaly_std = bn.nanstd(self.ma_window, ddof=self.ddof)
if self.anomaly_std is None or self.t < len(self.ma_window):
anomaly_density = 0
else:
normalized_score = (ret_val - self.anomaly_mean)/self.anomaly_std
if -4 <= normalized_score <= 4:
anomaly_density = CDF_TABLE[round(normalized_score, 3)]
elif normalized_score > 4:
anomaly_density = 1.
else:
anomaly_density = 0.
self.t += 1
return ret_val, anomaly_density
The code line which is giving error is the following,
normalized_score = (ret_val - self.anomaly_mean)/self.anomaly_std
Wrap it in try except, I used 0 as except value but you can change it per your needs:
try:
normalized_score = (ret_val - self.anomaly_mean)/self.anomaly_std
except ZeroDivisionError:
normalized_score = 0

django - how to replace repeated code in form validation

In my django forms.py file, I am trying to replace two occurrences of repeated validation code. Each attempt I make to have only one occurrence of each, does not seem to work.
I cannot figure out how to write the code so that I have only one occurrence of each of the repeated code in the validation. It should be possible, but I cannot understand how to achieve this.
I am hoping that someone can help me out as this has me confused.
Here is my validation code:
def clean(self):
cd_cdf = super(CertificationDetailsForm, self).clean()
# Must check the most specific cases first, then the general cases.
if 'certification_type' in cd_cdf and cd_cdf['certification_type'] == '':
self._errors['certification_type'] = self.error_class([_("This field is required.")])
elif 'certification_type' in cd_cdf and cd_cdf['certification_type'] == display_types.ENTER_MY_OWN_TYPE_DESCRIPTION:
if 'certification_type_description' in cd_cdf and len(cd_cdf['certification_type_description'].strip()) == 0:
self._errors['certification_type_description'] = self.error_class([_("This field is required.")])
# repeated code occurrence #1.1.
if 'certification_title' in cd_cdf and len(cd_cdf['certification_title'].strip()) == 0:
self._errors['certification_title'] = self.error_class([_("This field is required.")])
# repeated code occurrence #2.1.
if 'certification_date' in cd_cdf and cd_cdf['certification_date'] is not None:
if cd_cdf['certification_date'] > date.today():
self._errors['certification_date'] = self.error_class([_("Date must not be greater than today.")])
elif 'certification_type' in cd_cdf and cd_cdf['certification_type'] != display_types.ENTER_MY_OWN_DETAILS:
# repeated code occurrence #1.2.
if 'certification_title' in cd_cdf and len(cd_cdf['certification_title'].strip()) == 0:
self._errors['certification_title'] = self.error_class([_("This field is required.")])
# repeated code occurrence #2.2.
if 'certification_date' in cd_cdf and cd_cdf['certification_date'] is not None:
if cd_cdf['certification_date'] > date.today():
self._errors['certification_date'] = self.error_class([_("Date must not be greater than today.")])
elif 'certification_type' in cd_cdf and cd_cdf['certification_type'] == display_types.ENTER_MY_OWN_DETAILS:
if 'certification_description' in cd_cdf and len(cd_cdf['certification_description'].strip()) == 0:
self._errors['certification_description'] = self.error_class([_("This field is required.")])
# remove the entered value and/or assign a default value, when the certification type only requires minimum data.
if 'certification_type_description' in cd_cdf and len(cd_cdf['certification_type_description'].strip()) > 0:
cd_cdf['certification_type_description'] = None
if 'certification_title' in cd_cdf and len(cd_cdf['certification_title'].strip()) > 0:
cd_cdf['certification_title'] = None
if 'certification_institution' in cd_cdf and len(cd_cdf['certification_institution'].strip()) > 0:
cd_cdf['certification_institution'] = None
if 'certification_date' in cd_cdf and cd_cdf['certification_date'] is not None:
cd_cdf['certification_date'] = None
return cd_cdf
Here is the types code, just in case:
CERTIFICATE = 1
CERTIFICATE_LEVEL_I = 2
CERTIFICATE_LEVEL_II = 3
CERTIFICATE_LEVEL_III = 4
CERTIFICATE_LEVEL_IV = 5
STANDARD_CERTIFICATE = 6
INTERMEDIATE_CERTIFICATE = 7
ADVANCED_CERTIFICATE = 8
ACADEMIC_CERTIFICATE = 9
PROFESSIONAL_CERTIFICATE = 10
OTHER_CERTIFICATE = 11
ENTER_MY_OWN_TYPE_DESCRIPTION = 7777 # 7777 triggers a hidden text field to be displayed.
ENTER_MY_OWN_DETAILS = 9999
CERTIFICATION_TYPES = (
(CERTIFICATE, _('Certificate')),
(CERTIFICATE_LEVEL_I, _('Certificate Level I')),
(CERTIFICATE_LEVEL_II, _('Certificate Level II')),
(CERTIFICATE_LEVEL_III, _('Certificate Level III')),
(CERTIFICATE_LEVEL_IV, _('Certificate Level IV')),
(STANDARD_CERTIFICATE, _('Standard Certificate')),
(INTERMEDIATE_CERTIFICATE, _('Intermediate Certificate')),
(ADVANCED_CERTIFICATE, _('Advanced Certificate')),
(ACADEMIC_CERTIFICATE, _('Academic Certificate')),
(PROFESSIONAL_CERTIFICATE, _('Professional Certificate')),
(OTHER_CERTIFICATE, _('Other Certificate')),
(ENTER_MY_OWN_TYPE_DESCRIPTION, _('Enter my own Type Description')),
(ENTER_MY_OWN_DETAILS, _('Enter my own details'))
)
Like this:
def clean(self):
cd_cdf = super(CertificationDetailsForm, self).clean()
ctype = 'certification_type'
ctypedesc = 'certification_type_description'
ctitle = 'certification_title'
cdate = 'certification_date'
cdesc = 'certification_description'
cinst = 'certification_institution'
# Must check the most specific cases first, then the general cases.
if ctype in cd_cdf:
if cd_cdf[ctype] == '':
self._errors[ctype] = self.error_class([_("This field is required.")])
elif (cd_cdf[ctype] == display_types.ENTER_MY_OWN_TYPE_DESCRIPTION) or (cd_cdf[ctype] != display_types.ENTER_MY_OWN_DETAILS):
if cd_cdf[ctype] == display_types.ENTER_MY_OWN_TYPE_DESCRIPTION:
if ctypedesc in cd_cdf and len(cd_cdf[ctypedesc].strip()) == 0:
self._errors[ctypedesc] = self.error_class([_("This field is required.")])
else:
if ctitle in cd_cdf and len(cd_cdf[ctitle].strip()) == 0:
self._errors[ctitle] = self.error_class([_("This field is required.")])
if cdate in cd_cdf and cd_cdf[cdate] is not None:
if cd_cdf[cdate] > date.today():
self._errors[cdate] = self.error_class([_("Date must not be greater than today.")])
elif cd_cdf[ctype] == display_types.ENTER_MY_OWN_DETAILS:
if cdesc in cd_cdf and len(cd_cdf[cdesc].strip()) == 0:
self._errors[cdesc] = self.error_class([_("This field is required.")])
# remove the entered value and/or assign a default value, when the certification type only requires minimum data.
forcheck = [ctypedesc, ctitle, cinst]
for i in forcheck:
if i in cd_cdf and len(cd_cdf[i].strip()) > 0:
cd_cdf[i] = None
if cdate in cd_cdf and cd_cdf[cdate] is not None:
cd_cdf[cdate] = None
return cd_cdf
I replaced your frequently mentioned strings with self-obvious variables and joined the second and third conditions. I haven't tested this.
I agree with the first answer, it's ineligant and unpythonic, but I have no idea what all these conditions are about so I can't shorten the code any further.

Extensible Hashing with unique keys

I have a database that consists of tuples like so
'The Abyss,1989,LaserDisc,Science Fiction,James Cameron,James Cameron,USA,20th Century Fox,$0.00'
I want to concatenate the movie title with the year to make the unique key for each bucket. But unsure how to... I think it would be beneficial to use extensible hashing for this,
I would like to be able to search by movies being DVD or VHS as well as searching through and finding movies by year. I would consist buckets of years in a decade increments and types of movie (DVD, VHS)
Right now I just have a simple add, remove, and get functionality
class HTable(object):
def __init__(self, table = [], maximum = 100):
#table = dict, maximum = maximum amount of elements.
assert type(table) == dict
self.table = table
self.max = maximum
def lookup(self, data):
#Lookup a value in our table.
if type(data) == int or type(data) == long:
try:
if self.table[data % self.max] != None:
return (data % self.max, self.table[data % self.max])
else:
return None
except:
return None
else:
try:
obj1 = self.string2int(data) % self.max
obj2 = self.table[self.string2int(data) % self.max]
if obj2 != None:
return (obj1, obj2)
else:
return None
except:
return None
def append(self, data):
#data = int, string, object, etc. No duplicates allowed.
assert len(self.table) < self.max
if type(data) == int or type(data) == long:
original = data
if data >= self.max:
raise IOError, "Value to large to append into hash table. Max limit reached."
else:
original = data
data = self.string2int(data)
index = data % self.max
if int(index) >= self.max:
raise IOError, "Data: %s, exceeded your maximum limit of %s, with the size of %s." %(str(original), str(self.max), str(index))
try:
if type(original) == int or type(original) == long:
self.table[data % self.max] = data
else:
self.table[data % self.max] = original
return self.table
except:
if len(self.table) < data % self.max:
while len(self.table) < data % self.max:
self.table.append(None)
if type(original) == int:
self.table.insert(data % self.max, data)
else:
self.table.insert(data % self.max, str(original))
return self.table
def string2int(self, STRING):
#Convert a string into a 'hash' integer.
return sum([ord(j) for j in STRING])
def isEmpty(self):
#Return True if empty, false otherwise.
if len(self.table) == 0:
return True
else:
return False
def isFull(self):
#Returns TRUE if full, false otherwise.
if len(self.table) == self.max:
return True
else:
return False
def remove(self, key):
#Remove the data located at the given index/key. Key can be a index key(integer), or the data itself. For example: self.remove(key = 'value') or self.remove(key = 10).
try:
self.table.pop(int(key))
return 1
except:
try:
self.table.remove(key)
return 1
except:
return False
def get(self, key):
#Get the data in our HASH Table, using the given index(key).
try:
return self.table[int(key)]
except:
return None
def output(self):
#Return our current HASH Table.
return self.table

How to prevent Selenium from opening a pop-up to save the file?

I'm scraping the site Quicker.com but every time getting an error on random pages.
The error is:
UnexpectedAlertPresentException: Alert Text: C:\Users\HEYPIL~1\AppData\Local\Temp\Pkwnr4IA.php.part could not be saved, because the source file could not be read.
Try again later, or contact the server administrator.
<super: <class 'WebDriverException'>, <UnexpectedAlertPresentException object>>
My code:
from selenium import webdriver
import csv
import re
import hashlib
from selenium.common.exceptions import UnexpectedAlertPresentException
from selenium.common.exceptions import WebDriverException
import socket
import time
import datetime
ts = time.time()
st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
date = st.encode('utf8')
IPAdd = socket.gethostbyname(socket.gethostname())
counter = 5
initial = []
base = "http://mumbai.quikr.com/Individual/0-50000000/Houses-Apartments-for-Sale/w1072?imageAds=Y&l=You_are-Price"
string = "&page="
while(counter == 5 or counter < 40):
base2 = base+string+str(counter)
if (counter < 39):
initial.append(base2)
elif(counter == 40):
initial.append(base)
else:
base2 = base
counter += 1
for n in initial:
result = []
driver = webdriver.Firefox()
driver.get(n)
main_page = '//a[#class="adttllnk unbold"]'
for a in driver.find_elements_by_xpath(main_page):
l = a.get_attribute('href')
result.append(l)
print len(result)
driver.close()
for url in result:
try:
driver = webdriver.Firefox()
driver.get(url)
items = []
desc_path = '//div[#id="ad_description"]'
img_path = '//div[#class="bigImg_wapp"]//img[#src]'
prop = '//div[#itemprop="name"]//h1[#class="ad_title translate"]'
amenities = '//div[#class="ad-atrbt-panel"]//span[#class="ad-atrbt-val"]//span[#class="attribVal newattribVal"]'
phone = '//span[#class="NoVerified-Text"]'
for x1 in driver.find_elements_by_xpath(img_path):
img = (x1.get_attribute('src')).encode('utf8')
print '\n'+img
for x2 in driver.find_elements_by_xpath(desc_path):
desc = (x2.text).encode('utf8')
print '\n'+ desc
for x3 in driver.find_elements_by_xpath(prop):
prop_title = (x3.text).encode('utf8')
print '\n'+prop_title
for x4 in driver.find_elements_by_xpath(amenities):
value = (x4.text).encode('utf8')
items.append(value)
print '\n'
print items
locality = items[0]
locality1 = locality.encode('utf8')
a = (locality1 if (isinstance(locality1,int) == False) else "")
bhk = items[1]
bhk1 = bhk.encode('utf8')
if(bhk1 == "4+ BHK"):
b = "4"
else:
bhk2 = [int(z) for z in bhk1.split() if z.isdigit()]
b = ((str(bhk2).strip('[')).strip(']')).strip()
furnish = items[2]
if(isinstance(furnish,int) == False ):
furnish1 = furnish.encode('utf8')
if((furnish1 == "Semi-Furnished") or (furnish1 == "Unfurnished") or (furnish1 == "Fully Furnished") or (furnish1 == "Unfurnished,Unf...")):
c = furnish1
else:
d = furnish1
elif(isinstance(furnish,int) == True):
furnish1 = furnish.encode('utf8')
d = furnish1
else:
c = ""
sqft = items[3]
if(isinstance(sqft,int)==True):
sqft1 = [int(xyz) for xyz in sqft.split() if xyz.isdigit()]
sqft2 = ((str(sqft1).strip('[')).strip(']')).strip()
d = sqft2.encode('utf8')
elif(isinstance(sqft,int)==False):
sqft1 = sqft.encode('utf8')
if((sqft1 == "Semi-Furnished") or (sqft1 == "Unfurnished") or (sqft1 == "Fully Furnished") or (sqft1 == "Unfurnished,Unf...")):
c = sqft1
else:
d = sqft1
else:
d = ""
atz = '\t'
print a,atz,b,atz,c,atz,d
for x5 in driver.find_elements_by_xpath(phone):
biz = (((x5.text).lstrip('+91')).strip()).encode('utf8')
if(len(biz)== 9):
biz_phone = '9'+biz
elif(len(biz) < 7 and len(biz) > 4):
biz_phone = '080'+biz
elif(len(biz) > 9 or len(biz) < 12):
biz_phone = biz
elif(len(biz) == 4 or len(biz) < 4):
biz_phone = biz.strip(biz)
else:
print '\nInvalid Business_phone'
print '\n'+biz_phone
driver.close()
hash_key = hashlib.md5("marketing#"+biz_phone+".com"+"Individual"+prop_title).hexdigest()
unique_key = ('I_'+hash_key).encode('utf8')
except (NameError, IndexError, WebDriverException, UnexpectedAlertPresentException) as e:
print "Failed to open: "+url
driver.close()
fieldname = ['Date','URL']
with open("C:\Users\Heypillow\Desktop\scrapWork\properties\\Failed_to_open_url.csv",'a') as h:
write = csv.DictWriter(h,fieldnames=fieldname,lineterminator = '\n')
write.writerow({'Date':date,
'URL':url})
I've blocked the pop-up in Firefox() but yet a pop-up is coming which addressing me to save a .php file and raises this exception.
I've already used that exception in the "except" part yet it's interrupting the code to work further and it's getting stopped just after this exception rises.
So, every time this exception rises, I have to restart the program. Thus I would like to download all the data by running the code through out the night,which is impossible with this circumstances…
How can I prevent this pop-up from opening?
(If I would have been able to upload a screenshot of the pop-up,it would have been easier to understand it.)