GLPK output formats - linear-programming

I am new to GLPK, so my apologies in advance if I'm missing something simple!
I have a largeish LP that I am feeding through GLPK to model an energy market. I'm running the following command line to GLPK to process this:
winglpk-4.65\glpk-4.65\w64\glpsol --lp problem.lp --data ExampleDataFile.dat --output results2.txt
When I open the resulting text file I can see the outputs, which all look sensible. I have one big problem: each record is split over two rows, making it very difficult to clean the file. See an extract below:
No. Row name St Activity Lower bound Upper bound Marginal
------ ------------ -- ------------- ------------- ------------- -------------
1 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1990)_
NS 0 0 = < eps
2 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1991)_
NS 0 0 = < eps
3 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1992)_
NS 0 0 = < eps
4 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1993)_
NS 0 0 = < eps
5 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1994)_
NS 0 0 = < eps
6 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1995)_
NS 0 0 = < eps
7 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1996)_
NS 0 0 = < eps
8 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1997)_
NS 0 0 = < eps
9 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1998)_
NS 0 0 = < eps
10 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1999)_
NS 0 0 = < eps
11 c_e_SpecifiedDemand(UTOPIA_CSV_ID_2000)_
NS 0 0 = < eps
12 c_e_SpecifiedDemand(UTOPIA_CSV_ID_2001)_
NS 0 0 = < eps
13 c_e_SpecifiedDemand(UTOPIA_CSV_ID_2002)_
NS 0 0 = < eps
14 c_e_SpecifiedDemand(UTOPIA_CSV_ID_2003)_
NS 0 0 = < eps
15 c_e_SpecifiedDemand(UTOPIA_CSV_ID_2004)_
NS 0 0 = < eps
I would be very grateful of any suggestions for either:
How I can get each record in the output text file onto a single row, or
Ideas on how to clean / post-process the existing text file output.
I'm sure I'm missing something simple here, but the output is in a very unhelpful format at the moment!
Thanks!

I wrote a Python parser for the GLPK output file. It is not beautiful and not save (try-catch) but it is working (for pure simplex problems).
You can call it on output file:
outp = GLPKOutput('myoutputfile')
print(outp)
val1 = outp.getCol('mycolvar','Activity')
val2 = outp.getRow('myrowname','Upper_bound') # row names should be defined
The class is as follows:
class GLPKOutput:
def __init__(self,filename):
self.rows = {}
self.columns = {}
self.nRows = 0
self.nCols = 0
self.nNonZeros = 0
self.Status = ""
self.Objective = ""
self.rowHeaders = []
self.rowIdx = {}
self.rowWidth = []
self.Rows = []
self.hRows = {}
self.colHeaders = []
self.colIdx = {}
self.colWidth = []
self.Cols = []
self.hCols = {}
self.wcols = ['Activity','Lower_bound','Upper bound','Marginal']
self.readFile(filename)
# split columns with weird line break
def smartSplit(self,line,type,job):
ret = []
line = line.rstrip()
if type == 'ROWS':
cols = len(self.rowHeaders)
idx = self.rowWidth
else:
cols = len(self.colHeaders)
idx = self.colWidth
if job == 'full':
start = 0
for i in range(cols):
stop = start+idx[i]+1
ret.append(line[start:stop].strip())
start = stop
elif job == 'part1':
entries = line.split()
ret = entries[0:2]
elif job == 'part2':
start = 0
for i in range(cols):
stop = start+idx[i]+1
ret.append(line[start:stop].strip())
start = stop
ret = ret[2:]
# print()
# print("SMART:",job,line.strip())
# print(" TO:",ret)
return ret
def readFile(self,filename):
fp = open(filename,"r")
lines = fp.readlines()
fp.close
i = 0
pos = "HEAD"
while pos == 'HEAD' and i<len(lines):
entries = lines[i].split()
if len(entries)>0:
if entries[0] == 'Rows:':
self.nRows = int(entries[1])
elif entries[0] == 'Columns:':
self.nCols = int(entries[1])
elif entries[0] == 'Non-zeros:':
self.nNonZeros = int(entries[1])
elif entries[0] == 'Status:':
self.Status = entries[1]
elif entries[0] == 'Objective:':
self.Objective = float(entries[3]) #' '.join(entries[1:])
elif re.search('Row name',lines[i]):
lines[i] = lines[i].replace('Row name','Row_name')
lines[i] = lines[i].replace('Lower bound','Lower_bound')
lines[i] = lines[i].replace('Upper bound','Upper_bound')
entries = lines[i].split()
pos = 'ROWS'
self.rowHeaders = entries
else:
pass
i+= 1
# formatting of row width
self.rowWidth = lines[i].split()
for k in range(len(self.rowWidth)): self.rowWidth[k] = len(self.rowWidth[k])
# print("Row Widths:",self.rowWidth)
i+= 1
READY = False
FOUND = False
while pos == 'ROWS' and i<len(lines):
if re.match('^\s*[0-9]+',lines[i]): # new line
if len(lines[i].split())>2: # no linebrak
entries = self.smartSplit(lines[i],pos,'full')
READY = True
else: # line break
entries = self.smartSplit(lines[i],pos,'part1')
READY = False
FOUND = True
else:
if FOUND and not READY: # second part of line
entries += self.smartSplit(lines[i],pos,'part2')
READY = True
FOUND = False
if READY:
READY = False
FOUND = False
# print("ROW:",entries)
if re.match('[0-9]+',entries[0]): # valid line with solution data
self.Rows.append(entries)
self.hRows[entries[1]] = len(self.Rows)-1
else:
print("wrong line format ...")
print(entries)
sys.exit()
elif re.search('Column name',lines[i]):
lines[i] = lines[i].replace('Column name','Column_name')
lines[i] = lines[i].replace('Lower bound','Lower_bound')
lines[i] = lines[i].replace('Upper bound','Upper_bound')
entries = lines[i].split()
pos = 'COLS'
self.colHeaders = entries
else:
pass #print("NOTHING: ",lines[i])
i+= 1
# formatting of row width
self.colWidth = lines[i].split()
for k in range(len(self.colWidth)): self.colWidth[k] = len(self.colWidth[k])
# print("Col Widths:",self.colWidth)
i+= 1
READY = False
FOUND = False
while pos == 'COLS' and i<len(lines):
if re.match('^\s*[0-9]+',lines[i]): # new line
if len(lines[i].split())>2: # no linebreak
entries = self.smartSplit(lines[i],pos,'full')
READY = True
else: # linebreak
entries = self.smartSplit(lines[i],pos,'part1')
READY = False
FOUND = True
else:
if FOUND and not READY: # second part of line
entries += self.smartSplit(lines[i],pos,'part2')
READY = True
FOUND = False
if READY:
READY = False
FOUND = False
# print("COL:",entries)
if re.match('[0-9]+',entries[0]): # valid line with solution data
self.Cols.append(entries)
self.hCols[entries[1]] = len(self.Cols)-1
else:
print("wrong line format ...")
print(entries)
sys.exit()
elif re.search('Karush-Kuhn-Tucker',lines[i]):
pos = 'TAIL'
else:
pass #print("NOTHING: ",lines[i])
i+= 1
for i,e in enumerate(self.rowHeaders): self.rowIdx[e] = i
for i,e in enumerate(self.colHeaders): self.colIdx[e] = i
def getRow(self,name,attr):
if name in self.hRows:
if attr in self.rowIdx:
try:
val = float(self.Rows[self.hRows[name]][self.rowIdx[attr]])
except:
val = self.Rows[self.hRows[name]][self.rowIdx[attr]]
return val
else:
return -1
def getCol(self,name,attr):
if name in self.hCols:
if attr in self.colIdx:
try:
val = float(self.Cols[self.hCols[name]][self.colIdx[attr]])
except:
val = self.Cols[self.hCols[name]][self.colIdx[attr]]
return val
else:
print("key error:",name,"not known ...")
return -1
def __str__(self):
retString = '\n'+"="*80+'\nSOLUTION\n'
retString += "nRows: "+str(self.nRows)+'/'+str(len(self.Rows))+'\n'
retString += "nCols: "+str(self.nCols)+'/'+str(len(self.Cols))+'\n'
retString += "nNonZeros: "+str(self.nNonZeros)+'\n'
retString += "Status: "+str(self.Status)+'\n'
retString += "Objective: "+str(self.Objective)+'\n\n'
retString += ' '.join(self.rowHeaders)+'\n'
for r in self.Rows: retString += ' # '.join(r)+' #\n'
retString += '\n'
retString += ' '.join(self.colHeaders)+'\n'
for c in self.Cols: retString += ' # '.join(r)+' #\n'
return retString

Related

boost.python : pandas dataframe to c++

I want to use boost.python to use multi-index columns dataframe in c++.
※multi-index columns dataframe is like
I changed the type of multi-index columns dataframe into csv.
My csv file looks like this on spreadsheet
The reason why I want to use this data is for backtest. This is my backtest code in python that I want to translate to c++.
import pandas as pd
import numpy as np
from utils import load_data, load_list_csv, to_int
class No_Strategy():
def __init__(self, codes, unit, cash, position):
self.codes = codes
self.unit = unit
self.cash = cash
self.buy_signal = [0]*len(codes)
self.sell_signal = [0]*len(codes)
self.valid = 0
self.position = position
self.pass_st = 0 # 전략에 들어가지도 못한 경우
def set_data(self, prev_fs_row, fs_row, indi_row):
self.prev_fs = prev_fs_row
self.fs = fs_row # multi dimensional df
self.indi = indi_row
def _strat(self, prev_fs, curr_fs, curr_indi):
curr_rev = prev_rev = curr_ni = prev_ni = ni_growth = curr_asset = noncurr_asset = curr_asset_rat = 0
try:
prev_rev = int(prev_fs['매출액'].replace(",",""))
curr_rev = int(curr_fs['매출액'].replace(",",""))
except:
self.pass_st += 1
return 0, 0
rev_growth=(curr_rev-prev_rev)/prev_rev
try:
prev_ni = int(prev_fs['당기순이익'].replace(",",""))
curr_ni = int(curr_fs['당기순이익'].replace(",",""))
except:
self.pass_st += 1
return 0, 0
ni_growth=(curr_ni-prev_ni)/prev_ni
try:
curr_asset = int(curr_fs['유동자산'].replace(",",""))
noncurr_asset = int(curr_fs['비유동자산'].replace(",",""))
except:
self.pass_st += 1
return 0, 0
curr_asset_rat = curr_asset / noncurr_asset
#### this is the buy strategy! You can change the below ####
if (curr_indi.golden_cross) or (curr_indi.rsi_k < 0.65) :
return 1, 0
#### ************************************************** ####
#### this is the sell strategy! You can change the below ####
if (curr_indi.dead_cross):
return 0, 1
#### ************************************************** ####
return 0, 0
def run(self):
for i, code in enumerate(self.codes):
self.valid = 0
prev_fs = self.prev_fs[code]
curr_fs = self.fs[code]
curr_indi = self.indi[code]
prev_fs_cell = None
curr_fs_cell = None
try:
prev_fs_cell = prev_fs.iloc[0].replace(",","")
try:
curr_fs_cell = curr_fs.iloc[0].replace(",","")
except:
self.pass_st += 1
pass
except:
self.pass_st += 1
pass
if (curr_fs_cell != None) & (prev_fs_cell != None):
self.valid = 1
buy, sell = self._strat(prev_fs, curr_fs, curr_indi)
if self.valid == 0:
self.pass_st += 1
continue
else: # buy or sell signal get
price = curr_indi['close']
if buy:
if self.cash >= self.unit * price:
self.buy_signal[i] = self.unit
self.position[i] += self.unit
self.cash -= price * self.unit
elif sell:
if self.position[i] > 0 :
sell_num = self.position[i] - int(self.position[i]/2)
self.sell_signal[i] = sell_num
self.position[i] = int(self.position[i]/2) # 1-> 1 sell, 4 -> 2 sell ....
self.cash += price * sell_num
##title
class Broker():
def __init__(self, codes):
self.cash = 200000000 #2억
self.cash_df = None #pd.DataFrame(columns=['cash'])
self.position = [0]*len(codes)
self.position_df = None #pd.DataFrame(columns=codes) # for accumulated profit calculation
self.buy_signal = None #pd.DataFrame(columns=codes) # codes = KOSPI_stock_names
self.sell_signal = None #pd.DataFrame(columns=codes)
self.codes = codes # 012934, 3281, ...
self.unit = 1 # 주식 매매 단위
self.pass_st = 0
def set_strat(self, strategy):
self.strategy = strategy # class
def set_time(self, time_index): # time_index type: pd.Index / time range for indi df
self.buy_signal = pd.DataFrame(columns = self.codes, index = time_index) #set_index(time_index)
self.sell_signal = pd.DataFrame(columns = self.codes, index = time_index) #.set_index(time_index)
self.position_df = pd.DataFrame(columns = self.codes, index = time_index)
self.cash_df = pd.DataFrame(columns = ['cash'], index = time_index)#.set_index(time_index)
self.time_index = time_index
def set_data(self, fs, indi, price):
self.fs = fs # multi dimensional df / start: 0th - nth
self.indi = indi # multi dimensional df / start : 1th - nth
self.price = price # 2 dimensional (date X codes : close price)
def update_data(self, strategy, date):
self.cash = strategy.cash
self.cash_df.loc[date] = strategy.cash
self.position = strategy.position
self.position_df.loc[date] = strategy.position #list
self.buy_signal.loc[date] = strategy.buy_signal #list
self.sell_signal.loc[date] = strategy.sell_signal #list
self.pass_st += strategy.pass_st
def run(self):
for date in self.time_index: #아마 수정해야 할 확률 높음
if date.year == 2021:
break
else:
prev_fs_row = self.fs.loc[date.year-1] # ex: 2014
fs_row = self.fs.loc[date.year] # 2015
indi_row = self.indi.loc[date] # 2015
strategy = self.strategy(self.codes, self.unit, self.cash, self.position)
strategy.set_data(prev_fs_row, fs_row, indi_row)
strategy.run()
self.update_data(strategy, date)
def performance(self):
# !!!! 2020년까지의 결과만 성능 평가 ####
cash_df = self.cash_df[self.cash_df.index < '2021']
position_df = self.position_df[self.position_df.index < '2021']
price = self.price[self.price.index < '2021']
buy_signal = self.buy_signal[self.buy_signal.index < '2021']
sell_signal = self.sell_signal[self.sell_signal.index < '2021']
last_price = price.iloc[-1]
total_remain_num = self.position # last(2020) position data
total_buy = (price * buy_signal).sum(axis=1).sum()
total_sell = (price * sell_signal).sum(axis=1).sum()
total_remain = (last_price * total_remain_num).sum()
print(f'remain 개수: {total_remain_num}, total_remain: {total_remain} total_buy: {total_buy}, total_sell={total_sell}')
profit = total_sell + total_remain - total_buy
try:
return_mean = profit / total_buy
except:
print("no buy")
return
accum_df = (cash_df['cash'] + ((price.fillna(0) * position_df).sum(axis=1))).to_frame() # row sum
daily_return_df = (accum_df - accum_df.shift(1))/accum_df.shift(1)-1
SSE = ((daily_return_df - return_mean)**2).sum().item()
std = np.sqrt(SSE/(accum_df.shape[0]-1)) # route(sigma(x-x_bar)^2 / (n-1))
sharp = return_mean / std
self.return_mean = return_mean
self.sharp = sharp
print(f'return_mean: {return_mean}, sharp: {sharp}')
code_path = GDRIVE_DATA_PATH + 'codes.csv'
fs_path = GDRIVE_DATA_PATH + 'fs_total.csv'
indi_path = GDRIVE_DATA_PATH + 'indi_total.csv'
price_path = GDRIVE_DATA_PATH + 'prices.csv'
fs_total = load_data("fs_total.csv")
indi_total = load_data("indi_total.csv") # stock price and indicator(Golden cross, RSI, etc.)
prices = load_data("prices.csv") # stock close price data rows:date, cols: stock code.
time_index = indi_total.index # time index of indi_total multi-index columns
broker = Broker(codes)
broker.set_strat(No_Strategy)
broker.set_time(time_index)
broker.set_data(fs_total, indi_total, prices)
broker.run()
broker.performance()
I want to translate it not changing much in flow of the code.
But I cannot find how to get multi-index columns dataframe in c++, and transfer its row data to No_Strategy to decide whether invest into the stock.
※ I uploaded similar question before and get thankful answer, but it is too complicated for me so I question one more time with detail information.
look at https://github.com/hosseinmoein/DataFrame. It has about 95% of Pandas functionality in a much faster framework

How to convert ASCII(not single character, complete sentence) to string

I have method called 'encode(string_argument)' which converts string(sentence) to ascii and reverse it. I have to write method which decode the string.
Code:
str1 = 'This is sample text'
def encode(str1):
encoded_str = ''
for i in str1:
encoded_str += str(ord(i))
return encoded_str[::-1]
def decode(encoded_str):
rev_encoded_str = encoded_str[::-1]
# Incomplete
encoded_str = encode(str1)
decode(encoded_str)
Input for encode():
This is sample text
Output from encode():
6110211016112310180121190179511235115012351150140148
Input for decode():
6110211016112310180121190179511235115012351150140148
Output from decode():
This is sample text
Thanks in advance.
Finally got the solution:
def encode(str1):
encoded_str = ''
for i in str1:
encoded_str += str(ord(i))
return encoded_str[::-1]
def decode(encoded_str):
reverse_string = encoded_str[::-1]
temp_array = convert_to_possible_string_ascii(reverse_string)
final_decoded_string = ""
for x in temp_array:
# print(str(chr(int(x)))+": "+x)
final_decoded_string = final_decoded_string + str(chr(int(x)))
print(final_decoded_string)
return final_decoded_string
def convert_to_possible_string_ascii(str_array):
temp_array = []
temp_char = ""
counter = 0
for current_char in range(0, len(str_array), 1):
temp_char = temp_char+str_array[current_char]
counter = counter+1
if counter==1:
temp_var = is_valid_string_ascii(temp_char)
if temp_var:
temp_array.append(temp_char)
counter = 0
temp_char = ""
elif counter==2:
temp_var = is_valid_string_ascii(temp_char)
if temp_var:
temp_array.append(temp_char)
counter = 0
temp_char = ""
elif counter==3:
temp_var = is_valid_string_ascii(temp_char)
if temp_var:
temp_array.append(temp_char)
counter = 0
temp_char = ""
return temp_array
def is_valid_string_ascii(ascii_value):
valid_list = list(range(65, 91)) + list(range(97, 123)) +[32]
boolean_var = False
if int(ascii_value) in valid_list:
boolean_var = True
return boolean_var
str1 = 'This is sample text'
encoded_str = encode(str1)
decode(encoded_str)

Group Items together dictionary while loop

I am having trouble storing the ID to keys, like a sub (parent-child) kind of thing. I spent hours on it and could not figure a way to accomplish this. What output I am expecting is at the end of this post. Any help would be great.
import sys
import collections
dict = collections.OrderedDict()
dict["A.1"] = {"parent_child":0}
dict["A.1.1"] = {"parent_child":1}
dict["A.1.1.1"] = {"parent_child":2}
dict["A.1.1.2"] = {"parent_child":2}
dict["A.1.1.3"] = {"parent_child":2}
dict["A.1.2"] = {"parent_child":1}
dict["A.1.2.1"] = {"parent_child":2}
dict["A.1.2.2"] = {"parent_child":2}
dict["A.1.2.2.1"] = {"parent_child":3}
dict["A.1.2.2.2"] = {"parent_child":3}
dict["A.1.2.3"] = {"parent_child":2}
dict["A.1.3"] = {"parent_child":1}
dict["A.1.4"] = {"parent_child":1}
print(dict)
new_dict = {}
p = 0 # previous index
i = 0 # current
n = 1 # next index
current_PC = 0 # current parent_child
next_PC = 0 # next parent_child
previous_id = ""
current_id = ""
next_id = ""
change_current = True
change = True
lst = []
while(True):
if change_current:
current_id = dict.keys()[i]
current_PC = dict.values()[i]["parent_child"]
change_current = False
try:
next_id = dict.keys()[n]
next_PC = dict.values()[n]["parent_child"]
except:
pass # it will go out of index
print("KEY {0}".format(current_id))
if next_PC > current_PC:
if next_PC - current_PC == 1:
lst.append(next_PC)
next_PC += 1
print("next_PC: {0}".format(next_PC))
if next_PC == current_PC:
new_dict[current_id] = lst
lst = []
break
print(new_dict)
Trying to make output looks like this (at in similar way), the new_dict should look like:
new_dict["A.1"] = ["A.1.1", "A.1.2", "A.1.3", "A.1.4"]
new_dict["A.1.1"] = ["A.1.1.1", "A.1.1.2", "A.1.1.3"]
new_dict["A.1.1.1"] = []
new_dict["A.1.1.2"] = []
new_dict["A.1.1.3"] = []
new_dict["A.1.2"] = ["A.1.2.1", "A.1.2.2", "A.1.2.3"]
new_dict["A.1.2.1"] = []
new_dict["A.1.2.2"] = ["A.1.2.2.1", "A.1.2.2.2"]
new_dict["A.1.2.2.1"] = []
new_dict["A.1.2.2.2"] = []
new_dict["A.1.2.3"] = []
new_dict["A.1.3"] = []
new_dict["A.1.4"] = []
This gives you the output you are asking for. Since i did not see a {"parent_child":...} in you desired output i did not proceed with anything else.
options = ["A.1","A.1.1","A.1.1.1","A.1.1.2","A.1.1.3","A.1.2","A.1.2.1","A.1.2.2","A.1.2.2.1","A.1.2.2.2","A.1.2.3","A.1.3","A.1.4"]
new_dict = {}
for i, key in enumerate(options):
new_dict[key] = []
ls = []
for j, opt in enumerate(options):
if (key in opt) and (len(opt)-len(key)==2):
new_dict[key].append(opt)
print(new_dict)
EDIT
Using the comment of #Ranbir Aulakh
options = ["A.1","A.1.1","A.1.1.1","A.1.1.2","A.1.1.3","A.1.2","A.1.2.1","A.1.2.2","A.1.2.2.1","A.1.2.2.2","A.1.2.3","A.1.3","A.1.4"]
new_dict = {}
for i, key in enumerate(options):
new_dict[key] = []
ls = []
for j, opt in enumerate(options):
if (key in opt) and (len(opt.split("."))-len(key.split("."))==1):#(len(opt)-len(key)==2):
new_dict[key].append(opt)
print(new_dict)

How to prevent Selenium from opening a pop-up to save the file?

I'm scraping the site Quicker.com but every time getting an error on random pages.
The error is:
UnexpectedAlertPresentException: Alert Text: C:\Users\HEYPIL~1\AppData\Local\Temp\Pkwnr4IA.php.part could not be saved, because the source file could not be read.
Try again later, or contact the server administrator.
<super: <class 'WebDriverException'>, <UnexpectedAlertPresentException object>>
My code:
from selenium import webdriver
import csv
import re
import hashlib
from selenium.common.exceptions import UnexpectedAlertPresentException
from selenium.common.exceptions import WebDriverException
import socket
import time
import datetime
ts = time.time()
st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
date = st.encode('utf8')
IPAdd = socket.gethostbyname(socket.gethostname())
counter = 5
initial = []
base = "http://mumbai.quikr.com/Individual/0-50000000/Houses-Apartments-for-Sale/w1072?imageAds=Y&l=You_are-Price"
string = "&page="
while(counter == 5 or counter < 40):
base2 = base+string+str(counter)
if (counter < 39):
initial.append(base2)
elif(counter == 40):
initial.append(base)
else:
base2 = base
counter += 1
for n in initial:
result = []
driver = webdriver.Firefox()
driver.get(n)
main_page = '//a[#class="adttllnk unbold"]'
for a in driver.find_elements_by_xpath(main_page):
l = a.get_attribute('href')
result.append(l)
print len(result)
driver.close()
for url in result:
try:
driver = webdriver.Firefox()
driver.get(url)
items = []
desc_path = '//div[#id="ad_description"]'
img_path = '//div[#class="bigImg_wapp"]//img[#src]'
prop = '//div[#itemprop="name"]//h1[#class="ad_title translate"]'
amenities = '//div[#class="ad-atrbt-panel"]//span[#class="ad-atrbt-val"]//span[#class="attribVal newattribVal"]'
phone = '//span[#class="NoVerified-Text"]'
for x1 in driver.find_elements_by_xpath(img_path):
img = (x1.get_attribute('src')).encode('utf8')
print '\n'+img
for x2 in driver.find_elements_by_xpath(desc_path):
desc = (x2.text).encode('utf8')
print '\n'+ desc
for x3 in driver.find_elements_by_xpath(prop):
prop_title = (x3.text).encode('utf8')
print '\n'+prop_title
for x4 in driver.find_elements_by_xpath(amenities):
value = (x4.text).encode('utf8')
items.append(value)
print '\n'
print items
locality = items[0]
locality1 = locality.encode('utf8')
a = (locality1 if (isinstance(locality1,int) == False) else "")
bhk = items[1]
bhk1 = bhk.encode('utf8')
if(bhk1 == "4+ BHK"):
b = "4"
else:
bhk2 = [int(z) for z in bhk1.split() if z.isdigit()]
b = ((str(bhk2).strip('[')).strip(']')).strip()
furnish = items[2]
if(isinstance(furnish,int) == False ):
furnish1 = furnish.encode('utf8')
if((furnish1 == "Semi-Furnished") or (furnish1 == "Unfurnished") or (furnish1 == "Fully Furnished") or (furnish1 == "Unfurnished,Unf...")):
c = furnish1
else:
d = furnish1
elif(isinstance(furnish,int) == True):
furnish1 = furnish.encode('utf8')
d = furnish1
else:
c = ""
sqft = items[3]
if(isinstance(sqft,int)==True):
sqft1 = [int(xyz) for xyz in sqft.split() if xyz.isdigit()]
sqft2 = ((str(sqft1).strip('[')).strip(']')).strip()
d = sqft2.encode('utf8')
elif(isinstance(sqft,int)==False):
sqft1 = sqft.encode('utf8')
if((sqft1 == "Semi-Furnished") or (sqft1 == "Unfurnished") or (sqft1 == "Fully Furnished") or (sqft1 == "Unfurnished,Unf...")):
c = sqft1
else:
d = sqft1
else:
d = ""
atz = '\t'
print a,atz,b,atz,c,atz,d
for x5 in driver.find_elements_by_xpath(phone):
biz = (((x5.text).lstrip('+91')).strip()).encode('utf8')
if(len(biz)== 9):
biz_phone = '9'+biz
elif(len(biz) < 7 and len(biz) > 4):
biz_phone = '080'+biz
elif(len(biz) > 9 or len(biz) < 12):
biz_phone = biz
elif(len(biz) == 4 or len(biz) < 4):
biz_phone = biz.strip(biz)
else:
print '\nInvalid Business_phone'
print '\n'+biz_phone
driver.close()
hash_key = hashlib.md5("marketing#"+biz_phone+".com"+"Individual"+prop_title).hexdigest()
unique_key = ('I_'+hash_key).encode('utf8')
except (NameError, IndexError, WebDriverException, UnexpectedAlertPresentException) as e:
print "Failed to open: "+url
driver.close()
fieldname = ['Date','URL']
with open("C:\Users\Heypillow\Desktop\scrapWork\properties\\Failed_to_open_url.csv",'a') as h:
write = csv.DictWriter(h,fieldnames=fieldname,lineterminator = '\n')
write.writerow({'Date':date,
'URL':url})
I've blocked the pop-up in Firefox() but yet a pop-up is coming which addressing me to save a .php file and raises this exception.
I've already used that exception in the "except" part yet it's interrupting the code to work further and it's getting stopped just after this exception rises.
So, every time this exception rises, I have to restart the program. Thus I would like to download all the data by running the code through out the night,which is impossible with this circumstances…
How can I prevent this pop-up from opening?
(If I would have been able to upload a screenshot of the pop-up,it would have been easier to understand it.)

python function to split a list by indexes

I am trying to build an efficient function for splitting a list of any size by any given number of indices. This method works and it took me a few hours to get it right (I hate how easy it is to get things wrong when using indexes)
Am I over-thinking this?
Code:
def lindexsplit(List,*lindex):
index = list(lindex)
index.sort()
templist1 = []
templist2 = []
templist3 = []
breakcounter = 0
itemcounter = 0
finalcounter = 0
numberofbreaks = len(index)
totalitems = len(List)
lastindexval = index[(len(index)-1)]
finalcounttrigger = (totalitems-(lastindexval+1))
for item in List:
itemcounter += 1
indexofitem = itemcounter - 1
nextbreakindex = index[breakcounter]
#Less than the last cut
if breakcounter <= numberofbreaks:
if indexofitem < nextbreakindex:
templist1.append(item)
elif breakcounter < (numberofbreaks - 1):
templist1.append(item)
templist2.append(templist1)
templist1 = []
breakcounter +=1
else:
if indexofitem <= lastindexval and indexofitem <= totalitems:
templist1.append(item)
templist2.append(templist1)
templist1 = []
else:
if indexofitem >= lastindexval and indexofitem < totalitems + 1:
finalcounter += 1
templist3.append(item)
if finalcounter == finalcounttrigger:
templist2.append(templist3)
return templist2