I attempted 3 different ways:
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup as soup
from selenium import webdriver
driver = webdriver.Chrome(executable_path='C:/Users/lemonade/Documents/work/chromedriver')
my_url = "https://www.carehome.co.uk/"
def make_soup(url):
driver.get(url)
m_soup = soup(driver.page_source, features='html.parser')
return m_soup
main_page = make_soup(my_url)
boroughs = [borough.text.strip() for borough in main_page.select('.seo_links.seo_links_country [href]')]
indexs = list(range(16,19))
london_list = [boroughs[i] for i in indexs]
boroughs1 = [bo.replace("Borough","") for bo in london_list]
boroughs2 = [b1.replace("&","and") for b1 in boroughs1]
boroughs3 = ['-'.join(b2.split()) for b2 in boroughs2]
borough_links = ["https://www.carehome.co.uk/care_search_results.cfm/searchunitary/" + b3 for b3 in boroughs3]
borough_soup = [make_soup(b_link) for b_link in borough_links]
for soups in borough_soup:
titles = [title.text.strip() for title in soups.select('.home-name [href]')]
return(titles)
for soups in borough_soup:
addresses = [address.text.strip() for address in soups.select('.home-name>p.grey')]
return(addresses)
df = pd.DataFrame(zip(titles, addresses), columns = ['title','address'])
print(df)
I tried the code below instead: This gave |AttributeError: 'list' object has no attribute 'text'|
title_html = [soups.select('.home-name [href]') for soups in borough_soup]
titles = [title.text.strip() for title in title_html ]
addresses_html =[soups.select('.home-name>p.grey') for soups in borough_soup]
addresses = [address.text.strip() for address in addresses_html]
I tried to create and append a list and return that list. [See Below] This just outputted a single element from the list.
def func(borough_soup):
for soups in borough_soup:
titles = [title_html.text.strip() for title_html in soups.select('.home-name [href]')]
for title in titles:
titles1 = []
titles1.append(title)
return(titles1)
Any help would be much appreciated!
This was the fix. Creating function with an empty list and then appending each element to the list. After that concating each DF
def title(x):
titles1 = []
for soups in borough_soup:
titles = [title.text.strip() for title in soups.select('.home-name [href]')]
titles1.append(titles)
return(titles1)
titles = title(borough_soup)
def address(x):
address1 = []
for soups in borough_soup:
addresses = [address.text.strip() for address in soups.select('.home-name>p.grey')]
address1.append(addresses)
return(address1)
addresses = address(borough_soup)
indexs2 = list(range(0,2))
df_list = [pd.DataFrame(zip(titles[i], addresses[i])) for i in indexs2]
df = pd.concat(df_list)
I updated the code and it now provides the graph, however after giving me the graph it produces the following error messages.
Warning (from warnings module):
File "C:\Python27\lib\site-packages\matplotlib\collections.py", line 590
if self._edgecolors == str('face'):
FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
import urllib2
import time
import datetime
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import matplotlib.dates as mdates
from matplotlib.finance import candlestick_ochl
import matplotlib
import pylab
matplotlib.rcParams.update({'font.size': 9})
def rsiFunc(prices, n=14):
deltas = np.diff(prices)
seed = deltas[:n+1]
up = seed[seed>=0].sum()/n
down = -seed[seed<0].sum()/n
rs = up/down
rsi = np.zeros_like(prices)
rsi[:n] = 100. - 100./(1.+rs)
for i in range(n, len(prices)):
delta = deltas[i-1] # cause the diff is 1 shorter
if delta>0:
upval = delta
downval = 0.
else:
upval = 0.
downval = -delta
up = (up*(n-1) + upval)/n
down = (down*(n-1) + downval)/n
rs = up/down
rsi[i] = 100. - 100./(1.+rs)
return rsi
def movingaverage(values,window):
weigths = np.repeat(1.0, window)/window
smas = np.convolve(values, weigths, 'valid')
return smas # as a numpy array
def ExpMovingAverage(values, window):
weights = np.exp(np.linspace(-1., 0., window))
weights /= weights.sum()
a = np.convolve(values, weights, mode='full')[:len(values)]
a[:window] = a[window]
return a
def computeMACD(x, slow=26, fast=12):
"""
compute the MACD (Moving Average Convergence/Divergence) using a fast and slow exponential moving avg'
return value is emaslow, emafast, macd which are len(x) arrays
"""
emaslow = ExpMovingAverage(x, slow)
emafast = ExpMovingAverage(x, fast)
return emaslow, emafast, emafast - emaslow
def graphData(stock,MA1,MA2):
'''
Use this to dynamically pull a stock:
'''
try:
print 'Currently Pulling',stock
print str(datetime.datetime.fromtimestamp(int(time.time())).strftime('%Y-%m-%d %H:%M:%S'))
#Keep in mind this is close high low open data from Yahoo
urlToVisit = 'http://chartapi.finance.yahoo.com/instrument/1.0/'+stock+'/chartdata;type=quote;range=10y/csv'
stockFile =[]
try:
sourceCode = urllib2.urlopen(urlToVisit).read()
splitSource = sourceCode.split('\n')
for eachLine in splitSource:
splitLine = eachLine.split(',')
if len(splitLine)==6:
if 'values' not in eachLine:
stockFile.append(eachLine)
except Exception, e:
print str(e), 'failed to organize pulled data.'
except Exception,e:
print str(e), 'failed to pull pricing data'
try:
date, closep, highp, lowp, openp, volume = np.loadtxt(stockFile,delimiter=',', unpack=True,
converters={ 0: mdates.strpdate2num('%Y%m%d')})
x = 0
y = len(date)
newAr = []
while x < y:
appendLine = date[x],openp[x],closep[x],highp[x],lowp[x],volume[x]
newAr.append(appendLine)
x+=1
Av1 = movingaverage(closep, MA1)
Av2 = movingaverage(closep, MA2)
SP = len(date[MA2-1:])
fig = plt.figure(facecolor='#07000d')
ax1 = plt.subplot2grid((6,4), (1,0), rowspan=4, colspan=4, axisbg='#07000d')
candlestick_ochl(ax1, newAr[-SP:], width=.6, colorup='#53c156', colordown='#ff1717')#width=.6, plot_day_summary_ohlc
Label1 = str(MA1)+' SMA'
Label2 = str(MA2)+' SMA'
ax1.plot(date[-SP:],Av1[-SP:],'#e1edf9',label=Label1, linewidth=1.5)
ax1.plot(date[-SP:],Av2[-SP:],'#4ee6fd',label=Label2, linewidth=1.5)
ax1.grid(True, color='w')
ax1.xaxis.set_major_locator(mticker.MaxNLocator(10))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax1.yaxis.label.set_color("w")
ax1.spines['bottom'].set_color("#5998ff")
ax1.spines['top'].set_color("#5998ff")
ax1.spines['left'].set_color("#5998ff")
ax1.spines['right'].set_color("#5998ff")
ax1.tick_params(axis='y', colors='w')
plt.gca().yaxis.set_major_locator(mticker.MaxNLocator(prune='upper')) #gca()
ax1.tick_params(axis='x', colors='w')
plt.ylabel('Stock price and Volume')
maLeg = plt.legend(loc=9, ncol=2, prop={'size':7},
fancybox=True, borderaxespad=0.)
maLeg.get_frame().set_alpha(0.4)
textEd = plt.gca().get_legend().get_texts()#pylab.gca() changed to plt.gca()
plt.setp(textEd[0:5], color = 'w')#changed pylab.setp to plt.setp
volumeMin = 0
ax0 = plt.subplot2grid((6,4), (0,0), sharex=ax1, rowspan=1, colspan=4, axisbg='#07000d')
rsi = rsiFunc(closep)
rsiCol = '#c1f9f7'
posCol = '#386d13'
negCol = '#8f2020'
ax0.plot(date[-SP:], rsi[-SP:], rsiCol, linewidth=1.5)
ax0.axhline(70, color=negCol)
ax0.axhline(30, color=posCol)
ax0.fill_between(date[-SP:], rsi[-SP:], 70, where=(rsi[-SP:]>=70), facecolor=negCol, edgecolor=negCol, alpha=0.5)
ax0.fill_between(date[-SP:], rsi[-SP:], 30, where=(rsi[-SP:]<=30), facecolor=posCol, edgecolor=posCol, alpha=0.5)
ax0.set_yticks([30,70])
ax0.yaxis.label.set_color("w")
ax0.spines['bottom'].set_color("#5998ff")
ax0.spines['top'].set_color("#5998ff")
ax0.spines['left'].set_color("#5998ff")
ax0.spines['right'].set_color("#5998ff")
ax0.tick_params(axis='y', colors='w')
ax0.tick_params(axis='x', colors='w')
plt.ylabel('RSI')
ax1v = ax1.twinx()
ax1v.fill_between(date[-SP:],volumeMin, volume[-SP:], facecolor='#00ffe8', alpha=.4)
ax1v.axes.yaxis.set_ticklabels([])
ax1v.grid(False)
ax1v.set_ylim(0, 3*volume.max())
ax1v.spines['bottom'].set_color("#5998ff")
ax1v.spines['top'].set_color("#5998ff")
ax1v.spines['left'].set_color("#5998ff")
ax1v.spines['right'].set_color("#5998ff")
ax1v.tick_params(axis='x', colors='w')
ax1v.tick_params(axis='y', colors='w')
ax2 = plt.subplot2grid((6,4), (5,0), sharex=ax1, rowspan=1, colspan=4, axisbg='#07000d')
# START NEW INDICATOR CODE #
# END NEW INDICATOR CODE #
plt.gca().yaxis.set_major_locator(mticker.MaxNLocator(prune='upper'))
ax2.spines['bottom'].set_color("#5998ff")
ax2.spines['top'].set_color("#5998ff")
ax2.spines['left'].set_color("#5998ff")
ax2.spines['right'].set_color("#5998ff")
ax2.tick_params(axis='x', colors='w')
ax2.tick_params(axis='y', colors='w')
ax2.yaxis.set_major_locator(mticker.MaxNLocator(nbins=5, prune='upper'))
for label in ax2.xaxis.get_ticklabels():
label.set_rotation(45)
plt.suptitle(stock.upper(),color='w')
plt.setp(ax0.get_xticklabels(), visible=False)
plt.setp(ax1.get_xticklabels(), visible=False)
'''ax1.annotate('Big news!',(date[510],Av1[510]),
xytext=(0.8, 0.9), textcoords='axes fraction',
arrowprops=dict(facecolor='white', shrink=0.05),
fontsize=14, color = 'w',
horizontalalignment='right', verticalalignment='bottom')'''
plt.subplots_adjust(left=.09, bottom=.14, right=.94, top=.95, wspace=.20, hspace=0)
plt.show()
fig.savefig('example.png',facecolor=fig.get_facecolor())
except Exception,e:
print 'main loop',str(e)
while True:
stock = raw_input('Stock to plot: ')
graphData(stock,10,50)
Please look at the thread Violin plot: warning with matplotlib 1.4.3 and pyplot fill_between warning since upgrade of numpy to 1.10.10
It seems there is a bug in matplotlib 1.4.3 (which has only started causing that error since the upgrade to numpy 1.10). This is reportedly corrected in 1.5.0 (which should be released soon). Hope this helps.
I'm scraping the site Quicker.com but every time getting an error on random pages.
The error is:
UnexpectedAlertPresentException: Alert Text: C:\Users\HEYPIL~1\AppData\Local\Temp\Pkwnr4IA.php.part could not be saved, because the source file could not be read.
Try again later, or contact the server administrator.
<super: <class 'WebDriverException'>, <UnexpectedAlertPresentException object>>
My code:
from selenium import webdriver
import csv
import re
import hashlib
from selenium.common.exceptions import UnexpectedAlertPresentException
from selenium.common.exceptions import WebDriverException
import socket
import time
import datetime
ts = time.time()
st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
date = st.encode('utf8')
IPAdd = socket.gethostbyname(socket.gethostname())
counter = 5
initial = []
base = "http://mumbai.quikr.com/Individual/0-50000000/Houses-Apartments-for-Sale/w1072?imageAds=Y&l=You_are-Price"
string = "&page="
while(counter == 5 or counter < 40):
base2 = base+string+str(counter)
if (counter < 39):
initial.append(base2)
elif(counter == 40):
initial.append(base)
else:
base2 = base
counter += 1
for n in initial:
result = []
driver = webdriver.Firefox()
driver.get(n)
main_page = '//a[#class="adttllnk unbold"]'
for a in driver.find_elements_by_xpath(main_page):
l = a.get_attribute('href')
result.append(l)
print len(result)
driver.close()
for url in result:
try:
driver = webdriver.Firefox()
driver.get(url)
items = []
desc_path = '//div[#id="ad_description"]'
img_path = '//div[#class="bigImg_wapp"]//img[#src]'
prop = '//div[#itemprop="name"]//h1[#class="ad_title translate"]'
amenities = '//div[#class="ad-atrbt-panel"]//span[#class="ad-atrbt-val"]//span[#class="attribVal newattribVal"]'
phone = '//span[#class="NoVerified-Text"]'
for x1 in driver.find_elements_by_xpath(img_path):
img = (x1.get_attribute('src')).encode('utf8')
print '\n'+img
for x2 in driver.find_elements_by_xpath(desc_path):
desc = (x2.text).encode('utf8')
print '\n'+ desc
for x3 in driver.find_elements_by_xpath(prop):
prop_title = (x3.text).encode('utf8')
print '\n'+prop_title
for x4 in driver.find_elements_by_xpath(amenities):
value = (x4.text).encode('utf8')
items.append(value)
print '\n'
print items
locality = items[0]
locality1 = locality.encode('utf8')
a = (locality1 if (isinstance(locality1,int) == False) else "")
bhk = items[1]
bhk1 = bhk.encode('utf8')
if(bhk1 == "4+ BHK"):
b = "4"
else:
bhk2 = [int(z) for z in bhk1.split() if z.isdigit()]
b = ((str(bhk2).strip('[')).strip(']')).strip()
furnish = items[2]
if(isinstance(furnish,int) == False ):
furnish1 = furnish.encode('utf8')
if((furnish1 == "Semi-Furnished") or (furnish1 == "Unfurnished") or (furnish1 == "Fully Furnished") or (furnish1 == "Unfurnished,Unf...")):
c = furnish1
else:
d = furnish1
elif(isinstance(furnish,int) == True):
furnish1 = furnish.encode('utf8')
d = furnish1
else:
c = ""
sqft = items[3]
if(isinstance(sqft,int)==True):
sqft1 = [int(xyz) for xyz in sqft.split() if xyz.isdigit()]
sqft2 = ((str(sqft1).strip('[')).strip(']')).strip()
d = sqft2.encode('utf8')
elif(isinstance(sqft,int)==False):
sqft1 = sqft.encode('utf8')
if((sqft1 == "Semi-Furnished") or (sqft1 == "Unfurnished") or (sqft1 == "Fully Furnished") or (sqft1 == "Unfurnished,Unf...")):
c = sqft1
else:
d = sqft1
else:
d = ""
atz = '\t'
print a,atz,b,atz,c,atz,d
for x5 in driver.find_elements_by_xpath(phone):
biz = (((x5.text).lstrip('+91')).strip()).encode('utf8')
if(len(biz)== 9):
biz_phone = '9'+biz
elif(len(biz) < 7 and len(biz) > 4):
biz_phone = '080'+biz
elif(len(biz) > 9 or len(biz) < 12):
biz_phone = biz
elif(len(biz) == 4 or len(biz) < 4):
biz_phone = biz.strip(biz)
else:
print '\nInvalid Business_phone'
print '\n'+biz_phone
driver.close()
hash_key = hashlib.md5("marketing#"+biz_phone+".com"+"Individual"+prop_title).hexdigest()
unique_key = ('I_'+hash_key).encode('utf8')
except (NameError, IndexError, WebDriverException, UnexpectedAlertPresentException) as e:
print "Failed to open: "+url
driver.close()
fieldname = ['Date','URL']
with open("C:\Users\Heypillow\Desktop\scrapWork\properties\\Failed_to_open_url.csv",'a') as h:
write = csv.DictWriter(h,fieldnames=fieldname,lineterminator = '\n')
write.writerow({'Date':date,
'URL':url})
I've blocked the pop-up in Firefox() but yet a pop-up is coming which addressing me to save a .php file and raises this exception.
I've already used that exception in the "except" part yet it's interrupting the code to work further and it's getting stopped just after this exception rises.
So, every time this exception rises, I have to restart the program. Thus I would like to download all the data by running the code through out the night,which is impossible with this circumstances…
How can I prevent this pop-up from opening?
(If I would have been able to upload a screenshot of the pop-up,it would have been easier to understand it.)
from BeautifulSoup import BeautifulSoup
import mechanize
import re
def price_walmart_match(keyword):
url = "http://www.walmart.com/search/?query="+keyword
br = mechanize.Browser()
br.set_handle_robots(False)
br.open(url)
html = br.response().read()
result_soup = BeautifulSoup(html)
found_results = result_soup.findAll('div' , attrs={'class':'js-tile tile-landscape'})
all_results=[]
for result in found_results:
title = result.find('a' , attrs={'class':'js-product-title'})
links = result.find('a' , href=True)
before_decimal= str(result.find('span', attrs={'class':'price price-display'})).split('<span class="sup">$</span>')[1].split('<span class="visuallyhidden">')[0]
after_decimal= str(result.find('span', attrs={'class':'price price-display'})).split('</span><span class="sup">')[1].split('</span>')[0]
prices = before_decimal+'.'+after_decimal
inArray = [float(prices), "http://www.walmart.com"+links['href']]
all_results.append(inArray)
print(all_result)
Sorry it is full code where i get error.
Thats Because the mentioned class is not there in the page.
Try to Represent query term which is constant.
I am new to django and know little of python. I am learning to draw graphs in django framework. I drew single bar-charts but have problem to draw multiple bar-chart using the database telecom_db of my project in django. However, in wxPython the following code worked fine. Could you figure out if something wrong with django in code below:
def graph(request):
figName="figGraph.png"
path="F:\MajorWorkspace\Visualisations\\"+figName
if os.path.exists(path)==False:
age_gr = []
countm = []
countf = []
import MySQLdb
db = MySQLdb.connect(host = "localhost",
user="root",
passwd = "",
db = "telecom_db")
cursor1 = db.cursor()
cursor2 = db.cursor()
cursor1.execute("select count(card_no) from demo where gender = 0 group by age_group")
cursor2.execute("select count(card_no) from demo where gender = 1 group by age_group")
numrows1 = int(cursor1.rowcount)
#numrows2 = int(cursor2.rowcount)
sum_male=0
sum_female=0
for x in range(numrows1):
row1 = cursor1.fetchone()
age_gr.append(x)
countm.append(row1[0])
sum_male+=row1[0]
row2 = cursor2.fetchone()
countf.append(row2[0])
sum_female+=row2[0]
# avg_call_group[x] = row[1]
cursor1.close()
cursor2.close()
import numpy as np
import matplotlib.pyplot as plt
N = len(age_gr)
ind = np.arange(N) # the x locations for the groups
width = 0.35 # the width of the bars
fig = plt.figure()
ax = fig.add_subplot(111)
rects1 = ax.bar(ind, countf, width, color='b')
rects2 = ax.bar(ind+width, countm, width, color='r')
# add some
ax.set_ylabel('Scores')
ax.set_title('Age group and Gender-wise Subscriber Distribution')
ax.set_xticks(ind+width)
# \n0:under 16 \n 1:16-20 \n i(<-N):16+5i-20+5i (i<4) \n 5:35-40 \n 6:40-50 \n 7:50 over
ax.set_xticklabels(('Under 16','16-20','21-25','26-30','31-35','36-40','40-50','Above 50'))
ax.legend( (rects1[0], rects2[0]), ('male', 'female') )
def autolabel(rects,sex):
# attach some text labels
hf=0
hm=0
iter=0
for rect in rects:
height = rect.get_height()
if sex==0:
hf+=height
print 'Female'
print '\n Height='+str(height)+'\n Sum_female='+str(sum_female)
pf=(height*1.00/sum_female)*100.00
print pf
ax.text(rect.get_x()+rect.get_width()/2., 1.05*height, '%1.1f%%'%float(pf), ha='center', va='bottom')
iter+=1
else:
hm+=height
print 'Male'
print '\n Height='+str(height)+'\n Sum_male='+str(sum_male)
pm=(height*1.00/sum_male)*100.00
print pm
ax.text(rect.get_x()+rect.get_width()/2., 1.05*height, '%1.1f%%'%float(pm), ha='center', va='bottom')
autolabel(rects1,0)
autolabel(rects2,1)
fig.savefig(path)
image_data = open(path, "rb").read()
return HttpResponse(image_data, mimetype="image/png")