get random name from file in python - list

I'm a novice at Python. I've written this so far, but I can't get it to run past line 4; the error is a syntax error at def fileload(filename). What am I not doing correctly?
import random

def fileload(filename)
    with open(filename,"r") as names:
        for item in names:
            return item

#collect lines from files and append to list
def main():
    name=[]
    desc=[]
    title=[]
    filename=fileload(names.txt)
    for item in names:
        name.append(item)
        break
    filename=fileload(descriptors.txt)
    for item in description:
        desc.append(item)
        break
    filename=fileload(titles.txt)
    for item in titles:
        title.append(item)

#random namer
def namer():
    i=random.randint(0,len(names))
    first_name=names[i]
    last_name=names[i]
    return first_name last_name

#random titler
def titler():
    i= random.randint(0,len(title))
    random_title=title[i]

#random description
def descript():
    i=random.randint(0,len(desc))
    random_description = desc[i]

#print out random names, titles and descriptor
print(f"you are; {random_title} {first_name} {last_name} the {random_title}")
namer()
titler()
descript()
main()
Also, some of the indentation may be off from what I intended; blame the website for that.
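For reference, the immediate syntax error is the missing colon at the end of the def line. Below is a minimal corrected sketch of the whole script, assuming names.txt, titles.txt, and descriptors.txt each hold one entry per line; note the quoted file names, and random.choice in place of randint(0, len(...)), which can index one past the end of a list:

import random

def fileload(filename):  # the def header needs a trailing colon
    with open(filename, "r") as f:
        return [line.strip() for line in f]  # collect every line, not just the first

def main():
    names = fileload("names.txt")  # file names must be quoted strings
    titles = fileload("titles.txt")
    descs = fileload("descriptors.txt")
    first_name = random.choice(names)
    last_name = random.choice(names)
    print(f"you are; {first_name} {last_name} the {random.choice(titles)}, {random.choice(descs)}")

main()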

Related

How to extract multiple rows of data relative to single row in scrapy

I am trying to scrape the webpage given in this link:
http://new-york.eat24hours.com/picasso-pizza/19053
Here I am trying to get all the possible details, like the address, phone, etc.
So far I have extracted the name, phone, address, reviews, and rating.
But I also want to extract the full menu of the restaurant (name of each item with its price).
So far I have no idea how to manage this data in the CSV output: the rest of the data for a single URL forms a single record, but the number of menu items will always vary.
Here is my code so far:
import scrapy
from urls import start_urls

class eat24Spider(scrapy.Spider):
    AUTOTHROTTLE_ENABLED = True
    name = 'eat24'

    def start_requests(self):
        for x in start_urls:
            yield scrapy.Request(x, self.parse)

    def parse(self, response):
        brickset = response
        NAME_SELECTOR = 'normalize-space(.//h1[@id="restaurant_name"]/a/text())'
        ADDRESS_SELECTION = 'normalize-space(.//span[@itemprop="streetAddress"]/text())'
        LOCALITY = 'normalize-space(.//span[@itemprop="addressLocality"]/text())'
        REGION = 'normalize-space(.//span[@itemprop="addressRegion"]/text())'
        ZIP = 'normalize-space(.//span[@itemprop="postalCode"]/text())'
        PHONE_SELECTOR = 'normalize-space(.//span[@itemprop="telephone"]/text())'
        RATING = './/meta[@itemprop="ratingValue"]/@content'
        NO_OF_REVIEWS = './/meta[@itemprop="reviewCount"]/@content'
        OPENING_HOURS = './/div[@class="hours_info"]//nobr/text()'
        EMAIL_SELECTOR = './/div[@class="company-info__block"]/div[@class="business-buttons"]/a[span]/@href[substring-after(.,"mailto:")]'

        yield {
            'name': brickset.xpath(NAME_SELECTOR).extract_first().encode('utf8'),
            'pagelink': response.url,
            'address': str(brickset.xpath(ADDRESS_SELECTION).extract_first().encode('utf8') + ', ' +
                           brickset.xpath(LOCALITY).extract_first().encode('utf8') + ', ' +
                           brickset.xpath(REGION).extract_first().encode('utf8') + ', ' +
                           brickset.xpath(ZIP).extract_first().encode('utf8')),
            'phone': str(brickset.xpath(PHONE_SELECTOR).extract_first()),
            'reviews': str(brickset.xpath(NO_OF_REVIEWS).extract_first()),
            'rating': str(brickset.xpath(RATING).extract_first()),
            'opening_hours': str(brickset.xpath(OPENING_HOURS).extract_first())
        }
I am sorry if I am making this confusing, but any kind of help will be appreciated.
Thank you in advance!
If you want to extract the full restaurant menu, first of all you need to locate the element that contains both the name and the price:
menu_items = response.xpath('//tr[@itemscope]')
After that, you can simply write a for loop and iterate over the menu rows, appending each name and price to a list:
menu = []
for item in menu_items:
    menu.append({
        'name': item.xpath('.//a[@class="cpa"]/text()').extract_first(),
        'price': item.xpath('.//span[@itemprop="price"]/text()').extract_first()
    })
Finally, you can add a new 'menu' key to your dict:
yield {'menu': menu}
Also, I suggest you use scrapy Items for storing scraped data:
https://doc.scrapy.org/en/latest/topics/items.html
For outputting the data to a CSV file, use Scrapy's Feed exports; type in the console:
scrapy crawl yourspidername -o restaurants.csv
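Putting the answer's pieces together, the spider could end up looking something like the sketch below (a minimal, self-contained illustration; the spider name and the restriction to the asker's single URL are assumed, and only the name field is kept alongside the menu for brevity):

import scrapy

class Eat24MenuSpider(scrapy.Spider):
    name = 'eat24_menu'  # hypothetical name for this sketch
    start_urls = ['http://new-york.eat24hours.com/picasso-pizza/19053']

    def parse(self, response):
        # every menu row carries the itemscope attribute
        menu = []
        for item in response.xpath('//tr[@itemscope]'):
            menu.append({
                'name': item.xpath('.//a[@class="cpa"]/text()').extract_first(),
                'price': item.xpath('.//span[@itemprop="price"]/text()').extract_first()
            })
        yield {
            'name': response.xpath('normalize-space(.//h1[@id="restaurant_name"]/a/text())').extract_first(),
            'pagelink': response.url,
            'menu': menu,  # nested list: one entry per menu item
        }

Be aware that the CSV exporter flattens a nested list into a single cell, so if you need one row per menu item, it is better to yield one item per menu row instead.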

List populated with Scrapy is returned before actually filled

This involves pretty much the same code I just asked a different question about this morning, so if it looks familiar, that's because it is.
class LbcSubtopicSpider(scrapy.Spider):
...irrelevant/sensitive code...
rawTranscripts = []
rawTranslations = []
def parse(self, response):
rawTitles = []
rawVideos = []
for sel in response.xpath('//ul[1]'): #only scrape the first list
...irrelevant code...
index = 0
for sub in sel.xpath('li/ul/li/a'): #scrape the sublist items
index += 1
if index%2!=0: #odd numbered entries are the transcripts
transcriptLink = sub.xpath('#href').extract()
#url = response.urljoin(transcriptLink[0])
#yield scrapy.Request(url, callback=self.parse_transcript)
else: #even numbered entries are the translations
translationLink = sub.xpath('#href').extract()
url = response.urljoin(translationLink[0])
yield scrapy.Request(url, callback=self.parse_translation)
print rawTitles
print rawVideos
print "translations:"
print self.rawTranslations
def parse_translation(self, response):
for sel in response.xpath('//p[not(#class)]'):
rawTranslation = sel.xpath('text()').extract()
rawTranslation = ''.join(rawTranslation)
#print rawTranslation
self.rawTranslations.append(rawTranslation)
#print self.rawTranslations
My problem is that "print self.rawTranslations" in the parse(...) method prints nothing more than "[]". This could mean one of two things: either the list is being reset right before printing, or the print runs before the calls to parse_translation(...), which populate the list from the links parse(...) follows, have finished. I'm inclined to suspect the latter, as I can't see any code that would reset the list, unless "rawTranslations = []" in the class body is run multiple times.
Worth noting is that if I uncomment the same print line in parse_translation(...), it prints the desired output, meaning the text is extracted correctly and the problem seems to be unique to the main parse(...) method.
My attempts to resolve what I believe is a synchronization problem were pretty aimless: I just tried using an RLock object based on as many Google tutorials as I could find, and I'm 99% sure I misused it anyway, as the result was identical.
The problem here is that you are not understanding how Scrapy really works.
Scrapy is a crawling framework, used for creating website spiders, not just for making requests; that's what the requests module is for.
Scrapy's requests work asynchronously: when you call yield Request(...) you are adding a request to a queue of requests that will be executed at some point (you don't have control over when). This means you can't expect the code that comes after a yield Request(...) to run only once that request has finished. In fact, your method should always end by yielding a Request or an Item.
Now, from what I can see (and this is a common source of confusion with Scrapy), you want to keep populating an item created in one method, but the information you need arrives in a different request.
In that case, communication is usually done with the meta parameter of the Request, something like this:
...
yield Request(url, callback=self.second_method,
              meta={'item': myitem, 'moreinfo': 'moreinfo', 'foo': 'bar'})

def second_method(self, response):
    previous_meta_info = response.meta
    # I can access the previous item with `response.meta['item']`
    ...
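To make the pattern concrete, here is a minimal, self-contained sketch (the spider name, URLs, and fields are hypothetical):

import scrapy

class ChainSpider(scrapy.Spider):
    name = 'chain'
    start_urls = ['http://example.com/index']

    def parse(self, response):
        item = {'page': response.url}
        # hand the partially built item to the next request via meta
        yield scrapy.Request('http://example.com/detail',
                             callback=self.parse_detail,
                             meta={'item': item})

    def parse_detail(self, response):
        item = response.meta['item']  # recover the item built in parse()
        item['detail'] = response.xpath('//h1/text()').extract_first()
        yield item  # only yield the item once it is complete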
So this seems like a somewhat hacky solution, especially since I just learned of Scrapy's request-priority functionality, but here's my new code, which gives the desired result:
class LbcVideosSpider(scrapy.Spider):

    ...code omitted...

    done = 0 #variable to keep track of subtopic iterations
    rawTranscripts = []
    rawTranslations = []

    def parse(self, response):
        #initialize containers for each field
        rawTitles = []
        rawVideos = []

        ...code omitted...

        index = 0
        query = sel.xpath('li/ul/li/a')
        for sub in query: #scrape the sublist items
            index += 1
            if index%2!=0: #odd numbered entries are the transcripts
                transcriptLink = sub.xpath('@href').extract()
                #url = response.urljoin(transcriptLink[0])
                #yield scrapy.Request(url, callback=self.parse_transcript)
            else: #even numbered entries are the translations
                translationLink = sub.xpath('@href').extract()
                url = response.urljoin(translationLink[0])
                yield scrapy.Request(url, callback=self.parse_translation,
                                     meta={'index': index/2, 'maxIndex': len(query)/2})
        print rawTitles
        print rawVideos

    def parse_translation(self, response):
        #grab meta variables
        i = response.meta['index']
        maxIndex = response.meta['maxIndex']
        #interested in p nodes without class
        query = response.xpath('//p[not(@class)]')
        for sel in query:
            rawTranslation = sel.xpath('text()').extract()
            rawTranslation = ''.join(rawTranslation) #collapse each line
            self.rawTranslations.append(rawTranslation)
        #increment number of translations done, check if finished
        self.done += 1
        print self.done
        if self.done==maxIndex:
            print self.rawTranslations
Basically, I just kept track of how many requests had completed and made some code conditional on the request being the final one. This prints the fully populated list.

Saving lists with kivy

I wrote a Kivy app which is supposed to save a list of strings, but when I try to append a new string to the list I get this error:
ValueError: dictionary update sequence element #0 has length 1; 2 is required
Here is the function which is supposed to build and save the list:
def save(self, vinput):
    store = JsonStore('hello.json')
    self.ueberschrift = ([])
    if store.exists('tito'):
        self.ueberschrift = store.get('tito')
    self.ueberschrift.update(vinput)
    store.put('tito', list_of_cap=self.ueberschrift)
I left the rest of the app and the import commands out, because these parts are working fine.
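For what it's worth, JsonStore.get returns the dict stored under the given key, so self.ueberschrift ends up being a dict, and dict.update on a plain string raises exactly this ValueError (each character would have to be a key/value pair). A possible fix, assuming vinput is the string to add and the list lives under the list_of_cap key used in store.put:

from kivy.storage.jsonstore import JsonStore

def save(self, vinput):
    store = JsonStore('hello.json')
    captions = []
    if store.exists('tito'):
        # get() returns the stored dict; pull the actual list out of it
        captions = store.get('tito')['list_of_cap']
    captions.append(vinput)  # append the new string instead of dict-updating
    store.put('tito', list_of_cap=captions)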

Partial Text Matching GAE

I am developing a web application for managing customers, so I have a Customer entity which is made up of the usual fields, such as first_name, last_name, age, etc.
I have a page where these customers are shown as a table. On the same page I have a search field, and I'd like to filter the customers and update the table while the user is typing something into the search field, using Ajax.
Here is how it should work:
Figure 1: The main page showing all of the customers:
Figure 2: As soon as the user types the letter "b", the table is updated with the results:
Given that partial text matching is not supported in GAE, I have worked around it, starting from what is shown here. TL;DR: I have created a Customers index that contains a search document for every customer (doc_id=customer_key). Each search document contains a field for every customer attribute I want to be able to search on (e.g. first_name, last_name), and each field value is built from every prefix of the attribute: supposing the last_name is Berlusconi, the field is made up of the tokens "b", "be", "ber", "berl", "berlu", "berlus", "berlusc", "berlusco", "berluscon", "berlusconi".
In this way I am able to perform full text matching in a way that resembles partial text matching: if I search for "Be", the Berlusconi customer is returned.
The search is made via Ajax calls: whenever the user types in the search field, an Ajax call is made with the query string, and a JSON object is returned (the call is delayed a little to see whether the user keeps typing, to avoid sending a burst of requests).
Now, things were working well in debugging, but I was testing with only a few people in the datastore. As soon as I put in many people, search becomes very slow.
This is how I create the search documents. This is called every time a new customer is put to the datastore.
def put_search_document(cls, key):
    """
    Called by _post_put_hook in BaseModel
    """
    model = key.get()
    _fields = []
    if model:
        _fields.append(search.AtomField(name="empty", value=""))  # to retrieve customers when no query string
        _fields.append(search.TextField(name="sort1", value=model.last_name.lower()))
        _fields.append(search.TextField(name="sort2", value=model.first_name.lower()))
        _fields.append(search.TextField(name="full_name", value=Customer.tokenize1(
            model.first_name.lower()+" "+model.last_name.lower()
        )))
        _fields.append(search.TextField(name="full_name_rev", value=Customer.tokenize1(
            model.last_name.lower()+" "+model.first_name.lower()
        )))
        # _fields.append(search.TextField(name="telephone", value=Customer.tokenize1(
        #     model.telephone.lower()
        # )))
        # _fields.append(search.TextField(name="email", value=Customer.tokenize1(
        #     model.email.lower()
        # )))
        document = search.Document(  # create new document with doc_id=key.urlsafe()
            doc_id=key.urlsafe(),
            fields=_fields)
        index = search.Index(name=cls._get_kind()+"Index")  # not in try-except: defer will catch and retry.
        index.put(document)

@staticmethod
def tokenize1(string):
    s = ""
    for i in range(len(string)):
        if i > 0:
            s = s + " " + string[0:i+1]
        else:
            s = string[0:i+1]
    return s
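As a quick illustration of what the tokenizer produces (hypothetical usage, not part of the original post):

# every prefix of the input becomes a whitespace-separated token,
# so full-text matching on the tokens behaves like prefix matching
print(Customer.tokenize1("berlusconi"))
# b be ber berl berlu berlus berlusc berlusco berluscon berlusconi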
This is the search code:
@staticmethod
def search(ndb_model, query_phrase):
    # TODO: search returns a limited number of results (20 by default)
    # (See Search Results at https://cloud.google.com/appengine/docs/python/search/#Python_Overview)
    sort1 = search.SortExpression(expression='sort1', direction=search.SortExpression.ASCENDING,
                                  default_value="")
    sort2 = search.SortExpression(expression='sort2', direction=search.SortExpression.ASCENDING,
                                  default_value="")
    sort_opt = search.SortOptions(expressions=[sort1, sort2])
    results = search.Index(name=ndb_model._get_kind() + "Index").search(
        search.Query(
            query_string=query_phrase,
            options=search.QueryOptions(
                sort_options=sort_opt
            )
        )
    )
    print "----------------"
    res_list = []
    for r in results:
        obj = ndb.Key(urlsafe=r.doc_id).get()
        print obj.first_name + " " + obj.last_name
        res_list.append(obj)
    return res_list
Did anyone else have this same experience? If so, how did you solve it?
Thank you guys very much,
Marco Galassi
EDIT: names, emails, and phone numbers are obviously totally invented.
EDIT 2: I have now moved to TextField, which looks a little faster, but the problem still persists.

Frustrating python syntax error

I am writing a script to automate HvZ games at my college and have run into this strange, frustrating syntax error:
File "HvZGameMaster.py", line 53
class players(object):
^
SyntaxError: invalid syntax
Here is the offending code:
class mailMan(object):
    """mailMan manages player interactions such as tags reported via text messages or emails"""
    def __init__(self, playerManager):
        super(mailMan, self).__init__()
        self.mail = imaplib.IMAP4_SSL('imap.gmail.com')
        self.mail.login(args.username,args.password)
        self.mail.list()
        # Out: list of "folders" aka labels in gmail.
        self.mail.select("inbox") #connect to inbox.

    def getBody(self, emailMessage):
        maintype = emailMessage.get_content_maintype()
        if maintype == 'multipart':
            for part in emailMessage.get_payload():
                if part.get_content_maintype() == 'text':
                    return part.get_payload()
        elif maintype == 'text':
            return emailMessage.get_payload()

    def getUnread(self):
        self.mail.select("inbox") # Select inbox or default namespace
        (retcode, messages) = self.mail.search(None, '(UNSEEN)')
        if retcode == 'OK':
            retlist = []
            for num in messages[0].split(' '):
                print 'Processing :', messages
                typ, data = self.mail.fetch(num,'(RFC822)')
                msg = email.message_from_string(data[0][1])
                typ, data = self.mail.store(num,'-FLAGS','\\Seen')
                if retcode == 'OK':
                    for item in str(msg).split('\n'):
                        #finds who sent the message
                        if re.match("From: *",item):
                            print (item[6:], self.getBody(msg))
                            retlist.append((item[6:], self.getBody(msg).rstrip())
                            #print (item, self.getBody(msg).rstrip())

class players(object): #<-the problem happens here
    """manages the player"""
    def __init__(self, pDict):
        super(players, self).__init__()
        self.pDict = pDict

    #makes a particular player a zombie
    def makeZombie(self, pID):
        self.pDict[pID].zombie = True

    #makes a particular player a human
    def makeHuman(self, pID):
        self.pDict[pID].zombie = False
As far as I can tell, what I have written is correct. I have checked to make sure the indentation is all tabs and not spaces, and I have made sure I don't have any erroneous \r's or \n's floating around (all the \n's are where they should be, at the end of each line, and I'm not using any \r's).
You can find all my code for this project here if you would like to try running it yourself
There is an unbalanced (missing) parenthesis on the line above the line raising the error:
retlist.append((item[6:], self.getBody(msg).rstrip())
Note that some editors have matching-parenthesis highlighting and key combinations for moving back and forth across matched parentheses. Using an editor with these features can help cut down on these errors.
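For reference, the balanced version of that line simply adds the missing closing parenthesis at the end:

retlist.append((item[6:], self.getBody(msg).rstrip()))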