Raspberry Pi - pinging multiple IPs - python-2.7

I would like the code analysed and fixed, or to be pointed in the right direction, please. There are many errors; I have overcome some, but not others.
The program runs on a Raspberry Pi 2 and should try to ping specific IP addresses and return a result.
I'm very new to programming, as you can probably tell! I'm not sure whether I need a ping library or can do it without one.
import sys
import time
from pushbullet import Pushbullet
import serial

class Users(object):
    def __init__(self, name=None, ip=None):
        self.name = name
        self.ip = ip
        self.status = 'out'

pb = Pushbullet("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")  # Pushbullet ID removed

userList = []
userList.append(Users("Ali", "192.18.1.14"))
userList.append(Users("Sophie", "192.18.1.9"))
userList.append(Users("TV", "192.18.1.7"))

try:
    while True:
        print "Checking... " + time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime())
        for user in userList:
            result = os.system ("ping -n 1 " = user.ip)
            oldStatus = user.status
            if (result == 0):
                #What we'll do if a device is detected
                if (oldStatus == 'out'):
                    push = pb.push_note("Home Pi", user.name + " is home")
                    user.status = 'in'
                    print user.name + " is home"
            else:
                #What we'll do if a device is NOT detected
                if (oldStatus == 'in'):
                    push = pb.push_note("Home Pi", user.name + " has just left")
                    user.status = 'out'
                    print user.name + " is out"
        print "Next check will be in 30 seconds"
        time.sleep(30)
except (KeyboardInterrupt, SystemExit):

I modified your code so that it works (I don't have Pushbullet, so those parts are commented out). Compare my new code with your previous version to see the differences and the errors:
import sys
import time
#from pushbullet import Pushbullet
#import serial
# you need to import os
import os

class Users(object):
    def __init__(self, name=None, ip=None):
        self.name = name
        self.ip = ip
        self.status = 'out'

#pb = Pushbullet("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")  # Pushbullet ID removed

userList = []
userList.append(Users("Notebook", "192.168.1.2"))
userList.append(Users("TV", "192.168.1.4"))

try:
    while True:
        print "Checking... " + time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime())
        for user in userList:
            #result = os.system ("ping -n 1 " = user.ip)
            # to concatenate strings you need the + sign
            #result = os.system("ping -n 1 " + user.ip)
            # -n is the Windows count option; on Linux (e.g. Raspbian/Ubuntu) use -c
            result = os.system("ping -c 1 " + user.ip)
            oldStatus = user.status
            if (result == 0):
                #What we'll do if a device is detected
                if (oldStatus == 'out'):
                    #push = pb.push_note("Home Pi", user.name + " is home")
                    user.status = 'in'
                    print user.name + " is home"
            else:
                #What we'll do if a device is NOT detected
                if (oldStatus == 'in'):
                    #push = pb.push_note("Home Pi", user.name + " has just left")
                    user.status = 'out'
                    print user.name + " is out"
        print "Next check will be in 30 seconds"
        time.sleep(30)
# the indentation of the except block was wrong
except (KeyboardInterrupt, SystemExit):
    sys.exit(0)
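A side note on the ping itself: here is a minimal Python 2.7 sketch (my own, not part of the answer above) that uses subprocess instead of os.system, so ping's output stays off the console and the exit-code check is explicit. It assumes a Linux ping that understands -c (count) and -W (timeout in seconds):

import os
import subprocess

def is_host_up(ip):
    # Send one echo request and wait at most 1 second.
    # An exit code of 0 means a reply came back.
    with open(os.devnull, 'w') as devnull:
        return subprocess.call(["ping", "-c", "1", "-W", "1", ip],
                               stdout=devnull, stderr=devnull) == 0

# In the loop above this would replace the os.system call, e.g.:
# result = 0 if is_host_up(user.ip) else 1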

Related

Python: fastest (parallel) way to download thousands of zipped files using requests

Background
For our research we currently need to download ~15,000 files. While the database has its own command-line tool to support "bulk" download, it is totally unfeasible to do this for 15,000 runs sequentially (which is what the command-line tool currently does).
Simple math
I used the currently available command-line tool to download several runs and took the average runtime, which is ~20 minutes per file (if not more). So doing this for all 15,000 files would take 15,000 * 20 / 60 / 24 = 208 days, which would only be nice if you got paid per hour of script runtime ;)
The bulk command line script
This is the script that allows users to download bulk data (NOT MADE BY ME):
NOTE: I changed it a little so that I can run it from the IDE directly (instead of having to start it from the command line for every little change).
'''
Created on 27/10/2015
#author: Maxim Scheremetjew
amended 07/11/2016 by Maxim Scheremetjew
version: 1.1
'''

import sys
import argparse
import csv
import os
import urllib.request, urllib.parse, urllib.error
import urllib.request, urllib.error, urllib.parse
from urllib.error import URLError
from io import StringIO


def _download_resource_by_url(url, output_file_name):
    """Kicks off a download and stores the file at the given path.
    Arguments:
    'url' -- Resource location.
    'output_file_name' -- Path of the output file.
    """
    print("Starting the download of the following file...")
    print(url)
    print("Saving file in:\n" + output_file_name)
    try:
        urllib.request.urlretrieve(url, output_file_name)
    except URLError as url_error:
        print(url_error)
        raise
    except IOError as io_error:
        print(io_error)
        raise
    print("Download finished.")


def _get_number_of_chunks(url_template, study_id, sample_id, run_id, version, domain, file_type):
    """
    Returns the number of chunks for the given set of parameters (study, sample and run identifier).
    """
    print("Getting the number of chunks from the following URL...")
    url_get_number_of_chunks = url_template % (study_id, sample_id, run_id, version, domain, file_type)
    print(url_get_number_of_chunks)
    try:
        file_stream_handler = urllib.request.urlopen(url_get_number_of_chunks)
        result = int(file_stream_handler.read())
        print("Retrieved " + str(result) + " chunks.")
        return result
    except URLError as url_error:
        print(url_error)
        raise
    except IOError as io_error:
        print(io_error)
        raise
    except ValueError as e:
        print(e)
        print("Skipping this run! Could not retrieve the number of chunks for this URL. "
              "Check the version number in the URL and check if the run is available online.")
        return 0


def _get_file_stream_handler(url_template, study_id):
    """
    Returns a file stream handler for the given URL.
    """
    print("Getting the list of project runs...")
    url_get_project_runs = url_template % (study_id)
    try:
        req = urllib.request.Request(url=url_get_project_runs, headers={'Content-Type': 'text/plain'})
        res = urllib.request.urlopen(req)
        dec_res = res.read().decode()
        sys.stderr.write(str(dec_res))
        return dec_res
    except URLError as url_error:
        print(url_error)
        raise
    except IOError as io_error:
        print(io_error)
        raise
    except ValueError as e:
        print(e)
        print("Could not retrieve any runs. Open the retrieval URL further down in your browser and see if you get any results back. Program will exit now.")
        print(url_get_project_runs)
        raise


def _print_program_settings(project_id, version, selected_file_types_list, output_path, root_url):
    print("Running the program with the following setting...")
    print("Project: " + project_id)
    print("Pipeline version: " + version)
    print("Selected file types: " + ",".join(selected_file_types_list))
    print("Root URL: " + root_url)
    print("Writing result to: " + output_path)


def start(args):
    function_file_type_list = ["InterProScan", "GOAnnotations", "GOSlimAnnotations"]
    sequences_file_type_list = ["ProcessedReads", "ReadsWithPredictedCDS", "ReadsWithMatches", "ReadsWithoutMatches",
                                "PredictedCDS", "PredictedCDSWithoutAnnotation", "PredictedCDSWithAnnotation",
                                "PredictedORFWithoutAnnotation", "ncRNA-tRNA-FASTA"]
    taxonomy_file_type_list = ["5S-rRNA-FASTA", "16S-rRNA-FASTA", "23S-rRNA-FASTA", "OTU-TSV", "OTU-BIOM",
                               "OTU-table-HDF5-BIOM", "OTU-table-JSON-BIOM", "NewickTree", "NewickPrunedTree"]
    # Default list of available file types
    default_file_type_list = sequences_file_type_list + function_file_type_list + taxonomy_file_type_list

    # Parse script parameters
    # Parse the project accession
    study_id = args['project_id']

    # Parse the values for the file type parameter
    selected_file_types_list = []
    if not args['file_type']:
        # If not specified use the default set of file types
        selected_file_types_list = default_file_type_list
    else:
        # Remove whitespaces
        selected_file_types_str = args['file_type'].replace(" ", "")
        # Set all functional result file types
        if selected_file_types_str == "AllFunction":
            selected_file_types_list = function_file_type_list
        elif selected_file_types_str == "AllTaxonomy":
            selected_file_types_list = taxonomy_file_type_list
        elif selected_file_types_str == "AllSequences":
            selected_file_types_list = sequences_file_type_list
        # Set defined file types
        elif len(selected_file_types_str.split(",")) > 1:
            selected_file_types_list = selected_file_types_str.split(",")
        # Set single file type
        else:
            selected_file_types_list.append(selected_file_types_str)

    # Parse the analysis version
    version = args['version']

    root_url = "https://www.ebi.ac.uk"
    study_url_template = root_url + "/metagenomics/projects/%s/runs"
    number_of_chunks_url_template = root_url + "/metagenomics/projects/%s/samples/%s/runs/%s/results/versions/%s/%s/%s/chunks"
    chunk_url_template = root_url + "/metagenomics/projects/%s/samples/%s/runs/%s/results/versions/%s/%s/%s/chunks/%s"
    download_url_template = root_url + "/metagenomics/projects/%s/samples/%s/runs/%s/results/versions/%s/%s/%s"

    # Print out the program settings
    _print_program_settings(study_id, version, selected_file_types_list, args['output_path'], root_url)

    # Iterating over all file types
    for file_type in selected_file_types_list:
        domain = None
        fileExtension = None
        # Boolean flag to indicate if a file type is chunked or not
        is_chunked = True
        # Set the result file domain (sequences, function or taxonomy) dependent on the file type
        # Set output file extension (tsv, faa or fasta) dependent on the file type
        if file_type == 'InterProScan':
            domain = "function"
            fileExtension = ".tsv.gz"
        elif file_type == 'GOSlimAnnotations' or file_type == 'GOAnnotations':
            domain = "function"
            fileExtension = ".csv"
            is_chunked = False
        # PredictedCDS is version 1.0 and 2.0 only, from version 3.0 on this file type was replaced by
        # PredictedCDSWithAnnotation (PredictedCDS can be gained by concatenation of the 2 sequence file types now)
        elif file_type == 'PredictedCDS' or file_type == 'PredicatedCDSWithoutAnnotation' or file_type == 'PredictedCDSWithAnnotation':
            if file_type == 'PredictedCDSWithAnnotation' and (version == '1.0' or version == '2.0'):
                print("File type '" + file_type + "' is not available for version " + version + "!")
                continue
            elif file_type == 'PredictedCDS' and version == '3.0':
                print("File type '" + file_type + "' is not available for version " + version + "!")
                continue
            domain = "sequences"
            fileExtension = ".faa.gz"
        elif file_type == 'ncRNA-tRNA-FASTA':
            domain = "sequences"
            fileExtension = ".fasta"
            is_chunked = False
        elif file_type == '5S-rRNA-FASTA' or file_type == '16S-rRNA-FASTA' or file_type == '23S-rRNA-FASTA':
            is_chunked = False
            domain = "taxonomy"
            fileExtension = ".fasta"
        # NewickPrunedTree is version 2 only
        # NewickTree is version 1 only
        elif file_type == 'NewickPrunedTree' or file_type == 'NewickTree':
            if file_type == 'NewickPrunedTree' and version == '1.0':
                print("File type '" + file_type + "' is not available for version " + version + "!")
                continue
            if file_type == 'NewickTree' and version == '2.0':
                print("File type '" + file_type + "' is not available for version " + version + "!")
                continue
            is_chunked = False
            domain = "taxonomy"
            fileExtension = ".tree"
        elif file_type == 'OTU-TSV':
            is_chunked = False
            domain = "taxonomy"
            fileExtension = ".tsv"
        # OTU-BIOM is version 1 only
        # OTU-table-HDF5-BIOM and OTU-table-JSON-BIOM are version 2 only
        elif file_type == 'OTU-BIOM' or file_type == 'OTU-table-HDF5-BIOM' or file_type == 'OTU-table-JSON-BIOM':
            if file_type == 'OTU-BIOM' and version == '2.0':
                print("File type '" + file_type + "' is not available for version " + version + "!")
                continue
            if (file_type == 'OTU-table-HDF5-BIOM' or file_type == 'OTU-table-JSON-BIOM') and version == '1.0':
                print("File type '" + file_type + "' is not available for version " + version + "!")
                continue
            is_chunked = False
            domain = "taxonomy"
            fileExtension = ".biom"
        else:
            domain = "sequences"
            fileExtension = ".fasta.gz"

        # Retrieve a file stream handler from the given URL and iterate over each line (each run)
        # and build the download link using the variables from above
        file_stream_handler = _get_file_stream_handler(study_url_template, study_id)
        reader = csv.reader(StringIO(file_stream_handler), delimiter=',')
        for study_id, sample_id, run_id in reader:
            print(study_id + ", " + sample_id + ", " + run_id)
            output_path = args['output_path'] + "/" + study_id + "/" + file_type
            if not os.path.exists(output_path):
                os.makedirs(output_path)
            if is_chunked:
                number_of_chunks = _get_number_of_chunks(number_of_chunks_url_template, study_id, sample_id, run_id,
                                                         version, domain, file_type)
                for chunk in range(1, number_of_chunks + 1):
                    output_file_name = output_path + "/" + run_id.replace(" ", "").replace(",", "-") + "_" + file_type + "_" + str(chunk) + fileExtension
                    rootUrl = chunk_url_template % (study_id, sample_id, run_id, version, domain, file_type, chunk)
                    _download_resource_by_url(rootUrl, output_file_name)
            else:
                output_file_name = output_path + "/" + run_id.replace(" ", "").replace(",", "-") + "_" + file_type + fileExtension
                rootUrl = download_url_template % (study_id, sample_id, run_id, version, domain, file_type)
                _download_resource_by_url(rootUrl, output_file_name)

    print("Program finished.")


start({'project_id': 'ERP001736',
       'file_type': 'ProcessedReads,16S-rRNA-FASTA,OTU-TSV',
       'version': '2.0',
       'output_path': ''})
What am I thinking of?
I have (a little) experience with multithreading / multiprocessing / asynchronous requests, but can't figure out what I should do in this case. I have 20 CPUs on my Linux server, so I could do some multiprocessing (~208 / 20 = 10+ days), but based on my previous experience with this, the CPUs would only be used at ~1-5%, which seems a waste of capacity. I haven't used the other two methods for this kind of problem; I have used both for simple HTTP requests (just asking for a page and getting the result, not downloading files in chunks).
The real question
What would be the fastest method to download all these 15,000 files (sequentially is definitely not an option)?
If it's not too time-consuming, please provide a code example (or a reference) of what you mean.
Updates
I used nload to measure the data-flow/bandwidth usage while running the script and downloading one file (of course there were background processes, but they seem to be negligible, only a few Mbit/s). I did this at 4 points in time and averaged the numbers:
Curr: 110 Mbit/s
Min: 30 Mbit/s
Avg: 90 Mbit/s
Max: 159.75 Mbit/s
Ttl: 752.41 GByte
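Since the bottleneck here is the network rather than the CPU (the 1-5% CPU usage mentioned above points the same way), a thread pool is usually the simplest fit: the threads spend almost all their time waiting on the network, so the GIL does not get in the way. Below is a minimal sketch of that idea (my own, not the database's tooling); it assumes you first collect the (url, output_file_name) pairs that start() currently passes to _download_resource_by_url one at a time:

import concurrent.futures
import urllib.request

def download_one(job):
    url, output_file_name = job
    # Each call blocks on the network, so many of them can overlap across threads.
    urllib.request.urlretrieve(url, output_file_name)
    return output_file_name

def download_all(jobs, max_workers=16):
    # jobs: an iterable of (url, output_file_name) tuples collected beforehand.
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
        for finished in pool.map(download_one, jobs):
            print("Finished:", finished)

The worker count is something to tune against the ~90-160 Mbit/s link measured above; once the link is saturated, adding more workers only adds overhead.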

"Unboundlocalerror: Local Variable "Val" Referenced before Assignment" Error

I have been trying to get my script to loop in such a way that it will load the output into file 1, and then, when it's done loading everything, move the values to output file 2, erase the values in output file 1 and start reloading them; then, when those are done, move the values into output file 2 (overwriting the old ones) and repeat.
I have been pretty successful so far and don't know what else to add to my script. I am hoping someone here knows why I keep getting the "UnboundLocalError: local variable 'val' referenced before assignment" error randomly midway through the loading process; when I have a very small input file, the script performs how I want.
Does anyone know how I can change my script to fix that error? I have tried to understand why it is happening but cannot.
I have tried to research it thoroughly, but none of the suggestions I have found have worked (or I implemented them incorrectly). I have attached my script. Thanks!
import urllib2, re, urllib, urlparse, csv, sys, time, threading, codecs, shutil
from bs4 import BeautifulSoup

def extract(url):
    try:
        sys.stdout.write('0')
        # global file
        page = urllib2.urlopen(url).read()
        soup = BeautifulSoup(page, 'html.parser')
        product = soup.find("div", {"class": "js-product-price"})
        price = product.findNext('div', {'class': 'js-price-display'}).getText().strip()
        oos = product.findNext('p', attrs={'class': "price-oos"})
        if oos is None:
            oos = 'In Stock'
        else:
            oos = oos.getText()
        val = url + "," + price + "," + oos + "," + time.ctime() + '\n'
        # ifile.write(val)
        sys.stdout.write('1')
    except Exception as e:
        print e
    return val

while True:
    ifile = open('output.csv', "w", 0)
    inputs = csv.reader(open('input.csv'))
    # inputs = csv.reader(codecs.open('input.csv', 'rU', 'utf-16'))
    ifile.write('URL' + "," + 'Price' + "," + 'Stock' + "," + "Time" + '\n')
    for i in inputs:
        ifile.write(extract(i[0]))
    ifile.close()
Update:
Thanks for the help guys! This is my new script:
import urllib2, re, urllib, urlparse, csv, sys, time, threading, codecs, shutil
from bs4 import BeautifulSoup

def extract(url):
    try:
        sys.stdout.write('0')
        # global file
        page = urllib2.urlopen(url).read()
        soup = BeautifulSoup(page, 'html.parser')
        product = soup.find("div", {"class": "js-product-price"})
        price = product.findNext('div', {'class': 'js-price-display'}).getText().strip()
        oos = product.findNext('p', attrs={'class': "price-oos"})
        if oos is None:
            oos = 'In Stock'
        else:
            oos = oos.getText()
        val = url + "," + price + "," + oos + "," + time.ctime() + '\n'
        # ifile.write(val)
        sys.stdout.write('1')
    except Exception as e:
        print e
    else:
        return val

while True:
    ifile = open('output.csv', "w", 0)
    inputs = csv.reader(open('input.csv'))
    # inputs = csv.reader(codecs.open('input.csv', 'rU', 'utf-16'))
    ifile.write('URL' + "," + 'Price' + "," + 'Stock' + "," + "Time" + '\n')
    for i in inputs:
        val_to_write = extract(i[0])
        if val_to_write:
            ifile.write(val_to_write)
        ifile.close()
    shutil.copy('output.csv', 'output2.csv')
    print("finished")
With the above script I am now getting the error: "ValueError: I/O operation on closed file". Thanks
Use try-except-else, as you would only want to return val if no exception was raised (if an exception was raised then val wouldn't be assigned when you try to return it). Another suggestion is not to use a "catch-them-all" except block.
def extract(url):
    try:
        sys.stdout.write('0')
        # global file
        page = urllib2.urlopen(url).read()
        soup = BeautifulSoup(page, 'html.parser')
        product = soup.find("div", {"class": "js-product-price"})
        price = product.findNext('div', {'class': 'js-price-display'}).getText().strip()
        oos = product.findNext('p', attrs={'class': "price-oos"})
        if oos is None:
            oos = 'In Stock'
        else:
            oos = oos.getText()
        val = url + "," + price + "," + oos + "," + time.ctime() + '\n'
        # ifile.write(val)
        sys.stdout.write('1')
    except Exception as e:
        print e
    else:
        return val
But be warned: if an exception does occur then extract will return None and the calling code will have to take account for that, for example:
for i in inputs:
    val_to_write = extract(i[0])
    if val_to_write:
        ifile.write(val_to_write)
ifile.close()
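Regarding the follow-up "ValueError: I/O operation on closed file": the most likely cause is that ifile.close() ended up indented inside the for loop, so the file is closed after the first row and the next write fails. A sketch of the outer loop with the close handled by a with block instead (file names and the unbuffered open are kept from the original):

while True:
    with open('output.csv', 'w', 0) as ifile:
        inputs = csv.reader(open('input.csv'))
        ifile.write('URL,Price,Stock,Time\n')
        for i in inputs:
            val_to_write = extract(i[0])
            if val_to_write:
                ifile.write(val_to_write)
    # The file is closed exactly once here, when the with block exits.
    shutil.copy('output.csv', 'output2.csv')
    print("finished")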

print if list index out of range

Hi all, I'm trying to create a handler for "list index out of range" but don't seem to be having any luck.
import json, urllib, re
from urllib import urlencode
import googlemaps
import tempfile
import win32api
import win32print

start = "Adelaide, South Australia"
finish = " ghkjffzh, south Australia "

url = 'http://maps.googleapis.com/maps/api/directions/json?%s' % urlencode((
    ('origin', start),
    ('destination', finish)
))
ur = urllib.urlopen(url)
result = json.load(ur)
filename = "output.txt"

with open(filename, 'w') as output:
    for i in range(0, len(result['routes'][0]['legs'][0]['steps'])):
        try:
            s = (result['routes'][0]['legs'][0]['steps'][i]['html_instructions'])
            d = (result['routes'][0]['legs'][0]['steps'][i]['distance']['text'])
            l = (result['routes'][0]['legs'][0]['steps'][i]['duration']['text'])
            s = re.sub('<[A-Za-z\/][^>]*>', '', s)
            output.writelines(s + " " + d + " " + l + '\n')
        except Exception:
            print "Directions could not be printed"
            output.write("Directions could not be given due to the format of page or the address type")
But nothing is written to the .txt file and I still get the error.
I've tried replacing Exception with IndexError and ValueError, but there is no change.
Solved: by exploring the returned JSON result I found a status field, so I check that first.
with open(filename, 'w') as output:
    if result['status'] == "NOT_FOUND":
        output.write("no directions available")
    else:
        for i in range(0, len(result['routes'][0]['legs'][0]['steps'])):
            s = (result['routes'][0]['legs'][0]['steps'][i]['html_instructions'])
            d = (result['routes'][0]['legs'][0]['steps'][i]['distance']['text'])
            l = (result['routes'][0]['legs'][0]['steps'][i]['duration']['text'])
            s = re.sub('<[A-Za-z\/][^>]*>', '', s)
            output.writelines(s + " " + d + " " + l + '\n')
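For reference, the reason the original except never fired is that the IndexError is raised by result['routes'][0] inside the for statement itself, which sits outside the try block. An alternative sketch (assuming the same JSON layout) that guards on the data rather than on the status string:

with open(filename, 'w') as output:
    if not result.get('routes'):
        output.write("no directions available")
    else:
        # Iterate over the steps directly instead of indexing by position.
        for step in result['routes'][0]['legs'][0]['steps']:
            s = re.sub('<[A-Za-z\/][^>]*>', '', step['html_instructions'])
            output.writelines(s + " " + step['distance']['text'] + " " + step['duration']['text'] + '\n')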

OpenERP RFID Terminal Module Error

After installing RFID Terminal module successfully on a fresh database of OpenERP 7 to pull attendance logs from a ZKTeco DS100 machine, I tried to add a user in the module (to link IDs on the machine with Employee IDs in OpenERP). I get the error:
File "C:\Program Files (x86)\OpenERP 7.0-20130610-231029\Server\server\openerp\addons\hr_attendance_terminal\hr_attendance_terminal.py", line 230, in create_terminal_users
UnboundLocalError: local variable 's' referenced before assignment
The code block:
def create_terminal_users(self, cr, uid, automatic=False, use_new_cursor=False, context=None):
    if context is None:
        context = {}
    terminal_ids = self.pool.get('hr.attendance.terminal').search(cr, uid, [])
    for t_id in terminal_ids:
        terminal = self.pool.get('hr.attendance.terminal').browse(cr, uid, t_id, context=context)
        #print "CREATE USER ON Terminal: %s | %s" % (terminal.tnr, terminal.ip)
        TerminalNr = terminal.tnr  # two-digit terminal number, in hex
        host = terminal.ip         # terminal address
        port = 8000                # terminal data port
        STX = '\x02'               # start byte
        ETX = '\x03'               # stop byte
        emp_ids = self.pool.get('hr.employee').search(cr, uid, [('rfid_key', '!=', '')])
        if emp_ids:
            # establish the connection
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                s.connect((host, port))
            except socket.error, msg:
                print 'Socket Error: %s' % msg
                break
            for emp_id in emp_ids:
                employee = self.pool.get('hr.employee').browse(cr, uid, emp_id, context=context)
                s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                rfid_key = employee.rfid_key
                employee_name = employee.name
                pin = '0000'
                pinabfrage = '0'  # if set to 1, the PIN is requested
                infotext1 = ' '   # 16-character info text
                infotext2 = employee_name.center(16)  # 16-character info text
                infotext3 = ' '   # 16-character info text
                infotext4 = ' '   # 16-character info text
                # build the packet / telegram
                # layout: <STX>SS<command><data><BCC><ETX>
                bccstring = self.create_bcc(TerminalNr + 'SPSTA' + rfid_key + pin + pinabfrage + infotext1 + infotext2 + infotext3 + infotext4)
                message = STX + TerminalNr + 'SPSTA' + rfid_key + pin + pinabfrage + infotext1 + infotext2 + infotext3 + infotext4 + bccstring + ETX
                #print "Employee: %s" % employee.name
                # send the packet / telegram
                try:
                    s.sendall(message)
                except socket.error, msg:
                    print 'Socket Error: %s' % msg
                    break
                while 1:
                    reply = s.recv(8192)
                    if str(reply) != '':
                        r_message = re.sub(r'\x02|\x03', '', str(reply))
                        r_terminal = r_message[0:2]
                        if r_message[2:7] == 'SPSTA':
                            #print "Stammsatz gespeichert!"  # (master record saved)
                            break
    s.close()
    return True
Notes:
The module installed normally even though it was built for OpenERP 6.
I made minor changes to match OpenERP 7 import functions.
Python 2.7.
s.close() should come inside the for loop.
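To illustrate that suggestion with a sketch (a simplification of mine, not the module's actual code): if the socket is created, used and closed within the same block, close() can never run on a variable that was never assigned, which is exactly what triggers the UnboundLocalError when emp_ids comes back empty:

import socket

def send_telegram(host, port, message):
    # Create, use and close the socket in one place, so 's' always exists
    # by the time close() runs.
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.connect((host, port))
        s.sendall(message)
        return s.recv(8192)
    except socket.error, msg:
        print 'Socket Error: %s' % msg
        return None
    finally:
        s.close()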
I would like to add my comment to this, because we are the creators of the module.
The module you've installed will only work with one specific device, the Officetimer II from ADC GmbH, not with any other RFID attendance device!
Beware: the module that is publicly available is only a first release from mid-2011 for OpenERP version 6.0; you could really call it a draft, and bug fixes and enhancements have not been applied to the launchpad branch! This module should not be used with version 6.1 or 7.0 of OpenERP because of how it handles the terminal communication and timezone management!
Of course we have ported the module to 6.1 and 7.0, but we have not made those versions publicly available, for several reasons.

Why is my Python code returning an error when I try to fetch YouTube videos for a given keyword?

Whenever I try to run my code, I receive the following error: "Comment_content Error! 'NoneType' object has no attribute 'href'". I am new to Python and did not write this code myself; it was given to me to use. My understanding is that it was functioning properly before. Could this have to do with changes in the YouTube Data API since it was written?
import pdb
import gdata.youtube
import gdata.youtube.service
import codecs
import time

client = gdata.youtube.service.YouTubeService()
query = gdata.youtube.service.YouTubeVideoQuery()

### the input words are here
query.vq = "4b hair"
#######

# the output files are here
viewFile = codecs.open('views4b_hair.csv', 'w')
commentFile = codecs.open('comments4b_hair.csv', 'w')
##########

query.max_results = 50
query.start_index = 0
query.safesearch = "moderate"
#query.format = 5
query.orderby = "relevance"
#query.author = "hawaiinani"
#pdb.set_trace()

for i in range(19):
    #pdb.set_trace()
    query.start_index = str(int(query.start_index) + 50)
    feed = client.YouTubeQuery(query)
    print len(feed.entry)
    youtubeid = []
    youtubetitle = []
    for entry in feed.entry:
        #youtubetitle.append(entry.title.text)
        youtubeid.append(entry.id.text[38:])
        print entry.id.text[38:], i
        try:
            entry_comment = client.GetYouTubeVideoEntry(video_id=entry.id.text[38:])
            comment_feed = client.GetYouTubeVideoCommentFeed(video_id=entry.id.text[38:])
            viewFile.write(','.join([entry.id.text[38:], entry_comment.published.text,
                                     str(entry_comment.media.duration.seconds),
                                     str(entry_comment.statistics.view_count),
                                     comment_feed.total_results.text,
                                     entry_comment.media.title.text.decode('ascii', errors='ignore').encode('ascii', 'ignore')]) + '\n')
            #videop.append("%s, %s,%s, %s, %s, %s" % (search_result["id"]["videoId"], entry.published.text,
            #              entry.media.duration.seconds, entry.statistics.view_count, comment_feed.total_results.text, entry.media.title.text))
            #time.sleep(3)
        except Exception, ex:
            print 'View_content Error', ex
            time.sleep(10)
        try:
            comment_content = client.GetYouTubeVideoCommentFeed(video_id=entry.id.text[38:])
            indexh = 0
            #while comment_content:
            while indexh < 10:
                indexh = indexh + 1
                for comment_entry in comment_content.entry:
                    pubText = comment_entry.published.text
                    #print pubText
                    titleText = comment_entry.content.text.decode('ascii', errors='ignore').encode('ascii', 'ignore')
                    #print titleText
                    #print 'Got title'
                    #pubText, titleText = comment_entry.published.text, comment_entry.title.text
                    commentFile.write(','.join([entry.id.text[38:], pubText, titleText]) + '\n' + '\n')
                    #commentFile.write(u',')
                    #commentFile.write(pubText + u',')
                    #print 'About to write title'
                    #print titleText
                    #print 'Wrote title'
                    #commentlist.append("%s, %s,%s" % (search_result["id"]["videoId"], pubText, titleText))
                comment_content = client.Query(comment_content.GetNextLink().href)
                #time.sleep(3)
            #time.sleep(3)
        except Exception, ex:
            print 'Comment_content Error!', ex
            time.sleep(5)
    #pdb.set_trace()

viewFile.close()
commentFile.close()
The error occurs when comment_content.GetNextLink() becomes None. In order to fix it, replace:
while indexh < 10:
with:
while indexh < 10 and comment_content:
also replace:
comment_content=client.Query(comment_content.GetNextLink().href)
with:
next_link = comment_content.GetNextLink()
if next_link:
    comment_content = client.Query(next_link.href)
else:
    comment_content = None
Hope that helps.
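Putting both changes together, the paging logic could be wrapped up roughly like this (a sketch that only uses the gdata calls already present in the question; write_comment_pages is a name I made up):

def write_comment_pages(client, video_id, comment_file, max_pages=10):
    # Page through the comment feed, stopping after max_pages pages
    # or as soon as GetNextLink() returns None (i.e. the last page).
    comment_content = client.GetYouTubeVideoCommentFeed(video_id=video_id)
    pages = 0
    while pages < max_pages and comment_content:
        pages += 1
        for comment_entry in comment_content.entry:
            pub_text = comment_entry.published.text
            title_text = comment_entry.content.text.decode('ascii', errors='ignore').encode('ascii', 'ignore')
            comment_file.write(','.join([video_id, pub_text, title_text]) + '\n')
        next_link = comment_content.GetNextLink()
        comment_content = client.Query(next_link.href) if next_link else None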