XLRDError: No sheet named <'Sheet1'> in python - python-2.7

I am trying to convert the xls into csv file using pandas in python. But I am getting the following error like 'XLRDError: No sheet named <'Sheet1'>'. I have verified the sheet name and it is same as specified above, but I don't how to correct this error. Please find my code below.
CODE:
def xls_2_csv():
import pandas as pd
data_xls = pd.read_excel(r'c:\delivery\file1.xls','Sheet1', index_col=None)
data_xls.to_csv(r'C:\test\file1.csv', encoding='utf-8',index=None)
xls_2_csv()
Please help me in solving this error. Thanks in advance.

I found the same problem in python 3.6 and pandas version is 0.25.1.
The following should work:
import pandas as pd
file = 'your excel file path'
# the file is endswith '.xls' and there is multiple sheets
# error method
df_sheet1 = pd.read_excel(file, sheet_name='Sheet1')
df_sheet2 = pd.read_excel(file, sheet_name='Sheet2')
# when read Sheet1 had no error, but when read Sheet2, had an error:
# xlrd.biffh.XLRDError: No sheet named <'Sheet2'>
# right method
with pd.ExcelFile(file) as xls:
for sheet_name in xls.sheet_names:
df = pd.read_excel(xls, sheet_name=sheet_name)
print(df.head())

Hi I tried the following code it worked for me.
CODE:
import logging
import time
import traceback
import xlrd
import csv
import sys
import re
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
xls = input file path
target = output file path
logging.info("Start converting: From '" + xls + "' to '" + target + "'. ")
try:
start_time = time.time()
wb = xlrd.open_workbook(xls)
sh = wb.sheet_by_index(0)
csvFile = open(target, 'wb')
wr = csv.writer(csvFile, quoting=csv.QUOTE_ALL)
for row in xrange(sh.nrows):
rowValues = sh.row_values(row)
newValues = []
for s in rowValues:
if isinstance(s, unicode):
strValue = (str(s.encode("utf-8")))
else:
strValue = (str(s))
isInt = bool(re.match("^([0-9]+)\.0$", strValue))
if isInt:
strValue = int(float(strValue))
else:
isFloat = bool(re.match("^([0-9]+)\.([0-9]+)$", strValue))
isLong = bool(re.match("^([0-9]+)\.([0-9]+)e\+([0-9]+)$", strValue))
if isFloat:
strValue = float(strValue)
if isLong:
strValue = int(float(strValue))
newValues.append(strValue)
wr.writerow(newValues)
csvFile.close()
logging.info("Finished in %s seconds", time.time() - start_time)
except Exception as e:
print (str(e) + " " + traceback.format_exc())

Related

In python can we save CSV file given by user given name?

I need to save/write a csv file from pandas dataframe in python. I have tried the following way;
import sys
import getopt
import os
def usage():
print "-f Please provide input file"
in1_flag = False
out_flag = True
inFile1 = ""
outFile1 = ""
try:
opts, args = getopt.getopt(sys.argv[1:], 'i:o', ['input_file1=', 'out_file1='])
except getopt.GetoptError:
usage()
sys.exit(2)
for opt, arg in opts:
if opt in ('-i', '--input_file1'):
inFile1 = os.path.abspath(arg)
in1_flag = True
elif opt in ('-o', '--out_file1'):
outFile1 = os.path.abspath(arg)
split_h = []
with open(inFile1) as ff:
for line in ff:
split_h = line.split()
import pandas as pd
d1 = {'report': split_h}
df1 = pd.DataFrame(data = d1, columns=['report'])
df1.to_csv(outFile1, sep = '\t',header = False, index= False)
I thought it would be as easy as taking input but here I am stuck at writing file.

Input query for python code

So I have created this code for my research, but I want to use it for plenty of data files, I do not want to do it manually, which means retyping some lines in my code to use desired file. How to use input command in python (I work with python 2.7 on Windows OS) to use it faster, just by typing name of desired datafile. My code so far:
import iodata as io
import matplotlib.pyplot as plt
import numpy as np
import time
from scipy.signal import welch
from scipy import signal
testInstance = io.InputConverter()
start = time.time()
conversionError = io.ConversionError()
#data = testInstance.convert(r"S:\Doktorat\Python\", 1", conversionError)
data = testInstance.convert(r"/Users/PycharmProjects/Hugo/20160401", "201604010000", conversionError)
end = time.time()
print("time elapsed " + str(end - start))
if(conversionError.conversionSucces):
print("Conversion succesful")
if(conversionError.conversionSucces == False):
print("Conversion failed: " + conversionError.conversionErrorLog)
print "Done!"
# Create a new subplot for two cannals 1 & 3
a = np.amin(data.data)
Bx = data.data[0,]
By = data.data[1,]
dt = float(300)/266350
Fs = 1/dt
t = np.arange(0,300,dt*1e3)
N = len(Bx)
M = len(By)
time = np.linspace(0,300,N)
time2 = np.linspace(0,300,M)
filename = 'C:/Users/PycharmProjects/Hugo/20160401/201604010000.dat'
d = open(filename,'rb')
degree = u"\u00b0"
headersize = 64
header = d.read(headersize)
ax1 = plt.subplot(211)
ax1.set_title(header[:16] + ', ' + # station name
'Canals: '+header[32:33]+' and '+header[34:35]+ ', ' # canals
+'Temp'+header[38:43]+degree+'C' # temperature
+', '+'Time:'+header[26:32]+', '+'Date'+' '+header[16:26]) # date
plt.ylabel('Pico Tesle [pT]')
plt.xlabel('Time [ms]')
plt.grid()
plt.plot(time[51:-14], Bx[51:-14], label='Canal 1', color='r', linewidth=0.1, linestyle="-")
plt.plot(time2[1:-14], By[1:-14], label='Canal 3', color='b', linewidth=0.1, linestyle="-")
plt.legend(loc='upper right', frameon=False, )
# Create a new subplot for FFT
plt.subplot(212)
plt.title('Fast Fourier Transform')
plt.ylabel('Power [a.u.]')
plt.xlabel('Frequency Hz')
xaxis2 = np.arange(0,470,10)
plt.xticks(xaxis2)
fft1 = (Bx[51:-14])
fft2 = (By[1:-14])
plt.grid()
# Loop for FFT data
for dataset in [fft1]:
dataset = np.asarray(dataset)
freqs, psd = welch(dataset, fs=266336/300, window='hamming', nperseg=8192)
plt.semilogy(freqs, psd/dataset.size**0, color='r')
for dataset2 in [fft2]:
dataset2 = np.asarray(dataset2)
freqs2, psd2 = welch(dataset2, fs=266336/300, window='hamming', nperseg=8192)
plt.semilogy(freqs2, psd2/dataset2.size**0, color='b')
plt.show()
As you can see there are some places where it would be better to put input and when I run the code I can write names of filenames etc. to python instead of creating every single pythonfile, with specified info in the code.
Btw. I use Pycharm to my python.
If all you are trying to do is get rid of the hardcoded pathname, you should be able to format your name string with input variables
name = raw_input("Name: ")
measurement = raw_input("Measurement: ")
filename = "C:/Users/PycharmProjects/{0}/{1}".format(name, measurement)
see raw_input and string formatting

django+celery+rabbitmq encode error and sig-kill

I'm now doing a little project which uses celery to turn csv and xlsx files into postgresql table.
The code below works fine without celery(except large files),but after using celery it produce some errors and bugs.
I've looked for similar questions in StackOverFlow but don't have any idea how to do and why.
Hope you guys can help me with it,thanks.
First error is as follows:
csv-1
csv-2
I think it has something to do with my encoding part, but I tried to open it with utf-8-sig and big-5, not working.(It works fine without celery)
`
# -*- coding: utf-8 -*-
from django.shortcuts import render_to_response
from django.template import RequestContext
from django.http import HttpResponseRedirect
from django.core.urlresolvers import reverse
from django.contrib import messages
from django.conf import settings
from django.db import connection
from django.views.decorators.csrf import csrf_exempt
from celery import Celery
from celery import task
import json
import csv
import sys
import random
import psycopg2
import xlrd
import openpyxl as pyxl
from .models import Document
from .forms import DocumentForm
app = Celery(
'tasks',
broker='amqp://guest:guest#localhost:5672//',
backend='rpc://'
)
CELERY_RESULT_BACKEND = 'rpc://'
CELERY_RESULT_PERSISTENT = False
#app.task()
def csvwritein(doc):# Transform csv to table
doc = doc
conn = psycopg2.connect("dbname='apidb' user='api' host='localhost'
password='eric40502' port='5432'")
readcur = conn.cursor()
readcur.execute("select exists(select * from
information_schema.tables where table_name='%s')" % doc.tablename) # check if
same file is already in database
check = readcur.fetchone()[0]
try:
fr = open(doc.path,encoding = 'utf-8-sig')
dr.delay(fr,doc,check)
fr.close()
except Exception as e:
fr = open(doc.path,encoding = 'big5')
dr.delay(fr,doc,check)
fr.close()
conn.commit()
readcur.close()
#app.task()
def dr(fr,doc,check): # make datareader as function to keep code 'dry'
csvt = 0 #count csv reader loop time
row_id = 1 # used for following id field
conn = psycopg2.connect("dbname='apidb' user='api' host='localhost'
password='eric40502' port='5432'")
maincur = conn.cursor()
writecur = conn.cursor()
datareader = csv.reader(fr, delimiter=',')
for row in datareader:
if csvt == 0: # first time in loop(create field) and check no
same file exists
if check == True:
app =
''.join([random.SystemRandom().choice('abcdefghijklmnopqrstuvwxyz0123456789')
for i in range(6)])
tname = '%s-%s' % (doc.tablename,app
tablename = '"%s-%s"' % (doc.tablename,app)
doc.tablename = tname
doc.save()
else:
tablename = '"%s"' % doc.tablename
maincur.execute("CREATE TABLE %s (id SERIAL PRIMARY
KEY);" % tablename)
row_count = sum(1 for line in datareader)
col_count = len(row)
frow = row
for i in range(0,col_count,1):
row[i] = '"%s"' % row[i] # change number to
string
maincur.execute("ALTER TABLE %s ADD %s
CITEXT;" % (tablename,row[i]))
csvt = csvt+1
fr.seek(0)
next(datareader)
elif csvt > 0: # not first time(insert data) and check no
same file exists
for j in range(0,col_count,1):
if j == 0:
writecur.execute("INSERT INTO %s (%s)
VALUES ('%s');" % (tablename,frow[j],row[j]))
else:
writecur.execute("UPDATE %s SET %s =
'%s' WHERE id = '%d';" %(tablename,frow[j],row[j],row_id))
csvt = csvt+1
row_id = row_id+1
else:
break
conn.commit()
maincur.close()
writecur.close()
conn.close()
csvt = 0
doc = Document.objects.all()
`
Second error is about turning a xlsx file(about 130,000 rows) into postgresql table, and the worker got sig-kill after 2-3 minutes.
Debug Message:
[2016-10-27 06:17:05,227: ERROR/MainProcess] Process 'Worker-1' pid:13829 exited with 'signal 9 (SIGKILL)' [2016-10-27 06:17:05,328:ERROR/MainProcess] Task data.tasks.xlsxwritein[5aec4679-c48b-4d07-a0a9-5e4e37fcd24b] raised unexpected: WorkerLostError('Worker exited prematurely: signal 9 (SIGKILL).',) Traceback (most recent call last): File "/usr/local/lib/python3.4/dist-packages/billiard/pool.py", line 1175, in mark_as_worker_lost human_status(exitcode)), billiard.exceptions.WorkerLostError: Worker exited prematurely: signal 9 (SIGKILL).
#The code continues from the above task.py file
#app.task()
def xlsxwritein(doc): # write into database for file type xlsx
xlsxt = 0
conn = psycopg2.connect("dbname='apidb' user='api' host='localhost'
password='eric40502' port='5432'")
maincur = conn.cursor()
readcur = conn.cursor()
writecur = conn.cursor()
readcur.execute("select exists(select * from
information_schema.tables where table_name='%s')" % doc.tablename) # check if
same file is already in database
check = readcur.fetchone()[0]
row_id = 1 # used for following id field
wb = pyxl.load_workbook(doc.path)
sheetnames = wb.get_sheet_names()
ws = wb.get_sheet_by_name(sheetnames[0])
for rown in range(ws.get_highest_row()):
if xlsxt == 0:
if check == True:
app =
''.join([random.SystemRandom().choice('abcdefghijklmnopqrstuvwxyz0123456789')
for i in range(6)])
tname = '%s-%s' % (doc.tablename,app)
tablename = '"%s-%s"' % (doc.tablename,app)
doc.tablename = tname
doc.save()
else:
tablename = '"%s"' % doc.tablename
field = [ws.cell(row=1,column=col_index).value for
col_index in range(1,ws.get_highest_column()+1)]
maincur.execute("CREATE TABLE %s (id SERIAL PRIMARY
KEY);" % tablename)
for coln in range(ws.get_highest_column()):
field[coln] = '"%s"' % field[coln] # change
number to string
if field[coln] == 'ID':
field[coln] = 'original_id'
maincur.execute("ALTER TABLE %s ADD %s
CITEXT;" % (tablename,field[coln]))
xlsxt = xlsxt+1
elif xlsxt > 0 and check == False: # not first time(insert
data) and check no same file exists
for coln in range(ws.get_highest_column()):
if coln == 0:
writecur.execute("INSERT INTO %s (%s)
VALUES ('%s');"
%(tablename,field[coln],str(ws.cell(row=rown,column=coln+1).value)))
else:
writecur.execute("UPDATE %s SET %s =
'%s' WHERE id = '%d';"
%(tablename,field[coln],str(ws.cell(row=rown+1,column=coln+1).value),row_id))
xlsxt = xlsxt+1
row_id = row_id+1
else:
break
conn.commit()
maincur.close()
readcur.close()
writecur.close()
conn.close()
xlsxt = 0
Probably something is going wrong during arguments deserialization. Instead of passing doc object try instead passing filename and then read file inside of task.

print if list index out of range

hi all im trying to create a handle for "list index out of range" but seem not to be having any luck.
import json, urllib, re
from urllib import urlencode
import googlemaps
import tempfile
import win32api
import win32print
start = "Adelaide, South Australia"
finish = " ghkjffzh, south Australia "
url = 'http://maps.googleapis.com/maps/api/directions/json?%s' % urlencode((
('origin', start),
('destination', finish)
))
ur = urllib.urlopen(url)
result = json.load(ur)
filename = "output.txt"
with open(filename, 'w') as output:
for i in range(0, len(result['routes'][0]['legs'][0]['steps'])):
try:
s = (result['routes'][0]['legs'][0]['steps'][i]['html_instructions'])
d = (result['routes'][0]['legs'][0]['steps'][i]['distance']['text'])
l = (result['routes'][0]['legs'][0]['steps'][i]['duration']['text'])
s = re.sub('<[A-Za-z\/][^>]*>', '', s)
output.writelines(s + " " + d + " " + l + '\n')
except Exception:
print "Directions could not be printed"
output.write("Directions could not be given due to the format of page or the address type")
but nothing is written to .txt and still get error.
ive tried to replace Exception with IndexError and VauleError but no change
Solved used by exploring the returned json result and found a Status result so I passed that first.
with open(filename, 'w') as output:
if result ['status'] == "NOT_FOUND"
output.write( " no directions avalible")
else:
for i in range(0, len(result['routes'][0]['legs'][0]['steps'])):
s = (result['routes'][0]['legs'][0]['steps'][i]['html_instructions'])
d = (result['routes'][0]['legs'][0]['steps'][i]['distance']['text'])
l = (result['routes'][0]['legs'][0]['steps'][i]['duration']['text'])
s = re.sub('<[A-Za-z\/][^>]*>', '', s)
output.writelines(s + " " + d + " " + l + '\n')

Why is my Python code returning an error when I try to fetch YouTube videos for a given keyword?

Whenever I try to run my code, I receive the following error: "comment_content error! 'nonetype' object has no attribute 'href'" I am new to Python, and did not write this code myself; it was given to me to use. My understanding is that it was functioning properly before? Could this have to do with changes in the YouTube Data API since it was written?
import pdb
import gdata.youtube
import gdata.youtube.service
import codecs
import time
client = gdata.youtube.service.YouTubeService()
query = gdata.youtube.service.YouTubeVideoQuery()
### the input words are here
query.vq = "4b hair"
#######
# the out put file are here
viewFile = codecs.open('views4b_hair.csv', 'w')
commentFile=codecs.open('comments4b_hair.csv', 'w')
##########
query.max_results = 50
query.start_index = 0
query.safesearch = "moderate"
#query.format = 5
query.orderby = "relevance"
#query.author = "hawaiinani"
#pdb.set_trace()
for i in range(19):
#pdb.set_trace()
query.start_index=str(int(query.start_index)+50)
feed = client.YouTubeQuery(query)
print len(feed.entry)
youtubeid=[]
youtubetitle=[]
for entry in feed.entry:
#youtubetitle.append(entry.title.text)
youtubeid.append(entry.id.text[38:])
print entry.id.text[38:],i
try:
entry_comment = client.GetYouTubeVideoEntry(video_id=entry.id.text[38:])
comment_feed = client.GetYouTubeVideoCommentFeed(video_id=entry.id.text[38:])
viewFile.write(','.join([entry.id.text[38:],entry_comment.published.text,
str(entry_comment.media.duration.seconds), str(entry_comment.statistics.view_count),comment_feed.total_results.text,entry_comment.media.title.text.decode('ascii', errors='ignore').encode('ascii', 'ignore')]) + '\n')
#videop.append("%s, %s,%s, %s, %s, %s" % (search_result["id"]["videoId"],entry.published.text,
# entry.media.duration.seconds, entry.statistics.view_count,comment_feed.total_results.text,entry.media.title.text))
#
#time.sleep(3)
except Exception, ex:
print 'View_content Error', ex
time.sleep(10)
try:
comment_content = client.GetYouTubeVideoCommentFeed(video_id=entry.id.text[38:])
indexh=0
#while comment_content:
while indexh<10:
indexh=indexh+1
for comment_entry in comment_content.entry:
pubText = comment_entry.published.text
#print pubText
titleText = comment_entry.content.text.decode('ascii', errors='ignore').encode('ascii', 'ignore')
#print titleText
#print 'Got title'
#pubText, titleText = comment_entry.published.text, comment_entry.title.text
commentFile.write(','.join([entry.id.text[38:],pubText,titleText]) + '\n'+'\n')
#commentFile.write(u',')
#commentFile.write(pubText + u',')
#print 'About to write title'
#print titleText
#print 'Wrote title'
#commentlist.append("%s, %s,%s" % (search_result["id"]["videoId"],pubText, titleText))
comment_content=client.Query(comment_content.GetNextLink().href)
#time.sleep(3)
#time.sleep(3)
except Exception, ex:
print 'Comment_content Error!', ex
time.sleep(5)
#pdb.set_trace()
viewFile.close()
commentFile.close()
The error occurs when comment_content.GetNextLink() becomes None. In order to fix it, replace:
while indexh < 10:
with:
while indexh < 10 and comment_content:
also replace:
comment_content=client.Query(comment_content.GetNextLink().href)
with:
next_link = comment_content.GetNextLink()
if next_link:
comment_content = client.Query(next_link.href)
else:
comment_content = None
Hope that helps.