Save results in the files with same name as input files - python-2.7

I am working on a python code which is as follows:
import os
count = 0
for doc in os.listdir('/home/krupa/Krupa/Mirellas_Image_Annotation_Data/Test/Html_Files/Texts'):
if doc.endswith(".txt"):
with open(doc, 'r') as f:
single_line = ''.join([line for line in f])
single_space = ' '.join(single_line.split())
with open(doc.format(count) , "w") as doc:
doc.write(single_space)
count += 1
else:
continue
Here I want to write the output in the same file name but with different extension (say .key). How do I do it? Please help. Thanks in advance

import os
count = 0
for doc in os.listdir('/home/krupa/Krupa/Mirellas_Image_Annotation_Data/Test/Html_Files/Texts'):
doc1 = "doc_path" + doc
doc2 = "new_path_where_new_file_with_new_ext_needs_to_be_saved" + doc1
if doc1.endswith(".txt"):
with open(doc, 'r') as f:
single_line = ''.join([line for line in f])
single_space = ' '.join(single_line.split())
with open(doc2.format(count) , "w") as doc2:
doc2.write(single_space)
count += 1
else:
continue

Related

python 2.7 program to extract data from excel workbooks - why does it need to be saved in the same folder as the files?

I've got a program that uses openpyxl, os and tkinter that lets a person choose a file directory and then extracts data from certain cells from excel files in that directory. As-is, it will only run if the python file is in the same folder as the files from which data is being extracted are located.
I want to make it so that the program file can be stored outside that folder, but I can't figure out why it needs to be within that folder based on my code. Can someone point me to the place in the code that is making this be necessary?
Thank you
#!/usr/bin/env python
import os
import openpyxl
import Tkinter as tk
from Tkinter import *
import tkFileDialog, tkMessageBox, ttk
def file_open():
file_path = tkFileDialog.askdirectory()
if file_path == "":
tkMessageBox.showinfo("Error", "No Folder Selected")
else:
ALL_SHEETS = [f for f in os.listdir(file_path)
if os.path.isfile(os.path.join(file_path, f))
and f.endswith('.xlsx')]
HEAD = 1
ROW = 2
START = 1
END = 11
OUTFILE = 'empty_book.xlsx'
def get_row(sht, start, end, row):
row_data = []
for col in range(start, end):
d = sht.cell(row=row, column=col)
row_data.append(d.value)
return row_data
def get_all(files):
data_rows = []
for f in files:
wb = openpyxl.load_workbook(filename=f, data_only=True)
sheet = wb.get_sheet_by_name('Data')
row = get_row(sheet, START, END, ROW)
data_rows.append(row)
return data_rows
def get_headings(sheets):
first = sheets[1]
wb = openpyxl.load_workbook(filename=first)
sheet = wb.get_sheet_by_name('Data')
row = get_row(sheet, START, END, HEAD)
return row
def write_new(header, data, f):
wb = openpyxl.Workbook()
ws1 = wb.active
ws1.title = 'Data'
ws1.append(header)
for row in data:
ws1.append(row)
wb.save(filename=f)
def together():
sheets = sorted(ALL_SHEETS)
header = get_headings(sheets)
data = get_all(sheets)
write_new(header, data, OUTFILE)
together()
tkMessageBox.showinfo("Great Job!", "Data Extraction Successful!")
class NSC(tk.Frame):
def __init__(self, parent):
tk.Frame.__init__(self, parent)
self.parent = parent
self.parent.title("Degree Planner Data Extractor")
l1 = tk.Label(text="Degree Planner Data Extractor", font=('Segui',
20))
l1.place(x = 35, y = 20)
nscButton = tk.Button(text=' Extract data from degree planners ',
command=file_open)
nscButton.place(x= 80, y=100)
quitButton = tk.Button(text=" Quit ", command=self.quit)
quitButton.place(x=155, y=155)
def main():
root = Tk()
w = 400
h = 250
ws = root.winfo_screenwidth() # width of the screen
hs = root.winfo_screenheight() # height of the screen
x = (ws/2) - (w/2)
y = (hs/2) - (h/2)
root.geometry('%dx%d+%d+%d' % (w, h, x, y))
root.resizable(0,0)
app = NSC(root)
root.mainloop()
if __name__ == '__main__':
main()
You've kinda solved the problem in your code already. os.listdir returns file names without path so you needed os.path.join for the isfile test. You need to add that joined name to your list.
ALL_SHEETS = [os.path.join(file_path, f) for f in os.listdir(file_path)
if os.path.isfile(os.path.join(file_path, f))
and f.endswith('.xlsx')]
glob.glob does almost the same thing with the small risk that somebody named a directory ".xlsx".
from glob import glob
ALL_SHEETS = [f for f in glob(os.path.join(file_path, "*.xlsx"))
if os.path.isfile(f)]]

XLRDError: No sheet named <'Sheet1'> in python

I am trying to convert the xls into csv file using pandas in python. But I am getting the following error like 'XLRDError: No sheet named <'Sheet1'>'. I have verified the sheet name and it is same as specified above, but I don't how to correct this error. Please find my code below.
CODE:
def xls_2_csv():
import pandas as pd
data_xls = pd.read_excel(r'c:\delivery\file1.xls','Sheet1', index_col=None)
data_xls.to_csv(r'C:\test\file1.csv', encoding='utf-8',index=None)
xls_2_csv()
Please help me in solving this error. Thanks in advance.
I found the same problem in python 3.6 and pandas version is 0.25.1.
The following should work:
import pandas as pd
file = 'your excel file path'
# the file is endswith '.xls' and there is multiple sheets
# error method
df_sheet1 = pd.read_excel(file, sheet_name='Sheet1')
df_sheet2 = pd.read_excel(file, sheet_name='Sheet2')
# when read Sheet1 had no error, but when read Sheet2, had an error:
# xlrd.biffh.XLRDError: No sheet named <'Sheet2'>
# right method
with pd.ExcelFile(file) as xls:
for sheet_name in xls.sheet_names:
df = pd.read_excel(xls, sheet_name=sheet_name)
print(df.head())
Hi I tried the following code it worked for me.
CODE:
import logging
import time
import traceback
import xlrd
import csv
import sys
import re
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
xls = input file path
target = output file path
logging.info("Start converting: From '" + xls + "' to '" + target + "'. ")
try:
start_time = time.time()
wb = xlrd.open_workbook(xls)
sh = wb.sheet_by_index(0)
csvFile = open(target, 'wb')
wr = csv.writer(csvFile, quoting=csv.QUOTE_ALL)
for row in xrange(sh.nrows):
rowValues = sh.row_values(row)
newValues = []
for s in rowValues:
if isinstance(s, unicode):
strValue = (str(s.encode("utf-8")))
else:
strValue = (str(s))
isInt = bool(re.match("^([0-9]+)\.0$", strValue))
if isInt:
strValue = int(float(strValue))
else:
isFloat = bool(re.match("^([0-9]+)\.([0-9]+)$", strValue))
isLong = bool(re.match("^([0-9]+)\.([0-9]+)e\+([0-9]+)$", strValue))
if isFloat:
strValue = float(strValue)
if isLong:
strValue = int(float(strValue))
newValues.append(strValue)
wr.writerow(newValues)
csvFile.close()
logging.info("Finished in %s seconds", time.time() - start_time)
except Exception as e:
print (str(e) + " " + traceback.format_exc())

print if list index out of range

hi all im trying to create a handle for "list index out of range" but seem not to be having any luck.
import json, urllib, re
from urllib import urlencode
import googlemaps
import tempfile
import win32api
import win32print
start = "Adelaide, South Australia"
finish = " ghkjffzh, south Australia "
url = 'http://maps.googleapis.com/maps/api/directions/json?%s' % urlencode((
('origin', start),
('destination', finish)
))
ur = urllib.urlopen(url)
result = json.load(ur)
filename = "output.txt"
with open(filename, 'w') as output:
for i in range(0, len(result['routes'][0]['legs'][0]['steps'])):
try:
s = (result['routes'][0]['legs'][0]['steps'][i]['html_instructions'])
d = (result['routes'][0]['legs'][0]['steps'][i]['distance']['text'])
l = (result['routes'][0]['legs'][0]['steps'][i]['duration']['text'])
s = re.sub('<[A-Za-z\/][^>]*>', '', s)
output.writelines(s + " " + d + " " + l + '\n')
except Exception:
print "Directions could not be printed"
output.write("Directions could not be given due to the format of page or the address type")
but nothing is written to .txt and still get error.
ive tried to replace Exception with IndexError and VauleError but no change
Solved used by exploring the returned json result and found a Status result so I passed that first.
with open(filename, 'w') as output:
if result ['status'] == "NOT_FOUND"
output.write( " no directions avalible")
else:
for i in range(0, len(result['routes'][0]['legs'][0]['steps'])):
s = (result['routes'][0]['legs'][0]['steps'][i]['html_instructions'])
d = (result['routes'][0]['legs'][0]['steps'][i]['distance']['text'])
l = (result['routes'][0]['legs'][0]['steps'][i]['duration']['text'])
s = re.sub('<[A-Za-z\/][^>]*>', '', s)
output.writelines(s + " " + d + " " + l + '\n')

replacing specific lines in a text file using python

First of all I am pretty new at python, so bear with me. I am attempting to read from one file, retrieve specific values and overwrite old values in another file with a similar format. The format is 'text value=xxx' in both files. I have the first half of the program working, I can extract the values I want and have placed them into a dict named 'params{}'. The part I haven't figured out is how to just write the specific value into the target file without it showing up at the end of the file or just writing garbage or only half of the file. Here is my source code so far:
import os, os.path, re, fileinput, sys
#set the path to the resource files
#res_files_path = r'C:\Users\n518013\Documents\203-104 WA My MRT Files\CIA Data\pelzer_settings'
tst_res_files_path = r'C:\resource'
# Set path to target files.
#tar_files_path = r'C:\Users\n518013\Documents\203-104 WA My MRT Files\CIA Data\CIA3 Settings-G4'
tst_tar_files_path = r'C:\target'
#test dir.
test_files_path = r'C:\Users\n518013\Documents\MRT Equipment - BY 740-104 WA\CIA - AS\Setting Files\305_70R_22_5 setting files\CIA 1 Standard'
# function1 to find word index and point to value
def f_index(lst, item):
ind = lst.index(item)
val = lst[ind + 3]
print val
return val
# function 2 for values only 1 away from search term
def f_index_1(lst, item):
ind = lst.index(item)
val = lst[ind + 1]
return val
# Create file list.
file_lst = os.listdir(tst_res_files_path)
# Traverse the file list and read in dim settings files.
# Set up dict.
params = {}
#print params
for fname in file_lst:
file_loc = os.path.join(tst_res_files_path, fname)
with open(file_loc, 'r') as f:
if re.search('ms\.', fname):
print fname
break
line = f.read()
word = re.split('\W+', line)
print word
for w in word:
if w == 'Number':
print w
params['sectors'] = f_index(word, w)
elif w == 'Lid':
params['lid_dly'] = f_index(word, w)
elif w == 'Head':
params['rotation_dly'] = f_index(word, w)
elif w == 'Horizontal':
tmp = f_index_1(word, w)
param = int(tmp) + 72
params['horizontal'] = str(param)
elif w == 'Vertical':
tmp = f_index_1(word, w)
param = int(tmp) - 65
params['vertical'] = str(param)
elif w == 'Tilt':
params['tilt'] = f_index_1(word, w)
else:
print 'next...'
print params #this is just for debugging
file_tar = os.path.join(tst_tar_files_path, fname)
for lines in fileinput.input(file_tar, inplace=True):
print lines.rstrip()
if lines.startswith('Number'):
if lines[-2:-1] != params['sectors']:
repl = params['sectors']
lines = lines.replace(lines[-2:-1], repl)
sys.stdout.write(lines)
else:
continue
Sample text files:
[ADMINISTRATIVE SETTINGS]
SettingsType=SingleScan
DimensionCode=
Operator=
Description=rev.1 4sept03
TireDimClass=Crown
TireWidth=400mm
[TEST PARAMETERS]
Number Of Sectors=9
Vacuum=50
[DELAY SETTINGS]
Lid Close Delay=3
Head Rotation Delay=3
[HEAD POSITION]
Horizontal=140
Vertical=460
Tilt=0
[CALIBRATION]
UseConvFactors=0
LengthUnit=0
ConvMMX=1
ConvPixelX=1
CorrFactorX=1
ConvMMY=1
ConvPixelY=1
CorrFactorY=1
end sample txt.
The code I have only writes about half of the file back, and I don't understand why? I am trying to replace the line 'Number of Sectors=9' with 'Number of Sectors=8' if I could get this to work, the rest of the replacements can be done using if statements.
Please help! I've spent hours on google looking for answers and info and everything I find gets me close but no cigar!
Thank you all in advance!
your file has the '.ini' format. python supports reading and writing those with the ConfigParser module. you could do this:
# py3: from pathlib import Path
import os.path
import configparser
# py3: IN_PATH = Path(__file__).parent / '../data/sample.ini'
# py3: OUT_PATH = Path(__file__).parent / '../data/sample_out.ini'
HERE = os.path.dirname(__file__)
IN_PATH = os.path.join(HERE, '../data/sample.ini')
OUT_PATH = os.path.join(HERE, '../data/sample_out.ini')
config = configparser.ConfigParser()
# py3: config.read(str(IN_PATH))
config.read(IN_PATH)
print(config['CALIBRATION']['LengthUnit'])
config['CALIBRATION']['LengthUnit'] = '27'
# py3: with OUT_PATH.open('w') as fle:
with open(OUT_PATH, 'w') as fle:
config.write(fle)

Why is my Python code returning an error when I try to fetch YouTube videos for a given keyword?

Whenever I try to run my code, I receive the following error: "comment_content error! 'nonetype' object has no attribute 'href'" I am new to Python, and did not write this code myself; it was given to me to use. My understanding is that it was functioning properly before? Could this have to do with changes in the YouTube Data API since it was written?
import pdb
import gdata.youtube
import gdata.youtube.service
import codecs
import time
client = gdata.youtube.service.YouTubeService()
query = gdata.youtube.service.YouTubeVideoQuery()
### the input words are here
query.vq = "4b hair"
#######
# the out put file are here
viewFile = codecs.open('views4b_hair.csv', 'w')
commentFile=codecs.open('comments4b_hair.csv', 'w')
##########
query.max_results = 50
query.start_index = 0
query.safesearch = "moderate"
#query.format = 5
query.orderby = "relevance"
#query.author = "hawaiinani"
#pdb.set_trace()
for i in range(19):
#pdb.set_trace()
query.start_index=str(int(query.start_index)+50)
feed = client.YouTubeQuery(query)
print len(feed.entry)
youtubeid=[]
youtubetitle=[]
for entry in feed.entry:
#youtubetitle.append(entry.title.text)
youtubeid.append(entry.id.text[38:])
print entry.id.text[38:],i
try:
entry_comment = client.GetYouTubeVideoEntry(video_id=entry.id.text[38:])
comment_feed = client.GetYouTubeVideoCommentFeed(video_id=entry.id.text[38:])
viewFile.write(','.join([entry.id.text[38:],entry_comment.published.text,
str(entry_comment.media.duration.seconds), str(entry_comment.statistics.view_count),comment_feed.total_results.text,entry_comment.media.title.text.decode('ascii', errors='ignore').encode('ascii', 'ignore')]) + '\n')
#videop.append("%s, %s,%s, %s, %s, %s" % (search_result["id"]["videoId"],entry.published.text,
# entry.media.duration.seconds, entry.statistics.view_count,comment_feed.total_results.text,entry.media.title.text))
#
#time.sleep(3)
except Exception, ex:
print 'View_content Error', ex
time.sleep(10)
try:
comment_content = client.GetYouTubeVideoCommentFeed(video_id=entry.id.text[38:])
indexh=0
#while comment_content:
while indexh<10:
indexh=indexh+1
for comment_entry in comment_content.entry:
pubText = comment_entry.published.text
#print pubText
titleText = comment_entry.content.text.decode('ascii', errors='ignore').encode('ascii', 'ignore')
#print titleText
#print 'Got title'
#pubText, titleText = comment_entry.published.text, comment_entry.title.text
commentFile.write(','.join([entry.id.text[38:],pubText,titleText]) + '\n'+'\n')
#commentFile.write(u',')
#commentFile.write(pubText + u',')
#print 'About to write title'
#print titleText
#print 'Wrote title'
#commentlist.append("%s, %s,%s" % (search_result["id"]["videoId"],pubText, titleText))
comment_content=client.Query(comment_content.GetNextLink().href)
#time.sleep(3)
#time.sleep(3)
except Exception, ex:
print 'Comment_content Error!', ex
time.sleep(5)
#pdb.set_trace()
viewFile.close()
commentFile.close()
The error occurs when comment_content.GetNextLink() becomes None. In order to fix it, replace:
while indexh < 10:
with:
while indexh < 10 and comment_content:
also replace:
comment_content=client.Query(comment_content.GetNextLink().href)
with:
next_link = comment_content.GetNextLink()
if next_link:
comment_content = client.Query(next_link.href)
else:
comment_content = None
Hope that helps.