I am fairly new in python.
I am parsing a big file and I want to check that the different inputs are correct, especially if the ID entered is in the file header.
When I run the following code, I get this error message:
AttributeError: 'str' object has no attribute 'readlines'
filename = str(raw_input('enter filename: '))
try:
with open(filename, 'rU'): pass
except IOError:
print 'The file does not exist'
sys.exit(0)
def findID(w):
return re.compile(r'\b({0})\b'.format(w), flags=re.IGNORECASE).search
while True:
ID = (raw_input("Enter ID: ")).upper()
IDheader = ID + ".NA"
with open(filename, 'rU') as f:
first_line = f.readline()
if findID(IDheader)(first_line):
print "you entered ",ID
break
else:
pass
print "ID not in this file."`
for line in filename.readlines():
Line = line.split()
if...
Thank you
filename is a filename, not a file handle. You need to open it:
with open(filename, 'r') as handle:
for line in handle:
line = line.split()
Related
I have this code:
import os
def inplace_change(filename, old_string, new_string):
# Safely read the input filename using 'with'
with open(filename, 'r') as f:
s = f.read()
if old_string not in s:
print('"{old_string}" not found in {filename}.'.format(**locals()))
return
else:
# Safely write the changed content, if found in the file
with open(filename, 'w') as f:
s = s.replace(old_string, new_string)
f.write(s)
path = raw_input("Enter the file's full path: ")
old = raw_input("String to change: ")
new = raw_input("change to: ")
print "********************************"
print "**** WORKING... PLEASE WAIT ****"
print "********************************\n"
for file in os.listdir(path):
filename = os.path.join(path,file)
inplace_change(filename, old, new)
os.system("pause")
As you can see, the code is replacing sub string in a file with another sub string. I want my code to change text as directed in a text file, like "instruction file" what to change.
The text file will be:
"old_string" "new_string"
"old_string" "new_string"
"old_string" "new_ string"
the result will be that all files in directory will change all the old_string to new_string
How can I do it?
I have created a little programe to extract selected ids + sequence from a fasta file. The ids of interest are file names that contains several seq for that gene. This is the programe:
import glob, sys, os
from Bio import SeqIO, SearchIO
from Bio.SeqRecord import SeqRecord
import argparse
def help_function():
print """
usage: to_extract_seq_and_id.py [-h] [-i input_file:path to data] [-r reference file: path_to_file ]
[-o output_directory: path_to_store_new_file] """
parser = argparse.ArgumentParser()
parser.add_argument('-input_files', '-i',type=str,help='path_to_data')
parser.add_argument('-reference', '-r',type=str,help='path_to_the_fasta_reference_file')
parser.add_argument('-output_directory','-o', type=str, help='path_to_store_new_file')
opts = parser.parse_args()
#first function to check if the files exits.
def check_file_exists(filepath, file_description):
if not os.path.exists(filepath):
print("The " + file_description + " (" + filepath + ") does not exist")
sys.exit(1)
else:
print file_description + " detected"
def record_extraction(geneID,reference,output):
records=list(SeqIO.parse(opts.reference,'fasta'))
new_reference=output + '/new_reference_common_genes.fa'
output_handle=open(new_reference, 'a')
with open (opts.reference, 'rU') as input_handle:
for record in records:
recordID=record.id
if recordID == geneID:
SeqIO.write(record, output_handle, 'fasta')
else:
continue
return new_reference
def main():
if len(sys.argv) <=2:
parser.print_help()
sys.exit()
else:
check_file_exists(opts.input_files, 'input_files')
check_file_exists(opts.reference, 'reference_file')
check_file_exists(opts.output_directory, 'output_directory')
files=(glob.glob(opts.input_files + '/*.fa'))
for f in files:
database_files=glob.glob(f)[0]
file_name=os.path.basename(f)
gene_id=file_name.split('.')
gene_name=gene_id[0].split('_')
geneID=gene_name[1] + '_' + gene_name[2]
print 'The new reference fasta file has been create'
new_reference=record_extraction(geneID,opts.reference,opts.output_directory)
main()
I have a fasta file with 900 genes, and I want to extract and write in another file 700 of them. The programe runs OK but only write one gene and its sequence into the new file. I don't understand why that loop is running only once. Initially Can anyone help me to correct the problem, please?
Thanks in advance.
The question is abit unclear but maybe this is what you are looking for:
results = []
for record in records:
recordID=record.id #.split('_')
if recordID == geneID:
results.append(record)
else:
continue
SeqIO.write(" ".join(text for text in results), output_handle, 'fasta')
return new_reference
If this is not what you are looking for. Please give more details for what are the problem and solution you need.
The problem I had was a problem of indentation. If you have a look the code above, you can see that the def record_extraction() was not called within the for loop in the main function (def main()). I have changed that indentation, and now it does work perfectly.
See above the new script:
import glob, sys, os
from Bio import SeqIO, SearchIO
from Bio.SeqRecord import SeqRecord
import argparse
def help_function():
print """
usage: to_extract_seq_and_id.py [-h] [-i input_file:path to data] [-r reference file: path_to_file ]
[-o output_directory: path_to_store_new_file] """
parser = argparse.ArgumentParser()
parser.add_argument('-input_files', '-i',type=str,help='path_to_data')
parser.add_argument('-reference', '-r',type=str,help='path_to_the_fasta_reference_file')
parser.add_argument('-output_directory','-o', type=str, help='path_to_store_new_file')
opts = parser.parse_args()
#first function to check if the files exits.
def check_file_exists(filepath, file_description):
if not os.path.exists(filepath):
print("The " + file_description + " (" + filepath + ") does not exist")
sys.exit(1)
else:
print file_description + " detected"
def record_extraction(geneID,reference,output,genelist):
records=list(SeqIO.parse(opts.reference,'fasta'))
new_reference=output + '/new_reference_common_genes.fa'
output_handle=open(new_reference, 'a')
with open (opts.reference, 'rU') as input_handle:
for record in records:
recordid=record.id.split('_')
recordID=recordid[0]+'_'+recordid[1]
if recordID in genelist:
SeqIO.write(record, output_handle, 'fasta')
else:
continue
return new_reference
def main():
if len(sys.argv) <=2:
parser.print_help()
sys.exit()
else:
check_file_exists(opts.input_files, 'input_files')
check_file_exists(opts.reference, 'reference_file')
check_file_exists(opts.output_directory, 'output_directory')
#run the programme
files=(glob.glob(opts.input_files + '/*.fa'))
for f in files:
database_files=glob.glob(f)[0]
file_name=os.path.basename(f)
gene_id=file_name.split('.')
gene_name=gene_id[0].split('_')
geneID=gene_name[1]+'_'+gene_name[2]
genelist=[]
if geneID not in genelist:
genelist.append(geneID)
new_reference=record_extraction(geneID,opts.reference,opts.output_directory,genelist)
print 'The new reference fasta file has been create'
main()
I have a task to get all the files from a directory , read the files find if the files contain anything with extension .crv. I have written the following code using Python but there is something wrong the code runs infintely. Can some help me in identifying what could possibly be wrong.
for file in os.listdir(location):
try:
if file.endswith(".cxx"):
#print "crvfile found:\t", file
filepath = location+"\\"+file
#print filepath
cxxopenfile = open(filepath,"r")
for line in crvopenfile:
line = line.rstrip()
find = re.findall('^\S*(['.crv']+), line)
#if len(find) > 0:
v
v
cxxopenfile.close()
cxxfiles.append(str(file))
counter = counter+1
except Exception as e:
raise e
print "No files found here!"
print "Total files found:\t", counter
#!/usr/bin/env python
import os
import sys
import re
from os import listdir
location = "c:\\git\\repos\\test-code"
counter = 0
for file in os.listdir(location):
try:
if file.endswith(".cxx"):
#print "crvfile found:\t", file
filepath = location+"\\"+file
#print filepath
cxxopenfile = open(filepath,"r")
for line in cxxopenfile:
line = line.rstrip()
find = re.findall("^\S*['.crv']+", line)
if len(find) > 0:
#cxxfiles.append(str(file))
counter = counter+1
else:
print "No extension .crv found!"
cxxopenfile.close()
except Exception as e:
raise e
print "No files found here!"
print "Total files found:\t", counter
Results:
$ python ./test.py
No extension .crv found!
No extension .crv found!
No extension .crv found!
Total files found: 2
I am having a bit of trouble in obtaining a file path so that I can open and execute my data from the specified (text) file. Below is the code I have written so far:
def pickfile():
options={}
options['defaultextension'] = '.txt'
options['filetypes'] = [('all files','.*'), ('text files', '.*txt')]
options['initialfile'] = 'sample.txt'
options['initialdir'] = 'C:\Users\profcs\Desktop'
filename=open(tkFileDialog.askopenfilename(**options))
if filename:
print(filename)
return
with open(filename, 'rb') as f:
reader = csv.reader(f)
try:
for row in reader:
print row
except csv.Error as e:
sys.exit('file %s, line %d: %s' % (filename, reader.line_num,e))
but1 = Button(widget1, text='Pick Your File', command=pickfile)
but1.pack(side=BOTTOM, padx=10, pady=1, anchor=SE)
but1.config(relief=RAISED, bd=2)
When I display a filename, I now get the path in this form:
================ RESTART: C:\Users\profcs\Desktop\BD TEST.py ================
<open file u'C:/Users/profcs/Desktop/sample.txt', mode 'r' at 0x01EFF128>
How can I filter this path and only get 'C:/Users/profcs/Desktop/sample.txt' so that I can open my file?
Thanks in advance.
filename.name gives you the path from filename object.
I hope this helps :
filename = open(tkFileDialog.askopenfilename(**options))
print (filename.name)
'C:/Users/profcs/Desktop/sample.txt'
In your case filename is an object which represents an open file.
My code is currently taking in a csv file and outputting to text file. The piece of code I have below and am having trouble with is from the csv I am searching for a keyword like issues and every row that has that word I want to output that to a text file. Currently, I have it printing to a JSON file but its all on one line like this
"something,something1,something2,something3,something4,something5,something6,something7\r\n""something,something1,something2,something3,something4,something5,something6,something7\r\n"
But i want it to print out like this:
"something,something1,something2,something3,something4,something5,something6,something7"
"something,something1,something2,something3,something4,something5,something6,something7"
Here is the code I have so far:
def search(self, filename):
with open(filename, 'rb') as searchfile, open("weekly_test.txt", 'w') as text_file:
for line in searchfile:
if 'PBI 43125' in line:
#print (line)
json.dump(line, text_file, sort_keys=True, indent = 4)
So again I just need a little guidance on how to get my json file to be formatted the way I want.
Just replace print line with print >>file, line
def search(self, filename):
with open('test.csv', 'r') as searchfile, open('weekly_test.txt', 'w') as search_results_file:
for line in searchfile:
if 'issue' in line:
print >>search_results_file, line
# At this point, both the files will be closed automatically