Parse a file in various parts - list

I am trying to parse a file that contains the following
# This is 1st line
# This is 2nd line
ATOM This is 3rd line
ATOM This is 4th line
# This is 5th line
# This is 6th line
I wish to use Python 2.7 to parse the file and append lines up to the line starting with ATOM to a list head_list, the lines starting with ATOM to atom_list and lines after the line containing ATOM to a tail_list.
I want to use startswith() in Python to match lines that start with ATOM. Below is my code; I am passing a counter variable that holds the index of the last line in the file that starts with ATOM, yet my output does not seem to be right.
#!/usr/bin/python
import sys, os
global counter
def AskForFileName():
    """Prompt on stdin for the name of the input file and return it.

    An empty answer only triggers a warning; the (empty) string is still
    returned so the caller decides how to react.

    Returns:
        The text the user typed, possibly an empty string.
    """
    file_name = raw_input('Enter the name of the input file \n')
    # The original wrapped this check in try/except, but nothing in it can
    # raise, so the handler was dead code and has been dropped.
    if not file_name:
        # Single-argument print(...) prints the same on Python 2 and 3.
        print("You did not enter a name !")
    return file_name
def ReadFileContents(file_name):
    """Read a text file and return its lines.

    Args:
        file_name: Path of the file to read.

    Returns:
        A list with one string per line, line endings included.

    Raises:
        IOError: If the file cannot be opened (OSError on Python 3).
    """
    # The context manager closes the handle even if readlines() fails; the
    # original never closed the file.
    with open(file_name, 'r') as source:
        return source.readlines()
def BuildHeadList(all_file_contents):
    """Collect the lines that precede the first line starting with "ATOM".

    Args:
        all_file_contents: Iterable of line strings.

    Returns:
        The leading lines in their original order. Every line is returned
        when no line starts with "ATOM"; the list is empty when the very
        first line does.
    """
    head_list = []
    for line in all_file_contents:
        if line.startswith("ATOM"):
            # The header ends at the first ATOM record.
            break
        head_list.append(line)
    return head_list
def BuildAtomList(all_file_contents):
    """Collect every line that starts with "ATOM".

    Side effect: the module-level ``counter`` is set to the index of the
    last matching line. It is left untouched when no line matches.

    Args:
        all_file_contents: Iterable of line strings.

    Returns:
        The matching lines in their original order.
    """
    global counter
    atom_list = []
    for index, line in enumerate(all_file_contents):
        if line.startswith("ATOM"):
            atom_list.append(line)
            # Remember where the most recent ATOM record sits.
            counter = index
    return atom_list
# Build the list intended to hold the lines after the last "ATOM" line.
#
# Reads the module-level ``counter`` (BuildAtomList stores the index of the
# last "ATOM" line there), increments it and prints it.
# all_file_contents: the lines read from the input file.
# Returns tail_list.
def BuildTailList(all_file_contents) :
tail_list=[]
i=0
global counter
# Step past the last ATOM line; note this mutates the shared global.
counter=counter+1
print "Counter value is "+str(counter)
try :
# NOTE(review): the second argument of enumerate() only sets the number the
# count starts from. The loop still visits every line from the first one, so
# every line of the file is appended to tail_list.
for i,line in enumerate(all_file_contents,counter):
print i
tail_list.append(line)
except :
# Print the exception type, source file name and line number, then fall
# through to the return below.
exc_type, exc_obj, exc_tb = sys.exc_info()
fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
print(exc_type, fname, exc_tb.tb_lineno)
return tail_list
def WriteNewFile(head_list, atom_list, tail_list):
    """Write the tail lines to ``output.txt`` in the current directory.

    Only ``tail_list`` is written, exactly as in the original, where the
    loops for the other two lists were commented out.

    Args:
        head_list: Accepted for interface compatibility; not written.
        atom_list: Accepted for interface compatibility; not written.
        tail_list: Line strings written verbatim, so each is expected to
            carry its own line ending.
    """
    # "with" guarantees the file is flushed and closed even on error, and the
    # local name no longer shadows the Python 2 builtin ``file``.
    with open('output.txt', 'w') as output:
        output.writelines(tail_list)
# Script driver: ask for the input file, read it, split its lines into
# head/atom/tail lists, then write the output file.
file_name=AskForFileName()
all_file_contents=ReadFileContents(file_name)
head_list=BuildHeadList(all_file_contents)
# BuildAtomList must run before BuildTailList: it sets the global ``counter``
# that BuildTailList reads.
atom_list=BuildAtomList(all_file_contents)
tail_list=BuildTailList(all_file_contents )
WriteNewFile(head_list,atom_list,tail_list)

This line:
enumerate(all_file_contents, counter)
Doesn't do what you think it does; it iterates over everything in the list, but numbers the items starting from counter instead of 0. A minimal fix would be:
for i, line in enumerate(all_file_contents):
if i >= counter:
tail_list.append(line)
However, much better would be to not iterate over the whole file three times. In particular, note that tail_list = all_file_contents[counter:] gets the result you want. Additionally, get rid of the global and pass counter around explicitly.
You should also read the style guide.

Related

AttributeError: 'dict' object has no attribute 'append' on line 9?

Q.)8.4 Open the file romeo.txt and read it line by line. For each line, split the line into a list of words using the split() method. The program should build a list of words. For each word on each line check to see if the word is already in the list and if not append it to the list. When the program completes, sort and print the resulting words in alphabetical order.
this code is giving AttributeError: 'dict' object has no attribute 'append' on line 9
fname = input("Enter file name: ")
fh = open(fname)
lst = {}  # {} creates an empty dict, not a list
for line in fh:
line = line.rstrip()
words = line.split()
for word in words:
if word not in lst:
lst.append(word)  # dict objects have no append(), hence the AttributeError
print(sorted(lst))
Python dictionary has no append method.
Append is used in the list (array) in Python. Make lst a list, not a dictionary. I have made a minor change in your code below, changing
lst = {} #creation of an empty dictionary
to
lst = [] #creation of an empty list
The full code:
fname = input("Enter file name: ")
lst = []  # unique words, in order of first appearance
# "with" closes the file as soon as every line has been read.
with open(fname) as fh:
    for line in fh:
        # split() with no argument already discards the trailing newline,
        # so no separate rstrip() is needed.
        for word in line.split():
            if word not in lst:
                lst.append(word)
print(sorted(lst))

Replacing line in file that start with a string if exist and add if not

I wrote code that checks whether a line starting with a given string exists; if it does, the line is updated, and if not, it is added.
It works, but I think there may be another way that avoids opening and closing the file three times.
My text is (try)
LINE1, 1111111
LINE2, 2222222
LINE4, 4444444
LINE3, 4444444
and the code i tried:
linename = 'LINE3' # Variable
num = 3333333 # Variable
# Pass 1: read every line of the file into memory.
f = open('try', "r")
lines = f.readlines()
f.close()
# Pass 2: rewrite the file, keeping only the lines that do not start with
# linename.
f = open('try', "w")
for line in lines:
if not line.startswith(str(linename)):
f.write(line)
f.close()
# Pass 3: reopen in append/read mode; the "linename, num" line may be written
# here.
# NOTE(review): indentation was lost, so it is unclear whether the else below
# belongs to the for loop or to the if -- confirm against the original.
f = open('try', 'a+')
for line in f:
if line.startswith(str(linename)):
break
else:
f.write("{0}, {1} \n" .format(linename, num))
f.close()
How about this:
linename = 'LINE3'
num = 3333333
# Every stored line must end with its own newline; without it writelines()
# below would glue the new entry to the line that follows it.
new_line = "{0}, {1}\n".format(linename, num)

with open('try', 'r', encoding='utf8') as f:
    lines = f.readlines()

found = False
for i, line in enumerate(lines):
    if line.startswith(linename):
        # Update the first matching line in place.
        lines[i] = new_line
        found = True
        break

if not found:
    # A file whose last line lacks a terminator would otherwise have the new
    # entry appended onto that same line.
    if lines and not lines[-1].endswith("\n"):
        lines[-1] += "\n"
    lines.append(new_line)

with open('try', 'w', encoding='utf8') as f:
    f.writelines(lines)
The next logical step would be to make a function out of it:
def store_in_file(filename, linename, num):
# ... the above ...
store_in_file('try', 'LINE3', 3333333)

How do i pass the arguments with self

# NOTE(review): as shown here the function is not inside a class, so ``self``
# is just an ordinary first positional parameter that nothing supplies
# automatically.
def read_contents(self,filename):
with open(filename,'r') as f:
lines=f.read().splitlines()
print lines
# Only one argument is passed: 'input.txt' binds to ``self`` and ``filename``
# is left without a value.
s=read_contents('input.txt')
When trying to run this program the error is thrown as two arguments required and self needs to be present as one of the arguments (this is part of a bigger code)
how do i pass filename as arguments without getting error
You must use it within a class:
class Test:
    """Minimal class so that read_contents can be called as a method."""

    def read_contents(self, filename):
        """Print and return the lines of *filename*.

        Args:
            filename: Path of the text file to read.

        Returns:
            A list of the file's lines without line endings. The original
            only printed them, so the caller's ``s = ...`` was always None.
        """
        with open(filename, 'r') as f:
            lines = f.read().splitlines()
        # Single-argument print(...) prints the same on Python 2 and 3.
        print(lines)
        return lines
test = Test()
s = test.read_contents('input.txt')
Or remove the self:
def read_contents(filename):
    """Print and return the lines of *filename*.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of the file's lines without line endings. The original only
        printed them, so assigning the call's result always gave None.
    """
    with open(filename, 'r') as f:
        lines = f.read().splitlines()
    # Single-argument print(...) prints the same on Python 2 and 3.
    print(lines)
    return lines
s = read_contents('input.txt')

Python : count function does not work

I am stuck on an exercise from a Coursera Python course, this is the question:
"Open the file mbox-short.txt and read it line by line. When you find a line that starts with 'From ' like the following line:
From stephen.marquard#uct.ac.za Sat Jan 5 09:14:16 2008
You will parse the From line using split() and print out the second word in the line (i.e. the entire address of the person who sent the message). Then print out a count at the end.
Hint: make sure not to include the lines that start with 'From:'.
You can download the sample data at http://www.pythonlearn.com/code/mbox-short.txt"
Here is my code:
fname = raw_input("Enter file name: ")
if len(fname) < 1:
    fname = "mbox-short.txt"
count = 0
# "with" closes the file once the loop is done.
with open(fname) as fh:
    for line in fh:
        words = line.split()
        # Comparing the first word with 'From' skips the 'From:' header
        # lines, whose first word includes the colon.
        if len(words) > 2 and words[0] == 'From':
            print(words[1])
            count = count + 1
# One concatenated string prints identically on Python 2 and 3.
print("There were " + str(count) + " lines in the file with From as the first word")
The output should be a list of emails and the sum of them, but it doesn't work and I don't know why: actually the output is "There were 0 lines in the file with From as the first word"
I used your code and downloaded the file from the link. And I am getting this output:
There were 27 lines in the file with From as the first word
Have you checked whether the downloaded file is in the same location as the code file?
fname = input("Enter file name: ")
counter = 0
# "with" closes the file once the loop is done.
with open(fname) as fh:
    for line in fh:
        line = line.rstrip()
        # The trailing space in 'From ' excludes the 'From:' header lines.
        if not line.startswith('From '):
            continue
        words = line.split()
        print(words[1])
        counter += 1
print("There were", counter, "lines in the file with From as the first word")
fname = input("Enter file name: ")
count = 0
# "with" closes the file once the loop is done.
with open(fname) as fh:
    for line in fh:
        # Only consider lines that start with the word "From " (with the
        # space, so "From:" header lines are skipped).
        if line.startswith('From '):
            y = line.split()  # split the line into a list of words
            print(y[1])  # the address is the word at index 1
            count = count + 1
print("There were", count, "lines in the file with From as the first word")
I have written all the comments if anyone faces any difficulty, in case you need help feel free to contact me. This is the easiest code available on internet. Hope you benefit from my answer
fname = input('Enter the file name:')
count = 0
# "with" closes the file once the loop is done.
with open(fname) as fh:
    for line in fh:
        # The trailing space matters: a bare 'From' would also match the
        # 'From:' header lines that the exercise says to exclude.
        if line.startswith('From '):
            linesplit = line.split()
            print(linesplit[1])
            count = count + 1
# The exercise asks for the total at the end; the original never printed it.
print("There were", count, "lines in the file with From as the first word")
fname = input("Enter file name: ")
if len(fname) < 1:
    fname = "mbox-short.txt"
count = 0
# "with" closes the file once the loop is done.
with open(fname) as fh:
    for i in fh:
        i = i.rstrip()
        # The trailing space in 'From ' excludes the 'From:' header lines.
        if not i.startswith('From '):
            continue
        word = i.split()
        count = count + 1
        print(word[1])
print("There were", count, "lines in the file with From as the first word")
fname = input("Enter file name: ")
if len(fname) < 1:
    fname = "mbox-short.txt"
count = 0
# "with" closes the file once the loop is done.
with open(fname) as fh:
    for line in fh:
        # Match 'From ' with the space so 'From:' header lines are excluded.
        if line.startswith('From '):
            lt = line.rstrip().split()
            # A full "From " line has more than two fields, so the original
            # test len(lt)==2 rejected exactly the lines that were wanted;
            # only require that an address field is present.
            if len(lt) >= 2:
                print(lt[1])
                count = count + 1
print("There were", count, "lines in the file with From as the first word")
My code looks like this and works as a charm:
fname = input("Enter file name: ")
if len(fname) < 1:
    fname = "mbox-short.txt"
count = 0  # number of matching lines seen so far
# "with" closes the file once the loop is done.
with open(fname) as fh:
    for line in fh:  # iterate over the document line by line
        words = line.split()  # split the line into words
        # Keep lines whose first word is exactly "From" and that have at
        # least two words, so that words[1] exists.
        if len(words) >= 2 and words[0] == "From":
            print(words[1])  # the address is the second word
            count += 1
print("There were", count, "lines in the file with From as the first word")
It is a nice exercise from the course of Dr. Chuck
There is also another way. You can store the found words in a separate, initially empty list and then print out the length of the list. It will deliver the same result.
My tested code as follows:
fname = input("Enter file name: ")
if len(fname) < 1:
    fname = "mbox-short.txt"
newl = list()  # addresses found, in file order
# "with" closes the file once the loop is done.
with open(fname) as fh:
    for line in fh:
        words = line.split()
        # Keep lines whose first word is exactly 'From' and that have at
        # least two words, so that words[1] exists.
        if len(words) >= 2 and words[0] == 'From':
            newl.append(words[1])
print(*newl, sep="\n")
print("There were", len(newl), "lines in the file with From as the first word")
I did pass the exercise with it as well. Enjoy and keep the good work. Python is so much fun to me even though i always hated programming.

Use of global variables in script

I threw the 'Apple-code' in the trash, please look at the following code :
# Define processing of one input line: slice three 8-character fields out of
# the line and print the resulting dict and its size.
def inputfile(line):
# NOTE: this assignment binds a new name local to the function; the
# module-level linecontents defined further down is not modified.
linecontents = { 'item_0110': line[0:8],
'item_0111': line[8:16],
'item_0112': line[16:24] }
print 'In the function : ', linecontents
print 'In the function : ', len(linecontents)
# Set dictionary (module level)
linecontents = {}
# Pretend to open a file and read it line by line
line = '010012343710203053525150'
# Start processing the line that was read
inputfile(line)
# Display end result
print '\nOutside the function : ', linecontents
print 'Outside the function : ', len(linecontents)
Ok, first off : I'm an idiot for trying this with vars. In the original post I already stated I have more than thirty items (fields if you want) in the file. To make matters more complex, a line from the file could look like this :
010012343710203053525150
And not all lines have the same fields so depending on what type of field it is, I would like to call a different function.
The question now is : why is the output like this :
In the function : {'item_0112': '53525150', 'item_0111': '37102030', 'item_0110': '01001234'}
In the function : 3
Outside the function : {}
Outside the function : 0
I thought the dictionary is independent from functions and/or classes ?
There are some issues with your code. I don't really see the need for global variables here.
I reformatted and refactored your code (no uppercase except for classes). Your intent is not clear, so I tried my best. The function read_file actually reads your file line by line and yields customer_name and customer_item for each line.
def read_file(filepath):
    """Yield a (customer_name, customer_item) pair for each line of a file.

    Each line is treated as a fixed-width record: the first 10 characters
    are the name, the remainder is the item. Both values are also printed.

    Args:
        filepath: Path of the text file to read.

    Yields:
        Tuples ``(customer_name, customer_item)``, one per line.
    """
    with open(filepath) as customer_file:
        for line in customer_file:
            # Drop the line terminator first; otherwise customer_item ends
            # with "\n" and never compares equal to a bare 'Apple'.
            line = line.rstrip('\r\n')
            customer_name = line[:10]
            customer_item = line[10:]
            print('Name : ' + customer_name)
            print('Item : ' + customer_item)
            yield customer_name, customer_item
In the main() or whatever function, you can do what you want with the customer's variables.
What is important here, is that read_file actually reads a file and process the information for the file before returning them to the calling function.
def main():
    """Report, for each record of CustomerFile.txt, whether the item is an apple."""
    myfile = 'CustomerFile.txt'
    for customer_name, customer_item in read_file(myfile):
        if customer_item == 'Apple':
            print(customer_name)
        else:
            print(customer_name + ' is not eating an apple')
Instead of using globals, let the ReadFile function return the values
def ReadFile(line):
...
return CustomerName, CustomerItem
and assign them to variables after calling the function:
for line in CustomerFile:
CustomerName, CustomerItem = ReadFile(line)
def ReadFile(line):
    """Split one fixed-width record into customer name and item.

    Args:
        line: A record whose first 10 characters are the name and whose
            remainder is the item; a trailing line terminator is ignored.

    Returns:
        A tuple ``(CustomerName, CustomerItem)``.
    """
    # Lines read from a file keep their "\n"; strip it so the item can be
    # compared with a bare value such as 'Apple'.
    line = line.rstrip('\r\n')
    CustomerName = line[0:10]
    CustomerItem = line[10:]
    return CustomerName, CustomerItem
def Report(CustomerName, CustomerItem):
    """Print a customer's name and item, then whether the item is an apple.

    Args:
        CustomerName: Name text to print.
        CustomerItem: Item text; compared against the exact string 'Apple'.
    """
    # Try to keep all print statements in one place.
    # Single-argument print(...) prints the same on Python 2 and 3.
    print('Name : ' + CustomerName)
    print('Item : ' + CustomerItem)
    if CustomerItem == 'Apple':
        print(CustomerName)
    else:
        print(CustomerName + ' is not eating an apple')
# The file name must be a string literal: the unquoted CustomerFile.txt was an
# attribute lookup on an undefined name and raised NameError.
with open('CustomerFile.txt') as CustomerFile:
    for line in CustomerFile:
        CustomerName, CustomerItem = ReadFile(line)
        Report(CustomerName, CustomerItem)
Note that the PEP8 style guide recommends using lower_case for variable and function names and to reserve CamelCase for classes. While you are free to use your own style, use of PEP8 is quite prevalent and so by joining the PEP8-club your code will more naturally "fit in" with other people's code and vice-versa.
I found the (obvious) answer. As I said, my Python is a bit rusty :
## Define processing of one input line
#
def inputfile(line, linecontents):
    """Store three 8-character fields of *line* in *linecontents*.

    The dict is modified in place, so the caller sees the new entries.

    Args:
        line: Record text; characters 0-7, 8-15 and 16-23 are used.
        linecontents: Dict that receives the keys 'item_0110', 'item_0111'
            and 'item_0112'.
    """
    linecontents['item_0110'] = line[0:8]
    linecontents['item_0111'] = line[8:16]
    linecontents['item_0112'] = line[16:24]
    # The Python 2 statement ``print a, b`` writes a space between its items;
    # the second space in the literal reproduces that, and the single-argument
    # form prints the same on Python 2 and 3.
    print('In the function :  ' + str(linecontents))
    print('In the function :  ' + str(len(linecontents)))
## Define main script
#
def main():
    """Fill a dict from one sample line and print it after the call."""
    # Set dict
    linecontents = {}
    # Pretend to open a file and read it line by line
    line = '010012343710203053525150'
    # Start processing the line that was read; inputfile fills the dict
    # in place.
    inputfile(line, linecontents)
    # Display end result. The Python 2 statement ``print a, b`` writes a
    # space between its items; the second space in each literal reproduces
    # that, and the single-argument form prints the same on Python 2 and 3.
    print('\nOutside the funtion :  ' + str(linecontents))
    print('Outsude the function :  ' + str(len(linecontents)))
## Start main script
#
main()
And this neatly returns :
In the function : {'item_0112': '53525150', 'item_0111': '37102030', 'item_0110': '01001234'}
In the function : 3
Outside the funtion : {'item_0112': '53525150', 'item_0111': '37102030', 'item_0110': '01001234'}
Outsude the function : 3