This is not direting to file p
with open('/var/tmp/out3') as f:
before = collections.deque(maxlen=1)
for line in f:
if 'disk#g5000cca025a1ee6c' in line:
sys.stdout.writelines(before)
p.write(before)
Try this:
import sys
filename = '/var/tmp/out3'
expression = 'disk#g5000cca025a1ee6c'
with open(filename, 'r') as f:
with open('p', 'w') as p_file:
previous = next(f)
for line in f:
if expression in line:
p_file.write(previous)
previous = line
If the expression is found, you should find a file 'p' in your current directory containing the expression.
It worked when I tried it on Python2.7.10. I took the code from this answer Refer to previous line when iterating through file with Python.
Hope this helps.
Related
I am trying to modify the code to apply to multiple text files in the same directory. The code looks as follows but there is an error "NameError: name 'output' is not defined". Can you help me to suggest improvements to the code?
import re
def replaceenglishwords(filename):
mark_pattern = re.compile("\\*CHI:.*")
word_pattern = re.compile("([A-Za-z]+)")
for line in filename:
# Split into possible words
parts = line.split()
if mark_pattern.match(parts[0]) is None:
output.write()
continue
# Got a CHI line
new_line = line
for word in parts[1:]:
matches = word_pattern.match(word)
if matches:
old = f"\\b{word}\\b"
new = f"{matches.group(1)}#s:eng"
new_line = re.sub(old, new, new_line, count=1)
output.write(new_line)
import glob
for file in glob.glob('*.txt'):
outfile = open(file.replace('.txt', '-out.txt'), 'w', encoding='utf8')
for line in open(file, encoding='utf8'):
print(replaceenglishwords(line), '\n', end='', file=outfile)
outfile.close()
replaceenglishwords needs two parameters, one for the file you are searching and one for the file where you write you results: replaceenglishwords(filename, output). It looks like your function is reading the input file line by line by itself.
Now you can open both files in your loop and pass them to replaceenglishwords:
for file in glob.glob('*.txt'):
textfile = open(file, encoding='utf8')
outfile = open(file.replace('.txt', '-out.txt'), 'w', encoding='utf8')
replaceenglishwords(textfile, outfile)
textfile.close()
outfile.close()
When I run this:
import os.path
import pyproj
srcProj = pyproj.Proj(proj='longlat', ellps='GRS80', datum='NAD83')
dstProj = pyproj.Proj(proj='longlat', ellps='WGS84', datum='WGS84')
f = file(os.path.join("DISTAL-data", "countries.txt"), "r")
heading = f.readline() # Ignore field names.
with open('C:\Python27\DISTAL-data\geonames_20160222\countries.txt', 'r') as f:
for line in f.readlines():
parts = line.rstrip().split("|")
featureName = parts[1]
featureClass = parts[2]
lat = float(parts[9])
long = float(parts[10])
if featureClass == "Populated Place":
long,lat = pyproj.transform(srcProj, dstProj, long, lat)
f.close()
I get this error:
File "C:\Python27\importing world datacountriesfromNAD83 toWGS84.py",
line 13, in for line in f.readlines() : MemoryError.
I have downloaded countries file from http://geonames.nga.mil/gns/html/namefiles.html as entire country file dataset.
Please help me to get out of this.
readlines() for large files creates a large structure in memory, you can try using:
f = open('somefilename','r')
for line in f:
dosomthing()
Answer given by Yael is helpful, I would like to improve it. A Good way to read a file or large file
with open(filename) as f:
for line in f:
print f
I like to use 'with' statement which ensure file will be properly closed.
My code is currently taking in a csv file and outputting to text file. The piece of code I have below and am having trouble with is from the csv I am searching for a keyword like issues and every row that has that word I want to output that to a text file. Currently, I have it printing to a JSON file but its all on one line like this
"something,something1,something2,something3,something4,something5,something6,something7\r\n""something,something1,something2,something3,something4,something5,something6,something7\r\n"
But i want it to print out like this:
"something,something1,something2,something3,something4,something5,something6,something7"
"something,something1,something2,something3,something4,something5,something6,something7"
Here is the code I have so far:
def search(self, filename):
with open(filename, 'rb') as searchfile, open("weekly_test.txt", 'w') as text_file:
for line in searchfile:
if 'PBI 43125' in line:
#print (line)
json.dump(line, text_file, sort_keys=True, indent = 4)
So again I just need a little guidance on how to get my json file to be formatted the way I want.
Just replace print line with print >>file, line
def search(self, filename):
with open('test.csv', 'r') as searchfile, open('weekly_test.txt', 'w') as search_results_file:
for line in searchfile:
if 'issue' in line:
print >>search_results_file, line
# At this point, both the files will be closed automatically
If I wanted to read starting from a given line I can do:
with open(myfile) as f:
for x in range(from_here):
next(f)
for line in f:
do stuff
How can I do the opposite: reading only up to a given line?
I was thinking about a for loop: is there another way?
The obvious answer is to use a loop that just counts:
with open(myfile) as f:
for i in xrange(number_of_wanted_lines):
line = next(f)
# do stuff with line
Regarding the second part of your question, you can also read in the full file into a list of lines, then use slices:
with open(myfile) as f:
lines = f.readlines()[start_line_number:end_line_number+1]
for line in lines:
# do stuff with line
If you don't want to load the whole file into memory, you can also use islice (from itertools) instead of list slices:
import itertools
with open(myfile) as f:
for line in itertools.islice(f, start_line_number, end_line_number + 1):
# do stuff with line
with open(myfile) as f:
for x in range(until_here):
line = next(f)
# do stuff with line
# do stuff with the rest of f
or
import itertools as it
with open(myfile) as f:
for line in it.islice(f, until_here):
# do stuff
# do stuff with the rest of f
code:
with open(filename) as f:
file_list = f.readlines()
file_list = [line.strip() for line in file_list] # remove whitespaces from each line of file
code to process data between start and end tags (these tags can have whitespaces thats why i have removed them above)
This code works fine for me but if the file is too big then i don't think its sensible to copy whole data in a list then strip whitespaces from each line.
How can i remove whitespaces for specific part of list so that only that much part i can save in list ?
I tried:
with open(filename) as f:
for line in f.readlines():
if line.strip() == "start":
start = f.readlines.index("start")
if line.strip() == "end"
end = f.readlines.index("end")
file_list = f.readlines[start:end]
But its giving error
start = f.readlines.index("start")
AttributeError: 'builtin_function_or_method' object has no attribute 'index'
I just want to write an efficient code of code mentioned on top of this post.
The problem with your code is that the file object f is an iterator, and once you call f.readlines() it is exhausted, so finding the index of a line by calling f.readlines() again can't work. Also, calling readlines() at all negates your effort of storing only the interesting parts of the file, as readlines() would read the entire file into memory anyways.
Instead, just memorize whether you've already seen the start-line and add the following lines to the list until you see the end-line.
with open(filename) as f:
started, lines = False, []
for line in f:
stripped = line.strip()
if stripped == "end": break
if started: lines.append(stripped)
if stripped == "start": started = True
Alternatively, you could also use itertools.takewhile to get all the lines up to the end-line.
import itertools
with open(filename) as f:
for line in f:
if line.strip() == "start":
lines = itertools.takewhile(lambda l: l.strip() != "end", f)
lines = map(str.strip, lines)
break
Or even shorter, using another takewhile to read (and discard) the lines before the start-line:
with open("test.txt") as f:
list(itertools.takewhile(lambda l: l.strip() != "start", f))
lines = itertools.takewhile(lambda l: l.strip() != "end", f)
lines = map(str.strip, lines)
In all cases, lines holds the (stripped) lines between the start- and the end-line, both exclusive.
Tobias's first answer can be modified a bit with continue ...
with open(filename) as f:
started, lines = False, []
for line in f:
stripped = line.strip()
if stripped == "end": break
if stripped == "start":
started = True
continue
if not started: continue
# process line here no need to store it in a list ...