Python read and write in same function - python-2.7

My code currently takes in a CSV file and writes to a text file. The piece I am having trouble with is below: I search the CSV for a keyword such as "issues", and I want every row that contains that word to be written to a text file. Currently I have it printing to a JSON file, but it's all on one line, like this:
"something,something1,something2,something3,something4,something5,something6,something7\r\n""something,something1,something2,something3,something4,something5,something6,something7\r\n"
But i want it to print out like this:
"something,something1,something2,something3,something4,something5,something6,something7"
"something,something1,something2,something3,something4,something5,something6,something7"
Here is the code I have so far:
def search(self, filename):
# Writes every line of `filename` that contains 'PBI 43125' to weekly_test.txt.
with open(filename, 'rb') as searchfile, open("weekly_test.txt", 'w') as text_file:
for line in searchfile:
if 'PBI 43125' in line:
#print (line)
# json.dump serialises the line as a single JSON string: it is wrapped in double
# quotes, its trailing "\r\n" is written as escape sequences, and nothing separates
# one record from the next -- hence the one-line output shown in the question.
json.dump(line, text_file, sort_keys=True, indent = 4)
So again I just need a little guidance on how to get my json file to be formatted the way I want.

Just replace `print line` with `print >>file, line`, which sends the output to the file instead of the console:
def search(self, filename):
    """Copy every line of *filename* that contains 'issue' to weekly_test.txt.

    Args:
        filename: Path of the CSV/text file to scan.

    Each matching line is written on its own line of ``weekly_test.txt``
    (created or truncated in the current working directory).
    """
    with open(filename, 'r') as searchfile, \
            open('weekly_test.txt', 'w') as search_results_file:
        for line in searchfile:
            if 'issue' in line:
                # `line` still carries its own line ending, so strip it and
                # write exactly one '\n'. This matches Python 2's
                # `print >>search_results_file, line,` but also runs on
                # Python 3 and does not leave a blank line between records.
                search_results_file.write(line.rstrip('\r\n') + '\n')
    # At this point, both files have been closed automatically.

Related

How to concatenate in Python from .txt?

I have a server.txt file where i have 3 names listed down:
server.txt
CFMPAPP1
CFMPAPP2
CFMPAPP3
I am looking to read these names from the server.txt file and produce an output.txt file as shown below.
output.txt
CI_Name like 'CFMPAPP1%' or
CI_Name like 'CFMPAPP2%' or
CI_Name like 'CFMPAPP3%' or
Any Idea how to do this ?
This can be easily done in three lines:
# Read the server names, skipping blank lines (including the empty item a
# trailing newline would otherwise produce) so that no bogus
# "CI_Name like '%' or" clause is generated.
with open('server.txt', 'r') as server_file:
    server_names = [name.strip() for name in server_file if name.strip()]
# One clause per server name, joined with newlines.
amended_string = "\n".join(
    "CI_Name like '{}%' or".format(name) for name in server_names)
And then you just need to save amended_string to output.txt. I hope that helps.
This solution only keeps one line at a time in memory:
# Stream server.txt into output.txt, emitting one clause per input line.
with open('server.txt', 'r') as infile, open('output.txt', 'w') as outfile:
    for raw_line in infile:
        server_name = raw_line.rstrip()
        clause = "CI_Name like '{}%' or\n".format(server_name)
        outfile.write(clause)

Tag all English words in multiple text files in same directory

I am trying to modify the code to apply to multiple text files in the same directory. The code looks as follows but there is an error "NameError: name 'output' is not defined". Can you help me to suggest improvements to the code?
import re
def replaceenglishwords(filename):
# Appends "#s:eng" to alphabetic words on lines whose first token starts with "*CHI:"
# and writes every line to `output`.
# NOTE: `output` is neither a parameter nor assigned anywhere in this function, so the
# first output.write(...) raises the NameError reported in the question.
mark_pattern = re.compile("\\*CHI:.*")  # token beginning with the literal "*CHI:"
word_pattern = re.compile("([A-Za-z]+)")  # leading run of ASCII letters
for line in filename:
# Split into possible words
parts = line.split()
# parts is empty for a blank line, in which case parts[0] raises IndexError.
if mark_pattern.match(parts[0]) is None:
# Not a CHI line; note that write() is called here without an argument.
output.write()
continue
# Got a CHI line
new_line = line
for word in parts[1:]:
matches = word_pattern.match(word)
if matches:
# Replace the first whole-word occurrence of `word` with its letters plus "#s:eng".
# `word` is interpolated into the pattern unescaped.
old = f"\\b{word}\\b"
new = f"{matches.group(1)}#s:eng"
new_line = re.sub(old, new, new_line, count=1)
output.write(new_line)
import glob
for file in glob.glob('*.txt'):
# One "<name>-out.txt" output file is opened per matched input file.
outfile = open(file.replace('.txt', '-out.txt'), 'w', encoding='utf8')
for line in open(file, encoding='utf8'):
# A single line (a str) is passed here, although replaceenglishwords iterates over
# its argument; the function has no return statement, so its result is None.
print(replaceenglishwords(line), '\n', end='', file=outfile)
outfile.close()
replaceenglishwords needs two parameters, one for the file you are searching and one for the file where you write your results: replaceenglishwords(filename, output). It looks like your function already reads the input file line by line by itself.
Now you can open both files in your loop and pass them to replaceenglishwords:
for file in glob.glob('*.txt'):
    # Skip files written by an earlier run; otherwise "a-out.txt" would be
    # treated as input and produce "a-out-out.txt".
    if file.endswith('-out.txt'):
        continue
    # Swap only the extension: str.replace would rewrite every ".txt"
    # occurring anywhere in the name.
    out_name = file[:-len('.txt')] + '-out.txt'
    # The with block closes both files even if replaceenglishwords raises.
    with open(file, encoding='utf8') as textfile, \
            open(out_name, 'w', encoding='utf8') as outfile:
        replaceenglishwords(textfile, outfile)

Extracting data from two different files to produce a FASTA file

I have two different files one is a fasta file, and the other a txt file produced from a dictionary with json.
file_A looks like this;
> {
"gene_1005 ['gene description_B']":2,
"gene_1009 ['gene description_C']":1,
"gene_104 ['gene description_D']":2,
"gene_1046 ['gene description_A']":1,
}
file_B looks like this:
gene_1005 ['gen description_B'] ATGTGGATCCGCCCGTTGCAGGCGGAACTGAGCGATAACACGCTGGCTTTGTATGCGCCAAACCGTTTTGTGCTCGA
gene_2 ['gene description_C'] ATGAAATTTACCGTTGAACGTGAACATTTATTAAAACCGCTGCAACAGGTGAGTGGCCCATTAGGTGGCCGCCCAAC
What I would like to create is a new fasta file containing only those genes that have the value 2 in file_A. I have tried the code below but I am quite lost. It prints words[0], which is the name of the gene, but it does not print words[1], which should be the number. It raises the error
'out of range'
import json
def readlines():
# Reads file_A.txt and prints the ':'-separated parts of its lines (Python 2 print statements).
input_file=open('file_A.txt')
lines=input_file.readlines()
print lines[1]
for line in lines:
# NOTE: split is called on `lines` (the whole list), not on the loop variable `line`.
words=lines.split(':')
print words[0]
# words[1] exists only when the split text contained a ':'.
print words[1]
#print line
input_file.close()
readlines()
Could anyone kindly give a hand with this, please?
Thanks
I see that people like to downvote without explaining why or offering a suggestion, even though a suggestion is what this post asked for. Since the downvoter has not bothered to offer one, I will post the answer myself.
# Copy to wanted_genes.fa only the FASTA records whose header ID is in my_dict.
# NOTE(review): my_dict is not defined in this snippet; it is assumed to hold
# exactly the wanted gene IDs -- confirm against the code that builds it.
skip = True  # drop any sequence data that appears before the first header
with open('file.fa', 'r') as input_file, \
        open('wanted_genes.fa', 'w') as output_file:
    for line in input_file:
        if line.startswith('>'):
            # Header line: the ID is everything after '>' minus the line
            # ending (also correct when the last line has no newline).
            geneID = line[1:].rstrip('\r\n')
            skip = geneID not in my_dict
        # Headers and their sequence lines are written while the current
        # record is wanted.
        if not skip:
            output_file.write(line)

Python OOP .txt file import not working as expected

I'm trying to import the first 6 lines of a .txt file into a collection of objects:
from Header import Header
class HeaderReader(list):
# A list subclass that headerCreator fills with Header objects.
def __init__(self):
super(HeaderReader, self).__init__()
# NOTE(review): as written the next line is a plain comment, not a decorator;
# `@staticmethod` may have been intended -- confirm.
#staticmethod
def headerCreator(filePath):
# Builds and returns a HeaderReader from the file at filePath.
with open(filePath, 'r') as file:
headerHolder = HeaderReader()
for line in file:
# `line` is one line of the file, so splitting it on '\n' yields at most two
# pieces (the text and an empty string after the newline).
splittedEls = line.split('\n')
# Consequently the length is never 6 and every line is skipped.
if len(splittedEls) != 6:
continue
header = Header(
splittedEls[0],
splittedEls[1],
splittedEls[2],
splittedEls[3],
splittedEls[4],
splittedEls[5]
)
headerHolder.append(header)
return headerHolder
Header is an object with 6 attributes. The first 6 lines of the .txt file are separated by newlines (\n); after that there is other content. I think the problem is probably in the `if len(splittedEls) != 6:` check, but I'm not sure whether it is or how to resolve it. It might also be an error to define a class, give it an __init__, and then call a staticmethod. When I pass it a .txt file it returns an empty list. Any ideas?

python readline from big text file

When I run this:
import os.path
import pyproj
# Projection objects passed as the first and second arguments to pyproj.transform below.
srcProj = pyproj.Proj(proj='longlat', ellps='GRS80', datum='NAD83')
dstProj = pyproj.Proj(proj='longlat', ellps='WGS84', datum='WGS84')
# Python 2 `file()` built-in; this handle is only used to read the first line.
f = file(os.path.join("DISTAL-data", "countries.txt"), "r")
heading = f.readline() # Ignore field names.
# `f` is rebound to a second handle here; the handle opened above is not closed
# explicitly anywhere in this snippet.
with open('C:\Python27\DISTAL-data\geonames_20160222\countries.txt', 'r') as f:
# readlines() builds a list of every line before the loop starts; this is the call
# named in the MemoryError traceback.
for line in f.readlines():
parts = line.rstrip().split("|")  # fields are '|'-separated
featureName = parts[1]
featureClass = parts[2]
lat = float(parts[9])
long = float(parts[10])
if featureClass == "Populated Place":
long,lat = pyproj.transform(srcProj, dstProj, long, lat)
# The with statement above already closes this handle on exit.
f.close()
I get this error:
File "C:\Python27\importing world datacountriesfromNAD83 toWGS84.py",
line 13, in for line in f.readlines() : MemoryError.
I have downloaded countries file from http://geonames.nga.mil/gns/html/namefiles.html as entire country file dataset.
Please help me to get out of this.
For large files, readlines() builds a large list in memory; you can try iterating over the file object instead:
# Iterating over the file object yields one line per loop pass, without calling readlines().
# The file is not closed in this snippet.
f = open('somefilename','r')
for line in f:
dosomthing()
The answer given by Yael is helpful; I would like to improve on it. A good way to read a file, including a large one:
# The with block closes the file once the loop finishes (or raises).
with open(filename) as f:
    for line in f:
        # Print the current line, not the file object `f` itself.
        print(line)
I like to use the 'with' statement, which ensures the file is properly closed.