How to convert content of InMemoryUploadedFile to string - django

Does anyone know how to convert content of uploaded file (InMemoryUploadedFile) in Django2 to string?
I want to know how to write the following convert2string():
uploaded_file = request.FILES['file']
my_xml = convert2string(uploaded_file) # TODO write method(convert to xml string)
obj = MyObject()
parser = MyContentHandler(obj)
xml.sax.parseString(my_xml, parser) # or xml.sax.parse(convertType(uploaded_file), parser)

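Try str(uploaded_file.read()) to convert InMemoryUploadedFile to str. Note that calling str() on bytes returns its repr (e.g. "b'...'") rather than the decoded text, and that a second read() without seek(0) returns empty bytes; use uploaded_file.read().decode() to get the actual text.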
uploaded_file = request.FILES['file']
print(type(uploaded_file)) # <class 'django.core.files.uploadedfile.InMemoryUploadedFile'>
print(type(uploaded_file.read())) # <class 'bytes'>
print(type(str(uploaded_file.read()))) # <class 'str'>
UPDATE-1
Assuming you are uploading a text file (.txt,.json etc) as below,
my text line 1
my text line 2
my text line 3
then your view would look like this:
def my_view(request):
    """Read the text file uploaded under the ``file`` field into one ``str``.

    ``request`` must carry the upload in ``request.FILES['file']``. The rest
    of the view (processing and the returned response) is left as a
    placeholder, as in the original example.
    """
    uploaded_file = request.FILES['file']
    # Each iterated line is decoded to text; joining once avoids rebuilding
    # the string on every line the way repeated ``+`` concatenation does.
    str_text = ''.join(line.decode() for line in uploaded_file)  # `str` type
    # do something
    return something

file_in_memory -> InMemoryUploadedFile
file_in_memory.read().decode() -> txt output

If you want to read file data as a list, then you can try out the following code:
uploaded_file = request.FILES['file']
file_data = uploaded_file.read().decode().splitlines() # get each line data as list item

Related

Read uploaded fasta file in django using Bio library

in index.html I used
<input type="file" name="upload_file">
in views.py
from Bio import SeqIO
def index(request):
    """Handle the FASTA upload form.

    On POST, take the file sent in the ``upload_file`` field, run it through
    ``sequence_extract_fasta`` and render ``new.html`` with the uploaded file
    under the ``files`` context key. For any other method, or when the upload
    cannot be read or parsed, render ``index.html``.
    """
    import logging

    if request.method == "POST":
        try:
            text_file = request.FILES['upload_file']
            # The extracted ids/sequences are not used by this view yet.
            sequence_extract_fasta(text_file)
        except Exception:
            # Record why processing failed instead of hiding it; the upload
            # form is shown again below. Unlike a bare ``except:`` this does
            # not swallow KeyboardInterrupt/SystemExit.
            logging.getLogger(__name__).exception(
                "Could not process uploaded FASTA file")
        else:
            # Rendering happens outside the ``try`` so template errors are
            # reported rather than silently turned into the index page.
            context = {'files': text_file}
            return render(request, 'new.html', context)
    return render(request, 'index.html')
def sequence_extract_fasta(fasta_files):
    """Collect the id and sequence of every record in a FASTA file.

    ``fasta_files`` is passed straight to ``open()`` in text mode.
    Returns a ``(ids, sequences)`` pair of lists in file order.
    """
    ids = []
    sequences = []
    # Open the given FASTA file for reading.
    with open(fasta_files, 'r') as handle:
        print("pass")
        # SeqIO.parse(file handle, file format) yields one record per entry.
        for entry in SeqIO.parse(handle, 'fasta'):
            sequence = entry.seq
            print(sequence)
            sequences.append(sequence)
            ids.append(entry.id)
    # Returned to both call_compare_fasta and call_reference_fasta.
    return ids, sequences
The function sequence_extract_fasta(fasta_files) works in plain Python, but not within the Django framework. If I could find the temporary location of the uploaded file, I might be able to call the function with that path. Is there an efficient way to solve this? Your help is highly appreciated. Thank you for your time.
I found one way of doing this.
def sequence_extract_fasta(fasta_file):
    """Collect the id and sequence of every record in an uploaded FASTA file.

    ``fasta_file`` must provide ``chunks()`` yielding bytes (as used below).
    Its content is copied to a scratch file, which is reopened in text mode
    and parsed with ``SeqIO.parse(handle, 'fasta')``.

    Returns a ``(fasta_id, fasta_seq)`` pair of lists in file order.
    """
    import os
    import tempfile

    fasta_id = []
    fasta_seq = []
    # A private directory per call replaces the fixed 'data/temp/name.bak'
    # path, so two uploads handled at once cannot overwrite each other's
    # data, and the scratch copy is removed when the block exits.
    with tempfile.TemporaryDirectory() as scratch_dir:
        scratch_path = os.path.join(scratch_dir, 'upload.fasta')
        # Copy the upload chunk by chunk rather than reading it in one go.
        with open(scratch_path, 'wb') as destination:
            for chunk in fasta_file.chunks():
                destination.write(chunk)
        # Reopen in text mode for parsing.
        with open(scratch_path, 'r') as handle:
            for record in SeqIO.parse(handle, 'fasta'):  # (file handle, file format)
                fasta_seq.append(record.seq)
                fasta_id.append(record.id)
    # Returned to both call_compare_fasta and call_reference_fasta.
    return fasta_id, fasta_seq

Django : can't interrupt update function with redirect. Is it possible?

I use a function for updating a Model.
def update_mapping(request, pk):
flow = Flow.objects.get(pk=pk)
mappings = MappingField.objects.filter(fl_id=pk)
headers_samples = GetCsvHeadersAndSamples(request, pk)
[...]
In this function, I call another one (GetCsvHeadersAndSamples) to get data from a CSV file. Later, I use this data with JS in the template.
def GetCsvHeadersAndSamples(request, flow_id):
    """Return the header row and one sample row of the flow's CSV file.

    The path, separator and encoding are taken from positions 0, 1 and 2 of
    the value returned by ``get_csv(request, flow_id)``.

    Returns a dict with keys ``'headers'`` (the first row) and ``'samples'``
    (the row obtained after skipping one row following the header).
    Raises ``StopIteration`` if the file has too few rows.
    """
    csv_info = get_csv(request, flow_id)
    csv_path = csv_info[0]
    separator = csv_info[1]
    encoding = csv_info[2]
    # The encoding belongs to open(): csv.reader() accepts no ``encoding``
    # keyword and raises TypeError when given one.
    with open(csv_path, newline='', encoding=encoding) as f:
        reader = csv.reader(f, delimiter=separator)
        headers = next(reader)
        # Keep using the same reader so the sample row is split with the
        # flow's separator as well.
        samples = next(itertools.islice(reader, 1, None))
    return {'headers': headers, 'samples': samples}
To access the CSV data, I use another function that checks whether the CSV file still exists, in which case I retrieve the data from it.
def get_csv(request, flow_id):
    """Locate the CSV file of a flow and detect its encoding.

    Looks up ``Flow`` by primary key and joins its ``fl_file_name`` onto
    ``settings.MEDIA_ROOT``.

    Returns ``(path, separator, encoding)`` when the file exists. Otherwise
    an error message is added to ``request`` and an ``HttpResponseRedirect``
    to ``'mappings-list'`` is returned instead, so callers have to check
    which of the two they received.
    """
    flow = Flow.objects.get(pk=flow_id)
    separator = flow.fl_separator
    csv_path = os.path.join(settings.MEDIA_ROOT, str(flow.fl_file_name))
    if not os.path.isfile(csv_path):
        messages.error(request, "File not found or corrupted.")
        return HttpResponseRedirect(reverse('mappings-list', args=(flow_id,)))
    # Read the raw bytes directly: building a 'file://' URL by string
    # concatenation fails for paths that need URL quoting and left the
    # handle unclosed.
    with open(csv_path, 'rb') as raw:
        encoding = chardet.detect(raw.read())['encoding']
    return (csv_path, separator, encoding)
I hoped that the return would "break" out of my original function and redirect to the 'mappings-list' page with the error message. But execution continues and returns to the GetCsvHeadersAndSamples function, which raises an error because the CSV data were not found. Note: the commented-out print does show that the file is not found.
It seems that the way I'm doing things is not the good one.

How to get mimetype of a field binary in Odoo10?

I created a binary field called datas and uploaded a file. I need to get the mimetype of the data.
i tried this.
*.py
attachment_icon = fields.Char(string="Icon", compute="_get_icon")
@api.one
def _get_icon(self):
    """Print diagnostics about each record's ``datas`` and its guessed mimetype.

    NOTE(review): nothing is assigned to ``attachment_icon`` here, although
    the field declares this method as its ``compute`` -- confirm whether the
    guessed mimetype should be stored on the record.
    """
    for rec in self:
        print('data',type(rec.datas))  # prints type <str>
        binary_data = rec.datas
        print('binary_data',binary_data)
        # NOTE(review): ``datas`` is presumably already base64 text; this
        # call encodes it once more instead of decoding it, so the guesser
        # is looking at text rather than the file's bytes -- confirm.
        mimetype = guess_mimetype(binary_data.encode('base 64'))
        print('mimetypemimetype',mimetype)  # prints 'text/plain'
Now the output of mimetype is text/plain, actually the uploaded file was pdf.
How can i get correct mimetype?
Hello @KbiR
The python-magic library can detect the mimetype:
import magic
mime = magic.Magic(mime=True)
mime.from_file("youtPath/fileName.pdf") # 'application/pdf'

python html parser doesnot return results?

I am new to Python. I have a folder of downloaded HTML files from which I need to extract the text data and write it to text files in the same folder. The code below works fine with individual files; however, when I try to process multiple files it doesn't work. Kindly suggest a solution; I will be extremely thankful. It's not even giving me any error that I could work on to figure out a solution.
from HTMLParser import HTMLParser
from re import sub
from sys import stderr
from traceback import print_exc
import glob
import os
class _DeHTMLParser(HTMLParser):
def __init__(self):
HTMLParser.__init__(self)
self.__text = []
def handle_data(self, data):
text = data.strip()
if len(text) > 0:
text = sub('[ \t\r\n]+', ' ', text)
self.__text.append(text + ' ')
def handle_starttag(self, tag, attrs):
if tag == 'p':
self.__text.append('\n\n')
elif tag == 'br':
self.__text.append('\n')
def handle_startendtag(self, tag, attrs):
if tag == 'br':
self.__text.append('\n\n')
def text(self):
return ''.join(self.__text).strip()
def dehtml(text):
    """Convert HTML markup in ``text`` to plain text.

    Feeds ``text`` through ``_DeHTMLParser`` and returns the collected text.
    If that fails, the traceback is printed to stderr and ``text`` is
    returned unchanged.
    """
    try:
        parser = _DeHTMLParser()
        parser.feed(text)
        parser.close()
        return parser.text()
    except Exception:
        # Best-effort fallback, but without trapping KeyboardInterrupt or
        # SystemExit the way a bare ``except:`` would.
        print_exc(file=stderr)
        return text
def main():
    """Write a ``.txt`` file next to every ``*.html`` file in ``dir_path``.

    Each output receives the value returned by ``dehtml`` followed by a
    newline; that value is also printed.
    """
    dir_path = r"/home/maitreyee/Downloads/SchoolCollege.com/multiple_states/"
    results_dir = r"/home/maitreyee/Downloads/SchoolCollege.com/"
    pattern = os.path.join(dir_path, "*.html")
    for html_path in glob.glob(pattern):
        # NOTE: the open file object itself, not its contents, is what gets
        # handed to dehtml() below.
        source = open(html_path, "r")
        txt_path = os.path.splitext(html_path)[0] + '.txt'
        with open(txt_path, 'w') as outfile:
            converted = dehtml(source)
            print(converted)
            outfile.write(converted + '\n')
if __name__ == '__main__':
main()
I struggled a lot and then tried something simpler. For the code above, we can just replace the main() function with the following code; it then writes a .txt file for every HTML file, and we only need to pass the folder location.
def main():
    """Convert every ``*.html`` file in ``dir_path`` to a ``.txt`` file.

    Each file is read in full, passed through ``dehtml`` and the result is
    written to a file with the same name but a ``.txt`` extension, in the
    same folder as the input.
    """
    dir_path = r"/home/maitreyee/Downloads/SchoolCollege.com/rajasthan_data/"
    # Not used below: outputs are written next to the inputs.
    results_dir = r"/home/maitreyee/Downloads/SchoolCollege.com/rajasthan_data/"
    for file_name in glob.glob(os.path.join(dir_path, "*.html")):
        # ``with`` closes the input promptly; otherwise converting hundreds
        # of files keeps a handle open for each one.
        with open(file_name) as source:
            text = source.read()
        results_file = os.path.splitext(file_name)[0] + '.txt'
        # The ``with`` block closes the output, so no explicit close().
        with open(results_file, "w") as fp:
            fp.write(dehtml(text))
Where the directory paths are given, put in the path to the folder containing your HTML files. It was really helpful for me because I had to convert hundreds of HTML files and needed all the text from them; this gave me results in seconds.

django RequestFactory file upload

I try to create a request, using RequestFactory and post with file, but I don't get request.FILES.
from django.test.client import RequestFactory
from django.core.files import temp as tempfile
tdir = tempfile.gettempdir()
file = tempfile.NamedTemporaryFile(suffix=".file", dir=tdir)
file.write(b'a' * (2 ** 24))
file.seek(0)
post_data = {'file': file}
request = self.factory.post('/', post_data)
print request.FILES # get an empty request.FILES : <MultiValueDict: {}>
How can I get request.FILES with my file ?
If you open the file first and then assign request.FILES to the open file object you can access your file.
request = self.factory.post('/')
with open(file, 'r') as f:
request.FILES['file'] = f
request.FILES['file'].read()
Now you can access request.FILES like you normally would. Remember that when you leave the open block request.FILES will be a closed file object.
I made a few tweaks to @Einstein's answer to get it to work for a test that saves the uploaded file in S3:
request = request_factory.post('/')
with open('my_absolute_file_path', 'rb') as f:
request.FILES['my_file_upload_form_field'] = f
request.FILES['my_file_upload_form_field'].read()
f.seek(0)
...
Without opening the file as 'rb' I was getting some unusual encoding errors with the file data
Without f.seek(0) the file that I uploaded to S3 was zero bytes
You need to provide proper content type, proper file object before updating your FILES.
from django.core.files.uploadedfile import File
# Let django know we are uploading files by stating content type
content_type = "multipart/form-data; boundary=------------------------1493314174182091246926147632"
request = self.factory.post('/', content_type=content_type)
# Create file object that contain both `size` and `name` attributes
my_file = File(open("/path/to/file", "rb"))
# Update FILES dictionary to include our new file
request.FILES.update({"field_name": my_file})
the boundary=------------------------1493314174182091246926147632 is part of the multipart form type. I copied it from a POST request done by my webbrowser.
All the previous answers didn't work for me. This seems to be an alternative solution:
from django.core.files.uploadedfile import SimpleUploadedFile
with open(file, "rb") as f:
file_upload = SimpleUploadedFile("file", f.read(), content_type="text/html")
data = {
"file" : file_upload
}
request = request_factory.post("/api/whatever", data=data, format='multipart')
Be sure that 'file' is really the name of your file input field in your form.
I got that error when it was not (use name, not id_name)