Serving a zip file from Django

I'm following this solution (Serving dynamically generated ZIP archives in Django) to serve some zip files from Django.
The idea is to select the files from a database using some checkboxes, but for now I'm trying to make the example work with just two images.
import os
import zipfile
import StringIO
from django.http import HttpResponse

def getfiles(request):
    # Files (local path) to put in the .zip
    # FIXME: Change this (get paths from DB etc)
    filenames = ["/home/../image1.png", "/home/../image2.png"]
    # Folder name in ZIP archive which contains the above files
    # E.g [thearchive.zip]/somefiles/file2.txt
    # FIXME: Set this to something better
    zip_subdir = "somefiles"
    zip_filename = "%s.zip" % zip_subdir
    # Open StringIO to grab in-memory ZIP contents
    s = StringIO.StringIO()
    # The zip compressor
    zf = zipfile.ZipFile(s, "w")
    for fpath in filenames:
        # Calculate path for file in zip
        fdir, fname = os.path.split(fpath)
        zip_path = os.path.join(zip_subdir, fname)
        # Add file, at correct path
        zf.write(fpath, zip_path)
    # Must close zip for all contents to be written
    zf.close()
    # Grab ZIP file from in-memory, make response with correct MIME-type
    resp = HttpResponse(s.getvalue(), mimetype="application/x-zip-compressed")
    # ..and correct content-disposition
    resp['Content-Disposition'] = 'attachment; filename=%s' % zip_filename
    return resp
I wrote getfiles(request) in my views.py and call it from the index view:
def index(request):
    if request.method == 'POST':  # If the form has been submitted...
        resp = getfiles(request)
    form = FilterForm(request.POST)  # A form bound to the POST data
    # do some validation and get latest_events from database
    context = {'latest_events_list': latest_events_list, 'form': form}
    return render(request, 'db_interface/index.html', context)
I know getfiles() is called, because if I put in the names of nonexistent files I get an error, but with correct filenames I get neither a download nor an error (I use the full path, /home/myuser/xxx/yyy/Project/app/static/app/image1.png).
I tried with the Django development server and with the Apache2/Nginx server I have for production.
I also tried using content_type = 'application/force-download'.
Thanks
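For reference, a minimal Python 3 sketch of the same view, assuming a modern Django where the mimetype argument has been replaced by content_type (the placeholder paths are kept from the question):

import io
import os
import zipfile
from django.http import HttpResponse

def getfiles(request):
    filenames = ["/home/../image1.png", "/home/../image2.png"]  # placeholder paths
    zip_subdir = "somefiles"
    buf = io.BytesIO()  # zip data is binary, so BytesIO rather than StringIO
    with zipfile.ZipFile(buf, "w") as zf:
        for fpath in filenames:
            zf.write(fpath, os.path.join(zip_subdir, os.path.basename(fpath)))
    resp = HttpResponse(buf.getvalue(), content_type="application/x-zip-compressed")
    resp['Content-Disposition'] = 'attachment; filename=%s.zip' % zip_subdir
    return resp

Note also that in the index view shown above, resp is built but never returned; the browser can only receive the download if the view actually returns that response (for example, return resp inside the POST branch).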

Related

Read an uploaded FASTA file in Django using the Bio library

In index.html I used
<input type="file" name="upload_file">
and in views.py:
from Bio import SeqIO

def index(request):
    if request.method == "POST":
        try:
            text_file = request.FILES['upload_file']
            list_1, list_2 = sequence_extract_fasta(text_file)
            context = {'files': text_file}
            return render(request, 'new.html', context)
        except:
            text_file = ''
            context = {'files': text_file}
    return render(request, 'index.html')
def sequence_extract_fasta(fasta_files):
    # Defining empty lists for the Fasta id and fasta sequence variables
    fasta_id = []
    fasta_seq = []
    # opening a given fasta file using the file path
    with open(fasta_files, 'r') as fasta_file:
        print("pass")
        # extracting multiple data in single fasta file using biopython
        for record in SeqIO.parse(fasta_file, 'fasta'):  # (file handle, file format)
            print(record.seq)
            # appending extracted fasta data to empty list variables
            fasta_seq.append(record.seq)
            fasta_id.append(record.id)
    # returning fasta_id and fasta sequence to both call_compare_fasta and call_reference_fasta
    return fasta_id, fasta_seq
The method sequence_extract_fasta(fasta_files) works in plain Python, but not within the Django framework. If I can find the temporary location of the uploaded file, then I may be able to call the method using that path. Is there an efficient way to solve this? Your help is highly appreciated. Thank you for your time.
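As a side note on the temporary-location idea: Django only spools an upload to disk when it exceeds the in-memory size threshold (FILE_UPLOAD_MAX_MEMORY_SIZE, 2.5 MB by default), and only then does the upload object expose temporary_file_path(). A sketch of that check, using the field name from the question:

uploaded = request.FILES['upload_file']
if hasattr(uploaded, 'temporary_file_path'):
    # Large uploads arrive as TemporaryUploadedFile, backed by a real file on disk
    path = uploaded.temporary_file_path()
else:
    # Small uploads arrive as InMemoryUploadedFile, with no filesystem path at all
    path = None

So relying on a temporary path only works for large uploads; reading the uploaded file object directly is more robust.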
I found one way of doing this.
def sequence_extract_fasta(fasta_file):
    # Defining empty lists for the Fasta id and fasta sequence variables
    fasta_id = []
    fasta_seq = []
    # fasta_file = fasta_file.chunks()
    print(fasta_file)
    # opening the given fasta file using the file path:
    # creating a backup file with the original uploaded file data
    with open('data/temp/name.bak', 'wb+') as destination:
        for chunk in fasta_file.chunks():
            destination.write(chunk)
    # opening and reading the created backup file
    with open('data/temp/name.bak', 'r') as fasta_file:
        # extracting multiple data in single fasta file using biopython
        for record in SeqIO.parse(fasta_file, 'fasta'):  # (file handle, file format)
            fasta_seq.append(record.seq)
            fasta_id.append(record.id)
    # returning fasta_id and fasta sequence to both call_compare_fasta and call_reference_fasta
    return fasta_id, fasta_seq
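The intermediate backup file can likely be avoided altogether: SeqIO.parse only needs a text-mode handle, and the uploaded file's underlying binary handle (available as fasta_file.file; assuming a UTF-8 encoded upload) can be wrapped in one directly. A sketch:

import io
from Bio import SeqIO

def sequence_extract_fasta(fasta_file):
    fasta_id, fasta_seq = [], []
    # Wrap the binary upload in a text handle instead of writing it to disk
    handle = io.TextIOWrapper(fasta_file.file, encoding='utf-8')
    for record in SeqIO.parse(handle, 'fasta'):
        fasta_seq.append(record.seq)
        fasta_id.append(record.id)
    return fasta_id, fasta_seq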

How to load CSV file data into pandas using request.FILES (Django 1.11) without saving the file on the server

I just want to upload a .csv file via a form, directly into a pandas DataFrame in Django, without physically saving the file on the server.
def post(self, request, format=None):
    try:
        from io import StringIO, BytesIO
        import io
        print("data===", request.FILES['file'].read().decode("utf-8"))
        # print("file upload FILES data=====", pd.read_csv(request.FILES['file'].read(), sep=','))
        # print(request.FILES)
        print("file upload data df=====11")
        mm = pd.read_csv(BytesIO(request.FILES['file'].read().decode("utf-8")))
        print("dataframe data=====", mm)
        # import io, csv
        # urlData = request.FILES['file']
        # data = [row for row in (csv.reader(urlData))]
        # print("file upload data df=====222", data)
        # mm = pd.read_csv()
        # excel_file = request.FILES['file']
        # movies = pd.read_excel(request.FILES['file'])
    except Exception as e:
        print(e)
        log.debug("Error in CheckThreadStatus api key required " + str(e))
        return Response(responsejson('api key required', status=404))
The answer is straightforward:
pd.read_csv(request.FILES['file'])
works perfectly fine. The mistake I was making was that my CSV file was not in the correct format.
Check with:
pd.read_csv('data.csv')
If using the POST method, you can try:
getFile = request.FILES['file_name']
pd.read_csv(getFile)
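Putting that together, a minimal sketch of a complete view that reads the upload straight into a DataFrame (the view name and the 'file' field name are assumptions based on the question):

import pandas as pd
from django.http import JsonResponse

def upload_csv(request):
    if request.method == 'POST':
        # request.FILES['file'] is a file-like object, which read_csv accepts directly
        df = pd.read_csv(request.FILES['file'])
        return JsonResponse({'rows': len(df), 'columns': list(df.columns)})
    return JsonResponse({'error': 'POST a CSV file'}, status=400)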
You can use StringIO for reading and decoding your CSV:
import csv
from io import StringIO

csv_file = request.FILES["csv_file"]
content = StringIO(csv_file.read().decode('utf-8'))
reader = csv.reader(content)
After reading, you can populate your database like this:
csv_rows = [row for row in reader]
field_names = csv_rows[0]  # Get the header row
del csv_rows[0]  # Delete the header after storing its values in field_names
for row in csv_rows:
    data_dict = dict(zip(field_names, row))
    Model.objects.update_or_create(id=row[0], defaults=data_dict)
Make sure to validate the data before inserting, if the data is critical.
HINT: use Django forms to do the validation for you, as in the sketch below.
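A minimal sketch of such a form (the fields are hypothetical and would need to match your CSV columns):

from django import forms

class CsvRowForm(forms.Form):
    # One field per expected CSV column (hypothetical names)
    id = forms.IntegerField()
    name = forms.CharField(max_length=100)

for row in csv_rows:
    form = CsvRowForm(data=dict(zip(field_names, row)))
    if form.is_valid():
        Model.objects.update_or_create(id=form.cleaned_data['id'],
                                       defaults=form.cleaned_data)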

Create a download link for a file in Django

I create a file in my project by generating a PDF from HTML. For this I have this method:
import pdfkit
from tempfile import NamedTemporaryFile

def generation_html_to_pdf(self):
    path_pdf = None
    with NamedTemporaryFile(delete=False, suffix=".pdf", dir='pdf_files') as tf:
        path_pdf = tf.name
        pdfkit.from_file('templates/first_page.html', tf.name)
    return path_pdf
Then I have the PDF file in the pdf_files folder, and I want to get a download link for it. My view:
path_to_pdf = generation_html_to_pdf()
download_link = 'http://' + request.get_host() + path_to_pdf
json_inf_pdf = {'download_link': download_link}
return JsonResponse(json_inf_pdf, status=200)
which gives me JSON like this:
{"download_link": "http://127.0.0.1:8000/home/alex/projects/test_project/pdf_files/tmpe0nqbn01.pdf"}
When I click this link I get the error:
Page not found (404)
You need to create a download view and URL. A function like this creates the link:
def download_link(request):
    ''' Create download link '''
    download_link = 'http://{}/{}'.format(request.get_host(), 'download/my_filename')
    json_inf_pdf = {'download_link': download_link}
    return JsonResponse(json_inf_pdf, status=200)
and one to download the PDF:
def download_file(request, my_filename):
    ''' Download file '''
    # Open template
    from django.conf import settings
    template_url = os.path.join(settings.BASE_DIR, 'templates', 'first_page.html')
    template_open = open(template_url, 'r')
    # Read template
    from django import template
    t = template.Template(template_open.read())
    c = template.Context({})
    # Create pdf; output_path=False makes pdfkit return the PDF as bytes
    pdf = pdfkit.from_string(t.render(c), False)
    # Create and return response with the created pdf
    response = HttpResponse(pdf, content_type='application/pdf')
    response['Content-Disposition'] = 'attachment; filename={}'.format(my_filename)
    return response
and the URL:
path('download/<str:my_filename>', views.download_file, name='download_pdf'),
I can't guarantee that this will work in your case without modification, since I can't tell which html-to-pdf library you're using and I can't see your other code. It's just a basic implementation idea.
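Since generation_html_to_pdf() already writes the PDF to disk, an alternative sketch (assuming Django >= 2.1) is to serve the saved file directly with FileResponse instead of regenerating it; the path handling here is an assumption:

import os
from django.http import FileResponse, Http404

def download_file(request, my_filename):
    path = os.path.join('pdf_files', my_filename)
    if not os.path.exists(path):
        raise Http404("PDF not found")
    # FileResponse streams the file and sets Content-Disposition for us
    return FileResponse(open(path, 'rb'), as_attachment=True, filename=my_filename)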

How to fix a connection to the HTTPS NSIDC/NASA website?

I have been working on a Python script to search and download SMAP satellite data from the NSIDC HTTPS website. My code was working until last week, when this error started appearing:
urllib2.HTTPError: HTTP Error 404: Not Found
Any help?
The code is an adaptation of one proposed on the NSIDC website to do exactly what I need. The example is below:
"""This script, NSIDC_parse_HTML_BatchDL.py, defines an HTML parser to scrape data files from an earthdata HTTPS URL and bulk downloads all files to your working directory.
This code was adapted from https://wiki.earthdata.nasa.gov/display/EL/How+To+Access+Data+With+Python
Last edited Jan 26, 2017 G. Deemer"""
import urllib2
import os
from cookielib import CookieJar
from HTMLParser import HTMLParser
# Define a custom HTML parser to scrape the contents of the HTML data table
class MyHTMLParser(HTMLParser):
def __init__(self):
HTMLParser.__init__(self)
self.inLink = False
self.dataList = []
self.directory = '/'
self.indexcol = ';'
self.Counter = 0
def handle_starttag(self, tag, attrs):
self.inLink = False
if tag == 'table':
self.Counter += 1
if tag == 'a':
for name, value in attrs:
if name == 'href':
if self.directory in value or self.indexcol in value:
break
else:
self.inLink = True
self.lasttag = tag
def handle_endtag(self, tag):
if tag == 'table':
self.Counter +=1
def handle_data(self, data):
if self.Counter == 1:
if self.lasttag == 'a' and self.inLink and data.strip():
self.dataList.append(data)
parser = MyHTMLParser()
# Define function for batch downloading
def BatchJob(Files, cookie_jar):
for dat in Files:
print "downloading: ", dat
JobRequest = urllib2.Request(url+dat)
JobRequest.add_header('cookie', cookie_jar) # Pass the saved cookie into additional HTTP request
JobRedirect_url = urllib2.urlopen(JobRequest).geturl() + '&app_type=401'
# Request the resource at the modified redirect url
Request = urllib2.Request(JobRedirect_url)
Response = urllib2.urlopen(Request)
f = open( dat, 'wb')
f.write(Response.read())
f.close()
Response.close()
print "Files downloaded to: ", os.path.dirname(os.path.realpath(__file__))
#===========================================================================
# The following code block is used for HTTPS authentication
#===========================================================================
# The user credentials that will be used to authenticate access to the data
username = "user"
password = "password"
# The FULL url of the directory which contains the files you would like to bulk download
url = "https://n5eil01u.ecs.nsidc.org/SMAP/SPL4SMGP.003/2017.10.14/" # Example URL
# Create a password manager to deal with the 401 reponse that is returned from
# Earthdata Login
password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
password_manager.add_password(None, "https://urs.earthdata.nasa.gov", username, password)
# Create a cookie jar for storing cookies. This is used to store and return
# the session cookie given to use by the data server (otherwise it will just
# keep sending us back to Earthdata Login to authenticate). Ideally, we
# should use a file based cookie jar to preserve cookies between runs. This
# will make it much more efficient.
cookie_jar = CookieJar()
# Install all the handlers.
opener = urllib2.build_opener(
urllib2.HTTPBasicAuthHandler(password_manager),
#urllib2.HTTPHandler(debuglevel=1), # Uncomment these two lines to see
#urllib2.HTTPSHandler(debuglevel=1), # details of the requests/responses
urllib2.HTTPCookieProcessor(cookie_jar))
urllib2.install_opener(opener)
# Create and submit the requests. There are a wide range of exceptions that
# can be thrown here, including HTTPError and URLError. These should be
# caught and handled.
#===========================================================================
# Open a requeset to grab filenames within a directory. Print optional
#===========================================================================
DirRequest = urllib2.Request(url)
DirResponse = urllib2.urlopen(DirRequest)
# Get the redirect url and append 'app_type=401'
# to do basic http auth
DirRedirect_url = DirResponse.geturl()
DirRedirect_url += '&app_type=401'
# Request the resource at the modified redirect url
DirRequest = urllib2.Request(DirRedirect_url)
DirResponse = urllib2.urlopen(DirRequest)
DirBody = DirResponse.read(DirResponse)
# Uses the HTML parser defined above to pring the content of the directory containing data
parser.feed(DirBody)
Files = parser.dataList
# Display the contents of the python list declared in the HTMLParser class
# print Files #Uncomment to print a list of the files
#=========================================================================
# Call the function to download all files in url
#=========================================================================
BatchJob(Files, cookie_jar) # Comment out to prevent downloading to your working directory
I could fix the bug by loading the website directly and selecting the files to download, as in the code below.
"""This script, NSIDC_parse_HTML_BatchDL.py, defines an HTML parser to scrape data files from an earthdata HTTPS URL and bulk downloads all files to your working directory.
This code was adapted from https://wiki.earthdata.nasa.gov/display/EL/How+To+Access+Data+With+Python Last edited Jan 26, 2017 G. Deemer"""
import urllib2
import os
from cookielib import CookieJar
# Define function for batch downloading
def BatchJob(Files, cookie_jar):
for dat in Files:
print "downloading: ", dat
JobRequest = urllib2.Request(url+dat)
JobRequest.add_header('cookie', cookie_jar) # Pass the saved cookie into additional HTTP request
JobRedirect_url = urllib2.urlopen(JobRequest).geturl() + '&app_type=401'
# Request the resource at the modified redirect url
Request = urllib2.Request(JobRedirect_url)
Response = urllib2.urlopen(Request)
f = open( dat, 'wb')
f.write(Response.read())
f.close()
Response.close()
print "Files downloaded to: ", os.path.dirname(os.path.realpath(__file__))
#==========================================================================
# The following code block is used for HTTPS authentication
#==========================================================================
# The user credentials that will be used to authenticate access to the data
username = "user"
password = "password"
# The FULL url of the directory which contains the files you would like to bulk download
url = "https://n5eil01u.ecs.nsidc.org/SMAP/SPL4SMGP.003/2017.10.14/" # Example URL
# Create a password manager to deal with the 401 reponse that is returned from # Earthdata Login
password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
password_manager.add_password(None,
"https://urs.earthdata.nasa.gov",
username, password)
# Create a cookie jar for storing cookies. This is used to store and return
# the session cookie given to use by the data server (otherwise it will just
# keep sending us back to Earthdata Login to authenticate). Ideally, we
# should use a file based cookie jar to preserve cookies between runs. This
# will make it much more efficient.
cookie_jar = CookieJar()
# Install all the handlers.
opener = urllib2.build_opener(
urllib2.HTTPBasicAuthHandler(password_manager),
#urllib2.HTTPHandler(debuglevel=1), # Uncomment these two lines to see
#urllib2.HTTPSHandler(debuglevel=1), # details of the requests/responses
urllib2.HTTPCookieProcessor(cookie_jar))
urllib2.install_opener(opener)
# Create and submit the requests. There are a wide range of exceptions that
# can be thrown here, including HTTPError and URLError. These should be
# caught and handled.
#===========================================================================
# Open a requeset to grab filenames within a directory. Print optional
#===========================================================================
DirResponse = urllib2.urlopen(url)
htmlPage = DirResponse.read()
listFiles = [x.split(">")[0].replace('"', "")
for x in htmlPage.split("><a href=") if x.split(">")[0].endswith('.h5"') == True]
# Display the contents of the python list declared in the HTMLParser class
# print Files #Uncomment to print a list of the files
#=========================================================================
# Call the function to download all files in url
#=========================================================================
BatchJob(Files, cookie_jar) # Comment out to prevent downloading to your working directory
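For anyone on Python 3, here is a sketch of the same flow using the requests library, following the Session subclass pattern that NASA's Earthdata examples recommend (it keeps the Authorization header across the login redirect); the URL and credentials are the placeholders from the question:

import re
import requests

username = "user"
password = "password"
url = "https://n5eil01u.ecs.nsidc.org/SMAP/SPL4SMGP.003/2017.10.14/"

class SessionWithHeaderRedirection(requests.Session):
    AUTH_HOST = 'urs.earthdata.nasa.gov'

    def rebuild_auth(self, prepared_request, response):
        # Keep credentials when bouncing between the data host and the
        # Earthdata login host; strip them for any other redirect target.
        headers = prepared_request.headers
        if 'Authorization' in headers:
            original = requests.utils.urlparse(response.request.url).hostname
            redirect = requests.utils.urlparse(prepared_request.url).hostname
            if original != redirect and self.AUTH_HOST not in (original, redirect):
                del headers['Authorization']

session = SessionWithHeaderRedirection()
session.auth = (username, password)
listing = session.get(url)
listing.raise_for_status()
# Scrape the .h5 filenames out of the directory listing, as above
files = sorted(set(re.findall(r'href="([^"]+\.h5)"', listing.text)))
for name in files:
    resp = session.get(url + name)
    resp.raise_for_status()
    with open(name, 'wb') as f:
        f.write(resp.content)
    print("downloaded:", name)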

django RequestFactory file upload

I try to create a request using RequestFactory and post it with a file, but I don't get request.FILES.
from django.test.client import RequestFactory
from django.core.files import temp as tempfile
tdir = tempfile.gettempdir()
file = tempfile.NamedTemporaryFile(suffix=".file", dir=tdir)
file.write(b'a' * (2 ** 24))
file.seek(0)
post_data = {'file': file}
request = self.factory.post('/', post_data)
print request.FILES # get an empty request.FILES : <MultiValueDict: {}>
How can I get request.FILES with my file ?
If you open the file first and then assign the open file object to request.FILES, you can access your file.
request = self.factory.post('/')
with open(file, 'r') as f:
    request.FILES['file'] = f
    request.FILES['file'].read()
Now you can access request.FILES as you normally would. Remember that once you leave the with block, request.FILES['file'] will be a closed file object.
I made a few tweaks to @Einstein's answer to get it to work for a test that saves the uploaded file in S3:
request = request_factory.post('/')
with open('my_absolute_file_path', 'rb') as f:
    request.FILES['my_file_upload_form_field'] = f
    request.FILES['my_file_upload_form_field'].read()
    f.seek(0)
    ...
Without opening the file as 'rb' I was getting some unusual encoding errors with the file data.
Without f.seek(0), the file that I uploaded to S3 was zero bytes.
You need to provide a proper content type and a proper file object before updating your FILES.
from django.core.files import File

# Let django know we are uploading files by stating the content type
content_type = "multipart/form-data; boundary=------------------------1493314174182091246926147632"
request = self.factory.post('/', content_type=content_type)
# Create a file object that contains both `size` and `name` attributes
my_file = File(open("/path/to/file", "rb"))
# Update the FILES dictionary to include our new file
request.FILES.update({"field_name": my_file})
The boundary=------------------------1493314174182091246926147632 part belongs to the multipart content type. I copied it from a POST request made by my web browser.
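As a side note, Django's test client already defines a multipart content type with a valid boundary, so hard-coding one can be avoided:

from django.test.client import MULTIPART_CONTENT

request = self.factory.post('/', content_type=MULTIPART_CONTENT)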
None of the previous answers worked for me. This seems to be an alternative solution:
from django.core.files.uploadedfile import SimpleUploadedFile

with open(file, "rb") as f:
    file_upload = SimpleUploadedFile("file", f.read(), content_type="text/html")
data = {
    "file": file_upload
}
request = request_factory.post("/api/whatever", data=data, format='multipart')
Be sure that 'file' is really the name of your file input field in your form. I got an error when it was not (use the field's name attribute, not its id).
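Note that the format='multipart' keyword above comes from DRF's APIRequestFactory. With Django's plain RequestFactory, multipart encoding is already the default for post(), so a minimal sketch needs no extra arguments:

from django.test.client import RequestFactory
from django.core.files.uploadedfile import SimpleUploadedFile

factory = RequestFactory()
upload = SimpleUploadedFile("test.html", b"<html></html>", content_type="text/html")
# multipart/form-data is RequestFactory's default encoding for post()
request = factory.post("/api/whatever", data={"file": upload})
print(request.FILES["file"].name)  # -> "test.html"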