Django file upload progress get cache returns None - django

Django file upload progress: the JSON progress request is getting a null response.
view.py
from django.core.cache import cache
from django.http import HttpResponse


def upload_progress(request):
    """
    A view to report back on upload progress.
    Return JSON object with information about the progress of an upload.
    Copied from:
    http://djangosnippets.org/snippets/678/
    See upload.py for file upload handler.
    """
    #import ipdb
    #ipdb.set_trace()
    progress_id = ''
    if 'X-Progress-ID' in request.GET:
        progress_id = request.GET['X-Progress-ID']
    elif 'X-Progress-ID' in request.META:
        progress_id = request.META['X-Progress-ID']
    if progress_id:
        from django.utils import simplejson
        cache_key = "%s_%s" % (request.META['REMOTE_ADDR'], progress_id)
        data = cache.get(cache_key)
        return HttpResponse(simplejson.dumps(data))
UploadProgressCachedHandler.py
from django.core.files.uploadhandler import FileUploadHandler
from django.core.cache import cache


class UploadProgressCachedHandler(FileUploadHandler):
    """
    Tracks progress for file uploads.
    The http post request must contain a header or query parameter, 'X-Progress-ID',
    which should contain a unique string to identify the upload to be tracked.
    Copied from:
    http://djangosnippets.org/snippets/678/
    See views.py for upload_progress function...
    """

    def __init__(self, request=None):
        super(UploadProgressCachedHandler, self).__init__(request)
        self.progress_id = None
        self.cache_key = None

    def handle_raw_input(self, input_data, META, content_length, boundary, encoding=None):
        self.content_length = content_length
        if 'X-Progress-ID' in self.request.GET:
            self.progress_id = self.request.GET['X-Progress-ID']
        elif 'X-Progress-ID' in self.request.META:
            self.progress_id = self.request.META['X-Progress-ID']
        if self.progress_id:
            self.cache_key = "%s_%s" % (self.request.META['REMOTE_ADDR'], self.progress_id)
            cache.set(self.cache_key, {
                'length': self.content_length,
                'uploaded': 0
            })

    def new_file(self, field_name, file_name, content_type, content_length, charset=None):
        pass

    def receive_data_chunk(self, raw_data, start):
        if self.cache_key:
            data = cache.get(self.cache_key)
            data['uploaded'] += self.chunk_size
            cache.set(self.cache_key, data)
            #cache.set(self.cache_key, 5000)
        return raw_data

    def file_complete(self, file_size):
        pass

    def upload_complete(self):
        if self.cache_key:
            cache.delete(self.cache_key)
I am setting the cache with UploadProgressCachedHandler, but when I try to retrieve it via the JSON request, the data comes back as None. The cache_key is generated correctly.
Please help.

I guess you are trying to make the JSON request when the flow has already reached upload_complete. At that point the cache_key has been deleted from the cache (that's why it is empty).
Are you using the Django development server? This built-in server only handles one request at a time, which in your situation means that the upload completes first and only then is the JSON request processed. You can try another server (e.g. Apache) able to handle multiple requests.
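For completeness, the progress handler also has to be installed for the view that actually receives the upload, either globally via the FILE_UPLOAD_HANDLERS setting or per request, following the "modifying upload handlers on the fly" pattern from the Django docs. A minimal sketch (the view names are placeholders, and UploadProgressCachedHandler is assumed to be importable from wherever you defined it):
from django.http import HttpResponse
from django.views.decorators.csrf import csrf_exempt, csrf_protect


@csrf_exempt
def upload_file(request):
    # Install the progress-tracking handler before anything touches request.POST/FILES.
    request.upload_handlers.insert(0, UploadProgressCachedHandler(request))
    return _upload_file(request)


@csrf_protect
def _upload_file(request):
    # Placeholder body: accessing request.FILES is what drives the upload handlers.
    return HttpResponse("received %d file(s)" % len(request.FILES))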


Django unit test - patch multiple requests for external api

I need to write one test for code that calls an external API, which makes two requests.
First, I need to check whether the user is valid.
So I handled this with a decorator in ./users/utils.py:
import requests

def login_decorator(func):
    def wrapper(self, request, *args, **kwargs):
        # this access token is issued by external api
        access_token = request.headers.get('Authorization', None)
        # first requests, which gives me an info about user.
        response = requests.get(
            'https://kapi.kakao.com/v2/user/me',
            headers={'Authorization': f'Bearer {access_token}'}
        )
        user_email = response.json()['kakao_account']['email']
        request.user = User.objects.get(email=user_email)
        return func(self, request, *args, **kwargs)
    return wrapper
Then I need to send that user a message with the external API again.
This code is in ./bids/views.py:
import requests

class BiddingView(View):
    # it uses login_decorator above
    @login_decorator
    def post(self, request, art_id):
        try:
            user = request.user
            data = json.loads(request.body)
            with transaction.atomic():
                # handle things with bidding system #
                # the external api requires its token to use a message api.
                token = request.headers.get('Authorization', None)
                # second requests, with post method
                response = requests.post(
                    'https://kapi.kakao.com/v2/api/talk/memo/default/send',
                    headers={'Authorization': f'Bearer {token}'},
                    data={"template_object": json.dumps({'message': 'contents'})}
                )
            return JsonResponse({'MESSAGE': 'SUCCESS'}, status=200)
        except KeyError:
            return JsonResponse({'MESSAGE': 'KEY ERROR'}, status=400)
This is my unit test code for BiddingView so far, which obviously only works for the decorator:
@patch('users.utils.requests')
def test_kakao_message_success(self, mock_requests):
    class MockResponse:
        def json(self):
            return {'kakao_account': {'email': 'test@test.com'}}

    mock_requests.get = MagicMock(return_value=MockResponse())
    header = {'HTTP_Authorization': 'ACCESS_TOKEN'}
    body = {'offered_price': 10000}
    response = client.post(
        '/bidding/1',
        json.dumps(body),
        content_type='application/json', **header
    )
but I need to patch both users.utils.requests and bids.views.requests for my mock test.
@patch('users.utils.requests')  # + @patch('bids.views.requests')
def test_kakao_message_success(self, mock_requests):
I want to know how to patch two requests at the same time.
I simplified your source code for ease of testing so that we can concentrate on the problem, which is the external requests.
./utils.py
import requests

def login_decorator(func):
    def wrapper(self, request, *args, **kwargs):
        # first requests, which gives me an info about user.
        response = requests.get(
            'https://kapi.kakao.com/v2/user/me',
            headers={'Authorization': 'Bearer access_token'}
        )
        request.user = response.json()['kakao_account']['email']
        return func(self, request, *args, **kwargs)
    return wrapper
./views.py
import json
import requests

from utils import login_decorator

class BiddingView:
    # it uses login_decorator above
    @login_decorator
    def post(self, request, art_id):
        print(f"The current user is {request.user}")
        # second requests, with post method
        response = requests.post(
            'https://kapi.kakao.com/v2/api/talk/memo/default/send',
            headers={'Authorization': 'Bearer token'},
            data={"template_object": json.dumps({'message': 'contents'})}
        )
        return response.text
Solution 1 - Manual patching of requests for each source file
You can stack the unittest.mock.patch decorator, one after the other.
from unittest.mock import MagicMock, patch

from views import BiddingView


class MockLoginResponse:
    def json(self):
        return {'kakao_account': {'email': 'test@test.com'}}


class MockViewResponse:
    text = "He alone, who owns the youth, gains the future."


@patch('utils.requests.get', MagicMock(return_value=MockLoginResponse()))
@patch('views.requests.post', MagicMock(return_value=MockViewResponse()))
def test_kakao_message_success():
    response = BiddingView().post(
        request=MagicMock(),
        art_id="some art"
    )
    print(f"Response: {response}")
Output:
__________________________________________________________________________________ test_kakao_message_success ___________________________________________________________________________________
------------------------------------------------------------------------------------- Captured stdout call --------------------------------------------------------------------------------------
The current user is test@test.com
Response: He alone, who owns the youth, gains the future.
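If you would rather keep patching the whole requests module per source file, exactly as in the question, the decorators stack the same way with two mock arguments. Note that stacked patch decorators are applied bottom-up, so the decorator closest to the function provides the first argument. A sketch reusing the mock response classes above:
@patch('views.requests')   # outer decorator -> second mock argument
@patch('utils.requests')   # inner decorator -> first mock argument
def test_kakao_message_success_two_mocks(mock_utils_requests, mock_views_requests):
    mock_utils_requests.get = MagicMock(return_value=MockLoginResponse())
    mock_views_requests.post = MagicMock(return_value=MockViewResponse())
    response = BiddingView().post(
        request=MagicMock(),
        art_id="some art"
    )
    print(f"Response: {response}")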
Solution 2.1 - Instead of patching requests per file, patch the exact target request
This requires installing the library https://pypi.org/project/requests-mock/
from unittest.mock import MagicMock

import requests_mock

from views import BiddingView


def test_kakao_message_success_with_library():
    with requests_mock.Mocker() as requests_mocker:
        # Mock the external requests
        requests_mocker.get(
            "https://kapi.kakao.com/v2/user/me",
            json={'kakao_account': {'email': 'test@test.com'}},
        )
        requests_mocker.post(
            "https://kapi.kakao.com/v2/api/talk/memo/default/send",
            text="He alone, who owns the youth, gains the future.",
        )

        response = BiddingView().post(
            request=MagicMock(),
            art_id="some art"
        )
        print(f"Response: {response}")
Output:
Same as above
Solution 2.2 - Instead of patching requests per file, patch the exact target request. But now, apply it automatically to any test using pytest's autouse feature.
from unittest.mock import MagicMock

import pytest
import requests_mock

from views import BiddingView


@pytest.fixture(autouse=True)
def setup_external_requests():
    with requests_mock.Mocker() as requests_mocker:
        # Mock the external requests
        requests_mocker.get(
            "https://kapi.kakao.com/v2/user/me",
            json={'kakao_account': {'email': 'test@test.com'}},
        )
        requests_mocker.post(
            "https://kapi.kakao.com/v2/api/talk/memo/default/send",
            text="He alone, who owns the youth, gains the future.",
        )
        # It is required to perform a yield instead of return to not teardown the requests_mocker
        yield requests_mocker


def test_kakao_message_success_with_library_2():
    response = BiddingView().post(
        request=MagicMock(),
        art_id="some art"
    )
    print(f"Response: {response}")
Output:
Same as above

How to pass data saved from a POST method to the GET method using REST API Django (without a model)?

I have created an API that allows me to upload an image using the POST method in Postman. After submission, I want to display that image name after making a GET request. I am not using any model and I don't intend to grab the image from the directory it is stored in, since I will be uploading images to a server later.
I have looked at multiple sources. A few examples are this, and this.
This is my current code so far but not successful:
views.py:
class API(APIView):
    parser_classes = (MultiPartParser,)

    def get(self, request, *args, **kwargs):
        name = self.request.GET.get('image')
        if name:
            return Response({"img_name": name}, status=200)
        return Response({"img_name": None}, status=400)

    def post(self, request):
        file = self.request.data
        img_file = file['image']  # store the image data in this variable
        if img_file:
            uploaded_file = img_file
            img = [{"image_name": uploaded_file}]
            serializer = ImgSerializer(img, many=True).data
            return Response(serializer, status=200)
        else:
            return Response("Please upload", status=400)
serializers.py:
from rest_framework import serializers

class ImgSerializer(serializers.Serializer):
    image_name = serializers.CharField()
My expected result within GET request should be like this:
{'image_name' : 'image_name_from_POST_Request'}
But I am getting this result instead:
None
How can I pass data from the POST request to the GET request using Django's rest framework? Is there an efficient way to deploy this requirement without using a model?
I figured it out. I just created a JSON file in the POST method and stored the necessary data in it. Finally, in order to view the data within the GET method, I opened the file and returned it as a Response.
views.py:
class API(APIView):
    parser_classes = (MultiPartParser,)

    def get(self, request):
        with open('data.txt') as json_file:
            data = json.load(json_file)
        if data:
            return Response(data, status=200)
        return Response({"name": None}, status=400)

    def post(self, request):
        posted_file = self.request.data
        img_file = posted_file['image']
        if img_file:
            uploaded_file = img_file
            # store the file name; the uploaded file object itself is not JSON serializable
            json_data = {"image_name": uploaded_file.name}
            data = {}
            data['key'] = []
            data['key'].append(json_data)
            with open('data.txt', 'w') as outfile:
                json.dump(data, outfile)
            serializer = ImgSerializer(data['key'], many=True).data
            return Response(serializer, status=200)
        else:
            return Response("Please upload", status=400)
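An alternative that avoids writing to disk (a sketch, not part of the original answer) is to park the value in Django's cache framework between the POST and the GET; the cache key name here is purely illustrative:
from django.core.cache import cache
from rest_framework.parsers import MultiPartParser
from rest_framework.response import Response
from rest_framework.views import APIView


class API(APIView):
    parser_classes = (MultiPartParser,)

    def post(self, request):
        img_file = request.data.get('image')
        if not img_file:
            return Response("Please upload", status=400)
        # "last_uploaded_image" is just an illustrative key name
        cache.set("last_uploaded_image", {"image_name": img_file.name})
        return Response({"image_name": img_file.name}, status=200)

    def get(self, request):
        data = cache.get("last_uploaded_image")
        if data:
            return Response(data, status=200)
        return Response({"image_name": None}, status=400)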

Vimeo 'Replace' API Endpoint Not Changing Thumbnail

I am using Vimeo's API for the users of my app to upload videos, or replace their existing video with a new one. I am using a Vimeo Client to help me make the calls in my Django Application. Uploading works without any issues, but when I try to replace an existing video with a new one, the thumbnail stays as the old video. If you play the video, it will play the new one, but the thumbnail never changes.
Model Method that Uploads/Replaces
def vimeo_upload(self):
    media_file = self.video_file
    if media_file and os.path.exists(media_file.path):
        v = vimeo.VimeoClient(token=settings.VIMEO_ACCESS_TOKEN, key=settings.VIMEO_API_KEY,
                              secret=settings.VIMEO_API_SECRET)
        if self.video_url is None:
            try:
                video_uri = v.upload(media_file.path)
            except AssertionError as exc:
                logging.error('Vimeo Error: %s Video: %s' % (exc, media_file.path))
            else:
                self.video_url = video_uri
        else:
            try:
                v.replace(video_uri=self.video_url, filename=media_file.path)
            except Exception as exc:
                self.video_url = None
                logging.error('Vimeo Replace Error: %s Video: %s' % (exc, media_file.path))
        # set the video title, description, etc.
        if self.video_url:
            try:
                # convert locale from en-us form to en
                v.patch(self.video_url, data={'description': self.customer.full_name, })
            except Exception as exc:
                logging.error('Vimeo Patch Error: %s Video: %s' % (exc, media_file.path))
Vimeo Client Model, and UploadVideoMixin
class UploadVideoMixin(object):
    """Handle uploading a new video to the Vimeo API."""
    UPLOAD_ENDPOINT = '/me/videos'
    REPLACE_ENDPOINT = '{video_uri}/files'

    def upload(self, filename, upgrade_to_1080=False):
        """Upload the named file to Vimeo."""
        ticket = self.post(
            self.UPLOAD_ENDPOINT,
            data={'type': 'streaming',
                  'upgrade_to_1080': 'true' if upgrade_to_1080 else 'false'},
            params={'fields': 'upload_link,complete_uri'})
        return self._perform_upload(filename, ticket)

    def replace(self, video_uri, filename, upgrade_to_1080=False):
        """Replace the video at the given uri with the named source file."""
        uri = self.REPLACE_ENDPOINT.format(video_uri=video_uri)
        ticket = self.put(
            uri,
            data={'type': 'streaming',
                  'upgrade_to_1080': 'true' if upgrade_to_1080 else 'false'},
            params={'fields': 'upload_link,complete_uri'})
        return self._perform_upload(filename, ticket)

    def _perform_upload(self, filename, ticket):
        """Take an upload ticket and perform the actual upload."""
        if ticket.status_code != 201:
            raise UploadTicketCreationFailure(ticket, "Failed to create an upload ticket")
        ticket = ticket.json()
        # Perform the actual upload.
        target = ticket['upload_link']
        last_byte = 0
        # Try to get size of obj by path. If provided obj is not a file path
        # find the size of file-like object.
        try:
            size = os.path.getsize(filename)
            with io.open(filename, 'rb') as f:
                while last_byte < size:
                    try:
                        self._make_pass(target, f, size, last_byte)
                    except requests.exceptions.Timeout:
                        # If there is a timeout here, we are okay with it, since
                        # we'll check and resume.
                        pass
                    last_byte = self._get_progress(target, size)
        except TypeError:
            size = len(filename.read())
            f = filename
            while last_byte < size:
                try:
                    self._make_pass(target, f, size, last_byte)
                except requests.exceptions.Timeout:
                    # If there is a timeout here, we are okay with it, since
                    # we'll check and resume.
                    pass
                last_byte = self._get_progress(target, size)
        # Perform the finalization and get the location.
        finalized_resp = self.delete(ticket['complete_uri'])
        if finalized_resp.status_code != 201:
            raise VideoCreationFailure(finalized_resp, "Failed to create the video")
        return finalized_resp.headers.get('Location', None)

    def _get_progress(self, upload_target, filesize):
        """Test the completeness of the upload."""
        progress_response = self.put(
            upload_target,
            headers={'Content-Range': 'bytes */*'})
        range_recv = progress_response.headers.get('Range', None)
        _, last_byte = range_recv.split('-')
        return int(last_byte)

    def _make_pass(self, upload_target, f, size, last_byte):
        """Make a pass at uploading.

        This particular function may do many things. If this is a large upload
        it may terminate without having completed the upload. This can also
        occur if there are network issues or any other interruptions. These
        can be recovered from by checking with the server to see how much it
        has and resuming the connection.
        """
        response = self.put(
            upload_target,
            timeout=None,
            headers={
                'Content-Length': str(size),
                'Content-Range': 'bytes: %d-%d/%d' % (last_byte, size, size)
            }, data=f)
        if response.status_code != 200:
            raise VideoUploadFailure(response, "Unexpected status code on upload")


class VimeoClient(ClientCredentialsMixin, AuthorizationCodeMixin, UploadMixin):
    """Client handle for the Vimeo API."""
    API_ROOT = "https://api.vimeo.com"
    HTTP_METHODS = set(('head', 'get', 'post', 'put', 'patch', 'options', 'delete'))
    ACCEPT_HEADER = "application/vnd.vimeo.*;version=3.2"
    USER_AGENT = "pyvimeo 0.3.10; (http://developer.vimeo.com/api/docs)"

    def __init__(self, token=None, key=None, secret=None, *args, **kwargs):
        """Prep the handle with the authentication information."""
        self.token = token
        self.app_info = (key, secret)
        self._requests_methods = dict()
        # Make sure we have enough info to be useful.
        assert token is not None or (key is not None and secret is not None)

    # Internally we back this with an auth mechanism for Requests.
    @property
    def token(self):
        return self._token.token

    @token.setter
    def token(self, value):
        self._token = _BearerToken(value) if value else None

    def __getattr__(self, name):
        """This is where we get the function for the verb that was just
        requested.

        From here we can apply the authentication information we have.
        """
        if name not in self.HTTP_METHODS:
            raise AttributeError("%r is not an HTTP method" % name)
        # Get the Requests based function to use to preserve their defaults.
        request_func = getattr(requests, name, None)
        if request_func is None:
            raise AttributeError(
                "%r could not be found in the backing lib" % name
            )

        @wraps(request_func)
        def caller(url, jsonify=True, **kwargs):
            """Hand off the call to Requests."""
            headers = kwargs.get('headers', dict())
            headers['Accept'] = self.ACCEPT_HEADER
            headers['User-Agent'] = self.USER_AGENT
            if jsonify \
                    and 'data' in kwargs \
                    and isinstance(kwargs['data'], (dict, list)):
                kwargs['data'] = json.dumps(kwargs['data'])
                headers['Content-Type'] = 'application/json'
            kwargs['timeout'] = kwargs.get('timeout', (1, 30))
            kwargs['auth'] = kwargs.get('auth', self._token)
            kwargs['headers'] = headers
            if not url[:4] == "http":
                url = self.API_ROOT + url
            response = request_func(url, **kwargs)
            if response.status_code == 429:
                raise APIRateLimitExceededFailure(
                    response, 'Too many API requests'
                )
            return response
        return caller


class _BearerToken(requests.auth.AuthBase):
    """Model the bearer token and apply it to the request."""
    def __init__(self, token):
        self.token = token

    def __call__(self, request):
        request.headers['Authorization'] = 'Bearer ' + self.token
        return request
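If the replace itself succeeds but the old thumbnail sticks around, one thing worth trying (a hedged sketch, not verified against the code above; it assumes Vimeo API v3's POST /videos/{video_id}/pictures endpoint, which asks Vimeo to generate a new thumbnail from a frame when given a time and an active flag) is to request a fresh picture after the replace completes:
def refresh_thumbnail(self, seconds=1.0):
    """Hypothetical model helper: ask Vimeo to regenerate the thumbnail from a
    frame of the new source file. Assumes self.video_url holds the video URI
    (e.g. '/videos/12345')."""
    v = vimeo.VimeoClient(token=settings.VIMEO_ACCESS_TOKEN,
                          key=settings.VIMEO_API_KEY,
                          secret=settings.VIMEO_API_SECRET)
    response = v.post('%s/pictures' % self.video_url,
                      data={'time': seconds, 'active': True})
    if response.status_code not in (200, 201):
        logging.error('Vimeo thumbnail refresh failed: %s' % response.text)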

I have a middleware where I want to log every request/response. How can I access the POST data?

I have this middleware
import logging

request_logger = logging.getLogger('api.request.logger')


class LoggingMiddleware(object):
    def process_response(self, request, response):
        request_logger.log(logging.DEBUG,
                           "GET: {}. POST: {} response code: {}. response "
                           "content: {}".format(request.GET, request.DATA,
                                                response.status_code,
                                                response.content))
        return response
The problem is that the request in the process_response method has no .POST nor .DATA nor .body. I am using django-rest-framework and my requests have Content-Type: application/json.
Note that if I put the logging in the process_request method, it has .body and everything I need. However, I need both request and response in a single log entry.
Here is the complete solution I made:
"""
Api middleware module
"""
import logging
request_logger = logging.getLogger('api.request.logger')
class LoggingMiddleware(object):
"""
Provides full logging of requests and responses
"""
_initial_http_body = None
def process_request(self, request):
self._initial_http_body = request.body # this requires because for some reasons there is no way to access request.body in the 'process_response' method.
def process_response(self, request, response):
"""
Adding request and response logging
"""
if request.path.startswith('/api/') and \
(request.method == "POST" and
request.META.get('CONTENT_TYPE') == 'application/json'
or request.method == "GET"):
request_logger.log(logging.DEBUG,
"GET: {}. body: {} response code: {}. "
"response "
"content: {}"
.format(request.GET, self._initial_http_body,
response.status_code,
response.content), extra={
'tags': {
'url': request.build_absolute_uri()
}
})
return response
Note: this
    'tags': {
        'url': request.build_absolute_uri()
    }
will allow you to filter by URL in Sentry.
Andrey's solution will break on concurrent requests. You'd need to store the body somewhere in the request scope and fetch it in the process_response().
class RequestLoggerMiddleware(object):
    def process_request(self, request):
        request._body_to_log = request.body

    def process_response(self, request, response):
        if not hasattr(request, '_body_to_log'):
            return response
        msg = "method=%s path=%s status=%s request.body=%s response.body=%s"
        args = (request.method,
                request.path,
                response.status_code,
                request._body_to_log,
                response.content)
        request_logger.info(msg, *args)
        return response
All the answers above have one potential problem: a big request.body passed to the server. In Django, request.body is a property (from the framework):
@property
def body(self):
    if not hasattr(self, '_body'):
        if self._read_started:
            raise RawPostDataException("You cannot access body after reading from request's data stream")
        try:
            self._body = self.read()
        except IOError as e:
            six.reraise(UnreadablePostError, UnreadablePostError(*e.args), sys.exc_info()[2])
        self._stream = BytesIO(self._body)
    return self._body
The Django framework accesses body directly in only one case (from the framework):
elif self.META.get('CONTENT_TYPE', '').startswith('application/x-www-form-urlencoded'):
As you can see, the body property reads the entire request into memory. As a result, your server can simply crash. Moreover, it becomes vulnerable to a DoS attack.
In this case I would suggest using another method of the HttpRequest class (from the framework):
def readlines(self):
    return list(iter(self))
So you no longer need to do this:
def process_request(self, request):
    request._body_to_log = request.body
you can simply do:
def process_response(self, request, response):
    msg = "method=%s path=%s status=%s request.body=%s response.body=%s"
    args = (request.method,
            request.path,
            response.status_code,
            request.readlines(),
            response.content)
    request_logger.info(msg, *args)
    return response
EDIT: this approach with request.readlines() has problems. Sometimes it does not log anything.
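A middle ground between the two approaches above (a sketch, not from any of the original answers): keep the per-request attribute so concurrent requests don't clash, but cap how much of the body is kept for the log entry. The body is still read into memory once, as Django itself does for form posts, so this only bounds what the logger holds on to; the 10 KB limit is arbitrary:
import logging

from django.utils.deprecation import MiddlewareMixin

request_logger = logging.getLogger('api.request.logger')

MAX_LOGGED_BODY = 10 * 1024  # arbitrary cap on what we keep for logging


class BoundedRequestLoggerMiddleware(MiddlewareMixin):
    def process_request(self, request):
        # Accessing request.body here buffers the stream, so the view can still read it.
        request._body_to_log = request.body[:MAX_LOGGED_BODY]

    def process_response(self, request, response):
        if hasattr(request, '_body_to_log'):
            request_logger.info(
                "method=%s path=%s status=%s request.body=%s response.body=%s",
                request.method, request.path, response.status_code,
                request._body_to_log, response.content[:MAX_LOGGED_BODY])
        return response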
It's frustrating and surprising that there is no easy-to-use request logging package in Django.
So I created one myself. Check it out: https://github.com/rhumbixsf/django-request-logging.git
Uses the logging system so it is easy to configure. This is what you get with DEBUG level:
GET/POST request url
POST BODY if any
GET/POST request url - response code
Response body
This is like accessing the form data when creating a form: you must use request.POST for it (perhaps request.FILES is something you would want to log as well).
class LoggingMiddleware(object):
    def process_response(self, request, response):
        request_logger.log(logging.DEBUG,
                           "GET: {}. POST: {} response code: {}. response "
                           "content: {}".format(request.GET, request.POST,
                                                response.status_code,
                                                response.content))
        return response
See the Django request documentation for the available request properties.
You can use something like the following:
"""
Middleware to log requests and responses.
"""
import socket
import time
import json
import logging
request_logger = logging.getLogger(__name__)
class RequestLogMiddleware:
"""Request Logging Middleware."""
def __init__(self, get_response):
self.get_response = get_response
def __call__(self, request):
log_data = {}
# add request payload to log_data
req_body = json.loads(request.body.decode("utf-8")) if request.body else {}
log_data["request_body"] = req_body
# request passes on to controller
response = self.get_response(request)
# add response payload to our log_data
if response and response["content-type"] == "application/json":
response_body = json.loads(response.content.decode("utf-8"))
log_data["response_body"] = response_body
request_logger.info(msg=log_data)
return response
# Log unhandled exceptions as well
def process_exception(self, request, exception):
try:
raise exception
except Exception as e:
request_logger.exception("Unhandled Exception: " + str(e))
return exception
You can also check this out - log requests via middleware explains this.
Also note that response.content returns a bytestring and not a unicode string, so if you need to print unicode you need to call response.content.decode("utf-8").
You cannot access request.POST (or equivalently request.body) in the process_response part of the middleware; here is a ticket raising the issue. You can, however, have it in the process_request part. The previous answers give class-based middleware; Django 2.0+ and 3.0+ also allow function-based middleware.
from .models import RequestData  # Model that stores all the request data

def requestMiddleware(get_response):
    # One-time configuration and initialization.
    def middleware(request):
        # Code to be executed for each request before
        # the view (and later middleware) are called.
        try:
            metadata = request.META
        except:
            metadata = 'no data'
        try:
            data = request.body
        except:
            data = 'no data'
        try:
            u = str(request.user)
        except:
            u = 'nouser'
        response = get_response(request)
        w = RequestData.objects.create(userdata=u, metadata=metadata, data=data)
        w.save()
        return response
    return middleware
The RequestData model looks as follows:
class RequestData(models.Model):
    time = models.DateTimeField(auto_now_add=True)
    userdata = models.CharField(max_length=10000, default=' ')
    data = models.CharField(max_length=20000, default=' ')
    metadata = models.CharField(max_length=20000, default=' ')
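Whichever variant you pick, remember that the middleware only runs once it is registered in settings; the dotted path below is a placeholder for wherever your class or function actually lives:
# settings.py
MIDDLEWARE = [
    # ... Django's default middleware ...
    "myproject.middleware.RequestLogMiddleware",  # hypothetical path, adjust to your project
]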

Django: Validate file type of uploaded file

I have an app that lets people upload files, represented as UploadedFiles. However, I want to make sure that users only upload xml files. I know I can do this using magic, but I don't know where to put this check - I can't put it in the clean function since the file is not yet uploaded when clean runs, as far as I can tell.
Here's the UploadedFile model:
class UploadedFile(models.Model):
    """This represents a file that has been uploaded to the server."""
    STATE_UPLOADED = 0
    STATE_ANNOTATED = 1
    STATE_PROCESSING = 2
    STATE_PROCESSED = 4
    STATES = (
        (STATE_UPLOADED, "Uploaded"),
        (STATE_ANNOTATED, "Annotated"),
        (STATE_PROCESSING, "Processing"),
        (STATE_PROCESSED, "Processed"),
    )
    status = models.SmallIntegerField(choices=STATES,
                                      default=0, blank=True, null=True)
    file = models.FileField(upload_to=settings.XML_ROOT)
    project = models.ForeignKey(Project)

    def __unicode__(self):
        return self.file.name

    def name(self):
        return os.path.basename(self.file.name)

    def save(self, *args, **kwargs):
        if not self.status:
            self.status = self.STATE_UPLOADED
        super(UploadedFile, self).save(*args, **kwargs)

    def delete(self, *args, **kwargs):
        os.remove(self.file.path)
        self.file.delete(False)
        super(UploadedFile, self).delete(*args, **kwargs)

    def get_absolute_url(self):
        return u'/upload/projects/%d' % self.id

    def clean(self):
        if not "XML" in magic.from_file(self.file.url):
            raise ValidationError(u'Not an xml file.')


class UploadedFileForm(forms.ModelForm):
    class Meta:
        model = UploadedFile
        exclude = ('project',)
Validating files is a common challenge, so I would like to use a validator:
import magic

from django.core.exceptions import ValidationError
from django.utils.deconstruct import deconstructible
from django.template.defaultfilters import filesizeformat


@deconstructible
class FileValidator(object):
    error_messages = {
        'max_size': ("Ensure this file size is not greater than %(max_size)s."
                     " Your file size is %(size)s."),
        'min_size': ("Ensure this file size is not less than %(min_size)s. "
                     "Your file size is %(size)s."),
        'content_type': "Files of type %(content_type)s are not supported.",
    }

    def __init__(self, max_size=None, min_size=None, content_types=()):
        self.max_size = max_size
        self.min_size = min_size
        self.content_types = content_types

    def __call__(self, data):
        if self.max_size is not None and data.size > self.max_size:
            params = {
                'max_size': filesizeformat(self.max_size),
                'size': filesizeformat(data.size),
            }
            raise ValidationError(self.error_messages['max_size'],
                                  'max_size', params)
        if self.min_size is not None and data.size < self.min_size:
            params = {
                'min_size': filesizeformat(self.min_size),
                'size': filesizeformat(data.size)
            }
            raise ValidationError(self.error_messages['min_size'],
                                  'min_size', params)
        if self.content_types:
            content_type = magic.from_buffer(data.read(), mime=True)
            data.seek(0)
            if content_type not in self.content_types:
                params = {'content_type': content_type}
                raise ValidationError(self.error_messages['content_type'],
                                      'content_type', params)

    def __eq__(self, other):
        return (
            isinstance(other, FileValidator) and
            self.max_size == other.max_size and
            self.min_size == other.min_size and
            self.content_types == other.content_types
        )
Then you can use FileValidator in your models.FileField or forms.FileField as follows:
validate_file = FileValidator(max_size=1024 * 100,
                              content_types=('application/xml',))
file = models.FileField(upload_to=settings.XML_ROOT,
                        validators=[validate_file])
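For the forms.FileField case mentioned above, the same validator instance can be attached to a form field; a small sketch (the form and field names are illustrative):
from django import forms

validate_file = FileValidator(max_size=1024 * 100,
                              content_types=('application/xml',))

class UploadForm(forms.Form):
    file = forms.FileField(validators=[validate_file])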
From Django 1.11, you can also use FileExtensionValidator.
from django.core.validators import FileExtensionValidator

class UploadedFile(models.Model):
    file = models.FileField(upload_to=settings.XML_ROOT,
                            validators=[FileExtensionValidator(allowed_extensions=['xml'])])
Note this must be used on a FileField and won't work on a CharField (for example), since the validator validates on value.name.
ref: https://docs.djangoproject.com/en/dev/ref/validators/#fileextensionvalidator
For posterity: the solution is to use the read method and pass that to magic.from_buffer.
class UploadedFileForm(ModelForm):
    def clean_file(self):
        file = self.cleaned_data.get("file", False)
        filetype = magic.from_buffer(file.read())
        if not "XML" in filetype:
            raise ValidationError("File is not XML.")
        return file

    class Meta:
        model = models.UploadedFile
        exclude = ('project',)
I think what you want to do is to clean the uploaded file in Django's Form.clean_your_field_name_here() method - the data is available on your system by then if it was submitted as a normal HTTP POST request.
Also, if you consider this inefficient, explore the options of the different Django file upload backends and how to do streaming processing.
If you need to consider the security of the system when dealing with uploads:
Make sure the uploaded file has the correct extension
Make sure the mimetype matches the file extension
In case you are worried about users uploading exploit files (to attack your site):
Rewrite all the file contents on save to get rid of a possible extra (exploit) payload (so you cannot embed HTML in XML which the browser would interpret as a site-origin HTML file when downloading)
Make sure you use the Content-Disposition header on download
Some more info here: http://opensourcehacker.com/2013/07/31/secure-user-uploads-and-exploiting-served-user-content/
Below is my example of how I sanitize uploaded images:
class Example(models.Model):
    image = models.ImageField(upload_to=filename_gen("participant-images/"), blank=True, null=True)


class Example(forms.ModelForm):

    def clean_image(self):
        """ Clean the uploaded image attachment.
        """
        image = self.cleaned_data.get('image', False)
        utils.ensure_safe_user_image(image)
        return image


def ensure_safe_user_image(image):
    """ Perform various checks to sanitize user uploaded image data.

    Checks that the image has a valid header, then rewrites the image data in memory.

    :param: InMemoryUploadedFile instance (Django form field value)

    :raise: ValidationError in the case the image content has issues
    """
    if not image:
        return

    assert isinstance(image, InMemoryUploadedFile), "Image rewrite has been only tested on in-memory upload backend"

    # Make sure the image is not too big, so that PIL trashes the server
    if image:
        if image._size > 4*1024*1024:
            raise ValidationError("Image file too large - the limit is 4 megabytes")

    # Then do a header peek of what the image claims to be
    image.file.seek(0)
    mime = magic.from_buffer(image.file.getvalue(), mime=True)
    if mime not in ("image/png", "image/jpeg"):
        raise ValidationError("Image is not valid. Please upload a JPEG or PNG image.")
    doc_type = mime.split("/")[-1].upper()

    # Read data from cStringIO instance
    image.file.seek(0)
    pil_image = Image.open(image.file)

    # Rewrite the image contents in the memory
    # (bails out with exception on bad data)
    buf = StringIO()
    pil_image.thumbnail((2048, 2048), Image.ANTIALIAS)
    pil_image.save(buf, doc_type)
    image.file = buf

    # Make sure the image has valid extension (can't upload .htm image)
    extension = unicode(doc_type.lower())
    if not image.name.endswith(u".%s" % extension):
        image.name = image.name + u"." + extension
I recently found an interesting package that can do upload file validation. You can see the package here. The package's approach is similar to Sultan's answer, so we can just implement it right away.
from upload_validator import FileTypeValidator

validator = FileTypeValidator(
    allowed_types=['application/msword'],
    allowed_extensions=['.doc', '.docx']
)

file_resource = open('sample.doc')

# ValidationError will be raised in case of invalid type or extension
validator(file_resource)
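Since forms.FileField accepts any callable validator, the package's validator can also be wired into a form to enforce the XML-only rule from the original question (a sketch; the MIME types listed are the standard XML ones, adjust as needed):
from django import forms
from upload_validator import FileTypeValidator

class UploadedFileForm(forms.Form):
    # restrict uploads to XML, as in the original question
    file = forms.FileField(validators=[FileTypeValidator(
        allowed_types=['application/xml', 'text/xml'],
        allowed_extensions=['.xml'],
    )])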