Django: Validate file type of uploaded file - django

I have an app that lets people upload files, represented as UploadedFiles. However, I want to make sure that users only upload xml files. I know I can do this using magic, but I don't know where to put this check - I can't put it in the clean function since the file is not yet uploaded when clean runs, as far as I can tell.
Here's the UploadedFile model:
class UploadedFile(models.Model):
"""This represents a file that has been uploaded to the server."""
STATE_UPLOADED = 0
STATE_ANNOTATED = 1
STATE_PROCESSING = 2
STATE_PROCESSED = 4
STATES = (
(STATE_UPLOADED, "Uploaded"),
(STATE_ANNOTATED, "Annotated"),
(STATE_PROCESSING, "Processing"),
(STATE_PROCESSED, "Processed"),
)
status = models.SmallIntegerField(choices=STATES,
default=0, blank=True, null=True)
file = models.FileField(upload_to=settings.XML_ROOT)
project = models.ForeignKey(Project)
def __unicode__(self):
return self.file.name
def name(self):
return os.path.basename(self.file.name)
def save(self, *args, **kwargs):
if not self.status:
self.status = self.STATE_UPLOADED
super(UploadedFile, self).save(*args, **kwargs)
def delete(self, *args, **kwargs):
os.remove(self.file.path)
self.file.delete(False)
super(UploadedFile, self).delete(*args, **kwargs)
def get_absolute_url(self):
return u'/upload/projects/%d' % self.id
def clean(self):
if not "XML" in magic.from_file(self.file.url):
raise ValidationError(u'Not an xml file.')
class UploadedFileForm(forms.ModelForm):
class Meta:
model = UploadedFile
exclude = ('project',)

Validating files is a common challenge, so I would like to use a validator:
import magic
from django.utils.deconstruct import deconstructible
from django.template.defaultfilters import filesizeformat
#deconstructible
class FileValidator(object):
error_messages = {
'max_size': ("Ensure this file size is not greater than %(max_size)s."
" Your file size is %(size)s."),
'min_size': ("Ensure this file size is not less than %(min_size)s. "
"Your file size is %(size)s."),
'content_type': "Files of type %(content_type)s are not supported.",
}
def __init__(self, max_size=None, min_size=None, content_types=()):
self.max_size = max_size
self.min_size = min_size
self.content_types = content_types
def __call__(self, data):
if self.max_size is not None and data.size > self.max_size:
params = {
'max_size': filesizeformat(self.max_size),
'size': filesizeformat(data.size),
}
raise ValidationError(self.error_messages['max_size'],
'max_size', params)
if self.min_size is not None and data.size < self.min_size:
params = {
'min_size': filesizeformat(self.min_size),
'size': filesizeformat(data.size)
}
raise ValidationError(self.error_messages['min_size'],
'min_size', params)
if self.content_types:
content_type = magic.from_buffer(data.read(), mime=True)
data.seek(0)
if content_type not in self.content_types:
params = { 'content_type': content_type }
raise ValidationError(self.error_messages['content_type'],
'content_type', params)
def __eq__(self, other):
return (
isinstance(other, FileValidator) and
self.max_size == other.max_size and
self.min_size == other.min_size and
self.content_types == other.content_types
)
Then you can use FileValidator in your models.FileField or forms.FileField as follows:
validate_file = FileValidator(max_size=1024 * 100,
content_types=('application/xml',))
file = models.FileField(upload_to=settings.XML_ROOT,
validators=[validate_file])

From django 1.11, you can also use FileExtensionValidator.
from django.core.validators import FileExtensionValidator
class UploadedFile(models.Model):
file = models.FileField(upload_to=settings.XML_ROOT,
validators=[FileExtensionValidator(allowed_extensions=['xml'])])
Note this must be used on a FileField and won't work on a CharField (for example), since the validator validates on value.name.
ref: https://docs.djangoproject.com/en/dev/ref/validators/#fileextensionvalidator

For posterity: the solution is to use the read method and pass that to magic.from_buffer.
class UploadedFileForm(ModelForm):
def clean_file(self):
file = self.cleaned_data.get("file", False)
filetype = magic.from_buffer(file.read())
if not "XML" in filetype:
raise ValidationError("File is not XML.")
return file
class Meta:
model = models.UploadedFile
exclude = ('project',)

I think what you want to do is to clean the uploaded file in Django's Form.clean_your_field_name_here() methods - the data is available on your system by then if it was submitted as normal HTTP POST request.
Also if you consider this inefficient explore the options of different Django file upload backends and how to do streaming processing.
If you need to consider the security of the system when dealing with uploads
Make sure uploaded file has correct extension
Make sure the mimetype matches the file extension
In the case you are worried about user's uploading exploit files (for attacking against your site)
Rewrite all the file contents on save to get rid of possible extra (exploit) payload (so you cannot embed HTML in XML which the browser would interpret as a site-origin HTML file when downloading)
Make sure you use content-disposition header on download
Some more info here: http://opensourcehacker.com/2013/07/31/secure-user-uploads-and-exploiting-served-user-content/
Below is my example how I sanitize the uploaded images:
class Example(models.Model):
image = models.ImageField(upload_to=filename_gen("participant-images/"), blank=True, null=True)
class Example(forms.ModelForm):
def clean_image(self):
""" Clean the uploaded image attachemnt.
"""
image = self.cleaned_data.get('image', False)
utils.ensure_safe_user_image(image)
return image
def ensure_safe_user_image(image):
""" Perform various checks to sanitize user uploaded image data.
Checks that image was valid header, then
:param: InMemoryUploadedFile instance (Django form field value)
:raise: ValidationError in the case the image content has issues
"""
if not image:
return
assert isinstance(image, InMemoryUploadedFile), "Image rewrite has been only tested on in-memory upload backend"
# Make sure the image is not too big, so that PIL trashes the server
if image:
if image._size > 4*1024*1024:
raise ValidationError("Image file too large - the limit is 4 megabytes")
# Then do header peak what the image claims
image.file.seek(0)
mime = magic.from_buffer(image.file.getvalue(), mime=True)
if mime not in ("image/png", "image/jpeg"):
raise ValidationError("Image is not valid. Please upload a JPEG or PNG image.")
doc_type = mime.split("/")[-1].upper()
# Read data from cStringIO instance
image.file.seek(0)
pil_image = Image.open(image.file)
# Rewrite the image contents in the memory
# (bails out with exception on bad data)
buf = StringIO()
pil_image.thumbnail((2048, 2048), Image.ANTIALIAS)
pil_image.save(buf, doc_type)
image.file = buf
# Make sure the image has valid extension (can't upload .htm image)
extension = unicode(doc_type.lower())
if not image.name.endswith(u".%s" % extension):
image.name = image.name + u"." + extension

I found an interesting package who can do upload file validation recently. You can see the package here. the package approach is similar with sultan answer, thus we can just implement it right away.
from upload_validator import FileTypeValidator
validator = FileTypeValidator(
allowed_types=['application/msword'],
allowed_extensions=['.doc', '.docx']
)
file_resource = open('sample.doc')
# ValidationError will be raised in case of invalid type or extension
validator(file_resource)

Related

Upload temporary file in django rest framework

I'm trying to create a way to upload xlsx files and then use celery to perform some actions.
I'm thinking this:
A view to upload the file and save it temporarily
Use celery to execute what I want in the file and then delete it.
I'm trying to do something like this:
class ImportMyFileView(APIView):
parser_classes = (FileUploadParser, )
def post(self, request, filename, format=None):
my_file = request.data["file"]
with open(f"/tmp/{my_file.name}", "wb+") as destination:
for chunk in my_file.chunks():
destination.write(chunk)
# call_celery_here()
...
Return something
I can generate the file where I want, but the problem is that when I open xlsx. I get this here:
--X-INSOMNIA-BOUNDARY
Content-Disposition: form-data
Content-Type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
PK<q^Q_rels/.rels���J1��}��{w�Dd���ЛH}���a7�0u}{�Z���I~��7C��f�G�Fo�Z+���{�����kW�#�VJ$cʪ��l� �n�0�\Q�X^:�`���d�d{�m]_�d����h��V����F�w�^F9��W��-�(F/3�O�DSU�N�l/w�{N(�[��q��T����u<��r�?焮�s9�F����M��h���'h?PKf����
Is there any detail missing?
Here is how I would do it, relying on DRF's built in functionality:
import os
from rest_framework import serializers
from django.core.files.storage import FileSystemStorage
class UploadSerializer(serializers.Serializer):
file = serializers.FileField()
class UploadView(APIView):
...
def post(self, request):
ser = UploadSerializer(request.data)
ser.is_valid(raise_exception=True)
fs = FileSystemStorage(tempfile.gettempdir())
file_name = fs.save(content=ser.validated_data['file'])
full_path = os.path.join(fs.location, file_name)
celery_func.delay(file_name=full_path)
return Response("Upload OK")
A more robust way to do this would be to create a model representing your uploads to be processed, and use the django model's FileField.
class Todo(models.Model):
xlsx_file = models.FileField(...) # read the docs on this
created_at = models.DateTimeField(auto_now_add=True)
is_complete = models.BooleanField(default=False)
class UploadView(APIView):
def post(self, request):
...
todo = Todo.objects.create(
xslx_file = ser.validated_data['file']
)
celery_func.delay(todo_id=todo.pk)
return Response("Upload OK")
Once this works you can look into using a ModelSerializer either alone, or paired with a ModelViewSet. Thats a bigger learning curve though.

Django REST Framework: upload image and rename it

My application is used to register restaurants in the system. Along with some data (like restaurant name) the frontend is sending restaurant logo. Using this code:
class Base64ImageField(serializers.ImageField):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.default_error_messages['image_not_png'] = _(
'Only .png files are supported.'
)
def to_internal_value(self, data):
if isinstance(data, six.string_types):
if 'data:' in data and ';base64,' in data:
header, data = data.split(';base64,')
try:
decoded_file = base64.b64decode(data)
except TypeError:
self.fail('invalid_image')
file_name = str(uuid.uuid4())
file_extension = self.get_file_extension(file_name, decoded_file)
if file_extension is not 'png':
self.fail('image_not_png')
complete_file_name = '%s.%s'.format(file_name, file_extension)
data = ContentFile(decoded_file, name=complete_file_name)
return super().to_internal_value(data)
def get_file_extension(self, file_name, decoded_file):
return imghdr.what(file_name, decoded_file)
I was able to decode and save the logo to the disk with some randomly generated name. Now I'm wondering how to name that file properly. By 'properly' I mean that I want to name the logo file the same as is the restaurant name, which is problematic, because to_internal_value doesn't have access to anything else than the file data The questions is: where should I put the code to rename the file to be the same as the restaurant name that came in the same request?

Validate a file before Uploading using python magic

I am using python magic to validate a file before uploading so for that I am following the below link:
https://djangosnippets.org/snippets/3039/
validators.py file:
from django.core.exceptions import ValidationError
import magic
class MimetypeValidator(object):
def __init__(self, mimetypes):
self.mimetypes = mimetypes
def __call__(self, value):
try:
mime_byt = magic.from_buffer(value.read(1024), mime=True)
mime = mime_byt.decode(encoding='UTF-8')
if mime not in self.mimetypes:
raise ValidationError('%s is not an acceptable file type' % value)
except AttributeError as e:
raise ValidationError('This value could not be validated for file type' % value)
here is my form.py file:
class FileForm(forms.ModelForm):
file = forms.FileField(
label='Select a File *',
allow_empty_file=False,
validators=[MimetypeValidator('application/pdf')],
help_text='Max. Size - 25 MB')
class Meta:
model = File
fields = ('file')
SO I am able to upload a pdf file with this python magic logic but I also want to allow to upload a image tiff file and restrict the file size to 25 MB.
How can I implement this by using python magic?
You don't need any library to do this - you can check the uploaded size of a file in the clean method on the form:
def clean_file(self):
file = self.cleaned_data['file']
if file.size > 25000000:
raise ValidationError('The file is too big')
return file

Django TestCase: SimpleUploadedFile shows wrong filetype

I've been trying to get the tests running on upload forms. But, whenever I run the tests, it says that file is of wrong type.
Upload form saves the file with a randomly generated file name:
class Video(models.Model):
def get_generated_path(self):
# Generates random path for video file upload
original_file = models.CharField(null=True)
uploaded_file = models.FileField(storage=FileSystemStorage(location=
settings.MEDIA_ROOT), upload_to=get_generated_path)
video_name = models.TextField()
And form looks like:
class VideoForm(forms.Form):
video_file = forms.FileField()
video_name = forms.CharField()
def clean_video_file(forms.Form):
content = self.cleaned_data['video_file']
content_type = content.content_type.split('/')[0]
if content_type in settings.CONTENT_TYPES:
if content._size > settings.MAX_UPLOAD_SIZE:
raise forms.ValidationError(_('Please keep filesize under %s.
Current filesize %s') % (filesizeformat(
settings.MAX_UPLOAD_SIZE), filesizeformat(content._size)))
else:
raise forms.ValidationError(_('File type is not supported,
content type is: %s' % content_type))
return content
Most of the remaining logic is in views:
def upload_video(request):
try:
# Check if user is authenticated
if form.is_valid():
video_file = request.FILES['video_file']
video_name = form.cleaned_data['video_name']
save_video = Video.objects.create(
original_file = 'uploaded_videos' + user.username,
video_name = video_name)
return HTTPResponseRedirect('next-page')
except Exception, e:
...
The tests are written as:
def test_video_form(TestCase):
user = #Create a dummy user
test_video = SimpleUploadedFile('test_video.flv', open(
'path/to/test/video', 'rb'))
form = VideoForm(user, {'video_name': test_video}, )
self.assertTrue(form.is_valid())
The above test always fails since it says that the file type of 'test_video.flv' is plain/text. I've checked the 'test_video.flv' and its of correct type.
How to pass these files to the upload form and test it.
SimpleUploadedFile has the content_type text/plain by default. It is why your testing code goes fail in clean_video_file and this line:
raise forms.ValidationError(_('File type is not supported,
content type is: %s' % content_type))
is printing:
File type is not supported,
content type is: text/plain
Pass the content_type = 'video'in the SimpleUploadedFile line as shown below.
def test_video_form(TestCase):
user = #Create a dummy user
test_video = SimpleUploadedFile('test_video.flv', open(
'path/to/test/video', 'rb'), content_type='video') # Notice the change here.
form = VideoForm(user, {'video_name': test_video}, )
self.assertTrue(form.is_valid())

Django - Get uploaded file type / mimetype

Is there a way to get the content type of an upload file when overwriting the models save method? I have tried this:
def save(self):
print(self.file.content_type)
super(Media, self).save()
But it did not work. In this example, self.file is a model.FileField:
file = models.FileField(upload_to='uploads/%m-%Y/')
Edit: I want to be able to save the content type to the database, so I'll need it before the save is actually complete :)
class MyForm(forms.ModelForm):
def clean_file(self):
file = self.cleaned_data['file']
try:
if file:
file_type = file.content_type.split('/')[0]
print file_type
if len(file.name.split('.')) == 1:
raise forms.ValidationError(_('File type is not supported'))
if file_type in settings.TASK_UPLOAD_FILE_TYPES:
if file._size > settings.TASK_UPLOAD_FILE_MAX_SIZE:
raise forms.ValidationError(_('Please keep filesize under %s. Current filesize %s') % (filesizeformat(settings.TASK_UPLOAD_FILE_MAX_SIZE), filesizeformat(file._size)))
else:
raise forms.ValidationError(_('File type is not supported'))
except:
pass
return file
settings.py
TASK_UPLOAD_FILE_TYPES = ['pdf', 'vnd.oasis.opendocument.text','vnd.ms-excel','msword','application',]
TASK_UPLOAD_FILE_MAX_SIZE = "5242880"
You can use PIL or magic to read the few first bytes and get the MIME type that way. I wouldn't trust the content_type since anyone can fake an HTTP header.
Magic solution below. For a PIL implementation you can get an idea from django's get_image_dimensions.
import magic
def get_mime_type(file):
"""
Get MIME by reading the header of the file
"""
initial_pos = file.tell()
file.seek(0)
mime_type = magic.from_buffer(file.read(2048), mime=True)
file.seek(initial_pos)
return mime_type
File is the in-memory uploaded file in the view.
I'm using Django Rest Framework and this is the simplest way to determine content type/mime type:
file = request.data.get("file") # type(file) = 'django.core.files.uploadedfile.InMemoryUploadedFile'
print(file.content_type)
Let's say I have uploaded a JPEG image then my output would be:
image/jpeg
Let me know in the comments if this serves your purpose.
Need to override the save method in the model class
def save(self, *args, **kwargs):
if self.file and self.file.file:
try:#Need to add a try catch such that in case a file is not being uploaded, then the mime_type is not assigned
self.mime_type=self.file.file.content_type
except:
pass
Taking an assumption that our model has file column(FileField), and mime_type column (CharField)