Only accept a certain file type in FileField, server-side - django

How can I restrict FileField to only accept a certain type of file (video, audio, pdf, etc.) in an elegant way, server-side?

One very easy way is to use a custom validator.
In your app's validators.py:
def validate_file_extension(value):
import os
from django.core.exceptions import ValidationError
ext = os.path.splitext(value.name)[1] # [0] returns path+filename
valid_extensions = ['.pdf', '.doc', '.docx', '.jpg', '.png', '.xlsx', '.xls']
if not ext.lower() in valid_extensions:
raise ValidationError('Unsupported file extension.')
Then in your models.py:
from .validators import validate_file_extension
... and use the validator for your form field:
class Document(models.Model):
file = models.FileField(upload_to="documents/%Y/%m/%d", validators=[validate_file_extension])
See also: How to limit file types on file uploads for ModelForms with FileFields?.
Warning
For securing your code execution environment from malicious media files
Use Exif libraries to properly validate the media files.
Separate your media files from your application code
execution environment
If possible use solutions like S3, GCS, Minio or
anything similar
When loading media files on client side, use client native methods (for example if you are loading the media files non securely in a
browser, it may cause execution of "crafted" JavaScript code)

Django in version 1.11 has a newly added FileExtensionValidator for model fields, the docs is here: https://docs.djangoproject.com/en/dev/ref/validators/#fileextensionvalidator.
An example of how to validate a file extension:
from django.core.validators import FileExtensionValidator
from django.db import models
class MyModel(models.Model):
pdf_file = models.FileField(
upload_to="foo/", validators=[FileExtensionValidator(allowed_extensions=["pdf"])]
)
Note that this method is not safe. Citation from Django docs:
Don’t rely on validation of the file extension to determine a file’s
type. Files can be renamed to have any extension no matter what data
they contain.
There is also new validate_image_file_extension (https://docs.djangoproject.com/en/dev/ref/validators/#validate-image-file-extension) for validating image extensions (using Pillow).

A few people have suggested using python-magic to validate that the file actually is of the type you are expecting to receive. This can be incorporated into the validator suggested in the accepted answer:
import os
import magic
from django.core.exceptions import ValidationError
def validate_is_pdf(file):
valid_mime_types = ['application/pdf']
file_mime_type = magic.from_buffer(file.read(1024), mime=True)
if file_mime_type not in valid_mime_types:
raise ValidationError('Unsupported file type.')
valid_file_extensions = ['.pdf']
ext = os.path.splitext(file.name)[1]
if ext.lower() not in valid_file_extensions:
raise ValidationError('Unacceptable file extension.')
This example only validates a pdf, but any number of mime-types and file extensions can be added to the arrays.
Assuming you saved the above in validators.py you can incorporate this into your model like so:
from myapp.validators import validate_is_pdf
class PdfFile(models.Model):
file = models.FileField(upload_to='pdfs/', validators=(validate_is_pdf,))

You can use the below to restrict filetypes in your Form
file = forms.FileField(widget=forms.FileInput(attrs={'accept':'application/pdf'}))

There's a Django snippet that does this:
import os
from django import forms
class ExtFileField(forms.FileField):
"""
Same as forms.FileField, but you can specify a file extension whitelist.
>>> from django.core.files.uploadedfile import SimpleUploadedFile
>>>
>>> t = ExtFileField(ext_whitelist=(".pdf", ".txt"))
>>>
>>> t.clean(SimpleUploadedFile('filename.pdf', 'Some File Content'))
>>> t.clean(SimpleUploadedFile('filename.txt', 'Some File Content'))
>>>
>>> t.clean(SimpleUploadedFile('filename.exe', 'Some File Content'))
Traceback (most recent call last):
...
ValidationError: [u'Not allowed filetype!']
"""
def __init__(self, *args, **kwargs):
ext_whitelist = kwargs.pop("ext_whitelist")
self.ext_whitelist = [i.lower() for i in ext_whitelist]
super(ExtFileField, self).__init__(*args, **kwargs)
def clean(self, *args, **kwargs):
data = super(ExtFileField, self).clean(*args, **kwargs)
filename = data.name
ext = os.path.splitext(filename)[1]
ext = ext.lower()
if ext not in self.ext_whitelist:
raise forms.ValidationError("Not allowed filetype!")
#-------------------------------------------------------------------------
if __name__ == "__main__":
import doctest, datetime
doctest.testmod()

First. Create a file named formatChecker.py inside the app where the you have the model that has the FileField that you want to accept a certain file type.
This is your formatChecker.py:
from django.db.models import FileField
from django.forms import forms
from django.template.defaultfilters import filesizeformat
from django.utils.translation import ugettext_lazy as _
class ContentTypeRestrictedFileField(FileField):
"""
Same as FileField, but you can specify:
* content_types - list containing allowed content_types. Example: ['application/pdf', 'image/jpeg']
* max_upload_size - a number indicating the maximum file size allowed for upload.
2.5MB - 2621440
5MB - 5242880
10MB - 10485760
20MB - 20971520
50MB - 5242880
100MB 104857600
250MB - 214958080
500MB - 429916160
"""
def __init__(self, *args, **kwargs):
self.content_types = kwargs.pop("content_types")
self.max_upload_size = kwargs.pop("max_upload_size")
super(ContentTypeRestrictedFileField, self).__init__(*args, **kwargs)
def clean(self, *args, **kwargs):
data = super(ContentTypeRestrictedFileField, self).clean(*args, **kwargs)
file = data.file
try:
content_type = file.content_type
if content_type in self.content_types:
if file._size > self.max_upload_size:
raise forms.ValidationError(_('Please keep filesize under %s. Current filesize %s') % (filesizeformat(self.max_upload_size), filesizeformat(file._size)))
else:
raise forms.ValidationError(_('Filetype not supported.'))
except AttributeError:
pass
return data
Second. In your models.py, add this:
from formatChecker import ContentTypeRestrictedFileField
Then instead of using 'FileField', use this 'ContentTypeRestrictedFileField'.
Example:
class Stuff(models.Model):
title = models.CharField(max_length=245)
handout = ContentTypeRestrictedFileField(upload_to='uploads/', content_types=['video/x-msvideo', 'application/pdf', 'video/mp4', 'audio/mpeg', ],max_upload_size=5242880,blank=True, null=True)
Those are the things you have to when you want to only accept a certain file type in FileField.

after I checked the accepted answer, I decided to share a tip based on Django documentation. There is already a validator for use to validate file extension. You don't need to rewrite your own custom function to validate whether your file extension is allowed or not.
https://docs.djangoproject.com/en/3.0/ref/validators/#fileextensionvalidator
Warning
Don’t rely on validation of the file extension to determine a file’s
type. Files can be renamed to have any extension no matter what data
they contain.

I think you would be best suited using the ExtFileField that Dominic Rodger specified in his answer and python-magic that Daniel Quinn mentioned is the best way to go. If someone is smart enough to change the extension at least you will catch them with the headers.

You can define a list of accepted mime types in settings and then define a validator which uses python-magic to detect the mime-type and raises ValidationError if the mime-type is not accepted. Set that validator on the file form field.
The only problem is that sometimes the mime type is application/octet-stream, which could correspond to different file formats. Did someone of you overcome this issue?

Additionally i Will extend this class with some extra behaviour.
class ContentTypeRestrictedFileField(forms.FileField):
...
widget = None
...
def __init__(self, *args, **kwargs):
...
self.widget = forms.ClearableFileInput(attrs={'accept':kwargs.pop('accept', None)})
super(ContentTypeRestrictedFileField, self).__init__(*args, **kwargs)
When we create instance with param accept=".pdf,.txt", in popup with file structure as a default we will see files with passed extension.

Just a minor tweak to #Thismatters answer since I can't comment. According to the README of python-magic:
recommend using at least the first 2048 bytes, as less can produce incorrect identification
So changing 1024 bytes to 2048 to read the contents of the file and get the mime type base from that can give the most accurate result, hence:
def validate_extension(file):
valid_mime_types = ["application/pdf", "image/jpeg", "image/png", "image/jpg"]
file_mime_type = magic.from_buffer(file.read(2048), mime=True) # Changed this to 1024 to 2048
if file_mime_type not in valid_mime_types:
raise ValidationError("Unsupported file type.")
valid_file_extensions = [".pdf", ".jpeg", ".png", ".jpg"]
ext = os.path.splitext(file.name)[1]
if ext.lower() not in valid_file_extensions:
raise ValidationError("Unacceptable file extension.")

Related

how to unit test file upload in django

In my django app, I have a view which accomplishes file upload.The core snippet is like this
...
if (request.method == 'POST'):
if request.FILES.has_key('file'):
file = request.FILES['file']
with open(settings.destfolder+'/%s' % file.name, 'wb+') as dest:
for chunk in file.chunks():
dest.write(chunk)
I would like to unit test the view.I am planning to test the happy path as well as the fail path..ie,the case where the request.FILES has no key 'file' , case where request.FILES['file'] has None..
How do I set up the post data for the happy path?Can somebody tell me?
I used to do the same with open('some_file.txt') as fp: but then I needed images, videos and other real files in the repo and also I was testing a part of a Django core component that is well tested, so currently this is what I have been doing:
from django.core.files.uploadedfile import SimpleUploadedFile
def test_upload_video(self):
video = SimpleUploadedFile("file.mp4", "file_content", content_type="video/mp4")
self.client.post(reverse('app:some_view'), {'video': video})
# some important assertions ...
In Python 3.5+ you need to use bytes object instead of str. Change "file_content" to b"file_content"
It's been working fine, SimpleUploadedFile creates an InMemoryFile that behaves like a regular upload and you can pick the name, content and content type.
From Django docs on Client.post:
Submitting files is a special case. To POST a file, you need only
provide the file field name as a key, and a file handle to the file
you wish to upload as a value. For example:
c = Client()
with open('wishlist.doc') as fp:
c.post('/customers/wishes/', {'name': 'fred', 'attachment': fp})
I recommend you to take a look at Django RequestFactory. It's the best way to mock data provided in the request.
Said that, I found several flaws in your code.
"unit" testing means to test just one "unit" of functionality. So,
if you want to test that view you'd be testing the view, and the file
system, ergo, not really unit test. To make this point more clear. If
you run that test, and the view works fine, but you don't have
permissions to save that file, your test would fail because of that.
Other important thing is test speed. If you're doing something like
TDD the speed of execution of your tests is really important.
Accessing any I/O is not a good idea.
So, I recommend you to refactor your view to use a function like:
def upload_file_to_location(request, location=None): # Can use the default configured
And do some mocking on that. You can use Python Mock.
PS: You could also use Django Test Client But that would mean that you're adding another thing more to test, because that client make use of Sessions, middlewares, etc. Nothing similar to Unit Testing.
I do something like this for my own event related application but you should have more than enough code to get on with your own use case
import tempfile, csv, os
class UploadPaperTest(TestCase):
def generate_file(self):
try:
myfile = open('test.csv', 'wb')
wr = csv.writer(myfile)
wr.writerow(('Paper ID','Paper Title', 'Authors'))
wr.writerow(('1','Title1', 'Author1'))
wr.writerow(('2','Title2', 'Author2'))
wr.writerow(('3','Title3', 'Author3'))
finally:
myfile.close()
return myfile
def setUp(self):
self.user = create_fuser()
self.profile = ProfileFactory(user=self.user)
self.event = EventFactory()
self.client = Client()
self.module = ModuleFactory()
self.event_module = EventModule.objects.get_or_create(event=self.event,
module=self.module)[0]
add_to_admin(self.event, self.user)
def test_paper_upload(self):
response = self.client.login(username=self.user.email, password='foz')
self.assertTrue(response)
myfile = self.generate_file()
file_path = myfile.name
f = open(file_path, "r")
url = reverse('registration_upload_papers', args=[self.event.slug])
# post wrong data type
post_data = {'uploaded_file': i}
response = self.client.post(url, post_data)
self.assertContains(response, 'File type is not supported.')
post_data['uploaded_file'] = f
response = self.client.post(url, post_data)
import_file = SubmissionImportFile.objects.all()[0]
self.assertEqual(SubmissionImportFile.objects.all().count(), 1)
#self.assertEqual(import_file.uploaded_file.name, 'files/registration/{0}'.format(file_path))
os.remove(myfile.name)
file_path = import_file.uploaded_file.path
os.remove(file_path)
I did something like that :
from django.core.files.uploadedfile import SimpleUploadedFile
from django.test import TestCase
from django.core.urlresolvers import reverse
from django.core.files import File
from django.utils.six import BytesIO
from .forms import UploadImageForm
from PIL import Image
from io import StringIO
def create_image(storage, filename, size=(100, 100), image_mode='RGB', image_format='PNG'):
"""
Generate a test image, returning the filename that it was saved as.
If ``storage`` is ``None``, the BytesIO containing the image data
will be passed instead.
"""
data = BytesIO()
Image.new(image_mode, size).save(data, image_format)
data.seek(0)
if not storage:
return data
image_file = ContentFile(data.read())
return storage.save(filename, image_file)
class UploadImageTests(TestCase):
def setUp(self):
super(UploadImageTests, self).setUp()
def test_valid_form(self):
'''
valid post data should redirect
The expected behavior is to show the image
'''
url = reverse('image')
avatar = create_image(None, 'avatar.png')
avatar_file = SimpleUploadedFile('front.png', avatar.getvalue())
data = {'image': avatar_file}
response = self.client.post(url, data, follow=True)
image_src = response.context.get('image_src')
self.assertEquals(response.status_code, 200)
self.assertTrue(image_src)
self.assertTemplateUsed('content_upload/result_image.html')
create_image function will create image so you don't need to give static path of image.
Note : You can update code as per you code.
This code for Python 3.6.
from rest_framework.test import force_authenticate
from rest_framework.test import APIRequestFactory
factory = APIRequestFactory()
user = User.objects.get(username='#####')
view = <your_view_name>.as_view()
with open('<file_name>.pdf', 'rb') as fp:
request=factory.post('<url_path>',{'file_name':fp})
force_authenticate(request, user)
response = view(request)
As mentioned in Django's official documentation:
Submitting files is a special case. To POST a file, you need only provide the file field name as a key, and a file handle to the file you wish to upload as a value. For example:
c = Client()
with open('wishlist.doc') as fp:
c.post('/customers/wishes/', {'name': 'fred', 'attachment': fp})
More Information: How to check if the file is passed as an argument to some function?
While testing, sometimes we want to make sure that the file is passed as an argument to some function.
e.g.
...
class AnyView(CreateView):
...
def post(self, request, *args, **kwargs):
attachment = request.FILES['attachment']
# pass the file as an argument
my_function(attachment)
...
In tests, use Python's mock something like this:
# Mock 'my_function' and then check the following:
response = do_a_post_request()
self.assertEqual(mock_my_function.call_count, 1)
self.assertEqual(
mock_my_function.call_args,
call(response.wsgi_request.FILES['attachment']),
)
if you want to add other data with file upload then follow the below method
file = open('path/to/file.txt', 'r', encoding='utf-8')
data = {
'file_name_to_receive_on_backend': file,
'param1': 1,
'param2': 2,
.
.
}
response = self.client.post("/url/to/view", data, format='multipart')`
The only file_name_to_receive_on_backend will be received as a file other params received normally as post paramas.
In Django 1.7 there's an issue with the TestCase wich can be resolved by using open(filepath, 'rb') but when using the test client we have no control over it. I think it's probably best to ensure file.read() returns always bytes.
source: https://code.djangoproject.com/ticket/23912, by KevinEtienne
Without rb option, a TypeError is raised:
TypeError: sequence item 4: expected bytes, bytearray, or an object with the buffer interface, str found
from django.test import Client
from requests import Response
client = Client()
with open(template_path, 'rb') as f:
file = SimpleUploadedFile('Name of the django file', f.read())
response: Response = client.post(url, format='multipart', data={'file': file})
Hope this helps.
Very handy solution with mock
from django.test import TestCase, override_settings
#use your own client request factory
from my_framework.test import APIClient
from django.core.files import File
import tempfile
from pathlib import Path
import mock
image_mock = mock.MagicMock(spec=File)
image_mock.name = 'image.png' # or smt else
class MyTest(TestCase):
# I assume we want to put this file in storage
# so to avoid putting garbage in our MEDIA_ROOT
# we're using temporary storage for test purposes
#override_settings(MEDIA_ROOT=Path(tempfile.gettempdir()))
def test_send_file(self):
client = APIClient()
client.post(
'/endpoint/'
{'file':image_mock},
format="multipart"
)
I am using Python==3.8.2 , Django==3.0.4, djangorestframework==3.11.0
I tried self.client.post but got a Resolver404 exception.
Following worked for me:
import requests
upload_url='www.some.com/oaisjdoasjd' # your url to upload
with open('/home/xyz/video1.webm', 'rb') as video_file:
# if it was a text file we would perhaps do
# file = video_file.read()
response_upload = requests.put(
upload_url,
data=video_file,
headers={'content-type': 'video/webm'}
)
I am using django rest framework and I had to test the upload of multiple files.
I finally get it by using format="multipart" in my APIClient.post request.
from rest_framework.test import APIClient
...
self.client = APIClient()
with open('./photo.jpg', 'rb') as fp:
resp = self.client.post('/upload/',
{'images': [fp]},
format="multipart")
I am using GraphQL, upload for test:
with open('test.jpg', 'rb') as fp:
response = self.client.execute(query, variables, data={'image': [fp]})
code in class mutation
#classmethod
def mutate(cls, root, info, **kwargs):
if image := info.context.FILES.get("image", None):
kwargs["image"] = image
TestingMainModel.objects.get_or_create(
id=kwargs["id"],
defaults=kwargs
)

How does one use magic to verify file type in a Django form clean method?

I have written an email form class in Django with a FileField. I want to check the uploaded file for its type via checking its mimetype. Subsequently, I want to limit file types to pdfs, word, and open office documents.
To this end, I have installed python-magic and would like to check file types as follows per the specs for python-magic:
mime = magic.Magic(mime=True)
file_mime_type = mime.from_file('address/of/file.txt')
However, recently uploaded files lack addresses on my server. I also do not know of any method of the mime object akin to "from_file_content" that checks for the mime type given the content of the file.
What is an effective way to use magic to verify file types of uploaded files in Django forms?
Stan described good variant with buffer. Unfortunately the weakness of this method is reading file to the memory. Another option is using temporary stored file:
import tempfile
import magic
with tempfile.NamedTemporaryFile() as tmp:
for chunk in form.cleaned_data['file'].chunks():
tmp.write(chunk)
print(magic.from_file(tmp.name, mime=True))
Also, you might want to check the file size:
if form.cleaned_data['file'].size < ...:
print(magic.from_buffer(form.cleaned_data['file'].read()))
else:
# store to disk (the code above)
Additionally:
Whether the name can be used to open the file a second time, while the named temporary file is still open, varies across platforms (it can be so used on Unix; it cannot on Windows NT or later).
So you might want to handle it like so:
import os
tmp = tempfile.NamedTemporaryFile(delete=False)
try:
for chunk in form.cleaned_data['file'].chunks():
tmp.write(chunk)
print(magic.from_file(tmp.name, mime=True))
finally:
os.unlink(tmp.name)
tmp.close()
Also, you might want to seek(0) after read():
if hasattr(f, 'seek') and callable(f.seek):
f.seek(0)
Where uploaded data is stored
Why no trying something like that in your view :
m = magic.Magic()
m.from_buffer(request.FILES['my_file_field'].read())
Or use request.FILES in place of form.cleaned_data if django.forms.Form is really not an option.
mime = magic.Magic(mime=True)
attachment = form.cleaned_data['attachment']
if hasattr(attachment, 'temporary_file_path'):
# file is temporary on the disk, so we can get full path of it.
mime_type = mime.from_file(attachment.temporary_file_path())
else:
# file is on the memory
mime_type = mime.from_buffer(attachment.read())
Also, you might want to seek(0) after read():
if hasattr(f, 'seek') and callable(f.seek):
f.seek(0)
Example from Django code. Performed for image fields during validation.
You can use django-safe-filefield package to validate that uploaded file extension match it MIME-type.
from safe_filefield.forms import SafeFileField
class MyForm(forms.Form):
attachment = SafeFileField(
allowed_extensions=('xls', 'xlsx', 'csv')
)
In case you're handling a file upload and concerned only about images,
Django will set content_type for you (or rather for itself?):
from django.forms import ModelForm
from django.core.files import File
from django.db import models
class MyPhoto(models.Model):
photo = models.ImageField(upload_to=photo_upload_to, max_length=1000)
class MyForm(ModelForm):
class Meta:
model = MyPhoto
fields = ['photo']
photo = MyPhoto.objects.first()
photo = File(open('1.jpeg', 'rb'))
form = MyForm(files={'photo': photo})
if form.is_valid():
print(form.instance.photo.file.content_type)
It doesn't rely on content type provided by the user. But
django.db.models.fields.files.FieldFile.file is an undocumented
property.
Actually, initially content_type is set from the request, but when
the form gets validated, the value is updated.
Regarding non-images, doing request.FILES['name'].read() seems okay to me.
First, that's what Django does. Second, files larger than 2.5 Mb by default
are stored on a disk. So let me point you at the other answer
here.
For the curious, here's the stack trace that leads to updating
content_type:
django.forms.forms.BaseForm.is_valid: self.errors
django.forms.forms.BaseForm.errors: self.full_clean()
django.forms.forms.BaseForm.full_clean: self._clean_fields()
django.forms.forms.BaseForm._clean_fiels: field.clean()
django.forms.fields.FileField.clean: super().clean()
django.forms.fields.Field.clean: self.to_python()
django.forms.fields.ImageField.to_python

How to limit file types on file uploads for ModelForms with FileFields?

My goal is to limit a FileField on a Django ModelForm to PDFs and Word Documents. The answers I have googled all deal with creating a separate file handler, but I am not sure how to do so in the context of a ModelForm. Is there a setting in settings.py I may use to limit upload file types?
Create a validation method like:
def validate_file_extension(value):
if not value.name.endswith('.pdf'):
raise ValidationError(u'Error message')
and include it on the FileField validators like this:
actual_file = models.FileField(upload_to='uploaded_files', validators=[validate_file_extension])
Also, instead of manually setting which extensions your model allows, you should create a list on your setting.py and iterate over it.
Edit
To filter for multiple files:
def validate_file_extension(value):
import os
ext = os.path.splitext(value.name)[1]
valid_extensions = ['.pdf','.doc','.docx']
if not ext in valid_extensions:
raise ValidationError(u'File not supported!')
Validating with the extension of a file name is not a consistent way. For example I can rename a picture.jpg into a picture.pdf and the validation won't raise an error.
A better approach is to check the content_type of a file.
Validation Method
def validate_file_extension(value):
if value.file.content_type != 'application/pdf':
raise ValidationError(u'Error message')
Usage
actual_file = models.FileField(upload_to='uploaded_files', validators=[validate_file_extension])
An easier way of doing it is as below in your Form
file = forms.FileField(widget=forms.FileInput(attrs={'accept':'application/pdf'}))
Django since 1.11 has a FileExtensionValidator for this purpose:
class SomeDocument(Model):
document = models.FileFiled(validators=[
FileExtensionValidator(allowed_extensions=['pdf', 'doc'])])
As #savp mentioned, you will also want to customize the widget so that users can't select inappropriate files in the first place:
class SomeDocumentForm(ModelForm):
class Meta:
model = SomeDocument
widgets = {'document': FileInput(attrs={'accept': 'application/pdf,application/msword'})}
fields = '__all__'
You may need to fiddle with accept to figure out exactly what MIME types are needed for your purposes.
As others have mentioned, none of this will prevent someone from renaming badstuff.exe to innocent.pdf and uploading it through your form—you will still need to handle the uploaded file safely. Something like the python-magic library can help you determine the actual file type once you have the contents.
For a more generic use, I wrote a small class ExtensionValidator that extends Django's built-in RegexValidator. It accepts single or multiple extensions, as well as an optional custom error message.
class ExtensionValidator(RegexValidator):
def __init__(self, extensions, message=None):
if not hasattr(extensions, '__iter__'):
extensions = [extensions]
regex = '\.(%s)$' % '|'.join(extensions)
if message is None:
message = 'File type not supported. Accepted types are: %s.' % ', '.join(extensions)
super(ExtensionValidator, self).__init__(regex, message)
def __call__(self, value):
super(ExtensionValidator, self).__call__(value.name)
Now you can define a validator inline with the field, e.g.:
my_file = models.FileField('My file', validators=[ExtensionValidator(['pdf', 'doc', 'docx'])])
I use something along these lines (note, "pip install filemagic" is required for this...):
import magic
def validate_mime_type(value):
supported_types=['application/pdf',]
with magic.Magic(flags=magic.MAGIC_MIME_TYPE) as m:
mime_type=m.id_buffer(value.file.read(1024))
value.file.seek(0)
if mime_type not in supported_types:
raise ValidationError(u'Unsupported file type.')
You could probably also incorporate the previous examples into this - for example also check the extension/uploaded type (which might be faster as a primary check than magic.) This still isn't foolproof - but it's better, since it relies more on data in the file, rather than browser provided headers.
Note: This is a validator function that you'd want to add to the list of validators for the FileField model.
I find that the best way to check the type of a file is by checking its content type. I will also add that one the best place to do type checking is in form validation. I would have a form and a validation as follows:
class UploadFileForm(forms.Form):
file = forms.FileField()
def clean_file(self):
data = self.cleaned_data['file']
# check if the content type is what we expect
content_type = data.content_type
if content_type == 'application/pdf':
return data
else:
raise ValidationError(_('Invalid content type'))
The following documentation links can be helpful:
https://docs.djangoproject.com/en/3.1/ref/files/uploads/ and https://docs.djangoproject.com/en/3.1/ref/forms/validation/
I handle this by using a clean_[your_field] method on a ModelForm. You could set a list of acceptable file extensions in settings.py to check against in your clean method, but there's nothing built-into settings.py to limit upload types.
Django-Filebrowser, for example, takes the approach of creating a list of acceptable file extensions in settings.py.
Hope that helps you out.

django: registering unzipped files on the local disk

[I apologize in advance for the length of this question.]
I'm using Django 1.2.3-3+squeeze1 on Debian squeeze.
I am writing an application which uploads zip files to disk in a
temporary location, unzips them, and then saves the results to a
permanent location. The unzipped files are registered in the database as a class called
FileUpload after they are unzipped. The uploaded zipped files also correspond to a class,
but I'll ignore that for the purposes of this question. FileUpload looks like this.
class FileUpload(models.Model):
folder = models.ForeignKey(FolderUpload, null=True, blank=True, related_name='parentfolder')
upload_date = models.DateTimeField(default=datetime.now(), blank=True, editable=False)
upload = models.FileField(upload_to=file_upload_path)
name = models.CharField(max_length=100)
description = models.CharField(blank=True, max_length=200)
def save(self):
if not self.id:
if self.folder == None:
pass
else:
self.path = self.folder.path
super(FileUpload, self).save()
I'm also using a form defined by
from django.forms import ChoiceField, Form, ModelForm
class FileUploadForm(ModelForm):
class Meta:
model = FileUpload
The function that takes the unzipped files on the disk. registers them
with the database, and moves them to the correct place is called
addFile. I was previously using this:
def addFile(name, filename, description, content, folder_id=None):
#f = open(filename, 'r')
#content = f.read()
from forms import FileUploadForm
from django.core.files.uploadedfile import SimpleUploadedFile
if folder_id == None:
data = {'name':name, 'description':description}
else:
data = {'name':name, 'description':description, 'folder':str(folder_id)}
file_data = {'upload': SimpleUploadedFile(filename, content)}
ff = FileUploadForm(data, file_data)
try:
zf = ff.save(commit=False)
zf.save()
except:
raise RuntimeError, "Form error is %s."%(ff.errors)
return zf
This worked, but the problem was that it dumped the entire file into
memory. With large files, and especially given Python isn't known for
it's memory economy, this consumed huge amounts of memory. So I
switched to this:
from django.core.files.uploadedfile import UploadedFile
class UnzippedFile(UploadedFile):
def __init__(self, file, filepath, content_type='text/plain', charset=None):
import os
self.filepath = filepath
self.name = os.path.basename(filepath)
self.size = os.path.getsize(filepath)
self.content_type = content_type
self.charset = charset
super(UnzippedFile, self).__init__(file, self.name, content_type, self.size, charset)
def temporary_file_path(self):
"""
Returns the full path of this file.
"""
return self.filepath
def addFile(filepath, description, file, folder_id=None):
import os, sys
from forms import FileUploadForm
from django.core.files.uploadedfile import UploadedFile
name = os.path.basename(filepath)
if folder_id == None:
data = {'name':name, 'description':description}
else:
data = {'name':name, 'description':description, 'folder':str(folder_id)}
file_data = {'upload': UnzippedFile(file, filepath)}
ff = FileUploadForm(data, file_data)
try:
zf = ff.save(commit=False)
zf.save()
except:
raise
return zf
I was forced to subclass UploadedFile, since none of the derived
classes that were already there (in
django/core/files/uploadedfile.py) seemed to do what I wanted.
The temporary_file_path function is there because the Django File Uploads docs say
UploadedFile.temporary_file_path()
Only files uploaded onto disk will have this method; it returns the
full path to the temporary uploaded file.
It seems the FileSystemStorage class looks for this attribute in
_save function as described later.
If n is the relative path of the file in the zip archive, then the
usage is
name = os.path.normpath(os.path.basename(n)) # name of file
pathname = os.path.join(dirname, n) # full file path
description = "from zip file '" + zipfilename + "'" # `zipfilename` is the name of the zip file
fname = open(pathname) # file handle
f = addFile(pathname, description, fname)
This works, but I traced through the code, and found that the code was
using streaming, when clearly the optimal thing to do in this case
would be to just copy the file from the temporary location to the
permanent location. The code in question is in
django/core/files/storage.py, in the _save function of the
FileSystemStorage class. In _save, name is the relative path of
the destination, and content is a File object.
def _save(self, name, content):
full_path = self.path(name)
directory = os.path.dirname(full_path)
[...]
while True:
try:
# This file has a file path that we can move.
if hasattr(content, 'temporary_file_path'):
file_move_safe(content.temporary_file_path(), full_path)
content.close()
# This is a normal uploadedfile that we can stream.
else:
# This fun binary flag incantation makes os.open throw an
# OSError if the file already exists before we open it.
fd = os.open(full_path, os.O_WRONLY | os.O_CREAT | os.O_EXCL | getattr(os, 'O_BINARY', 0))
try:
locks.lock(fd, locks.LOCK_EX)
for chunk in content.chunks():
os.write(fd, chunk)
finally:
locks.unlock(fd)
os.close(fd)
except OSError, e:
if e.errno == errno.EEXIST:
# Ooops, the file exists. We need a new file name.
name = self.get_available_name(name)
full_path = self.path(name)
else:
raise
else:
# OK, the file save worked. Break out of the loop.
break
The _save function is looking for the attribute
temporary_file_path. I believe this code is intended to be triggered
by the temporary_file_path function mentioned earlier in
django/core/files/uploadedfile.py. However, the class that is
actually passed (corresponding to the content argument) is <class
'django.db.models.fields.files.FieldFile'>, and here is what the
attribute dict (content.__dict__) for this object looks like:
{'_committed': False, 'name': u'foo', 'instance': <FileUpload: foo>,
'_file': <UnzippedFile: foo (text/plain)>, 'storage':<django.core.files.storage.DefaultStorage object at 0x9a70ccc>,
'field': <django.db.models.fields.files.FileField object at0x9ce9b4c>, 'mode': None}
The temporary_file_path is attached to the
UnzippedFile class, which is inside the _file data member. So
content._file has a temporary_file_path attribute, not content
itself.
This is what a regular file upload looks like. As you can see, it is similar.
[Fri Jun 17 08:05:33 2011] [error] type of content is <class 'django.db.models.fields.files.FieldFile'>
[Fri Jun 17 08:05:33 2011] [error] {'_committed': False, 'name': u'behavior.py',
'instance': <FileUpload: b>, '_file': <TemporaryUploadedFile: behavior.py (text/x-python)>,
'storage': <django.core.files.storage.DefaultStorage object at 0xb8d7fd8c>,
'field': <django.db.models.fields.files.FileField object at 0xb8eb584c>, 'mode': None}
It is difficult for me to follow in any detail how the code gets from
the FileUploadForm save to the Storage object. The Django form
code in particular is quite obscure.
Anyway, my question, after all this setup is, how/when is the first
option below, with file_move_safe supposed to be activated? I'm
seeing a mismatch here. Is this a bug? Can anyone clarify?
if hasattr(content, 'temporary_file_path')
The above will never equal true with this conditional since you state that content does not have a temporary_file_path identifier. However since content._file does you can use the following to get the functionality you are looking for
if hasattr(content, '_file'):
if hasattr(content._file,'temporary_file_path'):

Django with Pluggable MongoDB Storage troubles

I'm trying to use django, and mongoengine to provide the storage backend only with GridFS. I still have a MySQL database.
I'm running into a strange (to me) error when I'm deleting from the django admin and am wondering if I am doing something incorrectly.
my code looks like this:
# settings.py
from mongoengine import connect
connect("mongo_storage")
# models.py
from mongoengine.django.storage import GridFSStorage
class MyFile(models.Model):
name = models.CharField(max_length=50)
content = models.FileField(upload_to="appsfiles", storage=GridFSStorage())
creation_time = models.DateTimeField(auto_now_add=True)
last_update_time = models.DateTimeField(auto_now=True)
I am able to upload files just fine, but when I delete them, something seems to break and the mongo database seems to get in an unworkable state until I manually delete all FileDocument.objects. When this happens I can't upload files or delete them from the django interface.
From the stack trace I have:
/home/projects/vector/src/mongoengine/django/storage.py in _get_doc_with_name
doc = [d for d in docs if getattr(d, self.field).name == name] ...
▼ Local vars
Variable Value
_[1]
[]
d
docs
Error in formatting: cannot set options after executing query
name
u'testfile.pdf'
self
/home/projects/vector/src/mongoengine/fields.py in __getattr__
raise AttributeError
Am I using this feature incorrectly?
UPDATE:
thanks to #zeekay's answer I was able to get a working gridfs storage plugin to work. I ended up not using mongoengine at all. I put my adapted solution on github. There is a clear sample project showing how to use it. I also uploaded the project to pypi.
Another Update:
I'd highly recommend the django-storages project. It has lots of storage backed options and is used by many more people than my original proposed solution.
I think you are better off not using MongoEngine for this, I haven't had much luck with it either. Here is a drop-in replacement for mongoengine.django.storage.GridFSStorage, which works with the admin.
from django.core.files.storage import Storage
from django.conf import settings
from pymongo import Connection
from gridfs import GridFS
class GridFSStorage(Storage):
def __init__(self, host='localhost', port=27017, collection='fs'):
for s in ('host', 'port', 'collection'):
name = 'GRIDFS_' + s.upper()
if hasattr(settings, name):
setattr(self, s, getattr(settings, name))
for s, v in zip(('host', 'port', 'collection'), (host, port, collection)):
if v:
setattr(self, s, v)
self.db = Connection(host=self.host, port=self.port)[self.collection]
self.fs = GridFS(self.db)
def _save(self, name, content):
self.fs.put(content, filename=name)
return name
def _open(self, name, *args, **kwars):
return self.fs.get_last_version(filename=name)
def delete(self, name):
oid = fs.get_last_version(filename=name)._id
self.fs.delete(oid)
def exists(self, name):
return self.fs.exists({'filename': name})
def size(self, name):
return self.fs.get_last_version(filename=name).length
GRIDFS_HOST, GRIDFS_PORT and GRIDFS_COLLECTION can be defined in your settings or passed as host, port, collection keyword arguments to GridFSStorage in your model's FileField.
I referred to Django's custom storage documenation, and loosely followed this answer to a similar question.