I want to upload a CSV file in the admin that adds information to a model. If you can live with a normal form, rather than an extension of change_form.html or overriding response_change (I tried that first), this is how it can be done:
from django.core.files.storage import default_storage
from django.core.files.base import ContentFile
class StoreAdminForm(forms.ModelForm):
    """Admin form for ``Store`` with an extra, non-model file upload field."""

    ## add an extra field:
    upfile = forms.FileField()

    class Meta:
        model = Store
        fields = "__all__"

    def clean(self):
        """Run the parent validation, then read the uploaded file as text.

        When a new file was submitted in ``upfile``, its content is written
        to the default storage under a temporary name and read back as
        UTF-8 lines.

        Returns:
            The cleaned data produced by the parent ``clean()``.
        """
        cleaned_data = super(StoreAdminForm, self).clean()
        # ``upfile`` is missing from cleaned_data when its own field
        # validation failed, so do not index it directly.
        upload = cleaned_data.get("upfile")
        if "upfile" in self.changed_data and upload is not None:
            ### file validation on file type etc here ..
            ## file is valid:
            ## next lines deal with the InMemoryUploadedFile Type
            # Pass a name relative to the storage root; save() returns the
            # name actually used, which may carry a uniquifying suffix.
            tmp = default_storage.save("___tmp___", ContentFile(upload.read()))
            ## open file
            # Storage.path() maps the stored name back to a filesystem path
            # (only available on local filesystem storages).
            with open(default_storage.path(tmp), encoding="utf8") as f:
                data = f.readlines()
            ## ...
            # NOTE: the temporary file is not removed here; call
            # default_storage.delete(tmp) once it is no longer needed.
        return cleaned_data
I hope this helps everyone, I lost some time with not knowing how to deal with the InMemoryUploadedFile types.
I'm trying to create a way to upload xlsx files and then use celery to perform some actions.
I'm thinking this:
A view to upload the file and save it temporarily
Use celery to execute what I want in the file and then delete it.
I'm trying to do something like this:
class ImportMyFileView(APIView):
    """Upload endpoint that copies the posted file into ``/tmp``."""

    # NOTE(review): the saved file reportedly contains multipart boundary
    # and header lines (see the output quoted below) -- presumably this
    # parser is being handed a multipart/form-data body; confirm against
    # the DRF parser documentation.
    parser_classes = (FileUploadParser, )

    def post(self, request, filename, format=None):
        """Write the uploaded file to ``/tmp/<name>`` chunk by chunk."""
        my_file = request.data["file"]
        # The destination name comes straight from the upload's own name.
        with open(f"/tmp/{my_file.name}", "wb+") as destination:
            for chunk in my_file.chunks():
                destination.write(chunk)
        # call_celery_here()
        ...
Return something
I can generate the file where I want, but the problem is that when I open the xlsx file, I get this:
--X-INSOMNIA-BOUNDARY
Content-Disposition: form-data
Content-Type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
PK<q^Q_rels/.rels���J1��}��{w�Dd���ЛH}���a7�0u}{�Z���I~��7C��f�G�Fo�Z+���{�����kW�#�VJ$cʪ��l� �n�0�\Q�X^:�`���d�d{�m]_�d����h��V����F�w�^F9��W��-�(F/3�O�DSU�N�l/w�{N(�[��q��T����u<��r�?焮�s9�F����M��h���'h?PKf����
Is there any detail missing?
Here is how I would do it, relying on DRF's built in functionality:
import os
from rest_framework import serializers
from django.core.files.storage import FileSystemStorage
class UploadSerializer(serializers.Serializer):
    """Serializer with a single uploaded-file field named ``file``."""

    # The uploaded file; exposed as validated_data['file'] after validation.
    file = serializers.FileField()
class UploadView(APIView):
    """Accept an uploaded file, store it in the temp dir and queue a task."""

    ...

    def post(self, request):
        """Validate the upload, save it and hand its path to Celery.

        Returns:
            A ``Response`` with the text ``"Upload OK"``.

        Raises:
            serializers.ValidationError: if the payload has no valid file
                (raised by ``is_valid(raise_exception=True)``).
        """
        # Local import: the snippet's import block does not provide tempfile.
        import tempfile

        # The payload must be passed as ``data=``; the first positional
        # argument of a serializer is the instance, not the input data.
        ser = UploadSerializer(data=request.data)
        ser.is_valid(raise_exception=True)
        upload = ser.validated_data['file']

        fs = FileSystemStorage(tempfile.gettempdir())
        # Storage.save() requires a name; it returns the name actually
        # used, which may differ if the requested one already exists.
        file_name = fs.save(upload.name, upload)
        full_path = os.path.join(fs.location, file_name)
        celery_func.delay(file_name=full_path)
        return Response("Upload OK")
A more robust way to do this would be to create a model representing your uploads to be processed, and use the django model's FileField.
class Todo(models.Model):
    """One uploaded spreadsheet waiting to be processed."""

    # The uploaded file; field options are left as a placeholder here.
    xlsx_file = models.FileField(...) # read the docs on this
    # Set automatically when the row is first created.
    created_at = models.DateTimeField(auto_now_add=True)
    # Starts False; presumably flipped by the processing task -- confirm.
    is_complete = models.BooleanField(default=False)
class UploadView(APIView):
    """Store the upload as a ``Todo`` row and queue it for processing."""

    def post(self, request):
        """Create a ``Todo`` for the validated file and queue a Celery task.

        Returns:
            A ``Response`` with the text ``"Upload OK"``.
        """
        # Placeholder: serializer validation producing ``ser`` goes here.
        ...
        # The keyword must match the model field name ``xlsx_file``.
        todo = Todo.objects.create(
            xlsx_file=ser.validated_data['file']
        )
        # Pass only the primary key; the task loads the row itself.
        celery_func.delay(todo_id=todo.pk)
        return Response("Upload OK")
Once this works you can look into using a ModelSerializer either alone, or paired with a ModelViewSet. That's a bigger learning curve though.
I wanted to upload files with limited extension on imagefield. So, how can I validate my image field for jpg,bmp and gif only.
Also, which extension does image field take in by default?
This is how I do it:
from django.utils.image import Image
# settings.py
# ALLOWED_UPLOAD_IMAGES = ('gif', 'bmp', 'jpeg')
class ImageForm(forms.Form):
    """Form accepting one image whose format is listed in the settings."""

    image = forms.ImageField()

    def clean_image(self):
        """Reject images whose detected format is not allowed.

        Returns:
            The uploaded image, rewound to the start of its data.

        Raises:
            forms.ValidationError: if the format reported by the image
                library is not in ``settings.ALLOWED_UPLOAD_IMAGES``.
        """
        image = self.cleaned_data["image"]
        # This won't raise an exception since it was validated by ImageField.
        im = Image.open(image)
        if im.format.lower() not in settings.ALLOWED_UPLOAD_IMAGES:
            # Interpolate AFTER translating: the message id looked up in the
            # catalogue must be the constant template, not the filled string.
            raise forms.ValidationError(
                _("Unsupported file format. Supported formats are %s.")
                % ", ".join(settings.ALLOWED_UPLOAD_IMAGES)
            )
        # Opening the image moved the file pointer; rewind for later readers.
        image.seek(0)
        return image
Works for ModelForm as well.
Unit test:
from StringIO import StringIO
from django.core.files.uploadedfile import SimpleUploadedFile
from django.test.utils import override_settings
from django.test import TestCase
class ImageFormTest(TestCase):
    """Tests for the format restriction applied by ``ImageForm``."""

    def test_image_upload(self):
        """
        Image upload
        """
        # Minimal GIF payload used as the uploaded image.
        content = 'GIF87a\x01\x00\x01\x00\x80\x01\x00\x00\x00\x00ccc,\x00' \
                  '\x00\x00\x00\x01\x00\x01\x00\x00\x02\x02D\x01\x00;'
        image = StringIO(content)
        image.name = 'image.gif'
        image.content_type = 'image/gif'
        files = {'image': SimpleUploadedFile(image.name, image.read()), }
        form = ImageForm(data={}, files=files)
        self.assertTrue(form.is_valid())

    # Restrict the allowed formats so that the GIF payload below is rejected;
    # without this override the test would not exercise the restriction.
    @override_settings(ALLOWED_UPLOAD_IMAGES=['png', ])
    def test_image_upload_not_allowed_format(self):
        """A GIF is rejected when only PNG is allowed."""
        image = StringIO('GIF87a\x01\x00\x01\x00\x80\x01\x00\x00\x00\x00ccc,\x00'
                         '\x00\x00\x00\x01\x00\x01\x00\x00\x02\x02D\x01\x00;')
        image.name = 'image'
        files = {'image': SimpleUploadedFile(image.name, image.read()), }
        form = ImageForm(data={}, files=files)
        self.assertFalse(form.is_valid())
Pillow will allow a bunch of image formats
I have an app that lets people upload files, represented as UploadedFiles. However, I want to make sure that users only upload xml files. I know I can do this using magic, but I don't know where to put this check - I can't put it in the clean function since the file is not yet uploaded when clean runs, as far as I can tell.
Here's the UploadedFile model:
class UploadedFile(models.Model):
    """This represents a file that has been uploaded to the server."""

    # Status codes stored in ``status``. Note that the value 3 is unused.
    STATE_UPLOADED = 0
    STATE_ANNOTATED = 1
    STATE_PROCESSING = 2
    STATE_PROCESSED = 4
    # (value, label) pairs used as the choices of ``status``.
    STATES = (
        (STATE_UPLOADED, "Uploaded"),
        (STATE_ANNOTATED, "Annotated"),
        (STATE_PROCESSING, "Processing"),
        (STATE_PROCESSED, "Processed"),
    )
    status = models.SmallIntegerField(choices=STATES,
        default=0, blank=True, null=True)
    file = models.FileField(upload_to=settings.XML_ROOT)
    project = models.ForeignKey(Project)

    def __unicode__(self):
        """Return the stored name of the file."""
        return self.file.name

    def name(self):
        """Return only the final path component of the stored file name."""
        return os.path.basename(self.file.name)

    def save(self, *args, **kwargs):
        """Default a falsy status to ``STATE_UPLOADED`` and save."""
        # ``not self.status`` is true for both None and 0 (STATE_UPLOADED).
        if not self.status:
            self.status = self.STATE_UPLOADED
        super(UploadedFile, self).save(*args, **kwargs)

    def delete(self, *args, **kwargs):
        """Remove the file from disk, then delete the database row."""
        os.remove(self.file.path)
        # NOTE(review): the file was already removed on the line above;
        # this second delete is presumably redundant -- confirm.
        self.file.delete(False)
        super(UploadedFile, self).delete(*args, **kwargs)

    def get_absolute_url(self):
        """Return the project upload URL built from this object's id."""
        return u'/upload/projects/%d' % self.id

    def clean(self):
        """Raise ``ValidationError`` unless magic reports the file as XML."""
        # NOTE(review): ``self.file.url`` is a URL, while ``from_file``
        # presumably expects a filesystem path -- confirm; the question
        # text reports that this check does not work as written.
        if not "XML" in magic.from_file(self.file.url):
            raise ValidationError(u'Not an xml file.')
class UploadedFileForm(forms.ModelForm):
    """Model form for ``UploadedFile`` that leaves out ``project``."""

    class Meta:
        model = UploadedFile
        # ``project`` is not editable through this form.
        exclude = ('project',)
Validating files is a common challenge, so I would like to use a validator:
import magic
from django.utils.deconstruct import deconstructible
from django.template.defaultfilters import filesizeformat
# The decorator lets Django serialize validator instances (e.g. when the
# validator is attached to a model field).
@deconstructible
class FileValidator(object):
    """Validate an uploaded file's size and sniffed content type.

    Args:
        max_size: largest accepted size in bytes, or None for no limit.
        min_size: smallest accepted size in bytes, or None for no limit.
        content_types: accepted MIME types; empty means no type check.
    """

    error_messages = {
        'max_size': ("Ensure this file size is not greater than %(max_size)s."
                     " Your file size is %(size)s."),
        'min_size': ("Ensure this file size is not less than %(min_size)s. "
                     "Your file size is %(size)s."),
        'content_type': "Files of type %(content_type)s are not supported.",
    }

    def __init__(self, max_size=None, min_size=None, content_types=()):
        self.max_size = max_size
        self.min_size = min_size
        self.content_types = content_types

    def __call__(self, data):
        """Check ``data`` against the configured limits.

        Raises:
            ValidationError: with code ``max_size``, ``min_size`` or
                ``content_type`` for the first check that fails.
        """
        if self.max_size is not None and data.size > self.max_size:
            params = {
                'max_size': filesizeformat(self.max_size),
                'size': filesizeformat(data.size),
            }
            raise ValidationError(self.error_messages['max_size'],
                                  'max_size', params)

        if self.min_size is not None and data.size < self.min_size:
            params = {
                'min_size': filesizeformat(self.min_size),
                'size': filesizeformat(data.size)
            }
            raise ValidationError(self.error_messages['min_size'],
                                  'min_size', params)

        if self.content_types:
            # Sniff the type from the content rather than trusting a header.
            content_type = magic.from_buffer(data.read(), mime=True)
            # Rewind so later consumers read the file from the start.
            data.seek(0)

            if content_type not in self.content_types:
                params = { 'content_type': content_type }
                raise ValidationError(self.error_messages['content_type'],
                                      'content_type', params)

    def __eq__(self, other):
        """Two validators are equal when all their limits are equal."""
        return (
            isinstance(other, FileValidator) and
            self.max_size == other.max_size and
            self.min_size == other.min_size and
            self.content_types == other.content_types
        )
Then you can use FileValidator in your models.FileField or forms.FileField as follows:
# Validator limited to 1024 * 100 bytes and a single accepted MIME type.
# NOTE(review): confirm which MIME string the local libmagic reports for
# XML files before relying on this value.
validate_file = FileValidator(max_size=1024 * 100,
                              content_types=('application/xml',))
# Attach the validator to the field so it runs during field validation.
file = models.FileField(upload_to=settings.XML_ROOT,
                        validators=[validate_file])
From django 1.11, you can also use FileExtensionValidator.
from django.core.validators import FileExtensionValidator
class UploadedFile(models.Model):
    """Uploaded file restricted by extension."""

    # Only the ``xml`` extension is listed as allowed.
    file = models.FileField(upload_to=settings.XML_ROOT,
        validators=[FileExtensionValidator(allowed_extensions=['xml'])])
Note this must be used on a FileField and won't work on a CharField (for example), since the validator validates on value.name.
ref: https://docs.djangoproject.com/en/dev/ref/validators/#fileextensionvalidator
For posterity: the solution is to use the read method and pass that to magic.from_buffer.
class UploadedFileForm(ModelForm):
    """Model form that accepts only files libmagic identifies as XML."""

    def clean_file(self):
        """Validate that the uploaded ``file`` is XML.

        Returns:
            The uploaded file, rewound to its start, or the empty value
            unchanged when nothing was uploaded.

        Raises:
            ValidationError: if the sniffed description lacks "XML".
        """
        upload = self.cleaned_data.get("file", False)
        # Nothing uploaded: there is no content to inspect.
        if not upload:
            return upload

        filetype = magic.from_buffer(upload.read())
        # read() consumed the stream; rewind it for whoever reads it next.
        upload.seek(0)
        if "XML" not in filetype:
            raise ValidationError("File is not XML.")
        return upload

    class Meta:
        model = models.UploadedFile
        exclude = ('project',)
I think what you want to do is to clean the uploaded file in Django's Form.clean_your_field_name_here() methods - the data is available on your system by then if it was submitted as normal HTTP POST request.
Also if you consider this inefficient explore the options of different Django file upload backends and how to do streaming processing.
If you need to consider the security of the system when dealing with uploads
Make sure uploaded file has correct extension
Make sure the mimetype matches the file extension
In case you are worried about users uploading exploit files (for attacking your site)
Rewrite all the file contents on save to get rid of possible extra (exploit) payload (so you cannot embed HTML in XML which the browser would interpret as a site-origin HTML file when downloading)
Make sure you use content-disposition header on download
Some more info here: http://opensourcehacker.com/2013/07/31/secure-user-uploads-and-exploiting-served-user-content/
Below is my example how I sanitize the uploaded images:
class Example(models.Model):
    """Model with one optional image."""

    # Optional image; the upload path comes from ``filename_gen``, which is
    # defined outside this snippet.
    image = models.ImageField(upload_to=filename_gen("participant-images/"), blank=True, null=True)
class Example(forms.ModelForm):
    # NOTE(review): this form has the same name as the model class shown
    # just before it -- presumably they live in different modules; confirm.

    def clean_image(self):
        """ Clean the uploaded image attachment.
        """
        # ``False`` is the fallback when no image is present in cleaned_data.
        image = self.cleaned_data.get('image', False)
        utils.ensure_safe_user_image(image)
        return image
def ensure_safe_user_image(image):
    """ Perform various checks to sanitize user uploaded image data.

    Checks the size, sniffs the real type from the content, then re-encodes
    the image in memory so that any extra payload is dropped. ``image`` is
    modified in place: its ``file``, ``size`` and possibly ``name`` change.

    :param: InMemoryUploadedFile instance (Django form field value)

    :raise: ValidationError in the case the image content has issues
    """
    # Local import keeps the block self-contained; image data is binary, so
    # a bytes buffer is required.
    from io import BytesIO

    if not image:
        return

    assert isinstance(image, InMemoryUploadedFile), "Image rewrite has been only tested on in-memory upload backend"

    # Make sure the image is not too big, so that PIL does not trash the server
    if image.size > 4*1024*1024:
        raise ValidationError("Image file too large - the limit is 4 megabytes")

    # Then do a header peek at what the image claims to be
    image.file.seek(0)
    mime = magic.from_buffer(image.file.getvalue(), mime=True)
    if mime not in ("image/png", "image/jpeg"):
        raise ValidationError("Image is not valid. Please upload a JPEG or PNG image.")

    doc_type = mime.split("/")[-1].upper()

    # Read data from the in-memory file
    image.file.seek(0)
    pil_image = Image.open(image.file)

    # Rewrite the image contents in the memory
    # (bails out with exception on bad data)
    buf = BytesIO()
    # ANTIALIAS is the older alias of LANCZOS and is missing from recent
    # Pillow releases; prefer LANCZOS and fall back for old installs.
    resample = getattr(Image, "LANCZOS", None) or Image.ANTIALIAS
    pil_image.thumbnail((2048, 2048), resample)
    pil_image.save(buf, doc_type)
    # Keep the reported size in step with the rewritten data, and rewind so
    # the next reader starts at the beginning of the buffer.
    image.size = buf.tell()
    buf.seek(0)
    image.file = buf

    # Make sure the image has valid extension (can't upload .htm image)
    extension = doc_type.lower()
    if extension == "jpeg":
        # ".jpg" is an equally valid JPEG extension; do not rename it.
        accepted = (".jpg", ".jpeg")
    else:
        accepted = ("." + extension,)
    if not image.name.lower().endswith(accepted):
        image.name = image.name + "." + extension
I recently found an interesting package that can do upload file validation. You can see the package here. The package's approach is similar to sultan's answer, so we can use it right away.
from upload_validator import FileTypeValidator
# Accept only Word documents, checked by both MIME type and extension.
validator = FileTypeValidator(
    allowed_types=['application/msword'],
    allowed_extensions=['.doc', '.docx']
)

# Open in binary mode (a .doc file is not text) and let the context manager
# close the handle once validation is done.
with open('sample.doc', 'rb') as file_resource:
    # ValidationError will be raised in case of invalid type or extension
    validator(file_resource)
Is there a way to get the content type of an upload file when overwriting the models save method? I have tried this:
def save(self):
    """Print the file's content type, then run the parent ``save()``."""
    # The question reports that this attribute access "did not work".
    print(self.file.content_type)
    super(Media, self).save()
But it did not work. In this example, self.file is a model.FileField:
# ``upload_to`` holds date format codes (month and year) in the folder name.
file = models.FileField(upload_to='uploads/%m-%Y/')
Edit: I want to be able to save the content type to the database, so I'll need it before the save is actually complete :)
class MyForm(forms.ModelForm):
    """Model form that restricts the type and size of the uploaded file."""

    def clean_file(self):
        """Validate the uploaded ``file`` by declared type and size.

        Returns:
            The value of ``cleaned_data['file']``.

        Raises:
            forms.ValidationError: if the name has no extension, the
                declared type is not in ``settings.TASK_UPLOAD_FILE_TYPES``
                or the file exceeds ``settings.TASK_UPLOAD_FILE_MAX_SIZE``.
        """
        upload = self.cleaned_data['file']

        # Only a freshly uploaded file carries ``content_type``; when it is
        # absent there is nothing new to validate. This replaces a bare
        # ``except: pass`` that also swallowed every ValidationError below.
        content_type = getattr(upload, 'content_type', None)
        if not upload or not content_type:
            return upload

        # Main type only, e.g. "application" for "application/pdf".
        file_type = content_type.split('/')[0]

        # A name without any dot has no extension at all.
        if len(upload.name.split('.')) == 1:
            raise forms.ValidationError(_('File type is not supported'))

        if file_type not in settings.TASK_UPLOAD_FILE_TYPES:
            raise forms.ValidationError(_('File type is not supported'))

        # int() tolerates the limit being configured as a numeric string.
        max_size = int(settings.TASK_UPLOAD_FILE_MAX_SIZE)
        if upload.size > max_size:
            raise forms.ValidationError(_('Please keep filesize under %s. Current filesize %s') % (filesizeformat(max_size), filesizeformat(upload.size)))

        return upload
settings.py
# Type names accepted for uploaded task files.
TASK_UPLOAD_FILE_TYPES = ['pdf', 'vnd.oasis.opendocument.text','vnd.ms-excel','msword','application',]
# Maximum upload size in bytes (5 MiB). Kept as an integer because it is
# compared against a file size, which is a number.
TASK_UPLOAD_FILE_MAX_SIZE = 5 * 1024 * 1024
You can use PIL or magic to read the few first bytes and get the MIME type that way. I wouldn't trust the content_type since anyone can fake an HTTP header.
Magic solution below. For a PIL implementation you can get an idea from django's get_image_dimensions.
import magic
def get_mime_type(file):
    """
    Get MIME by reading the header of the file

    Reads at most the first 2048 bytes of ``file`` and returns the MIME
    type reported by ``magic``. The stream position is put back where it
    was, even if reading or sniffing raises.
    """
    initial_pos = file.tell()
    try:
        file.seek(0)
        mime_type = magic.from_buffer(file.read(2048), mime=True)
    finally:
        # Always restore the caller's position in the stream.
        file.seek(initial_pos)
    return mime_type
File is the in-memory uploaded file in the view.
I'm using Django Rest Framework and this is the simplest way to determine content type/mime type:
# Fetch the upload from the parsed request data (None if the key is absent).
file = request.data.get("file") # type(file) = 'django.core.files.uploadedfile.InMemoryUploadedFile'
# NOTE: another answer in this thread warns that this value can be faked
# by the sender, so do not treat it as proof of the real file type.
print(file.content_type)
Let's say I have uploaded a JPEG image then my output would be:
image/jpeg
Let me know in the comments if this serves your purpose.
Need to override the save method in the model class
def save(self, *args, **kwargs):
    """Record the upload's content type in ``mime_type``, then save.

    ``mime_type`` is left untouched when the underlying file object has no
    ``content_type`` attribute, i.e. when no file is being uploaded.
    """
    if self.file and self.file.file:
        try:
            self.mime_type = self.file.file.content_type
        except AttributeError:
            # Not a fresh upload: keep whatever mime_type is already set.
            pass
    # Without this call the override would never write anything to the
    # database.
    super().save(*args, **kwargs)
Taking an assumption that our model has file column(FileField), and mime_type column (CharField)