I am using python magic to validate a file before uploading so for that I am following the below link:
https://djangosnippets.org/snippets/3039/
validators.py file:
from django.core.exceptions import ValidationError
import magic
class MimetypeValidator(object):
    """Validate an uploaded file's MIME type by sniffing its first bytes.

    Instances are callables meant for a field's ``validators`` list.
    """

    def __init__(self, mimetypes):
        """Remember the accepted MIME types.

        :param mimetypes: one MIME type string, or an iterable of them.
        """
        # A bare string would turn the ``in`` test in __call__ into a
        # substring match, so wrap it in a one-element tuple.
        if isinstance(mimetypes, str):
            mimetypes = (mimetypes,)
        self.mimetypes = mimetypes

    def __call__(self, value):
        """Check ``value`` against the accepted MIME types.

        :param value: file-like object providing ``read`` and ``seek``.
        :raises ValidationError: if the value cannot be read, or its
            detected MIME type is not one of the accepted types.
        """
        try:
            header = value.read(1024)
            # Rewind so later consumers see the file from the start.
            value.seek(0)
        except AttributeError:
            raise ValidationError(
                'This value could not be validated for file type')
        mime = magic.from_buffer(header, mime=True)
        # Depending on the python-magic version the result is bytes or str.
        if isinstance(mime, bytes):
            mime = mime.decode(encoding='UTF-8')
        if mime not in self.mimetypes:
            raise ValidationError('%s is not an acceptable file type' % value)
here is my form.py file:
class FileForm(forms.ModelForm):
    """Model form for uploading a single PDF document."""

    file = forms.FileField(
        label='Select a File *',
        allow_empty_file=False,
        # Pass a tuple: with a bare string the validator's ``in`` check is
        # a substring match instead of a membership test.
        validators=[MimetypeValidator(('application/pdf',))],
        help_text='Max. Size - 25 MB')

    class Meta:
        model = File
        # ``('file')`` is just the string 'file'; the trailing comma makes
        # it the one-element tuple Django expects.
        fields = ('file',)
So I am able to upload a PDF file with this python-magic logic, but I also want to allow uploading a TIFF image file and to restrict the file size to 25 MB.
How can I implement this by using python magic?
You don't need any library to do this - you can check the uploaded size of a file in the clean method on the form:
def clean_file(self):
    """Return the uploaded file unless it exceeds 25,000,000 bytes.

    :raises ValidationError: when the file's ``size`` is above the limit.
    """
    uploaded = self.cleaned_data['file']
    if uploaded.size <= 25000000:
        return uploaded
    raise ValidationError('The file is too big')
Related
This is my Django Model where I'm trying to store the content of an uploaded file in field attachments:
def _current_date():
    """Return today's date; called each time a default is needed."""
    return datetime.datetime.now().date()


def _current_time():
    """Return the current time of day; called each time a default is needed."""
    return datetime.datetime.now().time()


class CsvFile(models.Model):
    """An uploaded CSV file plus a copy of its content in ``attachment``."""

    processed = models.BooleanField(default=False)
    # Defaults are callables so each new row gets its own value; calling
    # them here would freeze one value at import time for every row.
    uid = models.UUIDField(unique=True, default=uuid4)
    date = models.DateField(null=False, default=_current_date)
    time = models.TimeField(null=False, default=_current_time)
    original_filename = models.CharField(max_length=600, blank=True)
    attachment = models.TextField(blank=True)
    file = models.FileField(upload_to="csv/", blank=True)

    def save_file_content_to_attachment(self, file):
        """Read ``file`` and store its content in ``self.attachment``.

        ``self.attachment`` becomes ``''`` when opening or reading raises
        ``FileNotFoundError`` or ``ValueError``.
        """
        try:
            with file.open('r') as f:
                self.attachment = f.read()
        except (FileNotFoundError, ValueError):
            self.attachment = ''

    def save(self, *args, **kwargs):
        """Record the file name and content on the instance, then save it."""
        # Keep the name of the uploaded file.
        self.original_filename = self.file.name

        # Print a message to help troubleshoot the issue
        print(f"Saving file content to attachment for file {self.file.name}")

        # Save the file content to the attachment field
        self.save_file_content_to_attachment(self.file)

        super(CsvFile, self).save(*args, **kwargs)

    def delete(self, *args, **kwargs):
        """Delete the stored file (if it still exists) and then the row."""
        try:
            default_storage.delete(self.file.name)
        except FileNotFoundError:
            pass  # File does not exist, so we can ignore the exception

        super(CsvFile, self).delete(*args, **kwargs)
Unfortunately, uploading a file fails with an I/O error.
Here is the full backtrace
https://hastebin.skyra.pw/mizawicane.css
Can somebody shed light into why this is not working?
In case others run into the same problem.
This is how I solved it:
def save_file_content_to_attachment(self, file):
    """Copy the content of ``file`` into ``self.attachment``.

    ``self.attachment`` is set to ``''`` when ``file`` has no ``read``
    method, or when reading raises ``FileNotFoundError`` or ``ValueError``.

    :param file: object exposing ``closed``, ``open`` and (usually) ``read``.
    """
    # A handle that is still open is re-opened in read mode.
    # NOTE(review): for Django file objects this presumably also rewinds
    # to the start - confirm.
    if not file.closed:
        file.open('r')

    # Anything that cannot be read yields an empty attachment.
    if not hasattr(file, 'read'):
        self.attachment = ''
        return

    try:
        self.attachment = file.read()
    except (FileNotFoundError, ValueError):
        # ValueError covers reading from a closed handle; treat it like a
        # missing file instead of aborting the save.
        self.attachment = ''
I find myself in an odd situation only when deployed (debug == false):
My model throws a path traversal attempt exception. I want to create a directory for every file uploaded and save the file within the directory (some.zip) used in example. In my dev environment I have no problems and everything works just fine.
models.py:
class Template(models.Model):
    """A uniquely named template backed by an uploaded file."""

    def get_folder(self, filename):
        """Return ``TEMPLATES_FOLDER/<stem of filename>/<filename>``."""
        stem = Path(filename).stem
        base = Path(settings.TEMPLATES_FOLDER)
        return base.joinpath(stem, filename)

    name = models.CharField("template", max_length=32, unique=True)
    file = models.FileField(
        "templatefile",
        upload_to=get_folder,
        null=True,
        max_length=260,
        storage=OverwriteStorage(),
    )
class OverwriteStorage(FileSystemStorage):  # defined before Template in the real module
    """File-system storage that always reuses the requested name."""

    def get_available_name(self, name, max_length=None):
        """Delete whatever is stored under ``name`` and return ``name``."""
        self.delete(name)
        return name
forms.py:
class TemplateAdminForm(forms.ModelForm):
    """Admin form for ``Template`` that unpacks uploaded ``.zip`` files."""

    class Meta:
        model = Template
        fields = ["name", "file", ]

    def clean(self):
        """Validate the upload and unpack it.

        :returns: the cleaned data dictionary.
        :raises forms.ValidationError: when the file is not a ``.zip``.
        """
        cleaned_data = super().clean()

        # clean() also runs when a field failed its own validation; that
        # field is then absent from cleaned_data, so indexing it would
        # raise KeyError instead of showing the form error.
        if "file" not in cleaned_data or "name" not in cleaned_data:
            return cleaned_data

        upFile = Path(str(cleaned_data["file"]))
        if upFile.suffix != ".zip":
            raise forms.ValidationError("unknown file type ...")

        path = self.instance.get_folder(cleaned_data["name"])
        logging.error(f"{path}")
        unpack_zip(path)
        logging.error("DONE!")
        return cleaned_data
## signal to see when the error might be happening:
# The decorator must start with ``@``; written as a ``#`` comment the
# handler is never connected to the signal.
@receiver(post_save, sender=Template)
def testing(sender, **kwargs):
    """Log a marker when the ``post_save`` signal fires for ``Template``."""
    logging.error("we never get here")
settings.py:
# ``Path`` is the pathlib class; the all-caps ``PATH`` spelling is not a
# pathlib name. NOTE(review): assumes ``from pathlib import Path`` in settings.
TEMPLATES_FOLDER = Path(MEDIA_ROOT).joinpath("TEMPLATES")
but:
ERROR:django.security.SuspiciousFileOperation:Detected path traversal attempt in '/opt/project/media_root/TEMPLATES/some/some' WARNING:django.request:Bad Request: /admin/appName/template/add/
Edit:
Because of this discussion it might be important, this is happening on django 3.2.8
I get the same error on Django 3.2.6 when opening a file with mode "wb" at an absolute path name without using a temporary file. I have read that a temporary file is recommended in order to avoid this problem, so I will link that answer in case it helps you with your deployment, and share my experience.
Here's where it's been advised: answer
One possible solution would be to move that directory under the django project root folder and address it with a relative path. I'd try to use this too in order to understand how you could achieve this:
import os
# Print the process's current working directory (as reported by os.getcwd()).
print("WORKING DIRECTORY: " + os.getcwd())
An article on this topic suggests to use the following code (when dealing with an image file in that case): link
from django.core.files.temp import NamedTemporaryFile
from django.core import files
image_temp_file = NamedTemporaryFile(delete=True)

# Copy the source image into the temporary file in 8 KiB blocks.
# ``iter(callable, sentinel)`` keeps calling ``read`` until it returns
# ``b''`` (end of file). Iterating over a single ``read()`` result would
# instead yield individual byte values of the first block only.
with open('/path/to/file', 'rb') as in_memory_image:
    for block in iter(lambda: in_memory_image.read(1024 * 8), b''):
        # Write image block to temporary file
        image_temp_file.write(block)

file_name = 'temp.png'  # Choose a unique name for the file
# Flush so the buffered data is written out before the file is wrapped.
image_temp_file.flush()
temp_file = files.File(image_temp_file, name=file_name)
Lets go through the code:
Create a NamedTemporaryFile instead of TemporaryFile as Django’s ImageField requires file name.
Iterate over your in-memory file and write blocks of data to the NamedTemporaryFile object.
Flush the file to ensure the file is written to the storage.
Change the temporary file to a Django’s File object.
You can assign this file to Django models directly and save it.
>>> from blog.models import Blog
>>> b = Blog.objects.first()
>>> b.image = temp_file
>>> b.save()
I personally solved my SuspiciousFileOperation problems by addressing my directory with "BASE_DIR" from settings.py as the beginning of the path (nothing above that level in the filesystem), using a NamedTemporaryFile and by using the model FileField save() method appropriately like this:
# inside a model class save(self, *args, **kwargs) method
# file_name is the file name alone, no path to the file
self.myfilefield.save(file_name, temporary_file_object, save=False) # and then call the super().save(*args, **kwargs) inside the save() method of your model
I have an app that lets people upload files, represented as UploadedFiles. However, I want to make sure that users only upload xml files. I know I can do this using magic, but I don't know where to put this check - I can't put it in the clean function since the file is not yet uploaded when clean runs, as far as I can tell.
Here's the UploadedFile model:
class UploadedFile(models.Model):
    """This represents a file that has been uploaded to the server."""

    STATE_UPLOADED = 0
    STATE_ANNOTATED = 1
    STATE_PROCESSING = 2
    STATE_PROCESSED = 4
    STATES = (
        (STATE_UPLOADED, "Uploaded"),
        (STATE_ANNOTATED, "Annotated"),
        (STATE_PROCESSING, "Processing"),
        (STATE_PROCESSED, "Processed"),
    )

    status = models.SmallIntegerField(choices=STATES,
                                      default=STATE_UPLOADED,
                                      blank=True, null=True)
    file = models.FileField(upload_to=settings.XML_ROOT)
    project = models.ForeignKey(Project)

    def __unicode__(self):
        """Return the stored file name."""
        return self.file.name

    def name(self):
        """Return the file name without its directory part."""
        return os.path.basename(self.file.name)

    def save(self, *args, **kwargs):
        """Save the row, defaulting an unset status to ``STATE_UPLOADED``."""
        if not self.status:
            self.status = self.STATE_UPLOADED
        super(UploadedFile, self).save(*args, **kwargs)

    def delete(self, *args, **kwargs):
        """Delete the stored file and then the row."""
        # The field's own delete() removes the file from storage; a
        # preceding os.remove() deleted it twice and raised when the file
        # was already gone, which blocked deletion of the row.
        self.file.delete(False)
        super(UploadedFile, self).delete(*args, **kwargs)

    def get_absolute_url(self):
        """Return the URL under which this upload is shown."""
        return u'/upload/projects/%d' % self.id

    def clean(self):
        """Reject files whose content is not recognised as XML.

        :raises ValidationError: if libmagic's description lacks "XML".
        """
        # Sniff the content itself: ``self.file.url`` is a URL, not a
        # filesystem path that magic.from_file could open.
        description = magic.from_buffer(self.file.read(2048))
        # Rewind so the file can be read from the start afterwards.
        self.file.seek(0)
        if "XML" not in description:
            raise ValidationError(u'Not an xml file.')
class UploadedFileForm(forms.ModelForm):
    """Model form for ``UploadedFile`` that leaves out the ``project`` field."""

    class Meta:
        # Every model field except ``project`` is exposed on the form.
        exclude = ('project',)
        model = UploadedFile
Validating files is a common challenge, so I would like to use a validator:
import magic
from django.utils.deconstruct import deconstructible
from django.template.defaultfilters import filesizeformat
# The decorator must start with ``@``; written as a ``#`` comment it is
# never applied to the class.
@deconstructible
class FileValidator(object):
    """Validate an uploaded file's size and sniffed MIME type.

    Instances are callables meant for a field's ``validators`` list.
    """

    error_messages = {
        'max_size': ("Ensure this file size is not greater than %(max_size)s."
                     " Your file size is %(size)s."),
        'min_size': ("Ensure this file size is not less than %(min_size)s. "
                     "Your file size is %(size)s."),
        'content_type': "Files of type %(content_type)s are not supported.",
    }

    def __init__(self, max_size=None, min_size=None, content_types=()):
        """Configure the limits; ``None`` or an empty value disables a check.

        :param max_size: largest accepted size in bytes, or ``None``.
        :param min_size: smallest accepted size in bytes, or ``None``.
        :param content_types: accepted MIME types; empty skips the check.
        """
        self.max_size = max_size
        self.min_size = min_size
        self.content_types = content_types

    def __call__(self, data):
        """Validate ``data``.

        :param data: file object exposing ``size``, ``read`` and ``seek``.
        :raises ValidationError: with code ``max_size``, ``min_size`` or
            ``content_type`` for the first check that fails.
        """
        if self.max_size is not None and data.size > self.max_size:
            params = {
                'max_size': filesizeformat(self.max_size),
                'size': filesizeformat(data.size),
            }
            raise ValidationError(self.error_messages['max_size'],
                                  'max_size', params)

        if self.min_size is not None and data.size < self.min_size:
            params = {
                'min_size': filesizeformat(self.min_size),
                'size': filesizeformat(data.size)
            }
            raise ValidationError(self.error_messages['min_size'],
                                  'min_size', params)

        if self.content_types:
            content_type = magic.from_buffer(data.read(), mime=True)
            # Rewind so the file can be read from the start afterwards.
            data.seek(0)

            if content_type not in self.content_types:
                params = {'content_type': content_type}
                raise ValidationError(self.error_messages['content_type'],
                                      'content_type', params)

    def __eq__(self, other):
        """Two validators are equal when all three settings match."""
        return (
            isinstance(other, FileValidator) and
            self.max_size == other.max_size and
            self.min_size == other.min_size and
            self.content_types == other.content_types
        )

    def __hash__(self):
        """Hash on the same settings ``__eq__`` compares.

        Defining ``__eq__`` alone makes instances unhashable in Python 3.
        """
        return hash((self.max_size, self.min_size, tuple(self.content_types)))
Then you can use FileValidator in your models.FileField or forms.FileField as follows:
# Validator limiting uploads to 'application/xml' and a max_size of 1024 * 100.
validate_file = FileValidator(
    content_types=('application/xml',),
    max_size=1024 * 100,
)
file = models.FileField(
    validators=[validate_file],
    upload_to=settings.XML_ROOT,
)
From django 1.11, you can also use FileExtensionValidator.
from django.core.validators import FileExtensionValidator
class UploadedFile(models.Model):
    """Model whose ``file`` field only accepts the ``xml`` extension."""

    file = models.FileField(
        validators=[FileExtensionValidator(allowed_extensions=['xml'])],
        upload_to=settings.XML_ROOT,
    )
Note this must be used on a FileField and won't work on a CharField (for example), since the validator validates on value.name.
ref: https://docs.djangoproject.com/en/dev/ref/validators/#fileextensionvalidator
For posterity: the solution is to use the read method and pass that to magic.from_buffer.
class UploadedFileForm(ModelForm):
    """Model form that only accepts uploads recognised as XML."""

    class Meta:
        model = models.UploadedFile
        exclude = ('project',)

    def clean_file(self):
        """Return the uploaded file after checking that it is XML.

        :raises ValidationError: if libmagic's description lacks "XML".
        """
        file = self.cleaned_data.get("file", False)
        # Nothing was uploaded: there is no content to sniff, and calling
        # ``read`` on the ``False`` fallback would raise AttributeError.
        if not file:
            return file
        filetype = magic.from_buffer(file.read())
        # Rewind so the file can be read from the start afterwards.
        file.seek(0)
        if "XML" not in filetype:
            raise ValidationError("File is not XML.")
        return file
I think what you want to do is to clean the uploaded file in Django's Form.clean_your_field_name_here() methods - the data is available on your system by then if it was submitted as normal HTTP POST request.
Also if you consider this inefficient explore the options of different Django file upload backends and how to do streaming processing.
If you need to consider the security of the system when dealing with uploads
Make sure uploaded file has correct extension
Make sure the mimetype matches the file extension
In the case you are worried about user's uploading exploit files (for attacking against your site)
Rewrite all the file contents on save to get rid of possible extra (exploit) payload (so you cannot embed HTML in XML which the browser would interpret as a site-origin HTML file when downloading)
Make sure you use content-disposition header on download
Some more info here: http://opensourcehacker.com/2013/07/31/secure-user-uploads-and-exploiting-served-user-content/
Below is my example how I sanitize the uploaded images:
class Example(models.Model):
    """Model with a single optional image field."""

    image = models.ImageField(
        null=True,
        blank=True,
        upload_to=filename_gen("participant-images/"),
    )
class Example(forms.ModelForm):
    """Model form that sanitizes the uploaded image during cleaning."""

    def clean_image(self):
        """Run the image through ``utils.ensure_safe_user_image`` and return it."""
        uploaded = self.cleaned_data.get('image', False)
        utils.ensure_safe_user_image(uploaded)
        return uploaded
def ensure_safe_user_image(image):
    """Perform various checks to sanitize user uploaded image data.

    Checks the size and the sniffed MIME type, then re-encodes the image
    through PIL and replaces the upload's content with the result. A
    falsy ``image`` is accepted and left untouched.

    :param image: InMemoryUploadedFile instance (Django form field value)
    :raise: ValidationError in the case the image content has issues
    """
    if not image:
        return

    assert isinstance(image, InMemoryUploadedFile), "Image rewrite has been only tested on in-memory upload backend"

    # Make sure the image is not too big, so that PIL does not thrash the
    # server. Uses the public ``size`` attribute, not the private ``_size``.
    if image.size > 4*1024*1024:
        raise ValidationError("Image file too large - the limit is 4 megabytes")

    # Then peek at the header to see what the image claims to be
    image.file.seek(0)
    mime = magic.from_buffer(image.file.getvalue(), mime=True)
    if mime not in ("image/png", "image/jpeg"):
        raise ValidationError("Image is not valid. Please upload a JPEG or PNG image.")

    doc_type = mime.split("/")[-1].upper()

    # Read data from the in-memory buffer
    image.file.seek(0)
    pil_image = Image.open(image.file)

    # Rewrite the image contents in the memory
    # (bails out with exception on bad data)
    buf = StringIO()
    pil_image.thumbnail((2048, 2048), Image.ANTIALIAS)
    pil_image.save(buf, doc_type)
    # Rewind: after save() the position is at the end, so a plain read()
    # of the replaced file would return nothing.
    buf.seek(0)
    image.file = buf

    # Make sure the image has valid extension (can't upload .htm image)
    extension = unicode(doc_type.lower())
    if not image.name.endswith(u".%s" % extension):
        image.name = image.name + u"." + extension
I recently found an interesting package that can do upload file validation. You can see the package here. The package's approach is similar to sultan's answer, so we can use it right away.
from upload_validator import FileTypeValidator
validator = FileTypeValidator(
    allowed_types=['application/msword'],
    allowed_extensions=['.doc', '.docx']
)

# A .doc file is binary, so open it in 'rb' mode; the ``with`` block closes
# the handle even when validation raises.
with open('sample.doc', 'rb') as file_resource:
    # ValidationError will be raised in case of invalid type or extension
    validator(file_resource)
I'm trying to save images which have been passed to me as Base64 encoded text into a Django Imagefield.
But it seems to not be saving correctly. The database reports all my images are stored as "" when it should report them as a filename for example:
"template_images/template_folders/myImage.png"
The code that's trying to save my images is as follows:
elif model_field.get_internal_type() == "ImageField" or model_field.get_internal_type() == "FileField": # Convert files from base64 back to a file.
if field_elt.text is not None:
setattr(instance, model_field.name, File(b64decode(field_elt.text)))
After reading this answer, I got this to work:
from base64 import b64decode
from django.core.files.base import ContentFile
# Decode the base64 text into raw bytes.
image_data = b64decode(b64_text)
# Wrap the bytes in a named ContentFile and assign it to the image field.
my_model_instance.cool_image_field = ContentFile(
    image_data,
    name='whatup.png',
)
my_model_instance.save()
Therefore, I suggest you change your code to:
from django.core.files.base import ContentFile
# Your other code...
elif model_field.get_internal_type() == "ImageField" or model_field.get_internal_type() == "FileField": # Convert files from base64 back to a file.
if field_elt.text is not None:
image_data = b64decode(field_elt.text)
setattr(instance, model_field.name, ContentFile(image_data, 'myImage.png'))
Then, assuming your ImageField is defined with the upload_to argument set to template_images/template_folders/, you should see the file save down to YOUR_MEDIA_URL/template_images/template_folders/myImage.png
Another good approach based on this SO answer: https://stackoverflow.com/a/28036805/6143656 tried it and tested in django 1.10
I made a function for decoded base64 file.
def decode_base64_file(data):
    """Decode a base64 string into a Django ``ContentFile``.

    :param data: base64 text, optionally in ``data:<mime>;base64,<payload>``
        form.
    :returns: ``ContentFile`` named with a 12-character random prefix, plus
        the detected image extension when one is recognised.
    :raises TypeError: ``'invalid_image'`` when ``data`` is not a string or
        is not valid base64.
    """
    import base64
    # NOTE: imghdr is deprecated since Python 3.11 and removed in 3.13.
    import imghdr
    import uuid

    import six
    from django.core.files.base import ContentFile

    # Anything that is not a string cannot be a base64 payload.
    if not isinstance(data, six.string_types):
        raise TypeError('invalid_image')

    # Strip the "data:...;base64," header; split once so a payload that
    # happens to contain the marker is not cut again.
    if 'data:' in data and ';base64,' in data:
        _header, data = data.split(';base64,', 1)

    try:
        decoded_file = base64.b64decode(data)
    except (TypeError, ValueError):
        # Python 2 raises TypeError here, Python 3 raises binascii.Error
        # (a ValueError subclass). Raise, do not merely construct, the error.
        raise TypeError('invalid_image')

    # Generate file name: 12 characters are more than enough.
    file_name = str(uuid.uuid4())[:12]

    # Detect the image type from the decoded bytes.
    extension = imghdr.what(file_name, decoded_file)
    if extension == "jpeg":
        extension = "jpg"

    # An unrecognised type gets no suffix rather than a literal ".None".
    if extension:
        complete_file_name = "%s.%s" % (file_name, extension, )
    else:
        complete_file_name = file_name

    return ContentFile(decoded_file, name=complete_file_name)
Then you can call the function
# NOTE(review): assumes the helper lives in a module named
# ``decode_base64_file``; adjust the module path to your project layout.
from decode_base64_file import decode_base64_file

# Spelled ``decode_base64_file``, matching the function's definition.
p = Post(content='My Picture', image=decode_base64_file(your_base64_file))
p.save()
I guess this is the cleanest and shortest way to do this.
Here is how you can handle a Base64 encoded image file in a post request at the Django-based (drf also) API end which saves it as an ImageField.
Let say you have a Model as follows:
class MyImageModel(models.Model):
    """Model storing an image together with a text value."""

    image = models.ImageField(upload_to='geo_entity_pic')
    # CharField requires max_length. NOTE(review): 255 is a placeholder -
    # pick the limit your data needs.
    data = models.CharField(max_length=255)
So the Corresponding Serializer would be as follows:
from drf_extra_fields.fields import Base64ImageField
class MyImageModelSerializer(serializers.ModelSerializer):
    """Serializer for ``MyImageModel`` taking the image as base64 text."""

    image = Base64ImageField()

    class Meta:
        # Must be spelled ``Meta``; a lowercase ``meta`` class is ignored.
        model = MyImageModel
        fields = ('data', 'image')

    def create(self, validated_data):
        """Create and return a ``MyImageModel`` from the validated data."""
        image = validated_data.pop('image')
        data = validated_data.pop('data')
        return MyImageModel.objects.create(data=data, image=image)
The corresponding View can be as follows:
elif request.method == 'POST':
serializer = MyImageModelSerializer(data=request.data)
if serializer.is_valid():
serializer.save()
return Response(serializer.data, status=201)
return Response(serializer.errors, status=400)
Notice In the Serializer I have used Implementation of Base64ImageField provided in the module django-extra-field
To install this module run the command
pip install django-extra-fields
Import the same and Done!
Send (via post method) your image as an Base64 encoded String in JSON object along with any other data you have.
Is there a way to get the content type of an upload file when overwriting the models save method? I have tried this:
def save(self, *args, **kwargs):
    """Print the upload's content type, then save the instance.

    Accepts and forwards the arguments Django may pass to ``save``.
    """
    # The field file itself has no ``content_type``; only the wrapped
    # uploaded file carries one, and only for a fresh upload.
    content_type = None
    if self.file:
        content_type = getattr(self.file.file, 'content_type', None)
    print(content_type)
    super(Media, self).save(*args, **kwargs)
But it did not work. In this example, self.file is a model.FileField:
# File field whose upload path is the pattern 'uploads/%m-%Y/'.
# NOTE(review): %m and %Y are presumably expanded strftime-style to month
# and year when the file is saved - confirm.
file = models.FileField(upload_to='uploads/%m-%Y/')
Edit: I want to be able to save the content type to the database, so I'll need it before the save is actually complete :)
class MyForm(forms.ModelForm):
    """Model form that checks the uploaded file's type and size."""

    def clean_file(self):
        """Return the file after validating its extension, type and size.

        :raises forms.ValidationError: if the name has no extension, the
            main content type is not in ``TASK_UPLOAD_FILE_TYPES``, or the
            size exceeds ``TASK_UPLOAD_FILE_MAX_SIZE``.
        """
        file = self.cleaned_data['file']
        if not file:
            return file

        try:
            file_type = file.content_type.split('/')[0]
        except AttributeError:
            # No content_type on this object. NOTE(review): presumably a
            # previously stored file rather than a fresh upload - confirm.
            # Only this case is tolerated; a blanket ``except: pass`` would
            # also swallow the ValidationErrors raised below.
            return file

        if len(file.name.split('.')) == 1:
            raise forms.ValidationError(_('File type is not supported'))

        if file_type not in settings.TASK_UPLOAD_FILE_TYPES:
            raise forms.ValidationError(_('File type is not supported'))

        # The setting may be stored as a string; compare as an integer.
        max_size = int(settings.TASK_UPLOAD_FILE_MAX_SIZE)
        if file.size > max_size:
            raise forms.ValidationError(_('Please keep filesize under %s. Current filesize %s') % (filesizeformat(max_size), filesizeformat(file.size)))

        return file
settings.py
# Accepted values for the upload's content-type check.
TASK_UPLOAD_FILE_TYPES = ['pdf', 'vnd.oasis.opendocument.text','vnd.ms-excel','msword','application',]
# Maximum upload size in bytes (5 MiB). Stored as an integer so it can be
# compared with a file's size directly.
TASK_UPLOAD_FILE_MAX_SIZE = 5242880
You can use PIL or magic to read the few first bytes and get the MIME type that way. I wouldn't trust the content_type since anyone can fake an HTTP header.
Magic solution below. For a PIL implementation you can get an idea from django's get_image_dimensions.
import magic
def get_mime_type(file):
    """Return the MIME type detected from the first 2048 bytes of ``file``.

    The file's position is saved before reading and restored afterwards.
    """
    saved_offset = file.tell()
    file.seek(0)
    header = file.read(2048)
    detected = magic.from_buffer(header, mime=True)
    file.seek(saved_offset)
    return detected
File is the in-memory uploaded file in the view.
I'm using Django Rest Framework and this is the simplest way to determine content type/mime type:
# Look up the "file" entry in the request data (None when absent).
file = request.data.get("file") # type(file) = 'django.core.files.uploadedfile.InMemoryUploadedFile'
# Print the content type recorded on the uploaded file object.
print(file.content_type)
Let's say I have uploaded a JPEG image then my output would be:
image/jpeg
Let me know in the comments if this serves your purpose.
Need to override the save method in the model class
def save(self, *args, **kwargs):
    """Record the upload's content type in ``mime_type``, then save."""
    if self.file and self.file.file:
        try:
            self.mime_type = self.file.file.content_type
        except AttributeError:
            # The underlying file has no content_type (no fresh upload),
            # so leave mime_type unchanged.
            pass
    # Without this call the override never writes the row.
    super().save(*args, **kwargs)
Taking an assumption that our model has file column(FileField), and mime_type column (CharField)