Django - Get uploaded file type / mimetype - django

Is there a way to get the content type of an uploaded file when overriding the model's save method? I have tried this:
def save(self):
    # Try to read the MIME type directly from the model's file attribute.
    # NOTE(review): the question reports that this attempt "did not work";
    # presumably the FileField value does not expose content_type itself -- confirm.
    print(self.file.content_type)
    super(Media, self).save()
But it did not work. In this example, self.file is a models.FileField:
file = models.FileField(upload_to='uploads/%m-%Y/')
Edit: I want to be able to save the content type to the database, so I'll need it before the save is actually complete :)

class MyForm(forms.ModelForm):
    """Model form that validates the declared type and the size of ``file``."""

    def clean_file(self):
        """Validate the uploaded file's extension, content type and size.

        Returns:
            The value of ``cleaned_data['file']`` (possibly falsy when nothing
            was uploaded).

        Raises:
            forms.ValidationError: if the file name has no extension, the
                major content type is not listed in
                ``settings.TASK_UPLOAD_FILE_TYPES``, or the file is larger
                than ``settings.TASK_UPLOAD_FILE_MAX_SIZE`` bytes.
        """
        file = self.cleaned_data['file']
        if not file:
            return file

        # Only fresh uploads carry a client-declared content type; anything
        # without one is returned untouched instead of being rejected.
        content_type = getattr(file, 'content_type', None)
        if content_type is None:
            return file

        # A name without any dot has no extension at all.
        if len(file.name.split('.')) == 1:
            raise forms.ValidationError(_('File type is not supported'))

        # Only the major type (the part before the slash) is compared.
        file_type = content_type.split('/')[0]
        if file_type not in settings.TASK_UPLOAD_FILE_TYPES:
            raise forms.ValidationError(_('File type is not supported'))

        # The setting may be stored as a string; normalise before comparing.
        max_size = int(settings.TASK_UPLOAD_FILE_MAX_SIZE)
        if file.size > max_size:
            raise forms.ValidationError(
                _('Please keep filesize under %s. Current filesize %s')
                % (filesizeformat(max_size), filesizeformat(file.size))
            )
        return file
settings.py
# Values compared against the part of the upload's content type before the slash.
TASK_UPLOAD_FILE_TYPES = ['pdf', 'vnd.oasis.opendocument.text', 'vnd.ms-excel', 'msword', 'application']
# Maximum upload size in bytes (5 * 1024 * 1024). Kept as an int so it can be
# compared directly with a file's size.
TASK_UPLOAD_FILE_MAX_SIZE = 5242880

You can use PIL or magic to read the few first bytes and get the MIME type that way. I wouldn't trust the content_type since anyone can fake an HTTP header.
Magic solution below. For a PIL implementation you can get an idea from django's get_image_dimensions.
import magic
def get_mime_type(file):
    """
    Get MIME by reading the header of the file

    The first 2048 bytes are handed to ``magic.from_buffer``. The stream
    position the caller had is restored afterwards, even when reading or
    sniffing raises.

    :param file: seekable binary file-like object (e.g. an uploaded file)
    :return: the MIME type reported by ``magic.from_buffer(..., mime=True)``
    """
    initial_pos = file.tell()
    try:
        file.seek(0)
        return magic.from_buffer(file.read(2048), mime=True)
    finally:
        # Always hand the stream back where the caller left it.
        file.seek(initial_pos)
File is the in-memory uploaded file in the view.

I'm using Django Rest Framework and this is the simplest way to determine content type/mime type:
# Fetch the value stored under the "file" key of the request data.
file = request.data.get("file") # type(file) = 'django.core.files.uploadedfile.InMemoryUploadedFile'
# Print the upload's content type (e.g. image/jpeg for a JPEG upload).
# NOTE(review): presumably this is the value declared by the client, not sniffed -- confirm.
print(file.content_type)
Let's say I have uploaded a JPEG image then my output would be:
image/jpeg
Let me know in the comments if this serves your purpose.

Need to override the save method in the model class
def save(self, *args, **kwargs):
    """Record the upload's content type in ``mime_type``, then save the model.

    ``mime_type`` is only assigned when the wrapped file object exposes a
    ``content_type`` attribute; otherwise the existing value is left
    untouched. All arguments are forwarded to the parent ``save``.
    """
    if self.file and self.file.file:
        try:
            self.mime_type = self.file.file.content_type
        except AttributeError:
            # No content_type on this file object: keep the stored mime_type.
            pass
    # Without this call the override would never persist the instance.
    super().save(*args, **kwargs)
This assumes that the model has a file column (FileField) and a mime_type column (CharField).

Related

I/O operation on closed file. when trying to read a file content on django model save()

This is my Django Model where I'm trying to store the content of an uploaded file in field attachments:
class CsvFile(models.Model):
    """Uploaded CSV file plus a text copy of its content in ``attachment``."""

    processed = models.BooleanField(default=False)
    # NOTE(review): str(uuid4()) is evaluated once, when this class body runs,
    # so the same value is reused as the default for every new instance.
    uid = models.UUIDField(unique=True, default=str(uuid4()))
    # NOTE(review): both defaults below are likewise computed once at class
    # definition time, not at each save.
    date = models.DateField(null=False, default=datetime.datetime.now().date())
    time = models.TimeField(null=False, default=datetime.datetime.now().time())
    original_filename = models.CharField(max_length=600, blank=True)
    attachment = models.TextField(blank=True)
    file = models.FileField(upload_to=f"csv/", blank=True)

    def save_file_content_to_attachment(self, file):
        """Read ``file`` into ``self.attachment``; fall back to '' on FileNotFoundError or ValueError."""
        try:
            # NOTE(review): leaving the ``with`` block closes the file; presumably
            # this is what leads to the "I/O operation on closed file" error
            # reported for save() -- confirm against the traceback.
            with file.open('r') as f:
                self.attachment = f.read()
        except (FileNotFoundError, ValueError):
            self.attachment = ''

    def save(self, *args, **kwargs):
        """Copy the file name and content onto the instance, then save it."""
        # Remember the file's name in original_filename
        self.original_filename = self.file.name
        # Print a message to help troubleshoot the issue
        print(f"Saving file content to attachment for file {self.file.name}")
        # Save the file content to the attachment field
        self.save_file_content_to_attachment(self.file)
        super(CsvFile, self).save(*args, **kwargs)

    def delete(self, *args, **kwargs):
        """Delete the stored file (if any) and then the database row."""
        # Delete the file from storage
        try:
            default_storage.delete(self.file.name)
        except FileNotFoundError:
            pass # File does not exist, so we can ignore the exception
        super(CsvFile, self).delete(*args, **kwargs)
Unfortunately, uploading a file fails with an I/O error.
Here is the full backtrace
https://hastebin.skyra.pw/mizawicane.css
Can somebody shed light into why this is not working?
In case others run into the same problem.
This is how I solved it:
def save_file_content_to_attachment(self, file):
    """Copy the text content of ``file`` into ``self.attachment``.

    The file is deliberately left open and rewound to the start so that it
    can still be read after this method returns. ``self.attachment`` becomes
    an empty string when there is no file, when it is not readable, or when
    it cannot be opened or read.

    Args:
        file: file-like object exposing ``open``, ``read`` and ``seek``
            (presumably the model's file field value -- confirm with caller).
    """
    self.attachment = ''
    # An empty file field or an object without read() has nothing to copy.
    if not file or not hasattr(file, 'read'):
        return
    try:
        # Call open() unconditionally: opening only when the file was *not*
        # closed left a closed file unreadable.
        file.open('r')
        content = file.read()
        # Rewind so later readers start from the beginning again.
        file.seek(0)
    except (FileNotFoundError, ValueError):
        # Missing on disk, or closed and not reopenable: keep the empty string.
        return
    # Binary file objects return bytes; the attachment holds text.
    if isinstance(content, bytes):
        content = content.decode('utf-8', errors='replace')
    self.attachment = content

Django and SuspiciousFileOperation:Detected path traversal attempt

I find myself in an odd situation only when deployed (debug == false):
My model throws a path traversal attempt exception. I want to create a directory for every file uploaded and save the file within the directory (some.zip) used in example. In my dev environment I have no problems and everything works just fine.
models.py:
class Template(models.Model):
    """Named template whose uploaded file is stored in a per-file directory."""

    def get_folder(self, filename):
        """Return ``TEMPLATES_FOLDER/<stem of filename>/<filename>`` as a Path."""
        filename_PATH = Path(filename)
        # Directory name is the file name without its final suffix.
        template_dir = filename_PATH.stem
        # NOTE(review): the reported error shows an absolute path built here;
        # presumably the upload_to result is expected to be relative -- confirm.
        return Path(settings.TEMPLATES_FOLDER).joinpath(template_dir, filename)

    name = models.CharField("template", max_length=32, unique=True)
    # get_folder is used as the upload_to callable for this field.
    file = models.FileField("templatefile", upload_to=get_folder, null=True, max_length=260, storage=OverwriteStorage())
class OverwriteStorage(FileSystemStorage): # defined above Template in the real module
    """File system storage that replaces an existing file of the same name."""

    def get_available_name(self, name, max_length=None):
        """Delete whatever is stored under ``name`` and return ``name`` unchanged."""
        self.delete(name)
        return name
forms.py:
class TemplateAdminForm(forms.ModelForm):
    """Form for Template that unpacks an uploaded .zip during clean()."""

    def __init__(self,*args,**kwargs):
        super().__init__(*args, **kwargs)

    class Meta:
        model = Template
        fields = ["name", "file", ]

    def clean(self):
        """Unpack the upload if it is a .zip; reject any other suffix.

        Raises:
            forms.ValidationError: when the uploaded file's suffix is not ".zip".
        """
        cleaned_data = super().clean()
        # str() of the uploaded file is used as its name to inspect the suffix.
        upFile = Path(str(cleaned_data["file"]))
        if upFile.suffix == ".zip":
            # NOTE(review): the target path is derived from the "name" field,
            # not from the uploaded file's name -- confirm this is intended.
            path = self.instance.get_folder(cleaned_data["name"])
            logging.error(f"{path}")
            unpack_zip(path) ## works! the directory is created/filled
        else:
            raise forms.ValidationError("unknown file type ...")
        logging.error("DONE!") # I see this output
        return cleaned_data
## signal to see when the error might be happening:
@receiver(post_save, sender=Template)
def testing(sender, **kwargs):
    """Debug hook: log a message whenever a Template's post_save signal fires."""
    logging.error("we never get here")
settings.py:
# Directory under MEDIA_ROOT that holds one sub-directory per uploaded template.
TEMPLATES_FOLDER = Path(MEDIA_ROOT).joinpath("TEMPLATES")
but:
ERROR:django.security.SuspiciousFileOperation:Detected path traversal attempt in '/opt/project/media_root/TEMPLATES/some/some' WARNING:django.request:Bad Request: /admin/appName/template/add/
Edit:
Because of this discussion it might be important, this is happening on django 3.2.8
I get the same error on Django 3.2.6 when opening a file with mode "wb" at an absolute path, without using a temporary file. I have read that a temporary file is recommended in order to avoid this problem, so I will link that answer and share my experience in case it helps you with your deployment.
Here's where it's been advised: answer
One possible solution would be to move that directory under the django project root folder and address it with a relative path. I'd try to use this too in order to understand how you could achieve this:
import os
print("WORKING DIRECTORY: " + os.getcwd())
An article on this topic suggests to use the following code (when dealing with an image file in that case): link
from django.core.files.temp import NamedTemporaryFile
from django.core import files
image_temp_file = NamedTemporaryFile(delete=True)
# Copy the source file into the temporary file in 8 KiB blocks.
# The source is opened in a ``with`` block so it is always closed.
with open('/path/to/file', 'rb') as in_memory_image:
    # iter(callable, sentinel) keeps calling read() until it returns b'' (EOF).
    # Looping over a single read() result would yield individual byte values
    # and stop after the first 8 KiB.
    for block in iter(lambda: in_memory_image.read(1024 * 8), b''):
        # Write image block to temporary file
        image_temp_file.write(block)
file_name = 'temp.png'  # Choose a unique name for the file
# Flush buffered writes so the temporary file is complete before it is wrapped.
image_temp_file.flush()
temp_file = files.File(image_temp_file, name=file_name)
Lets go through the code:
Create a NamedTemporaryFile instead of TemporaryFile as Django’s ImageField requires file name.
Iterate over your in-memory file and write blocks of data to the NamedTemporaryFile object.
Flush the file to ensure the file is written to the storage.
Change the temporary file to a Django’s File object.
You can assign this file to Django models directly and save it.
>>> from blog.models import Blog
>>> b = Blog.objects.first()
>>> b.image = temp_file
>>> b.save()
I personally solved my SuspiciousFileOperation problems by addressing my directory with "BASE_DIR" from settings.py as the beginning of the path (nothing above that level in the filesystem), using a NamedTemporaryFile and by using the model FileField save() method appropriately like this:
# inside a model class save(self, *args, **kwargs) method
# file_name is the file name alone, no path to the file
self.myfilefield.save(file_name, temporary_file_object, save=False) # and then call the super().save(*args, **kwargs) inside the save() method of your model

Validate a file before Uploading using python magic

I am using python magic to validate a file before uploading so for that I am following the below link:
https://djangosnippets.org/snippets/3039/
validators.py file:
from django.core.exceptions import ValidationError
import magic
class MimetypeValidator(object):
    """Validator that accepts a file only if its sniffed MIME type is allowed.

    Args:
        mimetypes: a single MIME type string or an iterable of MIME type
            strings that are acceptable.
    """

    def __init__(self, mimetypes):
        # A bare string would turn the membership test in __call__ into a
        # substring check, so wrap it in a one-element tuple.
        if isinstance(mimetypes, str):
            mimetypes = (mimetypes,)
        self.mimetypes = tuple(mimetypes)

    def __call__(self, value):
        """Raise ValidationError unless ``value``'s MIME type is allowed.

        Raises:
            ValidationError: if ``value`` is not a readable, seekable file or
                its detected MIME type is not in ``self.mimetypes``.
        """
        try:
            value.seek(0)
            mime = magic.from_buffer(value.read(1024), mime=True)
            # Rewind so whoever handles the file next sees it from the start.
            value.seek(0)
        except AttributeError:
            raise ValidationError('This value could not be validated for file type: %s' % value)
        # The result may be bytes or str depending on the magic binding in use.
        if isinstance(mime, bytes):
            mime = mime.decode(encoding='UTF-8')
        if mime not in self.mimetypes:
            raise ValidationError('%s is not an acceptable file type' % value)
here is my form.py file:
class FileForm(forms.ModelForm):
    """Upload form whose ``file`` field only accepts PDF documents."""

    file = forms.FileField(
        label='Select a File *',
        allow_empty_file=False,
        # Pass a list: with a bare string the validator's ``in`` test would be
        # a substring match instead of a membership test.
        validators=[MimetypeValidator(['application/pdf'])],
        help_text='Max. Size - 25 MB')

    class Meta:
        model = File
        # The trailing comma makes this a tuple; ('file') is just a string.
        fields = ('file',)
So I am able to upload a PDF file with this python-magic logic, but I also want to allow uploading a TIFF image file and to restrict the file size to 25 MB.
How can I implement this by using python magic?
You don't need any library to do this - you can check the uploaded size of a file in the clean method on the form:
def clean_file(self):
    """Return the uploaded ``file`` unless it is larger than 25,000,000 bytes.

    Raises:
        ValidationError: when the file's size exceeds the limit.
    """
    uploaded = self.cleaned_data['file']
    if uploaded.size <= 25000000:
        return uploaded
    raise ValidationError('The file is too big')

Django: Validate file type of uploaded file

I have an app that lets people upload files, represented as UploadedFiles. However, I want to make sure that users only upload xml files. I know I can do this using magic, but I don't know where to put this check - I can't put it in the clean function since the file is not yet uploaded when clean runs, as far as I can tell.
Here's the UploadedFile model:
class UploadedFile(models.Model):
    """This represents a file that has been uploaded to the server."""
    # Processing states stored in ``status``.
    STATE_UPLOADED = 0
    STATE_ANNOTATED = 1
    STATE_PROCESSING = 2
    STATE_PROCESSED = 4
    STATES = (
        (STATE_UPLOADED, "Uploaded"),
        (STATE_ANNOTATED, "Annotated"),
        (STATE_PROCESSING, "Processing"),
        (STATE_PROCESSED, "Processed"),
    )
    status = models.SmallIntegerField(choices=STATES,
        default=0, blank=True, null=True)
    file = models.FileField(upload_to=settings.XML_ROOT)
    # NOTE(review): no on_delete argument is given; newer Django versions
    # presumably require one -- confirm the target version.
    project = models.ForeignKey(Project)

    def __unicode__(self):
        """Return the stored file's name."""
        return self.file.name

    def name(self):
        """Return the file name without its directory part."""
        return os.path.basename(self.file.name)

    def save(self, *args, **kwargs):
        """Default a falsy status to STATE_UPLOADED, then save."""
        # STATE_UPLOADED is 0, so this branch also runs when status is already 0.
        if not self.status:
            self.status = self.STATE_UPLOADED
        super(UploadedFile, self).save(*args, **kwargs)

    def delete(self, *args, **kwargs):
        """Remove the file from disk and storage, then delete the row."""
        # NOTE(review): the file is removed via os.remove and then again via
        # the field's delete(); presumably one of the two is redundant -- confirm.
        os.remove(self.file.path)
        self.file.delete(False)
        super(UploadedFile, self).delete(*args, **kwargs)

    def get_absolute_url(self):
        """Return the project upload URL built from this row's id."""
        return u'/upload/projects/%d' % self.id

    def clean(self):
        """Raise ValidationError unless magic's description contains "XML"."""
        # NOTE(review): a URL is passed here; magic.from_file presumably
        # expects a filesystem path -- this is the check the question asks about.
        if not "XML" in magic.from_file(self.file.url):
            raise ValidationError(u'Not an xml file.')
class UploadedFileForm(forms.ModelForm):
    """Model form for UploadedFile that leaves out the ``project`` field."""

    class Meta:
        model = UploadedFile
        exclude = ('project',)
Validating files is a common challenge, so I would like to use a validator:
import magic
from django.core.exceptions import ValidationError
from django.template.defaultfilters import filesizeformat
from django.utils.deconstruct import deconstructible
@deconstructible
class FileValidator(object):
    """Validate an uploaded file's size and sniffed content type.

    Args:
        max_size: largest allowed size in bytes, or None for no upper limit.
        min_size: smallest allowed size in bytes, or None for no lower limit.
        content_types: iterable of allowed MIME types; empty disables the
            content-type check.
    """

    error_messages = {
        'max_size': ("Ensure this file size is not greater than %(max_size)s."
                     " Your file size is %(size)s."),
        'min_size': ("Ensure this file size is not less than %(min_size)s. "
                     "Your file size is %(size)s."),
        'content_type': "Files of type %(content_type)s are not supported.",
    }

    def __init__(self, max_size=None, min_size=None, content_types=()):
        self.max_size = max_size
        self.min_size = min_size
        self.content_types = content_types

    def __call__(self, data):
        """Raise ValidationError if ``data`` violates any configured limit.

        Raises:
            ValidationError: with code ``max_size``, ``min_size`` or
                ``content_type`` depending on which check failed.
        """
        if self.max_size is not None and data.size > self.max_size:
            params = {
                'max_size': filesizeformat(self.max_size),
                'size': filesizeformat(data.size),
            }
            raise ValidationError(self.error_messages['max_size'],
                                  'max_size', params)

        if self.min_size is not None and data.size < self.min_size:
            params = {
                'min_size': filesizeformat(self.min_size),
                'size': filesizeformat(data.size),
            }
            raise ValidationError(self.error_messages['min_size'],
                                  'min_size', params)

        if self.content_types:
            # Sniff only the head of the file instead of loading all of it
            # into memory; rewind before and after so the result does not
            # depend on, or disturb, the current read position.
            data.seek(0)
            content_type = magic.from_buffer(data.read(2048), mime=True)
            data.seek(0)

            if content_type not in self.content_types:
                params = {'content_type': content_type}
                raise ValidationError(self.error_messages['content_type'],
                                      'content_type', params)

    def __eq__(self, other):
        return (
            isinstance(other, FileValidator) and
            self.max_size == other.max_size and
            self.min_size == other.min_size and
            self.content_types == other.content_types
        )

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable; hash the
        # same fields that __eq__ compares.
        return hash((self.max_size, self.min_size, tuple(self.content_types)))
Then you can use FileValidator in your models.FileField or forms.FileField as follows:
# Accept files up to 1024 * 100 bytes whose detected type is application/xml.
validate_file = FileValidator(max_size=1024 * 100,
                              content_types=('application/xml',))
# Attach the validator to the field so it runs during model/form validation.
file = models.FileField(upload_to=settings.XML_ROOT,
                        validators=[validate_file])
From django 1.11, you can also use FileExtensionValidator.
from django.core.validators import FileExtensionValidator
class UploadedFile(models.Model):
    """Uploaded file restricted to the ``xml`` extension."""

    # FileExtensionValidator checks the extension of the uploaded file's name.
    file = models.FileField(upload_to=settings.XML_ROOT,
        validators=[FileExtensionValidator(allowed_extensions=['xml'])])
Note this must be used on a FileField and won't work on a CharField (for example), since the validator validates on value.name.
ref: https://docs.djangoproject.com/en/dev/ref/validators/#fileextensionvalidator
For posterity: the solution is to use the read method and pass that to magic.from_buffer.
class UploadedFileForm(ModelForm):
    """Model form for UploadedFile that only accepts XML uploads."""

    def clean_file(self):
        """Return the uploaded file after checking that magic reports XML.

        Returns:
            The cleaned ``file`` value; a falsy value is returned unchanged
            when no file is present.

        Raises:
            ValidationError: if the detected file type does not mention "XML".
        """
        file = self.cleaned_data.get("file", False)
        # Nothing uploaded (or the field already failed): there is nothing to read.
        if not file:
            return file
        # Sniff only the head of the file and leave the stream rewound.
        file.seek(0)
        filetype = magic.from_buffer(file.read(2048))
        file.seek(0)
        if "XML" not in filetype:
            raise ValidationError("File is not XML.")
        return file

    class Meta:
        model = models.UploadedFile
        exclude = ('project',)
I think what you want to do is to clean the uploaded file in Django's Form.clean_your_field_name_here() methods - the data is available on your system by then if it was submitted as normal HTTP POST request.
Also if you consider this inefficient explore the options of different Django file upload backends and how to do streaming processing.
If you need to consider the security of the system when dealing with uploads
Make sure uploaded file has correct extension
Make sure the mimetype matches the file extension
In the case you are worried about user's uploading exploit files (for attacking against your site)
Rewrite all the file contents on save to get rid of possible extra (exploit) payload (so you cannot embed HTML in XML which the browser would interpret as a site-origin HTML file when downloading)
Make sure you use content-disposition header on download
Some more info here: http://opensourcehacker.com/2013/07/31/secure-user-uploads-and-exploiting-served-user-content/
Below is my example how I sanitize the uploaded images:
class Example(models.Model):
    """Model with one optional image field."""

    # The upload path comes from the filename_gen helper (defined elsewhere).
    image = models.ImageField(upload_to=filename_gen("participant-images/"), blank=True, null=True)
class Example(forms.ModelForm):
    """Model form that sanitizes the uploaded image during cleaning."""

    def clean_image(self):
        """ Clean the uploaded image attachment.

        Passes the image (or False when absent) to
        utils.ensure_safe_user_image and returns it.
        """
        image = self.cleaned_data.get('image', False)
        utils.ensure_safe_user_image(image)
        return image
def ensure_safe_user_image(image):
    """ Perform various checks to sanitize user uploaded image data.

    Checks the size and the sniffed MIME type, then re-encodes the image
    with PIL so only genuine image data remains, and makes sure the file
    name carries a matching extension. ``image`` is modified in place.

    :param image: InMemoryUploadedFile instance (Django form field value);
        a falsy value is ignored

    :raise: ValidationError in the case the image is too large or is not a
        PNG/JPEG; TypeError if ``image`` is not an InMemoryUploadedFile.
        Errors raised by PIL on bad data propagate unchanged.
    """
    from io import BytesIO

    if not image:
        return

    # An explicit check instead of ``assert``, which is stripped under -O.
    if not isinstance(image, InMemoryUploadedFile):
        raise TypeError("Image rewrite has been only tested on in-memory upload backend")

    # Make sure the image is not too big, so that PIL does not thrash the server
    if image.size > 4*1024*1024:
        raise ValidationError("Image file too large - the limit is 4 megabytes")

    # Then peek at the header to see what the image claims to be
    image.file.seek(0)
    mime = magic.from_buffer(image.file.getvalue(), mime=True)
    if mime not in ("image/png", "image/jpeg"):
        raise ValidationError("Image is not valid. Please upload a JPEG or PNG image.")

    # "PNG" or "JPEG": used as the PIL output format
    doc_type = mime.split("/")[-1].upper()

    # Read data from the in-memory buffer
    image.file.seek(0)
    pil_image = Image.open(image.file)

    # Rewrite the image contents in the memory
    # (bails out with exception on bad data)
    buf = BytesIO()  # encoded image data is binary
    # LANCZOS is the current name of the filter formerly called ANTIALIAS
    pil_image.thumbnail((2048, 2048), Image.LANCZOS)
    pil_image.save(buf, doc_type)
    # Rewind so the next reader starts at the first byte, and keep the
    # reported size in step with the rewritten content.
    buf.seek(0)
    image.file = buf
    image.size = len(buf.getvalue())

    # Make sure the image has valid extension (can't upload .htm image)
    extension = doc_type.lower()
    # ".jpg" is an equally valid spelling for JPEG files
    accepted = (".jpg", ".jpeg") if extension == "jpeg" else ("." + extension,)
    if not image.name.lower().endswith(accepted):
        image.name = image.name + "." + extension
I recently found an interesting package that can validate uploaded files. You can see the package here. Its approach is similar to sultan's answer, so we can use it right away.
from upload_validator import FileTypeValidator
validator = FileTypeValidator(
    allowed_types=['application/msword'],
    allowed_extensions=['.doc', '.docx']
)
# Open in binary mode (a .doc file is not text) and close it when done.
with open('sample.doc', 'rb') as file_resource:
    # ValidationError will be raised in case of invalid type or extension
    validator(file_resource)

How to upload files in Django and save them in a different location depending on the format? (jpeg and doc)

How to upload files in Django and save them (and take other actions in the signal - post_save) in a different location depending on the format? (jpeg and doc)
def upload(request):
    """Bind UploadForm to the request and save the result for the current user."""
    user = request.user
    upload_form = UploadForm(request.POST or None, request.FILES or None)
    if request.method == "POST":
        if upload_form.is_valid():
            # commit=False gives an unsaved instance so the user can be set first.
            my_model = upload_form.save(commit=False)
            my_model.user = user
            my_model.save()
    # NOTE(review): no response is returned in the lines shown -- confirm the
    # snippet is truncated.
models:
class FileStore(models.Model):
    """A user's uploaded file plus an optional file produced from it afterwards."""

    # NOTE(review): no on_delete argument is given; newer Django versions
    # presumably require one -- confirm the target version.
    user = models.ForeignKey(User)
    # The file as uploaded, stored under "standard".
    standard = models.FileField(upload_to="standard")
    # Optional result file, stored under "after_ocr".
    after_operation = models.FileField(upload_to="after_ocr",blank=True, null=True)
signal:
@receiver(post_save, sender=FileStore)
def my_handler(sender, instance, **kwargs):
    """After a FileStore row is first created, extract text from its image.

    Runs only when the signal reports ``created``; the extracted text is
    written to ``instance.after_operation`` and the instance is saved again.
    """
    if kwargs['created']:
        text = image_to_string(Image.open(instance.standard))
        ...
        instance.after_operation = File(text_file)
        # This second save fires post_save again, but with created=False,
        # so the handler does not recurse.
        instance.save()
I want if file is .doc or .pdf save only in standard field and if file is .jpeg or .png I need run my signal function.
For instance, you can retrieve the uploaded file by accessing the request.FILES dictionary like this:
uploaded_file = request.FILES['file']
uploaded_file is now of type UploadedFile which means you can get info about the file like this:
import os

# name of the file, ie: my_file.txt
filename = uploaded_file.name
# file extension including the leading dot, lower-cased; splitext copes with
# extensions of any length (.jpeg, .docx), unlike taking the last 4 chars
file_ext = os.path.splitext(filename)[1].lower()
# handle file extension
if file_ext in ('.jpg', '.jpeg', '.png'):
    pass  # do something for images
elif file_ext in ('.doc', '.pdf'):
    pass  # do something for documents
So now, to save it, you may try this (I haven't tested it yet):
# f is the UploadedFile; `instance` is the model instance that owns the
# FileField (here the question's `standard` field).
# Saving goes through the field, which takes (name, content, save):
# a plain File wrapper has no save() method, and `true` is not a Python name.
instance.standard.save(f.name, f, save=True)
I hope this helps! This may not work out of the box, but I hope it sheds some light on the problem. Try looking at the docs: django files and django uploaded files. This topic is very well documented.
Good luck!