I'm trying to create a way to upload xlsx files and then use celery to perform some actions.
I'm thinking this:
A view to upload the file and save it temporarily
Use celery to execute what I want in the file and then delete it.
I'm trying to do something like this:
class ImportMyFileView(APIView):
    """Upload endpoint that copies the posted file into /tmp."""

    # NOTE(review): FileUploadParser treats the entire request body as the
    # file content, so a multipart/form-data request would be written to disk
    # with its boundary and part headers included -- confirm against the
    # client that sends the upload.
    parser_classes = (FileUploadParser, )

    def post(self, request, filename, format=None):
        # The uploaded content is read from the "file" key of the parsed data.
        my_file = request.data["file"]
        # Copy the upload to disk chunk by chunk instead of reading it at once.
        with open(f"/tmp/{my_file.name}", "wb+") as destination:
            for chunk in my_file.chunks():
                destination.write(chunk)
        # call_celery_here()
        ...
Return something
I can generate the file where I want, but the problem is that when I open the xlsx file, I get this:
--X-INSOMNIA-BOUNDARY
Content-Disposition: form-data
Content-Type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
PK<q^Q_rels/.rels���J1��}��{w�Dd���ЛH}���a7�0u}{�Z���I~��7C��f�G�Fo�Z+���{�����kW�#�VJ$cʪ��l� �n�0�\Q�X^:�`���d�d{�m]_�d����h��V����F�w�^F9��W��-�(F/3�O�DSU�N�l/w�{N(�[��q��T����u<��r�?焮�s9�F����M��h���'h?PKf����
Is there any detail missing?
Here is how I would do it, relying on DRF's built in functionality:
import os
from rest_framework import serializers
from django.core.files.storage import FileSystemStorage
class UploadSerializer(serializers.Serializer):
    """Validate that the request carries an uploaded file under ``file``."""

    file = serializers.FileField()


class UploadView(APIView):
    """Store an uploaded file in the temp directory and hand its path to Celery."""

    ...

    def post(self, request):
        """Save the upload to disk and queue ``celery_func`` with the full path.

        ``is_valid(raise_exception=True)`` raises a validation error when the
        request does not contain a valid file.
        """
        # Local import: the import list of this snippet does not provide it.
        import tempfile

        # Incoming payloads must be passed as ``data=``; the first positional
        # argument of a serializer is the instance to serialize.
        ser = UploadSerializer(data=request.data)
        ser.is_valid(raise_exception=True)
        upload = ser.validated_data['file']
        fs = FileSystemStorage(tempfile.gettempdir())
        # ``save`` needs a name as well as the content, and returns the name
        # that was actually used.
        file_name = fs.save(upload.name, upload)
        full_path = os.path.join(fs.location, file_name)
        celery_func.delay(file_name=full_path)
        return Response("Upload OK")
A more robust way to do this would be to create a model representing your uploads to be processed, and use the django model's FileField.
class Todo(models.Model):
    """An uploaded spreadsheet together with its processing state."""

    xlsx_file = models.FileField(...)  # read the docs on this
    created_at = models.DateTimeField(auto_now_add=True)
    is_complete = models.BooleanField(default=False)


class UploadView(APIView):
    """Persist the upload as a ``Todo`` row and queue it for processing."""

    def post(self, request):
        ...
        # The keyword has to match the model field name ``xlsx_file``;
        # a misspelled keyword makes ``create`` fail with a TypeError.
        todo = Todo.objects.create(
            xlsx_file=ser.validated_data['file']
        )
        # Only the primary key is handed to the task.
        celery_func.delay(todo_id=todo.pk)
        return Response("Upload OK")
Once this works, you can look into using a ModelSerializer, either alone or paired with a ModelViewSet. That's a bigger learning curve, though.
Related
I know that they don't do this, but for one of my pet-projects I want a strange thing: store jinja-templates in the database (and be able to edit them through the admin panel).
There is something like this model (in models.py):
class TbTemplate(models.Model):
    """A Jinja2 template stored in the database, keyed by its path/name."""

    # A CharField needs ``max_length`` (Django's field check rejects it
    # otherwise on most database backends), even when it is the primary key.
    szFileName = models.CharField(
        primary_key=True,
        db_index=True,
        unique=True,
        max_length=255,
        verbose_name="Path/Name"
    )
    szJinjaCode = models.TextField(
        verbose_name='Template',
        help_text='Template Code (jinja2)'
    )
    szDescription = models.CharField(
        max_length=100,
        verbose_name='Description'
    )

    def __unicode__(self):
        """Return "<file name> (<description>)"."""
        return f"{self.szFileName} ({self.szDescription})"

    def __str__(self):
        # Delegates so both spellings always give the same text.
        return self.__unicode__()

    class Meta:
        verbose_name = '[…Template]'
        verbose_name_plural = '[…Templates]'
Next, in view.py you can do something like this:
# -*- coding: utf-8 -*-
from django.http import HttpRequest, HttpResponse
from django.template.loader import render_to_string
from web.models import TbTemplate
def something(request: HttpRequest, template: str) -> HttpResponse:
    """
    Render a template whose source is stored in the database.

    :param request: http-in
    :param template: Template name (primary key of a ``TbTemplate`` row)
    :return response: http-out
    :raises TbTemplate.DoesNotExist: if no template with that name is stored
    """
    # Local import so this block stays self-contained.
    from django.template import engines

    to_template = {}
    # ...
    # ... do smth
    # ...
    tmpl = TbTemplate.objects.get(pk=template)
    # ``render_to_string`` takes a template *name* to resolve through the
    # loaders; template *source* has to be compiled with ``from_string``.
    # NOTE(review): assumes the Jinja2 backend is registered under the alias
    # "jinja2" in settings.TEMPLATES -- adjust if NAME is customised.
    compiled = engines["jinja2"].from_string(tmpl.szJinjaCode)
    html = compiled.render(to_template, request)
    return HttpResponse(html)
And everything works. Templates are available for editing through the admin panel (of course, you need to attach a "mirror"-like widget, e.g. CodeMirror, for syntax highlighting, etc.)...
But I want to use in jinja templates like: {% include "something_template.jinja2" %} ... And for this it is necessary that the templates are not only in the database, but also stored as files in the templates-folder.
In addition, templates are easier to create and edit in IDEs, and access to templates through the admin panel only for cosmetic changes.
And then I need to somehow intercept the "read" method in/for the admin panel. So that if a template in the TbTemplate table is opened for editing in the admin panel, then for the szJinjaCode it was read not from the database, but from the corresponding szFileName-file.
How to do this?
It is done in two steps:
Firstly,
in models.py we will override the save() method of the TbTemplate model. At the same time, we can override the delete() method so that it does not delete anything (or, vice versa, so that it deletes not only the entry in the database but also the corresponding template file... or deletes the entry in the database and renames the corresponding file...). We get this model:
# -*- coding: utf-8 -*-
from django.db import models
from project.settings import *
import os
class TbTemplate(models.Model):
    """A Jinja2 template kept in the database and mirrored to a file under TEMPLATES_DIR."""

    # A CharField needs ``max_length`` (Django's field check rejects it
    # otherwise on most database backends), even when it is the primary key.
    szFileName = models.CharField(
        primary_key=True, db_index=True, unique=True,
        max_length=255,
        verbose_name="Path/Name"
    )
    szJinjaCode = models.TextField(
        default='', null=True, blank=True,
        verbose_name='Template',
        help_text='Template Code (jinja2)'
    )
    szDescription = models.CharField(
        max_length=100,
        verbose_name='Description'
    )

    def __unicode__(self):
        """Return "<file name> (<description>)"."""
        return f"{self.szFileName} ({self.szDescription})"

    def __str__(self):
        return self.__unicode__()

    def save(self, *args, **kwargs):
        """Write the template source to its file, then save the row."""
        # NOTE(review): szFileName is joined to TEMPLATES_DIR unchecked; a
        # value containing ".." would escape the directory -- confirm that
        # only trusted staff can edit it.
        path_filename = TEMPLATES_DIR / self.szFileName
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(os.path.dirname(path_filename), exist_ok=True)
        with open(path_filename, "w+", encoding="utf-8") as file:
            # szJinjaCode is nullable; write an empty file instead of failing on None.
            file.write(self.szJinjaCode or "")
        # TODO: for production, need to add some code for modify
        # touch_reload file for uWSGI reload
        super(TbTemplate, self).save(*args, **kwargs)

    def delete(self, *args, **kwargs):
        """Deliberately do nothing: neither the row nor the file is removed."""
        pass
        # ... or do smth ... and after:
        # super(TbTemplate, self).delete(*args, **kwargs)

    class Meta:
        verbose_name = '[…Template]'
        verbose_name_plural = '[…Templates]'
now, when changing and creating a template through Django-Admin, a corresponding template file will be create/modify.
Secondly,
in the admin.py file, when define the admin.ModelAdmin class for the TbTemplate control model, it is necessary to override the get_fields () method, which is responsible for getting the fields in the admin form. (I had to look for a method stupidly trying many cases that are made something similar, but not that). As a result, something like this admin.py:
# -*- coding: utf-8 -*-
from django.contrib import admin
from my_appweb.models import TbTemplate
from project.settings import *
class AdminTemplate(admin.ModelAdmin):
    """Admin for TbTemplate that shows the on-disk template source when editing."""

    list_display = ('szFileName', 'szDescription')
    list_display_links = ('szFileName', 'szDescription', )

    def get_fields(self, request, obj=None):
        """Return the form fields, first refreshing ``obj.szJinjaCode`` from its file."""
        # Explicit None guard (no object is passed when none is being edited)
        # instead of catching AttributeError, which would also hide genuine
        # attribute mistakes.
        if obj is not None:
            try:
                with open(Path(TEMPLATES_DIR) / obj.szFileName, "r", encoding="utf-8") as file:
                    obj.szJinjaCode = file.read()
            except (FileNotFoundError, TypeError):
                # No file yet (or no file name): keep the database copy.
                pass
        return ['szFileName', 'szDescription', 'szJinjaCode']


admin.site.register(TbTemplate, AdminTemplate)
Now, if some "external forces" change the template file (or someone changes the template code in the database bypassing the admin panel), then when you open the template for editing in the admin panel, the data will still be received from the file.
It's all
P.S. In the project's settings.py, you need to add something like:
TEMPLATES_DIR = BASE_DIR / 'templates-jinja2'
So that the model and the admin panel know in which directory to pour the template files.
Interesting question.
The first part - inclusion tag
Your premise is that you want to use the include tag, and therefore you want to save the templates in files. But you can simply override the template backend so that it looks for the template in the database before falling back to the file:
#settings.py:
# Template engine configuration: the backend is the custom class from
# myapp/backends.py.
TEMPLATES = [
    {
        'BACKEND': 'myapp.backends.MyTemplate',
        ... # other stuff
    },
]
in myapp/backends.py:
class MyTemplate(Jinja2):
    """Jinja2 backend that tries the TbTemplate table before the regular lookup."""

    def get_template(self, template_name):
        """Return the stored template named *template_name*, else defer to Jinja2."""
        db_row = TbTemplate.objects.filter(pk=template_name).first()
        if not db_row:
            # Nothing stored under that name: use the normal template lookup.
            return super().get_template(template_name)
        return self.from_string(db_row.szJinjaCode)
After that, every template can be saved in the DB: {% include %} calls the template backend, which gets the template from the database before falling back to the file template.
The second part. Save template to file/database.
If you do it, you don't need the first part, every time the template-file should be saved/changed.
class TbTemplateAdmin(ModelAdmin):
    """Admin that mirrors every saved template to its file on disk."""

    def save_model(self, request, obj, *args, **kwargs):
        """Save *obj* through the default admin path, then write its source to file."""
        # The parent hook is ``save_model``; ModelAdmin has no ``save`` method.
        super().save_model(request, obj, *args, **kwargs)
        with open(Path(path_to_templates) / obj.szFileName, "w+", encoding="utf-8") as template:
            # The template text may be empty/None; never pass None to write().
            template.write(obj.szJinjaCode or "")
The Third part - get the file in admin on get_object:
class TbTemplateAdmin(ModelAdmin):
    """Admin that loads the template source from its file when an object is opened."""

    def get_object(self, *args, **kwargs):
        """Return the object with ``szJinjaCode`` replaced by the file content, if any."""
        # ``super()`` is already bound; passing ``self`` again would shift
        # every argument by one position.
        obj = super().get_object(*args, **kwargs)
        if obj is None:
            # No such object: hand None back unchanged.
            return None
        try:
            with open(Path(path_to_templates) / obj.szFileName, "r", encoding="utf-8") as template:
                obj.szJinjaCode = template.read()
        except FileNotFoundError:
            # No file on disk yet: keep the text stored in the database.
            pass
        return obj
The Last part - convert the new file templates to objects:
In our projects we add automatically the new templates to database, spoiler - with inclusion tags. In your case - you can create an ModelAdmin.action to add templates in database. I don't solve it for you, try to do something yourself. I hope for your understanding
One thing I left out here: if you use the cached template loader (and you should use it in production), you must refresh the cache for changed templates. Don't forget about it.
I'm restricting the upload button to allow only csv files.
I need help please to append _hello at the end of each file uploaded by the user, but before the extension. (e.g. user_file_name.csv becomes automatically user_file_name_hello.csv)
Optional: I'd like the original file to be first renamed automatically, then saved to my uploads directory.
models.py
from django.db import models
# validation method to check if file is csv
from django.core.exceptions import ValidationError
def validate_file_extension(value):
    """Reject uploads whose file name does not end in ``.csv``.

    The comparison ignores letter case, so ``REPORT.CSV`` is accepted as well.

    :param value: object exposing the uploaded file name as ``value.name``
    :raises ValidationError: if the name has any other extension
    """
    if not value.name.lower().endswith('.csv'):
        raise ValidationError('Only CSV files allowed.')
# Create your models here.
class user_file(models.Model):
    """A user upload stored under documents/user_files/ and checked by validate_file_extension."""

    user_file_csv = models.FileField(
        validators=[validate_file_extension],
        upload_to='documents/user_files/',
    )
forms.py
from django import forms
from .models import user_file
from django.forms import FileInput
class user_file_form(forms.ModelForm):
    """Model form exposing only the upload field of ``user_file``."""

    class Meta:
        model = user_file
        fields = ('user_file_csv',)
        # The ``accept`` attribute is set on the rendered file input.
        widgets = {
            'user_file_csv': FileInput(attrs={'accept': 'text/csv'}),
        }
Thank you!
Maybe you need something like this:
class FileUploadUtil:
    """Namespace for ``upload_to`` path builders."""

    @staticmethod
    def my_files_path(instance, filename):
        """Build the storage path for an uploaded file.

        :param instance: model instance the file belongs to (its ``id`` is used)
        :param filename: original name of the uploaded file
        :return: ``my_files/prefix-<name>-<id>-sufix<.ext>``
        """
        # NOTE(review): ``instance.id`` is presumably None while the row has
        # not been saved yet -- confirm this is acceptable in the file name.
        name, file_extension = os.path.splitext(filename)
        # splitext keeps the leading dot in the extension, so no extra "." is
        # inserted (otherwise the result would be "...sufix..csv").
        name = 'prefix-{}-{}-sufix{}'.format(name, instance.id, file_extension)
        return "my_files/{}".format(name)
class MyModel(models.Model):
    """Example model whose upload path is computed by ``FileUploadUtil.my_files_path``."""

    # Other fields
    # ...
    my_file = models.FileField(
        upload_to=FileUploadUtil.my_files_path,
        max_length=300,
    )
Optional: I'd like the original file to be first renamed automatically, then saved to my uploads directory.
You can override save() method. Check here
Django document
Maybe you need a decorator.
from pathlib import Path
def rename_helper(path: str, append_text: str) -> str:
    """Return *path* with *append_text* inserted before the file extension.

    The directory part is preserved, so ``docs/a.csv`` becomes
    ``docs/a_hello.csv`` and can be used directly as a rename target.

    :param path: file name or path (a ``Path`` object works too)
    :param append_text: text to insert between the stem and the suffix
    :return: the new path as a string
    """
    source = Path(path)
    if not source.name:
        # Nothing to rename (empty path): ``with_name`` would raise here.
        return f"{source.stem}{append_text}{source.suffix}"
    return str(source.with_name(f"{source.stem}{append_text}{source.suffix}"))
def rename_previous_image(func):
    """Decorate a model ``save`` so the previously stored file is renamed first.

    Before the wrapped method runs, the row with the same primary key is
    looked up; if it has a file in ``user_file_csv``, that file gets
    ``_hello`` inserted before its extension on disk.

    :param func: the ``save`` method to wrap (called with unchanged arguments)
    :return: the wrapping function
    """
    import functools

    @functools.wraps(func)  # keep the name/docstring of the wrapped save()
    def wrapper(*args, **kwargs):
        self = args[0]
        model = type(self)
        # One query instead of exists() followed by indexing the queryset.
        previous_obj = model.objects.filter(pk=self.pk).first()
        stored = previous_obj.user_file_csv if previous_obj is not None else None
        if stored:
            # ``.path`` is the absolute location on disk; str(field) is only
            # the name relative to the storage root.
            # NOTE(review): ``.path`` presumably requires a filesystem-backed
            # storage, and the database still holds the old name after the
            # rename -- confirm both are acceptable.
            old_path = Path(stored.path)
            new_name = Path(rename_helper(path=old_path.name, append_text="_hello")).name
            if old_path.exists():
                old_path.rename(old_path.with_name(new_name))
        return func(*args, **kwargs)
    return wrapper
And, You can decorate your model save() method.
class MyModel(models.Model):
    """Example model whose ``save`` is wrapped by ``rename_previous_image``."""

    # Other fields
    # ...
    my_file = models.FileField(max_length=300, upload_to=FileUploadUtil.my_files_path)

    # NOTE(review): rename_previous_image reads a field named
    # ``user_file_csv``; this example model calls its field ``my_file`` --
    # align the two names before using the decorator here.
    @rename_previous_image
    def save(self, *args, **kwargs):
        # Zero-argument super(): naming another class (``user_file``) here
        # raises TypeError because MyModel is not a subclass of it.
        super().save(*args, **kwargs)  # You must add This row.
besides,
recommend rename your user_file class
like UserFile
Check This PEP 8
Have a good day.
I'm using boto3 to upload files to S3 and save their path in the FileField.
class SomeFile(models.Model):
    """Model with a single optional file field uploaded to ``some_folder``."""

    file = models.FileField(
        upload_to='some_folder',
        max_length=400,
        blank=True,
        null=True,
    )
For the above model the following code works to create a record.
# Build the row from a path string; no file object is passed.
ff = SomeFile(file='file path in S3')
# Run model validation before saving.
ff.full_clean()
ff.save()
Now, when I use ModelSerializer to do the same.
class SomeFileSerializer(serializers.ModelSerializer):
    """Default model serializer for ``SomeFile`` (exposes only ``file``)."""

    class Meta:
        model = SomeFile
        # Trailing comma required: ('file') is just the string 'file', and
        # ``fields`` has to be a list or tuple.
        fields = ('file',)
I get this error after running the code below
rest_framework.exceptions.ValidationError: {'file': [ErrorDetail(string='The submitted data was not a file. Check the encoding type on the form.', code='invalid')]}
# Feed a plain path string to the serializer; this is the call that produces
# the "submitted data was not a file" validation error quoted above.
serializer = SomeFileSerializer(data={'file': 'file path to S3'})
serializer.is_valid(raise_exception=True)
I need help in setting up the serializer to accept file path without actually having the file.
I was really in the same situation, and it was hard to find the solution on the web.
We have two options to solve this problem.
1. Passing data directly to save method
Read action: use serializer's read only ImageField
Write action: pass kwargs to save method
serializers.py
from rest_framework import serializers
class SomeFileSerializer(serializers.ModelSerializer):
    """Serializer that only *outputs* ``file``; writes pass it to ``save()`` directly."""

    # read_only: the field is rendered in responses but skipped on input.
    file = serializers.ImageField(read_only=True)

    class Meta:
        model = SomeFile
        # Trailing comma required: ('file') is just the string 'file', and
        # ``fields`` has to be a list or tuple.
        fields = ('file',)
views.py
# NOTE(review): with ``file`` declared read-only on the serializer, the value
# in ``data`` is presumably ignored by validation and only reaches the model
# through the save() keyword below -- confirm.
serializer = SomeFileSerializer(data={'file': 'file path to S3'})
serializer.is_valid(raise_exception=True)
# for put method
serializer.save(file=request.data.get('file'))
# for patch method (if partial=True in serializer)
if request.data.get('file'):
    serializer.save(file=request.data.get('file'))
else:
    # No new path supplied: save without touching the file.
    serializer.save()
2. Using CharField instead of ImageField
Read action: override to_representation function to response absolute url
Write action: use CharField to avoid ImageField's validation and action
serializers.py
from rest_framework import serializers
class SomeFileSerializer(serializers.ModelSerializer):
    """Serializer that accepts ``file`` as a plain path string and outputs its URL."""

    # CharField sidesteps the file-upload validation of a file/image field.
    file = serializers.CharField(max_length=400)

    class Meta:
        model = SomeFile
        # Trailing comma required: ('file') is just the string 'file', and
        # ``fields`` has to be a list or tuple.
        fields = ('file',)

    def to_representation(self, instance):
        """Return the default representation with ``file`` replaced by its URL."""
        representation = super().to_representation(instance)
        if instance.file:
            # update filename to response absolute url
            representation['file'] = instance.file_absolute_url
        return representation
models.py
class SomeFile(models.Model):
    """Model with an optional file field and a convenience URL accessor."""

    file = models.FileField(upload_to='some_folder', max_length=400, blank=True, null=True)

    # Must be a property: the serializer reads ``instance.file_absolute_url``
    # as an attribute, which would otherwise yield the bound method.
    @property
    def file_absolute_url(self):
        """URL of the stored file, or None when no file is set."""
        return self.file.url if self.file else None
Although I chose the 2nd solution because of drf_spectacular for documentation, the 1st solution would be easy to implement.
I believe I need to proxy my ElasticSearch connection via a Django URL in order to do filtering by user token.
So instead of going via localhost:9200/_search, I want to use localhost:8000/myapi/elastic/_search.
I am unsure how to connect them. I've tried using a serializers/views setup
myapp/search.py
class TaskIndex(DocType):
    """Search document with a single ``title`` field, kept in the ``task-index`` index."""
    title = String()

    class Meta:
        index = 'task-index'
# Bulk indexing function, run in shell
def bulk_indexing():
    """Initialise the index and bulk-load every ``Task`` row into it."""
    TaskIndex.init()
    client = Elasticsearch()
    # Rows are produced lazily, one ``indexing()`` payload per task.
    payloads = (task.indexing() for task in models.Task.objects.all().iterator())
    bulk(client=client, actions=payloads)
# Simple search function
def _search(title):
    """Run a term filter on ``title`` using ``title.text`` and return the response."""
    query = Search().filter('term', title=title.text)
    return query.execute()
api/serializers.py
from myapp.search import TaskIndex
class ElasticSerializer(serializers.ModelSerializer):
    # NOTE(review): TaskIndex is the search document type imported from
    # myapp.search, not a Django model -- a ModelSerializer presumably cannot
    # introspect it; confirm.
    class Meta:
        model = TaskIndex
api/views.py
class ElasticViewSet(viewsets.ModelViewSet):
    # NOTE(review): relies on ``TaskIndex.objects``; TaskIndex is a search
    # document type, which presumably has no model manager -- confirm.
    queryset = TaskIndex.objects.none()
    serializer_class = ElasticSerializer
api/urls.py
# Expose the viewset under the "elastic" prefix of the API router.
router.register(r'elastic', ElasticViewSet)
So this is how I was able to achieve it. Really simple method:
views.py
from services import elastic_result
class ElasticView(APIView):
    """Proxy POST requests to the search endpoint via ``elastic_result``."""

    # NOTE(review): an empty list switches off every permission check for
    # this view -- confirm that is intended for a search proxy.
    permission_classes=[]

    def post(self, request):
        """Forward *request* and return whatever ``elastic_result`` builds."""
        # The handler's first argument is the request object. The first
        # parameter of ``elastic_result`` is not used there, so None is
        # passed instead of the builtin ``id`` function.
        return elastic_result(None, request)
services.py
import requests
import json
def elastic_result(id, request):
    """Forward the JSON body of *request* to the local ``_search`` endpoint.

    :param id: unused; kept so existing callers keep working
    :param request: incoming request whose ``body`` holds the JSON query
    :return: ``Response`` carrying the decoded JSON reply and its status code
    """
    # Local import: this module's import list does not provide Response.
    from rest_framework.response import Response

    requestdata = json.loads(request.body)
    # A timeout keeps a stalled search node from blocking the worker forever.
    r = requests.post('http://localhost:9200/_search', json=requestdata, timeout=10)
    # Relay the upstream status code instead of always answering 200.
    return Response(r.json(), status=r.status_code)
urls.py
# ElasticView is a class inside the views module, so it is imported *from* it.
from views import ElasticView
# Route .../elastic/_search to the proxy view (closing parenthesis restored).
url(r'^elastic/_search', ElasticView.as_view())
I have an app that lets people upload files, represented as UploadedFiles. However, I want to make sure that users only upload xml files. I know I can do this using magic, but I don't know where to put this check - I can't put it in the clean function since the file is not yet uploaded when clean runs, as far as I can tell.
Here's the UploadedFile model:
class UploadedFile(models.Model):
    """This represents a file that has been uploaded to the server."""
    # Values stored in ``status``.
    STATE_UPLOADED = 0
    STATE_ANNOTATED = 1
    STATE_PROCESSING = 2
    STATE_PROCESSED = 4
    STATES = (
        (STATE_UPLOADED, "Uploaded"),
        (STATE_ANNOTATED, "Annotated"),
        (STATE_PROCESSING, "Processing"),
        (STATE_PROCESSED, "Processed"),
    )
    status = models.SmallIntegerField(choices=STATES,
        default=0, blank=True, null=True)
    file = models.FileField(upload_to=settings.XML_ROOT)
    project = models.ForeignKey(Project)

    def __unicode__(self):
        return self.file.name

    def name(self):
        """Return the file name without its directory part."""
        return os.path.basename(self.file.name)

    def save(self, *args, **kwargs):
        # A falsy status (None, or 0 which already equals STATE_UPLOADED) is
        # set to STATE_UPLOADED before saving.
        if not self.status:
            self.status = self.STATE_UPLOADED
        super(UploadedFile, self).save(*args, **kwargs)

    def delete(self, *args, **kwargs):
        # Remove the file from disk, call the field's delete with save=False,
        # then delete the row itself.
        os.remove(self.file.path)
        self.file.delete(False)
        super(UploadedFile, self).delete(*args, **kwargs)

    def get_absolute_url(self):
        return u'/upload/projects/%d' % self.id

    def clean(self):
        # NOTE(review): magic.from_file presumably expects a filesystem path,
        # but ``self.file.url`` is passed here -- confirm this is intended.
        if not "XML" in magic.from_file(self.file.url):
            raise ValidationError(u'Not an xml file.')
class UploadedFileForm(forms.ModelForm):
    """Model form for ``UploadedFile`` that leaves out the ``project`` field."""

    class Meta:
        exclude = ('project',)
        model = UploadedFile
Validating files is a common challenge, so I would like to use a validator:
import magic
from django.core.exceptions import ValidationError
from django.template.defaultfilters import filesizeformat
from django.utils.deconstruct import deconstructible
@deconstructible
class FileValidator(object):
    """Validate an uploaded file's size and sniffed content type.

    :param max_size: largest allowed size in bytes, or None for no upper limit
    :param min_size: smallest allowed size in bytes, or None for no lower limit
    :param content_types: allowed MIME types; empty means "do not check"
    """

    error_messages = {
        'max_size': ("Ensure this file size is not greater than %(max_size)s."
                     " Your file size is %(size)s."),
        'min_size': ("Ensure this file size is not less than %(min_size)s. "
                     "Your file size is %(size)s."),
        'content_type': "Files of type %(content_type)s are not supported.",
    }

    # Number of leading bytes handed to libmagic; the type is recognised from
    # the start of the file, so the whole upload need not be read into memory.
    sniff_bytes = 2048

    def __init__(self, max_size=None, min_size=None, content_types=()):
        self.max_size = max_size
        self.min_size = min_size
        self.content_types = content_types

    def __call__(self, data):
        """Raise ValidationError if *data* breaks a size or content-type rule."""
        if self.max_size is not None and data.size > self.max_size:
            params = {
                'max_size': filesizeformat(self.max_size),
                'size': filesizeformat(data.size),
            }
            raise ValidationError(self.error_messages['max_size'],
                                  'max_size', params)
        if self.min_size is not None and data.size < self.min_size:
            params = {
                'min_size': filesizeformat(self.min_size),
                'size': filesizeformat(data.size)
            }
            raise ValidationError(self.error_messages['min_size'],
                                  'min_size', params)
        if self.content_types:
            # Start from the beginning, and rewind afterwards so later
            # consumers (e.g. the storage backend) see the complete file.
            data.seek(0)
            content_type = magic.from_buffer(data.read(self.sniff_bytes), mime=True)
            data.seek(0)
            if content_type not in self.content_types:
                params = { 'content_type': content_type }
                raise ValidationError(self.error_messages['content_type'],
                                      'content_type', params)

    def __eq__(self, other):
        return (
            isinstance(other, FileValidator) and
            self.max_size == other.max_size and
            self.min_size == other.min_size and
            self.content_types == other.content_types
        )

    def __hash__(self):
        # Defining __eq__ alone makes instances unhashable; hash the same
        # attributes that __eq__ compares.
        return hash((self.max_size, self.min_size, tuple(self.content_types)))
Then you can use FileValidator in your models.FileField or forms.FileField as follows:
# NOTE(review): depending on the installed libmagic version, XML is presumably
# reported as either "application/xml" or "text/xml"; both are allowed so
# valid uploads are not rejected -- confirm against your environment.
validate_file = FileValidator(max_size=1024 * 100,
                              content_types=('application/xml', 'text/xml'))
file = models.FileField(upload_to=settings.XML_ROOT,
                        validators=[validate_file])
From django 1.11, you can also use FileExtensionValidator.
from django.core.validators import FileExtensionValidator
class UploadedFile(models.Model):
    """Upload model whose file field is limited to the ``xml`` extension."""

    file = models.FileField(
        validators=[FileExtensionValidator(allowed_extensions=['xml'])],
        upload_to=settings.XML_ROOT,
    )
Note this must be used on a FileField and won't work on a CharField (for example), since the validator validates on value.name.
ref: https://docs.djangoproject.com/en/dev/ref/validators/#fileextensionvalidator
For posterity: the solution is to use the read method and pass that to magic.from_buffer.
class UploadedFileForm(ModelForm):
    """Upload form that accepts only files libmagic identifies as XML."""

    def clean_file(self):
        """Return the uploaded file, or raise ValidationError if it is not XML."""
        file = self.cleaned_data.get("file", False)
        if not file:
            # Nothing was uploaded: there is no content to inspect.
            return file
        # The file type is recognised from the head of the file, so only a
        # small prefix is read.
        file.seek(0)
        filetype = magic.from_buffer(file.read(2048))
        # Rewind, otherwise whatever saves the file later starts at the
        # current read position and stores a truncated copy.
        file.seek(0)
        if "XML" not in filetype:
            raise ValidationError("File is not XML.")
        return file

    class Meta:
        model = models.UploadedFile
        exclude = ('project',)
I think what you want to do is to clean the uploaded file in Django's Form.clean_your_field_name_here() methods - the data is available on your system by then if it was submitted as normal HTTP POST request.
Also if you consider this inefficient explore the options of different Django file upload backends and how to do streaming processing.
If you need to consider the security of the system when dealing with uploads
Make sure uploaded file has correct extension
Make sure the mimetype matches the file extension
In the case you are worried about user's uploading exploit files (for attacking against your site)
Rewrite all the file contents on save to get rid of possible extra (exploit) payload (so you cannot embed HTML in XML which the browser would interpret as a site-origin HTML file when downloading)
Make sure you use content-disposition header on download
Some more info here: http://opensourcehacker.com/2013/07/31/secure-user-uploads-and-exploiting-served-user-content/
Below is my example how I sanitize the uploaded images:
class Example(models.Model):
    """Model with an optional image stored via ``filename_gen("participant-images/")``."""

    image = models.ImageField(
        upload_to=filename_gen("participant-images/"),
        blank=True,
        null=True,
    )
class Example(forms.ModelForm):
    """Form that passes the uploaded image through the sanitizer."""

    def clean_image(self):
        """Sanitize the uploaded image attachment and return it."""
        uploaded = self.cleaned_data.get('image', False)
        # Raises if the image content is unacceptable.
        utils.ensure_safe_user_image(uploaded)
        return uploaded
def ensure_safe_user_image(image):
    """ Perform various checks to sanitize user uploaded image data.

    Checks the size and the sniffed type of the upload, then re-encodes the
    picture in memory and makes sure the file name carries a matching
    extension.

    :param: InMemoryUploadedFile instance (Django form field value)

    :raise: ValidationError in the case the image content has issues
    :raise: TypeError if the upload is not an InMemoryUploadedFile
    """
    from io import BytesIO

    if not image:
        return

    # Explicit check instead of ``assert``, which is stripped under ``python -O``.
    if not isinstance(image, InMemoryUploadedFile):
        raise TypeError("Image rewrite has been only tested on in-memory upload backend")

    # Make sure the image is not too big, so that PIL trashes the server
    # (``size`` is the public counterpart of the private ``_size``.)
    if image.size > 4*1024*1024:
        raise ValidationError("Image file too large - the limit is 4 megabytes")

    # Then do header peak what the image claims
    image.file.seek(0)
    mime = magic.from_buffer(image.file.getvalue(), mime=True)
    if mime not in ("image/png", "image/jpeg"):
        raise ValidationError("Image is not valid. Please upload a JPEG or PNG image.")
    doc_type = mime.split("/")[-1].upper()

    # Read data from the in-memory buffer
    image.file.seek(0)
    pil_image = Image.open(image.file)

    # Rewrite the image contents in the memory
    # (bails out with exception on bad data)
    # Image data is binary, so the target buffer must be a BytesIO.
    buf = BytesIO()
    # LANCZOS is the current name of the filter formerly called ANTIALIAS.
    pil_image.thumbnail((2048, 2048), Image.LANCZOS)
    pil_image.save(buf, doc_type)
    # Record the new length and rewind so the next reader starts at byte 0.
    image.size = buf.tell()
    buf.seek(0)
    image.file = buf

    # Make sure the image has valid extension (can't upload .htm image)
    extension = doc_type.lower()
    # A JPEG named "photo.jpg" is already fine; do not turn it into "photo.jpg.jpeg".
    accepted = (".jpeg", ".jpg") if extension == "jpeg" else (".%s" % extension,)
    if not image.name.lower().endswith(accepted):
        image.name = image.name + "." + extension
I recently found an interesting package that can do upload file validation. You can see the package here. Its approach is similar to sultan's answer, so we can use it right away.
from upload_validator import FileTypeValidator
validator = FileTypeValidator(
    allowed_types=['application/msword'],
    allowed_extensions=['.doc', '.docx']
)
# Open in binary mode (a .doc file is not text) and let the with-block close
# the handle even when validation raises.
with open('sample.doc', 'rb') as file_resource:
    # ValidationError will be raised in case of invalid type or extension
    validator(file_resource)