Flask Form Image to base64 string - flask

I created a Flask form where I can upload an image. Then I need to convert that image to base64 string, but I'm always getting the same result.
OUTPUT of my prints:
<FileStorage: '20190925_184412.jpg' ('image/jpeg')>
b''
And the code
from flask import Flask, render_template
from flask_wtf import FlaskForm
from wtforms import FileField
from flask_uploads import configure_uploads, IMAGES, UploadSet
import base64
app = Flask(__name__)
app.config['SECRET_KEY'] = 'thisisasecret'
app.config['UPLOADED_IMAGES_DEST'] = 'uploads/images'
images = UploadSet('images', IMAGES)
configure_uploads(app, images)
class MyForm(FlaskForm):
image = FileField('image')
#app.route('/', methods=['GET', 'POST'])
def index():
form = MyForm()
if form.validate_on_submit():
filename = images.save(form.image.data)
image_string = base64.b64encode(form.image.data.read())
print(form.image.data)
print(image_string)
return f'Filename: { filename }'
return render_template('index.html', form=form)

I think this is due to the way Werkzeug's FileStorage object works. As I menioned in another answer it has a stream attribute; this is of type tempfile.SpooledTemporaryFile so must be re-wound after reading, if you wish to read it again.
In your case this stream attribute is: form.image.data.stream. I suspect this is read once when you call the method images.save.
So the solution should be to rewind that stream, prior to calculating the the b64 string:
if form.validate_on_submit():
filename = images.save(form.image.data) # first read happens here
form.image.data.stream.seek(0)
image_string = base64.b64encode(form.image.data.read())
print(form.image.data)
print(image_string)
return f'Filename: { filename }'

Related

How to upload and process large excel files using Celery in Django?

I am trying to upload and process excel file using Django and DRF with Celery.
There is an issue when I am trying to pass the file to my Celery task to be processed in the background, I get a following error:
kombu.exceptions.EncodeError: Object of type InMemoryUploadedFile is not JSON serializable
Here is my view post request handler:
class FileUploadView(generics.CreateAPIView):
"""
POST: upload file to save data in the database
"""
parser_classes = [MultiPartParser]
serializer_class = FileSerializerXLSX
def post(self, request, format=None):
"""
Allows to upload file and lets it be handled by pandas
"""
serialized = FileSerializerXLSX(data=request.data)
if serialized.is_valid():
file_obj = request.data['file']
# file_bytes = file_obj.read()
print(file_obj)
import_excel_task.delay(file_obj)
print("its working")
return Response(status=204)
return Response(serialized._errors, status=status.HTTP_400_BAD_REQUEST)
And my celery task:
def import_excel_helper(file_obj):
df = extract_excel_to_dataframe(file_obj)
transform_df_to_clientmodel(df)
transform_df_to_productmodel(df)
transform_df_to_salesmodel(df)
#shared_task(name="import_excel_task")
def import_excel_task(file_obj):
"""Save excel file in the background"""
logger.info("Importing excel file")
import_excel_helper(file_obj)
Any idea what is the way to handle importing Excel files into celery task so that it can be processed by other functions in the background?
As in the error, the body of the request to call a celery task must be JSON serializable since it is the default configuration. Then as documented in kombu:
The primary disadvantage to JSON is that it limits you to the following data types: strings, Unicode, floats, boolean, dictionaries, and lists. Decimals and dates are notably missing.
Let's say this is my excel file.
file.xlsx
Some
Value
Here
:)
Solution 1
Convert the raw bytes of the excel into Base64 string before calling the task so that it can be JSON serialized (since strings are valid data types in a JSON document, raw bytes are not). Then, everything else in the Celery configurations are the same default values.
tasks.py
import base64
import pandas
from celery import Celery
app = Celery('tasks')
#app.task
def add(excel_file_base64):
excel_file = base64.b64decode(excel_file_base64)
df = pandas.read_excel(excel_file)
print("Contents of excel file:", df)
views.py
import base64
from tasks import add
with open("file.xlsx", 'rb') as file: # Change this to be your <request.data['file']>
excel_raw_bytes = file.read()
excel_base64 = base64.b64encode(excel_raw_bytes).decode()
add.apply_async((excel_base64,))
Output
[2021-08-19 20:40:28,904: INFO/MainProcess] Task tasks.add[d5373444-485d-4c50-8695-be2e68ef1c67] received
[2021-08-19 20:40:29,094: WARNING/ForkPoolWorker-4] Contents of excel file:
[2021-08-19 20:40:29,094: WARNING/ForkPoolWorker-4]
[2021-08-19 20:40:29,099: WARNING/ForkPoolWorker-4] Some Value
0 Here :)
[2021-08-19 20:40:29,099: WARNING/ForkPoolWorker-4]
[2021-08-19 20:40:29,099: INFO/ForkPoolWorker-4] Task tasks.add[d5373444-485d-4c50-8695-be2e68ef1c67] succeeded in 0.19386404199940444s: None
Solution 2:
This is the harder way. Implement a custom serializer that will handle excel files.
tasks.py
import ast
import base64
import pandas
from celery import Celery
from kombu.serialization import register
def my_custom_excel_encoder(obj):
"""Uncomment this block if you intend to pass it as a Base64 string:
file_base64 = base64.b64encode(obj[0][0]).decode()
obj = list(obj)
obj[0] = [file_base64]
"""
return str(obj)
def my_custom_excel_decoder(obj):
obj = ast.literal_eval(obj)
"""Uncomment this block if you passed it as a Base64 string (as commented above in the encoder):
obj[0][0] = base64.b64decode(obj[0][0])
"""
return obj
register(
'my_custom_excel',
my_custom_excel_encoder,
my_custom_excel_decoder,
content_type='application/x-my-custom-excel',
content_encoding='utf-8',
)
app = Celery('tasks')
app.conf.update(
accept_content=['json', 'my_custom_excel'],
)
#app.task
def add(excel_file):
df = pandas.read_excel(excel_file)
print("Contents of excel file:", df)
views.py
from tasks import add
with open("file.xlsx", 'rb') as excel_file: # Change this to be your <request.data['file']>
excel_raw_bytes = excel_file.read()
add.apply_async((excel_raw_bytes,), serializer='my_custom_excel')
Output
Same as Solution 1
Solution 3
You might be interested with this documentation of Sending raw data without Serialization

Django - Get response to return image without saving file to models

I am using the tmdb api to return movie info as well as images.
Steps below of Get logic
Api request is made which provides movie info as well as "backdrop_path"
I then use this path to make another request for the jpg related to that movie.
Blocker
I'm unable to then output that jpg. It currently returns a url path as below.
Views.py
from django.shortcuts import render
from django.views.generic import TemplateView
import requests
import urllib
# Create your views here.
def index(request):
# Query API with user input
if 'movie' in request.GET:
api_key = 'api'
id = request.GET['movie']
url = 'https://api.themoviedb.org/3/search/movie?api_key={}&language=en-US&query={}&include_adult=false'
response = requests.get(url.format(api_key,id))
# successful request
if response.status_code == 200:
# Parse json output for key value pairs
tmdb = response.json()
# save image jpg
backdrop_path = tmdb['results'][0]['backdrop_path']
url = 'https://image.tmdb.org/t/p/original/{}'
gg = urllib.request.urlretrieve(url.format(backdrop_path), 'test.jpg')
context = {
'title': tmdb['results'][0]['original_title'],
'overview': tmdb['results'][0]['overview'],
'release_date': tmdb['results'][0]['release_date'],
'vote_average': tmdb['results'][0]['vote_average'],
'vote_count': tmdb['results'][0]['vote_count'],
'backdrop_path' : tmdb['results'][0]['backdrop_path'],
'jpg' : gg
}
return render(request, 'home.html', {'context': context})
else: # returns homepage if invalid request
return render(request, 'home.html')
else: # Homepage without GET request
return render(request, 'home.html')
urlretrieve doesn't return the image ready to be used, instead it returns a tuple with the local name of the file and the headers (as an HTTPMessage object as you can see in your example), which is what you're seeing.
However, I don't think returning a file in your response is ideal, nor it would work in your scenario. Since you seem to be using templates, what I would do is return the image url and use it in an image HTML tag, like this <img src="{{ jpg }}"/>

Django - Converting a Binary stream to an Image

I am trying to obtain an image from a url and return it to the ModelAdmin to display it in a new column of the table.
I tried the following code in admin.py file:
def new_field(self, obj):
r = requests.get('https://abcd.com/image')
return r.content
The code is not giving me any error but it's returning a long binary string instead of the image itself.
How can I pass the image itself, or convert the binary content to an image?
You do not need download image if you wont only show it.
def new_field(self, obj):
url = 'https://abcd.com/image'
return '<img src="{}" />'.format(url)
new_field.allow_tags = True # it is important!!!
You can make use of a NamedTemporaryFile [GitHub] here. For example:
from django.core.files import File
from django.core.files.temp import NamedTemporaryFile
def store_image_from_source(self, obj):
img = NamedTemporaryFile()
r = requests.get('https://abcd.com/image')
img.write(r.content)
img.flush()
file = File(img)
obj.my_img_attr.save('filename.jpeg', file, save=True)
Here 'filename.jpeg' is thus te name of the file, as if you would have uploaded a file with that name with a ModelForm.

In-memory data in a Flask app

I think the correct way to have instance variables in Flask is by adding users and sessions, but I'm trying to test a concept and I don't want to go through all of that just yet. I'm trying to have a web app load an image into a variable that can then have different image operations performed on it. Obviously you don't want to have to keep performing a list of operations on the image on each new request because that would be horribly inefficient.
Is there a way of having an app.var in Flask that I can access from different routes? I've tried using the global context and Flask's current_app, but I get the impression that's not what they're for.
The code for my blueprint is:
import os
from flask import Flask, url_for, render_template, \
g, send_file, Blueprint
from io import BytesIO
from PIL import Image, ImageDraw, ImageOps
home = Blueprint('home', __name__)
#home.before_request
def before_request():
g.img = None
g.user = None
#home.route('/')
def index():
return render_template('home/index.html')
#home.route('/image')
def image():
if g.img is None:
root = os.path.dirname(os.path.abspath(__file__))
filename = os.path.join(root, '../static/images/lena.jpg')
g.img = Image.open(filename)
img_bytes = BytesIO()
g.img.save(img_bytes, 'jpeg')
img_bytes.seek(0)
return send_file(img_bytes, mimetype='image/jpg')
#home.route('/grayscale', methods=['POST'])
def grayscale():
if g.img:
print('POST grayscale request')
g.img = ImageOps.grayscale(img)
return "Grayscale operation successful"
else:
print('Grayscale called with no image loaded')
return "Grayscale operation failed"
The /image route returns the image correctly, but I'd like to be able to call /grayscale, perform the operation, and be able to make another call to /image and have it return the image from memory without loading it.
You could save a key in your session variable and use that to identify the image in a global dictionary. However this might lead to some trouble if you use multiple Flask application instances. But with one it would be fine. Otherwise you could use Redis when working with multiple workers. I haven't tried the following code but it should show the concept.
from flask import session
import uuid
app.config['SECRET_KEY'] = 'your secret key'
img_dict = {}
#route('/image')
def image():
key = session.get('key')
if key is None:
session['key'] = key = uuid.uuid1()
img_dict[key] = yourimagedata
#home.route('/grayscale', methods=['POST'])
def grayscale():
key = session.get('key')
if key is None:
print('Grayscale called with no image loaded')
return "Grayscale operation failed"
else:
img = img_dict[key]
print('POST grayscale request')
g.img = ImageOps.grayscale(img)
return "Grayscale operation successful"

how to unit test file upload in django

In my django app, I have a view which accomplishes file upload.The core snippet is like this
...
if (request.method == 'POST'):
if request.FILES.has_key('file'):
file = request.FILES['file']
with open(settings.destfolder+'/%s' % file.name, 'wb+') as dest:
for chunk in file.chunks():
dest.write(chunk)
I would like to unit test the view.I am planning to test the happy path as well as the fail path..ie,the case where the request.FILES has no key 'file' , case where request.FILES['file'] has None..
How do I set up the post data for the happy path?Can somebody tell me?
I used to do the same with open('some_file.txt') as fp: but then I needed images, videos and other real files in the repo and also I was testing a part of a Django core component that is well tested, so currently this is what I have been doing:
from django.core.files.uploadedfile import SimpleUploadedFile
def test_upload_video(self):
video = SimpleUploadedFile("file.mp4", "file_content", content_type="video/mp4")
self.client.post(reverse('app:some_view'), {'video': video})
# some important assertions ...
In Python 3.5+ you need to use bytes object instead of str. Change "file_content" to b"file_content"
It's been working fine, SimpleUploadedFile creates an InMemoryFile that behaves like a regular upload and you can pick the name, content and content type.
From Django docs on Client.post:
Submitting files is a special case. To POST a file, you need only
provide the file field name as a key, and a file handle to the file
you wish to upload as a value. For example:
c = Client()
with open('wishlist.doc') as fp:
c.post('/customers/wishes/', {'name': 'fred', 'attachment': fp})
I recommend you to take a look at Django RequestFactory. It's the best way to mock data provided in the request.
Said that, I found several flaws in your code.
"unit" testing means to test just one "unit" of functionality. So,
if you want to test that view you'd be testing the view, and the file
system, ergo, not really unit test. To make this point more clear. If
you run that test, and the view works fine, but you don't have
permissions to save that file, your test would fail because of that.
Other important thing is test speed. If you're doing something like
TDD the speed of execution of your tests is really important.
Accessing any I/O is not a good idea.
So, I recommend you to refactor your view to use a function like:
def upload_file_to_location(request, location=None): # Can use the default configured
And do some mocking on that. You can use Python Mock.
PS: You could also use Django Test Client But that would mean that you're adding another thing more to test, because that client make use of Sessions, middlewares, etc. Nothing similar to Unit Testing.
I do something like this for my own event related application but you should have more than enough code to get on with your own use case
import tempfile, csv, os
class UploadPaperTest(TestCase):
def generate_file(self):
try:
myfile = open('test.csv', 'wb')
wr = csv.writer(myfile)
wr.writerow(('Paper ID','Paper Title', 'Authors'))
wr.writerow(('1','Title1', 'Author1'))
wr.writerow(('2','Title2', 'Author2'))
wr.writerow(('3','Title3', 'Author3'))
finally:
myfile.close()
return myfile
def setUp(self):
self.user = create_fuser()
self.profile = ProfileFactory(user=self.user)
self.event = EventFactory()
self.client = Client()
self.module = ModuleFactory()
self.event_module = EventModule.objects.get_or_create(event=self.event,
module=self.module)[0]
add_to_admin(self.event, self.user)
def test_paper_upload(self):
response = self.client.login(username=self.user.email, password='foz')
self.assertTrue(response)
myfile = self.generate_file()
file_path = myfile.name
f = open(file_path, "r")
url = reverse('registration_upload_papers', args=[self.event.slug])
# post wrong data type
post_data = {'uploaded_file': i}
response = self.client.post(url, post_data)
self.assertContains(response, 'File type is not supported.')
post_data['uploaded_file'] = f
response = self.client.post(url, post_data)
import_file = SubmissionImportFile.objects.all()[0]
self.assertEqual(SubmissionImportFile.objects.all().count(), 1)
#self.assertEqual(import_file.uploaded_file.name, 'files/registration/{0}'.format(file_path))
os.remove(myfile.name)
file_path = import_file.uploaded_file.path
os.remove(file_path)
I did something like that :
from django.core.files.uploadedfile import SimpleUploadedFile
from django.test import TestCase
from django.core.urlresolvers import reverse
from django.core.files import File
from django.utils.six import BytesIO
from .forms import UploadImageForm
from PIL import Image
from io import StringIO
def create_image(storage, filename, size=(100, 100), image_mode='RGB', image_format='PNG'):
"""
Generate a test image, returning the filename that it was saved as.
If ``storage`` is ``None``, the BytesIO containing the image data
will be passed instead.
"""
data = BytesIO()
Image.new(image_mode, size).save(data, image_format)
data.seek(0)
if not storage:
return data
image_file = ContentFile(data.read())
return storage.save(filename, image_file)
class UploadImageTests(TestCase):
def setUp(self):
super(UploadImageTests, self).setUp()
def test_valid_form(self):
'''
valid post data should redirect
The expected behavior is to show the image
'''
url = reverse('image')
avatar = create_image(None, 'avatar.png')
avatar_file = SimpleUploadedFile('front.png', avatar.getvalue())
data = {'image': avatar_file}
response = self.client.post(url, data, follow=True)
image_src = response.context.get('image_src')
self.assertEquals(response.status_code, 200)
self.assertTrue(image_src)
self.assertTemplateUsed('content_upload/result_image.html')
create_image function will create image so you don't need to give static path of image.
Note : You can update code as per you code.
This code for Python 3.6.
from rest_framework.test import force_authenticate
from rest_framework.test import APIRequestFactory
factory = APIRequestFactory()
user = User.objects.get(username='#####')
view = <your_view_name>.as_view()
with open('<file_name>.pdf', 'rb') as fp:
request=factory.post('<url_path>',{'file_name':fp})
force_authenticate(request, user)
response = view(request)
As mentioned in Django's official documentation:
Submitting files is a special case. To POST a file, you need only provide the file field name as a key, and a file handle to the file you wish to upload as a value. For example:
c = Client()
with open('wishlist.doc') as fp:
c.post('/customers/wishes/', {'name': 'fred', 'attachment': fp})
More Information: How to check if the file is passed as an argument to some function?
While testing, sometimes we want to make sure that the file is passed as an argument to some function.
e.g.
...
class AnyView(CreateView):
...
def post(self, request, *args, **kwargs):
attachment = request.FILES['attachment']
# pass the file as an argument
my_function(attachment)
...
In tests, use Python's mock something like this:
# Mock 'my_function' and then check the following:
response = do_a_post_request()
self.assertEqual(mock_my_function.call_count, 1)
self.assertEqual(
mock_my_function.call_args,
call(response.wsgi_request.FILES['attachment']),
)
if you want to add other data with file upload then follow the below method
file = open('path/to/file.txt', 'r', encoding='utf-8')
data = {
'file_name_to_receive_on_backend': file,
'param1': 1,
'param2': 2,
.
.
}
response = self.client.post("/url/to/view", data, format='multipart')`
The only file_name_to_receive_on_backend will be received as a file other params received normally as post paramas.
In Django 1.7 there's an issue with the TestCase wich can be resolved by using open(filepath, 'rb') but when using the test client we have no control over it. I think it's probably best to ensure file.read() returns always bytes.
source: https://code.djangoproject.com/ticket/23912, by KevinEtienne
Without rb option, a TypeError is raised:
TypeError: sequence item 4: expected bytes, bytearray, or an object with the buffer interface, str found
from django.test import Client
from requests import Response
client = Client()
with open(template_path, 'rb') as f:
file = SimpleUploadedFile('Name of the django file', f.read())
response: Response = client.post(url, format='multipart', data={'file': file})
Hope this helps.
Very handy solution with mock
from django.test import TestCase, override_settings
#use your own client request factory
from my_framework.test import APIClient
from django.core.files import File
import tempfile
from pathlib import Path
import mock
image_mock = mock.MagicMock(spec=File)
image_mock.name = 'image.png' # or smt else
class MyTest(TestCase):
# I assume we want to put this file in storage
# so to avoid putting garbage in our MEDIA_ROOT
# we're using temporary storage for test purposes
#override_settings(MEDIA_ROOT=Path(tempfile.gettempdir()))
def test_send_file(self):
client = APIClient()
client.post(
'/endpoint/'
{'file':image_mock},
format="multipart"
)
I am using Python==3.8.2 , Django==3.0.4, djangorestframework==3.11.0
I tried self.client.post but got a Resolver404 exception.
Following worked for me:
import requests
upload_url='www.some.com/oaisjdoasjd' # your url to upload
with open('/home/xyz/video1.webm', 'rb') as video_file:
# if it was a text file we would perhaps do
# file = video_file.read()
response_upload = requests.put(
upload_url,
data=video_file,
headers={'content-type': 'video/webm'}
)
I am using django rest framework and I had to test the upload of multiple files.
I finally get it by using format="multipart" in my APIClient.post request.
from rest_framework.test import APIClient
...
self.client = APIClient()
with open('./photo.jpg', 'rb') as fp:
resp = self.client.post('/upload/',
{'images': [fp]},
format="multipart")
I am using GraphQL, upload for test:
with open('test.jpg', 'rb') as fp:
response = self.client.execute(query, variables, data={'image': [fp]})
code in class mutation
#classmethod
def mutate(cls, root, info, **kwargs):
if image := info.context.FILES.get("image", None):
kwargs["image"] = image
TestingMainModel.objects.get_or_create(
id=kwargs["id"],
defaults=kwargs
)