How to include images in xhtml2pdf generated pdf files? - templates

I am running a Streamlit app which generates reports containing images and dataframes. I use jinja2 to generate the HTML file from a template, and I would now like to convert that HTML to a PDF file with xhtml2pdf so it can be downloaded.
How to do that?
from jinja2 import Environment, FileSystemLoader
def convert_html_to_pdf(source_html, output_filename="temp/report.pdf"):
    """Render an HTML string to PDF and return the PDF as bytes.

    :param source_html: HTML markup handed to ``pisa.CreatePDF``.
    :param output_filename: Not used; kept so existing callers keep working.
    :returns: The bytes written to the in-memory destination buffer.
    :raises RuntimeError: If xhtml2pdf reports a conversion error.
    """
    result_file = io.BytesIO()
    status = pisa.CreatePDF(source_html, dest=result_file)
    # CreatePDF returns a status object, not the document itself; the PDF
    # bytes are in the buffer that was passed as ``dest``.
    if status.err:
        raise RuntimeError("xhtml2pdf failed to convert the HTML to PDF")
    return result_file.getvalue()
def load_template():
    """Return the ``catAnalysisTemplate.html`` template from ``templates``."""
    loader = FileSystemLoader('templates')
    return Environment(loader=loader).get_template('catAnalysisTemplate.html')
def render_report(data, filename="report"):
    """Render the report template with *data* and return ``[html, pdf]``.

    ``filename`` is not used by the active code path.
    """
    markup = load_template().render(data)
    return [markup, convert_html_to_pdf(markup)]
This works fine except the images are not included in the pdf file. My static images are stored in
img/
logo.png
and the charts are generated in memory, like this:
def plot_co_attainment(qp):
    """Draw the CO-attainment bar chart and return it as an in-memory JPEG.

    :param qp: Object whose ``co_attainment()`` result can be indexed with
        the ``"Level"`` and ``"Perc_Attainment"`` columns.
    :returns: ``io.BytesIO`` holding the JPEG data, rewound to the start.
    """
    # The buffer that is written to and returned must be the same object
    # (the original created ``img`` but used an undefined ``buf``).
    buf = io.BytesIO()
    data = qp.co_attainment()[["Level", "Perc_Attainment"]]
    fig = plt.figure(dpi=150)
    try:
        plt.bar(data["Level"], data["Perc_Attainment"], width=0.5, color=colors)
        # Label each bar with its percentage just above the bar top.
        for i, val in enumerate(data["Perc_Attainment"].values):
            plt.text(i, val, str(val) + "%",
                     horizontalalignment='center',
                     verticalalignment='bottom',
                     fontdict={'fontweight': 500, 'size': 20})
        plt.xlabel("Course Outcomes")
        plt.ylabel("Percentage of Attainment")
        plt.ylim((0, 110))
        plt.savefig(buf, format='jpg')
    finally:
        # Release the figure so repeated report runs do not accumulate figures.
        plt.close(fig)
    buf.seek(0)
    return buf
How do I connect the dots and get the images in my pdf file?

I am having the same issue. The way I solved it was to use a link_handler and return the data as a data: uri containing the png image data.
This example will take the src attribute and use it to generate a square image in that color, which will be embedded in the PDF. Sadly this doesn't let you modify the image tag itself so you can't change the sizes/classes or anything else.
Using something like this opens the way to embedding just about anything without having to add them to your template directly.
from base64 import b64encode
from io import BytesIO
from xhtml2pdf import pisa
from PIL import Image
html_src = """
<body>
<div>
<img src="red"/>
<img src="green"/>
<img src="blue"/>
</div>
</body>
"""
def link_callback(src_attr, *args):
    """
    Returns the image data for use by the pdf renderer

    :param src_attr: Value of the ``src`` attribute; used here as the fill
        colour of a generated 100x100 RGB image.
    :param args: Extra positional arguments from the caller; ignored.
    :returns: A ``data:image/png;base64,...`` URI containing the PNG.
    """
    img_out = BytesIO()
    Image.new("RGB", (100, 100), src_attr).save(img_out, "png")
    # b64encode returns bytes; decode first, otherwise the f-string embeds
    # the bytes repr (b'...') and the data URI is invalid.
    encoded = b64encode(img_out.getvalue()).decode("ascii")
    return f"data:image/png;base64,{encoded}"
def main():
    """Write ``one.pdf`` from ``html_src``, resolving links via ``link_callback``."""
    with open("one.pdf", "wb") as out_file:
        pisa.CreatePDF(html_src, dest=out_file, link_callback=link_callback)
if __name__ == "__main__":
main()

Related

Improving accuracy in Python Tesseract OCR

I am using pytesseract along with openCV in a simple django application in Python to extract text in Bengali language from image files. I have a form that lets you upload an image and on clicking the submit button sends it to the server side in an ajax call in jQuery to extract the text from the image to serve the purpose of OCR (Optical Character Recognition).
Template part :
<div style="text-align: center;">
<div id="result" class="text-center"></div>
<form enctype="multipart/form-data" id="ocrForm" action="{% url 'process_image' %}" method="post"> <!-- Do not forget to add: enctype="multipart/form-data" -->
{% csrf_token %}
{{ form }}
<button type="submit" class="btn btn-success">OCRzed</button>
</form>
<br><br><hr>
<div id="content" style="width: 50%; margin: 0 auto;">
</div>
</div>
<script type="text/javascript">
$(document).ready(function () {
    // Return the first file selected in the #file_id input.
    function getFile() {
        var fp = $("#file_id");
        return fp[0].files[0];
    }

    // POST the selected file to the OCR endpoint and display the returned text.
    function submitFile() {
        var fd = new FormData();
        fd.append('file', getFile());
        $("#result").html('<span class="wait">Please wait....</span>');
        $('#content').html('');
        $.ajax({
            url: "{% url 'process_image' %}",
            type: "POST",
            data: fd,
            // Let the browser build the multipart body and its boundary header.
            processData: false,
            contentType: false,
            success: function (data) {
                $("#result").html('');
                if (data.content) {
                    // The OCR output is untrusted text: insert it as text,
                    // never as markup, so it cannot inject HTML or script.
                    $('#content').empty().append($('<p>').text(data.content));
                }
            },
            error: function () {
                // Without this the "Please wait" message stays forever on failure.
                $("#result").text('Could not process the image.');
            }
        });
    }

    // Submit the file for OCRization
    $("#ocrForm").on('submit', function (event) {
        event.preventDefault();
        submitFile();
    });
});
</script>
The urls.py file has:
from django.urls import path, re_path
from .views import *
urlpatterns = [
path('process_image', OcrView.process_image, name='process_image') ,
]
The view part :
from django.contrib.auth.models import User
from django.shortcuts import render, redirect, get_object_or_404
from .forms import NewTopicForm
from .models import Board, Topic, Post
from django.shortcuts import render
from django.http import HttpResponse
from django.http import Http404
from django.http import JsonResponse
from django.views.generic import FormView
from django.views.decorators.csrf import csrf_exempt
import json
import cv2
import numpy as np
import pytesseract # ======= > Add
try:
from PIL import Image
except:
import Image
def ocr(request):
    """Render the ``ocr.html`` template for *request*."""
    template_name = 'ocr.html'
    return render(request, template_name)
# {'board': board,'form':form})
# get grayscale image
def get_grayscale(image):
    """Convert *image* from BGR to grayscale with ``cv2.cvtColor``."""
    conversion = cv2.COLOR_BGR2GRAY
    return cv2.cvtColor(image, conversion)
# noise removal
def remove_noise(image):
    """Apply ``cv2.medianBlur`` to *image* with an aperture size of 5."""
    aperture = 5
    return cv2.medianBlur(image, aperture)
#thresholding
def thresholding(image):
    """Binarize *image* using binary + Otsu thresholding; return the image."""
    mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    result = cv2.threshold(image, 0, 255, mode)
    # Element 1 of the result is the thresholded image.
    return result[1]
#dilation
def dilate(image):
    """Dilate *image* once with a 5x5 all-ones ``uint8`` kernel."""
    structuring = np.ones((5, 5), np.uint8)
    dilated = cv2.dilate(image, structuring, iterations=1)
    return dilated
#erosion
def erode(image):
    """Erode *image* once with a 5x5 all-ones ``uint8`` kernel."""
    structuring = np.ones((5, 5), np.uint8)
    eroded = cv2.erode(image, structuring, iterations=1)
    return eroded
#opening - erosion followed by dilation
def opening(image):
    """Apply morphological opening to *image* with a 5x5 all-ones kernel."""
    structuring = np.ones((5, 5), np.uint8)
    opened = cv2.morphologyEx(image, cv2.MORPH_OPEN, structuring)
    return opened
#canny edge detection
def canny(image):
    """Run ``cv2.Canny`` on *image* with thresholds 100 and 200."""
    low, high = 100, 200
    return cv2.Canny(image, low, high)
#skew correction
def deskew(image):
    """Rotate *image* about its centre by the angle derived from its pixels.

    The angle is taken from the minimum-area rectangle around all pixels
    greater than zero; the output has the same width and height as the input.
    """
    points = np.column_stack(np.where(image > 0))
    rect_angle = cv2.minAreaRect(points)[-1]
    # NOTE(review): the -45 split assumes a particular angle range from
    # minAreaRect — confirm against the installed OpenCV version.
    angle = -(90 + rect_angle) if rect_angle < -45 else -rect_angle
    height, width = image.shape[:2]
    pivot = (width // 2, height // 2)
    rotation = cv2.getRotationMatrix2D(pivot, angle, 1.0)
    return cv2.warpAffine(image, rotation, (width, height),
                          flags=cv2.INTER_CUBIC,
                          borderMode=cv2.BORDER_REPLICATE)
#template matching
def match_template(image, template):
    """Return ``cv2.matchTemplate`` of *template* in *image* (TM_CCOEFF_NORMED)."""
    method = cv2.TM_CCOEFF_NORMED
    return cv2.matchTemplate(image, template, method)
class OcrView(FormView):
    """Form view for the OCR page; ``process_image`` handles the upload."""

    form_class = UploadForm
    template_name = 'ocr.html'
    success_url = '/'

    #csrf_exempt
    # NOTE(review): the line above looks like a ``@csrf_exempt`` decorator
    # that lost its ``@`` — confirm; as written it is only a comment.
    def process_image(request):
        """Run OCR on the uploaded ``file`` and return the text as JSON.

        Takes no ``self``: it is accessed directly on the class
        (``OcrView.process_image``) and called with the request only.

        :param request: Incoming request; the image is read from
            ``request.FILES['file']``.
        :returns: ``JsonResponse`` with a ``content`` key on success, or an
            ``error`` key with status 405 (not POST) or 400 (missing or
            undecodable file).
        """
        if request.method != 'POST':
            return JsonResponse({'error': 'POST required'}, status=405)

        upload = request.FILES.get('file')
        if upload is None:
            return JsonResponse({'error': 'No file uploaded'}, status=400)

        # np.fromstring is deprecated for binary input; frombuffer replaces it.
        npimg = np.frombuffer(upload.read(), np.uint8)
        # IMREAD_COLOR always yields a 3-channel BGR image, which is what
        # get_grayscale (COLOR_BGR2GRAY) expects.
        image = cv2.imdecode(npimg, cv2.IMREAD_COLOR)
        if image is None:
            return JsonResponse({'error': 'Could not decode image'}, status=400)

        # Pass the preprocessed image to Tesseract; previously the
        # preprocessing results were computed and then discarded.
        thresh = thresholding(get_grayscale(image))

        pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
        content = pytesseract.image_to_string(thresh, lang='eng+ben')
        return JsonResponse({'content': content})
I am attaching a sample image just below here which when I give as the input file, the extracted text I get from there is not up to any satisfactory level of accuracy. The input image is:
I am attaching a screenshot of the extracted text with wrong words underlined in red below. Note that the spaces and indentations are not preserved there. The screenshot of extracted text is :
In the above code snippet, I have done the image processing with the following code lines:
gray = get_grayscale(image)
thresh = thresholding(gray)
opening1 = opening(gray)
canny1 = canny(gray)
After that I have fed tesserect with the processed image in the following line:
content = pytesseract.image_to_string( image, lang = 'eng+ben')
But my point of confusion is that I never save the image, either before or after processing. So when I use the line above, I am not sure whether the processed or the unprocessed image is supplied to the Tesseract engine.
Q1) Do I need to save the image after processing it and then supply it to the tesserect engine ? If yes , how to do that ?
Q2) What else steps should I take to improve the accuracy ?
NB: Even if you are not familiar with Bengali language, I think this wont be any problem as you can just look at the red-underlined words and make a comparison.
EDIT:
TL;DR:
You can just look at the code in view.py and urls.py files and exclude the template code for the sake of understanding easily.
Q1) No need to save the image. The image is stored in your variable image
Q2) You are not actually running OCR on the result of the post-processing functions (i.e. the variable canny1); you are passing the original image. The code below applies the processing steps to image one after another and then runs OCR on the post-processed image stored in canny1.
gray = get_grayscale(image)
thresh = thresholding(gray)
opening1 = opening(thresh )
canny1 = canny(opening1 )
content = pytesseract.image_to_string( canny1 , lang = 'eng+ben')

Send PIL image to front without saving it

I'm learning flask and i got stuck in this part.
I want to send a image to a img HTML tag without saving it...
here is where i got so far
PYTHON
def serve_pil_image(pil_img):
    """Encode *pil_img* as JPEG (quality 70) in memory and send it."""
    buffer = BytesIO()
    pil_img.save(buffer, 'JPEG', quality=70)
    # Rewind so send_file reads from the beginning of the buffer.
    buffer.seek(0)
    return send_file(buffer, mimetype='image/jpeg')
#app.route('/upload', methods=["POST"])
def upload():
    """Open every uploaded ``file`` with PIL and render ``processing.html``.

    Ensures the ``static/images`` directory under ``APP_ROOT`` exists; the
    uploaded files themselves are not written to disk.
    """
    target = os.path.join(APP_ROOT, 'static/images')
    # create image directory if not found; makedirs(exist_ok=True) also
    # creates missing parents and avoids the check-then-create race
    os.makedirs(target, exist_ok=True)
    # retrieve file from HTML -- NO SAVING
    for uploaded in request.files.getlist("file"):
        print('Getting ', uploaded)
        # The opened image is not used further in this function.
        img = Image.open(uploaded)
    return render_template('processing.html')
#app.route('/static/images')
def serve_img():
    """Send the image bound to ``poster`` through ``serve_pil_image``."""
    # NOTE(review): ``poster`` is not defined in the visible code — confirm
    # where it is supposed to come from.
    return serve_pil_image(poster)
HTML
<img src="{{ url_for('serve_img', filename=img) }}" class="figure-img img-fluid rounded">
If you know a little JavaScript you could create a url within the window using createObjectURL. Store the blob content that got sent by Flask.send_file in the created object. Get a reference to the image tag, and make it point to the in memory url.

Django - Converting a Binary stream to an Image

I am trying to obtain an image from a url and return it to the ModelAdmin to display it in a new column of the table.
I tried the following code in admin.py file:
def new_field(self, obj):
    """Fetch ``https://abcd.com/image`` and return the raw response body."""
    response = requests.get('https://abcd.com/image')
    return response.content
The code is not giving me any error but it's returning a long binary string instead of the image itself.
How can I pass the image itself, or convert the binary content to an image?
You do not need to download the image if you only want to show it.
def new_field(self, obj):
    """Return an ``<img>`` tag that points at the remote image URL.

    :param obj: The model instance for the row; not used.
    :returns: Safe HTML string for the admin change list.
    """
    # Imported locally so the snippet does not rely on a module-level import.
    from django.utils.html import format_html
    url = 'https://abcd.com/image'
    # format_html escapes its arguments and marks the result as safe, so the
    # tag renders as HTML without relying on the removed ``allow_tags``.
    return format_html('<img src="{}" />', url)
new_field.allow_tags = True  # only honoured by old Django versions; harmless otherwise
You can make use of a NamedTemporaryFile [GitHub] here. For example:
from django.core.files import File
from django.core.files.temp import NamedTemporaryFile
def store_image_from_source(self, obj):
    """Download the remote image and save it to ``obj.my_img_attr``.

    :param obj: Object whose ``my_img_attr`` file field receives the image.
    :raises requests.HTTPError: If the download returns an error status.
    """
    r = requests.get('https://abcd.com/image')
    # Do not store an error page as if it were an image.
    r.raise_for_status()
    # The context manager closes the temporary file once it has been saved.
    with NamedTemporaryFile() as img:
        img.write(r.content)
        img.flush()
        obj.my_img_attr.save('filename.jpeg', File(img), save=True)
Here 'filename.jpeg' is thus the name of the file, as if you had uploaded a file with that name through a ModelForm.

Matplotlib image not displaying in Django template

I have a seaborn barplot image that I would like to display inside a django template.
I have looked at a few solutions and I managed to display the image with HttpResponse. However, the image it displays is just an image. What I want is an image of the plot on a webpage and you can still click the navbar and home button etc.
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg
from io import BytesIO
from django.template.loader import render_to_string
In views.py
def top_causal(request):
    """Render ``top_causal.html`` with the bar plot embedded as base64 PNG.

    :param request: Incoming request (not read by the visible code).
    :returns: ``HttpResponse`` containing the rendered template.
    """
    import base64  # local: not among the imports shown for this view

    # code for data manipulation
    f = plt.figure(figsize=(10, 5))
    ax = sns.barplot(data)
    # labels, title etc...
    FigureCanvasAgg(f)
    buf = BytesIO()
    plt.savefig(buf, format='png')
    plt.close(f)
    # To return only the bare image instead of a page, use:
    #   HttpResponse(buf.getvalue(), content_type='image/png')
    # The template prefixes "data:image/png;base64,", so it must receive the
    # base64 text of the PNG, not the raw bytes.
    graphic = base64.b64encode(buf.getvalue()).decode('ascii')
    rendered = render_to_string('top_causal.html', {'graphic': graphic})
    return HttpResponse(rendered)
in top_causal.html
<img src="data:image/png;base64,{{ graphic|safe }}" alt="Not working.">
HttpResponse displays the image as a page.
The response using render_to_string returns this instead of an image inside the webpage. I can still see the navbar.
\x06\x1d\x00\x80kT\xb7n]eee\x95\x18\xff\xe1\x87\x1f$I\xf5\xea\xd5\xb3\x19\xcf\xce\xce.\x11\x9b\x9d\x9d-WWW999U\xb8~\xa7N\x9d$\xfd\xf6\x02\xb2\xb0\xb00IR\x9b6mt\xe4\xc8\x91\x12\xb1G\x8e\x1c\x91\xc5b\xb9\xea\xe7\xe1\xda\xb5k\xa7\xf7\xdf\x7f_\xc5\xc5\xc5:|\xf8\xb0V\xadZ\xa5\xe7\x9f\x7f^\xb5j\xd5\xd2\xd4\xa9S\xafx\xee\xdbo\xbf\xad\x81\x03\x07j\xc9\x92%6\xe3\xb9\xb9\xb9\xe5\xba\x9e\xbau\xeb\xcab\xb1h\xc2\x84\t\x9a0aB\xa91M\x9b6-W\xae\xab\xa9W\xaf\x9e\xaaU\xab\xa6\xbd{\xf7\xaaj\xd5\xaa%\xe6]\\\\\xecR\xe7\xb2;\xef\xbcSqqq\x92~{Q\xde\xbb\xef\xbe\xaby\xf3\xe6\xa9\xb8\xb8X\x8b\x16-\xb2k-\x00\x80\xe3b\x07\x1d\x00\x80k\xd4\xb7o_\xa5\xa4\xa4\x94x\x13w||\xbc\xaaT\xa9\xa2\x90\x90\x10\x9b\xf1\xf7\xdf\x7f\xdfz\x8b\xb5$\x9d;wN[\xb7n-\x11W^\x97o\x8do\xd6\xac\x99ul\xc8\x90!JIIQJJ\x8au\xec\xd7 ...
You need to convert the image to byte array, encode it to base64 and embed it to your template. Here is how you do it:
<!DOCTYPE html>
<!--my_template.html-->
<html>
<div class='section'>
<div class='flex column plot mt15 mlr15 justify-left align-left'>
<img src='{{ imuri }}'/>
</div>
</div>
</html>
from django.template.loader import get_template
def top_causal(request):
    """Render ``my_template.html`` with the current figure as a data URI.

    :param request: Incoming request (not read by the visible code).
    :returns: ``HttpResponse`` containing the rendered template.
    """
    # Local imports: these names are not imported in the snippet shown.
    import base64
    import io
    from django.http import HttpResponse

    # .......................
    # Get your plot ready ...
    # .......................
    figure = plt.gcf()
    buf = io.BytesIO()
    # ``quality`` is a JPEG option, so it is not passed for PNG output.
    figure.savefig(buf, format='png', transparent=True, dpi=200)
    # Decode to text so the URI contains the base64 data, not a bytes repr.
    imsrc = base64.b64encode(buf.getvalue()).decode('ascii')
    imuri = 'data:image/png;base64,{}'.format(imsrc)
    # The key must match the ``{{ imuri }}`` placeholder in my_template.html.
    context = {'imuri': imuri}
    # Now embed that to your template
    template = get_template('my_template.html')
    html = template.render(context=context)
    return HttpResponse(html)
I discarded the idea of using matplotlib and went to use Highcharts https://www.highcharts.com/ instead to plot my graph and display it on the webpage.
The data displayed was done by using Django's ORM and Query API.

Upload Image to Amazon S3 with Flask-admin

I am using Flask-Admin and am very happy with it. However, the sample in Flask-Admin only shows how to upload the image to the static folder. Is it possible to upload it to S3 directly with Flask-Admin? Thanks.
Regards
Alex
Thanks for your sample code, Alex Chan. I needed this functionality too, so I decided to write more complete S3FileUploadField and S3ImageUploadField classes, based on your code and various other snippets.
You can find my code at:
https://github.com/Jaza/flask-admin-s3-upload
Also up on pypi, so you can install with:
pip install flask-admin-s3-upload
I've documented a basic usage example in the readme (you can see it on the GitHub project page). I hope this helps anyone else who needs S3 file uploads in flask-admin.
Here it is, but the code has not been cleaned up:
import os
import os.path as op
import cStringIO
import logging
import config
from flask import url_for
from werkzeug import secure_filename
from werkzeug.datastructures import FileStorage
import boto
from boto.s3.key import Key
from wtforms import ValidationError, fields
from wtforms.widgets import HTMLString, html_params
from flask.ext.admin.babel import gettext
from flask.ext.admin._compat import string_types, urljoin
from flask.ext.admin.form.upload import ImageUploadField
try:
from PIL import Image, ImageOps
except ImportError:
Image = None
ImageOps = None
# Public names of this module. Only names that exist here are listed:
# 'FileUploadInput' and 'FileUploadField' are neither defined nor imported in
# this module, so exporting them would make ``from module import *`` fail.
__all__ = ['ImageUploadInput', 'ImageUploadField', 's3ImageUploadField',
           'namegen_filename', 'thumbgen_filename']
class ImageUploadInput(object):
    """
    Widget that renders the file chooser for an image field.

    Override `empty_template` (no stored image) or `data_template` (stored
    image shown with a delete checkbox) to change the generated markup.
    """
    empty_template = ('<input %(file)s>')
    data_template = ('<div class="image-thumbnail">'
                     ' <img %(image)s>'
                     ' <input type="checkbox" name="%(marker)s">Delete</input>'
                     '</div>'
                     '<input %(file)s>')

    def __call__(self, field, **kwargs):
        """Return the HTML for *field*; *kwargs* become input attributes."""
        kwargs.setdefault('id', field.id)
        kwargs.setdefault('name', field.name)

        args = {
            'file': html_params(type='file', **kwargs),
            'marker': '_%s-delete' % field.name,
        }

        # Without a stored string value there is nothing to preview.
        if not (field.data and isinstance(field.data, string_types)):
            return HTMLString(self.empty_template % args)

        args['image'] = html_params(src=self.get_url(field))
        return HTMLString(self.data_template % args)

    def get_url(self, field):
        """Return the preview URL for *field*, which is ``field.data`` itself."""
        # A thumbnail/relative name is still derived here, but it is not part
        # of the result; the stored value is returned unchanged.
        filename = (field.thumbnail_fn(field.data)
                    if field.thumbnail_size else field.data)
        if field.url_relative_path:
            filename = urljoin(field.url_relative_path, filename)
        return field.data
#return url_for(field.endpoint, filename=field.data)
class s3ImageUploadField(ImageUploadField):
    """
    Image upload field that uploads the image to Amazon S3.

    Does image validation, thumbnail generation, updating and deleting images.

    Requires PIL (or Pillow) to be installed.
    """
    widget = ImageUploadInput()

    # If the uploaded image's format is not in this tuple, the image is
    # saved as JPEG (see `_get_save_format`).
    keep_image_formats = ('PNG',)

    # Name of the S3 bucket that receives the uploads.
    bucket_name = "vipbutton"

    def __init__(self, label=None, validators=None,
                 base_path=None, relative_path=None,
                 namegen=None, allowed_extensions=None,
                 max_size=None,
                 thumbgen=None, thumbnail_size=None,
                 permission=0o666,
                 url_relative_path=None, endpoint='static',
                 **kwargs):
        """
        Constructor.

        :param label:
            Display label
        :param validators:
            Validators
        :param base_path:
            Absolute path to the directory which will store files
        :param relative_path:
            Relative path from the directory. Will be prepended to the file name for uploaded files.
            Flask-Admin uses `urlparse.urljoin` to generate resulting filename, so make sure you have
            trailing slash.
        :param namegen:
            Function that will generate filename from the model and uploaded file object.
            Please note, that model is "dirty" model object, before it was committed to database.

            For example::

                import os.path as op

                def prefix_name(obj, file_data):
                    parts = op.splitext(file_data.filename)
                    return secure_filename('file-%s%s' % parts)

                class MyForm(BaseForm):
                    upload = FileUploadField('File', namegen=prefix_name)

        :param allowed_extensions:
            List of allowed extensions. If not provided, a default list of
            image extensions is used.
        :param max_size:
            Tuple of (width, height, force) or None. If provided, Flask-Admin will
            resize image to the desired size.
        :param thumbgen:
            Thumbnail filename generation function.

            For example::

                import os.path as op

                def thumb_name(filename):
                    name, _ = op.splitext(filename)
                    return secure_filename('%s-thumb.jpg' % name)

                class MyForm(BaseForm):
                    upload = ImageUploadField('File', thumbgen=thumb_name)

        :param thumbnail_size:
            Tuple or (width, height, force) values. If not provided, thumbnail won't be created.
            Width and height is in pixels. If `force` is set to `True`, will try to fit image into dimensions and
            keep aspect ratio, otherwise will just resize to target size.
        :param url_relative_path:
            Relative path from the root of the static directory URL. Only gets used when generating
            preview image URLs.
            For example, your model might store just file names (`relative_path` set to `None`), but
            `base_path` is pointing to subdirectory.
        :param endpoint:
            Static endpoint for images. Used by widget to display previews. Defaults to 'static'.
        """
        # Check if PIL is installed
        if Image is None:
            raise Exception('PIL library was not found')

        self.max_size = max_size
        self.thumbnail_fn = thumbgen or thumbgen_filename
        self.thumbnail_size = thumbnail_size
        self.endpoint = endpoint
        self.image = None
        self.url_relative_path = url_relative_path

        if not allowed_extensions:
            allowed_extensions = ('gif', 'jpg', 'jpeg', 'png', 'tiff')

        # NOTE(review): super(ImageUploadField, self) starts the lookup after
        # ImageUploadField, so that class's own methods are skipped here and
        # below — presumably intentional because this class re-implements
        # them; confirm.
        super(ImageUploadField, self).__init__(label, validators,
                                               base_path=base_path,
                                               relative_path=relative_path,
                                               namegen=namegen,
                                               allowed_extensions=allowed_extensions,
                                               permission=permission,
                                               **kwargs)

    def pre_validate(self, form):
        """Open the uploaded file with PIL; raise ValidationError on failure."""
        super(ImageUploadField, self).pre_validate(form)

        if self.data and isinstance(self.data, FileStorage):
            try:
                self.image = Image.open(self.data)
            except Exception as e:
                raise ValidationError('Invalid image: %s' % e)

    # Deletion
    def _delete_file(self, filename):
        """Delete the file via the parent implementation, then its thumbnail."""
        super(ImageUploadField, self)._delete_file(filename)
        self._delete_thumbnail(filename)

    def _delete_thumbnail(self, filename):
        """Remove the local thumbnail file for *filename* if it exists."""
        path = self._get_path(self.thumbnail_fn(filename))
        if op.exists(path):
            os.remove(path)

    # Saving
    def _save_file(self, data, filename):
        """Upload the image (resized if needed) and return its S3 URL.

        :param data: Uploaded file object; saved locally when no resize or
            format conversion is needed.
        :param filename: Target name; its extension may be changed by
            `_get_save_format`.
        :returns: URL returned by `_save_image` for the uploaded image.
        """
        path = self._get_path(filename)
        if not op.exists(op.dirname(path)):
            # Directories need the execute bit to be traversable.
            os.makedirs(op.dirname(path), self.permission | 0o111)

        # Figure out format
        filename, format = self._get_save_format(filename, self.image)

        if self.image and (self.image.format != format or self.max_size):
            # _resize expects (width, height, force); a two-element default
            # would fail to unpack there.
            size = self.max_size or (500, 500, False)
            image = self._resize(self.image, size)
            saved_url = self._save_image(image, filename, format)
        else:
            data.seek(0)
            data.save(path)
            saved_url = self._save_image(self.image, filename, format)

        self._save_thumbnail(data, filename, format)
        return saved_url

    def _save_thumbnail(self, data, filename, format):
        """Upload a thumbnail if one is configured.

        :returns: URL of the uploaded thumbnail, or None when no thumbnail
            is created.
        """
        if not (self.image and self.thumbnail_size):
            return None
        # Use the configured name generator so the name matches the one
        # `_delete_thumbnail` and the widget derive.
        return self._save_image(self._resize(self.image, self.thumbnail_size),
                                self.thumbnail_fn(filename),
                                format)

    def _resize(self, image, size):
        """Return *image* shrunk to fit *size*, or unchanged if it already fits.

        :param size: Tuple of (width, height, force). With `force` the image
            is fitted with `ImageOps.fit`; otherwise a thumbnail copy is made.
        """
        (width, height, force) = size

        if image.size[0] > width or image.size[1] > height:
            if force:
                return ImageOps.fit(image, (width, height), Image.ANTIALIAS)
            thumb = image.copy()
            thumb.thumbnail((width, height), Image.ANTIALIAS)
            return thumb

        return image

    def _save_image(self, image, path, format='JPEG'):
        """Upload *image* to S3 under key *path* and return its public URL."""
        if format == 'JPEG':
            # JPEG cannot store an alpha channel, so always write RGB.
            if image.mode != 'RGB':
                image = image.convert('RGB')
        elif image.mode not in ('RGB', 'RGBA'):
            image = image.convert('RGBA')

        conn = boto.connect_s3(config.AWS_KEY, config.AWS_SECRET)
        bucket = conn.get_bucket(self.bucket_name)
        k = Key(bucket)
        k.key = path

        # Encode in memory and upload the resulting bytes.
        temp_file = cStringIO.StringIO()
        image.save(temp_file, format)
        k.set_contents_from_string(temp_file.getvalue())
        k.set_acl('public-read')
        return k.generate_url(expires_in=0, query_auth=False)

    def _get_save_format(self, filename, image):
        """Return ``(filename, format)`` to save with.

        Formats outside `keep_image_formats` are saved as JPEG with a
        ``.jpg`` extension.
        """
        if image.format not in self.keep_image_formats:
            name, ext = op.splitext(filename)
            filename = '%s.jpg' % name
            return filename, 'JPEG'

        return filename, image.format
# Helpers
def namegen_filename(obj, file_data):
    """
    Return the uploaded file's name passed through ``secure_filename``.
    """
    original_name = file_data.filename
    return secure_filename(original_name)
def thumbgen_filename(filename):
    """
    Return *filename* with ``_thumb`` inserted before its extension.
    """
    # splitext yields the (root, extension) pair the format string expects.
    return '%s_thumb%s' % op.splitext(filename)