Replace string in .html files with Django custom command - django

I need to export my django project to static files. I'm using django-distill. Everything works fine except hrefs in main folder directory. So I decided to replace them with custom command after files were generated. However after few attempts I don't know why this function doesn't work. For example if even when I print out soup it show me empty string.
class Command(BaseCommand):
help='change urls in each header to static version'
def replace_urls(self):
find_string_1 = 'href="/blog/"'
find_string_2 = 'href="/contact/"'
replace_string_1 = 'href="blog.html"'
replace_string_2 = 'href="/contact.html"'
exclude_dirs = ['media', 'static']
for (_, dirs, files) in os.walk(f'{settings.BASE_DIR}/staticpage/'):
dirs[:] = [d for d in dirs if d not in exclude_dirs]
for filepath in files:
f = open(filepath, mode='r', encoding='utf-8')
soup = BeautifulSoup(f, "lxml", from_encoding="utf-8")
if find_string_1 in soup:
soup.replace_with(replace_string_1)
if find_string_2 in soup:
soup.replace_with(replace_string_2)
f.close()
def handle(self, *args, **kwargs):
try:
self.replace_urls()
self.stdout.write(self.style.SUCCESS(f'********** Command has been execute without any error **********'))

Related

Create download link file in django

I created a file in project, generation pdf from html. For this i have this method:
def generation_html_to_pdf(self):
path_pdf = None
with NamedTemporaryFile(delete=False, suffix=".pdf", dir='pdf_files') as tf:
path_pdf = tf.name
pdfkit.from_file('templates/first_page.html', tf.name)
return path_pdf
Then, in pdf_files folder i have the pdf file. I want to get a download link for this file:
my view
path_to_pdf = generation_html_to_pdf()
download_link = 'http://' + request.get_host() + path_to_pdf
json_inf_pdf = {'download_link': download_link}
return JsonResponse(json_inf_pdf, status=200)
i have json like this:
{"download_link": "http://127.0.0.1:8000/home/alex/projects/test_project/pdf_files/tmpe0nqbn01.pdf"}"
when i click in this link i have error:
Page not found (404)
You need to create download view and url. Function like this to create link:
def download_link(request):
''' Create download link '''
download_link = 'http://{}/{}'.format(request.get_host(), 'download/my_filename')
json_inf_pdf = {'download_link': download_link}
return JsonResponse(json_inf_pdf, status=200)
and to download pdf:
def download_file(request, my_filename):
''' Download file '''
# Open template
from django.conf import settings
template_url = os.path.join(settings.BASE_DIR, 'templates', 'first_page.html')
template_open = open(template_url, 'r')
# Read template
from django import template
t = template.Template(template_open.read())
c = template.Context({})
# Create pdf
pdf = pdfkit.from_string(t.render(c))
# Create and return response with created pdf
response = HttpResponse(pdf)
response['Content-Type'] = 'application/pdf'
response['Content-disposition'] = 'attachment ; filename = {}'.format(my_filename)
return response
and url:
path('/download/<str:my_filename>', views.download_file, name="download_pdf')
I can't guarantee that this will work in your case without modification, since I can't tell which html-to-pdf library you're using and without seeing your other code. It's just a basic implementation idea.

Python file not found error after class import in another file

Ok, i have a python file inside my project with only this class:
class hd_XML():
def __init__(self):
self.path = 'static/XML/current/'
self.filename = 'current_settings.xml'
self.tree = ''
def open(self):
self.tree = ET.parse(self.path + self.filename)
self.root = self.tree.getroot()
return self.root
def get_data(self):
self.root = self.open()
canale = Channel
canali = []
i = 0
for child in self.root:
canale.id = child.attrib['id']
canale.max = child.attrib['max']
canale.color = child.attrib['color']
canali.append(canale)
i += 1
return canali
if i run this class standalone with:
if __name__ == '__main__':
xml = hd_XML()
print(xml.get_data())
that works. But, if I import this class in my main app file as below,
import hd_modXML #thats my separate file name
xml = hd_modXML.hd_XML()
canali = xml.get_data()
print(canali[0].id)
I cannot retrive the file...
FileNotFoundError: [Errno 2] No such file or directory: 'static/XML/current/current_settings.xml'
Why?! with a standalone file I can find it and after an import I can't?
project structure:
main folder <--- where app.py (where is included hd_modXML.py) and hd_modXML.py are
|_static
|_XML
|_current\ <-- where current_settings.xml is
|_templates
After some tries I found that it works giving the parser the whole directory path from root, so in my case:
self.path = '/home/grace/pyDev/prova_horus2/static/XML/current/'
I don't know why with prievious versions it worked without...
Now I need a way to avoid hardcoding the root path, but for this I can help myself.
Many thanks to yklsga for pointing me to the right way

python html parser doesnot return results?

I am new to python, I have a folder of downloaded html files, from which I need to extract the text data and output it in the same folder as text file, below code works fine with individual files, however when i am trying to pass multiple files it doesn't work. Kindly suggest a solution, i will be extremely thankful. Its not even giving me any error, so I could work on it and figure out some solution.
from HTMLParser import HTMLParser
from re import sub
from sys import stderr
from traceback import print_exc
import glob
import os
class _DeHTMLParser(HTMLParser):
def __init__(self):
HTMLParser.__init__(self)
self.__text = []
def handle_data(self, data):
text = data.strip()
if len(text) > 0:
text = sub('[ \t\r\n]+', ' ', text)
self.__text.append(text + ' ')
def handle_starttag(self, tag, attrs):
if tag == 'p':
self.__text.append('\n\n')
elif tag == 'br':
self.__text.append('\n')
def handle_startendtag(self, tag, attrs):
if tag == 'br':
self.__text.append('\n\n')
def text(self):
return ''.join(self.__text).strip()
def dehtml(text):
try:
parser = _DeHTMLParser()
parser.feed(text)
parser.close()
return parser.text()
except:
print_exc(file=stderr)
return text
def main():
dir_path = r"/home/maitreyee/Downloads/SchoolCollege.com/multiple_states/"
results_dir = r"/home/maitreyee/Downloads/SchoolCollege.com/"
for file_name in glob.glob(os.path.join(dir_path, "*.html")):
text = open(file_name, "r")
results_file = os.path.splitext(file_name)[0] + '.txt'
with open(results_file, 'w') as outfile:
i = dehtml(text)
print(i)
outfile.write(i + '\n')
if __name__ == '__main__':
main()
I struggled a lot and then tried something simpler, for the above code we could just modify the main() function by the following code and then this would return .txt files for all the html files, we need to pass just the folder location.
def main():
dir_path = r"/home/maitreyee/Downloads/SchoolCollege.com/rajasthan_data/"
results_dir = r"/home/maitreyee/Downloads/SchoolCollege.com/rajasthan_data/"
for file_name in glob.glob(os.path.join(dir_path, "*.html")):
f = open(file_name)
text = f.read()
results_file = os.path.splitext(file_name)[0] + '.txt'
with open(results_file, "w") as fp:
fp.write(dehtml(text))
fp.close()
Where the directory paths are given then put in the directory path to your html file's folder. It was really helpful for me because I had to convert hundreds of html files, and I needed all the text from them, this gave me results in seconds.

Python3 LaTex PDF generator using subprocess, Error: memoryview: str object does not have the buffer interface

I am working on converting a python 2 project to python 3.4. One part of project uses LaTex and subprocess to generate PDF files. I am having issue getting the code working pass through subprocess.Popen.communicate() step. The problem is in gen_pdf() and I think it is cmd.communicate(input=self._gen_latex()) that is causing issue. If I take out try and run the code directly, it will generate error "memoryview: str object does not have the buffer interface". But I couldn't get a solution to get around of this issue.
Any help is highly appreciated. Thanks!
import django.conf
import subprocess
import os
import tempfile
import shutil
class PDFLatexWriter(object):
"""
Handles creating Latex documents and building them into PDFs.
"""
def gen_pdf(self):
"""
Generates the Latex document and writes to tmpfile.
Returns the pdf file handle.
"""
try:
args=['/usr/bin/pdflatex', '-jobname', 'dp', '-output-directory', self.tmpd, '-halt-on-error']
cmd = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stderr, stdout = cmd.communicate(input=self._gen_latex())
if cmd.poll() != 0:
print('Error running cmd.')
raise IOError
else:
return self._cp_pdf_tmp()
except Exception:
pass
finally:
self._clean_tmp()
def __init__(self, get_pdf_form, parent_dir=os.path.join(django.conf.settings.BASE_DIR+'/media', 'pdfs', 'tmp')):
"""
get_pdf_form: A validated pdfs.forms.GetPDFForm.
parent_dir: Directory where the temporary directory will be created.
"""
self.form = get_pdf_form
self.parent = parent_dir
self.tmpd = tempfile.mkdtemp(dir=self.parent)
def __del__(self):
self._clean_tmp()
def _gen_latex(self):
"""
Generates the latex markup and returns a string of the markup.
"""
header = r"""
\documentclass[a4paper,16pt]{article}
\usepackage{graphicx}
\usepackage{pdfpages}
\usepackage{hyperref}
\usepackage{fancyhdr}
\begin{document}
\pagestyle{fancy}
\fancyhead[C]{\includegraphics[width=9mm]{%s}\huge{ Student Book}}
""" % os.path.join(django.conf.settings.BASE_DIR, 'static', 'images', 'logo.png')
footer = '\n\n\end{document}'
links = ''
docs = ''
hyperlink = 2
for x, i in enumerate(self.form.iter_pdf()):
docs += r"\includepdf[pages=%s,link,linkname=%s]{%s}" % (i[1], i[0].pdf_display_name, i[0].pdf_path)
docs += '\n'
if i[1] == '-':
# Complete PDF.
links += r"\noindent\hyperlink{page.%s}{%s}\newline" % (hyperlink,
i[0].pdf_display_name)
hyperlink += i[0].pages
else:
links += r"\noindent\hyperlink{page.%s}{%s (Page %s)}\newline" % (hyperlink,
i[0].pdf_display_name, i[1])
hyperlink += 1
links += '\n'
return header + '\n\n' + links + '\n\n' + docs + '\n\n' + footer
def _cp_pdf_tmp(self):
"""
gen_pdf() creates a temp directory that includes latex build files and the PDF. Unfortunately,
a temp directory will not automatically delete when the last reference is closed. Therefore,
it's necessary to manually delete this temp dir before returning from the view. However,
we can't send the PDF to the user if we've already deleted its containing dir. This function
copies the PDF to a true temp file that will delete on close, allowing us to have the desired
behavior where the temp dir is manually deleted, and the PDF is deleted upon close.
Returns a file handle to the PDF.
"""
if os.path.isfile(os.path.join(self.tmpd, 'dp.pdf')):
tmp = tempfile.TemporaryFile(dir=self.parent, mode='r+b')
shutil.copyfileobj(open(os.path.join(self.tmpd, 'dp.pdf'), 'rb'), tmp)
tmp.seek(0)
return tmp
else:
print('No source file.')
raise IOError
def _clean_tmp(self):
"""
Cleans up temp directory.
"""
try:
shutil.rmtree(self.tmpd)
except OSError:
print('Unable to clean temporary files.')
Added Traceback
Traceback:
File "/usr/lib/python3/dist-packages/django/core/handlers/base.py" in get_response
112. response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "/usr/lib/python3/dist-packages/django/contrib/auth/decorators.py" in _wrapped_view
22. return view_func(request, *args, **kwargs)
File "/var/django/project1/project1/pdfs/views.py" in pdf_share
132. pdf_fb = tex.gen_pdf()
File "/var/django/project1/project1/pdfs/latex.py" in gen_pdf
125. stdout = cmd.communicate(input=self._gen_latex())[0]
File "/usr/lib/python3.4/subprocess.py" in communicate
960. stdout, stderr = self._communicate(input, endtime, timeout)
File "/usr/lib/python3.4/subprocess.py" in _communicate
1602. input_view = memoryview(self._input)
Exception Type: TypeError at /app1/share/pdf/
Exception Value: memoryview: str object does not have the buffer interface
After fixing " stdout = cmd.communicate(input=(self._gen_latex()).encode('utf-8'))[0] ", I was able to print out all the LaTex executing details. The reason I got Popen.poll() = 1 instead of 0 was because the subprocess has been terminated with an error. After print out stdout and dig into the error, there was a logo file that had wrong path. After correcting that error, everything is working perfectly.
Hope this helps for whoever happens to work on the similar stuff like me.

Serving Zip file Django

I'm following this solution (Serving dynamically generated ZIP archives in Django) to serve some zip files from django.
The idea is to select the files from a database using some check boxes, but I'm trying to make the example work with just 2 images.
import os
import zipfile
import StringIO
from django.http import HttpResponse
def getfiles(request):
# Files (local path) to put in the .zip
# FIXME: Change this (get paths from DB etc)
filenames = ["/home/../image1.png", "/home/../image2.png"]
# Folder name in ZIP archive which contains the above files
# E.g [thearchive.zip]/somefiles/file2.txt
# FIXME: Set this to something better
zip_subdir = "somefiles"
zip_filename = "%s.zip" % zip_subdir
# Open StringIO to grab in-memory ZIP contents
s = StringIO.StringIO()
# The zip compressor
zf = zipfile.ZipFile(s, "w")
for fpath in filenames:
# Calculate path for file in zip
fdir, fname = os.path.split(fpath)
zip_path = os.path.join(zip_subdir, fname)
# Add file, at correct path
zf.write(fpath, zip_path)
# Must close zip for all contents to be written
zf.close()
# Grab ZIP file from in-memory, make response with correct MIME-type
resp = HttpResponse(s.getvalue(), mimetype = "application/x-zip-compressed")
# ..and correct content-disposition
resp['Content-Disposition'] = 'attachment; filename=%s' % zip_filename
return resp
I wrote the getfile(request) on my views.py and i make a call from the index view
def index(request):
if request.method == 'POST': # If the form has been submitted...
resp = getfiles(request)
form = FilterForm(request.POST) # A form bound to the POST data
# do some validation and get latest_events from database
context = {'latest_events_list': latest_events_list, 'form': form}
return render(request, 'db_interface/index.html', context)
I know the getfile() method is called, because if I put names of unexistents files I got an error, but I dont get any download neither an error if the filenames are correct (I put the full path /home/myuser/xxx/yyy/Project/app/static/app/image1.png).
I tried with the django server and with the apache2/nginx server I have for production
I also tried using content_type = 'application/force-download'
Thanks