Download a file without storing it - flask

I'm building a YouTube video downloader using PyTube and Flask. What I want is for the end user to receive the video file without it ever being stored on the server.
Currently the code looks like this:
def download_video():
    if request.method == "POST":
        url = YouTube(session['link'])  # Getting user input
        itag = request.form.get("itag")  # Getting user input
        video = url.streams.get_by_itag(itag)
        file = video.download(path)  # Downloading the video into a folder
        return send_file(file, as_attachment=True)
    return redirect(url_for("home"))
The code works fine; the only drawback is that the file is stored on the server, which can become an issue later on.
I already tried downloading it to /dev/null/, which seems to work locally, but when deployed to Heroku it gives me an Internal Server Error.

The download method is documented to:
Write the media stream to disk.
A dirty workaround would of course be to remove the file saved at the output_path after calling download (sketched below), but you could also write the media stream to a buffer using stream_to_buffer and send that.
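If you do go the delete-after-send route, here is a minimal sketch using Flask's after_this_request hook; the route, session usage, and the app and path names mirror the question and are assumptions:

import os

from flask import after_this_request, request, send_file, session
from pytube import YouTube


@app.route("/download", methods=["POST"])
def download_video():
    url = YouTube(session['link'])
    itag = request.form.get("itag")
    video = url.streams.get_by_itag(itag)
    file = video.download(path)

    @after_this_request
    def cleanup(response):
        # Remove the temporary copy once the response has been built.
        # On POSIX systems send_file keeps its own file handle open, so the
        # bytes stay readable even after the path is unlinked; on Windows the
        # remove may fail while the file is still open, hence the except clause.
        try:
            os.remove(file)
        except OSError:
            pass
        return response

    return send_file(file, as_attachment=True)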
Minimal and reproducible example
from pytube import YouTube
from flask import Flask, send_file
from io import BytesIO

app = Flask(__name__)


@app.route("/")
def index():
    buffer = BytesIO()
    url = YouTube("https://www.youtube.com/watch?v=dQw4w9WgXcQ")
    video = url.streams.get_by_itag(18)
    # Write the stream into the in-memory buffer instead of onto disk,
    # then rewind it so send_file reads from the beginning.
    video.stream_to_buffer(buffer)
    buffer.seek(0)
    return send_file(
        buffer,
        as_attachment=True,
        # Note: Flask >= 2.0 renames this parameter to download_name.
        attachment_filename="cool-video.mp4",
        mimetype="video/mp4",
    )


if __name__ == "__main__":
    app.run()
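Keep in mind that stream_to_buffer holds the whole video in memory before send_file starts sending it, so for very large streams the delete-after-send workaround sketched above may be easier on RAM, especially on a memory-constrained Heroku dyno.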

Related

Accessing downloaded data in a Cloud Run instance

I have a Google Cloud Run instance that looks like this:
import json
import os
import rarfile
from google.cloud import storage
from flask import Flask, request

app = Flask(__name__)


@app.route("/", methods=["POST"])
def index():
    file_id = request.values.get("fileId")
    try:
        storage_client = storage.Client()
        bucket = storage_client.get_bucket("test-bucket")
        blob = bucket.blob(file_id)
        blob.download_to_filename(file_id)
        rf = rarfile.RarFile(file_id)
        rf.extractall()
        return ("", 204)
    except Exception as e:
        return f"Error: {e}", 400
    return ("500 Error", 500)
However, when I trigger the instance, I get the following error:
Error: can only concatenate str (not "RarCannotExec") to str
What is going wrong here? When I download the file and unzip it locally, I run into no problems. Does it have to do with the file system of Cloud Run instances?
EDIT:
I think it is clear from the above that the error message comes from the except block where I return Error: {e}. However, from analyzing the logs, it is apparent that the program actually fails at the rf = rarfile.RarFile(file_id) line. Why that line fails is still unclear to me.
EDIT 2:
I needed to install either unrar or unrar-free in the container. Cheers!
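For context, RarCannotExec means rarfile could not run the external unrar backend it relies on for extraction, which is why the failure shows up around the rarfile.RarFile(...) call. Besides installing the package in the image, a small startup check makes the missing dependency obvious; this is only a sketch, and the binary names probed are assumptions that depend on which package you install:

import shutil

# Fail fast at container startup if no unrar backend is on PATH.
# Binary names are assumptions; adjust them to the package you install.
if not any(shutil.which(tool) for tool in ("unrar", "unrar-free")):
    raise RuntimeError(
        "rarfile needs an external unrar tool; "
        "install 'unrar' or 'unrar-free' in the container image."
    )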

django PDF FileResponse "Failed to load PDF document."

I am trying to generate and output a PDF from a Django view. I followed the example in the Django documentation using ReportLab, but the downloaded PDF does not open in any PDF reader.
I use Python 3.7.0, Django==2.1.3, reportlab==3.5.12. I tried adding content_type="application/pdf" to FileResponse but still have the same issue.
import io
from django.http import FileResponse
from reportlab.pdfgen import canvas


def printPDF(request):
    # Create a file-like buffer to receive PDF data.
    buffer = io.BytesIO()
    # Create the PDF object, using the buffer as its "file."
    p = canvas.Canvas(buffer)
    p.drawString(100, 100, "Hello world.")
    p.showPage()
    p.save()
    return FileResponse(buffer, as_attachment=True, filename='hello.pdf')
The generated PDF should open in any PDF reader, but I am getting 'Failed to load PDF document.'
buffer = BytesIO() is used instead of a file to store the PDF document. Before returning it with FileResponse, you need to reset the stream position to the start:
buffer.seek(io.SEEK_SET)
Now the PDF download should work as expected.
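Applied to the view from the question, a minimal corrected version looks like this (seek(io.SEEK_SET) is equivalent to seek(0)):

import io

from django.http import FileResponse
from reportlab.pdfgen import canvas


def printPDF(request):
    buffer = io.BytesIO()
    p = canvas.Canvas(buffer)
    p.drawString(100, 100, "Hello world.")
    p.showPage()
    p.save()
    # Rewind the buffer so FileResponse starts reading from the beginning.
    buffer.seek(0)
    return FileResponse(buffer, as_attachment=True, filename='hello.pdf')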
There seems to be something fishy going on with the interaction of BytesIO and FileResponse. The following worked for me:
from django.http import HttpResponse
from reportlab.pdfgen import canvas


def printPDF(request):
    response = HttpResponse(content_type='application/pdf')
    response['Content-Disposition'] = 'attachment; filename=hello.pdf'
    p = canvas.Canvas(response)
    p.drawString(100, 100, "Hello world.")
    p.showPage()
    p.save()
    return response
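This likely works because HttpResponse is itself a file-like object (it exposes a write() method), so ReportLab writes the finished PDF directly into the response body and there is no stream position to rewind.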

Python: Erratic joblib behaviour on Flask

I am trying to deploy a machine learning model on an AWS EC2 instance using Flask. These are scikit-learn Random Forest models that were pickled using joblib. When I host Flask on localhost and load them into memory, everything runs smoothly. However, when I deploy it on the apache2 server using mod_wsgi, joblib sometimes works (i.e. the models load) and other times the server just hangs. There is no error in the logs. Any ideas would be appreciated.
Here is the relevant code that I am using:
# In[49]:
from flask import Flask, jsonify, request, render_template
from datetime import datetime
from sklearn.externals import joblib
import pickle as pkl
import os

# In[50]:
app = Flask(__name__, template_folder="/home/ubuntu/flaskapp/")

# In[51]:
log = lambda msg: app.logger.info(msg, extra={'worker_id': "request.uuid"})

# Logger
import logging
handler = logging.FileHandler('/home/ubuntu/app.log')
handler.setLevel(logging.ERROR)
app.logger.addHandler(handler)

# In[52]:
@app.route('/')
def host_template():
    return render_template('Static_GUI.html')

# In[53]:
def load_models(path):
    model_arr = [0]*len(os.listdir(path))
    for filename in os.listdir(path):
        f = open(path+"/"+filename, 'rb')
        model_arr[int(filename[2:])] = joblib.load(f)
        print("Classifier ", filename[2:], " added.")
        f.close()
    return model_arr

# In[54]:
partition_limit = 30

# In[55]:
print("Dictionaries being loaded.")
dict_file_path = "/home/ubuntu/Dictionaries/VARR"
dictionaries = pkl.load(open(dict_file_path, "rb"))
print("Dictionaries Loaded.")

# In[56]:
print("Begin loading classifiers.")
model_path = "/home/ubuntu/RF_Models/"
classifier_arr = load_models(model_path)
print("Classifiers Loaded.")

if __name__ == '__main__':
    log("/home/ubuntu/print.log")
    print("Starting API")
    app.run(debug=True)
I was stuck with this for quite some time. Posting the answer in case someone else runs into this problem. Using print statements and looking at the logs, I narrowed the problem down to the joblib.load statement. I found this helpful blog post: http://blog.rtwilson.com/how-to-fix-flask-wsgi-webapp-hanging-when-importing-a-module-such-as-numpy-or-matplotlib
The idea of using a global application group (the WSGIApplicationGroup %{GLOBAL} directive in the Apache configuration) fixed the problem. That forces the app to run in the main interpreter, just as the top comment on that blog page mentions.

Stream file from remote url to Django view response

Is there any way to stream a file from a remote URL with a Django response (without downloading the file locally)?
# view.py
def file_recover(request, *args, **kwargs):
    file_url = "http://remote-file-storage.com/file/111"
    return StreamFileFromURLResponse(file_url)
We have file storage (files can be large: 1 GB and more). We can't share the download URL directly (there are security issues). Streaming can significantly increase download speed by forwarding the download stream straight into the Django response.
Django has a built-in StreamingHttpResponse class, which should be given an iterator that yields bytestrings as content. In the example below I'm using the raw response content from requests:
import requests
from django.http import StreamingHttpResponse


def stream_file(request, *args, **kwargs):
    r = requests.get("http://host.com/file.txt", stream=True)
    resp = StreamingHttpResponse(streaming_content=r.raw)
    # In case you want to force file download in a browser
    # resp['Content-Disposition'] = 'attachment; filename="saving-file-name.txt"'
    return resp
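A variation on the same idea, in case you prefer explicit chunking over handing StreamingHttpResponse the raw urllib3 object: requests' iter_content yields fixed-size byte chunks, and forwarding the upstream Content-Length (when present) lets browsers show download progress. The view name and URL below are placeholders.

import requests
from django.http import StreamingHttpResponse


def stream_file_chunked(request, *args, **kwargs):
    r = requests.get("http://host.com/file.txt", stream=True)
    resp = StreamingHttpResponse(
        streaming_content=r.iter_content(chunk_size=64 * 1024),
        content_type=r.headers.get("Content-Type", "application/octet-stream"),
    )
    # Forward the size if the upstream server provides it.
    if "Content-Length" in r.headers:
        resp["Content-Length"] = r.headers["Content-Length"]
    # Force a download dialog in the browser, as in the original example.
    resp['Content-Disposition'] = 'attachment; filename="saving-file-name.txt"'
    return resp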

Using python to update a file on google drive

I have the following script to upload a file to Google Drive, using Python 2.7. As it is now, it uploads a new copy of the file, but I want the existing file to be updated/overwritten instead. I can't find help in the Google Drive API references and guides for Python. Any suggestions?
from __future__ import print_function
import os
from apiclient.discovery import build
from httplib2 import Http
from oauth2client import file, client, tools

try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    flags = None

# Gain access to google drive
SCOPES = 'https://www.googleapis.com/auth/drive.file'
store = file.Storage('storage.json')
creds = store.get()
if not creds or creds.invalid:
    flow = client.flow_from_clientsecrets('client_secret.json', SCOPES)
    creds = tools.run_flow(flow, store, flags) \
        if flags else tools.run(flow, store)
DRIVE = build('drive', 'v3', http=creds.authorize(Http()))

# The file that is being uploaded
FILES = (
    ('all-gm-keys.txt', 'application/vnd.google-apps.document'),  # in google doc format
)

# Where the file ends up on google drive
for filename, mimeType in FILES:
    folder_id = '0B6V-MONTYPYTHONROCKS-lTcXc'  # Not the real folder id
    metadata = {'name': filename, 'parents': [folder_id]}
    if mimeType:
        metadata['mimeType'] = mimeType
    res = DRIVE.files().create(body=metadata, media_body=filename).execute()
    if res:
        print('Uploaded "%s" (%s)' % (filename, res['mimeType']))
I think you are looking for the update method; the documentation (linked below) includes an example of overwriting a file in Python.
Using the official Google client API instead of raw HTTP requests should make your task easier.
from apiclient import errors
from apiclient.http import MediaFileUpload

# ...

def update_file(service, file_id, new_title, new_description, new_mime_type,
                new_filename, new_revision):
    """Update an existing file's metadata and content.

    Args:
        service: Drive API service instance.
        file_id: ID of the file to update.
        new_title: New title for the file.
        new_description: New description for the file.
        new_mime_type: New MIME type for the file.
        new_filename: Filename of the new content to upload.
        new_revision: Whether or not to create a new revision for this file.

    Returns:
        Updated file metadata if successful, None otherwise.
    """
    try:
        # First retrieve the file from the API.
        file = service.files().get(fileId=file_id).execute()

        # File's new metadata.
        file['title'] = new_title
        file['description'] = new_description
        file['mimeType'] = new_mime_type

        # File's new content.
        media_body = MediaFileUpload(
            new_filename, mimetype=new_mime_type, resumable=True)

        # Send the request to the API.
        updated_file = service.files().update(
            fileId=file_id,
            body=file,
            newRevision=new_revision,
            media_body=media_body).execute()
        return updated_file
    except errors.HttpError, error:
        print 'An error occurred: %s' % error
        return None
Link to the example: https://developers.google.com/drive/api/v2/reference/files/update#examples
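Note that the example above targets the v2 API, while the script in the question builds a v3 service. Against v3 the same idea becomes a files().update call with a media_body; below is a minimal sketch reusing the question's DRIVE service, where existing_file_id is a placeholder for the id of the file already on Drive (for instance, remembered from the initial create call). Keep in mind that v3 uses name instead of title and has no newRevision parameter.

from apiclient.http import MediaFileUpload

existing_file_id = 'REPLACE_WITH_EXISTING_FILE_ID'  # placeholder: id of the Drive file to overwrite

# Upload the new content of the local file over the existing Drive file.
media = MediaFileUpload('all-gm-keys.txt', resumable=True)
res = DRIVE.files().update(
    fileId=existing_file_id,
    body={'name': 'all-gm-keys.txt'},
    media_body=media,
).execute()
if res:
    print('Updated "%s" (%s)' % (res['name'], res['mimeType']))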