Save a file from requests using django filesystem - django

I'm currently trying to save a file via requests; it's rather large, so I'm streaming it instead.
I'm unsure how exactly to do this, as I keep getting different errors. This is what I have so far:
def download_file(url, matte_upload_path, matte_servers, job_name, count):
    local_filename = url.split('/')[-1]
    url = "%s/static/downloads/%s_matte/%s/%s" % (matte_servers[0], job_name, count, local_filename)
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        fs = FileSystemStorage(location=matte_upload_path)
        print(matte_upload_path, 'matte path upload')
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
            fs.save(local_filename, f)
    return local_filename
but it returns
io.UnsupportedOperation: read
I'm basically trying to have requests save it to the specific location via django, any help would be appreciated.

I was able to solve this by using a tempfile to hold the streamed download, then saving that via FileSystemStorage. (The original code fails because f is opened write-only with 'wb', so fs.save() cannot read from it.)
local_filename = url.split('/')[-1]
url = "%s/static/downloads/%s_matte/%s/%s" % (matte_servers[0], job_name, count, local_filename)
response = requests.get(url, stream=True)
fs = FileSystemStorage(location=matte_upload_path)
lf = tempfile.NamedTemporaryFile()
# Read the streamed image in sections
for block in response.iter_content(1024 * 8):
    # If no more file then stop
    if not block:
        break
    # Write image block to temporary file
    lf.write(block)
fs.save(local_filename, lf)
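For reference, the same approach wrapped into a self-contained function (a sketch, assuming url and matte_upload_path play the same roles as above; wrapping the temp file in Django's File makes sure the storage reads it from the beginning):

import tempfile

import requests
from django.core.files import File
from django.core.files.storage import FileSystemStorage


def download_to_storage(url, matte_upload_path):
    local_filename = url.split('/')[-1]
    fs = FileSystemStorage(location=matte_upload_path)
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        with tempfile.NamedTemporaryFile() as lf:
            for block in response.iter_content(1024 * 8):
                lf.write(block)
            lf.flush()  # push any buffered bytes to the temp file
            # fs.save() returns the name the file was actually stored under
            return fs.save(local_filename, File(lf))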

Related

Flask passing uploaded file to another service using requests

I have a Python Flask webservice that takes in a file:
Headers:
Content-Type: multipart/form-data
Content:
"fileTest": UPLOADED FILE
When I pass the file to another service using the requests lib, I run into an issue where the uploaded file is not passed.
My Code:
files = {}
for form_file_param in request.files:
    fs = request.files[form_file_param]  # type: FileStorage
    files[form_file_param] = (fs.filename, fs.read())
req_headers = {
    "content-type": u"multipart/form-data; boundary=X-INSOMNIA-BOUNDARY",
}
r = requests.request(method='POST',
                     url=url,
                     headers=req_headers,
                     files=files)
I contact the other service directly through Postman and it works successfully. I cannot seem to figure out what I am doing wrong in the above code.
You need to follow the requests documentation:
http://docs.python-requests.org/en/master/user/quickstart/#post-a-multipart-encoded-file
url = 'https://httpbin.org/post'
files = {'file': ('report.xls', open('report.xls', 'rb'), 'application/vnd.ms-excel', {'Expires': '0'})}
r = requests.post(url, files=files)
r.text
Edit: after seeing the OP's response, the issue is caused by the Content-Type header.
multipart/form-data is a special content type which can be visualized as multiple sub-requests in one big request. Each of those sub-requests (one form-data element) has its own set of headers; the content type of the actual data is in there. When you set the header yourself, the boundary it declares no longer matches the one requests actually generates in the body.
Note: there is no difference between passing fs and fs.read() — see requests' source:
# models.py line 149
if isinstance(fp, (str, bytes, bytearray)):
    fdata = fp
else:
    fdata = fp.read()
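Concretely, a minimal sketch of the corrected forwarding code (assuming url points at the downstream service; the key fix is dropping the hand-written Content-Type header so requests can supply its own boundary):

import requests
from flask import request

def forward_upload(url):
    # Rebuild the multipart payload from the incoming Flask request; the
    # optional third tuple element forwards each part's own content type.
    files = {
        name: (fs.filename, fs.stream, fs.content_type)
        for name, fs in request.files.items()
    }
    # No explicit Content-Type header: requests generates the multipart
    # boundary itself, and overriding it breaks the encoding.
    return requests.post(url, files=files)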

Google Cloud Speech-to-Text API

I am using the Google Cloud Speech-to-Text API and trying to transcribe a long audio file. However, the audio file from the bucket cannot be detected.
I get an error stating: IOError: [Errno 2] No such file or directory
def transcribe_gcs(gcs_uri):
    """Asynchronously transcribes the audio file specified by the gcs_uri."""
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types

    client = speech.SpeechClient()
    audio = types.RecognitionAudio(uri=gcs_uri)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
        sample_rate_hertz=16000,
        language_code='en-US')

    operation = client.long_running_recognize(config, audio)
    print('Waiting for operation to complete...')
    response = operation.result(timeout=90)

    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    for result in response.results:
        # The first alternative is the most likely one for this portion.
        print(u'Transcript: {}'.format(result.alternatives[0].transcript))
        print('Confidence: {}'.format(result.alternatives[0].confidence))
Try this
import requests
import json

url = "https://speech.googleapis.com/v1/speech:longrunningrecognize?key=<apiaccesskey>"
payload = {"config": {"encoding": "LINEAR16",
                      "sample_rate_hertz": 8000,
                      "language_code": "en-IN"},
           "audio": {"uri": "gs://bucketname/file.flac"}}
r = requests.post(url, data=json.dumps(payload))
json_resp = r.json()
token_resp = json_resp['name']

url = "https://speech.googleapis.com/v1/operations/" + str(token_resp) + "?key=<apiaccesskey>"
content_response = requests.get(url)
content_json = content_response.json()
Your response is in the content_json variable.
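Note that longrunningrecognize is asynchronous, so the operation may not be finished when you issue the GET immediately afterwards. A small polling loop (a sketch, reusing the operations url built above) handles that:

import time

# Poll until the long-running operation reports completion; the
# operations resource exposes a boolean 'done' field.
while True:
    content_json = requests.get(url).json()
    if content_json.get('done'):
        break
    time.sleep(5)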

multiple openpyxl xlsx workbooks into one .zip file for download

I am trying to get some xlsx files from a form. I load them using openpyxl and do some data processing, and finally I need to send all the processed xlsx files back to the user as one zip download.
Here is an example of what I have done so far:
if form.is_valid():
    s = StringIO.StringIO()
    zf = zipfile.ZipFile(s, mode="w")
    for xlsx in request.FILES.getlist('xlsxs'):
        wb = load_workbook(xlsx)  # load the uploaded workbook
        ws = wb.active
        element_column = "G"
        element_row = 16
        massar_column = "C"
        massar_row_start = 18
        loop = column_index_from_string(element_column)
        while loop <= ws.max_column:
            for i in range(massar_row_start, ws.max_row + 1):
                # ...
                ws["%s%s" % (element_column, i)] = 0
                # ...
            loop += 2
            element_column = get_column_letter(loop)
        buf = save_virtual_workbook(wb)
        zf.write(buf)  # or zf.write(wb)
    zf.close()
    response = HttpResponse(s.getvalue(), content_type="application/x-zip-compressed")
    response['Content-Disposition'] = "attachment; filename=notes.zip"
    return response
I get the error
TypeError at My_view
stat() argument 1 must be encoded string without null bytes, not str
Thanks in advance for any help you can offer.
save_virtual_workbook returns a bytestream (see the source).
You are passing this value to ZipFile.write, which expects a filename.
I think you should be using ZipFile.writestr instead, and you need to provide a name for the file inside the archive. I'm not sure how you are getting the exact error message you see, but this is the first mistake I can see.
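A minimal sketch of the corrected loop (assuming wb is the processed workbook for each upload, as in the question, and swapping StringIO for BytesIO since zip content is binary):

import zipfile
from io import BytesIO

from openpyxl.writer.excel import save_virtual_workbook

s = BytesIO()
zf = zipfile.ZipFile(s, mode="w")
for index, xlsx in enumerate(request.FILES.getlist('xlsxs')):
    # ... load and process the workbook into wb, as before ...
    buf = save_virtual_workbook(wb)
    # writestr takes the name to store inside the archive plus the bytes
    zf.writestr("workbook_%d.xlsx" % index, buf)
zf.close()
response = HttpResponse(s.getvalue(), content_type="application/x-zip-compressed")
response['Content-Disposition'] = "attachment; filename=notes.zip"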

Django: upload, read, and write a large Excel file

I am new to Django and I need my app to allow users to upload Excel files. On the server side I read the Excel file cell by cell, append some values, then translate the values and write them back to an Excel file, which is downloaded as an attachment. I am able to do this for small files, but for large files I get a timeout error. Please see the code below.
def translatedoc(request):
    data = ""
    convrowstr = ""
    if request.method == 'POST':
        response = StreamingHttpResponse(content_type='application/vnd.ms-excel')
        try:
            form = fileUpload(request.POST, request.FILES)
            if form.is_valid():
                input_file = request.FILES.get('file')
                sl = request.POST.get('fsl')
                if sl == "Detect Language":
                    sl = "auto"
                else:
                    # get sl code from database
                    sl = languagecode.objects.filter(Language=sl).values_list('code')
                    sl = str(sl[0][0])
                # get tl code from database
                tl = languagecode.objects.filter(Language=request.POST.get('ftl')).values_list('code')
                wb = xlrd.open_workbook(file_contents=input_file.read())
                wb_sheet = wb.sheet_by_index(0)
                for rownum in range(0, wb_sheet.nrows):
                    convstr = ""
                    for colnum in range(0, wb_sheet.ncols):
                        try:
                            rw = wb_sheet.cell_value(rownum, colnum)
                            if type(rw) == float or type(rw) == int:
                                convstr = convstr + '<td>' + str(rw)
                            else:
                                convstr = convstr + '<td>' + rw
                        except Exception as e:
                            pass
                    if len(convstr) + len(convrowstr) > 20000:
                        # translate if the length of the doc exceeds the limit
                        # call google api module
                        data = data + translate(convrowstr, sl, str(tl[0][0]))
                        convrowstr = ""
                    if rownum == wb_sheet.nrows - 1:
                        convrowstr = convrowstr + "<tr>" + convstr
                        # translate for the first or last chunk
                        # call google api module
                        data = data + translate(convrowstr, sl, str(tl[0][0]))
                        convrowstr = ""
                    convrowstr = convrowstr + "<tr>" + convstr
                    log.error(rownum)
                if len(data) > 1:
                    sio = StringIO.StringIO()
                    try:
                        workbook = xlwt.Workbook()
                        sheet = workbook.add_sheet("output")
                        row = 0
                        for rw in data.split("<tr>")[1:]:
                            col = 0
                            for cl in rw.split("<td>")[1:]:
                                try:
                                    sheet.write(row, col, cl.split("<b>")[1].split("</b>")[0])
                                except Exception as e:
                                    pass
                                col += 1
                            row += 1
                        workbook.save(sio)
                        sio.seek(0)
                        sv = sio.getvalue()
                        response['Content-Disposition'] = 'attachment; filename=Output.xls'
                        return response
                    except Exception as e:
                        log.error(e)
        except Exception as e:
            log.error(e)
You can do this through Celery for large file uploads: read and process the file in a Celery task so the web request itself does not time out.
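For instance, a minimal sketch of the idea (translate_doc_task is a hypothetical name; it assumes the upload is first saved to disk so the worker can reach it):

import xlrd
from celery import shared_task

@shared_task
def translate_doc_task(file_path, sl, tl):
    # Open the saved upload and run the same row-by-row translate
    # logic as the view, writing the output workbook to disk.
    with open(file_path, 'rb') as f:
        wb = xlrd.open_workbook(file_contents=f.read())
    # ... process and translate as in translatedoc() ...

# In the view: save request.FILES['file'] somewhere persistent, then
# enqueue instead of processing inline:
# translate_doc_task.delay(saved_path, sl, tl)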

Merging two files in python

In Python 2.7.3, I am trying to merge two files into one.
I download a file over the Internet. The entire file size is exactly 3,197,743 bytes. I download it in two parts, one part 3,000,000 bytes in size, the second 197,743 bytes. Then I want to merge the two files to reconstruct the entire file.
Here is my code:
import requests
import shutil
URL = 'some_URL'
headers = {'user-agent': 'Agent'}
headers.update({'range': 'bytes=0-2999999'})
response = requests.get(URL, headers=headers)
file = open('some_file', 'wb')
file.write(response.content)
file.close()
headers2 = {'user-agent': 'Agent'}
headers2.update({'range': 'bytes=3000000-'})
response2 = requests.get(URL, headers=headers2)
file2 = open('some_file2', 'wb')
file2.write(response2.content)
file2.close()
source = open('some_file2','rb')
destination = open('some_file','ab')
shutil.copyfileobj(source,destination)
destination.close()
source.close()
At the end, I have one file ('some_file' in the example) whose size is exactly 3,197,743 bytes, but the file is corrupted. I tried this with a PDF file.
Where is the problem?
I tried to solve your problem with different approaches and used a diff tool to check whether the program retrieves the part files differently. I found no difference, so I am not really sure what's wrong.
However, I propose the following solution for your use case:
import urllib2

URL = "http://traffic.org/general-reports/traffic_pub_gen19.pdf"
req = urllib2.urlopen(URL)
CHUNK = 3000000
with open("some_file.pdf", 'wb') as fp:
    while True:
        chunk = req.read(CHUNK)
        if not chunk:
            break
        fp.write(chunk)
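The same chunked idea also works with requests (the library used in the question), as a sketch; streaming the response avoids holding the whole file in memory and sidesteps the manual Range splitting entirely:

import requests

URL = 'some_URL'
headers = {'user-agent': 'Agent'}
response = requests.get(URL, headers=headers, stream=True)
response.raise_for_status()
with open('some_file', 'wb') as fp:
    # stream=True defers the download; iter_content pulls it in chunks
    for chunk in response.iter_content(chunk_size=3000000):
        fp.write(chunk)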