Downloading images with Python - python-2.7

I am working on a project where I need to download some images using python. I have tried to fix it by doing different things but it is still not working. Here is some code I found and I tried to use but it does not seem to work. To be honest I am a newbie at programming so I would be grateful to get some help.
Here is the code:
import json
import os
import time
import requests
import Image
from StringIO import StringIO
from requests.exceptions import ConnectionError
def go(query,pathA):
BASE_URL = 'https://ajax.googleapis.com/ajax/services/search/images?'\
'v=1.0&q=' + query + '&start=%d'
BASE_PATH = os.path.join(pathA, query)
if not os.path.exists(BASE_PATH):
os.makedirs(BASE_PATH)
start = 0
while start < 60:
r = requests.get(BASE_URL % start)
for image_info in json.loads(r.text)['responseData']['results']:
url = image_info['unescapedUrl']
try:
image_r = requests.get(url)
except ConnectionError, e:
print 'could not download %s' % urla
continue
# Remove file-system path characters from name.
title = image_info['titleNoFormatting'].replace('/', '').replace('\\', '')
fileII = open(os.path.join(BASE_PATH, '%s.jpg') % title, 'w')
try:
Image.open(StringIO(image_r.content)).save(fileII, 'JPEG')
except IOError, e:
# Throw away some gifs...blegh.
print 'could not save %s' % url
continue
finally:
fileII.close()
print start
start += 4 # 4 images per page.
time.sleep(1.5)
# Example use
go('landscape', 'myDirectory')
The error I get when I run the code above is:
IOError: [Errno 22] invalid mode ('w') or filename: u'myDirectory\landscape\Na
ture - Photo gallery | MIRIADNA.COM.jpg'
Thanks in advance

This bit of code defines where you will save your image
# Remove file-system path characters from name.
title = image_info['titleNoFormatting'].replace('/', '').replace('\\', '')
Reading your error message I see that file does not exist (or a directory) because w is a valid mode for opening a file.
Try hardcoding the title to a simple and local path, such as
title = 'test'

Related

Cognitive face API Python 2.7.x - Bad Image Url

Trying to run the following program to post the faces stored in a folder using ImageUrl but there exists an error.
The API I used is MS's Cognitive Face API v1.0
Please help
import sys
import os, time
import cognitive_face as CF
from global_variables import personGroupId
import urllib
import sqlite3
Key = '---some key----' #I removed the key her for some security Purpose
CF.Key.set(Key)
def get_person_id():
person_id = ''
extractId = str(sys.argv[1])[-2:]
connect = sqlite3.connect("Face-DataBase")
c = connect.cursor()
cmd = "SELECT * FROM Students WHERE ID = " + extractId
c.execute(cmd)
row = c.fetchone()
person_id = row[3]
connect.close()
return person_id
if len(sys.argv) is not 1:
currentDir = os.path.dirname(os.path.abspath(os.path.realpath(__file__)))
imageFolder = os.path.join(currentDir, "dataset/" + str(sys.argv[1]))
person_id = get_person_id()
for filename in os.listdir(imageFolder):
if filename.endswith(".jpg"):
print(filename)
imgurl = urllib.request.pathname2url(os.path.join(imageFolder, filename))
res = CF.face.detect(imgurl)
if len(res) != 1:
print ("No face detected in image")
else:
res = CF.person.add_face(imgurl, personGroupId, person_id)
print(res)
time.sleep(6)
The error I got is a Invalid Image Url with Status Code '400'
User.22.1.jpg
Traceback (most recent call last):
File "add_person_faces.py", line 31, in <module>
res = CF.face.detect(imgurl)
File "C:\Users\Avina\Anaconda3\envs\virtual_platform\lib\site-packages\cognitive_face\face.py", line 41, in detect
data=data)
File "C:\Users\Avina\Anaconda3\envs\virtual_platform\lib\site-packages\cognitive_face\util.py", line 102, in request
error_msg.get('message'))
cognitive_face.util.CognitiveFaceException: Error when calling Cognitive Face API:
status_code: 400
code: InvalidURL
message: Invalid image URL.
You'll have to send a public url or send the image in binary format for the API to be able to access it.
Here is how you read the image:
try:
with open('mypic.jpg', 'rb') as image:
img = image.read()
face_to_detect = bytearray(img)
print("image reading done!")
except Exception as ex:
print("exception in reading image bytes {0}".format(ex.args))
has_exception = True
And then post it:
result = cf.face.detect(face_to_detect)

Using python to update a file on google drive

I have the following script to upload a file unto google drive, using python27. As it is now it will upload a new copy of the file, but I want the existing file updated/overwritten. I can't find help in the Google Drive API references and guides for python. Any suggestions?
from __future__ import print_function
import os
from apiclient.discovery import build
from httplib2 import Http
from oauth2client import file, client, tools
try:
import argparse
flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
flags = None
# Gain acces to google drive
SCOPES = 'https://www.googleapis.com/auth/drive.file'
store = file.Storage('storage.json')
creds = store.get()
if not creds or creds.invalid:
flow = client.flow_from_clientsecrets('client_secret.json', SCOPES)
creds = tools.run_flow(flow, store, flags) \
if flags else tools.run(flow, store)
DRIVE = build('drive', 'v3', http=creds.authorize(Http()))
#The file that is being uploaded
FILES = (
('all-gm-keys.txt', 'application/vnd.google-apps.document'), #in google doc format
)
#Where the file ends on google drive
for filename, mimeType in FILES:
folder_id = '0B6V-MONTYPYTHONROCKS-lTcXc' #Not the real folder id
metadata = {'name': filename,'parents': [ folder_id ] }
if mimeType:
metadata['mimeType'] = mimeType
res = DRIVE.files().create(body=metadata, media_body=filename).execute()
if res:
print('Uploaded "%s" (%s)' % (filename, res['mimeType']))
I think that you are looking for the update method. Here is a link to the documentation. There is an example on overwriting the file in python.
I think that using the official google client api instead of pure http requests should make your task easier.
from apiclient import errors
from apiclient.http import MediaFileUpload
# ...
def update_file(service, file_id, new_title, new_description, new_mime_type,
new_filename, new_revision):
"""Update an existing file's metadata and content.
Args:
service: Drive API service instance.
file_id: ID of the file to update.
new_title: New title for the file.
new_description: New description for the file.
new_mime_type: New MIME type for the file.
new_filename: Filename of the new content to upload.
new_revision: Whether or not to create a new revision for this file.
Returns:
Updated file metadata if successful, None otherwise.
"""
try:
# First retrieve the file from the API.
file = service.files().get(fileId=file_id).execute()
# File's new metadata.
file['title'] = new_title
file['description'] = new_description
file['mimeType'] = new_mime_type
# File's new content.
media_body = MediaFileUpload(
new_filename, mimetype=new_mime_type, resumable=True)
# Send the request to the API.
updated_file = service.files().update(
fileId=file_id,
body=file,
newRevision=new_revision,
media_body=media_body).execute()
return updated_file
except errors.HttpError, error:
print 'An error occurred: %s' % error
return None
Link the example: https://developers.google.com/drive/api/v2/reference/files/update#examples

How to download specific Google Drive folder using Python?

I'm trying to download specific folders from Google Drive.
I tried this example
http://www.mwclearning.com/?p=1608 but its download all the files from G-Drive.
EX: If I have two folders in Google Drive say..
A folder having -> 1 , 2 Files
B folder having -> 3, 4, 5 Files
If I want to download folder A then only 1 , 2 files should get downloaded..
Any suggestion or help could be very helpful.
Thanks in advance.
Use Drive credentials.json Downloaded from your Drive API
from __future__ import print_function
import pickle
import os
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from oauth2client import client
from oauth2client import tools
from oauth2client.file import Storage
from apiclient.http import MediaFileUpload, MediaIoBaseDownload
import io
from apiclient import errors
from apiclient import http
import logging
from apiclient import discovery
# If modifying these scopes, delete the file token.pickle.
SCOPES = ['https://www.googleapis.com/auth/drive']
# To list folders
def listfolders(service, filid, des):
results = service.files().list(
pageSize=1000, q="\'" + filid + "\'" + " in parents",
fields="nextPageToken, files(id, name, mimeType)").execute()
# logging.debug(folder)
folder = results.get('files', [])
for item in folder:
if str(item['mimeType']) == str('application/vnd.google-apps.folder'):
if not os.path.isdir(des+"/"+item['name']):
os.mkdir(path=des+"/"+item['name'])
print(item['name'])
listfolders(service, item['id'], des+"/"+item['name']) # LOOP un-till the files are found
else:
downloadfiles(service, item['id'], item['name'], des)
print(item['name'])
return folder
# To Download Files
def downloadfiles(service, dowid, name,dfilespath):
request = service.files().get_media(fileId=dowid)
fh = io.BytesIO()
downloader = MediaIoBaseDownload(fh, request)
done = False
while done is False:
status, done = downloader.next_chunk()
print("Download %d%%." % int(status.progress() * 100))
with io.open(dfilespath + "/" + name, 'wb') as f:
fh.seek(0)
f.write(fh.read())
def main():
"""Shows basic usage of the Drive v3 API.
Prints the names and ids of the first 10 files the user has access to.
"""
creds = None
# The file token.pickle stores the user's access and refresh tokens, and is
# created automatically when the authorization flow completes for the first
# time.
if os.path.exists('token.pickle'):
with open('token.pickle', 'rb') as token:
creds = pickle.load(token)
# If there are no (valid) credentials available, let the user log in.
if not creds or not creds.valid:
if creds and creds.expired and creds.refresh_token:
creds.refresh(Request())
else:
flow = InstalledAppFlow.from_client_secrets_file(
'credentials.json', SCOPES) # credentials.json download from drive API
creds = flow.run_local_server()
# Save the credentials for the next run
with open('token.pickle', 'wb') as token:
pickle.dump(creds, token)
service = build('drive', 'v3', credentials=creds)
# Call the Drive v3 API
Folder_id = "'PAST YOUR SHARED FOLDER ID'" # Enter The Downloadable folder ID From Shared Link
results = service.files().list(
pageSize=1000, q=Folder_id+" in parents", fields="nextPageToken, files(id, name, mimeType)").execute()
items = results.get('files', [])
if not items:
print('No files found.')
else:
print('Files:')
for item in items:
if item['mimeType'] == 'application/vnd.google-apps.folder':
if not os.path.isdir("Folder"):
os.mkdir("Folder")
bfolderpath = os.getcwd()+"/Folder/"
if not os.path.isdir(bfolderpath+item['name']):
os.mkdir(bfolderpath+item['name'])
folderpath = bfolderpath+item['name']
listfolders(service, item['id'], folderpath)
else:
if not os.path.isdir("Folder"):
os.mkdir("Folder")
bfolderpath = os.getcwd()+"/Folder/"
if not os.path.isdir(bfolderpath + item['name']):
os.mkdir(bfolderpath + item['name'])
filepath = bfolderpath + item['name']
downloadfiles(service, item['id'], item['name'], filepath)
if __name__ == '__main__':
main()
Try to check the Google Drive API documentation, you can see here the sample code use to perform a file download using Python.
file_id = '0BwwA4oUTeiV1UVNwOHItT0xfa2M'
request = drive_service.files().get_media(fileId=file_id)
fh = io.BytesIO()
downloader = MediaIoBaseDownload(fh, request)
done = False
while done is False:
status, done = downloader.next_chunk()
print "Download %d%%." % int(status.progress() * 100)
For the folders part, you can check here on how to get it.
For more information, you can check this tutorial and YT video.
Here's just the code that deals specifically with downloading a folder recursively.
I've tried to keep it to-the-point, omitting code that's described in tutorials already. I expect you to already have the ID of the folder that you want to download.
The part elif not itemType.startswith('application/'): has the purpose of skipping any Drive-format documents. However, the check is overly-simplistic, so you might want to improve it or remove it.
from __future__ import print_function
import pickle
import os.path
import io
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
# If modifying these scopes, delete the file token.pickle.
SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
def main():
"""Based on the quickStart.py example at
https://developers.google.com/drive/api/v3/quickstart/python
"""
creds = getCredentials()
service = build('drive', 'v3', credentials=creds)
folderId = ""
destinationFolder = ""
downloadFolder(service, folderId, destinationFolder)
def downloadFolder(service, fileId, destinationFolder):
if not os.path.isdir(destinationFolder):
os.mkdir(path=destinationFolder)
results = service.files().list(
pageSize=300,
q="parents in '{0}'".format(fileId),
fields="files(id, name, mimeType)"
).execute()
items = results.get('files', [])
for item in items:
itemName = item['name']
itemId = item['id']
itemType = item['mimeType']
filePath = destinationFolder + "/" + itemName
if itemType == 'application/vnd.google-apps.folder':
print("Stepping into folder: {0}".format(filePath))
downloadFolder(service, itemId, filePath) # Recursive call
elif not itemType.startswith('application/'):
downloadFile(service, itemId, filePath)
else:
print("Unsupported file: {0}".format(itemName))
def downloadFile(service, fileId, filePath):
# Note: The parent folders in filePath must exist
print("-> Downloading file with id: {0} name: {1}".format(fileId, filePath))
request = service.files().get_media(fileId=fileId)
fh = io.FileIO(filePath, mode='wb')
try:
downloader = MediaIoBaseDownload(fh, request, chunksize=1024*1024)
done = False
while done is False:
status, done = downloader.next_chunk(num_retries = 2)
if status:
print("Download %d%%." % int(status.progress() * 100))
print("Download Complete!")
finally:
fh.close()
Please do download the 'client_id.json' file as specified in the tutorial link for downloading follow steps 5-7
In the last line of the code change the "folder_id" to the id of the folder you want to download from drive by right clicking on the folder and enabling share link. The id will be the part of URL after "id=" and also changing the "savepath" to the path where you want to save the downloaded folder to be on your system.
from __future__ import print_function
from googleapiclient import discovery
from httplib2 import Http
from oauth2client import file, client, tools
import os, io
from apiclient.http import MediaFileUpload, MediaIoBaseDownload
SCOPES = 'https://www.googleapis.com/auth/drive'
store = file.Storage('storage.json')
creds = store.get()
if not creds or creds.invalid:
flow = client.flow_from_clientsecrets('client_id.json', SCOPES)
creds = tools.run_flow(flow, store)
DRIVE = discovery.build('drive', 'v3', http=creds.authorize(Http()))
def retaining_folder_structure(query,filepath):
results = DRIVE.files().list(fields="nextPageToken, files(id, name, kind, mimeType)",q=query).execute()
items = results.get('files', [])
for item in items:
#print(item['name'])
if item['mimeType']=='application/vnd.google-apps.folder':
fold=item['name']
path=filepath+'/'+fold
if os.path.isdir(path):
retaining_folder_structure("'%s' in parents"%(item['id']),path)
else:
os.mkdir(path)
retaining_folder_structure("'%s' in parents"%(item['id']),path)
else:
request = DRIVE.files().get_media(fileId=item['id'])
fh = io.BytesIO()
downloader = MediaIoBaseDownload(fh, request)
done = False
while done is False:
status, done = downloader.next_chunk()
print("Download %d%%." % int(status.progress() * 100))
path=filepath+'/'+item['name']
#print(path)
with io.open(path,'wb') as f:
fh.seek(0)
f.write(fh.read())
retaining_floder_structure("'folder_id' in parents",'savepath')

Downloading a CSV file from a URL in using Python 2.7

I'm trying to download a CSV file from a URL & save it to file on my hard drive. I'm trying to use the following code on Python 2.7 but getting errors. The CSV is located on a SharePoint site.
import urllib
import csv
url = 'https://office.com/sites/20Reporting/Lists/Reports/476%20-%2050%20DaySDShrink%20Report/476-%2030%20DaySDShrink.csv'
csv = urllib.urlopen(url).read() # returns type 'str'
with open('C:\Users\Documents\DPAM.csv', 'wb') as fx:
fx.write(csv)
I'm getting the following error message.
IOError: ('http error', 401, 'Unauthorized', <httplib.HTTPMessage instance at 0x037541E8>)
Try something like this:
import urllib2,base64
import csv
username ="username"
password= "password"
url = 'https://office.com/sites/20Reporting/Lists/Reports/476%20-%2050%20DaySDShrink%20Report/476-%2030%20DaySDShrink.csv'
request = urllib2.Request(url )
base64string = base64.encodestring('%s:%s' % (username, password)).replace('\n', '')
request.add_header("Authorization", "Basic %s" % base64string)
csv = urllib2.urlopen(request).read()
with open('C:\Users\Documents\DPAM.csv', 'wb') as fx:
fx.write(csv)
Also you can try to communicate with sharepoint via SOAP using urllib2

Django with mod_XSENDFILE unable to download complete file

Attached is the code which downloads a file from browser using django 1.3 and Apache 2.2 with mod_xsendfile
#login_required
def sendfile(request, productid):
path = settings.RESOURCES_DIR
filepath = os.path.join('C:/workspace/y/src/y/media/audio/','sleep_away.mp3')
print "filepath",filepath
filename = 'sleep_away.mp3' # Select your file here.
print "Within sendfile size", os.path.getsize(filepath)
wrapper = FileWrapper(open(filepath,'r'))
content_type = mimetypes.guess_type(filename)[0]
response = HttpResponse(wrapper, content_type = content_type)
print "Within wrapper"
from django.utils.encoding import smart_str
response['X-Sendfile'] = smart_str(filepath)
response['Content-Length'] = os.path.getsize(filepath)
from django.utils.encoding import smart_str
response['Content-Disposition'] = 'attachment; filename=%s/' % smart_str(filename)
return response
The console shows the following filesize which is the right size
Within sendfile size 4842585
But when I download/save the file it shows 107 KB...i.e 109,787 bytes.Where am I going wrong. Why isnt it downloading the complete file?
I consider your new to django or python. Try to put the import statements at the beginning of the method. Once imported it can be used through the method no need import every time you use. In windows you should use "rb" (read binary) to serve anything other than text files. Try not to use variable names that might conflict with method names or other keywords of the language. Your method should be like this
#login_required
def sendfile(request, productid):
from django.utils.encoding import smart_str
##set path and filename
resource_path = settings.RESOURCES_DIR # resource dir ie /workspace/y/src/y/media
filename = "sleep_away.mp3" #file to be served
##add it to os.path
filepath = os.path.join(resource_path,"audio",filename)
print "complete file path: ", filepath
##filewrapper to server in size of 8kb each until whole file is served
file_wrapper = FileWrapper(file(filepath,'rb')) ##windows needs rb (read binary) for non text files
##get file mimetype
file_mimetype = mimetypes.guess_type(filepath)
##create response with file_mimetype and file_wrapper
response = HttpResponse(content_type=file_mimetype, file_wrapper)
##set X-sendfile header with filepath
response['X-Sendfile'] = filepath ##no need for smart_str here.
##get filesize
print "sendfile size", os.stat(filepath).st_size
response['Content-Length'] = os.stat(filepath).st_size ##set content length
response['Content-Disposition'] = 'attachment; filename=%s/' % smart_str(filename) ##set disposition
return response ## all done, hurray!! return response :)
Hope that helps
You could have a look at the django-private-files project. Haven't tested it myself, but it looks promissing.
link to the docs --> http://readthedocs.org/docs/django-private-files/en/latest/usage.html
cheers