How to download specific Google Drive folder using Python? - python-2.7

I'm trying to download specific folders from Google Drive.
I tried this example: http://www.mwclearning.com/?p=1608, but it downloads all of the files from G-Drive.
For example, if I have two folders in Google Drive, say:
folder A containing files 1 and 2,
folder B containing files 3, 4, and 5,
then if I want to download folder A, only files 1 and 2 should be downloaded.
Any suggestion or help would be much appreciated.
Thanks in advance.

Use the credentials.json file downloaded from your Drive API project:
from __future__ import print_function
import pickle
import os
import io
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request

# If modifying these scopes, delete the file token.pickle.
SCOPES = ['https://www.googleapis.com/auth/drive']


# To list folders
def listfolders(service, filid, des):
    results = service.files().list(
        pageSize=1000, q="'" + filid + "' in parents",
        fields="nextPageToken, files(id, name, mimeType)").execute()
    folder = results.get('files', [])
    for item in folder:
        if str(item['mimeType']) == str('application/vnd.google-apps.folder'):
            if not os.path.isdir(des + "/" + item['name']):
                os.mkdir(path=des + "/" + item['name'])
            print(item['name'])
            # Recurse until plain files are found
            listfolders(service, item['id'], des + "/" + item['name'])
        else:
            downloadfiles(service, item['id'], item['name'], des)
            print(item['name'])
    return folder


# To download files
def downloadfiles(service, dowid, name, dfilespath):
    request = service.files().get_media(fileId=dowid)
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))
    with io.open(dfilespath + "/" + name, 'wb') as f:
        fh.seek(0)
        f.write(fh.read())


def main():
    """Downloads a shared Drive folder, recursing into its sub-folders."""
    creds = None
    # The file token.pickle stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)  # credentials.json downloaded from the Drive API console
            creds = flow.run_local_server()
        # Save the credentials for the next run
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)

    service = build('drive', 'v3', credentials=creds)

    # Call the Drive v3 API
    Folder_id = "'PASTE YOUR SHARED FOLDER ID'"  # enter the folder ID from the shared link
    results = service.files().list(
        pageSize=1000, q=Folder_id + " in parents",
        fields="nextPageToken, files(id, name, mimeType)").execute()
    items = results.get('files', [])
    if not items:
        print('No files found.')
    else:
        print('Files:')
        for item in items:
            if not os.path.isdir("Folder"):
                os.mkdir("Folder")
            bfolderpath = os.getcwd() + "/Folder/"
            if item['mimeType'] == 'application/vnd.google-apps.folder':
                if not os.path.isdir(bfolderpath + item['name']):
                    os.mkdir(bfolderpath + item['name'])
                folderpath = bfolderpath + item['name']
                listfolders(service, item['id'], folderpath)
            else:
                # Download loose top-level files straight into "Folder" rather
                # than creating a sub-directory named after the file
                downloadfiles(service, item['id'], item['name'], os.getcwd() + "/Folder")


if __name__ == '__main__':
    main()
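
One caveat about the code above: both files().list() calls request nextPageToken but never pass it back, so a folder with more than 1000 entries will be silently truncated. A minimal pagination loop could look like the sketch below (list_children is a hypothetical helper name, reusing the same service object):

def list_children(service, folder_id):
    # Drain every page of a folder listing, not just the first one
    items, page_token = [], None
    while True:
        resp = service.files().list(
            pageSize=1000,
            q="'{0}' in parents".format(folder_id),
            fields="nextPageToken, files(id, name, mimeType)",
            pageToken=page_token).execute()
        items.extend(resp.get('files', []))
        page_token = resp.get('nextPageToken')
        if page_token is None:
            return items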

Try checking the Google Drive API documentation; it includes sample code for performing a file download with Python:
file_id = '0BwwA4oUTeiV1UVNwOHItT0xfa2M'
request = drive_service.files().get_media(fileId=file_id)
fh = io.BytesIO()
downloader = MediaIoBaseDownload(fh, request)
done = False
while done is False:
    status, done = downloader.next_chunk()
    print "Download %d%%." % int(status.progress() * 100)
For the folders part, the documentation also explains how to find and list them.
For more information, you can check the tutorial and YouTube video.
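
For reference, listing the contents of a single folder is just another files().list() call with an "in parents" query; a minimal sketch, assuming the same drive_service object and FOLDER_ID as a placeholder:

results = drive_service.files().list(
    q="'FOLDER_ID' in parents",
    fields="files(id, name, mimeType)").execute()
for f in results.get('files', []):
    print "%s %s %s" % (f['id'], f['name'], f['mimeType'])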

Here's just the code that deals specifically with downloading a folder recursively.
I've tried to keep it to-the-point, omitting code that's described in tutorials already. I expect you to already have the ID of the folder that you want to download.
The part "elif not itemType.startswith('application/'):" serves to skip any Drive-format documents. However, the check is overly simplistic, so you might want to improve it or remove it.
from __future__ import print_function
import pickle
import os.path
import io
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request

# If modifying these scopes, delete the file token.pickle.
SCOPES = ['https://www.googleapis.com/auth/drive.readonly']


def main():
    """Based on the quickstart.py example at
    https://developers.google.com/drive/api/v3/quickstart/python
    """
    creds = getCredentials()
    service = build('drive', 'v3', credentials=creds)

    folderId = ""
    destinationFolder = ""
    downloadFolder(service, folderId, destinationFolder)


def downloadFolder(service, fileId, destinationFolder):
    if not os.path.isdir(destinationFolder):
        os.mkdir(path=destinationFolder)

    results = service.files().list(
        pageSize=300,
        q="'{0}' in parents".format(fileId),
        fields="files(id, name, mimeType)"
    ).execute()
    items = results.get('files', [])

    for item in items:
        itemName = item['name']
        itemId = item['id']
        itemType = item['mimeType']
        filePath = destinationFolder + "/" + itemName

        if itemType == 'application/vnd.google-apps.folder':
            print("Stepping into folder: {0}".format(filePath))
            downloadFolder(service, itemId, filePath)  # recursive call
        elif not itemType.startswith('application/'):
            downloadFile(service, itemId, filePath)
        else:
            print("Unsupported file: {0}".format(itemName))


def downloadFile(service, fileId, filePath):
    # Note: The parent folders in filePath must already exist
    print("-> Downloading file with id: {0} name: {1}".format(fileId, filePath))
    request = service.files().get_media(fileId=fileId)
    fh = io.FileIO(filePath, mode='wb')
    try:
        downloader = MediaIoBaseDownload(fh, request, chunksize=1024 * 1024)
        done = False
        while done is False:
            status, done = downloader.next_chunk(num_retries=2)
            if status:
                print("Download %d%%." % int(status.progress() * 100))
        print("Download Complete!")
    finally:
        fh.close()
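
The getCredentials() helper is omitted above because it matches the quickstart; a minimal version, assuming the usual token.pickle/credentials.json file names, would be:

def getCredentials():
    creds = None
    # token.pickle stores the user's access and refresh tokens
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
            creds = flow.run_local_server()
        # Save the credentials for the next run
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)
    return creds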

Download the 'client_id.json' file as specified in the tutorial link; for downloading, follow steps 5-7.
In the last line of the code, change 'folder_id' to the ID of the folder you want to download: right-click the folder in Drive, enable the share link, and take the part of the URL after "id=". Also change 'savepath' to the path on your system where you want the downloaded folder to be saved.
from __future__ import print_function
import os
import io
from googleapiclient import discovery
from googleapiclient.http import MediaIoBaseDownload
from httplib2 import Http
from oauth2client import file, client, tools

SCOPES = 'https://www.googleapis.com/auth/drive'
store = file.Storage('storage.json')
creds = store.get()
if not creds or creds.invalid:
    flow = client.flow_from_clientsecrets('client_id.json', SCOPES)
    creds = tools.run_flow(flow, store)
DRIVE = discovery.build('drive', 'v3', http=creds.authorize(Http()))


def retaining_folder_structure(query, filepath):
    results = DRIVE.files().list(
        fields="nextPageToken, files(id, name, kind, mimeType)", q=query).execute()
    items = results.get('files', [])
    for item in items:
        if item['mimeType'] == 'application/vnd.google-apps.folder':
            # Recreate the sub-folder locally (if needed), then recurse into it
            path = filepath + '/' + item['name']
            if not os.path.isdir(path):
                os.mkdir(path)
            retaining_folder_structure("'%s' in parents" % (item['id']), path)
        else:
            request = DRIVE.files().get_media(fileId=item['id'])
            fh = io.BytesIO()
            downloader = MediaIoBaseDownload(fh, request)
            done = False
            while done is False:
                status, done = downloader.next_chunk()
                print("Download %d%%." % int(status.progress() * 100))
            path = filepath + '/' + item['name']
            with io.open(path, 'wb') as f:
                fh.seek(0)
                f.write(fh.read())


retaining_folder_structure("'folder_id' in parents", 'savepath')
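
If you'd rather not copy the ID out of the share link by hand, a small helper can extract it. This is only a sketch; folder_id_from_link and its regex are my own, covering the common "id=" and "/folders/" link formats:

import re

def folder_id_from_link(link):
    # Pull the folder ID out of a Drive share link
    match = re.search(r'(?:folders/|[?&]id=)([\w-]+)', link)
    return match.group(1) if match else None

# e.g. folder_id_from_link('https://drive.google.com/drive/folders/1a2B3c4D5e') -> '1a2B3c4D5e'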

Related

I'm not able to stream the chunks from in_memory_file from google cloud storage bucket

This is the Python code. We are streaming a large file from Google Cloud Storage to Cloud Run. The code splits the large CSV file into chunks, and each chunk is downloaded into an in_memory_file. As soon as the first chunk arrives, it should start streaming in the response, with the remaining chunks following, but we are not able to stream those chunks from in_memory_file.
import signal
import sys
import io
import structlog
from concurrent.futures import ThreadPoolExecutor
from types import FrameType
from google.cloud import storage
from google.oauth2 import service_account
from flask import Flask, Response

app = Flask(__name__)


# SIGTERM log
def getJSONLogger() -> structlog._config.BoundLoggerLazyProxy:
    structlog.configure(
        processors=[
            structlog.stdlib.add_log_level,
            structlog.stdlib.PositionalArgumentsFormatter(),
            structlog.processors.TimeStamper("iso"),
            structlog.processors.JSONRenderer(),
        ],
        wrapper_class=structlog.stdlib.BoundLogger,
    )
    return structlog.get_logger()


logger = getJSONLogger()


# SIGTERM handler
def shutdown_handler(signal: int, frame: FrameType) -> None:
    logger.info("Signal received, safely shutting down.")
    print("Exiting process.", flush=True)
    sys.exit(0)


signal.signal(signal.SIGTERM, shutdown_handler)


# Split the file into byte-range chunks
def split_byte_size(size: int, uri: str, bucket: str, key: str) -> list:
    byte_list = []
    chunks = 50
    start = 0
    for i in range(size, size * chunks + 1, size):
        stop = i // chunks
        byte_list.append({"uri": uri, "start": start, "end": stop, "bucket": bucket, "key": key})
        start = stop + 1
    return byte_list


# Cloud Storage connection
project = 'XYZ'
service_account_credentials_path = 'key.json'
credentials = service_account.Credentials.from_service_account_file(service_account_credentials_path)
storage_client = storage.Client(project=project, credentials=credentials)


# Download objects as chunks
def downloader(input: dict) -> object:
    bucket_object = storage_client.get_bucket(bucket_or_name=input["bucket"])
    blob = bucket_object.blob(input["key"])
    in_memory_file = io.BytesIO()
    blob.download_to_file(in_memory_file, start=input['start'], end=input['end'])
    # print("Chunk " + str(input['start']) + " to " + str(input['end']) + " completed")
    return in_memory_file


@app.route("/chunk_data")
def chunk_data():
    bucket_name = 'cloudrundemofile'
    source_blob_name = 'demofile.csv'
    bucket_object = storage_client.get_bucket(bucket_name)
    blob = bucket_object.get_blob(source_blob_name)
    split_bytes = split_byte_size(blob.size, project, bucket_name, source_blob_name)
    print(split_bytes)
    # Async threads
    with ThreadPoolExecutor(max_workers=5) as ex:
        results = ex.map(downloader, split_bytes)
    resp = Response(results, 206, mimetype='text/csv')
    # resp.headers.add('Content-Range', 'bytes {0}-{1}/{2}'.format(start, start + length - 1, file_size))
    return resp


if __name__ == "__main__":
    signal.signal(signal.SIGINT, shutdown_handler)
    app.run(host="0.0.0.0", port=8080)
else:
    signal.signal(signal.SIGTERM, shutdown_handler)
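
One way to actually stream the chunks is to hand Response a generator that yields each chunk's bytes as soon as it is ready; ThreadPoolExecutor.map already returns results in submission order. The sketch below (a hypothetical /stream_data endpoint, not a tested drop-in fix) keeps the executor alive while yielding and emits raw bytes rather than BytesIO objects, which is what Flask expects from a streamed response:

@app.route("/stream_data")
def stream_data():
    bucket_name = 'cloudrundemofile'
    source_blob_name = 'demofile.csv'
    blob = storage_client.get_bucket(bucket_name).get_blob(source_blob_name)
    split_bytes = split_byte_size(blob.size, project, bucket_name, source_blob_name)

    def generate():
        # The executor must stay open until every chunk has been yielded
        with ThreadPoolExecutor(max_workers=5) as ex:
            for in_memory_file in ex.map(downloader, split_bytes):
                yield in_memory_file.getvalue()

    return Response(generate(), 206, mimetype='text/csv')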

Why different error when using different network connection to put a file to a s3 bucket with Sign v4?

Here is my code.
First, the driver script:
#!/usr/bin/env python
import os
import sys
import uuid
import requests
from gen_url import sign


def upload_me(file_path, key=None, secret=None):
    access_key = 'ABC' if key is None else key
    secret_key = 'EDF' if secret is None else secret
    s3_bucket = 'my-work-bucket'
    object_name = '1-2-{uuid}.jpeg'.format(uuid=uuid.uuid4())
    mime_type = 'image/jpeg'
    expires = 24 * 60 * 60  # link expiry in seconds

    os.environ['AWS_ACCESS_KEY_ID'] = access_key
    os.environ['AWS_SECRET_ACCESS_KEY'] = secret_key
    region = 'us-west-2'

    url = sign(key, secret, s3_bucket, object_name, mime_type, expires, region)
    with open(file_path, 'r') as f:
        resp = requests.post(url, data=f)
    print resp.content


if __name__ == '__main__':
    argc = len(sys.argv)
    key = secret = None
    if argc == 2 or argc == 4:
        file_path = sys.argv[1]
        if argc == 4:
            key = sys.argv[2]
            secret = sys.argv[3]
    else:
        raise Exception('Expect 1 or 3 arguments')
    upload_me(file_path, key, secret)
And the sign function in the gen_url module:
import boto3


def sign(access_key, secret_key, s3_bucket, object_name, mime_type, expires, region):
    s3_client = boto3.client('s3',
                             region_name=region,
                             aws_access_key_id=access_key,
                             aws_secret_access_key=secret_key)
    # Don't include content type ('ContentType': mime_type)
    params = {
        'Bucket': s3_bucket,
        'Key': object_name,
    }
    response = s3_client.generate_presigned_url('put_object',
                                                Params=params,
                                                ExpiresIn=expires)
    return response
When I use the internet connection at home, this is the error I get:
requests.exceptions.ConnectionError: ('Connection aborted.', error(32, 'Broken pipe'))
But when I tether through my iPhone, the same command gives a different error:
<Error><Code>SignatureDoesNotMatch</Code><Message>The request signature we calculated does not match the signature you provided. Check your key and signing method.</Message>
Why is the output completely different when the networks are different?
It turns out the last three lines of the driver script should be:
with open(file_path, 'rb') as f:
    resp = requests.put(url, data=f)
print resp.content
Then there is no issue with either connection. The URL is presigned for put_object, and the HTTP method is part of the SigV4 string-to-sign, so sending a POST makes the signature invalid; opening the file in binary mode avoids mangling the request body.
(The error(32, 'Broken pipe') could be just a coincidence: there were connection issues with my ISP.)

How to remove file after uploading it to Google Drive

I have written code in Python that uploads a file to Google Drive, but after it uploads I cannot delete the file from the local drive, because I get the error "Access Denied". If I exit the function, I can delete the file. So my question is: how can I delete the file from inside the function?
GDriveUpload.py
import os
import httplib2
import ntpath
import oauth2client
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload

# Copy your credentials here
_CLIENT_ID = 'YOUR_CLIENT_ID'
_CLIENT_SECRET = 'YOUR_CLIENT_SECRET'
_REFRESH_TOKEN = 'YOUR_REFRESH_TOKEN'
_PARENT_FOLDER_ID = 'YOUR_PARENT_FOLDER_ID'
_DATA_FILE = 'datafile.dat'


# ====================================================================================
# Upload file to Google Drive
def UploadFile(client_id, client_secret, refresh_token, parent_folder_id, local_file, DeleteOnExit=False):
    cred = oauth2client.client.GoogleCredentials(
        None, client_id, client_secret, refresh_token, None,
        'https://accounts.google.com/o/oauth2/token', None)
    http = cred.authorize(httplib2.Http())
    drive_service = build('drive', 'v2', http=http)

    media_body = MediaFileUpload(local_file, mimetype='application/octet-stream',
                                 chunksize=5242880, resumable=True)
    body = {
        'title': (ntpath.basename(local_file)),
        'parents': [{'id': parent_folder_id}],
        'mimeType': 'application/octet-stream'
    }

    request = drive_service.files().insert(body=body, media_body=media_body)
    response = None
    while response is None:
        status, response = request.next_chunk()
        if status:
            print "Uploaded %.2f%%" % (status.progress() * 100)

    if DeleteOnExit:
        os.remove(local_file)


# ====================================================================================
if __name__ == '__main__':
    UploadFile(_CLIENT_ID, _CLIENT_SECRET, _REFRESH_TOKEN, _PARENT_FOLDER_ID, _DATA_FILE, DeleteOnExit=True)
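
The likely cause of the "Access Denied" error is that MediaFileUpload keeps its own handle to the file open, and on Windows an open file cannot be deleted. One workaround (a sketch, assuming the same upload flow as above) is to open the file yourself and pass the handle to MediaIoBaseUpload, so you control when it is closed:

from googleapiclient.http import MediaIoBaseUpload

# Inside UploadFile, replacing the MediaFileUpload call:
fd = open(local_file, 'rb')
try:
    media_body = MediaIoBaseUpload(fd, mimetype='application/octet-stream',
                                   chunksize=5242880, resumable=True)
    request = drive_service.files().insert(body=body, media_body=media_body)
    response = None
    while response is None:
        status, response = request.next_chunk()
        if status:
            print "Uploaded %.2f%%" % (status.progress() * 100)
finally:
    fd.close()  # release the handle before deleting
if DeleteOnExit:
    os.remove(local_file)  # the file is no longer locked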

How to specify a new directory path to add attachment in mail using python

I am using the code below to add attachments to a mail and send it.
As written, files that are in the same directory as the script get attached.
My code is in the 'testdir' folder and the files to attach are in the 'testdir/testfiles' folder.
How do I change the directory path below so that it attaches the files inside the 'testdir/testfiles' folder?
Code:
import smtplib
import os
from email.mime.base import MIMEBase
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email import Encoders


def send_email(to, subject, text, filenames):
    try:
        gmail_user = 'xxxxx@gmail.com'
        gmail_pwd = 'xxxxx'
        msg = MIMEMultipart()
        msg['From'] = gmail_user
        msg['To'] = ", ".join(to)
        msg['Subject'] = subject
        msg.attach(MIMEText(text))

        for file in filenames:
            part = MIMEBase('application', 'octet-stream')
            part.set_payload(open(file, 'rb').read())
            Encoders.encode_base64(part)
            part.add_header('Content-Disposition',
                            'attachment; filename="%s"' % os.path.basename(file))
            msg.attach(part)

        mailServer = smtplib.SMTP("smtp.gmail.com:587")
        mailServer.ehlo()
        mailServer.starttls()
        mailServer.ehlo()
        mailServer.login(gmail_user, gmail_pwd)
        mailServer.sendmail(gmail_user, to, msg.as_string())
        mailServer.close()
        print('successfully sent the mail')
    except smtplib.SMTPException, error:
        print str(error)


if __name__ == '__main__':
    attachment_file = ['t2.csv', 't1.txt']
    to = "xxxxx@gmail.com"
    TEXT = "Hello everyone"
    SUBJECT = "Testing sending using gmail"
    send_email(to, SUBJECT, TEXT, attachment_file)
I created a new directory 'testfiles', placed all the files in it, and listed them with os.listdir. Then I iterate over the files and attach each one to the mail.
Updated code for attaching the files (note the extra MIMEApplication import):
from email.mime.application import MIMEApplication

dir_path = "/home/testdir/testfiles"
files = os.listdir(dir_path)
for f in files:  # add files to the message
    file_path = os.path.join(dir_path, f)
    attachment = MIMEApplication(open(file_path, "rb").read(), _subtype="txt")
    attachment.add_header('Content-Disposition', 'attachment', filename=f)
    msg.attach(attachment)

Using python to update a file on google drive

I have the following script to upload a file unto google drive, using python27. As it is now it will upload a new copy of the file, but I want the existing file updated/overwritten. I can't find help in the Google Drive API references and guides for python. Any suggestions?
from __future__ import print_function
import os
from apiclient.discovery import build
from httplib2 import Http
from oauth2client import file, client, tools

try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    flags = None

# Gain access to Google Drive
SCOPES = 'https://www.googleapis.com/auth/drive.file'
store = file.Storage('storage.json')
creds = store.get()
if not creds or creds.invalid:
    flow = client.flow_from_clientsecrets('client_secret.json', SCOPES)
    creds = tools.run_flow(flow, store, flags) \
        if flags else tools.run(flow, store)
DRIVE = build('drive', 'v3', http=creds.authorize(Http()))

# The file that is being uploaded
FILES = (
    ('all-gm-keys.txt', 'application/vnd.google-apps.document'),  # in Google Doc format
)

# Where the file ends up on Google Drive
for filename, mimeType in FILES:
    folder_id = '0B6V-MONTYPYTHONROCKS-lTcXc'  # not the real folder ID
    metadata = {'name': filename, 'parents': [folder_id]}
    if mimeType:
        metadata['mimeType'] = mimeType
    res = DRIVE.files().create(body=metadata, media_body=filename).execute()
    if res:
        print('Uploaded "%s" (%s)' % (filename, res['mimeType']))
I think you are looking for the update method; there is an example of overwriting a file in Python in the documentation linked below.
Using the official Google client API instead of raw HTTP requests should make your task easier.
from apiclient import errors
from apiclient.http import MediaFileUpload
# ...


def update_file(service, file_id, new_title, new_description, new_mime_type,
                new_filename, new_revision):
    """Update an existing file's metadata and content.

    Args:
        service: Drive API service instance.
        file_id: ID of the file to update.
        new_title: New title for the file.
        new_description: New description for the file.
        new_mime_type: New MIME type for the file.
        new_filename: Filename of the new content to upload.
        new_revision: Whether or not to create a new revision for this file.

    Returns:
        Updated file metadata if successful, None otherwise.
    """
    try:
        # First retrieve the file from the API.
        file = service.files().get(fileId=file_id).execute()
        # File's new metadata.
        file['title'] = new_title
        file['description'] = new_description
        file['mimeType'] = new_mime_type
        # File's new content.
        media_body = MediaFileUpload(
            new_filename, mimetype=new_mime_type, resumable=True)
        # Send the request to the API.
        updated_file = service.files().update(
            fileId=file_id,
            body=file,
            newRevision=new_revision,
            media_body=media_body).execute()
        return updated_file
    except errors.HttpError, error:
        print 'An error occurred: %s' % error
        return None
Link to the example: https://developers.google.com/drive/api/v2/reference/files/update#examples
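
Note that the example above targets the v2 API (title, files().insert, newRevision), while the script in the question builds a v3 service. In v3 the method is still files().update(), just without the newRevision parameter. A sketch adapted to the question's loop (the name-based lookup is my own assumption about how to find the existing file):

for filename, mimeType in FILES:
    # Look for an existing file with the same name in the target folder
    res = DRIVE.files().list(
        q="name='{0}' and '{1}' in parents and trashed=false".format(filename, folder_id),
        fields="files(id)").execute()
    existing = res.get('files', [])
    if existing:
        # Overwrite the existing file's content in place
        res = DRIVE.files().update(
            fileId=existing[0]['id'], media_body=filename).execute()
        print('Updated "%s" (%s)' % (filename, res['mimeType']))
    else:
        res = DRIVE.files().create(body=metadata, media_body=filename).execute()
        print('Uploaded "%s" (%s)' % (filename, res['mimeType']))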