Upload a folder to a Google Cloud Storage bucket with Python - google-cloud-platform

I know that I can upload single files like this:
bucket_name = "my-bucket-name"
bucket = client.get_bucket(bucket_name)
blob_name = "myfile.txt"
blob = bucket.blob(blob_name)
blob.upload_from_filename(blob_name)
How can I do the same with a folder? Is there something like blob.upload_from_foldername?
I tried the same code, replacing myfile.txt with myfoldername, but it did not work:
FileNotFoundError: [Errno 2] No such file or directory: 'myfoldername'
This is the folder structure:
I assume something is wrong with the path, but I am not sure what. I am executing the code from Untitled.ipynb; it works with myfile.txt but not with myfoldername.
I do not want to use a command line function.

You can't upload an empty folder or directory to Google Cloud Storage, but you can create an empty folder in Cloud Storage using the client:
from google.cloud import storage

def create_newfolder(bucket_name, destination_folder_name):
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(destination_folder_name)
    blob.upload_from_string('')
    print('Created {}.'.format(destination_folder_name))
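For example, a call like the one below would show up as an empty folder in the bucket. The trailing slash matters: Cloud Storage has no real directories, and the console only renders a zero-byte object as a folder when its name ends with "/" (the bucket name here is just the one from the question):

create_newfolder("my-bucket-name", "myfoldername/")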
And if you are trying to upload a whole directory, you can use the code below:
import glob
import os
from google.cloud import storage

client = storage.Client()

def upload_from_directory(directory_path: str, destination_bucket_name: str, destination_blob_name: str):
    # Collect every path under the directory, including subdirectories
    rel_paths = glob.glob(directory_path + '/**', recursive=True)
    bucket = client.get_bucket(destination_bucket_name)
    for local_file in rel_paths:
        # Drop the leading path component so blobs are named relative to the directory
        remote_path = f'{destination_blob_name}/{"/".join(local_file.split(os.sep)[1:])}'
        if os.path.isfile(local_file):
            blob = bucket.blob(remote_path)
            blob.upload_from_filename(local_file)
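A minimal usage sketch with the names from the question. Note that the helper drops the first path component when building the remote path, so it assumes directory_path is a single top-level folder (such as one sitting next to the notebook):

upload_from_directory('myfoldername', 'my-bucket-name', 'myfoldername')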

Related

How to upload Python Requests content to S3 without saving the file

I am running a Python Lambda function that downloads an MP3 file from a server using the Requests library and then uploads the file to S3. This is the code I am using, which works fine:
import boto3
import requests

dl_url = "https://server.com/file.mp3"
response = requests.get(dl_url)

s3 = boto3.resource('s3')
bucket = s3.Bucket('my-bucket')

file_name = "file.mp3"
dir_file = f"/tmp/{file_name}"
with open(dir_file, "wb") as f:
    f.write(response.content)
bucket.upload_file(dir_file, file_name)
This code works fine; however, I was wondering if I can skip the step of saving the file locally before uploading it.
Working code that doesn't require you to save the file first:
import boto3
import requests

s3_client = boto3.client('s3')
bucket = "test-bucket"
dl_url = "https://file-examples.com/storage/fe1170c1cf625dc95987de5/2017/11/file_example_MP3_700KB.mp3"

response = requests.get(dl_url)

# Write the object
s3_client.put_object(Bucket=bucket,
                     Key="sample_mp3.mp3",
                     Body=response.content)
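If the files are large, a possible variation is to stream the download instead of holding response.content in memory; a sketch, reusing the URL from the question and the same placeholder bucket and key names:

import boto3
import requests

s3_client = boto3.client('s3')
dl_url = "https://server.com/file.mp3"

# stream=True keeps the body as a file-like object instead of buffering it all at once
with requests.get(dl_url, stream=True) as response:
    response.raise_for_status()
    # upload_fileobj reads from the stream in chunks (and uses multipart upload for large bodies)
    s3_client.upload_fileobj(response.raw, "test-bucket", "sample_mp3.mp3")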

How to upload a list of files from the web directly to an S3 bucket

I have a file with URLs in my S3 bucket. I would like to use a Python Lambda function to download each URL and upload the resulting files to the S3 bucket.
For example, the file I uploaded to S3 contains:
http://...
http://...
Each line corresponds to a file to be uploaded into S3.
Here is the code:
import json
import urllib.parse
import boto3
import requests
import os
from gzip import GzipFile
from io import TextIOWrapper

print('Loading functions')

s3 = boto3.client('s3')

def get_file_seqs(response):
    try:
        size = response['ResponseMetadata']['HTTPHeaders']['content-length']
        print("[+] Size retrieved")
        return size
    except:
        print("[-] Size can not be retrieved")

def lambda_handler(event, context):
    # Defining bucket objects
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = urllib.parse.unquote_plus(event['Records'][0]['s3']['object']['key'], encoding='utf-8')
    # Get the file from S3
    print('[+] Getting file from S3 bucket')
    response = s3.get_object(Bucket=bucket, Key=key)
    try:
        # Checking file size
        print('[+] Checking file size')
        file_size = get_file_seqs(response)
        if file_size == 0:
            print('File size is equal to 0')
            return False
        else:
            # Create new directories
            print('[+] Creating new directories')
            bucket_name = "triggersnextflow"
            directories = ['backups/sample/', 'backups/control/']
            # Loop to create new dirs
            for dirs in directories:
                s3.put_object(Bucket=bucket_name, Key=dirs, Body='')
            # NOW I WOULD LIKE TO DOWNLOAD THE FILES FROM THE URLS INSIDE S3 OBJECT
            return True
    except Exception as e:
        print(e)
        print('Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'.format(key, bucket))
        raise e
Download an S3 object to a file:
import boto3
s3 = boto3.resource('s3')
s3.meta.client.download_file('mybucket', 'hello.txt', '/tmp/hello.txt')
You will find a great resource of information here:
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.download_file
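As a rough sketch of the step the question is still missing (fetching each URL listed in the S3 object and writing the result back to S3), assuming the file is plain text with one URL per line; the target bucket and prefix below are just the placeholders from the handler above:

import boto3
import requests

s3 = boto3.client('s3')

def upload_urls_to_s3(response, target_bucket="triggersnextflow", target_prefix="backups/sample/"):
    # The GetObject response body is a stream; read and decode it to get the URL list
    urls = response['Body'].read().decode('utf-8').splitlines()
    for url in urls:
        url = url.strip()
        if not url:
            continue
        # Fetch the remote file and store it in S3 under its basename
        file_name = url.rsplit('/', 1)[-1]
        r = requests.get(url)
        r.raise_for_status()
        s3.put_object(Bucket=target_bucket, Key=target_prefix + file_name, Body=r.content)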

Trying to download all image files from an AWS S3 bucket subfolder into a folder on the Cloud9 IDE

I found this code online, but I am unable to change it so that the images are downloaded into a specified folder.
import boto3
import os

def download_all_objects_in_folder():
    s3_resource = boto3.resource('s3')
    my_bucket = s3_resource.Bucket('images')
    objects = my_bucket.objects.filter(Prefix='pets/cats/')
    for obj in objects:
        path, filename = os.path.split(obj.key)
        my_bucket.download_file(obj.key, filename)
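No answer is quoted for this one, but a small change to the loop would place the files in a chosen local directory; a sketch, where target_dir is a hypothetical path on the Cloud9 instance:

import boto3
import os

def download_all_objects_to_folder(target_dir='/home/ec2-user/environment/cats'):
    s3_resource = boto3.resource('s3')
    my_bucket = s3_resource.Bucket('images')
    # Make sure the destination folder exists
    os.makedirs(target_dir, exist_ok=True)
    for obj in my_bucket.objects.filter(Prefix='pets/cats/'):
        # Skip the zero-byte "folder" placeholder keys that end with a slash
        if obj.key.endswith('/'):
            continue
        filename = os.path.basename(obj.key)
        my_bucket.download_file(obj.key, os.path.join(target_dir, filename))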

How to load a csv.gpg file from GCS to BigQuery using Airflow or Python

We have a requirement to decrypt and load a .gpg file from GCS to BigQuery in Airflow. Is there an easy, quick way to decrypt and load at the same time from GCS to BQ?
The process I have used:
download the file to the local server
decrypt it and upload the decrypted file back to GCS
load from GCS to BQ
import pgpy
import os
from google.cloud import secretmanager
from google.cloud import storage
from io import BytesIO

def upload_to_bucket(blob_name, output, bucket_name):
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(blob_name)
    blob.upload_from_string(data=output.read())
    output.close()
    return blob.public_url

def get_secret(project, key_id, version):
    client = secretmanager.SecretManagerServiceClient()
    name = f"projects/{project}/secrets/{key_id}/versions/{version}"
    response = client.access_secret_version(request={"name": name})
    return response.payload.data.decode('ascii')

fetch_key = get_secret('project', 'secret_key', version)
key, _ = pgpy.PGPKey.from_blob(fetch_key)

storage_client = storage.Client()
bucket = storage_client.get_bucket('bucket_name')
blob = bucket.blob('test.csv.gpg')
local_path = os.path.join(os.getcwd(), 'test.csv.gpg')
blob.download_to_filename(local_path)
print(local_path)

msg_encrypted = pgpy.PGPMessage.from_file(local_path)
msg_plaintext = BytesIO(key.decrypt(msg_encrypted).message)
print(msg_plaintext)

g = upload_to_bucket('test.csv', msg_plaintext, 'buckeet')
print(g)
os.remove(local_path)
Do we have any option to read directly from GCS and load to BQ?
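One way to avoid the local round trip is to decrypt in memory and stream the result straight into BigQuery; a sketch, assuming the decrypted payload is a CSV, key is the PGP key fetched from Secret Manager as above, and table_id is a hypothetical destination table in the form project.dataset.table:

from io import BytesIO

import pgpy
from google.cloud import bigquery, storage

def decrypt_gcs_csv_to_bq(bucket_name, blob_name, key, table_id):
    # Read the encrypted object into memory instead of downloading it to disk
    storage_client = storage.Client()
    encrypted_bytes = storage_client.bucket(bucket_name).blob(blob_name).download_as_bytes()

    # Decrypt with the PGP key
    message = pgpy.PGPMessage.from_blob(encrypted_bytes)
    plaintext = key.decrypt(message).message
    if isinstance(plaintext, str):
        plaintext = plaintext.encode('utf-8')

    # Load the decrypted CSV into BigQuery directly from the in-memory buffer
    bq_client = bigquery.Client()
    job_config = bigquery.LoadJobConfig(
        source_format=bigquery.SourceFormat.CSV,
        skip_leading_rows=1,
        autodetect=True,
    )
    load_job = bq_client.load_table_from_file(BytesIO(plaintext), table_id, job_config=job_config)
    load_job.result()  # wait for the load job to complete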

Copy a folder from server to Azure blob using django-storage

How can I upload a folder (media_root/folder), which contains subfolders and files, to an Azure Blob container? I can upload a single file to the Azure Blob container using this:
from django.core.files.storage import default_storage
f = open('media_root/folder/file.csv', 'rb')
default_storage.save(path, f)
I have set the AzureStorage class in my settings.py:
DEFAULT_FILE_STORAGE = 'storages.backends.azure_storage.AzureStorage'
Any help would be much appreciated.
As the django-storages documentation shows, there is no method to upload a folder to Azure Blob Storage; it can only be used to upload files.
If you want to upload a folder (make sure it is not empty) to Azure Blob Storage, see the following Python code.
from azure.storage.blob import BlockBlobService, PublicAccess
import os

def run_sample():
    block_blob_service = BlockBlobService("your_account_name", "your_account_key")
    container_name = 'test1'
    path_remove = "F:\\"
    local_path = "F:\\folderA"
    for r, d, f in os.walk(local_path):
        if f:
            for file in f:
                file_path_on_azure = os.path.join(r, file).replace(path_remove, "")
                file_path_on_local = os.path.join(r, file)
                block_blob_service.create_blob_from_path(container_name, file_path_on_azure, file_path_on_local)

# Main method.
if __name__ == '__main__':
    run_sample()
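Note that BlockBlobService comes from the legacy v2 azure-storage-blob SDK. With the current v12 SDK, the same walk-and-upload loop would look roughly like this; the connection string, container name, and local paths are placeholders:

import os

from azure.storage.blob import BlobServiceClient

def upload_folder_v12(local_path="F:\\folderA", path_remove="F:\\",
                      container_name="test1", conn_str="your_connection_string"):
    service = BlobServiceClient.from_connection_string(conn_str)
    container = service.get_container_client(container_name)
    for root, dirs, files in os.walk(local_path):
        for name in files:
            local_file = os.path.join(root, name)
            # Keep the folder structure in the blob name, using forward slashes
            blob_name = local_file.replace(path_remove, "").replace(os.sep, "/")
            with open(local_file, "rb") as data:
                container.upload_blob(name=blob_name, data=data, overwrite=True)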