Download only specific folder in S3 bucket using python boto - amazon-web-services

The link below shows how to download an entire S3 content. However, how does one get subfolder content. Suppose my S3 folder has the following emulated structure.
S3Folder/S1/file1.c
S3Folder/S1/file2.h
S3Folder/S1/file1.h
S3Folder/S2/file.exe
S3Folder/S2/resource.data
Suppose I am interested only in S2 folder. How do I isolate the keys in bucket list ?
local backup of an S3 content
conn = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
bucket = conn.get_bucket(bucket_name)
# go through the list of files
bucket_list = bucket.list()
for l in bucket_list:
keyString = str(l.key)
d = LOCAL_PATH + keyString
try:
l.get_contents_to_filename(d)
except OSError:
# check if dir exists
if not os.path.exists(d):
os.mkdir(d)

You could do the following:
import os
import boto3
s3_resource = boto3.resource("s3", region_name="us-east-1")
def download_objects():
root_dir = 'D:/' # local machine location
s3_bucket_name = 'S3_Bucket_Name' #s3 bucket name
s3_root_folder_prefix = 'sample' # bucket inside root folder
s3_folder_list = ['s3_folder_1','s3_folder_2','s3_folder_3'] # root folder inside sub folders list
my_bucket = self.s3_resource.Bucket(s3_bucket_name)
for file in my_bucket.objects.filter(Prefix=s3_root_folder_prefix):
if any(s in file.key for s in s3_folder_list):
try:
path, filename = os.path.split(file.key)
try:
os.makedirs(root_dir + path)
except Exception as err:
pass
my_bucket.download_file(file.key, root_dir + path + '/' + filename)
except Exception as err:
print(err)
if __name__ == '__main__':
download_objects()

You can download s3 objects by adding prefix of it in the key value.
So, according to your Question , you just need to add prefix '/S2' while downloading objects
FYI: s3 download object using boto3
For more check this

Related

Download folder with Google Cloud Storage Api [duplicate]

At the following page
https://googlecloudplatform.github.io/google-cloud-python/latest/storage/blobs.html
there are all the API calls which can be used for Python & Google Cloud storage. Even in the "official" samples on github
https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/storage/cloud-client/snippets.py
don't have a related example.
Finally, downloading a directory with the same method used for download files gives the error
Error: [Errno 21] Is a directory:
You just have to first list all the files in a directory and then download them one by one:
bucket_name = 'your-bucket-name'
prefix = 'your-bucket-directory/'
dl_dir = 'your-local-directory/'
storage_client = storage.Client()
bucket = storage_client.get_bucket(bucket_name=bucket_name)
blobs = bucket.list_blobs(prefix=prefix) # Get list of files
for blob in blobs:
filename = blob.name.replace('/', '_')
blob.download_to_filename(dl_dir + filename) # Download
blob.name includes the entire directory structure + filename, so if you want the same file name as in the bucket, you might want to extract it first (instead of replacing / with _)
If you want to keep the same directory structure without renaming and also create nested folders. I have for python 3.5+ a solution based on #ksbg answer :
from pathlib import Path
bucket_name = 'your-bucket-name'
prefix = 'your-bucket-directory/'
dl_dir = 'your-local-directory/'
storage_client = storage.Client()
bucket = storage_client.get_bucket(bucket_name=bucket_name)
blobs = bucket.list_blobs(prefix=prefix) # Get list of files
for blob in blobs:
if blob.name.endswith("/"):
continue
file_split = blob.name.split("/")
directory = "/".join(file_split[0:-1])
Path(directory).mkdir(parents=True, exist_ok=True)
blob.download_to_filename(blob.name)
Lets say, we want to download FINALFOLDER from the storage path: gs://TEST_BUCKET_NAME/FOLDER1/FOLDER2/FINALFOLDER
After downloading, the final path will look like: D:\\my_blob_data\FINALFOLDER
from os import makedirs
from os.path import join, isdir, isfile, basename
from google.cloud import storage
# if your environment was authenticated, the default config will be picked up
storage_client = storage.Client() # comment this line if you want to use service account
# uncomment the line below if you have a service account json
# storage_client = storage.Client.from_service_account_json('creds/sa.json')
bucket_name = 'TEST_BUCKET_NAME'
prefix = 'FOLDER2'
dst_path = 'D:\\my_blob_data'
if isdir(dstPath) == False:
makedirs(dstPath)
bucket = storage_client.bucket(bucket_name=bucket_name)
blobs = bucket.list_blobs(prefix=prefix) # Get list of files
for blob in blobs:
blob_name = blob.name
dst_file_name = blob_name.replace('FOLDER1/FOLDER2', dst_path) #.replace('FOLDER1/FOLDER2', 'D:\\my_blob_data')
# extract the final directory and create it in the destination path if it does not exist
dst_dir = dst_file_name.replace('/' + basename(dst_file_name), '')
if isdir(dst_dir) == False:
makedirs(dst_dir)
# download the blob object
blob.download_to_filename(dst_file_name)
Using tensoflow gfile package, here is a recursive function.
root_dir is the GCS parent folder.
local_base_dir is the parent folder created at local
def copy_recursively(root_dir, local_base_dir):
if tf.io.gfile.exists(local_base_dir):
tf.io.gfile.rmtree(local_base_dir)
tf.io.gfile.mkdir(local_base_dir)
file_list = tf.io.gfile.glob(root_dir+'/**')
for item in file_list:
if not tf.io.gfile.isdir(item):
fname = item.rsplit('/',1)[-1]
if not fname.startswith('.'):
tf.io.gfile.copy(item,
os.path.join(local_base_dir,fname),
overwrite=False)
else:
child_dir= item.rsplit('/',1)[-1]
full_dir_path = os.path.join(local_base_dir,child_dir)
print(f"Setting up child directory: {full_dir_path}")
copy_recursively(item,full_dir_path)
root_dir = 'gs://.../.../..'
local_base_dir = root_dir.rsplit('/',1)[-1]
copy_recursively(root_dir, local_base_dir)
Refer This Link- https://medium.com/#sandeepsinh/multiple-file-download-form-google-cloud-storage-using-python-and-gcs-api-1dbcab23c44
1 - Add Your Credential Json
2 - List Bucket Items
3 - Download
import logging
import os
from google.cloud import storage
global table_id
global bucket_name
logging.basicConfig(format=’%(levelname)s:%(message)s’, level=logging.DEBUG)
bucket_name = ‘mybucket’
table_id = ‘shakespeare’
storage_client = storage.Client.from_service_account_json(‘/google-cloud/keyfile/service_account.json’)
# The “folder” where the files you want to download are
folder=’/google-cloud/download/{}’.format(table_id)
delimiter=’/’
bucket=storage_client.get_bucket(bucket_name)
blobs=bucket.list_blobs(prefix=table_id, delimiter=delimiter) #List all objects that satisfy the filter.
# Download the file to a destination
def download_to_local():
logging.info(‘File download Started…. Wait for the job to complete.’)
# Create this folder locally if not exists
if not os.path.exists(folder):
os.makedirs(folder)
# Iterating through for loop one by one using API call
for blob in blobs:
logging.info(‘Blobs: {}’.format(blob.name))
destination_uri = ‘{}/{}’.format(folder, blob.name)
blob.download_to_filename(destination_uri)
logging.info(‘Exported {} to {}’.format(
blob.name, destination_uri))
if __name__ == ‘__main__’:
download_to_local()

NoCredentialError when trying to access head_object

I have the following code which runs as expected:
import boto3
session = boto3.Session(profile_name='default')
s3 = session.resource('s3')
bucketName = 'myBucketName'
bucket = s3.Bucket(bucketName)
for object_summary in bucket.objects.filter(Prefix="MainFolder/"):
s3_cli = boto3.client('s3')
if(object_summary.key[-1]!='/'):
print('FileName: '+object_summary.key)
# print(s3_cli.head_object(Bucket=bucketName,Key=str(object_summary.key)))
else:
s3obj='FolderName: '+object_summary.key
print(s3obj)
And lists the files and folders present in MainFolder on my S3 bucket. However, when I uncomment Line#12, I get this error:
NoCredentialsError: Unable to locate credentials
Any idea what I am doing wrong?
Instead of:
s3_cli = boto3.client('s3')
you should be using your session which loads the specific profile:
s3_cli = session.client('s3')

Is there a simple way to rename s3 folder via boto3?

I have s3 bucket with folder, and inside the folder there are large files.
I want to rename the folder with python3-boto3 script.
I read this ("How to Rename Amazon S3 Folder Objects with Python"), and what he is doing is to copy the files with new prefix, then deleting the original folder.
It is very not efficient way to do it, and because I have large files, it will take long time to do it.
Is there a simpler/more efficient way to do it?
There is no way to rename s3 objects/folders - you will need to copy them to the new name and delete the old name unfortunately.
There is a mv command in the aws cli, but behind the scenes it is doing a copy then delete for you - so you can make the operation easier, but it is not a true 'rename'.
https://docs.aws.amazon.com/cli/latest/reference/s3/mv.html
Simple, no. Unfortunately.
There are a lot of 'issues' with folder structures in s3 it seems as the storage is flat.
I have a Django project where I needed the ability to rename a folder but still keep the directory structure in-tact, meaning empty folders would need to be copied and stored in the renamed directory as well.
aws cli is great but neither cp or sync or mv copied empty folders (i.e. files ending in '/') over to the new folder location, so I used a mixture of boto3 and the aws cli to accomplish the task.
More or less I find all folders in the renamed directory and then use boto3 to put them in the new location, then I cp the data with aws cli and finally remove it.
import threading
import os
from django.conf import settings
from django.contrib import messages
from django.core.files.storage import default_storage
from django.shortcuts import redirect
from django.urls import reverse
def rename_folder(request, client_url):
"""
:param request:
:param client_url:
:return:
"""
current_property = request.session.get('property')
if request.POST:
# name the change
new_name = request.POST['name']
# old full path with www.[].com?
old_path = request.POST['old_path']
# remove the query string
old_path = ''.join(old_path.split('?')[0])
# remove the .com prefix item so we have the path in the storage
old_path = ''.join(old_path.split('.com/')[-1])
# remove empty values, this will happen at end due to these being folders
old_path_list = [x for x in old_path.split('/') if x != '']
# remove the last folder element with split()
base_path = '/'.join(old_path_list[:-1])
# # now build the new path
new_path = base_path + f'/{new_name}/'
# remove empty variables
# print(old_path_list[:-1], old_path.split('/'), old_path, base_path, new_path)
endpoint = settings.AWS_S3_ENDPOINT_URL
# # recursively add the files
copy_command = f"aws s3 --endpoint={endpoint} cp s3://{old_path} s3://{new_path} --recursive"
remove_command = f"aws s3 --endpoint={endpoint} rm s3://{old_path} --recursive"
# get_creds() is nothing special it simply returns the elements needed via boto3
client, resource, bucket, resource_bucket = get_creds()
path_viewing = f'{"/".join(old_path.split("/")[1:])}'
directory_content = default_storage.listdir(path_viewing)
# loop over folders and add them by default, aws cli does not copy empty ones
# so this is used to accommodate
folders, files = directory_content
for folder in folders:
new_key = new_path+folder+'/'
# we must remove bucket name for this to work
new_key = new_key.split(f"{bucket}/")[-1]
# push this to new thread
threading.Thread(target=put_object, args=(client, bucket, new_key,)).start()
print(f'{new_key} added')
# # run command, which will copy all data
os.system(copy_command)
print('Copy Done...')
os.system(remove_command)
print('Remove Done...')
# print(bucket)
print(f'Folder renamed.')
messages.success(request, f'Folder Renamed to: {new_name}')
return redirect(request.META.get('HTTP_REFERER', f"{reverse('home', args=[client_url])}"))

Issue with uploading files from local directory to aws S3 using python 2.7 and boto 2

I’m doing simple operation to of downloading the gzip files from S3 bucket to the local directory. I’m extracting those into another local directory and then uploading them back to S3 bucket again into archive folder path. While doing this operation I want to make sure I am processing same set of files that I initially download from S3 bucket which is (f_name) in below code. Now, below code is not uploading those back to S3 , that’s where I’m stuck. But able to download from S3 and extract it into local directory. Can you please help me understand what is wrong with the _uploadFile function?
from boto.s3.connection import S3Connection
from boto.s3.key import *
import os
import os.path
aws_bucket= "event-logs-dev” ## S3 Bucket name
local_download_directory= "/Users/TargetData/Download/test_queue1/“ ## local directory to download the gzip files from S3.
Target_directory_to_extract = "/Users/TargetData/unzip” ##local directory to gunzip the downloaded files.
Target_s3_path_to_upload= "event-logs-dev/data/clean/xact/logs/archive/“ ## S3 bucket path to upload the files.
def decompressAllFilesFromNetfiler(self,aws_bucket,local_download_directory,Target_d irectory_to_extract,Target_s3_path_to_upload):
zipFiles = [f for f in os.listdir(local_download_directory) if re.match(r'.*\.tar\.gz', f)]
for f_name in zipFiles:
if os.path.exists(Target_directory_to_extract+"/"+f_name[:-len('.tar.gz')]) and os.access(Target_directory_to_extract+"/"+f_name[:-len('.tar.gz')], os.R_OK):
print ('File {} already exists!'.format(f_name))
else:
f_name_with_path = os.path.join(local_download_directory, f_name)
os.system('mkdir -p {} && tar vxzf {} -C {}'.format(Target_directory_to_extract, f_name_with_path, Target_directory_to_extract))
print ('Extracted file {}'.format(f_name))
self._uploadFile(aws_bucket,f_name,Target_s3_path_to_upload,Target_directory_to_extract)
def _uploadFile(self, aws_bucket, f_name,Target_s3_path_to_upload,Target_directory_to_extract):
full_key_name = os.path.expanduser(os.path.join(Target_s3_path_to_upload, f_name))
path = os.path.expanduser(os.path.join(Target_directory_to_extract, f_name))
try:
print "Uploaded extracted file to: %s" % (full_key_name)
key = aws_bucket.new_key(full_key_name)
key.set_contents_from_filename(path)
except:
if full_key_name is None:
print "Error uploading”
Currently, the output prints that Uploaded extracted file to: event-logs-dev/data/clean/xact/logs/archive/1442235602129200000.tar.gz, but nothing is uploaded to S3 bucket. Your help is greatly appreciated!! Thank you in advance!
It appears that you have cut and pasted parts of your code - and maybe formatting was lost as your code above will not work as pasted. I've taken the liberty to make it PEP8 (mostly) however there is still some missing code to create the S3 objects. Since your import the modules, I presume that you have that section of code and just didn't paste it.
here is a cleaned up version of your code formatted correctly. I also added a Exception code to your try: block to print out the error you get. You should update the Exception to be more specific to the Exceptions thrown for make_key or set_contents_... but the general Exception will get you started. If nothing more this is more readable, but you should include your S3 connection code too - and remove anything that is specific to your domain (e.g. keys, trade secrets, etc).
#!/usr/bin/env python
"""
do some download
some extract
and some upload
"""
from boto.s3.connection import S3Connection
from boto.s3.key import *
import os
import os.path
aws_bucket = 'event-logs-dev'
local_download_directory = '/Users/TargetData/Download/test_queue1/'
Target_directory_to_extract = '/Users/TargetData/unzip'
Target_s3_path_to_upload = 'event-logs-dev/data/clean/xact/logs/archive/'
'''
MUST BE SOME MAGIC HERE TO GET AN S3 CONNECTION ???
aws_bucket IS NOT A BUCKET OBJECT ...
'''
def decompressAllFilesFromNetfiler(self,
aws_bucket,
local_download_directory,
Target_directory_to_extract,
Target_s3_path_to_upload):
'''
decompress stuff
'''
zipFiles = [f for f in os.listdir(
local_download_directory) if re.match(r'.*\.tar\.gz', f)]
for f_name in zipFiles:
if os.path.exists(
"{}/{}".format(Target_directory_to_extract,
f_name[:len('.tar.gz')])) and os.access(
"{}/{}".format(Target_directory_to_extract,
f_name[:len('.tar.gz')])) and os.R_OK:
print ('File {} already exists!'.format(f_name))
else:
f_name_with_path = os.path.join(local_download_directory, f_name)
os.system('mkdir -p {} && tar vxzf {} -C {}'.format(
Target_directory_to_extract,
f_name_with_path,
Target_directory_to_extract))
print ('Extracted file {}'.format(f_name))
self._uploadFile(aws_bucket,
f_name,
Target_s3_path_to_upload,
Target_directory_to_extract)
def _uploadFile(self,
aws_bucket,
f_name,
Target_s3_path_to_upload,
Target_directory_to_extract):
full_key_name = os.path.expanduser(os.path.join(Target_s3_path_to_upload,
f_name))
path = os.path.expanduser(os.path.join(Target_directory_to_extract, f_name))
try:
S3CONN = S3Connection()
BUCKET = S3CONN.get_bucket(aws_bucket)
key = BUCKET.new_key(full_key_name)
key.set_contents_from_filename(path)
print "Uploaded extracted file to: {}".format(full_key_name)
except Exception as UploadERR:
if full_key_name is None:
print 'Error uploading'
else:
print "Error : {}".format(UploadERR)

upload a directory to s3 with boto

I am already connected to the instance and I want to upload the files that are generated from my python script directly to S3. I have tried this:
import boto
s3 = boto.connect_s3()
bucket = s3.get_bucket('alexandrabucket')
from boto.s3.key import Key
key = bucket.new_key('s0').set_contents_from_string('some content')
but this is rather creating a new file s0 with the context "same content" while I want to upload the directory s0 to mybucket.
I had a look also to s3put but I didn't manage to get what I want.
The following function can be used to upload directory to s3 via boto.
def uploadDirectory(path,bucketname):
for root,dirs,files in os.walk(path):
for file in files:
s3C.upload_file(os.path.join(root,file),bucketname,file)
Provide a path to the directory and bucket name as the inputs. The files are placed directly into the bucket. Alter the last variable of the upload_file() function to place them in "directories".
There is nothing in the boto library itself that would allow you to upload an entire directory. You could write your own code to traverse the directory using os.walk or similar and to upload each individual file using boto.
There is a command line utility in boto called s3put that could handle this or you could use the AWS CLI tool which has a lot of features that allow you to upload entire directories or even sync the S3 bucket with a local directory or vice-versa.
The s3fs package provides nice functionalities to handle such cases
s3_file = s3fs.S3FileSystem()
local_path = "some_dir_path/some_dir_path/"
s3_path = "bucket_name/dir_path"
s3_file.put(local_path, s3_path, recursive=True)
I built the function based on the feedback from #JDPTET, however,
I needed to remove the common entire local path from getting uploaded to the bucket!
Not sure how many path separators I encounter - so I had to use os.path.normpath
def upload_folder_to_s3(s3bucket, inputDir, s3Path):
print("Uploading results to s3 initiated...")
print("Local Source:",inputDir)
os.system("ls -ltR " + inputDir)
print("Dest S3path:",s3Path)
try:
for path, subdirs, files in os.walk(inputDir):
for file in files:
dest_path = path.replace(inputDir,"")
__s3file = os.path.normpath(s3Path + '/' + dest_path + '/' + file)
__local_file = os.path.join(path, file)
print("upload : ", __local_file, " to Target: ", __s3file, end="")
s3bucket.upload_file(__local_file, __s3file)
print(" ...Success")
except Exception as e:
print(" ... Failed!! Quitting Upload!!")
print(e)
raise e
s3 = boto3.resource('s3', region_name='us-east-1')
s3bucket = s3.Bucket("<<s3bucket_name>>")
upload_folder_to_s3(s3bucket, "<<Local Folder>>", "<<s3 Path>>")
You could do the following:
import os
import boto3
s3_resource = boto3.resource("s3", region_name="us-east-1")
def upload_objects():
try:
bucket_name = "S3_Bucket_Name" #s3 bucket name
root_path = 'D:/sample/' # local folder for upload
my_bucket = s3_resource.Bucket(bucket_name)
for path, subdirs, files in os.walk(root_path):
path = path.replace("\\","/")
directory_name = path.replace(root_path,"")
for file in files:
my_bucket.upload_file(os.path.join(path, file), directory_name+'/'+file)
except Exception as err:
print(err)
if __name__ == '__main__':
upload_objects()
This is the code I used which recursively upload files from the specified folder to the specified s3 path. Just add S3 credential and bucket details in the script:
https://gist.github.com/hari116/4ab5ebd885b63e699c4662cd8382c314/
#!/usr/bin/python
"""Usage: Add bucket name and credentials
script.py <source folder> <s3 destination folder >"""
import os
from sys import argv
import boto3
from botocore.exceptions import NoCredentialsError
ACCESS_KEY = ''
SECRET_KEY = ''
host = ''
bucket_name = ''
local_folder, s3_folder = argv[1:3]
walks = os.walk(local_folder)
# Function to upload to s3
def upload_to_aws(bucket, local_file, s3_file):
"""local_file, s3_file can be paths"""
s3 = boto3.client('s3', aws_access_key_id=ACCESS_KEY,
aws_secret_access_key=SECRET_KEY)
print(' Uploading ' +local_file + ' as ' + bucket + '/' +s3_file)
try:
s3.upload_file(local_file, bucket, s3_file)
print(' '+s3_file + ": Upload Successful")
print(' ---------')
return True
except NoCredentialsError:
print("Credentials not available")
return False
"""For file names"""
for source, dirs, files in walks:
print('Directory: ' + source)
for filename in files:
# construct the full local path
local_file = os.path.join(source, filename)
# construct the full Dropbox path
relative_path = os.path.relpath(local_file, local_folder)
s3_file = os.path.join(s3_folder, relative_path)
# Invoke upload function
upload_to_aws(bucket_name, local_file, s3_file)
For reading file form folder we can use
import boto
from boto.s3.key import Key
keyId = 'YOUR_AWS_ACCESS_KEY_ID'
sKeyId='YOUR_AWS_ACCESS_KEY_ID'
bucketName='your_bucket_name'
conn = boto.connect_s3(keyId,sKeyId)
bucket = conn.get_bucket(bucketName)
for key in bucket.list():
print ">>>>>"+key.name
pathV = key.name.split('/')
if(pathV[0] == "data"):
if(pathV[1] != ""):
srcFileName = key.name
filename = key.name
filename = filename.split('/')[1]
destFileName = "model/data/"+filename
k = Key(bucket,srcFileName)
k.get_contents_to_filename(destFileName)
elif(pathV[0] == "nlu_data"):
if(pathV[1] != ""):
srcFileName = key.name
filename = key.name
filename = filename.split('/')[1]
destFileName = "model/nlu_data/"+filename
k = Key(bucket,srcFileName)
k.get_contents_to_filename(destFileName)
Updated #user 923227's answer to (1) include newer boto3 interface (2) work with nuances of windows double backslash (3) cleaner tqdm progress bar:
import os
from tqdm import tqdm
def upload_folder_to_s3(s3_client, s3bucket, input_dir, s3_path):
pbar = tqdm(os.walk(input_dir))
for path, subdirs, files in pbar:
for file in files:
dest_path = path.replace(input_dir, "").replace(os.sep, '/')
s3_file = f'{s3_path}/{dest_path}/{file}'.replace('//', '/')
local_file = os.path.join(path, file)
s3_client.upload_file(local_file, s3bucket, s3_file)
pbar.set_description(f'Uploaded {local_file} to {s3_file}')
print(f"Successfully uploaded {input_dir} to S3 {s3_path}")
Usage example:
s3_client = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
upload_folder_to_s3(s3_client, 'BUCKET-NAME', <local-directory>, <s3-directory>)
Somehow the other snippets did not really work for me, this is a modification of the snippet from user 923227 that does.
This code copies all files in a directory and maintains the directory in S3, e.g.2023/01/file.jpg will be in the bucket as 2023/01/file.jpg.
import os
import sys
import boto3
client = boto3.client('s3')
local_path = "your-path/data"
bucketname = "bucket-name"
for path, dirs, files in os.walk(local_path):
for file in files:
file_s3 = os.path.normpath(path + '/' + file)
file_local = os.path.join(path, file)
print("Upload:", file_local, "to target:", file_s3, end="")
client.upload_file(file_local, bucketname, file_s3)
print(" ...Success")
Another method that did not exist when this question was first asked is to use python-rclone (https://github.com/ddragosd/python-rclone/blob/master/README.md).
This requires a download of rclone and a working rclone config. Commonly used for AWS (https://rclone.org/s3/) but can be used for other providers as well.
install('python-rclone')
import rclone
cfg_path = r'(path to rclone config file here)'
with open(cfg_path) as f:
cfg = f.read()
# Implementation
# Local file to cloud server
result = rclone.with_config(cfg).run_cmd(command="sync", extra_args=["/home/demodir/", "AWS test:dummydir/etc/"])
# Cloud server to cloud server
result = rclone.with_config(cfg).run_cmd(command="sync", extra_args=["Gdrive:test/testing/", "AWS test:dummydir/etc/"
This allows you to run a "sync" command similar to the AWS CLI within your python code by reading in the config file and mapping your output via kwargs (extra_args)
This solution does not use boto, but I think it could do what the OP wants.
It uses awscli and Python.
import os
class AwsCredentials:
def __init__(self, access_key: str, secret_key: str):
self.access_key = access_key
self.secret_key = secret_key
def to_command(self):
credentials = f'AWS_ACCESS_KEY_ID={self.access_key} AWS_SECRET_ACCESS_KEY={self.secret_key}'
return credentials
def sync_s3_bucket(credentials: AwsCredentials, source_path: str, bucket: str) -> None:
command = f'{credentials.to_command()} aws s3 sync {source_path} s3://{bucket}'
result = os.system(command)
assert result == 0, f'The s3 sync was not successful, error code: {result}'
Please consider getting the AWS credentials from a file or from the environment.
The documentation for the s3 sync command is here.
Simply running terminal commands using os module with F string works
import os
ActualFolderName = "FolderToBeUploadedOnS3"
os.system(f'aws s3 cp D:\<PathToYourFolder>\{ActualFolderName} s3://<BucketName>/{ActualFolderName}/ --recursive')