Can't upload to S3 using boto3 and Flask

This is a basic thing and it seems obvious, but I am stuck on this one.
I am using boto3, and I have an access key and a secret key.
The upload file route:
@app.route('/up')
def up():
    main = request.files['mainimg']
    bucket = '<bucketname>'
    if main:
        upload_to_aws(main)
The upload_to_aws function (taken from GitHub):
import os
import boto3
from werkzeug.utils import secure_filename

def upload_to_aws(file, acl="public-read"):
    filename = secure_filename(file.filename)
    s3 = boto3.client(
        's3',
        aws_access_key_id=os.environ.get('FASO_S3_ACCESS_KEY'),
        aws_secret_access_key=os.environ.get('FASO_S3_SECRET_KEY')
    )
    try:
        s3.upload_fileobj(
            file,
            "fasofashion",
            file.filename,
            ExtraArgs={
                "ACL": acl,
                "ContentType": file.content_type
            }
        )
        print('uploaded')
    except Exception as e:
        # This is a catch-all exception; edit this part to fit your needs.
        print("Something Happened: ", e)
        return e
I keep getting these errors:
Access denied file must be a string
File must be a string
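As an aside on the wording of those errors: in boto3, client.upload_file expects a filename string, while client.upload_fileobj accepts a file-like object such as Flask's FileStorage, so a "file must be a string" message usually points at upload_file being handed an object; the "Access denied" part is typically an IAM or bucket-policy issue rather than a code one. A minimal sketch of the two call shapes, where upload_example and 'example-bucket' are hypothetical names, not taken from the question:
import boto3
from werkzeug.utils import secure_filename

s3 = boto3.client('s3')

def upload_example(main):
    # 'main' is assumed to be the FileStorage from request.files['mainimg']
    key = secure_filename(main.filename)

    # Option 1: upload_fileobj accepts the file-like object itself
    s3.upload_fileobj(main, 'example-bucket', key)

    # Option 2: upload_file expects a path string, so save the upload to disk first
    # path = '/tmp/' + key
    # main.save(path)
    # s3.upload_file(path, 'example-bucket', key)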

Related

Lambda call S3 get public access block using boto3

I'm trying to verify, through a Lambda function, whether the public access block of my bucket mypublicbucketname is checked or not. For testing, I created a bucket and unchecked the public access block. So I wrote this Lambda:
import sys
from pip._internal import main
main(['install', '-I', '-q', 'boto3', '--target', '/tmp/', '--no-cache-dir', '--disable-pip-version-check'])
sys.path.insert(0, '/tmp/')
import json
import boto3
import botocore

def lambda_handler(event, context):
    # TODO implement
    print(boto3.__version__)
    print(botocore.__version__)
    client = boto3.client('s3')
    response = client.get_public_access_block(Bucket='mypublicbucketname')
    print("response:>>", response)
I updated boto3 and botocore to the latest versions:
1.16.40  # boto3
1.19.40  # botocore
Even though I uploaded them and the function seems correct, I get this exception:
[ERROR] ClientError: An error occurred (NoSuchPublicAccessBlockConfiguration) when calling the GetPublicAccessBlock operation: The public access block configuration was not found
Can someone explain why I get this error?
For future users: if you hit the same problem with get_public_access_block(), use this solution:
try:
    response = client.get_public_access_block(Bucket='mypublicbucketname')
except botocore.exceptions.ClientError as e:
    if e.response['Error']['Code'] == 'NoSuchPublicAccessBlockConfiguration':
        print('No Public Access')
    else:
        print("unexpected error: %s" % (e.response))
For put_public_access_block, it works fine.
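For completeness, a minimal sketch of the corresponding put_public_access_block call, assuming the same boto3 S3 client and bucket name as above (the four flag values are only an example):
import boto3

client = boto3.client('s3')
client.put_public_access_block(
    Bucket='mypublicbucketname',
    PublicAccessBlockConfiguration={
        'BlockPublicAcls': True,
        'IgnorePublicAcls': True,
        'BlockPublicPolicy': True,
        'RestrictPublicBuckets': True
    }
)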

KeyError: 'Records' in AWS Lambda triggered by s3 PUT event

I'm trying to create a simple event-driven AWS Lambda Python function to extract a ZIP or GZIP attachment from an email stored in S3 by another service (such as Amazon SES).
from __future__ import print_function
import email
import zipfile
import os
import gzip
import string
import boto3
import urllib

print('Loading function')

s3 = boto3.client('s3')
s3r = boto3.resource('s3')
xmlDir = "/tmp/output/"

outputBucket = ""  # Set here for a seperate bucket otherwise it is set to the events bucket
outputPrefix = "xml/"  # Should end with /

def lambda_handler(event, context):
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = urllib.unquote_plus(event['Records'][0]['s3']['object']['key']).decode('utf8')

    try:
        # Set outputBucket if required
        if not outputBucket:
            global outputBucket
            outputBucket = bucket

        # Use waiter to ensure the file is persisted
        waiter = s3.get_waiter('object_exists')
        waiter.wait(Bucket=bucket, Key=key)

        response = s3r.Bucket(bucket).Object(key)

        # Read the raw text file into a Email Object
        msg = email.message_from_string(response.get()["Body"].read())

        if len(msg.get_payload()) == 2:
            # Create directory for XML files (makes debugging easier)
            if os.path.isdir(xmlDir) == False:
                os.mkdir(xmlDir)

            # The first attachment
            attachment = msg.get_payload()[1]

            # Extract the attachment into /tmp/output
            extract_attachment(attachment)

            # Upload the XML files to S3
            upload_resulting_files_to_s3()
        else:
            print("Could not see file/attachment.")

        return 0
    except Exception as e:
        print(e)
        print('Error getting object {} from bucket {}. Make sure they exist '
              'and your bucket is in the same region as this '
              'function.'.format(key, bucket))
        raise e

def extract_attachment(attachment):
    # Process filename.zip attachments
    if "gzip" in attachment.get_content_type():
        contentdisp = string.split(attachment.get('Content-Disposition'), '=')
        fname = contentdisp[1].replace('\"', '')
        open('/tmp/' + contentdisp[1], 'wb').write(attachment.get_payload(decode=True))
        # This assumes we have filename.xml.gz, if we get this wrong, we will just
        # ignore the report
        xmlname = fname[:-3]
        open(xmlDir + xmlname, 'wb').write(gzip.open('/tmp/' + contentdisp[1], 'rb').read())

    # Process filename.xml.gz attachments (Providers not complying to standards)
    elif "zip" in attachment.get_content_type():
        open('/tmp/attachment.zip', 'wb').write(attachment.get_payload(decode=True))
        with zipfile.ZipFile('/tmp/attachment.zip', "r") as z:
            z.extractall(xmlDir)

    else:
        print('Skipping ' + attachment.get_content_type())

def upload_resulting_files_to_s3():
    # Put all XML back into S3 (Covers non-compliant cases if a ZIP contains multiple results)
    for fileName in os.listdir(xmlDir):
        if fileName.endswith(".xml"):
            print("Uploading: " + fileName)  # File name to upload
            s3r.meta.client.upload_file(xmlDir + '/' + fileName, outputBucket, outputPrefix + fileName)
On running the function I'm getting this error:
'Records': KeyError
Traceback (most recent call last):
  File "/var/task/lambda_function.py", line 25, in lambda_handler
    for record in event["Records"]:
KeyError: 'Records'
I tried googling and found a few posts telling me to add a Mapping Template (https://intellipaat.com/community/18329/keyerror-records-in-aws-s3-lambda-trigger, "KeyError: 'Records'" in AWS S3 - Lambda trigger). Following that link, I'm getting some other error:
'query': KeyError
Traceback (most recent call last):
  File "/var/task/lambda_function.py", line 24, in lambda_handler
    for record in event['query']['Records']:
KeyError: 'query'
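For context on why the lookup can fail: a real S3 PUT notification always carries a top-level Records list, while a hand-written console test event or an API Gateway mapping template generally does not, which is exactly when event['Records'] raises KeyError. A minimal defensive handler sketch (Python 3 here; the bucket/key handling mirrors the code above, and the guard message is illustrative):
import urllib.parse

def lambda_handler(event, context):
    # A genuine S3 notification looks like:
    # {"Records": [{"s3": {"bucket": {"name": "..."}, "object": {"key": "..."}}}]}
    records = event.get('Records', [])
    if not records:
        print('No Records key in event; was this invoked by the S3 trigger?')
        return

    for record in records:
        bucket = record['s3']['bucket']['name']
        # Object keys arrive URL-encoded (spaces become '+')
        key = urllib.parse.unquote_plus(record['s3']['object']['key'])
        print('Received object {} from bucket {}'.format(key, bucket))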

AWS Lambda bootstrap.py file is throwing error while trying to upload data to elastic search

I'm trying to index PDF documents that are uploaded to an S3 bucket. My Lambda function works fine up to the PDF extraction part: it establishes a connection with the Elasticsearch endpoint, but while uploading the data to Elasticsearch for indexing it throws an error. The Lambda function code is below. Please help me with this. Thanks in advance.
from __future__ import print_function
import json
import urllib
import boto3
import slate
import elasticsearch
import datetime

es_endpoint = 'search-sdjsf-zrtisx]sdaswasfsjmtsyuih3awvu.us-east-1.es.amazonaws.com'
es_index = 'pdf_text_extracts'
es_type = 'document'

print('Loading function')
s3 = boto3.client('s3')

# prepare a dict to hold our document data
doc_data = {}
doc_data['insert_time'] = str(datetime.datetime.isoformat(datetime.datetime.now()))

def lambda_handler(event, context):
    # print("Received event: " + json.dumps(event, indent=2))

    # Get the object from the event and show its content type
    bucket = event['Records'][0]['s3']['bucket']['name']
    object_key = urllib.unquote_plus(event['Records'][0]['s3']['object']['key']).decode('utf8')

    try:
        # get the file data from s3
        temp_pdf_file = open('/tmp/tempfile.pdf', 'w')
        response = s3.get_object(Bucket=bucket, Key=object_key)
        print("CONTENT TYPE: " + response['ContentType'])
        # return response['ContentType']

        temp_pdf_file.write(response['Body'].read())  # write the object data to a local file; will be passed to slate
        temp_pdf_file.close()  # close the temporary file for now

        # pull the text from the temporary PDF file using slate
        print("Extracting data from: " + object_key)
        with open('/tmp/tempfile.pdf') as temp_pdf_file:
            doc = slate.PDF(temp_pdf_file)

        # store document data to dict
        doc_data['source_pdf_name'] = object_key
        doc_data['document_text'] = doc[0]  # we're only worried about page 1 at this point
        # datj=json.dumps(doc_data)
        # z=json.loads(datj)
        # print(z)
    except Exception as e:
        print(e)
        print('Error getting object {} from bucket {}. Make sure they exist '
              'and your bucket is in the same region as this '
              'function.'.format(object_key, bucket))
        raise e

    # put the data in ES
    # try:
    es = elasticsearch.Elasticsearch([{'host': es_endpoint, 'port': 443, 'use_ssl': True}])  # hold off on validating certs
    es_response = es.index(index=es_index, doc_type=es_type, body=doc_data)
    print('Data posted to ES: ' + str(es_response))
    # except Exception as e:
    #     print('Data post to ES failed: ' + str(e))
    #     raise e

    return "Done"
I have removed the try and except in the last block to find the actual error, and it throws the error below while trying to upload the data to Elasticsearch.
Traceback (most recent call last):
  File "/var/runtime/awslambda/bootstrap.py", line 576, in <module>
    main()
  File "/var/runtime/awslambda/bootstrap.py", line 571, in main
    handle_event_request(request_handler, invokeid, event_body, context_objs, invoked_function_arn)
  File "/var/runtime/awslambda/bootstrap.py", line 264, in handle_event_request
    result = report_fault_helper(invokeid, sys.exc_info(), None)
  File "/var/runtime/awslambda/bootstrap.py", line 315, in report_fault_helper
    msgs = [str(value), etype.__name__]
Remove the return "Done" at the end; that's not allowed in a Lambda environment.
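If the failure turns out to be the index call being rejected by the Amazon Elasticsearch domain itself, the piece most often missing is request signing: a plain elasticsearch.Elasticsearch client with use_ssl reaches the endpoint, but an access policy that requires IAM will refuse the write. A sketch of the commonly used signed-client setup, assuming the requests-aws4auth package is bundled with the function and using placeholder endpoint and region values:
import boto3
from elasticsearch import Elasticsearch, RequestsHttpConnection
from requests_aws4auth import AWS4Auth

region = 'us-east-1'  # placeholder
credentials = boto3.Session().get_credentials()
awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, 'es',
                   session_token=credentials.token)

es = Elasticsearch(
    hosts=[{'host': 'search-example.us-east-1.es.amazonaws.com', 'port': 443}],  # placeholder endpoint
    http_auth=awsauth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection
)
# es.index(index=es_index, doc_type=es_type, body=doc_data) can then be called as in the question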

AWS Lambda, copy between s3 buckets with python

So I'm writing a Lambda function, which is triggered by an S3 PUT:
import datetime
import boto3
import botocore
# boto3.set_stream_logger('botocore', level='DEBUG')

def lambda_handler(event, context):
    src_bucket_name = event['Records'][0]['s3']['bucket']['name']
    print src_bucket_name
    file = event['Records'][0]['s3']['object']['key']
    split_string = file.split('/')
    file_string = split_string[-1].split('_')
    fecha_str = event['Records'][0]['eventTime']
    fecha_real = datetime.datetime.strptime(fecha_str, '%Y-%m-%dT%H:%M:%S.%fZ') + datetime.timedelta(hours=-6)
    new_path = 'PATH/' + file_string[0].lower() + '/' + str(fecha_real.year) + '/' + str(fecha_real.month) + '/' + split_string[-1]
    s3 = boto3.resource('s3')
    s3_client = boto3.client('s3')
    copy_source = {
        'Bucket': src_bucket_name,
        'Key': file
    }
    s3.meta.client.copy(copy_source, DST_BUCKET_NAME, new_path)
When I run the code I get:
ClientError: An error occurred (404) when calling the HeadObject operation: Not Found
The file does exist (screenshot: file in source bucket).
Could you please tell me what I am doing wrong?
EDIT:
I gave admin permissions to the role I'm using and I'm still getting the same error.
UPDATE - CLOSED:
I deleted the role, made a new one, and changed the copy part of the code to this:
copy_source = {
    'Bucket': src_bucket_name,
    'Key': file
}
r = s3_client.copy_object(
    Bucket=DST_BUCKET_NAME,
    CopySource=copy_source,
    Key=new_path
)
and it worked!
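One hedged note rather than a diagnosis: a 404 from HeadObject on a key that clearly exists is often caused by the key in the S3 event arriving URL-encoded (spaces become '+', special characters become %XX), so the copy looks up a slightly different key. A sketch of decoding it first (Python 3 shown; the surrounding names are the ones from the code above):
import urllib.parse

raw_key = event['Records'][0]['s3']['object']['key']
file = urllib.parse.unquote_plus(raw_key)  # e.g. 'my+report.csv' -> 'my report.csv'

copy_source = {'Bucket': src_bucket_name, 'Key': file}
s3.meta.client.copy(copy_source, DST_BUCKET_NAME, new_path)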

How to configure authorization mechanism inline with boto3

I am using boto3 in AWS Lambda to fetch an object in S3 located in the Frankfurt region.
Signature v4 is necessary; otherwise the following error is returned:
"errorMessage": "An error occurred (InvalidRequest) when calling the GetObject operation: The authorization mechanism you have provided is not supported. Please use AWS4-HMAC-SHA256."
I realize there are ways to configure signature_version (http://boto3.readthedocs.org/en/latest/guide/configuration.html), but since I am using AWS Lambda, I do not have access to the underlying configuration profiles.
The code of my AWS Lambda function:
from __future__ import print_function
import boto3

def lambda_handler(event, context):
    input_file_bucket = event["Records"][0]["s3"]["bucket"]["name"]
    input_file_key = event["Records"][0]["s3"]["object"]["key"]
    input_file_name = input_file_bucket + "/" + input_file_key
    s3 = boto3.resource("s3")
    obj = s3.Object(bucket_name=input_file_bucket, key=input_file_key)
    response = obj.get()
    return event  # echo first key values
Is it possible to configure signature_version within this code, using a Session for example? Or is there any workaround for this?
Instead of using the default session, try using a custom session and Config from boto3.session:
import boto3
import boto3.session
session = boto3.session.Session(region_name='eu-central-1')
s3client = session.client('s3', config= boto3.session.Config(signature_version='s3v4'))
s3client.get_object(Bucket='<Bkt-Name>', Key='S3-Object-Key')
I tried the session approach, but I had issues. This method worked better for me; your mileage may vary:
s3 = boto3.resource('s3', config=Config(signature_version='s3v4'))
You will need to import Config from botocore.client in order to make this work. See below for a functional method to test a bucket (list objects). This assumes you are running it from an environment where your authentication is managed, such as Amazon EC2 or Lambda with an IAM role:
import boto3
from botocore.client import Config
from botocore.exceptions import ClientError

def test_bucket(bucket):
    print 'testing bucket: ' + bucket
    try:
        s3 = boto3.resource('s3', config=Config(signature_version='s3v4'))
        b = s3.Bucket(bucket)
        objects = b.objects.all()
        for obj in objects:
            print obj.key
        print 'bucket test SUCCESS'
    except ClientError as e:
        print 'Client Error'
        print e
        print 'bucket test FAIL'
To test it, simply call the method with a bucket name. Your role will have to grant proper permissions.
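For example, a usage sketch with a placeholder bucket name:
test_bucket('my-example-bucket')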
Using a resource worked for me.
from botocore.client import Config
import boto3

s3 = boto3.resource("s3", config=Config(signature_version="s3v4"))
return s3.meta.client.generate_presigned_url(
    "get_object", Params={"Bucket": AIRFLOW_BUCKET, "Key": key}, ExpiresIn=expTime
)