Basic lambda function trying to get contents of the bucket but getting errors though
import json
import urllib.parse
import boto3
print('Loading function')
s3 = boto3.client('s3')
def lambda_handler(event, context):
#print("Received event: " + json.dumps(event, indent=2))
# Get the object from the event and show its content type
bucket = event['Records'][0]['s3']['bucket']['name']
key = urllib.parse.unquote_plus(event['Records'][0]['s3']['object']['key'], encoding='utf-8')
try:
response = s3.get_object(Bucket=bucket, Key=key)
print("CONTENT TYPE: " + response['ContentType'])
return response['ContentType']
except Exception as e:
print(e)
print('Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'.format(key, bucket))
raise e
Here is the error message when i run the lambda function.
Error message
{
"errorMessage": "'Records'",
"errorType": "KeyError",
"requestId": "5c89bb8e-a70e-4c33-ba00-43174095544e",
"stackTrace": [
" File \"/var/task/lambda_function.py\", line 13, in lambda_handler\n bucket = event['Records'][0]['s3']['bucket']['name']\n"
]
}
Function Logs
START RequestId: 5c89bb8e-a70e-4c33-ba00-43174095544e Version: $LATEST
[ERROR] KeyError: 'Records'
Traceback (most recent call last):
File "/var/task/lambda_function.py", line 13, in lambda_handler
bucket = event['Records'][0]['s3']['bucket']['name']
END RequestId: 5c89bb8e-a70e-4c33-ba00-43174095544e
REPORT RequestId: 5c89bb8e-a70e-4c33-ba00-43174095544e Duration: 1.89 ms Billed Duration: 2 ms Memory Size: 128 MB Max Memory Used: 69 MB Init Duration: 356.28 ms
The problem is that
bucket = event['Records'][0]['s3']['bucket']['name']
Doesn't exist. Check the event object when its been triggered from S3. If you want to test in console you need to pass a similarly shaped object as the event.
Related
I'm using the below Lambda code to read data from an S3 bucket that triggers the lambda function once a file is created into the S3 bucket.
import json
import urllib.parse
import boto3
print('Loading function')
s3 = boto3.client('s3')
def lambda_handler(event, context):
#print("Received event: " + json.dumps(event, indent=2))
# Get the object from the event and show its content type
bucket = event['Records'][0]['s3']['bucket']['name']
key = urllib.parse.unquote_plus(event['Records'][0]['s3']['object']['key'], encoding='utf-8')
print("bucket= ", bucket)
print("key= ", key)
try:
response = s3.get_object(Bucket=bucket, Key=key)
print("CONTENT TYPE: " + response['ContentType'])
return response['ContentType']
except Exception as e:
print(e)
print('Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'.format(key, bucket))
raise e
The uploaded file is a csv file that contains 50 comma delimited 50 records.
I need to stream this received data into an AWS Kinesis Data Stream called test-stream once the file is uploaded to the S3 bucket.. Any help please?
Thanks..
I have a lambda function that moves files from one s3 bucket to another :
import json
import boto3
from datetime import datetime, timedelta
def lambda_handler(event, context):
# TODO implement
SOURCE_BUCKET = 'source-bucket'
DESTINATION_BUCKET = 'destination-bucket'
s3_client = boto3.client('s3')
# Create a reusable Paginator
paginator = s3_client.get_paginator('list_objects_v2')
# Create a PageIterator from the Paginator
page_iterator = paginator.paginate(Bucket=SOURCE_BUCKET)
# Loop through each object, looking for ones older than a given time period
for page in page_iterator:
for object in page['Contents']:
if object['LastModified'] < datetime.now().astimezone() - timedelta(hours=1): # <-- Change time period here
print(f"Moving {object['Key']}")
# Copy object
s3_client.copy_object(
ACL='bucket-owner-full-control',
Bucket=DESTINATION_BUCKET,
Key=object['Key'],
CopySource={'Bucket':SOURCE_BUCKET, 'Key':object['Key']}
)
# Delete original object
s3_client.delete_object(Bucket=SOURCE_BUCKET, Key=object['Key'])
I am getting error :
Response:
{
"errorMessage": "'Contents'",
"errorType": "KeyError",
"stackTrace": [
" File \"/var/task/lambda_function.py\", line 21, in lambda_handler\n for object in page['Contents']:\n"
]
}
Request ID:
"518e0f39-63e4-43df-842d-b73d56f83cd8"
Function Logs:
START RequestId: 518e0f39-63e4-43df-842d-b73d56f83cd8 Version: $LATEST
[ERROR] KeyError: 'Contents'
Traceback (most recent call last):
File "/var/task/lambda_function.py", line 21, in lambda_handler
for object in page['Contents']:END RequestId: 518e0f39-63e4-43df-842d-b73d56f83cd8
REPORT RequestId: 518e0f39-63e4-43df-842d-b73d56f83cd8 Duration: 1611.00 ms Billed Duration: 1700 ms Memory Size: 128 MB Max Memory Used: 76 MB Init Duration: 248.12 ms
can someone help here. It has moved all the files but still giving me error.
This is assuming that the key Contents is always returned. If there are not objects in the bucket this will not exist.
Add a simple if "Contents" in page to handle it not always existing.
So your function code might look like
import json
import boto3
from datetime import datetime, timedelta
def lambda_handler(event, context):
# TODO implement
SOURCE_BUCKET = 'source-bucket'
DESTINATION_BUCKET = 'destination-bucket'
s3_client = boto3.client('s3')
# Create a reusable Paginator
paginator = s3_client.get_paginator('list_objects_v2')
# Create a PageIterator from the Paginator
page_iterator = paginator.paginate(Bucket=SOURCE_BUCKET)
# Loop through each object, looking for ones older than a given time period
for page in page_iterator:
if "Contents" in page:
for object in page['Contents']:
if object['LastModified'] < datetime.now().astimezone() - timedelta(hours=1): # <-- Change time period here
print(f"Moving {object['Key']}")
# Copy object
s3_client.copy_object(
ACL='bucket-owner-full-control',
Bucket=DESTINATION_BUCKET,
Key=object['Key'],
CopySource={'Bucket':SOURCE_BUCKET, 'Key':object['Key']}
)
# Delete original object
s3_client.delete_object(Bucket=SOURCE_BUCKET, Key=object['Key'])
else:
print("No Contents key for page!")
i m trying to create a simple event driven AWS Lambda Python function to extract a ZIP or GZIP attachment from an email stored in S3 by another service (such as Amazon SES).
from __future__ import print_function
import email
import zipfile
import os
import gzip
import string
import boto3
import urllib
print('Loading function')
s3 = boto3.client('s3')
s3r = boto3.resource('s3')
xmlDir = "/tmp/output/"
outputBucket = "" # Set here for a seperate bucket otherwise it is set to the events bucket
outputPrefix = "xml/" # Should end with /
def lambda_handler(event, context):
bucket = event['Records'][0]['s3']['bucket']['name']
key = urllib.unquote_plus(event['Records'][0]['s3']['object']['key']).decode('utf8')
try:
# Set outputBucket if required
if not outputBucket:
global outputBucket
outputBucket = bucket
# Use waiter to ensure the file is persisted
waiter = s3.get_waiter('object_exists')
waiter.wait(Bucket=bucket, Key=key)
response = s3r.Bucket(bucket).Object(key)
# Read the raw text file into a Email Object
msg = email.message_from_string(response.get()["Body"].read())
if len(msg.get_payload()) == 2:
# Create directory for XML files (makes debugging easier)
if os.path.isdir(xmlDir) == False:
os.mkdir(xmlDir)
# The first attachment
attachment = msg.get_payload()[1]
# Extract the attachment into /tmp/output
extract_attachment(attachment)
# Upload the XML files to S3
upload_resulting_files_to_s3()
else:
print("Could not see file/attachment.")
return 0
except Exception as e:
print(e)
print('Error getting object {} from bucket {}. Make sure they exist '
'and your bucket is in the same region as this '
'function.'.format(key, bucket))
raise e
def extract_attachment(attachment):
# Process filename.zip attachments
if "gzip" in attachment.get_content_type():
contentdisp = string.split(attachment.get('Content-Disposition'), '=')
fname = contentdisp[1].replace('\"', '')
open('/tmp/' + contentdisp[1], 'wb').write(attachment.get_payload(decode=True))
# This assumes we have filename.xml.gz, if we get this wrong, we will just
# ignore the report
xmlname = fname[:-3]
open(xmlDir + xmlname, 'wb').write(gzip.open('/tmp/' + contentdisp[1], 'rb').read())
# Process filename.xml.gz attachments (Providers not complying to standards)
elif "zip" in attachment.get_content_type():
open('/tmp/attachment.zip', 'wb').write(attachment.get_payload(decode=True))
with zipfile.ZipFile('/tmp/attachment.zip', "r") as z:
z.extractall(xmlDir)
else:
print('Skipping ' + attachment.get_content_type())
def upload_resulting_files_to_s3():
# Put all XML back into S3 (Covers non-compliant cases if a ZIP contains multiple results)
for fileName in os.listdir(xmlDir):
if fileName.endswith(".xml"):
print("Uploading: " + fileName) # File name to upload
s3r.meta.client.upload_file(xmlDir+'/'+fileName, outputBucket, outputPrefix+fileName)
on running the function i m getting this error
'Records': KeyError
Traceback (most recent call last):
File "/var/task/lambda_function.py", line 25, in lambda_handler
for record in event["Records"]:
KeyError: 'Records'
i tried googling and found few telling me to add Mapping Template --https://intellipaat.com/community/18329/keyerror-records-in-aws-s3-lambda-trigger ,
"KeyError: 'Records'" in AWS S3 - Lambda trigger,
following this link but i m getting some other error
'query': KeyError
Traceback (most recent call last):
File "/var/task/lambda_function.py", line 24, in lambda_handler
for record in event['query']['Records']:
KeyError: 'query'
When trying to create a function lambda in AWS in order to start instance automatically, this is the function:
import boto3
region = 'us-east-1'
instances = ['i-xxx']
ec2 = boto3.client('ec2', region_name=region)
def lambda_handler(event, context):
ec2.stop_instances(InstanceIds=instances)
print('stopped your instances: ' + str(instances))
and after Save and Test - I got this error:
Response:
{
"errorMessage": "2019-09-15T09:54:06.364Z 372c2df4-1303-4326-b882-a04154007881 Task timed out after 3.00 seconds"
}
Request ID:
"372c2df4-1303-4326-b882-a04154007881"
Function Logs:
START RequestId: 372c2df4-1303-4326-b882-a04154007881 Version: $LATEST
END RequestId: 372c2df4-1303-4326-b882-a04154007881
REPORT RequestId: 372c2df4-1303-4326-b882-a04154007881 Duration: 3003.17 ms Billed Duration: 3000 ms Memory Size: 128 MB Max Memory Used: 81 MB Init Duration: 115.73 ms
XRAY TraceId: 1-5d7e0a3b-79a0391249fcda644105b8ba SegmentId: 0eefbaed756a35c4 Sampled: false
2019-09-15T09:54:06.364Z 372c2df4-1303-4326-b882-a04154007881 Task timed out after 3.00 seconds
Check if you have set the aws lambda timeout to some appropriate value, because it seems to be on the default value which is 3 sec and looks like it not going to be sufficient for you.
Timeout – The amount of time that Lambda allows a function to run before stopping it. The default is 3 seconds. The maximum allowed value is 900 seconds.aws docs
To start and stop the instance boto3 docs
import sys
import boto3
from botocore.exceptions import ClientError
instance_id = sys.argv[2]
action = sys.argv[1].upper()
ec2 = boto3.client('ec2')
if action == 'ON':
# Do a dryrun first to verify permissions
try:
ec2.start_instances(InstanceIds=[instance_id], DryRun=True)
except ClientError as e:
if 'DryRunOperation' not in str(e):
raise
# Dry run succeeded, run start_instances without dryrun
try:
response = ec2.start_instances(InstanceIds=[instance_id], DryRun=False)
print(response)
except ClientError as e:
print(e)
else:
# Do a dryrun first to verify permissions
try:
ec2.stop_instances(InstanceIds=[instance_id], DryRun=True)
except ClientError as e:
if 'DryRunOperation' not in str(e):
raise
# Dry run succeeded, call stop_instances without dryrun
try:
response = ec2.stop_instances(InstanceIds=[instance_id], DryRun=False)
print(response)
except ClientError as e:
print(e)
I'm trying to index pdf documents that are uploaded to s3 bucket. My lambda function is working fine til PDF extraction part. it's establishing connection with elastic search endpoint and while uploading data elastic search for indexing, it's throwing error. Please find lambda function code below. Please help me with this. Thanks in advance.
from __future__ import print_function
import json
import urllib
import boto3
import slate
import elasticsearch
import datetime
es_endpoint = 'search-sdjsf-zrtisx]sdaswasfsjmtsyuih3awvu.us-east-
1.es.amazonaws.com'
es_index = 'pdf_text_extracts'
es_type = 'document'
print('Loading function')
s3 = boto3.client('s3')
# prepare a dict to hold our document data
doc_data = {}
doc_data['insert_time'] =
str(datetime.datetime.isoformat(datetime.datetime.now()))
def lambda_handler(event, context):
#print("Received event: " + json.dumps(event, indent=2))
# Get the object from the event and show its content type
bucket = event['Records'][0]['s3']['bucket']['name']
object_key = urllib.unquote_plus(event['Records'][0]['s3']['object']
['key']).decode('utf8')
try:
# get the file data from s3
temp_pdf_file = open('/tmp/tempfile.pdf', 'w')
response = s3.get_object(Bucket=bucket, Key=object_key)
print("CONTENT TYPE: " + response['ContentType'])
# return response['ContentType']
temp_pdf_file.write(response['Body'].read()) # write the object data
to a local file; will be passed to slate
temp_pdf_file.close() # close the temporary file for now
# pull the text from the temporary PDF file using slate
print("Extracting data from: " + object_key)
with open('/tmp/tempfile.pdf') as temp_pdf_file:
doc = slate.PDF(temp_pdf_file)
# store document data to dict
doc_data['source_pdf_name'] = object_key
doc_data['document_text'] = doc[0] # we're only worried about page 1
at this point
#datj=json.dumps(doc_data)
#z=json.loads(datj)
#print(z)
except Exception as e:
print(e)
print('Error getting object {} from bucket {}. Make sure they exist
and your bucket is in the same region as this
function.'.format(object_key, bucket))
raise e
# put the data in ES
#try:
es = elasticsearch.Elasticsearch([{'host': es_endpoint, 'port': 443,
'use_ssl': True}]) # hold off on validating certs
es_response = es.index(index=es_index, doc_type=es_type, body=doc_data)
print('Data posted to ES: ' + str(es_response))
#except Exception as e:
#print('Data post to ES failed: ' + str(e))
#raise e
return "Done"
I have removed try and except in last block to find the actual error and its throwing the below error while trying to upload data to elastic search.
Traceback (most recent call last):
File "/var/runtime/awslambda/bootstrap.py", line 576, in <module>
main()
File "/var/runtime/awslambda/bootstrap.py", line 571, in main
handle_event_request(request_handler, invokeid, event_body, context_objs,
invoked_function_arn)
File "/var/runtime/awslambda/bootstrap.py", line 264, in
handle_event_request
result = report_fault_helper(invokeid, sys.exc_info(), None)
File "/var/runtime/awslambda/bootstrap.py", line 315, in report_fault_helper
msgs = [str(value), etype.__name__]
Remove the return "Done" at the end, that's not allowed in a Lambda environment.