AWS Lambda error No module named 'StringIO' or No module named 'StringIO' - amazon-web-services

I try to use AWS Lambda for mass email sending, the code we use as the link below:
https://aws.amazon.com/cn/premiumsupport/knowledge-center/mass-email-ses-lambda/
from __future__ import print_function
import StringIO
import csv
import json
import os
import urllib
import zlib
from time import strftime, gmtime
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import boto3
import botocore
import concurrent.futures
__author__ = 'Said Ali Samed'
__date__ = '10/04/2016'
__version__ = '1.0'
# Get Lambda environment variables
region = os.environ['us-east-1']
max_threads = os.environ['10']
text_message_file = os.environ['email_body.txt']
html_message_file = os.environ['email_body.html']
# Initialize clients
s3 = boto3.client('s3', region_name=region)
ses = boto3.client('ses', region_name=region)
send_errors = []
mime_message_text = ''
mime_message_html = ''
def current_time():
return strftime("%Y-%m-%d %H:%M:%S UTC", gmtime())
def mime_email(subject, from_address, to_address, text_message=None, html_message=None):
msg = MIMEMultipart('alternative')
msg['Subject'] = subject
msg['From'] = from_address
msg['To'] = to_address
if text_message:
msg.attach(MIMEText(text_message, 'plain'))
if html_message:
msg.attach(MIMEText(html_message, 'html'))
return msg.as_string()
def send_mail(from_address, to_address, message):
global send_errors
try:
response = ses.send_raw_email(
Source=from_address,
Destinations=[
to_address,
],
RawMessage={
'Data': message
}
)
if not isinstance(response, dict): # log failed requests only
send_errors.append('%s, %s, %s' % (current_time(), to_address, response))
except botocore.exceptions.ClientError as e:
send_errors.append('%s, %s, %s, %s' %
(current_time(),
to_address,
', '.join("%s=%r" % (k, v) for (k, v) in e.response['ResponseMetadata'].iteritems()),
e.message))
def lambda_handler(event, context):
global send_errors
global mime_message_text
global mime_message_html
try:
# Read the uploaded csv file from the bucket into python dictionary list
bucket = event['Records'][0]['s3']['bucket']['name']
key = urllib.unquote_plus(event['Records'][0]['s3']['object']['key']).decode('utf8')
response = s3.get_object(Bucket=bucket, Key=key)
body = zlib.decompress(response['Body'].read(), 16+zlib.MAX_WBITS)
reader = csv.DictReader(StringIO.StringIO(body),
fieldnames=['from_address', 'to_address', 'subject', 'message'])
# Read the message files
try:
response = s3.get_object(Bucket=bucket, Key=text_message_file)
mime_message_text = response['Body'].read()
except:
mime_message_text = None
print('Failed to read text message file. Did you upload %s?' % text_message_file)
try:
response = s3.get_object(Bucket=bucket, Key=html_message_file)
mime_message_html = response['Body'].read()
except:
mime_message_html = None
print('Failed to read html message file. Did you upload %s?' % html_message_file)
if not mime_message_text and not mime_message_html:
raise ValueError('Cannot continue without a text or html message file.')
# Send in parallel using several threads
e = concurrent.futures.ThreadPoolExecutor(max_workers=max_threads)
for row in reader:
from_address = row['from_address'].strip()
to_address = row['to_address'].strip()
subject = row['subject'].strip()
message = mime_email(subject, from_address, to_address, mime_message_text, mime_message_html)
e.submit(send_mail, from_address, to_address, message)
e.shutdown()
except Exception as e:
print(e.message + ' Aborting...')
raise e
print('Send email complete.')
# Remove the uploaded csv file
try:
response = s3.delete_object(Bucket=bucket, Key=key)
if 'ResponseMetadata' in response.keys() and response['ResponseMetadata']['HTTPStatusCode'] == 204:
print('Removed s3://%s/%s' % (bucket, key))
except Exception as e:
print(e)
# Upload errors if any to S3
if len(send_errors) > 0:
try:
result_data = '\n'.join(send_errors)
logfile_key = key.replace('.csv.gz', '') + '_error.log'
response = s3.put_object(Bucket=bucket, Key=logfile_key, Body=result_data)
if 'ResponseMetadata' in response.keys() and response['ResponseMetadata']['HTTPStatusCode'] == 200:
print('Send email errors saved in s3://%s/%s' % (bucket, logfile_key))
except Exception as e:
print(e)
raise e
# Reset publish error log
send_errors = []
if __name__ == "__main__":
json_content = json.loads(open('event.json', 'r').read())
lambda_handler(json_content, None)
but it has problem when i choose python 2.7.the error is
module initialization error 'us-east-1'
when i choose python 3.6 the error is
Unable to import module 'lambda_function': No module named 'StringIO'
anyone can tell me what is the problem it is ?

From Python v3, the StringIO module has gone. Instead, import the io module and use io.StringIO.
The problem with the v27 version is presumably that the following statement is failing:
region = os.environ['us-east-1']
This will result in a KeyError if us-east-1 is not an available environment variable. Instead use AWS_REGION or AWS_DEFAULT_REGION. See the full list of Lambda environment variables.

Please set the environment variables as described in step 4 of the article:
"Configure Lambda environment variables appropriate to your usage scenario. For example, the following variables would be valid for a given use case:
REGION=us-east-1, MAX_THREADS=10, TEXT_MESSAGE_FILE=email_body.txt, HTML_MESSAGE_FILE=email_body.html."
What was done (as per the code provided in the question) is replacing names of environment variables with their values, which means that python is looking for e.g. 'us-east-1' environment variable which isn't there...
This is the original code
# Get Lambda environment variables
region = os.environ['REGION']
max_threads = os.environ['MAX_THREADS']
text_message_file = os.environ['TEXT_MESSAGE_FILE']
html_message_file = os.environ['HTML_MESSAGE_FILE']
You can also hard-code the values, like below:
# Get Lambda environment variables
region = 'us-east-1'
max_threads = '10'
text_message_file = 'email_body.txt'
html_message_file = 'email_body.html'
but I'd suggest to set the environment variables instead (and use the version of script provided by the article author). When it comes to setting environment variables in Lambda, see this article :)

Related

Lambda task timeout but no application log

I have a python lambda that triggers by S3 uploads to a specific folder. The lambda function is to process the uploaded file and outputs it to another folder on the same S3 bucket.
The issue is that when I do a bulk upload using AWS console, some files do not get processed. I ended up setting a dead letter queue to catch these invocations. While inspecting the message in the queue, there is a request ID which I tried to find it in the lambda logs.
These are the logs for the request ID:
Now the odd part is that in the python code, the first line after the imports is print('Loading function') which does not show up in the lambda log?
Added the python code here. It should still print the Processing file name: " + key which is inside the handler ya?
import urllib.parse
from datetime import datetime
import boto3
from constants import CONTENT_TYPE, XML_EXTENSION, VALIDATING
from xml_process import *
from s3Integration import download_file
print('Loading function')
s3 = boto3.client('s3')
def lambda_handler(event, context):
# Get the object from the event and show its content type
bucket = event['Records'][0]['s3']['bucket']['name']
key = urllib.parse.unquote_plus(event['Records'][0]['s3']['object']['key'], encoding='utf-8')
print("Processing file name: " + key)
try:
response = s3.get_object(Bucket=bucket, Key=key)
xml_content = response["Body"].read()
content_type = response["ContentType"]
tree = ET.fromstring(xml_content)
key_file_name = key.split("/")[1]
# Creating a temporary copy by downloading file to get the namespaces
temp_file_name = "/tmp/" + key_file_name
download_file(key, temp_file_name)
namespaces = {node[0]: node[1] for _, node in ET.iterparse(temp_file_name, events=['start-ns'])}
for name, value in namespaces.items():
ET.register_namespace(name, value)
# Preparing path for file processing
processed_file = key_file_name.split(".")[0] + "_processed." + key_file_name.split(".")[1]
print(processed_file, "processed")
db_record = XMLMapping(file_path=key,
processed_file_path=processed_file,
uploaded_by="lambda",
status=VALIDATING, uploaded_date=datetime.now(), is_active=True)
session.add(db_record)
session.commit()
if key_file_name.split(".")[1] == XML_EXTENSION:
if content_type in CONTENT_TYPE:
xml_parse(tree, db_record, processed_file, True)
else:
print("Content Type is not valid. Provided value: ", content_type)
else:
print("File extension is not valid. Provided extension: ", key_file_name.split(".")[1])
return "success"
except Exception as e:
print(e)
raise e
I don't think its a permission issue as other files uploaded in the same batch were processed successfully.

AWS lambda extensions not sharing environment variables

The following is a secrets-wrapper.py lambda extension:
#!/usr/bin/env python3
import boto3
import json
import os
LAMBDA_WRAPPER_NAME = "secrets-wrapper"
def debug_print(message):
print(message, flush=True)
def error_print(message):
print(message, flush=True)
def load_secrets():
if not 'APP_PARAMS_PATH' in os.environ:
error_print(
f"{LAMBDA_WRAPPER_NAME} unable to load secrets APP_PARAMS_PATH not found")
return
app_config_path = os.environ['APP_PARAMS_PATH']
if 'APP_ENV' in os.environ:
env = os.environ['APP_ENV']
full_config_path = '/' + env + '/' + app_config_path
else:
full_config_path = '/' + app_config_path
debug_print(
f"{LAMBDA_WRAPPER_NAME} Loading secrets for path {full_config_path}")
try:
client = boto3.client('ssm')
# Get all parameters for this app
param_details = client.get_parameters_by_path(
Path=full_config_path,
Recursive=False,
WithDecryption=True
)
# Loop through the returned parameters and populate the ConfigParser
if 'Parameters' in param_details:
for param in param_details.get('Parameters'):
debug_print(f"Found param: {param}")
param_name = param.get('Name').split("/")[-1]
debug_print(f"Name: {param_name}")
param_value = param.get('Value')
debug_print(f"Value: {param_value}")
os.environ[param_name] = param_value
except:
debug_print(f"{LAMBDA_WRAPPER_NAME} could not load from SSM")
raise Exception(f"{LAMBDA_WRAPPER_NAME} could not load from SSM")
def main():
load_secrets()
# Start the function
args = os.sys.argv[1:]
os.system(" ".join(args))
if __name__ == "__main__":
main()
and this is the function:
import json
import os
def lambda_handler(event, context):
# TODO implement
print(f"ENV foo check: {`os.environ.get('foo')`}", flush=True)
return {
'statusCode': 200,
'body': json.dumps('Hello from Lambda!')
}
when setting the APP_ENV to the lambda function:
aws lambda update-function-configuration --function-name my-function-example \
--environment "Variables={APP_NAME=myApp}"
within the lambda-extension it fetches no problem:
env = os.environ['APP_ENV'] #dev
However, when setting the environment variables within the extension os.environ[param_name] = param_value
when fetching it back in the invoked function os.environ.get('foo')
it is returned as None as if it was not set. If I output the set environment variable anywhere in the extension it will be indeed set.
Question is:
is the environment variable strictly one direction?
if not, how can I share ENV variables from extension to runtime function?

IICS taskflow - Notification task - attach file

I have an IICS taskflow with a mapping task and a notification task. The target of the mapping task is a csv stored in a server location.
With the notification task, I want to send an email with the csv attached. Do you know if this is possible or if is there another way to get to send the target csv by email??
Not the ideal solution, but you can write a simple python program to send the csv as an email, just provide the path of the csv to python program. and then, execute the python script from informatica as a command task. (IF Success) it is super easy to use a python script.
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email import encoders
import os
import shutil
def send_mail(body_text, fromaddr, recipient_list, smtp_login, smtp_pass, file_path):
msg = MIMEMultipart()
msg['From'] = fromaddr
msg['To'] = ', '.join(recipient_list)
msg['Subject'] = 'your Subject variable'
msg.attach(MIMEText(body_text, 'plain'))
filename = os.path.basename(file_path)
attachment = open(file_path, "rb")
part = MIMEBase('multipart', 'mixed; name=%s' % filename)
part.set_payload(attachment.read())
encoders.encode_base64(part)
part.add_header('Content-Disposition', "attachment; filename= %s" % filename)
msg.attach(part)
server = smtplib.SMTP(host="your.mailserver.com", port=123)
# eg smtp.google.com
server.starttls()
server.login(smtp_login, smtp_pass)
text = msg.as_string()
server.set_debuglevel(1)
server.sendmail(fromaddr, recipient_list, text)
server.quit()
mainDir = '/path/to/file'
sendFileName = 'yourfilename' + '.csv'
sourceFilePath = mainDir + 'file_send/filename.csv'
body_text = '''
Dear All,
Please find attached document
Thank You.
'''
smtp_login = "emailusername"
smtp_pass = "emaipassword"
recipient_list = ['abc#company.com', 'def#company.com']
file_path = os.path.abspath(sendFilePath)
fromaddr = 'emailusername#company.com'
send_mail(body_text=body_text, fromaddr=fromaddr, recipient_list=recipient_list, smtp_login=smtp_login, smtp_pass=smtp_pass,
file_path=file_path)
This code may needs some modifications but thought it might help someone.

Get information about a file uploaded to S3

i have created a lambda function that sends emails whenever a file is uploaded on s3 bucket, but now i want to have all the informations related to that file as the name, size, date and time of upload, and if it's possible where it comes from.
I have all this infortmation on aws console, but want to have it in the email body.
i am using serverless framework. v 1.22.0
here is my code
import json
import boto3
import botocore
import logging
import sys
import os
import traceback
from botocore.exceptions import ClientError
from pprint import pprint
from time import strftime, gmtime
email_from = '********#*****.com'
email_to = '********#*****.com'
email_subject = 'new event on s3 '
email_body = 'a new file is uploaded'
#setup simple logging for INFO
logger = logging.getLogger()
logger.setLevel(logging.INFO)
from botocore.exceptions import ClientError
def sthree(event, context):
"""Send email whenever a file is uploaded to S3"""
body = {}
status_code = 200
email_body = str(context)
try:
s3 = boto3.client('s3')
ses = boto3.client('ses')
ses.send_email(Source = email_from,
Destination = {'ToAddresses': [email_to,],},
Message = {'Subject': {'Data': email_subject}, 'Body':{'Text' : {'Data': email_body}}}
)
except Exception as e:
print(traceback.format_exc())
status_code = 500
body["message"] = json.dumps(e)
response = {
"statusCode": 200,
"body": json.dumps(body)
}
return response
Here is the event json structure sent by S3 upon object creation:
http://docs.aws.amazon.com/AmazonS3/latest/dev/notification-content-structure.html
You can get the file names, sizes and source ip like this:
for record in event['Records']:
filename = record['s3']['object']['key'];
filesize = record['s3']['object']['size'];
source = record['requestParameters']['sourceIPAddress'];
eventTime = record['eventTime'];
def lambda_handler(event, context):
s3 = boto3.client('s3')
email_from = 'XXXXXXXXX#XXX.com'
email_to = 'XXXXXXXXX#XXX.com'
email_subject = 'new event on s3'
email_body = "File Name :" + event[u'Records'][0][u's3'][u'object'][u'key'] + "\n" + "File Size :" + str(event[u'Records'][0][u's3'][u'object'][u'size']) + "\n" + "Upload Time :" + event[u'Records'][0][u'eventTime'] + "\n" + "User Details :" + event[u'Records'][0][u'userIdentity'][u'principalId']
ses = boto3.client('ses')
ses.send_email(Source = email_from,
Destination = {'ToAddresses': [email_to,],},
Message = {'Subject': {'Data': email_subject}, 'Body':{'Text' : {'Data': email_body}}}
)
print("Function execution Completed !!!")

Alarm clock that interacted with google calendar API

I am looking for some help on an alarm clock that interacted with google calendar.
I have some problem with the code now where is not pulling down the events.
here is the errore i get now:
INFO:main:Polling calendar for events...
INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/calendar/v3/calendars/primary/events?alt=json&singleEvents=true
INFO:main:Polling calendar for events...
INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/calendar/v3/calendars/primary/events?alt=json&singleEvents=true
Process finished with exit code -1
# Inspired from 'Raspberry Pi as a Google Calender Alarm Clock'
# http://www.esologic.com/?p=634
#and this link as well https://github.com/ehamiter/get-on-the-bus
from datetime import datetime
import logging, os, platform, re, time
from apiclient import discovery
import httplib2
from oauth2client.client import flow_from_clientsecrets
from oauth2client.file import Storage
from oauth2client.tools import run_flow
SCOPES = 'https://www.googleapis.com/auth/calendar.readonly'
CLIENT_SECRET_FILE = 'client_secret.json'
APPLICATION_NAME = 'Google Calendar API Python Quickstart'
FREQUENCY_CHECK = 5 # in second
MP3_FOLDER = 'E:\Users\Andrew.Price\PycharmProjects\SimpleAlarm\MP3'
CALENDAR_ID ='primary'
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class Alarm():
system = platform.system().lower()
flow = flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES)
flow.params['access_type'] = 'offline'
flow.params['approval_prompt'] = 'force'
storage = Storage('calendar.dat')
credentials = storage.get()
if credentials is None or credentials.invalid == True:
credentials = run_flow(flow, storage)
http = httplib2.Http()
http = credentials.authorize(http)
service = discovery.build('calendar', 'v3', http=http)
#service = build(serviceName='calendar', version='v3', http=http, developerKey=API_KEY)
def check_credentials(self):
if self.credentials is None or self.credentials.invalid == True:
credentials = run_flow(self.flow, self.storage)
def calendar_event_query(self):
self.check_credentials()
today = datetime.today()
events = self.service.events().list(singleEvents=True, calendarId=CALENDAR_ID).execute()
#events = self.service.events().list(singleEvents=True).execute()
for i, event in enumerate(events['items']):
name = event['summary'].lower()
try:
start = event['start']['dateTime'][:-9]
except KeyError:
start = ''
description = event.get('description', '')
repeat = True if description.lower() == 'repeat' else False
now = today.strftime('%Y-%m-%dT%H:%M')
if start >= now:
logger.debug('Event #%s, Name: %s, Start: %s', i, name, start)
if start == now:
if name.startswith('say'):
name = re.sub(r'[^a-zA-Z0-9\s\']', '', name)
command = '{0} "{1}"'.format('say' if system == 'darwin' else 'espeak -ven+m2', name[4:])
logger.info('Event starting. Announcing \'%s\'...', name[4:])
else:
mp3_files = os.listdir(MP3_FOLDER)
mp3_name = name.replace(' ', '_') + '.mp3'
mp3_name = mp3_name if mp3_name in mp3_files else 'default.mp3'
command = 'mpg123 \'{}/{}\''.format(MP3_FOLDER, mp3_name)
logger.info('Event %s starting. Playing mp3 file %s...', name, mp3_name)
os.system(command)
if repeat == False:
time.sleep(60)
def poll(self):
logger.info('Polling calendar for events...')
self.calendar_event_query()
while True:
a = Alarm()
a.poll()
time.sleep(FREQUENCY_CHECK)
I have changed the code and got it to work using this code from Matt http://mattdyson.org/projects/alarmpi/#comment-20249
This is the code that I have changed. It is not pretty yet just works for now
from __future__ import print_function
import pytz
import dateutil.parser
import httplib2
from oauth2client import tools
from oauth2client import client
import datetime
import logging
from googleapiclient.discovery import build
from apiclient import discovery
from oauth2client.file import Storage
import Settings
import os
try:
import argparse
flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
flags = None
log = logging.getLogger('root')
# If modifying these scopes, delete your previously saved credentials
# at ~/.credentials/calendar-python-quickstart.json
SCOPES = 'https://www.googleapis.com/auth/calendar.readonly'
CLIENT_SECRET_FILE = 'client_secret.json'
APPLICATION_NAME = 'Smart-Alarm'
class AlarmGatherer:
def __init__(self):
#home_dir = os.path.expanduser('~')
#credential_dir = os.path.join(home_dir, 'calendar.dat')
#if not os.path.exists(credential_dir):
# os.makedirs(credential_dir)
#credential_path = os.path.join(credential_dir, 'client_secret.json')
SCOPES = 'https://www.googleapis.com/auth/calendar.readonly'
CLIENT_SECRET_FILE = 'client_secret.json'
APPLICATION_NAME = 'Smart-Alarm'
self.FLOW = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES)
self.storage = Storage('calendar.dat')
self.credentials = self.storage.get()
if not self.checkCredentials():
log.error("GCal credentials have expired")
log.warn("Remove calendar.dat and run 'python AlarmGatherer.py' to fix")
return
http = httplib2.Http()
http = self.credentials.authorize(http)
self.service = build('calendar', 'v3', http=http)
def checkCredentials(self):
return not (self.credentials is None or self.credentials.invalid == True)
def generateAuth(self):
self.credentials = tools.run_flow(self.FLOW, self.storage)
def getNextEvent(self, today=False):
log.debug("Fetching details of next event")
if not self.checkCredentials():
log.error("GCal credentials have expired")
log.warn("Remove calendar.dat and run 'python AlarmGatherer.py' to fix")
raise Exception("GCal credentials not authorized")
#time = datetime.datetime.utcnow().isoformat() + 'Z' # 'Z' indicates UTC
time = datetime.datetime.now()
if not today:
# We want to find events tomorrow, rather than another one today
log.debug("Skipping events from today")
#time += datetime.timedelta(days=1) # Move to tomorrow
time = time.replace(hour=10, minute=0, second=0, microsecond=0) # Reset to 10am the next day
# 10am is late enough that a night shift from today won't be caught, but a morning shift
# from tomorrow will be caught
result = self.service.events().list(
calendarId='primary',
timeMin="%sZ" % (time.isoformat()),
maxResults=1,
singleEvents=True,
orderBy='startTime'
).execute()
events = result.get('items', [])
return events[0]
def getNextEventTime(self, includeToday=False):
log.debug("Fetching next event time (including today=%s)" % (includeToday))
nextEvent = self.getNextEvent(today=includeToday)
start = dateutil.parser.parse(nextEvent['start']['dateTime'])
# start = dateutil.parser.parse(nextEvent['start']['dateTime'],ignoretz=True)
# start = start.replace(tzinfo=pytz.timezone('Africa/Johannesburg'))
return start
def getNextEventLocation(self, includeToday=False):
log.debug("Fetching next event location (including today=%s)" % (includeToday))
nextEvent = self.getNextEvent(today=includeToday)
if (nextEvent['location']):
return nextEvent['location']
return None
def getDefaultAlarmTime(self):
defaultTime = ('0600')
#defaultTime = self.settings.getint('default_wake')
#defaultTime = self.settings.getint('default_wake')
defaultHour = int(defaultTime[:2])
defaultMin = int(defaultTime[2:])
alarm = datetime.datetime.now(pytz.timezone('Africa/Johannesburg'))
alarm += datetime.timedelta(days=1) # Move to tomorrow
alarm = alarm.replace(hour=defaultHour, minute=defaultMin, second=0, microsecond=0)
return alarm
if __name__ == '__main__':
print("Running credential check")
a = AlarmGatherer()
try:
if not a.checkCredentials():
raise Exception("Credential check failed")
except:
print("Credentials not correct, please generate new code")
a.generateAuth()
a = AlarmGatherer()
print(a.getNextEventTime())
print(a.getNextEventLocation())