Related
My lambda function is to spin up a transient EMR. I am getting the error below:
"errorMessage": "An error occurred (AccessDeniedException) when calling the RunJobFlow operation: User: arn:aws:sts::111111111115:assumed-role/lambda-eks-role/transient_job is not authorized to perform: elasticmapreduce:RunJobFlow on resource: arn:aws:elasticmapreduce:ap-southeast-1:111111111115:cluster/* because no identity-based policy allows the elasticmapreduce:RunJobFlow action",
The above is a result of the IAM role condition below:
{
"Sid": "RunJobFlowExplicitlyWithEMRManagedTag",
"Effect": "Allow",
"Action": [
"elasticmapreduce:RunJobFlow"
],
"Resource": "*",
"Condition": {
"StringEquals": {
"aws:RequestTag/for-use-with-amazon-emr-managed-policies": "true"
}
}
}
I was told to pass the above tag (i.e., "for-use-with-amazon-emr-managed-policies": "true") when I create my cluster. How do I do that? Every time I search for Lambda and RequestTag, I'm not getting anything relevant.
FYI, I have no privilege to change the IAM roles. I was told by the admin, and the exact words were: "Can you add this tag ("for-use-with-amazon-emr-managed-policies": "true") to the cluster you are creating?"
I believe I have to add the tag in the function launch_transient_emr() but I have no idea where exactly and how (and I'm still searching online for any relevant information). Any guidance is appreciated.
A snippet of my Lambda Code:
import json
import os
import urllib.parse
from datetime import datetime

import boto3

### Steps Configs (under function 'get_emr_step')
# Timestamp suffix used to make cluster/step names unique per invocation.
# NOTE(review): lowercase "%s" is the non-portable epoch-seconds directive and
# the [:-3] slice trims its last three digits -- this looks like it was meant
# to be "%H%M%S"; confirm the intended format before changing the behavior.
TODAY_DATE = datetime.today().strftime("%Y%m%d") + datetime.today().strftime("%H%M%s")[:-3]

# Maps a job type (derived from the S3 key in lambda_handler) to the Spark
# script that processes it and the S3 prefix that receives its results.
# The keys must match what lambda_handler computes ('type1'/'type2'); they
# previously read 'cowrie'/'suricata', which made JOB_TYPE_MAPPING[job_type]
# raise KeyError for every event.
JOB_TYPE_MAPPING = {
    'type1': {
        'job-script-path': 's3://bucket-test-transient/transient-job-scripts/emr_type1_job.py',
        'output_file_name': 'type1-results/'
    },
    'type2': {
        'job-script-path': 's3://bucket-test-transient/transient-job-scripts/emr_type2_job.py',
        'output_file_name': 'type2-results/'
    }
}

### EMR Job Running Configs (under function 'launch_transient_emr')
CLUSTER_NAME = 'transient_emr_cluster_' + TODAY_DATE
# All runtime configuration is injected through Lambda environment variables;
# a missing variable fails fast at import time with KeyError.
LOGURI = os.environ['LOGURI']
RELEASE_LABEL = os.environ['RELEASE_LABEL']
EBS_ROOT_VOLUME_SIZE = os.environ['EBS_ROOT_VOLUME_SIZE']
# Instance Variables
MASTER_INSTANCE_TYPE = os.environ['MASTER_INSTANCE_TYPE']
SLAVE_INSTANCE_TYPE = os.environ['SLAVE_INSTANCE_TYPE']
INSTANCE_COUNT = os.environ['INSTANCE_COUNT']
EC2_SUBNET_ID = os.environ['EC2_SUBNET_ID']
# Roles
JOB_FLOW_ROLE = os.environ['JOB_FLOW_ROLE']
SERVICE_ROLE = os.environ['SERVICE_ROLE']
# Bootstrap
BOOTSTRAP_PATH = os.environ['BOOTSTRAP_PATH']
# Output File Configs
OUTPUT_BUCKET_NAME = os.environ['OUTPUT_BUCKET_NAME']
def get_emr_step(job_type, source_bucket_name, source_key):
    """Build the single-step Spark submission for the given job type and S3 object.

    :param job_type: key into JOB_TYPE_MAPPING selecting script and output prefix.
    :param source_bucket_name: bucket of the object that triggered the job.
    :param source_key: key of the triggering object; its second path segment
        is used as the job date in the step name.
    :return: a one-element list of EMR step dicts for run_job_flow(Steps=...).
    """
    job_date = source_key.split("/")[1]
    job_config = JOB_TYPE_MAPPING[job_type]
    # spark-submit invocation executed on the cluster via command-runner.
    step_args = [
        "sudo",
        "spark-submit",
        "--deploy-mode",
        "client",
        job_config["job-script-path"],
        "--input_bucket_name",
        source_bucket_name,
        "--input_key_name",
        source_key,
        "--output_bucket_name",
        OUTPUT_BUCKET_NAME,
        "--output_file_name",
        job_config["output_file_name"],
    ]
    return [
        {
            "Name": job_type + "-daily-job-" + job_date + "-" + TODAY_DATE,
            "ActionOnFailure": "TERMINATE_CLUSTER",
            "HadoopJarStep": {
                "Jar": "command-runner.jar",
                "Args": step_args,
            },
        }
    ]
def launch_transient_emr(spark_steps):
    """Launch a transient (auto-terminating) EMR cluster that runs *spark_steps*.

    The cluster request is tagged with 'for-use-with-amazon-emr-managed-policies'
    = 'true' because the caller's IAM policy only allows
    elasticmapreduce:RunJobFlow under an aws:RequestTag condition on that tag;
    without it the call fails with AccessDeniedException.

    :param spark_steps: list of EMR step dicts, e.g. from get_emr_step().
    :return: the RunJobFlow response dict (contains the new JobFlowId).
    """
    client = get_emr_client()
    response = client.run_job_flow(
        Name=CLUSTER_NAME,
        LogUri=LOGURI,
        ReleaseLabel=RELEASE_LABEL,
        # Environment variables are strings; the EMR API requires integers
        # here, so cast explicitly to avoid ParamValidationError.
        EbsRootVolumeSize=int(EBS_ROOT_VOLUME_SIZE),
        Instances={
            'MasterInstanceType': MASTER_INSTANCE_TYPE,
            'SlaveInstanceType': SLAVE_INSTANCE_TYPE,
            'InstanceCount': int(INSTANCE_COUNT),
            # Transient cluster: terminate as soon as all steps finish.
            'KeepJobFlowAliveWhenNoSteps': False,
            'TerminationProtected': False,
            'Ec2SubnetId': EC2_SUBNET_ID
        },
        Applications=[{'Name': 'Spark'}],
        Configurations=[
            {
                # Use the Glue Data Catalog as the Spark/Hive metastore.
                'Classification': 'spark-hive-site',
                'Properties': {
                    'hive.metastore.client.factory.class': 'com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory'}
            },
            {
                "Classification": "spark",
                "Properties": {
                    "maximizeResourceAllocation": "true"
                }
            },
            {
                "Classification": "spark-defaults",
                "Properties": {
                    "spark.executorEnv.ARROW_PRE_0_15_IPC_FORMAT": "1",
                    "spark.network.timeout": "1500"
                }
            },
            {
                "Classification": "hdfs-site",
                "Properties": {
                    "dfs.replication": "2"
                }
            },
            {
                "Classification": "livy-conf",
                "Properties": {
                    "livy.server.session.timeout": "10h"
                }
            },
            {
                "Classification": "emrfs-site",
                "Properties": {
                    "fs.s3.maxConnections": "100"
                }
            }
        ],
        VisibleToAllUsers=True,
        JobFlowRole=JOB_FLOW_ROLE,
        ServiceRole=SERVICE_ROLE,
        Steps=spark_steps,
        BootstrapActions=[
            {
                'Name': 'string',  # TODO(review): placeholder name left over from the API docs
                'ScriptBootstrapAction': {
                    'Path': BOOTSTRAP_PATH
                }
            }
        ],
        # Request tag required by the identity policy's
        # aws:RequestTag/for-use-with-amazon-emr-managed-policies condition.
        Tags=[
            {
                'Key': 'for-use-with-amazon-emr-managed-policies',
                'Value': 'true'
            }
        ]
    )
    return response
def get_emr_client():
    """Return a boto3 EMR client that uses the Lambda execution-role credentials."""
    emr_client = boto3.client("emr")
    return emr_client
def lambda_handler(event, context):
    """S3-trigger entry point: launch a transient EMR cluster for the new object.

    Reads the bucket/key from the first S3 event record, picks the job type
    from the key, and starts the cluster. Exceptions are printed and re-raised
    so the invocation is marked failed.
    """
    s3_record = event['Records'][0]['s3']
    source_bucket_name = s3_record['bucket']['name']
    source_key = urllib.parse.unquote_plus(s3_record['object']['key'], encoding='utf-8')
    try:
        job_type = 'type1' if 'type1' in source_key else 'type2'
        steps = get_emr_step(job_type, source_bucket_name, source_key)
        launch_transient_emr(steps)
        return {"status": "Successfully launched EMR cluster"}
    except Exception as e:
        print(e)
        raise e
Take a look here https://docs.aws.amazon.com/emr/latest/APIReference/API_RunJobFlow.html
Focus now on this image
The key you need is for-use-with-amazon-emr-managed-policies and the value is true. Pass it through the Tags parameter of run_job_flow, e.g. Tags=[{'Key': 'for-use-with-amazon-emr-managed-policies', 'Value': 'true'}]. You can follow the same approach you did for the steps. Your admin is right.
I am creating an API to make GET and POST request to a table in DynamoDB.
I deployed it using serverless and received the endpoints for each API type.
But when testing it out with Postman I get the following error:
Bad request. We can't connect to the server for this app or website at this time. There might be too much traffic or a configuration error. Try again later, or contact the app or website owner.
If you provide content to customers through CloudFront, you can find steps to troubleshoot and help prevent this error by reviewing the CloudFront documentation.
Code for creating the data in the table:
const postsTable = process.env.POSTS_TABLE;
// Create a response
function response(statusCode, message) {
return {
statusCode: statusCode,
body: JSON.stringify(message)
};
}
// Create a post
module.exports.createPost = (event, context, callback) => {
const reqBody = JSON.parse(event.body);
if (
!reqBody.title ||
reqBody.title.trim() === "" ||
!reqBody.body ||
reqBody.body.trim() === ""
) {
return callback(
null,
response(400, {
error:
"Post must have a title and body and they must not be empty"
})
);
}
const post = {
id: uuidv4(),
createdAt: new Date().toISOString(),
userId: 1,
title: reqBody.title,
body: reqBody.body
};
return db
.put({
TableName: postsTable,
Item: post
})
.promise()
.then(() => {
callback(null, response(201, post));
})
.catch(err => response(null, response(err.statusCode, err)));
};
I managed to do it but did not use Serverless.
I set up Lambda functions to POST and GET the data from a url.
I think the issue previously was to do with the policies. This time when making the Lambda functions I set it as the following:
I clicked on "Create a new role from AWS policy templates" while creating an execution role for a new function, then selected "Simple microservice permissions" for Policy templates. This added Basic execution role policy and below DynamoDB permissions to the role for all the tables in the same region as the function :
"Action": [
"dynamodb:DeleteItem",
"dynamodb:GetItem",
"dynamodb:PutItem",
"dynamodb:Scan",
"dynamodb:UpdateItem"
]
Lambda function for POST request
exports.handler = async (event, context) => {
const ddb = new AWS.DynamoDB({ apiVersion: "2012-10-08" });
const documentClient = new AWS.DynamoDB.DocumentClient({
region: "ap-southeast-1"
});
let responseBody = "";
let statusCode = 0;
const {
deviceId,
batteryLevel,
eventId,
id,
location,
tags,
time
} = JSON.parse(event.body);
const params = {
TableName: "dashboard",
Item: {
batteryLevel: batteryLevel,
deviceId: deviceId,
eventId: eventId,
location: location,
tags: tags,
time: time
}
};
try {
const data = await documentClient.put(params).promise();
responseBody = JSON.stringify(data);
statusCode = 201;
} catch (err) {
responseBody = "Unable to POST data";
statusCode = 403;
}
const response = {
statusCode: statusCode,
headers: {
myHeader: "test"
},
body: responseBody
};
return response;
};
Other issues as well were with the method execution of the API I needed to set a custom model for the Request Body to match my data:
{
"$schema": "http://json-schema.org/draft-04/schema#",
"title": "DashboardInputModel",
"type": "object",
"properties":
{
"batteryLevel": {"type": "string"},
"deviceId": {"type": "string"},
"eventId": {"type": "string"},
"id": {"type": "number"},
"location": {
"type": "object",
"properties":{
"accuracy": {"type": "number"},
"latitude": {"type": "number"},
"longitude": {"type": "number"}
}
},
"tags": {
"type": "array",
"items": {
"type": "object",
"properties": {
"accelX":{"type": "number"},
"accelY": {"type": "number"},
"accelZ": {"type": "number"},
"createDate": {"type": "string"},
"dataFormat":{"type": "number"},
"defaultBackground": {"type": "number"},
"favorite": {"type": "boolean"},
"humidity": {"type": "number"},
"id": {"type": "string"},
"measurementSequenceNumber": {"type": "number"},
"movementCounter": {"type": "number"},
"name": {"type": "string"},
"pressure": {"type": "number"},
"rssi": {"type": "number"},
"temperature": {"type": "number"},
"txPower":{"type": "number"},
"updateAt": {"type": "string"},
"voltage": {"type": "number"}
}
}
},
"time": {"type": "string"}
}
}
For each action I also enabled CORS and replaced the existing CORS headers.
These two videos explains the entire process much better than the documentation and I hope it helps.
Part 1
Part 2
By bad request do you mean Status Code 400? It could simply be that you are not correctly calling your API.
If you are getting a 403 then you need to pass through that you are authorised to access the resource you are trying to get. You can see how to do this through the AWS docs.
This page includes a link to an example.
List of error codes.
I have following boto3 script:
import boto3  # fixed: "Import" with a capital I is a syntax error

# NOTE(review): this ChangeBatch omits the required 'Type' key in
# ResourceRecordSet (e.g. 'Type': 'A'), which is why the DELETE change
# fails to match and remove the record -- see the corrected snippet below.
ChangeBatch = {
    'Changes': [
        {
            'Action': 'DELETE',
            'ResourceRecordSet': {
                'Name': 'test.example.com.',
                'Region': 'us-west-1',
                'SetIdentifier': 'test1',
                'AliasTarget': {
                    'HostedZoneId': '**675',
                    'DNSName': 'testexample.example.com.',
                    # NOTE(review): boto3 expects a boolean here, not the
                    # string 'True'.
                    'EvaluateTargetHealth': 'True'
                },
                'HealthCheckId': '**-**-**-675'
            }
        }
    ]
}
When I run the above code it does not delete anything. This is a latency-based routing policy. I am not sure what I am doing wrong; I checked online and looked at the AWS documentation, and this is the suggested way to delete a record set.
I figured it out. It was missing type.
import boto3  # fixed: "Import" with a capital I is a syntax error

# Corrected change batch: Route 53 requires 'Type' to identify the record
# being deleted (this was the missing piece).
ChangeBatch = {
    'Changes': [
        {
            'Action': 'DELETE',
            'ResourceRecordSet': {
                'Name': 'test.example.com.',
                'Region': 'us-west-1',
                'Type': 'A',  # fixed: the comma after this entry was missing
                'SetIdentifier': 'test1',
                'AliasTarget': {
                    'HostedZoneId': '**675',
                    'DNSName': 'testexample.example.com.',
                    # fixed: boto3 expects a real boolean, not the string 'True'
                    'EvaluateTargetHealth': True
                },
                'HealthCheckId': '**-**-**-675'
            }
        }
    ]
}
When you create a new AWS EMR cluster through the AWS Management Console you're able to provide JSON Software Configurations. You can put the JSON file in an S3 bucket and point the Software Configurations to the S3 bucket via the following field,
I need to do this through the AWS Python SDK Boto3 library but I don't see where to do it at in the available fields in their example,
response = client.run_job_flow(
Name='string',
LogUri='string',
AdditionalInfo='string',
AmiVersion='string',
ReleaseLabel='string',
Instances={
'MasterInstanceType': 'string',
'SlaveInstanceType': 'string',
'InstanceCount': 123,
'InstanceGroups': [
{
'Name': 'string',
'Market': 'ON_DEMAND'|'SPOT',
'InstanceRole': 'MASTER'|'CORE'|'TASK',
'BidPrice': 'string',
'InstanceType': 'string',
'InstanceCount': 123,
'Configurations': [
{
'Classification': 'string',
'Configurations': {'... recursive ...'},
'Properties': {
'string': 'string'
}
},
],
'EbsConfiguration': {
'EbsBlockDeviceConfigs': [
{
'VolumeSpecification': {
'VolumeType': 'string',
'Iops': 123,
'SizeInGB': 123
},
'VolumesPerInstance': 123
},
],
'EbsOptimized': True|False
},
'AutoScalingPolicy': {
'Constraints': {
'MinCapacity': 123,
'MaxCapacity': 123
},
'Rules': [
{
'Name': 'string',
'Description': 'string',
'Action': {
'Market': 'ON_DEMAND'|'SPOT',
'SimpleScalingPolicyConfiguration': {
'AdjustmentType': 'CHANGE_IN_CAPACITY'|'PERCENT_CHANGE_IN_CAPACITY'|'EXACT_CAPACITY',
'ScalingAdjustment': 123,
'CoolDown': 123
}
},
'Trigger': {
'CloudWatchAlarmDefinition': {
'ComparisonOperator': 'GREATER_THAN_OR_EQUAL'|'GREATER_THAN'|'LESS_THAN'|'LESS_THAN_OR_EQUAL',
'EvaluationPeriods': 123,
'MetricName': 'string',
'Namespace': 'string',
'Period': 123,
'Statistic': 'SAMPLE_COUNT'|'AVERAGE'|'SUM'|'MINIMUM'|'MAXIMUM',
'Threshold': 123.0,
'Unit': 'NONE'|'SECONDS'|'MICRO_SECONDS'|'MILLI_SECONDS'|'BYTES'|'KILO_BYTES'|'MEGA_BYTES'|'GIGA_BYTES'|'TERA_BYTES'|'BITS'|'KILO_BITS'|'MEGA_BITS'|'GIGA_BITS'|'TERA_BITS'|'PERCENT'|'COUNT'|'BYTES_PER_SECOND'|'KILO_BYTES_PER_SECOND'|'MEGA_BYTES_PER_SECOND'|'GIGA_BYTES_PER_SECOND'|'TERA_BYTES_PER_SECOND'|'BITS_PER_SECOND'|'KILO_BITS_PER_SECOND'|'MEGA_BITS_PER_SECOND'|'GIGA_BITS_PER_SECOND'|'TERA_BITS_PER_SECOND'|'COUNT_PER_SECOND',
'Dimensions': [
{
'Key': 'string',
'Value': 'string'
},
]
}
}
},
]
}
},
],
'InstanceFleets': [
{
'Name': 'string',
'InstanceFleetType': 'MASTER'|'CORE'|'TASK',
'TargetOnDemandCapacity': 123,
'TargetSpotCapacity': 123,
'InstanceTypeConfigs': [
{
'InstanceType': 'string',
'WeightedCapacity': 123,
'BidPrice': 'string',
'BidPriceAsPercentageOfOnDemandPrice': 123.0,
'EbsConfiguration': {
'EbsBlockDeviceConfigs': [
{
'VolumeSpecification': {
'VolumeType': 'string',
'Iops': 123,
'SizeInGB': 123
},
'VolumesPerInstance': 123
},
],
'EbsOptimized': True|False
},
'Configurations': [
{
'Classification': 'string',
'Configurations': {'... recursive ...'},
'Properties': {
'string': 'string'
}
},
]
},
],
'LaunchSpecifications': {
'SpotSpecification': {
'TimeoutDurationMinutes': 123,
'TimeoutAction': 'SWITCH_TO_ON_DEMAND'|'TERMINATE_CLUSTER',
'BlockDurationMinutes': 123
}
}
},
],
'Ec2KeyName': 'string',
'Placement': {
'AvailabilityZone': 'string',
'AvailabilityZones': [
'string',
]
},
'KeepJobFlowAliveWhenNoSteps': True|False,
'TerminationProtected': True|False,
'HadoopVersion': 'string',
'Ec2SubnetId': 'string',
'Ec2SubnetIds': [
'string',
],
'EmrManagedMasterSecurityGroup': 'string',
'EmrManagedSlaveSecurityGroup': 'string',
'ServiceAccessSecurityGroup': 'string',
'AdditionalMasterSecurityGroups': [
'string',
],
'AdditionalSlaveSecurityGroups': [
'string',
]
},
Steps=[
{
'Name': 'string',
'ActionOnFailure': 'TERMINATE_JOB_FLOW'|'TERMINATE_CLUSTER'|'CANCEL_AND_WAIT'|'CONTINUE',
'HadoopJarStep': {
'Properties': [
{
'Key': 'string',
'Value': 'string'
},
],
'Jar': 'string',
'MainClass': 'string',
'Args': [
'string',
]
}
},
],
BootstrapActions=[
{
'Name': 'string',
'ScriptBootstrapAction': {
'Path': 'string',
'Args': [
'string',
]
}
},
],
SupportedProducts=[
'string',
],
NewSupportedProducts=[
{
'Name': 'string',
'Args': [
'string',
]
},
],
Applications=[
{
'Name': 'string',
'Version': 'string',
'Args': [
'string',
],
'AdditionalInfo': {
'string': 'string'
}
},
],
Configurations=[
{
'Classification': 'string',
'Configurations': {'... recursive ...'},
'Properties': {
'string': 'string'
}
},
],
VisibleToAllUsers=True|False,
JobFlowRole='string',
ServiceRole='string',
Tags=[
{
'Key': 'string',
'Value': 'string'
},
],
SecurityConfiguration='string',
AutoScalingRole='string',
ScaleDownBehavior='TERMINATE_AT_INSTANCE_HOUR'|'TERMINATE_AT_TASK_COMPLETION',
CustomAmiId='string',
EbsRootVolumeSize=123,
RepoUpgradeOnBoot='SECURITY'|'NONE',
KerberosAttributes={
'Realm': 'string',
'KdcAdminPassword': 'string',
'CrossRealmTrustPrincipalPassword': 'string',
'ADDomainJoinUser': 'string',
'ADDomainJoinPassword': 'string'
}
)
How can I provide an S3 bucket location that has the Software Configuration JSON file for creating an EMR cluster through the Boto3 library?
Right now the boto3 SDK can't directly import the configuration settings from s3 for you as part of the run_job_flow() function. You would need to setup an S3 client in boto3, download the data as an S3 object and then update the Configuration List part of your EMR dictionary with the JSON data in your S3 file.
An example of how to download a json file from S3 and then load it into memory as a Python Dict can be found over here - Reading an JSON file from S3 using Python boto3
The Configuring Applications - Amazon EMR documentation says:
Supplying a Configuration in the Console
To supply a configuration, you navigate to the Create cluster page and choose Edit software settings. You can then enter the configuration directly (in JSON or using shorthand syntax demonstrated in shadow text) in the console or provide an Amazon S3 URI for a file with a JSON Configurations object.
That seems to be the capability you showed in your question.
The documentation then shows how you can do it via the CLI:
aws emr create-cluster --use-default-roles --release-label emr-5.14.0 --instance-type m4.large --instance-count 2 --applications Name=Hive --configurations https://s3.amazonaws.com/mybucket/myfolder/myConfig.json
This maps to the Configurations options in the JSON you show above:
'Configurations': [
{
'Classification': 'string',
'Configurations': {'... recursive ...'},
'Properties': {
'string': 'string'
}
},
]
Configurations: A configuration classification that applies when provisioning cluster instances, which can include configurations for applications and software that run on the cluster.
It would contain settings such as:
[
{
"Classification": "core-site",
"Properties": {
"hadoop.security.groups.cache.secs": "250"
}
},
{
"Classification": "mapred-site",
"Properties": {
"mapred.tasktracker.map.tasks.maximum": "2",
"mapreduce.map.sort.spill.percent": "0.90",
"mapreduce.tasktracker.reduce.tasks.maximum": "5"
}
}
]
Short answer: Configurations
In aws api gateway there is a section called API Link and I can manually set that.
The problem is I cannot find any section in cloudformation documentation on how I can create vpc link via cloud formation on api gateway.
Is it sth that cloudformation does not support or am I missing it?
You can use swagger to define an API Gateway using VPC Link. This is a complete CloudFormation template you can deploy to test it out...
{
"AWSTemplateFormatVersion": "2010-09-09",
"Description": "Test backend access via API Gateway. This template provisions a Regional API Gateway proxing requests to a backend via VPC Link and Direct Connect to on-premises resources using private ip addresses.",
"Parameters": {
"VPCId": {
"Description": "VPC Id for API Gateway VPC Link",
"Type": "AWS::EC2::VPC::Id"
},
"NLBSubnetList": {
"Type": "List<AWS::EC2::Subnet::Id>",
"Description": "Subnet Ids for provisioning load balancer supporting the VPC Link"
},
"BackendBaseEndpoint": {
"Description": "The backend service base url including protocol. e.g.: https://<url>",
"Type": "String",
"Default": "https://mybackend.dev.mycompany.com"
},
"TargetIpAddresses": {
"Type": "CommaDelimitedList",
"Description": "Comma separated list of NLB target ip addresses. Specify two entries.",
"Default": "10.78.80.1, 10.79.80.1"
}
},
"Resources": {
"API": {
"Type": "AWS::ApiGateway::RestApi",
"Properties": {
"Name": "Test Api",
"Description": "Test Api using VPC_LINK and AWS_IAM authorisation",
"Body": {
"swagger": "2.0",
"info": {
"title": "Test Api"
},
"schemes": [
"https"
],
"paths": {
"/{proxy+}": {
"x-amazon-apigateway-any-method": {
"parameters": [
{
"name": "proxy",
"in": "path",
"required": true,
"type": "string"
}
],
"responses": {},
"security": [
{
"sigv4": []
}
],
"x-amazon-apigateway-integration": {
"responses": {
"default": {
"statusCode": "200"
}
},
"requestParameters": {
"integration.request.path.proxy": "method.request.path.proxy"
},
"uri": {
"Fn::Join": [
"",
[
{
"Ref": "BackendBaseEndpoint"
},
"/{proxy}"
]
]
},
"passthroughBehavior": "when_no_match",
"connectionType": "VPC_LINK",
"connectionId": "${stageVariables.vpcLinkId}",
"httpMethod": "GET",
"type": "http_proxy"
}
}
}
},
"securityDefinitions": {
"sigv4": {
"type": "apiKey",
"name": "Authorization",
"in": "header",
"x-amazon-apigateway-authtype": "awsSigv4"
}
}
},
"EndpointConfiguration": {
"Types": [
"REGIONAL"
]
}
},
"DependsOn": "VPCLink"
},
"APIStage": {
"Type": "AWS::ApiGateway::Stage",
"Properties": {
"StageName": "dev",
"Description": "dev Stage",
"RestApiId": {
"Ref": "API"
},
"DeploymentId": {
"Ref": "APIDeployment"
},
"MethodSettings": [
{
"ResourcePath": "/*",
"HttpMethod": "GET",
"MetricsEnabled": "true",
"DataTraceEnabled": "true",
"LoggingLevel": "ERROR"
}
],
"Variables": {
"vpcLinkId": {
"Ref": "VPCLink"
}
}
}
},
"APIDeployment": {
"Type": "AWS::ApiGateway::Deployment",
"Properties": {
"RestApiId": {
"Ref": "API"
},
"Description": "Test Deployment"
}
},
"VPCLink": {
"Type": "AWS::ApiGateway::VpcLink",
"Properties": {
"Description": "Vpc link to GIS platform",
"Name": "VPCLink",
"TargetArns": [
{
"Ref": "NLB"
}
]
}
},
"NLBTargetGroup": {
"Type": "AWS::ElasticLoadBalancingV2::TargetGroup",
"Properties": {
"Name": "NLBTargetGroup",
"Port": 443,
"Protocol": "TCP",
"TargetGroupAttributes": [
{
"Key": "deregistration_delay.timeout_seconds",
"Value": "20"
}
],
"TargetType": "ip",
"Targets": [
{
"Id": { "Fn::Select" : [ "0", {"Ref": "TargetIpAddresses"} ] },
"Port": 443,
"AvailabilityZone": "all"
},
{
"Id": { "Fn::Select" : [ "1", {"Ref": "TargetIpAddresses"} ] },
"Port": 443,
"AvailabilityZone": "all"
}
],
"VpcId": {
"Ref": "VPCId"
},
"Tags": [
{
"Key": "Project",
"Value": "API and VPC Link Test"
}
]
}
},
"NLB": {
"Type": "AWS::ElasticLoadBalancingV2::LoadBalancer",
"Properties": {
"Type": "network",
"Scheme": "internal",
"Subnets": {
"Ref": "NLBSubnetList"
}
}
},
"NLBListener": {
"Type": "AWS::ElasticLoadBalancingV2::Listener",
"Properties": {
"DefaultActions": [
{
"Type": "forward",
"TargetGroupArn": {
"Ref": "NLBTargetGroup"
}
}
],
"LoadBalancerArn": {
"Ref": "NLB"
},
"Port": "443",
"Protocol": "TCP"
}
}
},
"Outputs": {
"NetworkLoadBalancerArn": {
"Value": {
"Ref": "NLB"
},
"Description": "The network elastic load balancer Amazon resource name"
}
}
}
Unfortunately, CloudFormation does not support API Gateway's VPC Links at this time.
You can create a Lambda-backed custom resource to manage the VPC Link using CloudFormation.
Here is a Lambda function (using python3.6) for a CloudFormation custom resource I use to manage VPC links:
import copy
import json
import re
import time
import boto3
from botocore.vendored import requests
SUCCESS = "SUCCESS"
FAILED = "FAILED"
FAILED_PHYSICAL_RESOURCE_ID = "FAILED_PHYSICAL_RESOURCE_ID"
class AddOrUpdateTargetArnsError(Exception):
    """Raised when a stack Update attempts to change the VPC link's target ARNs,
    which the API Gateway service does not support."""

    def __init__(self):
        message = 'Target arns are not allowed to be changed/added.'
        self.message = message
        super().__init__(message)
class FailedVpcLinkError(Exception):
    """Raised when a VPC link enters the FAILED state; carries the service's
    status message for diagnosis."""

    def __init__(self, status_message):
        formatted = f'statusMessages: {status_message}'
        self.message = formatted
        super().__init__(formatted)
def lambda_handler(event, context):
    """CloudFormation custom-resource entry point.

    Delegates to _lambda_handler and, on any failure, reports FAILED back to
    CloudFormation first so the stack operation does not hang waiting for a
    response.
    """
    try:
        _lambda_handler(event, context)
    except Exception as exc:
        send(
            event,
            context,
            response_status=FAILED,
            # Do not fail on delete to avoid rollback failure
            response_data=None,
            # Fall back to the sentinel id so a rollback Delete can be
            # detected and skipped rather than failing.
            physical_resource_id=event.get('PhysicalResourceId', FAILED_PHYSICAL_RESOURCE_ID),
            reason=exc,
        )
        # Re-raise so the invocation is marked failed and the exception is
        # logged to CloudWatch Logs.
        raise
def _lambda_handler(event, context):
    """Create/Update/Delete an API Gateway VPC link for Custom::ApiGatewayVpcLink.

    VPC link creation and deletion are asynchronous, so those paths re-invoke
    this Lambda with a 'WaitFor' marker and poll via handle_self_invocation
    instead of blocking a single invocation until completion.
    """
    print("Received event: ")
    print(event)

    resource_type = event['ResourceType']
    if resource_type != "Custom::ApiGatewayVpcLink":
        raise ValueError(f'Unexpected resource_type: {resource_type}')

    request_type = event['RequestType']
    # 'WaitFor' is only present on our own re-invocations, never on the
    # original CloudFormation request.
    wait_for = event.get('WaitFor', None)
    resource_properties = event['ResourceProperties']
    physical_resource_id = event.get('PhysicalResourceId', None)
    apigateway = boto3.client('apigateway')

    if wait_for:
        # Self-invocation: poll the in-flight create/delete; it sends the
        # response to CloudFormation itself when the operation settles.
        handle_self_invocation(
            wait_for=wait_for,
            physical_resource_id=physical_resource_id,
            event=event,
            context=context,
        )
    else:
        if request_type == 'Create':
            kwargs = dict(
                name=resource_properties['Name'],
                targetArns=resource_properties['TargetArns'],
                description=resource_properties.get('Description', None)
            )
            response = apigateway.create_vpc_link(**kwargs)

            event_copy = copy.deepcopy(event)
            event_copy['WaitFor'] = 'CreateComplete'
            # The new VPC link id becomes this resource's physical id.
            event_copy['PhysicalResourceId'] = response['id']
            print('Reinvoking function because VPC link creation is asynchronous')
            relaunch_lambda(event=event_copy, context=context)
            # Deliberately no send() here: the re-invocation reports success
            # once the link is AVAILABLE.
            return
        elif request_type == 'Update':
            old_resource_properties = event['OldResourceProperties']

            current_target_arns = apigateway.get_vpc_link(
                vpcLinkId=physical_resource_id,
            )['targetArns']

            # must compare current_target_arns to resource_properties['TargetArns'], to protect against
            # UPDATE created by UPDATE_FAILED. In that particular case, current_target_arns will be the same as
            # resource_properties['TargetArns'] but different than old_resource_properties['TargetArns']
            if set(current_target_arns) != set(resource_properties['TargetArns']) and \
                    set(resource_properties['TargetArns']) != set(old_resource_properties['TargetArns']):
                raise AddOrUpdateTargetArnsError()

            # Build a JSON-patch covering name changes and the three
            # Description cases: changed, newly added, removed.
            patch_operations = []
            if resource_properties['Name'] != old_resource_properties['Name']:
                patch_operations.append(dict(
                    op='replace',
                    path='/name',
                    value=resource_properties['Name'],
                ))
            if 'Description' in resource_properties and 'Description' in old_resource_properties:
                if resource_properties['Description'] != old_resource_properties['Description']:
                    patch_operations.append(dict(
                        op='replace',
                        path='/description',
                        value=resource_properties['Description'],
                    ))
            elif 'Description' in resource_properties and 'Description' not in old_resource_properties:
                patch_operations.append(dict(
                    op='replace',
                    path='/description',
                    value=resource_properties['Description'],
                ))
            elif 'Description' not in resource_properties and 'Description' in old_resource_properties:
                patch_operations.append(dict(
                    op='replace',
                    path='/description',
                    value=None,
                ))

            apigateway.update_vpc_link(
                vpcLinkId=physical_resource_id,
                patchOperations=patch_operations,
            )
        elif request_type == 'Delete':
            delete = True

            if physical_resource_id == FAILED_PHYSICAL_RESOURCE_ID:
                delete = False
                print('Custom resource was never properly created, skipping deletion.')

            # Heuristics: when a resource never got a real physical id,
            # CloudFormation substitutes generated names containing the stack
            # or logical resource id -- skip deletion in those cases.
            stack_name = re.match("arn:aws:cloudformation:.+:stack/(?P<stack_name>.+)/.+", event['StackId']).group('stack_name')
            if stack_name in physical_resource_id:
                delete = False
                print(f'Skipping deletion, because VPC link was not created properly. Heuristic: stack name ({stack_name}) found in physical resource ID ({physical_resource_id})')

            logical_resource_id = event['LogicalResourceId']
            if logical_resource_id in physical_resource_id:
                delete = False
                print(f'Skipping deletion, because VPC link was not created properly. Heuristic: logical resource ID ({logical_resource_id}) found in physical resource ID ({physical_resource_id})')

            if delete:
                apigateway.delete_vpc_link(
                    vpcLinkId=physical_resource_id
                )
                event_copy = copy.deepcopy(event)
                event_copy['WaitFor'] = 'DeleteComplete'
                print('Reinvoking function because VPC link deletion is asynchronous')
                relaunch_lambda(event=event_copy, context=context)
                # As with Create, the re-invocation reports once deletion ends.
                return
        else:
            print(f'Request type is {request_type}, doing nothing.')

        # Synchronous paths (Update, skipped Delete, unknown request types)
        # report success immediately.
        send(
            event,
            context,
            response_status=SUCCESS,
            response_data=None,
            physical_resource_id=physical_resource_id,
        )
def handle_self_invocation(wait_for, physical_resource_id, event, context):
    """Poll an in-flight VPC link operation, reporting to CloudFormation when done.

    wait_for is 'CreateComplete' or 'DeleteComplete'. While the operation is
    still in progress this sleeps briefly and re-invokes the Lambda, so no
    single invocation has to outlive the provisioning time.
    """
    apigateway = boto3.client('apigateway')

    if wait_for == 'CreateComplete':
        print('Waiting for creation of VPC link: {vpc_link_id}'.format(vpc_link_id=physical_resource_id))
        response = apigateway.get_vpc_link(
            vpcLinkId=physical_resource_id,
        )
        status = response['status']
        print('Status of VPC link {vpc_link_id} is {status}'.format(vpc_link_id=physical_resource_id, status=status))
        if status == 'AVAILABLE':
            # Creation finished -- report success to CloudFormation.
            send(
                event,
                context,
                response_status=SUCCESS,
                response_data=None,
                physical_resource_id=physical_resource_id,
            )
        elif status == 'FAILED':
            # Raising propagates to lambda_handler, which reports FAILED.
            raise FailedVpcLinkError(status_message=response['statusMessage'])
        elif status == 'PENDING':
            # Sleeping here to avoid polluting CloudWatch Logs by reinvoking the Lambda too quickly
            time.sleep(30)
            relaunch_lambda(event, context)
        else:
            # NOTE(review): an unknown status sends no response, so the stack
            # would wait until CloudFormation's own timeout.
            print('Unexpected status, doing nothing')
    elif wait_for == 'DeleteComplete':
        print('Waiting for deletion of VPC link: {vpc_link_id}'.format(vpc_link_id=physical_resource_id))
        try:
            response = apigateway.get_vpc_link(
                vpcLinkId=physical_resource_id,
            )
        except apigateway.exceptions.NotFoundException:
            # NotFound means deletion completed -- report success.
            print('VPC link {vpc_link_id} deleted successfully'.format(vpc_link_id=physical_resource_id))
            send(
                event,
                context,
                response_status=SUCCESS,
                response_data=None,
                physical_resource_id=physical_resource_id,
            )
        else:
            status = response['status']
            assert status == 'DELETING', f'status is {status}'
            # Sleeping here to avoid polluting CloudWatch Logs by reinvoking the Lambda too quickly
            time.sleep(10)
            relaunch_lambda(event, context)
    else:
        raise ValueError(f'Unexpected WaitFor: {wait_for}')
def relaunch_lambda(event, context):
    """Asynchronously re-invoke this same function with *event* as the payload."""
    payload = json.dumps(event)
    lambda_client = boto3.client("lambda")
    lambda_client.invoke(
        FunctionName=context.function_name,
        InvocationType='Event',
        Payload=payload,
    )
def send(event, context, response_status, response_data, physical_resource_id, reason=None):
    """Report the custom resource's outcome to CloudFormation.

    PUTs the standard cfn-response JSON document to the pre-signed ResponseURL
    carried in the event. Delivery failures are printed but not raised,
    keeping the original best-effort behaviour.

    :param response_status: SUCCESS or FAILED.
    :param response_data: optional dict placed in the response's Data field.
    :param reason: optional exception/message; defaults to a pointer at the
        CloudWatch log stream.
    """
    # Standard-library replacement for botocore.vendored.requests, which was
    # deprecated and then removed from botocore, breaking this function on
    # newer Lambda runtimes.
    import urllib.request

    response_url = event['ResponseURL']

    response_body = {
        'Status': response_status,
        'Reason': str(reason) if reason else 'See the details in CloudWatch Log Stream: ' + context.log_stream_name,
        'PhysicalResourceId': physical_resource_id,
        'StackId': event['StackId'],
        'RequestId': event['RequestId'],
        'LogicalResourceId': event['LogicalResourceId'],
        'Data': response_data,
    }

    json_response_body = json.dumps(response_body)

    # CloudFormation's pre-signed URL requires an empty content-type.
    headers = {
        'content-type': '',
        'content-length': str(len(json_response_body))
    }

    try:
        request = urllib.request.Request(
            response_url,
            data=json_response_body.encode('utf-8'),
            headers=headers,
            method='PUT',
        )
        with urllib.request.urlopen(request):
            pass
    except Exception as e:
        print("send(..) failed executing requests.put(..): " + str(e))