I'm trying to deploy an AWS CI/CD pipeline where the developer commits to AWS CodeCommit, AWS CodeBuild takes care of building the Docker image and pushes it to Elastic Container Registry (ECR), and the image is then deployed from ECR to ECS Fargate.
I have tried to set this up, but I need help fetching the Docker image URL from CodeBuild so that it can be pushed to ECR. Please find the code below.
# CodeCommit repository; name, description, default branch and tags all come
# from input variables.
resource "aws_codecommit_repository" "repo" {
  repository_name = var.repository_name
  default_branch  = var.default_branch
  description     = var.description

  # Tags
  tags = var.tags
}
# Triggers
# A repository's triggers are written as one set, so every entry of
# var.triggers is rendered as a `trigger` block inside ONE resource. Creating
# one aws_codecommit_trigger per entry (count = length(var.triggers)) makes
# each resource overwrite the triggers written by the others.
resource "aws_codecommit_trigger" "triggers" {
  # Stay a counted resource so existing references to triggers[0] keep working,
  # and create nothing when no triggers are configured.
  count           = length(var.triggers) > 0 ? 1 : 0
  repository_name = aws_codecommit_repository.repo.repository_name

  dynamic "trigger" {
    for_each = var.triggers
    content {
      name            = trigger.value["name"]
      events          = trigger.value["events"]
      destination_arn = trigger.value["destination_arn"]
    }
  }
}
# ECR repository; its repository_url is referenced as the container image.
resource "aws_ecr_repository" "my_sample_ecr_repo" {
  name = "my-sample-ecr-repo"
}
# CodeBuild project that builds from the CodeCommit repository.
# NOTE(review): aws_codebuild_project also requires a `service_role` argument
# (ARN of an IAM role CodeBuild can assume); none is set here — add one.
resource "aws_codebuild_project" "codebuild_project" {
  name          = "sample-code"
  description   = "Codebuild demo with Terraform"
  build_timeout = "120"

  artifacts {
    type = "NO_ARTIFACTS"
  }

  source {
    # Source type values are upper-case constants.
    type = "CODECOMMIT"
    # lookup() needs a map and a key, so lookup(var.repository_name) was not a
    # valid call. For a CodeCommit source the location is the HTTPS clone URL.
    location = aws_codecommit_repository.repo.clone_url_http
  }

  environment {
    image                       = lookup(var.codebuild_params, "IMAGE")
    type                        = lookup(var.codebuild_params, "TYPE")
    compute_type                = lookup(var.codebuild_params, "COMPUTE_TYPE")
    image_pull_credentials_type = lookup(var.codebuild_params, "CRED_TYPE")
    # Privileged mode is needed to run Docker inside the build container.
    privileged_mode = true

    # One environment variable per key/value pair in var.environment_variables.
    dynamic "environment_variable" {
      for_each = var.environment_variables
      content {
        name  = environment_variable.key
        value = environment_variable.value
      }
    }
  }

  # Both CloudWatch and S3 build logging are switched off.
  logs_config {
    cloudwatch_logs {
      status = "DISABLED"
    }
    s3_logs {
      status = "DISABLED"
    }
  }
}
# ECS cluster that hosts the Fargate service.
resource "aws_ecs_cluster" "my_cluster" {
  name = "my-cluster" # Cluster name
}
# Fargate task definition with a single container listening on port 3000.
resource "aws_ecs_task_definition" "my_sample_task" {
  family = "my-sample-task" # Naming the task

  # jsonencode guarantees well-formed JSON. The previous hand-written heredoc
  # was missing the comma after the "portMappings" array and was not valid JSON.
  container_definitions = jsonencode([
    {
      name      = "my-sample-task"
      image     = aws_ecr_repository.my_sample_ecr_repo.repository_url
      essential = true
      portMappings = [
        {
          containerPort = 3000
          hostPort      = 3000
        }
      ]
      memory = 512
      cpu    = 256
    }
  ])

  requires_compatibilities = ["FARGATE"] # Stating that we are using ECS Fargate
  network_mode             = "awsvpc"    # awsvpc is the network mode required for Fargate
  memory                   = 512         # Task-level memory
  cpu                      = 256         # Task-level CPU units
  execution_role_arn       = aws_iam_role.ecsTaskExecutionRole.arn
}
# Execution role for the ECS task; its trust policy is defined just below.
resource "aws_iam_role" "ecsTaskExecutionRole" {
  name               = "ecsTaskExecutionRole"
  assume_role_policy = data.aws_iam_policy_document.assume_role_policy.json
}

# Trust policy: lets the ecs-tasks.amazonaws.com service call sts:AssumeRole.
data "aws_iam_policy_document" "assume_role_policy" {
  statement {
    actions = ["sts:AssumeRole"]

    principals {
      type        = "Service"
      identifiers = ["ecs-tasks.amazonaws.com"]
    }
  }
}

# Attach the AWS-managed AmazonECSTaskExecutionRolePolicy to the role.
resource "aws_iam_role_policy_attachment" "ecsTaskExecutionRole_policy" {
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
  role       = aws_iam_role.ecsTaskExecutionRole.name
}
# Fargate service that runs the task behind the application load balancer.
resource "aws_ecs_service" "my_sample_service" {
  name            = "my-sample-service"                        # Naming our first service
  cluster         = aws_ecs_cluster.my_cluster.id              # Referencing our created cluster
  task_definition = aws_ecs_task_definition.my_sample_task.arn # Task the service will spin up
  launch_type     = "FARGATE"
  desired_count   = 1 # Number of task copies to keep running

  load_balancer {
    target_group_arn = aws_lb_target_group.target_group.arn # Referencing our target group
    # The container is named the same as the task family ("my-sample-task").
    container_name = aws_ecs_task_definition.my_sample_task.family
    container_port = 3000 # Specifying the container port
  }

  network_configuration {
    subnets = [
      aws_default_subnet.default_subnet_a.id,
      aws_default_subnet.default_subnet_b.id,
      aws_default_subnet.default_subnet_c.id,
    ]
    assign_public_ip = true                                          # Providing our containers with public IPs
    security_groups  = [aws_security_group.service_security_group.id] # Setting the security group
  }

  # The target group has to be attached to the load balancer (which the
  # listener does) before a service can register with it; without this
  # explicit ordering the service may be created too early and fail.
  depends_on = [aws_lb_listener.listener]
}
# Reference to the default VPC.
resource "aws_default_vpc" "default_vpc" {}

# Providing a reference to our default subnets, one per availability zone
resource "aws_default_subnet" "default_subnet_a" {
  availability_zone = "ap-south-1c"
}

resource "aws_default_subnet" "default_subnet_b" {
  availability_zone = "ap-south-1b"
}

resource "aws_default_subnet" "default_subnet_c" {
  availability_zone = "ap-south-1a"
}
# Security group for the ECS service tasks.
resource "aws_security_group" "service_security_group" {
  # Inbound: any protocol and port, but only from the load balancer's security group.
  ingress {
    protocol        = "-1"
    from_port       = 0
    to_port         = 0
    security_groups = [aws_security_group.load_balancer_security_group.id]
  }

  # Outbound: any protocol and port, to any IPv4 address.
  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }
}
# Application load balancer placed in the three default subnets.
resource "aws_alb" "application_load_balancer" {
  name               = "test-lb-tf" # Naming our load balancer
  load_balancer_type = "application"

  # Referencing the security group
  security_groups = [aws_security_group.load_balancer_security_group.id]

  # Referencing the default subnets
  subnets = [
    aws_default_subnet.default_subnet_a.id,
    aws_default_subnet.default_subnet_b.id,
    aws_default_subnet.default_subnet_c.id,
  ]
}
# Security group for the load balancer.
resource "aws_security_group" "load_balancer_security_group" {
  # Inbound: TCP port 80 from all sources.
  ingress {
    protocol    = "tcp"
    from_port   = 80
    to_port     = 80
    cidr_blocks = ["0.0.0.0/0"]
  }

  # Outbound: any protocol and port, to all IP addresses.
  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }
}
# IP-type target group in the default VPC.
resource "aws_lb_target_group" "target_group" {
  name        = "target-group"
  vpc_id      = aws_default_vpc.default_vpc.id # Referencing the default VPC
  target_type = "ip"
  protocol    = "HTTP"
  port        = 80

  # A target is healthy when "/" answers with 200, 301 or 302.
  health_check {
    path    = "/"
    matcher = "200,301,302"
  }
}
# HTTP :80 listener that forwards everything to the target group.
resource "aws_lb_listener" "listener" {
  load_balancer_arn = aws_alb.application_load_balancer.arn # Referencing our load balancer
  protocol          = "HTTP"
  port              = "80"

  default_action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.target_group.arn # Referencing our target group
  }
}
Related
I have a terraform-defined ECS cluster with fargate task, service, target group and lb.
I'm trying to send requests to the fargate cluster but it's timing out. I've tried to add an attachment as follows:
# Static registration of a target with the websocket-server target group.
# NOTE: target_id is set to the ECS cluster id. Per the error reported for this
# resource, that value is the cluster ARN and is rejected as not being a valid
# IPv4 address.
resource "aws_lb_target_group_attachment" "websocket-server" {
target_group_arn = aws_lb_target_group.websocket-server.arn
target_id = aws_ecs_cluster.websocket-server-cluster.id
port = 443
}
But unfortunately this throws:
Error registering targets with target group: ValidationError: The IP address 'arn:aws:ecs:eu-west-2:xxxxxx:cluster/websocket-server-cluster' is not a valid IPv4 address
My LB/target group/ECS definitions:
# ECS cluster for the websocket server.
resource "aws_ecs_cluster" "websocket-server-cluster" {
  name = "websocket-server-cluster"
}
# Fargate service for the websocket server, registered with the
# websocket-server target group on container port 443.
resource "aws_ecs_service" "websocket-server-service" {
  name            = "websocket-server-service"
  cluster         = aws_ecs_cluster.websocket-server-cluster.arn
  task_definition = aws_ecs_task_definition.websocket-server-task.arn
  launch_type     = "FARGATE"

  # desired_count defaults to 0 when omitted, which leaves the service without
  # any running task, so nothing is ever registered in the target group.
  desired_count = 1

  deployment_maximum_percent         = 200
  deployment_minimum_healthy_percent = 0

  load_balancer {
    target_group_arn = aws_lb_target_group.websocket-server.arn
    container_name   = "websocket-server"
    container_port   = 443
  }

  network_configuration {
    assign_public_ip = true
    security_groups  = [aws_security_group.public.id, aws_security_group.private.id]
    subnets          = [aws_subnet.public.id, aws_subnet.private.id]
  }

  # The target group must be attached to the load balancer (done by the
  # listener) before the service can use it.
  depends_on = [aws_lb_listener.websocket-server]
}
# Container definition for the websocket server, built with the cloudposse
# container-definition module pinned to tag 0.58.1.
module "websocket-server" {
  source = "git::https://github.com/cloudposse/terraform-aws-ecs-container-definition.git?ref=tags/0.58.1"

  container_name   = "websocket-server"
  container_image  = "${aws_ecr_repository.websocket-server.repository_url}:latest"
  container_memory = "512"
  container_cpu    = "256"
  environment      = []

  # Single TCP mapping: container port 443 to host port 443.
  port_mappings = [
    {
      protocol      = "tcp"
      containerPort = 443
      hostPort      = 443
    }
  ]
}
# Fargate task definition; the container list comes from the
# websocket-server module output.
resource "aws_ecs_task_definition" "websocket-server-task" {
  family                   = "websocket-server"
  container_definitions    = module.websocket-server.json_map_encoded_list
  requires_compatibilities = ["FARGATE"]
  network_mode             = "awsvpc"
  cpu                      = "256"
  memory                   = "512"

  # The same role is used as both task role and execution role.
  execution_role_arn = aws_iam_role.ecs-container-role.arn
  task_role_arn      = aws_iam_role.ecs-container-role.arn

  # Tag changes do not trigger an update of this resource.
  lifecycle {
    ignore_changes = [tags, tags_all]
  }
}
# Internet-facing application load balancer in the two public subnets.
resource "aws_lb" "main" {
  name                       = "main"
  load_balancer_type         = "application"
  internal                   = false
  enable_deletion_protection = false
  subnets                    = [aws_subnet.public.id, aws_subnet.public-backup.id]
  security_groups            = [aws_security_group.public.id, aws_security_group.private.id]
}
# HTTPS :443 target group with IP targets for the websocket server.
resource "aws_lb_target_group" "websocket-server" {
  name        = "websocket-server"
  vpc_id      = aws_vpc.main.id
  target_type = "ip"
  protocol    = "HTTPS"
  port        = 443

  # HTTPS health check against the service's health endpoint; expects 200.
  health_check {
    enabled             = true
    protocol            = "HTTPS"
    path                = "/apis/websocket-server/health"
    matcher             = "200"
    interval            = "100"
    timeout             = 10
    healthy_threshold   = 3
    unhealthy_threshold = 3
  }

  # Explicit ordering: create the load balancer first.
  depends_on = [aws_lb.main]
}
# HTTPS :443 listener forwarding to the websocket-server target group.
resource "aws_lb_listener" "websocket-server" {
  load_balancer_arn = aws_lb.main.arn
  protocol          = "HTTPS"
  port              = "443"
  certificate_arn   = aws_acm_certificate.main.arn
  ssl_policy        = "ELBSecurityPolicy-2016-08"

  default_action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.websocket-server.arn
  }
}
# HTTP :80 listener that answers with a 301 redirect to HTTPS :443.
resource "aws_lb_listener" "http" {
  load_balancer_arn = aws_lb.main.arn
  protocol          = "HTTP"
  port              = "80"

  default_action {
    type = "redirect"

    redirect {
      status_code = "HTTP_301"
      protocol    = "HTTPS"
      port        = "443"
    }
  }
}
# Attaches the ACM certificate to the HTTPS listener.
resource "aws_lb_listener_certificate" "main" {
  certificate_arn = aws_acm_certificate.main.arn
  listener_arn    = aws_lb_listener.websocket-server.arn
}
The attachment definition is not necessary at all. Keep in mind, containers for Fargate services do not use network interfaces of the underlying EC2 instances in the cluster (you don't see the instances at all for that matter). They use AWS VPC networking mode only -- independent network interfaces in the VPC are attached to the containers.
The target group attachment happens automatically and is configured through the load_balancer block in the aws_ecs_service resource. As ECS starts the containers, they get registered with the target group automatically. There is no static attachment to define in the case of Fargate ECS services.
Just remove the tg attachment resource from your tf file altogether.
Check out this resource for a decent reference implementation with terraform.
As a completely separate side note, you probably also do not want assign_public_ip = true in your service configuration. That would allow access to your containers directly without going through the load balancer which is almost never what you want when you're using a load balancer.
tldr;
I can't access my service through the ALB DNS name. Trying to reach the URL times out.
I noticed that, going from the IGW and the NAT, there's an isolated routed subnet (Public Subnet 2), and also a task that's not being exposed through the ALB because it somehow got attached to a different subnet.
More general context
Got Terraform modules defining
ECS cluster, service and task definition
ALB setup, including a target group and a listener
Got a couple subnets and a security group for ALB
Got private subnets and own sg for ECS
Target group port is the same as container port already
Using CodePipeline I get a task running, and I can see logs from my service, meaning it starts.
Some questions
Can I have multiple IGW associated to a single NAT within a single VPC?
Tasks get attached to a couple of private subnets and an SG with permissions to the ALB SG. Also, tasks should access a Redis instance, so I'm also attaching to them an SG and a subnet where the ElastiCache node lives (shown in the Terraform module below). Any advice here?
ALB and networking resources
# ID of the existing VPC everything below is created in.
variable "vpc_id" {
  type    = string
  default = "vpc-0af6233d57f7a6e1b"
}

# Environment name, interpolated into resource names and tags.
variable "environment" {
  type    = string
  default = "dev"
}

# Looks up the VPC identified by var.vpc_id.
data "aws_vpc" "vpc" {
  id = var.vpc_id
}
### Public subnets
# One per availability zone; instances launched here get a public IP.
resource "aws_subnet" "public_subnet_us_east_1a" {
  vpc_id                  = data.aws_vpc.vpc.id
  availability_zone       = "us-east-1a"
  cidr_block              = "10.0.10.0/24"
  map_public_ip_on_launch = true

  tags = {
    Name = "audible-blog-us-${var.environment}-public-subnet-1a"
  }
}

resource "aws_subnet" "public_subnet_us_east_1b" {
  vpc_id                  = data.aws_vpc.vpc.id
  availability_zone       = "us-east-1b"
  cidr_block              = "10.0.11.0/24"
  map_public_ip_on_launch = true

  tags = {
    Name = "audible-blog-us-${var.environment}-public-subnet-1b"
  }
}
### Private subnets
# One per availability zone. Both are associated with route tables that send
# non-local traffic through a NAT gateway, so neither should hand out public
# IPs on launch. Subnet 1a previously set map_public_ip_on_launch = true,
# inconsistent with its private role and with subnet 1b.
resource "aws_subnet" "private_subnet_us_east_1a" {
  vpc_id                  = data.aws_vpc.vpc.id
  cidr_block              = "10.0.12.0/24"
  map_public_ip_on_launch = false
  availability_zone       = "us-east-1a"

  tags = {
    Name = "audible-blog-us-${var.environment}-private-subnet-1a"
  }
}

resource "aws_subnet" "private_subnet_us_east_1b" {
  vpc_id            = data.aws_vpc.vpc.id
  cidr_block        = "10.0.13.0/24"
  availability_zone = "us-east-1b"

  tags = {
    Name = "audible-blog-us-${var.environment}-private-subnet-1b"
  }
}
# Create a NAT gateway with an EIP for each private subnet to get internet connectivity
# Each NAT gateway lives in the public subnet of its availability zone.
resource "aws_eip" "gw_a" {
  vpc = true
}

resource "aws_eip" "gw_b" {
  vpc = true
}

resource "aws_nat_gateway" "gw_a" {
  allocation_id = aws_eip.gw_a.id
  subnet_id     = aws_subnet.public_subnet_us_east_1a.id
}

resource "aws_nat_gateway" "gw_b" {
  allocation_id = aws_eip.gw_b.id
  subnet_id     = aws_subnet.public_subnet_us_east_1b.id
}
# Create a new route table for the private subnets
# And make it route non-local traffic through the NAT gateway to the internet
resource "aws_route_table" "private_a" {
  vpc_id = data.aws_vpc.vpc.id

  route {
    nat_gateway_id = aws_nat_gateway.gw_a.id
    cidr_block     = "0.0.0.0/0"
  }
}

resource "aws_route_table" "private_b" {
  vpc_id = data.aws_vpc.vpc.id

  route {
    nat_gateway_id = aws_nat_gateway.gw_b.id
    cidr_block     = "0.0.0.0/0"
  }
}

# Explicitly associate the newly created route tables to the private subnets (so they don't default to the main route table)
resource "aws_route_table_association" "private_a" {
  route_table_id = aws_route_table.private_a.id
  subnet_id      = aws_subnet.private_subnet_us_east_1a.id
}

resource "aws_route_table_association" "private_b" {
  route_table_id = aws_route_table.private_b.id
  subnet_id      = aws_subnet.private_subnet_us_east_1b.id
}
# This is the group you need to edit if you want to restrict access to your application
resource "aws_security_group" "alb_sg" {
  name        = "audible-blog-us-${var.environment}-lb-sg"
  description = "Internet to ALB Security Group"
  vpc_id      = data.aws_vpc.vpc.id

  # Inbound HTTP from anywhere.
  ingress {
    protocol    = "tcp"
    from_port   = 80
    to_port     = 80
    cidr_blocks = ["0.0.0.0/0"]
  }

  # Inbound HTTPS from anywhere.
  ingress {
    protocol    = "tcp"
    from_port   = 443
    to_port     = 443
    cidr_blocks = ["0.0.0.0/0"]
  }

  # Outbound: everything.
  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = {
    name = "audible-blog-us-${var.environment}-lb-sg"
  }
}
# Traffic to the ECS Cluster should only come from the ALB
resource "aws_security_group" "ecs_tasks_sg" {
  name        = "audible-blog-us-${var.environment}-ecs-sg"
  description = "ALB to ECS Security Group"
  vpc_id      = data.aws_vpc.vpc.id

  # Inbound on the container port, restricted to the ALB security group.
  # The previous rule also listed cidr_blocks = ["0.0.0.0/0"], which opened
  # port 8080 to every source and defeated the ALB-only intent stated above.
  ingress {
    from_port       = 8080
    to_port         = 8080
    protocol        = "tcp"
    security_groups = [aws_security_group.alb_sg.id]
  }

  # Outbound: everything.
  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = {
    name = "audible-blog-us-${var.environment}-ecs-sg"
  }
}
# Internet-facing application load balancer in the two public subnets.
resource "aws_alb" "alb" {
  name               = "audible-blog-us-${var.environment}-alb"
  load_balancer_type = "application"
  internal           = false
  security_groups    = [aws_security_group.alb_sg.id]
  subnets = [
    aws_subnet.public_subnet_us_east_1a.id,
    aws_subnet.public_subnet_us_east_1b.id,
  ]

  tags = {
    name        = "audible-blog-us-${var.environment}-alb"
    environment = var.environment
  }
}
# HTTP :8080 target group with IP targets.
resource "aws_alb_target_group" "target_group" {
  name        = "audible-blog-us-${var.environment}-target-group"
  vpc_id      = data.aws_vpc.vpc.id
  target_type = "ip"
  protocol    = "HTTP"
  port        = "8080"

  # Health check on the traffic port at /blog; 200 through 304 count as healthy.
  health_check {
    enabled             = true
    port                = "traffic-port"
    path                = "/blog"
    matcher             = "200-304"
    interval            = 30
    unhealthy_threshold = 5
  }

  # Explicit ordering: create the load balancer first.
  depends_on = [aws_alb.alb]
}
# HTTP :80 listener forwarding to the target group.
resource "aws_alb_listener" "web_app_http" {
  load_balancer_arn = aws_alb.alb.arn
  protocol          = "HTTP"
  port              = 80

  default_action {
    type             = "forward"
    target_group_arn = aws_alb_target_group.target_group.arn
  }

  depends_on = [aws_alb_target_group.target_group]
}
# Exposes the load balancer ARN, the target group ARN, the two private subnet
# ids and the ECS tasks security group id as a single object.
output "networking_details" {
  value = {
    load_balancer_arn              = aws_alb.alb.arn
    load_balancer_target_group_arn = aws_alb_target_group.target_group.arn
    security_group                 = aws_security_group.ecs_tasks_sg.id
    subnets = [
      aws_subnet.private_subnet_us_east_1a.id,
      aws_subnet.private_subnet_us_east_1b.id,
    ]
  }
}
ECS Fargate module
# Local ./permissions module; its task_definition_execution_role_arn output is
# used by the task definition.
module "permissions" {
  source = "./permissions"

  environment = var.environment
}

# ECS cluster, named per environment.
resource "aws_ecs_cluster" "cluster" {
  name = "adl-blog-us-${var.environment}"
}

# CloudWatch log group with 90-day retention.
resource "aws_cloudwatch_log_group" "logs_group" {
  name              = "/ecs/adl-blog-us-next-${var.environment}"
  retention_in_days = 90
}
# Fargate task definition with one container ("adl-blog-us-next").
resource "aws_ecs_task_definition" "task" {
  family                   = "adl-blog-us-task-${var.environment}"
  requires_compatibilities = ["FARGATE"]
  network_mode             = "awsvpc"
  cpu                      = 256
  memory                   = 512

  # The same role ARN from the permissions module is used for both roles.
  execution_role_arn = module.permissions.task_definition_execution_role_arn
  task_role_arn      = module.permissions.task_definition_execution_role_arn

  container_definitions = jsonencode([
    {
      name  = "adl-blog-us-next"
      image = "536299334720.dkr.ecr.us-east-1.amazonaws.com/adl-blog-us:latest"

      # Ports 8080 and 6379 are both mapped.
      portMappings = [
        {
          containerPort = 8080
          hostPort      = 8080
        },
        {
          containerPort = 6379
          hostPort      = 6379
        },
      ]

      environment = [
        {
          name  = "ECS_TASK_FAMILY"
          value = "adl-blog-us-task-${var.environment}"
        },
      ]

      # Container logs go to CloudWatch Logs through the awslogs driver.
      logConfiguration = {
        logDriver = "awslogs"
        options = {
          "awslogs-group"         = "/ecs/adl-blog-us-next-${var.environment}"
          "awslogs-region"        = "us-east-1"
          "awslogs-stream-prefix" = "ecs"
        }
      }

      # Container-level health check: curl the /blog endpoint on localhost.
      healthCheck = {
        command = [
          "CMD-SHELL",
          "curl -sf http://localhost:8080/blog || exit 1",
        ]
        interval    = 30
        timeout     = 5
        retries     = 3
        startPeriod = null
      }
    }
  ])
}
# Fargate service attached to the ALB target group on container port 8080.
resource "aws_ecs_service" "service" {
  name            = "adl-blog-us-task-service-${var.environment}"
  cluster         = aws_ecs_cluster.cluster.id
  task_definition = aws_ecs_task_definition.task.family
  launch_type     = "FARGATE"
  desired_count   = 3

  force_new_deployment               = true
  deployment_maximum_percent         = 200
  deployment_minimum_healthy_percent = 50

  deployment_controller {
    type = "ECS"
  }

  load_balancer {
    target_group_arn = var.public_alb_networking_details.load_balancer_target_group_arn
    container_name   = "adl-blog-us-next"
    container_port   = 8080
  }

  network_configuration {
    assign_public_ip = true

    # Subnets passed in with the ALB networking details, plus the subnet from
    # the private networking details.
    subnets = concat(
      var.public_alb_networking_details.subnets,
      [var.private_networking_details.subnet.id]
    )

    # One security group from each of the two networking inputs.
    security_groups = [
      var.public_alb_networking_details.security_group,
      var.private_networking_details.security_group.id,
    ]
  }

  # Changes to desired_count do not trigger an update of this resource.
  lifecycle {
    ignore_changes = [desired_count]
  }

  depends_on = [
    module.permissions,
  ]
}
# Untyped input; the service reads .subnet.id and .security_group.id from it.
variable "private_networking_details" {
}

# Untyped input; the service reads .subnets, .security_group and
# .load_balancer_target_group_arn from it.
variable "public_alb_networking_details" {
}

# Environment name used in resource names.
variable "environment" {
  type = string
}
Your container ports are 8080 and 6379. However, your target group says it's 80. So you have to double-check which ports you actually use on Fargate and adjust your TG accordingly.
There could be other issues as well, which aren't yet apparent. For example, you are opening port 443, but there is no listener for that. So any attempt to use HTTPS will fail.
I'm trying to deploy a docker image via terraform and AWS ECS using Fargate. Using terraform, I've created a VPC, two private and two public subnets, a ECR repository to store the image, an ECS cluster, ECS task, ECS service, and a load balancer with a target group.
These resources are created successfully, but the target group is constantly:
varying in the number of targets that are shown. For instance, refreshing will sometimes show 3 registered targets. Sometimes it will show 4.
Usually have a status of "draining" and details that say "Target deregistration in progress". Sometimes one of them will have a status of "initial" and details that say "Target registration in progress"
Additionally, visiting the URL of the load balancer returns a "503 Service Temporarily Unavailable"
I came across this post, that led to me this article, which helped me better understand how Fargate works but I'm having trouble translating this into the terraform + aws method I'm trying to implement.
I'm suspecting the issue could be in how the security groups are allowing/disallowing traffic but I'm still a novice with dev ops stuff so I appreciate in advance any help offered.
Here is the terraform main.tf that I've used to create the resources. Most of it is gathered from different tutorials and adjusted with updates whenever terraform screamed at me about a deprecation.
So, which parts of the following configuration are wrong and are causing the targets to constantly be in a draining state?
Again, thanks in advance for any help or insights provided!
# ..terraform/main.tf
# START CREATE VPC
# /16 VPC with DNS support and DNS hostnames enabled.
# `enable_classiclink = false` was dropped: the argument is deprecated and no
# longer accepted by AWS provider v5, and false is its default, so behaviour
# is unchanged on older provider versions.
resource "aws_vpc" "vpc" {
  cidr_block           = "10.0.0.0/16"
  instance_tenancy     = "default"
  enable_dns_hostnames = true
  enable_dns_support   = true

  tags = {
    Name = "vpc"
  }
}
# END CREATE VPC
# START CREATE PRIVATE AND PUBLIC SUBNETS
# Two public subnets (public IP on launch) and two private subnets, spread
# over us-east-1a and us-east-1b.
resource "aws_subnet" "public_subnet_1" {
  vpc_id                  = aws_vpc.vpc.id
  cidr_block              = "10.0.1.0/24"
  map_public_ip_on_launch = true
  availability_zone       = "us-east-1a"

  tags = {
    Name = "public-subnet-1"
  }
}

resource "aws_subnet" "public_subnet_2" {
  vpc_id                  = aws_vpc.vpc.id
  cidr_block              = "10.0.2.0/24"
  map_public_ip_on_launch = true
  availability_zone       = "us-east-1b"

  tags = {
    Name = "public-subnet-2"
  }
}

resource "aws_subnet" "private_subnet_1" {
  vpc_id                  = aws_vpc.vpc.id
  cidr_block              = "10.0.3.0/24"
  map_public_ip_on_launch = false
  availability_zone       = "us-east-1a"

  tags = {
    Name = "private-subnet-1"
  }
}

resource "aws_subnet" "private_subnet_2" {
  vpc_id                  = aws_vpc.vpc.id
  cidr_block              = "10.0.4.0/24"
  map_public_ip_on_launch = false
  availability_zone       = "us-east-1b"

  tags = {
    # Was "private-subnet-1" (copy-paste), which made both private subnets
    # carry the same Name tag.
    Name = "private-subnet-2"
  }
}
# END CREATE PRIVATE AND PUBLIC SUBNETS
# START CREATE GATEWAY
# Internet gateway attached to the VPC.
resource "aws_internet_gateway" "vpc_gateway" {
  vpc_id = aws_vpc.vpc.id

  tags = {
    Name = "vpc-gateway"
  }
}
# END CREATE GATEWAY
# START CREATE ROUTE TABLE AND ASSOCIATIONS
# Route table sending 0.0.0.0/0 to the internet gateway, associated with both
# public subnets.
resource "aws_route_table" "public_route_table" {
  vpc_id = aws_vpc.vpc.id

  route {
    gateway_id = aws_internet_gateway.vpc_gateway.id
    cidr_block = "0.0.0.0/0"
  }

  tags = {
    Name = "public-route-table"
  }
}

resource "aws_route_table_association" "route_table_association_1" {
  route_table_id = aws_route_table.public_route_table.id
  subnet_id      = aws_subnet.public_subnet_1.id
}

resource "aws_route_table_association" "route_table_association_2" {
  route_table_id = aws_route_table.public_route_table.id
  subnet_id      = aws_subnet.public_subnet_2.id
}
# END CREATE ROUTE TABLE AND ASSOCIATIONS
# START CREATE ECR REPOSITORY
# Its repository_url is used as the container image in the task definition.
resource "aws_ecr_repository" "api_ecr_repository" {
  name = "api-ecr-repository"
}
# END CREATE ECR REPOSITORY

# START CREATE ECS CLUSTER
resource "aws_ecs_cluster" "api_cluster" {
  name = "api-cluster"
}
# END CREATE ECS CLUSTER
# START CREATE ECS TASK AND DESIGNATE 'FARGATE'
# Single-container task; the container listens on port 4000.
resource "aws_ecs_task_definition" "api_cluster_task" {
  family                   = "api-cluster-task"
  requires_compatibilities = ["FARGATE"]
  network_mode             = "awsvpc"
  cpu                      = 256
  memory                   = 512
  execution_role_arn       = aws_iam_role.ecs_task_execution_role.arn

  # Raw JSON container definition; the image is the ECR repository URL with no tag.
  container_definitions = <<DEFINITION
[
{
"name": "api-cluster-task",
"image": "${aws_ecr_repository.api_ecr_repository.repository_url}",
"essential": true,
"portMappings": [
{
"containerPort": 4000,
"hostPort": 4000
}
],
"memory": 512,
"cpu": 256
}
]
DEFINITION
}
# END CREATE ECS TASK AND DESIGNATE 'FARGATE'
# START CREATE TASK POLICIES
# Trust policy allowing ecs-tasks.amazonaws.com to assume the role.
data "aws_iam_policy_document" "assume_role_policy" {
  version = "2012-10-17"

  statement {
    sid     = ""
    effect  = "Allow"
    actions = ["sts:AssumeRole"]

    principals {
      type        = "Service"
      identifiers = ["ecs-tasks.amazonaws.com"]
    }
  }
}

# Task execution role.
resource "aws_iam_role" "ecs_task_execution_role" {
  name               = "ecs-take-execution-role"
  assume_role_policy = data.aws_iam_policy_document.assume_role_policy.json
}

# AWS-managed execution policy attached to the role.
resource "aws_iam_role_policy_attachment" "ecs_task_execution_role_attachment" {
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
  role       = aws_iam_role.ecs_task_execution_role.name
}
# END CREATE TASK POLICIES
# START CREATE ECS SERVICE
# One Fargate task in the public subnets, registered with the target group on
# container port 4000.
resource "aws_ecs_service" "api_cluster_service" {
  name            = "api-cluster-service"
  cluster         = aws_ecs_cluster.api_cluster.id
  task_definition = aws_ecs_task_definition.api_cluster_task.arn
  launch_type     = "FARGATE"
  desired_count   = 1

  load_balancer {
    target_group_arn = aws_lb_target_group.api_lb_target_group.arn
    # The container is named after the task family.
    container_name = aws_ecs_task_definition.api_cluster_task.family
    container_port = 4000
  }

  network_configuration {
    assign_public_ip = true
    security_groups  = [aws_security_group.ecs_tasks.id]
    subnets = [
      aws_subnet.public_subnet_1.id,
      aws_subnet.public_subnet_2.id,
    ]
  }

  # Create the listener and the execution-role policy attachment first.
  depends_on = [
    aws_lb_listener.api_lb_listener,
    aws_iam_role_policy_attachment.ecs_task_execution_role_attachment,
  ]
}
# Security group allowing all inbound traffic from the load balancer security
# group and all outbound traffic.
resource "aws_security_group" "api_cluster_security_group" {
  vpc_id = aws_vpc.vpc.id

  ingress {
    protocol        = -1
    from_port       = 0
    to_port         = 0
    security_groups = [aws_security_group.load_balancer_security_group.id]
  }

  egress {
    protocol    = -1
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }
}
# END CREATE ECS SERVICE
# CREATE LOAD BALANCER
# Application load balancer in the two public subnets.
resource "aws_alb" "api_load_balancer" {
  name               = "api-load-balancer"
  load_balancer_type = "application"
  security_groups    = [aws_security_group.load_balancer_security_group.id]
  subnets = [
    aws_subnet.public_subnet_1.id,
    aws_subnet.public_subnet_2.id,
  ]
}

# Load balancer security group: HTTP :80 in from anywhere, everything out.
resource "aws_security_group" "load_balancer_security_group" {
  name   = "allow-load-balancer-traffic"
  vpc_id = aws_vpc.vpc.id

  ingress {
    protocol    = "tcp"
    from_port   = 80
    to_port     = 80
    cidr_blocks = ["0.0.0.0/0"]
  }

  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }
}
# END CREATE LOAD BALANCER
# CREATE ECS TASK SECURITY GROUP
resource "aws_security_group" "ecs_tasks" {
  name        = "ecs-tasks-sg"
  description = "allow inbound access from the ALB only"
  vpc_id      = aws_vpc.vpc.id

  # Inbound on the container port, restricted to the load balancer's security
  # group. The previous rule also listed cidr_blocks = ["0.0.0.0/0"], which
  # opened port 4000 to every source and contradicted the description above.
  ingress {
    protocol        = "tcp"
    from_port       = 4000
    to_port         = 4000
    security_groups = [aws_security_group.load_balancer_security_group.id]
  }

  # Outbound: everything.
  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }
}
# END ECS TASK SECURITY GROUP
# START CREATE LOAD BALANCER TARGET GROUP
# HTTP :80 target group with IP targets.
resource "aws_lb_target_group" "api_lb_target_group" {
  name        = "api-lb-target-group"
  vpc_id      = aws_vpc.vpc.id
  target_type = "ip"
  protocol    = "HTTP"
  port        = 80

  # HTTP health check on "/"; any 2xx answer counts as healthy.
  health_check {
    protocol            = "HTTP"
    path                = "/"
    matcher             = "200-299"
    interval            = "90"
    timeout             = "20"
    healthy_threshold   = "3"
    unhealthy_threshold = "2"
  }
}
# END CREATE LOAD BALANCER TARGET GROUP
# START CREATE LOAD BALANCER LISTENER
# HTTP :80 listener forwarding to the API target group.
resource "aws_lb_listener" "api_lb_listener" {
  load_balancer_arn = aws_alb.api_load_balancer.arn
  protocol          = "HTTP"
  port              = 80

  default_action {
    target_group_arn = aws_lb_target_group.api_lb_target_group.arn
    type             = "forward"
  }
}
# END CREATE LOAD BALANCER LISTENER
You are not using api_cluster_security_group at all in your setup, so it's not clear what its purpose is. Also, in your aws_security_group.ecs_tasks you are allowing only port 4000. However, due to dynamic port mapping between ALB and ECS services, you should allow all ports, not only 4000.
There could be other issues, which are not apparent yet.
I am trying to do a blue/green deployment on ECS. When I deploy my service on the ECS cluster manually, it runs fine and passes all the health checks. But whenever I do a blue/green deployment of the same service on ECS, it gets stuck in the install phase until it times out.
After the timeout I get this error: "The deployment timed out while waiting for the replacement task set to become healthy. This time out period is 60 minutes." I am not sure what to do now.
I have applied everything and tested the load balancer, target groups, and ECR; all of them seem to work fine when I manually deploy the service and test it. Please find my Terraform code below and help me out with this. Let me know if you need further details.
ECS Cluster
# ECS cluster for the production Fargate services.
resource "aws_ecs_cluster" "production-fargate-cluster" {
  name = "Production-Fargate-Cluster"
}
#Application Load Balancer
# Internet-facing; subnets come from the "infrastructure" remote state.
resource "aws_alb" "ecs_cluster_alb" {
  name            = var.ecs_cluster_name
  internal        = false
  subnets         = data.terraform_remote_state.infrastructure.outputs.two_public_subnets
  security_groups = [aws_security_group.ecs_alb_security_group.id]

  tags = {
    Name = "${var.ecs_cluster_name} - Application Load Balancer"
  }
}
#First Target group
# "Blue" target group with IP targets; health is checked at /actuator/health.
resource "aws_alb_target_group" "ecs_default_target_group" {
  name        = "${var.ecs_cluster_name}-BlueTG"
  vpc_id      = data.terraform_remote_state.infrastructure.outputs.vpc_id
  target_type = "ip"
  protocol    = "HTTP"
  port        = var.alb_target_group_port #port 80

  health_check {
    enabled             = true
    path                = "/actuator/health"
    interval            = 30
    healthy_threshold   = 3
    unhealthy_threshold = 2
  }

  tags = {
    Name = "Blue-TG"
  }
}
#First Load balancer's listener
# Forwards to the first target group. Later changes to default_action are
# ignored by Terraform (see lifecycle).
resource "aws_alb_listener" "ecs_alb_http_listener" {
  load_balancer_arn = aws_alb.ecs_cluster_alb.arn
  protocol          = "HTTP"
  port              = var.first_load_balancer_listener_port #80 port

  default_action {
    target_group_arn = aws_alb_target_group.ecs_default_target_group.arn
    type             = "forward"
  }

  lifecycle {
    ignore_changes = [default_action]
  }
}
#Second Load balancer's listener
# Port 8080, forwarding to the second target group. Later changes to
# default_action are ignored by Terraform (see lifecycle).
resource "aws_alb_listener" "ecs_alb_http_listener_second" {
  load_balancer_arn = aws_alb.ecs_cluster_alb.arn
  protocol          = "HTTP"
  port              = 8080

  default_action {
    target_group_arn = aws_alb_target_group.ecs_default_target_group_second.arn
    type             = "forward"
  }

  lifecycle {
    ignore_changes = [default_action]
  }
}
#Second Target group
# "Green" target group with IP targets; health is checked at /actuator/health.
resource "aws_alb_target_group" "ecs_default_target_group_second" {
  name        = "${var.ecs_cluster_name}-GreenTG"
  port        = 8080
  protocol    = "HTTP"
  vpc_id      = data.terraform_remote_state.infrastructure.outputs.vpc_id
  target_type = "ip"

  health_check {
    enabled             = true
    path                = "/actuator/health"
    interval            = 30
    healthy_threshold   = 3
    unhealthy_threshold = 2
  }

  tags = {
    # Was "Blue-TG" (copied from the first target group); this is the
    # "-GreenTG" group, so the tag now matches the resource name.
    Name = "Green-TG"
  }
}
Fargate ECS Service
# Fargate service whose deployments are driven by CodeDeploy.
resource "aws_ecs_service" "ecs_service" {
  name            = var.ecs_service_name
  cluster         = data.terraform_remote_state.platform.outputs.ecs_cluster_name
  task_definition = aws_ecs_task_definition.task_definition_for_application.arn
  launch_type     = "FARGATE"
  desired_count   = 2

  deployment_controller {
    type = "CODE_DEPLOY"
  }

  load_balancer {
    target_group_arn = data.terraform_remote_state.platform.outputs.aws_alb_target_group_arn[0] #target group with port 80 is given here
    container_name   = var.task_definition_name
    container_port   = var.docker_container_port
  }

  network_configuration {
    # With a load balancer and NAT gateway in place the service belongs in
    # private subnets; public subnets are used here only for experimenting.
    subnets          = data.terraform_remote_state.platform.outputs.ecs_public_subnets
    security_groups  = [aws_security_group.app_security_group.id]
    assign_public_ip = true
  }

  # Changes to these attributes do not trigger an update of this resource.
  lifecycle {
    ignore_changes = [load_balancer, task_definition, desired_count]
  }
}
#Task definition for application
# Container definitions are rendered from a template file; CPU and memory
# come from variables.
resource "aws_ecs_task_definition" "task_definition_for_application" {
  family                   = var.task_definition_name
  container_definitions    = data.template_file.ecs_task_definition_template.rendered
  requires_compatibilities = ["FARGATE"]
  network_mode             = "awsvpc"
  cpu                      = var.cpu
  memory                   = var.memory
  execution_role_arn       = aws_iam_role.fargate_iam_role.arn
  task_role_arn            = aws_iam_role.ecs_task_execution_role.arn
}
#Role
# Role used as the task definition's execution role, with a policy attached
# from the ecs-task-execution-role data source.
resource "aws_iam_role" "fargate_iam_role" {
  name               = "fargate_iam_role"
  assume_role_policy = data.aws_iam_policy_document.ecs-task-assume-role.json
}

resource "aws_iam_role_policy_attachment" "fargate_iam_role_policy" {
  policy_arn = data.aws_iam_policy.ecs-task-execution-role.arn
  role       = aws_iam_role.fargate_iam_role.name
}
# Security group attached to the application's ECS tasks.
resource "aws_security_group" "app_security_group" {
  name        = "${var.ecs_service_name}-SG"
  description = "Security group for springbootapp to communicate in and out"
  vpc_id      = data.terraform_remote_state.platform.outputs.vpc_id

  # Inbound: TCP from inside the VPC only.
  # NOTE(review): from_port/to_port describe a range, so this opens every port
  # from 80 through 8080, not just 80 and 8080 - confirm that is intended.
  ingress {
    protocol    = "TCP"
    from_port   = 80
    to_port     = 8080
    cidr_blocks = [data.terraform_remote_state.platform.outputs.vpc_cidr_block]
  }

  # Outbound: every protocol and port, to any destination.
  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = {
    Name = "${var.ecs_service_name}-SG"
  }
}
# CloudWatch log group for the application's container logs.
resource "aws_cloudwatch_log_group" "application_log_group" {
  name = "/ecs/sun-api"
}
Code Pipeline
# CodePipeline for blue/green deployment.
# Stage 1 collects two source artifacts (the Git repository and the ECR image);
# stage 2 hands both to the CodeDeployToECS action.
resource "aws_codepipeline" "codepipeline_for_blue_green_deployment" {
  name     = var.pipeline_name
  role_arn = aws_iam_role.codepipeline_roles.arn

  artifact_store {
    type     = var.artifact_store_type
    location = var.bucket_for_codepipeline
  }

  stage {
    name = "github_Source"

    # Repository source: produces "SourceArtifact".
    action {
      name             = "github_Source"
      category         = "Source"
      owner            = var.source_stage_owner
      provider         = var.source_stage_provider
      version          = "1"
      output_artifacts = ["SourceArtifact"]

      configuration = {
        PollForSourceChanges = true
        OAuthToken           = var.github_token
        Owner                = var.git_hub_owner
        Repo                 = var.repo_name
        Branch               = var.branch_name
      }
    }

    # Container image source: produces "MyImage" from the ECR repository.
    action {
      name             = "Image"
      category         = "Source"
      owner            = "AWS"
      provider         = "ECR"
      version          = "1"
      run_order        = 1
      output_artifacts = ["MyImage"]

      configuration = {
        ImageTag       = "latest"
        RepositoryName = "umar-tahir-terraform-repo"
      }
    }
  }

  stage {
    name = "Deploy"

    action {
      name            = "Deploy"
      category        = "Deploy"
      owner           = "AWS"
      provider        = "CodeDeployToECS"
      version         = "1"
      input_artifacts = ["SourceArtifact", "MyImage"]

      configuration = {
        ApplicationName     = aws_codedeploy_app.application_deploy.name
        DeploymentGroupName = aws_codedeploy_deployment_group.code_deployment_group.deployment_group_name

        # taskdef.json and appspec.yaml are both read from SourceArtifact.
        TaskDefinitionTemplateArtifact = "SourceArtifact"
        TaskDefinitionTemplatePath     = "taskdef.json"
        AppSpecTemplateArtifact        = "SourceArtifact"
        AppSpecTemplatePath            = "appspec.yaml"

        # Image artifact and the placeholder name it is bound to.
        Image1ArtifactName  = "MyImage"
        Image1ContainerName = "IMAGE1_NAME"
      }
    }
  }
}
Code Deploy
# CodeDeploy application; name and compute platform are supplied by variables.
resource "aws_codedeploy_app" "application_deploy" {
  name             = var.aws_codedeploy_app_name
  compute_platform = var.compute_platform
}
# CodeDeploy deployment group for the ECS service: blue/green traffic shifting
# between two target groups behind the production listener.
resource "aws_codedeploy_deployment_group" "code_deployment_group" {
  app_name               = aws_codedeploy_app.application_deploy.name
  deployment_group_name  = var.deployment_group_name
  deployment_config_name = var.deployment_config_name
  service_role_arn       = aws_iam_role.codedeploy_role_blue_green.arn

  deployment_style {
    deployment_type   = var.deployment_type
    deployment_option = var.deployment_option
  }

  # ECS service whose task sets CodeDeploy manages.
  # NOTE(review): service_name is hard-coded - verify it matches the name of
  # the ECS service actually deployed.
  ecs_service {
    cluster_name = data.terraform_remote_state.aws_modules_state.outputs.ecs_cluster_name
    service_name = "generalapplication"
  }

  blue_green_deployment_config {
    deployment_ready_option {
      action_on_timeout = var.action_on_timeout
    }

    terminate_blue_instances_on_deployment_success {
      action = var.terminate_blue_instances_on_deployment_success_action
    }
  }

  load_balancer_info {
    target_group_pair_info {
      prod_traffic_route {
        listener_arns = [data.terraform_remote_state.aws_modules_state.outputs.listener_arns]
      }

      target_group {
        name = data.terraform_remote_state.aws_modules_state.outputs.green_target_group_name
      }

      target_group {
        name = data.terraform_remote_state.aws_modules_state.outputs.blue_target_group_name
      }
    }
  }

  # Roll back automatically when a deployment fails.
  auto_rollback_configuration {
    enabled = true
    events  = ["DEPLOYMENT_FAILURE"]
  }
}
appspec.yaml
# CodeDeploy AppSpec for an ECS deployment.
# <TASK_DEFINITION> is a placeholder left as-is in this file.
version: 0.0
Resources:
  - TargetService:
      Type: AWS::ECS::Service
      Properties:
        TaskDefinition: <TASK_DEFINITION>
        # Container and port that receive traffic from the load balancer.
        LoadBalancerInfo:
          ContainerName: "springboottaskdefinition"
          ContainerPort: 8080
        PlatformVersion: "LATEST"
taskdef.json
{
  "family": "springboottaskdefinition",
  "taskRoleArn": "arn-xxxx",
  "executionRoleArn": "arn-xxxx",
  "networkMode": "awsvpc",
  "requiresCompatibilities": [
    "FARGATE"
  ],
  "cpu": "512",
  "memory": "1024",
  "containerDefinitions": [
    {
      "name": "springboottaskdefinition",
      "image": "<IMAGE1_NAME>",
      "essential": true,
      "portMappings": [
        {
          "containerPort": 8080,
          "hostPort": 8080,
          "protocol": "tcp"
        }
      ],
      "logConfiguration": {
        "logDriver": "awslogs",
        "options": {
          "awslogs-group": "/ecs/sun-api",
          "awslogs-region": "us-east-1",
          "awslogs-stream-prefix": "springboottaskdefinition-LogGroup-stream"
        }
      }
    }
  ]
}
I have been trying to spin up ECS using Terraform. About two days ago it was working as expected; however, today I ran `terraform apply` and I keep getting an error saying:
"The requested configuration is currently not supported. Launching EC2 instance failed"
I have researched this issue a lot. I tried hardcoding the VPC tenancy to default, changing the region, and changing the instance type, but nothing seems to fix the issue.
This is my Terraform config:
# AWS provider; the target region is supplied by var.region.
provider "aws" {
  region = var.region
}
# Availability zones of the configured region; the subnets index into .names.
data "aws_availability_zones" "available" {
}
# VPC that hosts the whole stack (default tenancy, DNS support and hostnames on).
resource "aws_vpc" "motivy_vpc" {
  cidr_block       = var.motivy_network_cidr
  instance_tenancy = "default"

  enable_dns_support   = "true"
  enable_dns_hostnames = "true"

  tags = {
    Name = var.motivy_vpc
  }
}
# Internet gateway attached to the VPC; the public route tables route through it.
resource "aws_internet_gateway" "motivy_ig" {
  vpc_id = aws_vpc.motivy_vpc.id

  tags = {
    Name = "motivy_ig"
  }
}
# Public subnet 1 - placed in the first availability zone returned for the region.
resource "aws_subnet" "motivy_public_sn_01" {
  vpc_id            = aws_vpc.motivy_vpc.id
  availability_zone = data.aws_availability_zones.available.names[0]
  cidr_block        = var.motivy_public_01_cidr

  tags = {
    Name = "motivy_public_sn_01"
  }
}
# Public subnet 2 - placed in the second availability zone returned for the region.
resource "aws_subnet" "motivy_public_sn_02" {
  vpc_id            = aws_vpc.motivy_vpc.id
  availability_zone = data.aws_availability_zones.available.names[1]
  cidr_block        = var.motivy_public_02_cidr

  tags = {
    Name = "motivy_public_sn_02"
  }
}
# Route table for public subnet 1: default route via the internet gateway.
resource "aws_route_table" "motivy_public_sn_rt_01" {
  vpc_id = aws_vpc.motivy_vpc.id

  route {
    gateway_id = aws_internet_gateway.motivy_ig.id
    cidr_block = "0.0.0.0/0"
  }

  tags = {
    Name = "motivy_public_sn_rt_01"
  }
}
# Route table for public subnet 2: default route via the internet gateway.
resource "aws_route_table" "motivy_public_sn_rt_02" {
  vpc_id = aws_vpc.motivy_vpc.id

  route {
    gateway_id = aws_internet_gateway.motivy_ig.id
    cidr_block = "0.0.0.0/0"
  }

  tags = {
    Name = "motivy_public_sn_rt_02"
  }
}
# Binds public subnet 1 to its route table.
resource "aws_route_table_association" "motivy_public_sn_rt_01_assn" {
  route_table_id = aws_route_table.motivy_public_sn_rt_01.id
  subnet_id      = aws_subnet.motivy_public_sn_01.id
}
# Binds public subnet 2 to its route table.
resource "aws_route_table_association" "motivy_public_sn_rt_02_assn" {
  route_table_id = aws_route_table.motivy_public_sn_rt_02.id
  subnet_id      = aws_subnet.motivy_public_sn_02.id
}
# Security group shared by the ECS container instances and the ALB.
resource "aws_security_group" "motivy_public_sg" {
  name        = "motivys_public_sg"
  description = "Test public access security group"
  vpc_id      = aws_vpc.motivy_vpc.id

  # SSH from anywhere.
  # NOTE(review): port 22 is open to 0.0.0.0/0 - consider restricting it.
  ingress {
    protocol    = "tcp"
    from_port   = 22
    to_port     = 22
    cidr_blocks = ["0.0.0.0/0"]
  }

  # HTTP from anywhere.
  ingress {
    protocol    = "tcp"
    from_port   = 80
    to_port     = 80
    cidr_blocks = ["0.0.0.0/0"]
  }

  # Port 5000 from anywhere (same port the target group forwards to).
  ingress {
    protocol    = "tcp"
    from_port   = 5000
    to_port     = 5000
    cidr_blocks = ["0.0.0.0/0"]
  }

  # Any protocol/port between hosts in the two public subnets.
  ingress {
    protocol  = "-1"
    from_port = 0
    to_port   = 0
    cidr_blocks = [
      var.motivy_public_01_cidr,
      var.motivy_public_02_cidr,
    ]
  }

  # All outbound traffic, to any destination.
  egress {
    protocol    = "-1"
    from_port   = "0"
    to_port     = "0"
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = {
    Name = "motivy_public_sg"
  }
}
# Looks up the task definition by the family of the one managed in this
# configuration.
data "aws_ecs_task_definition" "motivy_server" {
  task_definition = aws_ecs_task_definition.motivy_server.family
}
# Task definition for the motivy server; container definitions are read from a
# JSON file on disk.
resource "aws_ecs_task_definition" "motivy_server" {
  family                = "motivy_server"
  container_definitions = file("task-definitions/service.json")
}
# Most recent Amazon-owned AMI whose name starts with "amzn2-ami-ecs-".
# NOTE(review): the name pattern is broad; it may also match ECS-optimized
# variants that do not suit the chosen instance type - confirm.
data "aws_ami" "latest_ecs" {
  owners      = ["amazon"] # Only official images
  most_recent = true       # get the latest version

  filter {
    name   = "name"
    values = ["amzn2-ami-ecs-*"] # ECS optimized image
  }
}
# Launch configuration for the ECS container instances started by the
# autoscaling group.
resource "aws_launch_configuration" "ecs-launch-configuration" {
  # name_prefix instead of a fixed name: with create_before_destroy the
  # replacement is created while the old one still exists, so a fixed name
  # would collide. Terraform appends a unique suffix to this prefix.
  name_prefix          = "ecs-launch-configuration"
  image_id             = data.aws_ami.latest_ecs.id
  instance_type        = "t2.micro"
  iam_instance_profile = aws_iam_instance_profile.ecs-instance-profile.id
  key_name             = var.ecs_key_pair_name
  enable_monitoring    = true

  security_groups             = [aws_security_group.motivy_public_sg.id]
  associate_public_ip_address = "true"

  # NOTE(review): "standard" is the volume type used here; verify it is still
  # supported by the AMI that data.aws_ami.latest_ecs resolves to.
  root_block_device {
    volume_type           = "standard"
    volume_size           = 100
    delete_on_termination = true
  }

  lifecycle {
    create_before_destroy = true
  }

  # Writes the cluster name to the ECS agent config at first boot.
  # Heredoc body and terminator stay at column 0 so the script content is
  # passed through unchanged.
  user_data = <<EOF
#!/bin/bash
echo ECS_CLUSTER=${var.ecs_cluster} >> /etc/ecs/ecs.config
EOF
}
# Application Auto Scaling target: the ECS service's desired count may scale
# between 1 and 2.
resource "aws_appautoscaling_target" "ecs_motivy_server_target" {
  service_namespace  = "ecs"
  scalable_dimension = "ecs:service:DesiredCount"
  resource_id        = "service/${aws_ecs_cluster.motivy_ecs_cluster.name}/${aws_ecs_service.motivy_server_service.name}"

  min_capacity = 1
  max_capacity = 2

  # Explicit ordering: the service must exist before it is registered.
  depends_on = [aws_ecs_service.motivy_server_service]
}
# IAM role for the ECS container instances; its trust policy comes from the
# ecs-instance-policy document.
resource "aws_iam_role" "ecs-instance-role" {
  name               = "ecs-instance-role"
  path               = "/"
  assume_role_policy = data.aws_iam_policy_document.ecs-instance-policy.json
}
# Trust policy document: allows the EC2 service to call sts:AssumeRole.
data "aws_iam_policy_document" "ecs-instance-policy" {
  statement {
    actions = ["sts:AssumeRole"]

    principals {
      type        = "Service"
      identifiers = ["ec2.amazonaws.com"]
    }
  }
}
# Attaches the AWS-managed AmazonEC2ContainerServiceforEC2Role policy to the
# instance role.
resource "aws_iam_role_policy_attachment" "ecs-instance-role-attachment" {
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role"
  role       = aws_iam_role.ecs-instance-role.name
}
# Instance profile wrapping the ECS instance role for use by the launch
# configuration.
resource "aws_iam_instance_profile" "ecs-instance-profile" {
  name = "ecs-instance-profile"
  path = "/"
  role = aws_iam_role.ecs-instance-role.id

  # Pause for 10 seconds after creation.
  # NOTE(review): presumably a workaround for IAM propagation delay - confirm.
  provisioner "local-exec" {
    command = "sleep 10"
  }
}
# Autoscaling group that launches the ECS container instances across both
# public subnets.
resource "aws_autoscaling_group" "motivy-server-autoscaling-group" {
  name = "motivy-server-autoscaling-group"

  min_size         = var.min_instance_size
  max_size         = var.max_instance_size
  desired_capacity = var.desired_capacity

  default_cooldown          = 30
  health_check_grace_period = 30

  # When a "scale down" event occurs, which instances to kill first?
  termination_policies = ["OldestInstance"]

  # Use this launch configuration to define "how" the EC2 instances are to be launched
  launch_configuration = aws_launch_configuration.ecs-launch-configuration.name

  # Refer to vpc.tf for more information.
  # You could use the private subnets here instead,
  # if you want the EC2 instances to be hidden from the internet.
  vpc_zone_identifier = [aws_subnet.motivy_public_sn_01.id, aws_subnet.motivy_public_sn_02.id]

  # `tag` block instead of the `tags = [{...}]` list-of-maps form, which is
  # deprecated and not accepted by newer AWS provider releases.
  tag {
    key   = "Name"
    value = var.ecs_cluster
    # Make sure EC2 instances are tagged with this tag as well
    propagate_at_launch = true
  }

  lifecycle {
    create_before_destroy = true
  }
}
# Application load balancer spanning both public subnets.
resource "aws_alb" "motivy_server_alb_load_balancer" {
  name            = "motivy-alb-load-balancer"
  subnets         = [aws_subnet.motivy_public_sn_01.id, aws_subnet.motivy_public_sn_02.id]
  security_groups = [aws_security_group.motivy_public_sg.id]

  tags = {
    Name = "motivy_server_alb_load_balancer"
  }
}
# Target group forwarding HTTP traffic to port 5000 on the registered targets.
resource "aws_alb_target_group" "motivy_server_target_group" {
  name     = "motivy-server-target-group"
  vpc_id   = aws_vpc.motivy_vpc.id
  protocol = "HTTP"
  port     = 5000

  deregistration_delay = "10"

  # Load-balancer cookie based session stickiness.
  stickiness {
    type = "lb_cookie"
  }

  # HTTP check on "/"; 200, 301 and 302 count as healthy responses.
  health_check {
    protocol            = "HTTP"
    path                = "/"
    matcher             = "200,301,302"
    interval            = "30"
    timeout             = "5"
    healthy_threshold   = "2"
    unhealthy_threshold = "6"
  }

  tags = {
    Name = "motivy-server-target-group"
  }
}
# HTTP listener on port 80 that forwards every request to the target group.
resource "aws_alb_listener" "alb-listener" {
  load_balancer_arn = aws_alb.motivy_server_alb_load_balancer.arn
  protocol          = "HTTP"
  port              = "80"

  default_action {
    type             = "forward"
    target_group_arn = aws_alb_target_group.motivy_server_target_group.arn
  }
}
# Registers the autoscaling group's instances with the ALB target group.
resource "aws_autoscaling_attachment" "asg_attachment_motivy_server" {
  alb_target_group_arn   = aws_alb_target_group.motivy_server_target_group.arn
  autoscaling_group_name = aws_autoscaling_group.motivy-server-autoscaling-group.id
}
This is the exact error I get:
Error: "motivy-server-autoscaling-group": Waiting up to 10m0s: Need at least 2 healthy instances in ASG, have 0. Most recent activity: {
ActivityId: "a775c531-9496-fdf9-5157-ab2448626293",
AutoScalingGroupName: "motivy-server-autoscaling-group",
Cause: "At 2020-04-05T22:10:28Z an instance was started in response to a difference between desired and actual capacity, increasing the capacity from 0 to 2.",
Description: "Launching a new EC2 instance. Status Reason: The requested configuration is currently not supported. Please check the documentation for supported configurations. Launching EC2 instance failed.",
Details: "{\"Subnet ID\":\"subnet-05de5fc0e994d05fe\",\"Availability Zone\":\"us-east-1a\"}",
EndTime: 2020-04-05 22:10:29 +0000 UTC,
Progress: 100,
StartTime: 2020-04-05 22:10:29.439 +0000 UTC,
StatusCode: "Failed",
StatusMessage: "The requested configuration is currently not supported. Please check the documentation for supported configurations. Launching EC2 instance failed."
}
I'm not sure why it worked two days ago, but the recent Amazon ECS-optimized AMIs use the gp2 volume type.
You should set root_block_device.volume_type to gp2:
# Launch configuration excerpt: only the root volume settings are shown.
resource "aws_launch_configuration" "ecs-launch-configuration" {
  # ...

  # Root volume uses the gp2 volume type.
  root_block_device {
    volume_size           = 100
    volume_type           = "gp2"
    delete_on_termination = true
  }

  # ...
}
# Most recent Amazon-owned AMI matching the x86_64, EBS-backed, HVM
# ECS-optimized naming pattern.
data "aws_ami" "latest_ecs" {
  owners      = ["amazon"] # Only official images
  most_recent = true       # get the latest version

  filter {
    name   = "name"
    values = ["amzn2-ami-ecs-hvm-*-x86_64-ebs"] # ECS optimized image
  }
}
For me, using a t3 instance type instead of t2 worked.