AWS ECS Fargate run task: Essential container in task exited

Goal:
Create an interactive shell within an ECS Fargate container
Problem:
After running a task within the ECS service, the task status goes from PENDING straight to STOPPED and gives the following stopped reason: Essential container in task exited. Since the task is stopped, creating an interactive shell with aws ecs execute-command is not possible.
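For reference, the stopped reason and per-container exit codes can be pulled from the stopped task with something like the following (using the same shell variables as the script further down):
aws ecs describe-tasks \
--cluster "$cluster_arn" \
--tasks "$task_id" \
--region $AWS_REGION | jq '.tasks[0] | {stoppedReason, containers: [.containers[] | {name, exitCode, reason}]}'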
Background:
Using a custom ECR image for the target container
CloudWatch logs show that the image's entrypoint.sh ran successfully
Dockerfile:
FROM python:3.9-alpine AS build
ARG TERRAFORM_VERSION=1.0.2
ARG TERRAGRUNT_VERSION=0.31.0
ARG TFLINT_VERSION=0.23.0
ARG TFSEC_VERSION=0.36.11
ARG TFDOCS_VERSION=0.10.1
ARG GIT_CHGLOG_VERSION=0.14.2
ARG SEMTAG_VERSION=0.1.1
ARG GH_VERSION=2.2.0
ARG TFENV_VERSION=2.2.2
ENV VIRTUAL_ENV=/opt/venv
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
WORKDIR /src/
COPY install.sh ./install.sh
COPY requirements.txt ./requirements.txt
RUN chmod u+x ./install.sh \
&& sh ./install.sh
FROM python:3.9-alpine
ENV VIRTUAL_ENV=/opt/venv
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
ENV PATH="/usr/local/.tfenv/bin:$PATH"
WORKDIR /src/
COPY --from=build /usr/local /usr/local
COPY --from=build $VIRTUAL_ENV $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$VIRTUAL_ENV/lib/python3.9/site-packages:$PATH"
RUN apk update \
&& apk add --virtual .runtime \
bash \
git \
curl \
jq \
# needed for bats --pretty formatter
ncurses \
openssl \
grep \
# needed for pcregrep
pcre-tools \
coreutils \
postgresql-client \
libgcc \
libstdc++ \
ncurses-libs \
docker \
&& ln -sf python3 /usr/local/bin/python \
&& git config --global advice.detachedHead false \
&& git config --global user.email testing_user@users.noreply.github.com \
&& git config --global user.name testing_user
COPY entrypoint.sh ./entrypoint.sh
ENTRYPOINT ["bash", "entrypoint.sh"]
CMD ["/bin/bash"]
entrypoint.sh:
if [ -n "$ADDITIONAL_PATH" ]; then
echo "Adding to PATH: $ADDITIONAL_PATH"
export PATH="$ADDITIONAL_PATH:$PATH"
fi
source $VIRTUAL_ENV/bin/activate
pip install -e /src
echo "done"
Terraform configurations for ECS: (Using this AWS blog post as a reference)
data "aws_caller_identity" "current" {}
data "aws_region" "current" {}
module "vpc" {
source = "terraform-aws-modules/vpc/aws"
name = local.mut_id
cidr = "10.0.0.0/16"
azs = ["us-west-2a", "us-west-2b", "us-west-2c", "us-west-2d"]
enable_dns_hostnames = true
public_subnets = local.public_subnets
create_database_subnet_group = true
database_dedicated_network_acl = true
database_inbound_acl_rules = [
{
rule_number = 1
rule_action = "allow"
from_port = 5432
to_port = 5432
protocol = "tcp"
cidr_block = local.private_subnets[0]
}
]
database_subnet_group_name = "metadb"
database_subnets = local.database_subnets
private_subnets = local.private_subnets
private_dedicated_network_acl = true
private_outbound_acl_rules = [
{
rule_number = 1
rule_action = "allow"
from_port = 5432
to_port = 5432
protocol = "tcp"
cidr_block = local.database_subnets[0]
}
]
enable_nat_gateway = true
single_nat_gateway = true
one_nat_gateway_per_az = false
}
module "ecr_testing_img" {
source = "github.com/marshall7m/terraform-aws-ecr/modules//ecr-docker-img"
create_repo = true
source_path = "${path.module}/../.."
repo_name = "${local.mut_id}-integration-testing"
tag = "latest"
trigger_build_paths = [
"${path.module}/../../Dockerfile",
"${path.module}/../../entrypoint.sh",
"${path.module}/../../install.sh"
]
}
module "testing_kms" {
source = "github.com/marshall7m/terraform-aws-kms/modules//cmk"
trusted_admin_arns = [data.aws_caller_identity.current.arn]
trusted_service_usage_principals = ["ecs-tasks.amazonaws.com"]
}
module "testing_ecs_task_role" {
source = "github.com/marshall7m/terraform-aws-iam/modules//iam-role"
role_name = "${local.mut_id}-task"
trusted_services = ["ecs-tasks.amazonaws.com"]
statements = [
{
effect = "Allow"
actions = ["kms:Decrypt"]
resources = [module.testing_kms.arn]
},
{
effect = "Allow"
actions = [
"ssmmessages:CreateControlChannel",
"ssmmessages:CreateDataChannel",
"ssmmessages:OpenControlChannel",
"ssmmessages:OpenDataChannel"
]
resources = ["*"]
}
]
}
module "testing_ecs_execution_role" {
source = "github.com/marshall7m/terraform-aws-iam/modules//iam-role"
role_name = "${local.mut_id}-exec"
trusted_services = ["ecs-tasks.amazonaws.com"]
custom_role_policy_arns = ["arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"]
}
resource "aws_ecs_cluster" "testing" {
name = "${local.mut_id}-integration-testing"
configuration {
execute_command_configuration {
kms_key_id = module.testing_kms.arn
logging = "DEFAULT"
}
}
}
resource "aws_ecs_service" "testing" {
name = "${local.mut_id}-integration-testing"
task_definition = aws_ecs_task_definition.testing.arn
cluster = aws_ecs_cluster.testing.id
desired_count = 0
enable_execute_command = true
launch_type = "FARGATE"
platform_version = "1.4.0"
network_configuration {
subnets = [module.vpc.public_subnets[0]]
security_groups = [aws_security_group.testing.id]
assign_public_ip = true
}
wait_for_steady_state = true
}
resource "aws_cloudwatch_log_group" "testing" {
name = "${local.mut_id}-ecs"
}
resource "aws_ecs_task_definition" "testing" {
family = "integration-testing"
requires_compatibilities = ["FARGATE"]
task_role_arn = module.testing_ecs_task_role.role_arn
execution_role_arn = module.testing_ecs_execution_role.role_arn
network_mode = "awsvpc"
cpu = 256
memory = 512
container_definitions = jsonencode([{
name = "testing"
image = module.ecr_testing_img.full_image_url
linuxParameters = {
initProcessEnabled = true
}
logConfiguration = {
logDriver = "awslogs",
options = {
awslogs-group = aws_cloudwatch_log_group.testing.name
awslogs-region = data.aws_region.current.name
awslogs-stream-prefix = "testing"
}
}
cpu = 256
memory = 512
}])
runtime_platform {
operating_system_family = "LINUX"
cpu_architecture = "X86_64"
}
}
resource "aws_security_group" "testing" {
name = "${local.mut_id}-integration-testing-ecs"
description = "Allows internet access request from testing container"
vpc_id = module.vpc.vpc_id
egress {
description = "Allows outbound HTTP access for installing packages within container"
from_port = 80
to_port = 80
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}
egress {
description = "Allows outbound HTTPS access for installing packages within container"
from_port = 443
to_port = 443
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}
}
Snippet of the Bash script that runs the ECS task and executes a command within the container:
task_id=$(aws ecs run-task \
--cluster "$cluster_arn" \
--task-definition "$task_arn" \
--launch-type FARGATE \
--platform-version '1.4.0' \
--enable-execute-command \
--network-configuration awsvpcConfiguration="{subnets=[$subnet_id],securityGroups=[$sg_id],assignPublicIp=ENABLED}" \
--region $AWS_REGION | jq -r '.tasks[0].taskArn | split("/") | .[-1]')
echo "Task ID: $task_id"
if [ "$run_ecs_exec_check" == true ]; then
bash <( curl -Ls https://raw.githubusercontent.com/aws-containers/amazon-ecs-exec-checker/main/check-ecs-exec.sh ) "$cluster_arn" "$task_id"
fi
sleep_time=10
status=""
echo ""
echo "Waiting for task to be running"
while [ "$status" != "RUNNING" ]; do
echo "Checking status in $sleep_time seconds..."
sleep $sleep_time
status=$(aws ecs describe-tasks \
--cluster "$cluster_arn" \
--region $AWS_REGION \
--tasks "$task_id" | jq -r '.tasks[0].containers[0].managedAgents[] | select(.name == "ExecuteCommandAgent") | .lastStatus')
echo "Status: $status"
if [ "$status" == "STOPPED" ]; then
aws ecs describe-tasks \
--cluster "$cluster_arn" \
--region $AWS_REGION \
--tasks "$task_id"
exit 1
fi
# sleep_time=$(( $sleep_time * 2 ))
done
echo "Running interactive shell within container"
aws ecs execute-command \
--region $AWS_REGION \
--cluster "$cluster_arn" \
--task "$task_id" \
--command "/bin/bash" \
--interactive

As soon as the last command in your entrypoint.sh finishes, the Docker container exits, just as it would if you ran the container locally. I suggest working on getting the Docker container to run locally without exiting first, and then deploying that to ECS.
A command like tail -f /dev/null will work if you just want the container to sit there doing nothing.
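For example, a minimal sketch of an entrypoint.sh that keeps the container alive for debugging (the same setup steps as in the question, then block instead of exiting):
#!/bin/bash
# Same setup steps as the question's entrypoint.sh
if [ -n "$ADDITIONAL_PATH" ]; then
  echo "Adding to PATH: $ADDITIONAL_PATH"
  export PATH="$ADDITIONAL_PATH:$PATH"
fi
source "$VIRTUAL_ENV/bin/activate"
pip install -e /src
echo "done"
# Keep PID 1 alive so the task stays RUNNING and `aws ecs execute-command` can attach
exec tail -f /dev/null
With the container held open like this, the execute-command call at the end of the script in the question should be able to open an interactive shell.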

Related

How can I run a cmd in aws_ecs_task_definition (terraform aws)?

I need to run a docker command in aws_ecs_task_definition. I can run it directly with docker on my local machine, but I am unable to run it from the task definition.
docker run -it --rm \
--name n8n \
-p 5678:5678 \
-e DB_TYPE=postgresdb \
-e DB_POSTGRESDB_DATABASE=<POSTGRES_DATABASE> \
-e DB_POSTGRESDB_HOST=<POSTGRES_HOST> \
-e DB_POSTGRESDB_PORT=<POSTGRES_PORT> \
-e DB_POSTGRESDB_USER=<POSTGRES_USER> \
-e DB_POSTGRESDB_SCHEMA=<POSTGRES_SCHEMA> \
-e DB_POSTGRESDB_PASSWORD=<POSTGRES_PASSWORD> \
-v ~/.n8n:/home/node/.n8n \
n8nio/n8n \
n8n start
That's the command I need to run. It works fine locally, but I am unable to run it from aws_ecs_task_definition. I tried putting it in command inside container_definitions, but it does not run.
resource "aws_ecs_task_definition" "task-definition" {
family = "${var.PROJECT_NAME}-task-definition"
container_definitions = jsonencode([
{
name = "${var.PROJECT_NAME}-task-container"
image = "${var.IMAGE_PATH}"
cpu = 10
memory = 512
essential = true
environment = [
{name: "DB_TYPE", value: "postgresdb"},
{name: "DB_POSTGRESDB_DATABASE", value: "${var.DB_NAME}"},
{name: "DB_POSTGRESDB_HOST", value: "${var.DB_NAME}"},
{name: "DB_POSTGRESDB_DATABASE", value: "${aws_db_instance.rds.address}"},
{name: "DB_POSTGRESDB_PORT", value: "5432"},
{name: "DB_POSTGRESDB_USER", value: "${var.DB_USERNAME}"},
{name: "DB_POSTGRESDB_PASSWORD", value: "${var.DB_PASSWORD}"},
]
command = [
"docker", "run",
"-it", "--rm",
"--name", "${var.IMAGE_PATH}",
"-v", "~/.n8n:/home/node/.n8n",
"n8nio/n8n",
"n8n", "start",
"n8n", "restart"
]
portMappings = [
{
containerPort = 5678
hostPort = 5678
}
]
}
])
depends_on = [
aws_db_instance.rds
]
}
resource "aws_ecs_service" "service" {
name = "${var.PROJECT_NAME}-ecs-service"
cluster = aws_ecs_cluster.ecs-cluster.id
task_definition = aws_ecs_task_definition.task-definition.arn
desired_count = 1
iam_role = aws_iam_role.ecs-service-role.arn
depends_on = [aws_iam_policy_attachment.ecs-service-attach]
load_balancer {
elb_name = aws_elb.elb.name
container_name = "${var.PROJECT_NAME}-task-container"
container_port = 5678
}
}
The command in an ECS task definition doesn't take a docker command. It is the command that should be run inside the Docker container that ECS is starting. ECS is a Docker orchestration service: it runs the docker commands for you behind the scenes, and you never give ECS a direct docker command to run.
Looking at the docker command you are running locally, the command part that is being executed inside the container is n8n start. So your command should be:
command = [
"n8n", "start"
]
All those other docker command arguments, like the container name, volume mapping, environment variables, and image ID, are arguments that you would specify elsewhere in the ECS task definition. It appears you have already specified all of those elsewhere in your task definition, except for the volume mapping.
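A trimmed sketch of how that maps onto the question's container definition (names and variables taken from the question; the environment block is unchanged and elided here):
container_definitions = jsonencode([
  {
    name      = "${var.PROJECT_NAME}-task-container"
    image     = var.IMAGE_PATH
    cpu       = 10
    memory    = 512
    essential = true
    # environment = [ ... the DB_* entries already defined in the question ... ]
    # command is what runs inside the container, i.e. the part after the image name locally
    command   = ["n8n", "start"]
    portMappings = [
      {
        containerPort = 5678
        hostPort      = 5678
      }
    ]
  }
])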

Do environment variables inside the task definition take priority over container environment variables?

I have this Dockerfile
FROM node:14-slim AS ui-build
WORKDIR /usr/src
COPY ui/ ./ui/
RUN cd ui && npm install && npm run build
FROM node:14-slim AS api-build
WORKDIR /usr/src
COPY api/ ./api/
ENV ENVIRONMENT test
ENV URI test
RUN cd api && npm install && npm run build
RUN ls
FROM node:14-slim
WORKDIR /root/
COPY --from=ui-build /usr/src/ui/build ./ui/build
COPY --from=api-build /usr/src/api/dist .
RUN ls
EXPOSE 80
CMD ["node", "api.bundle.js"]
and this task definition in terraform
resource "aws_ecs_task_definition" "main" {
family = var.task_name
network_mode = var.net_mode
requires_compatibilities = [var.ecs_type]
cpu = var.container_cpu
memory = var.container_memory
execution_role_arn = aws_iam_role.ecs_task_execution_role.arn
container_definitions = jsonencode([{
name = var.container_name
image = var.container_image
essential = var.essential
environment = [{"name": "ENVIRONMENT", "value": "${var.environment}"}, {"name": "URI", "value": "${var.uridb}"}] //this envs will be pass to the container to select deploy enviroment
portMappings = [{
protocol = var.protocol
containerPort = tonumber(var.container_port)
hostPort = tonumber(var.container_host_port)
}]
logConfiguration = {
logDriver = var.log_driver
options = {
awslogs-group = aws_cloudwatch_log_group.main_lgr.name
awslogs-stream-prefix = "ecs"
//awslogs-create-group = "true" // creates new log group with awslogs-grou
awslogs-region = var.region
}
}
}])
tags = {
Environment = var.environment
}
depends_on = [aws_iam_role.ecs_task_execution_role]
}
Taking a look inside my container, it seems that the ENVs in my Dockerfile take precedence over the ones in the task definition.
(screenshots: container log, task definition on AWS)
How can I make my task definition ENVs take priority over the ones in the container once I run my service?

HTTP server in an EC2 instance via Terraform

terraform {
required_providers {
aws = {
version = "~>3.27"
source = "hashicorp/aws"
}
}
}
provider "aws" {
profile = "default"
region = "us-west-2"
}
variable "tag_name" {
type = string
}
resource "aws_instance" "app_server" {
ami = "ami-830c94e3"
instance_type = "t2.micro"
vpc_security_group_ids = [aws_security_group.allow_port_8080.id]
user_data = <<-EOF
#!/bin/bash
# Use this for your user data (script from top to bottom)
# install httpd (Linux 2 version)
yum update -y
yum install -y httpd
systemctl start httpd
systemctl enable httpd
echo "<h1>Hello World from $(hostname -f)</h1>" > /var/www/html/index.html
EOF
tags = {
Name = var.tag_name
}
}
resource "aws_security_group" "allow_port_8080" {
name = "allow_port_8080"
ingress {
from_port = 8080
to_port = 8080
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}
}
This is the Terraform file I created. I want to set up an HTTP server on my EC2 instance and then access it via its public IPv4 address.
But http://publicip:8080 gives the error:
This site can’t be reached
I tried modifying the user_data as below:
user_data = <<-EOF
#!/bin/bash
echo "<h1>Hello World</h1>" > index.html
nohup busybox httpd -f -p 8080
EOF
I am following
https://www.youtube.com/watch?v=0i-Q6ZMDtlQ&list=PLqq-6Pq4lTTYwjFB9E9aLUJhTBLsCF0p_&index=32
thank you
Your aws_security_group does not allow any outgoing traffic, so you can't install httpd on the instance. You have to explicitly allow outgoing traffic:
resource "aws_security_group" "allow_port_8080" {
name = "allow_port_8080"
ingress {
from_port = 8080
to_port = 8080
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}
egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
ipv6_cidr_blocks = ["::/0"]
}
}
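Separately, note that the first user_data installs Apache httpd, which listens on port 80 by default, while the security group only opens port 8080. If you keep that user_data, you would likely also need an ingress rule for port 80 (or browse to port 80 instead), for example:
ingress {
  description = "Allows inbound HTTP access to httpd on its default port"
  from_port   = 80
  to_port     = 80
  protocol    = "tcp"
  cidr_blocks = ["0.0.0.0/0"]
}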

terraform V12: Error import KeyPair: MissingParameter: The request must contain the parameter PublicKeyMaterial

getting error "import KeyPair: MissingParameter: The request must contain the parameter PublicKeyMaterial " when I run "terraform apply". what does this error mean.
resource "aws_instance" "ec2_test_instance" {
ami = var.instance_test_ami
instance_type = var.instance_type
subnet_id = var.aws_subnet_id
key_name = aws_key_pair.deployer.key_name
tags = {
Name = var.environment_tag
}
provisioner "local-exec" {
command = "echo ${self.public_ip} > public-ip.txt"
}
provisioner "remote-exec" {
connection {
type = "ssh"
host = self.public_ip
user = "centos"
private_key = file("${path.module}/my-key")
}
inline = [
"sudo yum -y install wget, unzip",
"sudo yum -y install java-1.8.0-openjdk"
]
}
}
Assuming that everything else is correct, the connection block should be inside the provisioner, not outside of it:
resource "aws_instance" "ec2_test_instance" {
ami = var.instance_test_ami
instance_type = var.instance_type
subnet_id = var.aws_subnet_id
key_name = aws_key_pair.deployer.key_name
provisioner "remote-exec" {
connection {
type = "ssh"
host = self.public_ip
user = "centos"
private_key = file("${path.module}/my-key")
}
inline = [
"sudo yum -y install wget, unzip",
"sudo yum -y install java-1.8.0-openjdk",
]
}
}
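As for the error message itself: the ImportKeyPair call it refers to gets its PublicKeyMaterial from the aws_key_pair resource's public_key argument, so the aws_key_pair.deployer referenced by key_name likely needs that argument populated. A minimal sketch (the key name and .pub path are hypothetical):
resource "aws_key_pair" "deployer" {
  key_name   = "deployer-key"
  # public_key becomes the PublicKeyMaterial sent to EC2's ImportKeyPair call
  public_key = file("${path.module}/my-key.pub")
}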

How to create AWS AMI from created instance using terraform?

I am setting up an AWS instance with a WordPress installation and want to create an AMI from the created instance. Below is my code.
provider "aws" {
region = "${var.region}"
access_key = "${var.access_key}"
secret_key = "${var.secret_key}"
}
resource "aws_instance" "test-wordpress" {
ami = "${var.image_id}"
instance_type = "${var.instance_type}"
key_name = "test-web"
#associate_public_ip_address = yes
user_data = <<-EOF
#!/bin/bash
sudo yum update -y
sudo amazon-linux-extras install -y lamp-mariadb10.2-php7.2 php7.2
sudo yum install -y httpd mariadb-server
cd /var/www/html
sudo echo "healthy" > healthy.html
sudo wget https://wordpress.org/latest.tar.gz
sudo tar -xzf latest.tar.gz
sudo cp -r wordpress/* /var/www/html/
sudo rm -rf wordpress
sudo rm -rf latest.tar.gz
sudo chmod -R 755 wp-content
sudo chown -R apache:apache wp-content
sudo service httpd start
sudo chkconfig httpd on
EOF
tags = {
Name = "test-Wordpress-Server"
}
}
resource "aws_ami_from_instance" "test-wordpress-ami" {
name = "test-wordpress-ami"
source_instance_id = "${aws_instance.test-wordpress.id}"
depends_on = [
aws_instance.test-wordpress,
]
tags = {
Name = "test-wordpress-ami"
}
}
The AMI is created, but when I use that AMI to create another instance, the WordPress installation is not there. How can I solve this issue?
The best way to create AMI images, I think, is using Packer, which is also from HashiCorp, like Terraform.
What is Packer?
Packer is HashiCorp's open-source tool for creating machine images from source configuration. You can configure Packer images with an operating system and software for your specific use-case.
Packer creates an instance with a temporary key pair, security group, and IAM role. Custom inline commands are possible in the "shell" provisioner. Afterwards you can use this AMI with your Terraform code.
A sample script could look like this:
packer {
required_plugins {
amazon = {
version = ">= 0.0.2"
source = "github.com/hashicorp/amazon"
}
}
}
source "amazon-ebs" "linux" {
# AMI Settings
ami_name = "ami-oracle-python3"
instance_type = "t2.micro"
source_ami = "ami-xxxxxxxx"
ssh_username = "ec2-user"
associate_public_ip_address = false
ami_virtualization_type = "hvm"
subnet_id = "subnet-xxxxxx"
launch_block_device_mappings {
device_name = "/dev/xvda"
volume_size = 8
volume_type = "gp2"
delete_on_termination = true
encrypted = false
}
# Profile Settings
profile = "xxxxxx"
region = "eu-central-1"
}
build {
sources = [
"source.amazon-ebs.linux"
]
provisioner "shell" {
inline = [
"export no_proxy=localhost"
]
}
}
You can find documentation here.
You can then search for the AMI by its tag, as described in the documentation.
In your case:
data "aws_ami" "example" {
executable_users = ["self"]
most_recent = true
owners = ["self"]
filter {
name = "tag:Name"
values = ["test-wordpress-ami"]
}
}
and then reference the ID as ${data.aws_ami.example.image_id}.
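For example, a minimal sketch of launching an instance from the looked-up AMI (the resource name and instance type here are illustrative):
resource "aws_instance" "from_wordpress_ami" {
  # launch from the AMI found by the data source above
  ami           = data.aws_ami.example.image_id
  instance_type = "t2.micro"
}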