EFS volume hangs when accessed from an ECS task - amazon-web-services

I was previously having issues providing access to EFS from an ECS task (Providing access to EFS from ECS task)
This has now been resolved, inasmuch as the task starts, and it all looks fine.
The problem is that running df, or ls or touch on the mountpoint hangs indefinitely.
The task definition is below:
{
"taskDefinitionArn": "arn:aws:ecs:eu-west-2:000000000000:task-definition/backend-app-task:53",
"containerDefinitions": [
{
"name": "server",
"image": "000000000000.dkr.ecr.eu-west-2.amazonaws.com/foo-backend:latest-server",
"cpu": 512,
"memory": 1024,
"portMappings": [
{
"containerPort": 8000,
"hostPort": 8000,
"protocol": "tcp"
}
],
"essential": true,
"environment": [
],
"mountPoints": [
{
"sourceVolume": "persistent",
"containerPath": "/opt/data/",
"readOnly": false
}
],
"volumesFrom": [],
"logConfiguration": {
"logDriver": "awslogs",
"options": {
"awslogs-group": "/ecs/foo",
"awslogs-region": "eu-west-2",
"awslogs-stream-prefix": "ecs"
}
}
}
],
"family": "backend-app-task",
"taskRoleArn": "arn:aws:iam::000000000000:role/ecsTaskRole",
"executionRoleArn": "arn:aws:iam::000000000000:role/myEcsTaskExecutionRole",
"networkMode": "awsvpc",
"revision": 53,
"volumes": [
{
"name": "persistent",
"efsVolumeConfiguration": {
"fileSystemId": "fs-00000000000000000",
"rootDirectory": "/",
"transitEncryption": "ENABLED",
"transitEncryptionPort": 2049,
"authorizationConfig": {
"accessPointId": "fsap-00000000000000000",
"iam": "ENABLED"
}
}
}
],
"status": "ACTIVE",
"requiresAttributes": [
{
"name": "com.amazonaws.ecs.capability.logging-driver.awslogs"
},
{
"name": "ecs.capability.execution-role-awslogs"
},
{
"name": "ecs.capability.efsAuth"
},
{
"name": "com.amazonaws.ecs.capability.ecr-auth"
},
{
"name": "com.amazonaws.ecs.capability.docker-remote-api.1.19"
},
{
"name": "ecs.capability.efs"
},
{
"name": "com.amazonaws.ecs.capability.task-iam-role"
},
{
"name": "com.amazonaws.ecs.capability.docker-remote-api.1.25"
},
{
"name": "ecs.capability.execution-role-ecr-pull"
},
{
"name": "com.amazonaws.ecs.capability.docker-remote-api.1.18"
},
{
"name": "ecs.capability.task-eni"
}
],
"placementConstraints": [],
"compatibilities": [
"EC2",
"FARGATE"
],
"requiresCompatibilities": [
"FARGATE"
],
"cpu": "512",
"memory": "1024",
"registeredAt": "2022-03-08T14:23:47.391Z",
"registeredBy": "arn:aws:iam::000000000000:root",
"tags": []
}
According to the docs, hanging can occur when large amounts of data are being written to the EFS volume. This is not the case here, the EFS volume is new, and empty, with a size of 6KiB. I also tried configuring it with provisioned throughput, but that did not make any difference.
EDIT
IAM role definition:
data "aws_iam_policy_document" "ecs_task_execution_role_base" {
version = "2012-10-17"
statement {
sid = ""
effect = "Allow"
actions = ["sts:AssumeRole"]
principals {
type = "Service"
identifiers = ["ecs-tasks.amazonaws.com"]
}
}
}
# ECS task execution role
resource "aws_iam_role" "ecs_task_execution_role" {
name = var.ecs_task_execution_role_name
assume_role_policy = data.aws_iam_policy_document.ecs_task_execution_role_base.json
}
# ECS task execution role policy attachment
resource "aws_iam_role_policy_attachment" "ecs_task_execution_role" {
role = aws_iam_role.ecs_task_execution_role.name
policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
}
resource "aws_iam_role_policy_attachment" "ecs_task_execution_role2" {
role = aws_iam_role.ecs_task_execution_role.name
policy_arn = "arn:aws:iam::aws:policy/AmazonElasticFileSystemClientFullAccess"
}
resource "aws_iam_policy" "ecs_exec_policy" {
name = "ecs_exec_policy"
policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Action = ["ssmmessages:CreateControlChannel",
"ssmmessages:CreateDataChannel",
"ssmmessages:OpenControlChannel",
"ssmmessages:OpenDataChannel",
]
Effect = "Allow"
Resource = "*"
},
]
})
}
resource "aws_iam_role" "ecs_task_role" {
name = "ecsTaskRole"
assume_role_policy = data.aws_iam_policy_document.ecs_task_execution_role_base.json
managed_policy_arns = ["arn:aws:iam::aws:policy/AmazonElasticFileSystemClientFullAccess","arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy", aws_iam_policy.ecs_exec_policy.arn]
}

Related

AWS CodeDeploy: STRING_VALUE can not be converted to an Integer

Using AWS CodePipeline and setting a Source, Build and passing taskdef.json and appspec.yaml as artifacts, the deployment action Amazon ECS (Blue/Green) will fail with the error:
STRING_VALUE can not be converted to an Integer
This error does not specify where this error happens and therefore it is not possible to fix.
For reference, the files look like this:
# appspec.yaml
version: 0.0
Resources:
- TargetService:
Type: AWS::ECS::Service
Properties:
TaskDefinition: <TASK_DEFINITION>
LoadBalancerInfo:
ContainerName: "my-project"
ContainerPort: 3000
// taskdef.json
{
"family": "my-project-web",
"taskRoleArn": "arn:aws:iam::1234567890:role/ecsTaskRole-role",
"executionRoleArn": "arn:aws:iam::1234567890:role/ecsTaskExecutionRole-web",
"networkMode": "awsvpc",
"cpu": "256",
"memory": "512",
"containerDefinitions":
[
{
"name": "my-project",
"memory": "512",
"image": "01234567890.dkr.ecr.us-east-1.amazonaws.com/my-project:a09b7d81",
"environment": [],
"secrets":
[
{
"name": "APP_ENV",
"valueFrom": "arn:aws:secretsmanager:us-east-1:1234567890:secret:web/my-project-NBcsLj:APP_ENV::"
},
{
"name": "PORT",
"valueFrom": "arn:aws:secretsmanager:us-east-1:1234567890:secret:web/my-project-NBcsLj:PORT::"
},
{
"name": "APP_NAME",
"valueFrom": "arn:aws:secretsmanager:us-east-1:1234567890:secret:web/my-project-NBcsLj:APP_NAME::"
},
{
"name": "LOG_CHANNEL",
"valueFrom": "arn:aws:secretsmanager:us-east-1:1234567890:secret:web/my-project-NBcsLj:LOG_CHANNEL::"
},
{
"name": "APP_KEY",
"valueFrom": "arn:aws:secretsmanager:us-east-1:1234567890:secret:web/my-project-NBcsLj:APP_KEY::"
},
{
"name": "APP_DEBUG",
"valueFrom": "arn:aws:secretsmanager:us-east-1:1234567890:secret:web/my-project-NBcsLj:APP_DEBUG::"
}
],
"essential": true,
"logConfiguration":
{
"logDriver": "awslogs",
"options":
{
"awslogs-group": "",
"awslogs-region": "",
"awslogs-stream-prefix": ""
}
},
"portMappings":
[
{
"hostPort": 3000,
"protocol": "tcp",
"containerPort": 3000
}
],
"entryPoint": [ "web" ],
"command": []
}
],
"requiresCompatibilities": [ "FARGATE", "EC2" ],
"tags":
[
{
"key": "project",
"value": "my-project"
}
]
}
Any insights on this issue are highly appreciated!
Please refer to the following guide that outlines the supported data type for each parameter: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task_definition_parameters.html. It appears that you've provided a string where an integer is expected.
If I was to guess, looking at the above, the value for memory under containerDefinitions should be an integer not a string: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task_definition_parameters.html#container_definition_memory

How to define ephemeralStorage using terraform in a aws_batch_job_definition?

I'm trying to define the ephemeralStorage in my aws_batch_job_definition using terraform, but it is not working. I'm not sure where I should put the parameter — I can't find it in the JSON, nor in the GUI.
Here is my job definition:
resource "aws_batch_job_definition" "sample" {
name = "sample_job_definition"
type = "container"
platform_capabilities = [
"FARGATE",
]
container_properties = <<CONTAINER_PROPERTIES
{
"command": ["bash", "/root/plotter.sh"],
"image": "995648859937.dkr.ecr.us-east-1.amazonaws.com/chia:latest",
"fargatePlatformConfiguration": {
"platformVersion": "LATEST"
},
"resourceRequirements": [
{"type": "VCPU", "value": "4"},
{"type": "MEMORY", "value": "15360"}
],
"networkMode": "awsvpc",
"networkConfiguration": {
"assignPublicIp" : "ENABLED"
},
"executionRoleArn": "${aws_iam_role.ecs_task_execution_role.arn}",
"jobRoleArn": "${aws_iam_role.ecs_task_role.arn}"
}
CONTAINER_PROPERTIES
}
We've also been struggling to find information on this and it appears you can't.
The best workaround seems to be attaching and mounting an EFS volume, e.g:
{
"containerProperties": [
{
"name": "container-using-efs",
"image": "amazonlinux:2",
"command": [
"ls",
"-la",
"/mount/efs"
],
"mountPoints": [
{
"sourceVolume": "myEfsVolume",
"containerPath": "/mount/efs",
"readOnly": true
}
],
"volumes": [
{
"name": "myEfsVolume",
"efsVolumeConfiguration": {
"fileSystemId": "fs-12345678",
"rootDirectory": "/path/to/my/data",
"transitEncryption": "ENABLED",
"transitEncryptionPort": integer,
"authorizationConfig": {
"accessPointId": "fsap-1234567890abcdef1",
"iam": "ENABLED"
}
}
}
]
}
]
}

ECS Fargate Service - who needs to access KMS for Secrets?

I'm trying to set up an ECS Service that will run a single task with MySQL and a webserver. I'd like to inject some runtime parameters as environmental variables from SSM Parameter Store. Some of them will be plain text but some will be encrypted with KMS. So suppose I have the following task definition:
{
"ipcMode": null,
"executionRoleArn": "arn:aws:iam::657433956652:role/ecsTaskExecutionRole",
"containerDefinitions": [
{
"logConfiguration": {
"logDriver": "awslogs",
"options": {
"awslogs-group": "/ecs/wordpress-test",
"awslogs-region": "eu-central-1",
"awslogs-stream-prefix": "ecs"
}
},
"entryPoint": null,
"portMappings": [
{
"hostPort": 80,
"protocol": "tcp",
"containerPort": 80
}
],
"memoryReservation": 512,
"name": "wordpress"
},
{
"dnsSearchDomains": null,
"logConfiguration": {
"logDriver": "awslogs",
"secretOptions": null,
"options": {
"awslogs-group": "/ecs/wordpress-test",
"awslogs-region": "eu-central-1",
"awslogs-stream-prefix": "ecs"
}
},
"secrets": [
{
"valueFrom": "arn:aws:ssm:eu-central-1:657433956652:parameter/project/dev/db.connection.default.password",
"name": "MYSQL_ROOT_PASSWORD"
}
],
"memoryReservation": 512,
"name": "mysql"
}
],
"placementConstraints": [],
"memory": "1024",
"taskRoleArn": "arn:aws:iam::657433956652:role/ecsTaskExecutionRole",
"compatibilities": [
"FARGATE"
],
"taskDefinitionArn": "arn:aws:ecs:eu-central-1:657433956652:task-definition/wordpress-test:1",
"family": "wordpress-test",
"networkMode": "awsvpc",
"cpu": "512",
}
The question is: which role should receive access to read SSM Parameter Store and key used for encrypting SecureStrings parameters? Should it be Service, Cluster or maybe even Pipeline that actually creates the service dynamically?
Your ecsTaskExecutionRole should have permission to access SSM Parameter.
Create an inline policy and attach that policy to the arn:aws:iam::657433956652:role/ecsTaskExecutionRole
From documentation sample,
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"ssm:GetParameters",
"secretsmanager:GetSecretValue",
"kms:Decrypt"
],
"Resource": [
"arn:aws:ssm:<region>:<aws_account_id>:parameter/parameter_name",
"arn:aws:secretsmanager:<region>:<aws_account_id>:secret:secret_name",
"arn:aws:kms:<region>:<aws_account_id>:key/key_id"
]
}
]
}
https://docs.aws.amazon.com/AmazonECS/latest/developerguide/specifying-sensitive-data.html#secrets-iam

describe-task-definition not returning hostname value

I have the need to find the hostname of my ECS task via the CLI, which according to the Amazon documentation should be available via the CLI:
https://docs.aws.amazon.com/cli/latest/reference/ecs/describe-task-definition.html
However, when I run the describe-task-definition it is not returning the information:
> aws ecs describe-task-definition --task-definition my-test-task:1
{
"taskDefinition": {
"status": "ACTIVE",
"networkMode": "bridge",
"family": "my-test-task",
"placementConstraints": [],
"requiresAttributes": [
{
"name": "com.amazonaws.ecs.capability.ecr-auth"
},
{
"name": "com.amazonaws.ecs.capability.docker-remote-api.1.21"
}
],
"volumes": [
{
"host": {
"sourcePath": "/opt/cf/rails-app/public/"
},
"name": "ruby-on-rails-public-volume"
}
],
"taskDefinitionArn": "arn:aws:ecs:us-east-1:accountId:task-definition/my-test-task:1",
"containerDefinitions": [
{
"memoryReservation": 1024,
"environment": [
{
"name": "DATABASE_HOSTNAME",
"value": "hostname"
},
{
"name": "PUMA_WORKERS",
"value": "2"
},
{
"name": "RAILS_ENV",
"value": "staging"
},
{
"name": "DATABASE_NAME",
"value": "ruby-on-rails"
},
{
"name": "DEBIAN_FRONTEND",
"value": "noninteractive"
},
{
"name": "PORT",
"value": "8080"
},
{
"name": "LANG",
"value": "en_US.UTF-8"
},
{
"name": "DATABASE_PASSWORD",
"value": "cf"
},
{
"name": "DATABASE_USER",
"value": "cf"
},
{
"name": "PUMA_MAX_THREADS",
"value": "6"
}
],
"name": "my-test-task",
"mountPoints": [
{
"sourceVolume": "ruby-on-rails-public-volume",
"containerPath": "/opt/cf/rails-app/public/"
}
],
"image": "accountId.dkr.ecr.us-east-1.amazonaws.com/cf/rails:latest",
"cpu": 1024,
"portMappings": [
{
"protocol": "tcp",
"containerPort": 8080,
"hostPort": 8080
}
],
"command": [
"puma",
"-C",
"config/puma.rb"
],
"essential": true,
"volumesFrom": []
}
],
"revision": 1
}
}
I am not sure what I need to do to get that value included. I confirmed I am running the latest CLI.
Thanks!
First of all, the hostname is not defined in the task definition. It's defined in the container definition inside the task definition. Secondly, there is no default hostname; you have to explicitly define the hostname in the container definition while creating a task definition revision. By default, it uses the container ID as the hostname.

ECR Task definition: Container links should not have a cycle?

I'm using AWS-CLI to register an ECR task definition. My task definition is like follows:
{
"family": "",
"taskRoleArn": "",
"executionRoleArn": "",
"networkMode": "none",
"containerDefinitions": [
{
"name": "",
"image": "",
"cpu": 0,
"memory": 0,
"memoryReservation": 0,
"links": [
""
],
"portMappings": [
{
"hostPort": 80,
"protocol": "tcp",
"containerPort": 80
}
],
"essential": true,
"entryPoint": [
""
],
"command": [
""
],
"environment": [
{
"name": "",
"value": ""
}
],
"mountPoints": [
{
"sourceVolume": "",
"containerPath": "",
"readOnly": true
}
],
"volumesFrom": [
{
"sourceContainer": "",
"readOnly": true
}
],
"linuxParameters": {
"capabilities": {
"add": [
""
],
"drop": [
""
]
},
"devices": [
{
"hostPath": "",
"containerPath": "",
"permissions": [
"mknod"
]
}
],
"initProcessEnabled": true
},
"hostname": "",
"user": "",
"workingDirectory": "",
"disableNetworking": true,
"privileged": true,
"readonlyRootFilesystem": true,
"dnsServers": [
""
],
"dnsSearchDomains": [
""
],
"extraHosts": [
{
"hostname": "",
"ipAddress": ""
}
],
"dockerSecurityOptions": [
""
],
"dockerLabels": {
"KeyName": ""
},
"ulimits": [
{
"name": "fsize",
"softLimit": 0,
"hardLimit": 0
}
],
"logConfiguration": {
"logDriver": "syslog",
"options": {
"KeyName": ""
}
}
}
],
"volumes": [
{
"name": "",
"host": {
"sourcePath": ""
}
}
],
"placementConstraints": [
{
"type": "memberOf",
"expression": ""
}
],
"requiresCompatibilities": [
"EC2"
],
"cpu": "10",
"memory": "600"
}
, which is basically almost identical to the auto-generated skeleton:
aws ecs register-task-definition --generate-cli-skeleton
But it looks that when using the command
aws ecs register-task-definition --family taskDef --cli-input-json taskDef-v1.json --region us-east-2
I get this:
An error occurred (ClientException) when calling the RegisterTaskDefinition operation: Container links should not have a cycle
What am I doing wrong?
That particular error is caused because you have empty links defined:
"links": [
""
]
The CLI skeleton is a template you need to edit - it has many empty values for fields that are not required. The minimum task definition template is something like:
{
"containerDefinitions": [
{
"name": "my-task-name",
"image": "my-registry/my-image",
"memoryReservation": 256,
"cpu": 256
}
]
}