EKS Terraform scale using CPU - amazon-web-services

I'm using Terraform to instantiate an EKS cluster. How do I configure when the nodes scale? For example, I would like them to scale out when CPU reaches 40%.
My eks module:
module "eks" {
......
worker_groups = [
{
name = "worker-group-1"
instance_type = "t3a.medium"
root_volume_size = "20"
asg_desired_capacity = 1
asg_max_size = 1
asg_recreate_on_change = true
kubelet_extra_args = "--node-labels=node.kubernetes.io/lifecycle=normal,instance_type=normal"
tags = [.....
]
}
]
worker_groups_launch_template = [
{
name = "spot-family-t-low"
override_instance_types = ["t3a.medium", "t2.medium","t3.medium"]
spot_instance_pools = 3
root_volume_size = "8"
asg_recreate_on_change = true
autoscaling_enabled = true
asg_max_size = 2
asg_desired_capacity = 1
kubelet_extra_args = "--node-labels=node.kubernetes.io/lifecycle=spot,type=t-low"
public_ip = false
},
]
workers_additional_policies = [aws_iam_policy.worker_policy.id]
}
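If the goal is to scale a worker group's ASG directly on average CPU (rather than relying on the Kubernetes cluster-autoscaler, which scales on pending pods), one option is an ASG target-tracking policy. A minimal sketch, assuming this version of the EKS module exposes the worker ASG names through a module.eks.workers_asg_names output (the output name may differ between module versions):
# Hedged sketch: scale worker-group-1's ASG when its average CPU crosses 40%.
resource "aws_autoscaling_policy" "workers_cpu_40" {
  name                   = "eks-workers-cpu-40"
  # Assumption: workers_asg_names is exposed by the module version in use.
  autoscaling_group_name = module.eks.workers_asg_names[0]
  policy_type            = "TargetTrackingScaling"

  target_tracking_configuration {
    predefined_metric_specification {
      predefined_metric_type = "ASGAverageCPUUtilization"
    }
    target_value = 40
  }
}
Note that worker-group-1 also needs asg_max_size raised above 1 before any policy can actually add nodes.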

Related

Data manipulation on Terraform

I'm building a DMS infrastructure, but I'm stuck because of quotas. I'm trying to do the following:
1. Take my list of 64 DBs (replicated_db) and split it into chunks of 50.
2. Create X DMS instances, where X is the number of chunks from step 1.
3. Associate each DMS replication task with a DMS instance, following the chunks from step 1.
Steps 1 and 2 are OK, but I'm stuck on step 3.
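(For reference, Terraform's chunklist function splits a list into fixed-size chunks, which is what drives the instance count below.)
# chunklist(["db1", "db2", "db3"], 2) evaluates to:
# [
#   ["db1", "db2"],
#   ["db3"],
# ]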
What I have already done:
# Create a new replication instance
resource "aws_dms_replication_instance" "dms_instance" {
  count                        = length(chunklist(var.replicated_db, 50))
  allow_major_version_upgrade  = true
  apply_immediately            = false
  auto_minor_version_upgrade   = true
  allocated_storage            = var.allocated_storage
  availability_zone            = module.rds-mssql.db_instance_availability_zone[0]
  engine_version               = "3.4.7"
  multi_az                     = var.dms_multi_az
  preferred_maintenance_window = var.maintenance_window
  publicly_accessible          = false
  replication_instance_class   = var.replication_instance_class
  replication_instance_id      = "${var.identifier}-dms-${count.index}"
  replication_subnet_group_id  = aws_dms_replication_subnet_group.dms_subnet_group[0].id
  vpc_security_group_ids = flatten([
    var.vpc_security_group_ids,
    "sg-XXXXXXXXXXXXXXXXX",
    "sg-XXXXXXXXXXXXXXXXX",
    "sg-XXXXXXXXXXXXXXXXX",
  ])
  tags = var.tags
}
resource "aws_dms_replication_task" "dms_replication_task" {
for_each = var.replicated_db
migration_type = "full-load"
replication_instance_arn = aws_dms_replication_instance.dms_instance[*].replication_instance_arn
replication_task_id = "${var.identifier}-${replace(each.value, "_", "-")}-replication-task"
table_mappings = file("${var.path_table_mapping}/table_mappings.json")
source_endpoint_arn = aws_dms_endpoint.dms_endpoint_source[each.value].endpoint_arn
target_endpoint_arn = aws_dms_endpoint.dms_endpoint_target[each.value].endpoint_arn
tags = var.tags
depends_on = [
aws_dms_endpoint.dms_endpoint_source,
aws_dms_endpoint.dms_endpoint_target,
]
}
Could someone help me with this data manipulation?
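One possible approach (a sketch, not from the original thread): build a map from each database name to the index of its chunk, then use that index to pick the matching replication instance inside the task resource.
# Hedged sketch: map each DB to the chunk (and therefore the instance) it belongs to.
locals {
  db_chunks = chunklist(var.replicated_db, 50)

  # e.g. { "db_01" = 0, ..., "db_51" = 1 }
  db_to_chunk = merge([
    for idx, chunk in local.db_chunks : {
      for db in chunk : db => idx
    }
  ]...)
}

# Then, inside aws_dms_replication_task.dms_replication_task:
#   replication_instance_arn = aws_dms_replication_instance.dms_instance[local.db_to_chunk[each.value]].replication_instance_arn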

How to create network tags for GKE nodes in Terraform

We are using the GCP network and GKE Terraform modules to create the VPC and then the GKE cluster. Now we would like to create a firewall rule whose target is the GKE nodes. We don't want to update the existing auto-created firewall rules, because the naming format GCP uses for them might change in the future and break our logic. That's why we need a separate firewall rule, along with a separate network tag pointing to the GKE nodes.
Module info:
VPC
module "vpc" {
source = "terraform-google-modules/network/google"
#version = "~> 2.5"
project_id = var.project_id
network_name = "${var.project_name}-${var.env_name}-vpc"
subnets = [
{
subnet_name = "${var.project_name}-${var.env_name}-subnet"
subnet_ip = "${var.subnetwork_cidr}"
subnet_region = var.region
}
]
secondary_ranges = {
"${var.project_name}-${var.env_name}-subnet" = [
{
range_name = "${var.project_name}-gke-pod-ip-range"
ip_cidr_range = "${var.ip_range_pods_cidr}"
},
{
range_name = "${var.project_name}-gke-service-ip-range"
ip_cidr_range = "${var.ip_range_services_cidr}"
}
]
}
}
GKE_CLUSTER
module "gke" {
source = "terraform-google-modules/kubernetes-engine/google//modules/beta-private-cluster"
project_id = var.project_id
name = "${var.project_name}-gke-${var.env_name}-cluster"
regional = true
region = var.region
zones = ["${var.region}-a", "${var.region}-b", "${var.region}-c"]
network = module.vpc.network_name
subnetwork = module.vpc.subnets_names[0]
ip_range_pods = "${var.project_name}-gke-pod-ip-range"
ip_range_services = "${var.project_name}-gke-service-ip-range"
http_load_balancing = false
network_policy = false
horizontal_pod_autoscaling = true
filestore_csi_driver = false
enable_private_endpoint = false
enable_private_nodes = true
master_ipv4_cidr_block = "${var.control_plane_cidr}"
istio = false
cloudrun = false
dns_cache = false
node_pools = [
{
name = "${var.project_name}-gke-node-pool"
machine_type = "${var.machine_type}"
node_locations = "${var.region}-a,${var.region}-b,${var.region}-c"
min_count = "${var.node_pools_min_count}"
max_count = "${var.node_pools_max_count}"
disk_size_gb = "${var.node_pools_disk_size_gb}"
# local_ssd_count = 0
# spot = false
# local_ssd_ephemeral_count = 0
# disk_type = "pd-standard"
# image_type = "COS_CONTAINERD"
# enable_gcfs = false
auto_repair = true
auto_upgrade = true
# service_account = "project-service-account#<PROJECT ID>.iam.gserviceaccount.com"
preemptible = false
# initial_node_count = 80
}
]
# node_pools_tags = {
# all = []
# default-node-pool = ["default-node-pool",]
# }
}
FIREWALL
module "firewall_rules" {
source = "terraform-google-modules/network/google//modules/firewall-rules"
project_id = var.project_id
network_name = module.vpc.network_name
rules = [{
name = "allow-istio-ingress"
description = null
direction = "INGRESS"
priority = null
ranges = ["${var.control_plane_cidr}"]
source_tags = null
source_service_accounts = null
target_tags = null
target_service_accounts = null
allow = [{
protocol = "tcp"
ports = ["15017"]
}]
deny = []
log_config = {
metadata = "INCLUDE_ALL_METADATA"
}
}]
depends_on = [module.gke]
}
Although the GKE module has a tags property to define tags explicitly, we still need help instantiating it properly and then referencing the same tag value in the firewall module.
I found a working solution to the question I posted earlier. Referring to the GKE module snippet above, we only need to modify the part below, and an explicit network tag will be created for all the nodes in that node pool:
module "gke" {
.
.
node_pools = [
{
name = "gke-node-pool"
.
.
.
},
]
node_pools_tags = {
"gke-node-pool" = "gke-node-pool-network-tag"
}
}
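The same tag string can then presumably be referenced from the firewall module so the rule targets only the tagged nodes; a sketch showing just the changed attribute:
module "firewall_rules" {
  # ... as above ...
  rules = [{
    # ... other attributes as above ...
    target_tags = ["gke-node-pool-network-tag"]
  }]
}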

Add values to deep nested map in Terraform

I'm using the AWS EKS module 18.20.5 and I'm trying to add values to a deeply nested map. The map is:
variable "eks_managed_node_groups" {
description = "Map of managed node group definitions to create"
type = any
default = {
management_cluster_on_demand = {
desired_capacity = 3
max_capacity = 10
min_capacity = 3
instance_types = ["c5.2xlarge"]
capacity_type = "ON_DEMAND"
k8s_labels = {
Environment = "testing"
GithubRepo = "infrastructure-modules-kubernetes-cluster"
GithubSource = "terraform-aws-modules"
}
additional_tags = {
cluster = "management_cluster_new"
}
block_device_mappings = {
xvda = {
device_name = "/dev/xvda"
ebs = {
volume_size = 50
volume_type = "gp2"
delete_on_termination = true
}
}
}
}
}
}
What I am aiming to do is add some extra values into the ebs section, specifically:
encrypted = true
kms_key_id = module.kms.arn
This would force any volumes added to a node group, to have their EBS volume encrypted with a KMS key.
I've tried using locals to add the values, but when the loop reaches the xvda section it ends up iterating over strings and fails:
locals {
  managed_nodes = flatten([
    for group in var.eks_managed_node_groups : [
      for vol in group.block_device_mappings : [
        for settings in vol : [
          for values in settings : values
        ]
      ]
    ]
  ])
}
Running terraform plan then results in the following error:
│ Error: Iteration over non-iterable value
│
│ on main.tf line 9, in locals:
│ 8: for settings in vol: [
│ 9: for values in settings: values
│ 10: ]
│
│ A value of type string cannot be used as the collection in a 'for' expression.
Is this even possible to accomplish?
Thanks.
I think the following should do the job:
locals {
  eks_managed_node_groups = {
    for group_name, group in var.eks_managed_node_groups :
    group_name => merge(group, {
      block_device_mappings = {
        for device_name, device in group.block_device_mappings :
        device_name => merge(device, {
          ebs = merge(device.ebs, {
            encrypted  = true
            kms_key_id = "module.kms.arn"
          })
        })
      }
    })
  }
}
resulting in:
{
  "management_cluster_on_demand" = {
    "additional_tags" = {
      "cluster" = "management_cluster_new"
    }
    "block_device_mappings" = {
      "xvda" = {
        "device_name" = "/dev/xvda"
        "ebs" = {
          "delete_on_termination" = true
          "encrypted" = true
          "kms_key_id" = "module.kms.arn"
          "volume_size" = 50
          "volume_type" = "gp2"
        }
      }
    }
    "capacity_type" = "ON_DEMAND"
    "desired_capacity" = 3
    "instance_types" = [
      "c5.2xlarge",
    ]
    "k8s_labels" = {
      "Environment" = "testing"
      "GithubRepo" = "infrastructure-modules-kubernetes-cluster"
      "GithubSource" = "terraform-aws-modules"
    }
    "max_capacity" = 10
    "min_capacity" = 3
  }
}
I don't have your module.kms.arn, so I used the string "module.kms.arn" as a placeholder. You will have to change it back to module.kms.arn.
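The merged map would then presumably be passed to the EKS module in place of the raw variable; a minimal sketch (module source and version assumed from the question):
module "eks" {
  source  = "terraform-aws-modules/eks/aws"
  version = "18.20.5"
  # ... cluster name, VPC settings, etc. ...

  eks_managed_node_groups = local.eks_managed_node_groups
}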

Creating Primary Network Interface for aws ec2 instance using terraform

I am trying to create an EC2 instance with a primary network interface instead of the default network interface, but I keep running into an issue.
Terraform version: 1.0.2
Obviously, when I add a network interface I must not also pass a subnet_id to the aws_instance module, as that ends with the error "network_interface": conflicts with subnet_id.
Here is the code I am using:
resource "aws_network_interface" "primary" {
count = var.create_network_interface ? 1 : 0
description = "primary network interface"
subnet_id = tolist(data.aws_subnet_ids.private_subnets.ids)[2]
#private_ips = ["10.20.20.120"]
security_groups = [module.db_security_group.security_group]
}
module "db_ec2_nic" {
source = "my_source"
count = var.create_network_interface ? 1 : 0
name = "${var.environment}-${var.service_name}"
ami = data.aws_ami.db.id
instance_type = "instance_type"
iam_role = "${var.environment}-${var.service_name}_instance_role"
userdata = ""
key_name = "key"
kms_arn = var.kms_arn
REVISION_BUCKET = var.revision_bucket_name
name_suffix = "${var.environment}-${var.service_name}"
stage_tag_value = var.stage
policy = data.aws_iam_policy_document.s3_buckets_policies.json
#codedeploy vars
create_app = var.create_app
ca_application_name = var.ca_application_name
deployment_group_name = var.environment
service_role = local.service_role_output
deployment_config_name = var.cd_config_name
deployment_option = var.cd_deployment_option
deployment_type = var.cd_deployment_type
auto_rollback = var.cd_auto_rollback
rollback_events = var.cd_rollback_events
ca_compute_platform = var.ca_compute_platform
ec2_db_tag = "${var.environment}-${var.service_name}"
detailed_monitoring = var.instance_detailed_monitoring_enabled
create_network_interface = var.create_network_interface
root_block_device = [
{
delete_on_termination = var.instance_volume_delete_on_termination_root
encrypted = var.instance_volume_encrypted_root
iops = var.instance_volume_iops_root
volume_type = var.instance_volume_type_root
volume_size = var.instance_volume_size_root
}
]
ebs_block_device = [
{
device_name = var.instance_volume_device_name_db
delete_on_termination = var.instance_volume_delete_on_termination_db
encrypted = var.instance_volume_encrypted_db
iops = var.instance_volume_iops_db
volume_type = var.instance_volume_type_db
volume_size = var.instance_volume_size_db
},
{
device_name = var.instance_volume_device_name_backup
delete_on_termination = var.instance_volume_delete_on_termination_backup
encrypted = var.instance_volume_encrypted_backup
iops = var.instance_volume_iops_backup
volume_type = var.instance_volume_type_backup
volume_size = var.instance_volume_size_backup
}
]
network_interface = [
{
network_interface_id = aws_network_interface.primary.id
device_index = 0
}
]
tags = merge(
{
Alias = "${var.environment}-${var.service_name}"
Role = "role"
Environment = var.environment
},
var.tags
)
}
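As an aside, because aws_network_interface.primary is declared with count, referencing it needs an instance index (otherwise Terraform reports a missing resource instance key); a sketch of the network_interface argument with that change:
network_interface = [
  {
    # Index [0] is required because the resource uses count.
    network_interface_id = aws_network_interface.primary[0].id
    device_index         = 0
  }
]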
Here is the db_ec2_nic module:
resource "aws_instance" "this" {
count = var.create_network_interface ? 1 : 0
ami = var.ami
instance_type = var.instance_type
iam_instance_profile = var.iam_role
user_data = var.userdata
key_name = var.key_name
monitoring = var.detailed_monitoring
get_password_data = var.get_win_password
ebs_optimized = var.ebs_optimized
dynamic "root_block_device" {
for_each = var.root_block_device
content {
delete_on_termination = lookup(root_block_device.value, "delete_on_termination", null)
encrypted = lookup(root_block_device.value, "encrypted", null)
kms_key_id = lookup(root_block_device.value, "kms_key_id", null)
iops = lookup(root_block_device.value, "iops", null)
volume_size = lookup(root_block_device.value, "volume_size", null)
volume_type = lookup(root_block_device.value, "volume_type", null)
}
}
dynamic "ebs_block_device" {
for_each = var.ebs_block_device
content {
delete_on_termination = lookup(ebs_block_device.value, "delete_on_termination", null)
device_name = ebs_block_device.value.device_name
encrypted = lookup(ebs_block_device.value, "encrypted", null)
iops = lookup(ebs_block_device.value, "iops", null)
snapshot_id = lookup(ebs_block_device.value, "snapshot_id", null)
volume_size = lookup(ebs_block_device.value, "volume_size", null)
volume_type = lookup(ebs_block_device.value, "volume_type", null)
}
}
dynamic "network_interface" {
for_each = var.network_interface
content {
network_interface_id = lookup(network_interface.value, "network_interface_id", null)
device_index = lookup(network_interface.value, "device_index", 0)
}
}
source_dest_check = var.source_dest_check
instance_initiated_shutdown_behavior = var.shutdown_behaviour
lifecycle {
ignore_changes = [
root_block_device,
ebs_block_device,
]
}
tags = merge({ Name = var.name }, var.tags)
}

AWS Terraform tried to destroy and rebuild RDS cluster

I have an RDS cluster I built using Terraform; it currently has deletion protection enabled.
When I update my Terraform script for something (for example, a security group change) and apply it to the environment, it always tries to tear down and rebuild the RDS cluster.
Running it now with deletion protection stops the rebuild, but it causes terraform apply to fail since it cannot destroy the cluster.
How can I get this to keep the existing RDS cluster without rebuilding every time I run my script?
`resource "aws_rds_cluster" "env-cluster" {
cluster_identifier = "mysql-env-cluster"
engine = "aurora-mysql"
engine_version = "5.7.mysql_aurora.2.03.2"
availability_zones = ["${var.aws_az1}", "${var.aws_az2}"]
db_subnet_group_name = "${aws_db_subnet_group.env-rds-subg.name}"
database_name = "dbname"
master_username = "${var.db-user}"
master_password = "${var.db-pass}"
backup_retention_period = 5
preferred_backup_window = "22:00-23:00"
deletion_protection = true
skip_final_snapshot = true
}
resource "aws_rds_cluster_instance" "env-01" {
identifier = "${var.env-db-01}"
cluster_identifier = "${aws_rds_cluster.env-cluster.id}"
engine = "aurora-mysql"
engine_version = "5.7.mysql_aurora.2.03.2"
instance_class = "db.t2.small"
apply_immediately = true
}
resource "aws_rds_cluster_instance" "env-02" {
identifier = "${var.env-db-02}"
cluster_identifier = "${aws_rds_cluster.env-cluster.id}"
engine = "aurora-mysql"
engine_version = "5.7.mysql_aurora.2.03.2"
instance_class = "db.t2.small"
apply_immediately = true
}
resource "aws_rds_cluster_endpoint" "env-02-ep" {
cluster_identifier = "${aws_rds_cluster.env-cluster.id}"
cluster_endpoint_identifier = "reader"
custom_endpoint_type = "READER"
excluded_members = ["${aws_rds_cluster_instance.env-01.id}"]
}`
I had a similar experience when trying to set up an AWS Aurora cluster and instance.
Each time I ran terraform apply, it tried to recreate the Aurora cluster and instance.
Here's my Terraform script:
locals {
  aws_region      = "eu-west-1"
  tag_environment = "Dev"
  tag_terraform = {
    "true"  = "Managed by Terraform"
    "false" = "Not Managed by Terraform"
  }
  tag_family = {
    "aurora" = "Aurora"
  }
  tag_number = {
    "1" = "1"
    "2" = "2"
    "3" = "3"
    "4" = "4"
  }
}
# RDS Cluster
module "rds_cluster_1" {
source = "../../../../modules/aws/rds-cluster-single"
rds_cluster_identifier = var.rds_cluster_identifier
rds_cluster_engine = var.rds_cluster_engine
rds_cluster_engine_mode = var.rds_cluster_engine_mode
rds_cluster_engine_version = var.rds_cluster_engine_version
rds_cluster_availability_zones = ["${local.aws_region}a"]
rds_cluster_database_name = var.rds_cluster_database_name
rds_cluster_port = var.rds_cluster_port
rds_cluster_master_username = var.rds_cluster_master_username
rds_cluster_master_password = module.password.password_result
rds_cluster_backup_retention_period = var.rds_cluster_backup_retention_period
rds_cluster_apply_immediately = var.rds_cluster_apply_immediately
allow_major_version_upgrade = var.allow_major_version_upgrade
db_cluster_parameter_group_name = var.rds_cluster_parameter_group_name
rds_cluster_deletion_protection = var.rds_cluster_deletion_protection
enabled_cloudwatch_logs_exports = var.enabled_cloudwatch_logs_exports
skip_final_snapshot = var.skip_final_snapshot
# vpc_security_group_ids = var.vpc_security_group_ids
tag_environment = local.tag_environment
tag_terraform = local.tag_terraform.true
tag_number = local.tag_number.1
tag_family = local.tag_family.aurora
}
Here's how I solved it:
The issue was that on every terraform apply, Terraform detected that the cluster had drifted into two additional availability zones and wanted to recreate the resources:
Terraform detected the following changes made outside of Terraform since the last "terraform apply":

  # module.rds_cluster_1.aws_rds_cluster.main has changed
  ~ resource "aws_rds_cluster" "main" {
      ~ availability_zones = [
          + "eu-west-1b",
          + "eu-west-1c",
            # (1 unchanged element hidden)
        ]
      ~ cluster_members = [
          + "aurora-postgres-instance-0",
However, my Terraform script only specified one availability zone (rds_cluster_availability_zones = ["${local.aws_region}a"]). All I had to do was specify all three availability zones for my region (rds_cluster_availability_zones = ["${local.aws_region}a", "${local.aws_region}b", "${local.aws_region}c"]):
locals {
  aws_region      = "eu-west-1"
  tag_environment = "Dev"
  tag_terraform = {
    "true"  = "Managed by Terraform"
    "false" = "Not Managed by Terraform"
  }
  tag_family = {
    "aurora" = "Aurora"
  }
  tag_number = {
    "1" = "1"
    "2" = "2"
    "3" = "3"
    "4" = "4"
  }
}
# RDS Cluster
module "rds_cluster_1" {
  source                              = "../../../../modules/aws/rds-cluster-single"
  rds_cluster_identifier              = var.rds_cluster_identifier
  rds_cluster_engine                  = var.rds_cluster_engine
  rds_cluster_engine_mode             = var.rds_cluster_engine_mode
  rds_cluster_engine_version          = var.rds_cluster_engine_version
  rds_cluster_availability_zones      = ["${local.aws_region}a", "${local.aws_region}b", "${local.aws_region}c"]
  rds_cluster_database_name           = var.rds_cluster_database_name
  rds_cluster_port                    = var.rds_cluster_port
  rds_cluster_master_username         = var.rds_cluster_master_username
  rds_cluster_master_password         = module.password.password_result
  rds_cluster_backup_retention_period = var.rds_cluster_backup_retention_period
  rds_cluster_apply_immediately       = var.rds_cluster_apply_immediately
  allow_major_version_upgrade         = var.allow_major_version_upgrade
  db_cluster_parameter_group_name     = var.rds_cluster_parameter_group_name
  rds_cluster_deletion_protection     = var.rds_cluster_deletion_protection
  enabled_cloudwatch_logs_exports     = var.enabled_cloudwatch_logs_exports
  skip_final_snapshot                 = var.skip_final_snapshot
  # vpc_security_group_ids            = var.vpc_security_group_ids
  tag_environment                     = local.tag_environment
  tag_terraform                       = local.tag_terraform.true
  tag_number                          = local.tag_number.1
  tag_family                          = local.tag_family.aurora
}
Resources: Terraform wants to recreate cluster on every apply #8
If you don't want your RDS in three zones, there is a workaround here: https://github.com/hashicorp/terraform-provider-aws/issues/1111#issuecomment-373433010
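A common workaround of that kind (the specifics are in the linked comment) is to have Terraform ignore drift on availability_zones; a hedged sketch applied to the cluster from the question:
resource "aws_rds_cluster" "env-cluster" {
  # ... existing arguments unchanged ...

  # Assumption: it is acceptable for this cluster to ignore AZ drift.
  lifecycle {
    ignore_changes = [availability_zones]
  }
}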