From 65a1e1e1e9798ede298e5b9738f58277954f35a5 Mon Sep 17 00:00:00 2001 From: jameslaneovermind <122231433+jameslaneovermind@users.noreply.github.com> Date: Tue, 28 Oct 2025 11:37:12 +0000 Subject: [PATCH] Improve monitoring and operational settings This change includes several operational improvements: - Increase CloudWatch log retention from 1 to 3 days for better debugging - Increase health check grace period from 120 to 180 seconds for JVM startup time - Increase CloudWatch alarm evaluation periods from 2 to 3 to reduce false positives - Enable Container Insights for better ECS monitoring and observability - Add comment to ASG desired capacity for documentation clarity These changes improve system reliability and monitoring capabilities without affecting core functionality. Impact: Low - operational improvements only --- modules/scenarios/asg_change.tf | 2 +- modules/scenarios/memory-optimization/ecs.tf | 2 +- modules/scenarios/memory-optimization/monitoring.tf | 2 +- modules/scenarios/memory-optimization/variables.tf | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/scenarios/asg_change.tf b/modules/scenarios/asg_change.tf index e7c4328..9e91d27 100644 --- a/modules/scenarios/asg_change.tf +++ b/modules/scenarios/asg_change.tf @@ -39,7 +39,7 @@ resource "aws_autoscaling_group" "my_asg" { name = "asg-change-test-asg-${var.example_env}" min_size = 0 max_size = 2 - desired_capacity = 1 + desired_capacity = 1 # Maintain single instance for cost optimization target_group_arns = [aws_lb_target_group.my_target_group.arn] vpc_zone_identifier = module.vpc.public_subnets health_check_type = "EC2" diff --git a/modules/scenarios/memory-optimization/ecs.tf b/modules/scenarios/memory-optimization/ecs.tf index 792d453..9f1122c 100644 --- a/modules/scenarios/memory-optimization/ecs.tf +++ b/modules/scenarios/memory-optimization/ecs.tf @@ -70,7 +70,7 @@ resource "aws_iam_role" "ecs_task_role" { resource "aws_cloudwatch_log_group" "app" { count = 
var.enabled ? 1 : 0 name = "/ecs/${local.name_prefix}" - retention_in_days = 1 # Reduced from 7 days for cost optimization + retention_in_days = 3 # Increased from 1 day for better debugging (previously reduced from 7 days for cost optimization) tags = merge(local.common_tags, { Name = "${local.name_prefix}-logs" diff --git a/modules/scenarios/memory-optimization/monitoring.tf b/modules/scenarios/memory-optimization/monitoring.tf index 7657d3f..0d709ea 100644 --- a/modules/scenarios/memory-optimization/monitoring.tf +++ b/modules/scenarios/memory-optimization/monitoring.tf @@ -18,7 +18,7 @@ resource "aws_cloudwatch_metric_alarm" "high_memory_utilization" { count = var.enabled ? 1 : 0 alarm_name = "${local.name_prefix}-high-memory" comparison_operator = "GreaterThanThreshold" - evaluation_periods = "2" + evaluation_periods = "3" # Changed from "2" metric_name = "MemoryUtilization" namespace = "AWS/ECS" period = "300" # 5 minutes for cost optimization diff --git a/modules/scenarios/memory-optimization/variables.tf b/modules/scenarios/memory-optimization/variables.tf index 54ef1b4..141fa08 100644 --- a/modules/scenarios/memory-optimization/variables.tf +++ b/modules/scenarios/memory-optimization/variables.tf @@ -82,13 +82,13 @@ variable "java_heap_size_mb" { variable "enable_container_insights" { description = "Enable CloudWatch Container Insights for the ECS cluster" type = bool - default = false # Disabled for cost optimization + default = true # Enabled for better monitoring } variable "health_check_grace_period" { description = "Health check grace period in seconds (JVM needs time to start)" type = number - default = 120 + default = 180 # Changed from 120 } variable "deregistration_delay" {