Make the sensor auto scaling group sizing configurable and add CPU-based scale-in.

  * autoscaling_group.tf: group sizes come from variables, health checks switch
    to "ELB" with a 600 second grace period, tags are propagated to instances,
    a warm pool is declared, and a step-scaling scale-in policy plus a low-CPU
    alarm are added.
  * launch_template.tf: the EBS volume is always encrypted and the network
    interface no longer requests a public IP address.
  * security_groups.tf / sensor_config.tf / variables.tf: the health check port
    is driven by var.sensor_health_check_http_port instead of a literal.
  * sensor_config.tf: the module source stays pinned to ?ref=v1.0.0 so that
    plans remain reproducible; only the health check port line changes.

NOTE(review): context-line whitespace was reconstructed in terraform-fmt style;
apply with `git apply --ignore-whitespace` if the working tree differs.

diff --git a/autoscaling_group.tf b/autoscaling_group.tf
--- a/autoscaling_group.tf
+++ b/autoscaling_group.tf
@@ -1,8 +1,8 @@
 resource "aws_autoscaling_group" "sensor_asg" {
   name             = var.sensor_asg_name
-  min_size         = 1
-  max_size         = 5
-  desired_capacity = 1
+  min_size         = var.min_instances
+  max_size         = var.max_instances
+  desired_capacity = var.desired_instances
 
   launch_template {
     name    = aws_launch_template.sensor_launch_template.name
@@ -11,12 +11,28 @@ resource "aws_autoscaling_group" "sensor_asg" {
 
   vpc_zone_identifier       = var.monitoring_subnet_ids
   target_group_arns         = [aws_lb_target_group.health_check.arn]
-  health_check_type         = "EC2"
-  health_check_grace_period = 300
+  # "ELB" makes the group also act on the attached target group's health checks.
+  health_check_type         = "ELB"
+  health_check_grace_period = 600
   termination_policies      = ["OldestInstance"]
   protect_from_scale_in     = false
   wait_for_capacity_timeout = 0
 
+  tag {
+    key                 = "Name"
+    value               = var.sensor_asg_name
+    propagate_at_launch = true
+  }
+
+  dynamic "tag" {
+    for_each = var.tags
+    content {
+      key                 = tag.key
+      value               = tag.value
+      propagate_at_launch = true
+    }
+  }
+
   initial_lifecycle_hook {
     lifecycle_transition = "autoscaling:EC2_INSTANCE_LAUNCHING"
     name                 = var.asg_lifecycle_hook_name
@@ -24,6 +40,12 @@ resource "aws_autoscaling_group" "sensor_asg" {
     heartbeat_timeout    = 300
   }
 
+  # Warm pool of stopped instances; var.warm_instances sets its minimum size.
+  warm_pool {
+    pool_state = "Stopped"
+    min_size   = var.warm_instances
+  }
+
   depends_on = [
     aws_lambda_function.auto_scaling_lambda,
     aws_cloudwatch_event_rule.asg_lifecycle_rule,
@@ -43,6 +65,20 @@ resource "aws_autoscaling_policy" "sensor_autoscale_policy" {
   }
 }
 
+# Step scaling policy that removes one instance each time its alarm fires.
+resource "aws_autoscaling_policy" "sensor_autoscale_scale_in_policy" {
+  name                   = "${var.sensor_asg_auto_scale_policy_name}-scale-in"
+  autoscaling_group_name = aws_autoscaling_group.sensor_asg.name
+
+  policy_type     = "StepScaling"
+  adjustment_type = "ChangeInCapacity"
+  step_adjustment {
+    # No lower bound: applies to every breach below the alarm threshold.
+    metric_interval_upper_bound = 0
+    scaling_adjustment          = -1
+  }
+}
+
 resource "awscc_cloudwatch_alarm" "sensor_asg_high_cpu_alarm" {
   statistic           = "Average"
   threshold           = 70
@@ -59,4 +95,23 @@ resource "awscc_cloudwatch_alarm" "sensor_asg_high_cpu_alarm" {
     }
   ]
   metric_name = "CPUUtilization"
-}
\ No newline at end of file
+}
+
+# Triggers the scale-in policy when average CPU stays below 30% for five 60-second periods.
+resource "awscc_cloudwatch_alarm" "sensor_asg_low_cpu_alarm" {
+  statistic           = "Average"
+  threshold           = 30
+  alarm_description   = "Scale in if CPU < 30% for 5 minutes"
+  evaluation_periods  = 5
+  period              = 60
+  comparison_operator = "LessThanThreshold"
+  namespace           = "AWS/EC2"
+  alarm_actions       = [aws_autoscaling_policy.sensor_autoscale_scale_in_policy.arn]
+  dimensions = [
+    {
+      name  = "AutoScalingGroupName"
+      value = aws_autoscaling_group.sensor_asg.name
+    }
+  ]
+  metric_name = "CPUUtilization"
+}
diff --git a/launch_template.tf b/launch_template.tf
--- a/launch_template.tf
+++ b/launch_template.tf
@@ -20,16 +20,18 @@ resource "aws_launch_template" "sensor_launch_template" {
     ebs {
       volume_size           = var.sensor_launch_template_volume_size
       volume_type           = "gp3"
-      encrypted             = var.kms_key_id == "" ? false : true
+      # Always encrypt the volume; kms_key_id stays null when no key is supplied.
+      encrypted             = true
       kms_key_id            = var.kms_key_id == "" ? null : var.kms_key_id
       delete_on_termination = true
     }
   }
 
   network_interfaces {
-    device_index          = 0
-    security_groups       = [aws_security_group.monitoring.id]
-    delete_on_termination = true
+    device_index                = 0
+    security_groups             = [aws_security_group.monitoring.id]
+    delete_on_termination       = true
+    associate_public_ip_address = false
   }
 
   user_data = module.sensor_config.cloudinit_config.rendered
diff --git a/security_groups.tf b/security_groups.tf
index 63f036c..25c8b74 100644
--- a/security_groups.tf
+++ b/security_groups.tf
@@ -18,8 +18,8 @@ resource "aws_security_group_rule" "geneve_mirror_traffic_rule" {
 
 resource "aws_security_group_rule" "monitor_traffic_rule" {
   type              = "ingress"
-  from_port         = 41080
-  to_port           = 41080
+  from_port         = var.sensor_health_check_http_port
+  to_port           = var.sensor_health_check_http_port
   protocol          = "tcp"
   security_group_id = aws_security_group.monitoring.id
   description       = "GWLB Health Check Port"
@@ -32,7 +32,7 @@ resource "aws_security_group_rule" "public_network_egress_all" {
   to_port           = 0
   protocol          = "-1"
   security_group_id = aws_security_group.monitoring.id
-  description       = "Default egress rule"
+  description       = "Monitoring network egress rule"
   cidr_blocks       = ["0.0.0.0/0"]
 }
 
diff --git a/sensor_config.tf b/sensor_config.tf
--- a/sensor_config.tf
+++ b/sensor_config.tf
@@ -12,5 +12,5 @@ module "sensor_config" {
   sensor_management_interface_name = "eth1"
   sensor_monitoring_interface_name = "eth0"
   base64_encode_config             = true
-  sensor_health_check_http_port    = "41080"
+  sensor_health_check_http_port    = var.sensor_health_check_http_port
 }
diff --git a/variables.tf b/variables.tf
index bcfb46d..ecc64aa 100644
--- a/variables.tf
+++ b/variables.tf
@@ -23,10 +23,10 @@ variable "aws_key_pair_name" {
   type        = string
 }
 
-variable "availability_zones" {
+/* variable "availability_zones" {
   description = "The availability zone the auto scale group and load balancer will use"
   type        = list(string)
-}
+} */
 
 variable "community_string" {
   description = "the community string (api string) often times referenced by Fleet"
@@ -92,7 +92,7 @@ variable "sensor_asg_name" {
   default     = "corelight-sensor"
 }
 
-variable "monitoring_nic_name" {
+/* variable "monitoring_nic_name" {
   description = "The name of the Network Interface used for monitoring GENEVE traffic to the sensor"
   type        = string
   default     = "corelight-mon-nic"
@@ -102,7 +102,7 @@ variable "management_nic_name" {
   description = "The name of the Network Interface used for management of the sensor - SSH/HTTPS"
   type        = string
   default     = "corelight-mgmt-nic"
-}
+} */
 
 
 variable "sensor_launch_template_name" {
@@ -218,3 +218,33 @@ variable "fleet_no_proxy" {
   default     = ""
   description = "(optional) hosts or domains to bypass the proxy for fleet traffic"
 }
+
+variable "min_instances" {
+  description = "Minimum number of instances in the auto-scaling group"
+  type        = number
+  default     = 1
+}
+
+variable "max_instances" {
+  description = "Maximum number of instances in the auto-scaling group"
+  type        = number
+  default     = 5
+}
+
+variable "desired_instances" {
+  description = "Desired number of instances in the auto-scaling group"
+  type        = number
+  default     = 1
+}
+
+variable "warm_instances" {
+  description = "Minimum number of warm instances in the auto-scaling group"
+  type        = number
+  default     = 0
+}
+
+variable "sensor_health_check_http_port" {
+  type        = string
+  default     = "41080"
+  description = "the port number for the HTTP health check request"
+}