From 4b1ebab1794a1160d5d2c1419ed0b766e8cdd1db Mon Sep 17 00:00:00 2001 From: Dan Clegg Date: Fri, 7 Feb 2025 16:29:57 -0700 Subject: [PATCH 1/5] Updated rke, rancher, tools Use tf orb --- .circleci/config.yml | 55 +++++++++++++++++++++++++++++++++++++++++ .pre-commit-config.yaml | 27 ++++++++++++++++++++ .tflint.hcl | 6 +++++ README.md | 1 + cortex.yaml | 13 ++++++++++ main.tf | 21 ++++++++++++---- renovate.json | 6 +++++ variables.tf | 51 +++++++++++++++++++++++++++++++++----- 8 files changed, 169 insertions(+), 11 deletions(-) create mode 100644 .circleci/config.yml create mode 100644 .pre-commit-config.yaml create mode 100644 .tflint.hcl create mode 100644 cortex.yaml create mode 100644 renovate.json diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000..90ef47a --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,55 @@ +# CircleCI 2.1 configuration file +# Check https://circleci.com/docs/2.0/sample-config/ for more details +# +version: 2.1 + +orbs: + terraform: circleci/terraform@3.6.0 + +executors: + trivy: + docker: + - image: aquasec/trivy:0.60.0 + environment: + ENV_FILE: /tmp/workspace/.env + WORKSPACE: /tmp/workspace + +jobs: + terraform_fmt: + description: Check terraform format + executor: terraform/default + working_directory: /tmp/workspace + steps: + - checkout + - terraform/init: + path: . + - terraform/validate: + path: . + - terraform/fmt: + path: . + scan: + executor: trivy + steps: + - checkout + - setup_remote_docker + - run: + name: Install trivy + command: | + apk add --update-cache --upgrade curl + curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin + - run: + name: Scan filesystem + command: | + trivy fs --include-non-failures --misconfig-scanners terraform \ + --exit-code 0 --no-progress \ + --scanners vuln,secret,config --severity CRITICAL,HIGH,MEDIUM,LOW \ + --output "trivy-results.json" --format json --ignore-unfixed . 
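      # The same scan can be reproduced locally with the trivy container image
      # (a sketch only; it assumes Docker is available and simply mirrors the
      # flags used in the CI step above):
      #
      #   docker run --rm -v "$PWD":/src -w /src aquasec/trivy:0.60.0 \
      #     fs --scanners vuln,secret,config --severity CRITICAL,HIGH,MEDIUM,LOW \
      #     --format json --output trivy-results.json --ignore-unfixed .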
+ - store_artifacts: + path: trivy-results.json + destination: trivy_output + +workflows: + Lint and Scan: + jobs: + - terraform_fmt + - scan diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..cba483c --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,27 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: check-merge-conflict + - id: end-of-file-fixer + - id: no-commit-to-branch + - id: check-case-conflict + - repo: https://github.com/antonbabenko/pre-commit-terraform + rev: v1.97.4 + hooks: + - id: terraform_validate + args: + - '--hook-config=--retry-once-with-cleanup=true' + - id: terraform_providers_lock + args: + - --tf-init-args=-upgrade + - id: terraform_docs + args: + - '--args=--lockfile=false' + - '--hook-config=--path-to-file=README.md' + - '--hook-config=--add-to-existing-file=true' + - '--hook-config=--create-file-if-not-exist=true' + - id: terraform_fmt + - id: terraform_tflint + args: + - '--args=--config=__GIT_WORKING_DIR__/.tflint.hcl' diff --git a/.tflint.hcl b/.tflint.hcl new file mode 100644 index 0000000..0295f7e --- /dev/null +++ b/.tflint.hcl @@ -0,0 +1,6 @@ +plugin "aws" { + enabled = true + deep_check = false + version = "0.38.0" + source = "github.com/terraform-linters/tflint-ruleset-aws" +} diff --git a/README.md b/README.md index 6619c73..7931e27 100644 --- a/README.md +++ b/README.md @@ -64,4 +64,5 @@ module "rancher" { ``` ## Development + Please submit any feature enhancements, bug fixes, or ideas via pull requests or issues. diff --git a/cortex.yaml b/cortex.yaml new file mode 100644 index 0000000..b8270c3 --- /dev/null +++ b/cortex.yaml @@ -0,0 +1,13 @@ +openapi: 3.0.1 +info: + title: terraform-aws-rancher + description: Terraform module to deploy Rancher on AWS + x-cortex-tag: terraform-aws-rancher + x-cortex-type: service + x-cortex-git: + github: + repository: dominodatalab/terraform-aws-rancher + x-cortex-owners: + - email: eng-infrastructure@dominodatalab.com + type: email + description: Infrastructure Engineering diff --git a/main.tf b/main.tf index 9787f6e..e46f615 100644 --- a/main.tf +++ b/main.tf @@ -1,3 +1,14 @@ +terraform { + required_version = ">= 1.3.0" + + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 4.0.0" + } + } +} + locals { lb_name = "${var.name}-lb-${var.internal_lb ? 
"int" : "ext"}" lb_secgrp_name = "${var.name}-lb" @@ -75,7 +86,7 @@ resource "aws_elb" "this" { name = local.lb_name security_groups = [aws_security_group.loadbalancer.id] subnets = var.lb_subnet_ids - instances = aws_instance.this.*.id + instances = aws_instance.this[*].id internal = var.internal_lb idle_timeout = 3600 @@ -242,7 +253,7 @@ resource "aws_security_group" "instances" { description = "Node intercommunication" from_port = 0 to_port = 0 - protocol = "-1" + protocol = -1 self = true } @@ -250,7 +261,7 @@ resource "aws_security_group" "instances" { description = "Allow all outbound traffic" from_port = 0 to_port = 0 - protocol = "-1" + protocol = -1 cidr_blocks = ["0.0.0.0/0"] } @@ -359,9 +370,9 @@ resource "aws_security_group_rule" "provisioner_secgrp_ingress_443" { # Provisioner #------------------------------------------------------------------------------ module "ranchhand" { - source = "github.com/dominodatalab/ranchhand?ref=v1.1.1" + source = "github.com/dominodatalab/ranchhand?ref=plat-9091-rancher" - node_ips = aws_instance.this.*.private_ip + node_ips = aws_instance.this[*].private_ip working_dir = var.ranchhand_working_dir cert_dnsnames = concat([aws_elb.this.dns_name], var.cert_dnsnames) diff --git a/renovate.json b/renovate.json new file mode 100644 index 0000000..39a2b6e --- /dev/null +++ b/renovate.json @@ -0,0 +1,6 @@ +{ + "$schema": "https://docs.renovatebot.com/renovate-schema.json", + "extends": [ + "config:base" + ] +} diff --git a/variables.tf b/variables.tf index 8c2208b..844cdd5 100644 --- a/variables.tf +++ b/variables.tf @@ -3,6 +3,7 @@ #------------------------------------------------------------------------------ variable "vpc_id" { description = "VPC where resources should be created" + type = string } variable "lb_subnet_ids" { @@ -17,6 +18,7 @@ variable "subnet_ids" { variable "ssh_key_name" { description = "Name of the EC2 key pair to use for the instances" + type = string } #------------------------------------------------------------------------------ @@ -25,16 +27,19 @@ variable "ssh_key_name" { variable "use_provisioner_secgrp" { description = "Determines whether to use the security provision_security_group or provisioner_cidr_block inputs." default = "true" + type = string } variable "provisioner_security_group" { description = "ID of security group attached to the VM that will provision the Rancher instances. This is typically a bastion host." default = "" + type = string } variable "provisioner_cidr_block" { description = "CIDR address of the host that will provision the Rancher instances. This will only work with instances that are publicly accessible." default = "" + type = string } #------------------------------------------------------------------------------ @@ -43,86 +48,103 @@ variable "provisioner_cidr_block" { variable "name" { description = "Root name applied to all resources" default = "rancher" + type = string } variable "internal_lb" { description = "Create an internal load balancer. Defaults to internet-facing." 
default = false + type = string } variable "lb_security_groups" { description = "Grant LB ingress access to one or more security group IDs" default = [] + type = list(string) } variable "lb_security_groups_count" { description = "Count of dynamically determines lb_security_groups" default = 0 + type = number } variable "lb_cidr_blocks" { description = "Grant LB ingress access to one or more CIDR addresses" default = [] + type = list(string) } variable "instance_count" { description = "Number of instances to launch" default = 3 + type = number } variable "ami" { - description = "Instance AMI defaults to Ubuntu 16.04" - default = "ami-0565af6e282977273" + description = "Instance AMI defaults to Ubuntu 24.04" + default = "ami-00c257e12d6828491" + type = string } variable "instance_type" { description = "Type of instances to launch" default = "t3.xlarge" + type = string } variable "os_disk_size" { description = "Root partition volume size for instances" default = 30 + type = number } variable "os_disk_type" { description = "Root partition volume type for instances" default = "gp3" + type = string } variable "os_disk_delete_on_termination" { description = "Destroy root EBS volume when instances are terminated" default = true + type = bool } variable "os_disk_encrypted" { description = "Encrypt root EBS volume" default = true + type = bool } variable "os_disk_kms_key_id" { description = "Optional encryption key for root EBS volume" default = "" + type = string } variable "ebs_optimized" { description = "Attach NICs dedicated to EBS volume network traffic" default = true + type = bool } variable "enable_detailed_monitoring" { description = "Launch EC2 instances with detailed monitoring enabled" default = false + type = bool } variable "enable_deletion_protection" { description = "" default = false + type = bool } variable "tags" { description = "Extra tags assigned to all resources" default = {} + type = map(string) } #------------------------------------------------------------------------------ @@ -131,41 +153,49 @@ variable "tags" { variable "ranchhand_working_dir" { description = "Directory where ranchhand should be executed. Defaults to the current working directory." default = "" + type = string } variable "cert_dnsnames" { description = "Hostnames for the rancher and rke ssl certs (comma-delimited)" default = [""] + type = list(string) } variable "cert_ipaddresses" { description = "IP addresses for the rancher and rke ssl certs (comma-delimited)" default = ["127.0.0.1"] + type = list(string) } variable "ssh_username" { description = "SSH username on the nodes" default = "ubuntu" + type = string } variable "ssh_key_path" { description = "Path to the SSH private key that will be used to connect to the VMs" default = "~/.ssh/id_rsa" + type = string } variable "ssh_proxy_user" { description = "Bastion host SSH username" default = "" + type = string } variable "ssh_proxy_host" { description = "Bastion host used to proxy SSH connections" default = "" + type = string } variable "admin_password" { description = "Password override for the initial admin user" default = "" + type = string } # Update the rancher_* variables together @@ -174,40 +204,49 @@ variable "admin_password" { # before changing these values variable "rancher_version" { description = "Override for the installed Rancher version. Without the [v]" - default = "2.7.5" + default = "2.10.5" + type = string } variable "rancher_image_tag" { description = "Override for the installed Rancher image tag. 
With the [v]" - default = "v2.7.5" + default = "v2.10.5" + type = string } variable "rancher_kubectl_version" { description = "Override for the kubectl version supported by RKE to install. With the [v]" - default = "v1.26.7" + default = "v1.31.5" + type = string } variable "rancher_rke_version" { description = "Override for the installed RKE image tag. With the [v]" - default = "v1.4.8" + default = "v1.7.3" + type = string } variable "helm_v3_registry_host" { default = "" + type = string } variable "helm_v3_registry_user" { default = "" + type = string } variable "helm_v3_registry_password" { default = "" + type = string } variable "newrelic_license_key" { default = "" + type = string } variable "require_imdsv2" { description = "Require instance metadata service v2" + type = bool } From 4e2a35d794a4c99c4e165bd9cc1e77dd0b62f314 Mon Sep 17 00:00:00 2001 From: Secretions Date: Fri, 17 Jan 2025 18:49:23 -0800 Subject: [PATCH 2/5] Update ranchhand for openssl/cryptography compatibility fix (#48) --- main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.tf b/main.tf index e46f615..9d5a598 100644 --- a/main.tf +++ b/main.tf @@ -370,7 +370,7 @@ resource "aws_security_group_rule" "provisioner_secgrp_ingress_443" { # Provisioner #------------------------------------------------------------------------------ module "ranchhand" { - source = "github.com/dominodatalab/ranchhand?ref=plat-9091-rancher" + source = "github.com/dominodatalab/ranchhand?ref=v1.1.2" node_ips = aws_instance.this[*].private_ip From b7d006d09cafd73059705a4dbbf38073725795c7 Mon Sep 17 00:00:00 2001 From: Dan Clegg Date: Wed, 19 Mar 2025 16:05:33 -0600 Subject: [PATCH 3/5] Update cortex Noting implications of metadata_options.http_tokens=optional Force IMDSv2 Initial rke2 migration --- README.md | 2 + cloud-init/rke2-agent.yaml | 16 + cloud-init/rke2-server.yaml | 29 ++ cortex.yaml | 11 +- data.tf | 42 ++ locals.tf | 19 + main.tf | 817 +++++++++++++++++++++++------------- output.tf | 9 +- variables.tf | 413 ++++++++++-------- 9 files changed, 888 insertions(+), 470 deletions(-) create mode 100644 cloud-init/rke2-agent.yaml create mode 100644 cloud-init/rke2-server.yaml create mode 100644 data.tf create mode 100644 locals.tf diff --git a/README.md b/README.md index 7931e27..da63f67 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # terraform-aws-rancher +**Note:** This module provisions Rancher clusters using RKE2 and containerd (not RKE1 or Docker). + Terraform module which creates an HA deployment of Rancher inside AWS using [RanchHand](https://github.com/dominodatalab/ranchhand). 
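For example, the RKE2-specific inputs introduced by this migration can be supplied roughly as follows (a sketch only: every value shown is a placeholder, and `variables.tf` remains the authoritative list of inputs):

```hcl
module "rancher" {
  source = "github.com/dominodatalab/terraform-aws-rancher"

  vpc_id     = "vpc-0123456789abcdef0"                                   # placeholder
  subnet_ids = ["subnet-aaa11111", "subnet-bbb22222", "subnet-ccc33333"] # placeholder

  server_count    = 3
  cni_plugin      = "calico"
  ami             = ""              # empty string selects the Ubuntu AMI lookup
  rke2_server_url = ""              # empty for the initial/bootstrap server
  rke2_token      = var.rke2_token  # sensitive; inject from a secret store
}
```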
## Usage diff --git a/cloud-init/rke2-agent.yaml b/cloud-init/rke2-agent.yaml new file mode 100644 index 0000000..e5822d3 --- /dev/null +++ b/cloud-init/rke2-agent.yaml @@ -0,0 +1,16 @@ +#cloud-config +package_update: true + +write_files: + - path: /etc/rancher/rke2/config.yaml + content: | + server: ${server_url} + token: ${rke2_token} + node-label: + - "node-type=worker" + permissions: '0600' + +runcmd: + - curl -sfL https://get.rke2.io | INSTALL_RKE2_TYPE="agent" sh - + - systemctl enable rke2-agent.service + - systemctl start rke2-agent.service \ No newline at end of file diff --git a/cloud-init/rke2-server.yaml b/cloud-init/rke2-server.yaml new file mode 100644 index 0000000..25c024f --- /dev/null +++ b/cloud-init/rke2-server.yaml @@ -0,0 +1,29 @@ +#cloud-config +package_update: true + +packages: + - curl + - wget + +write_files: + - path: /etc/rancher/rke2/config.yaml + content: | + token: ${rke2_token} + %{ if server_url != "" }server: ${server_url}%{ endif } + tls-san: + - ${server_url} + node-taint: + - "${node_taint}" + cni: ${cni} + cluster-cidr: ${cluster_cidr} + service-cidr: ${service_cidr} + etcd-expose-metrics: true + permissions: '0600' + +runcmd: + - curl -sfL https://get.rke2.io | INSTALL_RKE2_TYPE="server" sh - + - systemctl enable rke2-server.service + - systemctl start rke2-server.service + - mkdir -p /home/ubuntu/.kube + - cp /etc/rancher/rke2/rke2.yaml /home/ubuntu/.kube/config + - chown ubuntu:ubuntu /home/ubuntu/.kube/config \ No newline at end of file diff --git a/cortex.yaml b/cortex.yaml index b8270c3..b02f5eb 100644 --- a/cortex.yaml +++ b/cortex.yaml @@ -6,8 +6,13 @@ info: x-cortex-type: service x-cortex-git: github: + alias: cortex repository: dominodatalab/terraform-aws-rancher x-cortex-owners: - - email: eng-infrastructure@dominodatalab.com - type: email - description: Infrastructure Engineering + - name: eng-distributions + type: group + provider: OKTA + description: Updated by eng-devprod + x-cortex-circle-ci: + projects: + - projectSlug: github/dominodatalab/terraform-aws-rancher diff --git a/data.tf b/data.tf new file mode 100644 index 0000000..b94d8ab --- /dev/null +++ b/data.tf @@ -0,0 +1,42 @@ +data "template_file" "rke2_server_userdata" { + template = file("${path.module}/cloud-init/rke2-server.yaml") + vars = { + cluster_cidr = local.cluster_cidr + cni = var.cni_plugin + node_taint = var.node_taint + rke2_token = var.rke2_token + server_url = var.rke2_server_url # Only for additional servers + service_cidr = local.service_cidr + } +} + +data "template_file" "rke2_agent_userdata" { + template = file("${path.module}/cloud-init/rke2-agent.yaml") + vars = { + cluster_cidr = local.cluster_cidr + cni = var.cni_plugin + node_taint = var.node_taint + rke2_token = var.rke2_token + server_url = var.rke2_server_url # Only for additional servers + service_cidr = local.service_cidr + } +} + +data "aws_ami" "ubuntu" { + most_recent = true + owners = ["099720109477"] # Canonical + + filter { + name = "name" + values = ["ubuntu/images/hvm-ssd/${var.node_os_version}-amd64-server-*"] + } + + filter { + name = "virtualization-type" + values = ["hvm"] + } +} + +data "aws_vpc" "this" { + id = var.vpc_id +} diff --git a/locals.tf b/locals.tf new file mode 100644 index 0000000..e4da376 --- /dev/null +++ b/locals.tf @@ -0,0 +1,19 @@ +locals { + vpc_cidrs = data.aws_vpc.this.cidr_block_associations[*].cidr_block + vpc_id = data.aws_vpc.this.id + + ami_id = var.ami != "" ? 
var.ami : data.aws_ami.ubuntu.id
+
+  # Per https://docs.rke2.io/reference/server_config
+  # these values must be the same on all servers in the cluster
+  agent_token              = var.rke2_token != "" ? var.rke2_token : random_string.rke2_token.result
+  cluster_cidr             = var.cluster_cidr != "" ? var.cluster_cidr : "10.42.0.0/16"
+  cluster_dns              = var.cluster_dns != "" ? var.cluster_dns : "10.43.0.10"
+  cluster_domain           = var.cluster_domain
+  disable_cloud_controller = false
+  disable_kube_proxy       = false
+  egress_selector_mode     = "agent"
+  service_cidr             = var.service_cidr != "" ? var.service_cidr : "10.43.0.0/16"
+
+  tags = merge(var.tags)
+}
diff --git a/main.tf b/main.tf
index 9d5a598..b07585e 100644
--- a/main.tf
+++ b/main.tf
@@ -9,36 +9,23 @@ terraform {
   }
 }
 
-locals {
-  lb_name                 = "${var.name}-lb-${var.internal_lb ? "int" : "ext"}"
-  lb_secgrp_name          = "${var.name}-lb"
-  instance_secgrp_name    = "${var.name}-instances"
-  provisioner_secgrp_name = "${var.name}-provisioner"
-}
-
-#------------------------------------------------------------------------------
-# EC2 instances
-#------------------------------------------------------------------------------
-resource "aws_instance" "this" {
-  count = var.instance_count
-
-  ami           = var.ami
-  ebs_optimized = var.ebs_optimized
-  instance_type = var.instance_type
+# #------------------------------------------------------------------------------
+# # EC2 instances
+# #------------------------------------------------------------------------------
+resource "aws_instance" "rke2_server" {
+  count                   = var.server_count
+  ami                     = local.ami_id
+  instance_type           = var.server_instance_type
   key_name                = var.ssh_key_name
   monitoring              = var.enable_detailed_monitoring
-  subnet_id               = element(var.subnet_ids, count.index % length(var.subnet_ids))
   disable_api_termination = var.enable_deletion_protection
+  subnet_id               = element(var.subnet_ids, count.index % length(var.subnet_ids))
+  user_data               = data.template_file.rke2_server_userdata.rendered
   vpc_security_group_ids = [
-    aws_security_group.instances.id,
-    aws_security_group.provisioner.id,
+    aws_security_group.rke2_server.id
  ]
 
-  lifecycle {
-    ignore_changes = [ami, root_block_device]
-  }
-
   root_block_device {
     volume_size           = var.os_disk_size
     volume_type           = var.os_disk_type
     delete_on_termination = var.os_disk_delete_on_termination
     encrypted             = var.os_disk_encrypted
     kms_key_id            = var.os_disk_kms_key_id
   }
 
+  lifecycle {
+    ignore_changes = [ami, root_block_device]
+  }
+
   metadata_options {
     http_endpoint               = "enabled"
-    http_tokens                 = var.require_imdsv2 ? "required" : "optional"
+    http_tokens                 = "required"
     http_put_response_hop_limit = 2
   }
 
   tags = merge(
-    var.tags,
+    local.tags,
     {
       "Name"      = "${var.name}-${count.index}"
       "Terraform" = "true"
+      "rke2-role" = count.index == 0 ?
"server" : "agent" }, ) - volume_tags = var.tags + volume_tags = local.tags - provisioner "remote-exec" { - inline = [ - "cloud-init status --wait" - ] +} - connection { - host = coalesce(self.public_ip, self.private_ip) - type = "ssh" - user = var.ssh_username - private_key = file(var.ssh_key_path) - bastion_host = var.ssh_proxy_host - bastion_user = var.ssh_proxy_user - } + +# #------------------------------------------------------------------------------ +# # Load balancer +# #------------------------------------------------------------------------------ +resource "aws_elb" "rke2_server" { + name = "rke2-server-lb" + internal = true + subnets = var.subnet_ids + security_groups = [aws_security_group.rke2_server.id, aws_security_group.rke2_agent.id] + + # listener { + # instance_port = 443 + # instance_protocol = "TCP" + # lb_port = 443 + # lb_protocol = "TCP" + # } + + # listener { + # instance_port = 80 + # instance_protocol = "TCP" + # lb_port = 80 + # lb_protocol = "TCP" + # } + + # RKE2 API server listener + listener { + instance_port = 6443 + instance_protocol = "TCP" + lb_port = 6443 + lb_protocol = "TCP" } -} -#------------------------------------------------------------------------------ -# Load balancer -#------------------------------------------------------------------------------ -resource "aws_elb" "this" { - name = local.lb_name - security_groups = [aws_security_group.loadbalancer.id] - subnets = var.lb_subnet_ids - instances = aws_instance.this[*].id - internal = var.internal_lb - idle_timeout = 3600 + # RKE2 supervisor port for HA + listener { + instance_port = 9345 + instance_protocol = "TCP" + lb_port = 9345 + lb_protocol = "TCP" + } + # RKE2 kubelet port listener { - instance_port = 443 + instance_port = 10250 instance_protocol = "TCP" - lb_port = 443 + lb_port = 10250 lb_protocol = "TCP" } + # RKE2 etcd port listener { - instance_port = 80 + instance_port = 2379 instance_protocol = "TCP" - lb_port = 80 + lb_port = 2379 + lb_protocol = "TCP" + } + # RKE2 etcd port + listener { + instance_port = 2380 + instance_protocol = "TCP" + lb_port = 2380 lb_protocol = "TCP" } health_check { healthy_threshold = 3 unhealthy_threshold = 3 - target = "HTTP:80/healthz" + target = "HTTP:6443/healthz" interval = 10 timeout = 6 } - tags = merge( - var.tags, - { - "Name" = local.lb_name - "Terraform" = "true" - }, - ) -} - -#------------------------------------------------------------------------------ -# Security groups -#------------------------------------------------------------------------------ -resource "aws_security_group" "loadbalancer" { - name = local.lb_secgrp_name - description = "Grant access to Rancher ELB" - vpc_id = var.vpc_id - - tags = merge( - var.tags, - { - "Name" = local.lb_secgrp_name - "Terraform" = "true" - }, - ) -} - -resource "aws_security_group_rule" "lb_rancher_ingress_443" { - type = "ingress" - from_port = 443 - to_port = 443 - protocol = "tcp" - - security_group_id = aws_security_group.loadbalancer.id - source_security_group_id = aws_security_group.instances.id -} - -resource "aws_security_group_rule" "lb_rancher_ingress_80" { - type = "ingress" - from_port = 80 - to_port = 80 - protocol = "tcp" - - security_group_id = aws_security_group.loadbalancer.id - source_security_group_id = aws_security_group.instances.id -} - -resource "aws_security_group_rule" "lb_cidr_ingress_443" { - count = length(var.lb_cidr_blocks) - - type = "ingress" - from_port = 443 - to_port = 443 - protocol = "tcp" - - security_group_id = aws_security_group.loadbalancer.id - 
cidr_blocks = var.lb_cidr_blocks + tags = { + Name = "rke2-server-nlb" + } } -resource "aws_security_group_rule" "lb_secgrp_ingress_443" { - count = var.lb_security_groups_count +resource "aws_lb_target_group" "rke2_api" { + name = "rke2-api-tg" + port = 6443 + protocol = "TCP" + vpc_id = var.vpc_id - type = "ingress" - from_port = 443 - to_port = 443 - protocol = "tcp" + health_check { + enabled = true + healthy_threshold = 2 + unhealthy_threshold = 2 + timeout = 10 + interval = 30 + port = 6443 + protocol = "TCP" + } - security_group_id = aws_security_group.loadbalancer.id - source_security_group_id = var.lb_security_groups[count.index] + tags = local.tags } -resource "aws_security_group_rule" "lb_cidr_ingress_80" { - count = length(var.lb_cidr_blocks) +resource "aws_lb_target_group" "rke2_server" { + name = "rke2-server-tg" + port = 9345 # RKE2 uses 9345 (server), 6443 (API), 10250 (kubelet), 2379-2380 (etcd) + protocol = "TCP" + vpc_id = var.vpc_id - type = "ingress" - from_port = 80 - to_port = 80 - protocol = "tcp" - security_group_id = aws_security_group.loadbalancer.id - cidr_blocks = var.lb_cidr_blocks -} - -resource "aws_security_group_rule" "lb_secgrp_ingress_80" { - count = var.lb_security_groups_count - - type = "ingress" - from_port = 80 - to_port = 80 - protocol = "tcp" + health_check { + enabled = true + healthy_threshold = 2 + unhealthy_threshold = 2 + timeout = 10 + interval = 30 + port = 9345 + protocol = "TCP" + } - security_group_id = aws_security_group.loadbalancer.id - source_security_group_id = var.lb_security_groups[count.index] + tags = local.tags } -resource "aws_security_group_rule" "lb_egress_443" { - type = "egress" - description = "Outgoing instance traffic" - from_port = 443 - to_port = 443 - protocol = "tcp" - - security_group_id = aws_security_group.loadbalancer.id - source_security_group_id = aws_security_group.instances.id -} +# #------------------------------------------------------------------------------ +# # Security groups +# #------------------------------------------------------------------------------ +resource "aws_security_group" "rke2_server" { + name_prefix = "rke2-server-" + vpc_id = local.vpc_id -resource "aws_security_group_rule" "lb_egress_80" { - type = "egress" - description = "Outgoing instance traffic" - from_port = 80 - to_port = 80 - protocol = "tcp" + # Kubernetes API + ingress { + from_port = 6443 + to_port = 6443 + protocol = "tcp" + cidr_blocks = [local.vpc_cidrs] + } - security_group_id = aws_security_group.loadbalancer.id - source_security_group_id = aws_security_group.instances.id -} + # RKE2 server port + ingress { + from_port = 9345 + to_port = 9345 + protocol = "tcp" + cidr_blocks = [local.vpc_cidrs] + } -resource "aws_security_group" "instances" { - name = local.instance_secgrp_name - description = "Govern access to Rancher server instances" - vpc_id = var.vpc_id + # etcd peer communication + ingress { + from_port = 2379 + to_port = 2380 + protocol = "tcp" + cidr_blocks = [local.vpc_cidrs] + } + # Canal CNI ingress { - description = "Incoming LB traffic" - from_port = 443 - to_port = 443 - protocol = "tcp" - security_groups = [aws_security_group.loadbalancer.id] + from_port = 8472 + to_port = 8472 + protocol = "udp" + cidr_blocks = [local.vpc_cidrs] } + # Kubelet ingress { - description = "Incoming LB traffic" - from_port = 80 - to_port = 80 - protocol = "tcp" - security_groups = [aws_security_group.loadbalancer.id] + from_port = 10250 + to_port = 10250 + protocol = "tcp" + cidr_blocks = [local.vpc_cidrs] } + # NodePort 
services ingress { - description = "Node intercommunication" - from_port = 0 - to_port = 0 - protocol = -1 - self = true + from_port = 30000 + to_port = 32767 + protocol = "tcp" + cidr_blocks = [local.vpc_cidrs] } egress { - description = "Allow all outbound traffic" from_port = 0 to_port = 0 - protocol = -1 + protocol = "-1" cidr_blocks = ["0.0.0.0/0"] } - tags = merge( - var.tags, - { - "Name" = local.instance_secgrp_name - "Terraform" = "true" - }, - ) + tags = local.tags } -resource "aws_security_group" "provisioner" { - name = local.provisioner_secgrp_name - description = "Provision Rancher instances" +resource "aws_security_group" "rke2_agent" { + name_prefix = "rke2-agent-" vpc_id = var.vpc_id - tags = merge( - var.tags, - { - "Name" = local.provisioner_secgrp_name - "Terraform" = "true" - }, - ) -} - -resource "aws_security_group_rule" "provisioner_cidr_ingress_22" { - count = var.use_provisioner_secgrp ? 0 : 1 - - type = "ingress" - description = "RKE SSH access" - from_port = 22 - to_port = 22 - protocol = "tcp" - - security_group_id = aws_security_group.provisioner.id - cidr_blocks = [var.provisioner_cidr_block] -} - -resource "aws_security_group_rule" "provisioner_secgrp_ingress_22" { - count = var.use_provisioner_secgrp ? 1 : 0 - - type = "ingress" - description = "RKE SSH access" - from_port = 22 - to_port = 22 - protocol = "tcp" - - security_group_id = aws_security_group.provisioner.id - source_security_group_id = var.provisioner_security_group -} - -resource "aws_security_group_rule" "provisioner_cidr_ingress_6443" { - count = var.use_provisioner_secgrp ? 0 : 1 - - type = "ingress" - description = "RKE K8s endpoint verification" - from_port = 6443 - to_port = 6443 - protocol = "tcp" - - security_group_id = aws_security_group.provisioner.id - cidr_blocks = [var.provisioner_cidr_block] -} - -resource "aws_security_group_rule" "provisioner_secgrp_ingress_6443" { - count = var.use_provisioner_secgrp ? 1 : 0 - - type = "ingress" - description = "RKE K8s endpoint verification" - from_port = 6443 - to_port = 6443 - protocol = "tcp" - - security_group_id = aws_security_group.provisioner.id - source_security_group_id = var.provisioner_security_group -} - -resource "aws_security_group_rule" "provisioner_cidr_ingress_443" { - count = var.use_provisioner_secgrp ? 0 : 1 - - type = "ingress" - description = "Ranchhand cluster verification" - from_port = 443 - to_port = 443 - protocol = "tcp" + # Kubelet + ingress { + from_port = 10250 + to_port = 10250 + protocol = "tcp" + cidr_blocks = [local.vpc_cidrs] + } - security_group_id = aws_security_group.provisioner.id - cidr_blocks = [var.provisioner_cidr_block] -} + # Canal CNI + ingress { + from_port = 8472 + to_port = 8472 + protocol = "udp" + cidr_blocks = [local.vpc_cidrs] + } -resource "aws_security_group_rule" "provisioner_secgrp_ingress_443" { - count = var.use_provisioner_secgrp ? 
1 : 0 + # NodePort services + ingress { + from_port = 30000 + to_port = 32767 + protocol = "tcp" + cidr_blocks = [local.vpc_cidrs] #[var.allowed_cidrs] + } - type = "ingress" - description = "Ranchhand cluster verification" - from_port = 443 - to_port = 443 - protocol = "tcp" + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } - security_group_id = aws_security_group.provisioner.id - source_security_group_id = var.provisioner_security_group + tags = local.tags } -#------------------------------------------------------------------------------ -# Provisioner -#------------------------------------------------------------------------------ -module "ranchhand" { - source = "github.com/dominodatalab/ranchhand?ref=v1.1.2" - - node_ips = aws_instance.this[*].private_ip - - working_dir = var.ranchhand_working_dir - cert_dnsnames = concat([aws_elb.this.dns_name], var.cert_dnsnames) - cert_ipaddresses = var.cert_ipaddresses - - rancher_version = var.rancher_version - rancher_image_tag = var.rancher_image_tag - rke_version = var.rancher_rke_version - kubectl_version = var.rancher_kubectl_version - - ssh_username = var.ssh_username - ssh_key_path = var.ssh_key_path - ssh_proxy_user = var.ssh_proxy_user - ssh_proxy_host = var.ssh_proxy_host - - admin_password = var.admin_password - - helm_v3_registry_host = var.helm_v3_registry_host - helm_v3_registry_user = var.helm_v3_registry_user - helm_v3_registry_password = var.helm_v3_registry_password - - newrelic_license_key = var.newrelic_license_key -} +# resource "aws_security_group" "loadbalancer" { +# name = local.lb_secgrp_name +# description = "Grant access to Rancher ELB" +# vpc_id = var.vpc_id + +# tags = merge( +# local.tags, +# { +# "Name" = local.lb_secgrp_name +# "Terraform" = "true" +# }, +# ) +# } + +# # Existing load balancer rules for Rancher (443, 80) +# resource "aws_security_group_rule" "lb_rancher_ingress_443" { +# description = "ingress port 443 - loadbalancer ${aws_security_group.loadbalancer.id}" +# type = "ingress" +# from_port = 443 +# to_port = 443 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# source_security_group_id = aws_security_group.instances.id +# } + +# resource "aws_security_group_rule" "lb_rancher_ingress_80" { +# description = "ingress port 80 - loadbalancer ${aws_security_group.loadbalancer.id}" +# type = "ingress" +# from_port = 80 +# to_port = 80 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# source_security_group_id = aws_security_group.instances.id +# } + +# # RKE2 API server ingress rules +# resource "aws_security_group_rule" "lb_rke2_ingress_6443" { +# description = "ingress port 6443 - RKE2 API server" +# type = "ingress" +# from_port = 6443 +# to_port = 6443 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# source_security_group_id = aws_security_group.instances.id +# } + +# # RKE2 supervisor port ingress rules +# resource "aws_security_group_rule" "lb_rke2_ingress_9345" { +# description = "ingress port 9345 - RKE2 supervisor" +# type = "ingress" +# from_port = 9345 +# to_port = 9345 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# source_security_group_id = aws_security_group.instances.id +# } + +# resource "aws_security_group_rule" "lb_cidr_ingress_443" { +# count = length(var.lb_cidr_blocks) +# description = "ingress port 443 - ${var.lb_cidr_blocks[count.index]}" + +# type = "ingress" +# from_port = 443 +# to_port = 443 +# protocol = 
"tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# cidr_blocks = var.lb_cidr_blocks +# } + +# resource "aws_security_group_rule" "lb_secgrp_ingress_443" { +# count = var.lb_security_groups_count +# description = "${var.lb_security_groups[count.index]} ingress port 443" + +# type = "ingress" +# from_port = 443 +# to_port = 443 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# source_security_group_id = var.lb_security_groups[count.index] +# } + +# resource "aws_security_group_rule" "lb_cidr_ingress_80" { +# count = length(var.lb_cidr_blocks) +# description = "ingress port 80 - ${var.lb_cidr_blocks[count.index]}" + +# type = "ingress" +# from_port = 80 +# to_port = 80 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# cidr_blocks = var.lb_cidr_blocks +# } + +# resource "aws_security_group_rule" "lb_secgrp_ingress_80" { +# count = var.lb_security_groups_count +# description = "${var.lb_security_groups[count.index]} ingress port 80" + +# type = "ingress" +# from_port = 80 +# to_port = 80 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# source_security_group_id = var.lb_security_groups[count.index] +# } + +# # RKE2 API server CIDR ingress rules +# resource "aws_security_group_rule" "lb_cidr_ingress_6443" { +# count = length(var.lb_cidr_blocks) +# description = "ingress port 6443 - RKE2 API - ${var.lb_cidr_blocks[count.index]}" + +# type = "ingress" +# from_port = 6443 +# to_port = 6443 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# cidr_blocks = var.lb_cidr_blocks +# } + +# resource "aws_security_group_rule" "lb_secgrp_ingress_6443" { +# count = var.lb_security_groups_count +# description = "${var.lb_security_groups[count.index]} ingress port 6443 - RKE2 API" + +# type = "ingress" +# from_port = 6443 +# to_port = 6443 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# source_security_group_id = var.lb_security_groups[count.index] +# } + +# resource "aws_security_group_rule" "lb_egress_443" { +# type = "egress" +# description = "Outgoing instance traffic" +# from_port = 443 +# to_port = 443 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# source_security_group_id = aws_security_group.instances.id +# } + +# resource "aws_security_group_rule" "lb_egress_80" { +# type = "egress" +# description = "Outgoing instance traffic" +# from_port = 80 +# to_port = 80 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# source_security_group_id = aws_security_group.instances.id +# } + +# # RKE2 egress rules +# resource "aws_security_group_rule" "lb_egress_6443" { +# type = "egress" +# description = "Outgoing RKE2 API traffic" +# from_port = 6443 +# to_port = 6443 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# source_security_group_id = aws_security_group.instances.id +# } + +# resource "aws_security_group_rule" "lb_egress_9345" { +# type = "egress" +# description = "Outgoing RKE2 supervisor traffic" +# from_port = 9345 +# to_port = 9345 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# source_security_group_id = aws_security_group.instances.id +# } + +# resource "aws_security_group" "instances" { +# name = local.instance_secgrp_name +# description = "Govern access to Rancher server instances" +# vpc_id = var.vpc_id + +# ingress { +# description = "Incoming LB traffic" +# from_port = 443 +# to_port 
= 443 +# protocol = "tcp" +# security_groups = [aws_security_group.loadbalancer.id] +# } + +# ingress { +# description = "Incoming LB traffic" +# from_port = 80 +# to_port = 80 +# protocol = "tcp" +# security_groups = [aws_security_group.loadbalancer.id] +# } + +# # RKE2 API server +# ingress { +# description = "RKE2 API server" +# from_port = 6443 +# to_port = 6443 +# protocol = "tcp" +# security_groups = [aws_security_group.loadbalancer.id] +# } + +# # RKE2 supervisor port for HA +# ingress { +# description = "RKE2 supervisor port" +# from_port = 9345 +# to_port = 9345 +# protocol = "tcp" +# security_groups = [aws_security_group.loadbalancer.id] +# } + +# # RKE2 etcd client port +# ingress { +# description = "RKE2 etcd client" +# from_port = 2379 +# to_port = 2379 +# protocol = "tcp" +# self = true +# } + +# # RKE2 etcd peer port +# ingress { +# description = "RKE2 etcd peer" +# from_port = 2380 +# to_port = 2380 +# protocol = "tcp" +# self = true +# } + +# # RKE2 kubelet +# ingress { +# description = "RKE2 kubelet" +# from_port = 10250 +# to_port = 10250 +# protocol = "tcp" +# self = true +# } + +# # RKE2 CNI (Flannel VXLAN) +# ingress { +# description = "RKE2 CNI VXLAN" +# from_port = 8472 +# to_port = 8472 +# protocol = "udp" +# self = true +# } + +# # RKE2 metrics server +# ingress { +# description = "RKE2 metrics server" +# from_port = 10254 +# to_port = 10254 +# protocol = "tcp" +# self = true +# } + +# # NodePort services +# ingress { +# description = "NodePort services" +# from_port = 30000 +# to_port = 32767 +# protocol = "tcp" +# self = true +# } + +# ingress { +# description = "Node intercommunication" +# from_port = 0 +# to_port = 0 +# protocol = -1 +# self = true +# } + +# egress { +# description = "Allow all outbound traffic" +# from_port = 0 +# to_port = 0 +# protocol = -1 +# cidr_blocks = ["0.0.0.0/0"] +# } + +# tags = merge( +# local.tags, +# { +# "Name" = local.instance_secgrp_name +# "Terraform" = "true" +# }, +# ) +# } + + +# resource "aws_security_group_rule" "provisioner_cidr_ingress_22" { +# count = var.use_provisioner_secgrp ? 0 : 1 + +# type = "ingress" +# description = "RKE2 SSH access" +# from_port = 22 +# to_port = 22 +# protocol = "tcp" + +# security_group_id = aws_security_group.provisioner.id +# cidr_blocks = [var.provisioner_cidr_block] +# } + +# resource "aws_security_group_rule" "provisioner_secgrp_ingress_22" { +# count = var.use_provisioner_secgrp ? 1 : 0 + +# type = "ingress" +# description = "RKE2 SSH access" +# from_port = 22 +# to_port = 22 +# protocol = "tcp" + +# security_group_id = aws_security_group.provisioner.id +# source_security_group_id = var.provisioner_security_group +# } + +# resource "aws_security_group_rule" "provisioner_cidr_ingress_6443" { +# count = var.use_provisioner_secgrp ? 
0 : 1 + +# type = "ingress" +# description = "RKE2 K8s endpoint verification" +# from_port = 6443 +# to_port = 6443 +# protocol = "tcp" + +# security_group_id = aws_security_group.provisioner.id +# cidr_blocks = [var.provisioner_cidr_block] +# } diff --git a/output.tf b/output.tf index f24c431..6d086f5 100644 --- a/output.tf +++ b/output.tf @@ -5,11 +5,16 @@ output "lb_dns_name" { output "cluster_provisioned" { description = "ID of the null_resource cluster provisioner" - value = module.ranchhand.cluster_provisioned + value = module.rke2_provisioner.cluster_provisioned } output "admin_password" { description = "Generated Rancher admin user password" - value = module.ranchhand.admin_password + value = module.rke2_provisioner.admin_password + sensitive = true } +output "kubeconfig_file" { + description = "Path to the generated kubeconfig file" + value = module.rke2_provisioner.kubeconfig_file +} diff --git a/variables.tf b/variables.tf index 844cdd5..f6d452c 100644 --- a/variables.tf +++ b/variables.tf @@ -6,91 +6,132 @@ variable "vpc_id" { type = string } -variable "lb_subnet_ids" { - description = "List of subnets where LB will be created" - type = list(string) -} - -variable "subnet_ids" { - description = "List of subnets where instances will be created" - type = list(string) +variable "server_count" { + description = "Number of instances in the cluster" + type = number + default = 3 + validation { + condition = var.server_count >= 3 + error_message = "server_count must be at least 3 to ensure high availability." + } } -variable "ssh_key_name" { - description = "Name of the EC2 key pair to use for the instances" +variable "rke2_server_url" { + description = "RKE2 server URL used to join nodes to the cluster" type = string } -#------------------------------------------------------------------------------ -# YOU MUST CHOOSE ONE OF THE FOLLOWING OTHERWISE PROVISIONING WILL FAIL! -#------------------------------------------------------------------------------ -variable "use_provisioner_secgrp" { - description = "Determines whether to use the security provision_security_group or provisioner_cidr_block inputs." - default = "true" +# Check CNI and versions at https://docs.rke2.io/release-notes/v{K8S_VER}.X +# i.e. https://docs.rke2.io/release-notes/v1.32.X +variable "cni_plugin" { + description = "CNI plugin to use for the RKE2 cluster (canal, calico, cilium, flannel)" + default = "calico" type = string + + validation { + condition = alltrue([ + for mode in var.cni_plugin : contains(["canal", "calico", "cilium", "flannel"], mode) + ]) + error_message = "cni_plugin must be one of 'canal', 'calico', 'cilium', or 'flannel'." + } } -variable "provisioner_security_group" { - description = "ID of security group attached to the VM that will provision the Rancher instances. This is typically a bastion host." +variable "node_taint" { + description = "Taint to apply to the RKE2 nodes (e.g., 'node-role.kubernetes.io/control-plane:NoSchedule')" default = "" type = string } -variable "provisioner_cidr_block" { - description = "CIDR address of the host that will provision the Rancher instances. This will only work with instances that are publicly accessible." 
-  default     = ""
+variable "server_instance_type" {
+  description = "Instance type for the RKE2 server nodes"
+  default     = "t3.xlarge"
   type        = string
 }
 
-#------------------------------------------------------------------------------
-# OPTIONAL
-#------------------------------------------------------------------------------
-variable "name" {
-  description = "Root name applied to all resources"
-  default     = "rancher"
+variable "ssh_key_name" {
+  description = "Name of the EC2 key pair to use for the instances"
+  default     = "rke2-keypair"
   type        = string
+
 }
 
-variable "internal_lb" {
-  description = "Create an internal load balancer. Defaults to internet-facing."
-  default     = false
+variable "subnet_ids" {
+  description = "List of subnet IDs where the RKE2 server nodes will be created"
+  type        = list(string)
+
+}
+
+variable "node_os_version" {
+  description = "Operating system version for the RKE2 nodes (e.g., 'ubuntu-24.04')"
+  default     = "ubuntu-24.04"
   type        = string
 }
 
-variable "lb_security_groups" {
-  description = "Grant LB ingress access to one or more security group IDs"
-  default     = []
-  type        = list(string)
+variable "rke2_version" {
+  description = "Override for the installed RKE2 version. With the 'v'"
+  type        = string
+  default     = "v1.32.4+rke2r1"
+
+  validation {
+    condition     = can(regex("^v[0-9]+\\.[0-9]+\\.[0-9]+\\+rke2r[0-9]+$", var.rke2_version))
+    error_message = "rke2_version must be in the format v<major>.<minor>.<patch>+rke2r<revision>, e.g., v1.32.4+rke2r1"
+  }
 }
 
-variable "lb_security_groups_count" {
-  description = "Count of dynamically determines lb_security_groups"
-  default     = 0
-  type        = number
+variable "rke2_token" {
+  description = "Token for RKE2 cluster"
+  type        = string
+  sensitive   = true
 }
 
-variable "lb_cidr_blocks" {
-  description = "Grant LB ingress access to one or more CIDR addresses"
-  default     = []
-  type        = list(string)
+variable "cluster_dns" {
+  description = "Cluster DNS IP"
+  type        = string
+  default     = "10.43.0.10"
+  validation {
+    condition     = provider::assert::ip(var.cluster_dns)
+    error_message = "Invalid cluster_dns address"
+  }
 }
 
-variable "instance_count" {
-  description = "Number of instances to launch"
-  default     = 3
-  type        = number
+variable "cluster_domain" {
+  description = "Cluster domain"
+  type        = string
+  default     = "cluster.local"
 }
 
-variable "ami" {
-  description = "Instance AMI defaults to Ubuntu 24.04"
-  default     = "ami-00c257e12d6828491"
+variable "cluster_cidr" {
+  description = "IPv4/IPv6 network CIDRs to use for pod IPs"
   type        = string
+  default     = "10.42.0.0/16"
+
+  validation {
+    condition     = provider::assert::cidr(var.cluster_cidr)
+    error_message = "Invalid cluster_cidr"
+  }
 }
 
-variable "instance_type" {
-  description = "Type of instances to launch"
-  default     = "t3.xlarge"
+variable "service_cidr" {
+  description = "IPv4/IPv6 network CIDRs to use for service IPs"
   type        = string
+  default     = "10.43.0.0/16"
+
+  validation {
+    condition     = provider::assert::cidr(var.service_cidr)
+    error_message = "Invalid service_cidr"
+  }
+}
+
+variable "enable_detailed_monitoring" {
+  description = "Launch EC2 instances with detailed monitoring enabled"
+  default     = false
+  type        = bool
+}
+
+variable "enable_deletion_protection" {
+  description = "If true, enables EC2 Instance Termination Protection"
+  default     = false
+  type        = bool
 }
 
 variable "os_disk_size" {
@@ -100,7 +141,7 @@ variable "os_disk_size" {
 }
 
 variable "os_disk_type" {
-  description = "Root partition volume type for instances"
+  description = "Root partition volume type for instances (io1, io2, gp2, gp3, sc1, st1, standard)"
   default     = 
"gp3" type = string } @@ -123,130 +164,168 @@ variable "os_disk_kms_key_id" { type = string } -variable "ebs_optimized" { - description = "Attach NICs dedicated to EBS volume network traffic" - default = true - type = bool -} - -variable "enable_detailed_monitoring" { - description = "Launch EC2 instances with detailed monitoring enabled" - default = false - type = bool -} - -variable "enable_deletion_protection" { - description = "" - default = false - type = bool -} - variable "tags" { description = "Extra tags assigned to all resources" default = {} type = map(string) } -#------------------------------------------------------------------------------ -# RANCHHAND -#------------------------------------------------------------------------------ -variable "ranchhand_working_dir" { - description = "Directory where ranchhand should be executed. Defaults to the current working directory." - default = "" - type = string -} - -variable "cert_dnsnames" { - description = "Hostnames for the rancher and rke ssl certs (comma-delimited)" - default = [""] - type = list(string) -} - -variable "cert_ipaddresses" { - description = "IP addresses for the rancher and rke ssl certs (comma-delimited)" - default = ["127.0.0.1"] - type = list(string) -} - -variable "ssh_username" { - description = "SSH username on the nodes" - default = "ubuntu" - type = string -} - -variable "ssh_key_path" { - description = "Path to the SSH private key that will be used to connect to the VMs" - default = "~/.ssh/id_rsa" - type = string -} - -variable "ssh_proxy_user" { - description = "Bastion host SSH username" - default = "" - type = string -} - -variable "ssh_proxy_host" { - description = "Bastion host used to proxy SSH connections" - default = "" - type = string -} - -variable "admin_password" { - description = "Password override for the initial admin user" - default = "" - type = string -} - -# Update the rancher_* variables together -# Please reference the Rancher support matrix before changing these values -# https://www.suse.com/suse-rancher/support-matrix/all-supported-versions/ -# before changing these values -variable "rancher_version" { - description = "Override for the installed Rancher version. Without the [v]" - default = "2.10.5" - type = string -} - -variable "rancher_image_tag" { - description = "Override for the installed Rancher image tag. With the [v]" - default = "v2.10.5" +# #------------------------------------------------------------------------------ +# # OPTIONAL +# #------------------------------------------------------------------------------ +variable "name" { + description = "Root name applied to all resources" + default = "rancher" type = string } -variable "rancher_kubectl_version" { - description = "Override for the kubectl version supported by RKE to install. With the [v]" - default = "v1.31.5" +variable "ami" { + description = "Specific AMI ID to use for the RKE2 nodes. If not specified, the latest Ubuntu AMI will be used." type = string + validation { + condition = can(regex("^ami-[a-z0-9]+$", var.ami)) || var.ami == "" + error_message = "AMI must be a valid AMI ID or an empty string to use the default Ubuntu AMI." + } } -variable "rancher_rke_version" { - description = "Override for the installed RKE image tag. 
With the [v]"
-  default     = "v1.7.3"
+variable "egress_selector_mode" {
+  description = "RKE2 egress selector mode (agent, cluster, pod, disabled)"
+  default     = "agent"
   type        = string
-}
-
-variable "helm_v3_registry_host" {
-  default = ""
-  type    = string
-}
-
-variable "helm_v3_registry_user" {
-  default = ""
-  type    = string
-}
-
-variable "helm_v3_registry_password" {
-  default = ""
-  type    = string
-}
-
-variable "newrelic_license_key" {
-  default = ""
-  type    = string
-}
-
-variable "require_imdsv2" {
-  description = "Require instance metadata service v2"
-  type = bool
-}
+  validation {
+    condition     = contains(["agent", "cluster", "pod", "disabled"], var.egress_selector_mode)
+    error_message = "egress_selector_mode must be one of 'agent', 'cluster', 'pod', or 'disabled'."
+  }
+}
+

# variable "ebs_optimized" {
#   description = "Attach NICs dedicated to EBS volume network traffic"
#   default     = true
#   type        = bool
# }

# #------------------------------------------------------------------------------
# # RKE2 CONFIGURATION
# #------------------------------------------------------------------------------

# variable "cert_dnsnames" {
#   description = "Hostnames for the rancher and RKE2 ssl certs (comma-delimited)"
#   default     = [""]
#   type        = list(string)
# }

# variable "cert_ipaddresses" {
#   description = "IP addresses for the rancher and RKE2 ssl certs (comma-delimited)"
#   default     = ["127.0.0.1"]
#   type        = list(string)
# }

# variable "ssh_username" {
#   description = "SSH username on the nodes"
#   default     = "ubuntu"
#   type        = string
# }

# variable "ssh_key_path" {
#   description = "Path to the SSH private key that will be used to connect to the VMs"
#   default     = "~/.ssh/id_rsa"
#   type        = string
# }

# variable "ssh_proxy_user" {
#   description = "Bastion host SSH username"
#   default     = ""
#   type        = string
# }

# variable "ssh_proxy_host" {
#   description = "Bastion host used to proxy SSH connections"
#   default     = ""
#   type        = string
# }

# variable "admin_password" {
#   description = "Password override for the initial admin user"
#   default     = ""
#   type        = string
# }

# # Update the rancher_* and rke2_* variables together
# # Please reference the Rancher support matrix before changing these values
# # https://www.suse.com/suse-rancher/support-matrix/all-supported-versions/
# variable "rancher_version" {
#   description = "Override for the installed Rancher version. Without the [v]"
#   default     = "2.10.5"
#   type        = string
# }

# variable "rancher_image_tag" {
#   description = "Override for the installed Rancher image tag. With the [v]"
#   default     = "v2.10.5"
#   type        = string
# }

# variable "rancher_kubectl_version" {
#   description = "Override for the kubectl version supported by RKE2 to install. With the [v]"
#   default     = "v1.31.5"
#   type        = string
# }

# variable "rke2_version" {
#   description = "Override for the installed RKE2 version. 
With the [v]" +# default = "v1.31.5+rke2r1" +# type = string +# } + +# variable "rke2_channel" { +# description = "RKE2 release channel (stable, latest)" +# default = "stable" +# type = string +# } + +# variable "rke2_cni" { +# description = "RKE2 CNI plugin (canal, calico, cilium)" +# default = "canal" +# type = string +# } + +# variable "rke2_disable_cloud_controller" { +# description = "Disable RKE2 cloud controller manager" +# default = false +# type = bool +# } + +# variable "rke2_config_file" { +# description = "Path to custom RKE2 configuration file" +# default = "" +# type = string +# } + +# variable "helm_v3_registry_host" { +# default = "" +# type = string +# } + +# variable "helm_v3_registry_user" { +# default = "" +# type = string +# } + +# variable "helm_v3_registry_password" { +# default = "" +# type = string +# } + +# variable "newrelic_license_key" { +# default = "" +# type = string +# } From 31f52c1cabb8a57c87ad0e1f7d6c6d228aec987c Mon Sep 17 00:00:00 2001 From: Dan Clegg Date: Fri, 7 Feb 2025 16:29:57 -0700 Subject: [PATCH 4/5] Updated rke, rancher, tools Use tf orb --- .circleci/config.yml | 55 +++++++++++++++++++++++++++++++++++++++++ .pre-commit-config.yaml | 27 ++++++++++++++++++++ .tflint.hcl | 6 +++++ README.md | 1 + cortex.yaml | 13 ++++++++++ main.tf | 19 +++++++++++--- renovate.json | 6 +++++ variables.tf | 51 +++++++++++++++++++++++++++++++++----- 8 files changed, 168 insertions(+), 10 deletions(-) create mode 100644 .circleci/config.yml create mode 100644 .pre-commit-config.yaml create mode 100644 .tflint.hcl create mode 100644 cortex.yaml create mode 100644 renovate.json diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000..90ef47a --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,55 @@ +# CircleCI 2.1 configuration file +# Check https://circleci.com/docs/2.0/sample-config/ for more details +# +version: 2.1 + +orbs: + terraform: circleci/terraform@3.6.0 + +executors: + trivy: + docker: + - image: aquasec/trivy:0.60.0 + environment: + ENV_FILE: /tmp/workspace/.env + WORKSPACE: /tmp/workspace + +jobs: + terraform_fmt: + description: Check terraform format + executor: terraform/default + working_directory: /tmp/workspace + steps: + - checkout + - terraform/init: + path: . + - terraform/validate: + path: . + - terraform/fmt: + path: . + scan: + executor: trivy + steps: + - checkout + - setup_remote_docker + - run: + name: Install trivy + command: | + apk add --update-cache --upgrade curl + curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin + - run: + name: Scan filesystem + command: | + trivy fs --include-non-failures --misconfig-scanners terraform \ + --exit-code 0 --no-progress \ + --scanners vuln,secret,config --severity CRITICAL,HIGH,MEDIUM,LOW \ + --output "trivy-results.json" --format json --ignore-unfixed . 
+ - store_artifacts: + path: trivy-results.json + destination: trivy_output + +workflows: + Lint and Scan: + jobs: + - terraform_fmt + - scan diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..cba483c --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,27 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: check-merge-conflict + - id: end-of-file-fixer + - id: no-commit-to-branch + - id: check-case-conflict + - repo: https://github.com/antonbabenko/pre-commit-terraform + rev: v1.97.4 + hooks: + - id: terraform_validate + args: + - '--hook-config=--retry-once-with-cleanup=true' + - id: terraform_providers_lock + args: + - --tf-init-args=-upgrade + - id: terraform_docs + args: + - '--args=--lockfile=false' + - '--hook-config=--path-to-file=README.md' + - '--hook-config=--add-to-existing-file=true' + - '--hook-config=--create-file-if-not-exist=true' + - id: terraform_fmt + - id: terraform_tflint + args: + - '--args=--config=__GIT_WORKING_DIR__/.tflint.hcl' diff --git a/.tflint.hcl b/.tflint.hcl new file mode 100644 index 0000000..0295f7e --- /dev/null +++ b/.tflint.hcl @@ -0,0 +1,6 @@ +plugin "aws" { + enabled = true + deep_check = false + version = "0.38.0" + source = "github.com/terraform-linters/tflint-ruleset-aws" +} diff --git a/README.md b/README.md index 6619c73..7931e27 100644 --- a/README.md +++ b/README.md @@ -64,4 +64,5 @@ module "rancher" { ``` ## Development + Please submit any feature enhancements, bug fixes, or ideas via pull requests or issues. diff --git a/cortex.yaml b/cortex.yaml new file mode 100644 index 0000000..b8270c3 --- /dev/null +++ b/cortex.yaml @@ -0,0 +1,13 @@ +openapi: 3.0.1 +info: + title: terraform-aws-rancher + description: Terraform module to deploy Rancher on AWS + x-cortex-tag: terraform-aws-rancher + x-cortex-type: service + x-cortex-git: + github: + repository: dominodatalab/terraform-aws-rancher + x-cortex-owners: + - email: eng-infrastructure@dominodatalab.com + type: email + description: Infrastructure Engineering diff --git a/main.tf b/main.tf index b95e83f..9d5a598 100644 --- a/main.tf +++ b/main.tf @@ -1,3 +1,14 @@ +terraform { + required_version = ">= 1.3.0" + + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 4.0.0" + } + } +} + locals { lb_name = "${var.name}-lb-${var.internal_lb ? 
"int" : "ext"}" lb_secgrp_name = "${var.name}-lb" @@ -75,7 +86,7 @@ resource "aws_elb" "this" { name = local.lb_name security_groups = [aws_security_group.loadbalancer.id] subnets = var.lb_subnet_ids - instances = aws_instance.this.*.id + instances = aws_instance.this[*].id internal = var.internal_lb idle_timeout = 3600 @@ -242,7 +253,7 @@ resource "aws_security_group" "instances" { description = "Node intercommunication" from_port = 0 to_port = 0 - protocol = "-1" + protocol = -1 self = true } @@ -250,7 +261,7 @@ resource "aws_security_group" "instances" { description = "Allow all outbound traffic" from_port = 0 to_port = 0 - protocol = "-1" + protocol = -1 cidr_blocks = ["0.0.0.0/0"] } @@ -361,7 +372,7 @@ resource "aws_security_group_rule" "provisioner_secgrp_ingress_443" { module "ranchhand" { source = "github.com/dominodatalab/ranchhand?ref=v1.1.2" - node_ips = aws_instance.this.*.private_ip + node_ips = aws_instance.this[*].private_ip working_dir = var.ranchhand_working_dir cert_dnsnames = concat([aws_elb.this.dns_name], var.cert_dnsnames) diff --git a/renovate.json b/renovate.json new file mode 100644 index 0000000..39a2b6e --- /dev/null +++ b/renovate.json @@ -0,0 +1,6 @@ +{ + "$schema": "https://docs.renovatebot.com/renovate-schema.json", + "extends": [ + "config:base" + ] +} diff --git a/variables.tf b/variables.tf index 8c2208b..844cdd5 100644 --- a/variables.tf +++ b/variables.tf @@ -3,6 +3,7 @@ #------------------------------------------------------------------------------ variable "vpc_id" { description = "VPC where resources should be created" + type = string } variable "lb_subnet_ids" { @@ -17,6 +18,7 @@ variable "subnet_ids" { variable "ssh_key_name" { description = "Name of the EC2 key pair to use for the instances" + type = string } #------------------------------------------------------------------------------ @@ -25,16 +27,19 @@ variable "ssh_key_name" { variable "use_provisioner_secgrp" { description = "Determines whether to use the security provision_security_group or provisioner_cidr_block inputs." default = "true" + type = string } variable "provisioner_security_group" { description = "ID of security group attached to the VM that will provision the Rancher instances. This is typically a bastion host." default = "" + type = string } variable "provisioner_cidr_block" { description = "CIDR address of the host that will provision the Rancher instances. This will only work with instances that are publicly accessible." default = "" + type = string } #------------------------------------------------------------------------------ @@ -43,86 +48,103 @@ variable "provisioner_cidr_block" { variable "name" { description = "Root name applied to all resources" default = "rancher" + type = string } variable "internal_lb" { description = "Create an internal load balancer. Defaults to internet-facing." 
default = false + type = string } variable "lb_security_groups" { description = "Grant LB ingress access to one or more security group IDs" default = [] + type = list(string) } variable "lb_security_groups_count" { description = "Count of dynamically determines lb_security_groups" default = 0 + type = number } variable "lb_cidr_blocks" { description = "Grant LB ingress access to one or more CIDR addresses" default = [] + type = list(string) } variable "instance_count" { description = "Number of instances to launch" default = 3 + type = number } variable "ami" { - description = "Instance AMI defaults to Ubuntu 16.04" - default = "ami-0565af6e282977273" + description = "Instance AMI defaults to Ubuntu 24.04" + default = "ami-00c257e12d6828491" + type = string } variable "instance_type" { description = "Type of instances to launch" default = "t3.xlarge" + type = string } variable "os_disk_size" { description = "Root partition volume size for instances" default = 30 + type = number } variable "os_disk_type" { description = "Root partition volume type for instances" default = "gp3" + type = string } variable "os_disk_delete_on_termination" { description = "Destroy root EBS volume when instances are terminated" default = true + type = bool } variable "os_disk_encrypted" { description = "Encrypt root EBS volume" default = true + type = bool } variable "os_disk_kms_key_id" { description = "Optional encryption key for root EBS volume" default = "" + type = string } variable "ebs_optimized" { description = "Attach NICs dedicated to EBS volume network traffic" default = true + type = bool } variable "enable_detailed_monitoring" { description = "Launch EC2 instances with detailed monitoring enabled" default = false + type = bool } variable "enable_deletion_protection" { description = "" default = false + type = bool } variable "tags" { description = "Extra tags assigned to all resources" default = {} + type = map(string) } #------------------------------------------------------------------------------ @@ -131,41 +153,49 @@ variable "tags" { variable "ranchhand_working_dir" { description = "Directory where ranchhand should be executed. Defaults to the current working directory." default = "" + type = string } variable "cert_dnsnames" { description = "Hostnames for the rancher and rke ssl certs (comma-delimited)" default = [""] + type = list(string) } variable "cert_ipaddresses" { description = "IP addresses for the rancher and rke ssl certs (comma-delimited)" default = ["127.0.0.1"] + type = list(string) } variable "ssh_username" { description = "SSH username on the nodes" default = "ubuntu" + type = string } variable "ssh_key_path" { description = "Path to the SSH private key that will be used to connect to the VMs" default = "~/.ssh/id_rsa" + type = string } variable "ssh_proxy_user" { description = "Bastion host SSH username" default = "" + type = string } variable "ssh_proxy_host" { description = "Bastion host used to proxy SSH connections" default = "" + type = string } variable "admin_password" { description = "Password override for the initial admin user" default = "" + type = string } # Update the rancher_* variables together @@ -174,40 +204,49 @@ variable "admin_password" { # before changing these values variable "rancher_version" { description = "Override for the installed Rancher version. Without the [v]" - default = "2.7.5" + default = "2.10.5" + type = string } variable "rancher_image_tag" { description = "Override for the installed Rancher image tag. 
With the [v]" - default = "v2.7.5" + default = "v2.10.5" + type = string } variable "rancher_kubectl_version" { description = "Override for the kubectl version supported by RKE to install. With the [v]" - default = "v1.26.7" + default = "v1.31.5" + type = string } variable "rancher_rke_version" { description = "Override for the installed RKE image tag. With the [v]" - default = "v1.4.8" + default = "v1.7.3" + type = string } variable "helm_v3_registry_host" { default = "" + type = string } variable "helm_v3_registry_user" { default = "" + type = string } variable "helm_v3_registry_password" { default = "" + type = string } variable "newrelic_license_key" { default = "" + type = string } variable "require_imdsv2" { description = "Require instance metadata service v2" + type = bool } From 06c8e713384b596507863d65040020e43829f9ec Mon Sep 17 00:00:00 2001 From: Dan Clegg Date: Wed, 19 Mar 2025 16:05:33 -0600 Subject: [PATCH 5/5] Update cortex Noting implications of metadata_options.http_tokens=optional Force IMDSv2 Initial rke2 migration --- README.md | 2 + cloud-init/rke2-agent.yaml | 16 + cloud-init/rke2-server.yaml | 29 ++ cortex.yaml | 11 +- data.tf | 42 ++ locals.tf | 19 + main.tf | 817 +++++++++++++++++++++++------------- output.tf | 9 +- variables.tf | 413 ++++++++++-------- 9 files changed, 888 insertions(+), 470 deletions(-) create mode 100644 cloud-init/rke2-agent.yaml create mode 100644 cloud-init/rke2-server.yaml create mode 100644 data.tf create mode 100644 locals.tf diff --git a/README.md b/README.md index 7931e27..da63f67 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # terraform-aws-rancher +**Note:** This module provisions Rancher clusters using RKE2 and containerd (not RKE1 or Docker). + Terraform module which creates an HA deployment of Rancher inside AWS using [RanchHand](https://github.com/dominodatalab/ranchhand). 
## Usage diff --git a/cloud-init/rke2-agent.yaml b/cloud-init/rke2-agent.yaml new file mode 100644 index 0000000..e5822d3 --- /dev/null +++ b/cloud-init/rke2-agent.yaml @@ -0,0 +1,16 @@ +#cloud-config +package_update: true + +write_files: + - path: /etc/rancher/rke2/config.yaml + content: | + server: ${server_url} + token: ${rke2_token} + node-label: + - "node-type=worker" + permissions: '0600' + +runcmd: + - curl -sfL https://get.rke2.io | INSTALL_RKE2_TYPE="agent" sh - + - systemctl enable rke2-agent.service + - systemctl start rke2-agent.service \ No newline at end of file diff --git a/cloud-init/rke2-server.yaml b/cloud-init/rke2-server.yaml new file mode 100644 index 0000000..25c024f --- /dev/null +++ b/cloud-init/rke2-server.yaml @@ -0,0 +1,29 @@ +#cloud-config +package_update: true + +packages: + - curl + - wget + +write_files: + - path: /etc/rancher/rke2/config.yaml + content: | + token: ${rke2_token} + %{ if server_url != "" }server: ${server_url}%{ endif } + tls-san: + - ${server_url} + node-taint: + - "${node_taint}" + cni: ${cni} + cluster-cidr: ${cluster_cidr} + service-cidr: ${service_cidr} + etcd-expose-metrics: true + permissions: '0600' + +runcmd: + - curl -sfL https://get.rke2.io | INSTALL_RKE2_TYPE="server" sh - + - systemctl enable rke2-server.service + - systemctl start rke2-server.service + - mkdir -p /home/ubuntu/.kube + - cp /etc/rancher/rke2/rke2.yaml /home/ubuntu/.kube/config + - chown ubuntu:ubuntu /home/ubuntu/.kube/config \ No newline at end of file diff --git a/cortex.yaml b/cortex.yaml index b8270c3..b02f5eb 100644 --- a/cortex.yaml +++ b/cortex.yaml @@ -6,8 +6,13 @@ info: x-cortex-type: service x-cortex-git: github: + alias: cortex repository: dominodatalab/terraform-aws-rancher x-cortex-owners: - - email: eng-infrastructure@dominodatalab.com - type: email - description: Infrastructure Engineering + - name: eng-distributions + type: group + provider: OKTA + description: Updated by eng-devprod + x-cortex-circle-ci: + projects: + - projectSlug: github/dominodatalab/terraform-aws-rancher diff --git a/data.tf b/data.tf new file mode 100644 index 0000000..b94d8ab --- /dev/null +++ b/data.tf @@ -0,0 +1,42 @@ +data "template_file" "rke2_server_userdata" { + template = file("${path.module}/cloud-init/rke2-server.yaml") + vars = { + cluster_cidr = local.cluster_cidr + cni = var.cni_plugin + node_taint = var.node_taint + rke2_token = var.rke2_token + server_url = var.rke2_server_url # Only for additional servers + service_cidr = local.service_cidr + } +} + +data "template_file" "rke2_agent_userdata" { + template = file("${path.module}/cloud-init/rke2-agent.yaml") + vars = { + cluster_cidr = local.cluster_cidr + cni = var.cni_plugin + node_taint = var.node_taint + rke2_token = var.rke2_token + server_url = var.rke2_server_url # Only for additional servers + service_cidr = local.service_cidr + } +} + +data "aws_ami" "ubuntu" { + most_recent = true + owners = ["099720109477"] # Canonical + + filter { + name = "name" + values = ["ubuntu/images/hvm-ssd/${var.node_os_version}-amd64-server-*"] + } + + filter { + name = "virtualization-type" + values = ["hvm"] + } +} + +data "aws_vpc" "this" { + id = var.vpc_id +} diff --git a/locals.tf b/locals.tf new file mode 100644 index 0000000..e4da376 --- /dev/null +++ b/locals.tf @@ -0,0 +1,19 @@ +locals { + vpc_cidrs = data.aws_vpc.this.cidr_block_associations[*].cidr_block + vpc_id = data.aws_vpc.this.id + + ami_id = var.ami != "" ? 
var.ami : data.aws_ami.ubuntu.id + + # Per https://docs.rke2.io/reference/server_config + # these values must be the same on all servers in the cluster + agent_token = var.rke2_token != "" ? var.rke2_token : random_string.rke2_token.result + cluster_cidr = var.cluster_cidr != "" ? var.cluster_cidr : "10.42.0.0/16" + cluster_dns = var.cluster_dns != "" ? var.cluster_dns : "10.43.0.10" + cluster_domain = var.cluster_domain + disable_cloud_controller = false + disable_kube_proxy = false + egress_selector_mode = "agent" + service_cidr = var.service_cidr != "" ? var.service_cidr : "10.43.0.0/16" + + tags = merge(var.tags) +} diff --git a/main.tf b/main.tf index 9d5a598..b07585e 100644 --- a/main.tf +++ b/main.tf @@ -9,36 +9,23 @@ terraform { } } -locals { - lb_name = "${var.name}-lb-${var.internal_lb ? "int" : "ext"}" - lb_secgrp_name = "${var.name}-lb" - instance_secgrp_name = "${var.name}-instances" - provisioner_secgrp_name = "${var.name}-provisioner" -} - -#------------------------------------------------------------------------------ -# EC2 instances -#------------------------------------------------------------------------------ -resource "aws_instance" "this" { - count = var.instance_count - - ami = var.ami - ebs_optimized = var.ebs_optimized - instance_type = var.instance_type +# #------------------------------------------------------------------------------ +# # EC2 instances +# #------------------------------------------------------------------------------ +resource "aws_instance" "rke2_server" { + count = var.server_count + ami = data.aws_ami.ubuntu.id + instance_type = var.server_instance_type key_name = var.ssh_key_name monitoring = var.enable_detailed_monitoring - subnet_id = element(var.subnet_ids, count.index % length(var.subnet_ids)) disable_api_termination = var.enable_deletion_protection + subnet_id = var.subnet_ids[count.index] + user_data = data.template_file.rke2_server_userdata.rendered vpc_security_group_ids = [ - aws_security_group.instances.id, - aws_security_group.provisioner.id, + aws_security_group.rke2_server.id ] - lifecycle { - ignore_changes = [ami, root_block_device] - } - root_block_device { volume_size = var.os_disk_size volume_type = var.os_disk_type @@ -47,352 +34,586 @@ resource "aws_instance" "this" { kms_key_id = var.os_disk_kms_key_id } + lifecycle { + ignore_changes = [ami, root_block_device] + } + metadata_options { http_endpoint = "enabled" - http_tokens = var.require_imdsv2 ? "required" : "optional" + http_tokens = "required" http_put_response_hop_limit = 2 } tags = merge( - var.tags, + local.tags, { "Name" = "${var.name}-${count.index}" "Terraform" = "true" + "rke2-role" = count.index == 0 ? 
"server" : "agent" }, ) - volume_tags = var.tags + volume_tags = local.tags - provisioner "remote-exec" { - inline = [ - "cloud-init status --wait" - ] +} - connection { - host = coalesce(self.public_ip, self.private_ip) - type = "ssh" - user = var.ssh_username - private_key = file(var.ssh_key_path) - bastion_host = var.ssh_proxy_host - bastion_user = var.ssh_proxy_user - } + +# #------------------------------------------------------------------------------ +# # Load balancer +# #------------------------------------------------------------------------------ +resource "aws_elb" "rke2_server" { + name = "rke2-server-lb" + internal = true + subnets = var.subnet_ids + security_groups = [aws_security_group.rke2_server.id, aws_security_group.rke2_agent.id] + + # listener { + # instance_port = 443 + # instance_protocol = "TCP" + # lb_port = 443 + # lb_protocol = "TCP" + # } + + # listener { + # instance_port = 80 + # instance_protocol = "TCP" + # lb_port = 80 + # lb_protocol = "TCP" + # } + + # RKE2 API server listener + listener { + instance_port = 6443 + instance_protocol = "TCP" + lb_port = 6443 + lb_protocol = "TCP" } -} -#------------------------------------------------------------------------------ -# Load balancer -#------------------------------------------------------------------------------ -resource "aws_elb" "this" { - name = local.lb_name - security_groups = [aws_security_group.loadbalancer.id] - subnets = var.lb_subnet_ids - instances = aws_instance.this[*].id - internal = var.internal_lb - idle_timeout = 3600 + # RKE2 supervisor port for HA + listener { + instance_port = 9345 + instance_protocol = "TCP" + lb_port = 9345 + lb_protocol = "TCP" + } + # RKE2 kubelet port listener { - instance_port = 443 + instance_port = 10250 instance_protocol = "TCP" - lb_port = 443 + lb_port = 10250 lb_protocol = "TCP" } + # RKE2 etcd port listener { - instance_port = 80 + instance_port = 2379 instance_protocol = "TCP" - lb_port = 80 + lb_port = 2379 + lb_protocol = "TCP" + } + # RKE2 etcd port + listener { + instance_port = 2380 + instance_protocol = "TCP" + lb_port = 2380 lb_protocol = "TCP" } health_check { healthy_threshold = 3 unhealthy_threshold = 3 - target = "HTTP:80/healthz" + target = "HTTP:6443/healthz" interval = 10 timeout = 6 } - tags = merge( - var.tags, - { - "Name" = local.lb_name - "Terraform" = "true" - }, - ) -} - -#------------------------------------------------------------------------------ -# Security groups -#------------------------------------------------------------------------------ -resource "aws_security_group" "loadbalancer" { - name = local.lb_secgrp_name - description = "Grant access to Rancher ELB" - vpc_id = var.vpc_id - - tags = merge( - var.tags, - { - "Name" = local.lb_secgrp_name - "Terraform" = "true" - }, - ) -} - -resource "aws_security_group_rule" "lb_rancher_ingress_443" { - type = "ingress" - from_port = 443 - to_port = 443 - protocol = "tcp" - - security_group_id = aws_security_group.loadbalancer.id - source_security_group_id = aws_security_group.instances.id -} - -resource "aws_security_group_rule" "lb_rancher_ingress_80" { - type = "ingress" - from_port = 80 - to_port = 80 - protocol = "tcp" - - security_group_id = aws_security_group.loadbalancer.id - source_security_group_id = aws_security_group.instances.id -} - -resource "aws_security_group_rule" "lb_cidr_ingress_443" { - count = length(var.lb_cidr_blocks) - - type = "ingress" - from_port = 443 - to_port = 443 - protocol = "tcp" - - security_group_id = aws_security_group.loadbalancer.id - 
cidr_blocks = var.lb_cidr_blocks + tags = { + Name = "rke2-server-nlb" + } } -resource "aws_security_group_rule" "lb_secgrp_ingress_443" { - count = var.lb_security_groups_count +resource "aws_lb_target_group" "rke2_api" { + name = "rke2-api-tg" + port = 6443 + protocol = "TCP" + vpc_id = var.vpc_id - type = "ingress" - from_port = 443 - to_port = 443 - protocol = "tcp" + health_check { + enabled = true + healthy_threshold = 2 + unhealthy_threshold = 2 + timeout = 10 + interval = 30 + port = 6443 + protocol = "TCP" + } - security_group_id = aws_security_group.loadbalancer.id - source_security_group_id = var.lb_security_groups[count.index] + tags = local.tags } -resource "aws_security_group_rule" "lb_cidr_ingress_80" { - count = length(var.lb_cidr_blocks) +resource "aws_lb_target_group" "rke2_server" { + name = "rke2-server-tg" + port = 9345 # RKE2 uses 9345 (server), 6443 (API), 10250 (kubelet), 2379-2380 (etcd) + protocol = "TCP" + vpc_id = var.vpc_id - type = "ingress" - from_port = 80 - to_port = 80 - protocol = "tcp" - security_group_id = aws_security_group.loadbalancer.id - cidr_blocks = var.lb_cidr_blocks -} - -resource "aws_security_group_rule" "lb_secgrp_ingress_80" { - count = var.lb_security_groups_count - - type = "ingress" - from_port = 80 - to_port = 80 - protocol = "tcp" + health_check { + enabled = true + healthy_threshold = 2 + unhealthy_threshold = 2 + timeout = 10 + interval = 30 + port = 9345 + protocol = "TCP" + } - security_group_id = aws_security_group.loadbalancer.id - source_security_group_id = var.lb_security_groups[count.index] + tags = local.tags } -resource "aws_security_group_rule" "lb_egress_443" { - type = "egress" - description = "Outgoing instance traffic" - from_port = 443 - to_port = 443 - protocol = "tcp" - - security_group_id = aws_security_group.loadbalancer.id - source_security_group_id = aws_security_group.instances.id -} +# #------------------------------------------------------------------------------ +# # Security groups +# #------------------------------------------------------------------------------ +resource "aws_security_group" "rke2_server" { + name_prefix = "rke2-server-" + vpc_id = local.vpc_id -resource "aws_security_group_rule" "lb_egress_80" { - type = "egress" - description = "Outgoing instance traffic" - from_port = 80 - to_port = 80 - protocol = "tcp" + # Kubernetes API + ingress { + from_port = 6443 + to_port = 6443 + protocol = "tcp" + cidr_blocks = [local.vpc_cidrs] + } - security_group_id = aws_security_group.loadbalancer.id - source_security_group_id = aws_security_group.instances.id -} + # RKE2 server port + ingress { + from_port = 9345 + to_port = 9345 + protocol = "tcp" + cidr_blocks = [local.vpc_cidrs] + } -resource "aws_security_group" "instances" { - name = local.instance_secgrp_name - description = "Govern access to Rancher server instances" - vpc_id = var.vpc_id + # etcd peer communication + ingress { + from_port = 2379 + to_port = 2380 + protocol = "tcp" + cidr_blocks = [local.vpc_cidrs] + } + # Canal CNI ingress { - description = "Incoming LB traffic" - from_port = 443 - to_port = 443 - protocol = "tcp" - security_groups = [aws_security_group.loadbalancer.id] + from_port = 8472 + to_port = 8472 + protocol = "udp" + cidr_blocks = [local.vpc_cidrs] } + # Kubelet ingress { - description = "Incoming LB traffic" - from_port = 80 - to_port = 80 - protocol = "tcp" - security_groups = [aws_security_group.loadbalancer.id] + from_port = 10250 + to_port = 10250 + protocol = "tcp" + cidr_blocks = [local.vpc_cidrs] } + # NodePort 
services ingress { - description = "Node intercommunication" - from_port = 0 - to_port = 0 - protocol = -1 - self = true + from_port = 30000 + to_port = 32767 + protocol = "tcp" + cidr_blocks = [local.vpc_cidrs] } egress { - description = "Allow all outbound traffic" from_port = 0 to_port = 0 - protocol = -1 + protocol = "-1" cidr_blocks = ["0.0.0.0/0"] } - tags = merge( - var.tags, - { - "Name" = local.instance_secgrp_name - "Terraform" = "true" - }, - ) + tags = local.tags } -resource "aws_security_group" "provisioner" { - name = local.provisioner_secgrp_name - description = "Provision Rancher instances" +resource "aws_security_group" "rke2_agent" { + name_prefix = "rke2-agent-" vpc_id = var.vpc_id - tags = merge( - var.tags, - { - "Name" = local.provisioner_secgrp_name - "Terraform" = "true" - }, - ) -} - -resource "aws_security_group_rule" "provisioner_cidr_ingress_22" { - count = var.use_provisioner_secgrp ? 0 : 1 - - type = "ingress" - description = "RKE SSH access" - from_port = 22 - to_port = 22 - protocol = "tcp" - - security_group_id = aws_security_group.provisioner.id - cidr_blocks = [var.provisioner_cidr_block] -} - -resource "aws_security_group_rule" "provisioner_secgrp_ingress_22" { - count = var.use_provisioner_secgrp ? 1 : 0 - - type = "ingress" - description = "RKE SSH access" - from_port = 22 - to_port = 22 - protocol = "tcp" - - security_group_id = aws_security_group.provisioner.id - source_security_group_id = var.provisioner_security_group -} - -resource "aws_security_group_rule" "provisioner_cidr_ingress_6443" { - count = var.use_provisioner_secgrp ? 0 : 1 - - type = "ingress" - description = "RKE K8s endpoint verification" - from_port = 6443 - to_port = 6443 - protocol = "tcp" - - security_group_id = aws_security_group.provisioner.id - cidr_blocks = [var.provisioner_cidr_block] -} - -resource "aws_security_group_rule" "provisioner_secgrp_ingress_6443" { - count = var.use_provisioner_secgrp ? 1 : 0 - - type = "ingress" - description = "RKE K8s endpoint verification" - from_port = 6443 - to_port = 6443 - protocol = "tcp" - - security_group_id = aws_security_group.provisioner.id - source_security_group_id = var.provisioner_security_group -} - -resource "aws_security_group_rule" "provisioner_cidr_ingress_443" { - count = var.use_provisioner_secgrp ? 0 : 1 - - type = "ingress" - description = "Ranchhand cluster verification" - from_port = 443 - to_port = 443 - protocol = "tcp" + # Kubelet + ingress { + from_port = 10250 + to_port = 10250 + protocol = "tcp" + cidr_blocks = [local.vpc_cidrs] + } - security_group_id = aws_security_group.provisioner.id - cidr_blocks = [var.provisioner_cidr_block] -} + # Canal CNI + ingress { + from_port = 8472 + to_port = 8472 + protocol = "udp" + cidr_blocks = [local.vpc_cidrs] + } -resource "aws_security_group_rule" "provisioner_secgrp_ingress_443" { - count = var.use_provisioner_secgrp ? 
1 : 0 + # NodePort services + ingress { + from_port = 30000 + to_port = 32767 + protocol = "tcp" + cidr_blocks = [local.vpc_cidrs] #[var.allowed_cidrs] + } - type = "ingress" - description = "Ranchhand cluster verification" - from_port = 443 - to_port = 443 - protocol = "tcp" + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } - security_group_id = aws_security_group.provisioner.id - source_security_group_id = var.provisioner_security_group + tags = local.tags } -#------------------------------------------------------------------------------ -# Provisioner -#------------------------------------------------------------------------------ -module "ranchhand" { - source = "github.com/dominodatalab/ranchhand?ref=v1.1.2" - - node_ips = aws_instance.this[*].private_ip - - working_dir = var.ranchhand_working_dir - cert_dnsnames = concat([aws_elb.this.dns_name], var.cert_dnsnames) - cert_ipaddresses = var.cert_ipaddresses - - rancher_version = var.rancher_version - rancher_image_tag = var.rancher_image_tag - rke_version = var.rancher_rke_version - kubectl_version = var.rancher_kubectl_version - - ssh_username = var.ssh_username - ssh_key_path = var.ssh_key_path - ssh_proxy_user = var.ssh_proxy_user - ssh_proxy_host = var.ssh_proxy_host - - admin_password = var.admin_password - - helm_v3_registry_host = var.helm_v3_registry_host - helm_v3_registry_user = var.helm_v3_registry_user - helm_v3_registry_password = var.helm_v3_registry_password - - newrelic_license_key = var.newrelic_license_key -} +# resource "aws_security_group" "loadbalancer" { +# name = local.lb_secgrp_name +# description = "Grant access to Rancher ELB" +# vpc_id = var.vpc_id + +# tags = merge( +# local.tags, +# { +# "Name" = local.lb_secgrp_name +# "Terraform" = "true" +# }, +# ) +# } + +# # Existing load balancer rules for Rancher (443, 80) +# resource "aws_security_group_rule" "lb_rancher_ingress_443" { +# description = "ingress port 443 - loadbalancer ${aws_security_group.loadbalancer.id}" +# type = "ingress" +# from_port = 443 +# to_port = 443 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# source_security_group_id = aws_security_group.instances.id +# } + +# resource "aws_security_group_rule" "lb_rancher_ingress_80" { +# description = "ingress port 80 - loadbalancer ${aws_security_group.loadbalancer.id}" +# type = "ingress" +# from_port = 80 +# to_port = 80 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# source_security_group_id = aws_security_group.instances.id +# } + +# # RKE2 API server ingress rules +# resource "aws_security_group_rule" "lb_rke2_ingress_6443" { +# description = "ingress port 6443 - RKE2 API server" +# type = "ingress" +# from_port = 6443 +# to_port = 6443 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# source_security_group_id = aws_security_group.instances.id +# } + +# # RKE2 supervisor port ingress rules +# resource "aws_security_group_rule" "lb_rke2_ingress_9345" { +# description = "ingress port 9345 - RKE2 supervisor" +# type = "ingress" +# from_port = 9345 +# to_port = 9345 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# source_security_group_id = aws_security_group.instances.id +# } + +# resource "aws_security_group_rule" "lb_cidr_ingress_443" { +# count = length(var.lb_cidr_blocks) +# description = "ingress port 443 - ${var.lb_cidr_blocks[count.index]}" + +# type = "ingress" +# from_port = 443 +# to_port = 443 +# protocol = 
"tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# cidr_blocks = var.lb_cidr_blocks +# } + +# resource "aws_security_group_rule" "lb_secgrp_ingress_443" { +# count = var.lb_security_groups_count +# description = "${var.lb_security_groups[count.index]} ingress port 443" + +# type = "ingress" +# from_port = 443 +# to_port = 443 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# source_security_group_id = var.lb_security_groups[count.index] +# } + +# resource "aws_security_group_rule" "lb_cidr_ingress_80" { +# count = length(var.lb_cidr_blocks) +# description = "ingress port 80 - ${var.lb_cidr_blocks[count.index]}" + +# type = "ingress" +# from_port = 80 +# to_port = 80 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# cidr_blocks = var.lb_cidr_blocks +# } + +# resource "aws_security_group_rule" "lb_secgrp_ingress_80" { +# count = var.lb_security_groups_count +# description = "${var.lb_security_groups[count.index]} ingress port 80" + +# type = "ingress" +# from_port = 80 +# to_port = 80 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# source_security_group_id = var.lb_security_groups[count.index] +# } + +# # RKE2 API server CIDR ingress rules +# resource "aws_security_group_rule" "lb_cidr_ingress_6443" { +# count = length(var.lb_cidr_blocks) +# description = "ingress port 6443 - RKE2 API - ${var.lb_cidr_blocks[count.index]}" + +# type = "ingress" +# from_port = 6443 +# to_port = 6443 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# cidr_blocks = var.lb_cidr_blocks +# } + +# resource "aws_security_group_rule" "lb_secgrp_ingress_6443" { +# count = var.lb_security_groups_count +# description = "${var.lb_security_groups[count.index]} ingress port 6443 - RKE2 API" + +# type = "ingress" +# from_port = 6443 +# to_port = 6443 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# source_security_group_id = var.lb_security_groups[count.index] +# } + +# resource "aws_security_group_rule" "lb_egress_443" { +# type = "egress" +# description = "Outgoing instance traffic" +# from_port = 443 +# to_port = 443 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# source_security_group_id = aws_security_group.instances.id +# } + +# resource "aws_security_group_rule" "lb_egress_80" { +# type = "egress" +# description = "Outgoing instance traffic" +# from_port = 80 +# to_port = 80 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# source_security_group_id = aws_security_group.instances.id +# } + +# # RKE2 egress rules +# resource "aws_security_group_rule" "lb_egress_6443" { +# type = "egress" +# description = "Outgoing RKE2 API traffic" +# from_port = 6443 +# to_port = 6443 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# source_security_group_id = aws_security_group.instances.id +# } + +# resource "aws_security_group_rule" "lb_egress_9345" { +# type = "egress" +# description = "Outgoing RKE2 supervisor traffic" +# from_port = 9345 +# to_port = 9345 +# protocol = "tcp" + +# security_group_id = aws_security_group.loadbalancer.id +# source_security_group_id = aws_security_group.instances.id +# } + +# resource "aws_security_group" "instances" { +# name = local.instance_secgrp_name +# description = "Govern access to Rancher server instances" +# vpc_id = var.vpc_id + +# ingress { +# description = "Incoming LB traffic" +# from_port = 443 +# to_port 
= 443 +# protocol = "tcp" +# security_groups = [aws_security_group.loadbalancer.id] +# } + +# ingress { +# description = "Incoming LB traffic" +# from_port = 80 +# to_port = 80 +# protocol = "tcp" +# security_groups = [aws_security_group.loadbalancer.id] +# } + +# # RKE2 API server +# ingress { +# description = "RKE2 API server" +# from_port = 6443 +# to_port = 6443 +# protocol = "tcp" +# security_groups = [aws_security_group.loadbalancer.id] +# } + +# # RKE2 supervisor port for HA +# ingress { +# description = "RKE2 supervisor port" +# from_port = 9345 +# to_port = 9345 +# protocol = "tcp" +# security_groups = [aws_security_group.loadbalancer.id] +# } + +# # RKE2 etcd client port +# ingress { +# description = "RKE2 etcd client" +# from_port = 2379 +# to_port = 2379 +# protocol = "tcp" +# self = true +# } + +# # RKE2 etcd peer port +# ingress { +# description = "RKE2 etcd peer" +# from_port = 2380 +# to_port = 2380 +# protocol = "tcp" +# self = true +# } + +# # RKE2 kubelet +# ingress { +# description = "RKE2 kubelet" +# from_port = 10250 +# to_port = 10250 +# protocol = "tcp" +# self = true +# } + +# # RKE2 CNI (Flannel VXLAN) +# ingress { +# description = "RKE2 CNI VXLAN" +# from_port = 8472 +# to_port = 8472 +# protocol = "udp" +# self = true +# } + +# # RKE2 metrics server +# ingress { +# description = "RKE2 metrics server" +# from_port = 10254 +# to_port = 10254 +# protocol = "tcp" +# self = true +# } + +# # NodePort services +# ingress { +# description = "NodePort services" +# from_port = 30000 +# to_port = 32767 +# protocol = "tcp" +# self = true +# } + +# ingress { +# description = "Node intercommunication" +# from_port = 0 +# to_port = 0 +# protocol = -1 +# self = true +# } + +# egress { +# description = "Allow all outbound traffic" +# from_port = 0 +# to_port = 0 +# protocol = -1 +# cidr_blocks = ["0.0.0.0/0"] +# } + +# tags = merge( +# local.tags, +# { +# "Name" = local.instance_secgrp_name +# "Terraform" = "true" +# }, +# ) +# } + + +# resource "aws_security_group_rule" "provisioner_cidr_ingress_22" { +# count = var.use_provisioner_secgrp ? 0 : 1 + +# type = "ingress" +# description = "RKE2 SSH access" +# from_port = 22 +# to_port = 22 +# protocol = "tcp" + +# security_group_id = aws_security_group.provisioner.id +# cidr_blocks = [var.provisioner_cidr_block] +# } + +# resource "aws_security_group_rule" "provisioner_secgrp_ingress_22" { +# count = var.use_provisioner_secgrp ? 1 : 0 + +# type = "ingress" +# description = "RKE2 SSH access" +# from_port = 22 +# to_port = 22 +# protocol = "tcp" + +# security_group_id = aws_security_group.provisioner.id +# source_security_group_id = var.provisioner_security_group +# } + +# resource "aws_security_group_rule" "provisioner_cidr_ingress_6443" { +# count = var.use_provisioner_secgrp ? 
0 : 1 + +# type = "ingress" +# description = "RKE2 K8s endpoint verification" +# from_port = 6443 +# to_port = 6443 +# protocol = "tcp" + +# security_group_id = aws_security_group.provisioner.id +# cidr_blocks = [var.provisioner_cidr_block] +# } diff --git a/output.tf b/output.tf index f24c431..6d086f5 100644 --- a/output.tf +++ b/output.tf @@ -5,11 +5,16 @@ output "lb_dns_name" { output "cluster_provisioned" { description = "ID of the null_resource cluster provisioner" - value = module.ranchhand.cluster_provisioned + value = module.rke2_provisioner.cluster_provisioned } output "admin_password" { description = "Generated Rancher admin user password" - value = module.ranchhand.admin_password + value = module.rke2_provisioner.admin_password + sensitive = true } +output "kubeconfig_file" { + description = "Path to the generated kubeconfig file" + value = module.rke2_provisioner.kubeconfig_file +} diff --git a/variables.tf b/variables.tf index 844cdd5..f6d452c 100644 --- a/variables.tf +++ b/variables.tf @@ -6,91 +6,132 @@ variable "vpc_id" { type = string } -variable "lb_subnet_ids" { - description = "List of subnets where LB will be created" - type = list(string) -} - -variable "subnet_ids" { - description = "List of subnets where instances will be created" - type = list(string) +variable "server_count" { + description = "Number of instances in the cluster" + type = number + default = 3 + validation { + condition = var.server_count >= 3 + error_message = "server_count must be at least 3 to ensure high availability." + } } -variable "ssh_key_name" { - description = "Name of the EC2 key pair to use for the instances" +variable "rke2_server_url" { + description = "RKE2 server URL used to join nodes to the cluster" type = string } -#------------------------------------------------------------------------------ -# YOU MUST CHOOSE ONE OF THE FOLLOWING OTHERWISE PROVISIONING WILL FAIL! -#------------------------------------------------------------------------------ -variable "use_provisioner_secgrp" { - description = "Determines whether to use the security provision_security_group or provisioner_cidr_block inputs." - default = "true" +# Check CNI and versions at https://docs.rke2.io/release-notes/v{K8S_VER}.X +# i.e. https://docs.rke2.io/release-notes/v1.32.X +variable "cni_plugin" { + description = "CNI plugin to use for the RKE2 cluster (canal, calico, cilium, flannel)" + default = "calico" type = string + + validation { + condition = alltrue([ + for mode in var.cni_plugin : contains(["canal", "calico", "cilium", "flannel"], mode) + ]) + error_message = "cni_plugin must be one of 'canal', 'calico', 'cilium', or 'flannel'." + } } -variable "provisioner_security_group" { - description = "ID of security group attached to the VM that will provision the Rancher instances. This is typically a bastion host." +variable "node_taint" { + description = "Taint to apply to the RKE2 nodes (e.g., 'node-role.kubernetes.io/control-plane:NoSchedule')" default = "" type = string } -variable "provisioner_cidr_block" { - description = "CIDR address of the host that will provision the Rancher instances. This will only work with instances that are publicly accessible." 
- default = "" +variable "server_instance_type" { + description = "Instance type for the RKE2 server nodes" + default = "t3.xlarge" type = string } -#------------------------------------------------------------------------------ -# OPTIONAL -#------------------------------------------------------------------------------ -variable "name" { - description = "Root name applied to all resources" - default = "rancher" +variable "ssh_key_name" { + description = "Name of the EC2 key pair to use for the instances" + default = "rke2-keypair" type = string + } -variable "internal_lb" { - description = "Create an internal load balancer. Defaults to internet-facing." - default = false +variable "subnet_ids" { + description = "List of subnet IDs where the RKE2 server nodes will be created" + type = list(string) + +} + +variable "node_os_version" { + description = "Operating system version for the RKE2 nodes (e.g., 'ubuntu-24.04')" + default = "ubuntu-24.04" type = string } -variable "lb_security_groups" { - description = "Grant LB ingress access to one or more security group IDs" - default = [] - type = list(string) +variable "rke2_version" { + description = "Override for the installed RKE2 version. With the 'v'" + type = string + default = "v1.32.4+rke2r1" + + validation { + condition = can(regex("^v[0-9]+\\.[0-9]+\\.[0-9]+\\+rke2r[0-9]+$", var.rke2_version)) + error_message = "rke2_version must be in the format v..+rke2r, e.g., v1.32.4+rke2r1" + } } -variable "lb_security_groups_count" { - description = "Count of dynamically determines lb_security_groups" - default = 0 - type = number +variable "rke2_token" { + description = "Token for RKE2 cluster" + type = string + sensitive = true } -variable "lb_cidr_blocks" { - description = "Grant LB ingress access to one or more CIDR addresses" - default = [] - type = list(string) +variable "cluster_dns" { + description = "Cluster DNS IP" + type = string + default = "10.43.0.10" + validation { + condition = provider::assert::ip(var.ip_address) + error_message = "Invalid cluster_dns address" + } } -variable "instance_count" { - description = "Number of instances to launch" - default = 3 - type = number +variable "cluster_domain" { + description = "Cluster domain" + type = string + default = "cluster.local" } -variable "ami" { - description = "Instance AMI defaults to Ubuntu 24.04" - default = "ami-00c257e12d6828491" +variable "cluster_cidr" { + description = "IPv4/IPv6 network CIDRs to use for pod IPs" type = string + default = "10.42.0.0/16" + + validation { + condition = provider::assert::cidr(var.cluster_cidr) + error_message = "Invalid cluster_cidr" + } } -variable "instance_type" { - description = "Type of instances to launch" - default = "t3.xlarge" +variable "service_cidr" { + description = "IPv4/IPv6 network CIDRs to use for service IPs" type = string + default = "10.43.0.0/16" + + validation { + condition = provider::assert::cidr(var.service_cidr) + error_message = "Invalid service_cidr" + } +} + +variable "enable_detailed_monitoring" { + description = "Launch EC2 instances with detailed monitoring enabled" + default = false + type = bool +} + +variable "enable_deletion_protection" { + description = "If true, enables EC2 Instance Termination Protection" + default = false + type = bool } variable "os_disk_size" { @@ -100,7 +141,7 @@ variable "os_disk_size" { } variable "os_disk_type" { - description = "Root partition volume type for instances" + description = "Root partition volume type for instances (io1, io2, gp2, gp3, sc1, st1, standard)" default = 
"gp3" type = string } @@ -123,130 +164,168 @@ variable "os_disk_kms_key_id" { type = string } -variable "ebs_optimized" { - description = "Attach NICs dedicated to EBS volume network traffic" - default = true - type = bool -} - -variable "enable_detailed_monitoring" { - description = "Launch EC2 instances with detailed monitoring enabled" - default = false - type = bool -} - -variable "enable_deletion_protection" { - description = "" - default = false - type = bool -} - variable "tags" { description = "Extra tags assigned to all resources" default = {} type = map(string) } -#------------------------------------------------------------------------------ -# RANCHHAND -#------------------------------------------------------------------------------ -variable "ranchhand_working_dir" { - description = "Directory where ranchhand should be executed. Defaults to the current working directory." - default = "" - type = string -} - -variable "cert_dnsnames" { - description = "Hostnames for the rancher and rke ssl certs (comma-delimited)" - default = [""] - type = list(string) -} - -variable "cert_ipaddresses" { - description = "IP addresses for the rancher and rke ssl certs (comma-delimited)" - default = ["127.0.0.1"] - type = list(string) -} - -variable "ssh_username" { - description = "SSH username on the nodes" - default = "ubuntu" - type = string -} - -variable "ssh_key_path" { - description = "Path to the SSH private key that will be used to connect to the VMs" - default = "~/.ssh/id_rsa" - type = string -} - -variable "ssh_proxy_user" { - description = "Bastion host SSH username" - default = "" - type = string -} - -variable "ssh_proxy_host" { - description = "Bastion host used to proxy SSH connections" - default = "" - type = string -} - -variable "admin_password" { - description = "Password override for the initial admin user" - default = "" - type = string -} - -# Update the rancher_* variables together -# Please reference the Rancher support matrix before changing these values -# https://www.suse.com/suse-rancher/support-matrix/all-supported-versions/ -# before changing these values -variable "rancher_version" { - description = "Override for the installed Rancher version. Without the [v]" - default = "2.10.5" - type = string -} - -variable "rancher_image_tag" { - description = "Override for the installed Rancher image tag. With the [v]" - default = "v2.10.5" +# #------------------------------------------------------------------------------ +# # OPTIONAL +# #------------------------------------------------------------------------------ +variable "name" { + description = "Root name applied to all resources" + default = "rancher" type = string } -variable "rancher_kubectl_version" { - description = "Override for the kubectl version supported by RKE to install. With the [v]" - default = "v1.31.5" +variable "ami" { + description = "Specific AMI ID to use for the RKE2 nodes. If not specified, the latest Ubuntu AMI will be used." type = string + validation { + condition = can(regex("^ami-[a-z0-9]+$", var.ami)) || var.ami == "" + error_message = "AMI must be a valid AMI ID or an empty string to use the default Ubuntu AMI." + } } -variable "rancher_rke_version" { - description = "Override for the installed RKE image tag. 
With the [v]" - default = "v1.7.3" +variable "egress_selector_mode" { + description = "RKE2 egress selector mode (agent, cluster, pod, disabled)" + default = "agent" type = string -} - -variable "helm_v3_registry_host" { - default = "" - type = string -} - -variable "helm_v3_registry_user" { - default = "" - type = string -} - -variable "helm_v3_registry_password" { - default = "" - type = string -} - -variable "newrelic_license_key" { - default = "" - type = string -} - -variable "require_imdsv2" { - description = "Require instance metadata service v2" - type = bool -} + validation { + # condition = contains(["agent", "cluster", "pod", "disabled"], var.egress_selector_mode) + # condition = var.egress_selector_mode == "agent" || var.egress_selector_mode == "cluster" || var.egress_selector_mode == "pod" || var.egress_selector_mode == "disabled" + condition = alltrue([ + for mode in var.egress_selector_mode : contains(["agent", "cluster", "pod", "disabled"], mode) + ]) + error_message = "egress_selector_mode must be one of 'agent', 'cluster', 'pod', or 'disabled'." + } +} + + + +# variable "ebs_optimized" { +# description = "Attach NICs dedicated to EBS volume network traffic" +# default = true +# type = bool +# } + + + + +# #------------------------------------------------------------------------------ +# # RKE2 CONFIGURATION +# #------------------------------------------------------------------------------ + +# variable "cert_dnsnames" { +# description = "Hostnames for the rancher and RKE2 ssl certs (comma-delimited)" +# default = [""] +# type = list(string) +# } + +# variable "cert_ipaddresses" { +# description = "IP addresses for the rancher and RKE2 ssl certs (comma-delimited)" +# default = ["127.0.0.1"] +# type = list(string) +# } + +# variable "ssh_username" { +# description = "SSH username on the nodes" +# default = "ubuntu" +# type = string +# } + +# variable "ssh_key_path" { +# description = "Path to the SSH private key that will be used to connect to the VMs" +# default = "~/.ssh/id_rsa" +# type = string +# } + +# variable "ssh_proxy_user" { +# description = "Bastion host SSH username" +# default = "" +# type = string +# } + +# variable "ssh_proxy_host" { +# description = "Bastion host used to proxy SSH connections" +# default = "" +# type = string +# } + +# variable "admin_password" { +# description = "Password override for the initial admin user" +# default = "" +# type = string +# } + +# # Update the rancher_* and rke2_* variables together +# # Please reference the Rancher support matrix before changing these values +# # https://www.suse.com/suse-rancher/support-matrix/all-supported-versions/ +# variable "rancher_version" { +# description = "Override for the installed Rancher version. Without the [v]" +# default = "2.10.5" +# type = string +# } + +# variable "rancher_image_tag" { +# description = "Override for the installed Rancher image tag. With the [v]" +# default = "v2.10.5" +# type = string +# } + +# variable "rancher_kubectl_version" { +# description = "Override for the kubectl version supported by RKE2 to install. With the [v]" +# default = "v1.31.5" +# type = string +# } + +# variable "rke2_version" { +# description = "Override for the installed RKE2 version. 
With the [v]" +# default = "v1.31.5+rke2r1" +# type = string +# } + +# variable "rke2_channel" { +# description = "RKE2 release channel (stable, latest)" +# default = "stable" +# type = string +# } + +# variable "rke2_cni" { +# description = "RKE2 CNI plugin (canal, calico, cilium)" +# default = "canal" +# type = string +# } + +# variable "rke2_disable_cloud_controller" { +# description = "Disable RKE2 cloud controller manager" +# default = false +# type = bool +# } + +# variable "rke2_config_file" { +# description = "Path to custom RKE2 configuration file" +# default = "" +# type = string +# } + +# variable "helm_v3_registry_host" { +# default = "" +# type = string +# } + +# variable "helm_v3_registry_user" { +# default = "" +# type = string +# } + +# variable "helm_v3_registry_password" { +# default = "" +# type = string +# } + +# variable "newrelic_license_key" { +# default = "" +# type = string +# }