diff --git a/docs/.sphinx/.wordlist.txt b/docs/.sphinx/.wordlist.txt index fd1f1010..68b76f1f 100644 --- a/docs/.sphinx/.wordlist.txt +++ b/docs/.sphinx/.wordlist.txt @@ -329,3 +329,5 @@ snapcrafting subcluster swrast zSystems +mimir +terraform diff --git a/docs/conf.py b/docs/conf.py index 4d2e65c3..e939afe7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -220,6 +220,7 @@ "http://127.0.0.1:8000", "https://github.com/canonical/ACME/*", "troubleshooting/", + "https://github.com/canonical/observability-stack//terraform/cos-lite", ] diff --git a/docs/explanation/assets/tls-diagram.png b/docs/explanation/assets/tls-diagram.png deleted file mode 100644 index 7a90f48c..00000000 Binary files a/docs/explanation/assets/tls-diagram.png and /dev/null differ diff --git a/docs/explanation/index.rst b/docs/explanation/index.rst index b3003cf1..68d246ca 100644 --- a/docs/explanation/index.rst +++ b/docs/explanation/index.rst @@ -14,6 +14,5 @@ Explanation Telemetry Flow Telemetry Labels Logging Architecture - TLS encryption in COS Model-Driven Observability What is Observability? diff --git a/docs/explanation/tls-encryption-in-cos.md b/docs/explanation/tls-encryption-in-cos.md deleted file mode 100644 index 2a0a9a13..00000000 --- a/docs/explanation/tls-encryption-in-cos.md +++ /dev/null @@ -1,35 +0,0 @@ -# TLS encryption in COS - -## COS - -When deploying COS using [the provided Terraform module](https://github.com/canonical/observability-stack/tree/main/terraform/cos), it will by default be deployed using a self-signed certificate authority. If you have other certificate requirements, you'll be able to replace the self-signed-certificates operator with another TLS operator of your liking, consulting the "Providing" section of [the `tls-certificates` interface page on Charmhub](https://charmhub.io/integrations/tls-certificates). - -## COS Lite - -COS Lite can be deployed unencrypted, with TLS termination only, or end-to-end encrypted. 
- -### Unencrypted COS Lite - -The [cos-lite bundle](https://charmhub.io/cos-lite) deploys COS with workloads communicating using plain HTTP (unencrypted). - -### TLS-terminated COS Lite - -The Traefik charm can function as a TLS termination point by relating it to an external CA (integrator) charm. Within the COS model, charms would still communicate using plain HTTP (unencrypted). - -### COS Lite with end-to-end TLS - -The cos-lite bundle together with the TLS overlay deploy an end-to-end encrypted COS. -- COS charms generate CSRs with the K8s FQDN as the SAN DNS and the internal CA signs. -- All COS charms trust the internal CA by installing the CA certificate in the charm and workload containers, using the `update-ca-certificates` tool. -- The external CA provides a certificate for Traefik's external URL. -- Within the COS model, workloads communicate via K8s FQDN URLs. -- Requests coming from outside of the model, use the ingress URLs. -- Traefik is able to establish a secure connection with its proxied apps thanks to trusting the local CA. - -Note: currently there is a [known issue](https://github.com/canonical/operator/issues/970) due to which some COS relations are limited to in-cluster relations only. - -The end-to-end COS TLS design is described in the diagram below. The diagram is limited to prometheus and alertmanager for brevity and clarity. - -![TLS](assets/tls-diagram.png) - -As with any TLS configuration, keep in mind best practices such as frequent certificate rotation. See [this guide](https://charmhub.io/blackbox-exporter-k8s/docs/monitor-ssl-certificates) for an example of monitoring certificates. 
\ No newline at end of file diff --git a/docs/how-to/assets/high-level-tls.png b/docs/how-to/assets/high-level-tls.png new file mode 100644 index 00000000..5f5723c9 Binary files /dev/null and b/docs/how-to/assets/high-level-tls.png differ diff --git a/docs/how-to/configure-tls-encryption.md b/docs/how-to/configure-tls-encryption.md new file mode 100644 index 00000000..4f6b47ec --- /dev/null +++ b/docs/how-to/configure-tls-encryption.md @@ -0,0 +1,86 @@ +# TLS encryption in COS + +Both COS and COS Lite have 2 sections of the deployment (internal and external) that can each implement TLS communication. + +The combination of these 2 configurations provides our products with 4 modes of operation: +1. Both `external` and `internal` TLS communication, i.e. `full TLS encryption` +2. Only `external` TLS communication +3. Only `internal` TLS communication (default) +4. Neither `external` nor `internal` TLS communication, i.e. `unencrypted` + +![high-level-tls.png](assets/high-level-tls.png) + + + +## Full TLS encryption implementation details + +The recommended deployment for COS implements full TLS encryption, which requires an external certificates provider offer URL (cross-model relation) and has the following semantics: + +- The external CA provides a certificate for Traefik's external URL. +- Within the COS model, workloads communicate via K8s FQDN URLs, except (on a case-by-case basis) when they have ingress relations +- COS charms generate CSRs with the K8s FQDN as the SAN DNS and the internal CA signs. +- All COS charms trust the internal CA by installing the CA certificate in the charm and workload containers, using the `update-ca-certificates` tool. +- Traefik establishes a secure connection with its proxied apps by trusting the local CA. + +COS Lite with full TLS encryption is described in the diagram below. The diagram is limited to prometheus and alertmanager for brevity and clarity. 
+ +```{note} +This TLS diagram is relevant for COS as well, if prometheus is replaced with Mimir. +``` + +```{mermaid} +%%{init: { "theme": "dark" } }%% +flowchart TB + subgraph COS [cos-model] + traefik[traefik] + prometheus[prometheus] + alertmanager[alertmanager] + localca[local-ca] + end + + subgraph CAModel [ca-model] + direction TB + cert-provider[certificates provider] + end + + subgraph ObserveModel [observable-model] + grafana[grafana-agent] + end + + grafana -->|"remote_write
(example.com)"| prometheus + prometheus -->|"self-monitoring
(am-0.cluster.local)"| alertmanager + cert-provider -->|"tls_certificates
(example.com)"| traefik + traefik -->|"ingress-per-unit
(prom-0.cluster.local)"| prometheus + traefik -->|"ingress-per-app
(am-*.cluster.local)"| alertmanager + + prometheus -->|"tls_certificates
(prom-0.cluster.local)"| localca + alertmanager -->|"tls_certificates
(am-0.cluster.local)"| localca + localca -->|"certificate_transfer
(local_ca)"| traefik + + cert-provider -->|"certificate_transfer
(external_ca)"| grafana + + classDef Charm stroke:white,stroke-width:1px,color:white,rx:8px,ry:8px + class traefik,prometheus,alertmanager,localca,grafana,cert-provider Charm +``` + +As with any TLS configuration, keep in mind best practices such as frequent certificate rotation. See [this guide](https://charmhub.io/blackbox-exporter-k8s/docs/monitor-ssl-certificates) for an example of monitoring certificates. + +```{warning} Currently, there is a [known issue](https://github.com/canonical/operator/issues/970) that limits some COS relations to in-cluster relations only. +``` + +## Deployment + +Using the following Terraform root module, you can control `external` and `internal` TLS. + +To enable `internal` TLS, set the `internal_tls` value to `true`. To enable `external` TLS, supply the `external_certificates_offer_url` value with a `certificates` provider's Juju offer URL, from the `ssc` module in this example. The combination of these settings enables full encryption. + +```{Note} +If you are using COS Lite, create a cos-lite module with the cos-lite source: "git::https://github.com/canonical/observability-stack//terraform/cos-lite" + +The [COS Lite bundle](https://charmhub.io/cos-lite) is now deprecated in favor of Terraform modules. 
+``` + +```{literalinclude} /how-to/cos-tls.tf +``` diff --git a/docs/how-to/cos-tls.tf b/docs/how-to/cos-tls.tf new file mode 100644 index 00000000..445bb190 --- /dev/null +++ b/docs/how-to/cos-tls.tf @@ -0,0 +1,17 @@ +# Note: The deployment order matters since the 'traefik:certificates' integration depends on 'module.ssc' +# 'terraform apply -target module.ssc' +# 'terraform apply' + +module "ssc" { + source = "git::https://github.com/canonical/self-signed-certificates-operator//terraform" + model = "external-ca" +} + +module "cos" { + # Use the right source value depending on whether you are using cos or cos-lite + source = "git::https://github.com/canonical/observability-stack//terraform/cos" + model = "cos" + channel = "1/stable" + internal_tls = true # Set to 'false' to disable TLS between in-model applications + external_certificates_offer_url = module.ssc.offers.certificates.url # Set to 'null' to disable 'external' TLS, i.e. to communicate with Traefik via plain HTTP +} diff --git a/docs/how-to/index.rst b/docs/how-to/index.rst index 7d52e68d..55352c72 100644 --- a/docs/how-to/index.rst +++ b/docs/how-to/index.rst @@ -41,6 +41,7 @@ with COS to actually observe them. Integrate COS Lite with uncharmed applications Disable built-in charm alert rules Testing with Minio + Configure TLS encryption Troubleshooting =============== diff --git a/terraform/cos-lite/README.md b/terraform/cos-lite/README.md index 9e95d9d7..d1a1d7ae 100644 --- a/terraform/cos-lite/README.md +++ b/terraform/cos-lite/README.md @@ -24,8 +24,18 @@ The module offers the following configurable inputs: | Name | Type | Description | Default | |--|--|--|--| | `channel` | string | Channel that all the charms (unless overwritten) are deployed from | +| `external_certificates_offer_url` | string | A Juju offer URL of a CA providing the 'tls_certificates' integration for Traefik to supply it with server certificates | null | +| `internal_tls` | bool | Specify whether to use TLS or not for internal COS communication. 
By default, TLS is enabled using self-signed-certificates | true | | `model` | string | Reference to an existing model resource or data source for the model to deploy to | -| `use_tls` | bool | Specify whether to use TLS or not for coordinator-worker communication | true | +| `ssc_channel` | string | Channel that the self-signed certificates charm is deployed from | 1/stable | +| `traefik_channel` | string | Channel that the Traefik charm is deployed from | latest/stable | +| `alertmanager_revision` | number | Revision number of the charm | null | +| `catalogue_revision` | number | Revision number of the charm | null | +| `grafana_revision` | number | Revision number of the charm | null | +| `loki_revision` | number | Revision number of the charm | null | +| `prometheus_revision` | number | Revision number of the charm | null | +| `ssc_revision` | number | Revision number of the charm | null | +| `traefik_revision` | number | Revision number of the charm | null | ### Outputs @@ -33,9 +43,8 @@ Upon application, the module exports the following outputs: | Name | Description | |------------|-----------------------------| -| `app_name` | Application name | -| `provides` | Map of `provides` endpoints | -| `requires` | Map of `requires` endpoints | +| `components` | All Terraform charm modules which make up this product module (type: `map(any)`) | +| `offers` | All Juju offers which are exposed by this product module (type: `map(any)`) | ## Usage diff --git a/terraform/cos-lite/main.tf b/terraform/cos-lite/main.tf index 34b7712d..9aefa757 100644 --- a/terraform/cos-lite/main.tf +++ b/terraform/cos-lite/main.tf @@ -41,7 +41,7 @@ module "prometheus" { } module "ssc" { - count = var.use_tls ? 1 : 0 + count = var.internal_tls ? 
1 : 0 source = "git::https://github.com/canonical/self-signed-certificates-operator//terraform" model = var.model channel = var.ssc_channel @@ -353,7 +353,7 @@ resource "juju_integration" "traefik_self_monitoring_prometheus" { # Provided by Self-Signed-Certificates resource "juju_integration" "alertmanager_certificates" { - count = var.use_tls ? 1 : 0 + count = var.internal_tls ? 1 : 0 model = var.model application { @@ -368,7 +368,7 @@ resource "juju_integration" "alertmanager_certificates" { } resource "juju_integration" "catalogue_certificates" { - count = var.use_tls ? 1 : 0 + count = var.internal_tls ? 1 : 0 model = var.model application { @@ -383,7 +383,7 @@ resource "juju_integration" "catalogue_certificates" { } resource "juju_integration" "grafana_certificates" { - count = var.use_tls ? 1 : 0 + count = var.internal_tls ? 1 : 0 model = var.model application { @@ -398,7 +398,7 @@ resource "juju_integration" "grafana_certificates" { } resource "juju_integration" "loki_certificates" { - count = var.use_tls ? 1 : 0 + count = var.internal_tls ? 1 : 0 model = var.model application { @@ -413,7 +413,7 @@ resource "juju_integration" "loki_certificates" { } resource "juju_integration" "prometheus_certificates" { - count = var.use_tls ? 1 : 0 + count = var.internal_tls ? 1 : 0 model = var.model application { @@ -427,13 +427,29 @@ resource "juju_integration" "prometheus_certificates" { } } -resource "juju_integration" "traefik_certificates" { - count = var.use_tls ? 1 : 0 +resource "juju_integration" "traefik_receive_ca_certificate" { + count = var.internal_tls ? 1 : 0 model = var.model application { name = module.ssc[0].app_name - endpoint = module.ssc[0].provides.certificates + endpoint = module.ssc[0].provides.send-ca-cert + } + + application { + name = module.traefik.app_name + endpoint = module.traefik.endpoints.receive_ca_cert + } +} + +# Provided by an external CA + +resource "juju_integration" "external_traefik_certificates" { + count = local.tls_termination ? 
1 : 0 + model = var.model + + application { + offer_url = var.external_certificates_offer_url } application { diff --git a/terraform/cos-lite/variables.tf b/terraform/cos-lite/variables.tf index aee72671..b65faa56 100644 --- a/terraform/cos-lite/variables.tf +++ b/terraform/cos-lite/variables.tf @@ -1,3 +1,7 @@ +locals { + tls_termination = var.external_certificates_offer_url != null ? true : false +} + variable "channel" { description = "Channel that the charms are (unless overwritten by external_channels) deployed from" type = string @@ -8,12 +12,18 @@ variable "model" { type = string } -variable "use_tls" { - description = "Specify whether to use TLS or not for coordinator-worker communication. By default, TLS is enabled through self-signed-certificates" +variable "internal_tls" { + description = "Specify whether to use TLS or not for internal COS communication. By default, TLS is enabled using self-signed-certificates" type = bool default = true } +variable "external_certificates_offer_url" { + description = "A Juju offer URL (e.g. admin/external-ca.certificates) of a CA providing the 'tls_certificates' integration for Traefik to supply it with server certificates." + type = string + default = null +} + # -------------- # External channels -------------- # O11y does not own these charms, so we allow users to specify their channels directly. 
diff --git a/terraform/cos/README.md b/terraform/cos/README.md index 3a1cad12..69fa3cfa 100644 --- a/terraform/cos/README.md +++ b/terraform/cos/README.md @@ -73,10 +73,10 @@ The module offers the following configurable inputs: ### Outputs Upon application, the module exports the following outputs: -| Name | Type | Description | -| - | - | - | -| `components`| map(any) | All TF charm submodule which make up this product module | -| `offers`| map(any) | All offers which are exposed by this product module | +| Name | Type | Description | +| - | - | - | +| `components` | map(any) | All Terraform charm modules which make up this product module | +| `offers` | map(any) | All Juju offers which are exposed by this product module | ## Usage diff --git a/terraform/cos/main.tf b/terraform/cos/main.tf index 0896e388..4abc7953 100644 --- a/terraform/cos/main.tf +++ b/terraform/cos/main.tf @@ -71,7 +71,7 @@ module "mimir" { } module "ssc" { - count = var.use_tls ? 1 : 0 + count = var.internal_tls ? 1 : 0 source = "git::https://github.com/canonical/self-signed-certificates-operator//terraform" model = var.model channel = var.ssc_channel @@ -608,7 +608,7 @@ resource "juju_integration" "grafana_tracing_grafana_agent_traicing_provider" { # Provided by Self-Signed-Certificates resource "juju_integration" "alertmanager_certificates" { - count = var.use_tls ? 1 : 0 + count = var.internal_tls ? 1 : 0 model = var.model application { @@ -623,7 +623,7 @@ resource "juju_integration" "alertmanager_certificates" { } resource "juju_integration" "catalogue_certificates" { - count = var.use_tls ? 1 : 0 + count = var.internal_tls ? 1 : 0 model = var.model application { @@ -638,7 +638,7 @@ resource "juju_integration" "catalogue_certificates" { } resource "juju_integration" "grafana_certificates" { - count = var.use_tls ? 1 : 0 + count = var.internal_tls ? 
1 : 0 model = var.model application { @@ -653,7 +653,7 @@ resource "juju_integration" "grafana_certificates" { } resource "juju_integration" "grafana_agent_certificates" { - count = var.use_tls ? 1 : 0 + count = var.internal_tls ? 1 : 0 model = var.model application { @@ -668,7 +668,7 @@ resource "juju_integration" "grafana_agent_certificates" { } resource "juju_integration" "loki_certificates" { - count = var.use_tls ? 1 : 0 + count = var.internal_tls ? 1 : 0 model = var.model application { @@ -683,7 +683,7 @@ resource "juju_integration" "loki_certificates" { } resource "juju_integration" "mimir_certificates" { - count = var.use_tls ? 1 : 0 + count = var.internal_tls ? 1 : 0 model = var.model application { @@ -698,7 +698,7 @@ resource "juju_integration" "mimir_certificates" { } resource "juju_integration" "tempo_certificates" { - count = var.use_tls ? 1 : 0 + count = var.internal_tls ? 1 : 0 model = var.model application { @@ -712,13 +712,29 @@ resource "juju_integration" "tempo_certificates" { } } -resource "juju_integration" "traefik_certificates" { - count = var.use_tls ? 1 : 0 +resource "juju_integration" "traefik_receive_ca_certificate" { + count = var.internal_tls ? 1 : 0 model = var.model application { name = module.ssc[0].app_name - endpoint = module.ssc[0].provides.certificates + endpoint = module.ssc[0].provides.send-ca-cert + } + + application { + name = module.traefik.app_name + endpoint = module.traefik.endpoints.receive_ca_cert + } +} + +# Provided by an external CA + +resource "juju_integration" "external_traefik_certificates" { + count = local.tls_termination ? 1 : 0 + model = var.model + + application { + offer_url = var.external_certificates_offer_url } application { diff --git a/terraform/cos/variables.tf b/terraform/cos/variables.tf index a100b5d7..4168f3af 100644 --- a/terraform/cos/variables.tf +++ b/terraform/cos/variables.tf @@ -1,7 +1,6 @@ - -# the list of kubernetes clouds where this COS module can be deployed. 
locals { - clouds = ["aws", "self-managed"] + clouds = ["aws", "self-managed"] # list of k8s clouds where this COS module can be deployed. + tls_termination = var.external_certificates_offer_url != null ? true : false } variable "channel" { @@ -14,12 +13,18 @@ variable "model" { type = string } -variable "use_tls" { - description = "Specify whether to use TLS or not for coordinator-worker communication. By default, TLS is enabled through self-signed-certificates" +variable "internal_tls" { + description = "Specify whether to use TLS or not for internal COS communication. By default, TLS is enabled using self-signed-certificates" type = bool default = true } +variable "external_certificates_offer_url" { + description = "A Juju offer URL of a CA providing the 'tls_certificates' integration for Traefik to supply it with server certificates" + type = string + default = null +} + variable "cloud" { description = "Kubernetes cloud or environment where this COS module will be deployed (e.g self-managed, aws)" type = string