From 1342813ee4bf941d7b886d4e7c8f56a1514047a6 Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Tue, 15 Jul 2025 18:41:48 -0400 Subject: [PATCH 01/27] Initial migration of private preview content --- .../sidebar-data/self-hosted-deployments.json | 81 +++ ...nfigure-cockroachdb-kubernetes-operator.md | 169 +++++ ...oy-cockroachdb-with-kubernetes-operator.md | 638 ++++++++++++++++++ .../v25.2/kubernetes-operator-overview.md | 11 + .../v25.2/kubernetes-operator-performance.md | 287 ++++++++ .../migrate-cockroachdb-kubernetes-helm.md | 147 ++++ ...migrate-cockroachdb-kubernetes-operator.md | 181 +++++ ...monitor-cockroachdb-kubernetes-operator.md | 345 ++++++++++ .../scale-cockroachdb-kubernetes-operator.md | 96 +++ ...chedule-cockroachdb-kubernetes-operator.md | 346 ++++++++++ .../secure-cockroachdb-kubernetes-operator.md | 179 +++++ ...upgrade-cockroachdb-kubernetes-operator.md | 139 ++++ 12 files changed, 2619 insertions(+) create mode 100644 src/current/v25.2/configure-cockroachdb-kubernetes-operator.md create mode 100644 src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md create mode 100644 src/current/v25.2/kubernetes-operator-overview.md create mode 100644 src/current/v25.2/kubernetes-operator-performance.md create mode 100644 src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md create mode 100644 src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md create mode 100644 src/current/v25.2/monitor-cockroachdb-kubernetes-operator.md create mode 100644 src/current/v25.2/scale-cockroachdb-kubernetes-operator.md create mode 100644 src/current/v25.2/schedule-cockroachdb-kubernetes-operator.md create mode 100644 src/current/v25.2/secure-cockroachdb-kubernetes-operator.md create mode 100644 src/current/v25.2/upgrade-cockroachdb-kubernetes-operator.md diff --git a/src/current/_includes/v25.2/sidebar-data/self-hosted-deployments.json b/src/current/_includes/v25.2/sidebar-data/self-hosted-deployments.json index d5ce5fe0975..027f0f5d939 100644 --- 
a/src/current/_includes/v25.2/sidebar-data/self-hosted-deployments.json +++ b/src/current/_includes/v25.2/sidebar-data/self-hosted-deployments.json @@ -170,6 +170,87 @@ } ] }, + { + "title": "Deploy with Kubernetes Operator", + "items": [ + { + "title": "Overview", + "urls": [ + "/${VERSION}/kubernetes-operator-overview.html" + ] + }, + { + "title": "Cluster Deployment", + "urls": [ + "/${VERSION}/deploy-cockroachdb-with-kubernetes-operator.html" + ] + }, + { + "title": "Migrate from existing Kubernetes deployments", + "items": [ + { + "title": "Helm StatefulSet migration", + "urls": [ + "/${VERSION}/migrate-cockroachdb-kubernetes-helm.html" + ] + }, + { + "title": "Legacy operator migration", + "urls": [ + "/${VERSION}/migrate-cockroachdb-kubernetes-operator.html" + ] + } + ] + }, + { + "title": "Operate on Kubernetes", + "items": [ + { + "title": "Pod Scheduling", + "urls": [ + "/${VERSION}/schedule-cockroachdb-kubernetes-operator.html" + ] + }, + { + "title": "Resource Management", + "urls": [ + "/${VERSION}/configure-cockroachdb-kubernetes-operator.html" + ] + }, + { + "title": "Certificate Management", + "urls": [ + "/${VERSION}/secure-cockroachdb-kubernetes-operator.html" + ] + }, + { + "title": "Cluster Scaling", + "urls": [ + "/${VERSION}/scale-cockroachdb-kubernetes-operator.html" + ] + }, + { + "title": "Cluster Monitoring", + "urls": [ + "/${VERSION}/monitor-cockroachdb-kubernetes-operator.html" + ] + }, + { + "title": "Cluster Upgrades", + "urls": [ + "/${VERSION}/upgrade-cockroachdb-kubernetes-operator.html" + ] + }, + { + "title": "Kubernetes Performance", + "urls": [ + "/${VERSION}/kubernetes-operator-performance.html" + ] + } + ] + } + ] + }, { "title": "Multi-Region for Self-Hosted Deployments", "items": [ diff --git a/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md b/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md new file mode 100644 index 00000000000..c3c78d04a91 --- /dev/null +++ 
b/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md @@ -0,0 +1,169 @@ +--- +title: Resource Management with the Kubernetes Operator +summary: Allocate CPU, memory, and storage resources for a cluster deployed with the Kubernetes Operator. +toc: true +toc_not_nested: true +secure: true +docs_area: deploy +--- + +This page explains how to configure Kubernetes cluster resources such as memory, CPU, and storage. + +On a production cluster, the resources you allocate to CockroachDB should be proportionate to your machine types and workload. We recommend that you determine and set these values before deploying the cluster, but you can also update the values on a running cluster. + +{{site.data.alerts.callout_info}} +Run `kubectl describe nodes` to see the available resources on the instances that you have provisioned. +{{site.data.alerts.end}} + +## Memory and CPU + +You can set the CPU and memory resources allocated to the CockroachDB container on each pod. + +{{site.data.alerts.callout_info}} +1 CPU in Kubernetes is equivalent to 1 vCPU or 1 hyperthread. For best practices on provisioning CPU and memory for CockroachDB, see the [Production Checklist](recommended-production-settings.html#hardware). +{{site.data.alerts.end}} + +Specify CPU and memory values in `cockroachdb.crdbCluster.resources.limits` and `cockroachdb.crdbCluster.resources.requests` in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster): + +```yaml +cockroachdb: + crdbCluster: + resources: + limits: + cpu: 4000m + memory: 16Gi + requests: + cpu: 4000m + memory: 16Gi +``` + +Apply the new settings to the cluster: + +```shell +$ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE +``` + +We recommend using identical values for `resources.requests` and `resources.limits`. 
When setting the new values, note that not all of a pod's resources will be available to the CockroachDB container. This is because a fraction of the CPU and memory is reserved for Kubernetes. + +{{site.data.alerts.callout_info}} +If no resource limits are specified, the pods will be able to consume the maximum available CPUs and memory. However, to avoid overallocating resources when another memory-intensive workload is on the same instance, always set resource requests and limits explicitly. +{{site.data.alerts.end}} + +For more information on how Kubernetes handles resources, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/). + +### Cache and SQL memory size + +Each CockroachDB node reserves a portion of its available memory for its cache and for storing temporary data for SQL queries. For more information on these settings, see the [Production Checklist](recommended-production-settings.html#cache-and-sql-memory-size). + +The Kubernetes operator dynamically sets cache size and SQL memory size each to 25% (the recommended percent) of the available memory, which depends on the memory request and limit you [specified](#memory-and-cpu) for your configuration. These values can be modified by adding the `cache` or `max-sql-memory` fields to `cockroachdb.crdbCluster.flags`, which is equivalent to appending `--cache` or `--max-sql-memory` as [cockroach start flags](cockroach-start.html#flags). + +## Persistent storage + +When you start your cluster, Kubernetes dynamically provisions and mounts a persistent volume into each pod. For more information on persistent volumes, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/storage/persistent-volumes/). 
+ +The storage capacity of each volume is set in `cockroachdb.crdbCluster.dataStore.volumeClaimTemplate.spec.resources` in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster): + +```yaml +cockroachdb: + crdbCluster: + dataStore: + volumeClaimTemplate: + spec: + resources: + requests: + storage: "10Gi" +``` + +You should provision an appropriate amount of disk storage for your workload. For recommendations on this, see the [Production Checklist](recommended-production-settings.html#storage). + +### Expand disk size + +If you discover that you need more capacity, you can expand the persistent volumes on a running cluster. Increasing disk size is often [beneficial for CockroachDB performance](kubernetes-operator-performance.html). + +Specify a new volume size in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster): + +```yaml +cockroachdb: + crdbCluster: + dataStore: + volumeClaimTemplate: + spec: + resources: + requests: + storage: "100Gi" +``` + +Apply the new settings to the cluster: + +```shell +$ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE +``` + +The Operator updates all nodes and triggers a rolling restart of the pods with the new storage capacity. + +To verify that the storage capacity has been updated, run `kubectl get pvc` to view the persistent volume claims (PVCs). It will take a few minutes before the PVCs are completely updated. + +## Network ports + +The Operator separates network traffic into three ports: + + + + + + + + + + + + + + + + + + + + + + + + + + +
Protocol + Default + Description + Custom Resource Field +
gRPC + 26258 + Used for node connections + service.ports.grpc +
HTTP + 8080 + Used to access the DB Console + service.ports.http +
SQL + 26257 + Used for SQL shell access + service.ports.sql +
+ +Specify alternate port numbers in `cockroachdb.crdbCluster.service.ports` of the Operator's [custom resource](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster) (for example, to match the default port `5432` on PostgreSQL): + +```yaml +cockroachdb: + crdbCluster: + service: + ports: + sql: 5432 +``` + +Apply the new settings to the cluster: + +```shell +$ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE +``` + +The Operator updates all nodes and triggers a rolling restart of the pods with the new port settings. diff --git a/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md b/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md new file mode 100644 index 00000000000..0216e6e7093 --- /dev/null +++ b/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md @@ -0,0 +1,638 @@ +--- +title: Deploy CockroachDB in a Kubernetes Cluster with the Operator +summary: Deploy a secure 3-node CockroachDB cluster with the Kubernetes operator. +toc: true +toc_not_nested: false +secure: true +docs_area: deploy +--- + +This page shows you how to start and stop a secure 3-node CockroachDB cluster in a single [Kubernetes](http://kubernetes.io/) cluster. + +## Prerequisites and best practices + +### Kubernetes version + +To deploy CockroachDB v25.1 or later, Kubernetes 1.30 or higher is required. Cockroach Labs strongly recommends that you use a Kubernetes version that is eligible for [patch support by the Kubernetes project](https://kubernetes.io/releases/). + +### Helm version + +The CockroachDB Helm chart requires Helm 3.0 or higher. 
If you attempt to use an incompatible Helm version, an error like the following occurs: + + ``` + Error: UPGRADE FAILED: template: cockroachdb/templates/tests/client.yaml:6:14: executing "cockroachdb/templates/tests/client.yaml" at <.Values.networkPolicy.enabled>: nil pointer evaluating interface {}.enabled + ``` + + The Helm chart consists of two sub-charts: + + * `operator` - The CockroachDB operator chart to be installed first. + * `cockroachdb` - The CockroachDB application chart to be installed after the operator is ready. + + ### Network + + Server Name Indication (SNI) is an extension to the TLS protocol which allows a client to indicate which hostname it is attempting to connect to at the start of the TLS handshake process. The server can present multiple certificates on the same IP address and TCP port number, and one server can serve multiple secure websites or API services even if they use different certificates. + + Due to its order of operations, the PostgreSQL wire protocol's implementation of TLS is not compatible with SNI-based routing in the Kubernetes ingress controller. Instead, use a TCP load balancer for CockroachDB that is not shared with other services. + + If you want to secure your cluster to use TLS certificates for all network communications, Helm must be installed with RBAC privileges or else you will get an "attempt to grant extra privileges" error. + + ### Localities + + CockroachDB clusters use locality labels to determine an efficient distribution of replicas. This is especially important in the case of multi-region deployments. In cloud provider deployments such as EKS/AKS/GKE, the `topology.kubernetes.io/region` and `topology.kubernetes.io/zone` labels are applied implicitly to Kubernetes nodes and populated by the regions and zones specific to the cloud provider. 
Other locality labels can be arbitrarily defined for further granularity, such as province, datacenter, rack, etc., but these need to be applied individually to the Kubernetes node when initialized so that CockroachDB can understand where the node lives and distribute replicas accordingly. + +In the case of baremetal Kubernetes deployments, you must plan a hierarchy of locality labels that suit your CockroachDB node distribution, then apply these labels individually to nodes when they are initialized. Most of these values can be set arbitrarily, but region and zone locations must be set in the reserved `topology.kubernetes.io/region` and `topology.kubernetes.io/zone` namespace, respectively. + +For more information on how locality labels are used by CockroachDB, see the [--locality flag documentation](cockroach-start.html#locality). + +### Architecture + +The operator is only supported in environments with an ARM64 or AMD64 architecture. + +### Resources + +When starting Kubernetes, select machines with at least 4 vCPUs and 16 GiB of memory, and provision at least 2 vCPUs and 8 Gi of memory to CockroachDB per pod. These minimum settings are used by default in this deployment guide, and are appropriate for testing purposes only. On a production deployment, you should adjust the resource settings for your workload. + +### Storage + +Kubernetes deployments use external persistent volumes that are often replicated by the provider. CockroachDB replicates data automatically, and this redundant layer of replication can impact performance. Using [local volumes](https://kubernetes.io/docs/concepts/storage/volumes/#local) may improve performance. + +## Step 1. Start Kubernetes + +You can use the hosted [Google Kubernetes Engine (GKE)](#hosted-gke) service, hosted [Amazon Elastic Kubernetes Service (EKS)](#hosted-eks), or [Microsoft Azure Kubernetes Service (AKS)](#hosted-aks) to quickly start Kubernetes. 
+ +{{site.data.alerts.callout_info}} +GKE/EKS/AKS are not required to run CockroachDB on Kubernetes. Any cluster hardware with the minimum recommended Kubernetes version and at least 3 pods, each presenting sufficient resources to start a CockroachDB node, can also be used. However, note that support for other deployments may vary. +{{site.data.alerts.end}} + +### Hosted GKE + +1. Complete the **Before You Begin** steps described in the [Google Kubernetes Engine Quickstart](https://cloud.google.com/kubernetes-engine/docs/quickstart) documentation. + + This includes installing `gcloud`, which is used to create and delete Kubernetes Engine clusters, and `kubectl`, which is the command-line tool used to manage Kubernetes from your workstation. + + The documentation offers the choice of using Google's Cloud Shell product or using a local shell on your machine. Choose to use a local shell if you want to be able to view the DB Console using the steps in this guide. + +2. From your local workstation, start the Kubernetes cluster, specifying one of the available [regions](https://cloud.google.com/compute/docs/regions-zones#available) (e.g., `us-east1`): + + Since this region can differ from your default `gcloud` region, be sure to include the `--region` flag to run `gcloud` commands against this cluster. + + ```shell + $ gcloud container clusters create cockroachdb --machine-type n2-standard-4 --region {region-name} --num-nodes 1 + + Creating cluster cockroachdb...done. + ``` + + This creates GKE instances and joins them into a single Kubernetes cluster named `cockroachdb`. The `--region` flag specifies a [regional three-zone cluster](https://cloud.google.com/kubernetes-engine/docs/how-to/creating-a-regional-cluster), and `--num-nodes` specifies one Kubernetes worker node in each zone. 
+ + The `--machine-type` flag tells the node pool to use the [n2-standard-4](https://cloud.google.com/compute/docs/machine-types#standard_machine_types) machine type (4 vCPUs, 16 GB memory), which meets our [recommended CPU and memory configuration](recommended-production-settings#basic-hardware-recommendations). + The process can take a few minutes, so do not move on to the next step until you see a `Creating cluster cockroachdb...done` message and details about your cluster. + + {{site.data.alerts.callout_info}} + Consider creating another, dedicated node group for the operator pod for system resource availability. + {{site.data.alerts.end}} + +3. Get the email address associated with your Google Cloud account: + + ```shell + $ gcloud info | grep Account + + Account: [your.google.cloud.email@example.org] + ``` + + This command returns your email address in all lowercase. However, in the next step, you must enter the address using the accurate capitalization. For example, if your address is YourName@example.com, you must use YourName@example.com and not yourname@example.com. + +4. [Create the RBAC roles](https://cloud.google.com/kubernetes-engine/docs/how-to/role-based-access-control#prerequisites_for_using_role-based_access_control) CockroachDB needs for running on GKE, using the address from the previous step: + + ```shell + $ kubectl create clusterrolebinding $USER-cluster-admin-binding \ + --clusterrole=cluster-admin \ + --user={your.google.cloud.email@example.org} + + clusterrolebinding.rbac.authorization.k8s.io/your.username-cluster-admin-binding created + ``` + +### Hosted EKS + +1. Complete the steps described in the [EKS Getting Started](https://docs.aws.amazon.com/eks/latest/userguide/getting-started-eksctl.html) documentation. 
+ + This includes installing and configuring the AWS CLI and `eksctl`, which is the command-line tool used to create and delete Kubernetes clusters on EKS, and `kubectl`, which is the command-line tool used to manage Kubernetes from your workstation. + + If you are running [EKS-Anywhere](https://aws.amazon.com/eks/eks-anywhere/), CockroachDB requires that you [configure your default storage class](https://kubernetes.io/docs/tasks/administer-cluster/change-default-storage-class/) to auto-provision persistent volumes. Alternatively, you can define a custom storage configuration as required by your install pattern. + +2. From your local workstation, start the Kubernetes cluster: + + To ensure that all 3 nodes can be placed into a different availability zone, you may want to first [confirm that at least 3 zones are available in the region](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#availability-zones-describe) for your account. + + ```shell + $ eksctl create cluster \ + --name cockroachdb \ + --nodegroup-name standard-workers \ + --node-type m6i.xlarge \ + --nodes 3 \ + --nodes-min 1 \ + --nodes-max 4 \ + --node-ami auto + ``` + + This creates EKS instances and joins them into a single Kubernetes cluster named `cockroachdb`. The `--node-type` flag tells the node pool to use the [m6i.xlarge](https://aws.amazon.com/ec2/instance-types/) instance type (4 vCPUs, 16 GB memory), which meets our [recommended CPU and memory configuration](recommended-production-settings#basic-hardware-recommendations). + Cluster provisioning usually takes between 10 and 15 minutes. Do not move on to the next step until you see a message like `[✔] EKS cluster "cockroachdb" in "us-east-1" region is ready` and details about your cluster. + + {{site.data.alerts.callout_info}} + Consider creating another, dedicated node group for the operator pod for system resource availability. + {{site.data.alerts.end}} + +3. 
Open the [AWS CloudFormation console](https://console.aws.amazon.com/cloudformation/home) to verify that the stacks `eksctl-cockroachdb-cluster` and `eksctl-cockroachdb-nodegroup-standard-workers` were successfully created. Be sure that your region is selected in the console. + +### Hosted AKS + +1. Complete the **Before you begin**, **Define environment variables**, and **Create a resource groups** steps described in the [AKS quickstart guide](https://learn.microsoft.com/azure/aks/learn/quick-kubernetes-deploy-cli). This includes setting up the Azure CLI and the `az` tool, which is the command-line tool to create and manage Azure cloud resources. + + Set the environment variables as desired for your CRDB deployment. For these instructions, set the `MY_AKS_CLUSTER_NAME` variable to `cockroachdb`. + + Do not follow the **Create an AKS cluster** steps or following sections of the quickstart guide, as these topics will be described specifically for CRDB in this documentation. + +2. From your workstation, create the Kubernetes cluster: + + ```shell + $ az aks create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $MY_AKS_CLUSTER_NAME \ + --node-count 3 \ + --generate-ssh-keys + ``` + +3. Create an application in your Azure tenant and create a secret named `azure-cluster-identity-credentials-secret` which contains `AZURE_CLIENT_ID` and `AZURE_CLIENT_SECRET` to hold the application credentials. The following example YAML can be used to define this application: + + ```shell + apiVersion: v1 + kind: Secret + metadata: + name: azure-cluster-identity-credentials-secret + type: Opaque + stringData: + azure-credentials: | + azure_client_id: 11111111-1111-1111-1111-111111111111 + azure_client_secret: s3cr3t + ``` + + See the [Azure.Identity documentation](https://learn.microsoft.com/dotnet/api/azure.identity.environmentcredential?view=azure-dotnet) for more information on how to use these variables. 
+ +### Baremetal/other deployments + +For baremetal deployments, the specific Kubernetes infrastructure deployment steps should be similar to those described in [Hosted GKE](#hosted-gke) and [Hosted EKS](#hosted-eks). + +* Be prepared to apply labels to your Kubernetes nodes upon initialization, that can be used by CockroachDB as [locality labels](#localities). In other cloud provider deployments, some of these labels are applied automatically by the provider. These must be applied manually in a baremetal deployment. + +## Step 2. Start CockroachDB + +### Install the Operator sub-chart + +1. Check out the CockroachDB Helm repository from GitHub: + + ```shell + $ git clone https://github.com/cockroachdb/helm-charts.git + ``` + +2. Set your environment variables. This step is optional but recommended in order to use the example commands and templates described in these instructions. Note the default Kubernetes namespace of `cockroach-ns`. + + ```shell + $ export CRDBOPERATOR=crdb-operator + $ export CRDBCLUSTER=cockroachdb + $ export NAMESPACE=cockroach-ns + ``` + +3. Install the Operator sub-chart: + + ```shell + $ kubectl create namespace $NAMESPACE + $ helm install $CRDBOPERATOR ./cockroachdb-parent/charts/operator -n $NAMESPACE + ``` + +### Initialize the cluster + +1. Open `cockroachdb-parent/charts/cockroachdb/values.yaml`, a values file that tells Helm how to configure the Kubernetes cluster, in your text editor of choice. + +2. Modify the `cockroachdb.crdbCluster.regions` section to describe the number of nodes to deploy and what region(s) to deploy them in. The default configuration uses `k3d`, replace with the `cloudProvider` of choice (`gcp`, `aws`, `azure`). For other deployments such as baremetal, the `cloudProvider` field is optional and can be removed altogether. 
The following example initializes three nodes on Google Cloud in the `us-central1` region: + + ```yaml + cockroachdb: + crdbCluster: + regions: + - code: us-central1 + nodes: 3 + cloudProvider: gcp + namespace: cockroach-ns + ``` + + If you intend to deploy CockroachDB nodes across multiple different regions, follow the additional steps described in [Deploy across multiple regions](#deploy-across-multiple-regions). + +3. Modify the values file with the CPU and memory requests and limits for each node to use, in the `cockroachdb.crdbCluster.resources` section. The default values are 4vCPU and 16GB of memory but this section must be uncommented similar to the following example: + + See [Resource management](configure-cockroachdb-kubernetes-operator.html) for more information on configuring node resource allocation. + +4. Modify the TLS configuration as desired. For a secure deployment, set `cockroachdb.tls.enabled` in the values file to `true`. You can either use the default self-signer utility to generate all certificates, provide a custom CA certificate and generate other certificates, or use your own certificates. + - **All self-signed certificates**: By default, the certificates are created by the self-signer utility which requires no configuration beyond setting a custom certificate duration if desired. This utility creates self-signed certificates for the nodes and root client which are stored in a secret. You can see these certificates by running `kubectl get secrets`: + + ```shell + $ kubectl get secrets + + crdb-cockroachdb-ca-secret Opaque 2 23s + crdb-cockroachdb-client-secret kubernetes.io/tls 3 22s + crdb-cockroachdb-node-secret kubernetes.io/tls 3 23s + ``` + + {{site.data.alerts.callout_info}} + If you are deploying on OpenShift you must also set `cockroachdb.tls.selfSigner.securityContext.enabled` to `false` to mitigate stricter security policies. 
+ {{site.data.alerts.end}} + - **Custom CA certificate**: If you wish to supply your own CA certificates to the deployed nodes but allow the self-signer utility to generate client certificates, create a Kubernetes secret with the custom CA certificate. To perform these steps using the `cockroach cert` command: + + ```shell + $ mkdir certs + $ mkdir my-safe-directory + $ cockroach cert create-ca --certs-dir=certs --ca-key=my-safe-directory/ca.key + ``` + + Set `cockroachdb.tls.selfSigner.caProvided` to true and specify the secret where the certificate is stored: + + ```yaml + cockroachdb: + tls: + enabled: true + selfSigner: + enabled: true + caProvided: true + caSecret: + ``` + + {{site.data.alerts.callout_info}} + If you are deploying on OpenShift you must also set `cockroachdb.tls.selfSigner.securityContext.enabled` to `false` to mitigate stricter security policies. + {{site.data.alerts.end}} + - **All custom certificates**: Set up your certificates and load them into your Kubernetes cluster as Secrets using the following commands: + + ```shell + $ mkdir certs + $ mkdir my-safe-directory + $ cockroach cert create-ca --certs-dir=certs --ca-key=my-safe-directory/ca.key + $ cockroach cert create-client root --certs-dir=certs --ca-key=my-safe-directory/ca.key + $ kubectl create secret generic cockroachdb-root --from-file=certs + + secret/cockroachdb-root created + + $ cockroach cert create-node --certs-dir=certs --ca-key=my-safe-directory/ca.key localhost 127.0.0.1 my-release-cockroachdb-public my-release-cockroachdb-public.cockroach-ns my-release-cockroachdb-public.cockroach-ns.svc.cluster.local *.my-release-cockroachdb *.my-release-cockroachdb.cockroach-ns *.my-release-cockroachdb.cockroach-ns.svc.cluster.local + $ kubectl create secret generic cockroachdb-node --from-file=certs + + secret/cockroachdb-node created + ``` + + {{site.data.alerts.callout_info}} + The subject alternative names are based on a release called `my-release` in the `cockroach-ns` namespace. 
Make sure they match the services created with the release during Helm install. + {{site.data.alerts.end}} + + If you wish to supply certificates with [cert-manager](https://cert-manager.io/), set `cockroachdb.tls.certManager.enabled` to `true`, and `cockroachdb.tls.certManager.issuer` to an IssuerRef (as they appear in certificate resources) pointing to a clusterIssuer or issuer that you have set up in the cluster. The following k8s application describes an example issuer: + + ```yaml + apiVersion: v1 + kind: Secret + metadata: + name: cockroachdb-ca + namespace: cockroach-ns + data: + tls.crt: [BASE64 Encoded ca.crt] + tls.key: [BASE64 Encoded ca.key] + type: kubernetes.io/tls + --- + apiVersion: cert-manager.io/v1alpha3 + kind: Issuer + metadata: + name: cockroachdb-cert-issuer + namespace: cockroach-ns + spec: + ca: + secretName: cockroachdb-ca + ``` + + If your certificates are stored in tls secrets such as secrets generated by cert-manager, the secret will contain files named: `ca.crt`, `tls.crt`, and `tls.key`. + + For CockroachDB, rename these files as applicable to match the following naming scheme: `ca.crt`, `node.crt`, `node.key`, `client.root.crt`, and `client.root.key`. + + Add the following to the values file: + + ```yaml + cockroachdb: + tls: + enabled: true + externalCertificates: + enabled: true + certificates: + nodeSecretName: {node_secret_name} + nodeClientSecretName: {client_secret_name} + ``` + + Replacing the following: + - `{node_secret_name}`: The name of the Kubernetes secret that contains the generated node certificate and key. + - `{client_secret_name}`: The name of the Kubernetes secret that contains the generated client certificate and key. + + See [Example: Authenticate with cockroach cert](#example-authenticate-with-cockroach-cert) for a more detailed walkthrough of a TLS configuration with manual certificates. + +5. Review [locality labels](#localities) as needed for your Kubernetes host. 
These labels are written as a list of Kubernetes node values where the locality information of each node is stored, defined in `cockroachdb.crdbCluster.localityLabels`. When CockroachDB is initialized on a node, these values are processed as though they are provided through the [cockroach start --locality](cockroach-start.html#locality) flag. + + If no locality labels are provided in `cockroachdb.crdbCluster.localityLabels`, the default locality labels are `region` and `zone`, stored in `topology.kubernetes.io/region` and `topology.kubernetes.io/zone` respectively. Cloud providers like EKS/AKS/GKE auto-populate these values describing the node’s region and zone, so for cloud provider deployments the locality labels can be left as-is: + + ```yaml + cockroachdb: + crdbCluster: + localityLabels: [] + ``` + + For baremetal deployments, you can use the default `localityLabels` configuration to use the default values for `region` and `zone` (`topology.kubernetes.io/region` and `topology.kubernetes.io/zone`), but you will need to set these values manually when the node is initialized because there is no cloud provider to do so automatically. + + To add more granular levels of locality to your nodes, add custom locality levels as values in the `cockroachdb.crdbCluster.localityLabels` list. Any custom `localityLabels` configuration overrides the default `region` and `zone` configuration, so if you append an additional locality level but wish to keep the `region` and `zone` labels you must declare them manually. + + The following example uses the existing `region` and `zone` labels and adds an additional `datacenter` locality label that is more granular than `zone`. 
This example declares that the `datacenter` locality information is stored in the `example.datacenter.locality` variable on the node: + + ```yaml + cockroachdb: + crdbCluster: + localityLabels: + - topology.kubernetes.io/region + - topology.kubernetes.io/zone + - example.datacenter.locality + ``` + + In this example, if a Kubernetes node is initialized in the `us-central1` region, `us-central1-c` zone, and `dc2` datacenter, its `cockroach start --locality` command would be similar to the following command: + + ```shell + cockroach start --locality region=us-central1,zone=us-central1-c,example.datacenter.locality=dc2 + ``` + + Optionally, review the `cockroachdb.crdbCluster.topologySpreadConstraints` configuration and set `topologyKey` to a locality variable that will have distinct values for each node. The default recommendation is to set this to a zone as follows: + + ```yaml + cockroachdb: + crdbCluster: + topologySpreadConstraints: + topologyKey: topology.kubernetes.io/zone + ``` + +6. Modify other relevant parts of the configuration such as other `topologySpreadConstraints` fields, `service.ports`, and others as needed for your configuration. + +7. Run the following command to install the CockroachDB chart using Helm: + + ```shell + $ helm install $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb -n $NAMESPACE + ``` + + You can override the default parameters using the `--set key=value[,key=value]` argument while installing the chart: + + ```shell + $ helm install $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --set clusterDomain=cluster-test.local -n $NAMESPACE + ``` + +#### Deploy across multiple regions + +The Helm chart supports specifying multiple region definitions in `cockroachdb.crdbCluster.regions` with their respective node counts. You must ensure the required networking is set up to allow for service discovery across regions. Also, ensure that the same CA cert is used across all the regions. 
+ +For each region, modify the `regions` configuration as described in [Initialize the cluster](#initialize-the-cluster) and perform `helm install` against the respective Kubernetes cluster. While applying the installation in a given region, do the following: + +* Verify that the domain matches `cockroachdb.clusterDomain` in the values file +* Ensure that `cockroachdb.crdbCluster.regions` captures the information for regions that have already been deployed, including the current region. This allows CockroachDB in the current region to connect to clusters deployed in the existing regions. + +The following example shows a configuration across two regions with 3 nodes in each cluster: + +```yaml +cockroachdb: + clusterDomain: cluster.gke.gcp-us-east1 + crdbCluster: + regions: + - code: us-central1 + nodes: 3 + cloudProvider: gcp + domain: cluster.gke.gcp-us-central1 + namespace: cockroach-ns + - code: us-east1 + nodes: 3 + cloudProvider: gcp + domain: cluster.gke.gcp-us-east1 + namespace: cockroach-ns +``` + +## Step 3. Use the built-in SQL client + +To use the CockroachDB SQL client, follow these steps to launch a secure pod running the `cockroach` binary. + +1. Download the secure client k8s application: + + ```shell + $ curl -O https://raw.githubusercontent.com/cockroachdb/helm-charts/master/examples/client-secure.yaml + ``` + + {{site.data.alerts.callout_danger}} + Be mindful that this client tool logs into CockroachDB as root using the root certificates. + {{site.data.alerts.end}} + +2. Edit the yaml file with the following values: + * `spec.serviceAccountName: my-release-cockroachdb` + * `spec.image: cockroachdb/cockroach:` + * `spec.volumes[0].project.sources[0].secret.name: my-release-cockroachdb-client-secret` + +3. Launch a pod using this file and keep it running indefinitely: + + ```shell + $ kubectl create -f client-secure.yaml + ``` + +4. 
Get a shell into the pod and start the CockroachDB [built-in SQL client](cockroach-sql.html): + + ```shell + $ kubectl exec -it cockroachdb-client-secure \ + -- ./cockroach sql \ + --certs-dir=/cockroach/cockroach-certs \ + --host=cockroachdb-public + + # Welcome to the CockroachDB SQL shell. + # All statements must be terminated by a semicolon. + # To exit, type: \q. + # + # Server version: CockroachDB CCL v21.1.0 (x86_64-unknown-linux-gnu, built 2021/04/23 13:54:57, go1.13.14) (same version as client) + # Cluster ID: a96791d9-998c-4683-a3d3-edbf425bbf11 + # + # Enter \? for a brief introduction. + # + root@cockroachdb-public:26257/defaultdb> + ``` + + This pod will continue running indefinitely, so any time you need to reopen the built-in SQL client or run any other cockroach client commands (e.g., cockroach node), repeat step 2 using the appropriate cockroach command. If you'd prefer to delete the pod and recreate it when needed, run `kubectl delete pod cockroachdb-client-secure`. + +5. Run some basic [CockroachDB SQL statements](learn-cockroachdb-sql.html): + + ```sql + > CREATE DATABASE bank; + > CREATE TABLE bank.accounts (id INT PRIMARY KEY, balance DECIMAL); + > INSERT INTO bank.accounts VALUES (1, 1000.50); + > SELECT * FROM bank.accounts; + id | balance + +----+---------+ + 1 | 1000.50 + (1 row) + ``` + +6. [Create a user with a password](create-user.html#create-a-user-with-a-password): + + ```sql + > CREATE USER roach WITH PASSWORD 'Q7gc8rEdS'; + ``` + + You will need this username and password to access the DB Console later. + +7. Exit the SQL shell and pod: + + ```sql + > \q + ``` + +## Step 4. Access the DB Console + +To access the cluster's [DB Console](ui-overview.html): + +1. On secure clusters, [certain pages of the DB Console](ui-overview.html#db-console-access) can only be accessed by admin users. 
+ + Get a shell into the pod and start the CockroachDB [built-in SQL client](cockroach-sql.html): + + ```shell + $ kubectl exec -it cockroachdb-client-secure \ + -- ./cockroach sql \ + --certs-dir=/cockroach/cockroach-certs \ + --host=cockroachdb-public + ``` + +2. Assign `roach` to the `admin` role (you only need to do this once): + + ```sql + > GRANT admin TO roach; + ``` + +3. Exit the SQL shell and pod: + + ```sql + > \q + ``` + +4. In a new terminal window, port-forward from your local machine to the `cockroachdb-public` service: + + ```shell + $ kubectl port-forward service/cockroachdb-public 8080 + Forwarding from 127.0.0.1:8080 -> 8080 + ``` + + The port-forward command must be run on the same machine as the web browser in which you want to view the DB Console. If you have been running these commands from a cloud instance or other non-local shell, you will not be able to view the UI without configuring kubectl locally and running the above port-forward command on your local machine. + +5. Go to [https://localhost:8080](https://localhost:8080/) and log in with the username and password you created earlier. + + {{site.data.alerts.callout_info}} + If you are using Google Chrome, and you are getting an error about not being able to reach `localhost` because its certificate has been revoked, go to `chrome://flags/#allow-insecure-localhost`, enable "Allow invalid certificates for resources loaded from localhost", and then restart the browser. Enabling this Chrome feature degrades security for all sites running on `localhost`, not just CockroachDB's DB Console, so be sure to enable the feature only temporarily. + {{site.data.alerts.end}} + +6. In the UI, verify that the cluster is running as expected: + 1. View the [Node List](ui-cluster-overview-page.html#node-list) to ensure that all nodes successfully joined the cluster. + 2. Click the **Databases** tab on the left to verify that `bank` is listed. 
+ +## Next steps + +Read the following pages for detailed information on cluster scaling, certificate management, resource management, best practices, and other cluster operation details: + +* [Pod scheduling](schedule-cockroachdb-kubernetes-operator.html) +* [Resource management](configure-cockroachdb-kubernetes-operator.html) +* [Certificate management](secure-cockroachdb-kubernetes-operator.html) +* [Cluster scaling](scale-cockroachdb-kubernetes-operator.html) +* [Cluster monitoring](monitor-cockroachdb-kubernetes-operator.html) +* [Upgrade a cluster](upgrade-cockroachdb-kubernetes-operator.html) +* [CockroachDB performance on Kubernetes](kubernetes-operator-performance.html) + +## Appendix + +### Example: Authenticate with `cockroach cert` + +This example uses [cockroach cert commands](cockroach-cert.html) to generate and sign the CockroachDB node and client certificates. To learn more about the supported methods of signing certificates, refer to [Authentication](authentication.html#using-digital-certificates-with-cockroachdb). + +1. Create two directories: + + ```shell + $ mkdir certs my-safe-directory + ``` + +2. Create the CA certificate and key pair: + + ```shell + $ cockroach cert create-ca \ + --certs-dir=certs \ + --ca-key=my-safe-directory/ca.key + ``` + +3. Create a client certificate and key pair for the root user: + + ```shell + $ cockroach cert create-client root \ + --certs-dir=certs \ + --ca-key=my-safe-directory/ca.key + ``` + +4. Upload the client certificate and key to the Kubernetes cluster as a secret, renaming them to the filenames required by the operator: + + ```shell + $ kubectl create secret generic cockroachdb.client.root \ + --from-file=tls.key=certs/client.root.key \ + --from-file=tls.crt=certs/client.root.crt \ + --from-file=ca.crt=certs/ca.crt + + secret/cockroachdb.client.root created + ``` + +5. Create the certificate and key pair for your CockroachDB nodes, specifying the namespace you used when deploying the cluster. 
This example uses the `cockroach-ns` namespace: + + ```shell + $ cockroach cert create-node localhost \ + 127.0.0.1 \ + cockroachdb-public \ + cockroachdb-public.cockroach-ns \ + cockroachdb-public.cockroach-ns.svc.cluster.local \ + *.cockroachdb \ + *.cockroachdb.cockroach-ns \ + *.cockroachdb.cockroach-ns.svc.cluster.local \ + --certs-dir=certs \ + --ca-key=my-safe-directory/ca.key + ``` + +6. Upload the node certificate and key to the Kubernetes cluster as a secret, renaming them to the filenames required by the operator: + + ```shell + $ kubectl create secret generic cockroachdb.node \ + --from-file=tls.key=certs/node.key \ + --from-file=tls.crt=certs/node.crt \ + --from-file=ca.crt=certs/ca.crt + + secret/cockroachdb.node created + ``` + +7. Check that the secrets were created on the cluster: + + ```shell + $ kubectl get secrets + + NAME TYPE DATA AGE + cockroachdb.client.root Opaque 3 13s + cockroachdb.node Opaque 3 3s + default-token-6js7b kubernetes.io/service-account-token 3 9h + ``` + +8. Add `cockroachdb.tls.externalCertificates.certificates.nodeSecretName` and `cockroachdb.tls.externalCertificates.certificates.nodeClientSecretName` to the values file used to deploy the cluster: + + ```yaml + cockroachdb: + tls: + enabled: true + externalCertificates: + enabled: true + certificates: + nodeSecretName: cockroachdb.node + nodeClientSecretName: cockroachdb.client.root + ``` diff --git a/src/current/v25.2/kubernetes-operator-overview.md b/src/current/v25.2/kubernetes-operator-overview.md new file mode 100644 index 00000000000..bc33e7f678f --- /dev/null +++ b/src/current/v25.2/kubernetes-operator-overview.md @@ -0,0 +1,11 @@ +--- +title: Kubernetes Operator Overview +summary: An overview of deployment and management of a CockroachDB cluster using our Kubernetes Operator. 
+toc: true +toc_not_nested: true +secure: true +docs_area: deploy +key: operate-cockroachdb-kubernetes-operator.html +--- + +Placeholder content introducing the new operator and comparing it to existing k8s deployments. diff --git a/src/current/v25.2/kubernetes-operator-performance.md b/src/current/v25.2/kubernetes-operator-performance.md new file mode 100644 index 00000000000..bc7631315b2 --- /dev/null +++ b/src/current/v25.2/kubernetes-operator-performance.md @@ -0,0 +1,287 @@ +--- +title: CockroachDB Performance with the Kubernetes Operator +summary: How running CockroachDB in Kubernetes affects its performance and how to get the best possible performance when running in Kubernetes using the operator. +toc: true +docs_area: deploy +--- + +Kubernetes provides many useful abstractions for deploying and operating distributed systems, but some of the abstractions come with a performance overhead and an increase in underlying system complexity. This section explains potential bottlenecks to be aware of when running CockroachDB in Kubernetes and shows you how to optimize your deployment for better performance. + +## Before you begin + +Before you focus on optimizing a Kubernetes-orchestrated CockroachDB cluster: + +1. Before deploying on Kubernetes, ensure that performance is optimized for your workload on identical hardware. You may find that you first need to [modify your workload](performance-best-practices-overview.html) or use [different machine specs](recommended-production-settings.html#hardware) to achieve the performance you need. + +2. Go through the documentation for [deploying CockroachDB in a Kubernetes cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster) to familiarize yourself with the necessary Kubernetes terminology and deployment abstractions. + +## Performance factors + +A number of independent factors affect performance when running CockroachDB on Kubernetes.
Most are easiest to change before you create your CockroachDB cluster. If you need to modify a CockroachDB cluster that is already running on Kubernetes, extra care and testing is strongly recommended. + +The following sections show how to modify excerpts from our provided Kubernetes configuration YAML files. You can find the most up-to-date version of this file [on GitHub](https://github.com/cockroachdb/helm-charts/blob/master/cockroachdb-parent/charts/cockroachdb/values.yaml). + +### Version of CockroachDB + +Because CockroachDB is under very active development, there are typically substantial performance gains in each release. If you aren't running the latest release and aren't getting the performance you desire, you should try the latest and see how much it helps. + +### Client workload + +Your workload is the single most important factor in database performance. Read through our [SQL performance best practices](performance-best-practices-overview.html) to determine whether there are any easy changes that you can make to speed up your application. + +### Machine size + +The size of the machines you're using isn't a Kubernetes-specific concern, but it's always a good place to start if you want more performance. See our [hardware recommendations](recommended-production-settings.html#hardware) for specific suggestions, but using machines with more CPU will almost always allow for greater throughput. Be aware that because Kubernetes runs a set of processes on every machine in a cluster, you typically will get more bang for your buck by using fewer large machines than more small machines. + +### Disk type + +CockroachDB makes heavy use of the disks you provide it, so using faster disks is an easy way to improve your cluster's performance. Our provided configuration does not specify what type of disks it wants, so in most environments Kubernetes will auto-provision disks of the default type. 
In the common cloud environments (AWS, GCP, Azure) this means you'll get slow disks that aren't optimized for database workloads (e.g.,HDDs on GCE, SSDs without provisioned IOPS on AWS). However, we [strongly recommend using SSDs](recommended-production-settings.html#hardware) for the best performance, and Kubernetes makes it relatively easy to use them. + +#### Creating a different disk type + +Kubernetes exposes the disk types used by its volume provisioner via its [StorageClass API object](https://kubernetes.io/docs/concepts/storage/storage-classes/). Each cloud environment has its own default `StorageClass`, but you can easily change the default or create a new named class which you can then ask for when asking for volumes. To do this, pick the type of volume provisioner you want to use from the list in the [Kubernetes documentation](https://kubernetes.io/docs/concepts/storage/storage-classes/), take the example YAML file they provide, modify it to have the disk type you want, then run `kubectl create -f <your-storage-class-file.yaml>`. For example, in order to use the `pd-ssd` disk type on Google Compute Engine or Google Kubernetes Engine, you can use a `StorageClass` file like this: + +```yaml +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: +provisioner: kubernetes.io/gce-pd +parameters: + type: pd-ssd +``` + +You can then use this new disk type either by configuring the CockroachDB YAML file to request it or by making it the default. You may also want to set additional parameters as documented in the list of Kubernetes storage classes, such as configuring the `iopsPerGB` if you're creating a `StorageClass` for AWS's `io1` Provisioned IOPS volume type. + +#### Configuring the disk type used by CockroachDB + +To use a new `StorageClass` without making it the default in your cluster, you have to modify your application's YAML file to ask for it. 
In the CockroachDB configuration, that means adding a line to its `cockroachdb.crdbCluster.dataStore.volumeClaimTemplate` section. For example, that would mean adding a `storageClassName` field: + +```yaml +cockroachdb: + crdbCluster: + dataStore: + volumeClaimTemplate: + storageClassName: +``` + +If you make this change then run `kubectl create -f` on your YAML file, Kubernetes should create volumes for you using your new `StorageClass`. + +#### Changing the default disk type + +If you want your new `StorageClass` to be the default for all volumes in your cluster, you have to run a couple of commands to inform Kubernetes of what you want. First, get the names of your `StorageClass` objects. Then remove the current default and add yours as the new default. + +```shell +$ kubectl get storageclasses + +NAME PROVISIONER +ssd kubernetes.io/gce-pd +standard (default) kubernetes.io/gce-pd +``` +```shell +$ kubectl patch storageclass standard -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"false"}}}' + +storageclass "standard" patched +``` +```shell +$ kubectl patch storageclass ssd -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}' + +storageclass "ssd" patched +``` + +### Disk size + +On some cloud providers (notably including all GCP disks and the AWS io1 disk type), the number of IOPS available to a disk is directly correlated to the size of the disk. In such cases, increasing the size of your disks can make for significantly better CockroachDB performance, as well as less risk of filling them up. Doing so is easy -- before you create your CockroachDB cluster, modify the `cockroachdb.crdbCluster.dataStore.volumeClaimTemplate` in the CockroachDB YAML file to ask for more space.
The following example sets this value to 1TB: + +```yaml +cockroachdb: + crdbCluster: + dataStore: + volumeClaimTemplate: + spec: + resources: + requests: + storage: 1024Gi +``` + +Since [GCE disk IOPS scale linearly with disk size](https://cloud.google.com/compute/docs/disks/performance#type_comparison), a 1TiB disk gives 1024 times as many IOPS as a 1GiB disk, which can make a very large difference for write-heavy workloads. + +### Local disks + +Up to this point, we have assumed the use of auto-provisioned, remotely attached disks. However, local disks typically provide better performance than remotely attached disks. For example, SSD Instance Store Volumes outperform EBS Volumes on AWS, and Local SSDs outperform Persistent Disks on GCE. As of v1.14, Kubernetes supports [local volumes](https://kubernetes.io/docs/concepts/storage/volumes/#local). + +Note that when running with local disks, there is a greater chance of experiencing a disk failure than when using the cloud providers' network-attached disks that are often replicated underneath the covers. Consequently, you may want to use [Replication Controls](configure-replication-zones.html) to increase the replication factor of your data to 5 from its default of 3 when using local disks. + +### Resource requests and limits + +When you ask Kubernetes to run a pod, you can tell it to reserve certain amounts of CPU and/or memory for each container in the pod or to limit the CPU and/or memory of each container. Doing one or both of these can have different implications depending on how utilized your Kubernetes cluster is. For the authoritative information on this topic, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/). + +#### Resource requests + +Resource requests allow you to reserve a certain amount of CPU or memory for your container. 
If you add resource requests to your CockroachDB YAML file, Kubernetes will schedule each CockroachDB pod onto a node with sufficient unreserved resources and will ensure the pods are guaranteed the reserved resources using the applicable Linux container primitives. If you are running other workloads in your Kubernetes cluster, setting resource requests is very strongly recommended to ensure good performance, because if you do not set them then CockroachDB could be starved of CPU cycles or [OOM stopped](cluster-setup-troubleshooting.html#out-of-memory-oom-crash) before less important processes. + +To determine how many resources are usable on your Kubernetes nodes, you can run: + +```shell +$ kubectl describe nodes + +Name: gke-perf-default-pool-aafee20c-k4t8 +[...] +Capacity: + cpu: 4 + memory: 15393536Ki + pods: 110 +Allocatable: + cpu: 3920m + memory: 12694272Ki + pods: 110 +[...] +Non-terminated Pods: (2 in total) + Namespace Name CPU Requests CPU Limits Memory Requests Memory Limits + --------- ---- ------------ ---------- --------------- ------------- + kube-system kube-dns-778977457c-kqtlr 260m (6%) 0 (0%) 110Mi (0%) 170Mi (1%) + kube-system kube-proxy-gke-perf-default-pool-aafee20c-k4t8 100m (2%) 0 (0%) 0 (0%) 0 (0%) +Allocated resources: + (Total limits may be over 100 percent, i.e., overcommitted.) + CPU Requests CPU Limits Memory Requests Memory Limits + ------------ ---------- --------------- ------------- + 360m (9%) 0 (0%) 110Mi (0%) 170Mi (1%) +``` + +This will output a lot of information for each of the nodes in your cluster, but if you focus in on the right parts you'll see how many "allocatable" resources are available on each node and how many resources are already being used by other pods. The "allocatable" resources are how much CPU and memory Kubernetes is willing to provide to pods running on the machine. 
The difference between the node's "capacity" and its "allocatable" resources is taken up by the operating system and Kubernetes's management processes. The "m" in "3920m" stands for "milli-CPUs", meaning "thousandths of a CPU". + +You'll also see a number of pods running here that you may not have realized were in your cluster. Kubernetes runs a handful of pods in the `kube-system` namespace that are part of the cluster infrastructure. These may make it tough to attempt to reserve all the allocatable space on your nodes for CockroachDB, since some of them are essential for the Kubernetes cluster's health. If you want to run CockroachDB on every node in your cluster, you'll have to leave room for these processes. If you are only running CockroachDB on a subset of the nodes in your cluster, you can choose to take up all the "allocatable" space other than what is being used by the `kube-system` pods that are on all the nodes in the cluster, such as `kube-proxy` or the `fluentd` logging agent. + +Note that it will be difficult to truly use up all of the allocatable space in the current versions of Kubernetes (v1.10 or older) because you'd have to manually preempt the `kube-system` pods that are already on the nodes you want CockroachDB to run on (by deleting them). This should become easier in future versions of Kubernetes when its [Pod Priority](https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/) feature gets promoted from alpha to beta. Once that feature is more widely available, you could set the CockroachDB pods to a higher priority, causing the Kubernetes scheduler to preempt and reschedule the `kube-system` pods onto other machines. + +Once you've picked out an amount of CPU and memory to reserve for Cockroach, you'll have to configure the resource request in your CockroachDB YAML file. They should go underneath the `containers` heading. 
For example, to use most of the available resources on the machines described above, you'd configure these lines of your values file: + +```yaml +cockroachdb: + crdbCluster: + resources: + requests: + cpu: 3500m + memory: 12300Mi +``` + +When you initialize the cluster, you'll want to check to make sure that all the CockroachDB pods are scheduled successfully. If you see any get stuck in the pending state, run `kubectl describe pod <podname>` and check the `Events` for information about why they're still pending. You may need to manually preempt pods on one or more nodes by running `kubectl delete pod` on them to make room for the CockroachDB pods. As long as the pods you delete were created by a higher-level Kubernetes object such as a `Deployment`, they'll be safely recreated on another node. + +#### Resource limits + +Resource limits are conceptually similar to resource requests, but serve a different purpose. They let you cap the resources used by a pod to no more than the provided limit, which can have a couple of different uses. For one, it makes for more predictable performance because your pods will not be allowed to use any excess capacity on their machines, meaning that they will not have more resources available to them at some times (during lulls in traffic) than others (busy periods where the other pods on a machine are also fully utilizing their reserved resources). Secondly, it also increases the ["Quality of Service" guaranteed by the Kubernetes runtime](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node/resource-qos.md) on Kubernetes versions 1.8 and below, making the pods less likely to be preempted when a machine is oversubscribed. Finally, memory limits in particular limit the amount of memory that the container knows is available to it, which help when you specify percentages for the CockroachDB `--cache` and `--max-sql-memory` flags, as our default configuration file does. 
+ +Setting resource limits works about the same as setting resource requests. If you wanted to set resource limits in addition to requests on the config from the [Resource Requests](#resource-requests) section above, you'd change the config to: + +```yaml +cockroachdb: + crdbCluster: + resources: + requests: + cpu: 3500m + memory: 12300Mi + limits: + cpu: 3500m + memory: 12300Mi +``` + +The pods would then be restricted to only use the resource they have reserved and guaranteed to not be preempted except in very exceptional circumstances. This typically will not give you better performance on an under-utilized Kubernetes cluster, but will give you more predictable performance as other workloads are run. + +{{site.data.alerts.callout_danger}} +While setting memory limits is strongly recommended, [setting CPU limits can hurt tail latencies as currently implemented by Kubernetes](https://github.com/kubernetes/kubernetes/issues/51135). We recommend not setting CPU limits at all unless you have explicitly enabled the non-default [Static CPU Management Policy](https://kubernetes.io/docs/tasks/administer-cluster/cpu-management-policies/#static-policy) when setting up your Kubernetes cluster, and even then only setting integer (non-fractional) CPU limits and memory limits exactly equal to the corresponding requests. +{{site.data.alerts.end}} + +#### Default resource requests and limits + +Note that even if you do not manually set resource requests yourself, you're likely unknowingly using them anyways. In many installations of Kubernetes, a [LimitRange](https://kubernetes.io/docs/tasks/administer-cluster/cpu-default-namespace/) is preconfigured for the `default` namespace that applies a default CPU request of `100m`, or one-tenth of a CPU. 
You can see this configuration by running the following command: + +```shell +$ kubectl describe limitranges +``` + +Experimentally, this does not appear to have a noticeable effect on CockroachDB's performance when a Kubernetes cluster isn't heavily utilized, but do not be surprised if you see CPU requests on your pods that you didn't set. + +### Other pods on the same machines as CockroachDB + +As discovered in the above section on [Resource Requests and Limits](#resource-requests-and-limits), there will always be pods other than just CockroachDB running in your Kubernetes cluster, even if you do not create any other pods of your own. You can see them at any time by running: + +```shell +$ kubectl get pods --all-namespaces + +NAMESPACE NAME READY STATUS RESTARTS AGE +kube-system event-exporter-v0.1.7-5c4d9556cf-6v7lf 2/2 Running 0 2m +kube-system fluentd-gcp-v2.0.9-6rvmk 2/2 Running 0 2m +kube-system fluentd-gcp-v2.0.9-m2xgp 2/2 Running 0 2m +kube-system fluentd-gcp-v2.0.9-sfgps 2/2 Running 0 2m +kube-system fluentd-gcp-v2.0.9-szwwn 2/2 Running 0 2m +kube-system heapster-v1.4.3-968544ffd-5tsb8 3/3 Running 0 1m +kube-system kube-dns-778977457c-4s7vv 3/3 Running 0 1m +kube-system kube-dns-778977457c-ls6fq 3/3 Running 0 2m +kube-system kube-dns-autoscaler-7db47cb9b7-x2cc4 1/1 Running 0 2m +kube-system kube-proxy-gke-test-default-pool-828d39a7-dbn0 1/1 Running 0 2m +kube-system kube-proxy-gke-test-default-pool-828d39a7-nr06 1/1 Running 0 2m +kube-system kube-proxy-gke-test-default-pool-828d39a7-rc4m 1/1 Running 0 2m +kube-system kube-proxy-gke-test-default-pool-828d39a7-trd1 1/1 Running 0 2m +kube-system kubernetes-dashboard-768854d6dc-v7ng8 1/1 Running 0 2m +kube-system l7-default-backend-6497bcdb4d-2kbh4 1/1 Running 0 2m +``` + +These ["cluster add-ons"](https://github.com/kubernetes/kubernetes/tree/master/cluster/addons) provide a variety of basic services like managing DNS entries for services within the cluster, powering the Kubernetes dashboard UI, or 
collecting logs or metrics from all the pods running in the cluster. If you do not like having them take up space in your cluster, you can prevent some of them from running by configuring your Kubernetes cluster appropriately. For example, on GKE, you can create a cluster with the minimal set of addons by running: + +```shell +$ gcloud container clusters create --no-enable-cloud-logging --no-enable-cloud-monitoring --addons="" +``` + +However, essentials like `kube-proxy` and `kube-dns` are effectively required to have a compliant Kubernetes cluster. This means that you'll always have some pods that aren't yours running in your cluster, so it's important to understand and account for the possible effects of CockroachDB having to share a machine with other processes. The more processes there are on the same machine as a CockroachDB pod, the worse and less predictable its performance will likely be. To protect against this, it's strongly recommended to run with [Resource Requests](#resource-requests) on your CockroachDB pods to provide some level of CPU and memory isolation. + +Setting resource requests isn't a panacea, though. There can still be contention for shared resources like network I/O or, in [exceptional](https://sysdig.com/blog/container-isolation-gone-wrong/) cases, internal kernel data structures. For these reasons and because of the Kubernetes infrastructure processes running on each machine, CockroachDB running on Kubernetes simply cannot reach quite the same levels of performance as running directly on dedicated machines. Thankfully, it can at least get quite close if you use Kubernetes wisely. + +If for some reason setting appropriate resource requests still isn't getting you the performance you expect, you might want to consider going all the way to [dedicated nodes](#dedicated-nodes). 
+ +#### Client applications on the same machines as CockroachDB + +Running client applications such as benchmarking applications on the same machines as CockroachDB can be even worse than just having Kubernetes system pods on the same machines. They are very likely to end up competing for resources, because when the applications get more loaded than usual, so will the CockroachDB processes. The best way to avoid this is to [set resource requests and limits](#resource-requests-and-limits), but if you are unwilling or unable to do that for some reason, you can also set [anti-affinity scheduling policies](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity) on your client applications: + +```yaml +cockroachdb: + crdbCluster: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - loadgen + topologyKey: kubernetes.io/hostname + - weight: 99 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - cockroachdb + topologyKey: kubernetes.io/hostname +``` + +This configuration will first prefer to put the `loadgen` pods on different nodes from each other, which is important for the fault tolerance of the `loadgen` pods themselves. As a secondary priority, it will attempt to put the pods on nodes that do not already have a running `CockroachDB` pod. This will ensure the best possible balance of fault tolerance and performance for the load generator and CockroachDB cluster. + +### Networking + +[Kubernetes asks a lot of the network that it runs on](https://kubernetes.io/docs/concepts/cluster-administration/networking/) in order to provide a routable IP address and an isolated Linux network namespace to each pod in the cluster, among its other requirements. 
While this document isn't nearly large enough to properly explain the details, and those details themselves can depend heavily on specifically how you have set up the network for your cluster, it suffices to say that Docker and Kubernetes's networking abstractions often come with a performance penalty for high-throughput distributed applications such as CockroachDB. + +If you really want to eke more performance out of your cluster, networking is a good target to at least experiment with. You can either replace your cluster's networking solution with a more performant one or bypass most of the networking overhead by using the host machines' networks directly. + +#### Networking solutions + +If you aren't using a hosted Kubernetes service, you'll typically have to choose how to set up the network when you're creating a Kubernetes cluster. There are [a lot of solutions out there](https://kubernetes.io/docs/concepts/cluster-administration/networking/#how-to-achieve-this), and they can have significantly different performance characteristics and functionality. We do not endorse any networking software or configurations in particular, but want to call out that your choice can have a meaningful effect on performance compared to running CockroachDB outside of Kubernetes. + +### Dedicated nodes + +If your Kubernetes cluster is made up of heterogeneous hardware, it's likely that you'd like to make sure CockroachDB only runs on certain machines. If you want to get as much performance as possible out of a set of machines, you might also want to make sure that only CockroachDB is run on them. + +For more information, see [Pod scheduling](schedule-cockroachdb-kubernetes-operator.html). 
diff --git a/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md b/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md new file mode 100644 index 00000000000..e8a01efb8e2 --- /dev/null +++ b/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md @@ -0,0 +1,147 @@ +--- +title: Migrate from Helm StatefulSet +summary: Migration guide detailing how to migrate away from a Helm deployment of CockroachDB to the Kubernetes operator. +toc: true +toc_not_nested: true +secure: true +docs_area: deploy +--- + +This guide describes how to migrate an existing CockroachDB cluster managed via StatefulSet to the enterprise operator. + +These instructions assume that you are migrating from a StatefulSet cluster that was configured using the Helm chart per the following command: + +```shell +helm upgrade --install --set operator.enabled=false crdb-test --debug ./cockroachdb +``` + +If your existing cluster was created using the public operator, refer to the [public operator migration guide](migrate-cockroachdb-kubernetes-operator.html). + +This migration process is designed to allow migration to occur without affecting cluster availability, and preserving existing disks so data doesn’t need to be replicated into empty volumes. Note that this process scales down the StatefulSet by one node before adding each operator-managed pod, so the maximum cluster capacity will be reduced by one node periodically throughout the migration. + +{{site.data.alerts.callout_info}} +This migration process is currently only recommended to run in a non-production environment. We are actively working on a rollback procedure but are looking for early feedback on this process. +{{site.data.alerts.end}} + +## Step 1. 
Prepare the migration helper + +Build the migration helper and add the `./bin` directory to your PATH: + +```shell +$ make bin/migration-helper +$ export PATH=$PATH:$(pwd)/bin +``` + +Export environment variables about the existing deployment: + +```shell +# Set STS_NAME to the cockroachdb statefulset deployed via helm chart. +$ export STS_NAME="crdb-example-cockroachdb" + +# Set NAMESPACE to the namespace where the statefulset is installed. +$ export NAMESPACE="default" + +# RELEASE_NAME describes the release name of the installed helm chart release. +$ export RELEASE_NAME=$(kubectl get sts $STS_NAME -n $NAMESPACE -o yaml | yq '.metadata.annotations."meta.helm.sh/release-name"') + +# Set CLOUD_PROVIDER to the cloud vendor where k8s cluster is residing. +# All the major cloud providers are supported (gcp,aws,azure) +$ export CLOUD_PROVIDER=gcp + +# Set REGION to the cloud provider's identifier of this region. +# This region must match the "topology.kubernetes.io/region" label in +# the Kubernetes nodes for this cluster. +$ export REGION=us-central1 +``` + +## Step 2. Generate manifests with the migration helper + +The operator uses slightly different certificates than the CockroachDB Helm chart, and mounts them in configmaps and secrets with different names. Use the migration helper utility with the `migrate-certs` option to re-map and generate TLS certificates: + +```shell +$ bin/migration-helper migrate-certs --statefulset-name $STS_NAME --namespace $NAMESPACE +``` + +Generate a manifest for each crdbnode and the crdbcluster based on the state of the StatefulSet. We do this because we want the new pods and their associated PVCs to have the same names as the original StatefulSet-managed pods and PVCs. This means that the new operator-managed pods will use the original PVCs rather than replicate data into empty nodes. 
+ +```shell +$ mkdir -p manifests +$ bin/migration-helper build-manifest helm --statefulset-name $STS_NAME --namespace $NAMESPACE --cloud-provider $CLOUD_PROVIDER --cloud-region $REGION --output-dir ./manifests +``` + +## Step 3. Replace statefulset pods with operator nodes + +To migrate seamlessly from the CockroachDB Helm chart to the operator, we’ll scale down StatefulSet-managed pods and replace them with crdbnode objects, one by one. Then we’ll create the crdbcluster object that manages the crdbnodes. + +First, create objects in kubectl that will eventually be owned by the crdbcluster: + +```shell +$ kubectl create priorityclass crdb-critical --value 500000000 +``` + +Install the crdb-operator with Helm: + +```shell +$ helm upgrade --install crdb-operator ./cockroachdb-parent/charts/operator +``` + +For each pod in the StatefulSet, perform the following steps: + +1. Scale the StatefulSet down by one replica. For example, for a five-node cluster, scale the StatefulSet down to four replicas: + + ```shell + $ kubectl scale statefulset/$STS_NAME --replicas=4 + ``` + +2. Create the crdbnode corresponding to the StatefulSet pod you just scaled down. The manifests are labeled as `crdbnode-X.yaml` where `X` is shared with each `<STS_NAME>-X` StatefulSet pod, so note whichever pod was scaled down and specify the corresponding manifest in the following command: + + ```shell + $ kubectl apply -f manifests/crdbnode-4.yaml + ``` + +3. Wait for the new pod to become ready. If it doesn’t, check the operator logs for errors. + +4. Before moving on to the next replica migration, verify that there are no underreplicated ranges: + 1. Set up port forwarding to access the CockroachDB node’s HTTP interface. Note that CockroachDB’s UI runs on port 8080 by default: + + ```shell + $ kubectl port-forward pod/cockroachdb-4 8080:8080 + ``` + + 2. Check that there are zero underreplicated ranges. 
The following command outputs the number of under-replicated ranges on this CockroachDB node: + + ```shell + $ curl --insecure -s https://localhost:8080/_status/vars | grep "ranges_underreplicated{" | awk '{print $2}' + ``` + +Repeat these steps until the StatefulSet has zero replicas. + +## Step 4. Update the public service + +The Helm chart creates a public Service that exposes both SQL and gRPC connections over a single power. However, the operator uses a different port for gRPC communication. To ensure compatibility, update the public Service to reflect the correct gRPC port used by the operator. + +Apply the updated service manifest: + +```shell +$ kubectl apply -f manifests/public-service.yaml +``` + +The existing StatefulSet creates a PodDisruptionBudget (PDB) that conflicts with the one managed by the operator. To avoid this conflict, delete the existing PDB: + +```shell +$ kubectl delete poddisruptionbudget $STS_NAME-budget +``` + +## Step 5. Deploy the crdbcluster object + +Delete the StatefulSet that was scaled down to zero, as the Helm upgrade can only proceed if no StatefulSet is present: + +```shell +$ kubectl delete statefulset $STS_NAME +``` + +Apply the crdbcluster manifest using Helm: + +```shell +$ helm upgrade $RELEASE_NAME ./cockroachdb-parent/charts/cockroachdb -f manifests/values.yaml +``` \ No newline at end of file diff --git a/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md b/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md new file mode 100644 index 00000000000..feeb5940532 --- /dev/null +++ b/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md @@ -0,0 +1,181 @@ +--- +title: Migrate from legacy Kubernetes Operator +summary: Migration guide detailing how to migrate away from a Helm deployment of CockroachDB to the Kubernetes operator. 
+toc: true +toc_not_nested: true +secure: true +docs_area: deploy +--- + +This guide describes how to migrate an existing CockroachDB cluster managed via the public operator to the enterprise operator. + +These instructions assume that you are migrating from a public operator cluster that is managed with kubectl via the following yaml files: + +```shell +$ kubectl apply -f https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v2.17.0/install/crds.yaml +$ kubectl apply -f https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v2.17.0/install/operator.yaml +$ kubectl apply -f https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v2.17.0/examples/example.yaml +``` + +If your existing cluster was created as a StatefulSet using Helm, refer to the [Helm migration guide](migrate-cockroachdb-kubernetes-helm.html). + +This migration process is designed to allow migration to occur without affecting cluster availability, and preserving existing disks so data doesn’t need to be replicated into empty volumes. Note that this process scales down the StatefulSet by one node before adding each operator-managed pod, so the maximum cluster capacity will be reduced by one node periodically throughout the migration. + +{{site.data.alerts.callout_info}} +This migration process is currently only recommended to run in a non-production environment. We are actively working on a rollback procedure but are looking for early feedback on this process. +{{site.data.alerts.end}} + +## Step 1. Prepare the migration helper + +Build the migration helper and add the `./bin` directory to your PATH: + +```shell +$ make bin/migration-helper +$ export PATH=$PATH:$(pwd)/bin +``` + +Export environment variables about the existing deployment: + +```shell +# Set CRDBCLUSTER to the crdbcluster custom resource name in the public operator. +$ export CRDBCLUSTER="cockroachdb" + +# Set NAMESPACE to the namespace where the statefulset is installed. 
+$ export NAMESPACE="default" + +# Set CLOUD_PROVIDER to the cloud vendor where k8s cluster is residing. +# All the major cloud providers are supported (gcp,aws,azure) +$ export CLOUD_PROVIDER=gcp + +# Set REGION to the cloud provider's identifier of this region. +# This region must match the "topology.kubernetes.io/region" label in +# the Kubernetes nodes for this cluster. +$ export REGION=us-central1 +``` + +Back up the crdbcluster resource in case there is a need to revert: + +```shell +$ mkdir -p backup +$ kubectl get crdbcluster -o yaml $CRDBCLUSTER > backup/crdbcluster-$CRDBCLUSTER.yaml +``` + +## Step 2. Generate manifests with the migration helper + +The enterprise operator uses slightly different certificates than the public operator, and mounts them in configmaps and secrets with different names. Use the migration helper utility with the `migrate-certs` option to re-map and generate TLS certificates: + +```shell +$ bin/migration-helper migrate-certs --statefulset-name $STS_NAME --namespace $NAMESPACE +``` + +Generate a manifest for each crdbnode and the crdbcluster based on the state of the StatefulSet. We do this because we want the new pods and their associated PVCs to have the same names as the original StatefulSet-managed pods and PVCs. This means that the new operator-managed pods will use the original PVCs rather than replicate data into empty nodes. + +```shell +$ mkdir -p manifests +$ bin/migration-helper build-manifest helm --statefulset-name $STS_NAME --namespace $NAMESPACE --cloud-provider $CLOUD_PROVIDER --cloud-region $REGION --output-dir ./manifests +``` + +## Step 3. Uninstall and replace the old operator + +The public operator and the enterprise operator use custom resource definitions with the same names, so you must remove the public operator before installing the cloud operator. 
Run the following commands to uninstall the public operator, without deleting its managed resources: + +```shell +# Ensure that the operator can't accidentally delete managed k8s objects. +kubectl delete clusterrolebinding cockroach-operator-rolebinding + +# Delete public operator custom resource. +kubectl delete crdbcluster $CRDBCLUSTER --cascade=orphan + +# Delete public operator resources and custom resource definition. +kubectl delete -f https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v2.17.0/install/crds.yaml +kubectl delete serviceaccount cockroach-operator-sa -n cockroach-operator-system +kubectl delete clusterrole cockroach-operator-role +kubectl delete clusterrolebinding cockroach-operator-rolebinding +kubectl delete service cockroach-operator-webhook-service -n cockroach-operator-system +kubectl delete deployment cockroach-operator-manager -n cockroach-operator-system +kubectl delete mutatingwebhookconfigurations cockroach-operator-mutating-webhook-configuration +kubectl delete validatingwebhookconfigurations cockroach-operator-validating-webhook-configuration +``` + +Run `helm upgrade` to install the enterprise operator and wait for it to become ready: + +```shell +$ helm upgrade --install crdb-operator ./cockroachdb-parent/charts/operator +$ kubectl rollout status deployment/cockroach-operator --timeout=60s +``` + +## Step 4. Replace statefulset pods with operator nodes + +To migrate seamlessly from the public operator to the enterprise operator, we’ll scale down StatefulSet-managed pods and replace them with crdbnode objects, one by one. Then we’ll create the crdbcluster object that manages the crdbnodes. 
+ +First, create objects in kubectl that will eventually be owned by the crdbcluster: + +```shell +$ kubectl create priorityclass crdb-critical --value 500000000 +$ kubectl apply -f manifests/rbac.yaml +``` + +Install the crdb-operator with Helm: + +```shell +$ helm upgrade --install crdb-operator ./cockroachdb-parent/charts/operator +``` + +For each pod in the StatefulSet, perform the following steps: + +1. Scale the StatefulSet down by one replica. For example, for a five-node cluster, scale the StatefulSet down to four replicas: + + ```shell + $ kubectl scale statefulset/$STS_NAME --replicas=4 + ``` + +2. Create the crdbnode corresponding to the StatefulSet pod you just scaled down. The manifests are labeled as `crdbnode-X.yaml` where `X` is shared with each `<STS_NAME>-X` StatefulSet pod, so note whichever pod was scaled down and specify the corresponding manifest in the following command: + + ```shell + $ kubectl apply -f manifests/crdbnode-4.yaml + ``` + +3. Wait for the new pod to become ready. If it doesn’t, check the operator logs for errors. + +4. Before moving on to the next replica migration, verify that there are no underreplicated ranges: + 1. Set up port forwarding to access the CockroachDB node’s HTTP interface. Note that CockroachDB’s UI runs on port 8080 by default: + + ```shell + $ kubectl port-forward pod/cockroachdb-4 8080:8080 + ``` + + 2. Check that there are zero underreplicated ranges. The following command outputs the number of under-replicated ranges on this CockroachDB node: + + ```shell + $ curl --insecure -s https://localhost:8080/_status/vars | grep "ranges_underreplicated{" | awk '{print $2}' + ``` + +Repeat these steps until the StatefulSet has zero replicas. + +## Step 5. Update the crdbcluster manifest + +The public operator creates a pod disruption budget that conflicts with a pod disruption budget managed by the cloud operator. 
Before applying the crdbcluster manifest, delete the existing pod disruption budget: + +```shell +$ kubectl delete poddisruptionbudget $CRDBCLUSTER +``` + +Annotate the existing Kubernetes objects so they can managed by the Helm chart: + +```shell +$ kubectl annotate service $CRDBCLUSTER-public meta.helm.sh/release-name="$CRDBCLUSTER" +$ kubectl annotate service $CRDBCLUSTER-public meta.helm.sh/release-namespace="$NAMESPACE" +$ kubectl label service $CRDBCLUSTER-public app.kubernetes.io/managed-by=Helm --overwrite=true +``` + +Apply the crdbcluster manifest: + +```shell +$ helm install $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb -f manifests/values.yaml +``` + +Once the migration is successful, delete the StatefulSet that was created by the public operator: + +```shell +$ kubectl delete poddisruptionbudget $STS_NAME-budget +``` diff --git a/src/current/v25.2/monitor-cockroachdb-kubernetes-operator.md b/src/current/v25.2/monitor-cockroachdb-kubernetes-operator.md new file mode 100644 index 00000000000..d8dd72f8d46 --- /dev/null +++ b/src/current/v25.2/monitor-cockroachdb-kubernetes-operator.md @@ -0,0 +1,345 @@ +--- +title: Cluster Monitoring with the Kubernetes Operator +summary: How to monitor a secure CockroachDB cluster deployed with the Kubernetes operator. +toc: true +toc_not_nested: true +docs_area: deploy +--- + +Despite CockroachDB's various [built-in safeguards against failure](architecture/replication-layer.html), it is critical to actively monitor the overall health and performance of a cluster running in production and to create alerting rules that promptly send notifications when there are events that require investigation or intervention. + +## Configure Prometheus + +Every node of a CockroachDB cluster exports granular timeseries metrics formatted for easy integration with [Prometheus](https://prometheus.io/), an open source tool for storing, aggregating, and querying timeseries data. 
This section shows you how to orchestrate Prometheus as part of your Kubernetes cluster and pull these metrics into Prometheus for external monitoring. + +This guidance is based on [CoreOS's Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator/tree/main), which allows a Prometheus instance to be managed using built-in Kubernetes concepts. + +{{site.data.alerts.callout_info}} +If you're on Hosted GKE, before starting, make sure the email address associated with your Google Cloud account is part of the `cluster-admin` RBAC group, as shown in [Deploy CockroachDB with Kubernetes](deploy-cockroachdb-with-kubernetes-operator.html). +{{site.data.alerts.end}} + +1. From your local workstation, edit the cockroachdb service to add the prometheus: cockroachdb label: + + ```shell + $ kubectl label svc cockroachdb prometheus=cockroachdb + + service/cockroachdb labeled + ``` + + This ensures that only the cockroachdb (not the cockroach-public service) is being monitored by a Prometheus job. + +2. Determine the latest version of [CoreOS's Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator/releases/) and run the following to download and apply the latest `bundle.yaml` definition file: + + {{site.data.alerts.callout_info}} + Be sure to specify the latest [CoreOS Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator/releases/) version in the following command, in place of this example's use of version `v0.82.0`. 
+ {{site.data.alerts.end}} + + ```shell + $ kubectl apply \ + -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.82.0/bundle.yaml \ + --server-side + + + customresourcedefinition.apiextensions.k8s.io/alertmanagers.monitoring.coreos.com serverside-applied + customresourcedefinition.apiextensions.k8s.io/podmonitors.monitoring.coreos.com serverside-applied + customresourcedefinition.apiextensions.k8s.io/probes.monitoring.coreos.com serverside-applied + customresourcedefinition.apiextensions.k8s.io/prometheuses.monitoring.coreos.com serverside-applied + customresourcedefinition.apiextensions.k8s.io/prometheusrules.monitoring.coreos.com serverside-applied + customresourcedefinition.apiextensions.k8s.io/servicemonitors.monitoring.coreos.com serverside-applied + customresourcedefinition.apiextensions.k8s.io/thanosrulers.monitoring.coreos.com serverside-applied + clusterrolebinding.rbac.authorization.k8s.io/prometheus-operator serverside-applied + clusterrole.rbac.authorization.k8s.io/prometheus-operator serverside-applied + deployment.apps/prometheus-operator serverside-applied + serviceaccount/prometheus-operator serverside-applied + service/prometheus-operator serverside-applied + ``` + +3. Confirm that the `prometheus-operator` has started: + + ```shell + $ kubectl get deploy prometheus-operator + + NAME READY UP-TO-DATE AVAILABLE AGE + prometheus-operator 1/1 1 1 27s + ``` + +4. Download our Prometheus manifest: + + ```shell + $ curl -O https://raw.githubusercontent.com/cockroachdb/cockroach/master/cloud/kubernetes/prometheus/prometheus.yaml + ``` + +5. Apply the Prometheus manifest. 
This creates the various objects necessary to run a Prometheus instance: + + ```shell + $ kubectl apply -f prometheus.yaml + + serviceaccount/prometheus created + clusterrole.rbac.authorization.k8s.io/prometheus created + clusterrolebinding.rbac.authorization.k8s.io/prometheus created + servicemonitor.monitoring.coreos.com/cockroachdb created + prometheus.monitoring.coreos.com/cockroachdb created + ``` + +6. Access the Prometheus UI locally and verify that CockroachDB is feeding data into Prometheus: + 1. Port-forward from your local machine to the pod running Prometheus: + + ```shell + $ kubectl port-forward prometheus-cockroachdb-0 9090 + ``` + + 2. Go to [http://localhost:9090](http://localhost:9090/) in your browser. + + 3. To verify that each CockroachDB node is connected to Prometheus, go to **Status > Targets**. The screen should look like this: + + Prometheus targets + + 4. To verify that data is being collected, go to **Graph**, enter the `sys_uptime` variable in the field, click **Execute**, and then click the **Graph** tab. The screen should like this: + + Prometheus graph + + {{site.data.alerts.callout_info}} + Prometheus auto-completes CockroachDB time series metrics for you, but if you want to see a full listing, with descriptions, port-forward as described in [Access the DB Console](deploy-cockroachdb-with-kubernetes-operator.html#step-4-access-the-db-console) and then point your browser to [http://localhost:8080/_status/vars](http://localhost:8080/_status/vars). + {{site.data.alerts.end}} + +For more details on using the Prometheus UI, see their [official documentation](https://prometheus.io/docs/introduction/getting_started/). + +## Configure Alertmanager + +Active monitoring helps you spot problems early, but it is also essential to send notifications when there are events that require investigation or intervention. 
This section shows you how to use [Alertmanager](https://prometheus.io/docs/alerting/alertmanager/) and CockroachDB's starter [alerting rules](https://github.com/cockroachdb/cockroach/blob/master/cloud/kubernetes/prometheus/alert-rules.yaml) to do this. + +1. Download our [alertmanager-config.yaml](https://raw.githubusercontent.com/cockroachdb/cockroach/master/cloud/kubernetes/prometheus/alertmanager-config.yaml) configuration file: + + ```shell + $ curl -O https://raw.githubusercontent.com/cockroachdb/cockroach/master/cloud/kubernetes/prometheus/alertmanager-config.yaml + ``` + +2. Edit the `alertmanager-config.yaml` file to [specify the desired receivers for notifications](https://prometheus.io/docs/alerting/configuration/#receiver). Initially, the file contains a placeholder web hook. + +3. Add this configuration to the Kubernetes cluster as a secret, renaming it to `alertmanager.yaml` and labeling it to make it easier to find: + + ```shell + $ kubectl create secret generic alertmanager-cockroachdb \ + --from-file=alertmanager.yaml=alertmanager-config.yaml + + secret/alertmanager-cockroachdb created + ``` + ```shell + $ kubectl label secret alertmanager-cockroachdb app=cockroachdb + + secret/alertmanager-cockroachdb labeled + ``` + + {{site.data.alerts.callout_danger}} + The name of the secret, `alertmanager-cockroachdb`, must match the name used in the `alertmanager.yaml` file. If they differ, the Alertmanager instance will start without configuration, and nothing will happen. + {{site.data.alerts.end}} + +4. 
Use our [alertmanager.yaml](https://github.com/cockroachdb/cockroach/blob/master/cloud/kubernetes/prometheus/alertmanager.yaml) file to create the various objects necessary to run an Alertmanager instance, including a ClusterIP service so that Prometheus can forward alerts: + + ```shell + $ kubectl apply \ + -f https://raw.githubusercontent.com/cockroachdb/cockroach/master/cloud/kubernetes/prometheus/alertmanager.yaml + + alertmanager.monitoring.coreos.com/cockroachdb created + service/alertmanager-cockroachdb created + ``` + +5. Verify that Alertmanager is running: + 1. Port-forward from your local machine to the pod running Alertmanager: + + ```shell + $ kubectl port-forward alertmanager-cockroachdb-0 9093 + ``` + + 2. Go to [http://localhost:9093](http://localhost:9093/) in your browser. The screen should look like this: + + Alertmanager + +6. Ensure that the Alertmanagers are visible to Prometheus by opening [http://localhost:9090/status](http://localhost:9090/status). The screen should look like this: + + Alertmanager + +7. Add CockroachDB's starter [alerting rules](https://github.com/cockroachdb/cockroach/blob/master/cloud/kubernetes/prometheus/alert-rules.yaml): + + ```shell + $ kubectl apply \ + -f https://raw.githubusercontent.com/cockroachdb/cockroach/master/cloud/kubernetes/prometheus/alert-rules.yaml + + prometheusrule.monitoring.coreos.com/prometheus-cockroachdb-rules created + ``` + +8. Ensure that the rules are visible to Prometheus by opening [http://localhost:9090/rules](http://localhost:9090/rules). The screen should look like this: + + Alertmanager + +9. Verify that the `TestAlertManager` example alert is firing by opening [http://localhost:9090/alerts](http://localhost:9090/alerts). The screen should look like this: + + Alertmanager + +10. To remove the example alert: + 1. Use the `kubectl edit` command to open the rules for editing: + + ```shell + $ kubectl edit prometheusrules prometheus-cockroachdb-rules + ``` + + 2. 
Remove the `dummy.rules` block and save the file: + + ```yaml + - name: rules/dummy.rules + rules: + - alert: TestAlertManager + expr: vector(1) + ``` + +## Monitor the Operator + +The CockroachDB Operator automatically exposes [Prometheus-style metrics](https://prometheus.io/docs/concepts/metric_types/) that you can monitor to observe its operations. + +Metrics can be collected from the Operator via HTTP requests (port 8080 by default) against the `/metrics` endpoint. The response will describe the current node metrics, for example: + +```json +... +# HELP node_decommissioning Whether a CRDB node is decommissioning. +# TYPE node_decommissioning gauge +node_decommissioning{node="cockroachdb-nvq2l"} 0 +node_decommissioning{node="cockroachdb-pmp45"} 0 +node_decommissioning{node="cockroachdb-q6784"} 0 +node_decommissioning{node="cockroachdb-r4wz8"} 0 +... +``` + +## Configure logging + +You can use the Operator to configure the CockroachDB logging system. This allows you to output logs to [configurable log sinks](configure-logs.html#configure-log-sinks) such as file or network logging destinations. + +The logging configuration is defined in a [ConfigMap](https://kubernetes.io/docs/concepts/configuration/configmap/) object, using a key named `logs.yaml`. 
For example: + +```yaml +apiVersion: v1 +data: + logs.yaml: | + sinks: + file-groups: + dev: + channels: DEV + filter: WARNING + fluent-servers: + ops: + channels: [OPS, HEALTH, SQL_SCHEMA] + address: 127.0.0.1:5170 + net: tcp + redact: true + security: + channels: [SESSIONS, USER_ADMIN, PRIVILEGES, SENSITIVE_ACCESS] + address: 127.0.0.1:5170 + net: tcp + auditable: true +kind: ConfigMap +metadata: + name: logconfig + namespace: cockroach-ns +``` + +The above configuration overrides the [default logging configuration](configure-logs.html#default-logging-configuration) and reflects our recommended Kubernetes logging configuration: + +* Save debug-level logs (the `DEV` [log channel](logging-overview.html#logging-channels)) to disk for troubleshooting. +* Send operational- and security-level logs to a [network collector](logging-use-cases.html#network-logging), in this case [Fluentd](configure-logs.html#fluentd-logging-format). + +The ConfigMap `name` must match the `cockroachdb.crdbCluster.loggingConfigMapName` object in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster): + +```yaml +cockroachdb: + crdbCluster: + loggingConfigMapName: logconfig +``` + +By default, the Operator also modifies the [default logging configuration](configure-logs.html#default-logging-configuration) with the following: + +```yaml +sinks: + stderr: + channels: {INFO: "HEALTH, OPS", WARNING: "STORAGE, DEV"} + redact: true +``` + +This outputs logging events in the [OPS](logging.html#ops) channel to a `cockroach-stderr.log` file. + +### Example: Configuring a troubleshooting log file on pods + +In this example, CockroachDB has already been deployed on a Kubernetes cluster. We override the [default logging configuration](configure-logs.html#default-logging-configuration) to output [DEV](logging.html#dev) logs to a `cockroach-dev.log` file. + +1. Create a ConfigMap named `logconfig`. 
Note that `namespace` is set to the `cockroach-ns` namespace: + + ```yaml + apiVersion: v1 + data: + logs.yaml: | + sinks: + file-groups: + dev: + channels: DEV + filter: WARNING + kind: ConfigMap + metadata: + name: logconfig + namespace: cockroach-ns + ``` + + For simplicity, also name the YAML file `logconfig.yaml`. + + {{site.data.alerts.callout_info}} + The ConfigMap key is not related to the ConfigMap `name` or YAML filename, and must be named `logging.yaml`. + {{site.data.alerts.end}} + + This configuration outputs `DEV` logs that have severity [WARNING](logging.html#logging-levels-severities) to a `cockroach-dev.log` file on each pod. + +2. Apply the ConfigMap to the cluster: + + ```shell + $ kubectl apply -f logconfig.yaml + + configmap/logconfig created + ``` + +3. Add the `name` of the ConfigMap in `loggingConfigMapName` to the values file: + + ```yaml + cockroachdb: + crdbCluster: + loggingConfigMapName: logconfig + ``` + +4. Apply the new settings to the cluster: + + ```shell + $ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE + ``` + + The changes will be rolled out to each pod. + +5. See the log files available on a pod: + + ```shell + $ kubectl exec cockroachdb-2 -- ls cockroach-data/logs + + cockroach-dev.cockroachdb-2.unknownuser.2022-05-02T19_03_03Z.000001.log + cockroach-dev.log + cockroach-health.cockroachdb-2.unknownuser.2022-05-02T18_53_01Z.000001.log + cockroach-health.log + cockroach-pebble.cockroachdb-2.unknownuser.2022-05-02T18_52_48Z.000001.log + cockroach-pebble.log + cockroach-stderr.cockroachdb-2.unknownuser.2022-05-02T18_52_48Z.000001.log + cockroach-stderr.cockroachdb-2.unknownuser.2022-05-02T19_03_03Z.000001.log + cockroach-stderr.cockroachdb-2.unknownuser.2022-05-02T20_04_03Z.000001.log + cockroach-stderr.log + cockroach.cockroachdb-2.unknownuser.2022-05-02T18_52_48Z.000001.log + cockroach.log + ... + ``` + +6. 
View a specific log file: + + ```shell + $ kubectl exec cockroachdb-2 -- cat cockroach-data/logs/cockroach-dev.log + ``` diff --git a/src/current/v25.2/scale-cockroachdb-kubernetes-operator.md b/src/current/v25.2/scale-cockroachdb-kubernetes-operator.md new file mode 100644 index 00000000000..0e9019b8ef2 --- /dev/null +++ b/src/current/v25.2/scale-cockroachdb-kubernetes-operator.md @@ -0,0 +1,96 @@ +--- +title: Cluster Scaling with the Kubernetes Operator +summary: How to scale a secure CockroachDB cluster deployed with the Kubernetes Operator. +toc: true +toc_not_nested: true +secure: true +docs_area: deploy +--- + +This page explains how to add and remove CockroachDB nodes on Kubernetes. + +## Add nodes + +Before scaling up CockroachDB, note the following [topology recommendations](recommended-production-settings.html#topology): + +* Each CockroachDB node (running in its own pod) should run on a separate Kubernetes worker node. +* Each availability zone should have the same number of CockroachDB nodes. + +If your cluster has 3 CockroachDB nodes distributed across 3 availability zones (as in our [deployment example](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster)), we recommend scaling up by a multiple of 3 to retain an even distribution of nodes. You should therefore scale up to a minimum of 6 CockroachDB nodes, with 2 nodes in each zone. + +1. Run `kubectl get nodes` to list the worker nodes in your Kubernetes cluster. There should be at least as many worker nodes as pods you plan to add. This ensures that no more than one pod will be placed on each worker node. + +2. If you need to add worker nodes, resize your cluster by specifying the desired number of worker nodes in each zone. 
Using Google Kubernetes Engine as an example: + + ```shell + $ gcloud container clusters resize {cluster-name} --region {region-name} --num-nodes 2 + ``` + + This example distributes 2 worker nodes across the default 3 zones, raising the total to 6 worker nodes. + +3. Update `cockroachdb.crdbCluster.regions.code.nodes` in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster), with the target size of the CockroachDB cluster in the specified region. This value refers to the number of CockroachDB nodes, each running in one pod: + + ```yaml + cockroachdb: + crdbCluster: + regions: + - code: us-central1 + cloudProvider: gcp + domain: cluster.domain.us-central + nodes: 6 + ``` + +4. Apply the new settings to the cluster: + + ```shell + $ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE + ``` + +5. Verify that the new pods were successfully started: + + ```shell + $ kubectl get pods + + NAME READY STATUS RESTARTS AGE + cockroach-operator-655fbf7847-zn9v8 1/1 Running 0 30m + cockroachdb-0 1/1 Running 0 24m + cockroachdb-1 1/1 Running 0 24m + cockroachdb-2 1/1 Running 0 24m + cockroachdb-3 1/1 Running 0 30s + cockroachdb-4 1/1 Running 0 30s + cockroachdb-5 1/1 Running 0 30s + ``` + + Each pod should be running in one of the 6 worker nodes. + +## Remove nodes + +If your nodes are distributed across 3 availability zones (as in our [deployment example](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster)), we recommend scaling down by a multiple of 3 to retain an even distribution. If your cluster has 6 CockroachDB nodes, you should therefore scale down to 3, with 1 node in each zone. + +{{site.data.alerts.callout_danger}} +Do not scale down to fewer than 3 nodes. This is considered an anti-pattern on CockroachDB and will cause errors. 
Before scaling down CockroachDB, note that each availability zone should have the same number of CockroachDB nodes. +{{site.data.alerts.end}} + +1. Update `cockroachdb.crdbCluster.regions.code.nodes` in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster), with the target size of the CockroachDB cluster. For instance, to scale a cluster in Google Cloud down to 3 nodes: + + ```yaml + cockroachdb: + crdbCluster: + regions: + - code: us-central1 + cloudProvider: gcp + domain: cluster.domain.us-central + nodes: 3 + ``` + +2. Apply the new settings to the cluster: + + ```shell + $ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE + ``` + +3. Verify that the pods were successfully removed: + + ```shell + $ kubectl get pods + ``` diff --git a/src/current/v25.2/schedule-cockroachdb-kubernetes-operator.md b/src/current/v25.2/schedule-cockroachdb-kubernetes-operator.md new file mode 100644 index 00000000000..ecbed386940 --- /dev/null +++ b/src/current/v25.2/schedule-cockroachdb-kubernetes-operator.md @@ -0,0 +1,346 @@ +--- +title: Pod Scheduling with the Kubernetes Operator +summary: Schedule CockroachDB pods on Kubernetes using the Operator. +toc: true +toc_not_nested: true +secure: true +docs_area: deploy +--- + +This page describes how to configure pod scheduling settings. These settings control how CockroachDB pods should be identified or scheduled onto worker nodes, which are then proxied to the Kubernetes scheduler. + +## Node selectors + +A pod with a *node selector* will be scheduled onto a worker node that has matching [labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/), or key-value pairs. 
+ +Specify the labels in `cockroachdb.crdbCluster.nodeSelector` in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). If you specify multiple `nodeSelector` labels, the node must match all of them. + +The following configuration causes CockroachDB pods to be scheduled onto worker nodes that have *both* the labels `worker-pool-name=crdb-workers` and `kubernetes.io/arch=amd64`: + +```yaml +cockroachdb: + crdbCluster: + nodeSelector: + worker-pool-name: crdb-workers + kubernetes.io/arch: amd64 +``` + +For an example of labeling nodes, see [Scheduling CockroachDB onto labeled nodes](#example-scheduling-cockroachdb-onto-labeled-nodes). + +## Affinities and anti-affinities + +A pod with a *node affinity* seeks out worker nodes that have matching [labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/). A pod with a *pod affinity* seeks out pods that have matching labels. A pod with a *pod anti-affinity* avoids pods that have matching labels. + +Affinities and anti-affinities can be used together with `operator` fields to: + +* Require CockroachDB pods to be scheduled onto a labeled worker node. +* Require CockroachDB pods to be co-located with labeled pods (e.g., on a node or region). +* Prevent CockroachDB pods from being scheduled onto a labeled worker node. +* Prevent CockroachDB pods from being co-located with labeled pods (e.g., on a node or region). + +For an example, see [Scheduling CockroachDB onto labeled nodes](#example-scheduling-cockroachdb-onto-labeled-nodes). + +### Add a node affinity + +Specify node affinities in `cockroachdb.crdbCluster.affinity.nodeAffinity` in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). If you specify multiple `matchExpressions` labels, the node must match all of them. If you specify multiple `values` for a label, the node can match any of the values. 
+ +The following configuration requires that CockroachDB pods are scheduled onto worker nodes running a Linux operating system, with a preference against worker nodes in the `us-east4-b` availability zone. + +```yaml +cockroachdb: + crdbCluster: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/os + operator: In + values: + - linux + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: topology.kubernetes.io/zone + operator: NotIn + values: + - us-east4-b +``` + +The `requiredDuringSchedulingIgnoredDuringExecution` node affinity rule, using the `In` operator, requires CockroachDB pods to be scheduled onto nodes with the matching label `kubernetes.io/os=linux`. It will not evict pods that are already running on nodes that do not match the affinity requirements. + +The `preferredDuringSchedulingIgnoredDuringExecution` node affinity rule, using the `NotIn` operator and specified `weight`, discourages (but does not disallow) CockroachDB pods from being scheduled onto nodes with the label `topology.kubernetes.io/zone=us-east4-b`. This achieves a similar effect as a `PreferNoSchedule` [taint](#taints-and-tolerations). + +For more context on how these rules work, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/). The [custom resource definition](https://github.com/cockroachdb/helm-charts/blob/master/cockroachdb-parent/charts/cockroachdb/values.yaml) details the fields supported by the Operator. + +### Add a pod affinity or anti-affinity + +Specify pod affinities and anti-affinities in `cockroachdb.crdbCluster.affinity.podAffinity` and `cockroachdb.crdbCluster.affinity.podAntiAffinity` in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). 
If you specify multiple `matchExpressions` labels, the node must match all of them. If you specify multiple `values` for a label, the node can match any of the values. + +The following configuration attempts to schedule CockroachDB pods in the same zones as the pods that run our example [load generator](https://github.com/cockroachdb/cockroach/blob/master/cloud/kubernetes/example-app.yaml) app. It disallows CockroachDB pods from being co-located on the same worker node. + +```yaml +cockroachdb: + crdbCluster: + affinity: + podAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - loadgen + topologyKey: topology.kubernetes.io/zone + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app.kubernetes.io/instance + operator: In + values: + - cockroachdb + topologyKey: kubernetes.io/hostname +``` + +The `preferredDuringSchedulingIgnoredDuringExecution` pod affinity rule, using the `In` operator and specified `weight`, encourages (but does not require) CockroachDB pods to be co-located with pods labeled `app=loadgen` already running in the same zone, as specified with `topologyKey`. + +The `requiredDuringSchedulingIgnoredDuringExecution` pod anti-affinity rule, using the `In` operator, requires CockroachDB pods not to be co-located on a worker node, as specified with `topologyKey`. + +For more context on how these rules work, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/). The [custom resource definition](https://github.com/cockroachdb/helm-charts/blob/master/cockroachdb-parent/charts/cockroachdb/values.yaml) details the fields supported by the Operator. + +### Example: Scheduling CockroachDB onto labeled nodes + +In this example, CockroachDB has not yet been deployed to a running Kubernetes cluster. 
We use a combination of node affinity and pod anti-affinity rules to schedule 3 CockroachDB pods onto three labeled worker nodes.
+
+1. List the worker nodes on the running Kubernetes cluster:
+
+    ```shell
+    $ kubectl get nodes
+
+    NAME                                         STATUS   ROLES    AGE     VERSION
+    gke-cockroachdb-default-pool-263138a5-kp3v   Ready    <none>   3m56s   v1.20.10-gke.301
+    gke-cockroachdb-default-pool-263138a5-nn62   Ready    <none>   3m56s   v1.20.10-gke.301
+    gke-cockroachdb-default-pool-41796213-75c9   Ready    <none>   3m56s   v1.20.10-gke.301
+    gke-cockroachdb-default-pool-41796213-bw3z   Ready    <none>   3m54s   v1.20.10-gke.301
+    gke-cockroachdb-default-pool-ccd74623-dghs   Ready    <none>   3m54s   v1.20.10-gke.301
+    gke-cockroachdb-default-pool-ccd74623-p5mf   Ready    <none>   3m55s   v1.20.10-gke.301
+    ```
+
+2. Add a `node=crdb` label to three of the running worker nodes.
+
+    ```shell
+    $ kubectl label nodes gke-cockroachdb-default-pool-263138a5-kp3v gke-cockroachdb-default-pool-41796213-75c9 gke-cockroachdb-default-pool-ccd74623-dghs node=crdb
+
+    node/gke-cockroachdb-default-pool-263138a5-kp3v labeled
+    node/gke-cockroachdb-default-pool-41796213-75c9 labeled
+    node/gke-cockroachdb-default-pool-ccd74623-dghs labeled
+    ```
+
+    In this example, 6 GKE nodes are deployed in 3 [node pools](https://cloud.google.com/kubernetes-engine/docs/concepts/node-pools), and each node pool resides in a separate availability zone. To maintain an even distribution of CockroachDB pods as specified in our [topology recommendations](recommended-production-settings.html#topology), each of the 3 labeled worker nodes must belong to a different node pool.
+
+    {{site.data.alerts.callout_info}}
+    This also ensures that the CockroachDB pods, which will be bound to persistent volumes in the same three availability zones, can be scheduled onto worker nodes in their respective zones.
+    {{site.data.alerts.end}}
+
+3. 
Add the following rules to the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster): + + ```yaml + cockroachdb: + crdbCluster: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node + operator: In + values: + - crdb + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app.kubernetes.io/instance + operator: In + values: + - cockroachdb + topologyKey: kubernetes.io/hostname + ``` + + The `nodeAffinity` rule requires CockroachDB pods to be scheduled onto worker nodes with the label `node=crdb`. The `podAntiAffinity` rule requires CockroachDB pods not to be co-located on a worker node, as specified with `topologyKey`. + +4. Apply the settings to the cluster: + + ```shell + $ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE + ``` + +5. The CockroachDB pods will be deployed to the 3 labeled nodes. To observe this, run: + + ```shell + $ kubectl get pods -o wide + + NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES + cockroach-operator-bfdbfc9c7-tbpsw 1/1 Running 0 171m 10.32.2.4 gke-cockroachdb-default-pool-263138a5-kp3v + cockroachdb-0 1/1 Running 0 100s 10.32.4.10 gke-cockroachdb-default-pool-ccd74623-dghs + cockroachdb-1 1/1 Running 0 100s 10.32.2.6 gke-cockroachdb-default-pool-263138a5-kp3v + cockroachdb-2 1/1 Running 0 100s 10.32.0.5 gke-cockroachdb-default-pool-41796213-75c9 + ``` + +## Taints and tolerations + +When a *taint* is added to a Kubernetes worker node, pods are prevented from being scheduled onto that node. This effect is ignored by adding a *toleration* to a pod that specifies a matching taint. + +Taints and tolerations are useful if you want to: + +* Prevent CockroachDB pods from being scheduled onto a labeled worker node. 
+* Evict CockroachDB pods from a labeled worker node on which they are currently running. + +For an example, see [Evicting CockroachDB from a running worker node](#example-evicting-cockroachdb-from-a-running-worker-node). + +### Add a toleration + +Specify pod tolerations in the `cockroachdb.crdbCluster.tolerations` object in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). + +The following toleration matches a taint with the specified key, value, and `NoSchedule` effect, using the `Equal` operator. A toleration that uses the `Equal` operator must include a `value` field: + +```yaml +cockroachdb: + crdbCluster: + tolerations: + - key: "test" + operator: "Equal" + value: "example" + effect: "NoSchedule" +``` + +A `NoSchedule` taint on a node prevents pods from being scheduled onto the node. The matching toleration allows a pod to be scheduled onto the node. A `NoSchedule` toleration is therefore best included before [deploying the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). + +{{site.data.alerts.callout_info}} +A `PreferNoSchedule` taint discourages, but does not disallow, pods from being scheduled onto the node. +{{site.data.alerts.end}} + +The following toleration matches every taint with the specified key and `NoExecute` effect, using the `Exists` operator. A toleration that uses the `Exists` operator must exclude a `value` field: + +```yaml +cockroachdb: + crdbCluster: + tolerations: + - key: "test" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 3600 +``` + +A `NoExecute` taint on a node prevents pods from being scheduled onto the node, and evicts pods from the node if they are already running on the node. The matching toleration allows a pod to be scheduled onto the node, and to continue running on the node if `tolerationSeconds` is not specified. If `tolerationSeconds` is specified, the pod is evicted after this number of seconds. 
+ +For more information on using taints and tolerations, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/). The [custom resource definition](https://github.com/cockroachdb/helm-charts/blob/master/cockroachdb-parent/charts/cockroachdb/values.yaml) details the fields supported by the Operator. + +### Example: Evicting CockroachDB from a running worker node + +In this example, CockroachDB has already been deployed on a Kubernetes cluster. We use the `NoExecute` effect to evict one of the CockroachDB pods from its worker node. + +1. List the worker nodes on the running Kubernetes cluster: + + ```shell + $ kubectl get nodes + + NAME STATUS ROLES AGE VERSION + gke-cockroachdb-default-pool-4e5ce539-68p5 Ready 56m v1.20.9-gke.1001 + gke-cockroachdb-default-pool-4e5ce539-j1h1 Ready 56m v1.20.9-gke.1001 + gke-cockroachdb-default-pool-95fde00d-173d Ready 56m v1.20.9-gke.1001 + gke-cockroachdb-default-pool-95fde00d-hw04 Ready 56m v1.20.9-gke.1001 + gke-cockroachdb-default-pool-eb2b2889-q15v Ready 56m v1.20.9-gke.1001 + gke-cockroachdb-default-pool-eb2b2889-q704 Ready 56m v1.20.9-gke.1001 + ``` + +2. Add a taint to a running worker node: + + ```shell + $ kubectl taint nodes gke-cockroachdb-default-pool-4e5ce539-j1h1 test=example:NoExecute + + node/gke-cockroachdb-default-pool-4e5ce539-j1h1 tainted + ``` + +3. Add a matching tolerations object in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). + + ```yaml + cockroachdb: + crdbCluster: + tolerations: + - key: "test" + operator: "Exists" + effect: "NoExecute" + ``` + + Because no tolerationSeconds is specified, CockroachDB will be evicted immediately from the tainted worker node. + +4. 
Apply the new settings to the cluster: + + ```shell + $ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE + ``` + +5. The CockroachDB pod running on the tainted node (in this case, cockroachdb-2) will be evicted and started on a different worker node. To observe this: + + ```shell + $ kubectl get pods -o wide + + NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES + cockroach-operator-c9fc6cb5c-bl6rs 1/1 Running 0 44m 10.32.2.4 gke-cockroachdb-default-pool-4e5ce539-68p5 + cockroachdb-0 1/1 Running 0 9m21s 10.32.4.10 gke-cockroachdb-default-pool-95fde00d-173d + cockroachdb-1 1/1 Running 0 9m21s 10.32.2.6 gke-cockroachdb-default-pool-eb2b2889-q15v + cockroachdb-2 0/1 Running 0 6s 10.32.0.5 gke-cockroachdb-default-pool-4e5ce539-68p5 + ``` + + `cockroachdb-2` is now scheduled onto the `gke-cockroachdb-default-pool-4e5ce539-68p5` node. + +## Topology spread constraints + +A pod with a *topology spread constraint* must satisfy its conditions when being deployed to a given topology. This is used to control the degree to which pods are unevenly distributed across failure domains. + +### Add a topology spread constraint + +Specify pod topology spread constraints in the `cockroachdb.crdbCluster.topologySpreadConstraints` object of the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). If you specify multiple `topologySpreadConstraints` objects, the matching pods must satisfy all of the constraints. 
+
+The following topology spread constraint ensures that CockroachDB pods deployed with the label `environment=production` will not be unevenly distributed across zones by more than `1` pod:
+
+```yaml
+cockroachdb:
+  crdbCluster:
+    topologySpreadConstraints:
+    - maxSkew: 1
+      topologyKey: topology.kubernetes.io/zone
+      whenUnsatisfiable: DoNotSchedule
+      labelSelector:
+        matchLabels:
+          environment: production
+```
+
+The `DoNotSchedule` condition prevents labeled pods from being scheduled onto Kubernetes worker nodes when doing so would fail to meet the spread and topology constraints specified with `maxSkew` and `topologyKey`, respectively.
+
+For more context on how these rules work, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/). The [custom resource definition](https://github.com/cockroachdb/helm-charts/blob/master/cockroachdb-parent/charts/cockroachdb/values.yaml) details the fields supported by the Operator.
+
+## Resource labels and annotations
+
+To assist in working with your cluster, you can add labels and annotations to your resources.
+
+Specify labels in `cockroachdb.crdbCluster.podLabels` and annotations in `cockroachdb.crdbCluster.podAnnotations` in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster):
+
+```yaml
+cockroachdb:
+  crdbCluster:
+    podLabels:
+      app.kubernetes.io/version: v25.1.4
+    podAnnotations:
+      operator: https://raw.githubusercontent.com/cockroachdb/helm-charts/refs/heads/master/cockroachdb-parent/charts/cockroachdb/values.yaml
+```
+
+To verify that the labels and annotations were applied to a pod, for example, run `kubectl describe pod {pod-name}`.
+
+For more information about [labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/) and [annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/), see the Kubernetes documentation.
diff --git a/src/current/v25.2/secure-cockroachdb-kubernetes-operator.md b/src/current/v25.2/secure-cockroachdb-kubernetes-operator.md new file mode 100644 index 00000000000..646c8d632b7 --- /dev/null +++ b/src/current/v25.2/secure-cockroachdb-kubernetes-operator.md @@ -0,0 +1,179 @@ +--- +title: Certificate Management with the Kubernetes Operator +summary: How to authenticate a secure CockroachDB cluster deployed with the Kubernetes operator. +toc: true +toc_not_nested: true +secure: true +docs_area: deploy +--- + +This page describes steps for additional procedures related to certificate management. + +## Rotate security certificates + +You may need to rotate the node, client, or CA certificates in the following scenarios: + +* The node, client, or CA certificates are expiring soon. +* Your organization's compliance policy requires periodic certificate rotation. +* The key (for a node, client, or CA) is compromised. +* You need to modify the contents of a certificate, for example, to add another DNS name or the IP address of a load balancer through which a node can be reached. In this case, you would need to rotate only the node certificates. + +### Example: Rotate certificates signed with `cockroach cert` + +If you previously [authenticated with cockroach cert](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster), follow these steps to rotate the certificates using the same CA: + +1. Create a new client certificate and key pair for the root user, overwriting the previous certificate and key: + + ```shell + $ cockroach cert create-client root \ + --certs-dir=certs \ + --ca-key=my-safe-directory/ca.key \ + --overwrite + ``` + +2. 
Upload the new client certificate and key to the Kubernetes cluster as a **new** secret, renaming them to the filenames required by the Operator: + + ```shell + $ kubectl create secret generic cockroachdb.client.root.2 \ + --from-file=tls.key=certs/client.root.key \ + --from-file=tls.crt=certs/client.root.crt \ + --from-file=ca.crt=certs/ca.crt + + secret/cockroachdb.client.root.2 created + ``` + +3. Create a new certificate and key pair for your CockroachDB nodes, overwriting the previous certificate and key. Specify the namespace you used when [deploying the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). This example uses the `cockroach-ns` namespace: + + ```shell + $ cockroach cert create-node localhost \ + 127.0.0.1 \ + cockroachdb-public \ + cockroachdb-public.cockroach-ns \ + cockroachdb-public.cockroach-ns.svc.cluster.local \ + *.cockroachdb \ + *.cockroachdb.cockroach-ns \ + *.cockroachdb.cockroach-ns.svc.cluster.local \ + --certs-dir=certs \ + --ca-key=my-safe-directory/ca.key \ + --overwrite + ``` + +4. Upload the new node certificate and key to the Kubernetes cluster as a **new** secret, renaming them to the filenames required by the Operator: + + ```shell + $ kubectl create secret generic cockroachdb.node.2 \ + --from-file=tls.key=certs/node.key \ + --from-file=tls.crt=certs/node.crt \ + --from-file=ca.crt=certs/ca.crt + + secret/cockroachdb.node.2 created + ``` + +5. Add `cockroachdb.tls.externalCertificates.certificates.nodeClientSecretName` and `cockroachdb.tls.externalCertificates.certificates.nodeSecretName` to the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster): + + ```yaml + cockroachdb: + tls: + externalCertificates: + enabled: true + certificates: + nodeClientSecretName: "cockroachdb.client.root.2" + nodeSecretName: "cockroachdb.node.2" + ``` + +6. 
Check that the secrets were created on the cluster: + + ```shell + $ kubectl get secrets + + NAME TYPE DATA AGE + cockroachdb.client.root.2 Opaque 3 4s + cockroachdb.node.2 Opaque 3 1s + default-token-6js7b kubernetes.io/service-account-token 3 9h + ``` + + {{site.data.alerts.callout_info}} + Remember that `nodeSecretName` and `nodeClientSecretName` in the Operator must specify these secret names. For details, see the [deployment guide](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). + {{site.data.alerts.end}} + +7. Apply the new settings to the cluster: + + ```shell + $ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE + ``` + + The pods will terminate and restart one at a time, using the new certificates. You can observe this process: + + ```shell + $ kubectl get pods + + NAME READY STATUS RESTARTS AGE + cockroach-operator-655fbf7847-lvz6x 1/1 Running 0 4h29m + cockroachdb-0 1/1 Running 0 4h16m + cockroachdb-1 1/1 Terminating 0 4h16m + cockroachdb-2 1/1 Running 0 43s + ``` + +8. Delete the existing client secret that is no longer in use: + + ```shell + $ kubectl delete secret cockroachdb.client.root + + secret "cockroachdb.client.root" deleted + ``` + +9. Delete the existing node secret that is no longer in use: + + ```shell + $ kubectl delete secret cockroachdb.node + + secret "cockroachdb.node" deleted + ``` + +## Secure the webhooks + +The Operator ships with both [mutating](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#mutatingadmissionwebhook) and [validating](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#validatingadmissionwebhook) webhooks. Communication between the Kubernetes API server and the webhook service must be secured with TLS. + +By default, the Operator searches for the TLS secret `cockroach-operator-certs`, which contains a CA certificate. 
If the secret is not found, the Operator auto-generates `cockroach-operator-certs` with a CA certificate for future runs. + +The Operator then generates a one-time server certificate for the webhook server that is signed with `cockroach-operator-certs`. Finally, the CA bundle for both mutating and validating webhook configurations is patched with the CA certificate. + +You can also use your own certificate authority rather than `cockroach-operator-certs`. Both the certificate and key files you generate must be PEM-encoded. See the following [example](#example-using-openssl-to-secure-the-webhooks). + +### Example: Using OpenSSL to secure the webhooks + +These steps demonstrate how to use the [openssl genrsa](https://www.openssl.org/docs/manmaster/man1/genrsa.html) and [openssl req](https://www.openssl.org/docs/manmaster/man1/req.html) subcommands to secure the webhooks on a running Kubernetes cluster: + +1. Generate a 4096-bit RSA private key: + + ```shell + $ openssl genrsa -out tls.key 4096 + ``` + +2. Generate an X.509 certificate, valid for 10 years. You will be prompted for the certificate field values. + + ```shell + $ openssl req -x509 -new -nodes -key tls.key -sha256 -days 3650 -out tls.crt + ``` + +3. Create the secret, making sure that you are in the correct namespace: + + ```shell + $ kubectl create secret tls cockroach-operator-certs --cert=tls.crt --key=tls.key + + secret/cockroach-operator-certs created + ``` + +4. Remove the certificate and key from your local environment: + + ```shell + $ rm tls.crt tls.key + ``` + +5. 
Roll the Operator deployment to ensure a new server certificate is generated: + + ```shell + $ kubectl rollout restart deploy/cockroach-operator-manager + + deployment.apps/cockroach-operator-manager restarted + ``` diff --git a/src/current/v25.2/upgrade-cockroachdb-kubernetes-operator.md b/src/current/v25.2/upgrade-cockroachdb-kubernetes-operator.md new file mode 100644 index 00000000000..24f2f7b6dc6 --- /dev/null +++ b/src/current/v25.2/upgrade-cockroachdb-kubernetes-operator.md @@ -0,0 +1,139 @@ +--- +title: Upgrade a cluster in Kubernetes with the Operator +summary: How to upgrade a secure CockroachDB cluster deployed with the Kubernetes operator. +toc: true +toc_not_nested: true +secure: true +docs_area: deploy +--- + +This page describes how to upgrade a CockroachDB cluster that is [deployed on a Kubernetes cluster](deploy-cockroachdb-with-kubernetes-operator.html). + +## Overview + +{% include common/upgrade/overview.md %} + +On Kubernetes, the upgrade is a staged update in which each pod's container image for CockroachDB is updated in a rolling fashion. The cluster remains available during the upgrade. + +## Before you begin + +{% include {{ page.version.version }}/orchestration/operator-check-namespace.md %} +{% include common/upgrade/prepare-to-upgrade-self-hosted.md %} + +### Ensure you have a valid license key + +{% include common/upgrade-cockroach-version-license-limitations.md %} + +## Perform a patch upgrade + +To upgrade from one patch release to another within the same major version, perform the following steps on one node at a time: + +1. Change the container image in the custom resource: + + ```yaml + cockroachdb: + crdbCluster: + image: + name: cockroachdb/cockroach:v25.2.2 + ``` + +2. 
Apply the new settings to the cluster: + + ```shell + $ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE + ``` + + The Operator will perform the staged update. + +3. To check the status of the rolling upgrade, run `kubectl get pods`. + +4. Verify that all pods have been upgraded: + + ```shell + $ kubectl get pods \ + -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.containers[0].image}{"\n"}' + ``` + +You can also check the CockroachDB version of each node in the [DB Console](ui-cluster-overview-page.html#node-details). + +### Roll back a patch upgrade + +{% include_cached common/upgrade/patch-rollback-kubernetes.md %} + +## Perform a major-version upgrade + +To perform a major upgrade: + +1. Change the container image in the values file: + + ```yaml + cockroachdb: + crdbCluster: + image: + name: cockroachdb/cockroach:v25.1.4 + ``` + +2. Apply the new settings to the cluster: + + ```shell + $ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE + ``` + + The Operator will perform the staged update. + +3. To check the status of the rolling upgrade, run `kubectl get pods`. + +4. Verify that all pods have been upgraded: + + ```shell + $ kubectl get pods \ + -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.containers[0].image}{"\n"}' + ``` + +5. If auto-finalization is enabled (the default), finalization begins as soon as the last node rejoins the cluster. When finalization finishes, the upgrade is complete. + +6. If auto-finalization is disabled, follow your organization's testing procedures to decide whether to [finalize the upgrade](#finalize-a-major-version-upgrade-manually) or [roll back](#roll-back-a-major-version-upgrade) the upgrade. After finalization begins, you can no longer roll back to the cluster's previous major version. 
+ +### Finalize a major-version upgrade manually + +{% include common/upgrade/finalize-kubernetes.md %} + +### Roll back a major-version upgrade + +To roll back to the previous major version before an upgrade is finalized: + +1. Change the container image in the custom resource to use the previous major version: + + ```yaml + cockroachdb: + crdbCluster: + image: + name: cockroachdb/cockroach:v24.3 + ``` + +2. Apply the new settings to the cluster: + + ```shell + $ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE + ``` + + The Operator will perform the staged rollback. + +3. To check the status of the rollback, run `kubectl get pods`. + +4. Verify that all pods have been rolled back: + + ```shell + $ kubectl get pods \ + -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.containers[0].image}{"\n"}' + ``` + +Rollbacks do not require finalization. + +## Disable auto-finalization + +{% include common/upgrade/disable-auto-finalization.md %} + +## Troubleshooting + +{% include common/upgrade/troubleshooting-self-hosted.md %} From 61c403f2d125febeebcfbfca93a7de8f960ec4f7 Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Fri, 1 Aug 2025 14:10:25 -0400 Subject: [PATCH 02/27] Apply suggestions from code review Applying batch suggestions from Ryan's review in github Co-authored-by: Ryan Kuo <8740013+taroface@users.noreply.github.com> --- ...nfigure-cockroachdb-kubernetes-operator.md | 12 +-- ...oy-cockroachdb-with-kubernetes-operator.md | 92 +++++++++---------- .../v25.2/kubernetes-operator-performance.md | 88 +++++++++--------- .../migrate-cockroachdb-kubernetes-helm.md | 24 ++--- 4 files changed, 110 insertions(+), 106 deletions(-) diff --git a/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md b/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md index c3c78d04a91..0fe1a592e1e 100644 --- 
a/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md @@ -9,7 +9,7 @@ docs_area: deploy This page explains how to configure Kubernetes cluster resources such as memory, CPU, and storage. -On a production cluster, the resources you allocate to CockroachDB should be proportionate to your machine types and workload. We recommend that you determine and set these values before deploying the cluster, but you can also update the values on a running cluster. +On a production cluster, the resources you allocate to CockroachDB should be proportionate to your machine types and workload. Cockroach Labs recommends that you determine and set these values before deploying the cluster, but you can also update the values on a running cluster. {{site.data.alerts.callout_info}} Run `kubectl describe nodes` to see the available resources on the instances that you have provisioned. @@ -20,12 +20,12 @@ Run `kubectl describe nodes` to see the available resources on the instances tha You can set the CPU and memory resources allocated to the CockroachDB container on each pod. {{site.data.alerts.callout_info}} -1 CPU in Kubernetes is equivalent to 1 vCPU or 1 hyperthread. For best practices on provisioning CPU and memory for CockroachDB, see the [Production Checklist](recommended-production-settings.html#hardware). +1 CPU in Kubernetes is equivalent to 1 vCPU or 1 hyperthread. For best practices on provisioning CPU and memory for CockroachDB, refer to the [Production Checklist](recommended-production-settings.html#hardware). 
{{site.data.alerts.end}} Specify CPU and memory values in `cockroachdb.crdbCluster.resources.limits` and `cockroachdb.crdbCluster.resources.requests` in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster): -```yaml +~~~yaml cockroachdb: crdbCluster: resources: @@ -35,7 +35,7 @@ cockroachdb: requests: cpu: 4000m memory: 16Gi -``` +~~~ Apply the new settings to the cluster: @@ -55,7 +55,7 @@ For more information on how Kubernetes handles resources, see the [Kubernetes do Each CockroachDB node reserves a portion of its available memory for its cache and for storing temporary data for SQL queries. For more information on these settings, see the [Production Checklist](recommended-production-settings.html#cache-and-sql-memory-size). -The Kubernetes operator dynamically sets cache size and SQL memory size each to 25% (the recommended percent) of the available memory, which depends on the memory request and limit you [specified](#memory-and-cpu) for your configuration. These values can be modified by adding the `cache` or `max-sql-memory` fields to `cockroachdb.crdbCluster.flags`, which is equivalent to appending `--cache` or `--max-sql-memory` as [cockroach start flags](cockroach-start.html#flags). +The Kubernetes operator dynamically sets cache size and SQL memory size each to 25% (the recommended percentage) of the available memory, which depends on the memory request and limit you [specified](#memory-and-cpu) for your configuration. These values can be modified by adding the `cache` or `max-sql-memory` fields to `cockroachdb.crdbCluster.flags`, which is equivalent to appending `--cache` or `--max-sql-memory` as [cockroach start flags](cockroach-start.html#flags). 
## Persistent storage @@ -109,7 +109,7 @@ The Operator separates network traffic into three ports: - diff --git a/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md b/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md index 0216e6e7093..473c6768fc2 100644 --- a/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md +++ b/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md @@ -7,7 +7,7 @@ secure: true docs_area: deploy --- -This page shows you how to start and stop a secure 3-node CockroachDB cluster in a single [Kubernetes](http://kubernetes.io/) cluster. +This page describes how to start and stop a secure 3-node CockroachDB cluster in a single [Kubernetes](http://kubernetes.io/) cluster. ## Prerequisites and best practices @@ -30,19 +30,19 @@ The Helm chart consists of two sub-charts: ### Network -Service Name Indication (SNI) is an extension to the TLS protocol which allows a client to indicate which hostname it is attempting to connect to at the start of the TCP handshake process. The server can present multiple certificates on the same IP address and TCP port number, and one server can serve multiple secure websites or API services even if they use different certificates. +Service Name Indication (SNI) is an extension to the TLS protocol that allows a client to indicate which hostname it is attempting to connect to at the start of the TCP handshake process. The server can present multiple certificates on the same IP address and TCP port number, and one server can serve multiple secure websites or API services even if they use different certificates. -Due to its order of operations, the PostgreSQL wire protocol's implementation of TLS is not compatible with SNI-based routing in the Kubernetes ingress controller. Instead, use a TCP load balancer for CockroachDB that is not shared with other services. 
+Due to its order of operations, the PostgreSQL wire protocol's implementation of TLS is incompatible with SNI-based routing in the Kubernetes ingress controller. Instead, use a TCP load balancer for CockroachDB that is not shared with other services. -If you want to secure your cluster to use TLS certificates for all network communications, Helm must be installed with RBAC privileges or else you will get an "attempt to grant extra privileges" error. +If you want to secure your cluster to use TLS certificates for all network communications, Helm must be installed with RBAC privileges. Otherwise, you will get an `attempt to grant extra privileges` error. ### Localities -CockroachDB clusters use locality labels to determine an efficient distribution of replicas. This is especially important in the case of multi-region deployments. In cloud provider deployments such as EKS/AKS/GKE, the `topology.kubernetes.io/region` and `topology.kubernetes.io/zone` labels are applied implicitly to Kubernetes nodes and populated by the regions and zones specific to the cloud provider. Other locality labels can be arbitrarily defined for further granularity, such as province, datacenter, rack, etc., but these need to be applied individually to the Kubernetes node when initialized so that CockroachDB can understand where the node lives and distribute replicas accordingly. +CockroachDB clusters use locality labels to efficiently distribute replicas. This is especially important in multi-region deployments. In cloud provider deployments (e.g., [GKE](#hosted-gke), [EKS](#hosted-eks), or [AKS](#hosted-aks)), the [`topology.kubernetes.io/region`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesioregion) and [`topology.kubernetes.io/zone`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesiozone) labels are applied implicitly to Kubernetes nodes and populated by the regions and zones specific to the cloud provider. 
For further granularity, you can define arbitrary locality labels (e.g., `province`, `datacenter`, `rack`), but these need to be applied individually to the Kubernetes node when initialized so that CockroachDB can understand where the node lives and distribute replicas accordingly. -In the case of baremetal Kubernetes deployments, you must plan a hierarchy of locality labels that suit your CockroachDB node distribution, then apply these labels individually to nodes when they are initialized. Most of these values can be set arbitrarily, but region and zone locations must be set in the reserved `topology.kubernetes.io/region` and `topology.kubernetes.io/zone` namespace, respectively. +On bare metal Kubernetes deployments, you must plan a hierarchy of locality labels that suit your CockroachDB node distribution, then apply these labels individually to nodes when they are initialized. Although you can set most of these values arbitrarily, you must set region and zone locations in the reserved `topology.kubernetes.io/region` and `topology.kubernetes.io/zone` namespaces, respectively. -For more information on how locality labels are used by CockroachDB, see the [--locality flag documentation](cockroach-start.html#locality). +For more information on how locality labels are used by CockroachDB, refer to the [`--locality` documentation](cockroach-start.html#locality). ### Architecture @@ -50,7 +50,7 @@ The operator is only supported in environments with an ARM64 or AMD64 architectu ### Resources -When starting Kubernetes, select machines with at least 4 vCPUs and 16 GiB of memory, and provision at least 2 vCPUs and 8 Gi of memory to CockroachDB per pod. These minimum settings are used by default in this deployment guide, and are appropriate for testing purposes only. On a production deployment, you should adjust the resource settings for your workload. 
+When starting Kubernetes, select machines with at least 4 vCPUs and 16 GiB of memory, and provision at least 2 vCPUs and 8 GiB of memory to CockroachDB per pod. These minimum settings are used by default in this deployment guide, and are appropriate for testing purposes only. On a production deployment, you should adjust the resource settings for your workload. ### Storage @@ -60,8 +60,8 @@ Kubernetes deployments use external persistent volumes that are often replicated You can use the hosted [Google Kubernetes Engine (GKE)](#hosted-gke) service, hosted [Amazon Elastic Kubernetes Service (EKS)](#hosted-eks), or [Microsoft Azure Kubernetes Service (AKS)](#hosted-aks) to quickly start Kubernetes. -{{site.data.alerts.callout_info}} -GKE/EKS/AKS are not required to run CockroachDB on Kubernetes. Any cluster hardware with the minimum recommended Kubernetes version and at least 3 pods, each presenting sufficient resources to start a CockroachDB node, can also be used. However, note that support for other deployments may vary. +{{site.data.alerts.callout_success}} +Cloud providers such as GKE, EKS, and AKS are not required to run CockroachDB on Kubernetes. You can use any cluster hardware with the minimum recommended Kubernetes version and at least 3 pods, each presenting sufficient resources to start a CockroachDB node. However, note that support for other deployments may vary. {{site.data.alerts.end}} ### Hosted GKE @@ -72,7 +72,7 @@ GKE/EKS/AKS are not required to run CockroachDB on Kubernetes. Any cluster hardw The documentation offers the choice of using Google's Cloud Shell product or using a local shell on your machine. Choose to use a local shell if you want to be able to view the DB Console using the steps in this guide. -2. From your local workstation, start the Kubernetes cluster, specifying one of the available [regions](https://cloud.google.com/compute/docs/regions-zones#available) (e.g., `us-east1`): +1. 
From your local workstation, start the Kubernetes cluster, specifying one of the available [regions](https://cloud.google.com/compute/docs/regions-zones#available) (e.g., `us-east1`): Since this region can differ from your default `gcloud` region, be sure to include the `--region` flag to run `gcloud` commands against this cluster. @@ -99,7 +99,7 @@ GKE/EKS/AKS are not required to run CockroachDB on Kubernetes. Any cluster hardw Account: [your.google.cloud.email@example.org] ``` - This command returns your email address in all lowercase. However, in the next step, you must enter the address using the accurate capitalization. For example, if your address is YourName@example.com, you must use YourName@example.com and not yourname@example.com. + The preceding command returns your email address in all lowercase. However, in the next step, you must enter the address using the accurate capitalization. For example, if your address is `YourName@example.com`, you must use `YourName@example.com` and not `yourname@example.com`. 4. [Create the RBAC roles](https://cloud.google.com/kubernetes-engine/docs/how-to/role-based-access-control#prerequisites_for_using_role-based_access_control) CockroachDB needs for running on GKE, using the address from the previous step: @@ -147,9 +147,9 @@ GKE/EKS/AKS are not required to run CockroachDB on Kubernetes. Any cluster hardw 1. Complete the **Before you begin**, **Define environment variables**, and **Create a resource groups** steps described in the [AKS quickstart guide](https://learn.microsoft.com/azure/aks/learn/quick-kubernetes-deploy-cli). This includes setting up the Azure CLI and the `az` tool, which is the command-line tool to create and manage Azure cloud resources. - Set the environment variables as desired for your CRDB deployment. For these instructions, set the `MY_AKS_CLUSTER_NAME` variable to `cockroachdb`. + Set the environment variables as desired for your CockroachDB deployment. 
For these instructions, set the `MY_AKS_CLUSTER_NAME` variable to `cockroachdb`. - Do not follow the **Create an AKS cluster** steps or following sections of the quickstart guide, as these topics will be described specifically for CRDB in this documentation. + Do not follow the **Create an AKS cluster** steps or following sections of the quickstart guide, as these topics will be described specifically for CockroachDB in this documentation. 2. From your workstation, create the Kubernetes cluster: @@ -161,7 +161,7 @@ GKE/EKS/AKS are not required to run CockroachDB on Kubernetes. Any cluster hardw --generate-ssh-keys ``` -3. Create an application in your Azure tenant and create a secret named `azure-cluster-identity-credentials-secret` which contains `AZURE_CLIENT_ID` and `AZURE_CLIENT_SECRET` to hold the application credentials. The following example YAML can be used to define this application: +3. Create an application in your Azure tenant and create a secret named `azure-cluster-identity-credentials-secret` that contains `AZURE_CLIENT_ID` and `AZURE_CLIENT_SECRET` to hold the application credentials. You can use the following example YAML to define this application: ```shell apiVersion: v1 @@ -175,13 +175,13 @@ GKE/EKS/AKS are not required to run CockroachDB on Kubernetes. Any cluster hardw azure_client_secret: s3cr3t ``` - See the [Azure.Identity documentation](https://learn.microsoft.com/dotnet/api/azure.identity.environmentcredential?view=azure-dotnet) for more information on how to use these variables. + For more information on how to use these variables, refer to the [`Azure.Identity` documentation](https://learn.microsoft.com/dotnet/api/azure.identity.environmentcredential?view=azure-dotnet). -### Baremetal/other deployments +### Bare metal deployments -For baremetal deployments, the specific Kubernetes infrastructure deployment steps should be similar to those described in [Hosted GKE](#hosted-gke) and [Hosted EKS](#hosted-eks). 
+For bare metal deployments, the specific Kubernetes infrastructure deployment steps should be similar to those described in [Hosted GKE](#hosted-gke) and [Hosted EKS](#hosted-eks). -* Be prepared to apply labels to your Kubernetes nodes upon initialization, that can be used by CockroachDB as [locality labels](#localities). In other cloud provider deployments, some of these labels are applied automatically by the provider. These must be applied manually in a baremetal deployment. +- You must plan a hierarchy of [locality labels](#localities) that suit your CockroachDB node distribution, then apply these labels individually to nodes when they are initialized. Although you can set most of these values arbitrarily, you must set region and zone locations in the reserved `topology.kubernetes.io/region` and `topology.kubernetes.io/zone` namespaces, respectively. ## Step 2. Start CockroachDB @@ -193,7 +193,7 @@ For baremetal deployments, the specific Kubernetes infrastructure deployment ste $ git clone https://github.com/cockroachdb/helm-charts.git ``` -2. Set your environment variables. This step is optional but recommended in order to use the example commands and templates described in these instructions. Note the default Kubernetes namespace of `cockroach-ns`. +2. Set your environment variables. This step is optional but recommended in order to use the example commands and templates described in the following instructions. Note the default Kubernetes namespace of `cockroach-ns`. ```shell $ export CRDBOPERATOR=crdb-operator @@ -210,9 +210,9 @@ For baremetal deployments, the specific Kubernetes infrastructure deployment ste ### Initialize the cluster -1. Open `cockroachdb-parent/charts/cockroachdb/values.yaml`, a values file that tells Helm how to configure the Kubernetes cluster, in your text editor of choice. +1. Open `cockroachdb-parent/charts/cockroachdb/values.yaml`, a values file that tells Helm how to configure the Kubernetes cluster, in your text editor. -2. 
Modify the `cockroachdb.crdbCluster.regions` section to describe the number of nodes to deploy and what region(s) to deploy them in. The default configuration uses `k3d`, replace with the `cloudProvider` of choice (`gcp`, `aws`, `azure`). For other deployments such as baremetal, the `cloudProvider` field is optional and can be removed altogether. The following example initializes three nodes on Google Cloud in the `us-central1` region: +2. Modify the `cockroachdb.crdbCluster.regions` section to describe the number of nodes to deploy and what region(s) to deploy them in. Replace the default `cloudProvider` with the appropriate value (`gcp`, `aws`, `azure`). For bare metal deployments, you can remove the `cloudProvider` field. The following example initializes three nodes on Google Cloud in the `us-central1` region: ```yaml cockroachdb: crdbCluster: regions: @@ -226,12 +226,12 @@ For baremetal deployments, the specific Kubernetes infrastructure deployment ste If you intend to deploy CockroachDB nodes across multiple different regions, follow the additional steps described in [Deploy across multiple regions](#deploy-across-multiple-regions). -3. Modify the values file with the CPU and memory requests and limits for each node to use, in the `cockroachdb.crdbCluster.resources` section. The default values are 4vCPU and 16GB of memory but this section must be uncommented similar to the following example: +3. Uncomment and modify `cockroachdb.crdbCluster.resources` in the values file with the CPU and memory requests and limits for each node to use. The default values are 4 vCPUs and 16 GiB of memory: - See [Resource management](configure-cockroachdb-kubernetes-operator.html) for more information on configuring node resource allocation. + For more information on configuring node resource allocation, refer to [Resource management](configure-cockroachdb-kubernetes-operator.html). 4. Modify the TLS configuration as desired.
For a secure deployment, set `cockroachdb.tls.enabled` in the values file to `true`. You can either use the default self-signer utility to generate all certificates, provide a custom CA certificate and generate other certificates, or use your own certificates. - - **All self-signed certificates**: By default, the certificates are created by the self-signer utility which requires no configuration beyond setting a custom certificate duration if desired. This utility creates self-signed certificates for the nodes and root client which are stored in a secret. You can see these certificates by running `kubectl get secrets`: + - **All self-signed certificates**: By default, the certificates are created by the self-signer utility, which requires no configuration beyond setting a custom certificate duration if desired. This utility creates self-signed certificates for the nodes and root client which are stored in a secret. You can see these certificates by running `kubectl get secrets`: ```shell $ kubectl get secrets @@ -252,7 +252,7 @@ For baremetal deployments, the specific Kubernetes infrastructure deployment ste $ cockroach cert create-ca --certs-dir=certs --ca-key=my-safe-directory/ca.key ``` - Set `cockroachdb.tls.selfSigner.caProvided` to true and specify the secret where the certificate is stored: + Set `cockroachdb.tls.selfSigner.caProvided` to `true` and specify the secret where the certificate is stored: ```yaml cockroachdb: @@ -261,13 +261,13 @@ For baremetal deployments, the specific Kubernetes infrastructure deployment ste selfSigner: enabled: true caProvided: true - caSecret: + caSecret: {ca-secret-name} ``` {{site.data.alerts.callout_info}} If you are deploying on OpenShift you must also set `cockroachdb.tls.selfSigner.securityContext.enabled` to `false` to mitigate stricter security policies. 
{{site.data.alerts.end}} - - **All custom certificates**: Set up your certificates and load them into your Kubernetes cluster as Secrets using the following commands: + - **All custom certificates**: Set up your certificates and load them into your Kubernetes cluster as secrets using the following commands: ```shell $ mkdir certs @@ -311,7 +311,7 @@ For baremetal deployments, the specific Kubernetes infrastructure deployment ste secretName: cockroachdb-ca ``` - If your certificates are stored in tls secrets such as secrets generated by cert-manager, the secret will contain files named: `ca.crt`, `tls.crt`, and `tls.key`. + If your certificates are stored in TLS secrets, such as secrets generated by `cert-manager`, the secret will contain files named: `ca.crt`, `tls.crt`, and `tls.key`. For CockroachDB, rename these files as applicable to match the following naming scheme: `ca.crt`, `node.crt`, `node.key`, `client.root.crt`, and `client.root.key`. @@ -332,11 +332,11 @@ For baremetal deployments, the specific Kubernetes infrastructure deployment ste - `{node_secret_name}`: The name of the Kubernetes secret that contains the generated client certificate and key. - `{client_secret_name}`: The name of the Kubernetes secret that contains the generated node certificate and key. - See [Example: Authenticate with cockroach cert](#example-authenticate-with-cockroach-cert) for a more detailed walkthrough of a TLS configuration with manual certificates. + For a detailed tutorial of a TLS configuration with manual certificates, refer to [Example: Authenticate with cockroach cert](#example-authenticate-with-cockroach-cert). -5. Review [locality labels](#localities) as needed for your Kubernetes host. These labels are written as a list of Kubernetes node values where the locality information of each node is stored, defined in `cockroachdb.crdbCluster.localityLabels`. 
When CockroachDB is initialized on a node, these values are processed as though they are provided through the [cockroach start –locality](cockroach-start#locality) flag. +5. In `cockroachdb.crdbCluster.localityLabels`, provide [locality labels](#localities) that specify where the locality information of each Kubernetes node is stored. When CockroachDB is initialized on a node, it processes these values as though they are provided through the [`cockroach start --locality`](cockroach-start.html#locality) flag. - If no locality labels are provided in `cockroachdb.crdbCluster.localityLabels`, the default locality labels are `region` and `zone`, stored in `topology.kubernetes.io/region` and `topology.kubernetes.io/zone` respectively. Cloud providers like EKS/AKS/GKE auto-populate these values describing the node’s region and zone, so for cloud provider deployments the locality labels can be left as-is: + It is not necessary to modify `cockroachdb.crdbCluster.localityLabels` in cloud provider deployments, in which the [`topology.kubernetes.io/region`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesioregion) and [`topology.kubernetes.io/zone`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesiozone) locality labels are applied implicitly to Kubernetes nodes and populated by the regions and zones specific to the cloud provider. ```yaml cockroachdb: @@ -344,7 +344,7 @@ For baremetal deployments, the specific Kubernetes infrastructure deployment ste localityLabels: [] ``` - For baremetal deployments, you can use the default `localityLabels` configuration to use the default values for `region` and `zone` (`topology.kubernetes.io/region` and `topology.kubernetes.io/zone`), but will need to be set manually these values manually when the node is initialized because there is no cloud provider to do so automatically.
+ For bare metal deployments, you can also use the default `topology.kubernetes.io/region` and `topology.kubernetes.io/zone` values. However, since these values cannot be set implicitly from a cloud provider, you need to set them manually when initializing the node. To add more granular levels of locality to your nodes, add custom locality levels as values in the `cockroachdb.crdbCluster.localityLabels` list. Any custom `localityLabels` configuration overrides the default `region` and `zone` configuration, so if you append an additional locality level but wish to keep the `region` and `zone` labels you must declare them manually. @@ -359,7 +359,7 @@ For baremetal deployments, the specific Kubernetes infrastructure deployment ste - example.datacenter.locality ``` - In this example, if a Kubernetes node is initialized in the `us-central1` region, `us-central1-c` zone, and `dc2` datacenter, its `cockroach start –locality` command would be similar to the following command: + In this example, if a Kubernetes node is initialized in the `us-central1` region, `us-central1-c` zone, and `dc2` datacenter, its `cockroach start --locality` flag would be equivalent to the following: ```shell cockroach start --locality region=us-central1,zone=us-central1-c,example.datacenter.locality=dc2 @@ -394,10 +394,10 @@ The Helm chart supports specifying multiple region definitions in `cockroachdb.c For each region, modify the `regions` configuration as described in [Initialize the cluster](#initialize-the-cluster) and perform `helm install` against the respective Kubernetes cluster. While applying the installation in a given region, do the following: -* Verify that the domain matches `cockroachdb.clusterDomain` in the values file +* Verify that the domain matches `cockroachdb.clusterDomain` in the values file. * Ensure that `cockroachdb.crdbCluster.regions` captures the information for regions that have already been deployed, including the current region. 
This allows CockroachDB in the current region to connect to clusters deployed in the existing regions. -The following example shows a configuration across two regions with 3 nodes in each cluster: +The following example shows a configuration across two regions, `us-central1` and `us-east1`, with 3 nodes in each cluster: ```yaml cockroachdb: @@ -420,14 +420,14 @@ cockroachdb: To use the CockroachDB SQL client, follow these steps to launch a secure pod running the `cockroach` binary. -1. Download the secure client k8s application: +1. Download the secure client Kubernetes application: ```shell $ curl -O https://raw.githubusercontent.com/cockroachdb/helm-charts/master/examples/client-secure.yaml ``` {{site.data.alerts.callout_danger}} - Be mindful that this client tool logs into CockroachDB as root using the root certificates. + This client tool logs into CockroachDB as `root` using the root certificates. {{site.data.alerts.end}} 2. Edit the yaml file with the following values: @@ -494,7 +494,7 @@ To use the CockroachDB SQL client, follow these steps to launch a secure pod run To access the cluster's [DB Console](ui-overview.html): -1. On secure clusters, [certain pages of the DB Console](ui-overview.html#db-console-access) can only be accessed by admin users. +1. On secure clusters, [certain pages of the DB Console](ui-overview.html#db-console-access) can only be accessed by `admin` users. Get a shell into the pod and start the CockroachDB [built-in SQL client](cockroach-sql.html): @@ -524,16 +524,16 @@ To access the cluster's [DB Console](ui-overview.html): Forwarding from 127.0.0.1:8080 -> 8080 ``` - The port-forward command must be run on the same machine as the web browser in which you want to view the DB Console. If you have been running these commands from a cloud instance or other non-local shell, you will not be able to view the UI without configuring kubectl locally and running the above port-forward command on your local machine. 
+ Run the `port-forward` command on the same machine as the web browser in which you want to view the DB Console. If you have been running these commands from a cloud instance or other non-local shell, you will not be able to view the UI without configuring `kubectl` locally and running the preceding `port-forward` command on your local machine. -5. Go to [https://localhost:8080](https://localhost:8080/) and log in with the username and password you created earlier. +5. Go to [`https://localhost:8080`](https://localhost:8080/) and log in with the username and password you created earlier. {{site.data.alerts.callout_info}} - If you are using Google Chrome, and you are getting an error about not being able to reach `localhost` because its certificate has been revoked, go to `chrome://flags/#allow-insecure-localhost`, enable "Allow invalid certificates for resources loaded from localhost", and then restart the browser. Enabling this Chrome feature degrades security for all sites running on `localhost`, not just CockroachDB's DB Console, so be sure to enable the feature only temporarily. + If you are using Google Chrome, and get an error about not being able to reach `localhost` because its certificate has been revoked, go to `chrome://flags/#allow-insecure-localhost`, enable "Allow invalid certificates for resources loaded from localhost", and then restart the browser. This degrades security for all sites running on `localhost`, not just CockroachDB's DB Console, so enable the feature only temporarily. {{site.data.alerts.end}} -6. In the UI, verify that the cluster is running as expected: - 1. View the [Node List](ui-cluster-overview-page.html#node-list) to ensure that all nodes successfully joined the cluster. +6. In the DB Console, verify that the cluster is running as expected: + 1. View the [**Node List**](ui-cluster-overview-page.html#node-list) to ensure that all nodes successfully joined the cluster. 2. 
Click the **Databases** tab on the left to verify that `bank` is listed. ## Next steps @@ -550,9 +550,9 @@ Read the following pages for detailed information on cluster scaling, certificat ## Appendix -### Example: Authenticate with `cockroach cert` +### Authenticate with `cockroach cert` -This example uses [cockroach cert commands](cockroach-cert.html) to generate and sign the CockroachDB node and client certificates. To learn more about the supported methods of signing certificates, refer to [Authentication](authentication.html#using-digital-certificates-with-cockroachdb). +The following example uses [`cockroach cert` commands](cockroach-cert.html) to generate and sign the CockroachDB node and client certificates. To learn more about the supported methods of signing certificates, refer to [Authentication](authentication.html#using-digital-certificates-with-cockroachdb). 1. Create two directories: diff --git a/src/current/v25.2/kubernetes-operator-performance.md b/src/current/v25.2/kubernetes-operator-performance.md index bc7631315b2..9bb9bf96507 100644 --- a/src/current/v25.2/kubernetes-operator-performance.md +++ b/src/current/v25.2/kubernetes-operator-performance.md @@ -5,7 +5,7 @@ toc: true docs_area: deploy --- -Kubernetes provides many useful abstractions for deploying and operating distributed systems, but some of the abstractions come with a performance overhead and an increase in underlying system complexity. This section explains potential bottlenecks to be aware of when running CockroachDB in Kubernetes and shows you how to optimize your deployment for better performance. +Kubernetes provides many useful abstractions for deploying and operating distributed systems, but some of the abstractions come with a performance overhead and an increase in underlying system complexity. This page outlines potential bottlenecks when running CockroachDB in Kubernetes and how to optimize performance. 
## Before you begin @@ -13,39 +13,43 @@ Before you focus on optimizing a Kubernetes-orchestrated CockroachDB cluster: 1. Before deploying on Kubernetes, ensure that performance is optimized for your workload on identical hardware. You may find that you first need to [modify your workload](performance-best-practices-overview.html) or use [different machine specs](recommended-production-settings.html#hardware) to achieve the performance you need. -2. Go through the documentation for [deploying CockroachDB in a Kubernetes cluster ](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster)to familiarize yourself with the necessary Kubernetes terminology and deployment abstractions. +2. Read the documentation for [deploying CockroachDB on a Kubernetes cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster) to familiarize yourself with the necessary Kubernetes terminology and deployment abstractions. ## Performance factors A number of independent factors affect performance when running CockroachDB on Kubernetes. Most are easiest to change before you create your CockroachDB cluster. If you need to modify a CockroachDB cluster that is already running on Kubernetes, extra care and testing is strongly recommended. -The following sections show how to modify excerpts from our provided Kubernetes configuration YAML files. You can find the most up-to-date version of this file [on GitHub](https://github.com/cockroachdb/helm-charts/blob/master/cockroachdb-parent/charts/cockroachdb/values.yaml). +The following sections show how to modify excerpts from the Cockroach Labs-provided Kubernetes configuration YAML files. You can find the most up-to-date version of this file [on GitHub](https://github.com/cockroachdb/helm-charts/blob/master/cockroachdb-parent/charts/cockroachdb/values.yaml). ### Version of CockroachDB -Because CockroachDB is under very active development, there are typically substantial performance gains in each release. 
If you aren't running the latest release and aren't getting the performance you desire, you should try the latest and see how much it helps. +Because CockroachDB is under very active development, there are typically substantial performance gains in each release. If you are not experiencing optimal performance and aren't running the latest release, consider upgrading. ### Client workload -Your workload is the single most important factor in database performance. Read through our [SQL performance best practices](performance-best-practices-overview.html) to determine whether there are any easy changes that you can make to speed up your application. +Your workload is the single most important factor in database performance. Read through [SQL performance best practices](performance-best-practices-overview.html) and determine whether you can make workload changes to speed up your application. ### Machine size -The size of the machines you're using isn't a Kubernetes-specific concern, but it's always a good place to start if you want more performance. See our [hardware recommendations](recommended-production-settings.html#hardware) for specific suggestions, but using machines with more CPU will almost always allow for greater throughput. Be aware that because Kubernetes runs a set of processes on every machine in a cluster, you typically will get more bang for your buck by using fewer large machines than more small machines. +The size of the machines you're using is not a Kubernetes-specific concern, but is a good place to start if you want more performance. Using machines with more CPU will almost always allow for greater throughput. Because Kubernetes runs a set of processes on every machine in a cluster, it is typically more efficient to use fewer large machines than more small machines. For specific suggestions, refer to [Hardware](recommended-production-settings.html#hardware). 
### Disk type -CockroachDB makes heavy use of the disks you provide it, so using faster disks is an easy way to improve your cluster's performance. Our provided configuration does not specify what type of disks it wants, so in most environments Kubernetes will auto-provision disks of the default type. In the common cloud environments (AWS, GCP, Azure) this means you'll get slow disks that aren't optimized for database workloads (e.g.,HDDs on GCE, SSDs without provisioned IOPS on AWS). However, we [strongly recommend using SSDs](recommended-production-settings.html#hardware) for the best performance, and Kubernetes makes it relatively easy to use them. +CockroachDB makes heavy use of the disks you provide it, so using faster disks is an easy way to improve your cluster's performance. For the best performance, [SSDs are strongly recommended](recommended-production-settings.html#hardware). -#### Creating a different disk type +The Cockroach Labs-provided configuration does not specify disk type, so in most environments Kubernetes will auto-provision disks of the default type. In the common cloud environments (AWS, GCP, Azure) this means you'll get slow disks that aren't optimized for database workloads (e.g., HDDs on GCE, SSDs without provisioned IOPS on AWS). -Kubernetes exposes the disk types used by its volume provisioner via its [StorageClass API object](https://kubernetes.io/docs/concepts/storage/storage-classes/). Each cloud environment has its own default `StorageClass`, but you can easily change the default or create a new named class which you can then ask for when asking for volumes. To do this, pick the type of volume provisioner you want to use from the list in the [Kubernetes documentation](https://kubernetes.io/docs/concepts/storage/storage-classes/), take the example YAML file they provide, modify it to have the disk type you want, then run `kubectl create -f <your-storage-class-file.yaml>`. 
For example, in order to use the `pd-ssd` disk type on Google Compute Engine or Google Kubernetes Engine, you can use a `StorageClass` file like this: +#### Create a different disk type + +Kubernetes exposes the disk types used by its volume provisioner via its [`StorageClass` API object](https://kubernetes.io/docs/concepts/storage/storage-classes/). Each cloud environment has a default `StorageClass`, but you can easily change the default or create a new named class that you can specify later. + +To do this, pick a volume provisioner from the list in the [Kubernetes documentation](https://kubernetes.io/docs/concepts/storage/storage-classes/), modify the example YAML file to specify the disk type you want, then run `kubectl create -f {your-storage-class-file}.yaml`. For example, in order to use the `pd-ssd` disk type on Google Compute Engine or Google Kubernetes Engine, you can use a `StorageClass` file like the following: ```yaml apiVersion: storage.k8s.io/v1 kind: StorageClass metadata: - name: + name: {your-ssd-class-name} provisioner: kubernetes.io/gce-pd parameters: type: pd-ssd @@ -53,9 +57,9 @@ parameters: You can then use this new disk type either by configuring the CockroachDB YAML file to request it or by making it the default. You may also want to set additional parameters as documented in the list of Kubernetes storage classes, such as configuring the `iopsPerGB` if you're creating a `StorageClass` for AWS's `io1` Provisioned IOPS volume type. -#### Configuring the disk type used by CockroachDB +#### Configure the disk type used by CockroachDB -To use a new `StorageClass` without making it the default in your cluster, you have to modify your application's YAML file to ask for it. In the CockroachDB configuration, that means adding a line to its `cockroachdb.crdbCluster.dataStore.volumeClaimTemplates` section. 
For example, that would mean adding a `storageClassName` field: +To use a new `StorageClass` without making it the default in your cluster, modify your application's YAML file to ask for it. In the CockroachDB configuration, that means adding `storageClassName` to `cockroachdb.crdbCluster.dataStore.volumeClaimTemplates`: ```yaml cockroachdb: @@ -65,11 +69,11 @@ cockroachdb: storageClassName: ``` -If you make this change then run `kubectl create -f` on your YAML file, Kubernetes should create volumes for you using your new `StorageClass`. +When running `kubectl create -f` on your modified YAML file, Kubernetes should create volumes using the specified `storageClassName`. -#### Changing the default disk type +#### Change the default disk type -If you want your new `StorageClass` to be the default for all volumes in your cluster, you have to run a couple of commands to inform Kubernetes of what you want. First, get the names of your `StorageClass` objects. Then remove the current default and add yours as the new default. +To make a new `StorageClass` the default for all volumes in your cluster, run the following `kubectl` commands. ```shell $ kubectl get storageclasses @@ -91,7 +95,7 @@ storageclass "ssd" patched ### Disk size -On some cloud providers (notably including all GCP disks and the AWS io1 disk type), the number of IOPS available to a disk is directly correlated to the size of the disk. In such cases, increasing the size of your disks can make for significantly better CockroachDB performance, as well as less risk of filling them up. Doing so is easy -- before you create your CockroachDB cluster, modify the `cockroachdb.crdbCluster.dataStore.volumeClaimTemplate` in the CockroachDB YAML file to ask for more space. The following example sets this value to 1TB: +On some cloud providers, including all GCP disks and the AWS `io1` disk type, the number of IOPS available to a disk is directly correlated to the size of the disk. 
In such cases, increasing the size of your disks can significantly improve CockroachDB performance, and decrease the risk of filling them up. Before you create your CockroachDB cluster, modify the `cockroachdb.crdbCluster.dataStore.volumeClaimTemplate` in the CockroachDB YAML file to ask for more space. The following example sets this value to 1TB: ```yaml cockroachdb: @@ -108,17 +112,17 @@ Since [GCE disk IOPS scale linearly with disk size](https://cloud.google.com/com ### Local disks -Up to this point, we have assumed the use of auto-provisioned, remotely attached disks. However, local disks typically provide better performance than remotely attached disks. For example, SSD Instance Store Volumes outperform EBS Volumes on AWS, and Local SSDs outperform Persistent Disks on GCE. As of v1.14, Kubernetes supports [local volumes](https://kubernetes.io/docs/concepts/storage/volumes/#local). +The examples thus far assume the use of auto-provisioned, remotely attached disks. However, local disks typically provide better performance than remotely attached disks. For example, SSD Instance Store Volumes outperform EBS Volumes on AWS, and Local SSDs outperform Persistent Disks on GCE. As of v1.14, Kubernetes supports [local volumes](https://kubernetes.io/docs/concepts/storage/volumes/#local). -Note that when running with local disks, there is a greater chance of experiencing a disk failure than when using the cloud providers' network-attached disks that are often replicated underneath the covers. Consequently, you may want to use [Replication Controls](configure-replication-zones.html) to increase the replication factor of your data to 5 from its default of 3 when using local disks. +When using local disks, consider using [replication controls](configure-replication-zones.html) to increase the replication factor of your data from 3 (default) to 5. 
This is because local disks have a greater chance of experiencing a disk failure than a cloud provider's network-attached disks, which are often replicated underneath the covers. ### Resource requests and limits -When you ask Kubernetes to run a pod, you can tell it to reserve certain amounts of CPU and/or memory for each container in the pod or to limit the CPU and/or memory of each container. Doing one or both of these can have different implications depending on how utilized your Kubernetes cluster is. For the authoritative information on this topic, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/). +When you ask Kubernetes to run a pod, you can tell it to reserve certain amounts of CPU or memory for each container in the pod, or to limit the CPU or memory of each container. Setting resource [requests](#resource-requests) or [limits](#resource-limits) can have different implications, depending on your Kubernetes cluster's resource utilization. For the authoritative information on this topic, refer to the [Kubernetes documentation](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/). #### Resource requests -Resource requests allow you to reserve a certain amount of CPU or memory for your container. If you add resource requests to your CockroachDB YAML file, Kubernetes will schedule each CockroachDB pod onto a node with sufficient unreserved resources and will ensure the pods are guaranteed the reserved resources using the applicable Linux container primitives. If you are running other workloads in your Kubernetes cluster, setting resource requests is very strongly recommended to ensure good performance, because if you do not set them then CockroachDB could be starved of CPU cycles or [OOM stopped](cluster-setup-troubleshooting.html#out-of-memory-oom-crash) before less important processes. 
+Resource requests reserve a certain amount of CPU or memory for your container. If you add resource requests to your CockroachDB YAML file, Kubernetes will schedule each CockroachDB pod onto a node with sufficient unreserved resources and ensure the pods are guaranteed the reserved resources using the applicable Linux container primitives. If you are running other workloads in your Kubernetes cluster, setting resource requests is strongly recommended to ensure good performance. If you do not set resource requests, CockroachDB could be starved of CPU cycles or [OOM-stopped](cluster-setup-troubleshooting.html#out-of-memory-oom-crash) before less important processes. To determine how many resources are usable on your Kubernetes nodes, you can run: @@ -148,13 +152,13 @@ Allocated resources: 360m (9%) 0 (0%) 110Mi (0%) 170Mi (1%) ``` -This will output a lot of information for each of the nodes in your cluster, but if you focus in on the right parts you'll see how many "allocatable" resources are available on each node and how many resources are already being used by other pods. The "allocatable" resources are how much CPU and memory Kubernetes is willing to provide to pods running on the machine. The difference between the node's "capacity" and its "allocatable" resources is taken up by the operating system and Kubernetes's management processes. The "m" in "3920m" stands for "milli-CPUs", meaning "thousandths of a CPU". +In the output, the `Allocatable` field shows the `cpu` and `memory` resources Kubernetes will provide to pods running on the machine. The difference between the machine's `Capacity` and its `Allocatable` resources is taken up by the operating system and Kubernetes' management processes. In the preceding output, `3920m` stands for 3920 "milli-CPUs", or "thousandths of a CPU". -You'll also see a number of pods running here that you may not have realized were in your cluster. 
Kubernetes runs a handful of pods in the `kube-system` namespace that are part of the cluster infrastructure. These may make it tough to attempt to reserve all the allocatable space on your nodes for CockroachDB, since some of them are essential for the Kubernetes cluster's health. If you want to run CockroachDB on every node in your cluster, you'll have to leave room for these processes. If you are only running CockroachDB on a subset of the nodes in your cluster, you can choose to take up all the "allocatable" space other than what is being used by the `kube-system` pods that are on all the nodes in the cluster, such as `kube-proxy` or the `fluentd` logging agent. +Kubernetes runs additional pods in the `kube-system` namespace that are part of the cluster infrastructure. If you want to run CockroachDB on every node in your cluster, you must leave room for these processes, which are essential for the Kubernetes cluster's health. If you are only running CockroachDB on a subset of the Kubernetes machines, you can take up all the `Allocatable` space other than what is used by the `kube-system` pods that are on all the Kubernetes machines, such as `kube-proxy` or the `fluentd` logging agent. -Note that it will be difficult to truly use up all of the allocatable space in the current versions of Kubernetes (v1.10 or older) because you'd have to manually preempt the `kube-system` pods that are already on the nodes you want CockroachDB to run on (by deleting them). This should become easier in future versions of Kubernetes when its [Pod Priority](https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/) feature gets promoted from alpha to beta. Once that feature is more widely available, you could set the CockroachDB pods to a higher priority, causing the Kubernetes scheduler to preempt and reschedule the `kube-system` pods onto other machines. 
+On Kubernetes v1.10 or earlier, it is difficult to truly use all of the allocatable space, because you'd have to manually preempt the `kube-system` pods on each machine (by deleting them). When the Kubernetes [Pod Priority](https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/) feature is promoted from alpha to beta and becomes more widely available, you could set the CockroachDB pods to a higher priority, causing the Kubernetes scheduler to preempt and reschedule the `kube-system` pods onto other machines. -Once you've picked out an amount of CPU and memory to reserve for Cockroach, you'll have to configure the resource request in your CockroachDB YAML file. They should go underneath the `containers` heading. For example, to use most of the available resources on the machines described above, you'd configure these lines of your values file: +Once you've picked out an amount of CPU and memory to reserve for CockroachDB, configure the resource requests in your CockroachDB YAML file. They should go underneath the `containers` heading. For example, to use most of the available resources on the machines described above, you'd configure the following lines of your values file: ```yaml cockroachdb: @@ -165,13 +169,13 @@ cockroachdb: memory: 12300Mi ``` -When you initialize the cluster, you'll want to check to make sure that all the CockroachDB pods are scheduled successfully. If you see any get stuck in the pending state, run `kubectl describe pod <podname>` and check the `Events` for information about why they're still pending. You may need to manually preempt pods on one or more nodes by running `kubectl delete pod` on them to make room for the CockroachDB pods. As long as the pods you delete were created by a higher-level Kubernetes object such as a `Deployment`, they'll be safely recreated on another node. +When you initialize the cluster, check that all the CockroachDB pods are scheduled successfully. 
If you see any get stuck in the pending state, run `kubectl describe pod {podname}` and check the `Events` for information about why they're still pending. You may need to manually preempt pods on one or more nodes by running `kubectl delete pod` on them to make room for the CockroachDB pods. As long as the pods you delete were created by a higher-level Kubernetes object such as a `Deployment`, they'll be safely recreated on another node. #### Resource limits -Resource limits are conceptually similar to resource requests, but serve a different purpose. They let you cap the resources used by a pod to no more than the provided limit, which can have a couple of different uses. For one, it makes for more predictable performance because your pods will not be allowed to use any excess capacity on their machines, meaning that they will not have more resources available to them at some times (during lulls in traffic) than others (busy periods where the other pods on a machine are also fully utilizing their reserved resources). Secondly, it also increases the ["Quality of Service" guaranteed by the Kubernetes runtime](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node/resource-qos.md) on Kubernetes versions 1.8 and below, making the pods less likely to be preempted when a machine is oversubscribed. Finally, memory limits in particular limit the amount of memory that the container knows is available to it, which help when you specify percentages for the CockroachDB `--cache` and `--max-sql-memory` flags, as our default configuration file does. +Resource limits cap the resources used by a pod to no more than the provided limit. This makes for more predictable performance because your pods will not be allowed to use any excess capacity on their machines. 
Pods will not have more resources available to them at some times (e.g., lulls in traffic) than others (e.g., busy periods where the other pods on a machine are also fully utilizing their reserved resources). Resource limits also increase the ["Quality of Service" guaranteed by the Kubernetes runtime](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node/resource-qos.md) on Kubernetes v1.8 and earlier, making the pods less likely to be preempted when a machine is oversubscribed. Finally, memory limits in particular define the amount of memory the container perceives as available, which is useful when specifying percentage-based values for the CockroachDB `--cache` and `--max-sql-memory` flags, as in the default configuration file. -Setting resource limits works about the same as setting resource requests. If you wanted to set resource limits in addition to requests on the config from the [Resource Requests](#resource-requests) section above, you'd change the config to: +To set resource limits, in addition to the [resource requests](#resource-requests) described in the preceding section, change the configuration as follows: ```yaml cockroachdb: @@ -185,15 +189,15 @@ cockroachdb: memory: 12300Mi ``` -The pods would then be restricted to only use the resource they have reserved and guaranteed to not be preempted except in very exceptional circumstances. This typically will not give you better performance on an under-utilized Kubernetes cluster, but will give you more predictable performance as other workloads are run. +Pods will be limited to their reserved resources and are unlikely to be preempted, except in rare cases. This will not improve performance on an underutilized Kubernetes cluster, but provides more predictable performance as other workloads run. 
{{site.data.alerts.callout_danger}} -While setting memory limits is strongly recommended, [setting CPU limits can hurt tail latencies as currently implemented by Kubernetes](https://github.com/kubernetes/kubernetes/issues/51135). We recommend not setting CPU limits at all unless you have explicitly enabled the non-default [Static CPU Management Policy](https://kubernetes.io/docs/tasks/administer-cluster/cpu-management-policies/#static-policy) when setting up your Kubernetes cluster, and even then only setting integer (non-fractional) CPU limits and memory limits exactly equal to the corresponding requests. +While setting memory limits is strongly recommended, [setting CPU limits can hurt tail latencies as currently implemented by Kubernetes](https://github.com/kubernetes/kubernetes/issues/51135). Cockroach Labs recommends not setting CPU limits at all, unless you have explicitly enabled the non-default [Static CPU Management Policy](https://kubernetes.io/docs/tasks/administer-cluster/cpu-management-policies/#static-policy) when setting up your Kubernetes cluster. In this case, set CPU limits as integers and match memory limits exactly to their corresponding requests. {{site.data.alerts.end}} #### Default resource requests and limits -Note that even if you do not manually set resource requests yourself, you're likely unknowingly using them anyways. In many installations of Kubernetes, a [LimitRange](https://kubernetes.io/docs/tasks/administer-cluster/cpu-default-namespace/) is preconfigured for the `default` namespace that applies a default CPU request of `100m`, or one-tenth of a CPU. You can see this configuration by running the following command: +Even if you do not manually set resource requests, they are likely being applied. 
In many installations of Kubernetes, a [LimitRange](https://kubernetes.io/docs/tasks/administer-cluster/cpu-default-namespace/) is preconfigured for the `default` namespace that applies a default CPU request of `100m`, or one-tenth of a CPU. You can see this configuration by running the following command: ```shell $ kubectl describe limitranges @@ -203,7 +207,7 @@ Experimentally, this does not appear to have a noticeable effect on CockroachDB' ### Other pods on the same machines as CockroachDB -As discovered in the above section on [Resource Requests and Limits](#resource-requests-and-limits), there will always be pods other than just CockroachDB running in your Kubernetes cluster, even if you do not create any other pods of your own. You can see them at any time by running: +As described in [Resource requests and limits](#resource-requests-and-limits), your Kubernetes cluster will always run pods other than CockroachDB. You can see them by running: ```shell $ kubectl get pods --all-namespaces @@ -226,21 +230,21 @@ kube-system kubernetes-dashboard-768854d6dc-v7ng8 1/1 Running kube-system l7-default-backend-6497bcdb4d-2kbh4 1/1 Running 0 2m ``` -These ["cluster add-ons"](https://github.com/kubernetes/kubernetes/tree/master/cluster/addons) provide a variety of basic services like managing DNS entries for services within the cluster, powering the Kubernetes dashboard UI, or collecting logs or metrics from all the pods running in the cluster. If you do not like having them take up space in your cluster, you can prevent some of them from running by configuring your Kubernetes cluster appropriately. 
For example, on GKE, you can create a cluster with the minimal set of addons by running: +These ["cluster add-ons"](https://github.com/kubernetes/kubernetes/tree/master/cluster/addons) provide a variety of basic services like managing DNS entries for services within the cluster, powering the Kubernetes dashboard UI, or collecting logs or metrics from all the pods running in the cluster. If you do not like having them take up space in your cluster, you can prevent some of them from running by configuring your Kubernetes cluster appropriately. For example, on GKE, you can create a cluster with the minimal set of add-ons by running: ```shell $ gcloud container clusters create --no-enable-cloud-logging --no-enable-cloud-monitoring --addons="" ``` -However, essentials like `kube-proxy` and `kube-dns` are effectively required to have a compliant Kubernetes cluster. This means that you'll always have some pods that aren't yours running in your cluster, so it's important to understand and account for the possible effects of CockroachDB having to share a machine with other processes. The more processes there are on the same machine as a CockroachDB pod, the worse and less predictable its performance will likely be. To protect against this, it's strongly recommended to run with [Resource Requests](#resource-requests) on your CockroachDB pods to provide some level of CPU and memory isolation. +However, some pods like `kube-proxy` and `kube-dns` are required for compliant Kubernetes clusters. Since there will always be pods other than CockroachDB running in your cluster, it's important to understand and account for the effects of having CockroachDB share a machine with other processes. The more processes there are on the same machine as a CockroachDB pod, the slower and less predictable its performance will likely be. 
To protect against this, it's strongly recommended to specify [resource requests](#resource-requests) on your CockroachDB pods to provide some level of CPU and memory isolation. -Setting resource requests isn't a panacea, though. There can still be contention for shared resources like network I/O or, in [exceptional](https://sysdig.com/blog/container-isolation-gone-wrong/) cases, internal kernel data structures. For these reasons and because of the Kubernetes infrastructure processes running on each machine, CockroachDB running on Kubernetes simply cannot reach quite the same levels of performance as running directly on dedicated machines. Thankfully, it can at least get quite close if you use Kubernetes wisely. +Even with resource requests, there can still be contention for shared resources like network I/O or, in [exceptional](https://sysdig.com/blog/container-isolation-gone-wrong/) cases, internal kernel data structures. For these reasons and because of the Kubernetes infrastructure processes running on each machine, CockroachDB running on Kubernetes cannot match the performance of running CockroachDB directly on dedicated machines, although it can get quite close with careful configuration. -If for some reason setting appropriate resource requests still isn't getting you the performance you expect, you might want to consider going all the way to [dedicated nodes](#dedicated-nodes). +If setting appropriate resource requests still isn't getting you the performance you expect, consider using [dedicated nodes](#dedicated-nodes). #### Client applications on the same machines as CockroachDB -Running client applications such as benchmarking applications on the same machines as CockroachDB can be even worse than just having Kubernetes system pods on the same machines. They are very likely to end up competing for resources, because when the applications get more loaded than usual, so will the CockroachDB processes. 
The best way to avoid this is to [set resource requests and limits](#resource-requests-and-limits), but if you are unwilling or unable to do that for some reason, you can also set [anti-affinity scheduling policies](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity) on your client applications: +Client applications such as benchmarking applications running on the same machines as CockroachDB are likely to compete for resources. As application load increases, so does the load on CockroachDB processes. The best way to avoid this is to [set resource requests and limits](#resource-requests-and-limits). Alternatively, you can also set [anti-affinity scheduling policies](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity) on your client applications: ```yaml cockroachdb: @@ -268,20 +272,20 @@ cockroachdb: topologyKey: kubernetes.io/hostname ``` -This configuration will first prefer to put the `loadgen` pods on different nodes from each other, which is important for the fault tolerance of the `loadgen` pods themselves. As a secondary priority, it will attempt to put the pods on nodes that do not already have a running `CockroachDB` pod. This will ensure the best possible balance of fault tolerance and performance for the load generator and CockroachDB cluster. +The preceding configuration will first prefer to put the `loadgen` pods on different nodes from each other, which is important for the fault tolerance of the `loadgen` pods themselves. As a secondary priority, it will attempt to put the pods on nodes that do not already have a running `CockroachDB` pod. This will ensure the best possible balance of fault tolerance and performance for the load generator and CockroachDB cluster. 
### Networking -[Kubernetes asks a lot of the network that it runs on](https://kubernetes.io/docs/concepts/cluster-administration/networking/) in order to provide a routable IP address and an isolated Linux network namespace to each pod in the cluster, among its other requirements. While this document isn't nearly large enough to properly explain the details, and those details themselves can depend heavily on specifically how you have set up the network for your cluster, it suffices to say that Docker and Kubernetes's networking abstractions often come with a performance penalty for high-throughput distributed applications such as CockroachDB. +[Kubernetes places significant demands on the underlying network](https://kubernetes.io/docs/concepts/cluster-administration/networking/) in order to provide each pod a routable IP address and isolated Linux network namespace, among other requirements. While the impact is heavily dependent on your Kubernetes cluster's network setup, Docker and Kubernetes' networking abstractions often introduce a performance penalty for high-throughput distributed applications such as CockroachDB. -If you really want to eke more performance out of your cluster, networking is a good target to at least experiment with. You can either replace your cluster's networking solution with a more performant one or bypass most of the networking overhead by using the host machines' networks directly. +Experimenting with networking can be a way to eke more performance out of your cluster. You can either replace your cluster's networking solution with a more performant one, or bypass most of the networking overhead by using the host machines' networks directly. #### Networking solutions -If you aren't using a hosted Kubernetes service, you'll typically have to choose how to set up the network when you're creating a Kubernetes cluster. 
There are [a lot of solutions out there](https://kubernetes.io/docs/concepts/cluster-administration/networking/#how-to-achieve-this), and they can have significantly different performance characteristics and functionality. We do not endorse any networking software or configurations in particular, but want to call out that your choice can have a meaningful effect on performance compared to running CockroachDB outside of Kubernetes. +If you aren't using a hosted Kubernetes service, you'll need to choose a [networking solution](https://kubernetes.io/docs/concepts/cluster-administration/networking/#how-to-achieve-this) when creating a Kubernetes cluster. While Cockroach Labs does not endorse any specific networking solutions, note that your choice can meaningfully impact CockroachDB's performance compared to running it outside of Kubernetes. ### Dedicated nodes -If your Kubernetes cluster is made up of heterogeneous hardware, it's likely that you'd like to make sure CockroachDB only runs on certain machines. If you want to get as much performance as possible out of a set of machines, you might also want to make sure that only CockroachDB is run on them. +If your Kubernetes cluster uses heterogeneous hardware, you will likely want to ensure that CockroachDB only runs on specific machines. To optimize performance, it can be beneficial to dedicate those machines exclusively to CockroachDB. -For more information, see [Pod scheduling](schedule-cockroachdb-kubernetes-operator.html). +For more information, refer to [Pod scheduling](schedule-cockroachdb-kubernetes-operator.html). 
diff --git a/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md b/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md index e8a01efb8e2..385a303c585 100644 --- a/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md +++ b/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md @@ -9,7 +9,7 @@ docs_area: deploy This guide describes how to migrate an existing CockroachDB cluster managed via StatefulSet to the enterprise operator. -These instructions assume that you are migrating from a StatefulSet cluster that was configured using the Helm chart per the following command: +These instructions assume that you are migrating from a StatefulSet cluster that was configured using the Helm chart with the following command: ```shell helm upgrade --install --set operator.enabled=false crdb-test --debug ./cockroachdb @@ -17,10 +17,10 @@ helm upgrade --install --set operator.enabled=false crdb-test --debug ./cockroac If your existing cluster was created using the public operator, refer to the [public operator migration guide](migrate-cockroachdb-kubernetes-operator.html). -This migration process is designed to allow migration to occur without affecting cluster availability, and preserving existing disks so data doesn’t need to be replicated into empty volumes. Note that this process scales down the StatefulSet by one node before adding each operator-managed pod, so the maximum cluster capacity will be reduced by one node periodically throughout the migration. +This migration can be completed without affecting cluster availability, and preserves existing disks so that data doesn't need to be replicated into empty volumes. The process scales down the StatefulSet by one node before adding each operator-managed pod, so the maximum cluster capacity will be reduced by one node periodically throughout the migration. -{{site.data.alerts.callout_info}} -This migration process is currently only recommended to run in a non-production environment. 
We are actively working on a rollback procedure but are looking for early feedback on this process. +{{site.data.alerts.callout_danger}} +This migration process is only recommended for non-production environments at this time. {{site.data.alerts.end}} ## Step 1. Prepare the migration helper @@ -32,7 +32,7 @@ $ make bin/migration-helper $ export PATH=$PATH:$(pwd)/bin ``` -Export environment variables about the existing deployment: +Export environment variables for the existing deployment: ```shell # Set STS_NAME to the cockroachdb statefulset deployed via helm chart. @@ -62,7 +62,7 @@ The operator uses slightly different certificates than the CockroachDB Helm char $ bin/migration-helper migrate-certs --statefulset-name $STS_NAME --namespace $NAMESPACE ``` -Generate a manifest for each crdbnode and the crdbcluster based on the state of the StatefulSet. We do this because we want the new pods and their associated PVCs to have the same names as the original StatefulSet-managed pods and PVCs. This means that the new operator-managed pods will use the original PVCs rather than replicate data into empty nodes. +Generate a manifest for each crdbnode and the crdbcluster based on the state of the StatefulSet. The new pods and their associated PVCs must have the same names as the original StatefulSet-managed pods and PVCs. The new operator-managed pods will then use the original PVCs, rather than replicate data into empty nodes. ```shell $ mkdir -p manifests @@ -71,15 +71,15 @@ $ bin/migration-helper build-manifest helm --statefulset-name $STS_NAME --namesp ## Step 3. Replace statefulset pods with operator nodes -To migrate seamlessly from the CockroachDB Helm chart to the operator, we’ll scale down StatefulSet-managed pods and replace them with crdbnode objects, one by one. Then we’ll create the crdbcluster object that manages the crdbnodes. 
+To migrate seamlessly from the CockroachDB Helm chart to the operator, scale down StatefulSet-managed pods and replace them with crdbnode objects, one by one. Then create the crdbcluster object that manages the crdbnodes. -First, create objects in kubectl that will eventually be owned by the crdbcluster: +Create objects with `kubectl` that will eventually be owned by the crdbcluster: ```shell $ kubectl create priorityclass crdb-critical --value 500000000 ``` -Install the crdb-operator with Helm: +Install the `crdb-operator` with Helm: ```shell $ helm upgrade --install crdb-operator ./cockroachdb-parent/charts/operator @@ -93,7 +93,7 @@ For each pod in the StatefulSet, perform the following steps: $ kubectl scale statefulset/$STS_NAME --replicas=4 ``` -2. Create the crdbnode corresponding to the StatefulSet pod you just scaled down. The manifests are labeled as `crdbnode-X.yaml` where `X` is shared with each `<STS_NAME>-X` StatefulSet pod, so note whichever pod was scaled down and specify the corresponding manifest in the following command: +2. Create the `crdbnode` resource that corresponds to the StatefulSet pod you just scaled down. Each manifest is labeled with the pattern `crdbnode-X.yaml`, where `X` corresponds to a StatefulSet pod named `{STS_NAME}-X`. Note the pod that was scaled down and specify its manifest in a command like the following: ```shell $ kubectl apply -f manifests/crdbnode-4.yaml @@ -102,7 +102,7 @@ For each pod in the StatefulSet, perform the following steps: 3. Wait for the new pod to become ready. If it doesn’t, check the operator logs for errors. 4. Before moving on to the next replica migration, verify that there are no underreplicated ranges: - 1. Set up port forwarding to access the CockroachDB node’s HTTP interface. Note that CockroachDB’s UI runs on port 8080 by default: + 1. Set up port forwarding to access the CockroachDB node’s HTTP interface. 
Note that the DB Console runs on port 8080 by default: ```shell $ kubectl port-forward pod/cockroachdb-4 8080:8080 @@ -120,7 +120,7 @@ Repeat these steps until the StatefulSet has zero replicas. The Helm chart creates a public Service that exposes both SQL and gRPC connections over a single port. However, the operator uses a different port for gRPC communication. To ensure compatibility, update the public Service to reflect the correct gRPC port used by the operator. -Apply the updated service manifest: +Apply the updated Service manifest: ```shell $ kubectl apply -f manifests/public-service.yaml From 8f7ef66db5aaa3037cc7dc47983a2ce35c2a71bc Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Sat, 2 Aug 2025 02:47:11 -0400 Subject: [PATCH 03/27] Address remainder of Ryan's comments --- .../sidebar-data/self-hosted-deployments.json | 12 +- ...nfigure-cockroachdb-kubernetes-operator.md | 104 ++-- ...oy-cockroachdb-with-kubernetes-operator.md | 490 ++++++++++-------- .../v25.2/kubernetes-operator-overview.md | 4 +- .../v25.2/kubernetes-operator-performance.md | 99 ++-- .../migrate-cockroachdb-kubernetes-helm.md | 152 +++--- ...migrate-cockroachdb-kubernetes-operator.md | 256 +++++---- ...monitor-cockroachdb-kubernetes-operator.md | 248 +++++---- .../scale-cockroachdb-kubernetes-operator.md | 70 +-- ...chedule-cockroachdb-kubernetes-operator.md | 155 +++--- .../secure-cockroachdb-kubernetes-operator.md | 170 +++--- ...upgrade-cockroachdb-kubernetes-operator.md | 88 ++-- 12 files changed, 1000 insertions(+), 848 deletions(-) diff --git a/src/current/_includes/v25.2/sidebar-data/self-hosted-deployments.json b/src/current/_includes/v25.2/sidebar-data/self-hosted-deployments.json index 027f0f5d939..0333793d24c 100644 --- a/src/current/_includes/v25.2/sidebar-data/self-hosted-deployments.json +++ b/src/current/_includes/v25.2/sidebar-data/self-hosted-deployments.json @@ -171,7 +171,7 @@ ] }, { - "title": "Deploy with Kubernetes Operator", + "title": "Deploy in Kubernetes with 
CockroachDB Operator", "items": [ { "title": "Overview", @@ -180,22 +180,22 @@ ] }, { - "title": "Cluster Deployment", + "title": "CockroachDB Operator Deployment Guide", "urls": [ "/${VERSION}/deploy-cockroachdb-with-kubernetes-operator.html" ] }, { - "title": "Migrate from existing Kubernetes deployments", + "title": "Migrate from Other Kubernetes Deployments", "items": [ { - "title": "Helm StatefulSet migration", + "title": "Migrate from Helm StatefulSet", "urls": [ "/${VERSION}/migrate-cockroachdb-kubernetes-helm.html" ] }, { - "title": "Legacy operator migration", + "title": "Migrate from Public Operator", "urls": [ "/${VERSION}/migrate-cockroachdb-kubernetes-operator.html" ] @@ -203,7 +203,7 @@ ] }, { - "title": "Operate on Kubernetes", + "title": "Operate CockroachDB with Kubernetes", "items": [ { "title": "Pod Scheduling", diff --git a/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md b/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md index 0fe1a592e1e..59c8d6b4111 100644 --- a/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md @@ -1,6 +1,6 @@ --- -title: Resource Management with the Kubernetes Operator -summary: Allocate CPU, memory, and storage resources for a cluster deployed with the Kubernetes Operator. +title: Resource Management with the CockroachDB Operator +summary: Allocate CPU, memory, and storage resources for a cluster deployed with the CockroachDB operator. 
toc: true toc_not_nested: true secure: true @@ -25,7 +25,7 @@ You can set the CPU and memory resources allocated to the CockroachDB container Specify CPU and memory values in `cockroachdb.crdbCluster.resources.limits` and `cockroachdb.crdbCluster.resources.requests` in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster): -~~~yaml +~~~ yaml cockroachdb: crdbCluster: resources: @@ -39,11 +39,12 @@ cockroachdb: Apply the new settings to the cluster: -```shell -$ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE -``` +{% include_cached copy-clipboard.html %} +~~~ shell +helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE +~~~ -We recommend using identical values for `resources.requests` and `resources.limits`. When setting the new values, note that not all of a pod's resources will be available to the CockroachDB container. This is because a fraction of the CPU and memory is reserved for Kubernetes. +Cockroach Labs recommends using identical values for `resources.requests` and `resources.limits`. When setting the new values, note that not all of a pod's resources will be available to the CockroachDB container. This is because a fraction of the CPU and memory is reserved for Kubernetes. {{site.data.alerts.callout_info}} If no resource limits are specified, the pods will be able to consume the maximum available CPUs and memory. However, to avoid overallocating resources when another memory-intensive workload is on the same instance, always set resource requests and limits explicitly. 
@@ -55,7 +56,7 @@ For more information on how Kubernetes handles resources, see the [Kubernetes do Each CockroachDB node reserves a portion of its available memory for its cache and for storing temporary data for SQL queries. For more information on these settings, see the [Production Checklist](recommended-production-settings.html#cache-and-sql-memory-size). -The Kubernetes operator dynamically sets cache size and SQL memory size each to 25% (the recommended percentage) of the available memory, which depends on the memory request and limit you [specified](#memory-and-cpu) for your configuration. These values can be modified by adding the `cache` or `max-sql-memory` fields to `cockroachdb.crdbCluster.flags`, which is equivalent to appending `--cache` or `--max-sql-memory` as [cockroach start flags](cockroach-start.html#flags). +The CockroachDB operator dynamically sets cache size and SQL memory size each to 25% (the recommended percentage) of the available memory, which depends on the memory request and limit you [specified](#memory-and-cpu) for your configuration. These values can be modified by adding the `cache` or `max-sql-memory` fields to `cockroachdb.crdbCluster.flags`, which is equivalent to appending `--cache` or `--max-sql-memory` as [cockroach start flags](cockroach-start.html#flags). ## Persistent storage @@ -63,7 +64,7 @@ When you start your cluster, Kubernetes dynamically provisions and mounts a pers The storage capacity of each volume is set in `cockroachdb.crdbCluster.dataStore.volumeClaimTemplate.spec.resources` in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster): -```yaml +~~~ yaml cockroachdb: crdbCluster: dataStore: @@ -72,7 +73,7 @@ cockroachdb: resources: requests: storage: "10Gi" -``` +~~~ You should provision an appropriate amount of disk storage for your workload. For recommendations on this, see the [Production Checklist](recommended-production-settings.html#storage). 
@@ -82,7 +83,7 @@ If you discover that you need more capacity, you can expand the persistent volum Specify a new volume size in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster): -```yaml +~~~ yaml cockroachdb: crdbCluster: dataStore: @@ -91,79 +92,44 @@ cockroachdb: resources: requests: storage: "100Gi" -``` +~~~ Apply the new settings to the cluster: -```shell -$ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE -``` +{% include_cached copy-clipboard.html %} +~~~ shell +helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE +~~~ -The Operator updates all nodes and triggers a rolling restart of the pods with the new storage capacity. +The CockroachDB operator updates all nodes and triggers a rolling restart of the pods with the new storage capacity. To verify that the storage capacity has been updated, run `kubectl get pvc` to view the persistent volume claims (PVCs). It will take a few minutes before the PVCs are completely updated. ## Network ports -The Operator separates network traffic into three ports: - -
Protocol + Protocol Default
- - - - - - - - - - - - - - - - - - - - - - - -
Protocol - - Default - Description - Custom Resource Field -
gRPC - 26258 - Used for node connections - service.ports.grpc -
HTTP - 8080 - Used to access the DB Console - service.ports.http -
SQL - 26257 - Used for SQL shell access - service.ports.sql -
- -Specify alternate port numbers in `cockroachdb.crdbCluster.service.ports` of the Operator's [custom resource](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster) (for example, to match the default port `5432` on PostgreSQL): - -```yaml +The CockroachDB operator separates network traffic into three ports: + +| Protocol | Default Port| Description | Custom Resource Field | +|------------|-------------|-------------------------------|----------------------------------| +| gRPC | 26258 | Used for node connections | service.ports.grpc | +| HTTP | 8080 | Used to access the DB Console | service.ports.http | +| SQL | 26257 | Used for SQL shell access | service.ports.sql | + +Specify alternate port numbers in `cockroachdb.crdbCluster.service.ports` of the CockroachDB operator's [custom resource](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster) (for example, to match the default port `5432` on PostgreSQL): + +~~~ yaml cockroachdb: crdbCluster: service: ports: sql: 5432 -``` +~~~ Apply the new settings to the cluster: -```shell -$ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE -``` +{% include_cached copy-clipboard.html %} +~~~ shell +helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE +~~~ -The Operator updates all nodes and triggers a rolling restart of the pods with the new port settings. +The CockroachDB operator updates all nodes and triggers a rolling restart of the pods with the new port settings. 
diff --git a/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md b/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md index 473c6768fc2..3a9f75f1e91 100644 --- a/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md +++ b/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md @@ -1,6 +1,6 @@ --- -title: Deploy CockroachDB in a Kubernetes Cluster with the Operator -summary: Deploy a secure 3-node CockroachDB cluster with the Kubernetes operator. +title: Deploy CockroachDB with the CockroachDB Operator +summary: Deploy a secure 3-node CockroachDB cluster with the CockroachDB operator. toc: true toc_not_nested: false secure: true @@ -19,14 +19,14 @@ To deploy CockroachDB v25.1 or later, Kubernetes 1.30 or higher is required. Coc The CockroachDB Helm chart requires Helm 3.0 or higher. If you attempt to use an incompatible Helm version, an error like the following occurs: -``` +~~~ Error: UPGRADE FAILED: template: cockroachdb/templates/tests/client.yaml:6:14: executing "cockroachdb/templates/tests/client.yaml" at <.Values.networkPolicy.enabled>: nil pointer evaluating interface {}.enabled -``` +~~~ The Helm chart consists of two sub-charts: -* `operator` - The CockroachDB operator chart to be installed first. -* `cockroachdb` - The CockroachDB application chart to be installed after the operator is ready. +- `operator`: The CockroachDB operator chart to be installed first. +- `cockroachdb`: The CockroachDB application chart to be installed after the operator is ready. ### Network @@ -46,7 +46,7 @@ For more information on how locality labels are used by CockroachDB, refer to th ### Architecture -The operator is only supported in environments with an ARM64 or AMD64 architecture. +The CockroachDB operator is only supported in environments with an ARM64 or AMD64 architecture. 
### Resources @@ -54,7 +54,7 @@ When starting Kubernetes, select machines with at least 4 vCPUs and 16 GiB of me ### Storage -Kubernetes deployments use external persistent volumes that are often replicated by the provider. CockroachDB replicates data automatically, and this redundant layer of replication can impact performance. Using [local volumes](https://kubernetes.io/docs/concepts/storage/volumes/#local) may improve performance. +Kubernetes deployments use external persistent volumes that are often replicated by the provider. CockroachDB replicates data automatically, and this redundant layer of [replication]({% link {{ page.version.version }}/architecture/overview.md %}#replication) can impact performance. Using [local volumes](https://kubernetes.io/docs/concepts/storage/volumes/#local) may improve performance. ## Step 1. Start Kubernetes @@ -72,44 +72,53 @@ Cloud providers such as GKE, EKS, and AKS are not required to run CockroachDB on The documentation offers the choice of using Google's Cloud Shell product or using a local shell on your machine. Choose to use a local shell if you want to be able to view the DB Console using the steps in this guide. -1. From your local workstation, start the Kubernetes cluster, specifying one of the available [regions](https://cloud.google.com/compute/docs/regions-zones#available) (e.g., `us-east1`): +1. From your local workstation, start the Kubernetes cluster, specifying one of the available [regions](https://cloud.google.com/compute/docs/regions-zones#available) (e.g., `us-east1`). - Since this region can differ from your default `gcloud` region, be sure to include the `--region` flag to run `gcloud` commands against this cluster. + The process can take a few minutes, so do not move on to the next step until you see a `Creating cluster cockroachdb...done` message and details about your cluster. 
- ```shell - $ gcloud container clusters create cockroachdb --machine-type n2-standard-4 --region {region-name} --num-nodes 1 - + {% include_cached copy-clipboard.html %} + ~~~ shell + gcloud container clusters create cockroachdb --machine-type n2-standard-4 --region {region-name} --num-nodes 1 + ~~~ + ~~~ shell Creating cluster cockroachdb...done. - ``` + ~~~ + + {{site.data.alerts.callout_info}} + Since this region can differ from your default `gcloud` region, be sure to include the `--region` flag to run `gcloud` commands against this cluster. + {{site.data.alerts.end}} This creates GKE instances and joins them into a single Kubernetes cluster named `cockroachdb`. The `--region` flag specifies a [regional three-zone cluster](https://cloud.google.com/kubernetes-engine/docs/how-to/creating-a-regional-cluster), and `--num-nodes` specifies one Kubernetes worker node in each zone. The `--machine-type` flag tells the node pool to use the [n2-standard-4](https://cloud.google.com/compute/docs/machine-types#standard_machine_types) machine type (4 vCPUs, 16 GB memory), which meets our [recommended CPU and memory configuration](recommended-production-settings#basic-hardware-recommendations). - The process can take a few minutes, so do not move on to the next step until you see a `Creating cluster cockroachdb...done` message and details about your cluster. {{site.data.alerts.callout_info}} Consider creating another, dedicated node group for the operator pod for system resource availability. {{site.data.alerts.end}} -3. Get the email address associated with your Google Cloud account: - - ```shell - $ gcloud info | grep Account +1. Get the email address associated with your Google Cloud account: + {% include_cached copy-clipboard.html %} + ~~~ shell + gcloud info | grep Account + ~~~ + ~~~ shell Account: [your.google.cloud.email@example.org] - ``` + ~~~ The preceding command returns your email address in all lowercase. 
However, in the next step, you must enter the address using the accurate capitalization. For example, if your address is `YourName@example.com`, you must use `YourName@example.com` and not `yourname@example.com`. -4. [Create the RBAC roles](https://cloud.google.com/kubernetes-engine/docs/how-to/role-based-access-control#prerequisites_for_using_role-based_access_control) CockroachDB needs for running on GKE, using the address from the previous step: +1. [Create the RBAC roles](https://cloud.google.com/kubernetes-engine/docs/how-to/role-based-access-control#prerequisites_for_using_role-based_access_control) CockroachDB needs for running on GKE, using the address from the previous step: - ```shell - $ kubectl create clusterrolebinding $USER-cluster-admin-binding \ + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl create clusterrolebinding $USER-cluster-admin-binding \ --clusterrole=cluster-admin \ --user={your.google.cloud.email@example.org} - + ~~~ + ~~~ shell clusterrolebinding.rbac.authorization.k8s.io/your.username-cluster-admin-binding created - ``` + ~~~ ### Hosted EKS @@ -119,12 +128,15 @@ Cloud providers such as GKE, EKS, and AKS are not required to run CockroachDB on If you are running [EKS-Anywhere](https://aws.amazon.com/eks/eks-anywhere/), CockroachDB requires that you [configure your default storage class](https://kubernetes.io/docs/tasks/administer-cluster/change-default-storage-class/) to auto-provision persistent volumes. Alternatively, you can define a custom storage configuration as required by your install pattern. -2. From your local workstation, start the Kubernetes cluster: +1. From your local workstation, start the Kubernetes cluster: To ensure that all 3 nodes can be placed into a different availability zone, you may want to first [confirm that at least 3 zones are available in the region](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#availability-zones-describe) for your account. 
- ```shell - $ eksctl create cluster \ + Cluster provisioning usually takes between 10 and 15 minutes. Do not move on to the next step until you see a message like `[✔] EKS cluster "cockroachdb" in "us-east-1" region is ready` and details about your cluster. + + {% include_cached copy-clipboard.html %} + ~~~ shell + eksctl create cluster \ --name cockroachdb \ --nodegroup-name standard-workers \ --node-type m6i.xlarge \ @@ -132,16 +144,15 @@ Cloud providers such as GKE, EKS, and AKS are not required to run CockroachDB on --nodes-min 1 \ --nodes-max 4 \ --node-ami auto - ``` + ~~~ This creates EKS instances and joins them into a single Kubernetes cluster named `cockroachdb`. The `--node-type` flag tells the node pool to use the [m6i.xlarge](https://aws.amazon.com/ec2/instance-types/) instance type (4 vCPUs, 16 GB memory), which meets our [recommended CPU and memory configuration](recommended-production-settings#basic-hardware-recommendations). - Cluster provisioning usually takes between 10 and 15 minutes. Do not move on to the next step until you see a message like `[✔] EKS cluster "cockroachdb" in "us-east-1" region is ready` and details about your cluster. {{site.data.alerts.callout_info}} Consider creating another, dedicated node group for the operator pod for system resource availability. {{site.data.alerts.end}} -3. Open the [AWS CloudFormation console](https://console.aws.amazon.com/cloudformation/home) to verify that the stacks `eksctl-cockroachdb-cluster` and `eksctl-cockroachdb-nodegroup-standard-workers` were successfully created. Be sure that your region is selected in the console. +1. Open the [AWS CloudFormation console](https://console.aws.amazon.com/cloudformation/home) to verify that the stacks `eksctl-cockroachdb-cluster` and `eksctl-cockroachdb-nodegroup-standard-workers` were successfully created. Be sure that your region is selected in the console. 
### Hosted AKS @@ -149,21 +160,22 @@ Cloud providers such as GKE, EKS, and AKS are not required to run CockroachDB on Set the environment variables as desired for your CockroachDB deployment. For these instructions, set the `MY_AKS_CLUSTER_NAME` variable to `cockroachdb`. - Do not follow the **Create an AKS cluster** steps or following sections of the quickstart guide, as these topics will be described specifically for CockroachDB in this documentation. + Do not follow the **Create an AKS cluster** steps or following sections of the AKS quickstart guide, as these topics will be described specifically for CockroachDB in this documentation. -2. From your workstation, create the Kubernetes cluster: +1. From your workstation, create the Kubernetes cluster: - ```shell - $ az aks create \ + {% include_cached copy-clipboard.html %} + ~~~ shell + az aks create \ --resource-group $MY_RESOURCE_GROUP_NAME \ --name $MY_AKS_CLUSTER_NAME \ --node-count 3 \ --generate-ssh-keys - ``` + ~~~ -3. Create an application in your Azure tenant and create a secret named `azure-cluster-identity-credentials-secret` that contains `AZURE_CLIENT_ID` and `AZURE_CLIENT_SECRET` to hold the application credentials. You can use the following example YAML to define this application: +1. Create an application in your Azure tenant and create a secret named `azure-cluster-identity-credentials-secret` that contains `AZURE_CLIENT_ID` and `AZURE_CLIENT_SECRET` to hold the application credentials. 
You can use the following example YAML to define this application: - ```shell + ~~~ yaml apiVersion: v1 kind: Secret metadata: @@ -173,7 +185,7 @@ Cloud providers such as GKE, EKS, and AKS are not required to run CockroachDB on azure-credentials: | azure_client_id: 11111111-1111-1111-1111-111111111111 azure_client_secret: s3cr3t - ``` + ~~~ For more information on how to use these variables, refer to the [`Azure.Identity` documentation](https://learn.microsoft.com/dotnet/api/azure.identity.environmentcredential?view=azure-dotnet). @@ -185,36 +197,42 @@ For bare metal deployments, the specific Kubernetes infrastructure deployment st ## Step 2. Start CockroachDB -### Install the Operator sub-chart +### Install the operator sub-chart 1. Check out the CockroachDB Helm repository from GitHub: - ```shell - $ git clone https://github.com/cockroachdb/helm-charts.git - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + git clone https://github.com/cockroachdb/helm-charts.git + ~~~ -2. Set your environment variables. This step is optional but recommended in order to use the example commands and templates described in the following instructions. Note the default Kubernetes namespace of `cockroach-ns`. +1. Set your environment variables. This step is optional but recommended in order to use the example commands and templates described in the following instructions. Note the default Kubernetes namespace of `cockroach-ns`. - ```shell - $ export CRDBOPERATOR=crdb-operator - $ export CRDBCLUSTER=cockroachdb - $ export NAMESPACE=cockroach-ns - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + export CRDBOPERATOR=crdb-operator + export CRDBCLUSTER=cockroachdb + export NAMESPACE=cockroach-ns + ~~~ -3. Install the Operator sub-chart: +1. 
Install the operator sub-chart: - ```shell - $ kubectl create namespace $NAMESPACE - $ helm install $CRDBOPERATOR ./cockroachdb-parent/charts/operator -n $NAMESPACE - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl create namespace $NAMESPACE + ~~~ + {% include_cached copy-clipboard.html %} + ~~~ shell + helm install $CRDBOPERATOR ./cockroachdb-parent/charts/operator -n $NAMESPACE + ~~~ ### Initialize the cluster 1. Open `cockroachdb-parent/charts/cockroachdb/values.yaml`, a values file that tells Helm how to configure the Kubernetes cluster, in your text editor. -2. Modify the `cockroachdb.crdbCluster.regions` section to describe the number of nodes to deploy and what region(s) to deploy them in. Replace the default `cloudProvider` with the appropriate value (`gcp`, `aws`, `azure`). For bare metal deployments, you can remove the `cloudProvider` field. The following example initializes three nodes on Google Cloud in the `us-central1` region: +1. Modify the `cockroachdb.crdbCluster.regions` section to describe the number of nodes to deploy and what region(s) to deploy them in. Replace the default `cloudProvider` with the appropriate value (`gcp`, `aws`, `azure`). For bare metal deployments, you can remove the `cloudProvider` field. The following example initializes three nodes on Google Cloud in the `us-central1` region: - ```yaml + ~~~ yaml cockroachdb: crdbCluster: regions: @@ -222,39 +240,48 @@ For bare metal deployments, the specific Kubernetes infrastructure deployment st nodes: 3 cloudProvider: gcp namespace: cockroach-ns - ``` + ~~~ If you intend to deploy CockroachDB nodes across multiple different regions, follow the additional steps described in [Deploy across multiple regions](#deploy-across-multiple-regions). -3. Uncomment and modify `cockroachdb.crdbCluster.resources` in the values file with the CPU and memory requests and limits for each node to use. The default values are 4vCPU and 16GB of memory: +1. 
Uncomment and modify `cockroachdb.crdbCluster.resources` in the values file with the CPU and memory requests and limits for each node to use. The default values are 4vCPU and 16GB of memory: For more information on configuring node resource allocation, refer to [Resource management](configure-cockroachdb-kubernetes-operator.html) -4. Modify the TLS configuration as desired. For a secure deployment, set `cockroachdb.tls.enabled` in the values file to `true`. You can either use the default self-signer utility to generate all certificates, provide a custom CA certificate and generate other certificates, or use your own certificates. - - **All self-signed certificates**: By default, the certificates are created by the self-signer utility, which requires no configuration beyond setting a custom certificate duration if desired. This utility creates self-signed certificates for the nodes and root client which are stored in a secret. You can see these certificates by running `kubectl get secrets`: - - ```shell - $ kubectl get secrets +1. Modify the TLS configuration as desired. For a secure deployment, set `cockroachdb.tls.enabled` in the values file to `true`. You can either allow the operator to generate self-signed certificates, provide a custom CA certificate and generate other certificates, or use your own certificates. + - **All self-signed certificates**: By default, the certificates are created automatically by a self-signer utility, which requires no configuration beyond setting a custom certificate duration if desired. This utility creates self-signed certificates for the nodes and root client which are stored in a secret. 
You can see these certificates by running `kubectl get secrets`: + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl get secrets + ~~~ + ~~~ crdb-cockroachdb-ca-secret Opaque 2 23s crdb-cockroachdb-client-secret kubernetes.io/tls 3 22s crdb-cockroachdb-node-secret kubernetes.io/tls 3 23s - ``` + ~~~ {{site.data.alerts.callout_info}} If you are deploying on OpenShift you must also set `cockroachdb.tls.selfSigner.securityContext.enabled` to `false` to mitigate stricter security policies. {{site.data.alerts.end}} - - **Custom CA certificate**: If you wish to supply your own CA certificates to the deployed nodes but allow the self-signer utility to generate client certificates, create a Kubernetes secret with the custom CA certificate. To perform these steps using the `cockroach cert` command: + - **Custom CA certificate**: If you wish to supply your own CA certificates to the deployed nodes but allow automatic generation of client certificates, create a Kubernetes secret with the custom CA certificate. 
To perform these steps using the `cockroach cert` command: - ```shell - $ mkdir certs - $ mkdir my-safe-directory - $ cockroach cert create-ca --certs-dir=certs --ca-key=my-safe-directory/ca.key - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + mkdir certs + ~~~ + {% include_cached copy-clipboard.html %} + ~~~ shell + mkdir my-safe-directory + ~~~ + {% include_cached copy-clipboard.html %} + ~~~ shell + cockroach cert create-ca --certs-dir=certs --ca-key=my-safe-directory/ca.key + ~~~ Set `cockroachdb.tls.selfSigner.caProvided` to `true` and specify the secret where the certificate is stored: - ```yaml + ~~~ yaml cockroachdb: tls: enabled: true @@ -262,35 +289,52 @@ For bare metal deployments, the specific Kubernetes infrastructure deployment st enabled: true caProvided: true caSecret: {ca-secret-name} - ``` + ~~~ {{site.data.alerts.callout_info}} If you are deploying on OpenShift you must also set `cockroachdb.tls.selfSigner.securityContext.enabled` to `false` to mitigate stricter security policies. 
{{site.data.alerts.end}} - **All custom certificates**: Set up your certificates and load them into your Kubernetes cluster as secrets using the following commands: - ```shell - $ mkdir certs - $ mkdir my-safe-directory - $ cockroach cert create-ca --certs-dir=certs --ca-key=my-safe-directory/ca.key - $ cockroach cert create-client root --certs-dir=certs --ca-key=my-safe-directory/ca.key - $ kubectl create secret generic cockroachdb-root --from-file=certs - + {% include_cached copy-clipboard.html %} + ~~~ shell + mkdir certs + ~~~ + {% include_cached copy-clipboard.html %} + ~~~ shell + mkdir my-safe-directory + ~~~ + {% include_cached copy-clipboard.html %} + ~~~ shell + cockroach cert create-ca --certs-dir=certs --ca-key=my-safe-directory/ca.key + ~~~ + {% include_cached copy-clipboard.html %} + ~~~ shell + cockroach cert create-client root --certs-dir=certs --ca-key=my-safe-directory/ca.key + ~~~ + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl create secret generic cockroachdb-root --from-file=certs + ~~~ + ~~~ shell secret/cockroachdb-root created - - $ cockroach cert create-node --certs-dir=certs --ca-key=my-safe-directory/ca.key localhost 127.0.0.1 my-release-cockroachdb-public my-release-cockroachdb-public.cockroach-ns my-release-cockroachdb-public.cockroach-ns.svc.cluster.local *.my-release-cockroachdb *.my-release-cockroachdb.cockroach-ns *.my-release-cockroachdb.cockroach-ns.svc.cluster.local - $ kubectl create secret generic cockroachdb-node --from-file=certs - + ~~~ + {% include_cached copy-clipboard.html %} + ~~~ shell + cockroach cert create-node --certs-dir=certs --ca-key=my-safe-directory/ca.key localhost 127.0.0.1 my-release-cockroachdb-public my-release-cockroachdb-public.cockroach-ns my-release-cockroachdb-public.cockroach-ns.svc.cluster.local *.my-release-cockroachdb *.my-release-cockroachdb.cockroach-ns *.my-release-cockroachdb.cockroach-ns.svc.cluster.local + kubectl create secret generic cockroachdb-node --from-file=certs + 
~~~ + ~~~ shell secret/cockroachdb-node created - ``` + ~~~ {{site.data.alerts.callout_info}} The subject alternative names are based on a release called `my-release` in the `cockroach-ns` namespace. Make sure they match the services created with the release during Helm install. {{site.data.alerts.end}} - If you wish to supply certificates with [cert-manager](https://cert-manager.io/), set `cockroachdb.tls.certManager.enabled` to `true`, and `cockroachdb.tls.certManager.issuer` to an IssuerRef (as they appear in certificate resources) pointing to a clusterIssuer or issuer that you have set up in the cluster. The following k8s application describes an example issuer: + If you wish to supply certificates with [cert-manager](https://cert-manager.io/), set `cockroachdb.tls.certManager.enabled` to `true`, and `cockroachdb.tls.certManager.issuer` to an IssuerRef (as they appear in certificate resources) pointing to a clusterIssuer or issuer that you have set up in the cluster. The following Kubernetes application describes an example issuer: - ```yaml + ~~~ yaml apiVersion: v1 kind: Secret metadata: @@ -309,7 +353,7 @@ For bare metal deployments, the specific Kubernetes infrastructure deployment st spec: ca: secretName: cockroachdb-ca - ``` + ~~~ If your certificates are stored in TLS secrets, such as secrets generated by `cert-manager`, the secret will contain files named: `ca.crt`, `tls.crt`, and `tls.key`. 
@@ -317,16 +361,16 @@ For bare metal deployments, the specific Kubernetes infrastructure deployment st Add the following to the values file: - ```yaml + ~~~ yaml cockroachdb: - tls: - enabled: true - externalCertificates: + tls: enabled: true - certificates: - nodeSecretName: {node_secret_name} - nodeClientSecretName: {client_secret_name} - ``` + externalCertificates: + enabled: true + certificates: + nodeSecretName: {node_secret_name} + nodeClientSecretName: {client_secret_name} + ~~~ Replacing the following: - `{node_secret_name}`: The name of the Kubernetes secret that contains the generated client certificate and key. @@ -334,59 +378,55 @@ For bare metal deployments, the specific Kubernetes infrastructure deployment st For a detailed tutorial of a TLS configuration with manual certificates, refer to [Example: Authenticate with cockroach cert](#example-authenticate-with-cockroach-cert). -5. In `cockroachdb.crdbCluster.localityLabels`, provide [locality labels](#localities) that specify where the locality information of each Kubernetes node is stored. When CockroachDB is initialized on a node, it processes these values as though they are provided through the [`cockroach start --locality`](cockroach-start#locality) flag. +1. In `cockroachdb.crdbCluster.localityLabels`, provide [locality labels](#localities) that specify where the locality information of each Kubernetes node is stored. When CockroachDB is initialized on a node, it processes these values as though they are provided through the [`cockroach start --locality`](cockroach-start#locality) flag. 
- It is not necessary to modify `cockroachdb.crdbCluster.localityLabels` in cloud provider deployments, in which the [`topology.kubernetes.io/region`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesioregion) and [`topology.kubernetes.io/zone`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesiozone) locality labels are applied implicitly to Kubernetes nodes and populated by the regions and zones specific to the cloud provider. + The default configuration uses the `region` and `zone` locality levels, which are set differently depending on the deployment type: + - In cloud provider deployments, the [`topology.kubernetes.io/region`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesioregion) and [`topology.kubernetes.io/zone`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesiozone) locality labels are applied implicitly to Kubernetes nodes and populated by the regions and zones specific to the cloud provider. + - In bare metal deployments, the `topology.kubernetes.io/region` and `topology.kubernetes.io/zone` values are not set implicitly by a cloud provider when initializing the node, so you must set them manually or configure custom locality labels. - ```yaml - cockroachdb: - crdbCluster: - localityLabels: [] - ``` - - For bare metal deployments, you can also use the default `topology.kubernetes.io/region` and `topology.kubernetes.io/zone` values. However, since these values cannot be set implicitly from a cloud provider, you need to set them manually when initializing the node. - - To add more granular levels of locality to your nodes, add custom locality levels as values in the `cockroachdb.crdbCluster.localityLabels` list. 
Any custom `localityLabels` configuration overrides the default `region` and `zone` configuration, so if you append an additional locality level but wish to keep the `region` and `zone` labels you must declare them manually. + To add more granular levels of locality to your nodes or use different locality labels, add custom locality levels as values in the `cockroachdb.crdbCluster.localityLabels` list. Any custom `localityLabels` configuration overrides the default `region` and `zone` configuration, so if you append an additional locality level but wish to keep the `region` and `zone` labels you must declare them manually. The following example uses the existing `region` and `zone` labels and adds an additional `datacenter` locality label that is more granular than `zone`. This example declares that the `datacenter` locality information is stored in the `example.datacenter.locality` variable on the node: - ```yaml + ~~~ yaml cockroachdb: crdbCluster: localityLabels: - topology.kubernetes.io/region - topology.kubernetes.io/zone - example.datacenter.locality - ``` + ~~~ In this example, if a Kubernetes node is initialized in the `us-central1` region, `us-central1-c` zone, and `dc2` datacenter, its `cockroach start --locality` flag would be equivalent to the following: - ```shell + ~~~ shell cockroach start --locality region=us-central1,zone=us-central1-c,example.datacenter.locality=dc2 - ``` + ~~~ - Optionally, review the `cockroachdb.crdbCluster.topologySpreadConstraints` configuration and set `topologyKey` to a locality variable that will have distinct values for each node. The default recommendation is to set this to a zone as follows: + Optionally, review the `cockroachdb.crdbCluster.topologySpreadConstraints` configuration and set `topologyKey` to a locality variable that will have distinct values for each node. 
By default the lowest locality level is `zone`, so the following configuration sets that value as the `topologyKey`: - ```yaml + ~~~ yaml cockroachdb: crdbCluster: topologySpreadConstraints: topologyKey: topology.kubernetes.io/zone - ``` + ~~~ -6. Modify other relevant parts of the configuration such as other `topologySpreadConstraints` fields, `service.ports`, and others as needed for your configuration. +1. Modify other relevant parts of the configuration such as other `topologySpreadConstraints` fields, `service.ports`, and others as needed for your configuration. -7. Run the following command to install the CockroachDB chart using Helm: +1. Run the following command to install the CockroachDB chart using Helm: - ```shell - $ helm install $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb -n $NAMESPACE - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + helm install $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb -n $NAMESPACE + ~~~ You can override the default parameters using the `--set key=value[,key=value]` argument while installing the chart: - ```shell - $ helm install $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --set clusterDomain=cluster-test.local -n $NAMESPACE - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + helm install $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --set clusterDomain=cluster-test.local -n $NAMESPACE + ~~~ #### Deploy across multiple regions @@ -394,12 +434,12 @@ The Helm chart supports specifying multiple region definitions in `cockroachdb.c For each region, modify the `regions` configuration as described in [Initialize the cluster](#initialize-the-cluster) and perform `helm install` against the respective Kubernetes cluster. While applying the installation in a given region, do the following: -* Verify that the domain matches `cockroachdb.clusterDomain` in the values file. 
-* Ensure that `cockroachdb.crdbCluster.regions` captures the information for regions that have already been deployed, including the current region. This allows CockroachDB in the current region to connect to clusters deployed in the existing regions. +- Verify that the domain matches `cockroachdb.clusterDomain` in the values file. +- Ensure that `cockroachdb.crdbCluster.regions` captures the information for regions that have already been deployed, including the current region. This allows CockroachDB in the current region to connect to clusters deployed in the existing regions. The following example shows a configuration across two regions, `us-central1` and `us-east1`, with 3 nodes in each cluster: -```yaml +~~~ yaml cockroachdb: clusterDomain: cluster.gke.gcp-us-east1 crdbCluster: @@ -414,7 +454,7 @@ cockroachdb: cloudProvider: gcp domain: cluster.gke.gcp-us-east1 namespace: cockroach-ns -``` +~~~ ## Step 3. Use the built-in SQL client @@ -422,33 +462,37 @@ To use the CockroachDB SQL client, follow these steps to launch a secure pod run 1. Download the secure client Kubernetes application: - ```shell - $ curl -O https://raw.githubusercontent.com/cockroachdb/helm-charts/master/examples/client-secure.yaml - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + curl -O https://raw.githubusercontent.com/cockroachdb/helm-charts/master/examples/client-secure.yaml + ~~~ {{site.data.alerts.callout_danger}} This client tool logs into CockroachDB as `root` using the root certificates. {{site.data.alerts.end}} -2. Edit the yaml file with the following values: - * `spec.serviceAccountName: my-release-cockroachdb` - * `spec.image: cockroachdb/cockroach:` - * `spec.volumes[0].project.sources[0].secret.name: my-release-cockroachdb-client-secret` +1. 
Edit the yaml file with the following values: + - `spec.serviceAccountName: my-release-cockroachdb` + - `spec.image: cockroachdb/cockroach:` + - `spec.volumes[0].project.sources[0].secret.name: my-release-cockroachdb-client-secret` -3. Launch a pod using this file and keep it running indefinitely: +1. Launch a pod using this file and keep it running indefinitely: - ```shell - $ kubectl create -f client-secure.yaml - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl create -f client-secure.yaml + ~~~ -4. Get a shell into the pod and start the CockroachDB [built-in SQL client](cockroach-sql.html): +1. Get a shell into the pod and start the CockroachDB [built-in SQL client](cockroach-sql.html): - ```shell - $ kubectl exec -it cockroachdb-client-secure \ + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl exec -it cockroachdb-client-secure \ -- ./cockroach sql \ --certs-dir=/cockroach/cockroach-certs \ --host=cockroachdb-public - + ~~~ + ~~~ shell # Welcome to the CockroachDB SQL shell. # All statements must be terminated by a semicolon. # To exit, type: \q. @@ -459,36 +503,36 @@ To use the CockroachDB SQL client, follow these steps to launch a secure pod run # Enter \? for a brief introduction. # root@cockroachdb-public:26257/defaultdb> - ``` + ~~~ - This pod will continue running indefinitely, so any time you need to reopen the built-in SQL client or run any other cockroach client commands (e.g., cockroach node), repeat step 2 using the appropriate cockroach command. If you'd prefer to delete the pod and recreate it when needed, run `kubectl delete pod cockroachdb-client-secure`. + This pod will continue running indefinitely, so any time you need to reopen the built-in SQL client or run any other cockroach client commands (e.g., cockroach node), repeat this step using the appropriate cockroach command. If you'd prefer to delete the pod and recreate it when needed, run `kubectl delete pod cockroachdb-client-secure`. -5. 
Run some basic [CockroachDB SQL statements](learn-cockroachdb-sql.html): +1. Run some basic [CockroachDB SQL statements](learn-cockroachdb-sql.html): - ```sql - > CREATE DATABASE bank; - > CREATE TABLE bank.accounts (id INT PRIMARY KEY, balance DECIMAL); - > INSERT INTO bank.accounts VALUES (1, 1000.50); - > SELECT * FROM bank.accounts; + ~~~ sql + CREATE DATABASE bank; + CREATE TABLE bank.accounts (id INT PRIMARY KEY, balance DECIMAL); + INSERT INTO bank.accounts VALUES (1, 1000.50); + SELECT * FROM bank.accounts; id | balance +----+---------+ 1 | 1000.50 (1 row) - ``` + ~~~ -6. [Create a user with a password](create-user.html#create-a-user-with-a-password): +1. [Create a user with a password](create-user.html#create-a-user-with-a-password): - ```sql - > CREATE USER roach WITH PASSWORD 'Q7gc8rEdS'; - ``` + ~~~ sql + CREATE USER roach WITH PASSWORD 'Q7gc8rEdS'; + ~~~ You will need this username and password to access the DB Console later. -7. Exit the SQL shell and pod: +1. Exit the SQL shell and pod: - ```sql - > \q - ``` + ~~~ sql + \q + ~~~ ## Step 4. Access the DB Console @@ -498,57 +542,61 @@ To access the cluster's [DB Console](ui-overview.html): Get a shell into the pod and start the CockroachDB [built-in SQL client](cockroach-sql.html): - ```shell - $ kubectl exec -it cockroachdb-client-secure \ + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl exec -it cockroachdb-client-secure \ -- ./cockroach sql \ --certs-dir=/cockroach/cockroach-certs \ --host=cockroachdb-public - ``` + ~~~ -2. Assign `roach` to the `admin` role (you only need to do this once): +1. Assign `roach` to the `admin` role (you only need to do this once): - ```sql - > GRANT admin TO roach; - ``` + ~~~ sql + GRANT admin TO roach; + ~~~ -3. Exit the SQL shell and pod: +1. Exit the SQL shell and pod: - ```sql - > \q - ``` + ~~~ sql + \q + ~~~ -4. In a new terminal window, port-forward from your local machine to the `cockroachdb-public` service: +1. 
In a new terminal window, port-forward from your local machine to the `cockroachdb-public` service: - ```shell - $ kubectl port-forward service/cockroachdb-public 8080 + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl port-forward service/cockroachdb-public 8080 + ~~~ + ~~~ shell Forwarding from 127.0.0.1:8080 -> 8080 - ``` + ~~~ Run the `port-forward` command on the same machine as the web browser in which you want to view the DB Console. If you have been running these commands from a cloud instance or other non-local shell, you will not be able to view the UI without configuring `kubectl` locally and running the preceding `port-forward` command on your local machine. -5. Go to [`https://localhost:8080`](https://localhost:8080/) and log in with the username and password you created earlier. +1. Go to [`https://localhost:8080`](https://localhost:8080/) and log in with the username and password you created earlier. {{site.data.alerts.callout_info}} If you are using Google Chrome, and get an error about not being able to reach `localhost` because its certificate has been revoked, go to `chrome://flags/#allow-insecure-localhost`, enable "Allow invalid certificates for resources loaded from localhost", and then restart the browser. This degrades security for all sites running on `localhost`, not just CockroachDB's DB Console, so enable the feature only temporarily. {{site.data.alerts.end}} -6. In the DB Console, verify that the cluster is running as expected: +1. In the DB Console, verify that the cluster is running as expected: 1. View the [**Node List**](ui-cluster-overview-page.html#node-list) to ensure that all nodes successfully joined the cluster. - 2. Click the **Databases** tab on the left to verify that `bank` is listed. + 1. Click the **Databases** tab on the left to verify that `bank` is listed. 
## Next steps Read the following pages for detailed information on cluster scaling, certificate management, resource management, best practices, and other cluster operation details: -* [Pod scheduling](schedule-cockroachdb-kubernetes-operator.html) -* [Resource management](configure-cockroachdb-kubernetes-operator.html) -* [Certificate management](secure-cockroachdb-kubernetes-operator.html) -* [Cluster scaling](scale-cockroachdb-kubernetes-operator.html) -* [Cluster monitoring](monitor-cockroachdb-kubernetes-operator.html) -* [Upgrade a cluster](upgrade-cockroachdb-kubernetes-operator.html) -* [CockroachDB performance on Kubernetes](kubernetes-operator-performance.html) +- [Pod scheduling](schedule-cockroachdb-kubernetes-operator.html) +- [Resource management](configure-cockroachdb-kubernetes-operator.html) +- [Certificate management](secure-cockroachdb-kubernetes-operator.html) +- [Cluster scaling](scale-cockroachdb-kubernetes-operator.html) +- [Cluster monitoring](monitor-cockroachdb-kubernetes-operator.html) +- [Upgrade a cluster](upgrade-cockroachdb-kubernetes-operator.html) +- [CockroachDB performance on Kubernetes](kubernetes-operator-performance.html) -## Appendix +## Examples ### Authenticate with `cockroach cert` @@ -556,41 +604,47 @@ The following example uses [cockroach cert commands](cockroach-cert.html) to gen 1. Create two directories: - ```shell - $ mkdir certs my-safe-directory - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + mkdir certs my-safe-directory + ~~~ -2. Create the CA certificate and key pair: +1. Create the CA certificate and key pair: - ```shell - $ cockroach cert create-ca \ + {% include_cached copy-clipboard.html %} + ~~~ shell + cockroach cert create-ca \ --certs-dir=certs \ --ca-key=my-safe-directory/ca.key - ``` + ~~~ -3. Create a client certificate and key pair for the root user: +1. 
Create a client certificate and key pair for the root user: - ```shell - $ cockroach cert create-client root \ + {% include_cached copy-clipboard.html %} + ~~~ shell + cockroach cert create-client root \ --certs-dir=certs \ --ca-key=my-safe-directory/ca.key - ``` + ~~~ -4. Upload the client certificate and key to the Kubernetes cluster as a secret, renaming them to the filenames required by the operator: +1. Upload the client certificate and key to the Kubernetes cluster as a secret, renaming them to the filenames required by the CockroachDB operator: - ```shell - $ kubectl create secret generic cockroachdb.client.root \ + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl create secret generic cockroachdb.client.root \ --from-file=tls.key=certs/client.root.key \ --from-file=tls.crt=certs/client.root.crt \ --from-file=ca.crt=certs/ca.crt - + ~~~ + ~~~ shell secret/cockroachdb.client.root created - ``` + ~~~ -5. Create the certificate and key pair for your CockroachDB nodes, specifying the namespace you used when deploying the cluster. This example uses the `cockroach-ns` namespace: +1. Create the certificate and key pair for your CockroachDB nodes, specifying the namespace you used when deploying the cluster. This example uses the `cockroach-ns` namespace: - ```shell - $ cockroach cert create-node localhost \ + {% include_cached copy-clipboard.html %} + ~~~ shell + cockroach cert create-node localhost \ 127.0.0.1 \ cockroachdb-public \ cockroachdb-public.cockroach-ns \ @@ -600,33 +654,37 @@ The following example uses [cockroach cert commands](cockroach-cert.html) to gen *.cockroachdb.cockroach-ns.svc.cluster.local \ --certs-dir=certs \ --ca-key=my-safe-directory/ca.key - ``` + ~~~ -6. Upload the node certificate and key to the Kubernetes cluster as a secret, renaming them to the filenames required by the operator: +1. 
Upload the node certificate and key to the Kubernetes cluster as a secret, renaming them to the filenames required by the CockroachDB operator: - ```shell - $ kubectl create secret generic cockroachdb.node \ + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl create secret generic cockroachdb.node \ --from-file=tls.key=certs/node.key \ --from-file=tls.crt=certs/node.crt \ --from-file=ca.crt=certs/ca.crt - + ~~~ + ~~~ shell secret/cockroachdb.node created - ``` - -7. Check that the secrets were created on the cluster: + ~~~ - ```shell - $ kubectl get secrets +1. Check that the secrets were created on the cluster: + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl get secrets + ~~~ + ~~~ shell NAME TYPE DATA AGE cockroachdb.client.root Opaque 3 13s cockroachdb.node Opaque 3 3s default-token-6js7b kubernetes.io/service-account-token 3 9h - ``` + ~~~ -8. Add `cockroachdb.tls.externalCertificates.certificates.nodeSecretName` and `cockroachdb.tls.externalCertificates.certificates.nodeClientSecretName` to the values file used to deploy the cluster: +1. Add `cockroachdb.tls.externalCertificates.certificates.nodeSecretName` and `cockroachdb.tls.externalCertificates.certificates.nodeClientSecretName` to the values file used to deploy the cluster: - ```yaml + ~~~ yaml cockroachdb: tls: enabled: true @@ -635,4 +693,4 @@ The following example uses [cockroach cert commands](cockroach-cert.html) to gen certificates: nodeSecretName: cockroachdb.node nodeClientSecretName: cockroachdb.client.root - ``` + ~~~ diff --git a/src/current/v25.2/kubernetes-operator-overview.md b/src/current/v25.2/kubernetes-operator-overview.md index bc33e7f678f..46e741120bd 100644 --- a/src/current/v25.2/kubernetes-operator-overview.md +++ b/src/current/v25.2/kubernetes-operator-overview.md @@ -1,6 +1,6 @@ --- -title: Kubernetes Operator Overview -summary: An overview of deployment and management of a CockroachDB cluster using our Kubernetes Operator. 
+title: CockroachDB Operator Overview +summary: An overview of deployment and management of a CockroachDB cluster using the CockroachDB operator with Kubernetes. toc: true toc_not_nested: true secure: true diff --git a/src/current/v25.2/kubernetes-operator-performance.md b/src/current/v25.2/kubernetes-operator-performance.md index 9bb9bf96507..83df6072b43 100644 --- a/src/current/v25.2/kubernetes-operator-performance.md +++ b/src/current/v25.2/kubernetes-operator-performance.md @@ -1,6 +1,6 @@ --- -title: CockroachDB Performance with the Kubernetes Operator -summary: How running CockroachDB in Kubernetes affects its performance and how to get the best possible performance when running in Kubernetes using the operator. +title: Performance with the CockroachDB Operator +summary: How running CockroachDB in Kubernetes affects its performance and how to get the best possible performance when running in Kubernetes using the CockroachDB operator. toc: true docs_area: deploy --- @@ -13,7 +13,7 @@ Before you focus on optimizing a Kubernetes-orchestrated CockroachDB cluster: 1. Before deploying on Kubernetes, ensure that performance is optimized for your workload on identical hardware. You may find that you first need to [modify your workload](performance-best-practices-overview.html) or use [different machine specs](recommended-production-settings.html#hardware) to achieve the performance you need. -2. Read the documentation for [deploying CockroachDB on a Kubernetes cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster) to familiarize yourself with the necessary Kubernetes terminology and deployment abstractions. +1. Read the documentation for [deploying CockroachDB on a Kubernetes cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster) to familiarize yourself with the necessary Kubernetes terminology and deployment abstractions. 


## Performance factors

@@ -45,7 +45,7 @@ Kubernetes exposes the disk types used by its volume provisioner via its [`Stora To do this, pick a volume provisioner from the list in the [Kubernetes documentation](https://kubernetes.io/docs/concepts/storage/storage-classes/), modify the example YAML file to specify the disk type you want, then run `kubectl create -f {your-storage-class-file}.yaml`. For example, in order to use the `pd-ssd` disk type on Google Compute Engine or Google Kubernetes Engine, you can use a `StorageClass` file like the following: -```yaml +~~~ yaml apiVersion: storage.k8s.io/v1 kind: StorageClass metadata: @@ -53,21 +53,23 @@ metadata: provisioner: kubernetes.io/gce-pd parameters: type: pd-ssd -``` +~~~ -You can then use this new disk type either by configuring the CockroachDB YAML file to request it or by making it the default. You may also want to set additional parameters as documented in the list of Kubernetes storage classes, such as configuring the `iopsPerGB` if you're creating a `StorageClass` for AWS's `io1` Provisioned IOPS volume type. +You may also want to set additional parameters as documented in the [Kubernetes storage classes documentation](https://kubernetes.io/docs/concepts/storage/storage-classes/), such as configuring the `iopsPerGB` if you're creating a `StorageClass` for AWS's `io1` Provisioned IOPS volume type. + +You can configure this new disk type to only be used by CockroachDB nodes or as the default for all volumes in your cluster: #### Configure the disk type used by CockroachDB To use a new `StorageClass` without making it the default in your cluster, modify your application's YAML file to ask for it. 
In the CockroachDB configuration, that means adding `storageClassName` to `cockroachdb.crdbCluster.dataStore.volumeClaimTemplates`: -```yaml +~~~ yaml cockroachdb: crdbCluster: dataStore: volumeClaimTemplate: storageClassName: -``` +~~~ When running `kubectl create -f` on your modified YAML file, Kubernetes should create volumes using the specified `storageClassName`. @@ -75,29 +77,35 @@ When running `kubectl create -f` on your modified YAML file, Kubernetes should c To make a new `StorageClass` the default for all volumes in your cluster, run the following `kubectl` commands. -```shell -$ kubectl get storageclasses - +{% include_cached copy-clipboard.html %} +~~~ shell +kubectl get storageclasses +~~~ +~~~ shell NAME PROVISIONER ssd kubernetes.io/gce-pd standard (default) kubernetes.io/gce-pd -``` -```shell -$ kubectl patch storageclass standard -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"false"}}}' - +~~~ +{% include_cached copy-clipboard.html %} +~~~ shell +kubectl patch storageclass standard -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"false"}}}' +~~~ +~~~ shell storageclass "standard" patched -``` -```shell -$ kubectl patch storageclass ssd -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}' - +~~~ +{% include_cached copy-clipboard.html %} +~~~ shell +kubectl patch storageclass ssd -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}' +~~~ +~~~ shell storageclass "ssd" patched -``` +~~~ ### Disk size On some cloud providers, including all GCP disks and the AWS `io1` disk type, the number of IOPS available to a disk is directly correlated to the size of the disk. In such cases, increasing the size of your disks can significantly improve CockroachDB performance, and decrease the risk of filling them up. 
Before you create your CockroachDB cluster, modify the `cockroachdb.crdbCluster.dataStore.volumeClaimTemplate` in the CockroachDB YAML file to ask for more space. The following example sets this value to 1TB: -```yaml +~~~ yaml cockroachdb: crdbCluster: dataStore: @@ -106,7 +114,7 @@ cockroachdb: resources: requests: storage: 1024Gi -``` +~~~ Since [GCE disk IOPS scale linearly with disk size](https://cloud.google.com/compute/docs/disks/performance#type_comparison), a 1TiB disk gives 1024 times as many IOPS as a 1GiB disk, which can make a very large difference for write-heavy workloads. @@ -126,9 +134,11 @@ Resource requests reserve a certain amount of CPU or memory for your container. To determine how many resources are usable on your Kubernetes nodes, you can run: -```shell -$ kubectl describe nodes - +{% include_cached copy-clipboard.html %} +~~~ shell +kubectl describe nodes +~~~ +~~~ shell Name: gke-perf-default-pool-aafee20c-k4t8 [...] Capacity: @@ -150,7 +160,7 @@ Allocated resources: CPU Requests CPU Limits Memory Requests Memory Limits ------------ ---------- --------------- ------------- 360m (9%) 0 (0%) 110Mi (0%) 170Mi (1%) -``` +~~~ In the output, the `Allocatable` field shows the `cpu` and `memory` resources Kubernetes will provide to pods running on the machine. The difference between the machine's `Capacity` and its `Allocatable` resources is taken up by the operating system and Kubernetes' management processes. In the preceding output, `3920m` stands for 3920 "milli-CPUs", or "thousandths of a CPU". @@ -160,14 +170,14 @@ On Kubernetes v1.10 or earlier, it is difficult to truly use all of the allocata Once you've picked out an amount of CPU and memory to reserve for Cockroach, configure the resource requests in your CockroachDB YAML file. They should go underneath the `containers` heading. 
For example, to use most of the available resources on the machines described above, you'd configure the following lines of your values file: -```yaml +~~~ yaml cockroachdb: crdbCluster: resources: requests: cpu: 3500m memory: 12300Mi -``` +~~~ When you initialize the cluster, check that all the CockroachDB pods are scheduled successfully. If you see any get stuck in the pending state, run `kubectl describe pod {podname}` and check the `Events` for information about why they're still pending. You may need to manually preempt pods on one or more nodes by running `kubectl delete pod` on them to make room for the CockroachDB pods. As long as the pods you delete were created by a higher-level Kubernetes object such as a `Deployment`, they'll be safely recreated on another node. @@ -177,7 +187,7 @@ Resource limits cap the resources used by a pod to no more than the provided lim To set resource limits, in addition to the [resource requests](#resource-requests) described in the preceding section, change the configuration as follows: -```yaml +~~~ yaml cockroachdb: crdbCluster: resources: @@ -185,9 +195,8 @@ cockroachdb: cpu: 3500m memory: 12300Mi limits: - cpu: 3500m memory: 12300Mi -``` +~~~ Pods will be limited to their reserved resources and are unlikely to be preempted, except in rare cases. This will not improve performance on an underutilized Kubernetes cluster, but provides more predictable performance as other workloads run. @@ -199,9 +208,10 @@ While setting memory limits is strongly recommended, [setting CPU limits can hur Even if you do not manually set resource requests, they are likely being applied. In many installations of Kubernetes, a [LimitRange](https://kubernetes.io/docs/tasks/administer-cluster/cpu-default-namespace/) is preconfigured for the `default` namespace that applies a default CPU request of `100m`, or one-tenth of a CPU. 
You can see this configuration by running the following command: -```shell -$ kubectl describe limitranges -``` +{% include_cached copy-clipboard.html %} +~~~ shell +kubectl describe limitranges +~~~ Experimentally, this does not appear to have a noticeable effect on CockroachDB's performance when a Kubernetes cluster isn't heavily utilized, but do not be surprised if you see CPU requests on your pods that you didn't set. @@ -209,9 +219,11 @@ Experimentally, this does not appear to have a noticeable effect on CockroachDB' As described in [Resource requests and limits](#resource-requests-and-limits), your Kubernetes cluster will always run pods other than CockroachDB. You can see them by running: -```shell -$ kubectl get pods --all-namespaces - +{% include_cached copy-clipboard.html %} +~~~ shell +kubectl get pods --all-namespaces +~~~ +~~~ shell NAMESPACE NAME READY STATUS RESTARTS AGE kube-system event-exporter-v0.1.7-5c4d9556cf-6v7lf 2/2 Running 0 2m kube-system fluentd-gcp-v2.0.9-6rvmk 2/2 Running 0 2m @@ -228,13 +240,14 @@ kube-system kube-proxy-gke-test-default-pool-828d39a7-rc4m 1/1 Running kube-system kube-proxy-gke-test-default-pool-828d39a7-trd1 1/1 Running 0 2m kube-system kubernetes-dashboard-768854d6dc-v7ng8 1/1 Running 0 2m kube-system l7-default-backend-6497bcdb4d-2kbh4 1/1 Running 0 2m -``` +~~~ These ["cluster add-ons"](https://github.com/kubernetes/kubernetes/tree/master/cluster/addons) provide a variety of basic services like managing DNS entries for services within the cluster, powering the Kubernetes dashboard UI, or collecting logs or metrics from all the pods running in the cluster. If you do not like having them take up space in your cluster, you can prevent some of them from running by configuring your Kubernetes cluster appropriately. 
For example, on GKE, you can create a cluster with the minimal set of add-ons by running: -```shell -$ gcloud container clusters create --no-enable-cloud-logging --no-enable-cloud-monitoring --addons="" -``` +{% include_cached copy-clipboard.html %} +~~~ shell +gcloud container clusters create --no-enable-cloud-logging --no-enable-cloud-monitoring --addons="" +~~~ However, some pods like `kube-proxy` and `kube-dns` are required for compliant Kubernetes clusters. Since there will always be pods other than CockroachDB running in your cluster, it's important to understand and account for the effects of having CockroachDB share a machine with other processes. The more processes there are on the same machine as a CockroachDB pod, the slower and less predictable its performance will likely be. To protect against this, it's strongly recommended to specify [resource requests](#resource-requests) on your CockroachDB pods to provide some level of CPU and memory isolation. @@ -246,7 +259,7 @@ If setting appropriate resource requests still isn't getting you the performance Client applications such as benchmarking applications running on the same machines as CockroachDB are likely to compete for resources. As application load increases, so does the load on CockroachDB processes. The best way to avoid this is to [set resource requests and limits](#resource-requests-and-limits). Alternatively, you can also set [anti-affinity scheduling policies](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity) on your client applications: -```yaml +~~~ yaml cockroachdb: crdbCluster: affinity: @@ -270,7 +283,7 @@ cockroachdb: values: - cockroachdb topologyKey: kubernetes.io/hostname -``` +~~~ The preceding configuration will first prefer to put the `loadgen` pods on different nodes from each other, which is important for the fault tolerance of the `loadgen` pods themselves. 
As a secondary priority, it will attempt to put the pods on nodes that do not already have a running `CockroachDB` pod. This will ensure the best possible balance of fault tolerance and performance for the load generator and CockroachDB cluster. diff --git a/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md b/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md index 385a303c585..30ba55d3fd4 100644 --- a/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md +++ b/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md @@ -1,73 +1,78 @@ --- title: Migrate from Helm StatefulSet -summary: Migration guide detailing how to migrate away from a Helm deployment of CockroachDB to the Kubernetes operator. +summary: Migration guide detailing how to migrate away from a Helm deployment of CockroachDB to the CockroachDB operator. toc: true toc_not_nested: true secure: true docs_area: deploy --- -This guide describes how to migrate an existing CockroachDB cluster managed via StatefulSet to the enterprise operator. +This guide describes how to migrate an existing CockroachDB cluster managed via StatefulSet to the CockroachDB operator. These instructions assume that you are migrating from a StatefulSet cluster that was configured using the Helm chart with the following command: -```shell +~~~ shell helm upgrade --install --set operator.enabled=false crdb-test --debug ./cockroachdb -``` +~~~ +{{site.data.alerts.callout_success}} If your existing cluster was created using the public operator, refer to the [public operator migration guide](migrate-cockroachdb-kubernetes-operator.html). +{{site.data.alerts.end}} This migration can be completed without affecting cluster availability, and preserves existing disks so that data doesn't need to be replicated into empty volumes. The process scales down the StatefulSet by one node before adding each operator-managed pod, so the maximum cluster capacity will be reduced by one node periodically throughout the migration. 
-{{site.data.alerts.callout_danger}} -This migration process is only recommended for non-production environments at this time. -{{site.data.alerts.end}} - ## Step 1. Prepare the migration helper Build the migration helper and add the `./bin` directory to your PATH: -```shell -$ make bin/migration-helper -$ export PATH=$PATH:$(pwd)/bin -``` +{% include_cached copy-clipboard.html %} +~~~ shell +make bin/migration-helper +export PATH=$PATH:$(pwd)/bin +~~~ Export environment variables for the existing deployment: -```shell -# Set STS_NAME to the cockroachdb statefulset deployed via helm chart. -$ export STS_NAME="crdb-example-cockroachdb" - -# Set NAMESPACE to the namespace where the statefulset is installed. -$ export NAMESPACE="default" - -# RELEASE_NAME describes the release name of the installed helm chart release. -$ export RELEASE_NAME=$(kubectl get sts $STS_NAME -n $NAMESPACE -o yaml | yq '.metadata.annotations."meta.helm.sh/release-name"') - -# Set CLOUD_PROVIDER to the cloud vendor where k8s cluster is residing. -# All the major cloud providers are supported (gcp,aws,azure) -$ export CLOUD_PROVIDER=gcp - -# Set REGION to the cloud provider's identifier of this region. -# This region must match the "topology.kubernetes.io/region" label in -# the Kubernetes nodes for this cluster. -$ export REGION=us-central1 -``` +- Set STS_NAME to the cockroachdb statefulset deployed via helm chart: + {% include_cached copy-clipboard.html %} + ~~~ shell + export STS_NAME="crdb-example-cockroachdb" + ~~~ + +- Set NAMESPACE to the namespace where the statefulset is installed: + {% include_cached copy-clipboard.html %} + ~~~ shell + export NAMESPACE="default" + ~~~ + +- Set CLOUD_PROVIDER to the cloud vendor where the Kubernetes cluster resides. All major cloud providers are supported (gcp, aws, azure): + {% include_cached copy-clipboard.html %} + ~~~ shell + export CLOUD_PROVIDER=gcp + ~~~ + +- Set REGION to the cloud provider's identifier of this region.
This region must match the "topology.kubernetes.io/region" label in the Kubernetes nodes for this cluster: + {% include_cached copy-clipboard.html %} + ~~~ shell + export REGION=us-central1 + ~~~ ## Step 2. Generate manifests with the migration helper The operator uses slightly different certificates than the CockroachDB Helm chart, and mounts them in configmaps and secrets with different names. Use the migration helper utility with the `migrate-certs` option to re-map and generate TLS certificates: -```shell -$ bin/migration-helper migrate-certs --statefulset-name $STS_NAME --namespace $NAMESPACE -``` +{% include_cached copy-clipboard.html %} +~~~ shell +bin/migration-helper migrate-certs --statefulset-name $STS_NAME --namespace $NAMESPACE +~~~ Generate a manifest for each crdbnode and the crdbcluster based on the state of the StatefulSet. The new pods and their associated PVCs must have the same names as the original StatefulSet-managed pods and PVCs. The new operator-managed pods will then use the original PVCs, rather than replicate data into empty nodes. -```shell -$ mkdir -p manifests -$ bin/migration-helper build-manifest helm --statefulset-name $STS_NAME --namespace $NAMESPACE --cloud-provider $CLOUD_PROVIDER --cloud-region $REGION --output-dir ./manifests -``` +{% include_cached copy-clipboard.html %} +~~~ shell +mkdir -p manifests +bin/migration-helper build-manifest helm --statefulset-name $STS_NAME --namespace $NAMESPACE --cloud-provider $CLOUD_PROVIDER --cloud-region $REGION --output-dir ./manifests +~~~ ## Step 3. 
Replace statefulset pods with operator nodes @@ -75,44 +80,49 @@ To migrate seamlessly from the CockroachDB Helm chart to the operator, scale dow Create objects with `kubectl` that will eventually be owned by the crdbcluster: -```shell -$ kubectl create priorityclass crdb-critical --value 500000000 -``` +{% include_cached copy-clipboard.html %} +~~~ shell +kubectl create priorityclass crdb-critical --value 500000000 +~~~ Install the `crdb-operator` with Helm: -```shell -$ helm upgrade --install crdb-operator ./cockroachdb-parent/charts/operator -``` +{% include_cached copy-clipboard.html %} +~~~ shell +helm upgrade --install crdb-operator ./cockroachdb-parent/charts/operator +~~~ For each pod in the StatefulSet, perform the following steps: 1. Scale the StatefulSet down by one replica. For example, for a five-node cluster, scale the StatefulSet down to four replicas: - ```shell - $ kubectl scale statefulset/$STS_NAME --replicas=4 - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl scale statefulset/$STS_NAME --replicas=4 + ~~~ 2. Create the `crdbnode` resource that corresponds to the StatefulSet pod you just scaled down. Each manifest is labeled with the pattern `crdbnode-X.yaml`, where `X` corresponds to a StatefulSet pod named `{STS_NAME}-X`. Note the pod that was scaled down and specify its manifest in a command like the following: - ```shell - $ kubectl apply -f manifests/crdbnode-4.yaml - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl apply -f manifests/crdbnode-4.yaml + ~~~ -3. Wait for the new pod to become ready. If it doesn’t, check the operator logs for errors. +3. Wait for the new pod to become ready. If it doesn’t, [check the operator logs](monitor-cockroachdb-kubernetes-operator.html#monitor-the-operator) for errors. 4. Before moving on to the next replica migration, verify that there are no underreplicated ranges: 1. Set up port forwarding to access the CockroachDB node’s HTTP interface. 
Note that the DB Console runs on port 8080 by default: - ```shell - $ kubectl port-forward pod/cockroachdb-4 8080:8080 - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl port-forward pod/cockroachdb-4 8080:8080 + ~~~ 2. Check that there are zero underreplicated ranges. The following command outputs the number of under-replicated ranges on this CockroachDB node: - ```shell - $ curl --insecure -s https://localhost:8080/_status/vars | grep "ranges_underreplicated{" | awk '{print $2}' - ``` + ~~~ shell + curl --insecure -s https://localhost:8080/_status/vars | grep "ranges_underreplicated{" | awk '{print $2}' + ~~~ Repeat these steps until the StatefulSet has zero replicas. @@ -122,26 +132,30 @@ The Helm chart creates a public Service that exposes both SQL and gRPC connectio Apply the updated Service manifest: -```shell -$ kubectl apply -f manifests/public-service.yaml -``` +{% include_cached copy-clipboard.html %} +~~~ shell +kubectl apply -f manifests/public-service.yaml +~~~ The existing StatefulSet creates a PodDisruptionBudget (PDB) that conflicts with the one managed by the operator. To avoid this conflict, delete the existing PDB: -```shell -$ kubectl delete poddisruptionbudget $STS_NAME-budget -``` +{% include_cached copy-clipboard.html %} +~~~ shell +kubectl delete poddisruptionbudget $STS_NAME-budget +~~~ ## Step 5. 
Deploy the crdbcluster object Delete the StatefulSet that was scaled down to zero, as the Helm upgrade can only proceed if no StatefulSet is present: -```shell -$ kubectl delete statefulset $STS_NAME -``` +{% include_cached copy-clipboard.html %} +~~~ shell +kubectl delete statefulset $STS_NAME +~~~ Apply the crdbcluster manifest using Helm: -```shell -$ helm upgrade $RELEASE_NAME ./cockroachdb-parent/charts/cockroachdb -f manifests/values.yaml -``` \ No newline at end of file +{% include_cached copy-clipboard.html %} +~~~ shell +helm upgrade $RELEASE_NAME ./cockroachdb-parent/charts/cockroachdb -f manifests/values.yaml +~~~ \ No newline at end of file diff --git a/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md b/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md index feeb5940532..c9bf01b051b 100644 --- a/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md @@ -1,181 +1,209 @@ --- -title: Migrate from legacy Kubernetes Operator -summary: Migration guide detailing how to migrate away from a Helm deployment of CockroachDB to the Kubernetes operator. +title: Migrate from the Public Operator +summary: Migration guide detailing how to migrate away from a deployment using the public Kubernetes operator to the CockroachDB operator. toc: true toc_not_nested: true secure: true docs_area: deploy --- -This guide describes how to migrate an existing CockroachDB cluster managed via the public operator to the enterprise operator. +This guide describes how to migrate an existing CockroachDB cluster managed via the public operator to the CockroachDB operator. 
These instructions assume that you are migrating from a public operator cluster that is managed with kubectl via the following yaml files: -```shell -$ kubectl apply -f https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v2.17.0/install/crds.yaml -$ kubectl apply -f https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v2.17.0/install/operator.yaml -$ kubectl apply -f https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v2.17.0/examples/example.yaml -``` +{% include_cached copy-clipboard.html %} +~~~ shell +kubectl apply -f https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v2.17.0/install/crds.yaml +kubectl apply -f https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v2.17.0/install/operator.yaml +kubectl apply -f https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v2.17.0/examples/example.yaml +~~~ +{{site.data.alerts.callout_success}} If your existing cluster was created as a StatefulSet using Helm, refer to the [Helm migration guide](migrate-cockroachdb-kubernetes-helm.html). - -This migration process is designed to allow migration to occur without affecting cluster availability, and preserving existing disks so data doesn’t need to be replicated into empty volumes. Note that this process scales down the StatefulSet by one node before adding each operator-managed pod, so the maximum cluster capacity will be reduced by one node periodically throughout the migration. - -{{site.data.alerts.callout_info}} -This migration process is currently only recommended to run in a non-production environment. We are actively working on a rollback procedure but are looking for early feedback on this process. {{site.data.alerts.end}} +This migration process can be completed without affecting cluster availability, and preserves existing disks so that data doesn’t need to be replicated into empty volumes. 
This process scales down the StatefulSet by one node before adding each operator-managed pod, so the maximum cluster capacity will be reduced by one node periodically throughout the migration. + ## Step 1. Prepare the migration helper Build the migration helper and add the `./bin` directory to your PATH: -```shell -$ make bin/migration-helper -$ export PATH=$PATH:$(pwd)/bin -``` - -Export environment variables about the existing deployment: - -```shell -# Set CRDBCLUSTER to the crdbcluster custom resource name in the public operator. -$ export CRDBCLUSTER="cockroachdb" - -# Set NAMESPACE to the namespace where the statefulset is installed. -$ export NAMESPACE="default" - -# Set CLOUD_PROVIDER to the cloud vendor where k8s cluster is residing. -# All the major cloud providers are supported (gcp,aws,azure) -$ export CLOUD_PROVIDER=gcp - -# Set REGION to the cloud provider's identifier of this region. -# This region must match the "topology.kubernetes.io/region" label in -# the Kubernetes nodes for this cluster. -$ export REGION=us-central1 -``` +{% include_cached copy-clipboard.html %} +~~~ shell +make bin/migration-helper +export PATH=$PATH:$(pwd)/bin +~~~ + +Export environment variables for the existing deployment: + +- Set CRDBCLUSTER to the crdbcluster custom resource name in the public operator: + {% include_cached copy-clipboard.html %} + ~~~ shell + export CRDBCLUSTER="cockroachdb" + ~~~ + +- Set NAMESPACE to the namespace where the statefulset is installed: + {% include_cached copy-clipboard.html %} + ~~~ shell + export NAMESPACE="default" + ~~~ + +- Set CLOUD_PROVIDER to the cloud vendor where the Kubernetes cluster resides. All major cloud providers are supported (gcp, aws, azure): + {% include_cached copy-clipboard.html %} + ~~~ shell + export CLOUD_PROVIDER=gcp + ~~~ + +- Set REGION to the cloud provider's identifier of this region.
This region must match the "topology.kubernetes.io/region" label in the Kubernetes nodes for this cluster: + {% include_cached copy-clipboard.html %} + ~~~ shell + export REGION=us-central1 + ~~~ Back up the crdbcluster resource in case there is a need to revert: -```shell -$ mkdir -p backup -$ kubectl get crdbcluster -o yaml $CRDBCLUSTER > backup/crdbcluster-$CRDBCLUSTER.yaml -``` +{% include_cached copy-clipboard.html %} +~~~ shell +mkdir -p backup +kubectl get crdbcluster -o yaml $CRDBCLUSTER > backup/crdbcluster-$CRDBCLUSTER.yaml +~~~ ## Step 2. Generate manifests with the migration helper -The enterprise operator uses slightly different certificates than the public operator, and mounts them in configmaps and secrets with different names. Use the migration helper utility with the `migrate-certs` option to re-map and generate TLS certificates: +The CockroachDB operator uses slightly different certificates than the public operator, and mounts them in configmaps and secrets with different names. Use the migration helper utility with the `migrate-certs` option to re-map and generate TLS certificates: -```shell -$ bin/migration-helper migrate-certs --statefulset-name $STS_NAME --namespace $NAMESPACE -``` +{% include_cached copy-clipboard.html %} +~~~ shell +bin/migration-helper migrate-certs --statefulset-name $STS_NAME --namespace $NAMESPACE +~~~ -Generate a manifest for each crdbnode and the crdbcluster based on the state of the StatefulSet. We do this because we want the new pods and their associated PVCs to have the same names as the original StatefulSet-managed pods and PVCs. This means that the new operator-managed pods will use the original PVCs rather than replicate data into empty nodes. +Generate a manifest for each crdbnode and the crdbcluster based on the state of the StatefulSet. The new pods and their associated PVCs must have the same names as the original StatefulSet-managed pods and PVCs. 
The new CockroachDB operator-managed pods will then use the original PVCs, rather than replicate data into empty nodes. -```shell -$ mkdir -p manifests -$ bin/migration-helper build-manifest helm --statefulset-name $STS_NAME --namespace $NAMESPACE --cloud-provider $CLOUD_PROVIDER --cloud-region $REGION --output-dir ./manifests -``` +{% include_cached copy-clipboard.html %} +~~~ shell +mkdir -p manifests +bin/migration-helper build-manifest helm --statefulset-name $STS_NAME --namespace $NAMESPACE --cloud-provider $CLOUD_PROVIDER --cloud-region $REGION --output-dir ./manifests +~~~ -## Step 3. Uninstall and replace the old operator +## Step 3. Uninstall and replace the public operator -The public operator and the enterprise operator use custom resource definitions with the same names, so you must remove the public operator before installing the cloud operator. Run the following commands to uninstall the public operator, without deleting its managed resources: +The public operator and the CockroachDB operator use custom resource definitions with the same names, so you must remove the public operator before installing the CockroachDB operator. Run the following commands to uninstall the public operator, without deleting its managed resources: -```shell -# Ensure that the operator can't accidentally delete managed k8s objects. -kubectl delete clusterrolebinding cockroach-operator-rolebinding +- Ensure that the operator can't accidentally delete managed Kubernetes objects: + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl delete clusterrolebinding cockroach-operator-rolebinding + ~~~ -# Delete public operator custom resource. -kubectl delete crdbcluster $CRDBCLUSTER --cascade=orphan +- Delete the public operator custom resource: + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl delete crdbcluster $CRDBCLUSTER --cascade=orphan + ~~~ -# Delete public operator resources and custom resource definition. 
-kubectl delete -f https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v2.17.0/install/crds.yaml -kubectl delete serviceaccount cockroach-operator-sa -n cockroach-operator-system -kubectl delete clusterrole cockroach-operator-role -kubectl delete clusterrolebinding cockroach-operator-rolebinding -kubectl delete service cockroach-operator-webhook-service -n cockroach-operator-system -kubectl delete deployment cockroach-operator-manager -n cockroach-operator-system -kubectl delete mutatingwebhookconfigurations cockroach-operator-mutating-webhook-configuration -kubectl delete validatingwebhookconfigurations cockroach-operator-validating-webhook-configuration -``` +- Delete public operator resources and custom resource definition: + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl delete -f https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v2.17.0/install/crds.yaml + kubectl delete serviceaccount cockroach-operator-sa -n cockroach-operator-system + kubectl delete clusterrole cockroach-operator-role + kubectl delete clusterrolebinding cockroach-operator-rolebinding + kubectl delete service cockroach-operator-webhook-service -n cockroach-operator-system + kubectl delete deployment cockroach-operator-manager -n cockroach-operator-system + kubectl delete mutatingwebhookconfigurations cockroach-operator-mutating-webhook-configuration + kubectl delete validatingwebhookconfigurations cockroach-operator-validating-webhook-configuration + ~~~ -Run `helm upgrade` to install the enterprise operator and wait for it to become ready: +Run `helm upgrade` to install the CockroachDB operator and wait for it to become ready: -```shell -$ helm upgrade --install crdb-operator ./cockroachdb-parent/charts/operator -$ kubectl rollout status deployment/cockroach-operator --timeout=60s -``` +{% include_cached copy-clipboard.html %} +~~~ shell +helm upgrade --install crdb-operator ./cockroachdb-parent/charts/operator +kubectl rollout status 
deployment/cockroach-operator --timeout=60s +~~~ -## Step 4. Replace statefulset pods with operator nodes +## Step 4. Replace statefulset pods with operator-managed nodes -To migrate seamlessly from the public operator to the enterprise operator, we’ll scale down StatefulSet-managed pods and replace them with crdbnode objects, one by one. Then we’ll create the crdbcluster object that manages the crdbnodes. +To migrate seamlessly from the public operator to the CockroachDB operator, scale down StatefulSet-managed pods and replace them with crdbnode objects, one by one. Then we’ll create the crdbcluster object that manages the crdbnodes. -First, create objects in kubectl that will eventually be owned by the crdbcluster: +Create objects with `kubectl` that will eventually be owned by the crdbcluster: -```shell -$ kubectl create priorityclass crdb-critical --value 500000000 -$ kubectl apply -f manifests/rbac.yaml -``` +{% include_cached copy-clipboard.html %} +~~~ shell +kubectl create priorityclass crdb-critical --value 500000000 +kubectl apply -f manifests/rbac.yaml +~~~ -Install the crdb-operator with Helm: +Install the `crdb-operator` with Helm: -```shell -$ helm upgrade --install crdb-operator ./cockroachdb-parent/charts/operator -``` +{% include_cached copy-clipboard.html %} +~~~ shell +helm upgrade --install crdb-operator ./cockroachdb-parent/charts/operator +~~~ For each pod in the StatefulSet, perform the following steps: 1. Scale the StatefulSet down by one replica. For example, for a five-node cluster, scale the StatefulSet down to four replicas: - ```shell - $ kubectl scale statefulset/$STS_NAME --replicas=4 - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl scale statefulset/$STS_NAME --replicas=4 + ~~~ -2. Create the crdbnode corresponding to the StatefulSet pod you just scaled down. 
The manifests are labeled as `crdbnode-X.yaml` where `X` is shared with each `<STS_NAME>-X` StatefulSet pod, so note whichever pod was scaled down and specify the corresponding manifest in the following command: +1. Create the `crdbnode` resource that corresponds to the StatefulSet pod you just scaled down. Each manifest is labeled with the pattern `crdbnode-X.yaml`, where `X` corresponds to a StatefulSet pod named `{STS_NAME}-X`. Note the pod that was scaled down and specify its manifest in a command like the following: - ```shell - $ kubectl apply -f manifests/crdbnode-4.yaml - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl apply -f manifests/crdbnode-4.yaml + ~~~ -3. Wait for the new pod to become ready. If it doesn’t, check the operator logs for errors. +1. Wait for the new pod to become ready. If it doesn’t, [check the operator logs](monitor-cockroachdb-kubernetes-operator.html#monitor-the-operator) for errors. -4. Before moving on to the next replica migration, verify that there are no underreplicated ranges: - 1. Set up port forwarding to access the CockroachDB node’s HTTP interface. Note that CockroachDB’s UI runs on port 8080 by default: +1. Before moving on to the next replica migration, verify that there are no underreplicated ranges: + 1. Set up port forwarding to access the CockroachDB node’s HTTP interface. Note that the DB Console runs on port 8080 by default: - ```shell - $ kubectl port-forward pod/cockroachdb-4 8080:8080 - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl port-forward pod/cockroachdb-4 8080:8080 + ~~~ - 2. Check that there are zero underreplicated ranges. The following command outputs the number of under-replicated ranges on this CockroachDB node: + 1. Check that there are zero underreplicated ranges. 
Annotate the existing Kubernetes objects so they can be managed by the Helm chart:
Once the migration is successful, delete the pod disruption budget that the public operator created for the StatefulSet:
Determine the latest version of [CoreOS's Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator/releases/) and run the following to download and apply the latest `bundle.yaml` definition file: {{site.data.alerts.callout_info}} Be sure to specify the latest [CoreOS Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator/releases/) version in the following command, in place of this example's use of version `v0.82.0`. {{site.data.alerts.end}} - ```shell - $ kubectl apply \ + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl apply \ -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.82.0/bundle.yaml \ --server-side - - + ~~~ + ~~~ shell customresourcedefinition.apiextensions.k8s.io/alertmanagers.monitoring.coreos.com serverside-applied customresourcedefinition.apiextensions.k8s.io/podmonitors.monitoring.coreos.com serverside-applied customresourcedefinition.apiextensions.k8s.io/probes.monitoring.coreos.com serverside-applied @@ -52,49 +55,55 @@ If you're on Hosted GKE, before starting, make sure the email address associated deployment.apps/prometheus-operator serverside-applied serviceaccount/prometheus-operator serverside-applied service/prometheus-operator serverside-applied - ``` + ~~~ -3. Confirm that the `prometheus-operator` has started: - - ```shell - $ kubectl get deploy prometheus-operator +1. Confirm that the `prometheus-operator` has started: + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl get deploy prometheus-operator + ~~~ + ~~~ shell NAME READY UP-TO-DATE AVAILABLE AGE prometheus-operator 1/1 1 1 27s - ``` - -4. Download our Prometheus manifest: + ~~~ - ```shell - $ curl -O https://raw.githubusercontent.com/cockroachdb/cockroach/master/cloud/kubernetes/prometheus/prometheus.yaml - ``` +1. Download our Prometheus manifest: -5. Apply the Prometheus manifest. 
This creates the various objects necessary to run a Prometheus instance: + {% include_cached copy-clipboard.html %} + ~~~ shell + curl -O https://raw.githubusercontent.com/cockroachdb/cockroach/master/cloud/kubernetes/prometheus/prometheus.yaml + ~~~ - ```shell - $ kubectl apply -f prometheus.yaml +1. Apply the Prometheus manifest. This creates the various objects necessary to run a Prometheus instance: + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl apply -f prometheus.yaml + ~~~ + ~~~ shell serviceaccount/prometheus created clusterrole.rbac.authorization.k8s.io/prometheus created clusterrolebinding.rbac.authorization.k8s.io/prometheus created servicemonitor.monitoring.coreos.com/cockroachdb created prometheus.monitoring.coreos.com/cockroachdb created - ``` + ~~~ -6. Access the Prometheus UI locally and verify that CockroachDB is feeding data into Prometheus: +1. Access the Prometheus UI locally and verify that CockroachDB is feeding data into Prometheus: 1. Port-forward from your local machine to the pod running Prometheus: - ```shell - $ kubectl port-forward prometheus-cockroachdb-0 9090 - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl port-forward prometheus-cockroachdb-0 9090 + ~~~ - 2. Go to [http://localhost:9090](http://localhost:9090/) in your browser. + 1. Go to [http://localhost:9090](http://localhost:9090/) in your browser. - 3. To verify that each CockroachDB node is connected to Prometheus, go to **Status > Targets**. The screen should look like this: + 1. To verify that each CockroachDB node is connected to Prometheus, go to **Status > Targets**. The screen should look like this: Prometheus targets - 4. To verify that data is being collected, go to **Graph**, enter the `sys_uptime` variable in the field, click **Execute**, and then click the **Graph** tab. The screen should like this: + 1. 
The screen should look like this:
Add this configuration to the Kubernetes cluster as a secret, renaming it to `alertmanager.yaml` and labeling it to make it easier to find: - ```shell - $ kubectl create secret generic alertmanager-cockroachdb \ + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl create secret generic alertmanager-cockroachdb \ --from-file=alertmanager.yaml=alertmanager-config.yaml - + ~~~ + ~~~ shell secret/alertmanager-cockroachdb created - ``` - ```shell - $ kubectl label secret alertmanager-cockroachdb app=cockroachdb - + ~~~ + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl label secret alertmanager-cockroachdb app=cockroachdb + ~~~ + ~~~ shell secret/alertmanager-cockroachdb labeled - ``` + ~~~ {{site.data.alerts.callout_danger}} The name of the secret, `alertmanager-cockroachdb`, must match the name used in the `alertmanager.yaml` file. If they differ, the Alertmanager instance will start without configuration, and nothing will happen. {{site.data.alerts.end}} -4. Use our [alertmanager.yaml](https://github.com/cockroachdb/cockroach/blob/master/cloud/kubernetes/prometheus/alertmanager.yaml) file to create the various objects necessary to run an Alertmanager instance, including a ClusterIP service so that Prometheus can forward alerts: +1. Use our [alertmanager.yaml](https://github.com/cockroachdb/cockroach/blob/master/cloud/kubernetes/prometheus/alertmanager.yaml) file to create the various objects necessary to run an Alertmanager instance, including a ClusterIP service so that Prometheus can forward alerts: - ```shell - $ kubectl apply \ + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl apply \ -f https://raw.githubusercontent.com/cockroachdb/cockroach/master/cloud/kubernetes/prometheus/alertmanager.yaml - + ~~~ + ~~~ shell alertmanager.monitoring.coreos.com/cockroachdb created service/alertmanager-cockroachdb created - ``` + ~~~ -5. Verify that Alertmanager is running: +1. Verify that Alertmanager is running: 1. 
Port-forward from your local machine to the pod running Alertmanager: - ```shell - $ kubectl port-forward alertmanager-cockroachdb-0 9093 - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl port-forward alertmanager-cockroachdb-0 9093 + ~~~ - 2. Go to [http://localhost:9093](http://localhost:9093/) in your browser. The screen should look like this: + 1. Go to [http://localhost:9093](http://localhost:9093/) in your browser. The screen should look like this: Alertmanager -6. Ensure that the Alertmanagers are visible to Prometheus by opening [http://localhost:9090/status](http://localhost:9090/status). The screen should look like this: +1. Ensure that the Alertmanagers are visible to Prometheus by opening [http://localhost:9090/status](http://localhost:9090/status). The screen should look like this: Alertmanager -7. Add CockroachDB's starter [alerting rules](https://github.com/cockroachdb/cockroach/blob/master/cloud/kubernetes/prometheus/alert-rules.yaml): +1. Add CockroachDB's starter [alerting rules](https://github.com/cockroachdb/cockroach/blob/master/cloud/kubernetes/prometheus/alert-rules.yaml): - ```shell - $ kubectl apply \ + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl apply \ -f https://raw.githubusercontent.com/cockroachdb/cockroach/master/cloud/kubernetes/prometheus/alert-rules.yaml - + ~~~ + ~~~ shell prometheusrule.monitoring.coreos.com/prometheus-cockroachdb-rules created - ``` + ~~~ -8. Ensure that the rules are visible to Prometheus by opening [http://localhost:9090/rules](http://localhost:9090/rules). The screen should look like this: +1. Ensure that the rules are visible to Prometheus by opening [http://localhost:9090/rules](http://localhost:9090/rules). The screen should look like this: Alertmanager -9. Verify that the `TestAlertManager` example alert is firing by opening [http://localhost:9090/alerts](http://localhost:9090/alerts). The screen should look like this: +1. 
Verify that the `TestAlertManager` example alert is firing by opening [http://localhost:9090/alerts](http://localhost:9090/alerts). The screen should look like this: Alertmanager -10. To remove the example alert: +1. To remove the example alert: 1. Use the `kubectl edit` command to open the rules for editing: - ```shell - $ kubectl edit prometheusrules prometheus-cockroachdb-rules - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl edit prometheusrules prometheus-cockroachdb-rules + ~~~ - 2. Remove the `dummy.rules` block and save the file: + 1. Remove the `dummy.rules` block and save the file: - ```yaml + ~~~ yaml - name: rules/dummy.rules rules: - alert: TestAlertManager expr: vector(1) - ``` + ~~~ -## Monitor the Operator +## Monitor the operator -The CockroachDB Operator automatically exposes [Prometheus-style metrics](https://prometheus.io/docs/concepts/metric_types/) that you can monitor to observe its operations. +The CockroachDB operator automatically exposes [Prometheus-style metrics](https://prometheus.io/docs/concepts/metric_types/) that you can monitor to observe its operations. -Metrics can be collected from the Operator via HTTP requests (port 8080 by default) against the `/metrics` endpoint. The response will describe the current node metrics, for example: +Metrics can be collected from the operator via HTTP requests (port 8080 by default) against the `/metrics` endpoint. The response will describe the current node metrics, for example: -```json +~~~json ... -# HELP node_decommissioning Whether a CRDB node is decommissioning. +# HELP node_decommissioning Whether a CockroachDB node is decommissioning. # TYPE node_decommissioning gauge node_decommissioning{node="cockroachdb-nvq2l"} 0 node_decommissioning{node="cockroachdb-pmp45"} 0 node_decommissioning{node="cockroachdb-q6784"} 0 node_decommissioning{node="cockroachdb-r4wz8"} 0 ... -``` +~~~ ## Configure logging -You can use the Operator to configure the CockroachDB logging system. 
This allows you to output logs to [configurable log sinks](configure-logs.html#configure-log-sinks) such as file or network logging destinations. +You can use the operator to configure the CockroachDB logging system. This allows you to output logs to [configurable log sinks](configure-logs.html#configure-log-sinks) such as file or network logging destinations. The logging configuration is defined in a [ConfigMap](https://kubernetes.io/docs/concepts/configuration/configmap/) object, using a key named `logs.yaml`. For example: -```yaml +~~~ yaml apiVersion: v1 data: logs.yaml: | @@ -239,39 +259,39 @@ kind: ConfigMap metadata: name: logconfig namespace: cockroach-ns -``` +~~~ The above configuration overrides the [default logging configuration](configure-logs.html#default-logging-configuration) and reflects our recommended Kubernetes logging configuration: -* Save debug-level logs (the `DEV` [log channel](logging-overview.html#logging-channels)) to disk for troubleshooting. -* Send operational- and security-level logs to a [network collector](logging-use-cases.html#network-logging), in this case [Fluentd](configure-logs.html#fluentd-logging-format). +- Save debug-level logs (the `DEV` [log channel](logging-overview.html#logging-channels)) to disk for troubleshooting. +- Send operational- and security-level logs to a [network collector](logging-use-cases.html#network-logging), in this case [Fluentd](configure-logs.html#fluentd-logging-format). 
The ConfigMap `name` must match the `cockroachdb.crdbCluster.loggingConfigMapName` object in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster): -```yaml +~~~ yaml cockroachdb: crdbCluster: loggingConfigMapName: logconfig -``` +~~~ -By default, the Operator also modifies the [default logging configuration](configure-logs.html#default-logging-configuration) with the following: +By default, the operator also modifies the [default logging configuration](configure-logs.html#default-logging-configuration) with the following: -```yaml +~~~ yaml sinks: stderr: channels: {INFO: "HEALTH, OPS", WARNING: "STORAGE, DEV"} redact: true -``` +~~~ This outputs logging events in the [OPS](logging.html#ops) channel to a `cockroach-stderr.log` file. ### Example: Configuring a troubleshooting log file on pods -In this example, CockroachDB has already been deployed on a Kubernetes cluster. We override the [default logging configuration](configure-logs.html#default-logging-configuration) to output [DEV](logging.html#dev) logs to a `cockroach-dev.log` file. +In this example, CockroachDB has already been deployed on a Kubernetes cluster. Override the [default logging configuration](configure-logs.html#default-logging-configuration) to output [DEV](logging.html#dev) logs to a `cockroach-dev.log` file. 1. Create a ConfigMap named `logconfig`. Note that `namespace` is set to the `cockroach-ns` namespace: - ```yaml + ~~~ yaml apiVersion: v1 data: logs.yaml: | @@ -284,7 +304,7 @@ In this example, CockroachDB has already been deployed on a Kubernetes cluster. metadata: name: logconfig namespace: cockroach-ns - ``` + ~~~ For simplicity, also name the YAML file `logconfig.yaml`. @@ -294,35 +314,40 @@ In this example, CockroachDB has already been deployed on a Kubernetes cluster. This configuration outputs `DEV` logs that have severity [WARNING](logging.html#logging-levels-severities) to a `cockroach-dev.log` file on each pod. -2. 
Apply the ConfigMap to the cluster: - - ```shell - $ kubectl apply -f logconfig.yaml +1. Apply the ConfigMap to the cluster: + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl apply -f logconfig.yaml + ~~~ + ~~~ shell configmap/logconfig created - ``` + ~~~ -3. Add the `name` of the ConfigMap in `loggingConfigMapName` to the values file: +1. Add the `name` of the ConfigMap in `loggingConfigMapName` to the values file: - ```yaml + ~~~ yaml cockroachdb: crdbCluster: loggingConfigMapName: logconfig - ``` + ~~~ -4. Apply the new settings to the cluster: +1. Apply the new settings to the cluster: - ```shell - $ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE + ~~~ The changes will be rolled out to each pod. -5. See the log files available on a pod: - - ```shell - $ kubectl exec cockroachdb-2 -- ls cockroach-data/logs +1. See the log files available on a pod: + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl exec cockroachdb-2 -- ls cockroach-data/logs + ~~~ + ~~~ shell cockroach-dev.cockroachdb-2.unknownuser.2022-05-02T19_03_03Z.000001.log cockroach-dev.log cockroach-health.cockroachdb-2.unknownuser.2022-05-02T18_53_01Z.000001.log @@ -336,10 +361,11 @@ In this example, CockroachDB has already been deployed on a Kubernetes cluster. cockroach.cockroachdb-2.unknownuser.2022-05-02T18_52_48Z.000001.log cockroach.log ... - ``` + ~~~ -6. View a specific log file: +1. 
View a specific log file: - ```shell - $ kubectl exec cockroachdb-2 -- cat cockroach-data/logs/cockroach-dev.log - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl exec cockroachdb-2 -- cat cockroach-data/logs/cockroach-dev.log + ~~~ diff --git a/src/current/v25.2/scale-cockroachdb-kubernetes-operator.md b/src/current/v25.2/scale-cockroachdb-kubernetes-operator.md index 0e9019b8ef2..d99af8d32a1 100644 --- a/src/current/v25.2/scale-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/scale-cockroachdb-kubernetes-operator.md @@ -1,6 +1,6 @@ --- -title: Cluster Scaling with the Kubernetes Operator -summary: How to scale a secure CockroachDB cluster deployed with the Kubernetes Operator. +title: Cluster Scaling with the CockroachDB Operator +summary: How to scale a secure CockroachDB cluster deployed with the CockroachDB operator. toc: true toc_not_nested: true secure: true @@ -13,24 +13,25 @@ This page explains how to add and remove CockroachDB nodes on Kubernetes. Before scaling up CockroachDB, note the following [topology recommendations](recommended-production-settings.html#topology): -* Each CockroachDB node (running in its own pod) should run on a separate Kubernetes worker node. -* Each availability zone should have the same number of CockroachDB nodes. +- Each CockroachDB node (running in its own pod) should run on a separate Kubernetes worker node. +- Each availability zone should have the same number of CockroachDB nodes. -If your cluster has 3 CockroachDB nodes distributed across 3 availability zones (as in our [deployment example](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster)), we recommend scaling up by a multiple of 3 to retain an even distribution of nodes. You should therefore scale up to a minimum of 6 CockroachDB nodes, with 2 nodes in each zone. 
+If your cluster has 3 CockroachDB nodes distributed across 3 availability zones (as in our [deployment example](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster)), Cockroach Labs recommends scaling up by a multiple of 3 to retain an even distribution of nodes. You should therefore scale up to a minimum of 6 CockroachDB nodes, with 2 nodes in each zone. 1. Run `kubectl get nodes` to list the worker nodes in your Kubernetes cluster. There should be at least as many worker nodes as pods you plan to add. This ensures that no more than one pod will be placed on each worker node. -2. If you need to add worker nodes, resize your cluster by specifying the desired number of worker nodes in each zone. Using Google Kubernetes Engine as an example: +1. If you need to add worker nodes, resize your cluster by specifying the desired number of worker nodes in each zone. Using Google Kubernetes Engine as an example: - ```shell - $ gcloud container clusters resize {cluster-name} --region {region-name} --num-nodes 2 - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + gcloud container clusters resize {cluster-name} --region {region-name} --num-nodes 2 + ~~~ This example distributes 2 worker nodes across the default 3 zones, raising the total to 6 worker nodes. -3. Update `cockroachdb.crdbCluster.regions.code.nodes` in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster), with the target size of the CockroachDB cluster in the specified region. This value refers to the number of CockroachDB nodes, each running in one pod: +1. Update `cockroachdb.crdbCluster.regions.code.nodes` in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster), with the target size of the CockroachDB cluster in the specified region. 
This value refers to the number of CockroachDB nodes, each running in one pod: - ```yaml + ~~~ yaml cockroachdb: crdbCluster: regions: @@ -38,19 +39,22 @@ If your cluster has 3 CockroachDB nodes distributed across 3 availability zones cloudProvider: gcp domain: cluster.domain.us-central nodes: 6 - ``` + ~~~ -4. Apply the new settings to the cluster: +1. Apply the new settings to the cluster: - ```shell - $ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE + ~~~ -5. Verify that the new pods were successfully started: - - ```shell - $ kubectl get pods +1. Verify that the new pods were successfully started: + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl get pods + ~~~ + ~~~ shell NAME READY STATUS RESTARTS AGE cockroach-operator-655fbf7847-zn9v8 1/1 Running 0 30m cockroachdb-0 1/1 Running 0 24m @@ -59,13 +63,13 @@ If your cluster has 3 CockroachDB nodes distributed across 3 availability zones cockroachdb-3 1/1 Running 0 30s cockroachdb-4 1/1 Running 0 30s cockroachdb-5 1/1 Running 0 30s - ``` + ~~~ Each pod should be running in one of the 6 worker nodes. ## Remove nodes -If your nodes are distributed across 3 availability zones (as in our [deployment example](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster)), we recommend scaling down by a multiple of 3 to retain an even distribution. If your cluster has 6 CockroachDB nodes, you should therefore scale down to 3, with 1 node in each zone. 
+If your nodes are distributed across 3 availability zones (as in our [deployment example](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster)), Cockroach Labs recommends scaling down by a multiple of 3 to retain an even distribution. If your cluster has 6 CockroachDB nodes, you should therefore scale down to 3, with 1 node in each zone. {{site.data.alerts.callout_danger}} Do not scale down to fewer than 3 nodes. This is considered an anti-pattern on CockroachDB and will cause errors. Before scaling down CockroachDB, note that each availability zone should have the same number of CockroachDB nodes. @@ -73,7 +77,7 @@ Do not scale down to fewer than 3 nodes. This is considered an anti-pattern on C 1. Update `cockroachdb.crdbCluster.regions.code.nodes` in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster), with the target size of the CockroachDB cluster. For instance, to scale a cluster in Google Cloud down to 3 nodes: - ```yaml + ~~~ yaml cockroachdb: crdbCluster: regions: @@ -81,16 +85,18 @@ Do not scale down to fewer than 3 nodes. This is considered an anti-pattern on C cloudProvider: gcp domain: cluster.domain.us-central nodes: 3 - ``` + ~~~ -2. Apply the new settings to the cluster: +1. Apply the new settings to the cluster: - ```shell - $ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE + ~~~ -3. Verify that the pods were successfully removed: +1. 
Verify that the pods were successfully removed: - ```shell - $ kubectl get pods - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl get pods + ~~~ diff --git a/src/current/v25.2/schedule-cockroachdb-kubernetes-operator.md b/src/current/v25.2/schedule-cockroachdb-kubernetes-operator.md index ecbed386940..64c166a5587 100644 --- a/src/current/v25.2/schedule-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/schedule-cockroachdb-kubernetes-operator.md @@ -1,6 +1,6 @@ --- -title: Pod Scheduling with the Kubernetes Operator -summary: Schedule CockroachDB pods on Kubernetes using the Operator. +title: Pod Scheduling with the CockroachDB Operator +summary: Schedule CockroachDB pods on Kubernetes using the CockroachDB operator. toc: true toc_not_nested: true secure: true @@ -17,13 +17,13 @@ Specify the labels in `cockroachdb.crdbCluster.nodeSelector` in the values file The following configuration causes CockroachDB pods to be scheduled onto worker nodes that have *both* the labels `worker-pool-name=crdb-workers` and `kubernetes.io/arch=amd64`: -```yaml +~~~ yaml cockroachdb: crdbCluster: nodeSelector: worker-pool-name: crdb-workers kubernetes.io/arch: amd64 -``` +~~~ For an example of labeling nodes, see [Scheduling CockroachDB onto labeled nodes](#example-scheduling-cockroachdb-onto-labeled-nodes). @@ -33,10 +33,10 @@ A pod with a *node affinity* seeks out worker nodes that have matching [labels]( Affinities and anti-affinities can be used together with `operator` fields to: -* Require CockroachDB pods to be scheduled onto a labeled worker node. -* Require CockroachDB pods to be co-located with labeled pods (e.g., on a node or region). -* Prevent CockroachDB pods from being scheduled onto a labeled worker node. -* Prevent CockroachDB pods from being co-located with labeled pods (e.g., on a node or region). +- Require CockroachDB pods to be scheduled onto a labeled worker node. 
+- Require CockroachDB pods to be co-located with labeled pods (e.g., on a node or region). +- Prevent CockroachDB pods from being scheduled onto a labeled worker node. +- Prevent CockroachDB pods from being co-located with labeled pods (e.g., on a node or region). For an example, see [Scheduling CockroachDB onto labeled nodes](#example-scheduling-cockroachdb-onto-labeled-nodes). @@ -46,7 +46,7 @@ Specify node affinities in `cockroachdb.crdbCluster.affinity.nodeAffinity` in th The following configuration requires that CockroachDB pods are scheduled onto worker nodes running a Linux operating system, with a preference against worker nodes in the `us-east4-b` availability zone. -```yaml +~~~ yaml cockroachdb: crdbCluster: affinity: @@ -66,13 +66,13 @@ cockroachdb: operator: NotIn values: - us-east4-b -``` +~~~ The `requiredDuringSchedulingIgnoredDuringExecution` node affinity rule, using the `In` operator, requires CockroachDB pods to be scheduled onto nodes with the matching label `kubernetes.io/os=linux`. It will not evict pods that are already running on nodes that do not match the affinity requirements. The `preferredDuringSchedulingIgnoredDuringExecution` node affinity rule, using the `NotIn` operator and specified `weight`, discourages (but does not disallow) CockroachDB pods from being scheduled onto nodes with the label `topology.kubernetes.io/zone=us-east4-b`. This achieves a similar effect as a `PreferNoSchedule` [taint](#taints-and-tolerations). -For more context on how these rules work, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/). The [custom resource definition](https://github.com/cockroachdb/helm-charts/blob/master/cockroachdb-parent/charts/cockroachdb/values.yaml) details the fields supported by the Operator. +For more context on how these rules work, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/). 
The [custom resource definition](https://github.com/cockroachdb/helm-charts/blob/master/cockroachdb-parent/charts/cockroachdb/values.yaml) details the fields supported by the operator. ### Add a pod affinity or anti-affinity @@ -80,7 +80,7 @@ Specify pod affinities and anti-affinities in `cockroachdb.crdbCluster.affinity. The following configuration attempts to schedule CockroachDB pods in the same zones as the pods that run our example [load generator](https://github.com/cockroachdb/cockroach/blob/master/cloud/kubernetes/example-app.yaml) app. It disallows CockroachDB pods from being co-located on the same worker node. -```yaml +~~~ yaml cockroachdb: crdbCluster: affinity: @@ -104,24 +104,25 @@ cockroachdb: values: - cockroachdb topologyKey: kubernetes.io/hostname -``` +~~~ The `preferredDuringSchedulingIgnoredDuringExecution` pod affinity rule, using the `In` operator and specified `weight`, encourages (but does not require) CockroachDB pods to be co-located with pods labeled `app=loadgen` already running in the same zone, as specified with `topologyKey`. The `requiredDuringSchedulingIgnoredDuringExecution` pod anti-affinity rule, using the `In` operator, requires CockroachDB pods not to be co-located on a worker node, as specified with `topologyKey`. -For more context on how these rules work, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/). The [custom resource definition](https://github.com/cockroachdb/helm-charts/blob/master/cockroachdb-parent/charts/cockroachdb/values.yaml) details the fields supported by the Operator. +For more context on how these rules work, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/). The [custom resource definition](https://github.com/cockroachdb/helm-charts/blob/master/cockroachdb-parent/charts/cockroachdb/values.yaml) details the fields supported by the operator. 
### Example: Scheduling CockroachDB onto labeled nodes -In this example, CockroachDB has not yet been deployed to a running Kubernetes cluster. We use a combination of node affinity and pod anti-affinity rules to schedule 3 CockroachDB pods onto three labeled worker nodes. +In this example, CockroachDB has not yet been deployed to a running Kubernetes cluster. Use a combination of node affinity and pod anti-affinity rules to schedule 3 CockroachDB pods onto three labeled worker nodes. 1. List the worker nodes on the running Kubernetes cluster: - ```shell - $ kubectl get nodes - - + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl get nodes + ~~~ + ~~~ shell NAME STATUS ROLES AGE VERSION gke-cockroachdb-default-pool-263138a5-kp3v Ready 3m56s v1.20.10-gke.301 gke-cockroachdb-default-pool-263138a5-nn62 Ready 3m56s v1.20.10-gke.301 @@ -129,17 +130,19 @@ In this example, CockroachDB has not yet been deployed to a running Kubernetes c gke-cockroachdb-default-pool-41796213-bw3z Ready 3m54s v1.20.10-gke.301 gke-cockroachdb-default-pool-ccd74623-dghs Ready 3m54s v1.20.10-gke.301 gke-cockroachdb-default-pool-ccd74623-p5mf Ready 3m55s v1.20.10-gke.301 - ``` + ~~~ -2. Add a `node=crdb` label to three of the running worker nodes. - - ```shell - $ kubectl label nodes gke-cockroachdb-default-pool-263138a5-kp3v gke-cockroachdb-default-pool-41796213-75c9 gke-cockroachdb-default-pool-ccd74623-dghs node=crdb +1. Add a `node=crdb` label to three of the running worker nodes. 
+ {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl label nodes gke-cockroachdb-default-pool-263138a5-kp3v gke-cockroachdb-default-pool-41796213-75c9 gke-cockroachdb-default-pool-ccd74623-dghs node=crdb + ~~~ + ~~~ shell node/gke-cockroachdb-default-pool-5726e554-77r7 labeled node/gke-cockroachdb-default-pool-ee4d4d67-0922 labeled node/gke-cockroachdb-default-pool-ee4d4d67-w18b labeled - ``` + ~~~ In this example, 6 GKE nodes are deployed in 3 [node pools](https://cloud.google.com/kubernetes-engine/docs/concepts/node-pools), and each node pool resides in a separate availability zone. To maintain an even distribution of CockroachDB pods as specified in our [topology recommendations](recommended-production-settings.html#topology), each of the 3 labeled worker nodes must belong to a different node pool. @@ -147,9 +150,9 @@ In this example, CockroachDB has not yet been deployed to a running Kubernetes c This also ensures that the CockroachDB pods, which will be bound to persistent volumes in the same three availability zones, can be scheduled onto worker nodes in their respective zones. {{site.data.alerts.end}} -3. Add the following rules to the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster): +1. Add the following rules to the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster): - ```yaml + ~~~ yaml cockroachdb: crdbCluster: affinity: @@ -170,27 +173,30 @@ In this example, CockroachDB has not yet been deployed to a running Kubernetes c values: - cockroachdb topologyKey: kubernetes.io/hostname - ``` + ~~~ The `nodeAffinity` rule requires CockroachDB pods to be scheduled onto worker nodes with the label `node=crdb`. The `podAntiAffinity` rule requires CockroachDB pods not to be co-located on a worker node, as specified with `topologyKey`. -4. 
Apply the settings to the cluster: - - ```shell - $ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE - ``` +1. Apply the settings to the cluster: -5. The CockroachDB pods will be deployed to the 3 labeled nodes. To observe this, run: + {% include_cached copy-clipboard.html %} + ~~~ shell + helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE + ~~~ - ```shell - $ kubectl get pods -o wide +1. The CockroachDB pods will be deployed to the 3 labeled nodes. To observe this, run: + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl get pods -o wide + ~~~ + ~~~ shell NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES cockroach-operator-bfdbfc9c7-tbpsw 1/1 Running 0 171m 10.32.2.4 gke-cockroachdb-default-pool-263138a5-kp3v cockroachdb-0 1/1 Running 0 100s 10.32.4.10 gke-cockroachdb-default-pool-ccd74623-dghs cockroachdb-1 1/1 Running 0 100s 10.32.2.6 gke-cockroachdb-default-pool-263138a5-kp3v cockroachdb-2 1/1 Running 0 100s 10.32.0.5 gke-cockroachdb-default-pool-41796213-75c9 - ``` + ~~~ ## Taints and tolerations @@ -198,8 +204,8 @@ When a *taint* is added to a Kubernetes worker node, pods are prevented from bei Taints and tolerations are useful if you want to: -* Prevent CockroachDB pods from being scheduled onto a labeled worker node. -* Evict CockroachDB pods from a labeled worker node on which they are currently running. +- Prevent CockroachDB pods from being scheduled onto a labeled worker node. +- Evict CockroachDB pods from a labeled worker node on which they are currently running. For an example, see [Evicting CockroachDB from a running worker node](#example-evicting-cockroachdb-from-a-running-worker-node). 
@@ -209,7 +215,7 @@ Specify pod tolerations in the `cockroachdb.crdbCluster.tolerations` object in t The following toleration matches a taint with the specified key, value, and `NoSchedule` effect, using the `Equal` operator. A toleration that uses the `Equal` operator must include a `value` field: -```yaml +~~~ yaml cockroachdb: crdbCluster: tolerations: @@ -217,7 +223,7 @@ cockroachdb: operator: "Equal" value: "example" effect: "NoSchedule" -``` +~~~ A `NoSchedule` taint on a node prevents pods from being scheduled onto the node. The matching toleration allows a pod to be scheduled onto the node. A `NoSchedule` toleration is therefore best included before [deploying the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). @@ -227,7 +233,7 @@ A `PreferNoSchedule` taint discourages, but does not disallow, pods from being s The following toleration matches every taint with the specified key and `NoExecute` effect, using the `Exists` operator. A toleration that uses the `Exists` operator must exclude a `value` field: -```yaml +~~~ yaml cockroachdb: crdbCluster: tolerations: @@ -235,21 +241,23 @@ cockroachdb: operator: "Exists" effect: "NoExecute" tolerationSeconds: 3600 -``` +~~~ A `NoExecute` taint on a node prevents pods from being scheduled onto the node, and evicts pods from the node if they are already running on the node. The matching toleration allows a pod to be scheduled onto the node, and to continue running on the node if `tolerationSeconds` is not specified. If `tolerationSeconds` is specified, the pod is evicted after this number of seconds. -For more information on using taints and tolerations, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/). The [custom resource definition](https://github.com/cockroachdb/helm-charts/blob/master/cockroachdb-parent/charts/cockroachdb/values.yaml) details the fields supported by the Operator. 
+For more information on using taints and tolerations, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/). The [custom resource definition](https://github.com/cockroachdb/helm-charts/blob/master/cockroachdb-parent/charts/cockroachdb/values.yaml) details the fields supported by the operator. ### Example: Evicting CockroachDB from a running worker node -In this example, CockroachDB has already been deployed on a Kubernetes cluster. We use the `NoExecute` effect to evict one of the CockroachDB pods from its worker node. +In this example, CockroachDB has already been deployed on a Kubernetes cluster. Use the `NoExecute` effect to evict one of the CockroachDB pods from its worker node. 1. List the worker nodes on the running Kubernetes cluster: - ```shell - $ kubectl get nodes - + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl get nodes + ~~~ + ~~~ shell NAME STATUS ROLES AGE VERSION gke-cockroachdb-default-pool-4e5ce539-68p5 Ready 56m v1.20.9-gke.1001 gke-cockroachdb-default-pool-4e5ce539-j1h1 Ready 56m v1.20.9-gke.1001 @@ -257,46 +265,51 @@ In this example, CockroachDB has already been deployed on a Kubernetes cluster. gke-cockroachdb-default-pool-95fde00d-hw04 Ready 56m v1.20.9-gke.1001 gke-cockroachdb-default-pool-eb2b2889-q15v Ready 56m v1.20.9-gke.1001 gke-cockroachdb-default-pool-eb2b2889-q704 Ready 56m v1.20.9-gke.1001 - ``` - -2. Add a taint to a running worker node: + ~~~ - ```shell - $ kubectl taint nodes gke-cockroachdb-default-pool-4e5ce539-j1h1 test=example:NoExecute +1. Add a taint to a running worker node: + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl taint nodes gke-cockroachdb-default-pool-4e5ce539-j1h1 test=example:NoExecute + ~~~ + ~~~ shell node/gke-cockroachdb-default-pool-4e5ce539-j1h1 tainted - ``` + ~~~ -3. 
Add a matching tolerations object in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). +1. Add a matching tolerations object in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). - ```yaml + ~~~ yaml cockroachdb: crdbCluster: tolerations: - key: "test" operator: "Exists" effect: "NoExecute" - ``` + ~~~ Because no tolerationSeconds is specified, CockroachDB will be evicted immediately from the tainted worker node. -4. Apply the new settings to the cluster: - - ```shell - $ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE - ``` +1. Apply the new settings to the cluster: -5. The CockroachDB pod running on the tainted node (in this case, cockroachdb-2) will be evicted and started on a different worker node. To observe this: + {% include_cached copy-clipboard.html %} + ~~~ shell + helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE + ~~~ - ```shell - $ kubectl get pods -o wide +1. The CockroachDB pod running on the tainted node (in this case, cockroachdb-2) will be evicted and started on a different worker node. 
To observe this: + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl get pods -o wide + ~~~ + ~~~ shell NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES cockroach-operator-c9fc6cb5c-bl6rs 1/1 Running 0 44m 10.32.2.4 gke-cockroachdb-default-pool-4e5ce539-68p5 cockroachdb-0 1/1 Running 0 9m21s 10.32.4.10 gke-cockroachdb-default-pool-95fde00d-173d cockroachdb-1 1/1 Running 0 9m21s 10.32.2.6 gke-cockroachdb-default-pool-eb2b2889-q15v cockroachdb-2 0/1 Running 0 6s 10.32.0.5 gke-cockroachdb-default-pool-4e5ce539-68p5 - ``` + ~~~ `cockroachdb-2` is now scheduled onto the `gke-cockroachdb-default-pool-4e5ce539-68p5` node. @@ -310,7 +323,7 @@ Specify pod topology spread constraints in the `cockroachdb.crdbCluster.topology The following topology spread constraint ensures that CockroachDB pods deployed with the label `environment=production` will not be unevenly distributed across zones by more than `1` pod: -```yaml +~~~ yaml cockroachdb: crdbCluster: topologySpreadConstraints: @@ -320,11 +333,11 @@ cockroachdb: labelSelector: matchLabels: environment: production -``` +~~~ The `DoNotSchedule` condition prevents labeled pods from being scheduled onto Kubernetes worker nodes when doing so would fail to meet the spread and topology constraints specified with `maxSkew` and `topologyKey`, respectively. -For more context on how these rules work, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/). The [custom resource definition](https://github.com/cockroachdb/helm-charts/blob/master/cockroachdb-parent/charts/cockroachdb/values.yaml) details the fields supported by the Operator. +For more context on how these rules work, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/). 
The [custom resource definition](https://github.com/cockroachdb/helm-charts/blob/master/cockroachdb-parent/charts/cockroachdb/values.yaml) details the fields supported by the operator.
 
 ## Resource labels and annotations
 
@@ -332,14 +345,14 @@ To assist in working with your cluster, you can add labels and annotations to yo
 
 Specify labels in `cockroachdb.crdbCluster.podLabels` and annotations in `cockroachdb.crdbCluster.podAnnotations` in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster):
 
-```yaml
+~~~ yaml
 cockroachdb:
   crdbCluster:
     podLabels:
       app.kubernetes.io/version: v25.1.4
-    podAnnotations
+    podAnnotations:
       operator: https://raw.githubusercontent.com/cockroachdb/helm-charts/refs/heads/master/cockroachdb-parent/charts/cockroachdb/values.yaml
-```
+~~~
 
 To verify that the labels and annotations were applied to a pod, for example, run `kubectl describe pod {pod-name}`.
 
diff --git a/src/current/v25.2/secure-cockroachdb-kubernetes-operator.md b/src/current/v25.2/secure-cockroachdb-kubernetes-operator.md
index 646c8d632b7..0664dbdcf10 100644
--- a/src/current/v25.2/secure-cockroachdb-kubernetes-operator.md
+++ b/src/current/v25.2/secure-cockroachdb-kubernetes-operator.md
@@ -1,6 +1,6 @@
 ---
-title: Certificate Management with the Kubernetes Operator
-summary: How to authenticate a secure CockroachDB cluster deployed with the Kubernetes operator.
+title: Certificate Management with the CockroachDB Operator
+summary: How to authenticate a secure CockroachDB cluster deployed with the CockroachDB operator.
 toc: true
 toc_not_nested: true
 secure: true
@@ -13,10 +13,10 @@ This page describes steps for additional procedures related to certificate manag
 
 You may need to rotate the node, client, or CA certificates in the following scenarios:
 
-* The node, client, or CA certificates are expiring soon.
-* Your organization's compliance policy requires periodic certificate rotation.
-* The key (for a node, client, or CA) is compromised.
-* You need to modify the contents of a certificate, for example, to add another DNS name or the IP address of a load balancer through which a node can be reached. In this case, you would need to rotate only the node certificates. +- The node, client, or CA certificates are expiring soon. +- Your organization's compliance policy requires periodic certificate rotation. +- The key (for a node, client, or CA) is compromised. +- You need to modify the contents of a certificate, for example, to add another DNS name or the IP address of a load balancer through which a node can be reached. In this case, you would need to rotate only the node certificates. ### Example: Rotate certificates signed with `cockroach cert` @@ -24,28 +24,32 @@ If you previously [authenticated with cockroach cert](deploy-cockroachdb-with-ku 1. Create a new client certificate and key pair for the root user, overwriting the previous certificate and key: - ```shell - $ cockroach cert create-client root \ + {% include_cached copy-clipboard.html %} + ~~~ shell + cockroach cert create-client root \ --certs-dir=certs \ --ca-key=my-safe-directory/ca.key \ --overwrite - ``` + ~~~ -2. Upload the new client certificate and key to the Kubernetes cluster as a **new** secret, renaming them to the filenames required by the Operator: +1. Upload the new client certificate and key to the Kubernetes cluster as a **new** secret, renaming them to the filenames required by the CockroachDB operator: - ```shell - $ kubectl create secret generic cockroachdb.client.root.2 \ + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl create secret generic cockroachdb.client.root.2 \ --from-file=tls.key=certs/client.root.key \ --from-file=tls.crt=certs/client.root.crt \ --from-file=ca.crt=certs/ca.crt - + ~~~ + ~~~ shell secret/cockroachdb.client.root.2 created - ``` + ~~~ -3. Create a new certificate and key pair for your CockroachDB nodes, overwriting the previous certificate and key. 
Specify the namespace you used when [deploying the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). This example uses the `cockroach-ns` namespace: +1. Create a new certificate and key pair for your CockroachDB nodes, overwriting the previous certificate and key. Specify the namespace you used when [deploying the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). This example uses the `cockroach-ns` namespace: - ```shell - $ cockroach cert create-node localhost \ + {% include_cached copy-clipboard.html %} + ~~~ shell + cockroach cert create-node localhost \ 127.0.0.1 \ cockroachdb-public \ cockroachdb-public.cockroach-ns \ @@ -56,22 +60,24 @@ If you previously [authenticated with cockroach cert](deploy-cockroachdb-with-ku --certs-dir=certs \ --ca-key=my-safe-directory/ca.key \ --overwrite - ``` + ~~~ -4. Upload the new node certificate and key to the Kubernetes cluster as a **new** secret, renaming them to the filenames required by the Operator: +1. Upload the new node certificate and key to the Kubernetes cluster as a **new** secret, renaming them to the filenames required by the CockroachDB operator: - ```shell - $ kubectl create secret generic cockroachdb.node.2 \ + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl create secret generic cockroachdb.node.2 \ --from-file=tls.key=certs/node.key \ --from-file=tls.crt=certs/node.crt \ --from-file=ca.crt=certs/ca.crt - + ~~~ + ~~~ shell secret/cockroachdb.node.2 created - ``` + ~~~ -5. Add `cockroachdb.tls.externalCertificates.certificates.nodeClientSecretName` and `cockroachdb.tls.externalCertificates.certificates.nodeSecretName` to the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster): +1. 
Add `cockroachdb.tls.externalCertificates.certificates.nodeClientSecretName` and `cockroachdb.tls.externalCertificates.certificates.nodeSecretName` to the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster): - ```yaml + ~~~ yaml cockroachdb: tls: externalCertificates: @@ -79,64 +85,73 @@ If you previously [authenticated with cockroach cert](deploy-cockroachdb-with-ku certificates: nodeClientSecretName: "cockroachdb.client.root.2" nodeSecretName: "cockroachdb.node.2" - ``` + ~~~ -6. Check that the secrets were created on the cluster: - - ```shell - $ kubectl get secrets +1. Check that the secrets were created on the cluster: + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl get secrets + ~~~ + ~~~ shell NAME TYPE DATA AGE cockroachdb.client.root.2 Opaque 3 4s cockroachdb.node.2 Opaque 3 1s default-token-6js7b kubernetes.io/service-account-token 3 9h - ``` + ~~~ {{site.data.alerts.callout_info}} - Remember that `nodeSecretName` and `nodeClientSecretName` in the Operator must specify these secret names. For details, see the [deployment guide](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). + Remember that `nodeSecretName` and `nodeClientSecretName` in the operator configuration must specify these secret names. For details, see the [deployment guide](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). {{site.data.alerts.end}} -7. Apply the new settings to the cluster: +1. 
Apply the new settings to the cluster: - ```shell - $ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE + ~~~ The pods will terminate and restart one at a time, using the new certificates. You can observe this process: - ```shell - $ kubectl get pods - + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl get pods + ~~~ + ~~~ shell NAME READY STATUS RESTARTS AGE cockroach-operator-655fbf7847-lvz6x 1/1 Running 0 4h29m cockroachdb-0 1/1 Running 0 4h16m cockroachdb-1 1/1 Terminating 0 4h16m cockroachdb-2 1/1 Running 0 43s - ``` + ~~~ -8. Delete the existing client secret that is no longer in use: - - ```shell - $ kubectl delete secret cockroachdb.client.root +1. Delete the existing client secret that is no longer in use: + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl delete secret cockroachdb.client.root + ~~~ + ~~~ shell secret "cockroachdb.client.root" deleted - ``` - -9. Delete the existing node secret that is no longer in use: + ~~~ - ```shell - $ kubectl delete secret cockroachdb.node +1. Delete the existing node secret that is no longer in use: + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl delete secret cockroachdb.node + ~~~ + ~~~ shell secret "cockroachdb.node" deleted - ``` + ~~~ ## Secure the webhooks -The Operator ships with both [mutating](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#mutatingadmissionwebhook) and [validating](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#validatingadmissionwebhook) webhooks. Communication between the Kubernetes API server and the webhook service must be secured with TLS. 
+The operator ships with both [mutating](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#mutatingadmissionwebhook) and [validating](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#validatingadmissionwebhook) webhooks. Communication between the Kubernetes API server and the webhook service must be secured with TLS. -By default, the Operator searches for the TLS secret `cockroach-operator-certs`, which contains a CA certificate. If the secret is not found, the Operator auto-generates `cockroach-operator-certs` with a CA certificate for future runs. +By default, the CockroachDB operator searches for the TLS secret `cockroach-operator-certs`, which contains a CA certificate. If the secret is not found, the operator auto-generates `cockroach-operator-certs` with a CA certificate for future runs. -The Operator then generates a one-time server certificate for the webhook server that is signed with `cockroach-operator-certs`. Finally, the CA bundle for both mutating and validating webhook configurations is patched with the CA certificate. +The operator then generates a one-time server certificate for the webhook server that is signed with `cockroach-operator-certs`. Finally, the CA bundle for both mutating and validating webhook configurations is patched with the CA certificate. You can also use your own certificate authority rather than `cockroach-operator-certs`. Both the certificate and key files you generate must be PEM-encoded. See the following [example](#example-using-openssl-to-secure-the-webhooks). @@ -146,34 +161,41 @@ These steps demonstrate how to use the [openssl genrsa](https://www.openssl.org/ 1. Generate a 4096-bit RSA private key: - ```shell - $ openssl genrsa -out tls.key 4096 - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + openssl genrsa -out tls.key 4096 + ~~~ -2. Generate an X.509 certificate, valid for 10 years. You will be prompted for the certificate field values. +1. 
Generate an X.509 certificate, valid for 10 years. You will be prompted for the certificate field values. - ```shell - $ openssl req -x509 -new -nodes -key tls.key -sha256 -days 3650 -out tls.crt - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + openssl req -x509 -new -nodes -key tls.key -sha256 -days 3650 -out tls.crt + ~~~ -3. Create the secret, making sure that you are in the correct namespace: - - ```shell - $ kubectl create secret tls cockroach-operator-certs --cert=tls.crt --key=tls.key +1. Create the secret, making sure that you are in the correct namespace: + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl create secret tls cockroach-operator-certs --cert=tls.crt --key=tls.key + ~~~ + ~~~ shell secret/cockroach-operator-certs created - ``` - -4. Remove the certificate and key from your local environment: + ~~~ - ```shell - $ rm tls.crt tls.key - ``` +1. Remove the certificate and key from your local environment: -5. Roll the Operator deployment to ensure a new server certificate is generated: + {% include_cached copy-clipboard.html %} + ~~~ shell + rm tls.crt tls.key + ~~~ - ```shell - $ kubectl rollout restart deploy/cockroach-operator-manager +1. Roll the operator deployment to ensure a new server certificate is generated: + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl rollout restart deploy/cockroach-operator-manager + ~~~ + ~~~ shell deployment.apps/cockroach-operator-manager restarted - ``` + ~~~ diff --git a/src/current/v25.2/upgrade-cockroachdb-kubernetes-operator.md b/src/current/v25.2/upgrade-cockroachdb-kubernetes-operator.md index 24f2f7b6dc6..69d893e24e9 100644 --- a/src/current/v25.2/upgrade-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/upgrade-cockroachdb-kubernetes-operator.md @@ -1,13 +1,13 @@ --- -title: Upgrade a cluster in Kubernetes with the Operator -summary: How to upgrade a secure CockroachDB cluster deployed with the Kubernetes operator. 
+title: Upgrade a cluster in Kubernetes with the CockroachDB Operator +summary: How to upgrade a secure CockroachDB cluster deployed with the CockroachDB operator. toc: true toc_not_nested: true secure: true docs_area: deploy --- -This page describes how to upgrade a CockroachDB cluster that is [deployed on a Kubernetes cluster](deploy-cockroachdb-with-kubernetes-operator.html). +This page describes how to upgrade a CockroachDB cluster that is [deployed on a Kubernetes cluster](deploy-cockroachdb-with-kubernetes-operator.html) with the CockroachDB operator. ## Overview @@ -30,29 +30,31 @@ To upgrade from one patch release to another within the same major version, perf 1. Change the container image in the custom resource: - ```yaml + ~~~ yaml cockroachdb: crdbCluster: image: name: cockroachdb/cockroach:v25.2.2 - ``` + ~~~ -2. Apply the new settings to the cluster: +1. Apply the new settings to the cluster: - ```shell - $ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE + ~~~ - The Operator will perform the staged update. + The operator will perform the staged update. -3. To check the status of the rolling upgrade, run `kubectl get pods`. +1. To check the status of the rolling upgrade, run `kubectl get pods`. -4. Verify that all pods have been upgraded: +1. Verify that all pods have been upgraded: - ```shell - $ kubectl get pods \ + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl get pods \ -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.containers[0].image}{"\n"}' - ``` + ~~~ You can also check the CockroachDB version of each node in the [DB Console](ui-cluster-overview-page.html#node-details). 
@@ -66,33 +68,35 @@ To perform a major upgrade: 1. Change the container image in the values file: - ```yaml + ~~~ yaml cockroachdb: crdbCluster: image: name: cockroachdb/cockroach:v25.1.4 - ``` + ~~~ -2. Apply the new settings to the cluster: +1. Apply the new settings to the cluster: - ```shell - $ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE + ~~~ - The Operator will perform the staged update. + The operator will perform the staged update. -3. To check the status of the rolling upgrade, run `kubectl get pods`. +1. To check the status of the rolling upgrade, run `kubectl get pods`. -4. Verify that all pods have been upgraded: +1. Verify that all pods have been upgraded: - ```shell - $ kubectl get pods \ + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl get pods \ -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.containers[0].image}{"\n"}' - ``` + ~~~ -5. If auto-finalization is enabled (the default), finalization begins as soon as the last node rejoins the cluster. When finalization finishes, the upgrade is complete. +1. If auto-finalization is enabled (the default), finalization begins as soon as the last node rejoins the cluster. When finalization finishes, the upgrade is complete. -6. If auto-finalization is disabled, follow your organization's testing procedures to decide whether to [finalize the upgrade](#finalize-a-major-version-upgrade-manually) or [roll back](#roll-back-a-major-version-upgrade) the upgrade. After finalization begins, you can no longer roll back to the cluster's previous major version. +1. 
If auto-finalization is disabled, follow your organization's testing procedures to decide whether to [finalize the upgrade](#finalize-a-major-version-upgrade-manually) or [roll back](#roll-back-a-major-version-upgrade) the upgrade. After finalization begins, you can no longer roll back to the cluster's previous major version. ### Finalize a major-version upgrade manually @@ -104,29 +108,31 @@ To roll back to the previous major version before an upgrade is finalized: 1. Change the container image in the custom resource to use the previous major version: - ```yaml + ~~~ yaml cockroachdb: crdbCluster: image: name: cockroachdb/cockroach:v24.3 - ``` + ~~~ -2. Apply the new settings to the cluster: +1. Apply the new settings to the cluster: - ```shell - $ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE - ``` + {% include_cached copy-clipboard.html %} + ~~~ shell + helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE + ~~~ - The Operator will perform the staged rollback. + The operator will perform the staged rollback. -3. To check the status of the rollback, run `kubectl get pods`. +1. To check the status of the rollback, run `kubectl get pods`. -4. Verify that all pods have been rolled back: +1. Verify that all pods have been rolled back: - ```shell - $ kubectl get pods \ + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl get pods \ -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.containers[0].image}{"\n"}' - ``` + ~~~ Rollbacks do not require finalization. 
From 9164513d81f517dab86571238d774076df903255 Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Mon, 4 Aug 2025 14:25:19 -0400 Subject: [PATCH 04/27] Add overview and preview note --- ...nfigure-cockroachdb-kubernetes-operator.md | 4 +++ ...oy-cockroachdb-with-kubernetes-operator.md | 4 +++ .../deploy-cockroachdb-with-kubernetes.md | 4 +-- .../v25.2/kubernetes-operator-overview.md | 36 ++++++++++++++++++- .../v25.2/kubernetes-operator-performance.md | 4 +++ .../migrate-cockroachdb-kubernetes-helm.md | 4 +++ ...migrate-cockroachdb-kubernetes-operator.md | 4 +++ ...monitor-cockroachdb-kubernetes-operator.md | 4 +++ .../scale-cockroachdb-kubernetes-operator.md | 4 +++ ...chedule-cockroachdb-kubernetes-operator.md | 4 +++ ...upgrade-cockroachdb-kubernetes-operator.md | 4 +++ 11 files changed, 73 insertions(+), 3 deletions(-) diff --git a/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md b/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md index 59c8d6b4111..a4e33ae4c18 100644 --- a/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md @@ -9,6 +9,10 @@ docs_area: deploy This page explains how to configure Kubernetes cluster resources such as memory, CPU, and storage. +{{site.data.alerts.callout_info}} +The CockroachDB operator is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). +{{site.data.alerts.end}} + On a production cluster, the resources you allocate to CockroachDB should be proportionate to your machine types and workload. Cockroach Labs recommends that you determine and set these values before deploying the cluster, but you can also update the values on a running cluster. 
{{site.data.alerts.callout_info}} diff --git a/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md b/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md index 3a9f75f1e91..61d8ae4f961 100644 --- a/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md +++ b/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md @@ -9,6 +9,10 @@ docs_area: deploy This page describes how to start and stop a secure 3-node CockroachDB cluster in a single [Kubernetes](http://kubernetes.io/) cluster. +{{site.data.alerts.callout_info}} +The CockroachDB operator is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). +{{site.data.alerts.end}} + ## Prerequisites and best practices ### Kubernetes version diff --git a/src/current/v25.2/deploy-cockroachdb-with-kubernetes.md b/src/current/v25.2/deploy-cockroachdb-with-kubernetes.md index 2220b2f21f6..009b6fc9229 100644 --- a/src/current/v25.2/deploy-cockroachdb-with-kubernetes.md +++ b/src/current/v25.2/deploy-cockroachdb-with-kubernetes.md @@ -11,7 +11,7 @@ docs_area: This page shows you how to start and stop a secure 3-node CockroachDB cluster in a single [Kubernetes](http://kubernetes.io/) cluster. You can use any of the following approaches: -- [CockroachDB Kubernetes Operator](https://github.com/cockroachdb/cockroach-operator) +- [Public Kubernetes operator](https://github.com/cockroachdb/cockroach-operator) {{site.data.alerts.callout_info}} The CockroachDB Kubernetes Operator is also available on platforms such as [Red Hat OpenShift]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-openshift.md %}) and [IBM Cloud Pak for Data](https://www.ibm.com/products/cloud-pak-for-data). @@ -36,7 +36,7 @@ This page shows you how to start and stop a secure 3-node CockroachDB cluster in Choose how you want to deploy and maintain the CockroachDB cluster. 
{{site.data.alerts.callout_info}} -The [CockroachDB Kubernetes Operator](https://github.com/cockroachdb/cockroach-operator) eases CockroachDB cluster creation and management on a single Kubernetes cluster. +The [Public Kubernetes operator](https://github.com/cockroachdb/cockroach-operator) eases CockroachDB cluster creation and management on a single Kubernetes cluster. The Operator does not provision or apply an Enterprise license key. To use CockroachDB with the Operator, [set a license]({% link {{ page.version.version }}/licensing-faqs.md %}#set-a-license) in the SQL shell. {{site.data.alerts.end}} diff --git a/src/current/v25.2/kubernetes-operator-overview.md b/src/current/v25.2/kubernetes-operator-overview.md index 46e741120bd..50e4da793cc 100644 --- a/src/current/v25.2/kubernetes-operator-overview.md +++ b/src/current/v25.2/kubernetes-operator-overview.md @@ -8,4 +8,38 @@ docs_area: deploy key: operate-cockroachdb-kubernetes-operator.html --- -Placeholder content introducing the new operator and comparing it to existing k8s deployments. +The CockroachDB operator is a fully-featured [Kubernetes operator](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/) that allows you to deploy and manage CockroachDB self-hosted clusters. + +{{site.data.alerts.callout_info}} +The CockroachDB operator is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). +{{site.data.alerts.end}} + +With the CockroachDB operator, you can deploy CockroachDB clusters across multiple regions with separate operator instances per region. Using [Helm](https://helm.sh/), set configurations that manage the operator and CockroachDB nodes across regions. + +## CockroachDB operator + +This section describes how to: + +- [Deploy a CockroachDB cluster using the CockroachDB operator]({% link {{page.version.version}}/deploy-cockroachdb-with-kubernetes-operator.md %}). 
+- Migrate from an existing CockroachDB Kubernetes deployment using [Helm]({% link {{page.version.version}}/migrate-cockroachdb-kubernetes-helm.md %}) or the [public operator]({% link {{page.version.version}}/migrate-cockroachdb-kubernetes-operator.md %}). +- Operate a CockroachDB cluster: + + - [Manage pod scheduling]({% link {{page.version.version}}/schedule-cockroachdb-kubernetes-operator.md %}). + - [Manage cluster resources]({% link {{page.version.version}}/configure-cockroachdb-kubernetes-operator.md %}). + - [Manage certificates]({% link {{page.version.version}}/secure-cockroachdb-kubernetes-operator.md %}). + - [Scale a cluster]({% link {{page.version.version}}/scale-cockroachdb-kubernetes-operator.md %}). + - [Monitor a cluster]({% link {{page.version.version}}/monitor-cockroachdb-kubernetes-operator.md %}). + - [Upgrade a cluster]({% link {{page.version.version}}/upgrade-cockroachdb-kubernetes-operator.md %}). + - [Improve cluster performance]({% link {{page.version.version}}/kubernetes-operator-performance.md %}). + +## Kubernetes terminology + +Before starting, review some basic Kubernetes terminology. Note that CockroachDB [nodes]({% link {{ page.version.version }}/architecture/glossary.md %}#cockroachdb-architecture-terms) are distinct from Kubernetes "nodes" or "worker nodes". + +Feature | Description +--------|------------ +[node](https://kubernetes.io/docs/concepts/architecture/nodes/) | A physical or virtual machine. In the [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}), you'll create instances and join them as worker nodes into a single Kubernetes cluster. +[pod](http://kubernetes.io/docs/user-guide/pods/) | A pod is a group of one or more Docker containers.
In the [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}), each pod will run on a separate Kubernetes worker node and include one Docker container running a single CockroachDB node, reflecting our [topology recommendations]({% link {{ page.version.version }}/recommended-production-settings.md %}#topology). +[operator](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/) | An operator is an extension to Kubernetes that uses custom resources to efficiently manage specific applications. The CockroachDB operator includes two custom resource definitions, `cockroachdb` to manage a CockroachDB pod and `operator` to manage the operator pod itself. Unlike the older [public operator](https://github.com/cockroachdb/cockroach-operator), the CockroachDB operator does not use StatefulSets and is designed to simplify multi-region deployments. +[persistent volume](http://kubernetes.io/docs/user-guide/persistent-volumes/) | A persistent volume is a piece of networked storage (Persistent Disk on GCE, Elastic Block Store on AWS) mounted into a pod. The lifetime of a persistent volume is decoupled from the lifetime of the pod that's using it, ensuring that each CockroachDB node binds back to the same storage on restart.

The [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}) assumes that dynamic volume provisioning is available. When that is not the case, [persistent volume claims](http://kubernetes.io/docs/user-guide/persistent-volumes/#persistentvolumeclaims) need to be created manually. +[RBAC](https://kubernetes.io/docs/reference/access-authn-authz/rbac/) | RBAC, or Role-Based Access Control, is the system Kubernetes uses to manage permissions within the cluster. In order to take an action (e.g., `get` or `create`) on an API resource (e.g., a `pod`), the client must have a `Role` that allows it to do so. The [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}) creates the RBAC resources necessary for CockroachDB to create and access certificates. diff --git a/src/current/v25.2/kubernetes-operator-performance.md b/src/current/v25.2/kubernetes-operator-performance.md index 83df6072b43..53447db465a 100644 --- a/src/current/v25.2/kubernetes-operator-performance.md +++ b/src/current/v25.2/kubernetes-operator-performance.md @@ -7,6 +7,10 @@ docs_area: deploy Kubernetes provides many useful abstractions for deploying and operating distributed systems, but some of the abstractions come with a performance overhead and an increase in underlying system complexity. This page outlines potential bottlenecks when running CockroachDB in Kubernetes and how to optimize performance. +{{site.data.alerts.callout_info}} +The CockroachDB operator is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). 
+{{site.data.alerts.end}} + ## Before you begin Before you focus on optimizing a Kubernetes-orchestrated CockroachDB cluster: diff --git a/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md b/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md index 30ba55d3fd4..39c649a49f4 100644 --- a/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md +++ b/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md @@ -9,6 +9,10 @@ docs_area: deploy This guide describes how to migrate an existing CockroachDB cluster managed via StatefulSet to the CockroachDB operator. +{{site.data.alerts.callout_info}} +The CockroachDB operator is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). +{{site.data.alerts.end}} + These instructions assume that you are migrating from a StatefulSet cluster that was configured using the Helm chart with the following command: ~~~ shell diff --git a/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md b/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md index c9bf01b051b..9615fe1613d 100644 --- a/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md @@ -9,6 +9,10 @@ docs_area: deploy This guide describes how to migrate an existing CockroachDB cluster managed via the public operator to the CockroachDB operator. +{{site.data.alerts.callout_info}} +The CockroachDB operator is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). 
+{{site.data.alerts.end}} + These instructions assume that you are migrating from a public operator cluster that is managed with kubectl via the following yaml files: {% include_cached copy-clipboard.html %} diff --git a/src/current/v25.2/monitor-cockroachdb-kubernetes-operator.md b/src/current/v25.2/monitor-cockroachdb-kubernetes-operator.md index e299a12dea3..33b9628ba5b 100644 --- a/src/current/v25.2/monitor-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/monitor-cockroachdb-kubernetes-operator.md @@ -8,6 +8,10 @@ docs_area: deploy Despite CockroachDB's various [built-in safeguards against failure](architecture/replication-layer.html), it is critical to actively monitor the overall health and performance of a cluster running in production and to create alerting rules that promptly send notifications when there are events that require investigation or intervention. +{{site.data.alerts.callout_info}} +The CockroachDB operator is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). +{{site.data.alerts.end}} + ## Configure Prometheus Every node of a CockroachDB cluster exports granular timeseries metrics formatted for easy integration with [Prometheus](https://prometheus.io/), an open source tool for storing, aggregating, and querying timeseries data. This section shows you how to orchestrate Prometheus as part of your Kubernetes cluster and pull these metrics into Prometheus for external monitoring. diff --git a/src/current/v25.2/scale-cockroachdb-kubernetes-operator.md b/src/current/v25.2/scale-cockroachdb-kubernetes-operator.md index d99af8d32a1..cc336f33757 100644 --- a/src/current/v25.2/scale-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/scale-cockroachdb-kubernetes-operator.md @@ -9,6 +9,10 @@ docs_area: deploy This page explains how to add and remove CockroachDB nodes on Kubernetes. 
+{{site.data.alerts.callout_info}} +The CockroachDB operator is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). +{{site.data.alerts.end}} + ## Add nodes Before scaling up CockroachDB, note the following [topology recommendations](recommended-production-settings.html#topology): diff --git a/src/current/v25.2/schedule-cockroachdb-kubernetes-operator.md b/src/current/v25.2/schedule-cockroachdb-kubernetes-operator.md index 64c166a5587..ea38586f5db 100644 --- a/src/current/v25.2/schedule-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/schedule-cockroachdb-kubernetes-operator.md @@ -9,6 +9,10 @@ docs_area: deploy This page describes how to configure pod scheduling settings. These settings control how CockroachDB pods should be identified or scheduled onto worker nodes, which are then proxied to the Kubernetes scheduler. +{{site.data.alerts.callout_info}} +The CockroachDB operator is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). +{{site.data.alerts.end}} + ## Node selectors A pod with a *node selector* will be scheduled onto a worker node that has matching [labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/), or key-value pairs. diff --git a/src/current/v25.2/upgrade-cockroachdb-kubernetes-operator.md b/src/current/v25.2/upgrade-cockroachdb-kubernetes-operator.md index 69d893e24e9..6221394281a 100644 --- a/src/current/v25.2/upgrade-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/upgrade-cockroachdb-kubernetes-operator.md @@ -9,6 +9,10 @@ docs_area: deploy This page describes how to upgrade a CockroachDB cluster that is [deployed on a Kubernetes cluster](deploy-cockroachdb-with-kubernetes-operator.html) with the CockroachDB operator. +{{site.data.alerts.callout_info}} +The CockroachDB operator is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). 
+{{site.data.alerts.end}} + ## Overview {% include common/upgrade/overview.md %} From 3270650ece6428f650c637a635538f50bd7fddbd Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Mon, 4 Aug 2025 14:55:01 -0400 Subject: [PATCH 05/27] Add cross-references to new operator content --- .../v25.2/cockroachdb-operator-recommendation.md | 5 +++++ .../v25.2/configure-cockroachdb-kubernetes.md | 10 +++++++--- ...ploy-cockroachdb-with-kubernetes-openshift.md | 2 ++ .../v25.2/deploy-cockroachdb-with-kubernetes.md | 6 ++++-- src/current/v25.2/kubernetes-overview.md | 8 +++++--- src/current/v25.2/kubernetes-performance.md | 4 ++++ .../v25.2/monitor-cockroachdb-kubernetes.md | 4 ++++ ...-cockroachdb-with-kubernetes-multi-cluster.md | 4 ++-- .../v25.2/scale-cockroachdb-kubernetes.md | 6 +++++- .../v25.2/schedule-cockroachdb-kubernetes.md | 16 +++++++++++++--- .../v25.2/secure-cockroachdb-kubernetes.md | 4 ++++ .../v25.2/upgrade-cockroachdb-kubernetes.md | 4 ++++ 12 files changed, 59 insertions(+), 14 deletions(-) create mode 100644 src/current/_includes/v25.2/cockroachdb-operator-recommendation.md diff --git a/src/current/_includes/v25.2/cockroachdb-operator-recommendation.md b/src/current/_includes/v25.2/cockroachdb-operator-recommendation.md new file mode 100644 index 00000000000..5e7409b6ab4 --- /dev/null +++ b/src/current/_includes/v25.2/cockroachdb-operator-recommendation.md @@ -0,0 +1,5 @@ +{{site.data.alerts.callout_success}} +The CockroachDB operator is a fully-featured Kubernetes operator that is designed for ease of deployment and scaling of multi-region clusters. To learn more, read the [CockroachDB operator documentation]({% link {{page.version.version}}/kubernetes-operator-overview.md %}). + +New deployments of CockroachDB on Kubernetes are recommended to use the CockroachDB operator. 
To migrate an existing deployment to use the CockroachDB operator, read the [Helm]({% link {{page.version.version}}/migrate-cockroachdb-kubernetes-helm.md %}) and [public operator]({% link {{page.version.version}}/migrate-cockroachdb-kubernetes-operator.md %}) migration guides. +{{site.data.alerts.end}} \ No newline at end of file diff --git a/src/current/v25.2/configure-cockroachdb-kubernetes.md b/src/current/v25.2/configure-cockroachdb-kubernetes.md index ac07d9131f4..5488843d136 100644 --- a/src/current/v25.2/configure-cockroachdb-kubernetes.md +++ b/src/current/v25.2/configure-cockroachdb-kubernetes.md @@ -9,13 +9,17 @@ docs_area: deploy {% capture latest_operator_version %}{% include_cached latest_operator_version.md %}{% endcapture %} -This page explains how to configure Kubernetes cluster resources such as memory, CPU, and storage. +This page explains how to configure Kubernetes cluster resources such as memory, CPU, and storage. + +This page is for Kubernetes deployments that are not using the CockroachDB operator. For guidance specific to the CockroachDB operator, read [Resource management with the CockroachDB operator]({% link {{ page.version.version }}/configure-cockroachdb-kubernetes-operator.md %}). + +{% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} These settings override the defaults used when [deploying CockroachDB on Kubernetes]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}).
- - + +
diff --git a/src/current/v25.2/deploy-cockroachdb-with-kubernetes-openshift.md b/src/current/v25.2/deploy-cockroachdb-with-kubernetes-openshift.md index e35721c7bfc..50eba46d776 100644 --- a/src/current/v25.2/deploy-cockroachdb-with-kubernetes-openshift.md +++ b/src/current/v25.2/deploy-cockroachdb-with-kubernetes-openshift.md @@ -8,6 +8,8 @@ docs_area: This page shows you how to start and stop a secure 3-node CockroachDB cluster on the Red Hat OpenShift platform, using the [CockroachDB Kubernetes Operator](https://marketplace.redhat.com/en-us/products/cockroachdb-operator). +{% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} + ## Before you begin You must have the following set up before proceeding with this tutorial: diff --git a/src/current/v25.2/deploy-cockroachdb-with-kubernetes.md b/src/current/v25.2/deploy-cockroachdb-with-kubernetes.md index 009b6fc9229..2ed27085e95 100644 --- a/src/current/v25.2/deploy-cockroachdb-with-kubernetes.md +++ b/src/current/v25.2/deploy-cockroachdb-with-kubernetes.md @@ -9,9 +9,11 @@ docs_area: {% include {{ page.version.version }}/filter-tabs/crdb-single-kubernetes.md %} -This page shows you how to start and stop a secure 3-node CockroachDB cluster in a single [Kubernetes](http://kubernetes.io/) cluster. 
You can use any of the following approaches: +{% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} -- [Public Kubernetes operator](https://github.com/cockroachdb/cockroach-operator) +This page shows you how to start and stop a secure 3-node CockroachDB cluster in a single [Kubernetes](http://kubernetes.io/) cluster using the following approaches: + +- [Public operator](https://github.com/cockroachdb/cockroach-operator) {{site.data.alerts.callout_info}} The CockroachDB Kubernetes Operator is also available on platforms such as [Red Hat OpenShift]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-openshift.md %}) and [IBM Cloud Pak for Data](https://www.ibm.com/products/cloud-pak-for-data). diff --git a/src/current/v25.2/kubernetes-overview.md b/src/current/v25.2/kubernetes-overview.md index c0155fed329..925edd86e70 100644 --- a/src/current/v25.2/kubernetes-overview.md +++ b/src/current/v25.2/kubernetes-overview.md @@ -10,12 +10,14 @@ key: operate-cockroachdb-kubernetes.html Kubernetes is a portable, extensible, open source platform for managing containerized workloads and services. For a given workload, you provide Kubernetes with a configuration, and Kubernetes applies that configuration to all Kubernetes nodes that are running the application. 
-CockroachDB can be deployed and managed on Kubernetes using the following methods: +{% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} -- [CockroachDB Kubernetes Operator](https://github.com/cockroachdb/cockroach-operator) +You can also deploy CockroachDB on Kubernetes using the following methods: + +- [Public operator](https://github.com/cockroachdb/cockroach-operator) {{site.data.alerts.callout_info}} - The CockroachDB Kubernetes Operator is also available on platforms such as [Red Hat OpenShift]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-openshift.md %}) and [IBM Cloud Pak for Data](https://www.ibm.com/products/cloud-pak-for-data). + The public operator is also available on platforms such as [Red Hat OpenShift]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-openshift.md %}) and [IBM Cloud Pak for Data](https://www.ibm.com/products/cloud-pak-for-data). {{site.data.alerts.end}} - Manual [StatefulSet](http://kubernetes.io/docs/concepts/abstractions/controllers/statefulsets/) configuration diff --git a/src/current/v25.2/kubernetes-performance.md b/src/current/v25.2/kubernetes-performance.md index 47aef82797d..5d53065ce4d 100644 --- a/src/current/v25.2/kubernetes-performance.md +++ b/src/current/v25.2/kubernetes-performance.md @@ -7,6 +7,10 @@ docs_area: deploy Kubernetes provides many useful abstractions for deploying and operating distributed systems, but some of the abstractions come with a performance overhead and an increase in underlying system complexity. This page explains potential bottlenecks to be aware of when [running CockroachDB in Kubernetes]({% link {{ page.version.version }}/kubernetes-overview.md %}) and shows you how to optimize your deployment for better performance. +This page is for Kubernetes deployments that are not using the CockroachDB operator. 
For guidance specific to the CockroachDB operator, read [CockroachDB Performance on Kubernetes with the CockroachDB Operator]({% link {{ page.version.version }}/kubernetes-operator-performance.md %}). + +{% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} +
## Before you begin diff --git a/src/current/v25.2/monitor-cockroachdb-kubernetes.md b/src/current/v25.2/monitor-cockroachdb-kubernetes.md index b800eb696dd..7eb100b41e0 100644 --- a/src/current/v25.2/monitor-cockroachdb-kubernetes.md +++ b/src/current/v25.2/monitor-cockroachdb-kubernetes.md @@ -12,6 +12,10 @@ This article assumes you have already [deployed CockroachDB on a single Kubernet Despite CockroachDB's various [built-in safeguards against failure]({% link {{ page.version.version }}/architecture/replication-layer.md %}), it is critical to actively monitor the overall health and performance of a cluster running in production and to create alerting rules that promptly send notifications when there are events that require investigation or intervention. +This page is for Kubernetes deployments that are not using the CockroachDB operator. For guidance specific to the CockroachDB operator, read [Cluster Monitoring with the CockroachDB Operator]({% link {{ page.version.version }}/monitor-cockroachdb-kubernetes-operator.md %}). + +{% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} +
diff --git a/src/current/v25.2/orchestrate-cockroachdb-with-kubernetes-multi-cluster.md b/src/current/v25.2/orchestrate-cockroachdb-with-kubernetes-multi-cluster.md index b47593bda96..55f144f4d74 100644 --- a/src/current/v25.2/orchestrate-cockroachdb-with-kubernetes-multi-cluster.md +++ b/src/current/v25.2/orchestrate-cockroachdb-with-kubernetes-multi-cluster.md @@ -15,9 +15,9 @@ docs_area: deploy This page shows you how to orchestrate a secure CockroachDB deployment across three [Kubernetes](http://kubernetes.io/) clusters, each in a different geographic region, using [StatefulSets](http://kubernetes.io/docs/concepts/abstractions/controllers/statefulsets/) to manage the containers within each cluster and linking them together via DNS. This will result in a single, multi-region CockroachDB cluster running on Kubernetes. -{{site.data.alerts.callout_success}} +{% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} + To deploy CockroachDB in a single Kubernetes cluster instead, see [Kubernetes Single-Cluster Deployment]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}). Also, for details about potential performance bottlenecks to be aware of when running CockroachDB in Kubernetes and guidance on how to optimize your deployment for better performance, see [CockroachDB Performance on Kubernetes]({% link {{ page.version.version }}/kubernetes-performance.md %}). -{{site.data.alerts.end}} ## Before you begin diff --git a/src/current/v25.2/scale-cockroachdb-kubernetes.md b/src/current/v25.2/scale-cockroachdb-kubernetes.md index 7e62cf5cdf7..2ac2e256932 100644 --- a/src/current/v25.2/scale-cockroachdb-kubernetes.md +++ b/src/current/v25.2/scale-cockroachdb-kubernetes.md @@ -13,8 +13,12 @@ This article assumes you have already [deployed CockroachDB on a single Kubernet This page explains how to add and remove CockroachDB nodes on Kubernetes. 
+This page is for Kubernetes deployments that are not using the CockroachDB operator. For guidance specific to the CockroachDB operator, read [Cluster Scaling with the CockroachDB Operator]({% link {{ page.version.version }}/scale-cockroachdb-kubernetes-operator.md %}). + +{% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} +
- +
diff --git a/src/current/v25.2/schedule-cockroachdb-kubernetes.md b/src/current/v25.2/schedule-cockroachdb-kubernetes.md index 973d234ffa9..4ec9eff650f 100644 --- a/src/current/v25.2/schedule-cockroachdb-kubernetes.md +++ b/src/current/v25.2/schedule-cockroachdb-kubernetes.md @@ -7,15 +7,25 @@ secure: true docs_area: deploy --- -This page describes how to configure the following, using the [Operator](https://github.com/cockroachdb/cockroach-operator): +This page describes how to configure the following, using the [public operator](https://github.com/cockroachdb/cockroach-operator): +- [Enable feature gates](#enable-feature-gates) - [Node selectors](#node-selectors) -- [Node affinities](#add-a-node-affinity) -- [Pod affinities and anti-affinities](#add-a-pod-affinity-or-anti-affinity) +- [Affinities and anti-affinities](#affinities-and-anti-affinities) + - [Add a node affinity](#add-a-node-affinity) + - [Add a pod affinity or anti-affinity](#add-a-pod-affinity-or-anti-affinity) + - [Example: Scheduling CockroachDB onto labeled nodes](#example-scheduling-cockroachdb-onto-labeled-nodes) - [Taints and tolerations](#taints-and-tolerations) + - [Add a toleration](#add-a-toleration) + - [Example: Evicting CockroachDB from a running worker node](#example-evicting-cockroachdb-from-a-running-worker-node) - [Topology spread constraints](#topology-spread-constraints) + - [Add a topology spread constraint](#add-a-topology-spread-constraint) - [Resource labels and annotations](#resource-labels-and-annotations) +This page is for Kubernetes deployments that are not using the CockroachDB operator. For guidance specific to the CockroachDB operator, read [Pod Scheduling with the CockroachDB Operator]({% link {{ page.version.version }}/schedule-cockroachdb-kubernetes-operator.md %}). + +{% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} + These settings control how CockroachDB pods can be identified or scheduled onto worker nodes. 
{% include {{ page.version.version }}/orchestration/operator-check-namespace.md %} diff --git a/src/current/v25.2/secure-cockroachdb-kubernetes.md b/src/current/v25.2/secure-cockroachdb-kubernetes.md index 6fcf7b7fee8..3675a474154 100644 --- a/src/current/v25.2/secure-cockroachdb-kubernetes.md +++ b/src/current/v25.2/secure-cockroachdb-kubernetes.md @@ -13,6 +13,10 @@ This article assumes you have already [deployed CockroachDB securely on a single By default, self-signed certificates are used when using the Operator or Helm to securely [deploy CockroachDB on Kubernetes]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}). However, the recommended approach is to use `cert-manager` for certificate management. For details, refer to [Deploy cert-manager for mTLS](?filters=helm#deploy-cert-manager-for-mtls). +This page is for Kubernetes deployments that are not using the CockroachDB operator. For guidance specific to the CockroachDB operator, read [Certificate Management with the CockroachDB Operator]({% link {{ page.version.version }}/secure-cockroachdb-kubernetes-operator.md %}). + +{% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} + This page explains how to: - Authenticate an Operator or Helm deployment using a [custom CA](#use-a-custom-ca) diff --git a/src/current/v25.2/upgrade-cockroachdb-kubernetes.md b/src/current/v25.2/upgrade-cockroachdb-kubernetes.md index 69116e7bc8b..01349a83f06 100644 --- a/src/current/v25.2/upgrade-cockroachdb-kubernetes.md +++ b/src/current/v25.2/upgrade-cockroachdb-kubernetes.md @@ -9,6 +9,10 @@ docs_area: deploy This page shows how to upgrade a CockroachDB cluster that is [deployed on a Kubernetes cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}). +This page is for Kubernetes deployments that are not using the CockroachDB operator. 
For guidance specific to the CockroachDB operator, read [Upgrade a Cluster in Kubernetes with the CockroachDB Operator]({% link {{ page.version.version }}/upgrade-cockroachdb-kubernetes-operator.md %}). + +{% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} + ## Overview {% include common/upgrade/overview.md %} From 26257a69c8013add89c8f1783f668c8c3079a543 Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Mon, 4 Aug 2025 16:23:20 -0400 Subject: [PATCH 06/27] Make operator naming consistent and tokenized --- src/current/_data/products.yml | 4 ++ .../_includes/sidebar-all-releases.json | 2 +- src/current/_includes/sidebar-releases.json | 2 +- .../orchestration/kubernetes-limitations.md | 4 +- .../cockroachdb-operator-recommendation.md | 16 +++++-- .../orchestration/kubernetes-limitations.md | 10 ++--- .../orchestration/kubernetes-stop-cluster.md | 4 +- .../orchestration/operator-check-namespace.md | 2 +- .../start-cockroachdb-operator-secure.md | 16 +++---- .../sidebar-data/self-hosted-deployments.json | 2 +- .../cockroachdb-operator-recommendation.md | 13 ++++++ src/current/releases/kubernetes-operator.md | 42 +++++++++--------- src/current/v23.1/node-shutdown.md | 4 +- src/current/v23.2/node-shutdown.md | 4 +- src/current/v24.1/node-shutdown.md | 4 +- src/current/v24.2/node-shutdown.md | 4 +- src/current/v24.3/node-shutdown.md | 4 +- src/current/v25.1/node-shutdown.md | 4 +- ...nfigure-cockroachdb-kubernetes-operator.md | 12 ++--- .../v25.2/configure-cockroachdb-kubernetes.md | 28 ++++++------ .../create-security-certificates-custom-ca.md | 2 +- src/current/v25.2/create-sequence.md | 2 +- ...y-cockroachdb-with-kubernetes-openshift.md | 16 +++---- ...oy-cockroachdb-with-kubernetes-operator.md | 10 ++--- .../deploy-cockroachdb-with-kubernetes.md | 10 ++--- .../v25.2/kubernetes-operator-overview.md | 18 ++++---- .../v25.2/kubernetes-operator-performance.md | 2 +- src/current/v25.2/kubernetes-overview.md | 4 +- 
src/current/v25.2/kubernetes-performance.md | 2 +- .../migrate-cockroachdb-kubernetes-helm.md | 8 ++-- ...migrate-cockroachdb-kubernetes-operator.md | 28 ++++++------ ...monitor-cockroachdb-kubernetes-operator.md | 4 +- .../v25.2/monitor-cockroachdb-kubernetes.md | 18 ++++---- src/current/v25.2/node-shutdown.md | 10 +++-- ...estrate-a-local-cluster-with-kubernetes.md | 8 ++-- .../scale-cockroachdb-kubernetes-operator.md | 2 +- .../v25.2/scale-cockroachdb-kubernetes.md | 26 +++++------ ...chedule-cockroachdb-kubernetes-operator.md | 2 +- .../v25.2/schedule-cockroachdb-kubernetes.md | 30 ++++++------- .../secure-cockroachdb-kubernetes-operator.md | 6 +-- .../v25.2/secure-cockroachdb-kubernetes.md | 44 +++++++++---------- ...upgrade-cockroachdb-kubernetes-operator.md | 4 +- .../v25.2/upgrade-cockroachdb-kubernetes.md | 6 +-- src/current/v25.3/node-shutdown.md | 4 +- 44 files changed, 240 insertions(+), 207 deletions(-) create mode 100644 src/current/_includes/v25.3/cockroachdb-operator-recommendation.md diff --git a/src/current/_data/products.yml b/src/current/_data/products.yml index 3262992fe56..49f1daefd4b 100644 --- a/src/current/_data/products.yml +++ b/src/current/_data/products.yml @@ -15,3 +15,7 @@ basic: Basic standard: Standard advanced: Advanced + +public-operator: Public operator + +cockroachdb-operator: CockroachDB operator diff --git a/src/current/_includes/sidebar-all-releases.json b/src/current/_includes/sidebar-all-releases.json index 6074a7d3679..2cec07dc252 100644 --- a/src/current/_includes/sidebar-all-releases.json +++ b/src/current/_includes/sidebar-all-releases.json @@ -63,7 +63,7 @@ ] }, { - "title": "Kubernetes Operator", + "title": "Public Kubernetes Operator", "urls": [ "/releases/kubernetes-operator.html" ] diff --git a/src/current/_includes/sidebar-releases.json b/src/current/_includes/sidebar-releases.json index 1484fbae75b..cc4fc3f5800 100644 --- a/src/current/_includes/sidebar-releases.json +++ 
b/src/current/_includes/sidebar-releases.json @@ -65,7 +65,7 @@ ] }, { - "title": "CockroachDB Kubernetes Operator", + "title": "Public Kubernetes Operator", "urls": [ "/releases/kubernetes-operator.html" ] diff --git a/src/current/_includes/v23.1/orchestration/kubernetes-limitations.md b/src/current/_includes/v23.1/orchestration/kubernetes-limitations.md index 5e9784c28d1..1addf94e08a 100644 --- a/src/current/_includes/v23.1/orchestration/kubernetes-limitations.md +++ b/src/current/_includes/v23.1/orchestration/kubernetes-limitations.md @@ -2,9 +2,9 @@ To deploy CockroachDB {{page.version.version}}, Kubernetes 1.18 or higher is required. Cockroach Labs strongly recommends that you use a Kubernetes version that is [eligible for patch support by the Kubernetes project](https://kubernetes.io/releases/). -#### Kubernetes Operator +#### Public Kubernetes Operator -- The CockroachDB Kubernetes Operator currently deploys clusters in a single region. For multi-region deployments using manual configs, see [Orchestrate CockroachDB Across Multiple Kubernetes Clusters]({% link {{ page.version.version }}/orchestrate-cockroachdb-with-kubernetes-multi-cluster.md %}). +- The {{ site.data.products.public-operator }} currently deploys clusters in a single region. For multi-region deployments using manual configs, see [Orchestrate CockroachDB Across Multiple Kubernetes Clusters]({% link {{ page.version.version }}/orchestrate-cockroachdb-with-kubernetes-multi-cluster.md %}). - Using the Operator, you can give a new cluster an arbitrary number of [labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/). However, a cluster's labels cannot be modified after it is deployed. To track the status of this limitation, refer to [#993](https://github.com/cockroachdb/cockroach-operator/issues/993) in the Operator project's issue tracker.
diff --git a/src/current/_includes/v25.2/cockroachdb-operator-recommendation.md b/src/current/_includes/v25.2/cockroachdb-operator-recommendation.md index 5e7409b6ab4..36d7095395b 100644 --- a/src/current/_includes/v25.2/cockroachdb-operator-recommendation.md +++ b/src/current/_includes/v25.2/cockroachdb-operator-recommendation.md @@ -1,5 +1,13 @@ -{{site.data.alerts.callout_success}} -The CockroachDB operator is a fully-featured Kubernetes operator that is designed for ease of deployment and scaling of multi-region clusters. To learn more, read the [CockroachDB operator documentation]({% link {{page.version.version}}/kubernetes-operator-overview.md %}). +{% if page.name == "kubernetes-operator.md" %} +{{ site.data.alerts.callout_success }} +The {{ site.data.products.cockroachdb-operator }} is a fully-featured Kubernetes operator that is designed for ease of deployment and scaling of multi-region clusters. To learn more, read the [{{ site.data.products.cockroachdb-operator }} documentation]({% link v25.2/kubernetes-operator-overview.md %}). -New deployments of CockroachDB on Kubernetes are recommended to use the CockroachDB operator. To migrate an existing deployment to use the CockroachDB operator, read the [Helm]({% link {{page.version.version}}/migrate-cockroachdb-kubernetes-helm.md %}) and [public operator]({% link {{page.version.version}}/migrate-cockroachdb-kubernetes-operator.md %}) migration guides. -{{site.data.alerts.end}} \ No newline at end of file +New deployments of CockroachDB on Kubernetes are recommended to use the {{ site.data.products.cockroachdb-operator }}. To migrate an existing deployment to use the {{ site.data.products.cockroachdb-operator }}, read the [Helm]({% link v25.2/migrate-cockroachdb-kubernetes-helm.md %}) and [{{ site.data.products.public-operator }}]({% link v25.2/migrate-cockroachdb-kubernetes-operator.md %}) migration guides. 
+{{ site.data.alerts.end }} +{% else %} +{{ site.data.alerts.callout_success }} +The {{ site.data.products.cockroachdb-operator }} is a fully-featured Kubernetes operator that is designed for ease of deployment and scaling of multi-region clusters. To learn more, read the [{{ site.data.products.cockroachdb-operator }} documentation]({% link {{ page.version.version }}/kubernetes-operator-overview.md %}). + +New deployments of CockroachDB on Kubernetes are recommended to use the {{ site.data.products.cockroachdb-operator }}. To migrate an existing deployment to use the {{ site.data.products.cockroachdb-operator }}, read the [Helm]({% link {{ page.version.version }}/migrate-cockroachdb-kubernetes-helm.md %}) and [{{ site.data.products.public-operator }}]({% link {{ page.version.version }}/migrate-cockroachdb-kubernetes-operator.md %}) migration guides. +{{ site.data.alerts.end }} +{% endif %} \ No newline at end of file diff --git a/src/current/_includes/v25.2/orchestration/kubernetes-limitations.md b/src/current/_includes/v25.2/orchestration/kubernetes-limitations.md index 5e9784c28d1..7f032b6151e 100644 --- a/src/current/_includes/v25.2/orchestration/kubernetes-limitations.md +++ b/src/current/_includes/v25.2/orchestration/kubernetes-limitations.md @@ -2,11 +2,11 @@ To deploy CockroachDB {{page.version.version}}, Kubernetes 1.18 or higher is required. Cockroach Labs strongly recommends that you use a Kubernetes version that is [eligible for patch support by the Kubernetes project](https://kubernetes.io/releases/). -#### Kubernetes Operator +#### {{ site.data.products.public-operator }} -- The CockroachDB Kubernetes Operator currently deploys clusters in a single region. For multi-region deployments using manual configs, see [Orchestrate CockroachDB Across Multiple Kubernetes Clusters]({% link {{ page.version.version }}/orchestrate-cockroachdb-with-kubernetes-multi-cluster.md %}). +- The {{ site.data.products.public-operator }} deploys clusters in a single region. 
For multi-region deployments using manual configs, Cockroach Labs recommends using the [{{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/kubernetes-operator-overview.md %}) which is designed to support multi-region deployments. For guidance on how to force multi-region support with the {{ site.data.products.public-operator }}, see [Orchestrate CockroachDB Across Multiple Kubernetes Clusters]({% link {{ page.version.version }}/orchestrate-cockroachdb-with-kubernetes-multi-cluster.md %}). -- Using the Operator, you can give a new cluster an arbitrary number of [labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/). However, a cluster's labels cannot be modified after it is deployed. To track the status of this limitation, refer to [#993](https://github.com/cockroachdb/cockroach-operator/issues/993) in the Operator project's issue tracker. +- Using the {{ site.data.products.public-operator }}, you can give a new cluster an arbitrary number of [labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/). However, a cluster's labels cannot be modified after it is deployed. To track the status of this limitation, refer to [#993](https://github.com/cockroachdb/cockroach-operator/issues/993) in the {{ site.data.products.public-operator }} project's issue tracker. {% unless page.name == "orchestrate-cockroachdb-with-kubernetes-multi-cluster.md" %} #### Helm version @@ -17,9 +17,9 @@ The CockroachDB Helm chart requires Helm 3.0 or higher. If you attempt to use an Error: UPGRADE FAILED: template: cockroachdb/templates/tests/client.yaml:6:14: executing "cockroachdb/templates/tests/client.yaml" at <.Values.networkPolicy.enabled>: nil pointer evaluating interface {}.enabled ~~~ -The CockroachDB Helm chart is currently not under active development, and no new features are planned. 
However, Cockroach Labs remains committed to fully supporting the Helm chart by addressing defects, providing security patches, and addressing breaking changes due to deprecations in Kubernetes APIs. +The public Helm chart is currently not under active development, and no new features are planned. However, Cockroach Labs remains committed to fully supporting the Helm chart by addressing defects, providing security patches, and addressing breaking changes due to deprecations in Kubernetes APIs. -A deprecation notice for the Helm chart will be provided to customers a minimum of 6 months in advance of actual deprecation. +A deprecation notice for the public Helm chart will be provided to customers a minimum of 6 months in advance of actual deprecation. {% endunless %} #### Network diff --git a/src/current/_includes/v25.2/orchestration/kubernetes-stop-cluster.md b/src/current/_includes/v25.2/orchestration/kubernetes-stop-cluster.md index 58d79611e6d..c1db8bca26a 100644 --- a/src/current/_includes/v25.2/orchestration/kubernetes-stop-cluster.md +++ b/src/current/_includes/v25.2/orchestration/kubernetes-stop-cluster.md @@ -10,14 +10,14 @@ To shut down the CockroachDB cluster: kubectl delete -f example.yaml ~~~ -1. Remove the Operator: +1. Remove the {{ site.data.products.public-operator }}: {% include_cached copy-clipboard.html %} ~~~ shell kubectl delete -f https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/install/operator.yaml ~~~ - This will delete the CockroachDB cluster being run by the Operator. It intentionally does **not** delete: + This will delete the CockroachDB cluster being run by the {{ site.data.products.public-operator }}. It intentionally does **not** delete: - The persistent volumes that were attached to the pods, to avoid the risk of data loss. Before deleting a cluster's persistent volumes, be sure to back them up. 
For more information, refer to [Delete a Cluster's Persistent Volumes](#delete-a-clusters-persistent-volumes) in the Kubernetes project's documentation. - Any secrets you may have created. For more information on managing secrets, refer to [Managing Secrets Using `kubectl`](https://kubernetes.io/docs/tasks/configmap-secret/managing-secret-using-kubectl) in the Kubernetes project's documentation. diff --git a/src/current/_includes/v25.2/orchestration/operator-check-namespace.md b/src/current/_includes/v25.2/orchestration/operator-check-namespace.md index bc37c6e1681..4a37876acd4 100644 --- a/src/current/_includes/v25.2/orchestration/operator-check-namespace.md +++ b/src/current/_includes/v25.2/orchestration/operator-check-namespace.md @@ -1,3 +1,3 @@ {{site.data.alerts.callout_info}} -All `kubectl` steps should be performed in the [namespace where you installed the Operator]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#install-the-operator). By default, this is `cockroach-operator-system`. +All `kubectl` steps should be performed in the [namespace where you installed the operator]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#install-the-operator). By default, this is `cockroach-operator-system`. 
{{site.data.alerts.end}} \ No newline at end of file diff --git a/src/current/_includes/v25.2/orchestration/start-cockroachdb-operator-secure.md b/src/current/_includes/v25.2/orchestration/start-cockroachdb-operator-secure.md index 5cbc1c49af9..543297c3433 100644 --- a/src/current/_includes/v25.2/orchestration/start-cockroachdb-operator-secure.md +++ b/src/current/_includes/v25.2/orchestration/start-cockroachdb-operator-secure.md @@ -1,4 +1,4 @@ -### Install the Operator +### Install the {{ site.data.products.public-operator }} {% capture latest_operator_version %}{% include_cached latest_operator_version.md %}{% endcapture %} {% capture apply_default_operator_manifest_command %}{% include_cached copy-clipboard.html %} @@ -28,7 +28,7 @@ ~~~ {% endcapture %} -1. Apply the [custom resource definition (CRD)](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/#customresourcedefinitions) for the Operator: +1. Apply the [custom resource definition (CRD)](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/#customresourcedefinitions) for the {{ site.data.products.public-operator }}: {% include_cached copy-clipboard.html %} ~~~ shell @@ -39,16 +39,16 @@ customresourcedefinition.apiextensions.k8s.io/crdbclusters.crdb.cockroachlabs.com created ~~~ -1. By default, the Operator is configured to install in the `cockroach-operator-system` namespace and to manage CockroachDB instances for all namespaces on the cluster.
  • To use these defaults, apply the Operator manifest without modifying it: {{ apply_default_operator_manifest_command }}
  • To change these defaults:
    1. Download the Operator manifest: {{ download_operator_manifest_command }}
    2. To use a custom namespace, edit all instances of namespace: cockroach-operator-system with your desired namespace.
    3. To limit the namespaces that will be monitored, set the WATCH_NAMESPACE environment variable in the Deployment pod spec. This can be set to a single namespace or a comma-delimited set of namespaces. When set, only those CrdbCluster resources in the supplied namespace(s) will be reconciled.
    4. Apply your local version of the Operator manifest to the cluster: {{ apply_local_operator_manifest_command }}
+1. By default, the {{ site.data.products.public-operator }} is configured to install in the `cockroach-operator-system` namespace and to manage CockroachDB instances for all namespaces on the cluster.
  • To use these defaults, apply the {{ site.data.products.public-operator }} manifest without modifying it: {{ apply_default_operator_manifest_command }}
  • To change these defaults:
    1. Download the {{ site.data.products.public-operator }} manifest: {{ download_operator_manifest_command }}
    2. To use a custom namespace, edit all instances of namespace: cockroach-operator-system with your desired namespace.
    3. To limit the namespaces that will be monitored, set the WATCH_NAMESPACE environment variable in the Deployment pod spec. This can be set to a single namespace or a comma-delimited set of namespaces. When set, only those CrdbCluster resources in the supplied namespace(s) will be reconciled.
    4. Apply your local version of the {{ site.data.products.public-operator }} manifest to the cluster: {{ apply_local_operator_manifest_command }}
-1. Set your current namespace to the one used by the Operator. For example, to use the Operator's default namespace: +1. Set your current namespace to the one used by the {{ site.data.products.public-operator }}. For example, to use the {{ site.data.products.public-operator }}'s default namespace: {% include_cached copy-clipboard.html %} ~~~ shell $ kubectl config set-context --current --namespace=cockroach-operator-system ~~~ -1. Validate that the Operator is running: +1. Validate that the operator is running: {% include_cached copy-clipboard.html %} ~~~ shell @@ -66,7 +66,7 @@ After a cluster managed by the Kubernetes operator is initialized, its Kubernetes labels cannot be modified. For more details, refer to [Best practices](#best-practices). {{site.data.alerts.end}} -1. Download `example.yaml`, a custom resource that tells the Operator how to configure the Kubernetes cluster. +1. Download `example.yaml`, a custom resource that tells the operator how to configure the Kubernetes cluster. {% include_cached copy-clipboard.html %} ~~~ shell @@ -76,7 +76,7 @@ After a cluster managed by the Kubernetes operator is initialized, its Kubernete By default, this custom resource specifies CPU and memory resources that are appropriate for the virtual machines used in this deployment example. On a production cluster, you should substitute values that are appropriate for your machines and workload. For details on configuring your deployment, see [Configure the Cluster](configure-cockroachdb-kubernetes.html). {{site.data.alerts.callout_info}} - By default, the Operator will generate and sign 1 client and 1 node certificate to secure the cluster. This means that if you do not provide a CA, a `cockroach`-generated CA is used. If you want to authenticate using your own CA, [specify the generated secrets in the custom resource](secure-cockroachdb-kubernetes.html#use-a-custom-ca) **before** proceeding to the next step. 
+ By default, the operator will generate and sign 1 client and 1 node certificate to secure the cluster. This means that if you do not provide a CA, a `cockroach`-generated CA is used. If you want to authenticate using your own CA, [specify the generated secrets in the custom resource](secure-cockroachdb-kubernetes.html#use-a-custom-ca) **before** proceeding to the next step. {{site.data.alerts.end}} 1. Apply `example.yaml`: @@ -86,7 +86,7 @@ After a cluster managed by the Kubernetes operator is initialized, its Kubernete $ kubectl apply -f example.yaml ~~~ - The Operator will create a StatefulSet and initialize the nodes as a cluster. + The operator will create a StatefulSet and initialize the nodes as a cluster. ~~~ crdbcluster.crdb.cockroachlabs.com/cockroachdb created diff --git a/src/current/_includes/v25.2/sidebar-data/self-hosted-deployments.json b/src/current/_includes/v25.2/sidebar-data/self-hosted-deployments.json index 0333793d24c..b350f6acd6c 100644 --- a/src/current/_includes/v25.2/sidebar-data/self-hosted-deployments.json +++ b/src/current/_includes/v25.2/sidebar-data/self-hosted-deployments.json @@ -195,7 +195,7 @@ ] }, { - "title": "Migrate from Public Operator", + "title": "Migrate from {{ site.data.products.public-operator }}", "urls": [ "/${VERSION}/migrate-cockroachdb-kubernetes-operator.html" ] diff --git a/src/current/_includes/v25.3/cockroachdb-operator-recommendation.md b/src/current/_includes/v25.3/cockroachdb-operator-recommendation.md new file mode 100644 index 00000000000..36d7095395b --- /dev/null +++ b/src/current/_includes/v25.3/cockroachdb-operator-recommendation.md @@ -0,0 +1,13 @@ +{% if page.name == "kubernetes-operator.md" %} +{{ site.data.alerts.callout_success }} +The {{ site.data.products.cockroachdb-operator }} is a fully-featured Kubernetes operator that is designed for ease of deployment and scaling of multi-region clusters. 
To learn more, read the [{{ site.data.products.cockroachdb-operator }} documentation]({% link v25.2/kubernetes-operator-overview.md %}). + +New deployments of CockroachDB on Kubernetes are recommended to use the {{ site.data.products.cockroachdb-operator }}. To migrate an existing deployment to use the {{ site.data.products.cockroachdb-operator }}, read the [Helm]({% link v25.2/migrate-cockroachdb-kubernetes-helm.md %}) and [{{ site.data.products.public-operator }}]({% link v25.2/migrate-cockroachdb-kubernetes-operator.md %}) migration guides. +{{ site.data.alerts.end }} +{% else %} +{{ site.data.alerts.callout_success }} +The {{ site.data.products.cockroachdb-operator }} is a fully-featured Kubernetes operator that is designed for ease of deployment and scaling of multi-region clusters. To learn more, read the [{{ site.data.products.cockroachdb-operator }} documentation]({% link {{ page.version.version }}/kubernetes-operator-overview.md %}). + +New deployments of CockroachDB on Kubernetes are recommended to use the {{ site.data.products.cockroachdb-operator }}. To migrate an existing deployment to use the {{ site.data.products.cockroachdb-operator }}, read the [Helm]({% link {{ page.version.version }}/migrate-cockroachdb-kubernetes-helm.md %}) and [{{ site.data.products.public-operator }}]({% link {{ page.version.version }}/migrate-cockroachdb-kubernetes-operator.md %}) migration guides. 
+{{ site.data.alerts.end }} +{% endif %} \ No newline at end of file diff --git a/src/current/releases/kubernetes-operator.md b/src/current/releases/kubernetes-operator.md index 8fe96acd078..b3c6a818d5a 100644 --- a/src/current/releases/kubernetes-operator.md +++ b/src/current/releases/kubernetes-operator.md @@ -1,5 +1,5 @@ --- -title: CockroachDB Kubernetes Operator Releases +title: Public Kubernetes Operator Releases summary: Changelog for the Kubernetes Operator for CockroachDB toc: true docs_area: releases @@ -7,16 +7,18 @@ docs_area: releases {% capture latest_operator_version %}{% include_cached latest_operator_version.md %}{% endcapture %} -The CockroachDB [Kubernetes Operator](https://www.cockroachlabs.com/docs/stable/kubernetes-overview) allows you to configure, deploy, and manage CockroachDB {{ site.data.products.core }} clusters on Kubernetes. The Kubernetes Operator is released on a separate schedule and is versioned independently from CockroachDB. To learn more about different approaches to deploy CockroachDB using Kubernetes, refer to [Kubernetes Overview](https://www.cockroachlabs.com/docs/stable/kubernetes-overview). +The [Public Kubernetes operator](https://www.cockroachlabs.com/docs/stable/kubernetes-overview) allows you to configure, deploy, and manage CockroachDB {{ site.data.products.core }} clusters on Kubernetes. The {{ site.data.products.public-operator }} is released on a separate schedule and is versioned independently from CockroachDB. To learn more about different approaches to deploy CockroachDB using Kubernetes, refer to [Kubernetes Overview](https://www.cockroachlabs.com/docs/stable/kubernetes-overview). -This page announces releases of the Kubernetes Operator and provides links to more information on GitHub. **Version {{ latest_operator_version }} is the latest release**. 
+{% include /{{ site.versions.stable }}/cockroachdb-operator-recommendation.md %} -In addition to monitoring this page, you can subscribe to be notified about releases to the Kubernetes Operator. Visit [CockroachDB Kubernetes Operator source code repository](https://github.com/cockroachdb/cockroach-operator) and click **Watch**. +This page announces releases of the {{ site.data.products.public-operator }} and provides links to more information on GitHub. **Version {{ latest_operator_version }} is the latest release**. + +In addition to monitoring this page, you can subscribe to be notified about releases to the {{ site.data.products.public-operator }}. Visit the [{{ site.data.products.public-operator }} source code repository](https://github.com/cockroachdb/cockroach-operator) and click **Watch**. {{site.data.alerts.callout_success}} -If you already use the [Helm](https://helm.sh/) package manager to manage your Kubernetes infrastructure, you can manage CockroachDB {{ site.data.products.core }} clusters by using the [CockroachDB Helm chart](https://github.com/cockroachdb/helm-charts/tree/master/cockroachdb) instead of using the Kubernetes Operator. The Helm chart does not use the Kubernetes Operator, and there is no automated way to migrate from one method to the other. +If you already use the [Helm](https://helm.sh/) package manager to manage your Kubernetes infrastructure, you can manage CockroachDB {{ site.data.products.core }} clusters by using the [public Helm chart](https://github.com/cockroachdb/helm-charts/tree/master/cockroachdb) instead of using the {{ site.data.products.public-operator }}. The Helm chart does not use the {{ site.data.products.public-operator }}, and there is no automated way to migrate from one method to the other. -To be notified about updates to the Helm chart, visit the [CockroachDB Helm chart source code repository](https://github.com/cockroachdb/helm-charts/tree/master/cockroachdb) and click **Watch**.
+To be notified about updates to the Helm chart, visit the [public Helm chart source code repository](https://github.com/cockroachdb/helm-charts/tree/master/cockroachdb) and click **Watch**. {{site.data.alerts.end}} {% comment %} Copy the top section below and bump the variable {% endcomment %} @@ -24,7 +26,7 @@ To be notified about updates to the Helm chart, visit the [CockroachDB Helm char ## April 16, 2024 {% assign operator_version = "2.14.0" %} -CockroachDB Kubernetes Operator {{ operator_version }} is available. +{{ site.data.products.public-operator }} {{ operator_version }} is available. - [Changelog](https://github.com/cockroachdb/cockroach-operator/blob/master/CHANGELOG.md#v{{ operator_version }}) - [Download](https://github.com/cockroachdb/cockroach-operator/releases/tag/v{{ operator_version }}) @@ -32,7 +34,7 @@ CockroachDB Kubernetes Operator {{ operator_version }} is available. ## March 12, 2024 {% assign operator_version = "2.13.0" %} -CockroachDB Kubernetes Operator {{ operator_version }} is available. +{{ site.data.products.public-operator }} {{ operator_version }} is available. - [Changelog](https://github.com/cockroachdb/cockroach-operator/blob/master/CHANGELOG.md#v{{ operator_version }}) - [Download](https://github.com/cockroachdb/cockroach-operator/releases/tag/v{{ operator_version }}) @@ -40,7 +42,7 @@ CockroachDB Kubernetes Operator {{ operator_version }} is available. ## October 12, 2023 {% assign operator_version = "2.12.0" %} -CockroachDB Kubernetes Operator {{ operator_version }} is available. +{{ site.data.products.public-operator }} {{ operator_version }} is available. - [Changelog](https://github.com/cockroachdb/cockroach-operator/blob/master/CHANGELOG.md#v{{ operator_version }}) - [Download](https://github.com/cockroachdb/cockroach-operator/releases/tag/v{{ operator_version }}) @@ -48,7 +50,7 @@ CockroachDB Kubernetes Operator {{ operator_version }} is available. 
## July 25, 2023 {% assign operator_version = "2.11.0" %} -CockroachDB Kubernetes Operator {{ operator_version }} is available. +{{ site.data.products.public-operator }} {{ operator_version }} is available. - [Changelog](https://github.com/cockroachdb/cockroach-operator/blob/master/CHANGELOG.md#v{{ operator_version }}) - [Download](https://github.com/cockroachdb/cockroach-operator/releases/tag/v{{ operator_version }}) @@ -56,7 +58,7 @@ CockroachDB Kubernetes Operator {{ operator_version }} is available. ## January 19, 2023 {% assign operator_version = "2.10.0" %} -CockroachDB Kubernetes Operator {{ operator_version }} is available. +{{ site.data.products.public-operator }} {{ operator_version }} is available. - [Changelog](https://github.com/cockroachdb/cockroach-operator/blob/master/CHANGELOG.md#v{{ operator_version }}) - [Download](https://github.com/cockroachdb/cockroach-operator/releases/tag/v{{ operator_version }}) @@ -64,7 +66,7 @@ CockroachDB Kubernetes Operator {{ operator_version }} is available. ## December 16, 2022 {% assign operator_version = "2.9.0" %} -CockroachDB Kubernetes Operator {{ operator_version }} is available. +{{ site.data.products.public-operator }} {{ operator_version }} is available. - [Changelog](https://github.com/cockroachdb/cockroach-operator/blob/master/CHANGELOG.md#v{{ operator_version }}) - [Download](https://github.com/cockroachdb/cockroach-operator/releases/tag/v{{ operator_version }}) @@ -72,7 +74,7 @@ CockroachDB Kubernetes Operator {{ operator_version }} is available. ## July 13, 2022 {% assign operator_version = "2.8.0" %} -CockroachDB Kubernetes Operator {{ operator_version }} is available. +{{ site.data.products.public-operator }} {{ operator_version }} is available. 
- [Changelog](https://github.com/cockroachdb/cockroach-operator/blob/master/CHANGELOG.md#v{{ operator_version }}) - [Download](https://github.com/cockroachdb/cockroach-operator/releases/tag/v{{ operator_version }}) @@ -80,7 +82,7 @@ CockroachDB Kubernetes Operator {{ operator_version }} is available. ## May 26, 2022 {% assign operator_version = "2.7.0" %} -CockroachDB Kubernetes Operator {{ operator_version }} is available. +{{ site.data.products.public-operator }} {{ operator_version }} is available. - [Changelog](https://github.com/cockroachdb/cockroach-operator/blob/master/CHANGELOG.md#v{{ operator_version }}) - [Download](https://github.com/cockroachdb/cockroach-operator/releases/tag/v{{ operator_version }}) @@ -88,7 +90,7 @@ CockroachDB Kubernetes Operator {{ operator_version }} is available. ## April 14, 2022 {% assign operator_version = "2.6.0" %} -CockroachDB Kubernetes Operator {{ operator_version }} is available. +{{ site.data.products.public-operator }} {{ operator_version }} is available. - [Changelog](https://github.com/cockroachdb/cockroach-operator/blob/master/CHANGELOG.md#v{{ operator_version }}) - [Download](https://github.com/cockroachdb/cockroach-operator/releases/tag/v{{ operator_version }}) @@ -96,7 +98,7 @@ CockroachDB Kubernetes Operator {{ operator_version }} is available. ## January 7, 2022 {% assign operator_version = "2.5.0" %} -CockroachDB Kubernetes Operator {{ operator_version }} is available. +{{ site.data.products.public-operator }} {{ operator_version }} is available. - [Changelog](https://github.com/cockroachdb/cockroach-operator/blob/master/CHANGELOG.md#v{{ operator_version }}) - [Download](https://github.com/cockroachdb/cockroach-operator/releases/tag/v{{ operator_version }}) @@ -104,7 +106,7 @@ CockroachDB Kubernetes Operator {{ operator_version }} is available. ## November 8, 2021 {% assign operator_version = "2.4.0" %} -CockroachDB Kubernetes Operator {{ operator_version }} is available. 
+{{ site.data.products.public-operator }} {{ operator_version }} is available. - [Changelog](https://github.com/cockroachdb/cockroach-operator/blob/master/CHANGELOG.md#v{{ operator_version }}) - [Download](https://github.com/cockroachdb/cockroach-operator/releases/tag/v{{ operator_version }}) @@ -112,7 +114,7 @@ CockroachDB Kubernetes Operator {{ operator_version }} is available. ## October 19, 2021 {% assign operator_version = "2.3.0" %} -CockroachDB Kubernetes Operator {{ operator_version }} is available. +{{ site.data.products.public-operator }} {{ operator_version }} is available. - [Changelog](https://github.com/cockroachdb/cockroach-operator/blob/master/CHANGELOG.md#v{{ operator_version }}) - [Download](https://github.com/cockroachdb/cockroach-operator/releases/tag/v{{ operator_version }}) @@ -120,7 +122,7 @@ CockroachDB Kubernetes Operator {{ operator_version }} is available. ## October 5, 2021 {% assign operator_version = "2.2.0" %} -CockroachDB Kubernetes Operator {{ operator_version }} is available. +{{ site.data.products.public-operator }} {{ operator_version }} is available. - [Changelog](https://github.com/cockroachdb/cockroach-operator/blob/master/CHANGELOG.md#v{{ operator_version }}) - [Download](https://github.com/cockroachdb/cockroach-operator/releases/tag/v{{ operator_version }}) @@ -128,7 +130,7 @@ CockroachDB Kubernetes Operator {{ operator_version }} is available. ## August 27, 2021 {% assign operator_version = "2.1.0" %} -CockroachDB Kubernetes Operator {{ operator_version }} is available. +{{ site.data.products.public-operator }} {{ operator_version }} is available. 
- [Changelog](https://github.com/cockroachdb/cockroach-operator/blob/master/CHANGELOG.md#v{{ operator_version }}) - [Download](https://github.com/cockroachdb/cockroach-operator/releases/tag/v{{ operator_version }}) diff --git a/src/current/v23.1/node-shutdown.md b/src/current/v23.1/node-shutdown.md index 59556b95075..b0ea6e2940c 100644 --- a/src/current/v23.1/node-shutdown.md +++ b/src/current/v23.1/node-shutdown.md @@ -834,7 +834,7 @@ On the **Cluster Overview** page of the DB Console, the [node status]({% link {{ Most of the guidance in this page is most relevant to manual deployments that don't use Kubernetes. If you use Kubernetes to deploy CockroachDB, draining and decommissioning work the same way for the `cockroach` process, but Kubernetes handles them on your behalf. In a deployment without Kubernetes, an administrator initiates decommissioning or draining directly. In a Kubernetes deployment, an administrator modifies the desired configuration of the Kubernetes cluster and Kubernetes makes the required changes to the cluster, including decommissioning or draining nodes as required. -- Whether you deployed a cluster using the CockroachDB Operator, Helm, or a manual StatefulSet, the resulting deployment is a StatefulSet. Due to the nature of StatefulSets, it's safe to decommission **only** the Cockroach node with the highest StatefulSet ordinal in preparation for scaling down the StatefulSet. If you think you need to decommission any other node, consider the following recommendations and [contact Support](https://support.cockroachlabs.com/hc/en-us) for assistance. +- Whether you deployed a cluster using the {{ site.data.products.cockroachdb-operator }}, Helm, or a manual StatefulSet, the resulting deployment is a StatefulSet. Due to the nature of StatefulSets, it's safe to decommission **only** the Cockroach node with the highest StatefulSet ordinal in preparation for scaling down the StatefulSet. 
If you think you need to decommission any other node, consider the following recommendations and [contact Support](https://support.cockroachlabs.com/hc/en-us) for assistance. - If you deployed a cluster using the [CockroachDB Kubernetes Operator]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}), the best way to scale down a cluster is to update the specification for the Kubernetes deployment to reduce the value of `nodes:` and apply the change using a [rolling update](https://kubernetes.io/docs/tutorials/kubernetes-basics/update/update-intro/). Kubernetes will notice that there are now too many nodes and will reduce them and clean up their storage automatically. @@ -854,7 +854,7 @@ After Kubernetes issues a termination request to the `cockroach` process on a cl If undefined, Kubernetes sets `terminationGracePeriodSeconds` to 30 seconds. This is too short for the `cockroach` process to stop gracefully before Kubernetes terminates it forcibly. Do not set `terminationGracePeriodSeconds` to `0`, which prevents Kubernetes from detecting and terminating a stuck pod. -For clusters deployed using the CockroachDB Public Operator, `terminationGracePeriodSeconds` defaults to 300 seconds (5 minutes). +For clusters deployed using the {{ site.data.products.public-operator }}, `terminationGracePeriodSeconds` defaults to 300 seconds (5 minutes). For clusters deployed using the CockroachDB Helm chart or a manual StatefulSet, the default depends upon the values file or manifest you used when you created the cluster. 
Cockroach Labs recommends that you: diff --git a/src/current/v23.2/node-shutdown.md b/src/current/v23.2/node-shutdown.md index 9d5b830c27f..593eb2c2a54 100644 --- a/src/current/v23.2/node-shutdown.md +++ b/src/current/v23.2/node-shutdown.md @@ -834,7 +834,7 @@ On the **Cluster Overview** page of the DB Console, the [node status]({% link {{ Most of the guidance in this page is most relevant to manual deployments that don't use Kubernetes. If you use Kubernetes to deploy CockroachDB, draining and decommissioning work the same way for the `cockroach` process, but Kubernetes handles them on your behalf. In a deployment without Kubernetes, an administrator initiates decommissioning or draining directly. In a Kubernetes deployment, an administrator modifies the desired configuration of the Kubernetes cluster and Kubernetes makes the required changes to the cluster, including decommissioning or draining nodes as required. -- Whether you deployed a cluster using the CockroachDB Operator, Helm, or a manual StatefulSet, the resulting deployment is a StatefulSet. Due to the nature of StatefulSets, it's safe to decommission **only** the Cockroach node with the highest StatefulSet ordinal in preparation for scaling down the StatefulSet. If you think you need to decommission any other node, consider the following recommendations and [contact Support](https://support.cockroachlabs.com/hc/en-us) for assistance. +- Whether you deployed a cluster using the {{ site.data.products.cockroachdb-operator }}, Helm, or a manual StatefulSet, the resulting deployment is a StatefulSet. Due to the nature of StatefulSets, it's safe to decommission **only** the Cockroach node with the highest StatefulSet ordinal in preparation for scaling down the StatefulSet. If you think you need to decommission any other node, consider the following recommendations and [contact Support](https://support.cockroachlabs.com/hc/en-us) for assistance. 
- If you deployed a cluster using the [CockroachDB Kubernetes Operator]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}), the best way to scale down a cluster is to update the specification for the Kubernetes deployment to reduce the value of `nodes:` and apply the change using a [rolling update](https://kubernetes.io/docs/tutorials/kubernetes-basics/update/update-intro/). Kubernetes will notice that there are now too many nodes and will reduce them and clean up their storage automatically. @@ -854,7 +854,7 @@ After Kubernetes issues a termination request to the `cockroach` process on a cl If undefined, Kubernetes sets `terminationGracePeriodSeconds` to 30 seconds. This is too short for the `cockroach` process to stop gracefully before Kubernetes terminates it forcibly. Do not set `terminationGracePeriodSeconds` to `0`, which prevents Kubernetes from detecting and terminating a stuck pod. -For clusters deployed using the CockroachDB Public Operator, `terminationGracePeriodSeconds` defaults to 300 seconds (5 minutes). +For clusters deployed using the {{ site.data.products.public-operator }}, `terminationGracePeriodSeconds` defaults to 300 seconds (5 minutes). For clusters deployed using the CockroachDB Helm chart or a manual StatefulSet, the default depends upon the values file or manifest you used when you created the cluster. Cockroach Labs recommends that you: diff --git a/src/current/v24.1/node-shutdown.md b/src/current/v24.1/node-shutdown.md index 9d5b830c27f..593eb2c2a54 100644 --- a/src/current/v24.1/node-shutdown.md +++ b/src/current/v24.1/node-shutdown.md @@ -834,7 +834,7 @@ On the **Cluster Overview** page of the DB Console, the [node status]({% link {{ Most of the guidance in this page is most relevant to manual deployments that don't use Kubernetes. If you use Kubernetes to deploy CockroachDB, draining and decommissioning work the same way for the `cockroach` process, but Kubernetes handles them on your behalf. 
In a deployment without Kubernetes, an administrator initiates decommissioning or draining directly. In a Kubernetes deployment, an administrator modifies the desired configuration of the Kubernetes cluster and Kubernetes makes the required changes to the cluster, including decommissioning or draining nodes as required. -- Whether you deployed a cluster using the CockroachDB Operator, Helm, or a manual StatefulSet, the resulting deployment is a StatefulSet. Due to the nature of StatefulSets, it's safe to decommission **only** the Cockroach node with the highest StatefulSet ordinal in preparation for scaling down the StatefulSet. If you think you need to decommission any other node, consider the following recommendations and [contact Support](https://support.cockroachlabs.com/hc/en-us) for assistance. +- Whether you deployed a cluster using the {{ site.data.products.cockroachdb-operator }}, Helm, or a manual StatefulSet, the resulting deployment is a StatefulSet. Due to the nature of StatefulSets, it's safe to decommission **only** the Cockroach node with the highest StatefulSet ordinal in preparation for scaling down the StatefulSet. If you think you need to decommission any other node, consider the following recommendations and [contact Support](https://support.cockroachlabs.com/hc/en-us) for assistance. - If you deployed a cluster using the [CockroachDB Kubernetes Operator]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}), the best way to scale down a cluster is to update the specification for the Kubernetes deployment to reduce the value of `nodes:` and apply the change using a [rolling update](https://kubernetes.io/docs/tutorials/kubernetes-basics/update/update-intro/). Kubernetes will notice that there are now too many nodes and will reduce them and clean up their storage automatically. 
@@ -854,7 +854,7 @@ After Kubernetes issues a termination request to the `cockroach` process on a cl If undefined, Kubernetes sets `terminationGracePeriodSeconds` to 30 seconds. This is too short for the `cockroach` process to stop gracefully before Kubernetes terminates it forcibly. Do not set `terminationGracePeriodSeconds` to `0`, which prevents Kubernetes from detecting and terminating a stuck pod. -For clusters deployed using the CockroachDB Public Operator, `terminationGracePeriodSeconds` defaults to 300 seconds (5 minutes). +For clusters deployed using the {{ site.data.products.public-operator }}, `terminationGracePeriodSeconds` defaults to 300 seconds (5 minutes). For clusters deployed using the CockroachDB Helm chart or a manual StatefulSet, the default depends upon the values file or manifest you used when you created the cluster. Cockroach Labs recommends that you: diff --git a/src/current/v24.2/node-shutdown.md b/src/current/v24.2/node-shutdown.md index 9d5b830c27f..593eb2c2a54 100644 --- a/src/current/v24.2/node-shutdown.md +++ b/src/current/v24.2/node-shutdown.md @@ -834,7 +834,7 @@ On the **Cluster Overview** page of the DB Console, the [node status]({% link {{ Most of the guidance in this page is most relevant to manual deployments that don't use Kubernetes. If you use Kubernetes to deploy CockroachDB, draining and decommissioning work the same way for the `cockroach` process, but Kubernetes handles them on your behalf. In a deployment without Kubernetes, an administrator initiates decommissioning or draining directly. In a Kubernetes deployment, an administrator modifies the desired configuration of the Kubernetes cluster and Kubernetes makes the required changes to the cluster, including decommissioning or draining nodes as required. -- Whether you deployed a cluster using the CockroachDB Operator, Helm, or a manual StatefulSet, the resulting deployment is a StatefulSet. 
Due to the nature of StatefulSets, it's safe to decommission **only** the Cockroach node with the highest StatefulSet ordinal in preparation for scaling down the StatefulSet. If you think you need to decommission any other node, consider the following recommendations and [contact Support](https://support.cockroachlabs.com/hc/en-us) for assistance. +- Whether you deployed a cluster using the {{ site.data.products.cockroachdb-operator }}, Helm, or a manual StatefulSet, the resulting deployment is a StatefulSet. Due to the nature of StatefulSets, it's safe to decommission **only** the Cockroach node with the highest StatefulSet ordinal in preparation for scaling down the StatefulSet. If you think you need to decommission any other node, consider the following recommendations and [contact Support](https://support.cockroachlabs.com/hc/en-us) for assistance. - If you deployed a cluster using the [CockroachDB Kubernetes Operator]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}), the best way to scale down a cluster is to update the specification for the Kubernetes deployment to reduce the value of `nodes:` and apply the change using a [rolling update](https://kubernetes.io/docs/tutorials/kubernetes-basics/update/update-intro/). Kubernetes will notice that there are now too many nodes and will reduce them and clean up their storage automatically. @@ -854,7 +854,7 @@ After Kubernetes issues a termination request to the `cockroach` process on a cl If undefined, Kubernetes sets `terminationGracePeriodSeconds` to 30 seconds. This is too short for the `cockroach` process to stop gracefully before Kubernetes terminates it forcibly. Do not set `terminationGracePeriodSeconds` to `0`, which prevents Kubernetes from detecting and terminating a stuck pod. -For clusters deployed using the CockroachDB Public Operator, `terminationGracePeriodSeconds` defaults to 300 seconds (5 minutes). 
+For clusters deployed using the {{ site.data.products.public-operator }}, `terminationGracePeriodSeconds` defaults to 300 seconds (5 minutes). For clusters deployed using the CockroachDB Helm chart or a manual StatefulSet, the default depends upon the values file or manifest you used when you created the cluster. Cockroach Labs recommends that you: diff --git a/src/current/v24.3/node-shutdown.md b/src/current/v24.3/node-shutdown.md index 9d5b830c27f..593eb2c2a54 100644 --- a/src/current/v24.3/node-shutdown.md +++ b/src/current/v24.3/node-shutdown.md @@ -834,7 +834,7 @@ On the **Cluster Overview** page of the DB Console, the [node status]({% link {{ Most of the guidance in this page is most relevant to manual deployments that don't use Kubernetes. If you use Kubernetes to deploy CockroachDB, draining and decommissioning work the same way for the `cockroach` process, but Kubernetes handles them on your behalf. In a deployment without Kubernetes, an administrator initiates decommissioning or draining directly. In a Kubernetes deployment, an administrator modifies the desired configuration of the Kubernetes cluster and Kubernetes makes the required changes to the cluster, including decommissioning or draining nodes as required. -- Whether you deployed a cluster using the CockroachDB Operator, Helm, or a manual StatefulSet, the resulting deployment is a StatefulSet. Due to the nature of StatefulSets, it's safe to decommission **only** the Cockroach node with the highest StatefulSet ordinal in preparation for scaling down the StatefulSet. If you think you need to decommission any other node, consider the following recommendations and [contact Support](https://support.cockroachlabs.com/hc/en-us) for assistance. +- Whether you deployed a cluster using the {{ site.data.products.cockroachdb-operator }}, Helm, or a manual StatefulSet, the resulting deployment is a StatefulSet. 
Due to the nature of StatefulSets, it's safe to decommission **only** the Cockroach node with the highest StatefulSet ordinal in preparation for scaling down the StatefulSet. If you think you need to decommission any other node, consider the following recommendations and [contact Support](https://support.cockroachlabs.com/hc/en-us) for assistance. - If you deployed a cluster using the [CockroachDB Kubernetes Operator]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}), the best way to scale down a cluster is to update the specification for the Kubernetes deployment to reduce the value of `nodes:` and apply the change using a [rolling update](https://kubernetes.io/docs/tutorials/kubernetes-basics/update/update-intro/). Kubernetes will notice that there are now too many nodes and will reduce them and clean up their storage automatically. @@ -854,7 +854,7 @@ After Kubernetes issues a termination request to the `cockroach` process on a cl If undefined, Kubernetes sets `terminationGracePeriodSeconds` to 30 seconds. This is too short for the `cockroach` process to stop gracefully before Kubernetes terminates it forcibly. Do not set `terminationGracePeriodSeconds` to `0`, which prevents Kubernetes from detecting and terminating a stuck pod. -For clusters deployed using the CockroachDB Public Operator, `terminationGracePeriodSeconds` defaults to 300 seconds (5 minutes). +For clusters deployed using the {{ site.data.products.public-operator }}, `terminationGracePeriodSeconds` defaults to 300 seconds (5 minutes). For clusters deployed using the CockroachDB Helm chart or a manual StatefulSet, the default depends upon the values file or manifest you used when you created the cluster. 
Cockroach Labs recommends that you: diff --git a/src/current/v25.1/node-shutdown.md b/src/current/v25.1/node-shutdown.md index e95ca33f513..6465879950a 100644 --- a/src/current/v25.1/node-shutdown.md +++ b/src/current/v25.1/node-shutdown.md @@ -834,7 +834,7 @@ On the **Cluster Overview** page of the DB Console, the [node status]({% link {{ Most of the guidance in this page is most relevant to manual deployments that don't use Kubernetes. If you use Kubernetes to deploy CockroachDB, draining and decommissioning work the same way for the `cockroach` process, but Kubernetes handles them on your behalf. In a deployment without Kubernetes, an administrator initiates decommissioning or draining directly. In a Kubernetes deployment, an administrator modifies the desired configuration of the Kubernetes cluster and Kubernetes makes the required changes to the cluster, including decommissioning or draining nodes as required. -- Whether you deployed a cluster using the CockroachDB Operator, Helm, or a manual StatefulSet, the resulting deployment is a StatefulSet. Due to the nature of StatefulSets, it's safe to decommission **only** the Cockroach node with the highest StatefulSet ordinal in preparation for scaling down the StatefulSet. If you think you need to decommission any other node, consider the following recommendations and [contact Support](https://support.cockroachlabs.com/hc/en-us) for assistance. +- Whether you deployed a cluster using the {{ site.data.products.cockroachdb-operator }}, Helm, or a manual StatefulSet, the resulting deployment is a StatefulSet. Due to the nature of StatefulSets, it's safe to decommission **only** the Cockroach node with the highest StatefulSet ordinal in preparation for scaling down the StatefulSet. If you think you need to decommission any other node, consider the following recommendations and [contact Support](https://support.cockroachlabs.com/hc/en-us) for assistance. 
- If you deployed a cluster using the [CockroachDB Kubernetes Operator]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}), the best way to scale down a cluster is to update the specification for the Kubernetes deployment to reduce the value of `nodes:` and apply the change using a [rolling update](https://kubernetes.io/docs/tutorials/kubernetes-basics/update/update-intro/). Kubernetes will notice that there are now too many nodes and will reduce them and clean up their storage automatically. @@ -854,7 +854,7 @@ After Kubernetes issues a termination request to the `cockroach` process on a cl If undefined, Kubernetes sets `terminationGracePeriodSeconds` to 30 seconds. This is too short for the `cockroach` process to stop gracefully before Kubernetes terminates it forcibly. Do not set `terminationGracePeriodSeconds` to `0`, which prevents Kubernetes from detecting and terminating a stuck pod. -For clusters deployed using the CockroachDB Public Operator, `terminationGracePeriodSeconds` defaults to 300 seconds (5 minutes). +For clusters deployed using the {{ site.data.products.public-operator }}, `terminationGracePeriodSeconds` defaults to 300 seconds (5 minutes). For clusters deployed using the CockroachDB Helm chart or a manual StatefulSet, the default depends upon the values file or manifest you used when you created the cluster. Cockroach Labs recommends that you: diff --git a/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md b/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md index a4e33ae4c18..0c3e22f3e05 100644 --- a/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md @@ -10,7 +10,7 @@ docs_area: deploy This page explains how to configure Kubernetes cluster resources such as memory, CPU, and storage. 
{{site.data.alerts.callout_info}} -The CockroachDB operator is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). +The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). {{site.data.alerts.end}} On a production cluster, the resources you allocate to CockroachDB should be proportionate to your machine types and workload. Cockroach Labs recommends that you determine and set these values before deploying the cluster, but you can also update the values on a running cluster. @@ -60,7 +60,7 @@ For more information on how Kubernetes handles resources, see the [Kubernetes do Each CockroachDB node reserves a portion of its available memory for its cache and for storing temporary data for SQL queries. For more information on these settings, see the [Production Checklist](recommended-production-settings.html#cache-and-sql-memory-size). -The CockroachDB operator dynamically sets cache size and SQL memory size each to 25% (the recommended percentage) of the available memory, which depends on the memory request and limit you [specified](#memory-and-cpu) for your configuration. These values can be modified by adding the `cache` or `max-sql-memory` fields to `cockroachdb.crdbCluster.flags`, which is equivalent to appending `--cache` or `--max-sql-memory` as [cockroach start flags](cockroach-start.html#flags). +The {{ site.data.products.cockroachdb-operator }} dynamically sets cache size and SQL memory size each to 25% (the recommended percentage) of the available memory, which depends on the memory request and limit you [specified](#memory-and-cpu) for your configuration. These values can be modified by adding the `cache` or `max-sql-memory` fields to `cockroachdb.crdbCluster.flags`, which is equivalent to appending `--cache` or `--max-sql-memory` as [cockroach start flags](cockroach-start.html#flags). 
## Persistent storage @@ -105,13 +105,13 @@ Apply the new settings to the cluster: helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values ./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE ~~~ -The CockroachDB operator updates all nodes and triggers a rolling restart of the pods with the new storage capacity. +The {{ site.data.products.cockroachdb-operator }} updates all nodes and triggers a rolling restart of the pods with the new storage capacity. To verify that the storage capacity has been updated, run `kubectl get pvc` to view the persistent volume claims (PVCs). It will take a few minutes before the PVCs are completely updated. ## Network ports -The CockroachDB operator separates network traffic into three ports: +The {{ site.data.products.cockroachdb-operator }} separates network traffic into three ports: | Protocol | Default Port| Description | Custom Resource Field | |------------|-------------|-------------------------------|----------------------------------| @@ -119,7 +119,7 @@ The CockroachDB operator separates network traffic into three ports: | HTTP | 8080 | Used to access the DB Console | service.ports.http | | SQL | 26257 | Used for SQL shell access | service.ports.sql | -Specify alternate port numbers in `cockroachdb.crdbCluster.service.ports` of the CockroachDB operator's [custom resource](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster) (for example, to match the default port `5432` on PostgreSQL): +Specify alternate port numbers in `cockroachdb.crdbCluster.service.ports` of the {{ site.data.products.cockroachdb-operator }}'s [custom resource](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster) (for example, to match the default port `5432` on PostgreSQL): ~~~ yaml cockroachdb: @@ -136,4 +136,4 @@ Apply the new settings to the cluster: helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb --values 
./cockroachdb-parent/charts/cockroachdb/values.yaml -n $NAMESPACE ~~~ -The CockroachDB operator updates all nodes and triggers a rolling restart of the pods with the new port settings. +The {{ site.data.products.cockroachdb-operator }} updates all nodes and triggers a rolling restart of the pods with the new port settings. diff --git a/src/current/v25.2/configure-cockroachdb-kubernetes.md b/src/current/v25.2/configure-cockroachdb-kubernetes.md index 5488843d136..3528fcca91f 100644 --- a/src/current/v25.2/configure-cockroachdb-kubernetes.md +++ b/src/current/v25.2/configure-cockroachdb-kubernetes.md @@ -11,14 +11,14 @@ docs_area: deploy This page explains how to configure Kubernetes cluster resources such as memory, CPU, and storage. -This page is for Kubernetes deployments that are not using the CockroachDB operator. For guidance specific to the CockroachDB operator, read [Resource management with the CockroachDB operator]({% link {{ page.version.version }}/configure-cockroachdb-kubernetes-operator.md %}). +This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [Resource management with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/configure-cockroachdb-kubernetes-operator.md %}). {% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} These settings override the defaults used when [deploying CockroachDB on Kubernetes]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}).
- +
@@ -46,7 +46,7 @@ You can set the CPU and memory resources allocated to the CockroachDB container {{site.data.alerts.end}}
-Specify CPU and memory values in `resources.requests` and `resources.limits` in the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): +Specify CPU and memory values in `resources.requests` and `resources.limits` in the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): ~~~ yaml spec: @@ -114,7 +114,7 @@ Each CockroachDB node reserves a portion of its available memory for its cache a Our Kubernetes manifests dynamically set cache size and SQL memory size each to 1/4 (the recommended fraction) of the available memory, which depends on the memory request and limit you [specified](#memory-and-cpu) for your configuration. If you want to customize these values, set them explicitly. -Specify `cache` and `maxSQLMemory` in the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): +Specify `cache` and `maxSQLMemory` in the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): ~~~ yaml spec: @@ -152,7 +152,7 @@ conf: When you start your cluster, Kubernetes dynamically provisions and mounts a persistent volume into each pod. For more information on persistent volumes, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/storage/persistent-volumes/).
-The storage capacity of each volume is set in `pvc.spec.resources` in the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): +The storage capacity of each volume is set in `pvc.spec.resources` in the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): ~~~ yaml spec: @@ -195,7 +195,7 @@ You should provision an appropriate amount of disk storage for your workload. Fo If you discover that you need more capacity, you can expand the persistent volumes on a running cluster. Increasing disk size is often [beneficial for CockroachDB performance]({% link {{ page.version.version }}/kubernetes-performance.md %}#disk-size).
-Specify a new volume size in `resources.requests` and `resources.limits` in the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): +Specify a new volume size in `resources.requests` and `resources.limits` in the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): ~~~ yaml spec: @@ -211,7 +211,7 @@ spec: {% include {{ page.version.version }}/orchestration/apply-custom-resource.md %} -The Operator updates the StatefulSet and triggers a rolling restart of the pods with the new storage capacity. +The {{ site.data.products.public-operator }} updates the StatefulSet and triggers a rolling restart of the pods with the new storage capacity. To verify that the storage capacity has been updated, run `kubectl get pvc` to view the persistent volume claims (PVCs). It will take a few minutes before the PVCs are completely updated.
@@ -227,7 +227,7 @@ To verify that the storage capacity has been updated, run `kubectl get pvc` to v
## Network ports -The Operator separates network traffic into three ports: +The {{ site.data.products.public-operator }} separates network traffic into three ports: | Protocol | Default | Description | Custom Resource Field | |----------|---------|---------------------------------------------------------------------|-----------------------| @@ -235,7 +235,7 @@ The Operator separates network traffic into three ports: | HTTP | 8080 | Used to [access the DB Console]({% link {{ page.version.version }}/ui-overview.md %}#db-console-access) | `httpPort` | | SQL | 26257 | Used for SQL shell access | `sqlPort` | -Specify alternate port numbers in the Operator's [custom resource]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster) (for example, to match the default port `5432` on PostgreSQL): +Specify alternate port numbers in the {{ site.data.products.public-operator }}'s [custom resource]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster) (for example, to match the default port `5432` on PostgreSQL): ~~~ yaml spec: @@ -244,19 +244,19 @@ spec: {% include {{ page.version.version }}/orchestration/apply-custom-resource.md %} -The Operator updates the StatefulSet and triggers a rolling restart of the pods with the new port settings. +The {{ site.data.products.public-operator }} updates the StatefulSet and triggers a rolling restart of the pods with the new port settings. {{site.data.alerts.callout_danger}} -Currently, only the pods are updated with new ports. To connect to the cluster, you need to ensure that the `public` service is also updated to use the new port. You can do this by deleting the service with `kubectl delete service {cluster-name}-public`. When service is recreated by the Operator, it will use the new port. This is a known limitation that will be fixed in an Operator update. +Currently, only the pods are updated with new ports. 
To connect to the cluster, you need to ensure that the `public` service is also updated to use the new port. You can do this by deleting the service with `kubectl delete service {cluster-name}-public`. When the service is recreated by the operator, it will use the new port. This is a known limitation. {{site.data.alerts.end}} ## Ingress You can configure an [Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/) object to expose an internal HTTP or SQL [`ClusterIP` service](https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types) through a hostname. -In order to use the Ingress resource, your cluster must be running an [Ingress controller](https://kubernetes.io/docs/concepts/services-networking/ingress-controllers/) for load balancing. This is **not** handled by the Operator and must be deployed separately. +In order to use the Ingress resource, your cluster must be running an [Ingress controller](https://kubernetes.io/docs/concepts/services-networking/ingress-controllers/) for load balancing. This is **not** handled by the {{ site.data.products.public-operator }} and must be deployed separately. -Specify Ingress objects in `ingress.ui` (HTTP) or `ingress.sql` (SQL) in the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): +Specify Ingress objects in `ingress.ui` (HTTP) or `ingress.sql` (SQL) in the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): ~~~ yaml spec: @@ -287,5 +287,5 @@ spec: Changing the SQL Ingress `host` on a running deployment will cause a rolling restart of the cluster, due to new node certificates being generated for the SQL host. 
{{site.data.alerts.end}} -The [custom resource definition](https://github.com/cockroachdb/cockroach-operator/blob/v{{ latest_operator_version }}/config/crd/bases/crdb.cockroachlabs.com_crdbclusters.yaml) details the fields supported by the Operator. +The [custom resource definition](https://github.com/cockroachdb/cockroach-operator/blob/v{{ latest_operator_version }}/config/crd/bases/crdb.cockroachlabs.com_crdbclusters.yaml) details the fields supported by the operator.
diff --git a/src/current/v25.2/create-security-certificates-custom-ca.md b/src/current/v25.2/create-security-certificates-custom-ca.md index 9f745bce7e7..27dc77b6cc2 100644 --- a/src/current/v25.2/create-security-certificates-custom-ca.md +++ b/src/current/v25.2/create-security-certificates-custom-ca.md @@ -31,7 +31,7 @@ For secure clusters, you can avoid getting the warning message by using a certif pkill -SIGHUP -x cockroach ~~~ The `SIGHUP` signal must be sent by the same user running the process or by a user with adequate privileges to send signals to processes owned by another user, such as a user with `sudo` access. - - In a cluster deployed using the [Kubernetes Operator]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}), there is no way to send a `SIGHUP` signal to the individual `cockroach` process on each cluster node. Instead, perform a rolling restart of the cluster's pods. + - In a cluster deployed using the [{{ site.data.products.public-operator }}]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}), there is no way to send a `SIGHUP` signal to the individual `cockroach` process on each cluster node. Instead, perform a rolling restart of the cluster's pods. 
### Node key and certificates diff --git a/src/current/v25.2/create-sequence.md b/src/current/v25.2/create-sequence.md index e3bf4214fe9..108b7e151b6 100644 --- a/src/current/v25.2/create-sequence.md +++ b/src/current/v25.2/create-sequence.md @@ -326,6 +326,6 @@ SHOW CREATE customer_seq_node_cached; - [`DROP SEQUENCE`]({% link {{ page.version.version }}/drop-sequence.md %}) - [`SHOW CREATE`]({% link {{ page.version.version }}/show-create.md %}) - [`SHOW SEQUENCES`]({% link {{ page.version.version }}/show-sequences.md %}) -- [Functions and Operators]({% link {{ page.version.version }}/functions-and-operators.md %}) +- [Functions and Operators]({% link {{ page.version.version }}/functions-and-operators.md %}) - [SQL Statements]({% link {{ page.version.version }}/sql-statements.md %}) - [Online Schema Changes]({% link {{ page.version.version }}/online-schema-changes.md %}) diff --git a/src/current/v25.2/deploy-cockroachdb-with-kubernetes-openshift.md b/src/current/v25.2/deploy-cockroachdb-with-kubernetes-openshift.md index 50eba46d776..50ba14aac86 100644 --- a/src/current/v25.2/deploy-cockroachdb-with-kubernetes-openshift.md +++ b/src/current/v25.2/deploy-cockroachdb-with-kubernetes-openshift.md @@ -6,7 +6,7 @@ secure: true docs_area: --- -This page shows you how to start and stop a secure 3-node CockroachDB cluster on the Red Hat OpenShift platform, using the [CockroachDB Kubernetes Operator](https://marketplace.redhat.com/en-us/products/cockroachdb-operator). +This page shows you how to start and stop a secure 3-node CockroachDB cluster on the Red Hat OpenShift platform, using the [{{ site.data.products.public-operator }}](https://marketplace.redhat.com/en-us/products/cockroachdb-operator). {% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} @@ -61,7 +61,7 @@ This article assumes you have already installed the OpenShift Container Platform This lets you issue `oc` commands without having to specify the namespace each time. -## Step 2. 
Install the Operator +## Step 2. Install the {{ site.data.products.public-operator }} 1. Navigate to your OpenShift web console and click **OperatorHub**. @@ -73,7 +73,7 @@ This article assumes you have already installed the OpenShift Container Platform 1. On the **Install Operator** page, select `cockroachdb` in the **Installed Namespace** dropdown and click **Install**. -1. Confirm that the Operator is running: +1. Confirm that the operator is running: {% include_cached copy-clipboard.html %} ~~~ shell @@ -89,13 +89,13 @@ This article assumes you have already installed the OpenShift Container Platform {% capture latest_operator_version %}{% include_cached latest_operator_version.md %}{% endcapture %} -1. When the Operator is ready, click **View Operator** to navigate to the **Installed Operators** page. +1. When the operator is ready, click **View Operator** to navigate to the **Installed Operators** page. 1. In the **CockroachDB Operator** tile, click **Create instance**. OpenShift OperatorHub -1. Make sure **CockroachDB Version** is set to a valid CockroachDB version. For a list of compatible image names, see `spec.containers.env` in the [Operator manifest](https://raw.github.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/install/operator.yaml) on GitHub. +1. Make sure **CockroachDB Version** is set to a valid CockroachDB version. For a list of compatible image names, see `spec.containers.env` in the [public operator manifest](https://raw.github.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/install/operator.yaml) on GitHub. 1. This will open the **Create CrdbCluster** page. By default, this deploys a 3-node secure cluster. Leave the other fields unchanged and click **Create**. @@ -122,10 +122,10 @@ This article assumes you have already installed the OpenShift Container Platform To use the CockroachDB SQL client, first launch a secure pod running the `cockroach` binary. 
-This can be defined with the following YAML, which mounts the Operator's generated certificates: +This can be defined with the following YAML, which mounts the operator's generated certificates: {{site.data.alerts.callout_success}} -`spec.containers.image` should match the **Image** value that is displayed under the **Containers** section on the **Pods** page when you select a CockroachDB pod. Be sure to select a CockroachDB pod and not the Operator pod. +`spec.containers.image` should match the **Image** value that is displayed under the **Containers** section on the **Pods** page when you select a CockroachDB pod. Be sure to select a CockroachDB pod and not the operator pod. Note that OpenShift may display the image SHA instead of the tag. In this case, you should use the SHA for `spec.containers.image`. {{site.data.alerts.end}} @@ -329,7 +329,7 @@ If you want to continue using this cluster, see the documentation on [configurin OpenShift OperatorHub -This will delete the CockroachDB cluster being run by the Operator. It will *not* delete: +This will delete the CockroachDB cluster being run by the operator. It will *not* delete: - The persistent volumes that were attached to the pods. This can be done by deleting the PVCs via **Storage** > **Persistent Volume Claims**. - The opaque secrets used to authenticate the cluster. This can be done via **Workloads** > **Secrets**. diff --git a/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md b/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md index 61d8ae4f961..5f53a69583c 100644 --- a/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md +++ b/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md @@ -10,7 +10,7 @@ docs_area: deploy This page describes how to start and stop a secure 3-node CockroachDB cluster in a single [Kubernetes](http://kubernetes.io/) cluster. 
{{site.data.alerts.callout_info}} -The CockroachDB operator is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). +The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). {{site.data.alerts.end}} ## Prerequisites and best practices @@ -29,7 +29,7 @@ Error: UPGRADE FAILED: template: cockroachdb/templates/tests/client.yaml:6:14: e The Helm chart consists of two sub-charts: -- `operator`: The CockroachDB operator chart to be installed first. +- `operator`: The {{ site.data.products.cockroachdb-operator }} chart to be installed first. - `cockroachdb`: The CockroachDB application chart to be installed after the operator is ready. ### Network @@ -50,7 +50,7 @@ For more information on how locality labels are used by CockroachDB, refer to th ### Architecture -The CockroachDB operator is only supported in environments with an ARM64 or AMD64 architecture. +The {{ site.data.products.cockroachdb-operator }} is only supported in environments with an ARM64 or AMD64 architecture. ### Resources @@ -631,7 +631,7 @@ The following example uses [cockroach cert commands](cockroach-cert.html) to gen --ca-key=my-safe-directory/ca.key ~~~ -1. Upload the client certificate and key to the Kubernetes cluster as a secret, renaming them to the filenames required by the CockroachDB operator: +1. Upload the client certificate and key to the Kubernetes cluster as a secret, renaming them to the filenames required by the {{ site.data.products.cockroachdb-operator }}: {% include_cached copy-clipboard.html %} ~~~ shell @@ -660,7 +660,7 @@ The following example uses [cockroach cert commands](cockroach-cert.html) to gen --ca-key=my-safe-directory/ca.key ~~~ -1. Upload the node certificate and key to the Kubernetes cluster as a secret, renaming them to the filenames required by the CockroachDB operator: +1. 
Upload the node certificate and key to the Kubernetes cluster as a secret, renaming them to the filenames required by the {{ site.data.products.cockroachdb-operator }}: {% include_cached copy-clipboard.html %} ~~~ shell diff --git a/src/current/v25.2/deploy-cockroachdb-with-kubernetes.md b/src/current/v25.2/deploy-cockroachdb-with-kubernetes.md index 2ed27085e95..6b84fc3ce7a 100644 --- a/src/current/v25.2/deploy-cockroachdb-with-kubernetes.md +++ b/src/current/v25.2/deploy-cockroachdb-with-kubernetes.md @@ -13,10 +13,10 @@ docs_area: This page shows you how to start and stop a secure 3-node CockroachDB cluster in a single [Kubernetes](http://kubernetes.io/) cluster using the following approaches: -- [Public operator](https://github.com/cockroachdb/cockroach-operator) +- [{{ site.data.products.public-operator }}](https://github.com/cockroachdb/cockroach-operator) {{site.data.alerts.callout_info}} - The CockroachDB Kubernetes Operator is also available on platforms such as [Red Hat OpenShift]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-openshift.md %}) and [IBM Cloud Pak for Data](https://www.ibm.com/products/cloud-pak-for-data). + The {{ site.data.products.public-operator }} is also available on platforms such as [Red Hat OpenShift]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-openshift.md %}) and [IBM Cloud Pak for Data](https://www.ibm.com/products/cloud-pak-for-data). {{site.data.alerts.end}} - Manual [StatefulSet](http://kubernetes.io/docs/concepts/abstractions/controllers/statefulsets/) configuration @@ -40,12 +40,12 @@ Choose how you want to deploy and maintain the CockroachDB cluster. {{site.data.alerts.callout_info}} The [Public Kubernetes operator](https://github.com/cockroachdb/cockroach-operator) eases CockroachDB cluster creation and management on a single Kubernetes cluster. -The Operator does not provision or apply an Enterprise license key. 
To use CockroachDB with the Operator, [set a license]({% link {{ page.version.version }}/licensing-faqs.md %}#set-a-license) in the SQL shell. +The {{ site.data.products.public-operator }} does not provision or apply a license key. To use CockroachDB with the {{ site.data.products.public-operator }}, [set a license]({% link {{ page.version.version }}/licensing-faqs.md %}#set-a-license) in the SQL shell. {{site.data.alerts.end}}
- - + +
diff --git a/src/current/v25.2/kubernetes-operator-overview.md b/src/current/v25.2/kubernetes-operator-overview.md index 50e4da793cc..9c008a27903 100644 --- a/src/current/v25.2/kubernetes-operator-overview.md +++ b/src/current/v25.2/kubernetes-operator-overview.md @@ -1,6 +1,6 @@ --- title: CockroachDB Operator Overview -summary: An overview of deployment and management of a CockroachDB cluster using the CockroachDB operator with Kubernetes. +summary: An overview of deployment and management of a CockroachDB cluster using the {{ site.data.products.cockroachdb-operator }} with Kubernetes. toc: true toc_not_nested: true secure: true @@ -8,20 +8,22 @@ docs_area: deploy key: operate-cockroachdb-kubernetes-operator.html --- -The CockroachDB operator is a fully-featured [Kubernetes operator](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/) that allows you to deploy and manage CockroachDB self-hosted clusters. +The {{ site.data.products.cockroachdb-operator }} is a fully-featured [Kubernetes operator](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/) that allows you to deploy and manage CockroachDB self-hosted clusters. {{site.data.alerts.callout_info}} -The CockroachDB operator is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). +The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). + +For information on the generally-available {{ site.data.products.public-operator }}, read the [{{ site.data.products.public-operator }} documentation]({% link {{ page.version.version }}/kubernetes-overview.md %}) and see the [GitHub repository](https://github.com/cockroachdb/cockroach-operator). {{site.data.alerts.end}} -With the CockroachDB operator, you can deploy CockroachDB clusters across multiple regions with separate operator instances per region. 
Using [Helm](https://helm.sh/), set configurations that manage the operator and CockroachDB nodes across regions. +With the {{ site.data.products.cockroachdb-operator }}, you can deploy CockroachDB clusters across multiple regions with separate operator instances per region. Using [Helm](https://helm.sh/), set configurations that manage the operator and CockroachDB nodes across regions. -## CockroachDB operator +## {{ site.data.products.cockroachdb-operator }} This section describes how to: -- [Deploy a CockroachDB cluster using the CockroachDB operator]({% link {{page.version.version}}/deploy-cockroachdb-with-kubernetes-operator.md %}). -- Migrate from an existing CockroachDB Kubernetes deployment using [Helm]({% link {{page.version.version}}/migrate-cockroachdb-kubernetes-helm.md %}) or the [public operator]({% link {{page.version.version}}/migrate-cockroachdb-kubernetes-operator.md %}). +- [Deploy a CockroachDB cluster using the {{ site.data.products.cockroachdb-operator }}]({% link {{page.version.version}}/deploy-cockroachdb-with-kubernetes-operator.md %}). +- Migrate from an existing CockroachDB Kubernetes deployment using [Helm]({% link {{page.version.version}}/migrate-cockroachdb-kubernetes-helm.md %}) or the [{{ site.data.products.public-operator }}]({% link {{page.version.version}}/migrate-cockroachdb-kubernetes-operator.md %}). - Operate a CockroachDB cluster: - [Manage pod scheduling]({% link {{page.version.version}}/schedule-cockroachdb-kubernetes-operator.md %}). @@ -40,6 +42,6 @@ Feature | Description --------|------------ [node](https://kubernetes.io/docs/concepts/architecture/nodes/) | A physical or virtual machine. In the [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}), you'll create instances and join them as worker nodes into a single Kubernetes cluster. [pod](http://kubernetes.io/docs/user-guide/pods/) | A pod is a group of one of more Docker containers. 
In the [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}), each pod will run on a separate Kubernetes worker node and include one Docker container running a single CockroachDB node, reflecting our [topology recommendations]({% link {{ page.version.version }}/recommended-production-settings.md %}#topology). -[operator](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/) | An operator is an extension to Kubernetes that uses custom resources to efficiently manage specific applications. The CockroachDB operator includes two custom resource definitions, `cockroachdb` to manage a CockroachDB pod and `operator` to manage the operator pod itself. Unlike the older [public operator](https://github.com/cockroachdb/cockroach-operator), the CockroachDB operator does not use StatefulSets and is designed to simplify multi-region deployments. +[operator](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/) | An operator is an extension to Kubernetes that uses custom resources to efficiently manage specific applications. The {{ site.data.products.cockroachdb-operator }} includes two custom resource definitions, `cockroachdb` to manage a CockroachDB pod and `operator` to manage the operator pod itself. Unlike the older [{{ site.data.products.public-operator }}](https://github.com/cockroachdb/cockroach-operator), the {{ site.data.products.cockroachdb-operator }} does not use StatefulSets and is designed to simplify multi-region deployments. [persistent volume](http://kubernetes.io/docs/user-guide/persistent-volumes/) | A persistent volume is a piece of networked storage (Persistent Disk on GCE, Elastic Block Store on AWS) mounted into a pod. The lifetime of a persistent volume is decoupled from the lifetime of the pod that's using it, ensuring that each CockroachDB node binds back to the same storage on restart.

The [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}) assumes that dynamic volume provisioning is available. When that is not the case, [persistent volume claims](http://kubernetes.io/docs/user-guide/persistent-volumes/#persistentvolumeclaims) need to be created manually. [RBAC](https://kubernetes.io/docs/reference/access-authn-authz/rbac/) | RBAC, or Role-Based Access Control, is the system Kubernetes uses to manage permissions within the cluster. In order to take an action (e.g., `get` or `create`) on an API resource (e.g., a `pod`), the client must have a `Role` that allows it to do so. The [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}) creates the RBAC resources necessary for CockroachDB to create and access certificates. diff --git a/src/current/v25.2/kubernetes-operator-performance.md b/src/current/v25.2/kubernetes-operator-performance.md index 53447db465a..98643eba64e 100644 --- a/src/current/v25.2/kubernetes-operator-performance.md +++ b/src/current/v25.2/kubernetes-operator-performance.md @@ -8,7 +8,7 @@ docs_area: deploy Kubernetes provides many useful abstractions for deploying and operating distributed systems, but some of the abstractions come with a performance overhead and an increase in underlying system complexity. This page outlines potential bottlenecks when running CockroachDB in Kubernetes and how to optimize performance. {{site.data.alerts.callout_info}} -The CockroachDB operator is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). +The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). 
{{site.data.alerts.end}} ## Before you begin diff --git a/src/current/v25.2/kubernetes-overview.md b/src/current/v25.2/kubernetes-overview.md index 925edd86e70..db3bd953e08 100644 --- a/src/current/v25.2/kubernetes-overview.md +++ b/src/current/v25.2/kubernetes-overview.md @@ -14,10 +14,10 @@ Kubernetes is a portable, extensible, open source platform for managing containe You can also deploy CockroachDB on Kubernetes using the following methods: -- [Public operator](https://github.com/cockroachdb/cockroach-operator) +- [{{ site.data.products.public-operator }}](https://github.com/cockroachdb/cockroach-operator) {{site.data.alerts.callout_info}} - The public operator is also available on platforms such as [Red Hat OpenShift]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-openshift.md %}) and [IBM Cloud Pak for Data](https://www.ibm.com/products/cloud-pak-for-data). + The {{ site.data.products.public-operator }} is also available on platforms such as [Red Hat OpenShift]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-openshift.md %}) and [IBM Cloud Pak for Data](https://www.ibm.com/products/cloud-pak-for-data). {{site.data.alerts.end}} - Manual [StatefulSet](http://kubernetes.io/docs/concepts/abstractions/controllers/statefulsets/) configuration diff --git a/src/current/v25.2/kubernetes-performance.md b/src/current/v25.2/kubernetes-performance.md index 5d53065ce4d..841831da74b 100644 --- a/src/current/v25.2/kubernetes-performance.md +++ b/src/current/v25.2/kubernetes-performance.md @@ -7,7 +7,7 @@ docs_area: deploy Kubernetes provides many useful abstractions for deploying and operating distributed systems, but some of the abstractions come with a performance overhead and an increase in underlying system complexity. 
This page explains potential bottlenecks to be aware of when [running CockroachDB in Kubernetes]({% link {{ page.version.version }}/kubernetes-overview.md %}) and shows you how to optimize your deployment for better performance. -This page is for Kubernetes deployments that are not using the CockroachDB operator. For guidance specific to the CockroachDB operator, read [CockroachDB Performance on Kubernetes with the CockroachDB Operator]({% link {{ page.version.version }}/kubernetes-operator-performance.md %}). +This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [CockroachDB Performance on Kubernetes with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/kubernetes-operator-performance.md %}). {% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} diff --git a/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md b/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md index 39c649a49f4..920b0033e5b 100644 --- a/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md +++ b/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md @@ -1,16 +1,16 @@ --- title: Migrate from Helm StatefulSet -summary: Migration guide detailing how to migrate away from a Helm deployment of CockroachDB to the CockroachDB operator. +summary: Migration guide detailing how to migrate away from a Helm deployment of CockroachDB to the {{ site.data.products.cockroachdb-operator }}. toc: true toc_not_nested: true secure: true docs_area: deploy --- -This guide describes how to migrate an existing CockroachDB cluster managed via StatefulSet to the CockroachDB operator. +This guide describes how to migrate an existing CockroachDB cluster managed via StatefulSet to the {{ site.data.products.cockroachdb-operator }}. 
{{site.data.alerts.callout_info}} -The CockroachDB operator is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). +The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). {{site.data.alerts.end}} These instructions assume that you are migrating from a StatefulSet cluster that was configured using the Helm chart with the following command: @@ -20,7 +20,7 @@ helm upgrade --install --set operator.enabled=false crdb-test --debug ./cockroac ~~~ {{site.data.alerts.callout_success}} -If your existing cluster was created using the public operator, refer to the [public operator migration guide](migrate-cockroachdb-kubernetes-operator.html). +If your existing cluster was created using the {{ site.data.products.public-operator }}, refer to the [{{ site.data.products.public-operator }} migration guide](migrate-cockroachdb-kubernetes-operator.html). {{site.data.alerts.end}} This migration can be completed without affecting cluster availability, and preserves existing disks so that data doesn't need to be replicated into empty volumes. The process scales down the StatefulSet by one node before adding each operator-managed pod, so the maximum cluster capacity will be reduced by one node periodically throughout the migration. diff --git a/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md b/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md index 9615fe1613d..7e4bc595d6a 100644 --- a/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md @@ -7,13 +7,13 @@ secure: true docs_area: deploy --- -This guide describes how to migrate an existing CockroachDB cluster managed via the public operator to the CockroachDB operator. 
+This guide describes how to migrate an existing CockroachDB cluster managed via the {{ site.data.products.public-operator }} to the {{ site.data.products.cockroachdb-operator }}. {{site.data.alerts.callout_info}} -The CockroachDB operator is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). +The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). {{site.data.alerts.end}} -These instructions assume that you are migrating from a public operator cluster that is managed with kubectl via the following yaml files: +These instructions assume that you are migrating from a {{ site.data.products.public-operator }} cluster that is managed with kubectl via the following yaml files: {% include_cached copy-clipboard.html %} ~~~ shell @@ -40,7 +40,7 @@ export PATH=$PATH:$(pwd)/bin Export environment variables for the existing deployment: -- Set CRDBCLUSTER to the crdbcluster custom resource name in the public operator: +- Set CRDBCLUSTER to the crdbcluster custom resource name in the {{ site.data.products.public-operator }}: {% include_cached copy-clipboard.html %} ~~~ shell export CRDBCLUSTER="cockroachdb" @@ -74,14 +74,14 @@ kubectl get crdbcluster -o yaml $CRDBCLUSTER > backup/crdbcluster-$CRDBCLUSTER.y ## Step 2. Generate manifests with the migration helper -The CockroachDB operator uses slightly different certificates than the public operator, and mounts them in configmaps and secrets with different names. Use the migration helper utility with the `migrate-certs` option to re-map and generate TLS certificates: +The {{ site.data.products.cockroachdb-operator }} uses slightly different certificates than the {{ site.data.products.public-operator }}, and mounts them in configmaps and secrets with different names. 
Use the migration helper utility with the `migrate-certs` option to re-map and generate TLS certificates: {% include_cached copy-clipboard.html %} ~~~ shell bin/migration-helper migrate-certs --statefulset-name $STS_NAME --namespace $NAMESPACE ~~~ -Generate a manifest for each crdbnode and the crdbcluster based on the state of the StatefulSet. The new pods and their associated PVCs must have the same names as the original StatefulSet-managed pods and PVCs. The new CockroachDB operator-managed pods will then use the original PVCs, rather than replicate data into empty nodes. +Generate a manifest for each crdbnode and the crdbcluster based on the state of the StatefulSet. The new pods and their associated PVCs must have the same names as the original StatefulSet-managed pods and PVCs. The new {{ site.data.products.cockroachdb-operator }}-managed pods will then use the original PVCs, rather than replicate data into empty nodes. {% include_cached copy-clipboard.html %} ~~~ shell @@ -89,9 +89,9 @@ mkdir -p manifests bin/migration-helper build-manifest helm --statefulset-name $STS_NAME --namespace $NAMESPACE --cloud-provider $CLOUD_PROVIDER --cloud-region $REGION --output-dir ./manifests ~~~ -## Step 3. Uninstall and replace the public operator +## Step 3. Uninstall and replace the {{ site.data.products.public-operator }} -The public operator and the CockroachDB operator use custom resource definitions with the same names, so you must remove the public operator before installing the CockroachDB operator. Run the following commands to uninstall the public operator, without deleting its managed resources: +The {{ site.data.products.public-operator }} and the {{ site.data.products.cockroachdb-operator }} use custom resource definitions with the same names, so you must remove the {{ site.data.products.public-operator }} before installing the {{ site.data.products.cockroachdb-operator }}. 
Run the following commands to uninstall the {{ site.data.products.public-operator }}, without deleting its managed resources: - Ensure that the operator can't accidentally delete managed Kubernetes objects: {% include_cached copy-clipboard.html %} @@ -99,13 +99,13 @@ The public operator and the CockroachDB operator use custom resource definitions kubectl delete clusterrolebinding cockroach-operator-rolebinding ~~~ -- Delete the public operator custom resource: +- Delete the {{ site.data.products.public-operator }} custom resource: {% include_cached copy-clipboard.html %} ~~~ shell kubectl delete crdbcluster $CRDBCLUSTER --cascade=orphan ~~~ -- Delete public operator resources and custom resource definition: +- Delete {{ site.data.products.public-operator }} resources and custom resource definition: {% include_cached copy-clipboard.html %} ~~~ shell kubectl delete -f https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v2.17.0/install/crds.yaml @@ -118,7 +118,7 @@ The public operator and the CockroachDB operator use custom resource definitions kubectl delete validatingwebhookconfigurations cockroach-operator-validating-webhook-configuration ~~~ -Run `helm upgrade` to install the CockroachDB operator and wait for it to become ready: +Run `helm upgrade` to install the {{ site.data.products.cockroachdb-operator }} and wait for it to become ready: {% include_cached copy-clipboard.html %} ~~~ shell @@ -128,7 +128,7 @@ kubectl rollout status deployment/cockroach-operator --timeout=60s ## Step 4. Replace statefulset pods with operator-managed nodes -To migrate seamlessly from the public operator to the CockroachDB operator, scale down StatefulSet-managed pods and replace them with crdbnode objects, one by one. Then we’ll create the crdbcluster object that manages the crdbnodes. 
+To migrate seamlessly from the {{ site.data.products.public-operator }} to the {{ site.data.products.cockroachdb-operator }}, scale down StatefulSet-managed pods and replace them with crdbnode objects, one by one. Then we’ll create the crdbcluster object that manages the crdbnodes. Create objects with `kubectl` that will eventually be owned by the crdbcluster: @@ -182,7 +182,7 @@ Repeat these steps until the StatefulSet has zero replicas. ## Step 5. Update the crdbcluster manifest -The public operator creates a pod disruption budget that conflicts with a pod disruption budget managed by the CockroachDB operator. Before applying the crdbcluster manifest, delete the existing pod disruption budget: +The {{ site.data.products.public-operator }} creates a pod disruption budget that conflicts with a pod disruption budget managed by the {{ site.data.products.cockroachdb-operator }}. Before applying the crdbcluster manifest, delete the existing pod disruption budget: {% include_cached copy-clipboard.html %} ~~~ shell @@ -205,7 +205,7 @@ Apply the crdbcluster manifest: helm install $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb -f manifests/values.yaml ~~~ -Once the migration is successful, delete the StatefulSet that was created by the public operator: +Once the migration is successful, delete the StatefulSet that was created by the {{ site.data.products.public-operator }}: {% include_cached copy-clipboard.html %} ~~~ shell diff --git a/src/current/v25.2/monitor-cockroachdb-kubernetes-operator.md b/src/current/v25.2/monitor-cockroachdb-kubernetes-operator.md index 33b9628ba5b..138e1b1a936 100644 --- a/src/current/v25.2/monitor-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/monitor-cockroachdb-kubernetes-operator.md @@ -9,7 +9,7 @@ docs_area: deploy Despite CockroachDB's various [built-in safeguards against failure](architecture/replication-layer.html), it is critical to actively monitor the overall health and performance of a cluster running in 
production and to create alerting rules that promptly send notifications when there are events that require investigation or intervention. {{site.data.alerts.callout_info}} -The CockroachDB operator is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). +The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). {{site.data.alerts.end}} ## Configure Prometheus @@ -218,7 +218,7 @@ Active monitoring helps you spot problems early, but it is also essential to sen ## Monitor the operator -The CockroachDB operator automatically exposes [Prometheus-style metrics](https://prometheus.io/docs/concepts/metric_types/) that you can monitor to observe its operations. +The {{ site.data.products.cockroachdb-operator }} automatically exposes [Prometheus-style metrics](https://prometheus.io/docs/concepts/metric_types/) that you can monitor to observe its operations. Metrics can be collected from the operator via HTTP requests (port 8080 by default) against the `/metrics` endpoint. The response will describe the current node metrics, for example: diff --git a/src/current/v25.2/monitor-cockroachdb-kubernetes.md b/src/current/v25.2/monitor-cockroachdb-kubernetes.md index 7eb100b41e0..40dcb13eac2 100644 --- a/src/current/v25.2/monitor-cockroachdb-kubernetes.md +++ b/src/current/v25.2/monitor-cockroachdb-kubernetes.md @@ -12,13 +12,13 @@ This article assumes you have already [deployed CockroachDB on a single Kubernet Despite CockroachDB's various [built-in safeguards against failure]({% link {{ page.version.version }}/architecture/replication-layer.md %}), it is critical to actively monitor the overall health and performance of a cluster running in production and to create alerting rules that promptly send notifications when there are events that require investigation or intervention. -This page is for Kubernetes deployments that are not using the CockroachDB operator. 
For guidance specific to the CockroachDB operator, read [Cluster Monitoring with the CockroachDB Operator]({% link {{ page.version.version }}/monitor-cockroachdb-kubernetes-operator.md %}). +This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [Cluster Monitoring with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/monitor-cockroachdb-kubernetes-operator.md %}). {% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %}
- - + +
@@ -136,7 +136,7 @@ If you're on Hosted GKE, before starting, make sure the email address associated ~~~ {{site.data.alerts.callout_info}} - By default, this manifest uses the secret name generated by the CockroachDB Kubernetes Operator. If you generated your own certificates and keys when [starting CockroachDB]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#step-2-start-cockroachdb), be sure that `ca.secret.name` matches the name of the node secret you created. + By default, this manifest uses the secret name generated by the {{ site.data.products.public-operator }}. If you generated your own certificates and keys when [starting CockroachDB]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#step-2-start-cockroachdb), be sure that `ca.secret.name` matches the name of the node secret you created. {{site.data.alerts.end}} 1. Apply the Prometheus manifest. This creates the various objects necessary to run a Prometheus instance: @@ -290,7 +290,7 @@ Active monitoring helps you spot problems early, but it is also essential to sen ## Configure logging -When running CockroachDB v21.1 and later, you can use the Operator to configure the CockroachDB logging system. This allows you to output logs to [configurable log sinks] (configure-logs.html#configure-log-sinks) such as file or network logging destinations. +When running CockroachDB v21.1 and later, you can use the {{ site.data.products.public-operator }} to configure the CockroachDB logging system. This allows you to output logs to [configurable log sinks](configure-logs.html#configure-log-sinks) such as file or network logging destinations. {{site.data.alerts.callout_info}} By default, Kubernetes deployments running CockroachDB v20.2 or earlier output all logs to `stderr`.
@@ -329,14 +329,14 @@ The above configuration overrides the [default logging configuration]({% link {{ - Save debug-level logs (the `DEV` [log channel]({% link {{ page.version.version }}/logging-overview.md %}#logging-channels)) to disk for troubleshooting. - Send operational- and security-level logs to a [network collector]({% link {{ page.version.version }}/logging-use-cases.md %}#network-logging), in this case [Fluentd]({% link {{ page.version.version }}/configure-logs.md %}#fluentd-logging-format). -The ConfigMap `name` must match the `logConfigMap` object of the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): +The ConfigMap `name` must match the `logConfigMap` object of the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): ~~~ yaml spec: logConfigMap: logconfig ~~~ -By default, the Operator also modifies the [default logging configuration]({% link {{ page.version.version }}/configure-logs.md %}#default-logging-configuration) with the following: +By default, the {{ site.data.products.public-operator }} also modifies the [default logging configuration]({% link {{ page.version.version }}/configure-logs.md %}#default-logging-configuration) with the following: ~~~ yaml sinks: @@ -351,7 +351,7 @@ This outputs logging events in the [`OPS`]({% link {{ page.version.version }}/lo In this example, CockroachDB has already been deployed on a Kubernetes cluster. We override the [default logging configuration]({% link {{ page.version.version }}/configure-logs.md %}#default-logging-configuration) to output [`DEV`]({% link {{ page.version.version }}/logging.md %}#dev) logs to a `cockroach-dev.log` file. -1. Create a ConfigMap named `logconfig`. 
Note that `namespace` is set to the Operator's default namespace (`cockroach-operator-system`): +1. Create a ConfigMap named `logconfig`. Note that `namespace` is set to the {{ site.data.products.public-operator }}'s default namespace (`cockroach-operator-system`): {% include_cached copy-clipboard.html %} ~~~ yaml @@ -388,7 +388,7 @@ In this example, CockroachDB has already been deployed on a Kubernetes cluster. configmap/logconfig created ~~~ -1. Add the `name` of the ConfigMap in `logConfigMap` to the [Operator's custom resource]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): +1. Add the `name` of the ConfigMap in `logConfigMap` to the [{{ site.data.products.public-operator }}'s custom resource]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): {% include_cached copy-clipboard.html %} ~~~ yaml diff --git a/src/current/v25.2/node-shutdown.md b/src/current/v25.2/node-shutdown.md index e95ca33f513..36293da177c 100644 --- a/src/current/v25.2/node-shutdown.md +++ b/src/current/v25.2/node-shutdown.md @@ -834,9 +834,13 @@ On the **Cluster Overview** page of the DB Console, the [node status]({% link {{ Most of the guidance in this page is most relevant to manual deployments that don't use Kubernetes. If you use Kubernetes to deploy CockroachDB, draining and decommissioning work the same way for the `cockroach` process, but Kubernetes handles them on your behalf. In a deployment without Kubernetes, an administrator initiates decommissioning or draining directly. In a Kubernetes deployment, an administrator modifies the desired configuration of the Kubernetes cluster and Kubernetes makes the required changes to the cluster, including decommissioning or draining nodes as required. -- Whether you deployed a cluster using the CockroachDB Operator, Helm, or a manual StatefulSet, the resulting deployment is a StatefulSet. 
Due to the nature of StatefulSets, it's safe to decommission **only** the Cockroach node with the highest StatefulSet ordinal in preparation for scaling down the StatefulSet. If you think you need to decommission any other node, consider the following recommendations and [contact Support](https://support.cockroachlabs.com/hc/en-us) for assistance. +- The {{ site.data.products.public-operator }}, Helm, and manual StatefulSet deployments use a StatefulSet. Due to the nature of StatefulSets, it's safe to decommission **only** the Cockroach node with the highest StatefulSet ordinal in preparation for scaling down the StatefulSet. If you think you need to decommission any other node, consider the following recommendations and [contact Support](https://support.cockroachlabs.com/hc/en-us) for assistance. - - If you deployed a cluster using the [CockroachDB Kubernetes Operator]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}), the best way to scale down a cluster is to update the specification for the Kubernetes deployment to reduce the value of `nodes:` and apply the change using a [rolling update](https://kubernetes.io/docs/tutorials/kubernetes-basics/update/update-intro/). Kubernetes will notice that there are now too many nodes and will reduce them and clean up their storage automatically. + {{ site.data.alerts.callout_success }} + Unlike the other Kubernetes deployment methods, the [{{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/kubernetes-operator-overview.md %}) does not use StatefulSets so deployments using that operator do not share this limitation. With the {{ site.data.products.cockroachdb-operator }}, nodes can be scaled down in any order. For more information, read [Cluster Scaling with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/scale-cockroachdb-kubernetes-operator.md %}). 
+ {{ site.data.alerts.end }} + + - If you deployed a cluster using the [{{ site.data.products.public-operator }}]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}), the best way to scale down a cluster is to update the specification for the Kubernetes deployment to reduce the value of `nodes:` and apply the change using a [rolling update](https://kubernetes.io/docs/tutorials/kubernetes-basics/update/update-intro/). Kubernetes will notice that there are now too many nodes and will reduce them and clean up their storage automatically. - If you deployed the cluster using [Helm]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}?filters=helm) or a [manual StatefulSet]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}?filters=manual), the best way to scale down a cluster is to interactively decommission and drain the highest-order node. After that node is decommissioned, drained, and terminated, you can repeat the process to further reduce the cluster's size. @@ -854,7 +858,7 @@ After Kubernetes issues a termination request to the `cockroach` process on a cl If undefined, Kubernetes sets `terminationGracePeriodSeconds` to 30 seconds. This is too short for the `cockroach` process to stop gracefully before Kubernetes terminates it forcibly. Do not set `terminationGracePeriodSeconds` to `0`, which prevents Kubernetes from detecting and terminating a stuck pod. -For clusters deployed using the CockroachDB Public Operator, `terminationGracePeriodSeconds` defaults to 300 seconds (5 minutes). +For clusters deployed using the {{ site.data.products.public-operator }}, `terminationGracePeriodSeconds` defaults to 300 seconds (5 minutes). For clusters deployed using the CockroachDB Helm chart or a manual StatefulSet, the default depends upon the values file or manifest you used when you created the cluster.
Cockroach Labs recommends that you: diff --git a/src/current/v25.2/orchestrate-a-local-cluster-with-kubernetes.md b/src/current/v25.2/orchestrate-a-local-cluster-with-kubernetes.md index b8746775bdf..95fea70cd25 100644 --- a/src/current/v25.2/orchestrate-a-local-cluster-with-kubernetes.md +++ b/src/current/v25.2/orchestrate-a-local-cluster-with-kubernetes.md @@ -13,7 +13,7 @@ On top of CockroachDB's built-in automation, you can use a third-party [orchestr This page demonstrates a basic integration with the open-source [Kubernetes](http://kubernetes.io/) orchestration system. Using either the CockroachDB [Helm](https://helm.sh/) chart or a few configuration files, you'll quickly create a 3-node local cluster. You'll run some SQL commands against the cluster and then simulate node failure, watching how Kubernetes auto-restarts without the need for any manual intervention. You'll then scale the cluster with a single command before shutting the cluster down, again with a single command. {{site.data.alerts.callout_info}} -To orchestrate a physically distributed cluster in production, see [Orchestrated Deployments]({% link {{ page.version.version }}/kubernetes-overview.md %}). To deploy a 30-day free CockroachDB {{ site.data.products.dedicated }} cluster instead of running CockroachDB yourself, see the [Quickstart]({% link cockroachcloud/quickstart.md %}). +To orchestrate a physically distributed cluster in production, see [Orchestrated Deployments]({% link {{ page.version.version }}/kubernetes-operator-overview.md %}). To deploy a 30-day free CockroachDB {{ site.data.products.dedicated }} cluster instead of running CockroachDB yourself, see the [Quickstart]({% link cockroachcloud/quickstart.md %}). 
{{site.data.alerts.end}} @@ -27,14 +27,14 @@ To orchestrate a physically distributed cluster in production, see [Orchestrated Choose a way to deploy and maintain the CockroachDB cluster: -- [CockroachDB Kubernetes Operator](https://github.com/cockroachdb/cockroach-operator) (recommended) +- [{{ site.data.products.public-operator }}](https://github.com/cockroachdb/cockroach-operator) - [Helm](https://helm.sh/) package manager - Manually apply our StatefulSet configuration and related files
- + - +
diff --git a/src/current/v25.2/scale-cockroachdb-kubernetes-operator.md b/src/current/v25.2/scale-cockroachdb-kubernetes-operator.md index cc336f33757..97f9310ee3a 100644 --- a/src/current/v25.2/scale-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/scale-cockroachdb-kubernetes-operator.md @@ -10,7 +10,7 @@ docs_area: deploy This page explains how to add and remove CockroachDB nodes on Kubernetes. {{site.data.alerts.callout_info}} -The CockroachDB operator is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). +The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). {{site.data.alerts.end}} ## Add nodes diff --git a/src/current/v25.2/scale-cockroachdb-kubernetes.md b/src/current/v25.2/scale-cockroachdb-kubernetes.md index 2ac2e256932..2094e7239d0 100644 --- a/src/current/v25.2/scale-cockroachdb-kubernetes.md +++ b/src/current/v25.2/scale-cockroachdb-kubernetes.md @@ -13,12 +13,12 @@ This article assumes you have already [deployed CockroachDB on a single Kubernet This page explains how to add and remove CockroachDB nodes on Kubernetes. -This page is for Kubernetes deployments that are not using the CockroachDB operator. For guidance specific to the CockroachDB operator, read [Cluster Scaling with the CockroachDB Operator]({% link {{ page.version.version }}/scale-cockroachdb-kubernetes-operator.md %}). +This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [Cluster Scaling with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/scale-cockroachdb-kubernetes-operator.md %}). {% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %}
- +
@@ -55,7 +55,7 @@ If your cluster has 3 CockroachDB nodes distributed across 3 availability zones 1. If you are adding nodes after previously [scaling down](#remove-nodes), and have not enabled [automatic PVC pruning](#automatic-pvc-pruning), you must first manually delete any persistent volumes that were orphaned by node removal. {{site.data.alerts.callout_info}} - Due to a [known issue](https://github.com/cockroachdb/cockroach-operator/issues/542), automatic pruning of PVCs is currently disabled by default. This means that after decommissioning and removing a node, the Operator will not remove the persistent volume that was mounted to its pod. + Due to a [known issue](https://github.com/cockroachdb/cockroach-operator/issues/542), automatic pruning of PVCs is currently disabled by default. This means that after decommissioning and removing a node, the {{ site.data.products.public-operator }} will not remove the persistent volume that was mounted to its pod. {{site.data.alerts.end}} View the PVCs on the cluster: @@ -107,7 +107,7 @@ If your cluster has 3 CockroachDB nodes distributed across 3 availability zones persistentvolumeclaim "datadir-cockroachdb-5" deleted ~~~ -1. Update `nodes` in the Operator's custom resource, which you downloaded when [deploying the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster), with the target size of the CockroachDB cluster. This value refers to the number of CockroachDB nodes, each running in one pod: +1. Update `nodes` in the {{ site.data.products.public-operator }}'s custom resource, which you downloaded when [deploying the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster), with the target size of the CockroachDB cluster. This value refers to the number of CockroachDB nodes, each running in one pod: ~~~ nodes: 6 @@ -161,13 +161,13 @@ Do **not** scale down to fewer than 3 nodes. This is considered an anti-pattern
{{site.data.alerts.callout_danger}} -Due to a [known issue](https://github.com/cockroachdb/cockroach-operator/issues/542), automatic pruning of PVCs is currently disabled by default. This means that after decommissioning and removing a node, the Operator will not remove the persistent volume that was mounted to its pod. +Due to a [known issue](https://github.com/cockroachdb/cockroach-operator/issues/542), automatic pruning of PVCs is currently disabled by default. This means that after decommissioning and removing a node, the {{ site.data.products.public-operator }} will not remove the persistent volume that was mounted to its pod. If you plan to eventually [scale up](#add-nodes) the cluster after scaling down, you will need to manually delete any PVCs that were orphaned by node removal before scaling up. For more information, see [Add nodes](#add-nodes). {{site.data.alerts.end}} {{site.data.alerts.callout_info}} -If you want to enable the Operator to automatically prune PVCs when scaling down, see [Automatic PVC pruning](#automatic-pvc-pruning). However, note that this workflow is currently unsupported. +If you want to enable the {{ site.data.products.public-operator }} to automatically prune PVCs when scaling down, see [Automatic PVC pruning](#automatic-pvc-pruning). However, note that this workflow is currently unsupported. {{site.data.alerts.end}} Before scaling down CockroachDB, note the following [topology recommendation]({% link {{ page.version.version }}/recommended-production-settings.md %}#topology): @@ -183,7 +183,7 @@ If your nodes are distributed across 3 availability zones (as in our [deployment ~~~ {{site.data.alerts.callout_info}} - Before removing a node, the Operator first decommissions the node. This lets a node finish in-flight requests, rejects any new requests, and transfers all range replicas and range leases off the node. + Before removing a node, the {{ site.data.products.public-operator }} first decommissions the node. 
This lets a node finish in-flight requests, rejects any new requests, and transfers all range replicas and range leases off the node. {{site.data.alerts.end}} 1. Apply the new settings to the cluster: @@ -193,7 +193,7 @@ If your nodes are distributed across 3 availability zones (as in our [deployment $ kubectl apply -f example.yaml ~~~ - The Operator will remove nodes from the cluster one at a time, starting from the pod with the highest number in its address. + The {{ site.data.products.public-operator }} will remove nodes from the cluster one at a time, starting from the pod with the highest number in its address. 1. Verify that the pods were successfully removed: @@ -212,7 +212,7 @@ If your nodes are distributed across 3 availability zones (as in our [deployment ### Automatic PVC pruning -To enable the Operator to automatically remove persistent volumes when [scaling down](#remove-nodes) a cluster, turn on automatic PVC pruning through a feature gate. +To enable the {{ site.data.products.public-operator }} to automatically remove persistent volumes when [scaling down](#remove-nodes) a cluster, turn on automatic PVC pruning through a feature gate. {{site.data.alerts.callout_danger}} This workflow is unsupported and should be enabled at your own risk. @@ -220,28 +220,28 @@ This workflow is unsupported and should be enabled at your own risk. {% capture latest_operator_version %}{% include_cached latest_operator_version.md %}{% endcapture %} -1. Download the Operator manifest: +1. Download the {{ site.data.products.public-operator }} manifest: {% include_cached copy-clipboard.html %} ~~~ shell $ curl -0 https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/install/operator.yaml ~~~ -1. Uncomment the following lines in the Operator manifest: +1. Uncomment the following lines in the {{ site.data.products.public-operator }} manifest: ~~~ yaml - feature-gates - AutoPrunePVC=true ~~~ -1. Reapply the Operator manifest: +1. 
Reapply the {{ site.data.products.public-operator }} manifest: {% include_cached copy-clipboard.html %} ~~~ shell $ kubectl apply -f operator.yaml ~~~ -1. Validate that the Operator is running: +1. Validate that the {{ site.data.products.public-operator }} is running: {% include_cached copy-clipboard.html %} ~~~ shell diff --git a/src/current/v25.2/schedule-cockroachdb-kubernetes-operator.md b/src/current/v25.2/schedule-cockroachdb-kubernetes-operator.md index ea38586f5db..3f25c24a0a8 100644 --- a/src/current/v25.2/schedule-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/schedule-cockroachdb-kubernetes-operator.md @@ -10,7 +10,7 @@ docs_area: deploy This page describes how to configure pod scheduling settings. These settings control how CockroachDB pods should be identified or scheduled onto worker nodes, which are then proxied to the Kubernetes scheduler. {{site.data.alerts.callout_info}} -The CockroachDB operator is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). +The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). 
{{site.data.alerts.end}} ## Node selectors diff --git a/src/current/v25.2/schedule-cockroachdb-kubernetes.md b/src/current/v25.2/schedule-cockroachdb-kubernetes.md index 4ec9eff650f..40ca835683d 100644 --- a/src/current/v25.2/schedule-cockroachdb-kubernetes.md +++ b/src/current/v25.2/schedule-cockroachdb-kubernetes.md @@ -7,7 +7,7 @@ secure: true docs_area: deploy --- -This page describes how to configure the following, using the [public operator](https://github.com/cockroachdb/cockroach-operator): +This page describes how to configure the following, using the [{{ site.data.products.public-operator }}](https://github.com/cockroachdb/cockroach-operator): - [Enable feature gates](#enable-feature-gates) - [Node selectors](#node-selectors) @@ -22,7 +22,7 @@ This page describes how to configure the following, using the [public operator]( - [Add a topology spread constraint](#add-a-topology-spread-constraint) - [Resource labels and annotations](#resource-labels-and-annotations) -This page is for Kubernetes deployments that are not using the CockroachDB operator. For guidance specific to the CockroachDB operator, read [Pod Scheduling with the CockroachDB Operator]({% link {{ page.version.version }}/schedule-cockroachdb-kubernetes-operator.md %}). +This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [Pod Scheduling with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/schedule-cockroachdb-kubernetes-operator.md %}). 
{% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} @@ -34,7 +34,7 @@ These settings control how CockroachDB pods can be identified or scheduled onto {% capture latest_operator_version %}{% include_cached latest_operator_version.md %}{% endcapture %} -To enable the [affinity](#affinities-and-anti-affinities), [toleration](#taints-and-tolerations), and [topology spread constraint](#topology-spread-constraints) rules, [download the Operator manifest](https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/install/operator.yaml) and add the following line to the `spec.containers.args` field: +To enable the [affinity](#affinities-and-anti-affinities), [toleration](#taints-and-tolerations), and [topology spread constraint](#topology-spread-constraints) rules, [download the {{ site.data.products.public-operator }} manifest](https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/install/operator.yaml) and add the following line to the `spec.containers.args` field: {% include_cached copy-clipboard.html %} ~~~ yaml @@ -48,7 +48,7 @@ spec: A pod with a *node selector* will be scheduled onto a worker node that has matching [labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/), or key-value pairs. -Specify the labels in `nodeSelector` in the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). If you specify multiple `nodeSelector` labels, the node must match all of them. +Specify the labels in `nodeSelector` in the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). If you specify multiple `nodeSelector` labels, the node must match all of them. 
The following configuration causes CockroachDB pods to be scheduled onto worker nodes that have *both* the labels `worker-pool-name=crdb-workers` and `kubernetes.io/arch=amd64`: @@ -81,7 +81,7 @@ For an example, see [Scheduling CockroachDB onto labeled nodes](#example-schedul ### Add a node affinity -Specify node affinities in `affinity.nodeAffinity` in the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). If you specify multiple `matchExpressions` labels, the node must match all of them. If you specify multiple `values` for a label, the node can match any of the values. +Specify node affinities in `affinity.nodeAffinity` in the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). If you specify multiple `matchExpressions` labels, the node must match all of them. If you specify multiple `values` for a label, the node can match any of the values. The following configuration requires that CockroachDB pods are scheduled onto worker nodes running either an `intel` or `amd64` CPU, with a preference against worker nodes in the `us-east4-b` availability zone. @@ -112,11 +112,11 @@ The `requiredDuringSchedulingIgnoredDuringExecution` node affinity rule, using t The `preferredDuringSchedulingIgnoredDuringExecution` node affinity rule, using the `NotIn` operator and specified `weight`, discourages (but does not disallow) CockroachDB pods from being scheduled onto nodes with the label `topology.kubernetes.io/zone=us-east4-b`. This achieves a similar effect as a `PreferNoSchedule` [taint](#taints-and-tolerations). -For more context on how these rules work, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/). 
The [custom resource definition](https://github.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/config/crd/bases/crdb.cockroachlabs.com_crdbclusters.yaml) details the fields supported by the Operator. +For more context on how these rules work, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/). The [custom resource definition](https://raw.github.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/config/crd/bases/crdb.cockroachlabs.com_crdbclusters.yaml) details the fields supported by the {{ site.data.products.public-operator }}. ### Add a pod affinity or anti-affinity -Specify pod affinities and anti-affinities in `affinity.podAffinity` and `affinity.podAntiAffinity` in the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). If you specify multiple `matchExpressions` labels, the node must match all of them. If you specify multiple `values` for a label, the node can match any of the values. +Specify pod affinities and anti-affinities in `affinity.podAffinity` and `affinity.podAntiAffinity` in the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). If you specify multiple `matchExpressions` labels, the node must match all of them. If you specify multiple `values` for a label, the node can match any of the values. The following configuration attempts to schedule CockroachDB pods in the same zones as the pods that run our example [load generator](https://github.com/cockroachdb/cockroach/blob/master/cloud/kubernetes/example-app.yaml) app. It disallows CockroachDB pods from being co-located on the same worker node.
@@ -150,7 +150,7 @@ The `preferredDuringSchedulingIgnoredDuringExecution` pod affinity rule, using t The `requiredDuringSchedulingIgnoredDuringExecution` pod anti-affinity rule, using the `In` operator, requires CockroachDB pods not to be co-located on a worker node, as specified with `topologyKey`. -For more context on how these rules work, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/). The [custom resource definition](https://raw.github.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/config/crd/bases/crdb.cockroachlabs.com_crdbclusters.yaml) details the fields supported by the Operator. +For more context on how these rules work, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/). The [custom resource definition](https://raw.github.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/config/crd/bases/crdb.cockroachlabs.com_crdbclusters.yaml) details the fields supported by the {{ site.data.products.public-operator }}. ### Example: Scheduling CockroachDB onto labeled nodes @@ -192,7 +192,7 @@ In this example, CockroachDB has not yet been deployed to a running Kubernetes c This also ensures that the CockroachDB pods, which will be bound to persistent volumes in the same 3 availability zones, can be scheduled onto worker nodes in their respective zones. {{site.data.alerts.end}} -1. Add the following rules to the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): +1. 
Add the following rules to the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): {% include_cached copy-clipboard.html %} ~~~ yaml @@ -258,7 +258,7 @@ For an example, see [Evicting CockroachDB from a running worker node](#example-e ### Add a toleration -Specify pod tolerations in the `tolerations` object of the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). +Specify pod tolerations in the `tolerations` object of the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). The following toleration matches a taint with the specified key, value, and `NoSchedule` effect, using the `Equal` operator. A toleration that uses the `Equal` operator must include a `value` field: @@ -292,7 +292,7 @@ spec: A `NoExecute` taint on a node prevents pods from being scheduled onto the node, and evicts pods from the node if they are already running on the node. The matching toleration allows a pod to be scheduled onto the node, and to continue running on the node if `tolerationSeconds` is not specified. If `tolerationSeconds` is specified, the pod is evicted after this number of seconds. -For more information on using taints and tolerations, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/). The [custom resource definition](https://raw.github.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/config/crd/bases/crdb.cockroachlabs.com_crdbclusters.yaml) details the fields supported by the Operator. 
+For more information on using taints and tolerations, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/). The [custom resource definition](https://raw.github.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/config/crd/bases/crdb.cockroachlabs.com_crdbclusters.yaml) details the fields supported by the {{ site.data.products.public-operator }}. ### Example: Evicting CockroachDB from a running worker node @@ -326,7 +326,7 @@ In this example, CockroachDB has already been deployed on a Kubernetes cluster. node/gke-cockroachdb-default-pool-4e5ce539-j1h1 tainted ~~~ -1. Add a matching `tolerations` object to the Operator's custom resource, which was used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): +1. Add a matching `tolerations` object to the {{ site.data.products.public-operator }}'s custom resource, which was used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): ~~~ yaml spec: @@ -372,7 +372,7 @@ A pod with a *topology spread constraint* must satisfy its conditions when being ### Add a topology spread constraint -Specify pod topology spread constraints in the `topologySpreadConstraints` object of the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). If you specify multiple `topologySpreadConstraints` objects, the matching pods must satisfy all of the constraints. +Specify pod topology spread constraints in the `topologySpreadConstraints` object of the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). 
If you specify multiple `topologySpreadConstraints` objects, the matching pods must satisfy all of the constraints. The following topology spread constraint ensures that CockroachDB pods deployed with the label `environment=production` will not be unevenly distributed across zones by more than `1` pod: @@ -390,13 +390,13 @@ spec: The `DoNotSchedule` condition prevents labeled pods from being scheduled onto Kubernetes worker nodes when doing so would fail to meet the spread and topology constraints specified with `maxSkew` and `topologyKey`, respectively. -For more context on how these rules work, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/). The [custom resource definition](https://raw.github.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/config/crd/bases/crdb.cockroachlabs.com_crdbclusters.yaml) details the fields supported by the Operator. +For more context on how these rules work, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/). The [custom resource definition](https://raw.github.com/cockroachdb/cockroach-operator/v{{ latest_operator_version }}/config/crd/bases/crdb.cockroachlabs.com_crdbclusters.yaml) details the fields supported by the {{ site.data.products.public-operator }}. ## Resource labels and annotations To assist in working with your cluster, you can add labels and annotations to your resources. 
-Specify labels in `additionalLabels` and annotations in `additionalAnnotations` in the Operator's custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): +Specify labels in `additionalLabels` and annotations in `additionalAnnotations` in the {{ site.data.products.public-operator }}'s custom resource, which is used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): {% include_cached copy-clipboard.html %} ~~~ yaml diff --git a/src/current/v25.2/secure-cockroachdb-kubernetes-operator.md b/src/current/v25.2/secure-cockroachdb-kubernetes-operator.md index 0664dbdcf10..df196b505aa 100644 --- a/src/current/v25.2/secure-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/secure-cockroachdb-kubernetes-operator.md @@ -32,7 +32,7 @@ If you previously [authenticated with cockroach cert](deploy-cockroachdb-with-ku --overwrite ~~~ -1. Upload the new client certificate and key to the Kubernetes cluster as a **new** secret, renaming them to the filenames required by the CockroachDB operator: +1. Upload the new client certificate and key to the Kubernetes cluster as a **new** secret, renaming them to the filenames required by the {{ site.data.products.cockroachdb-operator }}: {% include_cached copy-clipboard.html %} ~~~ shell @@ -62,7 +62,7 @@ If you previously [authenticated with cockroach cert](deploy-cockroachdb-with-ku --overwrite ~~~ -1. Upload the new node certificate and key to the Kubernetes cluster as a **new** secret, renaming them to the filenames required by the CockroachDB operator: +1. 
Upload the new node certificate and key to the Kubernetes cluster as a **new** secret, renaming them to the filenames required by the {{ site.data.products.cockroachdb-operator }}: {% include_cached copy-clipboard.html %} ~~~ shell @@ -149,7 +149,7 @@ If you previously [authenticated with cockroach cert](deploy-cockroachdb-with-ku The operator ships with both [mutating](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#mutatingadmissionwebhook) and [validating](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#validatingadmissionwebhook) webhooks. Communication between the Kubernetes API server and the webhook service must be secured with TLS. -By default, the CockroachDB operator searches for the TLS secret `cockroach-operator-certs`, which contains a CA certificate. If the secret is not found, the operator auto-generates `cockroach-operator-certs` with a CA certificate for future runs. +By default, the {{ site.data.products.cockroachdb-operator }} searches for the TLS secret `cockroach-operator-certs`, which contains a CA certificate. If the secret is not found, the operator auto-generates `cockroach-operator-certs` with a CA certificate for future runs. The operator then generates a one-time server certificate for the webhook server that is signed with `cockroach-operator-certs`. Finally, the CA bundle for both mutating and validating webhook configurations is patched with the CA certificate. 
diff --git a/src/current/v25.2/secure-cockroachdb-kubernetes.md b/src/current/v25.2/secure-cockroachdb-kubernetes.md index 3675a474154..f898e2183e3 100644 --- a/src/current/v25.2/secure-cockroachdb-kubernetes.md +++ b/src/current/v25.2/secure-cockroachdb-kubernetes.md @@ -8,27 +8,27 @@ docs_area: deploy --- {{site.data.alerts.callout_info}} -This article assumes you have already [deployed CockroachDB securely on a single Kubernetes cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}) using the Operator or Helm. However, it's possible to configure these settings before starting CockroachDB on Kubernetes. +This article assumes you have already [deployed CockroachDB securely on a single Kubernetes cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}) using the {{ site.data.products.public-operator }} or Helm. However, it's possible to configure these settings before starting CockroachDB on Kubernetes. {{site.data.alerts.end}} -By default, self-signed certificates are used when using the Operator or Helm to securely [deploy CockroachDB on Kubernetes]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}). However, the recommended approach is to use `cert-manager` for certificate management. For details, refer to [Deploy cert-manager for mTLS](?filters=helm#deploy-cert-manager-for-mtls). +By default, self-signed certificates are used when using the {{ site.data.products.public-operator }} or Helm to securely [deploy CockroachDB on Kubernetes]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}). However, the recommended approach is to use `cert-manager` for certificate management. For details, refer to [Deploy cert-manager for mTLS](?filters=helm#deploy-cert-manager-for-mtls). -This page is for Kubernetes deployments that are not using the CockroachDB operator. 
For guidance specific to the CockroachDB operator, read [Certificate Management with the CockroachDB Operator]({% link {{ page.version.version }}/secure-cockroachdb-kubernetes-operator.md %}). +This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [Certificate Management with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/secure-cockroachdb-kubernetes-operator.md %}). {% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} This page explains how to: -- Authenticate an Operator or Helm deployment using a [custom CA](#use-a-custom-ca) +- Authenticate a {{ site.data.products.public-operator }} or Helm deployment using a [custom CA](#use-a-custom-ca) - [Rotate security certificates](#rotate-security-certificates) -- [Secure the webhooks](#secure-the-webhooks) (Operator) +- [Secure the webhooks](#secure-the-webhooks) (public perator) {{site.data.alerts.callout_danger}} If you are running a secure Helm deployment on Kubernetes 1.22 and later, you must migrate away from using the Kubernetes CA for cluster authentication. The recommended approach is to use `cert-manager` for certificate management. For details, refer to [Deploy cert-manager for mTLS](?filters=helm#deploy-cert-manager-for-mtls). {{site.data.alerts.end}}
- +
@@ -37,9 +37,9 @@ If you are running a secure Helm deployment on Kubernetes 1.22 and later, you mu ## Use a custom CA
-By default, the Operator will generate and sign 1 client and 1 node certificate to secure the cluster. +By default, the {{ site.data.products.public-operator }} will generate and sign 1 client and 1 node certificate to secure the cluster. -To use your own certificate authority instead, add the following to the Operator's custom resource **before** [initializing the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): +To use your own certificate authority instead, add the following to the {{ site.data.products.public-operator }}'s custom resource **before** [initializing the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster): {% include_cached copy-clipboard.html %} ~~~ yaml @@ -54,7 +54,7 @@ Replace: - `{client_secret_name}`: the name of the Kubernetes secret that contains the generated node certificate and key. {{site.data.alerts.callout_info}} -Currently, the Operator requires that the client and node secrets each contain the filenames `tls.crt` and `tls.key`. +Currently, the {{ site.data.products.public-operator }} requires that the client and node secrets each contain the filenames `tls.crt` and `tls.key`. {{site.data.alerts.end}} {% include {{ page.version.version }}/orchestration/apply-custom-resource.md %} @@ -124,7 +124,7 @@ Complete the following steps **before** [initializing the cluster]({% link {{ pa --ca-key=my-safe-directory/ca.key ~~~ -1. Upload the client certificate and key to the Kubernetes cluster as a secret, renaming them to the filenames required by the Operator: +1. Upload the client certificate and key to the Kubernetes cluster as a secret, renaming them to the filenames required by the {{ site.data.products.public-operator }}: {% include_cached copy-clipboard.html %} ~~~ shell @@ -138,7 +138,7 @@ Complete the following steps **before** [initializing the cluster]({% link {{ pa secret/cockroachdb.client.root created ~~~ -1. 
Create the certificate and key pair for your CockroachDB nodes, specifying the namespace you used when [deploying the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). This example uses the Operator's default namespace (`cockroach-operator-system`): +1. Create the certificate and key pair for your CockroachDB nodes, specifying the namespace you used when [deploying the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). This example uses the {{ site.data.products.public-operator }}'s default namespace (`cockroach-operator-system`): {% include_cached copy-clipboard.html %} ~~~ shell @@ -154,7 +154,7 @@ Complete the following steps **before** [initializing the cluster]({% link {{ pa --ca-key=my-safe-directory/ca.key ~~~ -1. Upload the node certificate and key to the Kubernetes cluster as a secret, renaming them to the filenames required by the Operator: +1. Upload the node certificate and key to the Kubernetes cluster as a secret, renaming them to the filenames required by the {{ site.data.products.public-operator }}: {% include_cached copy-clipboard.html %} ~~~ shell @@ -182,7 +182,7 @@ Complete the following steps **before** [initializing the cluster]({% link {{ pa default-token-6js7b kubernetes.io/service-account-token 3 9h ~~~ -1. Add `nodeTLSSecret` and `clientTLSSecret` to the Operator's [custom resource]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster), specifying the generated secret names: +1. Add `nodeTLSSecret` and `clientTLSSecret` to the {{ site.data.products.public-operator }}'s [custom resource]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster), specifying the generated secret names: ~~~ yaml spec: @@ -322,7 +322,7 @@ If you previously [authenticated with `cockroach cert`](#example-authenticate-wi --overwrite ~~~ -1. 
Upload the new client certificate and key to the Kubernetes cluster as a **new** secret, renaming them to the filenames required by the Operator: +1. Upload the new client certificate and key to the Kubernetes cluster as a **new** secret, renaming them to the filenames required by the {{ site.data.products.public-operator }}: {% include_cached copy-clipboard.html %} ~~~ shell @@ -336,7 +336,7 @@ If you previously [authenticated with `cockroach cert`](#example-authenticate-wi secret/cockroachdb.client.root.2 created ~~~ -1. Create a new certificate and key pair for your CockroachDB nodes, overwriting the previous certificate and key. Specify the namespace you used when [deploying the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). This example uses the Operator's default namespace (`cockroach-operator-system`): +1. Create a new certificate and key pair for your CockroachDB nodes, overwriting the previous certificate and key. Specify the namespace you used when [deploying the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster). This example uses the {{ site.data.products.public-operator }}'s default namespace (`cockroach-operator-system`): {% include_cached copy-clipboard.html %} ~~~ shell @@ -353,7 +353,7 @@ If you previously [authenticated with `cockroach cert`](#example-authenticate-wi --overwrite ~~~ -1. Upload the new node certificate and key to the Kubernetes cluster as a **new** secret, renaming them to the filenames required by the Operator: +1. Upload the new node certificate and key to the Kubernetes cluster as a **new** secret, renaming them to the filenames required by the {{ site.data.products.public-operator }}: {% include_cached copy-clipboard.html %} ~~~ shell @@ -367,7 +367,7 @@ If you previously [authenticated with `cockroach cert`](#example-authenticate-wi secret/cockroachdb.node.2 created ~~~ -1. 
Add `nodeTLSSecret` and `clientTLSSecret` to the Operator's [custom resource]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster), specifying the new secret names: +1. Add `nodeTLSSecret` and `clientTLSSecret` to the {{ site.data.products.public-operator }}'s [custom resource]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster), specifying the new secret names: ~~~ yaml spec: @@ -390,7 +390,7 @@ If you previously [authenticated with `cockroach cert`](#example-authenticate-wi ~~~ {{site.data.alerts.callout_info}} - Remember that `nodeTLSSecret` and `clientTLSSecret` in the Operator's [custom resource]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster) must specify these secret names. For details, see [Use a custom CA](#use-a-custom-ca). + Remember that `nodeTLSSecret` and `clientTLSSecret` in the {{ site.data.products.public-operator }}'s [custom resource]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}#initialize-the-cluster) must specify these secret names. For details, see [Use a custom CA](#use-a-custom-ca). {{site.data.alerts.end}} 1. Apply the new settings to the cluster: @@ -567,11 +567,11 @@ Previously, the Helm chart used a self-signer for cluster authentication. This a ## Secure the webhooks -The Operator ships with both [mutating](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#mutatingadmissionwebhook) and [validating](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#validatingadmissionwebhook) webhooks. Communication between the Kubernetes API server and the webhook service must be secured with TLS. 
+The {{ site.data.products.public-operator }} ships with both [mutating](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#mutatingadmissionwebhook) and [validating](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#validatingadmissionwebhook) webhooks. Communication between the Kubernetes API server and the webhook service must be secured with TLS. -By default, the Operator searches for the TLS secret `cockroach-operator-webhook-ca`, which contains a CA certificate. If the secret is not found, the Operator auto-generates `cockroach-operator-webhook-ca` with a CA certificate for future runs. +By default, the {{ site.data.products.public-operator }} searches for the TLS secret `cockroach-operator-webhook-ca`, which contains a CA certificate. If the secret is not found, the {{ site.data.products.public-operator }} auto-generates `cockroach-operator-webhook-ca` with a CA certificate for future runs. -The Operator then generates a one-time server certificate for the webhook server that is signed with `cockroach-operator-webhook-ca`. Finally, the CA bundle for both mutating and validating webhook configurations is patched with the CA certificate. +The {{ site.data.products.public-operator }} then generates a one-time server certificate for the webhook server that is signed with `cockroach-operator-webhook-ca`. Finally, the CA bundle for both mutating and validating webhook configurations is patched with the CA certificate. You can also use your own certificate authority rather than `cockroach-operator-webhook-ca`. Both the certificate and key files you generate must be PEM-encoded. See the following [example](#example-using-openssl-to-secure-the-webhooks). @@ -611,7 +611,7 @@ These steps demonstrate how to use the [`openssl genrsa`](https://www.openssl.or rm tls.crt tls.key ~~~ -1. Roll the Operator deployment to ensure a new server certificate is generated: +1. 
Roll the {{ site.data.products.public-operator }} deployment to ensure a new server certificate is generated: {% include_cached copy-clipboard.html %} ~~~ shell diff --git a/src/current/v25.2/upgrade-cockroachdb-kubernetes-operator.md b/src/current/v25.2/upgrade-cockroachdb-kubernetes-operator.md index 6221394281a..ab0a63cb831 100644 --- a/src/current/v25.2/upgrade-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/upgrade-cockroachdb-kubernetes-operator.md @@ -7,10 +7,10 @@ secure: true docs_area: deploy --- -This page describes how to upgrade a CockroachDB cluster that is [deployed on a Kubernetes cluster](deploy-cockroachdb-with-kubernetes-operator.html) with the CockroachDB operator. +This page describes how to upgrade a CockroachDB cluster that is [deployed on a Kubernetes cluster](deploy-cockroachdb-with-kubernetes-operator.html) with the {{ site.data.products.cockroachdb-operator }}. {{site.data.alerts.callout_info}} -The CockroachDB operator is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). +The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). {{site.data.alerts.end}} ## Overview diff --git a/src/current/v25.2/upgrade-cockroachdb-kubernetes.md b/src/current/v25.2/upgrade-cockroachdb-kubernetes.md index 01349a83f06..360d9edecec 100644 --- a/src/current/v25.2/upgrade-cockroachdb-kubernetes.md +++ b/src/current/v25.2/upgrade-cockroachdb-kubernetes.md @@ -9,7 +9,7 @@ docs_area: deploy This page shows how to upgrade a CockroachDB cluster that is [deployed on a Kubernetes cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}). -This page is for Kubernetes deployments that are not using the CockroachDB operator. 
For guidance specific to the CockroachDB operator, read [Upgrade a Cluster in Kubernetes with the CockroachDB Operator]({% link {{ page.version.version }}/upgrade-cockroachdb-kubernetes-operator.md %}). +This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [Upgrade a Cluster in Kubernetes with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/upgrade-cockroachdb-kubernetes-operator.md %}). {% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} @@ -22,8 +22,8 @@ On Kubernetes, the upgrade is a [staged update](https://kubernetes.io/docs/tutor Select the cluster's deployment method to continue.
- - + +
diff --git a/src/current/v25.3/node-shutdown.md b/src/current/v25.3/node-shutdown.md index e95ca33f513..6465879950a 100644 --- a/src/current/v25.3/node-shutdown.md +++ b/src/current/v25.3/node-shutdown.md @@ -834,7 +834,7 @@ On the **Cluster Overview** page of the DB Console, the [node status]({% link {{ Most of the guidance in this page is most relevant to manual deployments that don't use Kubernetes. If you use Kubernetes to deploy CockroachDB, draining and decommissioning work the same way for the `cockroach` process, but Kubernetes handles them on your behalf. In a deployment without Kubernetes, an administrator initiates decommissioning or draining directly. In a Kubernetes deployment, an administrator modifies the desired configuration of the Kubernetes cluster and Kubernetes makes the required changes to the cluster, including decommissioning or draining nodes as required. -- Whether you deployed a cluster using the CockroachDB Operator, Helm, or a manual StatefulSet, the resulting deployment is a StatefulSet. Due to the nature of StatefulSets, it's safe to decommission **only** the Cockroach node with the highest StatefulSet ordinal in preparation for scaling down the StatefulSet. If you think you need to decommission any other node, consider the following recommendations and [contact Support](https://support.cockroachlabs.com/hc/en-us) for assistance. +- Whether you deployed a cluster using the {{ site.data.products.cockroachdb-operator }}, Helm, or a manual StatefulSet, the resulting deployment is a StatefulSet. Due to the nature of StatefulSets, it's safe to decommission **only** the Cockroach node with the highest StatefulSet ordinal in preparation for scaling down the StatefulSet. If you think you need to decommission any other node, consider the following recommendations and [contact Support](https://support.cockroachlabs.com/hc/en-us) for assistance. 
- If you deployed a cluster using the [CockroachDB Kubernetes Operator]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}), the best way to scale down a cluster is to update the specification for the Kubernetes deployment to reduce the value of `nodes:` and apply the change using a [rolling update](https://kubernetes.io/docs/tutorials/kubernetes-basics/update/update-intro/). Kubernetes will notice that there are now too many nodes and will reduce them and clean up their storage automatically. @@ -854,7 +854,7 @@ After Kubernetes issues a termination request to the `cockroach` process on a cl If undefined, Kubernetes sets `terminationGracePeriodSeconds` to 30 seconds. This is too short for the `cockroach` process to stop gracefully before Kubernetes terminates it forcibly. Do not set `terminationGracePeriodSeconds` to `0`, which prevents Kubernetes from detecting and terminating a stuck pod. -For clusters deployed using the CockroachDB Public Operator, `terminationGracePeriodSeconds` defaults to 300 seconds (5 minutes). +For clusters deployed using the CockroachDB {{ site.data.products.public-operator }}, `terminationGracePeriodSeconds` defaults to 300 seconds (5 minutes). For clusters deployed using the CockroachDB Helm chart or a manual StatefulSet, the default depends upon the values file or manifest you used when you created the cluster. 
Cockroach Labs recommends that you: From 5157320cae6fe85e14b19a95842336b53929fb40 Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Mon, 4 Aug 2025 21:41:20 -0400 Subject: [PATCH 07/27] Add rollback process to migration guides) --- .../migrate-cockroachdb-kubernetes-helm.md | 79 ++++++++++++++- ...migrate-cockroachdb-kubernetes-operator.md | 99 +++++++++++++++++++ 2 files changed, 177 insertions(+), 1 deletion(-) diff --git a/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md b/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md index 920b0033e5b..c61aaac4c92 100644 --- a/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md +++ b/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md @@ -130,6 +130,10 @@ For each pod in the StatefulSet, perform the following steps: Repeat these steps until the StatefulSet has zero replicas. +{{site.data.alerts.callout_danger}} +If there are issues with the migration and you need to revert back to the previous deployment, follow the [rollback process](#roll-back-a-migration-in-progress). +{{site.data.alerts.end}} + ## Step 4. Update the public service The Helm chart creates a public Service that exposes both SQL and gRPC connections over a single power. However, the operator uses a different port for gRPC communication. To ensure compatibility, update the public Service to reflect the correct gRPC port used by the operator. @@ -162,4 +166,77 @@ Apply the crdbcluster manifest using Helm: {% include_cached copy-clipboard.html %} ~~~ shell helm upgrade $RELEASE_NAME ./cockroachdb-parent/charts/cockroachdb -f manifests/values.yaml -~~~ \ No newline at end of file +~~~ + +## Roll back a migration in progress + +If the migration to the {{ site.data.products.cockroachdb-operator}} fails during the stage where you are applying the generated `crdbnode` manifests, follow the steps below to safely restore the original state using the previously backed-up resources and preserved volumes. 
This assumes the StatefulSet and PVCs are not deleted. + +1. Delete the applied `crdbnode` resources and simultaneously scale the StatefulSet back up. + + Delete the individual `crdbnode` manifests in the reverse order of their creation (starting with the last one created, e.g., `crdbnode-2.yaml`) and scale the StatefulSet back to its original replica count (e.g., 3). For example, assuming you have applied two `crdbnode` yaml files (`crdbnode-2.yaml` & `crdbnode-1.yaml`): + + 1. Delete a `crdbnode` manifest in reverse order, starting with `crdbnode-2.yaml`. + 1. Scale the StatefulSet replica count up by one (to 2). + 1. Verify that data has propagated by waiting for there to be zero under-replicated ranges: + + 1. Set up port forwarding to access the CockroachDB node's HTTP interface, replacing `cockroachdb-X` with the node name: + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl port-forward pod/cockroachdb-X 8080:8080 + ~~~ + + The DB Console runs on port 8080 by default. + + 1. Check the `ranges_underreplicated` metric: + + {% include_cached copy-clipboard.html %} + ~~~ shell + curl --insecure -s https://localhost:8080/_status/vars | grep "ranges_underreplicated{" | awk ' {print $2}' + ~~~ + + This command outputs the number of under-replicated ranges on the node, which should be zero before proceeding with the next node. This may take some time depending on the deployment, but is necessary to ensure that there is no downtime in data availability. + + 1. Repeat steps a through c for each node, deleting the `crdbnode-1.yaml`, scaling replica count to 3, and so on. + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl delete -f manifests/crdbnode-2.yaml + kubectl scale statefulset $CRDBCLUSTER --replicas=2 + ~~~ + + Repeat the `kubectl delete -f ... command` for each `crdbnode` manifest you applied during migration. Make sure to verify that there are no underreplicated ranges after rolling back each node. + +1. 
Delete the PriorityClass and RBAC resources created for the CockroachDB operator: + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl delete priorityclass crdb-critical + kubectl delete -f manifests/rbac.yaml + ~~~ + +1. Uninstall the {{ site.data.products.cockroachdb-operator }}: + + {% include_cached copy-clipboard.html %} + ~~~ shell + helm uninstall crdb-operator + ~~~ + +1. Clean up {{ site.data.products.cockroachdb-operator }} resources and custom resource definitions: + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl delete crds crdbnodes.crdb.cockroachlabs.com + kubectl delete crds crdbtenants.crdb.cockroachlabs.com + kubectl delete serviceaccount cockroachdb-sa + kubectl delete service cockroach-webhook-service + kubectl delete validatingwebhookconfiguration cockroach-webhook-config + ~~~ + +1. Confirm that all CockroachDB pods are "Running" or "Ready" as shown with the following command: + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl get pods + ~~~ diff --git a/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md b/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md index 7e4bc595d6a..102e069a496 100644 --- a/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md @@ -180,6 +180,10 @@ For each pod in the StatefulSet, perform the following steps: Repeat these steps until the StatefulSet has zero replicas. +{{site.data.alerts.callout_danger}} +If there are issues with the migration and you need to revert back to the previous deployment, follow the [rollback process](#roll-back-a-migration-in-progress). +{{site.data.alerts.end}} + ## Step 5. Update the crdbcluster manifest The {{ site.data.products.public-operator }} creates a pod disruption budget that conflicts with a pod disruption budget managed by the {{ site.data.products.cockroachdb-operator }}. 
Before applying the crdbcluster manifest, delete the existing pod disruption budget: @@ -211,3 +215,98 @@ Once the migration is successful, delete the StatefulSet that was created by the ~~~ shell kubectl delete poddisruptionbudget $STS_NAME-budget ~~~ + +## Roll back a migration in progress + +If the migration to the {{ site.data.products.cockroachdb-operator}} fails during the stage where you are applying the generated `crdbnode` manifests, follow the steps below to safely restore the original state using the previously backed-up resources and preserved volumes. This assumes the StatefulSet and PVCs are not deleted. + +1. Delete the applied `crdbnode` resources and simultaneously scale the StatefulSet back up. + + Delete the individual `crdbnode` manifests in the reverse order of their creation (starting with the last one created, e.g., `crdbnode-2.yaml`) and scale the StatefulSet back to its original replica count (e.g., 3). For example, assuming you have applied two `crdbnode` yaml files (`crdbnode-2.yaml` & `crdbnode-1.yaml`): + + 1. Delete a `crdbnode` manifest in reverse order, starting with `crdbnode-2.yaml`. + 1. Scale the StatefulSet replica count up by one (to 2). + 1. Verify that data has propagated by waiting for there to be zero under-replicated ranges: + + 1. Set up port forwarding to access the CockroachDB node's HTTP interface, replacing `cockroachdb-X` with the node name: + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl port-forward pod/cockroachdb-X 8080:8080 + ~~~ + + The DB Console runs on port 8080 by default. + + 1. Check the `ranges_underreplicated` metric: + + {% include_cached copy-clipboard.html %} + ~~~ shell + curl --insecure -s https://localhost:8080/_status/vars | grep "ranges_underreplicated{" | awk ' {print $2}' + ~~~ + + This command outputs the number of under-replicated ranges on the node, which should be zero before proceeding with the next node. 
This may take some time depending on the deployment, but is necessary to ensure that there is no downtime in data availability. + + 1. Repeat steps a through c for each node, deleting the `crdbnode-1.yaml`, scaling replica count to 3, and so on. + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl delete -f manifests/crdbnode-2.yaml + kubectl scale statefulset $CRDBCLUSTER --replicas=2 + ~~~ + + Repeat the `kubectl delete -f ... command` for each `crdbnode` manifest you applied during migration. Make sure to verify that there are no underreplicated ranges after rolling back each node. + +1. Delete the PriorityClass and RBAC resources created for the CockroachDB operator: + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl delete priorityclass crdb-critical + kubectl delete -f manifests/rbac.yaml + ~~~ + +1. Uninstall the {{ site.data.products.cockroachdb-operator }}: + + {% include_cached copy-clipboard.html %} + ~~~ shell + helm uninstall crdb-operator + ~~~ + +1. Clean up {{ site.data.products.cockroachdb-operator }} resources and custom resource definitions: + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl delete crds crdbnodes.crdb.cockroachlabs.com + kubectl delete crds crdbtenants.crdb.cockroachlabs.com + kubectl delete serviceaccount cockroachdb-sa + kubectl delete service cockroach-webhook-service + kubectl delete validatingwebhookconfiguration cockroach-webhook-config + ~~~ + +1. Restore the {{ site.data.products.public-operator }}: + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl apply -f https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v2.17.0/install/crds.yaml + kubectl apply -f https://raw.githubusercontent.com/cockroachdb/cockroach-operator/v2.17.0/install/operator.yaml + ~~~ + + Wait for the operator pod to be "Running" as shown with the following command: + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl get pods -n cockroach-operator-system + ~~~ + +1. 
Restore the original `crdbcluster` custom resource: + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl apply -f backup/crdbcluster-$CRDBCLUSTER.yaml + ~~~ + +1. Confirm that all CockroachDB pods are "Running" or "Ready" as shown with the following command: + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl get pods + ~~~ From ac1300efdadf57b6edee020af50703b9072d6ff4 Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Mon, 4 Aug 2025 21:57:41 -0400 Subject: [PATCH 08/27] Update to localityMappings --- ...oy-cockroachdb-with-kubernetes-operator.md | 31 +++++++++++-------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md b/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md index 5f53a69583c..0297d02ed8a 100644 --- a/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md +++ b/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md @@ -42,9 +42,11 @@ If you want to secure your cluster to use TLS certificates for all network commu ### Localities -CockroachDB clusters use locality labels to efficiently distribute replicas. This is especially important in multi-region deployments. In cloud provider deployments (e.g., [GKE](#hosted-gke), [EKS](#hosted-eks), or [AKS](#hosted-aks)), the [`topology.kubernetes.io/region`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesioregion) and [`topology.kubernetes.io/zone`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesiozone) labels are applied implicitly to Kubernetes nodes and populated by the regions and zones specific to the cloud provider. For further granularity, you can define arbitrary locality labels (e.g., `province`, `datacenter`, `rack`), but these need to be applied individually to the Kubernetes node when initialized so that CockroachDB can understand where the node lives and distribute replicas accordingly. 
+CockroachDB clusters use localities to efficiently distribute replicas. This is especially important in multi-region deployments. With the {{ site.data.products.cockroachdb-operator }}, you specify mappings between locality levels and the location on a Kubernetes node where the value for that locality can be found. -On bare metal Kubernetes deployments, you must plan a hierarchy of locality labels that suit your CockroachDB node distribution, then apply these labels individually to nodes when they are initialized. Although you can set most of these values arbitrarily, you must set region and zone locations in the reserved `topology.kubernetes.io/region` and `topology.kubernetes.io/zone` namespaces, respectively. +In cloud provider deployments (e.g., [GKE](#hosted-gke), [EKS](#hosted-eks), or [AKS](#hosted-aks)), the [`topology.kubernetes.io/region`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesioregion) and [`topology.kubernetes.io/zone`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesiozone) values on Kubernetes nodes are populated by the cloud provider. For further granularity, you can define arbitrary locality labels (e.g., `province`, `datacenter`, `rack`), but these need to be applied individually to the Kubernetes node when initialized so that CockroachDB can understand where the node lives and distribute replicas accordingly. +On bare metal Kubernetes deployments, you must plan a hierarchy of localities that suit your CockroachDB node distribution, then apply these values individually to nodes when they are initialized. Although you can set most of these values arbitrarily, you must set region and zone locations in the reserved `topology.kubernetes.io/region` and `topology.kubernetes.io/zone` namespaces, respectively. For more information on how locality labels are used by CockroachDB, refer to the [`--locality` documentation](cockroach-start.html#locality).
@@ -382,29 +384,32 @@ For bare metal deployments, the specific Kubernetes infrastructure deployment st For a detailed tutorial of a TLS configuration with manual certificates, refer to [Example: Authenticate with cockroach cert](#example-authenticate-with-cockroach-cert). -1. In `cockroachdb.crdbCluster.localityLabels`, provide [locality labels](#localities) that specify where the locality information of each Kubernetes node is stored. When CockroachDB is initialized on a node, it processes these values as though they are provided through the [`cockroach start --locality`](cockroach-start#locality) flag. +1. In `cockroachdb.crdbCluster.localityMappings`, provide [locality mappings](#localities) that define locality levels and map them to node labels where the locality information of each Kubernetes node is stored. When CockroachDB is initialized on a node, it processes these values as though they are provided through the [`cockroach start --locality`](cockroach-start#locality) flag. - The default configuration uses the `region` and `zone` locality levels, which are set differently depending on the deployment type: - - In cloud provider deployments, the [`topology.kubernetes.io/region`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesioregion) and [`topology.kubernetes.io/zone`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesiozone) locality labels are applied implicitly to Kubernetes nodes and populated by the regions and zones specific to the cloud provider. - - In bare metal deployments, the `topology.kubernetes.io/region` and `topology.kubernetes.io/zone` values are not set implicitly by a cloud provider when initializing the node, so you must set them manually or configure custom locality labels. 
+ The default configuration uses the `region` and `zone` locality labels, mapped implicitly to the [`topology.kubernetes.io/region`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesioregion) and [`topology.kubernetes.io/zone`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesiozone) node labels. + - In cloud provider deployments, the `topology.kubernetes.io/region` and `topology.kubernetes.io/zone` values on a node are populated by the cloud provider. + - In bare metal deployments, the `topology.kubernetes.io/region` and `topology.kubernetes.io/zone` node label values are not set implicitly by a cloud provider when initializing the node, so you must set them manually or configure custom locality labels. - To add more granular levels of locality to your nodes or use different locality labels, add custom locality levels as values in the `cockroachdb.crdbCluster.localityLabels` list. Any custom `localityLabels` configuration overrides the default `region` and `zone` configuration, so if you append an additional locality level but wish to keep the `region` and `zone` labels you must declare them manually. + To add more granular levels of locality to your nodes or use different locality labels, add custom locality levels as values in the `cockroachdb.crdbCluster.localityMappings` list. Any custom `localityMappings` configuration overrides the default `region` and `zone` configuration, so if you append an additional locality level but wish to keep the `region` and `zone` labels you must declare them manually. - The following example uses the existing `region` and `zone` labels and adds an additional `datacenter` locality label that is more granular than `zone`. 
This example declares that the `datacenter` locality information is stored in the `example.datacenter.locality` variable on the node: + The following example uses the existing `region` and `zone` labels and adds an additional `datacenter` locality mapping that is more granular than `zone`. This example declares that the `dc` locality information is stored in the `example.datacenter.locality` node label: ~~~ yaml cockroachdb: crdbCluster: - localityLabels: - - topology.kubernetes.io/region - - topology.kubernetes.io/zone - - example.datacenter.locality + localityMappings: + - nodeLabel: "topology.kubernetes.io/region" + localityLabel: "region" + - nodeLabel: "topology.kubernetes.io/zone" + localityLabel: "zone" + - nodeLabel: "example.datacenter.locality" + localityLabel: "dc" ~~~ In this example, if a Kubernetes node is initialized in the `us-central1` region, `us-central1-c` zone, and `dc2` datacenter, its `cockroach start --locality` flag would be equivalent to the following: ~~~ shell - cockroach start --locality region=us-central1,zone=us-central1-c,example.datacenter.locality=dc2 + cockroach start --locality region=us-central1,zone=us-central1-c,dc=dc2 ~~~ Optionally, review the `cockroachdb.crdbCluster.topologySpreadConstraints` configuration and set `topologyKey` to a locality variable that will have distinct values for each node. 
By default the lowest locality level is `zone`, so the following configuration sets that value as the `topologyKey`: From c51de92506c196eedadd990a808a5dbb16032a15 Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Mon, 4 Aug 2025 22:36:36 -0400 Subject: [PATCH 09/27] Add podTemplate and startFlags docs --- .../sidebar-data/self-hosted-deployments.json | 6 ++ .../v25.2/kubernetes-operator-overview.md | 1 + .../override-templates-kubernetes-operator.md | 93 +++++++++++++++++++ 3 files changed, 100 insertions(+) create mode 100644 src/current/v25.2/override-templates-kubernetes-operator.md diff --git a/src/current/_includes/v25.2/sidebar-data/self-hosted-deployments.json b/src/current/_includes/v25.2/sidebar-data/self-hosted-deployments.json index b350f6acd6c..51d98ae0f17 100644 --- a/src/current/_includes/v25.2/sidebar-data/self-hosted-deployments.json +++ b/src/current/_includes/v25.2/sidebar-data/self-hosted-deployments.json @@ -241,6 +241,12 @@ "/${VERSION}/upgrade-cockroachdb-kubernetes-operator.html" ] }, + { + "title": "Override Templates", + "urls": [ + "/${VERSION}/override-templates-kubernetes-operator.html" + ] + }, { "title": "Kubernetes Performance", "urls": [ diff --git a/src/current/v25.2/kubernetes-operator-overview.md b/src/current/v25.2/kubernetes-operator-overview.md index 9c008a27903..ec45cb9af93 100644 --- a/src/current/v25.2/kubernetes-operator-overview.md +++ b/src/current/v25.2/kubernetes-operator-overview.md @@ -32,6 +32,7 @@ This section describes how to: - [Scale a cluster]({% link {{page.version.version}}/scale-cockroachdb-kubernetes-operator.md %}). - [Monitor a cluster]({% link {{page.version.version}}/monitor-cockroachdb-kubernetes-operator.md %}). - [Upgrade a cluster]({% link {{page.version.version}}/upgrade-cockroachdb-kubernetes-operator.md %}). + - [Override deployment templates]({% link {{page.version.version}}/override-templates-kubernetes-operator.md %}). 
- [Improve cluster performance]({% link {{page.version.version}}/kubernetes-operator-performance.md %}). ## Kubernetes terminology diff --git a/src/current/v25.2/override-templates-kubernetes-operator.md b/src/current/v25.2/override-templates-kubernetes-operator.md new file mode 100644 index 00000000000..9aac64f4042 --- /dev/null +++ b/src/current/v25.2/override-templates-kubernetes-operator.md @@ -0,0 +1,93 @@ +--- +title: Override Deployment Templates with the CockroachDB Operator +summary: Use advanced configuration operations to manually override pod templates and cockroach start flags with the CockroachDB operator. +toc: true +docs_area: deploy +--- + +The {{ site.data.products.cockroachdb-operator }} provides abstractions that simplify cluster deployment and node initialization: + +- A default pod specification is used for the CockroachDB Kubernetes pod. +- The `values.yaml` configuration maps to a subset of `cockroach start` flags when CockroachDB is initialized. + +This page describes configuration options that allow advanced users to manually override the pod template and `cockroach start` flags as needed for deployment. + +{{site.data.alerts.callout_info}} +The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). +{{site.data.alerts.end}} + +{{site.data.alerts.callout_danger}} +This page describes advanced configurations that override the supported default templates used by the {{ site.data.products.cockroachdb-operator }}. Cockroach Labs strongly recommends testing these configurations in a non-production environment first. +{{site.data.alerts.end}} + +## Override the default pod + +The `cockroachdb.crdbCluster.podTemplate` field allows you to override the default pod metadata and specification configured by the {{ site.data.products.cockroachdb-operator }}.
The values in this field are merged with the default pod specification, where settings in `podTemplate` override any values in the default. + +~~~ yaml +cockroachdb: + crdbCluster: + podTemplate: + # metadata captures the pod metadata for CockroachDB pods. + metadata: {} + # spec captures the pod specification for CockroachDB pods. + spec: + # initContainers captures the list of init containers for CockroachDB pods. + initContainers: + - name: cockroachdb-init + image: us-docker.pkg.dev/cockroach-cloud-images/data-plane/init-container@sha256:c3e4ba851802a429c7f76c639a64b9152d206cebb31162c1760f05e98f7c4254 + # containers captures the list of containers for CockroachDB pods. + containers: + - name: cockroachdb + image: cockroachdb/cockroach:v25.2.2 + - name: cert-reloader + image: us-docker.pkg.dev/cockroach-cloud-images/data-plane/inotifywait:87edf086db32734c7fa083a62d1055d664900840 + # imagePullSecrets captures the secrets for fetching images from private registries. + imagePullSecrets: [] +~~~ + +At least one value for `containers` must be specified if any part of `podTemplate` is being modified. For example, the following `podTemplate` configuration overrides pod anti-affinity behavior and specifies a default `cockroachdb/cockroach:v25.2.2` container image: + +~~~ yaml +cockroachdb: + crdbCluster: + podTemplate: + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/component + operator: In + values: + - cockroachdb + topologyKey: kubernetes.io/hostname + containers: + - name: cockroachdb + image: cockroachdb/cockroach:v25.2.2 +~~~ + +## Override the default `cockroach start` flags + +The `cockroachdb.crdbCluster.startFlags` field allows you to customize the [`cockroach start` flags]({% link {{ page.version.version }}/cockroach-start.md %}#flags) used when initializing the CockroachDB cluster.
+ +Within this field, you can specify flags to upsert and flags to omit: + +- Upserted flags are added to the `cockroach start` command, their values overriding any matching flags in the command. +- Omitted flags are removed from the `cockroach start` command if they were present. + +~~~ yaml +cockroachdb: + crdbCluster: + startFlags: {} + # upsert captures a set of flags that are given higher precedence in the start command. + upsert: + - "--cache=30%" + - "--max-sql-memory=35%" + # omit defines a set of flags which will be omitted from the start command. + omit: + - "" +~~~ From 8feb650e8764bd891cd25f46fe69bd8dab52f874 Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Mon, 4 Aug 2025 23:00:25 -0400 Subject: [PATCH 10/27] Fix broken upgrade finalization includes --- .../upgrade/disable-auto-finalization.md | 30 +++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/src/current/_includes/common/upgrade/disable-auto-finalization.md b/src/current/_includes/common/upgrade/disable-auto-finalization.md index 996d514396b..d3abfb90372 100644 --- a/src/current/_includes/common/upgrade/disable-auto-finalization.md +++ b/src/current/_includes/common/upgrade/disable-auto-finalization.md @@ -8,10 +8,12 @@ Either of these settings prevents automatic finalization. {{site.data.alerts.end}} {% endcapture %} -{% if page.path contains 'kubernetes' %} +{% if page.name == 'upgrade-cockroachdb-kubernetes.md' %}
-For clusters managed by the Operator, auto-finalization is disabled and cannot be enabled. A major version upgrade is not complete until it is manually [finalized](#finalize-a-major-version-upgrade-manually). The Operator does not yet support the [cluster setting]({% link {{ page.version.version }}/cluster-settings.md %}) `cluster.auto_upgrade.enabled`. + +For clusters managed by the {{ site.data.products.public-operator }}, auto-finalization is disabled and cannot be enabled. A major version upgrade is not complete until it is manually [finalized](#finalize-a-major-version-upgrade-manually). The {{ site.data.products.public-operator }} does not support the [cluster setting]({% link {{ page.version.version }}/cluster-settings.md %}) `cluster.auto_upgrade.enabled`. +
@@ -22,6 +24,7 @@ To disable auto-finalization: 1. Connect to the cluster using the SQL shell: + {% include_cached copy-clipboard.html %} ~~~ shell $ kubectl exec -it cockroachdb-client-secure \ -- ./cockroach sql \ @@ -45,6 +48,7 @@ To disable auto-finalization: 1. Connect to the cluster using the SQL shell: + {% include_cached copy-clipboard.html %} ~~~ shell $ kubectl exec -it cockroachdb-client-secure \ -- ./cockroach sql \ @@ -58,6 +62,28 @@ Now, to complete a major-version upgrade, you must manually [finalize it](#final
+{% elsif page.name == 'upgrade-cockroachdb-kubernetes-operator.md' %} + +By default, auto-finalization is enabled, and a major-version upgrade is finalized when all nodes have rejoined the cluster using the new `cockroach` binary. This means that by default, a major-version upgrade cannot be rolled back. Instead, you must [restore the cluster to the previous version]({% link {{ page.version.version }}/restoring-backups-across-versions.md %}#support-for-restoring-backups-into-a-newer-version). + +To disable auto-finalization: + +1. Connect to the cluster using the SQL shell: + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl exec -it cockroachdb-client-secure \ + -- ./cockroach sql \ + --certs-dir=/cockroach-certs \ + --host=cockroachdb-public + ~~~ + +1. Set the [cluster setting]({% link {{ page.version.version }}/cluster-settings.md %}) `cluster.auto_upgrade.enabled` to `false`. + +Now, to complete a major-version upgrade, you must manually [finalize it](#finalize-a-major-version-upgrade-manually) or [roll it back](#roll-back-a-major-version-upgrade). + +{{ new_flag }} + {% else %} By default, auto-finalization is enabled, and a major-version upgrade is finalized when all nodes have rejoined the cluster using the new `cockroach` binary. This means that by default, a major-version upgrade cannot be rolled back. Instead, you must [restore the cluster to the previous version]({% link {{ page.version.version }}/restoring-backups-across-versions.md %}#support-for-restoring-backups-into-a-newer-version).
From 0bc8759f97af93316bfbb2aa83b7a325741aa0d2 Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Mon, 4 Aug 2025 23:18:23 -0400 Subject: [PATCH 11/27] quick fixes --- .../v25.2/orchestration/start-cockroachdb-operator-secure.md | 2 +- src/current/v25.2/create-sequence.md | 2 +- src/current/v25.2/kubernetes-operator-overview.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/current/_includes/v25.2/orchestration/start-cockroachdb-operator-secure.md b/src/current/_includes/v25.2/orchestration/start-cockroachdb-operator-secure.md index 543297c3433..feb492fcd41 100644 --- a/src/current/_includes/v25.2/orchestration/start-cockroachdb-operator-secure.md +++ b/src/current/_includes/v25.2/orchestration/start-cockroachdb-operator-secure.md @@ -1,4 +1,4 @@ -### Install the {{ site.data.products.public-operator }} +### Install the Operator {% capture latest_operator_version %}{% include_cached latest_operator_version.md %}{% endcapture %} {% capture apply_default_operator_manifest_command %}{% include_cached copy-clipboard.html %} diff --git a/src/current/v25.2/create-sequence.md b/src/current/v25.2/create-sequence.md index 108b7e151b6..e3bf4214fe9 100644 --- a/src/current/v25.2/create-sequence.md +++ b/src/current/v25.2/create-sequence.md @@ -326,6 +326,6 @@ SHOW CREATE customer_seq_node_cached; - [`DROP SEQUENCE`]({% link {{ page.version.version }}/drop-sequence.md %}) - [`SHOW CREATE`]({% link {{ page.version.version }}/show-create.md %}) - [`SHOW SEQUENCES`]({% link {{ page.version.version }}/show-sequences.md %}) -- [Functions and s]({% link {{ page.version.version }}/functions-and-operators.md %}) +- [Functions and Operators]({% link {{ page.version.version }}/functions-and-operators.md %}) - [SQL Statements]({% link {{ page.version.version }}/sql-statements.md %}) - [Online Schema Changes]({% link {{ page.version.version }}/online-schema-changes.md %}) diff --git a/src/current/v25.2/kubernetes-operator-overview.md 
b/src/current/v25.2/kubernetes-operator-overview.md index ec45cb9af93..3e5baf0af18 100644 --- a/src/current/v25.2/kubernetes-operator-overview.md +++ b/src/current/v25.2/kubernetes-operator-overview.md @@ -1,6 +1,6 @@ --- title: CockroachDB Operator Overview -summary: An overview of deployment and management of a CockroachDB cluster using the {{ site.data.products.cockroachdb-operator }} with Kubernetes. +summary: An overview of deployment and management of a CockroachDB cluster using the CockroachDB operator with Kubernetes. toc: true toc_not_nested: true secure: true From ddfb07903e3477b65c60624308244c24ed70cf5a Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Tue, 5 Aug 2025 00:16:43 -0400 Subject: [PATCH 12/27] Fix links --- ...nfigure-cockroachdb-kubernetes-operator.md | 18 ++++---- ...oy-cockroachdb-with-kubernetes-operator.md | 45 ++++++++++--------- .../v25.2/kubernetes-operator-performance.md | 16 +++---- .../migrate-cockroachdb-kubernetes-helm.md | 4 +- ...migrate-cockroachdb-kubernetes-operator.md | 4 +- ...monitor-cockroachdb-kubernetes-operator.md | 24 +++++----- .../scale-cockroachdb-kubernetes-operator.md | 10 ++--- ...chedule-cockroachdb-kubernetes-operator.md | 20 ++++----- .../secure-cockroachdb-kubernetes-operator.md | 8 ++-- ...ate-a-multi-region-cluster-on-localhost.md | 2 +- ...upgrade-cockroachdb-kubernetes-operator.md | 4 +- 11 files changed, 78 insertions(+), 77 deletions(-) diff --git a/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md b/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md index 0c3e22f3e05..03aba85264e 100644 --- a/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md @@ -24,10 +24,10 @@ Run `kubectl describe nodes` to see the available resources on the instances tha You can set the CPU and memory resources allocated to the CockroachDB container on each pod. 
{{site.data.alerts.callout_info}} -1 CPU in Kubernetes is equivalent to 1 vCPU or 1 hyperthread. For best practices on provisioning CPU and memory for CockroachDB, refer to the [Production Checklist](recommended-production-settings.html#hardware). +1 CPU in Kubernetes is equivalent to 1 vCPU or 1 hyperthread. For best practices on provisioning CPU and memory for CockroachDB, refer to the [Production Checklist]({% link {{ page.version.version }}/recommended-production-settings.md %}#hardware). {{site.data.alerts.end}} -Specify CPU and memory values in `cockroachdb.crdbCluster.resources.limits` and `cockroachdb.crdbCluster.resources.requests` in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster): +Specify CPU and memory values in `cockroachdb.crdbCluster.resources.limits` and `cockroachdb.crdbCluster.resources.requests` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster): ~~~ yaml cockroachdb: @@ -58,15 +58,15 @@ For more information on how Kubernetes handles resources, see the [Kubernetes do ### Cache and SQL memory size -Each CockroachDB node reserves a portion of its available memory for its cache and for storing temporary data for SQL queries. For more information on these settings, see the [Production Checklist](recommended-production-settings.html#cache-and-sql-memory-size). +Each CockroachDB node reserves a portion of its available memory for its cache and for storing temporary data for SQL queries. For more information on these settings, see the [Production Checklist]({% link {{ page.version.version }}/recommended-production-settings.md %}#cache-and-sql-memory-size). 
-The {{ site.data.products.cockroachdb-operator }} dynamically sets cache size and SQL memory size each to 25% (the recommended percentage) of the available memory, which depends on the memory request and limit you [specified](#memory-and-cpu) for your configuration. These values can be modified by adding the `cache` or `max-sql-memory` fields to `cockroachdb.crdbCluster.flags`, which is equivalent to appending `--cache` or `--max-sql-memory` as [cockroach start flags](cockroach-start.html#flags). +The {{ site.data.products.cockroachdb-operator }} dynamically sets cache size and SQL memory size each to 25% (the recommended percentage) of the available memory, which depends on the memory request and limit you [specified](#memory-and-cpu) for your configuration. These values can be modified by adding the `cache` or `max-sql-memory` fields to `cockroachdb.crdbCluster.flags`, which is equivalent to appending `--cache` or `--max-sql-memory` as [cockroach start flags]({% link {{ page.version.version }}/cockroach-start.md %}#flags). ## Persistent storage When you start your cluster, Kubernetes dynamically provisions and mounts a persistent volume into each pod. For more information on persistent volumes, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/storage/persistent-volumes/). -The storage capacity of each volume is set in `cockroachdb.crdbCluster.dataStore.volumeClaimTemplate.spec.resources` in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster): +The storage capacity of each volume is set in `cockroachdb.crdbCluster.dataStore.volumeClaimTemplate.spec.resources` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster): ~~~ yaml cockroachdb: @@ -79,13 +79,13 @@ cockroachdb: storage: "10Gi" ~~~ -You should provision an appropriate amount of disk storage for your workload. 
For recommendations on this, see the [Production Checklist](recommended-production-settings.html#storage). +You should provision an appropriate amount of disk storage for your workload. For recommendations on this, see the [Production Checklist]({% link {{ page.version.version }}/recommended-production-settings.md %}#storage). ### Expand disk size -If you discover that you need more capacity, you can expand the persistent volumes on a running cluster. Increasing disk size is often [beneficial for CockroachDB performance](kubernetes-operator-performance.html). +If you discover that you need more capacity, you can expand the persistent volumes on a running cluster. Increasing disk size is often [beneficial for CockroachDB performance]({% link {{ page.version.version }}/kubernetes-operator-performance.md %}). -Specify a new volume size in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster): +Specify a new volume size in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster): ~~~ yaml cockroachdb: @@ -119,7 +119,7 @@ The {{ site.data.products.cockroachdb-operator }} separates network traffic into | HTTP | 8080 | Used to access the DB Console | service.ports.http | | SQL | 26257 | Used for SQL shell access | service.ports.sql | -Specify alternate port numbers in `cockroachdb.crdbCluster.service.ports` of the {{ site.data.products.cockroachdb-operator }}'s [custom resource](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster) (for example, to match the default port `5432` on PostgreSQL): +Specify alternate port numbers in `cockroachdb.crdbCluster.service.ports` of the {{ site.data.products.cockroachdb-operator }}'s [custom resource]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster) (for example, to match the default port `5432` on 
PostgreSQL): ~~~ yaml cockroachdb: diff --git a/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md b/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md index 0297d02ed8a..7121b1c5a7a 100644 --- a/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md +++ b/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md @@ -48,7 +48,7 @@ In cloud provider deployments (e.g., [GKE](#hosted-gke), [EKS](#hosted-eks), or On bare metal Kubernetes deployments, you must plan a hierarchy of localities that suit your CockroachDB node distribution, then apply these values individually to nodes when they are initialized. Although you can set most of these values arbitrarily, you must set region and zone locations in the reserved `topology.kubernetes.io/region` and `topology.kubernetes.io/zone` namespaces, respectively. -For more information on how locality labels are used by CockroachDB, refer to the [`--locality` documentation](cockroach-start.html#locality). +For more information on how locality labels are used by CockroachDB, refer to the [`--locality` documentation]({% link {{ page.version.version }}/cockroach-start.md %}#locality). ### Architecture @@ -60,7 +60,7 @@ When starting Kubernetes, select machines with at least 4 vCPUs and 16 GiB of me ### Storage -Kubernetes deployments use external persistent volumes that are often replicated by the provider. CockroachDB replicates data automatically, and this redundant layer of [replication](% link {{ page.version.version }}/architecture/overview.md %}#replication) can impact performance. Using [local volumes](https://kubernetes.io/docs/concepts/storage/volumes/#local) may improve performance. +Kubernetes deployments use external persistent volumes that are often replicated by the provider. CockroachDB replicates data automatically, and this redundant layer of [replication]({% link {{ page.version.version }}/architecture/overview.md %}#replication) can impact performance. 
Using [local volumes](https://kubernetes.io/docs/concepts/storage/volumes/#local) may improve performance. ## Step 1. Start Kubernetes @@ -96,7 +96,7 @@ Cloud providers such as GKE, EKS, and AKS are not required to run CockroachDB on This creates GKE instances and joins them into a single Kubernetes cluster named `cockroachdb`. The `--region` flag specifies a [regional three-zone cluster](https://cloud.google.com/kubernetes-engine/docs/how-to/creating-a-regional-cluster), and `--num-nodes` specifies one Kubernetes worker node in each zone. - The `--machine-type` flag tells the node pool to use the [n2-standard-4](https://cloud.google.com/compute/docs/machine-types#standard_machine_types) machine type (4 vCPUs, 16 GB memory), which meets our [recommended CPU and memory configuration](recommended-production-settings#basic-hardware-recommendations). + The `--machine-type` flag tells the node pool to use the [n2-standard-4](https://cloud.google.com/compute/docs/machine-types#standard_machine_types) machine type (4 vCPUs, 16 GB memory), which meets our [recommended CPU and memory configuration]({% link {{ page.version.version }}/recommended-production-settings.md %}#basic-hardware-recommendations). {{site.data.alerts.callout_info}} Consider creating another, dedicated node group for the operator pod for system resource availability. @@ -152,7 +152,7 @@ Cloud providers such as GKE, EKS, and AKS are not required to run CockroachDB on --node-ami auto ~~~ - This creates EKS instances and joins them into a single Kubernetes cluster named `cockroachdb`. The `--node-type` flag tells the node pool to use the [m6i.xlarge](https://aws.amazon.com/ec2/instance-types/) instance type (4 vCPUs, 16 GB memory), which meets our [recommended CPU and memory configuration](recommended-production-settings#basic-hardware-recommendations). + This creates EKS instances and joins them into a single Kubernetes cluster named `cockroachdb`. 
The `--node-type` flag tells the node pool to use the [m6i.xlarge](https://aws.amazon.com/ec2/instance-types/) instance type (4 vCPUs, 16 GB memory), which meets our [recommended CPU and memory configuration]({% link {{ page.version.version }}/recommended-production-settings.md %}#basic-hardware-recommendations). {{site.data.alerts.callout_info}} Consider creating another, dedicated node group for the operator pod for system resource availability. @@ -252,7 +252,7 @@ For bare metal deployments, the specific Kubernetes infrastructure deployment st 1. Uncomment and modify `cockroachdb.crdbCluster.resources` in the values file with the CPU and memory requests and limits for each node to use. The default values are 4vCPU and 16GB of memory: - For more information on configuring node resource allocation, refer to [Resource management](configure-cockroachdb-kubernetes-operator.html) + For more information on configuring node resource allocation, refer to [Resource management]({% link {{ page.version.version }}/configure-cockroachdb-kubernetes-operator.md %}) 1. Modify the TLS configuration as desired. For a secure deployment, set `cockroachdb.tls.enabled` in the values file to `true`. You can either allow the operator to generate self-signed certificates, provide a custom CA certificate and generate other certificates, or use your own certificates. - **All self-signed certificates**: By default, the certificates are created automatically by a self-signer utility, which requires no configuration beyond setting a custom certificate duration if desired. This utility creates self-signed certificates for the nodes and root client which are stored in a secret. You can see these certificates by running `kubectl get secrets`: @@ -382,9 +382,9 @@ - `{node_secret_name}`: The name of the Kubernetes secret that contains the generated node certificate and key.
- `{client_secret_name}`: The name of the Kubernetes secret that contains the generated client certificate and key. - For a detailed tutorial of a TLS configuration with manual certificates, refer to [Example: Authenticate with cockroach cert](#example-authenticate-with-cockroach-cert). + For a detailed tutorial of a TLS configuration with manual certificates, refer to [Authenticate with cockroach cert](#authenticate-with-cockroach-cert). -1. In `cockroachdb.crdbCluster.localityMappings`, provide [locality mappings](#localities) that define locality levels and map them to node labels where the locality information of each Kubernetes node is stored. When CockroachDB is initialized on a node, it processes these values as though they are provided through the [`cockroach start --locality`](cockroach-start#locality) flag. +1. In `cockroachdb.crdbCluster.localityMappings`, provide [locality mappings](#localities) that define locality levels and map them to node labels where the locality information of each Kubernetes node is stored. When CockroachDB is initialized on a node, it processes these values as though they are provided through the [`cockroach start --locality`]({% link {{ page.version.version }}/cockroach-start.md %}#locality) flag. The default configuration uses the `region` and `zone` locality labels, mapped implicitly to the [`topology.kubernetes.io/region`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesioregion) and [`topology.kubernetes.io/zone`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesiozone) node labels. - In cloud provider deployments, the `topology.kubernetes.io/region` and `topology.kubernetes.io/zone` values on a node are populated by the cloud provider. @@ -492,7 +492,7 @@ To use the CockroachDB SQL client, follow these steps to launch a secure pod run kubectl create -f client-secure.yaml ~~~ -1.
Get a shell into the pod and start the CockroachDB [built-in SQL client](cockroach-sql.html): +1. Get a shell into the pod and start the CockroachDB [built-in SQL client]({% link {{ page.version.version }}/cockroach-sql.md %}): {% include_cached copy-clipboard.html %} ~~~ shell @@ -516,7 +516,7 @@ To use the CockroachDB SQL client, follow these steps to launch a secure pod run This pod will continue running indefinitely, so any time you need to reopen the built-in SQL client or run any other cockroach client commands (e.g., cockroach node), repeat this step using the appropriate cockroach command. If you'd prefer to delete the pod and recreate it when needed, run `kubectl delete pod cockroachdb-client-secure`. -1. Run some basic [CockroachDB SQL statements](learn-cockroachdb-sql.html): +1. Run some basic [CockroachDB SQL statements]({% link {{ page.version.version }}/learn-cockroachdb-sql.md %}): ~~~ sql CREATE DATABASE bank; @@ -529,7 +529,7 @@ To use the CockroachDB SQL client, follow these steps to launch a secure pod run (1 row) ~~~ -1. [Create a user with a password](create-user.html#create-a-user-with-a-password): +1. [Create a user with a password]({% link {{ page.version.version }}/create-user.md %}#create-a-user-with-a-password): ~~~ sql CREATE USER roach WITH PASSWORD 'Q7gc8rEdS'; @@ -545,11 +545,11 @@ To use the CockroachDB SQL client, follow these steps to launch a secure pod run ## Step 4. Access the DB Console -To access the cluster's [DB Console](ui-overview.html): +To access the cluster's [DB Console]({% link {{ page.version.version }}/ui-overview.md %}): -1. On secure clusters, [certain pages of the DB Console](ui-overview.html#db-console-access) can only be accessed by `admin` users. +1. On secure clusters, [certain pages of the DB Console]({% link {{ page.version.version }}/ui-overview.md %}#db-console-access) can only be accessed by `admin` users. 
- Get a shell into the pod and start the CockroachDB [built-in SQL client](cockroach-sql.html): + Get a shell into the pod and start the CockroachDB [built-in SQL client]({% link {{ page.version.version }}/cockroach-sql.md %}): {% include_cached copy-clipboard.html %} ~~~ shell @@ -590,26 +590,27 @@ To access the cluster's [DB Console](ui-overview.html): {{site.data.alerts.end}} 1. In the DB Console, verify that the cluster is running as expected: - 1. View the [**Node List**](ui-cluster-overview-page.html#node-list) to ensure that all nodes successfully joined the cluster. + 1. View the [**Node List**]({% link {{ page.version.version }}/ui-cluster-overview-page.md %}#node-list) to ensure that all nodes successfully joined the cluster. 1. Click the **Databases** tab on the left to verify that `bank` is listed. ## Next steps Read the following pages for detailed information on cluster scaling, certificate management, resource management, best practices, and other cluster operation details: -- [Pod scheduling](schedule-cockroachdb-kubernetes-operator.html) -- [Resource management](configure-cockroachdb-kubernetes-operator.html) -- [Certificate management](secure-cockroachdb-kubernetes-operator.html) -- [Cluster scaling](scale-cockroachdb-kubernetes-operator.html) -- [Cluster monitoring](monitor-cockroachdb-kubernetes-operator.html) -- [Upgrade a cluster](upgrade-cockroachdb-kubernetes-operator.html) -- [CockroachDB performance on Kubernetes](kubernetes-operator-performance.html) +- [Pod scheduling]({% link {{ page.version.version }}/schedule-cockroachdb-kubernetes-operator.md %}) +- [Resource management]({% link {{ page.version.version }}/configure-cockroachdb-kubernetes-operator.md %}) +- [Certificate management]({% link {{ page.version.version }}/secure-cockroachdb-kubernetes-operator.md %}) +- [Cluster scaling]({% link {{ page.version.version }}/scale-cockroachdb-kubernetes-operator.md %}) +- [Cluster monitoring]({% link {{ page.version.version 
}}/monitor-cockroachdb-kubernetes-operator.md %}) +- [Upgrade a cluster]({% link {{ page.version.version }}/upgrade-cockroachdb-kubernetes-operator.md %}) +- [Override deployment templates]({% link {{ page.version.version }}/override-templates-kubernetes-operator.md %}) +- [CockroachDB performance on Kubernetes]({% link {{ page.version.version }}/kubernetes-operator-performance.md %}) ## Examples ### Authenticate with `cockroach cert` -The following example uses [cockroach cert commands](cockroach-cert.html) to generate and sign the CockroachDB node and client certificates. To learn more about the supported methods of signing certificates, refer to [Authentication](authentication.html#using-digital-certificates-with-cockroachdb). +The following example uses [cockroach cert commands]({% link {{ page.version.version }}/cockroach-cert.md %}) to generate and sign the CockroachDB node and client certificates. To learn more about the supported methods of signing certificates, refer to [Authentication]({% link {{ page.version.version }}/authentication.md %}#using-digital-certificates-with-cockroachdb). 1. Create two directories: diff --git a/src/current/v25.2/kubernetes-operator-performance.md b/src/current/v25.2/kubernetes-operator-performance.md index 98643eba64e..2b60972da7d 100644 --- a/src/current/v25.2/kubernetes-operator-performance.md +++ b/src/current/v25.2/kubernetes-operator-performance.md @@ -15,9 +15,9 @@ The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ pag Before you focus on optimizing a Kubernetes-orchestrated CockroachDB cluster: -1. Before deploying on Kubernetes, ensure that performance is optimized for your workload on identical hardware. You may find that you first need to [modify your workload](performance-best-practices-overview.html) or use [different machine specs](recommended-production-settings.html#hardware) to achieve the performance you need. +1. 
Before deploying on Kubernetes, ensure that performance is optimized for your workload on identical hardware. You may find that you first need to [modify your workload]({% link {{ page.version.version }}/performance-best-practices-overview.md %}) or use [different machine specs]({% link {{ page.version.version }}/recommended-production-settings.md %}#hardware) to achieve the performance you need. -1. Read the documentation for [deploying CockroachDB on a Kubernetes cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster) to familiarize yourself with the necessary Kubernetes terminology and deployment abstractions. +1. Read the documentation for [deploying CockroachDB on a Kubernetes cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster) to familiarize yourself with the necessary Kubernetes terminology and deployment abstractions. ## Performance factors @@ -31,15 +31,15 @@ Because CockroachDB is under very active development, there are typically substa ### Client workload -Your workload is the single most important factor in database performance. Read through [SQL performance best practices](performance-best-practices-overview.html) and determine whether you can make workload changes to speed up your application. +Your workload is the single most important factor in database performance. Read through [SQL performance best practices]({% link {{ page.version.version }}/performance-best-practices-overview.md %}) and determine whether you can make workload changes to speed up your application. ### Machine size -The size of the machines you're using is not a Kubernetes-specific concern, but is a good place to start if you want more performance. Using machines with more CPU will almost always allow for greater throughput. Because Kubernetes runs a set of processes on every machine in a cluster, it is typically more efficient to use fewer large machines than more small machines. 
For specific suggestions, refer to [Hardware](recommended-production-settings.html#hardware). +The size of the machines you're using is not a Kubernetes-specific concern, but is a good place to start if you want more performance. Using machines with more CPU will almost always allow for greater throughput. Because Kubernetes runs a set of processes on every machine in a cluster, it is typically more efficient to use fewer large machines than more small machines. For specific suggestions, refer to [Hardware]({% link {{ page.version.version }}/recommended-production-settings.md %}#hardware). ### Disk type -CockroachDB makes heavy use of the disks you provide it, so using faster disks is an easy way to improve your cluster's performance. For the best performance, [SSDs are strongly recommended](recommended-production-settings.html#hardware). +CockroachDB makes heavy use of the disks you provide it, so using faster disks is an easy way to improve your cluster's performance. For the best performance, [SSDs are strongly recommended]({% link {{ page.version.version }}/recommended-production-settings.md %}#hardware). The Cockroach Labs-provided configuration does not specify disk type, so in most environments Kubernetes will auto-provision disks of the default type. In the common cloud environments (AWS, GCP, Azure) this means you'll get slow disks that aren't optimized for database workloads (e.g., HDDs on GCE, SSDs without provisioned IOPS on AWS). @@ -126,7 +126,7 @@ Since [GCE disk IOPS scale linearly with disk size](https://cloud.google.com/com The examples thus far assume the use of auto-provisioned, remotely attached disks. However, local disks typically provide better performance than remotely attached disks. For example, SSD Instance Store Volumes outperform EBS Volumes on AWS, and Local SSDs outperform Persistent Disks on GCE. As of v1.14, Kubernetes supports [local volumes](https://kubernetes.io/docs/concepts/storage/volumes/#local). 
-When using local disks, consider using [replication controls](configure-replication-zones.html) to increase the replication factor of your data from 3 (default) to 5. This is because local disks have a greater chance of experiencing a disk failure than a cloud provider's network-attached disks, which are often replicated underneath the covers. +When using local disks, consider using [replication controls]({% link {{ page.version.version }}/configure-replication-zones.md %}) to increase the replication factor of your data from 3 (default) to 5. This is because local disks have a greater chance of experiencing a disk failure than a cloud provider's network-attached disks, which are often replicated underneath the covers. ### Resource requests and limits @@ -134,7 +134,7 @@ When you ask Kubernetes to run a pod, you can tell it to reserve certain amounts #### Resource requests -Resource requests reserve a certain amount of CPU or memory for your container. If you add resource requests to your CockroachDB YAML file, Kubernetes will schedule each CockroachDB pod onto a node with sufficient unreserved resources and ensure the pods are guaranteed the reserved resources using the applicable Linux container primitives. If you are running other workloads in your Kubernetes cluster, setting resource requests is strongly recommended to ensure good performance. If you do not set resource requests, CockroachDB could be starved of CPU cycles or [OOM-stopped](cluster-setup-troubleshooting.html#out-of-memory-oom-crash) before less important processes. +Resource requests reserve a certain amount of CPU or memory for your container. If you add resource requests to your CockroachDB YAML file, Kubernetes will schedule each CockroachDB pod onto a node with sufficient unreserved resources and ensure the pods are guaranteed the reserved resources using the applicable Linux container primitives. 
If you are running other workloads in your Kubernetes cluster, setting resource requests is strongly recommended to ensure good performance. If you do not set resource requests, CockroachDB could be starved of CPU cycles or [OOM-stopped]({% link {{ page.version.version }}/cluster-setup-troubleshooting.md %}#out-of-memory-oom-crash) before less important processes. To determine how many resources are usable on your Kubernetes nodes, you can run: @@ -305,4 +305,4 @@ If you aren't using a hosted Kubernetes service, you'll need to choose a [networ If your Kubernetes cluster uses heterogeneous hardware, you will likely want to ensure that CockroachDB only runs on specific machines. To optimize performance, it can be beneficial to dedicate those machines exclusively to CockroachDB. -For more information, refer to [Pod scheduling](schedule-cockroachdb-kubernetes-operator.html). +For more information, refer to [Pod scheduling]({% link {{ page.version.version }}/schedule-cockroachdb-kubernetes-operator.md %}). diff --git a/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md b/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md index c61aaac4c92..3ec61cad52a 100644 --- a/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md +++ b/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md @@ -20,7 +20,7 @@ helm upgrade --install --set operator.enabled=false crdb-test --debug ./cockroac ~~~ {{site.data.alerts.callout_success}} -If your existing cluster was created using the {{ site.data.products.public-operator }}, refer to the [{{ site.data.products.public-operator }} migration guide](migrate-cockroachdb-kubernetes-operator.html). +If your existing cluster was created using the {{ site.data.products.public-operator }}, refer to the [{{ site.data.products.public-operator }} migration guide]({% link {{ page.version.version }}/migrate-cockroachdb-kubernetes-operator.md %}). 
{{site.data.alerts.end}} This migration can be completed without affecting cluster availability, and preserves existing disks so that data doesn't need to be replicated into empty volumes. The process scales down the StatefulSet by one node before adding each operator-managed pod, so the maximum cluster capacity will be reduced by one node periodically throughout the migration. @@ -112,7 +112,7 @@ For each pod in the StatefulSet, perform the following steps: kubectl apply -f manifests/crdbnode-4.yaml ~~~ -3. Wait for the new pod to become ready. If it doesn’t, [check the operator logs](monitor-cockroachdb-kubernetes-operator.html#monitor-the-operator) for errors. +3. Wait for the new pod to become ready. If it doesn’t, [check the operator logs]({% link {{ page.version.version }}/monitor-cockroachdb-kubernetes-operator.md %}#monitor-the-operator) for errors. 4. Before moving on to the next replica migration, verify that there are no underreplicated ranges: 1. Set up port forwarding to access the CockroachDB node’s HTTP interface. Note that the DB Console runs on port 8080 by default: diff --git a/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md b/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md index 102e069a496..033170f7a08 100644 --- a/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md @@ -23,7 +23,7 @@ kubectl apply -f https://raw.githubusercontent.com/cockroachdb/cockroach-operato ~~~ {{site.data.alerts.callout_success}} -If your existing cluster was created as a StatefulSet using Helm, refer to the [Helm migration guide](migrate-cockroachdb-kubernetes-helm.html). +If your existing cluster was created as a StatefulSet using Helm, refer to the [Helm migration guide]({% link {{ page.version.version }}/migrate-cockroachdb-kubernetes-helm.md %}). 
{{site.data.alerts.end}} This migration process can be completed without affecting cluster availability, and preserves existing disks so that data doesn’t need to be replicated into empty volumes. This process scales down the StatefulSet by one node before adding each operator-managed pod, so the maximum cluster capacity will be reduced by one node periodically throughout the migration. @@ -161,7 +161,7 @@ For each pod in the StatefulSet, perform the following steps: kubectl apply -f manifests/crdbnode-4.yaml ~~~ -1. Wait for the new pod to become ready. If it doesn’t, [check the operator logs](monitor-cockroachdb-kubernetes-operator.html#monitor-the-operator) for errors. +1. Wait for the new pod to become ready. If it doesn’t, [check the operator logs]({% link {{ page.version.version }}/monitor-cockroachdb-kubernetes-operator.md %}#monitor-the-operator) for errors. 1. Before moving on to the next replica migration, verify that there are no underreplicated ranges: 1. Set up port forwarding to access the CockroachDB node’s HTTP interface. Note that the DB Console runs on port 8080 by default: diff --git a/src/current/v25.2/monitor-cockroachdb-kubernetes-operator.md b/src/current/v25.2/monitor-cockroachdb-kubernetes-operator.md index 138e1b1a936..024d04925d2 100644 --- a/src/current/v25.2/monitor-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/monitor-cockroachdb-kubernetes-operator.md @@ -6,7 +6,7 @@ toc_not_nested: true docs_area: deploy --- -Despite CockroachDB's various [built-in safeguards against failure](architecture/replication-layer.html), it is critical to actively monitor the overall health and performance of a cluster running in production and to create alerting rules that promptly send notifications when there are events that require investigation or intervention. 
+Despite CockroachDB's various [built-in safeguards against failure]({% link {{ page.version.version }}/architecture/replication-layer.md %}), it is critical to actively monitor the overall health and performance of a cluster running in production and to create alerting rules that promptly send notifications when there are events that require investigation or intervention. {{site.data.alerts.callout_info}} The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). @@ -19,7 +19,7 @@ Every node of a CockroachDB cluster exports granular timeseries metrics formatte This guidance is based on [CoreOS's Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator/tree/main), which allows a Prometheus instance to be managed using built-in Kubernetes concepts. {{site.data.alerts.callout_info}} -If you're on Hosted GKE, before starting, make sure the email address associated with your Google Cloud account is part of the `cluster-admin` RBAC group, as shown in [Deploy CockroachDB with Kubernetes](deploy-cockroachdb-with-kubernetes-operator.html). +If you're on Hosted GKE, before starting, make sure the email address associated with your Google Cloud account is part of the `cluster-admin` RBAC group, as shown in [Deploy CockroachDB with Kubernetes]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}). {{site.data.alerts.end}} 1. 
From your local workstation, edit the cockroachdb service to add the prometheus: cockroachdb label: @@ -112,7 +112,7 @@ If you're on Hosted GKE, before starting, make sure the email address associated Prometheus graph {{site.data.alerts.callout_info}} - Prometheus auto-completes CockroachDB time series metrics for you, but if you want to see a full listing, with descriptions, port-forward as described in [Access the DB Console](deploy-cockroachdb-with-kubernetes-operator.html#step-4-access-the-db-console) and then point your browser to [http://localhost:8080/_status/vars](http://localhost:8080/_status/vars). + Prometheus auto-completes CockroachDB time series metrics for you, but if you want to see a full listing, with descriptions, port-forward as described in [Access the DB Console]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#step-4-access-the-db-console) and then point your browser to [http://localhost:8080/_status/vars](http://localhost:8080/_status/vars). {{site.data.alerts.end}} For more details on using the Prometheus UI, see their [official documentation](https://prometheus.io/docs/introduction/getting_started/). @@ -235,7 +235,7 @@ node_decommissioning{node="cockroachdb-r4wz8"} 0 ## Configure logging -You can use the operator to configure the CockroachDB logging system. This allows you to output logs to [configurable log sinks](configure-logs.html#configure-log-sinks) such as file or network logging destinations. +You can use the operator to configure the CockroachDB logging system. This allows you to output logs to [configurable log sinks]({% link {{ page.version.version }}/configure-logs.md %}#configure-log-sinks) such as file or network logging destinations. The logging configuration is defined in a [ConfigMap](https://kubernetes.io/docs/concepts/configuration/configmap/) object, using a key named `logging.yaml`.
For example: @@ -265,12 +265,12 @@ metadata: namespace: cockroach-ns ~~~ -The above configuration overrides the [default logging configuration](configure-logs.html#default-logging-configuration) and reflects our recommended Kubernetes logging configuration: +The above configuration overrides the [default logging configuration]({% link {{ page.version.version }}/configure-logs.md %}#default-logging-configuration) and reflects our recommended Kubernetes logging configuration: -- Save debug-level logs (the `DEV` [log channel](logging-overview.html#logging-channels)) to disk for troubleshooting. -- Send operational- and security-level logs to a [network collector](logging-use-cases.html#network-logging), in this case [Fluentd](configure-logs.html#fluentd-logging-format). +- Save debug-level logs (the `DEV` [log channel]({% link {{ page.version.version }}/logging-overview.md %}#logging-channels)) to disk for troubleshooting. +- Send operational- and security-level logs to a [network collector]({% link {{ page.version.version }}/logging-use-cases.md %}#network-logging), in this case [Fluentd]({% link {{ page.version.version }}/configure-logs.md %}#fluentd-logging-format). 
-The ConfigMap `name` must match the `cockroachdb.crdbCluster.loggingConfigMapName` object in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster): +The ConfigMap `name` must match the `cockroachdb.crdbCluster.loggingConfigMapName` object in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster): ~~~ yaml cockroachdb: @@ -278,7 +278,7 @@ cockroachdb: loggingConfigMapName: logconfig ~~~ -By default, the operator also modifies the [default logging configuration](configure-logs.html#default-logging-configuration) with the following: +By default, the operator also modifies the [default logging configuration]({% link {{ page.version.version }}/configure-logs.md %}#default-logging-configuration) with the following: ~~~ yaml sinks: @@ -287,11 +287,11 @@ sinks: redact: true ~~~ -This outputs logging events in the [OPS](logging.html#ops) channel to a `cockroach-stderr.log` file. +This outputs logging events in the [OPS]({% link {{ page.version.version }}/logging.md %}#ops) channel to a `cockroach-stderr.log` file. ### Example: Configuring a troubleshooting log file on pods -In this example, CockroachDB has already been deployed on a Kubernetes cluster. Override the [default logging configuration](configure-logs.html#default-logging-configuration) to output [DEV](logging.html#dev) logs to a `cockroach-dev.log` file. +In this example, CockroachDB has already been deployed on a Kubernetes cluster. Override the [default logging configuration]({% link {{ page.version.version }}/configure-logs.md %}#default-logging-configuration) to output [DEV]({% link {{ page.version.version }}/logging.md %}#dev) logs to a `cockroach-dev.log` file. 1. Create a ConfigMap named `logconfig`. 
Note that `namespace` is set to the `cockroach-ns` namespace: @@ -316,7 +316,7 @@ In this example, CockroachDB has already been deployed on a Kubernetes cluster. The ConfigMap key is not related to the ConfigMap `name` or YAML filename, and must be named `logging.yaml`. {{site.data.alerts.end}} - This configuration outputs `DEV` logs that have severity [WARNING](logging.html#logging-levels-severities) to a `cockroach-dev.log` file on each pod. + This configuration outputs `DEV` logs that have severity [WARNING]({% link {{ page.version.version }}/logging.md %}#logging-levels-severities) to a `cockroach-dev.log` file on each pod. 1. Apply the ConfigMap to the cluster: diff --git a/src/current/v25.2/scale-cockroachdb-kubernetes-operator.md b/src/current/v25.2/scale-cockroachdb-kubernetes-operator.md index 97f9310ee3a..991f6bfa49a 100644 --- a/src/current/v25.2/scale-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/scale-cockroachdb-kubernetes-operator.md @@ -15,12 +15,12 @@ The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ pag ## Add nodes -Before scaling up CockroachDB, note the following [topology recommendations](recommended-production-settings.html#topology): +Before scaling up CockroachDB, note the following [topology recommendations]({% link {{ page.version.version }}/recommended-production-settings.md %}#topology): - Each CockroachDB node (running in its own pod) should run on a separate Kubernetes worker node. - Each availability zone should have the same number of CockroachDB nodes. -If your cluster has 3 CockroachDB nodes distributed across 3 availability zones (as in our [deployment example](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster)), Cockroach Labs recommends scaling up by a multiple of 3 to retain an even distribution of nodes. You should therefore scale up to a minimum of 6 CockroachDB nodes, with 2 nodes in each zone. 
+If your cluster has 3 CockroachDB nodes distributed across 3 availability zones (as in our [deployment example]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster)), Cockroach Labs recommends scaling up by a multiple of 3 to retain an even distribution of nodes. You should therefore scale up to a minimum of 6 CockroachDB nodes, with 2 nodes in each zone. 1. Run `kubectl get nodes` to list the worker nodes in your Kubernetes cluster. There should be at least as many worker nodes as pods you plan to add. This ensures that no more than one pod will be placed on each worker node. @@ -33,7 +33,7 @@ If your cluster has 3 CockroachDB nodes distributed across 3 availability zones This example distributes 2 worker nodes across the default 3 zones, raising the total to 6 worker nodes. -1. Update `cockroachdb.crdbCluster.regions.code.nodes` in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster), with the target size of the CockroachDB cluster in the specified region. This value refers to the number of CockroachDB nodes, each running in one pod: +1. Update `cockroachdb.crdbCluster.regions.code.nodes` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster), with the target size of the CockroachDB cluster in the specified region. This value refers to the number of CockroachDB nodes, each running in one pod: ~~~ yaml cockroachdb: @@ -73,13 +73,13 @@ If your cluster has 3 CockroachDB nodes distributed across 3 availability zones ## Remove nodes -If your nodes are distributed across 3 availability zones (as in our [deployment example](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster)), Cockroach Labs recommends scaling down by a multiple of 3 to retain an even distribution. 
If your cluster has 6 CockroachDB nodes, you should therefore scale down to 3, with 1 node in each zone. +If your nodes are distributed across 3 availability zones (as in our [deployment example]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster)), Cockroach Labs recommends scaling down by a multiple of 3 to retain an even distribution. If your cluster has 6 CockroachDB nodes, you should therefore scale down to 3, with 1 node in each zone. {{site.data.alerts.callout_danger}} Do not scale down to fewer than 3 nodes. This is considered an anti-pattern on CockroachDB and will cause errors. Before scaling down CockroachDB, note that each availability zone should have the same number of CockroachDB nodes. {{site.data.alerts.end}} -1. Update `cockroachdb.crdbCluster.regions.code.nodes` in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster), with the target size of the CockroachDB cluster. For instance, to scale a cluster in Google Cloud down to 3 nodes: +1. Update `cockroachdb.crdbCluster.regions.code.nodes` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster), with the target size of the CockroachDB cluster. 
For instance, to scale a cluster in Google Cloud down to 3 nodes: ~~~ yaml cockroachdb: diff --git a/src/current/v25.2/schedule-cockroachdb-kubernetes-operator.md b/src/current/v25.2/schedule-cockroachdb-kubernetes-operator.md index 3f25c24a0a8..2d3c661f3c2 100644 --- a/src/current/v25.2/schedule-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/schedule-cockroachdb-kubernetes-operator.md @@ -17,7 +17,7 @@ The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ pag A pod with a *node selector* will be scheduled onto a worker node that has matching [labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/), or key-value pairs. -Specify the labels in `cockroachdb.crdbCluster.nodeSelector` in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). If you specify multiple `nodeSelector` labels, the node must match all of them. +Specify the labels in `cockroachdb.crdbCluster.nodeSelector` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster). If you specify multiple `nodeSelector` labels, the node must match all of them. The following configuration causes CockroachDB pods to be scheduled onto worker nodes that have *both* the labels `worker-pool-name=crdb-workers` and `kubernetes.io/arch=amd64`: @@ -46,7 +46,7 @@ For an example, see [Scheduling CockroachDB onto labeled nodes](#example-schedul ### Add a node affinity -Specify node affinities in `cockroachdb.crdbCluster.affinity.nodeAffinity` in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). If you specify multiple `matchExpressions` labels, the node must match all of them. If you specify multiple `values` for a label, the node can match any of the values. 
+Specify node affinities in `cockroachdb.crdbCluster.affinity.nodeAffinity` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster). If you specify multiple `matchExpressions` labels, the node must match all of them. If you specify multiple `values` for a label, the node can match any of the values. The following configuration requires that CockroachDB pods are scheduled onto worker nodes running a Linux operating system, with a preference against worker nodes in the `us-east4-b` availability zone. @@ -80,7 +80,7 @@ For more context on how these rules work, see the [Kubernetes documentation](htt ### Add a pod affinity or anti-affinity -Specify pod affinities and anti-affinities in `cockroachdb.crdbCluster.affinity.podAffinity` and `cockroachdb.crdbCluster.affinity.podAntiAffinity` in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). If you specify multiple `matchExpressions` labels, the node must match all of them. If you specify multiple `values` for a label, the node can match any of the values. +Specify pod affinities and anti-affinities in `cockroachdb.crdbCluster.affinity.podAffinity` and `cockroachdb.crdbCluster.affinity.podAntiAffinity` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster). If you specify multiple `matchExpressions` labels, the node must match all of them. If you specify multiple `values` for a label, the node can match any of the values. The following configuration attempts to schedule CockroachDB pods in the same zones as the pods that run our example [load generator](https://github.com/cockroachdb/cockroach/blob/master/cloud/kubernetes/example-app.yaml) app. It disallows CockroachDB pods from being co-located on the same worker node. 
@@ -148,13 +148,13 @@ In this example, CockroachDB has not yet been deployed to a running Kubernetes c node/gke-cockroachdb-default-pool-ee4d4d67-w18b labeled ~~~ - In this example, 6 GKE nodes are deployed in 3 [node pools](https://cloud.google.com/kubernetes-engine/docs/concepts/node-pools), and each node pool resides in a separate availability zone. To maintain an even distribution of CockroachDB pods as specified in our [topology recommendations](recommended-production-settings.html#topology), each of the 3 labeled worker nodes must belong to a different node pool. + In this example, 6 GKE nodes are deployed in 3 [node pools](https://cloud.google.com/kubernetes-engine/docs/concepts/node-pools), and each node pool resides in a separate availability zone. To maintain an even distribution of CockroachDB pods as specified in our [topology recommendations]({% link {{ page.version.version }}/recommended-production-settings.md %}#topology), each of the 3 labeled worker nodes must belong to a different node pool. {{site.data.alerts.callout_info}} This also ensures that the CockroachDB pods, which will be bound to persistent volumes in the same three availability zones, can be scheduled onto worker nodes in their respective zones. {{site.data.alerts.end}} -1. Add the following rules to the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster): +1. Add the following rules to the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster): ~~~ yaml cockroachdb: @@ -215,7 +215,7 @@ For an example, see [Evicting CockroachDB from a running worker node](#example-e ### Add a toleration -Specify pod tolerations in the `cockroachdb.crdbCluster.tolerations` object in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). 
+Specify pod tolerations in the `cockroachdb.crdbCluster.tolerations` object in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster). The following toleration matches a taint with the specified key, value, and `NoSchedule` effect, using the `Equal` operator. A toleration that uses the `Equal` operator must include a `value` field: @@ -229,7 +229,7 @@ cockroachdb: effect: "NoSchedule" ~~~ -A `NoSchedule` taint on a node prevents pods from being scheduled onto the node. The matching toleration allows a pod to be scheduled onto the node. A `NoSchedule` toleration is therefore best included before [deploying the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). +A `NoSchedule` taint on a node prevents pods from being scheduled onto the node. The matching toleration allows a pod to be scheduled onto the node. A `NoSchedule` toleration is therefore best included before [deploying the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster). {{site.data.alerts.callout_info}} A `PreferNoSchedule` taint discourages, but does not disallow, pods from being scheduled onto the node. @@ -281,7 +281,7 @@ In this example, CockroachDB has already been deployed on a Kubernetes cluster. node/gke-cockroachdb-default-pool-4e5ce539-j1h1 tainted ~~~ -1. Add a matching tolerations object in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). +1. Add a matching tolerations object in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster). 
~~~ yaml cockroachdb: @@ -323,7 +323,7 @@ A pod with a *topology spread constraint* must satisfy its conditions when being ### Add a topology spread constraint -Specify pod topology spread constraints in the `cockroachdb.crdbCluster.topologySpreadConstraints` object of the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). If you specify multiple `topologySpreadConstraints` objects, the matching pods must satisfy all of the constraints. +Specify pod topology spread constraints in the `cockroachdb.crdbCluster.topologySpreadConstraints` object of the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster). If you specify multiple `topologySpreadConstraints` objects, the matching pods must satisfy all of the constraints. The following topology spread constraint ensures that CockroachDB pods deployed with the label `environment=production` will not be unevenly distributed across zones by more than `1` pod: @@ -347,7 +347,7 @@ For more context on how these rules work, see the [Kubernetes documentation](htt To assist in working with your cluster, you can add labels and annotations to your resources. 
-Specify labels in `cockroachdb.crdbCluster.podLabels` and annotations in `cockroachdb.crdbCluster.podAnnotations` in the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster): +Specify labels in `cockroachdb.crdbCluster.podLabels` and annotations in `cockroachdb.crdbCluster.podAnnotations` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster): ~~~ yaml cockroachdb: diff --git a/src/current/v25.2/secure-cockroachdb-kubernetes-operator.md b/src/current/v25.2/secure-cockroachdb-kubernetes-operator.md index df196b505aa..9d17572f55c 100644 --- a/src/current/v25.2/secure-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/secure-cockroachdb-kubernetes-operator.md @@ -20,7 +20,7 @@ You may need to rotate the node, client, or CA certificates in the following sce ### Example: Rotate certificates signed with `cockroach cert` -If you previously [authenticated with cockroach cert](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster), follow these steps to rotate the certificates using the same CA: +If you previously [authenticated with cockroach cert]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster), follow these steps to rotate the certificates using the same CA: 1. Create a new client certificate and key pair for the root user, overwriting the previous certificate and key: @@ -45,7 +45,7 @@ If you previously [authenticated with cockroach cert](deploy-cockroachdb-with-ku secret/cockroachdb.client.root.2 created ~~~ -1. Create a new certificate and key pair for your CockroachDB nodes, overwriting the previous certificate and key. Specify the namespace you used when [deploying the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). This example uses the `cockroach-ns` namespace: +1. 
Create a new certificate and key pair for your CockroachDB nodes, overwriting the previous certificate and key. Specify the namespace you used when [deploying the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster). This example uses the `cockroach-ns` namespace: {% include_cached copy-clipboard.html %} ~~~ shell @@ -75,7 +75,7 @@ If you previously [authenticated with cockroach cert](deploy-cockroachdb-with-ku secret/cockroachdb.node.2 created ~~~ -1. Add `cockroachdb.tls.externalCertificates.certificates.nodeClientSecretName` and `cockroachdb.tls.externalCertificates.certificates.nodeSecretName` to the values file used to [deploy the cluster](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster): +1. Add `cockroachdb.tls.externalCertificates.certificates.nodeClientSecretName` and `cockroachdb.tls.externalCertificates.certificates.nodeSecretName` to the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster): ~~~ yaml cockroachdb: @@ -101,7 +101,7 @@ If you previously [authenticated with cockroach cert](deploy-cockroachdb-with-ku ~~~ {{site.data.alerts.callout_info}} - Remember that `nodeSecretName` and `nodeClientSecretName` in the operator configuration must specify these secret names. For details, see the [deployment guide](deploy-cockroachdb-with-kubernetes-operator.html#initialize-the-cluster). + Remember that `nodeSecretName` and `nodeClientSecretName` in the operator configuration must specify these secret names. For details, see the [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster). {{site.data.alerts.end}} 1. 
Apply the new settings to the cluster: diff --git a/src/current/v25.2/simulate-a-multi-region-cluster-on-localhost.md b/src/current/v25.2/simulate-a-multi-region-cluster-on-localhost.md index 3ddca542d37..59450587dba 100644 --- a/src/current/v25.2/simulate-a-multi-region-cluster-on-localhost.md +++ b/src/current/v25.2/simulate-a-multi-region-cluster-on-localhost.md @@ -8,7 +8,7 @@ docs_area: deploy Once you've [installed CockroachDB]({% link {{ page.version.version }}/install-cockroachdb.md %}), you can simulate multi-region cluster on your local machine using [`cockroach demo`]({% link {{ page.version.version }}/cockroach-demo.md %})to learn about CockroachDB's [multi-region abstractions]({% link {{ page.version.version }}/multiregion-overview.md %}). {{site.data.alerts.callout_info}} -[`cockroach demo`]({% link {{ page.version.version }}/cockroach-demo.md %}) is not suitable for production deployments. Additionally, simulating multiple geographically distributed nodes on a single host is not representative of the [performance you should expect]({% link {{ page.version.version }}/frequently-asked-questions.md %}#single-row-perf) in a production deployment. To learn more about production multi-region deployments, refer to [Orchestrate CockroachDB Across Multiple Kubernetes Clusters]({% link {{ page.version.version }}/orchestrate-cockroachdb-with-kubernetes-multi-cluster.md %}) and [Deploy a Global, Serverless Application]({% link {{ page.version.version }}/movr-flask-deployment.md %}), and review the [Production Checklist](recommended-production-settings.html). +[`cockroach demo`]({% link {{ page.version.version }}/cockroach-demo.md %}) is not suitable for production deployments. Additionally, simulating multiple geographically distributed nodes on a single host is not representative of the [performance you should expect]({% link {{ page.version.version }}/frequently-asked-questions.md %}#single-row-perf) in a production deployment. 
To learn more about production multi-region deployments, refer to [Orchestrate CockroachDB Across Multiple Kubernetes Clusters]({% link {{ page.version.version }}/orchestrate-cockroachdb-with-kubernetes-multi-cluster.md %}) and [Deploy a Global, Serverless Application]({% link {{ page.version.version }}/movr-flask-deployment.md %}), and review the [Production Checklist]({% link {{ page.version.version }}/recommended-production-settings.md %}). {{site.data.alerts.end}} ## Before you begin diff --git a/src/current/v25.2/upgrade-cockroachdb-kubernetes-operator.md b/src/current/v25.2/upgrade-cockroachdb-kubernetes-operator.md index ab0a63cb831..0f4272e5d16 100644 --- a/src/current/v25.2/upgrade-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/upgrade-cockroachdb-kubernetes-operator.md @@ -7,7 +7,7 @@ secure: true docs_area: deploy --- -This page describes how to upgrade a CockroachDB cluster that is [deployed on a Kubernetes cluster](deploy-cockroachdb-with-kubernetes-operator.html) with the {{ site.data.products.cockroachdb-operator }}. +This page describes how to upgrade a CockroachDB cluster that is [deployed on a Kubernetes cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}) with the {{ site.data.products.cockroachdb-operator }}. {{site.data.alerts.callout_info}} The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). @@ -60,7 +60,7 @@ To upgrade from one patch release to another within the same major version, perf -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.containers[0].image}{"\n"}' ~~~ -You can also check the CockroachDB version of each node in the [DB Console](ui-cluster-overview-page.html#node-details). +You can also check the CockroachDB version of each node in the [DB Console]({% link {{ page.version.version }}/ui-cluster-overview-page.md %}#node-details). 
### Roll back a patch upgrade From 446c61394ab8f1ddab8e4a6fee26ede09a98bce0 Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Tue, 5 Aug 2025 11:38:09 -0400 Subject: [PATCH 13/27] Add ingress configuration --- ...nfigure-cockroachdb-kubernetes-operator.md | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md b/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md index 03aba85264e..4335082f1fa 100644 --- a/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md @@ -137,3 +137,42 @@ helm upgrade --reuse-values $CRDBCLUSTER ./cockroachdb-parent/charts/cockroachdb ~~~ The {{ site.data.products.cockroachdb-operator }} updates all nodes and triggers a rolling restart of the pods with the new port settings. + +## Ingress + +You can configure an [Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/) object to expose an internal HTTP or SQL [`ClusterIP` service](https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types) through a hostname. + +In order to use the Ingress resource, your cluster must be running an [Ingress controller](https://kubernetes.io/docs/concepts/services-networking/ingress-controllers/) for load balancing. This is **not** handled by the {{ site.data.products.cockroachdb-operator }} and must be deployed separately. + +Specify Ingress objects in `cockroachdb.crdbCluster.ingress`. 
Set `ingress.enabled` to `true` and specify `ingress.ui` (HTTP) or `ingress.sql` (SQL) in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster)): + +~~~ yaml +cockroachdb: + crdbCluster: + ingress: + enabled: true + ui: + ingressClassName: nginx + annotations: + key: value + host: ui.example.com + sql: + ingressClassName: nginx + annotations: + key: value + host: sql.example.com +~~~ + +- `ingressClassName` specifies the [`IngressClass`](https://kubernetes.io/docs/concepts/services-networking/ingress/#ingress-class) of the Ingress controller. This example uses the [nginx](https://kubernetes.github.io/ingress-nginx/) controller. + +- The `host` must be made publicly accessible. For example, create a route in [Amazon Route 53](https://aws.amazon.com/route53/), or add an entry to `/etc/hosts` that maps the IP address of the Ingress controller to the hostname. + + {{site.data.alerts.callout_info}} + Multiple hosts can be mapped to the same Ingress controller IP. + {{site.data.alerts.end}} + +- TCP connections for SQL clients must be enabled for the Ingress controller. For an example, see the [nginx documentation](https://kubernetes.github.io/ingress-nginx/user-guide/exposing-tcp-udp-services/). + + {{site.data.alerts.callout_info}} + Changing the SQL Ingress `host` on a running deployment will cause a rolling restart of the cluster, due to new node certificates being generated for the SQL host. 
+ {{site.data.alerts.end}} From afcf3f6947492ebdca2cbc8c871f0252616576c9 Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Tue, 5 Aug 2025 13:38:29 -0400 Subject: [PATCH 14/27] Adjust startFlags example --- src/current/v25.2/override-templates-kubernetes-operator.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/current/v25.2/override-templates-kubernetes-operator.md b/src/current/v25.2/override-templates-kubernetes-operator.md index 9aac64f4042..18f7e97a1b6 100644 --- a/src/current/v25.2/override-templates-kubernetes-operator.md +++ b/src/current/v25.2/override-templates-kubernetes-operator.md @@ -86,8 +86,7 @@ cockroachdb: # upsert captures a set of flags that are given higher precedence in the start command. upsert: - "--cache=30%" - - "--max-sql-memory=35%" # omit defines a set of flags which will be omitted from the start command. omit: - - "" + - "--max-sql-memory" ~~~ From 2369f976f24e849c963acc16ab4a76d4ecd46894 Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Wed, 6 Aug 2025 10:34:29 -0400 Subject: [PATCH 15/27] Eng comments --- ...nfigure-cockroachdb-kubernetes-operator.md | 27 ++++++++++--------- .../override-templates-kubernetes-operator.md | 2 +- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md b/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md index 4335082f1fa..b93a05dafc3 100644 --- a/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md @@ -144,23 +144,24 @@ You can configure an [Ingress](https://kubernetes.io/docs/concepts/services-netw In order to use the Ingress resource, your cluster must be running an [Ingress controller](https://kubernetes.io/docs/concepts/services-networking/ingress-controllers/) for load balancing. This is **not** handled by the {{ site.data.products.cockroachdb-operator }} and must be deployed separately. 
-Specify Ingress objects in `cockroachdb.crdbCluster.ingress`. Set `ingress.enabled` to `true` and specify `ingress.ui` (HTTP) or `ingress.sql` (SQL) in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster)): +Specify Ingress objects in `cockroachdb.crdbCluster.service.ingress`. Set `ingress.enabled` to `true` and specify `ingress.ui` (HTTP) or `ingress.sql` (SQL) in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster): ~~~ yaml cockroachdb: crdbCluster: - ingress: - enabled: true - ui: - ingressClassName: nginx - annotations: - key: value - host: ui.example.com - sql: - ingressClassName: nginx - annotations: - key: value - host: sql.example.com + service: + ingress: + enabled: true + ui: + ingressClassName: nginx + annotations: + key: value + host: ui.example.com + sql: + ingressClassName: nginx + annotations: + key: value + host: sql.example.com ~~~ - `ingressClassName` specifies the [`IngressClass`](https://kubernetes.io/docs/concepts/services-networking/ingress/#ingress-class) of the Ingress controller. This example uses the [nginx](https://kubernetes.github.io/ingress-nginx/) controller. diff --git a/src/current/v25.2/override-templates-kubernetes-operator.md b/src/current/v25.2/override-templates-kubernetes-operator.md index 18f7e97a1b6..02de07fadf5 100644 --- a/src/current/v25.2/override-templates-kubernetes-operator.md +++ b/src/current/v25.2/override-templates-kubernetes-operator.md @@ -82,7 +82,7 @@ Within this field, you can specify flags to upsert and flags to omit: ~~~ yaml cockroachdb: crdbCluster: - startFlags: {} + startFlags: # upsert captures a set of flags that are given higher precedence in the start command. 
upsert: - "--cache=30%" From 7106f3962daf871f23971ba7fa0c9d0d27f56e20 Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Wed, 6 Aug 2025 15:37:41 -0400 Subject: [PATCH 16/27] Michael comments --- .../upgrade/disable-auto-finalization.md | 2 +- .../cockroachdb-operator-recommendation.md | 4 +-- .../orchestration/kubernetes-limitations.md | 2 +- .../sidebar-data/self-hosted-deployments.json | 20 +++++------ .../cockroachdb-operator-recommendation.md | 4 +-- ...ew.md => cockroachdb-operator-overview.md} | 26 +++++++------- ...md => cockroachdb-operator-performance.md} | 4 +-- .../v25.2/configure-cockroachdb-kubernetes.md | 2 +- ...r.md => configure-cockroachdb-operator.md} | 12 +++---- ...-cockroachdb-with-cockroachdb-operator.md} | 34 +++++++++++-------- src/current/v25.2/kubernetes-performance.md | 2 +- .../migrate-cockroachdb-kubernetes-helm.md | 2 +- ...migrate-cockroachdb-kubernetes-operator.md | 2 +- .../v25.2/monitor-cockroachdb-kubernetes.md | 2 +- ...tor.md => monitor-cockroachdb-operator.md} | 6 ++-- src/current/v25.2/node-shutdown.md | 2 +- ...estrate-a-local-cluster-with-kubernetes.md | 2 +- ...verride-templates-cockroachdb-operator.md} | 0 .../v25.2/scale-cockroachdb-kubernetes.md | 2 +- ...rator.md => scale-cockroachdb-operator.md} | 8 ++--- .../v25.2/schedule-cockroachdb-kubernetes.md | 2 +- ...or.md => schedule-cockroachdb-operator.md} | 18 +++++----- .../v25.2/secure-cockroachdb-kubernetes.md | 2 +- ...ator.md => secure-cockroachdb-operator.md} | 8 ++--- .../v25.2/upgrade-cockroachdb-kubernetes.md | 2 +- ...tor.md => upgrade-cockroachdb-operator.md} | 2 +- 26 files changed, 88 insertions(+), 84 deletions(-) rename src/current/v25.2/{kubernetes-operator-overview.md => cockroachdb-operator-overview.md} (76%) rename src/current/v25.2/{kubernetes-operator-performance.md => cockroachdb-operator-performance.md} (99%) rename src/current/v25.2/{configure-cockroachdb-kubernetes-operator.md => configure-cockroachdb-operator.md} (94%) rename 
src/current/v25.2/{deploy-cockroachdb-with-kubernetes-operator.md => deploy-cockroachdb-with-cockroachdb-operator.md} (94%) rename src/current/v25.2/{monitor-cockroachdb-kubernetes-operator.md => monitor-cockroachdb-operator.md} (98%) rename src/current/v25.2/{override-templates-kubernetes-operator.md => override-templates-cockroachdb-operator.md} (100%) rename src/current/v25.2/{scale-cockroachdb-kubernetes-operator.md => scale-cockroachdb-operator.md} (80%) rename src/current/v25.2/{schedule-cockroachdb-kubernetes-operator.md => schedule-cockroachdb-operator.md} (93%) rename src/current/v25.2/{secure-cockroachdb-kubernetes-operator.md => secure-cockroachdb-operator.md} (94%) rename src/current/v25.2/{upgrade-cockroachdb-kubernetes-operator.md => upgrade-cockroachdb-operator.md} (97%) diff --git a/src/current/_includes/common/upgrade/disable-auto-finalization.md b/src/current/_includes/common/upgrade/disable-auto-finalization.md index d3abfb90372..e6e500db2d6 100644 --- a/src/current/_includes/common/upgrade/disable-auto-finalization.md +++ b/src/current/_includes/common/upgrade/disable-auto-finalization.md @@ -62,7 +62,7 @@ Now, to complete a major-version upgrade, you must manually [finalize it](#final
-{% else if page.name == 'upgrade-cockroachdb-kubernetes-operator.md' %} +{% else if page.name == 'upgrade-cockroachdb-operator.md' %} By default, auto-finalization is enabled, and a major-version upgrade is finalized when all nodes have rejoined the cluster using the new `cockroach` binary. This means that by default, a major-version upgrade cannot be rolled back. Instead, you must [restore the cluster to the previous version]({% link {{ page.version.version }}/restoring-backups-across-versions.md %}#support-for-restoring-backups-into-a-newer-version). diff --git a/src/current/_includes/v25.2/cockroachdb-operator-recommendation.md b/src/current/_includes/v25.2/cockroachdb-operator-recommendation.md index 36d7095395b..823a3696b55 100644 --- a/src/current/_includes/v25.2/cockroachdb-operator-recommendation.md +++ b/src/current/_includes/v25.2/cockroachdb-operator-recommendation.md @@ -1,12 +1,12 @@ {% if page.name == "kubernetes-operator.md" %} {{ site.data.alerts.callout_success }} -The {{ site.data.products.cockroachdb-operator }} is a fully-featured Kubernetes operator that is designed for ease of deployment and scaling of multi-region clusters. To learn more, read the [{{ site.data.products.cockroachdb-operator }} documentation]({% link v25.2/kubernetes-operator-overview.md %}). +The {{ site.data.products.cockroachdb-operator }} is a fully-featured Kubernetes operator that is designed for ease of deployment and scaling of both single-region and multi-region clusters. To learn more, read the [{{ site.data.products.cockroachdb-operator }} documentation]({% link v25.2/cockroachdb-operator-overview.md %}). New deployments of CockroachDB on Kubernetes are recommended to use the {{ site.data.products.cockroachdb-operator }}. 
To migrate an existing deployment to use the {{ site.data.products.cockroachdb-operator }}, read the [Helm]({% link v25.2/migrate-cockroachdb-kubernetes-helm.md %}) and [{{ site.data.products.public-operator }}]({% link v25.2/migrate-cockroachdb-kubernetes-operator.md %}) migration guides. {{ site.data.alerts.end }} {% else %} {{ site.data.alerts.callout_success }} -The {{ site.data.products.cockroachdb-operator }} is a fully-featured Kubernetes operator that is designed for ease of deployment and scaling of multi-region clusters. To learn more, read the [{{ site.data.products.cockroachdb-operator }} documentation]({% link {{ page.version.version }}/kubernetes-operator-overview.md %}). +The {{ site.data.products.cockroachdb-operator }} is a fully-featured Kubernetes operator that is designed for ease of deployment and scaling of both single-region and multi-region clusters. To learn more, read the [{{ site.data.products.cockroachdb-operator }} documentation]({% link {{ page.version.version }}/cockroachdb-operator-overview.md %}). New deployments of CockroachDB on Kubernetes are recommended to use the {{ site.data.products.cockroachdb-operator }}. To migrate an existing deployment to use the {{ site.data.products.cockroachdb-operator }}, read the [Helm]({% link {{ page.version.version }}/migrate-cockroachdb-kubernetes-helm.md %}) and [{{ site.data.products.public-operator }}]({% link {{ page.version.version }}/migrate-cockroachdb-kubernetes-operator.md %}) migration guides. 
{{ site.data.alerts.end }} diff --git a/src/current/_includes/v25.2/orchestration/kubernetes-limitations.md b/src/current/_includes/v25.2/orchestration/kubernetes-limitations.md index 7f032b6151e..0144b1cb4ed 100644 --- a/src/current/_includes/v25.2/orchestration/kubernetes-limitations.md +++ b/src/current/_includes/v25.2/orchestration/kubernetes-limitations.md @@ -4,7 +4,7 @@ To deploy CockroachDB {{page.version.version}}, Kubernetes 1.18 or higher is req #### {{ site.data.products.public-operator }} -- The {{ site.data.products.public-operator }} deploys clusters in a single region. For multi-region deployments using manual configs, Cockroach Labs recommends using the [{{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/kubernetes-operator-overview.md %}) which is designed to support multi-region deployments. For guidance on how to force multi-region support with the {{ site.data.products.public-operator }}, see [Orchestrate CockroachDB Across Multiple Kubernetes Clusters]({% link {{ page.version.version }}/orchestrate-cockroachdb-with-kubernetes-multi-cluster.md %}). +- The {{ site.data.products.public-operator }} deploys clusters in a single region. For multi-region deployments using manual configs, Cockroach Labs recommends using the [{{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/cockroachdb-operator-overview.md %}) which is designed to support multi-region deployments. For guidance on how to force multi-region support with the {{ site.data.products.public-operator }}, see [Orchestrate CockroachDB Across Multiple Kubernetes Clusters]({% link {{ page.version.version }}/orchestrate-cockroachdb-with-kubernetes-multi-cluster.md %}). - Using the {{ site.data.products.public-operator }}, you can give a new cluster an arbitrary number of [labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/). However, a cluster's labels cannot be modified after it is deployed. 
To track the status of this limitation, refer to [#993](https://github.com/cockroachdb/cockroach-operator/issues/993) in the {{ site.data.products.public-operator }} project's issue tracker. diff --git a/src/current/_includes/v25.2/sidebar-data/self-hosted-deployments.json b/src/current/_includes/v25.2/sidebar-data/self-hosted-deployments.json index 51d98ae0f17..6f152167c08 100644 --- a/src/current/_includes/v25.2/sidebar-data/self-hosted-deployments.json +++ b/src/current/_includes/v25.2/sidebar-data/self-hosted-deployments.json @@ -176,13 +176,13 @@ { "title": "Overview", "urls": [ - "/${VERSION}/kubernetes-operator-overview.html" + "/${VERSION}/cockroachdb-operator-overview.html" ] }, { "title": "CockroachDB Operator Deployment Guide", "urls": [ - "/${VERSION}/deploy-cockroachdb-with-kubernetes-operator.html" + "/${VERSION}/deploy-cockroachdb-with-cockroachdb-operator.html" ] }, { @@ -208,49 +208,49 @@ { "title": "Pod Scheduling", "urls": [ - "/${VERSION}/schedule-cockroachdb-kubernetes-operator.html" + "/${VERSION}/schedule-cockroachdb-operator.html" ] }, { "title": "Resource Management", "urls": [ - "/${VERSION}/configure-cockroachdb-kubernetes-operator.html" + "/${VERSION}/configure-cockroachdb-operator.html" ] }, { "title": "Certificate Management", "urls": [ - "/${VERSION}/secure-cockroachdb-kubernetes-operator.html" + "/${VERSION}/secure-cockroachdb-operator.html" ] }, { "title": "Cluster Scaling", "urls": [ - "/${VERSION}/scale-cockroachdb-kubernetes-operator.html" + "/${VERSION}/scale-cockroachdb-operator.html" ] }, { "title": "Cluster Monitoring", "urls": [ - "/${VERSION}/monitor-cockroachdb-kubernetes-operator.html" + "/${VERSION}/monitor-cockroachdb-operator.html" ] }, { "title": "Cluster Upgrades", "urls": [ - "/${VERSION}/upgrade-cockroachdb-kubernetes-operator.html" + "/${VERSION}/upgrade-cockroachdb-operator.html" ] }, { "title": "Override Templates", "urls": [ - "/${VERSION}/override-templates-kubernetes-operator.html" + 
"/${VERSION}/override-templates-cockroachdb-operator.html" ] }, { "title": "Kubernetes Performance", "urls": [ - "/${VERSION}/kubernetes-operator-performance.html" + "/${VERSION}/cockroachdb-operator-performance.html" ] } ] diff --git a/src/current/_includes/v25.3/cockroachdb-operator-recommendation.md b/src/current/_includes/v25.3/cockroachdb-operator-recommendation.md index 36d7095395b..59ec152a77a 100644 --- a/src/current/_includes/v25.3/cockroachdb-operator-recommendation.md +++ b/src/current/_includes/v25.3/cockroachdb-operator-recommendation.md @@ -1,12 +1,12 @@ {% if page.name == "kubernetes-operator.md" %} {{ site.data.alerts.callout_success }} -The {{ site.data.products.cockroachdb-operator }} is a fully-featured Kubernetes operator that is designed for ease of deployment and scaling of multi-region clusters. To learn more, read the [{{ site.data.products.cockroachdb-operator }} documentation]({% link v25.2/kubernetes-operator-overview.md %}). +The {{ site.data.products.cockroachdb-operator }} is a fully-featured Kubernetes operator that is designed for ease of deployment and scaling of multi-region clusters. To learn more, read the [{{ site.data.products.cockroachdb-operator }} documentation]({% link v25.2/cockroachdb-operator-overview.md %}). New deployments of CockroachDB on Kubernetes are recommended to use the {{ site.data.products.cockroachdb-operator }}. To migrate an existing deployment to use the {{ site.data.products.cockroachdb-operator }}, read the [Helm]({% link v25.2/migrate-cockroachdb-kubernetes-helm.md %}) and [{{ site.data.products.public-operator }}]({% link v25.2/migrate-cockroachdb-kubernetes-operator.md %}) migration guides. {{ site.data.alerts.end }} {% else %} {{ site.data.alerts.callout_success }} -The {{ site.data.products.cockroachdb-operator }} is a fully-featured Kubernetes operator that is designed for ease of deployment and scaling of multi-region clusters. 
To learn more, read the [{{ site.data.products.cockroachdb-operator }} documentation]({% link {{ page.version.version }}/kubernetes-operator-overview.md %}). +The {{ site.data.products.cockroachdb-operator }} is a fully-featured Kubernetes operator that is designed for ease of deployment and scaling of multi-region clusters. To learn more, read the [{{ site.data.products.cockroachdb-operator }} documentation]({% link {{ page.version.version }}/cockroachdb-operator-overview.md %}). New deployments of CockroachDB on Kubernetes are recommended to use the {{ site.data.products.cockroachdb-operator }}. To migrate an existing deployment to use the {{ site.data.products.cockroachdb-operator }}, read the [Helm]({% link {{ page.version.version }}/migrate-cockroachdb-kubernetes-helm.md %}) and [{{ site.data.products.public-operator }}]({% link {{ page.version.version }}/migrate-cockroachdb-kubernetes-operator.md %}) migration guides. {{ site.data.alerts.end }} diff --git a/src/current/v25.2/kubernetes-operator-overview.md b/src/current/v25.2/cockroachdb-operator-overview.md similarity index 76% rename from src/current/v25.2/kubernetes-operator-overview.md rename to src/current/v25.2/cockroachdb-operator-overview.md index 3e5baf0af18..f00f70e7d98 100644 --- a/src/current/v25.2/kubernetes-operator-overview.md +++ b/src/current/v25.2/cockroachdb-operator-overview.md @@ -22,18 +22,18 @@ With the {{ site.data.products.cockroachdb-operator }}, you can deploy Cockroach This section describes how to: -- [Deploy a CockroachDB cluster using the {{ site.data.products.cockroachdb-operator }}]({% link {{page.version.version}}/deploy-cockroachdb-with-kubernetes-operator.md %}). +- [Deploy a CockroachDB cluster using the {{ site.data.products.cockroachdb-operator }}]({% link {{page.version.version}}/deploy-cockroachdb-with-cockroachdb-operator.md %}). 
- Migrate from an existing CockroachDB Kubernetes deployment using [Helm]({% link {{page.version.version}}/migrate-cockroachdb-kubernetes-helm.md %}) or the [{{ site.data.products.public-operator }}]({% link {{page.version.version}}/migrate-cockroachdb-kubernetes-operator.md %}). - Operate a CockroachDB cluster: - - [Manage pod scheduling]({% link {{page.version.version}}/schedule-cockroachdb-kubernetes-operator.md %}). - - [Manage cluster resources]({% link {{page.version.version}}/configure-cockroachdb-kubernetes-operator.md %}). - - [Manage certificates]({% link {{page.version.version}}/secure-cockroachdb-kubernetes-operator.md %}). - - [Scale a cluster]({% link {{page.version.version}}/scale-cockroachdb-kubernetes-operator.md %}). - - [Monitor a cluster]({% link {{page.version.version}}/monitor-cockroachdb-kubernetes-operator.md %}). - - [Upgrade a cluster]({% link {{page.version.version}}/upgrade-cockroachdb-kubernetes-operator.md %}). - - [Override deployment templates]({% link {{page.version.version}}/override-templates-kubernetes-operator.md %}). - - [Improve cluster performance]({% link {{page.version.version}}/kubernetes-operator-performance.md %}). + - [Manage pod scheduling]({% link {{page.version.version}}/schedule-cockroachdb-operator.md %}). + - [Manage cluster resources]({% link {{page.version.version}}/configure-cockroachdb-operator.md %}). + - [Manage certificates]({% link {{page.version.version}}/secure-cockroachdb-operator.md %}). + - [Scale a cluster]({% link {{page.version.version}}/scale-cockroachdb-operator.md %}). + - [Monitor a cluster]({% link {{page.version.version}}/monitor-cockroachdb-operator.md %}). + - [Upgrade a cluster]({% link {{page.version.version}}/upgrade-cockroachdb-operator.md %}). + - [Override deployment templates]({% link {{page.version.version}}/override-templates-cockroachdb-operator.md %}). + - [Improve cluster performance]({% link {{page.version.version}}/cockroachdb-operator-performance.md %}). 
## Kubernetes terminology @@ -41,8 +41,8 @@ Before starting, review some basic Kubernetes terminology. Note that CockroachDB Feature | Description --------|------------ -[node](https://kubernetes.io/docs/concepts/architecture/nodes/) | A physical or virtual machine. In the [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}), you'll create instances and join them as worker nodes into a single Kubernetes cluster. -[pod](http://kubernetes.io/docs/user-guide/pods/) | A pod is a group of one of more Docker containers. In the [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}), each pod will run on a separate Kubernetes worker node and include one Docker container running a single CockroachDB node, reflecting our [topology recommendations]({% link {{ page.version.version }}/recommended-production-settings.md %}#topology). +[node](https://kubernetes.io/docs/concepts/architecture/nodes/) | A physical or virtual machine. In the [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}), you'll create instances and join them as worker nodes into a single Kubernetes cluster. +[pod](http://kubernetes.io/docs/user-guide/pods/) | A pod is a group of one or more Docker containers. In the [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}), each pod will run on a separate Kubernetes worker node and include one Docker container running a single CockroachDB node, reflecting our [topology recommendations]({% link {{ page.version.version }}/recommended-production-settings.md %}#topology). [operator](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/) | An operator is an extension to Kubernetes that uses custom resources to efficiently manage specific applications. 
The {{ site.data.products.cockroachdb-operator }} includes two custom resource definitions, `cockroachdb` to manage a CockroachDB pod and `operator` to manage the operator pod itself. Unlike the older [{{ site.data.products.public-operator }}](https://github.com/cockroachdb/cockroach-operator), the {{ site.data.products.cockroachdb-operator }} does not use StatefulSets and is designed to simplify multi-region deployments. -[persistent volume](http://kubernetes.io/docs/user-guide/persistent-volumes/) | A persistent volume is a piece of networked storage (Persistent Disk on GCE, Elastic Block Store on AWS) mounted into a pod. The lifetime of a persistent volume is decoupled from the lifetime of the pod that's using it, ensuring that each CockroachDB node binds back to the same storage on restart.

The [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}) assumes that dynamic volume provisioning is available. When that is not the case, [persistent volume claims](http://kubernetes.io/docs/user-guide/persistent-volumes/#persistentvolumeclaims) need to be created manually. -[RBAC](https://kubernetes.io/docs/reference/access-authn-authz/rbac/) | RBAC, or Role-Based Access Control, is the system Kubernetes uses to manage permissions within the cluster. In order to take an action (e.g., `get` or `create`) on an API resource (e.g., a `pod`), the client must have a `Role` that allows it to do so. The [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}) creates the RBAC resources necessary for CockroachDB to create and access certificates. +[persistent volume](http://kubernetes.io/docs/user-guide/persistent-volumes/) | A persistent volume is a piece of networked storage (Persistent Disk on GCE, Elastic Block Store on AWS) mounted into a pod. The lifetime of a persistent volume is decoupled from the lifetime of the pod that's using it, ensuring that each CockroachDB node binds back to the same storage on restart.

The [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}) assumes that dynamic volume provisioning is available. When that is not the case, [persistent volume claims](http://kubernetes.io/docs/user-guide/persistent-volumes/#persistentvolumeclaims) need to be created manually. +[RBAC](https://kubernetes.io/docs/reference/access-authn-authz/rbac/) | RBAC, or Role-Based Access Control, is the system Kubernetes uses to manage permissions within the cluster. In order to take an action (e.g., `get` or `create`) on an API resource (e.g., a `pod`), the client must have a `Role` that allows it to do so. The [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}) creates the RBAC resources necessary for CockroachDB to create and access certificates. diff --git a/src/current/v25.2/kubernetes-operator-performance.md b/src/current/v25.2/cockroachdb-operator-performance.md similarity index 99% rename from src/current/v25.2/kubernetes-operator-performance.md rename to src/current/v25.2/cockroachdb-operator-performance.md index 2b60972da7d..98d6abb9077 100644 --- a/src/current/v25.2/kubernetes-operator-performance.md +++ b/src/current/v25.2/cockroachdb-operator-performance.md @@ -17,7 +17,7 @@ Before you focus on optimizing a Kubernetes-orchestrated CockroachDB cluster: 1. Before deploying on Kubernetes, ensure that performance is optimized for your workload on identical hardware. You may find that you first need to [modify your workload]({% link {{ page.version.version }}/performance-best-practices-overview.md %}) or use [different machine specs]({% link {{ page.version.version }}/recommended-production-settings.md %}#hardware) to achieve the performance you need. -1. 
Read the documentation for [deploying CockroachDB on a Kubernetes cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster) to familiarize yourself with the necessary Kubernetes terminology and deployment abstractions. +1. Read the documentation for [deploying CockroachDB on a Kubernetes cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster) to familiarize yourself with the necessary Kubernetes terminology and deployment abstractions. ## Performance factors @@ -305,4 +305,4 @@ If you aren't using a hosted Kubernetes service, you'll need to choose a [networ If your Kubernetes cluster uses heterogeneous hardware, you will likely want to ensure that CockroachDB only runs on specific machines. To optimize performance, it can be beneficial to dedicate those machines exclusively to CockroachDB. -For more information, refer to [Pod scheduling]({% link {{ page.version.version }}/schedule-cockroachdb-kubernetes-operator.md %}). +For more information, refer to [Pod scheduling]({% link {{ page.version.version }}/schedule-cockroachdb-operator.md %}). diff --git a/src/current/v25.2/configure-cockroachdb-kubernetes.md b/src/current/v25.2/configure-cockroachdb-kubernetes.md index 3528fcca91f..00cd0065fc1 100644 --- a/src/current/v25.2/configure-cockroachdb-kubernetes.md +++ b/src/current/v25.2/configure-cockroachdb-kubernetes.md @@ -11,7 +11,7 @@ docs_area: deploy This page explains how to configure Kubernetes cluster resources such as memory, CPU, and storage. -This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [Resource management with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/configure-cockroachdb-kubernetes-operator.md %}). 
+This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [Resource management with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/configure-cockroachdb-operator.md %}). {% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} diff --git a/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md b/src/current/v25.2/configure-cockroachdb-operator.md similarity index 94% rename from src/current/v25.2/configure-cockroachdb-kubernetes-operator.md rename to src/current/v25.2/configure-cockroachdb-operator.md index b93a05dafc3..c09063e26c5 100644 --- a/src/current/v25.2/configure-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/configure-cockroachdb-operator.md @@ -27,7 +27,7 @@ You can set the CPU and memory resources allocated to the CockroachDB container 1 CPU in Kubernetes is equivalent to 1 vCPU or 1 hyperthread. For best practices on provisioning CPU and memory for CockroachDB, refer to the [Production Checklist]({% link {{ page.version.version }}/recommended-production-settings.md %}#hardware). 
{{site.data.alerts.end}} -Specify CPU and memory values in `cockroachdb.crdbCluster.resources.limits` and `cockroachdb.crdbCluster.resources.requests` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster): +Specify CPU and memory values in `cockroachdb.crdbCluster.resources.limits` and `cockroachdb.crdbCluster.resources.requests` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster): ~~~ yaml cockroachdb: @@ -66,7 +66,7 @@ The {{ site.data.products.cockroachdb-operator }} dynamically sets cache size an When you start your cluster, Kubernetes dynamically provisions and mounts a persistent volume into each pod. For more information on persistent volumes, see the [Kubernetes documentation](https://kubernetes.io/docs/concepts/storage/persistent-volumes/). -The storage capacity of each volume is set in `cockroachdb.crdbCluster.dataStore.volumeClaimTemplate.spec.resources` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster): +The storage capacity of each volume is set in `cockroachdb.crdbCluster.dataStore.volumeClaimTemplate.spec.resources` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster): ~~~ yaml cockroachdb: @@ -83,9 +83,9 @@ You should provision an appropriate amount of disk storage for your workload. Fo ### Expand disk size -If you discover that you need more capacity, you can expand the persistent volumes on a running cluster. Increasing disk size is often [beneficial for CockroachDB performance]({% link {{ page.version.version }}/kubernetes-operator-performance.md %}). 
+If you discover that you need more capacity, you can expand the persistent volumes on a running cluster. Increasing disk size is often [beneficial for CockroachDB performance]({% link {{ page.version.version }}/cockroachdb-operator-performance.md %}). -Specify a new volume size in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster): +Specify a new volume size in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster): ~~~ yaml cockroachdb: @@ -119,7 +119,7 @@ The {{ site.data.products.cockroachdb-operator }} separates network traffic into | HTTP | 8080 | Used to access the DB Console | service.ports.http | | SQL | 26257 | Used for SQL shell access | service.ports.sql | -Specify alternate port numbers in `cockroachdb.crdbCluster.service.ports` of the {{ site.data.products.cockroachdb-operator }}'s [custom resource]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster) (for example, to match the default port `5432` on PostgreSQL): +Specify alternate port numbers in `cockroachdb.crdbCluster.service.ports` of the {{ site.data.products.cockroachdb-operator }}'s [custom resource]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster) (for example, to match the default port `5432` on PostgreSQL): ~~~ yaml cockroachdb: @@ -144,7 +144,7 @@ You can configure an [Ingress](https://kubernetes.io/docs/concepts/services-netw In order to use the Ingress resource, your cluster must be running an [Ingress controller](https://kubernetes.io/docs/concepts/services-networking/ingress-controllers/) for load balancing. This is **not** handled by the {{ site.data.products.cockroachdb-operator }} and must be deployed separately. 
-Specify Ingress objects in `cockroachdb.crdbCluster.service.ingress`. Set `ingress.enabled` to `true` and specify `ingress.ui` (HTTP) or `ingress.sql` (SQL) in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster)): +Specify Ingress objects in `cockroachdb.crdbCluster.service.ingress`. Set `ingress.enabled` to `true` and specify `ingress.ui` (HTTP) or `ingress.sql` (SQL) in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster): ~~~ yaml cockroachdb: diff --git a/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md b/src/current/v25.2/deploy-cockroachdb-with-cockroachdb-operator.md similarity index 94% rename from src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md rename to src/current/v25.2/deploy-cockroachdb-with-cockroachdb-operator.md index 7121b1c5a7a..47e0f4077ba 100644 --- a/src/current/v25.2/deploy-cockroachdb-with-kubernetes-operator.md +++ b/src/current/v25.2/deploy-cockroachdb-with-cockroachdb-operator.md @@ -27,7 +27,7 @@ The CockroachDB Helm chart requires Helm 3.0 or higher. If you attempt to use an Error: UPGRADE FAILED: template: cockroachdb/templates/tests/client.yaml:6:14: executing "cockroachdb/templates/tests/client.yaml" at <.Values.networkPolicy.enabled>: nil pointer evaluating interface {}.enabled ~~~ -The Helm chart consists of two sub-charts: +There are two Helm charts that must be deployed: - `operator`: The {{ site.data.products.cockroachdb-operator }} chart to be installed first. - `cockroachdb`: The CockroachDB application chart to be installed after the operator is ready. @@ -236,7 +236,7 @@ For bare metal deployments, the specific Kubernetes infrastructure deployment st 1. 
Open `cockroachdb-parent/charts/cockroachdb/values.yaml`, a values file that tells Helm how to configure the Kubernetes cluster, in your text editor. -1. Modify the `cockroachdb.crdbCluster.regions` section to describe the number of nodes to deploy and what region(s) to deploy them in. Replace the default `cloudProvider` with the appropriate value (`gcp`, `aws`, `azure`). For bare metal deployments, you can remove the `cloudProvider` field. The following example initializes three nodes on Google Cloud in the `us-central1` region: +1. Modify the `cockroachdb.crdbCluster.regions` section to describe the number of CockroachDB nodes to deploy and what region(s) to deploy them in. Replace the default `cloudProvider` with the appropriate value (`gcp`, `aws`, `azure`). For bare metal deployments, you can remove the `cloudProvider` field. The following example initializes three nodes on Google Cloud in the `us-central1` region: ~~~ yaml cockroachdb: @@ -248,11 +248,13 @@ For bare metal deployments, the specific Kubernetes infrastructure deployment st namespace: cockroach-ns ~~~ + {{site.data.alerts.callout_info}} If you intend to deploy CockroachDB nodes across multiple different regions, follow the additional steps described in [Deploy across multiple regions](#deploy-across-multiple-regions). + {{site.data.alerts.callout_end}} -1. Uncomment and modify `cockroachdb.crdbCluster.resources` in the values file with the CPU and memory requests and limits for each node to use. The default values are 4vCPU and 16GB of memory: +1. Uncomment and modify `cockroachdb.crdbCluster.resources` in the values file with the CPU and memory requests and limits for each node to use. 
The default values are 4vCPU and 16GiB of memory: - For more information on configuring node resource allocation, refer to [Resource management]({% link {{ page.version.version }}/configure-cockroachdb-kubernetes-operator.md %}) + For more information on configuring node resource allocation, refer to [Resource management]({% link {{ page.version.version }}/configure-cockroachdb-operator.md %}) 1. Modify the TLS configuration as desired. For a secure deployment, set `cockroachdb.tls.enabled` in the values file to `true`. You can either allow the operator to generate self-signed certificates, provide a custom CA certificate and generate other certificates, or use your own certificates. - **All self-signed certificates**: By default, the certificates are created automatically by a self-signer utility, which requires no configuration beyond setting a custom certificate duration if desired. This utility creates self-signed certificates for the nodes and root client which are stored in a secret. You can see these certificates by running `kubectl get secrets`: @@ -386,7 +388,7 @@ For bare metal deployments, the specific Kubernetes infrastructure deployment st 1. In `cockroachdb.crdbCluster.localityMappings`, provide [locality mappings](#localities) that define locality levels and map them to node labels where the locality information of each Kubernetes node is stored. When CockroachDB is initialized on a node, it processes these values as though they are provided through the [`cockroach start --locality`]({% link {{ page.version.version }}/cockroach-start.md %}#locality) flag. - The default configuration uses the `region` and `zone` locality labels, mapped implicitly to the [`topology.kubernetes.io/region`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesioregion) and [`topology.kubernetes.io/zone`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesiozone) node labels. 
+ If `localityMappings` is not configured, by default the {{ site.data.products.cockroachdb-operator }} uses the `region` and `zone` locality labels, mapped implicitly to the [`topology.kubernetes.io/region`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesioregion) and [`topology.kubernetes.io/zone`](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesiozone) node labels. - In cloud provider deployments, the `topology.kubernetes.io/region` and `topology.kubernetes.io/zone` values on a node are populated by the cloud provider. - In bare metal deployments, the `topology.kubernetes.io/region` and `topology.kubernetes.io/zone` node label values are not set implicitly by a cloud provider when initializing the node, so you must set them manually or configure custom locality labels. @@ -406,13 +408,13 @@ For bare metal deployments, the specific Kubernetes infrastructure deployment st localityLabel: "dc" ~~~ - In this example, if a Kubernetes node is initialized in the `us-central1` region, `us-central1-c` zone, and `dc2` datacenter, its `cockroach start --locality` flag would be equivalent to the following: + The list of `localityMappings` is processed in a top-down hierarchy, where each entry is processed as a lower locality level than the previous locality. In this example, if a Kubernetes node is initialized in the `us-central1` region, `us-central1-c` zone, and `dc2` datacenter, its `cockroach start --locality` flag would be equivalent to the following: ~~~ shell cockroach start --locality region=us-central1,zone=us-central1-c,dc=dc2 ~~~ - Optionally, review the `cockroachdb.crdbCluster.topologySpreadConstraints` configuration and set `topologyKey` to a locality variable that will have distinct values for each node. 
By default the lowest locality level is `zone`, so the following configuration sets that value as the `topologyKey`: + Optionally, review the `cockroachdb.crdbCluster.topologySpreadConstraints` configuration and set `topologyKey` to the `nodeLabel` value of a locality level that has distinct values for each node. By default the lowest locality level is `zone`, so the following configuration sets that value as the `topologyKey`: ~~~ yaml cockroachdb: @@ -421,6 +423,8 @@ For bare metal deployments, the specific Kubernetes infrastructure deployment st topologyKey: topology.kubernetes.io/zone ~~~ + For more information on localities and topology planning, see the [topology patterns documentation]({% link {{ page.version.version }}/topology-patterns.md %}). + 1. Modify other relevant parts of the configuration such as other `topologySpreadConstraints` fields, `service.ports`, and others as needed for your configuration. 1. Run the following command to install the CockroachDB chart using Helm: @@ -597,14 +601,14 @@ To access the cluster's [DB Console]({% link {{ page.version.version }}/ui-overv Read the following pages for detailed information on cluster scaling, certificate management, resource management, best practices, and other cluster operation details: -- [Pod scheduling]({% link {{ page.version.version }}/schedule-cockroachdb-kubernetes-operator.md %}) -- [Resource management]({% link {{ page.version.version }}/configure-cockroachdb-kubernetes-operator.md %}) -- [Certificate management]({% link {{ page.version.version }}/secure-cockroachdb-kubernetes-operator.md %}) -- [Cluster scaling]({% link {{ page.version.version }}/scale-cockroachdb-kubernetes-operator.md %}) -- [Cluster monitoring]({% link {{ page.version.version }}/monitor-cockroachdb-kubernetes-operator.md %}) -- [Upgrade a cluster]({% link {{ page.version.version }}/upgrade-cockroachdb-kubernetes-operator.md %}) -- [Override deployment templates]({% link {{ page.version.version 
}}/override-templates-kubernetes-operator.md %}) -- [CockroachDB performance on Kubernetes]({% link {{ page.version.version }}/kubernetes-operator-performance.md %}) +- [Pod scheduling]({% link {{ page.version.version }}/schedule-cockroachdb-operator.md %}) +- [Resource management]({% link {{ page.version.version }}/configure-cockroachdb-operator.md %}) +- [Certificate management]({% link {{ page.version.version }}/secure-cockroachdb-operator.md %}) +- [Cluster scaling]({% link {{ page.version.version }}/scale-cockroachdb-operator.md %}) +- [Cluster monitoring]({% link {{ page.version.version }}/monitor-cockroachdb-operator.md %}) +- [Upgrade a cluster]({% link {{ page.version.version }}/upgrade-cockroachdb-operator.md %}) +- [Override deployment templates]({% link {{ page.version.version }}/override-templates-cockroachdb-operator.md %}) +- [CockroachDB performance on Kubernetes]({% link {{ page.version.version }}/cockroachdb-operator-performance.md %}) ## Examples diff --git a/src/current/v25.2/kubernetes-performance.md b/src/current/v25.2/kubernetes-performance.md index 841831da74b..6fa4d534442 100644 --- a/src/current/v25.2/kubernetes-performance.md +++ b/src/current/v25.2/kubernetes-performance.md @@ -7,7 +7,7 @@ docs_area: deploy Kubernetes provides many useful abstractions for deploying and operating distributed systems, but some of the abstractions come with a performance overhead and an increase in underlying system complexity. This page explains potential bottlenecks to be aware of when [running CockroachDB in Kubernetes]({% link {{ page.version.version }}/kubernetes-overview.md %}) and shows you how to optimize your deployment for better performance. -This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. 
For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [CockroachDB Performance on Kubernetes with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/kubernetes-operator-performance.md %}). +This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [CockroachDB Performance on Kubernetes with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/cockroachdb-operator-performance.md %}). {% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} diff --git a/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md b/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md index 3ec61cad52a..b7023447491 100644 --- a/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md +++ b/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md @@ -112,7 +112,7 @@ For each pod in the StatefulSet, perform the following steps: kubectl apply -f manifests/crdbnode-4.yaml ~~~ -3. Wait for the new pod to become ready. If it doesn’t, [check the operator logs]({% link {{ page.version.version }}/monitor-cockroachdb-kubernetes-operator.md %}#monitor-the-operator) for errors. +3. Wait for the new pod to become ready. If it doesn’t, [check the operator logs]({% link {{ page.version.version }}/monitor-cockroachdb-operator.md %}#monitor-the-operator) for errors. 4. Before moving on to the next replica migration, verify that there are no underreplicated ranges: 1. Set up port forwarding to access the CockroachDB node’s HTTP interface. 
Note that the DB Console runs on port 8080 by default: diff --git a/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md b/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md index 033170f7a08..3e3a1acb10a 100644 --- a/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md @@ -161,7 +161,7 @@ For each pod in the StatefulSet, perform the following steps: kubectl apply -f manifests/crdbnode-4.yaml ~~~ -1. Wait for the new pod to become ready. If it doesn’t, [check the operator logs]({% link {{ page.version.version }}/monitor-cockroachdb-kubernetes-operator.md %}#monitor-the-operator) for errors. +1. Wait for the new pod to become ready. If it doesn’t, [check the operator logs]({% link {{ page.version.version }}/monitor-cockroachdb-operator.md %}#monitor-the-operator) for errors. 1. Before moving on to the next replica migration, verify that there are no underreplicated ranges: 1. Set up port forwarding to access the CockroachDB node’s HTTP interface. Note that the DB Console runs on port 8080 by default: diff --git a/src/current/v25.2/monitor-cockroachdb-kubernetes.md b/src/current/v25.2/monitor-cockroachdb-kubernetes.md index 40dcb13eac2..da454617282 100644 --- a/src/current/v25.2/monitor-cockroachdb-kubernetes.md +++ b/src/current/v25.2/monitor-cockroachdb-kubernetes.md @@ -12,7 +12,7 @@ This article assumes you have already [deployed CockroachDB on a single Kubernet Despite CockroachDB's various [built-in safeguards against failure]({% link {{ page.version.version }}/architecture/replication-layer.md %}), it is critical to actively monitor the overall health and performance of a cluster running in production and to create alerting rules that promptly send notifications when there are events that require investigation or intervention. -This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. 
For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [Cluster Monitoring with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/monitor-cockroachdb-kubernetes-operator.md %}). +This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [Cluster Monitoring with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/monitor-cockroachdb-operator.md %}). {% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} diff --git a/src/current/v25.2/monitor-cockroachdb-kubernetes-operator.md b/src/current/v25.2/monitor-cockroachdb-operator.md similarity index 98% rename from src/current/v25.2/monitor-cockroachdb-kubernetes-operator.md rename to src/current/v25.2/monitor-cockroachdb-operator.md index 024d04925d2..6c7efd04d26 100644 --- a/src/current/v25.2/monitor-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/monitor-cockroachdb-operator.md @@ -19,7 +19,7 @@ Every node of a CockroachDB cluster exports granular timeseries metrics formatte This guidance is based on [CoreOS's Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator/tree/main), which allows a Prometheus instance to be managed using built-in Kubernetes concepts. {{site.data.alerts.callout_info}} -If you're on Hosted GKE, before starting, make sure the email address associated with your Google Cloud account is part of the `cluster-admin` RBAC group, as shown in [Deploy CockroachDB with Kubernetes]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}). 
+If you're on Hosted GKE, before starting, make sure the email address associated with your Google Cloud account is part of the `cluster-admin` RBAC group, as shown in [Deploy CockroachDB with Kubernetes]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}). {{site.data.alerts.end}} 1. From your local workstation, edit the cockroachdb service to add the prometheus: cockroachdb label: @@ -112,7 +112,7 @@ If you're on Hosted GKE, before starting, make sure the email address associated Prometheus graph {{site.data.alerts.callout_info}} - Prometheus auto-completes CockroachDB time series metrics for you, but if you want to see a full listing, with descriptions, port-forward as described in [Access the DB Console]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#step-4-access-the-db-console) and then point your browser to [http://localhost:8080/_status/vars](http://localhost:8080/_status/vars). + Prometheus auto-completes CockroachDB time series metrics for you, but if you want to see a full listing, with descriptions, port-forward as described in [Access the DB Console]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#step-4-access-the-db-console) and then point your browser to [http://localhost:8080/_status/vars](http://localhost:8080/_status/vars). {{site.data.alerts.end}} For more details on using the Prometheus UI, see their [official documentation](https://prometheus.io/docs/introduction/getting_started/). @@ -270,7 +270,7 @@ The above configuration overrides the [default logging configuration]({% link {{ - Save debug-level logs (the `DEV` [log channel]({% link {{ page.version.version }}/logging-overview.md %}#logging-channels)) to disk for troubleshooting. 
- Send operational- and security-level logs to a [network collector]({% link {{ page.version.version }}/logging-use-cases.md %}#network-logging), in this case [Fluentd]({% link {{ page.version.version }}/configure-logs.md %}#fluentd-logging-format). -The ConfigMap `name` must match the `cockroachdb.crdbCluster.loggingConfigMapName` object in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster): +The ConfigMap `name` must match the `cockroachdb.crdbCluster.loggingConfigMapName` object in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster): ~~~ yaml cockroachdb: diff --git a/src/current/v25.2/node-shutdown.md b/src/current/v25.2/node-shutdown.md index 36293da177c..d924e112d5c 100644 --- a/src/current/v25.2/node-shutdown.md +++ b/src/current/v25.2/node-shutdown.md @@ -837,7 +837,7 @@ Most of the guidance in this page is most relevant to manual deployments that do - The {{ site.data.products.public-operator }}, Helm, and manual StatefulSet deployments use a StatefulSet. Due to the nature of StatefulSets, it's safe to decommission **only** the Cockroach node with the highest StatefulSet ordinal in preparation for scaling down the StatefulSet. If you think you need to decommission any other node, consider the following recommendations and [contact Support](https://support.cockroachlabs.com/hc/en-us) for assistance. {{ site.data.alerts.callout_success }} - Unlike the other Kubernetes deployment methods, the [{{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/kubernetes-operator-overview.md %}) does not use StatefulSets so deployments using that operator do not share this limitation. With the {{ site.data.products.cockroachdb-operator }}, nodes can be scaled down in any order. 
For more information, read [Cluster Scaling with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/scale-cockroachdb-kubernetes-operator.md %}). + Unlike the other Kubernetes deployment methods, the [{{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/cockroachdb-operator-overview.md %}) does not use StatefulSets so deployments using that operator do not share this limitation. With the {{ site.data.products.cockroachdb-operator }}, nodes can be scaled down in any order. For more information, read [Cluster Scaling with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/scale-cockroachdb-operator.md %}). {{ site.data.alerts.end }} - If you deployed a cluster using the [{{ site.data.products.public-operator }}]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}), the best way to scale down a cluster is to update the specification for the Kubernetes deployment to reduce the value of `nodes:` and apply the change using a [rolling update](https://kubernetes.io/docs/tutorials/kubernetes-basics/update/update-intro/). Kubernetes will notice that there are now too many nodes and will reduce them and clean up their storage automatically. diff --git a/src/current/v25.2/orchestrate-a-local-cluster-with-kubernetes.md b/src/current/v25.2/orchestrate-a-local-cluster-with-kubernetes.md index 95fea70cd25..852d8cbe6f4 100644 --- a/src/current/v25.2/orchestrate-a-local-cluster-with-kubernetes.md +++ b/src/current/v25.2/orchestrate-a-local-cluster-with-kubernetes.md @@ -13,7 +13,7 @@ On top of CockroachDB's built-in automation, you can use a third-party [orchestr This page demonstrates a basic integration with the open-source [Kubernetes](http://kubernetes.io/) orchestration system. Using either the CockroachDB [Helm](https://helm.sh/) chart or a few configuration files, you'll quickly create a 3-node local cluster. 
You'll run some SQL commands against the cluster and then simulate node failure, watching how Kubernetes auto-restarts without the need for any manual intervention. You'll then scale the cluster with a single command before shutting the cluster down, again with a single command. {{site.data.alerts.callout_info}} -To orchestrate a physically distributed cluster in production, see [Orchestrated Deployments]({% link {{ page.version.version }}/kubernetes-operator-overview.md %}). To deploy a 30-day free CockroachDB {{ site.data.products.dedicated }} cluster instead of running CockroachDB yourself, see the [Quickstart]({% link cockroachcloud/quickstart.md %}). +To orchestrate a physically distributed cluster in production, see [Orchestrated Deployments]({% link {{ page.version.version }}/cockroachdb-operator-overview.md %}). To deploy a 30-day free CockroachDB {{ site.data.products.dedicated }} cluster instead of running CockroachDB yourself, see the [Quickstart]({% link cockroachcloud/quickstart.md %}). {{site.data.alerts.end}} diff --git a/src/current/v25.2/override-templates-kubernetes-operator.md b/src/current/v25.2/override-templates-cockroachdb-operator.md similarity index 100% rename from src/current/v25.2/override-templates-kubernetes-operator.md rename to src/current/v25.2/override-templates-cockroachdb-operator.md diff --git a/src/current/v25.2/scale-cockroachdb-kubernetes.md b/src/current/v25.2/scale-cockroachdb-kubernetes.md index 2094e7239d0..4c03460161e 100644 --- a/src/current/v25.2/scale-cockroachdb-kubernetes.md +++ b/src/current/v25.2/scale-cockroachdb-kubernetes.md @@ -13,7 +13,7 @@ This article assumes you have already [deployed CockroachDB on a single Kubernet This page explains how to add and remove CockroachDB nodes on Kubernetes. -This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. 
For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [Cluster Scaling with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/scale-cockroachdb-kubernetes-operator.md %}). +This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [Cluster Scaling with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/scale-cockroachdb-operator.md %}). {% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} diff --git a/src/current/v25.2/scale-cockroachdb-kubernetes-operator.md b/src/current/v25.2/scale-cockroachdb-operator.md similarity index 80% rename from src/current/v25.2/scale-cockroachdb-kubernetes-operator.md rename to src/current/v25.2/scale-cockroachdb-operator.md index 991f6bfa49a..76de7364979 100644 --- a/src/current/v25.2/scale-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/scale-cockroachdb-operator.md @@ -20,7 +20,7 @@ Before scaling up CockroachDB, note the following [topology recommendations]({% - Each CockroachDB node (running in its own pod) should run on a separate Kubernetes worker node. - Each availability zone should have the same number of CockroachDB nodes. -If your cluster has 3 CockroachDB nodes distributed across 3 availability zones (as in our [deployment example]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster)), Cockroach Labs recommends scaling up by a multiple of 3 to retain an even distribution of nodes. You should therefore scale up to a minimum of 6 CockroachDB nodes, with 2 nodes in each zone. 
+If your cluster has 3 CockroachDB nodes distributed across 3 availability zones (as in our [deployment example]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster)), Cockroach Labs recommends scaling up by a multiple of 3 to retain an even distribution of nodes. You should therefore scale up to a minimum of 6 CockroachDB nodes, with 2 nodes in each zone. 1. Run `kubectl get nodes` to list the worker nodes in your Kubernetes cluster. There should be at least as many worker nodes as pods you plan to add. This ensures that no more than one pod will be placed on each worker node. @@ -33,7 +33,7 @@ If your cluster has 3 CockroachDB nodes distributed across 3 availability zones This example distributes 2 worker nodes across the default 3 zones, raising the total to 6 worker nodes. -1. Update `cockroachdb.crdbCluster.regions.code.nodes` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster), with the target size of the CockroachDB cluster in the specified region. This value refers to the number of CockroachDB nodes, each running in one pod: +1. Update `cockroachdb.crdbCluster.regions.code.nodes` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster), with the target size of the CockroachDB cluster in the specified region. 
This value refers to the number of CockroachDB nodes, each running in one pod: ~~~ yaml cockroachdb: @@ -73,13 +73,13 @@ If your cluster has 3 CockroachDB nodes distributed across 3 availability zones ## Remove nodes -If your nodes are distributed across 3 availability zones (as in our [deployment example]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster)), Cockroach Labs recommends scaling down by a multiple of 3 to retain an even distribution. If your cluster has 6 CockroachDB nodes, you should therefore scale down to 3, with 1 node in each zone. +If your nodes are distributed across 3 availability zones (as in our [deployment example]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster)), Cockroach Labs recommends scaling down by a multiple of 3 to retain an even distribution. If your cluster has 6 CockroachDB nodes, you should therefore scale down to 3, with 1 node in each zone. {{site.data.alerts.callout_danger}} Do not scale down to fewer than 3 nodes. This is considered an anti-pattern on CockroachDB and will cause errors. Before scaling down CockroachDB, note that each availability zone should have the same number of CockroachDB nodes. {{site.data.alerts.end}} -1. Update `cockroachdb.crdbCluster.regions.code.nodes` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster), with the target size of the CockroachDB cluster. For instance, to scale a cluster in Google Cloud down to 3 nodes: +1. Update `cockroachdb.crdbCluster.regions.code.nodes` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster), with the target size of the CockroachDB cluster. 
For instance, to scale a cluster in Google Cloud down to 3 nodes: ~~~ yaml cockroachdb: diff --git a/src/current/v25.2/schedule-cockroachdb-kubernetes.md b/src/current/v25.2/schedule-cockroachdb-kubernetes.md index 40ca835683d..dd67d369c5e 100644 --- a/src/current/v25.2/schedule-cockroachdb-kubernetes.md +++ b/src/current/v25.2/schedule-cockroachdb-kubernetes.md @@ -22,7 +22,7 @@ This page describes how to configure the following, using the [{{ site.data.prod - [Add a topology spread constraint](#add-a-topology-spread-constraint) - [Resource labels and annotations](#resource-labels-and-annotations) -This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [Pod Scheduling with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/schedule-cockroachdb-kubernetes-operator.md %}). +This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [Pod Scheduling with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/schedule-cockroachdb-operator.md %}). 
{% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} diff --git a/src/current/v25.2/schedule-cockroachdb-kubernetes-operator.md b/src/current/v25.2/schedule-cockroachdb-operator.md similarity index 93% rename from src/current/v25.2/schedule-cockroachdb-kubernetes-operator.md rename to src/current/v25.2/schedule-cockroachdb-operator.md index 2d3c661f3c2..733a56f9c7d 100644 --- a/src/current/v25.2/schedule-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/schedule-cockroachdb-operator.md @@ -17,7 +17,7 @@ The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ pag A pod with a *node selector* will be scheduled onto a worker node that has matching [labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/), or key-value pairs. -Specify the labels in `cockroachdb.crdbCluster.nodeSelector` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster). If you specify multiple `nodeSelector` labels, the node must match all of them. +Specify the labels in `cockroachdb.crdbCluster.nodeSelector` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster). If you specify multiple `nodeSelector` labels, the node must match all of them. The following configuration causes CockroachDB pods to be scheduled onto worker nodes that have *both* the labels `worker-pool-name=crdb-workers` and `kubernetes.io/arch=amd64`: @@ -46,7 +46,7 @@ For an example, see [Scheduling CockroachDB onto labeled nodes](#example-schedul ### Add a node affinity -Specify node affinities in `cockroachdb.crdbCluster.affinity.nodeAffinity` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster). 
If you specify multiple `matchExpressions` labels, the node must match all of them. If you specify multiple `values` for a label, the node can match any of the values. +Specify node affinities in `cockroachdb.crdbCluster.affinity.nodeAffinity` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster). If you specify multiple `matchExpressions` labels, the node must match all of them. If you specify multiple `values` for a label, the node can match any of the values. The following configuration requires that CockroachDB pods are scheduled onto worker nodes running a Linux operating system, with a preference against worker nodes in the `us-east4-b` availability zone. @@ -80,7 +80,7 @@ For more context on how these rules work, see the [Kubernetes documentation](htt ### Add a pod affinity or anti-affinity -Specify pod affinities and anti-affinities in `cockroachdb.crdbCluster.affinity.podAffinity` and `cockroachdb.crdbCluster.affinity.podAntiAffinity` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster). If you specify multiple `matchExpressions` labels, the node must match all of them. If you specify multiple `values` for a label, the node can match any of the values. +Specify pod affinities and anti-affinities in `cockroachdb.crdbCluster.affinity.podAffinity` and `cockroachdb.crdbCluster.affinity.podAntiAffinity` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster). If you specify multiple `matchExpressions` labels, the node must match all of them. If you specify multiple `values` for a label, the node can match any of the values. 
The following configuration attempts to schedule CockroachDB pods in the same zones as the pods that run our example [load generator](https://github.com/cockroachdb/cockroach/blob/master/cloud/kubernetes/example-app.yaml) app. It disallows CockroachDB pods from being co-located on the same worker node. @@ -154,7 +154,7 @@ In this example, CockroachDB has not yet been deployed to a running Kubernetes c This also ensures that the CockroachDB pods, which will be bound to persistent volumes in the same three availability zones, can be scheduled onto worker nodes in their respective zones. {{site.data.alerts.end}} -1. Add the following rules to the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster): +1. Add the following rules to the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster): ~~~ yaml cockroachdb: @@ -215,7 +215,7 @@ For an example, see [Evicting CockroachDB from a running worker node](#example-e ### Add a toleration -Specify pod tolerations in the `cockroachdb.crdbCluster.tolerations` object in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster). +Specify pod tolerations in the `cockroachdb.crdbCluster.tolerations` object in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster). The following toleration matches a taint with the specified key, value, and `NoSchedule` effect, using the `Equal` operator. A toleration that uses the `Equal` operator must include a `value` field: @@ -229,7 +229,7 @@ cockroachdb: effect: "NoSchedule" ~~~ -A `NoSchedule` taint on a node prevents pods from being scheduled onto the node. 
The matching toleration allows a pod to be scheduled onto the node. A `NoSchedule` toleration is therefore best included before [deploying the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster). +A `NoSchedule` taint on a node prevents pods from being scheduled onto the node. The matching toleration allows a pod to be scheduled onto the node. A `NoSchedule` toleration is therefore best included before [deploying the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster). {{site.data.alerts.callout_info}} A `PreferNoSchedule` taint discourages, but does not disallow, pods from being scheduled onto the node. @@ -281,7 +281,7 @@ In this example, CockroachDB has already been deployed on a Kubernetes cluster. node/gke-cockroachdb-default-pool-4e5ce539-j1h1 tainted ~~~ -1. Add a matching tolerations object in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster). +1. Add a matching tolerations object in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster). ~~~ yaml cockroachdb: @@ -323,7 +323,7 @@ A pod with a *topology spread constraint* must satisfy its conditions when being ### Add a topology spread constraint -Specify pod topology spread constraints in the `cockroachdb.crdbCluster.topologySpreadConstraints` object of the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster). If you specify multiple `topologySpreadConstraints` objects, the matching pods must satisfy all of the constraints. 
+Specify pod topology spread constraints in the `cockroachdb.crdbCluster.topologySpreadConstraints` object of the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster). If you specify multiple `topologySpreadConstraints` objects, the matching pods must satisfy all of the constraints. The following topology spread constraint ensures that CockroachDB pods deployed with the label `environment=production` will not be unevenly distributed across zones by more than `1` pod: @@ -347,7 +347,7 @@ For more context on how these rules work, see the [Kubernetes documentation](htt To assist in working with your cluster, you can add labels and annotations to your resources. -Specify labels in `cockroachdb.crdbCluster.podLabels` and annotations in `cockroachdb.crdbCluster.podAnnotations` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster): +Specify labels in `cockroachdb.crdbCluster.podLabels` and annotations in `cockroachdb.crdbCluster.podAnnotations` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster): ~~~ yaml cockroachdb: diff --git a/src/current/v25.2/secure-cockroachdb-kubernetes.md b/src/current/v25.2/secure-cockroachdb-kubernetes.md index f898e2183e3..fc14a375b08 100644 --- a/src/current/v25.2/secure-cockroachdb-kubernetes.md +++ b/src/current/v25.2/secure-cockroachdb-kubernetes.md @@ -13,7 +13,7 @@ This article assumes you have already [deployed CockroachDB securely on a single By default, self-signed certificates are used when using the {{ site.data.products.public-operator }} or Helm to securely [deploy CockroachDB on Kubernetes]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}). 
However, the recommended approach is to use `cert-manager` for certificate management. For details, refer to [Deploy cert-manager for mTLS](?filters=helm#deploy-cert-manager-for-mtls). -This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [Certificate Management with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/secure-cockroachdb-kubernetes-operator.md %}). +This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [Certificate Management with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/secure-cockroachdb-operator.md %}). {% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} diff --git a/src/current/v25.2/secure-cockroachdb-kubernetes-operator.md b/src/current/v25.2/secure-cockroachdb-operator.md similarity index 94% rename from src/current/v25.2/secure-cockroachdb-kubernetes-operator.md rename to src/current/v25.2/secure-cockroachdb-operator.md index 9d17572f55c..f88415e4367 100644 --- a/src/current/v25.2/secure-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/secure-cockroachdb-operator.md @@ -20,7 +20,7 @@ You may need to rotate the node, client, or CA certificates in the following sce ### Example: Rotate certificates signed with `cockroach cert` -If you previously [authenticated with cockroach cert]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster), follow these steps to rotate the certificates using the same CA: +If you previously [authenticated with cockroach cert]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster), follow these steps to rotate the certificates 
using the same CA: 1. Create a new client certificate and key pair for the root user, overwriting the previous certificate and key: @@ -45,7 +45,7 @@ If you previously [authenticated with cockroach cert]({% link {{ page.version.ve secret/cockroachdb.client.root.2 created ~~~ -1. Create a new certificate and key pair for your CockroachDB nodes, overwriting the previous certificate and key. Specify the namespace you used when [deploying the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster). This example uses the `cockroach-ns` namespace: +1. Create a new certificate and key pair for your CockroachDB nodes, overwriting the previous certificate and key. Specify the namespace you used when [deploying the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster). This example uses the `cockroach-ns` namespace: {% include_cached copy-clipboard.html %} ~~~ shell @@ -75,7 +75,7 @@ If you previously [authenticated with cockroach cert]({% link {{ page.version.ve secret/cockroachdb.node.2 created ~~~ -1. Add `cockroachdb.tls.externalCertificates.certificates.nodeClientSecretName` and `cockroachdb.tls.externalCertificates.certificates.nodeSecretName` to the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster): +1. 
Add `cockroachdb.tls.externalCertificates.certificates.nodeClientSecretName` and `cockroachdb.tls.externalCertificates.certificates.nodeSecretName` to the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster): ~~~ yaml cockroachdb: @@ -101,7 +101,7 @@ If you previously [authenticated with cockroach cert]({% link {{ page.version.ve ~~~ {{site.data.alerts.callout_info}} - Remember that `nodeSecretName` and `nodeClientSecretName` in the operator configuration must specify these secret names. For details, see the [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}#initialize-the-cluster). + Remember that `nodeSecretName` and `nodeClientSecretName` in the operator configuration must specify these secret names. For details, see the [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster). {{site.data.alerts.end}} 1. Apply the new settings to the cluster: diff --git a/src/current/v25.2/upgrade-cockroachdb-kubernetes.md b/src/current/v25.2/upgrade-cockroachdb-kubernetes.md index 360d9edecec..dabf930eef0 100644 --- a/src/current/v25.2/upgrade-cockroachdb-kubernetes.md +++ b/src/current/v25.2/upgrade-cockroachdb-kubernetes.md @@ -9,7 +9,7 @@ docs_area: deploy This page shows how to upgrade a CockroachDB cluster that is [deployed on a Kubernetes cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes.md %}). -This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [Upgrade a Cluster in Kubernetes with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/upgrade-cockroachdb-kubernetes-operator.md %}). 
+This page is for Kubernetes deployments that are not using the {{ site.data.products.cockroachdb-operator }}. For guidance specific to the {{ site.data.products.cockroachdb-operator }}, read [Upgrade a Cluster in Kubernetes with the {{ site.data.products.cockroachdb-operator }}]({% link {{ page.version.version }}/upgrade-cockroachdb-operator.md %}). {% include {{ page.version.version }}/cockroachdb-operator-recommendation.md %} diff --git a/src/current/v25.2/upgrade-cockroachdb-kubernetes-operator.md b/src/current/v25.2/upgrade-cockroachdb-operator.md similarity index 97% rename from src/current/v25.2/upgrade-cockroachdb-kubernetes-operator.md rename to src/current/v25.2/upgrade-cockroachdb-operator.md index 0f4272e5d16..a9c78200904 100644 --- a/src/current/v25.2/upgrade-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/upgrade-cockroachdb-operator.md @@ -7,7 +7,7 @@ secure: true docs_area: deploy --- -This page describes how to upgrade a CockroachDB cluster that is [deployed on a Kubernetes cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-kubernetes-operator.md %}) with the {{ site.data.products.cockroachdb-operator }}. +This page describes how to upgrade a CockroachDB cluster that is [deployed on a Kubernetes cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}) with the {{ site.data.products.cockroachdb-operator }}. {{site.data.alerts.callout_info}} The {{ site.data.products.cockroachdb-operator }} is in [Preview]({% link {{ page.version.version }}/cockroachdb-feature-availability.md %}). 
From 9a95d154608b7134d90cffceb3583c78a72b71fc Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Wed, 6 Aug 2025 16:49:39 -0400 Subject: [PATCH 17/27] Boilerplate grammar fix --- .../_includes/v25.2/cockroachdb-operator-recommendation.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/current/_includes/v25.2/cockroachdb-operator-recommendation.md b/src/current/_includes/v25.2/cockroachdb-operator-recommendation.md index 823a3696b55..167c3e3ef05 100644 --- a/src/current/_includes/v25.2/cockroachdb-operator-recommendation.md +++ b/src/current/_includes/v25.2/cockroachdb-operator-recommendation.md @@ -2,12 +2,12 @@ {{ site.data.alerts.callout_success }} The {{ site.data.products.cockroachdb-operator }} is a fully-featured Kubernetes operator that is designed for ease of deployment and scaling of both single-region and multi-region clusters. To learn more, read the [{{ site.data.products.cockroachdb-operator }} documentation]({% link v25.2/cockroachdb-operator-overview.md %}). -New deployments of CockroachDB on Kubernetes are recommended to use the {{ site.data.products.cockroachdb-operator }}. To migrate an existing deployment to use the {{ site.data.products.cockroachdb-operator }}, read the [Helm]({% link v25.2/migrate-cockroachdb-kubernetes-helm.md %}) and [{{ site.data.products.public-operator }}]({% link v25.2/migrate-cockroachdb-kubernetes-operator.md %}) migration guides. +Cockroach Labs recommends that new deployments of CockroachDB on Kubernetes use the {{ site.data.products.cockroachdb-operator }}. To migrate an existing deployment to use the {{ site.data.products.cockroachdb-operator }}, read the [Helm]({% link v25.2/migrate-cockroachdb-kubernetes-helm.md %}) and [{{ site.data.products.public-operator }}]({% link v25.2/migrate-cockroachdb-kubernetes-operator.md %}) migration guides. 
{{ site.data.alerts.end }} {% else %} {{ site.data.alerts.callout_success }} The {{ site.data.products.cockroachdb-operator }} is a fully-featured Kubernetes operator that is designed for ease of deployment and scaling of multi-region clusters. To learn more, read the [{{ site.data.products.cockroachdb-operator }} documentation]({% link {{ page.version.version }}/cockroachdb-operator-overview.md %}). -New deployments of CockroachDB on Kubernetes are recommended to use the {{ site.data.products.cockroachdb-operator }}. To migrate an existing deployment to use the {{ site.data.products.cockroachdb-operator }}, read the [Helm]({% link {{ page.version.version }}/migrate-cockroachdb-kubernetes-helm.md %}) and [{{ site.data.products.public-operator }}]({% link {{ page.version.version }}/migrate-cockroachdb-kubernetes-operator.md %}) migration guides. +Cockroach Labs recommends that new deployments of CockroachDB on Kubernetes use the {{ site.data.products.cockroachdb-operator }}. To migrate an existing deployment to use the {{ site.data.products.cockroachdb-operator }}, read the [Helm]({% link {{ page.version.version }}/migrate-cockroachdb-kubernetes-helm.md %}) and [{{ site.data.products.public-operator }}]({% link {{ page.version.version }}/migrate-cockroachdb-kubernetes-operator.md %}) migration guides. 
{{ site.data.alerts.end }} {% endif %} \ No newline at end of file From 4e7da8553dae2451a7794f7d2f60fea21d61fc74 Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Wed, 6 Aug 2025 22:54:34 -0400 Subject: [PATCH 18/27] Address remaining Michael comments --- .../v25.2/cockroachdb-operator-overview.md | 2 +- .../migrate-cockroachdb-kubernetes-helm.md | 24 ++++++++++------- ...migrate-cockroachdb-kubernetes-operator.md | 26 ++++++++++++------- ...override-templates-cockroachdb-operator.md | 2 -- 4 files changed, 32 insertions(+), 22 deletions(-) diff --git a/src/current/v25.2/cockroachdb-operator-overview.md b/src/current/v25.2/cockroachdb-operator-overview.md index f00f70e7d98..45ebe199749 100644 --- a/src/current/v25.2/cockroachdb-operator-overview.md +++ b/src/current/v25.2/cockroachdb-operator-overview.md @@ -43,6 +43,6 @@ Feature | Description --------|------------ [node](https://kubernetes.io/docs/concepts/architecture/nodes/) | A physical or virtual machine. In the [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}), you'll create instances and join them as worker nodes into a single Kubernetes cluster. [pod](http://kubernetes.io/docs/user-guide/pods/) | A pod is a group of one of more Docker containers. In the [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}), each pod will run on a separate Kubernetes worker node and include one Docker container running a single CockroachDB node, reflecting our [topology recommendations]({% link {{ page.version.version }}/recommended-production-settings.md %}#topology). -[operator](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/) | An operator is an extension to Kubernetes that uses custom resources to efficiently manage specific applications. 
The {{ site.data.products.cockroachdb-operator }} includes two custom resource definitions, `cockroachdb` to manage a CockroachDB pod and `operator` to manage the operator pod itself. Unlike the older [{{ site.data.products.public-operator }}](https://github.com/cockroachdb/cockroach-operator), the {{ site.data.products.cockroachdb-operator }} does not use StatefulSets and is designed to simplify multi-region deployments. +[operator](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/) | An operator is an extension to Kubernetes that uses custom resources to efficiently manage specific applications. The {{ site.data.products.cockroachdb-operator }} includes two custom resource definitions to manage CockroachDB, `CrdbCluster` and `CrdbNode`. Unlike the older [{{ site.data.products.public-operator }}](https://github.com/cockroachdb/cockroach-operator), the {{ site.data.products.cockroachdb-operator }} does not use StatefulSets and is designed to simplify multi-region deployments. [persistent volume](http://kubernetes.io/docs/user-guide/persistent-volumes/) | A persistent volume is a piece of networked storage (Persistent Disk on GCE, Elastic Block Store on AWS) mounted into a pod. The lifetime of a persistent volume is decoupled from the lifetime of the pod that's using it, ensuring that each CockroachDB node binds back to the same storage on restart.

The [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}) assumes that dynamic volume provisioning is available. When that is not the case, [persistent volume claims](http://kubernetes.io/docs/user-guide/persistent-volumes/#persistentvolumeclaims) need to be created manually. [RBAC](https://kubernetes.io/docs/reference/access-authn-authz/rbac/) | RBAC, or Role-Based Access Control, is the system Kubernetes uses to manage permissions within the cluster. In order to take an action (e.g., `get` or `create`) on an API resource (e.g., a `pod`), the client must have a `Role` that allows it to do so. The [deployment guide]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}) creates the RBAC resources necessary for CockroachDB to create and access certificates. diff --git a/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md b/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md index b7023447491..8eacdca3198 100644 --- a/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md +++ b/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md @@ -174,10 +174,22 @@ If the migration to the {{ site.data.products.cockroachdb-operator}} fails durin 1. Delete the applied `crdbnode` resources and simultaneously scale the StatefulSet back up. - Delete the individual `crdbnode` manifests in the reverse order of their creation (starting with the last one created, e.g., `crdbnode-2.yaml`) and scale the StatefulSet back to its original replica count (e.g., 3). For example, assuming you have applied two `crdbnode` yaml files (`crdbnode-2.yaml` & `crdbnode-1.yaml`): + Delete the individual `crdbnode` manifests in the reverse order of their creation (starting with the last one created, e.g., `crdbnode-1.yaml`) and scale the StatefulSet back to its original replica count (e.g., 2). For example, assuming you have applied two `crdbnode` yaml files (`crdbnode-2.yaml` and `crdbnode-1.yaml`): + + 1. 
Delete a `crdbnode` manifest in reverse order, starting with `crdbnode-1.yaml`. + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl delete -f manifests/crdbnode-1.yaml + ~~~ - 1. Delete a `crdbnode` manifest in reverse order, starting with `crdbnode-2.yaml`. 1. Scale the StatefulSet replica count up by one (to 2). + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl scale statefulset $CRDBCLUSTER --replicas=2 + ~~~ + 1. Verify that data has propagated by waiting for there to be zero under-replicated ranges: 1. Set up port forwarding to access the CockroachDB node's HTTP interface, replacing `cockroachdb-X` with the node name: @@ -198,14 +210,8 @@ If the migration to the {{ site.data.products.cockroachdb-operator}} fails durin This command outputs the number of under-replicated ranges on the node, which should be zero before proceeding with the next node. This may take some time depending on the deployment, but is necessary to ensure that there is no downtime in data availability. - 1. Repeat steps a through c for each node, deleting the `crdbnode-1.yaml`, scaling replica count to 3, and so on. + 1. Repeat steps a through c for each node, deleting the `crdbnode-2.yaml`, scaling replica count to 3, and so on. - {% include_cached copy-clipboard.html %} - ~~~ shell - kubectl delete -f manifests/crdbnode-2.yaml - kubectl scale statefulset $CRDBCLUSTER --replicas=2 - ~~~ - Repeat the `kubectl delete -f ... command` for each `crdbnode` manifest you applied during migration. Make sure to verify that there are no underreplicated ranges after rolling back each node. 1. 
Delete the PriorityClass and RBAC resources created for the CockroachDB operator: diff --git a/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md b/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md index 3e3a1acb10a..b9d0d539ecb 100644 --- a/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md @@ -222,10 +222,22 @@ If the migration to the {{ site.data.products.cockroachdb-operator}} fails durin 1. Delete the applied `crdbnode` resources and simultaneously scale the StatefulSet back up. - Delete the individual `crdbnode` manifests in the reverse order of their creation (starting with the last one created, e.g., `crdbnode-2.yaml`) and scale the StatefulSet back to its original replica count (e.g., 3). For example, assuming you have applied two `crdbnode` yaml files (`crdbnode-2.yaml` & `crdbnode-1.yaml`): + Delete the individual `crdbnode` manifests in the reverse order of their creation (starting with the last one created, e.g., `crdbnode-1.yaml`) and scale the StatefulSet back to its original replica count (e.g., 2). For example, assuming you have applied two `crdbnode` yaml files (`crdbnode-2.yaml` and `crdbnode-1.yaml`): + + 1. Delete a `crdbnode` manifest in reverse order, starting with `crdbnode-1.yaml`. + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl delete -f manifests/crdbnode-1.yaml + ~~~ - 1. Delete a `crdbnode` manifest in reverse order, starting with `crdbnode-2.yaml`. 1. Scale the StatefulSet replica count up by one (to 2). + + {% include_cached copy-clipboard.html %} + ~~~ shell + kubectl scale statefulset $CRDBCLUSTER --replicas=2 + ~~~ + 1. Verify that data has propagated by waiting for there to be zero under-replicated ranges: 1. 
Set up port forwarding to access the CockroachDB node's HTTP interface, replacing `cockroachdb-X` with the node name: @@ -245,15 +257,9 @@ If the migration to the {{ site.data.products.cockroachdb-operator}} fails durin ~~~ This command outputs the number of under-replicated ranges on the node, which should be zero before proceeding with the next node. This may take some time depending on the deployment, but is necessary to ensure that there is no downtime in data availability. - - 1. Repeat steps a through c for each node, deleting the `crdbnode-1.yaml`, scaling replica count to 3, and so on. - {% include_cached copy-clipboard.html %} - ~~~ shell - kubectl delete -f manifests/crdbnode-2.yaml - kubectl scale statefulset $CRDBCLUSTER --replicas=2 - ~~~ - + 1. Repeat steps a through c for each node, deleting the `crdbnode-2.yaml`, scaling replica count to 3, and so on. + Repeat the `kubectl delete -f ... command` for each `crdbnode` manifest you applied during migration. Make sure to verify that there are no underreplicated ranges after rolling back each node. 1. Delete the PriorityClass and RBAC resources created for the CockroachDB operator: diff --git a/src/current/v25.2/override-templates-cockroachdb-operator.md b/src/current/v25.2/override-templates-cockroachdb-operator.md index 02de07fadf5..f98cb7d209c 100644 --- a/src/current/v25.2/override-templates-cockroachdb-operator.md +++ b/src/current/v25.2/override-templates-cockroachdb-operator.md @@ -40,8 +40,6 @@ cockroachdb: containers: - name: cockroachdb image: cockroachdb/cockroach:v25.2.2 - - name: cert-reloader - image: us-docker.pkg.dev/cockroach-cloud-images/data-plane/inotifywait:87edf086db32734c7fa083a62d1055d664900840 # imagePullSecrets captures the secrets for fetching images from private registries. 
imagePullSecrets: [] ~~~ From c45bf5fa7eb18b0954206d7b4348595067d9276f Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Thu, 7 Aug 2025 11:30:26 -0400 Subject: [PATCH 19/27] Add warning about RPC downtime during Helm migration --- src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md b/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md index 8eacdca3198..5404a0fc194 100644 --- a/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md +++ b/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md @@ -25,6 +25,10 @@ If your existing cluster was created using the {{ site.data.products.public-oper This migration can be completed without affecting cluster availability, and preserves existing disks so that data doesn't need to be replicated into empty volumes. The process scales down the StatefulSet by one node before adding each operator-managed pod, so the maximum cluster capacity will be reduced by one node periodically throughout the migration. +{{site.data.alerts.callout_danger}} +The {{ site.data.products.cockroachdb-operator }} uses a different port than StatefulSets for RPC services, meaning that the migration will cause temporary downtime specifically for issuing commands that use RPCs. This includes commands such as `cockroach node drain` and `cockroach node decommission`. Plan for this limited downtime until the public service is updated in Step 4 as described in this guide. +{{site.data.alerts.end}} + ## Step 1. 
Prepare the migration helper Build the migration helper and add the `./bin` directory to your PATH: From 24cf6e44a332e4be2f04b29f61ee98714ef05903 Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Thu, 7 Aug 2025 11:38:09 -0400 Subject: [PATCH 20/27] Add note about overriding pod anti-affinity --- src/current/v25.2/schedule-cockroachdb-operator.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/current/v25.2/schedule-cockroachdb-operator.md b/src/current/v25.2/schedule-cockroachdb-operator.md index 733a56f9c7d..6b3b5563ec6 100644 --- a/src/current/v25.2/schedule-cockroachdb-operator.md +++ b/src/current/v25.2/schedule-cockroachdb-operator.md @@ -80,7 +80,9 @@ For more context on how these rules work, see the [Kubernetes documentation](htt ### Add a pod affinity or anti-affinity -Specify pod affinities and anti-affinities in `cockroachdb.crdbCluster.affinity.podAffinity` and `cockroachdb.crdbCluster.affinity.podAntiAffinity` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster). If you specify multiple `matchExpressions` labels, the node must match all of them. If you specify multiple `values` for a label, the node can match any of the values. +Specify pod affinities and pod anti-affinities in `cockroachdb.crdbCluster.affinity.podAffinity` and `cockroachdb.crdbCluster.affinity.podAntiAffinity` in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster). If you specify multiple `matchExpressions` labels, the node must match all of them. If you specify multiple `values` for a label, the node can match any of the values. + +The {{ site.data.products.cockroachdb-operator }} hard-codes the pod template to only allow one pod per Kubernetes node. 
If you need to override this value, you can [override the pod template]({% link {{ page.version.version }}/override-templates-cockroachdb-operator.md %}#override-the-default-pod). The following configuration attempts to schedule CockroachDB pods in the same zones as the pods that run our example [load generator](https://github.com/cockroachdb/cockroach/blob/master/cloud/kubernetes/example-app.yaml) app. It disallows CockroachDB pods from being co-located on the same worker node. From cf11e82eda8ac96c8431d4d89fc98e66664faa91 Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Thu, 7 Aug 2025 11:43:28 -0400 Subject: [PATCH 21/27] Add callout about dynamically scaling disks --- src/current/v25.2/configure-cockroachdb-operator.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/current/v25.2/configure-cockroachdb-operator.md b/src/current/v25.2/configure-cockroachdb-operator.md index c09063e26c5..2568e3aa818 100644 --- a/src/current/v25.2/configure-cockroachdb-operator.md +++ b/src/current/v25.2/configure-cockroachdb-operator.md @@ -85,6 +85,10 @@ You should provision an appropriate amount of disk storage for your workload. Fo If you discover that you need more capacity, you can expand the persistent volumes on a running cluster. Increasing disk size is often [beneficial for CockroachDB performance]({% link {{ page.version.version }}/cockroachdb-operator-performance.md %}). +{{site.data.alerts.callout_info}} +The volume size should only be adjusted on disk types that can dynamically scale up, such as Amazon EBS volumes. Adjusting the volume size on non-dynamically scaling disks is not recommended, and instead you should horizontally scale the number of disks used. 
+{{site.data.alerts.end}} + Specify a new volume size in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster): ~~~ yaml From 08f9f204a77343bd7164f47f7f42447761adebe1 Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Thu, 7 Aug 2025 11:45:02 -0400 Subject: [PATCH 22/27] Remove undocumented fluentservers --- src/current/v25.2/monitor-cockroachdb-operator.md | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/current/v25.2/monitor-cockroachdb-operator.md b/src/current/v25.2/monitor-cockroachdb-operator.md index 6c7efd04d26..d4b75a886f3 100644 --- a/src/current/v25.2/monitor-cockroachdb-operator.md +++ b/src/current/v25.2/monitor-cockroachdb-operator.md @@ -248,17 +248,6 @@ data: dev: channels: DEV filter: WARNING - fluent-servers: - ops: - channels: [OPS, HEALTH, SQL_SCHEMA] - address: 127.0.0.1:5170 - net: tcp - redact: true - security: - channels: [SESSIONS, USER_ADMIN, PRIVILEGES, SENSITIVE_ACCESS] - address: 127.0.0.1:5170 - net: tcp - auditable: true kind: ConfigMap metadata: name: logconfig From fb867fb8b6652fba74dcd31fc76fddcfc4fa020e Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Thu, 7 Aug 2025 11:54:07 -0400 Subject: [PATCH 23/27] Clarify where to build the migration-helper --- src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md | 2 +- src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md b/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md index 5404a0fc194..6499be0a157 100644 --- a/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md +++ b/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md @@ -31,7 +31,7 @@ The {{ site.data.products.cockroachdb-operator }} uses a different port than Sta ## Step 1. 
Prepare the migration helper -Build the migration helper and add the `./bin` directory to your PATH: +In the root of the [cockroachdb/helm-charts](https://github.com/cockroachdb/helm-charts/tree/master) repository, build the migration helper and add the `./bin` directory to your PATH: {% include_cached copy-clipboard.html %} ~~~ shell diff --git a/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md b/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md index b9d0d539ecb..5aa0a3dfa7f 100644 --- a/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md @@ -30,7 +30,7 @@ This migration process can be completed without affecting cluster availability, ## Step 1. Prepare the migration helper -Build the migration helper and add the `./bin` directory to your PATH: +In the root of the [cockroachdb/helm-charts](https://github.com/cockroachdb/helm-charts/tree/master) repository, build the migration helper and add the `./bin` directory to your PATH: {% include_cached copy-clipboard.html %} ~~~ shell From a01099eafa742381f8162e1ac76f0ef2f07a4a21 Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Thu, 7 Aug 2025 13:50:21 -0400 Subject: [PATCH 24/27] Add certManager example yaml --- ...y-cockroachdb-with-cockroachdb-operator.md | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/current/v25.2/deploy-cockroachdb-with-cockroachdb-operator.md b/src/current/v25.2/deploy-cockroachdb-with-cockroachdb-operator.md index 47e0f4077ba..89718c6ae95 100644 --- a/src/current/v25.2/deploy-cockroachdb-with-cockroachdb-operator.md +++ b/src/current/v25.2/deploy-cockroachdb-with-cockroachdb-operator.md @@ -340,7 +340,28 @@ For bare metal deployments, the specific Kubernetes infrastructure deployment st The subject alternative names are based on a release called `my-release` in the `cockroach-ns` namespace. 
Make sure they match the services created with the release during Helm install. {{site.data.alerts.end}} - If you wish to supply certificates with [cert-manager](https://cert-manager.io/), set `cockroachdb.tls.certManager.enabled` to `true`, and `cockroachdb.tls.certManager.issuer` to an IssuerRef (as they appear in certificate resources) pointing to a clusterIssuer or issuer that you have set up in the cluster. The following Kubernetes application describes an example issuer: + If you wish to supply certificates with [cert-manager](https://cert-manager.io/), set `cockroachdb.tls.certManager.enabled` to `true`, and `cockroachdb.tls.certManager.issuer` to an IssuerRef (as they appear in certificate resources) pointing to a clusterIssuer or issuer that you have set up in the cluster: + + ~~~ yaml + cockroachdb: + tls: + enabled: true + certManager: + enabled: true + caConfigMap: cockroachdb-ca + nodeSecret: cockroachdb-node + clientRootSecret: cockroachdb-root + issuer: + group: cert-manager.io + kind: Issuer + name: cockroachdb-cert-issuer + clientCertDuration: 672h + clientCertExpiryWindow: 48h + nodeCertDuration: 8760h + nodeCertExpiryWindow: 168h + ~~~ + + The following Kubernetes application describes an example issuer. 
~~~ yaml apiVersion: v1 From 23c156570089dad8509ac11d24b4252731db4ac4 Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Thu, 7 Aug 2025 14:17:26 -0400 Subject: [PATCH 25/27] Florence feedback Co-authored-by: Florence Morris <58752716+florence-crl@users.noreply.github.com> --- src/current/v25.2/configure-cockroachdb-operator.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/current/v25.2/configure-cockroachdb-operator.md b/src/current/v25.2/configure-cockroachdb-operator.md index 2568e3aa818..9029f6bd2fa 100644 --- a/src/current/v25.2/configure-cockroachdb-operator.md +++ b/src/current/v25.2/configure-cockroachdb-operator.md @@ -148,7 +148,7 @@ You can configure an [Ingress](https://kubernetes.io/docs/concepts/services-netw In order to use the Ingress resource, your cluster must be running an [Ingress controller](https://kubernetes.io/docs/concepts/services-networking/ingress-controllers/) for load balancing. This is **not** handled by the {{ site.data.products.cockroachdb-operator }} and must be deployed separately. -Specify Ingress objects in `cockroachdb.crdbCluster.service.ingress`. Set `ingress.enabled` to `true` and specify `ingress.ui` (HTTP) or `ingress.sql` (SQL) in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster)): +Specify Ingress objects in `cockroachdb.crdbCluster.service.ingress`. 
Set `ingress.enabled` to `true` and specify `ingress.ui` (HTTP) or `ingress.sql` (SQL) in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster): ~~~ yaml cockroachdb: From 60d4b4ef1e44043a55aeded9aba21e8d0bc283e0 Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Thu, 7 Aug 2025 14:23:39 -0400 Subject: [PATCH 26/27] Add --cascade=orphan flag info --- src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md b/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md index 5aa0a3dfa7f..2f0fb8f9092 100644 --- a/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md +++ b/src/current/v25.2/migrate-cockroachdb-kubernetes-operator.md @@ -105,6 +105,8 @@ The {{ site.data.products.public-operator }} and the {{ site.data.products.cockr kubectl delete crdbcluster $CRDBCLUSTER --cascade=orphan ~~~ + The `--cascade=orphan` flag tells Kubernetes not to delete the dependent resources (StatefulSets, Services, PVCs, etc.) created by the `CrdbCluster` custom resource. This ensures that only the parent custom resource is deleted, while child resources are left intact in the cluster. This allows the CockroachDB cluster to continue running as a StatefulSet until the migration is complete. 
+ - Delete {{ site.data.products.public-operator }} resources and custom resource definition: {% include_cached copy-clipboard.html %} ~~~ shell From 680b0cde89c4a13d9c1e8cd5c43f74f5072db61b Mon Sep 17 00:00:00 2001 From: Joe Lodin Date: Thu, 7 Aug 2025 16:43:25 -0400 Subject: [PATCH 27/27] Michael comments --- src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md | 2 +- src/current/v25.2/monitor-cockroachdb-operator.md | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md b/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md index 6499be0a157..5a8ece3cb28 100644 --- a/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md +++ b/src/current/v25.2/migrate-cockroachdb-kubernetes-helm.md @@ -26,7 +26,7 @@ If your existing cluster was created using the {{ site.data.products.public-oper This migration can be completed without affecting cluster availability, and preserves existing disks so that data doesn't need to be replicated into empty volumes. The process scales down the StatefulSet by one node before adding each operator-managed pod, so the maximum cluster capacity will be reduced by one node periodically throughout the migration. {{site.data.alerts.callout_danger}} -The {{ site.data.products.cockroachdb-operator }} uses a different port than StatefulSets for RPC services, meaning that the migration will cause temporary downtime specifically for issuing commands that use RPCs. This includes commands such as `cockroach node drain` and `cockroach node decommission`. Plan for this limited downtime until the public service is updated in Step 4 as described in this guide. +Commands that use RPCs (such as `cockroach node drain` and `cockroach node decommission`) will be unavailable until the public service is updated in step 4. The {{ site.data.products.cockroachdb-operator }} uses a different port than StatefulSets for RPC services, causing these commands to fail for a limited time. 
{{site.data.alerts.end}} ## Step 1. Prepare the migration helper diff --git a/src/current/v25.2/monitor-cockroachdb-operator.md b/src/current/v25.2/monitor-cockroachdb-operator.md index d4b75a886f3..86cd625d29f 100644 --- a/src/current/v25.2/monitor-cockroachdb-operator.md +++ b/src/current/v25.2/monitor-cockroachdb-operator.md @@ -254,10 +254,7 @@ metadata: namespace: cockroach-ns ~~~ -The above configuration overrides the [default logging configuration]({% link {{ page.version.version }}/configure-logs.md %}#default-logging-configuration) and reflects our recommended Kubernetes logging configuration: - -- Save debug-level logs (the `DEV` [log channel]({% link {{ page.version.version }}/logging-overview.md %}#logging-channels)) to disk for troubleshooting. -- Send operational- and security-level logs to a [network collector]({% link {{ page.version.version }}/logging-use-cases.md %}#network-logging), in this case [Fluentd]({% link {{ page.version.version }}/configure-logs.md %}#fluentd-logging-format). +The above configuration overrides the [default logging configuration]({% link {{ page.version.version }}/configure-logs.md %}#default-logging-configuration) and saves debug-level logs (the `DEV` [log channel]({% link {{ page.version.version }}/logging-overview.md %}#logging-channels)) to disk for troubleshooting. The ConfigMap `name` must match the `cockroachdb.crdbCluster.loggingConfigMapName` object in the values file used to [deploy the cluster]({% link {{ page.version.version }}/deploy-cockroachdb-with-cockroachdb-operator.md %}#initialize-the-cluster):