diff --git a/charts/sentry/templates/snuba/cleanup/cronjob-clickhouse-cleanup.yaml b/charts/sentry/templates/snuba/cleanup/cronjob-clickhouse-cleanup.yaml
new file mode 100644
index 000000000..e8eee7a07
--- /dev/null
+++ b/charts/sentry/templates/snuba/cleanup/cronjob-clickhouse-cleanup.yaml
@@ -0,0 +1,242 @@
+{{- /*
+CronJob that runs clickhouse-client to delete rows older than
+snuba.cleanup.retentionDays from the tables selected by snuba.cleanup.tables.
+*/ -}}
+{{- if .Values.snuba.cleanup.enabled }}
+{{- $batchApiIsStable := eq (include "sentry.batch.isStable" .) "true" -}}
+apiVersion: {{ include "sentry.batch.apiVersion" . }}
+kind: CronJob
+metadata:
+  name: {{ template "sentry.fullname" . }}-clickhouse-cleanup
+  labels:
+    app: {{ template "sentry.fullname" . }}
+    chart: "{{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}"
+    release: "{{ .Release.Name }}"
+    heritage: "{{ .Release.Service }}"
+spec:
+  schedule: "{{ .Values.snuba.cleanup.schedule }}"
+  successfulJobsHistoryLimit: {{ .Values.snuba.cleanup.successfulJobsHistoryLimit }}
+  failedJobsHistoryLimit: {{ .Values.snuba.cleanup.failedJobsHistoryLimit }}
+  concurrencyPolicy: "{{ .Values.snuba.cleanup.concurrencyPolicy }}"
+  jobTemplate:
+    spec:
+      {{- if .Values.snuba.cleanup.activeDeadlineSeconds }}
+      activeDeadlineSeconds: {{ .Values.snuba.cleanup.activeDeadlineSeconds }}
+      {{- end }}
+      template:
+        metadata:
+          annotations:
+            checksum/configYml: {{ .Values.config.configYml | toYaml | toString | sha256sum }}
+            checksum/config.yaml: {{ include "snuba.config" . | sha256sum }}
+            {{- if .Values.snuba.cleanup.annotations }}
+{{ toYaml .Values.snuba.cleanup.annotations | indent 12 }}
+            {{- end }}
+          labels:
+            app: {{ template "sentry.fullname" . }}
+            release: "{{ .Release.Name }}"
+            {{- if .Values.snuba.cleanup.podLabels }}
+{{ toYaml .Values.snuba.cleanup.podLabels | indent 12 }}
+            {{- end }}
+        spec:
+          {{- if .Values.snuba.cleanup.affinity }}
+          affinity:
+{{ toYaml .Values.snuba.cleanup.affinity | indent 12 }}
+          {{- end }}
+          {{- if .Values.snuba.cleanup.nodeSelector }}
+          nodeSelector:
+{{ toYaml .Values.snuba.cleanup.nodeSelector | indent 12 }}
+          {{- else if .Values.global.nodeSelector }}
+          nodeSelector:
+{{ toYaml .Values.global.nodeSelector | indent 12 }}
+          {{- end }}
+          {{- if .Values.snuba.cleanup.tolerations }}
+          tolerations:
+{{ toYaml .Values.snuba.cleanup.tolerations | indent 12 }}
+          {{- else if .Values.global.tolerations }}
+          tolerations:
+{{ toYaml .Values.global.tolerations | indent 12 }}
+          {{- end }}
+          {{- if .Values.dnsPolicy }}
+          dnsPolicy: {{ .Values.dnsPolicy | quote }}
+          {{- end }}
+          {{- if .Values.dnsConfig }}
+          dnsConfig:
+{{ toYaml .Values.dnsConfig | indent 12 }}
+          {{- end }}
+          {{- if .Values.images.snuba.imagePullSecrets }}
+          imagePullSecrets:
+{{ toYaml .Values.images.snuba.imagePullSecrets | indent 12 }}
+          {{- end }}
+          {{- if .Values.snuba.cleanup.securityContext }}
+          securityContext:
+{{ toYaml .Values.snuba.cleanup.securityContext | indent 12 }}
+          {{- end }}
+          containers:
+          - name: {{ .Chart.Name }}-clickhouse-cleanup
+            image: clickhouse/clickhouse-client:latest
+            imagePullPolicy: IfNotPresent
+            command: ["/bin/bash"]
+            args:
+              - "-c"
+              - |
+                set -e
+                echo "Starting ClickHouse cleanup with retention period of {{ .Values.snuba.cleanup.retentionDays }} days"
+                echo "Connecting to ClickHouse at $CLICKHOUSE_HOST:$CLICKHOUSE_PORT"
+
+                # Prints the tables selected for cleanup on stdout, one per line.
+                # The caller captures stdout with $(...), so every diagnostic goes
+                # to stderr; otherwise log lines would be read back as table names.
+                # Returns 1 when the table list is empty or nothing matches.
+                discover_tables() {
+                  echo "Auto-detecting tables in ClickHouse database..." >&2
+
+                  # Get all tables from the database
+                  ALL_TABLES=$(clickhouse-client \
+                    --host="$CLICKHOUSE_HOST" \
+                    --port="$CLICKHOUSE_PORT" \
+                    --user="$CLICKHOUSE_USER" \
+                    --database="$CLICKHOUSE_DATABASE" \
+                    --password="$CLICKHOUSE_PASSWORD" \
+                    --query="SHOW TABLES" 2>/dev/null || echo "")
+
+                  if [ -z "$ALL_TABLES" ]; then
+                    echo "Warning: Could not retrieve table list, falling back to hardcoded list" >&2
+                    return 1
+                  fi
+
+                  echo "Found tables: $ALL_TABLES" >&2
+
+                  # Filter tables based on include/exclude patterns
+                  DISCOVERED_TABLES=()
+                  for table in $ALL_TABLES; do
+                    # Check include patterns (a match on any pattern is enough)
+                    INCLUDE_MATCH=false
+                    {{- range .Values.snuba.cleanup.tables.includePatterns }}
+                    if echo "$table" | grep -E "{{ . }}" >/dev/null 2>&1; then
+                      INCLUDE_MATCH=true
+                    fi
+                    {{- end }}
+
+                    # Check exclude patterns (a match on any pattern vetoes the table)
+                    EXCLUDE_MATCH=false
+                    {{- range .Values.snuba.cleanup.tables.excludePatterns }}
+                    if echo "$table" | grep -E "{{ . }}" >/dev/null 2>&1; then
+                      EXCLUDE_MATCH=true
+                    fi
+                    {{- end }}
+
+                    # Add to discovered tables if it matches include and doesn't match exclude
+                    if [ "$INCLUDE_MATCH" = true ] && [ "$EXCLUDE_MATCH" = false ]; then
+                      DISCOVERED_TABLES+=("$table")
+                      echo "Including table: $table" >&2
+                    else
+                      echo "Excluding table: $table (include=$INCLUDE_MATCH, exclude=$EXCLUDE_MATCH)" >&2
+                    fi
+                  done
+
+                  if [ ${#DISCOVERED_TABLES[@]} -eq 0 ]; then
+                    echo "Warning: No tables matched the include/exclude patterns, falling back to hardcoded list" >&2
+                    return 1
+                  fi
+
+                  # The table names are the only thing written to stdout
+                  printf '%s\n' "${DISCOVERED_TABLES[@]}"
+                  return 0
+                }
+
+                # Determine which tables to clean up
+                {{- if .Values.snuba.cleanup.tables.autoDetect }}
+                echo "Table auto-detection enabled"
+                if TABLES_TO_CLEANUP=$(discover_tables); then
+                  readarray -t TABLES <<< "$TABLES_TO_CLEANUP"
+                  echo "Using auto-detected tables: ${TABLES[*]}"
+                else
+                  echo "Auto-detection failed, using fallback list"
+                  TABLES=(
+                    {{- range .Values.snuba.cleanup.tables.fallbackList }}
+                    "{{ . }}"
+                    {{- end }}
+                  )
+                fi
+                {{- else }}
+                echo "Table auto-detection disabled, using configured fallback list"
+                TABLES=(
+                  {{- range .Values.snuba.cleanup.tables.fallbackList }}
+                  "{{ . }}"
+                  {{- end }}
+                )
+                {{- end }}
+
+                echo "Tables to cleanup: ${TABLES[*]}"
+
+                # Execute cleanup for each table; a per-table failure is counted, not fatal
+                CLEANED_COUNT=0
+                FAILED_COUNT=0
+
+                for table in "${TABLES[@]}"; do
+                  if [ -n "$table" ]; then
+                    echo "Cleaning up table: $table"
+                    if clickhouse-client \
+                      --host="$CLICKHOUSE_HOST" \
+                      --port="$CLICKHOUSE_PORT" \
+                      --user="$CLICKHOUSE_USER" \
+                      --database="$CLICKHOUSE_DATABASE" \
+                      --password="$CLICKHOUSE_PASSWORD" \
+                      --query="DELETE FROM $table WHERE timestamp < now() - INTERVAL {{ .Values.snuba.cleanup.retentionDays }} DAY"; then
+                      echo "Successfully cleaned up table: $table"
+                      CLEANED_COUNT=$((CLEANED_COUNT + 1))
+                    else
+                      echo "Warning: Failed to cleanup table $table (table may not exist or have timestamp column)"
+                      FAILED_COUNT=$((FAILED_COUNT + 1))
+                    fi
+                  fi
+                done
+
+                echo "ClickHouse cleanup completed: $CLEANED_COUNT tables cleaned, $FAILED_COUNT failed"
+            env:
+            - name: CLICKHOUSE_HOST
+              value: {{ include "sentry.clickhouse.host" . | quote }}
+            - name: CLICKHOUSE_PORT
+              value: {{ include "sentry.clickhouse.port" . | quote }}
+            - name: CLICKHOUSE_USER
+              value: "default"
+            - name: CLICKHOUSE_DATABASE
+              value: "default"
+            - name: CLICKHOUSE_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: {{ template "sentry.fullname" . }}-clickhouse
+                  key: clickhouse-password
+                  optional: true
+{{- if .Values.snuba.cleanup.env }}
+{{ toYaml .Values.snuba.cleanup.env | indent 12 }}
+{{- end }}
+            resources:
+{{ toYaml .Values.snuba.cleanup.resources | indent 14 }}
+{{- if .Values.snuba.cleanup.containerSecurityContext }}
+            securityContext:
+{{ toYaml .Values.snuba.cleanup.containerSecurityContext | indent 14 }}
+{{- end }}
+{{- if .Values.snuba.cleanup.sidecars }}
+{{ toYaml .Values.snuba.cleanup.sidecars | indent 10 }}
+{{- end }}
+{{- if .Values.global.sidecars }}
+{{ toYaml .Values.global.sidecars | indent 10 }}
+{{- end }}
+          restartPolicy: Never
+          {{- if or .Values.snuba.cleanup.volumes .Values.global.volumes }}
+          volumes:
+          {{- if .Values.snuba.cleanup.volumes }}
+{{ toYaml .Values.snuba.cleanup.volumes | indent 10 }}
+          {{- end }}
+          {{- if .Values.global.volumes }}
+{{ toYaml .Values.global.volumes | indent 10 }}
+          {{- end }}
+          {{- end }}
+          {{- if .Values.snuba.cleanup.priorityClassName }}
+          priorityClassName: "{{ .Values.snuba.cleanup.priorityClassName }}"
+          {{- end }}
+          {{- if .Values.serviceAccount.enabled }}
+          serviceAccountName: {{ .Values.serviceAccount.name }}-clickhouse-cleanup
+          {{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/charts/sentry/templates/snuba/cleanup/serviceaccount-clickhouse-cleanup.yaml b/charts/sentry/templates/snuba/cleanup/serviceaccount-clickhouse-cleanup.yaml
new file mode 100644
index 000000000..422c6e953
--- /dev/null
+++ b/charts/sentry/templates/snuba/cleanup/serviceaccount-clickhouse-cleanup.yaml
@@ -0,0 +1,10 @@
+{{- if and .Values.serviceAccount.enabled .Values.snuba.cleanup.enabled }}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ .Values.serviceAccount.name }}-clickhouse-cleanup
+{{- if .Values.serviceAccount.annotations }}
+  annotations: {{ toYaml .Values.serviceAccount.annotations | nindent 4 }}
+{{- end }}
+automountServiceAccountToken: {{ .Values.serviceAccount.automountServiceAccountToken }}
+{{- end }}
\ No newline at end of file
diff --git
a/charts/sentry/values.yaml b/charts/sentry/values.yaml
index ad97fdcff..05ab49ec2 100644
--- a/charts/sentry/values.yaml
+++ b/charts/sentry/values.yaml
@@ -2019,6 +2019,72 @@ snuba:
 
   rustConsumer: false
 
+  # ClickHouse database cleanup configuration
+  cleanup:
+    # -- Enable ClickHouse database cleanup cronjob
+    enabled: false
+    # -- Retention period in days for ClickHouse data cleanup
+    retentionDays: 30
+    # -- Cron schedule for cleanup job (daily at 2 AM by default)
+    schedule: "0 2 * * *"
+    # -- Number of successful job executions to retain
+    successfulJobsHistoryLimit: 5
+    # -- Number of failed job executions to retain
+    failedJobsHistoryLimit: 5
+    # -- Maximum time in seconds for cleanup job to run
+    activeDeadlineSeconds: 300
+    # -- Concurrency policy for cleanup job
+    concurrencyPolicy: Forbid
+    # -- Resource limits and requests for cleanup job
+    resources: {}
+    # Table discovery configuration
+    tables:
+      # -- Auto-detect tables instead of using a hardcoded list
+      autoDetect: true
+      # -- Include patterns for table names (regex patterns)
+      includePatterns:
+        - ".*_local$"
+        - ".*_raw_local$"
+      # -- Exclude patterns for table names (regex patterns)
+      excludePatterns:
+        - "^system\\."
+        - "^information_schema\\."
+        - "^INFORMATION_SCHEMA\\."
+        # Bookkeeping table, not event data: it matches ".*_local$" and
+        # (presumably) has a timestamp column, so the cleanup DELETE would
+        # prune migration history. TODO confirm against the Snuba schema.
+        - "^migrations_"
+      # -- Fallback hardcoded table list (used if autoDetect fails or is disabled)
+      fallbackList:
+        - "discover_local"
+        - "events_local"
+        - "events_ro_local"
+        - "outcomes_raw_local"
+        - "querylog_local"
+        - "sessions_raw_local"
+        - "transactions_local"
+        - "profiles_raw_local"
+        - "functions_raw_local"
+        - "replays_local"
+        - "generic_metric_sets_raw_local"
+        - "generic_metric_distributions_raw_local"
+        - "generic_metric_counters_raw_local"
+        - "spans_local"
+        - "group_attributes_raw_local"
+        - "generic_metric_gauges_raw_local"
+        - "profile_chunks_raw_local"
+    # podLabels: {}
+    # affinity: {}
+    # nodeSelector: {}
+    # tolerations: []
+    # env: []
+    # volumes: []
+    # sidecars: []
+    # containerSecurityContext: {}
+    # securityContext: {}
+    # annotations: {}
+    # volumeMounts: []
+
 hooks:
   enabled: true
   preUpgrade: false
@@ -2174,15 +2240,6 @@ symbolicator:
     # volumeMounts: []
     # sidecars: []
 
-  # TODO The cleanup cronjob is not yet implemented
-  cleanup:
-    enabled: false
-    # podLabels: {}
-    # affinity: {}
-    # env: []
-    # volumes: []
-    # sidecars: []
-
 auth:
   register: true
 