From 9068785d4898a98639255b1da9ff8a48dafb5d71 Mon Sep 17 00:00:00 2001 From: Paulo Motta Date: Sun, 5 Oct 2025 13:28:56 -0300 Subject: [PATCH 01/13] Add ProcessLifecycleProvider --- .circleci/config.yml | 16 +- .gitignore | 4 + build.gradle | 2 + conf/sidecar.yaml | 15 + examples/lifecycle/README.md | 162 +++++++ examples/lifecycle/conf/sidecar.yaml.template | 432 ++++++++++++++++++ examples/lifecycle/setup.sh | 54 +++ examples/{ => sidecar-ccm}/README.md | 2 +- .../{ => sidecar-ccm}/conf/sidecar-ccm.yaml | 0 gradle.properties | 1 + gradle/common/integrationTestTask.gradle | 17 + integration-tests/build.gradle | 1 + ...ocessLifecycleProviderIntegrationTest.java | 198 ++++++++ .../sidecar/utils/TestFileUtils.java | 168 +++++++ .../resources/config/sidecar.yaml.template | 416 +++++++++++++++++ scripts/build-cassandra-tarball.sh | 30 ++ scripts/build-dtest-jars.sh | 28 +- .../cluster/instance/InstanceMetadata.java | 15 + .../instance/InstanceMetadataImpl.java | 38 ++ .../sidecar/config/InstanceConfiguration.java | 6 + .../yaml/InstanceConfigurationImpl.java | 16 +- .../sidecar/lifecycle/LifecycleManager.java | 4 +- .../lifecycle/ProcessLifecycleProvider.java | 210 ++++++++- .../ProcessRuntimeConfiguration.java | 237 ++++++++++ .../sidecar/modules/ConfigurationModule.java | 1 + .../ProcessLifecycleProviderTest.java | 320 +++++++++++++ .../ProcessRuntimeConfigurationTest.java | 310 +++++++++++++ 27 files changed, 2681 insertions(+), 22 deletions(-) create mode 100644 examples/lifecycle/README.md create mode 100644 examples/lifecycle/conf/sidecar.yaml.template create mode 100755 examples/lifecycle/setup.sh rename examples/{ => sidecar-ccm}/README.md (98%) rename examples/{ => sidecar-ccm}/conf/sidecar-ccm.yaml (100%) create mode 100644 integration-tests/src/integrationTest/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProviderIntegrationTest.java create mode 100644 integration-tests/src/integrationTest/org/apache/cassandra/sidecar/utils/TestFileUtils.java create mode 100644 integration-tests/src/integrationTest/resources/config/sidecar.yaml.template create mode 100755 scripts/build-cassandra-tarball.sh create mode 100644 server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessRuntimeConfiguration.java create mode 100644 server/src/test/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProviderTest.java create mode 100644 server/src/test/java/org/apache/cassandra/sidecar/lifecycle/ProcessRuntimeConfigurationTest.java diff --git a/.circleci/config.yml b/.circleci/config.yml index 0b385c480..2589c3101 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -110,6 +110,10 @@ jobs: root: dtest-jars paths: - "*.jar" + - persist_to_workspace: + root: cassandra-tarballs + paths: + - "*.tar.gz" integration_cassandra_40_java11: docker: @@ -150,8 +154,10 @@ jobs: - checkout - attach_workspace: at: dtest-jars + - attach_workspace: + at: cassandra-tarballs - run: ./scripts/install-shaded-dtest-jar-local.sh - - run: ./gradlew --no-daemon -PdtestVersion=4.0.16 -Dcassandra.sidecar.versions_to_test="4.0" integrationTestHeavyWeight --stacktrace + - run: ./gradlew --no-daemon -PdtestVersion=4.0.16 -PtarballVersion=4.0 -Dcassandra.sidecar.versions_to_test="4.0" jar integrationTestHeavyWeight --stacktrace - store_artifacts: path: build/reports @@ -203,8 +209,10 @@ jobs: - checkout - attach_workspace: at: dtest-jars + - attach_workspace: + at: cassandra-tarballs - run: ./scripts/install-shaded-dtest-jar-local.sh - - run: ./gradlew --no-daemon -PdtestVersion=5.0.3 -Dcassandra.sidecar.versions_to_test="5.0" integrationTestHeavyWeight --stacktrace + - run: ./gradlew --no-daemon -PdtestVersion=5.0.3 -PtarballVersion=5.0 -Dcassandra.sidecar.versions_to_test="5.0" jar integrationTestHeavyWeight --stacktrace - store_artifacts: path: build/reports @@ -256,8 +264,10 @@ jobs: - checkout - attach_workspace: at: dtest-jars + - attach_workspace: + at: cassandra-tarballs - run: ./scripts/install-shaded-dtest-jar-local.sh - - run: ./gradlew --no-daemon -PdtestVersion=5.1 -Dcassandra.sidecar.versions_to_test="5.1" integrationTestHeavyWeight --stacktrace + - run: ./gradlew --no-daemon -PdtestVersion=5.1 -PtarballVersion=5.1 -Dcassandra.sidecar.versions_to_test="5.1" jar integrationTestHeavyWeight --stacktrace - store_artifacts: path: build/reports diff --git a/.gitignore b/.gitignore index 65419151d..8bdfcd613 100644 --- a/.gitignore +++ b/.gitignore @@ -97,6 +97,10 @@ lib .gradle dtest-jars +cassandra-tarballs scripts/dependency-reduced-pom.xml default-stylesheet.xsl + +# Examples +examples/lifecycle/nodes/ diff --git a/build.gradle b/build.gradle index 2f4d36679..578bf6825 100644 --- a/build.gradle +++ b/build.gradle @@ -90,6 +90,7 @@ ext { } dependencyLocation = propertyWithDefault("CASSANDRA_DEP_DIR", "${rootDir}/dtest-jars") + "/" + tarballsDir = propertyWithDefault("CASSANDRA_TARBALL_DIR", "${rootDir}/cassandra-tarballs") + "/" // This allows simplified builds and local maven installs. forceSigning = propertyExists("forceSigning") @@ -226,6 +227,7 @@ distributions { exclude '**.idea/**' exclude 'bin/**' exclude 'dtest-jars/**' + exclude 'cassandra-tarballs/**' exclude '**lib/**' exclude '**logs/**' exclude '**/build/**' diff --git a/conf/sidecar.yaml b/conf/sidecar.yaml index 57be2e787..d0652a983 100644 --- a/conf/sidecar.yaml +++ b/conf/sidecar.yaml @@ -66,6 +66,9 @@ cassandra_instances: jmx_ssl_enabled: false # jmx_role: # jmx_role_password: + lifecycle_options: + cassandra_conf_dir: /etc/cassandra +# cassandra_log_dir: /var/log/cassandra sidecar: host: 0.0.0.0 @@ -455,3 +458,15 @@ live_migration: migration_map: # Map of source and destination Cassandra instances # localhost1: localhost4 # This entry says that localhost1 will be migrated to localhost4 max_concurrent_downloads: 20 # Maximum number of concurrent downloads allowed + +# Configuration to allow sidecar start and stop Cassandra instances via the lifecycle API (disabled by default) +lifecycle: + enabled: false + provider: + class_name: org.apache.cassandra.sidecar.lifecycle.ProcessLifecycleProvider + parameters: + state_dir: /var/lib/cassandra-sidecar/lifecycle # The directory where the process state is stored + cassandra_home: /opt/cassandra # The default Cassandra installation directory +# cassandra.ring_delay_ms: 30000 # It is possible to override Cassandra JVM options via the "cassandra." prefix +# env.MAX_HEAP_SIZE: 8G # It is possible to set environment variables used to start Cassandra via the "env" prefix +# env.HEAP_NEWSIZE: 8G diff --git a/examples/lifecycle/README.md b/examples/lifecycle/README.md new file mode 100644 index 000000000..24965b347 --- /dev/null +++ b/examples/lifecycle/README.md @@ -0,0 +1,162 @@ + + +# Starting and stopping Cassandra via lifecycle APIs + +In this guide we will show how to start and stop a local Cassandra instance via sidecar lifecycle APIs. + +## Pre-requirements + +- Configuring Cassandra +- Java 11 + +## Installing and configuring Cassandra + +Use the setup.sh script to install and configure Cassandra and sidecar. + +```shell +$ ./setup.sh +``` + +Once executed, the script should create the following directory structure, simulating a Cassandra host install: + +```shell +$ls -l nodes/localhost/* +nodes/localhost/etc: +total 4 +drwxr-xr-x 3 paulo paulo 4096 Aug 29 16:58 cassandra + +nodes/localhost/opt: +total 4 +drwxr-xr-x 8 paulo paulo 4096 Aug 29 16:58 apache-cassandra-4.1.9 + +nodes/localhost/tmp: +total 50672 +-rw-r--r-- 1 paulo paulo 51883388 May 16 08:09 apache-cassandra-4.1.9-bin.tar.gz + +nodes/localhost/var: +total 8 +drwxr-xr-x 3 paulo paulo 4096 Aug 29 16:58 lib +drwxr-xr-x 3 paulo paulo 4096 Aug 29 16:58 log +``` + +## Starting sidecar + +We can now start our Sidecar instance. The `setup.sh` has already configured `sidecar.yaml` configuration with the correct locations. Start sidecar with: + +```shell +./gradlew run -Dsidecar.config=file:///$PWD/examples/lifecycle/conf/sidecar.yaml +``` + +Since Cassandra is not started yet, you should see the following sidecar logs indicating it's not able to reach Cassandra via JMX: + +``` +INFO [sidecar-internal-worker-pool-1] 2025-08-29 17:10:33,441 JmxClient.java:197 - Could not connect to JMX on service:jmx:rmi://127.0.0.1:7199/jndi/rmi://127.0.0.1:7199/jmxrmi after 1 attempts. Will retry. +``` + +## Checking Cassandra lifecycle state via lifecycle API + +Use the following command to check that the Cassandra instance is not running and CQL is not up: + +```shell +# Check lifecycle state +$curl localhost:9043/api/v1/cassandra/lifecycle +{"current_state":"STOPPED","desired_state":"UNKNOWN","status":"UNDEFINED","last_update":"No lifecycle task submitted for this instance yet."} + +# Check CQL State +$curl localhost:9043/api/v1/cassandra/native/__health +{"status":"NOT_OK"} +``` + +## Starting Cassandra via sidecar + +Now let's try to start Cassandra: + +```shell + $curl -XPUT http://localhost:9043/api/v1/cassandra/lifecycle -d'{"state": "start"}' +{"current_state":"STOPPED","desired_state":"RUNNING","status":"CONVERGING","last_update":"Submitting start task for instance"} +``` + +If you see an error during this step, check the logs at `examples/lifecycle/nodes/localhost/var/lib/cassandra-sidecar/lifecycle/cassandra-localhost.out` (and corresponding `cassandra-localhost.err` file). + +Query the lifecycle status until the instance is started: +```shell +$ curl localhost:9043/api/v1/cassandra/lifecycle +{"current_state":"RUNNING","desired_state":"RUNNING","status":"CONVERGED","last_update":"Instance has started"} +``` + +Query the CQL status until it's started. This might take some time since as the Cassandra process initializes. + +```shell +curl localhost:9043/api/v1/cassandra/native/__health +{"status":"OK"} +``` +You should see the following in the sidecar logs, indicating the Cassandra instance is started, and it's able to connect to it via CQL and JMX: + +```shell +INFO [sidecar-internal-worker-pool-2] 2025-08-29 17:32:22,504 ProcessLifecycleProvider.java:118 - Starting Cassandra instance localhost with command: [/tmp/examples/lifecycle/nodes/localhost/opt/apache-cassandra-4.1.9/bin/cassandra, -p, /tmp/examples/lifecycle/nodes/localhost/var/lib/cassandra-sidecar/lifecycle/cassandra-localhost.pid, -Dcassandra.ring_delay_ms=5000, -D, cassandra.storagedir=/tmp/examples/lifecycle/nodes/localhost/var/lib/cassandra] +INFO [vert.x-eventloop-thread-2] 2025-08-29 17:32:22,520 ?:? - 0:0:0:0:0:0:0:1 - - [Fri, 29 Aug 2025 21:32:22 GMT] "PUT /api/v1/cassandra/lifecycle HTTP/1.1" 202 126 "-" "curl/8.10.1" +INFO [sidecar-internal-worker-pool-2] 2025-08-29 17:32:25,365 ProcessLifecycleProvider.java:124 - Started Cassandra instance localhost with PID 882 +INFO [sidecar-internal-worker-pool-2] 2025-08-29 17:32:48,745 JmxClient.java:215 - Connected to JMX server at service:jmx:rmi://127.0.0.1:7199/jndi/rmi://127.0.0.1:7199/jmxrmi after 1 attempt(s) +INFO [sidecar-internal-worker-pool-2] 2025-08-29 17:32:48,757 CassandraAdapterDelegate.java:225 - Cassandra version change detected (from=null to=4.1.9) for cassandraInstanceId=1. New adapter loaded=CassandraAdapter@694c957d +INFO [sidecar-internal-worker-pool-2] 2025-08-29 17:32:48,758 CassandraAdapterDelegate.java:520 - JMX connected to cassandraInstanceId=1 +INFO [sidecar-internal-worker-pool-2] 2025-08-29 17:32:48,758 CQLSessionProviderImpl.java:186 - Connecting to cluster using contact points [/127.0.0.1:9042] +INFO [sidecar-internal-worker-pool-2] 2025-08-29 17:32:48,931 CQLSessionProviderImpl.java:225 - Successfully connected to Cassandra! +INFO [sidecar-internal-worker-pool-2] 2025-08-29 17:32:48,948 CassandraAdapterDelegate.java:529 - CQL connected to cassandraInstanceId=1 +INFO [vert.x-eventloop-thread-0] 2025-08-29 17:32:48,951 Server.java:329 - CQL is ready for all Cassandra instances. [1] +``` + +Check that the Cassandra process ID matches the PID in the lifecycle process ID file: +```shell +$ ps aux | grep CassandraDaemon | grep -v grep | awk '{ print $2 }' +8821 +$ cat nodes/localhost/var/lib/cassandra-sidecar/lifecycle/cassandra-localhost.pid +8821 +``` + +At this stage, you may explore the cassandra logs at `examples/lifecycle/nodes/localhost/var/logs/cassandra/system.log` or cassandra startup logs at `examples/lifecycle/nodes/localhost/var/lib/cassandra-sidecar/cassandra-localhost.out`. + +## Stopping Cassandra via sidecar + +Stop Cassandra via sidecar with the following command: +```shell +curl -XPUT http://localhost:9043/api/v1/cassandra/lifecycle -d'{"state": "stop"}' +{"current_state":"RUNNING","desired_state":"STOPPED","status":"CONVERGING","last_update":"Submitting stop task for instance"} +``` + +If you see an error during this step, check the logs at `examples/lifecycle/nodes/localhost/var/lib/cassandra-sidecar/lifecycle/cassandra-localhost.out` (and corresponding `cassandra-localhost.err` file). + +Query the lifecycle status until the process is stopped: +```shell +$ curl http://localhost:9043/api/v1/cassandra/lifecycle +{"current_state":"STOPPED","desired_state":"STOPPED","status":"CONVERGED","last_update":"Instance has stopped"} +``` + +You should see the following in the sidecar logs, indicating the Cassandra instance is successfully stopped. + +```shell +INFO [sidecar-internal-worker-pool-11] 2025-08-29 18:03:05,957 ProcessLifecycleProvider.java:147 - Stopping Cassandra instance localhost with command: [/tmp/examples/lifecycle/nodes/localhost/opt/apache-cassandra-4.1.9/bin/stop-server, -p, /tmp/examples/lifecycle/nodes/localhost/var/lib/cassandra-sidecar/lifecycle/cassandra-localhost.pid] +INFO [vert.x-eventloop-thread-2] 2025-08-29 18:03:05,958 ?:? - 0:0:0:0:0:0:0:1 - - [Fri, 29 Aug 2025 22:03:05 GMT] "PUT /api/v1/cassandra/lifecycle HTTP/1.1" 202 125 "-" "curl/8.10.1" +INFO [sidecar-internal-worker-pool-11] 2025-08-29 18:03:05,960 ProcessLifecycleProvider.java:185 - Waiting for Cassandra instance localhost with PID 15652 to stop... +INFO [cluster11-worker-0] 2025-08-29 18:03:05,969 CassandraAdapterDelegate.java:540 - CQL disconnection from cassandraInstanceId=1 +INFO [sidecar-internal-worker-pool-11] 2025-08-29 18:03:10,961 ProcessLifecycleProvider.java:159 - Stopped Cassandra instance localhost with PID 15652. +INFO [sidecar-internal-worker-pool-19] 2025-08-29 18:06:30,985 CassandraAdapterDelegate.java:556 - JMX disconnection from cassandraInstanceId=1 +``` + diff --git a/examples/lifecycle/conf/sidecar.yaml.template b/examples/lifecycle/conf/sidecar.yaml.template new file mode 100644 index 000000000..b4507602f --- /dev/null +++ b/examples/lifecycle/conf/sidecar.yaml.template @@ -0,0 +1,432 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# Cassandra Sidecar configuration file +# +cassandra_instances: + - id: 1 + host: localhost + port: 9042 + + # The instance's storage directory as defined per the cassandra.storagedir property + # which defaults to the $CASSANDRA_HOME/data directory, but can be configured to any + # directory. By default, storage directory is the parent directory of data dirs, + # commitlog dir, cdc dir, hints dir and saved caches dir. If data_dirs, commitlog_dir, + # cdc_dir, hints_dir and saved_caches_dir are configured explicitly, then they will be used. + # Otherwise, default paths based on storage directory will be used. + storage_dir: $baseDir/var/lib/cassandra + + # List of directories where Cassandra stores data on disk. If not set, the default directory + # is /data + #data_dirs: + # - /var/lib/cassandra/data + + # A temporary directory used to stage sstables before importing them into Cassandra. + staging_dir: /var/lib/cassandra/sstable-staging + + # Directory where Cassandra stores mutations. If not set, the default directory is + # /cdc_raw. + #cdc_dir: /var/lib/cassandra/cdc_raw + + # Directory where Cassandra stores the commit logs. If not set, the default directory + # is /commitlog + #commitlog_dir: /var/lib/cassandra/commitlog + + # Directory where Cassandra stores hints. If not set, the default directory is + # /hints. + #hints_dir: /var/lib/cassandra/hints + + # Directory where Cassandra stores the saved caches. If not set, the default directory + # is /saved_caches + #saved_caches_dir: /var/lib/cassandra/saved_caches + + # Directory where Cassandra stores the data of the local system keyspaces. + # By default, Cassandra stores the data of the local system keyspaces in the + # first of the data directories specified by data_file_directories. + # local_system_data_file_dir: + + jmx_host: 127.0.0.1 + jmx_port: 7199 + jmx_ssl_enabled: false +# jmx_role: +# jmx_role_password: + lifecycle_options: + cassandra_conf_dir: $baseDir/etc/cassandra + cassandra_log_dir: $baseDir/var/log/cassandra + +sidecar: + host: 0.0.0.0 + port: 9043 + request_idle_timeout: 5m + request_timeout: 5m + tcp_keep_alive: false + accept_backlog: 1024 + server_verticle_instances: 1 + # DnsResolver to use (org.apache.cassandra.sidecar.common.server.dns.DnsResolver). Currently supported + # values are 'default' and 'resolve_to_ip'. Default will resolve hostnames to addresses and addresses to + # hostnames whereas resolve_to_ip will only resolve hostnames to addresses. + dns_resolver: default + throttle: + stream_requests_per_sec: 5000 + timeout: 10s + traffic_shaping: + inbound_global_bandwidth_bps: 0 # 0 implies unthrottled, the inbound bandwidth in bytes per second + outbound_global_bandwidth_bps: 0 # 0 implies unthrottled, the outbound bandwidth in bytes per second + peak_outbound_global_bandwidth_bps: 419430400 # the peak outbound bandwidth in bytes per second. The default is 400 mebibytes per second + max_delay_to_wait: 15s # 15 seconds + check_interval_for_stats: 1s # 1 second + inbound_global_file_bandwidth_bps: 0 # 0 implies unthrottled, the inbound bandwidth allocated for incoming files in bytes per second, upper-bounded by inbound_global_bandwidth_bps + sstable_upload: + concurrent_upload_limit: 80 + min_free_space_percent: 10 + # file_permissions: "rw-r--r--" # when not specified, the default file permissions are owner read & write, group & others read + # The maximum allowable time skew between the server and the client. + # Resolution is in minutes. The minimum configurable value is 1 minute. + allowable_time_skew: 1h + sstable_import: + execute_interval: 100ms + cache: + expire_after_access: 2h # 2 hours + maximum_size: 10000 + sstable_snapshot: + snapshot_list_cache: + expire_after_access: 2h # 2 hours + maximum_size: 10000 + cdc: + segment_hardlink_cache_expiry: 5m # 5 minutes + worker_pools: + service: + name: "sidecar-worker-pool" + size: 20 + max_execution_time: 1m # 1 minute + internal: + name: "sidecar-internal-worker-pool" + size: 20 + max_execution_time: 15m # 15 minutes + jmx: + max_retries: 3 + retry_delay: 200ms + schema: + is_enabled: false + keyspace: sidecar_internal + replication_strategy: SimpleStrategy + replication_factor: 1 + # The TTL in seconds used to insert entries into the sidecar_lease schema + lease_schema_ttl: 5m + coordination: + # Captures configuration parameters for the task that performs the cluster lease claim process + cluster_lease_claim: + # The name of the strategy used to determine the electorate membership (defaults to MostReplicatedKeyspaceTokenZeroElectorateMembership) + # Out of the box Sidecar provides the MostReplicatedKeyspaceTokenZeroElectorateMembership, and + # SidecarInternalTokenZeroElectorateMembership implementations. + # - MostReplicatedKeyspaceTokenZeroElectorateMembership the current Sidecar will be determined to be part + # of the electorate iff one of the Cassandra instances it + # manages owns token 0 for the user keyspace that has the + # highest replication factor. If multiple keyspaces have + # the highest replication factor, the keyspace to be used + # is decided by the keyspace with the name that sorts + # first in the lexicographic sort order. If no user + # keyspaces are created, the internal sidecar keyspace will + # be used. + # - SidecarInternalTokenZeroElectorateMembership the current Sidecar will be determined to be part of the + # electorate iff one of the Cassandra instances it manages + # owns token {@code 0} for the {@code sidecar_internal} + # keyspace. + electorate_membership_strategy: MostReplicatedKeyspaceTokenZeroElectorateMembership + # Whether the process is enabled + enabled: true + # The initial delay for the first execution of the cluster lease claim process task after being + # scheduled or rescheduled. + # The minimum value for the initial delay is 0ms. + initial_delay: 1s + # A random delta value to add jitter to the initial delay for the first execution of the cluster + # lease claim process. The actual initial delay for the task will be a millisecond value of the + # initial_delay + RANDOM(initial_delay_random_delta) configuration. + # The minimum value for the initial delay random delta is 0ms, which in practice disables the jitter. + initial_delay_random_delta: 30s + # How often the cluster lease claim process task will execute after the previous task has completed + # the execution. + # The minimum value allowed for the cluster lease claim process task implementation is 30 seconds + execute_interval: 100s + +# Exposes vertx configurations +vertx: + # Configuration options for the vertx filesystem + # These options will affect the behavior when serving static resources from the + # classpath. For example, setting classpath_resolving_enabled to false will cause + # the static file resolution to only look for files in the filesystem and avoid + # resolving resources from the classpath. + filesystem_options: + # When vert.x cannot find the file on the filesystem it tries to resolve the + # file from the classpath when this is set to true. Otherwise, vert.x + # will not attempt to resolve the file on the classpath + classpath_resolving_enabled: true + # Set to true to enable caching files on the real file system + # when the filesystem performs class path resolving. Set to false to + # disable caching. + file_caching_enabled: true + # When vert.x reads a file that is packaged with the application it gets + # extracted to this directory first and subsequent reads will use the extracted + # file to get better IO performance. + #file_cache_dir: /tmp + +schema_reporting: # Schema Reporting configuration + enabled: false # Disabled by default + initial_delay: 6h # Maximum delay before the first schema report (actual delay is randomized) + execute_interval: 12h # Exact interval between two sequential schema reports + endpoint: http://localhost/schema # Endpoint address for schema reporting + method: PUT # HTTP verb to use for schema reporting + max_retries: 3 # Number of times a failing schema report is retried + retry_delay: 1m # Delay before a failing schema report is retried + +# +# Enable SSL configuration (Disabled by default) +# +#ssl: +# enabled: true +# use_openssl: true +# handshake_timeout: 10s +# client_auth: NONE # valid options are NONE, REQUEST, REQUIRED +# accepted_protocols: +# - TLSv1.2 +# - TLSv1.3 +# cipher_suites: [] +# keystore: +# type: PKCS12 +# path: "path/to/keystore.p12" +# password: password +# check_interval: 5m +# truststore: +# path: "path/to/truststore.p12" +# password: password + +# To enable access control, the role used by sidecar to talk to Cassandra must be granted following permissions +# - GRANT DESCRIBE ON ALL ROLES to - Allows sidecar to list roles that belong to a user +# - GRANT SELECT ON KEYSPACE system_auth to - Allows sidecar to read from roles and +# role_permissions table +access_control: + # When enabled requests need to be authenticated and authorized before servicing. + enabled: false + # Supports setting multiple authenticators, request is authenticated when the first authenticator allows the request + # to go through. + # Out of the box, Cassandra Sidecar provides following authenticator provider factories + # org.apache.cassandra.sidecar.acl.authentication.{MutualTlsAuthenticationHandlerFactory, JwtAuthenticationHandlerFactory}. + # - MutualTlsAuthenticationHandlerFactory allows authenticating based on user certificates + # - JwtAuthenticationHandlerFactory allows authenticating with user JWT tokens + authenticators: + - class_name: org.apache.cassandra.sidecar.acl.authentication.MutualTlsAuthenticationHandlerFactory + parameters: + # Certificate validator is used to validate details within a certificate, such as issuer organization, + # issuer country, CNs, certificate expiry etc. + # + # io.vertx.ext.auth.mtls.impl.AllowAllCertificateValidator performs no checks, it marks all certificates as valid. + # other options are, io.vertx.ext.auth.mtls.impl.CertificateValidatorImpl + certificate_validator: io.vertx.ext.auth.mtls.impl.AllowAllCertificateValidator + # CertificateIdentityExtractor is used to extract valid identities from certificates. These identities will be + # used for authorizing users. + # + # org.apache.cassandra.sidecar.acl.authentication.CassandraIdentityExtractor verifies that identity + # extracted from certificate is present in identity_to_role table. Identities in identity_to_role table in + # Cassandra are authenticated identities in Cassandra. + # + # other options are, io.vertx.ext.auth.mtls.impl.SpiffeIdentityExtractor. + certificate_identity_extractor: org.apache.cassandra.sidecar.acl.authentication.CassandraIdentityExtractor + # JwtAuthenticationHandlerFactory adds support to authenticate users with their JWT tokens. It also includes + # supports for OpenID discovery. + - class_name: org.apache.cassandra.sidecar.acl.authentication.JwtAuthenticationHandlerFactory + parameters: + # To selectively enable or disable JWT authentication + enabled: false + # Site for sidecar to dynamically retrieve the configuration information of an OpenID provider, without + # having to manually configure settings like issuer etc. + site: https://authorization.com + # Client Id is a unique identifier assigned by OpenID provider. It is used to identity applications/users + # trying to connect. + client_id: recognized_client_id + config_discover_interval: 1h + authorizer: + # Authorization backend, implementing io.vertx.ext.auth.authorization.AuthorizationProvider; used to + # provide permissions a user holds. + # Out of the box, Cassandra Sidecar provides + # org.apache.cassandra.sidecar.acl.authorization.{AllowAllAuthorizationProvider, RoleBasedAuthorizationProvider}. + # + # - AllowAllAuthorizationProvider allows any action to any user - use it to disable authorization. + # - RoleBasedAuthorizationProvider validates role associated with authenticated user has permission + # for resource it accesses. + - class_name: org.apache.cassandra.sidecar.acl.authorization.AllowAllAuthorizationProvider + # Identities that are authenticated and authorized. + admin_identities: +# - spiffe://authorized/admin/identities + permission_cache: + enabled: true + expire_after_access: 5m + maximum_size: 1000 + warmup_retries: 5 + warmup_retry_interval: 2s + +driver_parameters: + contact_points: + - "127.0.0.1:9042" + username: cassandra + password: cassandra + ssl: + enabled: false + keystore: + type: PKCS12 + path: path/to/keystore.p12 + password: password + truststore: + type: PKCS12 + path: path/to/keystore.p12 + password: password + num_connections: 6 +# local_dc: datacenter1 + +healthcheck: + initial_delay: 0ms + execute_interval: 30s + +# Sidecar Peer Health Monitor settings +# Enables a periodic task checking for the health of adjacent Sidecar peers in the token ring +sidecar_peer_health: + # Determines if the peer health monitor periodic task is enabled or not + enabled: false + # Time between peer health checks + execute_interval: 30s + # The amount of retries the client will attempt a request + max_retries: 5 + # The initial delay between the retries the client will attempt a request + retry_delay: 10s + +# Sidecar client settings used to interact with other sidecars +sidecar_client: + request_timeout: 30s # Time in which a request made by the sidecar client will time out + request_idle_timeout: 30s # How long the request can be idle + + # More advanced options to configure the sidecar client + + #connection_pool_max_size: 10 # Max size of the client connection pool + #connection_pool_clearing_period: 5s # Period of time for the connection pool to clear + #connection_pool_event_loop_size: 0 # Defines the size of the event loop pool, set to 0 to reuse current event-loop + #connection_pool_max_wait_queue_size: -1 # Connection pool max time to wait on the queue size, set to -1 for an unbounded queue + #max_retries: 5 # The amount of retries the client will attempt a request + #retry_delay: 500ms # The initial delay between the retries the client will attempt a request + #max_retry_delay: 10s # The max delay between the retries the client will attempt a request + #ssl: # if ssl is enabled, this is the ssl configuration used for the sidecar client + # enabled: false + # keystore: + # type: PKCS12 # Keystore type + # path: path/to/client/keystore.p12 # Path to the client keystore file + # password: password # Keystore password + # truststore: + # type: PKCS12 # Truststore type + # path: path/to/truststore.p12 # Path to the truststore file + # password: password # Truststore password + +metrics: + registry_name: cassandra_sidecar + vertx: + enabled: true + expose_via_jmx: false + jmx_domain_name: sidecar.vertx.jmx_domain + include: # empty include list means include all + - type: "regex" # possible filter types are "regex" and "equals" + value: "Sidecar.*" + - type: "regex" + value: "vertx.*" + exclude: # empty exclude list means exclude nothing +# - type: "regex" # possible filter types are "regex" and "equals" +# value: "vertx.eventbus.*" # exclude all metrics starts with vertx.eventbus + +cassandra_input_validation: + forbidden_keyspaces: + - system_schema + - system_traces + - system_distributed + - system + - system_auth + - system_views + - system_virtual_schema + - sidecar_internal + allowed_chars_for_directory: "[a-zA-Z][a-zA-Z0-9_]{0,47}" + allowed_chars_for_quoted_name: "[a-zA-Z_0-9]{1,48}" + allowed_chars_for_component_name: "[a-zA-Z0-9_-]+(.db|.cql|.json|.crc32|TOC.txt)" + allowed_chars_for_restricted_component_name: "[a-zA-Z0-9_-]+(.db|TOC.txt)" + +blob_restore: + job_discovery_active_loop_delay: 5m + job_discovery_idle_loop_delay: 10m + job_discovery_recency_days: 5 + slice_process_max_concurrency: 20 + restore_job_tables_ttl: 90d + slow_task_threshold: 10m + slow_task_report_delay: 1m + ring_topology_refresh_delay: 1m + +s3_client: + concurrency: 4 + thread_name_prefix: s3-client + thread_keep_alive: 1m + api_call_timeout: 1m # 1 minute + range_get_object_bytes_size: 5242880 # 5 MiB +# proxy_config: +# uri: +# username: +# password: + +live_migration: + # By default, the directories handled by Cassandra like data_dir, commitlog_dir, hints_dir, saved_caches_dir, + # cdc_dir, and local_system_data_file_dir are included as part of migration. Admin/operator can exclude files + # and directories from migration by specifying them in the `files_to_exclude` list and the `dirs_to_exclude` + # list configurations mentioned below. + # + # 'glob' and 'regex' patterns can be used to specify exclusions. Internally, Java's java.nio.file.PathMatcher + # is used to evaluate these exclusions. Exclusions should be specified in the format "syntax:pattern" expected + # by PathMatcher. There is no need to specify the absolute path of directories handled by Cassandra. + # They can be specified using the supported placeholders listed below. + # + # COMMITLOG_DIR - commit log directory + # HINTS_DIR - hints directory + # SAVED_CACHES_DIR - saved caches directory + # CDC_RAW_DIR - cdc raw directory + # LOCAL_SYSTEM_DATA_FILE_DIR - local system data file data directory + # DATA_FILE_DIR - all data directories + # DATA_FILE_DIR_0 - first data directory + # DATA_FILE_DIR_1 - second data directory + # + # A sample glob pattern to exclude all snapshot folders from Live Migration looks like this: + # glob:${DATA_FILE_DIR}/*/*/snapshots + files_to_exclude: [ ] # List of files to exclude from Live Migration. + dirs_to_exclude: + - glob:${DATA_FILE_DIR}/*/*/snapshots # Excludes snapshot directories in data folder to copy to destination + migration_map: # Map of source and destination Cassandra instances +# localhost: localhost4 # This entry says that localhost will be migrated to localhost4 + max_concurrent_downloads: 20 # Maximum number of concurrent downloads allowed + +# Configuration to allow sidecar start and stop Cassandra instances via the lifecycle API (disabled by default) +lifecycle: + enabled: true + provider: + class_name: org.apache.cassandra.sidecar.lifecycle.ProcessLifecycleProvider + parameters: + state_dir: $baseDir/var/lib/cassandra-sidecar/lifecycle # The directory where the process state is stored + cassandra_home: $cassandraHome # The default Cassandra installation directory + cassandra.ring_delay_ms: 5000 # It is possible to override Cassandra JVM options via the "cassandra." prefix + env.MAX_HEAP_SIZE: 4G # It is possible to set environment variables used to start Cassandra via the "env" prefix + env.HEAP_NEWSIZE: 4G \ No newline at end of file diff --git a/examples/lifecycle/setup.sh b/examples/lifecycle/setup.sh new file mode 100755 index 000000000..dc4df60e3 --- /dev/null +++ b/examples/lifecycle/setup.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +set -eu + +SCRIPT_DIR=$(realpath "$(dirname "$0")") +CASSANDRA_VERSION=4.1.10 +CASSANDRA_DIR="apache-cassandra-${CASSANDRA_VERSION}" +TARBALL_NAME="${CASSANDRA_DIR}-bin.tar.gz" +TARBALL_URL="https://dlcdn.apache.org/cassandra/${CASSANDRA_VERSION}/${TARBALL_NAME}" +NODE_DIR="${SCRIPT_DIR}/nodes/localhost" + +SIDECAR_YAML="${SCRIPT_DIR}/conf/sidecar.yaml" +SIDECAR_YAML_TEMPLATE="${SCRIPT_DIR}/conf/sidecar.yaml.template" +CASSANDRA_HOME="${NODE_DIR}/opt/${CASSANDRA_DIR}" +CASSANDRA_LOG_DIR="${NODE_DIR}/var/log/cassandra" +CASSANDRA_CONF="${NODE_DIR}/etc/cassandra" +CASSANDRA_STORAGE_DIR="${NODE_DIR}/var/lib/cassandra" +SIDECAR_LIFECYCLE_DIR="${NODE_DIR}/var/lib/cassandra-sidecar/lifecycle" +TMP_DIR="${NODE_DIR}/tmp" + +echo "Creating directories" +mkdir -p ${CASSANDRA_HOME} ${CASSANDRA_LOG_DIR} ${CASSANDRA_CONF} ${CASSANDRA_STORAGE_DIR} ${SIDECAR_LIFECYCLE_DIR} ${TMP_DIR} + +if [ -f ${CASSANDRA_HOME}/bin/cassandra ]; then + echo "Cassandra already installed at ${CASSANDRA_HOME}, skipping install" +else + echo "Installing Cassandra at ${CASSANDRA_HOME}" + echo "Downloading ${TARBALL_URL}" + wget -P ${TMP_DIR} ${TARBALL_URL} + + echo "Extracting tarball" + tar -xvzf ${TMP_DIR}/${TARBALL_NAME} -C $(dirname $CASSANDRA_HOME) +fi + +echo "Creating configuration" +cp -r ${CASSANDRA_HOME}/conf/* ${CASSANDRA_CONF} +sed "s#\$cassandraHome#${CASSANDRA_HOME}#g" ${SIDECAR_YAML_TEMPLATE} > ${SIDECAR_YAML} +sed -i "s#\$baseDir#${NODE_DIR}#g" ${SIDECAR_YAML} diff --git a/examples/README.md b/examples/sidecar-ccm/README.md similarity index 98% rename from examples/README.md rename to examples/sidecar-ccm/README.md index d2211575e..14437b981 100644 --- a/examples/README.md +++ b/examples/sidecar-ccm/README.md @@ -79,7 +79,7 @@ We can now start our Sidecar instance. A single Sidecar instance will manage all in our CCM cluster. ```shell -./gradlew run -Dsidecar.config=file:///$PWD/examples/conf/sidecar-ccm.yaml +./gradlew run -Dsidecar.config=file:///$PWD/examples/sidecar-ccm/conf/sidecar-ccm.yaml ``` We should see our Sidecar logs showing a successful connection to the all Cassandra instances diff --git a/examples/conf/sidecar-ccm.yaml b/examples/sidecar-ccm/conf/sidecar-ccm.yaml similarity index 100% rename from examples/conf/sidecar-ccm.yaml rename to examples/sidecar-ccm/conf/sidecar-ccm.yaml diff --git a/gradle.properties b/gradle.properties index 463fcb93f..2d1fc937f 100644 --- a/gradle.properties +++ b/gradle.properties @@ -30,6 +30,7 @@ dtestApiVersion=0.0.17 assertjCoreVersion=3.24.2 # trunk is currently 5.1 - update when trunk moves dtestVersion=5.1 +tarballVersion=5.1 dtestDependencyName=cassandra-dtest-local-all awsSdkVersion=2.26.12 # The dep is to introduce xxhash impl diff --git a/gradle/common/integrationTestTask.gradle b/gradle/common/integrationTestTask.gradle index 39104b8f0..50a18b29b 100644 --- a/gradle/common/integrationTestTask.gradle +++ b/gradle/common/integrationTestTask.gradle @@ -29,6 +29,22 @@ println("Using mTLS for tests? ${integrationEnableMtls}") apply from: "${project.rootDir}/gradle/common/java11Options.gradle" +def getCassandraTarballLocation() { + def tarballsDirFile = file(tarballsDir) + if (tarballsDirFile.exists() && tarballsDirFile.isDirectory()) { + def matchingFiles = fileTree(tarballsDirFile).files.findAll {it.name ==~ /^apache-cassandra-${tarballVersion}\.?\d*-SNAPSHOT-bin\.tar.gz$/ } + if (matchingFiles.size() > 0) { + def tarballToTest = matchingFiles.first() + println("Testing with tarball ${tarballToTest}") + return tarballToTest + } else { + logger.warn("WARN: No tarballs matching version ${tarballVersion} found in the directory: ${tarballsDir}. Use script build-dtest-jars.sh to build it.") + } + } else { + logger.warn("WARN: Tarballs directory does not exist or is not a directory: ${tarballsDir}. Use script build-dtest-jars.sh to build it.") + } +} + ['integrationTestLightWeight', 'integrationTestHeavyWeight'].each { name -> tasks.register(name, Test) { if (JavaVersion.current().isJava11Compatible()) { @@ -37,6 +53,7 @@ apply from: "${project.rootDir}/gradle/common/java11Options.gradle" } systemProperty "cassandra.integration.sidecar.test.enable_mtls", integrationEnableMtls systemProperty "cassandra.test.dtest_jar_path", dependencyLocation + systemProperty "cassandra.test.tarball_path", getCassandraTarballLocation() // Disable direct memory allocator as it doesn't release properly systemProperty "cassandra.netty_use_heap_allocator", "true" systemProperty "vertxweb.environment", "dev" diff --git a/integration-tests/build.gradle b/integration-tests/build.gradle index 07d1c8e7d..c34a7503f 100644 --- a/integration-tests/build.gradle +++ b/integration-tests/build.gradle @@ -72,6 +72,7 @@ dependencies { integrationTestImplementation(group: 'net.java.dev.jna', name: 'jna', version: '5.9.0') integrationTestImplementation(project(path: ":integration-framework")) integrationTestImplementation("com.google.inject:guice:${guiceVersion}") + integrationTestImplementation 'org.apache.commons:commons-compress:1.27.1' testRuntimeOnly 'com.fasterxml.jackson.core:jackson-annotations:2.14.2' integrationTestCompileOnly(group: 'com.intellij', name: 'annotations', version: '12.0') diff --git a/integration-tests/src/integrationTest/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProviderIntegrationTest.java b/integration-tests/src/integrationTest/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProviderIntegrationTest.java new file mode 100644 index 000000000..6b24e9537 --- /dev/null +++ b/integration-tests/src/integrationTest/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProviderIntegrationTest.java @@ -0,0 +1,198 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.sidecar.lifecycle; + +import java.io.File; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeoutException; + +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.CleanupMode; +import org.junit.jupiter.api.io.TempDir; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.inject.Guice; +import com.google.inject.Injector; +import io.vertx.core.Vertx; +import io.vertx.ext.web.client.WebClient; +import io.vertx.junit5.VertxExtension; +import org.apache.cassandra.sidecar.modules.SidecarModules; +import org.apache.cassandra.sidecar.server.Server; + +import static java.util.concurrent.TimeUnit.SECONDS; +import static org.apache.cassandra.sidecar.utils.TestFileUtils.copyDirectoryRecursively; +import static org.apache.cassandra.sidecar.utils.TestFileUtils.extractGzippedTarball; +import static org.apache.cassandra.sidecar.utils.TestFileUtils.replacePlaceholdersInFileWithPattern; + +/** + * Process lifecycle provider integration test. + */ +@ExtendWith(VertxExtension.class) +@Tag("heavy") +public class ProcessLifecycleProviderIntegrationTest +{ + static final String TEST_NODE = "localhost"; + static final int TIMEOUT_SECONDS = 30; + + protected static final Logger LOG = LoggerFactory.getLogger(ProcessLifecycleProviderIntegrationTest.class); + + static String sidecarDeploymentId; + static Path lifecycleDir; + + static final String TARBALL_PATH = System.getProperty("cassandra.test.tarball_path"); + + @TempDir(cleanup = CleanupMode.ON_SUCCESS) + private static Path tmpDir; + + private static Server server; + private static Vertx vertx; + private static WebClient client; + + private static ProcessRuntimeConfiguration cassandraConfig; + + @BeforeAll + public static void setup() throws IOException, URISyntaxException, ExecutionException, InterruptedException, TimeoutException + { + LOG.info("Created temporary directory for test: {}", tmpDir); + + // Setup Cassandra + Path cassandraInstallDir = tmpDir.resolve("opt"); + Path cassandraConfDir = Files.createDirectories(tmpDir.resolve("etc/cassandra")); + cassandraConfig = installCassandra(cassandraInstallDir, cassandraConfDir); + + // Setup sidecar configuration + lifecycleDir = Files.createDirectories(tmpDir.resolve("var/lib/sidecar/lifecycle")); + Path sidecarYaml = createSidecarYaml(cassandraConfig, lifecycleDir); + LOG.info("Testing with cassandra config at: {} and sidecar yaml at: {}", cassandraConfig.cassandraConf(), sidecarYaml); + configureSidecar(sidecarYaml); + + sidecarDeploymentId = server.start().toCompletionStage().toCompletableFuture().get(TIMEOUT_SECONDS, SECONDS); + } + + @AfterAll + public static void tearDown() throws ExecutionException, InterruptedException, TimeoutException, IOException + { + server.stop(sidecarDeploymentId).toCompletionStage().toCompletableFuture().get(TIMEOUT_SECONDS, SECONDS); + // Make sure server is stopped + forceCassandraStop(); + } + + private static void forceCassandraStop() + { + Path pidFileLocation = Path.of(ProcessLifecycleProvider.getPidFileLocation(lifecycleDir.toString(), TEST_NODE)); + if (!pidFileLocation.toFile().exists()) + { + LOG.info("No PID file exists, Cassandra already stopped."); + return; + } + Long pid = ProcessLifecycleProvider.readPidFromFile(pidFileLocation); + try + { + Optional processHandle = ProcessHandle.of(pid); + if (processHandle.isPresent()) + { + LOG.info("Killing Cassandra process with PID {}", pid); + CompletableFuture terminationFuture = processHandle.get().onExit(); + processHandle.get().destroyForcibly(); + terminationFuture.get(TIMEOUT_SECONDS, SECONDS); + } + } + catch (InterruptedException | ExecutionException | TimeoutException e) + { + LOG.error("Failed to kill Cassandra process with PID {}", pid, e); + throw new RuntimeException("Failed to kill Cassandra process", e); + } + } + + static void configureSidecar(Path sidecarYamlPath) + { + Injector injector = Guice.createInjector(SidecarModules.all(sidecarYamlPath)); + vertx = injector.getInstance(Vertx.class); + server = injector.getInstance(Server.class); + client = WebClient.create(vertx); + } + + @Test + void testProcessLifecycleProviderStartAndStopAndRecoveryAfterCrash() throws Exception + { + LifecycleProviderIntegrationTester tester = new LifecycleProviderIntegrationTester( + client, + TEST_NODE, + server.actualPort(), + ProcessLifecycleProviderIntegrationTest::forceCassandraStop); + + tester.testLifecycleProviderStartAndStopAndRecoveryAfterCrash(); + + } + + private static Path createSidecarYaml(ProcessRuntimeConfiguration cassandraConfig, Path lifecycleDir) throws IOException, URISyntaxException + { + Path sidecarConfDir = Files.createDirectories(tmpDir.resolve("etc/sidecar")); + URL sidecarYamlTemplateUrl = ProcessLifecycleProviderIntegrationTest.class.getResource("/config/sidecar.yaml.template"); + Path sidecarYamlTemplatePath = Paths.get(Objects.requireNonNull(sidecarYamlTemplateUrl).toURI()); + Path sidecarYaml = sidecarConfDir.resolve("sidecar.yaml"); + replacePlaceholdersInFileWithPattern(sidecarYamlTemplatePath, + Map.of("cassandraHome", cassandraConfig.cassandraHome().toString(), + "lifecycleDir", lifecycleDir.toString(), + "cassandraConfDir", cassandraConfig.cassandraConfDir.toString(), + "cassandraStorageDir", Objects.requireNonNull(cassandraConfig.storageDir).toString(), + "cassandraLogDir", cassandraConfig.cassandraLogDir.toString()), + sidecarYaml); + return sidecarYaml; + } + + public static ProcessRuntimeConfiguration installCassandra(Path installDir, Path confDir) throws IOException + { + if (TARBALL_PATH == null || TARBALL_PATH.isEmpty()) + { + throw new IllegalStateException("System property 'cassandra.test.tarball_path' is not set"); + } + Files.createDirectories(installDir); + extractGzippedTarball(Path.of(TARBALL_PATH), installDir); + File[] files = installDir.toFile().listFiles(); + assert files != null && files.length == 1 && files[0].isDirectory() : "Expected a single directory in " + installDir; + Path cassandraHome = files[0].toPath(); + Path originalCassandraConfDir = cassandraHome.resolve("conf"); + copyDirectoryRecursively(originalCassandraConfDir, confDir); + Path cassandraStorageDir = Files.createDirectories(tmpDir.resolve("var/lib/cassandra")); + Path cassandraLogDir = Files.createDirectories(tmpDir.resolve("var/log")); + return new ProcessRuntimeConfiguration.Builder() + .withHost(TEST_NODE) + .withCassandraHome(cassandraHome.toString()) + .withCassandraConfDir(confDir.toString()) + .withCassandraLogDir(cassandraLogDir.toString()) + .withStorageDir(cassandraStorageDir.toString()) + .build(); + } +} diff --git a/integration-tests/src/integrationTest/org/apache/cassandra/sidecar/utils/TestFileUtils.java b/integration-tests/src/integrationTest/org/apache/cassandra/sidecar/utils/TestFileUtils.java new file mode 100644 index 000000000..238628294 --- /dev/null +++ b/integration-tests/src/integrationTest/org/apache/cassandra/sidecar/utils/TestFileUtils.java @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.sidecar.utils; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; +import java.nio.file.StandardOpenOption; +import java.nio.file.attribute.PosixFilePermission; +import java.util.EnumSet; +import java.util.Map; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Utilities for handling files on integration tests + */ +public class TestFileUtils +{ + protected static final Logger LOG = LoggerFactory.getLogger(TestFileUtils.class); + + public static void extractGzippedTarball(Path tarballPath, Path targetDir) throws IOException + { + // check if tarball exists and ends with .tar.gz + if (!tarballPath.toFile().exists() || !tarballPath.toString().endsWith(".tar.gz")) + { + throw new IllegalStateException("Cassandra tarball does not exist or is not a .tar.gz file: " + tarballPath); + } + LOG.info("Extracting tarball {} to directory {}", tarballPath, targetDir); + try (TarArchiveInputStream tarInput = new TarArchiveInputStream(new GzipCompressorInputStream(Files.newInputStream(tarballPath)))) + { + TarArchiveEntry entry; + while ((entry = tarInput.getNextEntry()) != null) + { + Path entryPath = targetDir.resolve(entry.getName()); + if (entry.isDirectory()) + { + Files.createDirectories(entryPath); + } + else + { + Files.copy(tarInput, entryPath); + // Set correct permission from TarArchiveEntry + if (entry.getMode() != 0) + { + Files.setPosixFilePermissions(entryPath, permissionsFromInteger(entry.getMode())); + } + } + } + } + LOG.info("Extracted tarball {} to directory {}", tarballPath, targetDir); + } + + public static void copyDirectoryRecursively(Path sourceDir, Path targetDir) throws IOException + { + LOG.info("Copying directory {} to {}", sourceDir, targetDir); + try (var walk = Files.walk(sourceDir)) + { + walk.forEach(source -> + { + try + { + Path target = targetDir.resolve(sourceDir.relativize(source)); + if (Files.isDirectory(source)) + { + Files.createDirectories(target); + } + else + { + Files.createDirectories(target.getParent()); + Files.copy(source, target, StandardCopyOption.REPLACE_EXISTING); + } + } + catch (IOException e) + { + throw new RuntimeException("Failed to copy " + source, e); + } + }); + } + } + + public static void replacePlaceholdersInFileWithPattern(Path templateFile, Map placeholders, + Path destFile) throws IOException + { + LOG.info("Replacing placeholders {} in file {}", placeholders.keySet(), templateFile); + String content = Files.readString(templateFile); + for (Map.Entry entry : placeholders.entrySet()) + { + String placeholder = "\\$\\{" + Pattern.quote(entry.getKey()) + "\\}"; + content = content.replaceAll(placeholder, Matcher.quoteReplacement(entry.getValue())); + } + Files.writeString(destFile, content, StandardOpenOption.CREATE_NEW); + LOG.info("Replaced {} placeholders in file {} and writing to {}", placeholders.size(), templateFile, destFile); + } + + public static Set permissionsFromInteger(int mode) + { + Set permissions = EnumSet.noneOf(PosixFilePermission.class); + + // Owner permissions + if ((mode & 0400) != 0) + { + permissions.add(PosixFilePermission.OWNER_READ); + } + if ((mode & 0200) != 0) + { + permissions.add(PosixFilePermission.OWNER_WRITE); + } + if ((mode & 0100) != 0) + { + permissions.add(PosixFilePermission.OWNER_EXECUTE); + } + + // Group permissions + if ((mode & 0040) != 0) + { + permissions.add(PosixFilePermission.GROUP_READ); + } + if ((mode & 0020) != 0) + { + permissions.add(PosixFilePermission.GROUP_WRITE); + } + if ((mode & 0010) != 0) + { + permissions.add(PosixFilePermission.GROUP_EXECUTE); + } + + // Others permissions + if ((mode & 0004) != 0) + { + permissions.add(PosixFilePermission.OTHERS_READ); + } + if ((mode & 0002) != 0) + { + permissions.add(PosixFilePermission.OTHERS_WRITE); + } + if ((mode & 0001) != 0) + { + permissions.add(PosixFilePermission.OTHERS_EXECUTE); + } + + return permissions; + } +} diff --git a/integration-tests/src/integrationTest/resources/config/sidecar.yaml.template b/integration-tests/src/integrationTest/resources/config/sidecar.yaml.template new file mode 100644 index 000000000..628ecb3f2 --- /dev/null +++ b/integration-tests/src/integrationTest/resources/config/sidecar.yaml.template @@ -0,0 +1,416 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# Cassandra Sidecar configuration file +# +cassandra_instances: + - id: 1 + host: localhost + port: 9042 + + # The instance's storage directory as defined per the cassandra.storagedir property + # which defaults to the $CASSANDRA_HOME/data directory, but can be configured to any + # directory. By default, storage directory is the parent directory of data dirs, + # commitlog dir, cdc dir, hints dir and saved caches dir. If data_dirs, commitlog_dir, + # cdc_dir, hints_dir and saved_caches_dir are configured explicitly, then they will be used. + # Otherwise, default paths based on storage directory will be used. + storage_dir: ${cassandraStorageDir} + + # List of directories where Cassandra stores data on disk. If not set, the default directory + # is /data + #data_dirs: + # - /var/lib/cassandra/data + + # A temporary directory used to stage sstables before importing them into Cassandra. + staging_dir: /var/lib/cassandra/sstable-staging + + # Directory where Cassandra stores mutations. If not set, the default directory is + # /cdc_raw. + #cdc_dir: /var/lib/cassandra/cdc_raw + + # Directory where Cassandra stores the commit logs. If not set, the default directory + # is /commitlog + #commitlog_dir: /var/lib/cassandra/commitlog + + # Directory where Cassandra stores hints. If not set, the default directory is + # /hints. + #hints_dir: /var/lib/cassandra/hints + + # Directory where Cassandra stores the saved caches. If not set, the default directory + # is /saved_caches + #saved_caches_dir: /var/lib/cassandra/saved_caches + + # Directory where Cassandra stores the data of the local system keyspaces. + # By default, Cassandra stores the data of the local system keyspaces in the + # first of the data directories specified by data_file_directories. + # local_system_data_file_dir: + + jmx_host: 127.0.0.1 + jmx_port: 7199 + jmx_ssl_enabled: false +# jmx_role: +# jmx_role_password: + lifecycle_options: + cassandra_conf_dir: ${cassandraConfDir} + cassandra_log_dir: ${cassandraLogDir} + +sidecar: + host: 0.0.0.0 + port: 9043 + request_idle_timeout: 5m + request_timeout: 5m + tcp_keep_alive: false + accept_backlog: 1024 + server_verticle_instances: 1 + # DnsResolver to use (org.apache.cassandra.sidecar.common.server.dns.DnsResolver). Currently supported + # values are 'default' and 'resolve_to_ip'. Default will resolve hostnames to addresses and addresses to + # hostnames whereas resolve_to_ip will only resolve hostnames to addresses. + dns_resolver: default + throttle: + stream_requests_per_sec: 5000 + timeout: 10s + traffic_shaping: + inbound_global_bandwidth_bps: 0 # 0 implies unthrottled, the inbound bandwidth in bytes per second + outbound_global_bandwidth_bps: 0 # 0 implies unthrottled, the outbound bandwidth in bytes per second + peak_outbound_global_bandwidth_bps: 419430400 # the peak outbound bandwidth in bytes per second. The default is 400 mebibytes per second + max_delay_to_wait: 15s # 15 seconds + check_interval_for_stats: 1s # 1 second + inbound_global_file_bandwidth_bps: 0 # 0 implies unthrottled, the inbound bandwidth allocated for incoming files in bytes per second, upper-bounded by inbound_global_bandwidth_bps + sstable_upload: + concurrent_upload_limit: 80 + min_free_space_percent: 10 + # file_permissions: "rw-r--r--" # when not specified, the default file permissions are owner read & write, group & others read + # The maximum allowable time skew between the server and the client. + # Resolution is in minutes. The minimum configurable value is 1 minute. + allowable_time_skew: 1h + sstable_import: + execute_interval: 100ms + cache: + expire_after_access: 2h # 2 hours + maximum_size: 10000 + sstable_snapshot: + snapshot_list_cache: + expire_after_access: 2h # 2 hours + maximum_size: 10000 + cdc: + segment_hardlink_cache_expiry: 5m # 5 minutes + worker_pools: + service: + name: "sidecar-worker-pool" + size: 20 + max_execution_time: 1m # 1 minute + internal: + name: "sidecar-internal-worker-pool" + size: 20 + max_execution_time: 15m # 15 minutes + jmx: + max_retries: 3 + retry_delay: 200ms + schema: + is_enabled: false + keyspace: sidecar_internal + replication_strategy: SimpleStrategy + replication_factor: 1 + # The TTL in seconds used to insert entries into the sidecar_lease schema + lease_schema_ttl: 5m + coordination: + # Captures configuration parameters for the task that performs the cluster lease claim process + cluster_lease_claim: + # The name of the strategy used to determine the electorate membership (defaults to MostReplicatedKeyspaceTokenZeroElectorateMembership) + # Out of the box Sidecar provides the MostReplicatedKeyspaceTokenZeroElectorateMembership, and + # SidecarInternalTokenZeroElectorateMembership implementations. + # - MostReplicatedKeyspaceTokenZeroElectorateMembership the current Sidecar will be determined to be part + # of the electorate iff one of the Cassandra instances it + # manages owns token 0 for the user keyspace that has the + # highest replication factor. If multiple keyspaces have + # the highest replication factor, the keyspace to be used + # is decided by the keyspace with the name that sorts + # first in the lexicographic sort order. If no user + # keyspaces are created, the internal sidecar keyspace will + # be used. + # - SidecarInternalTokenZeroElectorateMembership the current Sidecar will be determined to be part of the + # electorate iff one of the Cassandra instances it manages + # owns token {@code 0} for the {@code sidecar_internal} + # keyspace. + electorate_membership_strategy: MostReplicatedKeyspaceTokenZeroElectorateMembership + # Whether the process is enabled + enabled: true + # The initial delay for the first execution of the cluster lease claim process task after being + # scheduled or rescheduled. + # The minimum value for the initial delay is 0ms. + initial_delay: 1s + # A random delta value to add jitter to the initial delay for the first execution of the cluster + # lease claim process. The actual initial delay for the task will be a millisecond value of the + # initial_delay + RANDOM(initial_delay_random_delta) configuration. + # The minimum value for the initial delay random delta is 0ms, which in practice disables the jitter. + initial_delay_random_delta: 30s + # How often the cluster lease claim process task will execute after the previous task has completed + # the execution. + # The minimum value allowed for the cluster lease claim process task implementation is 30 seconds + execute_interval: 100s + +vertx: + filesystem_options: + classpath_resolving_enabled: false + file_caching_enabled: false + +schema_reporting: # Schema Reporting configuration + enabled: false # Disabled by default + initial_delay: 6h # Maximum delay before the first schema report (actual delay is randomized) + execute_interval: 12h # Exact interval between two sequential schema reports + endpoint: http://localhost/schema # Endpoint address for schema reporting + method: PUT # HTTP verb to use for schema reporting + max_retries: 3 # Number of times a failing schema report is retried + retry_delay: 1m # Delay before a failing schema report is retried + +# +# Enable SSL configuration (Disabled by default) +# +#ssl: +# enabled: true +# use_openssl: true +# handshake_timeout: 10s +# client_auth: NONE # valid options are NONE, REQUEST, REQUIRED +# accepted_protocols: +# - TLSv1.2 +# - TLSv1.3 +# cipher_suites: [] +# keystore: +# type: PKCS12 +# path: "path/to/keystore.p12" +# password: password +# check_interval: 5m +# truststore: +# path: "path/to/truststore.p12" +# password: password + +# To enable access control, the role used by sidecar to talk to Cassandra must be granted following permissions +# - GRANT DESCRIBE ON ALL ROLES to - Allows sidecar to list roles that belong to a user +# - GRANT SELECT ON KEYSPACE system_auth to - Allows sidecar to read from roles and +# role_permissions table +access_control: + # When enabled requests need to be authenticated and authorized before servicing. + enabled: false + # Supports setting multiple authenticators, request is authenticated when the first authenticator allows the request + # to go through. + # Out of the box, Cassandra Sidecar provides following authenticator provider factories + # org.apache.cassandra.sidecar.acl.authentication.{MutualTlsAuthenticationHandlerFactory, JwtAuthenticationHandlerFactory}. + # - MutualTlsAuthenticationHandlerFactory allows authenticating based on user certificates + # - JwtAuthenticationHandlerFactory allows authenticating with user JWT tokens + authenticators: + - class_name: org.apache.cassandra.sidecar.acl.authentication.MutualTlsAuthenticationHandlerFactory + parameters: + # Certificate validator is used to validate details within a certificate, such as issuer organization, + # issuer country, CNs, certificate expiry etc. + # + # io.vertx.ext.auth.mtls.impl.AllowAllCertificateValidator performs no checks, it marks all certificates as valid. + # other options are, io.vertx.ext.auth.mtls.impl.CertificateValidatorImpl + certificate_validator: io.vertx.ext.auth.mtls.impl.AllowAllCertificateValidator + # CertificateIdentityExtractor is used to extract valid identities from certificates. These identities will be + # used for authorizing users. + # + # org.apache.cassandra.sidecar.acl.authentication.CassandraIdentityExtractor verifies that identity + # extracted from certificate is present in identity_to_role table. Identities in identity_to_role table in + # Cassandra are authenticated identities in Cassandra. + # + # other options are, io.vertx.ext.auth.mtls.impl.SpiffeIdentityExtractor. + certificate_identity_extractor: org.apache.cassandra.sidecar.acl.authentication.CassandraIdentityExtractor + # JwtAuthenticationHandlerFactory adds support to authenticate users with their JWT tokens. It also includes + # supports for OpenID discovery. + - class_name: org.apache.cassandra.sidecar.acl.authentication.JwtAuthenticationHandlerFactory + parameters: + # To selectively enable or disable JWT authentication + enabled: false + # Site for sidecar to dynamically retrieve the configuration information of an OpenID provider, without + # having to manually configure settings like issuer etc. + site: https://authorization.com + # Client Id is a unique identifier assigned by OpenID provider. It is used to identity applications/users + # trying to connect. + client_id: recognized_client_id + config_discover_interval: 1h + authorizer: + # Authorization backend, implementing io.vertx.ext.auth.authorization.AuthorizationProvider; used to + # provide permissions a user holds. + # Out of the box, Cassandra Sidecar provides + # org.apache.cassandra.sidecar.acl.authorization.{AllowAllAuthorizationProvider, RoleBasedAuthorizationProvider}. + # + # - AllowAllAuthorizationProvider allows any action to any user - use it to disable authorization. + # - RoleBasedAuthorizationProvider validates role associated with authenticated user has permission + # for resource it accesses. + - class_name: org.apache.cassandra.sidecar.acl.authorization.AllowAllAuthorizationProvider + # Identities that are authenticated and authorized. + admin_identities: +# - spiffe://authorized/admin/identities + permission_cache: + enabled: true + expire_after_access: 5m + maximum_size: 1000 + warmup_retries: 5 + warmup_retry_interval: 2s + +driver_parameters: + contact_points: + - "127.0.0.1:9042" + username: cassandra + password: cassandra + ssl: + enabled: false + keystore: + type: PKCS12 + path: path/to/keystore.p12 + password: password + truststore: + type: PKCS12 + path: path/to/keystore.p12 + password: password + num_connections: 6 +# local_dc: datacenter1 + +healthcheck: + initial_delay: 0ms + execute_interval: 30s + +# Sidecar Peer Health Monitor settings +# Enables a periodic task checking for the health of adjacent Sidecar peers in the token ring +sidecar_peer_health: + # Determines if the peer health monitor periodic task is enabled or not + enabled: false + # Time between peer health checks + execute_interval: 30s + # The amount of retries the client will attempt a request + max_retries: 5 + # The initial delay between the retries the client will attempt a request + retry_delay: 10s + +# Sidecar client settings used to interact with other sidecars +sidecar_client: + request_timeout: 30s # Time in which a request made by the sidecar client will time out + request_idle_timeout: 30s # How long the request can be idle + + # More advanced options to configure the sidecar client + + #connection_pool_max_size: 10 # Max size of the client connection pool + #connection_pool_clearing_period: 5s # Period of time for the connection pool to clear + #connection_pool_event_loop_size: 0 # Defines the size of the event loop pool, set to 0 to reuse current event-loop + #connection_pool_max_wait_queue_size: -1 # Connection pool max time to wait on the queue size, set to -1 for an unbounded queue + #max_retries: 5 # The amount of retries the client will attempt a request + #retry_delay: 500ms # The initial delay between the retries the client will attempt a request + #max_retry_delay: 10s # The max delay between the retries the client will attempt a request + #ssl: # if ssl is enabled, this is the ssl configuration used for the sidecar client + # enabled: false + # keystore: + # type: PKCS12 # Keystore type + # path: path/to/client/keystore.p12 # Path to the client keystore file + # password: password # Keystore password + # truststore: + # type: PKCS12 # Truststore type + # path: path/to/truststore.p12 # Path to the truststore file + # password: password # Truststore password + +metrics: + registry_name: cassandra_sidecar + vertx: + enabled: true + expose_via_jmx: false + jmx_domain_name: sidecar.vertx.jmx_domain + include: # empty include list means include all + - type: "regex" # possible filter types are "regex" and "equals" + value: "Sidecar.*" + - type: "regex" + value: "vertx.*" + exclude: # empty exclude list means exclude nothing +# - type: "regex" # possible filter types are "regex" and "equals" +# value: "vertx.eventbus.*" # exclude all metrics starts with vertx.eventbus + +cassandra_input_validation: + forbidden_keyspaces: + - system_schema + - system_traces + - system_distributed + - system + - system_auth + - system_views + - system_virtual_schema + - sidecar_internal + allowed_chars_for_directory: "[a-zA-Z][a-zA-Z0-9_]{0,47}" + allowed_chars_for_quoted_name: "[a-zA-Z_0-9]{1,48}" + allowed_chars_for_component_name: "[a-zA-Z0-9_-]+(.db|.cql|.json|.crc32|TOC.txt)" + allowed_chars_for_restricted_component_name: "[a-zA-Z0-9_-]+(.db|TOC.txt)" + +blob_restore: + job_discovery_active_loop_delay: 5m + job_discovery_idle_loop_delay: 10m + job_discovery_recency_days: 5 + slice_process_max_concurrency: 20 + restore_job_tables_ttl: 90d + slow_task_threshold: 10m + slow_task_report_delay: 1m + ring_topology_refresh_delay: 1m + +s3_client: + concurrency: 4 + thread_name_prefix: s3-client + thread_keep_alive: 1m + api_call_timeout: 1m # 1 minute + range_get_object_bytes_size: 5242880 # 5 MiB +# proxy_config: +# uri: +# username: +# password: + +live_migration: + # By default, the directories handled by Cassandra like data_dir, commitlog_dir, hints_dir, saved_caches_dir, + # cdc_dir, and local_system_data_file_dir are included as part of migration. Admin/operator can exclude files + # and directories from migration by specifying them in the `files_to_exclude` list and the `dirs_to_exclude` + # list configurations mentioned below. + # + # 'glob' and 'regex' patterns can be used to specify exclusions. Internally, Java's java.nio.file.PathMatcher + # is used to evaluate these exclusions. Exclusions should be specified in the format "syntax:pattern" expected + # by PathMatcher. There is no need to specify the absolute path of directories handled by Cassandra. + # They can be specified using the supported placeholders listed below. + # + # COMMITLOG_DIR - commit log directory + # HINTS_DIR - hints directory + # SAVED_CACHES_DIR - saved caches directory + # CDC_RAW_DIR - cdc raw directory + # LOCAL_SYSTEM_DATA_FILE_DIR - local system data file data directory + # DATA_FILE_DIR - all data directories + # DATA_FILE_DIR_0 - first data directory + # DATA_FILE_DIR_1 - second data directory + # + # A sample glob pattern to exclude all snapshot folders from Live Migration looks like this: + # glob:${DATA_FILE_DIR}/*/*/snapshots + files_to_exclude: [ ] # List of files to exclude from Live Migration. + dirs_to_exclude: + - glob:${DATA_FILE_DIR}/*/*/snapshots # Excludes snapshot directories in data folder to copy to destination + migration_map: # Map of source and destination Cassandra instances +# localhost1: localhost4 # This entry says that localhost1 will be migrated to localhost4 + max_concurrent_downloads: 20 # Maximum number of concurrent downloads allowed + +# Configuration to allow sidecar start and stop Cassandra instances via the lifecycle API (disabled by default) +lifecycle: + enabled: true + provider: + class_name: org.apache.cassandra.sidecar.lifecycle.ProcessLifecycleProvider + parameters: + state_dir: ${lifecycleDir} + cassandra_home: ${cassandraHome} # The default Cassandra installation directory + cassandra.ring_delay_ms: 5000 # down from 30s default + env.MAX_HEAP_SIZE: 4G # this should be enough, since we only start and stop cassandra + env.HEAP_NEWSIZE: 1G # this should be enough, since we only start and stop cassandra \ No newline at end of file diff --git a/scripts/build-cassandra-tarball.sh b/scripts/build-cassandra-tarball.sh new file mode 100755 index 000000000..d47801d72 --- /dev/null +++ b/scripts/build-cassandra-tarball.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -xe + +export CASSANDRA_USE_JDK11=true +SCRIPT_DIR=$( dirname -- "$( readlink -f -- "$0"; )"; ) +TARBALL_DIR="$(dirname "${SCRIPT_DIR}/")/cassandra-tarballs" + +ant artifacts -Dant.gen-doc.skip=true -Dno-checkstyle=true +mkdir -p ${TARBALL_DIR} +cp ./build/apache-cassandra-*-bin.tar.gz ${TARBALL_DIR} + +set +xe diff --git a/scripts/build-dtest-jars.sh b/scripts/build-dtest-jars.sh index 806de69ab..d48b7bd39 100755 --- a/scripts/build-dtest-jars.sh +++ b/scripts/build-dtest-jars.sh @@ -30,11 +30,14 @@ REPO=${REPO:-"https://github.com/apache/cassandra.git"} SCRIPT_DIR=$( dirname -- "$( readlink -f -- "$0"; )"; ) DTEST_JAR_DIR="$(dirname "${SCRIPT_DIR}/")/dtest-jars" DTEST_JAR_DIR=${CASSANDRA_DEP_DIR:-$DTEST_JAR_DIR} +TARBALL_DIR="$(dirname "${SCRIPT_DIR}/")/cassandra-tarballs" BUILD_DIR="${DTEST_JAR_DIR}/build" if [[ "x$CLEAN" != "x" ]]; then echo "Clean up $DTEST_JAR_DIR" rm -rf $DTEST_JAR_DIR + echo "Cleanup $TARBALL_DIR" + rm -rf $TARBALL_DIR fi source "$SCRIPT_DIR/functions.sh" @@ -83,16 +86,27 @@ for index in "${!CANDIDATE_BRANCHES[@]}"; do CASSANDRA_VERSION=$(cat build.xml | grep 'property name="base.version"' | awk -F "\"" '{print $4}') # Loop to prevent failure due to maven-ant-tasks not downloading a jar. for x in $(seq 1 3); do - if [ -f "${DTEST_JAR_DIR}/dtest-${CASSANDRA_VERSION}.jar" ]; then - RETURN="0" + RETURN="0" + DTEST_JAR_FILE="${DTEST_JAR_DIR}/dtest-${CASSANDRA_VERSION}.jar" + TARBALL_GLOB_FILE="${TARBALL_DIR}/apache-cassandra-${CASSANDRA_VERSION}*-bin.tar.gz" + + if [ -f "${DTEST_JAR_DIR}/dtest-${CASSANDRA_VERSION}.jar" ] && (ls ${TARBALL_GLOB_FILE} >/dev/null 2>&1); then + echo "Found existing dtest jar ${DTEST_JAR_FILE} and tarball `ls ${TARBALL_GLOB_FILE}` for version ${CASSANDRA_VERSION}, skipping build." break - else + fi + + if ! [ -f "${DTEST_JAR_FILE}" ]; then + echo "Building dtest jar ${DTEST_JAR_FILE} for version ${CASSANDRA_VERSION}" "${SCRIPT_DIR}/build-shaded-dtest-jar-local.sh" RETURN="$?" - if [ "${RETURN}" -eq "0" ]; then - break - fi - fi + fi + + if [ "${RETURN}" -eq "0" ] && ! (ls ${TARBALL_GLOB_FILE} >/dev/null 2>&1); then + echo "Building tarball for version ${CASSANDRA_VERSION}" + "${SCRIPT_DIR}/build-cassandra-tarball.sh" + RETURN="$?" + break + fi done # Exit, if we didn't build successfully if [ "${RETURN}" -ne "0" ]; then diff --git a/server/src/main/java/org/apache/cassandra/sidecar/cluster/instance/InstanceMetadata.java b/server/src/main/java/org/apache/cassandra/sidecar/cluster/instance/InstanceMetadata.java index 5abcea8b0..0118575d5 100644 --- a/server/src/main/java/org/apache/cassandra/sidecar/cluster/instance/InstanceMetadata.java +++ b/server/src/main/java/org/apache/cassandra/sidecar/cluster/instance/InstanceMetadata.java @@ -20,6 +20,7 @@ import java.net.UnknownHostException; import java.util.List; +import java.util.Map; import org.apache.cassandra.sidecar.cluster.CassandraAdapterDelegate; import org.apache.cassandra.sidecar.exceptions.CassandraUnavailableException; @@ -74,6 +75,11 @@ default String refreshIpAddress() throws UnknownHostException */ String stagingDir(); + /** + * @return the storage directory of the cassandra instance + */ + String storageDir(); + /** * @return a {@link CassandraAdapterDelegate} specific for the instance, or throws when the delegate is unavailable * @throws CassandraUnavailableException when the Cassandra service is unavailable @@ -118,4 +124,13 @@ default String refreshIpAddress() throws UnknownHostException */ @NotNull InstanceMetrics metrics(); + + /** + * @return The lifecycle options specific to this instance + * For example, Cassandra configuration directory or logging directory, + * since this is different between different Cassandra instances + * residing in the same host + */ + @NotNull + Map lifecycleOptions(); } diff --git a/server/src/main/java/org/apache/cassandra/sidecar/cluster/instance/InstanceMetadataImpl.java b/server/src/main/java/org/apache/cassandra/sidecar/cluster/instance/InstanceMetadataImpl.java index b7e66f318..fa7d930fd 100644 --- a/server/src/main/java/org/apache/cassandra/sidecar/cluster/instance/InstanceMetadataImpl.java +++ b/server/src/main/java/org/apache/cassandra/sidecar/cluster/instance/InstanceMetadataImpl.java @@ -23,10 +23,12 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.stream.Collectors; import com.codahale.metrics.MetricRegistry; +import com.fasterxml.jackson.annotation.JsonProperty; import org.apache.cassandra.sidecar.cluster.CassandraAdapterDelegate; import org.apache.cassandra.sidecar.common.DataObjectBuilder; import org.apache.cassandra.sidecar.common.server.dns.DnsResolver; @@ -64,10 +66,13 @@ public class InstanceMetadataImpl implements InstanceMetadata private final String savedCachesDir; private final String localSystemDataFileDir; @Nullable + private final String storageDir; + @Nullable private final CassandraAdapterDelegate delegate; private final InstanceMetrics metrics; private final DnsResolver dnsResolver; private volatile String ipAddress; + private final Map lifecycleOptions; protected InstanceMetadataImpl(Builder builder) { @@ -89,6 +94,10 @@ protected InstanceMetadataImpl(Builder builder) hintsDir = builder.resolveHintsDir(); savedCachesDir = builder.resolveSavedCachesDir(); localSystemDataFileDir = FileUtils.maybeResolveHomeDirectory(builder.localSystemDataFileDir); + lifecycleOptions = builder.lifecycleOptions != null + ? Collections.unmodifiableMap(builder.lifecycleOptions) + : Collections.emptyMap(); + storageDir = builder.storageDir; } @Override @@ -136,6 +145,12 @@ public String stagingDir() return stagingDir; } + @Override + public String storageDir() + { + return storageDir; + } + @Override @Nullable public String cdcDir() @@ -188,6 +203,16 @@ public InstanceMetrics metrics() return metrics; } + /** + * @return The lifecycle options for this Cassandra instance + */ + @Override + @JsonProperty("lifecycle_options") + public Map lifecycleOptions() + { + return lifecycleOptions; + } + public static Builder builder() { return new Builder(); @@ -224,6 +249,7 @@ public static class Builder implements DataObjectBuilder lifecycleOptions; protected Builder() { @@ -244,6 +270,7 @@ protected Builder(InstanceMetadataImpl instanceMetadata) localSystemDataFileDir = instanceMetadata.localSystemDataFileDir; delegate = instanceMetadata.delegate; metrics = instanceMetadata.metrics; + lifecycleOptions = instanceMetadata.lifecycleOptions; } @Override @@ -378,6 +405,17 @@ public Builder savedCachesDir(String savedCachesDir) return update(b -> b.savedCachesDir = savedCachesDir); } + /** + * Sets the {@code lifecycleOptions} and returns a reference to this Builder enabling method chaining. + * + * @param lifecycleOptions the {@code lifecycleOptions} to set + * @return a reference to this Builder + */ + public Builder lifecycleOptions(Map lifecycleOptions) + { + return update(b -> b.lifecycleOptions = lifecycleOptions); + } + /** * Sets the {@code localSystemDataFileDir} and return a reference to this Builder enabling method chaining. * diff --git a/server/src/main/java/org/apache/cassandra/sidecar/config/InstanceConfiguration.java b/server/src/main/java/org/apache/cassandra/sidecar/config/InstanceConfiguration.java index bd34f4818..f91b27148 100644 --- a/server/src/main/java/org/apache/cassandra/sidecar/config/InstanceConfiguration.java +++ b/server/src/main/java/org/apache/cassandra/sidecar/config/InstanceConfiguration.java @@ -19,6 +19,7 @@ package org.apache.cassandra.sidecar.config; import java.util.List; +import java.util.Map; /** * Encapsulates the basic configuration needed to connect to a single Cassandra instance @@ -104,4 +105,9 @@ public interface InstanceConfiguration * @return the password for the JMX role for the JMX service for the Cassandra instance */ String jmxRolePassword(); + + /** + * @return The lifecycle options for this Cassandra instance + */ + Map lifecycleOptions(); } diff --git a/server/src/main/java/org/apache/cassandra/sidecar/config/yaml/InstanceConfigurationImpl.java b/server/src/main/java/org/apache/cassandra/sidecar/config/yaml/InstanceConfigurationImpl.java index d7360891c..b952ca242 100644 --- a/server/src/main/java/org/apache/cassandra/sidecar/config/yaml/InstanceConfigurationImpl.java +++ b/server/src/main/java/org/apache/cassandra/sidecar/config/yaml/InstanceConfigurationImpl.java @@ -20,6 +20,7 @@ import java.util.Collections; import java.util.List; +import java.util.Map; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; @@ -48,6 +49,7 @@ public class InstanceConfigurationImpl implements InstanceConfiguration protected final boolean jmxSslEnabled; protected final String jmxRole; protected final String jmxRolePassword; + protected final Map lifecycleOptions; @JsonCreator public InstanceConfigurationImpl(@JsonProperty("id") int id, @@ -65,7 +67,8 @@ public InstanceConfigurationImpl(@JsonProperty("id") int id, @JsonProperty("jmx_port") int jmxPort, @JsonProperty("jmx_ssl_enabled") boolean jmxSslEnabled, @Nullable @JsonProperty("jmx_role") String jmxRole, - @Nullable @JsonProperty("jmx_role_password") String jmxRolePassword) + @Nullable @JsonProperty("jmx_role_password") String jmxRolePassword, + @Nullable @JsonProperty("lifecycle_options") Map lifecycleOptions) { this.id = id; this.host = host; @@ -83,6 +86,7 @@ public InstanceConfigurationImpl(@JsonProperty("id") int id, this.jmxSslEnabled = jmxSslEnabled; this.jmxRole = jmxRole; this.jmxRolePassword = jmxRolePassword; + this.lifecycleOptions = lifecycleOptions != null ? Collections.unmodifiableMap(lifecycleOptions) : Collections.emptyMap(); } /** @@ -241,4 +245,14 @@ public String jmxRolePassword() { return jmxRolePassword; } + + /** + * @return The lifecycle options for this Cassandra instance + */ + @Override + @JsonProperty("lifecycle_options") + public Map lifecycleOptions() + { + return lifecycleOptions; + } } diff --git a/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/LifecycleManager.java b/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/LifecycleManager.java index 183387410..7f5f8f191 100644 --- a/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/LifecycleManager.java +++ b/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/LifecycleManager.java @@ -148,7 +148,7 @@ private Future submitStartTask(String instanceId) } catch (Exception e) { - LOG.error("Failed to start instance {}: {}", instanceId, e.getMessage()); + LOG.error("Failed to start instance {}: {}", instanceId, e.getMessage(), e); lastUpdateMsgByInstance.put(instanceId, String.format("Failed to start instance %s: %s", instanceId, e.getMessage())); } }); @@ -166,7 +166,7 @@ private Future submitStopTask(String instanceId) } catch (Exception e) { - LOG.error("Failed to stop instance {}: {}", instanceId, e.getMessage()); + LOG.error("Failed to stop instance {}: {}", instanceId, e.getMessage(), e); lastUpdateMsgByInstance.put(instanceId, String.format("Failed to stop instance %s: %s", instanceId, e.getMessage())); } }); diff --git a/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java b/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java index 681dc9908..aa50301b5 100644 --- a/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java +++ b/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java @@ -18,38 +18,232 @@ package org.apache.cassandra.sidecar.lifecycle; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.HashMap; import java.util.Map; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.cassandra.sidecar.cluster.instance.InstanceMetadata; +import org.apache.cassandra.sidecar.exceptions.ConfigurationException; +import org.jetbrains.annotations.VisibleForTesting; /** - * A {@link LifecycleProvider} that manages Cassandra instances as OS processes. - *

- * This implementation is a placeholder and is not yet implemented. + * Manage the lifecycle of Cassandra instances running on local processes */ public class ProcessLifecycleProvider implements LifecycleProvider { + static final String OPT_CASSANDRA_HOME = "cassandra_home"; + static final String OPT_CASSANDRA_CONF_DIR = "cassandra_conf_dir"; + static final String OPT_CASSANDRA_LOG_DIR = "cassandra_log_dir"; + static final String OPT_STATE_DIR = "state_dir"; + + protected static final Logger LOG = LoggerFactory.getLogger(ProcessLifecycleProvider.class); + public static final long CASSANDRA_PROCESS_TIMEOUT_MS = Long.getLong("cassandra.sidecar.lifecycle.process.timeout.ms", 120_000L); + + private final String lifecycleDir; + private final String defaultCassandraHome; + private final Map defaultJvmProperties = new HashMap<>(); + private final Map defaultEnvVars = new HashMap<>(); + public ProcessLifecycleProvider(Map params) { - // Params unused for now + // Extract any JVM properties or environment variables from the params + for (Map.Entry entry : params.entrySet()) + { + if (entry.getKey().startsWith("cassandra.")) + { + defaultJvmProperties.put(entry.getKey(), entry.getValue()); + } + else if (entry.getKey().startsWith("env.")) + { + defaultEnvVars.put(entry.getKey().replaceAll("env.", ""), entry.getValue()); + } + } + this.lifecycleDir = params.get(OPT_STATE_DIR); + this.defaultCassandraHome = params.get(OPT_CASSANDRA_HOME); + if (lifecycleDir == null || lifecycleDir.isEmpty()) + { + throw new ConfigurationException("Configuration property '" + OPT_STATE_DIR + "' must be set for ProcessLifecycleProvider"); + } + if (defaultCassandraHome == null || defaultCassandraHome.isEmpty()) + { + throw new ConfigurationException("Configuration property '" + OPT_CASSANDRA_HOME + "' must be set for ProcessLifecycleProvider"); + } } @Override public void start(InstanceMetadata instance) { - throw new UnsupportedOperationException("Not implemented yet"); + if (isCassandraProcessRunning(instance)) + { + LOG.info("Cassandra instance {} is already running.", instance.host()); + return; + } + startCassandra(instance); } @Override public void stop(InstanceMetadata instance) { - throw new UnsupportedOperationException("Not implemented yet"); - + if (!isCassandraProcessRunning(instance)) + { + LOG.info("Cassandra instance {} is already stopped.", instance.host()); + return; + } + stopCassandra(instance); } @Override public boolean isRunning(InstanceMetadata instance) { - throw new UnsupportedOperationException("Not implemented yet"); + return isCassandraProcessRunning(instance); + } + + private void startCassandra(InstanceMetadata instance) + { + ProcessRuntimeConfiguration runtimeConfig = getRuntimeConfiguration(instance); + try + { + String stdoutLocation = getStdoutLocation(runtimeConfig.instanceName()); + String stderrLocation = getStderrLocation(runtimeConfig.instanceName()); + String pidFileLocation = getPidFileLocation(runtimeConfig.instanceName()); + ProcessBuilder processBuilder = runtimeConfig.buildStartCommand(pidFileLocation, + stdoutLocation, + stderrLocation); + LOG.info("Starting Cassandra instance {} with command: {}", runtimeConfig.instanceName(), processBuilder.command()); + + Process process = processBuilder.start(); + process.waitFor(CASSANDRA_PROCESS_TIMEOUT_MS, TimeUnit.MILLISECONDS); // blocking call, make async? + + if (isCassandraProcessRunning(instance)) + { + LOG.info("Started Cassandra instance {} with PID {}", runtimeConfig.instanceName(), readPidFromFile(Path.of(pidFileLocation))); + } + else + { + throw new RuntimeException("Failed to start Cassandra instance " + runtimeConfig.instanceName() + + ". Check stdout at " + stdoutLocation + " and stderr at " + stderrLocation); + } + } + catch (Throwable t) + { + throw new RuntimeException("Failed to start Cassandra instance " + runtimeConfig.instanceName() + " due to " + t.getMessage(), t); + } + } + + private void stopCassandra(InstanceMetadata instance) + { + ProcessRuntimeConfiguration casCfg = getRuntimeConfiguration(instance); + try + { + String pidFileLocation = getPidFileLocation(casCfg.instanceName()); + Long pid = readPidFromFile(Path.of(pidFileLocation)); + Optional processHandle = ProcessHandle.of(pid); + if (processHandle.isPresent()) + { + LOG.info("Stopping process of Cassandra instance {} with PID {}.", casCfg.instanceName(), pid); + CompletableFuture terminationFuture = processHandle.get().onExit(); + processHandle.get().destroy(); // blocking call, make async? + terminationFuture.get(CASSANDRA_PROCESS_TIMEOUT_MS, TimeUnit.MILLISECONDS); + } + else + { + LOG.warn("No process running for Cassandra instance {} with PID {}.", casCfg.instanceName, pid); + } + } + catch (Throwable t) + { + throw new RuntimeException("Failed to stop process for Cassandra instance " + casCfg.instanceName() + " due to " + t.getMessage(), t); + } + } + + @VisibleForTesting + protected ProcessRuntimeConfiguration getRuntimeConfiguration(InstanceMetadata instance) + { + String cassandraHome = Optional.ofNullable(instance.lifecycleOptions().get(OPT_CASSANDRA_HOME)) + .orElse(defaultCassandraHome); + String cassandraConfDir = instance.lifecycleOptions().get(OPT_CASSANDRA_CONF_DIR); + String cassandraLogDir = instance.lifecycleOptions().get(OPT_CASSANDRA_LOG_DIR); + return new ProcessRuntimeConfiguration.Builder() + .withHost(instance.host()) + .withCassandraHome(cassandraHome) + .withCassandraConfDir(cassandraConfDir) + .withCassandraLogDir(cassandraLogDir) + .withStorageDir(instance.storageDir()) + .withJvmOptions(defaultJvmProperties) + .withEnvVars(defaultEnvVars) + .build(); + } + + private boolean isCassandraProcessRunning(InstanceMetadata instance) + { + Path pidFilePath = Path.of(getPidFileLocation(instance.host())); + if (!Files.isRegularFile(pidFilePath) || !Files.isReadable(pidFilePath)) + { + LOG.debug("PID file does not exist or is not readable for instance {} at path {}", instance.host(), pidFilePath); + return false; + } + + try + { + Long pid = readPidFromFile(pidFilePath); + if (ProcessHandle.of(pid).isPresent()) + { + LOG.debug("Cassandra instance {} is running with PID {}", instance.host(), pid); + return true; + } + else + { + LOG.warn("PID file exists for instance {}, but process with PID {} is not running", instance.host(), pid); + return false; + } + } + catch (Exception e) + { + LOG.warn("Failed to read PID from file {} for instance {}: {}", pidFilePath, instance.host(), e.getMessage()); + return false; + } + } + + public static Long readPidFromFile(Path pidFilePath) + { + try + { + String pidFileContent = Files.readString(pidFilePath).trim(); + return Long.parseLong(pidFileContent); + } + catch (IOException | NumberFormatException e) + { + throw new RuntimeException("Failed to read PID from file: " + pidFilePath, e); + } + } + + protected String getPidFileLocation(String host) + { + return getPidFileLocation(lifecycleDir, host); + } + + protected String getStdoutLocation(String instanceName) + { + return Paths.get(lifecycleDir, "cassandra-" + instanceName + ".out").toString(); + } + + protected String getStderrLocation(String instanceName) + { + return Paths.get(lifecycleDir, "cassandra-" + instanceName + ".err").toString(); + } + + @VisibleForTesting + public static String getPidFileLocation(String lifecycleDir, String instanceName) + { + return Paths.get(lifecycleDir, "cassandra-" + instanceName + ".pid").toString(); } } diff --git a/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessRuntimeConfiguration.java b/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessRuntimeConfiguration.java new file mode 100644 index 000000000..1d6e2baf3 --- /dev/null +++ b/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessRuntimeConfiguration.java @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.sidecar.lifecycle; + +import java.io.File; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; + +/** + * Represents the configuration for a Cassandra process instance. + */ +public class ProcessRuntimeConfiguration +{ + static class Builder + { + private String host; + private String cassandraHome; + private String cassandraConfDir; + private String cassandraLogDir; + private String storageDir; + private Map jvmOpts; + private Map envVars; + + public Builder withHost(String host) + { + this.host = host; + return this; + } + + public Builder withCassandraHome(String cassandraHome) + { + this.cassandraHome = cassandraHome; + return this; + } + + public Builder withCassandraConfDir(String cassandraConfDir) + { + this.cassandraConfDir = cassandraConfDir; + return this; + } + + public Builder withCassandraLogDir(String cassandraLogDir) + { + this.cassandraLogDir = cassandraLogDir; + return this; + } + + public Builder withStorageDir(String storageDir) + { + this.storageDir = storageDir; + return this; + } + + public Builder withJvmOptions(Map jvmOptions) + { + this.jvmOpts = jvmOptions; + return this; + } + + public Builder withEnvVars(Map envVars) + { + this.envVars = envVars; + return this; + } + + public ProcessRuntimeConfiguration build() + { + ProcessRuntimeConfiguration casCfg = new ProcessRuntimeConfiguration(host, cassandraHome, cassandraConfDir, cassandraLogDir, storageDir); + casCfg.extraEnvVars.putAll(envVars != null ? envVars : Map.of()); + casCfg.extraJvmOpts.putAll(jvmOpts != null ? jvmOpts : Map.of()); + return casCfg; + } + } + + @NotNull + final String instanceName; + @NotNull + final Path cassandraHome; + @NotNull + final Path cassandraConfDir; + @Nullable + final String cassandraLogDir; + @Nullable + final String storageDir; + final Map extraJvmOpts = new HashMap<>(); + final Map extraEnvVars = new HashMap<>(); + + private ProcessRuntimeConfiguration(@NotNull String host, String cassandraHome, String cassandraConfDir, + @Nullable String cassandraLogDir, @Nullable String storageDir) + { + this.instanceName = host; + this.cassandraHome = Path.of(cassandraHome); + this.cassandraConfDir = Path.of(cassandraConfDir); + this.cassandraLogDir = cassandraLogDir; + this.storageDir = storageDir; + } + + public String instanceName() + { + return instanceName; + } + + public Path cassandraHome() + { + return cassandraHome; + } + + public String cassandraConf() + { + return cassandraConfDir.toString(); + } + + public Path cassandraBin() + { + return cassandraHome.resolve(Path.of("bin", "cassandra")); + } + + public Path stopServerBin() + { + return cassandraHome.resolve(Path.of("bin", "stop-server")); + } + + private Path cassandraYaml() + { + return cassandraConfDir.resolve(Path.of("cassandra.yaml")); + } + + public void validateStart() throws IllegalArgumentException + { + // Check existence + if (!Files.isDirectory(cassandraHome)) + { + throw new IllegalArgumentException("Cassandra home does not exist or is not a directory: " + cassandraHome); + } + if (!Files.isDirectory(cassandraConfDir)) + { + throw new IllegalArgumentException("Cassandra configuration directory does not exist or is not a directory: " + cassandraConfDir); + } + if (!Files.isRegularFile(cassandraYaml())) + { + throw new IllegalArgumentException("Cassandra YAML configuration file does not exist: " + cassandraYaml()); + } + if (!Files.isRegularFile(cassandraBin())) + { + throw new IllegalArgumentException("Cassandra binary does not exist or is not a regular file: " + cassandraBin()); + } + // Check permissions + if (!Files.isExecutable(cassandraBin())) + { + throw new IllegalArgumentException("Cassandra binary is not executable: " + cassandraBin()); + } + if (!Files.isReadable(cassandraConfDir)) + { + throw new IllegalArgumentException("Cassandra configuration directory is not readable: " + cassandraConfDir); + } + } + + public ProcessBuilder buildStartCommand(String pidFileLocation, String stdoutFileLocation, String stderrFileLocation) + { + validateStart(); + + List startCassandraCmd = new ArrayList<>(); + startCassandraCmd.add(cassandraBin().toString()); + startCassandraCmd.add("-p"); + startCassandraCmd.add(pidFileLocation); + for (Map.Entry jvmOpt : extraJvmOpts.entrySet()) + { + startCassandraCmd.add("-D" + jvmOpt.getKey() + "=" + jvmOpt.getValue()); + } + + // Override storage dir if present in sidecar configuration + if (storageDir != null) + { + startCassandraCmd.add("-D"); + startCassandraCmd.add("cassandra.storagedir=" + storageDir); + } + + ProcessBuilder processBuilder = new ProcessBuilder(); + processBuilder.command(startCassandraCmd); + + // Set environment variables + Map env = processBuilder.environment(); + env.put("CASSANDRA_HOME", cassandraHome().toString()); + env.put("CASSANDRA_CONF", cassandraConf()); + env.putAll(extraEnvVars); + + // Only override CASSANDRA_LOG_DIR if it is set in the configuration + if (cassandraLogDir != null) + { + env.put("CASSANDRA_LOG_DIR", cassandraLogDir); + } + + // Redirect output to logs + processBuilder.redirectOutput(ProcessBuilder.Redirect.appendTo(new File(stdoutFileLocation))); + processBuilder.redirectError(ProcessBuilder.Redirect.appendTo(new File(stderrFileLocation))); + + // Set working directory + processBuilder.directory(cassandraHome().toFile()); + return processBuilder; + } + + public String toString() + { + return "ProcessRuntimeConfiguration{" + + "instanceName='" + instanceName + '\'' + + ", cassandraHome=" + cassandraHome + + ", cassandraConfDir=" + cassandraConfDir + + ", cassandraLogDir='" + cassandraLogDir + '\'' + + ", storageDir='" + storageDir + '\'' + + ", extraJvmOpts=" + extraJvmOpts + + ", extraEnvVars=" + extraEnvVars + + '}'; + } +} diff --git a/server/src/main/java/org/apache/cassandra/sidecar/modules/ConfigurationModule.java b/server/src/main/java/org/apache/cassandra/sidecar/modules/ConfigurationModule.java index ad21dd45b..667ffa343 100644 --- a/server/src/main/java/org/apache/cassandra/sidecar/modules/ConfigurationModule.java +++ b/server/src/main/java/org/apache/cassandra/sidecar/modules/ConfigurationModule.java @@ -250,6 +250,7 @@ private static InstanceMetadata buildInstanceMetadata(Vertx vertx, .hintsDir(cassandraInstance.hintsDir()) .savedCachesDir(cassandraInstance.savedCachesDir()) .localSystemDataFileDir(cassandraInstance.localSystemDataFileDir()) + .lifecycleOptions(cassandraInstance.lifecycleOptions()) .delegate(delegate) .metricRegistry(instanceSpecificRegistry) .build(); diff --git a/server/src/test/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProviderTest.java b/server/src/test/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProviderTest.java new file mode 100644 index 000000000..73976f0b1 --- /dev/null +++ b/server/src/test/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProviderTest.java @@ -0,0 +1,320 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.sidecar.lifecycle; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import org.apache.cassandra.sidecar.cluster.instance.InstanceMetadata; +import org.mockito.MockedStatic; + +import static org.apache.cassandra.sidecar.lifecycle.ProcessLifecycleProvider.getPidFileLocation; +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockStatic; +import static org.mockito.Mockito.when; + +/** + * Unit tests for {@link ProcessLifecycleProvider}. These tests use mocking to simulate process behavior. + * Actual process execution is tested by the {@link org.apache.cassandra.sidecar.lifecycle.ProcessLifecycleProviderIntegrationTest} + */ +public class ProcessLifecycleProviderTest +{ + @TempDir + Path lifecycleStateDir; + + /** + * A fake implementation of ProcessLifecycleProvider for testing purposes. + * This class overrides the buildCassandraConfig method to return a mock configuration to avoid actual process execution. + * Also, this simulates starting and stopping a process by creating and deleting a PID file. + */ + class FakeProcessLifecycleProvider extends ProcessLifecycleProvider + { + public FakeProcessLifecycleProvider(Map params) + { + super(params); + } + + protected ProcessRuntimeConfiguration getRuntimeConfiguration(InstanceMetadata instance) + { + try + { + Process mockProcess = mock(Process.class); + when(mockProcess.waitFor()).thenReturn(0); + + ProcessBuilder startMock = mock(ProcessBuilder.class); + when(startMock.start()).then(invocation -> { + String pidFileLocation = getPidFileLocation(lifecycleStateDir.toString(), "localhost"); + // create the pid file to simulate a started process + Path pidFile = Path.of(pidFileLocation); + Files.writeString(pidFile, "12345"); + return mockProcess; + }); + ProcessRuntimeConfiguration mockConfig = mock(ProcessRuntimeConfiguration.class); + when(mockConfig.buildStartCommand(any(), any(), any())).thenReturn(startMock); + when(mockConfig.instanceName()).thenReturn("localhost"); + return mockConfig; + } + catch (InterruptedException | IOException e) + { + throw new RuntimeException(e); + } + } + } + + @Test + void testStartStopIsRunning() throws InterruptedException + { + try (MockedStatic processHandleMock = mockStatic(ProcessHandle.class)) + { + // Mock ProcessHandle.of to simulate process running state + ProcessHandle mockHandle = mock(ProcessHandle.class); + when(mockHandle.onExit()).then(i -> CompletableFuture.supplyAsync(() -> { + String pidFileLocation = getPidFileLocation(lifecycleStateDir.toString(), "localhost"); + // delete the pid file to simulate a stopped process + File file = Path.of(pidFileLocation).toFile(); + file.delete(); + return null; + })); + Optional presentHandle = Optional.of(mockHandle); + processHandleMock.when(() -> ProcessHandle.of(12345L)) + .thenReturn(presentHandle); + + // Create provider with temporary lifecycle state directory + Map params = Map.of( + ProcessLifecycleProvider.OPT_STATE_DIR, lifecycleStateDir.toString(), + ProcessLifecycleProvider.OPT_CASSANDRA_HOME, "/default/cassandra/home" + ); + FakeProcessLifecycleProvider provider = new FakeProcessLifecycleProvider(params); + + // Create mock instance metadata + InstanceMetadata instance = mock(InstanceMetadata.class); + when(instance.host()).thenReturn("localhost"); + when(instance.storageDir()).thenReturn("/custom/storage/dir"); + when(instance.lifecycleOptions()).thenReturn(Map.of()); + + // Initially, instance should not be running (no PID file exists) + String pidFileLocation = getPidFileLocation(lifecycleStateDir.toString(), "localhost"); + Path pidFilePath = Path.of(pidFileLocation); + assertThat(pidFilePath).doesNotExist(); + assertThat(provider.isRunning(instance)).isFalse(); + + // Start the instance + provider.start(instance); + + // After starting, instance should be running (PID file should exist) + assertThat(pidFilePath).exists(); + assertThat(provider.isRunning(instance)).isTrue(); + + // Stop the instance + provider.stop(instance); + + // After stopping, instance should not be running (PID file should be deleted) + assertThat(pidFilePath).doesNotExist(); + assertThat(provider.isRunning(instance)).isFalse(); + } + } + + @Test + void testBuildCassandraConfigWithCassandraHomeOverride() + { + Map params = Map.of( + ProcessLifecycleProvider.OPT_STATE_DIR, lifecycleStateDir.toString(), + ProcessLifecycleProvider.OPT_CASSANDRA_HOME, "/default/cassandra/home" + ); + + ProcessLifecycleProvider provider = new ProcessLifecycleProvider(params); + + // Create mock instance metadata + InstanceMetadata instance = mock(InstanceMetadata.class); + when(instance.host()).thenReturn("localhost"); + when(instance.storageDir()).thenReturn("/custom/storage/dir"); + + Map lifecycleOptions = Map.of( + ProcessLifecycleProvider.OPT_CASSANDRA_HOME, "/instance/cassandra/home", + ProcessLifecycleProvider.OPT_CASSANDRA_CONF_DIR, "/instance/conf/dir", + ProcessLifecycleProvider.OPT_CASSANDRA_LOG_DIR, "/instance/log/dir" + ); + when(instance.lifecycleOptions()).thenReturn(lifecycleOptions); + + ProcessRuntimeConfiguration config = provider.getRuntimeConfiguration(instance); + + // Verify the configuration was built correctly + assertThat(config.instanceName()).isEqualTo("localhost"); + assertThat(config.cassandraHome()).isEqualTo(Path.of("/instance/cassandra/home")); + assertThat(config.cassandraConfDir).isEqualTo(Path.of("/instance/conf/dir")); + assertThat(config.cassandraLogDir).isEqualTo("/instance/log/dir"); + assertThat(config.storageDir).isEqualTo("/custom/storage/dir"); + } + + @Test + void testBuildCassandraConfigWithDefaultCassandraHome() + { + Map params = Map.of( + ProcessLifecycleProvider.OPT_STATE_DIR, lifecycleStateDir.toString(), + ProcessLifecycleProvider.OPT_CASSANDRA_HOME, "/default/cassandra/home" + ); + + ProcessLifecycleProvider provider = new ProcessLifecycleProvider(params); + + // Create mock instance metadata + InstanceMetadata instance = mock(InstanceMetadata.class); + when(instance.host()).thenReturn("localhost"); + when(instance.storageDir()).thenReturn("/custom/storage/dir"); + + // Lifecycle options without CASSANDRA_HOME override + Map lifecycleOptions = Map.of( + ProcessLifecycleProvider.OPT_CASSANDRA_CONF_DIR, "/instance/conf/dir", + ProcessLifecycleProvider.OPT_CASSANDRA_LOG_DIR, "/instance/log/dir" + ); + when(instance.lifecycleOptions()).thenReturn(lifecycleOptions); + + ProcessRuntimeConfiguration config = provider.getRuntimeConfiguration(instance); + + // Verify the configuration uses default Cassandra home + assertThat(config.cassandraHome()).isEqualTo(Path.of("/default/cassandra/home")); + } + + @Test + void testBuildStartCommand() throws IOException + { + // Create temporary files to simulate the required directories and files first + Path tempCassandraHome = lifecycleStateDir.resolve("cassandra"); + Path tempBinDir = tempCassandraHome.resolve("bin"); + Path tempConfDir = lifecycleStateDir.resolve("conf"); + Files.createDirectories(tempBinDir); + Files.createDirectories(tempConfDir); + + Path cassandraBin = tempBinDir.resolve("cassandra"); + Path cassandraYaml = tempConfDir.resolve("cassandra.yaml"); + Files.createFile(cassandraBin); + Files.createFile(cassandraYaml); + cassandraBin.toFile().setExecutable(true); + + // Use the temp directory for Cassandra home instead of hardcoded path + Map params = Map.of( + ProcessLifecycleProvider.OPT_STATE_DIR, lifecycleStateDir.toString(), + ProcessLifecycleProvider.OPT_CASSANDRA_HOME, tempCassandraHome.toString() + ); + + ProcessLifecycleProvider provider = new ProcessLifecycleProvider(params); + + // Create mock instance metadata without storage dir + InstanceMetadata instance = mock(InstanceMetadata.class); + when(instance.host()).thenReturn("testhost"); + when(instance.storageDir()).thenReturn(null); + + Map lifecycleOptions = Map.of( + ProcessLifecycleProvider.OPT_CASSANDRA_CONF_DIR, tempConfDir.toString() + ); + when(instance.lifecycleOptions()).thenReturn(lifecycleOptions); + + // Build the config and test the start command using provider helper methods + ProcessRuntimeConfiguration testConfig = provider.getRuntimeConfiguration(instance); + String pidFileLocation = provider.getPidFileLocation("testhost"); + String stdoutLocation = provider.getStdoutLocation("testhost"); + String stderrLocation = provider.getStderrLocation("testhost"); + + ProcessBuilder processBuilder = testConfig.buildStartCommand(pidFileLocation, stdoutLocation, stderrLocation); + + // Verify command arguments (no storage dir override) + List command = processBuilder.command(); + assertThat(command).containsExactly( + cassandraBin.toString(), + "-p", + pidFileLocation + ); + + // Verify environment variables + Map env = processBuilder.environment(); + assertThat(env.get("CASSANDRA_HOME")).isEqualTo(tempCassandraHome.toString()); + assertThat(env.get("CASSANDRA_CONF")).isEqualTo(tempConfDir.toString()); + assertThat(env.get("CASSANDRA_LOG_DIR")).isNull(); + } + + @Test + void testBuildStartCommandWithExtraJvmOptsAndEnvVars() throws IOException + { + // Create temporary files to simulate the required directories and files first + Path tempCassandraHome = lifecycleStateDir.resolve("cassandra"); + Path tempBinDir = tempCassandraHome.resolve("bin"); + Path tempConfDir = lifecycleStateDir.resolve("conf"); + Files.createDirectories(tempBinDir); + Files.createDirectories(tempConfDir); + + Path cassandraBin = tempBinDir.resolve("cassandra"); + Path cassandraYaml = tempConfDir.resolve("cassandra.yaml"); + Files.createFile(cassandraBin); + Files.createFile(cassandraYaml); + cassandraBin.toFile().setExecutable(true); + + // Create provider with extra JVM options and environment variables + Map params = Map.of( + ProcessLifecycleProvider.OPT_STATE_DIR, lifecycleStateDir.toString(), + ProcessLifecycleProvider.OPT_CASSANDRA_HOME, tempCassandraHome.toString(), + "cassandra.max_queued_native_transport_requests", "1024", + "env.JVM_OPTS", "-Xms1G -Xmx2G", + "env.CUSTOM_VAR", "custom_value" + ); + + ProcessLifecycleProvider provider = new ProcessLifecycleProvider(params); + + // Create mock instance metadata + InstanceMetadata instance = mock(InstanceMetadata.class); + when(instance.host()).thenReturn("testhost"); + when(instance.storageDir()).thenReturn(null); + + Map lifecycleOptions = Map.of( + ProcessLifecycleProvider.OPT_CASSANDRA_CONF_DIR, tempConfDir.toString() + ); + when(instance.lifecycleOptions()).thenReturn(lifecycleOptions); + + // Build the config and test the start command + ProcessRuntimeConfiguration testConfig = provider.getRuntimeConfiguration(instance); + String pidFileLocation = provider.getPidFileLocation("testhost"); + String stdoutLocation = provider.getStdoutLocation("testhost"); + String stderrLocation = provider.getStderrLocation("testhost"); + + ProcessBuilder processBuilder = testConfig.buildStartCommand(pidFileLocation, stdoutLocation, stderrLocation); + + // Verify command includes JVM options as -D parameters + List command = processBuilder.command(); + assertThat(command).contains(cassandraBin.toString()); + assertThat(command).contains("-p"); + assertThat(command).contains(pidFileLocation); + assertThat(command).contains("-Dcassandra.max_queued_native_transport_requests=1024"); + + // Verify environment variables include both standard and extra vars + Map env = processBuilder.environment(); + assertThat(env.get("CASSANDRA_HOME")).isEqualTo(tempCassandraHome.toString()); + assertThat(env.get("CASSANDRA_CONF")).isEqualTo(tempConfDir.toString()); + assertThat(env.get("JVM_OPTS")).isEqualTo("-Xms1G -Xmx2G"); + assertThat(env.get("CUSTOM_VAR")).isEqualTo("custom_value"); + } +} diff --git a/server/src/test/java/org/apache/cassandra/sidecar/lifecycle/ProcessRuntimeConfigurationTest.java b/server/src/test/java/org/apache/cassandra/sidecar/lifecycle/ProcessRuntimeConfigurationTest.java new file mode 100644 index 000000000..50b2d6ea3 --- /dev/null +++ b/server/src/test/java/org/apache/cassandra/sidecar/lifecycle/ProcessRuntimeConfigurationTest.java @@ -0,0 +1,310 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.sidecar.lifecycle; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.attribute.PosixFilePermission; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatCode; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** + * Tests for {@link ProcessRuntimeConfiguration} + */ +class ProcessRuntimeConfigurationTest +{ + @TempDir + Path tempDir; + + private Path cassandraHome; + private Path cassandraConfDir; + private Path cassandraBin; + private Path cassandraYaml; + + @BeforeEach + void setup() throws IOException + { + cassandraHome = Files.createDirectories(tempDir.resolve("cassandra")); + cassandraConfDir = Files.createDirectories(tempDir.resolve("conf")); + + // Create bin directory and cassandra executable + Path binDir = Files.createDirectories(cassandraHome.resolve("bin")); + cassandraBin = Files.createFile(binDir.resolve("cassandra")); + Files.setPosixFilePermissions(cassandraBin, Set.of( + PosixFilePermission.OWNER_READ, + PosixFilePermission.OWNER_WRITE, + PosixFilePermission.OWNER_EXECUTE + )); + + // Create cassandra.yaml + cassandraYaml = Files.createFile(cassandraConfDir.resolve("cassandra.yaml")); + } + + @Test + void testValidateStartWithValidConfiguration() + { + ProcessRuntimeConfiguration config = new ProcessRuntimeConfiguration.Builder() + .withHost("localhost") + .withCassandraHome(cassandraHome.toString()) + .withCassandraConfDir(cassandraConfDir.toString()) + .build(); + + assertThatCode(config::validateStart).doesNotThrowAnyException(); + } + + @Test + void testValidateStartThrowsWhenCassandraHomeDoesNotExist() + { + Path nonExistentHome = tempDir.resolve("nonexistent"); + + ProcessRuntimeConfiguration config = new ProcessRuntimeConfiguration.Builder() + .withHost("localhost") + .withCassandraHome(nonExistentHome.toString()) + .withCassandraConfDir(cassandraConfDir.toString()) + .build(); + + assertThatThrownBy(config::validateStart) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cassandra home does not exist or is not a directory"); + } + + @Test + void testValidateStartThrowsWhenCassandraHomeIsFile() throws IOException + { + Path homeAsFile = Files.createFile(tempDir.resolve("homeAsFile")); + + ProcessRuntimeConfiguration config = new ProcessRuntimeConfiguration.Builder() + .withHost("localhost") + .withCassandraHome(homeAsFile.toString()) + .withCassandraConfDir(cassandraConfDir.toString()) + .build(); + + assertThatThrownBy(config::validateStart) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cassandra home does not exist or is not a directory"); + } + + @Test + void testValidateStartThrowsWhenConfDirDoesNotExist() + { + Path nonExistentConfDir = tempDir.resolve("nonexistent-conf"); + + ProcessRuntimeConfiguration config = new ProcessRuntimeConfiguration.Builder() + .withHost("localhost") + .withCassandraHome(cassandraHome.toString()) + .withCassandraConfDir(nonExistentConfDir.toString()) + .build(); + + assertThatThrownBy(config::validateStart) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cassandra configuration directory does not exist or is not a directory"); + } + + @Test + void testValidateStartThrowsWhenCassandraYamlDoesNotExist() throws IOException + { + Files.delete(cassandraYaml); + + ProcessRuntimeConfiguration config = new ProcessRuntimeConfiguration.Builder() + .withHost("localhost") + .withCassandraHome(cassandraHome.toString()) + .withCassandraConfDir(cassandraConfDir.toString()) + .build(); + + assertThatThrownBy(config::validateStart) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cassandra YAML configuration file does not exist"); + } + + @Test + void testValidateStartThrowsWhenCassandraBinDoesNotExist() throws IOException + { + Files.delete(cassandraBin); + + ProcessRuntimeConfiguration config = new ProcessRuntimeConfiguration.Builder() + .withHost("localhost") + .withCassandraHome(cassandraHome.toString()) + .withCassandraConfDir(cassandraConfDir.toString()) + .build(); + + assertThatThrownBy(config::validateStart) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cassandra binary does not exist or is not a regular file"); + } + + @Test + void testValidateStartThrowsWhenCassandraBinNotExecutable() throws IOException + { + Files.setPosixFilePermissions(cassandraBin, Set.of( + PosixFilePermission.OWNER_READ, + PosixFilePermission.OWNER_WRITE + )); + + ProcessRuntimeConfiguration config = new ProcessRuntimeConfiguration.Builder() + .withHost("localhost") + .withCassandraHome(cassandraHome.toString()) + .withCassandraConfDir(cassandraConfDir.toString()) + .build(); + + assertThatThrownBy(config::validateStart) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cassandra binary is not executable"); + } + + @Test + void testValidateStartThrowsWhenConfDirNotReadable() throws IOException + { + Files.setPosixFilePermissions(cassandraConfDir, Set.of( + PosixFilePermission.OWNER_WRITE, + PosixFilePermission.OWNER_EXECUTE + )); + + ProcessRuntimeConfiguration config = new ProcessRuntimeConfiguration.Builder() + .withHost("localhost") + .withCassandraHome(cassandraHome.toString()) + .withCassandraConfDir(cassandraConfDir.toString()) + .build(); + + assertThatThrownBy(config::validateStart) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cassandra configuration directory is not readable"); + } + + @Test + void testBuildStartCommand() + { + ProcessRuntimeConfiguration config = new ProcessRuntimeConfiguration.Builder() + .withHost("localhost") + .withCassandraHome(cassandraHome.toString()) + .withCassandraConfDir(cassandraConfDir.toString()) + .withCassandraLogDir("/custom/log/dir") + .withStorageDir("/custom/storage/dir") + .build(); + + String pidFile = "/tmp/cassandra.pid"; + String stdoutFile = "/tmp/cassandra.out"; + String stderrFile = "/tmp/cassandra.err"; + + ProcessBuilder pb = config.buildStartCommand(pidFile, stdoutFile, stderrFile); + + // Verify command + List command = pb.command(); + assertThat(command).hasSize(5); + assertThat(command.get(0)).isEqualTo(cassandraBin.toString()); + assertThat(command.get(1)).isEqualTo("-p"); + assertThat(command.get(2)).isEqualTo(pidFile); + assertThat(command.get(3)).isEqualTo("-D"); + assertThat(command.get(4)).isEqualTo("cassandra.storagedir=/custom/storage/dir"); + + // Verify environment variables + Map env = pb.environment(); + assertThat(env.get("CASSANDRA_HOME")).isEqualTo(cassandraHome.toString()); + assertThat(env.get("CASSANDRA_CONF")).isEqualTo(cassandraConfDir.toString()); + assertThat(env.get("CASSANDRA_LOG_DIR")).isEqualTo("/custom/log/dir"); + + // Verify working directory + assertThat(pb.directory()).isEqualTo(cassandraHome.toFile()); + + // Verify redirects are configured (files don't need to exist for ProcessBuilder creation) + assertThat(pb.redirectOutput().type()).isEqualTo(ProcessBuilder.Redirect.Type.APPEND); + assertThat(pb.redirectError().type()).isEqualTo(ProcessBuilder.Redirect.Type.APPEND); + } + + @Test + void testBuildStartCommandWithoutStorageAndLogDir() + { + ProcessRuntimeConfiguration config = new ProcessRuntimeConfiguration.Builder() + .withHost("localhost") + .withCassandraHome(cassandraHome.toString()) + .withCassandraConfDir(cassandraConfDir.toString()) + .build(); + + String pidFile = "/tmp/cassandra.pid"; + String stdoutFile = "/tmp/cassandra.out"; + String stderrFile = "/tmp/cassandra.err"; + + ProcessBuilder pb = config.buildStartCommand(pidFile, stdoutFile, stderrFile); + + // Verify command - should not include storage dir parameters + List command = pb.command(); + assertThat(command).hasSize(3); + assertThat(command.get(0)).isEqualTo(cassandraBin.toString()); + assertThat(command.get(1)).isEqualTo("-p"); + assertThat(command.get(2)).isEqualTo(pidFile); + + // Verify environment variables + Map env = pb.environment(); + assertThat(env.get("CASSANDRA_HOME")).isEqualTo(cassandraHome.toString()); + assertThat(env.get("CASSANDRA_CONF")).isEqualTo(cassandraConfDir.toString()); + + // Verify working directory + assertThat(pb.directory()).isEqualTo(cassandraHome.toFile()); + + // Verify redirects are configured + assertThat(pb.redirectOutput().type()).isEqualTo(ProcessBuilder.Redirect.Type.APPEND); + assertThat(pb.redirectError().type()).isEqualTo(ProcessBuilder.Redirect.Type.APPEND); + } + + @Test + void testBuildStartCommandWithExtraJvmOptsAndEnvVars() + { + Map extraJvmOpts = Map.of( + "cassandra.jmx.local.port", "7199", + "java.rmi.server.hostname", "localhost" + ); + + Map extraEnvVars = Map.of( + "JVM_OPTS", "-Xms1G -Xmx2G", + "CUSTOM_VAR", "custom_value" + ); + + ProcessRuntimeConfiguration config = new ProcessRuntimeConfiguration.Builder() + .withHost("localhost") + .withCassandraHome(cassandraHome.toString()) + .withCassandraConfDir(cassandraConfDir.toString()) + .withJvmOptions(extraJvmOpts) + .withEnvVars(extraEnvVars) + .build(); + + String pidFile = "/tmp/cassandra.pid"; + String stdoutFile = "/tmp/cassandra.out"; + String stderrFile = "/tmp/cassandra.err"; + ProcessBuilder pb = config.buildStartCommand(pidFile, stdoutFile, stderrFile); + + // Verify JVM options are included (order may vary) + List command = pb.command(); + assertThat(command).contains("-Dcassandra.jmx.local.port=7199"); + assertThat(command).contains("-Djava.rmi.server.hostname=localhost"); + + // Verify environment variables include both standard and extra vars + Map env = pb.environment(); + assertThat(env.get("JVM_OPTS")).isEqualTo("-Xms1G -Xmx2G"); + assertThat(env.get("CUSTOM_VAR")).isEqualTo("custom_value"); + } +} From 5cbdacb21e8a67a81d6170576be3dff90d2b8bab Mon Sep 17 00:00:00 2001 From: Paulo Motta Date: Fri, 5 Dec 2025 14:25:12 -0500 Subject: [PATCH 02/13] Make sed command cross-compatible with macOS and Linux Empty string argument to -i works on both BSD and GNU sed. --- examples/lifecycle/setup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/lifecycle/setup.sh b/examples/lifecycle/setup.sh index dc4df60e3..b35a41895 100755 --- a/examples/lifecycle/setup.sh +++ b/examples/lifecycle/setup.sh @@ -51,4 +51,4 @@ fi echo "Creating configuration" cp -r ${CASSANDRA_HOME}/conf/* ${CASSANDRA_CONF} sed "s#\$cassandraHome#${CASSANDRA_HOME}#g" ${SIDECAR_YAML_TEMPLATE} > ${SIDECAR_YAML} -sed -i "s#\$baseDir#${NODE_DIR}#g" ${SIDECAR_YAML} +sed -i '' "s#\$baseDir#${NODE_DIR}#g" ${SIDECAR_YAML} From 95ec387fe5cff4f27bc78fe013f30403e4c6f4c5 Mon Sep 17 00:00:00 2001 From: Paulo Motta Date: Fri, 5 Dec 2025 14:40:08 -0500 Subject: [PATCH 03/13] Overwrite instead of appending startup logs to avoid unbounded growth --- .../sidecar/lifecycle/ProcessRuntimeConfiguration.java | 4 ++-- .../lifecycle/ProcessRuntimeConfigurationTest.java | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessRuntimeConfiguration.java b/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessRuntimeConfiguration.java index 1d6e2baf3..2ba4bdf66 100644 --- a/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessRuntimeConfiguration.java +++ b/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessRuntimeConfiguration.java @@ -214,8 +214,8 @@ public ProcessBuilder buildStartCommand(String pidFileLocation, String stdoutFil } // Redirect output to logs - processBuilder.redirectOutput(ProcessBuilder.Redirect.appendTo(new File(stdoutFileLocation))); - processBuilder.redirectError(ProcessBuilder.Redirect.appendTo(new File(stderrFileLocation))); + processBuilder.redirectOutput(ProcessBuilder.Redirect.to(new File(stdoutFileLocation))); + processBuilder.redirectError(ProcessBuilder.Redirect.to(new File(stderrFileLocation))); // Set working directory processBuilder.directory(cassandraHome().toFile()); diff --git a/server/src/test/java/org/apache/cassandra/sidecar/lifecycle/ProcessRuntimeConfigurationTest.java b/server/src/test/java/org/apache/cassandra/sidecar/lifecycle/ProcessRuntimeConfigurationTest.java index 50b2d6ea3..8e99531a0 100644 --- a/server/src/test/java/org/apache/cassandra/sidecar/lifecycle/ProcessRuntimeConfigurationTest.java +++ b/server/src/test/java/org/apache/cassandra/sidecar/lifecycle/ProcessRuntimeConfigurationTest.java @@ -232,8 +232,8 @@ void testBuildStartCommand() assertThat(pb.directory()).isEqualTo(cassandraHome.toFile()); // Verify redirects are configured (files don't need to exist for ProcessBuilder creation) - assertThat(pb.redirectOutput().type()).isEqualTo(ProcessBuilder.Redirect.Type.APPEND); - assertThat(pb.redirectError().type()).isEqualTo(ProcessBuilder.Redirect.Type.APPEND); + assertThat(pb.redirectOutput().type()).isEqualTo(ProcessBuilder.Redirect.Type.WRITE); + assertThat(pb.redirectError().type()).isEqualTo(ProcessBuilder.Redirect.Type.WRITE); } @Test @@ -267,8 +267,8 @@ void testBuildStartCommandWithoutStorageAndLogDir() assertThat(pb.directory()).isEqualTo(cassandraHome.toFile()); // Verify redirects are configured - assertThat(pb.redirectOutput().type()).isEqualTo(ProcessBuilder.Redirect.Type.APPEND); - assertThat(pb.redirectError().type()).isEqualTo(ProcessBuilder.Redirect.Type.APPEND); + assertThat(pb.redirectOutput().type()).isEqualTo(ProcessBuilder.Redirect.Type.WRITE); + assertThat(pb.redirectError().type()).isEqualTo(ProcessBuilder.Redirect.Type.WRITE); } @Test From 585c638a3ab141b7b3a43ea7117de396c20b4974 Mon Sep 17 00:00:00 2001 From: Paulo Motta Date: Fri, 5 Dec 2025 18:26:40 -0500 Subject: [PATCH 04/13] Enhance stale PID file detection and cleanup - Verify process is Cassandra before considering it running - Auto-remove stale PID files when process missing or not Cassandra - Add cleanup in stopCassandra() and improved process validation - Add unit tests for stale PID scenarios --- .../LifecycleProviderIntegrationTester.java | 2 +- .../lifecycle/ProcessLifecycleProvider.java | 106 ++++++- .../ProcessLifecycleProviderTest.java | 286 +++++++++++++++++- 3 files changed, 370 insertions(+), 24 deletions(-) diff --git a/integration-tests/src/integrationTest/org/apache/cassandra/sidecar/lifecycle/LifecycleProviderIntegrationTester.java b/integration-tests/src/integrationTest/org/apache/cassandra/sidecar/lifecycle/LifecycleProviderIntegrationTester.java index 7431cbf22..0a7a1d1e6 100644 --- a/integration-tests/src/integrationTest/org/apache/cassandra/sidecar/lifecycle/LifecycleProviderIntegrationTester.java +++ b/integration-tests/src/integrationTest/org/apache/cassandra/sidecar/lifecycle/LifecycleProviderIntegrationTester.java @@ -203,7 +203,7 @@ private void waitForLastUpdateToConverge(String expectedLastUpdate, int timeoutS throw new TimeoutException("Expected lifecycle update not reached after " + timeoutSeconds + " seconds. " + ", last_update: " + lastUpdate); } - Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS); + Uninterruptibles.sleepUninterruptibly(10, TimeUnit.SECONDS); lastUpdate = getLifecycle().bodyAsJsonObject().getString("last_update"); } } diff --git a/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java b/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java index aa50301b5..3cf9838a0 100644 --- a/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java +++ b/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java @@ -18,7 +18,9 @@ package org.apache.cassandra.sidecar.lifecycle; +import java.io.BufferedReader; import java.io.IOException; +import java.io.InputStreamReader; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -28,12 +30,11 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import org.apache.cassandra.sidecar.cluster.instance.InstanceMetadata; import org.apache.cassandra.sidecar.exceptions.ConfigurationException; import org.jetbrains.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Manage the lifecycle of Cassandra instances running on local processes @@ -153,6 +154,7 @@ private void stopCassandra(InstanceMetadata instance) CompletableFuture terminationFuture = processHandle.get().onExit(); processHandle.get().destroy(); // blocking call, make async? terminationFuture.get(CASSANDRA_PROCESS_TIMEOUT_MS, TimeUnit.MILLISECONDS); + Files.deleteIfExists(Path.of(pidFileLocation)); } else { @@ -183,28 +185,60 @@ protected ProcessRuntimeConfiguration getRuntimeConfiguration(InstanceMetadata i .build(); } + /** + * Checks whether a Cassandra instance is currently running as a local process + * and automatically cleans up stale PID files. + * + * Performs four validation steps: + * 1. Verifies the PID file exists and is readable. Returns false if not found. + * 2. Reads the PID and checks if the process is alive. Returns false and deletes the + * PID file if the process no longer exists or is not alive. + * 3. Verifies the process is a Cassandra instance by checking for + * org.apache.cassandra.service.CassandraDaemon in the command line. Returns true if + * the command line contains the Cassandra daemon class or cannot be determined. + * 4. If the process is running but is not a Cassandra process, returns false and deletes + * the stale PID file. + * + * @param instance the instance metadata containing host information + * @return true if the instance is running as a Cassandra process, false otherwise + */ private boolean isCassandraProcessRunning(InstanceMetadata instance) { Path pidFilePath = Path.of(getPidFileLocation(instance.host())); - if (!Files.isRegularFile(pidFilePath) || !Files.isReadable(pidFilePath)) - { - LOG.debug("PID file does not exist or is not readable for instance {} at path {}", instance.host(), pidFilePath); - return false; - } - try { - Long pid = readPidFromFile(pidFilePath); - if (ProcessHandle.of(pid).isPresent()) + // Case 1: PID file does not exist or is not readable + if (!Files.isRegularFile(pidFilePath) || !Files.isReadable(pidFilePath)) { - LOG.debug("Cassandra instance {} is running with PID {}", instance.host(), pid); - return true; + LOG.debug("PID file does not exist or is not readable for instance {} at path {}", instance.host(), pidFilePath); + return false; } - else + + Long pid = readPidFromFile(pidFilePath); + Optional processHandle = ProcessHandle.of(pid); + + // Case 2: No process with such PID or process is not alive + if (processHandle.isEmpty() || !processHandle.get().isAlive()) { - LOG.warn("PID file exists for instance {}, but process with PID {} is not running", instance.host(), pid); + LOG.debug("No running process found with PID {} for instance {}", pid, instance.host()); + deletePidFile(instance, pidFilePath); return false; } + + // Case 3: Process with such PID is running - check if it's a Cassandra process + // If we can't determine the command line, we assume it's Cassandra + Optional cmdLine = getCommandLinePlatformIndependent(processHandle.get()); + if (cmdLine.isEmpty() || cmdLine.get().contains("org.apache.cassandra.service.CassandraDaemon")) + { + LOG.debug("Cassandra instance {} is running with PID {}", instance.host(), pid); + return true; + } + + // Case 4: Process with such PID is running but it's not a Cassandra process + LOG.debug("Process with PID {} for instance {} is not a Cassandra process (command line: {}).", + pid, instance.host(), cmdLine); + deletePidFile(instance, pidFilePath); + return false; } catch (Exception e) { @@ -213,6 +247,48 @@ private boolean isCassandraProcessRunning(InstanceMetadata instance) } } + protected static void deletePidFile(InstanceMetadata instance, Path pidFilePath) { + try + { + LOG.info("Deleting stale PID file {} for instance {}", pidFilePath, instance.host()); + Files.delete(pidFilePath); + } + catch (Exception e) + { + LOG.warn("Failed to delete stale PID file {} for instance {}: {}", pidFilePath, instance.host(), e.getMessage()); + } + } + + /* + * Due to JDK-8345117 java can sometimes truncate the command line returned by ProcessHandle.info().commandLine() + * on some platforms (ie. Linux). To work around this, we use the 'ps' command to get the full command line. + * This method should be platform-independent as it relies on the 'ps' command which is available on most Unix-like systems. + * For non-Unix systems, we fall back to the default implementation. + */ + protected static Optional getCommandLinePlatformIndependent(ProcessHandle processHandle) { + long pid = processHandle.pid(); + try { + ProcessBuilder pb = new ProcessBuilder("ps", "-p", String.valueOf(pid), "-o", "args="); + Process proc = pb.start(); + try (BufferedReader reader = new BufferedReader( + new InputStreamReader(proc.getInputStream()))) { + String line = reader.readLine(); + proc.waitFor(5, TimeUnit.SECONDS); + if (line != null && !line.isEmpty()) + { + return Optional.of(line.trim()); + } + } + } + catch (Exception e) + { + LOG.warn("Failed to get command line via ps for PID {}", pid, e); + } + // Fallback to default implementation + return processHandle.info().commandLine(); + } + + @VisibleForTesting public static Long readPidFromFile(Path pidFilePath) { try diff --git a/server/src/test/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProviderTest.java b/server/src/test/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProviderTest.java index 73976f0b1..5e59cd81c 100644 --- a/server/src/test/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProviderTest.java +++ b/server/src/test/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProviderTest.java @@ -18,7 +18,6 @@ package org.apache.cassandra.sidecar.lifecycle; -import java.io.File; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; @@ -95,13 +94,12 @@ void testStartStopIsRunning() throws InterruptedException { // Mock ProcessHandle.of to simulate process running state ProcessHandle mockHandle = mock(ProcessHandle.class); - when(mockHandle.onExit()).then(i -> CompletableFuture.supplyAsync(() -> { - String pidFileLocation = getPidFileLocation(lifecycleStateDir.toString(), "localhost"); - // delete the pid file to simulate a stopped process - File file = Path.of(pidFileLocation).toFile(); - file.delete(); - return null; - })); + ProcessHandle.Info mockInfo = mock(ProcessHandle.Info.class); + when(mockHandle.isAlive()).thenReturn(true); + when(mockHandle.info()).thenReturn(mockInfo); + when(mockInfo.commandLine()).thenReturn(Optional.of("java -cp /path/to/cassandra org.apache.cassandra.service.CassandraDaemon")); + when(mockHandle.onExit()).thenReturn(CompletableFuture.completedFuture(null)); + when(mockHandle.pid()).thenReturn(12345L); Optional presentHandle = Optional.of(mockHandle); processHandleMock.when(() -> ProcessHandle.of(12345L)) .thenReturn(presentHandle); @@ -317,4 +315,276 @@ void testBuildStartCommandWithExtraJvmOptsAndEnvVars() throws IOException assertThat(env.get("JVM_OPTS")).isEqualTo("-Xms1G -Xmx2G"); assertThat(env.get("CUSTOM_VAR")).isEqualTo("custom_value"); } + + @Test + void testIsCassandraProcessRunningRemovesStalePidFile() throws IOException + { + try (MockedStatic processHandleMock = mockStatic(ProcessHandle.class)) + { + // Create provider + Map params = Map.of( + ProcessLifecycleProvider.OPT_STATE_DIR, lifecycleStateDir.toString(), + ProcessLifecycleProvider.OPT_CASSANDRA_HOME, "/default/cassandra/home" + ); + ProcessLifecycleProvider provider = new ProcessLifecycleProvider(params); + + // Create mock instance metadata + InstanceMetadata instance = mock(InstanceMetadata.class); + when(instance.host()).thenReturn("localhost"); + when(instance.storageDir()).thenReturn("/storage/dir"); + when(instance.lifecycleOptions()).thenReturn(Map.of()); + + // Create a PID file with a stale PID (process that doesn't exist) + String pidFileLocation = getPidFileLocation(lifecycleStateDir.toString(), "localhost"); + Path pidFilePath = Path.of(pidFileLocation); + Files.writeString(pidFilePath, "99999"); + + // Mock ProcessHandle.of to return empty (process doesn't exist) + processHandleMock.when(() -> ProcessHandle.of(99999L)) + .thenReturn(Optional.empty()); + + // Verify PID file exists before the check + assertThat(pidFilePath).exists(); + + // Call isRunning - should return false and remove the stale PID file + boolean isRunning = provider.isRunning(instance); + + // Verify process is not running + assertThat(isRunning).isFalse(); + + // Verify the stale PID file was removed + assertThat(pidFilePath).doesNotExist(); + } + } + + @Test + void testIsCassandraProcessRunningRemovesStalePidFileWhenNotCassandra() throws IOException + { + try (MockedStatic processHandleMock = mockStatic(ProcessHandle.class)) + { + // Create provider + Map params = Map.of( + ProcessLifecycleProvider.OPT_STATE_DIR, lifecycleStateDir.toString(), + ProcessLifecycleProvider.OPT_CASSANDRA_HOME, "/default/cassandra/home" + ); + ProcessLifecycleProvider provider = new ProcessLifecycleProvider(params); + + // Create mock instance metadata + InstanceMetadata instance = mock(InstanceMetadata.class); + when(instance.host()).thenReturn("localhost"); + when(instance.storageDir()).thenReturn("/storage/dir"); + when(instance.lifecycleOptions()).thenReturn(Map.of()); + + // Create a PID file with a PID + String pidFileLocation = getPidFileLocation(lifecycleStateDir.toString(), "localhost"); + Path pidFilePath = Path.of(pidFileLocation); + Files.writeString(pidFilePath, "55555"); + + // Mock ProcessHandle that exists and is alive but is not a Cassandra process + ProcessHandle mockHandle = mock(ProcessHandle.class); + ProcessHandle.Info mockInfo = mock(ProcessHandle.Info.class); + when(mockHandle.isAlive()).thenReturn(true); + when(mockHandle.info()).thenReturn(mockInfo); + when(mockInfo.commandLine()).thenReturn(Optional.of("/usr/bin/someothercommand")); + when(mockHandle.pid()).thenReturn(55555L); + + processHandleMock.when(() -> ProcessHandle.of(55555L)) + .thenReturn(Optional.of(mockHandle)); + + // Verify PID file exists before the check + assertThat(pidFilePath).exists(); + + // Call isRunning - should return false and remove the stale PID file + boolean isRunning = provider.isRunning(instance); + + // Verify process is not running + assertThat(isRunning).isFalse(); + + // Verify the stale PID file was removed + assertThat(pidFilePath).doesNotExist(); + } + } + + @Test + void testIsCassandraProcessRunningDoesNotRemovePidFileWhenCassandraIsRunning() throws IOException + { + try (MockedStatic processHandleMock = mockStatic(ProcessHandle.class)) + { + // Create provider + Map params = Map.of( + ProcessLifecycleProvider.OPT_STATE_DIR, lifecycleStateDir.toString(), + ProcessLifecycleProvider.OPT_CASSANDRA_HOME, "/default/cassandra/home" + ); + ProcessLifecycleProvider provider = new ProcessLifecycleProvider(params); + + // Create mock instance metadata + InstanceMetadata instance = mock(InstanceMetadata.class); + when(instance.host()).thenReturn("localhost"); + when(instance.storageDir()).thenReturn("/storage/dir"); + when(instance.lifecycleOptions()).thenReturn(Map.of()); + + // Create a PID file with a valid Cassandra process PID + String pidFileLocation = getPidFileLocation(lifecycleStateDir.toString(), "localhost"); + Path pidFilePath = Path.of(pidFileLocation); + Files.writeString(pidFilePath, "77777"); + + // Mock ProcessHandle that exists and is a Cassandra process + ProcessHandle mockHandle = mock(ProcessHandle.class); + ProcessHandle.Info mockInfo = mock(ProcessHandle.Info.class); + when(mockHandle.isAlive()).thenReturn(true); + when(mockHandle.info()).thenReturn(mockInfo); + when(mockInfo.commandLine()).thenReturn(Optional.of("java -cp /path/to/cassandra org.apache.cassandra.service.CassandraDaemon")); + when(mockHandle.pid()).thenReturn(77777L); + + processHandleMock.when(() -> ProcessHandle.of(77777L)) + .thenReturn(Optional.of(mockHandle)); + + // Verify PID file exists before the check + assertThat(pidFilePath).exists(); + + // Call isRunning + boolean isRunning = provider.isRunning(instance); + + // Verify process is running + assertThat(isRunning).isTrue(); + + // Verify the PID file was NOT removed since process is running + assertThat(pidFilePath).exists(); + assertThat(Files.readString(pidFilePath).trim()).isEqualTo("77777"); + } + } + + @Test + void testGetCommandLinePlatformIndependentReturnsFallbackWhenPsFails() + { + // Mock ProcessHandle + ProcessHandle mockHandle = mock(ProcessHandle.class); + ProcessHandle.Info mockInfo = mock(ProcessHandle.Info.class); + when(mockHandle.info()).thenReturn(mockInfo); + String expectedCommandLine = "java -cp /path/to/cassandra org.apache.cassandra.service.CassandraDaemon"; + when(mockInfo.commandLine()).thenReturn(Optional.of(expectedCommandLine)); + when(mockHandle.pid()).thenReturn(99999L); + + // Since ps will fail (PID doesn't exist), we should get the fallback command line + Optional result = ProcessLifecycleProvider.getCommandLinePlatformIndependent(mockHandle); + + // Verify that we get the fallback result (since ps will return empty for non-existent PID) + assertThat(result).isPresent(); + assertThat(result.get()).isEqualTo(expectedCommandLine); + } + + @Test + void testReadPidFromFile() throws IOException + { + // Create a PID file with a valid PID + Path pidFilePath = lifecycleStateDir.resolve("test.pid"); + Files.writeString(pidFilePath, "12345"); + + // Read the PID from the file + Long pid = ProcessLifecycleProvider.readPidFromFile(pidFilePath); + + // Verify the PID is read correctly + assertThat(pid).isEqualTo(12345L); + } + + @Test + void testReadPidFromFileWithWhitespace() throws IOException + { + // Create a PID file with whitespace around the PID + Path pidFilePath = lifecycleStateDir.resolve("test.pid"); + Files.writeString(pidFilePath, " 54321 \n"); + + // Read the PID from the file + Long pid = ProcessLifecycleProvider.readPidFromFile(pidFilePath); + + // Verify the PID is read correctly and whitespace is trimmed + assertThat(pid).isEqualTo(54321L); + } + + @Test + void testReadPidFromFileThrowsExceptionForInvalidPid() + { + // Create a PID file with invalid content + Path pidFilePath = lifecycleStateDir.resolve("test.pid"); + assertThat(pidFilePath).doesNotExist(); + + try + { + Files.writeString(pidFilePath, "invalid_pid"); + ProcessLifecycleProvider.readPidFromFile(pidFilePath); + + // Should not reach here + assertThat(false).as("Expected RuntimeException to be thrown").isTrue(); + } + catch (RuntimeException e) + { + assertThat(e.getMessage()).contains("Failed to read PID from file"); + assertThat(e.getCause()).isInstanceOf(NumberFormatException.class); + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } + + @Test + void testReadPidFromFileThrowsExceptionForMissingFile() + { + // Try to read a PID file that doesn't exist + Path pidFilePath = lifecycleStateDir.resolve("nonexistent.pid"); + + try + { + ProcessLifecycleProvider.readPidFromFile(pidFilePath); + + // Should not reach here + assertThat(false).as("Expected RuntimeException to be thrown").isTrue(); + } + catch (RuntimeException e) + { + assertThat(e.getMessage()).contains("Failed to read PID from file"); + assertThat(e.getCause()).isInstanceOf(IOException.class); + } + } + + @Test + void testDeletePidFile() throws IOException + { + // Create a mock instance + InstanceMetadata instance = mock(InstanceMetadata.class); + when(instance.host()).thenReturn("testhost"); + + // Create a PID file + Path pidFilePath = lifecycleStateDir.resolve("cassandra-testhost.pid"); + Files.writeString(pidFilePath, "99999"); + + // Verify the file exists + assertThat(pidFilePath).exists(); + + // Delete the PID file + ProcessLifecycleProvider.deletePidFile(instance, pidFilePath); + + // Verify the file was deleted + assertThat(pidFilePath).doesNotExist(); + } + + @Test + void testDeletePidFileWhenFileDoesNotExist() + { + // Create a mock instance + InstanceMetadata instance = mock(InstanceMetadata.class); + when(instance.host()).thenReturn("testhost"); + + // Try to delete a PID file that doesn't exist + Path pidFilePath = lifecycleStateDir.resolve("nonexistent-cassandra-testhost.pid"); + + // Verify the file doesn't exist + assertThat(pidFilePath).doesNotExist(); + + // Should not throw an exception when trying to delete a non-existent file + ProcessLifecycleProvider.deletePidFile(instance, pidFilePath); + + // Verify the file still doesn't exist + assertThat(pidFilePath).doesNotExist(); + } } From b7e8af25a64dcc260c2c742bf2a2dd07660e3d26 Mon Sep 17 00:00:00 2001 From: Paulo Motta Date: Fri, 5 Dec 2025 18:09:33 -0500 Subject: [PATCH 05/13] Allow non-snapshot tarballs to be supplied --- gradle/common/integrationTestTask.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/common/integrationTestTask.gradle b/gradle/common/integrationTestTask.gradle index 50a18b29b..5fa3a1baa 100644 --- a/gradle/common/integrationTestTask.gradle +++ b/gradle/common/integrationTestTask.gradle @@ -32,7 +32,7 @@ apply from: "${project.rootDir}/gradle/common/java11Options.gradle" def getCassandraTarballLocation() { def tarballsDirFile = file(tarballsDir) if (tarballsDirFile.exists() && tarballsDirFile.isDirectory()) { - def matchingFiles = fileTree(tarballsDirFile).files.findAll {it.name ==~ /^apache-cassandra-${tarballVersion}\.?\d*-SNAPSHOT-bin\.tar.gz$/ } + def matchingFiles = fileTree(tarballsDirFile).files.findAll {it.name ==~ /^apache-cassandra-${tarballVersion}\.?\d*(-SNAPSHOT){0,1}-bin\.tar.gz$/ } if (matchingFiles.size() > 0) { def tarballToTest = matchingFiles.first() println("Testing with tarball ${tarballToTest}") From 4927f3c803348ff568b97cbd3f6de83996cddd32 Mon Sep 17 00:00:00 2001 From: Paulo Motta Date: Fri, 5 Dec 2025 19:03:31 -0500 Subject: [PATCH 06/13] Fix checkstyle violations --- .../lifecycle/ProcessLifecycleProvider.java | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java b/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java index 3cf9838a0..706b1cd23 100644 --- a/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java +++ b/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java @@ -30,11 +30,12 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.cassandra.sidecar.cluster.instance.InstanceMetadata; import org.apache.cassandra.sidecar.exceptions.ConfigurationException; import org.jetbrains.annotations.VisibleForTesting; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * Manage the lifecycle of Cassandra instances running on local processes @@ -217,7 +218,7 @@ private boolean isCassandraProcessRunning(InstanceMetadata instance) Long pid = readPidFromFile(pidFilePath); Optional processHandle = ProcessHandle.of(pid); - // Case 2: No process with such PID or process is not alive + // Case 2: No process running with such PID or process is not alive if (processHandle.isEmpty() || !processHandle.get().isAlive()) { LOG.debug("No running process found with PID {} for instance {}", pid, instance.host()); @@ -247,7 +248,8 @@ private boolean isCassandraProcessRunning(InstanceMetadata instance) } } - protected static void deletePidFile(InstanceMetadata instance, Path pidFilePath) { + protected static void deletePidFile(InstanceMetadata instance, Path pidFilePath) + { try { LOG.info("Deleting stale PID file {} for instance {}", pidFilePath, instance.host()); @@ -265,13 +267,16 @@ protected static void deletePidFile(InstanceMetadata instance, Path pidFilePath) * This method should be platform-independent as it relies on the 'ps' command which is available on most Unix-like systems. * For non-Unix systems, we fall back to the default implementation. */ - protected static Optional getCommandLinePlatformIndependent(ProcessHandle processHandle) { + protected static Optional getCommandLinePlatformIndependent(ProcessHandle processHandle) + { long pid = processHandle.pid(); - try { + try + { ProcessBuilder pb = new ProcessBuilder("ps", "-p", String.valueOf(pid), "-o", "args="); Process proc = pb.start(); try (BufferedReader reader = new BufferedReader( - new InputStreamReader(proc.getInputStream()))) { + new InputStreamReader(proc.getInputStream()))) + { String line = reader.readLine(); proc.waitFor(5, TimeUnit.SECONDS); if (line != null && !line.isEmpty()) From e63bd29e622fd3152f1bdc8c3e276be283b6d452 Mon Sep 17 00:00:00 2001 From: Paulo Motta Date: Fri, 5 Dec 2025 19:06:37 -0500 Subject: [PATCH 07/13] Apply suggestion from @arjunashok Co-authored-by: Arjun Ashok --- .../cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java b/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java index 706b1cd23..79969e223 100644 --- a/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java +++ b/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java @@ -159,7 +159,7 @@ private void stopCassandra(InstanceMetadata instance) } else { - LOG.warn("No process running for Cassandra instance {} with PID {}.", casCfg.instanceName, pid); + LOG.warn("No process running for Cassandra instance {} with PID {}.", casCfg.instanceName(), pid); } } catch (Throwable t) From 56ca1f6c985f5e198d23f28ea9611da21c9d6167 Mon Sep 17 00:00:00 2001 From: Paulo Motta Date: Fri, 19 Dec 2025 12:22:07 -0500 Subject: [PATCH 08/13] Add destroyForcibly() fallback when stop times out When stopping a Cassandra process, if the graceful destroy() does not complete within the timeout period, the code now calls destroyForcibly() to ensure the process is terminated. If destroyForcibly() also fails (returns false), an exception is thrown. --- .../lifecycle/ProcessLifecycleProvider.java | 19 +++- .../ProcessLifecycleProviderTest.java | 103 ++++++++++++++++++ 2 files changed, 120 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java b/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java index 79969e223..5cf2d8a0d 100644 --- a/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java +++ b/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java @@ -29,6 +29,7 @@ import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -153,8 +154,22 @@ private void stopCassandra(InstanceMetadata instance) { LOG.info("Stopping process of Cassandra instance {} with PID {}.", casCfg.instanceName(), pid); CompletableFuture terminationFuture = processHandle.get().onExit(); - processHandle.get().destroy(); // blocking call, make async? - terminationFuture.get(CASSANDRA_PROCESS_TIMEOUT_MS, TimeUnit.MILLISECONDS); + processHandle.get().destroy(); + try + { + terminationFuture.get(CASSANDRA_PROCESS_TIMEOUT_MS, TimeUnit.MILLISECONDS); + } + catch (TimeoutException e) + { + LOG.warn("Process {} did not terminate within timeout, forcing destroy.", pid); + boolean destroyed = processHandle.get().destroyForcibly(); + if (!destroyed) + { + throw new RuntimeException("Failed to forcibly destroy process " + pid + + " for Cassandra instance " + casCfg.instanceName(), e); + } + LOG.info("Process {} was forcibly destroyed.", pid); + } Files.deleteIfExists(Path.of(pidFileLocation)); } else diff --git a/server/src/test/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProviderTest.java b/server/src/test/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProviderTest.java index 5e59cd81c..f77d7600b 100644 --- a/server/src/test/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProviderTest.java +++ b/server/src/test/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProviderTest.java @@ -25,6 +25,7 @@ import java.util.Map; import java.util.Optional; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeoutException; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -34,9 +35,12 @@ import static org.apache.cassandra.sidecar.lifecycle.ProcessLifecycleProvider.getPidFileLocation; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.mockStatic; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; /** @@ -587,4 +591,103 @@ void testDeletePidFileWhenFileDoesNotExist() // Verify the file still doesn't exist assertThat(pidFilePath).doesNotExist(); } + + @Test + void testStopCallsDestroyForciblyOnTimeout() throws Exception + { + try (MockedStatic processHandleMock = mockStatic(ProcessHandle.class)) + { + // Create provider + Map params = Map.of( + ProcessLifecycleProvider.OPT_STATE_DIR, lifecycleStateDir.toString(), + ProcessLifecycleProvider.OPT_CASSANDRA_HOME, "/default/cassandra/home" + ); + FakeProcessLifecycleProvider provider = new FakeProcessLifecycleProvider(params); + + // Create mock instance metadata + InstanceMetadata instance = mock(InstanceMetadata.class); + when(instance.host()).thenReturn("localhost"); + when(instance.storageDir()).thenReturn("/storage/dir"); + when(instance.lifecycleOptions()).thenReturn(Map.of()); + + // Create a PID file + String pidFileLocation = getPidFileLocation(lifecycleStateDir.toString(), "localhost"); + Path pidFilePath = Path.of(pidFileLocation); + Files.writeString(pidFilePath, "12345"); + + // Mock ProcessHandle that times out on graceful termination but succeeds with force + ProcessHandle mockHandle = mock(ProcessHandle.class); + ProcessHandle.Info mockInfo = mock(ProcessHandle.Info.class); + when(mockHandle.isAlive()).thenReturn(true); + when(mockHandle.info()).thenReturn(mockInfo); + when(mockInfo.commandLine()).thenReturn(Optional.of("java org.apache.cassandra.service.CassandraDaemon")); + when(mockHandle.pid()).thenReturn(12345L); + + // Simulate timeout by mocking CompletableFuture.get() to throw TimeoutException + @SuppressWarnings("unchecked") + CompletableFuture timeoutFuture = mock(CompletableFuture.class); + when(timeoutFuture.get(anyLong(), any())).thenThrow(new TimeoutException("Simulated timeout")); + when(mockHandle.onExit()).thenReturn(timeoutFuture); + when(mockHandle.destroy()).thenReturn(true); + when(mockHandle.destroyForcibly()).thenReturn(true); + + processHandleMock.when(() -> ProcessHandle.of(12345L)) + .thenReturn(Optional.of(mockHandle)); + + // Call stop - should call destroyForcibly after timeout + provider.stop(instance); + + // Verify destroyForcibly was called + verify(mockHandle).destroyForcibly(); + } + } + + @Test + void testStopThrowsExceptionWhenDestroyForciblyFails() throws Exception + { + try (MockedStatic processHandleMock = mockStatic(ProcessHandle.class)) + { + // Create provider + Map params = Map.of( + ProcessLifecycleProvider.OPT_STATE_DIR, lifecycleStateDir.toString(), + ProcessLifecycleProvider.OPT_CASSANDRA_HOME, "/default/cassandra/home" + ); + FakeProcessLifecycleProvider provider = new FakeProcessLifecycleProvider(params); + + // Create mock instance metadata + InstanceMetadata instance = mock(InstanceMetadata.class); + when(instance.host()).thenReturn("localhost"); + when(instance.storageDir()).thenReturn("/storage/dir"); + when(instance.lifecycleOptions()).thenReturn(Map.of()); + + // Create a PID file + String pidFileLocation = getPidFileLocation(lifecycleStateDir.toString(), "localhost"); + Path pidFilePath = Path.of(pidFileLocation); + Files.writeString(pidFilePath, "12345"); + + // Mock ProcessHandle that times out and fails to force destroy + ProcessHandle mockHandle = mock(ProcessHandle.class); + ProcessHandle.Info mockInfo = mock(ProcessHandle.Info.class); + when(mockHandle.isAlive()).thenReturn(true); + when(mockHandle.info()).thenReturn(mockInfo); + when(mockInfo.commandLine()).thenReturn(Optional.of("java org.apache.cassandra.service.CassandraDaemon")); + when(mockHandle.pid()).thenReturn(12345L); + + // Simulate timeout by mocking CompletableFuture.get() to throw TimeoutException + @SuppressWarnings("unchecked") + CompletableFuture timeoutFuture = mock(CompletableFuture.class); + when(timeoutFuture.get(anyLong(), any())).thenThrow(new TimeoutException("Simulated timeout")); + when(mockHandle.onExit()).thenReturn(timeoutFuture); + when(mockHandle.destroy()).thenReturn(true); + when(mockHandle.destroyForcibly()).thenReturn(false); // Force destroy fails + + processHandleMock.when(() -> ProcessHandle.of(12345L)) + .thenReturn(Optional.of(mockHandle)); + + // Call stop - should throw exception because destroyForcibly returned false + assertThatThrownBy(() -> provider.stop(instance)) + .isInstanceOf(RuntimeException.class) + .hasMessageContaining("Failed to forcibly destroy process"); + } + } } From 369363a4146d5c8d581b1cbd58f40f5e0c51c193 Mon Sep 17 00:00:00 2001 From: Paulo Motta Date: Fri, 19 Dec 2025 13:10:34 -0500 Subject: [PATCH 09/13] Add fail-fast directory validation in constructor Validates that state directory and cassandra home exist and have appropriate permissions at construction time. --- .../lifecycle/ProcessLifecycleProvider.java | 25 ++++++++ .../ProcessLifecycleProviderTest.java | 57 ++++++++++++++++--- 2 files changed, 73 insertions(+), 9 deletions(-) diff --git a/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java b/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java index 5cf2d8a0d..de7df1c33 100644 --- a/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java +++ b/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java @@ -72,6 +72,11 @@ else if (entry.getKey().startsWith("env.")) } this.lifecycleDir = params.get(OPT_STATE_DIR); this.defaultCassandraHome = params.get(OPT_CASSANDRA_HOME); + validateConfiguration(); + } + + private void validateConfiguration() + { if (lifecycleDir == null || lifecycleDir.isEmpty()) { throw new ConfigurationException("Configuration property '" + OPT_STATE_DIR + "' must be set for ProcessLifecycleProvider"); @@ -80,6 +85,26 @@ else if (entry.getKey().startsWith("env.")) { throw new ConfigurationException("Configuration property '" + OPT_CASSANDRA_HOME + "' must be set for ProcessLifecycleProvider"); } + + Path stateDir = Path.of(lifecycleDir); + if (!Files.isDirectory(stateDir)) + { + throw new ConfigurationException("State directory '" + lifecycleDir + "' does not exist or is not a directory"); + } + if (!Files.isWritable(stateDir)) + { + throw new ConfigurationException("State directory '" + lifecycleDir + "' is not writable"); + } + + Path cassandraHomePath = Path.of(defaultCassandraHome); + if (!Files.isDirectory(cassandraHomePath)) + { + throw new ConfigurationException("Cassandra home '" + defaultCassandraHome + "' does not exist or is not a directory"); + } + if (!Files.isReadable(cassandraHomePath)) + { + throw new ConfigurationException("Cassandra home '" + defaultCassandraHome + "' is not readable"); + } } @Override diff --git a/server/src/test/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProviderTest.java b/server/src/test/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProviderTest.java index f77d7600b..9f0b126fb 100644 --- a/server/src/test/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProviderTest.java +++ b/server/src/test/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProviderTest.java @@ -27,10 +27,12 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeoutException; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; import org.apache.cassandra.sidecar.cluster.instance.InstanceMetadata; +import org.apache.cassandra.sidecar.exceptions.ConfigurationException; import org.mockito.MockedStatic; import static org.apache.cassandra.sidecar.lifecycle.ProcessLifecycleProvider.getPidFileLocation; @@ -52,6 +54,15 @@ public class ProcessLifecycleProviderTest @TempDir Path lifecycleStateDir; + Path defaultCassandraHome; + + @BeforeEach + void setUp() throws IOException + { + defaultCassandraHome = lifecycleStateDir.resolve("cassandra-home"); + Files.createDirectories(defaultCassandraHome); + } + /** * A fake implementation of ProcessLifecycleProvider for testing purposes. * This class overrides the buildCassandraConfig method to return a mock configuration to avoid actual process execution. @@ -111,7 +122,7 @@ void testStartStopIsRunning() throws InterruptedException // Create provider with temporary lifecycle state directory Map params = Map.of( ProcessLifecycleProvider.OPT_STATE_DIR, lifecycleStateDir.toString(), - ProcessLifecycleProvider.OPT_CASSANDRA_HOME, "/default/cassandra/home" + ProcessLifecycleProvider.OPT_CASSANDRA_HOME, defaultCassandraHome.toString() ); FakeProcessLifecycleProvider provider = new FakeProcessLifecycleProvider(params); @@ -148,7 +159,7 @@ void testBuildCassandraConfigWithCassandraHomeOverride() { Map params = Map.of( ProcessLifecycleProvider.OPT_STATE_DIR, lifecycleStateDir.toString(), - ProcessLifecycleProvider.OPT_CASSANDRA_HOME, "/default/cassandra/home" + ProcessLifecycleProvider.OPT_CASSANDRA_HOME, defaultCassandraHome.toString() ); ProcessLifecycleProvider provider = new ProcessLifecycleProvider(params); @@ -180,7 +191,7 @@ void testBuildCassandraConfigWithDefaultCassandraHome() { Map params = Map.of( ProcessLifecycleProvider.OPT_STATE_DIR, lifecycleStateDir.toString(), - ProcessLifecycleProvider.OPT_CASSANDRA_HOME, "/default/cassandra/home" + ProcessLifecycleProvider.OPT_CASSANDRA_HOME, defaultCassandraHome.toString() ); ProcessLifecycleProvider provider = new ProcessLifecycleProvider(params); @@ -200,7 +211,7 @@ void testBuildCassandraConfigWithDefaultCassandraHome() ProcessRuntimeConfiguration config = provider.getRuntimeConfiguration(instance); // Verify the configuration uses default Cassandra home - assertThat(config.cassandraHome()).isEqualTo(Path.of("/default/cassandra/home")); + assertThat(config.cassandraHome()).isEqualTo(defaultCassandraHome); } @Test @@ -328,7 +339,7 @@ void testIsCassandraProcessRunningRemovesStalePidFile() throws IOException // Create provider Map params = Map.of( ProcessLifecycleProvider.OPT_STATE_DIR, lifecycleStateDir.toString(), - ProcessLifecycleProvider.OPT_CASSANDRA_HOME, "/default/cassandra/home" + ProcessLifecycleProvider.OPT_CASSANDRA_HOME, defaultCassandraHome.toString() ); ProcessLifecycleProvider provider = new ProcessLifecycleProvider(params); @@ -369,7 +380,7 @@ void testIsCassandraProcessRunningRemovesStalePidFileWhenNotCassandra() throws I // Create provider Map params = Map.of( ProcessLifecycleProvider.OPT_STATE_DIR, lifecycleStateDir.toString(), - ProcessLifecycleProvider.OPT_CASSANDRA_HOME, "/default/cassandra/home" + ProcessLifecycleProvider.OPT_CASSANDRA_HOME, defaultCassandraHome.toString() ); ProcessLifecycleProvider provider = new ProcessLifecycleProvider(params); @@ -417,7 +428,7 @@ void testIsCassandraProcessRunningDoesNotRemovePidFileWhenCassandraIsRunning() t // Create provider Map params = Map.of( ProcessLifecycleProvider.OPT_STATE_DIR, lifecycleStateDir.toString(), - ProcessLifecycleProvider.OPT_CASSANDRA_HOME, "/default/cassandra/home" + ProcessLifecycleProvider.OPT_CASSANDRA_HOME, defaultCassandraHome.toString() ); ProcessLifecycleProvider provider = new ProcessLifecycleProvider(params); @@ -600,7 +611,7 @@ void testStopCallsDestroyForciblyOnTimeout() throws Exception // Create provider Map params = Map.of( ProcessLifecycleProvider.OPT_STATE_DIR, lifecycleStateDir.toString(), - ProcessLifecycleProvider.OPT_CASSANDRA_HOME, "/default/cassandra/home" + ProcessLifecycleProvider.OPT_CASSANDRA_HOME, defaultCassandraHome.toString() ); FakeProcessLifecycleProvider provider = new FakeProcessLifecycleProvider(params); @@ -650,7 +661,7 @@ void testStopThrowsExceptionWhenDestroyForciblyFails() throws Exception // Create provider Map params = Map.of( ProcessLifecycleProvider.OPT_STATE_DIR, lifecycleStateDir.toString(), - ProcessLifecycleProvider.OPT_CASSANDRA_HOME, "/default/cassandra/home" + ProcessLifecycleProvider.OPT_CASSANDRA_HOME, defaultCassandraHome.toString() ); FakeProcessLifecycleProvider provider = new FakeProcessLifecycleProvider(params); @@ -690,4 +701,32 @@ void testStopThrowsExceptionWhenDestroyForciblyFails() throws Exception .hasMessageContaining("Failed to forcibly destroy process"); } } + + @Test + void testThrowsExceptionWhenStateDirDoesNotExist() + { + Map params = Map.of( + ProcessLifecycleProvider.OPT_STATE_DIR, "/nonexistent/state/dir", + ProcessLifecycleProvider.OPT_CASSANDRA_HOME, defaultCassandraHome.toString() + ); + + assertThatThrownBy(() -> new ProcessLifecycleProvider(params)) + .isInstanceOf(ConfigurationException.class) + .hasMessageContaining("State directory") + .hasMessageContaining("does not exist or is not a directory"); + } + + @Test + void testThrowsExceptionWhenCassandraHomeDoesNotExist() + { + Map params = Map.of( + ProcessLifecycleProvider.OPT_STATE_DIR, lifecycleStateDir.toString(), + ProcessLifecycleProvider.OPT_CASSANDRA_HOME, "/nonexistent/cassandra/home" + ); + + assertThatThrownBy(() -> new ProcessLifecycleProvider(params)) + .isInstanceOf(ConfigurationException.class) + .hasMessageContaining("Cassandra home") + .hasMessageContaining("does not exist or is not a directory"); + } } From b643899ce6968dc05b0cb9fcb7ea51c1bd54021a Mon Sep 17 00:00:00 2001 From: Paulo Motta Date: Fri, 19 Dec 2025 13:14:06 -0500 Subject: [PATCH 10/13] Use explicit UTF-8 charset in InputStreamReader and Files.readString --- .../sidecar/lifecycle/ProcessLifecycleProvider.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java b/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java index de7df1c33..ca458648f 100644 --- a/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java +++ b/server/src/main/java/org/apache/cassandra/sidecar/lifecycle/ProcessLifecycleProvider.java @@ -21,6 +21,7 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -315,7 +316,7 @@ protected static Optional getCommandLinePlatformIndependent(ProcessHandl ProcessBuilder pb = new ProcessBuilder("ps", "-p", String.valueOf(pid), "-o", "args="); Process proc = pb.start(); try (BufferedReader reader = new BufferedReader( - new InputStreamReader(proc.getInputStream()))) + new InputStreamReader(proc.getInputStream(), StandardCharsets.UTF_8))) { String line = reader.readLine(); proc.waitFor(5, TimeUnit.SECONDS); @@ -338,7 +339,7 @@ public static Long readPidFromFile(Path pidFilePath) { try { - String pidFileContent = Files.readString(pidFilePath).trim(); + String pidFileContent = Files.readString(pidFilePath, StandardCharsets.UTF_8).trim(); return Long.parseLong(pidFileContent); } catch (IOException | NumberFormatException e) From e4baad920775ce79cc2d753d26f3bab209250cf9 Mon Sep 17 00:00:00 2001 From: Paulo Motta Date: Fri, 19 Dec 2025 13:16:28 -0500 Subject: [PATCH 11/13] Add CASSSIDECAR-340 entry to CHANGES.txt --- CHANGES.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.txt b/CHANGES.txt index 6ddd84af4..2b54155ae 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,5 +1,6 @@ 0.3.0 ----- + * Add default process-based lifecycle provider (CASSSIDECAR-340) * Upgrade vertx to 4.5.23 (CASSSIDECAR-391) * Fix for deadlock during JMX reconnection (CASSSIDECAR-390) * Fix request execution continues on wrong thread (CASSSIDECAR-368) From f9a6c9f5a3ed500abd90a946fd291976783d595a Mon Sep 17 00:00:00 2001 From: Paulo Motta Date: Tue, 23 Dec 2025 11:54:26 -0500 Subject: [PATCH 12/13] Add cassandra tarball support to GitHub Actions heavyweight tests Configure -PtarballVersion for heavyweight integration tests to fix ProcessLifecycleProviderIntegrationTest which requires cassandra.test.tarball_path. --- .github/workflows/ci.yml | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 74f52b444..1e58e1c32 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -66,7 +66,9 @@ jobs: id: cache-dtest-jars uses: actions/cache@v4 with: - path: dtest-jars/ + path: | + dtest-jars/ + cassandra-tarballs/ key: dtest-jars-${{ hashFiles('scripts/build-dtest-jars.sh', 'gradle/wrapper/gradle-wrapper.properties') }} - name: Dtest jars cache status @@ -89,6 +91,13 @@ jobs: path: dtest-jars/ retention-days: 90 + - name: Upload cassandra tarballs artifact + uses: actions/upload-artifact@v5 + with: + name: cassandra-tarballs + path: cassandra-tarballs/ + retention-days: 30 + # Run unit tests with static analysis on all Java versions unit-tests: name: Unit tests (Java ${{ matrix.java }}) @@ -304,6 +313,12 @@ jobs: name: dtest-jars path: dtest-jars/ + - name: Download cassandra tarballs + uses: actions/download-artifact@v5 + with: + name: cassandra-tarballs + path: cassandra-tarballs/ + - name: Install dtest jars to local Maven run: | ./scripts/install-shaded-dtest-jar-local.sh @@ -314,6 +329,7 @@ jobs: run: | ./gradlew --no-daemon \ -PdtestVersion=${{ matrix.dtestVersion }} \ + -PtarballVersion=${{ matrix.cassandra }} \ -Dcassandra.sidecar.versions_to_test="${{ matrix.cassandra }}" \ integrationTestHeavyWeight \ --stacktrace From de0d9692f80081bec47be08fd584ac696b342632 Mon Sep 17 00:00:00 2001 From: Paulo Motta Date: Tue, 23 Dec 2025 12:47:56 -0500 Subject: [PATCH 13/13] Add jar task before heavyweight integration tests in GitHub Actions The sidecar.version resource file is generated during the jar task. Without running jar first, SidecarVersionProvider throws IllegalStateException when trying to read the missing resource. This aligns GitHub Actions with CircleCI which already includes jar before integrationTestHeavyWeight. --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1e58e1c32..ecb0a55b4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -331,7 +331,7 @@ jobs: -PdtestVersion=${{ matrix.dtestVersion }} \ -PtarballVersion=${{ matrix.cassandra }} \ -Dcassandra.sidecar.versions_to_test="${{ matrix.cassandra }}" \ - integrationTestHeavyWeight \ + jar integrationTestHeavyWeight \ --stacktrace env: CASSANDRA_DEP_DIR: ${{ github.workspace }}/dtest-jars