From 46796a82a32d5ab7e2054e1838aa3540d716359d Mon Sep 17 00:00:00 2001 From: Thomas Achatz Date: Fri, 17 Apr 2026 15:22:07 +0200 Subject: [PATCH] =?UTF-8?q?Block=20upgrades=20to=20CrateDB=206.0.0?= =?UTF-8?q?=E2=80=936.0.5=20if=20pre-5.5=20tables=20exist?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGES.rst | 3 ++ Dockerfile | 5 +-- crate/operator/upgrade.py | 73 +++++++++++++++++++++++++++++++++++++++ tests/test_upgrade.py | 73 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 152 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 4707cfcd..783736ed 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,6 +5,9 @@ Changelog Unreleased ---------- +* Prevent upgrades to CrateDB 6.0.0–6.0.5 if the cluster contains tables created + before 5.5, avoiding potential data loss. + 2.59.1 (2026-04-20) ------------------- diff --git a/Dockerfile b/Dockerfile index fe22b1b0..d0ec5d88 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,8 +13,9 @@ RUN apt-get update && \ apt-get install git -y COPY . /src -RUN python -m pip install -U setuptools==${SETUPTOOLS_VERSION} && \ - python setup.py clean bdist_wheel +RUN python -m pip install -U setuptools==${SETUPTOOLS_VERSION} \ + && pip install setuptools_scm vcs_versioning \ + && python setup.py clean bdist_wheel # Run container diff --git a/crate/operator/upgrade.py b/crate/operator/upgrade.py index 69076f1d..92ffb1e2 100644 --- a/crate/operator/upgrade.py +++ b/crate/operator/upgrade.py @@ -339,6 +339,78 @@ async def check_reindexing_tables( ) +async def check_unsafe_upgrade_paths( + core: CoreV1Api, + namespace: str, + name: str, + body: kopf.Body, + old: kopf.Body, + logger: logging.Logger, +): + """ + Block upgrades to CrateDB 6.0.0–6.0.5 if the cluster contains tables + created before CrateDB 5.5, which are affected by a data-loss bug. + + :param core: An instance of the Kubernetes Core V1 API. + :param namespace: The Kubernetes namespace for the CrateDB cluster. + :param name: The name for the ``CrateDB`` custom resource. + :param body: The full body of the ``CrateDB`` custom resource per + :class:`kopf.Body`. + :param old: The old resource body. Required to get the old version. + """ + target_version = CrateVersion(body.spec["cluster"]["version"]) + + if not (CrateVersion("6.0.0") <= target_version < CrateVersion("6.0.6")): + return + + logger.info( + "Checking for unsafe upgrade path to %s (CrateDB 6.0.0–6.0.5)", + target_version, + ) + + host = await get_host(core, namespace, name) + password = await get_system_user_password(core, namespace, name) + conn_factory = connection_factory(host, password) + + async with conn_factory() as conn: + async with conn.cursor() as cursor: + await cursor.execute( + """ + SELECT COUNT(*) + FROM sys.shards + WHERE + lpad(split_part(min_lucene_version, '.', 1), 2, '0') || '.' || + lpad(split_part(min_lucene_version, '.', 2), 2, '0') || '.' || + lpad(split_part(min_lucene_version, '.', 3), 2, '0') <= '09.06.00' + """ + ) + q1_count = (await cursor.fetchone())[0] + + await cursor.execute( + """ + SELECT COUNT(*) + FROM information_schema.tables + WHERE + split_part(version['created'], '.', 1) = '5' + AND lpad(split_part(version['created'], '.', 2), 2, '0') < '05' + """ + ) + q2_count = (await cursor.fetchone())[0] + + logger.info( + "Unsafe upgrade detection: lucene_check=%s, version_check=%s", + q1_count, + q2_count, + ) + + if q1_count > 0 or q2_count > 0: + raise kopf.PermanentError( + "Upgrade blocked: Cluster contains tables created before CrateDB 5.5. " + "Upgrading to CrateDB 6.0.0–6.0.5 may cause data loss. " + "Please upgrade to >= 6.0.6 instead." + ) + + async def recreate_internal_tables( core: CoreV1Api, namespace: str, @@ -489,6 +561,7 @@ async def handle( # type: ignore async with GlobalApiClient() as api_client: core = CoreV1Api(api_client) await check_reindexing_tables(core, namespace, name, body, old, logger) + await check_unsafe_upgrade_paths(core, namespace, name, body, old, logger) class AfterUpgradeSubHandler(StateBasedSubHandler): diff --git a/tests/test_upgrade.py b/tests/test_upgrade.py index f43ccf12..9027507e 100644 --- a/tests/test_upgrade.py +++ b/tests/test_upgrade.py @@ -36,6 +36,7 @@ from crate.operator.create import get_statefulset_crate_command from crate.operator.upgrade import ( check_reindexing_tables, + check_unsafe_upgrade_paths, recreate_internal_tables, upgrade_command_data_nodes, upgrade_command_global_jwt_config, @@ -717,3 +718,75 @@ async def test_recreate_internal_tables_skips_missing_tables( assert f'INSERT INTO "{schema}"."{tmp_table}"' not in executed_sql assert f'ALTER CLUSTER SWAP TABLE "{schema}"."{tmp_table}"' not in executed_sql assert f'DROP TABLE IF EXISTS "{schema}"."{tmp_table}"' not in executed_sql + + +@pytest.mark.asyncio +@mock.patch("crate.operator.upgrade.get_host", new_callable=mock.AsyncMock) +@mock.patch( + "crate.operator.upgrade.get_system_user_password", new_callable=mock.AsyncMock +) +async def test_check_unsafe_upgrade_paths_blocks_on_old_tables( + mock_get_pwd, + mock_get_host, + mock_cratedb_connection, +): + mock_get_host.return_value = "localhost" + mock_get_pwd.return_value = "pwd" + + mock_cursor = mock_cratedb_connection["mock_cursor"] + + # First query (lucene) -> returns > 0 + # Second query (version_created) -> returns 0 + mock_cursor.fetchone.side_effect = [ + (1,), # q1_count + (0,), # q2_count + ] + + body = mock.MagicMock() + old = {"spec": {"cluster": {"version": "5.10.16"}}} + body.spec = {"cluster": {"version": "6.0.5"}} + + with pytest.raises( + kopf.PermanentError, + match="Upgrade blocked: Cluster contains tables created before CrateDB 5.5", + ): + await check_unsafe_upgrade_paths( + core=mock.MagicMock(), + namespace="ns", + name="my-cluster", + body=body, + old=old, + logger=mock.MagicMock(), + ) + + # Ensure both queries were executed + assert mock_cursor.execute.call_count == 2 + + +@pytest.mark.asyncio +@mock.patch("crate.operator.upgrade.get_host", new_callable=mock.AsyncMock) +@mock.patch( + "crate.operator.upgrade.get_system_user_password", new_callable=mock.AsyncMock +) +async def test_check_unsafe_upgrade_paths_allows_safe_versions( + mock_get_pwd, + mock_get_host, + mock_cratedb_connection, +): + mock_get_host.return_value = "localhost" + mock_get_pwd.return_value = "pwd" + + body = mock.MagicMock() + old = {"spec": {"cluster": {"version": "5.10.16"}}} + body.spec = {"cluster": {"version": "6.0.6"}} # safe version + + await check_unsafe_upgrade_paths( + core=mock.MagicMock(), + namespace="ns", + name="my-cluster", + body=body, + old=old, + logger=mock.MagicMock(), + ) + + mock_cratedb_connection["mock_cursor"].execute.assert_not_called()