From f60078385cac6a1b6c4fe7fc046a98bb5d05b360 Mon Sep 17 00:00:00 2001 From: Guillaume Boutry Date: Thu, 9 Apr 2026 11:29:08 +0200 Subject: [PATCH] feat: support safe ovs bridge rename adoption Teach OVSCli to decide whether bridge renames are safe by checking local OVN Southbound bindings. When the chassis has no bindings, the external networking setup can replace an old bridge with the configured target bridge and adopt a pre-existing OVS bridge from bridge-mapping, including MAAS-created bridges when they are compatible. Keep the autonomous hook path fail-closed when OVS or authoritative SB state cannot be read. Add an explicit `hypervisor setup-bridge` override for operators who need to allow stale SB reads against relay-backed deployments. Assisted-By: Codex (GPT-5-4) Signed-off-by: Guillaume Boutry --- openstack_hypervisor/bridge_datapath.py | 167 ++++++++++++ openstack_hypervisor/cli/hypervisor.py | 30 +++ openstack_hypervisor/hooks.py | 67 ++++- tests/unit/cli/test_hypervisor.py | 77 ++++++ tests/unit/test_bridge_datapath.py | 323 ++++++++++++++++++++++++ tests/unit/test_hooks.py | 156 ++++++++++++ 6 files changed, 819 insertions(+), 1 deletion(-) diff --git a/openstack_hypervisor/bridge_datapath.py b/openstack_hypervisor/bridge_datapath.py index a4241e3..8da891b 100644 --- a/openstack_hypervisor/bridge_datapath.py +++ b/openstack_hypervisor/bridge_datapath.py @@ -6,6 +6,7 @@ import json import logging import subprocess +import time import uuid from collections.abc import Generator from dataclasses import dataclass @@ -13,6 +14,9 @@ DEFAULT_LAA_MAC_PREFIX = "0a:c5" INTEGRATION_BRIDGE = "br-int" +SB_RENAME_SAFE_QUERY_TIMEOUT_S = 15 +SB_RENAME_SAFE_QUERY_RETRIES = 3 +SB_RENAME_SAFE_RETRY_DELAY_S = 1 @dataclass(frozen=True) @@ -65,6 +69,18 @@ class OVSTimeoutError(OVSError, TimeoutError): """Raised when an OVS command times out.""" +class OVNError(RuntimeError): + """Common base class for OVN-related errors.""" + + +class OVNCommandError(OVNError): + """Raised when querying OVN state fails.""" + + +class OVNTimeoutError(OVNError, TimeoutError): + """Raised when an OVN command times out.""" + + def _normalize_ovs_vsctl_value(raw_value: str) -> str | None: """Normalize ovs-vsctl output values into plain strings.""" cleaned = raw_value.strip() @@ -165,6 +181,11 @@ def with_timeout(self, timeout: int) -> Generator["OVSCli", None, None]: finally: self._timeout = original_timeout + @property + def timeout(self) -> int | None: + """Return the configured default command timeout.""" + return self._timeout + def commit(self, retry) -> str: """Execute all batched commands in a single ovs-vsctl transaction. @@ -710,6 +731,152 @@ def get_dpdk_initialized(self) -> bool: return False +class OVNSBCli: + """Client for interacting with OVN Southbound via ovn-sbctl.""" + + def __init__(self, db: str, timeout: int | None = None): + self.db = db + self._timeout = timeout + + def sbctl( + self, + *args: str, + timeout: int | None = None, + allow_stale_reads: bool = False, + ) -> str: + """Run ovn-sbctl with the provided arguments and return stdout.""" + cmd = ["ovn-sbctl", f"--db={self.db}"] + timeout = timeout or self._timeout + if timeout is not None: + cmd.append(f"--timeout={timeout}") + if allow_stale_reads: + cmd.append("--no-leader-only") + cmd.extend(args) + logging.debug("Executing command: %s", " ".join(cmd)) + + try: + completed = subprocess.run( # nosec B603 + cmd, + check=True, + capture_output=True, + text=True, + ) + except FileNotFoundError as exc: + raise OVNCommandError("ovn-sbctl binary not found") from exc + except subprocess.CalledProcessError as exc: # pragma: no cover - defensive + stderr = (exc.stderr or "").strip() + stdout = (exc.stdout or "").strip() + details = stderr or stdout or f"Command failed with exit code {exc.returncode}" + if "Alarm clock" in details: + raise OVNTimeoutError(details) from exc + raise OVNCommandError(details) from exc + + return completed.stdout + + def rename_safety_query(self, *args: str, allow_stale_reads: bool = False) -> str: + """Run an OVN SB query with the rename-safety freshness policy.""" + if allow_stale_reads: + return self.sbctl(*args, allow_stale_reads=True) + + attempts = SB_RENAME_SAFE_QUERY_RETRIES + if attempts < 1: + raise ValueError("SB_RENAME_SAFE_QUERY_RETRIES must be >= 1") + + for attempt in range(1, attempts + 1): + try: + return self.sbctl(*args) + except OVNError: + if attempt == attempts: + raise + time.sleep(SB_RENAME_SAFE_RETRY_DELAY_S) + + +def _get_ovs_external_id(ovs_cli: OVSCli, key: str) -> str | None: + """Read and normalize an OVS external_ids value.""" + raw_value = ovs_cli.vsctl( + "get", + "open", + ".", + f"external_ids:{key}", + skip_transaction=True, + ) + return _normalize_ovs_vsctl_value(raw_value) + + +def _find_local_chassis_uuid( + ovn_sb_cli: OVNSBCli, system_id: str, allow_stale_sb_read: bool +) -> uuid.UUID | None: + """Return the local chassis UUID when the node is registered in SB.""" + chassis_output = ovn_sb_cli.rename_safety_query( + "--format=json", + "--columns=_uuid", + "find", + "Chassis", + f"name={system_id}", + allow_stale_reads=allow_stale_sb_read, + ) + chassis_data = json.loads(chassis_output) + chassis_idx = chassis_data["headings"].index("_uuid") + if not chassis_data["data"]: + return None + return _parse_ovsdb_data(chassis_data["data"][0][chassis_idx]) + + +def _chassis_has_port_bindings( + ovn_sb_cli: OVNSBCli, local_chassis: uuid.UUID, allow_stale_sb_read: bool +) -> bool: + """Return whether any Port_Binding row is bound to the local chassis.""" + bindings_output = ovn_sb_cli.rename_safety_query( + "--format=json", + "--columns=_uuid", + "find", + "Port_Binding", + f"chassis={local_chassis}", + allow_stale_reads=allow_stale_sb_read, + ) + bindings_data = json.loads(bindings_output) + return bool(bindings_data["data"]) + + +def is_bridge_rename_safe(ovs_cli: OVSCli, allow_stale_sb_read: bool = False) -> bool: + """Return whether bridges can be destroyed and recreated safely.""" + try: + ovn_remote = _get_ovs_external_id(ovs_cli, "ovn-remote") + if not ovn_remote: + return True + + system_id = _get_ovs_external_id(ovs_cli, "system-id") + except OVSCommandError: + return False + + if not system_id: + return True + + ovn_sb_cli = OVNSBCli( + ovn_remote, + timeout=( + ovs_cli.timeout if ovs_cli.timeout is not None else SB_RENAME_SAFE_QUERY_TIMEOUT_S + ), + ) + try: + local_chassis = _find_local_chassis_uuid( + ovn_sb_cli, system_id, allow_stale_sb_read=allow_stale_sb_read + ) + if local_chassis is None: + return True + if _chassis_has_port_bindings( + ovn_sb_cli, + local_chassis, + allow_stale_sb_read=allow_stale_sb_read, + ): + return False + except (OVNError, KeyError, IndexError, json.JSONDecodeError, ValueError) as exc: + logging.warning("Unable to determine OVN bridge rename safety: %s", exc) + return False + + return True + + def resolve_bridge_mappings( # noqa: C901 external_bridge: str, physnet_name: str, diff --git a/openstack_hypervisor/cli/hypervisor.py b/openstack_hypervisor/cli/hypervisor.py index 3abe18f..3ada794 100644 --- a/openstack_hypervisor/cli/hypervisor.py +++ b/openstack_hypervisor/cli/hypervisor.py @@ -20,8 +20,10 @@ click_option_format, ) from openstack_hypervisor.hooks import ( + _configure_ovn_external_networking, _dpdk_config_is_ready, _get_configure_context, + _set_ovs_managed_by, ovs_switch_socket, ovs_switchd_ctl_socket, ) @@ -178,3 +180,31 @@ def dpdk_ready() -> None: else: click.echo("DPDK configuration is NOT ready - external OVS restart may be required") sys.exit(1) + + +@hypervisor.command("setup-bridge") +@click.option( + "--allow-stale-sb-read", + is_flag=True, + help="Allow OVN Southbound relay/follower reads when checking rename safety.", +) +def setup_bridge(allow_stale_sb_read: bool) -> None: + """Apply OVN external bridge configuration.""" + snap = Snap() + _set_ovs_managed_by(snap) + ovs_socket = ovs_switch_socket(snap) + switchd_ctl_socket = ovs_switchd_ctl_socket(snap) + ovs_cli = OVSCli(ovs_socket, switchd_ctl_socket) + context = _get_configure_context(snap) + + if allow_stale_sb_read: + click.echo( + "Warning: allowing stale OVN Southbound reads may misjudge bridge rename safety." + ) + + _configure_ovn_external_networking( + snap, + ovs_cli, + context, + allow_stale_sb_read=allow_stale_sb_read, + ) diff --git a/openstack_hypervisor/hooks.py b/openstack_hypervisor/hooks.py index 7de5c79..433e0c4 100644 --- a/openstack_hypervisor/hooks.py +++ b/openstack_hypervisor/hooks.py @@ -46,6 +46,7 @@ OVSCommandError, OVSTimeoutError, detect_current_mappings, + is_bridge_rename_safe, resolve_bridge_mappings, resolve_ovs_changes, update_mappings_from_rename, @@ -1000,6 +1001,39 @@ def _add_interface_to_bridge(ovs_cli: OVSCli, external_bridge: str, external_nic ) +def _bridge_can_be_adopted( + ovs_cli: OVSCli, + bridge: str, + external_nic: str | None, + existing_bridges: set[str] | None = None, +) -> bool: + """Return whether an existing bridge can be adopted for a mapping.""" + if existing_bridges is not None and bridge not in existing_bridges: + return True + + try: + bridge_ifaces = ovs_cli.list_bridge_interfaces(bridge) + except OVSCommandError as exc: + logging.warning("Unable to inspect bridge %s for adoption: %s", bridge, exc) + return False + + if external_nic: + return set(bridge_ifaces).issubset({external_nic}) + + return not bridge_ifaces + + +def _adopt_bridge_interface(ovs_cli: OVSCli, bridge: str, external_nic: str | None) -> None: + """Mark an existing bridge interface as charm-managed for explicit adoption.""" + if not external_nic: + return + + if external_nic not in ovs_cli.list_bridge_interfaces(bridge): + return + + ovs_cli.set("Port", external_nic, "external_ids", {"microstack-function": "ext-port"}) + + def _del_interface_from_bridge(ovs_cli: OVSCli, external_bridge: str, external_nic: str) -> None: """Remove an interface from a given bridge. @@ -1701,7 +1735,7 @@ def get_machine_id() -> str: def _configure_ovn_external_networking( # noqa: C901 - snap: Snap, ovs_cli: OVSCli, context: dict + snap: Snap, ovs_cli: OVSCli, context: dict, allow_stale_sb_read: bool = False ) -> None: """Configure OVS/OVN external networking. @@ -1742,6 +1776,37 @@ def _configure_ovn_external_networking( # noqa: C901 changes = resolve_ovs_changes(current_mappings, mappings) logging.debug("OVS external networking changes: %s", changes) + if changes["renamed_bridges"] and is_bridge_rename_safe( + ovs_cli, allow_stale_sb_read=allow_stale_sb_read + ): + existing_bridges = set(ovs_cli.list_bridges()) + remaining_renames: list[tuple[str, str]] = [] + + for old_bridge, new_bridge in changes["renamed_bridges"]: + target_mapping = next((m for m in mappings if m.bridge == new_bridge), None) + if target_mapping and _bridge_can_be_adopted( + ovs_cli, + new_bridge, + target_mapping.interface, + existing_bridges, + ): + if new_bridge in existing_bridges: + _adopt_bridge_interface(ovs_cli, new_bridge, target_mapping.interface) + if old_bridge not in changes["removed_bridges"]: + changes["removed_bridges"].append(old_bridge) + if new_bridge not in changes["added_bridges"]: + changes["added_bridges"].append(new_bridge) + continue + + logging.info( + "Bridge %s cannot be adopted as %s, keeping current bridge name", + old_bridge, + new_bridge, + ) + remaining_renames.append((old_bridge, new_bridge)) + + changes["renamed_bridges"] = remaining_renames + mappings = update_mappings_from_rename(mappings, changes["renamed_bridges"]) for bridge, change in changes["interface_changes"].items(): diff --git a/tests/unit/cli/test_hypervisor.py b/tests/unit/cli/test_hypervisor.py index c584d02..9bc270a 100644 --- a/tests/unit/cli/test_hypervisor.py +++ b/tests/unit/cli/test_hypervisor.py @@ -111,3 +111,80 @@ def test_dpdk_ready_uses_correct_socket( mock_ovs_cli_class.assert_called_once_with( "unix:/custom/socket/path", "unix:/custom/ctl/socket/path" ) + + +class TestSetupBridgeCommand: + @patch("openstack_hypervisor.hooks.is_connected") + @patch("openstack_hypervisor.cli.hypervisor._set_ovs_managed_by") + @patch("openstack_hypervisor.cli.hypervisor.Snap") + @patch("openstack_hypervisor.cli.hypervisor.OVSCli") + @patch("openstack_hypervisor.cli.hypervisor.ovs_switch_socket") + @patch("openstack_hypervisor.cli.hypervisor.ovs_switchd_ctl_socket") + @patch("openstack_hypervisor.cli.hypervisor._get_configure_context") + @patch("openstack_hypervisor.cli.hypervisor._configure_ovn_external_networking") + def test_setup_bridge_defaults_to_fresh_sb_reads( + self, + mock_setup_bridge, + mock_get_context, + mock_switchd_ctl_socket, + mock_socket, + mock_ovs_cli_class, + mock_snap_class, + mock_set_ovs_managed_by, + mock_connected, + ): + mock_get_context.return_value = {"network": {}} + mock_socket.return_value = "unix:/custom/socket/path" + mock_switchd_ctl_socket.return_value = "unix:/custom/ctl/socket/path" + + runner = CliRunner() + result = runner.invoke(hypervisor, ["setup-bridge"]) + + assert result.exit_code == 0 + mock_set_ovs_managed_by.assert_called_once_with(mock_snap_class.return_value) + mock_setup_bridge.assert_called_once_with( + mock_snap_class.return_value, + mock_ovs_cli_class.return_value, + {"network": {}}, + allow_stale_sb_read=False, + ) + + @patch("openstack_hypervisor.hooks.is_connected") + @patch("openstack_hypervisor.cli.hypervisor.click.echo") + @patch("openstack_hypervisor.cli.hypervisor._set_ovs_managed_by") + @patch("openstack_hypervisor.cli.hypervisor.Snap") + @patch("openstack_hypervisor.cli.hypervisor.OVSCli") + @patch("openstack_hypervisor.cli.hypervisor.ovs_switch_socket") + @patch("openstack_hypervisor.cli.hypervisor.ovs_switchd_ctl_socket") + @patch("openstack_hypervisor.cli.hypervisor._get_configure_context") + @patch("openstack_hypervisor.cli.hypervisor._configure_ovn_external_networking") + def test_setup_bridge_allows_stale_sb_reads_with_flag( + self, + mock_setup_bridge, + mock_get_context, + mock_switchd_ctl_socket, + mock_socket, + mock_ovs_cli_class, + mock_snap_class, + mock_set_ovs_managed_by, + mock_echo, + mock_connected, + ): + mock_get_context.return_value = {"network": {}} + mock_socket.return_value = "unix:/custom/socket/path" + mock_switchd_ctl_socket.return_value = "unix:/custom/ctl/socket/path" + + runner = CliRunner() + result = runner.invoke(hypervisor, ["setup-bridge", "--allow-stale-sb-read"]) + + assert result.exit_code == 0 + mock_set_ovs_managed_by.assert_called_once_with(mock_snap_class.return_value) + mock_setup_bridge.assert_called_once_with( + mock_snap_class.return_value, + mock_ovs_cli_class.return_value, + {"network": {}}, + allow_stale_sb_read=True, + ) + mock_echo.assert_any_call( + "Warning: allowing stale OVN Southbound reads may misjudge bridge rename safety." + ) diff --git a/tests/unit/test_bridge_datapath.py b/tests/unit/test_bridge_datapath.py index 2090579..d9f78e9 100644 --- a/tests/unit/test_bridge_datapath.py +++ b/tests/unit/test_bridge_datapath.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import hashlib +import json import re from unittest.mock import patch @@ -9,11 +10,16 @@ from openstack_hypervisor.bridge_datapath import ( DEFAULT_LAA_MAC_PREFIX, + SB_RENAME_SAFE_QUERY_TIMEOUT_S, BridgeMapping, + OVNCommandError, + OVNSBCli, + OVNTimeoutError, OVSCli, OVSCommandError, detect_current_mappings, generate_stable_laa_mac, + is_bridge_rename_safe, resolve_bridge_mappings, resolve_ovs_changes, update_mappings_from_rename, @@ -474,7 +480,324 @@ def test_rename_with_missing_mapping(self): assert update_mappings_from_rename(mappings, renames) == mappings +class TestBridgeRenameSafety: + def test_is_bridge_rename_safe_uses_fallback_sb_timeout(self): + ovs = OVSCli() + + def fake_vsctl(*args, **kwargs): + if args == ("get", "open", ".", "external_ids:ovn-remote"): + return '"tcp:127.0.0.1:6642"\n' + if args == ("get", "open", ".", "external_ids:system-id"): + return '"node.example.com"\n' + raise AssertionError(f"Unexpected ovs-vsctl call: {args}") + + with ( + patch.object(ovs, "vsctl", side_effect=fake_vsctl), + patch("openstack_hypervisor.bridge_datapath.OVNSBCli") as mock_ovn_sb_cls, + patch( + "openstack_hypervisor.bridge_datapath._find_local_chassis_uuid", + return_value=None, + ), + ): + assert is_bridge_rename_safe(ovs) is True + + mock_ovn_sb_cls.assert_called_once_with( + "tcp:127.0.0.1:6642", + timeout=SB_RENAME_SAFE_QUERY_TIMEOUT_S, + ) + + def test_is_bridge_rename_safe_preserves_explicit_sb_timeout(self): + ovs = OVSCli(timeout=7) + + def fake_vsctl(*args, **kwargs): + if args == ("get", "open", ".", "external_ids:ovn-remote"): + return '"tcp:127.0.0.1:6642"\n' + if args == ("get", "open", ".", "external_ids:system-id"): + return '"node.example.com"\n' + raise AssertionError(f"Unexpected ovs-vsctl call: {args}") + + with ( + patch.object(ovs, "vsctl", side_effect=fake_vsctl), + patch("openstack_hypervisor.bridge_datapath.OVNSBCli") as mock_ovn_sb_cls, + patch( + "openstack_hypervisor.bridge_datapath._find_local_chassis_uuid", + return_value=None, + ), + ): + assert is_bridge_rename_safe(ovs) is True + + mock_ovn_sb_cls.assert_called_once_with("tcp:127.0.0.1:6642", timeout=7) + + def test_is_bridge_rename_safe_when_ovn_remote_missing(self): + ovs = OVSCli() + with patch.object(ovs, "vsctl", return_value="[]\n") as mock_vsctl: + assert is_bridge_rename_safe(ovs) is True + mock_vsctl.assert_called_once_with( + "get", + "open", + ".", + "external_ids:ovn-remote", + skip_transaction=True, + ) + + def test_is_bridge_rename_safe_fails_closed_when_ovn_remote_query_fails(self): + ovs = OVSCli() + with patch.object(ovs, "vsctl", side_effect=OVSCommandError("ovs failed")): + assert is_bridge_rename_safe(ovs) is False + + def test_is_bridge_rename_safe_when_system_id_missing(self): + ovs = OVSCli() + + def fake_vsctl(*args, **kwargs): + if args == ("get", "open", ".", "external_ids:ovn-remote"): + return '"tcp:127.0.0.1:6642"\n' + if args == ("get", "open", ".", "external_ids:system-id"): + return "[]\n" + raise AssertionError(f"Unexpected ovs-vsctl call: {args}") + + with patch.object(ovs, "vsctl", side_effect=fake_vsctl): + assert is_bridge_rename_safe(ovs) is True + + def test_is_bridge_rename_safe_fails_closed_when_system_id_query_fails(self): + ovs = OVSCli() + + def fake_vsctl(*args, **kwargs): + if args == ("get", "open", ".", "external_ids:ovn-remote"): + return '"tcp:127.0.0.1:6642"\n' + if args == ("get", "open", ".", "external_ids:system-id"): + raise OVSCommandError("ovs failed") + raise AssertionError(f"Unexpected ovs-vsctl call: {args}") + + with patch.object(ovs, "vsctl", side_effect=fake_vsctl): + assert is_bridge_rename_safe(ovs) is False + + def test_is_bridge_rename_safe_when_chassis_missing(self): + ovs = OVSCli() + chassis_output = json.dumps({"headings": ["_uuid"], "data": []}) + + def fake_vsctl(*args, **kwargs): + if args == ("get", "open", ".", "external_ids:ovn-remote"): + return '"tcp:127.0.0.1:6642"\n' + if args == ("get", "open", ".", "external_ids:system-id"): + return '"node.example.com"\n' + raise AssertionError(f"Unexpected ovs-vsctl call: {args}") + + with ( + patch.object(ovs, "vsctl", side_effect=fake_vsctl), + patch.object(OVNSBCli, "sbctl", return_value=chassis_output) as mock_sbctl, + ): + assert is_bridge_rename_safe(ovs) is True + + mock_sbctl.assert_called_once_with( + "--format=json", + "--columns=_uuid", + "find", + "Chassis", + "name=node.example.com", + ) + + def test_is_bridge_rename_safe_when_local_chassis_has_no_bindings(self): + ovs = OVSCli() + chassis_uuid = "fd8d4990-a66f-45cb-9881-cae642ef2796" + chassis_output = json.dumps({"headings": ["_uuid"], "data": [[["uuid", chassis_uuid]]]}) + bindings_output = json.dumps({"headings": ["_uuid"], "data": []}) + + def fake_vsctl(*args, **kwargs): + if args == ("get", "open", ".", "external_ids:ovn-remote"): + return '"tcp:127.0.0.1:6642"\n' + if args == ("get", "open", ".", "external_ids:system-id"): + return '"node.example.com"\n' + raise AssertionError(f"Unexpected ovs-vsctl call: {args}") + + with ( + patch.object(ovs, "vsctl", side_effect=fake_vsctl), + patch.object( + OVNSBCli, + "sbctl", + side_effect=[chassis_output, bindings_output], + ) as mock_sbctl, + ): + assert is_bridge_rename_safe(ovs) is True + + assert mock_sbctl.call_count == 2 + assert mock_sbctl.call_args_list[1].args == ( + "--format=json", + "--columns=_uuid", + "find", + "Port_Binding", + f"chassis={chassis_uuid}", + ) + + def test_is_bridge_rename_safe_when_local_chassis_has_bindings(self): + ovs = OVSCli() + chassis_uuid = "fd8d4990-a66f-45cb-9881-cae642ef2796" + chassis_output = json.dumps({"headings": ["_uuid"], "data": [[["uuid", chassis_uuid]]]}) + bindings_output = json.dumps({"headings": ["_uuid"], "data": [[["uuid", "binding-uuid"]]]}) + + def fake_vsctl(*args, **kwargs): + if args == ("get", "open", ".", "external_ids:ovn-remote"): + return '"tcp:127.0.0.1:6642"\n' + if args == ("get", "open", ".", "external_ids:system-id"): + return '"node.example.com"\n' + raise AssertionError(f"Unexpected ovs-vsctl call: {args}") + + with ( + patch.object(ovs, "vsctl", side_effect=fake_vsctl), + patch.object( + OVNSBCli, + "sbctl", + side_effect=[chassis_output, bindings_output], + ), + ): + assert is_bridge_rename_safe(ovs) is False + + def test_is_bridge_rename_safe_fails_closed_when_sb_query_fails(self): + ovs = OVSCli() + + def fake_vsctl(*args, **kwargs): + if args == ("get", "open", ".", "external_ids:ovn-remote"): + return '"tcp:127.0.0.1:6642"\n' + if args == ("get", "open", ".", "external_ids:system-id"): + return '"node.example.com"\n' + raise AssertionError(f"Unexpected ovs-vsctl call: {args}") + + with ( + patch.object(ovs, "vsctl", side_effect=fake_vsctl), + patch.object( + OVNSBCli, + "sbctl", + side_effect=OVNCommandError("sb failed"), + ), + patch("openstack_hypervisor.bridge_datapath.time.sleep"), + ): + assert is_bridge_rename_safe(ovs) is False + + def test_is_bridge_rename_safe_retries_strict_sb_queries(self): + ovs = OVSCli() + + def fake_vsctl(*args, **kwargs): + if args == ("get", "open", ".", "external_ids:ovn-remote"): + return '"tcp:127.0.0.1:6642"\n' + if args == ("get", "open", ".", "external_ids:system-id"): + return '"node.example.com"\n' + raise AssertionError(f"Unexpected ovs-vsctl call: {args}") + + with ( + patch.object(ovs, "vsctl", side_effect=fake_vsctl), + patch.object( + OVNSBCli, + "sbctl", + side_effect=OVNCommandError("sb failed"), + ) as mock_sbctl, + patch("openstack_hypervisor.bridge_datapath.time.sleep") as mock_sleep, + ): + assert is_bridge_rename_safe(ovs) is False + + assert mock_sbctl.call_count == 3 + assert mock_sleep.call_count == 2 + + def test_is_bridge_rename_safe_retries_on_sb_timeout(self): + ovs = OVSCli() + + def fake_vsctl(*args, **kwargs): + if args == ("get", "open", ".", "external_ids:ovn-remote"): + return '"tcp:127.0.0.1:6642"\n' + if args == ("get", "open", ".", "external_ids:system-id"): + return '"node.example.com"\n' + raise AssertionError(f"Unexpected ovs-vsctl call: {args}") + + with ( + patch.object(ovs, "vsctl", side_effect=fake_vsctl), + patch.object( + OVNSBCli, + "sbctl", + side_effect=OVNTimeoutError("sb timeout"), + ) as mock_sbctl, + patch("openstack_hypervisor.bridge_datapath.time.sleep") as mock_sleep, + ): + assert is_bridge_rename_safe(ovs) is False + + assert mock_sbctl.call_count == 3 + assert mock_sleep.call_count == 2 + + def test_is_bridge_rename_safe_fails_closed_when_sb_times_out(self): + ovs = OVSCli() + + def fake_vsctl(*args, **kwargs): + if args == ("get", "open", ".", "external_ids:ovn-remote"): + return '"tcp:127.0.0.1:6642"\n' + if args == ("get", "open", ".", "external_ids:system-id"): + return '"node.example.com"\n' + raise AssertionError(f"Unexpected ovs-vsctl call: {args}") + + with ( + patch.object(ovs, "vsctl", side_effect=fake_vsctl), + patch.object( + OVNSBCli, + "sbctl", + side_effect=OVNTimeoutError("sb timeout"), + ), + patch("openstack_hypervisor.bridge_datapath.time.sleep"), + ): + assert is_bridge_rename_safe(ovs) is False + + def test_is_bridge_rename_safe_allows_stale_sb_reads(self): + ovs = OVSCli() + chassis_output = json.dumps({"headings": ["_uuid"], "data": []}) + + def fake_vsctl(*args, **kwargs): + if args == ("get", "open", ".", "external_ids:ovn-remote"): + return '"tcp:127.0.0.1:6642"\n' + if args == ("get", "open", ".", "external_ids:system-id"): + return '"node.example.com"\n' + raise AssertionError(f"Unexpected ovs-vsctl call: {args}") + + with ( + patch.object(ovs, "vsctl", side_effect=fake_vsctl), + patch.object(OVNSBCli, "sbctl", return_value=chassis_output) as mock_sbctl, + ): + assert is_bridge_rename_safe(ovs, allow_stale_sb_read=True) is True + + mock_sbctl.assert_called_once_with( + "--format=json", + "--columns=_uuid", + "find", + "Chassis", + "name=node.example.com", + allow_stale_reads=True, + ) + + +class TestOVNSBCli: + def test_rename_safety_query_raises_for_invalid_retry_count(self, monkeypatch): + ovn_sb = OVNSBCli("tcp:127.0.0.1:6642") + monkeypatch.setattr( + "openstack_hypervisor.bridge_datapath.SB_RENAME_SAFE_QUERY_RETRIES", + 0, + ) + + with pytest.raises(ValueError, match="SB_RENAME_SAFE_QUERY_RETRIES must be >= 1"): + ovn_sb.rename_safety_query("show") + + def test_rename_safety_query_returns_when_last_retry_succeeds(self): + ovn_sb = OVNSBCli("tcp:127.0.0.1:6642") + + with ( + patch.object( + ovn_sb, + "sbctl", + side_effect=[OVNCommandError("first"), OVNCommandError("second"), "ok"], + ) as mock_sbctl, + patch("openstack_hypervisor.bridge_datapath.time.sleep") as mock_sleep, + ): + assert ovn_sb.rename_safety_query("show") == "ok" + + assert mock_sbctl.call_count == 3 + assert mock_sleep.call_count == 2 + + class TestOVSCli: + def test_list_bridge_interfaces_filters_types(self): ovs = OVSCli() with patch.object(ovs, "vsctl") as mock_vsctl: diff --git a/tests/unit/test_hooks.py b/tests/unit/test_hooks.py index 07846c2..f915212 100644 --- a/tests/unit/test_hooks.py +++ b/tests/unit/test_hooks.py @@ -15,6 +15,7 @@ from snaphelpers._conf import UnknownConfigKey from openstack_hypervisor import hooks +from openstack_hypervisor.bridge_datapath import BridgeMapping from openstack_hypervisor.cli import pci_devices from openstack_hypervisor.hooks import OwnedPath @@ -298,6 +299,7 @@ def test_add_interface_to_bridge_noop(self, ovs_cli): ovs_cli.list_bridge_interfaces.return_value = ["int1", "int2"] hooks._add_interface_to_bridge(ovs_cli, "br1", "int2") assert not ovs_cli.add_port.called + ovs_cli.set.assert_not_called() def test_del_interface_from_bridge(self, ovs_cli): ovs_cli.list_bridge_interfaces.return_value = ["int1", "int2"] @@ -1871,6 +1873,160 @@ def test_ready_when_dpdk_not_enabled(self, mocker, snap, ovs_cli): assert result is True + +class TestConfigureOvnExternalNetworking: + def test_safe_rename_recreates_bridge(self, mocker, snap, ovs_cli): + snap.config.get.side_effect = lambda key: { + "network.ovn-sb-connection": "tcp:127.0.0.1:6642", + "network.external-bridge-address": hooks.IPVANYNETWORK_UNSET, + "network.external-bridge": "", + "network.physnet-name": "", + "network.external-nic": "", + "network.bridge-mapping": "br-new:physnet1:eth0", + }.get(key) + mock_rename_safe = mocker.patch.object(hooks, "is_bridge_rename_safe", return_value=True) + ovs_cli.list_bridges.return_value = [] + mocker.patch.object( + hooks, + "detect_current_mappings", + return_value=[BridgeMapping("br-old", "physnet1", "eth0")], + ) + mocker.patch.object(hooks, "_wait_for_interface") + mocker.patch.object(hooks, "_delete_iptable_postrouting_rule") + mocker.patch.object(hooks, "_delete_ips_from_interface") + mocker.patch.object(hooks, "_ensure_single_nic_on_bridge") + mocker.patch.object(hooks, "_ensure_link_up") + mocker.patch.object(hooks, "_enable_chassis_as_gateway") + mocker.patch.object(hooks, "_disable_chassis_as_gateway") + mocker.patch.object(hooks, "get_machine_id", return_value="machine-id") + + hooks._configure_ovn_external_networking(snap, ovs_cli, {"network": {}}) + + mock_rename_safe.assert_called_once_with(ovs_cli, allow_stale_sb_read=False) + ovs_cli.del_bridge.assert_called_once_with("br-old") + ovs_cli.add_bridge.assert_called_once_with( + "br-new", "system", "protocols=OpenFlow13,OpenFlow15" + ) + ovs_cli.set.assert_any_call( + "open", + ".", + "external_ids", + {"ovn-bridge-mappings": "physnet1:br-new"}, + ) + + def test_unsafe_rename_keeps_existing_bridge(self, mocker, snap, ovs_cli): + snap.config.get.side_effect = lambda key: { + "network.ovn-sb-connection": "tcp:127.0.0.1:6642", + "network.external-bridge-address": hooks.IPVANYNETWORK_UNSET, + "network.external-bridge": "", + "network.physnet-name": "", + "network.external-nic": "", + "network.bridge-mapping": "br-new:physnet1:eth0", + }.get(key) + mock_rename_safe = mocker.patch.object(hooks, "is_bridge_rename_safe", return_value=False) + mocker.patch.object( + hooks, + "detect_current_mappings", + return_value=[BridgeMapping("br-old", "physnet1", "eth0")], + ) + mocker.patch.object(hooks, "_wait_for_interface") + mocker.patch.object(hooks, "_delete_iptable_postrouting_rule") + mocker.patch.object(hooks, "_delete_ips_from_interface") + mocker.patch.object(hooks, "_ensure_single_nic_on_bridge") + mocker.patch.object(hooks, "_ensure_link_up") + mocker.patch.object(hooks, "_enable_chassis_as_gateway") + mocker.patch.object(hooks, "_disable_chassis_as_gateway") + mocker.patch.object(hooks, "get_machine_id", return_value="machine-id") + + hooks._configure_ovn_external_networking(snap, ovs_cli, {"network": {}}) + + mock_rename_safe.assert_called_once_with(ovs_cli, allow_stale_sb_read=False) + ovs_cli.del_bridge.assert_not_called() + ovs_cli.add_bridge.assert_not_called() + ovs_cli.set.assert_any_call( + "open", + ".", + "external_ids", + {"ovn-bridge-mappings": "physnet1:br-old"}, + ) + + def test_safe_rename_adopts_existing_bridge_with_matching_interface( + self, mocker, snap, ovs_cli + ): + snap.config.get.side_effect = lambda key: { + "network.ovn-sb-connection": "tcp:127.0.0.1:6642", + "network.external-bridge-address": hooks.IPVANYNETWORK_UNSET, + "network.external-bridge": "", + "network.physnet-name": "", + "network.external-nic": "", + "network.bridge-mapping": "br-maas:physnet1:eth0", + }.get(key) + mocker.patch.object(hooks, "is_bridge_rename_safe", return_value=True) + ovs_cli.list_bridges.return_value = ["br-maas"] + ovs_cli.list_bridge_interfaces.return_value = ["eth0"] + mocker.patch.object( + hooks, + "detect_current_mappings", + return_value=[BridgeMapping("br-old", "physnet1", "eth0")], + ) + mocker.patch.object(hooks, "_wait_for_interface") + mocker.patch.object(hooks, "_delete_iptable_postrouting_rule") + mocker.patch.object(hooks, "_delete_ips_from_interface") + mocker.patch.object(hooks, "_ensure_link_up") + mocker.patch.object(hooks, "_enable_chassis_as_gateway") + mocker.patch.object(hooks, "_disable_chassis_as_gateway") + mocker.patch.object(hooks, "get_machine_id", return_value="machine-id") + + hooks._configure_ovn_external_networking(snap, ovs_cli, {"network": {}}) + + ovs_cli.del_bridge.assert_called_once_with("br-old") + ovs_cli.add_bridge.assert_called_once_with( + "br-maas", "system", "protocols=OpenFlow13,OpenFlow15" + ) + ovs_cli.set.assert_any_call( + "Port", + "eth0", + "external_ids", + {"microstack-function": "ext-port"}, + ) + + def test_safe_rename_does_not_adopt_incompatible_existing_bridge(self, mocker, snap, ovs_cli): + snap.config.get.side_effect = lambda key: { + "network.ovn-sb-connection": "tcp:127.0.0.1:6642", + "network.external-bridge-address": hooks.IPVANYNETWORK_UNSET, + "network.external-bridge": "", + "network.physnet-name": "", + "network.external-nic": "", + "network.bridge-mapping": "br-maas:physnet1:eth0", + }.get(key) + mocker.patch.object(hooks, "is_bridge_rename_safe", return_value=True) + ovs_cli.list_bridges.return_value = ["br-maas"] + ovs_cli.list_bridge_interfaces.return_value = ["eth9"] + mocker.patch.object( + hooks, + "detect_current_mappings", + return_value=[BridgeMapping("br-old", "physnet1", "eth0")], + ) + mocker.patch.object(hooks, "_wait_for_interface") + mocker.patch.object(hooks, "_delete_iptable_postrouting_rule") + mocker.patch.object(hooks, "_delete_ips_from_interface") + mocker.patch.object(hooks, "_ensure_single_nic_on_bridge") + mocker.patch.object(hooks, "_ensure_link_up") + mocker.patch.object(hooks, "_enable_chassis_as_gateway") + mocker.patch.object(hooks, "_disable_chassis_as_gateway") + mocker.patch.object(hooks, "get_machine_id", return_value="machine-id") + + hooks._configure_ovn_external_networking(snap, ovs_cli, {"network": {}}) + + ovs_cli.del_bridge.assert_not_called() + ovs_cli.add_bridge.assert_not_called() + ovs_cli.set.assert_any_call( + "open", + ".", + "external_ids", + {"ovn-bridge-mappings": "physnet1:br-old"}, + ) + def test_not_ready_when_dpdk_not_initialized(self, mocker, snap, ovs_cli): """Test returns False when DPDK is enabled but not initialized.""" ovs_cli.get_dpdk_initialized.return_value = False