diff --git a/openstack_hypervisor/bridge_datapath.py b/openstack_hypervisor/bridge_datapath.py index a4241e3..8da891b 100644 --- a/openstack_hypervisor/bridge_datapath.py +++ b/openstack_hypervisor/bridge_datapath.py @@ -6,6 +6,7 @@ import json import logging import subprocess +import time import uuid from collections.abc import Generator from dataclasses import dataclass @@ -13,6 +14,9 @@ DEFAULT_LAA_MAC_PREFIX = "0a:c5" INTEGRATION_BRIDGE = "br-int" +SB_RENAME_SAFE_QUERY_TIMEOUT_S = 15 +SB_RENAME_SAFE_QUERY_RETRIES = 3 +SB_RENAME_SAFE_RETRY_DELAY_S = 1 @dataclass(frozen=True) @@ -65,6 +69,18 @@ class OVSTimeoutError(OVSError, TimeoutError): """Raised when an OVS command times out.""" +class OVNError(RuntimeError): + """Common base class for OVN-related errors.""" + + +class OVNCommandError(OVNError): + """Raised when querying OVN state fails.""" + + +class OVNTimeoutError(OVNError, TimeoutError): + """Raised when an OVN command times out.""" + + def _normalize_ovs_vsctl_value(raw_value: str) -> str | None: """Normalize ovs-vsctl output values into plain strings.""" cleaned = raw_value.strip() @@ -165,6 +181,11 @@ def with_timeout(self, timeout: int) -> Generator["OVSCli", None, None]: finally: self._timeout = original_timeout + @property + def timeout(self) -> int | None: + """Return the configured default command timeout.""" + return self._timeout + def commit(self, retry) -> str: """Execute all batched commands in a single ovs-vsctl transaction. @@ -710,6 +731,152 @@ def get_dpdk_initialized(self) -> bool: return False +class OVNSBCli: + """Client for interacting with OVN Southbound via ovn-sbctl.""" + + def __init__(self, db: str, timeout: int | None = None): + self.db = db + self._timeout = timeout + + def sbctl( + self, + *args: str, + timeout: int | None = None, + allow_stale_reads: bool = False, + ) -> str: + """Run ovn-sbctl with the provided arguments and return stdout.""" + cmd = ["ovn-sbctl", f"--db={self.db}"] + timeout = timeout or self._timeout + if timeout is not None: + cmd.append(f"--timeout={timeout}") + if allow_stale_reads: + cmd.append("--no-leader-only") + cmd.extend(args) + logging.debug("Executing command: %s", " ".join(cmd)) + + try: + completed = subprocess.run( # nosec B603 + cmd, + check=True, + capture_output=True, + text=True, + ) + except FileNotFoundError as exc: + raise OVNCommandError("ovn-sbctl binary not found") from exc + except subprocess.CalledProcessError as exc: # pragma: no cover - defensive + stderr = (exc.stderr or "").strip() + stdout = (exc.stdout or "").strip() + details = stderr or stdout or f"Command failed with exit code {exc.returncode}" + if "Alarm clock" in details: + raise OVNTimeoutError(details) from exc + raise OVNCommandError(details) from exc + + return completed.stdout + + def rename_safety_query(self, *args: str, allow_stale_reads: bool = False) -> str: + """Run an OVN SB query with the rename-safety freshness policy.""" + if allow_stale_reads: + return self.sbctl(*args, allow_stale_reads=True) + + attempts = SB_RENAME_SAFE_QUERY_RETRIES + if attempts < 1: + raise ValueError("SB_RENAME_SAFE_QUERY_RETRIES must be >= 1") + + for attempt in range(1, attempts + 1): + try: + return self.sbctl(*args) + except OVNError: + if attempt == attempts: + raise + time.sleep(SB_RENAME_SAFE_RETRY_DELAY_S) + + +def _get_ovs_external_id(ovs_cli: OVSCli, key: str) -> str | None: + """Read and normalize an OVS external_ids value.""" + raw_value = ovs_cli.vsctl( + "get", + "open", + ".", + f"external_ids:{key}", + skip_transaction=True, + ) + return _normalize_ovs_vsctl_value(raw_value) + + +def _find_local_chassis_uuid( + ovn_sb_cli: OVNSBCli, system_id: str, allow_stale_sb_read: bool +) -> uuid.UUID | None: + """Return the local chassis UUID when the node is registered in SB.""" + chassis_output = ovn_sb_cli.rename_safety_query( + "--format=json", + "--columns=_uuid", + "find", + "Chassis", + f"name={system_id}", + allow_stale_reads=allow_stale_sb_read, + ) + chassis_data = json.loads(chassis_output) + chassis_idx = chassis_data["headings"].index("_uuid") + if not chassis_data["data"]: + return None + return _parse_ovsdb_data(chassis_data["data"][0][chassis_idx]) + + +def _chassis_has_port_bindings( + ovn_sb_cli: OVNSBCli, local_chassis: uuid.UUID, allow_stale_sb_read: bool +) -> bool: + """Return whether any Port_Binding row is bound to the local chassis.""" + bindings_output = ovn_sb_cli.rename_safety_query( + "--format=json", + "--columns=_uuid", + "find", + "Port_Binding", + f"chassis={local_chassis}", + allow_stale_reads=allow_stale_sb_read, + ) + bindings_data = json.loads(bindings_output) + return bool(bindings_data["data"]) + + +def is_bridge_rename_safe(ovs_cli: OVSCli, allow_stale_sb_read: bool = False) -> bool: + """Return whether bridges can be destroyed and recreated safely.""" + try: + ovn_remote = _get_ovs_external_id(ovs_cli, "ovn-remote") + if not ovn_remote: + return True + + system_id = _get_ovs_external_id(ovs_cli, "system-id") + except OVSCommandError: + return False + + if not system_id: + return True + + ovn_sb_cli = OVNSBCli( + ovn_remote, + timeout=( + ovs_cli.timeout if ovs_cli.timeout is not None else SB_RENAME_SAFE_QUERY_TIMEOUT_S + ), + ) + try: + local_chassis = _find_local_chassis_uuid( + ovn_sb_cli, system_id, allow_stale_sb_read=allow_stale_sb_read + ) + if local_chassis is None: + return True + if _chassis_has_port_bindings( + ovn_sb_cli, + local_chassis, + allow_stale_sb_read=allow_stale_sb_read, + ): + return False + except (OVNError, KeyError, IndexError, json.JSONDecodeError, ValueError) as exc: + logging.warning("Unable to determine OVN bridge rename safety: %s", exc) + return False + + return True + + def resolve_bridge_mappings( # noqa: C901 external_bridge: str, physnet_name: str, diff --git a/openstack_hypervisor/cli/hypervisor.py b/openstack_hypervisor/cli/hypervisor.py index 3abe18f..3ada794 100644 --- a/openstack_hypervisor/cli/hypervisor.py +++ b/openstack_hypervisor/cli/hypervisor.py @@ -20,8 +20,10 @@ click_option_format, ) from openstack_hypervisor.hooks import ( + _configure_ovn_external_networking, _dpdk_config_is_ready, _get_configure_context, + _set_ovs_managed_by, ovs_switch_socket, ovs_switchd_ctl_socket, ) @@ -178,3 +180,31 @@ def dpdk_ready() -> None: else: click.echo("DPDK configuration is NOT ready - external OVS restart may be required") sys.exit(1) + + +@hypervisor.command("setup-bridge") +@click.option( + "--allow-stale-sb-read", + is_flag=True, + help="Allow OVN Southbound relay/follower reads when checking rename safety.", +) +def setup_bridge(allow_stale_sb_read: bool) -> None: + """Apply OVN external bridge configuration.""" + snap = Snap() + _set_ovs_managed_by(snap) + ovs_socket = ovs_switch_socket(snap) + switchd_ctl_socket = ovs_switchd_ctl_socket(snap) + ovs_cli = OVSCli(ovs_socket, switchd_ctl_socket) + context = _get_configure_context(snap) + + if allow_stale_sb_read: + click.echo( + "Warning: allowing stale OVN Southbound reads may misjudge bridge rename safety." + ) + + _configure_ovn_external_networking( + snap, + ovs_cli, + context, + allow_stale_sb_read=allow_stale_sb_read, + ) diff --git a/openstack_hypervisor/hooks.py b/openstack_hypervisor/hooks.py index 7de5c79..433e0c4 100644 --- a/openstack_hypervisor/hooks.py +++ b/openstack_hypervisor/hooks.py @@ -46,6 +46,7 @@ OVSCommandError, OVSTimeoutError, detect_current_mappings, + is_bridge_rename_safe, resolve_bridge_mappings, resolve_ovs_changes, update_mappings_from_rename, @@ -1000,6 +1001,39 @@ def _add_interface_to_bridge(ovs_cli: OVSCli, external_bridge: str, external_nic ) +def _bridge_can_be_adopted( + ovs_cli: OVSCli, + bridge: str, + external_nic: str | None, + existing_bridges: set[str] | None = None, +) -> bool: + """Return whether an existing bridge can be adopted for a mapping.""" + if existing_bridges is not None and bridge not in existing_bridges: + return True + + try: + bridge_ifaces = ovs_cli.list_bridge_interfaces(bridge) + except OVSCommandError as exc: + logging.warning("Unable to inspect bridge %s for adoption: %s", bridge, exc) + return False + + if external_nic: + return set(bridge_ifaces).issubset({external_nic}) + + return not bridge_ifaces + + +def _adopt_bridge_interface(ovs_cli: OVSCli, bridge: str, external_nic: str | None) -> None: + """Mark an existing bridge interface as charm-managed for explicit adoption.""" + if not external_nic: + return + + if external_nic not in ovs_cli.list_bridge_interfaces(bridge): + return + + ovs_cli.set("Port", external_nic, "external_ids", {"microstack-function": "ext-port"}) + + def _del_interface_from_bridge(ovs_cli: OVSCli, external_bridge: str, external_nic: str) -> None: """Remove an interface from a given bridge. @@ -1701,7 +1735,7 @@ def get_machine_id() -> str: def _configure_ovn_external_networking( # noqa: C901 - snap: Snap, ovs_cli: OVSCli, context: dict + snap: Snap, ovs_cli: OVSCli, context: dict, allow_stale_sb_read: bool = False ) -> None: """Configure OVS/OVN external networking. @@ -1742,6 +1776,37 @@ def _configure_ovn_external_networking( # noqa: C901 changes = resolve_ovs_changes(current_mappings, mappings) logging.debug("OVS external networking changes: %s", changes) + if changes["renamed_bridges"] and is_bridge_rename_safe( + ovs_cli, allow_stale_sb_read=allow_stale_sb_read + ): + existing_bridges = set(ovs_cli.list_bridges()) + remaining_renames: list[tuple[str, str]] = [] + + for old_bridge, new_bridge in changes["renamed_bridges"]: + target_mapping = next((m for m in mappings if m.bridge == new_bridge), None) + if target_mapping and _bridge_can_be_adopted( + ovs_cli, + new_bridge, + target_mapping.interface, + existing_bridges, + ): + if new_bridge in existing_bridges: + _adopt_bridge_interface(ovs_cli, new_bridge, target_mapping.interface) + if old_bridge not in changes["removed_bridges"]: + changes["removed_bridges"].append(old_bridge) + if new_bridge not in changes["added_bridges"]: + changes["added_bridges"].append(new_bridge) + continue + + logging.info( + "Bridge %s cannot be adopted as %s, keeping current bridge name", + old_bridge, + new_bridge, + ) + remaining_renames.append((old_bridge, new_bridge)) + + changes["renamed_bridges"] = remaining_renames + mappings = update_mappings_from_rename(mappings, changes["renamed_bridges"]) for bridge, change in changes["interface_changes"].items(): diff --git a/tests/unit/cli/test_hypervisor.py b/tests/unit/cli/test_hypervisor.py index c584d02..9bc270a 100644 --- a/tests/unit/cli/test_hypervisor.py +++ b/tests/unit/cli/test_hypervisor.py @@ -111,3 +111,80 @@ def test_dpdk_ready_uses_correct_socket( mock_ovs_cli_class.assert_called_once_with( "unix:/custom/socket/path", "unix:/custom/ctl/socket/path" ) + + +class TestSetupBridgeCommand: + @patch("openstack_hypervisor.hooks.is_connected") + @patch("openstack_hypervisor.cli.hypervisor._set_ovs_managed_by") + @patch("openstack_hypervisor.cli.hypervisor.Snap") + @patch("openstack_hypervisor.cli.hypervisor.OVSCli") + @patch("openstack_hypervisor.cli.hypervisor.ovs_switch_socket") + @patch("openstack_hypervisor.cli.hypervisor.ovs_switchd_ctl_socket") + @patch("openstack_hypervisor.cli.hypervisor._get_configure_context") + @patch("openstack_hypervisor.cli.hypervisor._configure_ovn_external_networking") + def test_setup_bridge_defaults_to_fresh_sb_reads( + self, + mock_setup_bridge, + mock_get_context, + mock_switchd_ctl_socket, + mock_socket, + mock_ovs_cli_class, + mock_snap_class, + mock_set_ovs_managed_by, + mock_connected, + ): + mock_get_context.return_value = {"network": {}} + mock_socket.return_value = "unix:/custom/socket/path" + mock_switchd_ctl_socket.return_value = "unix:/custom/ctl/socket/path" + + runner = CliRunner() + result = runner.invoke(hypervisor, ["setup-bridge"]) + + assert result.exit_code == 0 + mock_set_ovs_managed_by.assert_called_once_with(mock_snap_class.return_value) + mock_setup_bridge.assert_called_once_with( + mock_snap_class.return_value, + mock_ovs_cli_class.return_value, + {"network": {}}, + allow_stale_sb_read=False, + ) + + @patch("openstack_hypervisor.hooks.is_connected") + @patch("openstack_hypervisor.cli.hypervisor.click.echo") + @patch("openstack_hypervisor.cli.hypervisor._set_ovs_managed_by") + @patch("openstack_hypervisor.cli.hypervisor.Snap") + @patch("openstack_hypervisor.cli.hypervisor.OVSCli") + @patch("openstack_hypervisor.cli.hypervisor.ovs_switch_socket") + @patch("openstack_hypervisor.cli.hypervisor.ovs_switchd_ctl_socket") + @patch("openstack_hypervisor.cli.hypervisor._get_configure_context") + @patch("openstack_hypervisor.cli.hypervisor._configure_ovn_external_networking") + def test_setup_bridge_allows_stale_sb_reads_with_flag( + self, + mock_setup_bridge, + mock_get_context, + mock_switchd_ctl_socket, + mock_socket, + mock_ovs_cli_class, + mock_snap_class, + mock_set_ovs_managed_by, + mock_echo, + mock_connected, + ): + mock_get_context.return_value = {"network": {}} + mock_socket.return_value = "unix:/custom/socket/path" + mock_switchd_ctl_socket.return_value = "unix:/custom/ctl/socket/path" + + runner = CliRunner() + result = runner.invoke(hypervisor, ["setup-bridge", "--allow-stale-sb-read"]) + + assert result.exit_code == 0 + mock_set_ovs_managed_by.assert_called_once_with(mock_snap_class.return_value) + mock_setup_bridge.assert_called_once_with( + mock_snap_class.return_value, + mock_ovs_cli_class.return_value, + {"network": {}}, + allow_stale_sb_read=True, + ) + mock_echo.assert_any_call( + "Warning: allowing stale OVN Southbound reads may misjudge bridge rename safety." + ) diff --git a/tests/unit/test_bridge_datapath.py b/tests/unit/test_bridge_datapath.py index 2090579..d9f78e9 100644 --- a/tests/unit/test_bridge_datapath.py +++ b/tests/unit/test_bridge_datapath.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import hashlib +import json import re from unittest.mock import patch @@ -9,11 +10,16 @@ from openstack_hypervisor.bridge_datapath import ( DEFAULT_LAA_MAC_PREFIX, + SB_RENAME_SAFE_QUERY_TIMEOUT_S, BridgeMapping, + OVNCommandError, + OVNSBCli, + OVNTimeoutError, OVSCli, OVSCommandError, detect_current_mappings, generate_stable_laa_mac, + is_bridge_rename_safe, resolve_bridge_mappings, resolve_ovs_changes, update_mappings_from_rename, @@ -474,7 +480,324 @@ def test_rename_with_missing_mapping(self): assert update_mappings_from_rename(mappings, renames) == mappings +class TestBridgeRenameSafety: + def test_is_bridge_rename_safe_uses_fallback_sb_timeout(self): + ovs = OVSCli() + + def fake_vsctl(*args, **kwargs): + if args == ("get", "open", ".", "external_ids:ovn-remote"): + return '"tcp:127.0.0.1:6642"\n' + if args == ("get", "open", ".", "external_ids:system-id"): + return '"node.example.com"\n' + raise AssertionError(f"Unexpected ovs-vsctl call: {args}") + + with ( + patch.object(ovs, "vsctl", side_effect=fake_vsctl), + patch("openstack_hypervisor.bridge_datapath.OVNSBCli") as mock_ovn_sb_cls, + patch( + "openstack_hypervisor.bridge_datapath._find_local_chassis_uuid", + return_value=None, + ), + ): + assert is_bridge_rename_safe(ovs) is True + + mock_ovn_sb_cls.assert_called_once_with( + "tcp:127.0.0.1:6642", + timeout=SB_RENAME_SAFE_QUERY_TIMEOUT_S, + ) + + def test_is_bridge_rename_safe_preserves_explicit_sb_timeout(self): + ovs = OVSCli(timeout=7) + + def fake_vsctl(*args, **kwargs): + if args == ("get", "open", ".", "external_ids:ovn-remote"): + return '"tcp:127.0.0.1:6642"\n' + if args == ("get", "open", ".", "external_ids:system-id"): + return '"node.example.com"\n' + raise AssertionError(f"Unexpected ovs-vsctl call: {args}") + + with ( + patch.object(ovs, "vsctl", side_effect=fake_vsctl), + patch("openstack_hypervisor.bridge_datapath.OVNSBCli") as mock_ovn_sb_cls, + patch( + "openstack_hypervisor.bridge_datapath._find_local_chassis_uuid", + return_value=None, + ), + ): + assert is_bridge_rename_safe(ovs) is True + + mock_ovn_sb_cls.assert_called_once_with("tcp:127.0.0.1:6642", timeout=7) + + def test_is_bridge_rename_safe_when_ovn_remote_missing(self): + ovs = OVSCli() + with patch.object(ovs, "vsctl", return_value="[]\n") as mock_vsctl: + assert is_bridge_rename_safe(ovs) is True + mock_vsctl.assert_called_once_with( + "get", + "open", + ".", + "external_ids:ovn-remote", + skip_transaction=True, + ) + + def test_is_bridge_rename_safe_fails_closed_when_ovn_remote_query_fails(self): + ovs = OVSCli() + with patch.object(ovs, "vsctl", side_effect=OVSCommandError("ovs failed")): + assert is_bridge_rename_safe(ovs) is False + + def test_is_bridge_rename_safe_when_system_id_missing(self): + ovs = OVSCli() + + def fake_vsctl(*args, **kwargs): + if args == ("get", "open", ".", "external_ids:ovn-remote"): + return '"tcp:127.0.0.1:6642"\n' + if args == ("get", "open", ".", "external_ids:system-id"): + return "[]\n" + raise AssertionError(f"Unexpected ovs-vsctl call: {args}") + + with patch.object(ovs, "vsctl", side_effect=fake_vsctl): + assert is_bridge_rename_safe(ovs) is True + + def test_is_bridge_rename_safe_fails_closed_when_system_id_query_fails(self): + ovs = OVSCli() + + def fake_vsctl(*args, **kwargs): + if args == ("get", "open", ".", "external_ids:ovn-remote"): + return '"tcp:127.0.0.1:6642"\n' + if args == ("get", "open", ".", "external_ids:system-id"): + raise OVSCommandError("ovs failed") + raise AssertionError(f"Unexpected ovs-vsctl call: {args}") + + with patch.object(ovs, "vsctl", side_effect=fake_vsctl): + assert is_bridge_rename_safe(ovs) is False + + def test_is_bridge_rename_safe_when_chassis_missing(self): + ovs = OVSCli() + chassis_output = json.dumps({"headings": ["_uuid"], "data": []}) + + def fake_vsctl(*args, **kwargs): + if args == ("get", "open", ".", "external_ids:ovn-remote"): + return '"tcp:127.0.0.1:6642"\n' + if args == ("get", "open", ".", "external_ids:system-id"): + return '"node.example.com"\n' + raise AssertionError(f"Unexpected ovs-vsctl call: {args}") + + with ( + patch.object(ovs, "vsctl", side_effect=fake_vsctl), + patch.object(OVNSBCli, "sbctl", return_value=chassis_output) as mock_sbctl, + ): + assert is_bridge_rename_safe(ovs) is True + + mock_sbctl.assert_called_once_with( + "--format=json", + "--columns=_uuid", + "find", + "Chassis", + "name=node.example.com", + ) + + def test_is_bridge_rename_safe_when_local_chassis_has_no_bindings(self): + ovs = OVSCli() + chassis_uuid = "fd8d4990-a66f-45cb-9881-cae642ef2796" + chassis_output = json.dumps({"headings": ["_uuid"], "data": [[["uuid", chassis_uuid]]]}) + bindings_output = json.dumps({"headings": ["_uuid"], "data": []}) + + def fake_vsctl(*args, **kwargs): + if args == ("get", "open", ".", "external_ids:ovn-remote"): + return '"tcp:127.0.0.1:6642"\n' + if args == ("get", "open", ".", "external_ids:system-id"): + return '"node.example.com"\n' + raise AssertionError(f"Unexpected ovs-vsctl call: {args}") + + with ( + patch.object(ovs, "vsctl", side_effect=fake_vsctl), + patch.object( + OVNSBCli, + "sbctl", + side_effect=[chassis_output, bindings_output], + ) as mock_sbctl, + ): + assert is_bridge_rename_safe(ovs) is True + + assert mock_sbctl.call_count == 2 + assert mock_sbctl.call_args_list[1].args == ( + "--format=json", + "--columns=_uuid", + "find", + "Port_Binding", + f"chassis={chassis_uuid}", + ) + + def test_is_bridge_rename_safe_when_local_chassis_has_bindings(self): + ovs = OVSCli() + chassis_uuid = "fd8d4990-a66f-45cb-9881-cae642ef2796" + chassis_output = json.dumps({"headings": ["_uuid"], "data": [[["uuid", chassis_uuid]]]}) + bindings_output = json.dumps({"headings": ["_uuid"], "data": [[["uuid", "binding-uuid"]]]}) + + def fake_vsctl(*args, **kwargs): + if args == ("get", "open", ".", "external_ids:ovn-remote"): + return '"tcp:127.0.0.1:6642"\n' + if args == ("get", "open", ".", "external_ids:system-id"): + return '"node.example.com"\n' + raise AssertionError(f"Unexpected ovs-vsctl call: {args}") + + with ( + patch.object(ovs, "vsctl", side_effect=fake_vsctl), + patch.object( + OVNSBCli, + "sbctl", + side_effect=[chassis_output, bindings_output], + ), + ): + assert is_bridge_rename_safe(ovs) is False + + def test_is_bridge_rename_safe_fails_closed_when_sb_query_fails(self): + ovs = OVSCli() + + def fake_vsctl(*args, **kwargs): + if args == ("get", "open", ".", "external_ids:ovn-remote"): + return '"tcp:127.0.0.1:6642"\n' + if args == ("get", "open", ".", "external_ids:system-id"): + return '"node.example.com"\n' + raise AssertionError(f"Unexpected ovs-vsctl call: {args}") + + with ( + patch.object(ovs, "vsctl", side_effect=fake_vsctl), + patch.object( + OVNSBCli, + "sbctl", + side_effect=OVNCommandError("sb failed"), + ), + patch("openstack_hypervisor.bridge_datapath.time.sleep"), + ): + assert is_bridge_rename_safe(ovs) is False + + def test_is_bridge_rename_safe_retries_strict_sb_queries(self): + ovs = OVSCli() + + def fake_vsctl(*args, **kwargs): + if args == ("get", "open", ".", "external_ids:ovn-remote"): + return '"tcp:127.0.0.1:6642"\n' + if args == ("get", "open", ".", "external_ids:system-id"): + return '"node.example.com"\n' + raise AssertionError(f"Unexpected ovs-vsctl call: {args}") + + with ( + patch.object(ovs, "vsctl", side_effect=fake_vsctl), + patch.object( + OVNSBCli, + "sbctl", + side_effect=OVNCommandError("sb failed"), + ) as mock_sbctl, + patch("openstack_hypervisor.bridge_datapath.time.sleep") as mock_sleep, + ): + assert is_bridge_rename_safe(ovs) is False + + assert mock_sbctl.call_count == 3 + assert mock_sleep.call_count == 2 + + def test_is_bridge_rename_safe_retries_on_sb_timeout(self): + ovs = OVSCli() + + def fake_vsctl(*args, **kwargs): + if args == ("get", "open", ".", "external_ids:ovn-remote"): + return '"tcp:127.0.0.1:6642"\n' + if args == ("get", "open", ".", "external_ids:system-id"): + return '"node.example.com"\n' + raise AssertionError(f"Unexpected ovs-vsctl call: {args}") + + with ( + patch.object(ovs, "vsctl", side_effect=fake_vsctl), + patch.object( + OVNSBCli, + "sbctl", + side_effect=OVNTimeoutError("sb timeout"), + ) as mock_sbctl, + patch("openstack_hypervisor.bridge_datapath.time.sleep") as mock_sleep, + ): + assert is_bridge_rename_safe(ovs) is False + + assert mock_sbctl.call_count == 3 + assert mock_sleep.call_count == 2 + + def test_is_bridge_rename_safe_fails_closed_when_sb_times_out(self): + ovs = OVSCli() + + def fake_vsctl(*args, **kwargs): + if args == ("get", "open", ".", "external_ids:ovn-remote"): + return '"tcp:127.0.0.1:6642"\n' + if args == ("get", "open", ".", "external_ids:system-id"): + return '"node.example.com"\n' + raise AssertionError(f"Unexpected ovs-vsctl call: {args}") + + with ( + patch.object(ovs, "vsctl", side_effect=fake_vsctl), + patch.object( + OVNSBCli, + "sbctl", + side_effect=OVNTimeoutError("sb timeout"), + ), + patch("openstack_hypervisor.bridge_datapath.time.sleep"), + ): + assert is_bridge_rename_safe(ovs) is False + + def test_is_bridge_rename_safe_allows_stale_sb_reads(self): + ovs = OVSCli() + chassis_output = json.dumps({"headings": ["_uuid"], "data": []}) + + def fake_vsctl(*args, **kwargs): + if args == ("get", "open", ".", "external_ids:ovn-remote"): + return '"tcp:127.0.0.1:6642"\n' + if args == ("get", "open", ".", "external_ids:system-id"): + return '"node.example.com"\n' + raise AssertionError(f"Unexpected ovs-vsctl call: {args}") + + with ( + patch.object(ovs, "vsctl", side_effect=fake_vsctl), + patch.object(OVNSBCli, "sbctl", return_value=chassis_output) as mock_sbctl, + ): + assert is_bridge_rename_safe(ovs, allow_stale_sb_read=True) is True + + mock_sbctl.assert_called_once_with( + "--format=json", + "--columns=_uuid", + "find", + "Chassis", + "name=node.example.com", + allow_stale_reads=True, + ) + + +class TestOVNSBCli: + def test_rename_safety_query_raises_for_invalid_retry_count(self, monkeypatch): + ovn_sb = OVNSBCli("tcp:127.0.0.1:6642") + monkeypatch.setattr( + "openstack_hypervisor.bridge_datapath.SB_RENAME_SAFE_QUERY_RETRIES", + 0, + ) + + with pytest.raises(ValueError, match="SB_RENAME_SAFE_QUERY_RETRIES must be >= 1"): + ovn_sb.rename_safety_query("show") + + def test_rename_safety_query_returns_when_last_retry_succeeds(self): + ovn_sb = OVNSBCli("tcp:127.0.0.1:6642") + + with ( + patch.object( + ovn_sb, + "sbctl", + side_effect=[OVNCommandError("first"), OVNCommandError("second"), "ok"], + ) as mock_sbctl, + patch("openstack_hypervisor.bridge_datapath.time.sleep") as mock_sleep, + ): + assert ovn_sb.rename_safety_query("show") == "ok" + + assert mock_sbctl.call_count == 3 + assert mock_sleep.call_count == 2 + + class TestOVSCli: + def test_list_bridge_interfaces_filters_types(self): ovs = OVSCli() with patch.object(ovs, "vsctl") as mock_vsctl: diff --git a/tests/unit/test_hooks.py b/tests/unit/test_hooks.py index 07846c2..f915212 100644 --- a/tests/unit/test_hooks.py +++ b/tests/unit/test_hooks.py @@ -15,6 +15,7 @@ from snaphelpers._conf import UnknownConfigKey from openstack_hypervisor import hooks +from openstack_hypervisor.bridge_datapath import BridgeMapping from openstack_hypervisor.cli import pci_devices from openstack_hypervisor.hooks import OwnedPath @@ -298,6 +299,7 @@ def test_add_interface_to_bridge_noop(self, ovs_cli): ovs_cli.list_bridge_interfaces.return_value = ["int1", "int2"] hooks._add_interface_to_bridge(ovs_cli, "br1", "int2") assert not ovs_cli.add_port.called + ovs_cli.set.assert_not_called() def test_del_interface_from_bridge(self, ovs_cli): ovs_cli.list_bridge_interfaces.return_value = ["int1", "int2"] @@ -1871,6 +1873,160 @@ def test_ready_when_dpdk_not_enabled(self, mocker, snap, ovs_cli): assert result is True + +class TestConfigureOvnExternalNetworking: + def test_safe_rename_recreates_bridge(self, mocker, snap, ovs_cli): + snap.config.get.side_effect = lambda key: { + "network.ovn-sb-connection": "tcp:127.0.0.1:6642", + "network.external-bridge-address": hooks.IPVANYNETWORK_UNSET, + "network.external-bridge": "", + "network.physnet-name": "", + "network.external-nic": "", + "network.bridge-mapping": "br-new:physnet1:eth0", + }.get(key) + mock_rename_safe = mocker.patch.object(hooks, "is_bridge_rename_safe", return_value=True) + ovs_cli.list_bridges.return_value = [] + mocker.patch.object( + hooks, + "detect_current_mappings", + return_value=[BridgeMapping("br-old", "physnet1", "eth0")], + ) + mocker.patch.object(hooks, "_wait_for_interface") + mocker.patch.object(hooks, "_delete_iptable_postrouting_rule") + mocker.patch.object(hooks, "_delete_ips_from_interface") + mocker.patch.object(hooks, "_ensure_single_nic_on_bridge") + mocker.patch.object(hooks, "_ensure_link_up") + mocker.patch.object(hooks, "_enable_chassis_as_gateway") + mocker.patch.object(hooks, "_disable_chassis_as_gateway") + mocker.patch.object(hooks, "get_machine_id", return_value="machine-id") + + hooks._configure_ovn_external_networking(snap, ovs_cli, {"network": {}}) + + mock_rename_safe.assert_called_once_with(ovs_cli, allow_stale_sb_read=False) + ovs_cli.del_bridge.assert_called_once_with("br-old") + ovs_cli.add_bridge.assert_called_once_with( + "br-new", "system", "protocols=OpenFlow13,OpenFlow15" + ) + ovs_cli.set.assert_any_call( + "open", + ".", + "external_ids", + {"ovn-bridge-mappings": "physnet1:br-new"}, + ) + + def test_unsafe_rename_keeps_existing_bridge(self, mocker, snap, ovs_cli): + snap.config.get.side_effect = lambda key: { + "network.ovn-sb-connection": "tcp:127.0.0.1:6642", + "network.external-bridge-address": hooks.IPVANYNETWORK_UNSET, + "network.external-bridge": "", + "network.physnet-name": "", + "network.external-nic": "", + "network.bridge-mapping": "br-new:physnet1:eth0", + }.get(key) + mock_rename_safe = mocker.patch.object(hooks, "is_bridge_rename_safe", return_value=False) + mocker.patch.object( + hooks, + "detect_current_mappings", + return_value=[BridgeMapping("br-old", "physnet1", "eth0")], + ) + mocker.patch.object(hooks, "_wait_for_interface") + mocker.patch.object(hooks, "_delete_iptable_postrouting_rule") + mocker.patch.object(hooks, "_delete_ips_from_interface") + mocker.patch.object(hooks, "_ensure_single_nic_on_bridge") + mocker.patch.object(hooks, "_ensure_link_up") + mocker.patch.object(hooks, "_enable_chassis_as_gateway") + mocker.patch.object(hooks, "_disable_chassis_as_gateway") + mocker.patch.object(hooks, "get_machine_id", return_value="machine-id") + + hooks._configure_ovn_external_networking(snap, ovs_cli, {"network": {}}) + + mock_rename_safe.assert_called_once_with(ovs_cli, allow_stale_sb_read=False) + ovs_cli.del_bridge.assert_not_called() + ovs_cli.add_bridge.assert_not_called() + ovs_cli.set.assert_any_call( + "open", + ".", + "external_ids", + {"ovn-bridge-mappings": "physnet1:br-old"}, + ) + + def test_safe_rename_adopts_existing_bridge_with_matching_interface( + self, mocker, snap, ovs_cli + ): + snap.config.get.side_effect = lambda key: { + "network.ovn-sb-connection": "tcp:127.0.0.1:6642", + "network.external-bridge-address": hooks.IPVANYNETWORK_UNSET, + "network.external-bridge": "", + "network.physnet-name": "", + "network.external-nic": "", + "network.bridge-mapping": "br-maas:physnet1:eth0", + }.get(key) + mocker.patch.object(hooks, "is_bridge_rename_safe", return_value=True) + ovs_cli.list_bridges.return_value = ["br-maas"] + ovs_cli.list_bridge_interfaces.return_value = ["eth0"] + mocker.patch.object( + hooks, + "detect_current_mappings", + return_value=[BridgeMapping("br-old", "physnet1", "eth0")], + ) + mocker.patch.object(hooks, "_wait_for_interface") + mocker.patch.object(hooks, "_delete_iptable_postrouting_rule") + mocker.patch.object(hooks, "_delete_ips_from_interface") + mocker.patch.object(hooks, "_ensure_link_up") + mocker.patch.object(hooks, "_enable_chassis_as_gateway") + mocker.patch.object(hooks, "_disable_chassis_as_gateway") + mocker.patch.object(hooks, "get_machine_id", return_value="machine-id") + + hooks._configure_ovn_external_networking(snap, ovs_cli, {"network": {}}) + + ovs_cli.del_bridge.assert_called_once_with("br-old") + ovs_cli.add_bridge.assert_called_once_with( + "br-maas", "system", "protocols=OpenFlow13,OpenFlow15" + ) + ovs_cli.set.assert_any_call( + "Port", + "eth0", + "external_ids", + {"microstack-function": "ext-port"}, + ) + + def test_safe_rename_does_not_adopt_incompatible_existing_bridge(self, mocker, snap, ovs_cli): + snap.config.get.side_effect = lambda key: { + "network.ovn-sb-connection": "tcp:127.0.0.1:6642", + "network.external-bridge-address": hooks.IPVANYNETWORK_UNSET, + "network.external-bridge": "", + "network.physnet-name": "", + "network.external-nic": "", + "network.bridge-mapping": "br-maas:physnet1:eth0", + }.get(key) + mocker.patch.object(hooks, "is_bridge_rename_safe", return_value=True) + ovs_cli.list_bridges.return_value = ["br-maas"] + ovs_cli.list_bridge_interfaces.return_value = ["eth9"] + mocker.patch.object( + hooks, + "detect_current_mappings", + return_value=[BridgeMapping("br-old", "physnet1", "eth0")], + ) + mocker.patch.object(hooks, "_wait_for_interface") + mocker.patch.object(hooks, "_delete_iptable_postrouting_rule") + mocker.patch.object(hooks, "_delete_ips_from_interface") + mocker.patch.object(hooks, "_ensure_single_nic_on_bridge") + mocker.patch.object(hooks, "_ensure_link_up") + mocker.patch.object(hooks, "_enable_chassis_as_gateway") + mocker.patch.object(hooks, "_disable_chassis_as_gateway") + mocker.patch.object(hooks, "get_machine_id", return_value="machine-id") + + hooks._configure_ovn_external_networking(snap, ovs_cli, {"network": {}}) + + ovs_cli.del_bridge.assert_not_called() + ovs_cli.add_bridge.assert_not_called() + ovs_cli.set.assert_any_call( + "open", + ".", + "external_ids", + {"ovn-bridge-mappings": "physnet1:br-old"}, + ) + def test_not_ready_when_dpdk_not_initialized(self, mocker, snap, ovs_cli): """Test returns False when DPDK is enabled but not initialized.""" ovs_cli.get_dpdk_initialized.return_value = False