diff --git a/openstack_hypervisor/hooks.py b/openstack_hypervisor/hooks.py index f4b2202..ca7fa78 100644 --- a/openstack_hypervisor/hooks.py +++ b/openstack_hypervisor/hooks.py @@ -73,6 +73,15 @@ OVN_CHASSIS_PLUG = "ovn-chassis" +# OVS management mode values for the network.ovs-managed-by snap config +OVS_MANAGED_BY_AUTO = "auto" # default: detect via ovn-chassis plug connection +OVS_MANAGED_BY_MICROOVN = "microovn" # external OVS managed by microovn snap +OVS_MANAGED_BY_HYPERVISOR = "hypervisor" # internal OVS managed by hypervisor snap + +# Module-level cache of the OVS management mode, set once per configure hook run. +# Avoids threading snap through every function that checks OVS mode. +_OVS_MANAGED_BY: str = OVS_MANAGED_BY_AUTO + # NOTE(dmitriis): there is currently no way to make sure this directory gets # recreated on reboot which would normally be done via systemd-tmpfiles. # mkdir -p /run/lock/snap.$SNAP_INSTANCE_NAME @@ -389,6 +398,10 @@ def _get_local_ip_by_default_route() -> str: "masakari.enable": False, # Option to signal external switch restart is required "network.external-switch-restart": False, + # OVS management mode: 'auto' (detect via ovn-chassis plug), 'microovn' (external), + # or 'hypervisor' (internal). Set by charm when microovn may be installed after + # hypervisor to avoid the snap assuming internal OVS too early. + "network.ovs-managed-by": OVS_MANAGED_BY_AUTO, } @@ -1808,7 +1821,7 @@ def _configure_tls(snap: Snap, ovs_cli: OVSCli, configure_ovn_tls: bool = True) if configure_ovn_tls: _configure_ovn_tls(snap, ovs_cli, is_ovs_external()) else: - logging.info("Internal OVS not ready, deferring OVN TLS configuration.") + logging.info("OVS not ready, deferring OVN TLS configuration.") _configure_libvirt_tls(snap) _configure_cabundle_tls(snap) @@ -2815,13 +2828,51 @@ def is_connected(name: str) -> bool: return False +def _set_ovs_managed_by(snap: Snap) -> None: + """Read network.ovs-managed-by from snap config and cache it for this hook run. + + Must be called once at the start of the configure hook, after defaults have + been applied, and before any code that calls is_ovs_external(). + + Valid values for network.ovs-managed-by: + - 'auto' (default) detect by whether ovn-chassis plug is connected + - 'microovn' force external OVS; microovn manages OVS (may be installed later) + - 'hypervisor' force internal OVS; hypervisor snap manages OVS + """ + global _OVS_MANAGED_BY + + value = snap.config.get("network.ovs-managed-by") or OVS_MANAGED_BY_AUTO + if value not in (OVS_MANAGED_BY_AUTO, OVS_MANAGED_BY_MICROOVN, OVS_MANAGED_BY_HYPERVISOR): + logging.warning( + "Unrecognised network.ovs-managed-by value %r, falling back to 'auto'", value + ) + value = OVS_MANAGED_BY_AUTO + + _OVS_MANAGED_BY = value + is_ovs_external.cache_clear() + logging.info("OVS managed-by mode: %s", _OVS_MANAGED_BY) + + @functools.lru_cache(maxsize=1) def is_ovs_external() -> bool: - """Check if OVN chassis plug is connected. + """Return True if OVS is managed externally (by microovn), False otherwise. - Result is cached during configure hook execution to avoid repeated - subprocess calls. + Checks network.ovs-managed-by snap config (cached in _OVS_MANAGED_BY) first: + - 'microovn' -> True (external OVS; microovn may not yet be installed) + - 'hypervisor' -> False (internal OVS; ignore plug state) + - 'auto' -> check whether the ovn-chassis content plug is connected + + Result is cached for the duration of a single configure hook execution to + avoid repeated subprocess calls. Call _set_ovs_managed_by() at the start + of each configure hook run to refresh the cache. """ + if _OVS_MANAGED_BY == OVS_MANAGED_BY_MICROOVN: + logging.debug("OVS managed-by=microovn, treating as external OVS.") + return True + if _OVS_MANAGED_BY == OVS_MANAGED_BY_HYPERVISOR: + logging.debug("OVS managed-by=hypervisor, treating as internal OVS.") + return False + # OVS_MANAGED_BY_AUTO: fall back to plug connection check return is_connected(OVN_CHASSIS_PLUG) @@ -2882,6 +2933,17 @@ def _internal_ovs_ready(snap: Snap) -> bool: return ovs_socket_path.exists() and bool(ctl_socket and Path(ctl_socket).exists()) +def _external_ovs_ready(snap: Snap) -> bool: + """Return whether the external OVS (microovn) socket is present. + + When network.ovs-managed-by=microovn is set but microovn has not yet been + installed, the socket file will be absent and all ovs-vsctl commands would + hang. This check allows the configure hook to defer OVS/OVN networking + configuration until microovn is actually available. + """ + return _ovs_socket_path(snap).exists() + + def configure(snap: Snap) -> None: """Runs the `configure` hook for the snap. @@ -2899,6 +2961,10 @@ def configure(snap: Snap) -> None: _mkdirs(snap) _update_default_config(snap) _setup_secrets(snap) + # Cache OVS management mode from snap config before any is_ovs_external() calls. + # This allows the charm to set network.ovs-managed-by=microovn so that the snap + # behaves correctly even when microovn is installed after openstack-hypervisor. + _set_ovs_managed_by(snap) _detect_compute_flavors(snap) ovs_socket = ovs_switch_socket(snap) @@ -2910,13 +2976,18 @@ def configure(snap: Snap) -> None: exclude_services = _get_exclude_services(context) services = snap.services.list() ovs_external = is_ovs_external() + logging.info( + "OVS management: %s", "external (microovn)" if ovs_external else "internal (hypervisor)" + ) internal_ovs_deferred = not ovs_external and not _internal_ovs_ready(snap) + external_ovs_deferred = ovs_external and not _external_ovs_ready(snap) + ovs_deferred = internal_ovs_deferred or external_ovs_deferred for service in exclude_services: services[service].stop(disable=True) with RestartOnChange(snap, {**TEMPLATES, **TLS_TEMPLATES}, exclude_services): _render_templates(snap, context) - _configure_tls(snap, ovs_cli, configure_ovn_tls=not internal_ovs_deferred) + _configure_tls(snap, ovs_cli, configure_ovn_tls=not ovs_deferred) _configure_webdav_apache(snap, context) if internal_ovs_deferred: @@ -2924,6 +2995,11 @@ def configure(snap: Snap) -> None: "Internal OVS is not ready yet, deferring OVS/OVN configuration until the next " "configure hook." ) + elif external_ovs_deferred: + logging.info( + "External OVS (microovn) socket not present yet, deferring OVS/OVN configuration " + "until the next configure hook. Install microovn or connect the ovn-chassis plug." + ) else: try: _configure_networking(snap, ovs_cli, context) @@ -2941,7 +3017,36 @@ def configure(snap: Snap) -> None: snap, bool(context.get("network", {}).get("sriov_nic_physical_device_mappings")) ) if not ovs_external: - _ensure_internal_ovs_services(snap, exclude_services) + # When OVS mode is 'auto', the snap doesn't yet know whether microovn will be + # used. The reliable signal that the charm has finished its initial configuration + # is that identity credentials have been set: identity.auth-url is a real Keystone + # endpoint (not the placeholder default) AND identity.username has been provided. + # Until then, skip starting ovs-vswitchd: creating system@ovs-system here would + # block a concurrently-installing microovn snap. + # + # Checking both fields guards against the edge case where an operator legitimately + # runs Keystone at http://localhost:5000/v3 — in that scenario the charm will + # always have set identity.username so the guard still clears correctly. + # + # When OVS mode is explicitly 'hypervisor' (operator/charm intent is clear), + # bypass this check and start internal OVS immediately. + identity_opts = snap.config.get_options("identity") + identity_url = identity_opts.get("identity.auth-url") + identity_username = identity_opts.get("identity.username") + charm_not_configured = ( + _OVS_MANAGED_BY == OVS_MANAGED_BY_AUTO + and identity_url == DEFAULT_CONFIG["identity.auth-url"] + and identity_username is None + ) + if charm_not_configured: + logging.info( + "identity.auth-url is still the default placeholder and " + "identity.username is unset: the charm has not yet applied " + "configuration. Deferring internal OVS service startup to avoid " + "creating system@ovs-system before microovn can install." + ) + else: + _ensure_internal_ovs_services(snap, exclude_services) def _get_configure_context(snap: Snap) -> dict: diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 8a7963d..6c10d12 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -13,8 +13,9 @@ @pytest.fixture(autouse=True) def clear_ovs_external_cache(): - """Clear the lru_cache on is_ovs_external before each test.""" + """Clear the lru_cache on is_ovs_external and reset the managed-by config before each test.""" hooks.is_ovs_external.cache_clear() + hooks._OVS_MANAGED_BY = hooks.OVS_MANAGED_BY_AUTO libvirt_mock = MagicMock() diff --git a/tests/unit/test_hooks.py b/tests/unit/test_hooks.py index 45b46dc..478d67f 100644 --- a/tests/unit/test_hooks.py +++ b/tests/unit/test_hooks.py @@ -1236,6 +1236,85 @@ def test_configure_ovn_base_external_ovs_skips_without_ip(self, mocker, snap, ov ovs_cli.set.assert_not_called() + def test_external_ovs_config_microovn_returns_true_without_plug(self, mocker): + """Config 'microovn' returns True even when ovn-chassis plug is disconnected.""" + import subprocess + + mock_check_call = mocker.patch("subprocess.check_call") + mock_check_call.side_effect = subprocess.CalledProcessError(1, "cmd") + hooks._OVS_MANAGED_BY = hooks.OVS_MANAGED_BY_MICROOVN + + assert hooks.is_ovs_external() is True + mock_check_call.assert_not_called() + + def test_external_ovs_config_hypervisor_returns_false_despite_plug(self, mocker): + """Config 'hypervisor' returns False even when ovn-chassis plug is connected.""" + mock_check_call = mocker.patch("subprocess.check_call") + mock_check_call.return_value = 0 + hooks._OVS_MANAGED_BY = hooks.OVS_MANAGED_BY_HYPERVISOR + + assert hooks.is_ovs_external() is False + mock_check_call.assert_not_called() + + def test_external_ovs_config_auto_falls_back_to_plug_connected(self, mocker): + """Config 'auto' checks the plug when connected.""" + mock_check_call = mocker.patch("subprocess.check_call") + mock_check_call.return_value = 0 + hooks._OVS_MANAGED_BY = hooks.OVS_MANAGED_BY_AUTO + + assert hooks.is_ovs_external() is True + mock_check_call.assert_called_once_with( + ["snapctl", "is-connected", hooks.OVN_CHASSIS_PLUG] + ) + + def test_external_ovs_config_auto_falls_back_to_plug_disconnected(self, mocker): + """Config 'auto' checks the plug when disconnected.""" + import subprocess + + mock_check_call = mocker.patch("subprocess.check_call") + mock_check_call.side_effect = subprocess.CalledProcessError(1, "cmd") + hooks._OVS_MANAGED_BY = hooks.OVS_MANAGED_BY_AUTO + + assert hooks.is_ovs_external() is False + + def test_set_ovs_managed_by_microovn(self, snap): + """_set_ovs_managed_by caches 'microovn' from snap config.""" + snap.config.get.return_value = hooks.OVS_MANAGED_BY_MICROOVN + hooks._set_ovs_managed_by(snap) + assert hooks._OVS_MANAGED_BY == hooks.OVS_MANAGED_BY_MICROOVN + snap.config.get.assert_called_once_with("network.ovs-managed-by") + + def test_set_ovs_managed_by_hypervisor(self, snap): + """_set_ovs_managed_by caches 'hypervisor' from snap config.""" + snap.config.get.return_value = hooks.OVS_MANAGED_BY_HYPERVISOR + hooks._set_ovs_managed_by(snap) + assert hooks._OVS_MANAGED_BY == hooks.OVS_MANAGED_BY_HYPERVISOR + + def test_set_ovs_managed_by_auto(self, snap): + """_set_ovs_managed_by caches 'auto' from snap config.""" + snap.config.get.return_value = hooks.OVS_MANAGED_BY_AUTO + hooks._set_ovs_managed_by(snap) + assert hooks._OVS_MANAGED_BY == hooks.OVS_MANAGED_BY_AUTO + + def test_set_ovs_managed_by_invalid_falls_back_to_auto(self, snap): + """_set_ovs_managed_by falls back to 'auto' for unrecognised values.""" + snap.config.get.return_value = "unknown-value" + hooks._set_ovs_managed_by(snap) + assert hooks._OVS_MANAGED_BY == hooks.OVS_MANAGED_BY_AUTO + + def test_set_ovs_managed_by_none_falls_back_to_auto(self, snap): + """_set_ovs_managed_by falls back to 'auto' when config returns None.""" + snap.config.get.return_value = None + hooks._set_ovs_managed_by(snap) + assert hooks._OVS_MANAGED_BY == hooks.OVS_MANAGED_BY_AUTO + + def test_set_ovs_managed_by_clears_lru_cache(self, mocker, snap): + """_set_ovs_managed_by clears the is_ovs_external LRU cache.""" + mock_cache_clear = mocker.patch.object(hooks.is_ovs_external, "cache_clear") + snap.config.get.return_value = hooks.OVS_MANAGED_BY_MICROOVN + hooks._set_ovs_managed_by(snap) + mock_cache_clear.assert_called_once() + class TestExcludeServices: """Tests for _get_exclude_services function.""" @@ -1464,6 +1543,9 @@ def test_internal_ovs_not_ready_defers_ovs_configuration(self, mocker, snap): "_ensure_internal_ovs_services", side_effect=lambda *_: order.append("ensure"), ) + # Simulate charm already configured (real identity URL) so the OVS + # startup guard does not interfere with what this test is checking. + snap.config.get_options.return_value.get.return_value = "http://10.0.0.1:5000/v3" hooks.configure(snap) @@ -1502,6 +1584,9 @@ def test_internal_ovs_ready_runs_configuration(self, mocker, snap): "_ensure_internal_ovs_services", side_effect=lambda *_: order.append("ensure"), ) + # Simulate charm already configured (real identity URL) so the OVS + # startup guard does not interfere with what this test is checking. + snap.config.get_options.return_value.get.return_value = "http://10.0.0.1:5000/v3" hooks.configure(snap) @@ -1518,6 +1603,7 @@ def test_external_ovs_skips_internal_deferral_and_enable(self, mocker, snap): mocker.patch.object(hooks, "_get_exclude_services", return_value=[]) mocker.patch.object(hooks, "OVSCli", return_value=mock.Mock()) mocker.patch.object(hooks, "is_ovs_external", return_value=True) + mocker.patch.object(hooks, "_external_ovs_ready", return_value=True) mocker.patch.object(hooks, "RestartOnChange", return_value=nullcontext()) mocker.patch.object(hooks, "_render_templates") mocker.patch.object(hooks, "_configure_webdav_apache") @@ -1536,6 +1622,185 @@ def test_external_ovs_skips_internal_deferral_and_enable(self, mocker, snap): mock_ready.assert_not_called() mock_ensure.assert_not_called() + def test_external_ovs_deferred_when_microovn_not_installed(self, mocker, snap): + """When microovn is not yet installed, OVS/OVN configuration is deferred.""" + snap.services.list.return_value = {} + mocker.patch.object(hooks, "_mkdirs") + mocker.patch.object(hooks, "_update_default_config") + mocker.patch.object(hooks, "_setup_secrets") + mocker.patch.object(hooks, "_detect_compute_flavors") + mocker.patch.object(hooks, "_get_configure_context", return_value={"network": {}}) + mocker.patch.object(hooks, "_get_exclude_services", return_value=[]) + mocker.patch.object(hooks, "OVSCli", return_value=mock.Mock()) + mocker.patch.object(hooks, "is_ovs_external", return_value=True) + # microovn socket does not exist yet + mocker.patch.object(hooks, "_external_ovs_ready", return_value=False) + mocker.patch.object(hooks, "RestartOnChange", return_value=nullcontext()) + mocker.patch.object(hooks, "_render_templates") + mocker.patch.object(hooks, "_configure_webdav_apache") + mocker.patch.object(hooks, "_configure_kvm") + mocker.patch.object(hooks, "_configure_monitoring_services") + mocker.patch.object(hooks, "_configure_ceph") + mocker.patch.object(hooks, "_configure_masakari_services") + mocker.patch.object(hooks, "_configure_sriov_agent_service") + mock_configure_tls = mocker.patch.object(hooks, "_configure_tls") + mock_configure_networking = mocker.patch.object(hooks, "_configure_networking") + + hooks.configure(snap) + + # TLS must be deferred (configure_ovn_tls=False) + mock_configure_tls.assert_called_once() + _, kwargs = mock_configure_tls.call_args + assert kwargs.get("configure_ovn_tls") is False + # Networking must NOT be called + mock_configure_networking.assert_not_called() + + def test_external_ovs_ready_check_uses_socket_path(self, mocker, snap, tmp_path): + """_external_ovs_ready returns True only when the OVS socket exists.""" + mocker.patch.object(hooks, "is_ovs_external", return_value=True) + socket_file = tmp_path / "db.sock" + mocker.patch.object(hooks, "_ovs_socket_path", return_value=socket_file) + + assert hooks._external_ovs_ready(snap) is False + + socket_file.touch() + assert hooks._external_ovs_ready(snap) is True + + def test_internal_ovs_not_started_on_unconfigured_first_run(self, mocker, snap): + """Internal OVS services must NOT be started while identity is unconfigured. + + Snapd fires a configure hook automatically right after 'snap install', and + again when the charm calls ``snap set network.ovs-managed-by=auto`` in its + own install hook — both times before any real Keystone URL has been provided. + The snap detects this by checking that ``identity.auth-url`` still equals the + placeholder default. In that state ``_ensure_internal_ovs_services`` must be + skipped to avoid creating ``system@ovs-system`` before microovn installs. + """ + snap.services.list.return_value = {} + mocker.patch.object(hooks, "_mkdirs") + mocker.patch.object(hooks, "_update_default_config") + mocker.patch.object(hooks, "_setup_secrets") + mocker.patch.object(hooks, "_detect_compute_flavors") + mocker.patch.object(hooks, "_get_configure_context", return_value={"network": {}}) + mocker.patch.object(hooks, "_get_exclude_services", return_value=[]) + mocker.patch.object(hooks, "OVSCli", return_value=mock.Mock()) + # OVS mode is 'auto' (default from conftest) and plug is not connected + mocker.patch.object(hooks, "is_ovs_external", return_value=False) + mocker.patch.object(hooks, "_internal_ovs_ready", return_value=True) + mocker.patch.object(hooks, "RestartOnChange", return_value=nullcontext()) + mocker.patch.object(hooks, "_render_templates") + mocker.patch.object(hooks, "_configure_webdav_apache") + mocker.patch.object(hooks, "_configure_tls") + mocker.patch.object(hooks, "_configure_networking") + mocker.patch.object(hooks, "_configure_kvm") + mocker.patch.object(hooks, "_configure_monitoring_services") + mocker.patch.object(hooks, "_configure_ceph") + mocker.patch.object(hooks, "_configure_masakari_services") + mocker.patch.object(hooks, "_configure_sriov_agent_service") + mock_ensure = mocker.patch.object(hooks, "_ensure_internal_ovs_services") + + # Simulate snap not yet configured by the charm: identity URL is the placeholder + # AND username has not been set yet (None). Both conditions must hold for the + # guard to defer OVS startup. + def unconfigured_identity_get(key, default=None): + if key == "identity.auth-url": + return hooks.DEFAULT_CONFIG["identity.auth-url"] + if key == "identity.username": + return None + return default + + snap.config.get_options.return_value.get.side_effect = unconfigured_identity_get + + hooks.configure(snap) + + # _ensure_internal_ovs_services must NOT have been called — starting + # ovs-vswitchd here would create system@ovs-system and block microovn. + mock_ensure.assert_not_called() + + def test_internal_ovs_started_when_managed_by_hypervisor_with_default_identity( + self, mocker, snap + ): + """Internal OVS must start when explicitly managed by 'hypervisor'. + + Even if ``identity.auth-url`` is still at the placeholder default and + ``identity.username`` has not been set (i.e. the charm hasn't configured + identity yet), setting ``network.ovs-managed-by`` to ``'hypervisor'`` must + bypass the guard and ensure internal OVS services are started. + """ + snap.services.list.return_value = {} + mocker.patch.object(hooks, "_mkdirs") + mocker.patch.object(hooks, "_update_default_config") + mocker.patch.object(hooks, "_setup_secrets") + # Force OVS mode to 'hypervisor' (simulates charm explicitly setting it). + mocker.patch.object( + hooks, + "_set_ovs_managed_by", + side_effect=lambda _: setattr( + hooks, "_OVS_MANAGED_BY", hooks.OVS_MANAGED_BY_HYPERVISOR + ), + ) + mocker.patch.object(hooks, "_detect_compute_flavors") + mocker.patch.object(hooks, "_get_configure_context", return_value={"network": {}}) + mocker.patch.object(hooks, "_get_exclude_services", return_value=[]) + mocker.patch.object(hooks, "OVSCli", return_value=mock.Mock()) + mocker.patch.object(hooks, "is_ovs_external", return_value=False) + mocker.patch.object(hooks, "_internal_ovs_ready", return_value=True) + mocker.patch.object(hooks, "RestartOnChange", return_value=nullcontext()) + mocker.patch.object(hooks, "_render_templates") + mocker.patch.object(hooks, "_configure_webdav_apache") + mocker.patch.object(hooks, "_configure_tls") + mocker.patch.object(hooks, "_configure_networking") + mocker.patch.object(hooks, "_configure_kvm") + mocker.patch.object(hooks, "_configure_monitoring_services") + mocker.patch.object(hooks, "_configure_ceph") + mocker.patch.object(hooks, "_configure_masakari_services") + mocker.patch.object(hooks, "_configure_sriov_agent_service") + mock_ensure = mocker.patch.object(hooks, "_ensure_internal_ovs_services") + # Identity is still unconfigured (placeholder URL, no username) — the guard + # must be bypassed because mode is explicitly 'hypervisor'. + snap.config.get_options.return_value.get.return_value = hooks.DEFAULT_CONFIG[ + "identity.auth-url" + ] + + hooks.configure(snap) + + mock_ensure.assert_called_once() + + def test_internal_ovs_started_when_identity_configured(self, mocker, snap): + """Internal OVS must start once the charm has provided a real identity URL. + + When ``identity.auth-url`` has been set to a real Keystone endpoint (anything + other than the placeholder default) and ``network.ovs-managed-by`` is 'auto' + with no plug connected, ``_ensure_internal_ovs_services`` must be called. + """ + snap.services.list.return_value = {} + mocker.patch.object(hooks, "_mkdirs") + mocker.patch.object(hooks, "_update_default_config") + mocker.patch.object(hooks, "_setup_secrets") + mocker.patch.object(hooks, "_detect_compute_flavors") + mocker.patch.object(hooks, "_get_configure_context", return_value={"network": {}}) + mocker.patch.object(hooks, "_get_exclude_services", return_value=[]) + mocker.patch.object(hooks, "OVSCli", return_value=mock.Mock()) + mocker.patch.object(hooks, "is_ovs_external", return_value=False) + mocker.patch.object(hooks, "_internal_ovs_ready", return_value=True) + mocker.patch.object(hooks, "RestartOnChange", return_value=nullcontext()) + mocker.patch.object(hooks, "_render_templates") + mocker.patch.object(hooks, "_configure_webdav_apache") + mocker.patch.object(hooks, "_configure_tls") + mocker.patch.object(hooks, "_configure_networking") + mocker.patch.object(hooks, "_configure_kvm") + mocker.patch.object(hooks, "_configure_monitoring_services") + mocker.patch.object(hooks, "_configure_ceph") + mocker.patch.object(hooks, "_configure_masakari_services") + mocker.patch.object(hooks, "_configure_sriov_agent_service") + mock_ensure = mocker.patch.object(hooks, "_ensure_internal_ovs_services") + # Charm has provided the real Keystone URL. + snap.config.get_options.return_value.get.return_value = "http://10.0.0.1:5000/v3" + + hooks.configure(snap) + + mock_ensure.assert_called_once() + class TestDPDKConfigReady: """Tests for _dpdk_config_is_ready function."""