From bf4be96dc96d0c06d0a68d4d00d3441c14437dd4 Mon Sep 17 00:00:00 2001 From: Travis Brown Date: Tue, 3 Mar 2026 23:17:23 +0000 Subject: [PATCH 1/2] [Nexthop] Distro CLI: Add getip and ssh commands The Distro CLI getip and ssh commands make a best-effort attempt to determine an IP for the given switch based on its MAC address and then either return that IP or directly ssh to it. It does so by connecting to the Distro Infra container and checking on the configured interface for pre-existing IP neighbor/ARP entries. If no such entries are found it will attempt a subnet ping to refresh the Linux kernel neighbor entries. In most cases this is sufficient after the system has PXE booted and therefore the neighbor cache is filled. Testing is as easy as starting the Distro Infra container: ``` $ ./distro_infra.sh --intf eth1 --persist-dir data ``` Then running the fboss-image device getip command with the appropriate MAC address: ``` $ ./fboss-image device dc:da:4d:fc:ad:2d getip [0.00s] Getting IP for device dc:da:4d:fc:ad:2d 10.250.33.2 ``` --- fboss-image/distro_cli/cmds/device.py | 106 +++++- fboss-image/distro_cli/lib/distro_infra.py | 128 +++++++ .../distro_cli/lib/docker/container.py | 159 ++++++++ fboss-image/distro_cli/lib/exceptions.py | 11 + fboss-image/distro_cli/ruff.toml | 1 - fboss-image/distro_cli/tests/device_test.py | 166 +++++++-- fboss-image/distro_cli/tests/docker_test.py | 91 ++++- fboss-image/distro_infra/Dockerfile | 3 +- fboss-image/distro_infra/distro_infra.sh | 5 + fboss-image/distro_infra/parts/getip.sh | 349 ++++++++++++++++++ 10 files changed, 973 insertions(+), 46 deletions(-) create mode 100644 fboss-image/distro_cli/lib/distro_infra.py create mode 100755 fboss-image/distro_infra/parts/getip.sh diff --git a/fboss-image/distro_cli/cmds/device.py b/fboss-image/distro_cli/cmds/device.py index 0d18beb47fdeb..41801cdcdc330 100644 --- a/fboss-image/distro_cli/cmds/device.py +++ b/fboss-image/distro_cli/cmds/device.py @@ -7,13 +7,27 @@ """Device 
command implementation.""" +import json import logging +import os -from lib.cli import validate_path +from distro_cli.lib.cli import validate_path +from distro_cli.lib.distro_infra import ( + DISTRO_INFRA_CONTAINER, + GETIP_SCRIPT_CONTAINER_PATH, + get_interface_name, +) +from distro_cli.lib.docker import container +from distro_cli.lib.exceptions import DistroInfraError logger = logging.getLogger("fboss-image") +def print_to_console(message: str) -> None: + """Print message to console""" + print(message) # noqa: T201 + + def image_upstream_command(args): """Download full image from upstream repository and set it to be loaded onto device""" logger.info(f"Setting upstream image for device {args.mac}") @@ -40,16 +54,92 @@ def update_command(args): logger.info("Device update command (stub)") +def get_device_ip(mac: str) -> str | None: + """Get device IP address by querying the distro-infra container. + + Args: + mac: Device MAC address + + Returns: + IP address string (IPv4 preferred, IPv6 fallback), or None if not found + """ + if not container.container_is_running(DISTRO_INFRA_CONTAINER): + logger.error(f"Container '{DISTRO_INFRA_CONTAINER}' is not running") + logger.error("Please start the distro-infra container first") + return None + + try: + interface = get_interface_name() + except DistroInfraError as e: + logger.error(f"Failed to get interface name: {e}") + return None + + cmd = [GETIP_SCRIPT_CONTAINER_PATH, mac, interface] + + # Execute in container + exit_code, stdout, stderr = container.exec_in_container(DISTRO_INFRA_CONTAINER, cmd) + + if exit_code != 0: + logger.error(f"getip.sh failed with exit code {exit_code}") + if stderr: + logger.error(f"stderr: {stderr}") + if stdout: + logger.error(f"stdout: {stdout}") + return None + + try: + result = json.loads(stdout) + + if "error_code" in result: + logger.error(f"Error: {result.get('error', 'Unknown error')}") + logger.error(f"Error code: {result['error_code']}") + return None + + ipv4 = result.get("ipv4") + 
ipv6 = result.get("ipv6") + + return ipv4 if ipv4 else ipv6 + + except json.JSONDecodeError as e: + logger.error(f"Failed to parse JSON output: {e}") + logger.error(f"Output was: {stdout}") + return None + + def getip_command(args): """Get device IP address""" logger.info(f"Getting IP for device {args.mac}") - logger.info("Device getip command (stub)") + + ip_address = get_device_ip(args.mac) + + if ip_address: + print_to_console(ip_address) + else: + logger.error("No IP address found in response") def ssh_command(args): """SSH to device""" logger.info(f"SSH to device {args.mac}") - logger.info("Device ssh command (stub)") + + ip_address = get_device_ip(args.mac) + + if not ip_address: + logger.error("No IP address found for device") + return + + logger.info(f"Connecting to {ip_address}") + os.execvp( + "ssh", + [ + "ssh", + "-o", + "StrictHostKeyChecking=no", + "-o", + "UserKnownHostsFile=/dev/null", + f"root@{ip_address}", + ], + ) def setup_device_commands(cli): @@ -103,7 +193,13 @@ def setup_device_commands(cli): ) device.add_command( - "getip", getip_command, help_text="Get device IP address", arguments=[] + "getip", + getip_command, + help_text="Get device IP address", ) - device.add_command("ssh", ssh_command, help_text="SSH to device", arguments=[]) + device.add_command( + "ssh", + ssh_command, + help_text="SSH to device", + ) diff --git a/fboss-image/distro_cli/lib/distro_infra.py b/fboss-image/distro_cli/lib/distro_infra.py new file mode 100644 index 0000000000000..fd53dc2db52fa --- /dev/null +++ b/fboss-image/distro_cli/lib/distro_infra.py @@ -0,0 +1,128 @@ +# Copyright (c) 2004-present, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. An additional grant +# of patent rights can be found in the PATENTS file in the same directory. 
+ +"""Distro Infrastructure helper functions.""" + +import json +import logging +import re +import subprocess +from pathlib import Path + +from distro_cli.lib.docker import container +from distro_cli.lib.exceptions import DistroInfraError + +logger = logging.getLogger("fboss-image") + +# This should match DISTRO_CONTAINER_NAME in distro_infra/distro_infra.sh +DISTRO_INFRA_CONTAINER = "fboss-distro-infra" + +GETIP_SCRIPT_CONTAINER_PATH = "/distro_infra/getip.sh" + + +def normalize_mac_address(mac: str) -> tuple[str, str]: + """Normalize MAC address to both dash and colon formats. + + Args: + mac: MAC address in any format + + Returns: + Tuple of (dash_format, colon_format) + e.g., ("aa-bb-cc-dd-ee-ff", "aa:bb:cc:dd:ee:ff") + + Raises: + DistroInfraError: If MAC address is invalid + """ + # Remove all separators and convert to lowercase + mac_clean = re.sub(r"[:\-]", "", mac.lower()) + + # Validate MAC address format (12 hex characters) + if not re.match(r"^[0-9a-f]{12}$", mac_clean): + raise DistroInfraError( + f"Invalid MAC address: {mac}. Expected 12 hex characters with optional colons or dashes." + ) + + # Convert to dash and colon formats + dash_mac = "-".join([mac_clean[i : i + 2] for i in range(0, 12, 2)]) + colon_mac = ":".join([mac_clean[i : i + 2] for i in range(0, 12, 2)]) + + return dash_mac, colon_mac + + +def get_interface_name() -> str: + """Get the network interface name of the distro-infra container from the persistent directory. + + Returns: + Network interface name + + Raises: + DistroInfraError: If interface_name.txt not found or empty + """ + persistent_dir = find_persistent_dir() + interface_file = persistent_dir / "interface_name.txt" + + if not interface_file.exists(): + raise DistroInfraError( + f"Interface name file not found: {interface_file}. " + "The distro-infra container may not have started properly." 
+ ) + + interface = interface_file.read_text().strip() + if not interface: + raise DistroInfraError(f"Interface name file is empty: {interface_file}") + + return interface + + +def find_persistent_dir() -> Path: + """Find the persistent directory mounted in the distro_infra container. + + Returns: + Path to the persistent directory on the host + + Raises: + DistroInfraError: If container is not running or persistent dir not found + """ + # Check if container is running + if not container.container_is_running(DISTRO_INFRA_CONTAINER): + raise DistroInfraError( + f"Container '{DISTRO_INFRA_CONTAINER}' is not running. " + "Please start it first with distro_infra.sh" + ) + + try: + result = subprocess.run( + ["docker", "inspect", DISTRO_INFRA_CONTAINER], + capture_output=True, + text=True, + check=True, + ) + inspect_data = json.loads(result.stdout) + + if not inspect_data: + raise DistroInfraError( + f"Container {DISTRO_INFRA_CONTAINER} is not running. " + "Please start it first with distro_infra.sh" + ) + + # Find the volume mount for /distro_infra/persistent + mounts = inspect_data[0].get("Mounts", []) + for mount in mounts: + if mount.get("Destination") == "/distro_infra/persistent": + return Path(mount["Source"]) + + raise DistroInfraError( + f"Could not find persistent directory mount in container {DISTRO_INFRA_CONTAINER}" + ) + + except subprocess.CalledProcessError as e: + raise DistroInfraError( + f"Container {DISTRO_INFRA_CONTAINER} is not running. 
" + "Please start it first with distro_infra.sh" + ) from e + except (json.JSONDecodeError, KeyError, IndexError) as e: + raise DistroInfraError(f"Failed to parse container inspect data: {e}") from e diff --git a/fboss-image/distro_cli/lib/docker/container.py b/fboss-image/distro_cli/lib/docker/container.py index 8eedac3ca2a21..0e02ef6610b0a 100644 --- a/fboss-image/distro_cli/lib/docker/container.py +++ b/fboss-image/distro_cli/lib/docker/container.py @@ -22,6 +22,7 @@ def run_container( # noqa: PLR0913 privileged: bool = False, interactive: bool = False, ephemeral: bool = True, + detach: bool = False, working_dir: str | None = None, name: str | None = None, ) -> int: @@ -60,6 +61,9 @@ def run_container( # noqa: PLR0913 if interactive: cmd.extend(["-i", "-t"]) + if detach: + cmd.append("-d") + if privileged: cmd.append("--privileged") @@ -95,3 +99,158 @@ def run_container( # noqa: PLR0913 raise RuntimeError( "Docker command not found. Is Docker installed and in PATH?" ) from e + except subprocess.CalledProcessError as e: + logger.error(f"Command failed: {e}") + return e.returncode + + +def exec_in_container( + name: str, + command: list[str], +) -> tuple[int, str, str]: + """Execute a command in a running Docker container. + + Args: + name: Name of the container + command: Command to execute in container (as list) + + Returns: + Tuple of exit code, stdout, and stderr from the command execution + + Raises: + RuntimeError: If docker command fails + """ + logger.info(f"Executing command in container {name}: {command}") + + cmd = ["docker", "exec", name] + cmd.extend(command) + + logger.debug(f"Running: {' '.join(str(c) for c in cmd)}") + + try: + result = subprocess.run( + cmd, + capture_output=True, + text=True, + check=True, + ) + return result.returncode, result.stdout, result.stderr + except FileNotFoundError: + raise RuntimeError("Docker command not found. 
Is Docker installed and in PATH?") + except subprocess.CalledProcessError as e: + logger.error(f"Command failed: {e}") + return e.returncode, e.stdout, e.stderr + + +def container_is_running(name: str) -> bool: + """Check if a Docker container is running. + + Args: + name: Name of the container + + Returns: + True if container is running, False otherwise + + Raises: + RuntimeError: If docker command fails + """ + logger.info(f"Checking if container is running: {name}") + + cmd = ["docker", "ps", "-aq", "--filter", f"name={name}"] + + logger.debug(f"Running: {' '.join(str(c) for c in cmd)}") + + try: + result = subprocess.run( + cmd, + capture_output=True, + text=True, + check=True, + ) + return result.returncode == 0 and bool(result.stdout.strip()) + except FileNotFoundError: + raise RuntimeError("Docker command not found. Is Docker installed and in PATH?") + except subprocess.CalledProcessError as e: + logger.error(f"Check command failed: {e}") + return False + + +def stop_container(name: str) -> int: + """Stop a Docker container. + + Args: + name: Name of the container + + Returns: + Exit code from the stop command + + Raises: + RuntimeError: If docker command fails + """ + logger.info(f"Stopping container: {name}") + + cmd = ["docker", "stop", name] + + logger.debug(f"Running: {' '.join(str(c) for c in cmd)}") + + try: + result = subprocess.run(cmd, check=True) + logger.info(f"Stop command exited with code: {result.returncode}") + return result.returncode + except FileNotFoundError as e: + raise RuntimeError( + "Docker command not found. Is Docker installed and in PATH?" + ) from e + except subprocess.CalledProcessError as e: + logger.error(f"Stop command failed: {e}") + return e.returncode + + +def remove_container(name: str) -> int: + """Remove a Docker container. 
+ + Args: + name: Name of the container + + Returns: + Exit code from the remove command + + Raises: + RuntimeError: If docker command fails + """ + logger.info(f"Removing container: {name}") + + cmd = ["docker", "rm", "-f", name] + + logger.debug(f"Running: {' '.join(str(c) for c in cmd)}") + + try: + result = subprocess.run(cmd, check=True) + logger.info(f"Remove command exited with code: {result.returncode}") + return result.returncode + except FileNotFoundError as e: + raise RuntimeError( + "Docker command not found. Is Docker installed and in PATH?" + ) from e + except subprocess.CalledProcessError as e: + logger.error(f"Remove command failed: {e}") + return e.returncode + + +def stop_and_remove_container(name: str) -> int: + """Stop and remove a Docker container. + + Args: + name: Name of the container + + Returns: + Exit code from the remove command + + Raises: + RuntimeError: If docker command fails + """ + logger.info(f"Stopping and removing container: {name}") + stop_exit_code = stop_container(name) + if stop_exit_code != 0: + return stop_exit_code + return remove_container(name) diff --git a/fboss-image/distro_cli/lib/exceptions.py b/fboss-image/distro_cli/lib/exceptions.py index d61d23efbaa38..05a3ce45b3871 100644 --- a/fboss-image/distro_cli/lib/exceptions.py +++ b/fboss-image/distro_cli/lib/exceptions.py @@ -55,3 +55,14 @@ class ComponentError(FbossImageError): - Component configuration invalid - Component builder not implemented """ + + +class DistroInfraError(FbossImageError): + """Distro Infrastructure operation failed. 
+ + Raised when: + - Container operations fail + - PXE boot configuration fails + - Device IP retrieval fails + - MAC address validation fails + """ diff --git a/fboss-image/distro_cli/ruff.toml b/fboss-image/distro_cli/ruff.toml index e6171bfd83887..ba5f5384dd42b 100644 --- a/fboss-image/distro_cli/ruff.toml +++ b/fboss-image/distro_cli/ruff.toml @@ -24,7 +24,6 @@ select = [ ignore = [ "B904", # Allow raising exceptions without 'from' for cleaner error messages - "PLR0913", # Allow more than 8 arguments to be specified ] [lint.per-file-ignores] diff --git a/fboss-image/distro_cli/tests/device_test.py b/fboss-image/distro_cli/tests/device_test.py index 08895f5e3bb3e..a9457ec0811a6 100644 --- a/fboss-image/distro_cli/tests/device_test.py +++ b/fboss-image/distro_cli/tests/device_test.py @@ -7,30 +7,19 @@ # LICENSE file in the root directory of this source tree. An additional grant # of patent rights can be found in the PATENTS file in the same directory. -""" -Unit tests for device commands +"""Unit tests for device commands.""" -NOTE: These are skeleton tests for stub implementations. -When device commands are fully implemented, these tests will be expanded -to verify actual functionality. - -These tests verify that: -1. Device command group exists and has expected subcommands -2. Commands can be called without crashing (stub behavior) -3. 
Context passing works correctly -""" - -import sys +import argparse +import shutil +import subprocess +import tarfile import tempfile import unittest from pathlib import Path +from unittest.mock import patch -# Add parent directory to path for imports -sys.path.insert(0, str(Path(__file__).parent.parent)) - -import argparse - -from cmds.device import ( +from distro_cli.cmds.device import ( + get_device_ip, getip_command, image_command, image_upstream_command, @@ -39,29 +28,91 @@ ssh_command, update_command, ) +from distro_cli.lib.distro_infra import DISTRO_INFRA_CONTAINER +from distro_cli.lib.docker import container class TestDeviceCommands(unittest.TestCase): - """Test device command group and subcommands (stubs)""" + """Test device command group and subcommands""" + + @classmethod + def setUpClass(cls): + """Set up test container before all tests""" + try: + result = subprocess.run( + ["docker", "images", "-q", "fboss_distro_infra"], + capture_output=True, + text=True, + check=True, + ) + if not result.stdout.strip(): + raise unittest.SkipTest( + "fboss_distro_infra Docker image not found. 
" + "Please build it with: cd fboss-image/distro_infra && ./build.sh" + ) + except (subprocess.CalledProcessError, FileNotFoundError): + raise unittest.SkipTest("Docker not available or image not built") + + cwd = Path.cwd() + cls.container_temp_dir = Path( + tempfile.mkdtemp(prefix="distro_infra_test_", dir=cwd) + ) + cls.container_persistent_dir = cls.container_temp_dir / "persistent" + cls.container_persistent_dir.mkdir(parents=True, exist_ok=True) + + # Write interface name file (normally done by distro_infra.sh) + interface_file = cls.container_persistent_dir / "interface_name.txt" + interface_file.write_text("lo") + + # Clean up any existing container with the same name + if container.container_is_running(DISTRO_INFRA_CONTAINER): + container.stop_and_remove_container(DISTRO_INFRA_CONTAINER) + + # Start the fboss-distro-infra container in background + volumes = {cls.container_persistent_dir: Path("/distro_infra/persistent")} + + exit_code = container.run_container( + image="fboss_distro_infra", + command=["/distro_infra/run_distro_infra.sh", "--intf", "lo"], + volumes=volumes, + ephemeral=False, + detach=True, + name=DISTRO_INFRA_CONTAINER, + privileged=True, # Required for network operations + ) + + if exit_code != 0: + raise RuntimeError(f"Failed to start {DISTRO_INFRA_CONTAINER} container") + + @classmethod + def tearDownClass(cls): + """Clean up test container after all tests""" + if container.container_is_running(DISTRO_INFRA_CONTAINER): + container.stop_and_remove_container(DISTRO_INFRA_CONTAINER) + + shutil.rmtree(cls.container_temp_dir, ignore_errors=True) def setUp(self): """Set up test fixtures""" self.test_mac = "aa:bb:cc:dd:ee:ff" - # Create a temporary manifest file for tests that need it with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: f.write('{"test": "manifest"}') self.manifest_path = Path(f.name) - # Create a temporary image file for tests that need it - with tempfile.NamedTemporaryFile(mode="w", suffix=".bin", 
delete=False) as f: - f.write("fake image data") - self.image_path = Path(f.name) + self.temp_dir = tempfile.mkdtemp() + self.image_path = Path(self.temp_dir) / "test_image.tar" + + test_file = Path(self.temp_dir) / "test_file.txt" + test_file.write_text("test content") + + with tarfile.open(self.image_path, "w") as tar: + tar.add(test_file, arcname="test_file.txt") def tearDown(self): """Clean up test fixtures""" self.manifest_path.unlink() - self.image_path.unlink() + shutil.rmtree(self.temp_dir, ignore_errors=True) def test_device_commands_exist(self): """Test that device commands exist""" @@ -101,18 +152,63 @@ def test_update_stub(self): # Call command - just verify it doesn't crash update_command(args) - def test_getip_stub(self): - """Test getip command (stub)""" - args = argparse.Namespace(mac=self.test_mac) - # Call command - just verify it doesn't crash - getip_command(args) + @patch("distro_cli.cmds.device.container.exec_in_container") + @patch("distro_cli.cmds.device.container.container_is_running") + def test_get_device_ip_ipv4(self, mock_is_running, mock_exec): + """Test get_device_ip returns IPv4 when available""" + mock_is_running.return_value = True + mock_exec.return_value = ( + 0, + '{"mac": "aa:bb:cc:dd:ee:ff", "ipv4": "192.168.1.100", "ipv6": "fe80::1"}', + "", + ) - def test_ssh_stub(self): - """Test ssh command (stub)""" - args = argparse.Namespace(mac=self.test_mac) - # Call command - just verify it doesn't crash + ip = get_device_ip(self.test_mac) + self.assertEqual(ip, "192.168.1.100") + + @patch("distro_cli.cmds.device.container.exec_in_container") + @patch("distro_cli.cmds.device.container.container_is_running") + def test_get_device_ip_ipv6_fallback(self, mock_is_running, mock_exec): + """Test get_device_ip returns IPv6 when IPv4 not available""" + mock_is_running.return_value = True + mock_exec.return_value = ( + 0, + '{"mac": "aa:bb:cc:dd:ee:ff", "ipv6": "fe80::1"}', + "", + ) + + ip = get_device_ip(self.test_mac) + 
self.assertEqual(ip, "fe80::1") + + @patch("distro_cli.cmds.device.container.exec_in_container") + @patch("distro_cli.cmds.device.container.container_is_running") + @patch("distro_cli.cmds.device.os.execvp") + def test_ssh_command_calls_execvp_correctly( + self, mock_execvp, mock_is_running, mock_exec + ): + """Test ssh command calls os.execvp with correct arguments""" + mock_is_running.return_value = True + mock_exec.return_value = ( + 0, + '{"mac": "aa:bb:cc:dd:ee:ff", "ipv4": "192.168.1.100"}', + "", + ) + + args = argparse.Namespace(mac=self.test_mac, interface=None) ssh_command(args) + mock_execvp.assert_called_once_with( + "ssh", + [ + "ssh", + "-o", + "StrictHostKeyChecking=no", + "-o", + "UserKnownHostsFile=/dev/null", + "root@192.168.1.100", + ], + ) + if __name__ == "__main__": unittest.main() diff --git a/fboss-image/distro_cli/tests/docker_test.py b/fboss-image/distro_cli/tests/docker_test.py index 3b713f388a322..ca8d2cb74a4b6 100644 --- a/fboss-image/distro_cli/tests/docker_test.py +++ b/fboss-image/distro_cli/tests/docker_test.py @@ -3,7 +3,7 @@ import unittest from distro_cli.lib.constants import FBOSS_BUILDER_IMAGE -from distro_cli.lib.docker.container import run_container +from distro_cli.lib.docker import container from distro_cli.tests.test_helpers import ensure_test_docker_image @@ -15,15 +15,98 @@ def setUpClass(cls): """Ensure fboss_builder image exists before running tests.""" ensure_test_docker_image() - def test_run_simple_container(self): + def test_run_container(self): """Test running a simple container command.""" - exit_code = run_container( + exit_code = container.run_container( image=FBOSS_BUILDER_IMAGE, - command=["echo", "hello from container"], + command=["echo", "hello from run_container"], ephemeral=True, ) self.assertEqual(exit_code, 0) + def test_exec_in_container(self): + """Test executing a command in a running container.""" + exit_code = container.run_container( + image=FBOSS_BUILDER_IMAGE, + command=["sleep", "inf"], + 
ephemeral=False, + detach=True, + name="test_exec_container", + ) + self.assertEqual(exit_code, 0) + + exec_exit_code, stdout, stderr = container.exec_in_container( + name="test_exec_container", + command=["echo", "hello from exec_in_container"], + ) + self.assertEqual(exec_exit_code, 0) + self.assertEqual(stdout.strip(), "hello from exec_in_container") + + # Clean up the container + container.stop_and_remove_container(name="test_exec_container") + + # Check if container is stopped and removed + is_running = container.container_is_running("test_exec_container") + self.assertFalse(is_running) + + # Try to exec in the removed container - should fail with non-zero exit code + exec_exit_code, stdout, stderr = container.exec_in_container( + name="test_exec_container", + command=["echo", "should not work"], + ) + self.assertNotEqual(exec_exit_code, 0) + + def test_container_is_running(self): + """Test checking if a container is running.""" + # Check non-existent container + is_running = container.container_is_running("non_existent_container") + self.assertFalse(is_running) + + # Start a container + exit_code = container.run_container( + image=FBOSS_BUILDER_IMAGE, + command=["sleep", "1"], + ephemeral=False, + name="test_running_container", + ) + self.assertEqual(exit_code, 0) + + # Check if container is running + is_running = container.container_is_running("test_running_container") + self.assertTrue(is_running) + + # Stop and remove the container + container.stop_and_remove_container(name="test_running_container") + + # Check if container is stopped and removed + is_running = container.container_is_running("test_running_container") + self.assertFalse(is_running) + + def test_stop_and_remove_container(self): + """Test stopping and removing a container.""" + # Start a container + exit_code = container.run_container( + image=FBOSS_BUILDER_IMAGE, + command=["sleep", "1"], + ephemeral=False, + name="test_stop_and_remove_container", + ) + self.assertEqual(exit_code, 0) + + # 
Check if container is running + is_running = container.container_is_running("test_stop_and_remove_container") + self.assertTrue(is_running) + + # Stop and remove the container + exit_code = container.stop_and_remove_container( + name="test_stop_and_remove_container" + ) + self.assertEqual(exit_code, 0) + + # Check if container is stopped and removed + is_running = container.container_is_running("test_stop_and_remove_container") + self.assertFalse(is_running) + if __name__ == "__main__": unittest.main() diff --git a/fboss-image/distro_infra/Dockerfile b/fboss-image/distro_infra/Dockerfile index 0fc9d291fd083..067f114458259 100644 --- a/fboss-image/distro_infra/Dockerfile +++ b/fboss-image/distro_infra/Dockerfile @@ -4,7 +4,7 @@ RUN dnf install -y 'dnf-command(config-manager)' && \ dnf config-manager --set-enabled crb && \ dnf install -y epel-release epel-next-release && \ dnf install -y --allowerasing \ - wget curl tcpdump zstd iputils which net-tools iproute \ + wget curl tcpdump zstd iputils which net-tools iproute jq \ man dnsmasq vim nginx procps-ng && \ dnf clean all && rm -rf /var/cache/dnf @@ -15,6 +15,7 @@ COPY parts/ipxe/ipxev4.efi /distro_infra COPY parts/ipxe/ipxev6.efi /distro_infra COPY parts/autoexec.ipxe /distro_infra COPY parts/nginx.conf /distro_infra +COPY parts/getip.sh /distro_infra RUN mkdir -p /distro_infra/persistent WORKDIR /distro_infra/persistent diff --git a/fboss-image/distro_infra/distro_infra.sh b/fboss-image/distro_infra/distro_infra.sh index 43fd2059ea3da..48c17260cab8a 100755 --- a/fboss-image/distro_infra/distro_infra.sh +++ b/fboss-image/distro_infra/distro_infra.sh @@ -2,6 +2,7 @@ INTERFACE="" PERSIST_DIR="" +DISTRO_CONTAINER_NAME="fboss-distro-infra" # This should match DISTRO_INFRA_CONTAINER in distro_cli/lib/distro_infra.py help() { echo "Usage: $0 --intf --persist-dir " @@ -55,7 +56,11 @@ fi mkdir -p "${PERSIST_DIR}" +# Write interface name to persistent directory so it can be read by distro_cli +echo -n "${INTERFACE}" 
>"${PERSIST_DIR}/interface_name.txt" + # Run the Docker container with the parsed arguments docker run --rm -it --network host --cap-add=NET_ADMIN \ --volume "$(realpath "${PERSIST_DIR}")":/distro_infra/persistent:rw \ + --name "${DISTRO_CONTAINER_NAME}" \ fboss_distro_infra /distro_infra/run_distro_infra.sh --intf "${INTERFACE}" diff --git a/fboss-image/distro_infra/parts/getip.sh b/fboss-image/distro_infra/parts/getip.sh new file mode 100755 index 0000000000000..33082866af4fb --- /dev/null +++ b/fboss-image/distro_infra/parts/getip.sh @@ -0,0 +1,349 @@ +#!/bin/bash + +# getip.sh - MAC Address to IP Resolution Utility (JSON Output) +# +# Description: +# Resolves IP addresses (IPv4/IPv6) from MAC addresses using the kernel's +# neighbor table (ARP/NDP cache). Supports optional network interface filtering. +# Returns results in JSON format. +# +# Usage: +# getip.sh [INTERFACE] +# +# Algorithm: +# 1. Check neighbor table for existing MAC-to-IP mappings +# 2. If found: Ping specific IPs to verify and refresh the mapping +# 3. If not found: Ping broadcast (IPv4) and multicast (IPv6) to discover devices +# 4. Wait for neighbor table to update (1 second) +# 5. Query neighbor table again and return the IP addresses +# +# Output Format (JSON): +# Success: +# { +# "mac": "aa:bb:cc:dd:ee:ff", +# "interface": "eth0", # Optional, if interface specified +# "ipv4": "192.168.1.100", # Optional, if IPv4 found +# "ipv6": "fe80::1" # Optional, if IPv6 found +# } +# +# Error (MAC not found): +# { +# "mac": "aa:bb:cc:dd:ee:ff", +# "error_code": "MAC_NOT_FOUND", +# "error": "MAC address not found in ip neighbor table." +# } +# +# Error (Invalid arguments): +# { +# "error_code": "INVALID_ARGUMENTS", +# "error": "MAC address argument required. Use -h for help." +# } +# +# Error (Command failed): +# { +# "error_code": "COMMAND_FAILED", +# "error": "Command 'ip -4 neighbor show dev eth99' failed: Device \"eth99\" does not exist." 
+# } +# +# Error Codes: +# MAC_NOT_FOUND - The specified MAC address was not found in the neighbor table +# INVALID_ARGUMENTS - Missing or invalid command-line arguments +# COMMAND_FAILED - A system command (ip) failed to execute (command included in error message) +# +# Exit Codes: +# 0 - Success: IP address found and returned +# 1 - Error: MAC address not found in neighbor table or command failed +# 2 - Error: Invalid arguments or missing MAC address +# +# Dependencies: +# - iproute (ip command) +# - iputils (ping, ping6 commands) +# - jq (JSON processor) + +print_usage() { + cat < [INTERFACE] + +Get the IP address associated with a MAC address and an optional interface +from the ip neighbor table. Returns results in JSON format. + +Arguments: + MAC_ADDRESS The MAC address to look up (e.g., aa:bb:cc:dd:ee:ff) + INTERFACE (Optional) The network interface to filter the search (e.g., eth0) + +Options: + -h Show this help message and exit + +Output: + JSON object containing mac, ipv4, ipv6, and optional interface fields. + On error, returns JSON with error_code and error fields. 
Possible error codes: + - INVALID_ARGUMENTS + - MAC_NOT_FOUND + - COMMAND_FAILED + +Examples: + $(basename "$0") aa:bb:cc:dd:ee:ff eth0 + $(basename "$0") aa:bb:cc:dd:ee:ff + $(basename "$0") -h + +EOF +} + +# Helper function to build JSON error output +# Args: $1=error_code, $2=error_message, $3=MAC address (optional) +build_error_json() { + local error_code="$1" + local error_msg="$2" + local mac="$3" + + # Build JSON using jq + jq -n \ + --arg mac "$mac" \ + --arg error_code "$error_code" \ + --arg error "$error_msg" \ + '(if $mac != "" then {mac: $mac} else {} end) + + {error_code: $error_code, error: $error}' +} + +# Helper function to build JSON output for successful MAC-to-IP resolution +# Args: $1=MAC address, $2=interface (optional), $3=IPv4 address (optional), $4=IPv6 address (optional) +build_success_json() { + local mac="$1" + local interface="$2" + local ipv4="$3" + local ipv6="$4" + + # Build JSON using jq + jq -n \ + --arg mac "$mac" \ + --arg interface "$interface" \ + --arg ipv4 "$ipv4" \ + --arg ipv6 "$ipv6" \ + '{mac: $mac} + + (if $interface != "" then {interface: $interface} else {} end) + + (if $ipv4 != "" then {ipv4: $ipv4} else {} end) + + (if $ipv6 != "" then {ipv6: $ipv6} else {} end)' +} + +# Get IPv4 broadcast address from local interface configs or device (if specified) +get_ipv4_broadcast() { + local target_intf="$1" + local broadcast_ip="" + local dev_option="" + + if [ -n "$target_intf" ]; then + dev_option="dev $target_intf" + fi + + # Capture both stdout and stderr + local tmp_output="/tmp/getip_broadcast_$$" + local tmp_error="/tmp/getip_broadcast_err_$$" + local cmd="ip -4 addr show ${dev_option}" + + ip -4 addr show ${dev_option} >"$tmp_output" 2>"$tmp_error" + local exit_code=$? 
+ + if [ $exit_code -ne 0 ]; then + # Command failed, return error in JSON format + local error_msg="" + error_msg=$(cat "$tmp_error" 2>/dev/null || echo "Failed to get IPv4 broadcast address") + rm -f "$tmp_output" "$tmp_error" + build_error_json "COMMAND_FAILED" "Command '$cmd' failed: $error_msg" + return 1 + fi + + broadcast_ip=$(grep -oP 'brd \K[\d.]+' "$tmp_output" | head -n 1) + rm -f "$tmp_output" "$tmp_error" + + echo "$broadcast_ip" +} + +# Get link-local multicast address for IPv6 +get_ipv6_multicast() { + # Use all-nodes multicast address + echo "ff02::1" +} + +# Helper function to get IP from neighbor table for a given MAC address +# Args: $1=IP version (4 or 6), $2=MAC address, $3=dev_option (optional) +get_ip_from_neighbor() { + local ip_version="$1" + local target_mac="$2" + local dev_option="$3" + + # Capture both stdout and stderr + local tmp_output="/tmp/getip_neighbor_${ip_version}_$$" + local tmp_error="/tmp/getip_neighbor_err_${ip_version}_$$" + local cmd="ip -${ip_version} neighbor show ${dev_option}" + + ip -"${ip_version}" neighbor show ${dev_option} >"$tmp_output" 2>"$tmp_error" + local exit_code=$? 
+ + if [ $exit_code -ne 0 ]; then + # Command failed, return error in JSON format + local error_msg="" + error_msg=$(cat "$tmp_error" 2>/dev/null || echo "Failed to query IP neighbor table") + rm -f "$tmp_output" "$tmp_error" + build_error_json "COMMAND_FAILED" "Command '$cmd' failed: $error_msg" + return 1 + fi + + local result="" + result=$(grep -i "lladdr $target_mac" "$tmp_output" | awk '{print $1}' | head -n 1) + rm -f "$tmp_output" "$tmp_error" + + echo "$result" +} + +# Helper function to ping an IP address with optional interface +# Args: $1=IP address, $2=interface (optional), $3=additional options (optional) +ping_ip() { + local ip_addr="$1" + local target_intf="$2" + local extra_options="$3" + local ping_cmd="ping" + local ping_options="-c 1 -w 1 -q" + + # Determine if IPv6 based on presence of colon in IP + if [[ $ip_addr =~ : ]]; then + ping_cmd="ping6" + fi + + # Add extra options if provided (e.g., -b for broadcast) + if [ -n "$extra_options" ]; then + ping_options="$extra_options $ping_options" + fi + + # Add interface option if provided + if [ -n "$target_intf" ]; then + ping_options="$ping_options -I $target_intf" + fi + + # Ping the IP address. Suppress output and errors. 
+ $ping_cmd $ping_options "$ip_addr" >/dev/null 2>&1 +} + +# Check if an IP is IPv6 +is_ipv6() { + local ip="$1" + [[ $ip =~ : ]] +} + +get_ip_from_mac() { + local target_mac="$1" + local target_intf="$2" # Optional interface argument + + # Build device option for ip commands + local dev_option="" + if [ -n "$target_intf" ]; then + dev_option="dev $target_intf" + fi + + # Step 1: Check the neighbor table for existing entries (both IPv4 and IPv6) + # Check for IPv4 entry + local existing_ipv4="" + existing_ipv4=$(get_ip_from_neighbor 4 "$target_mac" "$dev_option") + + # Check if IPv4 query returned an error + if echo "$existing_ipv4" | grep -q '"error_code"'; then + echo "$existing_ipv4" + return 1 + fi + + # Check for IPv6 entry + local existing_ipv6="" + existing_ipv6=$(get_ip_from_neighbor 6 "$target_mac" "$dev_option") + + # Check if IPv6 query returned an error + if echo "$existing_ipv6" | grep -q '"error_code"'; then + echo "$existing_ipv6" + return 1 + fi + + if [ -n "$existing_ipv4" ] || [ -n "$existing_ipv6" ]; then + # Entry exists, ping the specific IP(s) to verify the MAC-IP mapping + [ -n "$existing_ipv4" ] && ping_ip "$existing_ipv4" "$target_intf" + [ -n "$existing_ipv6" ] && ping_ip "$existing_ipv6" "$target_intf" + else + # Entry doesn't exist, ping the broadcast/multicast addresses + + # Ping IPv4 broadcast if we have one + local broadcast_ipv4="" + broadcast_ipv4=$(get_ipv4_broadcast "$target_intf") + + # Check if broadcast query returned an error + if echo "$broadcast_ipv4" | grep -q '"error_code"'; then + echo "$broadcast_ipv4" + return 1 + fi + + [ -n "$broadcast_ipv4" ] && ping_ip "$broadcast_ipv4" "$target_intf" "-b" + + # Ping IPv6 multicast address + local multicast_ipv6="" + multicast_ipv6=$(get_ipv6_multicast) + ping_ip "$multicast_ipv6" "$target_intf" + fi + + # Wait a moment for the neighbor table to update + sleep 1 + + # Step 2: Check the neighbor table again and return all IPs which match the MAC + # Get IPv4 address + local 
ipv4_addr="" + ipv4_addr=$(get_ip_from_neighbor 4 "$target_mac" "$dev_option") + + # Check if IPv4 query returned an error + if echo "$ipv4_addr" | grep -q '"error_code"'; then + echo "$ipv4_addr" + return 1 + fi + + # Get IPv6 address + local ipv6_addr="" + ipv6_addr=$(get_ip_from_neighbor 6 "$target_mac" "$dev_option") + + # Check if IPv6 query returned an error + if echo "$ipv6_addr" | grep -q '"error_code"'; then + echo "$ipv6_addr" + return 1 + fi + + # Build and return JSON output + build_success_json "$target_mac" "$target_intf" "$ipv4_addr" "$ipv6_addr" +} + +# Parse arguments +if [ "$1" = "-h" ] || [ "$1" = "--help" ]; then + print_usage + exit 0 +fi + +if [ -z "$1" ]; then + build_error_json "INVALID_ARGUMENTS" "MAC address argument required. Use -h for help." + exit 2 +fi + +# Get the IP address for the provided MAC address (and optional interface) +result_json=$(get_ip_from_mac "$@") + +# Parse the JSON to check if an IP was found or if there's an error +if echo "$result_json" | grep -q '"error_code"'; then + # Already has an error (e.g., COMMAND_FAILED), pass it through + echo "$result_json" + exit 1 +elif echo "$result_json" | grep -qE '"ipv4"|"ipv6"'; then + # Success case - found at least one IP address + echo "$result_json" + exit 0 +else + # No IP found and no error - this is MAC_NOT_FOUND + if [ -n "$2" ]; then + error_msg="MAC address $1 not found in ip neighbor table on interface $2." + else + error_msg="MAC address $1 not found in ip neighbor table." + fi + + build_error_json "MAC_NOT_FOUND" "$error_msg" "$1" + exit 1 +fi From 0b14af0c0840474cf797f7555dc18fa821e68916 Mon Sep 17 00:00:00 2001 From: Travis Brown Date: Wed, 4 Mar 2026 00:37:27 +0000 Subject: [PATCH 2/2] Distro CLI image command The fboss-image device image command configures the Distro Infrastructure to PXE boot the given image file for the given device on the next reboot. This replaces the manual steps previously necessary with the MVP Distro Infrastructure. 
Testing: ``` $ ./fboss-image device dc:da:4d:fc:ad:2d image nh-fboss-broadcom-xgs.tar [0.00s] Setting image for device dc:da:4d:fc:ad:2d: nh-fboss-broadcom-xgs.tar [0.07s] Using persistent directory: /home/travisb/work/upstream/fboss/fboss-image/distro_infra/data [0.07s] Extracting image tarball to /home/travisb/work/upstream/fboss/fboss-image/distro_infra/data/dc-da-4d-fc-ad-2d... [1.96s] Image extracted successfully to /home/travisb/work/upstream/fboss/fboss-image/distro_infra/data/dc-da-4d-fc-ad-2d [1.96s] Enabling PXE boot for MAC address: dc-da-4d-fc-ad-2d [2.09s] Successfully configured device dc:da:4d:fc:ad:2d with image nh-fboss-broadcom-xgs.tar [2.09s] Device is ready for PXE boot ``` Then watch the switch PXE boot on the next boot: ``` >>Checking Media Presence...... >>Media Present...... >>Start PXE over IPv6 on MAC: DC-DA-4D-FC-AD-2D. Press ESC key to abort PXE boot.. Station IP address is FC00:33:0:0:0:FB05:5030:774C Server IP address is FC00:33:0:0:0:0:0:1000 NBP filename is ipxev6.efi NBP filesize is 1158144 Bytes >>Checking Media Presence...... >>Media Present...... Downloading NBP file... NBP file downloaded successfully. iPXE initialising devices... 
``` --- fboss-image/distro_cli/cmds/device.py | 19 ++++- fboss-image/distro_cli/lib/distro_infra.py | 73 +++++++++++++++++++ fboss-image/distro_cli/tests/device_test.py | 59 +++++++++++++++ fboss-image/distro_cli/tests/test_helpers.py | 30 ++++++++ fboss-image/distro_infra/Dockerfile | 1 + .../distro_infra/parts/enable_pxeboot.sh | 66 +++++++++++++++++ .../distro_infra/parts/run_distro_infra.sh | 37 +--------- 7 files changed, 248 insertions(+), 37 deletions(-) create mode 100755 fboss-image/distro_infra/parts/enable_pxeboot.sh diff --git a/fboss-image/distro_cli/cmds/device.py b/fboss-image/distro_cli/cmds/device.py index 41801cdcdc330..b2872537919b3 100644 --- a/fboss-image/distro_cli/cmds/device.py +++ b/fboss-image/distro_cli/cmds/device.py @@ -10,11 +10,13 @@ import json import logging import os +import sys from distro_cli.lib.cli import validate_path from distro_cli.lib.distro_infra import ( DISTRO_INFRA_CONTAINER, GETIP_SCRIPT_CONTAINER_PATH, + deploy_image_to_device, get_interface_name, ) from distro_cli.lib.docker import container @@ -35,9 +37,22 @@ def image_upstream_command(args): def image_command(args): - """Set device image from file""" + """Set device image from file and configure PXE boot""" logger.info(f"Setting image for device {args.mac}: {args.image_path}") - logger.info("Device image command (stub)") + + try: + deploy_image_to_device(args.mac, args.image_path) + logger.info( + f"Successfully configured device {args.mac} with image {args.image_path}" + ) + logger.info("Device is ready for PXE boot") + + except DistroInfraError as e: + logger.error(f"Failed to configure device: {e}") + sys.exit(1) + except Exception as e: + logger.error(f"Unexpected error: {e}") + sys.exit(1) def reprovision_command(args): diff --git a/fboss-image/distro_cli/lib/distro_infra.py b/fboss-image/distro_cli/lib/distro_infra.py index fd53dc2db52fa..e585822d1408c 100644 --- a/fboss-image/distro_cli/lib/distro_infra.py +++ b/fboss-image/distro_cli/lib/distro_infra.py @@ 
-11,6 +11,7 @@ import logging import re import subprocess +import tarfile from pathlib import Path from distro_cli.lib.docker import container @@ -126,3 +127,75 @@ def find_persistent_dir() -> Path: ) from e except (json.JSONDecodeError, KeyError, IndexError) as e: raise DistroInfraError(f"Failed to parse container inspect data: {e}") from e + + +def enable_pxe_boot(mac: str) -> None: + """Enable PXE boot for a device MAC address. + + This creates the necessary directory structure and configuration files + to enable PXE boot for the specified MAC address by calling the + enable_pxeboot.sh script inside the container. + + Args: + mac: MAC address of the device + + Raises: + DistroInfraError: If operation fails + """ + dash_mac, _ = normalize_mac_address(mac) + logger.info(f"Enabling PXE boot for MAC address: {dash_mac}") + + # Call the enable_pxeboot.sh script inside the container + exit_code, stdout, stderr = container.exec_in_container( + DISTRO_INFRA_CONTAINER, + ["/distro_infra/enable_pxeboot.sh", dash_mac], + ) + + if exit_code != 0: + raise DistroInfraError(f"Failed to enable PXE boot for {dash_mac}: {stderr}") + + # Log the output from the script + if stdout: + for line in stdout.strip().split("\n"): + logger.debug(f"enable_pxeboot.sh: {line}") + + +def deploy_image_to_device(mac: str, image_path: str) -> None: + """Deploy an image to a device for PXE boot. + + This function only supports tarball images (.tar, .tar.gz, .tar.zst, etc.) + as produced by the image builder. + + Args: + mac: MAC address of the device + image_path: Path to the image tarball + + Raises: + DistroInfraError: If operation fails or image format is unsupported + """ + image_path_obj = Path(image_path) + + if not image_path_obj.exists(): + raise DistroInfraError(f"Image path not found: {image_path}") + + if not tarfile.is_tarfile(image_path_obj): + raise DistroInfraError( + f"Unsupported image format: {image_path}. " + "Only tarball images (.tar, .tar.gz, .tar.zst) are supported." 
+ ) + + persistent_dir = find_persistent_dir() + logger.info(f"Using persistent directory: {persistent_dir}") + + dash_mac, _ = normalize_mac_address(mac) + mac_dir = persistent_dir / dash_mac + + logger.info(f"Extracting image tarball to {mac_dir}...") + try: + with tarfile.open(image_path_obj, "r") as tar: + tar.extractall(path=mac_dir, filter="data") + logger.info(f"Image extracted successfully to {mac_dir}") + except tarfile.TarError as e: + raise DistroInfraError(f"Failed to extract tarball: {e}") from e + + enable_pxe_boot(mac) diff --git a/fboss-image/distro_cli/tests/device_test.py b/fboss-image/distro_cli/tests/device_test.py index a9457ec0811a6..afab271e96990 100644 --- a/fboss-image/distro_cli/tests/device_test.py +++ b/fboss-image/distro_cli/tests/device_test.py @@ -30,11 +30,14 @@ ) from distro_cli.lib.distro_infra import DISTRO_INFRA_CONTAINER from distro_cli.lib.docker import container +from distro_cli.tests.test_helpers import waitfor class TestDeviceCommands(unittest.TestCase): """Test device command group and subcommands""" + IPXE_FILES = ("ipxev4.efi", "ipxev6.efi", "autoexec.ipxe") + @classmethod def setUpClass(cls): """Set up test container before all tests""" @@ -92,6 +95,62 @@ def tearDownClass(cls): shutil.rmtree(cls.container_temp_dir, ignore_errors=True) + def setup_image_command_test(self): + """Wait for container to create PXE boot infrastructure.""" + cache_dir = self.container_persistent_dir / "cache" + + waitfor( + cache_dir.exists, + lambda: self.fail("Timed out waiting for cache directory to be created"), + ) + + for filename in self.IPXE_FILES: + cache_file = cache_dir / filename + waitfor( + cache_file.exists, + lambda f=filename: self.fail( + f"Timed out waiting for {f} to be created" + ), + ) + + def verify_image_command_common(self, mac): + """Verify common PXE boot infrastructure created by image command""" + dash_mac = mac.replace(":", "-") + mac_dir = self.container_persistent_dir / dash_mac + + 
self.assertTrue(mac_dir.exists()) + self.assertTrue(mac_dir.is_dir()) + + for ipxe_file in self.IPXE_FILES: + ipxe_path = mac_dir / ipxe_file + self.assertTrue(ipxe_path.exists()) + + pxeboot_marker = mac_dir / "pxeboot_complete" + self.assertTrue(pxeboot_marker.exists()) + + ipxev6_serverip = mac_dir / "ipxev6.efi-serverip" + if ipxev6_serverip.exists(): + content = ipxev6_serverip.read_text() + self.assertIn("#!ipxe", content) + self.assertIn("set server_ip", content) + + result = subprocess.run( + [ + "docker", + "exec", + DISTRO_INFRA_CONTAINER, + "cat", + f"/distro_infra/dnsmasq_conf.d/{dash_mac}", + ], + capture_output=True, + check=False, + text=True, + ) + self.assertEqual(result.returncode, 0) + self.assertIn(mac, result.stdout) + + return mac_dir + def setUp(self): """Set up test fixtures""" self.test_mac = "aa:bb:cc:dd:ee:ff" diff --git a/fboss-image/distro_cli/tests/test_helpers.py b/fboss-image/distro_cli/tests/test_helpers.py index 3edcccbf81676..ea4ce2111dbe7 100644 --- a/fboss-image/distro_cli/tests/test_helpers.py +++ b/fboss-image/distro_cli/tests/test_helpers.py @@ -11,6 +11,7 @@ import os import shutil import tempfile +import time from collections.abc import Generator from contextlib import contextmanager from pathlib import Path @@ -97,3 +98,32 @@ def override_artifact_store_dir(store_dir: Path) -> Generator[None, None, None]: yield finally: ArtifactStore.ARTIFACT_STORE_DIR = original + + +def waitfor(condition_fn, assert_fn, timeout=60.0, interval=0.1): + """Wait for a condition to become true with timeout. + + Repeatedly checks a condition function until it returns True or the timeout + expires. If the timeout is reached, calls the assert function to fail the test. 
+ + Args: + condition_fn: Callable that returns True when the condition is met + assert_fn: Callable to invoke if timeout expires (should fail the test) + timeout: Maximum time to wait in seconds (default: 60.0) + interval: Time to sleep between checks in seconds (default: 0.1) + + Example: + waitfor( + cache_dir.exists, + lambda: self.fail("Cache directory not created"), + timeout=30.0 + ) + """ + start = time.time() + while True: + if condition_fn(): + return + if time.time() - start > timeout: + assert_fn() + raise AssertionError("assert_fn should not have returned!") + time.sleep(interval) diff --git a/fboss-image/distro_infra/Dockerfile b/fboss-image/distro_infra/Dockerfile index 067f114458259..8a58b5033eb2e 100644 --- a/fboss-image/distro_infra/Dockerfile +++ b/fboss-image/distro_infra/Dockerfile @@ -11,6 +11,7 @@ RUN dnf install -y 'dnf-command(config-manager)' && \ RUN mkdir -p /distro_infra/dnsmasq_conf.d COPY parts/run_distro_infra.sh /distro_infra COPY parts/post_tftp.sh /distro_infra +COPY parts/enable_pxeboot.sh /distro_infra COPY parts/ipxe/ipxev4.efi /distro_infra COPY parts/ipxe/ipxev6.efi /distro_infra COPY parts/autoexec.ipxe /distro_infra diff --git a/fboss-image/distro_infra/parts/enable_pxeboot.sh b/fboss-image/distro_infra/parts/enable_pxeboot.sh new file mode 100755 index 0000000000000..e7fe8b86a9040 --- /dev/null +++ b/fboss-image/distro_infra/parts/enable_pxeboot.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# Enable PXE boot for a device MAC address +# This script is run inside the distro_infra container + +set -euo pipefail + +if [ $# -ne 1 ]; then + echo "Usage: $0 " >&2 + echo " mac_address: MAC address in dash format (aa-bb-cc-dd-ee-ff)" >&2 + exit 1 +fi + +DASH_MAC="$1" + +# Validate dash format MAC address +if ! 
echo "$DASH_MAC" | grep -qE '^[0-9a-f]{2}(-[0-9a-f]{2}){5}$'; then + echo "Error: Invalid MAC address: $DASH_MAC" >&2 + echo "Expected format: aa-bb-cc-dd-ee-ff" >&2 + exit 1 +fi + +echo "Enabling PXE boot for MAC address: $DASH_MAC" + +MAC_DIR="/distro_infra/persistent/$DASH_MAC" +mkdir -p "$MAC_DIR" +chmod 777 "$MAC_DIR" + +CACHE_DIR="/distro_infra/persistent/cache" + +for filename in ipxev4.efi ipxev6.efi autoexec.ipxe; do + SRC="$CACHE_DIR/$filename" + DST="$MAC_DIR/$filename" + rm -f "$DST" + ln "$SRC" "$DST" +done + +touch "$MAC_DIR/pxeboot_complete" + +INTERFACE_FILE="/distro_infra/persistent/interface_name.txt" +if [ ! -f "$INTERFACE_FILE" ]; then + echo "Error: Interface file not found: $INTERFACE_FILE" >&2 + exit 1 +fi + +INTERFACE=$(cat "$INTERFACE_FILE") +if [ -z "$INTERFACE" ]; then + echo "Error: Interface file is empty: $INTERFACE_FILE" >&2 + exit 1 +fi + +IPV6=$(ip -6 addr show dev "$INTERFACE" scope global | awk -F '[[:space:]/]+' '/inet6/{print $3; exit}') +if [ -n "$IPV6" ]; then + cat >"$MAC_DIR/ipxev6.efi-serverip" <"/distro_infra/dnsmasq_conf.d/$DASH_MAC" + +echo "PXE boot enabled for $DASH_MAC" +echo "MAC directory: $MAC_DIR" diff --git a/fboss-image/distro_infra/parts/run_distro_infra.sh b/fboss-image/distro_infra/parts/run_distro_infra.sh index d4c90f866f80c..c6a435ba6b5a1 100755 --- a/fboss-image/distro_infra/parts/run_distro_infra.sh +++ b/fboss-image/distro_infra/parts/run_distro_infra.sh @@ -79,38 +79,5 @@ dnsmasq --interface="${INTERFACE}" --no-daemon \ --dhcp-range=tag:fbossdut,::fb05:5000:0001,::fb05:50ff:ffff,constructor:"$INTERFACE",5m \ --dhcp-option=tag:fbossdut,option6:bootfile-url,tftp://["${v6_ip}"]/ipxev6.efi & -sleep 2 # Wait for dnsmasq log spew - -# Loop asking the user for a MAC address, then creating the appropriate configuration files. 
Exiting the loop on an -# empty MAC -while read -rp "Enter MAC address (blank to exit): " mac; do - if [[ ${#mac} -eq 0 ]]; then - break - elif [[ ${#mac} -ne 17 ]]; then - echo "Invalid MAC address" - continue - fi - - dashmac=$(echo "$mac" | tr '[:upper:]:' '[:lower:]-') - colonmac=$(echo "$dashmac" | tr '-' ':') - - mkdir -m 777 "/distro_infra/persistent/${dashmac}" 2>/dev/null - ln -f /distro_infra/persistent/cache/ipxev4.efi "/distro_infra/persistent/${dashmac}/ipxev4.efi" - ln -f /distro_infra/persistent/cache/ipxev6.efi "/distro_infra/persistent/${dashmac}/ipxev6.efi" - ln -f /distro_infra/persistent/cache/autoexec.ipxe "/distro_infra/persistent/${dashmac}/autoexec.ipxe" - touch "/distro_infra/persistent/${dashmac}/pxeboot_complete" - - # IPv6 - # When booting over IPv6, iPXE only receives a fully-formed bootfile-url DHCPv6 option and it appears there is no - # way to give just iPXE other options. bootfile-url becomes the iPXE ${filename} setting, but is a full URL and iPXE - # scripting is not powerful enough to extract just the server IP from it so we can use HTTP downloading for the - # large artifacts. Thus we autogenerate this iPXE script simply to set the server IP to be used by autoexec.ipxe. - echo "#!ipxe" >"/distro_infra/persistent/${dashmac}/ipxev6.efi-serverip" - echo "set server_ip [${v6_ip}]" >>"/distro_infra/persistent/${dashmac}/ipxev6.efi-serverip" - echo "imgexec autoexec.ipxe" >>"/distro_infra/persistent/${dashmac}/ipxev6.efi-serverip" - - # Activate IPv4 and IPv6 - echo "${colonmac},id:*,set:fbossdut" >"/distro_infra/dnsmasq_conf.d/${dashmac}" - - sleep 1 # Wait for dnsmasq log spew -done +# Block on dnsmasq running in the background +wait