From dfdd26ef913fdde9e4e84af850d6ad3dae7bca12 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Fri, 27 Mar 2026 14:23:34 +0100 Subject: [PATCH 01/14] ptyfwd: Add transparent flag --- src/shared/ptyfwd.c | 29 ++++++++++++++++------------- src/shared/ptyfwd.h | 3 +++ 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/shared/ptyfwd.c b/src/shared/ptyfwd.c index 2e8d77dee1c43..88cfd596d0508 100644 --- a/src/shared/ptyfwd.c +++ b/src/shared/ptyfwd.c @@ -669,19 +669,22 @@ static int do_shovel(PTYForward *f) { f->stdin_event_source = sd_event_source_unref(f->stdin_event_source); } else { - /* Check if ^] has been pressed three times within one second. If we get this we quite - * immediately. */ - RequestOperation q = look_for_escape(f, f->in_buffer + f->in_buffer_full, k); - f->in_buffer_full += (size_t) k; - if (q < 0) - return q; - if (q == REQUEST_EXIT) - return -ECANCELED; - if (q >= REQUEST_HOTKEY_A && q <= REQUEST_HOTKEY_Z && f->hotkey_handler) { - r = f->hotkey_handler(f, q - REQUEST_HOTKEY_BASE, f->hotkey_userdata); - if (r < 0) - return r; - } + if (!FLAGS_SET(f->flags, PTY_FORWARD_TRANSPARENT)) { + /* Check if ^] has been pressed three times within one second. If we get this we quit + * immediately. 
*/ + RequestOperation q = look_for_escape(f, f->in_buffer + f->in_buffer_full, k); + f->in_buffer_full += (size_t) k; + if (q < 0) + return q; + if (q == REQUEST_EXIT) + return -ECANCELED; + if (q >= REQUEST_HOTKEY_A && q <= REQUEST_HOTKEY_Z && f->hotkey_handler) { + r = f->hotkey_handler(f, q - REQUEST_HOTKEY_BASE, f->hotkey_userdata); + if (r < 0) + return r; + } + } else + f->in_buffer_full += (size_t) k; } did_something = true; diff --git a/src/shared/ptyfwd.h b/src/shared/ptyfwd.h index 1c1246f37f163..f92676dabe3e8 100644 --- a/src/shared/ptyfwd.h +++ b/src/shared/ptyfwd.h @@ -17,6 +17,9 @@ typedef enum PTYForwardFlags { /* Don't tint the background, or set window title */ PTY_FORWARD_DUMB_TERMINAL = 1 << 3, + + /* Don't interpret escape sequences (^] exit, hotkeys), just forward everything as-is */ + PTY_FORWARD_TRANSPARENT = 1 << 4, } PTYForwardFlags; typedef int (*PTYForwardHangupHandler)(PTYForward *f, int rcode, void *userdata); From d6125929ef73fea6dd3e9261b572fecbc29ae2ff Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Fri, 27 Mar 2026 14:24:00 +0100 Subject: [PATCH 02/14] vmspawn: Use qemu config file for smp and memory Pass -no-user-config while we're at it to avoid loading qemu config from /etc, which is more likely to cause hard-to-debug issues than to do something useful.
--- src/vmspawn/vmspawn.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index 02f2b0df2e08a..466b0baceefd8 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -2292,6 +2292,16 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { return r; } + r = qemu_config_section(config_file, "smp-opts", /* id= */ NULL, + "cpus", arg_cpus ?: "1"); + if (r < 0) + return r; + + r = qemu_config_section(config_file, "memory", /* id= */ NULL, + "size", mem); + if (r < 0) + return r; + r = qemu_config_section(config_file, "object", "rng0", "qom-type", "rng-random", "filename", "/dev/urandom"); @@ -2333,8 +2343,7 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { /* Start building the cmdline for items that must remain as command line arguments */ cmdline = strv_new(qemu_binary, - "-smp", arg_cpus ?: "1", - "-m", mem); + "-no-user-config"); if (!cmdline) return log_oom(); From 6a1d44dde4972a2ffe8734e327fda89c7724c4c9 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Fri, 27 Mar 2026 14:25:26 +0100 Subject: [PATCH 03/14] vmspawn: Beef up --console=gui Improvements taken from mkosi. While we're at it, put more of the stuff in config files. 
--- src/vmspawn/vmspawn.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index 466b0baceefd8..2b5c3da5125b5 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -2578,11 +2578,27 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { } case CONSOLE_GUI: - /* -vga is a convenience option, keep on cmdline */ - r = strv_extend_many(&cmdline, "-vga", "virtio"); + /* -display has no config file equivalent */ + r = strv_extend_many(&cmdline, "-display", "sdl,gl=auto", "-vga", "none"); if (r < 0) return log_oom(); + r = qemu_config_section(config_file, "device", "vga0", + "driver", "virtio-vga"); + if (r < 0) + return r; + + r = qemu_config_section(config_file, "audiodev", "audio0", + "driver", "default"); + if (r < 0) + return r; + + r = qemu_config_section(config_file, "device", "virtio-sound0", + "driver", "virtio-sound-pci", + "audiodev", "audio0"); + if (r < 0) + return r; + r = qemu_config_section(config_file, "device", "virtio-serial0", "driver", "virtio-serial"); if (r < 0) From a57b215f3e6a3e5d195f69359569898b09ab32e7 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Fri, 27 Mar 2026 14:26:16 +0100 Subject: [PATCH 04/14] vmspawn: Beef up credential handling If we're direct kernel booting on ARM, SMBIOS won't work. Add fw_cfg and kernel command line fallbacks for when SMBIOS isn't available.
--- src/vmspawn/vmspawn-util.h | 6 +++ src/vmspawn/vmspawn.c | 83 ++++++++++++++++++++++++++++++-------- 2 files changed, 72 insertions(+), 17 deletions(-) diff --git a/src/vmspawn/vmspawn-util.h b/src/vmspawn/vmspawn-util.h index 90efd93661224..9111bbab3423e 100644 --- a/src/vmspawn/vmspawn-util.h +++ b/src/vmspawn/vmspawn-util.h @@ -33,6 +33,12 @@ # define ARCHITECTURE_SUPPORTS_HPET 0 #endif +#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__) +# define ARCHITECTURE_SUPPORTS_FW_CFG 1 +#else +# define ARCHITECTURE_SUPPORTS_FW_CFG 0 +#endif + #if defined(__x86_64__) || defined(__i386__) # define QEMU_MACHINE_TYPE "q35" #elif defined(__arm__) || defined(__aarch64__) || defined(__riscv) || defined(__loongarch64) || defined(__m68k__) diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index 2b5c3da5125b5..889711b0c5614 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -3069,10 +3069,6 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { if (r < 0) return log_error_errno(r, "Failed to create temporary directory: %m"); - r = cmdline_add_kernel_cmdline(&cmdline, kernel, smbios_dir); - if (r < 0) - return r; - r = cmdline_add_smbios11(&cmdline, smbios_dir); if (r < 0) return r; @@ -3275,16 +3271,19 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { return log_error_errno(r, "Failed to set credential systemd.unit-dropin.sshd-vsock@.service: %m"); } - if (ARCHITECTURE_SUPPORTS_SMBIOS) - FOREACH_ARRAY(cred, arg_credentials.credentials, arg_credentials.n_credentials) { - _cleanup_free_ char *p = NULL, *cred_data_b64 = NULL; - ssize_t n; + FOREACH_ARRAY(cred, arg_credentials.credentials, arg_credentials.n_credentials) { + _cleanup_free_ char *cred_data_b64 = NULL; + ssize_t n; - n = base64mem(cred->data, cred->size, &cred_data_b64); - if (n < 0) - return log_oom(); + n = base64mem(cred->data, cred->size, &cred_data_b64); + if (n < 0) + return log_oom(); - p = 
path_join(smbios_dir, cred->id); + /* SMBIOS is always available on x86, but on ARM it requires UEFI firmware + * and does not work with direct kernel boot. */ + if (ARCHITECTURE_SUPPORTS_SMBIOS && + (IN_SET(native_architecture(), ARCHITECTURE_X86, ARCHITECTURE_X86_64) || !kernel)) { + _cleanup_free_ char *p = path_join(smbios_dir, cred->id); if (!p) return log_oom(); @@ -3295,14 +3294,64 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { if (r < 0) return log_error_errno(r, "Failed to write smbios credential file %s: %m", p); - r = strv_extend(&cmdline, "-smbios"); - if (r < 0) + if (strv_extend(&cmdline, "-smbios") < 0) return log_oom(); - r = strv_extend_joined(&cmdline, "type=11,path=", p); - if (r < 0) + if (strv_extend_joined(&cmdline, "type=11,path=", p) < 0) return log_oom(); - } + + } else if (ARCHITECTURE_SUPPORTS_FW_CFG) { + /* fw_cfg keys are limited to 55 characters */ + _cleanup_free_ char *key = strjoin("opt/io.systemd.credentials/", cred->id); + if (!key) + return log_oom(); + + if (strlen(key) <= 55) { + _cleanup_free_ char *p = path_join(smbios_dir, cred->id); + if (!p) + return log_oom(); + + r = write_data_file_atomic_at( + AT_FDCWD, p, + &IOVEC_MAKE(cred->data, cred->size), + WRITE_DATA_FILE_MODE_0400); + if (r < 0) + return log_error_errno(r, "Failed to write fw_cfg credential file %s: %m", p); + + if (strv_extend(&cmdline, "-fw_cfg") < 0) + return log_oom(); + + if (strv_extendf(&cmdline, "name=%s,file=%s", key, p) < 0) + return log_oom(); + + continue; + } + + /* Fall through to kernel command line if key is too long */ + log_debug("fw_cfg key '%s' exceeds 55 character limit, falling back to kernel command line.", key); + + if (!kernel) { + log_warning("Cannot pass credential '%s' to VM, fw_cfg key exceeds 55 character limit and no kernel for direct boot specified.", + cred->id); + continue; + } + + if (strv_extendf(&arg_kernel_cmdline_extra, + "systemd.set_credential_binary=%s:%s", cred->id, cred_data_b64) < 0) + 
return log_oom(); + + } else if (kernel) { + if (strv_extendf(&arg_kernel_cmdline_extra, + "systemd.set_credential_binary=%s:%s", cred->id, cred_data_b64) < 0) + return log_oom(); + } else + log_warning("Cannot pass credential '%s' to VM, native architecture doesn't support SMBIOS or fw_cfg and no kernel for direct boot specified.", + cred->id); + } + + r = cmdline_add_kernel_cmdline(&cmdline, kernel, smbios_dir); + if (r < 0) + return r; if (use_vsock) { notify_sock_fd = open_vsock(); From e754fa17b0f02c30d31863062ab7d4742475d0d0 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Fri, 27 Mar 2026 13:38:47 +0000 Subject: [PATCH 05/14] vmspawn: use PTY for native console to avoid QEMU O_NONBLOCK issue QEMU's stdio chardev sets O_NONBLOCK on both stdin and stdout (see chardev/char-stdio.c [1] and chardev/char-fd.c [2]). Since forked processes share file descriptions, and on a terminal all three stdio fds typically reference the same file description, this affects our own stdio too. Avoid this by using a PTY with chardev serial instead of chardev stdio for native console mode, matching the approach already used for interactive and read-only modes. The PTY forwarder shovels bytes transparently between our stdio and QEMU's PTY using the existing PTY_FORWARD_DUMB_TERMINAL flag and the new PTY_FORWARD_TRANSPARENT flag, which disable terminal decoration (background tinting, window title, OSC context) and escape sequence handling (Ctrl-] exit, hotkeys) respectively. The chardev is configured with mux=on so the QEMU monitor remains accessible via Ctrl-a c. Also dedup CONSOLE_NATIVE, CONSOLE_READ_ONLY, and CONSOLE_INTERACTIVE handling by using fallthrough, with the only differences being the ptyfwd flags, mux setting, and monitor section.
[1] https://gitlab.com/qemu-project/qemu/-/blob/master/chardev/char-stdio.c [2] https://gitlab.com/qemu-project/qemu/-/blob/master/chardev/char-fd.c Co-developed-by: Claude Opus 4.6 --- src/vmspawn/vmspawn.c | 91 +++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 50 deletions(-) diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index 889711b0c5614..b6594a6a4b2bc 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -2541,12 +2541,22 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { PTYForwardFlags ptyfwd_flags = 0; switch (arg_console_mode) { + case CONSOLE_NATIVE: + /* Use a PTY instead of chardev stdio to prevent QEMU from setting O_NONBLOCK on + * our stdio file descriptions (see qemu's chardev/char-stdio.c and char-fd.c). + * Use PTY_FORWARD_DUMB_TERMINAL|PTY_FORWARD_TRANSPARENT so the forwarder just + * shovels bytes without any terminal manipulation or escape sequence handling. */ + ptyfwd_flags |= PTY_FORWARD_DUMB_TERMINAL|PTY_FORWARD_TRANSPARENT; + + _fallthrough_; + case CONSOLE_READ_ONLY: - ptyfwd_flags |= PTY_FORWARD_READ_ONLY; + if (arg_console_mode == CONSOLE_READ_ONLY) + ptyfwd_flags |= PTY_FORWARD_READ_ONLY; _fallthrough_; - case CONSOLE_INTERACTIVE: { + case CONSOLE_INTERACTIVE: { _cleanup_free_ char *pty_path = NULL; master = openpt_allocate(O_RDWR|O_NONBLOCK, &pty_path); @@ -2562,9 +2572,11 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { if (r < 0) return r; + /* Enable mux for native console so the QEMU monitor is accessible via Ctrl-a c */ r = qemu_config_section(config_file, "chardev", "console", "backend", "serial", - "path", pty_path); + "path", pty_path, + "mux", on_off(arg_console_mode == CONSOLE_NATIVE)); if (r < 0) return r; @@ -2574,6 +2586,13 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { if (r < 0) return r; + if (arg_console_mode == CONSOLE_NATIVE) { + r = qemu_config_section(config_file, "mon", "mon0", + 
"chardev", "console"); + if (r < 0) + return r; + } + break; } @@ -2620,36 +2639,6 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { break; - case CONSOLE_NATIVE: - r = strv_extend_many(&cmdline, "-nographic", "-nodefaults"); - if (r < 0) - return log_oom(); - - r = qemu_config_section(config_file, "chardev", "console", - "backend", "stdio", - "mux", "on", - "signal", "off"); - if (r < 0) - return r; - - r = qemu_config_section(config_file, "device", "vmspawn-virtio-serial-pci", - "driver", "virtio-serial-pci"); - if (r < 0) - return r; - - r = qemu_config_section(config_file, "device", "virtconsole0", - "driver", "virtconsole", - "chardev", "console"); - if (r < 0) - return r; - - r = qemu_config_section(config_file, "mon", "mon0", - "chardev", "console"); - if (r < 0) - return r; - - break; - case CONSOLE_HEADLESS: r = strv_extend_many(&cmdline, "-nographic", "-nodefaults"); if (r < 0) @@ -3582,29 +3571,31 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { _cleanup_(osc_context_closep) sd_id128_t osc_context_id = SD_ID128_NULL; _cleanup_(pty_forward_freep) PTYForward *forward = NULL; if (master >= 0) { - if (!terminal_is_dumb()) { - r = osc_context_open_vm(arg_machine, /* ret_seq= */ NULL, &osc_context_id); - if (r < 0) - return r; - } - r = pty_forward_new(event, master, ptyfwd_flags, &forward); if (r < 0) return log_error_errno(r, "Failed to create PTY forwarder: %m"); - if (!arg_background) { - _cleanup_free_ char *bg = NULL; + if (!FLAGS_SET(ptyfwd_flags, PTY_FORWARD_DUMB_TERMINAL)) { + if (!terminal_is_dumb()) { + r = osc_context_open_vm(arg_machine, /* ret_seq= */ NULL, &osc_context_id); + if (r < 0) + return r; + } - r = terminal_tint_color(130 /* green */, &bg); - if (r < 0) - log_debug_errno(r, "Failed to determine terminal background color, not tinting."); - else - (void) pty_forward_set_background_color(forward, bg); - } else if (!isempty(arg_background)) - (void) 
pty_forward_set_background_color(forward, arg_background); + if (!arg_background) { + _cleanup_free_ char *bg = NULL; - (void) pty_forward_set_window_title(forward, GLYPH_GREEN_CIRCLE, /* hostname= */ NULL, - STRV_MAKE("Virtual Machine", arg_machine)); + r = terminal_tint_color(130 /* green */, &bg); + if (r < 0) + log_debug_errno(r, "Failed to determine terminal background color, not tinting."); + else + (void) pty_forward_set_background_color(forward, bg); + } else if (!isempty(arg_background)) + (void) pty_forward_set_background_color(forward, arg_background); + + (void) pty_forward_set_window_title(forward, GLYPH_GREEN_CIRCLE, /* hostname= */ NULL, + STRV_MAKE("Virtual Machine", arg_machine)); + } } r = sd_event_loop(event); From 44487c47b786db0829c1db609c1ee4c9b0f5caa8 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Fri, 27 Mar 2026 13:53:02 +0000 Subject: [PATCH 06/14] vmspawn: add --cxl= option and memory hotplug support Add --cxl=BOOL option to enable CXL (Compute Express Link) support in the virtual machine. When enabled, adds cxl=on to the QEMU machine configuration. Supported on x86 and ARM architectures, matching mkosi's CXL= setting. Extend --ram= to accept an optional maximum size and hotplug slot count for memory hotplug, using the syntax --ram=SIZE[:MAXSIZE[:SLOTS]] (e.g. --ram=2G:8G). When a maximum is specified, the maxmem and slots keys are added to the QEMU memory configuration section to enable memory hotplug up to the given limit; the number of slots defaults to 1 when not specified. Co-developed-by: Claude Opus 4.6 --- man/systemd-vmspawn.xml | 17 ++++- shell-completion/bash/systemd-vmspawn | 2 +- src/vmspawn/vmspawn-util.h | 6 ++ src/vmspawn/vmspawn.c | 93 +++++++++++++++++++++++++-- 4 files changed, 109 insertions(+), 9 deletions(-) diff --git a/man/systemd-vmspawn.xml b/man/systemd-vmspawn.xml index 28070cfe8f66c..be39ba23bd96d 100644 --- a/man/systemd-vmspawn.xml +++ b/man/systemd-vmspawn.xml @@ -166,10 +166,12 @@ - + - The amount of memory to start the virtual machine with. - Defaults to 2G.
+ The amount of memory to start the virtual machine with. Defaults to 2G. + If a maximum size is specified after a colon, memory hotplug is enabled with the given + upper limit. The number of hotplug slots can optionally be specified after a second colon + and defaults to 1. @@ -184,6 +186,15 @@ + + + + Controls whether to enable CXL (Compute Express Link) support in the virtual + machine. Only supported on x86 and ARM architectures. + + + + diff --git a/shell-completion/bash/systemd-vmspawn b/shell-completion/bash/systemd-vmspawn index b035a42a6550e..995aeb1271298 100644 --- a/shell-completion/bash/systemd-vmspawn +++ b/shell-completion/bash/systemd-vmspawn @@ -31,7 +31,7 @@ _systemd_vmspawn() { local -A OPTS=( [STANDALONE]='-h --help --version -q --quiet --no-pager -n --network-tap --network-user-mode --user --system -x --ephemeral' [PATH]='-D --directory -i --image --linux --initrd --extra-drive --forward-journal --efi-nvram-template' - [BOOL]='--kvm --vsock --tpm --discard-disk --register --pass-ssh-key' + [BOOL]='--kvm --cxl --vsock --tpm --discard-disk --register --pass-ssh-key' [SECURE_BOOT]='--secure-boot' [FIRMWARE]='--firmware' [FIRMWARE_FEATURES]='--firmware-features' diff --git a/src/vmspawn/vmspawn-util.h b/src/vmspawn/vmspawn-util.h index 9111bbab3423e..75644b250287c 100644 --- a/src/vmspawn/vmspawn-util.h +++ b/src/vmspawn/vmspawn-util.h @@ -39,6 +39,12 @@ # define ARCHITECTURE_SUPPORTS_FW_CFG 0 #endif +#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__) +# define ARCHITECTURE_SUPPORTS_CXL 1 +#else +# define ARCHITECTURE_SUPPORTS_CXL 0 +#endif + #if defined(__x86_64__) || defined(__i386__) # define QEMU_MACHINE_TYPE "q35" #elif defined(__arm__) || defined(__aarch64__) || defined(__riscv) || defined(__loongarch64) || defined(__m68k__) diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index b6594a6a4b2bc..6eb135cd27db5 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -126,7 +126,10 @@ 
static char *arg_slice = NULL; static char **arg_property = NULL; static char *arg_cpus = NULL; static uint64_t arg_ram = UINT64_C(2) * U64_GB; +static uint64_t arg_ram_max = 0; +static unsigned arg_ram_slots = 0; static int arg_kvm = -1; +static int arg_cxl = -1; static int arg_vsock = -1; static unsigned arg_vsock_cid = VMADDR_CID_ANY; static int arg_tpm = -1; @@ -223,8 +226,11 @@ static int help(void) { " Specify disk type (virtio-blk, virtio-scsi, nvme; default: virtio-blk)\n" "\n%3$sHost Configuration:%4$s\n" " --cpus=CPUS Configure number of CPUs in guest\n" - " --ram=BYTES Configure guest's RAM size\n" + " --ram=BYTES[:MAXBYTES[:SLOTS]]\n" + " Configure guest's RAM size (and max/slots for\n" + " hotplug)\n" " --kvm=BOOL Enable use of KVM\n" + " --cxl=BOOL Enable CXL support\n" " --vsock=BOOL Override autodetection of VSOCK support\n" " --vsock-cid=CID Specify the CID to use for the guest's VSOCK support\n" " --tpm=BOOL Enable use of a virtual TPM\n" @@ -331,6 +337,7 @@ static int parse_argv(int argc, char *argv[]) { ARG_CPUS, ARG_RAM, ARG_KVM, + ARG_CXL, ARG_VSOCK, ARG_VSOCK_CID, ARG_TPM, @@ -388,6 +395,7 @@ static int parse_argv(int argc, char *argv[]) { { "ram", required_argument, NULL, ARG_RAM }, { "qemu-mem", required_argument, NULL, ARG_RAM }, /* Compat alias */ { "kvm", required_argument, NULL, ARG_KVM }, + { "cxl", required_argument, NULL, ARG_CXL }, { "qemu-kvm", required_argument, NULL, ARG_KVM }, /* Compat alias */ { "vsock", required_argument, NULL, ARG_VSOCK }, { "qemu-vsock", required_argument, NULL, ARG_VSOCK }, /* Compat alias */ @@ -506,11 +514,48 @@ static int parse_argv(int argc, char *argv[]) { return r; break; - case ARG_RAM: - r = parse_size(optarg, 1024, &arg_ram); - if (r < 0) - return log_error_errno(r, "Failed to parse --ram=%s: %m", optarg); + case ARG_RAM: { + const char *e = strchr(optarg, ':'); + if (e) { + _cleanup_free_ char *first = strndup(optarg, e - optarg); + if (!first) + return log_oom(); + + r = parse_size(first, 1024, 
&arg_ram); + if (r < 0) + return log_error_errno(r, "Failed to parse --ram=%s: %m", optarg); + + const char *e2 = strchr(e + 1, ':'); + if (e2) { + _cleanup_free_ char *second = strndup(e + 1, e2 - e - 1); + if (!second) + return log_oom(); + + r = parse_size(second, 1024, &arg_ram_max); + if (r < 0) + return log_error_errno(r, "Failed to parse --ram=%s: %m", optarg); + + r = safe_atou(e2 + 1, &arg_ram_slots); + if (r < 0) + return log_error_errno(r, "Failed to parse --ram=%s: %m", optarg); + } else { + r = parse_size(e + 1, 1024, &arg_ram_max); + if (r < 0) + return log_error_errno(r, "Failed to parse --ram=%s: %m", optarg); + + arg_ram_slots = 0; + } + } else { + r = parse_size(optarg, 1024, &arg_ram); + if (r < 0) + return log_error_errno(r, "Failed to parse --ram=%s: %m", optarg); + + arg_ram_max = 0; + arg_ram_slots = 0; + } + break; + } case ARG_KVM: r = parse_tristate_argument_with_auto("--kvm=", optarg, &arg_kvm); @@ -518,6 +563,12 @@ static int parse_argv(int argc, char *argv[]) { return r; break; + case ARG_CXL: + r = parse_tristate_argument_with_auto("--cxl=", optarg, &arg_cxl); + if (r < 0) + return r; + break; + case ARG_VSOCK: r = parse_tristate_argument_with_auto("--vsock=", optarg, &arg_vsock); if (r < 0) @@ -975,6 +1026,12 @@ static int parse_argv(int argc, char *argv[]) { if (!strv_isempty(arg_bind_user_groups) && strv_isempty(arg_bind_user)) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot use --bind-user-group= without --bind-user="); + if (arg_ram_max > 0 && arg_ram_max < arg_ram) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Maximum RAM size must be greater than or equal to initial RAM size."); + + if (arg_ram_slots > 0 && arg_ram_max == 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Memory hotplug slots require a maximum RAM size."); + if (arg_ephemeral && arg_extra_drives.n_drives > 0) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot use --ephemeral with --extra-drive="); @@ -2232,6 +2289,11 @@ static int 
run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { if (asprintf(&mem, "%" PRIu64 "M", DIV_ROUND_UP(arg_ram, U64_MB)) < 0) return log_oom(); + _cleanup_free_ char *mem_max = NULL; + if (arg_ram_max > 0) + if (asprintf(&mem_max, "%" PRIu64 "M", DIV_ROUND_UP(arg_ram_max, U64_MB)) < 0) + return log_oom(); + /* Create runtime directory for the QEMU config file and other state */ _cleanup_free_ char *runtime_dir = NULL; _cleanup_(rm_rf_physical_and_freep) char *runtime_dir_destroy = NULL; @@ -2280,6 +2342,17 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { return r; } + if (!ARCHITECTURE_SUPPORTS_CXL) { + if (arg_cxl > 0) + return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "CXL not supported on %s, refusing.", architecture_to_string(native_architecture())); + if (arg_cxl < 0) + log_debug("CXL not supported on %s, disabling.", architecture_to_string(native_architecture())); + } else if (arg_cxl > 0) { + r = qemu_config_key(config_file, "cxl", "on"); + if (r < 0) + return r; + } + if (arg_directory || arg_runtime_mounts.n_mounts != 0) { r = qemu_config_key(config_file, "memory-backend", "mem"); if (r < 0) @@ -2302,6 +2375,16 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { if (r < 0) return r; + if (mem_max) { + r = qemu_config_key(config_file, "maxmem", mem_max); + if (r < 0) + return r; + + r = qemu_config_keyf(config_file, "slots", "%u", arg_ram_slots > 0 ? arg_ram_slots : 1u); + if (r < 0) + return r; + } + r = qemu_config_section(config_file, "object", "rng0", "qom-type", "rng-random", "filename", "/dev/urandom"); From 96cc064bb40e920c500ded6973fbbb9a906044dd Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Fri, 27 Mar 2026 14:38:09 +0000 Subject: [PATCH 07/14] vmspawn: add --forward-journal-config= for journal-remote configuration Add --forward-journal-config=PATH to specify a custom configuration file for systemd-journal-remote when forwarding journal entries from the VM. 
Defaults to /dev/null when not specified, causing systemd-journal-remote to ignore its default configuration files. This allows callers like mkosi to control journal-remote settings (MaxUse, KeepFree, MaxFileSize, etc.) without interfering with the host's journal-remote configuration. Add $SYSTEMD_JOURNAL_REMOTE_CONFIG_FILE environment variable support to systemd-journal-remote. When set, the specified file is read instead of the standard configuration file paths and drop-in directories. When set to /dev/null, configuration file parsing is skipped entirely. Make fork_notify() argv parameter optional. When NULL is passed, fork_notify() returns 0 in the child (with $NOTIFY_SOCKET set) and lets the caller run custom code before exec. This allows vmspawn to set $SYSTEMD_JOURNAL_REMOTE_CONFIG_FILE in the child environment without polluting the parent process. Co-developed-by: Claude Opus 4.6 --- docs/ENVIRONMENT.md | 5 ++++ man/systemd-vmspawn.xml | 12 +++++++++ shell-completion/bash/systemd-vmspawn | 2 +- src/journal-remote/journal-remote-main.c | 17 ++++++++++++ src/shared/fork-notify.c | 10 ++++--- src/vmspawn/vmspawn.c | 33 ++++++++++++++++++++++-- 6 files changed, 73 insertions(+), 6 deletions(-) diff --git a/docs/ENVIRONMENT.md b/docs/ENVIRONMENT.md index 5390754661879..a40ac4744db47 100644 --- a/docs/ENVIRONMENT.md +++ b/docs/ENVIRONMENT.md @@ -679,6 +679,11 @@ SYSTEMD_HOME_DEBUG_SUFFIX=foo \ string format. Overrides the default maximum allowed size for a file-descriptor based input record to be stored in the journal. +* `$SYSTEMD_JOURNAL_REMOTE_CONFIG_FILE` – path to a configuration file for + `systemd-journal-remote`. When set, the specified file is used instead of the + default configuration file and drop-in directories. If set to `/dev/null`, + configuration file parsing is skipped entirely. 
+ * `$SYSTEMD_CATALOG` – path to the compiled catalog database file to use for `journalctl -x`, `journalctl --update-catalog`, `journalctl --list-catalog` and related calls. diff --git a/man/systemd-vmspawn.xml b/man/systemd-vmspawn.xml index be39ba23bd96d..ccedb45c2ec8b 100644 --- a/man/systemd-vmspawn.xml +++ b/man/systemd-vmspawn.xml @@ -672,6 +672,18 @@ + + + + Specifies a configuration file for + systemd-journal-remote8 + to use when forwarding journal entries from the VM. If not specified, + /dev/null is used, which causes + systemd-journal-remote to ignore its default configuration files. + + + + diff --git a/shell-completion/bash/systemd-vmspawn b/shell-completion/bash/systemd-vmspawn index 995aeb1271298..26c86aadb22a5 100644 --- a/shell-completion/bash/systemd-vmspawn +++ b/shell-completion/bash/systemd-vmspawn @@ -30,7 +30,7 @@ _systemd_vmspawn() { local -A OPTS=( [STANDALONE]='-h --help --version -q --quiet --no-pager -n --network-tap --network-user-mode --user --system -x --ephemeral' - [PATH]='-D --directory -i --image --linux --initrd --extra-drive --forward-journal --efi-nvram-template' + [PATH]='-D --directory -i --image --linux --initrd --extra-drive --forward-journal --forward-journal-config --efi-nvram-template' [BOOL]='--kvm --cxl --vsock --tpm --discard-disk --register --pass-ssh-key' [SECURE_BOOT]='--secure-boot' [FIRMWARE]='--firmware' diff --git a/src/journal-remote/journal-remote-main.c b/src/journal-remote/journal-remote-main.c index 0ff44ede6fc1c..69d5665df2ded 100644 --- a/src/journal-remote/journal-remote-main.c +++ b/src/journal-remote/journal-remote-main.c @@ -22,6 +22,7 @@ #include "main-func.h" #include "microhttpd-util.h" #include "parse-argument.h" +#include "path-util.h" #include "parse-helpers.h" #include "parse-util.h" #include "pretty-print.h" @@ -828,6 +829,22 @@ static int parse_config(void) { {} }; + const char *config_file = getenv("SYSTEMD_JOURNAL_REMOTE_CONFIG_FILE"); + if (config_file) { + if (path_equal(config_file, 
"/dev/null")) + return 0; + + return config_parse( + /* unit= */ NULL, + config_file, + /* f= */ NULL, + "Remote\0", + config_item_table_lookup, items, + CONFIG_PARSE_WARN, + /* userdata= */ NULL, + /* ret_stat= */ NULL); + } + return config_parse_standard_file_with_dropins( "systemd/journal-remote.conf", "Remote\0", diff --git a/src/shared/fork-notify.c b/src/shared/fork-notify.c index 6f87a2fdce2b2..bcedf729605de 100644 --- a/src/shared/fork-notify.c +++ b/src/shared/fork-notify.c @@ -90,7 +90,6 @@ static int on_child_notify(sd_event_source *s, int fd, uint32_t revents, void *u int fork_notify(char * const *argv, PidRef *ret_pidref) { int r; - assert(!strv_isempty(argv)); assert(ret_pidref); if (!is_main_thread()) @@ -119,7 +118,7 @@ int fork_notify(char * const *argv, PidRef *ret_pidref) { if (r < 0) return r; - if (DEBUG_LOGGING) { + if (DEBUG_LOGGING && argv) { _cleanup_free_ char *l = quote_command_line(argv, SHELL_ESCAPE_EMPTY); log_debug("Invoking '%s' as child.", strnull(l)); } @@ -141,6 +140,11 @@ int fork_notify(char * const *argv, PidRef *ret_pidref) { _exit(EXIT_MEMORY); } + if (!argv) { + *ret_pidref = TAKE_PIDREF(child); + return 0; /* Let the caller run custom code in the child */ + } + r = invoke_callout_binary(argv[0], argv); log_debug_errno(r, "Failed to invoke %s: %m", argv[0]); _exit(EXIT_EXEC); @@ -164,7 +168,7 @@ int fork_notify(char * const *argv, PidRef *ret_pidref) { *ret_pidref = TAKE_PIDREF(child); - return 0; + return 1; /* In the parent */ } static void fork_notify_terminate_internal(PidRef *pidref) { diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index 6eb135cd27db5..fce455b7de188 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -20,6 +20,7 @@ #include "architecture.h" #include "bootspec.h" #include "build-path.h" +#include "exit-status.h" #include "build.h" #include "bus-error.h" #include "bus-internal.h" @@ -145,6 +146,7 @@ static bool arg_firmware_describe = false; static Set 
*arg_firmware_features_include = NULL; static Set *arg_firmware_features_exclude = NULL; static char *arg_forward_journal = NULL; +static char *arg_forward_journal_config = NULL; static bool arg_register = true; static bool arg_keep_unit = false; static sd_id128_t arg_uuid = {}; @@ -185,6 +187,7 @@ STATIC_DESTRUCTOR_REGISTER(arg_linux, freep); STATIC_DESTRUCTOR_REGISTER(arg_initrds, strv_freep); STATIC_DESTRUCTOR_REGISTER(arg_runtime_mounts, runtime_mount_context_done); STATIC_DESTRUCTOR_REGISTER(arg_forward_journal, freep); +STATIC_DESTRUCTOR_REGISTER(arg_forward_journal_config, freep); STATIC_DESTRUCTOR_REGISTER(arg_kernel_cmdline_extra, strv_freep); STATIC_DESTRUCTOR_REGISTER(arg_extra_drives, extra_drive_context_done); STATIC_DESTRUCTOR_REGISTER(arg_background, freep); @@ -286,6 +289,8 @@ static int help(void) { "\n%3$sIntegration:%4$s\n" " --forward-journal=FILE|DIR\n" " Forward the VM's journal to the host\n" + " --forward-journal-config=PATH\n" + " Configuration file for systemd-journal-remote\n" " --pass-ssh-key=BOOL Create an SSH key to access the VM\n" " --ssh-key-type=TYPE Choose what type of SSH key to pass\n" "\n%3$sInput/Output:%4$s\n" @@ -354,6 +359,7 @@ static int parse_argv(int argc, char *argv[]) { ARG_SECURE_BOOT, ARG_PRIVATE_USERS, ARG_FORWARD_JOURNAL, + ARG_FORWARD_JOURNAL_CONFIG, ARG_PASS_SSH_KEY, ARG_SSH_KEY_TYPE, ARG_SET_CREDENTIAL, @@ -415,7 +421,8 @@ static int parse_argv(int argc, char *argv[]) { { "extra-drive", required_argument, NULL, ARG_EXTRA_DRIVE }, { "secure-boot", required_argument, NULL, ARG_SECURE_BOOT }, { "private-users", required_argument, NULL, ARG_PRIVATE_USERS }, - { "forward-journal", required_argument, NULL, ARG_FORWARD_JOURNAL }, + { "forward-journal", required_argument, NULL, ARG_FORWARD_JOURNAL }, + { "forward-journal-config", required_argument, NULL, ARG_FORWARD_JOURNAL_CONFIG }, { "pass-ssh-key", required_argument, NULL, ARG_PASS_SSH_KEY }, { "ssh-key-type", required_argument, NULL, ARG_SSH_KEY_TYPE }, { 
"set-credential", required_argument, NULL, ARG_SET_CREDENTIAL }, @@ -745,6 +752,12 @@ static int parse_argv(int argc, char *argv[]) { return r; break; + case ARG_FORWARD_JOURNAL_CONFIG: + r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_forward_journal_config); + if (r < 0) + return r; + break; + case ARG_PASS_SSH_KEY: r = parse_boolean_argument("--pass-ssh-key=", optarg, &arg_pass_ssh_key); if (r < 0) @@ -1032,6 +1045,9 @@ static int parse_argv(int argc, char *argv[]) { if (arg_ram_slots > 0 && arg_ram_max == 0) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Memory hotplug slots require a maximum RAM size."); + if (arg_forward_journal_config && !arg_forward_journal) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--forward-journal-config= requires --forward-journal=."); + if (arg_ephemeral && arg_extra_drives.n_drives > 0) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot use --ephemeral with --extra-drive="); @@ -1615,9 +1631,22 @@ static int start_systemd_journal_remote( if (!argv) return log_oom(); - r = fork_notify(argv, ret_pidref); + r = fork_notify(/* argv= */ NULL, ret_pidref); if (r < 0) return r; + if (r == 0) { + /* In the child */ + if (setenv("SYSTEMD_JOURNAL_REMOTE_CONFIG_FILE", + arg_forward_journal_config ?: "/dev/null", + /* overwrite= */ true) < 0) { + log_debug_errno(errno, "Failed to set $SYSTEMD_JOURNAL_REMOTE_CONFIG_FILE: %m"); + _exit(EXIT_MEMORY); + } + + r = invoke_callout_binary(argv[0], argv); + log_error_errno(r, "Failed to invoke %s: %m", argv[0]); + _exit(EXIT_EXEC); + } if (ret_listen_address) *ret_listen_address = TAKE_PTR(listen_address); From 7a3d814e949db8d5ddb7cb8a5178be9633f0274f Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Fri, 27 Mar 2026 14:57:40 +0000 Subject: [PATCH 08/14] vmspawn: pass --log-level=error and --modcaps=-mknod to virtiofsd Reduce virtiofsd log noise by setting --log-level=error, and drop the unnecessary mknod capability with --modcaps=-mknod, matching mkosi's virtiofsd 
invocation. Co-developed-by: Claude Opus 4.6 --- src/vmspawn/vmspawn.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index fce455b7de188..7e10eb8d6dff9 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -1772,7 +1772,9 @@ static int start_virtiofsd( "--shared-dir", source_uid == FOREIGN_UID_MIN ? "/run/systemd/mount-rootfs" : directory, "--xattr", "--fd", sockstr, - "--no-announce-submounts"); + "--no-announce-submounts", + "--log-level=error", + "--modcaps=-mknod"); if (!argv) return log_oom(); From fc86ea2bdfc6cbc133ca6c449171c4695bec6321 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Fri, 27 Mar 2026 14:58:35 +0000 Subject: [PATCH 09/14] vmspawn: use fstab.extra credential for runtime mounts instead of kernel cmdline Switch runtime virtiofs mount configuration from systemd.mount-extra= kernel command line parameters to the fstab.extra credential. This avoids consuming kernel command line space (which is limited) and matches the approach used by mkosi. Each mount is added as an fstab entry in the format: {tag} {destination} virtiofs {ro|rw},x-initrd.mount If the user already specified an fstab.extra credential via --set-credential= or --load-credential=, the virtiofs mount entries are appended to it rather than conflicting. 
Co-developed-by: Claude Opus 4.6 --- src/basic/escape.c | 6 +++--- src/basic/escape.h | 5 ++++- src/vmspawn/vmspawn.c | 34 ++++++++++++++++++++++++++++++---- 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/src/basic/escape.c b/src/basic/escape.c index e1771bf432278..9af8efacc7423 100644 --- a/src/basic/escape.c +++ b/src/basic/escape.c @@ -447,10 +447,10 @@ char* escape_non_printable_full(const char *str, size_t console_width, XEscapeFl FLAGS_SET(flags, XESCAPE_FORCE_ELLIPSIS)); } -char* octescape(const char *s, size_t len) { +char* octescape_full(const char *s, size_t len, const char *bad) { char *buf, *t; - /* Escapes \ and " chars, in \nnn style escaping. */ + /* Escapes all chars in bad, in addition to \ and " chars, in \nnn octal style escaping. */ assert(s || len == 0); @@ -467,7 +467,7 @@ char* octescape(const char *s, size_t len) { for (size_t i = 0; i < len; i++) { uint8_t u = (uint8_t) s[i]; - if (u < ' ' || u >= 127 || IN_SET(u, '\\', '"')) { + if (u < ' ' || u >= 127 || IN_SET(u, '\\', '"') || (bad && strchr(bad, u))) { *(t++) = '\\'; *(t++) = '0' + (u >> 6); *(t++) = '0' + ((u >> 3) & 7); diff --git a/src/basic/escape.h b/src/basic/escape.h index a8b68fa75c277..625758f2f4c9f 100644 --- a/src/basic/escape.h +++ b/src/basic/escape.h @@ -59,7 +59,10 @@ char* xescape_full(const char *s, const char *bad, size_t console_width, XEscape static inline char* xescape(const char *s, const char *bad) { return xescape_full(s, bad, SIZE_MAX, 0); } -char* octescape(const char *s, size_t len); +char* octescape_full(const char *s, size_t len, const char *bad); +static inline char* octescape(const char *s, size_t len) { + return octescape_full(s, len, NULL); +} char* decescape(const char *s, size_t len, const char *bad) _nonnull_if_nonzero_(1, 2); char* escape_non_printable_full(const char *str, size_t console_width, XEscapeFlags flags); diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index 7e10eb8d6dff9..ff76c923c9fc9 100644 --- 
a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -3112,6 +3112,8 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { return log_oom(); } + _cleanup_free_ char *fstab_extra = NULL; + for (size_t j = 0; j < arg_runtime_mounts.n_mounts; j++) { RuntimeMount *m = arg_runtime_mounts.mounts + j; _cleanup_free_ char *listen_address = NULL; @@ -3158,15 +3160,39 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { if (r < 0) return r; - _cleanup_free_ char *clean_target = xescape(m->target, "\":"); - if (!clean_target) + /* fstab uses whitespace as field separator, so octal-escape spaces in paths */ + _cleanup_free_ char *escaped_target = octescape_full(m->target, SIZE_MAX, " \t"); + if (!escaped_target) return log_oom(); - if (strv_extendf(&arg_kernel_cmdline_extra, "systemd.mount-extra=\"%s:%s:virtiofs:%s\"", - id, clean_target, m->read_only ? "ro" : "rw") < 0) + if (strextendf(&fstab_extra, "%s %s virtiofs %s,x-initrd.mount\n", + id, escaped_target, m->read_only ? 
"ro" : "rw") < 0) return log_oom(); } + if (fstab_extra) { + /* If the user already specified a fstab.extra credential, combine it with ours */ + MachineCredential *existing = machine_credential_find(&arg_credentials, "fstab.extra"); + if (existing) { + _cleanup_free_ char *combined = NULL; + + if (existing->size > 0 && existing->data[existing->size - 1] != '\n') + r = asprintf(&combined, "%.*s\n%s", (int) existing->size, existing->data, fstab_extra); + else + r = asprintf(&combined, "%.*s%s", (int) existing->size, existing->data, fstab_extra); + if (r < 0) + return log_oom(); + + erase_and_free(existing->data); + existing->data = TAKE_PTR(combined); + existing->size = strlen(existing->data); + } else { + r = machine_credential_add(&arg_credentials, "fstab.extra", fstab_extra, SIZE_MAX); + if (r < 0) + return r; + } + } + _cleanup_(rm_rf_physical_and_freep) char *smbios_dir = NULL; r = mkdtemp_malloc("/var/tmp/vmspawn-smbios-XXXXXX", &smbios_dir); if (r < 0) From 1551ddbe02d72774bf521cdb70d0454b2f73ce30 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Sat, 28 Mar 2026 13:12:25 +0000 Subject: [PATCH 10/14] vmspawn: propagate $TERM from host into VM via kernel command line When running in a console mode (interactive, native, or read-only), propagate the host's $TERM into the VM by adding TERM= and systemd.tty.term.hvc0= to the kernel command line. TERM= is picked up by PID 1 and inherited by services on /dev/console (such as emergency.service). systemd.tty.term.hvc0= is used by services directly attached to /dev/hvc0 (such as serial-getty@hvc0.service) which look up $TERM via the systemd.tty.term. kernel command line parameter. While systemd can auto-detect the terminal type via DCS XTGETTCAP, not all terminal emulators implement this, so explicitly propagating $TERM provides a more reliable experience. We skip propagation when $TERM is unset or set to "unknown" (as is the case in GitHub Actions and some other CI environments). 
Previously this was handled by mkosi synthesizing the corresponding kernel command line parameters externally. Co-developed-by: Claude Opus 4.6 --- src/vmspawn/vmspawn.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index ff76c923c9fc9..626d58f9c6592 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -3110,6 +3110,26 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { r = strv_prepend(&arg_kernel_cmdline_extra, "console=hvc0"); if (r < 0) return log_oom(); + + /* Propagate the host's $TERM into the VM via the kernel command line. TERM= is + * picked up by PID 1 and inherited by services on /dev/console, and + * systemd.tty.term.hvc0= is used by services directly attached to /dev/hvc0 (such + * as serial-getty). While systemd can auto-detect the terminal type via DCS + * XTGETTCAP, not all terminal emulators implement this, so let's always propagate + * $TERM if we have it. */ + const char *term = getenv("TERM"); + if (!isempty(term) && !streq(term, "unknown") /* some CI environments set TERM=unknown */ && + !strchr(term, ' ') && !strchr(term, '=')) { + FOREACH_STRING(tty_key, "systemd.tty.term.hvc0", "TERM") { + _cleanup_free_ char *p = strjoin(tty_key, "=", term); + if (!p) + return log_oom(); + + r = strv_consume_prepend(&arg_kernel_cmdline_extra, TAKE_PTR(p)); + if (r < 0) + return log_oom(); + } + } } _cleanup_free_ char *fstab_extra = NULL; From 0561cb936b700f67b84efde2dd0222b07f4cd523 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Sun, 29 Mar 2026 11:10:42 +0000 Subject: [PATCH 11/14] nspawn: add --runtime-scope= and replace arg_privileged with arg_runtime_scope Add --runtime-scope=system|user option to both nspawn and vmspawn for explicit runtime scope selection. In vmspawn, --system and --user are kept as undocumented compatibility aliases. The default is auto-detected from the effective UID. 
Replace all uses of the arg_privileged boolean with arg_runtime_scope comparisons throughout nspawn. Co-developed-by: Claude Opus 4.6 --- man/systemd-nspawn.xml | 12 ++++++ man/systemd-vmspawn.xml | 12 +++--- src/nspawn/nspawn.c | 90 +++++++++++++++++++++++++---------------- src/vmspawn/vmspawn.c | 12 +++++- 4 files changed, 83 insertions(+), 43 deletions(-) diff --git a/man/systemd-nspawn.xml b/man/systemd-nspawn.xml index d241ca5c52c5e..0e24e9ae99ba8 100644 --- a/man/systemd-nspawn.xml +++ b/man/systemd-nspawn.xml @@ -1922,6 +1922,18 @@ After=sys-subsystem-net-devices-ens1.device Other + + + + Takes either system or user to specify whether + to interact with the user service manager or the system service manager and whether to register with + the user machined instance or the system machined instance. If unspecified, the system service manager + and machined instance will be used when running as root, otherwise the user service manager and machined + instance will be used. + + + + diff --git a/man/systemd-vmspawn.xml b/man/systemd-vmspawn.xml index ccedb45c2ec8b..ecd8de888e941 100644 --- a/man/systemd-vmspawn.xml +++ b/man/systemd-vmspawn.xml @@ -70,13 +70,13 @@ - - + - Specify whether to interact with the user manager or the system manager and whether - to register with the user machined instance or the system machined instance. If - unspecified, the system manager and machined instance will be used when running as root, otherwise - the user manager and machined instance will be used. + Takes either system or user to specify whether + to interact with the user service manager or the system service manager and whether to register with + the user machined instance or the system machined instance. If unspecified, the system service manager + and machined instance will be used when running as root, otherwise the user service manager and machined + instance will be used. 
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 1740ab4d6eb18..2d80cefc8c7b3 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -153,7 +153,7 @@ static char *arg_hostname = NULL; /* The name the payload sees by default */ static const char *arg_selinux_context = NULL; static const char *arg_selinux_apifs_context = NULL; static char *arg_slice = NULL; -static bool arg_private_network; /* initialized depending on arg_privileged in run() */ +static bool arg_private_network; /* defaulted depending on arg_runtime_scope in verify_arguments() */ static bool arg_read_only = false; static StartMode arg_start_mode = START_PID1; static bool arg_ephemeral = false; @@ -212,7 +212,9 @@ static VolatileMode arg_volatile_mode = VOLATILE_NO; static ExposePort *arg_expose_ports = NULL; static char **arg_property = NULL; static sd_bus_message *arg_property_message = NULL; -static UserNamespaceMode arg_userns_mode; /* initialized depending on arg_privileged in run() */ +static UserNamespaceMode arg_userns_mode; /* defaulted depending on arg_runtime_scope in verify_arguments(), + * -U sets to _USER_NAMESPACE_MODE_INVALID which is resolved there + * once arg_runtime_scope has its final value. 
*/ static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U; static unsigned arg_delegate_container_ranges = 0; static UserNamespaceOwnership arg_userns_ownership = _USER_NAMESPACE_OWNERSHIP_INVALID; @@ -253,7 +255,7 @@ static char *arg_settings_filename = NULL; static Architecture arg_architecture = _ARCHITECTURE_INVALID; static ImagePolicy *arg_image_policy = NULL; static char *arg_background = NULL; -static bool arg_privileged = false; +static RuntimeScope arg_runtime_scope = _RUNTIME_SCOPE_INVALID; static bool arg_cleanup = false; static bool arg_ask_password = true; @@ -521,6 +523,9 @@ static int help(void) { " --load-credential=ID:PATH\n" " Load credential to pass to container from file or\n" " AF_UNIX stream socket.\n" + "\n%3$sOther:%4$s\n" + " --runtime-scope=system|user\n" + " Run in system or user service manager scope\n" "\nSee the %2$s for details.\n", program_invocation_short_name, link, @@ -749,6 +754,7 @@ static int parse_argv(int argc, char *argv[]) { ARG_CLEANUP, ARG_NO_ASK_PASSWORD, ARG_MSTACK, + ARG_RUNTIME_SCOPE, }; static const struct option options[] = { @@ -830,6 +836,7 @@ static int parse_argv(int argc, char *argv[]) { { "cleanup", no_argument, NULL, ARG_CLEANUP }, { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD }, { "mstack", required_argument, NULL, ARG_MSTACK }, + { "runtime-scope", required_argument, NULL, ARG_RUNTIME_SCOPE }, {} }; @@ -1229,8 +1236,11 @@ static int parse_argv(int argc, char *argv[]) { case 'U': if (userns_supported()) { - /* Note that arg_userns_ownership is implied by USER_NAMESPACE_PICK further down. */ - arg_userns_mode = arg_privileged ? USER_NAMESPACE_PICK : USER_NAMESPACE_MANAGED; + /* Note that arg_userns_ownership is implied by USER_NAMESPACE_PICK further down. + * We use _USER_NAMESPACE_MODE_INVALID as a marker so that the final resolution + * (PICK vs MANAGED) is deferred to verify_arguments() where arg_runtime_scope + * has its final value regardless of option order. 
*/ + arg_userns_mode = _USER_NAMESPACE_MODE_INVALID; arg_uid_shift = UID_INVALID; arg_uid_range = UINT32_C(0x10000); @@ -1599,6 +1609,12 @@ static int parse_argv(int argc, char *argv[]) { arg_ask_password = false; break; + case ARG_RUNTIME_SCOPE: + arg_runtime_scope = runtime_scope_from_string(optarg); + if (!IN_SET(arg_runtime_scope, RUNTIME_SCOPE_SYSTEM, RUNTIME_SCOPE_USER)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse runtime scope: %s", optarg); + break; + case '?': return -EINVAL; @@ -1645,20 +1661,36 @@ static int parse_argv(int argc, char *argv[]) { static int verify_arguments(void) { int r; + /* Apply scope-dependent defaults now that arg_runtime_scope has its final value. -U sets + * arg_userns_mode to _USER_NAMESPACE_MODE_INVALID to defer the resolution to here. */ + if (!FLAGS_SET(arg_settings_mask, SETTING_USERNS)) + arg_userns_mode = arg_runtime_scope == RUNTIME_SCOPE_SYSTEM ? USER_NAMESPACE_NO : USER_NAMESPACE_MANAGED; + else if (arg_userns_mode == _USER_NAMESPACE_MODE_INVALID) + arg_userns_mode = arg_runtime_scope == RUNTIME_SCOPE_SYSTEM ? USER_NAMESPACE_PICK : USER_NAMESPACE_MANAGED; + + if (!FLAGS_SET(arg_settings_mask, SETTING_NETWORK)) + arg_private_network = arg_runtime_scope != RUNTIME_SCOPE_SYSTEM; + SET_FLAG(arg_mount_settings, MOUNT_UNMANAGED, arg_userns_mode != USER_NAMESPACE_MANAGED); /* We can mount selinuxfs only if we are privileged and can do so before userns. In managed mode we * have to enter the userns earlier, hence cannot do that. 
*/ - /* SET_FLAG(arg_mount_settings, MOUNT_PRIVILEGED, arg_privileged); */ + /* SET_FLAG(arg_mount_settings, MOUNT_PRIVILEGED, arg_runtime_scope == RUNTIME_SCOPE_SYSTEM); */ SET_FLAG(arg_mount_settings, MOUNT_PRIVILEGED, arg_userns_mode != USER_NAMESPACE_MANAGED); SET_FLAG(arg_mount_settings, MOUNT_USE_USERNS, arg_userns_mode != USER_NAMESPACE_NO); + /* If we're not unsharing the network namespace and are unsharing the user namespace, we won't have + * permissions to bind ports in the container, so let's drop the CAP_NET_BIND_SERVICE capability to + * indicate that. */ + if (!arg_private_network && arg_userns_mode != USER_NAMESPACE_NO) + arg_caps_retain &= ~(UINT64_C(1) << CAP_NET_BIND_SERVICE); + if (arg_private_network) SET_FLAG(arg_mount_settings, MOUNT_APPLY_APIVFS_NETNS, arg_private_network); - if (!arg_privileged && arg_userns_mode != USER_NAMESPACE_MANAGED) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unprivileged operation requires managed user namespaces, as otherwise no UID range can be acquired."); + if (arg_runtime_scope != RUNTIME_SCOPE_SYSTEM && arg_userns_mode != USER_NAMESPACE_MANAGED) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "User-scoped operation requires managed user namespaces, as otherwise no UID range can be acquired."); if (arg_userns_mode == USER_NAMESPACE_MANAGED && !arg_private_network) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Managed user namespace operation requires private networking, as otherwise /sys/ may not be mounted."); @@ -3183,7 +3215,7 @@ static int determine_names(void) { if (arg_machine) { _cleanup_(image_unrefp) Image *i = NULL; - r = image_find(arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER, + r = image_find(arg_runtime_scope, IMAGE_MACHINE, arg_machine, NULL, &i); if (r == -ENOENT) return log_error_errno(r, "No image for machine '%s'.", arg_machine); @@ -5147,7 +5179,7 @@ static int load_settings(void) { _SD_PATH_INVALID, }; - const uint64_t *q = arg_privileged ? 
lookup_dir_system : lookup_dir_user; + const uint64_t *q = arg_runtime_scope == RUNTIME_SCOPE_SYSTEM ? lookup_dir_system : lookup_dir_user; for (; *q != _SD_PATH_INVALID; q++) { _cleanup_free_ char *cd = NULL; r = sd_path_lookup(*q, "systemd/nspawn", &cd); @@ -5578,7 +5610,7 @@ static int run_container( /* Registration always happens on the system bus */ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *system_bus = NULL; - if (arg_register || (arg_privileged && !arg_keep_unit)) { + if (arg_register || (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM && !arg_keep_unit)) { r = sd_bus_default_system(&system_bus); if (r < 0) return log_error_errno(r, "Failed to open system bus: %m"); @@ -5594,7 +5626,7 @@ static int run_container( _cleanup_(sd_bus_unrefp) sd_bus *runtime_bus = NULL; if (arg_register || !arg_keep_unit) { - if (arg_privileged) + if (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM) runtime_bus = sd_bus_ref(system_bus); else { r = sd_bus_default_user(&user_bus); @@ -5672,14 +5704,14 @@ static int run_container( ifi, arg_container_service_name); if (r < 0) { - if (arg_privileged) /* if privileged the request to register definitely failed */ + if (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM) /* if system scope the request to register definitely failed */ return r; log_notice_errno(r, "Failed to register machine in system context, will try in user context."); } else registered_system = true; - if (!arg_privileged) { + if (arg_runtime_scope != RUNTIME_SCOPE_SYSTEM) { r = register_machine( runtime_bus, arg_machine, @@ -6062,19 +6094,13 @@ static int cant_be_in_netns(void) { } static void initialize_defaults(void) { - arg_privileged = getuid() == 0; - - /* If running unprivileged default to systemd-nsresourced operation */ - arg_userns_mode = arg_privileged ? 
USER_NAMESPACE_NO : USER_NAMESPACE_MANAGED; - - /* Imply private networking for unprivileged operation, since kernel otherwise refuses mounting sysfs */ - arg_private_network = !arg_privileged; + arg_runtime_scope = getuid() == 0 ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER; } static void cleanup_propagation_and_export_directories(void) { const char *p; - if (!arg_machine || !arg_privileged) + if (!arg_machine || arg_runtime_scope != RUNTIME_SCOPE_SYSTEM) return; p = strjoina("/run/systemd/nspawn/propagate/", arg_machine); @@ -6164,12 +6190,6 @@ static int run(int argc, char *argv[]) { if (r < 0) goto finish; - /* If we're not unsharing the network namespace and are unsharing the user namespace, we won't have - * permissions to bind ports in the container, so let's drop the CAP_NET_BIND_SERVICE capability to - * indicate that. */ - if (!arg_private_network && arg_userns_mode != USER_NAMESPACE_NO) - arg_caps_retain &= ~(UINT64_C(1) << CAP_NET_BIND_SERVICE); - r = verify_arguments(); if (r < 0) goto finish; @@ -6274,7 +6294,7 @@ static int run(int argc, char *argv[]) { r = create_ephemeral_snapshot( arg_directory, - arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER, + arg_runtime_scope, arg_read_only, &tree_global_lock, &tree_local_lock, @@ -6295,10 +6315,10 @@ static int run(int argc, char *argv[]) { goto finish; r = image_path_lock( - arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER, + arg_runtime_scope, arg_directory, (arg_read_only ? LOCK_SH : LOCK_EX) | LOCK_NB, - arg_privileged ? &tree_global_lock : NULL, + arg_runtime_scope == RUNTIME_SCOPE_SYSTEM ? &tree_global_lock : NULL, &tree_local_lock); if (r == -EBUSY) { log_error_errno(r, "Directory tree %s is currently busy.", arg_directory); @@ -6426,10 +6446,10 @@ static int run(int argc, char *argv[]) { /* Always take an exclusive lock on our own ephemeral copy. */ r = image_path_lock( - arg_privileged ? 
RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER, + arg_runtime_scope, np, LOCK_EX|LOCK_NB, - arg_privileged ? &tree_global_lock : NULL, + arg_runtime_scope == RUNTIME_SCOPE_SYSTEM ? &tree_global_lock : NULL, &tree_local_lock); if (r < 0) { log_error_errno(r, "Failed to create image lock: %m"); @@ -6454,10 +6474,10 @@ static int run(int argc, char *argv[]) { remove_image = true; } else { r = image_path_lock( - arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER, + arg_runtime_scope, arg_image, (arg_read_only ? LOCK_SH : LOCK_EX) | LOCK_NB, - arg_privileged ? &tree_global_lock : NULL, + arg_runtime_scope == RUNTIME_SCOPE_SYSTEM ? &tree_global_lock : NULL, &tree_local_lock); if (r == -EBUSY) { log_error_errno(r, "Disk image %s is currently busy.", arg_image); diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index 626d58f9c6592..a1597d7a77517 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -218,8 +218,8 @@ static int help(void) { " -q --quiet Do not show status information\n" " --no-pager Do not pipe output into a pager\n" " --no-ask-password Do not prompt for password\n" - " --user Interact with user manager\n" - " --system Interact with system manager\n" + " --runtime-scope=system|user\n" + " Run in system or user service manager scope\n" "\n%3$sImage:%4$s\n" " -D --directory=PATH Root directory for the VM\n" " -x --ephemeral Run VM with snapshot of the disk or directory\n" @@ -378,6 +378,7 @@ static int parse_argv(int argc, char *argv[]) { ARG_BIND_USER, ARG_BIND_USER_SHELL, ARG_BIND_USER_GROUP, + ARG_RUNTIME_SCOPE, ARG_SYSTEM, ARG_USER, ARG_IMAGE_FORMAT, @@ -442,6 +443,7 @@ static int parse_argv(int argc, char *argv[]) { { "bind-user", required_argument, NULL, ARG_BIND_USER }, { "bind-user-shell", required_argument, NULL, ARG_BIND_USER_SHELL }, { "bind-user-group", required_argument, NULL, ARG_BIND_USER_GROUP }, + { "runtime-scope", required_argument, NULL, ARG_RUNTIME_SCOPE }, { "system", no_argument, NULL, ARG_SYSTEM }, { "user", 
no_argument, NULL, ARG_USER }, {} @@ -1014,6 +1016,12 @@ static int parse_argv(int argc, char *argv[]) { break; + case ARG_RUNTIME_SCOPE: + arg_runtime_scope = runtime_scope_from_string(optarg); + if (!IN_SET(arg_runtime_scope, RUNTIME_SCOPE_SYSTEM, RUNTIME_SCOPE_USER)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse runtime scope: %s", optarg); + break; + case ARG_SYSTEM: arg_runtime_scope = RUNTIME_SCOPE_SYSTEM; break; From 68ed9ac1e873fd7045eda16801484a8309628d60 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Sun, 29 Mar 2026 11:15:35 +0000 Subject: [PATCH 12/14] nspawn: add --forward-journal= and --forward-journal-config= Add --forward-journal=FILE|DIR to forward the container's journal entries to the host via systemd-journal-remote. When specified, nspawn starts systemd-journal-remote listening on a Unix socket, bind-mounts it into the container at /run/host/journal/socket, and passes a journal.forward_to_socket credential pointing to it. Add --forward-journal-config=PATH to specify a configuration file for the spawned systemd-journal-remote instance. Extract fork_journal_remote() into fork-notify.{c,h} as a shared helper used by both nspawn and vmspawn. Co-developed-by: Claude Opus 4.6 --- man/systemd-nspawn.xml | 27 +++++++ src/libsystemd/sd-path/path-lookup.c | 36 +++++++++ src/libsystemd/sd-path/path-lookup.h | 1 + src/nspawn/nspawn.c | 77 ++++++++++++++++++- src/shared/fork-notify.c | 75 ++++++++++++++++++ src/shared/fork-notify.h | 6 ++ src/vmspawn/vmspawn.c | 111 ++------------------------- 7 files changed, 228 insertions(+), 105 deletions(-) diff --git a/man/systemd-nspawn.xml b/man/systemd-nspawn.xml index 0e24e9ae99ba8..d9870451921bc 100644 --- a/man/systemd-nspawn.xml +++ b/man/systemd-nspawn.xml @@ -1565,6 +1565,33 @@ After=sys-subsystem-net-devices-ens1.device + + + + Forward the container's journal to the host by starting + systemd-journal-remote8 + listening on a Unix socket that is bind-mounted into the container. 
The container's + systemd-journald8 + connects to the socket via the journal.forward_to_socket credential and streams + journal entries to the host in real-time. Takes a path to a journal file or directory where the received + entries will be stored. If the path ends in .journal, entries are written to a single + file; otherwise, entries are split per host into the specified directory. + + + + + + + + Specifies a configuration file for the + systemd-journal-remote8 + instance started by . When not specified, defaults to + /dev/null, causing systemd-journal-remote to ignore its default + configuration files. + + + + diff --git a/src/libsystemd/sd-path/path-lookup.c b/src/libsystemd/sd-path/path-lookup.c index 32c14fb14a7d5..3de91cdc8426e 100644 --- a/src/libsystemd/sd-path/path-lookup.c +++ b/src/libsystemd/sd-path/path-lookup.c @@ -5,8 +5,10 @@ #include "alloc-util.h" #include "fs-util.h" #include "log.h" +#include "mkdir.h" #include "path-lookup.h" #include "path-util.h" +#include "random-util.h" #include "stat-util.h" #include "string-util.h" #include "strv.h" @@ -101,6 +103,40 @@ int runtime_directory(RuntimeScope scope, const char *fallback_suffix, char **re return 1; } +int runtime_directory_make(RuntimeScope scope, const char *prefix, char **ret_dir, char **ret_dir_destroy) { + _cleanup_free_ char *subdir = NULL, *dir = NULL; + int r; + + assert(prefix); + assert(ret_dir); + + if (asprintf(&subdir, "systemd/%s.%" PRIx64, prefix, random_u64()) < 0) + return -ENOMEM; + + r = runtime_directory(scope, subdir, &dir); + if (r < 0) + return r; + + if (r > 0) { + r = mkdir_p(dir, 0755); + if (r < 0) + return r; + + if (ret_dir_destroy) { + char *copy = strdup(dir); + if (!copy) + return -ENOMEM; + *ret_dir_destroy = copy; + } + } else { + if (ret_dir_destroy) + *ret_dir_destroy = NULL; + } + + *ret_dir = TAKE_PTR(dir); + return 0; +} + static const char* const user_data_unit_paths[] = { "/usr/local/lib/systemd/user", "/usr/local/share/systemd/user", diff --git 
a/src/libsystemd/sd-path/path-lookup.h b/src/libsystemd/sd-path/path-lookup.h index 67a4f5d69cf0f..32284df6423e9 100644 --- a/src/libsystemd/sd-path/path-lookup.h +++ b/src/libsystemd/sd-path/path-lookup.h @@ -60,6 +60,7 @@ void lookup_paths_done(LookupPaths *p); int config_directory_generic(RuntimeScope scope, const char *suffix, char **ret); int runtime_directory_generic(RuntimeScope scope, const char *suffix, char **ret); int runtime_directory(RuntimeScope scope, const char *fallback_suffix, char **ret); +int runtime_directory_make(RuntimeScope scope, const char *prefix, char **ret_dir, char **ret_dir_destroy); /* We don't treat /etc/xdg/systemd/ in these functions as the xdg base dir spec suggests because we assume * that is a link to /etc/systemd/ anyway. */ diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 2d80cefc8c7b3..8060c7cf09293 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -50,6 +50,7 @@ #include "fd-util.h" #include "fdset.h" #include "fileio.h" +#include "fork-notify.h" #include "format-util.h" #include "fs-util.h" #include "gpt.h" @@ -91,6 +92,7 @@ #include "osc-context.h" #include "pager.h" #include "parse-argument.h" +#include "path-lookup.h" #include "parse-util.h" #include "path-util.h" #include "pidref.h" @@ -129,6 +131,7 @@ /* The notify socket inside the container it can use to talk to nspawn using the sd_notify(3) protocol */ #define NSPAWN_NOTIFY_SOCKET_PATH "/run/host/notify" #define NSPAWN_MOUNT_TUNNEL "/run/host/incoming" +#define NSPAWN_JOURNAL_SOCKET_PATH "/run/host/journal/socket" #define EXIT_FORCE_RESTART 133 @@ -255,9 +258,11 @@ static char *arg_settings_filename = NULL; static Architecture arg_architecture = _ARCHITECTURE_INVALID; static ImagePolicy *arg_image_policy = NULL; static char *arg_background = NULL; -static RuntimeScope arg_runtime_scope = _RUNTIME_SCOPE_INVALID; static bool arg_cleanup = false; static bool arg_ask_password = true; +static char *arg_forward_journal = NULL; +static char 
*arg_forward_journal_config = NULL; +static RuntimeScope arg_runtime_scope = _RUNTIME_SCOPE_INVALID; STATIC_DESTRUCTOR_REGISTER(arg_directory, freep); STATIC_DESTRUCTOR_REGISTER(arg_template, freep); @@ -298,6 +303,8 @@ STATIC_DESTRUCTOR_REGISTER(arg_bind_user_groups, strv_freep); STATIC_DESTRUCTOR_REGISTER(arg_settings_filename, freep); STATIC_DESTRUCTOR_REGISTER(arg_image_policy, image_policy_freep); STATIC_DESTRUCTOR_REGISTER(arg_background, freep); +STATIC_DESTRUCTOR_REGISTER(arg_forward_journal, freep); +STATIC_DESTRUCTOR_REGISTER(arg_forward_journal_config, freep); static int parse_private_users( const char *s, @@ -493,6 +500,10 @@ static int help(void) { " --link-journal=MODE Link up guest journal, one of no, auto, guest, \n" " host, try-guest, try-host\n" " -j Equivalent to --link-journal=try-guest\n" + " --forward-journal=FILE|DIR\n" + " Forward the container's journal to the host\n" + " --forward-journal-config=PATH\n" + " Configuration file for systemd-journal-remote\n" "\n%3$sMounts:%4$s\n" " --bind=PATH[:PATH[:OPTIONS]]\n" " Bind mount a file or directory from the host into\n" @@ -754,6 +765,8 @@ static int parse_argv(int argc, char *argv[]) { ARG_CLEANUP, ARG_NO_ASK_PASSWORD, ARG_MSTACK, + ARG_FORWARD_JOURNAL, + ARG_FORWARD_JOURNAL_CONFIG, ARG_RUNTIME_SCOPE, }; @@ -836,6 +849,8 @@ static int parse_argv(int argc, char *argv[]) { { "cleanup", no_argument, NULL, ARG_CLEANUP }, { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD }, { "mstack", required_argument, NULL, ARG_MSTACK }, + { "forward-journal", required_argument, NULL, ARG_FORWARD_JOURNAL }, + { "forward-journal-config", required_argument, NULL, ARG_FORWARD_JOURNAL_CONFIG }, { "runtime-scope", required_argument, NULL, ARG_RUNTIME_SCOPE }, {} }; @@ -1609,6 +1624,18 @@ static int parse_argv(int argc, char *argv[]) { arg_ask_password = false; break; + case ARG_FORWARD_JOURNAL: + r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_forward_journal); + if (r < 0) + return r; + 
break; + + case ARG_FORWARD_JOURNAL_CONFIG: + r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_forward_journal_config); + if (r < 0) + return r; + break; + case ARG_RUNTIME_SCOPE: arg_runtime_scope = runtime_scope_from_string(optarg); if (!IN_SET(arg_runtime_scope, RUNTIME_SCOPE_SYSTEM, RUNTIME_SCOPE_USER)) @@ -1642,6 +1669,9 @@ static int parse_argv(int argc, char *argv[]) { arg_caps_retain |= arg_private_network ? UINT64_C(1) << CAP_NET_ADMIN : 0; arg_caps_retain &= ~minus; + if (arg_forward_journal_config && !arg_forward_journal) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--forward-journal-config= requires --forward-journal=."); + /* Make sure to parse environment before we reset the settings mask below */ r = parse_environment(); if (r < 0) @@ -6141,6 +6171,9 @@ static int run(int argc, char *argv[]) { _cleanup_(sd_netlink_unrefp) sd_netlink *nfnl = NULL; _cleanup_(pidref_done) PidRef pid = PIDREF_NULL; _cleanup_(sd_varlink_unrefp) sd_varlink *nsresource_link = NULL, *mountfsd_link = NULL; + _cleanup_(fork_notify_terminate) PidRef journal_remote_pidref = PIDREF_NULL; + _cleanup_free_ char *runtime_dir = NULL; + _cleanup_(rm_rf_physical_and_freep) char *runtime_dir_destroy = NULL; log_setup(); @@ -6666,6 +6699,48 @@ static int run(int argc, char *argv[]) { expose_args.nfnl = nfnl; } + if (arg_forward_journal) { + r = runtime_directory_make(arg_runtime_scope, "nspawn-journal", &runtime_dir, &runtime_dir_destroy); + if (r < 0) { + log_error_errno(r, "Failed to create runtime directory: %m"); + goto finish; + } + + _cleanup_free_ char *socket_path = path_join(runtime_dir, "socket"); + if (!socket_path) { + r = log_oom(); + goto finish; + } + + r = fork_journal_remote(socket_path, arg_forward_journal, arg_forward_journal_config, &journal_remote_pidref); + if (r < 0) + goto finish; + + CustomMount *cm = custom_mount_add(&arg_custom_mounts, &arg_n_custom_mounts, CUSTOM_MOUNT_BIND); + if (!cm) { + r = log_oom(); + goto finish; + } + + cm->source 
= TAKE_PTR(socket_path); + cm->destination = strdup(NSPAWN_JOURNAL_SOCKET_PATH); + cm->read_only = true; + if (!cm->destination) { + r = log_oom(); + goto finish; + } + + r = machine_credential_add(&arg_credentials, "journal.forward_to_socket", NSPAWN_JOURNAL_SOCKET_PATH, SIZE_MAX); + if (r == -EEXIST) { + log_error("Credential 'journal.forward_to_socket' already set via --set-credential=, refusing --forward-journal=."); + goto finish; + } + if (r < 0) { + log_error_errno(r, "Failed to add 'journal.forward_to_socket' credential: %m"); + goto finish; + } + } + for (;;) { r = run_container( rootdir, diff --git a/src/shared/fork-notify.c b/src/shared/fork-notify.c index bcedf729605de..a8038524a5fbe 100644 --- a/src/shared/fork-notify.c +++ b/src/shared/fork-notify.c @@ -3,14 +3,19 @@ #include #include +#include "alloc-util.h" #include "build-path.h" +#include "chase.h" +#include "chattr-util.h" #include "escape.h" #include "event-util.h" #include "exit-status.h" +#include "fd-util.h" #include "fork-notify.h" #include "log.h" #include "notify-recv.h" #include "parse-util.h" +#include "path-util.h" #include "pidref.h" #include "process-util.h" #include "runtime-scope.h" @@ -234,3 +239,73 @@ int journal_fork(RuntimeScope scope, char * const* units, PidRef *ret_pidref) { return fork_notify(argv, ret_pidref); } + +int fork_journal_remote( + const char *listen_address, + const char *output, + const char *config_file, + PidRef *ret_pidref) { + + int r; + + assert(listen_address); + assert(output); + assert(ret_pidref); + + ChaseFlags chase_flags = CHASE_MKDIR_0755|CHASE_MUST_BE_DIRECTORY; + if (endswith(output, ".journal")) + chase_flags |= CHASE_PARENT; + + _cleanup_close_ int fd = -EBADF; + r = chase(output, /* root= */ NULL, chase_flags, /* ret_path= */ NULL, &fd); + if (r < 0) + return log_error_errno(r, "Failed to create journal directory for '%s': %m", output); + + r = chattr_fd(fd, FS_NOCOW_FL, FS_NOCOW_FL); + if (r < 0) + log_debug_errno(r, "Failed to set NOCOW flag 
on journal directory for '%s', ignoring: %m", output); + + _cleanup_free_ char *sd_socket_activate = NULL; + r = find_executable("systemd-socket-activate", &sd_socket_activate); + if (r < 0) + return log_error_errno(r, "Failed to find systemd-socket-activate binary: %m"); + + _cleanup_free_ char *sd_journal_remote = NULL; + r = find_executable_full( + "systemd-journal-remote", + /* root= */ NULL, + STRV_MAKE(LIBEXECDIR), + /* use_path_envvar= */ true, + &sd_journal_remote, + /* ret_fd= */ NULL); + if (r < 0) + return log_error_errno(r, "Failed to find systemd-journal-remote binary: %m"); + + _cleanup_strv_free_ char **argv = strv_new( + sd_socket_activate, + "--listen", listen_address, + sd_journal_remote, + "--output", output, + "--split-mode", endswith(output, ".journal") ? "none" : "host"); + if (!argv) + return log_oom(); + + r = fork_notify(/* argv= */ NULL, ret_pidref); + if (r < 0) + return r; + if (r == 0) { + /* In the child */ + if (setenv("SYSTEMD_JOURNAL_REMOTE_CONFIG_FILE", + config_file ?: "/dev/null", + /* overwrite= */ true) < 0) { + log_debug_errno(errno, "Failed to set $SYSTEMD_JOURNAL_REMOTE_CONFIG_FILE: %m"); + _exit(EXIT_MEMORY); + } + + r = invoke_callout_binary(argv[0], argv); + log_error_errno(r, "Failed to invoke %s: %m", argv[0]); + _exit(EXIT_EXEC); + } + + return 0; +} diff --git a/src/shared/fork-notify.h b/src/shared/fork-notify.h index 103ab78983371..95b6aaad43e12 100644 --- a/src/shared/fork-notify.h +++ b/src/shared/fork-notify.h @@ -10,3 +10,9 @@ void fork_notify_terminate(PidRef *pidref); void fork_notify_terminate_many(sd_event_source **array, size_t n); int journal_fork(RuntimeScope scope, char * const *units, PidRef *ret_pidref); + +int fork_journal_remote( + const char *listen_address, + const char *output, + const char *config_file, + PidRef *ret_pidref); diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index a1597d7a77517..c910cb611c01b 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -20,15 +20,12 
@@ #include "architecture.h" #include "bootspec.h" #include "build-path.h" -#include "exit-status.h" #include "build.h" #include "bus-error.h" #include "bus-internal.h" #include "bus-locator.h" #include "bus-util.h" #include "capability-util.h" -#include "chase.h" -#include "chattr-util.h" #include "common-signal.h" #include "copy.h" #include "discover-image.h" @@ -1598,70 +1595,6 @@ static int start_tpm( return 0; } -static int start_systemd_journal_remote( - const char *scope, - unsigned port, - const char *sd_socket_activate, - char **ret_listen_address, - PidRef *ret_pidref) { - - int r; - - assert(scope); - - _cleanup_free_ char *scope_prefix = NULL; - r = unit_name_to_prefix(scope, &scope_prefix); - if (r < 0) - return log_error_errno(r, "Failed to strip .scope suffix from scope: %m"); - - _cleanup_free_ char *listen_address = NULL; - if (asprintf(&listen_address, "vsock:2:%u", port) < 0) - return log_oom(); - - _cleanup_free_ char *sd_journal_remote = NULL; - r = find_executable_full( - "systemd-journal-remote", - /* root= */ NULL, - STRV_MAKE(LIBEXECDIR), - /* use_path_envvar= */ true, /* systemd-journal-remote should be installed in - * LIBEXECDIR, but for supporting fancy setups. */ - &sd_journal_remote, - /* ret_fd= */ NULL); - if (r < 0) - return log_error_errno(r, "Failed to find systemd-journal-remote binary: %m"); - - _cleanup_strv_free_ char **argv = strv_new( - sd_socket_activate, - "--listen", listen_address, - sd_journal_remote, - "--output", arg_forward_journal, - "--split-mode", endswith(arg_forward_journal, ".journal") ? 
"none" : "host"); - if (!argv) - return log_oom(); - - r = fork_notify(/* argv= */ NULL, ret_pidref); - if (r < 0) - return r; - if (r == 0) { - /* In the child */ - if (setenv("SYSTEMD_JOURNAL_REMOTE_CONFIG_FILE", - arg_forward_journal_config ?: "/dev/null", - /* overwrite= */ true) < 0) { - log_debug_errno(errno, "Failed to set $SYSTEMD_JOURNAL_REMOTE_CONFIG_FILE: %m"); - _exit(EXIT_MEMORY); - } - - r = invoke_callout_binary(argv[0], argv); - log_error_errno(r, "Failed to invoke %s: %m", argv[0]); - _exit(EXIT_EXEC); - } - - if (ret_listen_address) - *ret_listen_address = TAKE_PTR(listen_address); - - return 0; -} - static int discover_root(char **ret) { int r; _cleanup_(dissected_image_unrefp) DissectedImage *image = NULL; @@ -2336,30 +2269,11 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { /* Create runtime directory for the QEMU config file and other state */ _cleanup_free_ char *runtime_dir = NULL; _cleanup_(rm_rf_physical_and_freep) char *runtime_dir_destroy = NULL; - { - _cleanup_free_ char *subdir = NULL; - - if (asprintf(&subdir, "systemd/vmspawn.%" PRIx64, random_u64()) < 0) - return log_oom(); - - r = runtime_directory(arg_runtime_scope, subdir, &runtime_dir); - if (r < 0) - return log_error_errno(r, "Failed to lookup runtime directory: %m"); - if (r > 0) { /* We need to create our own runtime dir */ - r = mkdir_p(runtime_dir, 0755); - if (r < 0) - return log_error_errno(r, "Failed to create runtime directory '%s': %m", runtime_dir); - - /* We created this, hence also destroy it */ - runtime_dir_destroy = TAKE_PTR(runtime_dir); - - runtime_dir = strdup(runtime_dir_destroy); - if (!runtime_dir) - return log_oom(); - } + r = runtime_directory_make(arg_runtime_scope, "vmspawn", &runtime_dir, &runtime_dir_destroy); + if (r < 0) + return log_error_errno(r, "Failed to create runtime directory: %m"); - log_debug("Using runtime directory: %s", runtime_dir); - } + log_debug("Using runtime directory: %s", runtime_dir); /* Build a QEMU 
config file for -readconfig. Items that can be expressed as QemuOpts sections go * here; things that require cmdline-only switches (e.g. -kernel, -smbios, -nographic, --add-fd) @@ -3340,25 +3254,14 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { if (arg_forward_journal) { _cleanup_free_ char *listen_address = NULL; - - ChaseFlags chase_flags = CHASE_MKDIR_0755|CHASE_MUST_BE_DIRECTORY; - if (endswith(arg_forward_journal, ".journal")) - chase_flags |= CHASE_PARENT; - - _cleanup_close_ int journal_fd = -EBADF; - r = chase(arg_forward_journal, /* root= */ NULL, chase_flags, /* ret_path= */ NULL, &journal_fd); - if (r < 0) - return log_error_errno(r, "Failed to create journal directory for '%s': %m", arg_forward_journal); - - r = chattr_fd(journal_fd, FS_NOCOW_FL, FS_NOCOW_FL); - if (r < 0) - log_debug_errno(r, "Failed to set NOCOW flag on journal directory for '%s', ignoring: %m", arg_forward_journal); + if (asprintf(&listen_address, "vsock:2:%u", child_cid) < 0) + return log_oom(); if (!GREEDY_REALLOC(children, n_children + 1)) return log_oom(); _cleanup_(fork_notify_terminate) PidRef child = PIDREF_NULL; - r = start_systemd_journal_remote(unit, child_cid, sd_socket_activate, &listen_address, &child); + r = fork_journal_remote(listen_address, arg_forward_journal, arg_forward_journal_config, &child); if (r < 0) return r; From ae96bb91b680c55aac142786658983828601704e Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Sun, 29 Mar 2026 18:22:40 +0000 Subject: [PATCH 13/14] shared: move machine registration to shared machine-register.{c,h} Move register_machine() and unregister_machine() from vmspawn-register.{c,h} into shared machine-register.{c,h} so both nspawn and vmspawn can use the same implementation. The unified register_machine() uses varlink first (for richer features like SSH support and unit allocation) and falls back to D-Bus for older machined, trying RegisterMachineEx before RegisterMachineWithNetwork.
The interface adds a class parameter ("vm" or "container") and local_ifindex for nspawn's network interface support. The unified unregister_machine() similarly tries varlink first (io.systemd.Machine.Unregister) before falling back to D-Bus, with debug logging when varlink operations are unavailable. Remove register_machine() and unregister_machine() from nspawn-register.c (keeping allocate_scope/terminate_scope which remain nspawn-specific). Co-developed-by: Claude Opus 4.6 --- src/nspawn/nspawn-register.c | 144 ----------- src/nspawn/nspawn-register.h | 10 - src/nspawn/nspawn.c | 25 +- src/shared/machine-register.c | 243 ++++++++++++++++++ .../machine-register.h} | 4 +- src/shared/meson.build | 1 + src/vmspawn/meson.build | 1 - src/vmspawn/vmspawn-register.c | 104 -------- src/vmspawn/vmspawn.c | 10 +- 9 files changed, 273 insertions(+), 269 deletions(-) create mode 100644 src/shared/machine-register.c rename src/{vmspawn/vmspawn-register.h => shared/machine-register.h} (76%) delete mode 100644 src/vmspawn/vmspawn-register.c diff --git a/src/nspawn/nspawn-register.c b/src/nspawn/nspawn-register.c index 04031adcc5ab5..ace0f6637a545 100644 --- a/src/nspawn/nspawn-register.c +++ b/src/nspawn/nspawn-register.c @@ -131,150 +131,6 @@ static int can_set_coredump_receive(sd_bus *bus) { return r >= 0; } -static int register_machine_ex( - sd_bus *bus, - const char *machine_name, - const PidRef *pid, - const char *directory, - sd_id128_t uuid, - int local_ifindex, - const char *service, - sd_bus_error *error) { - - _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; - int r; - - assert(bus); - assert(machine_name); - assert(service); - assert(error); - - r = bus_message_new_method_call(bus, &m, bus_machine_mgr, "RegisterMachineEx"); - if (r < 0) - return bus_log_create_error(r); - - r = sd_bus_message_append(m, "s", machine_name); - if (r < 0) - return bus_log_create_error(r); - - r = sd_bus_message_open_container(m, 'a', "(sv)"); - if (r < 0) - return 
bus_log_create_error(r); - - r = sd_bus_message_append( - m, - "(sv)(sv)(sv)", - "Id", "ay", SD_BUS_MESSAGE_APPEND_ID128(uuid), - "Service", "s", service, - "Class", "s", "container"); - if (r < 0) - return bus_log_create_error(r); - - if (pidref_is_set(pid)) { - if (pid->fd >= 0) { - r = sd_bus_message_append(m, "(sv)", "LeaderPIDFD", "h", pid->fd); - if (r < 0) - return bus_log_create_error(r); - } - - if (pid->fd_id > 0) { - r = sd_bus_message_append(m, "(sv)", "LeaderPIDFDID", "t", pid->fd_id); - if (r < 0) - return bus_log_create_error(r); - - r = sd_bus_message_append(m, "(sv)", "LeaderPID", "u", pid->pid); - if (r < 0) - return bus_log_create_error(r); - } - } - - if (!isempty(directory)) { - r = sd_bus_message_append(m, "(sv)", "RootDirectory", "s", directory); - if (r < 0) - return bus_log_create_error(r); - } - - if (local_ifindex > 0) { - r = sd_bus_message_append(m, "(sv)", "NetworkInterfaces", "ai", 1, local_ifindex); - if (r < 0) - return bus_log_create_error(r); - } - - r = sd_bus_message_close_container(m); - if (r < 0) - return bus_log_create_error(r); - - return sd_bus_call(bus, m, 0, error, NULL); -} - -int register_machine( - sd_bus *bus, - const char *machine_name, - const PidRef *pid, - const char *directory, - sd_id128_t uuid, - int local_ifindex, - const char *service) { - - _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; - int r; - - assert(bus); - assert(machine_name); - assert(service); - - r = register_machine_ex( - bus, - machine_name, - pid, - directory, - uuid, - local_ifindex, - service, - &error); - if (r >= 0) - return 0; - if (!sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_METHOD)) - return log_error_errno(r, "Failed to register machine: %s", bus_error_message(&error, r)); - - sd_bus_error_free(&error); - - r = bus_call_method( - bus, - bus_machine_mgr, - "RegisterMachineWithNetwork", - &error, - NULL, - "sayssusai", - machine_name, - SD_BUS_MESSAGE_APPEND_ID128(uuid), - service, - "container", - 
pidref_is_set(pid) ? (uint32_t) pid->pid : 0, - strempty(directory), - local_ifindex > 0 ? 1 : 0, local_ifindex); - if (r < 0) - return log_error_errno(r, "Failed to register machine: %s", bus_error_message(&error, r)); - - return 0; -} - -int unregister_machine( - sd_bus *bus, - const char *machine_name) { - - _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; - int r; - - assert(bus); - - r = bus_call_method(bus, bus_machine_mgr, "UnregisterMachine", &error, NULL, "s", machine_name); - if (r < 0) - log_debug("Failed to unregister machine: %s", bus_error_message(&error, r)); - - return 0; -} - int allocate_scope( sd_bus *bus, const char *machine_name, diff --git a/src/nspawn/nspawn-register.h b/src/nspawn/nspawn-register.h index c4b8048606251..d82c780181c6d 100644 --- a/src/nspawn/nspawn-register.h +++ b/src/nspawn/nspawn-register.h @@ -4,16 +4,6 @@ #include "shared-forward.h" #include "nspawn-settings.h" -int register_machine( - sd_bus *bus, - const char *machine_name, - const PidRef *pid, - const char *directory, - sd_id128_t uuid, - int local_ifindex, - const char *service); -int unregister_machine(sd_bus *bus, const char *machine_name); - typedef enum AllocateScopeFlags { ALLOCATE_SCOPE_ALLOW_PIDFD = 1 << 0, } AllocateScopeFlags; diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 8060c7cf09293..42eb924a06b45 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -67,6 +67,7 @@ #include "loopback-setup.h" #include "machine-bind-user.h" #include "machine-credential.h" +#include "machine-register.h" #include "main-func.h" #include "mkdir.h" #include "mount-util.h" @@ -5728,11 +5729,17 @@ static int run_container( r = register_machine( system_bus, arg_machine, + arg_uuid, + arg_container_service_name, + "container", pid, arg_directory, - arg_uuid, + /* cid= */ 0, ifi, - arg_container_service_name); + /* address= */ NULL, + /* key_path= */ NULL, + /* allocate_unit= */ false, + RUNTIME_SCOPE_SYSTEM); if (r < 0) { if 
(arg_runtime_scope == RUNTIME_SCOPE_SYSTEM) /* if system scope the request to register definitely failed */ return r; @@ -5745,11 +5752,17 @@ static int run_container( r = register_machine( runtime_bus, arg_machine, + arg_uuid, + arg_container_service_name, + "container", pid, arg_directory, - arg_uuid, + /* cid= */ 0, ifi, - arg_container_service_name); + /* address= */ NULL, + /* key_path= */ NULL, + /* allocate_unit= */ false, + RUNTIME_SCOPE_USER); if (r < 0) { if (!registered_system) /* neither registration worked: fail */ return r; @@ -5970,9 +5983,9 @@ static int run_container( /* Tell machined that we are gone. */ if (registered_system) - (void) unregister_machine(system_bus, arg_machine); + (void) unregister_machine(system_bus, arg_machine, RUNTIME_SCOPE_SYSTEM); if (registered_runtime) - (void) unregister_machine(runtime_bus, arg_machine); + (void) unregister_machine(runtime_bus, arg_machine, RUNTIME_SCOPE_USER); if (r < 0) /* We failed to wait for the container, or the container exited abnormally. 
*/ diff --git a/src/shared/machine-register.c b/src/shared/machine-register.c new file mode 100644 index 0000000000000..b3b1055a9f2b3 --- /dev/null +++ b/src/shared/machine-register.c @@ -0,0 +1,243 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include + +#include "sd-bus.h" +#include "sd-id128.h" +#include "sd-json.h" +#include "sd-varlink.h" + +#include "bus-error.h" +#include "bus-locator.h" +#include "bus-util.h" +#include "errno-util.h" +#include "json-util.h" +#include "log.h" +#include "machine-register.h" +#include "path-lookup.h" +#include "pidref.h" +#include "socket-util.h" +#include "string-util.h" +#include "terminal-util.h" +#include "varlink-util.h" + +static int register_machine_dbus_ex( + sd_bus *bus, + const char *machine_name, + sd_id128_t uuid, + const char *service, + const char *class, + const PidRef *pidref, + const char *directory, + int local_ifindex, + sd_bus_error *error) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + int r; + + assert(bus); + assert(machine_name); + assert(service); + assert(class); + + r = bus_message_new_method_call(bus, &m, bus_machine_mgr, "RegisterMachineEx"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append(m, "s", machine_name); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'a', "(sv)"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append( + m, + "(sv)(sv)(sv)", + "Id", "ay", SD_BUS_MESSAGE_APPEND_ID128(uuid), + "Service", "s", service, + "Class", "s", class); + if (r < 0) + return bus_log_create_error(r); + + if (pidref_is_set(pidref)) { + if (pidref->fd >= 0) { + r = sd_bus_message_append(m, "(sv)", "LeaderPIDFD", "h", pidref->fd); + if (r < 0) + return bus_log_create_error(r); + } + + if (pidref->fd_id > 0) { + r = sd_bus_message_append(m, "(sv)", "LeaderPIDFDID", "t", pidref->fd_id); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append(m, "(sv)", "LeaderPID", 
"u", pidref->pid); + if (r < 0) + return bus_log_create_error(r); + } + } + + if (!isempty(directory)) { + r = sd_bus_message_append(m, "(sv)", "RootDirectory", "s", directory); + if (r < 0) + return bus_log_create_error(r); + } + + if (local_ifindex > 0) { + r = sd_bus_message_append(m, "(sv)", "NetworkInterfaces", "ai", 1, local_ifindex); + if (r < 0) + return bus_log_create_error(r); + } + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return sd_bus_call(bus, m, 0, error, NULL); +} + +static int register_machine_dbus( + sd_bus *bus, + const char *machine_name, + sd_id128_t uuid, + const char *service, + const char *class, + const PidRef *pidref, + const char *directory, + int local_ifindex) { + + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + int r; + + assert(bus); + assert(machine_name); + assert(service); + assert(class); + + /* First try RegisterMachineEx which supports PIDFD-based leader tracking. */ + r = register_machine_dbus_ex(bus, machine_name, uuid, service, class, pidref, directory, local_ifindex, &error); + if (r >= 0) + return 0; + if (!sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_METHOD)) + return log_error_errno(r, "Failed to register machine: %s", bus_error_message(&error, r)); + + sd_bus_error_free(&error); + + r = bus_call_method( + bus, + bus_machine_mgr, + "RegisterMachineWithNetwork", + &error, + NULL, + "sayssusai", + machine_name, + SD_BUS_MESSAGE_APPEND_ID128(uuid), + service, + class, + pidref_is_set(pidref) ? (uint32_t) pidref->pid : 0, + strempty(directory), + local_ifindex > 0 ? 
1 : 0, local_ifindex); + if (r < 0) + return log_error_errno(r, "Failed to register machine: %s", bus_error_message(&error, r)); + + return 0; +} + +int register_machine( + sd_bus *bus, + const char *machine_name, + sd_id128_t uuid, + const char *service, + const char *class, + const PidRef *pidref, + const char *directory, + unsigned cid, + int local_ifindex, + const char *address, + const char *key_path, + bool allocate_unit, + RuntimeScope scope) { + + _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; + int r; + + assert(machine_name); + assert(service); + assert(class); + + /* First try to use varlink, as it provides more features (such as SSH support). */ + _cleanup_free_ char *p = NULL; + r = runtime_directory_generic(scope, "systemd/machine/io.systemd.Machine", &p); + if (r >= 0) + r = sd_varlink_connect_address(&vl, p); + if (r == -ENOENT || ERRNO_IS_DISCONNECT(r)) { + log_debug_errno(r, "Failed to connect to machined via varlink%s%s, falling back to D-Bus: %m", + p ? " on " : "", strempty(p)); + + /* In case we are running with an older machined, fall back to D-Bus. 
*/ + if (!bus) + return log_error_errno(SYNTHETIC_ERRNO(ESRCH), "Varlink connection to machined not available and no bus provided."); + + return register_machine_dbus(bus, machine_name, uuid, service, class, pidref, directory, local_ifindex); + } + if (r < 0) + return log_error_errno(r, "Failed to connect to machined on %s: %m", strna(p)); + + return varlink_callbo_and_log( + vl, + "io.systemd.Machine.Register", + /* ret_reply= */ NULL, + SD_JSON_BUILD_PAIR_STRING("name", machine_name), + SD_JSON_BUILD_PAIR_CONDITION(!sd_id128_is_null(uuid), "id", SD_JSON_BUILD_ID128(uuid)), + SD_JSON_BUILD_PAIR_STRING("service", service), + SD_JSON_BUILD_PAIR_STRING("class", class), + SD_JSON_BUILD_PAIR_CONDITION(VSOCK_CID_IS_REGULAR(cid), "vSockCid", SD_JSON_BUILD_UNSIGNED(cid)), + SD_JSON_BUILD_PAIR_CONDITION(local_ifindex > 0, "networkInterfaces", SD_JSON_BUILD_ARRAY(SD_JSON_BUILD_INTEGER(local_ifindex))), + SD_JSON_BUILD_PAIR_CONDITION(!!directory, "rootDirectory", SD_JSON_BUILD_STRING(directory)), + SD_JSON_BUILD_PAIR_CONDITION(!!address, "sshAddress", SD_JSON_BUILD_STRING(address)), + SD_JSON_BUILD_PAIR_CONDITION(!!key_path, "sshPrivateKeyPath", SD_JSON_BUILD_STRING(key_path)), + SD_JSON_BUILD_PAIR_CONDITION(isatty_safe(STDIN_FILENO), "allowInteractiveAuthentication", SD_JSON_BUILD_BOOLEAN(true)), + SD_JSON_BUILD_PAIR_CONDITION(allocate_unit, "allocateUnit", SD_JSON_BUILD_BOOLEAN(true)), + SD_JSON_BUILD_PAIR_CONDITION(pidref_is_set(pidref), "leaderProcessId", JSON_BUILD_PIDREF(pidref))); +} + +int unregister_machine(sd_bus *bus, const char *machine_name, RuntimeScope scope) { + int r; + + assert(machine_name); + + /* First try varlink */ + _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; + _cleanup_free_ char *p = NULL; + r = runtime_directory_generic(scope, "systemd/machine/io.systemd.Machine", &p); + if (r < 0) + log_debug_errno(r, "Failed to determine runtime directory for varlink, falling back to D-Bus: %m"); + else { + r = sd_varlink_connect_address(&vl, p); + if (r 
< 0) + log_debug_errno(r, "Failed to connect to machined via varlink on %s, falling back to D-Bus: %m", p); + else { + r = varlink_callbo_and_log( + vl, + "io.systemd.Machine.Unregister", + /* ret_reply= */ NULL, + SD_JSON_BUILD_PAIR_STRING("name", machine_name)); + if (r >= 0) + return 0; + + log_debug_errno(r, "Failed to unregister machine via varlink, falling back to D-Bus: %m"); + } + } + + /* Fall back to D-Bus */ + if (bus) { + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + r = bus_call_method(bus, bus_machine_mgr, "UnregisterMachine", &error, NULL, "s", machine_name); + if (r < 0) + log_debug("Failed to unregister machine: %s", bus_error_message(&error, r)); + } + + return 0; +} diff --git a/src/vmspawn/vmspawn-register.h b/src/shared/machine-register.h similarity index 76% rename from src/vmspawn/vmspawn-register.h rename to src/shared/machine-register.h index de118b7492fa2..df4f53b6be68c 100644 --- a/src/vmspawn/vmspawn-register.h +++ b/src/shared/machine-register.h @@ -8,12 +8,14 @@ int register_machine( const char *machine_name, sd_id128_t uuid, const char *service, + const char *class, const PidRef *pidref, const char *directory, unsigned cid, + int local_ifindex, const char *address, const char *key_path, bool allocate_unit, RuntimeScope scope); -int unregister_machine(sd_bus *bus, const char *machine_name); +int unregister_machine(sd_bus *bus, const char *machine_name, RuntimeScope scope); diff --git a/src/shared/meson.build b/src/shared/meson.build index cdbe763d0137d..3bd167bc3ade4 100644 --- a/src/shared/meson.build +++ b/src/shared/meson.build @@ -123,6 +123,7 @@ shared_sources = files( 'lsm-util.c', 'machine-bind-user.c', 'machine-credential.c', + 'machine-register.c', 'machine-id-setup.c', 'macvlan-util.c', 'main-func.c', diff --git a/src/vmspawn/meson.build b/src/vmspawn/meson.build index 722e6a52cc7f2..99bad2d618973 100644 --- a/src/vmspawn/meson.build +++ b/src/vmspawn/meson.build @@ -10,7 +10,6 @@ vmspawn_sources = 
files( 'vmspawn-settings.c', 'vmspawn-scope.c', 'vmspawn-mount.c', - 'vmspawn-register.c', ) vmspawn_extract_sources = files( 'vmspawn-util.c', diff --git a/src/vmspawn/vmspawn-register.c b/src/vmspawn/vmspawn-register.c deleted file mode 100644 index 46f292ce49525..0000000000000 --- a/src/vmspawn/vmspawn-register.c +++ /dev/null @@ -1,104 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1-or-later */ - -#include - -#include "sd-bus.h" -#include "sd-id128.h" -#include "sd-json.h" -#include "sd-varlink.h" - -#include "bus-error.h" -#include "bus-locator.h" -#include "errno-util.h" -#include "json-util.h" -#include "log.h" -#include "path-lookup.h" -#include "pidref.h" -#include "socket-util.h" -#include "string-util.h" -#include "terminal-util.h" -#include "varlink-util.h" -#include "vmspawn-register.h" - -int register_machine( - sd_bus *bus, - const char *machine_name, - sd_id128_t uuid, - const char *service, - const PidRef *pidref, - const char *directory, - unsigned cid, - const char *address, - const char *key_path, - bool allocate_unit, - RuntimeScope scope) { - - _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL; - int r; - - assert(machine_name); - assert(service); - - /* First try to use varlink, as it provides more features (such as SSH support). */ - _cleanup_free_ char *p = NULL; - r = runtime_directory_generic(scope, "systemd/machine/io.systemd.Machine", &p); - if (r < 0) - return r; - - r = sd_varlink_connect_address(&vl, p); - if (r == -ENOENT || ERRNO_IS_DISCONNECT(r)) { - _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; - - assert(bus); - - /* In case we are running with an older machined, fallback to the existing D-Bus method. */ - r = bus_call_method( - bus, - bus_machine_mgr, - "RegisterMachine", - &error, - NULL, - "sayssus", - machine_name, - SD_BUS_MESSAGE_APPEND_ID128(uuid), - service, - "vm", - (uint32_t) (pidref_is_set(pidref) ? 
pidref->pid : 0), - strempty(directory)); - if (r < 0) - return log_error_errno(r, "Failed to register machine: %s", bus_error_message(&error, r)); - - return 0; - } - if (r < 0) - return log_error_errno(r, "Failed to connect to machined on %p: %m", p); - - return varlink_callbo_and_log( - vl, - "io.systemd.Machine.Register", - /* ret_reply= */ NULL, - SD_JSON_BUILD_PAIR_STRING("name", machine_name), - SD_JSON_BUILD_PAIR_CONDITION(!sd_id128_is_null(uuid), "id", SD_JSON_BUILD_ID128(uuid)), - SD_JSON_BUILD_PAIR_STRING("service", service), - SD_JSON_BUILD_PAIR_STRING("class", "vm"), - SD_JSON_BUILD_PAIR_CONDITION(VSOCK_CID_IS_REGULAR(cid), "vSockCid", SD_JSON_BUILD_UNSIGNED(cid)), - SD_JSON_BUILD_PAIR_CONDITION(!!directory, "rootDirectory", SD_JSON_BUILD_STRING(directory)), - SD_JSON_BUILD_PAIR_CONDITION(!!address, "sshAddress", SD_JSON_BUILD_STRING(address)), - SD_JSON_BUILD_PAIR_CONDITION(!!key_path, "sshPrivateKeyPath", SD_JSON_BUILD_STRING(key_path)), - SD_JSON_BUILD_PAIR_CONDITION(isatty_safe(STDIN_FILENO), "allowInteractiveAuthentication", SD_JSON_BUILD_BOOLEAN(true)), - SD_JSON_BUILD_PAIR_CONDITION(allocate_unit, "allocateUnit", SD_JSON_BUILD_BOOLEAN(true)), - SD_JSON_BUILD_PAIR_CONDITION(pidref_is_set(pidref), "leaderProcessId", JSON_BUILD_PIDREF(pidref))); -} - -int unregister_machine(sd_bus *bus, const char *machine_name) { - _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; - int r; - - assert(bus); - - r = bus_call_method(bus, bus_machine_mgr, "UnregisterMachine", &error, NULL, "s", machine_name); - if (r < 0) - log_debug("Failed to unregister machine: %s", bus_error_message(&error, r)); - - return 0; -} diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index c910cb611c01b..8a9e6b5ce129a 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -48,6 +48,7 @@ #include "log.h" #include "machine-bind-user.h" #include "machine-credential.h" +#include "machine-register.h" #include "main-func.h" #include "mkdir.h" #include 
"namespace-util.h" @@ -86,7 +87,6 @@ #include "utf8.h" #include "vmspawn-mount.h" #include "vmspawn-qemu-config.h" -#include "vmspawn-register.h" #include "vmspawn-scope.h" #include "vmspawn-settings.h" #include "vmspawn-util.h" @@ -3538,9 +3538,11 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { arg_machine, arg_uuid, "systemd-vmspawn", + "vm", &child_pidref, arg_directory, child_cid, + /* local_ifindex= */ 0, child_cid != VMADDR_CID_ANY ? vm_address : NULL, ssh_private_key_path, !arg_keep_unit && arg_runtime_scope == RUNTIME_SCOPE_SYSTEM, @@ -3560,9 +3562,11 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { arg_machine, arg_uuid, "systemd-vmspawn", + "vm", &child_pidref, arg_directory, child_cid, + /* local_ifindex= */ 0, child_cid != VMADDR_CID_ANY ? vm_address : NULL, ssh_private_key_path, !arg_keep_unit, @@ -3678,9 +3682,9 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { terminate_scope(runtime_bus, arg_machine); if (registered_system) - (void) unregister_machine(system_bus, arg_machine); + (void) unregister_machine(system_bus, arg_machine, RUNTIME_SCOPE_SYSTEM); if (registered_runtime) - (void) unregister_machine(runtime_bus, arg_machine); + (void) unregister_machine(runtime_bus, arg_machine, RUNTIME_SCOPE_USER); if (use_vsock) { if (exit_status == INT_MAX) { From d33041012d6a9836fb0112b3c175b8f738b463aa Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Sun, 29 Mar 2026 20:22:24 +0000 Subject: [PATCH 14/14] vmspawn: add scsi-cd disk type for ISO/CD-ROM image support Add DISK_TYPE_SCSI_CD to support attaching disk images as CD-ROM drives, needed for testing El Torito ISO images built by systemd-repart. When --image-disk-type=scsi-cd is specified, the image is attached with media=cdrom and readonly=on on the drive, using scsi-cd as the device driver on the SCSI bus. This also works for --extra-drive= with the scsi-cd: prefix. 
The QEMU configuration matches the standard OVMF CD-ROM boot setup: -drive if=none,media=cdrom,format=raw,readonly=on -device virtio-scsi-pci -device scsi-cd When direct kernel booting with scsi-cd, if the kernel command line contains "rw", append "ro" to override it since CD-ROMs are read-only. Co-developed-by: Claude Opus 4.6 --- man/systemd-vmspawn.xml | 5 ++- src/vmspawn/vmspawn-settings.c | 1 + src/vmspawn/vmspawn-settings.h | 1 + src/vmspawn/vmspawn.c | 76 ++++++++++++++++++++++++++-------- 4 files changed, 64 insertions(+), 19 deletions(-) diff --git a/man/systemd-vmspawn.xml b/man/systemd-vmspawn.xml index ecd8de888e941..633cf92040dbd 100644 --- a/man/systemd-vmspawn.xml +++ b/man/systemd-vmspawn.xml @@ -131,8 +131,9 @@ Specifies the disk type to use for the root disk passed to . Extra drives added via inherit this disk type unless overridden with an explicit disk type prefix. Takes one of virtio-blk, - virtio-scsi, or nvme. Defaults to - virtio-blk. + virtio-scsi, nvme, or scsi-cd. Defaults to + virtio-blk. When scsi-cd is specified, the disk is attached + as a read-only CD-ROM drive. 
diff --git a/src/vmspawn/vmspawn-settings.c b/src/vmspawn/vmspawn-settings.c index d19a65d55debf..57d673c39d82d 100644 --- a/src/vmspawn/vmspawn-settings.c +++ b/src/vmspawn/vmspawn-settings.c @@ -14,6 +14,7 @@ static const char *const disk_type_table[_DISK_TYPE_MAX] = { [DISK_TYPE_VIRTIO_BLK] = "virtio-blk", [DISK_TYPE_VIRTIO_SCSI] = "virtio-scsi", [DISK_TYPE_NVME] = "nvme", + [DISK_TYPE_SCSI_CD] = "scsi-cd", }; DEFINE_STRING_TABLE_LOOKUP(disk_type, DiskType); diff --git a/src/vmspawn/vmspawn-settings.h b/src/vmspawn/vmspawn-settings.h index cfcee6fbb61b6..495dceaae8d5c 100644 --- a/src/vmspawn/vmspawn-settings.h +++ b/src/vmspawn/vmspawn-settings.h @@ -14,6 +14,7 @@ typedef enum DiskType { DISK_TYPE_VIRTIO_BLK, DISK_TYPE_VIRTIO_SCSI, DISK_TYPE_NVME, + DISK_TYPE_SCSI_CD, _DISK_TYPE_MAX, _DISK_TYPE_INVALID = -EINVAL, } DiskType; diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index 8a9e6b5ce129a..3c714ef6dad7c 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -223,7 +223,8 @@ static int help(void) { " -i --image=FILE|DEVICE Root file system disk image or device for the VM\n" " --image-format=FORMAT Specify disk image format (raw, qcow2; default: raw)\n" " --image-disk-type=TYPE\n" - " Specify disk type (virtio-blk, virtio-scsi, nvme; default: virtio-blk)\n" + " Specify disk type (virtio-blk, virtio-scsi, nvme,\n" + " scsi-cd; default: virtio-blk)\n" "\n%3$sHost Configuration:%4$s\n" " --cpus=CPUS Configure number of CPUs in guest\n" " --ram=BYTES[:MAXBYTES[:SLOTS]]\n" @@ -277,7 +278,8 @@ static int help(void) { " --extra-drive=[FORMAT:][DISKTYPE:]PATH\n" " Adds an additional disk to the VM\n" " FORMAT: raw, qcow2\n" - " DISKTYPE: virtio-blk, virtio-scsi, nvme\n" + " DISKTYPE: virtio-blk, virtio-scsi, nvme,\n" + " scsi-cd\n" " --bind-user=NAME Bind user from host to virtual machine\n" " --bind-user-shell=BOOL|PATH\n" " Configure the shell to use for --bind-user= users\n" @@ -2790,15 +2792,16 @@ static int run_virtual_machine(int 
kvm_device_fd, int vhost_device_fd) { r = kernel_cmdline_maybe_append_root(); if (r < 0) return r; + } } bool need_scsi_controller = - arg_image_disk_type == DISK_TYPE_VIRTIO_SCSI && arg_image; + IN_SET(arg_image_disk_type, DISK_TYPE_VIRTIO_SCSI, DISK_TYPE_SCSI_CD) && arg_image; if (!need_scsi_controller) FOREACH_ARRAY(drive, arg_extra_drives.drives, arg_extra_drives.n_drives) { DiskType dt = drive->disk_type >= 0 ? drive->disk_type : arg_image_disk_type; - if (dt == DISK_TYPE_VIRTIO_SCSI) { + if (IN_SET(dt, DISK_TYPE_VIRTIO_SCSI, DISK_TYPE_SCSI_CD)) { need_scsi_controller = true; break; } @@ -2822,12 +2825,20 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { arg_image); } - r = qemu_config_section(config_file, "drive", "vmspawn", - "if", "none", - "file", arg_image, - "format", image_format_to_string(arg_image_format), - "discard", on_off(arg_discard_disk), - "snapshot", on_off(arg_ephemeral)); + if (arg_image_disk_type == DISK_TYPE_SCSI_CD) + r = qemu_config_section(config_file, "drive", "vmspawn", + "if", "none", + "file", arg_image, + "format", image_format_to_string(arg_image_format), + "media", "cdrom", + "readonly", "on"); + else + r = qemu_config_section(config_file, "drive", "vmspawn", + "if", "none", + "file", arg_image, + "format", image_format_to_string(arg_image_format), + "discard", on_off(arg_discard_disk), + "snapshot", on_off(arg_ephemeral)); if (r < 0) return r; @@ -2858,6 +2869,12 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { if (r < 0) return log_oom(); break; + case DISK_TYPE_SCSI_CD: + disk_driver = "scsi-cd"; + r = disk_serial(image_fn, 30, &serial); + if (r < 0) + return log_oom(); + break; default: assert_not_reached(); } @@ -2870,15 +2887,17 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { if (r < 0) return r; - if (arg_image_disk_type == DISK_TYPE_VIRTIO_SCSI) { + if (IN_SET(arg_image_disk_type, DISK_TYPE_VIRTIO_SCSI, DISK_TYPE_SCSI_CD)) { r = 
qemu_config_key(config_file, "bus", "vmspawn_scsi.0"); if (r < 0) return r; } - r = grow_image(arg_image, arg_grow_image); - if (r < 0) - return r; + if (arg_image_disk_type != DISK_TYPE_SCSI_CD) { + r = grow_image(arg_image, arg_grow_image); + if (r < 0) + return r; + } } _cleanup_(sd_event_unrefp) sd_event *event = NULL; @@ -2983,7 +3002,11 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { } else return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Expected regular file or block device, not '%s'.", drive->path); - if (strv_extendf(&cmdline, "driver=%s,cache.direct=off,cache.no-flush=on,file.driver=%s,file.filename=%s,node-name=vmspawn_extra_%zu", image_format_to_string(drive->format), driver, escaped_drive, i) < 0) + DiskType dt = drive->disk_type >= 0 ? drive->disk_type : arg_image_disk_type; + + if (strv_extendf(&cmdline, "driver=%s,cache.direct=off,cache.no-flush=on,file.driver=%s,file.filename=%s,node-name=vmspawn_extra_%zu%s", + image_format_to_string(drive->format), driver, escaped_drive, i, + dt == DISK_TYPE_SCSI_CD ? ",read-only=on" : "") < 0) return log_oom(); _cleanup_free_ char *drive_fn = NULL; @@ -2998,8 +3021,6 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { if (strv_extend(&cmdline, "-device") < 0) return log_oom(); - DiskType dt = drive->disk_type >= 0 ? 
drive->disk_type : arg_image_disk_type; - switch (dt) { case DISK_TYPE_VIRTIO_BLK: if (strv_extendf(&cmdline, "virtio-blk-pci,drive=vmspawn_extra_%zu,serial=%s", i++, escaped_drive_fn) < 0) @@ -3023,6 +3044,15 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { return log_oom(); break; } + case DISK_TYPE_SCSI_CD: { + _cleanup_free_ char *serial = NULL; + r = disk_serial(escaped_drive_fn, 30, &serial); + if (r < 0) + return log_oom(); + if (strv_extendf(&cmdline, "scsi-cd,bus=vmspawn_scsi.0,drive=vmspawn_extra_%zu,serial=%s", i++, serial) < 0) + return log_oom(); + break; + } default: assert_not_reached(); } @@ -3409,6 +3439,11 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { cred->id); } + /* CD-ROMs are read-only, so override any "rw" on the kernel command line. */ + if (arg_image_disk_type == DISK_TYPE_SCSI_CD && strv_contains(arg_kernel_cmdline_extra, "rw")) + if (strv_extend(&arg_kernel_cmdline_extra, "ro") < 0) + return log_oom(); + r = cmdline_add_kernel_cmdline(&cmdline, kernel, smbios_dir); if (r < 0) return r; @@ -3771,6 +3806,13 @@ static int verify_arguments(void) { if (!strv_isempty(arg_initrds) && !arg_linux) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Option --initrd= cannot be used without --linux=."); + if (arg_image_disk_type == DISK_TYPE_SCSI_CD) { + if (arg_ephemeral) + log_warning("--ephemeral has no effect with --image-disk-type=scsi-cd (CD-ROMs are read-only)."); + if (arg_discard_disk) + log_warning("--discard-disk has no effect with --image-disk-type=scsi-cd (CD-ROMs are read-only)."); + } + return 0; }