diff --git a/docs/ENVIRONMENT.md b/docs/ENVIRONMENT.md
index 5390754661879..a40ac4744db47 100644
--- a/docs/ENVIRONMENT.md
+++ b/docs/ENVIRONMENT.md
@@ -679,6 +679,11 @@ SYSTEMD_HOME_DEBUG_SUFFIX=foo \
string format. Overrides the default maximum allowed size for a file-descriptor
based input record to be stored in the journal.
+* `$SYSTEMD_JOURNAL_REMOTE_CONFIG_FILE` – path to a configuration file for
+ `systemd-journal-remote`. When set, the specified file is used instead of the
+ default configuration file and drop-in directories. If set to `/dev/null`,
+ configuration file parsing is skipped entirely.
+
* `$SYSTEMD_CATALOG` – path to the compiled catalog database file to use for
`journalctl -x`, `journalctl --update-catalog`, `journalctl --list-catalog`
and related calls.
diff --git a/man/systemd-nspawn.xml b/man/systemd-nspawn.xml
index d241ca5c52c5e..d9870451921bc 100644
--- a/man/systemd-nspawn.xml
+++ b/man/systemd-nspawn.xml
@@ -1565,6 +1565,33 @@ After=sys-subsystem-net-devices-ens1.device
+
+
+
+ Forward the container's journal to the host by starting
+ systemd-journal-remote8
+ listening on a Unix socket that is bind-mounted into the container. The container's
+ systemd-journald8
+ connects to the socket via the journal.forward_to_socket credential and streams
+ journal entries to the host in real-time. Takes a path to a journal file or directory where the received
+ entries will be stored. If the path ends in .journal, entries are written to a single
+ file; otherwise, entries are split per host into the specified directory.
+
+
+
+
+
+
+
+ Specifies a configuration file for the
+ systemd-journal-remote8
+ instance started by . When not specified, defaults to
+ /dev/null, causing systemd-journal-remote to ignore its default
+ configuration files.
+
+
+
+
@@ -1922,6 +1949,18 @@ After=sys-subsystem-net-devices-ens1.device
Other
+
+
+
+ Takes either system or user to specify whether
+ to interact with the user service manager or the system service manager and whether to register with
+ the user machined instance or the system machined instance. If unspecified, the system service manager
+ and machined instance will be used when running as root, otherwise the user service manager and machined
+ instance will be used.
+
+
+
+
diff --git a/man/systemd-vmspawn.xml b/man/systemd-vmspawn.xml
index 28070cfe8f66c..633cf92040dbd 100644
--- a/man/systemd-vmspawn.xml
+++ b/man/systemd-vmspawn.xml
@@ -70,13 +70,13 @@
-
-
+
- Specify whether to interact with the user manager or the system manager and whether
- to register with the user machined instance or the system machined instance. If
- unspecified, the system manager and machined instance will be used when running as root, otherwise
- the user manager and machined instance will be used.
+ Takes either system or user to specify whether
+ to interact with the user service manager or the system service manager and whether to register with
+ the user machined instance or the system machined instance. If unspecified, the system service manager
+ and machined instance will be used when running as root, otherwise the user service manager and machined
+ instance will be used.
@@ -131,8 +131,9 @@
Specifies the disk type to use for the root disk passed to .
Extra drives added via inherit this disk type unless overridden
with an explicit disk type prefix. Takes one of virtio-blk,
- virtio-scsi, or nvme. Defaults to
- virtio-blk.
+ virtio-scsi, nvme, or scsi-cd. Defaults to
+ virtio-blk. When scsi-cd is specified, the disk is attached
+ as a read-only CD-ROM drive.
@@ -166,10 +167,12 @@
-
+
- The amount of memory to start the virtual machine with.
- Defaults to 2G.
+ The amount of memory to start the virtual machine with. Defaults to 2G.
+ If a maximum size is specified after a colon, memory hotplug is enabled with the given
+ upper limit. The number of hotplug slots can optionally be specified after a second colon
+ and defaults to 1.
@@ -184,6 +187,15 @@
+
+
+
+ Controls whether to enable CXL (Compute Express Link) support in the virtual
+ machine. Only supported on x86 and ARM architectures.
+
+
+
+
@@ -661,6 +673,18 @@
+
+
+
+ Specifies a configuration file for
+ systemd-journal-remote8
+ to use when forwarding journal entries from the VM. If not specified,
+ /dev/null is used, which causes
+ systemd-journal-remote to ignore its default configuration files.
+
+
+
+
diff --git a/shell-completion/bash/systemd-vmspawn b/shell-completion/bash/systemd-vmspawn
index b035a42a6550e..26c86aadb22a5 100644
--- a/shell-completion/bash/systemd-vmspawn
+++ b/shell-completion/bash/systemd-vmspawn
@@ -30,8 +30,8 @@ _systemd_vmspawn() {
local -A OPTS=(
[STANDALONE]='-h --help --version -q --quiet --no-pager -n --network-tap --network-user-mode --user --system -x --ephemeral'
- [PATH]='-D --directory -i --image --linux --initrd --extra-drive --forward-journal --efi-nvram-template'
- [BOOL]='--kvm --vsock --tpm --discard-disk --register --pass-ssh-key'
+ [PATH]='-D --directory -i --image --linux --initrd --extra-drive --forward-journal --forward-journal-config --efi-nvram-template'
+ [BOOL]='--kvm --cxl --vsock --tpm --discard-disk --register --pass-ssh-key'
[SECURE_BOOT]='--secure-boot'
[FIRMWARE]='--firmware'
[FIRMWARE_FEATURES]='--firmware-features'
diff --git a/src/basic/escape.c b/src/basic/escape.c
index e1771bf432278..9af8efacc7423 100644
--- a/src/basic/escape.c
+++ b/src/basic/escape.c
@@ -447,10 +447,10 @@ char* escape_non_printable_full(const char *str, size_t console_width, XEscapeFl
FLAGS_SET(flags, XESCAPE_FORCE_ELLIPSIS));
}
-char* octescape(const char *s, size_t len) {
+char* octescape_full(const char *s, size_t len, const char *bad) {
char *buf, *t;
- /* Escapes \ and " chars, in \nnn style escaping. */
+ /* Escapes all chars in bad, in addition to \ and " chars, in \nnn octal style escaping. */
assert(s || len == 0);
@@ -467,7 +467,7 @@ char* octescape(const char *s, size_t len) {
for (size_t i = 0; i < len; i++) {
uint8_t u = (uint8_t) s[i];
- if (u < ' ' || u >= 127 || IN_SET(u, '\\', '"')) {
+ if (u < ' ' || u >= 127 || IN_SET(u, '\\', '"') || (bad && strchr(bad, u))) {
*(t++) = '\\';
*(t++) = '0' + (u >> 6);
*(t++) = '0' + ((u >> 3) & 7);
diff --git a/src/basic/escape.h b/src/basic/escape.h
index a8b68fa75c277..625758f2f4c9f 100644
--- a/src/basic/escape.h
+++ b/src/basic/escape.h
@@ -59,7 +59,10 @@ char* xescape_full(const char *s, const char *bad, size_t console_width, XEscape
static inline char* xescape(const char *s, const char *bad) {
return xescape_full(s, bad, SIZE_MAX, 0);
}
-char* octescape(const char *s, size_t len);
+char* octescape_full(const char *s, size_t len, const char *bad);
+static inline char* octescape(const char *s, size_t len) {
+ return octescape_full(s, len, NULL);
+}
char* decescape(const char *s, size_t len, const char *bad) _nonnull_if_nonzero_(1, 2);
char* escape_non_printable_full(const char *str, size_t console_width, XEscapeFlags flags);
diff --git a/src/journal-remote/journal-remote-main.c b/src/journal-remote/journal-remote-main.c
index 0ff44ede6fc1c..69d5665df2ded 100644
--- a/src/journal-remote/journal-remote-main.c
+++ b/src/journal-remote/journal-remote-main.c
@@ -22,6 +22,7 @@
#include "main-func.h"
#include "microhttpd-util.h"
#include "parse-argument.h"
+#include "path-util.h"
#include "parse-helpers.h"
#include "parse-util.h"
#include "pretty-print.h"
@@ -828,6 +829,22 @@ static int parse_config(void) {
{}
};
+ const char *config_file = getenv("SYSTEMD_JOURNAL_REMOTE_CONFIG_FILE");
+ if (config_file) {
+ if (path_equal(config_file, "/dev/null"))
+ return 0;
+
+ return config_parse(
+ /* unit= */ NULL,
+ config_file,
+ /* f= */ NULL,
+ "Remote\0",
+ config_item_table_lookup, items,
+ CONFIG_PARSE_WARN,
+ /* userdata= */ NULL,
+ /* ret_stat= */ NULL);
+ }
+
return config_parse_standard_file_with_dropins(
"systemd/journal-remote.conf",
"Remote\0",
diff --git a/src/libsystemd/sd-path/path-lookup.c b/src/libsystemd/sd-path/path-lookup.c
index 32c14fb14a7d5..3de91cdc8426e 100644
--- a/src/libsystemd/sd-path/path-lookup.c
+++ b/src/libsystemd/sd-path/path-lookup.c
@@ -5,8 +5,10 @@
#include "alloc-util.h"
#include "fs-util.h"
#include "log.h"
+#include "mkdir.h"
#include "path-lookup.h"
#include "path-util.h"
+#include "random-util.h"
#include "stat-util.h"
#include "string-util.h"
#include "strv.h"
@@ -101,6 +103,40 @@ int runtime_directory(RuntimeScope scope, const char *fallback_suffix, char **re
return 1;
}
+int runtime_directory_make(RuntimeScope scope, const char *prefix, char **ret_dir, char **ret_dir_destroy) {
+ _cleanup_free_ char *subdir = NULL, *dir = NULL;
+ int r;
+
+ assert(prefix);
+ assert(ret_dir);
+
+ if (asprintf(&subdir, "systemd/%s.%" PRIx64, prefix, random_u64()) < 0)
+ return -ENOMEM;
+
+ r = runtime_directory(scope, subdir, &dir);
+ if (r < 0)
+ return r;
+
+ if (r > 0) {
+ r = mkdir_p(dir, 0755);
+ if (r < 0)
+ return r;
+
+ if (ret_dir_destroy) {
+ char *copy = strdup(dir);
+ if (!copy)
+ return -ENOMEM;
+ *ret_dir_destroy = copy;
+ }
+ } else {
+ if (ret_dir_destroy)
+ *ret_dir_destroy = NULL;
+ }
+
+ *ret_dir = TAKE_PTR(dir);
+ return 0;
+}
+
static const char* const user_data_unit_paths[] = {
"/usr/local/lib/systemd/user",
"/usr/local/share/systemd/user",
diff --git a/src/libsystemd/sd-path/path-lookup.h b/src/libsystemd/sd-path/path-lookup.h
index 67a4f5d69cf0f..32284df6423e9 100644
--- a/src/libsystemd/sd-path/path-lookup.h
+++ b/src/libsystemd/sd-path/path-lookup.h
@@ -60,6 +60,7 @@ void lookup_paths_done(LookupPaths *p);
int config_directory_generic(RuntimeScope scope, const char *suffix, char **ret);
int runtime_directory_generic(RuntimeScope scope, const char *suffix, char **ret);
int runtime_directory(RuntimeScope scope, const char *fallback_suffix, char **ret);
+int runtime_directory_make(RuntimeScope scope, const char *prefix, char **ret_dir, char **ret_dir_destroy);
/* We don't treat /etc/xdg/systemd/ in these functions as the xdg base dir spec suggests because we assume
* that is a link to /etc/systemd/ anyway. */
diff --git a/src/nspawn/nspawn-register.c b/src/nspawn/nspawn-register.c
index 04031adcc5ab5..ace0f6637a545 100644
--- a/src/nspawn/nspawn-register.c
+++ b/src/nspawn/nspawn-register.c
@@ -131,150 +131,6 @@ static int can_set_coredump_receive(sd_bus *bus) {
return r >= 0;
}
-static int register_machine_ex(
- sd_bus *bus,
- const char *machine_name,
- const PidRef *pid,
- const char *directory,
- sd_id128_t uuid,
- int local_ifindex,
- const char *service,
- sd_bus_error *error) {
-
- _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
- int r;
-
- assert(bus);
- assert(machine_name);
- assert(service);
- assert(error);
-
- r = bus_message_new_method_call(bus, &m, bus_machine_mgr, "RegisterMachineEx");
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_message_append(m, "s", machine_name);
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_message_open_container(m, 'a', "(sv)");
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_message_append(
- m,
- "(sv)(sv)(sv)",
- "Id", "ay", SD_BUS_MESSAGE_APPEND_ID128(uuid),
- "Service", "s", service,
- "Class", "s", "container");
- if (r < 0)
- return bus_log_create_error(r);
-
- if (pidref_is_set(pid)) {
- if (pid->fd >= 0) {
- r = sd_bus_message_append(m, "(sv)", "LeaderPIDFD", "h", pid->fd);
- if (r < 0)
- return bus_log_create_error(r);
- }
-
- if (pid->fd_id > 0) {
- r = sd_bus_message_append(m, "(sv)", "LeaderPIDFDID", "t", pid->fd_id);
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_message_append(m, "(sv)", "LeaderPID", "u", pid->pid);
- if (r < 0)
- return bus_log_create_error(r);
- }
- }
-
- if (!isempty(directory)) {
- r = sd_bus_message_append(m, "(sv)", "RootDirectory", "s", directory);
- if (r < 0)
- return bus_log_create_error(r);
- }
-
- if (local_ifindex > 0) {
- r = sd_bus_message_append(m, "(sv)", "NetworkInterfaces", "ai", 1, local_ifindex);
- if (r < 0)
- return bus_log_create_error(r);
- }
-
- r = sd_bus_message_close_container(m);
- if (r < 0)
- return bus_log_create_error(r);
-
- return sd_bus_call(bus, m, 0, error, NULL);
-}
-
-int register_machine(
- sd_bus *bus,
- const char *machine_name,
- const PidRef *pid,
- const char *directory,
- sd_id128_t uuid,
- int local_ifindex,
- const char *service) {
-
- _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
- int r;
-
- assert(bus);
- assert(machine_name);
- assert(service);
-
- r = register_machine_ex(
- bus,
- machine_name,
- pid,
- directory,
- uuid,
- local_ifindex,
- service,
- &error);
- if (r >= 0)
- return 0;
- if (!sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_METHOD))
- return log_error_errno(r, "Failed to register machine: %s", bus_error_message(&error, r));
-
- sd_bus_error_free(&error);
-
- r = bus_call_method(
- bus,
- bus_machine_mgr,
- "RegisterMachineWithNetwork",
- &error,
- NULL,
- "sayssusai",
- machine_name,
- SD_BUS_MESSAGE_APPEND_ID128(uuid),
- service,
- "container",
- pidref_is_set(pid) ? (uint32_t) pid->pid : 0,
- strempty(directory),
- local_ifindex > 0 ? 1 : 0, local_ifindex);
- if (r < 0)
- return log_error_errno(r, "Failed to register machine: %s", bus_error_message(&error, r));
-
- return 0;
-}
-
-int unregister_machine(
- sd_bus *bus,
- const char *machine_name) {
-
- _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
- int r;
-
- assert(bus);
-
- r = bus_call_method(bus, bus_machine_mgr, "UnregisterMachine", &error, NULL, "s", machine_name);
- if (r < 0)
- log_debug("Failed to unregister machine: %s", bus_error_message(&error, r));
-
- return 0;
-}
-
int allocate_scope(
sd_bus *bus,
const char *machine_name,
diff --git a/src/nspawn/nspawn-register.h b/src/nspawn/nspawn-register.h
index c4b8048606251..d82c780181c6d 100644
--- a/src/nspawn/nspawn-register.h
+++ b/src/nspawn/nspawn-register.h
@@ -4,16 +4,6 @@
#include "shared-forward.h"
#include "nspawn-settings.h"
-int register_machine(
- sd_bus *bus,
- const char *machine_name,
- const PidRef *pid,
- const char *directory,
- sd_id128_t uuid,
- int local_ifindex,
- const char *service);
-int unregister_machine(sd_bus *bus, const char *machine_name);
-
typedef enum AllocateScopeFlags {
ALLOCATE_SCOPE_ALLOW_PIDFD = 1 << 0,
} AllocateScopeFlags;
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index 1740ab4d6eb18..42eb924a06b45 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -50,6 +50,7 @@
#include "fd-util.h"
#include "fdset.h"
#include "fileio.h"
+#include "fork-notify.h"
#include "format-util.h"
#include "fs-util.h"
#include "gpt.h"
@@ -66,6 +67,7 @@
#include "loopback-setup.h"
#include "machine-bind-user.h"
#include "machine-credential.h"
+#include "machine-register.h"
#include "main-func.h"
#include "mkdir.h"
#include "mount-util.h"
@@ -91,6 +93,7 @@
#include "osc-context.h"
#include "pager.h"
#include "parse-argument.h"
+#include "path-lookup.h"
#include "parse-util.h"
#include "path-util.h"
#include "pidref.h"
@@ -129,6 +132,7 @@
/* The notify socket inside the container it can use to talk to nspawn using the sd_notify(3) protocol */
#define NSPAWN_NOTIFY_SOCKET_PATH "/run/host/notify"
#define NSPAWN_MOUNT_TUNNEL "/run/host/incoming"
+#define NSPAWN_JOURNAL_SOCKET_PATH "/run/host/journal/socket"
#define EXIT_FORCE_RESTART 133
@@ -153,7 +157,7 @@ static char *arg_hostname = NULL; /* The name the payload sees by default */
static const char *arg_selinux_context = NULL;
static const char *arg_selinux_apifs_context = NULL;
static char *arg_slice = NULL;
-static bool arg_private_network; /* initialized depending on arg_privileged in run() */
+static bool arg_private_network; /* defaulted depending on arg_runtime_scope in verify_arguments() */
static bool arg_read_only = false;
static StartMode arg_start_mode = START_PID1;
static bool arg_ephemeral = false;
@@ -212,7 +216,9 @@ static VolatileMode arg_volatile_mode = VOLATILE_NO;
static ExposePort *arg_expose_ports = NULL;
static char **arg_property = NULL;
static sd_bus_message *arg_property_message = NULL;
-static UserNamespaceMode arg_userns_mode; /* initialized depending on arg_privileged in run() */
+static UserNamespaceMode arg_userns_mode; /* defaulted depending on arg_runtime_scope in verify_arguments(),
+ * -U sets to _USER_NAMESPACE_MODE_INVALID which is resolved there
+ * once arg_runtime_scope has its final value. */
static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U;
static unsigned arg_delegate_container_ranges = 0;
static UserNamespaceOwnership arg_userns_ownership = _USER_NAMESPACE_OWNERSHIP_INVALID;
@@ -253,9 +259,11 @@ static char *arg_settings_filename = NULL;
static Architecture arg_architecture = _ARCHITECTURE_INVALID;
static ImagePolicy *arg_image_policy = NULL;
static char *arg_background = NULL;
-static bool arg_privileged = false;
static bool arg_cleanup = false;
static bool arg_ask_password = true;
+static char *arg_forward_journal = NULL;
+static char *arg_forward_journal_config = NULL;
+static RuntimeScope arg_runtime_scope = _RUNTIME_SCOPE_INVALID;
STATIC_DESTRUCTOR_REGISTER(arg_directory, freep);
STATIC_DESTRUCTOR_REGISTER(arg_template, freep);
@@ -296,6 +304,8 @@ STATIC_DESTRUCTOR_REGISTER(arg_bind_user_groups, strv_freep);
STATIC_DESTRUCTOR_REGISTER(arg_settings_filename, freep);
STATIC_DESTRUCTOR_REGISTER(arg_image_policy, image_policy_freep);
STATIC_DESTRUCTOR_REGISTER(arg_background, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_forward_journal, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_forward_journal_config, freep);
static int parse_private_users(
const char *s,
@@ -491,6 +501,10 @@ static int help(void) {
" --link-journal=MODE Link up guest journal, one of no, auto, guest, \n"
" host, try-guest, try-host\n"
" -j Equivalent to --link-journal=try-guest\n"
+ " --forward-journal=FILE|DIR\n"
+ " Forward the container's journal to the host\n"
+ " --forward-journal-config=PATH\n"
+ " Configuration file for systemd-journal-remote\n"
"\n%3$sMounts:%4$s\n"
" --bind=PATH[:PATH[:OPTIONS]]\n"
" Bind mount a file or directory from the host into\n"
@@ -521,6 +535,9 @@ static int help(void) {
" --load-credential=ID:PATH\n"
" Load credential to pass to container from file or\n"
" AF_UNIX stream socket.\n"
+ "\n%3$sOther:%4$s\n"
+ " --runtime-scope=system|user\n"
+ " Run in system or user service manager scope\n"
"\nSee the %2$s for details.\n",
program_invocation_short_name,
link,
@@ -749,6 +766,9 @@ static int parse_argv(int argc, char *argv[]) {
ARG_CLEANUP,
ARG_NO_ASK_PASSWORD,
ARG_MSTACK,
+ ARG_FORWARD_JOURNAL,
+ ARG_FORWARD_JOURNAL_CONFIG,
+ ARG_RUNTIME_SCOPE,
};
static const struct option options[] = {
@@ -830,6 +850,9 @@ static int parse_argv(int argc, char *argv[]) {
{ "cleanup", no_argument, NULL, ARG_CLEANUP },
{ "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD },
{ "mstack", required_argument, NULL, ARG_MSTACK },
+ { "forward-journal", required_argument, NULL, ARG_FORWARD_JOURNAL },
+ { "forward-journal-config", required_argument, NULL, ARG_FORWARD_JOURNAL_CONFIG },
+ { "runtime-scope", required_argument, NULL, ARG_RUNTIME_SCOPE },
{}
};
@@ -1229,8 +1252,11 @@ static int parse_argv(int argc, char *argv[]) {
case 'U':
if (userns_supported()) {
- /* Note that arg_userns_ownership is implied by USER_NAMESPACE_PICK further down. */
- arg_userns_mode = arg_privileged ? USER_NAMESPACE_PICK : USER_NAMESPACE_MANAGED;
+ /* Note that arg_userns_ownership is implied by USER_NAMESPACE_PICK further down.
+ * We use _USER_NAMESPACE_MODE_INVALID as a marker so that the final resolution
+ * (PICK vs MANAGED) is deferred to verify_arguments() where arg_runtime_scope
+ * has its final value regardless of option order. */
+ arg_userns_mode = _USER_NAMESPACE_MODE_INVALID;
arg_uid_shift = UID_INVALID;
arg_uid_range = UINT32_C(0x10000);
@@ -1599,6 +1625,24 @@ static int parse_argv(int argc, char *argv[]) {
arg_ask_password = false;
break;
+ case ARG_FORWARD_JOURNAL:
+ r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_forward_journal);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_FORWARD_JOURNAL_CONFIG:
+ r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_forward_journal_config);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_RUNTIME_SCOPE:
+ arg_runtime_scope = runtime_scope_from_string(optarg);
+ if (!IN_SET(arg_runtime_scope, RUNTIME_SCOPE_SYSTEM, RUNTIME_SCOPE_USER))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse runtime scope: %s", optarg);
+ break;
+
case '?':
return -EINVAL;
@@ -1626,6 +1670,9 @@ static int parse_argv(int argc, char *argv[]) {
arg_caps_retain |= arg_private_network ? UINT64_C(1) << CAP_NET_ADMIN : 0;
arg_caps_retain &= ~minus;
+ if (arg_forward_journal_config && !arg_forward_journal)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--forward-journal-config= requires --forward-journal=.");
+
/* Make sure to parse environment before we reset the settings mask below */
r = parse_environment();
if (r < 0)
@@ -1645,20 +1692,36 @@ static int parse_argv(int argc, char *argv[]) {
static int verify_arguments(void) {
int r;
+ /* Apply scope-dependent defaults now that arg_runtime_scope has its final value. -U sets
+ * arg_userns_mode to _USER_NAMESPACE_MODE_INVALID to defer the resolution to here. */
+ if (!FLAGS_SET(arg_settings_mask, SETTING_USERNS))
+ arg_userns_mode = arg_runtime_scope == RUNTIME_SCOPE_SYSTEM ? USER_NAMESPACE_NO : USER_NAMESPACE_MANAGED;
+ else if (arg_userns_mode == _USER_NAMESPACE_MODE_INVALID)
+ arg_userns_mode = arg_runtime_scope == RUNTIME_SCOPE_SYSTEM ? USER_NAMESPACE_PICK : USER_NAMESPACE_MANAGED;
+
+ if (!FLAGS_SET(arg_settings_mask, SETTING_NETWORK))
+ arg_private_network = arg_runtime_scope != RUNTIME_SCOPE_SYSTEM;
+
SET_FLAG(arg_mount_settings, MOUNT_UNMANAGED, arg_userns_mode != USER_NAMESPACE_MANAGED);
/* We can mount selinuxfs only if we are privileged and can do so before userns. In managed mode we
* have to enter the userns earlier, hence cannot do that. */
- /* SET_FLAG(arg_mount_settings, MOUNT_PRIVILEGED, arg_privileged); */
+ /* SET_FLAG(arg_mount_settings, MOUNT_PRIVILEGED, arg_runtime_scope == RUNTIME_SCOPE_SYSTEM); */
SET_FLAG(arg_mount_settings, MOUNT_PRIVILEGED, arg_userns_mode != USER_NAMESPACE_MANAGED);
SET_FLAG(arg_mount_settings, MOUNT_USE_USERNS, arg_userns_mode != USER_NAMESPACE_NO);
+ /* If we're not unsharing the network namespace and are unsharing the user namespace, we won't have
+ * permissions to bind ports in the container, so let's drop the CAP_NET_BIND_SERVICE capability to
+ * indicate that. */
+ if (!arg_private_network && arg_userns_mode != USER_NAMESPACE_NO)
+ arg_caps_retain &= ~(UINT64_C(1) << CAP_NET_BIND_SERVICE);
+
if (arg_private_network)
SET_FLAG(arg_mount_settings, MOUNT_APPLY_APIVFS_NETNS, arg_private_network);
- if (!arg_privileged && arg_userns_mode != USER_NAMESPACE_MANAGED)
- return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unprivileged operation requires managed user namespaces, as otherwise no UID range can be acquired.");
+ if (arg_runtime_scope != RUNTIME_SCOPE_SYSTEM && arg_userns_mode != USER_NAMESPACE_MANAGED)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "User-scoped operation requires managed user namespaces, as otherwise no UID range can be acquired.");
if (arg_userns_mode == USER_NAMESPACE_MANAGED && !arg_private_network)
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Managed user namespace operation requires private networking, as otherwise /sys/ may not be mounted.");
@@ -3183,7 +3246,7 @@ static int determine_names(void) {
if (arg_machine) {
_cleanup_(image_unrefp) Image *i = NULL;
- r = image_find(arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER,
+ r = image_find(arg_runtime_scope,
IMAGE_MACHINE, arg_machine, NULL, &i);
if (r == -ENOENT)
return log_error_errno(r, "No image for machine '%s'.", arg_machine);
@@ -5147,7 +5210,7 @@ static int load_settings(void) {
_SD_PATH_INVALID,
};
- const uint64_t *q = arg_privileged ? lookup_dir_system : lookup_dir_user;
+ const uint64_t *q = arg_runtime_scope == RUNTIME_SCOPE_SYSTEM ? lookup_dir_system : lookup_dir_user;
for (; *q != _SD_PATH_INVALID; q++) {
_cleanup_free_ char *cd = NULL;
r = sd_path_lookup(*q, "systemd/nspawn", &cd);
@@ -5578,7 +5641,7 @@ static int run_container(
/* Registration always happens on the system bus */
_cleanup_(sd_bus_flush_close_unrefp) sd_bus *system_bus = NULL;
- if (arg_register || (arg_privileged && !arg_keep_unit)) {
+ if (arg_register || (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM && !arg_keep_unit)) {
r = sd_bus_default_system(&system_bus);
if (r < 0)
return log_error_errno(r, "Failed to open system bus: %m");
@@ -5594,7 +5657,7 @@ static int run_container(
_cleanup_(sd_bus_unrefp) sd_bus *runtime_bus = NULL;
if (arg_register || !arg_keep_unit) {
- if (arg_privileged)
+ if (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM)
runtime_bus = sd_bus_ref(system_bus);
else {
r = sd_bus_default_user(&user_bus);
@@ -5666,28 +5729,40 @@ static int run_container(
r = register_machine(
system_bus,
arg_machine,
+ arg_uuid,
+ arg_container_service_name,
+ "container",
pid,
arg_directory,
- arg_uuid,
+ /* cid= */ 0,
ifi,
- arg_container_service_name);
+ /* address= */ NULL,
+ /* key_path= */ NULL,
+ /* allocate_unit= */ false,
+ RUNTIME_SCOPE_SYSTEM);
if (r < 0) {
- if (arg_privileged) /* if privileged the request to register definitely failed */
+ if (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM) /* if system scope the request to register definitely failed */
return r;
log_notice_errno(r, "Failed to register machine in system context, will try in user context.");
} else
registered_system = true;
- if (!arg_privileged) {
+ if (arg_runtime_scope != RUNTIME_SCOPE_SYSTEM) {
r = register_machine(
runtime_bus,
arg_machine,
+ arg_uuid,
+ arg_container_service_name,
+ "container",
pid,
arg_directory,
- arg_uuid,
+ /* cid= */ 0,
ifi,
- arg_container_service_name);
+ /* address= */ NULL,
+ /* key_path= */ NULL,
+ /* allocate_unit= */ false,
+ RUNTIME_SCOPE_USER);
if (r < 0) {
if (!registered_system) /* neither registration worked: fail */
return r;
@@ -5908,9 +5983,9 @@ static int run_container(
/* Tell machined that we are gone. */
if (registered_system)
- (void) unregister_machine(system_bus, arg_machine);
+ (void) unregister_machine(system_bus, arg_machine, RUNTIME_SCOPE_SYSTEM);
if (registered_runtime)
- (void) unregister_machine(runtime_bus, arg_machine);
+ (void) unregister_machine(runtime_bus, arg_machine, RUNTIME_SCOPE_USER);
if (r < 0)
/* We failed to wait for the container, or the container exited abnormally. */
@@ -6062,19 +6137,13 @@ static int cant_be_in_netns(void) {
}
static void initialize_defaults(void) {
- arg_privileged = getuid() == 0;
-
- /* If running unprivileged default to systemd-nsresourced operation */
- arg_userns_mode = arg_privileged ? USER_NAMESPACE_NO : USER_NAMESPACE_MANAGED;
-
- /* Imply private networking for unprivileged operation, since kernel otherwise refuses mounting sysfs */
- arg_private_network = !arg_privileged;
+ arg_runtime_scope = getuid() == 0 ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER;
}
static void cleanup_propagation_and_export_directories(void) {
const char *p;
- if (!arg_machine || !arg_privileged)
+ if (!arg_machine || arg_runtime_scope != RUNTIME_SCOPE_SYSTEM)
return;
p = strjoina("/run/systemd/nspawn/propagate/", arg_machine);
@@ -6115,6 +6184,9 @@ static int run(int argc, char *argv[]) {
_cleanup_(sd_netlink_unrefp) sd_netlink *nfnl = NULL;
_cleanup_(pidref_done) PidRef pid = PIDREF_NULL;
_cleanup_(sd_varlink_unrefp) sd_varlink *nsresource_link = NULL, *mountfsd_link = NULL;
+ _cleanup_(fork_notify_terminate) PidRef journal_remote_pidref = PIDREF_NULL;
+ _cleanup_free_ char *runtime_dir = NULL;
+ _cleanup_(rm_rf_physical_and_freep) char *runtime_dir_destroy = NULL;
log_setup();
@@ -6164,12 +6236,6 @@ static int run(int argc, char *argv[]) {
if (r < 0)
goto finish;
- /* If we're not unsharing the network namespace and are unsharing the user namespace, we won't have
- * permissions to bind ports in the container, so let's drop the CAP_NET_BIND_SERVICE capability to
- * indicate that. */
- if (!arg_private_network && arg_userns_mode != USER_NAMESPACE_NO)
- arg_caps_retain &= ~(UINT64_C(1) << CAP_NET_BIND_SERVICE);
-
r = verify_arguments();
if (r < 0)
goto finish;
@@ -6274,7 +6340,7 @@ static int run(int argc, char *argv[]) {
r = create_ephemeral_snapshot(
arg_directory,
- arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER,
+ arg_runtime_scope,
arg_read_only,
&tree_global_lock,
&tree_local_lock,
@@ -6295,10 +6361,10 @@ static int run(int argc, char *argv[]) {
goto finish;
r = image_path_lock(
- arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER,
+ arg_runtime_scope,
arg_directory,
(arg_read_only ? LOCK_SH : LOCK_EX) | LOCK_NB,
- arg_privileged ? &tree_global_lock : NULL,
+ arg_runtime_scope == RUNTIME_SCOPE_SYSTEM ? &tree_global_lock : NULL,
&tree_local_lock);
if (r == -EBUSY) {
log_error_errno(r, "Directory tree %s is currently busy.", arg_directory);
@@ -6426,10 +6492,10 @@ static int run(int argc, char *argv[]) {
/* Always take an exclusive lock on our own ephemeral copy. */
r = image_path_lock(
- arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER,
+ arg_runtime_scope,
np,
LOCK_EX|LOCK_NB,
- arg_privileged ? &tree_global_lock : NULL,
+ arg_runtime_scope == RUNTIME_SCOPE_SYSTEM ? &tree_global_lock : NULL,
&tree_local_lock);
if (r < 0) {
log_error_errno(r, "Failed to create image lock: %m");
@@ -6454,10 +6520,10 @@ static int run(int argc, char *argv[]) {
remove_image = true;
} else {
r = image_path_lock(
- arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER,
+ arg_runtime_scope,
arg_image,
(arg_read_only ? LOCK_SH : LOCK_EX) | LOCK_NB,
- arg_privileged ? &tree_global_lock : NULL,
+ arg_runtime_scope == RUNTIME_SCOPE_SYSTEM ? &tree_global_lock : NULL,
&tree_local_lock);
if (r == -EBUSY) {
log_error_errno(r, "Disk image %s is currently busy.", arg_image);
@@ -6646,6 +6712,48 @@ static int run(int argc, char *argv[]) {
expose_args.nfnl = nfnl;
}
+ if (arg_forward_journal) {
+ r = runtime_directory_make(arg_runtime_scope, "nspawn-journal", &runtime_dir, &runtime_dir_destroy);
+ if (r < 0) {
+ log_error_errno(r, "Failed to create runtime directory: %m");
+ goto finish;
+ }
+
+ _cleanup_free_ char *socket_path = path_join(runtime_dir, "socket");
+ if (!socket_path) {
+ r = log_oom();
+ goto finish;
+ }
+
+ r = fork_journal_remote(socket_path, arg_forward_journal, arg_forward_journal_config, &journal_remote_pidref);
+ if (r < 0)
+ goto finish;
+
+ CustomMount *cm = custom_mount_add(&arg_custom_mounts, &arg_n_custom_mounts, CUSTOM_MOUNT_BIND);
+ if (!cm) {
+ r = log_oom();
+ goto finish;
+ }
+
+ cm->source = TAKE_PTR(socket_path);
+ cm->destination = strdup(NSPAWN_JOURNAL_SOCKET_PATH);
+ cm->read_only = true;
+ if (!cm->destination) {
+ r = log_oom();
+ goto finish;
+ }
+
+ r = machine_credential_add(&arg_credentials, "journal.forward_to_socket", NSPAWN_JOURNAL_SOCKET_PATH, SIZE_MAX);
+ if (r == -EEXIST) {
+ log_error("Credential 'journal.forward_to_socket' already set via --set-credential=, refusing --forward-journal=.");
+ goto finish;
+ }
+ if (r < 0) {
+ log_error_errno(r, "Failed to add 'journal.forward_to_socket' credential: %m");
+ goto finish;
+ }
+ }
+
for (;;) {
r = run_container(
rootdir,
diff --git a/src/shared/fork-notify.c b/src/shared/fork-notify.c
index 6f87a2fdce2b2..a8038524a5fbe 100644
--- a/src/shared/fork-notify.c
+++ b/src/shared/fork-notify.c
@@ -3,14 +3,19 @@
#include
#include
+#include "alloc-util.h"
#include "build-path.h"
+#include "chase.h"
+#include "chattr-util.h"
#include "escape.h"
#include "event-util.h"
#include "exit-status.h"
+#include "fd-util.h"
#include "fork-notify.h"
#include "log.h"
#include "notify-recv.h"
#include "parse-util.h"
+#include "path-util.h"
#include "pidref.h"
#include "process-util.h"
#include "runtime-scope.h"
@@ -90,7 +95,6 @@ static int on_child_notify(sd_event_source *s, int fd, uint32_t revents, void *u
int fork_notify(char * const *argv, PidRef *ret_pidref) {
int r;
- assert(!strv_isempty(argv));
assert(ret_pidref);
if (!is_main_thread())
@@ -119,7 +123,7 @@ int fork_notify(char * const *argv, PidRef *ret_pidref) {
if (r < 0)
return r;
- if (DEBUG_LOGGING) {
+ if (DEBUG_LOGGING && argv) {
_cleanup_free_ char *l = quote_command_line(argv, SHELL_ESCAPE_EMPTY);
log_debug("Invoking '%s' as child.", strnull(l));
}
@@ -141,6 +145,11 @@ int fork_notify(char * const *argv, PidRef *ret_pidref) {
_exit(EXIT_MEMORY);
}
+ if (!argv) {
+ *ret_pidref = TAKE_PIDREF(child);
+ return 0; /* Let the caller run custom code in the child */
+ }
+
r = invoke_callout_binary(argv[0], argv);
log_debug_errno(r, "Failed to invoke %s: %m", argv[0]);
_exit(EXIT_EXEC);
@@ -164,7 +173,7 @@ int fork_notify(char * const *argv, PidRef *ret_pidref) {
*ret_pidref = TAKE_PIDREF(child);
- return 0;
+ return 1; /* In the parent */
}
static void fork_notify_terminate_internal(PidRef *pidref) {
@@ -230,3 +239,73 @@ int journal_fork(RuntimeScope scope, char * const* units, PidRef *ret_pidref) {
return fork_notify(argv, ret_pidref);
}
+
+int fork_journal_remote(
+ const char *listen_address,
+ const char *output,
+ const char *config_file,
+ PidRef *ret_pidref) {
+
+ int r;
+
+ assert(listen_address);
+ assert(output);
+ assert(ret_pidref);
+
+ ChaseFlags chase_flags = CHASE_MKDIR_0755|CHASE_MUST_BE_DIRECTORY;
+ if (endswith(output, ".journal"))
+ chase_flags |= CHASE_PARENT;
+
+ _cleanup_close_ int fd = -EBADF;
+ r = chase(output, /* root= */ NULL, chase_flags, /* ret_path= */ NULL, &fd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create journal directory for '%s': %m", output);
+
+ r = chattr_fd(fd, FS_NOCOW_FL, FS_NOCOW_FL);
+ if (r < 0)
+ log_debug_errno(r, "Failed to set NOCOW flag on journal directory for '%s', ignoring: %m", output);
+
+ _cleanup_free_ char *sd_socket_activate = NULL;
+ r = find_executable("systemd-socket-activate", &sd_socket_activate);
+ if (r < 0)
+ return log_error_errno(r, "Failed to find systemd-socket-activate binary: %m");
+
+ _cleanup_free_ char *sd_journal_remote = NULL;
+ r = find_executable_full(
+ "systemd-journal-remote",
+ /* root= */ NULL,
+ STRV_MAKE(LIBEXECDIR),
+ /* use_path_envvar= */ true,
+ &sd_journal_remote,
+ /* ret_fd= */ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to find systemd-journal-remote binary: %m");
+
+ _cleanup_strv_free_ char **argv = strv_new(
+ sd_socket_activate,
+ "--listen", listen_address,
+ sd_journal_remote,
+ "--output", output,
+ "--split-mode", endswith(output, ".journal") ? "none" : "host");
+ if (!argv)
+ return log_oom();
+
+ r = fork_notify(/* argv= */ NULL, ret_pidref);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* In the child */
+ if (setenv("SYSTEMD_JOURNAL_REMOTE_CONFIG_FILE",
+ config_file ?: "/dev/null",
+ /* overwrite= */ true) < 0) {
+ log_debug_errno(errno, "Failed to set $SYSTEMD_JOURNAL_REMOTE_CONFIG_FILE: %m");
+ _exit(EXIT_MEMORY);
+ }
+
+ r = invoke_callout_binary(argv[0], argv);
+ log_error_errno(r, "Failed to invoke %s: %m", argv[0]);
+ _exit(EXIT_EXEC);
+ }
+
+ return 0;
+}
diff --git a/src/shared/fork-notify.h b/src/shared/fork-notify.h
index 103ab78983371..95b6aaad43e12 100644
--- a/src/shared/fork-notify.h
+++ b/src/shared/fork-notify.h
@@ -10,3 +10,9 @@ void fork_notify_terminate(PidRef *pidref);
void fork_notify_terminate_many(sd_event_source **array, size_t n);
int journal_fork(RuntimeScope scope, char * const *units, PidRef *ret_pidref);
+
+int fork_journal_remote(
+ const char *listen_address,
+ const char *output,
+ const char *config_file,
+ PidRef *ret_pidref);
diff --git a/src/shared/machine-register.c b/src/shared/machine-register.c
new file mode 100644
index 0000000000000..b3b1055a9f2b3
--- /dev/null
+++ b/src/shared/machine-register.c
@@ -0,0 +1,243 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include
+
+#include "sd-bus.h"
+#include "sd-id128.h"
+#include "sd-json.h"
+#include "sd-varlink.h"
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "bus-util.h"
+#include "errno-util.h"
+#include "json-util.h"
+#include "log.h"
+#include "machine-register.h"
+#include "path-lookup.h"
+#include "pidref.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "varlink-util.h"
+
+static int register_machine_dbus_ex(
+ sd_bus *bus,
+ const char *machine_name,
+ sd_id128_t uuid,
+ const char *service,
+ const char *class,
+ const PidRef *pidref,
+ const char *directory,
+ int local_ifindex,
+ sd_bus_error *error) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert(bus);
+ assert(machine_name);
+ assert(service);
+ assert(class);
+
+ r = bus_message_new_method_call(bus, &m, bus_machine_mgr, "RegisterMachineEx");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", machine_name);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(
+ m,
+ "(sv)(sv)(sv)",
+ "Id", "ay", SD_BUS_MESSAGE_APPEND_ID128(uuid),
+ "Service", "s", service,
+ "Class", "s", class);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (pidref_is_set(pidref)) {
+ if (pidref->fd >= 0) {
+ r = sd_bus_message_append(m, "(sv)", "LeaderPIDFD", "h", pidref->fd);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ if (pidref->fd_id > 0) {
+ r = sd_bus_message_append(m, "(sv)", "LeaderPIDFDID", "t", pidref->fd_id);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "(sv)", "LeaderPID", "u", pidref->pid);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+ }
+
+ if (!isempty(directory)) {
+ r = sd_bus_message_append(m, "(sv)", "RootDirectory", "s", directory);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ if (local_ifindex > 0) {
+ r = sd_bus_message_append(m, "(sv)", "NetworkInterfaces", "ai", 1, local_ifindex);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return sd_bus_call(bus, m, 0, error, NULL);
+}
+
+static int register_machine_dbus(
+ sd_bus *bus,
+ const char *machine_name,
+ sd_id128_t uuid,
+ const char *service,
+ const char *class,
+ const PidRef *pidref,
+ const char *directory,
+ int local_ifindex) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(bus);
+ assert(machine_name);
+ assert(service);
+ assert(class);
+
+ /* First try RegisterMachineEx which supports PIDFD-based leader tracking. */
+ r = register_machine_dbus_ex(bus, machine_name, uuid, service, class, pidref, directory, local_ifindex, &error);
+ if (r >= 0)
+ return 0;
+ if (!sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_METHOD))
+ return log_error_errno(r, "Failed to register machine: %s", bus_error_message(&error, r));
+
+ sd_bus_error_free(&error);
+
+ r = bus_call_method(
+ bus,
+ bus_machine_mgr,
+ "RegisterMachineWithNetwork",
+ &error,
+ NULL,
+ "sayssusai",
+ machine_name,
+ SD_BUS_MESSAGE_APPEND_ID128(uuid),
+ service,
+ class,
+ pidref_is_set(pidref) ? (uint32_t) pidref->pid : 0,
+ strempty(directory),
+ local_ifindex > 0 ? 1 : 0, local_ifindex);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register machine: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+int register_machine(
+ sd_bus *bus,
+ const char *machine_name,
+ sd_id128_t uuid,
+ const char *service,
+ const char *class,
+ const PidRef *pidref,
+ const char *directory,
+ unsigned cid,
+ int local_ifindex,
+ const char *address,
+ const char *key_path,
+ bool allocate_unit,
+ RuntimeScope scope) {
+
+ _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL;
+ int r;
+
+ assert(machine_name);
+ assert(service);
+ assert(class);
+
+ /* First try to use varlink, as it provides more features (such as SSH support). */
+ _cleanup_free_ char *p = NULL;
+ r = runtime_directory_generic(scope, "systemd/machine/io.systemd.Machine", &p);
+ if (r >= 0)
+ r = sd_varlink_connect_address(&vl, p);
+ if (r == -ENOENT || ERRNO_IS_DISCONNECT(r)) {
+ log_debug_errno(r, "Failed to connect to machined via varlink%s%s, falling back to D-Bus: %m",
+ p ? " on " : "", strempty(p));
+
+ /* In case we are running with an older machined, fall back to D-Bus. */
+ if (!bus)
+ return log_error_errno(SYNTHETIC_ERRNO(ESRCH), "Varlink connection to machined not available and no bus provided.");
+
+ return register_machine_dbus(bus, machine_name, uuid, service, class, pidref, directory, local_ifindex);
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to machined on %s: %m", strna(p));
+
+ return varlink_callbo_and_log(
+ vl,
+ "io.systemd.Machine.Register",
+ /* ret_reply= */ NULL,
+ SD_JSON_BUILD_PAIR_STRING("name", machine_name),
+ SD_JSON_BUILD_PAIR_CONDITION(!sd_id128_is_null(uuid), "id", SD_JSON_BUILD_ID128(uuid)),
+ SD_JSON_BUILD_PAIR_STRING("service", service),
+ SD_JSON_BUILD_PAIR_STRING("class", class),
+ SD_JSON_BUILD_PAIR_CONDITION(VSOCK_CID_IS_REGULAR(cid), "vSockCid", SD_JSON_BUILD_UNSIGNED(cid)),
+ SD_JSON_BUILD_PAIR_CONDITION(local_ifindex > 0, "networkInterfaces", SD_JSON_BUILD_ARRAY(SD_JSON_BUILD_INTEGER(local_ifindex))),
+ SD_JSON_BUILD_PAIR_CONDITION(!!directory, "rootDirectory", SD_JSON_BUILD_STRING(directory)),
+ SD_JSON_BUILD_PAIR_CONDITION(!!address, "sshAddress", SD_JSON_BUILD_STRING(address)),
+ SD_JSON_BUILD_PAIR_CONDITION(!!key_path, "sshPrivateKeyPath", SD_JSON_BUILD_STRING(key_path)),
+ SD_JSON_BUILD_PAIR_CONDITION(isatty_safe(STDIN_FILENO), "allowInteractiveAuthentication", SD_JSON_BUILD_BOOLEAN(true)),
+ SD_JSON_BUILD_PAIR_CONDITION(allocate_unit, "allocateUnit", SD_JSON_BUILD_BOOLEAN(true)),
+ SD_JSON_BUILD_PAIR_CONDITION(pidref_is_set(pidref), "leaderProcessId", JSON_BUILD_PIDREF(pidref)));
+}
+
+int unregister_machine(sd_bus *bus, const char *machine_name, RuntimeScope scope) {
+ int r;
+
+ assert(machine_name);
+
+ /* First try varlink */
+ _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL;
+ _cleanup_free_ char *p = NULL;
+ r = runtime_directory_generic(scope, "systemd/machine/io.systemd.Machine", &p);
+ if (r < 0)
+ log_debug_errno(r, "Failed to determine runtime directory for varlink, falling back to D-Bus: %m");
+ else {
+ r = sd_varlink_connect_address(&vl, p);
+ if (r < 0)
+ log_debug_errno(r, "Failed to connect to machined via varlink on %s, falling back to D-Bus: %m", p);
+ else {
+ r = varlink_callbo_and_log(
+ vl,
+ "io.systemd.Machine.Unregister",
+ /* ret_reply= */ NULL,
+ SD_JSON_BUILD_PAIR_STRING("name", machine_name));
+ if (r >= 0)
+ return 0;
+
+ log_debug_errno(r, "Failed to unregister machine via varlink, falling back to D-Bus: %m");
+ }
+ }
+
+ /* Fall back to D-Bus */
+ if (bus) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ r = bus_call_method(bus, bus_machine_mgr, "UnregisterMachine", &error, NULL, "s", machine_name);
+ if (r < 0)
+ log_debug("Failed to unregister machine: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
diff --git a/src/vmspawn/vmspawn-register.h b/src/shared/machine-register.h
similarity index 76%
rename from src/vmspawn/vmspawn-register.h
rename to src/shared/machine-register.h
index de118b7492fa2..df4f53b6be68c 100644
--- a/src/vmspawn/vmspawn-register.h
+++ b/src/shared/machine-register.h
@@ -8,12 +8,14 @@ int register_machine(
const char *machine_name,
sd_id128_t uuid,
const char *service,
+ const char *class,
const PidRef *pidref,
const char *directory,
unsigned cid,
+ int local_ifindex,
const char *address,
const char *key_path,
bool allocate_unit,
RuntimeScope scope);
-int unregister_machine(sd_bus *bus, const char *machine_name);
+int unregister_machine(sd_bus *bus, const char *machine_name, RuntimeScope scope);
diff --git a/src/shared/meson.build b/src/shared/meson.build
index cdbe763d0137d..3bd167bc3ade4 100644
--- a/src/shared/meson.build
+++ b/src/shared/meson.build
@@ -123,6 +123,7 @@ shared_sources = files(
'lsm-util.c',
'machine-bind-user.c',
'machine-credential.c',
+ 'machine-register.c',
'machine-id-setup.c',
'macvlan-util.c',
'main-func.c',
diff --git a/src/shared/ptyfwd.c b/src/shared/ptyfwd.c
index 2e8d77dee1c43..88cfd596d0508 100644
--- a/src/shared/ptyfwd.c
+++ b/src/shared/ptyfwd.c
@@ -669,19 +669,22 @@ static int do_shovel(PTYForward *f) {
f->stdin_event_source = sd_event_source_unref(f->stdin_event_source);
} else {
- /* Check if ^] has been pressed three times within one second. If we get this we quite
- * immediately. */
- RequestOperation q = look_for_escape(f, f->in_buffer + f->in_buffer_full, k);
- f->in_buffer_full += (size_t) k;
- if (q < 0)
- return q;
- if (q == REQUEST_EXIT)
- return -ECANCELED;
- if (q >= REQUEST_HOTKEY_A && q <= REQUEST_HOTKEY_Z && f->hotkey_handler) {
- r = f->hotkey_handler(f, q - REQUEST_HOTKEY_BASE, f->hotkey_userdata);
- if (r < 0)
- return r;
- }
+ if (!FLAGS_SET(f->flags, PTY_FORWARD_TRANSPARENT)) {
+ /* Check if ^] has been pressed three times within one second. If we get this we quit
+ * immediately. */
+ RequestOperation q = look_for_escape(f, f->in_buffer + f->in_buffer_full, k);
+ f->in_buffer_full += (size_t) k;
+ if (q < 0)
+ return q;
+ if (q == REQUEST_EXIT)
+ return -ECANCELED;
+ if (q >= REQUEST_HOTKEY_A && q <= REQUEST_HOTKEY_Z && f->hotkey_handler) {
+ r = f->hotkey_handler(f, q - REQUEST_HOTKEY_BASE, f->hotkey_userdata);
+ if (r < 0)
+ return r;
+ }
+ } else
+ f->in_buffer_full += (size_t) k;
}
did_something = true;
diff --git a/src/shared/ptyfwd.h b/src/shared/ptyfwd.h
index 1c1246f37f163..f92676dabe3e8 100644
--- a/src/shared/ptyfwd.h
+++ b/src/shared/ptyfwd.h
@@ -17,6 +17,9 @@ typedef enum PTYForwardFlags {
/* Don't tint the background, or set window title */
PTY_FORWARD_DUMB_TERMINAL = 1 << 3,
+
+ /* Don't interpret escape sequences (^] exit, hotkeys), just forward everything as-is */
+ PTY_FORWARD_TRANSPARENT = 1 << 4,
} PTYForwardFlags;
typedef int (*PTYForwardHangupHandler)(PTYForward *f, int rcode, void *userdata);
diff --git a/src/vmspawn/meson.build b/src/vmspawn/meson.build
index 722e6a52cc7f2..99bad2d618973 100644
--- a/src/vmspawn/meson.build
+++ b/src/vmspawn/meson.build
@@ -10,7 +10,6 @@ vmspawn_sources = files(
'vmspawn-settings.c',
'vmspawn-scope.c',
'vmspawn-mount.c',
- 'vmspawn-register.c',
)
vmspawn_extract_sources = files(
'vmspawn-util.c',
diff --git a/src/vmspawn/vmspawn-register.c b/src/vmspawn/vmspawn-register.c
deleted file mode 100644
index 46f292ce49525..0000000000000
--- a/src/vmspawn/vmspawn-register.c
+++ /dev/null
@@ -1,104 +0,0 @@
-/* SPDX-License-Identifier: LGPL-2.1-or-later */
-
-#include
-
-#include "sd-bus.h"
-#include "sd-id128.h"
-#include "sd-json.h"
-#include "sd-varlink.h"
-
-#include "bus-error.h"
-#include "bus-locator.h"
-#include "errno-util.h"
-#include "json-util.h"
-#include "log.h"
-#include "path-lookup.h"
-#include "pidref.h"
-#include "socket-util.h"
-#include "string-util.h"
-#include "terminal-util.h"
-#include "varlink-util.h"
-#include "vmspawn-register.h"
-
-int register_machine(
- sd_bus *bus,
- const char *machine_name,
- sd_id128_t uuid,
- const char *service,
- const PidRef *pidref,
- const char *directory,
- unsigned cid,
- const char *address,
- const char *key_path,
- bool allocate_unit,
- RuntimeScope scope) {
-
- _cleanup_(sd_varlink_unrefp) sd_varlink *vl = NULL;
- int r;
-
- assert(machine_name);
- assert(service);
-
- /* First try to use varlink, as it provides more features (such as SSH support). */
- _cleanup_free_ char *p = NULL;
- r = runtime_directory_generic(scope, "systemd/machine/io.systemd.Machine", &p);
- if (r < 0)
- return r;
-
- r = sd_varlink_connect_address(&vl, p);
- if (r == -ENOENT || ERRNO_IS_DISCONNECT(r)) {
- _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
-
- assert(bus);
-
- /* In case we are running with an older machined, fallback to the existing D-Bus method. */
- r = bus_call_method(
- bus,
- bus_machine_mgr,
- "RegisterMachine",
- &error,
- NULL,
- "sayssus",
- machine_name,
- SD_BUS_MESSAGE_APPEND_ID128(uuid),
- service,
- "vm",
- (uint32_t) (pidref_is_set(pidref) ? pidref->pid : 0),
- strempty(directory));
- if (r < 0)
- return log_error_errno(r, "Failed to register machine: %s", bus_error_message(&error, r));
-
- return 0;
- }
- if (r < 0)
- return log_error_errno(r, "Failed to connect to machined on %p: %m", p);
-
- return varlink_callbo_and_log(
- vl,
- "io.systemd.Machine.Register",
- /* ret_reply= */ NULL,
- SD_JSON_BUILD_PAIR_STRING("name", machine_name),
- SD_JSON_BUILD_PAIR_CONDITION(!sd_id128_is_null(uuid), "id", SD_JSON_BUILD_ID128(uuid)),
- SD_JSON_BUILD_PAIR_STRING("service", service),
- SD_JSON_BUILD_PAIR_STRING("class", "vm"),
- SD_JSON_BUILD_PAIR_CONDITION(VSOCK_CID_IS_REGULAR(cid), "vSockCid", SD_JSON_BUILD_UNSIGNED(cid)),
- SD_JSON_BUILD_PAIR_CONDITION(!!directory, "rootDirectory", SD_JSON_BUILD_STRING(directory)),
- SD_JSON_BUILD_PAIR_CONDITION(!!address, "sshAddress", SD_JSON_BUILD_STRING(address)),
- SD_JSON_BUILD_PAIR_CONDITION(!!key_path, "sshPrivateKeyPath", SD_JSON_BUILD_STRING(key_path)),
- SD_JSON_BUILD_PAIR_CONDITION(isatty_safe(STDIN_FILENO), "allowInteractiveAuthentication", SD_JSON_BUILD_BOOLEAN(true)),
- SD_JSON_BUILD_PAIR_CONDITION(allocate_unit, "allocateUnit", SD_JSON_BUILD_BOOLEAN(true)),
- SD_JSON_BUILD_PAIR_CONDITION(pidref_is_set(pidref), "leaderProcessId", JSON_BUILD_PIDREF(pidref)));
-}
-
-int unregister_machine(sd_bus *bus, const char *machine_name) {
- _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
- int r;
-
- assert(bus);
-
- r = bus_call_method(bus, bus_machine_mgr, "UnregisterMachine", &error, NULL, "s", machine_name);
- if (r < 0)
- log_debug("Failed to unregister machine: %s", bus_error_message(&error, r));
-
- return 0;
-}
diff --git a/src/vmspawn/vmspawn-settings.c b/src/vmspawn/vmspawn-settings.c
index d19a65d55debf..57d673c39d82d 100644
--- a/src/vmspawn/vmspawn-settings.c
+++ b/src/vmspawn/vmspawn-settings.c
@@ -14,6 +14,7 @@ static const char *const disk_type_table[_DISK_TYPE_MAX] = {
[DISK_TYPE_VIRTIO_BLK] = "virtio-blk",
[DISK_TYPE_VIRTIO_SCSI] = "virtio-scsi",
[DISK_TYPE_NVME] = "nvme",
+ [DISK_TYPE_SCSI_CD] = "scsi-cd",
};
DEFINE_STRING_TABLE_LOOKUP(disk_type, DiskType);
diff --git a/src/vmspawn/vmspawn-settings.h b/src/vmspawn/vmspawn-settings.h
index cfcee6fbb61b6..495dceaae8d5c 100644
--- a/src/vmspawn/vmspawn-settings.h
+++ b/src/vmspawn/vmspawn-settings.h
@@ -14,6 +14,7 @@ typedef enum DiskType {
DISK_TYPE_VIRTIO_BLK,
DISK_TYPE_VIRTIO_SCSI,
DISK_TYPE_NVME,
+ DISK_TYPE_SCSI_CD,
_DISK_TYPE_MAX,
_DISK_TYPE_INVALID = -EINVAL,
} DiskType;
diff --git a/src/vmspawn/vmspawn-util.h b/src/vmspawn/vmspawn-util.h
index 90efd93661224..75644b250287c 100644
--- a/src/vmspawn/vmspawn-util.h
+++ b/src/vmspawn/vmspawn-util.h
@@ -33,6 +33,18 @@
# define ARCHITECTURE_SUPPORTS_HPET 0
#endif
+#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__)
+# define ARCHITECTURE_SUPPORTS_FW_CFG 1
+#else
+# define ARCHITECTURE_SUPPORTS_FW_CFG 0
+#endif
+
+#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__)
+# define ARCHITECTURE_SUPPORTS_CXL 1
+#else
+# define ARCHITECTURE_SUPPORTS_CXL 0
+#endif
+
#if defined(__x86_64__) || defined(__i386__)
# define QEMU_MACHINE_TYPE "q35"
#elif defined(__arm__) || defined(__aarch64__) || defined(__riscv) || defined(__loongarch64) || defined(__m68k__)
diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c
index 02f2b0df2e08a..3c714ef6dad7c 100644
--- a/src/vmspawn/vmspawn.c
+++ b/src/vmspawn/vmspawn.c
@@ -26,8 +26,6 @@
#include "bus-locator.h"
#include "bus-util.h"
#include "capability-util.h"
-#include "chase.h"
-#include "chattr-util.h"
#include "common-signal.h"
#include "copy.h"
#include "discover-image.h"
@@ -50,6 +48,7 @@
#include "log.h"
#include "machine-bind-user.h"
#include "machine-credential.h"
+#include "machine-register.h"
#include "main-func.h"
#include "mkdir.h"
#include "namespace-util.h"
@@ -88,7 +87,6 @@
#include "utf8.h"
#include "vmspawn-mount.h"
#include "vmspawn-qemu-config.h"
-#include "vmspawn-register.h"
#include "vmspawn-scope.h"
#include "vmspawn-settings.h"
#include "vmspawn-util.h"
@@ -126,7 +124,10 @@ static char *arg_slice = NULL;
static char **arg_property = NULL;
static char *arg_cpus = NULL;
static uint64_t arg_ram = UINT64_C(2) * U64_GB;
+static uint64_t arg_ram_max = 0;
+static unsigned arg_ram_slots = 0;
static int arg_kvm = -1;
+static int arg_cxl = -1;
static int arg_vsock = -1;
static unsigned arg_vsock_cid = VMADDR_CID_ANY;
static int arg_tpm = -1;
@@ -142,6 +143,7 @@ static bool arg_firmware_describe = false;
static Set *arg_firmware_features_include = NULL;
static Set *arg_firmware_features_exclude = NULL;
static char *arg_forward_journal = NULL;
+static char *arg_forward_journal_config = NULL;
static bool arg_register = true;
static bool arg_keep_unit = false;
static sd_id128_t arg_uuid = {};
@@ -182,6 +184,7 @@ STATIC_DESTRUCTOR_REGISTER(arg_linux, freep);
STATIC_DESTRUCTOR_REGISTER(arg_initrds, strv_freep);
STATIC_DESTRUCTOR_REGISTER(arg_runtime_mounts, runtime_mount_context_done);
STATIC_DESTRUCTOR_REGISTER(arg_forward_journal, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_forward_journal_config, freep);
STATIC_DESTRUCTOR_REGISTER(arg_kernel_cmdline_extra, strv_freep);
STATIC_DESTRUCTOR_REGISTER(arg_extra_drives, extra_drive_context_done);
STATIC_DESTRUCTOR_REGISTER(arg_background, freep);
@@ -212,19 +215,23 @@ static int help(void) {
" -q --quiet Do not show status information\n"
" --no-pager Do not pipe output into a pager\n"
" --no-ask-password Do not prompt for password\n"
- " --user Interact with user manager\n"
- " --system Interact with system manager\n"
+ " --runtime-scope=system|user\n"
+ " Run in system or user service manager scope\n"
"\n%3$sImage:%4$s\n"
" -D --directory=PATH Root directory for the VM\n"
" -x --ephemeral Run VM with snapshot of the disk or directory\n"
" -i --image=FILE|DEVICE Root file system disk image or device for the VM\n"
" --image-format=FORMAT Specify disk image format (raw, qcow2; default: raw)\n"
" --image-disk-type=TYPE\n"
- " Specify disk type (virtio-blk, virtio-scsi, nvme; default: virtio-blk)\n"
+ " Specify disk type (virtio-blk, virtio-scsi, nvme,\n"
+ " scsi-cd; default: virtio-blk)\n"
"\n%3$sHost Configuration:%4$s\n"
" --cpus=CPUS Configure number of CPUs in guest\n"
- " --ram=BYTES Configure guest's RAM size\n"
+ " --ram=BYTES[:MAXBYTES[:SLOTS]]\n"
+ " Configure guest's RAM size (and max/slots for\n"
+ " hotplug)\n"
" --kvm=BOOL Enable use of KVM\n"
+ " --cxl=BOOL Enable CXL support\n"
" --vsock=BOOL Override autodetection of VSOCK support\n"
" --vsock-cid=CID Specify the CID to use for the guest's VSOCK support\n"
" --tpm=BOOL Enable use of a virtual TPM\n"
@@ -271,7 +278,8 @@ static int help(void) {
" --extra-drive=[FORMAT:][DISKTYPE:]PATH\n"
" Adds an additional disk to the VM\n"
" FORMAT: raw, qcow2\n"
- " DISKTYPE: virtio-blk, virtio-scsi, nvme\n"
+ " DISKTYPE: virtio-blk, virtio-scsi, nvme,\n"
+ " scsi-cd\n"
" --bind-user=NAME Bind user from host to virtual machine\n"
" --bind-user-shell=BOOL|PATH\n"
" Configure the shell to use for --bind-user= users\n"
@@ -280,6 +288,8 @@ static int help(void) {
"\n%3$sIntegration:%4$s\n"
" --forward-journal=FILE|DIR\n"
" Forward the VM's journal to the host\n"
+ " --forward-journal-config=PATH\n"
+ " Configuration file for systemd-journal-remote\n"
" --pass-ssh-key=BOOL Create an SSH key to access the VM\n"
" --ssh-key-type=TYPE Choose what type of SSH key to pass\n"
"\n%3$sInput/Output:%4$s\n"
@@ -331,6 +341,7 @@ static int parse_argv(int argc, char *argv[]) {
ARG_CPUS,
ARG_RAM,
ARG_KVM,
+ ARG_CXL,
ARG_VSOCK,
ARG_VSOCK_CID,
ARG_TPM,
@@ -347,6 +358,7 @@ static int parse_argv(int argc, char *argv[]) {
ARG_SECURE_BOOT,
ARG_PRIVATE_USERS,
ARG_FORWARD_JOURNAL,
+ ARG_FORWARD_JOURNAL_CONFIG,
ARG_PASS_SSH_KEY,
ARG_SSH_KEY_TYPE,
ARG_SET_CREDENTIAL,
@@ -365,6 +377,7 @@ static int parse_argv(int argc, char *argv[]) {
ARG_BIND_USER,
ARG_BIND_USER_SHELL,
ARG_BIND_USER_GROUP,
+ ARG_RUNTIME_SCOPE,
ARG_SYSTEM,
ARG_USER,
ARG_IMAGE_FORMAT,
@@ -388,6 +401,7 @@ static int parse_argv(int argc, char *argv[]) {
{ "ram", required_argument, NULL, ARG_RAM },
{ "qemu-mem", required_argument, NULL, ARG_RAM }, /* Compat alias */
{ "kvm", required_argument, NULL, ARG_KVM },
+ { "cxl", required_argument, NULL, ARG_CXL },
{ "qemu-kvm", required_argument, NULL, ARG_KVM }, /* Compat alias */
{ "vsock", required_argument, NULL, ARG_VSOCK },
{ "qemu-vsock", required_argument, NULL, ARG_VSOCK }, /* Compat alias */
@@ -407,7 +421,8 @@ static int parse_argv(int argc, char *argv[]) {
{ "extra-drive", required_argument, NULL, ARG_EXTRA_DRIVE },
{ "secure-boot", required_argument, NULL, ARG_SECURE_BOOT },
{ "private-users", required_argument, NULL, ARG_PRIVATE_USERS },
- { "forward-journal", required_argument, NULL, ARG_FORWARD_JOURNAL },
+ { "forward-journal", required_argument, NULL, ARG_FORWARD_JOURNAL },
+ { "forward-journal-config", required_argument, NULL, ARG_FORWARD_JOURNAL_CONFIG },
{ "pass-ssh-key", required_argument, NULL, ARG_PASS_SSH_KEY },
{ "ssh-key-type", required_argument, NULL, ARG_SSH_KEY_TYPE },
{ "set-credential", required_argument, NULL, ARG_SET_CREDENTIAL },
@@ -427,6 +442,7 @@ static int parse_argv(int argc, char *argv[]) {
{ "bind-user", required_argument, NULL, ARG_BIND_USER },
{ "bind-user-shell", required_argument, NULL, ARG_BIND_USER_SHELL },
{ "bind-user-group", required_argument, NULL, ARG_BIND_USER_GROUP },
+ { "runtime-scope", required_argument, NULL, ARG_RUNTIME_SCOPE },
{ "system", no_argument, NULL, ARG_SYSTEM },
{ "user", no_argument, NULL, ARG_USER },
{}
@@ -506,11 +522,48 @@ static int parse_argv(int argc, char *argv[]) {
return r;
break;
- case ARG_RAM:
- r = parse_size(optarg, 1024, &arg_ram);
- if (r < 0)
- return log_error_errno(r, "Failed to parse --ram=%s: %m", optarg);
+ case ARG_RAM: {
+ const char *e = strchr(optarg, ':');
+ if (e) {
+ _cleanup_free_ char *first = strndup(optarg, e - optarg);
+ if (!first)
+ return log_oom();
+
+ r = parse_size(first, 1024, &arg_ram);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --ram=%s: %m", optarg);
+
+ const char *e2 = strchr(e + 1, ':');
+ if (e2) {
+ _cleanup_free_ char *second = strndup(e + 1, e2 - e - 1);
+ if (!second)
+ return log_oom();
+
+ r = parse_size(second, 1024, &arg_ram_max);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --ram=%s: %m", optarg);
+
+ r = safe_atou(e2 + 1, &arg_ram_slots);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --ram=%s: %m", optarg);
+ } else {
+ r = parse_size(e + 1, 1024, &arg_ram_max);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --ram=%s: %m", optarg);
+
+ arg_ram_slots = 0;
+ }
+ } else {
+ r = parse_size(optarg, 1024, &arg_ram);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --ram=%s: %m", optarg);
+
+ arg_ram_max = 0;
+ arg_ram_slots = 0;
+ }
+
break;
+ }
case ARG_KVM:
r = parse_tristate_argument_with_auto("--kvm=", optarg, &arg_kvm);
@@ -518,6 +571,12 @@ static int parse_argv(int argc, char *argv[]) {
return r;
break;
+ case ARG_CXL:
+ r = parse_tristate_argument_with_auto("--cxl=", optarg, &arg_cxl);
+ if (r < 0)
+ return r;
+ break;
+
case ARG_VSOCK:
r = parse_tristate_argument_with_auto("--vsock=", optarg, &arg_vsock);
if (r < 0)
@@ -694,6 +753,12 @@ static int parse_argv(int argc, char *argv[]) {
return r;
break;
+ case ARG_FORWARD_JOURNAL_CONFIG:
+ r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_forward_journal_config);
+ if (r < 0)
+ return r;
+ break;
+
case ARG_PASS_SSH_KEY:
r = parse_boolean_argument("--pass-ssh-key=", optarg, &arg_pass_ssh_key);
if (r < 0)
@@ -950,6 +1015,12 @@ static int parse_argv(int argc, char *argv[]) {
break;
+ case ARG_RUNTIME_SCOPE:
+ arg_runtime_scope = runtime_scope_from_string(optarg);
+ if (!IN_SET(arg_runtime_scope, RUNTIME_SCOPE_SYSTEM, RUNTIME_SCOPE_USER))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse runtime scope: %s", optarg);
+ break;
+
case ARG_SYSTEM:
arg_runtime_scope = RUNTIME_SCOPE_SYSTEM;
break;
@@ -975,6 +1046,15 @@ static int parse_argv(int argc, char *argv[]) {
if (!strv_isempty(arg_bind_user_groups) && strv_isempty(arg_bind_user))
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot use --bind-user-group= without --bind-user=");
+ if (arg_ram_max > 0 && arg_ram_max < arg_ram)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Maximum RAM size must be greater than or equal to initial RAM size.");
+
+ if (arg_ram_slots > 0 && arg_ram_max == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Memory hotplug slots require a maximum RAM size.");
+
+ if (arg_forward_journal_config && !arg_forward_journal)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--forward-journal-config= requires --forward-journal=.");
+
if (arg_ephemeral && arg_extra_drives.n_drives > 0)
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot use --ephemeral with --extra-drive=");
@@ -1517,57 +1597,6 @@ static int start_tpm(
return 0;
}
-static int start_systemd_journal_remote(
- const char *scope,
- unsigned port,
- const char *sd_socket_activate,
- char **ret_listen_address,
- PidRef *ret_pidref) {
-
- int r;
-
- assert(scope);
-
- _cleanup_free_ char *scope_prefix = NULL;
- r = unit_name_to_prefix(scope, &scope_prefix);
- if (r < 0)
- return log_error_errno(r, "Failed to strip .scope suffix from scope: %m");
-
- _cleanup_free_ char *listen_address = NULL;
- if (asprintf(&listen_address, "vsock:2:%u", port) < 0)
- return log_oom();
-
- _cleanup_free_ char *sd_journal_remote = NULL;
- r = find_executable_full(
- "systemd-journal-remote",
- /* root= */ NULL,
- STRV_MAKE(LIBEXECDIR),
- /* use_path_envvar= */ true, /* systemd-journal-remote should be installed in
- * LIBEXECDIR, but for supporting fancy setups. */
- &sd_journal_remote,
- /* ret_fd= */ NULL);
- if (r < 0)
- return log_error_errno(r, "Failed to find systemd-journal-remote binary: %m");
-
- _cleanup_strv_free_ char **argv = strv_new(
- sd_socket_activate,
- "--listen", listen_address,
- sd_journal_remote,
- "--output", arg_forward_journal,
- "--split-mode", endswith(arg_forward_journal, ".journal") ? "none" : "host");
- if (!argv)
- return log_oom();
-
- r = fork_notify(argv, ret_pidref);
- if (r < 0)
- return r;
-
- if (ret_listen_address)
- *ret_listen_address = TAKE_PTR(listen_address);
-
- return 0;
-}
-
static int discover_root(char **ret) {
int r;
_cleanup_(dissected_image_unrefp) DissectedImage *image = NULL;
@@ -1686,7 +1715,9 @@ static int start_virtiofsd(
"--shared-dir", source_uid == FOREIGN_UID_MIN ? "/run/systemd/mount-rootfs" : directory,
"--xattr",
"--fd", sockstr,
- "--no-announce-submounts");
+ "--no-announce-submounts",
+ "--log-level=error",
+ "--modcaps=-mknod");
if (!argv)
return log_oom();
@@ -2232,33 +2263,19 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
if (asprintf(&mem, "%" PRIu64 "M", DIV_ROUND_UP(arg_ram, U64_MB)) < 0)
return log_oom();
+ _cleanup_free_ char *mem_max = NULL;
+ if (arg_ram_max > 0)
+ if (asprintf(&mem_max, "%" PRIu64 "M", DIV_ROUND_UP(arg_ram_max, U64_MB)) < 0)
+ return log_oom();
+
/* Create runtime directory for the QEMU config file and other state */
_cleanup_free_ char *runtime_dir = NULL;
_cleanup_(rm_rf_physical_and_freep) char *runtime_dir_destroy = NULL;
- {
- _cleanup_free_ char *subdir = NULL;
-
- if (asprintf(&subdir, "systemd/vmspawn.%" PRIx64, random_u64()) < 0)
- return log_oom();
-
- r = runtime_directory(arg_runtime_scope, subdir, &runtime_dir);
- if (r < 0)
- return log_error_errno(r, "Failed to lookup runtime directory: %m");
- if (r > 0) { /* We need to create our own runtime dir */
- r = mkdir_p(runtime_dir, 0755);
- if (r < 0)
- return log_error_errno(r, "Failed to create runtime directory '%s': %m", runtime_dir);
-
- /* We created this, hence also destroy it */
- runtime_dir_destroy = TAKE_PTR(runtime_dir);
+ r = runtime_directory_make(arg_runtime_scope, "vmspawn", &runtime_dir, &runtime_dir_destroy);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create runtime directory: %m");
- runtime_dir = strdup(runtime_dir_destroy);
- if (!runtime_dir)
- return log_oom();
- }
-
- log_debug("Using runtime directory: %s", runtime_dir);
- }
+ log_debug("Using runtime directory: %s", runtime_dir);
/* Build a QEMU config file for -readconfig. Items that can be expressed as QemuOpts sections go
* here; things that require cmdline-only switches (e.g. -kernel, -smbios, -nographic, --add-fd)
@@ -2280,6 +2297,17 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
return r;
}
+ if (!ARCHITECTURE_SUPPORTS_CXL) {
+ if (arg_cxl > 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "CXL not supported on %s, refusing.", architecture_to_string(native_architecture()));
+ if (arg_cxl < 0)
+ log_debug("CXL not supported on %s, disabling.", architecture_to_string(native_architecture()));
+ } else if (arg_cxl > 0) {
+ r = qemu_config_key(config_file, "cxl", "on");
+ if (r < 0)
+ return r;
+ }
+
if (arg_directory || arg_runtime_mounts.n_mounts != 0) {
r = qemu_config_key(config_file, "memory-backend", "mem");
if (r < 0)
@@ -2292,6 +2320,26 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
return r;
}
+ r = qemu_config_section(config_file, "smp-opts", /* id= */ NULL,
+ "cpus", arg_cpus ?: "1");
+ if (r < 0)
+ return r;
+
+ r = qemu_config_section(config_file, "memory", /* id= */ NULL,
+ "size", mem);
+ if (r < 0)
+ return r;
+
+ if (mem_max) {
+ r = qemu_config_key(config_file, "maxmem", mem_max);
+ if (r < 0)
+ return r;
+
+ r = qemu_config_keyf(config_file, "slots", "%u", arg_ram_slots > 0 ? arg_ram_slots : 1u);
+ if (r < 0)
+ return r;
+ }
+
r = qemu_config_section(config_file, "object", "rng0",
"qom-type", "rng-random",
"filename", "/dev/urandom");
@@ -2333,8 +2381,7 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
/* Start building the cmdline for items that must remain as command line arguments */
cmdline = strv_new(qemu_binary,
- "-smp", arg_cpus ?: "1",
- "-m", mem);
+ "-no-user-config");
if (!cmdline)
return log_oom();
@@ -2532,12 +2579,22 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
PTYForwardFlags ptyfwd_flags = 0;
switch (arg_console_mode) {
+ case CONSOLE_NATIVE:
+ /* Use a PTY instead of chardev stdio to prevent QEMU from setting O_NONBLOCK on
+ * our stdio file descriptions (see qemu's chardev/char-stdio.c and char-fd.c).
+ * Use PTY_FORWARD_DUMB_TERMINAL|PTY_FORWARD_TRANSPARENT so the forwarder just
+ * shovels bytes without any terminal manipulation or escape sequence handling. */
+ ptyfwd_flags |= PTY_FORWARD_DUMB_TERMINAL|PTY_FORWARD_TRANSPARENT;
+
+ _fallthrough_;
+
case CONSOLE_READ_ONLY:
- ptyfwd_flags |= PTY_FORWARD_READ_ONLY;
+ if (arg_console_mode == CONSOLE_READ_ONLY)
+ ptyfwd_flags |= PTY_FORWARD_READ_ONLY;
_fallthrough_;
- case CONSOLE_INTERACTIVE: {
+ case CONSOLE_INTERACTIVE: {
_cleanup_free_ char *pty_path = NULL;
master = openpt_allocate(O_RDWR|O_NONBLOCK, &pty_path);
@@ -2553,9 +2610,11 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
if (r < 0)
return r;
+ /* Enable mux for native console so the QEMU monitor is accessible via Ctrl-a c */
r = qemu_config_section(config_file, "chardev", "console",
"backend", "serial",
- "path", pty_path);
+ "path", pty_path,
+ "mux", on_off(arg_console_mode == CONSOLE_NATIVE));
if (r < 0)
return r;
@@ -2565,15 +2624,38 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
if (r < 0)
return r;
+ if (arg_console_mode == CONSOLE_NATIVE) {
+ r = qemu_config_section(config_file, "mon", "mon0",
+ "chardev", "console");
+ if (r < 0)
+ return r;
+ }
+
break;
}
case CONSOLE_GUI:
- /* -vga is a convenience option, keep on cmdline */
- r = strv_extend_many(&cmdline, "-vga", "virtio");
+ /* -display has no config file equivalent */
+ r = strv_extend_many(&cmdline, "-display", "sdl,gl=auto", "-vga", "none");
if (r < 0)
return log_oom();
+ r = qemu_config_section(config_file, "device", "vga0",
+ "driver", "virtio-vga");
+ if (r < 0)
+ return r;
+
+ r = qemu_config_section(config_file, "audiodev", "audio0",
+ "driver", "default");
+ if (r < 0)
+ return r;
+
+ r = qemu_config_section(config_file, "device", "virtio-sound0",
+ "driver", "virtio-sound-pci",
+ "audiodev", "audio0");
+ if (r < 0)
+ return r;
+
r = qemu_config_section(config_file, "device", "virtio-serial0",
"driver", "virtio-serial");
if (r < 0)
@@ -2595,36 +2677,6 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
break;
- case CONSOLE_NATIVE:
- r = strv_extend_many(&cmdline, "-nographic", "-nodefaults");
- if (r < 0)
- return log_oom();
-
- r = qemu_config_section(config_file, "chardev", "console",
- "backend", "stdio",
- "mux", "on",
- "signal", "off");
- if (r < 0)
- return r;
-
- r = qemu_config_section(config_file, "device", "vmspawn-virtio-serial-pci",
- "driver", "virtio-serial-pci");
- if (r < 0)
- return r;
-
- r = qemu_config_section(config_file, "device", "virtconsole0",
- "driver", "virtconsole",
- "chardev", "console");
- if (r < 0)
- return r;
-
- r = qemu_config_section(config_file, "mon", "mon0",
- "chardev", "console");
- if (r < 0)
- return r;
-
- break;
-
case CONSOLE_HEADLESS:
r = strv_extend_many(&cmdline, "-nographic", "-nodefaults");
if (r < 0)
@@ -2740,15 +2792,16 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
r = kernel_cmdline_maybe_append_root();
if (r < 0)
return r;
+
}
}
bool need_scsi_controller =
- arg_image_disk_type == DISK_TYPE_VIRTIO_SCSI && arg_image;
+ IN_SET(arg_image_disk_type, DISK_TYPE_VIRTIO_SCSI, DISK_TYPE_SCSI_CD) && arg_image;
if (!need_scsi_controller)
FOREACH_ARRAY(drive, arg_extra_drives.drives, arg_extra_drives.n_drives) {
DiskType dt = drive->disk_type >= 0 ? drive->disk_type : arg_image_disk_type;
- if (dt == DISK_TYPE_VIRTIO_SCSI) {
+ if (IN_SET(dt, DISK_TYPE_VIRTIO_SCSI, DISK_TYPE_SCSI_CD)) {
need_scsi_controller = true;
break;
}
@@ -2772,12 +2825,20 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
arg_image);
}
- r = qemu_config_section(config_file, "drive", "vmspawn",
- "if", "none",
- "file", arg_image,
- "format", image_format_to_string(arg_image_format),
- "discard", on_off(arg_discard_disk),
- "snapshot", on_off(arg_ephemeral));
+ if (arg_image_disk_type == DISK_TYPE_SCSI_CD)
+ r = qemu_config_section(config_file, "drive", "vmspawn",
+ "if", "none",
+ "file", arg_image,
+ "format", image_format_to_string(arg_image_format),
+ "media", "cdrom",
+ "readonly", "on");
+ else
+ r = qemu_config_section(config_file, "drive", "vmspawn",
+ "if", "none",
+ "file", arg_image,
+ "format", image_format_to_string(arg_image_format),
+ "discard", on_off(arg_discard_disk),
+ "snapshot", on_off(arg_ephemeral));
if (r < 0)
return r;
@@ -2808,6 +2869,12 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
if (r < 0)
return log_oom();
break;
+ case DISK_TYPE_SCSI_CD:
+ disk_driver = "scsi-cd";
+ r = disk_serial(image_fn, 30, &serial);
+ if (r < 0)
+ return log_oom();
+ break;
default:
assert_not_reached();
}
@@ -2820,15 +2887,17 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
if (r < 0)
return r;
- if (arg_image_disk_type == DISK_TYPE_VIRTIO_SCSI) {
+ if (IN_SET(arg_image_disk_type, DISK_TYPE_VIRTIO_SCSI, DISK_TYPE_SCSI_CD)) {
r = qemu_config_key(config_file, "bus", "vmspawn_scsi.0");
if (r < 0)
return r;
}
- r = grow_image(arg_image, arg_grow_image);
- if (r < 0)
- return r;
+ if (arg_image_disk_type != DISK_TYPE_SCSI_CD) {
+ r = grow_image(arg_image, arg_grow_image);
+ if (r < 0)
+ return r;
+ }
}
_cleanup_(sd_event_unrefp) sd_event *event = NULL;
@@ -2933,7 +3002,11 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
} else
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Expected regular file or block device, not '%s'.", drive->path);
- if (strv_extendf(&cmdline, "driver=%s,cache.direct=off,cache.no-flush=on,file.driver=%s,file.filename=%s,node-name=vmspawn_extra_%zu", image_format_to_string(drive->format), driver, escaped_drive, i) < 0)
+ DiskType dt = drive->disk_type >= 0 ? drive->disk_type : arg_image_disk_type;
+
+ if (strv_extendf(&cmdline, "driver=%s,cache.direct=off,cache.no-flush=on,file.driver=%s,file.filename=%s,node-name=vmspawn_extra_%zu%s",
+ image_format_to_string(drive->format), driver, escaped_drive, i,
+ dt == DISK_TYPE_SCSI_CD ? ",read-only=on" : "") < 0)
return log_oom();
_cleanup_free_ char *drive_fn = NULL;
@@ -2948,8 +3021,6 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
if (strv_extend(&cmdline, "-device") < 0)
return log_oom();
- DiskType dt = drive->disk_type >= 0 ? drive->disk_type : arg_image_disk_type;
-
switch (dt) {
case DISK_TYPE_VIRTIO_BLK:
if (strv_extendf(&cmdline, "virtio-blk-pci,drive=vmspawn_extra_%zu,serial=%s", i++, escaped_drive_fn) < 0)
@@ -2973,6 +3044,15 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
return log_oom();
break;
}
+ case DISK_TYPE_SCSI_CD: {
+ _cleanup_free_ char *serial = NULL;
+ r = disk_serial(escaped_drive_fn, 30, &serial);
+ if (r < 0)
+ return log_oom();
+ if (strv_extendf(&cmdline, "scsi-cd,bus=vmspawn_scsi.0,drive=vmspawn_extra_%zu,serial=%s", i++, serial) < 0)
+ return log_oom();
+ break;
+ }
default:
assert_not_reached();
}
@@ -2982,8 +3062,30 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
r = strv_prepend(&arg_kernel_cmdline_extra, "console=hvc0");
if (r < 0)
return log_oom();
+
+ /* Propagate the host's $TERM into the VM via the kernel command line. TERM= is
+ * picked up by PID 1 and inherited by services on /dev/console, and
+ * systemd.tty.term.hvc0= is used by services directly attached to /dev/hvc0 (such
+ * as serial-getty). While systemd can auto-detect the terminal type via DCS
+ * XTGETTCAP, not all terminal emulators implement this, so let's always propagate
+ * $TERM if we have it. */
+ const char *term = getenv("TERM");
+ if (!isempty(term) && !streq(term, "unknown") /* some CI environments set TERM=unknown */ &&
+ !strchr(term, ' ') && !strchr(term, '=')) {
+ FOREACH_STRING(tty_key, "systemd.tty.term.hvc0", "TERM") {
+ _cleanup_free_ char *p = strjoin(tty_key, "=", term);
+ if (!p)
+ return log_oom();
+
+ r = strv_consume_prepend(&arg_kernel_cmdline_extra, TAKE_PTR(p));
+ if (r < 0)
+ return log_oom();
+ }
+ }
}
+ _cleanup_free_ char *fstab_extra = NULL;
+
for (size_t j = 0; j < arg_runtime_mounts.n_mounts; j++) {
RuntimeMount *m = arg_runtime_mounts.mounts + j;
_cleanup_free_ char *listen_address = NULL;
@@ -3030,24 +3132,44 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
if (r < 0)
return r;
- _cleanup_free_ char *clean_target = xescape(m->target, "\":");
- if (!clean_target)
+ /* fstab uses whitespace as field separator, so octal-escape spaces in paths */
+ _cleanup_free_ char *escaped_target = octescape_full(m->target, SIZE_MAX, " \t");
+ if (!escaped_target)
return log_oom();
- if (strv_extendf(&arg_kernel_cmdline_extra, "systemd.mount-extra=\"%s:%s:virtiofs:%s\"",
- id, clean_target, m->read_only ? "ro" : "rw") < 0)
+ if (strextendf(&fstab_extra, "%s %s virtiofs %s,x-initrd.mount\n",
+ id, escaped_target, m->read_only ? "ro" : "rw") < 0)
return log_oom();
}
+ if (fstab_extra) {
+ /* If the user already specified a fstab.extra credential, combine it with ours */
+ MachineCredential *existing = machine_credential_find(&arg_credentials, "fstab.extra");
+ if (existing) {
+ _cleanup_free_ char *combined = NULL;
+
+ if (existing->size > 0 && existing->data[existing->size - 1] != '\n')
+ r = asprintf(&combined, "%.*s\n%s", (int) existing->size, existing->data, fstab_extra);
+ else
+ r = asprintf(&combined, "%.*s%s", (int) existing->size, existing->data, fstab_extra);
+ if (r < 0)
+ return log_oom();
+
+ erase_and_free(existing->data);
+ existing->data = TAKE_PTR(combined);
+ existing->size = strlen(existing->data);
+ } else {
+ r = machine_credential_add(&arg_credentials, "fstab.extra", fstab_extra, SIZE_MAX);
+ if (r < 0)
+ return r;
+ }
+ }
+
_cleanup_(rm_rf_physical_and_freep) char *smbios_dir = NULL;
r = mkdtemp_malloc("/var/tmp/vmspawn-smbios-XXXXXX", &smbios_dir);
if (r < 0)
return log_error_errno(r, "Failed to create temporary directory: %m");
- r = cmdline_add_kernel_cmdline(&cmdline, kernel, smbios_dir);
- if (r < 0)
- return r;
-
r = cmdline_add_smbios11(&cmdline, smbios_dir);
if (r < 0)
return r;
@@ -3162,25 +3284,14 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
if (arg_forward_journal) {
_cleanup_free_ char *listen_address = NULL;
-
- ChaseFlags chase_flags = CHASE_MKDIR_0755|CHASE_MUST_BE_DIRECTORY;
- if (endswith(arg_forward_journal, ".journal"))
- chase_flags |= CHASE_PARENT;
-
- _cleanup_close_ int journal_fd = -EBADF;
- r = chase(arg_forward_journal, /* root= */ NULL, chase_flags, /* ret_path= */ NULL, &journal_fd);
- if (r < 0)
- return log_error_errno(r, "Failed to create journal directory for '%s': %m", arg_forward_journal);
-
- r = chattr_fd(journal_fd, FS_NOCOW_FL, FS_NOCOW_FL);
- if (r < 0)
- log_debug_errno(r, "Failed to set NOCOW flag on journal directory for '%s', ignoring: %m", arg_forward_journal);
+ if (asprintf(&listen_address, "vsock:2:%u", child_cid) < 0)
+ return log_oom();
if (!GREEDY_REALLOC(children, n_children + 1))
return log_oom();
_cleanup_(fork_notify_terminate) PidRef child = PIDREF_NULL;
- r = start_systemd_journal_remote(unit, child_cid, sd_socket_activate, &listen_address, &child);
+ r = fork_journal_remote(listen_address, arg_forward_journal, arg_forward_journal_config, &child);
if (r < 0)
return r;
@@ -3250,16 +3361,19 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
return log_error_errno(r, "Failed to set credential systemd.unit-dropin.sshd-vsock@.service: %m");
}
- if (ARCHITECTURE_SUPPORTS_SMBIOS)
- FOREACH_ARRAY(cred, arg_credentials.credentials, arg_credentials.n_credentials) {
- _cleanup_free_ char *p = NULL, *cred_data_b64 = NULL;
- ssize_t n;
+ FOREACH_ARRAY(cred, arg_credentials.credentials, arg_credentials.n_credentials) {
+ _cleanup_free_ char *cred_data_b64 = NULL;
+ ssize_t n;
- n = base64mem(cred->data, cred->size, &cred_data_b64);
- if (n < 0)
- return log_oom();
+ n = base64mem(cred->data, cred->size, &cred_data_b64);
+ if (n < 0)
+ return log_oom();
- p = path_join(smbios_dir, cred->id);
+ /* SMBIOS is always available on x86, but on ARM it requires UEFI firmware
+ * and does not work with direct kernel boot. */
+ if (ARCHITECTURE_SUPPORTS_SMBIOS &&
+ (IN_SET(native_architecture(), ARCHITECTURE_X86, ARCHITECTURE_X86_64) || !kernel)) {
+ _cleanup_free_ char *p = path_join(smbios_dir, cred->id);
if (!p)
return log_oom();
@@ -3270,14 +3384,69 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
if (r < 0)
return log_error_errno(r, "Failed to write smbios credential file %s: %m", p);
- r = strv_extend(&cmdline, "-smbios");
- if (r < 0)
+ if (strv_extend(&cmdline, "-smbios") < 0)
return log_oom();
- r = strv_extend_joined(&cmdline, "type=11,path=", p);
- if (r < 0)
+ if (strv_extend_joined(&cmdline, "type=11,path=", p) < 0)
return log_oom();
- }
+
+ } else if (ARCHITECTURE_SUPPORTS_FW_CFG) {
+ /* fw_cfg keys are limited to 55 characters */
+ _cleanup_free_ char *key = strjoin("opt/io.systemd.credentials/", cred->id);
+ if (!key)
+ return log_oom();
+
+ if (strlen(key) <= 55) {
+ _cleanup_free_ char *p = path_join(smbios_dir, cred->id);
+ if (!p)
+ return log_oom();
+
+ r = write_data_file_atomic_at(
+ AT_FDCWD, p,
+ &IOVEC_MAKE(cred->data, cred->size),
+ WRITE_DATA_FILE_MODE_0400);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write fw_cfg credential file %s: %m", p);
+
+ if (strv_extend(&cmdline, "-fw_cfg") < 0)
+ return log_oom();
+
+ if (strv_extendf(&cmdline, "name=%s,file=%s", key, p) < 0)
+ return log_oom();
+
+ continue;
+ }
+
+ /* Fall through to kernel command line if key is too long */
+ log_debug("fw_cfg key '%s' exceeds 55 character limit, falling back to kernel command line.", key);
+
+ if (!kernel) {
+ log_warning("Cannot pass credential '%s' to VM, fw_cfg key exceeds 55 character limit and no kernel for direct boot specified.",
+ cred->id);
+ continue;
+ }
+
+ if (strv_extendf(&arg_kernel_cmdline_extra,
+ "systemd.set_credential_binary=%s:%s", cred->id, cred_data_b64) < 0)
+ return log_oom();
+
+ } else if (kernel) {
+ if (strv_extendf(&arg_kernel_cmdline_extra,
+ "systemd.set_credential_binary=%s:%s", cred->id, cred_data_b64) < 0)
+ return log_oom();
+ } else
+ log_warning("Cannot pass credential '%s' to VM, native architecture doesn't support SMBIOS or fw_cfg and no kernel for direct boot specified.",
+ cred->id);
+ }
+
+ /* CD-ROMs are read-only, so override any "rw" on the kernel command line. */
+ if (arg_image_disk_type == DISK_TYPE_SCSI_CD && strv_contains(arg_kernel_cmdline_extra, "rw"))
+ if (strv_extend(&arg_kernel_cmdline_extra, "ro") < 0)
+ return log_oom();
+
+ r = cmdline_add_kernel_cmdline(&cmdline, kernel, smbios_dir);
+ if (r < 0)
+ return r;
if (use_vsock) {
notify_sock_fd = open_vsock();
@@ -3404,9 +3573,11 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
arg_machine,
arg_uuid,
"systemd-vmspawn",
+ "vm",
&child_pidref,
arg_directory,
child_cid,
+ /* local_ifindex= */ 0,
child_cid != VMADDR_CID_ANY ? vm_address : NULL,
ssh_private_key_path,
!arg_keep_unit && arg_runtime_scope == RUNTIME_SCOPE_SYSTEM,
@@ -3426,9 +3597,11 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
arg_machine,
arg_uuid,
"systemd-vmspawn",
+ "vm",
&child_pidref,
arg_directory,
child_cid,
+ /* local_ifindex= */ 0,
child_cid != VMADDR_CID_ANY ? vm_address : NULL,
ssh_private_key_path,
!arg_keep_unit,
@@ -3508,29 +3681,31 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
_cleanup_(osc_context_closep) sd_id128_t osc_context_id = SD_ID128_NULL;
_cleanup_(pty_forward_freep) PTYForward *forward = NULL;
if (master >= 0) {
- if (!terminal_is_dumb()) {
- r = osc_context_open_vm(arg_machine, /* ret_seq= */ NULL, &osc_context_id);
- if (r < 0)
- return r;
- }
-
r = pty_forward_new(event, master, ptyfwd_flags, &forward);
if (r < 0)
return log_error_errno(r, "Failed to create PTY forwarder: %m");
- if (!arg_background) {
- _cleanup_free_ char *bg = NULL;
+ if (!FLAGS_SET(ptyfwd_flags, PTY_FORWARD_DUMB_TERMINAL)) {
+ if (!terminal_is_dumb()) {
+ r = osc_context_open_vm(arg_machine, /* ret_seq= */ NULL, &osc_context_id);
+ if (r < 0)
+ return r;
+ }
- r = terminal_tint_color(130 /* green */, &bg);
- if (r < 0)
- log_debug_errno(r, "Failed to determine terminal background color, not tinting.");
- else
- (void) pty_forward_set_background_color(forward, bg);
- } else if (!isempty(arg_background))
- (void) pty_forward_set_background_color(forward, arg_background);
+ if (!arg_background) {
+ _cleanup_free_ char *bg = NULL;
- (void) pty_forward_set_window_title(forward, GLYPH_GREEN_CIRCLE, /* hostname= */ NULL,
- STRV_MAKE("Virtual Machine", arg_machine));
+ r = terminal_tint_color(130 /* green */, &bg);
+ if (r < 0)
+ log_debug_errno(r, "Failed to determine terminal background color, not tinting.");
+ else
+ (void) pty_forward_set_background_color(forward, bg);
+ } else if (!isempty(arg_background))
+ (void) pty_forward_set_background_color(forward, arg_background);
+
+ (void) pty_forward_set_window_title(forward, GLYPH_GREEN_CIRCLE, /* hostname= */ NULL,
+ STRV_MAKE("Virtual Machine", arg_machine));
+ }
}
r = sd_event_loop(event);
@@ -3542,9 +3717,9 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
terminate_scope(runtime_bus, arg_machine);
if (registered_system)
- (void) unregister_machine(system_bus, arg_machine);
+ (void) unregister_machine(system_bus, arg_machine, RUNTIME_SCOPE_SYSTEM);
if (registered_runtime)
- (void) unregister_machine(runtime_bus, arg_machine);
+ (void) unregister_machine(runtime_bus, arg_machine, RUNTIME_SCOPE_USER);
if (use_vsock) {
if (exit_status == INT_MAX) {
@@ -3631,6 +3806,13 @@ static int verify_arguments(void) {
if (!strv_isempty(arg_initrds) && !arg_linux)
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Option --initrd= cannot be used without --linux=.");
+ if (arg_image_disk_type == DISK_TYPE_SCSI_CD) {
+ if (arg_ephemeral)
+ log_warning("--ephemeral has no effect with --image-disk-type=scsi-cd (CD-ROMs are read-only).");
+ if (arg_discard_disk)
+ log_warning("--discard-disk has no effect with --image-disk-type=scsi-cd (CD-ROMs are read-only).");
+ }
+
return 0;
}