Skip to content

Commit 563ac3d

Browse files
committed
Merge tag 'pull-vfio-20250704' of https://github.com/legoater/qemu into staging
vfio queue: * Added small cleanups for b4 and scope * Restricted TDX build to 64-bit target * Fixed issues introduced in first part of VFIO live update support * Added full VFIO live update support # -----BEGIN PGP SIGNATURE----- # # iQIzBAABCAAdFiEEoPZlSPBIlev+awtgUaNDx8/77KEFAmhnlBMACgkQUaNDx8/7 # 7KFOxw//dIPpGcYIjEGpIkIh6NF3VK6xmDAG0aZEeM+5fCzdor2DPkD7ZPyqND3S # /YkR8GSOHd+Qm5W+73LHOdV5RFMt4wagyHiAKUMpEFHY7ZLduxIXlACoUo+F5cnh # SUnhC6KX7Gu1/Nndb4X4w6SNOyhoRKtQ2EqpRsrGdIaBkX8s6w2jF/INPTPdpg73 # lulJZCAFNzyIWytck9ohJf8To9IsvkCXTF6mcywURa9MBaAarRttXoFjuZsXb7zn # NqGVtantNAaJmKu26X3ScUWn9P02WryhPB6KT7+B3G/b87Su1cnbAwYakNSFPJIx # I/gaw0EPzHM+b6mavA4IdvKDJGR7GMvpJEGqUEpntc6FJ3+g1B7qsedgeBUc/RKB # UaRmtYbvlMv5wSmaLcxsT3S3BnABbrd4EedZX5uOBFMrtnTiOqrMUEcoMaf5ogvN # KlJkrjNQkfHxTbp5G+nXHuTzae3k2Ylm196b2yhgARfUL70jiak/B+ADeezVcVmW # 6ZpotrAvMxu9RlFdxTSbL0/lR0rfKZTecqMOSFA+FlmjcTJ0QW1SbweMdsfgW/uU # /2Hfmw6zUQ80/tMqYMztFWsiov7C8a8ZMmuZwDQp+AdCVGgFEigfNJVQYgujbqKz # g9Ta9cNPyvF5hpnml5u8IzAzM95HrhIPFmmpUBZyWOCeL6chSHk= # =Cu7b # -----END PGP SIGNATURE----- # gpg: Signature made Fri 04 Jul 2025 04:42:59 EDT # gpg: using RSA key A0F66548F04895EBFE6B0B6051A343C7CFFBECA1 # gpg: Good signature from "Cédric Le Goater <clg@redhat.com>" [full] # gpg: aka "Cédric Le Goater <clg@kaod.org>" [full] # Primary key fingerprint: A0F6 6548 F048 95EB FE6B 0B60 51A3 43C7 CFFB ECA1 * tag 'pull-vfio-20250704' of https://github.com/legoater/qemu: (27 commits) vfio: doc changes for cpr vfio/container: delete old cpr register iommufd: preserve DMA mappings vfio/iommufd: change process vfio/iommufd: reconstruct hwpt vfio/iommufd: reconstruct device vfio/iommufd: preserve descriptors vfio/iommufd: cpr state migration: vfio cpr state hook vfio/iommufd: register container for cpr vfio/iommufd: device name blocker vfio/iommufd: add vfio_device_free_name vfio/iommufd: invariant device name vfio/iommufd: use IOMMU_IOAS_MAP_FILE physmem: qemu_ram_get_fd_offset backends/iommufd: change process ioctl 
backends/iommufd: iommufd_backend_map_file_dma migration: cpr_get_fd_param helper migration: close kvm after cpr vfio-pci: preserve INTx ... Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2 parents a876b05 + 7437caa commit 563ac3d

File tree

33 files changed

+888
-92
lines changed

33 files changed

+888
-92
lines changed

.b4-config

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,3 @@
1111
prep-perpatch-check-cmd = scripts/checkpatch.pl -q --terse --no-summary --mailback -
1212
searchmask = https://lore.kernel.org/qemu-devel/?x=m&t=1&q=%s
1313
linkmask = https://lore.kernel.org/qemu-devel/%s
14-
linktrailermask = Message-ID: <%s>

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ distclean: clean recurse-distclean
227227
rm -Rf .sdk qemu-bundle
228228

229229
find-src-path = find "$(SRC_PATH)" -path "$(SRC_PATH)/meson" -prune -o \
230+
-path "$(SRC_PATH)/.pc" -prune -o \
230231
-type l -prune -o \( -name "*.[chsS]" -o -name "*.[ch].inc" \)
231232

232233
.PHONY: ctags

accel/kvm/kvm-all.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -515,16 +515,23 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
515515
goto err;
516516
}
517517

518+
/* If I am the CPU that created coalesced_mmio_ring, then discard it */
519+
if (s->coalesced_mmio_ring == (void *)cpu->kvm_run + PAGE_SIZE) {
520+
s->coalesced_mmio_ring = NULL;
521+
}
522+
518523
ret = munmap(cpu->kvm_run, mmap_size);
519524
if (ret < 0) {
520525
goto err;
521526
}
527+
cpu->kvm_run = NULL;
522528

523529
if (cpu->kvm_dirty_gfns) {
524530
ret = munmap(cpu->kvm_dirty_gfns, s->kvm_dirty_ring_bytes);
525531
if (ret < 0) {
526532
goto err;
527533
}
534+
cpu->kvm_dirty_gfns = NULL;
528535
}
529536

530537
kvm_park_vcpu(cpu);
@@ -608,6 +615,31 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
608615
return ret;
609616
}
610617

618+
void kvm_close(void)
619+
{
620+
CPUState *cpu;
621+
622+
if (!kvm_state || kvm_state->fd == -1) {
623+
return;
624+
}
625+
626+
CPU_FOREACH(cpu) {
627+
cpu_remove_sync(cpu);
628+
close(cpu->kvm_fd);
629+
cpu->kvm_fd = -1;
630+
close(cpu->kvm_vcpu_stats_fd);
631+
cpu->kvm_vcpu_stats_fd = -1;
632+
}
633+
634+
if (kvm_state && kvm_state->fd != -1) {
635+
close(kvm_state->vmfd);
636+
kvm_state->vmfd = -1;
637+
close(kvm_state->fd);
638+
kvm_state->fd = -1;
639+
}
640+
kvm_state = NULL;
641+
}
642+
611643
/*
612644
* dirty pages logging control
613645
*/

backends/iommufd.c

Lines changed: 103 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,18 @@
1616
#include "qemu/module.h"
1717
#include "qom/object_interfaces.h"
1818
#include "qemu/error-report.h"
19+
#include "migration/cpr.h"
1920
#include "monitor/monitor.h"
2021
#include "trace.h"
2122
#include "hw/vfio/vfio-device.h"
2223
#include <sys/ioctl.h>
2324
#include <linux/iommufd.h>
2425

26+
/*
 * Return the canonical path component of the backend object @be.
 * Other functions in this file pass the result as the name argument of
 * the cpr fd helpers.
 */
static const char *iommufd_fd_name(IOMMUFDBackend *be)
{
    Object *obj = OBJECT(be);

    return object_get_canonical_path_component(obj);
}
30+
2531
static void iommufd_backend_init(Object *obj)
2632
{
2733
IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
@@ -64,26 +70,73 @@ static bool iommufd_backend_can_be_deleted(UserCreatable *uc)
6470
return !be->users;
6571
}
6672

73+
/*
 * UserCreatable "complete" hook for the iommufd backend.
 *
 * Acts only when the backend does not own its fd (be->owned is false).
 * During an incoming cpr, be->fd is replaced by the value looked up in cpr
 * state under this backend's name; otherwise the current be->fd is saved
 * under that name.  @errp is never written here.
 */
static void iommufd_backend_complete(UserCreatable *uc, Error **errp)
{
    IOMMUFDBackend *be = IOMMUFD_BACKEND(uc);
    const char *fd_name = iommufd_fd_name(be);

    if (be->owned) {
        return;
    }

    /* fd came from the command line. Fetch updated value from cpr state. */
    if (cpr_is_incoming()) {
        be->fd = cpr_find_fd(fd_name, 0);
        return;
    }

    cpr_save_fd(fd_name, 0, be->fd);
}
87+
6788
/*
 * Class initializer: installs the UserCreatable can_be_deleted and complete
 * hooks and registers the string property "fd".
 */
static void iommufd_backend_class_init(ObjectClass *oc, const void *data)
{
    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);

    ucc->complete = iommufd_backend_complete;
    ucc->can_be_deleted = iommufd_backend_can_be_deleted;

    /* NULL is passed for the third argument; only a setter is supplied. */
    object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd);
}
7597

98+
/*
 * Probe whether IOMMU_IOAS_CHANGE_PROCESS can be issued on @be's fd.
 *
 * Returns true when the ioctl returns 0, false otherwise.  No error is
 * reported to the caller.
 */
bool iommufd_change_process_capable(IOMMUFDBackend *be)
{
    struct iommu_ioas_change_process args = {.size = sizeof(args)};
    int rc;

    /*
     * Call IOMMU_IOAS_CHANGE_PROCESS to verify it is a recognized ioctl.
     * This is a no-op if the process has not changed since DMA was mapped.
     */
    rc = ioctl(be->fd, IOMMU_IOAS_CHANGE_PROCESS, &args);

    return rc == 0;
}
108+
109+
/*
 * Issue IOMMU_IOAS_CHANGE_PROCESS on @be's fd.
 *
 * Returns true when the ioctl returns 0.  On failure, sets @errp from errno
 * with a message naming the fd and returns false.  The outcome is traced in
 * both cases.
 */
bool iommufd_change_process(IOMMUFDBackend *be, Error **errp)
{
    struct iommu_ioas_change_process args = {.size = sizeof(args)};
    bool ok = true;

    if (ioctl(be->fd, IOMMU_IOAS_CHANGE_PROCESS, &args) != 0) {
        error_setg_errno(errp, errno, "IOMMU_IOAS_CHANGE_PROCESS fd %d failed",
                         be->fd);
        ok = false;
    }

    trace_iommufd_change_process(be->fd, ok);
    return ok;
}
121+
76122
bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
77123
{
78124
int fd;
79125

80126
if (be->owned && !be->users) {
81-
fd = qemu_open("/dev/iommu", O_RDWR, errp);
127+
fd = cpr_open_fd("/dev/iommu", O_RDWR, iommufd_fd_name(be), 0, errp);
82128
if (fd < 0) {
83129
return false;
84130
}
85131
be->fd = fd;
86132
}
133+
if (!be->users && !vfio_iommufd_cpr_register_iommufd(be, errp)) {
134+
if (be->owned) {
135+
close(be->fd);
136+
be->fd = -1;
137+
}
138+
return false;
139+
}
87140
be->users++;
88141

89142
trace_iommufd_backend_connect(be->fd, be->owned, be->users);
@@ -96,9 +149,13 @@ void iommufd_backend_disconnect(IOMMUFDBackend *be)
96149
goto out;
97150
}
98151
be->users--;
99-
if (!be->users && be->owned) {
100-
close(be->fd);
101-
be->fd = -1;
152+
if (!be->users) {
153+
vfio_iommufd_cpr_unregister_iommufd(be);
154+
if (be->owned) {
155+
cpr_delete_fd(iommufd_fd_name(be), 0);
156+
close(be->fd);
157+
be->fd = -1;
158+
}
102159
}
103160
out:
104161
trace_iommufd_backend_disconnect(be->fd, be->users);
@@ -172,6 +229,44 @@ int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
172229
return ret;
173230
}
174231

232+
int iommufd_backend_map_file_dma(IOMMUFDBackend *be, uint32_t ioas_id,
233+
hwaddr iova, ram_addr_t size,
234+
int mfd, unsigned long start, bool readonly)
235+
{
236+
int ret, fd = be->fd;
237+
struct iommu_ioas_map_file map = {
238+
.size = sizeof(map),
239+
.flags = IOMMU_IOAS_MAP_READABLE |
240+
IOMMU_IOAS_MAP_FIXED_IOVA,
241+
.ioas_id = ioas_id,
242+
.fd = mfd,
243+
.start = start,
244+
.iova = iova,
245+
.length = size,
246+
};
247+
248+
if (cpr_is_incoming()) {
249+
return 0;
250+
}
251+
252+
if (!readonly) {
253+
map.flags |= IOMMU_IOAS_MAP_WRITEABLE;
254+
}
255+
256+
ret = ioctl(fd, IOMMU_IOAS_MAP_FILE, &map);
257+
trace_iommufd_backend_map_file_dma(fd, ioas_id, iova, size, mfd, start,
258+
readonly, ret);
259+
if (ret) {
260+
ret = -errno;
261+
262+
/* TODO: Not support mapping hardware PCI BAR region for now. */
263+
if (errno == EFAULT) {
264+
warn_report("IOMMU_IOAS_MAP_FILE failed: %m, PCI BAR?");
265+
}
266+
}
267+
return ret;
268+
}
269+
175270
int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
176271
hwaddr iova, ram_addr_t size)
177272
{
@@ -183,6 +278,10 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
183278
.length = size,
184279
};
185280

281+
if (cpr_is_incoming()) {
282+
return 0;
283+
}
284+
186285
ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap);
187286
/*
188287
* IOMMUFD takes mapping as some kind of object, unmapping

backends/trace-events

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,12 @@ dbus_vmstate_loading(const char *id) "id: %s"
77
dbus_vmstate_saving(const char *id) "id: %s"
88

99
# iommufd.c
10+
iommufd_change_process(int fd, bool ret) "fd=%d (%d)"
1011
iommufd_backend_connect(int fd, bool owned, uint32_t users) "fd=%d owned=%d users=%d"
1112
iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d"
1213
iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d"
1314
iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)"
15+
iommufd_backend_map_file_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int fd, unsigned long start, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" fd=%d start=%lu readonly=%d (%d)"
1416
iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
1517
iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
1618
iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas) " iommufd=%d ioas=%d"

docs/devel/migration/CPR.rst

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,8 +152,7 @@ cpr-transfer mode
152152
This mode allows the user to transfer a guest to a new QEMU instance
153153
on the same host with minimal guest pause time, by preserving guest
154154
RAM in place, albeit with new virtual addresses in new QEMU. Devices
155-
and their pinned memory pages will also be preserved in a future QEMU
156-
release.
155+
and their pinned memory pages are also preserved for VFIO and IOMMUFD.
157156

158157
The user starts new QEMU on the same host as old QEMU, with command-
159158
line arguments to create the same machine, plus the ``-incoming``
@@ -322,6 +321,6 @@ Futures
322321

323322
cpr-transfer mode is based on a capability to transfer open file
324323
descriptors from old to new QEMU. In the future, descriptors for
325-
vfio, iommufd, vhost, and char devices could be transferred,
324+
vhost and char devices could be transferred,
326325
preserving those devices and their kernel state without interruption,
327326
even if they do not explicitly support live migration.

hw/i386/Kconfig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ config SGX
1313
config TDX
1414
bool
1515
select X86_FW_OVMF
16-
depends on KVM
16+
depends on KVM && X86_64
1717

1818
config PC
1919
bool

hw/vfio-user/container.c

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
#include "hw/vfio-user/container.h"
1414
#include "hw/vfio-user/device.h"
1515
#include "hw/vfio-user/trace.h"
16-
#include "hw/vfio/vfio-cpr.h"
1716
#include "hw/vfio/vfio-device.h"
1817
#include "hw/vfio/vfio-listener.h"
1918
#include "qapi/error.h"
@@ -225,14 +224,10 @@ vfio_user_container_connect(AddressSpace *as, VFIODevice *vbasedev,
225224

226225
bcontainer = &container->bcontainer;
227226

228-
if (!vfio_cpr_register_container(bcontainer, errp)) {
229-
goto free_container_exit;
230-
}
231-
232227
ret = ram_block_uncoordinated_discard_disable(true);
233228
if (ret) {
234229
error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
235-
goto unregister_container_exit;
230+
goto free_container_exit;
236231
}
237232

238233
vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
@@ -261,9 +256,6 @@ vfio_user_container_connect(AddressSpace *as, VFIODevice *vbasedev,
261256
enable_discards_exit:
262257
ram_block_uncoordinated_discard_disable(false);
263258

264-
unregister_container_exit:
265-
vfio_cpr_unregister_container(bcontainer);
266-
267259
free_container_exit:
268260
object_unref(container);
269261

@@ -286,7 +278,6 @@ static void vfio_user_container_disconnect(VFIOUserContainer *container)
286278
vioc->release(bcontainer);
287279
}
288280

289-
vfio_cpr_unregister_container(bcontainer);
290281
object_unref(container);
291282

292283
vfio_address_space_put(space);

hw/vfio/ap.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp)
265265

266266
error:
267267
error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
268-
g_free(vbasedev->name);
268+
vfio_device_free_name(vbasedev);
269269
}
270270

271271
static void vfio_ap_unrealize(DeviceState *dev)
@@ -275,7 +275,7 @@ static void vfio_ap_unrealize(DeviceState *dev)
275275
vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX);
276276
vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_CFG_CHG_IRQ_INDEX);
277277
vfio_device_detach(&vapdev->vdev);
278-
g_free(vapdev->vdev.name);
278+
vfio_device_free_name(&vapdev->vdev);
279279
}
280280

281281
static const Property vfio_ap_properties[] = {

hw/vfio/ccw.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -619,7 +619,7 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp)
619619
out_region_err:
620620
vfio_device_detach(vbasedev);
621621
out_attach_dev_err:
622-
g_free(vbasedev->name);
622+
vfio_device_free_name(vbasedev);
623623
out_unrealize:
624624
if (cdc->unrealize) {
625625
cdc->unrealize(cdev);
@@ -637,7 +637,7 @@ static void vfio_ccw_unrealize(DeviceState *dev)
637637
vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX);
638638
vfio_ccw_put_region(vcdev);
639639
vfio_device_detach(&vcdev->vdev);
640-
g_free(vcdev->vdev.name);
640+
vfio_device_free_name(&vcdev->vdev);
641641

642642
if (cdc->unrealize) {
643643
cdc->unrealize(cdev);

0 commit comments

Comments
 (0)