vfio/container: restore DMA vaddr

In new QEMU (the incoming side of a CPR live update), do not register the
memory listener at device creation time.
Register it later, in the container post_load handler, after all vmstate
that may affect regions and mapping boundaries has been loaded.  The
post_load registration will cause the listener to invoke its callback on
each flat section, and the calls will match the mappings remembered by the
kernel.

The listener calls a special dma_map handler that passes the new VA of each
section to the kernel using VFIO_DMA_MAP_FLAG_VADDR.  Restore the normal
handler at the end.
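For context, a minimal sketch of the two kernel calls this scheme pairs up,
using only the Linux VFIO UAPI. The outgoing side (QEMU's
vfio_dma_unmap_vaddr_all(), seen below in cpr-legacy.c) invalidates every
vaddr before exec; the incoming side supplies the new one. The function names
and the container_fd parameter here are illustrative, not QEMU code:

    #include <errno.h>
    #include <sys/ioctl.h>
    #include <linux/vfio.h>

    /* Outgoing side (pre_save): invalidate the userspace vaddr of every
     * mapping, leaving iova, size, and pinned pages intact in the kernel. */
    static int invalidate_all_vaddr(int container_fd)
    {
        struct vfio_iommu_type1_dma_unmap unmap = {
            .argsz = sizeof(unmap),
            .flags = VFIO_DMA_UNMAP_FLAG_VADDR | VFIO_DMA_UNMAP_FLAG_ALL,
            .iova = 0,
            .size = 0,    /* ignored with FLAG_ALL: applies to all mappings */
        };

        return ioctl(container_fd, VFIO_IOMMU_UNMAP_DMA, &unmap) ? -errno : 0;
    }

    /* Incoming side (post_load): supply the new vaddr for one mapping that
     * the kernel still remembers by (iova, size). */
    static int update_vaddr(int container_fd, __u64 iova, __u64 size,
                            void *vaddr)
    {
        struct vfio_iommu_type1_dma_map map = {
            .argsz = sizeof(map),
            .flags = VFIO_DMA_MAP_FLAG_VADDR,
            .vaddr = (__u64)(unsigned long)vaddr,
            .iova = iova,
            .size = size,
        };

        return ioctl(container_fd, VFIO_IOMMU_MAP_DMA, &map) ? -errno : 0;
    }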

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Link: https://lore.kernel.org/qemu-devel/1749569991-25171-7-git-send-email-steven.sistare@oracle.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
commit 7e9f214113 (parent 1faadd9630)
Steve Sistare, 2025-06-10, committed by Cédric Le Goater
3 changed files with 73 additions and 2 deletions

hw/vfio/container.c
@@ -136,6 +136,8 @@ static int vfio_legacy_dma_unmap_one(const VFIOContainerBase *bcontainer,
    int ret;
    Error *local_err = NULL;

    g_assert(!cpr_is_incoming());

    if (iotlb && vfio_container_dirty_tracking_is_started(bcontainer)) {
        if (!vfio_container_devices_dirty_tracking_is_supported(bcontainer) &&
            bcontainer->dirty_pages_supported) {
@@ -690,9 +692,18 @@ static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as,
    }
    group_was_added = true;

    /*
     * If CPR, register the listener later, after all state that may
     * affect regions and mapping boundaries has been cpr load'ed.  Later,
     * the listener will invoke its callback on each flat section and call
     * dma_map to supply the new vaddr, and the calls will match the mappings
     * remembered by the kernel.
     */
    if (!cpr_is_incoming()) {
        if (!vfio_listener_register(bcontainer, errp)) {
            goto fail;
        }
    }

    bcontainer->initialized = true;
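Why deferring the registration still covers every mapping: registering a
MemoryListener makes QEMU replay the address space's current flat view
through the listener's region_add callback, so a listener attached during
post_load sees the same flat sections it would have seen at device creation
time. A rough sketch of that pattern; the listener name and callback below
are illustrative, not QEMU code, and the header path assumes a recent QEMU
tree:

    #include "qemu/osdep.h"
    #include "system/memory.h"    /* "exec/memory.h" in older QEMU trees */

    /* Illustrative listener: region_add fires once per existing flat
     * section at registration time (the replay), then again on later
     * topology changes. */
    static void example_region_add(MemoryListener *listener,
                                   MemoryRegionSection *section)
    {
        /* During CPR load, the VFIO equivalent of this callback ends up
         * in the container's dma_map hook, at that point diverted to
         * vfio_legacy_cpr_dma_map(). */
    }

    static MemoryListener example_listener = {
        .name = "cpr-example",
        .region_add = example_region_add,
    };

    static void example_register(void)
    {
        /* Replays all current flat sections into example_region_add(). */
        memory_listener_register(&example_listener, &address_space_memory);
    }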

hw/vfio/cpr-legacy.c
@@ -9,11 +9,13 @@
#include "qemu/osdep.h"
#include "hw/vfio/vfio-container.h"
#include "hw/vfio/vfio-device.h"
#include "hw/vfio/vfio-listener.h"
#include "migration/blocker.h"
#include "migration/cpr.h"
#include "migration/migration.h"
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/error-report.h"

static bool vfio_dma_unmap_vaddr_all(VFIOContainer *container, Error **errp)
{
@@ -30,6 +32,32 @@ static bool vfio_dma_unmap_vaddr_all(VFIOContainer *container, Error **errp)
    return true;
}

/*
 * Set the new @vaddr for any mappings registered during cpr load.
 * The incoming state is cleared thereafter.
 */
static int vfio_legacy_cpr_dma_map(const VFIOContainerBase *bcontainer,
                                   hwaddr iova, ram_addr_t size, void *vaddr,
                                   bool readonly, MemoryRegion *mr)
{
    const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
                                                  bcontainer);
    struct vfio_iommu_type1_dma_map map = {
        .argsz = sizeof(map),
        .flags = VFIO_DMA_MAP_FLAG_VADDR,
        .vaddr = (__u64)(uintptr_t)vaddr,
        .iova = iova,
        .size = size,
    };

    g_assert(cpr_is_incoming());

    if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map)) {
        return -errno;
    }

    return 0;
}

static bool vfio_cpr_supported(VFIOContainer *container, Error **errp)
{
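The reason a single pointer swap suffices: every VFIO mapping request funnels
through the container's VFIOIOMMUClass hook. A condensed sketch of that
dispatch point, shaped like QEMU's vfio_container_dma_map() but with its
error handling and tracing omitted:

    /* Condensed sketch (not the literal QEMU source): pointing
     * vioc->dma_map at vfio_legacy_cpr_dma_map is enough to divert every
     * mapping request made while CPR load is in progress. */
    int vfio_container_dma_map(VFIOContainerBase *bcontainer, hwaddr iova,
                               ram_addr_t size, void *vaddr, bool readonly,
                               MemoryRegion *mr)
    {
        VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);

        return vioc->dma_map(bcontainer, iova, size, vaddr, readonly, mr);
    }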
@@ -58,11 +86,34 @@ static int vfio_container_pre_save(void *opaque)
    return 0;
}

static int vfio_container_post_load(void *opaque, int version_id)
{
    VFIOContainer *container = opaque;
    VFIOContainerBase *bcontainer = &container->bcontainer;
    VFIOGroup *group;
    Error *local_err = NULL;

    if (!vfio_listener_register(bcontainer, &local_err)) {
        error_report_err(local_err);
        return -1;
    }

    QLIST_FOREACH(group, &container->group_list, container_next) {
        VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);

        /* Restore original dma_map function */
        vioc->dma_map = container->cpr.saved_dma_map;
    }

    return 0;
}

static const VMStateDescription vfio_container_vmstate = {
    .name = "vfio-container",
    .version_id = 0,
    .minimum_version_id = 0,
    .priority = MIG_PRI_LOW,  /* Must happen after devices and groups */
    .pre_save = vfio_container_pre_save,
    .post_load = vfio_container_post_load,
    .needed = cpr_incoming_needed,
    .fields = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
@@ -85,6 +136,12 @@ bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp)
    vmstate_register(NULL, -1, &vfio_container_vmstate, container);

    /* During incoming CPR, divert calls to dma_map. */
    if (cpr_is_incoming()) {
        VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);

        container->cpr.saved_dma_map = vioc->dma_map;
        vioc->dma_map = vfio_legacy_cpr_dma_map;
    }

    return true;
}

include/hw/vfio/vfio-cpr.h

@@ -17,6 +17,9 @@ struct VFIOGroup;

typedef struct VFIOContainerCPR {
    Error *blocker;

    int (*saved_dma_map)(const struct VFIOContainerBase *bcontainer,
                         hwaddr iova, ram_addr_t size,
                         void *vaddr, bool readonly, MemoryRegion *mr);
} VFIOContainerCPR;