mirror of
https://github.com/Motorhead1991/qemu.git
synced 2025-08-04 08:13:54 -06:00
vfio/container: recover from unmap-all-vaddr failure
If there are multiple containers and unmap-all fails for some container, we need to remap vaddr for the other containers for which unmap-all succeeded. Recover by walking all address ranges of all containers to restore the vaddr for each. Do so by invoking the vfio listener callback, and passing a new "remap" flag that tells it to restore a mapping without re-allocating new userland data structures.

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Link: https://lore.kernel.org/qemu-devel/1749569991-25171-9-git-send-email-steven.sistare@oracle.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
This commit is contained in:
parent
dac0dd68d9
commit
eba1f657cb
4 changed files with 122 additions and 1 deletions
|
@ -29,6 +29,7 @@ static bool vfio_dma_unmap_vaddr_all(VFIOContainer *container, Error **errp)
|
||||||
error_setg_errno(errp, errno, "vfio_dma_unmap_vaddr_all");
|
error_setg_errno(errp, errno, "vfio_dma_unmap_vaddr_all");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
container->cpr.vaddr_unmapped = true;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -59,6 +60,14 @@ static int vfio_legacy_cpr_dma_map(const VFIOContainerBase *bcontainer,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void vfio_region_remap(MemoryListener *listener,
|
||||||
|
MemoryRegionSection *section)
|
||||||
|
{
|
||||||
|
VFIOContainer *container = container_of(listener, VFIOContainer,
|
||||||
|
cpr.remap_listener);
|
||||||
|
vfio_container_region_add(&container->bcontainer, section, true);
|
||||||
|
}
|
||||||
|
|
||||||
static bool vfio_cpr_supported(VFIOContainer *container, Error **errp)
|
static bool vfio_cpr_supported(VFIOContainer *container, Error **errp)
|
||||||
{
|
{
|
||||||
if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UPDATE_VADDR)) {
|
if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UPDATE_VADDR)) {
|
||||||
|
@ -120,6 +129,40 @@ static const VMStateDescription vfio_container_vmstate = {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier,
|
||||||
|
MigrationEvent *e, Error **errp)
|
||||||
|
{
|
||||||
|
VFIOContainer *container =
|
||||||
|
container_of(notifier, VFIOContainer, cpr.transfer_notifier);
|
||||||
|
VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
|
||||||
|
if (e->type != MIG_EVENT_PRECOPY_FAILED) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (container->cpr.vaddr_unmapped) {
|
||||||
|
/*
|
||||||
|
* Force a call to vfio_region_remap for each mapped section by
|
||||||
|
* temporarily registering a listener, and temporarily diverting
|
||||||
|
* dma_map to vfio_legacy_cpr_dma_map. The latter restores vaddr.
|
||||||
|
*/
|
||||||
|
|
||||||
|
VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
|
||||||
|
vioc->dma_map = vfio_legacy_cpr_dma_map;
|
||||||
|
|
||||||
|
container->cpr.remap_listener = (MemoryListener) {
|
||||||
|
.name = "vfio cpr recover",
|
||||||
|
.region_add = vfio_region_remap
|
||||||
|
};
|
||||||
|
memory_listener_register(&container->cpr.remap_listener,
|
||||||
|
bcontainer->space->as);
|
||||||
|
memory_listener_unregister(&container->cpr.remap_listener);
|
||||||
|
container->cpr.vaddr_unmapped = false;
|
||||||
|
vioc->dma_map = container->cpr.saved_dma_map;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp)
|
bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp)
|
||||||
{
|
{
|
||||||
VFIOContainerBase *bcontainer = &container->bcontainer;
|
VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
@ -142,6 +185,10 @@ bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp)
|
||||||
container->cpr.saved_dma_map = vioc->dma_map;
|
container->cpr.saved_dma_map = vioc->dma_map;
|
||||||
vioc->dma_map = vfio_legacy_cpr_dma_map;
|
vioc->dma_map = vfio_legacy_cpr_dma_map;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
migration_add_notifier_mode(&container->cpr.transfer_notifier,
|
||||||
|
vfio_cpr_fail_notifier,
|
||||||
|
MIG_MODE_CPR_TRANSFER);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -152,6 +199,50 @@ void vfio_legacy_cpr_unregister_container(VFIOContainer *container)
|
||||||
migration_remove_notifier(&bcontainer->cpr_reboot_notifier);
|
migration_remove_notifier(&bcontainer->cpr_reboot_notifier);
|
||||||
migrate_del_blocker(&container->cpr.blocker);
|
migrate_del_blocker(&container->cpr.blocker);
|
||||||
vmstate_unregister(NULL, &vfio_container_vmstate, container);
|
vmstate_unregister(NULL, &vfio_container_vmstate, container);
|
||||||
|
migration_remove_notifier(&container->cpr.transfer_notifier);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In old QEMU, VFIO_DMA_UNMAP_FLAG_VADDR may fail on some mapping after
|
||||||
|
* succeeding for others, so the latter have lost their vaddr. Call this
|
||||||
|
* to restore vaddr for a section with a giommu.
|
||||||
|
*
|
||||||
|
* The giommu already exists. Find it and replay it, which calls
|
||||||
|
* vfio_legacy_cpr_dma_map further down the stack.
|
||||||
|
*/
|
||||||
|
void vfio_cpr_giommu_remap(VFIOContainerBase *bcontainer,
|
||||||
|
MemoryRegionSection *section)
|
||||||
|
{
|
||||||
|
VFIOGuestIOMMU *giommu = NULL;
|
||||||
|
hwaddr as_offset = section->offset_within_address_space;
|
||||||
|
hwaddr iommu_offset = as_offset - section->offset_within_region;
|
||||||
|
|
||||||
|
QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) {
|
||||||
|
if (giommu->iommu_mr == IOMMU_MEMORY_REGION(section->mr) &&
|
||||||
|
giommu->iommu_offset == iommu_offset) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
g_assert(giommu);
|
||||||
|
memory_region_iommu_replay(giommu->iommu_mr, &giommu->n);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In old QEMU, VFIO_DMA_UNMAP_FLAG_VADDR may fail on some mapping after
|
||||||
|
* succeeding for others, so the latter have lost their vaddr. Call this
|
||||||
|
* to restore vaddr for a section with a RamDiscardManager.
|
||||||
|
*
|
||||||
|
* The ram discard listener already exists. Call its populate function
|
||||||
|
* directly, which calls vfio_legacy_cpr_dma_map.
|
||||||
|
*/
|
||||||
|
bool vfio_cpr_ram_discard_register_listener(VFIOContainerBase *bcontainer,
|
||||||
|
MemoryRegionSection *section)
|
||||||
|
{
|
||||||
|
VFIORamDiscardListener *vrdl =
|
||||||
|
vfio_find_ram_discard_listener(bcontainer, section);
|
||||||
|
|
||||||
|
g_assert(vrdl);
|
||||||
|
return vrdl->listener.notify_populate(&vrdl->listener, section) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int vfio_cpr_group_get_device_fd(int d, const char *name)
|
int vfio_cpr_group_get_device_fd(int d, const char *name)
|
||||||
|
|
|
@ -481,6 +481,13 @@ static void vfio_listener_region_add(MemoryListener *listener,
|
||||||
{
|
{
|
||||||
VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
|
VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
|
||||||
listener);
|
listener);
|
||||||
|
vfio_container_region_add(bcontainer, section, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
void vfio_container_region_add(VFIOContainerBase *bcontainer,
|
||||||
|
MemoryRegionSection *section,
|
||||||
|
bool cpr_remap)
|
||||||
|
{
|
||||||
hwaddr iova, end;
|
hwaddr iova, end;
|
||||||
Int128 llend, llsize;
|
Int128 llend, llsize;
|
||||||
void *vaddr;
|
void *vaddr;
|
||||||
|
@ -516,6 +523,11 @@ static void vfio_listener_region_add(MemoryListener *listener,
|
||||||
int iommu_idx;
|
int iommu_idx;
|
||||||
|
|
||||||
trace_vfio_listener_region_add_iommu(section->mr->name, iova, end);
|
trace_vfio_listener_region_add_iommu(section->mr->name, iova, end);
|
||||||
|
|
||||||
|
if (cpr_remap) {
|
||||||
|
vfio_cpr_giommu_remap(bcontainer, section);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* FIXME: For VFIO iommu types which have KVM acceleration to
|
* FIXME: For VFIO iommu types which have KVM acceleration to
|
||||||
* avoid bouncing all map/unmaps through qemu this way, this
|
* avoid bouncing all map/unmaps through qemu this way, this
|
||||||
|
@ -558,7 +570,12 @@ static void vfio_listener_region_add(MemoryListener *listener,
|
||||||
* about changes.
|
* about changes.
|
||||||
*/
|
*/
|
||||||
if (memory_region_has_ram_discard_manager(section->mr)) {
|
if (memory_region_has_ram_discard_manager(section->mr)) {
|
||||||
|
if (!cpr_remap) {
|
||||||
vfio_ram_discard_register_listener(bcontainer, section);
|
vfio_ram_discard_register_listener(bcontainer, section);
|
||||||
|
} else if (!vfio_cpr_ram_discard_register_listener(bcontainer,
|
||||||
|
section)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -256,4 +256,7 @@ struct VFIOIOMMUClass {
|
||||||
VFIORamDiscardListener *vfio_find_ram_discard_listener(
|
VFIORamDiscardListener *vfio_find_ram_discard_listener(
|
||||||
VFIOContainerBase *bcontainer, MemoryRegionSection *section);
|
VFIOContainerBase *bcontainer, MemoryRegionSection *section);
|
||||||
|
|
||||||
|
void vfio_container_region_add(VFIOContainerBase *bcontainer,
|
||||||
|
MemoryRegionSection *section, bool cpr_remap);
|
||||||
|
|
||||||
#endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */
|
#endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */
|
||||||
|
|
|
@ -10,6 +10,7 @@
|
||||||
#define HW_VFIO_VFIO_CPR_H
|
#define HW_VFIO_VFIO_CPR_H
|
||||||
|
|
||||||
#include "migration/misc.h"
|
#include "migration/misc.h"
|
||||||
|
#include "system/memory.h"
|
||||||
|
|
||||||
struct VFIOContainer;
|
struct VFIOContainer;
|
||||||
struct VFIOContainerBase;
|
struct VFIOContainerBase;
|
||||||
|
@ -17,6 +18,9 @@ struct VFIOGroup;
|
||||||
|
|
||||||
typedef struct VFIOContainerCPR {
|
typedef struct VFIOContainerCPR {
|
||||||
Error *blocker;
|
Error *blocker;
|
||||||
|
bool vaddr_unmapped;
|
||||||
|
NotifierWithReturn transfer_notifier;
|
||||||
|
MemoryListener remap_listener;
|
||||||
int (*saved_dma_map)(const struct VFIOContainerBase *bcontainer,
|
int (*saved_dma_map)(const struct VFIOContainerBase *bcontainer,
|
||||||
hwaddr iova, ram_addr_t size,
|
hwaddr iova, ram_addr_t size,
|
||||||
void *vaddr, bool readonly, MemoryRegion *mr);
|
void *vaddr, bool readonly, MemoryRegion *mr);
|
||||||
|
@ -42,4 +46,10 @@ int vfio_cpr_group_get_device_fd(int d, const char *name);
|
||||||
bool vfio_cpr_container_match(struct VFIOContainer *container,
|
bool vfio_cpr_container_match(struct VFIOContainer *container,
|
||||||
struct VFIOGroup *group, int fd);
|
struct VFIOGroup *group, int fd);
|
||||||
|
|
||||||
|
void vfio_cpr_giommu_remap(struct VFIOContainerBase *bcontainer,
|
||||||
|
MemoryRegionSection *section);
|
||||||
|
|
||||||
|
bool vfio_cpr_ram_discard_register_listener(
|
||||||
|
struct VFIOContainerBase *bcontainer, MemoryRegionSection *section);
|
||||||
|
|
||||||
#endif /* HW_VFIO_VFIO_CPR_H */
|
#endif /* HW_VFIO_VFIO_CPR_H */
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue