Merge tag 'pull-vfio-20250704' of https://github.com/legoater/qemu into staging

vfio queue:

* Added small cleanups for b4 and scope
* Restricted TDX build to 64-bit target
* Fixed issues introduced in first part of VFIO live update support
* Added full VFIO live update support

# -----BEGIN PGP SIGNATURE-----
#
# iQIzBAABCAAdFiEEoPZlSPBIlev+awtgUaNDx8/77KEFAmhnlBMACgkQUaNDx8/7
# 7KFOxw//dIPpGcYIjEGpIkIh6NF3VK6xmDAG0aZEeM+5fCzdor2DPkD7ZPyqND3S
# /YkR8GSOHd+Qm5W+73LHOdV5RFMt4wagyHiAKUMpEFHY7ZLduxIXlACoUo+F5cnh
# SUnhC6KX7Gu1/Nndb4X4w6SNOyhoRKtQ2EqpRsrGdIaBkX8s6w2jF/INPTPdpg73
# lulJZCAFNzyIWytck9ohJf8To9IsvkCXTF6mcywURa9MBaAarRttXoFjuZsXb7zn
# NqGVtantNAaJmKu26X3ScUWn9P02WryhPB6KT7+B3G/b87Su1cnbAwYakNSFPJIx
# I/gaw0EPzHM+b6mavA4IdvKDJGR7GMvpJEGqUEpntc6FJ3+g1B7qsedgeBUc/RKB
# UaRmtYbvlMv5wSmaLcxsT3S3BnABbrd4EedZX5uOBFMrtnTiOqrMUEcoMaf5ogvN
# KlJkrjNQkfHxTbp5G+nXHuTzae3k2Ylm196b2yhgARfUL70jiak/B+ADeezVcVmW
# 6ZpotrAvMxu9RlFdxTSbL0/lR0rfKZTecqMOSFA+FlmjcTJ0QW1SbweMdsfgW/uU
# /2Hfmw6zUQ80/tMqYMztFWsiov7C8a8ZMmuZwDQp+AdCVGgFEigfNJVQYgujbqKz
# g9Ta9cNPyvF5hpnml5u8IzAzM95HrhIPFmmpUBZyWOCeL6chSHk=
# =Cu7b
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 04 Jul 2025 04:42:59 EDT
# gpg:                using RSA key A0F66548F04895EBFE6B0B6051A343C7CFFBECA1
# gpg: Good signature from "Cédric Le Goater <clg@redhat.com>" [full]
# gpg:                 aka "Cédric Le Goater <clg@kaod.org>" [full]
# Primary key fingerprint: A0F6 6548 F048 95EB FE6B 0B60 51A3 43C7 CFFB ECA1

* tag 'pull-vfio-20250704' of https://github.com/legoater/qemu: (27 commits)
  vfio: doc changes for cpr
  vfio/container: delete old cpr register
  iommufd: preserve DMA mappings
  vfio/iommufd: change process
  vfio/iommufd: reconstruct hwpt
  vfio/iommufd: reconstruct device
  vfio/iommufd: preserve descriptors
  vfio/iommufd: cpr state
  migration: vfio cpr state hook
  vfio/iommufd: register container for cpr
  vfio/iommufd: device name blocker
  vfio/iommufd: add vfio_device_free_name
  vfio/iommufd: invariant device name
  vfio/iommufd: use IOMMU_IOAS_MAP_FILE
  physmem: qemu_ram_get_fd_offset
  backends/iommufd: change process ioctl
  backends/iommufd: iommufd_backend_map_file_dma
  migration: cpr_get_fd_param helper
  migration: close kvm after cpr
  vfio-pci: preserve INTx
  ...

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
commit 563ac3d181

33 changed files with 888 additions and 92 deletions
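The series below repeatedly uses a small set of CPR helpers (cpr_save_fd, cpr_find_fd, cpr_delete_fd, cpr_is_incoming) to keep kernel file descriptors alive across a cpr-transfer. The following C sketch distills that idiom; it is illustrative only and not part of the pull request. The name "my-backend" and my_open_backend_fd() are hypothetical.

/*
 * Illustrative sketch (not from this series): the fd-preservation idiom
 * applied to iommufd and VFIO descriptors in the patches below.
 * my_open_backend_fd() and the "my-backend" name are hypothetical.
 */
#include "qemu/osdep.h"
#include "migration/cpr.h"

static int my_backend_get_fd(Error **errp)
{
    int fd;

    if (cpr_is_incoming()) {
        /* New QEMU after cpr-transfer: reuse the fd saved by old QEMU. */
        fd = cpr_find_fd("my-backend", 0);
        if (fd >= 0) {
            return fd;
        }
    }

    fd = my_open_backend_fd(errp);          /* hypothetical open helper */
    if (fd >= 0) {
        /* Record the fd in CPR state so it survives the transfer. */
        cpr_save_fd("my-backend", 0, fd);
    }
    return fd;
}

static void my_backend_put_fd(int fd)
{
    cpr_delete_fd("my-backend", 0);
    close(fd);
}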
@@ -11,4 +11,3 @@
prep-perpatch-check-cmd = scripts/checkpatch.pl -q --terse --no-summary --mailback -
searchmask = https://lore.kernel.org/qemu-devel/?x=m&t=1&q=%s
linkmask = https://lore.kernel.org/qemu-devel/%s
linktrailermask = Message-ID: <%s>
Makefile | 1

@@ -227,6 +227,7 @@ distclean: clean recurse-distclean
	rm -Rf .sdk qemu-bundle

find-src-path = find "$(SRC_PATH)" -path "$(SRC_PATH)/meson" -prune -o \
	-path "$(SRC_PATH)/.pc" -prune -o \
	-type l -prune -o \( -name "*.[chsS]" -o -name "*.[ch].inc" \)

.PHONY: ctags
@@ -515,16 +515,23 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
        goto err;
    }

    /* If I am the CPU that created coalesced_mmio_ring, then discard it */
    if (s->coalesced_mmio_ring == (void *)cpu->kvm_run + PAGE_SIZE) {
        s->coalesced_mmio_ring = NULL;
    }

    ret = munmap(cpu->kvm_run, mmap_size);
    if (ret < 0) {
        goto err;
    }
    cpu->kvm_run = NULL;

    if (cpu->kvm_dirty_gfns) {
        ret = munmap(cpu->kvm_dirty_gfns, s->kvm_dirty_ring_bytes);
        if (ret < 0) {
            goto err;
        }
        cpu->kvm_dirty_gfns = NULL;
    }

    kvm_park_vcpu(cpu);

@@ -608,6 +615,31 @@ err:
    return ret;
}

void kvm_close(void)
{
    CPUState *cpu;

    if (!kvm_state || kvm_state->fd == -1) {
        return;
    }

    CPU_FOREACH(cpu) {
        cpu_remove_sync(cpu);
        close(cpu->kvm_fd);
        cpu->kvm_fd = -1;
        close(cpu->kvm_vcpu_stats_fd);
        cpu->kvm_vcpu_stats_fd = -1;
    }

    if (kvm_state && kvm_state->fd != -1) {
        close(kvm_state->vmfd);
        kvm_state->vmfd = -1;
        close(kvm_state->fd);
        kvm_state->fd = -1;
    }
    kvm_state = NULL;
}

/*
 * dirty pages logging control
 */
@@ -16,12 +16,18 @@
#include "qemu/module.h"
#include "qom/object_interfaces.h"
#include "qemu/error-report.h"
#include "migration/cpr.h"
#include "monitor/monitor.h"
#include "trace.h"
#include "hw/vfio/vfio-device.h"
#include <sys/ioctl.h>
#include <linux/iommufd.h>

static const char *iommufd_fd_name(IOMMUFDBackend *be)
{
    return object_get_canonical_path_component(OBJECT(be));
}

static void iommufd_backend_init(Object *obj)
{
    IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);

@@ -64,26 +70,73 @@ static bool iommufd_backend_can_be_deleted(UserCreatable *uc)
    return !be->users;
}

static void iommufd_backend_complete(UserCreatable *uc, Error **errp)
{
    IOMMUFDBackend *be = IOMMUFD_BACKEND(uc);
    const char *name = iommufd_fd_name(be);

    if (!be->owned) {
        /* fd came from the command line. Fetch updated value from cpr state. */
        if (cpr_is_incoming()) {
            be->fd = cpr_find_fd(name, 0);
        } else {
            cpr_save_fd(name, 0, be->fd);
        }
    }
}

static void iommufd_backend_class_init(ObjectClass *oc, const void *data)
{
    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);

    ucc->can_be_deleted = iommufd_backend_can_be_deleted;
    ucc->complete = iommufd_backend_complete;

    object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd);
}

bool iommufd_change_process_capable(IOMMUFDBackend *be)
{
    struct iommu_ioas_change_process args = {.size = sizeof(args)};

    /*
     * Call IOMMU_IOAS_CHANGE_PROCESS to verify it is a recognized ioctl.
     * This is a no-op if the process has not changed since DMA was mapped.
     */
    return !ioctl(be->fd, IOMMU_IOAS_CHANGE_PROCESS, &args);
}

bool iommufd_change_process(IOMMUFDBackend *be, Error **errp)
{
    struct iommu_ioas_change_process args = {.size = sizeof(args)};
    bool ret = !ioctl(be->fd, IOMMU_IOAS_CHANGE_PROCESS, &args);

    if (!ret) {
        error_setg_errno(errp, errno, "IOMMU_IOAS_CHANGE_PROCESS fd %d failed",
                         be->fd);
    }
    trace_iommufd_change_process(be->fd, ret);
    return ret;
}

bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
{
    int fd;

    if (be->owned && !be->users) {
        fd = qemu_open("/dev/iommu", O_RDWR, errp);
        fd = cpr_open_fd("/dev/iommu", O_RDWR, iommufd_fd_name(be), 0, errp);
        if (fd < 0) {
            return false;
        }
        be->fd = fd;
    }
    if (!be->users && !vfio_iommufd_cpr_register_iommufd(be, errp)) {
        if (be->owned) {
            close(be->fd);
            be->fd = -1;
        }
        return false;
    }
    be->users++;

    trace_iommufd_backend_connect(be->fd, be->owned, be->users);

@@ -96,9 +149,13 @@ void iommufd_backend_disconnect(IOMMUFDBackend *be)
        goto out;
    }
    be->users--;
    if (!be->users && be->owned) {
        close(be->fd);
        be->fd = -1;
    if (!be->users) {
        vfio_iommufd_cpr_unregister_iommufd(be);
        if (be->owned) {
            cpr_delete_fd(iommufd_fd_name(be), 0);
            close(be->fd);
            be->fd = -1;
        }
    }
out:
    trace_iommufd_backend_disconnect(be->fd, be->users);

@@ -172,6 +229,44 @@ int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
    return ret;
}

int iommufd_backend_map_file_dma(IOMMUFDBackend *be, uint32_t ioas_id,
                                 hwaddr iova, ram_addr_t size,
                                 int mfd, unsigned long start, bool readonly)
{
    int ret, fd = be->fd;
    struct iommu_ioas_map_file map = {
        .size = sizeof(map),
        .flags = IOMMU_IOAS_MAP_READABLE |
                 IOMMU_IOAS_MAP_FIXED_IOVA,
        .ioas_id = ioas_id,
        .fd = mfd,
        .start = start,
        .iova = iova,
        .length = size,
    };

    if (cpr_is_incoming()) {
        return 0;
    }

    if (!readonly) {
        map.flags |= IOMMU_IOAS_MAP_WRITEABLE;
    }

    ret = ioctl(fd, IOMMU_IOAS_MAP_FILE, &map);
    trace_iommufd_backend_map_file_dma(fd, ioas_id, iova, size, mfd, start,
                                       readonly, ret);
    if (ret) {
        ret = -errno;

        /* TODO: Not support mapping hardware PCI BAR region for now. */
        if (errno == EFAULT) {
            warn_report("IOMMU_IOAS_MAP_FILE failed: %m, PCI BAR?");
        }
    }
    return ret;
}

int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
                              hwaddr iova, ram_addr_t size)
{

@@ -183,6 +278,10 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
        .length = size,
    };

    if (cpr_is_incoming()) {
        return 0;
    }

    ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap);
    /*
     * IOMMUFD takes mapping as some kind of object, unmapping
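For context on the IOMMU_IOAS_CHANGE_PROCESS probe used above, here is a standalone userspace sketch, not part of the series, that checks whether the running kernel recognizes the ioctl in the same way iommufd_change_process_capable() does. It assumes a kernel exposing /dev/iommu and defining the ioctl in <linux/iommufd.h>.

/*
 * Standalone sketch (not from this series): probe IOMMU_IOAS_CHANGE_PROCESS
 * support, mirroring iommufd_change_process_capable() in backends/iommufd.c.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/iommufd.h>

int main(void)
{
    struct iommu_ioas_change_process args = { .size = sizeof(args) };
    int fd = open("/dev/iommu", O_RDWR);

    if (fd < 0) {
        perror("open /dev/iommu");
        return 1;
    }
    /* A no-op call: the process has not changed, so success == supported. */
    if (ioctl(fd, IOMMU_IOAS_CHANGE_PROCESS, &args) == 0) {
        printf("IOMMU_IOAS_CHANGE_PROCESS supported\n");
    } else {
        printf("not supported: %s\n", strerror(errno));
    }
    close(fd);
    return 0;
}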
@@ -7,10 +7,12 @@ dbus_vmstate_loading(const char *id) "id: %s"
dbus_vmstate_saving(const char *id) "id: %s"

# iommufd.c
iommufd_change_process(int fd, bool ret) "fd=%d (%d)"
iommufd_backend_connect(int fd, bool owned, uint32_t users) "fd=%d owned=%d users=%d"
iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d"
iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d"
iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)"
iommufd_backend_map_file_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int fd, unsigned long start, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" fd=%d start=%ld readonly=%d (%d)"
iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas) " iommufd=%d ioas=%d"
@@ -152,8 +152,7 @@ cpr-transfer mode
This mode allows the user to transfer a guest to a new QEMU instance
on the same host with minimal guest pause time, by preserving guest
RAM in place, albeit with new virtual addresses in new QEMU. Devices
and their pinned memory pages will also be preserved in a future QEMU
release.
and their pinned memory pages are also preserved for VFIO and IOMMUFD.

The user starts new QEMU on the same host as old QEMU, with command-
line arguments to create the same machine, plus the ``-incoming``

@@ -322,6 +321,6 @@ Futures

cpr-transfer mode is based on a capability to transfer open file
descriptors from old to new QEMU. In the future, descriptors for
vfio, iommufd, vhost, and char devices could be transferred,
vhost, and char devices could be transferred,
preserving those devices and their kernel state without interruption,
even if they do not explicitly support live migration.
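The documentation change above says devices and their pinned pages are now preserved across cpr-transfer. The hook the series uses for device-side restoration is a vmstate section that is only included for incoming CPR (.needed = cpr_incoming_needed), as in iommufd_cpr_vmstate later in this diff. The sketch below is illustrative, not from the patch; MyBackend and my_backend_cpr_post_load() are invented names.

/*
 * Illustrative sketch (not from this series): CPR-only vmstate for a
 * hypothetical backend, mirroring iommufd_cpr_vmstate in hw/vfio/cpr-iommufd.c.
 */
#include "qemu/osdep.h"
#include "migration/cpr.h"
#include "migration/vmstate.h"

typedef struct MyBackend MyBackend;

static int my_backend_cpr_post_load(void *opaque, int version_id)
{
    /* Re-establish kernel state in the new process after cpr-transfer. */
    return 0;
}

static const VMStateDescription my_backend_cpr_vmstate = {
    .name = "my-backend",
    .version_id = 0,
    .minimum_version_id = 0,
    .post_load = my_backend_cpr_post_load,
    .needed = cpr_incoming_needed,      /* only sent for incoming CPR */
    .fields = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
    }
};

/* Called once at backend creation, e.g. from a realize/complete hook. */
static void my_backend_register_cpr(MyBackend *be)
{
    vmstate_register(NULL, -1, &my_backend_cpr_vmstate, be);
}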
@@ -13,7 +13,7 @@ config SGX
config TDX
    bool
    select X86_FW_OVMF
    depends on KVM
    depends on KVM && X86_64

config PC
    bool
@@ -13,7 +13,6 @@
#include "hw/vfio-user/container.h"
#include "hw/vfio-user/device.h"
#include "hw/vfio-user/trace.h"
#include "hw/vfio/vfio-cpr.h"
#include "hw/vfio/vfio-device.h"
#include "hw/vfio/vfio-listener.h"
#include "qapi/error.h"

@@ -225,14 +224,10 @@ vfio_user_container_connect(AddressSpace *as, VFIODevice *vbasedev,

    bcontainer = &container->bcontainer;

    if (!vfio_cpr_register_container(bcontainer, errp)) {
        goto free_container_exit;
    }

    ret = ram_block_uncoordinated_discard_disable(true);
    if (ret) {
        error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
        goto unregister_container_exit;
        goto free_container_exit;
    }

    vioc = VFIO_IOMMU_GET_CLASS(bcontainer);

@@ -261,9 +256,6 @@ listener_release_exit:
enable_discards_exit:
    ram_block_uncoordinated_discard_disable(false);

unregister_container_exit:
    vfio_cpr_unregister_container(bcontainer);

free_container_exit:
    object_unref(container);

@@ -286,7 +278,6 @@ static void vfio_user_container_disconnect(VFIOUserContainer *container)
        vioc->release(bcontainer);
    }

    vfio_cpr_unregister_container(bcontainer);
    object_unref(container);

    vfio_address_space_put(space);
@@ -265,7 +265,7 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp)

error:
    error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
    g_free(vbasedev->name);
    vfio_device_free_name(vbasedev);
}

static void vfio_ap_unrealize(DeviceState *dev)

@@ -275,7 +275,7 @@ static void vfio_ap_unrealize(DeviceState *dev)
    vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX);
    vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_CFG_CHG_IRQ_INDEX);
    vfio_device_detach(&vapdev->vdev);
    g_free(vapdev->vdev.name);
    vfio_device_free_name(&vapdev->vdev);
}

static const Property vfio_ap_properties[] = {
@@ -619,7 +619,7 @@ out_io_notifier_err:
out_region_err:
    vfio_device_detach(vbasedev);
out_attach_dev_err:
    g_free(vbasedev->name);
    vfio_device_free_name(vbasedev);
out_unrealize:
    if (cdc->unrealize) {
        cdc->unrealize(cdev);

@@ -637,7 +637,7 @@ static void vfio_ccw_unrealize(DeviceState *dev)
    vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX);
    vfio_ccw_put_region(vcdev);
    vfio_device_detach(&vcdev->vdev);
    g_free(vcdev->vdev.name);
    vfio_device_free_name(&vcdev->vdev);

    if (cdc->unrealize) {
        cdc->unrealize(cdev);
@@ -78,7 +78,16 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer,
                           void *vaddr, bool readonly, MemoryRegion *mr)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
    RAMBlock *rb = mr->ram_block;
    int mfd = rb ? qemu_ram_get_fd(rb) : -1;

    if (mfd >= 0 && vioc->dma_map_file) {
        unsigned long start = vaddr - qemu_ram_get_host_addr(rb);
        unsigned long offset = qemu_ram_get_fd_offset(rb);

        return vioc->dma_map_file(bcontainer, iova, size, mfd, start + offset,
                                  readonly);
    }
    g_assert(vioc->dma_map);
    return vioc->dma_map(bcontainer, iova, size, vaddr, readonly, mr);
}
hw/vfio/cpr-iommufd.c | 225 (new file)

@@ -0,0 +1,225 @@
/*
 * Copyright (c) 2024-2025 Oracle and/or its affiliates.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "hw/vfio/vfio-cpr.h"
#include "hw/vfio/vfio-device.h"
#include "migration/blocker.h"
#include "migration/cpr.h"
#include "migration/migration.h"
#include "migration/vmstate.h"
#include "system/iommufd.h"
#include "vfio-iommufd.h"
#include "trace.h"

typedef struct CprVFIODevice {
    char *name;
    unsigned int namelen;
    uint32_t ioas_id;
    int devid;
    uint32_t hwpt_id;
    QLIST_ENTRY(CprVFIODevice) next;
} CprVFIODevice;

static const VMStateDescription vmstate_cpr_vfio_device = {
    .name = "cpr vfio device",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(namelen, CprVFIODevice),
        VMSTATE_VBUFFER_ALLOC_UINT32(name, CprVFIODevice, 0, NULL, namelen),
        VMSTATE_INT32(devid, CprVFIODevice),
        VMSTATE_UINT32(ioas_id, CprVFIODevice),
        VMSTATE_UINT32(hwpt_id, CprVFIODevice),
        VMSTATE_END_OF_LIST()
    }
};

const VMStateDescription vmstate_cpr_vfio_devices = {
    .name = CPR_STATE "/vfio devices",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]){
        VMSTATE_QLIST_V(vfio_devices, CprState, 1, vmstate_cpr_vfio_device,
                        CprVFIODevice, next),
        VMSTATE_END_OF_LIST()
    }
};

static void vfio_cpr_save_device(VFIODevice *vbasedev)
{
    CprVFIODevice *elem = g_new0(CprVFIODevice, 1);

    elem->name = g_strdup(vbasedev->name);
    elem->namelen = strlen(vbasedev->name) + 1;
    elem->ioas_id = vbasedev->cpr.ioas_id;
    elem->devid = vbasedev->devid;
    elem->hwpt_id = vbasedev->cpr.hwpt_id;
    QLIST_INSERT_HEAD(&cpr_state.vfio_devices, elem, next);
}

static CprVFIODevice *find_device(const char *name)
{
    CprVFIODeviceList *head = &cpr_state.vfio_devices;
    CprVFIODevice *elem;

    QLIST_FOREACH(elem, head, next) {
        if (!strcmp(elem->name, name)) {
            return elem;
        }
    }
    return NULL;
}

static void vfio_cpr_delete_device(const char *name)
{
    CprVFIODevice *elem = find_device(name);

    if (elem) {
        QLIST_REMOVE(elem, next);
        g_free(elem->name);
        g_free(elem);
    }
}

static bool vfio_cpr_find_device(VFIODevice *vbasedev)
{
    CprVFIODevice *elem = find_device(vbasedev->name);

    if (elem) {
        vbasedev->cpr.ioas_id = elem->ioas_id;
        vbasedev->devid = elem->devid;
        vbasedev->cpr.hwpt_id = elem->hwpt_id;
        trace_vfio_cpr_find_device(elem->ioas_id, elem->devid, elem->hwpt_id);
        return true;
    }
    return false;
}

static bool vfio_cpr_supported(IOMMUFDBackend *be, Error **errp)
{
    if (!iommufd_change_process_capable(be)) {
        if (errp) {
            error_setg(errp, "vfio iommufd backend does not support "
                             "IOMMU_IOAS_CHANGE_PROCESS");
        }
        return false;
    }
    return true;
}

static int iommufd_cpr_pre_save(void *opaque)
{
    IOMMUFDBackend *be = opaque;

    /*
     * The process has not changed yet, but proactively try the ioctl,
     * and it will fail if any DMA mappings are not supported.
     */
    if (!iommufd_change_process_capable(be)) {
        error_report("some memory regions do not support "
                     "IOMMU_IOAS_CHANGE_PROCESS");
        return -1;
    }
    return 0;
}

static int iommufd_cpr_post_load(void *opaque, int version_id)
{
    IOMMUFDBackend *be = opaque;
    Error *local_err = NULL;

    if (!iommufd_change_process(be, &local_err)) {
        error_report_err(local_err);
        return -1;
    }
    return 0;
}

static const VMStateDescription iommufd_cpr_vmstate = {
    .name = "iommufd",
    .version_id = 0,
    .minimum_version_id = 0,
    .pre_save = iommufd_cpr_pre_save,
    .post_load = iommufd_cpr_post_load,
    .needed = cpr_incoming_needed,
    .fields = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
    }
};

bool vfio_iommufd_cpr_register_iommufd(IOMMUFDBackend *be, Error **errp)
{
    Error **cpr_blocker = &be->cpr_blocker;

    if (!vfio_cpr_supported(be, cpr_blocker)) {
        return migrate_add_blocker_modes(cpr_blocker, errp,
                                         MIG_MODE_CPR_TRANSFER, -1) == 0;
    }

    vmstate_register(NULL, -1, &iommufd_cpr_vmstate, be);

    return true;
}

void vfio_iommufd_cpr_unregister_iommufd(IOMMUFDBackend *be)
{
    vmstate_unregister(NULL, &iommufd_cpr_vmstate, be);
    migrate_del_blocker(&be->cpr_blocker);
}

bool vfio_iommufd_cpr_register_container(VFIOIOMMUFDContainer *container,
                                         Error **errp)
{
    VFIOContainerBase *bcontainer = &container->bcontainer;

    migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier,
                                vfio_cpr_reboot_notifier,
                                MIG_MODE_CPR_REBOOT);

    vfio_cpr_add_kvm_notifier();

    return true;
}

void vfio_iommufd_cpr_unregister_container(VFIOIOMMUFDContainer *container)
{
    VFIOContainerBase *bcontainer = &container->bcontainer;

    migration_remove_notifier(&bcontainer->cpr_reboot_notifier);
}

void vfio_iommufd_cpr_register_device(VFIODevice *vbasedev)
{
    if (!cpr_is_incoming()) {
        /*
         * Beware fd may have already been saved by vfio_device_set_fd,
         * so call resave to avoid a duplicate entry.
         */
        cpr_resave_fd(vbasedev->name, 0, vbasedev->fd);
        vfio_cpr_save_device(vbasedev);
    }
}

void vfio_iommufd_cpr_unregister_device(VFIODevice *vbasedev)
{
    cpr_delete_fd(vbasedev->name, 0);
    vfio_cpr_delete_device(vbasedev->name);
}

void vfio_cpr_load_device(VFIODevice *vbasedev)
{
    if (cpr_is_incoming()) {
        bool ret = vfio_cpr_find_device(vbasedev);
        g_assert(ret);

        if (vbasedev->fd < 0) {
            vbasedev->fd = cpr_find_fd(vbasedev->name, 0);
        }
    }
}
@@ -99,20 +99,21 @@ static int vfio_container_post_load(void *opaque, int version_id)
{
    VFIOContainer *container = opaque;
    VFIOContainerBase *bcontainer = &container->bcontainer;
    VFIOGroup *group;
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
    dma_map_fn saved_dma_map = vioc->dma_map;
    Error *local_err = NULL;

    /* During incoming CPR, divert calls to dma_map. */
    vioc->dma_map = vfio_legacy_cpr_dma_map;

    if (!vfio_listener_register(bcontainer, &local_err)) {
        error_report_err(local_err);
        return -1;
    }

    QLIST_FOREACH(group, &container->group_list, container_next) {
        VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
        /* Restore original dma_map function */
        vioc->dma_map = saved_dma_map;

        /* Restore original dma_map function */
        vioc->dma_map = container->cpr.saved_dma_map;
    }
    return 0;
}

@@ -148,6 +149,7 @@ static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier,
     */

    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
    dma_map_fn saved_dma_map = vioc->dma_map;
    vioc->dma_map = vfio_legacy_cpr_dma_map;

    container->cpr.remap_listener = (MemoryListener) {

@@ -158,7 +160,7 @@ static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier,
                            bcontainer->space->as);
        memory_listener_unregister(&container->cpr.remap_listener);
        container->cpr.vaddr_unmapped = false;
        vioc->dma_map = container->cpr.saved_dma_map;
        vioc->dma_map = saved_dma_map;
    }
    return 0;
}

@@ -177,14 +179,9 @@ bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp)
                                         MIG_MODE_CPR_TRANSFER, -1) == 0;
    }

    vmstate_register(NULL, -1, &vfio_container_vmstate, container);
    vfio_cpr_add_kvm_notifier();

    /* During incoming CPR, divert calls to dma_map. */
    if (cpr_is_incoming()) {
        VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
        container->cpr.saved_dma_map = vioc->dma_map;
        vioc->dma_map = vfio_legacy_cpr_dma_map;
    }
    vmstate_register(NULL, -1, &vfio_container_vmstate, container);

    migration_add_notifier_mode(&container->cpr.transfer_notifier,
                                vfio_cpr_fail_notifier,
hw/vfio/cpr.c | 144

@@ -9,6 +9,8 @@
#include "hw/vfio/vfio-device.h"
#include "hw/vfio/vfio-cpr.h"
#include "hw/vfio/pci.h"
#include "hw/pci/msix.h"
#include "hw/pci/msi.h"
#include "migration/cpr.h"
#include "qapi/error.h"
#include "system/runstate.h"

@@ -27,17 +29,67 @@ int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier,
    return 0;
}

bool vfio_cpr_register_container(VFIOContainerBase *bcontainer, Error **errp)
#define STRDUP_VECTOR_FD_NAME(vdev, name) \
    g_strdup_printf("%s_%s", (vdev)->vbasedev.name, (name))

void vfio_cpr_save_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr,
                             int fd)
{
    migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier,
                                vfio_cpr_reboot_notifier,
                                MIG_MODE_CPR_REBOOT);
    return true;
    g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name);
    cpr_save_fd(fdname, nr, fd);
}

void vfio_cpr_unregister_container(VFIOContainerBase *bcontainer)
int vfio_cpr_load_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr)
{
    migration_remove_notifier(&bcontainer->cpr_reboot_notifier);
    g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name);
    return cpr_find_fd(fdname, nr);
}

void vfio_cpr_delete_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr)
{
    g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name);
    cpr_delete_fd(fdname, nr);
}

static void vfio_cpr_claim_vectors(VFIOPCIDevice *vdev, int nr_vectors,
                                   bool msix)
{
    int i, fd;
    bool pending = false;
    PCIDevice *pdev = &vdev->pdev;

    vdev->nr_vectors = nr_vectors;
    vdev->msi_vectors = g_new0(VFIOMSIVector, nr_vectors);
    vdev->interrupt = msix ? VFIO_INT_MSIX : VFIO_INT_MSI;

    vfio_pci_prepare_kvm_msi_virq_batch(vdev);

    for (i = 0; i < nr_vectors; i++) {
        VFIOMSIVector *vector = &vdev->msi_vectors[i];

        fd = vfio_cpr_load_vector_fd(vdev, "interrupt", i);
        if (fd >= 0) {
            vfio_pci_vector_init(vdev, i);
            vfio_pci_msi_set_handler(vdev, i);
        }

        if (vfio_cpr_load_vector_fd(vdev, "kvm_interrupt", i) >= 0) {
            vfio_pci_add_kvm_msi_virq(vdev, vector, i, msix);
        } else {
            vdev->msi_vectors[i].virq = -1;
        }

        if (msix && msix_is_pending(pdev, i) && msix_is_masked(pdev, i)) {
            set_bit(i, vdev->msix->pending);
            pending = true;
        }
    }

    vfio_pci_commit_kvm_msi_virq_batch(vdev);

    if (msix) {
        memory_region_set_enabled(&pdev->msix_pba_mmio, pending);
    }
}

/*

@@ -58,13 +110,91 @@ static int vfio_cpr_pci_pre_load(void *opaque)
    return 0;
}

static int vfio_cpr_pci_post_load(void *opaque, int version_id)
{
    VFIOPCIDevice *vdev = opaque;
    PCIDevice *pdev = &vdev->pdev;
    int nr_vectors;

    if (msix_enabled(pdev)) {
        vfio_pci_msix_set_notifiers(vdev);
        nr_vectors = vdev->msix->entries;
        vfio_cpr_claim_vectors(vdev, nr_vectors, true);

    } else if (msi_enabled(pdev)) {
        nr_vectors = msi_nr_vectors_allocated(pdev);
        vfio_cpr_claim_vectors(vdev, nr_vectors, false);

    } else if (vfio_pci_read_config(pdev, PCI_INTERRUPT_PIN, 1)) {
        Error *local_err = NULL;
        if (!vfio_pci_intx_enable(vdev, &local_err)) {
            error_report_err(local_err);
            return -1;
        }
    }

    return 0;
}

static bool pci_msix_present(void *opaque, int version_id)
{
    PCIDevice *pdev = opaque;

    return msix_present(pdev);
}

static const VMStateDescription vfio_intx_vmstate = {
    .name = "vfio-cpr-intx",
    .version_id = 0,
    .minimum_version_id = 0,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(pending, VFIOINTx),
        VMSTATE_UINT32(route.mode, VFIOINTx),
        VMSTATE_INT32(route.irq, VFIOINTx),
        VMSTATE_END_OF_LIST()
    }
};

#define VMSTATE_VFIO_INTX(_field, _state) { \
    .name = (stringify(_field)), \
    .size = sizeof(VFIOINTx), \
    .vmsd = &vfio_intx_vmstate, \
    .flags = VMS_STRUCT, \
    .offset = vmstate_offset_value(_state, _field, VFIOINTx), \
}

const VMStateDescription vfio_cpr_pci_vmstate = {
    .name = "vfio-cpr-pci",
    .version_id = 0,
    .minimum_version_id = 0,
    .pre_load = vfio_cpr_pci_pre_load,
    .post_load = vfio_cpr_pci_post_load,
    .needed = cpr_incoming_needed,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice),
        VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, pci_msix_present),
        VMSTATE_VFIO_INTX(intx, VFIOPCIDevice),
        VMSTATE_END_OF_LIST()
    }
};

static NotifierWithReturn kvm_close_notifier;

static int vfio_cpr_kvm_close_notifier(NotifierWithReturn *notifier,
                                       MigrationEvent *e,
                                       Error **errp)
{
    if (e->type == MIG_EVENT_PRECOPY_DONE) {
        vfio_kvm_device_close();
    }
    return 0;
}

void vfio_cpr_add_kvm_notifier(void)
{
    if (!kvm_close_notifier.notify) {
        migration_add_notifier_mode(&kvm_close_notifier,
                                    vfio_cpr_kvm_close_notifier,
                                    MIG_MODE_CPR_TRANSFER);
    }
}
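The vector helpers above compose a per-vector fd name and stash each eventfd in CPR state. A distilled, hedged sketch of that eventfd-reuse pattern follows; it is illustrative only, "my-irq" is a hypothetical name, and header paths are assumed to match current QEMU.

/*
 * Illustrative sketch (not from this series): reuse a preserved eventfd on
 * incoming CPR, otherwise create one and save it, mirroring
 * vfio_notifier_init() in hw/vfio/pci.c.
 */
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/event_notifier.h"
#include "migration/cpr.h"

static bool my_irq_notifier_init(EventNotifier *e, int nr, Error **errp)
{
    int fd = cpr_find_fd("my-irq", nr);   /* saved by the old QEMU, if any */

    if (fd >= 0) {
        /* Incoming CPR: adopt the preserved eventfd instead of a new one. */
        event_notifier_init_fd(e, fd);
        return true;
    }

    if (event_notifier_init(e, 0) < 0) {
        error_setg(errp, "event_notifier_init failed");
        return false;
    }
    /* Remember the new eventfd so a future cpr-transfer can reuse it. */
    cpr_save_fd("my-irq", nr, event_notifier_get_fd(e));
    return true;
}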
@@ -28,6 +28,8 @@
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/units.h"
#include "migration/cpr.h"
#include "migration/blocker.h"
#include "monitor/monitor.h"
#include "vfio-helpers.h"

@@ -316,28 +318,40 @@ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
            error_setg(errp, "Use FD passing only with iommufd backend");
            return false;
        }
        /*
         * Give a name with fd so any function printing out vbasedev->name
         * will not break.
         */
        if (!vbasedev->name) {
            vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);

            if (vbasedev->dev->id) {
                vbasedev->name = g_strdup(vbasedev->dev->id);
                return true;
            } else {
                /*
                 * Assign a name so any function printing it will not break.
                 * The fd number changes across processes, so this cannot be
                 * used as an invariant name for CPR.
                 */
                vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
                error_setg(&vbasedev->cpr.id_blocker,
                           "vfio device with fd=%d needs an id property",
                           vbasedev->fd);
                return migrate_add_blocker_modes(&vbasedev->cpr.id_blocker,
                                                 errp, MIG_MODE_CPR_TRANSFER,
                                                 -1) == 0;
            }
        }
    }

    return true;
}

void vfio_device_free_name(VFIODevice *vbasedev)
{
    g_clear_pointer(&vbasedev->name, g_free);
    migrate_del_blocker(&vbasedev->cpr.id_blocker);
}

void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
{
    ERRP_GUARD();
    int fd = monitor_fd_param(monitor_cur(), str, errp);

    if (fd < 0) {
        error_prepend(errp, "Could not parse remote object fd %s:", str);
        return;
    }
    vbasedev->fd = fd;
    vbasedev->fd = cpr_get_fd_param(vbasedev->dev->id, str, 0, errp);
}

static VFIODeviceIOOps vfio_device_io_ops_ioctl;
@@ -117,6 +117,17 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
int vfio_kvm_device_fd = -1;
#endif

void vfio_kvm_device_close(void)
{
#ifdef CONFIG_KVM
    kvm_close();
    if (vfio_kvm_device_fd != -1) {
        close(vfio_kvm_device_fd);
        vfio_kvm_device_fd = -1;
    }
#endif
}

int vfio_kvm_device_add_fd(int fd, Error **errp)
{
#ifdef CONFIG_KVM
hw/vfio/iommufd-stubs.c | 18 (new file)

@@ -0,0 +1,18 @@
/*
 * Copyright (c) 2025 Oracle and/or its affiliates.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include "migration/cpr.h"
#include "migration/vmstate.h"

const VMStateDescription vmstate_cpr_vfio_devices = {
    .name = CPR_STATE "/vfio devices",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]){
        VMSTATE_END_OF_LIST()
    }
};
@@ -25,6 +25,7 @@
#include "system/reset.h"
#include "qemu/cutils.h"
#include "qemu/chardev_open.h"
#include "migration/cpr.h"
#include "pci.h"
#include "vfio-iommufd.h"
#include "vfio-helpers.h"

@@ -45,6 +46,18 @@ static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
                                 iova, size, vaddr, readonly);
}

static int iommufd_cdev_map_file(const VFIOContainerBase *bcontainer,
                                 hwaddr iova, ram_addr_t size,
                                 int fd, unsigned long start, bool readonly)
{
    const VFIOIOMMUFDContainer *container =
        container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);

    return iommufd_backend_map_file_dma(container->be,
                                        container->ioas_id,
                                        iova, size, fd, start, readonly);
}

static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer,
                              hwaddr iova, ram_addr_t size,
                              IOMMUTLBEntry *iotlb, bool unmap_all)

@@ -109,6 +122,10 @@ static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp)
        goto err_kvm_device_add;
    }

    if (cpr_is_incoming()) {
        goto skip_bind;
    }

    /* Bind device to iommufd */
    bind.iommufd = iommufd->fd;
    if (ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind)) {

@@ -120,6 +137,8 @@ static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp)
    vbasedev->devid = bind.out_devid;
    trace_iommufd_cdev_connect_and_bind(bind.iommufd, vbasedev->name,
                                        vbasedev->fd, vbasedev->devid);

skip_bind:
    return true;
err_bind:
    iommufd_cdev_kvm_device_del(vbasedev);

@@ -313,7 +332,14 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,

    /* Try to find a domain */
    QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
        ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
        if (!cpr_is_incoming()) {
            ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
        } else if (vbasedev->cpr.hwpt_id == hwpt->hwpt_id) {
            ret = 0;
        } else {
            continue;
        }

        if (ret) {
            /* -EINVAL means the domain is incompatible with the device. */
            if (ret == -EINVAL) {

@@ -330,6 +356,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
            return false;
        } else {
            vbasedev->hwpt = hwpt;
            vbasedev->cpr.hwpt_id = hwpt->hwpt_id;
            QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
            vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
            return true;

@@ -352,6 +379,11 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
        flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
    }

    if (cpr_is_incoming()) {
        hwpt_id = vbasedev->cpr.hwpt_id;
        goto skip_alloc;
    }

    if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid,
                                    container->ioas_id, flags,
                                    IOMMU_HWPT_DATA_NONE, 0, NULL,

@@ -359,19 +391,20 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
        return false;
    }

    ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp);
    if (ret) {
        iommufd_backend_free_id(container->be, hwpt_id);
        return false;
    }

skip_alloc:
    hwpt = g_malloc0(sizeof(*hwpt));
    hwpt->hwpt_id = hwpt_id;
    hwpt->hwpt_flags = flags;
    QLIST_INIT(&hwpt->device_list);

    ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
    if (ret) {
        iommufd_backend_free_id(container->be, hwpt->hwpt_id);
        g_free(hwpt);
        return false;
    }

    vbasedev->hwpt = hwpt;
    vbasedev->cpr.hwpt_id = hwpt->hwpt_id;
    vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
    QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
    QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next);

@@ -409,7 +442,9 @@ static bool iommufd_cdev_attach_container(VFIODevice *vbasedev,
        return iommufd_cdev_autodomains_get(vbasedev, container, errp);
    }

    return !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
    /* If CPR, we are already attached to ioas_id. */
    return cpr_is_incoming() ||
           !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
}

static void iommufd_cdev_detach_container(VFIODevice *vbasedev,

@@ -434,7 +469,7 @@ static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container)
    if (!QLIST_EMPTY(&bcontainer->device_list)) {
        return;
    }
    vfio_cpr_unregister_container(bcontainer);
    vfio_iommufd_cpr_unregister_container(container);
    vfio_listener_unregister(bcontainer);
    iommufd_backend_free_id(container->be, container->ioas_id);
    object_unref(container);

@@ -498,11 +533,14 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
    VFIOAddressSpace *space;
    struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
    int ret, devfd;
    bool res;
    uint32_t ioas_id;
    Error *err = NULL;
    const VFIOIOMMUClass *iommufd_vioc =
        VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));

    vfio_cpr_load_device(vbasedev);

    if (vbasedev->fd < 0) {
        devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
        if (devfd < 0) {

@@ -526,7 +564,16 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
            vbasedev->iommufd != container->be) {
            continue;
        }
        if (!iommufd_cdev_attach_container(vbasedev, container, &err)) {

        if (!cpr_is_incoming()) {
            res = iommufd_cdev_attach_container(vbasedev, container, &err);
        } else if (vbasedev->cpr.ioas_id == container->ioas_id) {
            res = true;
        } else {
            continue;
        }

        if (!res) {
            const char *msg = error_get_pretty(err);

            trace_iommufd_cdev_fail_attach_existing_container(msg);

@@ -543,6 +590,11 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
        }
    }

    if (cpr_is_incoming()) {
        ioas_id = vbasedev->cpr.ioas_id;
        goto skip_ioas_alloc;
    }

    /* Need to allocate a new dedicated container */
    if (!iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp)) {
        goto err_alloc_ioas;

@@ -550,10 +602,12 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,

    trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id);

skip_ioas_alloc:
    container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD));
    container->be = vbasedev->iommufd;
    container->ioas_id = ioas_id;
    QLIST_INIT(&container->hwpt_list);
    vbasedev->cpr.ioas_id = ioas_id;

    bcontainer = &container->bcontainer;
    vfio_address_space_insert(space, bcontainer);

@@ -580,7 +634,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
        goto err_listener_register;
    }

    if (!vfio_cpr_register_container(bcontainer, errp)) {
    if (!vfio_iommufd_cpr_register_container(container, errp)) {
        goto err_listener_register;
    }

@@ -611,6 +665,7 @@ found_container:
    }

    vfio_device_prepare(vbasedev, bcontainer, &dev_info);
    vfio_iommufd_cpr_register_device(vbasedev);

    trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs,
                                   vbasedev->num_regions, vbasedev->flags);

@@ -648,6 +703,7 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev)
    iommufd_cdev_container_destroy(container);
    vfio_address_space_put(space);

    vfio_iommufd_cpr_unregister_device(vbasedev);
    iommufd_cdev_unbind_and_disconnect(vbasedev);
    close(vbasedev->fd);
}

@@ -807,6 +863,7 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data)
    VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);

    vioc->dma_map = iommufd_cdev_map;
    vioc->dma_map_file = iommufd_cdev_map_file;
    vioc->dma_unmap = iommufd_cdev_unmap;
    vioc->attach_device = iommufd_cdev_attach;
    vioc->detach_device = iommufd_cdev_detach;
@@ -31,7 +31,9 @@ system_ss.add(when: 'CONFIG_VFIO', if_true: files(
))
system_ss.add(when: ['CONFIG_VFIO', 'CONFIG_IOMMUFD'], if_true: files(
  'iommufd.c',
  'cpr-iommufd.c',
))
system_ss.add(when: 'CONFIG_IOMMUFD', if_false: files('iommufd-stubs.c'))
system_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
  'display.c',
))
hw/vfio/pci.c | 109

@@ -29,6 +29,7 @@
#include "hw/pci/pci_bridge.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
#include "hw/vfio/vfio-cpr.h"
#include "migration/vmstate.h"
#include "migration/cpr.h"
#include "qobject/qdict.h"

@@ -57,20 +58,33 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
static void vfio_msi_disable_common(VFIOPCIDevice *vdev);

/* Create new or reuse existing eventfd */
static bool vfio_notifier_init(VFIOPCIDevice *vdev, EventNotifier *e,
                               const char *name, int nr, Error **errp)
{
    int ret = event_notifier_init(e, 0);
    int fd, ret;

    fd = vfio_cpr_load_vector_fd(vdev, name, nr);
    if (fd >= 0) {
        event_notifier_init_fd(e, fd);
        return true;
    }

    ret = event_notifier_init(e, 0);
    if (ret) {
        error_setg_errno(errp, -ret, "vfio_notifier_init %s failed", name);
        return false;
    }
    return !ret;

    fd = event_notifier_get_fd(e);
    vfio_cpr_save_vector_fd(vdev, name, nr, fd);
    return true;
}

static void vfio_notifier_cleanup(VFIOPCIDevice *vdev, EventNotifier *e,
                                  const char *name, int nr)
{
    vfio_cpr_delete_vector_fd(vdev, name, nr);
    event_notifier_cleanup(e);
}

@@ -196,6 +210,36 @@ fail:
#endif
}

static bool vfio_cpr_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp)
{
#ifdef CONFIG_KVM
    if (vdev->no_kvm_intx || !kvm_irqfds_enabled() ||
        vdev->intx.route.mode != PCI_INTX_ENABLED ||
        !kvm_resamplefds_enabled()) {
        return true;
    }

    if (!vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0, errp)) {
        return false;
    }

    if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
                                           &vdev->intx.interrupt,
                                           &vdev->intx.unmask,
                                           vdev->intx.route.irq)) {
        error_setg_errno(errp, errno, "failed to setup resample irqfd");
        vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0);
        return false;
    }

    vdev->intx.kvm_accel = true;
    trace_vfio_intx_enable_kvm(vdev->vbasedev.name);
    return true;
#else
    return true;
#endif
}

static void vfio_intx_disable_kvm(VFIOPCIDevice *vdev)
{
#ifdef CONFIG_KVM

@@ -291,7 +335,13 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
        return true;
    }

    vfio_disable_interrupts(vdev);
    /*
     * Do not alter interrupt state during vfio_realize and cpr load.
     * The incoming state is cleared thereafter.
     */
    if (!cpr_is_incoming()) {
        vfio_disable_interrupts(vdev);
    }

    vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */
    pci_config_set_interrupt_pin(vdev->pdev.config, pin);

@@ -314,6 +364,14 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
    fd = event_notifier_get_fd(&vdev->intx.interrupt);
    qemu_set_fd_handler(fd, vfio_intx_interrupt, NULL, vdev);

    if (cpr_is_incoming()) {
        if (!vfio_cpr_intx_enable_kvm(vdev, &err)) {
            warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
        }
        goto skip_signaling;
    }

    if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0,
                                       VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) {
        qemu_set_fd_handler(fd, NULL, NULL, vdev);

@@ -325,6 +383,7 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
        warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
    }

skip_signaling:
    vdev->interrupt = VFIO_INT_INTx;

    trace_vfio_intx_enable(vdev->vbasedev.name);

@@ -394,6 +453,14 @@ static void vfio_msi_interrupt(void *opaque)
    notify(&vdev->pdev, nr);
}

void vfio_pci_msi_set_handler(VFIOPCIDevice *vdev, int nr)
{
    VFIOMSIVector *vector = &vdev->msi_vectors[nr];
    int fd = event_notifier_get_fd(&vector->interrupt);

    qemu_set_fd_handler(fd, vfio_msi_interrupt, NULL, vector);
}

/*
 * Get MSI-X enabled, but no vector enabled, by setting vector 0 with an invalid
 * fd to kernel.

@@ -656,6 +723,15 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
static int vfio_msix_vector_use(PCIDevice *pdev,
                                unsigned int nr, MSIMessage msg)
{
    /*
     * Ignore the callback from msix_set_vector_notifiers during resume.
     * The necessary subset of these actions is called from
     * vfio_cpr_claim_vectors during post load.
     */
    if (cpr_is_incoming()) {
        return 0;
    }

    return vfio_msix_vector_do_use(pdev, nr, &msg, vfio_msi_interrupt);
}

@@ -686,6 +762,12 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
    }
}

void vfio_pci_msix_set_notifiers(VFIOPCIDevice *vdev)
{
    msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
                              vfio_msix_vector_release, NULL);
}

void vfio_pci_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
{
    assert(!vdev->defer_kvm_irq_routing);

@@ -2914,7 +2996,7 @@ void vfio_pci_put_device(VFIOPCIDevice *vdev)

    vfio_device_detach(&vdev->vbasedev);

    g_free(vdev->vbasedev.name);
    vfio_device_free_name(&vdev->vbasedev);
    g_free(vdev->msix);
}

@@ -2965,6 +3047,11 @@ void vfio_pci_register_err_notifier(VFIOPCIDevice *vdev)
    fd = event_notifier_get_fd(&vdev->err_notifier);
    qemu_set_fd_handler(fd, vfio_err_notifier_handler, NULL, vdev);

    /* Do not alter irq_signaling during vfio_realize for cpr */
    if (cpr_is_incoming()) {
        return;
    }

    if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_ERR_IRQ_INDEX, 0,
                                       VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
        error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);

@@ -3032,6 +3119,12 @@ void vfio_pci_register_req_notifier(VFIOPCIDevice *vdev)
    fd = event_notifier_get_fd(&vdev->req_notifier);
    qemu_set_fd_handler(fd, vfio_req_notifier_handler, NULL, vdev);

    /* Do not alter irq_signaling during vfio_realize for cpr */
    if (cpr_is_incoming()) {
        vdev->req_enabled = true;
        return;
    }

    if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_REQ_IRQ_INDEX, 0,
                                       VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
        error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);

@@ -3189,7 +3282,13 @@ bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp)
                                            vfio_intx_routing_notifier);
        vdev->irqchip_change_notifier.notify = vfio_irqchip_change;
        kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier);
        if (!vfio_intx_enable(vdev, errp)) {

        /*
         * During CPR, do not call vfio_intx_enable at this time. Instead,
         * call it from vfio_pci_post_load after the intx routing data has
         * been loaded from vmstate.
         */
        if (!cpr_is_incoming() && !vfio_intx_enable(vdev, errp)) {
            timer_free(vdev->intx.mmap_timer);
            pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
            kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
@@ -218,6 +218,8 @@ void vfio_pci_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
void vfio_pci_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev);
void vfio_pci_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev);
bool vfio_pci_intx_enable(VFIOPCIDevice *vdev, Error **errp);
void vfio_pci_msix_set_notifiers(VFIOPCIDevice *vdev);
void vfio_pci_msi_set_handler(VFIOPCIDevice *vdev, int nr);

uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
void vfio_pci_write_config(PCIDevice *pdev,
@@ -530,7 +530,7 @@ static bool vfio_base_device_init(VFIODevice *vbasedev, Error **errp)
{
    /* @fd takes precedence over @sysfsdev which takes precedence over @host */
    if (vbasedev->fd < 0 && vbasedev->sysfsdev) {
        g_free(vbasedev->name);
        vfio_device_free_name(vbasedev);
        vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
    } else if (vbasedev->fd < 0) {
        if (!vbasedev->name || strchr(vbasedev->name, '/')) {
@@ -197,6 +197,9 @@ iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD con
iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d"
iommufd_cdev_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int dev_id) "\t%04x:%02x:%02x.%x devid %d"

# cpr-iommufd.c
vfio_cpr_find_device(uint32_t ioas_id, int devid, uint32_t hwpt_id) "ioas_id %u, devid %d, hwpt_id %u"

# device.c
vfio_device_get_region_info_type(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x"
vfio_device_reset_handler(void) ""
@@ -85,6 +85,7 @@ void qemu_ram_unset_idstr(RAMBlock *block);
const char *qemu_ram_get_idstr(RAMBlock *rb);
void *qemu_ram_get_host_addr(RAMBlock *rb);
ram_addr_t qemu_ram_get_offset(RAMBlock *rb);
ram_addr_t qemu_ram_get_fd_offset(RAMBlock *rb);
ram_addr_t qemu_ram_get_used_length(RAMBlock *rb);
ram_addr_t qemu_ram_get_max_length(RAMBlock *rb);
bool qemu_ram_is_shared(RAMBlock *rb);
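qemu_ram_get_fd_offset() lets a caller translate a guest mapping into a (file descriptor, file offset) pair; vfio_container_dma_map() earlier in this diff does exactly that before calling dma_map_file. A minimal hedged sketch of the computation (illustrative only; the include path for these helpers is an assumption):

/*
 * Sketch only: derive the (fd, file offset) pair for a file-backed RAMBlock
 * mapping, as vfio_container_dma_map() does earlier in this diff.
 */
#include "qemu/osdep.h"
#include "system/memory.h"   /* assumed header location for these helpers */

static int ramblock_file_range(MemoryRegion *mr, void *vaddr,
                               unsigned long *file_offset)
{
    RAMBlock *rb = mr->ram_block;
    int fd = rb ? qemu_ram_get_fd(rb) : -1;

    if (fd >= 0) {
        /* Offset of vaddr within the block plus the block's offset in the file. */
        *file_offset = (unsigned long)(vaddr - qemu_ram_get_host_addr(rb)) +
                       qemu_ram_get_fd_offset(rb);
    }
    return fd;
}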
@@ -167,6 +167,21 @@ struct VFIOIOMMUClass {
    int (*dma_map)(const VFIOContainerBase *bcontainer,
                   hwaddr iova, ram_addr_t size,
                   void *vaddr, bool readonly, MemoryRegion *mr);
    /**
     * @dma_map_file
     *
     * Map a file range for the container.
     *
     * @bcontainer: #VFIOContainerBase to use for map
     * @iova: start address to map
     * @size: size of the range to map
     * @fd: descriptor of the file to map
     * @start: starting file offset of the range to map
     * @readonly: map read only if true
     */
    int (*dma_map_file)(const VFIOContainerBase *bcontainer,
                        hwaddr iova, ram_addr_t size,
                        int fd, unsigned long start, bool readonly);
    /**
     * @dma_unmap
     *
@ -15,19 +15,27 @@
|
|||
struct VFIOContainer;
|
||||
struct VFIOContainerBase;
|
||||
struct VFIOGroup;
|
||||
struct VFIODevice;
|
||||
struct VFIOPCIDevice;
|
||||
struct VFIOIOMMUFDContainer;
|
||||
struct IOMMUFDBackend;
|
||||
|
||||
typedef int (*dma_map_fn)(const struct VFIOContainerBase *bcontainer,
|
||||
hwaddr iova, ram_addr_t size, void *vaddr,
|
||||
bool readonly, MemoryRegion *mr);
|
||||
|
||||
typedef struct VFIOContainerCPR {
|
||||
Error *blocker;
|
||||
bool vaddr_unmapped;
|
||||
NotifierWithReturn transfer_notifier;
|
||||
MemoryListener remap_listener;
|
||||
int (*saved_dma_map)(const struct VFIOContainerBase *bcontainer,
|
||||
hwaddr iova, ram_addr_t size,
|
||||
void *vaddr, bool readonly, MemoryRegion *mr);
|
||||
} VFIOContainerCPR;
|
||||
|
||||
typedef struct VFIODeviceCPR {
|
||||
Error *mdev_blocker;
|
||||
Error *id_blocker;
|
||||
uint32_t hwpt_id;
|
||||
uint32_t ioas_id;
|
||||
} VFIODeviceCPR;
|
||||
|
||||
bool vfio_legacy_cpr_register_container(struct VFIOContainer *container,
|
||||
|
|
@ -37,9 +45,15 @@ void vfio_legacy_cpr_unregister_container(struct VFIOContainer *container);
|
|||
int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, MigrationEvent *e,
|
||||
Error **errp);
|
||||
|
||||
bool vfio_cpr_register_container(struct VFIOContainerBase *bcontainer,
|
||||
Error **errp);
|
||||
void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer);
|
||||
bool vfio_iommufd_cpr_register_container(struct VFIOIOMMUFDContainer *container,
|
||||
Error **errp);
|
||||
void vfio_iommufd_cpr_unregister_container(
|
||||
struct VFIOIOMMUFDContainer *container);
|
||||
bool vfio_iommufd_cpr_register_iommufd(struct IOMMUFDBackend *be, Error **errp);
|
||||
void vfio_iommufd_cpr_unregister_iommufd(struct IOMMUFDBackend *be);
|
||||
void vfio_iommufd_cpr_register_device(struct VFIODevice *vbasedev);
|
||||
void vfio_iommufd_cpr_unregister_device(struct VFIODevice *vbasedev);
|
||||
void vfio_cpr_load_device(struct VFIODevice *vbasedev);
|
||||
|
||||
int vfio_cpr_group_get_device_fd(int d, const char *name);
|
||||
|
||||
|
|
@@ -52,6 +66,16 @@ void vfio_cpr_giommu_remap(struct VFIOContainerBase *bcontainer,
bool vfio_cpr_ram_discard_register_listener(
    struct VFIOContainerBase *bcontainer, MemoryRegionSection *section);

void vfio_cpr_save_vector_fd(struct VFIOPCIDevice *vdev, const char *name,
                             int nr, int fd);
int vfio_cpr_load_vector_fd(struct VFIOPCIDevice *vdev, const char *name,
                            int nr);
void vfio_cpr_delete_vector_fd(struct VFIOPCIDevice *vdev, const char *name,
                               int nr);

extern const VMStateDescription vfio_cpr_pci_vmstate;
extern const VMStateDescription vmstate_cpr_vfio_devices;

void vfio_cpr_add_kvm_notifier(void);

#endif /* HW_VFIO_VFIO_CPR_H */

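Illustration only, not taken from the patches: one plausible way the vector-fd helpers above could be used so an interrupt eventfd survives cpr-transfer. The helper name, the "intx" name string, and the flow are assumptions.

/* Hypothetical: reuse a preserved eventfd on restart, record it otherwise. */
static int cpr_reuse_vector_fd(struct VFIOPCIDevice *vdev, EventNotifier *e,
                               int nr)
{
    int fd = vfio_cpr_load_vector_fd(vdev, "intx", nr);

    if (fd < 0) {
        /* Not a CPR restart: save the freshly created eventfd for later. */
        fd = event_notifier_get_fd(e);
        vfio_cpr_save_vector_fd(vdev, "intx", nr, fd);
    }
    return fd;
}
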
@@ -279,8 +279,11 @@ int vfio_device_get_irq_info(VFIODevice *vbasedev, int index,

/* Returns 0 on success, or a negative errno. */
bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
void vfio_device_free_name(VFIODevice *vbasedev);
void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
                      DeviceState *dev, bool ram_discard);
int vfio_device_get_aw_bits(VFIODevice *vdev);

void vfio_kvm_device_close(void);
#endif /* HW_VFIO_VFIO_COMMON_H */

@@ -9,11 +9,23 @@
#define MIGRATION_CPR_H

#include "qapi/qapi-types-migration.h"
#include "qemu/queue.h"

#define MIG_MODE_NONE -1

#define QEMU_CPR_FILE_MAGIC 0x51435052
#define QEMU_CPR_FILE_VERSION 0x00000001
#define CPR_STATE "CprState"

typedef QLIST_HEAD(CprFdList, CprFd) CprFdList;
typedef QLIST_HEAD(CprVFIODeviceList, CprVFIODevice) CprVFIODeviceList;

typedef struct CprState {
    CprFdList fds;
    CprVFIODeviceList vfio_devices;
} CprState;

extern CprState cpr_state;

void cpr_save_fd(const char *name, int id, int fd);
void cpr_delete_fd(const char *name, int id);

@@ -32,6 +44,8 @@ void cpr_state_close(void);
struct QIOChannel *cpr_state_ioc(void);

bool cpr_incoming_needed(void *opaque);
int cpr_get_fd_param(const char *name, const char *fdname, int index,
                     Error **errp);

QEMUFile *cpr_transfer_output(MigrationChannel *channel, Error **errp);
QEMUFile *cpr_transfer_input(MigrationChannel *channel, Error **errp);

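A minimal sketch of the fd-preservation pattern behind cpr_save_fd()/cpr_delete_fd(), approximating what the cpr_open_fd() helper visible further down presumably does; cpr_find_fd() is the lookup used by cpr_get_fd_param() later in this diff. The helper name here is hypothetical.

/* Hypothetical: reuse an fd recorded in CprState, else open and record it. */
static int open_or_reuse_fd(const char *path, const char *name, int id,
                            Error **errp)
{
    int fd = cpr_find_fd(name, id);

    if (fd < 0) {
        fd = qemu_open(path, O_RDWR, errp);
        if (fd >= 0) {
            /* Remember the fd under a stable name/id for cpr-transfer. */
            cpr_save_fd(name, id, fd);
        }
    }
    return fd;
}
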
@@ -32,6 +32,7 @@ struct IOMMUFDBackend {
    /*< protected >*/
    int fd;             /* /dev/iommu file descriptor */
    bool owned;         /* is the /dev/iommu opened internally */
    Error *cpr_blocker; /* set if be does not support CPR */
    uint32_t users;

    /*< public >*/

@@ -43,6 +44,9 @@ void iommufd_backend_disconnect(IOMMUFDBackend *be);
bool iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
                                Error **errp);
void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id);
int iommufd_backend_map_file_dma(IOMMUFDBackend *be, uint32_t ioas_id,
                                 hwaddr iova, ram_addr_t size, int fd,
                                 unsigned long start, bool readonly);
int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
                            ram_addr_t size, void *vaddr, bool readonly);
int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,

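Sketch only: how a container's dma_map_file callback might be wired to this backend call. The field names (bcontainer, be, ioas_id) are assumptions about VFIOIOMMUFDContainer, not taken from this diff.

/* Hypothetical plumbing from the VFIOIOMMUClass callback to the backend. */
static int iommufd_map_file_sketch(const VFIOContainerBase *bcontainer,
                                   hwaddr iova, ram_addr_t size,
                                   int fd, unsigned long start, bool readonly)
{
    const VFIOIOMMUFDContainer *container =
        container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);

    return iommufd_backend_map_file_dma(container->be, container->ioas_id,
                                        iova, size, fd, start, readonly);
}
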
@@ -66,6 +70,9 @@ bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id,
                                      uint32_t *entry_num, void *data,
                                      Error **errp);

bool iommufd_change_process_capable(IOMMUFDBackend *be);
bool iommufd_change_process(IOMMUFDBackend *be, Error **errp);

#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
OBJECT_DECLARE_TYPE(HostIOMMUDeviceIOMMUFD, HostIOMMUDeviceIOMMUFDClass,
                    HOST_IOMMU_DEVICE_IOMMUFD)

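Sketch: a capability check before relying on the change-process ioctl, for example when new QEMU adopts a preserved iommufd after cpr-transfer. The helper name and the error text are assumptions.

/* Hypothetical: only attempt the ownership change if the kernel supports it. */
static bool adopt_iommufd_sketch(IOMMUFDBackend *be, Error **errp)
{
    if (!iommufd_change_process_capable(be)) {
        error_setg(errp, "kernel cannot change the iommufd owning process");
        return false;
    }
    return iommufd_change_process(be, errp);
}
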
@@ -195,6 +195,7 @@ bool kvm_has_sync_mmu(void);
int kvm_has_vcpu_events(void);
int kvm_max_nested_state_length(void);
int kvm_has_gsi_routing(void);
void kvm_close(void);

/**
 * kvm_arm_supports_user_irq

@@ -7,25 +7,21 @@

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/vfio/vfio-device.h"
#include "migration/cpr.h"
#include "migration/misc.h"
#include "migration/options.h"
#include "migration/qemu-file.h"
#include "migration/savevm.h"
#include "migration/vmstate.h"
#include "monitor/monitor.h"
#include "system/runstate.h"
#include "trace.h"

/*************************************************************************/
/* cpr state container for all information to be saved. */

typedef QLIST_HEAD(CprFdList, CprFd) CprFdList;

typedef struct CprState {
    CprFdList fds;
} CprState;

static CprState cpr_state;
CprState cpr_state;

/****************************************************************************/

@@ -126,8 +122,6 @@ int cpr_open_fd(const char *path, int flags, const char *name, int id,
}

/*************************************************************************/
#define CPR_STATE "CprState"

static const VMStateDescription vmstate_cpr_state = {
    .name = CPR_STATE,
    .version_id = 1,

@@ -135,6 +129,10 @@ static const VMStateDescription vmstate_cpr_state = {
    .fields = (VMStateField[]) {
        VMSTATE_QLIST_V(fds, CprState, 1, vmstate_cpr_fd, CprFd, next),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * const []) {
        &vmstate_cpr_vfio_devices,
        NULL
    }
};
/*************************************************************************/

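Illustrative only: the general shape of a CPR-gated VMState description, the mechanism the vmstate_cpr_vfio_devices subsection above plugs into. The structure, field, and section name are invented for the example, and whether the real subsection uses cpr_incoming_needed() as its .needed hook is an assumption.

/* Hypothetical example of a .needed-gated VMState description. */
typedef struct ExampleCprBlob {
    uint32_t id;
} ExampleCprBlob;

static const VMStateDescription vmstate_example_cpr_blob = {
    .name = "cpr/example-blob",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpr_incoming_needed,   /* included only in cpr-transfer mode */
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(id, ExampleCprBlob),
        VMSTATE_END_OF_LIST()
    }
};
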
@@ -264,3 +262,39 @@ bool cpr_incoming_needed(void *opaque)
    MigMode mode = migrate_mode();
    return mode == MIG_MODE_CPR_TRANSFER;
}

/*
 * cpr_get_fd_param: find a descriptor and return its value.
 *
 * @name: CPR name for the descriptor
 * @fdname: An integer-valued string, or a name passed to a getfd command
 * @index: CPR index of the descriptor
 * @errp: returned error message
 *
 * If CPR is not being performed, then use @fdname to find the fd.
 * If CPR is being performed, then ignore @fdname, and look for @name
 * and @index in CPR state.
 *
 * On success returns the fd value, else returns -1.
 */
int cpr_get_fd_param(const char *name, const char *fdname, int index,
                     Error **errp)
{
    ERRP_GUARD();
    int fd;

    if (cpr_is_incoming()) {
        fd = cpr_find_fd(name, index);
        if (fd < 0) {
            error_setg(errp, "cannot find saved value for fd %s", fdname);
        }
    } else {
        fd = monitor_fd_param(monitor_cur(), fdname, errp);
        if (fd >= 0) {
            cpr_save_fd(name, index, fd);
        } else {
            error_prepend(errp, "Could not parse object fd %s:", fdname);
        }
    }
    return fd;
}

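Usage sketch for cpr_get_fd_param() above; the "example-dev" name and the wrapper are hypothetical. A device backed by a host fd resolves it the same way on a cold start and on a cpr-transfer restart.

/* Hypothetical wrapper: stable CPR identity "example-dev"/0 for one fd. */
static int example_dev_get_fd(const char *fdname_prop, Error **errp)
{
    return cpr_get_fd_param("example-dev", fdname_prop, 0, errp);
}
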
@@ -620,8 +620,10 @@
#
# @cpr-transfer: This mode allows the user to transfer a guest to a
#     new QEMU instance on the same host with minimal guest pause
#     time by preserving guest RAM in place.  Devices and their pinned
#     pages will also be preserved in a future QEMU release.
#     time by preserving guest RAM in place.
#
#     Devices and their pinned pages are also preserved for VFIO and
#     IOMMUFD.  (since 10.1)
#
# The user starts new QEMU on the same host as old QEMU, with
#     command-line arguments to create the same machine, plus the

@@ -1593,6 +1593,11 @@ ram_addr_t qemu_ram_get_offset(RAMBlock *rb)
    return rb->offset;
}

ram_addr_t qemu_ram_get_fd_offset(RAMBlock *rb)
{
    return rb->fd_offset;
}

ram_addr_t qemu_ram_get_used_length(RAMBlock *rb)
{
    return rb->used_length;

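Sketch: turning a file-backed MemoryRegionSection into the (fd, start) pair that dma_map_file expects, using the new accessor together with the pre-existing qemu_ram_get_fd(). The helper is hypothetical; error handling and non-file-backed regions are ignored.

/* Hypothetical: file offset = block's fd offset + section offset in region. */
static void section_file_range(MemoryRegionSection *section,
                               int *fd, unsigned long *start)
{
    RAMBlock *rb = section->mr->ram_block;

    *fd = qemu_ram_get_fd(rb);
    *start = qemu_ram_get_fd_offset(rb) + section->offset_within_region;
}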