vfio queue:

* Fixed newly added potential issues in vfio-pci
* Added support to report vfio-ap configuration changes
* Added prerequisite support for vfio-user
* Added first part for VFIO live update support
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEoPZlSPBIlev+awtgUaNDx8/77KEFAmhJm00ACgkQUaNDx8/7
 7KHBehAAlbSt+QCPwdNJ/5QPGGPWIQ86acIHaI/sE/lpcJx9FideQhtKTtt0gTOE
 ZNGbzfeCnewCM+VLMgkrYZC9DWd9OpEO68tDy6ev577F6ijSR8wzXRtDl2j5Revm
 R9gBuOm/cQ6Mafiv8SNPNSGW2tQ0M9Bd4GJRa5K3VBf8kFwPpWEZC/yDWbvSVvwc
 99TFXziIbWJEYGRzG4h7hoEEd/GapZOwTRIPRoRGHznbOPMsxShjIhExn8ZGTlU9
 woaNBPZXS5xjjy5tKyURexu+eyxbR6WsZFyeAA03+HzWEfRzhFc/rhAC6mBbpq7v
 03a/4ewkKZ0fYUf9G2H5YpXTXl6io+qk+irKi99/4GT0oSBMrm+/NcY7u9Hv2MwA
 50h3iXUhLQYzL2G2bSSoBTKOGxV84Xtto9j7dM7fy8e0nYv9rucvKl+V3Ox1Qwv4
 8+bQsxP5tjmHlXE/n6ckfcrWtSHuWmb3JJ8yxdBttdo3Cz/+KxJ3UjtP9U81RXxY
 gepxCRXZmcTfnv1dV6FyjOE6QOhB3WIT5rHmgoQIvHGhtBsLpT2mDlSsMVEQIXvm
 ixQnRguwQv9fgEZeYB/ck/ezluOxewBlOv5Q3CPpHQBd2Ykh4N/8xsWpXlKI1KWr
 Tj7Nh/2ObqNXbKdmb9nNiuo6eQDkPOm4mr1cs2ncMr7/cRGeKeA=
 =KOf3
 -----END PGP SIGNATURE-----

Merge tag 'pull-vfio-20250611' of https://github.com/legoater/qemu into staging

vfio queue:

* Fixed newly added potential issues in vfio-pci
* Added support to report vfio-ap configuration changes
* Added prerequisite support for vfio-user
* Added first part for VFIO live update support

# -----BEGIN PGP SIGNATURE-----
#
# iQIzBAABCAAdFiEEoPZlSPBIlev+awtgUaNDx8/77KEFAmhJm00ACgkQUaNDx8/7
# 7KHBehAAlbSt+QCPwdNJ/5QPGGPWIQ86acIHaI/sE/lpcJx9FideQhtKTtt0gTOE
# ZNGbzfeCnewCM+VLMgkrYZC9DWd9OpEO68tDy6ev577F6ijSR8wzXRtDl2j5Revm
# R9gBuOm/cQ6Mafiv8SNPNSGW2tQ0M9Bd4GJRa5K3VBf8kFwPpWEZC/yDWbvSVvwc
# 99TFXziIbWJEYGRzG4h7hoEEd/GapZOwTRIPRoRGHznbOPMsxShjIhExn8ZGTlU9
# woaNBPZXS5xjjy5tKyURexu+eyxbR6WsZFyeAA03+HzWEfRzhFc/rhAC6mBbpq7v
# 03a/4ewkKZ0fYUf9G2H5YpXTXl6io+qk+irKi99/4GT0oSBMrm+/NcY7u9Hv2MwA
# 50h3iXUhLQYzL2G2bSSoBTKOGxV84Xtto9j7dM7fy8e0nYv9rucvKl+V3Ox1Qwv4
# 8+bQsxP5tjmHlXE/n6ckfcrWtSHuWmb3JJ8yxdBttdo3Cz/+KxJ3UjtP9U81RXxY
# gepxCRXZmcTfnv1dV6FyjOE6QOhB3WIT5rHmgoQIvHGhtBsLpT2mDlSsMVEQIXvm
# ixQnRguwQv9fgEZeYB/ck/ezluOxewBlOv5Q3CPpHQBd2Ykh4N/8xsWpXlKI1KWr
# Tj7Nh/2ObqNXbKdmb9nNiuo6eQDkPOm4mr1cs2ncMr7/cRGeKeA=
# =KOf3
# -----END PGP SIGNATURE-----
# gpg: Signature made Wed 11 Jun 2025 11:05:49 EDT
# gpg:                using RSA key A0F66548F04895EBFE6B0B6051A343C7CFFBECA1
# gpg: Good signature from "Cédric Le Goater <clg@redhat.com>" [full]
# gpg:                 aka "Cédric Le Goater <clg@kaod.org>" [full]
# Primary key fingerprint: A0F6 6548 F048 95EB FE6B  0B60 51A3 43C7 CFFB ECA1

* tag 'pull-vfio-20250611' of https://github.com/legoater/qemu: (27 commits)
  vfio: improve VFIODeviceIOOps docs
  vfio/pci: export MSI functions
  vfio/pci: vfio_notifier_cleanup
  vfio/pci: vfio_notifier_init cpr parameters
  vfio/pci: pass vector to virq functions
  vfio/pci: vfio_notifier_init
  vfio/pci: vfio_pci_vector_init
  vfio-pci: skip reset during cpr
  pci: skip reset during cpr
  pci: export msix_is_pending
  vfio/container: recover from unmap-all-vaddr failure
  vfio/container: mdev cpr blocker
  vfio/container: restore DMA vaddr
  vfio/container: discard old DMA vaddr
  vfio/container: preserve descriptors
  vfio/container: register container for cpr
  migration: lower handler priority
  migration: cpr helpers
  vfio: mark posted writes in region write callbacks
  vfio: add per-region fd support
  ...

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
Stefan Hajnoczi 2025-06-11 11:39:53 -04:00
commit d9ce74873a
29 changed files with 914 additions and 128 deletions

View file

@ -112,6 +112,7 @@ F: hw/intc/s390_flic.c
F: hw/intc/s390_flic_kvm.c
F: hw/s390x/
F: hw/vfio/ap.c
F: hw/s390x/ap-stub.c
F: hw/vfio/ccw.c
F: hw/watchdog/wdt_diag288.c
F: include/hw/s390x/

View file

@ -72,7 +72,7 @@ static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
return dev->msix_pba + vector / 8;
}
static int msix_is_pending(PCIDevice *dev, int vector)
int msix_is_pending(PCIDevice *dev, unsigned int vector)
{
return *msix_pending_byte(dev, vector) & msix_pending_mask(vector);
}

View file

@ -32,6 +32,7 @@
#include "hw/pci/pci_host.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
#include "migration/cpr.h"
#include "migration/qemu-file-types.h"
#include "migration/vmstate.h"
#include "net/net.h"
@ -537,6 +538,10 @@ static void pci_reset_regions(PCIDevice *dev)
static void pci_do_device_reset(PCIDevice *dev)
{
if ((dev->cap_present & QEMU_PCI_SKIP_RESET_ON_CPR) && cpr_is_incoming()) {
return;
}
pci_device_deassert_intx(dev);
assert(dev->irq_state == 0);

21
hw/s390x/ap-stub.c Normal file
View file

@ -0,0 +1,21 @@
/*
* VFIO based AP matrix device assignment
*
* Copyright 2025 IBM Corp.
* Author(s): Rorie Reyes <rreyes@linux.ibm.com>
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "qemu/osdep.h"
#include "hw/s390x/ap-bridge.h"
/*
 * Stub for builds without CONFIG_VFIO_AP: there is never an AP
 * configuration-change event to hand out, so @res is left untouched.
 */
int ap_chsc_sei_nt0_get_event(void *res)
{
    (void)res;

    return EVENT_INFORMATION_NOT_STORED;
}
/*
 * Stub for builds without CONFIG_VFIO_AP: no AP configuration-change
 * event is ever pending.
 */
bool ap_chsc_sei_nt0_have_event(void)
{
    const bool event_pending = false;

    return event_pending;
}

View file

@ -33,6 +33,7 @@ s390x_ss.add(when: 'CONFIG_S390_CCW_VIRTIO', if_true: files(
))
s390x_ss.add(when: 'CONFIG_TERMINAL3270', if_true: files('3270-ccw.c'))
s390x_ss.add(when: 'CONFIG_VFIO', if_true: files('s390-pci-vfio.c'))
s390x_ss.add(when: 'CONFIG_VFIO_AP', if_false: files('ap-stub.c'))
virtio_ss = ss.source_set()
virtio_ss.add(files('virtio-ccw.c'))

View file

@ -10,6 +10,7 @@
* directory.
*/
#include <stdbool.h>
#include "qemu/osdep.h"
#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
#include <linux/vfio.h>
@ -18,8 +19,10 @@
#include "hw/vfio/vfio-device.h"
#include "system/iommufd.h"
#include "hw/s390x/ap-device.h"
#include "hw/s390x/css.h"
#include "qemu/error-report.h"
#include "qemu/event_notifier.h"
#include "qemu/lockable.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "qemu/option.h"
@ -37,8 +40,18 @@ struct VFIOAPDevice {
APDevice apdev;
VFIODevice vdev;
EventNotifier req_notifier;
EventNotifier cfg_notifier;
};
/*
 * One queued AP configuration-change notification.  The entry carries no
 * payload of its own; it only provides the linkage for cfg_chg_events.
 */
typedef struct APConfigChgEvent {
QTAILQ_ENTRY(APConfigChgEvent) next;
} APConfigChgEvent;
/*
 * Pending AP configuration-change events.  The notifier handler appends at
 * the tail and ap_chsc_sei_nt0_get_event() removes from the head.
 */
static QTAILQ_HEAD(, APConfigChgEvent) cfg_chg_events =
QTAILQ_HEAD_INITIALIZER(cfg_chg_events);
/* Protects cfg_chg_events; initialised on first use in vfio_ap_realize(). */
static QemuMutex cfg_chg_events_lock;
OBJECT_DECLARE_SIMPLE_TYPE(VFIOAPDevice, VFIO_AP_DEVICE)
static void vfio_ap_compute_needs_reset(VFIODevice *vdev)
@ -70,6 +83,57 @@ static void vfio_ap_req_notifier_handler(void *opaque)
}
}
static void vfio_ap_cfg_chg_notifier_handler(void *opaque)
{
APConfigChgEvent *cfg_chg_event;
VFIOAPDevice *vapdev = opaque;
if (!event_notifier_test_and_clear(&vapdev->cfg_notifier)) {
return;
}
cfg_chg_event = g_new0(APConfigChgEvent, 1);
WITH_QEMU_LOCK_GUARD(&cfg_chg_events_lock) {
QTAILQ_INSERT_TAIL(&cfg_chg_events, cfg_chg_event, next);
}
css_generate_css_crws(0);
}
int ap_chsc_sei_nt0_get_event(void *res)
{
ChscSeiNt0Res *nt0_res = (ChscSeiNt0Res *)res;
APConfigChgEvent *cfg_chg_event;
WITH_QEMU_LOCK_GUARD(&cfg_chg_events_lock) {
if (QTAILQ_EMPTY(&cfg_chg_events)) {
return EVENT_INFORMATION_NOT_STORED;
}
cfg_chg_event = QTAILQ_FIRST(&cfg_chg_events);
QTAILQ_REMOVE(&cfg_chg_events, cfg_chg_event, next);
}
memset(nt0_res, 0, sizeof(*nt0_res));
g_free(cfg_chg_event);
nt0_res->flags |= PENDING_EVENT_INFO_BITMASK;
nt0_res->length = sizeof(ChscSeiNt0Res);
nt0_res->code = NT0_RES_RESPONSE_CODE;
nt0_res->nt = NT0_RES_NT_DEFAULT;
nt0_res->rs = NT0_RES_RS_AP_CHANGE;
nt0_res->cc = NT0_RES_CC_AP_CHANGE;
return EVENT_INFORMATION_STORED;
}
bool ap_chsc_sei_nt0_have_event(void)
{
QEMU_LOCK_GUARD(&cfg_chg_events_lock);
return !QTAILQ_EMPTY(&cfg_chg_events);
}
static bool vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev,
unsigned int irq, Error **errp)
{
@ -85,6 +149,10 @@ static bool vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev,
notifier = &vapdev->req_notifier;
fd_read = vfio_ap_req_notifier_handler;
break;
case VFIO_AP_CFG_CHG_IRQ_INDEX:
notifier = &vapdev->cfg_notifier;
fd_read = vfio_ap_cfg_chg_notifier_handler;
break;
default:
error_setg(errp, "vfio: Unsupported device irq(%d)", irq);
return false;
@ -137,6 +205,9 @@ static void vfio_ap_unregister_irq_notifier(VFIOAPDevice *vapdev,
case VFIO_AP_REQ_IRQ_INDEX:
notifier = &vapdev->req_notifier;
break;
case VFIO_AP_CFG_CHG_IRQ_INDEX:
notifier = &vapdev->cfg_notifier;
break;
default:
error_report("vfio: Unsupported device irq(%d)", irq);
return;
@ -159,6 +230,13 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp)
VFIOAPDevice *vapdev = VFIO_AP_DEVICE(dev);
VFIODevice *vbasedev = &vapdev->vdev;
static bool lock_initialized;
if (!lock_initialized) {
qemu_mutex_init(&cfg_chg_events_lock);
lock_initialized = true;
}
if (!vfio_device_get_name(vbasedev, errp)) {
return;
}
@ -176,6 +254,15 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp)
warn_report_err(err);
}
if (!vfio_ap_register_irq_notifier(vapdev, VFIO_AP_CFG_CHG_IRQ_INDEX, &err))
{
/*
* Report this error, but do not make it a failing condition.
* Lack of this IRQ in the host does not prevent normal operation.
*/
warn_report_err(err);
}
return;
error:
@ -188,6 +275,7 @@ static void vfio_ap_unrealize(DeviceState *dev)
VFIOAPDevice *vapdev = VFIO_AP_DEVICE(dev);
vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX);
vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_CFG_CHG_IRQ_INDEX);
vfio_device_detach(&vapdev->vdev);
g_free(vapdev->vdev.name);
}

View file

@ -31,9 +31,10 @@
#include "system/reset.h"
#include "trace.h"
#include "qapi/error.h"
#include "migration/cpr.h"
#include "migration/blocker.h"
#include "pci.h"
#include "hw/vfio/vfio-container.h"
#include "hw/vfio/vfio-cpr.h"
#include "vfio-helpers.h"
#include "vfio-listener.h"
@ -135,6 +136,8 @@ static int vfio_legacy_dma_unmap_one(const VFIOContainerBase *bcontainer,
int ret;
Error *local_err = NULL;
g_assert(!cpr_is_incoming());
if (iotlb && vfio_container_dirty_tracking_is_started(bcontainer)) {
if (!vfio_container_devices_dirty_tracking_is_supported(bcontainer) &&
bcontainer->dirty_pages_supported) {
@ -426,7 +429,12 @@ static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group,
return NULL;
}
if (!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
/*
* During CPR, just set the container type and skip the ioctls, as the
* container and group are already configured in the kernel.
*/
if (!cpr_is_incoming() &&
!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
return NULL;
}
@ -593,6 +601,11 @@ static bool vfio_container_group_add(VFIOContainer *container, VFIOGroup *group,
group->container = container;
QLIST_INSERT_HEAD(&container->group_list, group, container_next);
vfio_group_add_kvm_device(group);
/*
* Remember the container fd for each group, so we can attach to the same
* container after CPR.
*/
cpr_resave_fd("vfio_container_for_group", group->groupid, container->fd);
return true;
}
@ -602,6 +615,7 @@ static void vfio_container_group_del(VFIOContainer *container, VFIOGroup *group)
group->container = NULL;
vfio_group_del_kvm_device(group);
vfio_ram_block_discard_disable(container, false);
cpr_delete_fd("vfio_container_for_group", group->groupid);
}
static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as,
@ -616,17 +630,34 @@ static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as,
bool group_was_added = false;
space = vfio_address_space_get(as);
fd = cpr_find_fd("vfio_container_for_group", group->groupid);
QLIST_FOREACH(bcontainer, &space->containers, next) {
container = container_of(bcontainer, VFIOContainer, bcontainer);
if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
return vfio_container_group_add(container, group, errp);
if (!cpr_is_incoming()) {
QLIST_FOREACH(bcontainer, &space->containers, next) {
container = container_of(bcontainer, VFIOContainer, bcontainer);
if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
return vfio_container_group_add(container, group, errp);
}
}
}
fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp);
if (fd < 0) {
goto fail;
fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp);
if (fd < 0) {
goto fail;
}
} else {
/*
* For incoming CPR, the group is already attached in the kernel.
* If a container with matching fd is found, then update the
* userland group list and return. If not, then after the loop,
* create the container struct and group list.
*/
QLIST_FOREACH(bcontainer, &space->containers, next) {
container = container_of(bcontainer, VFIOContainer, bcontainer);
if (vfio_cpr_container_match(container, group, fd)) {
return vfio_container_group_add(container, group, errp);
}
}
}
ret = ioctl(fd, VFIO_GET_API_VERSION);
@ -643,7 +674,7 @@ static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as,
new_container = true;
bcontainer = &container->bcontainer;
if (!vfio_cpr_register_container(bcontainer, errp)) {
if (!vfio_legacy_cpr_register_container(container, errp)) {
goto fail;
}
@ -661,8 +692,17 @@ static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as,
}
group_was_added = true;
if (!vfio_listener_register(bcontainer, errp)) {
goto fail;
/*
* If CPR, register the listener later, after all state that may
* affect regions and mapping boundaries has been cpr load'ed. Later,
* the listener will invoke its callback on each flat section and call
* dma_map to supply the new vaddr, and the calls will match the mappings
* remembered by the kernel.
*/
if (!cpr_is_incoming()) {
if (!vfio_listener_register(bcontainer, errp)) {
goto fail;
}
}
bcontainer->initialized = true;
@ -679,7 +719,7 @@ fail:
vioc->release(bcontainer);
}
if (new_container) {
vfio_cpr_unregister_container(bcontainer);
vfio_legacy_cpr_unregister_container(container);
object_unref(container);
}
if (fd >= 0) {
@ -698,6 +738,7 @@ static void vfio_container_disconnect(VFIOGroup *group)
QLIST_REMOVE(group, container_next);
group->container = NULL;
cpr_delete_fd("vfio_container_for_group", group->groupid);
/*
* Explicitly release the listener first before unset container,
@ -720,7 +761,7 @@ static void vfio_container_disconnect(VFIOGroup *group)
VFIOAddressSpace *space = bcontainer->space;
trace_vfio_container_disconnect(container->fd);
vfio_cpr_unregister_container(bcontainer);
vfio_legacy_cpr_unregister_container(container);
close(container->fd);
object_unref(container);
@ -751,7 +792,7 @@ static VFIOGroup *vfio_group_get(int groupid, AddressSpace *as, Error **errp)
group = g_malloc0(sizeof(*group));
snprintf(path, sizeof(path), "/dev/vfio/%d", groupid);
group->fd = qemu_open(path, O_RDWR, errp);
group->fd = cpr_open_fd(path, O_RDWR, "vfio_group", groupid, errp);
if (group->fd < 0) {
goto free_group_exit;
}
@ -783,6 +824,7 @@ static VFIOGroup *vfio_group_get(int groupid, AddressSpace *as, Error **errp)
return group;
close_fd_exit:
cpr_delete_fd("vfio_group", groupid);
close(group->fd);
free_group_exit:
@ -804,6 +846,7 @@ static void vfio_group_put(VFIOGroup *group)
vfio_container_disconnect(group);
QLIST_REMOVE(group, next);
trace_vfio_group_put(group->fd);
cpr_delete_fd("vfio_group", group->groupid);
close(group->fd);
g_free(group);
}
@ -814,7 +857,7 @@ static bool vfio_device_get(VFIOGroup *group, const char *name,
g_autofree struct vfio_device_info *info = NULL;
int fd;
fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
fd = vfio_cpr_group_get_device_fd(group->fd, name);
if (fd < 0) {
error_setg_errno(errp, errno, "error getting device from group %d",
group->groupid);
@ -827,8 +870,7 @@ static bool vfio_device_get(VFIOGroup *group, const char *name,
info = vfio_get_device_info(fd);
if (!info) {
error_setg_errno(errp, errno, "error getting device info");
close(fd);
return false;
goto fail;
}
/*
@ -842,8 +884,7 @@ static bool vfio_device_get(VFIOGroup *group, const char *name,
if (!QLIST_EMPTY(&group->device_list)) {
error_setg(errp, "Inconsistent setting of support for discarding "
"RAM (e.g., balloon) within group");
close(fd);
return false;
goto fail;
}
if (!group->ram_block_discard_allowed) {
@ -861,6 +902,11 @@ static bool vfio_device_get(VFIOGroup *group, const char *name,
trace_vfio_device_get(name, info->flags, info->num_regions, info->num_irqs);
return true;
fail:
close(fd);
cpr_delete_fd(name, 0);
return false;
}
static void vfio_device_put(VFIODevice *vbasedev)
@ -871,6 +917,7 @@ static void vfio_device_put(VFIODevice *vbasedev)
QLIST_REMOVE(vbasedev, next);
vbasedev->group = NULL;
trace_vfio_device_put(vbasedev->fd);
cpr_delete_fd(vbasedev->name, 0);
close(vbasedev->fd);
}
@ -940,6 +987,13 @@ static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev,
goto device_put_exit;
}
if (vbasedev->mdev) {
error_setg(&vbasedev->cpr.mdev_blocker,
"CPR does not support vfio mdev %s", vbasedev->name);
migrate_add_blocker_modes(&vbasedev->cpr.mdev_blocker, &error_fatal,
MIG_MODE_CPR_TRANSFER, -1);
}
return true;
device_put_exit:
@ -957,6 +1011,7 @@ static void vfio_legacy_detach_device(VFIODevice *vbasedev)
vfio_device_unprepare(vbasedev);
migrate_del_blocker(&vbasedev->cpr.mdev_blocker);
object_unref(vbasedev->hiod);
vfio_device_put(vbasedev);
vfio_group_put(group);

287
hw/vfio/cpr-legacy.c Normal file
View file

@ -0,0 +1,287 @@
/*
* Copyright (c) 2021-2025 Oracle and/or its affiliates.
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include <sys/ioctl.h>
#include <linux/vfio.h>
#include "qemu/osdep.h"
#include "hw/vfio/vfio-container.h"
#include "hw/vfio/vfio-device.h"
#include "hw/vfio/vfio-listener.h"
#include "migration/blocker.h"
#include "migration/cpr.h"
#include "migration/migration.h"
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
/*
 * Issue a single VFIO_IOMMU_UNMAP_DMA with the VADDR and ALL flags on the
 * container fd.
 *
 * On success, records the fact in container->cpr.vaddr_unmapped and returns
 * true.  On failure, sets @errp from errno and returns false.
 */
static bool vfio_dma_unmap_vaddr_all(VFIOContainer *container, Error **errp)
{
    /* iova and size stay zero, as in any designated initialiser. */
    struct vfio_iommu_type1_dma_unmap req = {
        .argsz = sizeof(req),
        .flags = VFIO_DMA_UNMAP_FLAG_VADDR | VFIO_DMA_UNMAP_FLAG_ALL,
    };

    if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &req) == 0) {
        container->cpr.vaddr_unmapped = true;
        return true;
    }

    error_setg_errno(errp, errno, "vfio_dma_unmap_vaddr_all");
    return false;
}
/*
 * dma_map replacement used while CPR is incoming: give the kernel the new
 * @vaddr for a mapping it already holds, via VFIO_IOMMU_MAP_DMA with
 * VFIO_DMA_MAP_FLAG_VADDR.
 *
 * @readonly and @mr are part of the dma_map signature but are not used here.
 *
 * Returns 0 on success or -errno if the ioctl fails.
 */
static int vfio_legacy_cpr_dma_map(const VFIOContainerBase *bcontainer,
                                   hwaddr iova, ram_addr_t size, void *vaddr,
                                   bool readonly, MemoryRegion *mr)
{
    const VFIOContainer *legacy = container_of(bcontainer, VFIOContainer,
                                               bcontainer);
    struct vfio_iommu_type1_dma_map req = {
        .argsz = sizeof(req),
        .flags = VFIO_DMA_MAP_FLAG_VADDR,
        .vaddr = (__u64)(uintptr_t)vaddr,
        .iova = iova,
        .size = size,
    };

    g_assert(cpr_is_incoming());

    return ioctl(legacy->fd, VFIO_IOMMU_MAP_DMA, &req) ? -errno : 0;
}
/*
 * region_add callback of the temporary cpr.remap_listener: re-add @section
 * to the owning container with cpr_remap set.
 */
static void vfio_region_remap(MemoryListener *listener,
                              MemoryRegionSection *section)
{
    VFIOContainer *owner = container_of(listener, VFIOContainer,
                                        cpr.remap_listener);
    VFIOContainerBase *base = &owner->bcontainer;

    vfio_container_region_add(base, section, true);
}
/*
 * Check that the container's kernel driver advertises both extensions CPR
 * relies on.  Returns true if so; otherwise sets @errp naming the first
 * missing extension and returns false.
 */
static bool vfio_cpr_supported(VFIOContainer *container, Error **errp)
{
    if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UPDATE_VADDR)) {
        error_setg(errp, "VFIO container does not support VFIO_UPDATE_VADDR");
        return false;
    }

    if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UNMAP_ALL)) {
        error_setg(errp, "VFIO container does not support VFIO_UNMAP_ALL");
        return false;
    }

    return true;
}
static int vfio_container_pre_save(void *opaque)
{
VFIOContainer *container = opaque;
Error *local_err = NULL;
if (!vfio_dma_unmap_vaddr_all(container, &local_err)) {
error_report_err(local_err);
return -1;
}
return 0;
}
static int vfio_container_post_load(void *opaque, int version_id)
{
VFIOContainer *container = opaque;
VFIOContainerBase *bcontainer = &container->bcontainer;
VFIOGroup *group;
Error *local_err = NULL;
if (!vfio_listener_register(bcontainer, &local_err)) {
error_report_err(local_err);
return -1;
}
QLIST_FOREACH(group, &container->group_list, container_next) {
VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
/* Restore original dma_map function */
vioc->dma_map = container->cpr.saved_dma_map;
}
return 0;
}
/*
 * VMState descriptor registered per legacy container.  It transfers no
 * fields; it exists for its pre_save and post_load hooks.
 */
static const VMStateDescription vfio_container_vmstate = {
.name = "vfio-container",
.version_id = 0,
.minimum_version_id = 0,
.priority = MIG_PRI_LOW, /* Must happen after devices and groups */
.pre_save = vfio_container_pre_save,
.post_load = vfio_container_post_load,
.needed = cpr_incoming_needed,
.fields = (VMStateField[]) {
VMSTATE_END_OF_LIST()
}
};
/*
 * Migration notifier registered for MIG_MODE_CPR_TRANSFER.
 *
 * Acts only on MIG_EVENT_PRECOPY_FAILED, and only if
 * vfio_dma_unmap_vaddr_all() had already invalidated the container's vaddrs
 * (cpr.vaddr_unmapped); in that case the vaddrs are restored.
 *
 * Always returns 0 and never sets @errp.
 */
static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier,
                                  MigrationEvent *e, Error **errp)
{
    VFIOContainer *container =
        container_of(notifier, VFIOContainer, cpr.transfer_notifier);
    VFIOContainerBase *bcontainer = &container->bcontainer;

    if (e->type != MIG_EVENT_PRECOPY_FAILED) {
        return 0;
    }

    if (container->cpr.vaddr_unmapped) {
        /*
         * Force a call to vfio_region_remap for each mapped section by
         * temporarily registering a listener, and temporarily diverting
         * dma_map to vfio_legacy_cpr_dma_map.  The latter restores vaddr.
         */
        VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);

        /*
         * Remember the live hook before diverting it.  Registration only
         * fills cpr.saved_dma_map when cpr_is_incoming(), so without this
         * the restore below would store NULL into dma_map on a QEMU that
         * was not itself started by CPR.  Skip the save if the hook is
         * already diverted, so the original is never overwritten.
         */
        if (vioc->dma_map != vfio_legacy_cpr_dma_map) {
            container->cpr.saved_dma_map = vioc->dma_map;
        }
        vioc->dma_map = vfio_legacy_cpr_dma_map;

        container->cpr.remap_listener = (MemoryListener) {
            .name = "vfio cpr recover",
            .region_add = vfio_region_remap
        };
        /* Registering replays region_add for every existing section. */
        memory_listener_register(&container->cpr.remap_listener,
                                 bcontainer->space->as);
        memory_listener_unregister(&container->cpr.remap_listener);

        container->cpr.vaddr_unmapped = false;
        vioc->dma_map = container->cpr.saved_dma_map;
    }

    return 0;
}
/*
 * Hook a legacy container into CPR.
 *
 * The cpr-reboot notifier is always installed.  If the kernel lacks the
 * extensions checked by vfio_cpr_supported(), a cpr-transfer migration
 * blocker is added instead of the remaining setup, and the result of adding
 * that blocker decides the return value.  Otherwise the container vmstate
 * and the cpr-transfer failure notifier are registered, and during incoming
 * CPR the class dma_map hook is saved and diverted.
 *
 * Returns true on success, false with @errp set on failure.
 */
bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp)
{
    VFIOContainerBase *base = &container->bcontainer;

    migration_add_notifier_mode(&base->cpr_reboot_notifier,
                                vfio_cpr_reboot_notifier,
                                MIG_MODE_CPR_REBOOT);

    if (!vfio_cpr_supported(container, &container->cpr.blocker)) {
        int rc = migrate_add_blocker_modes(&container->cpr.blocker, errp,
                                           MIG_MODE_CPR_TRANSFER, -1);

        return rc == 0;
    }

    vmstate_register(NULL, -1, &vfio_container_vmstate, container);

    if (cpr_is_incoming()) {
        /* During incoming CPR, divert calls to dma_map. */
        VFIOIOMMUClass *klass = VFIO_IOMMU_GET_CLASS(base);

        container->cpr.saved_dma_map = klass->dma_map;
        klass->dma_map = vfio_legacy_cpr_dma_map;
    }

    migration_add_notifier_mode(&container->cpr.transfer_notifier,
                                vfio_cpr_fail_notifier,
                                MIG_MODE_CPR_TRANSFER);
    return true;
}
/*
 * Undo vfio_legacy_cpr_register_container(): remove both migration
 * notifiers, the cpr-transfer blocker and the container vmstate.
 */
void vfio_legacy_cpr_unregister_container(VFIOContainer *container)
{
    migration_remove_notifier(&container->bcontainer.cpr_reboot_notifier);
    migrate_del_blocker(&container->cpr.blocker);
    vmstate_unregister(NULL, &vfio_container_vmstate, container);
    migration_remove_notifier(&container->cpr.transfer_notifier);
}
/*
 * In old QEMU, VFIO_DMA_UNMAP_FLAG_VADDR may fail on some mapping after
 * succeeding for others, so the latter have lost their vaddr.  Call this
 * to restore vaddr for a section with a giommu.
 *
 * The giommu must already be on @bcontainer's giommu_list (asserted).  It is
 * looked up by IOMMU memory region and offset and then replayed, which
 * calls vfio_legacy_cpr_dma_map further down the stack.
 */
void vfio_cpr_giommu_remap(VFIOContainerBase *bcontainer,
                           MemoryRegionSection *section)
{
    const hwaddr wanted_offset = section->offset_within_address_space -
                                 section->offset_within_region;
    VFIOGuestIOMMU *match = NULL;
    VFIOGuestIOMMU *it;

    QLIST_FOREACH(it, &bcontainer->giommu_list, giommu_next) {
        if (it->iommu_mr == IOMMU_MEMORY_REGION(section->mr) &&
            it->iommu_offset == wanted_offset) {
            match = it;
            break;
        }
    }

    g_assert(match);
    memory_region_iommu_replay(match->iommu_mr, &match->n);
}
/*
 * In old QEMU, VFIO_DMA_UNMAP_FLAG_VADDR may fail on some mapping after
 * succeeding for others, so the latter have lost their vaddr.  Call this
 * to restore vaddr for a section with a RamDiscardManager.
 *
 * The ram discard listener for @section must already exist (asserted).  Its
 * populate callback is invoked directly, which calls
 * vfio_legacy_cpr_dma_map.
 *
 * Returns true if the populate callback returned 0.
 */
bool vfio_cpr_ram_discard_register_listener(VFIOContainerBase *bcontainer,
                                            MemoryRegionSection *section)
{
    VFIORamDiscardListener *found =
        vfio_find_ram_discard_listener(bcontainer, section);
    int rc;

    g_assert(found);

    rc = found->listener.notify_populate(&found->listener, section);
    return rc == 0;
}
/*
 * Return the device fd for @name.
 *
 * An fd already recorded in CPR state under (@name, 0) is reused.  Otherwise
 * one is requested from the group fd @d with VFIO_GROUP_GET_DEVICE_FD and,
 * if that succeeds, recorded in CPR state.
 *
 * Returns the fd, or the negative ioctl result on failure.
 */
int vfio_cpr_group_get_device_fd(int d, const char *name)
{
    int fd = cpr_find_fd(name, 0);

    if (fd >= 0) {
        return fd;
    }

    fd = ioctl(d, VFIO_GROUP_GET_DEVICE_FD, name);
    if (fd >= 0) {
        cpr_save_fd(name, 0, fd);
    }

    return fd;
}
/*
 * Return true if both descriptors can be fstat()ed and report the same
 * st_dev.  Any fstat() failure yields false; @fd2 is not examined if @fd1
 * fails.
 *
 * NOTE(review): only st_dev is compared, not st_rdev or st_ino - confirm
 * this is sufficient for the callers.
 */
static bool same_device(int fd1, int fd2)
{
    struct stat first;
    struct stat second;

    if (fstat(fd1, &first) != 0) {
        return false;
    }
    if (fstat(fd2, &second) != 0) {
        return false;
    }

    return first.st_dev == second.st_dev;
}
/*
 * Decide whether @fd, the container fd CPR preserved for @group, refers to
 * @container.
 *
 * An identical fd number matches outright.  A different fd for which
 * same_device() holds is treated as a duplicate: it is closed and the CPR
 * entry for the group is rewritten to use container->fd.
 *
 * Returns true on a match, false otherwise.
 */
bool vfio_cpr_container_match(VFIOContainer *container, VFIOGroup *group,
                              int fd)
{
    if (container->fd != fd) {
        if (!same_device(container->fd, fd)) {
            return false;
        }

        /*
         * Same device, different fd.  This occurs when the container fd is
         * cpr_save'd multiple times, once for each groupid, so SCM_RIGHTS
         * produces duplicates.  De-dup it.
         */
        cpr_delete_fd("vfio_container_for_group", group->groupid);
        close(fd);
        cpr_save_fd("vfio_container_for_group", group->groupid,
                    container->fd);
    }

    return true;
}

View file

@ -7,13 +7,14 @@
#include "qemu/osdep.h"
#include "hw/vfio/vfio-device.h"
#include "migration/misc.h"
#include "hw/vfio/vfio-cpr.h"
#include "hw/vfio/pci.h"
#include "migration/cpr.h"
#include "qapi/error.h"
#include "system/runstate.h"
static int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier,
MigrationEvent *e, Error **errp)
int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier,
MigrationEvent *e, Error **errp)
{
if (e->type == MIG_EVENT_PRECOPY_SETUP &&
!runstate_check(RUN_STATE_SUSPENDED) && !vm_get_suspended()) {
@ -38,3 +39,32 @@ void vfio_cpr_unregister_container(VFIOContainerBase *bcontainer)
{
migration_remove_notifier(&bcontainer->cpr_reboot_notifier);
}
/*
 * The kernel may change non-emulated config bits.  Exclude them from the
 * changed-bits check in get_pci_config_device by clearing every cmask bit
 * that is not set in emulated_config_bits.
 *
 * vmstate pre_load hook; @opaque is the VFIOPCIDevice.  Always returns 0.
 */
static int vfio_cpr_pci_pre_load(void *opaque)
{
    VFIOPCIDevice *vdev = opaque;
    PCIDevice *pdev = &vdev->pdev;
    int limit = MIN(pci_config_size(pdev), vdev->config_size);

    for (int off = 0; off < limit; off++) {
        pdev->cmask[off] &= vdev->emulated_config_bits[off];
    }

    return 0;
}
/*
 * VMState descriptor for vfio-pci CPR handling.  It transfers no fields; it
 * exists for its pre_load hook, vfio_cpr_pci_pre_load().
 */
const VMStateDescription vfio_cpr_pci_vmstate = {
.name = "vfio-cpr-pci",
.version_id = 0,
.minimum_version_id = 0,
.pre_load = vfio_cpr_pci_pre_load,
.needed = cpr_incoming_needed,
.fields = (VMStateField[]) {
VMSTATE_END_OF_LIST()
}
};

View file

@ -200,6 +200,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
struct vfio_region_info **info)
{
size_t argsz = sizeof(struct vfio_region_info);
int fd = -1;
int ret;
/* check cache */
@ -214,7 +215,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
retry:
(*info)->argsz = argsz;
ret = vbasedev->io_ops->get_region_info(vbasedev, *info);
ret = vbasedev->io_ops->get_region_info(vbasedev, *info, &fd);
if (ret != 0) {
g_free(*info);
*info = NULL;
@ -225,11 +226,19 @@ retry:
argsz = (*info)->argsz;
*info = g_realloc(*info, argsz);
if (fd != -1) {
close(fd);
fd = -1;
}
goto retry;
}
/* fill cache */
vbasedev->reginfo[index] = *info;
if (vbasedev->region_fds != NULL) {
vbasedev->region_fds[index] = fd;
}
return 0;
}
@ -334,6 +343,7 @@ void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
vbasedev->io_ops = &vfio_device_io_ops_ioctl;
vbasedev->dev = dev;
vbasedev->fd = -1;
vbasedev->use_region_fds = false;
vbasedev->ram_block_discard_allowed = ram_discard;
}
@ -444,6 +454,9 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
vbasedev->reginfo = g_new0(struct vfio_region_info *,
vbasedev->num_regions);
if (vbasedev->use_region_fds) {
vbasedev->region_fds = g_new0(int, vbasedev->num_regions);
}
}
void vfio_device_unprepare(VFIODevice *vbasedev)
@ -452,9 +465,14 @@ void vfio_device_unprepare(VFIODevice *vbasedev)
for (i = 0; i < vbasedev->num_regions; i++) {
g_free(vbasedev->reginfo[i]);
if (vbasedev->region_fds != NULL && vbasedev->region_fds[i] != -1) {
close(vbasedev->region_fds[i]);
}
}
g_free(vbasedev->reginfo);
vbasedev->reginfo = NULL;
g_clear_pointer(&vbasedev->reginfo, g_free);
g_clear_pointer(&vbasedev->region_fds, g_free);
QLIST_REMOVE(vbasedev, container_next);
QLIST_REMOVE(vbasedev, global_next);
@ -476,10 +494,13 @@ static int vfio_device_io_device_feature(VFIODevice *vbasedev,
}
static int vfio_device_io_get_region_info(VFIODevice *vbasedev,
struct vfio_region_info *info)
struct vfio_region_info *info,
int *fd)
{
int ret;
*fd = -1;
ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info);
return ret < 0 ? -errno : ret;
@ -522,7 +543,8 @@ static int vfio_device_io_region_read(VFIODevice *vbasedev, uint8_t index,
}
static int vfio_device_io_region_write(VFIODevice *vbasedev, uint8_t index,
off_t off, uint32_t size, void *data)
off_t off, uint32_t size, void *data,
bool post)
{
struct vfio_region_info *info;
int ret;

View file

@ -437,7 +437,7 @@ static void vfio_listener_commit(MemoryListener *listener)
listener);
void (*listener_commit)(VFIOContainerBase *bcontainer);
listener_commit = VFIO_IOMMU_GET_CLASS(bcontainer)->listener_begin;
listener_commit = VFIO_IOMMU_GET_CLASS(bcontainer)->listener_commit;
if (listener_commit) {
listener_commit(bcontainer);
@ -481,6 +481,13 @@ static void vfio_listener_region_add(MemoryListener *listener,
{
VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
listener);
vfio_container_region_add(bcontainer, section, false);
}
void vfio_container_region_add(VFIOContainerBase *bcontainer,
MemoryRegionSection *section,
bool cpr_remap)
{
hwaddr iova, end;
Int128 llend, llsize;
void *vaddr;
@ -516,6 +523,11 @@ static void vfio_listener_region_add(MemoryListener *listener,
int iommu_idx;
trace_vfio_listener_region_add_iommu(section->mr->name, iova, end);
if (cpr_remap) {
vfio_cpr_giommu_remap(bcontainer, section);
}
/*
* FIXME: For VFIO iommu types which have KVM acceleration to
* avoid bouncing all map/unmaps through qemu this way, this
@ -558,7 +570,12 @@ static void vfio_listener_region_add(MemoryListener *listener,
* about changes.
*/
if (memory_region_has_ram_discard_manager(section->mr)) {
vfio_ram_discard_register_listener(bcontainer, section);
if (!cpr_remap) {
vfio_ram_discard_register_listener(bcontainer, section);
} else if (!vfio_cpr_ram_discard_register_listener(bcontainer,
section)) {
goto fail;
}
return;
}

View file

@ -21,6 +21,7 @@ system_ss.add(when: 'CONFIG_VFIO_XGMAC', if_true: files('calxeda-xgmac.c'))
system_ss.add(when: 'CONFIG_VFIO_AMD_XGBE', if_true: files('amd-xgbe.c'))
system_ss.add(when: 'CONFIG_VFIO', if_true: files(
'cpr.c',
'cpr-legacy.c',
'device.c',
'migration.c',
'migration-multifd.c',

View file

@ -30,6 +30,7 @@
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
#include "migration/vmstate.h"
#include "migration/cpr.h"
#include "qobject/qdict.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
@ -56,6 +57,23 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
static void vfio_msi_disable_common(VFIOPCIDevice *vdev);
/*
 * Initialise event notifier @e for a vfio-pci device.
 *
 * @name is used only in the error message.  @vdev and @nr are accepted but
 * not used by this implementation.
 *
 * Returns true on success; on failure sets @errp and returns false.
 */
static bool vfio_notifier_init(VFIOPCIDevice *vdev, EventNotifier *e,
                               const char *name, int nr, Error **errp)
{
    int rc = event_notifier_init(e, 0);

    if (rc == 0) {
        return true;
    }

    error_setg_errno(errp, -rc, "vfio_notifier_init %s failed", name);
    return false;
}
/*
 * Counterpart of vfio_notifier_init(): release event notifier @e.
 * @vdev, @name and @nr are accepted but not used by this implementation.
 */
static void vfio_notifier_cleanup(VFIOPCIDevice *vdev, EventNotifier *e,
                                  const char *name, int nr)
{
    (void)vdev;
    (void)name;
    (void)nr;

    event_notifier_cleanup(e);
}
/*
* Disabling BAR mmaping can be slow, but toggling it around INTx can
* also be a huge overhead. We try to get the best of both worlds by
@ -103,7 +121,7 @@ static void vfio_intx_interrupt(void *opaque)
}
}
static void vfio_intx_eoi(VFIODevice *vbasedev)
void vfio_pci_intx_eoi(VFIODevice *vbasedev)
{
VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
@ -111,7 +129,7 @@ static void vfio_intx_eoi(VFIODevice *vbasedev)
return;
}
trace_vfio_intx_eoi(vbasedev->name);
trace_vfio_pci_intx_eoi(vbasedev->name);
vdev->intx.pending = false;
pci_irq_deassert(&vdev->pdev);
@ -136,8 +154,7 @@ static bool vfio_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp)
pci_irq_deassert(&vdev->pdev);
/* Get an eventfd for resample/unmask */
if (event_notifier_init(&vdev->intx.unmask, 0)) {
error_setg(errp, "event_notifier_init failed eoi");
if (!vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0, errp)) {
goto fail;
}
@ -169,7 +186,7 @@ fail_vfio:
kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vdev->intx.interrupt,
vdev->intx.route.irq);
fail_irqfd:
event_notifier_cleanup(&vdev->intx.unmask);
vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0);
fail:
qemu_set_fd_handler(irq_fd, vfio_intx_interrupt, NULL, vdev);
vfio_device_irq_unmask(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX);
@ -201,7 +218,7 @@ static void vfio_intx_disable_kvm(VFIOPCIDevice *vdev)
}
/* We only need to close the eventfd for VFIO to cleanup the kernel side */
event_notifier_cleanup(&vdev->intx.unmask);
vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0);
/* QEMU starts listening for interrupt events. */
qemu_set_fd_handler(event_notifier_get_fd(&vdev->intx.interrupt),
@ -236,7 +253,7 @@ static void vfio_intx_update(VFIOPCIDevice *vdev, PCIINTxRoute *route)
}
/* Re-enable the interrupt in case we missed an EOI */
vfio_intx_eoi(&vdev->vbasedev);
vfio_pci_intx_eoi(&vdev->vbasedev);
}
static void vfio_intx_routing_notifier(PCIDevice *pdev)
@ -268,7 +285,6 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1);
Error *err = NULL;
int32_t fd;
int ret;
if (!pin) {
@ -291,9 +307,8 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
}
#endif
ret = event_notifier_init(&vdev->intx.interrupt, 0);
if (ret) {
error_setg_errno(errp, -ret, "event_notifier_init failed");
if (!vfio_notifier_init(vdev, &vdev->intx.interrupt, "intx-interrupt", 0,
errp)) {
return false;
}
fd = event_notifier_get_fd(&vdev->intx.interrupt);
@ -302,7 +317,7 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0,
VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) {
qemu_set_fd_handler(fd, NULL, NULL, vdev);
event_notifier_cleanup(&vdev->intx.interrupt);
vfio_notifier_cleanup(vdev, &vdev->intx.interrupt, "intx-interrupt", 0);
return false;
}
@ -329,13 +344,18 @@ static void vfio_intx_disable(VFIOPCIDevice *vdev)
fd = event_notifier_get_fd(&vdev->intx.interrupt);
qemu_set_fd_handler(fd, NULL, NULL, vdev);
event_notifier_cleanup(&vdev->intx.interrupt);
vfio_notifier_cleanup(vdev, &vdev->intx.interrupt, "intx-interrupt", 0);
vdev->interrupt = VFIO_INT_NONE;
trace_vfio_intx_disable(vdev->vbasedev.name);
}
/*
 * Exported wrapper around the file-local vfio_intx_enable().
 *
 * Returns whatever vfio_intx_enable() returns; @errp is passed through
 * unchanged.
 */
bool vfio_pci_intx_enable(VFIOPCIDevice *vdev, Error **errp)
{
    bool enabled = vfio_intx_enable(vdev, errp);

    return enabled;
}
/*
* MSI/X
*/
@ -460,8 +480,8 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
return ret;
}
static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
int vector_n, bool msix)
void vfio_pci_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
int vector_n, bool msix)
{
if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi)) {
return;
@ -471,13 +491,16 @@ static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
vector_n, &vdev->pdev);
}
static void vfio_connect_kvm_msi_virq(VFIOMSIVector *vector)
static void vfio_connect_kvm_msi_virq(VFIOMSIVector *vector, int nr)
{
const char *name = "kvm_interrupt";
if (vector->virq < 0) {
return;
}
if (event_notifier_init(&vector->kvm_interrupt, 0)) {
if (!vfio_notifier_init(vector->vdev, &vector->kvm_interrupt, name, nr,
NULL)) {
goto fail_notifier;
}
@ -489,19 +512,20 @@ static void vfio_connect_kvm_msi_virq(VFIOMSIVector *vector)
return;
fail_kvm:
event_notifier_cleanup(&vector->kvm_interrupt);
vfio_notifier_cleanup(vector->vdev, &vector->kvm_interrupt, name, nr);
fail_notifier:
kvm_irqchip_release_virq(kvm_state, vector->virq);
vector->virq = -1;
}
static void vfio_remove_kvm_msi_virq(VFIOMSIVector *vector)
static void vfio_remove_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
int nr)
{
kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt,
vector->virq);
kvm_irqchip_release_virq(kvm_state, vector->virq);
vector->virq = -1;
event_notifier_cleanup(&vector->kvm_interrupt);
vfio_notifier_cleanup(vdev, &vector->kvm_interrupt, "kvm_interrupt", nr);
}
static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg,
@ -530,6 +554,24 @@ static void set_irq_signalling(VFIODevice *vbasedev, VFIOMSIVector *vector,
}
}
/*
 * Prepare MSI/MSI-X vector @nr of @vdev for use.
 *
 * Resets the vector bookkeeping (owner, no KVM virq), initializes its
 * "interrupt" notifier and marks the vector as in use.  When the device is
 * currently in MSI-X mode the vector is also registered with
 * msix_vector_use().
 *
 * A notifier initialization failure is reported but does not abort the
 * setup; the vector is still marked as used.
 */
void vfio_pci_vector_init(VFIOPCIDevice *vdev, int nr)
{
    Error *err = NULL;
    VFIOMSIVector *vec = &vdev->msi_vectors[nr];

    vec->vdev = vdev;
    vec->virq = -1;

    if (!vfio_notifier_init(vdev, &vec->interrupt, "interrupt", nr, &err)) {
        error_report_err(err);
    }

    vec->use = true;

    if (vdev->interrupt != VFIO_INT_MSIX) {
        return;
    }
    msix_vector_use(&vdev->pdev, nr);
}
static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
MSIMessage *msg, IOHandler *handler)
{
@ -543,13 +585,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
vector = &vdev->msi_vectors[nr];
if (!vector->use) {
vector->vdev = vdev;
vector->virq = -1;
if (event_notifier_init(&vector->interrupt, 0)) {
error_report("vfio: Error: event_notifier_init failed");
}
vector->use = true;
msix_vector_use(pdev, nr);
vfio_pci_vector_init(vdev, nr);
}
qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
@ -561,19 +597,19 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
*/
if (vector->virq >= 0) {
if (!msg) {
vfio_remove_kvm_msi_virq(vector);
vfio_remove_kvm_msi_virq(vdev, vector, nr);
} else {
vfio_update_kvm_msi_virq(vector, *msg, pdev);
}
} else {
if (msg) {
if (vdev->defer_kvm_irq_routing) {
vfio_add_kvm_msi_virq(vdev, vector, nr, true);
vfio_pci_add_kvm_msi_virq(vdev, vector, nr, true);
} else {
vfio_route_change = kvm_irqchip_begin_route_changes(kvm_state);
vfio_add_kvm_msi_virq(vdev, vector, nr, true);
vfio_pci_add_kvm_msi_virq(vdev, vector, nr, true);
kvm_irqchip_commit_route_changes(&vfio_route_change);
vfio_connect_kvm_msi_virq(vector);
vfio_connect_kvm_msi_virq(vector, nr);
}
}
}
@ -650,14 +686,14 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
}
}
static void vfio_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
void vfio_pci_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
{
assert(!vdev->defer_kvm_irq_routing);
vdev->defer_kvm_irq_routing = true;
vfio_route_change = kvm_irqchip_begin_route_changes(kvm_state);
}
static void vfio_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
void vfio_pci_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
{
int i;
@ -667,7 +703,7 @@ static void vfio_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
kvm_irqchip_commit_route_changes(&vfio_route_change);
for (i = 0; i < vdev->nr_vectors; i++) {
vfio_connect_kvm_msi_virq(&vdev->msi_vectors[i]);
vfio_connect_kvm_msi_virq(&vdev->msi_vectors[i], i);
}
}
@ -687,14 +723,14 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev)
* routes once rather than per vector provides a substantial
* performance improvement.
*/
vfio_prepare_kvm_msi_virq_batch(vdev);
vfio_pci_prepare_kvm_msi_virq_batch(vdev);
if (msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
vfio_msix_vector_release, NULL)) {
error_report("vfio: msix_set_vector_notifiers failed");
}
vfio_commit_kvm_msi_virq_batch(vdev);
vfio_pci_commit_kvm_msi_virq_batch(vdev);
if (vdev->nr_vectors) {
ret = vfio_enable_vectors(vdev, true);
@ -738,19 +774,21 @@ retry:
* Deferring to commit the KVM routes once rather than per vector
* provides a substantial performance improvement.
*/
vfio_prepare_kvm_msi_virq_batch(vdev);
vfio_pci_prepare_kvm_msi_virq_batch(vdev);
vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->nr_vectors);
for (i = 0; i < vdev->nr_vectors; i++) {
VFIOMSIVector *vector = &vdev->msi_vectors[i];
Error *local_err = NULL;
vector->vdev = vdev;
vector->virq = -1;
vector->use = true;
if (event_notifier_init(&vector->interrupt, 0)) {
error_report("vfio: Error: event_notifier_init failed");
if (!vfio_notifier_init(vdev, &vector->interrupt, "interrupt", i,
&local_err)) {
error_report_err(local_err);
}
qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
@ -760,10 +798,10 @@ retry:
* Attempt to enable route through KVM irqchip,
* default to userspace handling if unavailable.
*/
vfio_add_kvm_msi_virq(vdev, vector, i, false);
vfio_pci_add_kvm_msi_virq(vdev, vector, i, false);
}
vfio_commit_kvm_msi_virq_batch(vdev);
vfio_pci_commit_kvm_msi_virq_batch(vdev);
/* Set interrupt type prior to possible interrupts */
vdev->interrupt = VFIO_INT_MSI;
@ -806,11 +844,11 @@ static void vfio_msi_disable_common(VFIOPCIDevice *vdev)
VFIOMSIVector *vector = &vdev->msi_vectors[i];
if (vdev->msi_vectors[i].use) {
if (vector->virq >= 0) {
vfio_remove_kvm_msi_virq(vector);
vfio_remove_kvm_msi_virq(vdev, vector, i);
}
qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
NULL, NULL, NULL);
event_notifier_cleanup(&vector->interrupt);
vfio_notifier_cleanup(vdev, &vector->interrupt, "interrupt", i);
}
}
@ -989,7 +1027,7 @@ static int vfio_pci_config_space_write(VFIOPCIDevice *vdev, off_t offset,
{
return vdev->vbasedev.io_ops->region_write(&vdev->vbasedev,
VFIO_PCI_CONFIG_REGION_INDEX,
offset, size, data);
offset, size, data, false);
}
static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
@ -1743,7 +1781,7 @@ static bool vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp)
return true;
}
static void vfio_teardown_msi(VFIOPCIDevice *vdev)
void vfio_pci_teardown_msi(VFIOPCIDevice *vdev)
{
msi_uninit(&vdev->pdev);
@ -1793,6 +1831,9 @@ static void vfio_bar_prepare(VFIOPCIDevice *vdev, int nr)
bar->type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK :
~PCI_BASE_ADDRESS_MEM_MASK);
bar->size = bar->region.size;
/* IO regions are sync, memory can be async */
bar->region.post_wr = (bar->ioport == 0);
}
static void vfio_bars_prepare(VFIOPCIDevice *vdev)
@ -1839,7 +1880,7 @@ static void vfio_bars_register(VFIOPCIDevice *vdev)
}
}
static void vfio_bars_exit(VFIOPCIDevice *vdev)
void vfio_pci_bars_exit(VFIOPCIDevice *vdev)
{
int i;
@ -2430,7 +2471,7 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev)
g_free(config);
}
static bool vfio_add_capabilities(VFIOPCIDevice *vdev, Error **errp)
bool vfio_pci_add_capabilities(VFIOPCIDevice *vdev, Error **errp)
{
PCIDevice *pdev = &vdev->pdev;
@ -2706,7 +2747,7 @@ static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f)
static VFIODeviceOps vfio_pci_ops = {
.vfio_compute_needs_reset = vfio_pci_compute_needs_reset,
.vfio_hot_reset_multi = vfio_pci_hot_reset_multi,
.vfio_eoi = vfio_intx_eoi,
.vfio_eoi = vfio_pci_intx_eoi,
.vfio_get_object = vfio_pci_get_object,
.vfio_save_config = vfio_pci_save_config,
.vfio_load_config = vfio_pci_load_config,
@ -2777,7 +2818,7 @@ bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
return true;
}
static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
bool vfio_pci_populate_device(VFIOPCIDevice *vdev, Error **errp)
{
VFIODevice *vbasedev = &vdev->vbasedev;
struct vfio_region_info *reg_info = NULL;
@ -2823,7 +2864,7 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
return false;
}
trace_vfio_populate_device_config(vdev->vbasedev.name,
trace_vfio_pci_populate_device_config(vdev->vbasedev.name,
(unsigned long)reg_info->size,
(unsigned long)reg_info->offset,
(unsigned long)reg_info->flags);
@ -2845,7 +2886,7 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
ret = vfio_device_get_irq_info(vbasedev, VFIO_PCI_ERR_IRQ_INDEX, &irq_info);
if (ret) {
/* This can fail for an old kernel or legacy PCI dev */
trace_vfio_populate_device_get_irq_info_failure(strerror(-ret));
trace_vfio_pci_populate_device_get_irq_info_failure(strerror(-ret));
} else if (irq_info.count == 1) {
vdev->pci_aer = true;
} else {
@ -2857,7 +2898,7 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
return true;
}
static void vfio_pci_put_device(VFIOPCIDevice *vdev)
void vfio_pci_put_device(VFIOPCIDevice *vdev)
{
vfio_display_finalize(vdev);
vfio_bars_finalize(vdev);
@ -2905,7 +2946,7 @@ static void vfio_err_notifier_handler(void *opaque)
* and continue after disabling error recovery support for the
* device.
*/
static void vfio_register_err_notifier(VFIOPCIDevice *vdev)
void vfio_pci_register_err_notifier(VFIOPCIDevice *vdev)
{
Error *err = NULL;
int32_t fd;
@ -2914,8 +2955,9 @@ static void vfio_register_err_notifier(VFIOPCIDevice *vdev)
return;
}
if (event_notifier_init(&vdev->err_notifier, 0)) {
error_report("vfio: Unable to init event notifier for error detection");
if (!vfio_notifier_init(vdev, &vdev->err_notifier, "err_notifier", 0,
&err)) {
error_report_err(err);
vdev->pci_aer = false;
return;
}
@ -2927,7 +2969,7 @@ static void vfio_register_err_notifier(VFIOPCIDevice *vdev)
VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
qemu_set_fd_handler(fd, NULL, NULL, vdev);
event_notifier_cleanup(&vdev->err_notifier);
vfio_notifier_cleanup(vdev, &vdev->err_notifier, "err_notifier", 0);
vdev->pci_aer = false;
}
}
@ -2946,7 +2988,7 @@ static void vfio_unregister_err_notifier(VFIOPCIDevice *vdev)
}
qemu_set_fd_handler(event_notifier_get_fd(&vdev->err_notifier),
NULL, NULL, vdev);
event_notifier_cleanup(&vdev->err_notifier);
vfio_notifier_cleanup(vdev, &vdev->err_notifier, "err_notifier", 0);
}
static void vfio_req_notifier_handler(void *opaque)
@ -2964,7 +3006,7 @@ static void vfio_req_notifier_handler(void *opaque)
}
}
static void vfio_register_req_notifier(VFIOPCIDevice *vdev)
void vfio_pci_register_req_notifier(VFIOPCIDevice *vdev)
{
struct vfio_irq_info irq_info;
Error *err = NULL;
@ -2981,8 +3023,9 @@ static void vfio_register_req_notifier(VFIOPCIDevice *vdev)
return;
}
if (event_notifier_init(&vdev->req_notifier, 0)) {
error_report("vfio: Unable to init event notifier for device request");
if (!vfio_notifier_init(vdev, &vdev->req_notifier, "req_notifier", 0,
&err)) {
error_report_err(err);
return;
}
@ -2993,7 +3036,7 @@ static void vfio_register_req_notifier(VFIOPCIDevice *vdev)
VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
qemu_set_fd_handler(fd, NULL, NULL, vdev);
event_notifier_cleanup(&vdev->req_notifier);
vfio_notifier_cleanup(vdev, &vdev->req_notifier, "req_notifier", 0);
} else {
vdev->req_enabled = true;
}
@ -3013,12 +3056,12 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
}
qemu_set_fd_handler(event_notifier_get_fd(&vdev->req_notifier),
NULL, NULL, vdev);
event_notifier_cleanup(&vdev->req_notifier);
vfio_notifier_cleanup(vdev, &vdev->req_notifier, "req_notifier", 0);
vdev->req_enabled = false;
}
static bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp)
bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp)
{
PCIDevice *pdev = &vdev->pdev;
VFIODevice *vbasedev = &vdev->vbasedev;
@ -3124,7 +3167,7 @@ static bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp)
return true;
}
static bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp)
bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp)
{
PCIDevice *pdev = &vdev->pdev;
@ -3214,7 +3257,7 @@ static void vfio_pci_realize(PCIDevice *pdev, Error **errp)
goto error;
}
if (!vfio_populate_device(vdev, errp)) {
if (!vfio_pci_populate_device(vdev, errp)) {
goto error;
}
@ -3228,7 +3271,7 @@ static void vfio_pci_realize(PCIDevice *pdev, Error **errp)
goto out_teardown;
}
if (!vfio_add_capabilities(vdev, errp)) {
if (!vfio_pci_add_capabilities(vdev, errp)) {
goto out_unset_idev;
}
@ -3244,7 +3287,7 @@ static void vfio_pci_realize(PCIDevice *pdev, Error **errp)
vfio_bar_quirk_setup(vdev, i);
}
if (!vfio_interrupt_setup(vdev, errp)) {
if (!vfio_pci_interrupt_setup(vdev, errp)) {
goto out_unset_idev;
}
@ -3288,8 +3331,8 @@ static void vfio_pci_realize(PCIDevice *pdev, Error **errp)
}
}
vfio_register_err_notifier(vdev);
vfio_register_req_notifier(vdev);
vfio_pci_register_err_notifier(vdev);
vfio_pci_register_req_notifier(vdev);
vfio_setup_resetfn_quirk(vdev);
return;
@ -3310,8 +3353,8 @@ out_unset_idev:
pci_device_unset_iommu_device(pdev);
}
out_teardown:
vfio_teardown_msi(vdev);
vfio_bars_exit(vdev);
vfio_pci_teardown_msi(vdev);
vfio_pci_bars_exit(vdev);
error:
error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
}
@ -3338,9 +3381,9 @@ static void vfio_exitfn(PCIDevice *pdev)
if (vdev->intx.mmap_timer) {
timer_free(vdev->intx.mmap_timer);
}
vfio_teardown_msi(vdev);
vfio_pci_teardown_msi(vdev);
vfio_pci_disable_rp_atomics(vdev);
vfio_bars_exit(vdev);
vfio_pci_bars_exit(vdev);
vfio_migration_exit(vbasedev);
if (!vbasedev->mdev) {
pci_device_unset_iommu_device(pdev);
@ -3351,6 +3394,11 @@ static void vfio_pci_reset(DeviceState *dev)
{
VFIOPCIDevice *vdev = VFIO_PCI_BASE(dev);
/* Do not reset the device during qemu_system_reset prior to cpr load */
if (cpr_is_incoming()) {
return;
}
trace_vfio_pci_reset(vdev->vbasedev.name);
vfio_pci_pre_reset(vdev);
@ -3408,6 +3456,13 @@ static void vfio_instance_init(Object *obj)
/* QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command
* line, therefore, no need to wait to realize like other devices */
pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
/*
* A device that is resuming for cpr is already configured, so do not
* reset it during qemu_system_reset prior to cpr load, else interrupts
* may be lost.
*/
pci_dev->cap_present |= QEMU_PCI_SKIP_RESET_ON_CPR;
}
static void vfio_pci_base_dev_class_init(ObjectClass *klass, const void *data)
@ -3425,7 +3480,7 @@ static void vfio_pci_base_dev_class_init(ObjectClass *klass, const void *data)
static const TypeInfo vfio_pci_base_dev_info = {
.name = TYPE_VFIO_PCI_BASE,
.parent = TYPE_PCI_DEVICE,
.instance_size = 0,
.instance_size = sizeof(VFIOPCIDevice),
.abstract = true,
.class_init = vfio_pci_base_dev_class_init,
.interfaces = (const InterfaceInfo[]) {
@ -3520,6 +3575,7 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data)
#ifdef CONFIG_IOMMUFD
object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd);
#endif
dc->vmsd = &vfio_cpr_pci_vmstate;
dc->desc = "VFIO-based PCI device assignment";
pdc->realize = vfio_pci_realize;
@ -3647,7 +3703,6 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data)
static const TypeInfo vfio_pci_dev_info = {
.name = TYPE_VFIO_PCI,
.parent = TYPE_VFIO_PCI_BASE,
.instance_size = sizeof(VFIOPCIDevice),
.class_init = vfio_pci_dev_class_init,
.instance_init = vfio_instance_init,
.instance_finalize = vfio_instance_finalize,

View file

@ -210,6 +210,14 @@ static inline bool vfio_is_vga(VFIOPCIDevice *vdev)
return class == PCI_CLASS_DISPLAY_VGA;
}
/* MSI/MSI-X/INTx */
void vfio_pci_vector_init(VFIOPCIDevice *vdev, int nr);
void vfio_pci_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
int vector_n, bool msix);
void vfio_pci_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev);
void vfio_pci_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev);
bool vfio_pci_intx_enable(VFIOPCIDevice *vdev, Error **errp);
uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
void vfio_pci_write_config(PCIDevice *pdev,
uint32_t addr, uint32_t val, int len);
@ -248,4 +256,15 @@ void vfio_display_finalize(VFIOPCIDevice *vdev);
extern const VMStateDescription vfio_display_vmstate;
void vfio_pci_bars_exit(VFIOPCIDevice *vdev);
bool vfio_pci_add_capabilities(VFIOPCIDevice *vdev, Error **errp);
bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp);
bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp);
void vfio_pci_intx_eoi(VFIODevice *vbasedev);
void vfio_pci_put_device(VFIOPCIDevice *vdev);
bool vfio_pci_populate_device(VFIOPCIDevice *vdev, Error **errp);
void vfio_pci_register_err_notifier(VFIOPCIDevice *vdev);
void vfio_pci_register_req_notifier(VFIOPCIDevice *vdev);
void vfio_pci_teardown_msi(VFIOPCIDevice *vdev);
#endif /* HW_VFIO_VFIO_PCI_H */

View file

@ -66,7 +66,7 @@ void vfio_region_write(void *opaque, hwaddr addr,
}
ret = vbasedev->io_ops->region_write(vbasedev, region->nr,
addr, size, &buf);
addr, size, &buf, region->post_wr);
if (ret != size) {
error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64
",%d) failed: %s",
@ -200,6 +200,7 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
region->size = info->size;
region->fd_offset = info->offset;
region->nr = index;
region->post_wr = false;
if (region->size) {
region->mem = g_new0(MemoryRegion, 1);
@ -241,6 +242,7 @@ int vfio_region_mmap(VFIORegion *region)
{
int i, ret, prot = 0;
char *name;
int fd;
if (!region->mem) {
return 0;
@ -271,14 +273,18 @@ int vfio_region_mmap(VFIORegion *region)
goto no_mmap;
}
/*
 * Use the per-region fd if set, or the shared fd.
 *
 * Terminated with ';' so this is a statement of its own; a trailing ','
 * would silently chain it to the next assignment via the comma operator.
 */
fd = region->vbasedev->region_fds ?
     region->vbasedev->region_fds[region->nr] :
     region->vbasedev->fd;
map_align = (void *)ROUND_UP((uintptr_t)map_base, (uintptr_t)align);
munmap(map_base, map_align - map_base);
munmap(map_align + region->mmaps[i].size,
align - (map_align - map_base));
region->mmaps[i].mmap = mmap(map_align, region->mmaps[i].size, prot,
MAP_SHARED | MAP_FIXED,
region->vbasedev->fd,
MAP_SHARED | MAP_FIXED, fd,
region->fd_offset +
region->mmaps[i].offset);
if (region->mmaps[i].mmap == MAP_FAILED) {

View file

@ -2,7 +2,7 @@
# pci.c
vfio_intx_interrupt(const char *name, char line) " (%s) Pin %c"
vfio_intx_eoi(const char *name) " (%s) EOI"
vfio_pci_intx_eoi(const char *name) " (%s) EOI"
vfio_intx_enable_kvm(const char *name) " (%s) KVM INTx accel enabled"
vfio_intx_disable_kvm(const char *name) " (%s) KVM INTx accel disabled"
vfio_intx_update(const char *name, int new_irq, int target_irq) " (%s) IRQ moved %d -> %d"
@ -35,8 +35,8 @@ vfio_pci_hot_reset(const char *name, const char *type) " (%s) %s"
vfio_pci_hot_reset_has_dep_devices(const char *name) "%s: hot reset dependent devices:"
vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int group_id) "\t%04x:%02x:%02x.%x group %d"
vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s"
vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device '%s' config: size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
vfio_populate_device_get_irq_info_failure(const char *errstr) "VFIO_DEVICE_GET_IRQ_INFO failure: %s"
vfio_pci_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device '%s' config: size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
vfio_pci_populate_device_get_irq_info_failure(const char *errstr) "VFIO_DEVICE_GET_IRQ_INFO failure: %s"
vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d"
vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x"
vfio_pci_reset(const char *name) " (%s)"

View file

@ -32,6 +32,7 @@ int msix_present(PCIDevice *dev);
bool msix_is_masked(PCIDevice *dev, unsigned vector);
void msix_set_pending(PCIDevice *dev, unsigned vector);
void msix_clr_pending(PCIDevice *dev, int vector);
int msix_is_pending(PCIDevice *dev, unsigned vector);
void msix_vector_use(PCIDevice *dev, unsigned vector);
void msix_vector_unuse(PCIDevice *dev, unsigned vector);

View file

@ -222,6 +222,8 @@ enum {
QEMU_PCIE_EXT_TAG = (1 << QEMU_PCIE_EXT_TAG_BITNR),
#define QEMU_PCI_CAP_PM_BITNR 14
QEMU_PCI_CAP_PM = (1 << QEMU_PCI_CAP_PM_BITNR),
#define QEMU_PCI_SKIP_RESET_ON_CPR_BITNR 15
QEMU_PCI_SKIP_RESET_ON_CPR = (1 << QEMU_PCI_SKIP_RESET_ON_CPR_BITNR),
};
typedef struct PCIINTxRoute {

View file

@ -16,4 +16,43 @@
void s390_init_ap(void);
/*
 * Packed response layout filled in by ap_chsc_sei_nt0_get_event() for an
 * SEI NT0 event.  The NT0_RES_* constants below give the values used for
 * the code, nt, rs and cc fields.
 */
typedef struct ChscSeiNt0Res {
    uint16_t length;
    uint16_t code;
    uint8_t reserved1;
    uint16_t reserved2;
    uint8_t nt;
/* Bit mask for @flags; no trailing ';' so it can be used in expressions */
#define PENDING_EVENT_INFO_BITMASK 0x80
    uint8_t flags;
    uint8_t reserved3;
    uint8_t rs;
    uint8_t cc;
} QEMU_PACKED ChscSeiNt0Res;
#define NT0_RES_RESPONSE_CODE 1
#define NT0_RES_NT_DEFAULT 0
#define NT0_RES_RS_AP_CHANGE 5
#define NT0_RES_CC_AP_CHANGE 3
#define EVENT_INFORMATION_NOT_STORED 1
#define EVENT_INFORMATION_STORED 0
/**
* ap_chsc_sei_nt0_get_event - Retrieve the next pending AP config
* change event
* @res: Pointer to a ChscSeiNt0Res struct to be filled with event
* data
*
* This function checks for any pending AP config change events and,
* if present, populates the provided response structure with the
* appropriate SEI NT0 fields.
*
* Return:
* EVENT_INFORMATION_STORED - An event was available and written to @res
* EVENT_INFORMATION_NOT_STORED - No event was available
*/
int ap_chsc_sei_nt0_get_event(void *res);
bool ap_chsc_sei_nt0_have_event(void);
#endif

View file

@ -256,4 +256,7 @@ struct VFIOIOMMUClass {
VFIORamDiscardListener *vfio_find_ram_discard_listener(
VFIOContainerBase *bcontainer, MemoryRegionSection *section);
void vfio_container_region_add(VFIOContainerBase *bcontainer,
MemoryRegionSection *section, bool cpr_remap);
#endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */

View file

@ -10,6 +10,7 @@
#define HW_VFIO_CONTAINER_H
#include "hw/vfio/vfio-container-base.h"
#include "hw/vfio/vfio-cpr.h"
typedef struct VFIOContainer VFIOContainer;
typedef struct VFIODevice VFIODevice;
@ -29,6 +30,7 @@ typedef struct VFIOContainer {
int fd; /* /dev/vfio/vfio, empowered by the attached groups */
unsigned iommu_type;
QLIST_HEAD(, VFIOGroup) group_list;
VFIOContainerCPR cpr;
} VFIOContainer;
OBJECT_DECLARE_SIMPLE_TYPE(VFIOContainer, VFIO_IOMMU_LEGACY);

View file

@ -9,10 +9,49 @@
#ifndef HW_VFIO_VFIO_CPR_H
#define HW_VFIO_VFIO_CPR_H
#include "migration/misc.h"
#include "system/memory.h"
struct VFIOContainer;
struct VFIOContainerBase;
struct VFIOGroup;
/*
 * CPR state kept per legacy container; embedded in VFIOContainer as its
 * 'cpr' member.
 */
typedef struct VFIOContainerCPR {
/* NOTE(review): presumably a migration blocker owned by the container - confirm */
Error *blocker;
/* NOTE(review): name suggests it tracks whether vaddrs are currently unmapped - confirm */
bool vaddr_unmapped;
/* NOTE(review): presumably the notifier for cpr-transfer events - confirm */
NotifierWithReturn transfer_notifier;
/* NOTE(review): presumably the memory listener used to remap DMA after CPR - confirm */
MemoryListener remap_listener;
/*
 * Function pointer with a dma_map-style signature.
 * NOTE(review): presumably holds the container's original dma_map handler
 * while it is temporarily overridden - confirm against cpr-legacy.c.
 */
int (*saved_dma_map)(const struct VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
void *vaddr, bool readonly, MemoryRegion *mr);
} VFIOContainerCPR;
/* CPR state kept per device; embedded in VFIODevice as its 'cpr' member. */
typedef struct VFIODeviceCPR {
/* NOTE(review): presumably a blocker registered for mdev devices - confirm */
Error *mdev_blocker;
} VFIODeviceCPR;
bool vfio_legacy_cpr_register_container(struct VFIOContainer *container,
Error **errp);
void vfio_legacy_cpr_unregister_container(struct VFIOContainer *container);
int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, MigrationEvent *e,
Error **errp);
bool vfio_cpr_register_container(struct VFIOContainerBase *bcontainer,
Error **errp);
void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer);
int vfio_cpr_group_get_device_fd(int d, const char *name);
bool vfio_cpr_container_match(struct VFIOContainer *container,
struct VFIOGroup *group, int fd);
void vfio_cpr_giommu_remap(struct VFIOContainerBase *bcontainer,
MemoryRegionSection *section);
bool vfio_cpr_ram_discard_register_listener(
struct VFIOContainerBase *bcontainer, MemoryRegionSection *section);
extern const VMStateDescription vfio_cpr_pci_vmstate;
#endif /* HW_VFIO_VFIO_CPR_H */

View file

@ -28,6 +28,7 @@
#endif
#include "system/system.h"
#include "hw/vfio/vfio-container-base.h"
#include "hw/vfio/vfio-cpr.h"
#include "system/host_iommu_device.h"
#include "system/iommufd.h"
@ -66,6 +67,7 @@ typedef struct VFIODevice {
OnOffAuto enable_migration;
OnOffAuto migration_multifd_transfer;
bool migration_events;
bool use_region_fds;
VFIODeviceOps *ops;
VFIODeviceIOOps *io_ops;
unsigned int num_irqs;
@ -84,6 +86,8 @@ typedef struct VFIODevice {
VFIOIOASHwpt *hwpt;
QLIST_ENTRY(VFIODevice) hwpt_next;
struct vfio_region_info **reginfo;
int *region_fds;
VFIODeviceCPR cpr;
} VFIODevice;
struct VFIODeviceOps {
@ -164,36 +168,64 @@ struct VFIODeviceIOOps {
* @device_feature
*
* Fill in feature info for the given device.
*
* @vdev: #VFIODevice to use
* @feat: feature information to fill in
*
* Returns 0 on success or -errno.
*/
int (*device_feature)(VFIODevice *vdev, struct vfio_device_feature *);
int (*device_feature)(VFIODevice *vdev, struct vfio_device_feature *feat);
/**
* @get_region_info
*
* Fill in @info with information on the region given by @info->index.
* Get the information for a given region on the device.
*
* @vdev: #VFIODevice to use
* @info: set @info->index to the region index to look up; the rest of the
* struct will be filled in on success
* @fd: pointer to the fd for the region; will be -1 if not found
*
* Returns 0 on success or -errno.
*/
int (*get_region_info)(VFIODevice *vdev,
struct vfio_region_info *info);
struct vfio_region_info *info, int *fd);
/**
* @get_irq_info
*
* Fill in @irq with information on the IRQ given by @info->index.
* @vdev: #VFIODevice to use
* @irq: set @irq->index to the IRQ index to look up; the rest of the struct
* will be filled in on success
*
* Returns 0 on success or -errno.
*/
int (*get_irq_info)(VFIODevice *vdev, struct vfio_irq_info *irq);
/**
* @set_irqs
*
* Configure IRQs as defined by @irqs.
* Configure IRQs.
*
* @vdev: #VFIODevice to use
* @irqs: IRQ configuration as defined by VFIO docs.
*
* Returns 0 on success or -errno.
*/
int (*set_irqs)(VFIODevice *vdev, struct vfio_irq_set *irqs);
/**
* @region_read
*
* Read @size bytes from the region @nr at offset @off into the buffer
* @data.
* Read part of a region.
*
* @vdev: #VFIODevice to use
* @nr: region index
* @off: offset within the region
* @size: size in bytes to read
* @data: buffer to read into
*
* Returns number of bytes read on success or -errno.
*/
int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size,
void *data);
@ -201,11 +233,18 @@ struct VFIODeviceIOOps {
/**
* @region_write
*
* Write @size bytes to the region @nr at offset @off from the buffer
* @data.
* Write part of a region.
*
* @vdev: #VFIODevice to use
* @nr: region index
* @off: offset within the region
* @size: size in bytes to write
* @data: buffer to write from
* @post: true if the write can be posted
*
* Returns number of bytes written on success or -errno.
*/
int (*region_write)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size,
void *data);
void *data, bool post);
};
void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,

View file

@ -29,6 +29,7 @@ typedef struct VFIORegion {
uint32_t nr_mmaps;
VFIOMmap *mmaps;
uint8_t nr; /* cache the region number for debug */
bool post_wr; /* writes can be posted */
} VFIORegion;

View file

@ -18,6 +18,9 @@
void cpr_save_fd(const char *name, int id, int fd);
void cpr_delete_fd(const char *name, int id);
int cpr_find_fd(const char *name, int id);
void cpr_resave_fd(const char *name, int id, int fd);
int cpr_open_fd(const char *path, int flags, const char *name, int id,
Error **errp);
MigMode cpr_get_incoming_mode(void);
void cpr_set_incoming_mode(MigMode mode);
@ -28,6 +31,8 @@ int cpr_state_load(MigrationChannel *channel, Error **errp);
void cpr_state_close(void);
struct QIOChannel *cpr_state_ioc(void);
bool cpr_incoming_needed(void *opaque);
QEMUFile *cpr_transfer_output(MigrationChannel *channel, Error **errp);
QEMUFile *cpr_transfer_input(MigrationChannel *channel, Error **errp);

View file

@ -155,7 +155,11 @@ enum VMStateFlags {
};
typedef enum {
MIG_PRI_DEFAULT = 0,
MIG_PRI_UNINITIALIZED = 0, /* An uninitialized priority field maps to */
/* MIG_PRI_DEFAULT in save_state_priority */
MIG_PRI_LOW, /* Must happen after default */
MIG_PRI_DEFAULT,
MIG_PRI_IOMMU, /* Must happen before PCI devices */
MIG_PRI_PCI_BUS, /* Must happen before IOMMU */
MIG_PRI_VIRTIO_MEM, /* Must happen before IOMMU */

View file

@ -95,6 +95,36 @@ int cpr_find_fd(const char *name, int id)
trace_cpr_find_fd(name, id, fd);
return fd;
}
/*
 * Save @fd under (@name, @id) unless it is already saved.
 *
 * If no valid fd is recorded for the key, @fd is saved.  If the same value
 * is already recorded, nothing happens.  A different recorded value is
 * treated as an internal error and reported through error_fatal.
 */
void cpr_resave_fd(const char *name, int id, int fd)
{
    CprFd *entry = find_fd(&cpr_state.fds, name, id);
    int saved = -1;

    if (entry) {
        saved = entry->fd;
    }

    if (saved < 0) {
        cpr_save_fd(name, id, fd);
        return;
    }

    if (saved == fd) {
        return;
    }

    error_setg(&error_fatal,
               "internal error: cpr fd '%s' id %d value %d "
               "already saved with a different value %d",
               name, id, fd, saved);
}
/*
 * Return the fd recorded for (@name, @id), opening @path if none exists.
 *
 * When cpr_find_fd() has no entry, @path is opened with qemu_open() using
 * @flags, and a successfully opened fd is recorded with cpr_save_fd().
 *
 * Returns the fd, or the negative value returned by qemu_open() on failure
 * (in which case @errp is set by qemu_open()).
 */
int cpr_open_fd(const char *path, int flags, const char *name, int id,
                Error **errp)
{
    int fd = cpr_find_fd(name, id);

    if (fd >= 0) {
        return fd;
    }

    fd = qemu_open(path, flags, errp);
    if (fd < 0) {
        return fd;
    }

    cpr_save_fd(name, id, fd);
    return fd;
}
/*************************************************************************/
#define CPR_STATE "CprState"
@ -228,3 +258,9 @@ void cpr_state_close(void)
cpr_state_file = NULL;
}
}
/*
 * Report whether the current migration mode is cpr-transfer.
 *
 * @opaque is not used; the signature matches a vmstate-style "needed"
 * callback.
 */
bool cpr_incoming_needed(void *opaque)
{
    return migrate_mode() == MIG_MODE_CPR_TRANSFER;
}

View file

@ -266,7 +266,7 @@ typedef struct SaveState {
static SaveState savevm_state = {
.handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
.handler_pri_head = { [MIG_PRI_DEFAULT ... MIG_PRI_MAX] = NULL },
.handler_pri_head = { [0 ... MIG_PRI_MAX] = NULL },
.global_section_id = 0,
};
@ -737,7 +737,7 @@ static int calculate_compat_instance_id(const char *idstr)
static inline MigrationPriority save_state_priority(SaveStateEntry *se)
{
if (se->vmsd) {
if (se->vmsd && se->vmsd->priority) {
return se->vmsd->priority;
}
return MIG_PRI_DEFAULT;

View file

@ -18,6 +18,7 @@
#include "trace.h"
#include "hw/s390x/s390-pci-bus.h"
#include "target/s390x/kvm/pv.h"
#include "hw/s390x/ap-bridge.h"
/* All I/O instructions but chsc use the s format */
static uint64_t get_address_from_regs(CPUS390XState *env, uint32_t ipb,
@ -574,13 +575,19 @@ out:
/*
 * Fetch the next pending NT0 event into @res.
 *
 * With the AP feature available this defers to ap_chsc_sei_nt0_get_event();
 * otherwise it returns 1, which matches EVENT_INFORMATION_NOT_STORED in
 * hw/s390x/ap-bridge.h.
 */
static int chsc_sei_nt0_get_event(void *res)
{
/* AP is the only source of NT0 events handled here */
if (s390_has_feat(S390_FEAT_AP)) {
return ap_chsc_sei_nt0_get_event(res);
}
return 1;
}
}
/*
 * Report whether an NT0 event is pending.
 *
 * With the AP feature available this defers to ap_chsc_sei_nt0_have_event();
 * otherwise it returns 0.
 */
static int chsc_sei_nt0_have_event(void)
{
/* AP is the only source of NT0 events handled here */
if (s390_has_feat(S390_FEAT_AP)) {
return ap_chsc_sei_nt0_have_event();
}
return 0;
}
}