
For vfio-user, device operations such as IRQ handling and region
read/writes are implemented in userspace over the control socket, not
via ioctl() to the vfio kernel driver. Add an ops vector to generalize
this, and implement vfio_device_io_ops_ioctl for interacting with the
kernel vfio driver.

Originally-by: John Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: John Levon <john.levon@nutanix.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Link: https://lore.kernel.org/qemu-devel/20250507152020.1254632-11-john.levon@nutanix.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
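To make the ops-vector idea concrete, here is a minimal sketch of the shape
such a vector could take, assuming the usual <linux/vfio.h>, <sys/ioctl.h>
and VFIODevice declarations are in scope. Only device_feature is grounded in
the file below (it is invoked via vbasedev->io_ops); the region accessors and
the exact struct layout are illustrative assumptions, not the upstream
definition.

/*
 * Illustrative sketch only; not the exact upstream definition.
 * device_feature is the hook this file invokes through
 * vbasedev->io_ops; the region accessors are assumed members shown
 * to convey the idea of the indirection.
 */
typedef struct VFIODeviceIOOps {
    int (*device_feature)(VFIODevice *vbasedev,
                          struct vfio_device_feature *feature);
    int (*region_read)(VFIODevice *vbasedev, uint8_t index, off_t off,
                       uint32_t size, void *data);
    int (*region_write)(VFIODevice *vbasedev, uint8_t index, off_t off,
                        uint32_t size, void *data);
} VFIODeviceIOOps;

/*
 * Kernel-driver backend: forward the feature request as a plain
 * ioctl(2) on the device fd, converting the errno convention to a
 * negative return value.
 */
static int vfio_device_io_ioctl_device_feature(VFIODevice *vbasedev,
                                      struct vfio_device_feature *feature)
{
    int ret = ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature);

    return ret < 0 ? -errno : ret;
}

static VFIODeviceIOOps vfio_device_io_ops_ioctl = {
    .device_feature = vfio_device_io_ioctl_device_feature,
    /* ... other hooks elided ... */
};

A vfio-user backend would supply an alternative vector whose hooks marshal
the same requests over the control socket instead of issuing ioctls.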
/*
 * VFIO BASE CONTAINER
 *
 * Copyright (C) 2023 Intel Corporation.
 * Copyright Red Hat, Inc. 2023
 *
 * Authors: Yi Liu <yi.l.liu@intel.com>
 *          Eric Auger <eric.auger@redhat.com>
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include <sys/ioctl.h>
#include <linux/vfio.h>

#include "qemu/osdep.h"
#include "system/tcg.h"
#include "system/ram_addr.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "hw/vfio/vfio-container-base.h"
#include "hw/vfio/vfio-device.h" /* vfio_device_reset_handler */
#include "system/reset.h"
#include "vfio-helpers.h"

#include "trace.h"

static QLIST_HEAD(, VFIOAddressSpace) vfio_address_spaces =
    QLIST_HEAD_INITIALIZER(vfio_address_spaces);

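/*
 * Return the VFIOAddressSpace corresponding to @as, creating and
 * registering a new one on first use.  Creating the first address
 * space also installs the global VFIO device reset handler.
 */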
VFIOAddressSpace *vfio_address_space_get(AddressSpace *as)
{
    VFIOAddressSpace *space;

    QLIST_FOREACH(space, &vfio_address_spaces, list) {
        if (space->as == as) {
            return space;
        }
    }

    /* No suitable VFIOAddressSpace, create a new one */
    space = g_malloc0(sizeof(*space));
    space->as = as;
    QLIST_INIT(&space->containers);

    if (QLIST_EMPTY(&vfio_address_spaces)) {
        qemu_register_reset(vfio_device_reset_handler, NULL);
    }

    QLIST_INSERT_HEAD(&vfio_address_spaces, space, list);

    return space;
}

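/*
 * Drop @space once no containers reference it any more.  When the last
 * address space goes away, the VFIO device reset handler is
 * unregistered as well.
 */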
void vfio_address_space_put(VFIOAddressSpace *space)
{
    if (!QLIST_EMPTY(&space->containers)) {
        return;
    }

    QLIST_REMOVE(space, list);
    g_free(space);

    if (QLIST_EMPTY(&vfio_address_spaces)) {
        qemu_unregister_reset(vfio_device_reset_handler, NULL);
    }
}

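/* Link @bcontainer into @space's container list and point it back at @space. */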
void vfio_address_space_insert(VFIOAddressSpace *space,
                               VFIOContainerBase *bcontainer)
{
    QLIST_INSERT_HEAD(&space->containers, bcontainer, next);
    bcontainer->space = space;
}

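/* Map @vaddr at @iova into the container; dispatches to the IOMMU backend. */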
int vfio_container_dma_map(VFIOContainerBase *bcontainer,
                           hwaddr iova, ram_addr_t size,
                           void *vaddr, bool readonly)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);

    g_assert(vioc->dma_map);
    return vioc->dma_map(bcontainer, iova, size, vaddr, readonly);
}

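/*
 * Unmap @iova/@size (or every mapping when @unmap_all is set);
 * dispatches to the IOMMU backend.
 */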
int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
                             hwaddr iova, ram_addr_t size,
                             IOMMUTLBEntry *iotlb, bool unmap_all)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);

    g_assert(vioc->dma_unmap);
    return vioc->dma_unmap(bcontainer, iova, size, iotlb, unmap_all);
}

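/*
 * Backends that manage DMA windows (e.g. sPAPR) implement add_window;
 * for all others this is a successful no-op.
 */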
bool vfio_container_add_section_window(VFIOContainerBase *bcontainer,
                                       MemoryRegionSection *section,
                                       Error **errp)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);

    if (!vioc->add_window) {
        return true;
    }

    return vioc->add_window(bcontainer, section, errp);
}

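/*
 * Counterpart of vfio_container_add_section_window(); a no-op when the
 * backend provides no del_window hook.
 */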
void vfio_container_del_section_window(VFIOContainerBase *bcontainer,
                                       MemoryRegionSection *section)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);

    if (!vioc->del_window) {
        return;
    }

    return vioc->del_window(bcontainer, section);
}

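/*
 * Start or stop container-level dirty page tracking.  A no-op when the
 * container lacks support or is already in the requested state.
 */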
int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
                                           bool start, Error **errp)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
    int ret;

    if (!bcontainer->dirty_pages_supported) {
        return 0;
    }

    g_assert(vioc->set_dirty_page_tracking);
    if (bcontainer->dirty_pages_started == start) {
        return 0;
    }

    ret = vioc->set_dirty_page_tracking(bcontainer, start, errp);
    if (!ret) {
        bcontainer->dirty_pages_started = start;
    }

    return ret;
}

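/* True only if every device in the container has dirty tracking enabled. */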
static bool vfio_container_devices_dirty_tracking_is_started(
    const VFIOContainerBase *bcontainer)
{
    VFIODevice *vbasedev;

    QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
        if (!vbasedev->dirty_tracking) {
            return false;
        }
    }

    return true;
}

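/*
 * Dirty tracking counts as started via either per-device logging or
 * container-level logging.
 */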
bool vfio_container_dirty_tracking_is_started(
    const VFIOContainerBase *bcontainer)
{
    return vfio_container_devices_dirty_tracking_is_started(bcontainer) ||
           bcontainer->dirty_pages_started;
}

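/*
 * True if every device in the container supports device-level dirty
 * page tracking and none has it explicitly disabled.
 */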
bool vfio_container_devices_dirty_tracking_is_supported(
    const VFIOContainerBase *bcontainer)
{
    VFIODevice *vbasedev;

    QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
        if (vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) {
            return false;
        }
        if (!vbasedev->dirty_pages_supported) {
            return false;
        }
    }

    return true;
}

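/*
 * Fetch a device-level dirty bitmap via the
 * VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT feature.  The request goes
 * through the device's io_ops vector, so the same code serves both the
 * kernel ioctl() backend and vfio-user.
 */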
static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova,
                                          hwaddr size, void *bitmap)
{
    uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
                        sizeof(struct vfio_device_feature_dma_logging_report),
                        sizeof(uint64_t))] = {};
    struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
    struct vfio_device_feature_dma_logging_report *report =
        (struct vfio_device_feature_dma_logging_report *)feature->data;

    report->iova = iova;
    report->length = size;
    report->page_size = qemu_real_host_page_size();
    report->bitmap = (uintptr_t)bitmap;

    feature->argsz = sizeof(buf);
    feature->flags = VFIO_DEVICE_FEATURE_GET |
                     VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;

    return vbasedev->io_ops->device_feature(vbasedev, feature);
}

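/* Query the dirty bitmap from the container's IOMMU backend. */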
static int vfio_container_iommu_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
                   VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);

    g_assert(vioc->query_dirty_bitmap);
    return vioc->query_dirty_bitmap(bcontainer, vbmap, iova, size,
                                    errp);
}

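/*
 * Collect DMA logging reports for @iova/@size from every device in the
 * container into @vbmap, stopping at the first failure.
 */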
static int vfio_container_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
                   VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp)
{
    VFIODevice *vbasedev;
    int ret;

    QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
        ret = vfio_device_dma_logging_report(vbasedev, iova, size,
                                             vbmap->bitmap);
        if (ret) {
            error_setg_errno(errp, -ret,
                             "%s: Failed to get DMA logging report, iova: "
                             "0x%" HWADDR_PRIx ", size: 0x%" HWADDR_PRIx,
                             vbasedev->name, iova, size);

            return ret;
        }
    }

    return 0;
}

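/*
 * Sync the dirty state of @iova/@size into QEMU's memory dirty bitmap
 * at @ram_addr.  Without any dirty tracking support, conservatively
 * mark the whole range dirty; otherwise prefer per-device reports over
 * the container backend.
 */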
int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
                                      uint64_t size, ram_addr_t ram_addr, Error **errp)
{
    bool all_device_dirty_tracking =
        vfio_container_devices_dirty_tracking_is_supported(bcontainer);
    uint64_t dirty_pages;
    VFIOBitmap vbmap;
    int ret;

    if (!bcontainer->dirty_pages_supported && !all_device_dirty_tracking) {
        cpu_physical_memory_set_dirty_range(ram_addr, size,
                                            tcg_enabled() ? DIRTY_CLIENTS_ALL :
                                            DIRTY_CLIENTS_NOCODE);
        return 0;
    }

    ret = vfio_bitmap_alloc(&vbmap, size);
    if (ret) {
        error_setg_errno(errp, -ret,
                         "Failed to allocate dirty tracking bitmap");
        return ret;
    }

    if (all_device_dirty_tracking) {
        ret = vfio_container_devices_query_dirty_bitmap(bcontainer, &vbmap, iova, size,
                                                        errp);
    } else {
        ret = vfio_container_iommu_query_dirty_bitmap(bcontainer, &vbmap, iova, size,
                                                      errp);
    }

    if (ret) {
        goto out;
    }

    dirty_pages = cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, ram_addr,
                                                         vbmap.pages);

    trace_vfio_container_query_dirty_bitmap(iova, size, vbmap.size, ram_addr,
                                            dirty_pages);
out:
    g_free(vbmap.bitmap);

    return ret;
}

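/* GCopyFunc helper: deep-copy a single Range element. */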
static gpointer copy_iova_range(gconstpointer src, gpointer data)
{
    Range *source = (Range *)src;
    Range *dest = g_new(Range, 1);

    range_set_bounds(dest, range_lob(source), range_upb(source));
    return dest;
}

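/*
 * Return a deep copy of the container's usable IOVA ranges; the caller
 * owns the returned list.
 */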
GList *vfio_container_get_iova_ranges(const VFIOContainerBase *bcontainer)
{
    assert(bcontainer);
    return g_list_copy_deep(bcontainer->iova_ranges, copy_iova_range, NULL);
}

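/*
 * QOM finalize: detach the container from its address space, tear down
 * the guest IOMMU notifiers and free the IOVA range list.
 */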
static void vfio_container_instance_finalize(Object *obj)
{
    VFIOContainerBase *bcontainer = VFIO_IOMMU(obj);
    VFIOGuestIOMMU *giommu, *tmp;

    QLIST_SAFE_REMOVE(bcontainer, next);

    QLIST_FOREACH_SAFE(giommu, &bcontainer->giommu_list, giommu_next, tmp) {
        memory_region_unregister_iommu_notifier(
                MEMORY_REGION(giommu->iommu_mr), &giommu->n);
        QLIST_REMOVE(giommu, giommu_next);
        g_free(giommu);
    }

    g_list_free_full(bcontainer->iova_ranges, g_free);
}

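/* QOM init: start with no error, no dirty tracking support and empty lists. */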
static void vfio_container_instance_init(Object *obj)
{
    VFIOContainerBase *bcontainer = VFIO_IOMMU(obj);

    bcontainer->error = NULL;
    bcontainer->dirty_pages_supported = false;
    bcontainer->dma_max_mappings = 0;
    bcontainer->iova_ranges = NULL;
    QLIST_INIT(&bcontainer->giommu_list);
    QLIST_INIT(&bcontainer->vrdl_list);
}

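/* TYPE_VFIO_IOMMU is an abstract base type; IOMMU backends subclass it. */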
static const TypeInfo types[] = {
    {
        .name = TYPE_VFIO_IOMMU,
        .parent = TYPE_OBJECT,
        .instance_init = vfio_container_instance_init,
        .instance_finalize = vfio_container_instance_finalize,
        .instance_size = sizeof(VFIOContainerBase),
        .class_size = sizeof(VFIOIOMMUClass),
        .abstract = true,
    },
};

DEFINE_TYPES(types)