vfio: add per-region fd support

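For vfio-user, each region has its own fd rather than sharing the
device-wide vbasedev->fd. Add the necessary plumbing to support this
(an optional fd returned by the get_region_info() callback and cached
per region in vbasedev->region_fds), and use the correct fd in
vfio_region_mmap().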

Signed-off-by: John Levon <john.levon@nutanix.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Link: https://lore.kernel.org/qemu-devel/20250607001056.335310-4-john.levon@nutanix.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
This commit is contained in:
John Levon 2025-06-06 17:10:35 -07:00 committed by Cédric Le Goater
parent 7163c0bca7
commit 59adfc6f18
3 changed files with 37 additions and 8 deletions

View file

@ -200,6 +200,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
struct vfio_region_info **info) struct vfio_region_info **info)
{ {
size_t argsz = sizeof(struct vfio_region_info); size_t argsz = sizeof(struct vfio_region_info);
int fd = -1;
int ret; int ret;
/* check cache */ /* check cache */
@ -214,7 +215,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
retry: retry:
(*info)->argsz = argsz; (*info)->argsz = argsz;
ret = vbasedev->io_ops->get_region_info(vbasedev, *info); ret = vbasedev->io_ops->get_region_info(vbasedev, *info, &fd);
if (ret != 0) { if (ret != 0) {
g_free(*info); g_free(*info);
*info = NULL; *info = NULL;
@ -225,11 +226,19 @@ retry:
argsz = (*info)->argsz; argsz = (*info)->argsz;
*info = g_realloc(*info, argsz); *info = g_realloc(*info, argsz);
if (fd != -1) {
close(fd);
fd = -1;
}
goto retry; goto retry;
} }
/* fill cache */ /* fill cache */
vbasedev->reginfo[index] = *info; vbasedev->reginfo[index] = *info;
if (vbasedev->region_fds != NULL) {
vbasedev->region_fds[index] = fd;
}
return 0; return 0;
} }
@ -334,6 +343,7 @@ void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
vbasedev->io_ops = &vfio_device_io_ops_ioctl; vbasedev->io_ops = &vfio_device_io_ops_ioctl;
vbasedev->dev = dev; vbasedev->dev = dev;
vbasedev->fd = -1; vbasedev->fd = -1;
vbasedev->use_region_fds = false;
vbasedev->ram_block_discard_allowed = ram_discard; vbasedev->ram_block_discard_allowed = ram_discard;
} }
@ -444,6 +454,9 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
vbasedev->reginfo = g_new0(struct vfio_region_info *, vbasedev->reginfo = g_new0(struct vfio_region_info *,
vbasedev->num_regions); vbasedev->num_regions);
if (vbasedev->use_region_fds) {
vbasedev->region_fds = g_new0(int, vbasedev->num_regions);
}
} }
void vfio_device_unprepare(VFIODevice *vbasedev) void vfio_device_unprepare(VFIODevice *vbasedev)
@ -452,9 +465,14 @@ void vfio_device_unprepare(VFIODevice *vbasedev)
for (i = 0; i < vbasedev->num_regions; i++) { for (i = 0; i < vbasedev->num_regions; i++) {
g_free(vbasedev->reginfo[i]); g_free(vbasedev->reginfo[i]);
if (vbasedev->region_fds != NULL && vbasedev->region_fds[i] != -1) {
close(vbasedev->region_fds[i]);
} }
g_free(vbasedev->reginfo);
vbasedev->reginfo = NULL; }
g_clear_pointer(&vbasedev->reginfo, g_free);
g_clear_pointer(&vbasedev->region_fds, g_free);
QLIST_REMOVE(vbasedev, container_next); QLIST_REMOVE(vbasedev, container_next);
QLIST_REMOVE(vbasedev, global_next); QLIST_REMOVE(vbasedev, global_next);
@ -476,10 +494,13 @@ static int vfio_device_io_device_feature(VFIODevice *vbasedev,
} }
static int vfio_device_io_get_region_info(VFIODevice *vbasedev, static int vfio_device_io_get_region_info(VFIODevice *vbasedev,
struct vfio_region_info *info) struct vfio_region_info *info,
int *fd)
{ {
int ret; int ret;
*fd = -1;
ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info); ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info);
return ret < 0 ? -errno : ret; return ret < 0 ? -errno : ret;

View file

@ -241,6 +241,7 @@ int vfio_region_mmap(VFIORegion *region)
{ {
int i, ret, prot = 0; int i, ret, prot = 0;
char *name; char *name;
int fd;
if (!region->mem) { if (!region->mem) {
return 0; return 0;
@ -271,14 +272,18 @@ int vfio_region_mmap(VFIORegion *region)
goto no_mmap; goto no_mmap;
} }
/* Use the per-region fd if set, or the shared fd. */
fd = region->vbasedev->region_fds ?
region->vbasedev->region_fds[region->nr] :
region->vbasedev->fd,
map_align = (void *)ROUND_UP((uintptr_t)map_base, (uintptr_t)align); map_align = (void *)ROUND_UP((uintptr_t)map_base, (uintptr_t)align);
munmap(map_base, map_align - map_base); munmap(map_base, map_align - map_base);
munmap(map_align + region->mmaps[i].size, munmap(map_align + region->mmaps[i].size,
align - (map_align - map_base)); align - (map_align - map_base));
region->mmaps[i].mmap = mmap(map_align, region->mmaps[i].size, prot, region->mmaps[i].mmap = mmap(map_align, region->mmaps[i].size, prot,
MAP_SHARED | MAP_FIXED, MAP_SHARED | MAP_FIXED, fd,
region->vbasedev->fd,
region->fd_offset + region->fd_offset +
region->mmaps[i].offset); region->mmaps[i].offset);
if (region->mmaps[i].mmap == MAP_FAILED) { if (region->mmaps[i].mmap == MAP_FAILED) {

View file

@ -66,6 +66,7 @@ typedef struct VFIODevice {
OnOffAuto enable_migration; OnOffAuto enable_migration;
OnOffAuto migration_multifd_transfer; OnOffAuto migration_multifd_transfer;
bool migration_events; bool migration_events;
bool use_region_fds;
VFIODeviceOps *ops; VFIODeviceOps *ops;
VFIODeviceIOOps *io_ops; VFIODeviceIOOps *io_ops;
unsigned int num_irqs; unsigned int num_irqs;
@ -84,6 +85,7 @@ typedef struct VFIODevice {
VFIOIOASHwpt *hwpt; VFIOIOASHwpt *hwpt;
QLIST_ENTRY(VFIODevice) hwpt_next; QLIST_ENTRY(VFIODevice) hwpt_next;
struct vfio_region_info **reginfo; struct vfio_region_info **reginfo;
int *region_fds;
} VFIODevice; } VFIODevice;
struct VFIODeviceOps { struct VFIODeviceOps {
@ -170,10 +172,11 @@ struct VFIODeviceIOOps {
/** /**
* @get_region_info * @get_region_info
* *
* Fill in @info with information on the region given by @info->index. * Fill in @info (and optionally @fd) with information on the region given
* by @info->index.
*/ */
int (*get_region_info)(VFIODevice *vdev, int (*get_region_info)(VFIODevice *vdev,
struct vfio_region_info *info); struct vfio_region_info *info, int *fd);
/** /**
* @get_irq_info * @get_irq_info