vfio/migration: Multifd device state transfer support - send side

Implement the multifd device state transfer via an additional per-device
thread inside the save_live_complete_precopy_thread handler.

Switch between doing the data transfer in the new handler and doing it
in the old save_state handler depending on whether VFIO multifd transfer
is enabled.

Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Link: https://lore.kernel.org/qemu-devel/4d727e2e0435e0022d50004e474077632830e08d.1741124640.git.maciej.szmigiero@oracle.com
[ clg: - Reordered savevm_vfio_handlers
       - Updated save_live_complete_precopy* documentation ]
Signed-off-by: Cédric Le Goater <clg@redhat.com>
This commit is contained in:
Maciej S. Szmigiero 2025-03-04 23:03:57 +01:00 committed by Cédric Le Goater
parent b659c07c53
commit 6d644baef2
6 changed files with 189 additions and 8 deletions

View file

@ -71,11 +71,23 @@ VFIO implements the device hooks for the iterative approach as follows:
reassembles the multifd received data and loads it in-order into the device.
In the non-multifd mode this function is a NOP.
* A ``save_state`` function to save the device config space if it is present.
* A ``save_state`` function to save the device config space if it is present
in the non-multifd mode.
In the multifd mode it just emits a dummy EOS marker.
* A ``save_live_complete_precopy`` function that sets the VFIO device in
_STOP_COPY state and iteratively copies the data for the VFIO device until
the vendor driver indicates that no data remains.
In the multifd mode it just emits a dummy EOS marker.
* A ``save_live_complete_precopy_thread`` function that in the multifd mode
provides a thread handler performing the multifd device state transfer.
It sets the VFIO device to _STOP_COPY state, iteratively reads the data
from the VFIO device and queues it for multifd transmission until the vendor
driver indicates that no data remains.
After that, it saves the device config space and queues it for multifd
transfer too.
In the non-multifd mode this thread is a NOP.
* A ``load_state`` function that loads the config section and the data
sections that are generated by the save functions above.
@ -184,8 +196,11 @@ Live migration save path
Then the VFIO device is put in _STOP_COPY state
(FINISH_MIGRATE, _ACTIVE, _STOP_COPY)
.save_live_complete_precopy() is called for each active device
For the VFIO device, iterate in .save_live_complete_precopy() until
For the VFIO device: in the non-multifd mode iterate in
.save_live_complete_precopy() until
pending data is 0
In the multifd mode this iteration is done in
.save_live_complete_precopy_thread() instead.
|
(POSTMIGRATE, _COMPLETED, _STOP_COPY)
Migration thread schedules cleanup bottom half and exits

View file

@ -496,6 +496,148 @@ bool vfio_multifd_setup(VFIODevice *vbasedev, bool alloc_multifd, Error **errp)
return true;
}
/*
 * Write a placeholder end-of-state marker for @vbasedev to the main
 * migration stream @f.
 *
 * Must only be called while VFIO multifd transfer is enabled for the device
 * (this is asserted): in that mode the actual device state transfer is done
 * via multifd channels, so the main migration channel carries only this
 * dummy NOP marker.
 */
void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f)
{
    assert(vfio_multifd_transfer_enabled(vbasedev));

    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
}
/*
 * Save the device config state of @vbasedev into an in-memory channel
 * buffer, wrap the result in a VFIODeviceStatePacket flagged as
 * VFIO_DEVICE_STATE_CONFIG_STATE and queue that packet for multifd
 * transmission under @idstr / @instance_id.
 *
 * @idx is stored verbatim in the packet's idx field.
 *
 * Returns true once the packet has been queued, false on any failure
 * (the failure is reported through @errp).
 */
static bool
vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
                                               char *idstr,
                                               uint32_t instance_id,
                                               uint32_t idx,
                                               Error **errp)
{
    g_autoptr(QIOChannelBuffer) bioc = NULL;
    g_autoptr(QEMUFile) f = NULL;
    g_autofree VFIODeviceStatePacket *packet = NULL;
    size_t payload_len;
    size_t total_len;
    int flush_ret;

    /* The config state is first written to a memory-backed QEMUFile. */
    bioc = qio_channel_buffer_new(0);
    qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save");
    f = qemu_file_new_output(QIO_CHANNEL(bioc));

    if (vfio_save_device_config_state(f, vbasedev, errp) != 0) {
        return false;
    }

    /* Flush so that everything written so far lands in bioc->data. */
    flush_ret = qemu_fflush(f);
    if (flush_ret != 0) {
        error_setg(errp, "%s: save config state flush failed: %d",
                   vbasedev->name, flush_ret);
        return false;
    }

    /* Packet header followed by the serialized config state. */
    payload_len = bioc->usage;
    total_len = sizeof(*packet) + payload_len;

    packet = g_malloc0(total_len);
    packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
    packet->idx = idx;
    packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE;
    memcpy(&packet->data, bioc->data, payload_len);

    if (multifd_queue_device_state(idstr, instance_id,
                                   (char *)packet, total_len)) {
        vfio_mig_add_bytes_transferred(total_len);
        return true;
    }

    error_setg(errp, "%s: multifd config data queuing failed",
               vbasedev->name);
    return false;
}
/*
 * Thread body spawned by the migration core directly through the
 * .save_live_complete_precopy_thread SaveVMHandler.
 *
 * When VFIO multifd transfer is not enabled for the device this is a no-op
 * that reports success, since vfio_save_complete_precopy() does the transfer
 * in that case.
 *
 * Otherwise the thread runs until one of the following happens:
 * * the remaining device state and the device config have been saved, OR:
 * * an error is encountered while doing the above, OR:
 * * the migration core forcefully aborts it, which is observed as
 *   multifd_device_state_save_thread_should_exit() returning true.
 *
 * Returns true on success, false on failure (reported through @errp).
 */
bool
vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
                                          Error **errp)
{
    VFIODevice *vbasedev = d->handler_opaque;
    VFIOMigration *migration = vbasedev->migration;
    g_autofree VFIODeviceStatePacket *packet = NULL;
    bool success = false;
    uint32_t idx = 0;

    if (!vfio_multifd_transfer_enabled(vbasedev)) {
        /* Nothing to do, vfio_save_complete_precopy() does the transfer. */
        return true;
    }

    trace_vfio_save_complete_precopy_thread_start(vbasedev->name,
                                                  d->idstr, d->instance_id);

    /* We reach here with device state STOP or STOP_COPY only */
    if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
                                 VFIO_DEVICE_STATE_STOP, errp) != 0) {
        goto thread_exit;
    }

    /* One packet buffer, sized for a full data buffer, reused every round. */
    packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size);
    packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;

    /* Drain the device data fd, queuing one packet per successful read. */
    while (true) {
        ssize_t nread;
        size_t chunk_len;

        if (multifd_device_state_save_thread_should_exit()) {
            error_setg(errp, "operation cancelled");
            goto thread_exit;
        }

        nread = read(migration->data_fd, &packet->data,
                     migration->data_buffer_size);
        if (nread < 0) {
            error_setg(errp, "%s: reading state buffer %" PRIu32 " failed: %d",
                       vbasedev->name, idx, errno);
            goto thread_exit;
        }
        if (nread == 0) {
            /* A zero-length read ends the device data loop. */
            break;
        }

        packet->idx = idx;
        chunk_len = sizeof(*packet) + nread;

        if (!multifd_queue_device_state(d->idstr, d->instance_id,
                                        (char *)packet, chunk_len)) {
            error_setg(errp, "%s: multifd data queuing failed", vbasedev->name);
            goto thread_exit;
        }

        vfio_mig_add_bytes_transferred(chunk_len);
        idx++;
    }

    /* The config state packet takes the index after the last data packet. */
    if (vfio_save_complete_precopy_thread_config_state(vbasedev,
                                                       d->idstr,
                                                       d->instance_id,
                                                       idx, errp)) {
        success = true;
    }

thread_exit:
    trace_vfio_save_complete_precopy_thread_end(vbasedev->name, success);

    return success;
}
int vfio_multifd_switchover_start(VFIODevice *vbasedev)
{
VFIOMigration *migration = vbasedev->migration;

View file

@ -23,6 +23,12 @@ bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev);
bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size,
Error **errp);
/*
 * Emit a dummy VFIO_MIG_FLAG_END_OF_STATE marker on the main migration
 * channel @f; asserts that multifd transfer is enabled for @vbasedev.
 */
void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f)
/*
 * .save_live_complete_precopy_thread handler: saves the remaining device
 * state and the device config via multifd. Returns true immediately when
 * multifd transfer is not enabled for the device.
 */
bool
vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
Error **errp);
int vfio_multifd_switchover_start(VFIODevice *vbasedev);
#endif

View file

@ -120,10 +120,10 @@ static void vfio_migration_set_device_state(VFIODevice *vbasedev,
vfio_migration_send_event(vbasedev);
}
static int vfio_migration_set_state(VFIODevice *vbasedev,
enum vfio_device_mig_state new_state,
enum vfio_device_mig_state recover_state,
Error **errp)
int vfio_migration_set_state(VFIODevice *vbasedev,
enum vfio_device_mig_state new_state,
enum vfio_device_mig_state recover_state,
Error **errp)
{
VFIOMigration *migration = vbasedev->migration;
uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
@ -238,8 +238,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
return ret;
}
static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
Error **errp)
int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp)
{
VFIODevice *vbasedev = opaque;
int ret;
@ -638,6 +637,11 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
int ret;
Error *local_err = NULL;
if (vfio_multifd_transfer_enabled(vbasedev)) {
vfio_multifd_emit_dummy_eos(vbasedev, f);
return 0;
}
trace_vfio_save_complete_precopy_start(vbasedev->name);
/* We reach here with device state STOP or STOP_COPY only */
@ -669,6 +673,11 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
Error *local_err = NULL;
int ret;
if (vfio_multifd_transfer_enabled(vbasedev)) {
vfio_multifd_emit_dummy_eos(vbasedev, f);
return;
}
ret = vfio_save_device_config_state(f, opaque, &local_err);
if (ret) {
error_prepend(&local_err,
@ -825,6 +834,7 @@ static const SaveVMHandlers savevm_vfio_handlers = {
*/
.load_state_buffer = vfio_multifd_load_state_buffer,
.switchover_start = vfio_switchover_start,
.save_live_complete_precopy_thread = vfio_multifd_save_complete_precopy_thread,
};
/* ---------------------------------------------------------------------- */

View file

@ -171,6 +171,8 @@ vfio_save_block_precopy_empty_hit(const char *name) " (%s)"
vfio_save_cleanup(const char *name) " (%s)"
vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d"
vfio_save_complete_precopy_start(const char *name) " (%s)"
vfio_save_complete_precopy_thread_start(const char *name, const char *idstr, uint32_t instance_id) " (%s) idstr %s instance %"PRIu32
vfio_save_complete_precopy_thread_end(const char *name, int ret) " (%s) ret %d"
vfio_save_device_config_state(const char *name) " (%s)"
vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size %"PRIu64" precopy dirty size %"PRIu64
vfio_save_iterate_start(const char *name) " (%s)"

View file

@ -298,6 +298,7 @@ void vfio_mig_add_bytes_transferred(unsigned long val);
bool vfio_device_state_is_running(VFIODevice *vbasedev);
bool vfio_device_state_is_precopy(VFIODevice *vbasedev);
int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp);
int vfio_load_device_config_state(QEMUFile *f, void *opaque);
#ifdef CONFIG_LINUX
@ -314,6 +315,11 @@ struct vfio_info_cap_header *
vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id);
struct vfio_info_cap_header *
vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id);
int vfio_migration_set_state(VFIODevice *vbasedev,
enum vfio_device_mig_state new_state,
enum vfio_device_mig_state recover_state,
Error **errp);
#endif
bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);