mirror of
https://github.com/Motorhead1991/qemu.git
synced 2025-08-02 23:33:54 -06:00

Load device config received via multifd using the existing machinery behind vfio_load_device_config_state(). Also, make sure to process the relevant main migration channel flags.

Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Link: https://lore.kernel.org/qemu-devel/5dbd3f3703ec1097da2cf82a7262233452146fee.1741124640.git.maciej.szmigiero@oracle.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
517 lines
15 KiB
C
517 lines
15 KiB
C
/*
|
|
* Multifd VFIO migration
|
|
*
|
|
* Copyright (C) 2024,2025 Oracle and/or its affiliates.
|
|
*
|
|
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
|
* See the COPYING file in the top-level directory.
|
|
*
|
|
* SPDX-License-Identifier: GPL-2.0-or-later
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
#include "hw/vfio/vfio-common.h"
|
|
#include "migration/misc.h"
|
|
#include "qapi/error.h"
|
|
#include "qemu/error-report.h"
|
|
#include "qemu/lockable.h"
|
|
#include "qemu/main-loop.h"
|
|
#include "qemu/thread.h"
|
|
#include "io/channel-buffer.h"
|
|
#include "migration/qemu-file.h"
|
|
#include "migration-multifd.h"
|
|
#include "trace.h"
|
|
|
|
#define VFIO_DEVICE_STATE_CONFIG_STATE (1)
|
|
|
|
#define VFIO_DEVICE_STATE_PACKET_VER_CURRENT (0)
|
|
|
|
/*
 * Header of one device state packet as handed to
 * vfio_multifd_load_state_buffer().  The payload bytes follow the header
 * directly; their count is the total packet size minus sizeof() of this
 * header.
 */
typedef struct VFIODeviceStatePacket {
    /* Must equal VFIO_DEVICE_STATE_PACKET_VER_CURRENT to be accepted. */
    uint32_t version;
    /* Slot number of this packet in the load buffer queue. */
    uint32_t idx;
    /* Bit mask; VFIO_DEVICE_STATE_CONFIG_STATE marks the config packet. */
    uint32_t flags;
    /* Start of the payload. */
    uint8_t data[0];
} QEMU_PACKED VFIODeviceStatePacket;
|
|
|
|
/* type safety */
|
|
typedef struct VFIOStateBuffers {
|
|
GArray *array;
|
|
} VFIOStateBuffers;
|
|
|
|
/* One slot of the load buffer queue. */
typedef struct VFIOStateBuffer {
    /* True once a packet has been stored into this slot. */
    bool is_present;
    /* Heap copy of the packet payload. */
    char *data;
    /* Payload length in bytes. */
    size_t len;
} VFIOStateBuffer;
|
|
|
|
typedef struct VFIOMultifd {
|
|
bool load_bufs_thread_running;
|
|
bool load_bufs_thread_want_exit;
|
|
|
|
VFIOStateBuffers load_bufs;
|
|
QemuCond load_bufs_buffer_ready_cond;
|
|
QemuCond load_bufs_thread_finished_cond;
|
|
QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */
|
|
uint32_t load_buf_idx;
|
|
uint32_t load_buf_idx_last;
|
|
} VFIOMultifd;
|
|
|
|
/*
 * GArray element clear callback: releases the payload of a filled slot
 * and marks the slot as empty.  Empty slots are left untouched.
 */
static void vfio_state_buffer_clear(gpointer data)
{
    VFIOStateBuffer *slot = data;

    if (slot->is_present) {
        g_clear_pointer(&slot->data, g_free);
        slot->is_present = false;
    }
}
|
|
|
|
/*
 * Create the backing array of @bufs.  New elements are zero-filled and
 * removed elements are passed through vfio_state_buffer_clear().
 */
static void vfio_state_buffers_init(VFIOStateBuffers *bufs)
{
    GArray *arr = g_array_new(FALSE, TRUE, sizeof(VFIOStateBuffer));

    g_array_set_clear_func(arr, vfio_state_buffer_clear);
    bufs->array = arr;
}
|
|
|
|
/* Drop the reference to the backing array of @bufs and reset the pointer. */
static void vfio_state_buffers_destroy(VFIOStateBuffers *bufs)
{
    GArray *arr = g_steal_pointer(&bufs->array);

    if (arr) {
        g_array_unref(arr);
    }
}
|
|
|
|
/* Assert that @bufs currently has a backing array. */
static void vfio_state_buffers_assert_init(VFIOStateBuffers *bufs)
{
    assert(bufs->array != NULL);
}
|
|
|
|
/* Return the number of slots currently held by @bufs. */
static unsigned int vfio_state_buffers_size_get(VFIOStateBuffers *bufs)
{
    const GArray *arr = bufs->array;

    return arr->len;
}
|
|
|
|
/* Resize @bufs so that it holds exactly @size slots. */
static void vfio_state_buffers_size_set(VFIOStateBuffers *bufs,
                                        unsigned int size)
{
    GArray *arr = bufs->array;

    g_array_set_size(arr, size);
}
|
|
|
|
/*
 * Return a pointer to slot @idx of @bufs.  No bounds check is done here;
 * the caller has to pass an index below the current size.
 */
static VFIOStateBuffer *vfio_state_buffers_at(VFIOStateBuffers *bufs,
                                              unsigned int idx)
{
    VFIOStateBuffer *slots = (VFIOStateBuffer *)(void *)bufs->array->data;

    return &slots[idx];
}
|
|
|
|
/* called with load_bufs_mutex locked */
|
|
static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev,
|
|
VFIODeviceStatePacket *packet,
|
|
size_t packet_total_size,
|
|
Error **errp)
|
|
{
|
|
VFIOMigration *migration = vbasedev->migration;
|
|
VFIOMultifd *multifd = migration->multifd;
|
|
VFIOStateBuffer *lb;
|
|
|
|
vfio_state_buffers_assert_init(&multifd->load_bufs);
|
|
if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) {
|
|
vfio_state_buffers_size_set(&multifd->load_bufs, packet->idx + 1);
|
|
}
|
|
|
|
lb = vfio_state_buffers_at(&multifd->load_bufs, packet->idx);
|
|
if (lb->is_present) {
|
|
error_setg(errp, "%s: state buffer %" PRIu32 " already filled",
|
|
vbasedev->name, packet->idx);
|
|
return false;
|
|
}
|
|
|
|
assert(packet->idx >= multifd->load_buf_idx);
|
|
|
|
lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet));
|
|
lb->len = packet_total_size - sizeof(*packet);
|
|
lb->is_present = true;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size,
|
|
Error **errp)
|
|
{
|
|
VFIODevice *vbasedev = opaque;
|
|
VFIOMigration *migration = vbasedev->migration;
|
|
VFIOMultifd *multifd = migration->multifd;
|
|
VFIODeviceStatePacket *packet = (VFIODeviceStatePacket *)data;
|
|
|
|
if (!vfio_multifd_transfer_enabled(vbasedev)) {
|
|
error_setg(errp,
|
|
"%s: got device state packet but not doing multifd transfer",
|
|
vbasedev->name);
|
|
return false;
|
|
}
|
|
|
|
assert(multifd);
|
|
|
|
if (data_size < sizeof(*packet)) {
|
|
error_setg(errp, "%s: packet too short at %zu (min is %zu)",
|
|
vbasedev->name, data_size, sizeof(*packet));
|
|
return false;
|
|
}
|
|
|
|
if (packet->version != VFIO_DEVICE_STATE_PACKET_VER_CURRENT) {
|
|
error_setg(errp, "%s: packet has unknown version %" PRIu32,
|
|
vbasedev->name, packet->version);
|
|
return false;
|
|
}
|
|
|
|
if (packet->idx == UINT32_MAX) {
|
|
error_setg(errp, "%s: packet index is invalid", vbasedev->name);
|
|
return false;
|
|
}
|
|
|
|
trace_vfio_load_state_device_buffer_incoming(vbasedev->name, packet->idx);
|
|
|
|
/*
|
|
* Holding BQL here would violate the lock order and can cause
|
|
* a deadlock once we attempt to lock load_bufs_mutex below.
|
|
*/
|
|
assert(!bql_locked());
|
|
|
|
WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
|
|
/* config state packet should be the last one in the stream */
|
|
if (packet->flags & VFIO_DEVICE_STATE_CONFIG_STATE) {
|
|
multifd->load_buf_idx_last = packet->idx;
|
|
}
|
|
|
|
if (!vfio_load_state_buffer_insert(vbasedev, packet, data_size,
|
|
errp)) {
|
|
return false;
|
|
}
|
|
|
|
qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static bool vfio_load_bufs_thread_load_config(VFIODevice *vbasedev,
|
|
Error **errp)
|
|
{
|
|
VFIOMigration *migration = vbasedev->migration;
|
|
VFIOMultifd *multifd = migration->multifd;
|
|
VFIOStateBuffer *lb;
|
|
g_autoptr(QIOChannelBuffer) bioc = NULL;
|
|
g_autoptr(QEMUFile) f_out = NULL, f_in = NULL;
|
|
uint64_t mig_header;
|
|
int ret;
|
|
|
|
assert(multifd->load_buf_idx == multifd->load_buf_idx_last);
|
|
lb = vfio_state_buffers_at(&multifd->load_bufs, multifd->load_buf_idx);
|
|
assert(lb->is_present);
|
|
|
|
bioc = qio_channel_buffer_new(lb->len);
|
|
qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-load");
|
|
|
|
f_out = qemu_file_new_output(QIO_CHANNEL(bioc));
|
|
qemu_put_buffer(f_out, (uint8_t *)lb->data, lb->len);
|
|
|
|
ret = qemu_fflush(f_out);
|
|
if (ret) {
|
|
error_setg(errp, "%s: load config state flush failed: %d",
|
|
vbasedev->name, ret);
|
|
return false;
|
|
}
|
|
|
|
qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
|
|
f_in = qemu_file_new_input(QIO_CHANNEL(bioc));
|
|
|
|
mig_header = qemu_get_be64(f_in);
|
|
if (mig_header != VFIO_MIG_FLAG_DEV_CONFIG_STATE) {
|
|
error_setg(errp, "%s: expected FLAG_DEV_CONFIG_STATE but got %" PRIx64,
|
|
vbasedev->name, mig_header);
|
|
return false;
|
|
}
|
|
|
|
bql_lock();
|
|
ret = vfio_load_device_config_state(f_in, vbasedev);
|
|
bql_unlock();
|
|
|
|
if (ret < 0) {
|
|
error_setg(errp, "%s: vfio_load_device_config_state() failed: %d",
|
|
vbasedev->name, ret);
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
 * Return the buffer at the current load position, or NULL when that
 * buffer has not been received yet.
 */
static VFIOStateBuffer *vfio_load_state_buffer_get(VFIOMultifd *multifd)
{
    const unsigned int queued =
        vfio_state_buffers_size_get(&multifd->load_bufs);
    VFIOStateBuffer *slot;

    if (multifd->load_buf_idx >= queued) {
        /* The load position can be at most one past the last slot. */
        assert(multifd->load_buf_idx == queued);
        return NULL;
    }

    slot = vfio_state_buffers_at(&multifd->load_bufs, multifd->load_buf_idx);

    return slot->is_present ? slot : NULL;
}
|
|
|
|
static bool vfio_load_state_buffer_write(VFIODevice *vbasedev,
|
|
VFIOStateBuffer *lb,
|
|
Error **errp)
|
|
{
|
|
VFIOMigration *migration = vbasedev->migration;
|
|
VFIOMultifd *multifd = migration->multifd;
|
|
g_autofree char *buf = NULL;
|
|
char *buf_cur;
|
|
size_t buf_len;
|
|
|
|
if (!lb->len) {
|
|
return true;
|
|
}
|
|
|
|
trace_vfio_load_state_device_buffer_load_start(vbasedev->name,
|
|
multifd->load_buf_idx);
|
|
|
|
/* lb might become re-allocated when we drop the lock */
|
|
buf = g_steal_pointer(&lb->data);
|
|
buf_cur = buf;
|
|
buf_len = lb->len;
|
|
while (buf_len > 0) {
|
|
ssize_t wr_ret;
|
|
int errno_save;
|
|
|
|
/*
|
|
* Loading data to the device takes a while,
|
|
* drop the lock during this process.
|
|
*/
|
|
qemu_mutex_unlock(&multifd->load_bufs_mutex);
|
|
wr_ret = write(migration->data_fd, buf_cur, buf_len);
|
|
errno_save = errno;
|
|
qemu_mutex_lock(&multifd->load_bufs_mutex);
|
|
|
|
if (wr_ret < 0) {
|
|
error_setg(errp,
|
|
"%s: writing state buffer %" PRIu32 " failed: %d",
|
|
vbasedev->name, multifd->load_buf_idx, errno_save);
|
|
return false;
|
|
}
|
|
|
|
assert(wr_ret <= buf_len);
|
|
buf_len -= wr_ret;
|
|
buf_cur += wr_ret;
|
|
}
|
|
|
|
trace_vfio_load_state_device_buffer_load_end(vbasedev->name,
|
|
multifd->load_buf_idx);
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
 * Tell whether the load thread has been asked to stop, either through
 * multifd->load_bufs_thread_want_exit or through *@should_quit.
 */
static bool vfio_load_bufs_thread_want_exit(VFIOMultifd *multifd,
                                            bool *should_quit)
{
    if (multifd->load_bufs_thread_want_exit) {
        return true;
    }

    return qatomic_read(should_quit);
}
|
|
|
|
/*
|
|
* This thread is spawned by vfio_multifd_switchover_start() which gets
|
|
* called upon encountering the switchover point marker in main migration
|
|
* stream.
|
|
*
|
|
* It exits after either:
|
|
* * completing loading the remaining device state and device config, OR:
|
|
* * encountering some error while doing the above, OR:
|
|
* * being forcefully aborted by the migration core by it setting should_quit
|
|
* or by vfio_load_cleanup_load_bufs_thread() setting
|
|
* multifd->load_bufs_thread_want_exit.
|
|
*/
|
|
static bool vfio_load_bufs_thread(void *opaque, bool *should_quit, Error **errp)
|
|
{
|
|
VFIODevice *vbasedev = opaque;
|
|
VFIOMigration *migration = vbasedev->migration;
|
|
VFIOMultifd *multifd = migration->multifd;
|
|
bool ret = false;
|
|
|
|
trace_vfio_load_bufs_thread_start(vbasedev->name);
|
|
|
|
assert(multifd);
|
|
QEMU_LOCK_GUARD(&multifd->load_bufs_mutex);
|
|
|
|
assert(multifd->load_bufs_thread_running);
|
|
|
|
while (true) {
|
|
VFIOStateBuffer *lb;
|
|
|
|
/*
|
|
* Always check cancellation first after the buffer_ready wait below in
|
|
* case that cond was signalled by vfio_load_cleanup_load_bufs_thread().
|
|
*/
|
|
if (vfio_load_bufs_thread_want_exit(multifd, should_quit)) {
|
|
error_setg(errp, "operation cancelled");
|
|
goto thread_exit;
|
|
}
|
|
|
|
assert(multifd->load_buf_idx <= multifd->load_buf_idx_last);
|
|
|
|
lb = vfio_load_state_buffer_get(multifd);
|
|
if (!lb) {
|
|
trace_vfio_load_state_device_buffer_starved(vbasedev->name,
|
|
multifd->load_buf_idx);
|
|
qemu_cond_wait(&multifd->load_bufs_buffer_ready_cond,
|
|
&multifd->load_bufs_mutex);
|
|
continue;
|
|
}
|
|
|
|
if (multifd->load_buf_idx == multifd->load_buf_idx_last) {
|
|
break;
|
|
}
|
|
|
|
if (multifd->load_buf_idx == 0) {
|
|
trace_vfio_load_state_device_buffer_start(vbasedev->name);
|
|
}
|
|
|
|
if (!vfio_load_state_buffer_write(vbasedev, lb, errp)) {
|
|
goto thread_exit;
|
|
}
|
|
|
|
if (multifd->load_buf_idx == multifd->load_buf_idx_last - 1) {
|
|
trace_vfio_load_state_device_buffer_end(vbasedev->name);
|
|
}
|
|
|
|
multifd->load_buf_idx++;
|
|
}
|
|
|
|
if (!vfio_load_bufs_thread_load_config(vbasedev, errp)) {
|
|
goto thread_exit;
|
|
}
|
|
|
|
ret = true;
|
|
|
|
thread_exit:
|
|
/*
|
|
* Notify possibly waiting vfio_load_cleanup_load_bufs_thread() that
|
|
* this thread is exiting.
|
|
*/
|
|
multifd->load_bufs_thread_running = false;
|
|
qemu_cond_signal(&multifd->load_bufs_thread_finished_cond);
|
|
|
|
trace_vfio_load_bufs_thread_end(vbasedev->name);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static VFIOMultifd *vfio_multifd_new(void)
|
|
{
|
|
VFIOMultifd *multifd = g_new(VFIOMultifd, 1);
|
|
|
|
vfio_state_buffers_init(&multifd->load_bufs);
|
|
|
|
qemu_mutex_init(&multifd->load_bufs_mutex);
|
|
|
|
multifd->load_buf_idx = 0;
|
|
multifd->load_buf_idx_last = UINT32_MAX;
|
|
qemu_cond_init(&multifd->load_bufs_buffer_ready_cond);
|
|
|
|
multifd->load_bufs_thread_running = false;
|
|
multifd->load_bufs_thread_want_exit = false;
|
|
qemu_cond_init(&multifd->load_bufs_thread_finished_cond);
|
|
|
|
return multifd;
|
|
}
|
|
|
|
/*
|
|
* Terminates vfio_load_bufs_thread by setting
|
|
* multifd->load_bufs_thread_want_exit and signalling all the conditions
|
|
* the thread could be blocked on.
|
|
*
|
|
* Waits for the thread to signal that it had finished.
|
|
*/
|
|
static void vfio_load_cleanup_load_bufs_thread(VFIOMultifd *multifd)
|
|
{
|
|
/* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */
|
|
bql_unlock();
|
|
WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
|
|
while (multifd->load_bufs_thread_running) {
|
|
multifd->load_bufs_thread_want_exit = true;
|
|
|
|
qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond);
|
|
qemu_cond_wait(&multifd->load_bufs_thread_finished_cond,
|
|
&multifd->load_bufs_mutex);
|
|
}
|
|
}
|
|
bql_lock();
|
|
}
|
|
|
|
/*
 * Stop the load thread if it is still running, then release everything
 * owned by @multifd and @multifd itself.
 */
static void vfio_multifd_free(VFIOMultifd *multifd)
{
    vfio_load_cleanup_load_bufs_thread(multifd);

    /* Queued buffers */
    vfio_state_buffers_destroy(&multifd->load_bufs);

    /* Synchronization primitives */
    qemu_cond_destroy(&multifd->load_bufs_thread_finished_cond);
    qemu_cond_destroy(&multifd->load_bufs_buffer_ready_cond);
    qemu_mutex_destroy(&multifd->load_bufs_mutex);

    g_free(multifd);
}
|
|
|
|
/*
 * Free the multifd state of @vbasedev, if any, and reset the pointer
 * to NULL.
 */
void vfio_multifd_cleanup(VFIODevice *vbasedev)
{
    VFIOMultifd *multifd = g_steal_pointer(&vbasedev->migration->multifd);

    if (multifd) {
        vfio_multifd_free(multifd);
    }
}
|
|
|
|
/*
 * Report whether both prerequisites checked here hold:
 * multifd_device_state_supported() and migrate_send_switchover_start().
 */
bool vfio_multifd_transfer_supported(void)
{
    if (!multifd_device_state_supported()) {
        return false;
    }

    return migrate_send_switchover_start();
}
|
|
|
|
/*
 * Report whether multifd device state transfer is in use for @vbasedev.
 *
 * As written this always returns false and does not look at @vbasedev.
 */
bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev)
{
    return false;
}
|
|
|
|
/*
 * Prepare @vbasedev for multifd device state transfer.
 *
 * Does nothing when multifd transfer is not enabled for the device.
 * Otherwise allocates the VFIOMultifd state when @alloc_multifd is set;
 * no such state may exist yet in that case.
 *
 * Always returns true; @errp is not written to.
 */
bool vfio_multifd_setup(VFIODevice *vbasedev, bool alloc_multifd, Error **errp)
{
    VFIOMigration *migration = vbasedev->migration;

    if (vfio_multifd_transfer_enabled(vbasedev) && alloc_multifd) {
        assert(!migration->multifd);
        migration->multifd = vfio_multifd_new();
    }

    return true;
}
|
|
|
|
/*
 * Mark the load thread as running and hand vfio_load_bufs_thread() over
 * to qemu_loadvm_start_load_thread().
 *
 * BQL is dropped while load_bufs_mutex is held and re-taken afterwards.
 *
 * Always returns 0.
 */
int vfio_multifd_switchover_start(VFIODevice *vbasedev)
{
    VFIOMultifd *multifd = vbasedev->migration->multifd;

    assert(multifd);

    /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */
    bql_unlock();

    WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
        /* The flag is set here, before the thread gets started. */
        assert(!multifd->load_bufs_thread_running);
        multifd->load_bufs_thread_running = true;
    }

    bql_lock();

    qemu_loadvm_start_load_thread(vfio_load_bufs_thread, vbasedev);

    return 0;
}
|