system/runstate: add VM state change cb with return value

This patch adds a new VM state change callback type, `VMChangeStateHandlerWithRet`,
which returns a value, and a matching `cb_ret` field in `VMChangeStateEntry`.

Thus, a device can register a VM state change callback that returns a value.
Note that `VMChangeStateHandler` and `VMChangeStateHandlerWithRet` are mutually
exclusive and cannot be provided at the same time.

This patch is a prerequisite for 'vhost-user: return failure if backend crashes
when live migration', which makes live migration aware of a lost connection to
the vhost-user backend and aborts the migration.

Virtio devices will use `VMChangeStateHandlerWithRet`.

Signed-off-by: Haoqian He <haoqian.he@smartx.com>
Message-Id: <20250416024729.3289157-2-haoqian.he@smartx.com>
Tested-by: Lei Yang <leiyang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
This commit is contained in:
Haoqian He 2025-04-15 22:47:26 -04:00 committed by Michael S. Tsirkin
parent 8717987fb5
commit e0f300b36d
8 changed files with 62 additions and 20 deletions

View file

@ -1802,7 +1802,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
* called after ->start_ioeventfd() has already set blk's AioContext. * called after ->start_ioeventfd() has already set blk's AioContext.
*/ */
s->change = s->change =
qdev_add_vm_change_state_handler(dev, virtio_blk_dma_restart_cb, s); qdev_add_vm_change_state_handler(dev, virtio_blk_dma_restart_cb, NULL, s);
blk_ram_registrar_init(&s->blk_ram_registrar, s->blk); blk_ram_registrar_init(&s->blk_ram_registrar, s->blk);
blk_set_dev_ops(s->blk, &virtio_block_ops, s); blk_set_dev_ops(s->blk, &virtio_block_ops, s);

View file

@ -40,6 +40,7 @@ static int qdev_get_dev_tree_depth(DeviceState *dev)
* qdev_add_vm_change_state_handler: * qdev_add_vm_change_state_handler:
* @dev: the device that owns this handler * @dev: the device that owns this handler
* @cb: the callback function to be invoked * @cb: the callback function to be invoked
* @cb_ret: the callback function with return value to be invoked
* @opaque: user data passed to the callback function * @opaque: user data passed to the callback function
* *
* This function works like qemu_add_vm_change_state_handler() except callbacks * This function works like qemu_add_vm_change_state_handler() except callbacks
@ -50,25 +51,30 @@ static int qdev_get_dev_tree_depth(DeviceState *dev)
* controller's callback is invoked before the children on its bus when the VM * controller's callback is invoked before the children on its bus when the VM
* starts running. The order is reversed when the VM stops running. * starts running. The order is reversed when the VM stops running.
* *
* Note that the parameters `cb` and `cb_ret` are mutually exclusive.
*
* Returns: an entry to be freed with qemu_del_vm_change_state_handler() * Returns: an entry to be freed with qemu_del_vm_change_state_handler()
*/ */
VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev, VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev,
VMChangeStateHandler *cb, VMChangeStateHandler *cb,
VMChangeStateHandlerWithRet *cb_ret,
void *opaque) void *opaque)
{ {
return qdev_add_vm_change_state_handler_full(dev, cb, NULL, opaque); assert(!cb || !cb_ret);
return qdev_add_vm_change_state_handler_full(dev, cb, NULL, cb_ret, opaque);
} }
/* /*
* Exactly like qdev_add_vm_change_state_handler() but passes a prepare_cb * Exactly like qdev_add_vm_change_state_handler() but passes a prepare_cb
* argument too. * and the cb_ret arguments too.
*/ */
VMChangeStateEntry *qdev_add_vm_change_state_handler_full( VMChangeStateEntry *qdev_add_vm_change_state_handler_full(
DeviceState *dev, VMChangeStateHandler *cb, DeviceState *dev, VMChangeStateHandler *cb, VMChangeStateHandler *prepare_cb,
VMChangeStateHandler *prepare_cb, void *opaque) VMChangeStateHandlerWithRet *cb_ret, void *opaque)
{ {
int depth = qdev_get_dev_tree_depth(dev); int depth = qdev_get_dev_tree_depth(dev);
return qemu_add_vm_change_state_handler_prio_full(cb, prepare_cb, opaque, assert(!cb || !cb_ret);
depth); return qemu_add_vm_change_state_handler_prio_full(cb, prepare_cb, cb_ret,
opaque, depth);
} }

View file

@ -400,7 +400,7 @@ static void scsi_qdev_realize(DeviceState *qdev, Error **errp)
return; return;
} }
dev->vmsentry = qdev_add_vm_change_state_handler(DEVICE(dev), dev->vmsentry = qdev_add_vm_change_state_handler(DEVICE(dev),
scsi_dma_restart_cb, dev); scsi_dma_restart_cb, NULL, dev);
} }
static void scsi_qdev_unrealize(DeviceState *qdev) static void scsi_qdev_unrealize(DeviceState *qdev)

View file

@ -1016,7 +1016,7 @@ static int vfio_migration_init(VFIODevice *vbasedev)
vfio_vmstate_change_prepare : vfio_vmstate_change_prepare :
NULL; NULL;
migration->vm_state = qdev_add_vm_change_state_handler_full( migration->vm_state = qdev_add_vm_change_state_handler_full(
vbasedev->dev, vfio_vmstate_change, prepare_cb, vbasedev); vbasedev->dev, vfio_vmstate_change, prepare_cb, NULL, vbasedev);
migration_add_notifier(&migration->migration_state, migration_add_notifier(&migration->migration_state,
vfio_migration_state_notifier); vfio_migration_state_notifier);

View file

@ -3489,7 +3489,7 @@ void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size)
vdev->config = NULL; vdev->config = NULL;
} }
vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev), vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
virtio_vmstate_change, vdev); virtio_vmstate_change, NULL, vdev);
vdev->device_endian = virtio_default_endian(); vdev->device_endian = virtio_default_endian();
vdev->use_guest_notifier_mask = true; vdev->use_guest_notifier_mask = true;
} }

View file

@ -12,6 +12,7 @@ bool runstate_needs_reset(void);
void runstate_replay_enable(void); void runstate_replay_enable(void);
typedef void VMChangeStateHandler(void *opaque, bool running, RunState state); typedef void VMChangeStateHandler(void *opaque, bool running, RunState state);
typedef int VMChangeStateHandlerWithRet(void *opaque, bool running, RunState state);
VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb, VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb,
void *opaque); void *opaque);
@ -20,21 +21,27 @@ VMChangeStateEntry *qemu_add_vm_change_state_handler_prio(
VMChangeStateEntry * VMChangeStateEntry *
qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb, qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb,
VMChangeStateHandler *prepare_cb, VMChangeStateHandler *prepare_cb,
VMChangeStateHandlerWithRet *cb_ret,
void *opaque, int priority); void *opaque, int priority);
VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev, VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev,
VMChangeStateHandler *cb, VMChangeStateHandler *cb,
VMChangeStateHandlerWithRet *cb_ret,
void *opaque); void *opaque);
VMChangeStateEntry *qdev_add_vm_change_state_handler_full( VMChangeStateEntry *qdev_add_vm_change_state_handler_full(
DeviceState *dev, VMChangeStateHandler *cb, DeviceState *dev, VMChangeStateHandler *cb, VMChangeStateHandler *prepare_cb,
VMChangeStateHandler *prepare_cb, void *opaque); VMChangeStateHandlerWithRet *cb_ret, void *opaque);
void qemu_del_vm_change_state_handler(VMChangeStateEntry *e); void qemu_del_vm_change_state_handler(VMChangeStateEntry *e);
/** /**
* vm_state_notify: Notify the state of the VM * vm_state_notify: Notify the state of the VM
* *
* @running: whether the VM is running or not. * @running: whether the VM is running or not.
* @state: the #RunState of the VM. * @state: the #RunState of the VM.
*
* Return the result of the callback which has return value.
* If no callback has return value, still return 0 and the
* upper layer should not do additional processing.
*/ */
void vm_state_notify(bool running, RunState state); int vm_state_notify(bool running, RunState state);
static inline bool shutdown_caused_by_guest(ShutdownCause cause) static inline bool shutdown_caused_by_guest(ShutdownCause cause)
{ {

View file

@ -299,14 +299,18 @@ static int do_vm_stop(RunState state, bool send_stop)
if (oldstate == RUN_STATE_RUNNING) { if (oldstate == RUN_STATE_RUNNING) {
pause_all_vcpus(); pause_all_vcpus();
} }
vm_state_notify(0, state); ret = vm_state_notify(0, state);
if (send_stop) { if (send_stop) {
qapi_event_send_stop(); qapi_event_send_stop();
} }
} }
bdrv_drain_all(); bdrv_drain_all();
ret = bdrv_flush_all(); /*
* Even if vm_state_notify() return failure,
* it would be better to flush as before.
*/
ret |= bdrv_flush_all();
trace_vm_stop_flush_all(ret); trace_vm_stop_flush_all(ret);
return ret; return ret;

View file

@ -297,6 +297,7 @@ void qemu_system_vmstop_request(RunState state)
struct VMChangeStateEntry { struct VMChangeStateEntry {
VMChangeStateHandler *cb; VMChangeStateHandler *cb;
VMChangeStateHandler *prepare_cb; VMChangeStateHandler *prepare_cb;
VMChangeStateHandlerWithRet *cb_ret;
void *opaque; void *opaque;
QTAILQ_ENTRY(VMChangeStateEntry) entries; QTAILQ_ENTRY(VMChangeStateEntry) entries;
int priority; int priority;
@ -320,14 +321,15 @@ static QTAILQ_HEAD(, VMChangeStateEntry) vm_change_state_head =
VMChangeStateEntry *qemu_add_vm_change_state_handler_prio( VMChangeStateEntry *qemu_add_vm_change_state_handler_prio(
VMChangeStateHandler *cb, void *opaque, int priority) VMChangeStateHandler *cb, void *opaque, int priority)
{ {
return qemu_add_vm_change_state_handler_prio_full(cb, NULL, opaque, return qemu_add_vm_change_state_handler_prio_full(cb, NULL, NULL,
priority); opaque, priority);
} }
/** /**
* qemu_add_vm_change_state_handler_prio_full: * qemu_add_vm_change_state_handler_prio_full:
* @cb: the main callback to invoke * @cb: the main callback to invoke
* @prepare_cb: a callback to invoke before the main callback * @prepare_cb: a callback to invoke before the main callback
* @cb_ret: the main callback to invoke with return value
* @opaque: user data passed to the callbacks * @opaque: user data passed to the callbacks
* @priority: low priorities execute first when the vm runs and the reverse is * @priority: low priorities execute first when the vm runs and the reverse is
* true when the vm stops * true when the vm stops
@ -344,6 +346,7 @@ VMChangeStateEntry *qemu_add_vm_change_state_handler_prio(
VMChangeStateEntry * VMChangeStateEntry *
qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb, qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb,
VMChangeStateHandler *prepare_cb, VMChangeStateHandler *prepare_cb,
VMChangeStateHandlerWithRet *cb_ret,
void *opaque, int priority) void *opaque, int priority)
{ {
VMChangeStateEntry *e; VMChangeStateEntry *e;
@ -352,6 +355,7 @@ qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb,
e = g_malloc0(sizeof(*e)); e = g_malloc0(sizeof(*e));
e->cb = cb; e->cb = cb;
e->prepare_cb = prepare_cb; e->prepare_cb = prepare_cb;
e->cb_ret = cb_ret;
e->opaque = opaque; e->opaque = opaque;
e->priority = priority; e->priority = priority;
@ -379,9 +383,10 @@ void qemu_del_vm_change_state_handler(VMChangeStateEntry *e)
g_free(e); g_free(e);
} }
void vm_state_notify(bool running, RunState state) int vm_state_notify(bool running, RunState state)
{ {
VMChangeStateEntry *e, *next; VMChangeStateEntry *e, *next;
int ret = 0;
trace_vm_state_notify(running, state, RunState_str(state)); trace_vm_state_notify(running, state, RunState_str(state));
@ -393,7 +398,17 @@ void vm_state_notify(bool running, RunState state)
} }
QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) { QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) {
e->cb(e->opaque, running, state); if (e->cb) {
e->cb(e->opaque, running, state);
} else if (e->cb_ret) {
/*
* The return value of cb_ret is ignored here because
* we only care about stopping the device during
* VM live migration, which indicates whether the
* connection between QEMU and the backend is normal.
*/
e->cb_ret(e->opaque, running, state);
}
} }
} else { } else {
QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) { QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) {
@ -403,9 +418,19 @@ void vm_state_notify(bool running, RunState state)
} }
QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) { QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) {
e->cb(e->opaque, running, state); if (e->cb) {
e->cb(e->opaque, running, state);
} else if (e->cb_ret) {
/*
* We should execute all registered callbacks even if
* one of them returns failure, otherwise, some cleanup
* work of the device will be skipped.
*/
ret |= e->cb_ret(e->opaque, running, state);
}
} }
} }
return ret;
} }
static ShutdownCause reset_requested; static ShutdownCause reset_requested;