mirror of
https://github.com/Motorhead1991/qemu.git
synced 2025-08-02 23:33:54 -06:00
migration: cpr-transfer mode
Add the cpr-transfer migration mode, which allows the user to transfer a guest to a new QEMU instance on the same host with minimal guest pause time, by preserving guest RAM in place, albeit with new virtual addresses in new QEMU, and by preserving device file descriptors. Pages that were locked in memory for DMA in old QEMU remain locked in new QEMU, because the descriptor of the device that locked them remains open. cpr-transfer preserves memory and device descriptors by sending them to new QEMU over a UNIX domain socket using SCM_RIGHTS. Such CPR state cannot be sent over the normal migration channel, because devices and backends are created prior to reading the channel, so this mode sends CPR state over a second "cpr" migration channel. New QEMU reads the cpr channel prior to creating devices or backends. The user specifies the cpr channel in the channel arguments on the outgoing side, and in a second -incoming command-line parameter on the incoming side. The user must start old QEMU with the '-machine aux-ram-share=on' option, which allows anonymous memory to be transferred in place to the new process by transferring a memory descriptor for each ram block. Memory-backend objects must have the share=on attribute, but memory-backend-epc is not supported. The user starts new QEMU on the same host as old QEMU, with command-line arguments to create the same machine, plus the -incoming option for the main migration channel, like normal live migration. In addition, the user adds a second -incoming option with channel type "cpr". This CPR channel must support file descriptor transfer with SCM_RIGHTS, i.e. it must be a UNIX domain socket. To initiate CPR, the user issues a migrate command to old QEMU, adding a second migration channel of type "cpr" in the channels argument. Old QEMU stops the VM, saves state to the migration channels, and enters the postmigrate state. New QEMU mmaps the memory descriptors, and execution resumes.
The implementation splits qmp_migrate into start and finish functions. Start sends CPR state to new QEMU, which responds by closing the CPR channel. Old QEMU detects the HUP then calls finish, which connects the main migration channel. In summary, the usage is: qemu-system-$arch -machine aux-ram-share=on ... start new QEMU with "-incoming <main-uri> -incoming <cpr-channel>" Issue commands to old QEMU: migrate_set_parameter mode cpr-transfer {"execute": "migrate", ... {"channel-type": "main"...}, {"channel-type": "cpr"...} ... } Signed-off-by: Steve Sistare <steven.sistare@oracle.com> Reviewed-by: Peter Xu <peterx@redhat.com> Acked-by: Markus Armbruster <armbru@redhat.com> Link: https://lore.kernel.org/r/1736967650-129648-17-git-send-email-steven.sistare@oracle.com Signed-off-by: Fabiano Rosas <farosas@suse.de>
This commit is contained in:
parent
b3698869f4
commit
624e6e654e
11 changed files with 210 additions and 10 deletions
|
@ -77,6 +77,7 @@
|
|||
/*
 * Table of migration state notifier lists. It holds one entry for each
 * migration mode named below (normal, cpr-reboot, cpr-transfer), each set up
 * through NOTIFIER_ELEM_INIT with the array name and the mode constant.
 * NOTE(review): presumably the mode constant is the element index; confirm
 * against the NOTIFIER_ELEM_INIT definition.
 */
static NotifierWithReturnList migration_state_notifiers[] = {
|
||||
NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_NORMAL),
|
||||
NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_CPR_REBOOT),
|
||||
NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_CPR_TRANSFER),
|
||||
};
|
||||
|
||||
/* Messages sent on the return path from destination to source */
|
||||
|
@ -110,6 +111,7 @@ static int migration_maybe_pause(MigrationState *s,
|
|||
static void migrate_fd_cancel(MigrationState *s);
|
||||
static bool close_return_path_on_source(MigrationState *s);
|
||||
static void migration_completion_end(MigrationState *s);
|
||||
static void migrate_hup_delete(MigrationState *s);
|
||||
|
||||
static void migration_downtime_start(MigrationState *s)
|
||||
{
|
||||
|
@ -220,6 +222,12 @@ migration_channels_and_transport_compatible(MigrationAddress *addr,
|
|||
return false;
|
||||
}
|
||||
|
||||
if (migrate_mode() == MIG_MODE_CPR_TRANSFER &&
|
||||
addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
|
||||
error_setg(errp, "Migration requires streamable transport (eg unix)");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -435,6 +443,7 @@ void migration_incoming_state_destroy(void)
|
|||
mis->postcopy_qemufile_dst = NULL;
|
||||
}
|
||||
|
||||
cpr_set_incoming_mode(MIG_MODE_NONE);
|
||||
yank_unregister_instance(MIGRATION_YANK_INSTANCE);
|
||||
}
|
||||
|
||||
|
@ -747,6 +756,9 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels,
|
|||
} else {
|
||||
error_setg(errp, "unknown migration protocol: %s", uri);
|
||||
}
|
||||
|
||||
/* Close cpr socket to tell source that we are listening */
|
||||
cpr_state_close();
|
||||
}
|
||||
|
||||
static void process_incoming_migration_bh(void *opaque)
|
||||
|
@ -1423,6 +1435,8 @@ static void migrate_fd_cleanup(MigrationState *s)
|
|||
s->vmdesc = NULL;
|
||||
|
||||
qemu_savevm_state_cleanup();
|
||||
cpr_state_close();
|
||||
migrate_hup_delete(s);
|
||||
|
||||
close_return_path_on_source(s);
|
||||
|
||||
|
@ -1534,6 +1548,7 @@ static void migrate_fd_error(MigrationState *s, const Error *error)
|
|||
static void migrate_fd_cancel(MigrationState *s)
|
||||
{
|
||||
int old_state ;
|
||||
bool setup = (s->state == MIGRATION_STATUS_SETUP);
|
||||
|
||||
trace_migrate_fd_cancel();
|
||||
|
||||
|
@ -1568,6 +1583,17 @@ static void migrate_fd_cancel(MigrationState *s)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If qmp_migrate_finish has not been called, then there is no path that
|
||||
* will complete the cancellation. Do it now.
|
||||
*/
|
||||
if (setup && !s->to_dst_file) {
|
||||
migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
|
||||
MIGRATION_STATUS_CANCELLED);
|
||||
cpr_state_close();
|
||||
migrate_hup_delete(s);
|
||||
}
|
||||
}
|
||||
|
||||
void migration_add_notifier_mode(NotifierWithReturn *notify,
|
||||
|
@ -1665,7 +1691,9 @@ bool migration_thread_is_self(void)
|
|||
|
||||
/*
 * Report whether the migration mode configured in @s is a CPR mode.
 *
 * @s: migration state whose parameters.mode is inspected
 *
 * Returns: true when the mode is MIG_MODE_CPR_REBOOT or
 * MIG_MODE_CPR_TRANSFER, false for every other mode.
 */
bool migrate_mode_is_cpr(MigrationState *s)
{
    MigMode mode = s->parameters.mode;

    /*
     * Both CPR flavours count. An early return that tested only
     * MIG_MODE_CPR_REBOOT would leave the cpr-transfer check unreachable.
     */
    return mode == MIG_MODE_CPR_REBOOT ||
           mode == MIG_MODE_CPR_TRANSFER;
}
|
||||
|
||||
int migrate_init(MigrationState *s, Error **errp)
|
||||
|
@ -2046,6 +2074,40 @@ static bool migrate_prepare(MigrationState *s, bool resume, Error **errp)
|
|||
return true;
|
||||
}
|
||||
|
||||
static void qmp_migrate_finish(MigrationAddress *addr, bool resume_requested,
|
||||
Error **errp);
|
||||
|
||||
/*
 * Install a G_IO_HUP watch on @ioc and record it in s->hup_source.
 *
 * @s: migration state that stores the watch source
 * @ioc: channel to be monitored for hang-up
 * @cb: callback registered on the watch source
 * @opaque: user data registered together with @cb
 *
 * No destroy-notify is registered for @opaque (the last argument of
 * g_source_set_callback is NULL), so releasing it is left to other code.
 * The source is attached with a NULL context argument.
 */
static void migrate_hup_add(MigrationState *s, QIOChannel *ioc, GSourceFunc cb,
                            void *opaque)
{
    GSource *watch = qio_channel_create_watch(ioc, G_IO_HUP);

    s->hup_source = watch;
    g_source_set_callback(watch, cb, opaque, NULL);
    g_source_attach(watch, NULL);
}
|
||||
|
||||
/*
 * Remove the hang-up watch recorded in s->hup_source, if one exists.
 *
 * @s: migration state that may hold a watch source
 *
 * The source is destroyed, the reference held in @s is dropped, and the
 * field is reset to NULL, so calling this again is a no-op.
 */
static void migrate_hup_delete(MigrationState *s)
{
    GSource *watch = s->hup_source;

    if (!watch) {
        return;
    }

    g_source_destroy(watch);
    g_source_unref(watch);
    s->hup_source = NULL;
}
|
||||
|
||||
static gboolean qmp_migrate_finish_cb(QIOChannel *channel,
|
||||
GIOCondition cond,
|
||||
void *opaque)
|
||||
{
|
||||
MigrationAddress *addr = opaque;
|
||||
|
||||
qmp_migrate_finish(addr, false, NULL);
|
||||
|
||||
cpr_state_close();
|
||||
migrate_hup_delete(migrate_get_current());
|
||||
qapi_free_MigrationAddress(addr);
|
||||
return G_SOURCE_REMOVE;
|
||||
}
|
||||
|
||||
void qmp_migrate(const char *uri, bool has_channels,
|
||||
MigrationChannelList *channels, bool has_detach, bool detach,
|
||||
bool has_resume, bool resume, Error **errp)
|
||||
|
@ -2056,6 +2118,7 @@ void qmp_migrate(const char *uri, bool has_channels,
|
|||
g_autoptr(MigrationChannel) channel = NULL;
|
||||
MigrationAddress *addr = NULL;
|
||||
MigrationChannel *channelv[MIGRATION_CHANNEL_TYPE__MAX] = { NULL };
|
||||
MigrationChannel *cpr_channel = NULL;
|
||||
|
||||
/*
|
||||
* Having preliminary checks for uri and channel
|
||||
|
@ -2076,6 +2139,7 @@ void qmp_migrate(const char *uri, bool has_channels,
|
|||
}
|
||||
channelv[type] = channels->value;
|
||||
}
|
||||
cpr_channel = channelv[MIGRATION_CHANNEL_TYPE_CPR];
|
||||
addr = channelv[MIGRATION_CHANNEL_TYPE_MAIN]->addr;
|
||||
if (!addr) {
|
||||
error_setg(errp, "Channel list has no main entry");
|
||||
|
@ -2096,12 +2160,52 @@ void qmp_migrate(const char *uri, bool has_channels,
|
|||
return;
|
||||
}
|
||||
|
||||
if (s->parameters.mode == MIG_MODE_CPR_TRANSFER && !cpr_channel) {
|
||||
error_setg(errp, "missing 'cpr' migration channel");
|
||||
return;
|
||||
}
|
||||
|
||||
resume_requested = has_resume && resume;
|
||||
if (!migrate_prepare(s, resume_requested, errp)) {
|
||||
/* Error detected, put into errp */
|
||||
return;
|
||||
}
|
||||
|
||||
if (cpr_state_save(cpr_channel, &local_err)) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* For cpr-transfer, the target may not be listening yet on the migration
|
||||
* channel, because first it must finish cpr_load_state. The target tells
|
||||
* us it is listening by closing the cpr-state socket. Wait for that HUP
|
||||
* event before connecting in qmp_migrate_finish.
|
||||
*
|
||||
* The HUP could occur because the target fails while reading CPR state,
|
||||
* in which case the target will not listen for the incoming migration
|
||||
* connection, so qmp_migrate_finish will fail to connect, and then recover.
|
||||
*/
|
||||
if (s->parameters.mode == MIG_MODE_CPR_TRANSFER) {
|
||||
migrate_hup_add(s, cpr_state_ioc(), (GSourceFunc)qmp_migrate_finish_cb,
|
||||
QAPI_CLONE(MigrationAddress, addr));
|
||||
|
||||
} else {
|
||||
qmp_migrate_finish(addr, resume_requested, errp);
|
||||
}
|
||||
|
||||
out:
|
||||
if (local_err) {
|
||||
migrate_fd_error(s, local_err);
|
||||
error_propagate(errp, local_err);
|
||||
}
|
||||
}
|
||||
|
||||
static void qmp_migrate_finish(MigrationAddress *addr, bool resume_requested,
|
||||
Error **errp)
|
||||
{
|
||||
MigrationState *s = migrate_get_current();
|
||||
Error *local_err = NULL;
|
||||
|
||||
if (!resume_requested) {
|
||||
if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
|
||||
return;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue