mirror of
https://github.com/Motorhead1991/qemu.git
synced 2025-08-02 15:23:53 -06:00

Add the cpr-transfer migration mode, which allows the user to transfer a guest to a new QEMU instance on the same host with minimal guest pause time, by preserving guest RAM in place, albeit with new virtual addresses in new QEMU, and by preserving device file descriptors. Pages that were locked in memory for DMA in old QEMU remain locked in new QEMU, because the descriptor of the device that locked them remains open. cpr-transfer preserves memory and device descriptors by sending them to new QEMU over a UNIX domain socket using SCM_RIGHTS. Such CPR state cannot be sent over the normal migration channel, because devices and backends are created prior to reading the channel, so this mode sends CPR state over a second "cpr" migration channel. New QEMU reads the cpr channel prior to creating devices or backends. The user specifies the cpr channel in the channel arguments on the outgoing side, and in a second -incoming command-line parameter on the incoming side. The user must start old QEMU with the '-machine aux-ram-share=on' option, which allows anonymous memory to be transferred in place to the new process by transferring a memory descriptor for each ram block. Memory-backend objects must have the share=on attribute, but memory-backend-epc is not supported. The user starts new QEMU on the same host as old QEMU, with command-line arguments to create the same machine, plus the -incoming option for the main migration channel, like normal live migration. In addition, the user adds a second -incoming option with channel type "cpr". This CPR channel must support file descriptor transfer with SCM_RIGHTS, i.e. it must be a UNIX domain socket. To initiate CPR, the user issues a migrate command to old QEMU, adding a second migration channel of type "cpr" in the channels argument. Old QEMU stops the VM, saves state to the migration channels, and enters the postmigrate state. New QEMU mmap's memory descriptors, and execution resumes. 
The implementation splits qmp_migrate into start and finish functions. Start sends CPR state to new QEMU, which responds by closing the CPR channel. Old QEMU detects the HUP then calls finish, which connects the main migration channel. In summary, the usage is: qemu-system-$arch -machine aux-ram-share=on ... start new QEMU with "-incoming <main-uri> -incoming <cpr-channel>" Issue commands to old QEMU: migrate_set_parameter mode cpr-transfer {"execute": "migrate", ... {"channel-type": "main"...}, {"channel-type": "cpr"...} ... } Signed-off-by: Steve Sistare <steven.sistare@oracle.com> Reviewed-by: Peter Xu <peterx@redhat.com> Acked-by: Markus Armbruster <armbru@redhat.com> Link: https://lore.kernel.org/r/1736967650-129648-17-git-send-email-steven.sistare@oracle.com Signed-off-by: Fabiano Rosas <farosas@suse.de>
224 lines
5.1 KiB
C
224 lines
5.1 KiB
C
/*
|
|
* Copyright (c) 2021-2024 Oracle and/or its affiliates.
|
|
*
|
|
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
|
* See the COPYING file in the top-level directory.
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
#include "qapi/error.h"
|
|
#include "migration/cpr.h"
|
|
#include "migration/misc.h"
|
|
#include "migration/options.h"
|
|
#include "migration/qemu-file.h"
|
|
#include "migration/savevm.h"
|
|
#include "migration/vmstate.h"
|
|
#include "system/runstate.h"
|
|
#include "trace.h"
|
|
|
|
/*************************************************************************/
|
|
/* cpr state container for all information to be saved. */
|
|
|
|
/* List head type for a QLIST of CprFd elements. */
typedef QLIST_HEAD(CprFdList, CprFd) CprFdList;
|
|
|
|
typedef struct CprState {
|
|
CprFdList fds;
|
|
} CprState;
|
|
|
|
/* The single CprState instance that this file saves, loads and queries. */
static CprState cpr_state;
|
|
|
|
/****************************************************************************/
|
|
|
|
typedef struct CprFd {
|
|
char *name;
|
|
unsigned int namelen;
|
|
int id;
|
|
int fd;
|
|
QLIST_ENTRY(CprFd) next;
|
|
} CprFd;
|
|
|
|
static const VMStateDescription vmstate_cpr_fd = {
|
|
.name = "cpr fd",
|
|
.version_id = 1,
|
|
.minimum_version_id = 1,
|
|
.fields = (VMStateField[]) {
|
|
VMSTATE_UINT32(namelen, CprFd),
|
|
VMSTATE_VBUFFER_ALLOC_UINT32(name, CprFd, 0, NULL, namelen),
|
|
VMSTATE_INT32(id, CprFd),
|
|
VMSTATE_FD(fd, CprFd),
|
|
VMSTATE_END_OF_LIST()
|
|
}
|
|
};
|
|
|
|
void cpr_save_fd(const char *name, int id, int fd)
|
|
{
|
|
CprFd *elem = g_new0(CprFd, 1);
|
|
|
|
trace_cpr_save_fd(name, id, fd);
|
|
elem->name = g_strdup(name);
|
|
elem->namelen = strlen(name) + 1;
|
|
elem->id = id;
|
|
elem->fd = fd;
|
|
QLIST_INSERT_HEAD(&cpr_state.fds, elem, next);
|
|
}
|
|
|
|
/*
 * Return the first element of @head whose name equals @name and whose id
 * equals @id, or NULL if the list holds no such element.
 */
static CprFd *find_fd(CprFdList *head, const char *name, int id)
{
    CprFd *cur;

    QLIST_FOREACH(cur, head, next) {
        if (strcmp(cur->name, name) != 0 || cur->id != id) {
            continue;
        }
        return cur;
    }

    return NULL;
}
|
|
|
|
void cpr_delete_fd(const char *name, int id)
|
|
{
|
|
CprFd *elem = find_fd(&cpr_state.fds, name, id);
|
|
|
|
if (elem) {
|
|
QLIST_REMOVE(elem, next);
|
|
g_free(elem->name);
|
|
g_free(elem);
|
|
}
|
|
|
|
trace_cpr_delete_fd(name, id);
|
|
}
|
|
|
|
int cpr_find_fd(const char *name, int id)
|
|
{
|
|
CprFd *elem = find_fd(&cpr_state.fds, name, id);
|
|
int fd = elem ? elem->fd : -1;
|
|
|
|
trace_cpr_find_fd(name, id, fd);
|
|
return fd;
|
|
}
|
|
/*************************************************************************/
|
|
/* Name given to the vmstate_cpr_state description. */
#define CPR_STATE "CprState"
|
|
|
|
static const VMStateDescription vmstate_cpr_state = {
|
|
.name = CPR_STATE,
|
|
.version_id = 1,
|
|
.minimum_version_id = 1,
|
|
.fields = (VMStateField[]) {
|
|
VMSTATE_QLIST_V(fds, CprState, 1, vmstate_cpr_fd, CprFd, next),
|
|
VMSTATE_END_OF_LIST()
|
|
}
|
|
};
|
|
/*************************************************************************/
|
|
|
|
/*
 * QEMUFile for the cpr channel.  Assigned by cpr_state_save() and
 * cpr_state_load() on success, and closed and reset to NULL by
 * cpr_state_close().
 */
static QEMUFile *cpr_state_file;
|
|
|
|
QIOChannel *cpr_state_ioc(void)
|
|
{
|
|
return qemu_file_get_ioc(cpr_state_file);
|
|
}
|
|
|
|
/*
 * Migration mode of the incoming side.  Starts as MIG_MODE_NONE and is
 * changed through cpr_set_incoming_mode().
 */
static MigMode incoming_mode = MIG_MODE_NONE;
|
|
|
|
MigMode cpr_get_incoming_mode(void)
|
|
{
|
|
return incoming_mode;
|
|
}
|
|
|
|
/*
 * Store @mode as the incoming migration mode that
 * cpr_get_incoming_mode() will report.
 */
void cpr_set_incoming_mode(MigMode mode)
{
    incoming_mode = mode;
}
|
|
|
|
/*
 * Write the CPR state to the outgoing cpr channel.
 *
 * @channel: cpr migration channel passed to cpr_transfer_output()
 * @errp: set on failure
 *
 * Does nothing and returns 0 unless the migration mode is
 * MIG_MODE_CPR_TRANSFER.  Otherwise a header (magic and version) is
 * written, followed by cpr_state encoded with vmstate_cpr_state.  On
 * success the file is kept in cpr_state_file.
 *
 * Returns 0 on success, -1 if the output file could not be created, or the
 * vmstate_save_state() error code.
 */
int cpr_state_save(MigrationChannel *channel, Error **errp)
{
    MigMode mode = migrate_mode();
    QEMUFile *out;
    int rc;

    trace_cpr_state_save(MigMode_str(mode));

    if (mode != MIG_MODE_CPR_TRANSFER) {
        return 0;
    }

    out = cpr_transfer_output(channel, errp);
    if (out == NULL) {
        return -1;
    }

    qemu_put_be32(out, QEMU_CPR_FILE_MAGIC);
    qemu_put_be32(out, QEMU_CPR_FILE_VERSION);

    rc = vmstate_save_state(out, &vmstate_cpr_state, &cpr_state, 0);
    if (rc != 0) {
        error_setg(errp, "vmstate_save_state error %d", rc);
        qemu_fclose(out);
        return rc;
    }

    /*
     * Shut down only the write direction, leaving the socket open, so that
     * the peer closing its end can later be detected as a HUP event.
     */
    qemu_fflush(out);
    qio_channel_shutdown(qemu_file_get_ioc(out), QIO_CHANNEL_SHUTDOWN_WRITE,
                         NULL);
    cpr_state_file = out;

    return 0;
}
|
|
|
|
/*
 * Read the CPR state from the incoming cpr channel.
 *
 * @channel: cpr migration channel, or NULL when none was given
 * @errp: set on failure
 *
 * With a NULL @channel this does nothing and returns 0.  Otherwise the
 * incoming mode is set to MIG_MODE_CPR_TRANSFER, the stream header (magic
 * and version) is validated, and the stream is loaded into cpr_state using
 * vmstate_cpr_state.  On success the file is kept in cpr_state_file.
 *
 * Returns 0 on success, -1 if the input file could not be created, -EINVAL
 * on a bad magic value, -ENOTSUP on an unsupported version, or the
 * vmstate_load_state() error code.  The file is closed on every failure
 * that happens after it was opened.
 */
int cpr_state_load(MigrationChannel *channel, Error **errp)
{
    int ret;
    uint32_t v;
    QEMUFile *f;
    MigMode mode;

    if (!channel) {
        return 0;
    }

    mode = MIG_MODE_CPR_TRANSFER;
    cpr_set_incoming_mode(mode);
    f = cpr_transfer_input(channel, errp);
    if (!f) {
        return -1;
    }

    trace_cpr_state_load(MigMode_str(mode));

    v = qemu_get_be32(f);
    if (v != QEMU_CPR_FILE_MAGIC) {
        error_setg(errp, "Not a migration stream (bad magic %x)", v);
        ret = -EINVAL;
        goto fail;
    }

    v = qemu_get_be32(f);
    if (v != QEMU_CPR_FILE_VERSION) {
        /* v is unsigned, so it is formatted with %u rather than %d. */
        error_setg(errp, "Unsupported migration stream version %u", v);
        ret = -ENOTSUP;
        goto fail;
    }

    ret = vmstate_load_state(f, &vmstate_cpr_state, &cpr_state, 1);
    if (ret) {
        error_setg(errp, "vmstate_load_state error %d", ret);
        goto fail;
    }

    /*
     * Keep the file open: the caller decides when to close the socket (and
     * thereby generate a HUP event for the sending side).
     */
    cpr_state_file = f;
    return 0;

fail:
    qemu_fclose(f);
    return ret;
}
|
|
|
|
void cpr_state_close(void)
|
|
{
|
|
if (cpr_state_file) {
|
|
qemu_fclose(cpr_state_file);
|
|
cpr_state_file = NULL;
|
|
}
|
|
}
|