migration: check magic value for deciding the mapping of channels

The current logic assumes that channel connections on the destination side
are always established in the same order as on the source, and that the first
one will always be the main channel, followed by the multifd or post-copy
preemption channels. This is not always true: even if a channel has a
connection established on the source side, it can still be in the pending
state on the destination side, and a newer connection can be established
first, causing out-of-order mapping of channels on the destination side.
Currently, all channels except post-copy preempt send a magic number; this
patch uses that magic number to decide the type of channel. This logic is
applicable only to precopy (multifd) live migration because, as mentioned,
the post-copy preempt channel does not send any magic number. Also, TLS live
migrations already do the TLS handshake before creating other channels, so
this issue is not possible with TLS; hence this logic is avoided for TLS
live migrations. This patch uses read peek to check the magic number of
channels so that the current data/control stream management remains
unaffected.

Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Suggested-by: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
This commit is contained in:
manish.mishra 2022-12-20 18:44:18 +00:00 committed by Juan Quintela
parent 84615a19dd
commit 6720c2b327
7 changed files with 101 additions and 31 deletions

View file

@ -31,6 +31,7 @@
#include "migration.h"
#include "savevm.h"
#include "qemu-file.h"
#include "channel.h"
#include "migration/vmstate.h"
#include "block/block.h"
#include "qapi/error.h"
@ -664,10 +665,6 @@ static bool migration_incoming_setup(QEMUFile *f, Error **errp)
{
MigrationIncomingState *mis = migration_incoming_get_current();
if (multifd_load_setup(errp) != 0) {
return false;
}
if (!mis->from_src_file) {
mis->from_src_file = f;
}
@ -734,31 +731,56 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
{
MigrationIncomingState *mis = migration_incoming_get_current();
Error *local_err = NULL;
bool start_migration;
QEMUFile *f;
bool default_channel = true;
uint32_t channel_magic = 0;
int ret = 0;
if (!mis->from_src_file) {
/* The first connection (multifd may have multiple) */
if (migrate_use_multifd() && !migrate_postcopy_ram() &&
qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
/*
* With multiple channels, it is possible that we receive channels
* out of order on destination side, causing incorrect mapping of
* source channels on destination side. Check channel MAGIC to
* decide type of channel. Please note this is best effort, postcopy
* preempt channel does not send any magic number so avoid it for
* postcopy live migration. Also tls live migration already does
* tls handshake while initializing main channel so with tls this
* issue is not possible.
*/
ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
sizeof(channel_magic), &local_err);
if (ret != 0) {
error_propagate(errp, local_err);
return;
}
default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
} else {
default_channel = !mis->from_src_file;
}
if (multifd_load_setup(errp) != 0) {
error_setg(errp, "Failed to setup multifd channels");
return;
}
if (default_channel) {
f = qemu_file_new_input(ioc);
if (!migration_incoming_setup(f, errp)) {
return;
}
/*
* Common migration only needs one channel, so we can start
* right now. Some features need more than one channel, we wait.
*/
start_migration = !migration_needs_multiple_sockets();
} else {
/* Multiple connections */
assert(migration_needs_multiple_sockets());
if (migrate_use_multifd()) {
start_migration = multifd_recv_new_channel(ioc, &local_err);
multifd_recv_new_channel(ioc, &local_err);
} else {
assert(migrate_postcopy_preempt());
f = qemu_file_new_input(ioc);
start_migration = postcopy_preempt_new_channel(mis, f);
postcopy_preempt_new_channel(mis, f);
}
if (local_err) {
error_propagate(errp, local_err);
@ -766,7 +788,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
}
}
if (start_migration) {
if (migration_has_all_channels()) {
/* If it's a recovery, we're done */
if (postcopy_try_recover()) {
return;