migration/multifd: Add a compat property for TLS termination

We're currently changing the way the source multifd migration handles
the shutdown of the multifd channels when TLS is in use to perform a
clean termination by calling gnutls_bye().

Older src QEMUs will always close the channel without terminating the
TLS session. New dst QEMUs treat an unclean termination as an error.

Add the multifd-clean-tls-termination migration property (backed by
multifd_clean_tls_termination, default true) that can be switched off
on the destination whenever a src QEMU <= 9.2 is in use.

(Note that the compat property is only strictly necessary for src
QEMUs older than 9.1. Due to synchronization coincidences, src QEMUs
9.1 and 9.2 can put the destination in a condition where it doesn't
see the unclean termination. Still, make the property more inclusive
to facilitate potential backports.)

Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Fabiano Rosas <farosas@suse.de>
This commit is contained in:
Fabiano Rosas 2025-02-07 10:50:49 -03:00
parent 48796f6b44
commit 9b3b192f65
5 changed files with 50 additions and 2 deletions

View file

@ -42,6 +42,7 @@ GlobalProperty hw_compat_9_2[] = {
{ "virtio-balloon-pci-transitional", "vectors", "0" }, { "virtio-balloon-pci-transitional", "vectors", "0" },
{ "virtio-balloon-pci-non-transitional", "vectors", "0" }, { "virtio-balloon-pci-non-transitional", "vectors", "0" },
{ "virtio-mem-pci", "vectors", "0" }, { "virtio-mem-pci", "vectors", "0" },
{ "migration", "multifd-clean-tls-termination", "false" },
}; };
const size_t hw_compat_9_2_len = G_N_ELEMENTS(hw_compat_9_2); const size_t hw_compat_9_2_len = G_N_ELEMENTS(hw_compat_9_2);

View file

@ -443,6 +443,39 @@ struct MigrationState {
* Default value is false. (since 8.1) * Default value is false. (since 8.1)
*/ */
bool multifd_flush_after_each_section; bool multifd_flush_after_each_section;
/*
* This variable only makes sense when set on the machine that is
* the destination of a multifd migration with TLS enabled. It
* affects the behavior of the last send->recv iteration with
* regard to termination of the TLS session.
*
* When set:
*
* - the destination QEMU instance can expect to never get a
* GNUTLS_E_PREMATURE_TERMINATION error. Manifested as the error
* message: "The TLS connection was non-properly terminated".
*
* When clear:
*
* - the destination QEMU instance can expect to see a
* GNUTLS_E_PREMATURE_TERMINATION error in any multifd channel
* whenever the last recv() call of that channel happens after
* the source QEMU instance has already issued shutdown() on the
* channel.
*
* Commit 637280aeb2 (since 9.1) introduced a side effect that
* causes the destination instance to not be affected by the
* premature termination, while commit 1d457daf86 (since 10.0)
* causes the premature termination condition to be once again
* reachable.
*
* NOTE: Regardless of the state of this option, a premature
* termination of the TLS connection might happen due to error at
* any moment prior to the last send->recv iteration.
*/
bool multifd_clean_tls_termination;
/* /*
* This decides the size of guest memory chunk that will be used * This decides the size of guest memory chunk that will be used
* to track dirty bitmap clearing. The size of memory chunk will * to track dirty bitmap clearing. The size of memory chunk will

View file

@ -1151,6 +1151,7 @@ void multifd_recv_sync_main(void)
static void *multifd_recv_thread(void *opaque) static void *multifd_recv_thread(void *opaque)
{ {
MigrationState *s = migrate_get_current();
MultiFDRecvParams *p = opaque; MultiFDRecvParams *p = opaque;
Error *local_err = NULL; Error *local_err = NULL;
bool use_packets = multifd_use_packets(); bool use_packets = multifd_use_packets();
@ -1159,18 +1160,27 @@ static void *multifd_recv_thread(void *opaque)
trace_multifd_recv_thread_start(p->id); trace_multifd_recv_thread_start(p->id);
rcu_register_thread(); rcu_register_thread();
if (!s->multifd_clean_tls_termination) {
p->read_flags = QIO_CHANNEL_READ_FLAG_RELAXED_EOF;
}
while (true) { while (true) {
uint32_t flags = 0; uint32_t flags = 0;
bool has_data = false; bool has_data = false;
p->normal_num = 0; p->normal_num = 0;
if (use_packets) { if (use_packets) {
struct iovec iov = {
.iov_base = (void *)p->packet,
.iov_len = p->packet_len
};
if (multifd_recv_should_exit()) { if (multifd_recv_should_exit()) {
break; break;
} }
ret = qio_channel_read_all_eof(p->c, (void *)p->packet, ret = qio_channel_readv_full_all_eof(p->c, &iov, 1, NULL, NULL,
p->packet_len, &local_err); p->read_flags, &local_err);
if (!ret) { if (!ret) {
/* EOF */ /* EOF */
assert(!local_err); assert(!local_err);

View file

@ -244,6 +244,8 @@ typedef struct {
uint32_t zero_num; uint32_t zero_num;
/* used for de-compression methods */ /* used for de-compression methods */
void *compress_data; void *compress_data;
/* Flags for the QIOChannel */
int read_flags;
} MultiFDRecvParams; } MultiFDRecvParams;
typedef struct { typedef struct {

View file

@ -99,6 +99,8 @@ const Property migration_properties[] = {
clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT),
DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState,
preempt_pre_7_2, false), preempt_pre_7_2, false),
DEFINE_PROP_BOOL("multifd-clean-tls-termination", MigrationState,
multifd_clean_tls_termination, true),
/* Migration parameters */ /* Migration parameters */
DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState,