util/qemu-sockets: Introduce inet socket options controlling TCP keep-alive

With the default TCP stack configuration, it can take as long as 2 hours
before a connection times out because the other side is unreachable.
However, in some cases, the application needs to be aware of a
connection issue much sooner.

This is the case, for example, for postcopy live migration. If there is
no traffic from the migration destination guest (server-side) to the
migration source guest (client-side), a broken connection goes
unnoticed: the destination keeps waiting for pages indefinitely and does
not switch to the postcopy-paused state. This can happen, for example,
if the destination QEMU instance is started with the '-S' command line
option and the machine has not been started yet, or if the machine is
idle and produces no new page faults for not-yet-migrated pages.

This patch introduces new inet socket parameters that control the count,
idle period, and interval of TCP keep-alive packets sent before the
connection is considered broken. These parameters are available on
systems where the respective TCP socket options are defined, which
includes Linux, Windows, and macOS, but not OpenBSD. Additionally, macOS
exposes TCP_KEEPIDLE under the name TCP_KEEPALIVE instead, so the patch
supplies its own definition of TCP_KEEPIDLE.

The default value for all three parameters is 0, which means the system
configuration is used.

Signed-off-by: Juraj Marcin <jmarcin@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
This commit is contained in:
Juraj Marcin 2025-05-21 15:52:35 +02:00 committed by Daniel P. Berrangé
parent 316e8ee8d6
commit 1bd4237cb1
4 changed files with 168 additions and 0 deletions

View file

@ -2760,6 +2760,36 @@ if linux_io_uring.found()
config_host_data.set('HAVE_IO_URING_PREP_WRITEV2',
cc.has_header_symbol('liburing.h', 'io_uring_prep_writev2'))
endif
# Detect TCP_KEEPCNT: look for the symbol in netinet/tcp.h first and, if it
# is not found there, fall back to a compile test that includes ws2tcpip.h
# (the 'Win32 TCP_KEEPCNT' probe). The result is stored as HAVE_TCP_KEEPCNT.
config_host_data.set('HAVE_TCP_KEEPCNT',
cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPCNT') or
cc.compiles('''
#include <ws2tcpip.h>
#ifndef TCP_KEEPCNT
#error
#endif
int main(void) { return 0; }''',
name: 'Win32 TCP_KEEPCNT'))
# Detect TCP_KEEPIDLE. On Darwin the option is available under a different
# name, TCP_KEEPALIVE, so either symbol in netinet/tcp.h is accepted:
# https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/bsd/man/man4/tcp.4#L172
# If neither is found, fall back to a compile test that includes ws2tcpip.h
# (the 'Win32 TCP_KEEPIDLE' probe). The result is stored as HAVE_TCP_KEEPIDLE.
config_host_data.set('HAVE_TCP_KEEPIDLE',
cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPIDLE') or
cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPALIVE') or
cc.compiles('''
#include <ws2tcpip.h>
#ifndef TCP_KEEPIDLE
#error
#endif
int main(void) { return 0; }''',
name: 'Win32 TCP_KEEPIDLE'))
# Detect TCP_KEEPINTVL: look for the symbol in netinet/tcp.h first and, if it
# is not found there, fall back to a compile test that includes ws2tcpip.h
# (the 'Win32 TCP_KEEPINTVL' probe). The result is stored as
# HAVE_TCP_KEEPINTVL.
config_host_data.set('HAVE_TCP_KEEPINTVL',
cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPINTVL') or
cc.compiles('''
#include <ws2tcpip.h>
#ifndef TCP_KEEPINTVL
#error
#endif
int main(void) { return 0; }''',
name: 'Win32 TCP_KEEPINTVL'))
# has_member
config_host_data.set('HAVE_SIGEV_NOTIFY_THREAD_ID',

View file

@ -59,6 +59,22 @@
# @keep-alive: enable keep-alive when connecting to/listening on this socket.
# (Since 4.2, not supported for listening sockets until 10.1)
#
# @keep-alive-count: number of keep-alive packets sent before the connection is
# closed. Only supported for TCP sockets on systems where the TCP_KEEPCNT
# socket option is defined (this includes Linux, Windows, macOS, FreeBSD,
# but not OpenBSD). When set to 0, the system setting is used. (Since 10.1)
#
# @keep-alive-idle: time in seconds the connection needs to be idle before
# sending a keep-alive packet. Only supported for TCP sockets on systems
# where the TCP_KEEPIDLE socket option is defined (this includes Linux,
# Windows, macOS, FreeBSD, but not OpenBSD). When set to 0, the system
# setting is used. (Since 10.1)
#
# @keep-alive-interval: time in seconds between keep-alive packets. Only
# supported for TCP sockets on systems where the TCP_KEEPINTVL socket option
# is defined (this includes Linux, Windows, macOS, FreeBSD, but not
# OpenBSD). When set to 0, the system setting is used. (Since 10.1)
#
# @mptcp: enable multi-path TCP. (Since 6.1)
#
# Since: 1.3
@ -71,6 +87,9 @@
'*ipv4': 'bool',
'*ipv6': 'bool',
'*keep-alive': 'bool',
'*keep-alive-count': { 'type': 'uint32', 'if': 'HAVE_TCP_KEEPCNT' },
'*keep-alive-idle': { 'type': 'uint32', 'if': 'HAVE_TCP_KEEPIDLE' },
'*keep-alive-interval': { 'type': 'uint32', 'if': 'HAVE_TCP_KEEPINTVL' },
'*mptcp': { 'type': 'bool', 'if': 'HAVE_IPPROTO_MPTCP' } } }
##

View file

@ -359,6 +359,24 @@ static void inet_parse_test_helper(const char *str,
g_assert_cmpint(addr.ipv6, ==, exp_addr->ipv6);
g_assert_cmpint(addr.has_keep_alive, ==, exp_addr->has_keep_alive);
g_assert_cmpint(addr.keep_alive, ==, exp_addr->keep_alive);
#ifdef HAVE_TCP_KEEPCNT
g_assert_cmpint(addr.has_keep_alive_count, ==,
exp_addr->has_keep_alive_count);
g_assert_cmpint(addr.keep_alive_count, ==,
exp_addr->keep_alive_count);
#endif
#ifdef HAVE_TCP_KEEPIDLE
g_assert_cmpint(addr.has_keep_alive_idle, ==,
exp_addr->has_keep_alive_idle);
g_assert_cmpint(addr.keep_alive_idle, ==,
exp_addr->keep_alive_idle);
#endif
#ifdef HAVE_TCP_KEEPINTVL
g_assert_cmpint(addr.has_keep_alive_interval, ==,
exp_addr->has_keep_alive_interval);
g_assert_cmpint(addr.keep_alive_interval, ==,
exp_addr->keep_alive_interval);
#endif
#ifdef HAVE_IPPROTO_MPTCP
g_assert_cmpint(addr.has_mptcp, ==, exp_addr->has_mptcp);
g_assert_cmpint(addr.mptcp, ==, exp_addr->mptcp);
@ -460,6 +478,18 @@ static void test_inet_parse_all_options_good(void)
.ipv6 = true,
.has_keep_alive = true,
.keep_alive = true,
#ifdef HAVE_TCP_KEEPCNT
.has_keep_alive_count = true,
.keep_alive_count = 10,
#endif
#ifdef HAVE_TCP_KEEPIDLE
.has_keep_alive_idle = true,
.keep_alive_idle = 60,
#endif
#ifdef HAVE_TCP_KEEPINTVL
.has_keep_alive_interval = true,
.keep_alive_interval = 30,
#endif
#ifdef HAVE_IPPROTO_MPTCP
.has_mptcp = true,
.mptcp = false,
@ -467,6 +497,15 @@ static void test_inet_parse_all_options_good(void)
};
inet_parse_test_helper(
"[::1]:5000,numeric=on,to=5006,ipv4=off,ipv6=on,keep-alive=on"
#ifdef HAVE_TCP_KEEPCNT
",keep-alive-count=10"
#endif
#ifdef HAVE_TCP_KEEPIDLE
",keep-alive-idle=60"
#endif
#ifdef HAVE_TCP_KEEPINTVL
",keep-alive-interval=30"
#endif
#ifdef HAVE_IPPROTO_MPTCP
",mptcp=off"
#endif

View file

@ -45,6 +45,14 @@
# define AI_NUMERICSERV 0
#endif
/*
* On macOS TCP_KEEPIDLE is available under a different name, TCP_KEEPALIVE.
* https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/bsd/man/man4/tcp.4#L172
*/
#if defined(TCP_KEEPALIVE) && !defined(TCP_KEEPIDLE)
# define TCP_KEEPIDLE TCP_KEEPALIVE
#endif
static int inet_getport(struct addrinfo *e)
{
@ -218,6 +226,42 @@ static int inet_set_sockopts(int sock, InetSocketAddress *saddr, Error **errp)
"Unable to set keep-alive option on socket");
return -1;
}
#ifdef HAVE_TCP_KEEPCNT
if (saddr->has_keep_alive_count && saddr->keep_alive_count) {
int keep_count = saddr->keep_alive_count;
ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT, &keep_count,
sizeof(keep_count));
if (ret < 0) {
error_setg_errno(errp, errno,
"Unable to set TCP keep-alive count option on socket");
return -1;
}
}
#endif
#ifdef HAVE_TCP_KEEPIDLE
if (saddr->has_keep_alive_idle && saddr->keep_alive_idle) {
int keep_idle = saddr->keep_alive_idle;
ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE, &keep_idle,
sizeof(keep_idle));
if (ret < 0) {
error_setg_errno(errp, errno,
"Unable to set TCP keep-alive idle option on socket");
return -1;
}
}
#endif
#ifdef HAVE_TCP_KEEPINTVL
if (saddr->has_keep_alive_interval && saddr->keep_alive_interval) {
int keep_interval = saddr->keep_alive_interval;
ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL, &keep_interval,
sizeof(keep_interval));
if (ret < 0) {
error_setg_errno(errp, errno,
"Unable to set TCP keep-alive interval option on socket");
return -1;
}
}
#endif
}
return 0;
}
@ -630,6 +674,24 @@ static QemuOptsList inet_opts = {
.name = "keep-alive",
.type = QEMU_OPT_BOOL,
},
#ifdef HAVE_TCP_KEEPCNT
{
.name = "keep-alive-count",
.type = QEMU_OPT_NUMBER,
},
#endif
#ifdef HAVE_TCP_KEEPIDLE
{
.name = "keep-alive-idle",
.type = QEMU_OPT_NUMBER,
},
#endif
#ifdef HAVE_TCP_KEEPINTVL
{
.name = "keep-alive-interval",
.type = QEMU_OPT_NUMBER,
},
#endif
#ifdef HAVE_IPPROTO_MPTCP
{
.name = "mptcp",
@ -695,6 +757,24 @@ int inet_parse(InetSocketAddress *addr, const char *str, Error **errp)
addr->has_keep_alive = true;
addr->keep_alive = qemu_opt_get_bool(opts, "keep-alive", false);
}
#ifdef HAVE_TCP_KEEPCNT
if (qemu_opt_find(opts, "keep-alive-count")) {
addr->has_keep_alive_count = true;
addr->keep_alive_count = qemu_opt_get_number(opts, "keep-alive-count", 0);
}
#endif
#ifdef HAVE_TCP_KEEPIDLE
if (qemu_opt_find(opts, "keep-alive-idle")) {
addr->has_keep_alive_idle = true;
addr->keep_alive_idle = qemu_opt_get_number(opts, "keep-alive-idle", 0);
}
#endif
#ifdef HAVE_TCP_KEEPINTVL
if (qemu_opt_find(opts, "keep-alive-interval")) {
addr->has_keep_alive_interval = true;
addr->keep_alive_interval = qemu_opt_get_number(opts, "keep-alive-interval", 0);
}
#endif
#ifdef HAVE_IPPROTO_MPTCP
if (qemu_opt_find(opts, "mptcp")) {
addr->has_mptcp = true;