From 1f37280b37dbf85f36748f359a9f8802c8fe7ccd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Wed, 23 Oct 2024 09:50:56 +0100 Subject: [PATCH 01/14] net: fix build when libbpf is disabled, but libxdp is enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The net/af-xdp.c code is enabled when the libxdp library is present, however, it also has direct API calls to bpf_xdp_query_id & bpf_xdp_detach which are provided by the libbpf library. As a result if building with --disable-libbpf, but libxdp gets auto-detected, we'll fail to link QEMU /usr/bin/ld: libcommon.a.p/net_af-xdp.c.o: undefined reference to symbol 'bpf_xdp_query_id@@LIBBPF_0.7.0' There are two bugs here * Since we have direct libbpf API calls, when building net/af-xdp.c, we must tell meson that libbpf is a dependancy, so that we directly link to it, rather than relying on indirect linkage. * When must skip probing for libxdp at all, when libbpf is not found, raising an error if --enable-libxdp was given explicitly. Fixes: cb039ef3d9e3112da01e1ecd9b136ac9809ef733 Signed-off-by: Daniel P. Berrangé Signed-off-by: Jason Wang --- meson.build | 10 ++++++++-- net/meson.build | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/meson.build b/meson.build index c26c417de1..9432c5f173 100644 --- a/meson.build +++ b/meson.build @@ -2164,8 +2164,14 @@ endif # libxdp libxdp = not_found if not get_option('af_xdp').auto() or have_system - libxdp = dependency('libxdp', required: get_option('af_xdp'), - version: '>=1.4.0', method: 'pkg-config') + if libbpf.found() + libxdp = dependency('libxdp', required: get_option('af_xdp'), + version: '>=1.4.0', method: 'pkg-config') + else + if get_option('af_xdp').enabled() + error('libxdp requested, but libbpf is not available') + endif + endif endif # libdw diff --git a/net/meson.build b/net/meson.build index e0cd71470e..bb97b4dcbe 100644 --- a/net/meson.build +++ b/net/meson.build @@ -39,7 +39,7 @@ if have_netmap system_ss.add(files('netmap.c')) endif -system_ss.add(when: libxdp, if_true: files('af-xdp.c')) +system_ss.add(when: [libxdp, libbpf], if_true: files('af-xdp.c')) if have_vhost_net_user system_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-user.c'), if_false: files('vhost-user-stub.c')) From 493a2403c247bdcfc812303f8dc0801778de4798 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Wed, 23 Oct 2024 09:50:57 +0100 Subject: [PATCH 02/14] hw/net: fix typo s/epbf/ebpf/ in virtio-net MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Daniel P. Berrangé Signed-off-by: Jason Wang --- hw/net/virtio-net.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index fb84d142ee..7c050246ea 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1254,7 +1254,7 @@ static void rss_data_to_rss_config(struct VirtioNetRssData *data, config->default_queue = data->default_queue; } -static bool virtio_net_attach_epbf_rss(VirtIONet *n) +static bool virtio_net_attach_ebpf_rss(VirtIONet *n) { struct EBPFRSSConfig config = {}; @@ -1276,7 +1276,7 @@ static bool virtio_net_attach_epbf_rss(VirtIONet *n) return true; } -static void virtio_net_detach_epbf_rss(VirtIONet *n) +static void virtio_net_detach_ebpf_rss(VirtIONet *n) { virtio_net_attach_ebpf_to_backend(n->nic, -1); } @@ -1286,8 +1286,8 @@ static void virtio_net_commit_rss_config(VirtIONet *n) if (n->rss_data.enabled) { n->rss_data.enabled_software_rss = n->rss_data.populate_hash; if (n->rss_data.populate_hash) { - virtio_net_detach_epbf_rss(n); - } else if (!virtio_net_attach_epbf_rss(n)) { + virtio_net_detach_ebpf_rss(n); + } else if (!virtio_net_attach_ebpf_rss(n)) { if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { warn_report("Can't load eBPF RSS for vhost"); } else { @@ -1300,7 +1300,7 @@ static void virtio_net_commit_rss_config(VirtIONet *n) n->rss_data.indirections_len, sizeof(n->rss_data.key)); } else { - virtio_net_detach_epbf_rss(n); + virtio_net_detach_ebpf_rss(n); trace_virtio_net_rss_disable(); } } From a9436dd4077b0ee04dbbd2354a6738654530a206 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Wed, 23 Oct 2024 09:50:58 +0100 Subject: [PATCH 03/14] ebpf: drop redundant parameter checks in static methods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Various static methods have checks on their parameters which were already checked immediately before the method was invoked. Drop these redundat checks to simplify the following commit which adds formal error reporting. Signed-off-by: Daniel P. Berrangé Signed-off-by: Jason Wang --- ebpf/ebpf_rss.c | 32 +++++--------------------------- 1 file changed, 5 insertions(+), 27 deletions(-) diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c index dcaa80f380..0c42355a93 100644 --- a/ebpf/ebpf_rss.c +++ b/ebpf/ebpf_rss.c @@ -49,10 +49,6 @@ bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx) static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx) { - if (!ebpf_rss_is_loaded(ctx)) { - return false; - } - ctx->mmap_configuration = mmap(NULL, qemu_real_host_page_size(), PROT_READ | PROT_WRITE, MAP_SHARED, ctx->map_configuration, 0); @@ -90,10 +86,6 @@ toeplitz_fail: static void ebpf_rss_munmap(struct EBPFRSSContext *ctx) { - if (!ebpf_rss_is_loaded(ctx)) { - return; - } - munmap(ctx->mmap_indirections_table, qemu_real_host_page_size()); munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size()); munmap(ctx->mmap_configuration, qemu_real_host_page_size()); @@ -177,15 +169,10 @@ bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd, return true; } -static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx, +static void ebpf_rss_set_config(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config) { - if (!ebpf_rss_is_loaded(ctx)) { - return false; - } - memcpy(ctx->mmap_configuration, config, sizeof(*config)); - return true; } static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx, @@ -194,8 +181,7 @@ static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx, { char *cursor = ctx->mmap_indirections_table; - if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL || - len > VIRTIO_NET_RSS_MAX_TABLE_LEN) { + if (len > VIRTIO_NET_RSS_MAX_TABLE_LEN) { return false; } @@ -207,20 +193,16 @@ static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx, return true; } -static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx, +static void ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx, uint8_t *toeplitz_key) { /* prepare toeplitz key */ uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {}; - if (!ebpf_rss_is_loaded(ctx) || toeplitz_key == NULL) { - return false; - } memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE); *(uint32_t *)toe = ntohl(*(uint32_t *)toe); memcpy(ctx->mmap_toeplitz_key, toe, VIRTIO_NET_RSS_MAX_KEY_SIZE); - return true; } bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config, @@ -231,18 +213,14 @@ bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config, return false; } - if (!ebpf_rss_set_config(ctx, config)) { - return false; - } + ebpf_rss_set_config(ctx, config); if (!ebpf_rss_set_indirections_table(ctx, indirections_table, config->indirections_len)) { return false; } - if (!ebpf_rss_set_toepliz_key(ctx, toeplitz_key)) { - return false; - } + ebpf_rss_set_toepliz_key(ctx, toeplitz_key); return true; } From 31efce1e311830431718536c6461815b04e08bf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Wed, 23 Oct 2024 09:50:59 +0100 Subject: [PATCH 04/14] ebpf: improve error trace events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A design pattern of trace_foo_error("descriptive string") is undesirable because it does not allow for filtering trace events based on the error scenario. Split eBPF error trace event into three separate events to address this filtering need. Signed-off-by: Daniel P. Berrangé Signed-off-by: Jason Wang --- ebpf/ebpf_rss.c | 10 +++++----- ebpf/trace-events | 4 +++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c index 0c42355a93..d39916b368 100644 --- a/ebpf/ebpf_rss.c +++ b/ebpf/ebpf_rss.c @@ -53,21 +53,21 @@ static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx) PROT_READ | PROT_WRITE, MAP_SHARED, ctx->map_configuration, 0); if (ctx->mmap_configuration == MAP_FAILED) { - trace_ebpf_error("eBPF RSS", "can not mmap eBPF configuration array"); + trace_ebpf_rss_mmap_error(ctx, "configuration"); return false; } ctx->mmap_toeplitz_key = mmap(NULL, qemu_real_host_page_size(), PROT_READ | PROT_WRITE, MAP_SHARED, ctx->map_toeplitz_key, 0); if (ctx->mmap_toeplitz_key == MAP_FAILED) { - trace_ebpf_error("eBPF RSS", "can not mmap eBPF toeplitz key"); + trace_ebpf_rss_mmap_error(ctx, "toeplitz key"); goto toeplitz_fail; } ctx->mmap_indirections_table = mmap(NULL, qemu_real_host_page_size(), PROT_READ | PROT_WRITE, MAP_SHARED, ctx->map_indirections_table, 0); if (ctx->mmap_indirections_table == MAP_FAILED) { - trace_ebpf_error("eBPF RSS", "can not mmap eBPF indirection table"); + trace_ebpf_rss_mmap_error(ctx, "indirections table"); goto indirection_fail; } @@ -105,14 +105,14 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx) rss_bpf_ctx = rss_bpf__open(); if (rss_bpf_ctx == NULL) { - trace_ebpf_error("eBPF RSS", "can not open eBPF RSS object"); + trace_ebpf_rss_open_error(ctx); goto error; } bpf_program__set_type(rss_bpf_ctx->progs.tun_rss_steering_prog, BPF_PROG_TYPE_SOCKET_FILTER); if (rss_bpf__load(rss_bpf_ctx)) { - trace_ebpf_error("eBPF RSS", "can not load RSS program"); + trace_ebpf_rss_load_error(ctx); goto error; } diff --git a/ebpf/trace-events b/ebpf/trace-events index b3ad1a35f2..a0f157be37 100644 --- a/ebpf/trace-events +++ b/ebpf/trace-events @@ -1,4 +1,6 @@ # See docs/devel/tracing.rst for syntax documentation. # ebpf-rss.c -ebpf_error(const char *s1, const char *s2) "error in %s: %s" +ebpf_rss_load_error(void *ctx) "ctx=%p" +ebpf_rss_mmap_error(void *ctx, const char *object) "ctx=%p object=%s" +ebpf_rss_open_error(void *ctx) "ctx=%p" From 00b69f1d867ddcf8884c92f5647b424088e754e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Wed, 23 Oct 2024 09:51:00 +0100 Subject: [PATCH 05/14] ebpf: add formal error reporting to all APIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The eBPF code is currently reporting error messages through trace events. Trace events are fine for debugging, but they are not to be considered the primary error reporting mechanism, as their output is inaccessible to callers. This adds an "Error **errp" parameter to all methods which have important error scenarios to report to the caller. Signed-off-by: Daniel P. Berrangé Signed-off-by: Jason Wang --- ebpf/ebpf_rss-stub.c | 8 +++--- ebpf/ebpf_rss.c | 59 +++++++++++++++++++++++++++++++++++--------- ebpf/ebpf_rss.h | 10 +++++--- hw/net/virtio-net.c | 7 +++--- 4 files changed, 64 insertions(+), 20 deletions(-) diff --git a/ebpf/ebpf_rss-stub.c b/ebpf/ebpf_rss-stub.c index 8d7fae2ad9..d0e7f99fb9 100644 --- a/ebpf/ebpf_rss-stub.c +++ b/ebpf/ebpf_rss-stub.c @@ -23,19 +23,21 @@ bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx) return false; } -bool ebpf_rss_load(struct EBPFRSSContext *ctx) +bool ebpf_rss_load(struct EBPFRSSContext *ctx, Error **errp) { return false; } bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd, - int config_fd, int toeplitz_fd, int table_fd) + int config_fd, int toeplitz_fd, int table_fd, + Error **errp) { return false; } bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config, - uint16_t *indirections_table, uint8_t *toeplitz_key) + uint16_t *indirections_table, uint8_t *toeplitz_key, + Error **errp) { return false; } diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c index d39916b368..67cdab197d 100644 --- a/ebpf/ebpf_rss.c +++ b/ebpf/ebpf_rss.c @@ -47,13 +47,14 @@ bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx) return ctx != NULL && (ctx->obj != NULL || ctx->program_fd != -1); } -static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx) +static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx, Error **errp) { ctx->mmap_configuration = mmap(NULL, qemu_real_host_page_size(), PROT_READ | PROT_WRITE, MAP_SHARED, ctx->map_configuration, 0); if (ctx->mmap_configuration == MAP_FAILED) { trace_ebpf_rss_mmap_error(ctx, "configuration"); + error_setg(errp, "Unable to map eBPF configuration array"); return false; } ctx->mmap_toeplitz_key = mmap(NULL, qemu_real_host_page_size(), @@ -61,6 +62,7 @@ static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx) ctx->map_toeplitz_key, 0); if (ctx->mmap_toeplitz_key == MAP_FAILED) { trace_ebpf_rss_mmap_error(ctx, "toeplitz key"); + error_setg(errp, "Unable to map eBPF toeplitz array"); goto toeplitz_fail; } ctx->mmap_indirections_table = mmap(NULL, qemu_real_host_page_size(), @@ -68,6 +70,7 @@ static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx) ctx->map_indirections_table, 0); if (ctx->mmap_indirections_table == MAP_FAILED) { trace_ebpf_rss_mmap_error(ctx, "indirections table"); + error_setg(errp, "Unable to map eBPF indirection array"); goto indirection_fail; } @@ -95,7 +98,7 @@ static void ebpf_rss_munmap(struct EBPFRSSContext *ctx) ctx->mmap_indirections_table = NULL; } -bool ebpf_rss_load(struct EBPFRSSContext *ctx) +bool ebpf_rss_load(struct EBPFRSSContext *ctx, Error **errp) { struct rss_bpf *rss_bpf_ctx; @@ -106,6 +109,7 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx) rss_bpf_ctx = rss_bpf__open(); if (rss_bpf_ctx == NULL) { trace_ebpf_rss_open_error(ctx); + error_setg(errp, "Unable to open eBPF RSS object"); goto error; } @@ -113,6 +117,7 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx) if (rss_bpf__load(rss_bpf_ctx)) { trace_ebpf_rss_load_error(ctx); + error_setg(errp, "Unable to load eBPF program"); goto error; } @@ -126,7 +131,7 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx) ctx->map_toeplitz_key = bpf_map__fd( rss_bpf_ctx->maps.tap_rss_map_toeplitz_key); - if (!ebpf_rss_mmap(ctx)) { + if (!ebpf_rss_mmap(ctx, errp)) { goto error; } @@ -143,13 +148,28 @@ error: } bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd, - int config_fd, int toeplitz_fd, int table_fd) + int config_fd, int toeplitz_fd, int table_fd, + Error **errp) { if (ebpf_rss_is_loaded(ctx)) { + error_setg(errp, "eBPF program is already loaded"); return false; } - if (program_fd < 0 || config_fd < 0 || toeplitz_fd < 0 || table_fd < 0) { + if (program_fd < 0) { + error_setg(errp, "eBPF program FD is not open"); + return false; + } + if (config_fd < 0) { + error_setg(errp, "eBPF config FD is not open"); + return false; + } + if (toeplitz_fd < 0) { + error_setg(errp, "eBPF toeplitz FD is not open"); + return false; + } + if (table_fd < 0) { + error_setg(errp, "eBPF indirection FD is not open"); return false; } @@ -158,7 +178,7 @@ bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd, ctx->map_toeplitz_key = toeplitz_fd; ctx->map_indirections_table = table_fd; - if (!ebpf_rss_mmap(ctx)) { + if (!ebpf_rss_mmap(ctx, errp)) { ctx->program_fd = -1; ctx->map_configuration = -1; ctx->map_toeplitz_key = -1; @@ -177,11 +197,14 @@ static void ebpf_rss_set_config(struct EBPFRSSContext *ctx, static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx, uint16_t *indirections_table, - size_t len) + size_t len, + Error **errp) { char *cursor = ctx->mmap_indirections_table; if (len > VIRTIO_NET_RSS_MAX_TABLE_LEN) { + error_setg(errp, "Indirections table length %zu exceeds limit %d", + len, VIRTIO_NET_RSS_MAX_TABLE_LEN); return false; } @@ -206,17 +229,31 @@ static void ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx, } bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config, - uint16_t *indirections_table, uint8_t *toeplitz_key) + uint16_t *indirections_table, uint8_t *toeplitz_key, + Error **errp) { - if (!ebpf_rss_is_loaded(ctx) || config == NULL || - indirections_table == NULL || toeplitz_key == NULL) { + if (!ebpf_rss_is_loaded(ctx)) { + error_setg(errp, "eBPF program is not loaded"); + return false; + } + if (config == NULL) { + error_setg(errp, "eBPF config table is NULL"); + return false; + } + if (indirections_table == NULL) { + error_setg(errp, "eBPF indirections table is NULL"); + return false; + } + if (toeplitz_key == NULL) { + error_setg(errp, "eBPF toeplitz key is NULL"); return false; } ebpf_rss_set_config(ctx, config); if (!ebpf_rss_set_indirections_table(ctx, indirections_table, - config->indirections_len)) { + config->indirections_len, + errp)) { return false; } diff --git a/ebpf/ebpf_rss.h b/ebpf/ebpf_rss.h index 239242b0d2..86a5787789 100644 --- a/ebpf/ebpf_rss.h +++ b/ebpf/ebpf_rss.h @@ -14,6 +14,8 @@ #ifndef QEMU_EBPF_RSS_H #define QEMU_EBPF_RSS_H +#include "qapi/error.h" + #define EBPF_RSS_MAX_FDS 4 struct EBPFRSSContext { @@ -41,13 +43,15 @@ void ebpf_rss_init(struct EBPFRSSContext *ctx); bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx); -bool ebpf_rss_load(struct EBPFRSSContext *ctx); +bool ebpf_rss_load(struct EBPFRSSContext *ctx, Error **errp); bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd, - int config_fd, int toeplitz_fd, int table_fd); + int config_fd, int toeplitz_fd, int table_fd, + Error **errp); bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config, - uint16_t *indirections_table, uint8_t *toeplitz_key); + uint16_t *indirections_table, uint8_t *toeplitz_key, + Error **errp); void ebpf_rss_unload(struct EBPFRSSContext *ctx); diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 7c050246ea..289fba8152 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1265,7 +1265,8 @@ static bool virtio_net_attach_ebpf_rss(VirtIONet *n) rss_data_to_rss_config(&n->rss_data, &config); if (!ebpf_rss_set_all(&n->ebpf_rss, &config, - n->rss_data.indirections_table, n->rss_data.key)) { + n->rss_data.indirections_table, n->rss_data.key, + NULL)) { return false; } @@ -1336,7 +1337,7 @@ static bool virtio_net_load_ebpf_fds(VirtIONet *n) } } - ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3]); + ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3], NULL); exit: if (!ret) { @@ -1354,7 +1355,7 @@ static bool virtio_net_load_ebpf(VirtIONet *n) if (virtio_net_attach_ebpf_to_backend(n->nic, -1)) { if (!(n->ebpf_rss_fds && virtio_net_load_ebpf_fds(n))) { - ret = ebpf_rss_load(&n->ebpf_rss); + ret = ebpf_rss_load(&n->ebpf_rss, NULL); } } From b5900dff14e5a8334766de6b37629c8020c6bbb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Wed, 23 Oct 2024 09:51:01 +0100 Subject: [PATCH 06/14] hw/net: report errors from failing to use eBPF RSS FDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the user/mgmt app passed in a set of pre-opened FDs for eBPF RSS, then it is expecting QEMU to use them. Any failure to do so must be considered a fatal error and propagated back up the stack, otherwise deployment mistakes will not be detectable in a prompt manner. When not using pre-opened FDs, then eBPF RSS is tried on a "best effort" basis only and thus fallback to software RSS is valid. Signed-off-by: Daniel P. Berrangé Signed-off-by: Jason Wang --- hw/net/virtio-net.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 289fba8152..c08ae55424 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1316,28 +1316,27 @@ static void virtio_net_disable_rss(VirtIONet *n) virtio_net_commit_rss_config(n); } -static bool virtio_net_load_ebpf_fds(VirtIONet *n) +static bool virtio_net_load_ebpf_fds(VirtIONet *n, Error **errp) { int fds[EBPF_RSS_MAX_FDS] = { [0 ... EBPF_RSS_MAX_FDS - 1] = -1}; int ret = true; int i = 0; if (n->nr_ebpf_rss_fds != EBPF_RSS_MAX_FDS) { - warn_report("Expected %d file descriptors but got %d", - EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds); - return false; - } + error_setg(errp, "Expected %d file descriptors but got %d", + EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds); + return false; + } for (i = 0; i < n->nr_ebpf_rss_fds; i++) { - fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], - &error_warn); + fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], errp); if (fds[i] < 0) { ret = false; goto exit; } } - ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3], NULL); + ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3], errp); exit: if (!ret) { @@ -1349,13 +1348,15 @@ exit: return ret; } -static bool virtio_net_load_ebpf(VirtIONet *n) +static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp) { bool ret = false; if (virtio_net_attach_ebpf_to_backend(n->nic, -1)) { - if (!(n->ebpf_rss_fds && virtio_net_load_ebpf_fds(n))) { - ret = ebpf_rss_load(&n->ebpf_rss, NULL); + if (n->ebpf_rss_fds) { + ret = virtio_net_load_ebpf_fds(n, errp); + } else { + ret = ebpf_rss_load(&n->ebpf_rss, errp); } } @@ -3761,7 +3762,23 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) net_rx_pkt_init(&n->rx_pkt); if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { - virtio_net_load_ebpf(n); + Error *err = NULL; + if (!virtio_net_load_ebpf(n, &err)) { + /* + * If user explicitly gave QEMU RSS FDs to use, then + * failing to use them must be considered a fatal + * error. If no RSS FDs were provided, QEMU is trying + * eBPF on a "best effort" basis only, so report a + * warning and allow fallback to software RSS. + */ + if (n->ebpf_rss_fds) { + error_propagate(errp, err); + } else { + warn_report("unable to load eBPF RSS: %s", + error_get_pretty(err)); + error_free(err); + } + } } } From f5cae19d109b99ed48ad147fc6c6a641329119bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Wed, 23 Oct 2024 09:51:02 +0100 Subject: [PATCH 07/14] ebpf: improve trace event coverage to all key operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The existing error trace event is renamed to have a name prefix matching its source file & to remove the redundant first arg that adds no useful information. Signed-off-by: Daniel P. Berrangé Signed-off-by: Jason Wang --- ebpf/ebpf_rss.c | 19 +++++++++++++++++++ ebpf/trace-events | 4 ++++ 2 files changed, 23 insertions(+) diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c index 67cdab197d..e793786c17 100644 --- a/ebpf/ebpf_rss.c +++ b/ebpf/ebpf_rss.c @@ -74,6 +74,10 @@ static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx, Error **errp) goto indirection_fail; } + trace_ebpf_rss_mmap(ctx, + ctx->mmap_configuration, + ctx->mmap_toeplitz_key, + ctx->mmap_indirections_table); return true; indirection_fail: @@ -131,6 +135,11 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx, Error **errp) ctx->map_toeplitz_key = bpf_map__fd( rss_bpf_ctx->maps.tap_rss_map_toeplitz_key); + trace_ebpf_rss_load(ctx, + ctx->program_fd, + ctx->map_configuration, + ctx->map_indirections_table, + ctx->map_toeplitz_key); if (!ebpf_rss_mmap(ctx, errp)) { goto error; } @@ -178,6 +187,12 @@ bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd, ctx->map_toeplitz_key = toeplitz_fd; ctx->map_indirections_table = table_fd; + trace_ebpf_rss_load(ctx, + ctx->program_fd, + ctx->map_configuration, + ctx->map_indirections_table, + ctx->map_toeplitz_key); + if (!ebpf_rss_mmap(ctx, errp)) { ctx->program_fd = -1; ctx->map_configuration = -1; @@ -259,6 +274,8 @@ bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config, ebpf_rss_set_toepliz_key(ctx, toeplitz_key); + trace_ebpf_rss_set_data(ctx, config, indirections_table, toeplitz_key); + return true; } @@ -268,6 +285,8 @@ void ebpf_rss_unload(struct EBPFRSSContext *ctx) return; } + trace_ebpf_rss_unload(ctx); + ebpf_rss_munmap(ctx); if (ctx->obj) { diff --git a/ebpf/trace-events b/ebpf/trace-events index a0f157be37..bf3d9b6451 100644 --- a/ebpf/trace-events +++ b/ebpf/trace-events @@ -1,6 +1,10 @@ # See docs/devel/tracing.rst for syntax documentation. # ebpf-rss.c +ebpf_rss_load(void *ctx, int progfd, int cfgfd, int toepfd, int indirfd) "ctx=%p program-fd=%d config-fd=%d toeplitz-fd=%d indirection-fd=%d" ebpf_rss_load_error(void *ctx) "ctx=%p" +ebpf_rss_mmap(void *ctx, void *cfgptr, void *toepptr, void *indirptr) "ctx=%p config-ptr=%p toeplitz-ptr=%p indirection-ptr=%p" ebpf_rss_mmap_error(void *ctx, const char *object) "ctx=%p object=%s" ebpf_rss_open_error(void *ctx) "ctx=%p" +ebpf_rss_set_data(void *ctx, void *cfgptr, void *toepptr, void *indirptr) "ctx=%p config-ptr=%p toeplitz-ptr=%p indirection-ptr=%p" +ebpf_rss_unload(void *ctx) "rss unload ctx=%p" From ae311fb31543ca4e9de38c8435ebbdf6eca664d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Wed, 23 Oct 2024 09:51:03 +0100 Subject: [PATCH 08/14] hw/net: improve tracing of eBPF RSS setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds more trace events to key eBPF RSS setup operations, and also distinguishes events from multiple NIC instances. Signed-off-by: Daniel P. Berrangé Signed-off-by: Jason Wang --- hw/net/trace-events | 8 +++++--- hw/net/virtio-net.c | 9 ++++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/hw/net/trace-events b/hw/net/trace-events index 4c6687923e..91a3d0c054 100644 --- a/hw/net/trace-events +++ b/hw/net/trace-events @@ -394,9 +394,11 @@ virtio_net_announce_notify(void) "" virtio_net_announce_timer(int round) "%d" virtio_net_handle_announce(int round) "%d" virtio_net_post_load_device(void) -virtio_net_rss_disable(void) -virtio_net_rss_error(const char *msg, uint32_t value) "%s, value 0x%08x" -virtio_net_rss_enable(uint32_t p1, uint16_t p2, uint8_t p3) "hashes 0x%x, table of %d, key of %d" +virtio_net_rss_load(void *nic, size_t nfds, void *fds) "nic=%p nfds=%zu fds=%p" +virtio_net_rss_attach_ebpf(void *nic, int prog_fd) "nic=%p prog-fd=%d" +virtio_net_rss_disable(void *nic) "nic=%p" +virtio_net_rss_error(void *nic, const char *msg, uint32_t value) "nic=%p msg=%s, value 0x%08x" +virtio_net_rss_enable(void *nic, uint32_t p1, uint16_t p2, uint8_t p3) "nic=%p hashes 0x%x, table of %d, key of %d" # tulip.c tulip_reg_write(uint64_t addr, const char *name, int size, uint64_t val) "addr 0x%02"PRIx64" (%s) size %d value 0x%08"PRIx64 diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index c08ae55424..60e19bffcb 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1241,6 +1241,7 @@ static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd) return false; } + trace_virtio_net_rss_attach_ebpf(nic, prog_fd); return nc->info->set_steering_ebpf(nc, prog_fd); } @@ -1297,12 +1298,13 @@ static void virtio_net_commit_rss_config(VirtIONet *n) } } - trace_virtio_net_rss_enable(n->rss_data.hash_types, + trace_virtio_net_rss_enable(n, + n->rss_data.hash_types, n->rss_data.indirections_len, sizeof(n->rss_data.key)); } else { virtio_net_detach_ebpf_rss(n); - trace_virtio_net_rss_disable(); + trace_virtio_net_rss_disable(n); } } @@ -1353,6 +1355,7 @@ static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp) bool ret = false; if (virtio_net_attach_ebpf_to_backend(n->nic, -1)) { + trace_virtio_net_rss_load(n, n->nr_ebpf_rss_fds, n->ebpf_rss_fds); if (n->ebpf_rss_fds) { ret = virtio_net_load_ebpf_fds(n, errp); } else { @@ -1484,7 +1487,7 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, virtio_net_commit_rss_config(n); return queue_pairs; error: - trace_virtio_net_rss_error(err_msg, err_value); + trace_virtio_net_rss_error(n, err_msg, err_value); virtio_net_disable_rss(n); return 0; } From c40e962d83f9968beb50fa7f3b6718461a7aedbf Mon Sep 17 00:00:00 2001 From: Daniil Tatianin Date: Fri, 25 Oct 2024 10:35:24 +0300 Subject: [PATCH 09/14] net/stream: deprecate 'reconnect' in favor of 'reconnect-ms' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do the same thing we already did for chardev in c8e2b6b4d7e, and introduce a new 'reconnect-ms' option to make it possible to specify sub-second timeouts. This also changes the related documentaion and tests to use reconnect-ms as well. Signed-off-by: Daniil Tatianin Reviewed-by: Marc-André Lureau Signed-off-by: Jason Wang --- docs/about/deprecated.rst | 10 ++++++++++ net/stream.c | 34 ++++++++++++++++++++++------------ qapi/net.json | 13 ++++++++++++- qemu-options.hx | 24 ++++++++++++------------ tests/qtest/netdev-socket.c | 2 +- 5 files changed, 57 insertions(+), 26 deletions(-) diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index ce38a3d0cf..1e1e9f5f18 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -400,6 +400,16 @@ Backend ``memory`` (since 9.0) The ``reconnect`` option only allows specifiying second granularity timeouts, which is not enough for all types of use cases, use ``reconnect-ms`` instead. + +Net device options +'''''''''''''''''' + +Stream ``reconnect`` (since 9.2) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``reconnect`` option only allows specifiying second granularity timeouts, +which is not enough for all types of use cases, use ``reconnect-ms`` instead. + CPU device properties ''''''''''''''''''''' diff --git a/net/stream.c b/net/stream.c index 97e6ec6679..4de5613844 100644 --- a/net/stream.c +++ b/net/stream.c @@ -51,7 +51,7 @@ typedef struct NetStreamState { guint ioc_write_tag; SocketReadState rs; unsigned int send_index; /* number of bytes sent*/ - uint32_t reconnect; + uint32_t reconnect_ms; guint timer_tag; SocketAddress *addr; } NetStreamState; @@ -387,10 +387,9 @@ static gboolean net_stream_reconnect(gpointer data) static void net_stream_arm_reconnect(NetStreamState *s) { - if (s->reconnect && s->timer_tag == 0) { + if (s->reconnect_ms && s->timer_tag == 0) { qemu_set_info_str(&s->nc, "connecting"); - s->timer_tag = g_timeout_add_seconds(s->reconnect, - net_stream_reconnect, s); + s->timer_tag = g_timeout_add(s->reconnect_ms, net_stream_reconnect, s); } } @@ -398,7 +397,7 @@ static int net_stream_client_init(NetClientState *peer, const char *model, const char *name, SocketAddress *addr, - uint32_t reconnect, + uint32_t reconnect_ms, Error **errp) { NetStreamState *s; @@ -412,8 +411,8 @@ static int net_stream_client_init(NetClientState *peer, s->ioc = QIO_CHANNEL(sioc); s->nc.link_down = true; - s->reconnect = reconnect; - if (reconnect) { + s->reconnect_ms = reconnect_ms; + if (reconnect_ms) { s->addr = QAPI_CLONE(SocketAddress, addr); } qio_channel_socket_connect_async(sioc, addr, @@ -432,13 +431,24 @@ int net_init_stream(const Netdev *netdev, const char *name, sock = &netdev->u.stream; if (!sock->has_server || !sock->server) { + uint32_t reconnect_ms = 0; + + if (sock->has_reconnect && sock->has_reconnect_ms) { + error_setg(errp, "'reconnect' and 'reconnect-ms' are mutually " + "exclusive"); + return -1; + } else if (sock->has_reconnect_ms) { + reconnect_ms = sock->reconnect_ms; + } else if (sock->has_reconnect) { + reconnect_ms = sock->reconnect * 1000u; + } + return net_stream_client_init(peer, "stream", name, sock->addr, - sock->has_reconnect ? sock->reconnect : 0, - errp); + reconnect_ms, errp); } - if (sock->has_reconnect) { - error_setg(errp, "'reconnect' option is incompatible with " - "socket in server mode"); + if (sock->has_reconnect || sock->has_reconnect_ms) { + error_setg(errp, "'reconnect' and 'reconnect-ms' options are " + "incompatible with socket in server mode"); return -1; } return net_stream_server_init(peer, "stream", name, sock->addr, errp); diff --git a/qapi/net.json b/qapi/net.json index 87fc0d0b28..2739a2f423 100644 --- a/qapi/net.json +++ b/qapi/net.json @@ -650,15 +650,26 @@ # attempt a reconnect after the given number of seconds. Setting # this to zero disables this function. (default: 0) (since 8.0) # +# @reconnect-ms: For a client socket, if a socket is disconnected, then +# attempt a reconnect after the given number of milliseconds. Setting +# this to zero disables this function. This member is mutually +# exclusive with @reconnect. (default: 0) (Since: 9.2) +# # Only SocketAddress types 'unix', 'inet' and 'fd' are supported. # +# Features: +# +# @deprecated: Member @reconnect is deprecated. Use @reconnect-ms +# instead. +# # Since: 7.2 ## { 'struct': 'NetdevStreamOptions', 'data': { 'addr': 'SocketAddress', '*server': 'bool', - '*reconnect': 'uint32' } } + '*reconnect': { 'type': 'int', 'features': [ 'deprecated' ] }, + '*reconnect-ms': 'int' } } ## # @NetdevDgramOptions: diff --git a/qemu-options.hx b/qemu-options.hx index daae494147..bb228f6200 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2833,9 +2833,9 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, "-netdev socket,id=str[,fd=h][,udp=host:port][,localaddr=host:port]\n" " configure a network backend to connect to another network\n" " using an UDP tunnel\n" - "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off][,reconnect=seconds]\n" - "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off][,reconnect=seconds]\n" - "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor[,reconnect=seconds]\n" + "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off][,reconnect-ms=milliseconds]\n" + "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off][,reconnect-ms=milliseconds]\n" + "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor[,reconnect-ms=milliseconds]\n" " configure a network backend to connect to another network\n" " using a socket connection in stream mode.\n" "-netdev dgram,id=str,remote.type=inet,remote.host=maddr,remote.port=port[,local.type=inet,local.host=addr]\n" @@ -3291,7 +3291,7 @@ SRST -device e1000,netdev=n1,mac=52:54:00:12:34:56 \\ -netdev socket,id=n1,mcast=239.192.168.1:1102,localaddr=1.2.3.4 -``-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off][,reconnect=seconds]`` +``-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off][,reconnect-ms=milliseconds]`` Configure a network backend to connect to another QEMU virtual machine or a proxy using a TCP/IP socket. ``server=on|off`` @@ -3333,9 +3333,9 @@ SRST # second VM |qemu_system| linux.img \\ -device virtio-net,netdev=net0,mac=52:54:00:12:34:57 \\ - -netdev stream,id=net0,server=off,addr.type=inet,addr.host=localhost,addr.port=1234,reconnect=5 + -netdev stream,id=net0,server=off,addr.type=inet,addr.host=localhost,addr.port=1234,reconnect-ms=5000 -``-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off][,reconnect=seconds]`` +``-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off][,reconnect-ms=milliseconds]`` Configure a network backend to connect to another QEMU virtual machine or a proxy using a stream oriented unix domain socket. ``server=on|off`` @@ -3350,8 +3350,8 @@ SRST ``tight=on|off`` if false, pad an abstract socket address with enough null bytes to make it fill struct sockaddr_un member sun_path. - ``reconnect=seconds`` - for a client socket, if a socket is disconnected, then attempt a reconnect after the given number of seconds. + ``reconnect-ms=milliseconds`` + for a client socket, if a socket is disconnected, then attempt a reconnect after the given number of milliseconds. Setting this to zero disables this function. (default: 0) Example (using passt as a replacement of -netdev user): @@ -3377,9 +3377,9 @@ SRST # second VM |qemu_system| linux.img \\ -device virtio-net,netdev=net0,mac=52:54:00:12:34:57 \\ - -netdev stream,id=net0,server=off,addr.type=unix,addr.path=/tmp/qemu0,reconnect=5 + -netdev stream,id=net0,server=off,addr.type=unix,addr.path=/tmp/qemu0,reconnect-ms=5000 -``-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor[,reconnect=seconds]`` +``-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor[,reconnect-ms=milliseconds]`` Configure a network backend to connect to another QEMU virtual machine or a proxy using a stream oriented socket file descriptor. ``server=on|off`` @@ -4390,14 +4390,14 @@ SRST ``telnet options:`` localhost 5555 - ``tcp:[host]:port[,server=on|off][,wait=on|off][,nodelay=on|off][,reconnect=seconds]`` + ``tcp:[host]:port[,server=on|off][,wait=on|off][,nodelay=on|off][,reconnect-ms=milliseconds]`` The TCP Net Console has two modes of operation. It can send the serial I/O to a location or wait for a connection from a location. By default the TCP Net Console is sent to host at the port. If you use the ``server=on`` option QEMU will wait for a client socket application to connect to the port before continuing, unless the ``wait=on|off`` option was specified. The ``nodelay=on|off`` - option disables the Nagle buffering algorithm. The ``reconnect=on`` + option disables the Nagle buffering algorithm. The ``reconnect-ms`` option only applies if ``server=no`` is set, if the connection goes down it will attempt to reconnect at the given interval. If host is omitted, 0.0.0.0 is assumed. Only one TCP connection at a diff --git a/tests/qtest/netdev-socket.c b/tests/qtest/netdev-socket.c index fc7d11961e..317af03817 100644 --- a/tests/qtest/netdev-socket.c +++ b/tests/qtest/netdev-socket.c @@ -204,7 +204,7 @@ static void test_stream_unix_reconnect(void) qts1 = qtest_initf("-nodefaults -M none " "-netdev stream,server=false,id=st0,addr.type=unix," - "addr.path=%s,reconnect=1", path); + "addr.path=%s,reconnect-ms=1000", path); wait_stream_connected(qts0, "st0", &addr); g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_UNIX); From 96e610b23d4dcfa07b0f85eef02c70e28b32f4e6 Mon Sep 17 00:00:00 2001 From: Daniil Tatianin Date: Fri, 25 Oct 2024 10:35:25 +0300 Subject: [PATCH 10/14] chardev: finalize 'reconnect' deprecation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change all related docs and tests to use the new 'reconnect-ms' option instead of the now deprecated 'reconnect'. Signed-off-by: Daniil Tatianin Reviewed-by: Marc-André Lureau Signed-off-by: Jason Wang --- docs/COLO-FT.txt | 4 ++-- docs/system/ppc/powernv.rst | 2 +- qemu-options.hx | 22 +++++++++++----------- tests/qtest/ipmi-bt-test.c | 2 +- tests/qtest/vhost-user-test.c | 2 +- tests/unit/test-char.c | 8 ++++---- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/docs/COLO-FT.txt b/docs/COLO-FT.txt index 2e760a4aee..2283a09c08 100644 --- a/docs/COLO-FT.txt +++ b/docs/COLO-FT.txt @@ -193,8 +193,8 @@ any IP's here, except for the $primary_ip variable. -device piix3-usb-uhci -device usb-tablet -name secondary \ -netdev tap,id=hn0,vhost=off,helper=/usr/lib/qemu/qemu-bridge-helper \ -device rtl8139,id=e0,netdev=hn0 \ - -chardev socket,id=red0,host=$primary_ip,port=9003,reconnect=1 \ - -chardev socket,id=red1,host=$primary_ip,port=9004,reconnect=1 \ + -chardev socket,id=red0,host=$primary_ip,port=9003,reconnect-ms=1000 \ + -chardev socket,id=red1,host=$primary_ip,port=9004,reconnect-ms=1000 \ -object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0 \ -object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1 \ -object filter-rewriter,id=rew0,netdev=hn0,queue=all \ diff --git a/docs/system/ppc/powernv.rst b/docs/system/ppc/powernv.rst index 09f3965858..de7a807ac7 100644 --- a/docs/system/ppc/powernv.rst +++ b/docs/system/ppc/powernv.rst @@ -181,7 +181,7 @@ connected to a remote QEMU machine acting as BMC, using these options .. code-block:: bash - -chardev socket,id=ipmi0,host=localhost,port=9002,reconnect=10 \ + -chardev socket,id=ipmi0,host=localhost,port=9002,reconnect-ms=10000 \ -device ipmi-bmc-extern,id=bmc0,chardev=ipmi0 \ -device isa-ipmi-bt,bmc=bmc0,irq=10 \ -nodefaults diff --git a/qemu-options.hx b/qemu-options.hx index bb228f6200..dacc9790a4 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -1102,7 +1102,7 @@ SRST external entity that provides the IPMI services. A connection is made to an external BMC simulator. If you do this, - it is strongly recommended that you use the "reconnect=" chardev + it is strongly recommended that you use the "reconnect-ms=" chardev option to reconnect to the simulator if the connection is lost. Note that if this is not used carefully, it can be a security issue, as the interface has the ability to send resets, NMIs, and power off @@ -3318,8 +3318,8 @@ SRST ``ipv6=on|off`` whether to accept IPv6 addresses, default to try both IPv4 and IPv6 - ``reconnect=seconds`` - for a client socket, if a socket is disconnected, then attempt a reconnect after the given number of seconds. + ``reconnect-ms=milliseconds`` + for a client socket, if a socket is disconnected, then attempt a reconnect after the given number of milliseconds. Setting this to zero disables this function. (default: 0) Example (two guests connected using a TCP/IP socket): @@ -3388,8 +3388,8 @@ SRST ``addr.str=file-descriptor`` file descriptor number to use as a socket - ``reconnect=seconds`` - for a client socket, if a socket is disconnected, then attempt a reconnect after the given number of seconds. + ``reconnect-ms=milliseconds`` + for a client socket, if a socket is disconnected, then attempt a reconnect after the given number of milliseconds. Setting this to zero disables this function. (default: 0) ``-netdev dgram,id=str,remote.type=inet,remote.host=maddr,remote.port=port[,local.type=inet,local.host=addr]`` @@ -3679,9 +3679,9 @@ DEF("chardev", HAS_ARG, QEMU_OPTION_chardev, "-chardev help\n" "-chardev null,id=id[,mux=on|off][,logfile=PATH][,logappend=on|off]\n" "-chardev socket,id=id[,host=host],port=port[,to=to][,ipv4=on|off][,ipv6=on|off][,nodelay=on|off]\n" - " [,server=on|off][,wait=on|off][,telnet=on|off][,websocket=on|off][,reconnect=seconds][,mux=on|off]\n" + " [,server=on|off][,wait=on|off][,telnet=on|off][,websocket=on|off][,reconnect-ms=milliseconds][,mux=on|off]\n" " [,logfile=PATH][,logappend=on|off][,tls-creds=ID][,tls-authz=ID] (tcp)\n" - "-chardev socket,id=id,path=path[,server=on|off][,wait=on|off][,telnet=on|off][,websocket=on|off][,reconnect=seconds]\n" + "-chardev socket,id=id,path=path[,server=on|off][,wait=on|off][,telnet=on|off][,websocket=on|off][,reconnect-ms=milliseconds]\n" " [,mux=on|off][,logfile=PATH][,logappend=on|off][,abstract=on|off][,tight=on|off] (unix)\n" "-chardev udp,id=id[,host=host],port=port[,localaddr=localaddr]\n" " [,localport=localport][,ipv4=on|off][,ipv6=on|off][,mux=on|off]\n" @@ -3792,7 +3792,7 @@ The available backends are: A void device. This device will not emit any data, and will drop any data it receives. The null backend does not take any options. -``-chardev socket,id=id[,TCP options or unix options][,server=on|off][,wait=on|off][,telnet=on|off][,websocket=on|off][,reconnect=seconds][,tls-creds=id][,tls-authz=id]`` +``-chardev socket,id=id[,TCP options or unix options][,server=on|off][,wait=on|off][,telnet=on|off][,websocket=on|off][,reconnect-ms=milliseconds][,tls-creds=id][,tls-authz=id]`` Create a two-way stream socket, which can be either a TCP or a unix socket. A unix socket will be created if ``path`` is specified. Behaviour is undefined if TCP options are specified for a unix @@ -3809,9 +3809,9 @@ The available backends are: ``websocket=on|off`` specifies that the socket uses WebSocket protocol for communication. - ``reconnect`` sets the timeout for reconnecting on non-server + ``reconnect-ms`` sets the timeout for reconnecting on non-server sockets when the remote end goes away. qemu will delay this many - seconds and then attempt to reconnect. Zero disables reconnecting, + milliseconds and then attempt to reconnect. Zero disables reconnecting, and is the default. ``tls-creds`` requests enablement of the TLS protocol for @@ -4427,7 +4427,7 @@ SRST The WebSocket protocol is used instead of raw tcp socket. The port acts as a WebSocket server. Client mode is not supported. - ``unix:path[,server=on|off][,wait=on|off][,reconnect=seconds]`` + ``unix:path[,server=on|off][,wait=on|off][,reconnect-ms=milliseconds]`` A unix domain socket is used instead of a tcp socket. The option works the same as if you had specified ``-serial tcp`` except the unix domain socket path is used for connections. diff --git a/tests/qtest/ipmi-bt-test.c b/tests/qtest/ipmi-bt-test.c index 13f7c841f5..637732fd5a 100644 --- a/tests/qtest/ipmi-bt-test.c +++ b/tests/qtest/ipmi-bt-test.c @@ -411,7 +411,7 @@ int main(int argc, char **argv) g_test_init(&argc, &argv, NULL); global_qtest = qtest_initf( - " -chardev socket,id=ipmi0,host=127.0.0.1,port=%d,reconnect=10" + " -chardev socket,id=ipmi0,host=127.0.0.1,port=%d,reconnect-ms=10000" " -device ipmi-bmc-extern,chardev=ipmi0,id=bmc0" " -device isa-ipmi-bt,bmc=bmc0", emu_port); qtest_irq_intercept_in(global_qtest, "ioapic"); diff --git a/tests/qtest/vhost-user-test.c b/tests/qtest/vhost-user-test.c index d6075001e7..8948fb81ef 100644 --- a/tests/qtest/vhost-user-test.c +++ b/tests/qtest/vhost-user-test.c @@ -920,7 +920,7 @@ static void wait_for_rings_started(TestServer *s, size_t count) static inline void test_server_connect(TestServer *server) { - test_server_create_chr(server, ",reconnect=1"); + test_server_create_chr(server, ",reconnect-ms=1000"); } static gboolean diff --git a/tests/unit/test-char.c b/tests/unit/test-char.c index a1c6bb874c..a6e8753e1c 100644 --- a/tests/unit/test-char.c +++ b/tests/unit/test-char.c @@ -1545,18 +1545,18 @@ int main(int argc, char **argv) static CharSocketClientTestConfig client2 ## name = \ { addr, NULL, true, false, char_socket_event }; \ static CharSocketClientTestConfig client3 ## name = \ - { addr, ",reconnect=1", false, false, char_socket_event }; \ + { addr, ",reconnect-ms=1000", false, false, char_socket_event }; \ static CharSocketClientTestConfig client4 ## name = \ - { addr, ",reconnect=1", true, false, char_socket_event }; \ + { addr, ",reconnect-ms=1000", true, false, char_socket_event }; \ static CharSocketClientTestConfig client5 ## name = \ { addr, NULL, false, true, char_socket_event }; \ static CharSocketClientTestConfig client6 ## name = \ { addr, NULL, true, true, char_socket_event }; \ static CharSocketClientTestConfig client7 ## name = \ - { addr, ",reconnect=1", true, false, \ + { addr, ",reconnect-ms=1000", true, false, \ char_socket_event_with_error }; \ static CharSocketClientTestConfig client8 ## name = \ - { addr, ",reconnect=1", false, false, char_socket_event }; \ + { addr, ",reconnect-ms=1000", false, false, char_socket_event };\ g_test_add_data_func("/char/socket/client/mainloop/" # name, \ &client1 ##name, char_socket_client_test); \ g_test_add_data_func("/char/socket/client/wait-conn/" # name, \ From 75fe36b4e8a994cdf9fd6eb601f49e96b1bc791d Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Tue, 8 Oct 2024 22:28:42 +0200 Subject: [PATCH 11/14] net/tap-win32: Fix gcc 14 format truncation errors The patch fixes the following errors generated by GCC 14.2: ../src/net/tap-win32.c:343:19: error: '%s' directive output may be truncated writing up to 255 bytes into a region of size 176 [-Werror=format-truncation=] 343 | "%s\\%s\\Connection", | ^~ 344 | NETWORK_CONNECTIONS_KEY, enum_name); | ~~~~~~~~~ ../src/net/tap-win32.c:341:9: note: 'snprintf' output between 92 and 347 bytes into a destination of size 256 341 | snprintf(connection_string, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ 342 | sizeof(connection_string), | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 343 | "%s\\%s\\Connection", | ~~~~~~~~~~~~~~~~~~~~~ 344 | NETWORK_CONNECTIONS_KEY, enum_name); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../src/net/tap-win32.c:242:58: error: '%s' directive output may be truncated writing up to 255 bytes into a region of size 178 [-Werror=format-truncation=] 242 | snprintf (unit_string, sizeof(unit_string), "%s\\%s", | ^~ 243 | ADAPTER_KEY, enum_name); | ~~~~~~~~~ ../src/net/tap-win32.c:242:9: note: 'snprintf' output between 79 and 334 bytes into a destination of size 256 242 | snprintf (unit_string, sizeof(unit_string), "%s\\%s", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 243 | ADAPTER_KEY, enum_name); | ~~~~~~~~~~~~~~~~~~~~~~~ ../src/net/tap-win32.c:620:52: error: '%s' directive output may be truncated writing up to 255 bytes into a region of size 245 [-Werror=format-truncation=] 620 | snprintf (device_path, sizeof(device_path), "%s%s%s", | ^~ 621 | USERMODEDEVICEDIR, 622 | device_guid, | ~~~~~~~~~~~ ../src/net/tap-win32.c:620:5: note: 'snprintf' output between 16 and 271 bytes into a destination of size 256 620 | snprintf (device_path, sizeof(device_path), "%s%s%s", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 621 | USERMODEDEVICEDIR, | ~~~~~~~~~~~~~~~~~~ 622 | device_guid, | ~~~~~~~~~~~~ 623 | TAPSUFFIX); | ~~~~~~~~~~ Signed-off-by: Bernhard Beschow Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2607 Cc: qemu-stable@nongnu.org Reviewed-by: Michael Tokarev Reviewed-by: Pierrick Bouvier Signed-off-by: Jason Wang --- net/tap-win32.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/net/tap-win32.c b/net/tap-win32.c index 7edbd71633..671dee970f 100644 --- a/net/tap-win32.c +++ b/net/tap-win32.c @@ -214,7 +214,7 @@ static int is_tap_win32_dev(const char *guid) for (;;) { char enum_name[256]; - char unit_string[256]; + g_autofree char *unit_string = NULL; HKEY unit_key; char component_id_string[] = "ComponentId"; char component_id[256]; @@ -239,8 +239,7 @@ static int is_tap_win32_dev(const char *guid) return FALSE; } - snprintf (unit_string, sizeof(unit_string), "%s\\%s", - ADAPTER_KEY, enum_name); + unit_string = g_strdup_printf("%s\\%s", ADAPTER_KEY, enum_name); status = RegOpenKeyEx( HKEY_LOCAL_MACHINE, @@ -315,7 +314,7 @@ static int get_device_guid( while (!stop) { char enum_name[256]; - char connection_string[256]; + g_autofree char *connection_string = NULL; HKEY connection_key; char name_data[256]; DWORD name_type; @@ -338,9 +337,7 @@ static int get_device_guid( return -1; } - snprintf(connection_string, - sizeof(connection_string), - "%s\\%s\\Connection", + connection_string = g_strdup_printf("%s\\%s\\Connection", NETWORK_CONNECTIONS_KEY, enum_name); status = RegOpenKeyEx( @@ -595,7 +592,7 @@ static void tap_win32_free_buffer(tap_win32_overlapped_t *overlapped, static int tap_win32_open(tap_win32_overlapped_t **phandle, const char *preferred_name) { - char device_path[256]; + g_autofree char *device_path = NULL; char device_guid[0x100]; int rc; HANDLE handle; @@ -617,7 +614,7 @@ static int tap_win32_open(tap_win32_overlapped_t **phandle, if (rc) return -1; - snprintf (device_path, sizeof(device_path), "%s%s%s", + device_path = g_strdup_printf("%s%s%s", USERMODEDEVICEDIR, device_guid, TAPSUFFIX); From 76240dd2a37c7b361740616a7d6080beafdb8a71 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Sat, 17 Aug 2024 16:00:43 +0900 Subject: [PATCH 12/14] net: Check if nc is NULL in qemu_get_vnet_hdr_len() A netdev may not have a peer specified, resulting in NULL. We should make it behave like /dev/null in such a case instead of letting it cause segmentatin fault. Fixes: 4b52d63249a5 ("tap: Remove qemu_using_vnet_hdr()") Cc: qemu-stable@nongnu.org Reported-by: Jonathan Cameron Signed-off-by: Akihiko Odaki Tested-by; Jonathan Cameron Acked-by: Michael S. Tsirkin Signed-off-by: Jason Wang --- net/net.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/net.c b/net/net.c index d9b23a8f8c..7ef6885876 100644 --- a/net/net.c +++ b/net/net.c @@ -542,6 +542,10 @@ void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6, int qemu_get_vnet_hdr_len(NetClientState *nc) { + if (!nc) { + return 0; + } + return nc->vnet_hdr_len; } From e29bc931e1699a98959680f6776b48673825762b Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Mon, 9 Sep 2024 22:42:54 +0200 Subject: [PATCH 13/14] Fix calculation of minimum in colo_compare_tcp GitHub's CodeQL reports a critical error which is fixed by using the MIN macro: Unsigned difference expression compared to zero Signed-off-by: Stefan Weil Cc: qemu-stable@nongnu.org Reviewed-by: Zhang Chen Signed-off-by: Jason Wang --- net/colo-compare.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/colo-compare.c b/net/colo-compare.c index c4ad0ab71f..39f90c4065 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -412,8 +412,7 @@ static void colo_compare_tcp(CompareState *s, Connection *conn) * can ensure that the packet's payload is acknowledged by * primary and secondary. */ - uint32_t min_ack = conn->pack - conn->sack > 0 ? - conn->sack : conn->pack; + uint32_t min_ack = MIN(conn->pack, conn->sack); pri: if (g_queue_is_empty(&conn->primary_list)) { From cd76e8fcbe1a340776ae61b4e182be3a45b26219 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Tue, 8 Oct 2024 15:51:03 +0900 Subject: [PATCH 14/14] virtio-net: Avoid indirection_table_mask overflow We computes indirections_len by adding 1 to indirection_table_mask, but it may overflow indirection_table_mask is UINT16_MAX. Check if indirection_table_mask is small enough before adding 1. Fixes: 590790297c0d ("virtio-net: implement RSS configuration command") Signed-off-by: Akihiko Odaki Signed-off-by: Jason Wang --- hw/net/virtio-net.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 60e19bffcb..f2104ed364 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1406,17 +1406,17 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types); n->rss_data.indirections_len = virtio_lduw_p(vdev, &cfg.indirection_table_mask); - n->rss_data.indirections_len++; if (!do_rss) { - n->rss_data.indirections_len = 1; + n->rss_data.indirections_len = 0; } - if (!is_power_of_2(n->rss_data.indirections_len)) { - err_msg = "Invalid size of indirection table"; + if (n->rss_data.indirections_len >= VIRTIO_NET_RSS_MAX_TABLE_LEN) { + err_msg = "Too large indirection table"; err_value = n->rss_data.indirections_len; goto error; } - if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) { - err_msg = "Too large indirection table"; + n->rss_data.indirections_len++; + if (!is_power_of_2(n->rss_data.indirections_len)) { + err_msg = "Invalid size of indirection table"; err_value = n->rss_data.indirections_len; goto error; }