From f605796aae42885034400c83ed6a9b07cd6d6481 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 17 Apr 2025 11:05:27 -0400 Subject: [PATCH 1/4] file-posix: probe discard alignment on Linux block devices Populate the pdiscard_alignment block limit so the block layer is able align discard requests correctly. Signed-off-by: Stefan Hajnoczi Message-ID: <20250417150528.76470-2-stefanha@redhat.com> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/file-posix.c | 67 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/block/file-posix.c b/block/file-posix.c index 56d1972d15..0d6e12f880 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -1276,10 +1276,10 @@ static int get_sysfs_zoned_model(struct stat *st, BlockZoneModel *zoned) } #endif /* defined(CONFIG_BLKZONED) */ +#ifdef CONFIG_LINUX /* * Get a sysfs attribute value as a long integer. */ -#ifdef CONFIG_LINUX static long get_sysfs_long_val(struct stat *st, const char *attribute) { g_autofree char *str = NULL; @@ -1299,6 +1299,30 @@ static long get_sysfs_long_val(struct stat *st, const char *attribute) } return ret; } + +/* + * Get a sysfs attribute value as a uint32_t. + */ +static int get_sysfs_u32_val(struct stat *st, const char *attribute, + uint32_t *u32) +{ + g_autofree char *str = NULL; + const char *end; + unsigned int val; + int ret; + + ret = get_sysfs_str_val(st, attribute, &str); + if (ret < 0) { + return ret; + } + + /* The file is ended with '\n', pass 'end' to accept that. */ + ret = qemu_strtoui(str, &end, 10, &val); + if (ret == 0 && end && *end == '\0') { + *u32 = val; + } + return ret; +} #endif static int hdev_get_max_segments(int fd, struct stat *st) @@ -1318,6 +1342,23 @@ static int hdev_get_max_segments(int fd, struct stat *st) #endif } +/* + * Fills in *dalign with the discard alignment and returns 0 on success, + * -errno otherwise. + */ +static int hdev_get_pdiscard_alignment(struct stat *st, uint32_t *dalign) +{ +#ifdef CONFIG_LINUX + /* + * Note that Linux "discard_granularity" is QEMU "discard_alignment". Linux + * "discard_alignment" is something else. + */ + return get_sysfs_u32_val(st, "discard_granularity", dalign); +#else + return -ENOTSUP; +#endif +} + #if defined(CONFIG_BLKZONED) /* * If the reset_all flag is true, then the wps of zone whose state is @@ -1527,6 +1568,30 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) } } + if (S_ISBLK(st.st_mode)) { + uint32_t dalign = 0; + int ret; + + ret = hdev_get_pdiscard_alignment(&st, &dalign); + if (ret == 0) { + uint32_t ralign = bs->bl.request_alignment; + + /* Probably never happens, but handle it just in case */ + if (dalign < ralign && (ralign % dalign == 0)) { + dalign = ralign; + } + + /* The block layer requires a multiple of request_alignment */ + if (dalign % ralign != 0) { + error_setg(errp, "Invalid pdiscard_alignment limit %u is not a " + "multiple of request_alignment %u", dalign, ralign); + return; + } + + bs->bl.pdiscard_alignment = dalign; + } + } + raw_refresh_zoned_limits(bs, &st, errp); } From 4733cb0833c4b223f92ec0136980eeb5239ecb87 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 17 Apr 2025 11:05:28 -0400 Subject: [PATCH 2/4] block/io: skip head/tail requests on EINVAL When guests send misaligned discard requests, the block layer breaks them up into a misaligned head, an aligned main body, and a misaligned tail. The file-posix block driver on Linux returns -EINVAL on misaligned discard requests. This causes bdrv_co_pdiscard() to fail and guests configured with werror=stop will pause. Add a special case for misaligned head/tail requests. Simply continue when EINVAL is encountered so that the aligned main body of the request can be completed and the guest is not paused. This is the best we can do when guest discard limits do not match the host discard limits. Fixes: https://issues.redhat.com/browse/RHEL-86032 Signed-off-by: Stefan Hajnoczi Reviewed-by: Hanna Czenczek Message-ID: <20250417150528.76470-3-stefanha@redhat.com> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/io.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/block/io.c b/block/io.c index 1ba8d1aeea..ccec11386b 100644 --- a/block/io.c +++ b/block/io.c @@ -3109,11 +3109,12 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, /* Invalidate the cached block-status data range if this discard overlaps */ bdrv_bsc_invalidate_range(bs, offset, bytes); - /* Discard is advisory, but some devices track and coalesce + /* + * Discard is advisory, but some devices track and coalesce * unaligned requests, so we must pass everything down rather than - * round here. Still, most devices will just silently ignore - * unaligned requests (by returning -ENOTSUP), so we must fragment - * the request accordingly. */ + * round here. Still, most devices reject unaligned requests with + * -EINVAL or -ENOTSUP, so we must fragment the request accordingly. + */ align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment); assert(align % bs->bl.request_alignment == 0); head = offset % align; @@ -3180,7 +3181,11 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, } } if (ret && ret != -ENOTSUP) { - goto out; + if (ret == -EINVAL && (offset % align != 0 || num % align != 0)) { + /* Silently skip rejected unaligned head/tail requests */ + } else { + goto out; /* bail out */ + } } offset += num; From ed1aef171671ef49761017cb1d8d10bf67d05c57 Mon Sep 17 00:00:00 2001 From: Sunny Zhu Date: Tue, 22 Apr 2025 02:21:26 +0800 Subject: [PATCH 3/4] block: Remove unused callback function *bdrv_aio_pdiscard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bytes type in *bdrv_aio_pdiscard should be int64_t rather than int. There are no drivers implementing the *bdrv_aio_pdiscard() callback, it appears to be an unused function. Therefore, we'll simply remove it instead of fixing it. Additionally, coroutine-based callbacks are preferred. If someone needs to implement bdrv_aio_pdiscard, a coroutine-based version would be straightforward to implement. Signed-off-by: Sunny Zhu Message-ID: Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/io.c | 20 ++------------------ include/block/block_int-common.h | 4 ---- 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/block/io.c b/block/io.c index ccec11386b..6d98b0abb9 100644 --- a/block/io.c +++ b/block/io.c @@ -3102,7 +3102,7 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, return 0; } - if (!bs->drv->bdrv_co_pdiscard && !bs->drv->bdrv_aio_pdiscard) { + if (!bs->drv->bdrv_co_pdiscard) { return 0; } @@ -3162,24 +3162,8 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, ret = -ENOMEDIUM; goto out; } - if (bs->drv->bdrv_co_pdiscard) { - ret = bs->drv->bdrv_co_pdiscard(bs, offset, num); - } else { - BlockAIOCB *acb; - CoroutineIOCompletion co = { - .coroutine = qemu_coroutine_self(), - }; - acb = bs->drv->bdrv_aio_pdiscard(bs, offset, num, - bdrv_co_io_em_complete, &co); - if (acb == NULL) { - ret = -EIO; - goto out; - } else { - qemu_coroutine_yield(); - ret = co.ret; - } - } + ret = bs->drv->bdrv_co_pdiscard(bs, offset, num); if (ret && ret != -ENOTSUP) { if (ret == -EINVAL && (offset % align != 0 || num % align != 0)) { /* Silently skip rejected unaligned head/tail requests */ diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index ebb4e56a50..0d8187f656 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -506,10 +506,6 @@ struct BlockDriver { BlockAIOCB * GRAPH_RDLOCK_PTR (*bdrv_aio_flush)( BlockDriverState *bs, BlockCompletionFunc *cb, void *opaque); - BlockAIOCB * GRAPH_RDLOCK_PTR (*bdrv_aio_pdiscard)( - BlockDriverState *bs, int64_t offset, int bytes, - BlockCompletionFunc *cb, void *opaque); - int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_readv)(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); From 2b689db0bedd24eda8b491cb1fcfb015dfec5a31 Mon Sep 17 00:00:00 2001 From: Denis Rastyogin Date: Thu, 27 Mar 2025 19:24:23 +0300 Subject: [PATCH 4/4] qemu-img: improve queue depth validation in img_bench This error was discovered by fuzzing qemu-img. Currently, running `qemu-img bench -d 0` in img_bench is allowed, which is a pointless operation and causes qemu-img to hang. Signed-off-by: Denis Rastyogin Message-ID: <20250327162423.25154-5-gerben@altlinux.org> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- qemu-img.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qemu-img.c b/qemu-img.c index 2044c22a4c..76ac5d3028 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -4571,7 +4571,7 @@ static int img_bench(int argc, char **argv) { unsigned long res; - if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) { + if (qemu_strtoul(optarg, NULL, 0, &res) <= 0 || res > INT_MAX) { error_report("Invalid queue depth specified"); return 1; }