mirror of
https://github.com/Motorhead1991/qemu.git
synced 2025-08-09 18:44:58 -06:00
Block patches:
- One patch to make qcow2's discard-no-unref option do better what it is supposed to do (i.e. prevent fragmentation) - Two fixes for zoned requests -----BEGIN PGP SIGNATURE----- iQJGBAABCAAwFiEEy2LXoO44KeRfAE00ofpA0JgBnN8FAmVJHbgSHGhyZWl0ekBy ZWRoYXQuY29tAAoJEKH6QNCYAZzfLn4QAKxuUYZaXirv6K4U2tW4aAJtc5uESdwv WYhG7YU7MleBGCY0fRoih5thrPrzRLC8o1QhbRcA36+/PAZf4BYrJEfqLUdzuN5x 6Vb1n3NRUzPD1+VfL/B9hVZhFbtTOUZuxPGEqCoHAmqBaeKuYRT1bLZbtRtPVLSk 5eTMiyrpRMlBWc7O71eGKLqU4k0vAznwHBGf2Z93qWAsKcRZCwbAWYa7Q6rJ9jJ8 1jNsQuAk0p74/uGEpFhoEVrFEcV6pMbI4+jB9i0t9YYxT0tLIdIX1VUx+AHJfItk IF2stB6SFOaAy2W3Fn+0oJvz40aMLzg9VjEeTpGmdlKC67ZTYa6Obwzy5WNLPIap k7VUheUEe8qoKUtxQNxGLR/HKEJSFXyhU0lgAGxE1gl2xc1QFFFsrimpwFd3d37j 3PwfhjARHonf4ZXgsvtIjb7nG9seMZYO7Vht0OztJyW8c2XN5OFVPir9xLbd9VUg wZNGB8jAsHgj77+S/mRIwpP+laKL8wB7zYZ1mgFI98QJIYqL8tGdV/IiUhLljHzc XAmwekOhBMMbgHhliBy9zDuTy59+zZ0FoxZPn/JvBjqBAkEnz9EbhHxi2imQg+1d XSoLbx1X1yEbepWz8mCGiveLIPkt+3qMJuuQF76nURaA+nm3tCl/nKca6QLnVKzU 2QtPWS0qRmwd =5w7S -----END PGP SIGNATURE----- Merge tag 'pull-block-2023-11-06' of https://gitlab.com/hreitz/qemu into staging Block patches: - One patch to make qcow2's discard-no-unref option do better what it is supposed to do (i.e. 
prevent fragmentation) - Two fixes for zoned requests # -----BEGIN PGP SIGNATURE----- # # iQJGBAABCAAwFiEEy2LXoO44KeRfAE00ofpA0JgBnN8FAmVJHbgSHGhyZWl0ekBy # ZWRoYXQuY29tAAoJEKH6QNCYAZzfLn4QAKxuUYZaXirv6K4U2tW4aAJtc5uESdwv # WYhG7YU7MleBGCY0fRoih5thrPrzRLC8o1QhbRcA36+/PAZf4BYrJEfqLUdzuN5x # 6Vb1n3NRUzPD1+VfL/B9hVZhFbtTOUZuxPGEqCoHAmqBaeKuYRT1bLZbtRtPVLSk # 5eTMiyrpRMlBWc7O71eGKLqU4k0vAznwHBGf2Z93qWAsKcRZCwbAWYa7Q6rJ9jJ8 # 1jNsQuAk0p74/uGEpFhoEVrFEcV6pMbI4+jB9i0t9YYxT0tLIdIX1VUx+AHJfItk # IF2stB6SFOaAy2W3Fn+0oJvz40aMLzg9VjEeTpGmdlKC67ZTYa6Obwzy5WNLPIap # k7VUheUEe8qoKUtxQNxGLR/HKEJSFXyhU0lgAGxE1gl2xc1QFFFsrimpwFd3d37j # 3PwfhjARHonf4ZXgsvtIjb7nG9seMZYO7Vht0OztJyW8c2XN5OFVPir9xLbd9VUg # wZNGB8jAsHgj77+S/mRIwpP+laKL8wB7zYZ1mgFI98QJIYqL8tGdV/IiUhLljHzc # XAmwekOhBMMbgHhliBy9zDuTy59+zZ0FoxZPn/JvBjqBAkEnz9EbhHxi2imQg+1d # XSoLbx1X1yEbepWz8mCGiveLIPkt+3qMJuuQF76nURaA+nm3tCl/nKca6QLnVKzU # 2QtPWS0qRmwd # =5w7S # -----END PGP SIGNATURE----- # gpg: Signature made Tue 07 Nov 2023 01:09:12 HKT # gpg: using RSA key CB62D7A0EE3829E45F004D34A1FA40D098019CDF # gpg: issuer "hreitz@redhat.com" # gpg: Good signature from "Hanna Reitz <hreitz@redhat.com>" [unknown] # gpg: WARNING: The key's User ID is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. # Primary key fingerprint: CB62 D7A0 EE38 29E4 5F00 4D34 A1FA 40D0 9801 9CDF * tag 'pull-block-2023-11-06' of https://gitlab.com/hreitz/qemu: file-posix: fix over-writing of returning zone_append offset block/file-posix: fix update_zones_wp() caller qcow2: keep reference on zeroize with discard-no-unref enabled Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
commit
80aaef96b1
4 changed files with 51 additions and 28 deletions
|
@ -160,7 +160,6 @@ typedef struct BDRVRawState {
|
||||||
bool has_write_zeroes:1;
|
bool has_write_zeroes:1;
|
||||||
bool use_linux_aio:1;
|
bool use_linux_aio:1;
|
||||||
bool use_linux_io_uring:1;
|
bool use_linux_io_uring:1;
|
||||||
int64_t *offset; /* offset of zone append operation */
|
|
||||||
int page_cache_inconsistent; /* errno from fdatasync failure */
|
int page_cache_inconsistent; /* errno from fdatasync failure */
|
||||||
bool has_fallocate;
|
bool has_fallocate;
|
||||||
bool needs_alignment;
|
bool needs_alignment;
|
||||||
|
@ -2445,12 +2444,13 @@ static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
|
static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
|
||||||
uint64_t bytes, QEMUIOVector *qiov, int type)
|
uint64_t bytes, QEMUIOVector *qiov, int type)
|
||||||
{
|
{
|
||||||
BDRVRawState *s = bs->opaque;
|
BDRVRawState *s = bs->opaque;
|
||||||
RawPosixAIOData acb;
|
RawPosixAIOData acb;
|
||||||
int ret;
|
int ret;
|
||||||
|
uint64_t offset = *offset_ptr;
|
||||||
|
|
||||||
if (fd_open(bs) < 0)
|
if (fd_open(bs) < 0)
|
||||||
return -EIO;
|
return -EIO;
|
||||||
|
@ -2513,8 +2513,8 @@ out:
|
||||||
uint64_t *wp = &wps->wp[offset / bs->bl.zone_size];
|
uint64_t *wp = &wps->wp[offset / bs->bl.zone_size];
|
||||||
if (!BDRV_ZT_IS_CONV(*wp)) {
|
if (!BDRV_ZT_IS_CONV(*wp)) {
|
||||||
if (type & QEMU_AIO_ZONE_APPEND) {
|
if (type & QEMU_AIO_ZONE_APPEND) {
|
||||||
*s->offset = *wp;
|
*offset_ptr = *wp;
|
||||||
trace_zbd_zone_append_complete(bs, *s->offset
|
trace_zbd_zone_append_complete(bs, *offset_ptr
|
||||||
>> BDRV_SECTOR_BITS);
|
>> BDRV_SECTOR_BITS);
|
||||||
}
|
}
|
||||||
/* Advance the wp if needed */
|
/* Advance the wp if needed */
|
||||||
|
@ -2523,7 +2523,10 @@ out:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
update_zones_wp(bs, s->fd, 0, 1);
|
/*
|
||||||
|
* writes and zone appends are not allowed to cross zone boundaries, so updating the single zone at offset is enough
|
||||||
|
*/
|
||||||
|
update_zones_wp(bs, s->fd, offset, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
qemu_co_mutex_unlock(&wps->colock);
|
qemu_co_mutex_unlock(&wps->colock);
|
||||||
|
@ -2536,14 +2539,14 @@ static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset,
|
||||||
int64_t bytes, QEMUIOVector *qiov,
|
int64_t bytes, QEMUIOVector *qiov,
|
||||||
BdrvRequestFlags flags)
|
BdrvRequestFlags flags)
|
||||||
{
|
{
|
||||||
return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_READ);
|
return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
|
static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
|
||||||
int64_t bytes, QEMUIOVector *qiov,
|
int64_t bytes, QEMUIOVector *qiov,
|
||||||
BdrvRequestFlags flags)
|
BdrvRequestFlags flags)
|
||||||
{
|
{
|
||||||
return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE);
|
return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
|
static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
|
||||||
|
@ -3470,7 +3473,7 @@ static int coroutine_fn raw_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op,
|
||||||
len >> BDRV_SECTOR_BITS);
|
len >> BDRV_SECTOR_BITS);
|
||||||
ret = raw_thread_pool_submit(handle_aiocb_zone_mgmt, &acb);
|
ret = raw_thread_pool_submit(handle_aiocb_zone_mgmt, &acb);
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
update_zones_wp(bs, s->fd, offset, i);
|
update_zones_wp(bs, s->fd, offset, nrz);
|
||||||
error_report("ioctl %s failed %d", op_name, ret);
|
error_report("ioctl %s failed %d", op_name, ret);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -3506,8 +3509,6 @@ static int coroutine_fn raw_co_zone_append(BlockDriverState *bs,
|
||||||
int64_t zone_size_mask = bs->bl.zone_size - 1;
|
int64_t zone_size_mask = bs->bl.zone_size - 1;
|
||||||
int64_t iov_len = 0;
|
int64_t iov_len = 0;
|
||||||
int64_t len = 0;
|
int64_t len = 0;
|
||||||
BDRVRawState *s = bs->opaque;
|
|
||||||
s->offset = offset;
|
|
||||||
|
|
||||||
if (*offset & zone_size_mask) {
|
if (*offset & zone_size_mask) {
|
||||||
error_report("sector offset %" PRId64 " is not aligned to zone size "
|
error_report("sector offset %" PRId64 " is not aligned to zone size "
|
||||||
|
@ -3528,7 +3529,7 @@ static int coroutine_fn raw_co_zone_append(BlockDriverState *bs,
|
||||||
}
|
}
|
||||||
|
|
||||||
trace_zbd_zone_append(bs, *offset >> BDRV_SECTOR_BITS);
|
trace_zbd_zone_append(bs, *offset >> BDRV_SECTOR_BITS);
|
||||||
return raw_co_prw(bs, *offset, len, qiov, QEMU_AIO_ZONE_APPEND);
|
return raw_co_prw(bs, offset, len, qiov, QEMU_AIO_ZONE_APPEND);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -1983,7 +1983,7 @@ discard_in_l2_slice(BlockDriverState *bs, uint64_t offset, uint64_t nb_clusters,
|
||||||
/* If we keep the reference, pass on the discard still */
|
/* If we keep the reference, pass on the discard still */
|
||||||
bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
|
bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
|
||||||
s->cluster_size);
|
s->cluster_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
|
qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
|
||||||
|
@ -2061,9 +2061,15 @@ zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
|
||||||
QCow2ClusterType type = qcow2_get_cluster_type(bs, old_l2_entry);
|
QCow2ClusterType type = qcow2_get_cluster_type(bs, old_l2_entry);
|
||||||
bool unmap = (type == QCOW2_CLUSTER_COMPRESSED) ||
|
bool unmap = (type == QCOW2_CLUSTER_COMPRESSED) ||
|
||||||
((flags & BDRV_REQ_MAY_UNMAP) && qcow2_cluster_is_allocated(type));
|
((flags & BDRV_REQ_MAY_UNMAP) && qcow2_cluster_is_allocated(type));
|
||||||
uint64_t new_l2_entry = unmap ? 0 : old_l2_entry;
|
bool keep_reference =
|
||||||
|
(s->discard_no_unref && type != QCOW2_CLUSTER_COMPRESSED);
|
||||||
|
uint64_t new_l2_entry = old_l2_entry;
|
||||||
uint64_t new_l2_bitmap = old_l2_bitmap;
|
uint64_t new_l2_bitmap = old_l2_bitmap;
|
||||||
|
|
||||||
|
if (unmap && !keep_reference) {
|
||||||
|
new_l2_entry = 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (has_subclusters(s)) {
|
if (has_subclusters(s)) {
|
||||||
new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES;
|
new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES;
|
||||||
} else {
|
} else {
|
||||||
|
@ -2081,9 +2087,17 @@ zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
|
||||||
set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap);
|
set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Then decrease the refcount */
|
|
||||||
if (unmap) {
|
if (unmap) {
|
||||||
qcow2_free_any_cluster(bs, old_l2_entry, QCOW2_DISCARD_REQUEST);
|
if (!keep_reference) {
|
||||||
|
/* Then decrease the refcount */
|
||||||
|
qcow2_free_any_cluster(bs, old_l2_entry, QCOW2_DISCARD_REQUEST);
|
||||||
|
} else if (s->discard_passthrough[QCOW2_DISCARD_REQUEST] &&
|
||||||
|
(type == QCOW2_CLUSTER_NORMAL ||
|
||||||
|
type == QCOW2_CLUSTER_ZERO_ALLOC)) {
|
||||||
|
/* If we keep the reference, pass on the discard still */
|
||||||
|
bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
|
||||||
|
s->cluster_size);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -3528,16 +3528,20 @@
|
||||||
# @pass-discard-other: whether discard requests for the data source
|
# @pass-discard-other: whether discard requests for the data source
|
||||||
# should be issued on other occasions where a cluster gets freed
|
# should be issued on other occasions where a cluster gets freed
|
||||||
#
|
#
|
||||||
# @discard-no-unref: when enabled, discards from the guest will not
|
# @discard-no-unref: when enabled, data clusters will remain
|
||||||
# cause cluster allocations to be relinquished. This prevents
|
# preallocated when they are no longer used, e.g. because they are
|
||||||
# qcow2 fragmentation that would be caused by such discards.
|
# discarded or converted to zero clusters. As usual, whether the
|
||||||
# Besides potential performance degradation, such fragmentation
|
# old data is discarded or kept on the protocol level (i.e. in the
|
||||||
# can lead to increased allocation of clusters past the end of the
|
# image file) depends on the setting of the pass-discard-request
|
||||||
# image file, resulting in image files whose file length can grow
|
# option. Keeping the clusters preallocated prevents qcow2
|
||||||
# much larger than their guest disk size would suggest. If image
|
# fragmentation that would otherwise be caused by freeing and
|
||||||
# file length is of concern (e.g. when storing qcow2 images
|
# re-allocating them later. Besides potential performance
|
||||||
# directly on block devices), you should consider enabling this
|
# degradation, such fragmentation can lead to increased allocation
|
||||||
# option. (since 8.1)
|
# of clusters past the end of the image file, resulting in image
|
||||||
|
# files whose file length can grow much larger than their guest disk
|
||||||
|
# size would suggest. If image file length is of concern (e.g. when
|
||||||
|
# storing qcow2 images directly on block devices), you should
|
||||||
|
# consider enabling this option. (since 8.1)
|
||||||
#
|
#
|
||||||
# @overlap-check: which overlap checks to perform for writes to the
|
# @overlap-check: which overlap checks to perform for writes to the
|
||||||
# image, defaults to 'cached' (since 2.2)
|
# image, defaults to 'cached' (since 2.2)
|
||||||
|
|
|
@ -1457,9 +1457,13 @@ SRST
|
||||||
(on/off; default: off)
|
(on/off; default: off)
|
||||||
|
|
||||||
``discard-no-unref``
|
``discard-no-unref``
|
||||||
When enabled, discards from the guest will not cause cluster
|
When enabled, data clusters will remain preallocated when they are
|
||||||
allocations to be relinquished. This prevents qcow2 fragmentation
|
no longer used, e.g. because they are discarded or converted to
|
||||||
that would be caused by such discards. Besides potential
|
zero clusters. As usual, whether the old data is discarded or kept
|
||||||
|
on the protocol level (i.e. in the image file) depends on the
|
||||||
|
setting of the pass-discard-request option. Keeping the clusters
|
||||||
|
preallocated prevents qcow2 fragmentation that would otherwise be
|
||||||
|
caused by freeing and re-allocating them later. Besides potential
|
||||||
performance degradation, such fragmentation can lead to increased
|
performance degradation, such fragmentation can lead to increased
|
||||||
allocation of clusters past the end of the image file,
|
allocation of clusters past the end of the image file,
|
||||||
resulting in image files whose file length can grow much larger
|
resulting in image files whose file length can grow much larger
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue