From 30c321f97fa5898a5ad71070af6ce271fbdba347 Mon Sep 17 00:00:00 2001 From: Anton Kuchin Date: Tue, 7 May 2019 11:12:56 +0300 Subject: [PATCH 01/15] block: remove bs from lists before closing Close involves flush that can be performed asynchronously and bs must be protected from being referenced before it is deleted. Signed-off-by: Anton Kuchin Signed-off-by: Kevin Wolf --- block.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index 5c2c6aa761..6999aad446 100644 --- a/block.c +++ b/block.c @@ -4082,14 +4082,14 @@ static void bdrv_delete(BlockDriverState *bs) assert(bdrv_op_blocker_is_empty(bs)); assert(!bs->refcnt); - bdrv_close(bs); - /* remove from list, if necessary */ if (bs->node_name[0] != '\0') { QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); } QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list); + bdrv_close(bs); + g_free(bs); } From 5c6090fa5bc02bd99efae19c80d18df1f1962764 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 6 May 2019 08:18:54 +0200 Subject: [PATCH 02/15] MAINTAINERS: Downgrade status of block sections without "M:" to "Odd Fixes" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes might still get picked up via the qemu-block mailing list, so the status is not "Orphan" yet. 
Also add the gluster mailing list as suggested by Niels here: https://patchwork.kernel.org/patch/10613297/#22409943 Signed-off-by: Thomas Huth Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Niels de Vos Signed-off-by: Kevin Wolf --- MAINTAINERS | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 66ddbda9c9..899a4cd572 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2404,12 +2404,13 @@ F: block/ssh.c CURL L: qemu-block@nongnu.org -S: Supported +S: Odd Fixes F: block/curl.c GLUSTER L: qemu-block@nongnu.org -S: Supported +L: integration@gluster.org +S: Odd Fixes F: block/gluster.c Null Block Driver From e0c9d0c128595efd10843d2791acab650b9b9e60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 6 May 2019 00:47:37 +0200 Subject: [PATCH 03/15] MAINTAINERS: Add an entry for the Parallel NOR Flash devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Step in to maintain it, since I have some familiarity with the technology. Signed-off-by: Philippe Mathieu-Daudé Acked-by: Laszlo Ersek Acked-by: Michael S. 
Tsirkin Reviewed-by: Thomas Huth Signed-off-by: Kevin Wolf --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 899a4cd572..f25729a06d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1391,6 +1391,13 @@ F: include/hw/net/ F: tests/virtio-net-test.c T: git https://github.com/jasowang/qemu.git net +Parallel NOR Flash devices +M: Philippe Mathieu-Daudé +T: git https://gitlab.com/philmd/qemu.git pflash-next +S: Maintained +F: hw/block/pflash_cfi*.c +F: include/hw/block/flash.h + SCSI M: Paolo Bonzini R: Fam Zheng From 4d231a384c5d72491e6ddc8006d6be929ca6c609 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 8 May 2019 14:00:44 +0200 Subject: [PATCH 04/15] qemu-iotests: Fix cleanup for 192 Test case 192 calls _launch_qemu, so it also needs to _cleanup_qemu when it's done, otherwise the QMP FIFOs stay around in scratch/. It also creates a temporary NBD socket that needs to be removed as well at the end of the test case. Reported-by: Thomas Huth Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Tested-by: Thomas Huth --- tests/qemu-iotests/192 | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/qemu-iotests/192 b/tests/qemu-iotests/192 index 158086f9d2..61a88ac88d 100755 --- a/tests/qemu-iotests/192 +++ b/tests/qemu-iotests/192 @@ -29,7 +29,9 @@ status=1 # failure is the default! _cleanup() { - _cleanup_test_img + _cleanup_qemu + _cleanup_test_img + rm -f "$TEST_DIR/nbd" } trap "_cleanup; exit \$status" 0 1 2 3 15 From 13726123ba916dd8ead23156a61db8d2c8bd42e8 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 3 May 2019 19:17:43 +0200 Subject: [PATCH 05/15] blockjob: Fix coroutine thread after AioContext change Commit 463e0be10 ('blockjob: add AioContext attached callback') tried to make block jobs robust against AioContext changes of their main node, but it never made sure that the job coroutine actually runs in the new thread. 
Instead of waking up the job coroutine in whatever thread it ran before, let's always pass the AioContext where it should be running now. Signed-off-by: Kevin Wolf --- job.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/job.c b/job.c index da8e4b7bf2..2167d53717 100644 --- a/job.c +++ b/job.c @@ -432,7 +432,7 @@ void job_enter_cond(Job *job, bool(*fn)(Job *job)) timer_del(&job->sleep_timer); job->busy = true; job_unlock(); - aio_co_wake(job->co); + aio_co_enter(job->aio_context, job->co); } void job_enter(Job *job) From 93c60f38626a076b678793dd7e24fb646ce70a34 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 3 May 2019 19:17:44 +0200 Subject: [PATCH 06/15] test-block-iothread: Job coroutine thread after AioContext switch This tests that a job coroutine always runs in the right iothread after the AioContext of its main node has changed. Signed-off-by: Kevin Wolf --- tests/test-block-iothread.c | 107 ++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c index 97ac0b159d..036ed9a3b3 100644 --- a/tests/test-block-iothread.c +++ b/tests/test-block-iothread.c @@ -354,6 +354,111 @@ static void test_sync_op(const void *opaque) blk_unref(blk); } +typedef struct TestBlockJob { + BlockJob common; + bool should_complete; + int n; +} TestBlockJob; + +static int test_job_prepare(Job *job) +{ + g_assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + return 0; +} + +static int coroutine_fn test_job_run(Job *job, Error **errp) +{ + TestBlockJob *s = container_of(job, TestBlockJob, common.job); + + job_transition_to_ready(&s->common.job); + while (!s->should_complete) { + s->n++; + g_assert(qemu_get_current_aio_context() == job->aio_context); + + /* Avoid job_sleep_ns() because it marks the job as !busy. 
We want to + * emulate some actual activity (probably some I/O) here so that the + * drain involved in AioContext switches has to wait for this activity + * to stop. */ + qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000); + + job_pause_point(&s->common.job); + } + + g_assert(qemu_get_current_aio_context() == job->aio_context); + return 0; +} + +static void test_job_complete(Job *job, Error **errp) +{ + TestBlockJob *s = container_of(job, TestBlockJob, common.job); + s->should_complete = true; +} + +BlockJobDriver test_job_driver = { + .job_driver = { + .instance_size = sizeof(TestBlockJob), + .free = block_job_free, + .user_resume = block_job_user_resume, + .drain = block_job_drain, + .run = test_job_run, + .complete = test_job_complete, + .prepare = test_job_prepare, + }, +}; + +static void test_attach_blockjob(void) +{ + IOThread *iothread = iothread_new(); + AioContext *ctx = iothread_get_aio_context(iothread); + BlockBackend *blk; + BlockDriverState *bs; + TestBlockJob *tjob; + + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort); + blk_insert_bs(blk, bs, &error_abort); + + tjob = block_job_create("job0", &test_job_driver, NULL, bs, + 0, BLK_PERM_ALL, + 0, 0, NULL, NULL, &error_abort); + job_start(&tjob->common.job); + + while (tjob->n == 0) { + aio_poll(qemu_get_aio_context(), false); + } + + blk_set_aio_context(blk, ctx); + + tjob->n = 0; + while (tjob->n == 0) { + aio_poll(qemu_get_aio_context(), false); + } + + aio_context_acquire(ctx); + blk_set_aio_context(blk, qemu_get_aio_context()); + aio_context_release(ctx); + + tjob->n = 0; + while (tjob->n == 0) { + aio_poll(qemu_get_aio_context(), false); + } + + blk_set_aio_context(blk, ctx); + + tjob->n = 0; + while (tjob->n == 0) { + aio_poll(qemu_get_aio_context(), false); + } + + aio_context_acquire(ctx); + job_complete_sync(&tjob->common.job, &error_abort); + blk_set_aio_context(blk, qemu_get_aio_context()); + aio_context_release(ctx); + + 
bdrv_unref(bs); + blk_unref(blk); +} + int main(int argc, char **argv) { int i; @@ -368,5 +473,7 @@ int main(int argc, char **argv) g_test_add_data_func(t->name, t, test_sync_op); } + g_test_add_func("/attach/blockjob", test_attach_blockjob); + return g_test_run(); } From 97ede57a2b810445ac0c7e8ad1d2af6cc13bc76e Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Wed, 8 May 2019 12:43:24 +0200 Subject: [PATCH 07/15] qemu-img: Use IEC binary prefixes for size constants Using IEC binary prefixes in order to make the code more readable. Signed-off-by: Stefano Garzarella Signed-off-by: Kevin Wolf --- qemu-img.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index e6ad5978e0..71c92f142a 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -37,6 +37,7 @@ #include "qemu/option.h" #include "qemu/error-report.h" #include "qemu/log.h" +#include "qemu/units.h" #include "qom/object_interfaces.h" #include "sysemu/sysemu.h" #include "sysemu/block-backend.h" @@ -1216,7 +1217,7 @@ static int compare_buffers(const uint8_t *buf1, const uint8_t *buf2, return res; } -#define IO_BUF_SIZE (2 * 1024 * 1024) +#define IO_BUF_SIZE (2 * MiB) /* * Check if passed sectors are empty (not allocated or contain only 0 bytes) @@ -2960,7 +2961,7 @@ static int img_map(int argc, char **argv) int64_t n; /* Probe up to 1 GiB at a time. */ - n = MIN(1 << 30, length - offset); + n = MIN(1 * GiB, length - offset); ret = get_block_status(bs, offset, n, &next); if (ret < 0) { From e3b4257d032dede8ffcfe868ffd74bb584842f62 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Wed, 1 May 2019 21:13:55 +0300 Subject: [PATCH 08/15] qcow2: Replace bdrv_write() with bdrv_pwrite() There's only one bdrv_write() call left in the qcow2 code, and it can be trivially replaced with the byte-based bdrv_pwrite(). 
Signed-off-by: Alberto Garcia Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block/qcow2-refcount.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index fa7ac1f7cb..7481903396 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -2429,8 +2429,8 @@ write_refblocks: on_disk_refblock = (void *)((char *) *refcount_table + refblock_index * s->cluster_size); - ret = bdrv_write(bs->file, refblock_offset / BDRV_SECTOR_SIZE, - on_disk_refblock, s->cluster_sectors); + ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock, + s->cluster_size); if (ret < 0) { fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); goto fail; From d4f189713f0cbe9b872ef01f9fc4371dc05ad2c5 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Wed, 1 May 2019 21:13:56 +0300 Subject: [PATCH 09/15] vdi: Replace bdrv_{read,write}() with bdrv_{pread,pwrite}() There's only a couple of bdrv_read() and bdrv_write() calls left in the vdi code, and they can be trivially replaced with the byte-based bdrv_pread() and bdrv_pwrite(). Signed-off-by: Alberto Garcia Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block/vdi.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/block/vdi.c b/block/vdi.c index e1c42ad732..d7ef6628e7 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -171,6 +171,8 @@ typedef struct { uint64_t unused2[7]; } QEMU_PACKED VdiHeader; +QEMU_BUILD_BUG_ON(sizeof(VdiHeader) != 512); + typedef struct { /* The block map entries are little endian (even in memory). 
*/ uint32_t *bmap; @@ -384,7 +386,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags, logout("\n"); - ret = bdrv_read(bs->file, 0, (uint8_t *)&header, 1); + ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); if (ret < 0) { goto fail; } @@ -484,8 +486,8 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, - bmap_size); + ret = bdrv_pread(bs->file, header.offset_bmap, s->bmap, + bmap_size * SECTOR_SIZE); if (ret < 0) { goto fail_free_bmap; } @@ -704,7 +706,7 @@ nonallocating_write: assert(VDI_IS_ALLOCATED(bmap_first)); *header = s->header; vdi_header_to_le(header); - ret = bdrv_write(bs->file, 0, block, 1); + ret = bdrv_pwrite(bs->file, 0, block, sizeof(VdiHeader)); g_free(block); block = NULL; @@ -722,10 +724,11 @@ nonallocating_write: base = ((uint8_t *)&s->bmap[0]) + bmap_first * SECTOR_SIZE; logout("will write %u block map sectors starting from entry %u\n", n_sectors, bmap_first); - ret = bdrv_write(bs->file, offset, base, n_sectors); + ret = bdrv_pwrite(bs->file, offset * SECTOR_SIZE, base, + n_sectors * SECTOR_SIZE); } - return ret; + return ret < 0 ? ret : 0; } static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, From e5a0a6784a63a15d5b1221326fe5c258be6b5561 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Wed, 1 May 2019 21:13:57 +0300 Subject: [PATCH 10/15] vvfat: Replace bdrv_{read,write}() with bdrv_{pread,pwrite}() There's only a couple of bdrv_read() and bdrv_write() calls left in the vvfat code, and they can be trivially replaced with the byte-based bdrv_pread() and bdrv_pwrite(). 
Signed-off-by: Alberto Garcia Signed-off-by: Kevin Wolf --- block/vvfat.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/block/vvfat.c b/block/vvfat.c index 5f66787890..253cc716dd 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -1494,8 +1494,8 @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num, DLOG(fprintf(stderr, "sectors %" PRId64 "+%" PRId64 " allocated\n", sector_num, n >> BDRV_SECTOR_BITS)); - if (bdrv_read(s->qcow, sector_num, buf + i * 0x200, - n >> BDRV_SECTOR_BITS)) { + if (bdrv_pread(s->qcow, sector_num * BDRV_SECTOR_SIZE, + buf + i * 0x200, n) < 0) { return -1; } i += (n >> BDRV_SECTOR_BITS) - 1; @@ -1983,8 +1983,9 @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s, if (res) { return -1; } - res = bdrv_write(s->qcow, offset, s->cluster_buffer, 1); - if (res) { + res = bdrv_pwrite(s->qcow, offset * BDRV_SECTOR_SIZE, + s->cluster_buffer, BDRV_SECTOR_SIZE); + if (res < 0) { return -2; } } @@ -3050,7 +3051,8 @@ DLOG(checkpoint()); * Use qcow backend. Commit later. 
*/ DLOG(fprintf(stderr, "Write to qcow backend: %d + %d\n", (int)sector_num, nb_sectors)); - ret = bdrv_write(s->qcow, sector_num, buf, nb_sectors); + ret = bdrv_pwrite(s->qcow, sector_num * BDRV_SECTOR_SIZE, buf, + nb_sectors * BDRV_SECTOR_SIZE); if (ret < 0) { fprintf(stderr, "Error writing to qcow backend\n"); return ret; From 2e11d7562ac9f065b9fe696fda51273a1e6671e9 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Wed, 1 May 2019 21:13:58 +0300 Subject: [PATCH 11/15] block: Remove bdrv_read() and bdrv_write() No one is using these functions anymore, all callers have switched to the byte-based bdrv_pread() and bdrv_pwrite() Signed-off-by: Alberto Garcia Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block/io.c | 43 +++++++------------------------------------ include/block/block.h | 4 ---- 2 files changed, 7 insertions(+), 40 deletions(-) diff --git a/block/io.c b/block/io.c index 0412a51314..aeebc9c23c 100644 --- a/block/io.c +++ b/block/io.c @@ -837,42 +837,6 @@ static int bdrv_prwv_co(BdrvChild *child, int64_t offset, return rwco.ret; } -/* - * Process a synchronous request using coroutines - */ -static int bdrv_rw_co(BdrvChild *child, int64_t sector_num, uint8_t *buf, - int nb_sectors, bool is_write, BdrvRequestFlags flags) -{ - QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, - nb_sectors * BDRV_SECTOR_SIZE); - - if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { - return -EINVAL; - } - - return bdrv_prwv_co(child, sector_num << BDRV_SECTOR_BITS, - &qiov, is_write, flags); -} - -/* return < 0 if error. See bdrv_write() for the return codes */ -int bdrv_read(BdrvChild *child, int64_t sector_num, - uint8_t *buf, int nb_sectors) -{ - return bdrv_rw_co(child, sector_num, buf, nb_sectors, false, 0); -} - -/* Return < 0 if error. Important errors are: - -EIO generic I/O error (may happen for all errors) - -ENOMEDIUM No media inserted. 
- -EINVAL Invalid sector number or nb_sectors - -EACCES Trying to write a read-only device -*/ -int bdrv_write(BdrvChild *child, int64_t sector_num, - const uint8_t *buf, int nb_sectors) -{ - return bdrv_rw_co(child, sector_num, (uint8_t *)buf, nb_sectors, true, 0); -} - int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, int bytes, BdrvRequestFlags flags) { @@ -935,6 +899,7 @@ int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov) return qiov->size; } +/* See bdrv_pwrite() for the return codes */ int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes) { QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); @@ -958,6 +923,12 @@ int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov) return qiov->size; } +/* Return no. of bytes on success or < 0 on error. Important errors are: + -EIO generic I/O error (may happen for all errors) + -ENOMEDIUM No media inserted. + -EINVAL Invalid offset or number of bytes + -EACCES Trying to write a read-only device +*/ int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes) { QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); diff --git a/include/block/block.h b/include/block/block.h index c7a26199aa..5e2b98b0ee 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -316,10 +316,6 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, Error **errp); void bdrv_reopen_commit(BDRVReopenState *reopen_state); void bdrv_reopen_abort(BDRVReopenState *reopen_state); -int bdrv_read(BdrvChild *child, int64_t sector_num, - uint8_t *buf, int nb_sectors); -int bdrv_write(BdrvChild *child, int64_t sector_num, - const uint8_t *buf, int nb_sectors); int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, int bytes, BdrvRequestFlags flags); int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags); From 433e8e3b22021027ab8bc27f6fefa132fb33bfed Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Wed, 1 May 2019 21:13:59 +0300 
Subject: [PATCH 12/15] qcow2: Remove BDRVQcow2State.cluster_sectors The last user of this field disappeared when we replaced the sector-based bdrv_write() with the byte-based bdrv_pwrite(). Signed-off-by: Alberto Garcia Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block/qcow2.c | 1 - block/qcow2.h | 1 - 2 files changed, 2 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index a520d116ef..8e024007db 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1259,7 +1259,6 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, s->cluster_bits = header.cluster_bits; s->cluster_size = 1 << s->cluster_bits; - s->cluster_sectors = 1 << (s->cluster_bits - BDRV_SECTOR_BITS); /* Initialise version 3 header fields */ if (header.version == 2) { diff --git a/block/qcow2.h b/block/qcow2.h index fdee297f33..e62508d1ce 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -266,7 +266,6 @@ typedef struct Qcow2BitmapHeaderExt { typedef struct BDRVQcow2State { int cluster_bits; int cluster_size; - int cluster_sectors; int l2_slice_size; int l2_bits; int l2_size; From 35ddd9300b81b70171da10134c36a7e22c845f16 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Thu, 9 May 2019 19:52:35 +0200 Subject: [PATCH 13/15] qemu-img: Allow rebase with no input base Currently, without -u, you cannot add a backing file to an image when it currently has none: $ qemu-img rebase -b base.qcow2 foo.qcow2 qemu-img: Could not open old backing file '': The 'file' block driver requires a file name It is really simple to allow this, though (effectively by setting old_backing_size to 0), so this patch does just that. 
Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- qemu-img.c | 61 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index 71c92f142a..cfa44b4153 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -3312,26 +3312,30 @@ static int img_rebase(int argc, char **argv) char backing_name[PATH_MAX]; QDict *options = NULL; - if (bs->backing_format[0] != '\0') { - options = qdict_new(); - qdict_put_str(options, "driver", bs->backing_format); - } - - if (force_share) { - if (!options) { + if (bs->backing) { + if (bs->backing_format[0] != '\0') { options = qdict_new(); + qdict_put_str(options, "driver", bs->backing_format); } - qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true); - } - bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name)); - blk_old_backing = blk_new_open(backing_name, NULL, - options, src_flags, &local_err); - if (!blk_old_backing) { - error_reportf_err(local_err, - "Could not open old backing file '%s': ", - backing_name); - ret = -1; - goto out; + + if (force_share) { + if (!options) { + options = qdict_new(); + } + qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true); + } + bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name)); + blk_old_backing = blk_new_open(backing_name, NULL, + options, src_flags, &local_err); + if (!blk_old_backing) { + error_reportf_err(local_err, + "Could not open old backing file '%s': ", + backing_name); + ret = -1; + goto out; + } + } else { + blk_old_backing = NULL; } if (out_baseimg[0]) { @@ -3384,7 +3388,7 @@ static int img_rebase(int argc, char **argv) */ if (!unsafe) { int64_t size; - int64_t old_backing_size; + int64_t old_backing_size = 0; int64_t new_backing_size = 0; uint64_t offset; int64_t n; @@ -3400,15 +3404,18 @@ static int img_rebase(int argc, char **argv) ret = -1; goto out; } - old_backing_size = blk_getlength(blk_old_backing); - if (old_backing_size < 0) { - char 
backing_name[PATH_MAX]; + if (blk_old_backing) { + old_backing_size = blk_getlength(blk_old_backing); + if (old_backing_size < 0) { + char backing_name[PATH_MAX]; - bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name)); - error_report("Could not get size of '%s': %s", - backing_name, strerror(-old_backing_size)); - ret = -1; - goto out; + bdrv_get_backing_filename(bs, backing_name, + sizeof(backing_name)); + error_report("Could not get size of '%s': %s", + backing_name, strerror(-old_backing_size)); + ret = -1; + goto out; + } } if (blk_new_backing) { new_backing_size = blk_getlength(blk_new_backing); From 1c6e87799252d166426f642c00be8927b68ab164 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Thu, 9 May 2019 19:52:36 +0200 Subject: [PATCH 14/15] qemu-img: Use zero writes after source backing EOF Past the end of the source backing file, we memset() buf_old to zero, so it is clearly easy to use blk_pwrite_zeroes() instead of blk_pwrite() then. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- qemu-img.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index cfa44b4153..28fba1e7a7 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -3432,6 +3432,8 @@ static int img_rebase(int argc, char **argv) } for (offset = 0; offset < size; offset += n) { + bool buf_old_is_zero = false; + /* How many bytes can we handle with the next read? 
*/ n = MIN(IO_BUF_SIZE, size - offset); @@ -3452,6 +3454,7 @@ static int img_rebase(int argc, char **argv) */ if (offset >= old_backing_size) { memset(buf_old, 0, n); + buf_old_is_zero = true; } else { if (offset + n > old_backing_size) { n = old_backing_size - offset; @@ -3487,8 +3490,12 @@ static int img_rebase(int argc, char **argv) if (compare_buffers(buf_old + written, buf_new + written, n - written, &pnum)) { - ret = blk_pwrite(blk, offset + written, - buf_old + written, pnum, 0); + if (buf_old_is_zero) { + ret = blk_pwrite_zeroes(blk, offset + written, pnum, 0); + } else { + ret = blk_pwrite(blk, offset + written, + buf_old + written, pnum, 0); + } if (ret < 0) { error_report("Error while writing to COW image: %s", strerror(-ret)); From 11f6fc50e7501b5f5d04100ea1c21fa8f1cf7b53 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Thu, 9 May 2019 19:52:37 +0200 Subject: [PATCH 15/15] iotests: Add test for rebase without input base This patch adds a test for rebasing an image that currently does not have a backing file. Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- tests/qemu-iotests/252 | 124 +++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/252.out | 39 ++++++++++++ tests/qemu-iotests/group | 1 + 3 files changed, 164 insertions(+) create mode 100755 tests/qemu-iotests/252 create mode 100644 tests/qemu-iotests/252.out diff --git a/tests/qemu-iotests/252 b/tests/qemu-iotests/252 new file mode 100755 index 0000000000..f6c8f71444 --- /dev/null +++ b/tests/qemu-iotests/252 @@ -0,0 +1,124 @@ +#!/usr/bin/env bash +# +# Tests for rebasing COW images that require zero cluster support +# +# Copyright (C) 2019 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=mreitz@redhat.com + +seq=$(basename $0) +echo "QA output created by $seq" + +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img + rm -f "$TEST_IMG.base_new" +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter +. ./common.pattern + +# Currently only qcow2 and qed support rebasing, and only qcow2 v3 has +# zero cluster support +_supported_fmt qcow2 +_unsupported_imgopts 'compat=0.10' +_supported_proto file +_supported_os Linux + +CLUSTER_SIZE=65536 + +echo +echo "=== Test rebase without input base ===" +echo + +# Cluster allocations to be tested: +# +# Backing (new) 11 -- 11 -- 11 -- +# COW image 22 22 11 11 -- -- +# +# Expected result: +# +# COW image 22 22 11 11 00 -- +# +# (Cluster 2 might be "--" after the rebase, too, but rebase just +# compares the new backing file to the old one and disregards the +# overlay. Therefore, it will never discard overlay clusters.) 
+ +_make_test_img $((6 * CLUSTER_SIZE)) +TEST_IMG="$TEST_IMG.base_new" _make_test_img $((6 * CLUSTER_SIZE)) + +echo + +$QEMU_IO "$TEST_IMG" \ + -c "write -P 0x22 $((0 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + -c "write -P 0x11 $((2 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + | _filter_qemu_io + +$QEMU_IO "$TEST_IMG.base_new" \ + -c "write -P 0x11 $((0 * CLUSTER_SIZE)) $CLUSTER_SIZE" \ + -c "write -P 0x11 $((2 * CLUSTER_SIZE)) $CLUSTER_SIZE" \ + -c "write -P 0x11 $((4 * CLUSTER_SIZE)) $CLUSTER_SIZE" \ + | _filter_qemu_io + +echo + +# This should be a no-op +$QEMU_IMG rebase -b "" "$TEST_IMG" + +# Verify the data is correct +$QEMU_IO "$TEST_IMG" \ + -c "read -P 0x22 $((0 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + -c "read -P 0x11 $((2 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + -c "read -P 0x00 $((4 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + | _filter_qemu_io + +echo + +# Verify the allocation status (first four clusters should be allocated +# in TEST_IMG, clusters 4 and 5 should be unallocated (marked as zero +# clusters here because there is no backing file)) +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +echo + +$QEMU_IMG rebase -b "$TEST_IMG.base_new" "$TEST_IMG" + +# Verify the data is correct +$QEMU_IO "$TEST_IMG" \ + -c "read -P 0x22 $((0 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + -c "read -P 0x11 $((2 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + -c "read -P 0x00 $((4 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + | _filter_qemu_io + +echo + +# Verify the allocation status (first four clusters should be allocated +# in TEST_IMG, cluster 4 should be zero, and cluster 5 should be +# unallocated (signified by '"depth": 1')) +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/252.out b/tests/qemu-iotests/252.out new file mode 100644 index 0000000000..12dce889f8 --- /dev/null +++ b/tests/qemu-iotests/252.out @@ -0,0 +1,39 @@ +QA output 
created by 252 + +=== Test rebase without input base === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=393216 +Formatting 'TEST_DIR/t.IMGFMT.base_new', fmt=IMGFMT size=393216 + +wrote 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 131072/131072 bytes at offset 131072 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 131072 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 262144 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +read 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 131072/131072 bytes at offset 131072 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 131072/131072 bytes at offset 262144 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +[{ "start": 0, "length": 262144, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, +{ "start": 262144, "length": 131072, "depth": 0, "zero": true, "data": false}] + +read 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 131072/131072 bytes at offset 131072 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 131072/131072 bytes at offset 262144 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +[{ "start": 0, "length": 262144, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, +{ "start": 262144, "length": 65536, "depth": 0, "zero": true, "data": false}, +{ "start": 327680, "length": 65536, "depth": 1, "zero": true, "data": false}] +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 7ac9a5ea4a..00e474ab0a 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -249,3 +249,4 @@ 247 rw auto quick 248 rw auto quick 249 rw auto quick +252 rw auto backing quick