blockdev-backup: Add error handling option for copy-before-write jobs

This patch extends the blockdev-backup QMP command to allow users to specify
how to behave when I/O errors occur during copy-before-write operations.
Previously, the behavior was fixed and could not be controlled by the user.

The new 'on-cbw-error' option can be set to one of two values:
- 'break-guest-write': Forwards the I/O error to the guest and triggers
  the on-source-error policy. This preserves snapshot integrity at the
  expense of guest I/O operations.
- 'break-snapshot': Allows the guest OS to continue running normally,
  but invalidates the snapshot and aborts related jobs. This prioritizes
  guest operation over backup consistency.

This enhancement provides more flexibility for backup operations in different
environments where requirements for guest availability versus backup
consistency may vary.

The default behavior remains unchanged to maintain backward compatibility.

Signed-off-by: Raman Dzehtsiar <Raman.Dzehtsiar@gmail.com>
Message-ID: <20250414090025.828660-1-Raman.Dzehtsiar@gmail.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
[vsementsov: fix long lines]
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Tested-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
This commit is contained in:
Raman Dzehtsiar 2025-04-14 11:00:25 +02:00 committed by Vladimir Sementsov-Ogievskiy
parent b836bf2ab6
commit 3d3911f16b
9 changed files with 117 additions and 4 deletions

View file

@ -361,6 +361,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
BackupPerf *perf, BackupPerf *perf,
BlockdevOnError on_source_error, BlockdevOnError on_source_error,
BlockdevOnError on_target_error, BlockdevOnError on_target_error,
OnCbwError on_cbw_error,
int creation_flags, int creation_flags,
BlockCompletionFunc *cb, void *opaque, BlockCompletionFunc *cb, void *opaque,
JobTxn *txn, Error **errp) JobTxn *txn, Error **errp)
@ -458,7 +459,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
} }
cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source, cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source,
perf->min_cluster_size, &bcs, errp); perf->min_cluster_size, &bcs, on_cbw_error, errp);
if (!cbw) { if (!cbw) {
goto error; goto error;
} }

View file

@ -551,6 +551,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
bool discard_source, bool discard_source,
uint64_t min_cluster_size, uint64_t min_cluster_size,
BlockCopyState **bcs, BlockCopyState **bcs,
OnCbwError on_cbw_error,
Error **errp) Error **errp)
{ {
BDRVCopyBeforeWriteState *state; BDRVCopyBeforeWriteState *state;
@ -568,6 +569,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
} }
qdict_put_str(opts, "file", bdrv_get_node_name(source)); qdict_put_str(opts, "file", bdrv_get_node_name(source));
qdict_put_str(opts, "target", bdrv_get_node_name(target)); qdict_put_str(opts, "target", bdrv_get_node_name(target));
qdict_put_str(opts, "on-cbw-error", OnCbwError_str(on_cbw_error));
if (min_cluster_size > INT64_MAX) { if (min_cluster_size > INT64_MAX) {
error_setg(errp, "min-cluster-size too large: %" PRIu64 " > %" PRIi64, error_setg(errp, "min-cluster-size too large: %" PRIu64 " > %" PRIi64,

View file

@ -42,6 +42,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
bool discard_source, bool discard_source,
uint64_t min_cluster_size, uint64_t min_cluster_size,
BlockCopyState **bcs, BlockCopyState **bcs,
OnCbwError on_cbw_error,
Error **errp); Error **errp);
void bdrv_cbw_drop(BlockDriverState *bs); void bdrv_cbw_drop(BlockDriverState *bs);

View file

@ -583,7 +583,9 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, false, 0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, false,
NULL, &perf, NULL, &perf,
BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
BLOCKDEV_ON_ERROR_REPORT, JOB_INTERNAL, BLOCKDEV_ON_ERROR_REPORT,
ON_CBW_ERROR_BREAK_GUEST_WRITE,
JOB_INTERNAL,
backup_job_completed, bs, NULL, &local_err); backup_job_completed, bs, NULL, &local_err);
if (local_err) { if (local_err) {
error_propagate(errp, local_err); error_propagate(errp, local_err);

View file

@ -2641,6 +2641,7 @@ static BlockJob *do_backup_common(BackupCommon *backup,
BdrvDirtyBitmap *bmap = NULL; BdrvDirtyBitmap *bmap = NULL;
BackupPerf perf = { .max_workers = 64 }; BackupPerf perf = { .max_workers = 64 };
int job_flags = JOB_DEFAULT; int job_flags = JOB_DEFAULT;
OnCbwError on_cbw_error = ON_CBW_ERROR_BREAK_GUEST_WRITE;
if (!backup->has_speed) { if (!backup->has_speed) {
backup->speed = 0; backup->speed = 0;
@ -2745,6 +2746,10 @@ static BlockJob *do_backup_common(BackupCommon *backup,
job_flags |= JOB_MANUAL_DISMISS; job_flags |= JOB_MANUAL_DISMISS;
} }
if (backup->has_on_cbw_error) {
on_cbw_error = backup->on_cbw_error;
}
job = backup_job_create(backup->job_id, bs, target_bs, backup->speed, job = backup_job_create(backup->job_id, bs, target_bs, backup->speed,
backup->sync, bmap, backup->bitmap_mode, backup->sync, bmap, backup->bitmap_mode,
backup->compress, backup->discard_source, backup->compress, backup->discard_source,
@ -2752,6 +2757,7 @@ static BlockJob *do_backup_common(BackupCommon *backup,
&perf, &perf,
backup->on_source_error, backup->on_source_error,
backup->on_target_error, backup->on_target_error,
on_cbw_error,
job_flags, NULL, NULL, txn, errp); job_flags, NULL, NULL, txn, errp);
return job; return job;
} }

View file

@ -179,6 +179,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
* all ".has_*" fields are ignored. * all ".has_*" fields are ignored.
* @on_source_error: The action to take upon error reading from the source. * @on_source_error: The action to take upon error reading from the source.
* @on_target_error: The action to take upon error writing to the target. * @on_target_error: The action to take upon error writing to the target.
* @on_cbw_error: The action to take upon error in copy-before-write operations.
* @creation_flags: Flags that control the behavior of the Job lifetime. * @creation_flags: Flags that control the behavior of the Job lifetime.
* See @BlockJobCreateFlags * See @BlockJobCreateFlags
* @cb: Completion function for the job. * @cb: Completion function for the job.
@ -198,6 +199,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
BackupPerf *perf, BackupPerf *perf,
BlockdevOnError on_source_error, BlockdevOnError on_source_error,
BlockdevOnError on_target_error, BlockdevOnError on_target_error,
OnCbwError on_cbw_error,
int creation_flags, int creation_flags,
BlockCompletionFunc *cb, void *opaque, BlockCompletionFunc *cb, void *opaque,
JobTxn *txn, Error **errp); JobTxn *txn, Error **errp);

View file

@ -1602,6 +1602,9 @@
# default 'report' (no limitations, since this applies to a # default 'report' (no limitations, since this applies to a
# different block device than @device). # different block device than @device).
# #
# @on-cbw-error: policy defining behavior on I/O errors in
# copy-before-write jobs; defaults to break-guest-write. (Since 10.1)
#
# @auto-finalize: When false, this job will wait in a PENDING state # @auto-finalize: When false, this job will wait in a PENDING state
# after it has finished its work, waiting for @block-job-finalize # after it has finished its work, waiting for @block-job-finalize
# before making any block graph changes. When true, this job will # before making any block graph changes. When true, this job will
@ -1641,6 +1644,7 @@
'*compress': 'bool', '*compress': 'bool',
'*on-source-error': 'BlockdevOnError', '*on-source-error': 'BlockdevOnError',
'*on-target-error': 'BlockdevOnError', '*on-target-error': 'BlockdevOnError',
'*on-cbw-error': 'OnCbwError',
'*auto-finalize': 'bool', '*auto-dismiss': 'bool', '*auto-finalize': 'bool', '*auto-dismiss': 'bool',
'*filter-node-name': 'str', '*filter-node-name': 'str',
'*discard-source': 'bool', '*discard-source': 'bool',

View file

@ -99,6 +99,68 @@ class TestCbwError(iotests.QMPTestCase):
log = iotests.filter_qemu_io(log) log = iotests.filter_qemu_io(log)
return log return log
def do_cbw_error_via_blockdev_backup(self, on_cbw_error=None):
    """Run the CBW error scenario with the filter created by blockdev-backup.

    Adds a 'source' node and a 'target' node whose protocol layer is a
    blkdebug driver that fails one 'write_aio' event with errno 5, starts
    a 'sync: none' blockdev-backup job whose filter node is named 'cbw',
    and attaches a snapshot-access node on top of that filter. It then
    issues a 1M write through 'cbw' followed by a 1M read through
    'access', shuts the VM down and returns its log.

    :param on_cbw_error: value for the 'on-cbw-error' option of
        blockdev-backup; when falsy (the default None) the option is
        left out of the command entirely.
    :return: the VM log after passing through iotests.filter_qemu_io().
    """
    source_node = {
        'node-name': 'source',
        'driver': iotests.imgfmt,
        'file': {
            'driver': 'file',
            'filename': source_img
        }
    }
    self.vm.cmd('blockdev-add', source_node)

    # Single-shot fault: only one write_aio event on the target fails.
    # NOTE(review): presumably this is the copy-before-write request
    # triggered by the guest write below -- confirm against blkdebug.
    write_fault = {
        'event': 'write_aio',
        'errno': 5,
        'immediately': False,
        'once': True
    }
    target_node = {
        'node-name': 'target',
        'driver': iotests.imgfmt,
        'file': {
            'driver': 'blkdebug',
            'image': {
                'driver': 'file',
                'filename': temp_img
            },
            'inject-error': [write_fault]
        }
    }
    self.vm.cmd('blockdev-add', target_node)

    backup_args = {
        'device': 'source',
        'target': 'target',
        'sync': 'none',
        'job-id': 'job-id',
        'filter-node-name': 'cbw'
    }
    # Omit the option when no policy was requested so that the command's
    # own default gets exercised.
    if on_cbw_error:
        backup_args['on-cbw-error'] = on_cbw_error
    self.vm.cmd('blockdev-backup', backup_args)

    access_node = {
        'node-name': 'access',
        'driver': 'snapshot-access',
        'file': 'cbw'
    }
    self.vm.cmd('blockdev-add', access_node)

    # Write through the filter first, then read via snapshot-access; the
    # HMP command itself must return an empty string in both cases (the
    # qemu-io outcome only shows up in the VM log).
    hmp_commands = (
        'qemu-io cbw "write 0 1M"',
        'qemu-io access "read 0 1M"',
    )
    for hmp_command in hmp_commands:
        reply = self.vm.qmp('human-monitor-command',
                            command_line=hmp_command)
        self.assert_qmp(reply, 'return', '')

    self.vm.shutdown()
    return iotests.filter_qemu_io(self.vm.get_log())
def test_break_snapshot_on_cbw_error(self): def test_break_snapshot_on_cbw_error(self):
"""break-snapshot behavior: """break-snapshot behavior:
Guest write succeed, but further snapshot-read fails, as snapshot is Guest write succeed, but further snapshot-read fails, as snapshot is
@ -123,6 +185,39 @@ read failed: Permission denied
write failed: Input/output error write failed: Input/output error
read 1048576/1048576 bytes at offset 0 read 1048576/1048576 bytes at offset 0
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
""")
def test_break_snapshot_policy_forwarding(self):
    """blockdev-backup with on-cbw-error=break-snapshot.

    The expected log shows the write through the filter succeeding and
    the following snapshot read failing with "Permission denied".
    """
    expected_log = """\
wrote 1048576/1048576 bytes at offset 0
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read failed: Permission denied
"""
    actual_log = self.do_cbw_error_via_blockdev_backup('break-snapshot')
    self.assertEqual(actual_log, expected_log)
def test_break_guest_write_policy_forwarding(self):
    """blockdev-backup with on-cbw-error=break-guest-write.

    The expected log shows the write through the filter failing with
    "Input/output error" and the following snapshot read succeeding.
    """
    expected_log = """\
write failed: Input/output error
read 1048576/1048576 bytes at offset 0
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
"""
    actual_log = self.do_cbw_error_via_blockdev_backup('break-guest-write')
    self.assertEqual(actual_log, expected_log)
def test_default_on_cbw_error_policy_forwarding(self):
"""Ensure break-guest-write policy is used by default when
on-cbw-error is not explicitly specified.
"""
log = self.do_cbw_error_via_blockdev_backup()
self.assertEqual(log, """\
write failed: Input/output error
read 1048576/1048576 bytes at offset 0
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
""") """)
def do_cbw_timeout(self, on_cbw_error): def do_cbw_timeout(self, on_cbw_error):

View file

@ -1,5 +1,5 @@
.... .......
---------------------------------------------------------------------- ----------------------------------------------------------------------
Ran 4 tests Ran 7 tests
OK OK