Merge remote-tracking branch 'kwolf/for-anthony' into staging

* kwolf/for-anthony: (46 commits)
  qed: remove incoming live migration blocker
  qed: honor BDRV_O_INCOMING for incoming live migration
  migration: clear BDRV_O_INCOMING flags on end of incoming live migration
  qed: add bdrv_invalidate_cache to be called after incoming live migration
  blockdev: open images with BDRV_O_INCOMING on incoming live migration
  block: add a function to clear incoming live migration flags
  block: Add new BDRV_O_INCOMING flag to notice incoming live migration
  block stream: close unused files and update ->backing_hd
  qemu-iotests: Fix call syntax for qemu-io
  qemu-iotests: Fix call syntax for qemu-img
  qemu-iotests: Test unknown qcow2 header extensions
  qemu-iotests: qcow2.py
  sheepdog: fix send req helpers
  sheepdog: implement SD_OP_FLUSH_VDI operation
  block: bdrv_append() fixes
  qed: track dirty flag status
  qemu-img: add dirty flag status
  qed: image fragmentation statistics
  qemu-img: add image fragmentation statistics
  block: document job API
  ...
Anthony Liguori 2012-04-10 08:16:12 -05:00
commit bb5d8dd757
44 changed files with 1089 additions and 502 deletions
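
The core of the series is a new BDRV_O_INCOMING open flag: the destination of a live migration opens its images with the flag set, format drivers avoid touching the image while it is set, and when the incoming migration finishes the flag is cleared and bdrv_invalidate_cache() lets a driver throw away metadata it may have read before the source stopped writing. A minimal sketch of such a callback for a hypothetical "foo" format driver (the foo names are illustrative, not part of this series; qed's real implementation is in block/qed.c below):

    static void bdrv_foo_invalidate_cache(BlockDriverState *bs)
    {
        BDRVFooState *s = bs->opaque;

        /* Discard metadata cached while the migration source could still write... */
        bdrv_foo_close(bs);
        memset(s, 0, sizeof(*s));

        /* ...and rebuild it from the now-consistent image. */
        bdrv_foo_open(bs, bs->open_flags);
    }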

block/blkdebug.c

@ -397,12 +397,6 @@ static void blkdebug_close(BlockDriverState *bs)
}
}
static BlockDriverAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
BlockDriverCompletionFunc *cb, void *opaque)
{
return bdrv_aio_flush(bs->file, cb, opaque);
}
static void process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
BlkdebugVars *old_vars)
{
@ -452,7 +446,6 @@ static BlockDriver bdrv_blkdebug = {
.bdrv_aio_readv = blkdebug_aio_readv,
.bdrv_aio_writev = blkdebug_aio_writev,
.bdrv_aio_flush = blkdebug_aio_flush,
.bdrv_debug_event = blkdebug_debug_event,
};

block/cow.c

@ -318,11 +318,6 @@ exit:
return ret;
}
static coroutine_fn int cow_co_flush(BlockDriverState *bs)
{
return bdrv_co_flush(bs->file);
}
static QEMUOptionParameter cow_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
@ -348,7 +343,6 @@ static BlockDriver bdrv_cow = {
.bdrv_read = cow_co_read,
.bdrv_write = cow_co_write,
.bdrv_co_flush_to_disk = cow_co_flush,
.bdrv_co_is_allocated = cow_co_is_allocated,
.create_options = cow_create_options,

block/qcow.c

@ -835,11 +835,6 @@ fail:
return ret;
}
static coroutine_fn int qcow_co_flush(BlockDriverState *bs)
{
return bdrv_co_flush(bs->file);
}
static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
BDRVQcowState *s = bs->opaque;
@ -877,7 +872,6 @@ static BlockDriver bdrv_qcow = {
.bdrv_co_readv = qcow_co_readv,
.bdrv_co_writev = qcow_co_writev,
.bdrv_co_flush_to_disk = qcow_co_flush,
.bdrv_co_is_allocated = qcow_co_is_allocated,
.bdrv_set_key = qcow_set_key,

block/qcow2-cluster.c

@ -466,7 +466,6 @@ out:
*/
static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
uint64_t **new_l2_table,
uint64_t *new_l2_offset,
int *new_l2_index)
{
BDRVQcowState *s = bs->opaque;
@ -514,7 +513,6 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
*new_l2_table = l2_table;
*new_l2_offset = l2_offset;
*new_l2_index = l2_index;
return 0;
@ -539,11 +537,11 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
{
BDRVQcowState *s = bs->opaque;
int l2_index, ret;
uint64_t l2_offset, *l2_table;
uint64_t *l2_table;
int64_t cluster_offset;
int nb_csectors;
ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
if (ret < 0) {
return 0;
}
@ -588,7 +586,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
{
BDRVQcowState *s = bs->opaque;
int i, j = 0, l2_index, ret;
uint64_t *old_cluster, start_sect, l2_offset, *l2_table;
uint64_t *old_cluster, start_sect, *l2_table;
uint64_t cluster_offset = m->alloc_offset;
bool cow = false;
@ -633,7 +631,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
}
qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
ret = get_cluster_table(bs, m->offset, &l2_table, &l2_offset, &l2_index);
ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index);
if (ret < 0) {
goto err;
}
@ -817,7 +815,7 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
{
BDRVQcowState *s = bs->opaque;
int l2_index, ret, sectors;
uint64_t l2_offset, *l2_table;
uint64_t *l2_table;
unsigned int nb_clusters, keep_clusters;
uint64_t cluster_offset;
@ -825,7 +823,7 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
n_start, n_end);
/* Find L2 entry for the first involved cluster */
ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
if (ret < 0) {
return ret;
}
@ -1000,12 +998,12 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
unsigned int nb_clusters)
{
BDRVQcowState *s = bs->opaque;
uint64_t l2_offset, *l2_table;
uint64_t *l2_table;
int l2_index;
int ret;
int i;
ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
if (ret < 0) {
return ret;
}

block/qcow2.c

@ -1253,11 +1253,6 @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
return 0;
}
static coroutine_fn int qcow2_co_flush_to_disk(BlockDriverState *bs)
{
return bdrv_co_flush(bs->file);
}
static int64_t qcow2_vm_state_offset(BDRVQcowState *s)
{
return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits);
@ -1377,7 +1372,6 @@ static BlockDriver bdrv_qcow2 = {
.bdrv_co_readv = qcow2_co_readv,
.bdrv_co_writev = qcow2_co_writev,
.bdrv_co_flush_to_os = qcow2_co_flush_to_os,
.bdrv_co_flush_to_disk = qcow2_co_flush_to_disk,
.bdrv_co_discard = qcow2_co_discard,
.bdrv_truncate = qcow2_truncate,

block/qed-check.c

@ -68,6 +68,7 @@ static unsigned int qed_check_l2_table(QEDCheck *check, QEDTable *table)
{
BDRVQEDState *s = check->s;
unsigned int i, num_invalid = 0;
uint64_t last_offset = 0;
for (i = 0; i < s->table_nelems; i++) {
uint64_t offset = table->offsets[i];
@ -76,6 +77,11 @@ static unsigned int qed_check_l2_table(QEDCheck *check, QEDTable *table)
qed_offset_is_zero_cluster(offset)) {
continue;
}
check->result->bfi.allocated_clusters++;
if (last_offset && (last_offset + s->header.cluster_size != offset)) {
check->result->bfi.fragmented_clusters++;
}
last_offset = offset;
/* Detect invalid cluster offset */
if (!qed_check_cluster_offset(s, offset)) {
@ -200,6 +206,9 @@ int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix)
check.used_clusters = g_malloc0(((check.nclusters + 31) / 32) *
sizeof(check.used_clusters[0]));
check.result->bfi.total_clusters =
(s->header.image_size + s->header.cluster_size - 1) /
s->header.cluster_size;
ret = qed_check_l1_table(&check, s->l1_table);
if (ret == 0) {
/* Only check for leaks if entire image was scanned successfully */
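
These counters feed the fragmentation statistics that qemu-img now reports: total_clusters is the image size rounded up to whole clusters, allocated_clusters counts every data cluster referenced from an L2 table, and a cluster is counted as fragmented when it does not sit directly behind the previously visited allocated cluster on disk. A self-contained toy example of the counting rule (not QEMU code; the offsets are made up):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint64_t cluster_size = 64 * 1024;
        /* Three allocated clusters; the third does not follow the second. */
        const uint64_t offsets[] = { 0x100000, 0x110000, 0x300000 };
        uint64_t last_offset = 0;
        unsigned allocated = 0, fragmented = 0;

        for (size_t i = 0; i < sizeof(offsets) / sizeof(offsets[0]); i++) {
            allocated++;
            if (last_offset && last_offset + cluster_size != offsets[i]) {
                fragmented++;
            }
            last_offset = offsets[i];
        }
        printf("allocated=%u fragmented=%u\n", allocated, fragmented); /* prints 3 and 1 */
        return 0;
    }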

block/qed.c

@ -450,7 +450,7 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags)
* feature is no longer valid.
*/
if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 &&
!bdrv_is_read_only(bs->file)) {
!bdrv_is_read_only(bs->file) && !(flags & BDRV_O_INCOMING)) {
s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK;
ret = qed_write_header_sync(s);
@ -477,7 +477,8 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags)
* potentially inconsistent images to be opened read-only. This can
* aid data recovery from an otherwise inconsistent image.
*/
if (!bdrv_is_read_only(bs->file)) {
if (!bdrv_is_read_only(bs->file) &&
!(flags & BDRV_O_INCOMING)) {
BdrvCheckResult result = {0};
ret = qed_check(s, &result, true);
@ -497,12 +498,6 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags)
s->need_check_timer = qemu_new_timer_ns(vm_clock,
qed_need_check_timer_cb, s);
error_set(&s->migration_blocker,
QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
"qed", bs->device_name, "live migration");
migrate_add_blocker(s->migration_blocker);
out:
if (ret) {
qed_free_l2_cache(&s->l2_cache);
@ -515,9 +510,6 @@ static void bdrv_qed_close(BlockDriverState *bs)
{
BDRVQEDState *s = bs->opaque;
migrate_del_blocker(s->migration_blocker);
error_free(s->migration_blocker);
qed_cancel_need_check_timer(s);
qemu_free_timer(s->need_check_timer);
@ -1350,13 +1342,6 @@ static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
opaque, QED_AIOCB_WRITE);
}
static BlockDriverAIOCB *bdrv_qed_aio_flush(BlockDriverState *bs,
BlockDriverCompletionFunc *cb,
void *opaque)
{
return bdrv_aio_flush(bs->file, cb, opaque);
}
typedef struct {
Coroutine *co;
int ret;
@ -1441,6 +1426,7 @@ static int bdrv_qed_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
memset(bdi, 0, sizeof(*bdi));
bdi->cluster_size = s->header.cluster_size;
bdi->is_dirty = s->header.features & QED_F_NEED_CHECK;
return 0;
}
@ -1516,6 +1502,15 @@ static int bdrv_qed_change_backing_file(BlockDriverState *bs,
return ret;
}
static void bdrv_qed_invalidate_cache(BlockDriverState *bs)
{
BDRVQEDState *s = bs->opaque;
bdrv_qed_close(bs);
memset(s, 0, sizeof(BDRVQEDState));
bdrv_qed_open(bs, bs->open_flags);
}
static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result)
{
BDRVQEDState *s = bs->opaque;
@ -1562,12 +1557,12 @@ static BlockDriver bdrv_qed = {
.bdrv_make_empty = bdrv_qed_make_empty,
.bdrv_aio_readv = bdrv_qed_aio_readv,
.bdrv_aio_writev = bdrv_qed_aio_writev,
.bdrv_aio_flush = bdrv_qed_aio_flush,
.bdrv_co_write_zeroes = bdrv_qed_co_write_zeroes,
.bdrv_truncate = bdrv_qed_truncate,
.bdrv_getlength = bdrv_qed_getlength,
.bdrv_get_info = bdrv_qed_get_info,
.bdrv_change_backing_file = bdrv_qed_change_backing_file,
.bdrv_invalidate_cache = bdrv_qed_invalidate_cache,
.bdrv_check = bdrv_qed_check,
};

block/qed.h

@ -169,8 +169,6 @@ typedef struct {
/* Periodic flush and clear need check flag */
QEMUTimer *need_check_timer;
Error *migration_blocker;
} BDRVQEDState;
enum {

block/raw.c

@ -25,11 +25,6 @@ static void raw_close(BlockDriverState *bs)
{
}
static int coroutine_fn raw_co_flush(BlockDriverState *bs)
{
return bdrv_co_flush(bs->file);
}
static int64_t raw_getlength(BlockDriverState *bs)
{
return bdrv_getlength(bs->file);
@ -113,7 +108,6 @@ static BlockDriver bdrv_raw = {
.bdrv_co_readv = raw_co_readv,
.bdrv_co_writev = raw_co_writev,
.bdrv_co_flush_to_disk = raw_co_flush,
.bdrv_co_discard = raw_co_discard,
.bdrv_probe = raw_probe,

block/sheepdog.c

@ -32,9 +32,11 @@
#define SD_OP_RELEASE_VDI 0x13
#define SD_OP_GET_VDI_INFO 0x14
#define SD_OP_READ_VDIS 0x15
#define SD_OP_FLUSH_VDI 0x16
#define SD_FLAG_CMD_WRITE 0x01
#define SD_FLAG_CMD_COW 0x02
#define SD_FLAG_CMD_CACHE 0x04
#define SD_RES_SUCCESS 0x00 /* Success */
#define SD_RES_UNKNOWN 0x01 /* Unknown error */
@ -293,10 +295,12 @@ typedef struct BDRVSheepdogState {
char name[SD_MAX_VDI_LEN];
int is_snapshot;
uint8_t cache_enabled;
char *addr;
char *port;
int fd;
int flush_fd;
CoMutex lock;
Coroutine *co_send;
@ -506,6 +510,7 @@ static int send_req(int sockfd, SheepdogReq *hdr, void *data,
ret = qemu_send_full(sockfd, hdr, sizeof(*hdr), 0);
if (ret < sizeof(*hdr)) {
error_report("failed to send a req, %s", strerror(errno));
return ret;
}
ret = qemu_send_full(sockfd, data, *wlen, 0);
@ -516,6 +521,24 @@ static int send_req(int sockfd, SheepdogReq *hdr, void *data,
return ret;
}
static int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
unsigned int *wlen)
{
int ret;
ret = qemu_co_send(sockfd, hdr, sizeof(*hdr));
if (ret < sizeof(*hdr)) {
error_report("failed to send a req, %s", strerror(errno));
return ret;
}
ret = qemu_co_send(sockfd, data, *wlen);
if (ret < *wlen) {
error_report("failed to send a req, %s", strerror(errno));
}
return ret;
}
static int do_req(int sockfd, SheepdogReq *hdr, void *data,
unsigned int *wlen, unsigned int *rlen)
{
@ -550,6 +573,40 @@ out:
return ret;
}
static int do_co_req(int sockfd, SheepdogReq *hdr, void *data,
unsigned int *wlen, unsigned int *rlen)
{
int ret;
socket_set_block(sockfd);
ret = send_co_req(sockfd, hdr, data, wlen);
if (ret < 0) {
goto out;
}
ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
if (ret < sizeof(*hdr)) {
error_report("failed to get a rsp, %s", strerror(errno));
goto out;
}
if (*rlen > hdr->data_length) {
*rlen = hdr->data_length;
}
if (*rlen) {
ret = qemu_co_recv(sockfd, data, *rlen);
if (ret < *rlen) {
error_report("failed to get the data, %s", strerror(errno));
goto out;
}
}
ret = 0;
out:
socket_set_nonblock(sockfd);
return ret;
}
static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
struct iovec *iov, int niov, int create,
enum AIOCBState aiocb_type);
@ -900,6 +957,10 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
hdr.flags = SD_FLAG_CMD_WRITE | flags;
}
if (s->cache_enabled) {
hdr.flags |= SD_FLAG_CMD_CACHE;
}
hdr.oid = oid;
hdr.cow_oid = old_oid;
hdr.copies = s->inode.nr_copies;
@ -942,7 +1003,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
unsigned int datalen, uint64_t offset,
int write, int create)
int write, int create, uint8_t cache)
{
SheepdogObjReq hdr;
SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr;
@ -965,6 +1026,11 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
rlen = datalen;
hdr.opcode = SD_OP_READ_OBJ;
}
if (cache) {
hdr.flags |= SD_FLAG_CMD_CACHE;
}
hdr.oid = oid;
hdr.data_length = datalen;
hdr.offset = offset;
@ -986,15 +1052,18 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
}
static int read_object(int fd, char *buf, uint64_t oid, int copies,
unsigned int datalen, uint64_t offset)
unsigned int datalen, uint64_t offset, uint8_t cache)
{
return read_write_object(fd, buf, oid, copies, datalen, offset, 0, 0);
return read_write_object(fd, buf, oid, copies, datalen, offset, 0, 0,
cache);
}
static int write_object(int fd, char *buf, uint64_t oid, int copies,
unsigned int datalen, uint64_t offset, int create)
unsigned int datalen, uint64_t offset, int create,
uint8_t cache)
{
return read_write_object(fd, buf, oid, copies, datalen, offset, 1, create);
return read_write_object(fd, buf, oid, copies, datalen, offset, 1, create,
cache);
}
static int sd_open(BlockDriverState *bs, const char *filename, int flags)
@ -1026,6 +1095,15 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
goto out;
}
if (flags & BDRV_O_CACHE_WB) {
s->cache_enabled = 1;
s->flush_fd = connect_to_sdog(s->addr, s->port);
if (s->flush_fd < 0) {
error_report("failed to connect");
goto out;
}
}
if (snapid) {
dprintf("%" PRIx32 " snapshot inode was open.\n", vid);
s->is_snapshot = 1;
@ -1038,7 +1116,8 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
}
buf = g_malloc(SD_INODE_SIZE);
ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0);
ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0,
s->cache_enabled);
closesocket(fd);
@ -1272,6 +1351,9 @@ static void sd_close(BlockDriverState *bs)
qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL, NULL);
closesocket(s->fd);
if (s->cache_enabled) {
closesocket(s->flush_fd);
}
g_free(s->addr);
}
@ -1305,7 +1387,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
s->inode.vdi_size = offset;
ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
s->inode.nr_copies, datalen, 0, 0);
s->inode.nr_copies, datalen, 0, 0, s->cache_enabled);
close(fd);
if (ret < 0) {
@ -1387,7 +1469,7 @@ static int sd_create_branch(BDRVSheepdogState *s)
}
ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
SD_INODE_SIZE, 0);
SD_INODE_SIZE, 0, s->cache_enabled);
closesocket(fd);
@ -1575,6 +1657,36 @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
return acb->ret;
}
static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
{
BDRVSheepdogState *s = bs->opaque;
SheepdogObjReq hdr = { 0 };
SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr;
SheepdogInode *inode = &s->inode;
int ret;
unsigned int wlen = 0, rlen = 0;
if (!s->cache_enabled) {
return 0;
}
hdr.opcode = SD_OP_FLUSH_VDI;
hdr.oid = vid_to_vdi_oid(inode->vdi_id);
ret = do_co_req(s->flush_fd, (SheepdogReq *)&hdr, NULL, &wlen, &rlen);
if (ret) {
error_report("failed to send a request to the sheep");
return ret;
}
if (rsp->result != SD_RES_SUCCESS) {
error_report("%s", sd_strerror(rsp->result));
return -EIO;
}
return 0;
}
static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
{
BDRVSheepdogState *s = bs->opaque;
@ -1610,7 +1722,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
}
ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
s->inode.nr_copies, datalen, 0, 0);
s->inode.nr_copies, datalen, 0, 0, s->cache_enabled);
if (ret < 0) {
error_report("failed to write snapshot's inode.");
ret = -EIO;
@ -1629,7 +1741,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
inode = (SheepdogInode *)g_malloc(datalen);
ret = read_object(fd, (char *)inode, vid_to_vdi_oid(new_vid),
s->inode.nr_copies, datalen, 0);
s->inode.nr_copies, datalen, 0, s->cache_enabled);
if (ret < 0) {
error_report("failed to read new inode info. %s", strerror(errno));
@ -1684,7 +1796,7 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
buf = g_malloc(SD_INODE_SIZE);
ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
SD_INODE_SIZE, 0);
SD_INODE_SIZE, 0, s->cache_enabled);
closesocket(fd);
@ -1779,7 +1891,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
/* we don't need to read entire object */
ret = read_object(fd, (char *)&inode, vid_to_vdi_oid(vid),
0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0);
0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0,
s->cache_enabled);
if (ret) {
continue;
@ -1835,10 +1948,12 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
create = (offset == 0);
if (load) {
ret = read_object(fd, (char *)data, vmstate_oid,
s->inode.nr_copies, data_len, offset);
s->inode.nr_copies, data_len, offset,
s->cache_enabled);
} else {
ret = write_object(fd, (char *)data, vmstate_oid,
s->inode.nr_copies, data_len, offset, create);
s->inode.nr_copies, data_len, offset, create,
s->cache_enabled);
}
if (ret < 0) {
@ -1904,6 +2019,7 @@ BlockDriver bdrv_sheepdog = {
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
.bdrv_snapshot_create = sd_snapshot_create,
.bdrv_snapshot_goto = sd_snapshot_goto,

block/stream.c

@ -76,6 +76,39 @@ static int coroutine_fn stream_populate(BlockDriverState *bs,
return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, &qiov);
}
static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
const char *base_id)
{
BlockDriverState *intermediate;
intermediate = top->backing_hd;
while (intermediate) {
BlockDriverState *unused;
/* reached base */
if (intermediate == base) {
break;
}
unused = intermediate;
intermediate = intermediate->backing_hd;
unused->backing_hd = NULL;
bdrv_delete(unused);
}
top->backing_hd = base;
pstrcpy(top->backing_file, sizeof(top->backing_file), "");
pstrcpy(top->backing_format, sizeof(top->backing_format), "");
if (base_id) {
pstrcpy(top->backing_file, sizeof(top->backing_file), base_id);
if (base->drv) {
pstrcpy(top->backing_format, sizeof(top->backing_format),
base->drv->format_name);
}
}
}
/*
* Given an image chain: [BASE] -> [INTER1] -> [INTER2] -> [TOP]
*
@ -175,7 +208,7 @@ retry:
break;
}
s->common.busy = true;
if (base) {
ret = is_allocated_base(bs, base, sector_num,
STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n);
@ -189,6 +222,7 @@ retry:
if (s->common.speed) {
uint64_t delay_ns = ratelimit_calculate_delay(&s->limit, n);
if (delay_ns > 0) {
s->common.busy = false;
co_sleep_ns(rt_clock, delay_ns);
/* Recheck cancellation and that sectors are unallocated */
@ -208,6 +242,7 @@ retry:
/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that qemu_aio_flush() returns.
*/
s->common.busy = false;
co_sleep_ns(rt_clock, 0);
}
@ -215,12 +250,13 @@ retry:
bdrv_disable_copy_on_read(bs);
}
if (sector_num == end && ret == 0) {
if (!block_job_is_cancelled(&s->common) && sector_num == end && ret == 0) {
const char *base_id = NULL;
if (base) {
base_id = s->backing_file_id;
}
ret = bdrv_change_backing_file(bs, base_id, NULL);
close_unused_images(bs, base, base_id);
}
qemu_vfree(buf);
@ -234,7 +270,6 @@ static int stream_set_speed(BlockJob *job, int64_t value)
if (value < 0) {
return -EINVAL;
}
job->speed = value;
ratelimit_set_speed(&s->limit, value / BDRV_SECTOR_SIZE);
return 0;
}

block/vdi.c

@ -143,29 +143,6 @@ void uuid_unparse(const uuid_t uu, char *out)
}
#endif
typedef struct {
BlockDriverAIOCB common;
int64_t sector_num;
QEMUIOVector *qiov;
uint8_t *buf;
/* Total number of sectors. */
int nb_sectors;
/* Number of sectors for current AIO. */
int n_sectors;
/* New allocated block map entry. */
uint32_t bmap_first;
uint32_t bmap_last;
/* Buffer for new allocated block. */
void *block_buffer;
void *orig_buf;
bool is_write;
int header_modified;
BlockDriverAIOCB *hd_aiocb;
struct iovec hd_iov;
QEMUIOVector hd_qiov;
QEMUBH *bh;
} VdiAIOCB;
typedef struct {
char text[0x40];
uint32_t signature;
@ -489,332 +466,150 @@ static int coroutine_fn vdi_co_is_allocated(BlockDriverState *bs,
return VDI_IS_ALLOCATED(bmap_entry);
}
static void vdi_aio_cancel(BlockDriverAIOCB *blockacb)
static int vdi_co_read(BlockDriverState *bs,
int64_t sector_num, uint8_t *buf, int nb_sectors)
{
/* TODO: This code is untested. How can I get it executed? */
VdiAIOCB *acb = container_of(blockacb, VdiAIOCB, common);
logout("\n");
if (acb->hd_aiocb) {
bdrv_aio_cancel(acb->hd_aiocb);
}
qemu_aio_release(acb);
}
static AIOPool vdi_aio_pool = {
.aiocb_size = sizeof(VdiAIOCB),
.cancel = vdi_aio_cancel,
};
static VdiAIOCB *vdi_aio_setup(BlockDriverState *bs, int64_t sector_num,
QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque, int is_write)
{
VdiAIOCB *acb;
logout("%p, %" PRId64 ", %p, %d, %p, %p, %d\n",
bs, sector_num, qiov, nb_sectors, cb, opaque, is_write);
acb = qemu_aio_get(&vdi_aio_pool, bs, cb, opaque);
acb->hd_aiocb = NULL;
acb->sector_num = sector_num;
acb->qiov = qiov;
acb->is_write = is_write;
if (qiov->niov > 1) {
acb->buf = qemu_blockalign(bs, qiov->size);
acb->orig_buf = acb->buf;
if (is_write) {
qemu_iovec_to_buffer(qiov, acb->buf);
}
} else {
acb->buf = (uint8_t *)qiov->iov->iov_base;
}
acb->nb_sectors = nb_sectors;
acb->n_sectors = 0;
acb->bmap_first = VDI_UNALLOCATED;
acb->bmap_last = VDI_UNALLOCATED;
acb->block_buffer = NULL;
acb->header_modified = 0;
return acb;
}
static int vdi_schedule_bh(QEMUBHFunc *cb, VdiAIOCB *acb)
{
logout("\n");
if (acb->bh) {
return -EIO;
}
acb->bh = qemu_bh_new(cb, acb);
if (!acb->bh) {
return -EIO;
}
qemu_bh_schedule(acb->bh);
return 0;
}
static void vdi_aio_read_cb(void *opaque, int ret);
static void vdi_aio_write_cb(void *opaque, int ret);
static void vdi_aio_rw_bh(void *opaque)
{
VdiAIOCB *acb = opaque;
logout("\n");
qemu_bh_delete(acb->bh);
acb->bh = NULL;
if (acb->is_write) {
vdi_aio_write_cb(opaque, 0);
} else {
vdi_aio_read_cb(opaque, 0);
}
}
static void vdi_aio_read_cb(void *opaque, int ret)
{
VdiAIOCB *acb = opaque;
BlockDriverState *bs = acb->common.bs;
BDRVVdiState *s = bs->opaque;
uint32_t bmap_entry;
uint32_t block_index;
uint32_t sector_in_block;
uint32_t n_sectors;
logout("%u sectors read\n", acb->n_sectors);
acb->hd_aiocb = NULL;
if (ret < 0) {
goto done;
}
acb->nb_sectors -= acb->n_sectors;
if (acb->nb_sectors == 0) {
/* request completed */
ret = 0;
goto done;
}
acb->sector_num += acb->n_sectors;
acb->buf += acb->n_sectors * SECTOR_SIZE;
block_index = acb->sector_num / s->block_sectors;
sector_in_block = acb->sector_num % s->block_sectors;
n_sectors = s->block_sectors - sector_in_block;
if (n_sectors > acb->nb_sectors) {
n_sectors = acb->nb_sectors;
}
logout("will read %u sectors starting at sector %" PRIu64 "\n",
n_sectors, acb->sector_num);
/* prepare next AIO request */
acb->n_sectors = n_sectors;
bmap_entry = le32_to_cpu(s->bmap[block_index]);
if (!VDI_IS_ALLOCATED(bmap_entry)) {
/* Block not allocated, return zeros, no need to wait. */
memset(acb->buf, 0, n_sectors * SECTOR_SIZE);
ret = vdi_schedule_bh(vdi_aio_rw_bh, acb);
if (ret < 0) {
goto done;
}
} else {
uint64_t offset = s->header.offset_data / SECTOR_SIZE +
(uint64_t)bmap_entry * s->block_sectors +
sector_in_block;
acb->hd_iov.iov_base = (void *)acb->buf;
acb->hd_iov.iov_len = n_sectors * SECTOR_SIZE;
qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
acb->hd_aiocb = bdrv_aio_readv(bs->file, offset, &acb->hd_qiov,
n_sectors, vdi_aio_read_cb, acb);
}
return;
done:
if (acb->qiov->niov > 1) {
qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
qemu_vfree(acb->orig_buf);
}
acb->common.cb(acb->common.opaque, ret);
qemu_aio_release(acb);
}
static BlockDriverAIOCB *vdi_aio_readv(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
VdiAIOCB *acb;
int ret;
int ret = 0;
logout("\n");
acb = vdi_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
ret = vdi_schedule_bh(vdi_aio_rw_bh, acb);
if (ret < 0) {
if (acb->qiov->niov > 1) {
qemu_vfree(acb->orig_buf);
while (ret >= 0 && nb_sectors > 0) {
block_index = sector_num / s->block_sectors;
sector_in_block = sector_num % s->block_sectors;
n_sectors = s->block_sectors - sector_in_block;
if (n_sectors > nb_sectors) {
n_sectors = nb_sectors;
}
qemu_aio_release(acb);
return NULL;
logout("will read %u sectors starting at sector %" PRIu64 "\n",
n_sectors, sector_num);
/* prepare next AIO request */
bmap_entry = le32_to_cpu(s->bmap[block_index]);
if (!VDI_IS_ALLOCATED(bmap_entry)) {
/* Block not allocated, return zeros, no need to wait. */
memset(buf, 0, n_sectors * SECTOR_SIZE);
ret = 0;
} else {
uint64_t offset = s->header.offset_data / SECTOR_SIZE +
(uint64_t)bmap_entry * s->block_sectors +
sector_in_block;
ret = bdrv_read(bs->file, offset, buf, n_sectors);
}
logout("%u sectors read\n", n_sectors);
nb_sectors -= n_sectors;
sector_num += n_sectors;
buf += n_sectors * SECTOR_SIZE;
}
return &acb->common;
return ret;
}
static void vdi_aio_write_cb(void *opaque, int ret)
static int vdi_co_write(BlockDriverState *bs,
int64_t sector_num, const uint8_t *buf, int nb_sectors)
{
VdiAIOCB *acb = opaque;
BlockDriverState *bs = acb->common.bs;
BDRVVdiState *s = bs->opaque;
uint32_t bmap_entry;
uint32_t block_index;
uint32_t sector_in_block;
uint32_t n_sectors;
uint32_t bmap_first = VDI_UNALLOCATED;
uint32_t bmap_last = VDI_UNALLOCATED;
uint8_t *block = NULL;
int ret = 0;
acb->hd_aiocb = NULL;
logout("\n");
if (ret < 0) {
goto done;
}
while (ret >= 0 && nb_sectors > 0) {
block_index = sector_num / s->block_sectors;
sector_in_block = sector_num % s->block_sectors;
n_sectors = s->block_sectors - sector_in_block;
if (n_sectors > nb_sectors) {
n_sectors = nb_sectors;
}
acb->nb_sectors -= acb->n_sectors;
acb->sector_num += acb->n_sectors;
acb->buf += acb->n_sectors * SECTOR_SIZE;
logout("will write %u sectors starting at sector %" PRIu64 "\n",
n_sectors, sector_num);
if (acb->nb_sectors == 0) {
logout("finished data write\n");
acb->n_sectors = 0;
if (acb->header_modified) {
VdiHeader *header = acb->block_buffer;
logout("now writing modified header\n");
assert(VDI_IS_ALLOCATED(acb->bmap_first));
*header = s->header;
vdi_header_to_le(header);
acb->header_modified = 0;
acb->hd_iov.iov_base = acb->block_buffer;
acb->hd_iov.iov_len = SECTOR_SIZE;
qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
acb->hd_aiocb = bdrv_aio_writev(bs->file, 0, &acb->hd_qiov, 1,
vdi_aio_write_cb, acb);
return;
} else if (VDI_IS_ALLOCATED(acb->bmap_first)) {
/* One or more new blocks were allocated. */
/* prepare next AIO request */
bmap_entry = le32_to_cpu(s->bmap[block_index]);
if (!VDI_IS_ALLOCATED(bmap_entry)) {
/* Allocate new block and write to it. */
uint64_t offset;
uint32_t bmap_first;
uint32_t bmap_last;
g_free(acb->block_buffer);
acb->block_buffer = NULL;
bmap_first = acb->bmap_first;
bmap_last = acb->bmap_last;
logout("now writing modified block map entry %u...%u\n",
bmap_first, bmap_last);
/* Write modified sectors from block map. */
bmap_first /= (SECTOR_SIZE / sizeof(uint32_t));
bmap_last /= (SECTOR_SIZE / sizeof(uint32_t));
n_sectors = bmap_last - bmap_first + 1;
offset = s->bmap_sector + bmap_first;
acb->bmap_first = VDI_UNALLOCATED;
acb->hd_iov.iov_base = (void *)((uint8_t *)&s->bmap[0] +
bmap_first * SECTOR_SIZE);
acb->hd_iov.iov_len = n_sectors * SECTOR_SIZE;
qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
logout("will write %u block map sectors starting from entry %u\n",
n_sectors, bmap_first);
acb->hd_aiocb = bdrv_aio_writev(bs->file, offset, &acb->hd_qiov,
n_sectors, vdi_aio_write_cb, acb);
return;
bmap_entry = s->header.blocks_allocated;
s->bmap[block_index] = cpu_to_le32(bmap_entry);
s->header.blocks_allocated++;
offset = s->header.offset_data / SECTOR_SIZE +
(uint64_t)bmap_entry * s->block_sectors;
if (block == NULL) {
block = g_malloc(s->block_size);
bmap_first = block_index;
}
bmap_last = block_index;
/* Copy data to be written to new block and zero unused parts. */
memset(block, 0, sector_in_block * SECTOR_SIZE);
memcpy(block + sector_in_block * SECTOR_SIZE,
buf, n_sectors * SECTOR_SIZE);
memset(block + (sector_in_block + n_sectors) * SECTOR_SIZE, 0,
(s->block_sectors - n_sectors - sector_in_block) * SECTOR_SIZE);
ret = bdrv_write(bs->file, offset, block, s->block_sectors);
} else {
uint64_t offset = s->header.offset_data / SECTOR_SIZE +
(uint64_t)bmap_entry * s->block_sectors +
sector_in_block;
ret = bdrv_write(bs->file, offset, buf, n_sectors);
}
ret = 0;
goto done;
nb_sectors -= n_sectors;
sector_num += n_sectors;
buf += n_sectors * SECTOR_SIZE;
logout("%u sectors written\n", n_sectors);
}
logout("%u sectors written\n", acb->n_sectors);
block_index = acb->sector_num / s->block_sectors;
sector_in_block = acb->sector_num % s->block_sectors;
n_sectors = s->block_sectors - sector_in_block;
if (n_sectors > acb->nb_sectors) {
n_sectors = acb->nb_sectors;
}
logout("will write %u sectors starting at sector %" PRIu64 "\n",
n_sectors, acb->sector_num);
/* prepare next AIO request */
acb->n_sectors = n_sectors;
bmap_entry = le32_to_cpu(s->bmap[block_index]);
if (!VDI_IS_ALLOCATED(bmap_entry)) {
/* Allocate new block and write to it. */
uint64_t offset;
uint8_t *block;
bmap_entry = s->header.blocks_allocated;
s->bmap[block_index] = cpu_to_le32(bmap_entry);
s->header.blocks_allocated++;
offset = s->header.offset_data / SECTOR_SIZE +
(uint64_t)bmap_entry * s->block_sectors;
block = acb->block_buffer;
if (block == NULL) {
block = g_malloc(s->block_size);
acb->block_buffer = block;
acb->bmap_first = block_index;
assert(!acb->header_modified);
acb->header_modified = 1;
}
acb->bmap_last = block_index;
/* Copy data to be written to new block and zero unused parts. */
memset(block, 0, sector_in_block * SECTOR_SIZE);
memcpy(block + sector_in_block * SECTOR_SIZE,
acb->buf, n_sectors * SECTOR_SIZE);
memset(block + (sector_in_block + n_sectors) * SECTOR_SIZE, 0,
(s->block_sectors - n_sectors - sector_in_block) * SECTOR_SIZE);
acb->hd_iov.iov_base = (void *)block;
acb->hd_iov.iov_len = s->block_size;
qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
acb->hd_aiocb = bdrv_aio_writev(bs->file, offset,
&acb->hd_qiov, s->block_sectors,
vdi_aio_write_cb, acb);
} else {
uint64_t offset = s->header.offset_data / SECTOR_SIZE +
(uint64_t)bmap_entry * s->block_sectors +
sector_in_block;
acb->hd_iov.iov_base = (void *)acb->buf;
acb->hd_iov.iov_len = n_sectors * SECTOR_SIZE;
qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
acb->hd_aiocb = bdrv_aio_writev(bs->file, offset, &acb->hd_qiov,
n_sectors, vdi_aio_write_cb, acb);
}
return;
done:
if (acb->qiov->niov > 1) {
qemu_vfree(acb->orig_buf);
}
acb->common.cb(acb->common.opaque, ret);
qemu_aio_release(acb);
}
static BlockDriverAIOCB *vdi_aio_writev(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
VdiAIOCB *acb;
int ret;
logout("\n");
acb = vdi_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
ret = vdi_schedule_bh(vdi_aio_rw_bh, acb);
logout("finished data write\n");
if (ret < 0) {
if (acb->qiov->niov > 1) {
qemu_vfree(acb->orig_buf);
}
qemu_aio_release(acb);
return NULL;
return ret;
}
return &acb->common;
if (block) {
/* One or more new blocks were allocated. */
VdiHeader *header = (VdiHeader *) block;
uint8_t *base;
uint64_t offset;
logout("now writing modified header\n");
assert(VDI_IS_ALLOCATED(bmap_first));
*header = s->header;
vdi_header_to_le(header);
ret = bdrv_write(bs->file, 0, block, 1);
g_free(block);
block = NULL;
if (ret < 0) {
return ret;
}
logout("now writing modified block map entry %u...%u\n",
bmap_first, bmap_last);
/* Write modified sectors from block map. */
bmap_first /= (SECTOR_SIZE / sizeof(uint32_t));
bmap_last /= (SECTOR_SIZE / sizeof(uint32_t));
n_sectors = bmap_last - bmap_first + 1;
offset = s->bmap_sector + bmap_first;
base = ((uint8_t *)&s->bmap[0]) + bmap_first * SECTOR_SIZE;
logout("will write %u block map sectors starting from entry %u\n",
n_sectors, bmap_first);
ret = bdrv_write(bs->file, offset, base, n_sectors);
}
return ret;
}
static int vdi_create(const char *filename, QEMUOptionParameter *options)
@ -930,13 +725,6 @@ static void vdi_close(BlockDriverState *bs)
error_free(s->migration_blocker);
}
static coroutine_fn int vdi_co_flush(BlockDriverState *bs)
{
logout("\n");
return bdrv_co_flush(bs->file);
}
static QEMUOptionParameter vdi_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
@ -969,13 +757,12 @@ static BlockDriver bdrv_vdi = {
.bdrv_open = vdi_open,
.bdrv_close = vdi_close,
.bdrv_create = vdi_create,
.bdrv_co_flush_to_disk = vdi_co_flush,
.bdrv_co_is_allocated = vdi_co_is_allocated,
.bdrv_make_empty = vdi_make_empty,
.bdrv_aio_readv = vdi_aio_readv,
.bdrv_read = vdi_co_read,
#if defined(CONFIG_VDI_WRITE)
.bdrv_aio_writev = vdi_aio_writev,
.bdrv_write = vdi_co_write,
#endif
.bdrv_get_info = vdi_get_info,

block/vmdk.c

@ -1525,10 +1525,10 @@ static void vmdk_close(BlockDriverState *bs)
static coroutine_fn int vmdk_co_flush(BlockDriverState *bs)
{
int i, ret, err;
BDRVVmdkState *s = bs->opaque;
int i, err;
int ret = 0;
ret = bdrv_co_flush(bs->file);
for (i = 0; i < s->num_extents; i++) {
err = bdrv_co_flush(s->extents[i].file);
if (err < 0) {

block/vpc.c

@ -189,6 +189,9 @@ static int vpc_open(BlockDriverState *bs, int flags)
fprintf(stderr, "block-vpc: The header checksum of '%s' is "
"incorrect.\n", bs->filename);
/* Write 'checksum' back to footer, or else will leave it with zero. */
footer->checksum = be32_to_cpu(checksum);
// The visible size of a image in Virtual PC depends on the geometry
// rather than on the size stored in the footer (the size in the footer
// is too large usually)
@ -507,11 +510,6 @@ static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num,
return ret;
}
static coroutine_fn int vpc_co_flush(BlockDriverState *bs)
{
return bdrv_co_flush(bs->file);
}
/*
* Calculates the number of cylinders, heads and sectors per cylinder
* based on a given number of sectors. This is the algorithm described
@ -789,7 +787,6 @@ static BlockDriver bdrv_vpc = {
.bdrv_read = vpc_co_read,
.bdrv_write = vpc_co_write,
.bdrv_co_flush_to_disk = vpc_co_flush,
.create_options = vpc_create_options,
};