mirror of
https://github.com/Motorhead1991/qemu.git
synced 2025-08-05 08:43:55 -06:00
sheepdog: implement SD_OP_FLUSH_VDI operation
The flush operation flushes the write-back cache of the sheepdog cluster. By issuing a flush operation, we can assure the guest that its data has reached the sheepdog cluster storage. Cc: Kevin Wolf <kwolf@redhat.com> Cc: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp> Signed-off-by: Liu Yuan <tailai.ly@taobao.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
This commit is contained in:
parent
f6801b83d0
commit
47622c44d0
1 changed file with 128 additions and 14 deletions
142
block/sheepdog.c
142
block/sheepdog.c
|
@ -32,9 +32,11 @@
|
||||||
#define SD_OP_RELEASE_VDI 0x13
|
#define SD_OP_RELEASE_VDI 0x13
|
||||||
#define SD_OP_GET_VDI_INFO 0x14
|
#define SD_OP_GET_VDI_INFO 0x14
|
||||||
#define SD_OP_READ_VDIS 0x15
|
#define SD_OP_READ_VDIS 0x15
|
||||||
|
#define SD_OP_FLUSH_VDI 0x16
|
||||||
|
|
||||||
#define SD_FLAG_CMD_WRITE 0x01
|
#define SD_FLAG_CMD_WRITE 0x01
|
||||||
#define SD_FLAG_CMD_COW 0x02
|
#define SD_FLAG_CMD_COW 0x02
|
||||||
|
#define SD_FLAG_CMD_CACHE 0x04
|
||||||
|
|
||||||
#define SD_RES_SUCCESS 0x00 /* Success */
|
#define SD_RES_SUCCESS 0x00 /* Success */
|
||||||
#define SD_RES_UNKNOWN 0x01 /* Unknown error */
|
#define SD_RES_UNKNOWN 0x01 /* Unknown error */
|
||||||
|
@ -293,10 +295,12 @@ typedef struct BDRVSheepdogState {
|
||||||
|
|
||||||
char name[SD_MAX_VDI_LEN];
|
char name[SD_MAX_VDI_LEN];
|
||||||
int is_snapshot;
|
int is_snapshot;
|
||||||
|
uint8_t cache_enabled;
|
||||||
|
|
||||||
char *addr;
|
char *addr;
|
||||||
char *port;
|
char *port;
|
||||||
int fd;
|
int fd;
|
||||||
|
int flush_fd;
|
||||||
|
|
||||||
CoMutex lock;
|
CoMutex lock;
|
||||||
Coroutine *co_send;
|
Coroutine *co_send;
|
||||||
|
@ -516,6 +520,23 @@ static int send_req(int sockfd, SheepdogReq *hdr, void *data,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Send a request to the sheep: the fixed-size header @hdr followed by
 * *@wlen bytes of payload taken from @data, both through qemu_co_send().
 *
 * Returns the value qemu_co_send() reported for the payload on success
 * (0 when *@wlen is 0), or -EIO if either send failed or came up short.
 *
 * NOTE(review): assumes qemu_co_send() returns the number of bytes sent,
 * or a negative value when nothing could be sent -- confirm against its
 * definition.
 */
static int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
                       unsigned int *wlen)
{
    int ret;

    ret = qemu_co_send(sockfd, hdr, sizeof(*hdr));
    /*
     * Test the sign before comparing with the unsigned size: a bare
     * "ret < sizeof(*hdr)" converts a negative ret to a huge unsigned
     * value and the failure would go unnoticed.
     */
    if (ret < 0 || (size_t)ret < sizeof(*hdr)) {
        error_report("failed to send a req, %s", strerror(errno));
        /* Do not send a payload after a broken header. */
        return -EIO;
    }

    ret = qemu_co_send(sockfd, data, *wlen);
    if (ret < 0 || (unsigned int)ret < *wlen) {
        error_report("failed to send a req, %s", strerror(errno));
        /* A short send is a failure too; callers only test ret < 0. */
        return -EIO;
    }

    return ret;
}
|
||||||
static int do_req(int sockfd, SheepdogReq *hdr, void *data,
|
static int do_req(int sockfd, SheepdogReq *hdr, void *data,
|
||||||
unsigned int *wlen, unsigned int *rlen)
|
unsigned int *wlen, unsigned int *rlen)
|
||||||
{
|
{
|
||||||
|
@ -550,6 +571,40 @@ out:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Perform one request/response exchange with the sheep using the
 * qemu_co_send()/qemu_co_recv() helpers.
 *
 * @hdr is sent as the request header and then overwritten in place with
 * the response header.  *@wlen bytes of @data are sent as the payload.
 * On return *@rlen has been clamped to the data_length announced in the
 * response, and that many bytes have been read back into @data.
 *
 * The socket is switched to blocking mode for the duration of the
 * exchange and switched back to non-blocking on every exit path.
 *
 * Returns 0 on success and a negative value on failure (-EIO when a
 * receive fails or is short).
 *
 * NOTE(review): assumes qemu_co_recv() returns the number of bytes
 * received, or a negative value on error -- confirm against its
 * definition.
 */
static int do_co_req(int sockfd, SheepdogReq *hdr, void *data,
                     unsigned int *wlen, unsigned int *rlen)
{
    int ret;

    socket_set_block(sockfd);
    ret = send_co_req(sockfd, hdr, data, wlen);
    if (ret < 0) {
        goto out;
    }

    ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
    /*
     * Check the sign first: comparing a negative int directly with the
     * unsigned sizeof() result would convert it to a huge value and
     * let the error slip through, leaving the request header to be
     * interpreted as a response.
     */
    if (ret < 0 || (size_t)ret < sizeof(*hdr)) {
        error_report("failed to get a rsp, %s", strerror(errno));
        ret = -EIO;
        goto out;
    }

    /* Never read more than the sheep says it is sending. */
    if (*rlen > hdr->data_length) {
        *rlen = hdr->data_length;
    }

    if (*rlen) {
        ret = qemu_co_recv(sockfd, data, *rlen);
        if (ret < 0 || (unsigned int)ret < *rlen) {
            error_report("failed to get the data, %s", strerror(errno));
            /* Report a short read as an error, not as a byte count. */
            ret = -EIO;
            goto out;
        }
    }
    ret = 0;
out:
    socket_set_nonblock(sockfd);
    return ret;
}
|
||||||
|
|
||||||
static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
|
static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
|
||||||
struct iovec *iov, int niov, int create,
|
struct iovec *iov, int niov, int create,
|
||||||
enum AIOCBState aiocb_type);
|
enum AIOCBState aiocb_type);
|
||||||
|
@ -900,6 +955,10 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
|
||||||
hdr.flags = SD_FLAG_CMD_WRITE | flags;
|
hdr.flags = SD_FLAG_CMD_WRITE | flags;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (s->cache_enabled) {
|
||||||
|
hdr.flags |= SD_FLAG_CMD_CACHE;
|
||||||
|
}
|
||||||
|
|
||||||
hdr.oid = oid;
|
hdr.oid = oid;
|
||||||
hdr.cow_oid = old_oid;
|
hdr.cow_oid = old_oid;
|
||||||
hdr.copies = s->inode.nr_copies;
|
hdr.copies = s->inode.nr_copies;
|
||||||
|
@ -942,7 +1001,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
|
||||||
|
|
||||||
static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
|
static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
|
||||||
unsigned int datalen, uint64_t offset,
|
unsigned int datalen, uint64_t offset,
|
||||||
int write, int create)
|
int write, int create, uint8_t cache)
|
||||||
{
|
{
|
||||||
SheepdogObjReq hdr;
|
SheepdogObjReq hdr;
|
||||||
SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr;
|
SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr;
|
||||||
|
@ -965,6 +1024,11 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
|
||||||
rlen = datalen;
|
rlen = datalen;
|
||||||
hdr.opcode = SD_OP_READ_OBJ;
|
hdr.opcode = SD_OP_READ_OBJ;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (cache) {
|
||||||
|
hdr.flags |= SD_FLAG_CMD_CACHE;
|
||||||
|
}
|
||||||
|
|
||||||
hdr.oid = oid;
|
hdr.oid = oid;
|
||||||
hdr.data_length = datalen;
|
hdr.data_length = datalen;
|
||||||
hdr.offset = offset;
|
hdr.offset = offset;
|
||||||
|
@ -986,15 +1050,18 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
|
||||||
}
|
}
|
||||||
|
|
||||||
static int read_object(int fd, char *buf, uint64_t oid, int copies,
|
static int read_object(int fd, char *buf, uint64_t oid, int copies,
|
||||||
unsigned int datalen, uint64_t offset)
|
unsigned int datalen, uint64_t offset, uint8_t cache)
|
||||||
{
|
{
|
||||||
return read_write_object(fd, buf, oid, copies, datalen, offset, 0, 0);
|
return read_write_object(fd, buf, oid, copies, datalen, offset, 0, 0,
|
||||||
|
cache);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int write_object(int fd, char *buf, uint64_t oid, int copies,
|
static int write_object(int fd, char *buf, uint64_t oid, int copies,
|
||||||
unsigned int datalen, uint64_t offset, int create)
|
unsigned int datalen, uint64_t offset, int create,
|
||||||
|
uint8_t cache)
|
||||||
{
|
{
|
||||||
return read_write_object(fd, buf, oid, copies, datalen, offset, 1, create);
|
return read_write_object(fd, buf, oid, copies, datalen, offset, 1, create,
|
||||||
|
cache);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int sd_open(BlockDriverState *bs, const char *filename, int flags)
|
static int sd_open(BlockDriverState *bs, const char *filename, int flags)
|
||||||
|
@ -1026,6 +1093,15 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (flags & BDRV_O_CACHE_WB) {
|
||||||
|
s->cache_enabled = 1;
|
||||||
|
s->flush_fd = connect_to_sdog(s->addr, s->port);
|
||||||
|
if (s->flush_fd < 0) {
|
||||||
|
error_report("failed to connect");
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (snapid) {
|
if (snapid) {
|
||||||
dprintf("%" PRIx32 " snapshot inode was open.\n", vid);
|
dprintf("%" PRIx32 " snapshot inode was open.\n", vid);
|
||||||
s->is_snapshot = 1;
|
s->is_snapshot = 1;
|
||||||
|
@ -1038,7 +1114,8 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
|
||||||
}
|
}
|
||||||
|
|
||||||
buf = g_malloc(SD_INODE_SIZE);
|
buf = g_malloc(SD_INODE_SIZE);
|
||||||
ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0);
|
ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0,
|
||||||
|
s->cache_enabled);
|
||||||
|
|
||||||
closesocket(fd);
|
closesocket(fd);
|
||||||
|
|
||||||
|
@ -1272,6 +1349,9 @@ static void sd_close(BlockDriverState *bs)
|
||||||
|
|
||||||
qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL, NULL);
|
qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL, NULL);
|
||||||
closesocket(s->fd);
|
closesocket(s->fd);
|
||||||
|
if (s->cache_enabled) {
|
||||||
|
closesocket(s->flush_fd);
|
||||||
|
}
|
||||||
g_free(s->addr);
|
g_free(s->addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1305,7 +1385,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
|
||||||
datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
|
datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
|
||||||
s->inode.vdi_size = offset;
|
s->inode.vdi_size = offset;
|
||||||
ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
|
ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
|
||||||
s->inode.nr_copies, datalen, 0, 0);
|
s->inode.nr_copies, datalen, 0, 0, s->cache_enabled);
|
||||||
close(fd);
|
close(fd);
|
||||||
|
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
|
@ -1387,7 +1467,7 @@ static int sd_create_branch(BDRVSheepdogState *s)
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
|
ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
|
||||||
SD_INODE_SIZE, 0);
|
SD_INODE_SIZE, 0, s->cache_enabled);
|
||||||
|
|
||||||
closesocket(fd);
|
closesocket(fd);
|
||||||
|
|
||||||
|
@ -1575,6 +1655,36 @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
|
||||||
return acb->ret;
|
return acb->ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Block-driver flush callback: issue SD_OP_FLUSH_VDI for this VDI over
 * the dedicated flush connection (s->flush_fd).
 *
 * Does nothing and returns 0 when the cache flag is not set on the
 * driver state.  Otherwise returns 0 when the sheep answers
 * SD_RES_SUCCESS, the value returned by do_co_req() when the exchange
 * itself fails, and -EIO when the sheep reports any other result.
 */
static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
{
    BDRVSheepdogState *s = bs->opaque;
    SheepdogObjReq req = { 0 };
    /* The response is written over the request buffer by do_co_req(). */
    SheepdogObjRsp *reply = (SheepdogObjRsp *)&req;
    unsigned int wlen = 0;
    unsigned int rlen = 0;
    int err;

    if (!s->cache_enabled) {
        return 0;
    }

    req.opcode = SD_OP_FLUSH_VDI;
    req.oid = vid_to_vdi_oid(s->inode.vdi_id);

    /* No payload in either direction: both lengths are zero. */
    err = do_co_req(s->flush_fd, (SheepdogReq *)&req, NULL, &wlen, &rlen);
    if (err) {
        error_report("failed to send a request to the sheep");
        return err;
    }

    if (reply->result == SD_RES_SUCCESS) {
        return 0;
    }

    error_report("%s", sd_strerror(reply->result));
    return -EIO;
}
|
||||||
|
|
||||||
static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
|
static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
|
||||||
{
|
{
|
||||||
BDRVSheepdogState *s = bs->opaque;
|
BDRVSheepdogState *s = bs->opaque;
|
||||||
|
@ -1610,7 +1720,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
|
ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
|
||||||
s->inode.nr_copies, datalen, 0, 0);
|
s->inode.nr_copies, datalen, 0, 0, s->cache_enabled);
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
error_report("failed to write snapshot's inode.");
|
error_report("failed to write snapshot's inode.");
|
||||||
ret = -EIO;
|
ret = -EIO;
|
||||||
|
@ -1629,7 +1739,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
|
||||||
inode = (SheepdogInode *)g_malloc(datalen);
|
inode = (SheepdogInode *)g_malloc(datalen);
|
||||||
|
|
||||||
ret = read_object(fd, (char *)inode, vid_to_vdi_oid(new_vid),
|
ret = read_object(fd, (char *)inode, vid_to_vdi_oid(new_vid),
|
||||||
s->inode.nr_copies, datalen, 0);
|
s->inode.nr_copies, datalen, 0, s->cache_enabled);
|
||||||
|
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
error_report("failed to read new inode info. %s", strerror(errno));
|
error_report("failed to read new inode info. %s", strerror(errno));
|
||||||
|
@ -1684,7 +1794,7 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
|
||||||
|
|
||||||
buf = g_malloc(SD_INODE_SIZE);
|
buf = g_malloc(SD_INODE_SIZE);
|
||||||
ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
|
ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
|
||||||
SD_INODE_SIZE, 0);
|
SD_INODE_SIZE, 0, s->cache_enabled);
|
||||||
|
|
||||||
closesocket(fd);
|
closesocket(fd);
|
||||||
|
|
||||||
|
@ -1779,7 +1889,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
|
||||||
|
|
||||||
/* we don't need to read entire object */
|
/* we don't need to read entire object */
|
||||||
ret = read_object(fd, (char *)&inode, vid_to_vdi_oid(vid),
|
ret = read_object(fd, (char *)&inode, vid_to_vdi_oid(vid),
|
||||||
0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0);
|
0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0,
|
||||||
|
s->cache_enabled);
|
||||||
|
|
||||||
if (ret) {
|
if (ret) {
|
||||||
continue;
|
continue;
|
||||||
|
@ -1835,10 +1946,12 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
|
||||||
create = (offset == 0);
|
create = (offset == 0);
|
||||||
if (load) {
|
if (load) {
|
||||||
ret = read_object(fd, (char *)data, vmstate_oid,
|
ret = read_object(fd, (char *)data, vmstate_oid,
|
||||||
s->inode.nr_copies, data_len, offset);
|
s->inode.nr_copies, data_len, offset,
|
||||||
|
s->cache_enabled);
|
||||||
} else {
|
} else {
|
||||||
ret = write_object(fd, (char *)data, vmstate_oid,
|
ret = write_object(fd, (char *)data, vmstate_oid,
|
||||||
s->inode.nr_copies, data_len, offset, create);
|
s->inode.nr_copies, data_len, offset, create,
|
||||||
|
s->cache_enabled);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
|
@ -1904,6 +2017,7 @@ BlockDriver bdrv_sheepdog = {
|
||||||
|
|
||||||
.bdrv_co_readv = sd_co_readv,
|
.bdrv_co_readv = sd_co_readv,
|
||||||
.bdrv_co_writev = sd_co_writev,
|
.bdrv_co_writev = sd_co_writev,
|
||||||
|
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
|
||||||
|
|
||||||
.bdrv_snapshot_create = sd_snapshot_create,
|
.bdrv_snapshot_create = sd_snapshot_create,
|
||||||
.bdrv_snapshot_goto = sd_snapshot_goto,
|
.bdrv_snapshot_goto = sd_snapshot_goto,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue