mirror of
https://github.com/Motorhead1991/qemu.git
synced 2025-07-31 14:23:53 -06:00
libvhost-user: Support tracking inflight I/O in shared memory
This patch adds support for VHOST_USER_GET_INFLIGHT_FD and VHOST_USER_SET_INFLIGHT_FD message to set/get shared buffer to/from qemu. Then backend can track inflight I/O in this buffer. Signed-off-by: Xie Yongji <xieyongji@baidu.com> Signed-off-by: Zhang Yu <zhangyu31@baidu.com> Message-Id: <20190228085355.9614-5-xieyongji@baidu.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
This commit is contained in:
parent
f7671f3d49
commit
5f9ff1eff3
3 changed files with 400 additions and 21 deletions
|
@ -41,6 +41,8 @@
|
|||
#endif
|
||||
|
||||
#include "qemu/atomic.h"
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/memfd.h"
|
||||
|
||||
#include "libvhost-user.h"
|
||||
|
||||
|
@ -53,6 +55,18 @@
|
|||
_min1 < _min2 ? _min1 : _min2; })
|
||||
#endif
|
||||
|
||||
/* Round number down to multiple */
|
||||
#define ALIGN_DOWN(n, m) ((n) / (m) * (m))
|
||||
|
||||
/* Round number up to multiple */
|
||||
#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))
|
||||
|
||||
/* Align each region to cache line size in inflight buffer */
|
||||
#define INFLIGHT_ALIGNMENT 64
|
||||
|
||||
/* The version of inflight buffer */
|
||||
#define INFLIGHT_VERSION 1
|
||||
|
||||
#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
|
||||
|
||||
/* The version of the protocol we support */
|
||||
|
@ -66,6 +80,20 @@
|
|||
} \
|
||||
} while (0)
|
||||
|
||||
static inline
|
||||
bool has_feature(uint64_t features, unsigned int fbit)
|
||||
{
|
||||
assert(fbit < 64);
|
||||
return !!(features & (1ULL << fbit));
|
||||
}
|
||||
|
||||
static inline
|
||||
bool vu_has_feature(VuDev *dev,
|
||||
unsigned int fbit)
|
||||
{
|
||||
return has_feature(dev->features, fbit);
|
||||
}
|
||||
|
||||
static const char *
|
||||
vu_request_to_string(unsigned int req)
|
||||
{
|
||||
|
@ -100,6 +128,8 @@ vu_request_to_string(unsigned int req)
|
|||
REQ(VHOST_USER_POSTCOPY_ADVISE),
|
||||
REQ(VHOST_USER_POSTCOPY_LISTEN),
|
||||
REQ(VHOST_USER_POSTCOPY_END),
|
||||
REQ(VHOST_USER_GET_INFLIGHT_FD),
|
||||
REQ(VHOST_USER_SET_INFLIGHT_FD),
|
||||
REQ(VHOST_USER_MAX),
|
||||
};
|
||||
#undef REQ
|
||||
|
@ -890,6 +920,91 @@ vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg)
|
|||
return true;
|
||||
}
|
||||
|
||||
static int
|
||||
inflight_desc_compare(const void *a, const void *b)
|
||||
{
|
||||
VuVirtqInflightDesc *desc0 = (VuVirtqInflightDesc *)a,
|
||||
*desc1 = (VuVirtqInflightDesc *)b;
|
||||
|
||||
if (desc1->counter > desc0->counter &&
|
||||
(desc1->counter - desc0->counter) < VIRTQUEUE_MAX_SIZE * 2) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int
|
||||
vu_check_queue_inflights(VuDev *dev, VuVirtq *vq)
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
if (!has_feature(dev->protocol_features,
|
||||
VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (unlikely(!vq->inflight)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (unlikely(!vq->inflight->version)) {
|
||||
/* initialize the buffer */
|
||||
vq->inflight->version = INFLIGHT_VERSION;
|
||||
return 0;
|
||||
}
|
||||
|
||||
vq->used_idx = vq->vring.used->idx;
|
||||
vq->resubmit_num = 0;
|
||||
vq->resubmit_list = NULL;
|
||||
vq->counter = 0;
|
||||
|
||||
if (unlikely(vq->inflight->used_idx != vq->used_idx)) {
|
||||
vq->inflight->desc[vq->inflight->last_batch_head].inflight = 0;
|
||||
|
||||
barrier();
|
||||
|
||||
vq->inflight->used_idx = vq->used_idx;
|
||||
}
|
||||
|
||||
for (i = 0; i < vq->inflight->desc_num; i++) {
|
||||
if (vq->inflight->desc[i].inflight == 1) {
|
||||
vq->inuse++;
|
||||
}
|
||||
}
|
||||
|
||||
vq->shadow_avail_idx = vq->last_avail_idx = vq->inuse + vq->used_idx;
|
||||
|
||||
if (vq->inuse) {
|
||||
vq->resubmit_list = malloc(sizeof(VuVirtqInflightDesc) * vq->inuse);
|
||||
if (!vq->resubmit_list) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i = 0; i < vq->inflight->desc_num; i++) {
|
||||
if (vq->inflight->desc[i].inflight) {
|
||||
vq->resubmit_list[vq->resubmit_num].index = i;
|
||||
vq->resubmit_list[vq->resubmit_num].counter =
|
||||
vq->inflight->desc[i].counter;
|
||||
vq->resubmit_num++;
|
||||
}
|
||||
}
|
||||
|
||||
if (vq->resubmit_num > 1) {
|
||||
qsort(vq->resubmit_list, vq->resubmit_num,
|
||||
sizeof(VuVirtqInflightDesc), inflight_desc_compare);
|
||||
}
|
||||
vq->counter = vq->resubmit_list[0].counter + 1;
|
||||
}
|
||||
|
||||
/* in case of I/O hang after reconnecting */
|
||||
if (eventfd_write(vq->kick_fd, 1)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool
|
||||
vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg)
|
||||
{
|
||||
|
@ -923,6 +1038,10 @@ vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg)
|
|||
dev->vq[index].kick_fd, index);
|
||||
}
|
||||
|
||||
if (vu_check_queue_inflights(dev, &dev->vq[index])) {
|
||||
vu_panic(dev, "Failed to check inflights for vq: %d\n", index);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -995,6 +1114,11 @@ vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg)
|
|||
|
||||
dev->vq[index].call_fd = vmsg->fds[0];
|
||||
|
||||
/* in case of I/O hang after reconnecting */
|
||||
if (eventfd_write(vmsg->fds[0], 1)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);
|
||||
|
||||
return false;
|
||||
|
@ -1209,6 +1333,116 @@ vu_set_postcopy_end(VuDev *dev, VhostUserMsg *vmsg)
|
|||
return true;
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
vu_inflight_queue_size(uint16_t queue_size)
|
||||
{
|
||||
return ALIGN_UP(sizeof(VuDescStateSplit) * queue_size +
|
||||
sizeof(uint16_t), INFLIGHT_ALIGNMENT);
|
||||
}
|
||||
|
||||
static bool
|
||||
vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
|
||||
{
|
||||
int fd;
|
||||
void *addr;
|
||||
uint64_t mmap_size;
|
||||
uint16_t num_queues, queue_size;
|
||||
|
||||
if (vmsg->size != sizeof(vmsg->payload.inflight)) {
|
||||
vu_panic(dev, "Invalid get_inflight_fd message:%d", vmsg->size);
|
||||
vmsg->payload.inflight.mmap_size = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
num_queues = vmsg->payload.inflight.num_queues;
|
||||
queue_size = vmsg->payload.inflight.queue_size;
|
||||
|
||||
DPRINT("set_inflight_fd num_queues: %"PRId16"\n", num_queues);
|
||||
DPRINT("set_inflight_fd queue_size: %"PRId16"\n", queue_size);
|
||||
|
||||
mmap_size = vu_inflight_queue_size(queue_size) * num_queues;
|
||||
|
||||
addr = qemu_memfd_alloc("vhost-inflight", mmap_size,
|
||||
F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
|
||||
&fd, NULL);
|
||||
|
||||
if (!addr) {
|
||||
vu_panic(dev, "Failed to alloc vhost inflight area");
|
||||
vmsg->payload.inflight.mmap_size = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
memset(addr, 0, mmap_size);
|
||||
|
||||
dev->inflight_info.addr = addr;
|
||||
dev->inflight_info.size = vmsg->payload.inflight.mmap_size = mmap_size;
|
||||
dev->inflight_info.fd = vmsg->fds[0] = fd;
|
||||
vmsg->fd_num = 1;
|
||||
vmsg->payload.inflight.mmap_offset = 0;
|
||||
|
||||
DPRINT("send inflight mmap_size: %"PRId64"\n",
|
||||
vmsg->payload.inflight.mmap_size);
|
||||
DPRINT("send inflight mmap offset: %"PRId64"\n",
|
||||
vmsg->payload.inflight.mmap_offset);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
vu_set_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
|
||||
{
|
||||
int fd, i;
|
||||
uint64_t mmap_size, mmap_offset;
|
||||
uint16_t num_queues, queue_size;
|
||||
void *rc;
|
||||
|
||||
if (vmsg->fd_num != 1 ||
|
||||
vmsg->size != sizeof(vmsg->payload.inflight)) {
|
||||
vu_panic(dev, "Invalid set_inflight_fd message size:%d fds:%d",
|
||||
vmsg->size, vmsg->fd_num);
|
||||
return false;
|
||||
}
|
||||
|
||||
fd = vmsg->fds[0];
|
||||
mmap_size = vmsg->payload.inflight.mmap_size;
|
||||
mmap_offset = vmsg->payload.inflight.mmap_offset;
|
||||
num_queues = vmsg->payload.inflight.num_queues;
|
||||
queue_size = vmsg->payload.inflight.queue_size;
|
||||
|
||||
DPRINT("set_inflight_fd mmap_size: %"PRId64"\n", mmap_size);
|
||||
DPRINT("set_inflight_fd mmap_offset: %"PRId64"\n", mmap_offset);
|
||||
DPRINT("set_inflight_fd num_queues: %"PRId16"\n", num_queues);
|
||||
DPRINT("set_inflight_fd queue_size: %"PRId16"\n", queue_size);
|
||||
|
||||
rc = mmap(0, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
|
||||
fd, mmap_offset);
|
||||
|
||||
if (rc == MAP_FAILED) {
|
||||
vu_panic(dev, "set_inflight_fd mmap error: %s", strerror(errno));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dev->inflight_info.fd) {
|
||||
close(dev->inflight_info.fd);
|
||||
}
|
||||
|
||||
if (dev->inflight_info.addr) {
|
||||
munmap(dev->inflight_info.addr, dev->inflight_info.size);
|
||||
}
|
||||
|
||||
dev->inflight_info.fd = fd;
|
||||
dev->inflight_info.addr = rc;
|
||||
dev->inflight_info.size = mmap_size;
|
||||
|
||||
for (i = 0; i < num_queues; i++) {
|
||||
dev->vq[i].inflight = (VuVirtqInflight *)rc;
|
||||
dev->vq[i].inflight->desc_num = queue_size;
|
||||
rc = (void *)((char *)rc + vu_inflight_queue_size(queue_size));
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
|
||||
{
|
||||
|
@ -1287,6 +1521,10 @@ vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
|
|||
return vu_set_postcopy_listen(dev, vmsg);
|
||||
case VHOST_USER_POSTCOPY_END:
|
||||
return vu_set_postcopy_end(dev, vmsg);
|
||||
case VHOST_USER_GET_INFLIGHT_FD:
|
||||
return vu_get_inflight_fd(dev, vmsg);
|
||||
case VHOST_USER_SET_INFLIGHT_FD:
|
||||
return vu_set_inflight_fd(dev, vmsg);
|
||||
default:
|
||||
vmsg_close_fds(vmsg);
|
||||
vu_panic(dev, "Unhandled request: %d", vmsg->request);
|
||||
|
@ -1354,8 +1592,24 @@ vu_deinit(VuDev *dev)
|
|||
close(vq->err_fd);
|
||||
vq->err_fd = -1;
|
||||
}
|
||||
|
||||
if (vq->resubmit_list) {
|
||||
free(vq->resubmit_list);
|
||||
vq->resubmit_list = NULL;
|
||||
}
|
||||
|
||||
vq->inflight = NULL;
|
||||
}
|
||||
|
||||
if (dev->inflight_info.addr) {
|
||||
munmap(dev->inflight_info.addr, dev->inflight_info.size);
|
||||
dev->inflight_info.addr = NULL;
|
||||
}
|
||||
|
||||
if (dev->inflight_info.fd > 0) {
|
||||
close(dev->inflight_info.fd);
|
||||
dev->inflight_info.fd = -1;
|
||||
}
|
||||
|
||||
vu_close_log(dev);
|
||||
if (dev->slave_fd != -1) {
|
||||
|
@ -1682,20 +1936,6 @@ vu_queue_empty(VuDev *dev, VuVirtq *vq)
|
|||
return vring_avail_idx(vq) == vq->last_avail_idx;
|
||||
}
|
||||
|
||||
static inline
|
||||
bool has_feature(uint64_t features, unsigned int fbit)
|
||||
{
|
||||
assert(fbit < 64);
|
||||
return !!(features & (1ULL << fbit));
|
||||
}
|
||||
|
||||
static inline
|
||||
bool vu_has_feature(VuDev *dev,
|
||||
unsigned int fbit)
|
||||
{
|
||||
return has_feature(dev->features, fbit);
|
||||
}
|
||||
|
||||
static bool
|
||||
vring_notify(VuDev *dev, VuVirtq *vq)
|
||||
{
|
||||
|
@ -1824,12 +2064,6 @@ virtqueue_map_desc(VuDev *dev,
|
|||
*p_num_sg = num_sg;
|
||||
}
|
||||
|
||||
/* Round number down to multiple */
|
||||
#define ALIGN_DOWN(n, m) ((n) / (m) * (m))
|
||||
|
||||
/* Round number up to multiple */
|
||||
#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))
|
||||
|
||||
static void *
|
||||
virtqueue_alloc_element(size_t sz,
|
||||
unsigned out_num, unsigned in_num)
|
||||
|
@ -1930,9 +2164,68 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
|
|||
return elem;
|
||||
}
|
||||
|
||||
static int
|
||||
vu_queue_inflight_get(VuDev *dev, VuVirtq *vq, int desc_idx)
|
||||
{
|
||||
if (!has_feature(dev->protocol_features,
|
||||
VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (unlikely(!vq->inflight)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
vq->inflight->desc[desc_idx].counter = vq->counter++;
|
||||
vq->inflight->desc[desc_idx].inflight = 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
vu_queue_inflight_pre_put(VuDev *dev, VuVirtq *vq, int desc_idx)
|
||||
{
|
||||
if (!has_feature(dev->protocol_features,
|
||||
VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (unlikely(!vq->inflight)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
vq->inflight->last_batch_head = desc_idx;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
vu_queue_inflight_post_put(VuDev *dev, VuVirtq *vq, int desc_idx)
|
||||
{
|
||||
if (!has_feature(dev->protocol_features,
|
||||
VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (unlikely(!vq->inflight)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
barrier();
|
||||
|
||||
vq->inflight->desc[desc_idx].inflight = 0;
|
||||
|
||||
barrier();
|
||||
|
||||
vq->inflight->used_idx = vq->used_idx;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void *
|
||||
vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
|
||||
{
|
||||
int i;
|
||||
unsigned int head;
|
||||
VuVirtqElement *elem;
|
||||
|
||||
|
@ -1941,6 +2234,18 @@ vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if (unlikely(vq->resubmit_list && vq->resubmit_num > 0)) {
|
||||
i = (--vq->resubmit_num);
|
||||
elem = vu_queue_map_desc(dev, vq, vq->resubmit_list[i].index, sz);
|
||||
|
||||
if (!vq->resubmit_num) {
|
||||
free(vq->resubmit_list);
|
||||
vq->resubmit_list = NULL;
|
||||
}
|
||||
|
||||
return elem;
|
||||
}
|
||||
|
||||
if (vu_queue_empty(dev, vq)) {
|
||||
return NULL;
|
||||
}
|
||||
|
@ -1971,6 +2276,8 @@ vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
|
|||
|
||||
vq->inuse++;
|
||||
|
||||
vu_queue_inflight_get(dev, vq, head);
|
||||
|
||||
return elem;
|
||||
}
|
||||
|
||||
|
@ -2131,5 +2438,7 @@ vu_queue_push(VuDev *dev, VuVirtq *vq,
|
|||
const VuVirtqElement *elem, unsigned int len)
|
||||
{
|
||||
vu_queue_fill(dev, vq, elem, len, 0);
|
||||
vu_queue_inflight_pre_put(dev, vq, elem->index);
|
||||
vu_queue_flush(dev, vq, 1);
|
||||
vu_queue_inflight_post_put(dev, vq, elem->index);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue