Mirror of https://github.com/Motorhead1991/qemu.git (synced 2025-08-17 07:02:03 -06:00)

commit 8ec4bc3c8c

Merge tag 'net-pull-request' of https://github.com/jasowang/qemu into staging

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1

iQEcBAABAgAGBQJi18PHAAoJEO8Ells5jWIRCEQH+wepXDoT6Q56xmUgxVs+hlAD
CXGy71/cNV08Yu3PTTXo8SYaw+KXxsA9ECgIr2hsfPXarAdoOpJFpZR0HoqIzaXd
kpD6bvwN8bEEOlAHxKcb6/VM+VYntZBfkH9m1WLGx3fHILazLblyL8w2Hkp7NK9J
IBpQQ63uU8Xt0+js96Z/sPOKRjrtbKXFT1bhY2CI8MKZpuqNyED0jZYwbNdnRwZN
fuKbpsaaT4Wxx+mQMg7H7a0e/xx3DNi2F6cAtGLH98WYzbLFgExSSK8G8jnwEVfM
EKWfU7N4zmokq7jN99yvGzjIzLrnLX6yn/ifSs+lQOzdtCA9zEbotI+CDCVdPs4=
=9zus
-----END PGP SIGNATURE-----

# gpg: Signature made Wed 20 Jul 2022 09:58:47 BST
# gpg:                using RSA key EF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>" [marginal]
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F 3562 EF04 965B 398D 6211

* tag 'net-pull-request' of https://github.com/jasowang/qemu: (25 commits)
  net/colo.c: fix segmentation fault when packet is not parsed correctly
  net/colo.c: No need to track conn_list for filter-rewriter
  net/colo: Fix a "double free" crash to clear the conn_list
  softmmu/runstate.c: add RunStateTransition support form COLO to PRELAUNCH
  vdpa: Add x-svq to NetdevVhostVDPAOptions
  vdpa: Add device migration blocker
  vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs
  vdpa: Buffer CVQ support on shadow virtqueue
  vdpa: manual forward CVQ buffers
  vhost-net-vdpa: add stubs for when no virtio-net device is present
  vdpa: Export vhost_vdpa_dma_map and unmap calls
  vhost: Add svq avail_handler callback
  vhost: add vhost_svq_poll
  vhost: Expose vhost_svq_add
  vhost: add vhost_svq_push_elem
  vhost: Track number of descs in SVQDescState
  vhost: Add SVQDescState
  vhost: Decouple vhost_svq_add from VirtQueueElement
  vhost: Check for queue full at vhost_svq_add
  vhost: Move vhost_svq_kick call to vhost_svq_add
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

15 changed files with 675 additions and 128 deletions
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -49,7 +49,6 @@
 
 #define VIRTIO_NET_VM_VERSION    11
 
-#define MAC_TABLE_ENTRIES    64
 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
 
 /* previously fixed value */
@@ -1434,57 +1433,71 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
     return VIRTIO_NET_OK;
 }
 
-static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
+size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
+                                  const struct iovec *in_sg, unsigned in_num,
+                                  const struct iovec *out_sg,
+                                  unsigned out_num)
 {
     VirtIONet *n = VIRTIO_NET(vdev);
     struct virtio_net_ctrl_hdr ctrl;
     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
-    VirtQueueElement *elem;
     size_t s;
     struct iovec *iov, *iov2;
-    unsigned int iov_cnt;
+
+    if (iov_size(in_sg, in_num) < sizeof(status) ||
+        iov_size(out_sg, out_num) < sizeof(ctrl)) {
+        virtio_error(vdev, "virtio-net ctrl missing headers");
+        return 0;
+    }
+
+    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
+    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
+    iov_discard_front(&iov, &out_num, sizeof(ctrl));
+    if (s != sizeof(ctrl)) {
+        status = VIRTIO_NET_ERR;
+    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
+        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
+    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
+        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
+    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
+        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
+    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
+        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
+    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
+        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
+    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
+        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
+    }
+
+    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
+    assert(s == sizeof(status));
+
+    g_free(iov2);
+    return sizeof(status);
+}
+
+static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
+{
+    VirtQueueElement *elem;
 
     for (;;) {
+        size_t written;
         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
         if (!elem) {
             break;
         }
-        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
-            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
-            virtio_error(vdev, "virtio-net ctrl missing headers");
+
+        written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
+                                             elem->out_sg, elem->out_num);
+        if (written > 0) {
+            virtqueue_push(vq, elem, written);
+            virtio_notify(vdev, vq);
+            g_free(elem);
+        } else {
             virtqueue_detach_element(vq, elem, 0);
             g_free(elem);
             break;
         }
-
-        iov_cnt = elem->out_num;
-        iov2 = iov = g_memdup2(elem->out_sg,
-                               sizeof(struct iovec) * elem->out_num);
-        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
-        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
-        if (s != sizeof(ctrl)) {
-            status = VIRTIO_NET_ERR;
-        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
-            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
-        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
-            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
-        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
-            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
-        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
-            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
-        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
-            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
-        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
-            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
-        }
-
-        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
-        assert(s == sizeof(status));
-
-        virtqueue_push(vq, elem, sizeof(status));
-        virtio_notify(vdev, vq);
-        g_free(iov2);
-        g_free(elem);
     }
 }
 
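The refactor above pulls the device-model dispatch out of the virtqueue loop so callers other than virtio_net_handle_ctrl() can feed it a control command from their own scatter-gather buffers. As a rough, hypothetical sketch (not part of the patch; vdev is assumed to point at a realized virtio-net device and the buffer layout is illustrative), a caller with one out buffer holding the command and one in buffer for the ack could do:

    /* Sketch: drive the virtio-net control-command model directly. */
    struct virtio_net_ctrl_hdr hdr = {
        .class = VIRTIO_NET_CTRL_MAC,
        .cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET,
    };
    uint8_t mac[ETH_ALEN] = { 0x52, 0x54, 0x00, 0x12, 0x34, 0x56 };
    virtio_net_ctrl_ack ack = VIRTIO_NET_ERR;
    struct iovec out[] = {
        { .iov_base = &hdr, .iov_len = sizeof(hdr) },
        { .iov_base = mac,  .iov_len = sizeof(mac) },
    };
    struct iovec in = { .iov_base = &ack, .iov_len = sizeof(ack) };
    /* Returns sizeof(ack) on success and writes the ack through 'in'. */
    size_t written = virtio_net_handle_ctrl_iov(vdev, &in, 1, out, 2);

This is the entry point the vhost-vdpa control-virtqueue code later in this series uses to keep QEMU's device model in sync after forwarding a command to the hardware.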
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -122,17 +122,35 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
     return true;
 }
 
-static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
-                                    const struct iovec *iovec, size_t num,
-                                    bool more_descs, bool write)
+/**
+ * Write descriptors to SVQ vring
+ *
+ * @svq: The shadow virtqueue
+ * @sg: Cache for hwaddr
+ * @iovec: The iovec from the guest
+ * @num: iovec length
+ * @more_descs: True if more descriptors come in the chain
+ * @write: True if they are writeable descriptors
+ *
+ * Return true if success, false otherwise and print error.
+ */
+static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
+                                        const struct iovec *iovec, size_t num,
+                                        bool more_descs, bool write)
 {
     uint16_t i = svq->free_head, last = svq->free_head;
     unsigned n;
     uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
     vring_desc_t *descs = svq->vring.desc;
+    bool ok;
 
     if (num == 0) {
-        return;
+        return true;
+    }
+
+    ok = vhost_svq_translate_addr(svq, sg, iovec, num);
+    if (unlikely(!ok)) {
+        return false;
     }
 
     for (n = 0; n < num; n++) {
@@ -150,39 +168,38 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
     }
 
     svq->free_head = le16_to_cpu(svq->desc_next[last]);
+    return true;
 }
 
 static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
-                                VirtQueueElement *elem, unsigned *head)
+                                const struct iovec *out_sg, size_t out_num,
+                                const struct iovec *in_sg, size_t in_num,
+                                unsigned *head)
 {
     unsigned avail_idx;
     vring_avail_t *avail = svq->vring.avail;
     bool ok;
-    g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num));
+    g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num));
 
     *head = svq->free_head;
 
     /* We need some descriptors here */
-    if (unlikely(!elem->out_num && !elem->in_num)) {
+    if (unlikely(!out_num && !in_num)) {
         qemu_log_mask(LOG_GUEST_ERROR,
                       "Guest provided element with no descriptors");
         return false;
     }
 
-    ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num);
-    if (unlikely(!ok)) {
-        return false;
-    }
-    vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
-                            elem->in_num > 0, false);
-
-    ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num);
+    ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0,
+                                     false);
     if (unlikely(!ok)) {
         return false;
     }
 
-    vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true);
+    ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true);
+    if (unlikely(!ok)) {
+        return false;
+    }
 
     /*
      * Put the entry in the available array (but don't update avail->idx until
@@ -199,26 +216,6 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
     return true;
 }
 
-/**
- * Add an element to a SVQ.
- *
- * The caller must check that there is enough slots for the new element. It
- * takes ownership of the element: In case of failure, it is free and the SVQ
- * is considered broken.
- */
-static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
-{
-    unsigned qemu_head;
-    bool ok = vhost_svq_add_split(svq, elem, &qemu_head);
-    if (unlikely(!ok)) {
-        g_free(elem);
-        return false;
-    }
-
-    svq->ring_id_maps[qemu_head] = elem;
-    return true;
-}
-
 static void vhost_svq_kick(VhostShadowVirtqueue *svq)
 {
     /*
@@ -233,6 +230,46 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
     event_notifier_set(&svq->hdev_kick);
 }
 
+/**
+ * Add an element to a SVQ.
+ *
+ * The caller must check that there is enough slots for the new element. It
+ * takes ownership of the element: In case of failure not ENOSPC, it is free.
+ *
+ * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full
+ */
+int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
+                  size_t out_num, const struct iovec *in_sg, size_t in_num,
+                  VirtQueueElement *elem)
+{
+    unsigned qemu_head;
+    unsigned ndescs = in_num + out_num;
+    bool ok;
+
+    if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
+        return -ENOSPC;
+    }
+
+    ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head);
+    if (unlikely(!ok)) {
+        g_free(elem);
+        return -EINVAL;
+    }
+
+    svq->desc_state[qemu_head].elem = elem;
+    svq->desc_state[qemu_head].ndescs = ndescs;
+    vhost_svq_kick(svq);
+    return 0;
+}
+
+/* Convenience wrapper to add a guest's element to SVQ */
+static int vhost_svq_add_element(VhostShadowVirtqueue *svq,
+                                 VirtQueueElement *elem)
+{
+    return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg,
+                         elem->in_num, elem);
+}
+
 /**
  * Forward available buffers.
  *
@@ -257,7 +294,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
 
     while (true) {
         VirtQueueElement *elem;
-        bool ok;
+        int r;
 
         if (svq->next_guest_avail_elem) {
             elem = g_steal_pointer(&svq->next_guest_avail_elem);
@@ -269,28 +306,30 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
             break;
         }
 
-        if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) {
-            /*
-             * This condition is possible since a contiguous buffer in GPA
-             * does not imply a contiguous buffer in qemu's VA
-             * scatter-gather segments. If that happens, the buffer exposed
-             * to the device needs to be a chain of descriptors at this
-             * moment.
-             *
-             * SVQ cannot hold more available buffers if we are here:
-             * queue the current guest descriptor and ignore further kicks
-             * until some elements are used.
-             */
-            svq->next_guest_avail_elem = elem;
-            return;
+        if (svq->ops) {
+            r = svq->ops->avail_handler(svq, elem, svq->ops_opaque);
+        } else {
+            r = vhost_svq_add_element(svq, elem);
         }
+        if (unlikely(r != 0)) {
+            if (r == -ENOSPC) {
+                /*
+                 * This condition is possible since a contiguous buffer in
+                 * GPA does not imply a contiguous buffer in qemu's VA
+                 * scatter-gather segments. If that happens, the buffer
+                 * exposed to the device needs to be a chain of descriptors
+                 * at this moment.
+                 *
+                 * SVQ cannot hold more available buffers if we are here:
+                 * queue the current guest descriptor and ignore kicks
+                 * until some elements are used.
+                 */
+                svq->next_guest_avail_elem = elem;
+            }
 
-        ok = vhost_svq_add(svq, elem);
-        if (unlikely(!ok)) {
-            /* VQ is broken, just return and ignore any other kicks */
+            /* VQ is full or broken, just return and ignore kicks */
             return;
         }
-        vhost_svq_kick(svq);
     }
 
     virtio_queue_set_notification(svq->vq, true);
@@ -311,11 +350,12 @@ static void vhost_handle_guest_kick_notifier(EventNotifier *n)
 
 static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
 {
+    uint16_t *used_idx = &svq->vring.used->idx;
     if (svq->last_used_idx != svq->shadow_used_idx) {
         return true;
     }
 
-    svq->shadow_used_idx = cpu_to_le16(svq->vring.used->idx);
+    svq->shadow_used_idx = cpu_to_le16(*(volatile uint16_t *)used_idx);
 
     return svq->last_used_idx != svq->shadow_used_idx;
 }
@@ -376,21 +416,36 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
         return NULL;
     }
 
-    if (unlikely(!svq->ring_id_maps[used_elem.id])) {
+    if (unlikely(!svq->desc_state[used_elem.id].elem)) {
         qemu_log_mask(LOG_GUEST_ERROR,
             "Device %s says index %u is used, but it was not available",
             svq->vdev->name, used_elem.id);
         return NULL;
     }
 
-    num = svq->ring_id_maps[used_elem.id]->in_num +
-          svq->ring_id_maps[used_elem.id]->out_num;
+    num = svq->desc_state[used_elem.id].ndescs;
     last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
     svq->desc_next[last_used_chain] = svq->free_head;
     svq->free_head = used_elem.id;
 
     *len = used_elem.len;
-    return g_steal_pointer(&svq->ring_id_maps[used_elem.id]);
+    return g_steal_pointer(&svq->desc_state[used_elem.id].elem);
+}
+
+/**
+ * Push an element to SVQ, returning it to the guest.
+ */
+void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
+                         const VirtQueueElement *elem, uint32_t len)
+{
+    virtqueue_push(svq->vq, elem, len);
+    if (svq->next_guest_avail_elem) {
+        /*
+         * Avail ring was full when vhost_svq_flush was called, so it's a
+         * good moment to make more descriptors available if possible.
+         */
+        vhost_handle_guest_kick(svq);
+    }
 }
 
 static void vhost_svq_flush(VhostShadowVirtqueue *svq,
@@ -434,6 +489,33 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
     } while (!vhost_svq_enable_notification(svq));
 }
 
+/**
+ * Poll the SVQ for one device used buffer.
+ *
+ * This function race with main event loop SVQ polling, so extra
+ * synchronization is needed.
+ *
+ * Return the length written by the device.
+ */
+size_t vhost_svq_poll(VhostShadowVirtqueue *svq)
+{
+    int64_t start_us = g_get_monotonic_time();
+    do {
+        uint32_t len;
+        VirtQueueElement *elem = vhost_svq_get_buf(svq, &len);
+        if (elem) {
+            return len;
+        }
+
+        if (unlikely(g_get_monotonic_time() - start_us > 10e6)) {
+            return 0;
+        }
+
+        /* Make sure we read new used_idx */
+        smp_rmb();
+    } while (true);
+}
+
 /**
  * Forward used buffers.
  *
@@ -560,7 +642,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
     memset(svq->vring.desc, 0, driver_size);
     svq->vring.used = qemu_memalign(qemu_real_host_page_size(), device_size);
     memset(svq->vring.used, 0, device_size);
-    svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num);
+    svq->desc_state = g_new0(SVQDescState, svq->vring.num);
     svq->desc_next = g_new0(uint16_t, svq->vring.num);
     for (unsigned i = 0; i < svq->vring.num - 1; i++) {
         svq->desc_next[i] = cpu_to_le16(i + 1);
@@ -585,7 +667,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
 
     for (unsigned i = 0; i < svq->vring.num; ++i) {
         g_autofree VirtQueueElement *elem = NULL;
-        elem = g_steal_pointer(&svq->ring_id_maps[i]);
+        elem = g_steal_pointer(&svq->desc_state[i].elem);
         if (elem) {
             virtqueue_detach_element(svq->vq, elem, 0);
         }
@@ -597,7 +679,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
     }
     svq->vq = NULL;
     g_free(svq->desc_next);
-    g_free(svq->ring_id_maps);
+    g_free(svq->desc_state);
     qemu_vfree(svq->vring.desc);
     qemu_vfree(svq->vring.used);
 }
@@ -607,12 +689,16 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
  * shadow methods and file descriptors.
  *
  * @iova_tree: Tree to perform descriptors translations
+ * @ops: SVQ owner callbacks
+ * @ops_opaque: ops opaque pointer
  *
  * Returns the new virtqueue or NULL.
  *
  * In case of error, reason is reported through error_report.
  */
-VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree)
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree,
+                                    const VhostShadowVirtqueueOps *ops,
+                                    void *ops_opaque)
 {
     g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
     int r;
@@ -634,6 +720,8 @@ VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree)
     event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
     event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
     svq->iova_tree = iova_tree;
+    svq->ops = ops;
+    svq->ops_opaque = ops_opaque;
     return g_steal_pointer(&svq);
 
 err_init_hdev_call:
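With vhost_svq_add() now taking explicit out/in iovec arrays and returning -ENOSPC or -EINVAL, an SVQ owner that injects its own buffers follows the add-then-poll pattern used by the CVQ code later in this series. A condensed, hypothetical sketch (error handling trimmed; cmd_buf, ack_buf and elem are placeholders for buffers already mapped for the device and the guest element whose ownership was handed to the caller):

    /* Sketch: inject one request and wait for the device to use it. */
    struct iovec out = { .iov_base = cmd_buf, .iov_len = cmd_len };
    struct iovec in  = { .iov_base = ack_buf, .iov_len = ack_len };
    int r = vhost_svq_add(svq, &out, 1, &in, 1, elem);
    if (r == -ENOSPC) {
        /* Queue full: keep the element and retry after some are used. */
    } else if (r == 0) {
        /*
         * vhost_svq_add() already kicked the device; vhost_svq_poll() spins
         * (re-reading used_idx after smp_rmb()) for up to 10e6 us, i.e. ~10s.
         */
        size_t used_len = vhost_svq_poll(svq);
        vhost_svq_push_elem(svq, elem, used_len);
    }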
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -15,6 +15,37 @@
 #include "standard-headers/linux/vhost_types.h"
 #include "hw/virtio/vhost-iova-tree.h"
 
+typedef struct SVQDescState {
+    VirtQueueElement *elem;
+
+    /*
+     * Number of descriptors exposed to the device. May or may not match
+     * guest's
+     */
+    unsigned int ndescs;
+} SVQDescState;
+
+typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;
+
+/**
+ * Callback to handle an avail buffer.
+ *
+ * @svq:  Shadow virtqueue
+ * @elem:  Element placed in the queue by the guest
+ * @vq_callback_opaque:  Opaque
+ *
+ * Returns 0 if the vq is running as expected.
+ *
+ * Note that ownership of elem is transferred to the callback.
+ */
+typedef int (*VirtQueueAvailCallback)(VhostShadowVirtqueue *svq,
+                                      VirtQueueElement *elem,
+                                      void *vq_callback_opaque);
+
+typedef struct VhostShadowVirtqueueOps {
+    VirtQueueAvailCallback avail_handler;
+} VhostShadowVirtqueueOps;
+
 /* Shadow virtqueue to relay notifications */
 typedef struct VhostShadowVirtqueue {
     /* Shadow vring */
@@ -47,8 +78,8 @@ typedef struct VhostShadowVirtqueue {
     /* IOVA mapping */
     VhostIOVATree *iova_tree;
 
-    /* Map for use the guest's descriptors */
-    VirtQueueElement **ring_id_maps;
+    /* SVQ vring descriptors state */
+    SVQDescState *desc_state;
 
     /* Next VirtQueue element that guest made available */
     VirtQueueElement *next_guest_avail_elem;
@@ -59,6 +90,12 @@ typedef struct VhostShadowVirtqueue {
      */
     uint16_t *desc_next;
 
+    /* Caller callbacks */
+    const VhostShadowVirtqueueOps *ops;
+
+    /* Caller callbacks opaque */
+    void *ops_opaque;
+
     /* Next head to expose to the device */
     uint16_t shadow_avail_idx;
 
@@ -74,6 +111,13 @@ typedef struct VhostShadowVirtqueue {
 
 bool vhost_svq_valid_features(uint64_t features, Error **errp);
 
+void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
+                         const VirtQueueElement *elem, uint32_t len);
+int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
+                  size_t out_num, const struct iovec *in_sg, size_t in_num,
+                  VirtQueueElement *elem);
+size_t vhost_svq_poll(VhostShadowVirtqueue *svq);
+
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
 void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
 void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
@@ -85,7 +129,9 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
                      VirtQueue *vq);
 void vhost_svq_stop(VhostShadowVirtqueue *svq);
 
-VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree);
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree,
+                                    const VhostShadowVirtqueueOps *ops,
+                                    void *ops_opaque);
 
 void vhost_svq_free(gpointer vq);
 G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
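The header now lets an SVQ owner intercept guest buffers before they reach the device. A minimal, hypothetical sketch of wiring up the callback (the handler and opaque names are illustrative, not from the patch):

    /* Sketch: give the shadow virtqueue an avail handler. */
    static int my_avail_handler(VhostShadowVirtqueue *svq,
                                VirtQueueElement *elem, void *opaque)
    {
        /* Inspect or rewrite 'elem' (ownership is ours now), then queue it. */
        return vhost_svq_add(svq, elem->out_sg, elem->out_num,
                             elem->in_sg, elem->in_num, elem);
    }

    static const VhostShadowVirtqueueOps my_ops = {
        .avail_handler = my_avail_handler,
    };

    VhostShadowVirtqueue *svq = vhost_svq_new(iova_tree, &my_ops, opaque);

When ops is NULL, vhost_handle_guest_kick() falls back to forwarding the element unchanged, as shown in the .c changes above.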
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -20,6 +20,7 @@
 #include "hw/virtio/vhost-shadow-virtqueue.h"
 #include "hw/virtio/vhost-vdpa.h"
 #include "exec/address-spaces.h"
+#include "migration/blocker.h"
 #include "qemu/cutils.h"
 #include "qemu/main-loop.h"
 #include "cpu.h"
@@ -71,8 +72,8 @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
     return false;
 }
 
-static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
-                              void *vaddr, bool readonly)
+int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
+                       void *vaddr, bool readonly)
 {
     struct vhost_msg_v2 msg = {};
     int fd = v->device_fd;
@@ -97,8 +98,7 @@ static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
     return ret;
 }
 
-static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
-                                hwaddr size)
+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size)
 {
     struct vhost_msg_v2 msg = {};
     int fd = v->device_fd;
@@ -418,8 +418,10 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
 
     shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
     for (unsigned n = 0; n < hdev->nvqs; ++n) {
-        g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree);
+        g_autoptr(VhostShadowVirtqueue) svq;
 
+        svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops,
+                            v->shadow_vq_ops_opaque);
         if (unlikely(!svq)) {
             error_setg(errp, "Cannot create svq %u", n);
             return -1;
@@ -1021,6 +1023,13 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
         return true;
     }
 
+    if (v->migration_blocker) {
+        int r = migrate_add_blocker(v->migration_blocker, &err);
+        if (unlikely(r < 0)) {
+            return false;
+        }
+    }
+
     for (i = 0; i < v->shadow_vqs->len; ++i) {
         VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
         VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
@@ -1063,6 +1072,10 @@ err:
         vhost_svq_stop(svq);
     }
 
+    if (v->migration_blocker) {
+        migrate_del_blocker(v->migration_blocker);
+    }
+
     return false;
 }
 
@@ -1082,6 +1095,9 @@ static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
         }
     }
 
+    if (v->migration_blocker) {
+        migrate_del_blocker(v->migration_blocker);
+    }
     return true;
 }
 
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -15,6 +15,7 @@
 #include <gmodule.h>
 
 #include "hw/virtio/vhost-iova-tree.h"
+#include "hw/virtio/vhost-shadow-virtqueue.h"
 #include "hw/virtio/virtio.h"
 #include "standard-headers/linux/vhost_types.h"
 
@@ -34,9 +35,16 @@ typedef struct vhost_vdpa {
     bool shadow_vqs_enabled;
     /* IOVA mapping used by the Shadow Virtqueue */
     VhostIOVATree *iova_tree;
+    Error *migration_blocker;
     GPtrArray *shadow_vqs;
+    const VhostShadowVirtqueueOps *shadow_vq_ops;
+    void *shadow_vq_ops_opaque;
     struct vhost_dev *dev;
     VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
 } VhostVDPA;
 
+int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
+                       void *vaddr, bool readonly);
+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size);
+
 #endif
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -35,6 +35,9 @@ OBJECT_DECLARE_SIMPLE_TYPE(VirtIONet, VIRTIO_NET)
  * and latency. */
 #define TX_BURST 256
 
+/* Maximum VIRTIO_NET_CTRL_MAC_TABLE_SET unicast + multicast entries. */
+#define MAC_TABLE_ENTRIES    64
+
 typedef struct virtio_net_conf
 {
     uint32_t txtimer;
@@ -218,6 +221,10 @@ struct VirtIONet {
     struct EBPFRSSContext ebpf_rss;
 };
 
+size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
+                                  const struct iovec *in_sg, unsigned in_num,
+                                  const struct iovec *out_sg,
+                                  unsigned out_num);
 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
                                    const char *type);
 
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -1323,7 +1323,7 @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
     s->connection_track_table = g_hash_table_new_full(connection_key_hash,
                                                       connection_key_equal,
                                                       g_free,
-                                                      connection_destroy);
+                                                      NULL);
 
     colo_compare_iothread(s);
 
net/colo.c
11
net/colo.c
|
@ -46,7 +46,14 @@ int parse_packet_early(Packet *pkt)
|
||||||
static const uint8_t vlan[] = {0x81, 0x00};
|
static const uint8_t vlan[] = {0x81, 0x00};
|
||||||
uint8_t *data = pkt->data + pkt->vnet_hdr_len;
|
uint8_t *data = pkt->data + pkt->vnet_hdr_len;
|
||||||
uint16_t l3_proto;
|
uint16_t l3_proto;
|
||||||
ssize_t l2hdr_len = eth_get_l2_hdr_length(data);
|
ssize_t l2hdr_len;
|
||||||
|
|
||||||
|
if (data == NULL) {
|
||||||
|
trace_colo_proxy_main_vnet_info("This packet is not parsed correctly, "
|
||||||
|
"pkt->vnet_hdr_len", pkt->vnet_hdr_len);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
l2hdr_len = eth_get_l2_hdr_length(data);
|
||||||
|
|
||||||
if (pkt->size < ETH_HLEN + pkt->vnet_hdr_len) {
|
if (pkt->size < ETH_HLEN + pkt->vnet_hdr_len) {
|
||||||
trace_colo_proxy_main("pkt->size < ETH_HLEN");
|
trace_colo_proxy_main("pkt->size < ETH_HLEN");
|
||||||
|
@ -218,7 +225,7 @@ Connection *connection_get(GHashTable *connection_track_table,
|
||||||
/*
|
/*
|
||||||
* clear the conn_list
|
* clear the conn_list
|
||||||
*/
|
*/
|
||||||
while (!g_queue_is_empty(conn_list)) {
|
while (conn_list && !g_queue_is_empty(conn_list)) {
|
||||||
connection_destroy(g_queue_pop_head(conn_list));
|
connection_destroy(g_queue_pop_head(conn_list));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
--- a/net/filter-rewriter.c
+++ b/net/filter-rewriter.c
@@ -383,7 +383,7 @@ static void colo_rewriter_setup(NetFilterState *nf, Error **errp)
     s->connection_track_table = g_hash_table_new_full(connection_key_hash,
                                                       connection_key_equal,
                                                       g_free,
-                                                      connection_destroy);
+                                                      NULL);
     s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf);
 }
 
--- a/net/meson.build
+++ b/net/meson.build
@@ -41,7 +41,8 @@
 softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files(tap_posix))
 softmmu_ss.add(when: 'CONFIG_WIN32', if_true: files('tap-win32.c'))
 if have_vhost_net_vdpa
-  softmmu_ss.add(files('vhost-vdpa.c'))
+  softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-vdpa.c'), if_false: files('vhost-vdpa-stub.c'))
+  softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-vdpa-stub.c'))
 endif
 
 vmnet_files = files(
--- a/net/trace-events
+++ b/net/trace-events
@@ -9,6 +9,7 @@ vhost_user_event(const char *chr, int event) "chr: %s got event: %d"
 
 # colo.c
 colo_proxy_main(const char *chr) ": %s"
+colo_proxy_main_vnet_info(const char *sta, int size) ": %s = %d"
 
 # colo-compare.c
 colo_compare_main(const char *chr) ": %s"
net/vhost-vdpa-stub.c (new file, 21 lines)
--- /dev/null
+++ b/net/vhost-vdpa-stub.c
@@ -0,0 +1,21 @@
+/*
+ * vhost-vdpa-stub.c
+ *
+ * Copyright (c) 2022 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "clients.h"
+#include "net/vhost-vdpa.h"
+#include "qapi/error.h"
+
+int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
+                        NetClientState *peer, Error **errp)
+{
+    error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
+    return -1;
+}
net/vhost-vdpa.c (357 changed lines)
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -11,11 +11,14 @@
 
 #include "qemu/osdep.h"
 #include "clients.h"
+#include "hw/virtio/virtio-net.h"
 #include "net/vhost_net.h"
 #include "net/vhost-vdpa.h"
 #include "hw/virtio/vhost-vdpa.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "qemu/memalign.h"
 #include "qemu/option.h"
 #include "qapi/error.h"
 #include <linux/vhost.h>
@@ -30,6 +33,9 @@ typedef struct VhostVDPAState {
     NetClientState nc;
     struct vhost_vdpa vhost_vdpa;
     VHostNetState *vhost_net;
+
+    /* Control commands shadow buffers */
+    void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer;
     bool started;
 } VhostVDPAState;
 
@@ -69,6 +75,28 @@ const int vdpa_feature_bits[] = {
     VHOST_INVALID_FEATURE_BIT
 };
 
+/** Supported device specific feature bits with SVQ */
+static const uint64_t vdpa_svq_device_features =
+    BIT_ULL(VIRTIO_NET_F_CSUM) |
+    BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
+    BIT_ULL(VIRTIO_NET_F_MTU) |
+    BIT_ULL(VIRTIO_NET_F_MAC) |
+    BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
+    BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
+    BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
+    BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
+    BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
+    BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
+    BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
+    BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
+    BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
+    BIT_ULL(VIRTIO_NET_F_STATUS) |
+    BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
+    BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
+    BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
+    BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
+    BIT_ULL(VIRTIO_NET_F_STANDBY);
+
 VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
 {
     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
@@ -127,7 +155,13 @@ err_init:
 static void vhost_vdpa_cleanup(NetClientState *nc)
 {
     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+    struct vhost_dev *dev = &s->vhost_net->dev;
 
+    qemu_vfree(s->cvq_cmd_out_buffer);
+    qemu_vfree(s->cvq_cmd_in_buffer);
+    if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
+        g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
+    }
     if (s->vhost_net) {
         vhost_net_cleanup(s->vhost_net);
         g_free(s->vhost_net);
@@ -187,13 +221,251 @@ static NetClientInfo net_vhost_vdpa_info = {
     .check_peer_type = vhost_vdpa_check_peer_type,
 };
 
+static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
+{
+    VhostIOVATree *tree = v->iova_tree;
+    DMAMap needle = {
+        /*
+         * No need to specify size or to look for more translations since
+         * this contiguous chunk was allocated by us.
+         */
+        .translated_addr = (hwaddr)(uintptr_t)addr,
+    };
+    const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
+    int r;
+
+    if (unlikely(!map)) {
+        error_report("Cannot locate expected map");
+        return;
+    }
+
+    r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1);
+    if (unlikely(r != 0)) {
+        error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
+    }
+
+    vhost_iova_tree_remove(tree, map);
+}
+
+static size_t vhost_vdpa_net_cvq_cmd_len(void)
+{
+    /*
+     * MAC_TABLE_SET is the ctrl command that produces the longer out buffer.
+     * In buffer is always 1 byte, so it should fit here
+     */
+    return sizeof(struct virtio_net_ctrl_hdr) +
+           2 * sizeof(struct virtio_net_ctrl_mac) +
+           MAC_TABLE_ENTRIES * ETH_ALEN;
+}
+
+static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
+{
+    return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
+}
+
+/** Copy and map a guest buffer. */
+static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v,
+                                   const struct iovec *out_data,
+                                   size_t out_num, size_t data_len, void *buf,
+                                   size_t *written, bool write)
+{
+    DMAMap map = {};
+    int r;
+
+    if (unlikely(!data_len)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n",
+                      __func__, write ? "in" : "out");
+        return false;
+    }
+
+    *written = iov_to_buf(out_data, out_num, 0, buf, data_len);
+    map.translated_addr = (hwaddr)(uintptr_t)buf;
+    map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1;
+    map.perm = write ? IOMMU_RW : IOMMU_RO,
+    r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
+    if (unlikely(r != IOVA_OK)) {
+        error_report("Cannot map injected element");
+        return false;
+    }
+
+    r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf,
+                           !write);
+    if (unlikely(r < 0)) {
+        goto dma_map_err;
+    }
+
+    return true;
+
+dma_map_err:
+    vhost_iova_tree_remove(v->iova_tree, &map);
+    return false;
+}
+
+/**
+ * Copy the guest element into a dedicated buffer suitable to be sent to NIC
+ *
+ * @iov: [0] is the out buffer, [1] is the in one
+ */
+static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s,
+                                        VirtQueueElement *elem,
+                                        struct iovec *iov)
+{
+    size_t in_copied;
+    bool ok;
+
+    iov[0].iov_base = s->cvq_cmd_out_buffer;
+    ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num,
+                                vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base,
+                                &iov[0].iov_len, false);
+    if (unlikely(!ok)) {
+        return false;
+    }
+
+    iov[1].iov_base = s->cvq_cmd_in_buffer;
+    ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0,
+                                sizeof(virtio_net_ctrl_ack), iov[1].iov_base,
+                                &in_copied, true);
+    if (unlikely(!ok)) {
+        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
+        return false;
+    }
+
+    iov[1].iov_len = sizeof(virtio_net_ctrl_ack);
+    return true;
+}
+
+/**
+ * Do not forward commands not supported by SVQ. Otherwise, the device could
+ * accept it and qemu would not know how to update the device model.
+ */
+static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out,
+                                            size_t out_num)
+{
+    struct virtio_net_ctrl_hdr ctrl;
+    size_t n;
+
+    n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl));
+    if (unlikely(n < sizeof(ctrl))) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: invalid legnth of out buffer %zu\n", __func__, n);
+        return false;
+    }
+
+    switch (ctrl.class) {
+    case VIRTIO_NET_CTRL_MAC:
+        switch (ctrl.cmd) {
+        case VIRTIO_NET_CTRL_MAC_ADDR_SET:
+            return true;
+        default:
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n",
+                          __func__, ctrl.cmd);
+        };
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n",
+                      __func__, ctrl.class);
+    };
+
+    return false;
+}
+
+/**
+ * Validate and copy control virtqueue commands.
+ *
+ * Following QEMU guidelines, we offer a copy of the buffers to the device to
+ * prevent TOCTOU bugs.
+ */
+static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
+                                            VirtQueueElement *elem,
+                                            void *opaque)
+{
+    VhostVDPAState *s = opaque;
+    size_t in_len, dev_written;
+    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
+    /* out and in buffers sent to the device */
+    struct iovec dev_buffers[2] = {
+        { .iov_base = s->cvq_cmd_out_buffer },
+        { .iov_base = s->cvq_cmd_in_buffer },
+    };
+    /* in buffer used for device model */
+    const struct iovec in = {
+        .iov_base = &status,
+        .iov_len = sizeof(status),
+    };
+    int r = -EINVAL;
+    bool ok;
+
+    ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers);
+    if (unlikely(!ok)) {
+        goto out;
+    }
+
+    ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1);
+    if (unlikely(!ok)) {
+        goto out;
+    }
+
+    r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem);
+    if (unlikely(r != 0)) {
+        if (unlikely(r == -ENOSPC)) {
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
+                          __func__);
+        }
+        goto out;
+    }
+
+    /*
+     * We can poll here since we've had BQL from the time we sent the
+     * descriptor. Also, we need to take the answer before SVQ pulls by itself,
+     * when BQL is released
+     */
+    dev_written = vhost_svq_poll(svq);
+    if (unlikely(dev_written < sizeof(status))) {
+        error_report("Insufficient written data (%zu)", dev_written);
+        goto out;
+    }
+
+    memcpy(&status, dev_buffers[1].iov_base, sizeof(status));
+    if (status != VIRTIO_NET_OK) {
+        goto out;
+    }
+
+    status = VIRTIO_NET_ERR;
+    virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1);
+    if (status != VIRTIO_NET_OK) {
+        error_report("Bad CVQ processing in model");
+    }
+
+out:
+    in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
+                          sizeof(status));
+    if (unlikely(in_len < sizeof(status))) {
+        error_report("Bad device CVQ written length");
+    }
+    vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
+    g_free(elem);
+    if (dev_buffers[0].iov_base) {
+        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base);
+    }
+    if (dev_buffers[1].iov_base) {
+        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base);
+    }
+    return r;
+}
+
+static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
+    .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
+};
+
 static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
                                            const char *device,
                                            const char *name,
                                            int vdpa_device_fd,
                                            int queue_pair_index,
                                            int nvqs,
-                                           bool is_datapath)
+                                           bool is_datapath,
+                                           bool svq,
+                                           VhostIOVATree *iova_tree)
 {
     NetClientState *nc = NULL;
     VhostVDPAState *s;
@@ -211,6 +483,21 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
 
     s->vhost_vdpa.device_fd = vdpa_device_fd;
     s->vhost_vdpa.index = queue_pair_index;
+    s->vhost_vdpa.shadow_vqs_enabled = svq;
+    s->vhost_vdpa.iova_tree = iova_tree;
+    if (!is_datapath) {
+        s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
+                                            vhost_vdpa_net_cvq_cmd_page_len());
+        memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
+        s->cvq_cmd_in_buffer = qemu_memalign(qemu_real_host_page_size(),
+                                            vhost_vdpa_net_cvq_cmd_page_len());
+        memset(s->cvq_cmd_in_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
+
+        s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
+        s->vhost_vdpa.shadow_vq_ops_opaque = s;
+        error_setg(&s->vhost_vdpa.migration_blocker,
+                   "Migration disabled: vhost-vdpa uses CVQ.");
+    }
     ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
     if (ret) {
         qemu_del_net_client(nc);
@@ -219,20 +506,32 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
     return nc;
 }
 
-static int vhost_vdpa_get_max_queue_pairs(int fd, int *has_cvq, Error **errp)
+static int vhost_vdpa_get_iova_range(int fd,
+                                     struct vhost_vdpa_iova_range *iova_range)
+{
+    int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);
+
+    return ret < 0 ? -errno : 0;
+}
+
+static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
+{
+    int ret = ioctl(fd, VHOST_GET_FEATURES, features);
+    if (unlikely(ret < 0)) {
+        error_setg_errno(errp, errno,
+                         "Fail to query features from vhost-vDPA device");
+    }
+    return ret;
+}
+
+static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features,
+                                          int *has_cvq, Error **errp)
 {
     unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
     g_autofree struct vhost_vdpa_config *config = NULL;
     __virtio16 *max_queue_pairs;
-    uint64_t features;
     int ret;
 
-    ret = ioctl(fd, VHOST_GET_FEATURES, &features);
-    if (ret) {
-        error_setg(errp, "Fail to query features from vhost-vDPA device");
-        return ret;
-    }
-
     if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
         *has_cvq = 1;
     } else {
@@ -262,10 +561,12 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
                         NetClientState *peer, Error **errp)
 {
     const NetdevVhostVDPAOptions *opts;
+    uint64_t features;
     int vdpa_device_fd;
     g_autofree NetClientState **ncs = NULL;
+    g_autoptr(VhostIOVATree) iova_tree = NULL;
     NetClientState *nc;
-    int queue_pairs, i, has_cvq = 0;
+    int queue_pairs, r, i, has_cvq = 0;
 
     assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
     opts = &netdev->u.vhost_vdpa;
@@ -279,29 +580,57 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
         return -errno;
     }
 
-    queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd,
+    r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
+    if (unlikely(r < 0)) {
+        return r;
+    }
+
+    queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features,
                                                  &has_cvq, errp);
     if (queue_pairs < 0) {
         qemu_close(vdpa_device_fd);
         return queue_pairs;
     }
 
+    if (opts->x_svq) {
+        struct vhost_vdpa_iova_range iova_range;
+
+        uint64_t invalid_dev_features =
+            features & ~vdpa_svq_device_features &
+            /* Transport are all accepted at this point */
+            ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START,
+                             VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START);
+
+        if (invalid_dev_features) {
+            error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64,
+                       invalid_dev_features);
+            goto err_svq;
+        }
+
+        vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range);
+        iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last);
+    }
+
     ncs = g_malloc0(sizeof(*ncs) * queue_pairs);
 
     for (i = 0; i < queue_pairs; i++) {
         ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
-                                     vdpa_device_fd, i, 2, true);
+                                     vdpa_device_fd, i, 2, true, opts->x_svq,
+                                     iova_tree);
         if (!ncs[i])
             goto err;
     }
 
     if (has_cvq) {
         nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
-                                 vdpa_device_fd, i, 1, false);
+                                 vdpa_device_fd, i, 1, false,
+                                 opts->x_svq, iova_tree);
         if (!nc)
             goto err;
     }
 
+    /* iova_tree ownership belongs to last NetClientState */
+    g_steal_pointer(&iova_tree);
     return 0;
 
 err:
@@ -310,6 +639,8 @@ err:
             qemu_del_net_client(ncs[i]);
         }
     }
+
+err_svq:
     qemu_close(vdpa_device_fd);
 
     return -1;
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -445,12 +445,19 @@
 # @queues: number of queues to be created for multiqueue vhost-vdpa
 #          (default: 1)
 #
+# @x-svq: Start device with (experimental) shadow virtqueue. (Since 7.1)
+#         (default: false)
+#
+# Features:
+# @unstable: Member @x-svq is experimental.
+#
 # Since: 5.1
 ##
 { 'struct': 'NetdevVhostVDPAOptions',
   'data': {
     '*vhostdev': 'str',
-    '*queues': 'int' } }
+    '*queues': 'int',
+    '*x-svq': {'type': 'bool', 'features' : [ 'unstable'] } } }
 
 ##
 # @NetdevVmnetHostOptions:
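With the schema change above, shadow virtqueues can be requested per netdev. An illustrative invocation (the device path is an example) would be something like: -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,x-svq=on,id=vdpa0 -device virtio-net-pci,netdev=vdpa0. Because @x-svq carries the unstable feature, the option is experimental and its name or behaviour may change.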
@ -126,6 +126,7 @@ static const RunStateTransition runstate_transitions_def[] = {
|
||||||
{ RUN_STATE_RESTORE_VM, RUN_STATE_PRELAUNCH },
|
{ RUN_STATE_RESTORE_VM, RUN_STATE_PRELAUNCH },
|
||||||
|
|
||||||
{ RUN_STATE_COLO, RUN_STATE_RUNNING },
|
{ RUN_STATE_COLO, RUN_STATE_RUNNING },
|
||||||
|
{ RUN_STATE_COLO, RUN_STATE_PRELAUNCH },
|
||||||
{ RUN_STATE_COLO, RUN_STATE_SHUTDOWN},
|
{ RUN_STATE_COLO, RUN_STATE_SHUTDOWN},
|
||||||
|
|
||||||
{ RUN_STATE_RUNNING, RUN_STATE_DEBUG },
|
{ RUN_STATE_RUNNING, RUN_STATE_DEBUG },
|
||||||
|
|