pci,virtio

This further optimizes MSIX handling in virtio-pci.
 Also included is pci cleanup by Paolo, and pci device
 assignment fix by Alex.
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.12 (GNU/Linux)
 
 iQEcBAABAgAGBQJQ7ZaiAAoJECgfDbjSjVRpFhcIAJkY4VQ3i7TLnLsnEDOR+FrP
 66YLEDwCSiKZ/UW7WERGN3p3tm0hAXLhPoHFqMGRPPV9pdcXI+Eb8v+u0IHVlt+7
 DsQ9TIemZkpSMuUJjQbu/RF8k9JV8+X7M6CKnWahq68p0UD/vDX+OgCiGKO/l/zY
 tENJhwD6M1MMzbxyzd4nCnkf3CPrHFvpPt2VAqQnkCw3wLAtR34SucBjr/dXcjuT
 arPiV8dNmXHTosdKvcodAWA+0YLLE7Bhz0nLK6eTt5L/UsfdbRN8q9Xdhd5nJjji
 DjKBJBfwdG5n3r96g7dlb/XdHuQjbFBq3uLmc8H2OdWOrk5PyqeoUA5fdBQxkb8=
 =vKSI
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'mst/tags/for_anthony' into staging

pci,virtio

This further optimizes MSIX handling in virtio-pci.
Also included is pci cleanup by Paolo, and pci device
assignment fix by Alex.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>

* mst/tags/for_anthony:
  pci-assign: Enable MSIX on device to match guest
  pci: use constants for devices under the 1B36 device ID, document them
  ivshmem: use symbolic constant for PCI ID, add to pci-ids.txt
  virtio-9p: use symbolic constant, add to pci-ids.txt
  reorganize pci-ids.txt
  docs: move pci-ids.txt to docs/specs/
  vhost: backend masking support
  vhost: set started flag while start is in progress
  virtio-net: set/clear vhost_started in reverse order
  virtio: backend virtqueue notifier masking
  virtio-pci: cache msix messages
  kvm: add stub for update msi route
  msix: add api to access msix message
  virtio: don't waste irqfds on control vqs
This commit is contained in:
Anthony Liguori 2013-01-14 10:23:50 -06:00
commit 8e9a8681dd
19 changed files with 438 additions and 101 deletions

View file

@ -170,7 +170,7 @@ static void virtio_9p_class_init(ObjectClass *klass, void *data)
k->init = virtio_9p_init_pci;
k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
k->device_id = 0x1009;
k->device_id = PCI_DEVICE_ID_VIRTIO_9P;
k->revision = VIRTIO_PCI_ABI_VERSION;
k->class_id = 0x2;
dc->props = virtio_9p_properties;

View file

@ -29,6 +29,9 @@
#include <sys/mman.h>
#include <sys/types.h>
#define PCI_VENDOR_ID_IVSHMEM PCI_VENDOR_ID_REDHAT_QUMRANET
#define PCI_DEVICE_ID_IVSHMEM 0x1110
#define IVSHMEM_IOEVENTFD 0
#define IVSHMEM_MSI 1
@ -800,8 +803,8 @@ static void ivshmem_class_init(ObjectClass *klass, void *data)
k->init = pci_ivshmem_init;
k->exit = pci_ivshmem_uninit;
k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
k->device_id = 0x1110;
k->vendor_id = PCI_VENDOR_ID_IVSHMEM;
k->device_id = PCI_DEVICE_ID_IVSHMEM;
k->class_id = PCI_CLASS_MEMORY_RAM;
dc->reset = ivshmem_reset;
dc->props = ivshmem_properties;

View file

@ -1031,6 +1031,19 @@ static bool assigned_dev_msix_masked(MSIXTableEntry *entry)
return (entry->ctrl & cpu_to_le32(0x1)) != 0;
}
/*
* When MSI-X is first enabled the vector table typically has all the
* vectors masked, so we can't use that as the obvious test to figure out
* how many vectors to initially enable. Instead we look at the data field
* because this is what worked for pci-assign for a long time. This makes
* sure the physical MSI-X state tracks the guest's view, which is important
* for some VF/PF and PF/fw communication channels.
*/
static bool assigned_dev_msix_skipped(MSIXTableEntry *entry)
{
return !entry->data;
}
static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
{
AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev);
@ -1041,7 +1054,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
/* Get the usable entry number for allocating */
for (i = 0; i < adev->msix_max; i++, entry++) {
if (assigned_dev_msix_masked(entry)) {
if (assigned_dev_msix_skipped(entry)) {
continue;
}
entries_nr++;
@ -1070,7 +1083,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
for (i = 0; i < adev->msix_max; i++, entry++) {
adev->msi_virq[i] = -1;
if (assigned_dev_msix_masked(entry)) {
if (assigned_dev_msix_skipped(entry)) {
continue;
}

View file

@ -27,7 +27,7 @@
#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
#define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
{
uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
MSIMessage msg;

View file

@ -5,6 +5,7 @@
#include "hw/pci/pci.h"
void msix_set_message(PCIDevice *dev, int vector, MSIMessage msg);
MSIMessage msix_get_message(PCIDevice *dev, unsigned int vector);
int msix_init(PCIDevice *dev, unsigned short nentries,
MemoryRegion *table_bar, uint8_t table_bar_nr,
unsigned table_offset, MemoryRegion *pba_bar,

View file

@ -77,6 +77,14 @@
#define PCI_DEVICE_ID_VIRTIO_CONSOLE 0x1003
#define PCI_DEVICE_ID_VIRTIO_SCSI 0x1004
#define PCI_DEVICE_ID_VIRTIO_RNG 0x1005
#define PCI_DEVICE_ID_VIRTIO_9P 0x1009
#define PCI_VENDOR_ID_REDHAT 0x1b36
#define PCI_DEVICE_ID_REDHAT_BRIDGE 0x0001
#define PCI_DEVICE_ID_REDHAT_SERIAL 0x0002
#define PCI_DEVICE_ID_REDHAT_SERIAL2 0x0003
#define PCI_DEVICE_ID_REDHAT_SERIAL4 0x0004
#define PCI_DEVICE_ID_REDHAT_QXL 0x0100
#define FMT_PCIBUS PRIx64

View file

@ -27,10 +27,6 @@
#include "exec/memory.h"
#include "pci/pci_bus.h"
#define REDHAT_PCI_VENDOR_ID 0x1b36
#define PCI_BRIDGE_DEV_VENDOR_ID REDHAT_PCI_VENDOR_ID
#define PCI_BRIDGE_DEV_DEVICE_ID 0x1
struct PCIBridgeDev {
PCIBridge bridge;
MemoryRegion bar;
@ -146,8 +142,8 @@ static void pci_bridge_dev_class_init(ObjectClass *klass, void *data)
k->init = pci_bridge_dev_initfn;
k->exit = pci_bridge_dev_exitfn;
k->config_write = pci_bridge_dev_write_config;
k->vendor_id = PCI_BRIDGE_DEV_VENDOR_ID;
k->device_id = PCI_BRIDGE_DEV_DEVICE_ID;
k->vendor_id = PCI_VENDOR_ID_REDHAT;
k->device_id = PCI_DEVICE_ID_REDHAT_BRIDGE;
k->class_id = PCI_CLASS_BRIDGE_PCI;
k->is_bridge = 1,
dc->desc = "Standard PCI Bridge";

View file

@ -185,8 +185,8 @@ static void serial_pci_class_initfn(ObjectClass *klass, void *data)
PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass);
pc->init = serial_pci_init;
pc->exit = serial_pci_exit;
pc->vendor_id = 0x1b36; /* Red Hat */
pc->device_id = 0x0002;
pc->vendor_id = PCI_VENDOR_ID_REDHAT;
pc->device_id = PCI_DEVICE_ID_REDHAT_SERIAL;
pc->revision = 1;
pc->class_id = PCI_CLASS_COMMUNICATION_SERIAL;
dc->vmsd = &vmstate_pci_serial;
@ -199,8 +199,8 @@ static void multi_2x_serial_pci_class_initfn(ObjectClass *klass, void *data)
PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass);
pc->init = multi_serial_pci_init;
pc->exit = multi_serial_pci_exit;
pc->vendor_id = 0x1b36; /* Red Hat */
pc->device_id = 0x0003;
pc->vendor_id = PCI_VENDOR_ID_REDHAT;
pc->device_id = PCI_DEVICE_ID_REDHAT_SERIAL2;
pc->revision = 1;
pc->class_id = PCI_CLASS_COMMUNICATION_SERIAL;
dc->vmsd = &vmstate_pci_multi_serial;
@ -213,8 +213,8 @@ static void multi_4x_serial_pci_class_initfn(ObjectClass *klass, void *data)
PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass);
pc->init = multi_serial_pci_init;
pc->exit = multi_serial_pci_exit;
pc->vendor_id = 0x1b36; /* Red Hat */
pc->device_id = 0x0004;
pc->vendor_id = PCI_VENDOR_ID_REDHAT;
pc->device_id = PCI_DEVICE_ID_REDHAT_SERIAL4;
pc->revision = 1;
pc->class_id = PCI_CLASS_COMMUNICATION_SERIAL;
dc->vmsd = &vmstate_pci_multi_serial;

View file

@ -612,7 +612,7 @@ static void vhost_log_stop(MemoryListener *listener,
/* FIXME: implement */
}
static int vhost_virtqueue_init(struct vhost_dev *dev,
static int vhost_virtqueue_start(struct vhost_dev *dev,
struct VirtIODevice *vdev,
struct vhost_virtqueue *vq,
unsigned idx)
@ -681,16 +681,11 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
goto fail_kick;
}
file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
if (r) {
r = -errno;
goto fail_call;
}
/* Clear and discard previous events if any. */
event_notifier_test_and_clear(&vq->masked_notifier);
return 0;
fail_call:
fail_kick:
fail_alloc:
cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
@ -708,7 +703,7 @@ fail_alloc_desc:
return r;
}
static void vhost_virtqueue_cleanup(struct vhost_dev *dev,
static void vhost_virtqueue_stop(struct vhost_dev *dev,
struct VirtIODevice *vdev,
struct vhost_virtqueue *vq,
unsigned idx)
@ -746,11 +741,39 @@ static void vhost_eventfd_del(MemoryListener *listener,
{
}
static int vhost_virtqueue_init(struct vhost_dev *dev,
struct vhost_virtqueue *vq, int n)
{
struct vhost_vring_file file = {
.index = n,
};
int r = event_notifier_init(&vq->masked_notifier, 0);
if (r < 0) {
return r;
}
file.fd = event_notifier_get_fd(&vq->masked_notifier);
r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
if (r) {
r = -errno;
goto fail_call;
}
return 0;
fail_call:
event_notifier_cleanup(&vq->masked_notifier);
return r;
}
static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
{
event_notifier_cleanup(&vq->masked_notifier);
}
int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
bool force)
{
uint64_t features;
int r;
int i, r;
if (devfd >= 0) {
hdev->control = devfd;
} else {
@ -768,6 +791,13 @@ int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
if (r < 0) {
goto fail;
}
for (i = 0; i < hdev->nvqs; ++i) {
r = vhost_virtqueue_init(hdev, hdev->vqs + i, i);
if (r < 0) {
goto fail_vq;
}
}
hdev->features = features;
hdev->memory_listener = (MemoryListener) {
@ -795,6 +825,10 @@ int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
memory_listener_register(&hdev->memory_listener, &address_space_memory);
hdev->force = force;
return 0;
fail_vq:
while (--i >= 0) {
vhost_virtqueue_cleanup(hdev->vqs + i);
}
fail:
r = -errno;
close(hdev->control);
@ -803,6 +837,10 @@ fail:
void vhost_dev_cleanup(struct vhost_dev *hdev)
{
int i;
for (i = 0; i < hdev->nvqs; ++i) {
vhost_virtqueue_cleanup(hdev->vqs + i);
}
memory_listener_unregister(&hdev->memory_listener);
g_free(hdev->mem);
g_free(hdev->mem_sections);
@ -869,17 +907,53 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
}
}
/* Test and clear event pending status.
* Should be called after unmask to avoid losing events.
*/
bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n)
{
struct vhost_virtqueue *vq = hdev->vqs + n;
assert(hdev->started);
return event_notifier_test_and_clear(&vq->masked_notifier);
}
/* Mask/unmask events from this vq. */
void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
bool mask)
{
struct VirtQueue *vvq = virtio_get_queue(vdev, n);
int r;
assert(hdev->started);
struct vhost_vring_file file = {
.index = n,
};
if (mask) {
file.fd = event_notifier_get_fd(&hdev->vqs[n].masked_notifier);
} else {
file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
}
r = ioctl(hdev->control, VHOST_SET_VRING_CALL, &file);
assert(r >= 0);
}
/* Host notifiers must be enabled at this point. */
int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
{
int i, r;
hdev->started = true;
if (!vdev->binding->set_guest_notifiers) {
fprintf(stderr, "binding does not support guest notifiers\n");
r = -ENOSYS;
goto fail;
}
r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, true);
r = vdev->binding->set_guest_notifiers(vdev->binding_opaque,
hdev->nvqs,
true);
if (r < 0) {
fprintf(stderr, "Error binding guest notifier: %d\n", -r);
goto fail_notifiers;
@ -895,7 +969,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
goto fail_mem;
}
for (i = 0; i < hdev->nvqs; ++i) {
r = vhost_virtqueue_init(hdev,
r = vhost_virtqueue_start(hdev,
vdev,
hdev->vqs + i,
i);
@ -916,22 +990,22 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
}
}
hdev->started = true;
return 0;
fail_log:
fail_vq:
while (--i >= 0) {
vhost_virtqueue_cleanup(hdev,
vhost_virtqueue_stop(hdev,
vdev,
hdev->vqs + i,
i);
}
fail_mem:
fail_features:
vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
vdev->binding->set_guest_notifiers(vdev->binding_opaque, hdev->nvqs, false);
fail_notifiers:
fail:
hdev->started = false;
return r;
}
@ -941,7 +1015,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
int i, r;
for (i = 0; i < hdev->nvqs; ++i) {
vhost_virtqueue_cleanup(hdev,
vhost_virtqueue_stop(hdev,
vdev,
hdev->vqs + i,
i);
@ -950,7 +1024,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
vhost_sync_dirty_bitmap(hdev, &hdev->mem_sections[i],
0, (hwaddr)~0x0ull);
}
r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
r = vdev->binding->set_guest_notifiers(vdev->binding_opaque,
hdev->nvqs,
false);
if (r < 0) {
fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
fflush(stderr);

View file

@ -18,6 +18,7 @@ struct vhost_virtqueue {
void *ring;
unsigned long long ring_phys;
unsigned ring_size;
EventNotifier masked_notifier;
};
typedef unsigned long vhost_log_chunk_t;
@ -53,4 +54,13 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev);
int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
/* Test and clear masked event pending status.
* Should be called after unmask to avoid losing events.
*/
bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n);
/* Mask/unmask events from this vq.
*/
void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
bool mask);
#endif

View file

@ -109,6 +109,9 @@ struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
(1 << VHOST_NET_F_VIRTIO_NET_HDR);
net->backend = r;
net->dev.nvqs = 2;
net->dev.vqs = net->vqs;
r = vhost_dev_init(&net->dev, devfd, "/dev/vhost-net", force);
if (r < 0) {
goto fail;
@ -143,9 +146,6 @@ int vhost_net_start(struct vhost_net *net,
struct vhost_vring_file file = { };
int r;
net->dev.nvqs = 2;
net->dev.vqs = net->vqs;
r = vhost_dev_enable_notifiers(&net->dev, dev);
if (r < 0) {
goto fail_notifiers;
@ -200,6 +200,17 @@ void vhost_net_cleanup(struct vhost_net *net)
vhost_dev_cleanup(&net->dev);
g_free(net);
}
bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
{
return vhost_virtqueue_pending(&net->dev, idx);
}
void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
int idx, bool mask)
{
vhost_virtqueue_mask(&net->dev, dev, idx, mask);
}
#else
struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
bool force)
@ -234,4 +245,14 @@ unsigned vhost_net_get_features(struct vhost_net *net, unsigned features)
void vhost_net_ack_features(struct vhost_net *net, unsigned features)
{
}
bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
{
return -ENOSYS;
}
void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
int idx, bool mask)
{
}
#endif

View file

@ -17,4 +17,7 @@ void vhost_net_cleanup(VHostNetState *net);
unsigned vhost_net_get_features(VHostNetState *net, unsigned features);
void vhost_net_ack_features(VHostNetState *net, unsigned features);
bool vhost_net_virtqueue_pending(VHostNetState *net, int n);
void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
int idx, bool mask);
#endif

View file

@ -126,12 +126,12 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
if (!vhost_net_query(tap_get_vhost_net(n->nic->nc.peer), &n->vdev)) {
return;
}
n->vhost_started = 1;
r = vhost_net_start(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
if (r < 0) {
error_report("unable to start vhost net: %d: "
"falling back on userspace virtio", -r);
} else {
n->vhost_started = 1;
n->vhost_started = 0;
}
} else {
vhost_net_stop(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
@ -1010,6 +1010,22 @@ static NetClientInfo net_virtio_info = {
.link_status_changed = virtio_net_set_link_status,
};
static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
VirtIONet *n = to_virtio_net(vdev);
assert(n->vhost_started);
return vhost_net_virtqueue_pending(tap_get_vhost_net(n->nic->nc.peer), idx);
}
static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
bool mask)
{
VirtIONet *n = to_virtio_net(vdev);
assert(n->vhost_started);
vhost_net_virtqueue_mask(tap_get_vhost_net(n->nic->nc.peer),
vdev, idx, mask);
}
VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
virtio_net_conf *net)
{
@ -1026,6 +1042,8 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
n->vdev.bad_features = virtio_net_bad_features;
n->vdev.reset = virtio_net_reset;
n->vdev.set_status = virtio_net_set_status;
n->vdev.guest_notifier_mask = virtio_net_guest_notifier_mask;
n->vdev.guest_notifier_pending = virtio_net_guest_notifier_pending;
n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
if (net->tx && strcmp(net->tx, "timer") && strcmp(net->tx, "bh")) {

View file

@ -487,8 +487,6 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
unsigned int vector,
MSIMessage msg)
{
VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
EventNotifier *n = virtio_queue_get_guest_notifier(vq);
VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
int ret;
@ -500,20 +498,33 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
irqfd->virq = ret;
}
irqfd->users++;
ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, irqfd->virq);
if (ret < 0) {
if (--irqfd->users == 0) {
kvm_irqchip_release_virq(kvm_state, irqfd->virq);
}
return ret;
}
return 0;
}
static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
unsigned int queue_no,
unsigned int vector)
{
VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
if (--irqfd->users == 0) {
kvm_irqchip_release_virq(kvm_state, irqfd->virq);
}
}
static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy,
unsigned int queue_no,
unsigned int vector)
{
VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
EventNotifier *n = virtio_queue_get_guest_notifier(vq);
int ret;
ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, irqfd->virq);
return ret;
}
static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy,
unsigned int queue_no,
unsigned int vector)
{
VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
EventNotifier *n = virtio_queue_get_guest_notifier(vq);
@ -522,27 +533,143 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
ret = kvm_irqchip_remove_irqfd_notifier(kvm_state, n, irqfd->virq);
assert(ret == 0);
}
if (--irqfd->users == 0) {
kvm_irqchip_release_virq(kvm_state, irqfd->virq);
static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
{
PCIDevice *dev = &proxy->pci_dev;
VirtIODevice *vdev = proxy->vdev;
unsigned int vector;
int ret, queue_no;
MSIMessage msg;
for (queue_no = 0; queue_no < nvqs; queue_no++) {
if (!virtio_queue_get_num(vdev, queue_no)) {
break;
}
vector = virtio_queue_vector(vdev, queue_no);
if (vector >= msix_nr_vectors_allocated(dev)) {
continue;
}
msg = msix_get_message(dev, vector);
ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector, msg);
if (ret < 0) {
goto undo;
}
/* If guest supports masking, set up irqfd now.
* Otherwise, delay until unmasked in the frontend.
*/
if (proxy->vdev->guest_notifier_mask) {
ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
if (ret < 0) {
kvm_virtio_pci_vq_vector_release(proxy, vector);
goto undo;
}
}
}
return 0;
undo:
while (--queue_no >= 0) {
vector = virtio_queue_vector(vdev, queue_no);
if (vector >= msix_nr_vectors_allocated(dev)) {
continue;
}
if (proxy->vdev->guest_notifier_mask) {
kvm_virtio_pci_irqfd_release(proxy, vector, queue_no);
}
kvm_virtio_pci_vq_vector_release(proxy, vector);
}
return ret;
}
static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs)
{
PCIDevice *dev = &proxy->pci_dev;
VirtIODevice *vdev = proxy->vdev;
unsigned int vector;
int queue_no;
for (queue_no = 0; queue_no < nvqs; queue_no++) {
if (!virtio_queue_get_num(vdev, queue_no)) {
break;
}
vector = virtio_queue_vector(vdev, queue_no);
if (vector >= msix_nr_vectors_allocated(dev)) {
continue;
}
/* If guest supports masking, clean up irqfd now.
* Otherwise, it was cleaned when masked in the frontend.
*/
if (proxy->vdev->guest_notifier_mask) {
kvm_virtio_pci_irqfd_release(proxy, vector, queue_no);
}
kvm_virtio_pci_vq_vector_release(proxy, vector);
}
}
static int kvm_virtio_pci_vector_use(PCIDevice *dev, unsigned vector,
static int kvm_virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy,
unsigned int queue_no,
unsigned int vector,
MSIMessage msg)
{
VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
EventNotifier *n = virtio_queue_get_guest_notifier(vq);
VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
int ret;
if (irqfd->msg.data != msg.data || irqfd->msg.address != msg.address) {
ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, msg);
if (ret < 0) {
return ret;
}
}
/* If guest supports masking, irqfd is already setup, unmask it.
* Otherwise, set it up now.
*/
if (proxy->vdev->guest_notifier_mask) {
proxy->vdev->guest_notifier_mask(proxy->vdev, queue_no, false);
/* Test after unmasking to avoid losing events. */
if (proxy->vdev->guest_notifier_pending &&
proxy->vdev->guest_notifier_pending(proxy->vdev, queue_no)) {
event_notifier_set(n);
}
} else {
ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
}
return ret;
}
static void kvm_virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy,
unsigned int queue_no,
unsigned int vector)
{
/* If guest supports masking, keep irqfd but mask it.
* Otherwise, clean it up now.
*/
if (proxy->vdev->guest_notifier_mask) {
proxy->vdev->guest_notifier_mask(proxy->vdev, queue_no, true);
} else {
kvm_virtio_pci_irqfd_release(proxy, vector, queue_no);
}
}
static int kvm_virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector,
MSIMessage msg)
{
VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
VirtIODevice *vdev = proxy->vdev;
int ret, queue_no;
for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
if (!virtio_queue_get_num(vdev, queue_no)) {
break;
}
if (virtio_queue_vector(vdev, queue_no) != vector) {
continue;
}
ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector, msg);
ret = kvm_virtio_pci_vq_vector_unmask(proxy, queue_no, vector, msg);
if (ret < 0) {
goto undo;
}
@ -554,25 +681,25 @@ undo:
if (virtio_queue_vector(vdev, queue_no) != vector) {
continue;
}
kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector);
kvm_virtio_pci_vq_vector_mask(proxy, queue_no, vector);
}
return ret;
}
static void kvm_virtio_pci_vector_release(PCIDevice *dev, unsigned vector)
static void kvm_virtio_pci_vector_mask(PCIDevice *dev, unsigned vector)
{
VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
VirtIODevice *vdev = proxy->vdev;
int queue_no;
for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
if (!virtio_queue_get_num(vdev, queue_no)) {
break;
}
if (virtio_queue_vector(vdev, queue_no) != vector) {
continue;
}
kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector);
kvm_virtio_pci_vq_vector_mask(proxy, queue_no, vector);
}
}
@ -587,7 +714,7 @@ static void kvm_virtio_pci_vector_poll(PCIDevice *dev,
EventNotifier *notifier;
VirtQueue *vq;
for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
if (!virtio_queue_get_num(vdev, queue_no)) {
break;
}
@ -598,7 +725,11 @@ static void kvm_virtio_pci_vector_poll(PCIDevice *dev,
}
vq = virtio_get_queue(vdev, queue_no);
notifier = virtio_queue_get_guest_notifier(vq);
if (event_notifier_test_and_clear(notifier)) {
if (vdev->guest_notifier_pending) {
if (vdev->guest_notifier_pending(vdev, queue_no)) {
msix_set_pending(dev, vector);
}
} else if (event_notifier_test_and_clear(notifier)) {
msix_set_pending(dev, vector);
}
}
@ -631,7 +762,7 @@ static bool virtio_pci_query_guest_notifiers(DeviceState *d)
return msix_enabled(&proxy->pci_dev);
}
static int virtio_pci_set_guest_notifiers(DeviceState *d, bool assign)
static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
{
VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
VirtIODevice *vdev = proxy->vdev;
@ -639,14 +770,24 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, bool assign)
bool with_irqfd = msix_enabled(&proxy->pci_dev) &&
kvm_msi_via_irqfd_enabled();
nvqs = MIN(nvqs, VIRTIO_PCI_QUEUE_MAX);
/* When deassigning, pass a consistent nvqs value
* to avoid leaking notifiers.
*/
assert(assign || nvqs == proxy->nvqs_with_notifiers);
proxy->nvqs_with_notifiers = nvqs;
/* Must unset vector notifier while guest notifier is still assigned */
if (proxy->vector_irqfd && !assign) {
msix_unset_vector_notifiers(&proxy->pci_dev);
kvm_virtio_pci_vector_release(proxy, nvqs);
g_free(proxy->vector_irqfd);
proxy->vector_irqfd = NULL;
}
for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) {
for (n = 0; n < nvqs; n++) {
if (!virtio_queue_get_num(vdev, n)) {
break;
}
@ -663,17 +804,25 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, bool assign)
proxy->vector_irqfd =
g_malloc0(sizeof(*proxy->vector_irqfd) *
msix_nr_vectors_allocated(&proxy->pci_dev));
r = msix_set_vector_notifiers(&proxy->pci_dev,
kvm_virtio_pci_vector_use,
kvm_virtio_pci_vector_release,
kvm_virtio_pci_vector_poll);
r = kvm_virtio_pci_vector_use(proxy, nvqs);
if (r < 0) {
goto assign_error;
}
r = msix_set_vector_notifiers(&proxy->pci_dev,
kvm_virtio_pci_vector_unmask,
kvm_virtio_pci_vector_mask,
kvm_virtio_pci_vector_poll);
if (r < 0) {
goto notifiers_error;
}
}
return 0;
notifiers_error:
assert(assign);
kvm_virtio_pci_vector_release(proxy, nvqs);
assign_error:
/* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */
assert(assign);

View file

@ -27,6 +27,7 @@
#define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)
typedef struct {
MSIMessage msg;
int virq;
unsigned int users;
} VirtIOIRQFD;
@ -51,6 +52,7 @@ typedef struct {
bool ioeventfd_disabled;
bool ioeventfd_started;
VirtIOIRQFD *vector_irqfd;
int nvqs_with_notifiers;
} VirtIOPCIProxy;
void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev);

View file

@ -99,7 +99,7 @@ typedef struct {
int (*load_done)(DeviceState *d, QEMUFile *f);
unsigned (*get_features)(DeviceState *d);
bool (*query_guest_notifiers)(DeviceState *d);
int (*set_guest_notifiers)(DeviceState *d, bool assigned);
int (*set_guest_notifiers)(DeviceState *d, int nvqs, bool assigned);
int (*set_host_notifier)(DeviceState *d, int n, bool assigned);
void (*vmstate_change)(DeviceState *d, bool running);
} VirtIOBindings;
@ -126,6 +126,19 @@ struct VirtIODevice
void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
void (*reset)(VirtIODevice *vdev);
void (*set_status)(VirtIODevice *vdev, uint8_t val);
/* Test and clear event pending status.
* Should be called after unmask to avoid losing events.
* If backend does not support masking,
* must check in frontend instead.
*/
bool (*guest_notifier_pending)(VirtIODevice *vdev, int n);
/* Mask/unmask events from this vq. Any events reported
* while masked will become pending.
* If backend does not support masking,
* must mask in frontend instead.
*/
void (*guest_notifier_mask)(VirtIODevice *vdev, int n, bool mask);
VirtQueue *vq;
const VirtIOBindings *binding;
DeviceState *binding_opaque;