Merge remote-tracking branch 'qemu-kvm/uq/master' into staging

* qemu-kvm/uq/master:
  virtio/vhost: Add support for KVM in-kernel MSI injection
  msix: Add msix_nr_vectors_allocated
  kvm: Enable use of kvm_irqchip_in_kernel in hwlib code
  kvm: Introduce kvm_irqchip_add/remove_irqfd
  kvm: Make kvm_irqchip_commit_routes an internal service
  kvm: Publicize kvm_irqchip_release_virq
  kvm: Introduce kvm_irqchip_add_msi_route
  kvm: Rename kvm_irqchip_add_route to kvm_irqchip_add_irq_route
  msix: Introduce vector notifiers
  msix: Invoke msix_handle_mask_update on msix_mask_all
  msix: Factor out msix_get_message
  kvm: update vmxcap for EPT A/D, INVPCID, RDRAND, VMFUNC
  kvm: Enable in-kernel irqchip support by default
  kvm: Add support for direct MSI injections
  kvm: Update kernel headers
  kvm: x86: Wire up MSI support for in-kernel irqchip
  pc: Enable MSI support at APIC level
  kvm: Introduce basic MSI support for in-kernel irqchips
  Introduce MSIMessage structure
  kvm: Refactor KVMState::max_gsi to gsi_count
This commit is contained in:
Anthony Liguori 2012-06-03 07:56:23 +08:00
commit 74f4d2279b
18 changed files with 624 additions and 52 deletions

View file

@ -19,6 +19,7 @@
#include "apic_internal.h"
#include "apic.h"
#include "ioapic.h"
#include "msi.h"
#include "host-utils.h"
#include "trace.h"
#include "pc.h"
@ -862,6 +863,8 @@ static void apic_init(APICCommonState *s)
s->timer = qemu_new_timer_ns(vm_clock, apic_timer, s);
local_apics[s->idx] = s;
msi_supported = true;
}
static void apic_class_init(ObjectClass *klass, void *data)

View file

@ -10,6 +10,7 @@
* See the COPYING file in the top-level directory.
*/
#include "hw/apic_internal.h"
#include "hw/msi.h"
#include "kvm.h"
static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic,
@ -145,10 +146,39 @@ static void kvm_apic_external_nmi(APICCommonState *s)
run_on_cpu(s->cpu_env, do_inject_external_nmi, s);
}
/*
 * Read callback of the "kvm-apic-msi" region.
 *
 * None of the arguments are examined; every read yields a 64-bit value
 * with all bits set.
 */
static uint64_t kvm_apic_mem_read(void *opaque, target_phys_addr_t addr,
                                  unsigned size)
{
    const uint64_t all_ones = ~(uint64_t)0;

    return all_ones;
}
/*
 * Write callback of the "kvm-apic-msi" region.
 *
 * The written address/data pair is packed into an MSIMessage and passed to
 * kvm_irqchip_send_msi().  A negative return value is reported on stderr
 * (interpreted as a negated errno); the message is not retried.
 */
static void kvm_apic_mem_write(void *opaque, target_phys_addr_t addr,
                               uint64_t data, unsigned size)
{
    MSIMessage msi = { .address = addr, .data = data };
    int rc = kvm_irqchip_send_msi(kvm_state, msi);

    if (rc >= 0) {
        return;
    }
    fprintf(stderr, "KVM: injection failed, MSI lost (%s)\n",
            strerror(-rc));
}
/* Access callbacks installed on the "kvm-apic-msi" memory region. */
static const MemoryRegionOps kvm_apic_io_ops = {
    .endianness = DEVICE_NATIVE_ENDIAN,
    .write      = kvm_apic_mem_write,
    .read       = kvm_apic_mem_read,
};
/*
 * Initialise the KVM APIC's MSI window.
 *
 * Sets up s->io_memory as a region named "kvm-apic-msi" of MSI_SPACE_SIZE
 * bytes and flags MSI as supported when kvm_has_gsi_routing() reports true.
 */
static void kvm_apic_init(APICCommonState *s)
{
/*
 * NOTE(review): s->io_memory is initialised twice below.  This text looks
 * like a diff with its +/- markers stripped, in which the reservation call
 * would be the removed line and the io call its replacement - confirm
 * against the real file before relying on either.
 */
memory_region_init_reservation(&s->io_memory, "kvm-apic-msi",
MSI_SPACE_SIZE);
memory_region_init_io(&s->io_memory, &kvm_apic_io_ops, s, "kvm-apic-msi",
MSI_SPACE_SIZE);
/* MSI is only advertised when GSI routing is available. */
if (kvm_has_gsi_routing()) {
msi_supported = true;
}
}
static void kvm_apic_class_init(ObjectClass *klass, void *data)

View file

@ -24,6 +24,11 @@
#include "qemu-common.h"
#include "pci.h"
/* Address/data pair describing a single MSI message. */
struct MSIMessage {
/* 64-bit address the message is delivered to. */
uint64_t address;
/* 32-bit payload of the message. */
uint32_t data;
};
extern bool msi_supported;
bool msi_enabled(const PCIDevice *dev);

121
hw/msix.c
View file

@ -35,6 +35,15 @@
#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
#define MSIX_MAX_ENTRIES 32
/*
 * Build the MSI message currently programmed for @vector.
 *
 * The vector's entry is located in dev->msix_table_page; the address is
 * read as a quad at PCI_MSIX_ENTRY_LOWER_ADDR and the data as a long at
 * PCI_MSIX_ENTRY_DATA.  No bounds check is done on @vector.
 */
static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
{
    uint8_t *entry = &dev->msix_table_page[vector * PCI_MSIX_ENTRY_SIZE];
    MSIMessage result;

    result.address = pci_get_quad(entry + PCI_MSIX_ENTRY_LOWER_ADDR);
    result.data = pci_get_long(entry + PCI_MSIX_ENTRY_DATA);

    return result;
}
/* Add MSI-X capability to the config space for the device. */
/* Given a bar and its size, add MSI-X table on top of it
@ -130,13 +139,34 @@ static bool msix_is_masked(PCIDevice *dev, int vector)
return msix_vector_masked(dev, vector, dev->msix_function_masked);
}
/*
 * Tell the device's vector notifiers about the new mask state of @vector.
 *
 * Does nothing when no use notifier is registered.  A vector that is now
 * masked triggers the release notifier; a vector that is now unmasked
 * triggers the use notifier with the vector's current message, and a
 * negative result from it trips an assertion.
 */
static void msix_fire_vector_notifier(PCIDevice *dev,
                                      unsigned int vector, bool is_masked)
{
    int ret;

    if (!dev->msix_vector_use_notifier) {
        return;
    }

    if (!is_masked) {
        ret = dev->msix_vector_use_notifier(dev, vector,
                                            msix_get_message(dev, vector));
        assert(ret >= 0);
        return;
    }

    dev->msix_vector_release_notifier(dev, vector);
}
static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked)
{
bool is_masked = msix_is_masked(dev, vector);
if (is_masked == was_masked) {
return;
}
msix_fire_vector_notifier(dev, vector, is_masked);
if (!is_masked && msix_is_pending(dev, vector)) {
msix_clr_pending(dev, vector);
msix_notify(dev, vector);
@ -222,10 +252,14 @@ static void msix_mmio_setup(PCIDevice *d, MemoryRegion *bar)
/*
 * Set the per-vector mask bit on the first @nentries table entries.
 *
 * For each vector the previous mask state is sampled before the bit is
 * set, and msix_handle_mask_update() is then called with that state.
 */
static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
{
    int idx = 0;

    while (idx < nentries) {
        bool masked_before = msix_is_masked(dev, idx);
        unsigned ctrl_off =
            idx * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;

        dev->msix_table_page[ctrl_off] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
        msix_handle_mask_update(dev, idx, masked_before);
        ++idx;
    }
}
@ -317,6 +351,7 @@ void msix_save(PCIDevice *dev, QEMUFile *f)
void msix_load(PCIDevice *dev, QEMUFile *f)
{
unsigned n = dev->msix_entries_nr;
unsigned int vector;
if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) {
return;
@ -326,6 +361,10 @@ void msix_load(PCIDevice *dev, QEMUFile *f)
qemu_get_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE);
qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
msix_update_function_masked(dev);
for (vector = 0; vector < n; vector++) {
msix_handle_mask_update(dev, vector, true);
}
}
/* Does device support MSI-X? */
@ -352,9 +391,7 @@ uint32_t msix_bar_size(PCIDevice *dev)
/* Send an MSI-X message */
void msix_notify(PCIDevice *dev, unsigned vector)
{
uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE;
uint64_t address;
uint32_t data;
MSIMessage msg;
if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
return;
@ -363,9 +400,9 @@ void msix_notify(PCIDevice *dev, unsigned vector)
return;
}
address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
stl_le_phys(address, data);
msg = msix_get_message(dev, vector);
stl_le_phys(msg.address, msg.data);
}
void msix_reset(PCIDevice *dev)
@ -414,3 +451,75 @@ void msix_unuse_all_vectors(PCIDevice *dev)
return;
msix_free_irq_entries(dev);
}
/* Return the device's MSI-X entry count (dev->msix_entries_nr). */
unsigned int msix_nr_vectors_allocated(const PCIDevice *dev)
{
    unsigned int count = dev->msix_entries_nr;

    return count;
}
/*
 * Invoke the use notifier for @vector unless the vector is masked.
 *
 * Returns 0 for a masked vector, otherwise whatever the use notifier
 * returns.  The caller must have installed the notifier beforehand.
 */
static int msix_set_notifier_for_vector(PCIDevice *dev, unsigned int vector)
{
    if (!msix_is_masked(dev, vector)) {
        return dev->msix_vector_use_notifier(dev, vector,
                                             msix_get_message(dev, vector));
    }
    return 0;
}
/* Invoke the release notifier for @vector unless the vector is masked. */
static void msix_unset_notifier_for_vector(PCIDevice *dev, unsigned int vector)
{
    if (!msix_is_masked(dev, vector)) {
        dev->msix_vector_release_notifier(dev, vector);
    }
}
/*
 * Register use/release notifiers on @dev.
 *
 * Both callbacks are mandatory.  If MSI-X is currently enabled and the
 * function-wide mask is clear, the use notifier is fired right away for
 * every unmasked vector.  Should one of those calls fail, the vectors
 * already handled are released again in reverse order, the notifiers are
 * cleared and the negative error code is returned.
 *
 * Returns 0 on success.
 */
int msix_set_vector_notifiers(PCIDevice *dev,
                              MSIVectorUseNotifier use_notifier,
                              MSIVectorReleaseNotifier release_notifier)
{
    int idx, rc;

    assert(use_notifier && release_notifier);

    dev->msix_vector_use_notifier = use_notifier;
    dev->msix_vector_release_notifier = release_notifier;

    /* Nothing to fire unless MSI-X is enabled and not globally masked. */
    if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
         (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) != MSIX_ENABLE_MASK) {
        return 0;
    }

    for (idx = 0; idx < dev->msix_entries_nr; idx++) {
        rc = msix_set_notifier_for_vector(dev, idx);
        if (rc < 0) {
            /* Roll back the vectors set up before the failing one. */
            while (--idx >= 0) {
                msix_unset_notifier_for_vector(dev, idx);
            }
            dev->msix_vector_use_notifier = NULL;
            dev->msix_vector_release_notifier = NULL;
            return rc;
        }
    }

    return 0;
}
/*
 * Remove the vector notifiers from @dev.
 *
 * Both notifiers must currently be registered.  If MSI-X is enabled and
 * the function-wide mask is clear, the release notifier is fired for every
 * unmasked vector before the callbacks are cleared.
 */
void msix_unset_vector_notifiers(PCIDevice *dev)
{
    int idx;

    assert(dev->msix_vector_use_notifier &&
           dev->msix_vector_release_notifier);

    if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
         (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) {
        idx = 0;
        while (idx < dev->msix_entries_nr) {
            msix_unset_notifier_for_vector(dev, idx);
            idx++;
        }
    }

    dev->msix_vector_release_notifier = NULL;
    dev->msix_vector_use_notifier = NULL;
}

View file

@ -13,6 +13,8 @@ void msix_write_config(PCIDevice *pci_dev, uint32_t address,
int msix_uninit(PCIDevice *d, MemoryRegion *bar);
unsigned int msix_nr_vectors_allocated(const PCIDevice *dev);
void msix_save(PCIDevice *dev, QEMUFile *f);
void msix_load(PCIDevice *dev, QEMUFile *f);
@ -29,4 +31,8 @@ void msix_notify(PCIDevice *dev, unsigned vector);
void msix_reset(PCIDevice *dev);
int msix_set_vector_notifiers(PCIDevice *dev,
MSIVectorUseNotifier use_notifier,
MSIVectorReleaseNotifier release_notifier);
void msix_unset_vector_notifiers(PCIDevice *dev);
#endif

View file

@ -912,15 +912,6 @@ static DeviceState *apic_init(void *env, uint8_t apic_id)
apic_mapped = 1;
}
/* KVM does not support MSI yet. */
if (!kvm_irqchip_in_kernel()) {
msi_supported = true;
}
if (xen_msi_support()) {
msi_supported = true;
}
return dev;
}

View file

@ -56,31 +56,27 @@ static void kvm_piix3_setup_irq_routing(bool pci_enabled)
{
#ifdef CONFIG_KVM
KVMState *s = kvm_state;
int ret, i;
int i;
if (kvm_check_extension(s, KVM_CAP_IRQ_ROUTING)) {
for (i = 0; i < 8; ++i) {
if (i == 2) {
continue;
}
kvm_irqchip_add_route(s, i, KVM_IRQCHIP_PIC_MASTER, i);
kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_PIC_MASTER, i);
}
for (i = 8; i < 16; ++i) {
kvm_irqchip_add_route(s, i, KVM_IRQCHIP_PIC_SLAVE, i - 8);
kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_PIC_SLAVE, i - 8);
}
if (pci_enabled) {
for (i = 0; i < 24; ++i) {
if (i == 0) {
kvm_irqchip_add_route(s, i, KVM_IRQCHIP_IOAPIC, 2);
kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_IOAPIC, 2);
} else if (i != 2) {
kvm_irqchip_add_route(s, i, KVM_IRQCHIP_IOAPIC, i);
kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_IOAPIC, i);
}
}
}
ret = kvm_irqchip_commit_routes(s);
if (ret < 0) {
hw_error("KVM IRQ routing setup failed");
}
}
#endif /* CONFIG_KVM */
}

View file

@ -173,6 +173,10 @@ typedef struct PCIDeviceClass {
const char *romfile;
} PCIDeviceClass;
/*
 * Callback invoked when MSI-X vector @vector of @dev comes into use; @msg
 * is the message currently programmed for that vector.  The MSI-X code in
 * this change calls it for unmasked vectors and treats a negative return
 * value as failure.
 */
typedef int (*MSIVectorUseNotifier)(PCIDevice *dev, unsigned int vector,
MSIMessage msg);
/*
 * Callback invoked when MSI-X vector @vector of @dev is no longer in use
 * (for instance when it becomes masked).
 */
typedef void (*MSIVectorReleaseNotifier)(PCIDevice *dev, unsigned int vector);
struct PCIDevice {
DeviceState qdev;
/* PCI config space */
@ -243,6 +247,10 @@ struct PCIDevice {
bool has_rom;
MemoryRegion rom;
uint32_t rom_bar;
/* MSI-X notifiers */
MSIVectorUseNotifier msix_vector_use_notifier;
MSIVectorReleaseNotifier msix_vector_release_notifier;
};
void pci_register_bar(PCIDevice *pci_dev, int region_num,

View file

@ -24,6 +24,7 @@
#include "virtio-scsi.h"
#include "pci.h"
#include "qemu-error.h"
#include "msi.h"
#include "msix.h"
#include "net.h"
#include "loader.h"
@ -539,6 +540,107 @@ static void virtio_pci_guest_notifier_read(void *opaque)
}
}
/*
 * Attach the guest notifier of queue @queue_no to MSI-X vector @vector.
 *
 * The first user of a vector allocates an MSI route for @msg and stores
 * the resulting virq; later users share it.  The queue's notifier fd is
 * then registered as an irqfd on that virq and its fd handler is removed.
 *
 * Returns 0 on success or a negative error code.  On irqfd failure the
 * user count is dropped again and the virq released if nobody is left.
 */
static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
                                        unsigned int queue_no,
                                        unsigned int vector,
                                        MSIMessage msg)
{
    VirtQueue *queue = virtio_get_queue(proxy->vdev, queue_no);
    VirtIOIRQFD *slot = proxy->vector_irqfd + vector;
    int notifier_fd =
        event_notifier_get_fd(virtio_queue_get_guest_notifier(queue));
    int rc;

    if (!slot->users) {
        rc = kvm_irqchip_add_msi_route(kvm_state, msg);
        if (rc < 0) {
            return rc;
        }
        slot->virq = rc;
    }
    slot->users += 1;

    rc = kvm_irqchip_add_irqfd(kvm_state, notifier_fd, slot->virq);
    if (rc >= 0) {
        qemu_set_fd_handler(notifier_fd, NULL, NULL, NULL);
        return 0;
    }

    /* irqfd registration failed: give back our reference. */
    slot->users -= 1;
    if (slot->users == 0) {
        kvm_irqchip_release_virq(kvm_state, slot->virq);
    }
    return rc;
}
/*
 * Detach the guest notifier of queue @queue_no from MSI-X vector @vector.
 *
 * Removes the irqfd (which must succeed), drops the vector's user count,
 * releases the virq once the count hits zero, and finally re-installs
 * virtio_pci_guest_notifier_read() as the notifier fd's read handler.
 */
static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
                                             unsigned int queue_no,
                                             unsigned int vector)
{
    VirtQueue *queue = virtio_get_queue(proxy->vdev, queue_no);
    VirtIOIRQFD *slot = proxy->vector_irqfd + vector;
    int notifier_fd =
        event_notifier_get_fd(virtio_queue_get_guest_notifier(queue));
    int ret = kvm_irqchip_remove_irqfd(kvm_state, notifier_fd, slot->virq);

    assert(ret == 0);

    slot->users -= 1;
    if (slot->users == 0) {
        kvm_irqchip_release_virq(kvm_state, slot->virq);
    }

    qemu_set_fd_handler(notifier_fd, virtio_pci_guest_notifier_read, NULL,
                        queue);
}
/*
 * MSI-X "use" notifier for virtio-pci.
 *
 * Walks the queues up to the first one that reports a size of zero and
 * attaches every queue bound to @vector.  If attaching one queue fails,
 * the queues attached earlier for this vector are detached again and the
 * negative error code is returned; otherwise 0 is returned.
 */
static int kvm_virtio_pci_vector_use(PCIDevice *dev, unsigned vector,
                                     MSIMessage msg)
{
    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
    VirtIODevice *vdev = proxy->vdev;
    int q, rc;

    for (q = 0;
         q < VIRTIO_PCI_QUEUE_MAX && virtio_queue_get_num(vdev, q);
         q++) {
        if (virtio_queue_vector(vdev, q) == vector) {
            rc = kvm_virtio_pci_vq_vector_use(proxy, q, vector, msg);
            if (rc < 0) {
                /* Undo the queues already attached to this vector. */
                while (--q >= 0) {
                    if (virtio_queue_vector(vdev, q) == vector) {
                        kvm_virtio_pci_vq_vector_release(proxy, q, vector);
                    }
                }
                return rc;
            }
        }
    }

    return 0;
}
/*
 * MSI-X "release" notifier for virtio-pci.
 *
 * Walks the queues up to the first one that reports a size of zero and
 * detaches every queue bound to @vector.
 */
static void kvm_virtio_pci_vector_release(PCIDevice *dev, unsigned vector)
{
    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
    VirtIODevice *vdev = proxy->vdev;
    int q;

    for (q = 0;
         q < VIRTIO_PCI_QUEUE_MAX && virtio_queue_get_num(vdev, q);
         q++) {
        if (virtio_queue_vector(vdev, q) == vector) {
            kvm_virtio_pci_vq_vector_release(proxy, q, vector);
        }
    }
}
static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign)
{
VirtIOPCIProxy *proxy = opaque;
@ -555,6 +657,9 @@ static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign)
} else {
qemu_set_fd_handler(event_notifier_get_fd(notifier),
NULL, NULL, NULL);
/* Test and clear notifier before closing it,
* in case poll callback didn't have time to run. */
virtio_pci_guest_notifier_read(vq);
event_notifier_cleanup(notifier);
}
@ -573,6 +678,13 @@ static int virtio_pci_set_guest_notifiers(void *opaque, bool assign)
VirtIODevice *vdev = proxy->vdev;
int r, n;
/* Must unset vector notifier while guest notifier is still assigned */
if (kvm_irqchip_in_kernel() && !assign) {
msix_unset_vector_notifiers(&proxy->pci_dev);
g_free(proxy->vector_irqfd);
proxy->vector_irqfd = NULL;
}
for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) {
if (!virtio_queue_get_num(vdev, n)) {
break;
@ -584,10 +696,24 @@ static int virtio_pci_set_guest_notifiers(void *opaque, bool assign)
}
}
/* Must set vector notifier after guest notifier has been assigned */
if (kvm_irqchip_in_kernel() && assign) {
proxy->vector_irqfd =
g_malloc0(sizeof(*proxy->vector_irqfd) *
msix_nr_vectors_allocated(&proxy->pci_dev));
r = msix_set_vector_notifiers(&proxy->pci_dev,
kvm_virtio_pci_vector_use,
kvm_virtio_pci_vector_release);
if (r < 0) {
goto assign_error;
}
}
return 0;
assign_error:
/* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */
assert(assign);
while (--n >= 0) {
virtio_pci_set_guest_notifier(opaque, n, !assign);
}

View file

@ -25,6 +25,11 @@
#define VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT 1
#define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)
/* Per-MSI-X-vector irqfd bookkeeping for a virtio-pci proxy. */
typedef struct {
/* virq obtained from kvm_irqchip_add_msi_route() for this vector. */
int virq;
/* Number of queues currently attached to this vector's virq. */
unsigned int users;
} VirtIOIRQFD;
typedef struct {
PCIDevice pci_dev;
VirtIODevice *vdev;
@ -44,6 +49,7 @@ typedef struct {
VirtIOSCSIConf scsi;
bool ioeventfd_disabled;
bool ioeventfd_started;
VirtIOIRQFD *vector_irqfd;
} VirtIOPCIProxy;
void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev);

View file

@ -57,14 +57,4 @@ void xen_register_framebuffer(struct MemoryRegion *mr);
# define HVM_MAX_VCPUS 32
#endif
/*
 * Report whether MSI can be used under Xen.
 *
 * Returns xen_enabled() when built against a Xen control interface of
 * version 420 or newer, and 0 otherwise.
 */
static inline int xen_msi_support(void)
{
    int supported = 0;

#if defined(CONFIG_XEN_CTRL_INTERFACE_VERSION) \
    && CONFIG_XEN_CTRL_INTERFACE_VERSION >= 420
    supported = xen_enabled();
#endif

    return supported;
}
#endif /* QEMU_HW_XEN_H */

View file

@ -40,6 +40,11 @@ static void xen_apic_init(APICCommonState *s)
{
memory_region_init_io(&s->io_memory, &xen_apic_io_ops, s, "xen-apic-msi",
MSI_SPACE_SIZE);
#if defined(CONFIG_XEN_CTRL_INTERFACE_VERSION) \
&& CONFIG_XEN_CTRL_INTERFACE_VERSION >= 420
msi_supported = true;
#endif
}
static void xen_apic_set_base(APICCommonState *s, uint64_t val)