pc, pci, virtio, vhost: fixes, features

Beginning of merging vDPA, new PCI ID, a new virtio balloon stat, intel
 iommu rework fixing a couple of security problems (no CVEs yet), fixes
 all over the place.
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQEcBAABAgAGBQJbBX2cAAoJECgfDbjSjVRpOEYIAIR6KGkwbAJ9SnO9B71DQHl1
 yYYgM7i2HwyZ1YPnXOYWnI1lzQ1bARTf2krQJFGmfjlDaueFf9KnXdNByoVCmG8m
 UhF/rQp3DcJ4wTABktPtME8gWdQxKPmDxlN5W3f29Zrm3g9S+Hshi+sfPZUkBxL4
 gQMFRctb2SxvQXG+lusHVwo1oF6pzGZMmX35906he3m4xS/cfoeCP7Qj6nSvHZq7
 lsLoOeYxHtXWA9gTYxpd7zW+hhUxkspoOqcXySHfO7e5enJANaulTxKuC0T+6HL4
 O2iUM+1wjUYE0tQcNJ6x7emA82k5OdG2OMD6gbR1oSdquttJo7+4R+goqpb44rc=
 =NUoY
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging

pc, pci, virtio, vhost: fixes, features

Beginning of merging vDPA, new PCI ID, a new virtio balloon stat, intel
iommu rework fixing a couple of security problems (no CVEs yet), fixes
all over the place.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Wed 23 May 2018 15:41:32 BST
# gpg:                using RSA key 281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (28 commits)
  intel-iommu: rework the page walk logic
  util: implement simple iova tree
  intel-iommu: trace domain id during page walk
  intel-iommu: pass in address space when page walk
  intel-iommu: introduce vtd_page_walk_info
  intel-iommu: only do page walk for MAP notifiers
  intel-iommu: add iommu lock
  intel-iommu: remove IntelIOMMUNotifierNode
  intel-iommu: send PSI always even if across PDEs
  nvdimm: fix typo in label-size definition
  contrib/vhost-user-blk: enable protocol feature for vhost-user-blk
  hw/virtio: Fix brace Werror with clang 6.0.0
  libvhost-user: Send messages with no data
  vhost-user+postcopy: Use qemu_set_nonblock
  virtio: support setting memory region based host notifier
  vhost-user: support receiving file descriptors in slave_read
  vhost-user: add Net prefix to internal state structure
  linux-headers: add kvm header for mips
  linux-headers: add unistd.h on all arches
  update-linux-headers.sh: unistd.h, kvm consistency
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2018-05-24 14:22:23 +01:00
commit 45eabb2ede
54 changed files with 1731 additions and 456 deletions

View file

@ -27,6 +27,7 @@
#include "hw/i386/ioapic.h"
#include "hw/pci/msi.h"
#include "hw/sysbus.h"
#include "qemu/iova-tree.h"
#define TYPE_INTEL_IOMMU_DEVICE "intel-iommu"
#define INTEL_IOMMU_DEVICE(obj) \
@ -67,7 +68,6 @@ typedef union VTD_IR_TableEntry VTD_IR_TableEntry;
typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
typedef struct VTDIrq VTDIrq;
typedef struct VTD_MSIMessage VTD_MSIMessage;
typedef struct IntelIOMMUNotifierNode IntelIOMMUNotifierNode;
/* Context-Entry */
struct VTDContextEntry {
@ -93,6 +93,10 @@ struct VTDAddressSpace {
MemoryRegion iommu_ir; /* Interrupt region: 0xfeeXXXXX */
IntelIOMMUState *iommu_state;
VTDContextCacheEntry context_cache_entry;
QLIST_ENTRY(VTDAddressSpace) next;
/* Superset of notifier flags that this address space has */
IOMMUNotifierFlag notifier_flags;
IOVATree *iova_tree; /* Traces mapped IOVA ranges */
};
struct VTDBus {
@ -253,11 +257,6 @@ struct VTD_MSIMessage {
/* When IR is enabled, all MSI/MSI-X data bits should be zero */
#define VTD_IR_MSI_DATA (0)
struct IntelIOMMUNotifierNode {
VTDAddressSpace *vtd_as;
QLIST_ENTRY(IntelIOMMUNotifierNode) next;
};
/* The iommu (DMAR) device state struct */
struct IntelIOMMUState {
X86IOMMUState x86_iommu;
@ -295,7 +294,7 @@ struct IntelIOMMUState {
GHashTable *vtd_as_by_busptr; /* VTDBus objects indexed by PCIBus* reference */
VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by bus number */
/* list of registered notifiers */
QLIST_HEAD(, IntelIOMMUNotifierNode) notifiers_list;
QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
/* interrupt remapping */
bool intr_enabled; /* Whether guest enabled IR */
@ -305,6 +304,12 @@ struct IntelIOMMUState {
OnOffAuto intr_eim; /* Toggle for EIM capability */
bool buggy_eim; /* Force buggy EIM unless eim=off */
uint8_t aw_bits; /* Host/IOVA address width (in bits) */
/*
* Protects IOMMU states in general. Currently it protects the
* per-IOMMU IOTLB cache, and context entry cache in VTDAddressSpace.
*/
QemuMutex iommu_lock;
};
/* Find the VTD Address space associated with the given bus pointer,

View file

@ -48,7 +48,7 @@
#define NVDIMM_GET_CLASS(obj) OBJECT_GET_CLASS(NVDIMMClass, (obj), \
TYPE_NVDIMM)
#define NVDIMM_LABLE_SIZE_PROP "label-size"
#define NVDIMM_LABEL_SIZE_PROP "label-size"
#define NVDIMM_UNARMED_PROP "unarmed"
struct NVDIMMDevice {

View file

@ -101,6 +101,7 @@ extern bool pci_available;
#define PCI_DEVICE_ID_REDHAT_PCIE_RP 0x000c
#define PCI_DEVICE_ID_REDHAT_XHCI 0x000d
#define PCI_DEVICE_ID_REDHAT_PCIE_BRIDGE 0x000e
#define PCI_DEVICE_ID_REDHAT_MDPY 0x000f
#define PCI_DEVICE_ID_REDHAT_QXL 0x0100
#define FMT_PCIBUS PRIx64

View file

@ -52,6 +52,8 @@ typedef struct VirtioBusClass {
bool (*has_extra_state)(DeviceState *d);
bool (*query_guest_notifiers)(DeviceState *d);
int (*set_guest_notifiers)(DeviceState *d, int nvqs, bool assign);
int (*set_host_notifier_mr)(DeviceState *d, int n,
MemoryRegion *mr, bool assign);
void (*vmstate_change)(DeviceState *d, bool running);
/*
* Expose the features the transport layer supports before

View file

@ -239,6 +239,8 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align);
void virtio_queue_notify(VirtIODevice *vdev, int n);
uint16_t virtio_queue_vector(VirtIODevice *vdev, int n);
void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector);
int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
MemoryRegion *mr, bool assign);
int virtio_set_status(VirtIODevice *vdev, uint8_t val);
void virtio_reset(void *opaque);
void virtio_update_irq(VirtIODevice *vdev);

134
include/qemu/iova-tree.h Normal file
View file

@ -0,0 +1,134 @@
/*
* A very simplified iova tree implementation based on GTree.
*
* Copyright 2018 Red Hat, Inc.
*
* Authors:
* Peter Xu <peterx@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
*/
#ifndef IOVA_TREE_H
#define IOVA_TREE_H
/*
* Currently the iova tree only keeps range information; no extra
* user data is allowed for each element. A benefit is that we can
* merge adjacent ranges internally within the tree. This can save a
* lot of memory when the ranges are split but mostly contiguous.
*
* Note that the current implementation does not provide any thread
* protection. Callers of the iova tree are responsible for thread
* safety.
*/
#include "qemu/osdep.h"
#include "exec/memory.h"
#include "exec/hwaddr.h"
/* Status codes of the iova tree API: 0 means success, <0 means error. */
#define IOVA_OK (0)
#define IOVA_ERR_INVALID (-1) /* Invalid parameters */
#define IOVA_ERR_OVERLAP (-2) /* IOVA range overlapped */
/* Opaque tree handle; the structure layout is not defined in this header. */
typedef struct IOVATree IOVATree;
/*
* Describes one IOVA range together with the address it translates to
* and the access permission of the mapping.
*/
typedef struct DMAMap {
hwaddr iova; /* Start of the IOVA range */
hwaddr translated_addr; /* Ignored by iova_tree_remove() */
hwaddr size; /* Inclusive; a single-address lookup uses size=0 */
IOMMUAccessFlags perm; /* Access permission of the mapping */
} QEMU_PACKED DMAMap;
/*
* Callback type used by iova_tree_foreach(). It receives a mapping of
* the tree and returns true to stop the iteration.
*/
typedef gboolean (*iova_tree_iterator)(DMAMap *map);
/**
* iova_tree_new:
*
* Create a new, empty iova tree.
*
* Returns: the tree pointer on success, or NULL on error.
*/
IOVATree *iova_tree_new(void);
/**
* iova_tree_insert:
*
* @tree: the iova tree to insert into
* @map: the mapping to insert
*
* Insert an iova range into the tree. If the range overlaps with an
* existing range, IOVA_ERR_OVERLAP will be returned.
*
* Return: 0 if succeeded, or <0 if error.
*/
int iova_tree_insert(IOVATree *tree, DMAMap *map);
/**
* iova_tree_remove:
*
* @tree: the iova tree to remove range from
* @map: the map range to remove
*
* Remove mappings from the tree that are covered by the map range
* provided. The range does not need to be exactly what was inserted;
* all the mappings that are included in the provided range will be
* removed from the tree. Here map->translated_addr is meaningless.
*
* Return: 0 if succeeded, or <0 if error.
*/
int iova_tree_remove(IOVATree *tree, DMAMap *map);
/**
* iova_tree_find:
*
* @tree: the iova tree to search from
* @map: the mapping to search
*
* Search for a mapping in the iova tree that overlaps with the
* mapping range specified. Only the first mapping found will be
* returned.
*
* Return: DMAMap pointer if found, or NULL if not found. Note that
* the returned DMAMap pointer is maintained internally. The user
* should only read the content, and never modify or free it. Also,
* the user is responsible for making sure the pointer is still valid
* (say, no concurrent deletion in progress).
*/
DMAMap *iova_tree_find(IOVATree *tree, DMAMap *map);
/**
* iova_tree_find_address:
*
* @tree: the iova tree to search from
* @iova: the iova address to find
*
* Similar to iova_tree_find(), but it looks for a mapping using the
* range iova=iova & size=0, i.e. a mapping that contains the single
* address @iova.
*
* Return: same as iova_tree_find().
*/
DMAMap *iova_tree_find_address(IOVATree *tree, hwaddr iova);
/**
* iova_tree_foreach:
*
* @tree: the iova tree to iterate on
* @iterator: the iterator for the mappings, return true to stop
*
* Iterate over the iova tree.
*
* Return: None.
*/
void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator);
/**
* iova_tree_destroy:
*
* @tree: the iova tree to destroy
*
* Destroy an existing iova tree. The @tree pointer must not be used
* after this call.
*
* Return: None.
*/
void iova_tree_destroy(IOVATree *tree);
#endif

View file

@ -0,0 +1,121 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_X86_KVM_PARA_H
#define _ASM_X86_KVM_PARA_H
#include "standard-headers/linux/types.h"
/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It
* should be used to determine that a VM is running under KVM.
*/
#define KVM_CPUID_SIGNATURE 0x40000000
/* This CPUID returns two feature bitmaps in eax, edx. Before enabling
* a particular paravirtualization, the appropriate feature bit should
* be checked in eax. The performance hint feature bit should be checked
* in edx.
*/
#define KVM_CPUID_FEATURES 0x40000001
/* NOTE(review): the KVM_FEATURE_* values below are presumably bit
* positions within the eax feature bitmap described above -- confirm
* against the KVM CPUID documentation.
*/
#define KVM_FEATURE_CLOCKSOURCE 0
#define KVM_FEATURE_NOP_IO_DELAY 1
#define KVM_FEATURE_MMU_OP 2
/* This indicates that the new set of kvmclock msrs
* are available. The use of 0x11 and 0x12 is deprecated
*/
#define KVM_FEATURE_CLOCKSOURCE2 3
#define KVM_FEATURE_ASYNC_PF 4
#define KVM_FEATURE_STEAL_TIME 5
#define KVM_FEATURE_PV_EOI 6
#define KVM_FEATURE_PV_UNHALT 7
#define KVM_FEATURE_PV_TLB_FLUSH 9
#define KVM_FEATURE_ASYNC_PF_VMEXIT 10
/* NOTE(review): presumably a bit position in the edx performance hint
* bitmap rather than in eax -- confirm.
*/
#define KVM_HINTS_DEDICATED 0
/* The last 8 bits are used to indicate how to interpret the flags field
* in pvclock structure. If no bits are set, all flags are ignored.
*/
#define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24
/* Original kvmclock MSR numbers. As noted at KVM_FEATURE_CLOCKSOURCE2,
* the use of 0x11 and 0x12 is deprecated.
*/
#define MSR_KVM_WALL_CLOCK 0x11
#define MSR_KVM_SYSTEM_TIME 0x12
#define KVM_MSR_ENABLED 1
/* Custom MSRs fall in the range 0x4b564d00-0x4b564dff */
#define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00
#define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
#define MSR_KVM_STEAL_TIME 0x4b564d03
#define MSR_KVM_PV_EOI_EN 0x4b564d04
/* Steal time record. The fixed-width members add up to 64 bytes
* (8 + 4 + 4 + 1 + 3 + 11 * 4).
* NOTE(review): presumably the area registered via MSR_KVM_STEAL_TIME
* -- confirm against the KVM MSR documentation.
*/
struct kvm_steal_time {
uint64_t steal;
uint32_t version;
uint32_t flags;
uint8_t preempted; /* NOTE(review): presumably holds KVM_VCPU_* bits -- confirm */
uint8_t uint8_t_pad[3];
uint32_t pad[11];
};
/* Single-bit flag masks (bit 0 and bit 1) */
#define KVM_VCPU_PREEMPTED (1 << 0)
#define KVM_VCPU_FLUSH_TLB (1 << 1)
/* NOTE(review): presumably selects the clock type for a clock pairing
* request -- confirm against the KVM hypercall documentation.
*/
#define KVM_CLOCK_PAIRING_WALLCLOCK 0
/* Clock pairing record. The fixed-width members add up to 64 bytes
* (8 + 8 + 8 + 4 + 9 * 4).
*/
struct kvm_clock_pairing {
int64_t sec;
int64_t nsec;
uint64_t tsc;
uint32_t flags;
uint32_t pad[9];
};
/* With KVM_STEAL_ALIGNMENT_BITS == 5:
* - KVM_STEAL_VALID_BITS has bit 6 and every higher bit set;
* - KVM_STEAL_RESERVED_MASK has bits 1..5 set (0x3e).
* Bit 0 is covered by neither mask.
*/
#define KVM_STEAL_ALIGNMENT_BITS 5
#define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1)))
#define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1)
#define KVM_MAX_MMU_OP_BATCH 32
/* Single-bit flag masks (bits 0, 1 and 2) */
#define KVM_ASYNC_PF_ENABLED (1 << 0)
#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1)
#define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT (1 << 2)
/* Operations for KVM_HC_MMU_OP */
#define KVM_MMU_OP_WRITE_PTE 1
#define KVM_MMU_OP_FLUSH_TLB 2
#define KVM_MMU_OP_RELEASE_PT 3
/* Payload for KVM_HC_MMU_OP */
/* Common header (8 bytes) that starts every payload structure below.
* NOTE(review): op is presumably one of KVM_MMU_OP_* -- confirm.
*/
struct kvm_mmu_op_header {
uint32_t op;
uint32_t pad;
};
/* Header followed by two 64-bit values (24 bytes in total) */
struct kvm_mmu_op_write_pte {
struct kvm_mmu_op_header header;
uint64_t pte_phys;
uint64_t pte_val;
};
/* Header only; carries no additional data */
struct kvm_mmu_op_flush_tlb {
struct kvm_mmu_op_header header;
};
/* Header followed by one 64-bit value (16 bytes in total) */
struct kvm_mmu_op_release_pt {
struct kvm_mmu_op_header header;
uint64_t pt_phys;
};
#define KVM_PV_REASON_PAGE_NOT_PRESENT 1
#define KVM_PV_REASON_PAGE_READY 2
/* The fixed-width members add up to 68 bytes (4 + 60 + 4); the pad
* array places "enabled" at byte offset 64.
* NOTE(review): reason is presumably one of KVM_PV_REASON_* -- confirm.
*/
struct kvm_vcpu_pv_apf_data {
uint32_t reason;
uint8_t pad[60];
uint32_t enabled;
};
/* PV EOI state lives in bit 0: the mask evaluates to 0x1, so
* KVM_PV_EOI_ENABLED is 0x1 and KVM_PV_EOI_DISABLED is 0x0.
*/
#define KVM_PV_EOI_BIT 0
#define KVM_PV_EOI_MASK (0x1 << KVM_PV_EOI_BIT)
#define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
#define KVM_PV_EOI_DISABLED 0x0
#endif /* _ASM_X86_KVM_PARA_H */

View file

@ -53,7 +53,9 @@ struct virtio_balloon_config {
#define VIRTIO_BALLOON_S_MEMTOT 5 /* Total amount of memory */
#define VIRTIO_BALLOON_S_AVAIL 6 /* Available memory as in /proc */
#define VIRTIO_BALLOON_S_CACHES 7 /* Disk caches */
#define VIRTIO_BALLOON_S_NR 8
#define VIRTIO_BALLOON_S_HTLB_PGALLOC 8 /* Hugetlb page allocations */
#define VIRTIO_BALLOON_S_HTLB_PGFAIL 9 /* Hugetlb page allocation failures */
#define VIRTIO_BALLOON_S_NR 10
/*
* Memory statistics structure.

View file

@ -22,7 +22,6 @@
#ifdef NEED_CPU_H
# ifdef CONFIG_KVM
# include <linux/kvm.h>
# include <linux/kvm_para.h>
# define CONFIG_KVM_IS_POSSIBLE
# endif
#else