vhost, pc, virtio features, fixes, cleanups

New features:
     VT-d support for devices behind a bridge
     vhost-user migration support
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQEcBAABAgAGBQJWKMrnAAoJECgfDbjSjVRpVL0H/iRc31o00QE4nWBRpxUpf8WJ
 V5RWE8qKkDgBha5bS5Nt4vs8K4jkkHGXCbmygMidWph96hUPK8/yHy1A/wmpBibB
 5hVSPDK8onavNGJwpaWDrkhd9OhKAaKOuu49T6+VWJGZY/uX5ayqmcN934y0NPUa
 4EhH5tyxPpYOYeW9i/VOMQ374gCJcpzYBMug4NJZRyFpfz/b2mzAQtoqw3EsPtB0
 vpVJ+fKiCyG39HFKQJW7cL12yBeXOoyhjfDxpumLqwLWMfmde+vJwTFx6wbechgV
 aU3jIdvUX8wHCNYaB937NsMaDALoGNqUjbpKnf+xD1w7xr9pwTzdyrGH3rpGLEE=
 =+G1+
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging

vhost, pc, virtio features, fixes, cleanups

New features:
    VT-d support for devices behind a bridge
    vhost-user migration support

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Thu 22 Oct 2015 12:39:19 BST using RSA key ID D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"

* remotes/mst/tags/for_upstream: (37 commits)
  hw/isa/lpc_ich9: inject the SMI on the VCPU that is writing to APM_CNT
  i386: keep cpu_model field in MachineState uptodate
  vhost: set the correct queue index in case of migration with multiqueue
  piix: fix resource leak reported by Coverity
  seccomp: add memfd_create to whitelist
  vhost-user-test: check ownership during migration
  vhost-user-test: add live-migration test
  vhost-user-test: learn to tweak various qemu arguments
  vhost-user-test: wrap server in TestServer struct
  vhost-user-test: remove useless static check
  vhost-user-test: move wait_for_fds() out
  vhost: add migration block if memfd failed
  vhost-user: use an enum helper for features mask
  vhost user: add rarp sending after live migration for legacy guest
  vhost user: add support of live migration
  net: add trace_vhost_user_event
  vhost-user: document migration log
  vhost: use a function for each call
  vhost-user: add a migration blocker
  vhost-user: send log shm fd along with log_base
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2015-10-22 12:41:44 +01:00
commit ca3e40e233
45 changed files with 5097 additions and 483 deletions

View file

@ -22,6 +22,7 @@
#include "hw/sysbus.h"
#include "exec/address-spaces.h"
#include "intel_iommu_internal.h"
#include "hw/pci/pci.h"
/*#define DEBUG_INTEL_IOMMU*/
#ifdef DEBUG_INTEL_IOMMU
@ -166,19 +167,17 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
*/
static void vtd_reset_context_cache(IntelIOMMUState *s)
{
VTDAddressSpace **pvtd_as;
VTDAddressSpace *vtd_as;
uint32_t bus_it;
VTDBus *vtd_bus;
GHashTableIter bus_it;
uint32_t devfn_it;
g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr);
VTD_DPRINTF(CACHE, "global context_cache_gen=1");
for (bus_it = 0; bus_it < VTD_PCI_BUS_MAX; ++bus_it) {
pvtd_as = s->address_spaces[bus_it];
if (!pvtd_as) {
continue;
}
while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) {
for (devfn_it = 0; devfn_it < VTD_PCI_DEVFN_MAX; ++devfn_it) {
vtd_as = pvtd_as[devfn_it];
vtd_as = vtd_bus->dev_as[devfn_it];
if (!vtd_as) {
continue;
}
@ -754,12 +753,13 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr)
* @is_write: The access is a write operation
* @entry: IOMMUTLBEntry that contain the addr to be translated and result
*/
static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, uint8_t bus_num,
static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
uint8_t devfn, hwaddr addr, bool is_write,
IOMMUTLBEntry *entry)
{
IntelIOMMUState *s = vtd_as->iommu_state;
VTDContextEntry ce;
uint8_t bus_num = pci_bus_num(bus);
VTDContextCacheEntry *cc_entry = &vtd_as->context_cache_entry;
uint64_t slpte;
uint32_t level;
@ -874,6 +874,29 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s)
}
}
/* Find the VTD address space currently associated with a given bus number,
*/
static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
{
VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num];
if (!vtd_bus) {
/* Iterate over the registered buses to find the one
* which currently hold this bus number, and update the bus_num lookup table:
*/
GHashTableIter iter;
g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
while (g_hash_table_iter_next (&iter, NULL, (void**)&vtd_bus)) {
if (pci_bus_num(vtd_bus->bus) == bus_num) {
s->vtd_as_by_bus_num[bus_num] = vtd_bus;
return vtd_bus;
}
}
}
return vtd_bus;
}
/* Do a context-cache device-selective invalidation.
* @func_mask: FM field after shifting
*/
@ -882,7 +905,7 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
uint16_t func_mask)
{
uint16_t mask;
VTDAddressSpace **pvtd_as;
VTDBus *vtd_bus;
VTDAddressSpace *vtd_as;
uint16_t devfn;
uint16_t devfn_it;
@ -903,11 +926,11 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
}
VTD_DPRINTF(INV, "device-selective invalidation source 0x%"PRIx16
" mask %"PRIu16, source_id, mask);
pvtd_as = s->address_spaces[VTD_SID_TO_BUS(source_id)];
if (pvtd_as) {
vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id));
if (vtd_bus) {
devfn = VTD_SID_TO_DEVFN(source_id);
for (devfn_it = 0; devfn_it < VTD_PCI_DEVFN_MAX; ++devfn_it) {
vtd_as = pvtd_as[devfn_it];
vtd_as = vtd_bus->dev_as[devfn_it];
if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
VTD_DPRINTF(INV, "invalidate context-cahce of devfn 0x%"PRIx16,
devfn_it);
@ -1805,11 +1828,11 @@ static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr,
return ret;
}
vtd_do_iommu_translate(vtd_as, vtd_as->bus_num, vtd_as->devfn, addr,
vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn, addr,
is_write, &ret);
VTD_DPRINTF(MMU,
"bus %"PRIu8 " slot %"PRIu8 " func %"PRIu8 " devfn %"PRIu8
" gpa 0x%"PRIx64 " hpa 0x%"PRIx64, vtd_as->bus_num,
" gpa 0x%"PRIx64 " hpa 0x%"PRIx64, pci_bus_num(vtd_as->bus),
VTD_PCI_SLOT(vtd_as->devfn), VTD_PCI_FUNC(vtd_as->devfn),
vtd_as->devfn, addr, ret.translated_addr);
return ret;
@ -1839,6 +1862,38 @@ static Property vtd_properties[] = {
DEFINE_PROP_END_OF_LIST(),
};
VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
{
uintptr_t key = (uintptr_t)bus;
VTDBus *vtd_bus = g_hash_table_lookup(s->vtd_as_by_busptr, &key);
VTDAddressSpace *vtd_dev_as;
if (!vtd_bus) {
/* No corresponding free() */
vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * VTD_PCI_DEVFN_MAX);
vtd_bus->bus = bus;
key = (uintptr_t)bus;
g_hash_table_insert(s->vtd_as_by_busptr, &key, vtd_bus);
}
vtd_dev_as = vtd_bus->dev_as[devfn];
if (!vtd_dev_as) {
vtd_bus->dev_as[devfn] = vtd_dev_as = g_malloc0(sizeof(VTDAddressSpace));
vtd_dev_as->bus = bus;
vtd_dev_as->devfn = (uint8_t)devfn;
vtd_dev_as->iommu_state = s;
vtd_dev_as->context_cache_entry.context_cache_gen = 0;
memory_region_init_iommu(&vtd_dev_as->iommu, OBJECT(s),
&s->iommu_ops, "intel_iommu", UINT64_MAX);
address_space_init(&vtd_dev_as->as,
&vtd_dev_as->iommu, "intel_iommu");
}
return vtd_dev_as;
}
/* Do the initialization. It will also be called when reset, so pay
* attention when adding new initialization stuff.
*/
@ -1931,13 +1986,15 @@ static void vtd_realize(DeviceState *dev, Error **errp)
IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
VTD_DPRINTF(GENERAL, "");
memset(s->address_spaces, 0, sizeof(s->address_spaces));
memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
"intel_iommu", DMAR_REG_SIZE);
sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->csrmem);
/* No corresponding destroy */
s->iotlb = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
g_free, g_free);
s->vtd_as_by_busptr = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
g_free, g_free);
vtd_init(s);
}

View file

@ -1078,11 +1078,10 @@ out:
return cpu;
}
static const char *current_cpu_model;
void pc_hot_add_cpu(const int64_t id, Error **errp)
{
X86CPU *cpu;
MachineState *machine = MACHINE(qdev_get_machine());
int64_t apic_id = x86_cpu_apic_id_from_index(id);
Error *local_err = NULL;
@ -1110,7 +1109,7 @@ void pc_hot_add_cpu(const int64_t id, Error **errp)
return;
}
cpu = pc_new_cpu(current_cpu_model, apic_id, &local_err);
cpu = pc_new_cpu(machine->cpu_model, apic_id, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
@ -1118,22 +1117,22 @@ void pc_hot_add_cpu(const int64_t id, Error **errp)
object_unref(OBJECT(cpu));
}
void pc_cpus_init(const char *cpu_model)
void pc_cpus_init(PCMachineState *pcms)
{
int i;
X86CPU *cpu = NULL;
MachineState *machine = MACHINE(pcms);
Error *error = NULL;
unsigned long apic_id_limit;
/* init CPUs */
if (cpu_model == NULL) {
if (machine->cpu_model == NULL) {
#ifdef TARGET_X86_64
cpu_model = "qemu64";
machine->cpu_model = "qemu64";
#else
cpu_model = "qemu32";
machine->cpu_model = "qemu32";
#endif
}
current_cpu_model = cpu_model;
apic_id_limit = pc_apic_id_limit(max_cpus);
if (apic_id_limit > ACPI_CPU_HOTPLUG_ID_LIMIT) {
@ -1143,7 +1142,7 @@ void pc_cpus_init(const char *cpu_model)
}
for (i = 0; i < smp_cpus; i++) {
cpu = pc_new_cpu(cpu_model, x86_cpu_apic_id_from_index(i),
cpu = pc_new_cpu(machine->cpu_model, x86_cpu_apic_id_from_index(i),
&error);
if (error) {
error_report_err(error);

View file

@ -139,7 +139,7 @@ static void pc_init1(MachineState *machine,
exit(1);
}
pc_cpus_init(machine->cpu_model);
pc_cpus_init(pcms);
if (kvm_enabled() && kvmclock_enabled) {
kvmclock_create();

View file

@ -128,7 +128,7 @@ static void pc_q35_init(MachineState *machine)
exit(1);
}
pc_cpus_init(machine->cpu_model);
pc_cpus_init(pcms);
pc_acpi_init("q35-acpi-dsdt.aml");
kvmclock_create();

View file

@ -394,7 +394,7 @@ static void ich9_apm_ctrl_changed(uint32_t val, void *arg)
/* SMI_EN = PMBASE + 30. SMI control and enable register */
if (lpc->pm.smi_en & ICH9_PMIO_SMI_EN_APMC_EN) {
cpu_interrupt(first_cpu, CPU_INTERRUPT_SMI);
cpu_interrupt(current_cpu, CPU_INTERRUPT_SMI);
}
}

View file

@ -25,6 +25,7 @@
#include "sysemu/numa.h"
#include "sysemu/kvm.h"
#include "trace.h"
#include "hw/virtio/vhost.h"
typedef struct pc_dimms_capacity {
uint64_t size;
@ -96,6 +97,12 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
goto out;
}
if (!vhost_has_free_slot()) {
error_setg(&local_err, "a used vhost backend has no free"
" memory slots left");
goto out;
}
memory_region_add_subregion(&hpms->mr, addr - hpms->base, mr);
vmstate_register_ram(mr, dev);
numa_set_mem_node_id(addr, memory_region_size(mr), dimm->node);

View file

@ -85,6 +85,8 @@ static const int user_feature_bits[] = {
VIRTIO_NET_F_CTRL_MAC_ADDR,
VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
VIRTIO_NET_F_GUEST_ANNOUNCE,
VIRTIO_NET_F_MQ,
VHOST_INVALID_FEATURE_BIT
@ -252,8 +254,7 @@ static int vhost_net_start_one(struct vhost_net *net,
file.fd = net->backend;
for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
const VhostOps *vhost_ops = net->dev.vhost_ops;
r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND,
&file);
r = vhost_ops->vhost_net_set_backend(&net->dev, &file);
if (r < 0) {
r = -errno;
goto fail;
@ -266,8 +267,7 @@ fail:
if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) {
while (file.index-- > 0) {
const VhostOps *vhost_ops = net->dev.vhost_ops;
int r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND,
&file);
int r = vhost_ops->vhost_net_set_backend(&net->dev, &file);
assert(r >= 0);
}
}
@ -289,15 +289,13 @@ static void vhost_net_stop_one(struct vhost_net *net,
if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) {
for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
const VhostOps *vhost_ops = net->dev.vhost_ops;
int r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND,
&file);
int r = vhost_ops->vhost_net_set_backend(&net->dev, &file);
assert(r >= 0);
}
} else if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER) {
for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
const VhostOps *vhost_ops = net->dev.vhost_ops;
int r = vhost_ops->vhost_call(&net->dev, VHOST_RESET_DEVICE,
NULL);
int r = vhost_ops->vhost_reset_device(&net->dev);
assert(r >= 0);
}
}
@ -390,6 +388,18 @@ void vhost_net_cleanup(struct vhost_net *net)
g_free(net);
}
int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr)
{
const VhostOps *vhost_ops = net->dev.vhost_ops;
int r = -1;
if (vhost_ops->vhost_migration_done) {
r = vhost_ops->vhost_migration_done(&net->dev, mac_addr);
}
return r;
}
bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
{
return vhost_virtqueue_pending(&net->dev, idx);
@ -428,8 +438,8 @@ int vhost_set_vring_enable(NetClientState *nc, int enable)
VHostNetState *net = get_vhost_net(nc);
const VhostOps *vhost_ops = net->dev.vhost_ops;
if (vhost_ops->vhost_backend_set_vring_enable) {
return vhost_ops->vhost_backend_set_vring_enable(&net->dev, enable);
if (vhost_ops->vhost_set_vring_enable) {
return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
}
return 0;
@ -481,6 +491,11 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
{
}
int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr)
{
return -1;
}
VHostNetState *get_vhost_net(NetClientState *nc)
{
return 0;

View file

@ -764,6 +764,7 @@ static int host_pci_config_read(int pos, int len, uint32_t val)
/* Access real host bridge. */
int rc = snprintf(path, size, "/sys/bus/pci/devices/%04x:%02x:%02x.%d/%s",
0, 0, 0, 0, "config");
int ret = 0;
if (rc >= size || rc < 0) {
return -ENODEV;
@ -775,16 +776,18 @@ static int host_pci_config_read(int pos, int len, uint32_t val)
}
if (lseek(config_fd, pos, SEEK_SET) != pos) {
return -errno;
ret = -errno;
goto out;
}
do {
rc = read(config_fd, (uint8_t *)&val, len);
} while (rc < 0 && (errno == EINTR || errno == EAGAIN));
if (rc != len) {
return -errno;
ret = -errno;
}
return 0;
out:
close(config_fd);
return ret;
}
static int igd_pt_i440fx_initfn(struct PCIDevice *pci_dev)

View file

@ -426,31 +426,12 @@ static void mch_reset(DeviceState *qdev)
static AddressSpace *q35_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
{
IntelIOMMUState *s = opaque;
VTDAddressSpace **pvtd_as;
int bus_num = pci_bus_num(bus);
VTDAddressSpace *vtd_as;
assert(0 <= bus_num && bus_num <= VTD_PCI_BUS_MAX);
assert(0 <= devfn && devfn <= VTD_PCI_DEVFN_MAX);
pvtd_as = s->address_spaces[bus_num];
if (!pvtd_as) {
/* No corresponding free() */
pvtd_as = g_malloc0(sizeof(VTDAddressSpace *) * VTD_PCI_DEVFN_MAX);
s->address_spaces[bus_num] = pvtd_as;
}
if (!pvtd_as[devfn]) {
pvtd_as[devfn] = g_malloc0(sizeof(VTDAddressSpace));
pvtd_as[devfn]->bus_num = (uint8_t)bus_num;
pvtd_as[devfn]->devfn = (uint8_t)devfn;
pvtd_as[devfn]->iommu_state = s;
pvtd_as[devfn]->context_cache_entry.context_cache_gen = 0;
memory_region_init_iommu(&pvtd_as[devfn]->iommu, OBJECT(s),
&s->iommu_ops, "intel_iommu", UINT64_MAX);
address_space_init(&pvtd_as[devfn]->as,
&pvtd_as[devfn]->iommu, "intel_iommu");
}
return &pvtd_as[devfn]->as;
vtd_as = vtd_find_add_as(s, bus, devfn);
return &vtd_as->as;
}
static void mch_init_dmar(MCHPCIState *mch)

View file

@ -46,7 +46,7 @@ static int vhost_scsi_set_endpoint(VHostSCSI *s)
memset(&backend, 0, sizeof(backend));
pstrcpy(backend.vhost_wwpn, sizeof(backend.vhost_wwpn), vs->conf.wwpn);
ret = vhost_ops->vhost_call(&s->dev, VHOST_SCSI_SET_ENDPOINT, &backend);
ret = vhost_ops->vhost_scsi_set_endpoint(&s->dev, &backend);
if (ret < 0) {
return -errno;
}
@ -61,7 +61,7 @@ static void vhost_scsi_clear_endpoint(VHostSCSI *s)
memset(&backend, 0, sizeof(backend));
pstrcpy(backend.vhost_wwpn, sizeof(backend.vhost_wwpn), vs->conf.wwpn);
vhost_ops->vhost_call(&s->dev, VHOST_SCSI_CLEAR_ENDPOINT, &backend);
vhost_ops->vhost_scsi_clear_endpoint(&s->dev, &backend);
}
static int vhost_scsi_start(VHostSCSI *s)
@ -77,8 +77,7 @@ static int vhost_scsi_start(VHostSCSI *s)
return -ENOSYS;
}
ret = vhost_ops->vhost_call(&s->dev,
VHOST_SCSI_GET_ABI_VERSION, &abi_version);
ret = vhost_ops->vhost_scsi_get_abi_version(&s->dev, &abi_version);
if (ret < 0) {
return -errno;
}

View file

@ -11,6 +11,7 @@
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "qemu/error-report.h"
#include "linux/vhost.h"
#include <sys/ioctl.h>
@ -42,6 +43,122 @@ static int vhost_kernel_cleanup(struct vhost_dev *dev)
return close(fd);
}
static int vhost_kernel_memslots_limit(struct vhost_dev *dev)
{
int limit = 64;
char *s;
if (g_file_get_contents("/sys/module/vhost/parameters/max_mem_regions",
&s, NULL, NULL)) {
uint64_t val = g_ascii_strtoull(s, NULL, 10);
if (!((val == G_MAXUINT64 || !val) && errno)) {
return val;
}
error_report("ignoring invalid max_mem_regions value in vhost module:"
" %s", s);
}
return limit;
}
static int vhost_kernel_net_set_backend(struct vhost_dev *dev,
struct vhost_vring_file *file)
{
return vhost_kernel_call(dev, VHOST_NET_SET_BACKEND, file);
}
static int vhost_kernel_scsi_set_endpoint(struct vhost_dev *dev,
struct vhost_scsi_target *target)
{
return vhost_kernel_call(dev, VHOST_SCSI_SET_ENDPOINT, target);
}
static int vhost_kernel_scsi_clear_endpoint(struct vhost_dev *dev,
struct vhost_scsi_target *target)
{
return vhost_kernel_call(dev, VHOST_SCSI_CLEAR_ENDPOINT, target);
}
static int vhost_kernel_scsi_get_abi_version(struct vhost_dev *dev, int *version)
{
return vhost_kernel_call(dev, VHOST_SCSI_GET_ABI_VERSION, version);
}
static int vhost_kernel_set_log_base(struct vhost_dev *dev, uint64_t base,
struct vhost_log *log)
{
return vhost_kernel_call(dev, VHOST_SET_LOG_BASE, &base);
}
static int vhost_kernel_set_mem_table(struct vhost_dev *dev,
struct vhost_memory *mem)
{
return vhost_kernel_call(dev, VHOST_SET_MEM_TABLE, mem);
}
static int vhost_kernel_set_vring_addr(struct vhost_dev *dev,
struct vhost_vring_addr *addr)
{
return vhost_kernel_call(dev, VHOST_SET_VRING_ADDR, addr);
}
static int vhost_kernel_set_vring_endian(struct vhost_dev *dev,
struct vhost_vring_state *ring)
{
return vhost_kernel_call(dev, VHOST_SET_VRING_ENDIAN, ring);
}
static int vhost_kernel_set_vring_num(struct vhost_dev *dev,
struct vhost_vring_state *ring)
{
return vhost_kernel_call(dev, VHOST_SET_VRING_NUM, ring);
}
static int vhost_kernel_set_vring_base(struct vhost_dev *dev,
struct vhost_vring_state *ring)
{
return vhost_kernel_call(dev, VHOST_SET_VRING_BASE, ring);
}
static int vhost_kernel_get_vring_base(struct vhost_dev *dev,
struct vhost_vring_state *ring)
{
return vhost_kernel_call(dev, VHOST_GET_VRING_BASE, ring);
}
static int vhost_kernel_set_vring_kick(struct vhost_dev *dev,
struct vhost_vring_file *file)
{
return vhost_kernel_call(dev, VHOST_SET_VRING_KICK, file);
}
static int vhost_kernel_set_vring_call(struct vhost_dev *dev,
struct vhost_vring_file *file)
{
return vhost_kernel_call(dev, VHOST_SET_VRING_CALL, file);
}
static int vhost_kernel_set_features(struct vhost_dev *dev,
uint64_t features)
{
return vhost_kernel_call(dev, VHOST_SET_FEATURES, &features);
}
static int vhost_kernel_get_features(struct vhost_dev *dev,
uint64_t *features)
{
return vhost_kernel_call(dev, VHOST_GET_FEATURES, features);
}
static int vhost_kernel_set_owner(struct vhost_dev *dev)
{
return vhost_kernel_call(dev, VHOST_SET_OWNER, NULL);
}
static int vhost_kernel_reset_device(struct vhost_dev *dev)
{
return vhost_kernel_call(dev, VHOST_RESET_DEVICE, NULL);
}
static int vhost_kernel_get_vq_index(struct vhost_dev *dev, int idx)
{
assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
@ -51,10 +168,27 @@ static int vhost_kernel_get_vq_index(struct vhost_dev *dev, int idx)
static const VhostOps kernel_ops = {
.backend_type = VHOST_BACKEND_TYPE_KERNEL,
.vhost_call = vhost_kernel_call,
.vhost_backend_init = vhost_kernel_init,
.vhost_backend_cleanup = vhost_kernel_cleanup,
.vhost_backend_get_vq_index = vhost_kernel_get_vq_index,
.vhost_backend_memslots_limit = vhost_kernel_memslots_limit,
.vhost_net_set_backend = vhost_kernel_net_set_backend,
.vhost_scsi_set_endpoint = vhost_kernel_scsi_set_endpoint,
.vhost_scsi_clear_endpoint = vhost_kernel_scsi_clear_endpoint,
.vhost_scsi_get_abi_version = vhost_kernel_scsi_get_abi_version,
.vhost_set_log_base = vhost_kernel_set_log_base,
.vhost_set_mem_table = vhost_kernel_set_mem_table,
.vhost_set_vring_addr = vhost_kernel_set_vring_addr,
.vhost_set_vring_endian = vhost_kernel_set_vring_endian,
.vhost_set_vring_num = vhost_kernel_set_vring_num,
.vhost_set_vring_base = vhost_kernel_set_vring_base,
.vhost_get_vring_base = vhost_kernel_get_vring_base,
.vhost_set_vring_kick = vhost_kernel_set_vring_kick,
.vhost_set_vring_call = vhost_kernel_set_vring_call,
.vhost_set_features = vhost_kernel_set_features,
.vhost_get_features = vhost_kernel_get_features,
.vhost_set_owner = vhost_kernel_set_owner,
.vhost_reset_device = vhost_kernel_reset_device,
.vhost_get_vq_index = vhost_kernel_get_vq_index,
};
int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type)

View file

@ -10,11 +10,13 @@
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio-net.h"
#include "sysemu/char.h"
#include "sysemu/kvm.h"
#include "qemu/error-report.h"
#include "qemu/sockets.h"
#include "exec/ram_addr.h"
#include "migration/migration.h"
#include <fcntl.h>
#include <unistd.h>
@ -25,9 +27,16 @@
#define VHOST_MEMORY_MAX_NREGIONS 8
#define VHOST_USER_F_PROTOCOL_FEATURES 30
#define VHOST_USER_PROTOCOL_FEATURE_MASK 0x1ULL
#define VHOST_USER_PROTOCOL_F_MQ 0
enum VhostUserProtocolFeature {
VHOST_USER_PROTOCOL_F_MQ = 0,
VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
VHOST_USER_PROTOCOL_F_RARP = 2,
VHOST_USER_PROTOCOL_F_MAX
};
#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
typedef enum VhostUserRequest {
VHOST_USER_NONE = 0,
@ -49,6 +58,7 @@ typedef enum VhostUserRequest {
VHOST_USER_SET_PROTOCOL_FEATURES = 16,
VHOST_USER_GET_QUEUE_NUM = 17,
VHOST_USER_SET_VRING_ENABLE = 18,
VHOST_USER_SEND_RARP = 19,
VHOST_USER_MAX
} VhostUserRequest;
@ -97,37 +107,6 @@ static bool ioeventfd_enabled(void)
return kvm_enabled() && kvm_eventfds_enabled();
}
static unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = {
-1, /* VHOST_USER_NONE */
VHOST_GET_FEATURES, /* VHOST_USER_GET_FEATURES */
VHOST_SET_FEATURES, /* VHOST_USER_SET_FEATURES */
VHOST_SET_OWNER, /* VHOST_USER_SET_OWNER */
VHOST_RESET_DEVICE, /* VHOST_USER_RESET_DEVICE */
VHOST_SET_MEM_TABLE, /* VHOST_USER_SET_MEM_TABLE */
VHOST_SET_LOG_BASE, /* VHOST_USER_SET_LOG_BASE */
VHOST_SET_LOG_FD, /* VHOST_USER_SET_LOG_FD */
VHOST_SET_VRING_NUM, /* VHOST_USER_SET_VRING_NUM */
VHOST_SET_VRING_ADDR, /* VHOST_USER_SET_VRING_ADDR */
VHOST_SET_VRING_BASE, /* VHOST_USER_SET_VRING_BASE */
VHOST_GET_VRING_BASE, /* VHOST_USER_GET_VRING_BASE */
VHOST_SET_VRING_KICK, /* VHOST_USER_SET_VRING_KICK */
VHOST_SET_VRING_CALL, /* VHOST_USER_SET_VRING_CALL */
VHOST_SET_VRING_ERR /* VHOST_USER_SET_VRING_ERR */
};
static VhostUserRequest vhost_user_request_translate(unsigned long int request)
{
VhostUserRequest idx;
for (idx = 0; idx < VHOST_USER_MAX; idx++) {
if (ioctl_to_vhost_user_request[idx] == request) {
break;
}
}
return (idx == VHOST_USER_MAX) ? VHOST_USER_NONE : idx;
}
static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
{
CharDriverState *chr = dev->opaque;
@ -174,20 +153,6 @@ fail:
return -1;
}
static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
int *fds, int fd_num)
{
CharDriverState *chr = dev->opaque;
int size = VHOST_USER_HDR_SIZE + msg->size;
if (fd_num) {
qemu_chr_fe_set_msgfds(chr, fds, fd_num);
}
return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ?
0 : -1;
}
static bool vhost_user_one_time_request(VhostUserRequest request)
{
switch (request) {
@ -201,213 +166,164 @@ static bool vhost_user_one_time_request(VhostUserRequest request)
}
}
static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
void *arg)
/* most non-init callers ignore the error */
static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
int *fds, int fd_num)
{
VhostUserMsg msg;
VhostUserRequest msg_request;
struct vhost_vring_file *file = 0;
int need_reply = 0;
int fds[VHOST_MEMORY_MAX_NREGIONS];
int i, fd;
size_t fd_num = 0;
assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
/* only translate vhost ioctl requests */
if (request > VHOST_USER_MAX) {
msg_request = vhost_user_request_translate(request);
} else {
msg_request = request;
}
CharDriverState *chr = dev->opaque;
int size = VHOST_USER_HDR_SIZE + msg->size;
/*
* For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
* we just need send it once in the first time. For later such
* request, we just ignore it.
*/
if (vhost_user_one_time_request(msg_request) && dev->vq_index != 0) {
if (vhost_user_one_time_request(msg->request) && dev->vq_index != 0) {
return 0;
}
msg.request = msg_request;
msg.flags = VHOST_USER_VERSION;
msg.size = 0;
switch (msg_request) {
case VHOST_USER_GET_FEATURES:
case VHOST_USER_GET_PROTOCOL_FEATURES:
case VHOST_USER_GET_QUEUE_NUM:
need_reply = 1;
break;
case VHOST_USER_SET_FEATURES:
case VHOST_USER_SET_LOG_BASE:
case VHOST_USER_SET_PROTOCOL_FEATURES:
msg.u64 = *((__u64 *) arg);
msg.size = sizeof(m.u64);
break;
case VHOST_USER_SET_OWNER:
case VHOST_USER_RESET_DEVICE:
break;
case VHOST_USER_SET_MEM_TABLE:
for (i = 0; i < dev->mem->nregions; ++i) {
struct vhost_memory_region *reg = dev->mem->regions + i;
ram_addr_t ram_addr;
assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, &ram_addr);
fd = qemu_get_ram_fd(ram_addr);
if (fd > 0) {
msg.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
msg.memory.regions[fd_num].memory_size = reg->memory_size;
msg.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr -
(uintptr_t) qemu_get_ram_block_host_ptr(ram_addr);
assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
fds[fd_num++] = fd;
}
}
msg.memory.nregions = fd_num;
if (!fd_num) {
error_report("Failed initializing vhost-user memory map, "
"consider using -object memory-backend-file share=on");
return -1;
}
msg.size = sizeof(m.memory.nregions);
msg.size += sizeof(m.memory.padding);
msg.size += fd_num * sizeof(VhostUserMemoryRegion);
break;
case VHOST_USER_SET_LOG_FD:
fds[fd_num++] = *((int *) arg);
break;
case VHOST_USER_SET_VRING_NUM:
case VHOST_USER_SET_VRING_BASE:
case VHOST_USER_SET_VRING_ENABLE:
memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
msg.size = sizeof(m.state);
break;
case VHOST_USER_GET_VRING_BASE:
memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
msg.size = sizeof(m.state);
need_reply = 1;
break;
case VHOST_USER_SET_VRING_ADDR:
memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr));
msg.size = sizeof(m.addr);
break;
case VHOST_USER_SET_VRING_KICK:
case VHOST_USER_SET_VRING_CALL:
case VHOST_USER_SET_VRING_ERR:
file = arg;
msg.u64 = file->index & VHOST_USER_VRING_IDX_MASK;
msg.size = sizeof(m.u64);
if (ioeventfd_enabled() && file->fd > 0) {
fds[fd_num++] = file->fd;
} else {
msg.u64 |= VHOST_USER_VRING_NOFD_MASK;
}
break;
default:
error_report("vhost-user trying to send unhandled ioctl");
return -1;
break;
if (fd_num) {
qemu_chr_fe_set_msgfds(chr, fds, fd_num);
}
if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
return 0;
return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ?
0 : -1;
}
static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
struct vhost_log *log)
{
int fds[VHOST_MEMORY_MAX_NREGIONS];
size_t fd_num = 0;
bool shmfd = virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_LOG_SHMFD);
VhostUserMsg msg = {
.request = VHOST_USER_SET_LOG_BASE,
.flags = VHOST_USER_VERSION,
.u64 = base,
.size = sizeof(m.u64),
};
if (shmfd && log->fd != -1) {
fds[fd_num++] = log->fd;
}
if (need_reply) {
vhost_user_write(dev, &msg, fds, fd_num);
if (shmfd) {
msg.size = 0;
if (vhost_user_read(dev, &msg) < 0) {
return 0;
}
if (msg_request != msg.request) {
error_report("Received unexpected msg type."
" Expected %d received %d", msg_request, msg.request);
if (msg.request != VHOST_USER_SET_LOG_BASE) {
error_report("Received unexpected msg type. "
"Expected %d received %d",
VHOST_USER_SET_LOG_BASE, msg.request);
return -1;
}
switch (msg_request) {
case VHOST_USER_GET_FEATURES:
case VHOST_USER_GET_PROTOCOL_FEATURES:
case VHOST_USER_GET_QUEUE_NUM:
if (msg.size != sizeof(m.u64)) {
error_report("Received bad msg size.");
return -1;
}
*((__u64 *) arg) = msg.u64;
break;
case VHOST_USER_GET_VRING_BASE:
if (msg.size != sizeof(m.state)) {
error_report("Received bad msg size.");
return -1;
}
memcpy(arg, &msg.state, sizeof(struct vhost_vring_state));
break;
default:
error_report("Received unexpected msg type.");
return -1;
break;
}
}
return 0;
}
static int vhost_user_init(struct vhost_dev *dev, void *opaque)
static int vhost_user_set_mem_table(struct vhost_dev *dev,
struct vhost_memory *mem)
{
unsigned long long features;
int err;
int fds[VHOST_MEMORY_MAX_NREGIONS];
int i, fd;
size_t fd_num = 0;
VhostUserMsg msg = {
.request = VHOST_USER_SET_MEM_TABLE,
.flags = VHOST_USER_VERSION,
};
assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
for (i = 0; i < dev->mem->nregions; ++i) {
struct vhost_memory_region *reg = dev->mem->regions + i;
ram_addr_t ram_addr;
dev->opaque = opaque;
err = vhost_user_call(dev, VHOST_USER_GET_FEATURES, &features);
if (err < 0) {
return err;
}
if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
err = vhost_user_call(dev, VHOST_USER_GET_PROTOCOL_FEATURES, &features);
if (err < 0) {
return err;
}
dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK;
err = vhost_user_call(dev, VHOST_USER_SET_PROTOCOL_FEATURES,
&dev->protocol_features);
if (err < 0) {
return err;
}
/* query the max queues we support if backend supports Multiple Queue */
if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
err = vhost_user_call(dev, VHOST_USER_GET_QUEUE_NUM, &dev->max_queues);
if (err < 0) {
return err;
}
assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr,
&ram_addr);
fd = qemu_get_ram_fd(ram_addr);
if (fd > 0) {
msg.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
msg.memory.regions[fd_num].memory_size = reg->memory_size;
msg.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr -
(uintptr_t) qemu_get_ram_block_host_ptr(ram_addr);
assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
fds[fd_num++] = fd;
}
}
msg.memory.nregions = fd_num;
if (!fd_num) {
error_report("Failed initializing vhost-user memory map, "
"consider using -object memory-backend-file share=on");
return -1;
}
msg.size = sizeof(m.memory.nregions);
msg.size += sizeof(m.memory.padding);
msg.size += fd_num * sizeof(VhostUserMemoryRegion);
vhost_user_write(dev, &msg, fds, fd_num);
return 0;
}
static int vhost_user_set_vring_addr(struct vhost_dev *dev,
struct vhost_vring_addr *addr)
{
VhostUserMsg msg = {
.request = VHOST_USER_SET_VRING_ADDR,
.flags = VHOST_USER_VERSION,
.addr = *addr,
.size = sizeof(*addr),
};
vhost_user_write(dev, &msg, NULL, 0);
return 0;
}
static int vhost_user_set_vring_endian(struct vhost_dev *dev,
struct vhost_vring_state *ring)
{
error_report("vhost-user trying to send unhandled ioctl");
return -1;
}
static int vhost_set_vring(struct vhost_dev *dev,
unsigned long int request,
struct vhost_vring_state *ring)
{
VhostUserMsg msg = {
.request = request,
.flags = VHOST_USER_VERSION,
.state = *ring,
.size = sizeof(*ring),
};
vhost_user_write(dev, &msg, NULL, 0);
return 0;
}
static int vhost_user_set_vring_num(struct vhost_dev *dev,
struct vhost_vring_state *ring)
{
return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
}
static int vhost_user_set_vring_base(struct vhost_dev *dev,
struct vhost_vring_state *ring)
{
return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
}
static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
{
struct vhost_vring_state state = {
@ -415,13 +331,218 @@ static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
.num = enable,
};
assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ))) {
return -1;
}
return vhost_user_call(dev, VHOST_USER_SET_VRING_ENABLE, &state);
return vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
}
static int vhost_user_get_vring_base(struct vhost_dev *dev,
struct vhost_vring_state *ring)
{
VhostUserMsg msg = {
.request = VHOST_USER_GET_VRING_BASE,
.flags = VHOST_USER_VERSION,
.state = *ring,
.size = sizeof(*ring),
};
vhost_user_write(dev, &msg, NULL, 0);
if (vhost_user_read(dev, &msg) < 0) {
return 0;
}
if (msg.request != VHOST_USER_GET_VRING_BASE) {
error_report("Received unexpected msg type. Expected %d received %d",
VHOST_USER_GET_VRING_BASE, msg.request);
return -1;
}
if (msg.size != sizeof(m.state)) {
error_report("Received bad msg size.");
return -1;
}
*ring = msg.state;
return 0;
}
static int vhost_set_vring_file(struct vhost_dev *dev,
VhostUserRequest request,
struct vhost_vring_file *file)
{
int fds[VHOST_MEMORY_MAX_NREGIONS];
size_t fd_num = 0;
VhostUserMsg msg = {
.request = request,
.flags = VHOST_USER_VERSION,
.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
.size = sizeof(m.u64),
};
if (ioeventfd_enabled() && file->fd > 0) {
fds[fd_num++] = file->fd;
} else {
msg.u64 |= VHOST_USER_VRING_NOFD_MASK;
}
vhost_user_write(dev, &msg, fds, fd_num);
return 0;
}
static int vhost_user_set_vring_kick(struct vhost_dev *dev,
struct vhost_vring_file *file)
{
return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
}
static int vhost_user_set_vring_call(struct vhost_dev *dev,
struct vhost_vring_file *file)
{
return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
}
static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64)
{
VhostUserMsg msg = {
.request = request,
.flags = VHOST_USER_VERSION,
.u64 = u64,
.size = sizeof(m.u64),
};
vhost_user_write(dev, &msg, NULL, 0);
return 0;
}
static int vhost_user_set_features(struct vhost_dev *dev,
uint64_t features)
{
return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features);
}
static int vhost_user_set_protocol_features(struct vhost_dev *dev,
uint64_t features)
{
return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features);
}
static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
{
VhostUserMsg msg = {
.request = request,
.flags = VHOST_USER_VERSION,
};
if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
return 0;
}
vhost_user_write(dev, &msg, NULL, 0);
if (vhost_user_read(dev, &msg) < 0) {
return 0;
}
if (msg.request != request) {
error_report("Received unexpected msg type. Expected %d received %d",
request, msg.request);
return -1;
}
if (msg.size != sizeof(m.u64)) {
error_report("Received bad msg size.");
return -1;
}
*u64 = msg.u64;
return 0;
}
static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
{
return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features);
}
static int vhost_user_set_owner(struct vhost_dev *dev)
{
VhostUserMsg msg = {
.request = VHOST_USER_SET_OWNER,
.flags = VHOST_USER_VERSION,
};
vhost_user_write(dev, &msg, NULL, 0);
return 0;
}
static int vhost_user_reset_device(struct vhost_dev *dev)
{
VhostUserMsg msg = {
.request = VHOST_USER_RESET_DEVICE,
.flags = VHOST_USER_VERSION,
};
vhost_user_write(dev, &msg, NULL, 0);
return 0;
}
static int vhost_user_init(struct vhost_dev *dev, void *opaque)
{
uint64_t features;
int err;
assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
dev->opaque = opaque;
err = vhost_user_get_features(dev, &features);
if (err < 0) {
return err;
}
if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
&features);
if (err < 0) {
return err;
}
dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK;
err = vhost_user_set_protocol_features(dev, dev->protocol_features);
if (err < 0) {
return err;
}
/* query the max queues we support if backend supports Multiple Queue */
if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
&dev->max_queues);
if (err < 0) {
return err;
}
}
}
if (dev->migration_blocker == NULL &&
!virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
error_setg(&dev->migration_blocker,
"Migration disabled: vhost-user backend lacks "
"VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
}
return 0;
}
static int vhost_user_cleanup(struct vhost_dev *dev)
@ -440,11 +561,65 @@ static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
return idx;
}
static int vhost_user_memslots_limit(struct vhost_dev *dev)
{
return VHOST_MEMORY_MAX_NREGIONS;
}
static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
{
assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
return virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_LOG_SHMFD);
}
static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
{
VhostUserMsg msg = { 0 };
int err;
assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
/* If guest supports GUEST_ANNOUNCE do nothing */
if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
return 0;
}
/* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */
if (virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_RARP)) {
msg.request = VHOST_USER_SEND_RARP;
msg.flags = VHOST_USER_VERSION;
memcpy((char *)&msg.u64, mac_addr, 6);
msg.size = sizeof(m.u64);
err = vhost_user_write(dev, &msg, NULL, 0);
return err;
}
return -1;
}
const VhostOps user_ops = {
.backend_type = VHOST_BACKEND_TYPE_USER,
.vhost_call = vhost_user_call,
.vhost_backend_init = vhost_user_init,
.vhost_backend_cleanup = vhost_user_cleanup,
.vhost_backend_get_vq_index = vhost_user_get_vq_index,
.vhost_backend_set_vring_enable = vhost_user_set_vring_enable,
.vhost_backend_memslots_limit = vhost_user_memslots_limit,
.vhost_set_log_base = vhost_user_set_log_base,
.vhost_set_mem_table = vhost_user_set_mem_table,
.vhost_set_vring_addr = vhost_user_set_vring_addr,
.vhost_set_vring_endian = vhost_user_set_vring_endian,
.vhost_set_vring_num = vhost_user_set_vring_num,
.vhost_set_vring_base = vhost_user_set_vring_base,
.vhost_get_vring_base = vhost_user_get_vring_base,
.vhost_set_vring_kick = vhost_user_set_vring_kick,
.vhost_set_vring_call = vhost_user_set_vring_call,
.vhost_set_features = vhost_user_set_features,
.vhost_get_features = vhost_user_get_features,
.vhost_set_owner = vhost_user_set_owner,
.vhost_reset_device = vhost_user_reset_device,
.vhost_get_vq_index = vhost_user_get_vq_index,
.vhost_set_vring_enable = vhost_user_set_vring_enable,
.vhost_requires_shm_log = vhost_user_requires_shm_log,
.vhost_migration_done = vhost_user_migration_done,
};

View file

@ -18,6 +18,7 @@
#include "qemu/atomic.h"
#include "qemu/range.h"
#include "qemu/error-report.h"
#include "qemu/memfd.h"
#include <linux/vhost.h>
#include "exec/address-spaces.h"
#include "hw/virtio/virtio-bus.h"
@ -25,6 +26,23 @@
#include "migration/migration.h"
static struct vhost_log *vhost_log;
static struct vhost_log *vhost_log_shm;
static unsigned int used_memslots;
static QLIST_HEAD(, vhost_dev) vhost_devices =
QLIST_HEAD_INITIALIZER(vhost_devices);
bool vhost_has_free_slot(void)
{
unsigned int slots_limit = ~0U;
struct vhost_dev *hdev;
QLIST_FOREACH(hdev, &vhost_devices, entry) {
unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
slots_limit = MIN(slots_limit, r);
}
return slots_limit > used_memslots;
}
static void vhost_dev_sync_region(struct vhost_dev *dev,
MemoryRegionSection *section,
@ -286,25 +304,46 @@ static uint64_t vhost_get_log_size(struct vhost_dev *dev)
}
return log_size;
}
static struct vhost_log *vhost_log_alloc(uint64_t size)
static struct vhost_log *vhost_log_alloc(uint64_t size, bool share)
{
struct vhost_log *log = g_malloc0(sizeof *log + size * sizeof(*(log->log)));
struct vhost_log *log;
uint64_t logsize = size * sizeof(*(log->log));
int fd = -1;
log = g_new0(struct vhost_log, 1);
if (share) {
log->log = qemu_memfd_alloc("vhost-log", logsize,
F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
&fd);
memset(log->log, 0, logsize);
} else {
log->log = g_malloc0(logsize);
}
log->size = size;
log->refcnt = 1;
log->fd = fd;
return log;
}
static struct vhost_log *vhost_log_get(uint64_t size)
static struct vhost_log *vhost_log_get(uint64_t size, bool share)
{
if (!vhost_log || vhost_log->size != size) {
vhost_log = vhost_log_alloc(size);
struct vhost_log *log = share ? vhost_log_shm : vhost_log;
if (!log || log->size != size) {
log = vhost_log_alloc(size, share);
if (share) {
vhost_log_shm = log;
} else {
vhost_log = log;
}
} else {
++vhost_log->refcnt;
++log->refcnt;
}
return vhost_log;
return log;
}
static void vhost_log_put(struct vhost_dev *dev, bool sync)
@ -321,20 +360,35 @@ static void vhost_log_put(struct vhost_dev *dev, bool sync)
if (dev->log_size && sync) {
vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1);
}
if (vhost_log == log) {
g_free(log->log);
vhost_log = NULL;
} else if (vhost_log_shm == log) {
qemu_memfd_free(log->log, log->size * sizeof(*(log->log)),
log->fd);
vhost_log_shm = NULL;
}
g_free(log);
}
}
static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t size)
static bool vhost_dev_log_is_shared(struct vhost_dev *dev)
{
struct vhost_log *log = vhost_log_get(size);
return dev->vhost_ops->vhost_requires_shm_log &&
dev->vhost_ops->vhost_requires_shm_log(dev);
}
static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
{
struct vhost_log *log = vhost_log_get(size, vhost_dev_log_is_shared(dev));
uint64_t log_base = (uintptr_t)log->log;
int r;
r = dev->vhost_ops->vhost_call(dev, VHOST_SET_LOG_BASE, &log_base);
/* inform backend of log switching, this must be done before
releasing the current log, to ensure no logging is lost */
r = dev->vhost_ops->vhost_set_log_base(dev, log_base, log);
assert(r >= 0);
vhost_log_put(dev, true);
dev->log = log;
@ -457,6 +511,7 @@ static void vhost_set_memory(MemoryListener *listener,
dev->mem_changed_start_addr = MIN(dev->mem_changed_start_addr, start_addr);
dev->mem_changed_end_addr = MAX(dev->mem_changed_end_addr, start_addr + size - 1);
dev->memory_changed = true;
used_memslots = dev->mem->nregions;
}
static bool vhost_section(MemoryRegionSection *section)
@ -500,7 +555,7 @@ static void vhost_commit(MemoryListener *listener)
}
if (!dev->log_enabled) {
r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem);
r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem);
assert(r >= 0);
dev->memory_changed = false;
return;
@ -513,7 +568,7 @@ static void vhost_commit(MemoryListener *listener)
if (dev->log_size < log_size) {
vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER);
}
r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem);
r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem);
assert(r >= 0);
/* To log less, can only decrease log size after table update. */
if (dev->log_size > log_size + VHOST_LOG_BUFFER) {
@ -581,7 +636,7 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
.log_guest_addr = vq->used_phys,
.flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0,
};
int r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_ADDR, &addr);
int r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr);
if (r < 0) {
return -errno;
}
@ -595,19 +650,20 @@ static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log)
if (enable_log) {
features |= 0x1ULL << VHOST_F_LOG_ALL;
}
r = dev->vhost_ops->vhost_call(dev, VHOST_SET_FEATURES, &features);
r = dev->vhost_ops->vhost_set_features(dev, features);
return r < 0 ? -errno : 0;
}
static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
{
int r, t, i;
int r, t, i, idx;
r = vhost_dev_set_features(dev, enable_log);
if (r < 0) {
goto err_features;
}
for (i = 0; i < dev->nvqs; ++i) {
r = vhost_virtqueue_set_addr(dev, dev->vqs + i, i,
idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i);
r = vhost_virtqueue_set_addr(dev, dev->vqs + i, idx,
enable_log);
if (r < 0) {
goto err_vq;
@ -616,7 +672,8 @@ static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
return 0;
err_vq:
for (; i >= 0; --i) {
t = vhost_virtqueue_set_addr(dev, dev->vqs + i, i,
idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i);
t = vhost_virtqueue_set_addr(dev, dev->vqs + i, idx,
dev->log_enabled);
assert(t >= 0);
}
@ -700,7 +757,7 @@ static int vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev *dev,
.num = is_big_endian
};
if (!dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_ENDIAN, &s)) {
if (!dev->vhost_ops->vhost_set_vring_endian(dev, &s)) {
return 0;
}
@ -719,7 +776,7 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
{
hwaddr s, l, a;
int r;
int vhost_vq_index = dev->vhost_ops->vhost_backend_get_vq_index(dev, idx);
int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
struct vhost_vring_file file = {
.index = vhost_vq_index
};
@ -730,13 +787,13 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
vq->num = state.num = virtio_queue_get_num(vdev, idx);
r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_NUM, &state);
r = dev->vhost_ops->vhost_set_vring_num(dev, &state);
if (r) {
return -errno;
}
state.num = virtio_queue_get_last_avail_idx(vdev, idx);
r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_BASE, &state);
r = dev->vhost_ops->vhost_set_vring_base(dev, &state);
if (r) {
return -errno;
}
@ -788,7 +845,7 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
}
file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_KICK, &file);
r = dev->vhost_ops->vhost_set_vring_kick(dev, &file);
if (r) {
r = -errno;
goto fail_kick;
@ -821,13 +878,13 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev,
struct vhost_virtqueue *vq,
unsigned idx)
{
int vhost_vq_index = dev->vhost_ops->vhost_backend_get_vq_index(dev, idx);
int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
struct vhost_vring_state state = {
.index = vhost_vq_index,
};
int r;
r = dev->vhost_ops->vhost_call(dev, VHOST_GET_VRING_BASE, &state);
r = dev->vhost_ops->vhost_get_vring_base(dev, &state);
if (r < 0) {
fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r);
fflush(stderr);
@ -874,7 +931,7 @@ static void vhost_eventfd_del(MemoryListener *listener,
static int vhost_virtqueue_init(struct vhost_dev *dev,
struct vhost_virtqueue *vq, int n)
{
int vhost_vq_index = dev->vhost_ops->vhost_backend_get_vq_index(dev, n);
int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, n);
struct vhost_vring_file file = {
.index = vhost_vq_index,
};
@ -884,7 +941,7 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
}
file.fd = event_notifier_get_fd(&vq->masked_notifier);
r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_CALL, &file);
r = dev->vhost_ops->vhost_set_vring_call(dev, &file);
if (r) {
r = -errno;
goto fail_call;
@ -906,6 +963,8 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
uint64_t features;
int i, r;
hdev->migration_blocker = NULL;
if (vhost_set_backend_type(hdev, backend_type) < 0) {
close((uintptr_t)opaque);
return -1;
@ -916,12 +975,20 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
return -errno;
}
r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_OWNER, NULL);
if (used_memslots > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) {
fprintf(stderr, "vhost backend memory slots limit is less"
" than current number of present memory slots\n");
close((uintptr_t)opaque);
return -1;
}
QLIST_INSERT_HEAD(&vhost_devices, hdev, entry);
r = hdev->vhost_ops->vhost_set_owner(hdev);
if (r < 0) {
goto fail;
}
r = hdev->vhost_ops->vhost_call(hdev, VHOST_GET_FEATURES, &features);
r = hdev->vhost_ops->vhost_get_features(hdev, &features);
if (r < 0) {
goto fail;
}
@ -949,12 +1016,21 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
.eventfd_del = vhost_eventfd_del,
.priority = 10
};
hdev->migration_blocker = NULL;
if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) {
error_setg(&hdev->migration_blocker,
"Migration disabled: vhost lacks VHOST_F_LOG_ALL feature.");
if (hdev->migration_blocker == NULL) {
if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) {
error_setg(&hdev->migration_blocker,
"Migration disabled: vhost lacks VHOST_F_LOG_ALL feature.");
} else if (!qemu_memfd_check()) {
error_setg(&hdev->migration_blocker,
"Migration disabled: failed to allocate shared memory");
}
}
if (hdev->migration_blocker != NULL) {
migrate_add_blocker(hdev->migration_blocker);
}
hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
hdev->n_mem_sections = 0;
hdev->mem_sections = NULL;
@ -972,6 +1048,7 @@ fail_vq:
fail:
r = -errno;
hdev->vhost_ops->vhost_backend_cleanup(hdev);
QLIST_REMOVE(hdev, entry);
return r;
}
@ -989,6 +1066,7 @@ void vhost_dev_cleanup(struct vhost_dev *hdev)
g_free(hdev->mem);
g_free(hdev->mem_sections);
hdev->vhost_ops->vhost_backend_cleanup(hdev);
QLIST_REMOVE(hdev, entry);
}
/* Stop processing guest IO notifications in qemu.
@ -1074,8 +1152,8 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
}
file.index = hdev->vhost_ops->vhost_backend_get_vq_index(hdev, n);
r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_VRING_CALL, &file);
file.index = hdev->vhost_ops->vhost_get_vq_index(hdev, n);
r = hdev->vhost_ops->vhost_set_vring_call(hdev, &file);
assert(r >= 0);
}
@ -1117,7 +1195,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
if (r < 0) {
goto fail_features;
}
r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_MEM_TABLE, hdev->mem);
r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem);
if (r < 0) {
r = -errno;
goto fail_mem;
@ -1136,10 +1214,12 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
uint64_t log_base;
hdev->log_size = vhost_get_log_size(hdev);
hdev->log = vhost_log_get(hdev->log_size);
hdev->log = vhost_log_get(hdev->log_size,
vhost_dev_log_is_shared(hdev));
log_base = (uintptr_t)hdev->log->log;
r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE,
hdev->log_size ? &log_base : NULL);
r = hdev->vhost_ops->vhost_set_log_base(hdev,
hdev->log_size ? log_base : 0,
hdev->log);
if (r < 0) {
r = -errno;
goto fail_log;