virtio,pc,pci: fixes,cleanups,features

most of CXL support
 fixes, cleanups all over the place
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmKCuLIPHG1zdEByZWRo
 YXQuY29tAAoJECgfDbjSjVRpdDUH/12SmWaAo+0+SdIHgWFFxsmg3t/EdcO38fgi
 MV+GpYdbp6TlU3jdQhrMZYmFdkVVydBdxk93ujCLbFS0ixTsKj31j0IbZMfdcGgv
 SLqnV+E3JdHqnGP39q9a9rdwYWyqhkgHoldxilIFW76ngOSapaZVvnwnOMAMkf77
 1LieL4/Xq7N9Ho86Zrs3IczQcf0czdJRDaFaSIu8GaHl8ELyuPhlSm6CSqqrEEWR
 PA/COQsLDbLOMxbfCi5v88r5aaxmGNZcGbXQbiH9qVHw65nlHyLH9UkNTdJn1du1
 f2GYwwa7eekfw/LCvvVwxO1znJrj02sfFai7aAtQYbXPvjvQiqA=
 =xdSk
 -----END PGP SIGNATURE-----

Merge tag 'for_upstream' of git://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging

virtio,pc,pci: fixes,cleanups,features

most of CXL support
fixes, cleanups all over the place

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# -----BEGIN PGP SIGNATURE-----
#
# iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmKCuLIPHG1zdEByZWRo
# YXQuY29tAAoJECgfDbjSjVRpdDUH/12SmWaAo+0+SdIHgWFFxsmg3t/EdcO38fgi
# MV+GpYdbp6TlU3jdQhrMZYmFdkVVydBdxk93ujCLbFS0ixTsKj31j0IbZMfdcGgv
# SLqnV+E3JdHqnGP39q9a9rdwYWyqhkgHoldxilIFW76ngOSapaZVvnwnOMAMkf77
# 1LieL4/Xq7N9Ho86Zrs3IczQcf0czdJRDaFaSIu8GaHl8ELyuPhlSm6CSqqrEEWR
# PA/COQsLDbLOMxbfCi5v88r5aaxmGNZcGbXQbiH9qVHw65nlHyLH9UkNTdJn1du1
# f2GYwwa7eekfw/LCvvVwxO1znJrj02sfFai7aAtQYbXPvjvQiqA=
# =xdSk
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 16 May 2022 01:48:50 PM PDT
# gpg:                using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469
# gpg:                issuer "mst@redhat.com"
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [undefined]
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>" [undefined]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* tag 'for_upstream' of git://git.kernel.org/pub/scm/virt/kvm/mst/qemu: (86 commits)
  vhost-user-scsi: avoid unlink(NULL) with fd passing
  virtio-net: don't handle mq request in userspace handler for vhost-vdpa
  vhost-vdpa: change name and polarity for vhost_vdpa_one_time_request()
  vhost-vdpa: backend feature should set only once
  vhost-net: fix improper cleanup in vhost_net_start
  vhost-vdpa: fix improper cleanup in net_init_vhost_vdpa
  virtio-net: align ctrl_vq index for non-mq guest for vhost_vdpa
  virtio-net: setup vhost_dev and notifiers for cvq only when feature is negotiated
  hw/i386/amd_iommu: Fix IOMMU event log encoding errors
  hw/i386: Make pic a property of common x86 base machine type
  hw/i386: Make pit a property of common x86 base machine type
  include/hw/pci/pcie_host: Correct PCIE_MMCFG_SIZE_MAX
  include/hw/pci/pcie_host: Correct PCIE_MMCFG_BUS_MASK
  docs/vhost-user: Clarifications for VHOST_USER_ADD/REM_MEM_REG
  vhost-user: more master/slave things
  virtio: add vhost support for virtio devices
  virtio: drop name parameter for virtio_init()
  virtio/vhost-user: dynamically assign VhostUserHostNotifiers
  hw/virtio/vhost-user: don't suppress F_CONFIG when supported
  include/hw: start documenting the vhost API
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2022-05-16 16:31:01 -07:00
commit eec398119f
131 changed files with 5352 additions and 577 deletions

View file

@ -216,7 +216,7 @@ static void virtio_9p_device_realize(DeviceState *dev, Error **errp)
}
v->config_size = sizeof(struct virtio_9p_config) + strlen(s->fsconf.tag);
virtio_init(vdev, "virtio-9p", VIRTIO_ID_9P, v->config_size);
virtio_init(vdev, VIRTIO_ID_9P, v->config_size);
v->vq = virtio_add_queue(vdev, MAX_REQ, handle_9p_output);
}

View file

@ -6,6 +6,7 @@ source audio/Kconfig
source block/Kconfig
source char/Kconfig
source core/Kconfig
source cxl/Kconfig
source display/Kconfig
source dma/Kconfig
source gpio/Kconfig

View file

@ -5,6 +5,7 @@ config ACPI_X86
bool
select ACPI
select ACPI_NVDIMM
select ACPI_CXL
select ACPI_CPU_HOTPLUG
select ACPI_MEMORY_HOTPLUG
select ACPI_HMAT
@ -66,3 +67,7 @@ config ACPI_ERST
bool
default y
depends on ACPI && PCI
config ACPI_CXL
bool
depends on ACPI

12
hw/acpi/cxl-stub.c Normal file
View file

@ -0,0 +1,12 @@
/*
* Stubs for ACPI platforms that don't support CXl
*/
#include "qemu/osdep.h"
#include "hw/acpi/aml-build.h"
#include "hw/acpi/cxl.h"
void build_cxl_osc_method(Aml *dev)
{
g_assert_not_reached();
}

257
hw/acpi/cxl.c Normal file
View file

@ -0,0 +1,257 @@
/*
* CXL ACPI Implementation
*
* Copyright(C) 2020 Intel Corporation.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>
*/
#include "qemu/osdep.h"
#include "hw/sysbus.h"
#include "hw/pci/pci_bridge.h"
#include "hw/pci/pci_host.h"
#include "hw/cxl/cxl.h"
#include "hw/mem/memory-device.h"
#include "hw/acpi/acpi.h"
#include "hw/acpi/aml-build.h"
#include "hw/acpi/bios-linker-loader.h"
#include "hw/acpi/cxl.h"
#include "qapi/error.h"
#include "qemu/uuid.h"
static void cedt_build_chbs(GArray *table_data, PXBDev *cxl)
{
SysBusDevice *sbd = SYS_BUS_DEVICE(cxl->cxl.cxl_host_bridge);
struct MemoryRegion *mr = sbd->mmio[0].memory;
/* Type */
build_append_int_noprefix(table_data, 0, 1);
/* Reserved */
build_append_int_noprefix(table_data, 0, 1);
/* Record Length */
build_append_int_noprefix(table_data, 32, 2);
/* UID - currently equal to bus number */
build_append_int_noprefix(table_data, cxl->bus_nr, 4);
/* Version */
build_append_int_noprefix(table_data, 1, 4);
/* Reserved */
build_append_int_noprefix(table_data, 0, 4);
/* Base - subregion within a container that is in PA space */
build_append_int_noprefix(table_data, mr->container->addr + mr->addr, 8);
/* Length */
build_append_int_noprefix(table_data, memory_region_size(mr), 8);
}
/*
* CFMWS entries in CXL 2.0 ECN: CEDT CFMWS & QTG _DSM.
* Interleave ways encoding in CXL 2.0 ECN: 3, 6, 12 and 16-way memory
* interleaving.
*/
static void cedt_build_cfmws(GArray *table_data, MachineState *ms)
{
CXLState *cxls = ms->cxl_devices_state;
GList *it;
for (it = cxls->fixed_windows; it; it = it->next) {
CXLFixedWindow *fw = it->data;
int i;
/* Type */
build_append_int_noprefix(table_data, 1, 1);
/* Reserved */
build_append_int_noprefix(table_data, 0, 1);
/* Record Length */
build_append_int_noprefix(table_data, 36 + 4 * fw->num_targets, 2);
/* Reserved */
build_append_int_noprefix(table_data, 0, 4);
/* Base HPA */
build_append_int_noprefix(table_data, fw->mr.addr, 8);
/* Window Size */
build_append_int_noprefix(table_data, fw->size, 8);
/* Host Bridge Interleave Ways */
build_append_int_noprefix(table_data, fw->enc_int_ways, 1);
/* Host Bridge Interleave Arithmetic */
build_append_int_noprefix(table_data, 0, 1);
/* Reserved */
build_append_int_noprefix(table_data, 0, 2);
/* Host Bridge Interleave Granularity */
build_append_int_noprefix(table_data, fw->enc_int_gran, 4);
/* Window Restrictions */
build_append_int_noprefix(table_data, 0x0f, 2); /* No restrictions */
/* QTG ID */
build_append_int_noprefix(table_data, 0, 2);
/* Host Bridge List (list of UIDs - currently bus_nr) */
for (i = 0; i < fw->num_targets; i++) {
g_assert(fw->target_hbs[i]);
build_append_int_noprefix(table_data, fw->target_hbs[i]->bus_nr, 4);
}
}
}
static int cxl_foreach_pxb_hb(Object *obj, void *opaque)
{
Aml *cedt = opaque;
if (object_dynamic_cast(obj, TYPE_PXB_CXL_DEVICE)) {
cedt_build_chbs(cedt->buf, PXB_CXL_DEV(obj));
}
return 0;
}
void cxl_build_cedt(MachineState *ms, GArray *table_offsets, GArray *table_data,
BIOSLinker *linker, const char *oem_id,
const char *oem_table_id)
{
Aml *cedt;
AcpiTable table = { .sig = "CEDT", .rev = 1, .oem_id = oem_id,
.oem_table_id = oem_table_id };
acpi_add_table(table_offsets, table_data);
acpi_table_begin(&table, table_data);
cedt = init_aml_allocator();
/* reserve space for CEDT header */
object_child_foreach_recursive(object_get_root(), cxl_foreach_pxb_hb, cedt);
cedt_build_cfmws(cedt->buf, ms);
/* copy AML table into ACPI tables blob and patch header there */
g_array_append_vals(table_data, cedt->buf->data, cedt->buf->len);
free_aml_allocator();
acpi_table_end(linker, &table);
}
static Aml *__build_cxl_osc_method(void)
{
Aml *method, *if_uuid, *else_uuid, *if_arg1_not_1, *if_cxl, *if_caps_masked;
Aml *a_ctrl = aml_local(0);
Aml *a_cdw1 = aml_name("CDW1");
method = aml_method("_OSC", 4, AML_NOTSERIALIZED);
/* CDW1 is used for the return value so is present whether or not a match occurs */
aml_append(method, aml_create_dword_field(aml_arg(3), aml_int(0), "CDW1"));
/*
* Generate shared section between:
* CXL 2.0 - 9.14.2.1.4 and
* PCI Firmware Specification 3.0
* 4.5.1. _OSC Interface for PCI Host Bridge Devices
* The _OSC interface for a PCI/PCI-X/PCI Express hierarchy is
* identified by the Universal Unique IDentifier (UUID)
* 33DB4D5B-1FF7-401C-9657-7441C03DD766
* The _OSC interface for a CXL Host bridge is
* identified by the UUID 68F2D50B-C469-4D8A-BD3D-941A103FD3FC
* A CXL Host bridge is compatible with a PCI host bridge so
* for the shared section match both.
*/
if_uuid = aml_if(
aml_lor(aml_equal(aml_arg(0),
aml_touuid("33DB4D5B-1FF7-401C-9657-7441C03DD766")),
aml_equal(aml_arg(0),
aml_touuid("68F2D50B-C469-4D8A-BD3D-941A103FD3FC"))));
aml_append(if_uuid, aml_create_dword_field(aml_arg(3), aml_int(4), "CDW2"));
aml_append(if_uuid, aml_create_dword_field(aml_arg(3), aml_int(8), "CDW3"));
aml_append(if_uuid, aml_store(aml_name("CDW3"), a_ctrl));
/*
*
* Allows OS control for all 5 features:
* PCIeHotplug SHPCHotplug PME AER PCIeCapability
*/
aml_append(if_uuid, aml_and(a_ctrl, aml_int(0x1F), a_ctrl));
/*
* Check _OSC revision.
* PCI Firmware specification 3.3 and CXL 2.0 both use revision 1
* Unknown Revision is CDW1 - BIT (3)
*/
if_arg1_not_1 = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(0x1))));
aml_append(if_arg1_not_1, aml_or(a_cdw1, aml_int(0x08), a_cdw1));
aml_append(if_uuid, if_arg1_not_1);
if_caps_masked = aml_if(aml_lnot(aml_equal(aml_name("CDW3"), a_ctrl)));
/* Capability bits were masked */
aml_append(if_caps_masked, aml_or(a_cdw1, aml_int(0x10), a_cdw1));
aml_append(if_uuid, if_caps_masked);
aml_append(if_uuid, aml_store(aml_name("CDW2"), aml_name("SUPP")));
aml_append(if_uuid, aml_store(aml_name("CDW3"), aml_name("CTRL")));
/* Update DWORD3 (the return value) */
aml_append(if_uuid, aml_store(a_ctrl, aml_name("CDW3")));
/* CXL only section as per CXL 2.0 - 9.14.2.1.4 */
if_cxl = aml_if(aml_equal(
aml_arg(0), aml_touuid("68F2D50B-C469-4D8A-BD3D-941A103FD3FC")));
/* CXL support field */
aml_append(if_cxl, aml_create_dword_field(aml_arg(3), aml_int(12), "CDW4"));
/* CXL capabilities */
aml_append(if_cxl, aml_create_dword_field(aml_arg(3), aml_int(16), "CDW5"));
aml_append(if_cxl, aml_store(aml_name("CDW4"), aml_name("SUPC")));
aml_append(if_cxl, aml_store(aml_name("CDW5"), aml_name("CTRC")));
/* CXL 2.0 Port/Device Register access */
aml_append(if_cxl,
aml_or(aml_name("CDW5"), aml_int(0x1), aml_name("CDW5")));
aml_append(if_uuid, if_cxl);
aml_append(if_uuid, aml_return(aml_arg(3)));
aml_append(method, if_uuid);
/*
* If no UUID matched, return Unrecognized UUID via Arg3 DWord 1
* ACPI 6.4 - 6.2.11
* Unrecognised UUID - BIT(2)
*/
else_uuid = aml_else();
aml_append(else_uuid,
aml_or(aml_name("CDW1"), aml_int(0x4), aml_name("CDW1")));
aml_append(else_uuid, aml_return(aml_arg(3)));
aml_append(method, else_uuid);
return method;
}
void build_cxl_osc_method(Aml *dev)
{
aml_append(dev, aml_name_decl("SUPP", aml_int(0)));
aml_append(dev, aml_name_decl("CTRL", aml_int(0)));
aml_append(dev, aml_name_decl("SUPC", aml_int(0)));
aml_append(dev, aml_name_decl("CTRC", aml_int(0)));
aml_append(dev, __build_cxl_osc_method());
}

View file

@ -13,6 +13,7 @@ acpi_ss.add(when: 'CONFIG_ACPI_MEMORY_HOTPLUG', if_false: files('acpi-mem-hotplu
acpi_ss.add(when: 'CONFIG_ACPI_NVDIMM', if_true: files('nvdimm.c'))
acpi_ss.add(when: 'CONFIG_ACPI_NVDIMM', if_false: files('acpi-nvdimm-stub.c'))
acpi_ss.add(when: 'CONFIG_ACPI_PCI', if_true: files('pci.c'))
acpi_ss.add(when: 'CONFIG_ACPI_CXL', if_true: files('cxl.c'), if_false: files('cxl-stub.c'))
acpi_ss.add(when: 'CONFIG_ACPI_VMGENID', if_true: files('vmgenid.c'))
acpi_ss.add(when: 'CONFIG_ACPI_HW_REDUCED', if_true: files('generic_event_device.c'))
acpi_ss.add(when: 'CONFIG_ACPI_HMAT', if_true: files('hmat.c'))
@ -33,4 +34,5 @@ softmmu_ss.add_all(when: 'CONFIG_ACPI', if_true: acpi_ss)
softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('acpi-stub.c', 'aml-build-stub.c',
'acpi-x86-stub.c', 'ipmi-stub.c', 'ghes-stub.c',
'acpi-mem-hotplug-stub.c', 'acpi-cpu-hotplug-stub.c',
'acpi-pci-hotplug-stub.c', 'acpi-nvdimm-stub.c'))
'acpi-pci-hotplug-stub.c', 'acpi-nvdimm-stub.c',
'cxl-stub.c'))

View file

@ -29,6 +29,7 @@ config ARM_VIRT
select ACPI_APEI
select ACPI_VIOT
select VIRTIO_MEM_SUPPORTED
select ACPI_CXL
config CHEETAH
bool

View file

@ -491,7 +491,7 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
return;
}
virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK,
virtio_init(vdev, VIRTIO_ID_BLOCK,
sizeof(struct virtio_blk_config));
s->virtqs = g_new(VirtQueue *, s->num_queues);
@ -569,6 +569,12 @@ static void vhost_user_blk_instance_init(Object *obj)
"/disk@0,0", DEVICE(obj));
}
static struct vhost_dev *vhost_user_blk_get_vhost(VirtIODevice *vdev)
{
VHostUserBlk *s = VHOST_USER_BLK(vdev);
return &s->dev;
}
static const VMStateDescription vmstate_vhost_user_blk = {
.name = "vhost-user-blk",
.minimum_version_id = 1,
@ -603,6 +609,7 @@ static void vhost_user_blk_class_init(ObjectClass *klass, void *data)
vdc->get_features = vhost_user_blk_get_features;
vdc->set_status = vhost_user_blk_set_status;
vdc->reset = vhost_user_blk_reset;
vdc->get_vhost = vhost_user_blk_get_vhost;
}
static const TypeInfo vhost_user_blk_info = {

View file

@ -1206,7 +1206,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
virtio_blk_set_config_size(s, s->host_features);
virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, s->config_size);
virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size);
s->blk = conf->conf.blk;
s->rq = NULL;

View file

@ -1044,8 +1044,7 @@ static void virtio_serial_device_realize(DeviceState *dev, Error **errp)
VIRTIO_CONSOLE_F_EMERG_WRITE)) {
config_size = offsetof(struct virtio_console_config, emerg_wr);
}
virtio_init(vdev, "virtio-serial", VIRTIO_ID_CONSOLE,
config_size);
virtio_init(vdev, VIRTIO_ID_CONSOLE, config_size);
/* Spawn a new virtio-serial bus on which the ports will ride as devices */
qbus_init(&vser->bus, sizeof(vser->bus), TYPE_VIRTIO_SERIAL_BUS,

View file

@ -33,6 +33,7 @@
#include "sysemu/qtest.h"
#include "hw/pci/pci.h"
#include "hw/mem/nvdimm.h"
#include "hw/cxl/cxl.h"
#include "migration/global_state.h"
#include "migration/vmstate.h"
#include "exec/confidential-guest-support.h"
@ -625,6 +626,20 @@ static void machine_set_nvdimm_persistence(Object *obj, const char *value,
nvdimms_state->persistence_string = g_strdup(value);
}
static bool machine_get_cxl(Object *obj, Error **errp)
{
MachineState *ms = MACHINE(obj);
return ms->cxl_devices_state->is_enabled;
}
static void machine_set_cxl(Object *obj, bool value, Error **errp)
{
MachineState *ms = MACHINE(obj);
ms->cxl_devices_state->is_enabled = value;
}
void machine_class_allow_dynamic_sysbus_dev(MachineClass *mc, const char *type)
{
QAPI_LIST_PREPEND(mc->allowed_dynamic_sysbus_devices, g_strdup(type));
@ -911,6 +926,8 @@ static void machine_class_init(ObjectClass *oc, void *data)
mc->default_ram_size = 128 * MiB;
mc->rom_file_has_mr = true;
/* Few machines support CXL, so default to off */
mc->cxl_supported = false;
/* numa node memory size aligned on 8MB by default.
* On Linux, each node's border has to be 8MB aligned
*/
@ -1071,6 +1088,16 @@ static void machine_initfn(Object *obj)
"Valid values are cpu, mem-ctrl");
}
if (mc->cxl_supported) {
Object *obj = OBJECT(ms);
ms->cxl_devices_state = g_new0(CXLState, 1);
object_property_add_bool(obj, "cxl", machine_get_cxl, machine_set_cxl);
object_property_set_description(obj, "cxl",
"Set on/off to enable/disable "
"CXL instantiation");
}
if (mc->cpu_index_to_instance_props && mc->get_default_cpu_node_id) {
ms->numa_state = g_new0(NumaState, 1);
object_property_add_bool(obj, "hmat",
@ -1108,6 +1135,7 @@ static void machine_finalize(Object *obj)
g_free(ms->device_memory);
g_free(ms->nvdimms_state);
g_free(ms->numa_state);
g_free(ms->cxl_devices_state);
}
bool machine_usb(MachineState *machine)

3
hw/cxl/Kconfig Normal file
View file

@ -0,0 +1,3 @@
config CXL
bool
default y if PCI_EXPRESS

View file

@ -0,0 +1,396 @@
/*
* CXL Utility library for components
*
* Copyright(C) 2020 Intel Corporation.
*
* This work is licensed under the terms of the GNU GPL, version 2. See the
* COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qapi/error.h"
#include "hw/pci/pci.h"
#include "hw/cxl/cxl.h"
static uint64_t cxl_cache_mem_read_reg(void *opaque, hwaddr offset,
unsigned size)
{
CXLComponentState *cxl_cstate = opaque;
ComponentRegisters *cregs = &cxl_cstate->crb;
if (size == 8) {
qemu_log_mask(LOG_UNIMP,
"CXL 8 byte cache mem registers not implemented\n");
return 0;
}
if (cregs->special_ops && cregs->special_ops->read) {
return cregs->special_ops->read(cxl_cstate, offset, size);
} else {
return cregs->cache_mem_registers[offset / sizeof(*cregs->cache_mem_registers)];
}
}
static void dumb_hdm_handler(CXLComponentState *cxl_cstate, hwaddr offset,
uint32_t value)
{
ComponentRegisters *cregs = &cxl_cstate->crb;
uint32_t *cache_mem = cregs->cache_mem_registers;
bool should_commit = false;
switch (offset) {
case A_CXL_HDM_DECODER0_CTRL:
should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
break;
default:
break;
}
memory_region_transaction_begin();
stl_le_p((uint8_t *)cache_mem + offset, value);
if (should_commit) {
ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMIT, 0);
ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, ERR, 0);
ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMITTED, 1);
}
memory_region_transaction_commit();
}
static void cxl_cache_mem_write_reg(void *opaque, hwaddr offset, uint64_t value,
unsigned size)
{
CXLComponentState *cxl_cstate = opaque;
ComponentRegisters *cregs = &cxl_cstate->crb;
uint32_t mask;
if (size == 8) {
qemu_log_mask(LOG_UNIMP,
"CXL 8 byte cache mem registers not implemented\n");
return;
}
mask = cregs->cache_mem_regs_write_mask[offset / sizeof(*cregs->cache_mem_regs_write_mask)];
value &= mask;
/* RO bits should remain constant. Done by reading existing value */
value |= ~mask & cregs->cache_mem_registers[offset / sizeof(*cregs->cache_mem_registers)];
if (cregs->special_ops && cregs->special_ops->write) {
cregs->special_ops->write(cxl_cstate, offset, value, size);
return;
}
if (offset >= A_CXL_HDM_DECODER_CAPABILITY &&
offset <= A_CXL_HDM_DECODER0_TARGET_LIST_HI) {
dumb_hdm_handler(cxl_cstate, offset, value);
} else {
cregs->cache_mem_registers[offset / sizeof(*cregs->cache_mem_registers)] = value;
}
}
/*
* 8.2.3
* The access restrictions specified in Section 8.2.2 also apply to CXL 2.0
* Component Registers.
*
* 8.2.2
* A 32 bit register shall be accessed as a 4 Bytes quantity. Partial
* reads are not permitted.
* A 64 bit register shall be accessed as a 8 Bytes quantity. Partial
* reads are not permitted.
*
* As of the spec defined today, only 4 byte registers exist.
*/
static const MemoryRegionOps cache_mem_ops = {
.read = cxl_cache_mem_read_reg,
.write = cxl_cache_mem_write_reg,
.endianness = DEVICE_LITTLE_ENDIAN,
.valid = {
.min_access_size = 4,
.max_access_size = 8,
.unaligned = false,
},
.impl = {
.min_access_size = 4,
.max_access_size = 8,
},
};
void cxl_component_register_block_init(Object *obj,
CXLComponentState *cxl_cstate,
const char *type)
{
ComponentRegisters *cregs = &cxl_cstate->crb;
memory_region_init(&cregs->component_registers, obj, type,
CXL2_COMPONENT_BLOCK_SIZE);
/* io registers controls link which we don't care about in QEMU */
memory_region_init_io(&cregs->io, obj, NULL, cregs, ".io",
CXL2_COMPONENT_IO_REGION_SIZE);
memory_region_init_io(&cregs->cache_mem, obj, &cache_mem_ops, cregs,
".cache_mem", CXL2_COMPONENT_CM_REGION_SIZE);
memory_region_add_subregion(&cregs->component_registers, 0, &cregs->io);
memory_region_add_subregion(&cregs->component_registers,
CXL2_COMPONENT_IO_REGION_SIZE,
&cregs->cache_mem);
}
static void ras_init_common(uint32_t *reg_state, uint32_t *write_msk)
{
/*
* Error status is RW1C but given bits are not yet set, it can
* be handled as RO.
*/
reg_state[R_CXL_RAS_UNC_ERR_STATUS] = 0;
/* Bits 12-13 and 17-31 reserved in CXL 2.0 */
reg_state[R_CXL_RAS_UNC_ERR_MASK] = 0x1cfff;
write_msk[R_CXL_RAS_UNC_ERR_MASK] = 0x1cfff;
reg_state[R_CXL_RAS_UNC_ERR_SEVERITY] = 0x1cfff;
write_msk[R_CXL_RAS_UNC_ERR_SEVERITY] = 0x1cfff;
reg_state[R_CXL_RAS_COR_ERR_STATUS] = 0;
reg_state[R_CXL_RAS_COR_ERR_MASK] = 0x7f;
write_msk[R_CXL_RAS_COR_ERR_MASK] = 0x7f;
/* CXL switches and devices must set */
reg_state[R_CXL_RAS_ERR_CAP_CTRL] = 0x00;
}
static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk)
{
int decoder_count = 1;
int i;
ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, DECODER_COUNT,
cxl_decoder_count_enc(decoder_count));
ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 1);
ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, INTERLEAVE_256B, 1);
ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, INTERLEAVE_4K, 1);
ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, POISON_ON_ERR_CAP, 0);
ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_GLOBAL_CONTROL,
HDM_DECODER_ENABLE, 0);
write_msk[R_CXL_HDM_DECODER_GLOBAL_CONTROL] = 0x3;
for (i = 0; i < decoder_count; i++) {
write_msk[R_CXL_HDM_DECODER0_BASE_LO + i * 0x20] = 0xf0000000;
write_msk[R_CXL_HDM_DECODER0_BASE_HI + i * 0x20] = 0xffffffff;
write_msk[R_CXL_HDM_DECODER0_SIZE_LO + i * 0x20] = 0xf0000000;
write_msk[R_CXL_HDM_DECODER0_SIZE_HI + i * 0x20] = 0xffffffff;
write_msk[R_CXL_HDM_DECODER0_CTRL + i * 0x20] = 0x13ff;
}
}
void cxl_component_register_init_common(uint32_t *reg_state, uint32_t *write_msk,
enum reg_type type)
{
int caps = 0;
/*
* In CXL 2.0 the capabilities required for each CXL component are such that,
* with the ordering chosen here, a single number can be used to define
* which capabilities should be provided.
*/
switch (type) {
case CXL2_DOWNSTREAM_PORT:
case CXL2_DEVICE:
/* RAS, Link */
caps = 2;
break;
case CXL2_UPSTREAM_PORT:
case CXL2_TYPE3_DEVICE:
case CXL2_LOGICAL_DEVICE:
/* + HDM */
caps = 3;
break;
case CXL2_ROOT_PORT:
/* + Extended Security, + Snoop */
caps = 5;
break;
default:
abort();
}
memset(reg_state, 0, CXL2_COMPONENT_CM_REGION_SIZE);
/* CXL Capability Header Register */
ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, ID, 1);
ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, VERSION, 1);
ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, CACHE_MEM_VERSION, 1);
ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, ARRAY_SIZE, caps);
#define init_cap_reg(reg, id, version) \
QEMU_BUILD_BUG_ON(CXL_##reg##_REGISTERS_OFFSET == 0); \
do { \
int which = R_CXL_##reg##_CAPABILITY_HEADER; \
reg_state[which] = FIELD_DP32(reg_state[which], \
CXL_##reg##_CAPABILITY_HEADER, ID, id); \
reg_state[which] = \
FIELD_DP32(reg_state[which], CXL_##reg##_CAPABILITY_HEADER, \
VERSION, version); \
reg_state[which] = \
FIELD_DP32(reg_state[which], CXL_##reg##_CAPABILITY_HEADER, PTR, \
CXL_##reg##_REGISTERS_OFFSET); \
} while (0)
init_cap_reg(RAS, 2, 2);
ras_init_common(reg_state, write_msk);
init_cap_reg(LINK, 4, 2);
if (caps < 3) {
return;
}
init_cap_reg(HDM, 5, 1);
hdm_init_common(reg_state, write_msk);
if (caps < 5) {
return;
}
init_cap_reg(EXTSEC, 6, 1);
init_cap_reg(SNOOP, 8, 1);
#undef init_cap_reg
}
/*
* Helper to creates a DVSEC header for a CXL entity. The caller is responsible
* for tracking the valid offset.
*
* This function will build the DVSEC header on behalf of the caller and then
* copy in the remaining data for the vendor specific bits.
* It will also set up appropriate write masks.
*/
void cxl_component_create_dvsec(CXLComponentState *cxl,
enum reg_type cxl_dev_type, uint16_t length,
uint16_t type, uint8_t rev, uint8_t *body)
{
PCIDevice *pdev = cxl->pdev;
uint16_t offset = cxl->dvsec_offset;
uint8_t *wmask = pdev->wmask;
assert(offset >= PCI_CFG_SPACE_SIZE &&
((offset + length) < PCI_CFG_SPACE_EXP_SIZE));
assert((length & 0xf000) == 0);
assert((rev & ~0xf) == 0);
/* Create the DVSEC in the MCFG space */
pcie_add_capability(pdev, PCI_EXT_CAP_ID_DVSEC, 1, offset, length);
pci_set_long(pdev->config + offset + PCIE_DVSEC_HEADER1_OFFSET,
(length << 20) | (rev << 16) | CXL_VENDOR_ID);
pci_set_word(pdev->config + offset + PCIE_DVSEC_ID_OFFSET, type);
memcpy(pdev->config + offset + sizeof(DVSECHeader),
body + sizeof(DVSECHeader),
length - sizeof(DVSECHeader));
/* Configure write masks */
switch (type) {
case PCIE_CXL_DEVICE_DVSEC:
/* Cntrl RW Lock - so needs explicit blocking when lock is set */
wmask[offset + offsetof(CXLDVSECDevice, ctrl)] = 0xFD;
wmask[offset + offsetof(CXLDVSECDevice, ctrl) + 1] = 0x4F;
/* Status is RW1CS */
wmask[offset + offsetof(CXLDVSECDevice, ctrl2)] = 0x0F;
/* Lock is RW Once */
wmask[offset + offsetof(CXLDVSECDevice, lock)] = 0x01;
/* range1/2_base_high/low is RW Lock */
wmask[offset + offsetof(CXLDVSECDevice, range1_base_hi)] = 0xFF;
wmask[offset + offsetof(CXLDVSECDevice, range1_base_hi) + 1] = 0xFF;
wmask[offset + offsetof(CXLDVSECDevice, range1_base_hi) + 2] = 0xFF;
wmask[offset + offsetof(CXLDVSECDevice, range1_base_hi) + 3] = 0xFF;
wmask[offset + offsetof(CXLDVSECDevice, range1_base_lo) + 3] = 0xF0;
wmask[offset + offsetof(CXLDVSECDevice, range2_base_hi)] = 0xFF;
wmask[offset + offsetof(CXLDVSECDevice, range2_base_hi) + 1] = 0xFF;
wmask[offset + offsetof(CXLDVSECDevice, range2_base_hi) + 2] = 0xFF;
wmask[offset + offsetof(CXLDVSECDevice, range2_base_hi) + 3] = 0xFF;
wmask[offset + offsetof(CXLDVSECDevice, range2_base_lo) + 3] = 0xF0;
break;
case NON_CXL_FUNCTION_MAP_DVSEC:
break; /* Not yet implemented */
case EXTENSIONS_PORT_DVSEC:
wmask[offset + offsetof(CXLDVSECPortExtensions, control)] = 0x0F;
wmask[offset + offsetof(CXLDVSECPortExtensions, control) + 1] = 0x40;
wmask[offset + offsetof(CXLDVSECPortExtensions, alt_bus_base)] = 0xFF;
wmask[offset + offsetof(CXLDVSECPortExtensions, alt_bus_limit)] = 0xFF;
wmask[offset + offsetof(CXLDVSECPortExtensions, alt_memory_base)] = 0xF0;
wmask[offset + offsetof(CXLDVSECPortExtensions, alt_memory_base) + 1] = 0xFF;
wmask[offset + offsetof(CXLDVSECPortExtensions, alt_memory_limit)] = 0xF0;
wmask[offset + offsetof(CXLDVSECPortExtensions, alt_memory_limit) + 1] = 0xFF;
wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_base)] = 0xF0;
wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_base) + 1] = 0xFF;
wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_limit)] = 0xF0;
wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_limit) + 1] = 0xFF;
wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_base_high)] = 0xFF;
wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_base_high) + 1] = 0xFF;
wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_base_high) + 2] = 0xFF;
wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_base_high) + 3] = 0xFF;
wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_limit_high)] = 0xFF;
wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_limit_high) + 1] = 0xFF;
wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_limit_high) + 2] = 0xFF;
wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_limit_high) + 3] = 0xFF;
break;
case GPF_PORT_DVSEC:
wmask[offset + offsetof(CXLDVSECPortGPF, phase1_ctrl)] = 0x0F;
wmask[offset + offsetof(CXLDVSECPortGPF, phase1_ctrl) + 1] = 0x0F;
wmask[offset + offsetof(CXLDVSECPortGPF, phase2_ctrl)] = 0x0F;
wmask[offset + offsetof(CXLDVSECPortGPF, phase2_ctrl) + 1] = 0x0F;
break;
case GPF_DEVICE_DVSEC:
wmask[offset + offsetof(CXLDVSECDeviceGPF, phase2_duration)] = 0x0F;
wmask[offset + offsetof(CXLDVSECDeviceGPF, phase2_duration) + 1] = 0x0F;
wmask[offset + offsetof(CXLDVSECDeviceGPF, phase2_power)] = 0xFF;
wmask[offset + offsetof(CXLDVSECDeviceGPF, phase2_power) + 1] = 0xFF;
wmask[offset + offsetof(CXLDVSECDeviceGPF, phase2_power) + 2] = 0xFF;
wmask[offset + offsetof(CXLDVSECDeviceGPF, phase2_power) + 3] = 0xFF;
break;
case PCIE_FLEXBUS_PORT_DVSEC:
switch (cxl_dev_type) {
case CXL2_ROOT_PORT:
/* No MLD */
wmask[offset + offsetof(CXLDVSECPortFlexBus, ctrl)] = 0xbd;
break;
case CXL2_DOWNSTREAM_PORT:
wmask[offset + offsetof(CXLDVSECPortFlexBus, ctrl)] = 0xfd;
break;
default: /* Registers are RO for other component types */
break;
}
/* There are rw1cs bits in the status register but never set currently */
break;
}
/* Update state for future DVSEC additions */
range_init_nofail(&cxl->dvsecs[type], cxl->dvsec_offset, length);
cxl->dvsec_offset += length;
}
uint8_t cxl_interleave_ways_enc(int iw, Error **errp)
{
switch (iw) {
case 1: return 0x0;
case 2: return 0x1;
case 4: return 0x2;
case 8: return 0x3;
case 16: return 0x4;
case 3: return 0x8;
case 6: return 0x9;
case 12: return 0xa;
default:
error_setg(errp, "Interleave ways: %d not supported", iw);
return 0;
}
}
uint8_t cxl_interleave_granularity_enc(uint64_t gran, Error **errp)
{
switch (gran) {
case 256: return 0;
case 512: return 1;
case 1024: return 2;
case 2048: return 3;
case 4096: return 4;
case 8192: return 5;
case 16384: return 6;
default:
error_setg(errp, "Interleave granularity: %" PRIu64 " invalid", gran);
return 0;
}
}

265
hw/cxl/cxl-device-utils.c Normal file
View file

@ -0,0 +1,265 @@
/*
* CXL Utility library for devices
*
* Copyright(C) 2020 Intel Corporation.
*
* This work is licensed under the terms of the GNU GPL, version 2. See the
* COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "qemu/log.h"
#include "hw/cxl/cxl.h"
/*
* Device registers have no restrictions per the spec, and so fall back to the
* default memory mapped register rules in 8.2:
* Software shall use CXL.io Memory Read and Write to access memory mapped
* register defined in this section. Unless otherwise specified, software
* shall restrict the accesses width based on the following:
* A 32 bit register shall be accessed as a 1 Byte, 2 Bytes or 4 Bytes
* quantity.
* A 64 bit register shall be accessed as a 1 Byte, 2 Bytes, 4 Bytes or 8
* Bytes
* The address shall be a multiple of the access width, e.g. when
* accessing a register as a 4 Byte quantity, the address shall be
* multiple of 4.
* The accesses shall map to contiguous bytes.If these rules are not
* followed, the behavior is undefined
*/
static uint64_t caps_reg_read(void *opaque, hwaddr offset, unsigned size)
{
CXLDeviceState *cxl_dstate = opaque;
if (size == 4) {
return cxl_dstate->caps_reg_state32[offset / sizeof(*cxl_dstate->caps_reg_state32)];
} else {
return cxl_dstate->caps_reg_state64[offset / sizeof(*cxl_dstate->caps_reg_state64)];
}
}
static uint64_t dev_reg_read(void *opaque, hwaddr offset, unsigned size)
{
return 0;
}
static uint64_t mailbox_reg_read(void *opaque, hwaddr offset, unsigned size)
{
CXLDeviceState *cxl_dstate = opaque;
switch (size) {
case 1:
return cxl_dstate->mbox_reg_state[offset];
case 2:
return cxl_dstate->mbox_reg_state16[offset / size];
case 4:
return cxl_dstate->mbox_reg_state32[offset / size];
case 8:
return cxl_dstate->mbox_reg_state64[offset / size];
default:
g_assert_not_reached();
}
}
static void mailbox_mem_writel(uint32_t *reg_state, hwaddr offset,
uint64_t value)
{
switch (offset) {
case A_CXL_DEV_MAILBOX_CTRL:
/* fallthrough */
case A_CXL_DEV_MAILBOX_CAP:
/* RO register */
break;
default:
qemu_log_mask(LOG_UNIMP,
"%s Unexpected 32-bit access to 0x%" PRIx64 " (WI)\n",
__func__, offset);
return;
}
reg_state[offset / sizeof(*reg_state)] = value;
}
static void mailbox_mem_writeq(uint64_t *reg_state, hwaddr offset,
uint64_t value)
{
switch (offset) {
case A_CXL_DEV_MAILBOX_CMD:
break;
case A_CXL_DEV_BG_CMD_STS:
/* BG not supported */
/* fallthrough */
case A_CXL_DEV_MAILBOX_STS:
/* Read only register, will get updated by the state machine */
return;
default:
qemu_log_mask(LOG_UNIMP,
"%s Unexpected 64-bit access to 0x%" PRIx64 " (WI)\n",
__func__, offset);
return;
}
reg_state[offset / sizeof(*reg_state)] = value;
}
static void mailbox_reg_write(void *opaque, hwaddr offset, uint64_t value,
unsigned size)
{
CXLDeviceState *cxl_dstate = opaque;
if (offset >= A_CXL_DEV_CMD_PAYLOAD) {
memcpy(cxl_dstate->mbox_reg_state + offset, &value, size);
return;
}
switch (size) {
case 4:
mailbox_mem_writel(cxl_dstate->mbox_reg_state32, offset, value);
break;
case 8:
mailbox_mem_writeq(cxl_dstate->mbox_reg_state64, offset, value);
break;
default:
g_assert_not_reached();
}
if (ARRAY_FIELD_EX32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CTRL,
DOORBELL)) {
cxl_process_mailbox(cxl_dstate);
}
}
static uint64_t mdev_reg_read(void *opaque, hwaddr offset, unsigned size)
{
uint64_t retval = 0;
retval = FIELD_DP64(retval, CXL_MEM_DEV_STS, MEDIA_STATUS, 1);
retval = FIELD_DP64(retval, CXL_MEM_DEV_STS, MBOX_READY, 1);
return retval;
}
static const MemoryRegionOps mdev_ops = {
.read = mdev_reg_read,
.write = NULL, /* memory device register is read only */
.endianness = DEVICE_LITTLE_ENDIAN,
.valid = {
.min_access_size = 1,
.max_access_size = 8,
.unaligned = false,
},
.impl = {
.min_access_size = 8,
.max_access_size = 8,
},
};
static const MemoryRegionOps mailbox_ops = {
.read = mailbox_reg_read,
.write = mailbox_reg_write,
.endianness = DEVICE_LITTLE_ENDIAN,
.valid = {
.min_access_size = 1,
.max_access_size = 8,
.unaligned = false,
},
.impl = {
.min_access_size = 1,
.max_access_size = 8,
},
};
static const MemoryRegionOps dev_ops = {
.read = dev_reg_read,
.write = NULL, /* status register is read only */
.endianness = DEVICE_LITTLE_ENDIAN,
.valid = {
.min_access_size = 1,
.max_access_size = 8,
.unaligned = false,
},
.impl = {
.min_access_size = 1,
.max_access_size = 8,
},
};
static const MemoryRegionOps caps_ops = {
.read = caps_reg_read,
.write = NULL, /* caps registers are read only */
.endianness = DEVICE_LITTLE_ENDIAN,
.valid = {
.min_access_size = 1,
.max_access_size = 8,
.unaligned = false,
},
.impl = {
.min_access_size = 4,
.max_access_size = 8,
},
};
void cxl_device_register_block_init(Object *obj, CXLDeviceState *cxl_dstate)
{
/* This will be a BAR, so needs to be rounded up to pow2 for PCI spec */
memory_region_init(&cxl_dstate->device_registers, obj, "device-registers",
pow2ceil(CXL_MMIO_SIZE));
memory_region_init_io(&cxl_dstate->caps, obj, &caps_ops, cxl_dstate,
"cap-array", CXL_CAPS_SIZE);
memory_region_init_io(&cxl_dstate->device, obj, &dev_ops, cxl_dstate,
"device-status", CXL_DEVICE_STATUS_REGISTERS_LENGTH);
memory_region_init_io(&cxl_dstate->mailbox, obj, &mailbox_ops, cxl_dstate,
"mailbox", CXL_MAILBOX_REGISTERS_LENGTH);
memory_region_init_io(&cxl_dstate->memory_device, obj, &mdev_ops,
cxl_dstate, "memory device caps",
CXL_MEMORY_DEVICE_REGISTERS_LENGTH);
memory_region_add_subregion(&cxl_dstate->device_registers, 0,
&cxl_dstate->caps);
memory_region_add_subregion(&cxl_dstate->device_registers,
CXL_DEVICE_STATUS_REGISTERS_OFFSET,
&cxl_dstate->device);
memory_region_add_subregion(&cxl_dstate->device_registers,
CXL_MAILBOX_REGISTERS_OFFSET,
&cxl_dstate->mailbox);
memory_region_add_subregion(&cxl_dstate->device_registers,
CXL_MEMORY_DEVICE_REGISTERS_OFFSET,
&cxl_dstate->memory_device);
}
static void device_reg_init_common(CXLDeviceState *cxl_dstate) { }
static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate)
{
/* 2048 payload size, with no interrupt or background support */
ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
PAYLOAD_SIZE, CXL_MAILBOX_PAYLOAD_SHIFT);
cxl_dstate->payload_size = CXL_MAILBOX_MAX_PAYLOAD_SIZE;
}
static void memdev_reg_init_common(CXLDeviceState *cxl_dstate) { }
void cxl_device_register_init_common(CXLDeviceState *cxl_dstate)
{
uint64_t *cap_hdrs = cxl_dstate->caps_reg_state64;
const int cap_count = 3;
/* CXL Device Capabilities Array Register */
ARRAY_FIELD_DP64(cap_hdrs, CXL_DEV_CAP_ARRAY, CAP_ID, 0);
ARRAY_FIELD_DP64(cap_hdrs, CXL_DEV_CAP_ARRAY, CAP_VERSION, 1);
ARRAY_FIELD_DP64(cap_hdrs, CXL_DEV_CAP_ARRAY, CAP_COUNT, cap_count);
cxl_device_cap_init(cxl_dstate, DEVICE_STATUS, 1);
device_reg_init_common(cxl_dstate);
cxl_device_cap_init(cxl_dstate, MAILBOX, 2);
mailbox_reg_init_common(cxl_dstate);
cxl_device_cap_init(cxl_dstate, MEMORY_DEVICE, 0x4000);
memdev_reg_init_common(cxl_dstate);
assert(cxl_initialize_mailbox(cxl_dstate) == 0);
}

16
hw/cxl/cxl-host-stubs.c Normal file
View file

@ -0,0 +1,16 @@
/*
* CXL host parameter parsing routine stubs
*
* Copyright (c) 2022 Huawei
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/cxl/cxl.h"
void cxl_fixed_memory_window_config(MachineState *ms,
CXLFixedMemoryWindowOptions *object,
Error **errp) {};
void cxl_fixed_memory_window_link_targets(Error **errp) {};
const MemoryRegionOps cfmws_ops;

222
hw/cxl/cxl-host.c Normal file
View file

@ -0,0 +1,222 @@
/*
* CXL host parameter parsing routines
*
* Copyright (c) 2022 Huawei
* Modeled loosely on the NUMA options handling in hw/core/numa.c
*/
#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qemu/bitmap.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "sysemu/qtest.h"
#include "hw/boards.h"
#include "qapi/qapi-visit-machine.h"
#include "hw/cxl/cxl.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci_bridge.h"
#include "hw/pci/pci_host.h"
#include "hw/pci/pcie_port.h"
void cxl_fixed_memory_window_config(MachineState *ms,
CXLFixedMemoryWindowOptions *object,
Error **errp)
{
CXLFixedWindow *fw = g_malloc0(sizeof(*fw));
strList *target;
int i;
for (target = object->targets; target; target = target->next) {
fw->num_targets++;
}
fw->enc_int_ways = cxl_interleave_ways_enc(fw->num_targets, errp);
if (*errp) {
return;
}
fw->targets = g_malloc0_n(fw->num_targets, sizeof(*fw->targets));
for (i = 0, target = object->targets; target; i++, target = target->next) {
/* This link cannot be resolved yet, so stash the name for now */
fw->targets[i] = g_strdup(target->value);
}
if (object->size % (256 * MiB)) {
error_setg(errp,
"Size of a CXL fixed memory window must my a multiple of 256MiB");
return;
}
fw->size = object->size;
if (object->has_interleave_granularity) {
fw->enc_int_gran =
cxl_interleave_granularity_enc(object->interleave_granularity,
errp);
if (*errp) {
return;
}
} else {
/* Default to 256 byte interleave */
fw->enc_int_gran = 0;
}
ms->cxl_devices_state->fixed_windows =
g_list_append(ms->cxl_devices_state->fixed_windows, fw);
return;
}
void cxl_fixed_memory_window_link_targets(Error **errp)
{
MachineState *ms = MACHINE(qdev_get_machine());
if (ms->cxl_devices_state && ms->cxl_devices_state->fixed_windows) {
GList *it;
for (it = ms->cxl_devices_state->fixed_windows; it; it = it->next) {
CXLFixedWindow *fw = it->data;
int i;
for (i = 0; i < fw->num_targets; i++) {
Object *o;
bool ambig;
o = object_resolve_path_type(fw->targets[i],
TYPE_PXB_CXL_DEVICE,
&ambig);
if (!o) {
error_setg(errp, "Could not resolve CXLFM target %s",
fw->targets[i]);
return;
}
fw->target_hbs[i] = PXB_CXL_DEV(o);
}
}
}
}
/* TODO: support, multiple hdm decoders */
static bool cxl_hdm_find_target(uint32_t *cache_mem, hwaddr addr,
uint8_t *target)
{
uint32_t ctrl;
uint32_t ig_enc;
uint32_t iw_enc;
uint32_t target_reg;
uint32_t target_idx;
ctrl = cache_mem[R_CXL_HDM_DECODER0_CTRL];
if (!FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED)) {
return false;
}
ig_enc = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IG);
iw_enc = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IW);
target_idx = (addr / cxl_decode_ig(ig_enc)) % (1 << iw_enc);
if (target_idx > 4) {
target_reg = cache_mem[R_CXL_HDM_DECODER0_TARGET_LIST_LO];
target_reg >>= target_idx * 8;
} else {
target_reg = cache_mem[R_CXL_HDM_DECODER0_TARGET_LIST_LO];
target_reg >>= (target_idx - 4) * 8;
}
*target = target_reg & 0xff;
return true;
}
static PCIDevice *cxl_cfmws_find_device(CXLFixedWindow *fw, hwaddr addr)
{
CXLComponentState *hb_cstate;
PCIHostState *hb;
int rb_index;
uint32_t *cache_mem;
uint8_t target;
bool target_found;
PCIDevice *rp, *d;
/* Address is relative to memory region. Convert to HPA */
addr += fw->base;
rb_index = (addr / cxl_decode_ig(fw->enc_int_gran)) % fw->num_targets;
hb = PCI_HOST_BRIDGE(fw->target_hbs[rb_index]->cxl.cxl_host_bridge);
if (!hb || !hb->bus || !pci_bus_is_cxl(hb->bus)) {
return NULL;
}
hb_cstate = cxl_get_hb_cstate(hb);
if (!hb_cstate) {
return NULL;
}
cache_mem = hb_cstate->crb.cache_mem_registers;
target_found = cxl_hdm_find_target(cache_mem, addr, &target);
if (!target_found) {
return NULL;
}
rp = pcie_find_port_by_pn(hb->bus, target);
if (!rp) {
return NULL;
}
d = pci_bridge_get_sec_bus(PCI_BRIDGE(rp))->devices[0];
if (!d || !object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) {
return NULL;
}
return d;
}
static MemTxResult cxl_read_cfmws(void *opaque, hwaddr addr, uint64_t *data,
unsigned size, MemTxAttrs attrs)
{
CXLFixedWindow *fw = opaque;
PCIDevice *d;
d = cxl_cfmws_find_device(fw, addr);
if (d == NULL) {
*data = 0;
/* Reads to invalid address return poison */
return MEMTX_ERROR;
}
return cxl_type3_read(d, addr + fw->base, data, size, attrs);
}
static MemTxResult cxl_write_cfmws(void *opaque, hwaddr addr,
uint64_t data, unsigned size,
MemTxAttrs attrs)
{
CXLFixedWindow *fw = opaque;
PCIDevice *d;
d = cxl_cfmws_find_device(fw, addr);
if (d == NULL) {
/* Writes to invalid address are silent */
return MEMTX_OK;
}
return cxl_type3_write(d, addr + fw->base, data, size, attrs);
}
const MemoryRegionOps cfmws_ops = {
.read_with_attrs = cxl_read_cfmws,
.write_with_attrs = cxl_write_cfmws,
.endianness = DEVICE_LITTLE_ENDIAN,
.valid = {
.min_access_size = 1,
.max_access_size = 8,
.unaligned = true,
},
.impl = {
.min_access_size = 1,
.max_access_size = 8,
.unaligned = true,
},
};

478
hw/cxl/cxl-mailbox-utils.c Normal file
View file

@ -0,0 +1,478 @@
/*
* CXL Utility library for mailbox interface
*
* Copyright(C) 2020 Intel Corporation.
*
* This work is licensed under the terms of the GNU GPL, version 2. See the
* COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "hw/cxl/cxl.h"
#include "hw/pci/pci.h"
#include "qemu/cutils.h"
#include "qemu/log.h"
#include "qemu/uuid.h"
/*
* How to add a new command, example. The command set FOO, with cmd BAR.
* 1. Add the command set and cmd to the enum.
* FOO = 0x7f,
* #define BAR 0
* 2. Implement the handler
* static ret_code cmd_foo_bar(struct cxl_cmd *cmd,
* CXLDeviceState *cxl_dstate, uint16_t *len)
* 3. Add the command to the cxl_cmd_set[][]
* [FOO][BAR] = { "FOO_BAR", cmd_foo_bar, x, y },
* 4. Implement your handler
* define_mailbox_handler(FOO_BAR) { ... return CXL_MBOX_SUCCESS; }
*
*
* Writing the handler:
* The handler will provide the &struct cxl_cmd, the &CXLDeviceState, and the
* in/out length of the payload. The handler is responsible for consuming the
* payload from cmd->payload and operating upon it as necessary. It must then
* fill the output data into cmd->payload (overwriting what was there),
* setting the length, and returning a valid return code.
*
* XXX: The handler need not worry about endianess. The payload is read out of
* a register interface that already deals with it.
*/
enum {
EVENTS = 0x01,
#define GET_RECORDS 0x0
#define CLEAR_RECORDS 0x1
#define GET_INTERRUPT_POLICY 0x2
#define SET_INTERRUPT_POLICY 0x3
FIRMWARE_UPDATE = 0x02,
#define GET_INFO 0x0
TIMESTAMP = 0x03,
#define GET 0x0
#define SET 0x1
LOGS = 0x04,
#define GET_SUPPORTED 0x0
#define GET_LOG 0x1
IDENTIFY = 0x40,
#define MEMORY_DEVICE 0x0
CCLS = 0x41,
#define GET_PARTITION_INFO 0x0
#define GET_LSA 0x2
#define SET_LSA 0x3
};
/* 8.2.8.4.5.1 Command Return Codes */
typedef enum {
CXL_MBOX_SUCCESS = 0x0,
CXL_MBOX_BG_STARTED = 0x1,
CXL_MBOX_INVALID_INPUT = 0x2,
CXL_MBOX_UNSUPPORTED = 0x3,
CXL_MBOX_INTERNAL_ERROR = 0x4,
CXL_MBOX_RETRY_REQUIRED = 0x5,
CXL_MBOX_BUSY = 0x6,
CXL_MBOX_MEDIA_DISABLED = 0x7,
CXL_MBOX_FW_XFER_IN_PROGRESS = 0x8,
CXL_MBOX_FW_XFER_OUT_OF_ORDER = 0x9,
CXL_MBOX_FW_AUTH_FAILED = 0xa,
CXL_MBOX_FW_INVALID_SLOT = 0xb,
CXL_MBOX_FW_ROLLEDBACK = 0xc,
CXL_MBOX_FW_REST_REQD = 0xd,
CXL_MBOX_INVALID_HANDLE = 0xe,
CXL_MBOX_INVALID_PA = 0xf,
CXL_MBOX_INJECT_POISON_LIMIT = 0x10,
CXL_MBOX_PERMANENT_MEDIA_FAILURE = 0x11,
CXL_MBOX_ABORTED = 0x12,
CXL_MBOX_INVALID_SECURITY_STATE = 0x13,
CXL_MBOX_INCORRECT_PASSPHRASE = 0x14,
CXL_MBOX_UNSUPPORTED_MAILBOX = 0x15,
CXL_MBOX_INVALID_PAYLOAD_LENGTH = 0x16,
CXL_MBOX_MAX = 0x17
} ret_code;
struct cxl_cmd;
typedef ret_code (*opcode_handler)(struct cxl_cmd *cmd,
CXLDeviceState *cxl_dstate, uint16_t *len);
struct cxl_cmd {
const char *name;
opcode_handler handler;
ssize_t in;
uint16_t effect; /* Reported in CEL */
uint8_t *payload;
};
#define DEFINE_MAILBOX_HANDLER_ZEROED(name, size) \
uint16_t __zero##name = size; \
static ret_code cmd_##name(struct cxl_cmd *cmd, \
CXLDeviceState *cxl_dstate, uint16_t *len) \
{ \
*len = __zero##name; \
memset(cmd->payload, 0, *len); \
return CXL_MBOX_SUCCESS; \
}
#define DEFINE_MAILBOX_HANDLER_NOP(name) \
static ret_code cmd_##name(struct cxl_cmd *cmd, \
CXLDeviceState *cxl_dstate, uint16_t *len) \
{ \
return CXL_MBOX_SUCCESS; \
}
DEFINE_MAILBOX_HANDLER_ZEROED(events_get_records, 0x20);
DEFINE_MAILBOX_HANDLER_NOP(events_clear_records);
DEFINE_MAILBOX_HANDLER_ZEROED(events_get_interrupt_policy, 4);
DEFINE_MAILBOX_HANDLER_NOP(events_set_interrupt_policy);
/* 8.2.9.2.1 */
static ret_code cmd_firmware_update_get_info(struct cxl_cmd *cmd,
CXLDeviceState *cxl_dstate,
uint16_t *len)
{
struct {
uint8_t slots_supported;
uint8_t slot_info;
uint8_t caps;
uint8_t rsvd[0xd];
char fw_rev1[0x10];
char fw_rev2[0x10];
char fw_rev3[0x10];
char fw_rev4[0x10];
} QEMU_PACKED *fw_info;
QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50);
if (cxl_dstate->pmem_size < (256 << 20)) {
return CXL_MBOX_INTERNAL_ERROR;
}
fw_info = (void *)cmd->payload;
memset(fw_info, 0, sizeof(*fw_info));
fw_info->slots_supported = 2;
fw_info->slot_info = BIT(0) | BIT(3);
fw_info->caps = 0;
pstrcpy(fw_info->fw_rev1, sizeof(fw_info->fw_rev1), "BWFW VERSION 0");
*len = sizeof(*fw_info);
return CXL_MBOX_SUCCESS;
}
/* 8.2.9.3.1 */
static ret_code cmd_timestamp_get(struct cxl_cmd *cmd,
CXLDeviceState *cxl_dstate,
uint16_t *len)
{
uint64_t time, delta;
uint64_t final_time = 0;
if (cxl_dstate->timestamp.set) {
/* First find the delta from the last time the host set the time. */
time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
delta = time - cxl_dstate->timestamp.last_set;
final_time = cxl_dstate->timestamp.host_set + delta;
}
/* Then adjust the actual time */
stq_le_p(cmd->payload, final_time);
*len = 8;
return CXL_MBOX_SUCCESS;
}
/* 8.2.9.3.2 */
static ret_code cmd_timestamp_set(struct cxl_cmd *cmd,
CXLDeviceState *cxl_dstate,
uint16_t *len)
{
cxl_dstate->timestamp.set = true;
cxl_dstate->timestamp.last_set = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
cxl_dstate->timestamp.host_set = le64_to_cpu(*(uint64_t *)cmd->payload);
*len = 0;
return CXL_MBOX_SUCCESS;
}
static QemuUUID cel_uuid;
/* 8.2.9.4.1 */
static ret_code cmd_logs_get_supported(struct cxl_cmd *cmd,
CXLDeviceState *cxl_dstate,
uint16_t *len)
{
struct {
uint16_t entries;
uint8_t rsvd[6];
struct {
QemuUUID uuid;
uint32_t size;
} log_entries[1];
} QEMU_PACKED *supported_logs = (void *)cmd->payload;
QEMU_BUILD_BUG_ON(sizeof(*supported_logs) != 0x1c);
supported_logs->entries = 1;
supported_logs->log_entries[0].uuid = cel_uuid;
supported_logs->log_entries[0].size = 4 * cxl_dstate->cel_size;
*len = sizeof(*supported_logs);
return CXL_MBOX_SUCCESS;
}
/* 8.2.9.4.2 */
static ret_code cmd_logs_get_log(struct cxl_cmd *cmd,
CXLDeviceState *cxl_dstate,
uint16_t *len)
{
struct {
QemuUUID uuid;
uint32_t offset;
uint32_t length;
} QEMU_PACKED QEMU_ALIGNED(16) *get_log = (void *)cmd->payload;
/*
* 8.2.9.4.2
* The device shall return Invalid Parameter if the Offset or Length
* fields attempt to access beyond the size of the log as reported by Get
* Supported Logs.
*
* XXX: Spec is wrong, "Invalid Parameter" isn't a thing.
* XXX: Spec doesn't address incorrect UUID incorrectness.
*
* The CEL buffer is large enough to fit all commands in the emulation, so
* the only possible failure would be if the mailbox itself isn't big
* enough.
*/
if (get_log->offset + get_log->length > cxl_dstate->payload_size) {
return CXL_MBOX_INVALID_INPUT;
}
if (!qemu_uuid_is_equal(&get_log->uuid, &cel_uuid)) {
return CXL_MBOX_UNSUPPORTED;
}
/* Store off everything to local variables so we can wipe out the payload */
*len = get_log->length;
memmove(cmd->payload, cxl_dstate->cel_log + get_log->offset,
get_log->length);
return CXL_MBOX_SUCCESS;
}
/* 8.2.9.5.1.1 */
static ret_code cmd_identify_memory_device(struct cxl_cmd *cmd,
CXLDeviceState *cxl_dstate,
uint16_t *len)
{
struct {
char fw_revision[0x10];
uint64_t total_capacity;
uint64_t volatile_capacity;
uint64_t persistent_capacity;
uint64_t partition_align;
uint16_t info_event_log_size;
uint16_t warning_event_log_size;
uint16_t failure_event_log_size;
uint16_t fatal_event_log_size;
uint32_t lsa_size;
uint8_t poison_list_max_mer[3];
uint16_t inject_poison_limit;
uint8_t poison_caps;
uint8_t qos_telemetry_caps;
} QEMU_PACKED *id;
QEMU_BUILD_BUG_ON(sizeof(*id) != 0x43);
CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
uint64_t size = cxl_dstate->pmem_size;
if (!QEMU_IS_ALIGNED(size, 256 << 20)) {
return CXL_MBOX_INTERNAL_ERROR;
}
id = (void *)cmd->payload;
memset(id, 0, sizeof(*id));
/* PMEM only */
snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0);
id->total_capacity = size / (256 << 20);
id->persistent_capacity = size / (256 << 20);
id->lsa_size = cvc->get_lsa_size(ct3d);
*len = sizeof(*id);
return CXL_MBOX_SUCCESS;
}
static ret_code cmd_ccls_get_partition_info(struct cxl_cmd *cmd,
CXLDeviceState *cxl_dstate,
uint16_t *len)
{
struct {
uint64_t active_vmem;
uint64_t active_pmem;
uint64_t next_vmem;
uint64_t next_pmem;
} QEMU_PACKED *part_info = (void *)cmd->payload;
QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20);
uint64_t size = cxl_dstate->pmem_size;
if (!QEMU_IS_ALIGNED(size, 256 << 20)) {
return CXL_MBOX_INTERNAL_ERROR;
}
/* PMEM only */
part_info->active_vmem = 0;
part_info->next_vmem = 0;
part_info->active_pmem = size / (256 << 20);
part_info->next_pmem = 0;
*len = sizeof(*part_info);
return CXL_MBOX_SUCCESS;
}
static ret_code cmd_ccls_get_lsa(struct cxl_cmd *cmd,
CXLDeviceState *cxl_dstate,
uint16_t *len)
{
struct {
uint32_t offset;
uint32_t length;
} QEMU_PACKED *get_lsa;
CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
uint32_t offset, length;
get_lsa = (void *)cmd->payload;
offset = get_lsa->offset;
length = get_lsa->length;
if (offset + length > cvc->get_lsa_size(ct3d)) {
*len = 0;
return CXL_MBOX_INVALID_INPUT;
}
*len = cvc->get_lsa(ct3d, get_lsa, length, offset);
return CXL_MBOX_SUCCESS;
}
static ret_code cmd_ccls_set_lsa(struct cxl_cmd *cmd,
CXLDeviceState *cxl_dstate,
uint16_t *len)
{
struct set_lsa_pl {
uint32_t offset;
uint32_t rsvd;
uint8_t data[];
} QEMU_PACKED;
struct set_lsa_pl *set_lsa_payload = (void *)cmd->payload;
CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
const size_t hdr_len = offsetof(struct set_lsa_pl, data);
uint16_t plen = *len;
*len = 0;
if (!plen) {
return CXL_MBOX_SUCCESS;
}
if (set_lsa_payload->offset + plen > cvc->get_lsa_size(ct3d) + hdr_len) {
return CXL_MBOX_INVALID_INPUT;
}
plen -= hdr_len;
cvc->set_lsa(ct3d, set_lsa_payload->data, plen, set_lsa_payload->offset);
return CXL_MBOX_SUCCESS;
}
#define IMMEDIATE_CONFIG_CHANGE (1 << 1)
#define IMMEDIATE_DATA_CHANGE (1 << 2)
#define IMMEDIATE_POLICY_CHANGE (1 << 3)
#define IMMEDIATE_LOG_CHANGE (1 << 4)
static struct cxl_cmd cxl_cmd_set[256][256] = {
[EVENTS][GET_RECORDS] = { "EVENTS_GET_RECORDS",
cmd_events_get_records, 1, 0 },
[EVENTS][CLEAR_RECORDS] = { "EVENTS_CLEAR_RECORDS",
cmd_events_clear_records, ~0, IMMEDIATE_LOG_CHANGE },
[EVENTS][GET_INTERRUPT_POLICY] = { "EVENTS_GET_INTERRUPT_POLICY",
cmd_events_get_interrupt_policy, 0, 0 },
[EVENTS][SET_INTERRUPT_POLICY] = { "EVENTS_SET_INTERRUPT_POLICY",
cmd_events_set_interrupt_policy, 4, IMMEDIATE_CONFIG_CHANGE },
[FIRMWARE_UPDATE][GET_INFO] = { "FIRMWARE_UPDATE_GET_INFO",
cmd_firmware_update_get_info, 0, 0 },
[TIMESTAMP][GET] = { "TIMESTAMP_GET", cmd_timestamp_get, 0, 0 },
[TIMESTAMP][SET] = { "TIMESTAMP_SET", cmd_timestamp_set, 8, IMMEDIATE_POLICY_CHANGE },
[LOGS][GET_SUPPORTED] = { "LOGS_GET_SUPPORTED", cmd_logs_get_supported, 0, 0 },
[LOGS][GET_LOG] = { "LOGS_GET_LOG", cmd_logs_get_log, 0x18, 0 },
[IDENTIFY][MEMORY_DEVICE] = { "IDENTIFY_MEMORY_DEVICE",
cmd_identify_memory_device, 0, 0 },
[CCLS][GET_PARTITION_INFO] = { "CCLS_GET_PARTITION_INFO",
cmd_ccls_get_partition_info, 0, 0 },
[CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 0, 0 },
[CCLS][SET_LSA] = { "CCLS_SET_LSA", cmd_ccls_set_lsa,
~0, IMMEDIATE_CONFIG_CHANGE | IMMEDIATE_DATA_CHANGE },
};
void cxl_process_mailbox(CXLDeviceState *cxl_dstate)
{
uint16_t ret = CXL_MBOX_SUCCESS;
struct cxl_cmd *cxl_cmd;
uint64_t status_reg;
opcode_handler h;
uint64_t command_reg = cxl_dstate->mbox_reg_state64[R_CXL_DEV_MAILBOX_CMD];
uint8_t set = FIELD_EX64(command_reg, CXL_DEV_MAILBOX_CMD, COMMAND_SET);
uint8_t cmd = FIELD_EX64(command_reg, CXL_DEV_MAILBOX_CMD, COMMAND);
uint16_t len = FIELD_EX64(command_reg, CXL_DEV_MAILBOX_CMD, LENGTH);
cxl_cmd = &cxl_cmd_set[set][cmd];
h = cxl_cmd->handler;
if (h) {
if (len == cxl_cmd->in) {
cxl_cmd->payload = cxl_dstate->mbox_reg_state +
A_CXL_DEV_CMD_PAYLOAD;
ret = (*h)(cxl_cmd, cxl_dstate, &len);
assert(len <= cxl_dstate->payload_size);
} else {
ret = CXL_MBOX_INVALID_PAYLOAD_LENGTH;
}
} else {
qemu_log_mask(LOG_UNIMP, "Command %04xh not implemented\n",
set << 8 | cmd);
ret = CXL_MBOX_UNSUPPORTED;
}
/* Set the return code */
status_reg = FIELD_DP64(0, CXL_DEV_MAILBOX_STS, ERRNO, ret);
/* Set the return length */
command_reg = FIELD_DP64(command_reg, CXL_DEV_MAILBOX_CMD, COMMAND_SET, 0);
command_reg = FIELD_DP64(command_reg, CXL_DEV_MAILBOX_CMD, COMMAND, 0);
command_reg = FIELD_DP64(command_reg, CXL_DEV_MAILBOX_CMD, LENGTH, len);
cxl_dstate->mbox_reg_state64[R_CXL_DEV_MAILBOX_CMD] = command_reg;
cxl_dstate->mbox_reg_state64[R_CXL_DEV_MAILBOX_STS] = status_reg;
/* Tell the host we're done */
ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CTRL,
DOORBELL, 0);
}
int cxl_initialize_mailbox(CXLDeviceState *cxl_dstate)
{
/* CXL 2.0: Table 169 Get Supported Logs Log Entry */
const char *cel_uuidstr = "0da9c0b5-bf41-4b78-8f79-96b1623b3f17";
for (int set = 0; set < 256; set++) {
for (int cmd = 0; cmd < 256; cmd++) {
if (cxl_cmd_set[set][cmd].handler) {
struct cxl_cmd *c = &cxl_cmd_set[set][cmd];
struct cel_log *log =
&cxl_dstate->cel_log[cxl_dstate->cel_size];
log->opcode = (set << 8) | cmd;
log->effect = c->effect;
cxl_dstate->cel_size++;
}
}
}
return qemu_uuid_parse(cel_uuidstr, &cel_uuid);
}

12
hw/cxl/meson.build Normal file
View file

@ -0,0 +1,12 @@
softmmu_ss.add(when: 'CONFIG_CXL',
if_true: files(
'cxl-component-utils.c',
'cxl-device-utils.c',
'cxl-mailbox-utils.c',
'cxl-host.c',
),
if_false: files(
'cxl-host-stubs.c',
))
softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('cxl-host-stubs.c'))

View file

@ -565,6 +565,12 @@ vhost_user_gpu_device_realize(DeviceState *qdev, Error **errp)
g->vhost_gpu_fd = -1;
}
static struct vhost_dev *vhost_user_gpu_get_vhost(VirtIODevice *vdev)
{
VhostUserGPU *g = VHOST_USER_GPU(vdev);
return &g->vhost->dev;
}
static Property vhost_user_gpu_properties[] = {
VIRTIO_GPU_BASE_PROPERTIES(VhostUserGPU, parent_obj.conf),
DEFINE_PROP_END_OF_LIST(),
@ -586,6 +592,7 @@ vhost_user_gpu_class_init(ObjectClass *klass, void *data)
vdc->guest_notifier_pending = vhost_user_gpu_guest_notifier_pending;
vdc->get_config = vhost_user_gpu_get_config;
vdc->set_config = vhost_user_gpu_set_config;
vdc->get_vhost = vhost_user_gpu_get_vhost;
device_class_set_props(dc, vhost_user_gpu_properties);
}

View file

@ -173,7 +173,7 @@ virtio_gpu_base_device_realize(DeviceState *qdev,
}
g->virtio_config.num_scanouts = cpu_to_le32(g->conf.max_outputs);
virtio_init(VIRTIO_DEVICE(g), "virtio-gpu", VIRTIO_ID_GPU,
virtio_init(VIRTIO_DEVICE(g), VIRTIO_ID_GPU,
sizeof(struct virtio_gpu_config));
if (virtio_gpu_virgl_enabled(g->conf)) {

View file

@ -28,6 +28,7 @@
#include "qemu/bitmap.h"
#include "qemu/error-report.h"
#include "hw/pci/pci.h"
#include "hw/cxl/cxl.h"
#include "hw/core/cpu.h"
#include "target/i386/cpu.h"
#include "hw/misc/pvpanic.h"
@ -66,6 +67,7 @@
#include "hw/acpi/aml-build.h"
#include "hw/acpi/utils.h"
#include "hw/acpi/pci.h"
#include "hw/acpi/cxl.h"
#include "qom/qom-qobject.h"
#include "hw/i386/amd_iommu.h"
@ -75,6 +77,7 @@
#include "hw/acpi/ipmi.h"
#include "hw/acpi/hmat.h"
#include "hw/acpi/viot.h"
#include "hw/acpi/cxl.h"
#include CONFIG_DEVICES
@ -1409,6 +1412,22 @@ static void build_smb0(Aml *table, I2CBus *smbus, int devnr, int func)
aml_append(table, scope);
}
static void build_acpi0017(Aml *table)
{
Aml *dev, *scope, *method;
scope = aml_scope("_SB");
dev = aml_device("CXLM");
aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0017")));
method = aml_method("_STA", 0, AML_NOTSERIALIZED);
aml_append(method, aml_return(aml_int(0x01)));
aml_append(dev, method);
aml_append(scope, dev);
aml_append(table, scope);
}
static void
build_dsdt(GArray *table_data, BIOSLinker *linker,
AcpiPmInfo *pm, AcpiMiscInfo *misc,
@ -1428,6 +1447,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
#ifdef CONFIG_TPM
TPMIf *tpm = tpm_find();
#endif
bool cxl_present = false;
int i;
VMBusBridge *vmbus_bridge = vmbus_bridge_find();
AcpiTable table = { .sig = "DSDT", .rev = 1, .oem_id = x86ms->oem_id,
@ -1572,10 +1592,25 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
}
scope = aml_scope("\\_SB");
dev = aml_device("PC%.02X", bus_num);
if (pci_bus_is_cxl(bus)) {
dev = aml_device("CL%.02X", bus_num);
} else {
dev = aml_device("PC%.02X", bus_num);
}
aml_append(dev, aml_name_decl("_UID", aml_int(bus_num)));
aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num)));
if (pci_bus_is_express(bus)) {
if (pci_bus_is_cxl(bus)) {
struct Aml *pkg = aml_package(2);
aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0016")));
aml_append(pkg, aml_eisaid("PNP0A08"));
aml_append(pkg, aml_eisaid("PNP0A03"));
aml_append(dev, aml_name_decl("_CID", pkg));
aml_append(dev, aml_name_decl("_ADR", aml_int(0)));
aml_append(dev, aml_name_decl("_UID", aml_int(bus_num)));
build_cxl_osc_method(dev);
} else if (pci_bus_is_express(bus)) {
aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08")));
aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03")));
@ -1595,9 +1630,23 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
aml_append(dev, aml_name_decl("_CRS", crs));
aml_append(scope, dev);
aml_append(dsdt, scope);
/* Handle the ranges for the PXB expanders */
if (pci_bus_is_cxl(bus)) {
MemoryRegion *mr = &machine->cxl_devices_state->host_mr;
uint64_t base = mr->addr;
cxl_present = true;
crs_range_insert(crs_range_set.mem_ranges, base,
base + memory_region_size(mr) - 1);
}
}
}
if (cxl_present) {
build_acpi0017(dsdt);
}
/*
* At this point crs_range_set has all the ranges used by pci
* busses *other* than PCI0. These ranges will be excluded from
@ -2662,6 +2711,10 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
machine->nvdimms_state, machine->ram_slots,
x86ms->oem_id, x86ms->oem_table_id);
}
if (machine->cxl_devices_state->is_enabled) {
cxl_build_cedt(machine, table_offsets, tables_blob, tables->linker,
x86ms->oem_id, x86ms->oem_table_id);
}
acpi_add_table(table_offsets, tables_blob);
build_waet(tables_blob, tables->linker, x86ms->oem_id, x86ms->oem_table_id);

View file

@ -201,15 +201,18 @@ static void amdvi_setevent_bits(uint64_t *buffer, uint64_t value, int start,
/*
* AMDVi event structure
* 0:15 -> DeviceID
* 55:63 -> event type + miscellaneous info
* 63:127 -> related address
* 48:63 -> event type + miscellaneous info
* 64:127 -> related address
*/
static void amdvi_encode_event(uint64_t *evt, uint16_t devid, uint64_t addr,
uint16_t info)
{
evt[0] = 0;
evt[1] = 0;
amdvi_setevent_bits(evt, devid, 0, 16);
amdvi_setevent_bits(evt, info, 55, 8);
amdvi_setevent_bits(evt, addr, 63, 64);
amdvi_setevent_bits(evt, info, 48, 16);
amdvi_setevent_bits(evt, addr, 64, 64);
}
/* log an error encountered during a page walk
*
@ -218,7 +221,7 @@ static void amdvi_encode_event(uint64_t *evt, uint16_t devid, uint64_t addr,
static void amdvi_page_fault(AMDVIState *s, uint16_t devid,
hwaddr addr, uint16_t info)
{
uint64_t evt[4];
uint64_t evt[2];
info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF;
amdvi_encode_event(evt, devid, addr, info);
@ -234,7 +237,7 @@ static void amdvi_page_fault(AMDVIState *s, uint16_t devid,
static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid,
hwaddr devtab, uint16_t info)
{
uint64_t evt[4];
uint64_t evt[2];
info |= AMDVI_EVENT_DEV_TAB_HW_ERROR;
@ -248,7 +251,8 @@ static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid,
*/
static void amdvi_log_command_error(AMDVIState *s, hwaddr addr)
{
uint64_t evt[4], info = AMDVI_EVENT_COMMAND_HW_ERROR;
uint64_t evt[2];
uint16_t info = AMDVI_EVENT_COMMAND_HW_ERROR;
amdvi_encode_event(evt, 0, addr, info);
amdvi_log_event(s, evt);
@ -261,7 +265,7 @@ static void amdvi_log_command_error(AMDVIState *s, hwaddr addr)
static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info,
hwaddr addr)
{
uint64_t evt[4];
uint64_t evt[2];
info |= AMDVI_EVENT_ILLEGAL_COMMAND_ERROR;
amdvi_encode_event(evt, 0, addr, info);
@ -276,7 +280,7 @@ static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info,
static void amdvi_log_illegaldevtab_error(AMDVIState *s, uint16_t devid,
hwaddr addr, uint16_t info)
{
uint64_t evt[4];
uint64_t evt[2];
info |= AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY;
amdvi_encode_event(evt, devid, addr, info);
@ -288,7 +292,7 @@ static void amdvi_log_illegaldevtab_error(AMDVIState *s, uint16_t devid,
static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid,
hwaddr addr, uint16_t info)
{
uint64_t evt[4];
uint64_t evt[2];
info |= AMDVI_EVENT_PAGE_TAB_HW_ERROR;
amdvi_encode_event(evt, devid, addr, info);

View file

@ -181,6 +181,18 @@ static void vtd_update_scalable_state(IntelIOMMUState *s)
}
}
static void vtd_update_iq_dw(IntelIOMMUState *s)
{
uint64_t val = vtd_get_quad_raw(s, DMAR_IQA_REG);
if (s->ecap & VTD_ECAP_SMTS &&
val & VTD_IQA_DW_MASK) {
s->iq_dw = true;
} else {
s->iq_dw = false;
}
}
/* Whether the address space needs to notify new mappings */
static inline gboolean vtd_as_has_map_notifier(VTDAddressSpace *as)
{
@ -469,11 +481,6 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
assert(fault < VTD_FR_MAX);
if (fault == VTD_FR_RESERVED_ERR) {
/* This is not a normal fault reason case. Drop it. */
return;
}
trace_vtd_dmar_fault(source_id, fault, addr, is_write);
if (fsts_reg & VTD_FSTS_PFO) {
@ -1025,6 +1032,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
uint32_t offset;
uint64_t slpte;
uint64_t access_right_check;
uint64_t xlat, size;
if (!vtd_iova_range_check(s, iova, ce, aw_bits)) {
error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ")",
@ -1069,11 +1077,33 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
if (vtd_is_last_slpte(slpte, level)) {
*slptep = slpte;
*slpte_level = level;
return 0;
break;
}
addr = vtd_get_slpte_addr(slpte, aw_bits);
level--;
}
xlat = vtd_get_slpte_addr(*slptep, aw_bits);
size = ~vtd_slpt_level_page_mask(level) + 1;
/*
* From VT-d spec 3.14: Untranslated requests and translation
* requests that result in an address in the interrupt range will be
* blocked with condition code LGN.4 or SGN.8.
*/
if ((xlat > VTD_INTERRUPT_ADDR_LAST ||
xlat + size - 1 < VTD_INTERRUPT_ADDR_FIRST)) {
return 0;
} else {
error_report_once("%s: xlat address is in interrupt range "
"(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", "
"slpte=0x%" PRIx64 ", write=%d, "
"xlat=0x%" PRIx64 ", size=0x%" PRIx64 ")",
__func__, iova, level, slpte, is_write,
xlat, size);
return s->scalable_mode ? -VTD_FR_SM_INTERRUPT_ADDR :
-VTD_FR_INTERRUPT_ADDR;
}
}
typedef int (*vtd_page_walk_hook)(IOMMUTLBEvent *event, void *private);
@ -1633,11 +1663,12 @@ static const bool vtd_qualified_faults[] = {
[VTD_FR_PAGING_ENTRY_INV] = true,
[VTD_FR_ROOT_TABLE_INV] = false,
[VTD_FR_CONTEXT_TABLE_INV] = false,
[VTD_FR_INTERRUPT_ADDR] = true,
[VTD_FR_ROOT_ENTRY_RSVD] = false,
[VTD_FR_PAGING_ENTRY_RSVD] = true,
[VTD_FR_CONTEXT_ENTRY_TT] = true,
[VTD_FR_PASID_TABLE_INV] = false,
[VTD_FR_RESERVED_ERR] = false,
[VTD_FR_SM_INTERRUPT_ADDR] = true,
[VTD_FR_MAX] = false,
};
@ -2209,12 +2240,13 @@ static void vtd_handle_gcmd_ire(IntelIOMMUState *s, bool en)
/* Handle write to Global Command Register */
static void vtd_handle_gcmd_write(IntelIOMMUState *s)
{
X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
uint32_t status = vtd_get_long_raw(s, DMAR_GSTS_REG);
uint32_t val = vtd_get_long_raw(s, DMAR_GCMD_REG);
uint32_t changed = status ^ val;
trace_vtd_reg_write_gcmd(status, val);
if (changed & VTD_GCMD_TE) {
if ((changed & VTD_GCMD_TE) && s->dma_translation) {
/* Translation enable/disable */
vtd_handle_gcmd_te(s, val & VTD_GCMD_TE);
}
@ -2230,7 +2262,8 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s)
/* Set/update the interrupt remapping root-table pointer */
vtd_handle_gcmd_sirtp(s);
}
if (changed & VTD_GCMD_IRE) {
if ((changed & VTD_GCMD_IRE) &&
x86_iommu_ir_supported(x86_iommu)) {
/* Interrupt remap enable/disable */
vtd_handle_gcmd_ire(s, val & VTD_GCMD_IRE);
}
@ -2883,12 +2916,7 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
} else {
vtd_set_quad(s, addr, val);
}
if (s->ecap & VTD_ECAP_SMTS &&
val & VTD_IQA_DW_MASK) {
s->iq_dw = true;
} else {
s->iq_dw = false;
}
vtd_update_iq_dw(s);
break;
case DMAR_IQA_REG_HI:
@ -3032,7 +3060,7 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
/* TODO: add support for VFIO and vhost users */
if (s->snoop_control) {
error_setg_errno(errp, -ENOTSUP,
error_setg_errno(errp, ENOTSUP,
"Snoop Control with vhost or VFIO is not supported");
return -ENOTSUP;
}
@ -3052,13 +3080,6 @@ static int vtd_post_load(void *opaque, int version_id)
{
IntelIOMMUState *iommu = opaque;
/*
* Memory regions are dynamically turned on/off depending on
* context entry configurations from the guest. After migration,
* we need to make sure the memory regions are still correct.
*/
vtd_switch_address_space_all(iommu);
/*
* We don't need to migrate the root_scalable because we can
* simply do the calculation after the loading is complete. We
@ -3068,6 +3089,15 @@ static int vtd_post_load(void *opaque, int version_id)
*/
vtd_update_scalable_state(iommu);
vtd_update_iq_dw(iommu);
/*
* Memory regions are dynamically turned on/off depending on
* context entry configurations from the guest. After migration,
* we need to make sure the memory regions are still correct.
*/
vtd_switch_address_space_all(iommu);
return 0;
}
@ -3122,6 +3152,7 @@ static Property vtd_properties[] = {
DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, true),
DEFINE_PROP_END_OF_LIST(),
};
@ -3627,12 +3658,17 @@ static void vtd_init(IntelIOMMUState *s)
s->next_frcd_reg = 0;
s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits);
VTD_CAP_MGAW(s->aw_bits);
if (s->dma_drain) {
s->cap |= VTD_CAP_DRAIN;
}
if (s->aw_bits == VTD_HOST_AW_48BIT) {
s->cap |= VTD_CAP_SAGAW_48bit;
if (s->dma_translation) {
if (s->aw_bits >= VTD_HOST_AW_39BIT) {
s->cap |= VTD_CAP_SAGAW_39bit;
}
if (s->aw_bits >= VTD_HOST_AW_48BIT) {
s->cap |= VTD_CAP_SAGAW_48bit;
}
}
s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
@ -3778,15 +3814,10 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
}
if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
if (!kvm_irqchip_in_kernel()) {
if (!kvm_irqchip_is_split()) {
error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
return false;
}
if (!kvm_enable_x2apic()) {
error_setg(errp, "eim=on requires support on the KVM side"
"(X2APIC_API, first shipped in v4.7)");
return false;
}
}
/* Currently only address widths supported are 39 and 48 bits */

View file

@ -289,6 +289,8 @@ typedef enum VTDFaultReason {
* context-entry.
*/
VTD_FR_CONTEXT_ENTRY_TT,
/* Output address in the interrupt address range */
VTD_FR_INTERRUPT_ADDR = 0xE,
/* Interrupt remapping transition faults */
VTD_FR_IR_REQ_RSVD = 0x20, /* One or more IR request reserved
@ -304,11 +306,8 @@ typedef enum VTDFaultReason {
VTD_FR_PASID_TABLE_INV = 0x58, /*Invalid PASID table entry */
/* This is not a normal fault reason. We use this to indicate some faults
* that are not referenced by the VT-d specification.
* Fault event with such reason should not be recorded.
*/
VTD_FR_RESERVED_ERR,
/* Output address in the interrupt address range for scalable mode */
VTD_FR_SM_INTERRUPT_ADDR = 0x87,
VTD_FR_MAX, /* Guard */
} VTDFaultReason;

View file

@ -247,7 +247,7 @@ static void microvm_devices_init(MicrovmMachineState *mms)
x86ms->pci_irq_mask = 0;
}
if (mms->pic == ON_OFF_AUTO_ON || mms->pic == ON_OFF_AUTO_AUTO) {
if (x86ms->pic == ON_OFF_AUTO_ON || x86ms->pic == ON_OFF_AUTO_AUTO) {
qemu_irq *i8259;
i8259 = i8259_init(isa_bus, x86_allocate_cpu_irq());
@ -257,7 +257,7 @@ static void microvm_devices_init(MicrovmMachineState *mms)
g_free(i8259);
}
if (mms->pit == ON_OFF_AUTO_ON || mms->pit == ON_OFF_AUTO_AUTO) {
if (x86ms->pit == ON_OFF_AUTO_ON || x86ms->pit == ON_OFF_AUTO_AUTO) {
if (kvm_pit_in_kernel()) {
kvm_pit_init(isa_bus, 0x40);
} else {
@ -491,40 +491,6 @@ static void microvm_machine_reset(MachineState *machine)
}
}
static void microvm_machine_get_pic(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
MicrovmMachineState *mms = MICROVM_MACHINE(obj);
OnOffAuto pic = mms->pic;
visit_type_OnOffAuto(v, name, &pic, errp);
}
static void microvm_machine_set_pic(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
MicrovmMachineState *mms = MICROVM_MACHINE(obj);
visit_type_OnOffAuto(v, name, &mms->pic, errp);
}
static void microvm_machine_get_pit(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
MicrovmMachineState *mms = MICROVM_MACHINE(obj);
OnOffAuto pit = mms->pit;
visit_type_OnOffAuto(v, name, &pit, errp);
}
static void microvm_machine_set_pit(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
MicrovmMachineState *mms = MICROVM_MACHINE(obj);
visit_type_OnOffAuto(v, name, &mms->pit, errp);
}
static void microvm_machine_get_rtc(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
@ -649,8 +615,6 @@ static void microvm_machine_initfn(Object *obj)
MicrovmMachineState *mms = MICROVM_MACHINE(obj);
/* Configuration */
mms->pic = ON_OFF_AUTO_AUTO;
mms->pit = ON_OFF_AUTO_AUTO;
mms->rtc = ON_OFF_AUTO_AUTO;
mms->pcie = ON_OFF_AUTO_AUTO;
mms->ioapic2 = ON_OFF_AUTO_AUTO;
@ -702,20 +666,6 @@ static void microvm_class_init(ObjectClass *oc, void *data)
x86mc->fwcfg_dma_enabled = true;
object_class_property_add(oc, MICROVM_MACHINE_PIC, "OnOffAuto",
microvm_machine_get_pic,
microvm_machine_set_pic,
NULL, NULL);
object_class_property_set_description(oc, MICROVM_MACHINE_PIC,
"Enable i8259 PIC");
object_class_property_add(oc, MICROVM_MACHINE_PIT, "OnOffAuto",
microvm_machine_get_pit,
microvm_machine_set_pit,
NULL, NULL);
object_class_property_set_description(oc, MICROVM_MACHINE_PIT,
"Enable i8254 PIT");
object_class_property_add(oc, MICROVM_MACHINE_RTC, "OnOffAuto",
microvm_machine_get_rtc,
microvm_machine_set_rtc,

View file

@ -75,6 +75,7 @@
#include "acpi-build.h"
#include "hw/mem/pc-dimm.h"
#include "hw/mem/nvdimm.h"
#include "hw/cxl/cxl.h"
#include "qapi/error.h"
#include "qapi/qapi-visit-common.h"
#include "qapi/qapi-visit-machine.h"
@ -743,14 +744,6 @@ void pc_machine_done(Notifier *notifier, void *data)
/* update FW_CFG_NB_CPUS to account for -device added CPUs */
fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
}
if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
!kvm_irqchip_in_kernel()) {
error_report("current -smp configuration requires kernel "
"irqchip support.");
exit(EXIT_FAILURE);
}
}
void pc_guest_info_init(PCMachineState *pcms)
@ -816,6 +809,7 @@ void pc_memory_init(PCMachineState *pcms,
MachineClass *mc = MACHINE_GET_CLASS(machine);
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
X86MachineState *x86ms = X86_MACHINE(pcms);
hwaddr cxl_base, cxl_resv_end = 0;
assert(machine->ram_size == x86ms->below_4g_mem_size +
x86ms->above_4g_mem_size);
@ -905,6 +899,44 @@ void pc_memory_init(PCMachineState *pcms,
&machine->device_memory->mr);
}
if (machine->cxl_devices_state->is_enabled) {
MemoryRegion *mr = &machine->cxl_devices_state->host_mr;
hwaddr cxl_size = MiB;
if (pcmc->has_reserved_memory && machine->device_memory->base) {
cxl_base = machine->device_memory->base;
if (!pcmc->broken_reserved_end) {
cxl_base += memory_region_size(&machine->device_memory->mr);
}
} else if (pcms->sgx_epc.size != 0) {
cxl_base = sgx_epc_above_4g_end(&pcms->sgx_epc);
} else {
cxl_base = 0x100000000ULL + x86ms->above_4g_mem_size;
}
e820_add_entry(cxl_base, cxl_size, E820_RESERVED);
memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size);
memory_region_add_subregion(system_memory, cxl_base, mr);
cxl_resv_end = cxl_base + cxl_size;
if (machine->cxl_devices_state->fixed_windows) {
hwaddr cxl_fmw_base;
GList *it;
cxl_fmw_base = ROUND_UP(cxl_base + cxl_size, 256 * MiB);
for (it = machine->cxl_devices_state->fixed_windows; it; it = it->next) {
CXLFixedWindow *fw = it->data;
fw->base = cxl_fmw_base;
memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw,
"cxl-fixed-memory-region", fw->size);
memory_region_add_subregion(system_memory, fw->base, &fw->mr);
e820_add_entry(fw->base, fw->size, E820_RESERVED);
cxl_fmw_base += fw->size;
cxl_resv_end = cxl_fmw_base;
}
}
}
/* Initialize PC system firmware */
pc_system_firmware_init(pcms, rom_memory);
@ -932,6 +964,10 @@ void pc_memory_init(PCMachineState *pcms,
if (!pcmc->broken_reserved_end) {
res_mem_end += memory_region_size(&machine->device_memory->mr);
}
if (machine->cxl_devices_state->is_enabled) {
res_mem_end = cxl_resv_end;
}
*val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB));
fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val));
}
@ -965,7 +1001,17 @@ uint64_t pc_pci_hole64_start(void)
X86MachineState *x86ms = X86_MACHINE(pcms);
uint64_t hole64_start = 0;
if (pcmc->has_reserved_memory && ms->device_memory->base) {
if (ms->cxl_devices_state->host_mr.addr) {
hole64_start = ms->cxl_devices_state->host_mr.addr +
memory_region_size(&ms->cxl_devices_state->host_mr);
if (ms->cxl_devices_state->fixed_windows) {
GList *it;
for (it = ms->cxl_devices_state->fixed_windows; it; it = it->next) {
CXLFixedWindow *fw = it->data;
hole64_start = fw->mr.addr + memory_region_size(&fw->mr);
}
}
} else if (pcmc->has_reserved_memory && ms->device_memory->base) {
hole64_start = ms->device_memory->base;
if (!pcmc->broken_reserved_end) {
hole64_start += memory_region_size(&ms->device_memory->mr);
@ -1077,6 +1123,7 @@ void pc_basic_device_init(struct PCMachineState *pcms,
ISADevice *pit = NULL;
MemoryRegion *ioport80_io = g_new(MemoryRegion, 1);
MemoryRegion *ioportF0_io = g_new(MemoryRegion, 1);
X86MachineState *x86ms = X86_MACHINE(pcms);
memory_region_init_io(ioport80_io, NULL, &ioport80_io_ops, NULL, "ioport80", 1);
memory_region_add_subregion(isa_bus->address_space_io, 0x80, ioport80_io);
@ -1121,7 +1168,8 @@ void pc_basic_device_init(struct PCMachineState *pcms,
qemu_register_boot_set(pc_boot_set, *rtc_state);
if (!xen_enabled() && pcms->pit_enabled) {
if (!xen_enabled() &&
(x86ms->pit == ON_OFF_AUTO_AUTO || x86ms->pit == ON_OFF_AUTO_ON)) {
if (kvm_pit_in_kernel()) {
pit = kvm_pit_init(isa_bus, 0x40);
} else {
@ -1491,20 +1539,6 @@ static void pc_machine_set_sata(Object *obj, bool value, Error **errp)
pcms->sata_enabled = value;
}
static bool pc_machine_get_pit(Object *obj, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
return pcms->pit_enabled;
}
static void pc_machine_set_pit(Object *obj, bool value, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
pcms->pit_enabled = value;
}
static bool pc_machine_get_hpet(Object *obj, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
@ -1661,7 +1695,6 @@ static void pc_machine_initfn(Object *obj)
pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build;
pcms->smbus_enabled = true;
pcms->sata_enabled = true;
pcms->pit_enabled = true;
pcms->i8042_enabled = true;
pcms->max_fw_size = 8 * MiB;
#ifdef CONFIG_HPET
@ -1761,6 +1794,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
mc->default_cpu_type = TARGET_DEFAULT_CPU_TYPE;
mc->nvdimm_supported = true;
mc->smp_props.dies_supported = true;
mc->cxl_supported = true;
mc->default_ram_id = "pc.ram";
object_class_property_add(oc, PC_MACHINE_MAX_RAM_BELOW_4G, "size",
@ -1789,11 +1823,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
object_class_property_set_description(oc, PC_MACHINE_SATA,
"Enable/disable Serial ATA bus");
object_class_property_add_bool(oc, PC_MACHINE_PIT,
pc_machine_get_pit, pc_machine_set_pit);
object_class_property_set_description(oc, PC_MACHINE_PIT,
"Enable/disable Intel 8254 programmable interval timer emulation");
object_class_property_add_bool(oc, "hpet",
pc_machine_get_hpet, pc_machine_set_hpet);
object_class_property_set_description(oc, "hpet",

View file

@ -218,7 +218,9 @@ static void pc_init1(MachineState *machine,
}
isa_bus_irqs(isa_bus, x86ms->gsi);
pc_i8259_create(isa_bus, gsi_state->i8259_irq);
if (x86ms->pic == ON_OFF_AUTO_ON || x86ms->pic == ON_OFF_AUTO_AUTO) {
pc_i8259_create(isa_bus, gsi_state->i8259_irq);
}
if (pcmc->pci_enabled) {
ioapic_init_gsi(gsi_state, "i440fx");

View file

@ -265,7 +265,9 @@ static void pc_q35_init(MachineState *machine)
pci_bus_set_route_irq_fn(host_bus, ich9_route_intx_pin_to_irq);
isa_bus = ich9_lpc->isa_bus;
pc_i8259_create(isa_bus, gsi_state->i8259_irq);
if (x86ms->pic == ON_OFF_AUTO_ON || x86ms->pic == ON_OFF_AUTO_AUTO) {
pc_i8259_create(isa_bus, gsi_state->i8259_irq);
}
if (pcmc->pci_enabled) {
ioapic_init_gsi(gsi_state, "q35");

View file

@ -38,6 +38,7 @@
#include "sysemu/replay.h"
#include "sysemu/sysemu.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/xen.h"
#include "trace.h"
#include "hw/i386/x86.h"
@ -122,6 +123,21 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version)
*/
x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms,
ms->smp.max_cpus - 1) + 1;
/*
* Can we support APIC ID 255 or higher?
*
* Under Xen: yes.
* With userspace emulated lapic: no
* With KVM's in-kernel lapic: only if X2APIC API is enabled.
*/
if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
(!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) {
error_report("current -smp configuration requires kernel "
"irqchip and X2APIC API support.");
exit(EXIT_FAILURE);
}
possible_cpus = mc->possible_cpu_arch_ids(ms);
for (i = 0; i < ms->smp.cpus; i++) {
x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
@ -1228,6 +1244,40 @@ static void x86_machine_set_acpi(Object *obj, Visitor *v, const char *name,
visit_type_OnOffAuto(v, name, &x86ms->acpi, errp);
}
static void x86_machine_get_pit(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
X86MachineState *x86ms = X86_MACHINE(obj);
OnOffAuto pit = x86ms->pit;
visit_type_OnOffAuto(v, name, &pit, errp);
}
static void x86_machine_set_pit(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
X86MachineState *x86ms = X86_MACHINE(obj);;
visit_type_OnOffAuto(v, name, &x86ms->pit, errp);
}
static void x86_machine_get_pic(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
X86MachineState *x86ms = X86_MACHINE(obj);
OnOffAuto pic = x86ms->pic;
visit_type_OnOffAuto(v, name, &pic, errp);
}
static void x86_machine_set_pic(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
X86MachineState *x86ms = X86_MACHINE(obj);
visit_type_OnOffAuto(v, name, &x86ms->pic, errp);
}
static char *x86_machine_get_oem_id(Object *obj, Error **errp)
{
X86MachineState *x86ms = X86_MACHINE(obj);
@ -1317,6 +1367,8 @@ static void x86_machine_initfn(Object *obj)
x86ms->smm = ON_OFF_AUTO_AUTO;
x86ms->acpi = ON_OFF_AUTO_AUTO;
x86ms->pit = ON_OFF_AUTO_AUTO;
x86ms->pic = ON_OFF_AUTO_AUTO;
x86ms->pci_irq_mask = ACPI_BUILD_PCI_IRQS;
x86ms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
@ -1348,6 +1400,20 @@ static void x86_machine_class_init(ObjectClass *oc, void *data)
object_class_property_set_description(oc, X86_MACHINE_ACPI,
"Enable ACPI");
object_class_property_add(oc, X86_MACHINE_PIT, "OnOffAuto",
x86_machine_get_pit,
x86_machine_set_pit,
NULL, NULL);
object_class_property_set_description(oc, X86_MACHINE_PIT,
"Enable i8254 PIT");
object_class_property_add(oc, X86_MACHINE_PIC, "OnOffAuto",
x86_machine_get_pic,
x86_machine_set_pic,
NULL, NULL);
object_class_property_set_description(oc, X86_MACHINE_PIC,
"Enable i8259 PIC");
object_class_property_add_str(oc, X86_MACHINE_OEM_ID,
x86_machine_get_oem_id,
x86_machine_set_oem_id);

View file

@ -78,6 +78,12 @@ static void vhost_input_set_config(VirtIODevice *vdev,
virtio_notify_config(vdev);
}
static struct vhost_dev *vhost_input_get_vhost(VirtIODevice *vdev)
{
VHostUserInput *vhi = VHOST_USER_INPUT(vdev);
return &vhi->vhost->dev;
}
static const VMStateDescription vmstate_vhost_input = {
.name = "vhost-user-input",
.unmigratable = 1,
@ -92,6 +98,7 @@ static void vhost_input_class_init(ObjectClass *klass, void *data)
dc->vmsd = &vmstate_vhost_input;
vdc->get_config = vhost_input_get_config;
vdc->set_config = vhost_input_set_config;
vdc->get_vhost = vhost_input_get_vhost;
vic->realize = vhost_input_realize;
vic->change_active = vhost_input_change_active;
}

View file

@ -257,8 +257,7 @@ static void virtio_input_device_realize(DeviceState *dev, Error **errp)
vinput->cfg_size += 8;
assert(vinput->cfg_size <= sizeof(virtio_input_config));
virtio_init(vdev, "virtio-input", VIRTIO_ID_INPUT,
vinput->cfg_size);
virtio_init(vdev, VIRTIO_ID_INPUT, vinput->cfg_size);
vinput->evt = virtio_add_queue(vdev, 64, virtio_input_handle_evt);
vinput->sts = virtio_add_queue(vdev, 64, virtio_input_handle_sts);
}

View file

@ -11,3 +11,8 @@ config NVDIMM
config SPARSE_MEM
bool
config CXL_MEM_DEVICE
bool
default y if CXL
select MEM_DEVICE

371
hw/mem/cxl_type3.c Normal file
View file

@ -0,0 +1,371 @@
#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qemu/error-report.h"
#include "hw/mem/memory-device.h"
#include "hw/mem/pc-dimm.h"
#include "hw/pci/pci.h"
#include "hw/qdev-properties.h"
#include "qapi/error.h"
#include "qemu/log.h"
#include "qemu/module.h"
#include "qemu/pmem.h"
#include "qemu/range.h"
#include "qemu/rcu.h"
#include "sysemu/hostmem.h"
#include "hw/cxl/cxl.h"
static void build_dvsecs(CXLType3Dev *ct3d)
{
CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
uint8_t *dvsec;
dvsec = (uint8_t *)&(CXLDVSECDevice){
.cap = 0x1e,
.ctrl = 0x2,
.status2 = 0x2,
.range1_size_hi = ct3d->hostmem->size >> 32,
.range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
(ct3d->hostmem->size & 0xF0000000),
.range1_base_hi = 0,
.range1_base_lo = 0,
};
cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
PCIE_CXL_DEVICE_DVSEC_LENGTH,
PCIE_CXL_DEVICE_DVSEC,
PCIE_CXL2_DEVICE_DVSEC_REVID, dvsec);
dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){
.rsvd = 0,
.reg0_base_lo = RBI_COMPONENT_REG | CXL_COMPONENT_REG_BAR_IDX,
.reg0_base_hi = 0,
.reg1_base_lo = RBI_CXL_DEVICE_REG | CXL_DEVICE_REG_BAR_IDX,
.reg1_base_hi = 0,
};
cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC,
REG_LOC_DVSEC_REVID, dvsec);
dvsec = (uint8_t *)&(CXLDVSECDeviceGPF){
.phase2_duration = 0x603, /* 3 seconds */
.phase2_power = 0x33, /* 0x33 miliwatts */
};
cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
GPF_DEVICE_DVSEC_LENGTH, GPF_PORT_DVSEC,
GPF_DEVICE_DVSEC_REVID, dvsec);
}
static void hdm_decoder_commit(CXLType3Dev *ct3d, int which)
{
ComponentRegisters *cregs = &ct3d->cxl_cstate.crb;
uint32_t *cache_mem = cregs->cache_mem_registers;
assert(which == 0);
/* TODO: Sanity checks that the decoder is possible */
ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMIT, 0);
ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, ERR, 0);
ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMITTED, 1);
}
static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
unsigned size)
{
CXLComponentState *cxl_cstate = opaque;
ComponentRegisters *cregs = &cxl_cstate->crb;
CXLType3Dev *ct3d = container_of(cxl_cstate, CXLType3Dev, cxl_cstate);
uint32_t *cache_mem = cregs->cache_mem_registers;
bool should_commit = false;
int which_hdm = -1;
assert(size == 4);
g_assert(offset < CXL2_COMPONENT_CM_REGION_SIZE);
switch (offset) {
case A_CXL_HDM_DECODER0_CTRL:
should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
which_hdm = 0;
break;
default:
break;
}
stl_le_p((uint8_t *)cache_mem + offset, value);
if (should_commit) {
hdm_decoder_commit(ct3d, which_hdm);
}
}
static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
{
DeviceState *ds = DEVICE(ct3d);
MemoryRegion *mr;
char *name;
if (!ct3d->hostmem) {
error_setg(errp, "memdev property must be set");
return false;
}
mr = host_memory_backend_get_memory(ct3d->hostmem);
if (!mr) {
error_setg(errp, "memdev property must be set");
return false;
}
memory_region_set_nonvolatile(mr, true);
memory_region_set_enabled(mr, true);
host_memory_backend_set_mapped(ct3d->hostmem, true);
if (ds->id) {
name = g_strdup_printf("cxl-type3-dpa-space:%s", ds->id);
} else {
name = g_strdup("cxl-type3-dpa-space");
}
address_space_init(&ct3d->hostmem_as, mr, name);
g_free(name);
ct3d->cxl_dstate.pmem_size = ct3d->hostmem->size;
if (!ct3d->lsa) {
error_setg(errp, "lsa property must be set");
return false;
}
return true;
}
static void ct3_realize(PCIDevice *pci_dev, Error **errp)
{
CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
ComponentRegisters *regs = &cxl_cstate->crb;
MemoryRegion *mr = &regs->component_registers;
uint8_t *pci_conf = pci_dev->config;
if (!cxl_setup_memory(ct3d, errp)) {
return;
}
pci_config_set_prog_interface(pci_conf, 0x10);
pci_config_set_class(pci_conf, PCI_CLASS_MEMORY_CXL);
pcie_endpoint_cap_init(pci_dev, 0x80);
cxl_cstate->dvsec_offset = 0x100;
ct3d->cxl_cstate.pdev = pci_dev;
build_dvsecs(ct3d);
regs->special_ops = g_new0(MemoryRegionOps, 1);
regs->special_ops->write = ct3d_reg_write;
cxl_component_register_block_init(OBJECT(pci_dev), cxl_cstate,
TYPE_CXL_TYPE3);
pci_register_bar(
pci_dev, CXL_COMPONENT_REG_BAR_IDX,
PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, mr);
cxl_device_register_block_init(OBJECT(pci_dev), &ct3d->cxl_dstate);
pci_register_bar(pci_dev, CXL_DEVICE_REG_BAR_IDX,
PCI_BASE_ADDRESS_SPACE_MEMORY |
PCI_BASE_ADDRESS_MEM_TYPE_64,
&ct3d->cxl_dstate.device_registers);
}
static void ct3_exit(PCIDevice *pci_dev)
{
CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
ComponentRegisters *regs = &cxl_cstate->crb;
g_free(regs->special_ops);
address_space_destroy(&ct3d->hostmem_as);
}
/* TODO: Support multiple HDM decoders and DPA skip */
static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
{
uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
uint64_t decoder_base, decoder_size, hpa_offset;
uint32_t hdm0_ctrl;
int ig, iw;
decoder_base = (((uint64_t)cache_mem[R_CXL_HDM_DECODER0_BASE_HI] << 32) |
cache_mem[R_CXL_HDM_DECODER0_BASE_LO]);
if ((uint64_t)host_addr < decoder_base) {
return false;
}
hpa_offset = (uint64_t)host_addr - decoder_base;
decoder_size = ((uint64_t)cache_mem[R_CXL_HDM_DECODER0_SIZE_HI] << 32) |
cache_mem[R_CXL_HDM_DECODER0_SIZE_LO];
if (hpa_offset >= decoder_size) {
return false;
}
hdm0_ctrl = cache_mem[R_CXL_HDM_DECODER0_CTRL];
iw = FIELD_EX32(hdm0_ctrl, CXL_HDM_DECODER0_CTRL, IW);
ig = FIELD_EX32(hdm0_ctrl, CXL_HDM_DECODER0_CTRL, IG);
*dpa = (MAKE_64BIT_MASK(0, 8 + ig) & hpa_offset) |
((MAKE_64BIT_MASK(8 + ig + iw, 64 - 8 - ig - iw) & hpa_offset) >> iw);
return true;
}
MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
unsigned size, MemTxAttrs attrs)
{
CXLType3Dev *ct3d = CXL_TYPE3(d);
uint64_t dpa_offset;
MemoryRegion *mr;
/* TODO support volatile region */
mr = host_memory_backend_get_memory(ct3d->hostmem);
if (!mr) {
return MEMTX_ERROR;
}
if (!cxl_type3_dpa(ct3d, host_addr, &dpa_offset)) {
return MEMTX_ERROR;
}
if (dpa_offset > int128_get64(mr->size)) {
return MEMTX_ERROR;
}
return address_space_read(&ct3d->hostmem_as, dpa_offset, attrs, data, size);
}
MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
unsigned size, MemTxAttrs attrs)
{
CXLType3Dev *ct3d = CXL_TYPE3(d);
uint64_t dpa_offset;
MemoryRegion *mr;
mr = host_memory_backend_get_memory(ct3d->hostmem);
if (!mr) {
return MEMTX_OK;
}
if (!cxl_type3_dpa(ct3d, host_addr, &dpa_offset)) {
return MEMTX_OK;
}
if (dpa_offset > int128_get64(mr->size)) {
return MEMTX_OK;
}
return address_space_write(&ct3d->hostmem_as, dpa_offset, attrs,
&data, size);
}
static void ct3d_reset(DeviceState *dev)
{
CXLType3Dev *ct3d = CXL_TYPE3(dev);
uint32_t *reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
uint32_t *write_msk = ct3d->cxl_cstate.crb.cache_mem_regs_write_mask;
cxl_component_register_init_common(reg_state, write_msk, CXL2_TYPE3_DEVICE);
cxl_device_register_init_common(&ct3d->cxl_dstate);
}
static Property ct3_props[] = {
DEFINE_PROP_LINK("memdev", CXLType3Dev, hostmem, TYPE_MEMORY_BACKEND,
HostMemoryBackend *),
DEFINE_PROP_LINK("lsa", CXLType3Dev, lsa, TYPE_MEMORY_BACKEND,
HostMemoryBackend *),
DEFINE_PROP_END_OF_LIST(),
};
static uint64_t get_lsa_size(CXLType3Dev *ct3d)
{
MemoryRegion *mr;
mr = host_memory_backend_get_memory(ct3d->lsa);
return memory_region_size(mr);
}
static void validate_lsa_access(MemoryRegion *mr, uint64_t size,
uint64_t offset)
{
assert(offset + size <= memory_region_size(mr));
assert(offset + size > offset);
}
static uint64_t get_lsa(CXLType3Dev *ct3d, void *buf, uint64_t size,
uint64_t offset)
{
MemoryRegion *mr;
void *lsa;
mr = host_memory_backend_get_memory(ct3d->lsa);
validate_lsa_access(mr, size, offset);
lsa = memory_region_get_ram_ptr(mr) + offset;
memcpy(buf, lsa, size);
return size;
}
static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
uint64_t offset)
{
MemoryRegion *mr;
void *lsa;
mr = host_memory_backend_get_memory(ct3d->lsa);
validate_lsa_access(mr, size, offset);
lsa = memory_region_get_ram_ptr(mr) + offset;
memcpy(lsa, buf, size);
memory_region_set_dirty(mr, offset, size);
/*
* Just like the PMEM, if the guest is not allowed to exit gracefully, label
* updates will get lost.
*/
}
static void ct3_class_init(ObjectClass *oc, void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc);
CXLType3Class *cvc = CXL_TYPE3_CLASS(oc);
pc->realize = ct3_realize;
pc->exit = ct3_exit;
pc->class_id = PCI_CLASS_STORAGE_EXPRESS;
pc->vendor_id = PCI_VENDOR_ID_INTEL;
pc->device_id = 0xd93; /* LVF for now */
pc->revision = 1;
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
dc->desc = "CXL PMEM Device (Type 3)";
dc->reset = ct3d_reset;
device_class_set_props(dc, ct3_props);
cvc->get_lsa_size = get_lsa_size;
cvc->get_lsa = get_lsa;
cvc->set_lsa = set_lsa;
}
static const TypeInfo ct3d_info = {
.name = TYPE_CXL_TYPE3,
.parent = TYPE_PCI_DEVICE,
.class_size = sizeof(struct CXLType3Class),
.class_init = ct3_class_init,
.instance_size = sizeof(CXLType3Dev),
.interfaces = (InterfaceInfo[]) {
{ INTERFACE_CXL_DEVICE },
{ INTERFACE_PCIE_DEVICE },
{}
},
};
static void ct3d_registers(void)
{
type_register_static(&ct3d_info);
}
type_init(ct3d_registers);

View file

@ -3,6 +3,7 @@ mem_ss.add(files('memory-device.c'))
mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc-dimm.c'))
mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c'))
mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c'))
mem_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_true: files('cxl_type3.c'))
softmmu_ss.add_all(when: 'CONFIG_MEM_DEVICE', if_true: mem_ss)

View file

@ -6,6 +6,7 @@ subdir('block')
subdir('char')
subdir('core')
subdir('cpu')
subdir('cxl')
subdir('display')
subdir('dma')
subdir('gpio')

View file

@ -201,7 +201,7 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options)
net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF);
}
if (~net->dev.features & net->dev.backend_features) {
fprintf(stderr, "vhost lacks feature mask %" PRIu64
fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
" for backend\n",
(uint64_t)(~net->dev.features & net->dev.backend_features));
goto fail;
@ -213,7 +213,7 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options)
if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
features = vhost_user_get_acked_features(net->nc);
if (~net->dev.features & features) {
fprintf(stderr, "vhost lacks feature mask %" PRIu64
fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
" for backend\n",
(uint64_t)(~net->dev.features & features));
goto fail;
@ -381,6 +381,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
r = vhost_set_vring_enable(peer, peer->vring_enable);
if (r < 0) {
vhost_net_stop_one(get_vhost_net(peer), dev);
goto err_start;
}
}
@ -390,7 +391,8 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
err_start:
while (--i >= 0) {
peer = qemu_get_peer(ncs , i);
peer = qemu_get_peer(ncs, i < data_queue_pairs ?
i : n->max_queue_pairs);
vhost_net_stop_one(get_vhost_net(peer), dev);
}
e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);

View file

@ -14,6 +14,7 @@
#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
@ -245,7 +246,8 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
VirtIODevice *vdev = VIRTIO_DEVICE(n);
NetClientState *nc = qemu_get_queue(n->nic);
int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
int cvq = n->max_ncs - n->max_queue_pairs;
int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
n->max_ncs - n->max_queue_pairs : 0;
if (!get_vhost_net(nc->peer)) {
return;
@ -1379,6 +1381,7 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
{
VirtIODevice *vdev = VIRTIO_DEVICE(n);
uint16_t queue_pairs;
NetClientState *nc = qemu_get_queue(n->nic);
virtio_net_disable_rss(n);
if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
@ -1410,6 +1413,18 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
return VIRTIO_NET_ERR;
}
/* Avoid changing the number of queue_pairs for vdpa device in
* userspace handler. A future fix is needed to handle the mq
* change in userspace handler with vhost-vdpa. Let's disable
* the mq handling from userspace for now and only allow get
* done through the kernel. Ripples may be seen when falling
* back to userspace, but without doing it qemu process would
* crash on a recursive entry to virtio_net_set_status().
*/
if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
return VIRTIO_NET_ERR;
}
n->curr_queue_pairs = queue_pairs;
/* stop the backend before changing the number of queue_pairs to avoid handling a
* disabled queue */
@ -1443,7 +1458,8 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
}
iov_cnt = elem->out_num;
iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
iov2 = iov = g_memdup2(elem->out_sg,
sizeof(struct iovec) * elem->out_num);
s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
if (s != sizeof(ctrl)) {
@ -3170,8 +3186,22 @@ static NetClientInfo net_virtio_info = {
static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
VirtIONet *n = VIRTIO_NET(vdev);
NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
NetClientState *nc;
assert(n->vhost_started);
if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
/* Must guard against invalid features and bogus queue index
* from being set by malicious guest, or penetrated through
* buggy migration stream.
*/
if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
qemu_log_mask(LOG_GUEST_ERROR,
"%s: bogus vq index ignored\n", __func__);
return false;
}
nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
} else {
nc = qemu_get_subqueue(n->nic, vq2q(idx));
}
return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}
@ -3179,8 +3209,22 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
bool mask)
{
VirtIONet *n = VIRTIO_NET(vdev);
NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
NetClientState *nc;
assert(n->vhost_started);
if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
/* Must guard against invalid features and bogus queue index
* from being set by malicious guest, or penetrated through
* buggy migration stream.
*/
if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
qemu_log_mask(LOG_GUEST_ERROR,
"%s: bogus vq index ignored\n", __func__);
return;
}
nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
} else {
nc = qemu_get_subqueue(n->nic, vq2q(idx));
}
vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
vdev, idx, mask);
}
@ -3392,7 +3436,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
}
virtio_net_set_config_size(n, n->host_features);
virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
/*
* We set a lower limit on RX queue size to what it always was.
@ -3619,6 +3663,14 @@ static bool dev_unplug_pending(void *opaque)
return vdc->primary_unplug_pending(dev);
}
static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
{
VirtIONet *n = VIRTIO_NET(vdev);
NetClientState *nc = qemu_get_queue(n->nic);
struct vhost_net *net = get_vhost_net(nc->peer);
return &net->dev;
}
static const VMStateDescription vmstate_virtio_net = {
.name = "virtio-net",
.minimum_version_id = VIRTIO_NET_VM_VERSION,
@ -3721,6 +3773,7 @@ static void virtio_net_class_init(ObjectClass *klass, void *data)
vdc->post_load = virtio_net_post_load_virtio;
vdc->vmsd = &vmstate_virtio_net_device;
vdc->primary_unplug_pending = primary_unplug_pending;
vdc->get_vhost = virtio_net_get_vhost;
}
static const TypeInfo virtio_net_info = {

View file

@ -27,3 +27,8 @@ config DEC_PCI
config SIMBA
bool
config CXL
bool
default y if PCI_EXPRESS && PXB
depends on PCI_EXPRESS && MSI_NONBROKEN && PXB

View file

@ -0,0 +1,236 @@
/*
* CXL 2.0 Root Port Implementation
*
* Copyright(C) 2020 Intel Corporation.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>
*/
#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/range.h"
#include "hw/pci/pci_bridge.h"
#include "hw/pci/pcie_port.h"
#include "hw/qdev-properties.h"
#include "hw/sysbus.h"
#include "qapi/error.h"
#include "hw/cxl/cxl.h"
#define CXL_ROOT_PORT_DID 0x7075
/* Copied from the gen root port which we derive */
#define GEN_PCIE_ROOT_PORT_AER_OFFSET 0x100
#define GEN_PCIE_ROOT_PORT_ACS_OFFSET \
(GEN_PCIE_ROOT_PORT_AER_OFFSET + PCI_ERR_SIZEOF)
#define CXL_ROOT_PORT_DVSEC_OFFSET \
(GEN_PCIE_ROOT_PORT_ACS_OFFSET + PCI_ACS_SIZEOF)
typedef struct CXLRootPort {
/*< private >*/
PCIESlot parent_obj;
CXLComponentState cxl_cstate;
PCIResReserve res_reserve;
} CXLRootPort;
#define TYPE_CXL_ROOT_PORT "cxl-rp"
DECLARE_INSTANCE_CHECKER(CXLRootPort, CXL_ROOT_PORT, TYPE_CXL_ROOT_PORT)
static void latch_registers(CXLRootPort *crp)
{
uint32_t *reg_state = crp->cxl_cstate.crb.cache_mem_registers;
uint32_t *write_msk = crp->cxl_cstate.crb.cache_mem_regs_write_mask;
cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT);
}
static void build_dvsecs(CXLComponentState *cxl)
{
uint8_t *dvsec;
dvsec = (uint8_t *)&(CXLDVSECPortExtensions){ 0 };
cxl_component_create_dvsec(cxl, CXL2_ROOT_PORT,
EXTENSIONS_PORT_DVSEC_LENGTH,
EXTENSIONS_PORT_DVSEC,
EXTENSIONS_PORT_DVSEC_REVID, dvsec);
dvsec = (uint8_t *)&(CXLDVSECPortGPF){
.rsvd = 0,
.phase1_ctrl = 1, /* 1μs timeout */
.phase2_ctrl = 1, /* 1μs timeout */
};
cxl_component_create_dvsec(cxl, CXL2_ROOT_PORT,
GPF_PORT_DVSEC_LENGTH, GPF_PORT_DVSEC,
GPF_PORT_DVSEC_REVID, dvsec);
dvsec = (uint8_t *)&(CXLDVSECPortFlexBus){
.cap = 0x26, /* IO, Mem, non-MLD */
.ctrl = 0x2,
.status = 0x26, /* same */
.rcvd_mod_ts_data_phase1 = 0xef,
};
cxl_component_create_dvsec(cxl, CXL2_ROOT_PORT,
PCIE_FLEXBUS_PORT_DVSEC_LENGTH_2_0,
PCIE_FLEXBUS_PORT_DVSEC,
PCIE_FLEXBUS_PORT_DVSEC_REVID_2_0, dvsec);
dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){
.rsvd = 0,
.reg0_base_lo = RBI_COMPONENT_REG | CXL_COMPONENT_REG_BAR_IDX,
.reg0_base_hi = 0,
};
cxl_component_create_dvsec(cxl, CXL2_ROOT_PORT,
REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC,
REG_LOC_DVSEC_REVID, dvsec);
}
static void cxl_rp_realize(DeviceState *dev, Error **errp)
{
PCIDevice *pci_dev = PCI_DEVICE(dev);
PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
CXLRootPort *crp = CXL_ROOT_PORT(dev);
CXLComponentState *cxl_cstate = &crp->cxl_cstate;
ComponentRegisters *cregs = &cxl_cstate->crb;
MemoryRegion *component_bar = &cregs->component_registers;
Error *local_err = NULL;
rpc->parent_realize(dev, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
int rc =
pci_bridge_qemu_reserve_cap_init(pci_dev, 0, crp->res_reserve, errp);
if (rc < 0) {
rpc->parent_class.exit(pci_dev);
return;
}
if (!crp->res_reserve.io || crp->res_reserve.io == -1) {
pci_word_test_and_clear_mask(pci_dev->wmask + PCI_COMMAND,
PCI_COMMAND_IO);
pci_dev->wmask[PCI_IO_BASE] = 0;
pci_dev->wmask[PCI_IO_LIMIT] = 0;
}
cxl_cstate->dvsec_offset = CXL_ROOT_PORT_DVSEC_OFFSET;
cxl_cstate->pdev = pci_dev;
build_dvsecs(&crp->cxl_cstate);
cxl_component_register_block_init(OBJECT(pci_dev), cxl_cstate,
TYPE_CXL_ROOT_PORT);
pci_register_bar(pci_dev, CXL_COMPONENT_REG_BAR_IDX,
PCI_BASE_ADDRESS_SPACE_MEMORY |
PCI_BASE_ADDRESS_MEM_TYPE_64,
component_bar);
}
static void cxl_rp_reset(DeviceState *dev)
{
PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
CXLRootPort *crp = CXL_ROOT_PORT(dev);
rpc->parent_reset(dev);
latch_registers(crp);
}
static Property gen_rp_props[] = {
DEFINE_PROP_UINT32("bus-reserve", CXLRootPort, res_reserve.bus, -1),
DEFINE_PROP_SIZE("io-reserve", CXLRootPort, res_reserve.io, -1),
DEFINE_PROP_SIZE("mem-reserve", CXLRootPort, res_reserve.mem_non_pref, -1),
DEFINE_PROP_SIZE("pref32-reserve", CXLRootPort, res_reserve.mem_pref_32,
-1),
DEFINE_PROP_SIZE("pref64-reserve", CXLRootPort, res_reserve.mem_pref_64,
-1),
DEFINE_PROP_END_OF_LIST()
};
static void cxl_rp_dvsec_write_config(PCIDevice *dev, uint32_t addr,
uint32_t val, int len)
{
CXLRootPort *crp = CXL_ROOT_PORT(dev);
if (range_contains(&crp->cxl_cstate.dvsecs[EXTENSIONS_PORT_DVSEC], addr)) {
uint8_t *reg = &dev->config[addr];
addr -= crp->cxl_cstate.dvsecs[EXTENSIONS_PORT_DVSEC].lob;
if (addr == PORT_CONTROL_OFFSET) {
if (pci_get_word(reg) & PORT_CONTROL_UNMASK_SBR) {
/* unmask SBR */
qemu_log_mask(LOG_UNIMP, "SBR mask control is not supported\n");
}
if (pci_get_word(reg) & PORT_CONTROL_ALT_MEMID_EN) {
/* Alt Memory & ID Space Enable */
qemu_log_mask(LOG_UNIMP,
"Alt Memory & ID space is not supported\n");
}
}
}
}
static void cxl_rp_write_config(PCIDevice *d, uint32_t address, uint32_t val,
int len)
{
uint16_t slt_ctl, slt_sta;
pcie_cap_slot_get(d, &slt_ctl, &slt_sta);
pci_bridge_write_config(d, address, val, len);
pcie_cap_flr_write_config(d, address, val, len);
pcie_cap_slot_write_config(d, slt_ctl, slt_sta, address, val, len);
pcie_aer_write_config(d, address, val, len);
cxl_rp_dvsec_write_config(d, address, val, len);
}
static void cxl_root_port_class_init(ObjectClass *oc, void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
PCIDeviceClass *k = PCI_DEVICE_CLASS(oc);
PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(oc);
k->vendor_id = PCI_VENDOR_ID_INTEL;
k->device_id = CXL_ROOT_PORT_DID;
dc->desc = "CXL Root Port";
k->revision = 0;
device_class_set_props(dc, gen_rp_props);
k->config_write = cxl_rp_write_config;
device_class_set_parent_realize(dc, cxl_rp_realize, &rpc->parent_realize);
device_class_set_parent_reset(dc, cxl_rp_reset, &rpc->parent_reset);
rpc->aer_offset = GEN_PCIE_ROOT_PORT_AER_OFFSET;
rpc->acs_offset = GEN_PCIE_ROOT_PORT_ACS_OFFSET;
dc->hotpluggable = false;
}
static const TypeInfo cxl_root_port_info = {
.name = TYPE_CXL_ROOT_PORT,
.parent = TYPE_PCIE_ROOT_PORT,
.instance_size = sizeof(CXLRootPort),
.class_init = cxl_root_port_class_init,
.interfaces = (InterfaceInfo[]) {
{ INTERFACE_CXL_DEVICE },
{ }
},
};
static void cxl_register(void)
{
type_register_static(&cxl_root_port_info);
}
type_init(cxl_register);

View file

@ -5,6 +5,7 @@ pci_ss.add(when: 'CONFIG_IOH3420', if_true: files('ioh3420.c'))
pci_ss.add(when: 'CONFIG_PCIE_PORT', if_true: files('pcie_root_port.c', 'gen_pcie_root_port.c', 'pcie_pci_bridge.c'))
pci_ss.add(when: 'CONFIG_PXB', if_true: files('pci_expander_bridge.c'))
pci_ss.add(when: 'CONFIG_XIO3130', if_true: files('xio3130_upstream.c', 'xio3130_downstream.c'))
pci_ss.add(when: 'CONFIG_CXL', if_true: files('cxl_root_port.c'))
# NewWorld PowerMac
pci_ss.add(when: 'CONFIG_DEC_PCI', if_true: files('dec.c'))

View file

@ -17,6 +17,7 @@
#include "hw/pci/pci_host.h"
#include "hw/qdev-properties.h"
#include "hw/pci/pci_bridge.h"
#include "hw/cxl/cxl.h"
#include "qemu/range.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
@ -24,6 +25,8 @@
#include "hw/boards.h"
#include "qom/object.h"
enum BusType { PCI, PCIE, CXL };
#define TYPE_PXB_BUS "pxb-bus"
typedef struct PXBBus PXBBus;
DECLARE_INSTANCE_CHECKER(PXBBus, PXB_BUS,
@ -33,6 +36,10 @@ DECLARE_INSTANCE_CHECKER(PXBBus, PXB_BUS,
DECLARE_INSTANCE_CHECKER(PXBBus, PXB_PCIE_BUS,
TYPE_PXB_PCIE_BUS)
#define TYPE_PXB_CXL_BUS "pxb-cxl-bus"
DECLARE_INSTANCE_CHECKER(PXBBus, PXB_CXL_BUS,
TYPE_PXB_CXL_BUS)
struct PXBBus {
/*< private >*/
PCIBus parent_obj;
@ -50,18 +57,13 @@ DECLARE_INSTANCE_CHECKER(PXBDev, PXB_DEV,
DECLARE_INSTANCE_CHECKER(PXBDev, PXB_PCIE_DEV,
TYPE_PXB_PCIE_DEVICE)
struct PXBDev {
/*< private >*/
PCIDevice parent_obj;
/*< public >*/
uint8_t bus_nr;
uint16_t numa_node;
bool bypass_iommu;
};
static PXBDev *convert_to_pxb(PCIDevice *dev)
{
/* A CXL PXB's parent bus is PCIe, so the normal check won't work */
if (object_dynamic_cast(OBJECT(dev), TYPE_PXB_CXL_DEVICE)) {
return PXB_CXL_DEV(dev);
}
return pci_bus_is_express(pci_get_bus(dev))
? PXB_PCIE_DEV(dev) : PXB_DEV(dev);
}
@ -70,6 +72,13 @@ static GList *pxb_dev_list;
#define TYPE_PXB_HOST "pxb-host"
CXLComponentState *cxl_get_hb_cstate(PCIHostState *hb)
{
CXLHost *host = PXB_CXL_HOST(hb);
return &host->cxl_cstate;
}
static int pxb_bus_num(PCIBus *bus)
{
PXBDev *pxb = convert_to_pxb(bus->parent_dev);
@ -106,11 +115,20 @@ static const TypeInfo pxb_pcie_bus_info = {
.class_init = pxb_bus_class_init,
};
static const TypeInfo pxb_cxl_bus_info = {
.name = TYPE_PXB_CXL_BUS,
.parent = TYPE_CXL_BUS,
.instance_size = sizeof(PXBBus),
.class_init = pxb_bus_class_init,
};
static const char *pxb_host_root_bus_path(PCIHostState *host_bridge,
PCIBus *rootbus)
{
PXBBus *bus = pci_bus_is_express(rootbus) ?
PXB_PCIE_BUS(rootbus) : PXB_BUS(rootbus);
PXBBus *bus = pci_bus_is_cxl(rootbus) ?
PXB_CXL_BUS(rootbus) :
pci_bus_is_express(rootbus) ? PXB_PCIE_BUS(rootbus) :
PXB_BUS(rootbus);
snprintf(bus->bus_path, 8, "0000:%02x", pxb_bus_num(rootbus));
return bus->bus_path;
@ -166,6 +184,52 @@ static const TypeInfo pxb_host_info = {
.class_init = pxb_host_class_init,
};
static void pxb_cxl_realize(DeviceState *dev, Error **errp)
{
MachineState *ms = MACHINE(qdev_get_machine());
SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
CXLHost *cxl = PXB_CXL_HOST(dev);
CXLComponentState *cxl_cstate = &cxl->cxl_cstate;
struct MemoryRegion *mr = &cxl_cstate->crb.component_registers;
hwaddr offset;
cxl_component_register_block_init(OBJECT(dev), cxl_cstate,
TYPE_PXB_CXL_HOST);
sysbus_init_mmio(sbd, mr);
offset = memory_region_size(mr) * ms->cxl_devices_state->next_mr_idx;
if (offset > memory_region_size(&ms->cxl_devices_state->host_mr)) {
error_setg(errp, "Insufficient space for pxb cxl host register space");
return;
}
memory_region_add_subregion(&ms->cxl_devices_state->host_mr, offset, mr);
ms->cxl_devices_state->next_mr_idx++;
}
static void pxb_cxl_host_class_init(ObjectClass *class, void *data)
{
DeviceClass *dc = DEVICE_CLASS(class);
PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(class);
hc->root_bus_path = pxb_host_root_bus_path;
dc->fw_name = "cxl";
dc->realize = pxb_cxl_realize;
/* Reason: Internal part of the pxb/pxb-pcie device, not usable by itself */
dc->user_creatable = false;
}
/*
* This is a device to handle the MMIO for a CXL host bridge. It does nothing
* else.
*/
static const TypeInfo cxl_host_info = {
.name = TYPE_PXB_CXL_HOST,
.parent = TYPE_PCI_HOST_BRIDGE,
.instance_size = sizeof(CXLHost),
.class_init = pxb_cxl_host_class_init,
};
/*
* Registers the PXB bus as a child of pci host root bus.
*/
@ -212,6 +276,17 @@ static int pxb_map_irq_fn(PCIDevice *pci_dev, int pin)
return pin - PCI_SLOT(pxb->devfn);
}
static void pxb_dev_reset(DeviceState *dev)
{
CXLHost *cxl = PXB_CXL_DEV(dev)->cxl.cxl_host_bridge;
CXLComponentState *cxl_cstate = &cxl->cxl_cstate;
uint32_t *reg_state = cxl_cstate->crb.cache_mem_registers;
uint32_t *write_msk = cxl_cstate->crb.cache_mem_regs_write_mask;
cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT);
ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 8);
}
static gint pxb_compare(gconstpointer a, gconstpointer b)
{
const PXBDev *pxb_a = a, *pxb_b = b;
@ -221,7 +296,8 @@ static gint pxb_compare(gconstpointer a, gconstpointer b)
0;
}
static void pxb_dev_realize_common(PCIDevice *dev, bool pcie, Error **errp)
static void pxb_dev_realize_common(PCIDevice *dev, enum BusType type,
Error **errp)
{
PXBDev *pxb = convert_to_pxb(dev);
DeviceState *ds, *bds = NULL;
@ -245,9 +321,13 @@ static void pxb_dev_realize_common(PCIDevice *dev, bool pcie, Error **errp)
dev_name = dev->qdev.id;
}
ds = qdev_new(TYPE_PXB_HOST);
if (pcie) {
ds = qdev_new(type == CXL ? TYPE_PXB_CXL_HOST : TYPE_PXB_HOST);
if (type == PCIE) {
bus = pci_root_bus_new(ds, dev_name, NULL, NULL, 0, TYPE_PXB_PCIE_BUS);
} else if (type == CXL) {
bus = pci_root_bus_new(ds, dev_name, NULL, NULL, 0, TYPE_PXB_CXL_BUS);
bus->flags |= PCI_BUS_CXL;
PXB_CXL_DEV(dev)->cxl.cxl_host_bridge = PXB_CXL_HOST(ds);
} else {
bus = pci_root_bus_new(ds, "pxb-internal", NULL, NULL, 0, TYPE_PXB_BUS);
bds = qdev_new("pci-bridge");
@ -295,7 +375,7 @@ static void pxb_dev_realize(PCIDevice *dev, Error **errp)
return;
}
pxb_dev_realize_common(dev, false, errp);
pxb_dev_realize_common(dev, PCI, errp);
}
static void pxb_dev_exitfn(PCIDevice *pci_dev)
@ -348,7 +428,7 @@ static void pxb_pcie_dev_realize(PCIDevice *dev, Error **errp)
return;
}
pxb_dev_realize_common(dev, true, errp);
pxb_dev_realize_common(dev, PCIE, errp);
}
static void pxb_pcie_dev_class_init(ObjectClass *klass, void *data)
@ -379,13 +459,67 @@ static const TypeInfo pxb_pcie_dev_info = {
},
};
static void pxb_cxl_dev_realize(PCIDevice *dev, Error **errp)
{
MachineState *ms = MACHINE(qdev_get_machine());
/* A CXL PXB's parent bus is still PCIe */
if (!pci_bus_is_express(pci_get_bus(dev))) {
error_setg(errp, "pxb-cxl devices cannot reside on a PCI bus");
return;
}
if (!ms->cxl_devices_state->is_enabled) {
error_setg(errp, "Machine does not have cxl=on");
return;
}
pxb_dev_realize_common(dev, CXL, errp);
pxb_dev_reset(DEVICE(dev));
}
static void pxb_cxl_dev_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
k->realize = pxb_cxl_dev_realize;
k->exit = pxb_dev_exitfn;
/*
* XXX: These types of bridges don't actually show up in the hierarchy so
* vendor, device, class, etc. ids are intentionally left out.
*/
dc->desc = "CXL Host Bridge";
device_class_set_props(dc, pxb_dev_properties);
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
/* Host bridges aren't hotpluggable. FIXME: spec reference */
dc->hotpluggable = false;
dc->reset = pxb_dev_reset;
}
static const TypeInfo pxb_cxl_dev_info = {
.name = TYPE_PXB_CXL_DEVICE,
.parent = TYPE_PCI_DEVICE,
.instance_size = sizeof(PXBDev),
.class_init = pxb_cxl_dev_class_init,
.interfaces =
(InterfaceInfo[]){
{ INTERFACE_CONVENTIONAL_PCI_DEVICE },
{},
},
};
static void pxb_register_types(void)
{
type_register_static(&pxb_bus_info);
type_register_static(&pxb_pcie_bus_info);
type_register_static(&pxb_cxl_bus_info);
type_register_static(&pxb_host_info);
type_register_static(&cxl_host_info);
type_register_static(&pxb_dev_info);
type_register_static(&pxb_pcie_dev_info);
type_register_static(&pxb_cxl_dev_info);
}
type_init(pxb_register_types)

View file

@ -67,7 +67,11 @@ static void rp_realize(PCIDevice *d, Error **errp)
int rc;
pci_config_set_interrupt_pin(d->config, 1);
pci_bridge_initfn(d, TYPE_PCIE_BUS);
if (d->cap_present & QEMU_PCIE_CAP_CXL) {
pci_bridge_initfn(d, TYPE_CXL_BUS);
} else {
pci_bridge_initfn(d, TYPE_PCIE_BUS);
}
pcie_port_init_reg(d);
rc = pci_bridge_ssvid_init(d, rpc->ssvid_offset, dc->vendor_id,

View file

@ -5,6 +5,7 @@
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci_bridge.h"
#include "hw/pci/pcie_host.h"
#include "hw/acpi/cxl.h"
static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq)
{
@ -139,6 +140,7 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg)
QLIST_FOREACH(bus, &bus->child, sibling) {
uint8_t bus_num = pci_bus_num(bus);
uint8_t numa_node = pci_bus_numa_node(bus);
bool is_cxl = pci_bus_is_cxl(bus);
if (!pci_bus_is_root(bus)) {
continue;
@ -154,8 +156,16 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg)
}
dev = aml_device("PC%.02X", bus_num);
aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A08")));
aml_append(dev, aml_name_decl("_CID", aml_string("PNP0A03")));
if (is_cxl) {
struct Aml *pkg = aml_package(2);
aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0016")));
aml_append(pkg, aml_eisaid("PNP0A08"));
aml_append(pkg, aml_eisaid("PNP0A03"));
aml_append(dev, aml_name_decl("_CID", pkg));
} else {
aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A08")));
aml_append(dev, aml_name_decl("_CID", aml_string("PNP0A03")));
}
aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num)));
aml_append(dev, aml_name_decl("_UID", aml_int(bus_num)));
aml_append(dev, aml_name_decl("_STR", aml_unicode("pxb Device")));
@ -175,7 +185,11 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg)
cfg->pio.base, 0, 0, 0);
aml_append(dev, aml_name_decl("_CRS", crs));
acpi_dsdt_add_pci_osc(dev);
if (is_cxl) {
build_cxl_osc_method(dev);
} else {
acpi_dsdt_add_pci_osc(dev);
}
aml_append(scope, dev);
}

View file

@ -200,6 +200,11 @@ static const TypeInfo pci_bus_info = {
.class_init = pci_bus_class_init,
};
static const TypeInfo cxl_interface_info = {
.name = INTERFACE_CXL_DEVICE,
.parent = TYPE_INTERFACE,
};
static const TypeInfo pcie_interface_info = {
.name = INTERFACE_PCIE_DEVICE,
.parent = TYPE_INTERFACE,
@ -223,6 +228,12 @@ static const TypeInfo pcie_bus_info = {
.class_init = pcie_bus_class_init,
};
static const TypeInfo cxl_bus_info = {
.name = TYPE_CXL_BUS,
.parent = TYPE_PCIE_BUS,
.class_init = pcie_bus_class_init,
};
static PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num);
static void pci_update_mappings(PCIDevice *d);
static void pci_irq_handler(void *opaque, int irq_num, int level);
@ -2182,6 +2193,10 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
}
if (object_class_dynamic_cast(klass, INTERFACE_CXL_DEVICE)) {
pci_dev->cap_present |= QEMU_PCIE_CAP_CXL;
}
pci_dev = do_pci_register_device(pci_dev,
object_get_typename(OBJECT(qdev)),
pci_dev->devfn, errp);
@ -2747,7 +2762,9 @@ static void pci_device_class_base_init(ObjectClass *klass, void *data)
object_class_dynamic_cast(klass, INTERFACE_CONVENTIONAL_PCI_DEVICE);
ObjectClass *pcie =
object_class_dynamic_cast(klass, INTERFACE_PCIE_DEVICE);
assert(conventional || pcie);
ObjectClass *cxl =
object_class_dynamic_cast(klass, INTERFACE_CXL_DEVICE);
assert(conventional || pcie || cxl);
}
}
@ -2937,7 +2954,9 @@ static void pci_register_types(void)
{
type_register_static(&pci_bus_info);
type_register_static(&pcie_bus_info);
type_register_static(&cxl_bus_info);
type_register_static(&conventional_pci_interface_info);
type_register_static(&cxl_interface_info);
type_register_static(&pcie_interface_info);
type_register_static(&pci_device_type_info);
}

View file

@ -136,6 +136,31 @@ static void pcie_port_class_init(ObjectClass *oc, void *data)
device_class_set_props(dc, pcie_port_props);
}
PCIDevice *pcie_find_port_by_pn(PCIBus *bus, uint8_t pn)
{
int devfn;
for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
PCIDevice *d = bus->devices[devfn];
PCIEPort *port;
if (!d || !pci_is_express(d) || !d->exp.exp_cap) {
continue;
}
if (!object_dynamic_cast(OBJECT(d), TYPE_PCIE_PORT)) {
continue;
}
port = PCIE_PORT(d);
if (port->port == pn) {
return d;
}
}
return NULL;
}
static const TypeInfo pcie_port_type_info = {
.name = TYPE_PCIE_PORT,
.parent = TYPE_PCI_BRIDGE,

View file

@ -273,6 +273,13 @@ static void vhost_scsi_unrealize(DeviceState *dev)
virtio_scsi_common_unrealize(dev);
}
static struct vhost_dev *vhost_scsi_get_vhost(VirtIODevice *vdev)
{
VHostSCSI *s = VHOST_SCSI(vdev);
VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
return &vsc->dev;
}
static Property vhost_scsi_properties[] = {
DEFINE_PROP_STRING("vhostfd", VirtIOSCSICommon, conf.vhostfd),
DEFINE_PROP_STRING("wwpn", VirtIOSCSICommon, conf.wwpn),
@ -307,6 +314,7 @@ static void vhost_scsi_class_init(ObjectClass *klass, void *data)
vdc->get_features = vhost_scsi_common_get_features;
vdc->set_config = vhost_scsi_common_set_config;
vdc->set_status = vhost_scsi_set_status;
vdc->get_vhost = vhost_scsi_get_vhost;
fwc->get_dev_path = vhost_scsi_common_get_fw_dev_path;
}

View file

@ -121,6 +121,7 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp)
vsc->dev.backend_features = 0;
vqs = vsc->dev.vqs;
s->vhost_user.supports_config = true;
ret = vhost_dev_init(&vsc->dev, &s->vhost_user,
VHOST_BACKEND_TYPE_USER, 0, errp);
if (ret < 0) {

View file

@ -1013,8 +1013,7 @@ void virtio_scsi_common_realize(DeviceState *dev,
VirtIOSCSICommon *s = VIRTIO_SCSI_COMMON(dev);
int i;
virtio_init(vdev, "virtio-scsi", VIRTIO_ID_SCSI,
sizeof(VirtIOSCSIConfig));
virtio_init(vdev, VIRTIO_ID_SCSI, sizeof(VirtIOSCSIConfig));
if (s->conf.num_queues == VIRTIO_SCSI_AUTO_NUM_QUEUES) {
s->conf.num_queues = 1;

View file

@ -21,6 +21,9 @@ vhost_user_set_mem_table_withfd(int index, const char *name, uint64_t memory_siz
vhost_user_postcopy_waker(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64
vhost_user_postcopy_waker_found(uint64_t client_addr) "0x%"PRIx64
vhost_user_postcopy_waker_nomatch(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64
vhost_user_read(uint32_t req, uint32_t flags) "req:%d flags:0x%"PRIx32""
vhost_user_write(uint32_t req, uint32_t flags) "req:%d flags:0x%"PRIx32""
vhost_user_create_notifier(int idx, void *n) "idx:%d n:%p"
# vhost-vdpa.c
vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint64_t iova, uint64_t size, uint64_t uaddr, uint8_t perm, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" uaddr: 0x%"PRIx64" perm: 0x%"PRIx8" type: %"PRIu8
@ -89,7 +92,12 @@ virtio_mmio_guest_page(uint64_t size, int shift) "guest page size 0x%" PRIx64 "
virtio_mmio_queue_write(uint64_t value, int max_size) "mmio_queue write 0x%" PRIx64 " max %d"
virtio_mmio_setting_irq(int level) "virtio_mmio setting IRQ %d"
# virtio-iommu.c
# virtio-pci.c
virtio_pci_notify(uint16_t vector) "virtio_pci_notify vec 0x%x"
virtio_pci_notify_write(uint64_t addr, uint64_t val, unsigned int size) "0x%" PRIx64" = 0x%" PRIx64 " (%d)"
virtio_pci_notify_write_pio(uint64_t addr, uint64_t val, unsigned int size) "0x%" PRIx64" = 0x%" PRIx64 " (%d)"
# hw/virtio/virtio-iommu.c
virtio_iommu_device_reset(void) "reset!"
virtio_iommu_system_reset(void) "system reset!"
virtio_iommu_get_features(uint64_t features) "device supports features=0x%"PRIx64

View file

@ -21,7 +21,7 @@
#include "hw/virtio/vhost-scsi.h"
#include "qapi/error.h"
#include "qemu/module.h"
#include "virtio-pci.h"
#include "hw/virtio/virtio-pci.h"
#include "qom/object.h"
typedef struct VHostSCSIPCI VHostSCSIPCI;

View file

@ -138,6 +138,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
for (n = 0; n < num; n++) {
if (more_descs || (n + 1 < num)) {
descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
descs[i].next = cpu_to_le16(svq->desc_next[i]);
} else {
descs[i].flags = flags;
}
@ -145,10 +146,10 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
descs[i].len = cpu_to_le32(iovec[n].iov_len);
last = i;
i = cpu_to_le16(descs[i].next);
i = cpu_to_le16(svq->desc_next[i]);
}
svq->free_head = le16_to_cpu(descs[last].next);
svq->free_head = le16_to_cpu(svq->desc_next[last]);
}
static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
@ -198,11 +199,19 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
return true;
}
/**
* Add an element to a SVQ.
*
* The caller must check that there is enough slots for the new element. It
* takes ownership of the element: In case of failure, it is free and the SVQ
* is considered broken.
*/
static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
{
unsigned qemu_head;
bool ok = vhost_svq_add_split(svq, elem, &qemu_head);
if (unlikely(!ok)) {
g_free(elem);
return false;
}
@ -333,13 +342,22 @@ static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
}
static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
uint16_t num, uint16_t i)
{
for (uint16_t j = 0; j < (num - 1); ++j) {
i = le16_to_cpu(svq->desc_next[i]);
}
return i;
}
static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
uint32_t *len)
{
vring_desc_t *descs = svq->vring.desc;
const vring_used_t *used = svq->vring.used;
vring_used_elem_t used_elem;
uint16_t last_used;
uint16_t last_used, last_used_chain, num;
if (!vhost_svq_more_used(svq)) {
return NULL;
@ -365,7 +383,10 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
return NULL;
}
descs[used_elem.id].next = svq->free_head;
num = svq->ring_id_maps[used_elem.id]->in_num +
svq->ring_id_maps[used_elem.id]->out_num;
last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
svq->desc_next[last_used_chain] = svq->free_head;
svq->free_head = used_elem.id;
*len = used_elem.len;
@ -540,8 +561,9 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
svq->vring.used = qemu_memalign(qemu_real_host_page_size(), device_size);
memset(svq->vring.used, 0, device_size);
svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num);
svq->desc_next = g_new0(uint16_t, svq->vring.num);
for (unsigned i = 0; i < svq->vring.num - 1; i++) {
svq->vring.desc[i].next = cpu_to_le16(i + 1);
svq->desc_next[i] = cpu_to_le16(i + 1);
}
}
@ -574,6 +596,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
virtqueue_detach_element(svq->vq, next_avail_elem, 0);
}
svq->vq = NULL;
g_free(svq->desc_next);
g_free(svq->ring_id_maps);
qemu_vfree(svq->vring.desc);
qemu_vfree(svq->vring.used);

View file

@ -53,6 +53,12 @@ typedef struct VhostShadowVirtqueue {
/* Next VirtQueue element that guest made available */
VirtQueueElement *next_guest_avail_elem;
/*
* Backup next field for each descriptor so we can recover securely, not
* needing to trust the device access.
*/
uint16_t *desc_next;
/* Next head to expose to the device */
uint16_t shadow_avail_idx;

View file

@ -26,7 +26,7 @@
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
#include "virtio-pci.h"
#include "hw/virtio/virtio-pci.h"
#include "qom/object.h"
typedef struct VHostUserBlkPCI VHostUserBlkPCI;

View file

@ -14,7 +14,7 @@
#include "qemu/osdep.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/vhost-user-fs.h"
#include "virtio-pci.h"
#include "hw/virtio/virtio-pci.h"
#include "qom/object.h"
struct VHostUserFSPCI {

View file

@ -219,8 +219,7 @@ static void vuf_device_realize(DeviceState *dev, Error **errp)
return;
}
virtio_init(vdev, "vhost-user-fs", VIRTIO_ID_FS,
sizeof(struct virtio_fs_config));
virtio_init(vdev, VIRTIO_ID_FS, sizeof(struct virtio_fs_config));
/* Hiprio queue */
fs->hiprio_vq = virtio_add_queue(vdev, fs->conf.queue_size, vuf_handle_output);
@ -277,6 +276,12 @@ static void vuf_device_unrealize(DeviceState *dev)
fs->vhost_dev.vqs = NULL;
}
static struct vhost_dev *vuf_get_vhost(VirtIODevice *vdev)
{
VHostUserFS *fs = VHOST_USER_FS(vdev);
return &fs->vhost_dev;
}
static const VMStateDescription vuf_vmstate = {
.name = "vhost-user-fs",
.unmigratable = 1,
@ -314,6 +319,7 @@ static void vuf_class_init(ObjectClass *klass, void *data)
vdc->set_status = vuf_set_status;
vdc->guest_notifier_mask = vuf_guest_notifier_mask;
vdc->guest_notifier_pending = vuf_guest_notifier_pending;
vdc->get_vhost = vuf_get_vhost;
}
static const TypeInfo vuf_info = {

View file

@ -9,7 +9,7 @@
#include "qemu/osdep.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/vhost-user-i2c.h"
#include "virtio-pci.h"
#include "hw/virtio/virtio-pci.h"
struct VHostUserI2CPCI {
VirtIOPCIProxy parent_obj;

View file

@ -14,11 +14,6 @@
#include "qemu/error-report.h"
#include "standard-headers/linux/virtio_ids.h"
/* Remove this once the header is updated in Linux kernel */
#ifndef VIRTIO_ID_I2C_ADAPTER
#define VIRTIO_ID_I2C_ADAPTER 34
#endif
static const int feature_bits[] = {
VIRTIO_I2C_F_ZERO_LENGTH_REQUEST,
VHOST_INVALID_FEATURE_BIT
@ -227,7 +222,7 @@ static void vu_i2c_device_realize(DeviceState *dev, Error **errp)
return;
}
virtio_init(vdev, "vhost-user-i2c", VIRTIO_ID_I2C_ADAPTER, 0);
virtio_init(vdev, VIRTIO_ID_I2C_ADAPTER, 0);
i2c->vhost_dev.nvqs = 1;
i2c->vq = virtio_add_queue(vdev, 4, vu_i2c_handle_output);

View file

@ -9,7 +9,7 @@
#include "hw/virtio/virtio-input.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "virtio-pci.h"
#include "hw/virtio/virtio-pci.h"
#include "qom/object.h"
typedef struct VHostUserInputPCI VHostUserInputPCI;

View file

@ -9,7 +9,7 @@
#include "qemu/osdep.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/vhost-user-rng.h"
#include "virtio-pci.h"
#include "hw/virtio/virtio-pci.h"
struct VHostUserRNGPCI {
VirtIOPCIProxy parent_obj;

View file

@ -203,7 +203,7 @@ static void vu_rng_device_realize(DeviceState *dev, Error **errp)
return;
}
virtio_init(vdev, "vhost-user-rng", VIRTIO_ID_RNG, 0);
virtio_init(vdev, VIRTIO_ID_RNG, 0);
rng->req_vq = virtio_add_queue(vdev, 4, vu_rng_handle_output);
if (!rng->req_vq) {
@ -247,6 +247,12 @@ static void vu_rng_device_unrealize(DeviceState *dev)
vhost_user_cleanup(&rng->vhost_user);
}
static struct vhost_dev *vu_rng_get_vhost(VirtIODevice *vdev)
{
VHostUserRNG *rng = VHOST_USER_RNG(vdev);
return &rng->vhost_dev;
}
static const VMStateDescription vu_rng_vmstate = {
.name = "vhost-user-rng",
.unmigratable = 1,
@ -272,6 +278,7 @@ static void vu_rng_class_init(ObjectClass *klass, void *data)
vdc->set_status = vu_rng_set_status;
vdc->guest_notifier_mask = vu_rng_guest_notifier_mask;
vdc->guest_notifier_pending = vu_rng_guest_notifier_pending;
vdc->get_vhost = vu_rng_get_vhost;
}
static const TypeInfo vu_rng_info = {

View file

@ -30,7 +30,7 @@
#include "hw/pci/msix.h"
#include "hw/loader.h"
#include "sysemu/kvm.h"
#include "virtio-pci.h"
#include "hw/virtio/virtio-pci.h"
#include "qom/object.h"
typedef struct VHostUserSCSIPCI VHostUserSCSIPCI;

View file

@ -10,7 +10,7 @@
#include "qemu/osdep.h"
#include "virtio-pci.h"
#include "hw/virtio/virtio-pci.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/vhost-user-vsock.h"
#include "qom/object.h"

View file

@ -107,7 +107,7 @@ static void vuv_device_realize(DeviceState *dev, Error **errp)
return;
}
vhost_vsock_common_realize(vdev, "vhost-user-vsock");
vhost_vsock_common_realize(vdev);
vhost_dev_set_config_notifier(&vvc->vhost_dev, &vsock_ops);

View file

@ -489,6 +489,8 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
return ret < 0 ? -saved_errno : -EIO;
}
trace_vhost_user_write(msg->hdr.request, msg->hdr.flags);
return 0;
}
@ -542,6 +544,8 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
}
}
trace_vhost_user_read(msg.hdr.request, msg.hdr.flags);
return 0;
}
@ -1170,14 +1174,16 @@ static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
n->unmap_addr = NULL;
}
static void vhost_user_host_notifier_remove(VhostUserState *user,
VirtIODevice *vdev, int queue_idx)
/*
* clean-up function for notifier, will finally free the structure
* under rcu.
*/
static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
VirtIODevice *vdev)
{
VhostUserHostNotifier *n = &user->notifier[queue_idx];
if (n->addr) {
if (vdev) {
virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false);
}
assert(!n->unmap_addr);
n->unmap_addr = n->addr;
@ -1221,6 +1227,15 @@ static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
return 0;
}
static VhostUserHostNotifier *fetch_notifier(VhostUserState *u,
int idx)
{
if (idx >= u->notifiers->len) {
return NULL;
}
return g_ptr_array_index(u->notifiers, idx);
}
static int vhost_user_get_vring_base(struct vhost_dev *dev,
struct vhost_vring_state *ring)
{
@ -1233,7 +1248,10 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev,
};
struct vhost_user *u = dev->opaque;
vhost_user_host_notifier_remove(u->user, dev->vdev, ring->index);
VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
if (n) {
vhost_user_host_notifier_remove(n, dev->vdev);
}
ret = vhost_user_write(dev, &msg, NULL, 0);
if (ret < 0) {
@ -1498,6 +1516,29 @@ static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
return dev->config_ops->vhost_dev_config_notifier(dev);
}
/*
* Fetch or create the notifier for a given idx. Newly created
* notifiers are added to the pointer array that tracks them.
*/
static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u,
int idx)
{
VhostUserHostNotifier *n = NULL;
if (idx >= u->notifiers->len) {
g_ptr_array_set_size(u->notifiers, idx);
}
n = g_ptr_array_index(u->notifiers, idx);
if (!n) {
n = g_new0(VhostUserHostNotifier, 1);
n->idx = idx;
g_ptr_array_insert(u->notifiers, idx, n);
trace_vhost_user_create_notifier(idx, n);
}
return n;
}
static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
VhostUserVringArea *area,
int fd)
@ -1517,9 +1558,12 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
return -EINVAL;
}
n = &user->notifier[queue_idx];
vhost_user_host_notifier_remove(user, vdev, queue_idx);
/*
* Fetch notifier and invalidate any old data before setting up
* new mapped address.
*/
n = fetch_or_create_notifier(user, queue_idx);
vhost_user_host_notifier_remove(n, vdev);
if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
return 0;
@ -1945,14 +1989,15 @@ static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
Error **errp)
{
uint64_t features, protocol_features, ram_slots;
uint64_t features, ram_slots;
struct vhost_user *u;
VhostUserState *vus = (VhostUserState *) opaque;
int err;
assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
u = g_new0(struct vhost_user, 1);
u->user = opaque;
u->user = vus;
u->dev = dev;
dev->opaque = u;
@ -1963,6 +2008,10 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
}
if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
bool supports_f_config = vus->supports_config ||
(dev->config_ops && dev->config_ops->vhost_dev_config_notifier);
uint64_t protocol_features;
dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
@ -1972,19 +2021,34 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
return -EPROTO;
}
dev->protocol_features =
protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK;
/*
* We will use all the protocol features we support - although
* we suppress F_CONFIG if we know QEMUs internal code can not support
* it.
*/
protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK;
if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
/* Don't acknowledge CONFIG feature if device doesn't support it */
dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
} else if (!(protocol_features &
(1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) {
error_setg(errp, "Device expects VHOST_USER_PROTOCOL_F_CONFIG "
"but backend does not support it.");
return -EINVAL;
if (supports_f_config) {
if (!virtio_has_feature(protocol_features,
VHOST_USER_PROTOCOL_F_CONFIG)) {
error_setg(errp, "vhost-user device %s expecting "
"VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user backend does "
"not support it.", dev->vdev->name);
return -EPROTO;
}
} else {
if (virtio_has_feature(protocol_features,
VHOST_USER_PROTOCOL_F_CONFIG)) {
warn_reportf_err(*errp, "vhost-user backend supports "
"VHOST_USER_PROTOCOL_F_CONFIG for "
"device %s but QEMU does not.",
dev->vdev->name);
protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
}
}
/* final set of protocol features */
dev->protocol_features = protocol_features;
err = vhost_user_set_protocol_features(dev, dev->protocol_features);
if (err < 0) {
error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
@ -2502,6 +2566,20 @@ static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
return vhost_user_write(dev, &msg, &inflight->fd, 1);
}
static void vhost_user_state_destroy(gpointer data)
{
VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
if (n) {
vhost_user_host_notifier_remove(n, NULL);
object_unparent(OBJECT(&n->mr));
/*
* We can't free until vhost_user_host_notifier_remove has
* done it's thing so schedule the free with RCU.
*/
g_free_rcu(n, rcu);
}
}
bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
{
if (user->chr) {
@ -2510,23 +2588,18 @@ bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
}
user->chr = chr;
user->memory_slots = 0;
user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4,
&vhost_user_state_destroy);
return true;
}
void vhost_user_cleanup(VhostUserState *user)
{
int i;
VhostUserHostNotifier *n;
if (!user->chr) {
return;
}
memory_region_transaction_begin();
for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
n = &user->notifier[i];
vhost_user_host_notifier_remove(user, NULL, i);
object_unparent(OBJECT(&n->mr));
}
user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true);
memory_region_transaction_commit();
user->chr = NULL;
}

View file

@ -368,11 +368,18 @@ static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v)
v->iova_range.last);
}
static bool vhost_vdpa_one_time_request(struct vhost_dev *dev)
/*
* The use of this function is for requests that only need to be
* applied once. Typically such request occurs at the beginning
* of operation, and before setting up queues. It should not be
* used for request that performs operation until all queues are
* set, which would need to check dev->vq_index_end instead.
*/
static bool vhost_vdpa_first_dev(struct vhost_dev *dev)
{
struct vhost_vdpa *v = dev->opaque;
return v->index != 0;
return v->index == 0;
}
static int vhost_vdpa_get_dev_features(struct vhost_dev *dev,
@ -453,7 +460,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
vhost_vdpa_get_iova_range(v);
if (vhost_vdpa_one_time_request(dev)) {
if (!vhost_vdpa_first_dev(dev)) {
return 0;
}
@ -596,7 +603,7 @@ static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
struct vhost_memory *mem)
{
if (vhost_vdpa_one_time_request(dev)) {
if (!vhost_vdpa_first_dev(dev)) {
return 0;
}
@ -625,7 +632,7 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev,
struct vhost_vdpa *v = dev->opaque;
int ret;
if (vhost_vdpa_one_time_request(dev)) {
if (!vhost_vdpa_first_dev(dev)) {
return 0;
}
@ -667,7 +674,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
features &= f;
if (vhost_vdpa_one_time_request(dev)) {
if (vhost_vdpa_first_dev(dev)) {
r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
if (r) {
return -EFAULT;
@ -1018,7 +1025,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
struct vhost_vring_addr addr = {
.index = i,
.index = dev->vq_index + i,
};
int r;
bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
@ -1120,7 +1127,7 @@ static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
struct vhost_log *log)
{
struct vhost_vdpa *v = dev->opaque;
if (v->shadow_vqs_enabled || vhost_vdpa_one_time_request(dev)) {
if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) {
return 0;
}
@ -1172,11 +1179,11 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
struct vhost_vring_state *ring)
{
struct vhost_vdpa *v = dev->opaque;
int vdpa_idx = ring->index - dev->vq_index;
int ret;
if (v->shadow_vqs_enabled) {
VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs,
ring->index);
VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
/*
* Setting base as last used idx, so destination will see as available
@ -1242,7 +1249,7 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev,
static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
if (vhost_vdpa_one_time_request(dev)) {
if (!vhost_vdpa_first_dev(dev)) {
return 0;
}

View file

@ -224,12 +224,11 @@ int vhost_vsock_common_post_load(void *opaque, int version_id)
return 0;
}
void vhost_vsock_common_realize(VirtIODevice *vdev, const char *name)
void vhost_vsock_common_realize(VirtIODevice *vdev)
{
VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
virtio_init(vdev, name, VIRTIO_ID_VSOCK,
sizeof(struct virtio_vsock_config));
virtio_init(vdev, VIRTIO_ID_VSOCK, sizeof(struct virtio_vsock_config));
/* Receive and transmit queues belong to vhost */
vvc->recv_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE,
@ -259,6 +258,12 @@ void vhost_vsock_common_unrealize(VirtIODevice *vdev)
virtio_cleanup(vdev);
}
static struct vhost_dev *vhost_vsock_common_get_vhost(VirtIODevice *vdev)
{
VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
return &vvc->vhost_dev;
}
static Property vhost_vsock_common_properties[] = {
DEFINE_PROP_ON_OFF_AUTO("seqpacket", VHostVSockCommon, seqpacket,
ON_OFF_AUTO_AUTO),
@ -274,6 +279,7 @@ static void vhost_vsock_common_class_init(ObjectClass *klass, void *data)
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
vdc->guest_notifier_mask = vhost_vsock_common_guest_notifier_mask;
vdc->guest_notifier_pending = vhost_vsock_common_guest_notifier_pending;
vdc->get_vhost = vhost_vsock_common_get_vhost;
}
static const TypeInfo vhost_vsock_common_info = {

View file

@ -13,7 +13,7 @@
#include "qemu/osdep.h"
#include "virtio-pci.h"
#include "hw/virtio/virtio-pci.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/vhost-vsock.h"
#include "qemu/module.h"

View file

@ -169,7 +169,7 @@ static void vhost_vsock_device_realize(DeviceState *dev, Error **errp)
}
}
vhost_vsock_common_realize(vdev, "vhost-vsock");
vhost_vsock_common_realize(vdev);
ret = vhost_dev_init(&vvc->vhost_dev, (void *)(uintptr_t)vhostfd,
VHOST_BACKEND_TYPE_KERNEL, 0, errp);

View file

@ -1738,6 +1738,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
/* should only be called after backend is connected */
assert(hdev->vhost_ops);
vdev->vhost_started = true;
hdev->started = true;
hdev->vdev = vdev;
@ -1810,7 +1811,7 @@ fail_vq:
fail_mem:
fail_features:
vdev->vhost_started = false;
hdev->started = false;
return r;
}
@ -1841,6 +1842,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
}
vhost_log_put(hdev, true);
hdev->started = false;
vdev->vhost_started = false;
hdev->vdev = NULL;
}

View file

@ -15,7 +15,7 @@
#include "qemu/osdep.h"
#include "virtio-pci.h"
#include "hw/virtio/virtio-pci.h"
#include "hw/9pfs/virtio-9p.h"
#include "hw/qdev-properties.h"
#include "qemu/module.h"

View file

@ -14,7 +14,7 @@
#include "qemu/osdep.h"
#include "virtio-pci.h"
#include "hw/virtio/virtio-pci.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio-balloon.h"
#include "qapi/error.h"

View file

@ -882,8 +882,7 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
VirtIOBalloon *s = VIRTIO_BALLOON(dev);
int ret;
virtio_init(vdev, "virtio-balloon", VIRTIO_ID_BALLOON,
virtio_balloon_config_size(s));
virtio_init(vdev, VIRTIO_ID_BALLOON, virtio_balloon_config_size(s));
ret = qemu_add_balloon_handler(virtio_balloon_to_target,
virtio_balloon_stat, s);

View file

@ -19,7 +19,7 @@
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio-blk.h"
#include "virtio-pci.h"
#include "hw/virtio/virtio-pci.h"
#include "qapi/error.h"
#include "qemu/module.h"
#include "qom/object.h"

View file

@ -78,17 +78,23 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp)
return;
}
vdev_has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
if (klass->get_dma_as != NULL && has_iommu) {
vdev->dma_as = &address_space_memory;
if (has_iommu) {
vdev_has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
/*
* Present IOMMU_PLATFORM to the driver iff iommu_plattform=on and
* device operational. If the driver does not accept IOMMU_PLATFORM
* we fail the device.
*/
virtio_add_feature(&vdev->host_features, VIRTIO_F_IOMMU_PLATFORM);
vdev->dma_as = klass->get_dma_as(qbus->parent);
if (!vdev_has_iommu && vdev->dma_as != &address_space_memory) {
error_setg(errp,
if (klass->get_dma_as) {
vdev->dma_as = klass->get_dma_as(qbus->parent);
if (!vdev_has_iommu && vdev->dma_as != &address_space_memory) {
error_setg(errp,
"iommu_platform=true is not supported by the device");
return;
return;
}
}
} else {
vdev->dma_as = &address_space_memory;
}
}

View file

@ -242,7 +242,7 @@ static void virtio_crypto_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
}
out_num = elem->out_num;
out_iov_copy = g_memdup(elem->out_sg, sizeof(out_iov[0]) * out_num);
out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num);
out_iov = out_iov_copy;
in_num = elem->in_num;
@ -605,11 +605,11 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request)
}
out_num = elem->out_num;
out_iov_copy = g_memdup(elem->out_sg, sizeof(out_iov[0]) * out_num);
out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num);
out_iov = out_iov_copy;
in_num = elem->in_num;
in_iov_copy = g_memdup(elem->in_sg, sizeof(in_iov[0]) * in_num);
in_iov_copy = g_memdup2(elem->in_sg, sizeof(in_iov[0]) * in_num);
in_iov = in_iov_copy;
if (unlikely(iov_to_buf(out_iov, out_num, 0, &req, sizeof(req))
@ -810,7 +810,7 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp)
return;
}
virtio_init(vdev, "virtio-crypto", VIRTIO_ID_CRYPTO, vcrypto->config_size);
virtio_init(vdev, VIRTIO_ID_CRYPTO, vcrypto->config_size);
vcrypto->curr_queues = 1;
vcrypto->vqs = g_new0(VirtIOCryptoQueue, vcrypto->max_queues);
for (i = 0; i < vcrypto->max_queues; i++) {
@ -961,6 +961,15 @@ static bool virtio_crypto_guest_notifier_pending(VirtIODevice *vdev, int idx)
return cryptodev_vhost_virtqueue_pending(vdev, queue, idx);
}
static struct vhost_dev *virtio_crypto_get_vhost(VirtIODevice *vdev)
{
VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev);
CryptoDevBackend *b = vcrypto->cryptodev;
CryptoDevBackendClient *cc = b->conf.peers.ccs[0];
CryptoDevBackendVhost *vhost_crypto = cryptodev_get_vhost(cc, b, 0);
return &vhost_crypto->dev;
}
static void virtio_crypto_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
@ -977,6 +986,7 @@ static void virtio_crypto_class_init(ObjectClass *klass, void *data)
vdc->set_status = virtio_crypto_set_status;
vdc->guest_notifier_mask = virtio_crypto_guest_notifier_mask;
vdc->guest_notifier_pending = virtio_crypto_guest_notifier_pending;
vdc->get_vhost = virtio_crypto_get_vhost;
}
static void virtio_crypto_instance_init(Object *obj)

View file

@ -8,7 +8,7 @@
#include "qemu/osdep.h"
#include "virtio-pci.h"
#include "hw/virtio/virtio-pci.h"
#include "hw/virtio/virtio-input.h"
#include "qemu/module.h"
#include "qom/object.h"

View file

@ -8,7 +8,7 @@
#include "qemu/osdep.h"
#include "virtio-pci.h"
#include "hw/virtio/virtio-pci.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio-input.h"
#include "qemu/module.h"

View file

@ -11,7 +11,7 @@
#include "qemu/osdep.h"
#include "virtio-pci.h"
#include "hw/virtio/virtio-pci.h"
#include "hw/virtio/virtio-iommu.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"

View file

@ -1033,8 +1033,7 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU,
sizeof(struct virtio_iommu_config));
virtio_init(vdev, VIRTIO_ID_IOMMU, sizeof(struct virtio_iommu_config));
memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num));

View file

@ -867,8 +867,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
vmem->block_size;
vmem->bitmap = bitmap_new(vmem->bitmap_size);
virtio_init(vdev, TYPE_VIRTIO_MEM, VIRTIO_ID_MEM,
sizeof(struct virtio_mem_config));
virtio_init(vdev, VIRTIO_ID_MEM, sizeof(struct virtio_mem_config));
vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request);
host_memory_backend_set_mapped(vmem->memdev, true);

View file

@ -19,7 +19,7 @@
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio-net.h"
#include "virtio-pci.h"
#include "hw/virtio/virtio-pci.h"
#include "qapi/error.h"
#include "qemu/module.h"
#include "qom/object.h"

View file

@ -33,11 +33,12 @@
#include "hw/pci/msix.h"
#include "hw/loader.h"
#include "sysemu/kvm.h"
#include "virtio-pci.h"
#include "hw/virtio/virtio-pci.h"
#include "qemu/range.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/visitor.h"
#include "sysemu/replay.h"
#include "trace.h"
#define VIRTIO_PCI_REGION_SIZE(dev) VIRTIO_PCI_CONFIG_OFF(msix_present(dev))
@ -1380,6 +1381,7 @@ static void virtio_pci_notify_write(void *opaque, hwaddr addr,
unsigned queue = addr / virtio_pci_queue_mem_mult(proxy);
if (vdev != NULL && queue < VIRTIO_QUEUE_MAX) {
trace_virtio_pci_notify_write(addr, val, size);
virtio_queue_notify(vdev, queue);
}
}
@ -1393,6 +1395,7 @@ static void virtio_pci_notify_write_pio(void *opaque, hwaddr addr,
unsigned queue = val;
if (vdev != NULL && queue < VIRTIO_QUEUE_MAX) {
trace_virtio_pci_notify_write_pio(addr, val, size);
virtio_queue_notify(vdev, queue);
}
}

View file

@ -1,255 +0,0 @@
/*
* Virtio PCI Bindings
*
* Copyright IBM, Corp. 2007
* Copyright (c) 2009 CodeSourcery
*
* Authors:
* Anthony Liguori <aliguori@us.ibm.com>
* Paul Brook <paul@codesourcery.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*/
#ifndef QEMU_VIRTIO_PCI_H
#define QEMU_VIRTIO_PCI_H
#include "hw/pci/msi.h"
#include "hw/virtio/virtio-bus.h"
#include "qom/object.h"
/* virtio-pci-bus */
typedef struct VirtioBusState VirtioPCIBusState;
typedef struct VirtioBusClass VirtioPCIBusClass;
#define TYPE_VIRTIO_PCI_BUS "virtio-pci-bus"
DECLARE_OBJ_CHECKERS(VirtioPCIBusState, VirtioPCIBusClass,
VIRTIO_PCI_BUS, TYPE_VIRTIO_PCI_BUS)
enum {
VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT,
VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT,
VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT,
VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT,
VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT,
VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT,
VIRTIO_PCI_FLAG_ATS_BIT,
VIRTIO_PCI_FLAG_INIT_DEVERR_BIT,
VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT,
VIRTIO_PCI_FLAG_INIT_PM_BIT,
VIRTIO_PCI_FLAG_INIT_FLR_BIT,
VIRTIO_PCI_FLAG_AER_BIT,
VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED_BIT,
};
/* Need to activate work-arounds for buggy guests at vmstate load. */
#define VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION \
(1 << VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT)
/* Performance improves when virtqueue kick processing is decoupled from the
* vcpu thread using ioeventfd for some devices. */
#define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)
/* virtio version flags */
#define VIRTIO_PCI_FLAG_DISABLE_PCIE (1 << VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT)
/* migrate extra state */
#define VIRTIO_PCI_FLAG_MIGRATE_EXTRA (1 << VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT)
/* have pio notification for modern device ? */
#define VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY \
(1 << VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT)
/* page per vq flag to be used by split drivers within guests */
#define VIRTIO_PCI_FLAG_PAGE_PER_VQ \
(1 << VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT)
/* address space translation service */
#define VIRTIO_PCI_FLAG_ATS (1 << VIRTIO_PCI_FLAG_ATS_BIT)
/* Init error enabling flags */
#define VIRTIO_PCI_FLAG_INIT_DEVERR (1 << VIRTIO_PCI_FLAG_INIT_DEVERR_BIT)
/* Init Link Control register */
#define VIRTIO_PCI_FLAG_INIT_LNKCTL (1 << VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT)
/* Init Power Management */
#define VIRTIO_PCI_FLAG_INIT_PM (1 << VIRTIO_PCI_FLAG_INIT_PM_BIT)
/* Init Function Level Reset capability */
#define VIRTIO_PCI_FLAG_INIT_FLR (1 << VIRTIO_PCI_FLAG_INIT_FLR_BIT)
/* Advanced Error Reporting capability */
#define VIRTIO_PCI_FLAG_AER (1 << VIRTIO_PCI_FLAG_AER_BIT)
/* Page Aligned Address space Translation Service */
#define VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED \
(1 << VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED_BIT)
typedef struct {
MSIMessage msg;
int virq;
unsigned int users;
} VirtIOIRQFD;
/*
* virtio-pci: This is the PCIDevice which has a virtio-pci-bus.
*/
#define TYPE_VIRTIO_PCI "virtio-pci"
OBJECT_DECLARE_TYPE(VirtIOPCIProxy, VirtioPCIClass, VIRTIO_PCI)
struct VirtioPCIClass {
PCIDeviceClass parent_class;
DeviceRealize parent_dc_realize;
void (*realize)(VirtIOPCIProxy *vpci_dev, Error **errp);
};
typedef struct VirtIOPCIRegion {
MemoryRegion mr;
uint32_t offset;
uint32_t size;
uint32_t type;
} VirtIOPCIRegion;
typedef struct VirtIOPCIQueue {
uint16_t num;
bool enabled;
uint32_t desc[2];
uint32_t avail[2];
uint32_t used[2];
} VirtIOPCIQueue;
struct VirtIOPCIProxy {
PCIDevice pci_dev;
MemoryRegion bar;
union {
struct {
VirtIOPCIRegion common;
VirtIOPCIRegion isr;
VirtIOPCIRegion device;
VirtIOPCIRegion notify;
VirtIOPCIRegion notify_pio;
};
VirtIOPCIRegion regs[5];
};
MemoryRegion modern_bar;
MemoryRegion io_bar;
uint32_t legacy_io_bar_idx;
uint32_t msix_bar_idx;
uint32_t modern_io_bar_idx;
uint32_t modern_mem_bar_idx;
int config_cap;
uint32_t flags;
bool disable_modern;
bool ignore_backend_features;
OnOffAuto disable_legacy;
uint32_t class_code;
uint32_t nvectors;
uint32_t dfselect;
uint32_t gfselect;
uint32_t guest_features[2];
VirtIOPCIQueue vqs[VIRTIO_QUEUE_MAX];
VirtIOIRQFD *vector_irqfd;
int nvqs_with_notifiers;
VirtioBusState bus;
};
static inline bool virtio_pci_modern(VirtIOPCIProxy *proxy)
{
return !proxy->disable_modern;
}
static inline bool virtio_pci_legacy(VirtIOPCIProxy *proxy)
{
return proxy->disable_legacy == ON_OFF_AUTO_OFF;
}
static inline void virtio_pci_force_virtio_1(VirtIOPCIProxy *proxy)
{
proxy->disable_modern = false;
proxy->disable_legacy = ON_OFF_AUTO_ON;
}
static inline void virtio_pci_disable_modern(VirtIOPCIProxy *proxy)
{
proxy->disable_modern = true;
}
/*
* virtio-input-pci: This extends VirtioPCIProxy.
*/
#define TYPE_VIRTIO_INPUT_PCI "virtio-input-pci"
/* Virtio ABI version, if we increment this, we break the guest driver. */
#define VIRTIO_PCI_ABI_VERSION 0
/* Input for virtio_pci_types_register() */
typedef struct VirtioPCIDeviceTypeInfo {
/*
* Common base class for the subclasses below.
*
* Required only if transitional_name or non_transitional_name is set.
*
* We need a separate base type instead of making all types
* inherit from generic_name for two reasons:
* 1) generic_name implements INTERFACE_PCIE_DEVICE, but
* transitional_name does not.
* 2) generic_name has the "disable-legacy" and "disable-modern"
* properties, transitional_name and non_transitional name don't.
*/
const char *base_name;
/*
* Generic device type. Optional.
*
* Supports both transitional and non-transitional modes,
* using the disable-legacy and disable-modern properties.
* If disable-legacy=auto, (non-)transitional mode is selected
* depending on the bus where the device is plugged.
*
* Implements both INTERFACE_PCIE_DEVICE and INTERFACE_CONVENTIONAL_PCI_DEVICE,
* but PCI Express is supported only in non-transitional mode.
*
* The only type implemented by QEMU 3.1 and older.
*/
const char *generic_name;
/*
* The transitional device type. Optional.
*
* Implements both INTERFACE_PCIE_DEVICE and INTERFACE_CONVENTIONAL_PCI_DEVICE.
*/
const char *transitional_name;
/*
* The non-transitional device type. Optional.
*
* Implements INTERFACE_CONVENTIONAL_PCI_DEVICE only.
*/
const char *non_transitional_name;
/* Parent type. If NULL, TYPE_VIRTIO_PCI is used */
const char *parent;
/* Same as TypeInfo fields: */
size_t instance_size;
size_t class_size;
void (*instance_init)(Object *obj);
void (*class_init)(ObjectClass *klass, void *data);
InterfaceInfo *interfaces;
} VirtioPCIDeviceTypeInfo;
/* Register virtio-pci type(s). @t must be static. */
void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t);
/**
* virtio_pci_optimal_num_queues:
* @fixed_queues: number of queues that are always present
*
* Returns: The optimal number of queues for a multi-queue device, excluding
* @fixed_queues.
*/
unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues);
#endif

View file

@ -122,8 +122,7 @@ static void virtio_pmem_realize(DeviceState *dev, Error **errp)
}
host_memory_backend_set_mapped(pmem->memdev, true);
virtio_init(vdev, TYPE_VIRTIO_PMEM, VIRTIO_ID_PMEM,
sizeof(struct virtio_pmem_config));
virtio_init(vdev, VIRTIO_ID_PMEM, sizeof(struct virtio_pmem_config));
pmem->rq_vq = virtio_add_queue(vdev, 128, virtio_pmem_flush);
}

View file

@ -11,7 +11,7 @@
#include "qemu/osdep.h"
#include "virtio-pci.h"
#include "hw/virtio/virtio-pci.h"
#include "hw/virtio/virtio-rng.h"
#include "qapi/error.h"
#include "qemu/module.h"

View file

@ -215,7 +215,7 @@ static void virtio_rng_device_realize(DeviceState *dev, Error **errp)
return;
}
virtio_init(vdev, "virtio-rng", VIRTIO_ID_RNG, 0);
virtio_init(vdev, VIRTIO_ID_RNG, 0);
vrng->vq = virtio_add_queue(vdev, 8, handle_input);
vrng->quota_remaining = vrng->conf.max_bytes;

View file

@ -18,7 +18,7 @@
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio-scsi.h"
#include "qemu/module.h"
#include "virtio-pci.h"
#include "hw/virtio/virtio-pci.h"
#include "qom/object.h"
typedef struct VirtIOSCSIPCI VirtIOSCSIPCI;

View file

@ -20,7 +20,7 @@
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio-serial.h"
#include "qemu/module.h"
#include "virtio-pci.h"
#include "hw/virtio/virtio-pci.h"
#include "qom/object.h"
typedef struct VirtIOSerialPCI VirtIOSerialPCI;

View file

@ -132,6 +132,56 @@ struct VirtQueue
QLIST_ENTRY(VirtQueue) node;
};
const char *virtio_device_names[] = {
[VIRTIO_ID_NET] = "virtio-net",
[VIRTIO_ID_BLOCK] = "virtio-blk",
[VIRTIO_ID_CONSOLE] = "virtio-serial",
[VIRTIO_ID_RNG] = "virtio-rng",
[VIRTIO_ID_BALLOON] = "virtio-balloon",
[VIRTIO_ID_IOMEM] = "virtio-iomem",
[VIRTIO_ID_RPMSG] = "virtio-rpmsg",
[VIRTIO_ID_SCSI] = "virtio-scsi",
[VIRTIO_ID_9P] = "virtio-9p",
[VIRTIO_ID_MAC80211_WLAN] = "virtio-mac-wlan",
[VIRTIO_ID_RPROC_SERIAL] = "virtio-rproc-serial",
[VIRTIO_ID_CAIF] = "virtio-caif",
[VIRTIO_ID_MEMORY_BALLOON] = "virtio-mem-balloon",
[VIRTIO_ID_GPU] = "virtio-gpu",
[VIRTIO_ID_CLOCK] = "virtio-clk",
[VIRTIO_ID_INPUT] = "virtio-input",
[VIRTIO_ID_VSOCK] = "vhost-vsock",
[VIRTIO_ID_CRYPTO] = "virtio-crypto",
[VIRTIO_ID_SIGNAL_DIST] = "virtio-signal",
[VIRTIO_ID_PSTORE] = "virtio-pstore",
[VIRTIO_ID_IOMMU] = "virtio-iommu",
[VIRTIO_ID_MEM] = "virtio-mem",
[VIRTIO_ID_SOUND] = "virtio-sound",
[VIRTIO_ID_FS] = "virtio-user-fs",
[VIRTIO_ID_PMEM] = "virtio-pmem",
[VIRTIO_ID_RPMB] = "virtio-rpmb",
[VIRTIO_ID_MAC80211_HWSIM] = "virtio-mac-hwsim",
[VIRTIO_ID_VIDEO_ENCODER] = "virtio-vid-encoder",
[VIRTIO_ID_VIDEO_DECODER] = "virtio-vid-decoder",
[VIRTIO_ID_SCMI] = "virtio-scmi",
[VIRTIO_ID_NITRO_SEC_MOD] = "virtio-nitro-sec-mod",
[VIRTIO_ID_I2C_ADAPTER] = "vhost-user-i2c",
[VIRTIO_ID_WATCHDOG] = "virtio-watchdog",
[VIRTIO_ID_CAN] = "virtio-can",
[VIRTIO_ID_DMABUF] = "virtio-dmabuf",
[VIRTIO_ID_PARAM_SERV] = "virtio-param-serv",
[VIRTIO_ID_AUDIO_POLICY] = "virtio-audio-pol",
[VIRTIO_ID_BT] = "virtio-bluetooth",
[VIRTIO_ID_GPIO] = "virtio-gpio"
};
static const char *virtio_id_to_name(uint16_t device_id)
{
assert(device_id < G_N_ELEMENTS(virtio_device_names));
const char *name = virtio_device_names[device_id];
assert(name != NULL);
return name;
}
/* Called within call_rcu(). */
static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
{
@ -3207,8 +3257,7 @@ void virtio_instance_init_common(Object *proxy_obj, void *data,
qdev_alias_all_properties(vdev, proxy_obj);
}
void virtio_init(VirtIODevice *vdev, const char *name,
uint16_t device_id, size_t config_size)
void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size)
{
BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
@ -3222,6 +3271,7 @@ void virtio_init(VirtIODevice *vdev, const char *name,
vdev->start_on_kick = false;
vdev->started = false;
vdev->vhost_started = false;
vdev->device_id = device_id;
vdev->status = 0;
qatomic_set(&vdev->isr, 0);
@ -3237,7 +3287,7 @@ void virtio_init(VirtIODevice *vdev, const char *name,
vdev->vq[i].host_notifier_enabled = false;
}
vdev->name = name;
vdev->name = virtio_id_to_name(device_id);
vdev->config_len = config_size;
if (vdev->config_len) {
vdev->config = g_malloc0(config_size);