virtio: features,fixes

Merge tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging

A bunch of improvements:
- vhost dirty log is now only scanned once, not once per device
- virtio and vhost now support VIRTIO_F_NOTIFICATION_DATA
- cxl gained DCD emulation support
- pvpanic gained shutdown support
- beginning of patchset for Generic Port Affinity Structure
- s3 support
- friendlier error messages when boot fails on some illegal configs
- for vhost-user, VHOST_USER_SET_LOG_BASE is now only sent once
- part of vhost-user support for any POSIX system - not yet enabled due to
  qtest failures
- sr-iov VF setup code has been reworked significantly
- new tests, particularly for risc-v ACPI
- bugfixes

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
* tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu: (85 commits)
  hw/pci: Replace -1 with UINT32_MAX for romsize
  pcie_sriov: Register VFs after migration
  pcie_sriov: Remove num_vfs from PCIESriovPF
  pcie_sriov: Release VFs failed to realize
  pcie_sriov: Reuse SR-IOV VF device instances
  pcie_sriov: Ensure VF function number does not overflow
  pcie_sriov: Do not manually unrealize
  hw/ppc/spapr_pci: Do not reject VFs created after a PF
  hw/ppc/spapr_pci: Do not create DT for disabled PCI device
  hw/pci: Rename has_power to enabled
  virtio-iommu: Clear IOMMUDevice when VFIO device is unplugged
  virtio: remove virtio_tswap16s() call in vring_packed_event_read()
  hw/cxl/events: Mark cxl-add-dynamic-capacity and cxl-release-dynamic-capcity unstable
  hw/cxl/events: Improve QMP interfaces and documentation for add/release dynamic capacity.
  tests/data/acpi/rebuild-expected-aml.sh: Add RISC-V
  pc-bios/meson.build: Add support for RISC-V in unpack_edk2_blobs
  meson.build: Add RISC-V to the edk2-target list
  tests/data/acpi/virt: Move ARM64 ACPI tables under aarch64/${machine} path
  tests/data/acpi: Move x86 ACPI tables under x86/${machine} path
  tests/qtest/bios-tables-test.c: Set "arch" for x86 tests
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
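One item above, CXL Dynamic Capacity Device (DCD) emulation, is exposed through the new "num-dc-regions" and "volatile-dc-memdev" properties that this series adds to the cxl-type3 device (see the ct3_props change in the diff below). A minimal command-line sketch of how such a device might be instantiated; the backend size, object ids, and the CXL fixed-memory-window layout here are illustrative assumptions, not taken from this commit:

    # Hypothetical example: a 2 GiB volatile backend split into two DC regions
    qemu-system-x86_64 -machine q35,cxl=on \
        -object memory-backend-ram,id=dc-mem,size=2G \
        -device pxb-cxl,bus=pcie.0,bus_nr=12,id=cxl.1 \
        -device cxl-rp,port=0,bus=cxl.1,id=rp0,chassis=0,slot=0 \
        -device cxl-type3,bus=rp0,id=dcd0,num-dc-regions=2,volatile-dc-memdev=dc-mem \
        -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G

Per cxl_create_dc_regions() in the diff, the backend size must divide evenly into num-dc-regions regions that are each a multiple of 256 MiB, which the 2 GiB / 2-region split above satisfies.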
commit 1406b7fc4b
242 changed files with 2851 additions and 361 deletions
@@ -209,12 +209,19 @@ static void acpi_dsdt_add_tpm(Aml *scope, VirtMachineState *vms)
#define ROOT_COMPLEX_ENTRY_SIZE 36
#define IORT_NODE_OFFSET 48

/*
 * Append an ID mapping entry as described by "Table 4 ID mapping format" in
 * "IO Remapping Table System Software on ARM Platforms", Chapter 3.
 * Document number: ARM DEN 0049E.f, Apr 2024
 *
 * Note that @id_count gets internally subtracted by one, following the spec.
 */
static void build_iort_id_mapping(GArray *table_data, uint32_t input_base,
                                  uint32_t id_count, uint32_t out_ref)
{
    /* Table 4 ID mapping format */
    build_append_int_noprefix(table_data, input_base, 4); /* Input base */
    build_append_int_noprefix(table_data, id_count, 4); /* Number of IDs */
    /* Number of IDs - The number of IDs in the range minus one */
    build_append_int_noprefix(table_data, id_count - 1, 4);
    build_append_int_noprefix(table_data, input_base, 4); /* Output base */
    build_append_int_noprefix(table_data, out_ref, 4); /* Output Reference */
    /* Flags */
@@ -269,7 +276,6 @@ static void
build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
{
    int i, nb_nodes, rc_mapping_count;
    const uint32_t iort_node_offset = IORT_NODE_OFFSET;
    size_t node_size, smmu_offset = 0;
    AcpiIortIdMapping *idmap;
    uint32_t id = 0;
@@ -306,8 +312,8 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
        }

        /* Append the last RC -> ITS ID mapping */
        if (next_range.input_base < 0xFFFF) {
            next_range.id_count = 0xFFFF - next_range.input_base;
        if (next_range.input_base < 0x10000) {
            next_range.id_count = 0x10000 - next_range.input_base;
            g_array_append_val(its_idmaps, next_range);
        }

@@ -366,7 +372,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
        build_append_int_noprefix(table_data, 0, 4);

        /* output IORT node is the ITS group node (the first node) */
        build_iort_id_mapping(table_data, 0, 0xFFFF, IORT_NODE_OFFSET);
        build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET);
    }

    /* Table 17 Root Complex Node */
@@ -415,11 +421,11 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
            range = &g_array_index(its_idmaps, AcpiIortIdMapping, i);
            /* output IORT node is the ITS group node (the first node) */
            build_iort_id_mapping(table_data, range->input_base,
                                  range->id_count, iort_node_offset);
                                  range->id_count, IORT_NODE_OFFSET);
        }
    } else {
        /* output IORT node is the ITS group node (the first node) */
        build_iort_id_mapping(table_data, 0, 0xFFFF, IORT_NODE_OFFSET);
        build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET);
    }

    acpi_table_end(linker, &table);
@@ -51,6 +51,7 @@ static const int user_feature_bits[] = {
    VIRTIO_F_RING_PACKED,
    VIRTIO_F_IOMMU_PLATFORM,
    VIRTIO_F_RING_RESET,
    VIRTIO_F_NOTIFICATION_DATA,
    VHOST_INVALID_FEATURE_BIT
};

@@ -353,7 +354,7 @@ static void vhost_user_blk_disconnect(DeviceState *dev)
    VHostUserBlk *s = VHOST_USER_BLK(vdev);

    if (!s->connected) {
        return;
        goto done;
    }
    s->connected = false;

@@ -361,6 +362,7 @@ static void vhost_user_blk_disconnect(DeviceState *dev)

    vhost_dev_cleanup(&s->dev);

done:
    /* Re-instate the event handler for new connections */
    qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
                             NULL, dev, NULL, true);
@@ -384,7 +386,7 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event)
    case CHR_EVENT_CLOSED:
        /* defer close until later to avoid circular close */
        vhost_user_async_close(dev, &s->chardev, &s->dev,
                               vhost_user_blk_disconnect, vhost_user_blk_event);
                               vhost_user_blk_disconnect);
        break;
    case CHR_EVENT_BREAK:
    case CHR_EVENT_MUX_IN:
@@ -38,6 +38,7 @@ GlobalProperty hw_compat_9_0[] = {
    {"arm-cpu", "backcompat-cntfrq", "true" },
    {"scsi-disk-base", "migrate-emulated-scsi-request", "false" },
    {"vfio-pci", "skip-vsc-check", "false" },
    { "virtio-pci", "x-pcie-pm-no-soft-reset", "off" },
};
const size_t hw_compat_9_0_len = G_N_ELEMENTS(hw_compat_9_0);
@ -19,8 +19,12 @@
|
|||
#include "qemu/units.h"
|
||||
#include "qemu/uuid.h"
|
||||
#include "sysemu/hostmem.h"
|
||||
#include "qemu/range.h"
|
||||
|
||||
#define CXL_CAPACITY_MULTIPLIER (256 * MiB)
|
||||
#define CXL_DC_EVENT_LOG_SIZE 8
|
||||
#define CXL_NUM_EXTENTS_SUPPORTED 512
|
||||
#define CXL_NUM_TAGS_SUPPORTED 0
|
||||
|
||||
/*
|
||||
* How to add a new command, example. The command set FOO, with cmd BAR.
|
||||
|
@ -79,6 +83,11 @@ enum {
|
|||
#define GET_POISON_LIST 0x0
|
||||
#define INJECT_POISON 0x1
|
||||
#define CLEAR_POISON 0x2
|
||||
DCD_CONFIG = 0x48,
|
||||
#define GET_DC_CONFIG 0x0
|
||||
#define GET_DYN_CAP_EXT_LIST 0x1
|
||||
#define ADD_DYN_CAP_RSP 0x2
|
||||
#define RELEASE_DYN_CAP 0x3
|
||||
PHYSICAL_SWITCH = 0x51,
|
||||
#define IDENTIFY_SWITCH_DEVICE 0x0
|
||||
#define GET_PHYSICAL_PORT_STATE 0x1
|
||||
|
@ -617,7 +626,8 @@ static CXLRetCode cmd_firmware_update_get_info(const struct cxl_cmd *cmd,
|
|||
size_t *len_out,
|
||||
CXLCCI *cci)
|
||||
{
|
||||
CXLDeviceState *cxl_dstate = &CXL_TYPE3(cci->d)->cxl_dstate;
|
||||
CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
|
||||
CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate;
|
||||
struct {
|
||||
uint8_t slots_supported;
|
||||
uint8_t slot_info;
|
||||
|
@ -631,7 +641,8 @@ static CXLRetCode cmd_firmware_update_get_info(const struct cxl_cmd *cmd,
|
|||
QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50);
|
||||
|
||||
if ((cxl_dstate->vmem_size < CXL_CAPACITY_MULTIPLIER) ||
|
||||
(cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER)) {
|
||||
(cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER) ||
|
||||
(ct3d->dc.total_capacity < CXL_CAPACITY_MULTIPLIER)) {
|
||||
return CXL_MBOX_INTERNAL_ERROR;
|
||||
}
|
||||
|
||||
|
@ -780,14 +791,16 @@ static CXLRetCode cmd_identify_memory_device(const struct cxl_cmd *cmd,
|
|||
uint16_t inject_poison_limit;
|
||||
uint8_t poison_caps;
|
||||
uint8_t qos_telemetry_caps;
|
||||
uint16_t dc_event_log_size;
|
||||
} QEMU_PACKED *id;
|
||||
QEMU_BUILD_BUG_ON(sizeof(*id) != 0x43);
|
||||
QEMU_BUILD_BUG_ON(sizeof(*id) != 0x45);
|
||||
CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
|
||||
CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
|
||||
CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate;
|
||||
|
||||
if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
|
||||
(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
|
||||
(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
|
||||
(!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
|
||||
return CXL_MBOX_INTERNAL_ERROR;
|
||||
}
|
||||
|
||||
|
@ -797,7 +810,7 @@ static CXLRetCode cmd_identify_memory_device(const struct cxl_cmd *cmd,
|
|||
snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0);
|
||||
|
||||
stq_le_p(&id->total_capacity,
|
||||
cxl_dstate->mem_size / CXL_CAPACITY_MULTIPLIER);
|
||||
cxl_dstate->static_mem_size / CXL_CAPACITY_MULTIPLIER);
|
||||
stq_le_p(&id->persistent_capacity,
|
||||
cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER);
|
||||
stq_le_p(&id->volatile_capacity,
|
||||
|
@ -807,6 +820,7 @@ static CXLRetCode cmd_identify_memory_device(const struct cxl_cmd *cmd,
|
|||
st24_le_p(id->poison_list_max_mer, 256);
|
||||
/* No limit - so limited by main poison record limit */
|
||||
stw_le_p(&id->inject_poison_limit, 0);
|
||||
stw_le_p(&id->dc_event_log_size, CXL_DC_EVENT_LOG_SIZE);
|
||||
|
||||
*len_out = sizeof(*id);
|
||||
return CXL_MBOX_SUCCESS;
|
||||
|
@ -828,9 +842,11 @@ static CXLRetCode cmd_ccls_get_partition_info(const struct cxl_cmd *cmd,
|
|||
uint64_t next_pmem;
|
||||
} QEMU_PACKED *part_info = (void *)payload_out;
|
||||
QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20);
|
||||
CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
|
||||
|
||||
if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
|
||||
(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
|
||||
(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
|
||||
(!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
|
||||
return CXL_MBOX_INTERNAL_ERROR;
|
||||
}
|
||||
|
||||
|
@ -1172,7 +1188,8 @@ static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd,
|
|||
struct clear_poison_pl *in = (void *)payload_in;
|
||||
|
||||
dpa = ldq_le_p(&in->dpa);
|
||||
if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->mem_size) {
|
||||
if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size +
|
||||
ct3d->dc.total_capacity) {
|
||||
return CXL_MBOX_INVALID_PA;
|
||||
}
|
||||
|
||||
|
@ -1235,6 +1252,576 @@ static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd,
|
|||
return CXL_MBOX_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* CXL r3.1 section 8.2.9.9.9.1: Get Dynamic Capacity Configuration
|
||||
* (Opcode: 4800h)
|
||||
*/
|
||||
static CXLRetCode cmd_dcd_get_dyn_cap_config(const struct cxl_cmd *cmd,
|
||||
uint8_t *payload_in,
|
||||
size_t len_in,
|
||||
uint8_t *payload_out,
|
||||
size_t *len_out,
|
||||
CXLCCI *cci)
|
||||
{
|
||||
CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
|
||||
struct {
|
||||
uint8_t region_cnt;
|
||||
uint8_t start_rid;
|
||||
} QEMU_PACKED *in = (void *)payload_in;
|
||||
struct {
|
||||
uint8_t num_regions;
|
||||
uint8_t regions_returned;
|
||||
uint8_t rsvd1[6];
|
||||
struct {
|
||||
uint64_t base;
|
||||
uint64_t decode_len;
|
||||
uint64_t region_len;
|
||||
uint64_t block_size;
|
||||
uint32_t dsmadhandle;
|
||||
uint8_t flags;
|
||||
uint8_t rsvd2[3];
|
||||
} QEMU_PACKED records[];
|
||||
} QEMU_PACKED *out = (void *)payload_out;
|
||||
struct {
|
||||
uint32_t num_extents_supported;
|
||||
uint32_t num_extents_available;
|
||||
uint32_t num_tags_supported;
|
||||
uint32_t num_tags_available;
|
||||
} QEMU_PACKED *extra_out;
|
||||
uint16_t record_count;
|
||||
uint16_t i;
|
||||
uint16_t out_pl_len;
|
||||
uint8_t start_rid;
|
||||
|
||||
start_rid = in->start_rid;
|
||||
if (start_rid >= ct3d->dc.num_regions) {
|
||||
return CXL_MBOX_INVALID_INPUT;
|
||||
}
|
||||
|
||||
record_count = MIN(ct3d->dc.num_regions - in->start_rid, in->region_cnt);
|
||||
|
||||
out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
|
||||
extra_out = (void *)(payload_out + out_pl_len);
|
||||
out_pl_len += sizeof(*extra_out);
|
||||
assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
|
||||
|
||||
out->num_regions = ct3d->dc.num_regions;
|
||||
out->regions_returned = record_count;
|
||||
for (i = 0; i < record_count; i++) {
|
||||
stq_le_p(&out->records[i].base,
|
||||
ct3d->dc.regions[start_rid + i].base);
|
||||
stq_le_p(&out->records[i].decode_len,
|
||||
ct3d->dc.regions[start_rid + i].decode_len /
|
||||
CXL_CAPACITY_MULTIPLIER);
|
||||
stq_le_p(&out->records[i].region_len,
|
||||
ct3d->dc.regions[start_rid + i].len);
|
||||
stq_le_p(&out->records[i].block_size,
|
||||
ct3d->dc.regions[start_rid + i].block_size);
|
||||
stl_le_p(&out->records[i].dsmadhandle,
|
||||
ct3d->dc.regions[start_rid + i].dsmadhandle);
|
||||
out->records[i].flags = ct3d->dc.regions[start_rid + i].flags;
|
||||
}
|
||||
/*
|
||||
* TODO: Assign values once extents and tags are introduced
|
||||
* to use.
|
||||
*/
|
||||
stl_le_p(&extra_out->num_extents_supported, CXL_NUM_EXTENTS_SUPPORTED);
|
||||
stl_le_p(&extra_out->num_extents_available, CXL_NUM_EXTENTS_SUPPORTED -
|
||||
ct3d->dc.total_extent_count);
|
||||
stl_le_p(&extra_out->num_tags_supported, CXL_NUM_TAGS_SUPPORTED);
|
||||
stl_le_p(&extra_out->num_tags_available, CXL_NUM_TAGS_SUPPORTED);
|
||||
|
||||
*len_out = out_pl_len;
|
||||
return CXL_MBOX_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* CXL r3.1 section 8.2.9.9.9.2:
|
||||
* Get Dynamic Capacity Extent List (Opcode 4801h)
|
||||
*/
|
||||
static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const struct cxl_cmd *cmd,
|
||||
uint8_t *payload_in,
|
||||
size_t len_in,
|
||||
uint8_t *payload_out,
|
||||
size_t *len_out,
|
||||
CXLCCI *cci)
|
||||
{
|
||||
CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
|
||||
struct {
|
||||
uint32_t extent_cnt;
|
||||
uint32_t start_extent_id;
|
||||
} QEMU_PACKED *in = (void *)payload_in;
|
||||
struct {
|
||||
uint32_t count;
|
||||
uint32_t total_extents;
|
||||
uint32_t generation_num;
|
||||
uint8_t rsvd[4];
|
||||
CXLDCExtentRaw records[];
|
||||
} QEMU_PACKED *out = (void *)payload_out;
|
||||
uint32_t start_extent_id = in->start_extent_id;
|
||||
CXLDCExtentList *extent_list = &ct3d->dc.extents;
|
||||
uint16_t record_count = 0, i = 0, record_done = 0;
|
||||
uint16_t out_pl_len, size;
|
||||
CXLDCExtent *ent;
|
||||
|
||||
if (start_extent_id > ct3d->dc.total_extent_count) {
|
||||
return CXL_MBOX_INVALID_INPUT;
|
||||
}
|
||||
|
||||
record_count = MIN(in->extent_cnt,
|
||||
ct3d->dc.total_extent_count - start_extent_id);
|
||||
size = CXL_MAILBOX_MAX_PAYLOAD_SIZE - sizeof(*out);
|
||||
record_count = MIN(record_count, size / sizeof(out->records[0]));
|
||||
out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
|
||||
|
||||
stl_le_p(&out->count, record_count);
|
||||
stl_le_p(&out->total_extents, ct3d->dc.total_extent_count);
|
||||
stl_le_p(&out->generation_num, ct3d->dc.ext_list_gen_seq);
|
||||
|
||||
if (record_count > 0) {
|
||||
CXLDCExtentRaw *out_rec = &out->records[record_done];
|
||||
|
||||
QTAILQ_FOREACH(ent, extent_list, node) {
|
||||
if (i++ < start_extent_id) {
|
||||
continue;
|
||||
}
|
||||
stq_le_p(&out_rec->start_dpa, ent->start_dpa);
|
||||
stq_le_p(&out_rec->len, ent->len);
|
||||
memcpy(&out_rec->tag, ent->tag, 0x10);
|
||||
stw_le_p(&out_rec->shared_seq, ent->shared_seq);
|
||||
|
||||
record_done++;
|
||||
if (record_done == record_count) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*len_out = out_pl_len;
|
||||
return CXL_MBOX_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check whether any bit between addr[nr, nr+size) is set,
|
||||
* return true if any bit is set, otherwise return false
|
||||
*/
|
||||
bool test_any_bits_set(const unsigned long *addr, unsigned long nr,
|
||||
unsigned long size)
|
||||
{
|
||||
unsigned long res = find_next_bit(addr, size + nr, nr);
|
||||
|
||||
return res < nr + size;
|
||||
}
|
||||
|
||||
CXLDCRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len)
|
||||
{
|
||||
int i;
|
||||
CXLDCRegion *region = &ct3d->dc.regions[0];
|
||||
|
||||
if (dpa < region->base ||
|
||||
dpa >= region->base + ct3d->dc.total_capacity) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* CXL r3.1 section 9.13.3: Dynamic Capacity Device (DCD)
|
||||
*
|
||||
* Regions are used in increasing-DPA order, with Region 0 being used for
|
||||
* the lowest DPA of Dynamic Capacity and Region 7 for the highest DPA.
|
||||
* So check from the last region to find where the dpa belongs. Extents that
|
||||
* cross multiple regions are not allowed.
|
||||
*/
|
||||
for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
|
||||
region = &ct3d->dc.regions[i];
|
||||
if (dpa >= region->base) {
|
||||
if (dpa + len > region->base + region->len) {
|
||||
return NULL;
|
||||
}
|
||||
return region;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void cxl_insert_extent_to_extent_list(CXLDCExtentList *list,
|
||||
uint64_t dpa,
|
||||
uint64_t len,
|
||||
uint8_t *tag,
|
||||
uint16_t shared_seq)
|
||||
{
|
||||
CXLDCExtent *extent;
|
||||
|
||||
extent = g_new0(CXLDCExtent, 1);
|
||||
extent->start_dpa = dpa;
|
||||
extent->len = len;
|
||||
if (tag) {
|
||||
memcpy(extent->tag, tag, 0x10);
|
||||
}
|
||||
extent->shared_seq = shared_seq;
|
||||
|
||||
QTAILQ_INSERT_TAIL(list, extent, node);
|
||||
}
|
||||
|
||||
void cxl_remove_extent_from_extent_list(CXLDCExtentList *list,
|
||||
CXLDCExtent *extent)
|
||||
{
|
||||
QTAILQ_REMOVE(list, extent, node);
|
||||
g_free(extent);
|
||||
}
|
||||
|
||||
/*
|
||||
* Add a new extent to the extent "group" if group exists;
|
||||
* otherwise, create a new group
|
||||
* Return value: the extent group where the extent is inserted.
|
||||
*/
|
||||
CXLDCExtentGroup *cxl_insert_extent_to_extent_group(CXLDCExtentGroup *group,
|
||||
uint64_t dpa,
|
||||
uint64_t len,
|
||||
uint8_t *tag,
|
||||
uint16_t shared_seq)
|
||||
{
|
||||
if (!group) {
|
||||
group = g_new0(CXLDCExtentGroup, 1);
|
||||
QTAILQ_INIT(&group->list);
|
||||
}
|
||||
cxl_insert_extent_to_extent_list(&group->list, dpa, len,
|
||||
tag, shared_seq);
|
||||
return group;
|
||||
}
|
||||
|
||||
void cxl_extent_group_list_insert_tail(CXLDCExtentGroupList *list,
|
||||
CXLDCExtentGroup *group)
|
||||
{
|
||||
QTAILQ_INSERT_TAIL(list, group, node);
|
||||
}
|
||||
|
||||
void cxl_extent_group_list_delete_front(CXLDCExtentGroupList *list)
|
||||
{
|
||||
CXLDCExtent *ent, *ent_next;
|
||||
CXLDCExtentGroup *group = QTAILQ_FIRST(list);
|
||||
|
||||
QTAILQ_REMOVE(list, group, node);
|
||||
QTAILQ_FOREACH_SAFE(ent, &group->list, node, ent_next) {
|
||||
cxl_remove_extent_from_extent_list(&group->list, ent);
|
||||
}
|
||||
g_free(group);
|
||||
}
|
||||
|
||||
/*
|
||||
* CXL r3.1 Table 8-168: Add Dynamic Capacity Response Input Payload
|
||||
* CXL r3.1 Table 8-170: Release Dynamic Capacity Input Payload
|
||||
*/
|
||||
typedef struct CXLUpdateDCExtentListInPl {
|
||||
uint32_t num_entries_updated;
|
||||
uint8_t flags;
|
||||
uint8_t rsvd[3];
|
||||
/* CXL r3.1 Table 8-169: Updated Extent */
|
||||
struct {
|
||||
uint64_t start_dpa;
|
||||
uint64_t len;
|
||||
uint8_t rsvd[8];
|
||||
} QEMU_PACKED updated_entries[];
|
||||
} QEMU_PACKED CXLUpdateDCExtentListInPl;
|
||||
|
||||
/*
|
||||
* For the extents in the extent list to operate, check whether they are valid
|
||||
* 1. The extent should be in the range of a valid DC region;
|
||||
* 2. The extent should not cross multiple regions;
|
||||
* 3. The start DPA and the length of the extent should align with the block
|
||||
* size of the region;
|
||||
* 4. The address range of multiple extents in the list should not overlap.
|
||||
*/
|
||||
static CXLRetCode cxl_detect_malformed_extent_list(CXLType3Dev *ct3d,
|
||||
const CXLUpdateDCExtentListInPl *in)
|
||||
{
|
||||
uint64_t min_block_size = UINT64_MAX;
|
||||
CXLDCRegion *region;
|
||||
CXLDCRegion *lastregion = &ct3d->dc.regions[ct3d->dc.num_regions - 1];
|
||||
g_autofree unsigned long *blk_bitmap = NULL;
|
||||
uint64_t dpa, len;
|
||||
uint32_t i;
|
||||
|
||||
for (i = 0; i < ct3d->dc.num_regions; i++) {
|
||||
region = &ct3d->dc.regions[i];
|
||||
min_block_size = MIN(min_block_size, region->block_size);
|
||||
}
|
||||
|
||||
blk_bitmap = bitmap_new((lastregion->base + lastregion->len -
|
||||
ct3d->dc.regions[0].base) / min_block_size);
|
||||
|
||||
for (i = 0; i < in->num_entries_updated; i++) {
|
||||
dpa = in->updated_entries[i].start_dpa;
|
||||
len = in->updated_entries[i].len;
|
||||
|
||||
region = cxl_find_dc_region(ct3d, dpa, len);
|
||||
if (!region) {
|
||||
return CXL_MBOX_INVALID_PA;
|
||||
}
|
||||
|
||||
dpa -= ct3d->dc.regions[0].base;
|
||||
if (dpa % region->block_size || len % region->block_size) {
|
||||
return CXL_MBOX_INVALID_EXTENT_LIST;
|
||||
}
|
||||
/* the dpa range already covered by some other extents in the list */
|
||||
if (test_any_bits_set(blk_bitmap, dpa / min_block_size,
|
||||
len / min_block_size)) {
|
||||
return CXL_MBOX_INVALID_EXTENT_LIST;
|
||||
}
|
||||
bitmap_set(blk_bitmap, dpa / min_block_size, len / min_block_size);
|
||||
}
|
||||
|
||||
return CXL_MBOX_SUCCESS;
|
||||
}
|
||||
|
||||
static CXLRetCode cxl_dcd_add_dyn_cap_rsp_dry_run(CXLType3Dev *ct3d,
|
||||
const CXLUpdateDCExtentListInPl *in)
|
||||
{
|
||||
uint32_t i;
|
||||
CXLDCExtent *ent;
|
||||
CXLDCExtentGroup *ext_group;
|
||||
uint64_t dpa, len;
|
||||
Range range1, range2;
|
||||
|
||||
for (i = 0; i < in->num_entries_updated; i++) {
|
||||
dpa = in->updated_entries[i].start_dpa;
|
||||
len = in->updated_entries[i].len;
|
||||
|
||||
range_init_nofail(&range1, dpa, len);
|
||||
|
||||
/*
|
||||
* The host-accepted DPA range must be contained by the first extent
|
||||
* group in the pending list
|
||||
*/
|
||||
ext_group = QTAILQ_FIRST(&ct3d->dc.extents_pending);
|
||||
if (!cxl_extents_contains_dpa_range(&ext_group->list, dpa, len)) {
|
||||
return CXL_MBOX_INVALID_PA;
|
||||
}
|
||||
|
||||
/* to-be-added range should not overlap with range already accepted */
|
||||
QTAILQ_FOREACH(ent, &ct3d->dc.extents, node) {
|
||||
range_init_nofail(&range2, ent->start_dpa, ent->len);
|
||||
if (range_overlaps_range(&range1, &range2)) {
|
||||
return CXL_MBOX_INVALID_PA;
|
||||
}
|
||||
}
|
||||
}
|
||||
return CXL_MBOX_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* CXL r3.1 section 8.2.9.9.9.3: Add Dynamic Capacity Response (Opcode 4802h)
|
||||
* An extent is added to the extent list and becomes usable only after the
|
||||
* response is processed successfully.
|
||||
*/
|
||||
static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd,
|
||||
uint8_t *payload_in,
|
||||
size_t len_in,
|
||||
uint8_t *payload_out,
|
||||
size_t *len_out,
|
||||
CXLCCI *cci)
|
||||
{
|
||||
CXLUpdateDCExtentListInPl *in = (void *)payload_in;
|
||||
CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
|
||||
CXLDCExtentList *extent_list = &ct3d->dc.extents;
|
||||
uint32_t i;
|
||||
uint64_t dpa, len;
|
||||
CXLRetCode ret;
|
||||
|
||||
if (in->num_entries_updated == 0) {
|
||||
cxl_extent_group_list_delete_front(&ct3d->dc.extents_pending);
|
||||
return CXL_MBOX_SUCCESS;
|
||||
}
|
||||
|
||||
/* Adding extents causes exceeding device's extent tracking ability. */
|
||||
if (in->num_entries_updated + ct3d->dc.total_extent_count >
|
||||
CXL_NUM_EXTENTS_SUPPORTED) {
|
||||
return CXL_MBOX_RESOURCES_EXHAUSTED;
|
||||
}
|
||||
|
||||
ret = cxl_detect_malformed_extent_list(ct3d, in);
|
||||
if (ret != CXL_MBOX_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = cxl_dcd_add_dyn_cap_rsp_dry_run(ct3d, in);
|
||||
if (ret != CXL_MBOX_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
for (i = 0; i < in->num_entries_updated; i++) {
|
||||
dpa = in->updated_entries[i].start_dpa;
|
||||
len = in->updated_entries[i].len;
|
||||
|
||||
cxl_insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0);
|
||||
ct3d->dc.total_extent_count += 1;
|
||||
ct3_set_region_block_backed(ct3d, dpa, len);
|
||||
}
|
||||
/* Remove the first extent group in the pending list */
|
||||
cxl_extent_group_list_delete_front(&ct3d->dc.extents_pending);
|
||||
|
||||
return CXL_MBOX_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy extent list from src to dst
|
||||
* Return value: number of extents copied
|
||||
*/
|
||||
static uint32_t copy_extent_list(CXLDCExtentList *dst,
|
||||
const CXLDCExtentList *src)
|
||||
{
|
||||
uint32_t cnt = 0;
|
||||
CXLDCExtent *ent;
|
||||
|
||||
if (!dst || !src) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
QTAILQ_FOREACH(ent, src, node) {
|
||||
cxl_insert_extent_to_extent_list(dst, ent->start_dpa, ent->len,
|
||||
ent->tag, ent->shared_seq);
|
||||
cnt++;
|
||||
}
|
||||
return cnt;
|
||||
}
|
||||
|
||||
static CXLRetCode cxl_dc_extent_release_dry_run(CXLType3Dev *ct3d,
|
||||
const CXLUpdateDCExtentListInPl *in, CXLDCExtentList *updated_list,
|
||||
uint32_t *updated_list_size)
|
||||
{
|
||||
CXLDCExtent *ent, *ent_next;
|
||||
uint64_t dpa, len;
|
||||
uint32_t i;
|
||||
int cnt_delta = 0;
|
||||
CXLRetCode ret = CXL_MBOX_SUCCESS;
|
||||
|
||||
QTAILQ_INIT(updated_list);
|
||||
copy_extent_list(updated_list, &ct3d->dc.extents);
|
||||
|
||||
for (i = 0; i < in->num_entries_updated; i++) {
|
||||
Range range;
|
||||
|
||||
dpa = in->updated_entries[i].start_dpa;
|
||||
len = in->updated_entries[i].len;
|
||||
|
||||
/* Check if the DPA range is not fully backed with valid extents */
|
||||
if (!ct3_test_region_block_backed(ct3d, dpa, len)) {
|
||||
ret = CXL_MBOX_INVALID_PA;
|
||||
goto free_and_exit;
|
||||
}
|
||||
|
||||
/* After this point, extent overflow is the only error can happen */
|
||||
while (len > 0) {
|
||||
QTAILQ_FOREACH(ent, updated_list, node) {
|
||||
range_init_nofail(&range, ent->start_dpa, ent->len);
|
||||
|
||||
if (range_contains(&range, dpa)) {
|
||||
uint64_t len1, len2 = 0, len_done = 0;
|
||||
uint64_t ent_start_dpa = ent->start_dpa;
|
||||
uint64_t ent_len = ent->len;
|
||||
|
||||
len1 = dpa - ent->start_dpa;
|
||||
/* Found the extent or the subset of an existing extent */
|
||||
if (range_contains(&range, dpa + len - 1)) {
|
||||
len2 = ent_start_dpa + ent_len - dpa - len;
|
||||
} else {
|
||||
dpa = ent_start_dpa + ent_len;
|
||||
}
|
||||
len_done = ent_len - len1 - len2;
|
||||
|
||||
cxl_remove_extent_from_extent_list(updated_list, ent);
|
||||
cnt_delta--;
|
||||
|
||||
if (len1) {
|
||||
cxl_insert_extent_to_extent_list(updated_list,
|
||||
ent_start_dpa,
|
||||
len1, NULL, 0);
|
||||
cnt_delta++;
|
||||
}
|
||||
if (len2) {
|
||||
cxl_insert_extent_to_extent_list(updated_list,
|
||||
dpa + len,
|
||||
len2, NULL, 0);
|
||||
cnt_delta++;
|
||||
}
|
||||
|
||||
if (cnt_delta + ct3d->dc.total_extent_count >
|
||||
CXL_NUM_EXTENTS_SUPPORTED) {
|
||||
ret = CXL_MBOX_RESOURCES_EXHAUSTED;
|
||||
goto free_and_exit;
|
||||
}
|
||||
|
||||
len -= len_done;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
free_and_exit:
|
||||
if (ret != CXL_MBOX_SUCCESS) {
|
||||
QTAILQ_FOREACH_SAFE(ent, updated_list, node, ent_next) {
|
||||
cxl_remove_extent_from_extent_list(updated_list, ent);
|
||||
}
|
||||
*updated_list_size = 0;
|
||||
} else {
|
||||
*updated_list_size = ct3d->dc.total_extent_count + cnt_delta;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* CXL r3.1 section 8.2.9.9.9.4: Release Dynamic Capacity (Opcode 4803h)
|
||||
*/
|
||||
static CXLRetCode cmd_dcd_release_dyn_cap(const struct cxl_cmd *cmd,
|
||||
uint8_t *payload_in,
|
||||
size_t len_in,
|
||||
uint8_t *payload_out,
|
||||
size_t *len_out,
|
||||
CXLCCI *cci)
|
||||
{
|
||||
CXLUpdateDCExtentListInPl *in = (void *)payload_in;
|
||||
CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
|
||||
CXLDCExtentList updated_list;
|
||||
CXLDCExtent *ent, *ent_next;
|
||||
uint32_t updated_list_size;
|
||||
CXLRetCode ret;
|
||||
|
||||
if (in->num_entries_updated == 0) {
|
||||
return CXL_MBOX_INVALID_INPUT;
|
||||
}
|
||||
|
||||
ret = cxl_detect_malformed_extent_list(ct3d, in);
|
||||
if (ret != CXL_MBOX_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = cxl_dc_extent_release_dry_run(ct3d, in, &updated_list,
|
||||
&updated_list_size);
|
||||
if (ret != CXL_MBOX_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the dry run release passes, the returned updated_list will
|
||||
* be the updated extent list and we just need to clear the extents
|
||||
* in the accepted list and copy extents in the updated_list to accepted
|
||||
* list and update the extent count;
|
||||
*/
|
||||
QTAILQ_FOREACH_SAFE(ent, &ct3d->dc.extents, node, ent_next) {
|
||||
ct3_clear_region_block_backed(ct3d, ent->start_dpa, ent->len);
|
||||
cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);
|
||||
}
|
||||
copy_extent_list(&ct3d->dc.extents, &updated_list);
|
||||
QTAILQ_FOREACH_SAFE(ent, &updated_list, node, ent_next) {
|
||||
ct3_set_region_block_backed(ct3d, ent->start_dpa, ent->len);
|
||||
cxl_remove_extent_from_extent_list(&updated_list, ent);
|
||||
}
|
||||
ct3d->dc.total_extent_count = updated_list_size;
|
||||
|
||||
return CXL_MBOX_SUCCESS;
|
||||
}
|
||||
|
||||
#define IMMEDIATE_CONFIG_CHANGE (1 << 1)
|
||||
#define IMMEDIATE_DATA_CHANGE (1 << 2)
|
||||
#define IMMEDIATE_POLICY_CHANGE (1 << 3)
|
||||
|
@ -1279,6 +1866,20 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = {
|
|||
cmd_media_clear_poison, 72, 0 },
|
||||
};
|
||||
|
||||
static const struct cxl_cmd cxl_cmd_set_dcd[256][256] = {
|
||||
[DCD_CONFIG][GET_DC_CONFIG] = { "DCD_GET_DC_CONFIG",
|
||||
cmd_dcd_get_dyn_cap_config, 2, 0 },
|
||||
[DCD_CONFIG][GET_DYN_CAP_EXT_LIST] = {
|
||||
"DCD_GET_DYNAMIC_CAPACITY_EXTENT_LIST", cmd_dcd_get_dyn_cap_ext_list,
|
||||
8, 0 },
|
||||
[DCD_CONFIG][ADD_DYN_CAP_RSP] = {
|
||||
"DCD_ADD_DYNAMIC_CAPACITY_RESPONSE", cmd_dcd_add_dyn_cap_rsp,
|
||||
~0, IMMEDIATE_DATA_CHANGE },
|
||||
[DCD_CONFIG][RELEASE_DYN_CAP] = {
|
||||
"DCD_RELEASE_DYNAMIC_CAPACITY", cmd_dcd_release_dyn_cap,
|
||||
~0, IMMEDIATE_DATA_CHANGE },
|
||||
};
|
||||
|
||||
static const struct cxl_cmd cxl_cmd_set_sw[256][256] = {
|
||||
[INFOSTAT][IS_IDENTIFY] = { "IDENTIFY", cmd_infostat_identify, 0, 0 },
|
||||
[INFOSTAT][BACKGROUND_OPERATION_STATUS] = { "BACKGROUND_OPERATION_STATUS",
|
||||
|
@ -1424,9 +2025,9 @@ static void bg_timercb(void *opaque)
|
|||
}
|
||||
}
|
||||
|
||||
void cxl_init_cci(CXLCCI *cci, size_t payload_max)
|
||||
static void cxl_rebuild_cel(CXLCCI *cci)
|
||||
{
|
||||
cci->payload_max = payload_max;
|
||||
cci->cel_size = 0; /* Reset for a fresh build */
|
||||
for (int set = 0; set < 256; set++) {
|
||||
for (int cmd = 0; cmd < 256; cmd++) {
|
||||
if (cci->cxl_cmd_set[set][cmd].handler) {
|
||||
|
@ -1440,6 +2041,13 @@ void cxl_init_cci(CXLCCI *cci, size_t payload_max)
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cxl_init_cci(CXLCCI *cci, size_t payload_max)
|
||||
{
|
||||
cci->payload_max = payload_max;
|
||||
cxl_rebuild_cel(cci);
|
||||
|
||||
cci->bg.complete_pct = 0;
|
||||
cci->bg.starttime = 0;
|
||||
cci->bg.runtime = 0;
|
||||
|
@ -1447,10 +2055,29 @@ void cxl_init_cci(CXLCCI *cci, size_t payload_max)
|
|||
bg_timercb, cci);
|
||||
}
|
||||
|
||||
static void cxl_copy_cci_commands(CXLCCI *cci, const struct cxl_cmd (*cxl_cmds)[256])
|
||||
{
|
||||
for (int set = 0; set < 256; set++) {
|
||||
for (int cmd = 0; cmd < 256; cmd++) {
|
||||
if (cxl_cmds[set][cmd].handler) {
|
||||
cci->cxl_cmd_set[set][cmd] = cxl_cmds[set][cmd];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cxl_add_cci_commands(CXLCCI *cci, const struct cxl_cmd (*cxl_cmd_set)[256],
|
||||
size_t payload_max)
|
||||
{
|
||||
cci->payload_max = MAX(payload_max, cci->payload_max);
|
||||
cxl_copy_cci_commands(cci, cxl_cmd_set);
|
||||
cxl_rebuild_cel(cci);
|
||||
}
|
||||
|
||||
void cxl_initialize_mailbox_swcci(CXLCCI *cci, DeviceState *intf,
|
||||
DeviceState *d, size_t payload_max)
|
||||
{
|
||||
cci->cxl_cmd_set = cxl_cmd_set_sw;
|
||||
cxl_copy_cci_commands(cci, cxl_cmd_set_sw);
|
||||
cci->d = d;
|
||||
cci->intf = intf;
|
||||
cxl_init_cci(cci, payload_max);
|
||||
|
@ -1458,7 +2085,12 @@ void cxl_initialize_mailbox_swcci(CXLCCI *cci, DeviceState *intf,
|
|||
|
||||
void cxl_initialize_mailbox_t3(CXLCCI *cci, DeviceState *d, size_t payload_max)
|
||||
{
|
||||
cci->cxl_cmd_set = cxl_cmd_set;
|
||||
CXLType3Dev *ct3d = CXL_TYPE3(d);
|
||||
|
||||
cxl_copy_cci_commands(cci, cxl_cmd_set);
|
||||
if (ct3d->dc.num_regions) {
|
||||
cxl_copy_cci_commands(cci, cxl_cmd_set_dcd);
|
||||
}
|
||||
cci->d = d;
|
||||
|
||||
/* No separation for PCI MB as protocol handled in PCI device */
|
||||
|
@ -1476,7 +2108,7 @@ static const struct cxl_cmd cxl_cmd_set_t3_ld[256][256] = {
|
|||
void cxl_initialize_t3_ld_cci(CXLCCI *cci, DeviceState *d, DeviceState *intf,
|
||||
size_t payload_max)
|
||||
{
|
||||
cci->cxl_cmd_set = cxl_cmd_set_t3_ld;
|
||||
cxl_copy_cci_commands(cci, cxl_cmd_set_t3_ld);
|
||||
cci->d = d;
|
||||
cci->intf = intf;
|
||||
cxl_init_cci(cci, payload_max);
|
||||
|
@ -1496,7 +2128,7 @@ void cxl_initialize_t3_fm_owned_ld_mctpcci(CXLCCI *cci, DeviceState *d,
|
|||
DeviceState *intf,
|
||||
size_t payload_max)
|
||||
{
|
||||
cci->cxl_cmd_set = cxl_cmd_set_t3_fm_owned_ld_mctp;
|
||||
cxl_copy_cci_commands(cci, cxl_cmd_set_t3_fm_owned_ld_mctp);
|
||||
cci->d = d;
|
||||
cci->intf = intf;
|
||||
cxl_init_cci(cci, payload_max);
|
||||
|
|
|
@ -281,8 +281,9 @@ vhost_user_gpu_handle_display(VhostUserGPU *g, VhostUserGpuMsg *msg)
|
|||
modifier = m2->modifier;
|
||||
}
|
||||
|
||||
dmabuf = qemu_dmabuf_new(m->fd_width, m->fd_height,
|
||||
m->fd_stride, 0, 0, 0, 0,
|
||||
dmabuf = qemu_dmabuf_new(m->width, m->height,
|
||||
m->fd_stride, 0, 0,
|
||||
m->fd_width, m->fd_height,
|
||||
m->fd_drm_fourcc, modifier,
|
||||
fd, false, m->fd_flags &
|
||||
VIRTIO_GPU_RESOURCE_FLAG_Y_0_TOP);
|
||||
|
|
|
@ -11,22 +11,29 @@
|
|||
#include "e820_memory_layout.h"
|
||||
|
||||
static size_t e820_entries;
|
||||
struct e820_entry *e820_table;
|
||||
static struct e820_entry *e820_table;
|
||||
static gboolean e820_done;
|
||||
|
||||
int e820_add_entry(uint64_t address, uint64_t length, uint32_t type)
|
||||
void e820_add_entry(uint64_t address, uint64_t length, uint32_t type)
|
||||
{
|
||||
assert(!e820_done);
|
||||
|
||||
/* new "etc/e820" file -- include ram and reserved entries */
|
||||
e820_table = g_renew(struct e820_entry, e820_table, e820_entries + 1);
|
||||
e820_table[e820_entries].address = cpu_to_le64(address);
|
||||
e820_table[e820_entries].length = cpu_to_le64(length);
|
||||
e820_table[e820_entries].type = cpu_to_le32(type);
|
||||
e820_entries++;
|
||||
|
||||
return e820_entries;
|
||||
}
|
||||
|
||||
int e820_get_num_entries(void)
|
||||
int e820_get_table(struct e820_entry **table)
|
||||
{
|
||||
e820_done = true;
|
||||
|
||||
if (table) {
|
||||
*table = e820_table;
|
||||
}
|
||||
|
||||
return e820_entries;
|
||||
}
|
||||
|
||||
|
|
|
@ -22,13 +22,9 @@ struct e820_entry {
|
|||
uint32_t type;
|
||||
} QEMU_PACKED __attribute((__aligned__(4)));
|
||||
|
||||
extern struct e820_entry *e820_table;
|
||||
|
||||
int e820_add_entry(uint64_t address, uint64_t length, uint32_t type);
|
||||
int e820_get_num_entries(void);
|
||||
void e820_add_entry(uint64_t address, uint64_t length, uint32_t type);
|
||||
bool e820_get_entry(int index, uint32_t type,
|
||||
uint64_t *address, uint64_t *length);
|
||||
|
||||
|
||||
int e820_get_table(struct e820_entry **table);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -48,6 +48,15 @@ const char *fw_cfg_arch_key_name(uint16_t key)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/* Add etc/e820 late, once all regions should be present */
|
||||
void fw_cfg_add_e820(FWCfgState *fw_cfg)
|
||||
{
|
||||
struct e820_entry *table;
|
||||
int nr_e820 = e820_get_table(&table);
|
||||
|
||||
fw_cfg_add_file(fw_cfg, "etc/e820", table, nr_e820 * sizeof(*table));
|
||||
}
|
||||
|
||||
void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg,
|
||||
SmbiosEntryPointType ep_type)
|
||||
{
|
||||
|
@ -60,6 +69,7 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg,
|
|||
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
|
||||
MachineClass *mc = MACHINE_GET_CLASS(pcms);
|
||||
X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu);
|
||||
int nr_e820;
|
||||
|
||||
if (pcmc->smbios_defaults) {
|
||||
/* These values are guest ABI, do not change */
|
||||
|
@ -78,8 +88,9 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg,
|
|||
}
|
||||
|
||||
/* build the array of physical mem area from e820 table */
|
||||
mem_array = g_malloc0(sizeof(*mem_array) * e820_get_num_entries());
|
||||
for (i = 0, array_count = 0; i < e820_get_num_entries(); i++) {
|
||||
nr_e820 = e820_get_table(NULL);
|
||||
mem_array = g_malloc0(sizeof(*mem_array) * nr_e820);
|
||||
for (i = 0, array_count = 0; i < nr_e820; i++) {
|
||||
uint64_t addr, len;
|
||||
|
||||
if (e820_get_entry(i, E820_RAM, &addr, &len)) {
|
||||
|
@ -138,9 +149,6 @@ FWCfgState *fw_cfg_arch_create(MachineState *ms,
|
|||
#endif
|
||||
fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, 1);
|
||||
|
||||
fw_cfg_add_file(fw_cfg, "etc/e820", e820_table,
|
||||
sizeof(struct e820_entry) * e820_get_num_entries());
|
||||
|
||||
fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, &hpet_cfg, sizeof(hpet_cfg));
|
||||
/* allocate memory for the NUMA channel: one (64bit) word for the number
|
||||
* of nodes, one word for each VCPU->node and one word for each node to
|
||||
|
|
|
@ -27,5 +27,6 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg,
|
|||
SmbiosEntryPointType ep_type);
|
||||
void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg);
|
||||
void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg);
|
||||
void fw_cfg_add_e820(FWCfgState *fw_cfg);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -324,8 +324,6 @@ static void microvm_memory_init(MicrovmMachineState *mms)
|
|||
fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, machine->smp.max_cpus);
|
||||
fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)machine->ram_size);
|
||||
fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, 1);
|
||||
fw_cfg_add_file(fw_cfg, "etc/e820", e820_table,
|
||||
sizeof(struct e820_entry) * e820_get_num_entries());
|
||||
|
||||
rom_set_fw(fw_cfg);
|
||||
|
||||
|
@ -586,9 +584,11 @@ static void microvm_machine_done(Notifier *notifier, void *data)
|
|||
{
|
||||
MicrovmMachineState *mms = container_of(notifier, MicrovmMachineState,
|
||||
machine_done);
|
||||
X86MachineState *x86ms = X86_MACHINE(mms);
|
||||
|
||||
acpi_setup_microvm(mms);
|
||||
dt_setup_microvm(mms);
|
||||
fw_cfg_add_e820(x86ms->fw_cfg);
|
||||
}
|
||||
|
||||
static void microvm_powerdown_req(Notifier *notifier, void *data)
|
||||
|
|
|
@ -625,6 +625,7 @@ void pc_machine_done(Notifier *notifier, void *data)
|
|||
acpi_setup();
|
||||
if (x86ms->fw_cfg) {
|
||||
fw_cfg_build_smbios(pcms, x86ms->fw_cfg, pcms->smbios_entry_point_type);
|
||||
fw_cfg_add_e820(x86ms->fw_cfg);
|
||||
fw_cfg_build_feature_control(MACHINE(pcms), x86ms->fw_cfg);
|
||||
/* update FW_CFG_NB_CPUS to account for -device added CPUs */
|
||||
fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
|
||||
|
|
|
@ -433,6 +433,7 @@ static void apic_common_set_id(Object *obj, Visitor *v, const char *name,
|
|||
APICCommonState *s = APIC_COMMON(obj);
|
||||
DeviceState *dev = DEVICE(obj);
|
||||
uint32_t value;
|
||||
Error *local_err = NULL;
|
||||
|
||||
if (dev->realized) {
|
||||
qdev_prop_set_after_realize(dev, name, errp);
|
||||
|
@ -444,7 +445,11 @@ static void apic_common_set_id(Object *obj, Visitor *v, const char *name,
|
|||
}
|
||||
|
||||
if (value >= 255 && !cpu_has_x2apic_feature(&s->cpu->env)) {
|
||||
error_setg(errp, "APIC ID %d requires x2APIC feature in CPU", value);
|
||||
error_setg(&local_err,
|
||||
"APIC ID %d requires x2APIC feature in CPU",
|
||||
value);
|
||||
error_append_hint(&local_err, "Try x2apic=on in -cpu.\n");
|
||||
error_propagate(errp, local_err);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include "hw/pci/msix.h"
|
||||
|
||||
#define DWORD_BYTE 4
|
||||
#define CXL_CAPACITY_MULTIPLIER (256 * MiB)
|
||||
|
||||
/* Default CDAT entries for a memory region */
|
||||
enum {
|
||||
|
@ -43,8 +44,9 @@ enum {
|
|||
};
|
||||
|
||||
static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
|
||||
int dsmad_handle, MemoryRegion *mr,
|
||||
bool is_pmem, uint64_t dpa_base)
|
||||
int dsmad_handle, uint64_t size,
|
||||
bool is_pmem, bool is_dynamic,
|
||||
uint64_t dpa_base)
|
||||
{
|
||||
CDATDsmas *dsmas;
|
||||
CDATDslbis *dslbis0;
|
||||
|
@ -60,9 +62,10 @@ static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
|
|||
.length = sizeof(*dsmas),
|
||||
},
|
||||
.DSMADhandle = dsmad_handle,
|
||||
.flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0,
|
||||
.flags = (is_pmem ? CDAT_DSMAS_FLAG_NV : 0) |
|
||||
(is_dynamic ? CDAT_DSMAS_FLAG_DYNAMIC_CAP : 0),
|
||||
.DPA_base = dpa_base,
|
||||
.DPA_length = memory_region_size(mr),
|
||||
.DPA_length = size,
|
||||
};
|
||||
|
||||
/* For now, no memory side cache, plausiblish numbers */
|
||||
|
@ -131,7 +134,7 @@ static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
|
|||
*/
|
||||
.EFI_memory_type_attr = is_pmem ? 2 : 1,
|
||||
.DPA_offset = 0,
|
||||
.DPA_length = memory_region_size(mr),
|
||||
.DPA_length = size,
|
||||
};
|
||||
|
||||
/* Header always at start of structure */
|
||||
|
@ -148,11 +151,13 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
|
|||
g_autofree CDATSubHeader **table = NULL;
|
||||
CXLType3Dev *ct3d = priv;
|
||||
MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL;
|
||||
MemoryRegion *dc_mr = NULL;
|
||||
uint64_t vmr_size = 0, pmr_size = 0;
|
||||
int dsmad_handle = 0;
|
||||
int cur_ent = 0;
|
||||
int len = 0;
|
||||
|
||||
if (!ct3d->hostpmem && !ct3d->hostvmem) {
|
||||
if (!ct3d->hostpmem && !ct3d->hostvmem && !ct3d->dc.num_regions) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -162,6 +167,7 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
|
|||
return -EINVAL;
|
||||
}
|
||||
len += CT3_CDAT_NUM_ENTRIES;
|
||||
vmr_size = memory_region_size(volatile_mr);
|
||||
}
|
||||
|
||||
if (ct3d->hostpmem) {
|
||||
|
@ -170,23 +176,57 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
|
|||
return -EINVAL;
|
||||
}
|
||||
len += CT3_CDAT_NUM_ENTRIES;
|
||||
pmr_size = memory_region_size(nonvolatile_mr);
|
||||
}
|
||||
|
||||
if (ct3d->dc.num_regions) {
|
||||
if (!ct3d->dc.host_dc) {
|
||||
return -EINVAL;
|
||||
}
|
||||
dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
|
||||
if (!dc_mr) {
|
||||
return -EINVAL;
|
||||
}
|
||||
len += CT3_CDAT_NUM_ENTRIES * ct3d->dc.num_regions;
|
||||
}
|
||||
|
||||
table = g_malloc0(len * sizeof(*table));
|
||||
|
||||
/* Now fill them in */
|
||||
if (volatile_mr) {
|
||||
ct3_build_cdat_entries_for_mr(table, dsmad_handle++, volatile_mr,
|
||||
false, 0);
|
||||
ct3_build_cdat_entries_for_mr(table, dsmad_handle++, vmr_size,
|
||||
false, false, 0);
|
||||
cur_ent = CT3_CDAT_NUM_ENTRIES;
|
||||
}
|
||||
|
||||
if (nonvolatile_mr) {
|
||||
uint64_t base = volatile_mr ? memory_region_size(volatile_mr) : 0;
|
||||
uint64_t base = vmr_size;
|
||||
ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++,
|
||||
nonvolatile_mr, true, base);
|
||||
pmr_size, true, false, base);
|
||||
cur_ent += CT3_CDAT_NUM_ENTRIES;
|
||||
}
|
||||
|
||||
if (dc_mr) {
|
||||
int i;
|
||||
uint64_t region_base = vmr_size + pmr_size;
|
||||
|
||||
/*
|
||||
* We assume the dynamic capacity to be volatile for now.
|
||||
* Non-volatile dynamic capacity will be added if needed in the
|
||||
* future.
|
||||
*/
|
||||
for (i = 0; i < ct3d->dc.num_regions; i++) {
|
||||
ct3_build_cdat_entries_for_mr(&(table[cur_ent]),
|
||||
dsmad_handle++,
|
||||
ct3d->dc.regions[i].len,
|
||||
false, true, region_base);
|
||||
ct3d->dc.regions[i].dsmadhandle = dsmad_handle - 1;
|
||||
|
||||
cur_ent += CT3_CDAT_NUM_ENTRIES;
|
||||
region_base += ct3d->dc.regions[i].len;
|
||||
}
|
||||
}
|
||||
|
||||
assert(len == cur_ent);
|
||||
|
||||
*cdat_table = g_steal_pointer(&table);
|
||||
|
@ -297,10 +337,17 @@ static void build_dvsecs(CXLType3Dev *ct3d)
|
|||
range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
|
||||
(ct3d->hostpmem->size & 0xF0000000);
|
||||
}
|
||||
} else {
|
||||
} else if (ct3d->hostpmem) {
|
||||
range1_size_hi = ct3d->hostpmem->size >> 32;
|
||||
range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
|
||||
(ct3d->hostpmem->size & 0xF0000000);
|
||||
} else {
|
||||
/*
|
||||
* For DCD with no static memory, set memory active, memory class bits.
|
||||
* No range is set.
|
||||
*/
|
||||
range1_size_hi = 0;
|
||||
range1_size_lo = (2 << 5) | (2 << 2) | 0x3;
|
||||
}
|
||||
|
||||
dvsec = (uint8_t *)&(CXLDVSECDevice){
|
||||
|
@ -567,11 +614,103 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO: dc region configuration will be updated once host backend and address
|
||||
* space support is added for DCD.
|
||||
*/
|
||||
static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp)
|
||||
{
|
||||
int i;
|
||||
uint64_t region_base = 0;
|
||||
uint64_t region_len;
|
||||
uint64_t decode_len;
|
||||
uint64_t blk_size = 2 * MiB;
|
||||
CXLDCRegion *region;
|
||||
MemoryRegion *mr;
|
||||
uint64_t dc_size;
|
||||
|
||||
mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
|
||||
dc_size = memory_region_size(mr);
|
||||
region_len = DIV_ROUND_UP(dc_size, ct3d->dc.num_regions);
|
||||
|
||||
if (dc_size % (ct3d->dc.num_regions * CXL_CAPACITY_MULTIPLIER) != 0) {
|
||||
error_setg(errp,
|
||||
"backend size is not multiple of region len: 0x%" PRIx64,
|
||||
region_len);
|
||||
return false;
|
||||
}
|
||||
if (region_len % CXL_CAPACITY_MULTIPLIER != 0) {
|
||||
error_setg(errp, "DC region size is unaligned to 0x%" PRIx64,
|
||||
CXL_CAPACITY_MULTIPLIER);
|
||||
return false;
|
||||
}
|
||||
decode_len = region_len;
|
||||
|
||||
if (ct3d->hostvmem) {
|
||||
mr = host_memory_backend_get_memory(ct3d->hostvmem);
|
||||
region_base += memory_region_size(mr);
|
||||
}
|
||||
if (ct3d->hostpmem) {
|
||||
mr = host_memory_backend_get_memory(ct3d->hostpmem);
|
||||
region_base += memory_region_size(mr);
|
||||
}
|
||||
if (region_base % CXL_CAPACITY_MULTIPLIER != 0) {
|
||||
error_setg(errp, "DC region base not aligned to 0x%" PRIx64,
|
||||
CXL_CAPACITY_MULTIPLIER);
|
||||
return false;
|
||||
}
|
||||
|
||||
for (i = 0, region = &ct3d->dc.regions[0];
|
||||
i < ct3d->dc.num_regions;
|
||||
i++, region++, region_base += region_len) {
|
||||
*region = (CXLDCRegion) {
|
||||
.base = region_base,
|
||||
.decode_len = decode_len,
|
||||
.len = region_len,
|
||||
.block_size = blk_size,
|
||||
/* dsmad_handle set when creating CDAT table entries */
|
||||
.flags = 0,
|
||||
};
|
||||
ct3d->dc.total_capacity += region->len;
|
||||
region->blk_bitmap = bitmap_new(region->len / region->block_size);
|
||||
}
|
||||
QTAILQ_INIT(&ct3d->dc.extents);
|
||||
QTAILQ_INIT(&ct3d->dc.extents_pending);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
|
||||
{
|
||||
CXLDCExtent *ent, *ent_next;
|
||||
CXLDCExtentGroup *group, *group_next;
|
||||
int i;
|
||||
CXLDCRegion *region;
|
||||
|
||||
QTAILQ_FOREACH_SAFE(ent, &ct3d->dc.extents, node, ent_next) {
|
||||
cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);
|
||||
}
|
||||
|
||||
QTAILQ_FOREACH_SAFE(group, &ct3d->dc.extents_pending, node, group_next) {
|
||||
QTAILQ_REMOVE(&ct3d->dc.extents_pending, group, node);
|
||||
QTAILQ_FOREACH_SAFE(ent, &group->list, node, ent_next) {
|
||||
cxl_remove_extent_from_extent_list(&group->list, ent);
|
||||
}
|
||||
g_free(group);
|
||||
}
|
||||
|
||||
for (i = 0; i < ct3d->dc.num_regions; i++) {
|
||||
region = &ct3d->dc.regions[i];
|
||||
g_free(region->blk_bitmap);
|
||||
}
|
||||
}
|
||||
|
||||
static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
|
||||
{
|
||||
DeviceState *ds = DEVICE(ct3d);
|
||||
|
||||
if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem) {
|
||||
if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem
|
||||
&& !ct3d->dc.num_regions) {
|
||||
error_setg(errp, "at least one memdev property must be set");
|
||||
return false;
|
||||
} else if (ct3d->hostmem && ct3d->hostpmem) {
|
||||
|
@ -608,7 +747,7 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
|
|||
}
|
||||
address_space_init(&ct3d->hostvmem_as, vmr, v_name);
|
||||
ct3d->cxl_dstate.vmem_size = memory_region_size(vmr);
|
||||
ct3d->cxl_dstate.mem_size += memory_region_size(vmr);
|
||||
ct3d->cxl_dstate.static_mem_size += memory_region_size(vmr);
|
||||
g_free(v_name);
|
||||
}
|
||||
|
||||
|
@ -631,10 +770,47 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
|
|||
}
|
||||
address_space_init(&ct3d->hostpmem_as, pmr, p_name);
|
||||
ct3d->cxl_dstate.pmem_size = memory_region_size(pmr);
|
||||
ct3d->cxl_dstate.mem_size += memory_region_size(pmr);
|
||||
ct3d->cxl_dstate.static_mem_size += memory_region_size(pmr);
|
||||
g_free(p_name);
|
||||
}
|
||||
|
||||
ct3d->dc.total_capacity = 0;
|
||||
if (ct3d->dc.num_regions > 0) {
|
||||
MemoryRegion *dc_mr;
|
||||
char *dc_name;
|
||||
|
||||
if (!ct3d->dc.host_dc) {
|
||||
error_setg(errp, "dynamic capacity must have a backing device");
|
||||
return false;
|
||||
}
|
||||
|
||||
dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
|
||||
if (!dc_mr) {
|
||||
error_setg(errp, "dynamic capacity must have a backing device");
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set DC regions as volatile for now, non-volatile support can
|
||||
* be added in the future if needed.
|
||||
*/
|
||||
memory_region_set_nonvolatile(dc_mr, false);
|
||||
memory_region_set_enabled(dc_mr, true);
|
||||
host_memory_backend_set_mapped(ct3d->dc.host_dc, true);
|
||||
if (ds->id) {
|
||||
dc_name = g_strdup_printf("cxl-dcd-dpa-dc-space:%s", ds->id);
|
||||
} else {
|
||||
dc_name = g_strdup("cxl-dcd-dpa-dc-space");
|
||||
}
|
||||
address_space_init(&ct3d->dc.host_dc_as, dc_mr, dc_name);
|
||||
g_free(dc_name);
|
||||
|
||||
if (!cxl_create_dc_regions(ct3d, errp)) {
|
||||
error_append_hint(errp, "setup DC regions failed");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -724,6 +900,10 @@ err_release_cdat:
|
|||
err_free_special_ops:
|
||||
g_free(regs->special_ops);
|
||||
err_address_space_free:
|
||||
if (ct3d->dc.host_dc) {
|
||||
cxl_destroy_dc_regions(ct3d);
|
||||
address_space_destroy(&ct3d->dc.host_dc_as);
|
||||
}
|
||||
if (ct3d->hostpmem) {
|
||||
address_space_destroy(&ct3d->hostpmem_as);
|
||||
}
|
||||
|
@ -742,6 +922,10 @@ static void ct3_exit(PCIDevice *pci_dev)
|
|||
pcie_aer_exit(pci_dev);
|
||||
cxl_doe_cdat_release(cxl_cstate);
|
||||
g_free(regs->special_ops);
|
||||
if (ct3d->dc.host_dc) {
|
||||
cxl_destroy_dc_regions(ct3d);
|
||||
address_space_destroy(&ct3d->dc.host_dc_as);
|
||||
}
|
||||
if (ct3d->hostpmem) {
|
||||
address_space_destroy(&ct3d->hostpmem_as);
|
||||
}
|
||||
|
@ -750,6 +934,70 @@ static void ct3_exit(PCIDevice *pci_dev)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark the DPA range [dpa, dap + len - 1] to be backed and accessible. This
|
||||
* happens when a DC extent is added and accepted by the host.
|
||||
*/
|
||||
void ct3_set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
|
||||
uint64_t len)
|
||||
{
|
||||
CXLDCRegion *region;
|
||||
|
||||
region = cxl_find_dc_region(ct3d, dpa, len);
|
||||
if (!region) {
|
||||
return;
|
||||
}
|
||||
|
||||
bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
|
||||
len / region->block_size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check whether the DPA range [dpa, dpa + len - 1] is backed with DC extents.
|
||||
* Used when validating read/write to dc regions
|
||||
*/
|
||||
bool ct3_test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
|
||||
uint64_t len)
|
||||
{
|
||||
CXLDCRegion *region;
|
||||
uint64_t nbits;
|
||||
long nr;
|
||||
|
||||
region = cxl_find_dc_region(ct3d, dpa, len);
|
||||
if (!region) {
|
||||
return false;
|
||||
}
|
||||
|
||||
nr = (dpa - region->base) / region->block_size;
|
||||
nbits = DIV_ROUND_UP(len, region->block_size);
|
||||
/*
|
||||
* if bits between [dpa, dpa + len) are all 1s, meaning the DPA range is
|
||||
* backed with DC extents, return true; else return false.
|
||||
*/
|
||||
return find_next_zero_bit(region->blk_bitmap, nr + nbits, nr) == nr + nbits;
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark the DPA range [dpa, dap + len - 1] to be unbacked and inaccessible.
|
||||
* This happens when a dc extent is released by the host.
|
||||
*/
|
||||
void ct3_clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
|
||||
uint64_t len)
|
||||
{
|
||||
CXLDCRegion *region;
|
||||
uint64_t nbits;
|
||||
long nr;
|
||||
|
||||
region = cxl_find_dc_region(ct3d, dpa, len);
|
||||
if (!region) {
|
||||
return;
|
||||
}
|
||||
|
||||
nr = (dpa - region->base) / region->block_size;
|
||||
nbits = len / region->block_size;
|
||||
bitmap_clear(region->blk_bitmap, nr, nbits);
|
||||
}
|
||||
|
||||
static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
|
||||
{
|
||||
int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
|
||||
|
@ -820,16 +1068,23 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
|
|||
AddressSpace **as,
|
||||
uint64_t *dpa_offset)
|
||||
{
|
||||
MemoryRegion *vmr = NULL, *pmr = NULL;
|
||||
MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
|
||||
uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
|
||||
|
||||
if (ct3d->hostvmem) {
|
||||
vmr = host_memory_backend_get_memory(ct3d->hostvmem);
|
||||
vmr_size = memory_region_size(vmr);
|
||||
}
|
||||
if (ct3d->hostpmem) {
|
||||
pmr = host_memory_backend_get_memory(ct3d->hostpmem);
|
||||
pmr_size = memory_region_size(pmr);
|
||||
}
|
||||
if (ct3d->dc.host_dc) {
|
||||
dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
|
||||
dc_size = memory_region_size(dc_mr);
|
||||
}
|
||||
|
||||
if (!vmr && !pmr) {
|
||||
if (!vmr && !pmr && !dc_mr) {
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
|
@ -837,19 +1092,22 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (*dpa_offset > ct3d->cxl_dstate.mem_size) {
|
||||
if (*dpa_offset >= vmr_size + pmr_size + dc_size) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (vmr) {
|
||||
if (*dpa_offset < memory_region_size(vmr)) {
|
||||
*as = &ct3d->hostvmem_as;
|
||||
} else {
|
||||
*as = &ct3d->hostpmem_as;
|
||||
*dpa_offset -= memory_region_size(vmr);
|
||||
}
|
||||
} else {
|
||||
if (*dpa_offset < vmr_size) {
|
||||
*as = &ct3d->hostvmem_as;
|
||||
} else if (*dpa_offset < vmr_size + pmr_size) {
|
||||
*as = &ct3d->hostpmem_as;
|
||||
*dpa_offset -= vmr_size;
|
||||
} else {
|
||||
if (!ct3_test_region_block_backed(ct3d, *dpa_offset, size)) {
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
*as = &ct3d->dc.host_dc_as;
|
||||
*dpa_offset -= (vmr_size + pmr_size);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -930,6 +1188,9 @@ static Property ct3_props[] = {
|
|||
HostMemoryBackend *),
|
||||
DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL),
|
||||
DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
|
||||
DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
|
||||
DEFINE_PROP_LINK("volatile-dc-memdev", CXLType3Dev, dc.host_dc,
|
||||
TYPE_MEMORY_BACKEND, HostMemoryBackend *),
|
||||
DEFINE_PROP_END_OF_LIST(),
|
||||
};
|
||||
|
||||
|
@ -996,36 +1257,42 @@ static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
|
|||
|
||||
static bool set_cacheline(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data)
|
||||
{
|
||||
MemoryRegion *vmr = NULL, *pmr = NULL;
|
||||
MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
|
||||
AddressSpace *as;
|
||||
uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
|
||||
|
||||
if (ct3d->hostvmem) {
|
||||
vmr = host_memory_backend_get_memory(ct3d->hostvmem);
|
||||
vmr_size = memory_region_size(vmr);
|
||||
}
|
||||
if (ct3d->hostpmem) {
|
||||
pmr = host_memory_backend_get_memory(ct3d->hostpmem);
|
||||
pmr_size = memory_region_size(pmr);
|
||||
}
|
||||
if (ct3d->dc.host_dc) {
|
||||
dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
|
||||
dc_size = memory_region_size(dc_mr);
|
||||
}
|
||||
|
||||
if (!vmr && !pmr) {
|
||||
if (!vmr && !pmr && !dc_mr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.mem_size) {
|
||||
if (dpa_offset + CXL_CACHE_LINE_SIZE > vmr_size + pmr_size + dc_size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (vmr) {
|
||||
if (dpa_offset < memory_region_size(vmr)) {
|
||||
as = &ct3d->hostvmem_as;
|
||||
} else {
|
||||
as = &ct3d->hostpmem_as;
|
||||
dpa_offset -= memory_region_size(vmr);
|
||||
}
|
||||
} else {
|
||||
if (dpa_offset < vmr_size) {
|
||||
as = &ct3d->hostvmem_as;
|
||||
} else if (dpa_offset < vmr_size + pmr_size) {
|
||||
as = &ct3d->hostpmem_as;
|
||||
dpa_offset -= vmr_size;
|
||||
} else {
|
||||
as = &ct3d->dc.host_dc_as;
|
||||
dpa_offset -= (vmr_size + pmr_size);
|
||||
}
|
||||
|
||||
address_space_write(as, dpa_offset, MEMTXATTRS_UNSPECIFIED, &data,
|
||||
address_space_write(as, dpa_offset, MEMTXATTRS_UNSPECIFIED, data,
|
||||
CXL_CACHE_LINE_SIZE);
|
||||
return true;
|
||||
}
|
||||
|
@ -1268,7 +1535,6 @@ static int ct3d_qmp_cxl_event_log_enc(CxlEventLog log)
|
|||
return CXL_EVENT_TYPE_FAIL;
|
||||
case CXL_EVENT_LOG_FATAL:
|
||||
return CXL_EVENT_TYPE_FATAL;
|
||||
/* DCD not yet supported */
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -1519,6 +1785,301 @@ void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
|
|||
}
|
||||
}
|
||||
|
||||
/* CXL r3.1 Table 8-50: Dynamic Capacity Event Record */
|
||||
static const QemuUUID dynamic_capacity_uuid = {
|
||||
.data = UUID(0xca95afa7, 0xf183, 0x4018, 0x8c, 0x2f,
|
||||
0x95, 0x26, 0x8e, 0x10, 0x1a, 0x2a),
|
||||
};
|
||||
|
||||
typedef enum CXLDCEventType {
|
||||
DC_EVENT_ADD_CAPACITY = 0x0,
|
||||
DC_EVENT_RELEASE_CAPACITY = 0x1,
|
||||
DC_EVENT_FORCED_RELEASE_CAPACITY = 0x2,
|
||||
DC_EVENT_REGION_CONFIG_UPDATED = 0x3,
|
||||
DC_EVENT_ADD_CAPACITY_RSP = 0x4,
|
||||
DC_EVENT_CAPACITY_RELEASED = 0x5,
|
||||
} CXLDCEventType;
|
||||
|
||||
/*
|
||||
* Check whether the range [dpa, dpa + len - 1] has overlaps with extents in
|
||||
* the list.
|
||||
* Return value: return true if has overlaps; otherwise, return false
|
||||
*/
|
||||
static bool cxl_extents_overlaps_dpa_range(CXLDCExtentList *list,
|
||||
uint64_t dpa, uint64_t len)
|
||||
{
|
||||
CXLDCExtent *ent;
|
||||
Range range1, range2;
|
||||
|
||||
if (!list) {
|
||||
return false;
|
||||
}
|
||||
|
||||
range_init_nofail(&range1, dpa, len);
|
||||
QTAILQ_FOREACH(ent, list, node) {
|
||||
range_init_nofail(&range2, ent->start_dpa, ent->len);
|
||||
if (range_overlaps_range(&range1, &range2)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check whether the range [dpa, dpa + len - 1] is contained by extents in
|
||||
* the list.
|
||||
* Will check multiple extents containment once superset release is added.
|
||||
* Return value: return true if range is contained; otherwise, return false
|
||||
*/
|
||||
bool cxl_extents_contains_dpa_range(CXLDCExtentList *list,
|
||||
uint64_t dpa, uint64_t len)
|
||||
{
|
||||
CXLDCExtent *ent;
|
||||
Range range1, range2;
|
||||
|
||||
if (!list) {
|
||||
return false;
|
||||
}
|
||||
|
||||
range_init_nofail(&range1, dpa, len);
|
||||
QTAILQ_FOREACH(ent, list, node) {
|
||||
range_init_nofail(&range2, ent->start_dpa, ent->len);
|
||||
if (range_contains_range(&range2, &range1)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
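
cxl_extents_contains_dpa_range() above walks the accepted-extent list and relies on range_contains_range(); the check itself reduces to two inclusive-bound comparisons on [dpa, dpa + len - 1]. A small standalone sketch of that predicate (plain C, without QEMU's Range helpers; the extent numbers in main() are made up for illustration):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* True if [dpa, dpa + len - 1] lies entirely inside [start, start + elen - 1]. */
static bool extent_contains(uint64_t start, uint64_t elen,
                            uint64_t dpa, uint64_t len)
{
    return len && elen &&
           dpa >= start &&
           dpa + len - 1 <= start + elen - 1;
}

int main(void)
{
    /* One accepted extent: 256 MiB at DPA 0x10000000 (illustrative numbers). */
    uint64_t start = 0x10000000ULL, elen = 0x10000000ULL;

    printf("%d\n", extent_contains(start, elen, 0x10000000ULL, 0x1000)); /* 1 */
    printf("%d\n", extent_contains(start, elen, 0x1fffffffULL, 0x2));    /* 0: runs past the end */
    return 0;
}
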
|
||||
|
||||
static bool cxl_extent_groups_overlaps_dpa_range(CXLDCExtentGroupList *list,
|
||||
uint64_t dpa, uint64_t len)
|
||||
{
|
||||
CXLDCExtentGroup *group;
|
||||
|
||||
if (!list) {
|
||||
return false;
|
||||
}
|
||||
|
||||
QTAILQ_FOREACH(group, list, node) {
|
||||
if (cxl_extents_overlaps_dpa_range(&group->list, dpa, len)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* The main function to process dynamic capacity event with extent list.
|
||||
* Currently DC extents add/release requests are processed.
|
||||
*/
|
||||
static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path,
|
||||
uint16_t hid, CXLDCEventType type, uint8_t rid,
|
||||
CxlDynamicCapacityExtentList *records, Error **errp)
|
||||
{
|
||||
Object *obj;
|
||||
CXLEventDynamicCapacity dCap = {};
|
||||
CXLEventRecordHdr *hdr = &dCap.hdr;
|
||||
CXLType3Dev *dcd;
|
||||
uint8_t flags = 1 << CXL_EVENT_TYPE_INFO;
|
||||
uint32_t num_extents = 0;
|
||||
CxlDynamicCapacityExtentList *list;
|
||||
CXLDCExtentGroup *group = NULL;
|
||||
g_autofree CXLDCExtentRaw *extents = NULL;
|
||||
uint8_t enc_log = CXL_EVENT_TYPE_DYNAMIC_CAP;
|
||||
uint64_t dpa, offset, len, block_size;
|
||||
g_autofree unsigned long *blk_bitmap = NULL;
|
||||
int i;
|
||||
|
||||
obj = object_resolve_path_type(path, TYPE_CXL_TYPE3, NULL);
|
||||
if (!obj) {
|
||||
error_setg(errp, "Unable to resolve CXL type 3 device");
|
||||
return;
|
||||
}
|
||||
|
||||
dcd = CXL_TYPE3(obj);
|
||||
if (!dcd->dc.num_regions) {
|
||||
error_setg(errp, "No dynamic capacity support from the device");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
if (rid >= dcd->dc.num_regions) {
|
||||
error_setg(errp, "region id is too large");
|
||||
return;
|
||||
}
|
||||
block_size = dcd->dc.regions[rid].block_size;
|
||||
blk_bitmap = bitmap_new(dcd->dc.regions[rid].len / block_size);
|
||||
|
||||
/* Sanity check and count the extents */
|
||||
list = records;
|
||||
while (list) {
|
||||
offset = list->value->offset;
|
||||
len = list->value->len;
|
||||
dpa = offset + dcd->dc.regions[rid].base;
|
||||
|
||||
if (len == 0) {
|
||||
error_setg(errp, "extent with 0 length is not allowed");
|
||||
return;
|
||||
}
|
||||
|
||||
if (offset % block_size || len % block_size) {
|
||||
error_setg(errp, "dpa or len is not aligned to region block size");
|
||||
return;
|
||||
}
|
||||
|
||||
if (offset + len > dcd->dc.regions[rid].len) {
|
||||
error_setg(errp, "extent range is beyond the region end");
|
||||
return;
|
||||
}
|
||||
|
||||
/* No duplicate or overlapped extents are allowed */
|
||||
if (test_any_bits_set(blk_bitmap, offset / block_size,
|
||||
len / block_size)) {
|
||||
error_setg(errp, "duplicate or overlapped extents are detected");
|
||||
return;
|
||||
}
|
||||
bitmap_set(blk_bitmap, offset / block_size, len / block_size);
|
||||
|
||||
if (type == DC_EVENT_RELEASE_CAPACITY) {
|
||||
if (cxl_extent_groups_overlaps_dpa_range(&dcd->dc.extents_pending,
|
||||
dpa, len)) {
|
||||
error_setg(errp,
|
||||
"cannot release extent with pending DPA range");
|
||||
return;
|
||||
}
|
||||
if (!ct3_test_region_block_backed(dcd, dpa, len)) {
|
||||
error_setg(errp,
|
||||
"cannot release extent with non-existing DPA range");
|
||||
return;
|
||||
}
|
||||
} else if (type == DC_EVENT_ADD_CAPACITY) {
|
||||
if (cxl_extents_overlaps_dpa_range(&dcd->dc.extents, dpa, len)) {
|
||||
error_setg(errp,
|
||||
"cannot add DPA already accessible to the same LD");
|
||||
return;
|
||||
}
|
||||
if (cxl_extent_groups_overlaps_dpa_range(&dcd->dc.extents_pending,
|
||||
dpa, len)) {
|
||||
error_setg(errp,
|
||||
"cannot add DPA again while still pending");
|
||||
return;
|
||||
}
|
||||
}
|
||||
list = list->next;
|
||||
num_extents++;
|
||||
}
|
||||
|
||||
/* Create extent list for event being passed to host */
|
||||
i = 0;
|
||||
list = records;
|
||||
extents = g_new0(CXLDCExtentRaw, num_extents);
|
||||
while (list) {
|
||||
offset = list->value->offset;
|
||||
len = list->value->len;
|
||||
dpa = dcd->dc.regions[rid].base + offset;
|
||||
|
||||
extents[i].start_dpa = dpa;
|
||||
extents[i].len = len;
|
||||
memset(extents[i].tag, 0, 0x10);
|
||||
extents[i].shared_seq = 0;
|
||||
if (type == DC_EVENT_ADD_CAPACITY) {
|
||||
group = cxl_insert_extent_to_extent_group(group,
|
||||
extents[i].start_dpa,
|
||||
extents[i].len,
|
||||
extents[i].tag,
|
||||
extents[i].shared_seq);
|
||||
}
|
||||
|
||||
list = list->next;
|
||||
i++;
|
||||
}
|
||||
if (group) {
|
||||
cxl_extent_group_list_insert_tail(&dcd->dc.extents_pending, group);
|
||||
}
|
||||
|
||||
/*
|
||||
* CXL r3.1 section 8.2.9.2.1.6: Dynamic Capacity Event Record
|
||||
*
|
||||
* All Dynamic Capacity event records shall set the Event Record Severity
|
||||
* field in the Common Event Record Format to Informational Event. All
|
||||
* Dynamic Capacity related events shall be logged in the Dynamic Capacity
|
||||
* Event Log.
|
||||
*/
|
||||
cxl_assign_event_header(hdr, &dynamic_capacity_uuid, flags, sizeof(dCap),
|
||||
cxl_device_get_timestamp(&dcd->cxl_dstate));
|
||||
|
||||
dCap.type = type;
|
||||
/* FIXME: for now, validity flag is cleared */
|
||||
dCap.validity_flags = 0;
|
||||
stw_le_p(&dCap.host_id, hid);
|
||||
/* only valid for DC_REGION_CONFIG_UPDATED event */
|
||||
dCap.updated_region_id = 0;
|
||||
dCap.flags = 0;
|
||||
for (i = 0; i < num_extents; i++) {
|
||||
memcpy(&dCap.dynamic_capacity_extent, &extents[i],
|
||||
sizeof(CXLDCExtentRaw));
|
||||
|
||||
if (i < num_extents - 1) {
|
||||
/* Set "More" flag */
|
||||
dCap.flags |= BIT(0);
|
||||
}
|
||||
|
||||
if (cxl_event_insert(&dcd->cxl_dstate, enc_log,
|
||||
(CXLEventRecordRaw *)&dCap)) {
|
||||
cxl_event_irq_assert(dcd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void qmp_cxl_add_dynamic_capacity(const char *path, uint16_t host_id,
|
||||
CxlExtentSelectionPolicy sel_policy,
|
||||
uint8_t region, const char *tag,
|
||||
CxlDynamicCapacityExtentList *extents,
|
||||
Error **errp)
|
||||
{
|
||||
switch (sel_policy) {
|
||||
case CXL_EXTENT_SELECTION_POLICY_PRESCRIPTIVE:
|
||||
qmp_cxl_process_dynamic_capacity_prescriptive(path, host_id,
|
||||
DC_EVENT_ADD_CAPACITY,
|
||||
region, extents, errp);
|
||||
return;
|
||||
default:
|
||||
error_setg(errp, "Selection policy not supported");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void qmp_cxl_release_dynamic_capacity(const char *path, uint16_t host_id,
|
||||
CxlExtentRemovalPolicy removal_policy,
|
||||
bool has_forced_removal,
|
||||
bool forced_removal,
|
||||
bool has_sanitize_on_release,
|
||||
bool sanitize_on_release,
|
||||
uint8_t region,
|
||||
const char *tag,
|
||||
CxlDynamicCapacityExtentList *extents,
|
||||
Error **errp)
|
||||
{
|
||||
CXLDCEventType type = DC_EVENT_RELEASE_CAPACITY;
|
||||
|
||||
if (has_forced_removal && forced_removal) {
|
||||
/* TODO: enable forced removal in the future */
|
||||
type = DC_EVENT_FORCED_RELEASE_CAPACITY;
|
||||
error_setg(errp, "Forced removal not supported yet");
|
||||
return;
|
||||
}
|
||||
|
||||
switch (removal_policy) {
|
||||
case CXL_EXTENT_REMOVAL_POLICY_PRESCRIPTIVE:
|
||||
qmp_cxl_process_dynamic_capacity_prescriptive(path, host_id, type,
|
||||
region, extents, errp);
|
||||
return;
|
||||
default:
|
||||
error_setg(errp, "Removal policy not supported");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
static void ct3_class_init(ObjectClass *oc, void *data)
|
||||
{
|
||||
DeviceClass *dc = DEVICE_CLASS(oc);
|
||||
|
|
|
@ -67,3 +67,28 @@ void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
|
|||
{
|
||||
error_setg(errp, "CXL Type 3 support is not compiled in");
|
||||
}
|
||||
|
||||
void qmp_cxl_add_dynamic_capacity(const char *path,
|
||||
uint16_t host_id,
|
||||
CxlExtentSelectionPolicy sel_policy,
|
||||
uint8_t region,
|
||||
const char *tag,
|
||||
CxlDynamicCapacityExtentList *extents,
|
||||
Error **errp)
|
||||
{
|
||||
error_setg(errp, "CXL Type 3 support is not compiled in");
|
||||
}
|
||||
|
||||
void qmp_cxl_release_dynamic_capacity(const char *path, uint16_t host_id,
|
||||
CxlExtentRemovalPolicy removal_policy,
|
||||
bool has_forced_removal,
|
||||
bool forced_removal,
|
||||
bool has_sanitize_on_release,
|
||||
bool sanitize_on_release,
|
||||
uint8_t region,
|
||||
const char *tag,
|
||||
CxlDynamicCapacityExtentList *extents,
|
||||
Error **errp)
|
||||
{
|
||||
error_setg(errp, "CXL Type 3 support is not compiled in");
|
||||
}
|
||||
|
|
|
@ -21,7 +21,6 @@
|
|||
#include "hw/misc/pvpanic.h"
|
||||
#include "qom/object.h"
|
||||
#include "hw/isa/isa.h"
|
||||
#include "standard-headers/misc/pvpanic.h"
|
||||
#include "hw/acpi/acpi_aml_interface.h"
|
||||
|
||||
OBJECT_DECLARE_SIMPLE_TYPE(PVPanicISAState, PVPANIC_ISA_DEVICE)
|
||||
|
@ -102,7 +101,7 @@ static void build_pvpanic_isa_aml(AcpiDevAmlIf *adev, Aml *scope)
|
|||
static Property pvpanic_isa_properties[] = {
|
||||
DEFINE_PROP_UINT16(PVPANIC_IOPORT_PROP, PVPanicISAState, ioport, 0x505),
|
||||
DEFINE_PROP_UINT8("events", PVPanicISAState, pvpanic.events,
|
||||
PVPANIC_PANICKED | PVPANIC_CRASH_LOADED),
|
||||
PVPANIC_EVENTS),
|
||||
DEFINE_PROP_END_OF_LIST(),
|
||||
};
|
||||
|
||||
|
|
|
@ -55,7 +55,7 @@ static void pvpanic_pci_realizefn(PCIDevice *dev, Error **errp)
|
|||
|
||||
static Property pvpanic_pci_properties[] = {
|
||||
DEFINE_PROP_UINT8("events", PVPanicPCIState, pvpanic.events,
|
||||
PVPANIC_PANICKED | PVPANIC_CRASH_LOADED),
|
||||
PVPANIC_EVENTS),
|
||||
DEFINE_PROP_END_OF_LIST(),
|
||||
};
|
||||
|
||||
|
|
|
@ -27,7 +27,7 @@ static void handle_event(int event)
|
|||
{
|
||||
static bool logged;
|
||||
|
||||
if (event & ~(PVPANIC_PANICKED | PVPANIC_CRASH_LOADED) && !logged) {
|
||||
if (event & ~PVPANIC_EVENTS && !logged) {
|
||||
qemu_log_mask(LOG_GUEST_ERROR, "pvpanic: unknown event %#x.\n", event);
|
||||
logged = true;
|
||||
}
|
||||
|
@ -41,6 +41,11 @@ static void handle_event(int event)
|
|||
qemu_system_guest_crashloaded(NULL);
|
||||
return;
|
||||
}
|
||||
|
||||
if (event & PVPANIC_SHUTDOWN) {
|
||||
qemu_system_guest_pvshutdown();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* return supported events on read */
|
||||
|
|
hw/net/igb.c
@ -446,9 +446,16 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error **errp)
|
|||
|
||||
pcie_ari_init(pci_dev, 0x150);
|
||||
|
||||
pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET, TYPE_IGBVF,
|
||||
IGB_82576_VF_DEV_ID, IGB_MAX_VF_FUNCTIONS, IGB_MAX_VF_FUNCTIONS,
|
||||
IGB_VF_OFFSET, IGB_VF_STRIDE);
|
||||
if (!pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET,
|
||||
TYPE_IGBVF, IGB_82576_VF_DEV_ID,
|
||||
IGB_MAX_VF_FUNCTIONS, IGB_MAX_VF_FUNCTIONS,
|
||||
IGB_VF_OFFSET, IGB_VF_STRIDE,
|
||||
errp)) {
|
||||
pcie_cap_exit(pci_dev);
|
||||
igb_cleanup_msix(s);
|
||||
msi_uninit(pci_dev);
|
||||
return;
|
||||
}
|
||||
|
||||
pcie_sriov_pf_init_vf_bar(pci_dev, IGBVF_MMIO_BAR_IDX,
|
||||
PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH,
|
||||
|
|
|
@ -48,6 +48,7 @@ static const int kernel_feature_bits[] = {
|
|||
VIRTIO_F_IOMMU_PLATFORM,
|
||||
VIRTIO_F_RING_PACKED,
|
||||
VIRTIO_F_RING_RESET,
|
||||
VIRTIO_F_NOTIFICATION_DATA,
|
||||
VIRTIO_NET_F_HASH_REPORT,
|
||||
VHOST_INVALID_FEATURE_BIT
|
||||
};
|
||||
|
@ -55,6 +56,7 @@ static const int kernel_feature_bits[] = {
|
|||
/* Features supported by others. */
|
||||
static const int user_feature_bits[] = {
|
||||
VIRTIO_F_NOTIFY_ON_EMPTY,
|
||||
VIRTIO_F_NOTIFICATION_DATA,
|
||||
VIRTIO_RING_F_INDIRECT_DESC,
|
||||
VIRTIO_RING_F_EVENT_IDX,
|
||||
|
||||
|
|
|
@ -2735,6 +2735,10 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
|
|||
*/
|
||||
assert(n->host_hdr_len <= n->guest_hdr_len);
|
||||
if (n->host_hdr_len != n->guest_hdr_len) {
|
||||
if (iov_size(out_sg, out_num) < n->guest_hdr_len) {
|
||||
virtio_error(vdev, "virtio-net header is invalid");
|
||||
goto detach;
|
||||
}
|
||||
unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
|
||||
out_sg, out_num,
|
||||
0, n->host_hdr_len);
|
||||
|
|
|
@ -8048,7 +8048,8 @@ out:
|
|||
return pow2ceil(bar_size);
|
||||
}
|
||||
|
||||
static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset)
|
||||
static bool nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset,
|
||||
Error **errp)
|
||||
{
|
||||
uint16_t vf_dev_id = n->params.use_intel_id ?
|
||||
PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME;
|
||||
|
@ -8057,12 +8058,17 @@ static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset)
|
|||
le16_to_cpu(cap->vifrsm),
|
||||
NULL, NULL);
|
||||
|
||||
pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id,
|
||||
n->params.sriov_max_vfs, n->params.sriov_max_vfs,
|
||||
NVME_VF_OFFSET, NVME_VF_STRIDE);
|
||||
if (!pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id,
|
||||
n->params.sriov_max_vfs, n->params.sriov_max_vfs,
|
||||
NVME_VF_OFFSET, NVME_VF_STRIDE,
|
||||
errp)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
pcie_sriov_pf_init_vf_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
|
||||
PCI_BASE_ADDRESS_MEM_TYPE_64, bar_size);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset)
|
||||
|
@ -8155,6 +8161,12 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
|
|||
return false;
|
||||
}
|
||||
|
||||
if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs &&
|
||||
!nvme_init_sriov(n, pci_dev, 0x120, errp)) {
|
||||
msix_uninit(pci_dev, &n->bar0, &n->bar0);
|
||||
return false;
|
||||
}
|
||||
|
||||
nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize);
|
||||
|
||||
if (n->params.cmb_size_mb) {
|
||||
|
@ -8165,10 +8177,6 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
|
|||
nvme_init_pmr(n, pci_dev);
|
||||
}
|
||||
|
||||
if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs) {
|
||||
nvme_init_sriov(n, pci_dev, 0x120);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
hw/pci/pci.c
@ -70,7 +70,7 @@ static bool pcie_has_upstream_port(PCIDevice *dev);
|
|||
static Property pci_props[] = {
|
||||
DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
|
||||
DEFINE_PROP_STRING("romfile", PCIDevice, romfile),
|
||||
DEFINE_PROP_UINT32("romsize", PCIDevice, romsize, -1),
|
||||
DEFINE_PROP_UINT32("romsize", PCIDevice, romsize, UINT32_MAX),
|
||||
DEFINE_PROP_UINT32("rombar", PCIDevice, rom_bar, 1),
|
||||
DEFINE_PROP_BIT("multifunction", PCIDevice, cap_present,
|
||||
QEMU_PCI_CAP_MULTIFUNCTION_BITNR, false),
|
||||
|
@ -733,10 +733,17 @@ static bool migrate_is_not_pcie(void *opaque, int version_id)
|
|||
return !pci_is_express((PCIDevice *)opaque);
|
||||
}
|
||||
|
||||
static int pci_post_load(void *opaque, int version_id)
|
||||
{
|
||||
pcie_sriov_pf_post_load(opaque);
|
||||
return 0;
|
||||
}
|
||||
|
||||
const VMStateDescription vmstate_pci_device = {
|
||||
.name = "PCIDevice",
|
||||
.version_id = 2,
|
||||
.minimum_version_id = 1,
|
||||
.post_load = pci_post_load,
|
||||
.fields = (const VMStateField[]) {
|
||||
VMSTATE_INT32_POSITIVE_LE(version_id, PCIDevice),
|
||||
VMSTATE_BUFFER_UNSAFE_INFO_TEST(config, PCIDevice,
|
||||
|
@ -1525,7 +1532,7 @@ static void pci_update_mappings(PCIDevice *d)
|
|||
continue;
|
||||
|
||||
new_addr = pci_bar_address(d, i, r->type, r->size);
|
||||
if (!d->has_power) {
|
||||
if (!d->enabled) {
|
||||
new_addr = PCI_BAR_UNMAPPED;
|
||||
}
|
||||
|
||||
|
@ -1613,7 +1620,7 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
|
|||
pci_update_irq_disabled(d, was_irq_disabled);
|
||||
memory_region_set_enabled(&d->bus_master_enable_region,
|
||||
(pci_get_word(d->config + PCI_COMMAND)
|
||||
& PCI_COMMAND_MASTER) && d->has_power);
|
||||
& PCI_COMMAND_MASTER) && d->enabled);
|
||||
}
|
||||
|
||||
msi_write_config(d, addr, val_in, l);
|
||||
|
@ -2066,7 +2073,7 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
|
|||
g_cmp_uint32, NULL);
|
||||
}
|
||||
|
||||
if (pci_dev->romsize != -1 && !is_power_of_2(pci_dev->romsize)) {
|
||||
if (pci_dev->romsize != UINT32_MAX && !is_power_of_2(pci_dev->romsize)) {
|
||||
error_setg(errp, "ROM size %u is not a power of two", pci_dev->romsize);
|
||||
return;
|
||||
}
|
||||
|
@ -2352,7 +2359,7 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom,
|
|||
return;
|
||||
}
|
||||
|
||||
if (load_file || pdev->romsize == -1) {
|
||||
if (load_file || pdev->romsize == UINT32_MAX) {
|
||||
path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile);
|
||||
if (path == NULL) {
|
||||
path = g_strdup(pdev->romfile);
|
||||
|
@ -2371,7 +2378,7 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom,
|
|||
pdev->romfile);
|
||||
return;
|
||||
}
|
||||
if (pdev->romsize != -1) {
|
||||
if (pdev->romsize != UINT_MAX) {
|
||||
if (size > pdev->romsize) {
|
||||
error_setg(errp, "romfile \"%s\" (%u bytes) "
|
||||
"is too large for ROM size %u",
|
||||
|
@ -2884,18 +2891,18 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector)
|
|||
return msg;
|
||||
}
|
||||
|
||||
void pci_set_power(PCIDevice *d, bool state)
|
||||
void pci_set_enabled(PCIDevice *d, bool state)
|
||||
{
|
||||
if (d->has_power == state) {
|
||||
if (d->enabled == state) {
|
||||
return;
|
||||
}
|
||||
|
||||
d->has_power = state;
|
||||
d->enabled = state;
|
||||
pci_update_mappings(d);
|
||||
memory_region_set_enabled(&d->bus_master_enable_region,
|
||||
(pci_get_word(d->config + PCI_COMMAND)
|
||||
& PCI_COMMAND_MASTER) && d->has_power);
|
||||
if (!d->has_power) {
|
||||
& PCI_COMMAND_MASTER) && d->enabled);
|
||||
if (d->qdev.realized) {
|
||||
pci_device_reset(d);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -86,7 +86,7 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
|
|||
* allowing direct removal of unexposed functions.
|
||||
*/
|
||||
if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
|
||||
!pci_dev->has_power || is_pci_dev_ejected(pci_dev)) {
|
||||
!pci_dev->enabled || is_pci_dev_ejected(pci_dev)) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -111,7 +111,7 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
|
|||
* allowing direct removal of unexposed functions.
|
||||
*/
|
||||
if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
|
||||
!pci_dev->has_power || is_pci_dev_ejected(pci_dev)) {
|
||||
!pci_dev->enabled || is_pci_dev_ejected(pci_dev)) {
|
||||
return ~0x0;
|
||||
}
|
||||
|
||||
|
|
|
@ -20,23 +20,43 @@
|
|||
#include "qapi/error.h"
|
||||
#include "trace.h"
|
||||
|
||||
static PCIDevice *register_vf(PCIDevice *pf, int devfn,
|
||||
const char *name, uint16_t vf_num);
|
||||
static void unregister_vfs(PCIDevice *dev);
|
||||
static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs)
|
||||
{
|
||||
for (uint16_t i = 0; i < total_vfs; i++) {
|
||||
PCIDevice *vf = dev->exp.sriov_pf.vf[i];
|
||||
object_unparent(OBJECT(vf));
|
||||
object_unref(OBJECT(vf));
|
||||
}
|
||||
g_free(dev->exp.sriov_pf.vf);
|
||||
dev->exp.sriov_pf.vf = NULL;
|
||||
}
|
||||
|
||||
void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
|
||||
bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
|
||||
const char *vfname, uint16_t vf_dev_id,
|
||||
uint16_t init_vfs, uint16_t total_vfs,
|
||||
uint16_t vf_offset, uint16_t vf_stride)
|
||||
uint16_t vf_offset, uint16_t vf_stride,
|
||||
Error **errp)
|
||||
{
|
||||
BusState *bus = qdev_get_parent_bus(&dev->qdev);
|
||||
int32_t devfn = dev->devfn + vf_offset;
|
||||
uint8_t *cfg = dev->config + offset;
|
||||
uint8_t *wmask;
|
||||
|
||||
if (total_vfs) {
|
||||
uint16_t ari_cap = pcie_find_capability(dev, PCI_EXT_CAP_ID_ARI);
|
||||
uint16_t first_vf_devfn = dev->devfn + vf_offset;
|
||||
uint16_t last_vf_devfn = first_vf_devfn + vf_stride * (total_vfs - 1);
|
||||
|
||||
if ((!ari_cap && PCI_SLOT(dev->devfn) != PCI_SLOT(last_vf_devfn)) ||
|
||||
last_vf_devfn >= PCI_DEVFN_MAX) {
|
||||
error_setg(errp, "VF function number overflows");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1,
|
||||
offset, PCI_EXT_CAP_SRIOV_SIZEOF);
|
||||
dev->exp.sriov_cap = offset;
|
||||
dev->exp.sriov_pf.num_vfs = 0;
|
||||
dev->exp.sriov_pf.vfname = g_strdup(vfname);
|
||||
dev->exp.sriov_pf.vf = NULL;
|
||||
|
||||
pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset);
|
||||
|
@ -69,13 +89,37 @@ void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
|
|||
pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, 0x553);
|
||||
|
||||
qdev_prop_set_bit(&dev->qdev, "multifunction", true);
|
||||
|
||||
dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs);
|
||||
|
||||
for (uint16_t i = 0; i < total_vfs; i++) {
|
||||
PCIDevice *vf = pci_new(devfn, vfname);
|
||||
vf->exp.sriov_vf.pf = dev;
|
||||
vf->exp.sriov_vf.vf_number = i;
|
||||
|
||||
if (!qdev_realize(&vf->qdev, bus, errp)) {
|
||||
object_unparent(OBJECT(vf));
|
||||
object_unref(vf);
|
||||
unparent_vfs(dev, i);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* set vid/did according to sr/iov spec - they are not used */
|
||||
pci_config_set_vendor_id(vf->config, 0xffff);
|
||||
pci_config_set_device_id(vf->config, 0xffff);
|
||||
|
||||
dev->exp.sriov_pf.vf[i] = vf;
|
||||
devfn += vf_stride;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void pcie_sriov_pf_exit(PCIDevice *dev)
|
||||
{
|
||||
unregister_vfs(dev);
|
||||
g_free((char *)dev->exp.sriov_pf.vfname);
|
||||
dev->exp.sriov_pf.vfname = NULL;
|
||||
uint8_t *cfg = dev->config + dev->exp.sriov_cap;
|
||||
|
||||
unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
|
||||
}
|
||||
|
||||
void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
|
||||
|
@ -141,26 +185,10 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
|
|||
}
|
||||
}
|
||||
|
||||
static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name,
|
||||
uint16_t vf_num)
|
||||
static void clear_ctrl_vfe(PCIDevice *dev)
|
||||
{
|
||||
PCIDevice *dev = pci_new(devfn, name);
|
||||
dev->exp.sriov_vf.pf = pf;
|
||||
dev->exp.sriov_vf.vf_number = vf_num;
|
||||
PCIBus *bus = pci_get_bus(pf);
|
||||
Error *local_err = NULL;
|
||||
|
||||
qdev_realize(&dev->qdev, &bus->qbus, &local_err);
|
||||
if (local_err) {
|
||||
error_report_err(local_err);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* set vid/did according to sr/iov spec - they are not used */
|
||||
pci_config_set_vendor_id(dev->config, 0xffff);
|
||||
pci_config_set_device_id(dev->config, 0xffff);
|
||||
|
||||
return dev;
|
||||
uint8_t *ctrl = dev->config + dev->exp.sriov_cap + PCI_SRIOV_CTRL;
|
||||
pci_set_word(ctrl, pci_get_word(ctrl) & ~PCI_SRIOV_CTRL_VFE);
|
||||
}
|
||||
|
||||
static void register_vfs(PCIDevice *dev)
|
||||
|
@ -168,53 +196,31 @@ static void register_vfs(PCIDevice *dev)
|
|||
uint16_t num_vfs;
|
||||
uint16_t i;
|
||||
uint16_t sriov_cap = dev->exp.sriov_cap;
|
||||
uint16_t vf_offset =
|
||||
pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
|
||||
uint16_t vf_stride =
|
||||
pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
|
||||
int32_t devfn = dev->devfn + vf_offset;
|
||||
|
||||
assert(sriov_cap > 0);
|
||||
num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
|
||||
if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) {
|
||||
clear_ctrl_vfe(dev);
|
||||
return;
|
||||
}
|
||||
|
||||
dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs);
|
||||
|
||||
trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
|
||||
PCI_FUNC(dev->devfn), num_vfs);
|
||||
for (i = 0; i < num_vfs; i++) {
|
||||
dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn,
|
||||
dev->exp.sriov_pf.vfname, i);
|
||||
if (!dev->exp.sriov_pf.vf[i]) {
|
||||
num_vfs = i;
|
||||
break;
|
||||
}
|
||||
devfn += vf_stride;
|
||||
pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
|
||||
}
|
||||
dev->exp.sriov_pf.num_vfs = num_vfs;
|
||||
}
|
||||
|
||||
static void unregister_vfs(PCIDevice *dev)
|
||||
{
|
||||
uint16_t num_vfs = dev->exp.sriov_pf.num_vfs;
|
||||
uint16_t i;
|
||||
uint8_t *cfg = dev->config + dev->exp.sriov_cap;
|
||||
|
||||
trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
|
||||
PCI_FUNC(dev->devfn), num_vfs);
|
||||
for (i = 0; i < num_vfs; i++) {
|
||||
Error *err = NULL;
|
||||
PCIDevice *vf = dev->exp.sriov_pf.vf[i];
|
||||
if (!object_property_set_bool(OBJECT(vf), "realized", false, &err)) {
|
||||
error_reportf_err(err, "Failed to unplug: ");
|
||||
}
|
||||
object_unparent(OBJECT(vf));
|
||||
object_unref(OBJECT(vf));
|
||||
PCI_FUNC(dev->devfn));
|
||||
for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) {
|
||||
pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
|
||||
}
|
||||
g_free(dev->exp.sriov_pf.vf);
|
||||
dev->exp.sriov_pf.vf = NULL;
|
||||
dev->exp.sriov_pf.num_vfs = 0;
|
||||
}
|
||||
|
||||
void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
|
||||
|
@ -235,15 +241,21 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
|
|||
PCI_FUNC(dev->devfn), off, val, len);
|
||||
|
||||
if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) {
|
||||
if (dev->exp.sriov_pf.num_vfs) {
|
||||
if (!(val & PCI_SRIOV_CTRL_VFE)) {
|
||||
unregister_vfs(dev);
|
||||
}
|
||||
if (val & PCI_SRIOV_CTRL_VFE) {
|
||||
register_vfs(dev);
|
||||
} else {
|
||||
if (val & PCI_SRIOV_CTRL_VFE) {
|
||||
register_vfs(dev);
|
||||
}
|
||||
unregister_vfs(dev);
|
||||
}
|
||||
} else if (range_covers_byte(off, len, PCI_SRIOV_NUM_VF)) {
|
||||
clear_ctrl_vfe(dev);
|
||||
unregister_vfs(dev);
|
||||
}
|
||||
}
|
||||
|
||||
void pcie_sriov_pf_post_load(PCIDevice *dev)
|
||||
{
|
||||
if (dev->exp.sriov_cap) {
|
||||
register_vfs(dev);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -306,7 +318,7 @@ PCIDevice *pcie_sriov_get_pf(PCIDevice *dev)
|
|||
PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n)
|
||||
{
|
||||
assert(!pci_is_vf(dev));
|
||||
if (n < dev->exp.sriov_pf.num_vfs) {
|
||||
if (n < pcie_sriov_num_vfs(dev)) {
|
||||
return dev->exp.sriov_pf.vf[n];
|
||||
}
|
||||
return NULL;
|
||||
|
@ -314,5 +326,10 @@ PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n)
|
|||
|
||||
uint16_t pcie_sriov_num_vfs(PCIDevice *dev)
|
||||
{
|
||||
return dev->exp.sriov_pf.num_vfs;
|
||||
uint16_t sriov_cap = dev->exp.sriov_cap;
|
||||
uint8_t *cfg = dev->config + sriov_cap;
|
||||
|
||||
return sriov_cap &&
|
||||
(pci_get_word(cfg + PCI_SRIOV_CTRL) & PCI_SRIOV_CTRL_VFE) ?
|
||||
pci_get_word(cfg + PCI_SRIOV_NUM_VF) : 0;
|
||||
}
|
||||
|
|
|
@ -14,7 +14,7 @@ msix_write_config(char *name, bool enabled, bool masked) "dev %s enabled %d mask
|
|||
|
||||
# hw/pci/pcie_sriov.c
|
||||
sriov_register_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: creating %d vf devs"
|
||||
sriov_unregister_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: Unregistering %d vf devs"
|
||||
sriov_unregister_vfs(const char *name, int slot, int function) "%s %02x:%x: Unregistering vf devs"
|
||||
sriov_config_write(const char *name, int slot, int fun, uint32_t offset, uint32_t val, uint32_t len) "%s %02x:%x: sriov offset 0x%x val 0x%x len %d"
|
||||
|
||||
# pcie.c
|
||||
|
|
|
@ -1296,6 +1296,10 @@ static void spapr_dt_pci_device_cb(PCIBus *bus, PCIDevice *pdev,
|
|||
return;
|
||||
}
|
||||
|
||||
if (!pdev->enabled) {
|
||||
return;
|
||||
}
|
||||
|
||||
err = spapr_dt_pci_device(p->sphb, pdev, p->fdt, p->offset);
|
||||
if (err < 0) {
|
||||
p->err = err;
|
||||
|
@ -1569,7 +1573,9 @@ static void spapr_pci_pre_plug(HotplugHandler *plug_handler,
|
|||
* hotplug, we do not allow functions to be hotplugged to a
|
||||
* slot that already has function 0 present
|
||||
*/
|
||||
if (plugged_dev->hotplugged && bus->devices[PCI_DEVFN(slotnr, 0)] &&
|
||||
if (plugged_dev->hotplugged &&
|
||||
!pci_is_vf(pdev) &&
|
||||
bus->devices[PCI_DEVFN(slotnr, 0)] &&
|
||||
PCI_FUNC(pdev->devfn) != 0) {
|
||||
error_setg(errp, "PCI: slot %d function 0 already occupied by %s,"
|
||||
" additional functions can no longer be exposed to guest.",
|
||||
|
|
|
@ -127,9 +127,11 @@ static void subsystem_reset(void)
|
|||
static int virtio_ccw_hcall_notify(const uint64_t *args)
|
||||
{
|
||||
uint64_t subch_id = args[0];
|
||||
uint64_t queue = args[1];
|
||||
uint64_t data = args[1];
|
||||
SubchDev *sch;
|
||||
VirtIODevice *vdev;
|
||||
int cssid, ssid, schid, m;
|
||||
uint16_t vq_idx = data;
|
||||
|
||||
if (ioinst_disassemble_sch_ident(subch_id, &m, &cssid, &ssid, &schid)) {
|
||||
return -EINVAL;
|
||||
|
@ -138,12 +140,19 @@ static int virtio_ccw_hcall_notify(const uint64_t *args)
|
|||
if (!sch || !css_subch_visible(sch)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
if (queue >= VIRTIO_QUEUE_MAX) {
|
||||
|
||||
vdev = virtio_ccw_get_vdev(sch);
|
||||
if (vq_idx >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, vq_idx)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
virtio_queue_notify(virtio_ccw_get_vdev(sch), queue);
|
||||
return 0;
|
||||
|
||||
if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA)) {
|
||||
virtio_queue_set_shadow_avail_idx(virtio_get_queue(vdev, vq_idx),
|
||||
(data >> 16) & 0xFFFF);
|
||||
}
|
||||
|
||||
virtio_queue_notify(vdev, vq_idx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int virtio_ccw_hcall_early_printk(const uint64_t *args)
|
||||
|
|
|
@ -38,6 +38,7 @@ static const int kernel_feature_bits[] = {
|
|||
VIRTIO_RING_F_EVENT_IDX,
|
||||
VIRTIO_SCSI_F_HOTPLUG,
|
||||
VIRTIO_F_RING_RESET,
|
||||
VIRTIO_F_NOTIFICATION_DATA,
|
||||
VHOST_INVALID_FEATURE_BIT
|
||||
};
|
||||
|
||||
|
|
|
@ -36,6 +36,7 @@ static const int user_feature_bits[] = {
|
|||
VIRTIO_RING_F_EVENT_IDX,
|
||||
VIRTIO_SCSI_F_HOTPLUG,
|
||||
VIRTIO_F_RING_RESET,
|
||||
VIRTIO_F_NOTIFICATION_DATA,
|
||||
VHOST_INVALID_FEATURE_BIT
|
||||
};
|
||||
|
||||
|
@ -181,7 +182,7 @@ static void vhost_user_scsi_disconnect(DeviceState *dev)
|
|||
VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
|
||||
|
||||
if (!s->connected) {
|
||||
return;
|
||||
goto done;
|
||||
}
|
||||
s->connected = false;
|
||||
|
||||
|
@ -189,6 +190,7 @@ static void vhost_user_scsi_disconnect(DeviceState *dev)
|
|||
|
||||
vhost_dev_cleanup(&vsc->dev);
|
||||
|
||||
done:
|
||||
/* Re-instate the event handler for new connections */
|
||||
qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL,
|
||||
vhost_user_scsi_event, NULL, dev, NULL, true);
|
||||
|
@ -214,8 +216,7 @@ static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event)
|
|||
case CHR_EVENT_CLOSED:
|
||||
/* defer close until later to avoid circular close */
|
||||
vhost_user_async_close(dev, &vs->conf.chardev, &vsc->dev,
|
||||
vhost_user_scsi_disconnect,
|
||||
vhost_user_scsi_event);
|
||||
vhost_user_scsi_disconnect);
|
||||
break;
|
||||
case CHR_EVENT_BREAK:
|
||||
case CHR_EVENT_MUX_IN:
|
||||
|
|
|
@ -223,15 +223,18 @@ static void vub_disconnect(DeviceState *dev)
|
|||
{
|
||||
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
|
||||
VHostUserBase *vub = VHOST_USER_BASE(vdev);
|
||||
struct vhost_virtqueue *vhost_vqs = vub->vhost_dev.vqs;
|
||||
|
||||
if (!vub->connected) {
|
||||
return;
|
||||
goto done;
|
||||
}
|
||||
vub->connected = false;
|
||||
|
||||
vub_stop(vdev);
|
||||
vhost_dev_cleanup(&vub->vhost_dev);
|
||||
g_free(vhost_vqs);
|
||||
|
||||
done:
|
||||
/* Re-instate the event handler for new connections */
|
||||
qemu_chr_fe_set_handlers(&vub->chardev,
|
||||
NULL, NULL, vub_event,
|
||||
|
@ -254,7 +257,7 @@ static void vub_event(void *opaque, QEMUChrEvent event)
|
|||
case CHR_EVENT_CLOSED:
|
||||
/* defer close until later to avoid circular close */
|
||||
vhost_user_async_close(dev, &vub->chardev, &vub->vhost_dev,
|
||||
vub_disconnect, vub_event);
|
||||
vub_disconnect);
|
||||
break;
|
||||
case CHR_EVENT_BREAK:
|
||||
case CHR_EVENT_MUX_IN:
|
||||
|
|
|
@ -33,7 +33,7 @@ static const int user_feature_bits[] = {
|
|||
VIRTIO_F_RING_PACKED,
|
||||
VIRTIO_F_IOMMU_PLATFORM,
|
||||
VIRTIO_F_RING_RESET,
|
||||
|
||||
VIRTIO_F_NOTIFICATION_DATA,
|
||||
VHOST_INVALID_FEATURE_BIT
|
||||
};
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@ static const int user_feature_bits[] = {
|
|||
VIRTIO_RING_F_INDIRECT_DESC,
|
||||
VIRTIO_RING_F_EVENT_IDX,
|
||||
VIRTIO_F_NOTIFY_ON_EMPTY,
|
||||
VIRTIO_F_NOTIFICATION_DATA,
|
||||
VHOST_INVALID_FEATURE_BIT
|
||||
};
|
||||
|
||||
|
|
|
@ -371,6 +371,7 @@ static bool vhost_user_per_device_request(VhostUserRequest request)
|
|||
case VHOST_USER_RESET_DEVICE:
|
||||
case VHOST_USER_ADD_MEM_REG:
|
||||
case VHOST_USER_REM_MEM_REG:
|
||||
case VHOST_USER_SET_LOG_BASE:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
@ -2776,25 +2777,13 @@ typedef struct {
|
|||
DeviceState *dev;
|
||||
CharBackend *cd;
|
||||
struct vhost_dev *vhost;
|
||||
IOEventHandler *event_cb;
|
||||
} VhostAsyncCallback;
|
||||
|
||||
static void vhost_user_async_close_bh(void *opaque)
|
||||
{
|
||||
VhostAsyncCallback *data = opaque;
|
||||
struct vhost_dev *vhost = data->vhost;
|
||||
|
||||
/*
|
||||
* If the vhost_dev has been cleared in the meantime there is
|
||||
* nothing left to do as some other path has completed the
|
||||
* cleanup.
|
||||
*/
|
||||
if (vhost->vdev) {
|
||||
data->cb(data->dev);
|
||||
} else if (data->event_cb) {
|
||||
qemu_chr_fe_set_handlers(data->cd, NULL, NULL, data->event_cb,
|
||||
NULL, data->dev, NULL, true);
|
||||
}
|
||||
data->cb(data->dev);
|
||||
|
||||
g_free(data);
|
||||
}
|
||||
|
@ -2806,8 +2795,7 @@ static void vhost_user_async_close_bh(void *opaque)
|
|||
*/
|
||||
void vhost_user_async_close(DeviceState *d,
|
||||
CharBackend *chardev, struct vhost_dev *vhost,
|
||||
vu_async_close_fn cb,
|
||||
IOEventHandler *event_cb)
|
||||
vu_async_close_fn cb)
|
||||
{
|
||||
if (!runstate_check(RUN_STATE_SHUTDOWN)) {
|
||||
/*
|
||||
|
@ -2823,7 +2811,6 @@ void vhost_user_async_close(DeviceState *d,
|
|||
data->dev = d;
|
||||
data->cd = chardev;
|
||||
data->vhost = vhost;
|
||||
data->event_cb = event_cb;
|
||||
|
||||
/* Disable any further notifications on the chardev */
|
||||
qemu_chr_fe_set_handlers(chardev,
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
const int feature_bits[] = {
|
||||
VIRTIO_VSOCK_F_SEQPACKET,
|
||||
VIRTIO_F_RING_RESET,
|
||||
VIRTIO_F_RING_PACKED,
|
||||
VHOST_INVALID_FEATURE_BIT
|
||||
};
|
||||
|
||||
|
|
|
@ -43,8 +43,9 @@
|
|||
do { } while (0)
|
||||
#endif
|
||||
|
||||
static struct vhost_log *vhost_log;
|
||||
static struct vhost_log *vhost_log_shm;
|
||||
static struct vhost_log *vhost_log[VHOST_BACKEND_TYPE_MAX];
|
||||
static struct vhost_log *vhost_log_shm[VHOST_BACKEND_TYPE_MAX];
|
||||
static QLIST_HEAD(, vhost_dev) vhost_log_devs[VHOST_BACKEND_TYPE_MAX];
|
||||
|
||||
/* Memslots used by backends that support private memslots (without an fd). */
|
||||
static unsigned int used_memslots;
|
||||
|
@ -149,6 +150,47 @@ bool vhost_dev_has_iommu(struct vhost_dev *dev)
|
|||
}
|
||||
}
|
||||
|
||||
static inline bool vhost_dev_should_log(struct vhost_dev *dev)
|
||||
{
|
||||
assert(dev->vhost_ops);
|
||||
assert(dev->vhost_ops->backend_type > VHOST_BACKEND_TYPE_NONE);
|
||||
assert(dev->vhost_ops->backend_type < VHOST_BACKEND_TYPE_MAX);
|
||||
|
||||
return dev == QLIST_FIRST(&vhost_log_devs[dev->vhost_ops->backend_type]);
|
||||
}
|
||||
|
||||
static inline void vhost_dev_elect_mem_logger(struct vhost_dev *hdev, bool add)
|
||||
{
|
||||
VhostBackendType backend_type;
|
||||
|
||||
assert(hdev->vhost_ops);
|
||||
|
||||
backend_type = hdev->vhost_ops->backend_type;
|
||||
assert(backend_type > VHOST_BACKEND_TYPE_NONE);
|
||||
assert(backend_type < VHOST_BACKEND_TYPE_MAX);
|
||||
|
||||
if (add && !QLIST_IS_INSERTED(hdev, logdev_entry)) {
|
||||
if (QLIST_EMPTY(&vhost_log_devs[backend_type])) {
|
||||
QLIST_INSERT_HEAD(&vhost_log_devs[backend_type],
|
||||
hdev, logdev_entry);
|
||||
} else {
|
||||
/*
|
||||
* The first vhost_device in the list is selected as the shared
|
||||
* logger to scan memory sections. Put new entry next to the head
|
||||
* to avoid inadvertent change to the underlying logger device.
|
||||
* This is done in order to get better cache locality and to avoid
|
||||
* performance churn on the hot path for log scanning. Even when
|
||||
* new devices come and go quickly, it wouldn't end up changing
|
||||
* the active leading logger device at all.
|
||||
*/
|
||||
QLIST_INSERT_AFTER(QLIST_FIRST(&vhost_log_devs[backend_type]),
|
||||
hdev, logdev_entry);
|
||||
}
|
||||
} else if (!add && QLIST_IS_INSERTED(hdev, logdev_entry)) {
|
||||
QLIST_REMOVE(hdev, logdev_entry);
|
||||
}
|
||||
}
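
The comment above explains why new devices are linked in after the list head: the head of the per-backend list is the single device elected to scan the shared dirty log, and inserting behind it keeps that election stable while other devices come and go. A small standalone model of that policy (plain C with a toy singly linked list, illustrative names, not QEMU's QLIST API):

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the per-backend logger list: the head is the elected logger. */
struct dev {
    const char *name;
    struct dev *next;
};

static struct dev *log_devs;  /* head == device that scans the dirty log */

static void elect(struct dev *d, bool add)
{
    if (add) {
        if (!log_devs) {
            d->next = NULL;
            log_devs = d;              /* first device becomes the logger */
        } else {
            d->next = log_devs->next;  /* insert after the head, not before, */
            log_devs->next = d;        /* so the elected logger never changes */
        }
    } else {
        for (struct dev **p = &log_devs; *p; p = &(*p)->next) {
            if (*p == d) {
                *p = d->next;
                break;
            }
        }
    }
}

static bool should_log(struct dev *d)
{
    return d == log_devs;  /* only the head scans memory sections */
}

int main(void)
{
    struct dev a = { "vhost0" }, b = { "vhost1" }, c = { "vhost2" };

    elect(&a, true);
    elect(&b, true);
    elect(&c, true);
    printf("%s logs\n", log_devs->name);               /* vhost0 */
    elect(&b, false);                                  /* churn on later devices... */
    printf("%d %s\n", should_log(&a), log_devs->name); /* ...does not move the logger */
    return 0;
}
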
|
||||
|
||||
static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
|
||||
MemoryRegionSection *section,
|
||||
hwaddr first,
|
||||
|
@ -166,12 +208,14 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
|
|||
start_addr = MAX(first, start_addr);
|
||||
end_addr = MIN(last, end_addr);
|
||||
|
||||
for (i = 0; i < dev->mem->nregions; ++i) {
|
||||
struct vhost_memory_region *reg = dev->mem->regions + i;
|
||||
vhost_dev_sync_region(dev, section, start_addr, end_addr,
|
||||
reg->guest_phys_addr,
|
||||
range_get_last(reg->guest_phys_addr,
|
||||
reg->memory_size));
|
||||
if (vhost_dev_should_log(dev)) {
|
||||
for (i = 0; i < dev->mem->nregions; ++i) {
|
||||
struct vhost_memory_region *reg = dev->mem->regions + i;
|
||||
vhost_dev_sync_region(dev, section, start_addr, end_addr,
|
||||
reg->guest_phys_addr,
|
||||
range_get_last(reg->guest_phys_addr,
|
||||
reg->memory_size));
|
||||
}
|
||||
}
|
||||
for (i = 0; i < dev->nvqs; ++i) {
|
||||
struct vhost_virtqueue *vq = dev->vqs + i;
|
||||
|
@ -287,6 +331,10 @@ static int vhost_set_backend_type(struct vhost_dev *dev,
|
|||
r = -1;
|
||||
}
|
||||
|
||||
if (r == 0) {
|
||||
assert(dev->vhost_ops->backend_type == backend_type);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -319,16 +367,22 @@ static struct vhost_log *vhost_log_alloc(uint64_t size, bool share)
|
|||
return log;
|
||||
}
|
||||
|
||||
static struct vhost_log *vhost_log_get(uint64_t size, bool share)
|
||||
static struct vhost_log *vhost_log_get(VhostBackendType backend_type,
|
||||
uint64_t size, bool share)
|
||||
{
|
||||
struct vhost_log *log = share ? vhost_log_shm : vhost_log;
|
||||
struct vhost_log *log;
|
||||
|
||||
assert(backend_type > VHOST_BACKEND_TYPE_NONE);
|
||||
assert(backend_type < VHOST_BACKEND_TYPE_MAX);
|
||||
|
||||
log = share ? vhost_log_shm[backend_type] : vhost_log[backend_type];
|
||||
|
||||
if (!log || log->size != size) {
|
||||
log = vhost_log_alloc(size, share);
|
||||
if (share) {
|
||||
vhost_log_shm = log;
|
||||
vhost_log_shm[backend_type] = log;
|
||||
} else {
|
||||
vhost_log = log;
|
||||
vhost_log[backend_type] = log;
|
||||
}
|
||||
} else {
|
||||
++log->refcnt;
|
||||
|
@ -340,11 +394,20 @@ static struct vhost_log *vhost_log_get(uint64_t size, bool share)
|
|||
static void vhost_log_put(struct vhost_dev *dev, bool sync)
|
||||
{
|
||||
struct vhost_log *log = dev->log;
|
||||
VhostBackendType backend_type;
|
||||
|
||||
if (!log) {
|
||||
return;
|
||||
}
|
||||
|
||||
assert(dev->vhost_ops);
|
||||
backend_type = dev->vhost_ops->backend_type;
|
||||
|
||||
if (backend_type == VHOST_BACKEND_TYPE_NONE ||
|
||||
backend_type >= VHOST_BACKEND_TYPE_MAX) {
|
||||
return;
|
||||
}
|
||||
|
||||
--log->refcnt;
|
||||
if (log->refcnt == 0) {
|
||||
/* Sync only the range covered by the old log */
|
||||
|
@ -352,18 +415,19 @@ static void vhost_log_put(struct vhost_dev *dev, bool sync)
|
|||
vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1);
|
||||
}
|
||||
|
||||
if (vhost_log == log) {
|
||||
if (vhost_log[backend_type] == log) {
|
||||
g_free(log->log);
|
||||
vhost_log = NULL;
|
||||
} else if (vhost_log_shm == log) {
|
||||
vhost_log[backend_type] = NULL;
|
||||
} else if (vhost_log_shm[backend_type] == log) {
|
||||
qemu_memfd_free(log->log, log->size * sizeof(*(log->log)),
|
||||
log->fd);
|
||||
vhost_log_shm = NULL;
|
||||
vhost_log_shm[backend_type] = NULL;
|
||||
}
|
||||
|
||||
g_free(log);
|
||||
}
|
||||
|
||||
vhost_dev_elect_mem_logger(dev, false);
|
||||
dev->log = NULL;
|
||||
dev->log_size = 0;
|
||||
}
|
||||
|
@ -376,7 +440,8 @@ static bool vhost_dev_log_is_shared(struct vhost_dev *dev)
|
|||
|
||||
static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
|
||||
{
|
||||
struct vhost_log *log = vhost_log_get(size, vhost_dev_log_is_shared(dev));
|
||||
struct vhost_log *log = vhost_log_get(dev->vhost_ops->backend_type,
|
||||
size, vhost_dev_log_is_shared(dev));
|
||||
uint64_t log_base = (uintptr_t)log->log;
|
||||
int r;
|
||||
|
||||
|
@ -978,6 +1043,15 @@ static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
|
|||
goto err_vq;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* At log start we select our vhost_device logger that will scan the
|
||||
* memory sections and skip for the others. This is possible because
|
||||
* the log is shared amongst all vhost devices for a given type of
|
||||
* backend.
|
||||
*/
|
||||
vhost_dev_elect_mem_logger(dev, enable_log);
|
||||
|
||||
return 0;
|
||||
err_vq:
|
||||
for (; i >= 0; --i) {
|
||||
|
@ -2044,7 +2118,8 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
|
|||
uint64_t log_base;
|
||||
|
||||
hdev->log_size = vhost_get_log_size(hdev);
|
||||
hdev->log = vhost_log_get(hdev->log_size,
|
||||
hdev->log = vhost_log_get(hdev->vhost_ops->backend_type,
|
||||
hdev->log_size,
|
||||
vhost_dev_log_is_shared(hdev));
|
||||
log_base = (uintptr_t)hdev->log->log;
|
||||
r = hdev->vhost_ops->vhost_set_log_base(hdev,
|
||||
|
@ -2054,6 +2129,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
|
|||
VHOST_OPS_DEBUG(r, "vhost_set_log_base failed");
|
||||
goto fail_log;
|
||||
}
|
||||
vhost_dev_elect_mem_logger(hdev, true);
|
||||
}
|
||||
if (vrings) {
|
||||
r = vhost_dev_set_vring_enable(hdev, true);
|
||||
|
|
|
@ -467,6 +467,26 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
|
|||
return &sdev->as;
|
||||
}
|
||||
|
||||
static void virtio_iommu_device_clear(VirtIOIOMMU *s, PCIBus *bus, int devfn)
|
||||
{
|
||||
IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
|
||||
IOMMUDevice *sdev;
|
||||
|
||||
if (!sbus) {
|
||||
return;
|
||||
}
|
||||
|
||||
sdev = sbus->pbdev[devfn];
|
||||
if (!sdev) {
|
||||
return;
|
||||
}
|
||||
|
||||
g_list_free_full(sdev->resv_regions, g_free);
|
||||
sdev->resv_regions = NULL;
|
||||
g_free(sdev);
|
||||
sbus->pbdev[devfn] = NULL;
|
||||
}
|
||||
|
||||
static gboolean hiod_equal(gconstpointer v1, gconstpointer v2)
|
||||
{
|
||||
const struct hiod_key *key1 = v1;
|
||||
|
@ -650,6 +670,7 @@ virtio_iommu_unset_iommu_device(PCIBus *bus, void *opaque, int devfn)
|
|||
}
|
||||
|
||||
g_hash_table_remove(viommu->host_iommu_devices, &key);
|
||||
virtio_iommu_device_clear(viommu, bus, devfn);
|
||||
}
|
||||
|
||||
static const PCIIOMMUOps virtio_iommu_ops = {
|
||||
|
@ -974,6 +995,9 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
|
|||
iov = elem->out_sg;
|
||||
sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head));
|
||||
if (unlikely(sz != sizeof(head))) {
|
||||
qemu_log_mask(LOG_GUEST_ERROR,
|
||||
"%s: read %zu bytes from command head"
|
||||
"but expected %zu\n", __func__, sz, sizeof(head));
|
||||
tail.status = VIRTIO_IOMMU_S_DEVERR;
|
||||
goto out;
|
||||
}
|
||||
|
@ -1010,6 +1034,25 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
|
|||
out:
|
||||
sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
|
||||
buf ? buf : &tail, output_size);
|
||||
if (unlikely(sz != output_size)) {
|
||||
qemu_log_mask(LOG_GUEST_ERROR,
|
||||
"%s: wrote %zu bytes to command response"
|
||||
"but response size is %zu\n",
|
||||
__func__, sz, output_size);
|
||||
tail.status = VIRTIO_IOMMU_S_DEVERR;
|
||||
/*
|
||||
* We checked that sizeof(tail) can fit to elem->in_sg at the
|
||||
* beginning of the loop
|
||||
*/
|
||||
output_size = sizeof(tail);
|
||||
g_free(buf);
|
||||
buf = NULL;
|
||||
sz = iov_from_buf(elem->in_sg,
|
||||
elem->in_num,
|
||||
0,
|
||||
&tail,
|
||||
output_size);
|
||||
}
|
||||
assert(sz == output_size);
|
||||
|
||||
virtqueue_push(vq, elem, sz);
|
||||
|
|
|
@ -248,6 +248,7 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
|
|||
{
|
||||
VirtIOMMIOProxy *proxy = (VirtIOMMIOProxy *)opaque;
|
||||
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
|
||||
uint16_t vq_idx;
|
||||
|
||||
trace_virtio_mmio_write_offset(offset, value);
|
||||
|
||||
|
@ -407,8 +408,14 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
|
|||
}
|
||||
break;
|
||||
case VIRTIO_MMIO_QUEUE_NOTIFY:
|
||||
if (value < VIRTIO_QUEUE_MAX) {
|
||||
virtio_queue_notify(vdev, value);
|
||||
vq_idx = value;
|
||||
if (vq_idx < VIRTIO_QUEUE_MAX && virtio_queue_get_num(vdev, vq_idx)) {
|
||||
if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA)) {
|
||||
VirtQueue *vq = virtio_get_queue(vdev, vq_idx);
|
||||
|
||||
virtio_queue_set_shadow_avail_idx(vq, (value >> 16) & 0xFFFF);
|
||||
}
|
||||
virtio_queue_notify(vdev, vq_idx);
|
||||
}
|
||||
break;
|
||||
case VIRTIO_MMIO_INTERRUPT_ACK:
|
||||
|
|
|
@ -384,7 +384,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
|
|||
{
|
||||
VirtIOPCIProxy *proxy = opaque;
|
||||
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
|
||||
uint16_t vector;
|
||||
uint16_t vector, vq_idx;
|
||||
hwaddr pa;
|
||||
|
||||
switch (addr) {
|
||||
|
@ -408,8 +408,14 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
|
|||
vdev->queue_sel = val;
|
||||
break;
|
||||
case VIRTIO_PCI_QUEUE_NOTIFY:
|
||||
if (val < VIRTIO_QUEUE_MAX) {
|
||||
virtio_queue_notify(vdev, val);
|
||||
vq_idx = val;
|
||||
if (vq_idx < VIRTIO_QUEUE_MAX && virtio_queue_get_num(vdev, vq_idx)) {
|
||||
if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA)) {
|
||||
VirtQueue *vq = virtio_get_queue(vdev, vq_idx);
|
||||
|
||||
virtio_queue_set_shadow_avail_idx(vq, val >> 16);
|
||||
}
|
||||
virtio_queue_notify(vdev, vq_idx);
|
||||
}
|
||||
break;
|
||||
case VIRTIO_PCI_STATUS:
|
||||
|
@ -892,7 +898,7 @@ static int kvm_virtio_pci_vector_use_one(VirtIOPCIProxy *proxy, int queue_no)
|
|||
}
|
||||
ret = kvm_virtio_pci_vq_vector_use(proxy, vector);
|
||||
if (ret < 0) {
|
||||
goto undo;
|
||||
return ret;
|
||||
}
|
||||
/*
|
||||
* If guest supports masking, set up irqfd now.
|
||||
|
@ -902,25 +908,11 @@ static int kvm_virtio_pci_vector_use_one(VirtIOPCIProxy *proxy, int queue_no)
|
|||
ret = kvm_virtio_pci_irqfd_use(proxy, n, vector);
|
||||
if (ret < 0) {
|
||||
kvm_virtio_pci_vq_vector_release(proxy, vector);
|
||||
goto undo;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
undo:
|
||||
|
||||
vector = virtio_queue_vector(vdev, queue_no);
|
||||
if (vector >= msix_nr_vectors_allocated(dev)) {
|
||||
return ret;
|
||||
}
|
||||
if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
|
||||
ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector);
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
}
|
||||
kvm_virtio_pci_irqfd_release(proxy, n, vector);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs)
|
||||
{
|
||||
|
@ -2230,6 +2222,11 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
|
|||
pcie_cap_lnkctl_init(pci_dev);
|
||||
}
|
||||
|
||||
if (proxy->flags & VIRTIO_PCI_FLAG_PM_NO_SOFT_RESET) {
|
||||
pci_set_word(pci_dev->config + pos + PCI_PM_CTRL,
|
||||
PCI_PM_CTRL_NO_SOFT_RESET);
|
||||
}
|
||||
|
||||
if (proxy->flags & VIRTIO_PCI_FLAG_INIT_PM) {
|
||||
/* Init Power Management Control Register */
|
||||
pci_set_word(pci_dev->wmask + pos + PCI_PM_CTRL,
|
||||
|
@ -2292,18 +2289,46 @@ static void virtio_pci_reset(DeviceState *qdev)
|
|||
}
|
||||
}
|
||||
|
||||
static bool virtio_pci_no_soft_reset(PCIDevice *dev)
|
||||
{
|
||||
uint16_t pmcsr;
|
||||
|
||||
if (!pci_is_express(dev) || !dev->exp.pm_cap) {
|
||||
return false;
|
||||
}
|
||||
|
||||
pmcsr = pci_get_word(dev->config + dev->exp.pm_cap + PCI_PM_CTRL);
|
||||
|
||||
/*
|
||||
* When No_Soft_Reset bit is set and the device
|
||||
* is in D3hot state, don't reset device
|
||||
*/
|
||||
return (pmcsr & PCI_PM_CTRL_NO_SOFT_RESET) &&
|
||||
(pmcsr & PCI_PM_CTRL_STATE_MASK) == 3;
|
||||
}
|
||||
|
||||
static void virtio_pci_bus_reset_hold(Object *obj, ResetType type)
|
||||
{
|
||||
PCIDevice *dev = PCI_DEVICE(obj);
|
||||
DeviceState *qdev = DEVICE(obj);
|
||||
|
||||
if (virtio_pci_no_soft_reset(dev)) {
|
||||
return;
|
||||
}
|
||||
|
||||
virtio_pci_reset(qdev);
|
||||
|
||||
if (pci_is_express(dev)) {
|
||||
VirtIOPCIProxy *proxy = VIRTIO_PCI(dev);
|
||||
|
||||
pcie_cap_deverr_reset(dev);
|
||||
pcie_cap_lnkctl_reset(dev);
|
||||
|
||||
pci_set_word(dev->config + dev->exp.pm_cap + PCI_PM_CTRL, 0);
|
||||
if (proxy->flags & VIRTIO_PCI_FLAG_INIT_PM) {
|
||||
pci_word_test_and_clear_mask(
|
||||
dev->config + dev->exp.pm_cap + PCI_PM_CTRL,
|
||||
PCI_PM_CTRL_STATE_MASK);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2330,6 +2355,8 @@ static Property virtio_pci_properties[] = {
|
|||
VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT, true),
|
||||
DEFINE_PROP_BIT("x-pcie-pm-init", VirtIOPCIProxy, flags,
|
||||
VIRTIO_PCI_FLAG_INIT_PM_BIT, true),
|
||||
DEFINE_PROP_BIT("x-pcie-pm-no-soft-reset", VirtIOPCIProxy, flags,
|
||||
VIRTIO_PCI_FLAG_PM_NO_SOFT_RESET_BIT, false),
|
||||
DEFINE_PROP_BIT("x-pcie-flr-init", VirtIOPCIProxy, flags,
|
||||
VIRTIO_PCI_FLAG_INIT_FLR_BIT, true),
|
||||
DEFINE_PROP_BIT("aer", VirtIOPCIProxy, flags,
|
||||
|
|
|
@ -323,7 +323,6 @@ static void vring_packed_event_read(VirtIODevice *vdev,
|
|||
/* Make sure flags is seen before off_wrap */
|
||||
smp_rmb();
|
||||
e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off);
|
||||
virtio_tswap16s(vdev, &e->flags);
|
||||
}
|
||||
|
||||
static void vring_packed_off_wrap_write(VirtIODevice *vdev,
|
||||
|
@ -1745,6 +1744,11 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
|
|||
&indirect_desc_cache);
|
||||
} while (rc == VIRTQUEUE_READ_DESC_MORE);
|
||||
|
||||
if (desc_cache != &indirect_desc_cache) {
|
||||
/* Buffer ID is included in the last descriptor in the list. */
|
||||
id = desc.id;
|
||||
}
|
||||
|
||||
/* Now copy what we have collected and mapped */
|
||||
elem = virtqueue_alloc_element(sz, out_num, in_num);
|
||||
for (i = 0; i < out_num; i++) {
|
||||
|
@ -2264,6 +2268,24 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
|
|||
}
|
||||
}
|
||||
|
||||
void virtio_queue_set_shadow_avail_idx(VirtQueue *vq, uint16_t shadow_avail_idx)
|
||||
{
|
||||
if (!vq->vring.desc) {
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* 16-bit data for packed VQs include 1-bit wrap counter and
|
||||
* 15-bit shadow_avail_idx.
|
||||
*/
|
||||
if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
|
||||
vq->shadow_avail_wrap_counter = (shadow_avail_idx >> 15) & 0x1;
|
||||
vq->shadow_avail_idx = shadow_avail_idx & 0x7FFF;
|
||||
} else {
|
||||
vq->shadow_avail_idx = shadow_avail_idx;
|
||||
}
|
||||
}
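
As the comment in the function above notes, for packed virtqueues the 16-bit avail-idx payload of a VIRTIO_F_NOTIFICATION_DATA notification carries the wrap counter in bit 15 and the shadow available index in bits 0-14, while split rings use the full 16 bits. A minimal sketch of that decoding (mirrors the masking above; names are illustrative):

#include <stdint.h>
#include <stdio.h>

/* Decode the 16-bit avail-idx payload from a VIRTIO_F_NOTIFICATION_DATA notify. */
static void decode_packed_avail(uint16_t data, uint16_t *idx, unsigned *wrap)
{
    *wrap = (data >> 15) & 0x1;  /* bit 15: driver ring wrap counter */
    *idx  = data & 0x7FFF;       /* bits 0-14: shadow available index */
}

int main(void)
{
    uint16_t idx;
    unsigned wrap;

    decode_packed_avail(0x8005, &idx, &wrap);
    printf("idx=%u wrap=%u\n", (unsigned)idx, wrap);  /* idx=5 wrap=1 */
    return 0;
}
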
|
||||
|
||||
static void virtio_queue_notify_vq(VirtQueue *vq)
|
||||
{
|
||||
if (vq->vring.desc && vq->handle_output) {
|
||||
|
@ -2962,6 +2984,20 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t val)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static void virtio_device_check_notification_compatibility(VirtIODevice *vdev,
|
||||
Error **errp)
|
||||
{
|
||||
VirtioBusState *bus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
|
||||
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus);
|
||||
DeviceState *proxy = DEVICE(BUS(bus)->parent);
|
||||
|
||||
if (virtio_host_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA) &&
|
||||
k->ioeventfd_enabled(proxy)) {
|
||||
error_setg(errp,
|
||||
"notification_data=on without ioeventfd=off is not supported");
|
||||
}
|
||||
}
|
||||
|
||||
size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
|
||||
uint64_t host_features)
|
||||
{
|
||||
|
@ -3722,6 +3758,14 @@ static void virtio_device_realize(DeviceState *dev, Error **errp)
|
|||
}
|
||||
}
|
||||
|
||||
/* Devices should not use both ioeventfd and notification data feature */
|
||||
virtio_device_check_notification_compatibility(vdev, &err);
|
||||
if (err != NULL) {
|
||||
error_propagate(errp, err);
|
||||
vdc->unrealize(dev);
|
||||
return;
|
||||
}
|
||||
|
||||
virtio_bus_device_plugged(vdev, &err);
|
||||
if (err != NULL) {
|
||||
error_propagate(errp, err);
|
||||
|
|
|
@ -53,7 +53,7 @@ void *pci_assign_dev_load_option_rom(PCIDevice *dev,
|
|||
}
|
||||
fseek(fp, 0, SEEK_SET);
|
||||
|
||||
if (dev->romsize != -1) {
|
||||
if (dev->romsize != UINT_MAX) {
|
||||
if (st.st_size > dev->romsize) {
|
||||
error_report("ROM BAR \"%s\" (%ld bytes) is too large for ROM size %u",
|
||||
rom_file, (long) st.st_size, dev->romsize);