virtio,pc,pci: fixes, features, cleanups

asymmetric crypto support for cryptodev-vhost-user
 rom migration when rom size changes
 poison get, inject, clear; mock cxl events and irq support for cxl
 shadow virtqueue offload support for vhost-vdpa
 vdpa now maps shadow vrings with MAP_SHARED
 max_cpus went up to 1024 and we default to smbios 3.0 for pc
 
 Fixes, cleanups all over the place. In particular
     hw/acpi: Fix PM control register access
 works around a very long standing bug in memory core.
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmSZl5EPHG1zdEByZWRo
 YXQuY29tAAoJECgfDbjSjVRph+8H/RZodqCadmQ1evpeWs7RBSvJeZgbJTVl/9/h
 +ObvEmVz2+X4D+O1Kxh54vDV0SNVq3XjyrFy3Ur57MAR6r2ZWwB6HySaeFdi4zIm
 N0SMkfUylDnf7ulyjzJoXDzHOoFnqAM6fU/jcoQXBIdUeeqwPrzLOZHrGrwevPWK
 iH5JP66suOVlBuKLJjlUKI3/4vK3oTod5Xa3Oz2Cw1oODtbIa97N8ZAdBgZd3ah9
 7mjZjcH54kFRwfidz/rkpY5NMru8BlD54MyEOWofvTL2w7aoWmVO99qHEK+SjLkG
 x4Mx3aYlnOEvkJ+5yBHvtXS4Gc5T9ltY84AvcwPNuz4RKCORi1s=
 =Do8p
 -----END PGP SIGNATURE-----

Merge tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging

virtio,pc,pci: fixes, features, cleanups

asymmetric crypto support for cryptodev-vhost-user
rom migration when rom size changes
poison get, inject, clear; mock cxl events and irq support for cxl
shadow virtqueue offload support for vhost-vdpa
vdpa now maps shadow vrings with MAP_SHARED
max_cpus went up to 1024 and we default to smbios 3.0 for pc

Fixes, cleanups all over the place. In particular
    hw/acpi: Fix PM control register access
works around a very long standing bug in memory core.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# -----BEGIN PGP SIGNATURE-----
#
# iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmSZl5EPHG1zdEByZWRo
# YXQuY29tAAoJECgfDbjSjVRph+8H/RZodqCadmQ1evpeWs7RBSvJeZgbJTVl/9/h
# +ObvEmVz2+X4D+O1Kxh54vDV0SNVq3XjyrFy3Ur57MAR6r2ZWwB6HySaeFdi4zIm
# N0SMkfUylDnf7ulyjzJoXDzHOoFnqAM6fU/jcoQXBIdUeeqwPrzLOZHrGrwevPWK
# iH5JP66suOVlBuKLJjlUKI3/4vK3oTod5Xa3Oz2Cw1oODtbIa97N8ZAdBgZd3ah9
# 7mjZjcH54kFRwfidz/rkpY5NMru8BlD54MyEOWofvTL2w7aoWmVO99qHEK+SjLkG
# x4Mx3aYlnOEvkJ+5yBHvtXS4Gc5T9ltY84AvcwPNuz4RKCORi1s=
# =Do8p
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 26 Jun 2023 03:50:09 PM CEST
# gpg:                using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469
# gpg:                issuer "mst@redhat.com"
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [undefined]
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>" [undefined]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu: (53 commits)
  vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present
  vhost_net: add an assertion for TAP client backends
  intel_iommu: Fix address space unmap
  intel_iommu: Fix flag check in replay
  intel_iommu: Fix a potential issue in VFIO dirty page sync
  vhost-user: fully use new backend/frontend naming
  virtio-scsi: avoid dangling host notifier in ->ioeventfd_stop()
  hw/i386/pc: Clean up pc_machine_initfn
  vdpa: fix not using CVQ buffer in case of error
  vdpa: mask _F_CTRL_GUEST_OFFLOADS for vhost vdpa devices
  vhost: fix vhost_dev_enable_notifiers() error case
  vdpa: Allow VIRTIO_NET_F_CTRL_GUEST_OFFLOADS in SVQ
  vdpa: Add vhost_vdpa_net_load_offloads()
  virtio-net: expose virtio_net_supported_guest_offloads()
  hw/net/virtio-net: make some VirtIONet const
  vdpa: reuse virtio_vdev_has_feature()
  include/hw/virtio: make some VirtIODevice const
  vdpa: map shadow vrings with MAP_SHARED
  vdpa: reorder vhost_vdpa_net_cvq_cmd_page_len function
  vdpa: do not block migration if device has cvq and x-svq=on
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2023-06-26 16:05:45 +02:00
commit 390e8fc6b0
58 changed files with 2166 additions and 408 deletions

View file

@ -15,6 +15,7 @@
#define EXEC_TARGET_PAGE_H
size_t qemu_target_page_size(void);
int qemu_target_page_mask(void);
int qemu_target_page_bits(void);
int qemu_target_page_bits_min(void);

View file

@ -18,6 +18,7 @@
#include "cxl_component.h"
#include "cxl_device.h"
#define CXL_CACHE_LINE_SIZE 64
#define CXL_COMPONENT_REG_BAR_IDX 0
#define CXL_DEVICE_REG_BAR_IDX 2

View file

@ -13,6 +13,7 @@
#include "hw/cxl/cxl_component.h"
#include "hw/pci/pci_device.h"
#include "hw/register.h"
#include "hw/cxl/cxl_events.h"
/*
* The following is how a CXL device's Memory Device registers are laid out.
@ -82,11 +83,64 @@
(CXL_DEVICE_CAP_REG_SIZE + CXL_DEVICE_STATUS_REGISTERS_LENGTH + \
CXL_MAILBOX_REGISTERS_LENGTH + CXL_MEMORY_DEVICE_REGISTERS_LENGTH)
/* 8.2.8.4.5.1 Command Return Codes */
typedef enum {
CXL_MBOX_SUCCESS = 0x0,
CXL_MBOX_BG_STARTED = 0x1,
CXL_MBOX_INVALID_INPUT = 0x2,
CXL_MBOX_UNSUPPORTED = 0x3,
CXL_MBOX_INTERNAL_ERROR = 0x4,
CXL_MBOX_RETRY_REQUIRED = 0x5,
CXL_MBOX_BUSY = 0x6,
CXL_MBOX_MEDIA_DISABLED = 0x7,
CXL_MBOX_FW_XFER_IN_PROGRESS = 0x8,
CXL_MBOX_FW_XFER_OUT_OF_ORDER = 0x9,
CXL_MBOX_FW_AUTH_FAILED = 0xa,
CXL_MBOX_FW_INVALID_SLOT = 0xb,
CXL_MBOX_FW_ROLLEDBACK = 0xc,
CXL_MBOX_FW_REST_REQD = 0xd,
CXL_MBOX_INVALID_HANDLE = 0xe,
CXL_MBOX_INVALID_PA = 0xf,
CXL_MBOX_INJECT_POISON_LIMIT = 0x10,
CXL_MBOX_PERMANENT_MEDIA_FAILURE = 0x11,
CXL_MBOX_ABORTED = 0x12,
CXL_MBOX_INVALID_SECURITY_STATE = 0x13,
CXL_MBOX_INCORRECT_PASSPHRASE = 0x14,
CXL_MBOX_UNSUPPORTED_MAILBOX = 0x15,
CXL_MBOX_INVALID_PAYLOAD_LENGTH = 0x16,
CXL_MBOX_MAX = 0x17
} CXLRetCode;
typedef struct CXLEvent {
CXLEventRecordRaw data;
QSIMPLEQ_ENTRY(CXLEvent) node;
} CXLEvent;
typedef struct CXLEventLog {
uint16_t next_handle;
uint16_t overflow_err_count;
uint64_t first_overflow_timestamp;
uint64_t last_overflow_timestamp;
bool irq_enabled;
int irq_vec;
QemuMutex lock;
QSIMPLEQ_HEAD(, CXLEvent) events;
} CXLEventLog;
typedef struct cxl_device_state {
MemoryRegion device_registers;
/* mmio for device capabilities array - 8.2.8.2 */
MemoryRegion device;
struct {
MemoryRegion device;
union {
uint8_t dev_reg_state[CXL_DEVICE_STATUS_REGISTERS_LENGTH];
uint16_t dev_reg_state16[CXL_DEVICE_STATUS_REGISTERS_LENGTH / 2];
uint32_t dev_reg_state32[CXL_DEVICE_STATUS_REGISTERS_LENGTH / 4];
uint64_t dev_reg_state64[CXL_DEVICE_STATUS_REGISTERS_LENGTH / 8];
};
uint64_t event_status;
};
MemoryRegion memory_device;
struct {
MemoryRegion caps;
@ -123,6 +177,8 @@ typedef struct cxl_device_state {
uint64_t mem_size;
uint64_t pmem_size;
uint64_t vmem_size;
CXLEventLog event_logs[CXL_EVENT_TYPE_MAX];
} CXLDeviceState;
/* Initialize the register block for a device */
@ -141,6 +197,9 @@ REG64(CXL_DEV_CAP_ARRAY, 0) /* Documented as 128 bit register but 64 byte access
FIELD(CXL_DEV_CAP_ARRAY, CAP_VERSION, 16, 8)
FIELD(CXL_DEV_CAP_ARRAY, CAP_COUNT, 32, 16)
void cxl_event_set_status(CXLDeviceState *cxl_dstate, CXLEventLogType log_type,
bool available);
/*
* Helper macro to initialize capability headers for CXL devices.
*
@ -175,7 +234,7 @@ CXL_DEVICE_CAPABILITY_HEADER_REGISTER(MEMORY_DEVICE,
void cxl_initialize_mailbox(CXLDeviceState *cxl_dstate);
void cxl_process_mailbox(CXLDeviceState *cxl_dstate);
#define cxl_device_cap_init(dstate, reg, cap_id) \
#define cxl_device_cap_init(dstate, reg, cap_id, ver) \
do { \
uint32_t *cap_hdrs = dstate->caps_reg_state32; \
int which = R_CXL_DEV_##reg##_CAP_HDR0; \
@ -183,7 +242,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate);
FIELD_DP32(cap_hdrs[which], CXL_DEV_##reg##_CAP_HDR0, \
CAP_ID, cap_id); \
cap_hdrs[which] = FIELD_DP32( \
cap_hdrs[which], CXL_DEV_##reg##_CAP_HDR0, CAP_VERSION, 1); \
cap_hdrs[which], CXL_DEV_##reg##_CAP_HDR0, CAP_VERSION, ver); \
cap_hdrs[which + 1] = \
FIELD_DP32(cap_hdrs[which + 1], CXL_DEV_##reg##_CAP_HDR1, \
CAP_OFFSET, CXL_##reg##_REGISTERS_OFFSET); \
@ -192,6 +251,10 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate);
CAP_LENGTH, CXL_##reg##_REGISTERS_LENGTH); \
} while (0)
/* CXL 3.0 8.2.8.3.1 Event Status Register */
REG64(CXL_DEV_EVENT_STATUS, 0)
FIELD(CXL_DEV_EVENT_STATUS, EVENT_STATUS, 0, 32)
/* CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register */
REG32(CXL_DEV_MAILBOX_CAP, 0)
FIELD(CXL_DEV_MAILBOX_CAP, PAYLOAD_SIZE, 0, 5)
@ -242,6 +305,18 @@ typedef struct CXLError {
typedef QTAILQ_HEAD(, CXLError) CXLErrorList;
typedef struct CXLPoison {
uint64_t start, length;
uint8_t type;
#define CXL_POISON_TYPE_EXTERNAL 0x1
#define CXL_POISON_TYPE_INTERNAL 0x2
#define CXL_POISON_TYPE_INJECTED 0x3
QLIST_ENTRY(CXLPoison) node;
} CXLPoison;
typedef QLIST_HEAD(, CXLPoison) CXLPoisonList;
#define CXL_POISON_LIST_LIMIT 256
struct CXLType3Dev {
/* Private */
PCIDevice parent_obj;
@ -264,6 +339,12 @@ struct CXLType3Dev {
/* Error injection */
CXLErrorList error_list;
/* Poison Injection - cache */
CXLPoisonList poison_list;
unsigned int poison_list_cnt;
bool poison_list_overflowed;
uint64_t poison_list_overflow_ts;
};
#define TYPE_CXL_TYPE3 "cxl-type3"
@ -280,6 +361,7 @@ struct CXLType3Class {
uint64_t offset);
void (*set_lsa)(CXLType3Dev *ct3d, const void *buf, uint64_t size,
uint64_t offset);
bool (*set_cacheline)(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data);
};
MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
@ -289,4 +371,17 @@ MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
uint64_t cxl_device_get_timestamp(CXLDeviceState *cxlds);
void cxl_event_init(CXLDeviceState *cxlds, int start_msg_num);
bool cxl_event_insert(CXLDeviceState *cxlds, CXLEventLogType log_type,
CXLEventRecordRaw *event);
CXLRetCode cxl_event_get_records(CXLDeviceState *cxlds, CXLGetEventPayload *pl,
uint8_t log_type, int max_recs,
uint16_t *len);
CXLRetCode cxl_event_clear_records(CXLDeviceState *cxlds,
CXLClearEventPayload *pl);
void cxl_event_irq_assert(CXLType3Dev *ct3d);
void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d);
#endif

168
include/hw/cxl/cxl_events.h Normal file
View file

@ -0,0 +1,168 @@
/*
* QEMU CXL Events
*
* Copyright (c) 2022 Intel
*
* This work is licensed under the terms of the GNU GPL, version 2. See the
* COPYING file in the top-level directory.
*/
#ifndef CXL_EVENTS_H
#define CXL_EVENTS_H
#include "qemu/uuid.h"
/*
* CXL rev 3.0 section 8.2.9.2.2; Table 8-49
*
* Define these as the bit position for the event status register for ease of
* setting the status.
*/
typedef enum CXLEventLogType {
CXL_EVENT_TYPE_INFO = 0,
CXL_EVENT_TYPE_WARN = 1,
CXL_EVENT_TYPE_FAIL = 2,
CXL_EVENT_TYPE_FATAL = 3,
CXL_EVENT_TYPE_DYNAMIC_CAP = 4,
CXL_EVENT_TYPE_MAX
} CXLEventLogType;
/*
* Common Event Record Format
* CXL rev 3.0 section 8.2.9.2.1; Table 8-42
*/
#define CXL_EVENT_REC_HDR_RES_LEN 0xf
typedef struct CXLEventRecordHdr {
QemuUUID id;
uint8_t length;
uint8_t flags[3];
uint16_t handle;
uint16_t related_handle;
uint64_t timestamp;
uint8_t maint_op_class;
uint8_t reserved[CXL_EVENT_REC_HDR_RES_LEN];
} QEMU_PACKED CXLEventRecordHdr;
#define CXL_EVENT_RECORD_DATA_LENGTH 0x50
typedef struct CXLEventRecordRaw {
CXLEventRecordHdr hdr;
uint8_t data[CXL_EVENT_RECORD_DATA_LENGTH];
} QEMU_PACKED CXLEventRecordRaw;
#define CXL_EVENT_RECORD_SIZE (sizeof(CXLEventRecordRaw))
/*
* Get Event Records output payload
* CXL rev 3.0 section 8.2.9.2.2; Table 8-50
*/
#define CXL_GET_EVENT_FLAG_OVERFLOW BIT(0)
#define CXL_GET_EVENT_FLAG_MORE_RECORDS BIT(1)
typedef struct CXLGetEventPayload {
uint8_t flags;
uint8_t reserved1;
uint16_t overflow_err_count;
uint64_t first_overflow_timestamp;
uint64_t last_overflow_timestamp;
uint16_t record_count;
uint8_t reserved2[0xa];
CXLEventRecordRaw records[];
} QEMU_PACKED CXLGetEventPayload;
#define CXL_EVENT_PAYLOAD_HDR_SIZE (sizeof(CXLGetEventPayload))
/*
* Clear Event Records input payload
* CXL rev 3.0 section 8.2.9.2.3; Table 8-51
*/
typedef struct CXLClearEventPayload {
uint8_t event_log; /* CXLEventLogType */
uint8_t clear_flags;
uint8_t nr_recs;
uint8_t reserved[3];
uint16_t handle[];
} CXLClearEventPayload;
/**
* Event Interrupt Policy
*
* CXL rev 3.0 section 8.2.9.2.4; Table 8-52
*/
typedef enum CXLEventIntMode {
CXL_INT_NONE = 0x00,
CXL_INT_MSI_MSIX = 0x01,
CXL_INT_FW = 0x02,
CXL_INT_RES = 0x03,
} CXLEventIntMode;
#define CXL_EVENT_INT_MODE_MASK 0x3
#define CXL_EVENT_INT_SETTING(vector) ((((uint8_t)vector & 0xf) << 4) | CXL_INT_MSI_MSIX)
typedef struct CXLEventInterruptPolicy {
uint8_t info_settings;
uint8_t warn_settings;
uint8_t failure_settings;
uint8_t fatal_settings;
uint8_t dyn_cap_settings;
} QEMU_PACKED CXLEventInterruptPolicy;
/* DCD is optional but other fields are not */
#define CXL_EVENT_INT_SETTING_MIN_LEN 4
/*
* General Media Event Record
* CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
*/
#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10
#define CXL_EVENT_GEN_MED_RES_SIZE 0x2e
typedef struct CXLEventGenMedia {
CXLEventRecordHdr hdr;
uint64_t phys_addr;
uint8_t descriptor;
uint8_t type;
uint8_t transaction_type;
uint16_t validity_flags;
uint8_t channel;
uint8_t rank;
uint8_t device[3];
uint8_t component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE];
uint8_t reserved[CXL_EVENT_GEN_MED_RES_SIZE];
} QEMU_PACKED CXLEventGenMedia;
/*
* DRAM Event Record
* CXL Rev 3.0 Section 8.2.9.2.1.2: Table 8-44
* All fields little endian.
*/
typedef struct CXLEventDram {
CXLEventRecordHdr hdr;
uint64_t phys_addr;
uint8_t descriptor;
uint8_t type;
uint8_t transaction_type;
uint16_t validity_flags;
uint8_t channel;
uint8_t rank;
uint8_t nibble_mask[3];
uint8_t bank_group;
uint8_t bank;
uint8_t row[3];
uint16_t column;
uint64_t correction_mask[4];
uint8_t reserved[0x17];
} QEMU_PACKED CXLEventDram;
/*
* Memory Module Event Record
* CXL Rev 3.0 Section 8.2.9.2.1.3: Table 8-45
* All fields little endian.
*/
typedef struct CXLEventMemoryModule {
CXLEventRecordHdr hdr;
uint8_t type;
uint8_t health_status;
uint8_t media_status;
uint8_t additional_status;
uint8_t life_used;
int16_t temperature;
uint32_t dirty_shutdown_count;
uint32_t corrected_volatile_error_count;
uint32_t corrected_persistent_error_count;
uint8_t reserved[0x3d];
} QEMU_PACKED CXLEventMemoryModule;
#endif /* CXL_EVENTS_H */

View file

@ -110,6 +110,7 @@ struct PCMachineClass {
bool smbios_defaults;
bool smbios_legacy_mode;
bool smbios_uuid_encoded;
SmbiosEntryPointType default_smbios_ep_type;
/* RAM / address space compat: */
bool gigabyte_align;

View file

@ -22,7 +22,7 @@ typedef enum VhostBackendType {
} VhostBackendType;
typedef enum VhostSetConfigType {
VHOST_SET_CONFIG_TYPE_MASTER = 0,
VHOST_SET_CONFIG_TYPE_FRONTEND = 0,
VHOST_SET_CONFIG_TYPE_MIGRATION = 1,
} VhostSetConfigType;

View file

@ -227,5 +227,6 @@ size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
unsigned out_num);
void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
const char *type);
uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n);
#endif

View file

@ -402,7 +402,7 @@ static inline bool virtio_has_feature(uint64_t features, unsigned int fbit)
return !!(features & (1ULL << fbit));
}
static inline bool virtio_vdev_has_feature(VirtIODevice *vdev,
static inline bool virtio_vdev_has_feature(const VirtIODevice *vdev,
unsigned int fbit)
{
return virtio_has_feature(vdev->guest_features, fbit);

View file

@ -8,11 +8,23 @@
#undef bswap64
#define bswap64(_x) __builtin_bswap64(_x)
static inline uint32_t bswap24(uint32_t x)
{
return (((x & 0x000000ffU) << 16) |
((x & 0x0000ff00U) << 0) |
((x & 0x00ff0000U) >> 16));
}
static inline void bswap16s(uint16_t *s)
{
*s = __builtin_bswap16(*s);
}
static inline void bswap24s(uint32_t *s)
{
*s = bswap24(*s & 0x00ffffffU);
}
static inline void bswap32s(uint32_t *s)
{
*s = __builtin_bswap32(*s);
@ -26,11 +38,13 @@ static inline void bswap64s(uint64_t *s)
#if HOST_BIG_ENDIAN
#define be_bswap(v, size) (v)
#define le_bswap(v, size) glue(__builtin_bswap, size)(v)
#define le_bswap24(v) bswap24(v)
#define be_bswaps(v, size)
#define le_bswaps(p, size) \
do { *p = glue(__builtin_bswap, size)(*p); } while (0)
#else
#define le_bswap(v, size) (v)
#define le_bswap24(v) (v)
#define be_bswap(v, size) glue(__builtin_bswap, size)(v)
#define le_bswaps(v, size)
#define be_bswaps(p, size) \
@ -176,6 +190,7 @@ CPU_CONVERT(le, 64, uint64_t)
* size is:
* b: 8 bits
* w: 16 bits
* 24: 24 bits
* l: 32 bits
* q: 64 bits
*
@ -248,6 +263,11 @@ static inline void stw_he_p(void *ptr, uint16_t v)
__builtin_memcpy(ptr, &v, sizeof(v));
}
static inline void st24_he_p(void *ptr, uint32_t v)
{
__builtin_memcpy(ptr, &v, 3);
}
static inline int ldl_he_p(const void *ptr)
{
int32_t r;
@ -297,6 +317,11 @@ static inline void stw_le_p(void *ptr, uint16_t v)
stw_he_p(ptr, le_bswap(v, 16));
}
static inline void st24_le_p(void *ptr, uint32_t v)
{
st24_he_p(ptr, le_bswap24(v));
}
static inline void stl_le_p(void *ptr, uint32_t v)
{
stl_he_p(ptr, le_bswap(v, 32));