From b17d69a1dab6dd5acf2646f24866d1ee66983ae0 Mon Sep 17 00:00:00 2001 From: Farhan Ali Date: Wed, 30 Apr 2025 11:50:10 -0700 Subject: [PATCH 1/3] util: Add functions for s390x mmio read/write Starting with z15 (or newer) we can execute mmio instructions from userspace. On older platforms where we don't have these instructions available we can fallback to using system calls to access the PCI mapped resources. This patch adds helper functions for mmio reads and writes for s390x. Reviewed-by: Stefan Hajnoczi Reviewed-by: Niklas Schnelle Signed-off-by: Farhan Ali Acked-by: Thomas Huth Message-id: 20250430185012.2303-2-alifm@linux.ibm.com Signed-off-by: Stefan Hajnoczi --- include/qemu/s390x_pci_mmio.h | 24 ++++++ util/meson.build | 2 + util/s390x_pci_mmio.c | 146 ++++++++++++++++++++++++++++++++++ 3 files changed, 172 insertions(+) create mode 100644 include/qemu/s390x_pci_mmio.h create mode 100644 util/s390x_pci_mmio.c diff --git a/include/qemu/s390x_pci_mmio.h b/include/qemu/s390x_pci_mmio.h new file mode 100644 index 0000000000..c5f63ecefa --- /dev/null +++ b/include/qemu/s390x_pci_mmio.h @@ -0,0 +1,24 @@ +/* + * s390x PCI MMIO definitions + * + * Copyright 2025 IBM Corp. + * Author(s): Farhan Ali + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef S390X_PCI_MMIO_H +#define S390X_PCI_MMIO_H + +#ifdef __s390x__ +uint8_t s390x_pci_mmio_read_8(const void *ioaddr); +uint16_t s390x_pci_mmio_read_16(const void *ioaddr); +uint32_t s390x_pci_mmio_read_32(const void *ioaddr); +uint64_t s390x_pci_mmio_read_64(const void *ioaddr); + +void s390x_pci_mmio_write_8(void *ioaddr, uint8_t val); +void s390x_pci_mmio_write_16(void *ioaddr, uint16_t val); +void s390x_pci_mmio_write_32(void *ioaddr, uint32_t val); +void s390x_pci_mmio_write_64(void *ioaddr, uint64_t val); +#endif /* __s390x__ */ + +#endif /* S390X_PCI_MMIO_H */ diff --git a/util/meson.build b/util/meson.build index 780b5977a8..acb21592f9 100644 --- a/util/meson.build +++ b/util/meson.build @@ -131,4 +131,6 @@ elif cpu in ['ppc', 'ppc64'] util_ss.add(files('cpuinfo-ppc.c')) elif cpu in ['riscv32', 'riscv64'] util_ss.add(files('cpuinfo-riscv.c')) +elif cpu == 's390x' + util_ss.add(files('s390x_pci_mmio.c')) endif diff --git a/util/s390x_pci_mmio.c b/util/s390x_pci_mmio.c new file mode 100644 index 0000000000..5ab24fa474 --- /dev/null +++ b/util/s390x_pci_mmio.c @@ -0,0 +1,146 @@ +/* + * s390x PCI MMIO definitions + * + * Copyright 2025 IBM Corp. + * Author(s): Farhan Ali + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include +#include "qemu/s390x_pci_mmio.h" +#include "elf.h" + +union register_pair { + unsigned __int128 pair; + struct { + uint64_t even; + uint64_t odd; + }; +}; + +static bool is_mio_supported; + +static __attribute__((constructor)) void check_is_mio_supported(void) +{ + is_mio_supported = !!(qemu_getauxval(AT_HWCAP) & HWCAP_S390_PCI_MIO); +} + +static uint64_t s390x_pcilgi(const void *ioaddr, size_t len) +{ + union register_pair ioaddr_len = { .even = (uint64_t)ioaddr, + .odd = len }; + uint64_t val; + int cc; + + asm volatile( + /* pcilgi */ + ".insn rre,0xb9d60000,%[val],%[ioaddr_len]\n" + "ipm %[cc]\n" + "srl %[cc],28\n" + : [cc] "=d"(cc), [val] "=d"(val), + [ioaddr_len] "+d"(ioaddr_len.pair) :: "cc"); + + if (cc) { + val = -1ULL; + } + + return val; +} + +static void s390x_pcistgi(void *ioaddr, uint64_t val, size_t len) +{ + union register_pair ioaddr_len = {.even = (uint64_t)ioaddr, .odd = len}; + + asm volatile ( + /* pcistgi */ + ".insn rre,0xb9d40000,%[val],%[ioaddr_len]\n" + : [ioaddr_len] "+d" (ioaddr_len.pair) + : [val] "d" (val) + : "cc", "memory"); +} + +uint8_t s390x_pci_mmio_read_8(const void *ioaddr) +{ + uint8_t val = 0; + + if (is_mio_supported) { + val = s390x_pcilgi(ioaddr, sizeof(val)); + } else { + syscall(__NR_s390_pci_mmio_read, ioaddr, &val, sizeof(val)); + } + return val; +} + +uint16_t s390x_pci_mmio_read_16(const void *ioaddr) +{ + uint16_t val = 0; + + if (is_mio_supported) { + val = s390x_pcilgi(ioaddr, sizeof(val)); + } else { + syscall(__NR_s390_pci_mmio_read, ioaddr, &val, sizeof(val)); + } + return val; +} + +uint32_t s390x_pci_mmio_read_32(const void *ioaddr) +{ + uint32_t val = 0; + + if (is_mio_supported) { + val = s390x_pcilgi(ioaddr, sizeof(val)); + } else { + syscall(__NR_s390_pci_mmio_read, ioaddr, &val, sizeof(val)); + } + return val; +} + +uint64_t s390x_pci_mmio_read_64(const void *ioaddr) +{ + uint64_t val = 0; + + if (is_mio_supported) { + val = s390x_pcilgi(ioaddr, sizeof(val)); + } else { + syscall(__NR_s390_pci_mmio_read, ioaddr, &val, sizeof(val)); + } + return val; +} + +void s390x_pci_mmio_write_8(void *ioaddr, uint8_t val) +{ + if (is_mio_supported) { + s390x_pcistgi(ioaddr, val, sizeof(val)); + } else { + syscall(__NR_s390_pci_mmio_write, ioaddr, &val, sizeof(val)); + } +} + +void s390x_pci_mmio_write_16(void *ioaddr, uint16_t val) +{ + if (is_mio_supported) { + s390x_pcistgi(ioaddr, val, sizeof(val)); + } else { + syscall(__NR_s390_pci_mmio_write, ioaddr, &val, sizeof(val)); + } +} + +void s390x_pci_mmio_write_32(void *ioaddr, uint32_t val) +{ + if (is_mio_supported) { + s390x_pcistgi(ioaddr, val, sizeof(val)); + } else { + syscall(__NR_s390_pci_mmio_write, ioaddr, &val, sizeof(val)); + } +} + +void s390x_pci_mmio_write_64(void *ioaddr, uint64_t val) +{ + if (is_mio_supported) { + s390x_pcistgi(ioaddr, val, sizeof(val)); + } else { + syscall(__NR_s390_pci_mmio_write, ioaddr, &val, sizeof(val)); + } +} From 40f940923f8978ce1ffe8e735cc42ade2e519be3 Mon Sep 17 00:00:00 2001 From: Farhan Ali Date: Wed, 30 Apr 2025 11:50:11 -0700 Subject: [PATCH 2/3] include: Add a header to define host PCI MMIO functions Add a generic API for host PCI MMIO reads/writes (e.g. Linux VFIO BAR accesses). The functions access little endian memory and returns the result in host cpu endianness. Reviewed-by: Stefan Hajnoczi Signed-off-by: Farhan Ali Reviewed-by: Thomas Huth Message-id: 20250430185012.2303-3-alifm@linux.ibm.com Signed-off-by: Stefan Hajnoczi --- include/qemu/host-pci-mmio.h | 136 +++++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 include/qemu/host-pci-mmio.h diff --git a/include/qemu/host-pci-mmio.h b/include/qemu/host-pci-mmio.h new file mode 100644 index 0000000000..a8ed9938ac --- /dev/null +++ b/include/qemu/host-pci-mmio.h @@ -0,0 +1,136 @@ +/* + * API for host PCI MMIO accesses (e.g. Linux VFIO BARs) + * + * Copyright 2025 IBM Corp. + * Author(s): Farhan Ali + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HOST_PCI_MMIO_H +#define HOST_PCI_MMIO_H + +#include "qemu/bswap.h" +#include "qemu/s390x_pci_mmio.h" + +static inline uint8_t host_pci_ldub_p(const void *ioaddr) +{ + uint8_t ret = 0; +#ifdef __s390x__ + ret = s390x_pci_mmio_read_8(ioaddr); +#else + ret = ldub_p(ioaddr); +#endif + + return ret; +} + +static inline uint16_t host_pci_lduw_le_p(const void *ioaddr) +{ + uint16_t ret = 0; +#ifdef __s390x__ + ret = le16_to_cpu(s390x_pci_mmio_read_16(ioaddr)); +#else + ret = lduw_le_p(ioaddr); +#endif + + return ret; +} + +static inline uint32_t host_pci_ldl_le_p(const void *ioaddr) +{ + uint32_t ret = 0; +#ifdef __s390x__ + ret = le32_to_cpu(s390x_pci_mmio_read_32(ioaddr)); +#else + ret = ldl_le_p(ioaddr); +#endif + + return ret; +} + +static inline uint64_t host_pci_ldq_le_p(const void *ioaddr) +{ + uint64_t ret = 0; +#ifdef __s390x__ + ret = le64_to_cpu(s390x_pci_mmio_read_64(ioaddr)); +#else + ret = ldq_le_p(ioaddr); +#endif + + return ret; +} + +static inline void host_pci_stb_p(void *ioaddr, uint8_t val) +{ +#ifdef __s390x__ + s390x_pci_mmio_write_8(ioaddr, val); +#else + stb_p(ioaddr, val); +#endif +} + +static inline void host_pci_stw_le_p(void *ioaddr, uint16_t val) +{ +#ifdef __s390x__ + s390x_pci_mmio_write_16(ioaddr, cpu_to_le16(val)); +#else + stw_le_p(ioaddr, val); +#endif +} + +static inline void host_pci_stl_le_p(void *ioaddr, uint32_t val) +{ +#ifdef __s390x__ + s390x_pci_mmio_write_32(ioaddr, cpu_to_le32(val)); +#else + stl_le_p(ioaddr, val); +#endif +} + +static inline void host_pci_stq_le_p(void *ioaddr, uint64_t val) +{ +#ifdef __s390x__ + s390x_pci_mmio_write_64(ioaddr, cpu_to_le64(val)); +#else + stq_le_p(ioaddr, val); +#endif +} + +static inline uint64_t host_pci_ldn_le_p(const void *ioaddr, int sz) +{ + switch (sz) { + case 1: + return host_pci_ldub_p(ioaddr); + case 2: + return host_pci_lduw_le_p(ioaddr); + case 4: + return host_pci_ldl_le_p(ioaddr); + case 8: + return host_pci_ldq_le_p(ioaddr); + default: + g_assert_not_reached(); + } +} + +static inline void host_pci_stn_le_p(void *ioaddr, int sz, uint64_t v) +{ + switch (sz) { + case 1: + host_pci_stb_p(ioaddr, v); + break; + case 2: + host_pci_stw_le_p(ioaddr, v); + break; + case 4: + host_pci_stl_le_p(ioaddr, v); + break; + case 8: + host_pci_stq_le_p(ioaddr, v); + break; + default: + g_assert_not_reached(); + } +} + +#endif From 624379be3a8136db420ec3a3fee36fe91e9f789e Mon Sep 17 00:00:00 2001 From: Farhan Ali Date: Wed, 30 Apr 2025 11:50:12 -0700 Subject: [PATCH 3/3] block/nvme: Use host PCI MMIO API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the host PCI MMIO functions to read/write to NVMe registers, rather than directly accessing them. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Stefan Hajnoczi Reviewed-by: Thomas Huth Signed-off-by: Farhan Ali Message-id: 20250430185012.2303-4-alifm@linux.ibm.com Signed-off-by: Stefan Hajnoczi --- block/nvme.c | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/block/nvme.c b/block/nvme.c index bbf7c23dcd..8df53ee4ca 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -18,6 +18,7 @@ #include "qobject/qstring.h" #include "qemu/defer-call.h" #include "qemu/error-report.h" +#include "qemu/host-pci-mmio.h" #include "qemu/main-loop.h" #include "qemu/module.h" #include "qemu/cutils.h" @@ -60,7 +61,7 @@ typedef struct { uint8_t *queue; uint64_t iova; /* Hardware MMIO register */ - volatile uint32_t *doorbell; + uint32_t *doorbell; } NVMeQueue; typedef struct { @@ -100,7 +101,7 @@ struct BDRVNVMeState { QEMUVFIOState *vfio; void *bar0_wo_map; /* Memory mapped registers */ - volatile struct { + struct { uint32_t sq_tail; uint32_t cq_head; } *doorbells; @@ -292,7 +293,7 @@ static void nvme_kick(NVMeQueuePair *q) assert(!(q->sq.tail & 0xFF00)); /* Fence the write to submission queue entry before notifying the device. */ smp_wmb(); - *q->sq.doorbell = cpu_to_le32(q->sq.tail); + host_pci_stl_le_p(q->sq.doorbell, q->sq.tail); q->inflight += q->need_kick; q->need_kick = 0; } @@ -441,7 +442,7 @@ static bool nvme_process_completion(NVMeQueuePair *q) if (progress) { /* Notify the device so it can post more completions. */ smp_mb_release(); - *q->cq.doorbell = cpu_to_le32(q->cq.head); + host_pci_stl_le_p(q->cq.doorbell, q->cq.head); nvme_wake_free_req_locked(q); } @@ -460,7 +461,7 @@ static void nvme_process_completion_bh(void *opaque) * so notify the device that it has space to fill in more completions now. */ smp_mb_release(); - *q->cq.doorbell = cpu_to_le32(q->cq.head); + host_pci_stl_le_p(q->cq.doorbell, q->cq.head); nvme_wake_free_req_locked(q); nvme_process_completion(q); @@ -749,9 +750,10 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, int ret; uint64_t cap; uint32_t ver; + uint32_t cc; uint64_t timeout_ms; uint64_t deadline, now; - volatile NvmeBar *regs = NULL; + NvmeBar *regs = NULL; qemu_co_mutex_init(&s->dma_map_lock); qemu_co_queue_init(&s->dma_flush_queue); @@ -779,7 +781,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, /* Perform initialize sequence as described in NVMe spec "7.6.1 * Initialization". */ - cap = le64_to_cpu(regs->cap); + cap = host_pci_ldq_le_p(®s->cap); trace_nvme_controller_capability_raw(cap); trace_nvme_controller_capability("Maximum Queue Entries Supported", 1 + NVME_CAP_MQES(cap)); @@ -805,16 +807,17 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, bs->bl.request_alignment = s->page_size; timeout_ms = MIN(500 * NVME_CAP_TO(cap), 30000); - ver = le32_to_cpu(regs->vs); + ver = host_pci_ldl_le_p(®s->vs); trace_nvme_controller_spec_version(extract32(ver, 16, 16), extract32(ver, 8, 8), extract32(ver, 0, 8)); /* Reset device to get a clean state. */ - regs->cc = cpu_to_le32(le32_to_cpu(regs->cc) & 0xFE); + cc = host_pci_ldl_le_p(®s->cc); + host_pci_stl_le_p(®s->cc, cc & 0xFE); /* Wait for CSTS.RDY = 0. */ deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * SCALE_MS; - while (NVME_CSTS_RDY(le32_to_cpu(regs->csts))) { + while (NVME_CSTS_RDY(host_pci_ldl_le_p(®s->csts))) { if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) { error_setg(errp, "Timeout while waiting for device to reset (%" PRId64 " ms)", @@ -843,19 +846,21 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, s->queues[INDEX_ADMIN] = q; s->queue_count = 1; QEMU_BUILD_BUG_ON((NVME_QUEUE_SIZE - 1) & 0xF000); - regs->aqa = cpu_to_le32(((NVME_QUEUE_SIZE - 1) << AQA_ACQS_SHIFT) | - ((NVME_QUEUE_SIZE - 1) << AQA_ASQS_SHIFT)); - regs->asq = cpu_to_le64(q->sq.iova); - regs->acq = cpu_to_le64(q->cq.iova); + host_pci_stl_le_p(®s->aqa, + ((NVME_QUEUE_SIZE - 1) << AQA_ACQS_SHIFT) | + ((NVME_QUEUE_SIZE - 1) << AQA_ASQS_SHIFT)); + host_pci_stq_le_p(®s->asq, q->sq.iova); + host_pci_stq_le_p(®s->acq, q->cq.iova); /* After setting up all control registers we can enable device now. */ - regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << CC_IOCQES_SHIFT) | - (ctz32(NVME_SQ_ENTRY_BYTES) << CC_IOSQES_SHIFT) | - CC_EN_MASK); + host_pci_stl_le_p(®s->cc, + (ctz32(NVME_CQ_ENTRY_BYTES) << CC_IOCQES_SHIFT) | + (ctz32(NVME_SQ_ENTRY_BYTES) << CC_IOSQES_SHIFT) | + CC_EN_MASK); /* Wait for CSTS.RDY = 1. */ now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); deadline = now + timeout_ms * SCALE_MS; - while (!NVME_CSTS_RDY(le32_to_cpu(regs->csts))) { + while (!NVME_CSTS_RDY(host_pci_ldl_le_p(®s->csts))) { if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) { error_setg(errp, "Timeout while waiting for device to start (%" PRId64 " ms)",