mirror of
https://github.com/Motorhead1991/qemu.git
synced 2025-08-07 01:33:56 -06:00
nvme: introduce PMR support from NVMe 1.4 spec
This patch introduces support for PMR (Persistent Memory Region), which has been defined as part of the NVMe 1.4 spec. The user can now specify a pmrdev option that should point to a HostMemoryBackend. The pmrdev memory region will subsequently be exposed as PCI BAR 2 in the emulated NVMe device. The guest OS can perform MMIO reads and writes to the PMR region, whose contents will stay persistent across system reboots. Signed-off-by: Andrzej Jakowski <andrzej.jakowski@linux.intel.com> Reviewed-by: Klaus Jensen <k.jensen@samsung.com> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> Message-Id: <20200330164656.9348-1-andrzej.jakowski@linux.intel.com> Reviewed-by: Keith Busch <kbusch@kernel.org> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
This commit is contained in:
parent
eb8a0cf3ba
commit
6cf9413229
5 changed files with 288 additions and 1 deletions
|
@ -7,12 +7,12 @@ common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o
|
|||
common-obj-$(CONFIG_XEN) += xen-block.o
|
||||
common-obj-$(CONFIG_ECC) += ecc.o
|
||||
common-obj-$(CONFIG_ONENAND) += onenand.o
|
||||
common-obj-$(CONFIG_NVME_PCI) += nvme.o
|
||||
common-obj-$(CONFIG_SWIM) += swim.o
|
||||
|
||||
common-obj-$(CONFIG_SH4) += tc58128.o
|
||||
|
||||
obj-$(CONFIG_VIRTIO_BLK) += virtio-blk.o
|
||||
obj-$(CONFIG_VHOST_USER_BLK) += vhost-user-blk.o
|
||||
obj-$(CONFIG_NVME_PCI) += nvme.o
|
||||
|
||||
obj-y += dataplane/
|
||||
|
|
109
hw/block/nvme.c
109
hw/block/nvme.c
|
@ -19,10 +19,19 @@
|
|||
* -drive file=<file>,if=none,id=<drive_id>
|
||||
* -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \
|
||||
* cmb_size_mb=<cmb_size_mb[optional]>, \
|
||||
* [pmrdev=<mem_backend_file_id>,] \
|
||||
* num_queues=<N[optional]>
|
||||
*
|
||||
* Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
|
||||
* offset 0 in BAR2 and supports only WDS, RDS and SQS for now.
|
||||
*
|
||||
* cmb_size_mb= and pmrdev= options are mutually exclusive due to limitation
|
||||
* in available BARs. cmb_size_mb= will take precedence over pmrdev= when
|
||||
* both are provided.
|
||||
* Enabling pmr emulation can be achieved by pointing to memory-backend-file.
|
||||
* For example:
|
||||
* -object memory-backend-file,id=<mem_id>,share=on,mem-path=<file_path>, \
|
||||
* size=<size> .... -device nvme,...,pmrdev=<mem_id>
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
|
@ -35,7 +44,9 @@
|
|||
#include "sysemu/sysemu.h"
|
||||
#include "qapi/error.h"
|
||||
#include "qapi/visitor.h"
|
||||
#include "sysemu/hostmem.h"
|
||||
#include "sysemu/block-backend.h"
|
||||
#include "exec/ram_addr.h"
|
||||
|
||||
#include "qemu/log.h"
|
||||
#include "qemu/module.h"
|
||||
|
@ -1141,6 +1152,26 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
|
|||
NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly,
|
||||
"invalid write to read only CMBSZ, ignored");
|
||||
return;
|
||||
case 0xE00: /* PMRCAP */
|
||||
NVME_GUEST_ERR(nvme_ub_mmiowr_pmrcap_readonly,
|
||||
"invalid write to PMRCAP register, ignored");
|
||||
return;
|
||||
case 0xE04: /* TODO PMRCTL */
|
||||
break;
|
||||
case 0xE08: /* PMRSTS */
|
||||
NVME_GUEST_ERR(nvme_ub_mmiowr_pmrsts_readonly,
|
||||
"invalid write to PMRSTS register, ignored");
|
||||
return;
|
||||
case 0xE0C: /* PMREBS */
|
||||
NVME_GUEST_ERR(nvme_ub_mmiowr_pmrebs_readonly,
|
||||
"invalid write to PMREBS register, ignored");
|
||||
return;
|
||||
case 0xE10: /* PMRSWTP */
|
||||
NVME_GUEST_ERR(nvme_ub_mmiowr_pmrswtp_readonly,
|
||||
"invalid write to PMRSWTP register, ignored");
|
||||
return;
|
||||
case 0xE14: /* TODO PMRMSC */
|
||||
break;
|
||||
default:
|
||||
NVME_GUEST_ERR(nvme_ub_mmiowr_invalid,
|
||||
"invalid MMIO write,"
|
||||
|
@ -1169,6 +1200,16 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size)
|
|||
}
|
||||
|
||||
if (addr < sizeof(n->bar)) {
|
||||
/*
|
||||
* When PMRWBM bit 1 is set then read from
|
||||
* PMRSTS should ensure prior writes
|
||||
* made it to persistent media
|
||||
*/
|
||||
if (addr == 0xE08 &&
|
||||
(NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) {
|
||||
qemu_ram_writeback(n->pmrdev->mr.ram_block,
|
||||
0, n->pmrdev->size);
|
||||
}
|
||||
memcpy(&val, ptr + addr, size);
|
||||
} else {
|
||||
NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs,
|
||||
|
@ -1332,6 +1373,23 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
|
|||
error_setg(errp, "serial property not set");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!n->cmb_size_mb && n->pmrdev) {
|
||||
if (host_memory_backend_is_mapped(n->pmrdev)) {
|
||||
char *path = object_get_canonical_path_component(OBJECT(n->pmrdev));
|
||||
error_setg(errp, "can't use already busy memdev: %s", path);
|
||||
g_free(path);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!is_power_of_2(n->pmrdev->size)) {
|
||||
error_setg(errp, "pmr backend size needs to be power of 2 in size");
|
||||
return;
|
||||
}
|
||||
|
||||
host_memory_backend_set_mapped(n->pmrdev, true);
|
||||
}
|
||||
|
||||
blkconf_blocksizes(&n->conf);
|
||||
if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
|
||||
false, errp)) {
|
||||
|
@ -1415,6 +1473,51 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
|
|||
PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
|
||||
PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
|
||||
|
||||
} else if (n->pmrdev) {
|
||||
/* Controller Capabilities register */
|
||||
NVME_CAP_SET_PMRS(n->bar.cap, 1);
|
||||
|
||||
/* PMR Capabilities register */
|
||||
n->bar.pmrcap = 0;
|
||||
NVME_PMRCAP_SET_RDS(n->bar.pmrcap, 0);
|
||||
NVME_PMRCAP_SET_WDS(n->bar.pmrcap, 0);
|
||||
NVME_PMRCAP_SET_BIR(n->bar.pmrcap, 2);
|
||||
NVME_PMRCAP_SET_PMRTU(n->bar.pmrcap, 0);
|
||||
/* Turn on bit 1 support */
|
||||
NVME_PMRCAP_SET_PMRWBM(n->bar.pmrcap, 0x02);
|
||||
NVME_PMRCAP_SET_PMRTO(n->bar.pmrcap, 0);
|
||||
NVME_PMRCAP_SET_CMSS(n->bar.pmrcap, 0);
|
||||
|
||||
/* PMR Control register */
|
||||
n->bar.pmrctl = 0;
|
||||
NVME_PMRCTL_SET_EN(n->bar.pmrctl, 0);
|
||||
|
||||
/* PMR Status register */
|
||||
n->bar.pmrsts = 0;
|
||||
NVME_PMRSTS_SET_ERR(n->bar.pmrsts, 0);
|
||||
NVME_PMRSTS_SET_NRDY(n->bar.pmrsts, 0);
|
||||
NVME_PMRSTS_SET_HSTS(n->bar.pmrsts, 0);
|
||||
NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 0);
|
||||
|
||||
/* PMR Elasticity Buffer Size register */
|
||||
n->bar.pmrebs = 0;
|
||||
NVME_PMREBS_SET_PMRSZU(n->bar.pmrebs, 0);
|
||||
NVME_PMREBS_SET_RBB(n->bar.pmrebs, 0);
|
||||
NVME_PMREBS_SET_PMRWBZ(n->bar.pmrebs, 0);
|
||||
|
||||
/* PMR Sustained Write Throughput register */
|
||||
n->bar.pmrswtp = 0;
|
||||
NVME_PMRSWTP_SET_PMRSWTU(n->bar.pmrswtp, 0);
|
||||
NVME_PMRSWTP_SET_PMRSWTV(n->bar.pmrswtp, 0);
|
||||
|
||||
/* PMR Memory Space Control register */
|
||||
n->bar.pmrmsc = 0;
|
||||
NVME_PMRMSC_SET_CMSE(n->bar.pmrmsc, 0);
|
||||
NVME_PMRMSC_SET_CBA(n->bar.pmrmsc, 0);
|
||||
|
||||
pci_register_bar(pci_dev, NVME_PMRCAP_BIR(n->bar.pmrcap),
|
||||
PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
|
||||
PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmrdev->mr);
|
||||
}
|
||||
|
||||
for (i = 0; i < n->num_namespaces; i++) {
|
||||
|
@ -1445,11 +1548,17 @@ static void nvme_exit(PCIDevice *pci_dev)
|
|||
if (n->cmb_size_mb) {
|
||||
g_free(n->cmbuf);
|
||||
}
|
||||
|
||||
if (n->pmrdev) {
|
||||
host_memory_backend_set_mapped(n->pmrdev, false);
|
||||
}
|
||||
msix_uninit_exclusive_bar(pci_dev);
|
||||
}
|
||||
|
||||
static Property nvme_props[] = {
|
||||
DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf),
|
||||
DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmrdev, TYPE_MEMORY_BACKEND,
|
||||
HostMemoryBackend *),
|
||||
DEFINE_PROP_STRING("serial", NvmeCtrl, serial),
|
||||
DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, cmb_size_mb, 0),
|
||||
DEFINE_PROP_UINT32("num_queues", NvmeCtrl, num_queues, 64),
|
||||
|
|
|
@ -83,6 +83,8 @@ typedef struct NvmeCtrl {
|
|||
uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */
|
||||
|
||||
char *serial;
|
||||
HostMemoryBackend *pmrdev;
|
||||
|
||||
NvmeNamespace *namespaces;
|
||||
NvmeSQueue **sq;
|
||||
NvmeCQueue **cq;
|
||||
|
|
|
@ -110,6 +110,10 @@ nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CA
|
|||
nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)"
|
||||
nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored"
|
||||
nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored"
|
||||
nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored"
|
||||
nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored"
|
||||
nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored"
|
||||
nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored"
|
||||
nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64""
|
||||
nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64""
|
||||
nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64""
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue