pcie_sriov: Allow user to create SR-IOV device

A user can create a SR-IOV device by specifying the PF with the
sriov-pf property of the VFs. The VFs must be added before the PF.

A user-creatable VF must have PCIDeviceClass::sriov_vf_user_creatable
set. Such a VF cannot refer to the PF because it is created before the
PF.

A PF that user-creatable VFs can be attached calls
pcie_sriov_pf_init_from_user_created_vfs() during realization and
pcie_sriov_pf_exit() when exiting.

Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Message-Id: <20250314-sriov-v9-5-57dae8ae3ab5@daynix.com>
Tested-by: Yui Washizu <yui.washidu@gmail.com>
Tested-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
This commit is contained in:
Akihiko Odaki 2025-03-14 15:14:54 +09:00 committed by Michael S. Tsirkin
parent d2f5bb7849
commit 19e55471d4
4 changed files with 291 additions and 83 deletions

View file

@ -101,6 +101,7 @@ static const Property pci_props[] = {
QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
DEFINE_PROP_SIZE32("x-max-bounce-buffer-size", PCIDevice,
max_bounce_buffer_size, DEFAULT_MAX_BOUNCE_BUFFER_SIZE),
DEFINE_PROP_STRING("sriov-pf", PCIDevice, sriov_pf),
DEFINE_PROP_BIT("x-pcie-ext-tag", PCIDevice, cap_present,
QEMU_PCIE_EXT_TAG_BITNR, true),
{ .name = "busnr", .info = &prop_pci_busnr },
@ -1112,13 +1113,8 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp)
dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
}
/*
* With SR/IOV and ARI, a device at function 0 need not be a multifunction
* device, as it may just be a VF that ended up with function 0 in
* the legacy PCI interpretation. Avoid failing in such cases:
*/
if (pci_is_vf(dev) &&
dev->exp.sriov_vf.pf->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
/* SR/IOV is not handled here. */
if (pci_is_vf(dev)) {
return;
}
@ -1151,7 +1147,8 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp)
}
/* function 0 indicates single function, so function > 0 must be NULL */
for (func = 1; func < PCI_FUNC_MAX; ++func) {
if (bus->devices[PCI_DEVFN(slot, func)]) {
PCIDevice *device = bus->devices[PCI_DEVFN(slot, func)];
if (device && !pci_is_vf(device)) {
error_setg(errp, "PCI: %x.0 indicates single function, "
"but %x.%x is already populated.",
slot, slot, func);
@ -1439,6 +1436,7 @@ static void pci_qdev_unrealize(DeviceState *dev)
pci_unregister_io_regions(pci_dev);
pci_del_option_rom(pci_dev);
pcie_sriov_unregister_device(pci_dev);
if (pc->exit) {
pc->exit(pci_dev);
@ -1470,7 +1468,6 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
pcibus_t size = memory_region_size(memory);
uint8_t hdr_type;
assert(!pci_is_vf(pci_dev)); /* VFs must use pcie_sriov_vf_register_bar */
assert(region_num >= 0);
assert(region_num < PCI_NUM_REGIONS);
assert(is_power_of_2(size));
@ -1482,7 +1479,6 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
r = &pci_dev->io_regions[region_num];
assert(!r->size);
r->addr = PCI_BAR_UNMAPPED;
r->size = size;
r->type = type;
r->memory = memory;
@ -1490,22 +1486,35 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
? pci_get_bus(pci_dev)->address_space_io
: pci_get_bus(pci_dev)->address_space_mem;
wmask = ~(size - 1);
if (region_num == PCI_ROM_SLOT) {
/* ROM enable bit is writable */
wmask |= PCI_ROM_ADDRESS_ENABLE;
}
if (pci_is_vf(pci_dev)) {
PCIDevice *pf = pci_dev->exp.sriov_vf.pf;
assert(!pf || type == pf->exp.sriov_pf.vf_bar_type[region_num]);
addr = pci_bar(pci_dev, region_num);
pci_set_long(pci_dev->config + addr, type);
if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) &&
r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
pci_set_quad(pci_dev->wmask + addr, wmask);
pci_set_quad(pci_dev->cmask + addr, ~0ULL);
r->addr = pci_bar_address(pci_dev, region_num, r->type, r->size);
if (r->addr != PCI_BAR_UNMAPPED) {
memory_region_add_subregion_overlap(r->address_space,
r->addr, r->memory, 1);
}
} else {
pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff);
pci_set_long(pci_dev->cmask + addr, 0xffffffff);
r->addr = PCI_BAR_UNMAPPED;
wmask = ~(size - 1);
if (region_num == PCI_ROM_SLOT) {
/* ROM enable bit is writable */
wmask |= PCI_ROM_ADDRESS_ENABLE;
}
addr = pci_bar(pci_dev, region_num);
pci_set_long(pci_dev->config + addr, type);
if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) &&
r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
pci_set_quad(pci_dev->wmask + addr, wmask);
pci_set_quad(pci_dev->cmask + addr, ~0ULL);
} else {
pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff);
pci_set_long(pci_dev->cmask + addr, 0xffffffff);
}
}
}
@ -2272,6 +2281,11 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
}
}
if (!pcie_sriov_register_device(pci_dev, errp)) {
pci_qdev_unrealize(DEVICE(pci_dev));
return;
}
/*
* A PCIe Downstream Port that do not have ARI Forwarding enabled must
* associate only Device 0 with the device attached to the bus