mirror of
https://github.com/Motorhead1991/qemu.git
synced 2025-08-04 08:13:54 -06:00
ppc patch queue 2019-04-26
Here's the first ppc target pull request for qemu-4.1. This has a number of things that have accumulated while qemu-4.0 was frozen. * A number of emulated MMU improvements from Ben Herrenschmidt * Assorted cleanups fro Greg Kurz * A large set of mostly mechanical cleanups from me to make target/ppc much closer to compliant with the modern coding style * Support for passthrough of NVIDIA GPUs using NVLink2 As well as some other assorted fixes. -----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEEdfRlhq5hpmzETofcbDjKyiDZs5IFAlzCnusACgkQbDjKyiDZ s5LfhhAAuem5UBGKPKPj33c87HC+GGG+S4y89ic3ebyKplWulGgouHCa4Dnc7Y5m 9MfIEcljRDpuRJCEONo6yg9aaRb3cW2Go9TpTwxmF8o1suG/v5bIQIdiRbBuMa2t yhNujVg5kkWSU1G4mCZjL9FS2ADPsxsKZVd73DPEqjlNJg981+2qtSnfR8SXhfnk dSSKxyfC6Hq1+uhGkLI+xtft+BCTWOstjz+efHpZ5l2mbiaMeh7zMKrIXXy/FtKA ufIyxbZznMS5MAZk7t90YldznfwOCqfh3di1kx8GTZ40LkBKbuI5LLHTG0sT75z5 LHwFuLkBgWmS8RyIRRh9opr7ifrayHx8bQFpW368Qu+PbPzUCcTVIrWUfPmaNR74 CkYJvhiYZfTwKtUeP7b2wUkHpZF4KINI4TKNaS4QAlm3DNbO67DFYkBrytpXsSzv smEpe+sqlbY40olw9q4ESP80r+kGdEPLkRjfdj0R7qS4fsqAH1bjuSkNqlPaCTJQ hNsoz2D+f56z0bBq4x8FRzDpqnBkdy4x6PlLxkJuAaV7WAtvq7n7tiMA3TRr/rIB OYFP2xPNajjP8MfyOB94+S4WDltmsgXoM7HyyvrKp2JBpe7mFjpep5fMp5GUpweV OOYrTsN1Nuu3kFpeimEc+IOyp1BWXnJF4vHhKTOqHeqZEs5Fgus= =RpAK -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-4.1-20190426' into staging ppc patch queue 2019-04-26 Here's the first ppc target pull request for qemu-4.1. This has a number of things that have accumulated while qemu-4.0 was frozen. * A number of emulated MMU improvements from Ben Herrenschmidt * Assorted cleanups fro Greg Kurz * A large set of mostly mechanical cleanups from me to make target/ppc much closer to compliant with the modern coding style * Support for passthrough of NVIDIA GPUs using NVLink2 As well as some other assorted fixes. 
# gpg: Signature made Fri 26 Apr 2019 07:02:19 BST # gpg: using RSA key 75F46586AE61A66CC44E87DC6C38CACA20D9B392 # gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>" [full] # gpg: aka "David Gibson (Red Hat) <dgibson@redhat.com>" [full] # gpg: aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>" [full] # gpg: aka "David Gibson (kernel.org) <dwg@kernel.org>" [unknown] # Primary key fingerprint: 75F4 6586 AE61 A66C C44E 87DC 6C38 CACA 20D9 B392 * remotes/dgibson/tags/ppc-for-4.1-20190426: (36 commits) target/ppc: improve performance of large BAT invalidations ppc/hash32: Rework R and C bit updates ppc/hash64: Rework R and C bit updates ppc/spapr: Use proper HPTE accessors for H_READ target/ppc: Don't check UPRT in radix mode when in HV real mode target/ppc/kvm: Convert DPRINTF to traces target/ppc/trace-events: Fix trivial typo spapr: Drop duplicate PCI swizzle code spapr_pci: Get rid of duplicate code for node name creation target/ppc: Style fixes for translate/spe-impl.inc.c target/ppc: Style fixes for translate/vmx-impl.inc.c target/ppc: Style fixes for translate/vsx-impl.inc.c target/ppc: Style fixes for translate/fp-impl.inc.c target/ppc: Style fixes for translate.c target/ppc: Style fixes for translate_init.inc.c target/ppc: Style fixes for monitor.c target/ppc: Style fixes for mmu_helper.c target/ppc: Style fixes for mmu-hash64.[ch] target/ppc: Style fixes for mmu-hash32.[ch] target/ppc: Style fixes for misc_helper.c ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
9ec34ecc97
45 changed files with 2152 additions and 973 deletions
|
@ -9,7 +9,7 @@ obj-$(CONFIG_SPAPR_RNG) += spapr_rng.o
|
|||
# IBM PowerNV
|
||||
obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_core.o pnv_lpc.o pnv_psi.o pnv_occ.o pnv_bmc.o
|
||||
ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
|
||||
obj-y += spapr_pci_vfio.o
|
||||
obj-y += spapr_pci_vfio.o spapr_pci_nvlink2.o
|
||||
endif
|
||||
obj-$(CONFIG_PSERIES) += spapr_rtas_ddw.o
|
||||
# PowerPC 4xx boards
|
||||
|
|
|
@ -40,7 +40,6 @@
|
|||
#include "hw/ide.h"
|
||||
#include "hw/loader.h"
|
||||
#include "hw/timer/mc146818rtc.h"
|
||||
#include "hw/input/i8042.h"
|
||||
#include "hw/isa/pc87312.h"
|
||||
#include "hw/net/ne2000-isa.h"
|
||||
#include "sysemu/arch_init.h"
|
||||
|
|
|
@ -1034,12 +1034,13 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
|
|||
0, cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE),
|
||||
cpu_to_be32(max_cpus / smp_threads),
|
||||
};
|
||||
uint32_t maxdomain = cpu_to_be32(spapr->gpu_numa_id > 1 ? 1 : 0);
|
||||
uint32_t maxdomains[] = {
|
||||
cpu_to_be32(4),
|
||||
cpu_to_be32(0),
|
||||
cpu_to_be32(0),
|
||||
cpu_to_be32(0),
|
||||
cpu_to_be32(nb_numa_nodes ? nb_numa_nodes : 1),
|
||||
maxdomain,
|
||||
maxdomain,
|
||||
maxdomain,
|
||||
cpu_to_be32(spapr->gpu_numa_id),
|
||||
};
|
||||
|
||||
_FDT(rtas = fdt_add_subnode(fdt, 0, "rtas"));
|
||||
|
@ -1519,10 +1520,10 @@ static void spapr_unmap_hptes(PPCVirtualHypervisor *vhyp,
|
|||
/* Nothing to do for qemu managed HPT */
|
||||
}
|
||||
|
||||
static void spapr_store_hpte(PPCVirtualHypervisor *vhyp, hwaddr ptex,
|
||||
uint64_t pte0, uint64_t pte1)
|
||||
void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
|
||||
uint64_t pte0, uint64_t pte1)
|
||||
{
|
||||
SpaprMachineState *spapr = SPAPR_MACHINE(vhyp);
|
||||
SpaprMachineState *spapr = SPAPR_MACHINE(cpu->vhyp);
|
||||
hwaddr offset = ptex * HASH_PTE_SIZE_64;
|
||||
|
||||
if (!spapr->htab) {
|
||||
|
@ -1550,6 +1551,38 @@ static void spapr_store_hpte(PPCVirtualHypervisor *vhyp, hwaddr ptex,
|
|||
}
|
||||
}
|
||||
|
||||
static void spapr_hpte_set_c(PPCVirtualHypervisor *vhyp, hwaddr ptex,
|
||||
uint64_t pte1)
|
||||
{
|
||||
hwaddr offset = ptex * HASH_PTE_SIZE_64 + 15;
|
||||
SpaprMachineState *spapr = SPAPR_MACHINE(vhyp);
|
||||
|
||||
if (!spapr->htab) {
|
||||
/* There should always be a hash table when this is called */
|
||||
error_report("spapr_hpte_set_c called with no hash table !");
|
||||
return;
|
||||
}
|
||||
|
||||
/* The HW performs a non-atomic byte update */
|
||||
stb_p(spapr->htab + offset, (pte1 & 0xff) | 0x80);
|
||||
}
|
||||
|
||||
static void spapr_hpte_set_r(PPCVirtualHypervisor *vhyp, hwaddr ptex,
|
||||
uint64_t pte1)
|
||||
{
|
||||
hwaddr offset = ptex * HASH_PTE_SIZE_64 + 14;
|
||||
SpaprMachineState *spapr = SPAPR_MACHINE(vhyp);
|
||||
|
||||
if (!spapr->htab) {
|
||||
/* There should always be a hash table when this is called */
|
||||
error_report("spapr_hpte_set_r called with no hash table !");
|
||||
return;
|
||||
}
|
||||
|
||||
/* The HW performs a non-atomic byte update */
|
||||
stb_p(spapr->htab + offset, ((pte1 >> 8) & 0xff) | 0x01);
|
||||
}
|
||||
|
||||
int spapr_hpt_shift_for_ramsize(uint64_t ramsize)
|
||||
{
|
||||
int shift;
|
||||
|
@ -1698,6 +1731,16 @@ static void spapr_machine_reset(void)
|
|||
spapr_irq_msi_reset(spapr);
|
||||
}
|
||||
|
||||
/*
|
||||
* NVLink2-connected GPU RAM needs to be placed on a separate NUMA node.
|
||||
* We assign a new numa ID per GPU in spapr_pci_collect_nvgpu() which is
|
||||
* called from vPHB reset handler so we initialize the counter here.
|
||||
* If no NUMA is configured from the QEMU side, we start from 1 as GPU RAM
|
||||
* must be equally distant from any other node.
|
||||
* The final value of spapr->gpu_numa_id is going to be written to
|
||||
* max-associativity-domains in spapr_build_fdt().
|
||||
*/
|
||||
spapr->gpu_numa_id = MAX(1, nb_numa_nodes);
|
||||
qemu_devices_reset();
|
||||
|
||||
/*
|
||||
|
@ -3907,7 +3950,9 @@ static void spapr_phb_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
|
|||
smc->phb_placement(spapr, sphb->index,
|
||||
&sphb->buid, &sphb->io_win_addr,
|
||||
&sphb->mem_win_addr, &sphb->mem64_win_addr,
|
||||
windows_supported, sphb->dma_liobn, errp);
|
||||
windows_supported, sphb->dma_liobn,
|
||||
&sphb->nv2_gpa_win_addr, &sphb->nv2_atsd_win_addr,
|
||||
errp);
|
||||
}
|
||||
|
||||
static void spapr_phb_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
|
||||
|
@ -4108,7 +4153,8 @@ static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine)
|
|||
static void spapr_phb_placement(SpaprMachineState *spapr, uint32_t index,
|
||||
uint64_t *buid, hwaddr *pio,
|
||||
hwaddr *mmio32, hwaddr *mmio64,
|
||||
unsigned n_dma, uint32_t *liobns, Error **errp)
|
||||
unsigned n_dma, uint32_t *liobns,
|
||||
hwaddr *nv2gpa, hwaddr *nv2atsd, Error **errp)
|
||||
{
|
||||
/*
|
||||
* New-style PHB window placement.
|
||||
|
@ -4153,6 +4199,9 @@ static void spapr_phb_placement(SpaprMachineState *spapr, uint32_t index,
|
|||
*pio = SPAPR_PCI_BASE + index * SPAPR_PCI_IO_WIN_SIZE;
|
||||
*mmio32 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM32_WIN_SIZE;
|
||||
*mmio64 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM64_WIN_SIZE;
|
||||
|
||||
*nv2gpa = SPAPR_PCI_NV2RAM64_WIN_BASE + index * SPAPR_PCI_NV2RAM64_WIN_SIZE;
|
||||
*nv2atsd = SPAPR_PCI_NV2ATSD_WIN_BASE + index * SPAPR_PCI_NV2ATSD_WIN_SIZE;
|
||||
}
|
||||
|
||||
static ICSState *spapr_ics_get(XICSFabric *dev, int irq)
|
||||
|
@ -4274,7 +4323,8 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
|
|||
vhc->hpt_mask = spapr_hpt_mask;
|
||||
vhc->map_hptes = spapr_map_hptes;
|
||||
vhc->unmap_hptes = spapr_unmap_hptes;
|
||||
vhc->store_hpte = spapr_store_hpte;
|
||||
vhc->hpte_set_c = spapr_hpte_set_c;
|
||||
vhc->hpte_set_r = spapr_hpte_set_r;
|
||||
vhc->get_pate = spapr_get_pate;
|
||||
vhc->encode_hpt_for_kvm_pr = spapr_encode_hpt_for_kvm_pr;
|
||||
xic->ics_get = spapr_ics_get;
|
||||
|
@ -4368,6 +4418,18 @@ DEFINE_SPAPR_MACHINE(4_0, "4.0", false);
|
|||
/*
|
||||
* pseries-3.1
|
||||
*/
|
||||
static void phb_placement_3_1(SpaprMachineState *spapr, uint32_t index,
|
||||
uint64_t *buid, hwaddr *pio,
|
||||
hwaddr *mmio32, hwaddr *mmio64,
|
||||
unsigned n_dma, uint32_t *liobns,
|
||||
hwaddr *nv2gpa, hwaddr *nv2atsd, Error **errp)
|
||||
{
|
||||
spapr_phb_placement(spapr, index, buid, pio, mmio32, mmio64, n_dma, liobns,
|
||||
nv2gpa, nv2atsd, errp);
|
||||
*nv2gpa = 0;
|
||||
*nv2atsd = 0;
|
||||
}
|
||||
|
||||
static void spapr_machine_3_1_class_options(MachineClass *mc)
|
||||
{
|
||||
SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
|
||||
|
@ -4383,6 +4445,7 @@ static void spapr_machine_3_1_class_options(MachineClass *mc)
|
|||
smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
|
||||
smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
|
||||
smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
|
||||
smc->phb_placement = phb_placement_3_1;
|
||||
}
|
||||
|
||||
DEFINE_SPAPR_MACHINE(3_1, "3.1", false);
|
||||
|
@ -4514,7 +4577,8 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false);
|
|||
static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index,
|
||||
uint64_t *buid, hwaddr *pio,
|
||||
hwaddr *mmio32, hwaddr *mmio64,
|
||||
unsigned n_dma, uint32_t *liobns, Error **errp)
|
||||
unsigned n_dma, uint32_t *liobns,
|
||||
hwaddr *nv2gpa, hwaddr *nv2atsd, Error **errp)
|
||||
{
|
||||
/* Legacy PHB placement for pseries-2.7 and earlier machine types */
|
||||
const uint64_t base_buid = 0x800000020000000ULL;
|
||||
|
@ -4558,6 +4622,9 @@ static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index,
|
|||
* fallback behaviour of automatically splitting a large "32-bit"
|
||||
* window into contiguous 32-bit and 64-bit windows
|
||||
*/
|
||||
|
||||
*nv2gpa = 0;
|
||||
*nv2atsd = 0;
|
||||
}
|
||||
|
||||
static void spapr_machine_2_7_class_options(MachineClass *mc)
|
||||
|
|
|
@ -118,7 +118,7 @@ static target_ulong h_enter(PowerPCCPU *cpu, SpaprMachineState *spapr,
|
|||
ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
|
||||
}
|
||||
|
||||
ppc_hash64_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel);
|
||||
spapr_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel);
|
||||
|
||||
args[0] = ptex + slot;
|
||||
return H_SUCCESS;
|
||||
|
@ -131,7 +131,8 @@ typedef enum {
|
|||
REMOVE_HW = 3,
|
||||
} RemoveResult;
|
||||
|
||||
static RemoveResult remove_hpte(PowerPCCPU *cpu, target_ulong ptex,
|
||||
static RemoveResult remove_hpte(PowerPCCPU *cpu
|
||||
, target_ulong ptex,
|
||||
target_ulong avpn,
|
||||
target_ulong flags,
|
||||
target_ulong *vp, target_ulong *rp)
|
||||
|
@ -155,7 +156,7 @@ static RemoveResult remove_hpte(PowerPCCPU *cpu, target_ulong ptex,
|
|||
}
|
||||
*vp = v;
|
||||
*rp = r;
|
||||
ppc_hash64_store_hpte(cpu, ptex, HPTE64_V_HPTE_DIRTY, 0);
|
||||
spapr_store_hpte(cpu, ptex, HPTE64_V_HPTE_DIRTY, 0);
|
||||
ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r);
|
||||
return REMOVE_SUCCESS;
|
||||
}
|
||||
|
@ -289,13 +290,13 @@ static target_ulong h_protect(PowerPCCPU *cpu, SpaprMachineState *spapr,
|
|||
r |= (flags << 55) & HPTE64_R_PP0;
|
||||
r |= (flags << 48) & HPTE64_R_KEY_HI;
|
||||
r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO);
|
||||
ppc_hash64_store_hpte(cpu, ptex,
|
||||
(v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0);
|
||||
spapr_store_hpte(cpu, ptex,
|
||||
(v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0);
|
||||
ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r);
|
||||
/* Flush the tlb */
|
||||
check_tlb_flush(env, true);
|
||||
/* Don't need a memory barrier, due to qemu's global lock */
|
||||
ppc_hash64_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r);
|
||||
spapr_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r);
|
||||
return H_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -304,8 +305,8 @@ static target_ulong h_read(PowerPCCPU *cpu, SpaprMachineState *spapr,
|
|||
{
|
||||
target_ulong flags = args[0];
|
||||
target_ulong ptex = args[1];
|
||||
uint8_t *hpte;
|
||||
int i, ridx, n_entries = 1;
|
||||
const ppc_hash_pte64_t *hptes;
|
||||
|
||||
if (!valid_ptex(cpu, ptex)) {
|
||||
return H_PARAMETER;
|
||||
|
@ -317,13 +318,12 @@ static target_ulong h_read(PowerPCCPU *cpu, SpaprMachineState *spapr,
|
|||
n_entries = 4;
|
||||
}
|
||||
|
||||
hpte = spapr->htab + (ptex * HASH_PTE_SIZE_64);
|
||||
|
||||
hptes = ppc_hash64_map_hptes(cpu, ptex, n_entries);
|
||||
for (i = 0, ridx = 0; i < n_entries; i++) {
|
||||
args[ridx++] = ldq_p(hpte);
|
||||
args[ridx++] = ldq_p(hpte + (HASH_PTE_SIZE_64/2));
|
||||
hpte += HASH_PTE_SIZE_64;
|
||||
args[ridx++] = ppc_hash64_hpte0(cpu, hptes, i);
|
||||
args[ridx++] = ppc_hash64_hpte1(cpu, hptes, i);
|
||||
}
|
||||
ppc_hash64_unmap_hptes(cpu, hptes, ptex, n_entries);
|
||||
|
||||
return H_SUCCESS;
|
||||
}
|
||||
|
|
|
@ -67,36 +67,11 @@ void spapr_irq_msi_reset(SpaprMachineState *spapr)
|
|||
* XICS IRQ backend.
|
||||
*/
|
||||
|
||||
static ICSState *spapr_ics_create(SpaprMachineState *spapr,
|
||||
int nr_irqs, Error **errp)
|
||||
{
|
||||
Error *local_err = NULL;
|
||||
Object *obj;
|
||||
|
||||
obj = object_new(TYPE_ICS_SIMPLE);
|
||||
object_property_add_child(OBJECT(spapr), "ics", obj, &error_abort);
|
||||
object_property_add_const_link(obj, ICS_PROP_XICS, OBJECT(spapr),
|
||||
&error_abort);
|
||||
object_property_set_int(obj, nr_irqs, "nr-irqs", &local_err);
|
||||
if (local_err) {
|
||||
goto error;
|
||||
}
|
||||
object_property_set_bool(obj, true, "realized", &local_err);
|
||||
if (local_err) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
return ICS_BASE(obj);
|
||||
|
||||
error:
|
||||
error_propagate(errp, local_err);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void spapr_irq_init_xics(SpaprMachineState *spapr, int nr_irqs,
|
||||
Error **errp)
|
||||
{
|
||||
MachineState *machine = MACHINE(spapr);
|
||||
Object *obj;
|
||||
Error *local_err = NULL;
|
||||
bool xics_kvm = false;
|
||||
|
||||
|
@ -108,7 +83,8 @@ static void spapr_irq_init_xics(SpaprMachineState *spapr, int nr_irqs,
|
|||
if (machine_kernel_irqchip_required(machine) && !xics_kvm) {
|
||||
error_prepend(&local_err,
|
||||
"kernel_irqchip requested but unavailable: ");
|
||||
goto error;
|
||||
error_propagate(errp, local_err);
|
||||
return;
|
||||
}
|
||||
error_free(local_err);
|
||||
local_err = NULL;
|
||||
|
@ -118,10 +94,18 @@ static void spapr_irq_init_xics(SpaprMachineState *spapr, int nr_irqs,
|
|||
xics_spapr_init(spapr);
|
||||
}
|
||||
|
||||
spapr->ics = spapr_ics_create(spapr, nr_irqs, &local_err);
|
||||
obj = object_new(TYPE_ICS_SIMPLE);
|
||||
object_property_add_child(OBJECT(spapr), "ics", obj, &error_abort);
|
||||
object_property_add_const_link(obj, ICS_PROP_XICS, OBJECT(spapr),
|
||||
&error_fatal);
|
||||
object_property_set_int(obj, nr_irqs, "nr-irqs", &error_fatal);
|
||||
object_property_set_bool(obj, true, "realized", &local_err);
|
||||
if (local_err) {
|
||||
error_propagate(errp, local_err);
|
||||
return;
|
||||
}
|
||||
|
||||
error:
|
||||
error_propagate(errp, local_err);
|
||||
spapr->ics = ICS_BASE(obj);
|
||||
}
|
||||
|
||||
#define ICS_IRQ_FREE(ics, srcno) \
|
||||
|
|
|
@ -719,26 +719,10 @@ param_error_exit:
|
|||
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
|
||||
}
|
||||
|
||||
static int pci_spapr_swizzle(int slot, int pin)
|
||||
{
|
||||
return (slot + pin) % PCI_NUM_PINS;
|
||||
}
|
||||
|
||||
static int pci_spapr_map_irq(PCIDevice *pci_dev, int irq_num)
|
||||
{
|
||||
/*
|
||||
* Here we need to convert pci_dev + irq_num to some unique value
|
||||
* which is less than number of IRQs on the specific bus (4). We
|
||||
* use standard PCI swizzling, that is (slot number + pin number)
|
||||
* % 4.
|
||||
*/
|
||||
return pci_spapr_swizzle(PCI_SLOT(pci_dev->devfn), irq_num);
|
||||
}
|
||||
|
||||
static void pci_spapr_set_irq(void *opaque, int irq_num, int level)
|
||||
{
|
||||
/*
|
||||
* Here we use the number returned by pci_spapr_map_irq to find a
|
||||
* Here we use the number returned by pci_swizzle_map_irq_fn to find a
|
||||
* corresponding qemu_irq.
|
||||
*/
|
||||
SpaprPhbState *phb = opaque;
|
||||
|
@ -1355,6 +1339,8 @@ static void spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
|
|||
if (sphb->pcie_ecs && pci_is_express(dev)) {
|
||||
_FDT(fdt_setprop_cell(fdt, offset, "ibm,pci-config-space-type", 0x1));
|
||||
}
|
||||
|
||||
spapr_phb_nvgpu_populate_pcidev_dt(dev, fdt, offset, sphb);
|
||||
}
|
||||
|
||||
/* create OF node for pci device and required OF DT properties */
|
||||
|
@ -1587,6 +1573,8 @@ static void spapr_phb_unrealize(DeviceState *dev, Error **errp)
|
|||
int i;
|
||||
const unsigned windows_supported = spapr_phb_windows_supported(sphb);
|
||||
|
||||
spapr_phb_nvgpu_free(sphb);
|
||||
|
||||
if (sphb->msi) {
|
||||
g_hash_table_unref(sphb->msi);
|
||||
sphb->msi = NULL;
|
||||
|
@ -1762,7 +1750,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
|
|||
&sphb->iowindow);
|
||||
|
||||
bus = pci_register_root_bus(dev, NULL,
|
||||
pci_spapr_set_irq, pci_spapr_map_irq, sphb,
|
||||
pci_spapr_set_irq, pci_swizzle_map_irq_fn, sphb,
|
||||
&sphb->memspace, &sphb->iospace,
|
||||
PCI_DEVFN(0, 0), PCI_NUM_PINS,
|
||||
TYPE_SPAPR_PHB_ROOT_BUS);
|
||||
|
@ -1898,8 +1886,14 @@ void spapr_phb_dma_reset(SpaprPhbState *sphb)
|
|||
static void spapr_phb_reset(DeviceState *qdev)
|
||||
{
|
||||
SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(qdev);
|
||||
Error *errp = NULL;
|
||||
|
||||
spapr_phb_dma_reset(sphb);
|
||||
spapr_phb_nvgpu_free(sphb);
|
||||
spapr_phb_nvgpu_setup(sphb, &errp);
|
||||
if (errp) {
|
||||
error_report_err(errp);
|
||||
}
|
||||
|
||||
/* Reset the IOMMU state */
|
||||
object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL);
|
||||
|
@ -1932,6 +1926,8 @@ static Property spapr_phb_properties[] = {
|
|||
pre_2_8_migration, false),
|
||||
DEFINE_PROP_BOOL("pcie-extended-configuration-space", SpaprPhbState,
|
||||
pcie_ecs, true),
|
||||
DEFINE_PROP_UINT64("gpa", SpaprPhbState, nv2_gpa_win_addr, 0),
|
||||
DEFINE_PROP_UINT64("atsd", SpaprPhbState, nv2_atsd_win_addr, 0),
|
||||
DEFINE_PROP_END_OF_LIST(),
|
||||
};
|
||||
|
||||
|
@ -2164,7 +2160,6 @@ int spapr_populate_pci_dt(SpaprPhbState *phb, uint32_t intc_phandle, void *fdt,
|
|||
uint32_t nr_msis, int *node_offset)
|
||||
{
|
||||
int bus_off, i, j, ret;
|
||||
gchar *nodename;
|
||||
uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
|
||||
struct {
|
||||
uint32_t hi;
|
||||
|
@ -2212,11 +2207,10 @@ int spapr_populate_pci_dt(SpaprPhbState *phb, uint32_t intc_phandle, void *fdt,
|
|||
PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
|
||||
SpaprFdt s_fdt;
|
||||
SpaprDrc *drc;
|
||||
Error *errp = NULL;
|
||||
|
||||
/* Start populating the FDT */
|
||||
nodename = g_strdup_printf("pci@%" PRIx64, phb->buid);
|
||||
_FDT(bus_off = fdt_add_subnode(fdt, 0, nodename));
|
||||
g_free(nodename);
|
||||
_FDT(bus_off = fdt_add_subnode(fdt, 0, phb->dtbusname));
|
||||
if (node_offset) {
|
||||
*node_offset = bus_off;
|
||||
}
|
||||
|
@ -2249,14 +2243,14 @@ int spapr_populate_pci_dt(SpaprPhbState *phb, uint32_t intc_phandle, void *fdt,
|
|||
}
|
||||
|
||||
/* Build the interrupt-map, this must match what is done
|
||||
* in pci_spapr_map_irq
|
||||
* in pci_swizzle_map_irq_fn
|
||||
*/
|
||||
_FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask",
|
||||
&interrupt_map_mask, sizeof(interrupt_map_mask)));
|
||||
for (i = 0; i < PCI_SLOT_MAX; i++) {
|
||||
for (j = 0; j < PCI_NUM_PINS; j++) {
|
||||
uint32_t *irqmap = interrupt_map[i*PCI_NUM_PINS + j];
|
||||
int lsi_num = pci_spapr_swizzle(i, j);
|
||||
int lsi_num = pci_swizzle(i, j);
|
||||
|
||||
irqmap[0] = cpu_to_be32(b_ddddd(i)|b_fff(0));
|
||||
irqmap[1] = 0;
|
||||
|
@ -2304,6 +2298,12 @@ int spapr_populate_pci_dt(SpaprPhbState *phb, uint32_t intc_phandle, void *fdt,
|
|||
return ret;
|
||||
}
|
||||
|
||||
spapr_phb_nvgpu_populate_dt(phb, fdt, bus_off, &errp);
|
||||
if (errp) {
|
||||
error_report_err(errp);
|
||||
}
|
||||
spapr_phb_nvgpu_ram_populate_dt(phb, fdt);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
450
hw/ppc/spapr_pci_nvlink2.c
Normal file
450
hw/ppc/spapr_pci_nvlink2.c
Normal file
|
@ -0,0 +1,450 @@
|
|||
/*
|
||||
* QEMU sPAPR PCI for NVLink2 pass through
|
||||
*
|
||||
* Copyright (c) 2019 Alexey Kardashevskiy, IBM Corporation.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
#include "qemu/osdep.h"
|
||||
#include "qapi/error.h"
|
||||
#include "qemu-common.h"
|
||||
#include "hw/pci/pci.h"
|
||||
#include "hw/pci-host/spapr.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "hw/ppc/fdt.h"
|
||||
#include "hw/pci/pci_bridge.h"
|
||||
|
||||
/*
 * Device tree phandle values used by the NVLink2 code.  Each macro packs
 * the vPHB index together with a per-object number into a 32-bit value.
 */

/* 0x12000000 | PHB index << 16 | PCI devfn of the device */
#define PHANDLE_PCIDEV(phb, pdev) \
    (0x12000000 | (((phb)->index) << 16) | ((pdev)->devfn))

/* 0x110000FF | n << 8 | PHB index << 16 */
#define PHANDLE_GPURAM(phb, n) \
    (0x110000FF | ((n) << 8) | (((phb)->index) << 16))

/* 0x00130000 | PHB index << 8 | GPU slot number << 4 | link number */
#define PHANDLE_NVLINK(phb, gn, nn) \
    (0x00130000 | (((phb)->index) << 8) | ((gn) << 4) | (nn))

/* Big-endian 1, for use as a cell in GPU RAM associativity arrays */
#define SPAPR_GPU_NUMA_ID (cpu_to_be32(1))
|
||||
|
||||
struct spapr_phb_pci_nvgpu_config {
|
||||
uint64_t nv2_ram_current;
|
||||
uint64_t nv2_atsd_current;
|
||||
int num; /* number of non empty (i.e. tgt!=0) entries in slots[] */
|
||||
struct spapr_phb_pci_nvgpu_slot {
|
||||
uint64_t tgt;
|
||||
uint64_t gpa;
|
||||
unsigned numa_id;
|
||||
PCIDevice *gpdev;
|
||||
int linknum;
|
||||
struct {
|
||||
uint64_t atsd_gpa;
|
||||
PCIDevice *npdev;
|
||||
uint32_t link_speed;
|
||||
} links[NVGPU_MAX_LINKS];
|
||||
} slots[NVGPU_MAX_NUM];
|
||||
Error *errp;
|
||||
};
|
||||
|
||||
static struct spapr_phb_pci_nvgpu_slot *
|
||||
spapr_nvgpu_get_slot(struct spapr_phb_pci_nvgpu_config *nvgpus, uint64_t tgt)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* Search for partially collected "slot" */
|
||||
for (i = 0; i < nvgpus->num; ++i) {
|
||||
if (nvgpus->slots[i].tgt == tgt) {
|
||||
return &nvgpus->slots[i];
|
||||
}
|
||||
}
|
||||
|
||||
if (nvgpus->num == ARRAY_SIZE(nvgpus->slots)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
i = nvgpus->num;
|
||||
nvgpus->slots[i].tgt = tgt;
|
||||
++nvgpus->num;
|
||||
|
||||
return &nvgpus->slots[i];
|
||||
}
|
||||
|
||||
static void spapr_pci_collect_nvgpu(struct spapr_phb_pci_nvgpu_config *nvgpus,
|
||||
PCIDevice *pdev, uint64_t tgt,
|
||||
MemoryRegion *mr, Error **errp)
|
||||
{
|
||||
MachineState *machine = MACHINE(qdev_get_machine());
|
||||
SpaprMachineState *spapr = SPAPR_MACHINE(machine);
|
||||
struct spapr_phb_pci_nvgpu_slot *nvslot = spapr_nvgpu_get_slot(nvgpus, tgt);
|
||||
|
||||
if (!nvslot) {
|
||||
error_setg(errp, "Found too many GPUs per vPHB");
|
||||
return;
|
||||
}
|
||||
g_assert(!nvslot->gpdev);
|
||||
nvslot->gpdev = pdev;
|
||||
|
||||
nvslot->gpa = nvgpus->nv2_ram_current;
|
||||
nvgpus->nv2_ram_current += memory_region_size(mr);
|
||||
nvslot->numa_id = spapr->gpu_numa_id;
|
||||
++spapr->gpu_numa_id;
|
||||
}
|
||||
|
||||
static void spapr_pci_collect_nvnpu(struct spapr_phb_pci_nvgpu_config *nvgpus,
|
||||
PCIDevice *pdev, uint64_t tgt,
|
||||
MemoryRegion *mr, Error **errp)
|
||||
{
|
||||
struct spapr_phb_pci_nvgpu_slot *nvslot = spapr_nvgpu_get_slot(nvgpus, tgt);
|
||||
int j;
|
||||
|
||||
if (!nvslot) {
|
||||
error_setg(errp, "Found too many NVLink bridges per vPHB");
|
||||
return;
|
||||
}
|
||||
|
||||
j = nvslot->linknum;
|
||||
if (j == ARRAY_SIZE(nvslot->links)) {
|
||||
error_setg(errp, "Found too many NVLink bridges per GPU");
|
||||
return;
|
||||
}
|
||||
++nvslot->linknum;
|
||||
|
||||
g_assert(!nvslot->links[j].npdev);
|
||||
nvslot->links[j].npdev = pdev;
|
||||
nvslot->links[j].atsd_gpa = nvgpus->nv2_atsd_current;
|
||||
nvgpus->nv2_atsd_current += memory_region_size(mr);
|
||||
nvslot->links[j].link_speed =
|
||||
object_property_get_uint(OBJECT(pdev), "nvlink2-link-speed", NULL);
|
||||
}
|
||||
|
||||
/*
 * pci_for_each_device() callback: collect NVLink2 GPUs and bridges.
 *
 * @bus:    unused
 * @pdev:   device being visited
 * @opaque: the struct spapr_phb_pci_nvgpu_config being filled in
 *
 * A device with a non-zero "nvlink2-tgt" value is recorded as a GPU when it
 * has an "nvlink2-mr[0]" link, otherwise as an NVLink bridge using its
 * "nvlink2-atsd-mr[0]" link; at least one of the two must exist.  Any error
 * is propagated into the config's errp field.
 *
 * If the device's header type reads as a PCI bridge and it has a secondary
 * bus, the devices on that bus are visited with this same callback.
 */
static void spapr_phb_pci_collect_nvgpu(PCIBus *bus, PCIDevice *pdev,
                                        void *opaque)
{
    Object *po = OBJECT(pdev);
    uint64_t tgt = object_property_get_uint(po, "nvlink2-tgt", NULL);
    PCIBus *sec_bus;

    if (tgt) {
        struct spapr_phb_pci_nvgpu_config *nvgpus = opaque;
        Error *local_err = NULL;
        Object *mr_gpu = object_property_get_link(po, "nvlink2-mr[0]", NULL);
        Object *mr_npu = object_property_get_link(po, "nvlink2-atsd-mr[0]",
                                                  NULL);

        g_assert(mr_gpu || mr_npu);
        if (!mr_gpu) {
            spapr_pci_collect_nvnpu(nvgpus, pdev, tgt, MEMORY_REGION(mr_npu),
                                    &local_err);
        } else {
            spapr_pci_collect_nvgpu(nvgpus, pdev, tgt, MEMORY_REGION(mr_gpu),
                                    &local_err);
        }
        error_propagate(&nvgpus->errp, local_err);
    }

    /* Descend only through PCI bridges that have a secondary bus */
    if (pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) ==
        PCI_HEADER_TYPE_BRIDGE) {
        sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
        if (sec_bus) {
            pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
                                spapr_phb_pci_collect_nvgpu, opaque);
        }
    }
}
|
||||
|
||||
/*
 * Discover the NVLink2 GPUs and bridges behind a vPHB and map their memory
 * regions into the system address space.
 *
 * @sphb: the vPHB; on return sphb->nvgpus is either the collected
 *        configuration or NULL
 * @errp: receives any error raised while walking the bus
 *
 * Nothing is done unless both NVLink2 window addresses of the vPHB are
 * non-zero.  For every slot that has a GPU with an "nvlink2-mr[0]" region,
 * that region is mapped at the slot's gpa and each of the slot's links that
 * has an "nvlink2-atsd-mr[0]" region is mapped at the link's atsd_gpa.
 * Slots without a GPU are left unmapped.  If the walk fails or no GPU ends
 * up mapped, the configuration is freed and sphb->nvgpus is reset to NULL.
 */
void spapr_phb_nvgpu_setup(SpaprPhbState *sphb, Error **errp)
{
    int gpu_idx, link_idx;
    int mapped_gpus = 0;
    PCIBus *root_bus;

    /* Both windows are required */
    if (!(sphb->nv2_gpa_win_addr && sphb->nv2_atsd_win_addr)) {
        return;
    }

    sphb->nvgpus = g_new0(struct spapr_phb_pci_nvgpu_config, 1);
    sphb->nvgpus->nv2_ram_current = sphb->nv2_gpa_win_addr;
    sphb->nvgpus->nv2_atsd_current = sphb->nv2_atsd_win_addr;

    /* Search for GPUs and NPUs */
    root_bus = PCI_HOST_BRIDGE(sphb)->bus;
    pci_for_each_device(root_bus, pci_bus_num(root_bus),
                        spapr_phb_pci_collect_nvgpu, sphb->nvgpus);

    if (sphb->nvgpus->errp) {
        error_propagate(errp, sphb->nvgpus->errp);
        sphb->nvgpus->errp = NULL;
        goto cleanup_exit;
    }

    /* Add found GPU RAM and ATSD MRs if found */
    for (gpu_idx = 0; gpu_idx < sphb->nvgpus->num; ++gpu_idx) {
        struct spapr_phb_pci_nvgpu_slot *slot = &sphb->nvgpus->slots[gpu_idx];
        Object *ram_mr;

        if (slot->gpdev == NULL) {
            continue;
        }

        ram_mr = object_property_get_link(OBJECT(slot->gpdev),
                                          "nvlink2-mr[0]", NULL);
        if (ram_mr == NULL) {
            /* ATSD is pointless without GPU RAM MR so skip those */
            continue;
        }

        mapped_gpus++;
        memory_region_add_subregion(get_system_memory(), slot->gpa,
                                    MEMORY_REGION(ram_mr));

        for (link_idx = 0; link_idx < slot->linknum; ++link_idx) {
            Object *atsd_mr =
                object_property_get_link(OBJECT(slot->links[link_idx].npdev),
                                         "nvlink2-atsd-mr[0]", NULL);

            if (atsd_mr) {
                memory_region_add_subregion(get_system_memory(),
                                            slot->links[link_idx].atsd_gpa,
                                            MEMORY_REGION(atsd_mr));
            }
        }
    }

    if (mapped_gpus) {
        return;
    }

    /* We did not find any interesting GPU */
cleanup_exit:
    g_free(sphb->nvgpus);
    sphb->nvgpus = NULL;
}
|
||||
|
||||
/*
 * Undo spapr_phb_nvgpu_setup(): unmap the GPU RAM and ATSD memory regions
 * of a vPHB from the system address space and free its NVLink2
 * configuration.
 *
 * @sphb: the vPHB; sphb->nvgpus is NULL on return
 *
 * Does nothing if the vPHB has no NVLink2 configuration.
 *
 * A slot may have been created by an NVLink bridge alone, in which case its
 * gpdev is NULL.  spapr_phb_nvgpu_setup() maps nothing for such a slot, so
 * it is skipped here as well: looking up a property on the NULL device
 * would dereference a NULL object, and its ATSD regions were never added
 * as subregions and so must not be removed.
 */
void spapr_phb_nvgpu_free(SpaprPhbState *sphb)
{
    int i, j;

    if (!sphb->nvgpus) {
        return;
    }

    for (i = 0; i < sphb->nvgpus->num; ++i) {
        struct spapr_phb_pci_nvgpu_slot *nvslot = &sphb->nvgpus->slots[i];
        Object *nv_mrobj;

        /* Nothing was mapped for a slot without a GPU */
        if (!nvslot->gpdev) {
            continue;
        }

        nv_mrobj = object_property_get_link(OBJECT(nvslot->gpdev),
                                            "nvlink2-mr[0]", NULL);
        if (!nv_mrobj) {
            /* Setup maps neither RAM nor ATSD without the GPU RAM MR */
            continue;
        }
        memory_region_del_subregion(get_system_memory(),
                                    MEMORY_REGION(nv_mrobj));

        for (j = 0; j < nvslot->linknum; ++j) {
            PCIDevice *npdev = nvslot->links[j].npdev;
            Object *atsd_mrobj;

            atsd_mrobj = object_property_get_link(OBJECT(npdev),
                                                  "nvlink2-atsd-mr[0]", NULL);
            if (atsd_mrobj) {
                memory_region_del_subregion(get_system_memory(),
                                            MEMORY_REGION(atsd_mrobj));
            }
        }
    }

    g_free(sphb->nvgpus);
    sphb->nvgpus = NULL;
}
|
||||
|
||||
void spapr_phb_nvgpu_populate_dt(SpaprPhbState *sphb, void *fdt, int bus_off,
|
||||
Error **errp)
|
||||
{
|
||||
int i, j, atsdnum = 0;
|
||||
uint64_t atsd[8]; /* The existing limitation of known guests */
|
||||
|
||||
if (!sphb->nvgpus) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; (i < sphb->nvgpus->num) && (atsdnum < ARRAY_SIZE(atsd)); ++i) {
|
||||
struct spapr_phb_pci_nvgpu_slot *nvslot = &sphb->nvgpus->slots[i];
|
||||
|
||||
if (!nvslot->gpdev) {
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < nvslot->linknum; ++j) {
|
||||
if (!nvslot->links[j].atsd_gpa) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (atsdnum == ARRAY_SIZE(atsd)) {
|
||||
error_report("Only %"PRIuPTR" ATSD registers supported",
|
||||
ARRAY_SIZE(atsd));
|
||||
break;
|
||||
}
|
||||
atsd[atsdnum] = cpu_to_be64(nvslot->links[j].atsd_gpa);
|
||||
++atsdnum;
|
||||
}
|
||||
}
|
||||
|
||||
if (!atsdnum) {
|
||||
error_setg(errp, "No ATSD registers found");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!spapr_phb_eeh_available(sphb)) {
|
||||
/*
|
||||
* ibm,mmio-atsd contains ATSD registers; these belong to an NPU PHB
|
||||
* which we do not emulate as a separate device. Instead we put
|
||||
* ibm,mmio-atsd to the vPHB with GPU and make sure that we do not
|
||||
* put GPUs from different IOMMU groups to the same vPHB to ensure
|
||||
* that the guest will use ATSDs from the corresponding NPU.
|
||||
*/
|
||||
error_setg(errp, "ATSD requires separate vPHB per GPU IOMMU group");
|
||||
return;
|
||||
}
|
||||
|
||||
_FDT((fdt_setprop(fdt, bus_off, "ibm,mmio-atsd", atsd,
|
||||
atsdnum * sizeof(atsd[0]))));
|
||||
}
|
||||
|
||||
/*
 * Add the NPU and GPU RAM nodes belonging to @sphb under the root of @fdt.
 *
 * Two groups of nodes are created:
 *  - one "npuphb<index>" node holding a "link@<n>" child for every NVLink
 *    of every slot, each tagged with its PHANDLE_NVLINK() phandle;
 *  - one "memory@<gpa>" node per slot that has a GPU attached, describing
 *    the GPU RAM window and marking it unusable via a zero-sized
 *    "linux,usable-memory" entry.
 *
 * Does nothing when the PHB carries no NVLink2 GPU state.
 */
void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt)
{
    int i, j, linkidx, npuoff;
    char *npuname;

    if (!sphb->nvgpus) {
        return;
    }

    npuname = g_strdup_printf("npuphb%d", sphb->index);
    npuoff = fdt_add_subnode(fdt, 0, npuname);
    _FDT(npuoff);
    _FDT(fdt_setprop_cell(fdt, npuoff, "#address-cells", 1));
    _FDT(fdt_setprop_cell(fdt, npuoff, "#size-cells", 0));
    /* Advertise NPU as POWER9 so the guest can enable NPU2 contexts */
    _FDT((fdt_setprop_string(fdt, npuoff, "compatible", "ibm,power9-npu")));
    g_free(npuname);

    /* linkidx numbers the links consecutively across all slots */
    for (i = 0, linkidx = 0; i < sphb->nvgpus->num; ++i) {
        for (j = 0; j < sphb->nvgpus->slots[i].linknum; ++j) {
            char *linkname = g_strdup_printf("link@%d", linkidx);
            int off = fdt_add_subnode(fdt, npuoff, linkname);

            _FDT(off);
            /* _FDT((fdt_setprop_cell(fdt, off, "reg", linkidx))); */
            _FDT((fdt_setprop_string(fdt, off, "compatible",
                                     "ibm,npu-link")));
            _FDT((fdt_setprop_cell(fdt, off, "phandle",
                                   PHANDLE_NVLINK(sphb, i, j))));
            _FDT((fdt_setprop_cell(fdt, off, "ibm,npu-link-index", linkidx)));
            g_free(linkname);
            ++linkidx;
        }
    }

    /* Add memory nodes for GPU RAM and mark them unusable */
    for (i = 0; i < sphb->nvgpus->num; ++i) {
        struct spapr_phb_pci_nvgpu_slot *nvslot = &sphb->nvgpus->slots[i];
        uint32_t associativity[] = {
            cpu_to_be32(0x4),
            SPAPR_GPU_NUMA_ID,
            SPAPR_GPU_NUMA_ID,
            SPAPR_GPU_NUMA_ID,
            cpu_to_be32(nvslot->numa_id)
        };
        Object *nv_mrobj;
        uint64_t size;
        uint64_t mem_reg[2];
        char *mem_name;
        int off;

        /*
         * Skip "slot" without attached GPU, as the other device tree
         * helpers for NVLink2 do: there is no device to query for the
         * RAM region, and looking up a link on a NULL object is invalid.
         */
        if (!nvslot->gpdev) {
            continue;
        }

        nv_mrobj = object_property_get_link(OBJECT(nvslot->gpdev),
                                            "nvlink2-mr[0]", NULL);
        size = object_property_get_uint(nv_mrobj, "size", NULL);
        mem_reg[0] = cpu_to_be64(nvslot->gpa);
        mem_reg[1] = cpu_to_be64(size);

        mem_name = g_strdup_printf("memory@%"PRIx64, nvslot->gpa);
        off = fdt_add_subnode(fdt, 0, mem_name);

        _FDT(off);
        _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
        _FDT((fdt_setprop(fdt, off, "reg", mem_reg, sizeof(mem_reg))));
        _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
                          sizeof(associativity))));

        _FDT((fdt_setprop_string(fdt, off, "compatible",
                                 "ibm,coherent-device-memory")));

        /* Same base address with a zero size: nothing here is usable RAM */
        mem_reg[1] = cpu_to_be64(0);
        _FDT((fdt_setprop(fdt, off, "linux,usable-memory", mem_reg,
                          sizeof(mem_reg))));
        _FDT((fdt_setprop_cell(fdt, off, "phandle",
                               PHANDLE_GPURAM(sphb, i))));
        g_free(mem_name);
    }
}
|
||||
|
||||
void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
|
||||
SpaprPhbState *sphb)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
if (!sphb->nvgpus) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < sphb->nvgpus->num; ++i) {
|
||||
struct spapr_phb_pci_nvgpu_slot *nvslot = &sphb->nvgpus->slots[i];
|
||||
|
||||
/* Skip "slot" without attached GPU */
|
||||
if (!nvslot->gpdev) {
|
||||
continue;
|
||||
}
|
||||
if (dev == nvslot->gpdev) {
|
||||
uint32_t npus[nvslot->linknum];
|
||||
|
||||
for (j = 0; j < nvslot->linknum; ++j) {
|
||||
PCIDevice *npdev = nvslot->links[j].npdev;
|
||||
|
||||
npus[j] = cpu_to_be32(PHANDLE_PCIDEV(sphb, npdev));
|
||||
}
|
||||
_FDT(fdt_setprop(fdt, offset, "ibm,npu", npus,
|
||||
j * sizeof(npus[0])));
|
||||
_FDT((fdt_setprop_cell(fdt, offset, "phandle",
|
||||
PHANDLE_PCIDEV(sphb, dev))));
|
||||
continue;
|
||||
}
|
||||
|
||||
for (j = 0; j < nvslot->linknum; ++j) {
|
||||
if (dev != nvslot->links[j].npdev) {
|
||||
continue;
|
||||
}
|
||||
|
||||
_FDT((fdt_setprop_cell(fdt, offset, "phandle",
|
||||
PHANDLE_PCIDEV(sphb, dev))));
|
||||
_FDT(fdt_setprop_cell(fdt, offset, "ibm,gpu",
|
||||
PHANDLE_PCIDEV(sphb, nvslot->gpdev)));
|
||||
_FDT((fdt_setprop_cell(fdt, offset, "ibm,nvlink",
|
||||
PHANDLE_NVLINK(sphb, i, j))));
|
||||
/*
|
||||
* If we ever want to emulate GPU RAM at the same location as on
|
||||
* the host - here is the encoding GPA->TGT:
|
||||
*
|
||||
* gta = ((sphb->nv2_gpa >> 42) & 0x1) << 42;
|
||||
* gta |= ((sphb->nv2_gpa >> 45) & 0x3) << 43;
|
||||
* gta |= ((sphb->nv2_gpa >> 49) & 0x3) << 45;
|
||||
* gta |= sphb->nv2_gpa & ((1UL << 43) - 1);
|
||||
*/
|
||||
_FDT(fdt_setprop_cell(fdt, offset, "memory-region",
|
||||
PHANDLE_GPURAM(sphb, i)));
|
||||
_FDT(fdt_setprop_u64(fdt, offset, "ibm,device-tgt-addr",
|
||||
nvslot->tgt));
|
||||
_FDT(fdt_setprop_cell(fdt, offset, "ibm,nvlink-speed",
|
||||
nvslot->links[j].link_speed));
|
||||
}
|
||||
}
|
||||
}
|
|
@ -404,7 +404,7 @@ void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn)
|
|||
|
||||
token -= RTAS_TOKEN_BASE;
|
||||
|
||||
assert(!rtas_table[token].name);
|
||||
assert(!name || !rtas_table[token].name);
|
||||
|
||||
rtas_table[token].name = name;
|
||||
rtas_table[token].fn = fn;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue