x86: implement la57 paging mode

The new paging mode is an extension of IA32e mode with one additional page
table level.

It brings support for a 57-bit virtual address space (128PB) and a 52-bit
physical address space (4PB).

The structure of the new page table level is identical to pml4.

The feature is enumerated with CPUID.(EAX=07H, ECX=0):ECX[bit 16].

CR4.LA57[bit 12] needs to be set when paging is enabled to activate 5-level
paging mode.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Message-Id: <20161215001305.146807-1-kirill.shutemov@linux.intel.com>
[Drop changes to target-i386/translate.c. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Kirill A. Shutemov 2016-12-15 03:13:05 +03:00 committed by Paolo Bonzini
parent c52ab08aee
commit 6c7c3c21f9
5 changed files with 274 additions and 74 deletions

View file

@ -30,13 +30,18 @@
#include "hmp.h"
static void print_pte(Monitor *mon, hwaddr addr,
hwaddr pte,
hwaddr mask)
static void print_pte(Monitor *mon, CPUArchState *env, hwaddr addr,
hwaddr pte, hwaddr mask)
{
#ifdef TARGET_X86_64
if (addr & (1ULL << 47)) {
addr |= -1LL << 48;
if (env->cr[4] & CR4_LA57_MASK) {
if (addr & (1ULL << 56)) {
addr |= -1LL << 57;
}
} else {
if (addr & (1ULL << 47)) {
addr |= -1LL << 48;
}
}
#endif
monitor_printf(mon, TARGET_FMT_plx ": " TARGET_FMT_plx
@ -66,13 +71,13 @@ static void tlb_info_32(Monitor *mon, CPUArchState *env)
if (pde & PG_PRESENT_MASK) {
if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
/* 4M pages */
print_pte(mon, (l1 << 22), pde, ~((1 << 21) - 1));
print_pte(mon, env, (l1 << 22), pde, ~((1 << 21) - 1));
} else {
for(l2 = 0; l2 < 1024; l2++) {
cpu_physical_memory_read((pde & ~0xfff) + l2 * 4, &pte, 4);
pte = le32_to_cpu(pte);
if (pte & PG_PRESENT_MASK) {
print_pte(mon, (l1 << 22) + (l2 << 12),
print_pte(mon, env, (l1 << 22) + (l2 << 12),
pte & ~PG_PSE_MASK,
~0xfff);
}
@ -100,7 +105,7 @@ static void tlb_info_pae32(Monitor *mon, CPUArchState *env)
if (pde & PG_PRESENT_MASK) {
if (pde & PG_PSE_MASK) {
/* 2M pages with PAE, CR4.PSE is ignored */
print_pte(mon, (l1 << 30 ) + (l2 << 21), pde,
print_pte(mon, env, (l1 << 30) + (l2 << 21), pde,
~((hwaddr)(1 << 20) - 1));
} else {
pt_addr = pde & 0x3fffffffff000ULL;
@ -108,7 +113,7 @@ static void tlb_info_pae32(Monitor *mon, CPUArchState *env)
cpu_physical_memory_read(pt_addr + l3 * 8, &pte, 8);
pte = le64_to_cpu(pte);
if (pte & PG_PRESENT_MASK) {
print_pte(mon, (l1 << 30 ) + (l2 << 21)
print_pte(mon, env, (l1 << 30) + (l2 << 21)
+ (l3 << 12),
pte & ~PG_PSE_MASK,
~(hwaddr)0xfff);
@ -122,61 +127,82 @@ static void tlb_info_pae32(Monitor *mon, CPUArchState *env)
}
#ifdef TARGET_X86_64
static void tlb_info_64(Monitor *mon, CPUArchState *env)
static void tlb_info_la48(Monitor *mon, CPUArchState *env,
uint64_t l0, uint64_t pml4_addr)
{
uint64_t l1, l2, l3, l4;
uint64_t pml4e, pdpe, pde, pte;
uint64_t pml4_addr, pdp_addr, pd_addr, pt_addr;
uint64_t pdp_addr, pd_addr, pt_addr;
pml4_addr = env->cr[3] & 0x3fffffffff000ULL;
for (l1 = 0; l1 < 512; l1++) {
cpu_physical_memory_read(pml4_addr + l1 * 8, &pml4e, 8);
pml4e = le64_to_cpu(pml4e);
if (pml4e & PG_PRESENT_MASK) {
pdp_addr = pml4e & 0x3fffffffff000ULL;
for (l2 = 0; l2 < 512; l2++) {
cpu_physical_memory_read(pdp_addr + l2 * 8, &pdpe, 8);
pdpe = le64_to_cpu(pdpe);
if (pdpe & PG_PRESENT_MASK) {
if (pdpe & PG_PSE_MASK) {
/* 1G pages, CR4.PSE is ignored */
print_pte(mon, (l1 << 39) + (l2 << 30), pdpe,
0x3ffffc0000000ULL);
} else {
pd_addr = pdpe & 0x3fffffffff000ULL;
for (l3 = 0; l3 < 512; l3++) {
cpu_physical_memory_read(pd_addr + l3 * 8, &pde, 8);
pde = le64_to_cpu(pde);
if (pde & PG_PRESENT_MASK) {
if (pde & PG_PSE_MASK) {
/* 2M pages, CR4.PSE is ignored */
print_pte(mon, (l1 << 39) + (l2 << 30) +
(l3 << 21), pde,
0x3ffffffe00000ULL);
} else {
pt_addr = pde & 0x3fffffffff000ULL;
for (l4 = 0; l4 < 512; l4++) {
cpu_physical_memory_read(pt_addr
+ l4 * 8,
&pte, 8);
pte = le64_to_cpu(pte);
if (pte & PG_PRESENT_MASK) {
print_pte(mon, (l1 << 39) +
(l2 << 30) +
(l3 << 21) + (l4 << 12),
pte & ~PG_PSE_MASK,
0x3fffffffff000ULL);
}
}
}
}
}
if (!(pml4e & PG_PRESENT_MASK)) {
continue;
}
pdp_addr = pml4e & 0x3fffffffff000ULL;
for (l2 = 0; l2 < 512; l2++) {
cpu_physical_memory_read(pdp_addr + l2 * 8, &pdpe, 8);
pdpe = le64_to_cpu(pdpe);
if (!(pdpe & PG_PRESENT_MASK)) {
continue;
}
if (pdpe & PG_PSE_MASK) {
/* 1G pages, CR4.PSE is ignored */
print_pte(mon, env, (l0 << 48) + (l1 << 39) + (l2 << 30),
pdpe, 0x3ffffc0000000ULL);
continue;
}
pd_addr = pdpe & 0x3fffffffff000ULL;
for (l3 = 0; l3 < 512; l3++) {
cpu_physical_memory_read(pd_addr + l3 * 8, &pde, 8);
pde = le64_to_cpu(pde);
if (!(pde & PG_PRESENT_MASK)) {
continue;
}
if (pde & PG_PSE_MASK) {
/* 2M pages, CR4.PSE is ignored */
print_pte(mon, env, (l0 << 48) + (l1 << 39) + (l2 << 30) +
(l3 << 21), pde, 0x3ffffffe00000ULL);
continue;
}
pt_addr = pde & 0x3fffffffff000ULL;
for (l4 = 0; l4 < 512; l4++) {
cpu_physical_memory_read(pt_addr
+ l4 * 8,
&pte, 8);
pte = le64_to_cpu(pte);
if (pte & PG_PRESENT_MASK) {
print_pte(mon, env, (l0 << 48) + (l1 << 39) +
(l2 << 30) + (l3 << 21) + (l4 << 12),
pte & ~PG_PSE_MASK, 0x3fffffffff000ULL);
}
}
}
}
}
}
/*
 * "info tlb" walker for 5-level (LA57) paging.
 *
 * Reads the 512 eight-byte top-level (PML5) entries located at the table
 * address taken from CR3.  For every entry that has PG_PRESENT_MASK set,
 * the 4-level walker tlb_info_la48() is invoked with the entry's index and
 * the next-level table address extracted from the entry.
 */
static void tlb_info_la57(Monitor *mon, CPUArchState *env)
{
    const uint64_t table_base = env->cr[3] & 0x3fffffffff000ULL;
    uint64_t idx;

    for (idx = 0; idx < 512; idx++) {
        uint64_t entry;

        cpu_physical_memory_read(table_base + idx * 8, &entry, 8);
        entry = le64_to_cpu(entry);

        /* Nothing is mapped below a non-present top-level entry. */
        if (!(entry & PG_PRESENT_MASK)) {
            continue;
        }

        tlb_info_la48(mon, env, idx, entry & 0x3fffffffff000ULL);
    }
}
#endif /* TARGET_X86_64 */
void hmp_info_tlb(Monitor *mon, const QDict *qdict)
@ -192,7 +218,11 @@ void hmp_info_tlb(Monitor *mon, const QDict *qdict)
if (env->cr[4] & CR4_PAE_MASK) {
#ifdef TARGET_X86_64
if (env->hflags & HF_LMA_MASK) {
tlb_info_64(mon, env);
if (env->cr[4] & CR4_LA57_MASK) {
tlb_info_la57(mon, env);
} else {
tlb_info_la48(mon, env, 0, env->cr[3] & 0x3fffffffff000ULL);
}
} else
#endif
{
@ -324,7 +354,7 @@ static void mem_info_pae32(Monitor *mon, CPUArchState *env)
#ifdef TARGET_X86_64
static void mem_info_64(Monitor *mon, CPUArchState *env)
static void mem_info_la48(Monitor *mon, CPUArchState *env)
{
int prot, last_prot;
uint64_t l1, l2, l3, l4;
@ -400,6 +430,98 @@ static void mem_info_64(Monitor *mon, CPUArchState *env)
/* Flush last range */
mem_print(mon, &start, &last_prot, (hwaddr)1 << 48, 0);
}
/*
 * "info mem" walker for 5-level (LA57) paging.
 *
 * Walks the page-table hierarchy rooted at the table address held in CR3:
 * PML5 -> PML4 -> PDPT -> PD -> PT, 512 eight-byte entries per table.  For
 * every entry visited, the virtual address at which that entry's range
 * begins is passed to mem_print() together with the protection bits that
 * apply from that address on (0 when nothing is mapped there).
 * NOTE(review): mem_print() is defined elsewhere in this file; presumably it
 * merges consecutive ranges with equal protection -- confirm.
 *
 * The protection reported for a mapping is the USER/RW/PRESENT bits of the
 * leaf entry ANDed with the same bits of every upper-level entry on the
 * path, including the PML5 entry.
 *
 * Addresses handed to mem_print() are built from the table indices only;
 * this function does not sign-extend them.
 */
static void mem_info_la57(Monitor *mon, CPUArchState *env)
{
    int prot, last_prot;
    uint64_t l0, l1, l2, l3, l4;
    uint64_t pml5e, pml4e, pdpe, pde, pte;
    uint64_t pml5_addr, pml4_addr, pdp_addr, pd_addr, pt_addr, start, end;

    pml5_addr = env->cr[3] & 0x3fffffffff000ULL;
    last_prot = 0;
    start = -1;
    for (l0 = 0; l0 < 512; l0++) {
        cpu_physical_memory_read(pml5_addr + l0 * 8, &pml5e, 8);
        /* Convert the PML5 entry itself (not pml4e) to host byte order. */
        pml5e = le64_to_cpu(pml5e);
        end = l0 << 48;
        if (!(pml5e & PG_PRESENT_MASK)) {
            prot = 0;
            mem_print(mon, &start, &last_prot, end, prot);
            continue;
        }

        pml4_addr = pml5e & 0x3fffffffff000ULL;
        for (l1 = 0; l1 < 512; l1++) {
            cpu_physical_memory_read(pml4_addr + l1 * 8, &pml4e, 8);
            pml4e = le64_to_cpu(pml4e);
            end = (l0 << 48) + (l1 << 39);
            if (!(pml4e & PG_PRESENT_MASK)) {
                prot = 0;
                mem_print(mon, &start, &last_prot, end, prot);
                continue;
            }

            pdp_addr = pml4e & 0x3fffffffff000ULL;
            for (l2 = 0; l2 < 512; l2++) {
                cpu_physical_memory_read(pdp_addr + l2 * 8, &pdpe, 8);
                pdpe = le64_to_cpu(pdpe);
                end = (l0 << 48) + (l1 << 39) + (l2 << 30);
                /* Only a NON-present entry ends the walk at this level. */
                if (!(pdpe & PG_PRESENT_MASK)) {
                    prot = 0;
                    mem_print(mon, &start, &last_prot, end, prot);
                    continue;
                }

                if (pdpe & PG_PSE_MASK) {
                    /* 1G page: the PDPT entry is the leaf. */
                    prot = pdpe & (PG_USER_MASK | PG_RW_MASK |
                                   PG_PRESENT_MASK);
                    prot &= pml5e & pml4e;
                    mem_print(mon, &start, &last_prot, end, prot);
                    continue;
                }

                pd_addr = pdpe & 0x3fffffffff000ULL;
                for (l3 = 0; l3 < 512; l3++) {
                    cpu_physical_memory_read(pd_addr + l3 * 8, &pde, 8);
                    pde = le64_to_cpu(pde);
                    end = (l0 << 48) + (l1 << 39) + (l2 << 30) + (l3 << 21);
                    /* Only a NON-present entry ends the walk at this level. */
                    if (!(pde & PG_PRESENT_MASK)) {
                        prot = 0;
                        mem_print(mon, &start, &last_prot, end, prot);
                        continue;
                    }

                    if (pde & PG_PSE_MASK) {
                        /* 2M page: the PD entry is the leaf. */
                        prot = pde & (PG_USER_MASK | PG_RW_MASK |
                                      PG_PRESENT_MASK);
                        prot &= pml5e & pml4e & pdpe;
                        mem_print(mon, &start, &last_prot, end, prot);
                        continue;
                    }

                    pt_addr = pde & 0x3fffffffff000ULL;
                    for (l4 = 0; l4 < 512; l4++) {
                        cpu_physical_memory_read(pt_addr + l4 * 8, &pte, 8);
                        pte = le64_to_cpu(pte);
                        end = (l0 << 48) + (l1 << 39) + (l2 << 30) +
                              (l3 << 21) + (l4 << 12);
                        if (pte & PG_PRESENT_MASK) {
                            prot = pte & (PG_USER_MASK | PG_RW_MASK |
                                          PG_PRESENT_MASK);
                            prot &= pml5e & pml4e & pdpe & pde;
                        } else {
                            prot = 0;
                        }
                        mem_print(mon, &start, &last_prot, end, prot);
                    }
                }
            }
        }
    }
    /* Flush last range */
    mem_print(mon, &start, &last_prot, (hwaddr)1 << 57, 0);
}
#endif /* TARGET_X86_64 */
void hmp_info_mem(Monitor *mon, const QDict *qdict)
@ -415,7 +537,11 @@ void hmp_info_mem(Monitor *mon, const QDict *qdict)
if (env->cr[4] & CR4_PAE_MASK) {
#ifdef TARGET_X86_64
if (env->hflags & HF_LMA_MASK) {
mem_info_64(mon, env);
if (env->cr[4] & CR4_LA57_MASK) {
mem_info_la57(mon, env);
} else {
mem_info_la48(mon, env);
}
} else
#endif
{