qemu/target/i386/tcg/sysemu/excp_helper.c
Stephen Michael Jothen 9f9dcb96a4 target/i386/tcg: Fix masking of real-mode addresses with A20 bit
The correct A20 masking is done if paging is enabled (protected mode) but it
seems to have been forgotten in real mode. For example from the AMD64 APM Vol. 2
section 1.2.4:

> If the sum of the segment base and effective address carries over into bit 20,
> that bit can be optionally truncated to mimic the 20-bit address wrapping of the
> 8086 processor by using the A20M# input signal to mask the A20 address bit.

Most BIOSes will enable the A20 line on boot, but I found by disabling the A20 line
afterwards, the correct wrapping wasn't taking place.

`handle_mmu_fault' in target/i386/tcg/sysemu/excp_helper.c seems to be the culprit.
In real mode, it fills the TLB with the raw unmasked address. However, for the
protected mode, the `mmu_translate' function does the correct A20 masking.

The fix then should be to just apply the A20 mask in the first branch of the if
statement.

Signed-off-by: Stephen Michael Jothen <sjothen@gmail.com>
Message-Id: <Yo5MUMSz80jXtvt9@air-old.local>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-06-06 09:26:53 +02:00

441 lines
14 KiB
C

/*
* x86 exception helpers - sysemu code
*
* Copyright (c) 2003 Fabrice Bellard
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg/helper-tcg.h"
#define PG_ERROR_OK (-1)
typedef hwaddr (*MMUTranslateFunc)(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
int *prot);
#define GET_HPHYS(cs, gpa, access_type, prot) \
(get_hphys_func ? get_hphys_func(cs, gpa, access_type, prot) : gpa)
static int mmu_translate(CPUState *cs, hwaddr addr, MMUTranslateFunc get_hphys_func,
uint64_t cr3, int is_write1, int mmu_idx, int pg_mode,
hwaddr *xlat, int *page_size, int *prot)
{
X86CPU *cpu = X86_CPU(cs);
CPUX86State *env = &cpu->env;
uint64_t ptep, pte;
int32_t a20_mask;
target_ulong pde_addr, pte_addr;
int error_code = 0;
int is_dirty, is_write, is_user;
uint64_t rsvd_mask = PG_ADDRESS_MASK & ~MAKE_64BIT_MASK(0, cpu->phys_bits);
uint32_t page_offset;
uint32_t pkr;
is_user = (mmu_idx == MMU_USER_IDX);
is_write = is_write1 & 1;
a20_mask = x86_get_a20_mask(env);
if (!(pg_mode & PG_MODE_NXE)) {
rsvd_mask |= PG_NX_MASK;
}
if (pg_mode & PG_MODE_PAE) {
uint64_t pde, pdpe;
target_ulong pdpe_addr;
#ifdef TARGET_X86_64
if (pg_mode & PG_MODE_LMA) {
bool la57 = pg_mode & PG_MODE_LA57;
uint64_t pml5e_addr, pml5e;
uint64_t pml4e_addr, pml4e;
if (la57) {
pml5e_addr = ((cr3 & ~0xfff) +
(((addr >> 48) & 0x1ff) << 3)) & a20_mask;
pml5e_addr = GET_HPHYS(cs, pml5e_addr, MMU_DATA_STORE, NULL);
pml5e = x86_ldq_phys(cs, pml5e_addr);
if (!(pml5e & PG_PRESENT_MASK)) {
goto do_fault;
}
if (pml5e & (rsvd_mask | PG_PSE_MASK)) {
goto do_fault_rsvd;
}
if (!(pml5e & PG_ACCESSED_MASK)) {
pml5e |= PG_ACCESSED_MASK;
x86_stl_phys_notdirty(cs, pml5e_addr, pml5e);
}
ptep = pml5e ^ PG_NX_MASK;
} else {
pml5e = cr3;
ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
}
pml4e_addr = ((pml5e & PG_ADDRESS_MASK) +
(((addr >> 39) & 0x1ff) << 3)) & a20_mask;
pml4e_addr = GET_HPHYS(cs, pml4e_addr, MMU_DATA_STORE, NULL);
pml4e = x86_ldq_phys(cs, pml4e_addr);
if (!(pml4e & PG_PRESENT_MASK)) {
goto do_fault;
}
if (pml4e & (rsvd_mask | PG_PSE_MASK)) {
goto do_fault_rsvd;
}
if (!(pml4e & PG_ACCESSED_MASK)) {
pml4e |= PG_ACCESSED_MASK;
x86_stl_phys_notdirty(cs, pml4e_addr, pml4e);
}
ptep &= pml4e ^ PG_NX_MASK;
pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
a20_mask;
pdpe_addr = GET_HPHYS(cs, pdpe_addr, MMU_DATA_STORE, NULL);
pdpe = x86_ldq_phys(cs, pdpe_addr);
if (!(pdpe & PG_PRESENT_MASK)) {
goto do_fault;
}
if (pdpe & rsvd_mask) {
goto do_fault_rsvd;
}
ptep &= pdpe ^ PG_NX_MASK;
if (!(pdpe & PG_ACCESSED_MASK)) {
pdpe |= PG_ACCESSED_MASK;
x86_stl_phys_notdirty(cs, pdpe_addr, pdpe);
}
if (pdpe & PG_PSE_MASK) {
/* 1 GB page */
*page_size = 1024 * 1024 * 1024;
pte_addr = pdpe_addr;
pte = pdpe;
goto do_check_protect;
}
} else
#endif
{
/* XXX: load them when cr3 is loaded ? */
pdpe_addr = ((cr3 & ~0x1f) + ((addr >> 27) & 0x18)) &
a20_mask;
pdpe_addr = GET_HPHYS(cs, pdpe_addr, MMU_DATA_STORE, NULL);
pdpe = x86_ldq_phys(cs, pdpe_addr);
if (!(pdpe & PG_PRESENT_MASK)) {
goto do_fault;
}
rsvd_mask |= PG_HI_USER_MASK;
if (pdpe & (rsvd_mask | PG_NX_MASK)) {
goto do_fault_rsvd;
}
ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
}
pde_addr = ((pdpe & PG_ADDRESS_MASK) + (((addr >> 21) & 0x1ff) << 3)) &
a20_mask;
pde_addr = GET_HPHYS(cs, pde_addr, MMU_DATA_STORE, NULL);
pde = x86_ldq_phys(cs, pde_addr);
if (!(pde & PG_PRESENT_MASK)) {
goto do_fault;
}
if (pde & rsvd_mask) {
goto do_fault_rsvd;
}
ptep &= pde ^ PG_NX_MASK;
if (pde & PG_PSE_MASK) {
/* 2 MB page */
*page_size = 2048 * 1024;
pte_addr = pde_addr;
pte = pde;
goto do_check_protect;
}
/* 4 KB page */
if (!(pde & PG_ACCESSED_MASK)) {
pde |= PG_ACCESSED_MASK;
x86_stl_phys_notdirty(cs, pde_addr, pde);
}
pte_addr = ((pde & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 3)) &
a20_mask;
pte_addr = GET_HPHYS(cs, pte_addr, MMU_DATA_STORE, NULL);
pte = x86_ldq_phys(cs, pte_addr);
if (!(pte & PG_PRESENT_MASK)) {
goto do_fault;
}
if (pte & rsvd_mask) {
goto do_fault_rsvd;
}
/* combine pde and pte nx, user and rw protections */
ptep &= pte ^ PG_NX_MASK;
*page_size = 4096;
} else {
uint32_t pde;
/* page directory entry */
pde_addr = ((cr3 & ~0xfff) + ((addr >> 20) & 0xffc)) &
a20_mask;
pde_addr = GET_HPHYS(cs, pde_addr, MMU_DATA_STORE, NULL);
pde = x86_ldl_phys(cs, pde_addr);
if (!(pde & PG_PRESENT_MASK)) {
goto do_fault;
}
ptep = pde | PG_NX_MASK;
/* if PSE bit is set, then we use a 4MB page */
if ((pde & PG_PSE_MASK) && (pg_mode & PG_MODE_PSE)) {
*page_size = 4096 * 1024;
pte_addr = pde_addr;
/* Bits 20-13 provide bits 39-32 of the address, bit 21 is reserved.
* Leave bits 20-13 in place for setting accessed/dirty bits below.
*/
pte = pde | ((pde & 0x1fe000LL) << (32 - 13));
rsvd_mask = 0x200000;
goto do_check_protect_pse36;
}
if (!(pde & PG_ACCESSED_MASK)) {
pde |= PG_ACCESSED_MASK;
x86_stl_phys_notdirty(cs, pde_addr, pde);
}
/* page directory entry */
pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) &
a20_mask;
pte_addr = GET_HPHYS(cs, pte_addr, MMU_DATA_STORE, NULL);
pte = x86_ldl_phys(cs, pte_addr);
if (!(pte & PG_PRESENT_MASK)) {
goto do_fault;
}
/* combine pde and pte user and rw protections */
ptep &= pte | PG_NX_MASK;
*page_size = 4096;
rsvd_mask = 0;
}
do_check_protect:
rsvd_mask |= (*page_size - 1) & PG_ADDRESS_MASK & ~PG_PSE_PAT_MASK;
do_check_protect_pse36:
if (pte & rsvd_mask) {
goto do_fault_rsvd;
}
ptep ^= PG_NX_MASK;
/* can the page can be put in the TLB? prot will tell us */
if (is_user && !(ptep & PG_USER_MASK)) {
goto do_fault_protect;
}
*prot = 0;
if (mmu_idx != MMU_KSMAP_IDX || !(ptep & PG_USER_MASK)) {
*prot |= PAGE_READ;
if ((ptep & PG_RW_MASK) || !(is_user || (pg_mode & PG_MODE_WP))) {
*prot |= PAGE_WRITE;
}
}
if (!(ptep & PG_NX_MASK) &&
(mmu_idx == MMU_USER_IDX ||
!((pg_mode & PG_MODE_SMEP) && (ptep & PG_USER_MASK)))) {
*prot |= PAGE_EXEC;
}
if (ptep & PG_USER_MASK) {
pkr = pg_mode & PG_MODE_PKE ? env->pkru : 0;
} else {
pkr = pg_mode & PG_MODE_PKS ? env->pkrs : 0;
}
if (pkr) {
uint32_t pk = (pte & PG_PKRU_MASK) >> PG_PKRU_BIT;
uint32_t pkr_ad = (pkr >> pk * 2) & 1;
uint32_t pkr_wd = (pkr >> pk * 2) & 2;
uint32_t pkr_prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
if (pkr_ad) {
pkr_prot &= ~(PAGE_READ | PAGE_WRITE);
} else if (pkr_wd && (is_user || (pg_mode & PG_MODE_WP))) {
pkr_prot &= ~PAGE_WRITE;
}
*prot &= pkr_prot;
if ((pkr_prot & (1 << is_write1)) == 0) {
assert(is_write1 != 2);
error_code |= PG_ERROR_PK_MASK;
goto do_fault_protect;
}
}
if ((*prot & (1 << is_write1)) == 0) {
goto do_fault_protect;
}
/* yes, it can! */
is_dirty = is_write && !(pte & PG_DIRTY_MASK);
if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
pte |= PG_ACCESSED_MASK;
if (is_dirty) {
pte |= PG_DIRTY_MASK;
}
x86_stl_phys_notdirty(cs, pte_addr, pte);
}
if (!(pte & PG_DIRTY_MASK)) {
/* only set write access if already dirty... otherwise wait
for dirty access */
assert(!is_write);
*prot &= ~PAGE_WRITE;
}
pte = pte & a20_mask;
/* align to page_size */
pte &= PG_ADDRESS_MASK & ~(*page_size - 1);
page_offset = addr & (*page_size - 1);
*xlat = GET_HPHYS(cs, pte + page_offset, is_write1, prot);
return PG_ERROR_OK;
do_fault_rsvd:
error_code |= PG_ERROR_RSVD_MASK;
do_fault_protect:
error_code |= PG_ERROR_P_MASK;
do_fault:
error_code |= (is_write << PG_ERROR_W_BIT);
if (is_user)
error_code |= PG_ERROR_U_MASK;
if (is_write1 == 2 &&
((pg_mode & PG_MODE_NXE) || (pg_mode & PG_MODE_SMEP)))
error_code |= PG_ERROR_I_D_MASK;
return error_code;
}
hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
int *prot)
{
CPUX86State *env = &X86_CPU(cs)->env;
uint64_t exit_info_1;
int page_size;
int next_prot;
hwaddr hphys;
if (likely(!(env->hflags2 & HF2_NPT_MASK))) {
return gphys;
}
exit_info_1 = mmu_translate(cs, gphys, NULL, env->nested_cr3,
access_type, MMU_USER_IDX, env->nested_pg_mode,
&hphys, &page_size, &next_prot);
if (exit_info_1 == PG_ERROR_OK) {
if (prot) {
*prot &= next_prot;
}
return hphys;
}
x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
gphys);
if (prot) {
exit_info_1 |= SVM_NPTEXIT_GPA;
} else { /* page table access */
exit_info_1 |= SVM_NPTEXIT_GPT;
}
cpu_vmexit(env, SVM_EXIT_NPF, exit_info_1, env->retaddr);
}
/* return value:
* -1 = cannot handle fault
* 0 = nothing more to do
* 1 = generate PF fault
*/
static int handle_mmu_fault(CPUState *cs, vaddr addr, int size,
int is_write1, int mmu_idx)
{
X86CPU *cpu = X86_CPU(cs);
CPUX86State *env = &cpu->env;
int error_code = PG_ERROR_OK;
int pg_mode, prot, page_size;
int32_t a20_mask;
hwaddr paddr;
hwaddr vaddr;
#if defined(DEBUG_MMU)
printf("MMU fault: addr=%" VADDR_PRIx " w=%d mmu=%d eip=" TARGET_FMT_lx "\n",
addr, is_write1, mmu_idx, env->eip);
#endif
if (!(env->cr[0] & CR0_PG_MASK)) {
a20_mask = x86_get_a20_mask(env);
paddr = addr & a20_mask;
#ifdef TARGET_X86_64
if (!(env->hflags & HF_LMA_MASK)) {
/* Without long mode we can only address 32bits in real mode */
paddr = (uint32_t)paddr;
}
#endif
prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
page_size = 4096;
} else {
pg_mode = get_pg_mode(env);
if (pg_mode & PG_MODE_LMA) {
int32_t sext;
/* test virtual address sign extension */
sext = (int64_t)addr >> (pg_mode & PG_MODE_LA57 ? 56 : 47);
if (sext != 0 && sext != -1) {
env->error_code = 0;
cs->exception_index = EXCP0D_GPF;
return 1;
}
}
error_code = mmu_translate(cs, addr, get_hphys, env->cr[3], is_write1,
mmu_idx, pg_mode,
&paddr, &page_size, &prot);
}
if (error_code == PG_ERROR_OK) {
/* Even if 4MB pages, we map only one 4KB page in the cache to
avoid filling it too fast */
vaddr = addr & TARGET_PAGE_MASK;
paddr &= TARGET_PAGE_MASK;
assert(prot & (1 << is_write1));
tlb_set_page_with_attrs(cs, vaddr, paddr, cpu_get_mem_attrs(env),
prot, mmu_idx, page_size);
return 0;
} else {
if (env->intercept_exceptions & (1 << EXCP0E_PAGE)) {
/* cr2 is not modified in case of exceptions */
x86_stq_phys(cs,
env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
addr);
} else {
env->cr[2] = addr;
}
env->error_code = error_code;
cs->exception_index = EXCP0E_PAGE;
return 1;
}
}
bool x86_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
MMUAccessType access_type, int mmu_idx,
bool probe, uintptr_t retaddr)
{
X86CPU *cpu = X86_CPU(cs);
CPUX86State *env = &cpu->env;
env->retaddr = retaddr;
if (handle_mmu_fault(cs, addr, size, access_type, mmu_idx)) {
/* FIXME: On error in get_hphys we have already jumped out. */
g_assert(!probe);
raise_exception_err_ra(env, cs->exception_index,
env->error_code, retaddr);
}
return true;
}