system/physmem: handle hugetlb correctly in qemu_ram_remap()

The list of hwpoison pages used to remap the memory on reset
is based on the backend's real page size.
To correctly handle hugetlb, we must mmap(MAP_FIXED) a complete
hugetlb page; hugetlb pages cannot be partially mapped.

Signed-off-by: William Roche <william.roche@oracle.com>
Co-developed-by: David Hildenbrand <david@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Link: https://lore.kernel.org/r/20250211212707.302391-2-william.roche@oracle.com
Signed-off-by: Peter Xu <peterx@redhat.com>
This commit is contained in:
William Roche 2025-02-11 21:27:05 +00:00 committed by Peter Xu
parent 1cceedd772
commit c1cda1c5f8
3 changed files with 31 additions and 11 deletions

View file

@ -1288,7 +1288,7 @@ static void kvm_unpoison_all(void *param)
QLIST_FOREACH_SAFE(page, &hwpoison_page_list, list, next_page) {
QLIST_REMOVE(page, list);
qemu_ram_remap(page->ram_addr, TARGET_PAGE_SIZE);
qemu_ram_remap(page->ram_addr);
g_free(page);
}
}

View file

@ -67,7 +67,7 @@ typedef uintptr_t ram_addr_t;
/* memory API */
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
void qemu_ram_remap(ram_addr_t addr);
/* This should not be used by devices. */
ram_addr_t qemu_ram_addr_from_host(void *ptr);
ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr);

View file

@ -2275,17 +2275,35 @@ void qemu_ram_free(RAMBlock *block)
}
#ifndef _WIN32
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
/*
* qemu_ram_remap - remap a single RAM page
*
* @addr: address in ram_addr_t address space.
*
* This function will try remapping a single page of guest RAM identified by
* @addr, essentially discarding memory to recover from previously poisoned
* memory (MCE). The page size depends on the RAMBlock (i.e., hugetlb). @addr
* does not have to point at the start of the page.
*
* This function is only to be used during system resets; it will kill the
* VM if remapping fails.
*/
void qemu_ram_remap(ram_addr_t addr)
{
RAMBlock *block;
ram_addr_t offset;
uint64_t offset;
int flags;
void *area, *vaddr;
int prot;
size_t page_size;
RAMBLOCK_FOREACH(block) {
offset = addr - block->offset;
if (offset < block->max_length) {
/* Respect the pagesize of our RAMBlock */
page_size = qemu_ram_pagesize(block);
offset = QEMU_ALIGN_DOWN(offset, page_size);
vaddr = ramblock_ptr(block, offset);
if (block->flags & RAM_PREALLOC) {
;
@ -2299,21 +2317,23 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
prot = PROT_READ;
prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
if (block->fd >= 0) {
area = mmap(vaddr, length, prot, flags, block->fd,
area = mmap(vaddr, page_size, prot, flags, block->fd,
offset + block->fd_offset);
} else {
flags |= MAP_ANONYMOUS;
area = mmap(vaddr, length, prot, flags, -1, 0);
area = mmap(vaddr, page_size, prot, flags, -1, 0);
}
if (area != vaddr) {
error_report("Could not remap addr: "
RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
length, addr);
error_report("Could not remap RAM %s:%" PRIx64 "+%" PRIx64
" +%zx", block->idstr, offset,
block->fd_offset, page_size);
exit(1);
}
memory_try_enable_merging(vaddr, length);
qemu_ram_setup_dump(vaddr, length);
memory_try_enable_merging(vaddr, page_size);
qemu_ram_setup_dump(vaddr, page_size);
}
break;
}
}
}