system/physmem: handle hugetlb correctly in qemu_ram_remap()

The list of hwpoison pages used to remap the memory on reset
is based on the backend's real page size.
To handle hugetlb correctly, we must mmap(MAP_FIXED) a complete
hugetlb page; hugetlb pages cannot be partially mapped.
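
For context, a minimal sketch (not part of this patch; all names here
are hypothetical) of the remap step described above: the faulting
offset is first aligned down to the block's backing page size, so that
mmap(MAP_FIXED) always replaces one complete page:

    #include <stddef.h>
    #include <stdint.h>
    #include <sys/mman.h>
    #include <sys/types.h>

    /* Hypothetical helper: replace the whole backing page containing a
     * poisoned address. hugetlb mappings must cover complete huge pages,
     * so the offset is aligned down to page_size first. */
    static void *remap_one_page(char *block_host, int fd, off_t fd_offset,
                                uintptr_t offset, size_t page_size)
    {
        offset &= ~((uintptr_t)page_size - 1);   /* align to page start */

        if (fd >= 0) {
            /* fd-backed RAM (e.g. hugetlbfs): map the same file range again */
            return mmap(block_host + offset, page_size,
                        PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_FIXED, fd, fd_offset + (off_t)offset);
        }
        /* anonymous RAM: a fresh zeroed page replaces the poisoned one */
        return mmap(block_host + offset, page_size, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
    }

MAP_FIXED atomically replaces the existing mapping at that address, and
hugetlb mappings are only valid in whole-huge-page units, which is why
remapping TARGET_PAGE_SIZE bytes cannot work for a hugetlb-backed
RAMBlock.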

Signed-off-by: William Roche <william.roche@oracle.com>
Co-developed-by: David Hildenbrand <david@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Link: https://lore.kernel.org/r/20250211212707.302391-2-william.roche@oracle.com
Signed-off-by: Peter Xu <peterx@redhat.com>
commit c1cda1c5f8 (parent 1cceedd772)
Author: William Roche <william.roche@oracle.com>, 2025-02-11 21:27:05 +00:00
Committed-by: Peter Xu
3 files changed, 31 insertions(+), 11 deletions(-)

accel/kvm/kvm-all.c

@@ -1288,7 +1288,7 @@ static void kvm_unpoison_all(void *param)
 
     QLIST_FOREACH_SAFE(page, &hwpoison_page_list, list, next_page) {
         QLIST_REMOVE(page, list);
-        qemu_ram_remap(page->ram_addr, TARGET_PAGE_SIZE);
+        qemu_ram_remap(page->ram_addr);
         g_free(page);
     }
 }

include/exec/cpu-common.h

@@ -67,7 +67,7 @@ typedef uintptr_t ram_addr_t;
 
 /* memory API */
 
-void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
+void qemu_ram_remap(ram_addr_t addr);
 /* This should not be used by devices. */
 ram_addr_t qemu_ram_addr_from_host(void *ptr);
 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr);

system/physmem.c

@@ -2275,17 +2275,35 @@ void qemu_ram_free(RAMBlock *block)
 }
 
 #ifndef _WIN32
-void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
+/*
+ * qemu_ram_remap - remap a single RAM page
+ *
+ * @addr: address in ram_addr_t address space.
+ *
+ * This function will try remapping a single page of guest RAM identified by
+ * @addr, essentially discarding memory to recover from previously poisoned
+ * memory (MCE). The page size depends on the RAMBlock (i.e., hugetlb). @addr
+ * does not have to point at the start of the page.
+ *
+ * This function is only to be used during system resets; it will kill the
+ * VM if remapping failed.
+ */
+void qemu_ram_remap(ram_addr_t addr)
 {
     RAMBlock *block;
-    ram_addr_t offset;
+    uint64_t offset;
     int flags;
     void *area, *vaddr;
     int prot;
+    size_t page_size;
 
     RAMBLOCK_FOREACH(block) {
         offset = addr - block->offset;
         if (offset < block->max_length) {
+            /* Respect the pagesize of our RAMBlock */
+            page_size = qemu_ram_pagesize(block);
+            offset = QEMU_ALIGN_DOWN(offset, page_size);
+
             vaddr = ramblock_ptr(block, offset);
             if (block->flags & RAM_PREALLOC) {
                 ;
@@ -2299,21 +2317,23 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
                 prot = PROT_READ;
                 prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
                 if (block->fd >= 0) {
-                    area = mmap(vaddr, length, prot, flags, block->fd,
+                    area = mmap(vaddr, page_size, prot, flags, block->fd,
                                 offset + block->fd_offset);
                 } else {
                     flags |= MAP_ANONYMOUS;
-                    area = mmap(vaddr, length, prot, flags, -1, 0);
+                    area = mmap(vaddr, page_size, prot, flags, -1, 0);
                 }
                 if (area != vaddr) {
-                    error_report("Could not remap addr: "
-                                 RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
-                                 length, addr);
+                    error_report("Could not remap RAM %s:%" PRIx64 "+%" PRIx64
+                                 " +%zx", block->idstr, offset,
+                                 block->fd_offset, page_size);
                     exit(1);
                 }
-                memory_try_enable_merging(vaddr, length);
-                qemu_ram_setup_dump(vaddr, length);
+                memory_try_enable_merging(vaddr, page_size);
+                qemu_ram_setup_dump(vaddr, page_size);
             }
+
+            break;
         }
     }
 }
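
A note on the alignment step above: QEMU's QEMU_ALIGN_DOWN(n, m) rounds
n down to the nearest multiple of m, so the remap always starts on a
backing-page boundary. A small stand-alone check of the arithmetic
(ALIGN_DOWN is a local stand-in for the QEMU macro):

    #include <assert.h>
    #include <stddef.h>

    /* Same semantics as QEMU's QEMU_ALIGN_DOWN() */
    #define ALIGN_DOWN(n, m) ((n) / (m) * (m))

    int main(void)
    {
        size_t huge = 2 * 1024 * 1024;   /* 2 MiB hugetlb page */

        /* A poisoned address 0x251000 into the block maps back to the
         * start of its containing huge page, 0x200000. */
        assert(ALIGN_DOWN((size_t)0x251000, huge) == 0x200000);
        return 0;
    }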