mirror of
https://github.com/Motorhead1991/qemu.git
synced 2025-08-03 07:43:54 -06:00
Memory pull request for 10.0
v2 changelog: - Fix Mac (and possibly some other) build issues for two patches - os: add an ability to lock memory on_fault - memory: pass MemTxAttrs to memory_access_is_direct() List of features: - William's fix on ram hole punching when with file offset - Daniil's patchset to introduce mem-lock=on-fault - William's hugetlb hwpoison fix for size report & remap - David's series to allow qemu debug writes to MMIOs -----BEGIN PGP SIGNATURE----- iIgEABYKADAWIQS5GE3CDMRX2s990ak7X8zN86vXBgUCZ6zcQBIccGV0ZXJ4QHJl ZGhhdC5jb20ACgkQO1/MzfOr1wbL3wEAqx94NpB/tEEBj6WXE3uV9LqQ0GCTYmV+ MbM51Vep8ksA/35yFn3ltM2yoSnUf9WJW6LXEEKhQlwswI0vChQERgkE =++O1 -----END PGP SIGNATURE----- Merge tag 'mem-next-pull-request' of https://gitlab.com/peterx/qemu into staging Memory pull request for 10.0 v2 changelog: - Fix Mac (and possibly some other) build issues for two patches - os: add an ability to lock memory on_fault - memory: pass MemTxAttrs to memory_access_is_direct() List of features: - William's fix on ram hole punching when with file offset - Daniil's patchset to introduce mem-lock=on-fault - William's hugetlb hwpoison fix for size report & remap - David's series to allow qemu debug writes to MMIOs # -----BEGIN PGP SIGNATURE----- # # iIgEABYKADAWIQS5GE3CDMRX2s990ak7X8zN86vXBgUCZ6zcQBIccGV0ZXJ4QHJl # ZGhhdC5jb20ACgkQO1/MzfOr1wbL3wEAqx94NpB/tEEBj6WXE3uV9LqQ0GCTYmV+ # MbM51Vep8ksA/35yFn3ltM2yoSnUf9WJW6LXEEKhQlwswI0vChQERgkE # =++O1 # -----END PGP SIGNATURE----- # gpg: Signature made Thu 13 Feb 2025 01:37:04 HKT # gpg: using EDDSA key B9184DC20CC457DACF7DD1A93B5FCCCDF3ABD706 # gpg: issuer "peterx@redhat.com" # gpg: Good signature from "Peter Xu <xzpeter@gmail.com>" [full] # gpg: aka "Peter Xu <peterx@redhat.com>" [full] # Primary key fingerprint: B918 4DC2 0CC4 57DA CF7D D1A9 3B5F CCCD F3AB D706 * tag 'mem-next-pull-request' of https://gitlab.com/peterx/qemu: overcommit: introduce mem-lock=on-fault system: introduce a new MlockState enum system/vl: extract overcommit option parsing into a 
helper os: add an ability to lock memory on_fault system/physmem: poisoned memory discard on reboot system/physmem: handle hugetlb correctly in qemu_ram_remap() physmem: teach cpu_memory_rw_debug() to write to more memory regions hmp: use cpu_get_phys_page_debug() in hmp_gva2gpa() memory: pass MemTxAttrs to memory_access_is_direct() physmem: disallow direct access to RAM DEVICE in address_space_write_rom() physmem: factor out direct access check into memory_region_supports_direct_access() physmem: factor out RAM/ROMD check in memory_access_is_direct() physmem: factor out memory_region_is_ram_device() check in memory_access_is_direct() system/physmem: take into account fd_offset for file fallocate Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
commit
7389992c84
21 changed files with 229 additions and 98 deletions
|
@ -1288,7 +1288,7 @@ static void kvm_unpoison_all(void *param)
|
|||
|
||||
QLIST_FOREACH_SAFE(page, &hwpoison_page_list, list, next_page) {
|
||||
QLIST_REMOVE(page, list);
|
||||
qemu_ram_remap(page->ram_addr, TARGET_PAGE_SIZE);
|
||||
qemu_ram_remap(page->ram_addr);
|
||||
g_free(page);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -51,13 +51,18 @@ hwaddr cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr,
|
|||
MemTxAttrs *attrs)
|
||||
{
|
||||
CPUClass *cc = CPU_GET_CLASS(cpu);
|
||||
hwaddr paddr;
|
||||
|
||||
if (cc->sysemu_ops->get_phys_page_attrs_debug) {
|
||||
return cc->sysemu_ops->get_phys_page_attrs_debug(cpu, addr, attrs);
|
||||
paddr = cc->sysemu_ops->get_phys_page_attrs_debug(cpu, addr, attrs);
|
||||
} else {
|
||||
/* Fallback for CPUs which don't implement the _attrs_ hook */
|
||||
*attrs = MEMTXATTRS_UNSPECIFIED;
|
||||
paddr = cc->sysemu_ops->get_phys_page_debug(cpu, addr);
|
||||
}
|
||||
/* Fallback for CPUs which don't implement the _attrs_ hook */
|
||||
*attrs = MEMTXATTRS_UNSPECIFIED;
|
||||
return cc->sysemu_ops->get_phys_page_debug(cpu, addr);
|
||||
/* Indicate that this is a debug access. */
|
||||
attrs->debug = 1;
|
||||
return paddr;
|
||||
}
|
||||
|
||||
hwaddr cpu_get_phys_page_debug(CPUState *cpu, vaddr addr)
|
||||
|
|
|
@ -144,7 +144,7 @@ ssize_t load_image_mr(const char *filename, MemoryRegion *mr)
|
|||
{
|
||||
ssize_t size;
|
||||
|
||||
if (!memory_access_is_direct(mr, false)) {
|
||||
if (!memory_access_is_direct(mr, false, MEMTXATTRS_UNSPECIFIED)) {
|
||||
/* Can only load an image into RAM or ROM */
|
||||
return -1;
|
||||
}
|
||||
|
|
|
@ -137,7 +137,8 @@ void *apple_gfx_host_ptr_for_gpa_range(uint64_t guest_physical,
|
|||
MEMTXATTRS_UNSPECIFIED);
|
||||
|
||||
if (!ram_region || ram_region_length < length ||
|
||||
!memory_access_is_direct(ram_region, !read_only)) {
|
||||
!memory_access_is_direct(ram_region, !read_only,
|
||||
MEMTXATTRS_UNSPECIFIED)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
|
@ -358,7 +358,7 @@ static int vfu_object_mr_rw(MemoryRegion *mr, uint8_t *buf, hwaddr offset,
|
|||
int access_size;
|
||||
uint64_t val;
|
||||
|
||||
if (memory_access_is_direct(mr, is_write)) {
|
||||
if (memory_access_is_direct(mr, is_write, MEMTXATTRS_UNSPECIFIED)) {
|
||||
/**
|
||||
* Some devices expose a PCI expansion ROM, which could be buffer
|
||||
* based as compared to other regions which are primarily based on
|
||||
|
|
|
@ -991,7 +991,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
|
|||
return;
|
||||
}
|
||||
|
||||
if (enable_mlock) {
|
||||
if (should_mlock(mlock_state)) {
|
||||
error_setg(errp, "Incompatible with mlock");
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -67,7 +67,7 @@ typedef uintptr_t ram_addr_t;
|
|||
|
||||
/* memory API */
|
||||
|
||||
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
|
||||
void qemu_ram_remap(ram_addr_t addr);
|
||||
/* This should not be used by devices. */
|
||||
ram_addr_t qemu_ram_addr_from_host(void *ptr);
|
||||
ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr);
|
||||
|
|
|
@ -44,6 +44,8 @@ typedef struct MemTxAttrs {
|
|||
* (see MEMTX_ACCESS_ERROR).
|
||||
*/
|
||||
unsigned int memory:1;
|
||||
/* Debug access that can even write to ROM. */
|
||||
unsigned int debug:1;
|
||||
/* Requester ID (for MSI for example) */
|
||||
unsigned int requester_id:16;
|
||||
|
||||
|
@ -56,7 +58,8 @@ typedef struct MemTxAttrs {
|
|||
* Bus masters which don't specify any attributes will get this
|
||||
* (via the MEMTXATTRS_UNSPECIFIED constant), so that we can
|
||||
* distinguish "all attributes deliberately clear" from
|
||||
* "didn't specify" if necessary.
|
||||
* "didn't specify" if necessary. "debug" can be set alongside
|
||||
* "unspecified".
|
||||
*/
|
||||
bool unspecified;
|
||||
|
||||
|
|
|
@ -2995,15 +2995,34 @@ MemTxResult address_space_write_cached_slow(MemoryRegionCache *cache,
|
|||
int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr);
|
||||
bool prepare_mmio_access(MemoryRegion *mr);
|
||||
|
||||
static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
|
||||
static inline bool memory_region_supports_direct_access(MemoryRegion *mr)
|
||||
{
|
||||
if (is_write) {
|
||||
return memory_region_is_ram(mr) && !mr->readonly &&
|
||||
!mr->rom_device && !memory_region_is_ram_device(mr);
|
||||
} else {
|
||||
return (memory_region_is_ram(mr) && !memory_region_is_ram_device(mr)) ||
|
||||
memory_region_is_romd(mr);
|
||||
/* ROM DEVICE regions only allow direct access if in ROMD mode. */
|
||||
if (memory_region_is_romd(mr)) {
|
||||
return true;
|
||||
}
|
||||
if (!memory_region_is_ram(mr)) {
|
||||
return false;
|
||||
}
|
||||
/*
|
||||
* RAM DEVICE regions can be accessed directly using memcpy, but it might
|
||||
* be MMIO and access using memcpy can be wrong (e.g., using instructions not
|
||||
* intended for MMIO access). So we treat this as IO.
|
||||
*/
|
||||
return !memory_region_is_ram_device(mr);
|
||||
}
|
||||
|
||||
static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write,
|
||||
MemTxAttrs attrs)
|
||||
{
|
||||
if (!memory_region_supports_direct_access(mr)) {
|
||||
return false;
|
||||
}
|
||||
/* Debug access can write to ROM. */
|
||||
if (is_write && !attrs.debug) {
|
||||
return !mr->readonly && !mr->rom_device;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -3036,7 +3055,7 @@ MemTxResult address_space_read(AddressSpace *as, hwaddr addr,
|
|||
fv = address_space_to_flatview(as);
|
||||
l = len;
|
||||
mr = flatview_translate(fv, addr, &addr1, &l, false, attrs);
|
||||
if (len == l && memory_access_is_direct(mr, false)) {
|
||||
if (len == l && memory_access_is_direct(mr, false, attrs)) {
|
||||
ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
|
||||
memcpy(buf, ptr, len);
|
||||
} else {
|
||||
|
|
|
@ -53,7 +53,7 @@ bool os_set_runas(const char *user_id);
|
|||
void os_set_chroot(const char *path);
|
||||
void os_setup_limits(void);
|
||||
void os_setup_post(void);
|
||||
int os_mlock(void);
|
||||
int os_mlock(bool on_fault);
|
||||
|
||||
/**
|
||||
* qemu_alloc_stack:
|
||||
|
|
|
@ -123,7 +123,7 @@ static inline bool is_daemonized(void)
|
|||
return false;
|
||||
}
|
||||
|
||||
static inline int os_mlock(void)
|
||||
static inline int os_mlock(bool on_fault G_GNUC_UNUSED)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
|
|
@ -44,10 +44,20 @@ extern int display_opengl;
|
|||
extern const char *keyboard_layout;
|
||||
extern int old_param;
|
||||
extern uint8_t *boot_splash_filedata;
|
||||
extern bool enable_mlock;
|
||||
extern bool enable_cpu_pm;
|
||||
extern QEMUClockType rtc_clock;
|
||||
|
||||
typedef enum {
|
||||
MLOCK_OFF = 0,
|
||||
MLOCK_ON,
|
||||
MLOCK_ON_FAULT,
|
||||
} MlockState;
|
||||
|
||||
bool should_mlock(MlockState);
|
||||
bool is_mlock_on_fault(MlockState);
|
||||
|
||||
extern MlockState mlock_state;
|
||||
|
||||
#define MAX_OPTION_ROMS 16
|
||||
typedef struct QEMUOptionRom {
|
||||
const char *name;
|
||||
|
|
|
@ -2885,6 +2885,12 @@ config_host_data.set('HAVE_MLOCKALL', cc.links(gnu_source_prefix + '''
|
|||
return mlockall(MCL_FUTURE);
|
||||
}'''))
|
||||
|
||||
config_host_data.set('HAVE_MLOCK_ONFAULT', cc.links(gnu_source_prefix + '''
|
||||
#include <sys/mman.h>
|
||||
int main(void) {
|
||||
return mlockall(MCL_FUTURE | MCL_ONFAULT);
|
||||
}'''))
|
||||
|
||||
have_l2tpv3 = false
|
||||
if get_option('l2tpv3').allowed() and have_system
|
||||
have_l2tpv3 = cc.has_type('struct mmsghdr',
|
||||
|
|
|
@ -651,8 +651,8 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
|
|||
mis->have_fault_thread = false;
|
||||
}
|
||||
|
||||
if (enable_mlock) {
|
||||
if (os_mlock() < 0) {
|
||||
if (should_mlock(mlock_state)) {
|
||||
if (os_mlock(is_mlock_on_fault(mlock_state)) < 0) {
|
||||
error_report("mlock: %s", strerror(errno));
|
||||
/*
|
||||
* It doesn't feel right to fail at this point, we have a valid
|
||||
|
|
|
@ -301,7 +301,6 @@ void hmp_gpa2hva(Monitor *mon, const QDict *qdict)
|
|||
void hmp_gva2gpa(Monitor *mon, const QDict *qdict)
|
||||
{
|
||||
target_ulong addr = qdict_get_int(qdict, "addr");
|
||||
MemTxAttrs attrs;
|
||||
CPUState *cs = mon_get_cpu(mon);
|
||||
hwaddr gpa;
|
||||
|
||||
|
@ -310,7 +309,7 @@ void hmp_gva2gpa(Monitor *mon, const QDict *qdict)
|
|||
return;
|
||||
}
|
||||
|
||||
gpa = cpu_get_phys_page_attrs_debug(cs, addr & TARGET_PAGE_MASK, &attrs);
|
||||
gpa = cpu_get_phys_page_debug(cs, addr & TARGET_PAGE_MASK);
|
||||
if (gpa == -1) {
|
||||
monitor_printf(mon, "Unmapped\n");
|
||||
} else {
|
||||
|
|
15
os-posix.c
15
os-posix.c
|
@ -327,18 +327,29 @@ void os_set_line_buffering(void)
|
|||
setvbuf(stdout, NULL, _IOLBF, 0);
|
||||
}
|
||||
|
||||
/*
 * os_mlock: lock the process's current and future memory with mlockall()
 *
 * @on_fault: when true, additionally request MCL_ONFAULT so that pages
 *            are only locked once they have been faulted in.
 *
 * Returns the mlockall() result (0 on success, negative on failure),
 * -EINVAL when @on_fault is requested but the build lacks MCL_ONFAULT
 * support, or -ENOSYS when the build has no mlockall() at all.
 *
 * errno is set on every failure path: callers report a failure with
 * strerror(errno), so leaving it stale would produce a misleading message.
 */
int os_mlock(bool on_fault)
{
#ifdef HAVE_MLOCKALL
    int flags = MCL_CURRENT | MCL_FUTURE;
    int ret;

    if (on_fault) {
#ifdef HAVE_MLOCK_ONFAULT
        flags |= MCL_ONFAULT;
#else
        error_report("mlockall: on_fault not supported");
        /* Set after reporting so the caller sees the real reason. */
        errno = EINVAL;
        return -EINVAL;
#endif
    }

    ret = mlockall(flags);
    if (ret < 0) {
        /* Reporting may touch errno; hand the original value back. */
        int saved_errno = errno;

        error_report("mlockall: %s", strerror(saved_errno));
        errno = saved_errno;
    }

    return ret;
#else
    (void)on_fault;
    errno = ENOSYS;
    return -ENOSYS;
#endif
}
|
||||
|
|
|
@ -4632,21 +4632,25 @@ SRST
|
|||
ERST
|
||||
|
||||
DEF("overcommit", HAS_ARG, QEMU_OPTION_overcommit,
|
||||
"-overcommit [mem-lock=on|off][cpu-pm=on|off]\n"
|
||||
"-overcommit [mem-lock=on|off|on-fault][cpu-pm=on|off]\n"
|
||||
" run qemu with overcommit hints\n"
|
||||
" mem-lock=on|off controls memory lock support (default: off)\n"
|
||||
" mem-lock=on|off|on-fault controls memory lock support (default: off)\n"
|
||||
" cpu-pm=on|off controls cpu power management (default: off)\n",
|
||||
QEMU_ARCH_ALL)
|
||||
SRST
|
||||
``-overcommit mem-lock=on|off``
|
||||
``-overcommit mem-lock=on|off|on-fault``
|
||||
\
|
||||
``-overcommit cpu-pm=on|off``
|
||||
Run qemu with hints about host resource overcommit. The default is
|
||||
to assume that host overcommits all resources.
|
||||
|
||||
Locking qemu and guest memory can be enabled via ``mem-lock=on``
|
||||
(disabled by default). This works when host memory is not
|
||||
overcommitted and reduces the worst-case latency for guest.
|
||||
or ``mem-lock=on-fault`` (disabled by default). This works when
|
||||
host memory is not overcommitted and reduces the worst-case latency for
|
||||
guest. The on-fault option is better for reducing the memory footprint
|
||||
since it makes allocations lazy, but the pages still get locked in place
|
||||
once faulted by the guest or QEMU. Note that the two options are mutually
|
||||
exclusive.
|
||||
|
||||
Guest ability to manage power state of host cpus (increasing latency
|
||||
for other processes on the same host cpu, but decreasing latency for
|
||||
|
|
|
@ -31,10 +31,20 @@
|
|||
#include "system/cpus.h"
|
||||
#include "system/system.h"
|
||||
|
||||
/*
 * should_mlock: does @state ask for memory locking in any form?
 *
 * Returns true for MLOCK_ON and MLOCK_ON_FAULT, false for anything else.
 */
bool should_mlock(MlockState state)
{
    switch (state) {
    case MLOCK_ON:
    case MLOCK_ON_FAULT:
        return true;
    default:
        return false;
    }
}
|
||||
|
||||
/*
 * is_mlock_on_fault: does @state select the lock-on-fault mode?
 *
 * Returns true only for MLOCK_ON_FAULT.
 */
bool is_mlock_on_fault(MlockState state)
{
    const bool on_fault = (MLOCK_ON_FAULT == state);

    return on_fault;
}
|
||||
|
||||
enum vga_retrace_method vga_retrace_method = VGA_RETRACE_DUMB;
|
||||
int display_opengl;
|
||||
const char* keyboard_layout;
|
||||
bool enable_mlock;
|
||||
MlockState mlock_state;
|
||||
bool enable_cpu_pm;
|
||||
int autostart = 1;
|
||||
int vga_interface_type = VGA_NONE;
|
||||
|
|
|
@ -34,7 +34,7 @@ static inline uint32_t glue(address_space_ldl_internal, SUFFIX)(ARG1_DECL,
|
|||
|
||||
RCU_READ_LOCK();
|
||||
mr = TRANSLATE(addr, &addr1, &l, false, attrs);
|
||||
if (l < 4 || !memory_access_is_direct(mr, false)) {
|
||||
if (l < 4 || !memory_access_is_direct(mr, false, attrs)) {
|
||||
release_lock |= prepare_mmio_access(mr);
|
||||
|
||||
/* I/O case */
|
||||
|
@ -103,7 +103,7 @@ static inline uint64_t glue(address_space_ldq_internal, SUFFIX)(ARG1_DECL,
|
|||
|
||||
RCU_READ_LOCK();
|
||||
mr = TRANSLATE(addr, &addr1, &l, false, attrs);
|
||||
if (l < 8 || !memory_access_is_direct(mr, false)) {
|
||||
if (l < 8 || !memory_access_is_direct(mr, false, attrs)) {
|
||||
release_lock |= prepare_mmio_access(mr);
|
||||
|
||||
/* I/O case */
|
||||
|
@ -170,7 +170,7 @@ uint8_t glue(address_space_ldub, SUFFIX)(ARG1_DECL,
|
|||
|
||||
RCU_READ_LOCK();
|
||||
mr = TRANSLATE(addr, &addr1, &l, false, attrs);
|
||||
if (!memory_access_is_direct(mr, false)) {
|
||||
if (!memory_access_is_direct(mr, false, attrs)) {
|
||||
release_lock |= prepare_mmio_access(mr);
|
||||
|
||||
/* I/O case */
|
||||
|
@ -207,7 +207,7 @@ static inline uint16_t glue(address_space_lduw_internal, SUFFIX)(ARG1_DECL,
|
|||
|
||||
RCU_READ_LOCK();
|
||||
mr = TRANSLATE(addr, &addr1, &l, false, attrs);
|
||||
if (l < 2 || !memory_access_is_direct(mr, false)) {
|
||||
if (l < 2 || !memory_access_is_direct(mr, false, attrs)) {
|
||||
release_lock |= prepare_mmio_access(mr);
|
||||
|
||||
/* I/O case */
|
||||
|
@ -277,7 +277,7 @@ void glue(address_space_stl_notdirty, SUFFIX)(ARG1_DECL,
|
|||
|
||||
RCU_READ_LOCK();
|
||||
mr = TRANSLATE(addr, &addr1, &l, true, attrs);
|
||||
if (l < 4 || !memory_access_is_direct(mr, true)) {
|
||||
if (l < 4 || !memory_access_is_direct(mr, true, attrs)) {
|
||||
release_lock |= prepare_mmio_access(mr);
|
||||
|
||||
r = memory_region_dispatch_write(mr, addr1, val, MO_32, attrs);
|
||||
|
@ -314,7 +314,7 @@ static inline void glue(address_space_stl_internal, SUFFIX)(ARG1_DECL,
|
|||
|
||||
RCU_READ_LOCK();
|
||||
mr = TRANSLATE(addr, &addr1, &l, true, attrs);
|
||||
if (l < 4 || !memory_access_is_direct(mr, true)) {
|
||||
if (l < 4 || !memory_access_is_direct(mr, true, attrs)) {
|
||||
release_lock |= prepare_mmio_access(mr);
|
||||
r = memory_region_dispatch_write(mr, addr1, val,
|
||||
MO_32 | devend_memop(endian), attrs);
|
||||
|
@ -377,7 +377,7 @@ void glue(address_space_stb, SUFFIX)(ARG1_DECL,
|
|||
|
||||
RCU_READ_LOCK();
|
||||
mr = TRANSLATE(addr, &addr1, &l, true, attrs);
|
||||
if (!memory_access_is_direct(mr, true)) {
|
||||
if (!memory_access_is_direct(mr, true, attrs)) {
|
||||
release_lock |= prepare_mmio_access(mr);
|
||||
r = memory_region_dispatch_write(mr, addr1, val, MO_8, attrs);
|
||||
} else {
|
||||
|
@ -410,7 +410,7 @@ static inline void glue(address_space_stw_internal, SUFFIX)(ARG1_DECL,
|
|||
|
||||
RCU_READ_LOCK();
|
||||
mr = TRANSLATE(addr, &addr1, &l, true, attrs);
|
||||
if (l < 2 || !memory_access_is_direct(mr, true)) {
|
||||
if (l < 2 || !memory_access_is_direct(mr, true, attrs)) {
|
||||
release_lock |= prepare_mmio_access(mr);
|
||||
r = memory_region_dispatch_write(mr, addr1, val,
|
||||
MO_16 | devend_memop(endian), attrs);
|
||||
|
@ -474,7 +474,7 @@ static void glue(address_space_stq_internal, SUFFIX)(ARG1_DECL,
|
|||
|
||||
RCU_READ_LOCK();
|
||||
mr = TRANSLATE(addr, &addr1, &l, true, attrs);
|
||||
if (l < 8 || !memory_access_is_direct(mr, true)) {
|
||||
if (l < 8 || !memory_access_is_direct(mr, true, attrs)) {
|
||||
release_lock |= prepare_mmio_access(mr);
|
||||
r = memory_region_dispatch_write(mr, addr1, val,
|
||||
MO_64 | devend_memop(endian), attrs);
|
||||
|
|
121
system/physmem.c
121
system/physmem.c
|
@ -573,7 +573,7 @@ MemoryRegion *flatview_translate(FlatView *fv, hwaddr addr, hwaddr *xlat,
|
|||
is_write, true, &as, attrs);
|
||||
mr = section.mr;
|
||||
|
||||
if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
|
||||
if (xen_enabled() && memory_access_is_direct(mr, is_write, attrs)) {
|
||||
hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
|
||||
*plen = MIN(page, *plen);
|
||||
}
|
||||
|
@ -2275,45 +2275,80 @@ void qemu_ram_free(RAMBlock *block)
|
|||
}
|
||||
|
||||
#ifndef _WIN32
|
||||
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
|
||||
/* Simply remap the given VM memory location from start to start+length */
|
||||
static int qemu_ram_remap_mmap(RAMBlock *block, uint64_t start, size_t length)
|
||||
{
|
||||
int flags, prot;
|
||||
void *area;
|
||||
void *host_startaddr = block->host + start;
|
||||
|
||||
assert(block->fd < 0);
|
||||
flags = MAP_FIXED | MAP_ANONYMOUS;
|
||||
flags |= block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE;
|
||||
flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
|
||||
prot = PROT_READ;
|
||||
prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
|
||||
area = mmap(host_startaddr, length, prot, flags, -1, 0);
|
||||
return area != host_startaddr ? -errno : 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* qemu_ram_remap - remap a single RAM page
|
||||
*
|
||||
* @addr: address in ram_addr_t address space.
|
||||
*
|
||||
* This function will try remapping a single page of guest RAM identified by
|
||||
* @addr, essentially discarding memory to recover from previously poisoned
|
||||
* memory (MCE). The page size depends on the RAMBlock (i.e., hugetlb). @addr
|
||||
* does not have to point at the start of the page.
|
||||
*
|
||||
* This function is only to be used during system resets; it will kill the
|
||||
* VM if remapping failed.
|
||||
*/
|
||||
void qemu_ram_remap(ram_addr_t addr)
|
||||
{
|
||||
RAMBlock *block;
|
||||
ram_addr_t offset;
|
||||
int flags;
|
||||
void *area, *vaddr;
|
||||
int prot;
|
||||
uint64_t offset;
|
||||
void *vaddr;
|
||||
size_t page_size;
|
||||
|
||||
RAMBLOCK_FOREACH(block) {
|
||||
offset = addr - block->offset;
|
||||
if (offset < block->max_length) {
|
||||
/* Respect the pagesize of our RAMBlock */
|
||||
page_size = qemu_ram_pagesize(block);
|
||||
offset = QEMU_ALIGN_DOWN(offset, page_size);
|
||||
|
||||
vaddr = ramblock_ptr(block, offset);
|
||||
if (block->flags & RAM_PREALLOC) {
|
||||
;
|
||||
} else if (xen_enabled()) {
|
||||
abort();
|
||||
} else {
|
||||
flags = MAP_FIXED;
|
||||
flags |= block->flags & RAM_SHARED ?
|
||||
MAP_SHARED : MAP_PRIVATE;
|
||||
flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
|
||||
prot = PROT_READ;
|
||||
prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
|
||||
if (block->fd >= 0) {
|
||||
area = mmap(vaddr, length, prot, flags, block->fd,
|
||||
offset + block->fd_offset);
|
||||
} else {
|
||||
flags |= MAP_ANONYMOUS;
|
||||
area = mmap(vaddr, length, prot, flags, -1, 0);
|
||||
if (ram_block_discard_range(block, offset, page_size) != 0) {
|
||||
/*
|
||||
* Fall back to using mmap() only for anonymous mapping,
|
||||
* as if a backing file is associated we may not be able
|
||||
* to recover the memory in all cases.
|
||||
* So don't take the risk of using only mmap and fail now.
|
||||
*/
|
||||
if (block->fd >= 0) {
|
||||
error_report("Could not remap RAM %s:%" PRIx64 "+%"
|
||||
PRIx64 " +%zx", block->idstr, offset,
|
||||
block->fd_offset, page_size);
|
||||
exit(1);
|
||||
}
|
||||
if (qemu_ram_remap_mmap(block, offset, page_size) != 0) {
|
||||
error_report("Could not remap RAM %s:%" PRIx64 " +%zx",
|
||||
block->idstr, offset, page_size);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
if (area != vaddr) {
|
||||
error_report("Could not remap addr: "
|
||||
RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
|
||||
length, addr);
|
||||
exit(1);
|
||||
}
|
||||
memory_try_enable_merging(vaddr, length);
|
||||
qemu_ram_setup_dump(vaddr, length);
|
||||
memory_try_enable_merging(vaddr, page_size);
|
||||
qemu_ram_setup_dump(vaddr, page_size);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2869,7 +2904,7 @@ static MemTxResult flatview_write_continue_step(MemTxAttrs attrs,
|
|||
return MEMTX_ACCESS_ERROR;
|
||||
}
|
||||
|
||||
if (!memory_access_is_direct(mr, true)) {
|
||||
if (!memory_access_is_direct(mr, true, attrs)) {
|
||||
uint64_t val;
|
||||
MemTxResult result;
|
||||
bool release_lock = prepare_mmio_access(mr);
|
||||
|
@ -2965,7 +3000,7 @@ static MemTxResult flatview_read_continue_step(MemTxAttrs attrs, uint8_t *buf,
|
|||
return MEMTX_ACCESS_ERROR;
|
||||
}
|
||||
|
||||
if (!memory_access_is_direct(mr, false)) {
|
||||
if (!memory_access_is_direct(mr, false, attrs)) {
|
||||
/* I/O case */
|
||||
uint64_t val;
|
||||
MemTxResult result;
|
||||
|
@ -3137,8 +3172,7 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
|
|||
l = len;
|
||||
mr = address_space_translate(as, addr, &addr1, &l, true, attrs);
|
||||
|
||||
if (!(memory_region_is_ram(mr) ||
|
||||
memory_region_is_romd(mr))) {
|
||||
if (!memory_region_supports_direct_access(mr)) {
|
||||
l = memory_access_size(mr, l, addr1);
|
||||
} else {
|
||||
/* ROM/RAM case */
|
||||
|
@ -3275,7 +3309,7 @@ static bool flatview_access_valid(FlatView *fv, hwaddr addr, hwaddr len,
|
|||
while (len > 0) {
|
||||
l = len;
|
||||
mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);
|
||||
if (!memory_access_is_direct(mr, is_write)) {
|
||||
if (!memory_access_is_direct(mr, is_write, attrs)) {
|
||||
l = memory_access_size(mr, l, addr);
|
||||
if (!memory_region_access_valid(mr, xlat, l, is_write, attrs)) {
|
||||
return false;
|
||||
|
@ -3355,7 +3389,7 @@ void *address_space_map(AddressSpace *as,
|
|||
fv = address_space_to_flatview(as);
|
||||
mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);
|
||||
|
||||
if (!memory_access_is_direct(mr, is_write)) {
|
||||
if (!memory_access_is_direct(mr, is_write, attrs)) {
|
||||
size_t used = qatomic_read(&as->bounce_buffer_size);
|
||||
for (;;) {
|
||||
hwaddr alloc = MIN(as->max_bounce_buffer_size - used, l);
|
||||
|
@ -3488,7 +3522,7 @@ int64_t address_space_cache_init(MemoryRegionCache *cache,
|
|||
|
||||
mr = cache->mrs.mr;
|
||||
memory_region_ref(mr);
|
||||
if (memory_access_is_direct(mr, is_write)) {
|
||||
if (memory_access_is_direct(mr, is_write, MEMTXATTRS_UNSPECIFIED)) {
|
||||
/* We don't care about the memory attributes here as we're only
|
||||
* doing this if we found actual RAM, which behaves the same
|
||||
* regardless of attributes; so UNSPECIFIED is fine.
|
||||
|
@ -3681,13 +3715,8 @@ int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
|
|||
if (l > len)
|
||||
l = len;
|
||||
phys_addr += (addr & ~TARGET_PAGE_MASK);
|
||||
if (is_write) {
|
||||
res = address_space_write_rom(cpu->cpu_ases[asidx].as, phys_addr,
|
||||
attrs, buf, l);
|
||||
} else {
|
||||
res = address_space_read(cpu->cpu_ases[asidx].as, phys_addr,
|
||||
attrs, buf, l);
|
||||
}
|
||||
res = address_space_rw(cpu->cpu_ases[asidx].as, phys_addr, attrs, buf,
|
||||
l, is_write);
|
||||
if (res != MEMTX_OK) {
|
||||
return -1;
|
||||
}
|
||||
|
@ -3797,18 +3826,19 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
|
|||
}
|
||||
|
||||
ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
|
||||
start, length);
|
||||
start + rb->fd_offset, length);
|
||||
if (ret) {
|
||||
ret = -errno;
|
||||
error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
|
||||
__func__, rb->idstr, start, length, ret);
|
||||
error_report("%s: Failed to fallocate %s:%" PRIx64 "+%" PRIx64
|
||||
" +%zx (%d)", __func__, rb->idstr, start,
|
||||
rb->fd_offset, length, ret);
|
||||
goto err;
|
||||
}
|
||||
#else
|
||||
ret = -ENOSYS;
|
||||
error_report("%s: fallocate not available/file"
|
||||
"%s:%" PRIx64 " +%zx (%d)",
|
||||
__func__, rb->idstr, start, length, ret);
|
||||
"%s:%" PRIx64 "+%" PRIx64 " +%zx (%d)", __func__,
|
||||
rb->idstr, start, rb->fd_offset, length, ret);
|
||||
goto err;
|
||||
#endif
|
||||
}
|
||||
|
@ -3855,6 +3885,7 @@ int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start,
|
|||
int ret = -1;
|
||||
|
||||
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
|
||||
/* ignore fd_offset with guest_memfd */
|
||||
ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
|
||||
start, length);
|
||||
|
||||
|
|
52
system/vl.c
52
system/vl.c
|
@ -352,7 +352,7 @@ static QemuOptsList qemu_overcommit_opts = {
|
|||
.desc = {
|
||||
{
|
||||
.name = "mem-lock",
|
||||
.type = QEMU_OPT_BOOL,
|
||||
.type = QEMU_OPT_STRING,
|
||||
},
|
||||
{
|
||||
.name = "cpu-pm",
|
||||
|
@ -797,8 +797,8 @@ static QemuOptsList qemu_run_with_opts = {
|
|||
|
||||
static void realtime_init(void)
|
||||
{
|
||||
if (enable_mlock) {
|
||||
if (os_mlock() < 0) {
|
||||
if (should_mlock(mlock_state)) {
|
||||
if (os_mlock(is_mlock_on_fault(mlock_state)) < 0) {
|
||||
error_report("locking memory failed");
|
||||
exit(1);
|
||||
}
|
||||
|
@ -1876,6 +1876,44 @@ static void object_option_parse(const char *str)
|
|||
visit_free(v);
|
||||
}
|
||||
|
||||
static void overcommit_parse(const char *str)
|
||||
{
|
||||
QemuOpts *opts;
|
||||
const char *mem_lock_opt;
|
||||
|
||||
opts = qemu_opts_parse_noisily(qemu_find_opts("overcommit"),
|
||||
str, false);
|
||||
if (!opts) {
|
||||
exit(1);
|
||||
}
|
||||
|
||||
enable_cpu_pm = qemu_opt_get_bool(opts, "cpu-pm", enable_cpu_pm);
|
||||
|
||||
mem_lock_opt = qemu_opt_get(opts, "mem-lock");
|
||||
if (!mem_lock_opt) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (strcmp(mem_lock_opt, "on") == 0) {
|
||||
mlock_state = MLOCK_ON;
|
||||
return;
|
||||
}
|
||||
|
||||
if (strcmp(mem_lock_opt, "off") == 0) {
|
||||
mlock_state = MLOCK_OFF;
|
||||
return;
|
||||
}
|
||||
|
||||
if (strcmp(mem_lock_opt, "on-fault") == 0) {
|
||||
mlock_state = MLOCK_ON_FAULT;
|
||||
return;
|
||||
}
|
||||
|
||||
error_report("parameter 'mem-lock' expects one of "
|
||||
"'on', 'off', 'on-fault'");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Very early object creation, before the sandbox options have been activated.
|
||||
*/
|
||||
|
@ -3591,13 +3629,7 @@ void qemu_init(int argc, char **argv)
|
|||
object_option_parse(optarg);
|
||||
break;
|
||||
case QEMU_OPTION_overcommit:
|
||||
opts = qemu_opts_parse_noisily(qemu_find_opts("overcommit"),
|
||||
optarg, false);
|
||||
if (!opts) {
|
||||
exit(1);
|
||||
}
|
||||
enable_mlock = qemu_opt_get_bool(opts, "mem-lock", enable_mlock);
|
||||
enable_cpu_pm = qemu_opt_get_bool(opts, "cpu-pm", enable_cpu_pm);
|
||||
overcommit_parse(optarg);
|
||||
break;
|
||||
case QEMU_OPTION_compat:
|
||||
{
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue