Merge tag 'patchew/20200219160953.13771-1-imammedo@redhat.com' of https://github.com/patchew-project/qemu into HEAD

This series removes the ad hoc RAM allocation API (memory_region_allocate_system_memory)
and consolidates RAM allocation around the hostmem backend. This makes it possible to:

* resolve the conflict between the global -mem-prealloc option and hostmem's
  "policy" option, fixing premature allocation before the binding policy is applied

* simplify the complicated memory allocation routines, which had to deal with
  two different ways of allocating RAM.

* reuse a hostmem backend of choice for main RAM without adding extra CLI
  options that duplicate hostmem features.  A recent case was -mem-shared,
  proposed to enable vhost-user on targets that don't support hostmem
  backends [1] (e.g. s390)

* move RAM allocation from individual boards into generic machine code,
  providing boards with an already prepared MemoryRegion.

* clean up deprecated NUMA features that were tied to the old API (see the
  patches below and the CLI example after this list)
  - "numa: remove deprecated -mem-path fallback to anonymous RAM"
  - (POSTPONED, waiting on libvirt side) "forbid '-numa node,mem' for 5.0 and newer machine types"
  - (POSTPONED) "numa: remove deprecated implicit RAM distribution between nodes"

Introduce a new machine.memory-backend property and wrapper code that aliases
the global -mem-path and -mem-prealloc options onto properties of an
automatically created hostmem backend (provided memory-backend was not
explicitly set by the user).
A bulk of trivial patches then follows to incrementally convert individual
boards to using the machine.memory-backend provided MemoryRegion.
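
For instance, with the wrapper in place, a legacy invocation such as

    qemu-system-x86_64 -m 2G -mem-path /dev/hugepages -mem-prealloc

behaves roughly like the explicit form below (the backend id is taken from the
board's default_ram_id; "pc.ram" is used here purely for illustration):

    qemu-system-x86_64 -m 2G \
        -object memory-backend-file,id=pc.ram,size=2G,mem-path=/dev/hugepages,prealloc=on \
        -machine memory-backend=pc.ram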

Board conversion typically involves:

* providing MachineClass::default_ram_size and MachineClass::default_ram_id
  so that generic code can create the default backend if the user didn't
  explicitly provide the memory-backend or -m options

* dropping the memory_region_allocate_system_memory() call

* using the convenience MachineState::ram MemoryRegion, which points to the
  MemoryRegion allocated by the RAM memdev

On top of that, for some boards (see the sketch after this list):

* missing ram_size checks are added (typically on boards with a fixed RAM size)

* ram_size fixups are replaced by checks and hard errors, forcing the user to
  provide a correct "-m" value instead of silently adjusting it and continuing
  to run.

Once all boards are converted, the old API is removed and the memory
allocation routines are cleaned up.

--- a/hw/core/machine.c
+++ b/hw/core/machine.c

@@ -26,6 +26,7 @@
 #include "sysemu/qtest.h"
 #include "hw/pci/pci.h"
 #include "hw/mem/nvdimm.h"
+#include "migration/vmstate.h"
 
 GlobalProperty hw_compat_4_2[] = {
     { "virtio-blk-device", "queue-size", "128"},
@@ -510,6 +511,22 @@ static void validate_sysbus_device(SysBusDevice *sbdev, void *opaque)
     }
 }
 
+static char *machine_get_memdev(Object *obj, Error **errp)
+{
+    MachineState *ms = MACHINE(obj);
+
+    return g_strdup(ms->ram_memdev_id);
+}
+
+static void machine_set_memdev(Object *obj, const char *value, Error **errp)
+{
+    MachineState *ms = MACHINE(obj);
+
+    g_free(ms->ram_memdev_id);
+    ms->ram_memdev_id = g_strdup(value);
+}
+
 static void machine_init_notify(Notifier *notifier, void *data)
 {
     MachineState *machine = MACHINE(qdev_get_machine());
@@ -891,6 +908,14 @@ static void machine_initfn(Object *obj)
                                         "Table (HMAT)", NULL);
     }
 
+    object_property_add_str(obj, "memory-backend",
+                            machine_get_memdev, machine_set_memdev,
+                            &error_abort);
+    object_property_set_description(obj, "memory-backend",
+                                    "Set RAM backend"
+                                    "Valid value is ID of hostmem based backend",
+                                    &error_abort);
+
     /* Register notifier when init is done for sysbus sanity checks */
     ms->sysbus_notifier.notify = machine_init_notify;
     qemu_add_machine_init_done_notifier(&ms->sysbus_notifier);
@@ -1037,10 +1062,33 @@ static void machine_numa_finish_cpu_init(MachineState *machine)
     g_string_free(s, true);
 }
 
+MemoryRegion *machine_consume_memdev(MachineState *machine,
+                                     HostMemoryBackend *backend)
+{
+    MemoryRegion *ret = host_memory_backend_get_memory(backend);
+
+    if (memory_region_is_mapped(ret)) {
+        char *path = object_get_canonical_path_component(OBJECT(backend));
+        error_report("memory backend %s can't be used multiple times.", path);
+        g_free(path);
+        exit(EXIT_FAILURE);
+    }
+    host_memory_backend_set_mapped(backend, true);
+    vmstate_register_ram_global(ret);
+    return ret;
+}
+
 void machine_run_board_init(MachineState *machine)
 {
     MachineClass *machine_class = MACHINE_GET_CLASS(machine);
 
+    if (machine->ram_memdev_id) {
+        Object *o;
+        o = object_resolve_path_type(machine->ram_memdev_id,
+                                     TYPE_MEMORY_BACKEND, NULL);
+        machine->ram = machine_consume_memdev(machine, MEMORY_BACKEND(o));
+    }
+
     if (machine->numa_state) {
         numa_complete_configuration(machine);
         if (machine->numa_state->num_nodes) {

--- a/hw/core/null-machine.c
+++ b/hw/core/null-machine.c

@@ -32,11 +32,8 @@ static void machine_none_init(MachineState *mch)
     }
 
     /* RAM at address zero */
-    if (mch->ram_size) {
-        MemoryRegion *ram = g_new(MemoryRegion, 1);
-
-        memory_region_allocate_system_memory(ram, NULL, "ram", mch->ram_size);
-        memory_region_add_subregion(get_system_memory(), 0, ram);
+    if (mch->ram) {
+        memory_region_add_subregion(get_system_memory(), 0, mch->ram);
     }
 
     if (mch->kernel_filename) {
@@ -52,6 +49,7 @@ static void machine_none_machine_init(MachineClass *mc)
     mc->init = machine_none_init;
     mc->max_cpus = 1;
    mc->default_ram_size = 0;
+    mc->default_ram_id = "ram";
 }
 
 DEFINE_MACHINE("none", machine_none_machine_init)

--- a/hw/core/numa.c
+++ b/hw/core/numa.c

@@ -52,6 +52,11 @@ QemuOptsList qemu_numa_opts = {
 };
 
 static int have_memdevs;
+bool numa_uses_legacy_mem(void)
+{
+    return !have_memdevs;
+}
+
 static int have_mem;
 static int max_numa_nodeid; /* Highest specified NUMA node ID, plus one.
                              * For all nodes, nodeid < max_numa_nodeid
@@ -652,6 +657,23 @@ void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
         nodes[i].node_mem = size - usedmem;
 }
 
+static void numa_init_memdev_container(MachineState *ms, MemoryRegion *ram)
+{
+    int i;
+    uint64_t addr = 0;
+
+    for (i = 0; i < ms->numa_state->num_nodes; i++) {
+        uint64_t size = ms->numa_state->nodes[i].node_mem;
+        HostMemoryBackend *backend = ms->numa_state->nodes[i].node_memdev;
+        if (!backend) {
+            continue;
+        }
+        MemoryRegion *seg = machine_consume_memdev(ms, backend);
+        memory_region_add_subregion(ram, addr, seg);
+        addr += size;
+    }
+}
+
 void numa_complete_configuration(MachineState *ms)
 {
     int i;
@@ -734,6 +756,12 @@ void numa_complete_configuration(MachineState *ms)
             exit(1);
         }
 
+    if (!numa_uses_legacy_mem() && mc->default_ram_id) {
+        ms->ram = g_new(MemoryRegion, 1);
+        memory_region_init(ms->ram, OBJECT(ms), mc->default_ram_id,
+                           ram_size);
+        numa_init_memdev_container(ms, ms->ram);
+    }
     /* QEMU needs at least all unique node pair distances to build
      * the whole NUMA distance table. QEMU treats the distance table
      * as symmetric by default, i.e. distance A->B == distance B->A.
@@ -778,79 +806,6 @@ void numa_cpu_pre_plug(const CPUArchId *slot, DeviceState *dev, Error **errp)
     }
 }
 
-static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
-                                           const char *name,
-                                           uint64_t ram_size)
-{
-    if (mem_path) {
-#ifdef __linux__
-        Error *err = NULL;
-        memory_region_init_ram_from_file(mr, owner, name, ram_size, 0, 0,
-                                         mem_path, &err);
-        if (err) {
-            error_report_err(err);
-            if (mem_prealloc) {
-                exit(1);
-            }
-            warn_report("falling back to regular RAM allocation");
-            error_printf("This is deprecated. Make sure that -mem-path "
-                         " specified path has sufficient resources to allocate"
-                         " -m specified RAM amount\n");
-            /* Legacy behavior: if allocation failed, fall back to
-             * regular RAM allocation.
-             */
-            mem_path = NULL;
-            memory_region_init_ram_nomigrate(mr, owner, name, ram_size, &error_fatal);
-        }
-#else
-        fprintf(stderr, "-mem-path not supported on this host\n");
-        exit(1);
-#endif
-    } else {
-        memory_region_init_ram_nomigrate(mr, owner, name, ram_size, &error_fatal);
-    }
-    vmstate_register_ram_global(mr);
-}
-
-void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
-                                          const char *name,
-                                          uint64_t ram_size)
-{
-    uint64_t addr = 0;
-    int i;
-    MachineState *ms = MACHINE(qdev_get_machine());
-
-    if (ms->numa_state == NULL ||
-        ms->numa_state->num_nodes == 0 || !have_memdevs) {
-        allocate_system_memory_nonnuma(mr, owner, name, ram_size);
-        return;
-    }
-
-    memory_region_init(mr, owner, name, ram_size);
-    for (i = 0; i < ms->numa_state->num_nodes; i++) {
-        uint64_t size = ms->numa_state->nodes[i].node_mem;
-        HostMemoryBackend *backend = ms->numa_state->nodes[i].node_memdev;
-        if (!backend) {
-            continue;
-        }
-        MemoryRegion *seg = host_memory_backend_get_memory(backend);
-
-        if (memory_region_is_mapped(seg)) {
-            char *path = object_get_canonical_path_component(OBJECT(backend));
-            error_report("memory backend %s is used multiple times. Each "
-                         "-numa option must use a different memdev value.",
-                         path);
-            g_free(path);
-            exit(1);
-        }
-
-        host_memory_backend_set_mapped(backend, true);
-        memory_region_add_subregion(mr, addr, seg);
-        vmstate_register_ram_global(seg);
-        addr += size;
-    }
-}
-
 static void numa_stat_memory_devices(NumaNodeMem node_mem[])
 {
     MemoryDeviceInfoList *info_list = qmp_memory_device_list();