Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging

* util/log: re-allow switching away from stderr log file
* finish audio configuration rework
* cleanup HVF stubs
* remove more mentions of softmmu

# -----BEGIN PGP SIGNATURE-----
#
# iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmUi/kIUHHBib256aW5p
# QHJlZGhhdC5jb20ACgkQv/vSX3jHroOXWwf/YW16QMzqdAPVHYRf9NcCneRF16El
# t3lEod0q0sHhchPbh9e04aKbh+oBNeWu9sFyTl11Fwsi+DGmp/b28ziva75/4rfd
# h5N9aX/z2jwPqy93IwPDu3soKXCCgTK+ywtD/5GLQwBGqxs7W2xUEEb7eCnVefHa
# zwL3MOUqPICeqOnR1TNw9k3N3veF04D+rmchTwbAjAmx1f8EI+mK9VlGK9V8TUjP
# 3HjpZYJluc0a92lR5VONJ7V25QfttsjLysTgpFwVAQPS6Frzatc/hWclfLYgw9vl
# 2Irk83FV8gXPRl0XKNcqSDsv6h/yGP6TDFIB8QwRSRGBqIQi5aOlfBJzsQ==
# =qbm7
# -----END PGP SIGNATURE-----
# gpg: Signature made Sun 08 Oct 2023 15:08:50 EDT
# gpg:                using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg:                issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>" [full]
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* tag 'for-upstream' of https://gitlab.com/bonzini/qemu: (25 commits)
  audio, qtest: get rid of QEMU_AUDIO_DRV
  audio: reintroduce default audio backend for VNC
  audio: do not use first -audiodev as default audio device
  audio: extend -audio to allow creating a default backend
  audio: extract audio_define_default
  audio: disable default backends if -audio/-audiodev is used
  audio: error hints need a trailing \n
  cutils: squelch compiler warnings with custom paths
  configure: change $softmmu to $system
  system: Rename softmmu/ directory as system/
  meson: Rename target_softmmu_arch -> target_system_arch
  meson: Rename softmmu_mods -> system_mods
  target/i386: Rename i386_softmmu_kvm_ss -> i386_kvm_ss
  semihosting: Rename softmmu_FOO_user() -> uaccess_FOO_user()
  gdbstub: Rename 'softmmu' -> 'system'
  accel: Rename accel_softmmu* -> accel_system*
  tcg: Correct invalid mentions of 'softmmu' by 'system-mode'
  fuzz: Correct invalid mentions of 'softmmu' by 'system'
  cpu: Correct invalid mentions of 'softmmu' by 'system-mode'
  travis-ci: Correct invalid mentions of 'softmmu' by 'system'
  ...

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
commit 1527c6b6fa
Stefan Hajnoczi, 2023-10-09 10:11:17 -04:00
132 changed files with 305 additions and 276 deletions

system/arch_init.c (new file, 50 lines)

@@ -0,0 +1,50 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qemu/module.h"
#include "sysemu/arch_init.h"
#ifdef TARGET_SPARC
int graphic_width = 1024;
int graphic_height = 768;
int graphic_depth = 8;
#elif defined(TARGET_M68K)
int graphic_width = 800;
int graphic_height = 600;
int graphic_depth = 8;
#else
int graphic_width = 800;
int graphic_height = 600;
int graphic_depth = 32;
#endif
const uint32_t arch_type = QEMU_ARCH;
void qemu_init_arch_modules(void)
{
#ifdef CONFIG_MODULES
module_init_info(qemu_modinfo);
module_allow_arch(TARGET_NAME);
#endif
}

system/async-teardown.c (new file, 143 lines)

@@ -0,0 +1,143 @@
/*
* Asynchronous teardown
*
* Copyright IBM, Corp. 2022
*
* Authors:
* Claudio Imbrenda <imbrenda@linux.ibm.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or (at your
* option) any later version. See the COPYING file in the top-level directory.
*
*/
#include "qemu/osdep.h"
#include <dirent.h>
#include <sys/prctl.h>
#include <sched.h>
#include "qemu/async-teardown.h"
#ifdef _SC_THREAD_STACK_MIN
#define CLONE_STACK_SIZE sysconf(_SC_THREAD_STACK_MIN)
#else
#define CLONE_STACK_SIZE 16384
#endif
static pid_t the_ppid;
/*
* Close all open file descriptors.
*/
static void close_all_open_fd(void)
{
struct dirent *de;
int fd, dfd;
DIR *dir;
#ifdef CONFIG_CLOSE_RANGE
int r = close_range(0, ~0U, 0);
if (!r) {
/* Success, no need to try other ways. */
return;
}
#endif
dir = opendir("/proc/self/fd");
if (!dir) {
/* If /proc is not mounted, there is nothing that can be done. */
return;
}
/* Avoid closing the directory. */
dfd = dirfd(dir);
for (de = readdir(dir); de; de = readdir(dir)) {
fd = atoi(de->d_name);
if (fd != dfd) {
close(fd);
}
}
closedir(dir);
}
static void hup_handler(int signal)
{
/* Check every second if this process has been reparented. */
while (the_ppid == getppid()) {
/* sleep() is safe to use in a signal handler. */
sleep(1);
}
/* At this point the parent process has terminated completely. */
_exit(0);
}
static int async_teardown_fn(void *arg)
{
struct sigaction sa = { .sa_handler = hup_handler };
sigset_t hup_signal;
char name[16];
/* Set a meaningful name for this process. */
snprintf(name, 16, "cleanup/%d", the_ppid);
prctl(PR_SET_NAME, (unsigned long)name);
/*
* Close all file descriptors that might have been inherited from the
* main qemu process when doing clone, needed to make libvirt happy.
* Not using close_range for increased compatibility with older kernels.
*/
close_all_open_fd();
/* Set up a handler for SIGHUP and unblock SIGHUP. */
sigaction(SIGHUP, &sa, NULL);
sigemptyset(&hup_signal);
sigaddset(&hup_signal, SIGHUP);
sigprocmask(SIG_UNBLOCK, &hup_signal, NULL);
/* Ask to receive SIGHUP when the parent dies. */
prctl(PR_SET_PDEATHSIG, SIGHUP);
/*
* Sleep forever, unless the parent process has already terminated. The
* only interruption can come from the SIGHUP signal, which in normal
* operation is received when the parent process dies.
*/
if (the_ppid == getppid()) {
pause();
}
/* At this point the parent process has terminated completely. */
_exit(0);
}
/*
* Allocate a new stack of a reasonable size, and return a pointer to its top.
*/
static void *new_stack_for_clone(void)
{
size_t stack_size = CLONE_STACK_SIZE;
char *stack_ptr;
/* Allocate a new stack and get a pointer to its top. */
stack_ptr = qemu_alloc_stack(&stack_size);
stack_ptr += stack_size;
return stack_ptr;
}
/*
* Block all signals, start (clone) a new process sharing the address space
* with qemu (CLONE_VM), then restore signals.
*/
void init_async_teardown(void)
{
sigset_t all_signals, old_signals;
the_ppid = getpid();
sigfillset(&all_signals);
sigprocmask(SIG_BLOCK, &all_signals, &old_signals);
clone(async_teardown_fn, new_stack_for_clone(), CLONE_VM, NULL);
sigprocmask(SIG_SETMASK, &old_signals, NULL);
}
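
The teardown helper above combines PR_SET_PDEATHSIG with a manual getppid() re-check to close the race where the parent dies before prctl() runs. A minimal standalone sketch of that pattern (hypothetical demo using fork() rather than the clone(CLONE_VM) call in this commit):

#include <signal.h>
#include <stdio.h>
#include <sys/prctl.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    pid_t ppid = getpid();      /* recorded before fork, like the_ppid above */
    pid_t child = fork();

    if (child < 0) {
        perror("fork");
        return 1;
    }
    if (child == 0) {
        /* Ask the kernel to send SIGHUP when the parent dies. */
        prctl(PR_SET_PDEATHSIG, SIGHUP);
        /*
         * Close the race: if the parent died between fork() and prctl(),
         * no signal will ever arrive, so re-check the parent pid directly.
         */
        if (getppid() != ppid) {
            _exit(0);
        }
        pause();                /* default SIGHUP action terminates us */
        _exit(0);
    }
    return 0;                   /* parent exits; the child is torn down */
}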

system/balloon.c (new file, 106 lines)

@@ -0,0 +1,106 @@
/*
* Generic Balloon handlers and management
*
* Copyright (c) 2003-2008 Fabrice Bellard
* Copyright (C) 2011 Red Hat, Inc.
* Copyright (C) 2011 Amit Shah <amit.shah@redhat.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "sysemu/kvm.h"
#include "sysemu/balloon.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-machine.h"
#include "qapi/qmp/qerror.h"
#include "trace.h"
static QEMUBalloonEvent *balloon_event_fn;
static QEMUBalloonStatus *balloon_stat_fn;
static void *balloon_opaque;
static bool have_balloon(Error **errp)
{
if (kvm_enabled() && !kvm_has_sync_mmu()) {
error_set(errp, ERROR_CLASS_KVM_MISSING_CAP,
"Using KVM without synchronous MMU, balloon unavailable");
return false;
}
if (!balloon_event_fn) {
error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE,
"No balloon device has been activated");
return false;
}
return true;
}
int qemu_add_balloon_handler(QEMUBalloonEvent *event_func,
QEMUBalloonStatus *stat_func, void *opaque)
{
if (balloon_event_fn || balloon_stat_fn || balloon_opaque) {
/* We've already registered one balloon handler. How many can
* a guest really have?
*/
return -1;
}
balloon_event_fn = event_func;
balloon_stat_fn = stat_func;
balloon_opaque = opaque;
return 0;
}
void qemu_remove_balloon_handler(void *opaque)
{
if (balloon_opaque != opaque) {
return;
}
balloon_event_fn = NULL;
balloon_stat_fn = NULL;
balloon_opaque = NULL;
}
BalloonInfo *qmp_query_balloon(Error **errp)
{
BalloonInfo *info;
if (!have_balloon(errp)) {
return NULL;
}
info = g_malloc0(sizeof(*info));
balloon_stat_fn(balloon_opaque, info);
return info;
}
void qmp_balloon(int64_t target, Error **errp)
{
if (!have_balloon(errp)) {
return;
}
if (target <= 0) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "target", "a size");
return;
}
trace_balloon_event(balloon_opaque, target);
balloon_event_fn(balloon_opaque, target);
}
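
balloon.c keeps exactly one (event, stat, opaque) triple: a second registration fails, and removal only takes effect if the caller presents the same opaque pointer. A generic single-slot sketch of that registration pattern (hypothetical names, plain C rather than QEMU types):

#include <stdio.h>

typedef void EventFn(void *opaque, long long target);

static EventFn *event_fn;
static void *event_opaque;

static int add_handler(EventFn *fn, void *opaque)
{
    if (event_fn || event_opaque) {
        return -1;              /* one handler already registered */
    }
    event_fn = fn;
    event_opaque = opaque;
    return 0;
}

static void remove_handler(void *opaque)
{
    if (event_opaque != opaque) {
        return;                 /* someone else's registration: ignore */
    }
    event_fn = NULL;
    event_opaque = NULL;
}

static void my_event(void *opaque, long long target)
{
    printf("balloon target: %lld bytes\n", target);
}

int main(void)
{
    int dev = 0;
    add_handler(my_event, &dev);
    printf("second add: %d\n", add_handler(my_event, &dev));  /* -1 */
    event_fn(event_opaque, 1LL << 30);                        /* 1 GiB */
    remove_handler(&dev);
    return 0;
}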

system/bootdevice.c (new file, 430 lines)

@@ -0,0 +1,430 @@
/*
* QEMU Boot Device Implementation
*
* Copyright (c) 2014 HUAWEI TECHNOLOGIES CO., LTD.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "sysemu/sysemu.h"
#include "qapi/visitor.h"
#include "qemu/error-report.h"
#include "sysemu/reset.h"
#include "hw/qdev-core.h"
#include "hw/boards.h"
typedef struct FWBootEntry FWBootEntry;
struct FWBootEntry {
QTAILQ_ENTRY(FWBootEntry) link;
int32_t bootindex;
DeviceState *dev;
char *suffix;
};
static QTAILQ_HEAD(, FWBootEntry) fw_boot_order =
QTAILQ_HEAD_INITIALIZER(fw_boot_order);
static QEMUBootSetHandler *boot_set_handler;
static void *boot_set_opaque;
void qemu_register_boot_set(QEMUBootSetHandler *func, void *opaque)
{
boot_set_handler = func;
boot_set_opaque = opaque;
}
void qemu_boot_set(const char *boot_order, Error **errp)
{
Error *local_err = NULL;
if (!boot_set_handler) {
error_setg(errp, "no function defined to set boot device list for"
" this architecture");
return;
}
validate_bootdevices(boot_order, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
boot_set_handler(boot_set_opaque, boot_order, errp);
}
void validate_bootdevices(const char *devices, Error **errp)
{
/* We just do some generic consistency checks */
const char *p;
int bitmap = 0;
for (p = devices; *p != '\0'; p++) {
/* Allowed boot devices are:
* a-b: floppy disk drives
* c-f: IDE disk drives
* g-m: machine implementation dependent drives
* n-p: network devices
* It's up to each machine implementation to check if the given boot
* devices match the actual hardware implementation and firmware
* features.
*/
if (*p < 'a' || *p > 'p') {
error_setg(errp, "Invalid boot device '%c'", *p);
return;
}
if (bitmap & (1 << (*p - 'a'))) {
error_setg(errp, "Boot device '%c' was given twice", *p);
return;
}
bitmap |= 1 << (*p - 'a');
}
}
void restore_boot_order(void *opaque)
{
char *normal_boot_order = opaque;
static int first = 1;
/* Restore boot order and remove ourselves after the first boot */
if (first) {
first = 0;
return;
}
if (boot_set_handler) {
qemu_boot_set(normal_boot_order, &error_abort);
}
qemu_unregister_reset(restore_boot_order, normal_boot_order);
g_free(normal_boot_order);
}
void check_boot_index(int32_t bootindex, Error **errp)
{
FWBootEntry *i;
if (bootindex >= 0) {
QTAILQ_FOREACH(i, &fw_boot_order, link) {
if (i->bootindex == bootindex) {
error_setg(errp, "The bootindex %d has already been used",
bootindex);
return;
}
}
}
}
void del_boot_device_path(DeviceState *dev, const char *suffix)
{
FWBootEntry *i;
if (dev == NULL) {
return;
}
QTAILQ_FOREACH(i, &fw_boot_order, link) {
if ((!suffix || !g_strcmp0(i->suffix, suffix)) &&
i->dev == dev) {
QTAILQ_REMOVE(&fw_boot_order, i, link);
g_free(i->suffix);
g_free(i);
break;
}
}
}
void add_boot_device_path(int32_t bootindex, DeviceState *dev,
const char *suffix)
{
FWBootEntry *node, *i;
if (bootindex < 0) {
del_boot_device_path(dev, suffix);
return;
}
assert(dev != NULL || suffix != NULL);
del_boot_device_path(dev, suffix);
node = g_new0(FWBootEntry, 1);
node->bootindex = bootindex;
node->suffix = g_strdup(suffix);
node->dev = dev;
QTAILQ_FOREACH(i, &fw_boot_order, link) {
if (i->bootindex == bootindex) {
error_report("Two devices with same boot index %d", bootindex);
exit(1);
} else if (i->bootindex < bootindex) {
continue;
}
QTAILQ_INSERT_BEFORE(i, node, link);
return;
}
QTAILQ_INSERT_TAIL(&fw_boot_order, node, link);
}
DeviceState *get_boot_device(uint32_t position)
{
uint32_t counter = 0;
FWBootEntry *i = NULL;
DeviceState *res = NULL;
if (!QTAILQ_EMPTY(&fw_boot_order)) {
QTAILQ_FOREACH(i, &fw_boot_order, link) {
if (counter == position) {
res = i->dev;
break;
}
counter++;
}
}
return res;
}
static char *get_boot_device_path(DeviceState *dev, bool ignore_suffixes,
const char *suffix)
{
char *devpath = NULL, *s = NULL, *d, *bootpath;
if (dev) {
devpath = qdev_get_fw_dev_path(dev);
assert(devpath);
}
if (!ignore_suffixes) {
if (dev) {
d = qdev_get_own_fw_dev_path_from_handler(dev->parent_bus, dev);
if (d) {
assert(!suffix);
s = d;
} else {
s = g_strdup(suffix);
}
} else {
s = g_strdup(suffix);
}
}
bootpath = g_strdup_printf("%s%s",
devpath ? devpath : "",
s ? s : "");
g_free(devpath);
g_free(s);
return bootpath;
}
/*
* This function returns a null-terminated string that consists of
* newline-separated device paths.
*
* The memory pointed to by "size" is assigned the total length of the
* list in bytes.
*/
char *get_boot_devices_list(size_t *size)
{
FWBootEntry *i;
size_t total = 0;
char *list = NULL;
MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
bool ignore_suffixes = mc->ignore_boot_device_suffixes;
QTAILQ_FOREACH(i, &fw_boot_order, link) {
char *bootpath;
size_t len;
bootpath = get_boot_device_path(i->dev, ignore_suffixes, i->suffix);
if (total) {
list[total-1] = '\n';
}
len = strlen(bootpath) + 1;
list = g_realloc(list, total + len);
memcpy(&list[total], bootpath, len);
total += len;
g_free(bootpath);
}
*size = total;
if (current_machine->boot_config.has_strict &&
current_machine->boot_config.strict && *size > 0) {
list[total-1] = '\n';
list = g_realloc(list, total + 5);
memcpy(&list[total], "HALT", 5);
*size = total + 5;
}
return list;
}
typedef struct {
int32_t *bootindex;
const char *suffix;
DeviceState *dev;
} BootIndexProperty;
static void device_get_bootindex(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
BootIndexProperty *prop = opaque;
visit_type_int32(v, name, prop->bootindex, errp);
}
static void device_set_bootindex(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
BootIndexProperty *prop = opaque;
int32_t boot_index;
Error *local_err = NULL;
if (!visit_type_int32(v, name, &boot_index, errp)) {
return;
}
/* check whether bootindex is present in fw_boot_order list */
check_boot_index(boot_index, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
/* change bootindex to a new one */
*prop->bootindex = boot_index;
add_boot_device_path(*prop->bootindex, prop->dev, prop->suffix);
}
static void property_release_bootindex(Object *obj, const char *name,
void *opaque)
{
BootIndexProperty *prop = opaque;
del_boot_device_path(prop->dev, prop->suffix);
g_free(prop);
}
void device_add_bootindex_property(Object *obj, int32_t *bootindex,
const char *name, const char *suffix,
DeviceState *dev)
{
BootIndexProperty *prop = g_malloc0(sizeof(*prop));
prop->bootindex = bootindex;
prop->suffix = suffix;
prop->dev = dev;
object_property_add(obj, name, "int32",
device_get_bootindex,
device_set_bootindex,
property_release_bootindex,
prop);
/* initialize devices' bootindex property to -1 */
object_property_set_int(obj, name, -1, NULL);
}
typedef struct FWLCHSEntry FWLCHSEntry;
struct FWLCHSEntry {
QTAILQ_ENTRY(FWLCHSEntry) link;
DeviceState *dev;
char *suffix;
uint32_t lcyls;
uint32_t lheads;
uint32_t lsecs;
};
static QTAILQ_HEAD(, FWLCHSEntry) fw_lchs =
QTAILQ_HEAD_INITIALIZER(fw_lchs);
void add_boot_device_lchs(DeviceState *dev, const char *suffix,
uint32_t lcyls, uint32_t lheads, uint32_t lsecs)
{
FWLCHSEntry *node;
if (!lcyls && !lheads && !lsecs) {
return;
}
assert(dev != NULL || suffix != NULL);
node = g_new0(FWLCHSEntry, 1);
node->suffix = g_strdup(suffix);
node->dev = dev;
node->lcyls = lcyls;
node->lheads = lheads;
node->lsecs = lsecs;
QTAILQ_INSERT_TAIL(&fw_lchs, node, link);
}
void del_boot_device_lchs(DeviceState *dev, const char *suffix)
{
FWLCHSEntry *i;
if (dev == NULL) {
return;
}
QTAILQ_FOREACH(i, &fw_lchs, link) {
if ((!suffix || !g_strcmp0(i->suffix, suffix)) &&
i->dev == dev) {
QTAILQ_REMOVE(&fw_lchs, i, link);
g_free(i->suffix);
g_free(i);
break;
}
}
}
char *get_boot_devices_lchs_list(size_t *size)
{
FWLCHSEntry *i;
size_t total = 0;
char *list = NULL;
QTAILQ_FOREACH(i, &fw_lchs, link) {
char *bootpath;
char *chs_string;
size_t len;
bootpath = get_boot_device_path(i->dev, false, i->suffix);
chs_string = g_strdup_printf("%s %" PRIu32 " %" PRIu32 " %" PRIu32,
bootpath, i->lcyls, i->lheads, i->lsecs);
if (total) {
list[total - 1] = '\n';
}
len = strlen(chs_string) + 1;
list = g_realloc(list, total + len);
memcpy(&list[total], chs_string, len);
total += len;
g_free(chs_string);
g_free(bootpath);
}
*size = total;
return list;
}
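
get_boot_devices_list() and get_boot_devices_lchs_list() both build a single NUL-terminated buffer in which each previously written NUL is overwritten with '\n' once the next entry arrives (and, for -boot strict=on, get_boot_devices_list() appends a final "\nHALT"). A small standalone sketch of that separator trick with made-up firmware paths:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
    const char *paths[] = { "/pci@i0cf8/ide@1,1/drive@1/disk@0",
                            "/pci@i0cf8/ethernet@3/ethernet-phy@0" };
    size_t total = 0;
    char *list = NULL;

    for (size_t i = 0; i < 2; i++) {
        size_t len = strlen(paths[i]) + 1;   /* copy the NUL too */
        if (total) {
            list[total - 1] = '\n';          /* previous NUL becomes '\n' */
        }
        list = realloc(list, total + len);   /* error checks omitted */
        memcpy(&list[total], paths[i], len);
        total += len;
    }
    printf("%s\n", list);                    /* two newline-separated paths */
    free(list);
    return 0;
}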

system/cpu-throttle.c (new file, 128 lines)

@@ -0,0 +1,128 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qemu/thread.h"
#include "hw/core/cpu.h"
#include "qemu/main-loop.h"
#include "sysemu/cpus.h"
#include "sysemu/cpu-throttle.h"
/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;
#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000
static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
{
double pct;
double throttle_ratio;
int64_t sleeptime_ns, endtime_ns;
if (!cpu_throttle_get_percentage()) {
return;
}
pct = (double)cpu_throttle_get_percentage() / 100;
throttle_ratio = pct / (1 - pct);
/* Add 1ns to fix double's rounding error (like 0.9999999...) */
sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns;
while (sleeptime_ns > 0 && !cpu->stop) {
if (sleeptime_ns > SCALE_MS) {
qemu_cond_timedwait_iothread(cpu->halt_cond,
sleeptime_ns / SCALE_MS);
} else {
qemu_mutex_unlock_iothread();
g_usleep(sleeptime_ns / SCALE_US);
qemu_mutex_lock_iothread();
}
sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
}
qatomic_set(&cpu->throttle_thread_scheduled, 0);
}
static void cpu_throttle_timer_tick(void *opaque)
{
CPUState *cpu;
double pct;
/* Stop the timer if needed */
if (!cpu_throttle_get_percentage()) {
return;
}
CPU_FOREACH(cpu) {
if (!qatomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
async_run_on_cpu(cpu, cpu_throttle_thread,
RUN_ON_CPU_NULL);
}
}
pct = (double)cpu_throttle_get_percentage() / 100;
timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
CPU_THROTTLE_TIMESLICE_NS / (1 - pct));
}
void cpu_throttle_set(int new_throttle_pct)
{
/*
* boolean to store whether throttle is already active or not,
* before modifying throttle_percentage
*/
bool throttle_active = cpu_throttle_active();
/* Ensure throttle percentage is within valid range */
new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
qatomic_set(&throttle_percentage, new_throttle_pct);
if (!throttle_active) {
cpu_throttle_timer_tick(NULL);
}
}
void cpu_throttle_stop(void)
{
qatomic_set(&throttle_percentage, 0);
}
bool cpu_throttle_active(void)
{
return (cpu_throttle_get_percentage() != 0);
}
int cpu_throttle_get_percentage(void)
{
return qatomic_read(&throttle_percentage);
}
void cpu_throttle_init(void)
{
throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
cpu_throttle_timer_tick, NULL);
}
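
The throttle math above converts a target idle percentage into a sleep time per 10 ms timeslice: throttle_ratio = pct / (1 - pct), so at 60% the vCPU sleeps roughly 15 ms for every 10 ms of run time (15 / 25 = 60% idle). A worked sketch of just that arithmetic (the 60% figure is an arbitrary example):

#include <stdio.h>

#define CPU_THROTTLE_TIMESLICE_NS 10000000   /* 10 ms, as above */

int main(void)
{
    int throttle_pct = 60;                   /* hypothetical 60% throttle */
    double pct = throttle_pct / 100.0;
    double ratio = pct / (1 - pct);          /* sleep time per run time */
    long long sleep_ns = (long long)(ratio * CPU_THROTTLE_TIMESLICE_NS + 1);

    /* 10 ms running + ~15 ms sleeping = ~25 ms period, 15/25 = 60% idle */
    printf("sleep %lld ns per timeslice, idle fraction %.2f\n", sleep_ns,
           (double)sleep_ns / (sleep_ns + CPU_THROTTLE_TIMESLICE_NS));
    return 0;
}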

system/cpu-timers.c (new file, 277 lines)

@@ -0,0 +1,277 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "sysemu/cpus.h"
#include "qemu/main-loop.h"
#include "qemu/option.h"
#include "qemu/seqlock.h"
#include "sysemu/replay.h"
#include "sysemu/runstate.h"
#include "hw/core/cpu.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/cpu-throttle.h"
#include "sysemu/cpu-timers-internal.h"
/* clock and ticks */
static int64_t cpu_get_ticks_locked(void)
{
int64_t ticks = timers_state.cpu_ticks_offset;
if (timers_state.cpu_ticks_enabled) {
ticks += cpu_get_host_ticks();
}
if (timers_state.cpu_ticks_prev > ticks) {
/* Non-increasing ticks may happen if the host uses software suspend. */
timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
ticks = timers_state.cpu_ticks_prev;
}
timers_state.cpu_ticks_prev = ticks;
return ticks;
}
/*
* return the time elapsed in VM between vm_start and vm_stop.
* cpu_get_ticks() uses units of the host CPU cycle counter.
*/
int64_t cpu_get_ticks(void)
{
int64_t ticks;
qemu_spin_lock(&timers_state.vm_clock_lock);
ticks = cpu_get_ticks_locked();
qemu_spin_unlock(&timers_state.vm_clock_lock);
return ticks;
}
int64_t cpu_get_clock_locked(void)
{
int64_t time;
time = timers_state.cpu_clock_offset;
if (timers_state.cpu_ticks_enabled) {
time += get_clock();
}
return time;
}
/*
* Return the monotonic time elapsed in VM, i.e.,
* the time between vm_start and vm_stop
*/
int64_t cpu_get_clock(void)
{
int64_t ti;
unsigned start;
do {
start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
ti = cpu_get_clock_locked();
} while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
return ti;
}
/*
* enable cpu_get_ticks()
* Caller must hold BQL which serves as mutex for vm_clock_seqlock.
*/
void cpu_enable_ticks(void)
{
seqlock_write_lock(&timers_state.vm_clock_seqlock,
&timers_state.vm_clock_lock);
if (!timers_state.cpu_ticks_enabled) {
timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
timers_state.cpu_clock_offset -= get_clock();
timers_state.cpu_ticks_enabled = 1;
}
seqlock_write_unlock(&timers_state.vm_clock_seqlock,
&timers_state.vm_clock_lock);
}
/*
* disable cpu_get_ticks() : the clock is stopped. You must not call
* cpu_get_ticks() after that.
* Caller must hold BQL which serves as mutex for vm_clock_seqlock.
*/
void cpu_disable_ticks(void)
{
seqlock_write_lock(&timers_state.vm_clock_seqlock,
&timers_state.vm_clock_lock);
if (timers_state.cpu_ticks_enabled) {
timers_state.cpu_ticks_offset += cpu_get_host_ticks();
timers_state.cpu_clock_offset = cpu_get_clock_locked();
timers_state.cpu_ticks_enabled = 0;
}
seqlock_write_unlock(&timers_state.vm_clock_seqlock,
&timers_state.vm_clock_lock);
}
static bool icount_state_needed(void *opaque)
{
return icount_enabled();
}
static bool warp_timer_state_needed(void *opaque)
{
TimersState *s = opaque;
return s->icount_warp_timer != NULL;
}
static bool adjust_timers_state_needed(void *opaque)
{
TimersState *s = opaque;
return s->icount_rt_timer != NULL;
}
static bool icount_shift_state_needed(void *opaque)
{
return icount_enabled() == 2;
}
/*
* Subsection for warp timer migration is optional, because it may not be created
*/
static const VMStateDescription icount_vmstate_warp_timer = {
.name = "timer/icount/warp_timer",
.version_id = 1,
.minimum_version_id = 1,
.needed = warp_timer_state_needed,
.fields = (VMStateField[]) {
VMSTATE_INT64(vm_clock_warp_start, TimersState),
VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
VMSTATE_END_OF_LIST()
}
};
static const VMStateDescription icount_vmstate_adjust_timers = {
.name = "timer/icount/timers",
.version_id = 1,
.minimum_version_id = 1,
.needed = adjust_timers_state_needed,
.fields = (VMStateField[]) {
VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
VMSTATE_END_OF_LIST()
}
};
static const VMStateDescription icount_vmstate_shift = {
.name = "timer/icount/shift",
.version_id = 2,
.minimum_version_id = 2,
.needed = icount_shift_state_needed,
.fields = (VMStateField[]) {
VMSTATE_INT16(icount_time_shift, TimersState),
VMSTATE_INT64(last_delta, TimersState),
VMSTATE_END_OF_LIST()
}
};
/*
* This is a subsection for icount migration.
*/
static const VMStateDescription icount_vmstate_timers = {
.name = "timer/icount",
.version_id = 1,
.minimum_version_id = 1,
.needed = icount_state_needed,
.fields = (VMStateField[]) {
VMSTATE_INT64(qemu_icount_bias, TimersState),
VMSTATE_INT64(qemu_icount, TimersState),
VMSTATE_END_OF_LIST()
},
.subsections = (const VMStateDescription * []) {
&icount_vmstate_warp_timer,
&icount_vmstate_adjust_timers,
&icount_vmstate_shift,
NULL
}
};
static const VMStateDescription vmstate_timers = {
.name = "timer",
.version_id = 2,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_INT64(cpu_ticks_offset, TimersState),
VMSTATE_UNUSED(8),
VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
VMSTATE_END_OF_LIST()
},
.subsections = (const VMStateDescription * []) {
&icount_vmstate_timers,
NULL
}
};
static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
}
void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
{
if (!icount_enabled() || type != QEMU_CLOCK_VIRTUAL) {
qemu_notify_event();
return;
}
if (qemu_in_vcpu_thread()) {
/*
* A CPU is currently running; kick it back out to the
* tcg_cpu_exec() loop so it will recalculate its
* icount deadline immediately.
*/
qemu_cpu_kick(current_cpu);
} else if (first_cpu) {
/*
* qemu_cpu_kick is not enough to kick a halted CPU out of
* qemu_tcg_wait_io_event. async_run_on_cpu, instead,
* causes cpu_thread_is_idle to return false. This way,
* handle_icount_deadline can run.
* If we have no CPUs at all for some reason, we don't
* need to do anything.
*/
async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
}
}
TimersState timers_state;
/* initialize timers state and the cpu throttle for convenience */
void cpu_timers_init(void)
{
seqlock_init(&timers_state.vm_clock_seqlock);
qemu_spin_init(&timers_state.vm_clock_lock);
vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
cpu_throttle_init();
}
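
cpu_enable_ticks() and cpu_disable_ticks() implement a pausable counter on top of a free-running one: the offset absorbs the host counter on each transition, so reads while enabled return offset + host ticks and reads while disabled return the frozen offset. A self-contained sketch of that bookkeeping (toy tick values, no seqlock):

#include <stdio.h>

static long long offset;      /* plays the role of cpu_ticks_offset */
static long long host_ticks;  /* stands in for cpu_get_host_ticks() */
static int enabled;

static long long get_ticks(void)
{
    return enabled ? offset + host_ticks : offset;
}

static void enable(void)  { if (!enabled) { offset -= host_ticks; enabled = 1; } }
static void disable(void) { if (enabled)  { offset += host_ticks; enabled = 0; } }

int main(void)
{
    host_ticks = 100; enable();      /* vm_start at host tick 100 */
    host_ticks = 250;
    printf("%lld\n", get_ticks());   /* 150 ticks of guest time */
    disable();                       /* vm_stop: the clock freezes */
    host_ticks = 900;
    printf("%lld\n", get_ticks());   /* still 150 */
    host_ticks = 1000; enable();     /* vm_start again */
    host_ticks = 1050;
    printf("%lld\n", get_ticks());   /* 200 */
    return 0;
}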

system/cpus.c (new file, 822 lines)

@@ -0,0 +1,822 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "monitor/monitor.h"
#include "qemu/coroutine-tls.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-machine.h"
#include "qapi/qapi-commands-misc.h"
#include "qapi/qapi-events-run-state.h"
#include "qapi/qmp/qerror.h"
#include "exec/gdbstub.h"
#include "sysemu/hw_accel.h"
#include "exec/cpu-common.h"
#include "qemu/thread.h"
#include "qemu/main-loop.h"
#include "qemu/plugin.h"
#include "sysemu/cpus.h"
#include "qemu/guest-random.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"
#include "sysemu/runstate.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/whpx.h"
#include "hw/boards.h"
#include "hw/hw.h"
#include "trace.h"
#ifdef CONFIG_LINUX
#include <sys/prctl.h>
#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif
#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif
#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif
#endif /* CONFIG_LINUX */
static QemuMutex qemu_global_mutex;
/*
* The chosen accelerator is supposed to register this.
*/
static const AccelOpsClass *cpus_accel;
bool cpu_is_stopped(CPUState *cpu)
{
return cpu->stopped || !runstate_is_running();
}
bool cpu_work_list_empty(CPUState *cpu)
{
return QSIMPLEQ_EMPTY_ATOMIC(&cpu->work_list);
}
bool cpu_thread_is_idle(CPUState *cpu)
{
if (cpu->stop || !cpu_work_list_empty(cpu)) {
return false;
}
if (cpu_is_stopped(cpu)) {
return true;
}
if (!cpu->halted || cpu_has_work(cpu)) {
return false;
}
if (cpus_accel->cpu_thread_is_idle) {
return cpus_accel->cpu_thread_is_idle(cpu);
}
return true;
}
bool all_cpu_threads_idle(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
if (!cpu_thread_is_idle(cpu)) {
return false;
}
}
return true;
}
/***********************************************************/
void hw_error(const char *fmt, ...)
{
va_list ap;
CPUState *cpu;
va_start(ap, fmt);
fprintf(stderr, "qemu: hardware error: ");
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
CPU_FOREACH(cpu) {
fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
cpu_dump_state(cpu, stderr, CPU_DUMP_FPU);
}
va_end(ap);
abort();
}
void cpu_synchronize_all_states(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
cpu_synchronize_state(cpu);
}
}
void cpu_synchronize_all_post_reset(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
cpu_synchronize_post_reset(cpu);
}
}
void cpu_synchronize_all_post_init(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
cpu_synchronize_post_init(cpu);
}
}
void cpu_synchronize_all_pre_loadvm(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
cpu_synchronize_pre_loadvm(cpu);
}
}
void cpu_synchronize_state(CPUState *cpu)
{
if (cpus_accel->synchronize_state) {
cpus_accel->synchronize_state(cpu);
}
}
void cpu_synchronize_post_reset(CPUState *cpu)
{
if (cpus_accel->synchronize_post_reset) {
cpus_accel->synchronize_post_reset(cpu);
}
}
void cpu_synchronize_post_init(CPUState *cpu)
{
if (cpus_accel->synchronize_post_init) {
cpus_accel->synchronize_post_init(cpu);
}
}
void cpu_synchronize_pre_loadvm(CPUState *cpu)
{
if (cpus_accel->synchronize_pre_loadvm) {
cpus_accel->synchronize_pre_loadvm(cpu);
}
}
bool cpus_are_resettable(void)
{
if (cpus_accel->cpus_are_resettable) {
return cpus_accel->cpus_are_resettable();
}
return true;
}
int64_t cpus_get_virtual_clock(void)
{
/*
* XXX
*
* need to check that cpus_accel is not NULL, because qcow2 calls
* qemu_get_clock_ns(CLOCK_VIRTUAL) without any accel initialized and
* with ticks disabled in some io-tests:
* 030 040 041 060 099 120 127 140 156 161 172 181 191 192 195 203 229 249 256 267
*
* is this expected?
*
* XXX
*/
if (cpus_accel && cpus_accel->get_virtual_clock) {
return cpus_accel->get_virtual_clock();
}
return cpu_get_clock();
}
/*
* return the time elapsed in VM between vm_start and vm_stop. Unless
* icount is active, cpus_get_elapsed_ticks() uses units of the host CPU cycle
* counter.
*/
int64_t cpus_get_elapsed_ticks(void)
{
if (cpus_accel->get_elapsed_ticks) {
return cpus_accel->get_elapsed_ticks();
}
return cpu_get_ticks();
}
static void generic_handle_interrupt(CPUState *cpu, int mask)
{
cpu->interrupt_request |= mask;
if (!qemu_cpu_is_self(cpu)) {
qemu_cpu_kick(cpu);
}
}
void cpu_interrupt(CPUState *cpu, int mask)
{
if (cpus_accel->handle_interrupt) {
cpus_accel->handle_interrupt(cpu, mask);
} else {
generic_handle_interrupt(cpu, mask);
}
}
static int do_vm_stop(RunState state, bool send_stop)
{
int ret = 0;
if (runstate_is_running()) {
runstate_set(state);
cpu_disable_ticks();
pause_all_vcpus();
vm_state_notify(0, state);
if (send_stop) {
qapi_event_send_stop();
}
}
bdrv_drain_all();
ret = bdrv_flush_all();
trace_vm_stop_flush_all(ret);
return ret;
}
/* Special vm_stop() variant for terminating the process. Historically clients
* did not expect a QMP STOP event and so we need to retain compatibility.
*/
int vm_shutdown(void)
{
return do_vm_stop(RUN_STATE_SHUTDOWN, false);
}
bool cpu_can_run(CPUState *cpu)
{
if (cpu->stop) {
return false;
}
if (cpu_is_stopped(cpu)) {
return false;
}
return true;
}
void cpu_handle_guest_debug(CPUState *cpu)
{
if (replay_running_debug()) {
if (!cpu->singlestep_enabled) {
/*
* Report about the breakpoint and
* make a single step to skip it
*/
replay_breakpoint();
cpu_single_step(cpu, SSTEP_ENABLE);
} else {
cpu_single_step(cpu, 0);
}
} else {
gdb_set_stop_cpu(cpu);
qemu_system_debug_request();
cpu->stopped = true;
}
}
#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
sigset_t set;
struct sigaction action;
memset(&action, 0, sizeof(action));
action.sa_handler = SIG_DFL;
if (!sigaction(SIGBUS, &action, NULL)) {
raise(SIGBUS);
sigemptyset(&set);
sigaddset(&set, SIGBUS);
pthread_sigmask(SIG_UNBLOCK, &set, NULL);
}
perror("Failed to re-raise SIGBUS!");
abort();
}
static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
{
if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
sigbus_reraise();
}
if (current_cpu) {
/* Called asynchronously in VCPU thread. */
if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
sigbus_reraise();
}
} else {
/* Called synchronously (via signalfd) in main thread. */
if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
sigbus_reraise();
}
}
}
static void qemu_init_sigbus(void)
{
struct sigaction action;
/*
* ALERT: when modifying this, take care that SIGBUS forwarding in
* qemu_prealloc_mem() will continue working as expected.
*/
memset(&action, 0, sizeof(action));
action.sa_flags = SA_SIGINFO;
action.sa_sigaction = sigbus_handler;
sigaction(SIGBUS, &action, NULL);
prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
#else /* !CONFIG_LINUX */
static void qemu_init_sigbus(void)
{
}
#endif /* !CONFIG_LINUX */
static QemuThread io_thread;
/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
void qemu_init_cpu_loop(void)
{
qemu_init_sigbus();
qemu_cond_init(&qemu_cpu_cond);
qemu_cond_init(&qemu_pause_cond);
qemu_mutex_init(&qemu_global_mutex);
qemu_thread_get_self(&io_thread);
}
void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}
static void qemu_cpu_stop(CPUState *cpu, bool exit)
{
g_assert(qemu_cpu_is_self(cpu));
cpu->stop = false;
cpu->stopped = true;
if (exit) {
cpu_exit(cpu);
}
qemu_cond_broadcast(&qemu_pause_cond);
}
void qemu_wait_io_event_common(CPUState *cpu)
{
qatomic_set_mb(&cpu->thread_kicked, false);
if (cpu->stop) {
qemu_cpu_stop(cpu, false);
}
process_queued_cpu_work(cpu);
}
void qemu_wait_io_event(CPUState *cpu)
{
bool slept = false;
while (cpu_thread_is_idle(cpu)) {
if (!slept) {
slept = true;
qemu_plugin_vcpu_idle_cb(cpu);
}
qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
}
if (slept) {
qemu_plugin_vcpu_resume_cb(cpu);
}
qemu_wait_io_event_common(cpu);
}
void cpus_kick_thread(CPUState *cpu)
{
if (cpu->thread_kicked) {
return;
}
cpu->thread_kicked = true;
#ifndef _WIN32
int err = pthread_kill(cpu->thread->thread, SIG_IPI);
if (err && err != ESRCH) {
fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
exit(1);
}
#else
qemu_sem_post(&cpu->sem);
#endif
}
void qemu_cpu_kick(CPUState *cpu)
{
qemu_cond_broadcast(cpu->halt_cond);
if (cpus_accel->kick_vcpu_thread) {
cpus_accel->kick_vcpu_thread(cpu);
} else { /* default */
cpus_kick_thread(cpu);
}
}
void qemu_cpu_kick_self(void)
{
assert(current_cpu);
cpus_kick_thread(current_cpu);
}
bool qemu_cpu_is_self(CPUState *cpu)
{
return qemu_thread_is_self(cpu->thread);
}
bool qemu_in_vcpu_thread(void)
{
return current_cpu && qemu_cpu_is_self(current_cpu);
}
QEMU_DEFINE_STATIC_CO_TLS(bool, iothread_locked)
bool qemu_mutex_iothread_locked(void)
{
return get_iothread_locked();
}
bool qemu_in_main_thread(void)
{
return qemu_mutex_iothread_locked();
}
/*
* The BQL is taken from so many places that it is worth profiling the
* callers directly, instead of funneling them all through a single function.
*/
void qemu_mutex_lock_iothread_impl(const char *file, int line)
{
QemuMutexLockFunc bql_lock = qatomic_read(&qemu_bql_mutex_lock_func);
g_assert(!qemu_mutex_iothread_locked());
bql_lock(&qemu_global_mutex, file, line);
set_iothread_locked(true);
}
void qemu_mutex_unlock_iothread(void)
{
g_assert(qemu_mutex_iothread_locked());
set_iothread_locked(false);
qemu_mutex_unlock(&qemu_global_mutex);
}
void qemu_cond_wait_iothread(QemuCond *cond)
{
qemu_cond_wait(cond, &qemu_global_mutex);
}
void qemu_cond_timedwait_iothread(QemuCond *cond, int ms)
{
qemu_cond_timedwait(cond, &qemu_global_mutex, ms);
}
/* signal CPU creation */
void cpu_thread_signal_created(CPUState *cpu)
{
cpu->created = true;
qemu_cond_signal(&qemu_cpu_cond);
}
/* signal CPU destruction */
void cpu_thread_signal_destroyed(CPUState *cpu)
{
cpu->created = false;
qemu_cond_signal(&qemu_cpu_cond);
}
static bool all_vcpus_paused(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
if (!cpu->stopped) {
return false;
}
}
return true;
}
void pause_all_vcpus(void)
{
CPUState *cpu;
qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
CPU_FOREACH(cpu) {
if (qemu_cpu_is_self(cpu)) {
qemu_cpu_stop(cpu, true);
} else {
cpu->stop = true;
qemu_cpu_kick(cpu);
}
}
/* We need to drop the replay_lock so any vCPU threads woken up
* can finish their replay tasks
*/
replay_mutex_unlock();
while (!all_vcpus_paused()) {
qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
CPU_FOREACH(cpu) {
qemu_cpu_kick(cpu);
}
}
qemu_mutex_unlock_iothread();
replay_mutex_lock();
qemu_mutex_lock_iothread();
}
void cpu_resume(CPUState *cpu)
{
cpu->stop = false;
cpu->stopped = false;
qemu_cpu_kick(cpu);
}
void resume_all_vcpus(void)
{
CPUState *cpu;
if (!runstate_is_running()) {
return;
}
qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
CPU_FOREACH(cpu) {
cpu_resume(cpu);
}
}
void cpu_remove_sync(CPUState *cpu)
{
cpu->stop = true;
cpu->unplug = true;
qemu_cpu_kick(cpu);
qemu_mutex_unlock_iothread();
qemu_thread_join(cpu->thread);
qemu_mutex_lock_iothread();
}
void cpus_register_accel(const AccelOpsClass *ops)
{
assert(ops != NULL);
assert(ops->create_vcpu_thread != NULL); /* mandatory */
cpus_accel = ops;
}
const AccelOpsClass *cpus_get_accel(void)
{
/* broken if we call this early */
assert(cpus_accel);
return cpus_accel;
}
void qemu_init_vcpu(CPUState *cpu)
{
MachineState *ms = MACHINE(qdev_get_machine());
cpu->nr_cores = ms->smp.cores;
cpu->nr_threads = ms->smp.threads;
cpu->stopped = true;
cpu->random_seed = qemu_guest_random_seed_thread_part1();
if (!cpu->as) {
/* If the target cpu hasn't set up any address spaces itself,
* give it the default one.
*/
cpu->num_ases = 1;
cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
}
/* accelerators all implement the AccelOpsClass */
g_assert(cpus_accel != NULL && cpus_accel->create_vcpu_thread != NULL);
cpus_accel->create_vcpu_thread(cpu);
while (!cpu->created) {
qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
}
}
void cpu_stop_current(void)
{
if (current_cpu) {
current_cpu->stop = true;
cpu_exit(current_cpu);
}
}
int vm_stop(RunState state)
{
if (qemu_in_vcpu_thread()) {
qemu_system_vmstop_request_prepare();
qemu_system_vmstop_request(state);
/*
* FIXME: should not return to device code in case
* vm_stop() has been requested.
*/
cpu_stop_current();
return 0;
}
return do_vm_stop(state, true);
}
/**
* Prepare for (re)starting the VM.
* Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
* running or in case of an error condition), 0 otherwise.
*/
int vm_prepare_start(bool step_pending)
{
RunState requested;
qemu_vmstop_requested(&requested);
if (runstate_is_running() && requested == RUN_STATE__MAX) {
return -1;
}
/* Ensure that a STOP/RESUME pair of events is emitted if a
* vmstop request was pending. The BLOCK_IO_ERROR event, for
* example, according to documentation is always followed by
* the STOP event.
*/
if (runstate_is_running()) {
qapi_event_send_stop();
qapi_event_send_resume();
return -1;
}
/*
* WHPX accelerator needs to know whether we are going to step
* any CPUs, before starting the first one.
*/
if (cpus_accel->synchronize_pre_resume) {
cpus_accel->synchronize_pre_resume(step_pending);
}
/* We are sending this now, but the CPUs will be resumed shortly later */
qapi_event_send_resume();
cpu_enable_ticks();
runstate_set(RUN_STATE_RUNNING);
vm_state_notify(1, RUN_STATE_RUNNING);
return 0;
}
void vm_start(void)
{
if (!vm_prepare_start(false)) {
resume_all_vcpus();
}
}
/* does a state transition even if the VM is already stopped;
the current state is forgotten forever */
int vm_stop_force_state(RunState state)
{
if (runstate_is_running()) {
return vm_stop(state);
} else {
int ret;
runstate_set(state);
bdrv_drain_all();
/* Make sure to return an error if the flush in a previous vm_stop()
* failed. */
ret = bdrv_flush_all();
trace_vm_stop_flush_all(ret);
return ret;
}
}
void qmp_memsave(int64_t addr, int64_t size, const char *filename,
bool has_cpu, int64_t cpu_index, Error **errp)
{
FILE *f;
uint32_t l;
CPUState *cpu;
uint8_t buf[1024];
int64_t orig_addr = addr, orig_size = size;
if (!has_cpu) {
cpu_index = 0;
}
cpu = qemu_get_cpu(cpu_index);
if (cpu == NULL) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
"a CPU number");
return;
}
f = fopen(filename, "wb");
if (!f) {
error_setg_file_open(errp, errno, filename);
return;
}
while (size != 0) {
l = sizeof(buf);
if (l > size)
l = size;
if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
" specified", orig_addr, orig_size);
goto exit;
}
if (fwrite(buf, 1, l, f) != l) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
}
addr += l;
size -= l;
}
exit:
fclose(f);
}
void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
Error **errp)
{
FILE *f;
uint32_t l;
uint8_t buf[1024];
f = fopen(filename, "wb");
if (!f) {
error_setg_file_open(errp, errno, filename);
return;
}
while (size != 0) {
l = sizeof(buf);
if (l > size)
l = size;
cpu_physical_memory_read(addr, buf, l);
if (fwrite(buf, 1, l, f) != l) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
}
addr += l;
size -= l;
}
exit:
fclose(f);
}
void qmp_inject_nmi(Error **errp)
{
nmi_monitor_handle(monitor_get_cpu_index(monitor_cur()), errp);
}
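
sigbus_reraise() above uses a common idiom: restore SIG_DFL, raise() while the signal is still blocked inside the handler, then unblock so the default (fatal) action fires immediately. A standalone single-threaded sketch of the same idiom using SIGTERM (demo only; the QEMU code uses pthread_sigmask because vCPU threads are involved):

#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void reraise(int sig)
{
    struct sigaction action;
    sigset_t set;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;                /* restore the fatal default */
    if (!sigaction(sig, &action, NULL)) {
        raise(sig);                             /* pending: blocked in handler */
        sigemptyset(&set);
        sigaddset(&set, sig);
        sigprocmask(SIG_UNBLOCK, &set, NULL);   /* delivered here, fatally */
    }
}

static void handler(int sig)
{
    write(STDOUT_FILENO, "handled once\n", 13);
    reraise(sig);
}

int main(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = handler;
    sigaction(SIGTERM, &action, NULL);
    raise(SIGTERM);
    puts("not reached");                        /* process died in reraise() */
    return 0;
}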

system/datadir.c (new file, 110 lines)

@@ -0,0 +1,110 @@
/*
* QEMU firmware and keymap file search
*
* Copyright (c) 2003-2020 QEMU contributors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qemu/datadir.h"
#include "qemu/cutils.h"
#include "trace.h"
static const char *data_dir[16];
static int data_dir_idx;
char *qemu_find_file(int type, const char *name)
{
int i;
const char *subdir;
char *buf;
/* Try the name as a straight path first */
if (access(name, R_OK) == 0) {
trace_load_file(name, name);
return g_strdup(name);
}
switch (type) {
case QEMU_FILE_TYPE_BIOS:
subdir = "";
break;
case QEMU_FILE_TYPE_KEYMAP:
subdir = "keymaps/";
break;
default:
abort();
}
for (i = 0; i < data_dir_idx; i++) {
buf = g_strdup_printf("%s/%s%s", data_dir[i], subdir, name);
if (access(buf, R_OK) == 0) {
trace_load_file(name, buf);
return buf;
}
g_free(buf);
}
return NULL;
}
void qemu_add_data_dir(char *path)
{
int i;
if (path == NULL) {
return;
}
if (data_dir_idx == ARRAY_SIZE(data_dir)) {
return;
}
for (i = 0; i < data_dir_idx; i++) {
if (strcmp(data_dir[i], path) == 0) {
g_free(path); /* duplicate */
return;
}
}
data_dir[data_dir_idx++] = path;
}
void qemu_add_default_firmwarepath(void)
{
static const char * const dirs[] = {
CONFIG_QEMU_FIRMWAREPATH
NULL
};
size_t i;
/* add configured firmware directories */
for (i = 0; dirs[i] != NULL; i++) {
qemu_add_data_dir(get_relocated_path(dirs[i]));
}
/* try to find datadir relative to the executable path */
qemu_add_data_dir(get_relocated_path(CONFIG_QEMU_DATADIR));
}
void qemu_list_data_dirs(void)
{
int i;
for (i = 0; i < data_dir_idx; i++) {
printf("%s\n", data_dir[i]);
}
}
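
qemu_find_file() resolves a firmware or keymap name by trying it as a plain path first and then probing each registered data directory in order. A stripped-down sketch of the same lookup (hypothetical directories, plain libc instead of glib; "efi-virtio.rom" is just an example name):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static const char *data_dirs[] = { "/usr/local/share/qemu",  /* hypothetical */
                                   "/usr/share/qemu" };

static char *find_file(const char *name)
{
    char buf[4096];

    if (access(name, R_OK) == 0) {           /* straight path wins */
        return strdup(name);
    }
    for (size_t i = 0; i < sizeof(data_dirs) / sizeof(data_dirs[0]); i++) {
        snprintf(buf, sizeof(buf), "%s/%s", data_dirs[i], name);
        if (access(buf, R_OK) == 0) {        /* first hit in order wins */
            return strdup(buf);
        }
    }
    return NULL;
}

int main(void)
{
    char *p = find_file("efi-virtio.rom");
    printf("%s\n", p ? p : "(not found)");
    free(p);
    return 0;
}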

system/device_tree.c (new file, 703 lines)

@@ -0,0 +1,703 @@
/*
* Functions to help device tree manipulation using libfdt.
* It also provides functions to read entries from device tree proc
* interface.
*
* Copyright 2008 IBM Corporation.
* Authors: Jerone Young <jyoung5@us.ibm.com>
* Hollis Blanchard <hollisb@us.ibm.com>
*
* This work is licensed under the GNU GPL license version 2 or later.
*
*/
#include "qemu/osdep.h"
#ifdef CONFIG_LINUX
#include <dirent.h>
#endif
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/option.h"
#include "qemu/bswap.h"
#include "qemu/cutils.h"
#include "qemu/guest-random.h"
#include "sysemu/device_tree.h"
#include "hw/loader.h"
#include "hw/boards.h"
#include "qemu/config-file.h"
#include "qapi/qapi-commands-machine.h"
#include "qapi/qmp/qdict.h"
#include "monitor/hmp.h"
#include <libfdt.h>
#define FDT_MAX_SIZE 0x100000
void *create_device_tree(int *sizep)
{
void *fdt;
int ret;
*sizep = FDT_MAX_SIZE;
fdt = g_malloc0(FDT_MAX_SIZE);
ret = fdt_create(fdt, FDT_MAX_SIZE);
if (ret < 0) {
goto fail;
}
ret = fdt_finish_reservemap(fdt);
if (ret < 0) {
goto fail;
}
ret = fdt_begin_node(fdt, "");
if (ret < 0) {
goto fail;
}
ret = fdt_end_node(fdt);
if (ret < 0) {
goto fail;
}
ret = fdt_finish(fdt);
if (ret < 0) {
goto fail;
}
ret = fdt_open_into(fdt, fdt, *sizep);
if (ret) {
error_report("%s: Unable to copy device tree into memory: %s",
__func__, fdt_strerror(ret));
exit(1);
}
return fdt;
fail:
error_report("%s Couldn't create dt: %s", __func__, fdt_strerror(ret));
exit(1);
}
void *load_device_tree(const char *filename_path, int *sizep)
{
int dt_size;
int dt_file_load_size;
int ret;
void *fdt = NULL;
*sizep = 0;
dt_size = get_image_size(filename_path);
if (dt_size < 0) {
error_report("Unable to get size of device tree file '%s'",
filename_path);
goto fail;
}
if (dt_size > INT_MAX / 2 - 10000) {
error_report("Device tree file '%s' is too large", filename_path);
goto fail;
}
/* Expand to 2x size to give enough room for manipulation. */
dt_size += 10000;
dt_size *= 2;
/* First allocate space in qemu for device tree */
fdt = g_malloc0(dt_size);
dt_file_load_size = load_image_size(filename_path, fdt, dt_size);
if (dt_file_load_size < 0) {
error_report("Unable to open device tree file '%s'",
filename_path);
goto fail;
}
ret = fdt_open_into(fdt, fdt, dt_size);
if (ret) {
error_report("%s: Unable to copy device tree into memory: %s",
__func__, fdt_strerror(ret));
goto fail;
}
/* Check sanity of device tree */
if (fdt_check_header(fdt)) {
error_report("Device tree file loaded into memory is invalid: %s",
filename_path);
goto fail;
}
*sizep = dt_size;
return fdt;
fail:
g_free(fdt);
return NULL;
}
#ifdef CONFIG_LINUX
#define SYSFS_DT_BASEDIR "/proc/device-tree"
/**
* read_fstree: this function is inspired by dtc's read_fstree
* @fdt: preallocated fdt blob buffer, to be populated
* @dirname: directory to scan under SYSFS_DT_BASEDIR
* the search is recursive and the tree is searched down to the
* leaves (property files).
*
* the function asserts in case of error
*/
static void read_fstree(void *fdt, const char *dirname)
{
DIR *d;
struct dirent *de;
struct stat st;
const char *root_dir = SYSFS_DT_BASEDIR;
const char *parent_node;
if (strstr(dirname, root_dir) != dirname) {
error_report("%s: %s must be searched within %s",
__func__, dirname, root_dir);
exit(1);
}
parent_node = &dirname[strlen(SYSFS_DT_BASEDIR)];
d = opendir(dirname);
if (!d) {
error_report("%s cannot open %s", __func__, dirname);
exit(1);
}
while ((de = readdir(d)) != NULL) {
char *tmpnam;
if (!g_strcmp0(de->d_name, ".")
|| !g_strcmp0(de->d_name, "..")) {
continue;
}
tmpnam = g_strdup_printf("%s/%s", dirname, de->d_name);
if (lstat(tmpnam, &st) < 0) {
error_report("%s cannot lstat %s", __func__, tmpnam);
exit(1);
}
if (S_ISREG(st.st_mode)) {
gchar *val;
gsize len;
if (!g_file_get_contents(tmpnam, &val, &len, NULL)) {
error_report("%s not able to extract info from %s",
__func__, tmpnam);
exit(1);
}
if (strlen(parent_node) > 0) {
qemu_fdt_setprop(fdt, parent_node,
de->d_name, val, len);
} else {
qemu_fdt_setprop(fdt, "/", de->d_name, val, len);
}
g_free(val);
} else if (S_ISDIR(st.st_mode)) {
char *node_name;
node_name = g_strdup_printf("%s/%s",
parent_node, de->d_name);
qemu_fdt_add_subnode(fdt, node_name);
g_free(node_name);
read_fstree(fdt, tmpnam);
}
g_free(tmpnam);
}
closedir(d);
}
/* load_device_tree_from_sysfs: extract the dt blob from host sysfs */
void *load_device_tree_from_sysfs(void)
{
void *host_fdt;
int host_fdt_size;
host_fdt = create_device_tree(&host_fdt_size);
read_fstree(host_fdt, SYSFS_DT_BASEDIR);
if (fdt_check_header(host_fdt)) {
error_report("%s host device tree extracted into memory is invalid",
__func__);
exit(1);
}
return host_fdt;
}
#endif /* CONFIG_LINUX */
static int findnode_nofail(void *fdt, const char *node_path)
{
int offset;
offset = fdt_path_offset(fdt, node_path);
if (offset < 0) {
error_report("%s Couldn't find node %s: %s", __func__, node_path,
fdt_strerror(offset));
exit(1);
}
return offset;
}
char **qemu_fdt_node_unit_path(void *fdt, const char *name, Error **errp)
{
char *prefix = g_strdup_printf("%s@", name);
unsigned int path_len = 16, n = 0;
GSList *path_list = NULL, *iter;
const char *iter_name;
int offset, len, ret;
char **path_array;
offset = fdt_next_node(fdt, -1, NULL);
while (offset >= 0) {
iter_name = fdt_get_name(fdt, offset, &len);
if (!iter_name) {
offset = len;
break;
}
if (!strcmp(iter_name, name) || g_str_has_prefix(iter_name, prefix)) {
char *path;
path = g_malloc(path_len);
while ((ret = fdt_get_path(fdt, offset, path, path_len))
== -FDT_ERR_NOSPACE) {
path_len += 16;
path = g_realloc(path, path_len);
}
path_list = g_slist_prepend(path_list, path);
n++;
}
offset = fdt_next_node(fdt, offset, NULL);
}
g_free(prefix);
if (offset < 0 && offset != -FDT_ERR_NOTFOUND) {
error_setg(errp, "%s: abort parsing dt for %s node units: %s",
__func__, name, fdt_strerror(offset));
for (iter = path_list; iter; iter = iter->next) {
g_free(iter->data);
}
g_slist_free(path_list);
return NULL;
}
path_array = g_new(char *, n + 1);
path_array[n--] = NULL;
for (iter = path_list; iter; iter = iter->next) {
path_array[n--] = iter->data;
}
g_slist_free(path_list);
return path_array;
}
char **qemu_fdt_node_path(void *fdt, const char *name, const char *compat,
Error **errp)
{
int offset, len, ret;
const char *iter_name;
unsigned int path_len = 16, n = 0;
GSList *path_list = NULL, *iter;
char **path_array;
offset = fdt_node_offset_by_compatible(fdt, -1, compat);
while (offset >= 0) {
iter_name = fdt_get_name(fdt, offset, &len);
if (!iter_name) {
offset = len;
break;
}
if (!name || !strcmp(iter_name, name)) {
char *path;
path = g_malloc(path_len);
while ((ret = fdt_get_path(fdt, offset, path, path_len))
== -FDT_ERR_NOSPACE) {
path_len += 16;
path = g_realloc(path, path_len);
}
path_list = g_slist_prepend(path_list, path);
n++;
}
offset = fdt_node_offset_by_compatible(fdt, offset, compat);
}
if (offset < 0 && offset != -FDT_ERR_NOTFOUND) {
error_setg(errp, "%s: abort parsing dt for %s/%s: %s",
__func__, name, compat, fdt_strerror(offset));
for (iter = path_list; iter; iter = iter->next) {
g_free(iter->data);
}
g_slist_free(path_list);
return NULL;
}
path_array = g_new(char *, n + 1);
path_array[n--] = NULL;
for (iter = path_list; iter; iter = iter->next) {
path_array[n--] = iter->data;
}
g_slist_free(path_list);
return path_array;
}
int qemu_fdt_setprop(void *fdt, const char *node_path,
const char *property, const void *val, int size)
{
int r;
r = fdt_setprop(fdt, findnode_nofail(fdt, node_path), property, val, size);
if (r < 0) {
error_report("%s: Couldn't set %s/%s: %s", __func__, node_path,
property, fdt_strerror(r));
exit(1);
}
return r;
}
int qemu_fdt_setprop_cell(void *fdt, const char *node_path,
const char *property, uint32_t val)
{
int r;
r = fdt_setprop_cell(fdt, findnode_nofail(fdt, node_path), property, val);
if (r < 0) {
error_report("%s: Couldn't set %s/%s = %#08x: %s", __func__,
node_path, property, val, fdt_strerror(r));
exit(1);
}
return r;
}
int qemu_fdt_setprop_u64(void *fdt, const char *node_path,
const char *property, uint64_t val)
{
val = cpu_to_be64(val);
return qemu_fdt_setprop(fdt, node_path, property, &val, sizeof(val));
}
int qemu_fdt_setprop_string(void *fdt, const char *node_path,
const char *property, const char *string)
{
int r;
r = fdt_setprop_string(fdt, findnode_nofail(fdt, node_path), property, string);
if (r < 0) {
error_report("%s: Couldn't set %s/%s = %s: %s", __func__,
node_path, property, string, fdt_strerror(r));
exit(1);
}
return r;
}
/*
 * libfdt doesn't allow us to add string arrays directly, but they are
 * just a series of NUL-terminated strings concatenated together with a
 * total length. We build the string up here so we can calculate the
 * final length.
 */
int qemu_fdt_setprop_string_array(void *fdt, const char *node_path,
const char *prop, char **array, int len)
{
int ret, i, total_len = 0;
char *str, *p;
for (i = 0; i < len; i++) {
total_len += strlen(array[i]) + 1;
}
p = str = g_malloc0(total_len);
for (i = 0; i < len; i++) {
int offset = strlen(array[i]) + 1;
pstrcpy(p, offset, array[i]);
p += offset;
}
ret = qemu_fdt_setprop(fdt, node_path, prop, str, total_len);
g_free(str);
return ret;
}
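/*
 * Example (hypothetical node and property, for illustration only):
 *
 *     char *clocks[] = { (char *)"uartclk", (char *)"apb_pclk" };
 *     qemu_fdt_setprop_string_array(fdt, "/pl011@9000000",
 *                                   "clock-names", clocks, 2);
 *
 * stores "uartclk\0apb_pclk\0" (17 bytes) under "clock-names".
 */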
const void *qemu_fdt_getprop(void *fdt, const char *node_path,
const char *property, int *lenp, Error **errp)
{
int len;
const void *r;
if (!lenp) {
lenp = &len;
}
r = fdt_getprop(fdt, findnode_nofail(fdt, node_path), property, lenp);
if (!r) {
error_setg(errp, "%s: Couldn't get %s/%s: %s", __func__,
node_path, property, fdt_strerror(*lenp));
}
return r;
}
uint32_t qemu_fdt_getprop_cell(void *fdt, const char *node_path,
const char *property, int *lenp, Error **errp)
{
int len;
const uint32_t *p;
if (!lenp) {
lenp = &len;
}
p = qemu_fdt_getprop(fdt, node_path, property, lenp, errp);
if (!p) {
return 0;
} else if (*lenp != 4) {
error_setg(errp, "%s: %s/%s not 4 bytes long (not a cell?)",
__func__, node_path, property);
*lenp = -EINVAL;
return 0;
}
return be32_to_cpu(*p);
}
uint32_t qemu_fdt_get_phandle(void *fdt, const char *path)
{
uint32_t r;
r = fdt_get_phandle(fdt, findnode_nofail(fdt, path));
if (r == 0) {
error_report("%s: Couldn't get phandle for %s: %s", __func__,
path, fdt_strerror(r));
exit(1);
}
return r;
}
int qemu_fdt_setprop_phandle(void *fdt, const char *node_path,
const char *property,
const char *target_node_path)
{
uint32_t phandle = qemu_fdt_get_phandle(fdt, target_node_path);
return qemu_fdt_setprop_cell(fdt, node_path, property, phandle);
}
uint32_t qemu_fdt_alloc_phandle(void *fdt)
{
static int phandle = 0x0;
    /*
     * Find out whether the user told us at which phandle id to start
     * allocating phandles.
     */
if (!phandle) {
phandle = machine_phandle_start(current_machine);
}
if (!phandle) {
/*
* None or invalid phandle given on the command line, so fall back to
* default starting point.
*/
phandle = 0x8000;
}
return phandle++;
}
int qemu_fdt_nop_node(void *fdt, const char *node_path)
{
int r;
r = fdt_nop_node(fdt, findnode_nofail(fdt, node_path));
if (r < 0) {
error_report("%s: Couldn't nop node %s: %s", __func__, node_path,
fdt_strerror(r));
exit(1);
}
return r;
}
int qemu_fdt_add_subnode(void *fdt, const char *name)
{
char *dupname = g_strdup(name);
char *basename = strrchr(dupname, '/');
int retval;
int parent = 0;
if (!basename) {
g_free(dupname);
return -1;
}
basename[0] = '\0';
basename++;
if (dupname[0]) {
parent = findnode_nofail(fdt, dupname);
}
retval = fdt_add_subnode(fdt, parent, basename);
if (retval < 0) {
error_report("%s: Failed to create subnode %s: %s",
__func__, name, fdt_strerror(retval));
exit(1);
}
g_free(dupname);
return retval;
}
/*
* qemu_fdt_add_path: Like qemu_fdt_add_subnode(), but will add
* all missing subnodes from the given path.
*/
int qemu_fdt_add_path(void *fdt, const char *path)
{
const char *name;
int namelen, retval;
int parent = 0;
if (path[0] != '/') {
return -1;
}
do {
name = path + 1;
path = strchr(name, '/');
namelen = path != NULL ? path - name : strlen(name);
retval = fdt_subnode_offset_namelen(fdt, parent, name, namelen);
if (retval < 0 && retval != -FDT_ERR_NOTFOUND) {
error_report("%s: Unexpected error in finding subnode %.*s: %s",
__func__, namelen, name, fdt_strerror(retval));
exit(1);
} else if (retval == -FDT_ERR_NOTFOUND) {
retval = fdt_add_subnode_namelen(fdt, parent, name, namelen);
if (retval < 0) {
error_report("%s: Failed to create subnode %.*s: %s",
__func__, namelen, name, fdt_strerror(retval));
exit(1);
}
}
parent = retval;
} while (path);
return retval;
}
void qemu_fdt_dumpdtb(void *fdt, int size)
{
const char *dumpdtb = current_machine->dumpdtb;
if (dumpdtb) {
/* Dump the dtb to a file and quit */
if (g_file_set_contents(dumpdtb, fdt, size, NULL)) {
info_report("dtb dumped to %s. Exiting.", dumpdtb);
exit(0);
}
error_report("%s: Failed dumping dtb to %s", __func__, dumpdtb);
exit(1);
}
}
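/*
 * qemu_fdt_setprop_sized_cells_from_array: @values holds @numvalues
 * (cell-count, value) pairs, emitted as big-endian cells.  A cell count
 * other than 1 or 2, or a >32-bit value forced into one cell, fails
 * with -1.  E.g. a hypothetical "reg" property with 2-cell addresses
 * and 1-cell sizes:
 *
 *     uint64_t vals[] = { 2, 0x80000000, 1, 0x10000 };
 *     qemu_fdt_setprop_sized_cells_from_array(fdt, "/memory@80000000",
 *                                             "reg", 2, vals);
 */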
int qemu_fdt_setprop_sized_cells_from_array(void *fdt,
const char *node_path,
const char *property,
int numvalues,
uint64_t *values)
{
uint32_t *propcells;
uint64_t value;
int cellnum, vnum, ncells;
uint32_t hival;
int ret;
propcells = g_new0(uint32_t, numvalues * 2);
cellnum = 0;
for (vnum = 0; vnum < numvalues; vnum++) {
ncells = values[vnum * 2];
if (ncells != 1 && ncells != 2) {
ret = -1;
goto out;
}
value = values[vnum * 2 + 1];
hival = cpu_to_be32(value >> 32);
if (ncells > 1) {
propcells[cellnum++] = hival;
} else if (hival != 0) {
ret = -1;
goto out;
}
propcells[cellnum++] = cpu_to_be32(value);
}
ret = qemu_fdt_setprop(fdt, node_path, property, propcells,
cellnum * sizeof(uint32_t));
out:
g_free(propcells);
return ret;
}
void qmp_dumpdtb(const char *filename, Error **errp)
{
g_autoptr(GError) err = NULL;
uint32_t size;
if (!current_machine->fdt) {
error_setg(errp, "This machine doesn't have a FDT");
return;
}
size = fdt_totalsize(current_machine->fdt);
g_assert(size > 0);
if (!g_file_set_contents(filename, current_machine->fdt, size, &err)) {
error_setg(errp, "Error saving FDT to file %s: %s",
filename, err->message);
}
}
void hmp_dumpdtb(Monitor *mon, const QDict *qdict)
{
const char *filename = qdict_get_str(qdict, "filename");
Error *local_err = NULL;
qmp_dumpdtb(filename, &local_err);
if (hmp_handle_error(mon, local_err)) {
return;
}
info_report("dtb dumped to %s", filename);
}
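/*
 * Walk every node in @fdt and overwrite the contents of any "rng-seed"
 * property with fresh randomness from the host.
 */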
void qemu_fdt_randomize_seeds(void *fdt)
{
int noffset, poffset, len;
const char *name;
uint8_t *data;
for (noffset = fdt_next_node(fdt, 0, NULL);
noffset >= 0;
noffset = fdt_next_node(fdt, noffset, NULL)) {
for (poffset = fdt_first_property_offset(fdt, noffset);
poffset >= 0;
poffset = fdt_next_property_offset(fdt, poffset)) {
data = (uint8_t *)fdt_getprop_by_offset(fdt, poffset, &name, &len);
            if (!data || strcmp(name, "rng-seed")) {
                continue;
            }
qemu_guest_getrandom_nofail(data, len);
}
}
}

system/dirtylimit.c Normal file
@ -0,0 +1,678 @@
/*
* Dirty page rate limit implementation code
*
* Copyright (c) 2022 CHINA TELECOM CO.,LTD.
*
* Authors:
 * Hyman Huang <huangy81@chinatelecom.cn>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "qapi/qapi-commands-migration.h"
#include "qapi/qmp/qdict.h"
#include "qapi/error.h"
#include "sysemu/dirtyrate.h"
#include "sysemu/dirtylimit.h"
#include "monitor/hmp.h"
#include "monitor/monitor.h"
#include "exec/memory.h"
#include "exec/target_page.h"
#include "hw/boards.h"
#include "sysemu/kvm.h"
#include "trace.h"
#include "migration/misc.h"
#include "migration/migration.h"
#include "migration/options.h"
/*
 * The dirty limit stops adjusting once the dirty page rate error
 * drops below DIRTYLIMIT_TOLERANCE_RANGE.
 */
#define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */
/*
 * Increase or decrease the vcpu sleep time linearly if the dirty
 * page rate error percentage exceeds
 * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT.
 * Otherwise, adjust the sleep time by a fixed step.
 */
#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT 50
/*
* Max vcpu sleep time percentage during a cycle
* composed of dirty ring full and sleep time.
*/
#define DIRTYLIMIT_THROTTLE_PCT_MAX 99
struct {
VcpuStat stat;
bool running;
QemuThread thread;
} *vcpu_dirty_rate_stat;
typedef struct VcpuDirtyLimitState {
int cpu_index;
bool enabled;
/*
* Quota dirty page rate, unit is MB/s
* zero if not enabled.
*/
uint64_t quota;
} VcpuDirtyLimitState;
struct {
VcpuDirtyLimitState *states;
/* Max cpus number configured by user */
int max_cpus;
/* Number of vcpu under dirtylimit */
int limited_nvcpu;
} *dirtylimit_state;
/* protect dirtylimit_state */
static QemuMutex dirtylimit_mutex;
/* dirtylimit thread quit if dirtylimit_quit is true */
static bool dirtylimit_quit;
static void vcpu_dirty_rate_stat_collect(void)
{
MigrationState *s = migrate_get_current();
VcpuStat stat;
int i = 0;
int64_t period = DIRTYLIMIT_CALC_TIME_MS;
if (migrate_dirty_limit() &&
migration_is_active(s)) {
period = s->parameters.x_vcpu_dirty_limit_period;
}
/* calculate vcpu dirtyrate */
vcpu_calculate_dirtyrate(period,
&stat,
GLOBAL_DIRTY_LIMIT,
false);
for (i = 0; i < stat.nvcpu; i++) {
vcpu_dirty_rate_stat->stat.rates[i].id = i;
vcpu_dirty_rate_stat->stat.rates[i].dirty_rate =
stat.rates[i].dirty_rate;
}
g_free(stat.rates);
}
static void *vcpu_dirty_rate_stat_thread(void *opaque)
{
rcu_register_thread();
/* start log sync */
global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true);
while (qatomic_read(&vcpu_dirty_rate_stat->running)) {
vcpu_dirty_rate_stat_collect();
if (dirtylimit_in_service()) {
dirtylimit_process();
}
}
/* stop log sync */
global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false);
rcu_unregister_thread();
return NULL;
}
int64_t vcpu_dirty_rate_get(int cpu_index)
{
DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates;
return qatomic_read_i64(&rates[cpu_index].dirty_rate);
}
void vcpu_dirty_rate_stat_start(void)
{
if (qatomic_read(&vcpu_dirty_rate_stat->running)) {
return;
}
qatomic_set(&vcpu_dirty_rate_stat->running, 1);
qemu_thread_create(&vcpu_dirty_rate_stat->thread,
"dirtyrate-stat",
vcpu_dirty_rate_stat_thread,
NULL,
QEMU_THREAD_JOINABLE);
}
void vcpu_dirty_rate_stat_stop(void)
{
qatomic_set(&vcpu_dirty_rate_stat->running, 0);
dirtylimit_state_unlock();
qemu_mutex_unlock_iothread();
qemu_thread_join(&vcpu_dirty_rate_stat->thread);
qemu_mutex_lock_iothread();
dirtylimit_state_lock();
}
void vcpu_dirty_rate_stat_initialize(void)
{
MachineState *ms = MACHINE(qdev_get_machine());
int max_cpus = ms->smp.max_cpus;
vcpu_dirty_rate_stat =
g_malloc0(sizeof(*vcpu_dirty_rate_stat));
vcpu_dirty_rate_stat->stat.nvcpu = max_cpus;
vcpu_dirty_rate_stat->stat.rates =
g_new0(DirtyRateVcpu, max_cpus);
vcpu_dirty_rate_stat->running = false;
}
void vcpu_dirty_rate_stat_finalize(void)
{
g_free(vcpu_dirty_rate_stat->stat.rates);
vcpu_dirty_rate_stat->stat.rates = NULL;
g_free(vcpu_dirty_rate_stat);
vcpu_dirty_rate_stat = NULL;
}
void dirtylimit_state_lock(void)
{
qemu_mutex_lock(&dirtylimit_mutex);
}
void dirtylimit_state_unlock(void)
{
qemu_mutex_unlock(&dirtylimit_mutex);
}
static void
__attribute__((__constructor__)) dirtylimit_mutex_init(void)
{
qemu_mutex_init(&dirtylimit_mutex);
}
static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index)
{
return &dirtylimit_state->states[cpu_index];
}
void dirtylimit_state_initialize(void)
{
MachineState *ms = MACHINE(qdev_get_machine());
int max_cpus = ms->smp.max_cpus;
int i;
dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state));
dirtylimit_state->states =
g_new0(VcpuDirtyLimitState, max_cpus);
for (i = 0; i < max_cpus; i++) {
dirtylimit_state->states[i].cpu_index = i;
}
dirtylimit_state->max_cpus = max_cpus;
trace_dirtylimit_state_initialize(max_cpus);
}
void dirtylimit_state_finalize(void)
{
g_free(dirtylimit_state->states);
dirtylimit_state->states = NULL;
g_free(dirtylimit_state);
dirtylimit_state = NULL;
trace_dirtylimit_state_finalize();
}
bool dirtylimit_in_service(void)
{
return !!dirtylimit_state;
}
bool dirtylimit_vcpu_index_valid(int cpu_index)
{
MachineState *ms = MACHINE(qdev_get_machine());
return !(cpu_index < 0 ||
cpu_index >= ms->smp.max_cpus);
}
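/*
 * Estimate how many microseconds a vcpu dirtying memory at @dirtyrate
 * (MB/s) needs to fill its dirty ring.  The highest rate observed so far
 * is used, which keeps the estimate conservative (never longer than what
 * the peak rate would allow).
 */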
static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
{
static uint64_t max_dirtyrate;
uint64_t dirty_ring_size_MiB;
dirty_ring_size_MiB = qemu_target_pages_to_MiB(kvm_dirty_ring_size());
if (max_dirtyrate < dirtyrate) {
max_dirtyrate = dirtyrate;
}
return dirty_ring_size_MiB * 1000000 / max_dirtyrate;
}
static inline bool dirtylimit_done(uint64_t quota,
uint64_t current)
{
uint64_t min, max;
min = MIN(quota, current);
max = MAX(quota, current);
    return (max - min) <= DIRTYLIMIT_TOLERANCE_RANGE;
}
static inline bool
dirtylimit_need_linear_adjustment(uint64_t quota,
uint64_t current)
{
uint64_t min, max;
min = MIN(quota, current);
max = MAX(quota, current);
return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT;
}
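/*
 * Recompute cpu->throttle_us_per_full, the time a vcpu sleeps after each
 * dirty-ring-full event.  Large quota/current errors are corrected
 * linearly in proportion to the error percentage; small errors step by a
 * fixed tenth of the ring-full time.  The result is clamped so the sleep
 * stays within DIRTYLIMIT_THROTTLE_PCT_MAX percent of the ring-full plus
 * sleep cycle, and never drops below zero.
 */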
static void dirtylimit_set_throttle(CPUState *cpu,
uint64_t quota,
uint64_t current)
{
int64_t ring_full_time_us = 0;
uint64_t sleep_pct = 0;
uint64_t throttle_us = 0;
if (current == 0) {
cpu->throttle_us_per_full = 0;
return;
}
ring_full_time_us = dirtylimit_dirty_ring_full_time(current);
if (dirtylimit_need_linear_adjustment(quota, current)) {
if (quota < current) {
sleep_pct = (current - quota) * 100 / current;
throttle_us =
ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
cpu->throttle_us_per_full += throttle_us;
} else {
sleep_pct = (quota - current) * 100 / quota;
throttle_us =
ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
cpu->throttle_us_per_full -= throttle_us;
}
trace_dirtylimit_throttle_pct(cpu->cpu_index,
sleep_pct,
throttle_us);
} else {
if (quota < current) {
cpu->throttle_us_per_full += ring_full_time_us / 10;
} else {
cpu->throttle_us_per_full -= ring_full_time_us / 10;
}
}
    /*
     * TODO: with a large kvm_dirty_ring_size (e.g. 65536), the current
     * dirty page rate may never reach the quota; should we stop
     * increasing the sleep time in that case?
     */
cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full,
ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX);
cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0);
}
static void dirtylimit_adjust_throttle(CPUState *cpu)
{
uint64_t quota = 0;
uint64_t current = 0;
int cpu_index = cpu->cpu_index;
quota = dirtylimit_vcpu_get_state(cpu_index)->quota;
current = vcpu_dirty_rate_get(cpu_index);
if (!dirtylimit_done(quota, current)) {
dirtylimit_set_throttle(cpu, quota, current);
}
return;
}
void dirtylimit_process(void)
{
CPUState *cpu;
if (!qatomic_read(&dirtylimit_quit)) {
dirtylimit_state_lock();
if (!dirtylimit_in_service()) {
dirtylimit_state_unlock();
return;
}
CPU_FOREACH(cpu) {
if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
continue;
}
dirtylimit_adjust_throttle(cpu);
}
dirtylimit_state_unlock();
}
}
void dirtylimit_change(bool start)
{
if (start) {
qatomic_set(&dirtylimit_quit, 0);
} else {
qatomic_set(&dirtylimit_quit, 1);
}
}
void dirtylimit_set_vcpu(int cpu_index,
uint64_t quota,
bool enable)
{
trace_dirtylimit_set_vcpu(cpu_index, quota);
if (enable) {
dirtylimit_state->states[cpu_index].quota = quota;
if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) {
dirtylimit_state->limited_nvcpu++;
}
} else {
dirtylimit_state->states[cpu_index].quota = 0;
if (dirtylimit_state->states[cpu_index].enabled) {
dirtylimit_state->limited_nvcpu--;
}
}
dirtylimit_state->states[cpu_index].enabled = enable;
}
void dirtylimit_set_all(uint64_t quota,
bool enable)
{
MachineState *ms = MACHINE(qdev_get_machine());
int max_cpus = ms->smp.max_cpus;
int i;
for (i = 0; i < max_cpus; i++) {
dirtylimit_set_vcpu(i, quota, enable);
}
}
void dirtylimit_vcpu_execute(CPUState *cpu)
{
if (dirtylimit_in_service() &&
dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled &&
cpu->throttle_us_per_full) {
trace_dirtylimit_vcpu_execute(cpu->cpu_index,
cpu->throttle_us_per_full);
usleep(cpu->throttle_us_per_full);
}
}
static void dirtylimit_init(void)
{
dirtylimit_state_initialize();
dirtylimit_change(true);
vcpu_dirty_rate_stat_initialize();
vcpu_dirty_rate_stat_start();
}
static void dirtylimit_cleanup(void)
{
vcpu_dirty_rate_stat_stop();
vcpu_dirty_rate_stat_finalize();
dirtylimit_change(false);
dirtylimit_state_finalize();
}
/*
 * The dirty page rate limit must not be changed while migration is
 * running with the dirty-limit capability enabled.
 */
static bool dirtylimit_is_allowed(void)
{
MigrationState *ms = migrate_get_current();
if (migration_is_running(ms->state) &&
(!qemu_thread_is_self(&ms->thread)) &&
migrate_dirty_limit() &&
dirtylimit_in_service()) {
return false;
}
return true;
}
void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index,
int64_t cpu_index,
Error **errp)
{
if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
return;
}
if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
error_setg(errp, "incorrect cpu index specified");
return;
}
if (!dirtylimit_is_allowed()) {
error_setg(errp, "can't cancel dirty page rate limit while"
" migration is running");
return;
}
if (!dirtylimit_in_service()) {
return;
}
dirtylimit_state_lock();
if (has_cpu_index) {
dirtylimit_set_vcpu(cpu_index, 0, false);
} else {
dirtylimit_set_all(0, false);
}
if (!dirtylimit_state->limited_nvcpu) {
dirtylimit_cleanup();
}
dirtylimit_state_unlock();
}
void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
{
int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
Error *err = NULL;
qmp_cancel_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, &err);
if (err) {
hmp_handle_error(mon, err);
return;
}
monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
"dirty limit for virtual CPU]\n");
}
void qmp_set_vcpu_dirty_limit(bool has_cpu_index,
int64_t cpu_index,
uint64_t dirty_rate,
Error **errp)
{
if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
error_setg(errp, "dirty page limit feature requires KVM with"
" accelerator property 'dirty-ring-size' set'");
return;
}
if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
error_setg(errp, "incorrect cpu index specified");
return;
}
if (!dirtylimit_is_allowed()) {
error_setg(errp, "can't set dirty page rate limit while"
" migration is running");
return;
}
if (!dirty_rate) {
qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp);
return;
}
dirtylimit_state_lock();
if (!dirtylimit_in_service()) {
dirtylimit_init();
}
if (has_cpu_index) {
dirtylimit_set_vcpu(cpu_index, dirty_rate, true);
} else {
dirtylimit_set_all(dirty_rate, true);
}
dirtylimit_state_unlock();
}
void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
{
int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate");
int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
Error *err = NULL;
if (dirty_rate < 0) {
error_setg(&err, "invalid dirty page limit %" PRId64, dirty_rate);
goto out;
}
qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err);
out:
hmp_handle_error(mon, err);
}
/* Return the max throttle time of each virtual CPU */
uint64_t dirtylimit_throttle_time_per_round(void)
{
CPUState *cpu;
int64_t max = 0;
CPU_FOREACH(cpu) {
if (cpu->throttle_us_per_full > max) {
max = cpu->throttle_us_per_full;
}
}
return max;
}
/*
 * Estimate the average dirty ring full time of each virtual CPU.
 * Return 0 if the guest doesn't dirty memory.
 */
uint64_t dirtylimit_ring_full_time(void)
{
CPUState *cpu;
uint64_t curr_rate = 0;
int nvcpus = 0;
CPU_FOREACH(cpu) {
if (cpu->running) {
nvcpus++;
curr_rate += vcpu_dirty_rate_get(cpu->cpu_index);
}
}
if (!curr_rate || !nvcpus) {
return 0;
}
return dirtylimit_dirty_ring_full_time(curr_rate / nvcpus);
}
static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index)
{
DirtyLimitInfo *info = NULL;
info = g_malloc0(sizeof(*info));
info->cpu_index = cpu_index;
info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota;
info->current_rate = vcpu_dirty_rate_get(cpu_index);
return info;
}
static struct DirtyLimitInfoList *dirtylimit_query_all(void)
{
int i, index;
DirtyLimitInfo *info = NULL;
DirtyLimitInfoList *head = NULL, **tail = &head;
dirtylimit_state_lock();
if (!dirtylimit_in_service()) {
dirtylimit_state_unlock();
return NULL;
}
for (i = 0; i < dirtylimit_state->max_cpus; i++) {
index = dirtylimit_state->states[i].cpu_index;
if (dirtylimit_vcpu_get_state(index)->enabled) {
info = dirtylimit_query_vcpu(index);
QAPI_LIST_APPEND(tail, info);
}
}
dirtylimit_state_unlock();
return head;
}
struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp)
{
if (!dirtylimit_in_service()) {
return NULL;
}
return dirtylimit_query_all();
}
void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
{
DirtyLimitInfoList *info;
g_autoptr(DirtyLimitInfoList) head = NULL;
Error *err = NULL;
if (!dirtylimit_in_service()) {
monitor_printf(mon, "Dirty page limit not enabled!\n");
return;
}
head = qmp_query_vcpu_dirty_limit(&err);
if (err) {
hmp_handle_error(mon, err);
return;
}
for (info = head; info != NULL; info = info->next) {
monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s),"
" current rate %"PRIi64 " (MB/s)\n",
info->value->cpu_index,
info->value->limit_rate,
info->value->current_rate);
}
}

system/dma-helpers.c Normal file
@ -0,0 +1,347 @@
/*
* DMA helper functions
*
* Copyright (c) 2009,2020 Red Hat
*
* This work is licensed under the terms of the GNU General Public License
* (GNU GPL), version 2 or later.
*/
#include "qemu/osdep.h"
#include "sysemu/block-backend.h"
#include "sysemu/dma.h"
#include "trace/trace-root.h"
#include "qemu/thread.h"
#include "qemu/main-loop.h"
#include "sysemu/cpu-timers.h"
#include "qemu/range.h"
/* #define DEBUG_IOMMU */
MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr,
uint8_t c, dma_addr_t len, MemTxAttrs attrs)
{
dma_barrier(as, DMA_DIRECTION_FROM_DEVICE);
return address_space_set(as, addr, c, len, attrs);
}
void qemu_sglist_init(QEMUSGList *qsg, DeviceState *dev, int alloc_hint,
AddressSpace *as)
{
qsg->sg = g_new(ScatterGatherEntry, alloc_hint);
qsg->nsg = 0;
qsg->nalloc = alloc_hint;
qsg->size = 0;
qsg->as = as;
qsg->dev = dev;
object_ref(OBJECT(dev));
}
void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
{
if (qsg->nsg == qsg->nalloc) {
qsg->nalloc = 2 * qsg->nalloc + 1;
qsg->sg = g_renew(ScatterGatherEntry, qsg->sg, qsg->nalloc);
}
qsg->sg[qsg->nsg].base = base;
qsg->sg[qsg->nsg].len = len;
qsg->size += len;
++qsg->nsg;
}
void qemu_sglist_destroy(QEMUSGList *qsg)
{
object_unref(OBJECT(qsg->dev));
g_free(qsg->sg);
memset(qsg, 0, sizeof(*qsg));
}
typedef struct {
BlockAIOCB common;
AioContext *ctx;
BlockAIOCB *acb;
QEMUSGList *sg;
uint32_t align;
uint64_t offset;
DMADirection dir;
int sg_cur_index;
dma_addr_t sg_cur_byte;
QEMUIOVector iov;
QEMUBH *bh;
DMAIOFunc *io_func;
void *io_func_opaque;
} DMAAIOCB;
static void dma_blk_cb(void *opaque, int ret);
static void reschedule_dma(void *opaque)
{
DMAAIOCB *dbs = (DMAAIOCB *)opaque;
assert(!dbs->acb && dbs->bh);
qemu_bh_delete(dbs->bh);
dbs->bh = NULL;
dma_blk_cb(dbs, 0);
}
static void dma_blk_unmap(DMAAIOCB *dbs)
{
int i;
for (i = 0; i < dbs->iov.niov; ++i) {
dma_memory_unmap(dbs->sg->as, dbs->iov.iov[i].iov_base,
dbs->iov.iov[i].iov_len, dbs->dir,
dbs->iov.iov[i].iov_len);
}
qemu_iovec_reset(&dbs->iov);
}
static void dma_complete(DMAAIOCB *dbs, int ret)
{
trace_dma_complete(dbs, ret, dbs->common.cb);
assert(!dbs->acb && !dbs->bh);
dma_blk_unmap(dbs);
if (dbs->common.cb) {
dbs->common.cb(dbs->common.opaque, ret);
}
qemu_iovec_destroy(&dbs->iov);
qemu_aio_unref(dbs);
}
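/*
 * dma_blk_cb: completion callback driving the scatter/gather state
 * machine.  Each call unmaps the previous iovec, maps as many remaining
 * SG entries as possible, then either issues the next I/O, schedules a
 * bottom half to retry once map space frees up, or completes the
 * request.
 */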
static void dma_blk_cb(void *opaque, int ret)
{
DMAAIOCB *dbs = (DMAAIOCB *)opaque;
AioContext *ctx = dbs->ctx;
dma_addr_t cur_addr, cur_len;
void *mem;
trace_dma_blk_cb(dbs, ret);
aio_context_acquire(ctx);
dbs->acb = NULL;
dbs->offset += dbs->iov.size;
if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
dma_complete(dbs, ret);
goto out;
}
dma_blk_unmap(dbs);
while (dbs->sg_cur_index < dbs->sg->nsg) {
cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir,
MEMTXATTRS_UNSPECIFIED);
        /*
         * Make reads deterministic in icount mode. Windows sometimes issues
         * disk read requests with overlapping SGs, which leads to
         * non-determinism because the resulting buffer contents may be mixed
         * from several sectors. This code splits all SGs into several
         * non-overlapping groups.
         */
if (mem && icount_enabled() && dbs->dir == DMA_DIRECTION_FROM_DEVICE) {
int i;
for (i = 0 ; i < dbs->iov.niov ; ++i) {
if (ranges_overlap((intptr_t)dbs->iov.iov[i].iov_base,
dbs->iov.iov[i].iov_len, (intptr_t)mem,
cur_len)) {
dma_memory_unmap(dbs->sg->as, mem, cur_len,
dbs->dir, cur_len);
mem = NULL;
break;
}
}
}
        if (!mem) {
            break;
        }
qemu_iovec_add(&dbs->iov, mem, cur_len);
dbs->sg_cur_byte += cur_len;
if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) {
dbs->sg_cur_byte = 0;
++dbs->sg_cur_index;
}
}
if (dbs->iov.size == 0) {
trace_dma_map_wait(dbs);
dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs);
cpu_register_map_client(dbs->bh);
goto out;
}
if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
qemu_iovec_discard_back(&dbs->iov,
QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
}
dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
dma_blk_cb, dbs, dbs->io_func_opaque);
assert(dbs->acb);
out:
aio_context_release(ctx);
}
static void dma_aio_cancel(BlockAIOCB *acb)
{
DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);
trace_dma_aio_cancel(dbs);
assert(!(dbs->acb && dbs->bh));
if (dbs->acb) {
/* This will invoke dma_blk_cb. */
blk_aio_cancel_async(dbs->acb);
return;
}
if (dbs->bh) {
cpu_unregister_map_client(dbs->bh);
qemu_bh_delete(dbs->bh);
dbs->bh = NULL;
}
if (dbs->common.cb) {
dbs->common.cb(dbs->common.opaque, -ECANCELED);
}
}
static const AIOCBInfo dma_aiocb_info = {
.aiocb_size = sizeof(DMAAIOCB),
.cancel_async = dma_aio_cancel,
};
BlockAIOCB *dma_blk_io(AioContext *ctx,
QEMUSGList *sg, uint64_t offset, uint32_t align,
DMAIOFunc *io_func, void *io_func_opaque,
BlockCompletionFunc *cb,
void *opaque, DMADirection dir)
{
DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, NULL, cb, opaque);
trace_dma_blk_io(dbs, io_func_opaque, offset, (dir == DMA_DIRECTION_TO_DEVICE));
dbs->acb = NULL;
dbs->sg = sg;
dbs->ctx = ctx;
dbs->offset = offset;
dbs->align = align;
dbs->sg_cur_index = 0;
dbs->sg_cur_byte = 0;
dbs->dir = dir;
dbs->io_func = io_func;
dbs->io_func_opaque = io_func_opaque;
dbs->bh = NULL;
qemu_iovec_init(&dbs->iov, sg->nsg);
dma_blk_cb(dbs, 0);
return &dbs->common;
}
static
BlockAIOCB *dma_blk_read_io_func(int64_t offset, QEMUIOVector *iov,
BlockCompletionFunc *cb, void *cb_opaque,
void *opaque)
{
BlockBackend *blk = opaque;
return blk_aio_preadv(blk, offset, iov, 0, cb, cb_opaque);
}
BlockAIOCB *dma_blk_read(BlockBackend *blk,
QEMUSGList *sg, uint64_t offset, uint32_t align,
void (*cb)(void *opaque, int ret), void *opaque)
{
return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
dma_blk_read_io_func, blk, cb, opaque,
DMA_DIRECTION_FROM_DEVICE);
}
static
BlockAIOCB *dma_blk_write_io_func(int64_t offset, QEMUIOVector *iov,
BlockCompletionFunc *cb, void *cb_opaque,
void *opaque)
{
BlockBackend *blk = opaque;
return blk_aio_pwritev(blk, offset, iov, 0, cb, cb_opaque);
}
BlockAIOCB *dma_blk_write(BlockBackend *blk,
QEMUSGList *sg, uint64_t offset, uint32_t align,
void (*cb)(void *opaque, int ret), void *opaque)
{
return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
dma_blk_write_io_func, blk, cb, opaque,
DMA_DIRECTION_TO_DEVICE);
}
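/*
 * Copy up to @len bytes between @buf and the scatter/gather list,
 * bounded by the list's total size; if @residual is non-NULL it receives
 * the number of SG bytes left uncopied.
 */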
static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
QEMUSGList *sg, DMADirection dir,
MemTxAttrs attrs)
{
uint8_t *ptr = buf;
dma_addr_t xresidual;
int sg_cur_index;
MemTxResult res = MEMTX_OK;
xresidual = sg->size;
sg_cur_index = 0;
len = MIN(len, xresidual);
while (len > 0) {
ScatterGatherEntry entry = sg->sg[sg_cur_index++];
dma_addr_t xfer = MIN(len, entry.len);
res |= dma_memory_rw(sg->as, entry.base, ptr, xfer, dir, attrs);
ptr += xfer;
len -= xfer;
xresidual -= xfer;
}
if (residual) {
*residual = xresidual;
}
return res;
}
MemTxResult dma_buf_read(void *ptr, dma_addr_t len, dma_addr_t *residual,
QEMUSGList *sg, MemTxAttrs attrs)
{
return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_FROM_DEVICE, attrs);
}
MemTxResult dma_buf_write(void *ptr, dma_addr_t len, dma_addr_t *residual,
QEMUSGList *sg, MemTxAttrs attrs)
{
return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_TO_DEVICE, attrs);
}
void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie,
QEMUSGList *sg, enum BlockAcctType type)
{
block_acct_start(blk_get_stats(blk), cookie, sg->size, type);
}
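/*
 * Return the mask of the largest naturally aligned power-of-two chunk,
 * no wider than @max_addr_bits bits, that starts at @start and does not
 * extend past the inclusive @end; e.g. start=0x3000, end=0x3fff gives
 * mask 0xfff (a 4 KiB chunk).
 */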
uint64_t dma_aligned_pow2_mask(uint64_t start, uint64_t end, int max_addr_bits)
{
uint64_t max_mask = UINT64_MAX, addr_mask = end - start;
uint64_t alignment_mask, size_mask;
if (max_addr_bits != 64) {
max_mask = (1ULL << max_addr_bits) - 1;
}
alignment_mask = start ? (start & -start) - 1 : max_mask;
alignment_mask = MIN(alignment_mask, max_mask);
size_mask = MIN(addr_mask, max_mask);
if (alignment_mask <= size_mask) {
/* Increase the alignment of start */
return alignment_mask;
} else {
/* Find the largest page mask from size */
if (addr_mask == UINT64_MAX) {
return UINT64_MAX;
}
return (1ULL << (63 - clz64(addr_mask + 1))) - 1;
}
}

system/globals.c Normal file
@ -0,0 +1,70 @@
/*
* Global variables that (mostly) should not exist
*
* Copyright (c) 2003-2020 QEMU contributors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "exec/cpu-common.h"
#include "hw/display/vga.h"
#include "hw/loader.h"
#include "hw/xen/xen.h"
#include "net/net.h"
#include "sysemu/cpus.h"
#include "sysemu/sysemu.h"
enum vga_retrace_method vga_retrace_method = VGA_RETRACE_DUMB;
int display_opengl;
const char* keyboard_layout;
bool enable_mlock;
bool enable_cpu_pm;
int nb_nics;
NICInfo nd_table[MAX_NICS];
int autostart = 1;
int vga_interface_type = VGA_NONE;
bool vga_interface_created;
Chardev *parallel_hds[MAX_PARALLEL_PORTS];
int win2k_install_hack;
int fd_bootchk = 1;
int graphic_rotate;
QEMUOptionRom option_rom[MAX_OPTION_ROMS];
int nb_option_roms;
int old_param;
const char *qemu_name;
unsigned int nb_prom_envs;
const char *prom_envs[MAX_PROM_ENVS];
uint8_t *boot_splash_filedata;
int only_migratable; /* turn it off unless user states otherwise */
int icount_align_option;
/* The bytes in qemu_uuid are in the order specified by RFC4122, _not_ in the
* little-endian "wire format" described in the SMBIOS 2.6 specification.
*/
QemuUUID qemu_uuid;
bool qemu_uuid_set;
uint32_t xen_domid;
enum xen_mode xen_mode = XEN_DISABLED;
bool xen_domid_restrict;
struct evtchn_backend_ops *xen_evtchn_ops;
struct gnttab_backend_ops *xen_gnttab_ops;
struct foreignmem_backend_ops *xen_foreignmem_ops;
struct xenstore_backend_ops *xen_xenstore_ops;

system/ioport.c Normal file
@ -0,0 +1,346 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/*
 * Split out ioport-related code from vl.c.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/ioport.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"
#include "trace.h"
struct MemoryRegionPortioList {
Object obj;
MemoryRegion mr;
void *portio_opaque;
MemoryRegionPortio *ports;
};
#define TYPE_MEMORY_REGION_PORTIO_LIST "memory-region-portio-list"
OBJECT_DECLARE_SIMPLE_TYPE(MemoryRegionPortioList, MEMORY_REGION_PORTIO_LIST)
static uint64_t unassigned_io_read(void *opaque, hwaddr addr, unsigned size)
{
return -1ULL;
}
static void unassigned_io_write(void *opaque, hwaddr addr, uint64_t val,
unsigned size)
{
}
const MemoryRegionOps unassigned_io_ops = {
.read = unassigned_io_read,
.write = unassigned_io_write,
.endianness = DEVICE_NATIVE_ENDIAN,
};
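/*
 * cpu_out*() / cpu_in*(): port I/O accessors.  16- and 32-bit values are
 * marshalled through a bounce buffer with stw_p()/stl_p() so the bytes
 * reach address_space_io in target byte order.
 */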
void cpu_outb(uint32_t addr, uint8_t val)
{
trace_cpu_out(addr, 'b', val);
address_space_write(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED,
&val, 1);
}
void cpu_outw(uint32_t addr, uint16_t val)
{
uint8_t buf[2];
trace_cpu_out(addr, 'w', val);
stw_p(buf, val);
address_space_write(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED,
buf, 2);
}
void cpu_outl(uint32_t addr, uint32_t val)
{
uint8_t buf[4];
trace_cpu_out(addr, 'l', val);
stl_p(buf, val);
address_space_write(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED,
buf, 4);
}
uint8_t cpu_inb(uint32_t addr)
{
uint8_t val;
address_space_read(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED,
&val, 1);
trace_cpu_in(addr, 'b', val);
return val;
}
uint16_t cpu_inw(uint32_t addr)
{
uint8_t buf[2];
uint16_t val;
address_space_read(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, buf, 2);
val = lduw_p(buf);
trace_cpu_in(addr, 'w', val);
return val;
}
uint32_t cpu_inl(uint32_t addr)
{
uint8_t buf[4];
uint32_t val;
address_space_read(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, buf, 4);
val = ldl_p(buf);
trace_cpu_in(addr, 'l', val);
return val;
}
void portio_list_init(PortioList *piolist,
Object *owner,
const MemoryRegionPortio *callbacks,
void *opaque, const char *name)
{
unsigned n = 0;
while (callbacks[n].size) {
++n;
}
piolist->ports = callbacks;
piolist->nr = 0;
piolist->regions = g_new0(MemoryRegion *, n);
piolist->address_space = NULL;
piolist->opaque = opaque;
piolist->owner = owner;
piolist->name = name;
piolist->flush_coalesced_mmio = false;
}
void portio_list_set_flush_coalesced(PortioList *piolist)
{
piolist->flush_coalesced_mmio = true;
}
void portio_list_destroy(PortioList *piolist)
{
MemoryRegionPortioList *mrpio;
unsigned i;
for (i = 0; i < piolist->nr; ++i) {
mrpio = container_of(piolist->regions[i], MemoryRegionPortioList, mr);
object_unparent(OBJECT(&mrpio->mr));
object_unref(mrpio);
}
g_free(piolist->regions);
}
static const MemoryRegionPortio *find_portio(MemoryRegionPortioList *mrpio,
uint64_t offset, unsigned size,
bool write)
{
const MemoryRegionPortio *mrp;
for (mrp = mrpio->ports; mrp->size; ++mrp) {
if (offset >= mrp->offset && offset < mrp->offset + mrp->len &&
size == mrp->size &&
(write ? (bool)mrp->write : (bool)mrp->read)) {
return mrp;
}
}
return NULL;
}
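/*
 * portio_read: find the handler matching this offset and size.  A 16-bit
 * access with no 16-bit handler falls back to two 8-bit reads, with the
 * high byte reading as 0xff when it lies past the registered range.
 */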
static uint64_t portio_read(void *opaque, hwaddr addr, unsigned size)
{
MemoryRegionPortioList *mrpio = opaque;
const MemoryRegionPortio *mrp = find_portio(mrpio, addr, size, false);
uint64_t data;
data = ((uint64_t)1 << (size * 8)) - 1;
if (mrp) {
data = mrp->read(mrpio->portio_opaque, mrp->base + addr);
} else if (size == 2) {
mrp = find_portio(mrpio, addr, 1, false);
if (mrp) {
data = mrp->read(mrpio->portio_opaque, mrp->base + addr);
if (addr + 1 < mrp->offset + mrp->len) {
data |= mrp->read(mrpio->portio_opaque, mrp->base + addr + 1) << 8;
} else {
data |= 0xff00;
}
}
}
return data;
}
static void portio_write(void *opaque, hwaddr addr, uint64_t data,
unsigned size)
{
MemoryRegionPortioList *mrpio = opaque;
const MemoryRegionPortio *mrp = find_portio(mrpio, addr, size, true);
if (mrp) {
mrp->write(mrpio->portio_opaque, mrp->base + addr, data);
} else if (size == 2) {
mrp = find_portio(mrpio, addr, 1, true);
if (mrp) {
mrp->write(mrpio->portio_opaque, mrp->base + addr, data & 0xff);
if (addr + 1 < mrp->offset + mrp->len) {
mrp->write(mrpio->portio_opaque, mrp->base + addr + 1, data >> 8);
}
}
}
}
static const MemoryRegionOps portio_ops = {
.read = portio_read,
.write = portio_write,
.endianness = DEVICE_LITTLE_ENDIAN,
.valid.unaligned = true,
.impl.unaligned = true,
};
static void portio_list_add_1(PortioList *piolist,
const MemoryRegionPortio *pio_init,
unsigned count, unsigned start,
unsigned off_low, unsigned off_high)
{
MemoryRegionPortioList *mrpio;
Object *owner;
char *name;
unsigned i;
/* Copy the sub-list and null-terminate it. */
mrpio = MEMORY_REGION_PORTIO_LIST(
object_new(TYPE_MEMORY_REGION_PORTIO_LIST));
mrpio->portio_opaque = piolist->opaque;
mrpio->ports = g_malloc0(sizeof(MemoryRegionPortio) * (count + 1));
memcpy(mrpio->ports, pio_init, sizeof(MemoryRegionPortio) * count);
memset(mrpio->ports + count, 0, sizeof(MemoryRegionPortio));
/* Adjust the offsets to all be zero-based for the region. */
for (i = 0; i < count; ++i) {
mrpio->ports[i].offset -= off_low;
mrpio->ports[i].base = start + off_low;
}
/*
* The MemoryRegion owner is the MemoryRegionPortioList since that manages
* the lifecycle via the refcount
*/
memory_region_init_io(&mrpio->mr, OBJECT(mrpio), &portio_ops, mrpio,
piolist->name, off_high - off_low);
/* Reparent the MemoryRegion to the piolist owner */
object_ref(&mrpio->mr);
object_unparent(OBJECT(&mrpio->mr));
if (!piolist->owner) {
owner = container_get(qdev_get_machine(), "/unattached");
} else {
owner = piolist->owner;
}
name = g_strdup_printf("%s[*]", piolist->name);
object_property_add_child(owner, name, OBJECT(&mrpio->mr));
g_free(name);
if (piolist->flush_coalesced_mmio) {
memory_region_set_flush_coalesced(&mrpio->mr);
}
memory_region_add_subregion(piolist->address_space,
start + off_low, &mrpio->mr);
piolist->regions[piolist->nr] = &mrpio->mr;
++piolist->nr;
}
void portio_list_add(PortioList *piolist,
MemoryRegion *address_space,
uint32_t start)
{
const MemoryRegionPortio *pio, *pio_start = piolist->ports;
unsigned int off_low, off_high, off_last, count;
piolist->address_space = address_space;
/* Handle the first entry specially. */
off_last = off_low = pio_start->offset;
off_high = off_low + pio_start->len + pio_start->size - 1;
count = 1;
for (pio = pio_start + 1; pio->size != 0; pio++, count++) {
/* All entries must be sorted by offset. */
assert(pio->offset >= off_last);
off_last = pio->offset;
/* If we see a hole, break the region. */
if (off_last > off_high) {
portio_list_add_1(piolist, pio_start, count, start, off_low,
off_high);
/* ... and start collecting anew. */
pio_start = pio;
off_low = off_last;
off_high = off_low + pio->len + pio_start->size - 1;
count = 0;
} else if (off_last + pio->len > off_high) {
off_high = off_last + pio->len + pio_start->size - 1;
}
}
/* There will always be an open sub-list. */
portio_list_add_1(piolist, pio_start, count, start, off_low, off_high);
}
void portio_list_del(PortioList *piolist)
{
MemoryRegionPortioList *mrpio;
unsigned i;
for (i = 0; i < piolist->nr; ++i) {
mrpio = container_of(piolist->regions[i], MemoryRegionPortioList, mr);
memory_region_del_subregion(piolist->address_space, &mrpio->mr);
}
}
static void memory_region_portio_list_finalize(Object *obj)
{
MemoryRegionPortioList *mrpio = MEMORY_REGION_PORTIO_LIST(obj);
object_unref(&mrpio->mr);
g_free(mrpio->ports);
}
static const TypeInfo memory_region_portio_list_info = {
.parent = TYPE_OBJECT,
.name = TYPE_MEMORY_REGION_PORTIO_LIST,
.instance_size = sizeof(MemoryRegionPortioList),
.instance_finalize = memory_region_portio_list_finalize,
};
static void ioport_register_types(void)
{
type_register_static(&memory_region_portio_list_info);
}
type_init(ioport_register_types)

system/main.c Normal file
@ -0,0 +1,49 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2020 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qemu-main.h"
#include "sysemu/sysemu.h"
#ifdef CONFIG_SDL
#include <SDL.h>
#endif
int qemu_default_main(void)
{
int status;
status = qemu_main_loop();
qemu_cleanup();
return status;
}
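/*
 * UI front ends (the Cocoa UI, for instance) may replace qemu_main to
 * take control of the main loop; the default just runs the loop and
 * cleans up afterwards.
 */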
int (*qemu_main)(void) = qemu_default_main;
int main(int argc, char **argv)
{
qemu_init(argc, argv);
return qemu_main();
}

system/memory.c Normal file, 3683 lines (diff suppressed: file too large)

system/memory_mapping.c Normal file
@ -0,0 +1,377 @@
/*
* QEMU memory mapping
*
* Copyright Fujitsu, Corp. 2011, 2012
*
* Authors:
* Wen Congyang <wency@cn.fujitsu.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "sysemu/memory_mapping.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"
#include "hw/core/cpu.h"
//#define DEBUG_GUEST_PHYS_REGION_ADD
static void memory_mapping_list_add_mapping_sorted(MemoryMappingList *list,
MemoryMapping *mapping)
{
MemoryMapping *p;
QTAILQ_FOREACH(p, &list->head, next) {
if (p->phys_addr >= mapping->phys_addr) {
QTAILQ_INSERT_BEFORE(p, mapping, next);
return;
}
}
QTAILQ_INSERT_TAIL(&list->head, mapping, next);
}
static void create_new_memory_mapping(MemoryMappingList *list,
hwaddr phys_addr,
hwaddr virt_addr,
ram_addr_t length)
{
MemoryMapping *memory_mapping;
memory_mapping = g_new(MemoryMapping, 1);
memory_mapping->phys_addr = phys_addr;
memory_mapping->virt_addr = virt_addr;
memory_mapping->length = length;
list->last_mapping = memory_mapping;
list->num++;
memory_mapping_list_add_mapping_sorted(list, memory_mapping);
}
static inline bool mapping_contiguous(MemoryMapping *map,
hwaddr phys_addr,
hwaddr virt_addr)
{
return phys_addr == map->phys_addr + map->length &&
virt_addr == map->virt_addr + map->length;
}
/*
 * Do [map->phys_addr, map->phys_addr + map->length) and
 * [phys_addr, phys_addr + length) intersect?
 */
static inline bool mapping_have_same_region(MemoryMapping *map,
hwaddr phys_addr,
ram_addr_t length)
{
return !(phys_addr + length < map->phys_addr ||
phys_addr >= map->phys_addr + map->length);
}
/*
 * [map->phys_addr, map->phys_addr + map->length) and
 * [phys_addr, phys_addr + length) intersect.  Do the virtual addresses
 * in the intersection disagree, i.e. do the two mappings conflict?
 */
static inline bool mapping_conflict(MemoryMapping *map,
hwaddr phys_addr,
hwaddr virt_addr)
{
return virt_addr - map->virt_addr != phys_addr - map->phys_addr;
}
/*
 * [map->virt_addr, map->virt_addr + map->length) and
 * [virt_addr, virt_addr + length) intersect, and the physical addresses
 * in the intersection agree: grow @map to cover the union of the two
 * virtual ranges.
 */
static inline void mapping_merge(MemoryMapping *map,
hwaddr virt_addr,
ram_addr_t length)
{
if (virt_addr < map->virt_addr) {
map->length += map->virt_addr - virt_addr;
map->virt_addr = virt_addr;
}
if ((virt_addr + length) >
(map->virt_addr + map->length)) {
map->length = virt_addr + length - map->virt_addr;
}
}
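/*
 * Insert [phys_addr, phys_addr + length) into the sorted list: extend an
 * adjacent mapping when both the physical and virtual ranges are
 * contiguous, merge into an overlapping mapping when the translations
 * agree, and otherwise create a new mapping.
 */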
void memory_mapping_list_add_merge_sorted(MemoryMappingList *list,
hwaddr phys_addr,
hwaddr virt_addr,
ram_addr_t length)
{
MemoryMapping *memory_mapping, *last_mapping;
if (QTAILQ_EMPTY(&list->head)) {
create_new_memory_mapping(list, phys_addr, virt_addr, length);
return;
}
last_mapping = list->last_mapping;
if (last_mapping) {
if (mapping_contiguous(last_mapping, phys_addr, virt_addr)) {
last_mapping->length += length;
return;
}
}
QTAILQ_FOREACH(memory_mapping, &list->head, next) {
if (mapping_contiguous(memory_mapping, phys_addr, virt_addr)) {
memory_mapping->length += length;
list->last_mapping = memory_mapping;
return;
}
if (phys_addr + length < memory_mapping->phys_addr) {
/* create a new region before memory_mapping */
break;
}
if (mapping_have_same_region(memory_mapping, phys_addr, length)) {
if (mapping_conflict(memory_mapping, phys_addr, virt_addr)) {
continue;
}
/* merge this region into memory_mapping */
mapping_merge(memory_mapping, virt_addr, length);
list->last_mapping = memory_mapping;
return;
}
}
    /* this region cannot be merged into any existing memory mapping */
create_new_memory_mapping(list, phys_addr, virt_addr, length);
}
void memory_mapping_list_free(MemoryMappingList *list)
{
MemoryMapping *p, *q;
QTAILQ_FOREACH_SAFE(p, &list->head, next, q) {
QTAILQ_REMOVE(&list->head, p, next);
g_free(p);
}
list->num = 0;
list->last_mapping = NULL;
}
void memory_mapping_list_init(MemoryMappingList *list)
{
list->num = 0;
list->last_mapping = NULL;
QTAILQ_INIT(&list->head);
}
void guest_phys_blocks_free(GuestPhysBlockList *list)
{
GuestPhysBlock *p, *q;
QTAILQ_FOREACH_SAFE(p, &list->head, next, q) {
QTAILQ_REMOVE(&list->head, p, next);
memory_region_unref(p->mr);
g_free(p);
}
list->num = 0;
}
void guest_phys_blocks_init(GuestPhysBlockList *list)
{
list->num = 0;
QTAILQ_INIT(&list->head);
}
typedef struct GuestPhysListener {
GuestPhysBlockList *list;
MemoryListener listener;
} GuestPhysListener;
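/*
 * Append @section to the block list, coalescing it with the previous
 * block when the guest-physical range, the host mapping and the
 * MemoryRegion are all contiguous.
 */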
static void guest_phys_block_add_section(GuestPhysListener *g,
MemoryRegionSection *section)
{
const hwaddr target_start = section->offset_within_address_space;
const hwaddr target_end = target_start + int128_get64(section->size);
uint8_t *host_addr = memory_region_get_ram_ptr(section->mr) +
section->offset_within_region;
GuestPhysBlock *predecessor = NULL;
/* find continuity in guest physical address space */
if (!QTAILQ_EMPTY(&g->list->head)) {
hwaddr predecessor_size;
predecessor = QTAILQ_LAST(&g->list->head);
predecessor_size = predecessor->target_end - predecessor->target_start;
/* the memory API guarantees monotonically increasing traversal */
g_assert(predecessor->target_end <= target_start);
/* we want continuity in both guest-physical and host-virtual memory */
if (predecessor->target_end < target_start ||
predecessor->host_addr + predecessor_size != host_addr ||
predecessor->mr != section->mr) {
predecessor = NULL;
}
}
if (predecessor == NULL) {
/* isolated mapping, allocate it and add it to the list */
GuestPhysBlock *block = g_malloc0(sizeof *block);
block->target_start = target_start;
block->target_end = target_end;
block->host_addr = host_addr;
block->mr = section->mr;
memory_region_ref(section->mr);
QTAILQ_INSERT_TAIL(&g->list->head, block, next);
++g->list->num;
} else {
/* expand predecessor until @target_end; predecessor's start doesn't
* change
*/
predecessor->target_end = target_end;
}
#ifdef DEBUG_GUEST_PHYS_REGION_ADD
fprintf(stderr, "%s: target_start=" HWADDR_FMT_plx " target_end="
HWADDR_FMT_plx ": %s (count: %u)\n", __func__, target_start,
target_end, predecessor ? "joined" : "added", g->list->num);
#endif
}
static int guest_phys_ram_populate_cb(MemoryRegionSection *section,
void *opaque)
{
GuestPhysListener *g = opaque;
guest_phys_block_add_section(g, section);
return 0;
}
static void guest_phys_blocks_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
GuestPhysListener *g = container_of(listener, GuestPhysListener, listener);
/* we only care about RAM */
if (!memory_region_is_ram(section->mr) ||
memory_region_is_ram_device(section->mr) ||
memory_region_is_nonvolatile(section->mr)) {
return;
}
/* for special sparse regions, only add populated parts */
if (memory_region_has_ram_discard_manager(section->mr)) {
RamDiscardManager *rdm;
rdm = memory_region_get_ram_discard_manager(section->mr);
ram_discard_manager_replay_populated(rdm, section,
guest_phys_ram_populate_cb, g);
return;
}
guest_phys_block_add_section(g, section);
}
void guest_phys_blocks_append(GuestPhysBlockList *list)
{
GuestPhysListener g = { 0 };
g.list = list;
g.listener.region_add = &guest_phys_blocks_region_add;
memory_listener_register(&g.listener, &address_space_memory);
memory_listener_unregister(&g.listener);
}
static CPUState *find_paging_enabled_cpu(CPUState *start_cpu)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
if (cpu_paging_enabled(cpu)) {
return cpu;
}
}
return NULL;
}
void qemu_get_guest_memory_mapping(MemoryMappingList *list,
const GuestPhysBlockList *guest_phys_blocks,
Error **errp)
{
CPUState *cpu, *first_paging_enabled_cpu;
GuestPhysBlock *block;
ram_addr_t offset, length;
first_paging_enabled_cpu = find_paging_enabled_cpu(first_cpu);
if (first_paging_enabled_cpu) {
for (cpu = first_paging_enabled_cpu; cpu != NULL;
cpu = CPU_NEXT(cpu)) {
Error *err = NULL;
cpu_get_memory_mapping(cpu, list, &err);
if (err) {
error_propagate(errp, err);
return;
}
}
return;
}
    /*
     * If the guest doesn't use paging, the virtual address is equal to
     * the physical address.
     */
QTAILQ_FOREACH(block, &guest_phys_blocks->head, next) {
offset = block->target_start;
length = block->target_end - block->target_start;
create_new_memory_mapping(list, offset, offset, length);
}
}
void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list,
const GuestPhysBlockList *guest_phys_blocks)
{
GuestPhysBlock *block;
QTAILQ_FOREACH(block, &guest_phys_blocks->head, next) {
create_new_memory_mapping(list, block->target_start, 0,
block->target_end - block->target_start);
}
}
void memory_mapping_filter(MemoryMappingList *list, int64_t begin,
int64_t length)
{
MemoryMapping *cur, *next;
QTAILQ_FOREACH_SAFE(cur, &list->head, next, next) {
if (cur->phys_addr >= begin + length ||
cur->phys_addr + cur->length <= begin) {
QTAILQ_REMOVE(&list->head, cur, next);
g_free(cur);
list->num--;
continue;
}
if (cur->phys_addr < begin) {
cur->length -= begin - cur->phys_addr;
if (cur->virt_addr) {
cur->virt_addr += begin - cur->phys_addr;
}
cur->phys_addr = begin;
}
if (cur->phys_addr + cur->length > begin + length) {
cur->length -= cur->phys_addr + cur->length - begin - length;
}
}
}

system/meson.build Normal file
@ -0,0 +1,36 @@
specific_ss.add(when: 'CONFIG_SYSTEM_ONLY', if_true: [files(
'arch_init.c',
'ioport.c',
'memory.c',
'physmem.c',
'watchpoint.c',
)])
system_ss.add(files(
'balloon.c',
'bootdevice.c',
'cpus.c',
'cpu-throttle.c',
'cpu-timers.c',
'datadir.c',
'dirtylimit.c',
'dma-helpers.c',
'globals.c',
'memory_mapping.c',
'qdev-monitor.c',
'qtest.c',
'rtc.c',
'runstate-action.c',
'runstate-hmp-cmds.c',
'runstate.c',
'tpm-hmp-cmds.c',
'vl.c',
), sdl, libpmem, libdaxctl)
if have_tpm
system_ss.add(files('tpm.c'))
endif
system_ss.add(when: seccomp, if_true: files('qemu-seccomp.c'))
system_ss.add(when: fdt, if_true: files('device_tree.c'))
system_ss.add(when: 'CONFIG_LINUX', if_true: files('async-teardown.c'))

system/physmem.c Normal file, 3796 lines (diff suppressed: file too large)

system/qdev-monitor.c Normal file, 1148 lines (diff suppressed: file too large)

system/qemu-seccomp.c Normal file
@ -0,0 +1,486 @@
/*
* QEMU seccomp mode 2 support with libseccomp
*
* Copyright IBM, Corp. 2012
*
* Authors:
* Eduardo Otubo <eotubo@br.ibm.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*
* Contributions after 2012-01-13 are licensed under the terms of the
* GNU GPL, version 2 or (at your option) any later version.
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/config-file.h"
#include "qemu/option.h"
#include "qemu/module.h"
#include <sys/prctl.h>
#include <seccomp.h>
#include "sysemu/seccomp.h"
#include <linux/seccomp.h>
/* For some architectures (notably ARM) cacheflush is not supported until
* libseccomp 2.2.3, but configure enforces that we are using a more recent
* version on those hosts, so it is OK for this check to be less strict.
*/
#if SCMP_VER_MAJOR >= 3
#define HAVE_CACHEFLUSH
#elif SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR >= 2
#define HAVE_CACHEFLUSH
#endif
struct QemuSeccompSyscall {
int32_t num;
uint8_t set;
uint8_t narg;
const struct scmp_arg_cmp *arg_cmp;
uint32_t action;
};
const struct scmp_arg_cmp sched_setscheduler_arg[] = {
/* was SCMP_A1(SCMP_CMP_NE, SCHED_IDLE), but expanded due to GCC 4.x bug */
{ .arg = 1, .op = SCMP_CMP_NE, .datum_a = SCHED_IDLE }
};
/*
* See 'NOTES' in 'man 2 clone' - s390 & cross have 'flags' in
* different position to other architectures
*/
#if defined(HOST_S390X) || defined(HOST_S390) || defined(HOST_CRIS)
#define CLONE_FLAGS_ARG 1
#else
#define CLONE_FLAGS_ARG 0
#endif
#ifndef CLONE_PIDFD
# define CLONE_PIDFD 0x00001000
#endif
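/*
 * REQUIRE_CLONE_FLAG builds a rule matching clone() calls that do *not*
 * set the flag, FORBID_CLONE_FLAG one matching calls that do; in both
 * cases RULE_CLONE_FLAG turns the match into a SCMP_ACT_TRAP entry in
 * the "spawn" set, so only well-formed pthread_create() clones succeed.
 */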
#define REQUIRE_CLONE_FLAG(flag) \
const struct scmp_arg_cmp clone_arg ## flag[] = { \
{ .arg = CLONE_FLAGS_ARG, \
.op = SCMP_CMP_MASKED_EQ, \
.datum_a = flag, .datum_b = 0 } }
#define FORBID_CLONE_FLAG(flag) \
const struct scmp_arg_cmp clone_arg ## flag[] = { \
{ .arg = CLONE_FLAGS_ARG, \
.op = SCMP_CMP_MASKED_EQ, \
.datum_a = flag, .datum_b = flag } }
#define RULE_CLONE_FLAG(flag) \
{ SCMP_SYS(clone), QEMU_SECCOMP_SET_SPAWN, \
ARRAY_SIZE(clone_arg ## flag), clone_arg ## flag, SCMP_ACT_TRAP }
/* If no CLONE_* flags are set, except CSIGNAL, deny */
const struct scmp_arg_cmp clone_arg_none[] = {
{ .arg = CLONE_FLAGS_ARG,
.op = SCMP_CMP_MASKED_EQ,
.datum_a = ~(CSIGNAL), .datum_b = 0 }
};
/*
* pthread_create should always set all of these.
*/
REQUIRE_CLONE_FLAG(CLONE_VM);
REQUIRE_CLONE_FLAG(CLONE_FS);
REQUIRE_CLONE_FLAG(CLONE_FILES);
REQUIRE_CLONE_FLAG(CLONE_SIGHAND);
REQUIRE_CLONE_FLAG(CLONE_THREAD);
REQUIRE_CLONE_FLAG(CLONE_SYSVSEM);
REQUIRE_CLONE_FLAG(CLONE_SETTLS);
REQUIRE_CLONE_FLAG(CLONE_PARENT_SETTID);
REQUIRE_CLONE_FLAG(CLONE_CHILD_CLEARTID);
/*
* Musl sets this in pthread_create too, but it is
* obsolete and harmless since its behaviour is
* subsumed under CLONE_THREAD
*/
/*REQUIRE_CLONE_FLAG(CLONE_DETACHED);*/
/*
* These all indicate an attempt to spawn a process
* instead of a thread, or other undesirable scenarios
*/
FORBID_CLONE_FLAG(CLONE_PIDFD);
FORBID_CLONE_FLAG(CLONE_PTRACE);
FORBID_CLONE_FLAG(CLONE_VFORK);
FORBID_CLONE_FLAG(CLONE_PARENT);
FORBID_CLONE_FLAG(CLONE_NEWNS);
FORBID_CLONE_FLAG(CLONE_UNTRACED);
FORBID_CLONE_FLAG(CLONE_NEWCGROUP);
FORBID_CLONE_FLAG(CLONE_NEWUTS);
FORBID_CLONE_FLAG(CLONE_NEWIPC);
FORBID_CLONE_FLAG(CLONE_NEWUSER);
FORBID_CLONE_FLAG(CLONE_NEWPID);
FORBID_CLONE_FLAG(CLONE_NEWNET);
FORBID_CLONE_FLAG(CLONE_IO);
static const struct QemuSeccompSyscall denylist[] = {
/* default set of syscalls that should get blocked */
{ SCMP_SYS(reboot), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(swapon), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(swapoff), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(syslog), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(mount), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(umount), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(kexec_load), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(afs_syscall), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(break), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(ftime), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(getpmsg), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(gtty), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(lock), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(mpx), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(prof), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(profil), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(putpmsg), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(security), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(stty), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(tuxcall), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(ulimit), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(vserver), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
/* obsolete */
{ SCMP_SYS(readdir), QEMU_SECCOMP_SET_OBSOLETE,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(_sysctl), QEMU_SECCOMP_SET_OBSOLETE,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(bdflush), QEMU_SECCOMP_SET_OBSOLETE,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(create_module), QEMU_SECCOMP_SET_OBSOLETE,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(get_kernel_syms), QEMU_SECCOMP_SET_OBSOLETE,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(query_module), QEMU_SECCOMP_SET_OBSOLETE,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(sgetmask), QEMU_SECCOMP_SET_OBSOLETE,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(ssetmask), QEMU_SECCOMP_SET_OBSOLETE,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(sysfs), QEMU_SECCOMP_SET_OBSOLETE,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(uselib), QEMU_SECCOMP_SET_OBSOLETE,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(ustat), QEMU_SECCOMP_SET_OBSOLETE,
0, NULL, SCMP_ACT_TRAP },
/* privileged */
{ SCMP_SYS(setuid), QEMU_SECCOMP_SET_PRIVILEGED,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(setgid), QEMU_SECCOMP_SET_PRIVILEGED,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(setpgid), QEMU_SECCOMP_SET_PRIVILEGED,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(setsid), QEMU_SECCOMP_SET_PRIVILEGED,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(setreuid), QEMU_SECCOMP_SET_PRIVILEGED,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(setregid), QEMU_SECCOMP_SET_PRIVILEGED,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(setresuid), QEMU_SECCOMP_SET_PRIVILEGED,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(setresgid), QEMU_SECCOMP_SET_PRIVILEGED,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(setfsuid), QEMU_SECCOMP_SET_PRIVILEGED,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(setfsgid), QEMU_SECCOMP_SET_PRIVILEGED,
0, NULL, SCMP_ACT_TRAP },
/* spawn */
{ SCMP_SYS(fork), QEMU_SECCOMP_SET_SPAWN,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(vfork), QEMU_SECCOMP_SET_SPAWN,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(execve), QEMU_SECCOMP_SET_SPAWN,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(clone), QEMU_SECCOMP_SET_SPAWN,
ARRAY_SIZE(clone_arg_none), clone_arg_none, SCMP_ACT_TRAP },
RULE_CLONE_FLAG(CLONE_VM),
RULE_CLONE_FLAG(CLONE_FS),
RULE_CLONE_FLAG(CLONE_FILES),
RULE_CLONE_FLAG(CLONE_SIGHAND),
RULE_CLONE_FLAG(CLONE_THREAD),
RULE_CLONE_FLAG(CLONE_SYSVSEM),
RULE_CLONE_FLAG(CLONE_SETTLS),
RULE_CLONE_FLAG(CLONE_PARENT_SETTID),
RULE_CLONE_FLAG(CLONE_CHILD_CLEARTID),
/*RULE_CLONE_FLAG(CLONE_DETACHED),*/
RULE_CLONE_FLAG(CLONE_PIDFD),
RULE_CLONE_FLAG(CLONE_PTRACE),
RULE_CLONE_FLAG(CLONE_VFORK),
RULE_CLONE_FLAG(CLONE_PARENT),
RULE_CLONE_FLAG(CLONE_NEWNS),
RULE_CLONE_FLAG(CLONE_UNTRACED),
RULE_CLONE_FLAG(CLONE_NEWCGROUP),
RULE_CLONE_FLAG(CLONE_NEWUTS),
RULE_CLONE_FLAG(CLONE_NEWIPC),
RULE_CLONE_FLAG(CLONE_NEWUSER),
RULE_CLONE_FLAG(CLONE_NEWPID),
RULE_CLONE_FLAG(CLONE_NEWNET),
RULE_CLONE_FLAG(CLONE_IO),
#ifdef __SNR_clone3
{ SCMP_SYS(clone3), QEMU_SECCOMP_SET_SPAWN,
0, NULL, SCMP_ACT_ERRNO(ENOSYS) },
#endif
#ifdef __SNR_execveat
{ SCMP_SYS(execveat), QEMU_SECCOMP_SET_SPAWN,
0, NULL, SCMP_ACT_TRAP },
#endif
{ SCMP_SYS(setns), QEMU_SECCOMP_SET_SPAWN,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(unshare), QEMU_SECCOMP_SET_SPAWN,
0, NULL, SCMP_ACT_TRAP },
/* resource control */
{ SCMP_SYS(setpriority), QEMU_SECCOMP_SET_RESOURCECTL,
0, NULL, SCMP_ACT_ERRNO(EPERM) },
{ SCMP_SYS(sched_setparam), QEMU_SECCOMP_SET_RESOURCECTL,
0, NULL, SCMP_ACT_ERRNO(EPERM) },
{ SCMP_SYS(sched_setscheduler), QEMU_SECCOMP_SET_RESOURCECTL,
ARRAY_SIZE(sched_setscheduler_arg), sched_setscheduler_arg,
SCMP_ACT_ERRNO(EPERM) },
{ SCMP_SYS(sched_setaffinity), QEMU_SECCOMP_SET_RESOURCECTL,
0, NULL, SCMP_ACT_ERRNO(EPERM) },
};
static inline __attribute__((unused)) int
qemu_seccomp(unsigned int operation, unsigned int flags, void *args)
{
#ifdef __NR_seccomp
return syscall(__NR_seccomp, operation, flags, args);
#else
errno = ENOSYS;
return -1;
#endif
}
static uint32_t qemu_seccomp_update_action(uint32_t action)
{
#if defined(SECCOMP_GET_ACTION_AVAIL) && defined(SCMP_ACT_KILL_PROCESS) && \
defined(SECCOMP_RET_KILL_PROCESS)
if (action == SCMP_ACT_TRAP) {
static int kill_process = -1;
if (kill_process == -1) {
uint32_t testaction = SECCOMP_RET_KILL_PROCESS;
if (qemu_seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &testaction) == 0) {
kill_process = 1;
} else {
kill_process = 0;
}
}
if (kill_process == 1) {
return SCMP_ACT_KILL_PROCESS;
}
}
#endif
return action;
}
static int seccomp_start(uint32_t seccomp_opts, Error **errp)
{
int rc = -1;
unsigned int i = 0;
scmp_filter_ctx ctx;
ctx = seccomp_init(SCMP_ACT_ALLOW);
if (ctx == NULL) {
error_setg(errp, "failed to initialize seccomp context");
goto seccomp_return;
}
#if defined(CONFIG_SECCOMP_SYSRAWRC)
/*
* This must be the first seccomp_attr_set() call to have full
* error propagation from subsequent seccomp APIs.
*/
rc = seccomp_attr_set(ctx, SCMP_FLTATR_API_SYSRAWRC, 1);
if (rc != 0) {
error_setg_errno(errp, -rc,
"failed to set seccomp rawrc attribute");
goto seccomp_return;
}
#endif
rc = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_TSYNC, 1);
if (rc != 0) {
error_setg_errno(errp, -rc,
"failed to set seccomp thread synchronization");
goto seccomp_return;
}
for (i = 0; i < ARRAY_SIZE(denylist); i++) {
uint32_t action;
if (!(seccomp_opts & denylist[i].set)) {
continue;
}
action = qemu_seccomp_update_action(denylist[i].action);
rc = seccomp_rule_add_array(ctx, action, denylist[i].num,
denylist[i].narg, denylist[i].arg_cmp);
if (rc < 0) {
error_setg_errno(errp, -rc,
"failed to add seccomp denylist rules");
goto seccomp_return;
}
}
rc = seccomp_load(ctx);
if (rc < 0) {
error_setg_errno(errp, -rc,
"failed to load seccomp syscall filter in kernel");
}
seccomp_return:
seccomp_release(ctx);
return rc < 0 ? -1 : 0;
}
int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
{
if (qemu_opt_get_bool(opts, "enable", false)) {
uint32_t seccomp_opts = QEMU_SECCOMP_SET_DEFAULT
| QEMU_SECCOMP_SET_OBSOLETE;
const char *value = NULL;
value = qemu_opt_get(opts, "obsolete");
if (value) {
if (g_str_equal(value, "allow")) {
seccomp_opts &= ~QEMU_SECCOMP_SET_OBSOLETE;
} else if (g_str_equal(value, "deny")) {
/* this is the default option, this if is here
* to provide a little bit of consistency for
* the command line */
} else {
error_setg(errp, "invalid argument for obsolete");
return -1;
}
}
value = qemu_opt_get(opts, "elevateprivileges");
if (value) {
if (g_str_equal(value, "deny")) {
seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED;
} else if (g_str_equal(value, "children")) {
seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED;
/* calling prctl directly because we're
* not sure if the host has CAP_SYS_ADMIN set */
if (prctl(PR_SET_NO_NEW_PRIVS, 1)) {
error_setg(errp, "failed to set no_new_privs, aborting");
return -1;
}
} else if (g_str_equal(value, "allow")) {
/* default value */
} else {
error_setg(errp, "invalid argument for elevateprivileges");
return -1;
}
}
value = qemu_opt_get(opts, "spawn");
if (value) {
if (g_str_equal(value, "deny")) {
seccomp_opts |= QEMU_SECCOMP_SET_SPAWN;
} else if (g_str_equal(value, "allow")) {
/* default value */
} else {
error_setg(errp, "invalid argument for spawn");
return -1;
}
}
value = qemu_opt_get(opts, "resourcecontrol");
if (value) {
if (g_str_equal(value, "deny")) {
seccomp_opts |= QEMU_SECCOMP_SET_RESOURCECTL;
} else if (g_str_equal(value, "allow")) {
/* default value */
} else {
error_setg(errp, "invalid argument for resourcecontrol");
return -1;
}
}
if (seccomp_start(seccomp_opts, errp) < 0) {
return -1;
}
}
return 0;
}
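/*
* Illustration of the documented -sandbox syntax: a command line such as
* -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny
* reaches this parser with enable=true, keeps the obsolete set in the
* filter and adds the privileged, spawn and resource-control sets before
* seccomp_start() installs them.
*/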
static QemuOptsList qemu_sandbox_opts = {
.name = "sandbox",
.implied_opt_name = "enable",
.head = QTAILQ_HEAD_INITIALIZER(qemu_sandbox_opts.head),
.desc = {
{
.name = "enable",
.type = QEMU_OPT_BOOL,
},
{
.name = "obsolete",
.type = QEMU_OPT_STRING,
},
{
.name = "elevateprivileges",
.type = QEMU_OPT_STRING,
},
{
.name = "spawn",
.type = QEMU_OPT_STRING,
},
{
.name = "resourcecontrol",
.type = QEMU_OPT_STRING,
},
{ /* end of list */ }
},
};
static void seccomp_register(void)
{
bool add = false;
/* FIXME: use seccomp_api_get() >= 2 check when released */
#if defined(SECCOMP_FILTER_FLAG_TSYNC)
int check;
/* probe host TSYNC capability: with a NULL args pointer the kernel
* fails with EFAULT when the filter flag is supported, and with
* ENOSYS (or EINVAL) when it is not */
check = qemu_seccomp(SECCOMP_SET_MODE_FILTER,
SECCOMP_FILTER_FLAG_TSYNC, NULL);
if (check < 0 && errno == EFAULT) {
add = true;
}
#endif
if (add) {
qemu_add_opts(&qemu_sandbox_opts);
}
}
opts_init(seccomp_register);

1070
system/qtest.c Normal file

File diff suppressed because it is too large Load diff

192
system/rtc.c Normal file
View file

@ -0,0 +1,192 @@
/*
* RTC configuration and clock read
*
* Copyright (c) 2003-2020 QEMU contributors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "qemu/option.h"
#include "qemu/timer.h"
#include "qom/object.h"
#include "sysemu/replay.h"
#include "sysemu/sysemu.h"
#include "sysemu/rtc.h"
#include "hw/rtc/mc146818rtc.h"
static enum {
RTC_BASE_UTC,
RTC_BASE_LOCALTIME,
RTC_BASE_DATETIME,
} rtc_base_type = RTC_BASE_UTC;
static time_t rtc_ref_start_datetime;
static int rtc_realtime_clock_offset; /* used only with QEMU_CLOCK_REALTIME */
static int rtc_host_datetime_offset = -1; /* valid & used only with
RTC_BASE_DATETIME */
QEMUClockType rtc_clock;
/***********************************************************/
/* RTC reference time/date access */
static time_t qemu_ref_timedate(QEMUClockType clock)
{
time_t value = qemu_clock_get_ms(clock) / 1000;
switch (clock) {
case QEMU_CLOCK_REALTIME:
value -= rtc_realtime_clock_offset;
/* fall through */
case QEMU_CLOCK_VIRTUAL:
value += rtc_ref_start_datetime;
break;
case QEMU_CLOCK_HOST:
if (rtc_base_type == RTC_BASE_DATETIME) {
value -= rtc_host_datetime_offset;
}
break;
default:
assert(0);
}
return value;
}
void qemu_get_timedate(struct tm *tm, time_t offset)
{
time_t ti = qemu_ref_timedate(rtc_clock);
ti += offset;
switch (rtc_base_type) {
case RTC_BASE_DATETIME:
case RTC_BASE_UTC:
gmtime_r(&ti, tm);
break;
case RTC_BASE_LOCALTIME:
localtime_r(&ti, tm);
break;
}
}
time_t qemu_timedate_diff(struct tm *tm)
{
time_t seconds;
switch (rtc_base_type) {
case RTC_BASE_DATETIME:
case RTC_BASE_UTC:
seconds = mktimegm(tm);
break;
case RTC_BASE_LOCALTIME:
{
struct tm tmp = *tm;
tmp.tm_isdst = -1; /* use timezone to figure it out */
seconds = mktime(&tmp);
break;
}
default:
abort();
}
return seconds - qemu_ref_timedate(QEMU_CLOCK_HOST);
}
static void configure_rtc_base_datetime(const char *startdate)
{
time_t rtc_start_datetime;
struct tm tm;
if (sscanf(startdate, "%d-%d-%dT%d:%d:%d", &tm.tm_year, &tm.tm_mon,
&tm.tm_mday, &tm.tm_hour, &tm.tm_min, &tm.tm_sec) == 6) {
/* OK */
} else if (sscanf(startdate, "%d-%d-%d",
&tm.tm_year, &tm.tm_mon, &tm.tm_mday) == 3) {
tm.tm_hour = 0;
tm.tm_min = 0;
tm.tm_sec = 0;
} else {
goto date_fail;
}
tm.tm_year -= 1900;
tm.tm_mon--;
rtc_start_datetime = mktimegm(&tm);
if (rtc_start_datetime == -1) {
date_fail:
error_report("invalid datetime format");
error_printf("valid formats: "
"'2006-06-17T16:01:21' or '2006-06-17'\n");
exit(1);
}
rtc_host_datetime_offset = rtc_ref_start_datetime - rtc_start_datetime;
rtc_ref_start_datetime = rtc_start_datetime;
}
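/*
* Worked example: with -rtc base=2006-06-17T16:01:21, mktimegm() turns
* the parsed struct tm into the epoch value for that UTC datetime; if
* the host clock currently reads, say, one hour later,
* rtc_host_datetime_offset becomes 3600 and qemu_ref_timedate() above
* recovers the requested start time by subtracting it from
* QEMU_CLOCK_HOST.
*/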
void configure_rtc(QemuOpts *opts)
{
const char *value;
/* Set defaults */
rtc_clock = QEMU_CLOCK_HOST;
rtc_ref_start_datetime = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
rtc_realtime_clock_offset = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
value = qemu_opt_get(opts, "base");
if (value) {
if (!strcmp(value, "utc")) {
rtc_base_type = RTC_BASE_UTC;
} else if (!strcmp(value, "localtime")) {
rtc_base_type = RTC_BASE_LOCALTIME;
replay_add_blocker("-rtc base=localtime");
} else {
rtc_base_type = RTC_BASE_DATETIME;
configure_rtc_base_datetime(value);
}
}
value = qemu_opt_get(opts, "clock");
if (value) {
if (!strcmp(value, "host")) {
rtc_clock = QEMU_CLOCK_HOST;
} else if (!strcmp(value, "rt")) {
rtc_clock = QEMU_CLOCK_REALTIME;
} else if (!strcmp(value, "vm")) {
rtc_clock = QEMU_CLOCK_VIRTUAL;
} else {
error_report("invalid option value '%s'", value);
exit(1);
}
}
value = qemu_opt_get(opts, "driftfix");
if (value) {
if (!strcmp(value, "slew")) {
object_register_sugar_prop(TYPE_MC146818_RTC,
"lost_tick_policy",
"slew",
false);
if (!object_class_by_name(TYPE_MC146818_RTC)) {
warn_report("driftfix 'slew' is not available with this machine");
}
} else if (!strcmp(value, "none")) {
/* discard is default */
} else {
error_report("invalid option value '%s'", value);
exit(1);
}
}
}
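/*
* Example of the documented -rtc syntax:
* -rtc base=localtime,clock=host,driftfix=slew
* selects RTC_BASE_LOCALTIME, keeps rtc_clock on QEMU_CLOCK_HOST and
* registers the mc146818 "slew" lost-tick policy handled above.
*/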

46
system/runstate-action.c Normal file
View file

@ -0,0 +1,46 @@
/*
* Copyright (c) 2020 Oracle and/or its affiliates.
*
* This work is licensed under the terms of the GNU GPL, version 2.
* See the COPYING file in the top-level directory.
*
*/
#include "qemu/osdep.h"
#include "sysemu/runstate-action.h"
#include "sysemu/watchdog.h"
#include "qemu/config-file.h"
#include "qapi/error.h"
#include "qemu/option_int.h"
RebootAction reboot_action = REBOOT_ACTION_RESET;
ShutdownAction shutdown_action = SHUTDOWN_ACTION_POWEROFF;
PanicAction panic_action = PANIC_ACTION_SHUTDOWN;
/*
* Receives actions to be applied for specific guest events
* and sets the internal state as requested.
*/
void qmp_set_action(bool has_reboot, RebootAction reboot,
bool has_shutdown, ShutdownAction shutdown,
bool has_panic, PanicAction panic,
bool has_watchdog, WatchdogAction watchdog,
Error **errp)
{
if (has_reboot) {
reboot_action = reboot;
}
if (has_panic) {
panic_action = panic;
}
if (has_watchdog) {
qmp_watchdog_set_action(watchdog, errp);
}
/* Process shutdown last, in case the panic action needs to be altered */
if (has_shutdown) {
shutdown_action = shutdown;
}
}
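/*
* A QMP sketch of the command this implements (argument names from the
* documented set-action command):
* { "execute": "set-action",
* "arguments": { "reboot": "reset", "shutdown": "poweroff",
* "panic": "pause", "watchdog": "debug" } }
*/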

95
system/runstate-hmp-cmds.c Normal file
View file

@ -0,0 +1,95 @@
/*
* HMP commands related to run state
*
* Copyright IBM, Corp. 2011
*
* Authors:
* Anthony Liguori <aliguori@us.ibm.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*
* Contributions after 2012-01-13 are licensed under the terms of the
* GNU GPL, version 2 or (at your option) any later version.
*/
#include "qemu/osdep.h"
#include "exec/cpu-common.h"
#include "monitor/hmp.h"
#include "monitor/monitor.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-run-state.h"
#include "qapi/qmp/qdict.h"
#include "qemu/accel.h"
void hmp_info_status(Monitor *mon, const QDict *qdict)
{
StatusInfo *info;
info = qmp_query_status(NULL);
monitor_printf(mon, "VM status: %s",
info->running ? "running" : "paused");
if (!info->running && info->status != RUN_STATE_PAUSED) {
monitor_printf(mon, " (%s)", RunState_str(info->status));
}
monitor_printf(mon, "\n");
qapi_free_StatusInfo(info);
}
void hmp_one_insn_per_tb(Monitor *mon, const QDict *qdict)
{
const char *option = qdict_get_try_str(qdict, "option");
AccelState *accel = current_accel();
bool newval;
if (!object_property_find(OBJECT(accel), "one-insn-per-tb")) {
monitor_printf(mon,
"This accelerator does not support setting one-insn-per-tb\n");
return;
}
if (!option || !strcmp(option, "on")) {
newval = true;
} else if (!strcmp(option, "off")) {
newval = false;
} else {
monitor_printf(mon, "unexpected option %s\n", option);
return;
}
/* If the property exists then setting it can never fail */
object_property_set_bool(OBJECT(accel), "one-insn-per-tb",
newval, &error_abort);
}
void hmp_watchdog_action(Monitor *mon, const QDict *qdict)
{
Error *err = NULL;
WatchdogAction action;
char *qapi_value;
qapi_value = g_ascii_strdown(qdict_get_str(qdict, "action"), -1);
action = qapi_enum_parse(&WatchdogAction_lookup, qapi_value, -1, &err);
g_free(qapi_value);
if (err) {
hmp_handle_error(mon, err);
return;
}
qmp_watchdog_set_action(action, &error_abort);
}
void watchdog_action_completion(ReadLineState *rs, int nb_args, const char *str)
{
int i;
if (nb_args != 2) {
return;
}
readline_set_completion_index(rs, strlen(str));
for (i = 0; i < WATCHDOG_ACTION__MAX; i++) {
readline_add_completion_of(rs, str, WatchdogAction_str(i));
}
}

871
system/runstate.c Normal file
View file

@ -0,0 +1,871 @@
/*
* QEMU main system emulation loop
*
* Copyright (c) 2003-2020 QEMU contributors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "audio/audio.h"
#include "block/block.h"
#include "block/export.h"
#include "chardev/char.h"
#include "crypto/cipher.h"
#include "crypto/init.h"
#include "exec/cpu-common.h"
#include "gdbstub/syscalls.h"
#include "hw/boards.h"
#include "migration/misc.h"
#include "migration/postcopy-ram.h"
#include "monitor/monitor.h"
#include "net/net.h"
#include "net/vhost_net.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-run-state.h"
#include "qapi/qapi-events-run-state.h"
#include "qemu/accel.h"
#include "qemu/error-report.h"
#include "qemu/job.h"
#include "qemu/log.h"
#include "qemu/module.h"
#include "qemu/plugin.h"
#include "qemu/sockets.h"
#include "qemu/timer.h"
#include "qemu/thread.h"
#include "qom/object.h"
#include "qom/object_interfaces.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "sysemu/replay.h"
#include "sysemu/reset.h"
#include "sysemu/runstate.h"
#include "sysemu/runstate-action.h"
#include "sysemu/sysemu.h"
#include "sysemu/tpm.h"
#include "trace.h"
static NotifierList exit_notifiers =
NOTIFIER_LIST_INITIALIZER(exit_notifiers);
static RunState current_run_state = RUN_STATE_PRELAUNCH;
/* We use RUN_STATE__MAX but any invalid value will do */
static RunState vmstop_requested = RUN_STATE__MAX;
static QemuMutex vmstop_lock;
typedef struct {
RunState from;
RunState to;
} RunStateTransition;
static const RunStateTransition runstate_transitions_def[] = {
{ RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE },
{ RUN_STATE_DEBUG, RUN_STATE_RUNNING },
{ RUN_STATE_DEBUG, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_DEBUG, RUN_STATE_PRELAUNCH },
{ RUN_STATE_INMIGRATE, RUN_STATE_INTERNAL_ERROR },
{ RUN_STATE_INMIGRATE, RUN_STATE_IO_ERROR },
{ RUN_STATE_INMIGRATE, RUN_STATE_PAUSED },
{ RUN_STATE_INMIGRATE, RUN_STATE_RUNNING },
{ RUN_STATE_INMIGRATE, RUN_STATE_SHUTDOWN },
{ RUN_STATE_INMIGRATE, RUN_STATE_SUSPENDED },
{ RUN_STATE_INMIGRATE, RUN_STATE_WATCHDOG },
{ RUN_STATE_INMIGRATE, RUN_STATE_GUEST_PANICKED },
{ RUN_STATE_INMIGRATE, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_INMIGRATE, RUN_STATE_PRELAUNCH },
{ RUN_STATE_INMIGRATE, RUN_STATE_POSTMIGRATE },
{ RUN_STATE_INMIGRATE, RUN_STATE_COLO },
{ RUN_STATE_INTERNAL_ERROR, RUN_STATE_PAUSED },
{ RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_INTERNAL_ERROR, RUN_STATE_PRELAUNCH },
{ RUN_STATE_IO_ERROR, RUN_STATE_RUNNING },
{ RUN_STATE_IO_ERROR, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_IO_ERROR, RUN_STATE_PRELAUNCH },
{ RUN_STATE_PAUSED, RUN_STATE_RUNNING },
{ RUN_STATE_PAUSED, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_PAUSED, RUN_STATE_POSTMIGRATE },
{ RUN_STATE_PAUSED, RUN_STATE_PRELAUNCH },
{ RUN_STATE_PAUSED, RUN_STATE_COLO},
{ RUN_STATE_POSTMIGRATE, RUN_STATE_RUNNING },
{ RUN_STATE_POSTMIGRATE, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_POSTMIGRATE, RUN_STATE_PRELAUNCH },
{ RUN_STATE_PRELAUNCH, RUN_STATE_RUNNING },
{ RUN_STATE_PRELAUNCH, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_RUNNING },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_PAUSED },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_POSTMIGRATE },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_PRELAUNCH },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_COLO },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_INTERNAL_ERROR },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_IO_ERROR },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_SHUTDOWN },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_SUSPENDED },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_WATCHDOG },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_GUEST_PANICKED },
{ RUN_STATE_RESTORE_VM, RUN_STATE_RUNNING },
{ RUN_STATE_RESTORE_VM, RUN_STATE_PRELAUNCH },
{ RUN_STATE_COLO, RUN_STATE_RUNNING },
{ RUN_STATE_COLO, RUN_STATE_PRELAUNCH },
{ RUN_STATE_COLO, RUN_STATE_SHUTDOWN},
{ RUN_STATE_RUNNING, RUN_STATE_DEBUG },
{ RUN_STATE_RUNNING, RUN_STATE_INTERNAL_ERROR },
{ RUN_STATE_RUNNING, RUN_STATE_IO_ERROR },
{ RUN_STATE_RUNNING, RUN_STATE_PAUSED },
{ RUN_STATE_RUNNING, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_RUNNING, RUN_STATE_RESTORE_VM },
{ RUN_STATE_RUNNING, RUN_STATE_SAVE_VM },
{ RUN_STATE_RUNNING, RUN_STATE_SHUTDOWN },
{ RUN_STATE_RUNNING, RUN_STATE_WATCHDOG },
{ RUN_STATE_RUNNING, RUN_STATE_GUEST_PANICKED },
{ RUN_STATE_RUNNING, RUN_STATE_COLO},
{ RUN_STATE_SAVE_VM, RUN_STATE_RUNNING },
{ RUN_STATE_SHUTDOWN, RUN_STATE_PAUSED },
{ RUN_STATE_SHUTDOWN, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_SHUTDOWN, RUN_STATE_PRELAUNCH },
{ RUN_STATE_SHUTDOWN, RUN_STATE_COLO },
{ RUN_STATE_DEBUG, RUN_STATE_SUSPENDED },
{ RUN_STATE_RUNNING, RUN_STATE_SUSPENDED },
{ RUN_STATE_SUSPENDED, RUN_STATE_RUNNING },
{ RUN_STATE_SUSPENDED, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_SUSPENDED, RUN_STATE_PRELAUNCH },
{ RUN_STATE_SUSPENDED, RUN_STATE_COLO},
{ RUN_STATE_WATCHDOG, RUN_STATE_RUNNING },
{ RUN_STATE_WATCHDOG, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_WATCHDOG, RUN_STATE_PRELAUNCH },
{ RUN_STATE_WATCHDOG, RUN_STATE_COLO},
{ RUN_STATE_GUEST_PANICKED, RUN_STATE_RUNNING },
{ RUN_STATE_GUEST_PANICKED, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_GUEST_PANICKED, RUN_STATE_PRELAUNCH },
{ RUN_STATE__MAX, RUN_STATE__MAX },
};
static bool runstate_valid_transitions[RUN_STATE__MAX][RUN_STATE__MAX];
bool runstate_check(RunState state)
{
return current_run_state == state;
}
static void runstate_init(void)
{
const RunStateTransition *p;
memset(&runstate_valid_transitions, 0, sizeof(runstate_valid_transitions));
for (p = &runstate_transitions_def[0]; p->from != RUN_STATE__MAX; p++) {
runstate_valid_transitions[p->from][p->to] = true;
}
qemu_mutex_init(&vmstop_lock);
}
/* This function will abort() on invalid state transitions */
void runstate_set(RunState new_state)
{
assert(new_state < RUN_STATE__MAX);
trace_runstate_set(current_run_state, RunState_str(current_run_state),
new_state, RunState_str(new_state));
if (current_run_state == new_state) {
return;
}
if (!runstate_valid_transitions[current_run_state][new_state]) {
error_report("invalid runstate transition: '%s' -> '%s'",
RunState_str(current_run_state),
RunState_str(new_state));
abort();
}
current_run_state = new_state;
}
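/*
* For example, monitor "stop" followed by "cont" walks RUNNING ->
* PAUSED -> RUNNING, both rows of the table above, while a hypothetical
* PRELAUNCH -> POSTMIGRATE request has no row and would abort().
*/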
RunState runstate_get(void)
{
return current_run_state;
}
bool runstate_is_running(void)
{
return runstate_check(RUN_STATE_RUNNING);
}
bool runstate_needs_reset(void)
{
return runstate_check(RUN_STATE_INTERNAL_ERROR) ||
runstate_check(RUN_STATE_SHUTDOWN);
}
StatusInfo *qmp_query_status(Error **errp)
{
StatusInfo *info = g_malloc0(sizeof(*info));
AccelState *accel = current_accel();
/*
* We ignore errors, which will happen if the accelerator
* is not TCG. "singlestep" is meaningless for other accelerators,
* so we will set the StatusInfo field to false for those.
*/
info->singlestep = object_property_get_bool(OBJECT(accel),
"one-insn-per-tb", NULL);
info->running = runstate_is_running();
info->status = current_run_state;
return info;
}
bool qemu_vmstop_requested(RunState *r)
{
qemu_mutex_lock(&vmstop_lock);
*r = vmstop_requested;
vmstop_requested = RUN_STATE__MAX;
qemu_mutex_unlock(&vmstop_lock);
return *r < RUN_STATE__MAX;
}
void qemu_system_vmstop_request_prepare(void)
{
qemu_mutex_lock(&vmstop_lock);
}
void qemu_system_vmstop_request(RunState state)
{
vmstop_requested = state;
qemu_mutex_unlock(&vmstop_lock);
qemu_notify_event();
}
struct VMChangeStateEntry {
VMChangeStateHandler *cb;
VMChangeStateHandler *prepare_cb;
void *opaque;
QTAILQ_ENTRY(VMChangeStateEntry) entries;
int priority;
};
static QTAILQ_HEAD(, VMChangeStateEntry) vm_change_state_head =
QTAILQ_HEAD_INITIALIZER(vm_change_state_head);
/**
* qemu_add_vm_change_state_handler_prio:
* @cb: the callback to invoke
* @opaque: user data passed to the callback
* @priority: low priorities execute first when the vm runs and the reverse is
* true when the vm stops
*
* Register a callback function that is invoked when the vm starts or stops
* running.
*
* Returns: an entry to be freed using qemu_del_vm_change_state_handler()
*/
VMChangeStateEntry *qemu_add_vm_change_state_handler_prio(
VMChangeStateHandler *cb, void *opaque, int priority)
{
return qemu_add_vm_change_state_handler_prio_full(cb, NULL, opaque,
priority);
}
/**
* qemu_add_vm_change_state_handler_prio_full:
* @cb: the main callback to invoke
* @prepare_cb: a callback to invoke before the main callback
* @opaque: user data passed to the callbacks
* @priority: low priorities execute first when the vm runs and the reverse is
* true when the vm stops
*
* Register a main callback function and an optional prepare callback function
* that are invoked when the vm starts or stops running. The main callback and
* the prepare callback are called in two separate phases: First all prepare
* callbacks are called and only then all main callbacks are called. As its
* name suggests, the prepare callback can be used to do some preparatory work
* before invoking the main callback.
*
* Returns: an entry to be freed using qemu_del_vm_change_state_handler()
*/
VMChangeStateEntry *
qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb,
VMChangeStateHandler *prepare_cb,
void *opaque, int priority)
{
VMChangeStateEntry *e;
VMChangeStateEntry *other;
e = g_malloc0(sizeof(*e));
e->cb = cb;
e->prepare_cb = prepare_cb;
e->opaque = opaque;
e->priority = priority;
/* Keep list sorted in ascending priority order */
QTAILQ_FOREACH(other, &vm_change_state_head, entries) {
if (priority < other->priority) {
QTAILQ_INSERT_BEFORE(other, e, entries);
return e;
}
}
QTAILQ_INSERT_TAIL(&vm_change_state_head, e, entries);
return e;
}
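/*
* Usage sketch (my_vm_state_cb and dev are hypothetical):
*
* static void my_vm_state_cb(void *opaque, bool running, RunState state)
* {
* (react to the VM starting or stopping)
* }
*
* VMChangeStateEntry *e =
* qemu_add_vm_change_state_handler(my_vm_state_cb, dev);
* (...)
* qemu_del_vm_change_state_handler(e);
*/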
VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb,
void *opaque)
{
return qemu_add_vm_change_state_handler_prio(cb, opaque, 0);
}
void qemu_del_vm_change_state_handler(VMChangeStateEntry *e)
{
QTAILQ_REMOVE(&vm_change_state_head, e, entries);
g_free(e);
}
void vm_state_notify(bool running, RunState state)
{
VMChangeStateEntry *e, *next;
trace_vm_state_notify(running, state, RunState_str(state));
if (running) {
QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) {
if (e->prepare_cb) {
e->prepare_cb(e->opaque, running, state);
}
}
QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) {
e->cb(e->opaque, running, state);
}
} else {
QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) {
if (e->prepare_cb) {
e->prepare_cb(e->opaque, running, state);
}
}
QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) {
e->cb(e->opaque, running, state);
}
}
}
static ShutdownCause reset_requested;
static ShutdownCause shutdown_requested;
static int shutdown_signal;
static pid_t shutdown_pid;
static int powerdown_requested;
static int debug_requested;
static int suspend_requested;
static WakeupReason wakeup_reason;
static NotifierList powerdown_notifiers =
NOTIFIER_LIST_INITIALIZER(powerdown_notifiers);
static NotifierList suspend_notifiers =
NOTIFIER_LIST_INITIALIZER(suspend_notifiers);
static NotifierList wakeup_notifiers =
NOTIFIER_LIST_INITIALIZER(wakeup_notifiers);
static NotifierList shutdown_notifiers =
NOTIFIER_LIST_INITIALIZER(shutdown_notifiers);
static uint32_t wakeup_reason_mask = ~(1 << QEMU_WAKEUP_REASON_NONE);
ShutdownCause qemu_shutdown_requested_get(void)
{
return shutdown_requested;
}
ShutdownCause qemu_reset_requested_get(void)
{
return reset_requested;
}
static int qemu_shutdown_requested(void)
{
return qatomic_xchg(&shutdown_requested, SHUTDOWN_CAUSE_NONE);
}
static void qemu_kill_report(void)
{
if (!qtest_driver() && shutdown_signal) {
if (shutdown_pid == 0) {
/* This happens e.g. for ^C at the terminal, so it's worth
* avoiding printing an odd message in that case.
*/
error_report("terminating on signal %d", shutdown_signal);
} else {
char *shutdown_cmd = qemu_get_pid_name(shutdown_pid);
error_report("terminating on signal %d from pid " FMT_pid " (%s)",
shutdown_signal, shutdown_pid,
shutdown_cmd ? shutdown_cmd : "<unknown process>");
g_free(shutdown_cmd);
}
shutdown_signal = 0;
}
}
static ShutdownCause qemu_reset_requested(void)
{
ShutdownCause r = reset_requested;
if (r && replay_checkpoint(CHECKPOINT_RESET_REQUESTED)) {
reset_requested = SHUTDOWN_CAUSE_NONE;
return r;
}
return SHUTDOWN_CAUSE_NONE;
}
static int qemu_suspend_requested(void)
{
int r = suspend_requested;
if (r && replay_checkpoint(CHECKPOINT_SUSPEND_REQUESTED)) {
suspend_requested = 0;
return r;
}
return false;
}
static WakeupReason qemu_wakeup_requested(void)
{
return wakeup_reason;
}
static int qemu_powerdown_requested(void)
{
int r = powerdown_requested;
powerdown_requested = 0;
return r;
}
static int qemu_debug_requested(void)
{
int r = debug_requested;
debug_requested = 0;
return r;
}
/*
* Reset the VM. Issue an event unless @reason is SHUTDOWN_CAUSE_NONE.
*/
void qemu_system_reset(ShutdownCause reason)
{
MachineClass *mc;
mc = current_machine ? MACHINE_GET_CLASS(current_machine) : NULL;
cpu_synchronize_all_states();
if (mc && mc->reset) {
mc->reset(current_machine, reason);
} else {
qemu_devices_reset(reason);
}
switch (reason) {
case SHUTDOWN_CAUSE_NONE:
case SHUTDOWN_CAUSE_SUBSYSTEM_RESET:
case SHUTDOWN_CAUSE_SNAPSHOT_LOAD:
break;
default:
qapi_event_send_reset(shutdown_caused_by_guest(reason), reason);
}
cpu_synchronize_all_post_reset();
}
/*
* Wake the VM after suspend.
*/
static void qemu_system_wakeup(void)
{
MachineClass *mc;
mc = current_machine ? MACHINE_GET_CLASS(current_machine) : NULL;
if (mc && mc->wakeup) {
mc->wakeup(current_machine);
}
}
void qemu_system_guest_panicked(GuestPanicInformation *info)
{
qemu_log_mask(LOG_GUEST_ERROR, "Guest crashed");
if (current_cpu) {
current_cpu->crash_occurred = true;
}
/*
* TODO: Currently the available panic actions are: none, pause, and
* shutdown, but in principle debug and reset could be supported as well.
* Investigate any potential use cases for the unimplemented actions.
*/
if (panic_action == PANIC_ACTION_PAUSE
|| (panic_action == PANIC_ACTION_SHUTDOWN && shutdown_action == SHUTDOWN_ACTION_PAUSE)) {
qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE, info);
vm_stop(RUN_STATE_GUEST_PANICKED);
} else if (panic_action == PANIC_ACTION_SHUTDOWN ||
panic_action == PANIC_ACTION_EXIT_FAILURE) {
qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_POWEROFF, info);
vm_stop(RUN_STATE_GUEST_PANICKED);
qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_PANIC);
} else {
qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_RUN, info);
}
if (info) {
if (info->type == GUEST_PANIC_INFORMATION_TYPE_HYPER_V) {
qemu_log_mask(LOG_GUEST_ERROR, "\nHV crash parameters: (%#"PRIx64
" %#"PRIx64" %#"PRIx64" %#"PRIx64" %#"PRIx64")\n",
info->u.hyper_v.arg1,
info->u.hyper_v.arg2,
info->u.hyper_v.arg3,
info->u.hyper_v.arg4,
info->u.hyper_v.arg5);
} else if (info->type == GUEST_PANIC_INFORMATION_TYPE_S390) {
qemu_log_mask(LOG_GUEST_ERROR, " on cpu %d: %s\n"
"PSW: 0x%016" PRIx64 " 0x%016" PRIx64"\n",
info->u.s390.core,
S390CrashReason_str(info->u.s390.reason),
info->u.s390.psw_mask,
info->u.s390.psw_addr);
}
qapi_free_GuestPanicInformation(info);
}
}
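/*
* Example: starting QEMU with the documented '-action panic=pause'
* makes this function emit GUEST_PANICKED with action "pause" and park
* the VM in RUN_STATE_GUEST_PANICKED instead of shutting it down.
*/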
void qemu_system_guest_crashloaded(GuestPanicInformation *info)
{
qemu_log_mask(LOG_GUEST_ERROR, "Guest crash loaded");
qapi_event_send_guest_crashloaded(GUEST_PANIC_ACTION_RUN, info);
qapi_free_GuestPanicInformation(info);
}
void qemu_system_reset_request(ShutdownCause reason)
{
if (reboot_action == REBOOT_ACTION_SHUTDOWN &&
reason != SHUTDOWN_CAUSE_SUBSYSTEM_RESET) {
shutdown_requested = reason;
} else if (!cpus_are_resettable()) {
error_report("cpus are not resettable, terminating");
shutdown_requested = reason;
} else {
reset_requested = reason;
}
cpu_stop_current();
qemu_notify_event();
}
static void qemu_system_suspend(void)
{
pause_all_vcpus();
notifier_list_notify(&suspend_notifiers, NULL);
runstate_set(RUN_STATE_SUSPENDED);
qapi_event_send_suspend();
}
void qemu_system_suspend_request(void)
{
if (runstate_check(RUN_STATE_SUSPENDED)) {
return;
}
suspend_requested = 1;
cpu_stop_current();
qemu_notify_event();
}
void qemu_register_suspend_notifier(Notifier *notifier)
{
notifier_list_add(&suspend_notifiers, notifier);
}
void qemu_system_wakeup_request(WakeupReason reason, Error **errp)
{
trace_system_wakeup_request(reason);
if (!runstate_check(RUN_STATE_SUSPENDED)) {
error_setg(errp,
"Unable to wake up: guest is not in suspended state");
return;
}
if (!(wakeup_reason_mask & (1 << reason))) {
return;
}
runstate_set(RUN_STATE_RUNNING);
wakeup_reason = reason;
qemu_notify_event();
}
void qemu_system_wakeup_enable(WakeupReason reason, bool enabled)
{
if (enabled) {
wakeup_reason_mask |= (1 << reason);
} else {
wakeup_reason_mask &= ~(1 << reason);
}
}
void qemu_register_wakeup_notifier(Notifier *notifier)
{
notifier_list_add(&wakeup_notifiers, notifier);
}
static bool wakeup_suspend_enabled;
void qemu_register_wakeup_support(void)
{
wakeup_suspend_enabled = true;
}
bool qemu_wakeup_suspend_enabled(void)
{
return wakeup_suspend_enabled;
}
void qemu_system_killed(int signal, pid_t pid)
{
shutdown_signal = signal;
shutdown_pid = pid;
shutdown_action = SHUTDOWN_ACTION_POWEROFF;
/* Cannot call qemu_system_shutdown_request directly because
* we are in a signal handler.
*/
shutdown_requested = SHUTDOWN_CAUSE_HOST_SIGNAL;
qemu_notify_event();
}
void qemu_system_shutdown_request(ShutdownCause reason)
{
trace_qemu_system_shutdown_request(reason);
replay_shutdown_request(reason);
shutdown_requested = reason;
qemu_notify_event();
}
static void qemu_system_powerdown(void)
{
qapi_event_send_powerdown();
notifier_list_notify(&powerdown_notifiers, NULL);
}
static void qemu_system_shutdown(ShutdownCause cause)
{
qapi_event_send_shutdown(shutdown_caused_by_guest(cause), cause);
notifier_list_notify(&shutdown_notifiers, &cause);
}
void qemu_system_powerdown_request(void)
{
trace_qemu_system_powerdown_request();
powerdown_requested = 1;
qemu_notify_event();
}
void qemu_register_powerdown_notifier(Notifier *notifier)
{
notifier_list_add(&powerdown_notifiers, notifier);
}
void qemu_register_shutdown_notifier(Notifier *notifier)
{
notifier_list_add(&shutdown_notifiers, notifier);
}
void qemu_system_debug_request(void)
{
debug_requested = 1;
qemu_notify_event();
}
static bool main_loop_should_exit(int *status)
{
RunState r;
ShutdownCause request;
if (qemu_debug_requested()) {
vm_stop(RUN_STATE_DEBUG);
}
if (qemu_suspend_requested()) {
qemu_system_suspend();
}
request = qemu_shutdown_requested();
if (request) {
qemu_kill_report();
qemu_system_shutdown(request);
if (shutdown_action == SHUTDOWN_ACTION_PAUSE) {
vm_stop(RUN_STATE_SHUTDOWN);
} else {
if (request == SHUTDOWN_CAUSE_GUEST_PANIC &&
panic_action == PANIC_ACTION_EXIT_FAILURE) {
*status = EXIT_FAILURE;
}
return true;
}
}
request = qemu_reset_requested();
if (request) {
pause_all_vcpus();
qemu_system_reset(request);
resume_all_vcpus();
/*
* runstate can change in pause_all_vcpus()
* as iothread mutex is unlocked
*/
if (!runstate_check(RUN_STATE_RUNNING) &&
!runstate_check(RUN_STATE_INMIGRATE) &&
!runstate_check(RUN_STATE_FINISH_MIGRATE)) {
runstate_set(RUN_STATE_PRELAUNCH);
}
}
if (qemu_wakeup_requested()) {
pause_all_vcpus();
qemu_system_wakeup();
notifier_list_notify(&wakeup_notifiers, &wakeup_reason);
wakeup_reason = QEMU_WAKEUP_REASON_NONE;
resume_all_vcpus();
qapi_event_send_wakeup();
}
if (qemu_powerdown_requested()) {
qemu_system_powerdown();
}
if (qemu_vmstop_requested(&r)) {
vm_stop(r);
}
return false;
}
int qemu_main_loop(void)
{
int status = EXIT_SUCCESS;
while (!main_loop_should_exit(&status)) {
main_loop_wait(false);
}
return status;
}
void qemu_add_exit_notifier(Notifier *notify)
{
notifier_list_add(&exit_notifiers, notify);
}
void qemu_remove_exit_notifier(Notifier *notify)
{
notifier_remove(notify);
}
static void qemu_run_exit_notifiers(void)
{
notifier_list_notify(&exit_notifiers, NULL);
}
void qemu_init_subsystems(void)
{
Error *err = NULL;
os_set_line_buffering();
module_call_init(MODULE_INIT_TRACE);
qemu_init_cpu_list();
qemu_init_cpu_loop();
qemu_mutex_lock_iothread();
atexit(qemu_run_exit_notifiers);
module_call_init(MODULE_INIT_QOM);
module_call_init(MODULE_INIT_MIGRATION);
runstate_init();
precopy_infrastructure_init();
postcopy_infrastructure_init();
monitor_init_globals();
if (qcrypto_init(&err) < 0) {
error_reportf_err(err, "cannot initialize crypto: ");
exit(1);
}
os_setup_early_signal_handling();
bdrv_init_with_whitelist();
socket_init();
}
void qemu_cleanup(void)
{
gdb_exit(0);
/*
* cleaning up the migration object cancels any existing migration
* try to do this early so that it also stops using devices.
*/
migration_shutdown();
/*
* Close the exports before draining the block layer. The export
* drivers may have coroutines yielding on it, so we need to clean
them up before the drain, as otherwise they may get stuck in
* blk_wait_while_drained().
*/
blk_exp_close_all();
/* No more vcpu or device emulation activity beyond this point */
vm_shutdown();
replay_finish();
/*
* We must cancel all block jobs while the block layer is drained,
* or cancelling will be affected by throttling and thus may block
* for an extended period of time.
* Begin the drained section after vm_shutdown() to avoid requests being
* stuck in the BlockBackend's request queue.
* We do not need to end this section, because we do not want any
* requests happening from here on anyway.
*/
bdrv_drain_all_begin();
job_cancel_sync_all();
bdrv_close_all();
/* vhost-user must be cleaned up before chardevs. */
tpm_cleanup();
net_cleanup();
audio_cleanup();
monitor_cleanup();
qemu_chr_cleanup();
user_creatable_cleanup();
/* TODO: unref root container, check all devices are ok */
}

65
system/tpm-hmp-cmds.c Normal file
View file

@ -0,0 +1,65 @@
/*
* HMP commands related to TPM
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* (at your option) any later version.
*/
#include "qemu/osdep.h"
#include "qapi/qapi-commands-tpm.h"
#include "monitor/monitor.h"
#include "monitor/hmp.h"
#include "qapi/error.h"
void hmp_info_tpm(Monitor *mon, const QDict *qdict)
{
#ifdef CONFIG_TPM
TPMInfoList *info_list, *info;
Error *err = NULL;
unsigned int c = 0;
TPMPassthroughOptions *tpo;
TPMEmulatorOptions *teo;
info_list = qmp_query_tpm(&err);
if (err) {
monitor_printf(mon, "TPM device not supported\n");
error_free(err);
return;
}
if (info_list) {
monitor_printf(mon, "TPM device:\n");
}
for (info = info_list; info; info = info->next) {
TPMInfo *ti = info->value;
monitor_printf(mon, " tpm%d: model=%s\n",
c, TpmModel_str(ti->model));
monitor_printf(mon, " \\ %s: type=%s",
ti->id, TpmType_str(ti->options->type));
switch (ti->options->type) {
case TPM_TYPE_PASSTHROUGH:
tpo = ti->options->u.passthrough.data;
monitor_printf(mon, "%s%s%s%s",
tpo->path ? ",path=" : "",
tpo->path ?: "",
tpo->cancel_path ? ",cancel-path=" : "",
tpo->cancel_path ?: "");
break;
case TPM_TYPE_EMULATOR:
teo = ti->options->u.emulator.data;
monitor_printf(mon, ",chardev=%s", teo->chardev);
break;
case TPM_TYPE__MAX:
break;
}
monitor_printf(mon, "\n");
c++;
}
qapi_free_TPMInfoList(info_list);
#else
monitor_printf(mon, "TPM device not supported\n");
#endif /* CONFIG_TPM */
}

239
system/tpm.c Normal file
View file

@ -0,0 +1,239 @@
/*
* TPM configuration
*
* Copyright (C) 2011-2013 IBM Corporation
*
* Authors:
* Stefan Berger <stefanb@us.ibm.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
* Based on net.c
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-tpm.h"
#include "qapi/qmp/qerror.h"
#include "sysemu/tpm_backend.h"
#include "sysemu/tpm.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
static QLIST_HEAD(, TPMBackend) tpm_backends =
QLIST_HEAD_INITIALIZER(tpm_backends);
static const TPMBackendClass *
tpm_be_find_by_type(enum TpmType type)
{
ObjectClass *oc;
char *typename = g_strdup_printf("tpm-%s", TpmType_str(type));
oc = object_class_by_name(typename);
g_free(typename);
if (!object_class_dynamic_cast(oc, TYPE_TPM_BACKEND)) {
return NULL;
}
return TPM_BACKEND_CLASS(oc);
}
/*
* Walk the list of available TPM backend drivers and display them on the
* screen.
*/
static void tpm_display_backend_drivers(void)
{
bool got_one = false;
int i;
for (i = 0; i < TPM_TYPE__MAX; i++) {
const TPMBackendClass *bc = tpm_be_find_by_type(i);
if (!bc) {
continue;
}
if (!got_one) {
error_printf("Supported TPM types (choose only one):\n");
got_one = true;
}
error_printf("%12s %s\n", TpmType_str(i), bc->desc);
}
if (!got_one) {
error_printf("No TPM backend types are available\n");
}
}
/*
* Find the TPM with the given Id
*/
TPMBackend *qemu_find_tpm_be(const char *id)
{
TPMBackend *drv;
if (id) {
QLIST_FOREACH(drv, &tpm_backends, list) {
if (!strcmp(drv->id, id)) {
return drv;
}
}
}
return NULL;
}
static int tpm_init_tpmdev(void *dummy, QemuOpts *opts, Error **errp)
{
/*
* Use of error_report() in a function with an Error ** parameter
* is suspicious. It is okay here. The parameter only exists to
* make the function usable with qemu_opts_foreach(). It is not
* actually used.
*/
const char *value;
const char *id;
const TPMBackendClass *be;
TPMBackend *drv;
Error *local_err = NULL;
int i;
if (!QLIST_EMPTY(&tpm_backends)) {
error_report("Only one TPM is allowed.");
return 1;
}
id = qemu_opts_id(opts);
if (id == NULL) {
error_report(QERR_MISSING_PARAMETER, "id");
return 1;
}
value = qemu_opt_get(opts, "type");
if (!value) {
error_report(QERR_MISSING_PARAMETER, "type");
tpm_display_backend_drivers();
return 1;
}
i = qapi_enum_parse(&TpmType_lookup, value, -1, NULL);
be = i >= 0 ? tpm_be_find_by_type(i) : NULL;
if (be == NULL) {
error_report(QERR_INVALID_PARAMETER_VALUE,
"type", "a TPM backend type");
tpm_display_backend_drivers();
return 1;
}
/* validate backend specific opts */
if (!qemu_opts_validate(opts, be->opts, &local_err)) {
error_report_err(local_err);
return 1;
}
drv = be->create(opts);
if (!drv) {
return 1;
}
drv->id = g_strdup(id);
QLIST_INSERT_HEAD(&tpm_backends, drv, list);
return 0;
}
/*
* Walk the list of TPM backend drivers that are in use and call their
* destroy function to have them cleaned up.
*/
void tpm_cleanup(void)
{
TPMBackend *drv, *next;
QLIST_FOREACH_SAFE(drv, &tpm_backends, list, next) {
QLIST_REMOVE(drv, list);
object_unref(OBJECT(drv));
}
}
/*
* Initialize the TPM. Process the tpmdev command line options describing the
* TPM backend.
*/
int tpm_init(void)
{
if (qemu_opts_foreach(qemu_find_opts("tpmdev"),
tpm_init_tpmdev, NULL, NULL)) {
return -1;
}
return 0;
}
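/*
* Example of the documented swtpm setup that feeds this path:
* -chardev socket,id=chrtpm,path=/tmp/swtpm-sock
* -tpmdev emulator,id=tpm0,chardev=chrtpm
* -device tpm-tis,tpmdev=tpm0
* tpm_init_tpmdev() then creates the "emulator" backend with id "tpm0".
*/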
/*
* Parse the TPM configuration options.
* To display all available TPM backends the user may use '-tpmdev help'
*/
int tpm_config_parse(QemuOptsList *opts_list, const char *optstr)
{
QemuOpts *opts;
if (!strcmp(optstr, "help")) {
tpm_display_backend_drivers();
return -1;
}
opts = qemu_opts_parse_noisily(opts_list, optstr, true);
if (!opts) {
return -1;
}
return 0;
}
/*
* Walk the list of active TPM backends and collect information about them.
*/
TPMInfoList *qmp_query_tpm(Error **errp)
{
TPMBackend *drv;
TPMInfoList *head = NULL, **tail = &head;
QLIST_FOREACH(drv, &tpm_backends, list) {
if (!drv->tpmif) {
continue;
}
QAPI_LIST_APPEND(tail, tpm_backend_query_tpm(drv));
}
return head;
}
TpmTypeList *qmp_query_tpm_types(Error **errp)
{
unsigned int i = 0;
TpmTypeList *head = NULL, **tail = &head;
for (i = 0; i < TPM_TYPE__MAX; i++) {
if (!tpm_be_find_by_type(i)) {
continue;
}
QAPI_LIST_APPEND(tail, i);
}
return head;
}
TpmModelList *qmp_query_tpm_models(Error **errp)
{
TpmModelList *head = NULL, **tail = &head;
GSList *e, *l = object_class_get_list(TYPE_TPM_IF, false);
for (e = l; e; e = e->next) {
TPMIfClass *c = TPM_IF_CLASS(e->data);
QAPI_LIST_APPEND(tail, c->model);
}
g_slist_free(l);
return head;
}

40
system/trace-events Normal file
View file

@ -0,0 +1,40 @@
# See docs/devel/tracing.rst for syntax documentation.
# balloon.c
# Since requests are raised via monitor, not many tracepoints are needed.
balloon_event(void *opaque, unsigned long addr) "opaque %p addr %lu"
# ioport.c
cpu_in(unsigned int addr, char size, unsigned int val) "addr 0x%x(%c) value %u"
cpu_out(unsigned int addr, char size, unsigned int val) "addr 0x%x(%c) value %u"
# memory.c
memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size, const char *name) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u name '%s'"
memory_region_ops_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size, const char *name) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u name '%s'"
memory_region_subpage_read(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" value 0x%"PRIx64" size %u"
memory_region_subpage_write(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" value 0x%"PRIx64" size %u"
memory_region_ram_device_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u"
memory_region_ram_device_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u"
memory_region_sync_dirty(const char *mr, const char *listener, int global) "mr '%s' listener '%s' synced (global=%d)"
flatview_new(void *view, void *root) "%p (root %p)"
flatview_destroy(void *view, void *root) "%p (root %p)"
flatview_destroy_rcu(void *view, void *root) "%p (root %p)"
global_dirty_changed(unsigned int bitmask) "bitmask 0x%"PRIx32
# cpus.c
vm_stop_flush_all(int ret) "ret %d"
# vl.c
vm_state_notify(int running, int reason, const char *reason_str) "running %d reason %d (%s)"
load_file(const char *name, const char *path) "name %s location %s"
runstate_set(int current_state, const char *current_state_str, int new_state, const char *new_state_str) "current_run_state %d (%s) new_state %d (%s)"
system_wakeup_request(int reason) "reason=%d"
qemu_system_shutdown_request(int reason) "reason=%d"
qemu_system_powerdown_request(void) ""
# dirtylimit.c
dirtylimit_state_initialize(int max_cpus) "dirtylimit state initialize: max cpus %d"
dirtylimit_state_finalize(void)
dirtylimit_throttle_pct(int cpu_index, uint64_t pct, int64_t time_us) "CPU[%d] throttle percent: %" PRIu64 ", throttle adjust time %"PRIi64 " us"
dirtylimit_set_vcpu(int cpu_index, uint64_t quota) "CPU[%d] set dirty page rate limit %"PRIu64
dirtylimit_vcpu_execute(int cpu_index, int64_t sleep_time_us) "CPU[%d] sleep %"PRIi64 " us"

1
system/trace.h Normal file
View file

@ -0,0 +1 @@
#include "trace/trace-system.h"

3740
system/vl.c Normal file

File diff suppressed because it is too large Load diff

226
system/watchpoint.c Normal file
View file

@ -0,0 +1,226 @@
/*
* CPU watchpoints
*
* Copyright (c) 2003 Fabrice Bellard
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "qemu/error-report.h"
#include "exec/exec-all.h"
#include "exec/translate-all.h"
#include "sysemu/tcg.h"
#include "sysemu/replay.h"
#include "hw/core/tcg-cpu-ops.h"
#include "hw/core/cpu.h"
/* Add a watchpoint. */
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
int flags, CPUWatchpoint **watchpoint)
{
CPUWatchpoint *wp;
vaddr in_page;
/* forbid ranges which are empty or run off the end of the address space */
if (len == 0 || (addr + len - 1) < addr) {
error_report("tried to set invalid watchpoint at %"
VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
return -EINVAL;
}
wp = g_malloc(sizeof(*wp));
wp->vaddr = addr;
wp->len = len;
wp->flags = flags;
/* keep all GDB-injected watchpoints in front */
if (flags & BP_GDB) {
QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
} else {
QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
}
in_page = -(addr | TARGET_PAGE_MASK);
if (len <= in_page) {
tlb_flush_page(cpu, addr);
} else {
tlb_flush(cpu);
}
if (watchpoint) {
*watchpoint = wp;
}
return 0;
}
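/*
* Usage sketch (the address and length are hypothetical): watch four
* bytes at guest address 0x1000 for writes on behalf of the gdbstub:
*
* CPUWatchpoint *wp;
* cpu_watchpoint_insert(cpu, 0x1000, 4, BP_MEM_WRITE | BP_GDB, &wp);
*/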
/* Remove a specific watchpoint. */
int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
int flags)
{
CPUWatchpoint *wp;
QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
if (addr == wp->vaddr && len == wp->len
&& flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
cpu_watchpoint_remove_by_ref(cpu, wp);
return 0;
}
}
return -ENOENT;
}
/* Remove a specific watchpoint by reference. */
void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
tlb_flush_page(cpu, watchpoint->vaddr);
g_free(watchpoint);
}
/* Remove all matching watchpoints. */
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
{
CPUWatchpoint *wp, *next;
QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
if (wp->flags & mask) {
cpu_watchpoint_remove_by_ref(cpu, wp);
}
}
}
#ifdef CONFIG_TCG
/*
* Return true if this watchpoint address matches the specified
* access (ie the address range covered by the watchpoint overlaps
* partially or completely with the address range covered by the
* access).
*/
static inline bool watchpoint_address_matches(CPUWatchpoint *wp,
vaddr addr, vaddr len)
{
/*
* We know the lengths are non-zero, but a little caution is
* required to avoid errors in the case where the range ends
* exactly at the top of the address space and so addr + len
* wraps round to zero.
*/
vaddr wpend = wp->vaddr + wp->len - 1;
vaddr addrend = addr + len - 1;
return !(addr > wpend || wp->vaddr > addrend);
}
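/*
* Worked example: a watchpoint at vaddr 0xfffc with len 8 gives
* wpend = 0x10003; an access at addr 0x10000 with len 4 gives
* addrend = 0x10003. Neither addr > wpend nor wp->vaddr > addrend
* holds, so the ranges overlap. Computing inclusive ends first is what
* keeps a range ending at the top of the address space from wrapping
* to zero.
*/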
/* Return flags for watchpoints that match addr + prot. */
int cpu_watchpoint_address_matches(CPUState *cpu, vaddr addr, vaddr len)
{
CPUWatchpoint *wp;
int ret = 0;
QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
if (watchpoint_address_matches(wp, addr, len)) {
ret |= wp->flags;
}
}
return ret;
}
/* Generate a debug exception if a watchpoint has been hit. */
void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
MemTxAttrs attrs, int flags, uintptr_t ra)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
CPUWatchpoint *wp;
assert(tcg_enabled());
if (cpu->watchpoint_hit) {
/*
* We re-entered the check after replacing the TB.
* Now raise the debug interrupt so that it will
* trigger after the current instruction.
*/
qemu_mutex_lock_iothread();
cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
qemu_mutex_unlock_iothread();
return;
}
if (cc->tcg_ops->adjust_watchpoint_address) {
/* this is currently used only by ARM BE32 */
addr = cc->tcg_ops->adjust_watchpoint_address(cpu, addr, len);
}
assert((flags & ~BP_MEM_ACCESS) == 0);
QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
int hit_flags = wp->flags & flags;
if (hit_flags && watchpoint_address_matches(wp, addr, len)) {
if (replay_running_debug()) {
/*
* replay_breakpoint reads icount.
* Force recompile to succeed, because icount may
* be read only at the end of the block.
*/
if (!cpu->neg.can_do_io) {
/* Force execution of one insn next time. */
cpu->cflags_next_tb = 1 | CF_LAST_IO | CF_NOIRQ
| curr_cflags(cpu);
cpu_loop_exit_restore(cpu, ra);
}
/*
* Don't process the watchpoints when we are
* in a reverse debugging operation.
*/
replay_breakpoint();
return;
}
wp->flags |= hit_flags << BP_HIT_SHIFT;
wp->hitaddr = MAX(addr, wp->vaddr);
wp->hitattrs = attrs;
if (wp->flags & BP_CPU
&& cc->tcg_ops->debug_check_watchpoint
&& !cc->tcg_ops->debug_check_watchpoint(cpu, wp)) {
wp->flags &= ~BP_WATCHPOINT_HIT;
continue;
}
cpu->watchpoint_hit = wp;
mmap_lock();
/* This call also restores vCPU state */
tb_check_watchpoint(cpu, ra);
if (wp->flags & BP_STOP_BEFORE_ACCESS) {
cpu->exception_index = EXCP_DEBUG;
mmap_unlock();
cpu_loop_exit(cpu);
} else {
/* Force execution of one insn next time. */
cpu->cflags_next_tb = 1 | CF_LAST_IO | CF_NOIRQ
| curr_cflags(cpu);
mmap_unlock();
cpu_loop_exit_noexc(cpu);
}
} else {
wp->flags &= ~BP_WATCHPOINT_HIT;
}
}
}
#endif /* CONFIG_TCG */