Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging

* util/log: re-allow switching away from stderr log file
* finish audio configuration rework
* cleanup HVF stubs
* remove more mentions of softmmu

# -----BEGIN PGP SIGNATURE-----
#
# iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmUi/kIUHHBib256aW5p
# QHJlZGhhdC5jb20ACgkQv/vSX3jHroOXWwf/YW16QMzqdAPVHYRf9NcCneRF16El
# t3lEod0q0sHhchPbh9e04aKbh+oBNeWu9sFyTl11Fwsi+DGmp/b28ziva75/4rfd
# h5N9aX/z2jwPqy93IwPDu3soKXCCgTK+ywtD/5GLQwBGqxs7W2xUEEb7eCnVefHa
# zwL3MOUqPICeqOnR1TNw9k3N3veF04D+rmchTwbAjAmx1f8EI+mK9VlGK9V8TUjP
# 3HjpZYJluc0a92lR5VONJ7V25QfttsjLysTgpFwVAQPS6Frzatc/hWclfLYgw9vl
# 2Irk83FV8gXPRl0XKNcqSDsv6h/yGP6TDFIB8QwRSRGBqIQi5aOlfBJzsQ==
# =qbm7
# -----END PGP SIGNATURE-----
# gpg: Signature made Sun 08 Oct 2023 15:08:50 EDT
# gpg:                using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg:                issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>" [full]
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* tag 'for-upstream' of https://gitlab.com/bonzini/qemu: (25 commits)
  audio, qtest: get rid of QEMU_AUDIO_DRV
  audio: reintroduce default audio backend for VNC
  audio: do not use first -audiodev as default audio device
  audio: extend -audio to allow creating a default backend
  audio: extract audio_define_default
  audio: disable default backends if -audio/-audiodev is used
  audio: error hints need a trailing \n
  cutils: squelch compiler warnings with custom paths
  configure: change $softmmu to $system
  system: Rename softmmu/ directory as system/
  meson: Rename target_softmmu_arch -> target_system_arch
  meson: Rename softmmu_mods -> system_mods
  target/i386: Rename i386_softmmu_kvm_ss -> i386_kvm_ss
  semihosting: Rename softmmu_FOO_user() -> uaccess_FOO_user()
  gdbstub: Rename 'softmmu' -> 'system'
  accel: Rename accel_softmmu* -> accel_system*
  tcg: Correct invalid mentions of 'softmmu' by 'system-mode'
  fuzz: Correct invalid mentions of 'softmmu' by 'system'
  cpu: Correct invalid mentions of 'softmmu' by 'system-mode'
  travis-ci: Correct invalid mentions of 'softmmu' by 'system'
  ...

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
commit 1527c6b6fa
Stefan Hajnoczi, 2023-10-09 10:11:17 -04:00
132 changed files with 305 additions and 276 deletions

system/arch_init.c (new file, 50 lines)

@@ -0,0 +1,50 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qemu/module.h"
#include "sysemu/arch_init.h"
#ifdef TARGET_SPARC
int graphic_width = 1024;
int graphic_height = 768;
int graphic_depth = 8;
#elif defined(TARGET_M68K)
int graphic_width = 800;
int graphic_height = 600;
int graphic_depth = 8;
#else
int graphic_width = 800;
int graphic_height = 600;
int graphic_depth = 32;
#endif
const uint32_t arch_type = QEMU_ARCH;
void qemu_init_arch_modules(void)
{
#ifdef CONFIG_MODULES
module_init_info(qemu_modinfo);
module_allow_arch(TARGET_NAME);
#endif
}

system/async-teardown.c (new file, 143 lines)

@@ -0,0 +1,143 @@
/*
* Asynchronous teardown
*
* Copyright IBM, Corp. 2022
*
* Authors:
* Claudio Imbrenda <imbrenda@linux.ibm.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or (at your
* option) any later version. See the COPYING file in the top-level directory.
*
*/
#include "qemu/osdep.h"
#include <dirent.h>
#include <sys/prctl.h>
#include <sched.h>
#include "qemu/async-teardown.h"
#ifdef _SC_THREAD_STACK_MIN
#define CLONE_STACK_SIZE sysconf(_SC_THREAD_STACK_MIN)
#else
#define CLONE_STACK_SIZE 16384
#endif
static pid_t the_ppid;
/*
* Close all open file descriptors.
*/
static void close_all_open_fd(void)
{
struct dirent *de;
int fd, dfd;
DIR *dir;
#ifdef CONFIG_CLOSE_RANGE
int r = close_range(0, ~0U, 0);
if (!r) {
/* Success, no need to try other ways. */
return;
}
#endif
dir = opendir("/proc/self/fd");
if (!dir) {
/* If /proc is not mounted, there is nothing that can be done. */
return;
}
/* Avoid closing the directory. */
dfd = dirfd(dir);
for (de = readdir(dir); de; de = readdir(dir)) {
fd = atoi(de->d_name);
if (fd != dfd) {
close(fd);
}
}
closedir(dir);
}
static void hup_handler(int signal)
{
/* Check every second if this process has been reparented. */
while (the_ppid == getppid()) {
/* sleep() is safe to use in a signal handler. */
sleep(1);
}
/* At this point the parent process has terminated completely. */
_exit(0);
}
static int async_teardown_fn(void *arg)
{
struct sigaction sa = { .sa_handler = hup_handler };
sigset_t hup_signal;
char name[16];
/* Set a meaningful name for this process. */
snprintf(name, 16, "cleanup/%d", the_ppid);
prctl(PR_SET_NAME, (unsigned long)name);
/*
* Close all file descriptors that might have been inherited from the
* main qemu process when doing clone, needed to make libvirt happy.
* Not using close_range for increased compatibility with older kernels.
*/
close_all_open_fd();
/* Set up a handler for SIGHUP and unblock SIGHUP. */
sigaction(SIGHUP, &sa, NULL);
sigemptyset(&hup_signal);
sigaddset(&hup_signal, SIGHUP);
sigprocmask(SIG_UNBLOCK, &hup_signal, NULL);
/* Ask to receive SIGHUP when the parent dies. */
prctl(PR_SET_PDEATHSIG, SIGHUP);
/*
* Sleep forever, unless the parent process has already terminated. The
* only interruption can come from the SIGHUP signal, which in normal
* operation is received when the parent process dies.
*/
if (the_ppid == getppid()) {
pause();
}
/* At this point the parent process has terminated completely. */
_exit(0);
}
/*
* Allocate a new stack of a reasonable size, and return a pointer to its top.
*/
static void *new_stack_for_clone(void)
{
size_t stack_size = CLONE_STACK_SIZE;
char *stack_ptr;
/* Allocate a new stack and get a pointer to its top. */
stack_ptr = qemu_alloc_stack(&stack_size);
stack_ptr += stack_size;
return stack_ptr;
}
/*
* Block all signals, start (clone) a new process sharing the address space
* with qemu (CLONE_VM), then restore signals.
*/
void init_async_teardown(void)
{
sigset_t all_signals, old_signals;
the_ppid = getpid();
sigfillset(&all_signals);
sigprocmask(SIG_BLOCK, &all_signals, &old_signals);
clone(async_teardown_fn, new_stack_for_clone(), CLONE_VM, NULL);
sigprocmask(SIG_SETMASK, &old_signals, NULL);
}
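
The teardown helper above combines PR_SET_PDEATHSIG with a manual getppid() re-check to close the race where the parent dies before prctl() runs. A minimal standalone sketch of that pattern (hypothetical demo using fork() rather than the clone(CLONE_VM) call in this commit):

#include <signal.h>
#include <stdio.h>
#include <sys/prctl.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    pid_t ppid = getpid();      /* recorded before fork, like the_ppid above */
    pid_t child = fork();

    if (child < 0) {
        perror("fork");
        return 1;
    }
    if (child == 0) {
        /* Ask the kernel to send SIGHUP when the parent dies. */
        prctl(PR_SET_PDEATHSIG, SIGHUP);
        /*
         * Close the race: if the parent died between fork() and prctl(),
         * no signal will ever arrive, so re-check the parent pid directly.
         */
        if (getppid() != ppid) {
            _exit(0);
        }
        pause();                /* default SIGHUP action terminates us */
        _exit(0);
    }
    return 0;                   /* parent exits; the child is torn down */
}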

system/balloon.c (new file, 106 lines)

@@ -0,0 +1,106 @@
/*
* Generic Balloon handlers and management
*
* Copyright (c) 2003-2008 Fabrice Bellard
* Copyright (C) 2011 Red Hat, Inc.
* Copyright (C) 2011 Amit Shah <amit.shah@redhat.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "sysemu/kvm.h"
#include "sysemu/balloon.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-machine.h"
#include "qapi/qmp/qerror.h"
#include "trace.h"
static QEMUBalloonEvent *balloon_event_fn;
static QEMUBalloonStatus *balloon_stat_fn;
static void *balloon_opaque;
static bool have_balloon(Error **errp)
{
if (kvm_enabled() && !kvm_has_sync_mmu()) {
error_set(errp, ERROR_CLASS_KVM_MISSING_CAP,
"Using KVM without synchronous MMU, balloon unavailable");
return false;
}
if (!balloon_event_fn) {
error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE,
"No balloon device has been activated");
return false;
}
return true;
}
int qemu_add_balloon_handler(QEMUBalloonEvent *event_func,
QEMUBalloonStatus *stat_func, void *opaque)
{
if (balloon_event_fn || balloon_stat_fn || balloon_opaque) {
/* We've already registered one balloon handler. How many can
* a guest really have?
*/
return -1;
}
balloon_event_fn = event_func;
balloon_stat_fn = stat_func;
balloon_opaque = opaque;
return 0;
}
void qemu_remove_balloon_handler(void *opaque)
{
if (balloon_opaque != opaque) {
return;
}
balloon_event_fn = NULL;
balloon_stat_fn = NULL;
balloon_opaque = NULL;
}
BalloonInfo *qmp_query_balloon(Error **errp)
{
BalloonInfo *info;
if (!have_balloon(errp)) {
return NULL;
}
info = g_malloc0(sizeof(*info));
balloon_stat_fn(balloon_opaque, info);
return info;
}
void qmp_balloon(int64_t target, Error **errp)
{
if (!have_balloon(errp)) {
return;
}
if (target <= 0) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "target", "a size");
return;
}
trace_balloon_event(balloon_opaque, target);
balloon_event_fn(balloon_opaque, target);
}
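
balloon.c keeps exactly one (event, stat, opaque) triple: a second registration fails, and removal only takes effect if the caller presents the same opaque pointer. A generic single-slot sketch of that registration pattern (hypothetical names, plain C rather than QEMU types):

#include <stdio.h>

typedef void EventFn(void *opaque, long long target);

static EventFn *event_fn;
static void *event_opaque;

static int add_handler(EventFn *fn, void *opaque)
{
    if (event_fn || event_opaque) {
        return -1;              /* one handler already registered */
    }
    event_fn = fn;
    event_opaque = opaque;
    return 0;
}

static void remove_handler(void *opaque)
{
    if (event_opaque != opaque) {
        return;                 /* someone else's registration: ignore */
    }
    event_fn = NULL;
    event_opaque = NULL;
}

static void my_event(void *opaque, long long target)
{
    printf("balloon target: %lld bytes\n", target);
}

int main(void)
{
    int dev = 0;
    add_handler(my_event, &dev);
    printf("second add: %d\n", add_handler(my_event, &dev));  /* -1 */
    event_fn(event_opaque, 1LL << 30);                        /* 1 GiB */
    remove_handler(&dev);
    return 0;
}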

system/bootdevice.c (new file, 430 lines)

@@ -0,0 +1,430 @@
/*
* QEMU Boot Device Implementation
*
* Copyright (c) 2014 HUAWEI TECHNOLOGIES CO., LTD.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "sysemu/sysemu.h"
#include "qapi/visitor.h"
#include "qemu/error-report.h"
#include "sysemu/reset.h"
#include "hw/qdev-core.h"
#include "hw/boards.h"
typedef struct FWBootEntry FWBootEntry;
struct FWBootEntry {
QTAILQ_ENTRY(FWBootEntry) link;
int32_t bootindex;
DeviceState *dev;
char *suffix;
};
static QTAILQ_HEAD(, FWBootEntry) fw_boot_order =
QTAILQ_HEAD_INITIALIZER(fw_boot_order);
static QEMUBootSetHandler *boot_set_handler;
static void *boot_set_opaque;
void qemu_register_boot_set(QEMUBootSetHandler *func, void *opaque)
{
boot_set_handler = func;
boot_set_opaque = opaque;
}
void qemu_boot_set(const char *boot_order, Error **errp)
{
Error *local_err = NULL;
if (!boot_set_handler) {
error_setg(errp, "no function defined to set boot device list for"
" this architecture");
return;
}
validate_bootdevices(boot_order, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
boot_set_handler(boot_set_opaque, boot_order, errp);
}
void validate_bootdevices(const char *devices, Error **errp)
{
/* We just do some generic consistency checks */
const char *p;
int bitmap = 0;
for (p = devices; *p != '\0'; p++) {
/* Allowed boot devices are:
* a-b: floppy disk drives
* c-f: IDE disk drives
* g-m: machine implementation dependent drives
* n-p: network devices
* It's up to each machine implementation to check if the given boot
* devices match the actual hardware implementation and firmware
* features.
*/
if (*p < 'a' || *p > 'p') {
error_setg(errp, "Invalid boot device '%c'", *p);
return;
}
if (bitmap & (1 << (*p - 'a'))) {
error_setg(errp, "Boot device '%c' was given twice", *p);
return;
}
bitmap |= 1 << (*p - 'a');
}
}
void restore_boot_order(void *opaque)
{
char *normal_boot_order = opaque;
static int first = 1;
/* Restore boot order and remove ourselves after the first boot */
if (first) {
first = 0;
return;
}
if (boot_set_handler) {
qemu_boot_set(normal_boot_order, &error_abort);
}
qemu_unregister_reset(restore_boot_order, normal_boot_order);
g_free(normal_boot_order);
}
void check_boot_index(int32_t bootindex, Error **errp)
{
FWBootEntry *i;
if (bootindex >= 0) {
QTAILQ_FOREACH(i, &fw_boot_order, link) {
if (i->bootindex == bootindex) {
error_setg(errp, "The bootindex %d has already been used",
bootindex);
return;
}
}
}
}
void del_boot_device_path(DeviceState *dev, const char *suffix)
{
FWBootEntry *i;
if (dev == NULL) {
return;
}
QTAILQ_FOREACH(i, &fw_boot_order, link) {
if ((!suffix || !g_strcmp0(i->suffix, suffix)) &&
i->dev == dev) {
QTAILQ_REMOVE(&fw_boot_order, i, link);
g_free(i->suffix);
g_free(i);
break;
}
}
}
void add_boot_device_path(int32_t bootindex, DeviceState *dev,
const char *suffix)
{
FWBootEntry *node, *i;
if (bootindex < 0) {
del_boot_device_path(dev, suffix);
return;
}
assert(dev != NULL || suffix != NULL);
del_boot_device_path(dev, suffix);
node = g_new0(FWBootEntry, 1);
node->bootindex = bootindex;
node->suffix = g_strdup(suffix);
node->dev = dev;
QTAILQ_FOREACH(i, &fw_boot_order, link) {
if (i->bootindex == bootindex) {
error_report("Two devices with same boot index %d", bootindex);
exit(1);
} else if (i->bootindex < bootindex) {
continue;
}
QTAILQ_INSERT_BEFORE(i, node, link);
return;
}
QTAILQ_INSERT_TAIL(&fw_boot_order, node, link);
}
DeviceState *get_boot_device(uint32_t position)
{
uint32_t counter = 0;
FWBootEntry *i = NULL;
DeviceState *res = NULL;
if (!QTAILQ_EMPTY(&fw_boot_order)) {
QTAILQ_FOREACH(i, &fw_boot_order, link) {
if (counter == position) {
res = i->dev;
break;
}
counter++;
}
}
return res;
}
static char *get_boot_device_path(DeviceState *dev, bool ignore_suffixes,
const char *suffix)
{
char *devpath = NULL, *s = NULL, *d, *bootpath;
if (dev) {
devpath = qdev_get_fw_dev_path(dev);
assert(devpath);
}
if (!ignore_suffixes) {
if (dev) {
d = qdev_get_own_fw_dev_path_from_handler(dev->parent_bus, dev);
if (d) {
assert(!suffix);
s = d;
} else {
s = g_strdup(suffix);
}
} else {
s = g_strdup(suffix);
}
}
bootpath = g_strdup_printf("%s%s",
devpath ? devpath : "",
s ? s : "");
g_free(devpath);
g_free(s);
return bootpath;
}
/*
* This function returns a null-terminated string that consists of
* newline-separated device paths.
*
* The memory pointed to by "size" is assigned the total length of the
* list in bytes.
*/
char *get_boot_devices_list(size_t *size)
{
FWBootEntry *i;
size_t total = 0;
char *list = NULL;
MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
bool ignore_suffixes = mc->ignore_boot_device_suffixes;
QTAILQ_FOREACH(i, &fw_boot_order, link) {
char *bootpath;
size_t len;
bootpath = get_boot_device_path(i->dev, ignore_suffixes, i->suffix);
if (total) {
list[total-1] = '\n';
}
len = strlen(bootpath) + 1;
list = g_realloc(list, total + len);
memcpy(&list[total], bootpath, len);
total += len;
g_free(bootpath);
}
*size = total;
if (current_machine->boot_config.has_strict &&
current_machine->boot_config.strict && *size > 0) {
list[total-1] = '\n';
list = g_realloc(list, total + 5);
memcpy(&list[total], "HALT", 5);
*size = total + 5;
}
return list;
}
typedef struct {
int32_t *bootindex;
const char *suffix;
DeviceState *dev;
} BootIndexProperty;
static void device_get_bootindex(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
BootIndexProperty *prop = opaque;
visit_type_int32(v, name, prop->bootindex, errp);
}
static void device_set_bootindex(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
BootIndexProperty *prop = opaque;
int32_t boot_index;
Error *local_err = NULL;
if (!visit_type_int32(v, name, &boot_index, errp)) {
return;
}
/* check whether bootindex is present in fw_boot_order list */
check_boot_index(boot_index, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
/* change bootindex to a new one */
*prop->bootindex = boot_index;
add_boot_device_path(*prop->bootindex, prop->dev, prop->suffix);
}
static void property_release_bootindex(Object *obj, const char *name,
void *opaque)
{
BootIndexProperty *prop = opaque;
del_boot_device_path(prop->dev, prop->suffix);
g_free(prop);
}
void device_add_bootindex_property(Object *obj, int32_t *bootindex,
const char *name, const char *suffix,
DeviceState *dev)
{
BootIndexProperty *prop = g_malloc0(sizeof(*prop));
prop->bootindex = bootindex;
prop->suffix = suffix;
prop->dev = dev;
object_property_add(obj, name, "int32",
device_get_bootindex,
device_set_bootindex,
property_release_bootindex,
prop);
/* initialize devices' bootindex property to -1 */
object_property_set_int(obj, name, -1, NULL);
}
typedef struct FWLCHSEntry FWLCHSEntry;
struct FWLCHSEntry {
QTAILQ_ENTRY(FWLCHSEntry) link;
DeviceState *dev;
char *suffix;
uint32_t lcyls;
uint32_t lheads;
uint32_t lsecs;
};
static QTAILQ_HEAD(, FWLCHSEntry) fw_lchs =
QTAILQ_HEAD_INITIALIZER(fw_lchs);
void add_boot_device_lchs(DeviceState *dev, const char *suffix,
uint32_t lcyls, uint32_t lheads, uint32_t lsecs)
{
FWLCHSEntry *node;
if (!lcyls && !lheads && !lsecs) {
return;
}
assert(dev != NULL || suffix != NULL);
node = g_new0(FWLCHSEntry, 1);
node->suffix = g_strdup(suffix);
node->dev = dev;
node->lcyls = lcyls;
node->lheads = lheads;
node->lsecs = lsecs;
QTAILQ_INSERT_TAIL(&fw_lchs, node, link);
}
void del_boot_device_lchs(DeviceState *dev, const char *suffix)
{
FWLCHSEntry *i;
if (dev == NULL) {
return;
}
QTAILQ_FOREACH(i, &fw_lchs, link) {
if ((!suffix || !g_strcmp0(i->suffix, suffix)) &&
i->dev == dev) {
QTAILQ_REMOVE(&fw_lchs, i, link);
g_free(i->suffix);
g_free(i);
break;
}
}
}
char *get_boot_devices_lchs_list(size_t *size)
{
FWLCHSEntry *i;
size_t total = 0;
char *list = NULL;
QTAILQ_FOREACH(i, &fw_lchs, link) {
char *bootpath;
char *chs_string;
size_t len;
bootpath = get_boot_device_path(i->dev, false, i->suffix);
chs_string = g_strdup_printf("%s %" PRIu32 " %" PRIu32 " %" PRIu32,
bootpath, i->lcyls, i->lheads, i->lsecs);
if (total) {
list[total - 1] = '\n';
}
len = strlen(chs_string) + 1;
list = g_realloc(list, total + len);
memcpy(&list[total], chs_string, len);
total += len;
g_free(chs_string);
g_free(bootpath);
}
*size = total;
return list;
}
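
get_boot_devices_list() and get_boot_devices_lchs_list() both build a single NUL-terminated buffer in which each previously written NUL is overwritten with '\n' once the next entry arrives (and, for -boot strict=on, get_boot_devices_list() appends a final "\nHALT"). A small standalone sketch of that separator trick with made-up firmware paths:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
    const char *paths[] = { "/pci@i0cf8/ide@1,1/drive@1/disk@0",
                            "/pci@i0cf8/ethernet@3/ethernet-phy@0" };
    size_t total = 0;
    char *list = NULL;

    for (size_t i = 0; i < 2; i++) {
        size_t len = strlen(paths[i]) + 1;   /* copy the NUL too */
        if (total) {
            list[total - 1] = '\n';          /* previous NUL becomes '\n' */
        }
        list = realloc(list, total + len);   /* error checks omitted */
        memcpy(&list[total], paths[i], len);
        total += len;
    }
    printf("%s\n", list);                    /* two newline-separated paths */
    free(list);
    return 0;
}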

system/cpu-throttle.c (new file, 128 lines)

@@ -0,0 +1,128 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qemu/thread.h"
#include "hw/core/cpu.h"
#include "qemu/main-loop.h"
#include "sysemu/cpus.h"
#include "sysemu/cpu-throttle.h"
/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;
#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000
static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
{
double pct;
double throttle_ratio;
int64_t sleeptime_ns, endtime_ns;
if (!cpu_throttle_get_percentage()) {
return;
}
pct = (double)cpu_throttle_get_percentage() / 100;
throttle_ratio = pct / (1 - pct);
/* Add 1ns to fix double's rounding error (like 0.9999999...) */
sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns;
while (sleeptime_ns > 0 && !cpu->stop) {
if (sleeptime_ns > SCALE_MS) {
qemu_cond_timedwait_iothread(cpu->halt_cond,
sleeptime_ns / SCALE_MS);
} else {
qemu_mutex_unlock_iothread();
g_usleep(sleeptime_ns / SCALE_US);
qemu_mutex_lock_iothread();
}
sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
}
qatomic_set(&cpu->throttle_thread_scheduled, 0);
}
static void cpu_throttle_timer_tick(void *opaque)
{
CPUState *cpu;
double pct;
/* Stop the timer if needed */
if (!cpu_throttle_get_percentage()) {
return;
}
CPU_FOREACH(cpu) {
if (!qatomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
async_run_on_cpu(cpu, cpu_throttle_thread,
RUN_ON_CPU_NULL);
}
}
pct = (double)cpu_throttle_get_percentage() / 100;
timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
CPU_THROTTLE_TIMESLICE_NS / (1 - pct));
}
void cpu_throttle_set(int new_throttle_pct)
{
/*
* boolean to store whether throttle is already active or not,
* before modifying throttle_percentage
*/
bool throttle_active = cpu_throttle_active();
/* Ensure throttle percentage is within valid range */
new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
qatomic_set(&throttle_percentage, new_throttle_pct);
if (!throttle_active) {
cpu_throttle_timer_tick(NULL);
}
}
void cpu_throttle_stop(void)
{
qatomic_set(&throttle_percentage, 0);
}
bool cpu_throttle_active(void)
{
return (cpu_throttle_get_percentage() != 0);
}
int cpu_throttle_get_percentage(void)
{
return qatomic_read(&throttle_percentage);
}
void cpu_throttle_init(void)
{
throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
cpu_throttle_timer_tick, NULL);
}
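
The throttle math above converts a target idle percentage into a sleep time per 10 ms timeslice: throttle_ratio = pct / (1 - pct), so at 60% the vCPU sleeps roughly 15 ms for every 10 ms of run time (15 / 25 = 60% idle). A worked sketch of just that arithmetic (the 60% figure is an arbitrary example):

#include <stdio.h>

#define CPU_THROTTLE_TIMESLICE_NS 10000000   /* 10 ms, as above */

int main(void)
{
    int throttle_pct = 60;                   /* hypothetical 60% throttle */
    double pct = throttle_pct / 100.0;
    double ratio = pct / (1 - pct);          /* sleep time per run time */
    long long sleep_ns = (long long)(ratio * CPU_THROTTLE_TIMESLICE_NS + 1);

    /* 10 ms running + ~15 ms sleeping = ~25 ms period, 15/25 = 60% idle */
    printf("sleep %lld ns per timeslice, idle fraction %.2f\n", sleep_ns,
           (double)sleep_ns / (sleep_ns + CPU_THROTTLE_TIMESLICE_NS));
    return 0;
}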

system/cpu-timers.c (new file, 277 lines)

@@ -0,0 +1,277 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "sysemu/cpus.h"
#include "qemu/main-loop.h"
#include "qemu/option.h"
#include "qemu/seqlock.h"
#include "sysemu/replay.h"
#include "sysemu/runstate.h"
#include "hw/core/cpu.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/cpu-throttle.h"
#include "sysemu/cpu-timers-internal.h"
/* clock and ticks */
static int64_t cpu_get_ticks_locked(void)
{
int64_t ticks = timers_state.cpu_ticks_offset;
if (timers_state.cpu_ticks_enabled) {
ticks += cpu_get_host_ticks();
}
if (timers_state.cpu_ticks_prev > ticks) {
/* Non-increasing ticks may happen if the host uses software suspend. */
timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
ticks = timers_state.cpu_ticks_prev;
}
timers_state.cpu_ticks_prev = ticks;
return ticks;
}
/*
* return the time elapsed in VM between vm_start and vm_stop.
* cpu_get_ticks() uses units of the host CPU cycle counter.
*/
int64_t cpu_get_ticks(void)
{
int64_t ticks;
qemu_spin_lock(&timers_state.vm_clock_lock);
ticks = cpu_get_ticks_locked();
qemu_spin_unlock(&timers_state.vm_clock_lock);
return ticks;
}
int64_t cpu_get_clock_locked(void)
{
int64_t time;
time = timers_state.cpu_clock_offset;
if (timers_state.cpu_ticks_enabled) {
time += get_clock();
}
return time;
}
/*
* Return the monotonic time elapsed in VM, i.e.,
* the time between vm_start and vm_stop
*/
int64_t cpu_get_clock(void)
{
int64_t ti;
unsigned start;
do {
start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
ti = cpu_get_clock_locked();
} while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
return ti;
}
/*
* enable cpu_get_ticks()
* Caller must hold BQL which serves as mutex for vm_clock_seqlock.
*/
void cpu_enable_ticks(void)
{
seqlock_write_lock(&timers_state.vm_clock_seqlock,
&timers_state.vm_clock_lock);
if (!timers_state.cpu_ticks_enabled) {
timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
timers_state.cpu_clock_offset -= get_clock();
timers_state.cpu_ticks_enabled = 1;
}
seqlock_write_unlock(&timers_state.vm_clock_seqlock,
&timers_state.vm_clock_lock);
}
/*
* disable cpu_get_ticks() : the clock is stopped. You must not call
* cpu_get_ticks() after that.
* Caller must hold BQL which serves as mutex for vm_clock_seqlock.
*/
void cpu_disable_ticks(void)
{
seqlock_write_lock(&timers_state.vm_clock_seqlock,
&timers_state.vm_clock_lock);
if (timers_state.cpu_ticks_enabled) {
timers_state.cpu_ticks_offset += cpu_get_host_ticks();
timers_state.cpu_clock_offset = cpu_get_clock_locked();
timers_state.cpu_ticks_enabled = 0;
}
seqlock_write_unlock(&timers_state.vm_clock_seqlock,
&timers_state.vm_clock_lock);
}
static bool icount_state_needed(void *opaque)
{
return icount_enabled();
}
static bool warp_timer_state_needed(void *opaque)
{
TimersState *s = opaque;
return s->icount_warp_timer != NULL;
}
static bool adjust_timers_state_needed(void *opaque)
{
TimersState *s = opaque;
return s->icount_rt_timer != NULL;
}
static bool icount_shift_state_needed(void *opaque)
{
return icount_enabled() == 2;
}
/*
* Subsection for warp timer migration is optional, because it may not be created
*/
static const VMStateDescription icount_vmstate_warp_timer = {
.name = "timer/icount/warp_timer",
.version_id = 1,
.minimum_version_id = 1,
.needed = warp_timer_state_needed,
.fields = (VMStateField[]) {
VMSTATE_INT64(vm_clock_warp_start, TimersState),
VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
VMSTATE_END_OF_LIST()
}
};
static const VMStateDescription icount_vmstate_adjust_timers = {
.name = "timer/icount/timers",
.version_id = 1,
.minimum_version_id = 1,
.needed = adjust_timers_state_needed,
.fields = (VMStateField[]) {
VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
VMSTATE_END_OF_LIST()
}
};
static const VMStateDescription icount_vmstate_shift = {
.name = "timer/icount/shift",
.version_id = 2,
.minimum_version_id = 2,
.needed = icount_shift_state_needed,
.fields = (VMStateField[]) {
VMSTATE_INT16(icount_time_shift, TimersState),
VMSTATE_INT64(last_delta, TimersState),
VMSTATE_END_OF_LIST()
}
};
/*
* This is a subsection for icount migration.
*/
static const VMStateDescription icount_vmstate_timers = {
.name = "timer/icount",
.version_id = 1,
.minimum_version_id = 1,
.needed = icount_state_needed,
.fields = (VMStateField[]) {
VMSTATE_INT64(qemu_icount_bias, TimersState),
VMSTATE_INT64(qemu_icount, TimersState),
VMSTATE_END_OF_LIST()
},
.subsections = (const VMStateDescription * []) {
&icount_vmstate_warp_timer,
&icount_vmstate_adjust_timers,
&icount_vmstate_shift,
NULL
}
};
static const VMStateDescription vmstate_timers = {
.name = "timer",
.version_id = 2,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_INT64(cpu_ticks_offset, TimersState),
VMSTATE_UNUSED(8),
VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
VMSTATE_END_OF_LIST()
},
.subsections = (const VMStateDescription * []) {
&icount_vmstate_timers,
NULL
}
};
static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
}
void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
{
if (!icount_enabled() || type != QEMU_CLOCK_VIRTUAL) {
qemu_notify_event();
return;
}
if (qemu_in_vcpu_thread()) {
/*
* A CPU is currently running; kick it back out to the
* tcg_cpu_exec() loop so it will recalculate its
* icount deadline immediately.
*/
qemu_cpu_kick(current_cpu);
} else if (first_cpu) {
/*
* qemu_cpu_kick is not enough to kick a halted CPU out of
* qemu_tcg_wait_io_event. async_run_on_cpu, instead,
* causes cpu_thread_is_idle to return false. This way,
* handle_icount_deadline can run.
* If we have no CPUs at all for some reason, we don't
* need to do anything.
*/
async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
}
}
TimersState timers_state;
/* initialize timers state and the cpu throttle for convenience */
void cpu_timers_init(void)
{
seqlock_init(&timers_state.vm_clock_seqlock);
qemu_spin_init(&timers_state.vm_clock_lock);
vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
cpu_throttle_init();
}
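
cpu_enable_ticks() and cpu_disable_ticks() implement a pausable counter on top of a free-running one: the offset absorbs the host counter on each transition, so reads while enabled return offset + host ticks and reads while disabled return the frozen offset. A self-contained sketch of that bookkeeping (toy tick values, no seqlock):

#include <stdio.h>

static long long offset;      /* plays the role of cpu_ticks_offset */
static long long host_ticks;  /* stands in for cpu_get_host_ticks() */
static int enabled;

static long long get_ticks(void)
{
    return enabled ? offset + host_ticks : offset;
}

static void enable(void)  { if (!enabled) { offset -= host_ticks; enabled = 1; } }
static void disable(void) { if (enabled)  { offset += host_ticks; enabled = 0; } }

int main(void)
{
    host_ticks = 100; enable();      /* vm_start at host tick 100 */
    host_ticks = 250;
    printf("%lld\n", get_ticks());   /* 150 ticks of guest time */
    disable();                       /* vm_stop: the clock freezes */
    host_ticks = 900;
    printf("%lld\n", get_ticks());   /* still 150 */
    host_ticks = 1000; enable();     /* vm_start again */
    host_ticks = 1050;
    printf("%lld\n", get_ticks());   /* 200 */
    return 0;
}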

system/cpus.c (new file, 822 lines)

@@ -0,0 +1,822 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "monitor/monitor.h"
#include "qemu/coroutine-tls.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-machine.h"
#include "qapi/qapi-commands-misc.h"
#include "qapi/qapi-events-run-state.h"
#include "qapi/qmp/qerror.h"
#include "exec/gdbstub.h"
#include "sysemu/hw_accel.h"
#include "exec/cpu-common.h"
#include "qemu/thread.h"
#include "qemu/main-loop.h"
#include "qemu/plugin.h"
#include "sysemu/cpus.h"
#include "qemu/guest-random.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"
#include "sysemu/runstate.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/whpx.h"
#include "hw/boards.h"
#include "hw/hw.h"
#include "trace.h"
#ifdef CONFIG_LINUX
#include <sys/prctl.h>
#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif
#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif
#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif
#endif /* CONFIG_LINUX */
static QemuMutex qemu_global_mutex;
/*
* The chosen accelerator is supposed to register this.
*/
static const AccelOpsClass *cpus_accel;
bool cpu_is_stopped(CPUState *cpu)
{
return cpu->stopped || !runstate_is_running();
}
bool cpu_work_list_empty(CPUState *cpu)
{
return QSIMPLEQ_EMPTY_ATOMIC(&cpu->work_list);
}
bool cpu_thread_is_idle(CPUState *cpu)
{
if (cpu->stop || !cpu_work_list_empty(cpu)) {
return false;
}
if (cpu_is_stopped(cpu)) {
return true;
}
if (!cpu->halted || cpu_has_work(cpu)) {
return false;
}
if (cpus_accel->cpu_thread_is_idle) {
return cpus_accel->cpu_thread_is_idle(cpu);
}
return true;
}
bool all_cpu_threads_idle(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
if (!cpu_thread_is_idle(cpu)) {
return false;
}
}
return true;
}
/***********************************************************/
void hw_error(const char *fmt, ...)
{
va_list ap;
CPUState *cpu;
va_start(ap, fmt);
fprintf(stderr, "qemu: hardware error: ");
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
CPU_FOREACH(cpu) {
fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
cpu_dump_state(cpu, stderr, CPU_DUMP_FPU);
}
va_end(ap);
abort();
}
void cpu_synchronize_all_states(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
cpu_synchronize_state(cpu);
}
}
void cpu_synchronize_all_post_reset(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
cpu_synchronize_post_reset(cpu);
}
}
void cpu_synchronize_all_post_init(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
cpu_synchronize_post_init(cpu);
}
}
void cpu_synchronize_all_pre_loadvm(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
cpu_synchronize_pre_loadvm(cpu);
}
}
void cpu_synchronize_state(CPUState *cpu)
{
if (cpus_accel->synchronize_state) {
cpus_accel->synchronize_state(cpu);
}
}
void cpu_synchronize_post_reset(CPUState *cpu)
{
if (cpus_accel->synchronize_post_reset) {
cpus_accel->synchronize_post_reset(cpu);
}
}
void cpu_synchronize_post_init(CPUState *cpu)
{
if (cpus_accel->synchronize_post_init) {
cpus_accel->synchronize_post_init(cpu);
}
}
void cpu_synchronize_pre_loadvm(CPUState *cpu)
{
if (cpus_accel->synchronize_pre_loadvm) {
cpus_accel->synchronize_pre_loadvm(cpu);
}
}
bool cpus_are_resettable(void)
{
if (cpus_accel->cpus_are_resettable) {
return cpus_accel->cpus_are_resettable();
}
return true;
}
int64_t cpus_get_virtual_clock(void)
{
/*
* XXX
*
* need to check that cpus_accel is not NULL, because qcow2 calls
* qemu_get_clock_ns(CLOCK_VIRTUAL) without any accel initialized and
* with ticks disabled in some io-tests:
* 030 040 041 060 099 120 127 140 156 161 172 181 191 192 195 203 229 249 256 267
*
* is this expected?
*
* XXX
*/
if (cpus_accel && cpus_accel->get_virtual_clock) {
return cpus_accel->get_virtual_clock();
}
return cpu_get_clock();
}
/*
* return the time elapsed in VM between vm_start and vm_stop. Unless
* icount is active, cpus_get_elapsed_ticks() uses units of the host CPU cycle
* counter.
*/
int64_t cpus_get_elapsed_ticks(void)
{
if (cpus_accel->get_elapsed_ticks) {
return cpus_accel->get_elapsed_ticks();
}
return cpu_get_ticks();
}
static void generic_handle_interrupt(CPUState *cpu, int mask)
{
cpu->interrupt_request |= mask;
if (!qemu_cpu_is_self(cpu)) {
qemu_cpu_kick(cpu);
}
}
void cpu_interrupt(CPUState *cpu, int mask)
{
if (cpus_accel->handle_interrupt) {
cpus_accel->handle_interrupt(cpu, mask);
} else {
generic_handle_interrupt(cpu, mask);
}
}
static int do_vm_stop(RunState state, bool send_stop)
{
int ret = 0;
if (runstate_is_running()) {
runstate_set(state);
cpu_disable_ticks();
pause_all_vcpus();
vm_state_notify(0, state);
if (send_stop) {
qapi_event_send_stop();
}
}
bdrv_drain_all();
ret = bdrv_flush_all();
trace_vm_stop_flush_all(ret);
return ret;
}
/* Special vm_stop() variant for terminating the process. Historically clients
* did not expect a QMP STOP event and so we need to retain compatibility.
*/
int vm_shutdown(void)
{
return do_vm_stop(RUN_STATE_SHUTDOWN, false);
}
bool cpu_can_run(CPUState *cpu)
{
if (cpu->stop) {
return false;
}
if (cpu_is_stopped(cpu)) {
return false;
}
return true;
}
void cpu_handle_guest_debug(CPUState *cpu)
{
if (replay_running_debug()) {
if (!cpu->singlestep_enabled) {
/*
* Report about the breakpoint and
* make a single step to skip it
*/
replay_breakpoint();
cpu_single_step(cpu, SSTEP_ENABLE);
} else {
cpu_single_step(cpu, 0);
}
} else {
gdb_set_stop_cpu(cpu);
qemu_system_debug_request();
cpu->stopped = true;
}
}
#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
sigset_t set;
struct sigaction action;
memset(&action, 0, sizeof(action));
action.sa_handler = SIG_DFL;
if (!sigaction(SIGBUS, &action, NULL)) {
raise(SIGBUS);
sigemptyset(&set);
sigaddset(&set, SIGBUS);
pthread_sigmask(SIG_UNBLOCK, &set, NULL);
}
perror("Failed to re-raise SIGBUS!");
abort();
}
static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
{
if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
sigbus_reraise();
}
if (current_cpu) {
/* Called asynchronously in VCPU thread. */
if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
sigbus_reraise();
}
} else {
/* Called synchronously (via signalfd) in main thread. */
if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
sigbus_reraise();
}
}
}
static void qemu_init_sigbus(void)
{
struct sigaction action;
/*
* ALERT: when modifying this, take care that SIGBUS forwarding in
* qemu_prealloc_mem() will continue working as expected.
*/
memset(&action, 0, sizeof(action));
action.sa_flags = SA_SIGINFO;
action.sa_sigaction = sigbus_handler;
sigaction(SIGBUS, &action, NULL);
prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
#else /* !CONFIG_LINUX */
static void qemu_init_sigbus(void)
{
}
#endif /* !CONFIG_LINUX */
static QemuThread io_thread;
/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
void qemu_init_cpu_loop(void)
{
qemu_init_sigbus();
qemu_cond_init(&qemu_cpu_cond);
qemu_cond_init(&qemu_pause_cond);
qemu_mutex_init(&qemu_global_mutex);
qemu_thread_get_self(&io_thread);
}
void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}
static void qemu_cpu_stop(CPUState *cpu, bool exit)
{
g_assert(qemu_cpu_is_self(cpu));
cpu->stop = false;
cpu->stopped = true;
if (exit) {
cpu_exit(cpu);
}
qemu_cond_broadcast(&qemu_pause_cond);
}
void qemu_wait_io_event_common(CPUState *cpu)
{
qatomic_set_mb(&cpu->thread_kicked, false);
if (cpu->stop) {
qemu_cpu_stop(cpu, false);
}
process_queued_cpu_work(cpu);
}
void qemu_wait_io_event(CPUState *cpu)
{
bool slept = false;
while (cpu_thread_is_idle(cpu)) {
if (!slept) {
slept = true;
qemu_plugin_vcpu_idle_cb(cpu);
}
qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
}
if (slept) {
qemu_plugin_vcpu_resume_cb(cpu);
}
qemu_wait_io_event_common(cpu);
}
void cpus_kick_thread(CPUState *cpu)
{
if (cpu->thread_kicked) {
return;
}
cpu->thread_kicked = true;
#ifndef _WIN32
int err = pthread_kill(cpu->thread->thread, SIG_IPI);
if (err && err != ESRCH) {
fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
exit(1);
}
#else
qemu_sem_post(&cpu->sem);
#endif
}
void qemu_cpu_kick(CPUState *cpu)
{
qemu_cond_broadcast(cpu->halt_cond);
if (cpus_accel->kick_vcpu_thread) {
cpus_accel->kick_vcpu_thread(cpu);
} else { /* default */
cpus_kick_thread(cpu);
}
}
void qemu_cpu_kick_self(void)
{
assert(current_cpu);
cpus_kick_thread(current_cpu);
}
bool qemu_cpu_is_self(CPUState *cpu)
{
return qemu_thread_is_self(cpu->thread);
}
bool qemu_in_vcpu_thread(void)
{
return current_cpu && qemu_cpu_is_self(current_cpu);
}
QEMU_DEFINE_STATIC_CO_TLS(bool, iothread_locked)
bool qemu_mutex_iothread_locked(void)
{
return get_iothread_locked();
}
bool qemu_in_main_thread(void)
{
return qemu_mutex_iothread_locked();
}
/*
* The BQL is taken from so many places that it is worth profiling the
* callers directly, instead of funneling them all through a single function.
*/
void qemu_mutex_lock_iothread_impl(const char *file, int line)
{
QemuMutexLockFunc bql_lock = qatomic_read(&qemu_bql_mutex_lock_func);
g_assert(!qemu_mutex_iothread_locked());
bql_lock(&qemu_global_mutex, file, line);
set_iothread_locked(true);
}
void qemu_mutex_unlock_iothread(void)
{
g_assert(qemu_mutex_iothread_locked());
set_iothread_locked(false);
qemu_mutex_unlock(&qemu_global_mutex);
}
void qemu_cond_wait_iothread(QemuCond *cond)
{
qemu_cond_wait(cond, &qemu_global_mutex);
}
void qemu_cond_timedwait_iothread(QemuCond *cond, int ms)
{
qemu_cond_timedwait(cond, &qemu_global_mutex, ms);
}
/* signal CPU creation */
void cpu_thread_signal_created(CPUState *cpu)
{
cpu->created = true;
qemu_cond_signal(&qemu_cpu_cond);
}
/* signal CPU destruction */
void cpu_thread_signal_destroyed(CPUState *cpu)
{
cpu->created = false;
qemu_cond_signal(&qemu_cpu_cond);
}
static bool all_vcpus_paused(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
if (!cpu->stopped) {
return false;
}
}
return true;
}
void pause_all_vcpus(void)
{
CPUState *cpu;
qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
CPU_FOREACH(cpu) {
if (qemu_cpu_is_self(cpu)) {
qemu_cpu_stop(cpu, true);
} else {
cpu->stop = true;
qemu_cpu_kick(cpu);
}
}
/* We need to drop the replay_lock so any vCPU threads woken up
* can finish their replay tasks
*/
replay_mutex_unlock();
while (!all_vcpus_paused()) {
qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
CPU_FOREACH(cpu) {
qemu_cpu_kick(cpu);
}
}
qemu_mutex_unlock_iothread();
replay_mutex_lock();
qemu_mutex_lock_iothread();
}
void cpu_resume(CPUState *cpu)
{
cpu->stop = false;
cpu->stopped = false;
qemu_cpu_kick(cpu);
}
void resume_all_vcpus(void)
{
CPUState *cpu;
if (!runstate_is_running()) {
return;
}
qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
CPU_FOREACH(cpu) {
cpu_resume(cpu);
}
}
void cpu_remove_sync(CPUState *cpu)
{
cpu->stop = true;
cpu->unplug = true;
qemu_cpu_kick(cpu);
qemu_mutex_unlock_iothread();
qemu_thread_join(cpu->thread);
qemu_mutex_lock_iothread();
}
void cpus_register_accel(const AccelOpsClass *ops)
{
assert(ops != NULL);
assert(ops->create_vcpu_thread != NULL); /* mandatory */
cpus_accel = ops;
}
const AccelOpsClass *cpus_get_accel(void)
{
/* broken if we call this early */
assert(cpus_accel);
return cpus_accel;
}
void qemu_init_vcpu(CPUState *cpu)
{
MachineState *ms = MACHINE(qdev_get_machine());
cpu->nr_cores = ms->smp.cores;
cpu->nr_threads = ms->smp.threads;
cpu->stopped = true;
cpu->random_seed = qemu_guest_random_seed_thread_part1();
if (!cpu->as) {
/* If the target cpu hasn't set up any address spaces itself,
* give it the default one.
*/
cpu->num_ases = 1;
cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
}
/* accelerators all implement the AccelOpsClass */
g_assert(cpus_accel != NULL && cpus_accel->create_vcpu_thread != NULL);
cpus_accel->create_vcpu_thread(cpu);
while (!cpu->created) {
qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
}
}
void cpu_stop_current(void)
{
if (current_cpu) {
current_cpu->stop = true;
cpu_exit(current_cpu);
}
}
int vm_stop(RunState state)
{
if (qemu_in_vcpu_thread()) {
qemu_system_vmstop_request_prepare();
qemu_system_vmstop_request(state);
/*
* FIXME: should not return to device code in case
* vm_stop() has been requested.
*/
cpu_stop_current();
return 0;
}
return do_vm_stop(state, true);
}
/**
* Prepare for (re)starting the VM.
* Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
* running or in case of an error condition), 0 otherwise.
*/
int vm_prepare_start(bool step_pending)
{
RunState requested;
qemu_vmstop_requested(&requested);
if (runstate_is_running() && requested == RUN_STATE__MAX) {
return -1;
}
/* Ensure that a STOP/RESUME pair of events is emitted if a
* vmstop request was pending. The BLOCK_IO_ERROR event, for
* example, according to documentation is always followed by
* the STOP event.
*/
if (runstate_is_running()) {
qapi_event_send_stop();
qapi_event_send_resume();
return -1;
}
/*
* WHPX accelerator needs to know whether we are going to step
* any CPUs, before starting the first one.
*/
if (cpus_accel->synchronize_pre_resume) {
cpus_accel->synchronize_pre_resume(step_pending);
}
/* We are sending this now, but the CPUs will be resumed shortly later */
qapi_event_send_resume();
cpu_enable_ticks();
runstate_set(RUN_STATE_RUNNING);
vm_state_notify(1, RUN_STATE_RUNNING);
return 0;
}
void vm_start(void)
{
if (!vm_prepare_start(false)) {
resume_all_vcpus();
}
}
/* does a state transition even if the VM is already stopped;
the current state is forgotten forever */
int vm_stop_force_state(RunState state)
{
if (runstate_is_running()) {
return vm_stop(state);
} else {
int ret;
runstate_set(state);
bdrv_drain_all();
/* Make sure to return an error if the flush in a previous vm_stop()
* failed. */
ret = bdrv_flush_all();
trace_vm_stop_flush_all(ret);
return ret;
}
}
void qmp_memsave(int64_t addr, int64_t size, const char *filename,
bool has_cpu, int64_t cpu_index, Error **errp)
{
FILE *f;
uint32_t l;
CPUState *cpu;
uint8_t buf[1024];
int64_t orig_addr = addr, orig_size = size;
if (!has_cpu) {
cpu_index = 0;
}
cpu = qemu_get_cpu(cpu_index);
if (cpu == NULL) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
"a CPU number");
return;
}
f = fopen(filename, "wb");
if (!f) {
error_setg_file_open(errp, errno, filename);
return;
}
while (size != 0) {
l = sizeof(buf);
if (l > size)
l = size;
if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
" specified", orig_addr, orig_size);
goto exit;
}
if (fwrite(buf, 1, l, f) != l) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
}
addr += l;
size -= l;
}
exit:
fclose(f);
}
void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
Error **errp)
{
FILE *f;
uint32_t l;
uint8_t buf[1024];
f = fopen(filename, "wb");
if (!f) {
error_setg_file_open(errp, errno, filename);
return;
}
while (size != 0) {
l = sizeof(buf);
if (l > size)
l = size;
cpu_physical_memory_read(addr, buf, l);
if (fwrite(buf, 1, l, f) != l) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
}
addr += l;
size -= l;
}
exit:
fclose(f);
}
void qmp_inject_nmi(Error **errp)
{
nmi_monitor_handle(monitor_get_cpu_index(monitor_cur()), errp);
}
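
sigbus_reraise() above uses a common idiom: restore SIG_DFL, raise() while the signal is still blocked inside the handler, then unblock so the default (fatal) action fires immediately. A standalone single-threaded sketch of the same idiom using SIGTERM (demo only; the QEMU code uses pthread_sigmask because vCPU threads are involved):

#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void reraise(int sig)
{
    struct sigaction action;
    sigset_t set;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;                /* restore the fatal default */
    if (!sigaction(sig, &action, NULL)) {
        raise(sig);                             /* pending: blocked in handler */
        sigemptyset(&set);
        sigaddset(&set, sig);
        sigprocmask(SIG_UNBLOCK, &set, NULL);   /* delivered here, fatally */
    }
}

static void handler(int sig)
{
    write(STDOUT_FILENO, "handled once\n", 13);
    reraise(sig);
}

int main(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = handler;
    sigaction(SIGTERM, &action, NULL);
    raise(SIGTERM);
    puts("not reached");                        /* process died in reraise() */
    return 0;
}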

system/datadir.c (new file, 110 lines)

@@ -0,0 +1,110 @@
/*
* QEMU firmware and keymap file search
*
* Copyright (c) 2003-2020 QEMU contributors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qemu/datadir.h"
#include "qemu/cutils.h"
#include "trace.h"
static const char *data_dir[16];
static int data_dir_idx;
char *qemu_find_file(int type, const char *name)
{
int i;
const char *subdir;
char *buf;
/* Try the name as a straight path first */
if (access(name, R_OK) == 0) {
trace_load_file(name, name);
return g_strdup(name);
}
switch (type) {
case QEMU_FILE_TYPE_BIOS:
subdir = "";
break;
case QEMU_FILE_TYPE_KEYMAP:
subdir = "keymaps/";
break;
default:
abort();
}
for (i = 0; i < data_dir_idx; i++) {
buf = g_strdup_printf("%s/%s%s", data_dir[i], subdir, name);
if (access(buf, R_OK) == 0) {
trace_load_file(name, buf);
return buf;
}
g_free(buf);
}
return NULL;
}
void qemu_add_data_dir(char *path)
{
int i;
if (path == NULL) {
return;
}
if (data_dir_idx == ARRAY_SIZE(data_dir)) {
return;
}
for (i = 0; i < data_dir_idx; i++) {
if (strcmp(data_dir[i], path) == 0) {
g_free(path); /* duplicate */
return;
}
}
data_dir[data_dir_idx++] = path;
}
void qemu_add_default_firmwarepath(void)
{
static const char * const dirs[] = {
CONFIG_QEMU_FIRMWAREPATH
NULL
};
size_t i;
/* add configured firmware directories */
for (i = 0; dirs[i] != NULL; i++) {
qemu_add_data_dir(get_relocated_path(dirs[i]));
}
/* try to find datadir relative to the executable path */
qemu_add_data_dir(get_relocated_path(CONFIG_QEMU_DATADIR));
}
void qemu_list_data_dirs(void)
{
int i;
for (i = 0; i < data_dir_idx; i++) {
printf("%s\n", data_dir[i]);
}
}
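
qemu_find_file() resolves a firmware or keymap name by trying it as a plain path first and then probing each registered data directory in order. A stripped-down sketch of the same lookup (hypothetical directories, plain libc instead of glib; "efi-virtio.rom" is just an example name):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static const char *data_dirs[] = { "/usr/local/share/qemu",  /* hypothetical */
                                   "/usr/share/qemu" };

static char *find_file(const char *name)
{
    char buf[4096];

    if (access(name, R_OK) == 0) {           /* straight path wins */
        return strdup(name);
    }
    for (size_t i = 0; i < sizeof(data_dirs) / sizeof(data_dirs[0]); i++) {
        snprintf(buf, sizeof(buf), "%s/%s", data_dirs[i], name);
        if (access(buf, R_OK) == 0) {        /* first hit in order wins */
            return strdup(buf);
        }
    }
    return NULL;
}

int main(void)
{
    char *p = find_file("efi-virtio.rom");
    printf("%s\n", p ? p : "(not found)");
    free(p);
    return 0;
}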

system/device_tree.c (new file, 703 lines)

@@ -0,0 +1,703 @@
/*
* Functions to help device tree manipulation using libfdt.
* It also provides functions to read entries from device tree proc
* interface.
*
* Copyright 2008 IBM Corporation.
* Authors: Jerone Young <jyoung5@us.ibm.com>
* Hollis Blanchard <hollisb@us.ibm.com>
*
* This work is licensed under the GNU GPL license version 2 or later.
*
*/
#include "qemu/osdep.h"
#ifdef CONFIG_LINUX
#include <dirent.h>
#endif
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/option.h"
#include "qemu/bswap.h"
#include "qemu/cutils.h"
#include "qemu/guest-random.h"
#include "sysemu/device_tree.h"
#include "hw/loader.h"
#include "hw/boards.h"
#include "qemu/config-file.h"
#include "qapi/qapi-commands-machine.h"
#include "qapi/qmp/qdict.h"
#include "monitor/hmp.h"
#include <libfdt.h>
#define FDT_MAX_SIZE 0x100000
void *create_device_tree(int *sizep)
{
void *fdt;
int ret;
*sizep = FDT_MAX_SIZE;
fdt = g_malloc0(FDT_MAX_SIZE);
ret = fdt_create(fdt, FDT_MAX_SIZE);
if (ret < 0) {
goto fail;
}
ret = fdt_finish_reservemap(fdt);
if (ret < 0) {
goto fail;
}
ret = fdt_begin_node(fdt, "");
if (ret < 0) {
goto fail;
}
ret = fdt_end_node(fdt);
if (ret < 0) {
goto fail;
}
ret = fdt_finish(fdt);
if (ret < 0) {
goto fail;
}
ret = fdt_open_into(fdt, fdt, *sizep);
if (ret) {
error_report("%s: Unable to copy device tree into memory: %s",
__func__, fdt_strerror(ret));
exit(1);
}
return fdt;
fail:
error_report("%s Couldn't create dt: %s", __func__, fdt_strerror(ret));
exit(1);
}
void *load_device_tree(const char *filename_path, int *sizep)
{
int dt_size;
int dt_file_load_size;
int ret;
void *fdt = NULL;
*sizep = 0;
dt_size = get_image_size(filename_path);
if (dt_size < 0) {
error_report("Unable to get size of device tree file '%s'",
filename_path);
goto fail;
}
if (dt_size > INT_MAX / 2 - 10000) {
error_report("Device tree file '%s' is too large", filename_path);
goto fail;
}
/* Expand to 2x size to give enough room for manipulation. */
dt_size += 10000;
dt_size *= 2;
/* First allocate space in qemu for device tree */
fdt = g_malloc0(dt_size);
dt_file_load_size = load_image_size(filename_path, fdt, dt_size);
if (dt_file_load_size < 0) {
error_report("Unable to open device tree file '%s'",
filename_path);
goto fail;
}
ret = fdt_open_into(fdt, fdt, dt_size);
if (ret) {
error_report("%s: Unable to copy device tree into memory: %s",
__func__, fdt_strerror(ret));
goto fail;
}
/* Check sanity of device tree */
if (fdt_check_header(fdt)) {
error_report("Device tree file loaded into memory is invalid: %s",
filename_path);
goto fail;
}
*sizep = dt_size;
return fdt;
fail:
g_free(fdt);
return NULL;
}
#ifdef CONFIG_LINUX
#define SYSFS_DT_BASEDIR "/proc/device-tree"
/**
* read_fstree: this function is inspired by dtc's read_fstree
* @fdt: preallocated fdt blob buffer, to be populated
* @dirname: directory to scan under SYSFS_DT_BASEDIR
* the search is recursive and the tree is searched down to the
* leaves (property files).
*
* the function asserts in case of error
*/
static void read_fstree(void *fdt, const char *dirname)
{
DIR *d;
struct dirent *de;
struct stat st;
const char *root_dir = SYSFS_DT_BASEDIR;
const char *parent_node;
if (strstr(dirname, root_dir) != dirname) {
error_report("%s: %s must be searched within %s",
__func__, dirname, root_dir);
exit(1);
}
parent_node = &dirname[strlen(SYSFS_DT_BASEDIR)];
d = opendir(dirname);
if (!d) {
error_report("%s cannot open %s", __func__, dirname);
exit(1);
}
while ((de = readdir(d)) != NULL) {
char *tmpnam;
if (!g_strcmp0(de->d_name, ".")
|| !g_strcmp0(de->d_name, "..")) {
continue;
}
tmpnam = g_strdup_printf("%s/%s", dirname, de->d_name);
if (lstat(tmpnam, &st) < 0) {
error_report("%s cannot lstat %s", __func__, tmpnam);
exit(1);
}
if (S_ISREG(st.st_mode)) {
gchar *val;
gsize len;
if (!g_file_get_contents(tmpnam, &val, &len, NULL)) {
error_report("%s not able to extract info from %s",
__func__, tmpnam);
exit(1);
}
if (strlen(parent_node) > 0) {
qemu_fdt_setprop(fdt, parent_node,
de->d_name, val, len);
} else {
qemu_fdt_setprop(fdt, "/", de->d_name, val, len);
}
g_free(val);
} else if (S_ISDIR(st.st_mode)) {
char *node_name;
node_name = g_strdup_printf("%s/%s",
parent_node, de->d_name);
qemu_fdt_add_subnode(fdt, node_name);
g_free(node_name);
read_fstree(fdt, tmpnam);
}
g_free(tmpnam);
}
closedir(d);
}
/* load_device_tree_from_sysfs: extract the dt blob from host sysfs */
void *load_device_tree_from_sysfs(void)
{
void *host_fdt;
int host_fdt_size;
host_fdt = create_device_tree(&host_fdt_size);
read_fstree(host_fdt, SYSFS_DT_BASEDIR);
if (fdt_check_header(host_fdt)) {
error_report("%s host device tree extracted into memory is invalid",
__func__);
exit(1);
}
return host_fdt;
}
#endif /* CONFIG_LINUX */
static int findnode_nofail(void *fdt, const char *node_path)
{
int offset;
offset = fdt_path_offset(fdt, node_path);
if (offset < 0) {
error_report("%s Couldn't find node %s: %s", __func__, node_path,
fdt_strerror(offset));
exit(1);
}
return offset;
}
char **qemu_fdt_node_unit_path(void *fdt, const char *name, Error **errp)
{
char *prefix = g_strdup_printf("%s@", name);
unsigned int path_len = 16, n = 0;
GSList *path_list = NULL, *iter;
const char *iter_name;
int offset, len, ret;
char **path_array;
offset = fdt_next_node(fdt, -1, NULL);
while (offset >= 0) {
iter_name = fdt_get_name(fdt, offset, &len);
if (!iter_name) {
offset = len;
break;
}
if (!strcmp(iter_name, name) || g_str_has_prefix(iter_name, prefix)) {
char *path;
path = g_malloc(path_len);
while ((ret = fdt_get_path(fdt, offset, path, path_len))
== -FDT_ERR_NOSPACE) {
path_len += 16;
path = g_realloc(path, path_len);
}
path_list = g_slist_prepend(path_list, path);
n++;
}
offset = fdt_next_node(fdt, offset, NULL);
}
g_free(prefix);
if (offset < 0 && offset != -FDT_ERR_NOTFOUND) {
error_setg(errp, "%s: abort parsing dt for %s node units: %s",
__func__, name, fdt_strerror(offset));
for (iter = path_list; iter; iter = iter->next) {
g_free(iter->data);
}
g_slist_free(path_list);
return NULL;
}
path_array = g_new(char *, n + 1);
path_array[n--] = NULL;
for (iter = path_list; iter; iter = iter->next) {
path_array[n--] = iter->data;
}
g_slist_free(path_list);
return path_array;
}
char **qemu_fdt_node_path(void *fdt, const char *name, const char *compat,
Error **errp)
{
int offset, len, ret;
const char *iter_name;
unsigned int path_len = 16, n = 0;
GSList *path_list = NULL, *iter;
char **path_array;
offset = fdt_node_offset_by_compatible(fdt, -1, compat);
while (offset >= 0) {
iter_name = fdt_get_name(fdt, offset, &len);
if (!iter_name) {
offset = len;
break;
}
if (!name || !strcmp(iter_name, name)) {
char *path;
path = g_malloc(path_len);
while ((ret = fdt_get_path(fdt, offset, path, path_len))
== -FDT_ERR_NOSPACE) {
path_len += 16;
path = g_realloc(path, path_len);
}
path_list = g_slist_prepend(path_list, path);
n++;
}
offset = fdt_node_offset_by_compatible(fdt, offset, compat);
}
if (offset < 0 && offset != -FDT_ERR_NOTFOUND) {
error_setg(errp, "%s: abort parsing dt for %s/%s: %s",
__func__, name, compat, fdt_strerror(offset));
for (iter = path_list; iter; iter = iter->next) {
g_free(iter->data);
}
g_slist_free(path_list);
return NULL;
}
path_array = g_new(char *, n + 1);
path_array[n--] = NULL;
for (iter = path_list; iter; iter = iter->next) {
path_array[n--] = iter->data;
}
g_slist_free(path_list);
return path_array;
}
int qemu_fdt_setprop(void *fdt, const char *node_path,
const char *property, const void *val, int size)
{
int r;
r = fdt_setprop(fdt, findnode_nofail(fdt, node_path), property, val, size);
if (r < 0) {
error_report("%s: Couldn't set %s/%s: %s", __func__, node_path,
property, fdt_strerror(r));
exit(1);
}
return r;
}
int qemu_fdt_setprop_cell(void *fdt, const char *node_path,
const char *property, uint32_t val)
{
int r;
r = fdt_setprop_cell(fdt, findnode_nofail(fdt, node_path), property, val);
if (r < 0) {
error_report("%s: Couldn't set %s/%s = %#08x: %s", __func__,
node_path, property, val, fdt_strerror(r));
exit(1);
}
return r;
}
int qemu_fdt_setprop_u64(void *fdt, const char *node_path,
const char *property, uint64_t val)
{
val = cpu_to_be64(val);
return qemu_fdt_setprop(fdt, node_path, property, &val, sizeof(val));
}
int qemu_fdt_setprop_string(void *fdt, const char *node_path,
const char *property, const char *string)
{
int r;
r = fdt_setprop_string(fdt, findnode_nofail(fdt, node_path), property, string);
if (r < 0) {
error_report("%s: Couldn't set %s/%s = %s: %s", __func__,
node_path, property, string, fdt_strerror(r));
exit(1);
}
return r;
}
/*
 * libfdt doesn't allow us to add string arrays directly, but they are
 * just a series of NUL-terminated strings concatenated together with a
 * total length. We build the string up here so we can calculate the
 * final length.
 */
int qemu_fdt_setprop_string_array(void *fdt, const char *node_path,
const char *prop, char **array, int len)
{
int ret, i, total_len = 0;
char *str, *p;
for (i = 0; i < len; i++) {
total_len += strlen(array[i]) + 1;
}
p = str = g_malloc0(total_len);
for (i = 0; i < len; i++) {
int offset = strlen(array[i]) + 1;
pstrcpy(p, offset, array[i]);
p += offset;
}
ret = qemu_fdt_setprop(fdt, node_path, prop, str, total_len);
g_free(str);
return ret;
}
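/*
 * Example (hypothetical node and property, for illustration only):
 *
 *     char *clocks[] = { (char *)"uartclk", (char *)"apb_pclk" };
 *     qemu_fdt_setprop_string_array(fdt, "/pl011@9000000",
 *                                   "clock-names", clocks, 2);
 *
 * stores "uartclk\0apb_pclk\0" (17 bytes) under "clock-names".
 */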
const void *qemu_fdt_getprop(void *fdt, const char *node_path,
const char *property, int *lenp, Error **errp)
{
int len;
const void *r;
if (!lenp) {
lenp = &len;
}
r = fdt_getprop(fdt, findnode_nofail(fdt, node_path), property, lenp);
if (!r) {
error_setg(errp, "%s: Couldn't get %s/%s: %s", __func__,
node_path, property, fdt_strerror(*lenp));
}
return r;
}
uint32_t qemu_fdt_getprop_cell(void *fdt, const char *node_path,
const char *property, int *lenp, Error **errp)
{
int len;
const uint32_t *p;
if (!lenp) {
lenp = &len;
}
p = qemu_fdt_getprop(fdt, node_path, property, lenp, errp);
if (!p) {
return 0;
} else if (*lenp != 4) {
error_setg(errp, "%s: %s/%s not 4 bytes long (not a cell?)",
__func__, node_path, property);
*lenp = -EINVAL;
return 0;
}
return be32_to_cpu(*p);
}
uint32_t qemu_fdt_get_phandle(void *fdt, const char *path)
{
uint32_t r;
r = fdt_get_phandle(fdt, findnode_nofail(fdt, path));
if (r == 0) {
error_report("%s: Couldn't get phandle for %s: %s", __func__,
path, fdt_strerror(r));
exit(1);
}
return r;
}
int qemu_fdt_setprop_phandle(void *fdt, const char *node_path,
const char *property,
const char *target_node_path)
{
uint32_t phandle = qemu_fdt_get_phandle(fdt, target_node_path);
return qemu_fdt_setprop_cell(fdt, node_path, property, phandle);
}
uint32_t qemu_fdt_alloc_phandle(void *fdt)
{
static int phandle = 0x0;
    /*
     * Find out whether the user told us at which phandle id to start
     * allocating phandles.
     */
if (!phandle) {
phandle = machine_phandle_start(current_machine);
}
if (!phandle) {
/*
* None or invalid phandle given on the command line, so fall back to
* default starting point.
*/
phandle = 0x8000;
}
return phandle++;
}
int qemu_fdt_nop_node(void *fdt, const char *node_path)
{
int r;
r = fdt_nop_node(fdt, findnode_nofail(fdt, node_path));
if (r < 0) {
error_report("%s: Couldn't nop node %s: %s", __func__, node_path,
fdt_strerror(r));
exit(1);
}
return r;
}
int qemu_fdt_add_subnode(void *fdt, const char *name)
{
char *dupname = g_strdup(name);
char *basename = strrchr(dupname, '/');
int retval;
int parent = 0;
if (!basename) {
g_free(dupname);
return -1;
}
basename[0] = '\0';
basename++;
if (dupname[0]) {
parent = findnode_nofail(fdt, dupname);
}
retval = fdt_add_subnode(fdt, parent, basename);
if (retval < 0) {
error_report("%s: Failed to create subnode %s: %s",
__func__, name, fdt_strerror(retval));
exit(1);
}
g_free(dupname);
return retval;
}
/*
* qemu_fdt_add_path: Like qemu_fdt_add_subnode(), but will add
* all missing subnodes from the given path.
*/
int qemu_fdt_add_path(void *fdt, const char *path)
{
const char *name;
int namelen, retval;
int parent = 0;
if (path[0] != '/') {
return -1;
}
do {
name = path + 1;
path = strchr(name, '/');
namelen = path != NULL ? path - name : strlen(name);
retval = fdt_subnode_offset_namelen(fdt, parent, name, namelen);
if (retval < 0 && retval != -FDT_ERR_NOTFOUND) {
error_report("%s: Unexpected error in finding subnode %.*s: %s",
__func__, namelen, name, fdt_strerror(retval));
exit(1);
} else if (retval == -FDT_ERR_NOTFOUND) {
retval = fdt_add_subnode_namelen(fdt, parent, name, namelen);
if (retval < 0) {
error_report("%s: Failed to create subnode %.*s: %s",
__func__, namelen, name, fdt_strerror(retval));
exit(1);
}
}
parent = retval;
} while (path);
return retval;
}
void qemu_fdt_dumpdtb(void *fdt, int size)
{
const char *dumpdtb = current_machine->dumpdtb;
if (dumpdtb) {
/* Dump the dtb to a file and quit */
if (g_file_set_contents(dumpdtb, fdt, size, NULL)) {
info_report("dtb dumped to %s. Exiting.", dumpdtb);
exit(0);
}
error_report("%s: Failed dumping dtb to %s", __func__, dumpdtb);
exit(1);
}
}
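/*
 * qemu_fdt_setprop_sized_cells_from_array: @values holds @numvalues
 * (cell-count, value) pairs, emitted as big-endian cells.  A cell count
 * other than 1 or 2, or a >32-bit value forced into one cell, fails
 * with -1.  E.g. a hypothetical "reg" property with 2-cell addresses
 * and 1-cell sizes:
 *
 *     uint64_t vals[] = { 2, 0x80000000, 1, 0x10000 };
 *     qemu_fdt_setprop_sized_cells_from_array(fdt, "/memory@80000000",
 *                                             "reg", 2, vals);
 */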
int qemu_fdt_setprop_sized_cells_from_array(void *fdt,
const char *node_path,
const char *property,
int numvalues,
uint64_t *values)
{
uint32_t *propcells;
uint64_t value;
int cellnum, vnum, ncells;
uint32_t hival;
int ret;
propcells = g_new0(uint32_t, numvalues * 2);
cellnum = 0;
for (vnum = 0; vnum < numvalues; vnum++) {
ncells = values[vnum * 2];
if (ncells != 1 && ncells != 2) {
ret = -1;
goto out;
}
value = values[vnum * 2 + 1];
hival = cpu_to_be32(value >> 32);
if (ncells > 1) {
propcells[cellnum++] = hival;
} else if (hival != 0) {
ret = -1;
goto out;
}
propcells[cellnum++] = cpu_to_be32(value);
}
ret = qemu_fdt_setprop(fdt, node_path, property, propcells,
cellnum * sizeof(uint32_t));
out:
g_free(propcells);
return ret;
}
void qmp_dumpdtb(const char *filename, Error **errp)
{
g_autoptr(GError) err = NULL;
uint32_t size;
if (!current_machine->fdt) {
error_setg(errp, "This machine doesn't have a FDT");
return;
}
size = fdt_totalsize(current_machine->fdt);
g_assert(size > 0);
if (!g_file_set_contents(filename, current_machine->fdt, size, &err)) {
error_setg(errp, "Error saving FDT to file %s: %s",
filename, err->message);
}
}
void hmp_dumpdtb(Monitor *mon, const QDict *qdict)
{
const char *filename = qdict_get_str(qdict, "filename");
Error *local_err = NULL;
qmp_dumpdtb(filename, &local_err);
if (hmp_handle_error(mon, local_err)) {
return;
}
info_report("dtb dumped to %s", filename);
}
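/*
 * Walk every node in @fdt and overwrite the contents of any "rng-seed"
 * property with fresh randomness from the host.
 */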
void qemu_fdt_randomize_seeds(void *fdt)
{
int noffset, poffset, len;
const char *name;
uint8_t *data;
for (noffset = fdt_next_node(fdt, 0, NULL);
noffset >= 0;
noffset = fdt_next_node(fdt, noffset, NULL)) {
for (poffset = fdt_first_property_offset(fdt, noffset);
poffset >= 0;
poffset = fdt_next_property_offset(fdt, poffset)) {
data = (uint8_t *)fdt_getprop_by_offset(fdt, poffset, &name, &len);
            if (!data || strcmp(name, "rng-seed")) {
                continue;
            }
qemu_guest_getrandom_nofail(data, len);
}
}
}

system/dirtylimit.c Normal file
@ -0,0 +1,678 @@
/*
* Dirty page rate limit implementation code
*
* Copyright (c) 2022 CHINA TELECOM CO.,LTD.
*
* Authors:
 * Hyman Huang <huangy81@chinatelecom.cn>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "qapi/qapi-commands-migration.h"
#include "qapi/qmp/qdict.h"
#include "qapi/error.h"
#include "sysemu/dirtyrate.h"
#include "sysemu/dirtylimit.h"
#include "monitor/hmp.h"
#include "monitor/monitor.h"
#include "exec/memory.h"
#include "exec/target_page.h"
#include "hw/boards.h"
#include "sysemu/kvm.h"
#include "trace.h"
#include "migration/misc.h"
#include "migration/migration.h"
#include "migration/options.h"
/*
 * The dirty limit stops adjusting once the dirty page rate error
 * drops below DIRTYLIMIT_TOLERANCE_RANGE.
 */
#define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */
/*
 * Increase or decrease the vcpu sleep time linearly if the dirty
 * page rate error percentage exceeds
 * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT.
 * Otherwise, adjust the sleep time by a fixed step.
 */
#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT 50
/*
* Max vcpu sleep time percentage during a cycle
* composed of dirty ring full and sleep time.
*/
#define DIRTYLIMIT_THROTTLE_PCT_MAX 99
struct {
VcpuStat stat;
bool running;
QemuThread thread;
} *vcpu_dirty_rate_stat;
typedef struct VcpuDirtyLimitState {
int cpu_index;
bool enabled;
/*
* Quota dirty page rate, unit is MB/s
* zero if not enabled.
*/
uint64_t quota;
} VcpuDirtyLimitState;
struct {
VcpuDirtyLimitState *states;
/* Max cpus number configured by user */
int max_cpus;
/* Number of vcpu under dirtylimit */
int limited_nvcpu;
} *dirtylimit_state;
/* protect dirtylimit_state */
static QemuMutex dirtylimit_mutex;
/* dirtylimit thread quit if dirtylimit_quit is true */
static bool dirtylimit_quit;
static void vcpu_dirty_rate_stat_collect(void)
{
MigrationState *s = migrate_get_current();
VcpuStat stat;
int i = 0;
int64_t period = DIRTYLIMIT_CALC_TIME_MS;
if (migrate_dirty_limit() &&
migration_is_active(s)) {
period = s->parameters.x_vcpu_dirty_limit_period;
}
/* calculate vcpu dirtyrate */
vcpu_calculate_dirtyrate(period,
&stat,
GLOBAL_DIRTY_LIMIT,
false);
for (i = 0; i < stat.nvcpu; i++) {
vcpu_dirty_rate_stat->stat.rates[i].id = i;
vcpu_dirty_rate_stat->stat.rates[i].dirty_rate =
stat.rates[i].dirty_rate;
}
g_free(stat.rates);
}
static void *vcpu_dirty_rate_stat_thread(void *opaque)
{
rcu_register_thread();
/* start log sync */
global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true);
while (qatomic_read(&vcpu_dirty_rate_stat->running)) {
vcpu_dirty_rate_stat_collect();
if (dirtylimit_in_service()) {
dirtylimit_process();
}
}
/* stop log sync */
global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false);
rcu_unregister_thread();
return NULL;
}
int64_t vcpu_dirty_rate_get(int cpu_index)
{
DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates;
return qatomic_read_i64(&rates[cpu_index].dirty_rate);
}
void vcpu_dirty_rate_stat_start(void)
{
if (qatomic_read(&vcpu_dirty_rate_stat->running)) {
return;
}
qatomic_set(&vcpu_dirty_rate_stat->running, 1);
qemu_thread_create(&vcpu_dirty_rate_stat->thread,
"dirtyrate-stat",
vcpu_dirty_rate_stat_thread,
NULL,
QEMU_THREAD_JOINABLE);
}
void vcpu_dirty_rate_stat_stop(void)
{
qatomic_set(&vcpu_dirty_rate_stat->running, 0);
dirtylimit_state_unlock();
qemu_mutex_unlock_iothread();
qemu_thread_join(&vcpu_dirty_rate_stat->thread);
qemu_mutex_lock_iothread();
dirtylimit_state_lock();
}
void vcpu_dirty_rate_stat_initialize(void)
{
MachineState *ms = MACHINE(qdev_get_machine());
int max_cpus = ms->smp.max_cpus;
vcpu_dirty_rate_stat =
g_malloc0(sizeof(*vcpu_dirty_rate_stat));
vcpu_dirty_rate_stat->stat.nvcpu = max_cpus;
vcpu_dirty_rate_stat->stat.rates =
g_new0(DirtyRateVcpu, max_cpus);
vcpu_dirty_rate_stat->running = false;
}
void vcpu_dirty_rate_stat_finalize(void)
{
g_free(vcpu_dirty_rate_stat->stat.rates);
vcpu_dirty_rate_stat->stat.rates = NULL;
g_free(vcpu_dirty_rate_stat);
vcpu_dirty_rate_stat = NULL;
}
void dirtylimit_state_lock(void)
{
qemu_mutex_lock(&dirtylimit_mutex);
}
void dirtylimit_state_unlock(void)
{
qemu_mutex_unlock(&dirtylimit_mutex);
}
static void
__attribute__((__constructor__)) dirtylimit_mutex_init(void)
{
qemu_mutex_init(&dirtylimit_mutex);
}
static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index)
{
return &dirtylimit_state->states[cpu_index];
}
void dirtylimit_state_initialize(void)
{
MachineState *ms = MACHINE(qdev_get_machine());
int max_cpus = ms->smp.max_cpus;
int i;
dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state));
dirtylimit_state->states =
g_new0(VcpuDirtyLimitState, max_cpus);
for (i = 0; i < max_cpus; i++) {
dirtylimit_state->states[i].cpu_index = i;
}
dirtylimit_state->max_cpus = max_cpus;
trace_dirtylimit_state_initialize(max_cpus);
}
void dirtylimit_state_finalize(void)
{
g_free(dirtylimit_state->states);
dirtylimit_state->states = NULL;
g_free(dirtylimit_state);
dirtylimit_state = NULL;
trace_dirtylimit_state_finalize();
}
bool dirtylimit_in_service(void)
{
return !!dirtylimit_state;
}
bool dirtylimit_vcpu_index_valid(int cpu_index)
{
MachineState *ms = MACHINE(qdev_get_machine());
return !(cpu_index < 0 ||
cpu_index >= ms->smp.max_cpus);
}
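/*
 * Estimate how many microseconds a vcpu dirtying memory at @dirtyrate
 * (MB/s) needs to fill its dirty ring.  The highest rate observed so far
 * is used, which keeps the estimate conservative (never longer than what
 * the peak rate would allow).
 */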
static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
{
static uint64_t max_dirtyrate;
uint64_t dirty_ring_size_MiB;
dirty_ring_size_MiB = qemu_target_pages_to_MiB(kvm_dirty_ring_size());
if (max_dirtyrate < dirtyrate) {
max_dirtyrate = dirtyrate;
}
return dirty_ring_size_MiB * 1000000 / max_dirtyrate;
}
static inline bool dirtylimit_done(uint64_t quota,
uint64_t current)
{
uint64_t min, max;
min = MIN(quota, current);
max = MAX(quota, current);
    return (max - min) <= DIRTYLIMIT_TOLERANCE_RANGE;
}
static inline bool
dirtylimit_need_linear_adjustment(uint64_t quota,
uint64_t current)
{
uint64_t min, max;
min = MIN(quota, current);
max = MAX(quota, current);
return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT;
}
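/*
 * Recompute cpu->throttle_us_per_full, the time a vcpu sleeps after each
 * dirty-ring-full event.  Large quota/current errors are corrected
 * linearly in proportion to the error percentage; small errors step by a
 * fixed tenth of the ring-full time.  The result is clamped so the sleep
 * stays within DIRTYLIMIT_THROTTLE_PCT_MAX percent of the ring-full plus
 * sleep cycle, and never drops below zero.
 */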
static void dirtylimit_set_throttle(CPUState *cpu,
uint64_t quota,
uint64_t current)
{
int64_t ring_full_time_us = 0;
uint64_t sleep_pct = 0;
uint64_t throttle_us = 0;
if (current == 0) {
cpu->throttle_us_per_full = 0;
return;
}
ring_full_time_us = dirtylimit_dirty_ring_full_time(current);
if (dirtylimit_need_linear_adjustment(quota, current)) {
if (quota < current) {
sleep_pct = (current - quota) * 100 / current;
throttle_us =
ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
cpu->throttle_us_per_full += throttle_us;
} else {
sleep_pct = (quota - current) * 100 / quota;
throttle_us =
ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
cpu->throttle_us_per_full -= throttle_us;
}
trace_dirtylimit_throttle_pct(cpu->cpu_index,
sleep_pct,
throttle_us);
} else {
if (quota < current) {
cpu->throttle_us_per_full += ring_full_time_us / 10;
} else {
cpu->throttle_us_per_full -= ring_full_time_us / 10;
}
}
    /*
     * TODO: with a large kvm_dirty_ring_size (e.g. 65536), the current
     * dirty page rate may never reach the quota; should we stop
     * increasing the sleep time in that case?
     */
cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full,
ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX);
cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0);
}
static void dirtylimit_adjust_throttle(CPUState *cpu)
{
uint64_t quota = 0;
uint64_t current = 0;
int cpu_index = cpu->cpu_index;
quota = dirtylimit_vcpu_get_state(cpu_index)->quota;
current = vcpu_dirty_rate_get(cpu_index);
if (!dirtylimit_done(quota, current)) {
dirtylimit_set_throttle(cpu, quota, current);
}
return;
}
void dirtylimit_process(void)
{
CPUState *cpu;
if (!qatomic_read(&dirtylimit_quit)) {
dirtylimit_state_lock();
if (!dirtylimit_in_service()) {
dirtylimit_state_unlock();
return;
}
CPU_FOREACH(cpu) {
if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
continue;
}
dirtylimit_adjust_throttle(cpu);
}
dirtylimit_state_unlock();
}
}
void dirtylimit_change(bool start)
{
if (start) {
qatomic_set(&dirtylimit_quit, 0);
} else {
qatomic_set(&dirtylimit_quit, 1);
}
}
void dirtylimit_set_vcpu(int cpu_index,
uint64_t quota,
bool enable)
{
trace_dirtylimit_set_vcpu(cpu_index, quota);
if (enable) {
dirtylimit_state->states[cpu_index].quota = quota;
if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) {
dirtylimit_state->limited_nvcpu++;
}
} else {
dirtylimit_state->states[cpu_index].quota = 0;
if (dirtylimit_state->states[cpu_index].enabled) {
dirtylimit_state->limited_nvcpu--;
}
}
dirtylimit_state->states[cpu_index].enabled = enable;
}
void dirtylimit_set_all(uint64_t quota,
bool enable)
{
MachineState *ms = MACHINE(qdev_get_machine());
int max_cpus = ms->smp.max_cpus;
int i;
for (i = 0; i < max_cpus; i++) {
dirtylimit_set_vcpu(i, quota, enable);
}
}
void dirtylimit_vcpu_execute(CPUState *cpu)
{
if (dirtylimit_in_service() &&
dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled &&
cpu->throttle_us_per_full) {
trace_dirtylimit_vcpu_execute(cpu->cpu_index,
cpu->throttle_us_per_full);
usleep(cpu->throttle_us_per_full);
}
}
static void dirtylimit_init(void)
{
dirtylimit_state_initialize();
dirtylimit_change(true);
vcpu_dirty_rate_stat_initialize();
vcpu_dirty_rate_stat_start();
}
static void dirtylimit_cleanup(void)
{
vcpu_dirty_rate_stat_stop();
vcpu_dirty_rate_stat_finalize();
dirtylimit_change(false);
dirtylimit_state_finalize();
}
/*
 * The dirty page rate limit must not be changed while migration is
 * running with the dirty-limit capability enabled.
 */
static bool dirtylimit_is_allowed(void)
{
MigrationState *ms = migrate_get_current();
if (migration_is_running(ms->state) &&
(!qemu_thread_is_self(&ms->thread)) &&
migrate_dirty_limit() &&
dirtylimit_in_service()) {
return false;
}
return true;
}
void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index,
int64_t cpu_index,
Error **errp)
{
if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
return;
}
if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
error_setg(errp, "incorrect cpu index specified");
return;
}
if (!dirtylimit_is_allowed()) {
error_setg(errp, "can't cancel dirty page rate limit while"
" migration is running");
return;
}
if (!dirtylimit_in_service()) {
return;
}
dirtylimit_state_lock();
if (has_cpu_index) {
dirtylimit_set_vcpu(cpu_index, 0, false);
} else {
dirtylimit_set_all(0, false);
}
if (!dirtylimit_state->limited_nvcpu) {
dirtylimit_cleanup();
}
dirtylimit_state_unlock();
}
void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
{
int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
Error *err = NULL;
qmp_cancel_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, &err);
if (err) {
hmp_handle_error(mon, err);
return;
}
monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
"dirty limit for virtual CPU]\n");
}
void qmp_set_vcpu_dirty_limit(bool has_cpu_index,
int64_t cpu_index,
uint64_t dirty_rate,
Error **errp)
{
if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
error_setg(errp, "dirty page limit feature requires KVM with"
" accelerator property 'dirty-ring-size' set'");
return;
}
if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
error_setg(errp, "incorrect cpu index specified");
return;
}
if (!dirtylimit_is_allowed()) {
error_setg(errp, "can't set dirty page rate limit while"
" migration is running");
return;
}
if (!dirty_rate) {
qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp);
return;
}
dirtylimit_state_lock();
if (!dirtylimit_in_service()) {
dirtylimit_init();
}
if (has_cpu_index) {
dirtylimit_set_vcpu(cpu_index, dirty_rate, true);
} else {
dirtylimit_set_all(dirty_rate, true);
}
dirtylimit_state_unlock();
}
void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
{
int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate");
int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
Error *err = NULL;
if (dirty_rate < 0) {
error_setg(&err, "invalid dirty page limit %" PRId64, dirty_rate);
goto out;
}
qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err);
out:
hmp_handle_error(mon, err);
}
/* Return the max throttle time of each virtual CPU */
uint64_t dirtylimit_throttle_time_per_round(void)
{
CPUState *cpu;
int64_t max = 0;
CPU_FOREACH(cpu) {
if (cpu->throttle_us_per_full > max) {
max = cpu->throttle_us_per_full;
}
}
return max;
}
/*
 * Estimate the average dirty ring full time of each virtual CPU.
 * Return 0 if the guest doesn't dirty memory.
 */
uint64_t dirtylimit_ring_full_time(void)
{
CPUState *cpu;
uint64_t curr_rate = 0;
int nvcpus = 0;
CPU_FOREACH(cpu) {
if (cpu->running) {
nvcpus++;
curr_rate += vcpu_dirty_rate_get(cpu->cpu_index);
}
}
if (!curr_rate || !nvcpus) {
return 0;
}
return dirtylimit_dirty_ring_full_time(curr_rate / nvcpus);
}
static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index)
{
DirtyLimitInfo *info = NULL;
info = g_malloc0(sizeof(*info));
info->cpu_index = cpu_index;
info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota;
info->current_rate = vcpu_dirty_rate_get(cpu_index);
return info;
}
static struct DirtyLimitInfoList *dirtylimit_query_all(void)
{
int i, index;
DirtyLimitInfo *info = NULL;
DirtyLimitInfoList *head = NULL, **tail = &head;
dirtylimit_state_lock();
if (!dirtylimit_in_service()) {
dirtylimit_state_unlock();
return NULL;
}
for (i = 0; i < dirtylimit_state->max_cpus; i++) {
index = dirtylimit_state->states[i].cpu_index;
if (dirtylimit_vcpu_get_state(index)->enabled) {
info = dirtylimit_query_vcpu(index);
QAPI_LIST_APPEND(tail, info);
}
}
dirtylimit_state_unlock();
return head;
}
struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp)
{
if (!dirtylimit_in_service()) {
return NULL;
}
return dirtylimit_query_all();
}
void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
{
DirtyLimitInfoList *info;
g_autoptr(DirtyLimitInfoList) head = NULL;
Error *err = NULL;
if (!dirtylimit_in_service()) {
monitor_printf(mon, "Dirty page limit not enabled!\n");
return;
}
head = qmp_query_vcpu_dirty_limit(&err);
if (err) {
hmp_handle_error(mon, err);
return;
}
for (info = head; info != NULL; info = info->next) {
monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s),"
" current rate %"PRIi64 " (MB/s)\n",
info->value->cpu_index,
info->value->limit_rate,
info->value->current_rate);
}
}

system/dma-helpers.c Normal file
@ -0,0 +1,347 @@
/*
* DMA helper functions
*
* Copyright (c) 2009,2020 Red Hat
*
* This work is licensed under the terms of the GNU General Public License
* (GNU GPL), version 2 or later.
*/
#include "qemu/osdep.h"
#include "sysemu/block-backend.h"
#include "sysemu/dma.h"
#include "trace/trace-root.h"
#include "qemu/thread.h"
#include "qemu/main-loop.h"
#include "sysemu/cpu-timers.h"
#include "qemu/range.h"
/* #define DEBUG_IOMMU */
MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr,
uint8_t c, dma_addr_t len, MemTxAttrs attrs)
{
dma_barrier(as, DMA_DIRECTION_FROM_DEVICE);
return address_space_set(as, addr, c, len, attrs);
}
void qemu_sglist_init(QEMUSGList *qsg, DeviceState *dev, int alloc_hint,
AddressSpace *as)
{
qsg->sg = g_new(ScatterGatherEntry, alloc_hint);
qsg->nsg = 0;
qsg->nalloc = alloc_hint;
qsg->size = 0;
qsg->as = as;
qsg->dev = dev;
object_ref(OBJECT(dev));
}
void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
{
if (qsg->nsg == qsg->nalloc) {
qsg->nalloc = 2 * qsg->nalloc + 1;
qsg->sg = g_renew(ScatterGatherEntry, qsg->sg, qsg->nalloc);
}
qsg->sg[qsg->nsg].base = base;
qsg->sg[qsg->nsg].len = len;
qsg->size += len;
++qsg->nsg;
}
void qemu_sglist_destroy(QEMUSGList *qsg)
{
object_unref(OBJECT(qsg->dev));
g_free(qsg->sg);
memset(qsg, 0, sizeof(*qsg));
}
typedef struct {
BlockAIOCB common;
AioContext *ctx;
BlockAIOCB *acb;
QEMUSGList *sg;
uint32_t align;
uint64_t offset;
DMADirection dir;
int sg_cur_index;
dma_addr_t sg_cur_byte;
QEMUIOVector iov;
QEMUBH *bh;
DMAIOFunc *io_func;
void *io_func_opaque;
} DMAAIOCB;
static void dma_blk_cb(void *opaque, int ret);
static void reschedule_dma(void *opaque)
{
DMAAIOCB *dbs = (DMAAIOCB *)opaque;
assert(!dbs->acb && dbs->bh);
qemu_bh_delete(dbs->bh);
dbs->bh = NULL;
dma_blk_cb(dbs, 0);
}
static void dma_blk_unmap(DMAAIOCB *dbs)
{
int i;
for (i = 0; i < dbs->iov.niov; ++i) {
dma_memory_unmap(dbs->sg->as, dbs->iov.iov[i].iov_base,
dbs->iov.iov[i].iov_len, dbs->dir,
dbs->iov.iov[i].iov_len);
}
qemu_iovec_reset(&dbs->iov);
}
static void dma_complete(DMAAIOCB *dbs, int ret)
{
trace_dma_complete(dbs, ret, dbs->common.cb);
assert(!dbs->acb && !dbs->bh);
dma_blk_unmap(dbs);
if (dbs->common.cb) {
dbs->common.cb(dbs->common.opaque, ret);
}
qemu_iovec_destroy(&dbs->iov);
qemu_aio_unref(dbs);
}
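/*
 * dma_blk_cb: completion callback driving the scatter/gather state
 * machine.  Each call unmaps the previous iovec, maps as many remaining
 * SG entries as possible, then either issues the next I/O, schedules a
 * bottom half to retry once map space frees up, or completes the
 * request.
 */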
static void dma_blk_cb(void *opaque, int ret)
{
DMAAIOCB *dbs = (DMAAIOCB *)opaque;
AioContext *ctx = dbs->ctx;
dma_addr_t cur_addr, cur_len;
void *mem;
trace_dma_blk_cb(dbs, ret);
aio_context_acquire(ctx);
dbs->acb = NULL;
dbs->offset += dbs->iov.size;
if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
dma_complete(dbs, ret);
goto out;
}
dma_blk_unmap(dbs);
while (dbs->sg_cur_index < dbs->sg->nsg) {
cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir,
MEMTXATTRS_UNSPECIFIED);
        /*
         * Make reads deterministic in icount mode. Windows sometimes issues
         * disk read requests with overlapping SGs, which leads to
         * non-determinism because the resulting buffer contents may be mixed
         * from several sectors. This code splits all SGs into several
         * non-overlapping groups.
         */
if (mem && icount_enabled() && dbs->dir == DMA_DIRECTION_FROM_DEVICE) {
int i;
for (i = 0 ; i < dbs->iov.niov ; ++i) {
if (ranges_overlap((intptr_t)dbs->iov.iov[i].iov_base,
dbs->iov.iov[i].iov_len, (intptr_t)mem,
cur_len)) {
dma_memory_unmap(dbs->sg->as, mem, cur_len,
dbs->dir, cur_len);
mem = NULL;
break;
}
}
}
        if (!mem) {
            break;
        }
qemu_iovec_add(&dbs->iov, mem, cur_len);
dbs->sg_cur_byte += cur_len;
if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) {
dbs->sg_cur_byte = 0;
++dbs->sg_cur_index;
}
}
if (dbs->iov.size == 0) {
trace_dma_map_wait(dbs);
dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs);
cpu_register_map_client(dbs->bh);
goto out;
}
if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
qemu_iovec_discard_back(&dbs->iov,
QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
}
dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
dma_blk_cb, dbs, dbs->io_func_opaque);
assert(dbs->acb);
out:
aio_context_release(ctx);
}
static void dma_aio_cancel(BlockAIOCB *acb)
{
DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);
trace_dma_aio_cancel(dbs);
assert(!(dbs->acb && dbs->bh));
if (dbs->acb) {
/* This will invoke dma_blk_cb. */
blk_aio_cancel_async(dbs->acb);
return;
}
if (dbs->bh) {
cpu_unregister_map_client(dbs->bh);
qemu_bh_delete(dbs->bh);
dbs->bh = NULL;
}
if (dbs->common.cb) {
dbs->common.cb(dbs->common.opaque, -ECANCELED);
}
}
static const AIOCBInfo dma_aiocb_info = {
.aiocb_size = sizeof(DMAAIOCB),
.cancel_async = dma_aio_cancel,
};
BlockAIOCB *dma_blk_io(AioContext *ctx,
QEMUSGList *sg, uint64_t offset, uint32_t align,
DMAIOFunc *io_func, void *io_func_opaque,
BlockCompletionFunc *cb,
void *opaque, DMADirection dir)
{
DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, NULL, cb, opaque);
trace_dma_blk_io(dbs, io_func_opaque, offset, (dir == DMA_DIRECTION_TO_DEVICE));
dbs->acb = NULL;
dbs->sg = sg;
dbs->ctx = ctx;
dbs->offset = offset;
dbs->align = align;
dbs->sg_cur_index = 0;
dbs->sg_cur_byte = 0;
dbs->dir = dir;
dbs->io_func = io_func;
dbs->io_func_opaque = io_func_opaque;
dbs->bh = NULL;
qemu_iovec_init(&dbs->iov, sg->nsg);
dma_blk_cb(dbs, 0);
return &dbs->common;
}
static
BlockAIOCB *dma_blk_read_io_func(int64_t offset, QEMUIOVector *iov,
BlockCompletionFunc *cb, void *cb_opaque,
void *opaque)
{
BlockBackend *blk = opaque;
return blk_aio_preadv(blk, offset, iov, 0, cb, cb_opaque);
}
BlockAIOCB *dma_blk_read(BlockBackend *blk,
QEMUSGList *sg, uint64_t offset, uint32_t align,
void (*cb)(void *opaque, int ret), void *opaque)
{
return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
dma_blk_read_io_func, blk, cb, opaque,
DMA_DIRECTION_FROM_DEVICE);
}
static
BlockAIOCB *dma_blk_write_io_func(int64_t offset, QEMUIOVector *iov,
BlockCompletionFunc *cb, void *cb_opaque,
void *opaque)
{
BlockBackend *blk = opaque;
return blk_aio_pwritev(blk, offset, iov, 0, cb, cb_opaque);
}
BlockAIOCB *dma_blk_write(BlockBackend *blk,
QEMUSGList *sg, uint64_t offset, uint32_t align,
void (*cb)(void *opaque, int ret), void *opaque)
{
return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
dma_blk_write_io_func, blk, cb, opaque,
DMA_DIRECTION_TO_DEVICE);
}
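/*
 * Copy up to @len bytes between @buf and the scatter/gather list,
 * bounded by the list's total size; if @residual is non-NULL it receives
 * the number of SG bytes left uncopied.
 */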
static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
QEMUSGList *sg, DMADirection dir,
MemTxAttrs attrs)
{
uint8_t *ptr = buf;
dma_addr_t xresidual;
int sg_cur_index;
MemTxResult res = MEMTX_OK;
xresidual = sg->size;
sg_cur_index = 0;
len = MIN(len, xresidual);
while (len > 0) {
ScatterGatherEntry entry = sg->sg[sg_cur_index++];
dma_addr_t xfer = MIN(len, entry.len);
res |= dma_memory_rw(sg->as, entry.base, ptr, xfer, dir, attrs);
ptr += xfer;
len -= xfer;
xresidual -= xfer;
}
if (residual) {
*residual = xresidual;
}
return res;
}
MemTxResult dma_buf_read(void *ptr, dma_addr_t len, dma_addr_t *residual,
QEMUSGList *sg, MemTxAttrs attrs)
{
return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_FROM_DEVICE, attrs);
}
MemTxResult dma_buf_write(void *ptr, dma_addr_t len, dma_addr_t *residual,
QEMUSGList *sg, MemTxAttrs attrs)
{
return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_TO_DEVICE, attrs);
}
void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie,
QEMUSGList *sg, enum BlockAcctType type)
{
block_acct_start(blk_get_stats(blk), cookie, sg->size, type);
}
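/*
 * Return the mask of the largest naturally aligned power-of-two chunk,
 * no wider than @max_addr_bits bits, that starts at @start and does not
 * extend past the inclusive @end; e.g. start=0x3000, end=0x3fff gives
 * mask 0xfff (a 4 KiB chunk).
 */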
uint64_t dma_aligned_pow2_mask(uint64_t start, uint64_t end, int max_addr_bits)
{
uint64_t max_mask = UINT64_MAX, addr_mask = end - start;
uint64_t alignment_mask, size_mask;
if (max_addr_bits != 64) {
max_mask = (1ULL << max_addr_bits) - 1;
}
alignment_mask = start ? (start & -start) - 1 : max_mask;
alignment_mask = MIN(alignment_mask, max_mask);
size_mask = MIN(addr_mask, max_mask);
if (alignment_mask <= size_mask) {
/* Increase the alignment of start */
return alignment_mask;
} else {
/* Find the largest page mask from size */
if (addr_mask == UINT64_MAX) {
return UINT64_MAX;
}
return (1ULL << (63 - clz64(addr_mask + 1))) - 1;
}
}

system/globals.c Normal file
@ -0,0 +1,70 @@
/*
* Global variables that (mostly) should not exist
*
* Copyright (c) 2003-2020 QEMU contributors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "exec/cpu-common.h"
#include "hw/display/vga.h"
#include "hw/loader.h"
#include "hw/xen/xen.h"
#include "net/net.h"
#include "sysemu/cpus.h"
#include "sysemu/sysemu.h"
enum vga_retrace_method vga_retrace_method = VGA_RETRACE_DUMB;
int display_opengl;
const char* keyboard_layout;
bool enable_mlock;
bool enable_cpu_pm;
int nb_nics;
NICInfo nd_table[MAX_NICS];
int autostart = 1;
int vga_interface_type = VGA_NONE;
bool vga_interface_created;
Chardev *parallel_hds[MAX_PARALLEL_PORTS];
int win2k_install_hack;
int fd_bootchk = 1;
int graphic_rotate;
QEMUOptionRom option_rom[MAX_OPTION_ROMS];
int nb_option_roms;
int old_param;
const char *qemu_name;
unsigned int nb_prom_envs;
const char *prom_envs[MAX_PROM_ENVS];
uint8_t *boot_splash_filedata;
int only_migratable; /* turn it off unless user states otherwise */
int icount_align_option;
/* The bytes in qemu_uuid are in the order specified by RFC4122, _not_ in the
* little-endian "wire format" described in the SMBIOS 2.6 specification.
*/
QemuUUID qemu_uuid;
bool qemu_uuid_set;
uint32_t xen_domid;
enum xen_mode xen_mode = XEN_DISABLED;
bool xen_domid_restrict;
struct evtchn_backend_ops *xen_evtchn_ops;
struct gnttab_backend_ops *xen_gnttab_ops;
struct foreignmem_backend_ops *xen_foreignmem_ops;
struct xenstore_backend_ops *xen_xenstore_ops;

system/ioport.c Normal file
@ -0,0 +1,346 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/*
 * Split out ioport-related code from vl.c.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/ioport.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"
#include "trace.h"
struct MemoryRegionPortioList {
Object obj;
MemoryRegion mr;
void *portio_opaque;
MemoryRegionPortio *ports;
};
#define TYPE_MEMORY_REGION_PORTIO_LIST "memory-region-portio-list"
OBJECT_DECLARE_SIMPLE_TYPE(MemoryRegionPortioList, MEMORY_REGION_PORTIO_LIST)
static uint64_t unassigned_io_read(void *opaque, hwaddr addr, unsigned size)
{
return -1ULL;
}
static void unassigned_io_write(void *opaque, hwaddr addr, uint64_t val,
unsigned size)
{
}
const MemoryRegionOps unassigned_io_ops = {
.read = unassigned_io_read,
.write = unassigned_io_write,
.endianness = DEVICE_NATIVE_ENDIAN,
};
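/*
 * cpu_out*() / cpu_in*(): port I/O accessors.  16- and 32-bit values are
 * marshalled through a bounce buffer with stw_p()/stl_p() so the bytes
 * reach address_space_io in target byte order.
 */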
void cpu_outb(uint32_t addr, uint8_t val)
{
trace_cpu_out(addr, 'b', val);
address_space_write(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED,
&val, 1);
}
void cpu_outw(uint32_t addr, uint16_t val)
{
uint8_t buf[2];
trace_cpu_out(addr, 'w', val);
stw_p(buf, val);
address_space_write(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED,
buf, 2);
}
void cpu_outl(uint32_t addr, uint32_t val)
{
uint8_t buf[4];
trace_cpu_out(addr, 'l', val);
stl_p(buf, val);
address_space_write(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED,
buf, 4);
}
uint8_t cpu_inb(uint32_t addr)
{
uint8_t val;
address_space_read(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED,
&val, 1);
trace_cpu_in(addr, 'b', val);
return val;
}
uint16_t cpu_inw(uint32_t addr)
{
uint8_t buf[2];
uint16_t val;
address_space_read(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, buf, 2);
val = lduw_p(buf);
trace_cpu_in(addr, 'w', val);
return val;
}
uint32_t cpu_inl(uint32_t addr)
{
uint8_t buf[4];
uint32_t val;
address_space_read(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, buf, 4);
val = ldl_p(buf);
trace_cpu_in(addr, 'l', val);
return val;
}
void portio_list_init(PortioList *piolist,
Object *owner,
const MemoryRegionPortio *callbacks,
void *opaque, const char *name)
{
unsigned n = 0;
while (callbacks[n].size) {
++n;
}
piolist->ports = callbacks;
piolist->nr = 0;
piolist->regions = g_new0(MemoryRegion *, n);
piolist->address_space = NULL;
piolist->opaque = opaque;
piolist->owner = owner;
piolist->name = name;
piolist->flush_coalesced_mmio = false;
}
void portio_list_set_flush_coalesced(PortioList *piolist)
{
piolist->flush_coalesced_mmio = true;
}
void portio_list_destroy(PortioList *piolist)
{
MemoryRegionPortioList *mrpio;
unsigned i;
for (i = 0; i < piolist->nr; ++i) {
mrpio = container_of(piolist->regions[i], MemoryRegionPortioList, mr);
object_unparent(OBJECT(&mrpio->mr));
object_unref(mrpio);
}
g_free(piolist->regions);
}
static const MemoryRegionPortio *find_portio(MemoryRegionPortioList *mrpio,
uint64_t offset, unsigned size,
bool write)
{
const MemoryRegionPortio *mrp;
for (mrp = mrpio->ports; mrp->size; ++mrp) {
if (offset >= mrp->offset && offset < mrp->offset + mrp->len &&
size == mrp->size &&
(write ? (bool)mrp->write : (bool)mrp->read)) {
return mrp;
}
}
return NULL;
}
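/*
 * portio_read: find the handler matching this offset and size.  A 16-bit
 * access with no 16-bit handler falls back to two 8-bit reads, with the
 * high byte reading as 0xff when it lies past the registered range.
 */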
static uint64_t portio_read(void *opaque, hwaddr addr, unsigned size)
{
MemoryRegionPortioList *mrpio = opaque;
const MemoryRegionPortio *mrp = find_portio(mrpio, addr, size, false);
uint64_t data;
data = ((uint64_t)1 << (size * 8)) - 1;
if (mrp) {
data = mrp->read(mrpio->portio_opaque, mrp->base + addr);
} else if (size == 2) {
mrp = find_portio(mrpio, addr, 1, false);
if (mrp) {
data = mrp->read(mrpio->portio_opaque, mrp->base + addr);
if (addr + 1 < mrp->offset + mrp->len) {
data |= mrp->read(mrpio->portio_opaque, mrp->base + addr + 1) << 8;
} else {
data |= 0xff00;
}
}
}
return data;
}
static void portio_write(void *opaque, hwaddr addr, uint64_t data,
unsigned size)
{
MemoryRegionPortioList *mrpio = opaque;
const MemoryRegionPortio *mrp = find_portio(mrpio, addr, size, true);
if (mrp) {
mrp->write(mrpio->portio_opaque, mrp->base + addr, data);
} else if (size == 2) {
mrp = find_portio(mrpio, addr, 1, true);
if (mrp) {
mrp->write(mrpio->portio_opaque, mrp->base + addr, data & 0xff);
if (addr + 1 < mrp->offset + mrp->len) {
mrp->write(mrpio->portio_opaque, mrp->base + addr + 1, data >> 8);
}
}
}
}
static const MemoryRegionOps portio_ops = {
.read = portio_read,
.write = portio_write,
.endianness = DEVICE_LITTLE_ENDIAN,
.valid.unaligned = true,
.impl.unaligned = true,
};
static void portio_list_add_1(PortioList *piolist,
const MemoryRegionPortio *pio_init,
unsigned count, unsigned start,
unsigned off_low, unsigned off_high)
{
MemoryRegionPortioList *mrpio;
Object *owner;
char *name;
unsigned i;
/* Copy the sub-list and null-terminate it. */
mrpio = MEMORY_REGION_PORTIO_LIST(
object_new(TYPE_MEMORY_REGION_PORTIO_LIST));
mrpio->portio_opaque = piolist->opaque;
mrpio->ports = g_malloc0(sizeof(MemoryRegionPortio) * (count + 1));
memcpy(mrpio->ports, pio_init, sizeof(MemoryRegionPortio) * count);
memset(mrpio->ports + count, 0, sizeof(MemoryRegionPortio));
/* Adjust the offsets to all be zero-based for the region. */
for (i = 0; i < count; ++i) {
mrpio->ports[i].offset -= off_low;
mrpio->ports[i].base = start + off_low;
}
/*
* The MemoryRegion owner is the MemoryRegionPortioList since that manages
* the lifecycle via the refcount
*/
memory_region_init_io(&mrpio->mr, OBJECT(mrpio), &portio_ops, mrpio,
piolist->name, off_high - off_low);
/* Reparent the MemoryRegion to the piolist owner */
object_ref(&mrpio->mr);
object_unparent(OBJECT(&mrpio->mr));
if (!piolist->owner) {
owner = container_get(qdev_get_machine(), "/unattached");
} else {
owner = piolist->owner;
}
name = g_strdup_printf("%s[*]", piolist->name);
object_property_add_child(owner, name, OBJECT(&mrpio->mr));
g_free(name);
if (piolist->flush_coalesced_mmio) {
memory_region_set_flush_coalesced(&mrpio->mr);
}
memory_region_add_subregion(piolist->address_space,
start + off_low, &mrpio->mr);
piolist->regions[piolist->nr] = &mrpio->mr;
++piolist->nr;
}
void portio_list_add(PortioList *piolist,
MemoryRegion *address_space,
uint32_t start)
{
const MemoryRegionPortio *pio, *pio_start = piolist->ports;
unsigned int off_low, off_high, off_last, count;
piolist->address_space = address_space;
/* Handle the first entry specially. */
off_last = off_low = pio_start->offset;
off_high = off_low + pio_start->len + pio_start->size - 1;
count = 1;
for (pio = pio_start + 1; pio->size != 0; pio++, count++) {
/* All entries must be sorted by offset. */
assert(pio->offset >= off_last);
off_last = pio->offset;
/* If we see a hole, break the region. */
if (off_last > off_high) {
portio_list_add_1(piolist, pio_start, count, start, off_low,
off_high);
/* ... and start collecting anew. */
pio_start = pio;
off_low = off_last;
off_high = off_low + pio->len + pio_start->size - 1;
count = 0;
} else if (off_last + pio->len > off_high) {
off_high = off_last + pio->len + pio_start->size - 1;
}
}
/* There will always be an open sub-list. */
portio_list_add_1(piolist, pio_start, count, start, off_low, off_high);
}
void portio_list_del(PortioList *piolist)
{
MemoryRegionPortioList *mrpio;
unsigned i;
for (i = 0; i < piolist->nr; ++i) {
mrpio = container_of(piolist->regions[i], MemoryRegionPortioList, mr);
memory_region_del_subregion(piolist->address_space, &mrpio->mr);
}
}
static void memory_region_portio_list_finalize(Object *obj)
{
MemoryRegionPortioList *mrpio = MEMORY_REGION_PORTIO_LIST(obj);
object_unref(&mrpio->mr);
g_free(mrpio->ports);
}
static const TypeInfo memory_region_portio_list_info = {
.parent = TYPE_OBJECT,
.name = TYPE_MEMORY_REGION_PORTIO_LIST,
.instance_size = sizeof(MemoryRegionPortioList),
.instance_finalize = memory_region_portio_list_finalize,
};
static void ioport_register_types(void)
{
type_register_static(&memory_region_portio_list_info);
}
type_init(ioport_register_types)

system/main.c Normal file
@ -0,0 +1,49 @@
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2020 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qemu-main.h"
#include "sysemu/sysemu.h"
#ifdef CONFIG_SDL
#include <SDL.h>
#endif
int qemu_default_main(void)
{
int status;
status = qemu_main_loop();
qemu_cleanup();
return status;
}
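/*
 * UI front ends (the Cocoa UI, for instance) may replace qemu_main to
 * take control of the main loop; the default just runs the loop and
 * cleans up afterwards.
 */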
int (*qemu_main)(void) = qemu_default_main;
int main(int argc, char **argv)
{
qemu_init(argc, argv);
return qemu_main();
}

system/memory.c Normal file, 3683 lines (diff suppressed: file too large)

system/memory_mapping.c Normal file
@ -0,0 +1,377 @@
/*
* QEMU memory mapping
*
* Copyright Fujitsu, Corp. 2011, 2012
*
* Authors:
* Wen Congyang <wency@cn.fujitsu.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "sysemu/memory_mapping.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"
#include "hw/core/cpu.h"
//#define DEBUG_GUEST_PHYS_REGION_ADD
static void memory_mapping_list_add_mapping_sorted(MemoryMappingList *list,
MemoryMapping *mapping)
{
MemoryMapping *p;
QTAILQ_FOREACH(p, &list->head, next) {
if (p->phys_addr >= mapping->phys_addr) {
QTAILQ_INSERT_BEFORE(p, mapping, next);
return;
}
}
QTAILQ_INSERT_TAIL(&list->head, mapping, next);
}
static void create_new_memory_mapping(MemoryMappingList *list,
hwaddr phys_addr,
hwaddr virt_addr,
ram_addr_t length)
{
MemoryMapping *memory_mapping;
memory_mapping = g_new(MemoryMapping, 1);
memory_mapping->phys_addr = phys_addr;
memory_mapping->virt_addr = virt_addr;
memory_mapping->length = length;
list->last_mapping = memory_mapping;
list->num++;
memory_mapping_list_add_mapping_sorted(list, memory_mapping);
}
static inline bool mapping_contiguous(MemoryMapping *map,
hwaddr phys_addr,
hwaddr virt_addr)
{
return phys_addr == map->phys_addr + map->length &&
virt_addr == map->virt_addr + map->length;
}
/*
 * Do [map->phys_addr, map->phys_addr + map->length) and
 * [phys_addr, phys_addr + length) intersect?
 */
static inline bool mapping_have_same_region(MemoryMapping *map,
hwaddr phys_addr,
ram_addr_t length)
{
return !(phys_addr + length < map->phys_addr ||
phys_addr >= map->phys_addr + map->length);
}
/*
 * [map->phys_addr, map->phys_addr + map->length) and
 * [phys_addr, phys_addr + length) intersect.  Do the virtual addresses
 * in the intersection disagree, i.e. do the two mappings conflict?
 */
static inline bool mapping_conflict(MemoryMapping *map,
hwaddr phys_addr,
hwaddr virt_addr)
{
return virt_addr - map->virt_addr != phys_addr - map->phys_addr;
}
/*
 * [map->virt_addr, map->virt_addr + map->length) and
 * [virt_addr, virt_addr + length) intersect, and the physical addresses
 * in the intersection agree: grow @map to cover the union of the two
 * virtual ranges.
 */
static inline void mapping_merge(MemoryMapping *map,
hwaddr virt_addr,
ram_addr_t length)
{
if (virt_addr < map->virt_addr) {
map->length += map->virt_addr - virt_addr;
map->virt_addr = virt_addr;
}
if ((virt_addr + length) >
(map->virt_addr + map->length)) {
map->length = virt_addr + length - map->virt_addr;
}
}
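/*
 * Insert [phys_addr, phys_addr + length) into the sorted list: extend an
 * adjacent mapping when both the physical and virtual ranges are
 * contiguous, merge into an overlapping mapping when the translations
 * agree, and otherwise create a new mapping.
 */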
void memory_mapping_list_add_merge_sorted(MemoryMappingList *list,
hwaddr phys_addr,
hwaddr virt_addr,
ram_addr_t length)
{
MemoryMapping *memory_mapping, *last_mapping;
if (QTAILQ_EMPTY(&list->head)) {
create_new_memory_mapping(list, phys_addr, virt_addr, length);
return;
}
last_mapping = list->last_mapping;
if (last_mapping) {
if (mapping_contiguous(last_mapping, phys_addr, virt_addr)) {
last_mapping->length += length;
return;
}
}
QTAILQ_FOREACH(memory_mapping, &list->head, next) {
if (mapping_contiguous(memory_mapping, phys_addr, virt_addr)) {
memory_mapping->length += length;
list->last_mapping = memory_mapping;
return;
}
if (phys_addr + length < memory_mapping->phys_addr) {
/* create a new region before memory_mapping */
break;
}
if (mapping_have_same_region(memory_mapping, phys_addr, length)) {
if (mapping_conflict(memory_mapping, phys_addr, virt_addr)) {
continue;
}
/* merge this region into memory_mapping */
mapping_merge(memory_mapping, virt_addr, length);
list->last_mapping = memory_mapping;
return;
}
}
    /* this region cannot be merged into any existing memory mapping */
create_new_memory_mapping(list, phys_addr, virt_addr, length);
}
void memory_mapping_list_free(MemoryMappingList *list)
{
MemoryMapping *p, *q;
QTAILQ_FOREACH_SAFE(p, &list->head, next, q) {
QTAILQ_REMOVE(&list->head, p, next);
g_free(p);
}
list->num = 0;
list->last_mapping = NULL;
}
void memory_mapping_list_init(MemoryMappingList *list)
{
list->num = 0;
list->last_mapping = NULL;
QTAILQ_INIT(&list->head);
}
void guest_phys_blocks_free(GuestPhysBlockList *list)
{
GuestPhysBlock *p, *q;
QTAILQ_FOREACH_SAFE(p, &list->head, next, q) {
QTAILQ_REMOVE(&list->head, p, next);
memory_region_unref(p->mr);
g_free(p);
}
list->num = 0;
}
void guest_phys_blocks_init(GuestPhysBlockList *list)
{
list->num = 0;
QTAILQ_INIT(&list->head);
}
typedef struct GuestPhysListener {
GuestPhysBlockList *list;
MemoryListener listener;
} GuestPhysListener;
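/*
 * Append @section to the block list, coalescing it with the previous
 * block when the guest-physical range, the host mapping and the
 * MemoryRegion are all contiguous.
 */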
static void guest_phys_block_add_section(GuestPhysListener *g,
MemoryRegionSection *section)
{
const hwaddr target_start = section->offset_within_address_space;
const hwaddr target_end = target_start + int128_get64(section->size);
uint8_t *host_addr = memory_region_get_ram_ptr(section->mr) +
section->offset_within_region;
GuestPhysBlock *predecessor = NULL;
/* find continuity in guest physical address space */
if (!QTAILQ_EMPTY(&g->list->head)) {
hwaddr predecessor_size;
predecessor = QTAILQ_LAST(&g->list->head);
predecessor_size = predecessor->target_end - predecessor->target_start;
/* the memory API guarantees monotonically increasing traversal */
g_assert(predecessor->target_end <= target_start);
/* we want continuity in both guest-physical and host-virtual memory */
if (predecessor->target_end < target_start ||
predecessor->host_addr + predecessor_size != host_addr ||
predecessor->mr != section->mr) {
predecessor = NULL;
}
}
if (predecessor == NULL) {
/* isolated mapping, allocate it and add it to the list */
GuestPhysBlock *block = g_malloc0(sizeof *block);
block->target_start = target_start;
block->target_end = target_end;
block->host_addr = host_addr;
block->mr = section->mr;
memory_region_ref(section->mr);
QTAILQ_INSERT_TAIL(&g->list->head, block, next);
++g->list->num;
} else {
/* expand predecessor until @target_end; predecessor's start doesn't
* change
*/
predecessor->target_end = target_end;
}
#ifdef DEBUG_GUEST_PHYS_REGION_ADD
fprintf(stderr, "%s: target_start=" HWADDR_FMT_plx " target_end="
HWADDR_FMT_plx ": %s (count: %u)\n", __func__, target_start,
target_end, predecessor ? "joined" : "added", g->list->num);
#endif
}
static int guest_phys_ram_populate_cb(MemoryRegionSection *section,
void *opaque)
{
GuestPhysListener *g = opaque;
guest_phys_block_add_section(g, section);
return 0;
}
static void guest_phys_blocks_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
GuestPhysListener *g = container_of(listener, GuestPhysListener, listener);
/* we only care about RAM */
if (!memory_region_is_ram(section->mr) ||
memory_region_is_ram_device(section->mr) ||
memory_region_is_nonvolatile(section->mr)) {
return;
}
/* for special sparse regions, only add populated parts */
if (memory_region_has_ram_discard_manager(section->mr)) {
RamDiscardManager *rdm;
rdm = memory_region_get_ram_discard_manager(section->mr);
ram_discard_manager_replay_populated(rdm, section,
guest_phys_ram_populate_cb, g);
return;
}
guest_phys_block_add_section(g, section);
}
void guest_phys_blocks_append(GuestPhysBlockList *list)
{
GuestPhysListener g = { 0 };
g.list = list;
g.listener.region_add = &guest_phys_blocks_region_add;
memory_listener_register(&g.listener, &address_space_memory);
memory_listener_unregister(&g.listener);
}
static CPUState *find_paging_enabled_cpu(CPUState *start_cpu)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
if (cpu_paging_enabled(cpu)) {
return cpu;
}
}
return NULL;
}
void qemu_get_guest_memory_mapping(MemoryMappingList *list,
const GuestPhysBlockList *guest_phys_blocks,
Error **errp)
{
CPUState *cpu, *first_paging_enabled_cpu;
GuestPhysBlock *block;
ram_addr_t offset, length;
first_paging_enabled_cpu = find_paging_enabled_cpu(first_cpu);
if (first_paging_enabled_cpu) {
for (cpu = first_paging_enabled_cpu; cpu != NULL;
cpu = CPU_NEXT(cpu)) {
Error *err = NULL;
cpu_get_memory_mapping(cpu, list, &err);
if (err) {
error_propagate(errp, err);
return;
}
}
return;
}
    /*
     * If the guest doesn't use paging, the virtual address is equal to
     * the physical address.
     */
QTAILQ_FOREACH(block, &guest_phys_blocks->head, next) {
offset = block->target_start;
length = block->target_end - block->target_start;
create_new_memory_mapping(list, offset, offset, length);
}
}
void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list,
const GuestPhysBlockList *guest_phys_blocks)
{
GuestPhysBlock *block;
QTAILQ_FOREACH(block, &guest_phys_blocks->head, next) {
create_new_memory_mapping(list, block->target_start, 0,
block->target_end - block->target_start);
}
}
void memory_mapping_filter(MemoryMappingList *list, int64_t begin,
int64_t length)
{
MemoryMapping *cur, *next;
QTAILQ_FOREACH_SAFE(cur, &list->head, next, next) {
if (cur->phys_addr >= begin + length ||
cur->phys_addr + cur->length <= begin) {
QTAILQ_REMOVE(&list->head, cur, next);
g_free(cur);
list->num--;
continue;
}
if (cur->phys_addr < begin) {
cur->length -= begin - cur->phys_addr;
if (cur->virt_addr) {
cur->virt_addr += begin - cur->phys_addr;
}
cur->phys_addr = begin;
}
if (cur->phys_addr + cur->length > begin + length) {
cur->length -= cur->phys_addr + cur->length - begin - length;
}
}
}

system/meson.build Normal file
@ -0,0 +1,36 @@
specific_ss.add(when: 'CONFIG_SYSTEM_ONLY', if_true: [files(
'arch_init.c',
'ioport.c',
'memory.c',
'physmem.c',
'watchpoint.c',
)])
system_ss.add(files(
'balloon.c',
'bootdevice.c',
'cpus.c',
'cpu-throttle.c',
'cpu-timers.c',
'datadir.c',
'dirtylimit.c',
'dma-helpers.c',
'globals.c',
'memory_mapping.c',
'qdev-monitor.c',
'qtest.c',
'rtc.c',
'runstate-action.c',
'runstate-hmp-cmds.c',
'runstate.c',
'tpm-hmp-cmds.c',
'vl.c',
), sdl, libpmem, libdaxctl)
if have_tpm
system_ss.add(files('tpm.c'))
endif
system_ss.add(when: seccomp, if_true: files('qemu-seccomp.c'))
system_ss.add(when: fdt, if_true: files('device_tree.c'))
system_ss.add(when: 'CONFIG_LINUX', if_true: files('async-teardown.c'))

system/physmem.c Normal file, 3796 lines (diff suppressed: file too large)

system/qdev-monitor.c Normal file, 1148 lines (diff suppressed: file too large)

system/qemu-seccomp.c Normal file
@ -0,0 +1,486 @@
/*
* QEMU seccomp mode 2 support with libseccomp
*
* Copyright IBM, Corp. 2012
*
* Authors:
* Eduardo Otubo <eotubo@br.ibm.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*
* Contributions after 2012-01-13 are licensed under the terms of the
* GNU GPL, version 2 or (at your option) any later version.
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/config-file.h"
#include "qemu/option.h"
#include "qemu/module.h"
#include <sys/prctl.h>
#include <seccomp.h>
#include "sysemu/seccomp.h"
#include <linux/seccomp.h>
/* For some architectures (notably ARM) cacheflush is not supported until
* libseccomp 2.2.3, but configure enforces that we are using a more recent
* version on those hosts, so it is OK for this check to be less strict.
*/
#if SCMP_VER_MAJOR >= 3
#define HAVE_CACHEFLUSH
#elif SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR >= 2
#define HAVE_CACHEFLUSH
#endif
struct QemuSeccompSyscall {
int32_t num;
uint8_t set;
uint8_t narg;
const struct scmp_arg_cmp *arg_cmp;
uint32_t action;
};
const struct scmp_arg_cmp sched_setscheduler_arg[] = {
/* was SCMP_A1(SCMP_CMP_NE, SCHED_IDLE), but expanded due to GCC 4.x bug */
{ .arg = 1, .op = SCMP_CMP_NE, .datum_a = SCHED_IDLE }
};
/*
* See 'NOTES' in 'man 2 clone' - s390 & cross have 'flags' in
* different position to other architectures
*/
#if defined(HOST_S390X) || defined(HOST_S390) || defined(HOST_CRIS)
#define CLONE_FLAGS_ARG 1
#else
#define CLONE_FLAGS_ARG 0
#endif
#ifndef CLONE_PIDFD
# define CLONE_PIDFD 0x00001000
#endif
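/*
 * REQUIRE_CLONE_FLAG builds a rule matching clone() calls that do *not*
 * set the flag, FORBID_CLONE_FLAG one matching calls that do; in both
 * cases RULE_CLONE_FLAG turns the match into a SCMP_ACT_TRAP entry in
 * the "spawn" set, so only well-formed pthread_create() clones succeed.
 */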
#define REQUIRE_CLONE_FLAG(flag) \
const struct scmp_arg_cmp clone_arg ## flag[] = { \
{ .arg = CLONE_FLAGS_ARG, \
.op = SCMP_CMP_MASKED_EQ, \
.datum_a = flag, .datum_b = 0 } }
#define FORBID_CLONE_FLAG(flag) \
const struct scmp_arg_cmp clone_arg ## flag[] = { \
{ .arg = CLONE_FLAGS_ARG, \
.op = SCMP_CMP_MASKED_EQ, \
.datum_a = flag, .datum_b = flag } }
#define RULE_CLONE_FLAG(flag) \
{ SCMP_SYS(clone), QEMU_SECCOMP_SET_SPAWN, \
ARRAY_SIZE(clone_arg ## flag), clone_arg ## flag, SCMP_ACT_TRAP }
/* If no CLONE_* flags are set, except CSIGNAL, deny */
const struct scmp_arg_cmp clone_arg_none[] = {
{ .arg = CLONE_FLAGS_ARG,
.op = SCMP_CMP_MASKED_EQ,
.datum_a = ~(CSIGNAL), .datum_b = 0 }
};
/*
* pthread_create should always set all of these.
*/
REQUIRE_CLONE_FLAG(CLONE_VM);
REQUIRE_CLONE_FLAG(CLONE_FS);
REQUIRE_CLONE_FLAG(CLONE_FILES);
REQUIRE_CLONE_FLAG(CLONE_SIGHAND);
REQUIRE_CLONE_FLAG(CLONE_THREAD);
REQUIRE_CLONE_FLAG(CLONE_SYSVSEM);
REQUIRE_CLONE_FLAG(CLONE_SETTLS);
REQUIRE_CLONE_FLAG(CLONE_PARENT_SETTID);
REQUIRE_CLONE_FLAG(CLONE_CHILD_CLEARTID);
/*
* Musl sets this in pthread_create too, but it is
* obsolete and harmless since its behaviour is
* subsumed under CLONE_THREAD
*/
/*REQUIRE_CLONE_FLAG(CLONE_DETACHED);*/
/*
* These all indicate an attempt to spawn a process
* instead of a thread, or other undesirable scenarios
*/
FORBID_CLONE_FLAG(CLONE_PIDFD);
FORBID_CLONE_FLAG(CLONE_PTRACE);
FORBID_CLONE_FLAG(CLONE_VFORK);
FORBID_CLONE_FLAG(CLONE_PARENT);
FORBID_CLONE_FLAG(CLONE_NEWNS);
FORBID_CLONE_FLAG(CLONE_UNTRACED);
FORBID_CLONE_FLAG(CLONE_NEWCGROUP);
FORBID_CLONE_FLAG(CLONE_NEWUTS);
FORBID_CLONE_FLAG(CLONE_NEWIPC);
FORBID_CLONE_FLAG(CLONE_NEWUSER);
FORBID_CLONE_FLAG(CLONE_NEWPID);
FORBID_CLONE_FLAG(CLONE_NEWNET);
FORBID_CLONE_FLAG(CLONE_IO);
static const struct QemuSeccompSyscall denylist[] = {
/* default set of syscalls that should get blocked */
{ SCMP_SYS(reboot), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(swapon), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(swapoff), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(syslog), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(mount), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(umount), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(kexec_load), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(afs_syscall), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(break), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(ftime), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(getpmsg), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(gtty), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(lock), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(mpx), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(prof), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(profil), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(putpmsg), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(security), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(stty), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(tuxcall), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(ulimit), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(vserver), QEMU_SECCOMP_SET_DEFAULT,
0, NULL, SCMP_ACT_TRAP },
/* obsolete */
{ SCMP_SYS(readdir), QEMU_SECCOMP_SET_OBSOLETE,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(_sysctl), QEMU_SECCOMP_SET_OBSOLETE,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(bdflush), QEMU_SECCOMP_SET_OBSOLETE,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(create_module), QEMU_SECCOMP_SET_OBSOLETE,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(get_kernel_syms), QEMU_SECCOMP_SET_OBSOLETE,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(query_module), QEMU_SECCOMP_SET_OBSOLETE,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(sgetmask), QEMU_SECCOMP_SET_OBSOLETE,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(ssetmask), QEMU_SECCOMP_SET_OBSOLETE,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(sysfs), QEMU_SECCOMP_SET_OBSOLETE,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(uselib), QEMU_SECCOMP_SET_OBSOLETE,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(ustat), QEMU_SECCOMP_SET_OBSOLETE,
0, NULL, SCMP_ACT_TRAP },
/* privileged */
{ SCMP_SYS(setuid), QEMU_SECCOMP_SET_PRIVILEGED,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(setgid), QEMU_SECCOMP_SET_PRIVILEGED,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(setpgid), QEMU_SECCOMP_SET_PRIVILEGED,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(setsid), QEMU_SECCOMP_SET_PRIVILEGED,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(setreuid), QEMU_SECCOMP_SET_PRIVILEGED,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(setregid), QEMU_SECCOMP_SET_PRIVILEGED,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(setresuid), QEMU_SECCOMP_SET_PRIVILEGED,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(setresgid), QEMU_SECCOMP_SET_PRIVILEGED,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(setfsuid), QEMU_SECCOMP_SET_PRIVILEGED,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(setfsgid), QEMU_SECCOMP_SET_PRIVILEGED,
0, NULL, SCMP_ACT_TRAP },
/* spawn */
{ SCMP_SYS(fork), QEMU_SECCOMP_SET_SPAWN,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(vfork), QEMU_SECCOMP_SET_SPAWN,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(execve), QEMU_SECCOMP_SET_SPAWN,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(clone), QEMU_SECCOMP_SET_SPAWN,
ARRAY_SIZE(clone_arg_none), clone_arg_none, SCMP_ACT_TRAP },
RULE_CLONE_FLAG(CLONE_VM),
RULE_CLONE_FLAG(CLONE_FS),
RULE_CLONE_FLAG(CLONE_FILES),
RULE_CLONE_FLAG(CLONE_SIGHAND),
RULE_CLONE_FLAG(CLONE_THREAD),
RULE_CLONE_FLAG(CLONE_SYSVSEM),
RULE_CLONE_FLAG(CLONE_SETTLS),
RULE_CLONE_FLAG(CLONE_PARENT_SETTID),
RULE_CLONE_FLAG(CLONE_CHILD_CLEARTID),
/*RULE_CLONE_FLAG(CLONE_DETACHED),*/
RULE_CLONE_FLAG(CLONE_PIDFD),
RULE_CLONE_FLAG(CLONE_PTRACE),
RULE_CLONE_FLAG(CLONE_VFORK),
RULE_CLONE_FLAG(CLONE_PARENT),
RULE_CLONE_FLAG(CLONE_NEWNS),
RULE_CLONE_FLAG(CLONE_UNTRACED),
RULE_CLONE_FLAG(CLONE_NEWCGROUP),
RULE_CLONE_FLAG(CLONE_NEWUTS),
RULE_CLONE_FLAG(CLONE_NEWIPC),
RULE_CLONE_FLAG(CLONE_NEWUSER),
RULE_CLONE_FLAG(CLONE_NEWPID),
RULE_CLONE_FLAG(CLONE_NEWNET),
RULE_CLONE_FLAG(CLONE_IO),
#ifdef __SNR_clone3
{ SCMP_SYS(clone3), QEMU_SECCOMP_SET_SPAWN,
0, NULL, SCMP_ACT_ERRNO(ENOSYS) },
#endif
#ifdef __SNR_execveat
{ SCMP_SYS(execveat), QEMU_SECCOMP_SET_SPAWN,
0, NULL, SCMP_ACT_TRAP },
#endif
{ SCMP_SYS(setns), QEMU_SECCOMP_SET_SPAWN,
0, NULL, SCMP_ACT_TRAP },
{ SCMP_SYS(unshare), QEMU_SECCOMP_SET_SPAWN,
0, NULL, SCMP_ACT_TRAP },
/* resource control */
{ SCMP_SYS(setpriority), QEMU_SECCOMP_SET_RESOURCECTL,
0, NULL, SCMP_ACT_ERRNO(EPERM) },
{ SCMP_SYS(sched_setparam), QEMU_SECCOMP_SET_RESOURCECTL,
0, NULL, SCMP_ACT_ERRNO(EPERM) },
{ SCMP_SYS(sched_setscheduler), QEMU_SECCOMP_SET_RESOURCECTL,
ARRAY_SIZE(sched_setscheduler_arg), sched_setscheduler_arg,
SCMP_ACT_ERRNO(EPERM) },
{ SCMP_SYS(sched_setaffinity), QEMU_SECCOMP_SET_RESOURCECTL,
0, NULL, SCMP_ACT_ERRNO(EPERM) },
};
static inline __attribute__((unused)) int
qemu_seccomp(unsigned int operation, unsigned int flags, void *args)
{
#ifdef __NR_seccomp
return syscall(__NR_seccomp, operation, flags, args);
#else
errno = ENOSYS;
return -1;
#endif
}
static uint32_t qemu_seccomp_update_action(uint32_t action)
{
#if defined(SECCOMP_GET_ACTION_AVAIL) && defined(SCMP_ACT_KILL_PROCESS) && \
defined(SECCOMP_RET_KILL_PROCESS)
if (action == SCMP_ACT_TRAP) {
static int kill_process = -1;
if (kill_process == -1) {
uint32_t testaction = SECCOMP_RET_KILL_PROCESS;
if (qemu_seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &testaction) == 0) {
kill_process = 1;
} else {
kill_process = 0;
}
}
if (kill_process == 1) {
return SCMP_ACT_KILL_PROCESS;
}
}
#endif
return action;
}
static int seccomp_start(uint32_t seccomp_opts, Error **errp)
{
int rc = -1;
unsigned int i = 0;
scmp_filter_ctx ctx;
ctx = seccomp_init(SCMP_ACT_ALLOW);
if (ctx == NULL) {
error_setg(errp, "failed to initialize seccomp context");
goto seccomp_return;
}
#if defined(CONFIG_SECCOMP_SYSRAWRC)
/*
* This must be the first seccomp_attr_set() call to have full
* error propagation from subsequent seccomp APIs.
*/
rc = seccomp_attr_set(ctx, SCMP_FLTATR_API_SYSRAWRC, 1);
if (rc != 0) {
error_setg_errno(errp, -rc,
"failed to set seccomp rawrc attribute");
goto seccomp_return;
}
#endif
rc = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_TSYNC, 1);
if (rc != 0) {
error_setg_errno(errp, -rc,
"failed to set seccomp thread synchronization");
goto seccomp_return;
}
for (i = 0; i < ARRAY_SIZE(denylist); i++) {
uint32_t action;
if (!(seccomp_opts & denylist[i].set)) {
continue;
}
action = qemu_seccomp_update_action(denylist[i].action);
rc = seccomp_rule_add_array(ctx, action, denylist[i].num,
denylist[i].narg, denylist[i].arg_cmp);
if (rc < 0) {
error_setg_errno(errp, -rc,
"failed to add seccomp denylist rules");
goto seccomp_return;
}
}
rc = seccomp_load(ctx);
if (rc < 0) {
error_setg_errno(errp, -rc,
"failed to load seccomp syscall filter in kernel");
}
seccomp_return:
seccomp_release(ctx);
return rc < 0 ? -1 : 0;
}
int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
{
if (qemu_opt_get_bool(opts, "enable", false)) {
uint32_t seccomp_opts = QEMU_SECCOMP_SET_DEFAULT
| QEMU_SECCOMP_SET_OBSOLETE;
const char *value = NULL;
value = qemu_opt_get(opts, "obsolete");
if (value) {
if (g_str_equal(value, "allow")) {
seccomp_opts &= ~QEMU_SECCOMP_SET_OBSOLETE;
} else if (g_str_equal(value, "deny")) {
/* this is the default option, this if is here
* to provide a little bit of consistency for
* the command line */
} else {
error_setg(errp, "invalid argument for obsolete");
return -1;
}
}
value = qemu_opt_get(opts, "elevateprivileges");
if (value) {
if (g_str_equal(value, "deny")) {
seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED;
} else if (g_str_equal(value, "children")) {
seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED;
/* calling prctl directly because we're
* not sure if the host has CAP_SYS_ADMIN set */
if (prctl(PR_SET_NO_NEW_PRIVS, 1)) {
error_setg(errp, "failed to set no_new_privs, aborting");
return -1;
}
} else if (g_str_equal(value, "allow")) {
/* default value */
} else {
error_setg(errp, "invalid argument for elevateprivileges");
return -1;
}
}
value = qemu_opt_get(opts, "spawn");
if (value) {
if (g_str_equal(value, "deny")) {
seccomp_opts |= QEMU_SECCOMP_SET_SPAWN;
} else if (g_str_equal(value, "allow")) {
/* default value */
} else {
error_setg(errp, "invalid argument for spawn");
return -1;
}
}
value = qemu_opt_get(opts, "resourcecontrol");
if (value) {
if (g_str_equal(value, "deny")) {
seccomp_opts |= QEMU_SECCOMP_SET_RESOURCECTL;
} else if (g_str_equal(value, "allow")) {
/* default value */
} else {
error_setg(errp, "invalid argument for resourcecontrol");
return -1;
}
}
if (seccomp_start(seccomp_opts, errp) < 0) {
return -1;
}
}
return 0;
}
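/*
* Illustration of the documented -sandbox syntax: a command line such as
* -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny
* reaches this parser with enable=true, keeps the obsolete set in the
* filter and adds the privileged, spawn and resource-control sets before
* seccomp_start() installs them.
*/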
static QemuOptsList qemu_sandbox_opts = {
.name = "sandbox",
.implied_opt_name = "enable",
.head = QTAILQ_HEAD_INITIALIZER(qemu_sandbox_opts.head),
.desc = {
{
.name = "enable",
.type = QEMU_OPT_BOOL,
},
{
.name = "obsolete",
.type = QEMU_OPT_STRING,
},
{
.name = "elevateprivileges",
.type = QEMU_OPT_STRING,
},
{
.name = "spawn",
.type = QEMU_OPT_STRING,
},
{
.name = "resourcecontrol",
.type = QEMU_OPT_STRING,
},
{ /* end of list */ }
},
};
static void seccomp_register(void)
{
bool add = false;
/* FIXME: use seccomp_api_get() >= 2 check when released */
#if defined(SECCOMP_FILTER_FLAG_TSYNC)
int check;
/* probe host TSYNC capability: with a NULL args pointer the kernel
* fails with EFAULT when the filter flag is supported, and with
* ENOSYS (or EINVAL) when it is not */
check = qemu_seccomp(SECCOMP_SET_MODE_FILTER,
SECCOMP_FILTER_FLAG_TSYNC, NULL);
if (check < 0 && errno == EFAULT) {
add = true;
}
#endif
if (add) {
qemu_add_opts(&qemu_sandbox_opts);
}
}
opts_init(seccomp_register);

1070
system/qtest.c Normal file

File diff suppressed because it is too large Load diff

192
system/rtc.c Normal file
View file

@ -0,0 +1,192 @@
/*
* RTC configuration and clock read
*
* Copyright (c) 2003-2020 QEMU contributors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "qemu/option.h"
#include "qemu/timer.h"
#include "qom/object.h"
#include "sysemu/replay.h"
#include "sysemu/sysemu.h"
#include "sysemu/rtc.h"
#include "hw/rtc/mc146818rtc.h"
static enum {
RTC_BASE_UTC,
RTC_BASE_LOCALTIME,
RTC_BASE_DATETIME,
} rtc_base_type = RTC_BASE_UTC;
static time_t rtc_ref_start_datetime;
static int rtc_realtime_clock_offset; /* used only with QEMU_CLOCK_REALTIME */
static int rtc_host_datetime_offset = -1; /* valid & used only with
RTC_BASE_DATETIME */
QEMUClockType rtc_clock;
/***********************************************************/
/* RTC reference time/date access */
static time_t qemu_ref_timedate(QEMUClockType clock)
{
time_t value = qemu_clock_get_ms(clock) / 1000;
switch (clock) {
case QEMU_CLOCK_REALTIME:
value -= rtc_realtime_clock_offset;
/* fall through */
case QEMU_CLOCK_VIRTUAL:
value += rtc_ref_start_datetime;
break;
case QEMU_CLOCK_HOST:
if (rtc_base_type == RTC_BASE_DATETIME) {
value -= rtc_host_datetime_offset;
}
break;
default:
assert(0);
}
return value;
}
void qemu_get_timedate(struct tm *tm, time_t offset)
{
time_t ti = qemu_ref_timedate(rtc_clock);
ti += offset;
switch (rtc_base_type) {
case RTC_BASE_DATETIME:
case RTC_BASE_UTC:
gmtime_r(&ti, tm);
break;
case RTC_BASE_LOCALTIME:
localtime_r(&ti, tm);
break;
}
}
time_t qemu_timedate_diff(struct tm *tm)
{
time_t seconds;
switch (rtc_base_type) {
case RTC_BASE_DATETIME:
case RTC_BASE_UTC:
seconds = mktimegm(tm);
break;
case RTC_BASE_LOCALTIME:
{
struct tm tmp = *tm;
tmp.tm_isdst = -1; /* use timezone to figure it out */
seconds = mktime(&tmp);
break;
}
default:
abort();
}
return seconds - qemu_ref_timedate(QEMU_CLOCK_HOST);
}
static void configure_rtc_base_datetime(const char *startdate)
{
time_t rtc_start_datetime;
struct tm tm;
if (sscanf(startdate, "%d-%d-%dT%d:%d:%d", &tm.tm_year, &tm.tm_mon,
&tm.tm_mday, &tm.tm_hour, &tm.tm_min, &tm.tm_sec) == 6) {
/* OK */
} else if (sscanf(startdate, "%d-%d-%d",
&tm.tm_year, &tm.tm_mon, &tm.tm_mday) == 3) {
tm.tm_hour = 0;
tm.tm_min = 0;
tm.tm_sec = 0;
} else {
goto date_fail;
}
tm.tm_year -= 1900;
tm.tm_mon--;
rtc_start_datetime = mktimegm(&tm);
if (rtc_start_datetime == -1) {
date_fail:
error_report("invalid datetime format");
error_printf("valid formats: "
"'2006-06-17T16:01:21' or '2006-06-17'\n");
exit(1);
}
rtc_host_datetime_offset = rtc_ref_start_datetime - rtc_start_datetime;
rtc_ref_start_datetime = rtc_start_datetime;
}
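/*
* Worked example: with -rtc base=2006-06-17T16:01:21, mktimegm() turns
* the parsed struct tm into the epoch value for that UTC datetime; if
* the host clock currently reads, say, one hour later,
* rtc_host_datetime_offset becomes 3600 and qemu_ref_timedate() above
* recovers the requested start time by subtracting it from
* QEMU_CLOCK_HOST.
*/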
void configure_rtc(QemuOpts *opts)
{
const char *value;
/* Set defaults */
rtc_clock = QEMU_CLOCK_HOST;
rtc_ref_start_datetime = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
rtc_realtime_clock_offset = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
value = qemu_opt_get(opts, "base");
if (value) {
if (!strcmp(value, "utc")) {
rtc_base_type = RTC_BASE_UTC;
} else if (!strcmp(value, "localtime")) {
rtc_base_type = RTC_BASE_LOCALTIME;
replay_add_blocker("-rtc base=localtime");
} else {
rtc_base_type = RTC_BASE_DATETIME;
configure_rtc_base_datetime(value);
}
}
value = qemu_opt_get(opts, "clock");
if (value) {
if (!strcmp(value, "host")) {
rtc_clock = QEMU_CLOCK_HOST;
} else if (!strcmp(value, "rt")) {
rtc_clock = QEMU_CLOCK_REALTIME;
} else if (!strcmp(value, "vm")) {
rtc_clock = QEMU_CLOCK_VIRTUAL;
} else {
error_report("invalid option value '%s'", value);
exit(1);
}
}
value = qemu_opt_get(opts, "driftfix");
if (value) {
if (!strcmp(value, "slew")) {
object_register_sugar_prop(TYPE_MC146818_RTC,
"lost_tick_policy",
"slew",
false);
if (!object_class_by_name(TYPE_MC146818_RTC)) {
warn_report("driftfix 'slew' is not available with this machine");
}
} else if (!strcmp(value, "none")) {
/* discard is default */
} else {
error_report("invalid option value '%s'", value);
exit(1);
}
}
}
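/*
* Example of the documented -rtc syntax:
* -rtc base=localtime,clock=host,driftfix=slew
* selects RTC_BASE_LOCALTIME, keeps rtc_clock on QEMU_CLOCK_HOST and
* registers the mc146818 "slew" lost-tick policy handled above.
*/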

46
system/runstate-action.c Normal file
View file

@ -0,0 +1,46 @@
/*
* Copyright (c) 2020 Oracle and/or its affiliates.
*
* This work is licensed under the terms of the GNU GPL, version 2.
* See the COPYING file in the top-level directory.
*
*/
#include "qemu/osdep.h"
#include "sysemu/runstate-action.h"
#include "sysemu/watchdog.h"
#include "qemu/config-file.h"
#include "qapi/error.h"
#include "qemu/option_int.h"
RebootAction reboot_action = REBOOT_ACTION_RESET;
ShutdownAction shutdown_action = SHUTDOWN_ACTION_POWEROFF;
PanicAction panic_action = PANIC_ACTION_SHUTDOWN;
/*
* Receives actions to be applied for specific guest events
* and sets the internal state as requested.
*/
void qmp_set_action(bool has_reboot, RebootAction reboot,
bool has_shutdown, ShutdownAction shutdown,
bool has_panic, PanicAction panic,
bool has_watchdog, WatchdogAction watchdog,
Error **errp)
{
if (has_reboot) {
reboot_action = reboot;
}
if (has_panic) {
panic_action = panic;
}
if (has_watchdog) {
qmp_watchdog_set_action(watchdog, errp);
}
/* Process shutdown last, in case the panic action needs to be altered */
if (has_shutdown) {
shutdown_action = shutdown;
}
}
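/*
* A QMP sketch of the command this implements (argument names from the
* documented set-action command):
* { "execute": "set-action",
* "arguments": { "reboot": "reset", "shutdown": "poweroff",
* "panic": "pause", "watchdog": "debug" } }
*/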

95
system/runstate-hmp-cmds.c Normal file
View file

@ -0,0 +1,95 @@
/*
* HMP commands related to run state
*
* Copyright IBM, Corp. 2011
*
* Authors:
* Anthony Liguori <aliguori@us.ibm.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*
* Contributions after 2012-01-13 are licensed under the terms of the
* GNU GPL, version 2 or (at your option) any later version.
*/
#include "qemu/osdep.h"
#include "exec/cpu-common.h"
#include "monitor/hmp.h"
#include "monitor/monitor.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-run-state.h"
#include "qapi/qmp/qdict.h"
#include "qemu/accel.h"
void hmp_info_status(Monitor *mon, const QDict *qdict)
{
StatusInfo *info;
info = qmp_query_status(NULL);
monitor_printf(mon, "VM status: %s",
info->running ? "running" : "paused");
if (!info->running && info->status != RUN_STATE_PAUSED) {
monitor_printf(mon, " (%s)", RunState_str(info->status));
}
monitor_printf(mon, "\n");
qapi_free_StatusInfo(info);
}
void hmp_one_insn_per_tb(Monitor *mon, const QDict *qdict)
{
const char *option = qdict_get_try_str(qdict, "option");
AccelState *accel = current_accel();
bool newval;
if (!object_property_find(OBJECT(accel), "one-insn-per-tb")) {
monitor_printf(mon,
"This accelerator does not support setting one-insn-per-tb\n");
return;
}
if (!option || !strcmp(option, "on")) {
newval = true;
} else if (!strcmp(option, "off")) {
newval = false;
} else {
monitor_printf(mon, "unexpected option %s\n", option);
return;
}
/* If the property exists then setting it can never fail */
object_property_set_bool(OBJECT(accel), "one-insn-per-tb",
newval, &error_abort);
}
void hmp_watchdog_action(Monitor *mon, const QDict *qdict)
{
Error *err = NULL;
WatchdogAction action;
char *qapi_value;
qapi_value = g_ascii_strdown(qdict_get_str(qdict, "action"), -1);
action = qapi_enum_parse(&WatchdogAction_lookup, qapi_value, -1, &err);
g_free(qapi_value);
if (err) {
hmp_handle_error(mon, err);
return;
}
qmp_watchdog_set_action(action, &error_abort);
}
void watchdog_action_completion(ReadLineState *rs, int nb_args, const char *str)
{
int i;
if (nb_args != 2) {
return;
}
readline_set_completion_index(rs, strlen(str));
for (i = 0; i < WATCHDOG_ACTION__MAX; i++) {
readline_add_completion_of(rs, str, WatchdogAction_str(i));
}
}

871
system/runstate.c Normal file
View file

@ -0,0 +1,871 @@
/*
* QEMU main system emulation loop
*
* Copyright (c) 2003-2020 QEMU contributors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "audio/audio.h"
#include "block/block.h"
#include "block/export.h"
#include "chardev/char.h"
#include "crypto/cipher.h"
#include "crypto/init.h"
#include "exec/cpu-common.h"
#include "gdbstub/syscalls.h"
#include "hw/boards.h"
#include "migration/misc.h"
#include "migration/postcopy-ram.h"
#include "monitor/monitor.h"
#include "net/net.h"
#include "net/vhost_net.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-run-state.h"
#include "qapi/qapi-events-run-state.h"
#include "qemu/accel.h"
#include "qemu/error-report.h"
#include "qemu/job.h"
#include "qemu/log.h"
#include "qemu/module.h"
#include "qemu/plugin.h"
#include "qemu/sockets.h"
#include "qemu/timer.h"
#include "qemu/thread.h"
#include "qom/object.h"
#include "qom/object_interfaces.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "sysemu/replay.h"
#include "sysemu/reset.h"
#include "sysemu/runstate.h"
#include "sysemu/runstate-action.h"
#include "sysemu/sysemu.h"
#include "sysemu/tpm.h"
#include "trace.h"
static NotifierList exit_notifiers =
NOTIFIER_LIST_INITIALIZER(exit_notifiers);
static RunState current_run_state = RUN_STATE_PRELAUNCH;
/* We use RUN_STATE__MAX but any invalid value will do */
static RunState vmstop_requested = RUN_STATE__MAX;
static QemuMutex vmstop_lock;
typedef struct {
RunState from;
RunState to;
} RunStateTransition;
static const RunStateTransition runstate_transitions_def[] = {
{ RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE },
{ RUN_STATE_DEBUG, RUN_STATE_RUNNING },
{ RUN_STATE_DEBUG, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_DEBUG, RUN_STATE_PRELAUNCH },
{ RUN_STATE_INMIGRATE, RUN_STATE_INTERNAL_ERROR },
{ RUN_STATE_INMIGRATE, RUN_STATE_IO_ERROR },
{ RUN_STATE_INMIGRATE, RUN_STATE_PAUSED },
{ RUN_STATE_INMIGRATE, RUN_STATE_RUNNING },
{ RUN_STATE_INMIGRATE, RUN_STATE_SHUTDOWN },
{ RUN_STATE_INMIGRATE, RUN_STATE_SUSPENDED },
{ RUN_STATE_INMIGRATE, RUN_STATE_WATCHDOG },
{ RUN_STATE_INMIGRATE, RUN_STATE_GUEST_PANICKED },
{ RUN_STATE_INMIGRATE, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_INMIGRATE, RUN_STATE_PRELAUNCH },
{ RUN_STATE_INMIGRATE, RUN_STATE_POSTMIGRATE },
{ RUN_STATE_INMIGRATE, RUN_STATE_COLO },
{ RUN_STATE_INTERNAL_ERROR, RUN_STATE_PAUSED },
{ RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_INTERNAL_ERROR, RUN_STATE_PRELAUNCH },
{ RUN_STATE_IO_ERROR, RUN_STATE_RUNNING },
{ RUN_STATE_IO_ERROR, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_IO_ERROR, RUN_STATE_PRELAUNCH },
{ RUN_STATE_PAUSED, RUN_STATE_RUNNING },
{ RUN_STATE_PAUSED, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_PAUSED, RUN_STATE_POSTMIGRATE },
{ RUN_STATE_PAUSED, RUN_STATE_PRELAUNCH },
{ RUN_STATE_PAUSED, RUN_STATE_COLO},
{ RUN_STATE_POSTMIGRATE, RUN_STATE_RUNNING },
{ RUN_STATE_POSTMIGRATE, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_POSTMIGRATE, RUN_STATE_PRELAUNCH },
{ RUN_STATE_PRELAUNCH, RUN_STATE_RUNNING },
{ RUN_STATE_PRELAUNCH, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_RUNNING },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_PAUSED },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_POSTMIGRATE },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_PRELAUNCH },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_COLO },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_INTERNAL_ERROR },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_IO_ERROR },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_SHUTDOWN },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_SUSPENDED },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_WATCHDOG },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_GUEST_PANICKED },
{ RUN_STATE_RESTORE_VM, RUN_STATE_RUNNING },
{ RUN_STATE_RESTORE_VM, RUN_STATE_PRELAUNCH },
{ RUN_STATE_COLO, RUN_STATE_RUNNING },
{ RUN_STATE_COLO, RUN_STATE_PRELAUNCH },
{ RUN_STATE_COLO, RUN_STATE_SHUTDOWN},
{ RUN_STATE_RUNNING, RUN_STATE_DEBUG },
{ RUN_STATE_RUNNING, RUN_STATE_INTERNAL_ERROR },
{ RUN_STATE_RUNNING, RUN_STATE_IO_ERROR },
{ RUN_STATE_RUNNING, RUN_STATE_PAUSED },
{ RUN_STATE_RUNNING, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_RUNNING, RUN_STATE_RESTORE_VM },
{ RUN_STATE_RUNNING, RUN_STATE_SAVE_VM },
{ RUN_STATE_RUNNING, RUN_STATE_SHUTDOWN },
{ RUN_STATE_RUNNING, RUN_STATE_WATCHDOG },
{ RUN_STATE_RUNNING, RUN_STATE_GUEST_PANICKED },
{ RUN_STATE_RUNNING, RUN_STATE_COLO},
{ RUN_STATE_SAVE_VM, RUN_STATE_RUNNING },
{ RUN_STATE_SHUTDOWN, RUN_STATE_PAUSED },
{ RUN_STATE_SHUTDOWN, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_SHUTDOWN, RUN_STATE_PRELAUNCH },
{ RUN_STATE_SHUTDOWN, RUN_STATE_COLO },
{ RUN_STATE_DEBUG, RUN_STATE_SUSPENDED },
{ RUN_STATE_RUNNING, RUN_STATE_SUSPENDED },
{ RUN_STATE_SUSPENDED, RUN_STATE_RUNNING },
{ RUN_STATE_SUSPENDED, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_SUSPENDED, RUN_STATE_PRELAUNCH },
{ RUN_STATE_SUSPENDED, RUN_STATE_COLO},
{ RUN_STATE_WATCHDOG, RUN_STATE_RUNNING },
{ RUN_STATE_WATCHDOG, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_WATCHDOG, RUN_STATE_PRELAUNCH },
{ RUN_STATE_WATCHDOG, RUN_STATE_COLO},
{ RUN_STATE_GUEST_PANICKED, RUN_STATE_RUNNING },
{ RUN_STATE_GUEST_PANICKED, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_GUEST_PANICKED, RUN_STATE_PRELAUNCH },
{ RUN_STATE__MAX, RUN_STATE__MAX },
};
static bool runstate_valid_transitions[RUN_STATE__MAX][RUN_STATE__MAX];
bool runstate_check(RunState state)
{
return current_run_state == state;
}
static void runstate_init(void)
{
const RunStateTransition *p;
memset(&runstate_valid_transitions, 0, sizeof(runstate_valid_transitions));
for (p = &runstate_transitions_def[0]; p->from != RUN_STATE__MAX; p++) {
runstate_valid_transitions[p->from][p->to] = true;
}
qemu_mutex_init(&vmstop_lock);
}
/* This function will abort() on invalid state transitions */
void runstate_set(RunState new_state)
{
assert(new_state < RUN_STATE__MAX);
trace_runstate_set(current_run_state, RunState_str(current_run_state),
new_state, RunState_str(new_state));
if (current_run_state == new_state) {
return;
}
if (!runstate_valid_transitions[current_run_state][new_state]) {
error_report("invalid runstate transition: '%s' -> '%s'",
RunState_str(current_run_state),
RunState_str(new_state));
abort();
}
current_run_state = new_state;
}
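/*
* For example, monitor "stop" followed by "cont" walks RUNNING ->
* PAUSED -> RUNNING, both rows of the table above, while a hypothetical
* PRELAUNCH -> POSTMIGRATE request has no row and would abort().
*/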
RunState runstate_get(void)
{
return current_run_state;
}
bool runstate_is_running(void)
{
return runstate_check(RUN_STATE_RUNNING);
}
bool runstate_needs_reset(void)
{
return runstate_check(RUN_STATE_INTERNAL_ERROR) ||
runstate_check(RUN_STATE_SHUTDOWN);
}
StatusInfo *qmp_query_status(Error **errp)
{
StatusInfo *info = g_malloc0(sizeof(*info));
AccelState *accel = current_accel();
/*
* We ignore errors, which will happen if the accelerator
* is not TCG. "singlestep" is meaningless for other accelerators,
* so we will set the StatusInfo field to false for those.
*/
info->singlestep = object_property_get_bool(OBJECT(accel),
"one-insn-per-tb", NULL);
info->running = runstate_is_running();
info->status = current_run_state;
return info;
}
bool qemu_vmstop_requested(RunState *r)
{
qemu_mutex_lock(&vmstop_lock);
*r = vmstop_requested;
vmstop_requested = RUN_STATE__MAX;
qemu_mutex_unlock(&vmstop_lock);
return *r < RUN_STATE__MAX;
}
void qemu_system_vmstop_request_prepare(void)
{
qemu_mutex_lock(&vmstop_lock);
}
void qemu_system_vmstop_request(RunState state)
{
vmstop_requested = state;
qemu_mutex_unlock(&vmstop_lock);
qemu_notify_event();
}
struct VMChangeStateEntry {
VMChangeStateHandler *cb;
VMChangeStateHandler *prepare_cb;
void *opaque;
QTAILQ_ENTRY(VMChangeStateEntry) entries;
int priority;
};
static QTAILQ_HEAD(, VMChangeStateEntry) vm_change_state_head =
QTAILQ_HEAD_INITIALIZER(vm_change_state_head);
/**
* qemu_add_vm_change_state_handler_prio:
* @cb: the callback to invoke
* @opaque: user data passed to the callback
* @priority: low priorities execute first when the vm runs and the reverse is
* true when the vm stops
*
* Register a callback function that is invoked when the vm starts or stops
* running.
*
* Returns: an entry to be freed using qemu_del_vm_change_state_handler()
*/
VMChangeStateEntry *qemu_add_vm_change_state_handler_prio(
VMChangeStateHandler *cb, void *opaque, int priority)
{
return qemu_add_vm_change_state_handler_prio_full(cb, NULL, opaque,
priority);
}
/**
* qemu_add_vm_change_state_handler_prio_full:
* @cb: the main callback to invoke
* @prepare_cb: a callback to invoke before the main callback
* @opaque: user data passed to the callbacks
* @priority: low priorities execute first when the vm runs and the reverse is
* true when the vm stops
*
* Register a main callback function and an optional prepare callback function
* that are invoked when the vm starts or stops running. The main callback and
* the prepare callback are called in two separate phases: First all prepare
* callbacks are called and only then all main callbacks are called. As its
* name suggests, the prepare callback can be used to do some preparatory work
* before invoking the main callback.
*
* Returns: an entry to be freed using qemu_del_vm_change_state_handler()
*/
VMChangeStateEntry *
qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb,
VMChangeStateHandler *prepare_cb,
void *opaque, int priority)
{
VMChangeStateEntry *e;
VMChangeStateEntry *other;
e = g_malloc0(sizeof(*e));
e->cb = cb;
e->prepare_cb = prepare_cb;
e->opaque = opaque;
e->priority = priority;
/* Keep list sorted in ascending priority order */
QTAILQ_FOREACH(other, &vm_change_state_head, entries) {
if (priority < other->priority) {
QTAILQ_INSERT_BEFORE(other, e, entries);
return e;
}
}
QTAILQ_INSERT_TAIL(&vm_change_state_head, e, entries);
return e;
}
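/*
* Usage sketch (my_vm_state_cb and dev are hypothetical):
*
* static void my_vm_state_cb(void *opaque, bool running, RunState state)
* {
* (react to the VM starting or stopping)
* }
*
* VMChangeStateEntry *e =
* qemu_add_vm_change_state_handler(my_vm_state_cb, dev);
* (...)
* qemu_del_vm_change_state_handler(e);
*/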
VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb,
void *opaque)
{
return qemu_add_vm_change_state_handler_prio(cb, opaque, 0);
}
void qemu_del_vm_change_state_handler(VMChangeStateEntry *e)
{
QTAILQ_REMOVE(&vm_change_state_head, e, entries);
g_free(e);
}
void vm_state_notify(bool running, RunState state)
{
VMChangeStateEntry *e, *next;
trace_vm_state_notify(running, state, RunState_str(state));
if (running) {
QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) {
if (e->prepare_cb) {
e->prepare_cb(e->opaque, running, state);
}
}
QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) {
e->cb(e->opaque, running, state);
}
} else {
QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) {
if (e->prepare_cb) {
e->prepare_cb(e->opaque, running, state);
}
}
QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) {
e->cb(e->opaque, running, state);
}
}
}
static ShutdownCause reset_requested;
static ShutdownCause shutdown_requested;
static int shutdown_signal;
static pid_t shutdown_pid;
static int powerdown_requested;
static int debug_requested;
static int suspend_requested;
static WakeupReason wakeup_reason;
static NotifierList powerdown_notifiers =
NOTIFIER_LIST_INITIALIZER(powerdown_notifiers);
static NotifierList suspend_notifiers =
NOTIFIER_LIST_INITIALIZER(suspend_notifiers);
static NotifierList wakeup_notifiers =
NOTIFIER_LIST_INITIALIZER(wakeup_notifiers);
static NotifierList shutdown_notifiers =
NOTIFIER_LIST_INITIALIZER(shutdown_notifiers);
static uint32_t wakeup_reason_mask = ~(1 << QEMU_WAKEUP_REASON_NONE);
ShutdownCause qemu_shutdown_requested_get(void)
{
return shutdown_requested;
}
ShutdownCause qemu_reset_requested_get(void)
{
return reset_requested;
}
static int qemu_shutdown_requested(void)
{
return qatomic_xchg(&shutdown_requested, SHUTDOWN_CAUSE_NONE);
}
static void qemu_kill_report(void)
{
if (!qtest_driver() && shutdown_signal) {
if (shutdown_pid == 0) {
/* This happens e.g. for ^C at the terminal, so it's worth
* avoiding printing an odd message in that case.
*/
error_report("terminating on signal %d", shutdown_signal);
} else {
char *shutdown_cmd = qemu_get_pid_name(shutdown_pid);
error_report("terminating on signal %d from pid " FMT_pid " (%s)",
shutdown_signal, shutdown_pid,
shutdown_cmd ? shutdown_cmd : "<unknown process>");
g_free(shutdown_cmd);
}
shutdown_signal = 0;
}
}
static ShutdownCause qemu_reset_requested(void)
{
ShutdownCause r = reset_requested;
if (r && replay_checkpoint(CHECKPOINT_RESET_REQUESTED)) {
reset_requested = SHUTDOWN_CAUSE_NONE;
return r;
}
return SHUTDOWN_CAUSE_NONE;
}
static int qemu_suspend_requested(void)
{
int r = suspend_requested;
if (r && replay_checkpoint(CHECKPOINT_SUSPEND_REQUESTED)) {
suspend_requested = 0;
return r;
}
return false;
}
static WakeupReason qemu_wakeup_requested(void)
{
return wakeup_reason;
}
static int qemu_powerdown_requested(void)
{
int r = powerdown_requested;
powerdown_requested = 0;
return r;
}
static int qemu_debug_requested(void)
{
int r = debug_requested;
debug_requested = 0;
return r;
}
/*
* Reset the VM. Issue an event unless @reason is SHUTDOWN_CAUSE_NONE.
*/
void qemu_system_reset(ShutdownCause reason)
{
MachineClass *mc;
mc = current_machine ? MACHINE_GET_CLASS(current_machine) : NULL;
cpu_synchronize_all_states();
if (mc && mc->reset) {
mc->reset(current_machine, reason);
} else {
qemu_devices_reset(reason);
}
switch (reason) {
case SHUTDOWN_CAUSE_NONE:
case SHUTDOWN_CAUSE_SUBSYSTEM_RESET:
case SHUTDOWN_CAUSE_SNAPSHOT_LOAD:
break;
default:
qapi_event_send_reset(shutdown_caused_by_guest(reason), reason);
}
cpu_synchronize_all_post_reset();
}
/*
* Wake the VM after suspend.
*/
static void qemu_system_wakeup(void)
{
MachineClass *mc;
mc = current_machine ? MACHINE_GET_CLASS(current_machine) : NULL;
if (mc && mc->wakeup) {
mc->wakeup(current_machine);
}
}
void qemu_system_guest_panicked(GuestPanicInformation *info)
{
qemu_log_mask(LOG_GUEST_ERROR, "Guest crashed");
if (current_cpu) {
current_cpu->crash_occurred = true;
}
/*
* TODO: Currently the available panic actions are: none, pause, and
* shutdown, but in principle debug and reset could be supported as well.
* Investigate any potential use cases for the unimplemented actions.
*/
if (panic_action == PANIC_ACTION_PAUSE
|| (panic_action == PANIC_ACTION_SHUTDOWN && shutdown_action == SHUTDOWN_ACTION_PAUSE)) {
qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE, info);
vm_stop(RUN_STATE_GUEST_PANICKED);
} else if (panic_action == PANIC_ACTION_SHUTDOWN ||
panic_action == PANIC_ACTION_EXIT_FAILURE) {
qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_POWEROFF, info);
vm_stop(RUN_STATE_GUEST_PANICKED);
qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_PANIC);
} else {
qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_RUN, info);
}
if (info) {
if (info->type == GUEST_PANIC_INFORMATION_TYPE_HYPER_V) {
qemu_log_mask(LOG_GUEST_ERROR, "\nHV crash parameters: (%#"PRIx64
" %#"PRIx64" %#"PRIx64" %#"PRIx64" %#"PRIx64")\n",
info->u.hyper_v.arg1,
info->u.hyper_v.arg2,
info->u.hyper_v.arg3,
info->u.hyper_v.arg4,
info->u.hyper_v.arg5);
} else if (info->type == GUEST_PANIC_INFORMATION_TYPE_S390) {
qemu_log_mask(LOG_GUEST_ERROR, " on cpu %d: %s\n"
"PSW: 0x%016" PRIx64 " 0x%016" PRIx64"\n",
info->u.s390.core,
S390CrashReason_str(info->u.s390.reason),
info->u.s390.psw_mask,
info->u.s390.psw_addr);
}
qapi_free_GuestPanicInformation(info);
}
}
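/*
* Example: starting QEMU with the documented '-action panic=pause'
* makes this function emit GUEST_PANICKED with action "pause" and park
* the VM in RUN_STATE_GUEST_PANICKED instead of shutting it down.
*/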
void qemu_system_guest_crashloaded(GuestPanicInformation *info)
{
qemu_log_mask(LOG_GUEST_ERROR, "Guest crash loaded");
qapi_event_send_guest_crashloaded(GUEST_PANIC_ACTION_RUN, info);
qapi_free_GuestPanicInformation(info);
}
void qemu_system_reset_request(ShutdownCause reason)
{
if (reboot_action == REBOOT_ACTION_SHUTDOWN &&
reason != SHUTDOWN_CAUSE_SUBSYSTEM_RESET) {
shutdown_requested = reason;
} else if (!cpus_are_resettable()) {
error_report("cpus are not resettable, terminating");
shutdown_requested = reason;
} else {
reset_requested = reason;
}
cpu_stop_current();
qemu_notify_event();
}
static void qemu_system_suspend(void)
{
pause_all_vcpus();
notifier_list_notify(&suspend_notifiers, NULL);
runstate_set(RUN_STATE_SUSPENDED);
qapi_event_send_suspend();
}
void qemu_system_suspend_request(void)
{
if (runstate_check(RUN_STATE_SUSPENDED)) {
return;
}
suspend_requested = 1;
cpu_stop_current();
qemu_notify_event();
}
void qemu_register_suspend_notifier(Notifier *notifier)
{
notifier_list_add(&suspend_notifiers, notifier);
}
void qemu_system_wakeup_request(WakeupReason reason, Error **errp)
{
trace_system_wakeup_request(reason);
if (!runstate_check(RUN_STATE_SUSPENDED)) {
error_setg(errp,
"Unable to wake up: guest is not in suspended state");
return;
}
if (!(wakeup_reason_mask & (1 << reason))) {
return;
}
runstate_set(RUN_STATE_RUNNING);
wakeup_reason = reason;
qemu_notify_event();
}
void qemu_system_wakeup_enable(WakeupReason reason, bool enabled)
{
if (enabled) {
wakeup_reason_mask |= (1 << reason);
} else {
wakeup_reason_mask &= ~(1 << reason);
}
}
void qemu_register_wakeup_notifier(Notifier *notifier)
{
notifier_list_add(&wakeup_notifiers, notifier);
}
static bool wakeup_suspend_enabled;
void qemu_register_wakeup_support(void)
{
wakeup_suspend_enabled = true;
}
bool qemu_wakeup_suspend_enabled(void)
{
return wakeup_suspend_enabled;
}
void qemu_system_killed(int signal, pid_t pid)
{
shutdown_signal = signal;
shutdown_pid = pid;
shutdown_action = SHUTDOWN_ACTION_POWEROFF;
/* Cannot call qemu_system_shutdown_request directly because
* we are in a signal handler.
*/
shutdown_requested = SHUTDOWN_CAUSE_HOST_SIGNAL;
qemu_notify_event();
}
void qemu_system_shutdown_request(ShutdownCause reason)
{
trace_qemu_system_shutdown_request(reason);
replay_shutdown_request(reason);
shutdown_requested = reason;
qemu_notify_event();
}
static void qemu_system_powerdown(void)
{
qapi_event_send_powerdown();
notifier_list_notify(&powerdown_notifiers, NULL);
}
static void qemu_system_shutdown(ShutdownCause cause)
{
qapi_event_send_shutdown(shutdown_caused_by_guest(cause), cause);
notifier_list_notify(&shutdown_notifiers, &cause);
}
void qemu_system_powerdown_request(void)
{
trace_qemu_system_powerdown_request();
powerdown_requested = 1;
qemu_notify_event();
}
void qemu_register_powerdown_notifier(Notifier *notifier)
{
notifier_list_add(&powerdown_notifiers, notifier);
}
void qemu_register_shutdown_notifier(Notifier *notifier)
{
notifier_list_add(&shutdown_notifiers, notifier);
}
void qemu_system_debug_request(void)
{
debug_requested = 1;
qemu_notify_event();
}
static bool main_loop_should_exit(int *status)
{
RunState r;
ShutdownCause request;
if (qemu_debug_requested()) {
vm_stop(RUN_STATE_DEBUG);
}
if (qemu_suspend_requested()) {
qemu_system_suspend();
}
request = qemu_shutdown_requested();
if (request) {
qemu_kill_report();
qemu_system_shutdown(request);
if (shutdown_action == SHUTDOWN_ACTION_PAUSE) {
vm_stop(RUN_STATE_SHUTDOWN);
} else {
if (request == SHUTDOWN_CAUSE_GUEST_PANIC &&
panic_action == PANIC_ACTION_EXIT_FAILURE) {
*status = EXIT_FAILURE;
}
return true;
}
}
request = qemu_reset_requested();
if (request) {
pause_all_vcpus();
qemu_system_reset(request);
resume_all_vcpus();
/*
* runstate can change in pause_all_vcpus()
* as iothread mutex is unlocked
*/
if (!runstate_check(RUN_STATE_RUNNING) &&
!runstate_check(RUN_STATE_INMIGRATE) &&
!runstate_check(RUN_STATE_FINISH_MIGRATE)) {
runstate_set(RUN_STATE_PRELAUNCH);
}
}
if (qemu_wakeup_requested()) {
pause_all_vcpus();
qemu_system_wakeup();
notifier_list_notify(&wakeup_notifiers, &wakeup_reason);
wakeup_reason = QEMU_WAKEUP_REASON_NONE;
resume_all_vcpus();
qapi_event_send_wakeup();
}
if (qemu_powerdown_requested()) {
qemu_system_powerdown();
}
if (qemu_vmstop_requested(&r)) {
vm_stop(r);
}
return false;
}
int qemu_main_loop(void)
{
int status = EXIT_SUCCESS;
while (!main_loop_should_exit(&status)) {
main_loop_wait(false);
}
return status;
}
void qemu_add_exit_notifier(Notifier *notify)
{
notifier_list_add(&exit_notifiers, notify);
}
void qemu_remove_exit_notifier(Notifier *notify)
{
notifier_remove(notify);
}
static void qemu_run_exit_notifiers(void)
{
notifier_list_notify(&exit_notifiers, NULL);
}
void qemu_init_subsystems(void)
{
Error *err = NULL;
os_set_line_buffering();
module_call_init(MODULE_INIT_TRACE);
qemu_init_cpu_list();
qemu_init_cpu_loop();
qemu_mutex_lock_iothread();
atexit(qemu_run_exit_notifiers);
module_call_init(MODULE_INIT_QOM);
module_call_init(MODULE_INIT_MIGRATION);
runstate_init();
precopy_infrastructure_init();
postcopy_infrastructure_init();
monitor_init_globals();
if (qcrypto_init(&err) < 0) {
error_reportf_err(err, "cannot initialize crypto: ");
exit(1);
}
os_setup_early_signal_handling();
bdrv_init_with_whitelist();
socket_init();
}
void qemu_cleanup(void)
{
gdb_exit(0);
/*
* cleaning up the migration object cancels any existing migration
* try to do this early so that it also stops using devices.
*/
migration_shutdown();
/*
* Close the exports before draining the block layer. The export
* drivers may have coroutines yielding on it, so we need to clean
them up before the drain, as otherwise they may get stuck in
* blk_wait_while_drained().
*/
blk_exp_close_all();
/* No more vcpu or device emulation activity beyond this point */
vm_shutdown();
replay_finish();
/*
* We must cancel all block jobs while the block layer is drained,
* or cancelling will be affected by throttling and thus may block
* for an extended period of time.
* Begin the drained section after vm_shutdown() to avoid requests being
* stuck in the BlockBackend's request queue.
* We do not need to end this section, because we do not want any
* requests happening from here on anyway.
*/
bdrv_drain_all_begin();
job_cancel_sync_all();
bdrv_close_all();
/* vhost-user must be cleaned up before chardevs. */
tpm_cleanup();
net_cleanup();
audio_cleanup();
monitor_cleanup();
qemu_chr_cleanup();
user_creatable_cleanup();
/* TODO: unref root container, check all devices are ok */
}

65
system/tpm-hmp-cmds.c Normal file
View file

@ -0,0 +1,65 @@
/*
* HMP commands related to TPM
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* (at your option) any later version.
*/
#include "qemu/osdep.h"
#include "qapi/qapi-commands-tpm.h"
#include "monitor/monitor.h"
#include "monitor/hmp.h"
#include "qapi/error.h"
void hmp_info_tpm(Monitor *mon, const QDict *qdict)
{
#ifdef CONFIG_TPM
TPMInfoList *info_list, *info;
Error *err = NULL;
unsigned int c = 0;
TPMPassthroughOptions *tpo;
TPMEmulatorOptions *teo;
info_list = qmp_query_tpm(&err);
if (err) {
monitor_printf(mon, "TPM device not supported\n");
error_free(err);
return;
}
if (info_list) {
monitor_printf(mon, "TPM device:\n");
}
for (info = info_list; info; info = info->next) {
TPMInfo *ti = info->value;
monitor_printf(mon, " tpm%d: model=%s\n",
c, TpmModel_str(ti->model));
monitor_printf(mon, " \\ %s: type=%s",
ti->id, TpmType_str(ti->options->type));
switch (ti->options->type) {
case TPM_TYPE_PASSTHROUGH:
tpo = ti->options->u.passthrough.data;
monitor_printf(mon, "%s%s%s%s",
tpo->path ? ",path=" : "",
tpo->path ?: "",
tpo->cancel_path ? ",cancel-path=" : "",
tpo->cancel_path ?: "");
break;
case TPM_TYPE_EMULATOR:
teo = ti->options->u.emulator.data;
monitor_printf(mon, ",chardev=%s", teo->chardev);
break;
case TPM_TYPE__MAX:
break;
}
monitor_printf(mon, "\n");
c++;
}
qapi_free_TPMInfoList(info_list);
#else
monitor_printf(mon, "TPM device not supported\n");
#endif /* CONFIG_TPM */
}

239
system/tpm.c Normal file
View file

@ -0,0 +1,239 @@
/*
* TPM configuration
*
* Copyright (C) 2011-2013 IBM Corporation
*
* Authors:
* Stefan Berger <stefanb@us.ibm.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
* Based on net.c
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-tpm.h"
#include "qapi/qmp/qerror.h"
#include "sysemu/tpm_backend.h"
#include "sysemu/tpm.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
static QLIST_HEAD(, TPMBackend) tpm_backends =
QLIST_HEAD_INITIALIZER(tpm_backends);
static const TPMBackendClass *
tpm_be_find_by_type(enum TpmType type)
{
ObjectClass *oc;
char *typename = g_strdup_printf("tpm-%s", TpmType_str(type));
oc = object_class_by_name(typename);
g_free(typename);
if (!object_class_dynamic_cast(oc, TYPE_TPM_BACKEND)) {
return NULL;
}
return TPM_BACKEND_CLASS(oc);
}
/*
* Walk the list of available TPM backend drivers and display them on the
* screen.
*/
static void tpm_display_backend_drivers(void)
{
bool got_one = false;
int i;
for (i = 0; i < TPM_TYPE__MAX; i++) {
const TPMBackendClass *bc = tpm_be_find_by_type(i);
if (!bc) {
continue;
}
if (!got_one) {
error_printf("Supported TPM types (choose only one):\n");
got_one = true;
}
error_printf("%12s %s\n", TpmType_str(i), bc->desc);
}
if (!got_one) {
error_printf("No TPM backend types are available\n");
}
}
/*
* Find the TPM with the given Id
*/
TPMBackend *qemu_find_tpm_be(const char *id)
{
TPMBackend *drv;
if (id) {
QLIST_FOREACH(drv, &tpm_backends, list) {
if (!strcmp(drv->id, id)) {
return drv;
}
}
}
return NULL;
}
static int tpm_init_tpmdev(void *dummy, QemuOpts *opts, Error **errp)
{
/*
* Use of error_report() in a function with an Error ** parameter
* is suspicious. It is okay here. The parameter only exists to
* make the function usable with qemu_opts_foreach(). It is not
* actually used.
*/
const char *value;
const char *id;
const TPMBackendClass *be;
TPMBackend *drv;
Error *local_err = NULL;
int i;
if (!QLIST_EMPTY(&tpm_backends)) {
error_report("Only one TPM is allowed.");
return 1;
}
id = qemu_opts_id(opts);
if (id == NULL) {
error_report(QERR_MISSING_PARAMETER, "id");
return 1;
}
value = qemu_opt_get(opts, "type");
if (!value) {
error_report(QERR_MISSING_PARAMETER, "type");
tpm_display_backend_drivers();
return 1;
}
i = qapi_enum_parse(&TpmType_lookup, value, -1, NULL);
be = i >= 0 ? tpm_be_find_by_type(i) : NULL;
if (be == NULL) {
error_report(QERR_INVALID_PARAMETER_VALUE,
"type", "a TPM backend type");
tpm_display_backend_drivers();
return 1;
}
/* validate backend specific opts */
if (!qemu_opts_validate(opts, be->opts, &local_err)) {
error_report_err(local_err);
return 1;
}
drv = be->create(opts);
if (!drv) {
return 1;
}
drv->id = g_strdup(id);
QLIST_INSERT_HEAD(&tpm_backends, drv, list);
return 0;
}
/*
* Walk the list of TPM backend drivers that are in use and call their
* destroy function to have them cleaned up.
*/
void tpm_cleanup(void)
{
TPMBackend *drv, *next;
QLIST_FOREACH_SAFE(drv, &tpm_backends, list, next) {
QLIST_REMOVE(drv, list);
object_unref(OBJECT(drv));
}
}
/*
* Initialize the TPM. Process the tpmdev command line options describing the
* TPM backend.
*/
int tpm_init(void)
{
if (qemu_opts_foreach(qemu_find_opts("tpmdev"),
tpm_init_tpmdev, NULL, NULL)) {
return -1;
}
return 0;
}
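/*
* Example of the documented swtpm setup that feeds this path:
* -chardev socket,id=chrtpm,path=/tmp/swtpm-sock
* -tpmdev emulator,id=tpm0,chardev=chrtpm
* -device tpm-tis,tpmdev=tpm0
* tpm_init_tpmdev() then creates the "emulator" backend with id "tpm0".
*/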
/*
* Parse the TPM configuration options.
* To display all available TPM backends the user may use '-tpmdev help'
*/
int tpm_config_parse(QemuOptsList *opts_list, const char *optstr)
{
QemuOpts *opts;
if (!strcmp(optstr, "help")) {
tpm_display_backend_drivers();
return -1;
}
opts = qemu_opts_parse_noisily(opts_list, optstr, true);
if (!opts) {
return -1;
}
return 0;
}
/*
* Walk the list of active TPM backends and collect information about them.
*/
TPMInfoList *qmp_query_tpm(Error **errp)
{
TPMBackend *drv;
TPMInfoList *head = NULL, **tail = &head;
QLIST_FOREACH(drv, &tpm_backends, list) {
if (!drv->tpmif) {
continue;
}
QAPI_LIST_APPEND(tail, tpm_backend_query_tpm(drv));
}
return head;
}
TpmTypeList *qmp_query_tpm_types(Error **errp)
{
unsigned int i = 0;
TpmTypeList *head = NULL, **tail = &head;
for (i = 0; i < TPM_TYPE__MAX; i++) {
if (!tpm_be_find_by_type(i)) {
continue;
}
QAPI_LIST_APPEND(tail, i);
}
return head;
}
TpmModelList *qmp_query_tpm_models(Error **errp)
{
TpmModelList *head = NULL, **tail = &head;
GSList *e, *l = object_class_get_list(TYPE_TPM_IF, false);
for (e = l; e; e = e->next) {
TPMIfClass *c = TPM_IF_CLASS(e->data);
QAPI_LIST_APPEND(tail, c->model);
}
g_slist_free(l);
return head;
}

40
system/trace-events Normal file
View file

@ -0,0 +1,40 @@
# See docs/devel/tracing.rst for syntax documentation.
# balloon.c
# Since requests are raised via monitor, not many tracepoints are needed.
balloon_event(void *opaque, unsigned long addr) "opaque %p addr %lu"
# ioport.c
cpu_in(unsigned int addr, char size, unsigned int val) "addr 0x%x(%c) value %u"
cpu_out(unsigned int addr, char size, unsigned int val) "addr 0x%x(%c) value %u"
# memory.c
memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size, const char *name) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u name '%s'"
memory_region_ops_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size, const char *name) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u name '%s'"
memory_region_subpage_read(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" value 0x%"PRIx64" size %u"
memory_region_subpage_write(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" value 0x%"PRIx64" size %u"
memory_region_ram_device_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u"
memory_region_ram_device_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u"
memory_region_sync_dirty(const char *mr, const char *listener, int global) "mr '%s' listener '%s' synced (global=%d)"
flatview_new(void *view, void *root) "%p (root %p)"
flatview_destroy(void *view, void *root) "%p (root %p)"
flatview_destroy_rcu(void *view, void *root) "%p (root %p)"
global_dirty_changed(unsigned int bitmask) "bitmask 0x%"PRIx32
# cpus.c
vm_stop_flush_all(int ret) "ret %d"
# vl.c
vm_state_notify(int running, int reason, const char *reason_str) "running %d reason %d (%s)"
load_file(const char *name, const char *path) "name %s location %s"
runstate_set(int current_state, const char *current_state_str, int new_state, const char *new_state_str) "current_run_state %d (%s) new_state %d (%s)"
system_wakeup_request(int reason) "reason=%d"
qemu_system_shutdown_request(int reason) "reason=%d"
qemu_system_powerdown_request(void) ""
# dirtylimit.c
dirtylimit_state_initialize(int max_cpus) "dirtylimit state initialize: max cpus %d"
dirtylimit_state_finalize(void)
dirtylimit_throttle_pct(int cpu_index, uint64_t pct, int64_t time_us) "CPU[%d] throttle percent: %" PRIu64 ", throttle adjust time %"PRIi64 " us"
dirtylimit_set_vcpu(int cpu_index, uint64_t quota) "CPU[%d] set dirty page rate limit %"PRIu64
dirtylimit_vcpu_execute(int cpu_index, int64_t sleep_time_us) "CPU[%d] sleep %"PRIi64 " us"

1
system/trace.h Normal file
View file

@ -0,0 +1 @@
#include "trace/trace-system.h"

3740
system/vl.c Normal file

File diff suppressed because it is too large Load diff

226
system/watchpoint.c Normal file
View file

@ -0,0 +1,226 @@
/*
* CPU watchpoints
*
* Copyright (c) 2003 Fabrice Bellard
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "qemu/error-report.h"
#include "exec/exec-all.h"
#include "exec/translate-all.h"
#include "sysemu/tcg.h"
#include "sysemu/replay.h"
#include "hw/core/tcg-cpu-ops.h"
#include "hw/core/cpu.h"
/* Add a watchpoint. */
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
int flags, CPUWatchpoint **watchpoint)
{
CPUWatchpoint *wp;
vaddr in_page;
/* forbid ranges which are empty or run off the end of the address space */
if (len == 0 || (addr + len - 1) < addr) {
error_report("tried to set invalid watchpoint at %"
VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
return -EINVAL;
}
wp = g_malloc(sizeof(*wp));
wp->vaddr = addr;
wp->len = len;
wp->flags = flags;
/* keep all GDB-injected watchpoints in front */
if (flags & BP_GDB) {
QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
} else {
QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
}
in_page = -(addr | TARGET_PAGE_MASK);
if (len <= in_page) {
tlb_flush_page(cpu, addr);
} else {
tlb_flush(cpu);
}
if (watchpoint) {
*watchpoint = wp;
}
return 0;
}
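/*
* Usage sketch (the address and length are hypothetical): watch four
* bytes at guest address 0x1000 for writes on behalf of the gdbstub:
*
* CPUWatchpoint *wp;
* cpu_watchpoint_insert(cpu, 0x1000, 4, BP_MEM_WRITE | BP_GDB, &wp);
*/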
/* Remove a specific watchpoint. */
int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
int flags)
{
CPUWatchpoint *wp;
QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
if (addr == wp->vaddr && len == wp->len
&& flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
cpu_watchpoint_remove_by_ref(cpu, wp);
return 0;
}
}
return -ENOENT;
}
/* Remove a specific watchpoint by reference. */
void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
tlb_flush_page(cpu, watchpoint->vaddr);
g_free(watchpoint);
}
/* Remove all matching watchpoints. */
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
{
CPUWatchpoint *wp, *next;
QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
if (wp->flags & mask) {
cpu_watchpoint_remove_by_ref(cpu, wp);
}
}
}
#ifdef CONFIG_TCG
/*
* Return true if this watchpoint address matches the specified
* access (ie the address range covered by the watchpoint overlaps
* partially or completely with the address range covered by the
* access).
*/
static inline bool watchpoint_address_matches(CPUWatchpoint *wp,
vaddr addr, vaddr len)
{
/*
* We know the lengths are non-zero, but a little caution is
* required to avoid errors in the case where the range ends
* exactly at the top of the address space and so addr + len
* wraps round to zero.
*/
vaddr wpend = wp->vaddr + wp->len - 1;
vaddr addrend = addr + len - 1;
return !(addr > wpend || wp->vaddr > addrend);
}
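/*
* Worked example: a watchpoint at vaddr 0xfffc with len 8 gives
* wpend = 0x10003; an access at addr 0x10000 with len 4 gives
* addrend = 0x10003. Neither addr > wpend nor wp->vaddr > addrend
* holds, so the ranges overlap. Computing inclusive ends first is what
* keeps a range ending at the top of the address space from wrapping
* to zero.
*/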
/* Return flags for watchpoints that match addr + prot. */
int cpu_watchpoint_address_matches(CPUState *cpu, vaddr addr, vaddr len)
{
CPUWatchpoint *wp;
int ret = 0;
QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
if (watchpoint_address_matches(wp, addr, len)) {
ret |= wp->flags;
}
}
return ret;
}
/* Generate a debug exception if a watchpoint has been hit. */
void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
MemTxAttrs attrs, int flags, uintptr_t ra)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
CPUWatchpoint *wp;
assert(tcg_enabled());
if (cpu->watchpoint_hit) {
/*
* We re-entered the check after replacing the TB.
* Now raise the debug interrupt so that it will
* trigger after the current instruction.
*/
qemu_mutex_lock_iothread();
cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
qemu_mutex_unlock_iothread();
return;
}
if (cc->tcg_ops->adjust_watchpoint_address) {
/* this is currently used only by ARM BE32 */
addr = cc->tcg_ops->adjust_watchpoint_address(cpu, addr, len);
}
assert((flags & ~BP_MEM_ACCESS) == 0);
QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
int hit_flags = wp->flags & flags;
if (hit_flags && watchpoint_address_matches(wp, addr, len)) {
if (replay_running_debug()) {
/*
* replay_breakpoint reads icount.
* Force recompile to succeed, because icount may
* be read only at the end of the block.
*/
if (!cpu->neg.can_do_io) {
/* Force execution of one insn next time. */
cpu->cflags_next_tb = 1 | CF_LAST_IO | CF_NOIRQ
| curr_cflags(cpu);
cpu_loop_exit_restore(cpu, ra);
}
/*
* Don't process the watchpoints when we are
* in a reverse debugging operation.
*/
replay_breakpoint();
return;
}
wp->flags |= hit_flags << BP_HIT_SHIFT;
wp->hitaddr = MAX(addr, wp->vaddr);
wp->hitattrs = attrs;
if (wp->flags & BP_CPU
&& cc->tcg_ops->debug_check_watchpoint
&& !cc->tcg_ops->debug_check_watchpoint(cpu, wp)) {
wp->flags &= ~BP_WATCHPOINT_HIT;
continue;
}
cpu->watchpoint_hit = wp;
mmap_lock();
/* This call also restores vCPU state */
tb_check_watchpoint(cpu, ra);
if (wp->flags & BP_STOP_BEFORE_ACCESS) {
cpu->exception_index = EXCP_DEBUG;
mmap_unlock();
cpu_loop_exit(cpu);
} else {
/* Force execution of one insn next time. */
cpu->cflags_next_tb = 1 | CF_LAST_IO | CF_NOIRQ
| curr_cflags(cpu);
mmap_unlock();
cpu_loop_exit_noexc(cpu);
}
} else {
wp->flags &= ~BP_WATCHPOINT_HIT;
}
}
}
#endif /* CONFIG_TCG */