mirror of
https://github.com/Motorhead1991/qemu.git
synced 2025-08-08 10:13:56 -06:00
Migration pull
Note: The 'postcopy: Update userfaultfd.h header' is part of Paolo's header update and will disappear if applied after it. Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> -----BEGIN PGP SIGNATURE----- iQIcBAABAgAGBQJYtW9KAAoJEAUWMx68W/3nF/MQAIaMjoCkeVnCZsGCJ2VbJZcZ Fb5gMNjnHUKwOeDhGuSBP7FGRMalYM2JNRufSiBZnUowCQMXi3Emjad6pGUiPTr0 B+L1czjw0FDdskQpE1U/StdruLiYBJ98oktnjlla00f+E9rylY0cMmrHpmqfRwDn IXTa4qm77aw47Y2MYku1nce27gjA3JEko6Lg2fB7gTtwYzTi/uRrKa+ilbnTPoEZ /ZzK8hcUYiV8oDAOtEmKSG3Azo+6ylzDG4r/ldwEecJPhZxeUk39AhDOoU0mx98N OE8oOk2t/0Bo+mS7iOw9gZ8sr9p5L2myQkmoxxLuAXAcD9sHVlcp0eKi5lLYNmUa oWnnYo3QeCvqrcZzhvSX0b4rLXoY4GP+qKpQo21eKIPEyq3v6EDhrk10UCTXaiBO zxHblLgXSrX6VqYcEJGj2oUR/RjH9ouw3hjI5cDy/d/hRmNLCl8lwvPmVmv3tRer 6X1gcZSUs6hY/drs2/v6maJ0CqK/bx6/OBfkiUJUEN4Dg1ldgO2r1v8pBLukvM6c De2aNRezl821HK487EvRlluUq0nO6L3LkqDTBql4/4Rf4HoTRXxoJ68sB0LBqym5 PwD/C3mQuvlWg8tKJtaHVtS0ESuSCSroaSk1FB648mSs8nJYYFjstc/XovuePqTl 6UT2OQbUdWITILoWSlI5 =PCYv -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20170228a' into staging Migration pull Note: The 'postcopy: Update userfaultfd.h header' is part of Paolo's header update and will disappear if applied after it. Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> # gpg: Signature made Tue 28 Feb 2017 12:38:34 GMT # gpg: using RSA key 0x0516331EBC5BFDE7 # gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>" # gpg: WARNING: This key is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. 
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A 9FA9 0516 331E BC5B FDE7 * remotes/dgilbert/tags/pull-migration-20170228a: (27 commits) postcopy: Add extra check for COPY function postcopy: Add doc about hugepages and postcopy postcopy: Check for userfault+hugepage feature postcopy: Update userfaultfd.h header postcopy: Allow hugepages postcopy: Send whole huge pages postcopy: Mask fault addresses to huge page boundary postcopy: Load huge pages in one go postcopy: Use temporary for placing zero huge pages postcopy: Plumb pagesize down into place helpers postcopy: Record largest page size postcopy: enhance ram_block_discard_range for hugepages exec: ram_block_discard_range postcopy: Chunk discards for hugepages postcopy: Transmit and compare individual page sizes postcopy: Transmit ram size summary word migration: fix use-after-free of to_dst_file migration: Update docs to discourage version bumps migration: fix id leak regression migrate: Introduce a 'dc->vmsd' check to avoid segfault for --only-migratable ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
251501a371
18 changed files with 550 additions and 221 deletions
|
@ -49,6 +49,10 @@
|
|||
* for sending the last part */
|
||||
#define DEFAULT_MIGRATE_SET_DOWNTIME 300
|
||||
|
||||
/* Maximum migrate downtime set to 2000 seconds */
|
||||
#define MAX_MIGRATE_DOWNTIME_SECONDS 2000
|
||||
#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)
|
||||
|
||||
/* Default compression thread count */
|
||||
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
|
||||
/* Default decompression thread count, usually decompression is at
|
||||
|
@ -383,6 +387,7 @@ static void process_incoming_migration_co(void *opaque)
|
|||
int ret;
|
||||
|
||||
mis->from_src_file = f;
|
||||
mis->largest_page_size = qemu_ram_pagesize_largest();
|
||||
postcopy_state_set(POSTCOPY_INCOMING_NONE);
|
||||
migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
|
||||
MIGRATION_STATUS_ACTIVE);
|
||||
|
@ -843,10 +848,11 @@ void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp)
|
|||
return;
|
||||
}
|
||||
if (params->has_downtime_limit &&
|
||||
(params->downtime_limit < 0 || params->downtime_limit > 2000000)) {
|
||||
error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
|
||||
"downtime_limit",
|
||||
"an integer in the range of 0 to 2000000 milliseconds");
|
||||
(params->downtime_limit < 0 ||
|
||||
params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
|
||||
error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
|
||||
"the range of 0 to %d milliseconds",
|
||||
MAX_MIGRATE_DOWNTIME);
|
||||
return;
|
||||
}
|
||||
if (params->has_x_checkpoint_delay && (params->x_checkpoint_delay < 0)) {
|
||||
|
@ -1145,6 +1151,21 @@ void migrate_del_blocker(Error *reason)
|
|||
migration_blockers = g_slist_remove(migration_blockers, reason);
|
||||
}
|
||||
|
||||
int check_migratable(Object *obj, Error **err)
|
||||
{
|
||||
DeviceClass *dc = DEVICE_GET_CLASS(obj);
|
||||
if (only_migratable && dc->vmsd) {
|
||||
if (dc->vmsd->unmigratable) {
|
||||
error_setg(err, "Device %s is not migratable, but "
|
||||
"--only-migratable was specified",
|
||||
object_get_typename(obj));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void qmp_migrate_incoming(const char *uri, Error **errp)
|
||||
{
|
||||
Error *local_err = NULL;
|
||||
|
@ -1289,6 +1310,13 @@ void qmp_migrate_set_speed(int64_t value, Error **errp)
|
|||
|
||||
void qmp_migrate_set_downtime(double value, Error **errp)
|
||||
{
|
||||
if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) {
|
||||
error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
|
||||
"the range of 0 to %d seconds",
|
||||
MAX_MIGRATE_DOWNTIME_SECONDS);
|
||||
return;
|
||||
}
|
||||
|
||||
value *= 1000; /* Convert to milliseconds */
|
||||
value = MAX(0, MIN(INT64_MAX, value));
|
||||
|
||||
|
|
|
@ -81,25 +81,18 @@ static bool ufd_version_check(int ufd)
|
|||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for things that postcopy won't support; returns 0 if the block
|
||||
* is fine.
|
||||
*/
|
||||
static int check_range(const char *block_name, void *host_addr,
|
||||
ram_addr_t offset, ram_addr_t length, void *opaque)
|
||||
{
|
||||
RAMBlock *rb = qemu_ram_block_by_name(block_name);
|
||||
|
||||
if (qemu_ram_pagesize(rb) > getpagesize()) {
|
||||
error_report("Postcopy doesn't support large page sizes yet (%s)",
|
||||
block_name);
|
||||
return -E2BIG;
|
||||
if (getpagesize() != ram_pagesize_summary()) {
|
||||
bool have_hp = false;
|
||||
/* We've got a huge page */
|
||||
#ifdef UFFD_FEATURE_MISSING_HUGETLBFS
|
||||
have_hp = api_struct.features & UFFD_FEATURE_MISSING_HUGETLBFS;
|
||||
#endif
|
||||
if (!have_hp) {
|
||||
error_report("Userfault on this host does not support huge pages");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -122,12 +115,6 @@ bool postcopy_ram_supported_by_host(void)
|
|||
goto out;
|
||||
}
|
||||
|
||||
/* Check for anything about the RAMBlocks we don't support */
|
||||
if (qemu_ram_foreach_block(check_range, NULL)) {
|
||||
/* check_range will have printed its own error */
|
||||
goto out;
|
||||
}
|
||||
|
||||
ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
|
||||
if (ufd == -1) {
|
||||
error_report("%s: userfaultfd not available: %s", __func__,
|
||||
|
@ -200,27 +187,6 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* postcopy_ram_discard_range: Discard a range of memory.
|
||||
* We can assume that if we've been called postcopy_ram_hosttest returned true.
|
||||
*
|
||||
* @mis: Current incoming migration state.
|
||||
* @start, @length: range of memory to discard.
|
||||
*
|
||||
* returns: 0 on success.
|
||||
*/
|
||||
int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
|
||||
size_t length)
|
||||
{
|
||||
trace_postcopy_ram_discard_range(start, length);
|
||||
if (madvise(start, length, MADV_DONTNEED)) {
|
||||
error_report("%s MADV_DONTNEED: %s", __func__, strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Setup an area of RAM so that it *can* be used for postcopy later; this
|
||||
* must be done right at the start prior to pre-copy.
|
||||
|
@ -239,7 +205,7 @@ static int init_range(const char *block_name, void *host_addr,
|
|||
* - we're going to get the copy from the source anyway.
|
||||
* (Precopy will just overwrite this data, so doesn't need the discard)
|
||||
*/
|
||||
if (postcopy_ram_discard_range(mis, host_addr, length)) {
|
||||
if (ram_discard_range(mis, block_name, 0, length)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -342,9 +308,13 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
|
|||
migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
|
||||
|
||||
if (mis->postcopy_tmp_page) {
|
||||
munmap(mis->postcopy_tmp_page, getpagesize());
|
||||
munmap(mis->postcopy_tmp_page, mis->largest_page_size);
|
||||
mis->postcopy_tmp_page = NULL;
|
||||
}
|
||||
if (mis->postcopy_tmp_zero_page) {
|
||||
munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size);
|
||||
mis->postcopy_tmp_zero_page = NULL;
|
||||
}
|
||||
trace_postcopy_ram_incoming_cleanup_exit();
|
||||
return 0;
|
||||
}
|
||||
|
@ -408,6 +378,10 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,
|
|||
error_report("%s userfault register: %s", __func__, strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) {
|
||||
error_report("%s userfault: Region doesn't support COPY", __func__);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -420,7 +394,6 @@ static void *postcopy_ram_fault_thread(void *opaque)
|
|||
MigrationIncomingState *mis = opaque;
|
||||
struct uffd_msg msg;
|
||||
int ret;
|
||||
size_t hostpagesize = getpagesize();
|
||||
RAMBlock *rb = NULL;
|
||||
RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */
|
||||
|
||||
|
@ -487,7 +460,7 @@ static void *postcopy_ram_fault_thread(void *opaque)
|
|||
break;
|
||||
}
|
||||
|
||||
rb_offset &= ~(hostpagesize - 1);
|
||||
rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
|
||||
trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
|
||||
qemu_ram_get_idstr(rb),
|
||||
rb_offset);
|
||||
|
@ -499,11 +472,11 @@ static void *postcopy_ram_fault_thread(void *opaque)
|
|||
if (rb != last_rb) {
|
||||
last_rb = rb;
|
||||
migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
|
||||
rb_offset, hostpagesize);
|
||||
rb_offset, qemu_ram_pagesize(rb));
|
||||
} else {
|
||||
/* Save some space */
|
||||
migrate_send_rp_req_pages(mis, NULL,
|
||||
rb_offset, hostpagesize);
|
||||
rb_offset, qemu_ram_pagesize(rb));
|
||||
}
|
||||
}
|
||||
trace_postcopy_ram_fault_thread_exit();
|
||||
|
@ -564,13 +537,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
|
|||
* Place a host page (from) at (host) atomically
|
||||
* returns 0 on success
|
||||
*/
|
||||
int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
|
||||
int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
|
||||
size_t pagesize)
|
||||
{
|
||||
struct uffdio_copy copy_struct;
|
||||
|
||||
copy_struct.dst = (uint64_t)(uintptr_t)host;
|
||||
copy_struct.src = (uint64_t)(uintptr_t)from;
|
||||
copy_struct.len = getpagesize();
|
||||
copy_struct.len = pagesize;
|
||||
copy_struct.mode = 0;
|
||||
|
||||
/* copy also acks to the kernel waking the stalled thread up
|
||||
|
@ -580,8 +554,8 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
|
|||
*/
|
||||
if (ioctl(mis->userfault_fd, UFFDIO_COPY, &copy_struct)) {
|
||||
int e = errno;
|
||||
error_report("%s: %s copy host: %p from: %p",
|
||||
__func__, strerror(e), host, from);
|
||||
error_report("%s: %s copy host: %p from: %p (size: %zd)",
|
||||
__func__, strerror(e), host, from, pagesize);
|
||||
|
||||
return -e;
|
||||
}
|
||||
|
@ -594,23 +568,44 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
|
|||
* Place a zero page at (host) atomically
|
||||
* returns 0 on success
|
||||
*/
|
||||
int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
|
||||
int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
|
||||
size_t pagesize)
|
||||
{
|
||||
struct uffdio_zeropage zero_struct;
|
||||
trace_postcopy_place_page_zero(host);
|
||||
|
||||
zero_struct.range.start = (uint64_t)(uintptr_t)host;
|
||||
zero_struct.range.len = getpagesize();
|
||||
zero_struct.mode = 0;
|
||||
if (pagesize == getpagesize()) {
|
||||
struct uffdio_zeropage zero_struct;
|
||||
zero_struct.range.start = (uint64_t)(uintptr_t)host;
|
||||
zero_struct.range.len = getpagesize();
|
||||
zero_struct.mode = 0;
|
||||
|
||||
if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
|
||||
int e = errno;
|
||||
error_report("%s: %s zero host: %p",
|
||||
__func__, strerror(e), host);
|
||||
if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
|
||||
int e = errno;
|
||||
error_report("%s: %s zero host: %p",
|
||||
__func__, strerror(e), host);
|
||||
|
||||
return -e;
|
||||
return -e;
|
||||
}
|
||||
} else {
|
||||
/* The kernel can't use UFFDIO_ZEROPAGE for hugepages */
|
||||
if (!mis->postcopy_tmp_zero_page) {
|
||||
mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS,
|
||||
-1, 0);
|
||||
if (mis->postcopy_tmp_zero_page == MAP_FAILED) {
|
||||
int e = errno;
|
||||
mis->postcopy_tmp_zero_page = NULL;
|
||||
error_report("%s: %s mapping large zero page",
|
||||
__func__, strerror(e));
|
||||
return -e;
|
||||
}
|
||||
memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
|
||||
}
|
||||
return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
|
||||
pagesize);
|
||||
}
|
||||
|
||||
trace_postcopy_place_page_zero(host);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -625,7 +620,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
|
|||
void *postcopy_get_tmp_page(MigrationIncomingState *mis)
|
||||
{
|
||||
if (!mis->postcopy_tmp_page) {
|
||||
mis->postcopy_tmp_page = mmap(NULL, getpagesize(),
|
||||
mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size,
|
||||
PROT_READ | PROT_WRITE, MAP_PRIVATE |
|
||||
MAP_ANONYMOUS, -1, 0);
|
||||
if (mis->postcopy_tmp_page == MAP_FAILED) {
|
||||
|
@ -658,13 +653,6 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
|
|||
return -1;
|
||||
}
|
||||
|
||||
int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
|
||||
size_t length)
|
||||
{
|
||||
assert(0);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
|
||||
{
|
||||
assert(0);
|
||||
|
@ -677,13 +665,15 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
|
|||
return -1;
|
||||
}
|
||||
|
||||
int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
|
||||
int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
|
||||
size_t pagesize)
|
||||
{
|
||||
assert(0);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
|
||||
int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
|
||||
size_t pagesize)
|
||||
{
|
||||
assert(0);
|
||||
return -1;
|
||||
|
|
109
migration/ram.c
109
migration/ram.c
|
@ -600,6 +600,23 @@ static void migration_bitmap_sync_init(void)
|
|||
iterations_prev = 0;
|
||||
}
|
||||
|
||||
/* Returns a summary bitmap of the page sizes of all RAMBlocks;
|
||||
* for VMs with just normal pages this is equivalent to the
|
||||
* host page size. If it's got some huge pages then it's the OR
|
||||
* of all the different page sizes.
|
||||
*/
|
||||
uint64_t ram_pagesize_summary(void)
|
||||
{
|
||||
RAMBlock *block;
|
||||
uint64_t summary = 0;
|
||||
|
||||
QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
|
||||
summary |= block->page_size;
|
||||
}
|
||||
|
||||
return summary;
|
||||
}
|
||||
|
||||
static void migration_bitmap_sync(void)
|
||||
{
|
||||
RAMBlock *block;
|
||||
|
@ -1285,6 +1302,8 @@ static int ram_save_target_page(MigrationState *ms, QEMUFile *f,
|
|||
* offset to point into the middle of a host page
|
||||
* in which case the remainder of the hostpage is sent.
|
||||
* Only dirty target pages are sent.
|
||||
* Note that the host page size may be a huge page for this
|
||||
* block.
|
||||
*
|
||||
* Returns: Number of pages written.
|
||||
*
|
||||
|
@ -1303,6 +1322,8 @@ static int ram_save_host_page(MigrationState *ms, QEMUFile *f,
|
|||
ram_addr_t dirty_ram_abs)
|
||||
{
|
||||
int tmppages, pages = 0;
|
||||
size_t pagesize = qemu_ram_pagesize(pss->block);
|
||||
|
||||
do {
|
||||
tmppages = ram_save_target_page(ms, f, pss, last_stage,
|
||||
bytes_transferred, dirty_ram_abs);
|
||||
|
@ -1313,7 +1334,7 @@ static int ram_save_host_page(MigrationState *ms, QEMUFile *f,
|
|||
pages += tmppages;
|
||||
pss->offset += TARGET_PAGE_SIZE;
|
||||
dirty_ram_abs += TARGET_PAGE_SIZE;
|
||||
} while (pss->offset & (qemu_host_page_size - 1));
|
||||
} while (pss->offset & (pagesize - 1));
|
||||
|
||||
/* The offset we leave with is the last one we looked at */
|
||||
pss->offset -= TARGET_PAGE_SIZE;
|
||||
|
@ -1655,12 +1676,17 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
|
|||
{
|
||||
unsigned long *bitmap;
|
||||
unsigned long *unsentmap;
|
||||
unsigned int host_ratio = qemu_host_page_size / TARGET_PAGE_SIZE;
|
||||
unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
|
||||
unsigned long first = block->offset >> TARGET_PAGE_BITS;
|
||||
unsigned long len = block->used_length >> TARGET_PAGE_BITS;
|
||||
unsigned long last = first + (len - 1);
|
||||
unsigned long run_start;
|
||||
|
||||
if (block->page_size == TARGET_PAGE_SIZE) {
|
||||
/* Easy case - TPS==HPS for a non-huge page RAMBlock */
|
||||
return;
|
||||
}
|
||||
|
||||
bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
|
||||
unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
|
||||
|
||||
|
@ -1764,7 +1790,8 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
|
|||
* Utility for the outgoing postcopy code.
|
||||
*
|
||||
* Discard any partially sent host-page size chunks, mark any partially
|
||||
* dirty host-page size chunks as all dirty.
|
||||
* dirty host-page size chunks as all dirty. In this case the host-page
|
||||
* is the host-page for the particular RAMBlock, i.e. it might be a huge page
|
||||
*
|
||||
* Returns: 0 on success
|
||||
*/
|
||||
|
@ -1772,11 +1799,6 @@ static int postcopy_chunk_hostpages(MigrationState *ms)
|
|||
{
|
||||
struct RAMBlock *block;
|
||||
|
||||
if (qemu_host_page_size == TARGET_PAGE_SIZE) {
|
||||
/* Easy case - TPS==HPS - nothing to be done */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Easiest way to make sure we don't resume in the middle of a host-page */
|
||||
last_seen_block = NULL;
|
||||
last_sent_block = NULL;
|
||||
|
@ -1832,7 +1854,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Deal with TPS != HPS */
|
||||
/* Deal with TPS != HPS and huge pages */
|
||||
ret = postcopy_chunk_hostpages(ms);
|
||||
if (ret) {
|
||||
rcu_read_unlock();
|
||||
|
@ -1872,6 +1894,8 @@ int ram_discard_range(MigrationIncomingState *mis,
|
|||
{
|
||||
int ret = -1;
|
||||
|
||||
trace_ram_discard_range(block_name, start, length);
|
||||
|
||||
rcu_read_lock();
|
||||
RAMBlock *rb = qemu_ram_block_by_name(block_name);
|
||||
|
||||
|
@ -1881,27 +1905,7 @@ int ram_discard_range(MigrationIncomingState *mis,
|
|||
goto err;
|
||||
}
|
||||
|
||||
uint8_t *host_startaddr = rb->host + start;
|
||||
|
||||
if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) {
|
||||
error_report("ram_discard_range: Unaligned start address: %p",
|
||||
host_startaddr);
|
||||
goto err;
|
||||
}
|
||||
|
||||
if ((start + length) <= rb->used_length) {
|
||||
uint8_t *host_endaddr = host_startaddr + length;
|
||||
if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) {
|
||||
error_report("ram_discard_range: Unaligned end address: %p",
|
||||
host_endaddr);
|
||||
goto err;
|
||||
}
|
||||
ret = postcopy_ram_discard_range(mis, host_startaddr, length);
|
||||
} else {
|
||||
error_report("ram_discard_range: Overrun block '%s' (%" PRIu64
|
||||
"/%zx/" RAM_ADDR_FMT")",
|
||||
block_name, start, length, rb->used_length);
|
||||
}
|
||||
ret = ram_block_discard_range(rb, start, length);
|
||||
|
||||
err:
|
||||
rcu_read_unlock();
|
||||
|
@ -2010,6 +2014,9 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
|
|||
qemu_put_byte(f, strlen(block->idstr));
|
||||
qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
|
||||
qemu_put_be64(f, block->used_length);
|
||||
if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
|
||||
qemu_put_be64(f, block->page_size);
|
||||
}
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
|
@ -2387,7 +2394,7 @@ static int ram_load_postcopy(QEMUFile *f)
|
|||
{
|
||||
int flags = 0, ret = 0;
|
||||
bool place_needed = false;
|
||||
bool matching_page_sizes = qemu_host_page_size == TARGET_PAGE_SIZE;
|
||||
bool matching_page_sizes = false;
|
||||
MigrationIncomingState *mis = migration_incoming_get_current();
|
||||
/* Temporary page that is later 'placed' */
|
||||
void *postcopy_host_page = postcopy_get_tmp_page(mis);
|
||||
|
@ -2399,6 +2406,7 @@ static int ram_load_postcopy(QEMUFile *f)
|
|||
void *host = NULL;
|
||||
void *page_buffer = NULL;
|
||||
void *place_source = NULL;
|
||||
RAMBlock *block = NULL;
|
||||
uint8_t ch;
|
||||
|
||||
addr = qemu_get_be64(f);
|
||||
|
@ -2408,7 +2416,7 @@ static int ram_load_postcopy(QEMUFile *f)
|
|||
trace_ram_load_postcopy_loop((uint64_t)addr, flags);
|
||||
place_needed = false;
|
||||
if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
|
||||
RAMBlock *block = ram_block_from_stream(f, flags);
|
||||
block = ram_block_from_stream(f, flags);
|
||||
|
||||
host = host_from_ram_block_offset(block, addr);
|
||||
if (!host) {
|
||||
|
@ -2416,8 +2424,11 @@ static int ram_load_postcopy(QEMUFile *f)
|
|||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
|
||||
/*
|
||||
* Postcopy requires that we place whole host pages atomically.
|
||||
* Postcopy requires that we place whole host pages atomically;
|
||||
* these may be huge pages for RAMBlocks that are backed by
|
||||
* hugetlbfs.
|
||||
* To make it atomic, the data is read into a temporary page
|
||||
* that's moved into place later.
|
||||
* The migration protocol uses, possibly smaller, target-pages
|
||||
|
@ -2425,9 +2436,9 @@ static int ram_load_postcopy(QEMUFile *f)
|
|||
* of a host page in order.
|
||||
*/
|
||||
page_buffer = postcopy_host_page +
|
||||
((uintptr_t)host & ~qemu_host_page_mask);
|
||||
((uintptr_t)host & (block->page_size - 1));
|
||||
/* If all TP are zero then we can optimise the place */
|
||||
if (!((uintptr_t)host & ~qemu_host_page_mask)) {
|
||||
if (!((uintptr_t)host & (block->page_size - 1))) {
|
||||
all_zero = true;
|
||||
} else {
|
||||
/* not the 1st TP within the HP */
|
||||
|
@ -2445,7 +2456,7 @@ static int ram_load_postcopy(QEMUFile *f)
|
|||
* page
|
||||
*/
|
||||
place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
|
||||
~qemu_host_page_mask) == 0;
|
||||
(block->page_size - 1)) == 0;
|
||||
place_source = postcopy_host_page;
|
||||
}
|
||||
last_host = host;
|
||||
|
@ -2483,14 +2494,14 @@ static int ram_load_postcopy(QEMUFile *f)
|
|||
|
||||
if (place_needed) {
|
||||
/* This gets called at the last target page in the host page */
|
||||
void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
|
||||
|
||||
if (all_zero) {
|
||||
ret = postcopy_place_page_zero(mis,
|
||||
host + TARGET_PAGE_SIZE -
|
||||
qemu_host_page_size);
|
||||
ret = postcopy_place_page_zero(mis, place_dest,
|
||||
block->page_size);
|
||||
} else {
|
||||
ret = postcopy_place_page(mis, host + TARGET_PAGE_SIZE -
|
||||
qemu_host_page_size,
|
||||
place_source);
|
||||
ret = postcopy_place_page(mis, place_dest,
|
||||
place_source, block->page_size);
|
||||
}
|
||||
}
|
||||
if (!ret) {
|
||||
|
@ -2511,6 +2522,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
|
|||
* be atomic
|
||||
*/
|
||||
bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
|
||||
/* ADVISE is earlier, it shows the source has the postcopy capability on */
|
||||
bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;
|
||||
|
||||
seq_iter++;
|
||||
|
||||
|
@ -2575,6 +2588,18 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
|
|||
error_report_err(local_err);
|
||||
}
|
||||
}
|
||||
/* For postcopy we need to check hugepage sizes match */
|
||||
if (postcopy_advised &&
|
||||
block->page_size != qemu_host_page_size) {
|
||||
uint64_t remote_page_size = qemu_get_be64(f);
|
||||
if (remote_page_size != block->page_size) {
|
||||
error_report("Mismatched RAM page size %s "
|
||||
"(local) %zd != %" PRId64,
|
||||
id, block->page_size,
|
||||
remote_page_size);
|
||||
ret = -EINVAL;
|
||||
}
|
||||
}
|
||||
ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
|
||||
block->idstr);
|
||||
} else {
|
||||
|
|
|
@ -688,6 +688,7 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
|
|||
|
||||
return -1;
|
||||
}
|
||||
g_free(id);
|
||||
|
||||
se->compat = g_new0(CompatEntry, 1);
|
||||
pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name);
|
||||
|
@ -869,7 +870,7 @@ int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len)
|
|||
void qemu_savevm_send_postcopy_advise(QEMUFile *f)
|
||||
{
|
||||
uint64_t tmp[2];
|
||||
tmp[0] = cpu_to_be64(getpagesize());
|
||||
tmp[0] = cpu_to_be64(ram_pagesize_summary());
|
||||
tmp[1] = cpu_to_be64(1ul << qemu_target_page_bits());
|
||||
|
||||
trace_qemu_savevm_send_postcopy_advise();
|
||||
|
@ -1276,6 +1277,11 @@ done:
|
|||
status = MIGRATION_STATUS_COMPLETED;
|
||||
}
|
||||
migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, status);
|
||||
|
||||
/* f is an outer parameter; it should not stay in global migration state after
|
||||
* this function finished */
|
||||
ms->to_dst_file = NULL;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1346,7 +1352,7 @@ static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
|
|||
static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis)
|
||||
{
|
||||
PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
|
||||
uint64_t remote_hps, remote_tps;
|
||||
uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps;
|
||||
|
||||
trace_loadvm_postcopy_handle_advise();
|
||||
if (ps != POSTCOPY_INCOMING_NONE) {
|
||||
|
@ -1359,17 +1365,27 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis)
|
|||
return -1;
|
||||
}
|
||||
|
||||
remote_hps = qemu_get_be64(mis->from_src_file);
|
||||
if (remote_hps != getpagesize()) {
|
||||
remote_pagesize_summary = qemu_get_be64(mis->from_src_file);
|
||||
local_pagesize_summary = ram_pagesize_summary();
|
||||
|
||||
if (remote_pagesize_summary != local_pagesize_summary) {
|
||||
/*
|
||||
* Some combinations of mismatch are probably possible but it gets
|
||||
* a bit more complicated. In particular we need to place whole
|
||||
* host pages on the dest at once, and we need to ensure that we
|
||||
* handle dirtying to make sure we never end up sending part of
|
||||
* a hostpage on its own.
|
||||
* This detects two potential causes of mismatch:
|
||||
* a) A mismatch in host page sizes
|
||||
* Some combinations of mismatch are probably possible but it gets
|
||||
* a bit more complicated. In particular we need to place whole
|
||||
* host pages on the dest at once, and we need to ensure that we
|
||||
* handle dirtying to make sure we never end up sending part of
|
||||
* a hostpage on its own.
|
||||
* b) The use of different huge page sizes on source/destination
|
||||
* a more fine grain test is performed during RAM block migration
|
||||
* but this test here causes a nice early clear failure, and
|
||||
* also fails when passed to an older qemu that doesn't
|
||||
* do huge pages.
|
||||
*/
|
||||
error_report("Postcopy needs matching host page sizes (s=%d d=%d)",
|
||||
(int)remote_hps, getpagesize());
|
||||
error_report("Postcopy needs matching RAM page sizes (s=%" PRIx64
|
||||
" d=%" PRIx64 ")",
|
||||
remote_pagesize_summary, local_pagesize_summary);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
|
|
@ -68,6 +68,7 @@ get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, uint64_t
|
|||
migration_bitmap_sync_start(void) ""
|
||||
migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64
|
||||
migration_throttle(void) ""
|
||||
ram_discard_range(const char *rbname, uint64_t start, size_t len) "%s: start: %" PRIx64 " %zx"
|
||||
ram_load_postcopy_loop(uint64_t addr, int flags) "@%" PRIx64 " %x"
|
||||
ram_postcopy_send_discard_bitmap(void) ""
|
||||
ram_save_queue_pages(const char *rbname, size_t start, size_t len) "%s: start: %zx len: %zx"
|
||||
|
@ -176,7 +177,6 @@ rdma_start_outgoing_migration_after_rdma_source_init(void) ""
|
|||
# migration/postcopy-ram.c
|
||||
postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands"
|
||||
postcopy_discard_send_range(const char *ramblock, unsigned long start, unsigned long length) "%s:%lx/%lx"
|
||||
postcopy_ram_discard_range(void *start, size_t length) "%p,+%zx"
|
||||
postcopy_cleanup_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
|
||||
postcopy_init_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
|
||||
postcopy_nhp_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
|
||||
|
|
|
@ -52,29 +52,15 @@ static int vmstate_size(void *opaque, VMStateField *field)
|
|||
return size;
|
||||
}
|
||||
|
||||
static void *vmstate_base_addr(void *opaque, VMStateField *field, bool alloc)
|
||||
static void vmstate_handle_alloc(void *ptr, VMStateField *field, void *opaque)
|
||||
{
|
||||
void *base_addr = opaque + field->offset;
|
||||
|
||||
if (field->flags & VMS_POINTER) {
|
||||
if (alloc && (field->flags & VMS_ALLOC)) {
|
||||
gsize size = 0;
|
||||
if (field->flags & VMS_VBUFFER) {
|
||||
size = vmstate_size(opaque, field);
|
||||
} else {
|
||||
int n_elems = vmstate_n_elems(opaque, field);
|
||||
if (n_elems) {
|
||||
size = n_elems * field->size;
|
||||
}
|
||||
}
|
||||
if (size) {
|
||||
*(void **)base_addr = g_malloc(size);
|
||||
}
|
||||
if (field->flags & VMS_POINTER && field->flags & VMS_ALLOC) {
|
||||
gsize size = vmstate_size(opaque, field);
|
||||
size *= vmstate_n_elems(opaque, field);
|
||||
if (size) {
|
||||
*(void **)ptr = g_malloc(size);
|
||||
}
|
||||
base_addr = *(void **)base_addr;
|
||||
}
|
||||
|
||||
return base_addr;
|
||||
}
|
||||
|
||||
int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
|
||||
|
@ -116,21 +102,30 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
|
|||
field->field_exists(opaque, version_id)) ||
|
||||
(!field->field_exists &&
|
||||
field->version_id <= version_id)) {
|
||||
void *base_addr = vmstate_base_addr(opaque, field, true);
|
||||
void *first_elem = opaque + field->offset;
|
||||
int i, n_elems = vmstate_n_elems(opaque, field);
|
||||
int size = vmstate_size(opaque, field);
|
||||
|
||||
vmstate_handle_alloc(first_elem, field, opaque);
|
||||
if (field->flags & VMS_POINTER) {
|
||||
first_elem = *(void **)first_elem;
|
||||
assert(first_elem || !n_elems);
|
||||
}
|
||||
for (i = 0; i < n_elems; i++) {
|
||||
void *addr = base_addr + size * i;
|
||||
void *curr_elem = first_elem + size * i;
|
||||
|
||||
if (field->flags & VMS_ARRAY_OF_POINTER) {
|
||||
addr = *(void **)addr;
|
||||
curr_elem = *(void **)curr_elem;
|
||||
}
|
||||
if (field->flags & VMS_STRUCT) {
|
||||
ret = vmstate_load_state(f, field->vmsd, addr,
|
||||
if (!curr_elem) {
|
||||
/* if null pointer check placeholder and do not follow */
|
||||
assert(field->flags & VMS_ARRAY_OF_POINTER);
|
||||
ret = vmstate_info_nullptr.get(f, curr_elem, size, NULL);
|
||||
} else if (field->flags & VMS_STRUCT) {
|
||||
ret = vmstate_load_state(f, field->vmsd, curr_elem,
|
||||
field->vmsd->version_id);
|
||||
} else {
|
||||
ret = field->info->get(f, addr, size, field);
|
||||
ret = field->info->get(f, curr_elem, size, field);
|
||||
}
|
||||
if (ret >= 0) {
|
||||
ret = qemu_file_get_error(f);
|
||||
|
@ -321,26 +316,34 @@ void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
|
|||
while (field->name) {
|
||||
if (!field->field_exists ||
|
||||
field->field_exists(opaque, vmsd->version_id)) {
|
||||
void *base_addr = vmstate_base_addr(opaque, field, false);
|
||||
void *first_elem = opaque + field->offset;
|
||||
int i, n_elems = vmstate_n_elems(opaque, field);
|
||||
int size = vmstate_size(opaque, field);
|
||||
int64_t old_offset, written_bytes;
|
||||
QJSON *vmdesc_loop = vmdesc;
|
||||
|
||||
trace_vmstate_save_state_loop(vmsd->name, field->name, n_elems);
|
||||
if (field->flags & VMS_POINTER) {
|
||||
first_elem = *(void **)first_elem;
|
||||
assert(first_elem || !n_elems);
|
||||
}
|
||||
for (i = 0; i < n_elems; i++) {
|
||||
void *addr = base_addr + size * i;
|
||||
void *curr_elem = first_elem + size * i;
|
||||
|
||||
vmsd_desc_field_start(vmsd, vmdesc_loop, field, i, n_elems);
|
||||
old_offset = qemu_ftell_fast(f);
|
||||
|
||||
if (field->flags & VMS_ARRAY_OF_POINTER) {
|
||||
addr = *(void **)addr;
|
||||
assert(curr_elem);
|
||||
curr_elem = *(void **)curr_elem;
|
||||
}
|
||||
if (field->flags & VMS_STRUCT) {
|
||||
vmstate_save_state(f, field->vmsd, addr, vmdesc_loop);
|
||||
if (!curr_elem) {
|
||||
/* if null pointer write placeholder and do not follow */
|
||||
assert(field->flags & VMS_ARRAY_OF_POINTER);
|
||||
vmstate_info_nullptr.put(f, curr_elem, size, NULL, NULL);
|
||||
} else if (field->flags & VMS_STRUCT) {
|
||||
vmstate_save_state(f, field->vmsd, curr_elem, vmdesc_loop);
|
||||
} else {
|
||||
field->info->put(f, addr, size, field, vmdesc_loop);
|
||||
field->info->put(f, curr_elem, size, field, vmdesc_loop);
|
||||
}
|
||||
|
||||
written_bytes = qemu_ftell_fast(f) - old_offset;
|
||||
|
@ -752,6 +755,34 @@ const VMStateInfo vmstate_info_uint64 = {
|
|||
.put = put_uint64,
|
||||
};
|
||||
|
||||
static int get_nullptr(QEMUFile *f, void *pv, size_t size, VMStateField *field)
|
||||
|
||||
{
|
||||
if (qemu_get_byte(f) == VMS_NULLPTR_MARKER) {
|
||||
return 0;
|
||||
}
|
||||
error_report("vmstate: get_nullptr expected VMS_NULLPTR_MARKER");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int put_nullptr(QEMUFile *f, void *pv, size_t size,
|
||||
VMStateField *field, QJSON *vmdesc)
|
||||
|
||||
{
|
||||
if (pv == NULL) {
|
||||
qemu_put_byte(f, VMS_NULLPTR_MARKER);
|
||||
return 0;
|
||||
}
|
||||
error_report("vmstate: put_nullptr must be called with pv == NULL");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
const VMStateInfo vmstate_info_nullptr = {
|
||||
.name = "uint64",
|
||||
.get = get_nullptr,
|
||||
.put = put_nullptr,
|
||||
};
|
||||
|
||||
/* 64 bit unsigned int. See that the received value is the same as the one
|
||||
in the field */
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue