mirror of
https://github.com/Motorhead1991/qemu.git
synced 2025-08-02 23:33:54 -06:00
migration: Postcopy recover with preempt enabled
To allow postcopy recovery, the ram fast load (preempt-only) dest QEMU thread needs similar fault-tolerance handling. When ram_load_postcopy() fails, instead of stopping the thread it halts with a semaphore, preparing to be kicked again when recovery is detected. A mutex is introduced to make sure there's no concurrent operation upon the socket. To make it simple, the fast ram load thread will take the mutex during its whole procedure, and only release it if it's paused. The fast-path socket will be properly released by the main loading thread safely when there are network failures during postcopy with that mutex held. Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> Signed-off-by: Peter Xu <peterx@redhat.com> Message-Id: <20220707185506.27257-1-peterx@redhat.com> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
This commit is contained in:
parent
c01b16edf6
commit
60bb3c5871
7 changed files with 119 additions and 8 deletions
|
@@ -215,9 +215,11 @@ void migration_object_init(void)
|
|||
current_incoming->postcopy_remote_fds =
|
||||
g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD));
|
||||
qemu_mutex_init(&current_incoming->rp_mutex);
|
||||
qemu_mutex_init(&current_incoming->postcopy_prio_thread_mutex);
|
||||
qemu_event_init(&current_incoming->main_thread_load_event, false);
|
||||
qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
|
||||
qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
|
||||
qemu_sem_init(&current_incoming->postcopy_pause_sem_fast_load, 0);
|
||||
qemu_mutex_init(&current_incoming->page_request_mutex);
|
||||
current_incoming->page_requested = g_tree_new(page_request_addr_cmp);
|
||||
|
||||
|
@@ -697,9 +699,9 @@ static bool postcopy_try_recover(void)
|
|||
|
||||
/*
|
||||
* Here, we only wake up the main loading thread (while the
|
||||
* fault thread will still be waiting), so that we can receive
|
||||
* rest threads will still be waiting), so that we can receive
|
||||
* commands from source now, and answer it if needed. The
|
||||
* fault thread will be woken up afterwards until we are sure
|
||||
* rest threads will be woken up afterwards until we are sure
|
||||
* that source is ready to reply to page requests.
|
||||
*/
|
||||
qemu_sem_post(&mis->postcopy_pause_sem_dst);
|
||||
|
@@ -3503,6 +3505,18 @@ static MigThrError postcopy_pause(MigrationState *s)
|
|||
qemu_file_shutdown(file);
|
||||
qemu_fclose(file);
|
||||
|
||||
/*
|
||||
* Do the same to the postcopy fast-path socket too, if there is one. No
|
||||
* locking needed because no racer as long as we do this before setting
|
||||
* status to paused.
|
||||
*/
|
||||
if (s->postcopy_qemufile_src) {
|
||||
migration_ioc_unregister_yank_from_file(s->postcopy_qemufile_src);
|
||||
qemu_file_shutdown(s->postcopy_qemufile_src);
|
||||
qemu_fclose(s->postcopy_qemufile_src);
|
||||
s->postcopy_qemufile_src = NULL;
|
||||
}
|
||||
|
||||
migrate_set_state(&s->state, s->state,
|
||||
MIGRATION_STATUS_POSTCOPY_PAUSED);
|
||||
|
||||
|
@@ -3558,8 +3572,13 @@ static MigThrError migration_detect_error(MigrationState *s)
|
|||
return MIG_THR_ERR_FATAL;
|
||||
}
|
||||
|
||||
/* Try to detect any file errors */
|
||||
ret = qemu_file_get_error_obj(s->to_dst_file, &local_error);
|
||||
/*
|
||||
* Try to detect any file errors. Note that postcopy_qemufile_src will
|
||||
* be NULL when postcopy preempt is not enabled.
|
||||
*/
|
||||
ret = qemu_file_get_error_obj_any(s->to_dst_file,
|
||||
s->postcopy_qemufile_src,
|
||||
&local_error);
|
||||
if (!ret) {
|
||||
/* Everything is fine */
|
||||
assert(!local_error);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue