tcg: remove tb_lock
Use mmap_lock in user-mode to protect TCG state and the page descriptors.
In !user-mode, each vCPU has its own TCG state, so no locks are needed.
Per-page locks are used to protect the page descriptors.
Per-TB locks are used in both modes to protect TB jumps.
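The per-page half of this scheme can be pictured with a toy sketch in
plain pthreads: one mutex per page descriptor, with ranges always locked
in a consistent (ascending) page order so that two writers covering
overlapping ranges cannot deadlock. This is the same general idea behind
the page_collection_lock calls visible in the diff below, but it is a
made-up sketch, not QEMU's implementation; every toy_* name is invented.

    #include <assert.h>
    #include <pthread.h>
    #include <stdint.h>

    #define TOY_PAGE_BITS 12
    #define TOY_NPAGES    1024   /* assumes addresses below TOY_NPAGES pages */

    static pthread_mutex_t toy_page_locks[TOY_NPAGES];

    static void toy_locks_init(void)
    {
        for (int i = 0; i < TOY_NPAGES; i++) {
            pthread_mutex_init(&toy_page_locks[i], NULL);
        }
    }

    /* Lock every page descriptor covering [start, end), lowest page
     * first; the consistent order is what prevents deadlock between
     * concurrent callers whose ranges overlap. */
    static void toy_page_range_lock(uint64_t start, uint64_t end)
    {
        for (uint64_t pg = start >> TOY_PAGE_BITS;
             pg <= (end - 1) >> TOY_PAGE_BITS; pg++) {
            assert(pg < TOY_NPAGES);
            pthread_mutex_lock(&toy_page_locks[pg]);
        }
    }

    static void toy_page_range_unlock(uint64_t start, uint64_t end)
    {
        for (uint64_t pg = start >> TOY_PAGE_BITS;
             pg <= (end - 1) >> TOY_PAGE_BITS; pg++) {
            pthread_mutex_unlock(&toy_page_locks[pg]);
        }
    }

    int main(void)
    {
        toy_locks_init();
        toy_page_range_lock(0x1000, 0x3000);    /* pages 1 and 2 */
        /* ... update page descriptors / invalidate TBs here ... */
        toy_page_range_unlock(0x1000, 0x3000);
        return 0;
    }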
Some notes:
- tb_lock is removed from notdirty_mem_write by passing a
locked page_collection to tb_invalidate_phys_page_fast.
- tcg_tb_lookup/remove/insert/etc have their own internal lock(s),
so there is no need to further serialize access to them.
- do_tb_flush is run in a safe async context, meaning no other
vCPU threads are running. Therefore acquiring mmap_lock there
is just to please tools such as thread sanitizer.
- Not visible in the diff, but tb_invalidate_phys_page already
has an assert_memory_lock.
- cpu_io_recompile is !user-only, so no mmap_lock there.
- Added mmap_unlock() calls before all siglongjmp calls that could
  be reached in user-mode while mmap_lock is held.
  + Added an assert for !have_mmap_lock() after returning from
    the longjmp in cpu_exec, just like we do in cpu_exec_step_atomic.
  (A toy sketch of this unlock-before-longjmp pattern follows these notes.)
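A minimal, self-contained illustration of that last note (plain C with
pthreads; the toy_* names are invented stand-ins for mmap_lock and
have_mmap_lock, not QEMU code): the fault path drops the lock before
jumping, and the receiving side asserts it is no longer held.

    #include <assert.h>
    #include <pthread.h>
    #include <setjmp.h>
    #include <stdbool.h>

    static sigjmp_buf toy_cpu_exec_buf;
    static pthread_mutex_t toy_mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
    static __thread bool toy_mmap_lock_held;  /* stand-in for have_mmap_lock() */

    static void toy_mmap_lock(void)
    {
        pthread_mutex_lock(&toy_mmap_mutex);
        toy_mmap_lock_held = true;
    }

    static void toy_mmap_unlock(void)
    {
        toy_mmap_lock_held = false;
        pthread_mutex_unlock(&toy_mmap_mutex);
    }

    /* A fault path that may run while the lock is held: drop it, then jump. */
    static void toy_fault_exit(void)
    {
        if (toy_mmap_lock_held) {
            toy_mmap_unlock();
        }
        siglongjmp(toy_cpu_exec_buf, 1);
    }

    int main(void)
    {
        if (sigsetjmp(toy_cpu_exec_buf, 0)) {
            /* Mirrors the new assert(!have_mmap_lock()) in cpu_exec. */
            assert(!toy_mmap_lock_held);
            return 0;
        }
        toy_mmap_lock();
        toy_fault_exit();   /* never returns */
    }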
Performance numbers before/after:
Host: AMD Opteron(tm) Processor 6376
ubuntu 17.04 ppc64 bootup+shutdown time
[ASCII plot: bootup+shutdown time (100-700) vs. guest CPUs (1-64),
series "before" vs. "tb lock removal"; see png below]
png: https://imgur.com/HwmBHXe
debian jessie aarch64 bootup+shutdown time
[ASCII plot: bootup+shutdown time (10-90) vs. guest CPUs (1-64),
series "before" vs. "tb lock removal"; see png below]
png: https://imgur.com/iGpGFtv
The gains are high for 4-8 CPUs. Beyond that point, however, unrelated
lock contention significantly hurts scalability.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
parent 705ad1ff0c
commit 0ac20318ce
11 changed files with 75 additions and 152 deletions
exec.c | 26 +++++++++++---------------

@@ -1031,9 +1031,7 @@ const char *parse_cpu_model(const char *cpu_model)
 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 {
     mmap_lock();
-    tb_lock();
     tb_invalidate_phys_page_range(pc, pc + 1, 0);
-    tb_unlock();
     mmap_unlock();
 }
 #else
@@ -2644,21 +2642,21 @@ void memory_notdirty_write_prepare(NotDirtyInfo *ndi,
     ndi->ram_addr = ram_addr;
     ndi->mem_vaddr = mem_vaddr;
     ndi->size = size;
-    ndi->locked = false;
+    ndi->pages = NULL;
 
     assert(tcg_enabled());
     if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
-        ndi->locked = true;
-        tb_lock();
-        tb_invalidate_phys_page_fast(ram_addr, size);
+        ndi->pages = page_collection_lock(ram_addr, ram_addr + size);
+        tb_invalidate_phys_page_fast(ndi->pages, ram_addr, size);
     }
 }
 
 /* Called within RCU critical section. */
 void memory_notdirty_write_complete(NotDirtyInfo *ndi)
 {
-    if (ndi->locked) {
-        tb_unlock();
+    if (ndi->pages) {
+        page_collection_unlock(ndi->pages);
+        ndi->pages = NULL;
     }
 
     /* Set both VGA and migration bits for simplicity and to remove
@@ -2745,18 +2743,16 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
                 }
                 cpu->watchpoint_hit = wp;
 
-                /* Both tb_lock and iothread_mutex will be reset when
-                 * cpu_loop_exit or cpu_loop_exit_noexc longjmp
-                 * back into the cpu_exec main loop.
-                 */
-                tb_lock();
+                mmap_lock();
                 tb_check_watchpoint(cpu);
                 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                     cpu->exception_index = EXCP_DEBUG;
+                    mmap_unlock();
                     cpu_loop_exit(cpu);
                 } else {
                     /* Force execution of one insn next time. */
                     cpu->cflags_next_tb = 1 | curr_cflags();
+                    mmap_unlock();
                     cpu_loop_exit_noexc(cpu);
                 }
             }
@@ -3147,9 +3143,9 @@ static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
     }
     if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
         assert(tcg_enabled());
-        tb_lock();
+        mmap_lock();
         tb_invalidate_phys_range(addr, addr + length);
-        tb_unlock();
+        mmap_unlock();
         dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
     }
     cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
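The memory_notdirty_write_prepare/complete hunk above replaces a bare
'locked' flag plus the global tb_lock with a pointer to the locked
page_collection, so that the complete step unlocks exactly what prepare
took. A toy sketch of that hand-off pattern (plain pthreads with a
single stand-in lock, not QEMU code; all Toy*/toy_* names are invented):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdint.h>

    typedef struct ToyPageCollection {
        pthread_mutex_t *lock;
    } ToyPageCollection;

    static pthread_mutex_t toy_pages_mutex = PTHREAD_MUTEX_INITIALIZER;
    static ToyPageCollection toy_collection = { .lock = &toy_pages_mutex };

    static ToyPageCollection *toy_page_collection_lock(uint64_t start,
                                                       uint64_t end)
    {
        (void)start;
        (void)end;
        pthread_mutex_lock(toy_collection.lock);
        return &toy_collection;
    }

    static void toy_page_collection_unlock(ToyPageCollection *pages)
    {
        pthread_mutex_unlock(pages->lock);
    }

    typedef struct ToyNotDirtyInfo {
        uint64_t ram_addr;
        unsigned size;
        ToyPageCollection *pages;   /* replaces the old 'bool locked' flag */
    } ToyNotDirtyInfo;

    static void toy_write_prepare(ToyNotDirtyInfo *ndi, uint64_t ram_addr,
                                  unsigned size, bool code_page)
    {
        ndi->ram_addr = ram_addr;
        ndi->size = size;
        ndi->pages = NULL;
        if (code_page) {
            /* Take the page locks and remember them for the complete step. */
            ndi->pages = toy_page_collection_lock(ram_addr, ram_addr + size);
            /* ... invalidate TBs on the locked pages here ... */
        }
    }

    static void toy_write_complete(ToyNotDirtyInfo *ndi)
    {
        if (ndi->pages) {
            toy_page_collection_unlock(ndi->pages);
            ndi->pages = NULL;
        }
    }

    int main(void)
    {
        ToyNotDirtyInfo ndi;
        toy_write_prepare(&ndi, 0x1000, 4, true);
        /* the guest store to the not-dirty page would happen here */
        toy_write_complete(&ndi);
        return 0;
    }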