translate-all: protect TB jumps with a per-destination-TB lock
This applies to both user-mode and !user-mode emulation.

Instead of relying on a global lock, protect the list of incoming jumps with
tb->jmp_lock. This lock also protects tb->cflags, so update all tb->cflags
readers outside tb->jmp_lock to use atomic reads via tb_cflags().

In order to find the destination TB (and therefore its jmp_lock) from the
origin TB, we introduce tb->jmp_dest[].

I considered not using a linked list of jumps, which simplifies code and
makes the struct smaller. However, it unnecessarily increases memory usage,
which results in a performance decrease. See for instance these numbers
booting+shutting down debian-arm:

                      Time (s)  Rel. err (%)  Abs. err (s)  Rel. slowdown (%)
------------------------------------------------------------------------------
 before                  20.88          0.74      0.154512                  0.
 after                   20.81          0.38      0.079078         -0.33524904
 GTree                   21.02          0.28      0.058856          0.67049808
 GHashTable + xxhash     21.63          1.08      0.233604           3.5919540

Using a hash table or a binary tree to keep track of the jumps doesn't really
pay off, not only due to the increased memory usage, but also because most TBs
have only 0 or 1 jumps to them. The maximum number of jumps when booting
debian-arm that I measured is 35, but as we can see in the histogram below a
TB with that many incoming jumps is extremely rare; the average TB has 0.80
incoming jumps.

n_jumps: 379208; avg jumps/tb: 0.801099
dist: [0.0,1.0)|▄█▁▁▁▁▁▁▁▁▁▁▁ ▁▁▁▁▁▁ ▁▁▁ ▁▁▁ ▁|[34.0,35.0]

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
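Editor's note: the following is a minimal, self-contained C sketch (illustration
only, not code from this commit) of the locking discipline described above: the
origin TB records the destination in jmp_dest so the destination's jmp_lock can
be found, cflags is read atomically outside the lock via a tb_cflags()-style
accessor, and CF_INVALID is checked under the lock before chaining a jump. The
struct and helper names (tb_sketch, tb_add_jump, tb_mark_invalid) are invented
here, a single outgoing jump per TB is assumed, and a pthread mutex plus C11
atomics stand in for QEMU's QemuSpin and atomic accessors.

/* Illustration only: simplified sketch, not QEMU code. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CF_INVALID 0x00040000u

struct tb_sketch {
    _Atomic uint32_t cflags;         /* readable outside jmp_lock via tb_cflags() */
    pthread_mutex_t jmp_lock;        /* protects the incoming-jump list and CF_INVALID */
    struct tb_sketch *jmp_list_head; /* NULL-terminated list of incoming jumps */
    struct tb_sketch *jmp_list_next; /* this TB's entry in the destination's list */
    struct tb_sketch *jmp_dest;      /* destination TB, i.e. whose jmp_lock to take */
};

/* cflags readers that do not hold jmp_lock must use an atomic load. */
static inline uint32_t tb_cflags(const struct tb_sketch *tb)
{
    return atomic_load_explicit(&tb->cflags, memory_order_relaxed);
}

/* Chain @orig to @dest; fails if @dest was invalidated first. */
static bool tb_add_jump(struct tb_sketch *orig, struct tb_sketch *dest)
{
    bool ok = false;

    orig->jmp_dest = dest;               /* lets us find dest's jmp_lock later */
    pthread_mutex_lock(&dest->jmp_lock);
    if (!(tb_cflags(dest) & CF_INVALID)) {
        /* push @orig onto @dest's NULL-terminated incoming-jump list */
        orig->jmp_list_next = dest->jmp_list_head;
        dest->jmp_list_head = orig;
        ok = true;
    }
    pthread_mutex_unlock(&dest->jmp_lock);
    return ok;
}

/* Mark @tb stale; further jumps to it are refused from now on. */
static void tb_mark_invalid(struct tb_sketch *tb)
{
    pthread_mutex_lock(&tb->jmp_lock);
    atomic_fetch_or(&tb->cflags, CF_INVALID);
    pthread_mutex_unlock(&tb->jmp_lock);
}

int main(void)
{
    struct tb_sketch dest = { .jmp_lock = PTHREAD_MUTEX_INITIALIZER };
    struct tb_sketch orig = { .jmp_lock = PTHREAD_MUTEX_INITIALIZER };

    printf("link before invalidation: %d\n", tb_add_jump(&orig, &dest)); /* 1 */
    tb_mark_invalid(&dest);
    printf("link after invalidation:  %d\n", tb_add_jump(&orig, &dest)); /* 0 */
    return 0;
}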
commit 194125e3eb
parent 95590e24af
4 changed files with 125 additions and 77 deletions
@@ -345,7 +345,7 @@ struct TranslationBlock {
 #define CF_LAST_IO     0x00008000 /* Last insn may be an IO access.  */
 #define CF_NOCACHE     0x00010000 /* To be freed after execution */
 #define CF_USE_ICOUNT  0x00020000
-#define CF_INVALID     0x00040000 /* TB is stale. Setters need tb_lock */
+#define CF_INVALID     0x00040000 /* TB is stale. Set with @jmp_lock held */
 #define CF_PARALLEL    0x00080000 /* Generate code for a parallel context */
 /* cflags' mask for hashing/comparison */
 #define CF_HASH_MASK   \
@@ -364,6 +364,9 @@ struct TranslationBlock {
     uintptr_t page_next[2];
     tb_page_addr_t page_addr[2];
 
+    /* jmp_lock placed here to fill a 4-byte hole. Its documentation is below */
+    QemuSpin jmp_lock;
+
     /* The following data are used to directly call another TB from
      * the code of this one. This can be done either by emitting direct or
      * indirect native jump instructions. These jumps are reset so that the TB
@@ -375,20 +378,26 @@ struct TranslationBlock {
 #define TB_JMP_RESET_OFFSET_INVALID 0xffff /* indicates no jump generated */
     uintptr_t jmp_target_arg[2];  /* target address or offset */
 
-    /* Each TB has an associated circular list of TBs jumping to this one.
-     * jmp_list_first points to the first TB jumping to this one.
-     * jmp_list_next is used to point to the next TB in a list.
-     * Since each TB can have two jumps, it can participate in two lists.
-     * jmp_list_first and jmp_list_next are 4-byte aligned pointers to a
-     * TranslationBlock structure, but the two least significant bits of
-     * them are used to encode which data field of the pointed TB should
-     * be used to traverse the list further from that TB:
-     * 0 => jmp_list_next[0], 1 => jmp_list_next[1], 2 => jmp_list_first.
-     * In other words, 0/1 tells which jump is used in the pointed TB,
-     * and 2 means that this is a pointer back to the target TB of this list.
+    /*
+     * Each TB has a NULL-terminated list (jmp_list_head) of incoming jumps.
+     * Each TB can have two outgoing jumps, and therefore can participate
+     * in two lists. The list entries are kept in jmp_list_next[2]. The least
+     * significant bit (LSB) of the pointers in these lists is used to encode
+     * which of the two list entries is to be used in the pointed TB.
+     *
+     * List traversals are protected by jmp_lock. The destination TB of each
+     * outgoing jump is kept in jmp_dest[] so that the appropriate jmp_lock
+     * can be acquired from any origin TB.
+     *
+     * jmp_dest[] are tagged pointers as well. The LSB is set when the TB is
+     * being invalidated, so that no further outgoing jumps from it can be set.
+     *
+     * jmp_lock also protects the CF_INVALID cflag; a jump must not be chained
+     * to a destination TB that has CF_INVALID set.
      */
+    uintptr_t jmp_list_head;
     uintptr_t jmp_list_next[2];
-    uintptr_t jmp_list_first;
+    uintptr_t jmp_dest[2];
 };
 
 extern bool parallel_cpus;
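Editor's note: the LSB-tagged list layout documented in the new comment can be
sketched in isolation as follows (illustration only, not QEMU code; the node
struct and helper names are invented here, and jmp_lock/locking is omitted).
Each entry in a destination's NULL-terminated incoming list is a pointer to an
origin TB whose low bit records which of the origin's two jmp_list_next[]
slots continues the list.

/* Illustration only: standalone sketch of the LSB-tagged incoming-jump list. */
#include <stdint.h>
#include <stdio.h>

struct node {
    const char *name;
    uintptr_t jmp_list_head;    /* tagged pointer to the first incoming jump; 0 = empty */
    uintptr_t jmp_list_next[2]; /* one list entry per possible outgoing jump */
};

/* Tag @n with @which (0 or 1): the LSB records which jmp_list_next[] entry
 * of the pointed-to node continues the list. */
static uintptr_t tag(struct node *n, int which)
{
    return (uintptr_t)n | (uintptr_t)which;
}

/* Record that @orig's outgoing jump @which targets @dest by pushing @orig
 * onto @dest's incoming-jump list. */
static void incoming_push(struct node *dest, struct node *orig, int which)
{
    orig->jmp_list_next[which] = dest->jmp_list_head;
    dest->jmp_list_head = tag(orig, which);
}

/* Walk @dest's incoming-jump list, decoding the LSB at every step to know
 * which jmp_list_next[] entry to follow next. */
static void incoming_walk(const struct node *dest)
{
    uintptr_t ptr = dest->jmp_list_head;

    while (ptr) {
        int which = (int)(ptr & 1);
        const struct node *n = (const struct node *)(ptr & ~(uintptr_t)1);

        printf("%s -> %s (outgoing jump %d)\n", n->name, dest->name, which);
        ptr = n->jmp_list_next[which];
    }
}

int main(void)
{
    struct node dest = { .name = "dest" };
    struct node a = { .name = "a" };
    struct node b = { .name = "b" };

    incoming_push(&dest, &a, 0); /* a's first outgoing jump goes to dest */
    incoming_push(&dest, &b, 1); /* b's second outgoing jump goes to dest */
    incoming_walk(&dest);        /* prints b, then a */
    return 0;
}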