TCG patch queue

-----BEGIN PGP SIGNATURE-----
 
 iQEcBAABAgAGBQJZ8FmqAAoJEGTfOOivfiFf/78IALolAxDqnbfN5moh76OEy7++
 somg/CahMYl3rIR93bN8QMrNn72evPxdr9OVAjTXy/QTDbK8WDZ6xQ0yzhiNaD5+
 swYuhffcAq4djw6kVkuGB0fDpjF6tRvVP955JYsUp49u06uqKiWYTbwCSAlHKfvP
 yIIn/yOgDwaLFs10fTo+WrxEuSpRKxOGrrYIX3h+zX+cdlOifPAG8SxxKSJKL6OG
 wcKKQjLFpNmRbhqaoUMqD5Q5LebCvdl7Z0HSUakAgp8NVqART7Ix5BzweCP8GL5z
 9qO8Phrgeu9Uz0dTxC+7WTrYDrWvxWmxlbOIy79fVUIt2Z5kHNj7SEWj60cDM8Q=
 =PYec
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20171025' into staging

TCG patch queue

# gpg: Signature made Wed 25 Oct 2017 10:30:18 BST
# gpg:                using RSA key 0x64DF38E8AF7E215F
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>"
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* remotes/rth/tags/pull-tcg-20171025: (51 commits)
  translate-all: exit from tb_phys_invalidate if qht_remove fails
  tcg: Initialize cpu_env generically
  tcg: enable multiple TCG contexts in softmmu
  tcg: introduce regions to split code_gen_buffer
  translate-all: use qemu_protect_rwx/none helpers
  osdep: introduce qemu_mprotect_rwx/none
  tcg: allocate optimizer temps with tcg_malloc
  tcg: distribute profiling counters across TCGContext's
  tcg: introduce **tcg_ctxs to keep track of all TCGContext's
  gen-icount: fold exitreq_label into TCGContext
  tcg: define tcg_init_ctx and make tcg_ctx a pointer
  tcg: take tb_ctx out of TCGContext
  translate-all: report correct avg host TB size
  exec-all: rename tb_free to tb_remove
  translate-all: use a binary search tree to track TBs in TBContext
  tcg: Remove CF_IGNORE_ICOUNT
  tcg: Add CF_LAST_IO + CF_USE_ICOUNT to CF_HASH_MASK
  cpu-exec: lookup/generate TB outside exclusive region during step_atomic
  tcg: check CF_PARALLEL instead of parallel_cpus
  target/sparc: check CF_PARALLEL instead of parallel_cpus
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Committed by Peter Maydell, 2017-10-25 16:38:57 +01:00
commit ae49fbbcd8
80 changed files with 2292 additions and 1820 deletions
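Two of the headline commits in the series above, "tcg: introduce regions to split code_gen_buffer" and "tcg: enable multiple TCG contexts in softmmu", carve the shared translation buffer into per-thread regions so each vCPU thread can emit code from its own TCGContext without taking a global lock. None of that code appears in the hunks below, so what follows is only a rough, self-contained model of the idea; the names region_t, regions_init and region_alloc are invented for illustration and are not QEMU's API.

/* Hypothetical model: carve one code buffer into per-thread regions so each
 * translator thread bump-allocates from its own slice without locks.
 * Illustrative only; QEMU's real implementation lives in tcg/tcg.c. */
#include <stddef.h>
#include <stdio.h>

typedef struct {
    char *base;     /* start of this thread's slice of the buffer */
    size_t size;    /* bytes in the slice */
    size_t used;    /* bump-allocation offset */
} region_t;

/* Split one big buffer evenly among n threads. */
static void regions_init(region_t *regions, size_t n, char *buf, size_t buf_size)
{
    size_t per = buf_size / n;
    for (size_t i = 0; i < n; i++) {
        regions[i].base = buf + i * per;
        regions[i].size = per;
        regions[i].used = 0;
    }
}

/* Per-thread allocation needs no lock: only the owning thread touches 'used'. */
static void *region_alloc(region_t *r, size_t bytes)
{
    if (r->used + bytes > r->size) {
        return NULL;            /* region full: the caller would flush its TBs */
    }
    void *p = r->base + r->used;
    r->used += bytes;
    return p;
}

int main(void)
{
    enum { N_THREADS = 4, BUF_SIZE = 1 << 20 };
    static char buf[BUF_SIZE];
    region_t regions[N_THREADS];

    regions_init(regions, N_THREADS, buf, BUF_SIZE);
    void *tb_code = region_alloc(&regions[2], 512);
    printf("thread 2 code at offset %td\n", (char *)tb_code - buf);
    return 0;
}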


@@ -22,6 +22,7 @@
#include "qemu-common.h"
#include "exec/tb-context.h"
#include "sysemu/cpus.h"
/* allow to see translation results - the slowdown should be negligible, so we leave it */
#define DEBUG_DISAS
@@ -305,10 +306,14 @@ static inline void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr)
/*
* Translation Cache-related fields of a TB.
* This struct exists just for convenience; we keep track of TB's in a binary
* search tree, and the only fields needed to compare TB's in the tree are
* @ptr and @size.
* Note: the address of search data can be obtained by adding @size to @ptr.
*/
struct tb_tc {
void *ptr; /* pointer to the translated code */
uint8_t *search; /* pointer to search data */
size_t size;
};
struct TranslationBlock {
@@ -319,12 +324,15 @@ struct TranslationBlock {
size <= TARGET_PAGE_SIZE) */
uint16_t icount;
uint32_t cflags; /* compile flags */
#define CF_COUNT_MASK 0x7fff
#define CF_LAST_IO 0x8000 /* Last insn may be an IO access. */
#define CF_NOCACHE 0x10000 /* To be freed after execution */
#define CF_USE_ICOUNT 0x20000
#define CF_IGNORE_ICOUNT 0x40000 /* Do not generate icount code */
#define CF_INVALID 0x80000 /* TB is stale. Setters must acquire tb_lock */
#define CF_COUNT_MASK 0x00007fff
#define CF_LAST_IO 0x00008000 /* Last insn may be an IO access. */
#define CF_NOCACHE 0x00010000 /* To be freed after execution */
#define CF_USE_ICOUNT 0x00020000
#define CF_INVALID 0x00040000 /* TB is stale. Setters need tb_lock */
#define CF_PARALLEL 0x00080000 /* Generate code for a parallel context */
/* cflags' mask for hashing/comparison */
#define CF_HASH_MASK \
(CF_COUNT_MASK | CF_LAST_IO | CF_USE_ICOUNT | CF_PARALLEL)
/* Per-vCPU dynamic tracing state used to generate this TB */
uint32_t trace_vcpu_dstate;
@@ -365,11 +373,27 @@ struct TranslationBlock {
uintptr_t jmp_list_first;
};
void tb_free(TranslationBlock *tb);
extern bool parallel_cpus;
/* Hide the atomic_read to make code a little easier on the eyes */
static inline uint32_t tb_cflags(const TranslationBlock *tb)
{
return atomic_read(&tb->cflags);
}
/* current cflags for hashing/comparison */
static inline uint32_t curr_cflags(void)
{
return (parallel_cpus ? CF_PARALLEL : 0)
| (use_icount ? CF_USE_ICOUNT : 0);
}
void tb_remove(TranslationBlock *tb);
void tb_flush(CPUState *cpu);
void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
target_ulong cs_base, uint32_t flags);
target_ulong cs_base, uint32_t flags,
uint32_t cf_mask);
void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);
/* GETPC is the true target of the return instruction that we'll execute. */
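The block above is the heart of the series: CF_PARALLEL is now a per-TB compile flag, curr_cflags() folds the current execution mode into a flags word, and CF_HASH_MASK selects the cflags bits that take part in TB hashing and comparison, so translations generated for parallel and serial execution can coexist for the same guest pc. Below is a minimal standalone model of that masking; the flag values are copied from the hunk, while the two bools merely stand in for QEMU's real globals.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Flag values as defined in the hunk above. */
#define CF_COUNT_MASK  0x00007fff
#define CF_LAST_IO     0x00008000
#define CF_USE_ICOUNT  0x00020000
#define CF_PARALLEL    0x00080000
#define CF_HASH_MASK \
    (CF_COUNT_MASK | CF_LAST_IO | CF_USE_ICOUNT | CF_PARALLEL)

/* Stand-ins for the globals that the real curr_cflags() reads. */
static bool parallel_cpus;
static bool use_icount;

static uint32_t curr_cflags(void)
{
    return (parallel_cpus ? CF_PARALLEL : 0)
         | (use_icount ? CF_USE_ICOUNT : 0);
}

int main(void)
{
    uint32_t tb_cf = 0;      /* a TB translated while running serially */

    parallel_cpus = true;    /* once we go parallel, that TB no longer matches */
    assert((tb_cf & CF_HASH_MASK) != curr_cflags());

    parallel_cpus = false;   /* back to serial execution: it matches again */
    assert((tb_cf & CF_HASH_MASK) == curr_cflags());
    return 0;
}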


@@ -6,23 +6,22 @@
/* Helpers for instruction counting code generation. */
static int icount_start_insn_idx;
static TCGLabel *exitreq_label;
static inline void gen_tb_start(TranslationBlock *tb)
{
TCGv_i32 count, imm;
exitreq_label = gen_new_label();
if (tb->cflags & CF_USE_ICOUNT) {
tcg_ctx->exitreq_label = gen_new_label();
if (tb_cflags(tb) & CF_USE_ICOUNT) {
count = tcg_temp_local_new_i32();
} else {
count = tcg_temp_new_i32();
}
tcg_gen_ld_i32(count, tcg_ctx.tcg_env,
tcg_gen_ld_i32(count, cpu_env,
-ENV_OFFSET + offsetof(CPUState, icount_decr.u32));
if (tb->cflags & CF_USE_ICOUNT) {
if (tb_cflags(tb) & CF_USE_ICOUNT) {
imm = tcg_temp_new_i32();
/* We emit a movi with a dummy immediate argument. Keep the insn index
* of the movi so that we later (when we know the actual insn count)
@@ -34,10 +33,10 @@ static inline void gen_tb_start(TranslationBlock *tb)
tcg_temp_free_i32(imm);
}
tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, exitreq_label);
tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, tcg_ctx->exitreq_label);
if (tb->cflags & CF_USE_ICOUNT) {
tcg_gen_st16_i32(count, tcg_ctx.tcg_env,
if (tb_cflags(tb) & CF_USE_ICOUNT) {
tcg_gen_st16_i32(count, cpu_env,
-ENV_OFFSET + offsetof(CPUState, icount_decr.u16.low));
}
@@ -46,32 +45,30 @@ static inline void gen_tb_start(TranslationBlock *tb)
static inline void gen_tb_end(TranslationBlock *tb, int num_insns)
{
if (tb->cflags & CF_USE_ICOUNT) {
if (tb_cflags(tb) & CF_USE_ICOUNT) {
/* Update the num_insn immediate parameter now that we know
* the actual insn count. */
tcg_set_insn_param(icount_start_insn_idx, 1, num_insns);
}
gen_set_label(exitreq_label);
gen_set_label(tcg_ctx->exitreq_label);
tcg_gen_exit_tb((uintptr_t)tb + TB_EXIT_REQUESTED);
/* Terminate the linked list. */
tcg_ctx.gen_op_buf[tcg_ctx.gen_op_buf[0].prev].next = 0;
tcg_ctx->gen_op_buf[tcg_ctx->gen_op_buf[0].prev].next = 0;
}
static inline void gen_io_start(void)
{
TCGv_i32 tmp = tcg_const_i32(1);
tcg_gen_st_i32(tmp, tcg_ctx.tcg_env,
-ENV_OFFSET + offsetof(CPUState, can_do_io));
tcg_gen_st_i32(tmp, cpu_env, -ENV_OFFSET + offsetof(CPUState, can_do_io));
tcg_temp_free_i32(tmp);
}
static inline void gen_io_end(void)
{
TCGv_i32 tmp = tcg_const_i32(0);
tcg_gen_st_i32(tmp, tcg_ctx.tcg_env,
-ENV_OFFSET + offsetof(CPUState, can_do_io));
tcg_gen_st_i32(tmp, cpu_env, -ENV_OFFSET + offsetof(CPUState, can_do_io));
tcg_temp_free_i32(tmp);
}
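gen_tb_start() above emits the icount decrement before the instruction count is known: it writes a dummy immediate and records the op's index, and gen_tb_end() later patches the real count in with tcg_set_insn_param(). Here is a tiny self-contained model of that emit-now, patch-later pattern; the toy op buffer and op names are made up and only the technique mirrors the header.

#include <assert.h>
#include <stdio.h>

/* Toy "op buffer": each op is just a name and one parameter. */
typedef struct { const char *name; int param; } Op;

static Op ops[16];
static int num_ops;

static int emit(const char *name, int param)
{
    ops[num_ops] = (Op){ name, param };
    return num_ops++;           /* return the index so it can be patched later */
}

int main(void)
{
    /* "gen_tb_start": we don't know the TB's insn count yet, emit a dummy. */
    int icount_insn_idx = emit("subi icount_budget", /* dummy */ 0xdead);
    emit("brcond exit_if_budget_negative", 0);

    /* ... translate the guest instructions, counting them as we go ... */
    int num_insns = 3;
    emit("guest insn", 1);
    emit("guest insn", 2);
    emit("guest insn", 3);

    /* "gen_tb_end": now patch the real count into the earlier op. */
    ops[icount_insn_idx].param = num_insns;
    assert(ops[icount_insn_idx].param == 3);

    for (int i = 0; i < num_ops; i++) {
        printf("%-35s %d\n", ops[i].name, ops[i].param);
    }
    return 0;
}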


@@ -9,31 +9,31 @@
#define DEF_HELPER_FLAGS_0(name, flags, ret) \
static inline void glue(gen_helper_, name)(dh_retvar_decl0(ret)) \
{ \
tcg_gen_callN(&tcg_ctx, HELPER(name), dh_retvar(ret), 0, NULL); \
tcg_gen_callN(HELPER(name), dh_retvar(ret), 0, NULL); \
}
#define DEF_HELPER_FLAGS_1(name, flags, ret, t1) \
static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \
dh_arg_decl(t1, 1)) \
{ \
TCGArg args[1] = { dh_arg(t1, 1) }; \
tcg_gen_callN(&tcg_ctx, HELPER(name), dh_retvar(ret), 1, args); \
TCGTemp *args[1] = { dh_arg(t1, 1) }; \
tcg_gen_callN(HELPER(name), dh_retvar(ret), 1, args); \
}
#define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \
static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \
dh_arg_decl(t1, 1), dh_arg_decl(t2, 2)) \
{ \
TCGArg args[2] = { dh_arg(t1, 1), dh_arg(t2, 2) }; \
tcg_gen_callN(&tcg_ctx, HELPER(name), dh_retvar(ret), 2, args); \
TCGTemp *args[2] = { dh_arg(t1, 1), dh_arg(t2, 2) }; \
tcg_gen_callN(HELPER(name), dh_retvar(ret), 2, args); \
}
#define DEF_HELPER_FLAGS_3(name, flags, ret, t1, t2, t3) \
static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \
dh_arg_decl(t1, 1), dh_arg_decl(t2, 2), dh_arg_decl(t3, 3)) \
{ \
TCGArg args[3] = { dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3) }; \
tcg_gen_callN(&tcg_ctx, HELPER(name), dh_retvar(ret), 3, args); \
TCGTemp *args[3] = { dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3) }; \
tcg_gen_callN(HELPER(name), dh_retvar(ret), 3, args); \
}
#define DEF_HELPER_FLAGS_4(name, flags, ret, t1, t2, t3, t4) \
@@ -41,9 +41,9 @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \
dh_arg_decl(t1, 1), dh_arg_decl(t2, 2), \
dh_arg_decl(t3, 3), dh_arg_decl(t4, 4)) \
{ \
TCGArg args[4] = { dh_arg(t1, 1), dh_arg(t2, 2), \
TCGTemp *args[4] = { dh_arg(t1, 1), dh_arg(t2, 2), \
dh_arg(t3, 3), dh_arg(t4, 4) }; \
tcg_gen_callN(&tcg_ctx, HELPER(name), dh_retvar(ret), 4, args); \
tcg_gen_callN(HELPER(name), dh_retvar(ret), 4, args); \
}
#define DEF_HELPER_FLAGS_5(name, flags, ret, t1, t2, t3, t4, t5) \
@@ -51,9 +51,9 @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \
dh_arg_decl(t1, 1), dh_arg_decl(t2, 2), dh_arg_decl(t3, 3), \
dh_arg_decl(t4, 4), dh_arg_decl(t5, 5)) \
{ \
TCGArg args[5] = { dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3), \
TCGTemp *args[5] = { dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3), \
dh_arg(t4, 4), dh_arg(t5, 5) }; \
tcg_gen_callN(&tcg_ctx, HELPER(name), dh_retvar(ret), 5, args); \
tcg_gen_callN(HELPER(name), dh_retvar(ret), 5, args); \
}
#include "helper.h"
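Each DEF_HELPER_FLAGS_n above expands to a gen_helper_* wrapper that packs its arguments into an array and forwards them to tcg_gen_callN(); the change in this series is that the array now holds TCGTemp pointers and the explicit &tcg_ctx argument is gone. The toy below reproduces the macro pattern in a standalone program; gen_call() and the helper names are invented stand-ins, not QEMU code.

#include <stdio.h>

#define glue(a, b) a##b

/* Generic call emitter: in QEMU this role is played by tcg_gen_callN(). */
static void gen_call(const char *name, int nargs, const int *args)
{
    printf("call %s with %d args:", name, nargs);
    for (int i = 0; i < nargs; i++) {
        printf(" %d", args[i]);
    }
    printf("\n");
}

/* Each DEF_HELPER_2() use expands to a small wrapper that packs its
 * arguments, just like the gen_helper_* wrappers in helper-gen.h. */
#define DEF_HELPER_2(name)                                        \
    static inline void glue(gen_helper_, name)(int a1, int a2)    \
    {                                                             \
        int args[2] = { a1, a2 };                                 \
        gen_call(#name, 2, args);                                 \
    }

DEF_HELPER_2(add)
DEF_HELPER_2(sub)

int main(void)
{
    gen_helper_add(1, 2);
    gen_helper_sub(7, 3);
    return 0;
}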


@@ -20,10 +20,6 @@
#define HELPER(name) glue(helper_, name)
#define GET_TCGV_i32 GET_TCGV_I32
#define GET_TCGV_i64 GET_TCGV_I64
#define GET_TCGV_ptr GET_TCGV_PTR
/* Some types that make sense in C, but not for TCG. */
#define dh_alias_i32 i32
#define dh_alias_s32 i32
@@ -78,11 +74,11 @@
#define dh_retvar_decl_ptr TCGv_ptr retval,
#define dh_retvar_decl(t) glue(dh_retvar_decl_, dh_alias(t))
#define dh_retvar_void TCG_CALL_DUMMY_ARG
#define dh_retvar_noreturn TCG_CALL_DUMMY_ARG
#define dh_retvar_i32 GET_TCGV_i32(retval)
#define dh_retvar_i64 GET_TCGV_i64(retval)
#define dh_retvar_ptr GET_TCGV_ptr(retval)
#define dh_retvar_void NULL
#define dh_retvar_noreturn NULL
#define dh_retvar_i32 tcgv_i32_temp(retval)
#define dh_retvar_i64 tcgv_i64_temp(retval)
#define dh_retvar_ptr tcgv_ptr_temp(retval)
#define dh_retvar(t) glue(dh_retvar_, dh_alias(t))
#define dh_is_64bit_void 0
@@ -113,7 +109,7 @@
((dh_is_64bit(t) << (n*2)) | (dh_is_signed(t) << (n*2+1)))
#define dh_arg(t, n) \
glue(GET_TCGV_, dh_alias(t))(glue(arg, n))
glue(glue(tcgv_, dh_alias(t)), _temp)(glue(arg, n))
#define dh_arg_decl(t, n) glue(TCGv_, dh_alias(t)) glue(arg, n)


@@ -31,10 +31,8 @@ typedef struct TBContext TBContext;
struct TBContext {
TranslationBlock **tbs;
GTree *tb_tree;
struct qht htable;
size_t tbs_size;
int nb_tbs;
/* any access to the tbs or the page table must use this lock */
QemuMutex tb_lock;
@@ -43,4 +41,6 @@ struct TBContext {
int tb_phys_invalidate_count;
};
extern TBContext tb_ctx;
#endif
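The GTree introduced above replaces the plain tbs array; it is keyed on struct tb_tc (see the exec-all.h hunk), so a host code address, for example a return address taken while unwinding from generated code, can be mapped back to the TB whose range [ptr, ptr + size) contains it. The following standalone sketch models that range lookup with bsearch() over a sorted array; QEMU's real comparator lives in translate-all.c and has to cope with GTree's key-ordering quirks, so take this as the idea only.

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

/* Mirror of struct tb_tc from the exec-all.h hunk: start of the translated
 * code plus its size, which together define the range [ptr, ptr + size). */
struct tb_tc {
    void *ptr;
    size_t size;
};

/* bsearch comparator: 'key' is a host PC, 'elem' is a tb_tc range. */
static int host_pc_cmp(const void *key, const void *elem)
{
    uintptr_t pc = *(const uintptr_t *)key;
    const struct tb_tc *tc = elem;
    uintptr_t start = (uintptr_t)tc->ptr;

    if (pc < start) {
        return -1;
    }
    if (pc >= start + tc->size) {
        return 1;
    }
    return 0;   /* pc falls inside this TB's code */
}

int main(void)
{
    static char code[4096];                    /* pretend code_gen_buffer */
    struct tb_tc tbs[] = {                     /* must be sorted by ptr */
        { code +    0,  64 },
        { code +  128, 200 },
        { code + 1024,  32 },
    };
    uintptr_t pc = (uintptr_t)(code + 130);    /* host PC inside the 2nd TB */

    struct tb_tc *hit = bsearch(&pc, tbs, 3, sizeof(tbs[0]), host_pc_cmp);
    assert(hit == &tbs[1]);
    return 0;
}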


@@ -48,8 +48,8 @@
* xxhash32, customized for input variables that are not guaranteed to be
* contiguous in memory.
*/
static inline
uint32_t tb_hash_func6(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f)
static inline uint32_t
tb_hash_func7(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f, uint32_t g)
{
uint32_t v1 = TB_HASH_XX_SEED + PRIME32_1 + PRIME32_2;
uint32_t v2 = TB_HASH_XX_SEED + PRIME32_2;
@@ -78,7 +78,7 @@ uint32_t tb_hash_func6(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f)
v4 *= PRIME32_1;
h32 = rol32(v1, 1) + rol32(v2, 7) + rol32(v3, 12) + rol32(v4, 18);
h32 += 24;
h32 += 28;
h32 += e * PRIME32_3;
h32 = rol32(h32, 17) * PRIME32_4;
@@ -86,6 +86,9 @@ uint32_t tb_hash_func6(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f)
h32 += f * PRIME32_3;
h32 = rol32(h32, 17) * PRIME32_4;
h32 += g * PRIME32_3;
h32 = rol32(h32, 17) * PRIME32_4;
h32 ^= h32 >> 15;
h32 *= PRIME32_2;
h32 ^= h32 >> 13;
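Apart from mixing in the extra word g, the only change to the function above is the constant added after the accumulators are merged: in xxhash32 that term is the total input length in bytes, and the input grows by one 32-bit word when tb_hash_func6 becomes tb_hash_func7:

    tb_hash_func6: 2 * sizeof(uint64_t) + 2 * sizeof(uint32_t) = 16 +  8 = 24 bytes
    tb_hash_func7: 2 * sizeof(uint64_t) + 3 * sizeof(uint32_t) = 16 + 12 = 28 bytes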


@@ -59,9 +59,9 @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
static inline
uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags,
uint32_t trace_vcpu_dstate)
uint32_t cf_mask, uint32_t trace_vcpu_dstate)
{
return tb_hash_func6(phys_pc, pc, flags, trace_vcpu_dstate);
return tb_hash_func7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
}
#endif


@@ -21,7 +21,7 @@
/* Might cause an exception, so have a longjmp destination ready */
static inline TranslationBlock *
tb_lookup__cpu_state(CPUState *cpu, target_ulong *pc, target_ulong *cs_base,
uint32_t *flags)
uint32_t *flags, uint32_t cf_mask)
{
CPUArchState *env = (CPUArchState *)cpu->env_ptr;
TranslationBlock *tb;
@@ -35,10 +35,10 @@ tb_lookup__cpu_state(CPUState *cpu, target_ulong *pc, target_ulong *cs_base,
tb->cs_base == *cs_base &&
tb->flags == *flags &&
tb->trace_vcpu_dstate == *cpu->trace_dstate &&
!(atomic_read(&tb->cflags) & CF_INVALID))) {
(tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == cf_mask)) {
return tb;
}
tb = tb_htable_lookup(cpu, *pc, *cs_base, *flags);
tb = tb_htable_lookup(cpu, *pc, *cs_base, *flags, cf_mask);
if (tb == NULL) {
return NULL;
}
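One detail worth spelling out in the comparison above: the cf_mask a caller passes is built from curr_cflags() and never has CF_INVALID set, so masking the TB's cflags with CF_HASH_MASK | CF_INVALID checks the hash-relevant flag bits and rejects stale (invalidated) TBs in a single compare. A small standalone check of that property, with the flag values taken from the exec-all.h hunk:

#include <assert.h>
#include <stdint.h>

#define CF_COUNT_MASK  0x00007fff
#define CF_LAST_IO     0x00008000
#define CF_USE_ICOUNT  0x00020000
#define CF_INVALID     0x00040000
#define CF_PARALLEL    0x00080000
#define CF_HASH_MASK   (CF_COUNT_MASK | CF_LAST_IO | CF_USE_ICOUNT | CF_PARALLEL)

int main(void)
{
    uint32_t cf_mask = CF_PARALLEL;              /* what the caller is looking for */
    uint32_t live    = CF_PARALLEL;              /* matching, still-valid TB */
    uint32_t stale   = CF_PARALLEL | CF_INVALID; /* same flags, but invalidated */

    assert((live  & (CF_HASH_MASK | CF_INVALID)) == cf_mask);
    assert((stale & (CF_HASH_MASK | CF_INVALID)) != cf_mask);
    return 0;
}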