TCI build fix and cleanup

Streamline tb_lookup
 Fixes for tcg/aarch64
 -----BEGIN PGP SIGNATURE-----
 
 iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmBD9XYdHHJpY2hhcmQu
 aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV9jqwf/cIr0aafPuLyMwAFB
 WCL2+S7oZT8n9dXTwel9MTjnLg4tApvHcxwdNMeCnWIvJ2wtLg4NHsOv+mitWTpD
 D1AMgVRvEaLYFFMa582ewopLee1Yp0vTWYtVwXIRMFW2qJv4b6h/cEJae3DjbFLs
 TlU6XBlPjNLjWECCO+cxVUJRojLpf0WuJrn7REALYdiAAK/+X7g3wwfxc4VP0D6a
 NO54gLH2XSVDosWn0vJu5czDGTA3ZD7mLRuFscyVsK7KKx2xgZq2WzcMATeKo5Qy
 qn/E2bYn2nMEv78ptt3h06sSwGs0W41a68Y7uqWIkdfI1aGeIBzFn2rmXoXaaAfd
 IrJnLQ==
 =f77X
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-tcg-20210306' into staging

TCI build fix and cleanup
Streamline tb_lookup
Fixes for tcg/aarch64

# gpg: Signature made Sat 06 Mar 2021 21:34:46 GMT
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* remotes/rth-gitlab/tags/pull-tcg-20210306: (27 commits)
  accel/tcg: Precompute curr_cflags into cpu->tcg_cflags
  include/exec: lightly re-arrange TranslationBlock
  accel/tcg: drop the use of CF_HASH_MASK and rename params
  accel/tcg: move CF_CLUSTER calculation to curr_cflags
  accel/tcg: rename tb_lookup__cpu_state and hoist state extraction
  tcg/tci: Merge mov, not and neg operations
  tcg/tci: Merge bswap operations
  tcg/tci: Merge extension operations
  tcg/tci: Merge basic arithmetic operations
  tcg/tci: Reduce use of tci_read_r64
  tcg/tci: Remove tci_read_r32s
  tcg/tci: Remove tci_read_r32
  tcg/tci: Remove tci_read_r16s
  tcg/tci: Remove tci_read_r16
  tcg/tci: Remove tci_read_r8s
  tcg/tci: Remove tci_read_r8
  tcg/tci: Merge identical cases in generation (load/store opcodes)
  tcg/tci: Merge identical cases in generation (conditional opcodes)
  tcg/tci: Merge identical cases in generation (deposit opcode)
  tcg/tci: Merge identical cases in generation (exchange opcodes)
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2021-03-08 20:07:37 +00:00
commit 74fd46ed44
18 changed files with 529 additions and 610 deletions

View file

@ -245,11 +245,11 @@ static void cpu_exec_exit(CPUState *cpu)
void cpu_exec_step_atomic(CPUState *cpu)
{
CPUArchState *env = (CPUArchState *)cpu->env_ptr;
TranslationBlock *tb;
target_ulong cs_base, pc;
uint32_t flags;
uint32_t cflags = 1;
uint32_t cf_mask = cflags & CF_HASH_MASK;
uint32_t cflags = (curr_cflags(cpu) & ~CF_PARALLEL) | 1;
int tb_exit;
if (sigsetjmp(cpu->jmp_env, 0) == 0) {
@ -258,15 +258,15 @@ void cpu_exec_step_atomic(CPUState *cpu)
g_assert(!cpu->running);
cpu->running = true;
tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask);
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
if (tb == NULL) {
mmap_lock();
tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
mmap_unlock();
}
/* Since we got here, we know that parallel_cpus must be true. */
parallel_cpus = false;
cpu_exec_enter(cpu);
/* execute the generated code */
trace_exec_tb(tb, pc);
@ -294,7 +294,6 @@ void cpu_exec_step_atomic(CPUState *cpu)
* the execution.
*/
g_assert(cpu_in_exclusive_context(cpu));
parallel_cpus = true;
cpu->running = false;
end_exclusive();
}
@ -305,7 +304,7 @@ struct tb_desc {
CPUArchState *env;
tb_page_addr_t phys_page1;
uint32_t flags;
uint32_t cf_mask;
uint32_t cflags;
uint32_t trace_vcpu_dstate;
};
@ -319,7 +318,7 @@ static bool tb_lookup_cmp(const void *p, const void *d)
tb->cs_base == desc->cs_base &&
tb->flags == desc->flags &&
tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
(tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == desc->cf_mask) {
tb_cflags(tb) == desc->cflags) {
/* check next page if needed */
if (tb->page_addr[1] == -1) {
return true;
@ -339,7 +338,7 @@ static bool tb_lookup_cmp(const void *p, const void *d)
TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
target_ulong cs_base, uint32_t flags,
uint32_t cf_mask)
uint32_t cflags)
{
tb_page_addr_t phys_pc;
struct tb_desc desc;
@ -348,7 +347,7 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
desc.env = (CPUArchState *)cpu->env_ptr;
desc.cs_base = cs_base;
desc.flags = flags;
desc.cf_mask = cf_mask;
desc.cflags = cflags;
desc.trace_vcpu_dstate = *cpu->trace_dstate;
desc.pc = pc;
phys_pc = get_page_addr_code(desc.env, pc);
@ -356,7 +355,7 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
return NULL;
}
desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
h = tb_hash_func(phys_pc, pc, flags, cf_mask, *cpu->trace_dstate);
h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
}
@ -416,16 +415,19 @@ static inline void tb_add_jump(TranslationBlock *tb, int n,
static inline TranslationBlock *tb_find(CPUState *cpu,
TranslationBlock *last_tb,
int tb_exit, uint32_t cf_mask)
int tb_exit, uint32_t cflags)
{
CPUArchState *env = (CPUArchState *)cpu->env_ptr;
TranslationBlock *tb;
target_ulong cs_base, pc;
uint32_t flags;
tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask);
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
if (tb == NULL) {
mmap_lock();
tb = tb_gen_code(cpu, pc, cs_base, flags, cf_mask);
tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
mmap_unlock();
/* We add the TB in the virtual pc hash table for the fast lookup */
qatomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb);
@ -491,7 +493,7 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
if (replay_has_exception()
&& cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0) {
/* Execute just one insn to trigger exception pending in the log */
cpu->cflags_next_tb = (curr_cflags() & ~CF_USE_ICOUNT) | 1;
cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT) | 1;
}
#endif
return false;
@ -788,7 +790,7 @@ int cpu_exec(CPUState *cpu)
have CF_INVALID set, -1 is a convenient invalid value that
does not require tcg headers for cpu_common_reset. */
if (cflags == -1) {
cflags = curr_cflags();
cflags = curr_cflags(cpu);
} else {
cpu->cflags_next_tb = -1;
}

View file

@ -114,8 +114,7 @@ void mttcg_start_vcpu_thread(CPUState *cpu)
char thread_name[VCPU_THREAD_NAME_SIZE];
g_assert(tcg_enabled());
parallel_cpus = (current_machine->smp.max_cpus > 1);
tcg_cpu_init_cflags(cpu, current_machine->smp.max_cpus > 1);
cpu->thread = g_malloc0(sizeof(QemuThread));
cpu->halt_cond = g_malloc0(sizeof(QemuCond));

View file

@ -269,7 +269,7 @@ void rr_start_vcpu_thread(CPUState *cpu)
static QemuThread *single_tcg_cpu_thread;
g_assert(tcg_enabled());
parallel_cpus = false;
tcg_cpu_init_cflags(cpu, false);
if (!single_tcg_cpu_thread) {
cpu->thread = g_malloc0(sizeof(QemuThread));

View file

@ -41,6 +41,14 @@
/* common functionality among all TCG variants */
void tcg_cpu_init_cflags(CPUState *cpu, bool parallel)
{
uint32_t cflags = cpu->cluster_index << CF_CLUSTER_SHIFT;
cflags |= parallel ? CF_PARALLEL : 0;
cflags |= icount_enabled() ? CF_USE_ICOUNT : 0;
cpu->tcg_cflags = cflags;
}
void tcg_cpus_destroy(CPUState *cpu)
{
cpu_thread_signal_destroyed(cpu);

View file

@ -17,5 +17,6 @@
void tcg_cpus_destroy(CPUState *cpu);
int tcg_cpus_exec(CPUState *cpu);
void tcg_handle_interrupt(CPUState *cpu, int mask);
void tcg_cpu_init_cflags(CPUState *cpu, bool parallel);
#endif /* TCG_CPUS_H */

View file

@ -27,10 +27,10 @@
#include "exec/helper-proto.h"
#include "exec/cpu_ldst.h"
#include "exec/exec-all.h"
#include "exec/tb-lookup.h"
#include "disas/disas.h"
#include "exec/log.h"
#include "tcg/tcg.h"
#include "exec/tb-lookup.h"
/* 32-bit helpers */
@ -152,7 +152,9 @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
target_ulong cs_base, pc;
uint32_t flags;
tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, curr_cflags());
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
tb = tb_lookup(cpu, pc, cs_base, flags, curr_cflags(cpu));
if (tb == NULL) {
return tcg_code_gen_epilogue;
}

View file

@ -224,7 +224,6 @@ static void *l1_map[V_L1_MAX_SIZE];
TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;
TBContext tb_ctx;
bool parallel_cpus;
static void page_table_config_init(void)
{
@ -1311,7 +1310,7 @@ static bool tb_cmp(const void *ap, const void *bp)
return a->pc == b->pc &&
a->cs_base == b->cs_base &&
a->flags == b->flags &&
(tb_cflags(a) & CF_HASH_MASK) == (tb_cflags(b) & CF_HASH_MASK) &&
(tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
a->page_addr[0] == b->page_addr[0] &&
a->page_addr[1] == b->page_addr[1];
@ -1616,6 +1615,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
PageDesc *p;
uint32_t h;
tb_page_addr_t phys_pc;
uint32_t orig_cflags = tb_cflags(tb);
assert_memory_lock();
@ -1626,7 +1626,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
/* remove the TB from the hash list */
phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb_cflags(tb) & CF_HASH_MASK,
h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags,
tb->trace_vcpu_dstate);
if (!qht_remove(&tb_ctx.htable, tb, h)) {
return;
@ -1793,6 +1793,7 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
uint32_t h;
assert_memory_lock();
tcg_debug_assert(!(tb->cflags & CF_INVALID));
/*
* Add the TB to the page list, acquiring first the pages's locks.
@ -1811,7 +1812,7 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
}
/* add in the hash table */
h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags,
tb->trace_vcpu_dstate);
qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
@ -1865,9 +1866,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
cflags = (cflags & ~CF_COUNT_MASK) | 1;
}
cflags &= ~CF_CLUSTER_MASK;
cflags |= cpu->cluster_index << CF_CLUSTER_SHIFT;
max_insns = cflags & CF_COUNT_MASK;
if (max_insns == 0) {
max_insns = CF_COUNT_MASK;
@ -2194,7 +2192,7 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
if (current_tb_modified) {
page_collection_unlock(pages);
/* Force execution of one insn next time. */
cpu->cflags_next_tb = 1 | curr_cflags();
cpu->cflags_next_tb = 1 | curr_cflags(cpu);
mmap_unlock();
cpu_loop_exit_noexc(cpu);
}
@ -2362,7 +2360,7 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
#ifdef TARGET_HAS_PRECISE_SMC
if (current_tb_modified) {
/* Force execution of one insn next time. */
cpu->cflags_next_tb = 1 | curr_cflags();
cpu->cflags_next_tb = 1 | curr_cflags(cpu);
return true;
}
#endif
@ -2438,7 +2436,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
* operations only (which execute after completion) so we don't
* double instrument the instruction.
*/
cpu->cflags_next_tb = curr_cflags() | CF_MEMI_ONLY | CF_LAST_IO | n;
cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
"cpu_io_recompile: rewound execution of TB to "