Merge tag 'pull-tcg-20221004' of https://gitlab.com/rth7680/qemu into staging

Cache CPUClass for use in hot code paths.
Add CPUTLBEntryFull, probe_access_full, tlb_set_page_full.
Add generic support for TARGET_TB_PCREL.
tcg/ppc: Optimize 26-bit jumps using STQ for POWER 2.07
target/sh4: Fix TB_FLAG_UNALIGN

# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmM8jXEdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV/oEggArAHK8FtydfQ4ZwnF
# SjXfpdP50OC0SZn3uBN93FZOrxz9UYG9t1oDHs39J/+b/u2nwJYch//EH2k+NtOW
# hc3iIgS9bWgs/UWZESkViKQccw7gpYlc21Br38WWwFNEFyecX0p+e9pJgld5rSv1
# mRGvCs5J2svH2tcXl/Sb/JWgcumOJoG7qy2aLyJGolR6UOfwcfFMzQXzq8qjpRKH
# Jh84qusE/rLbzBsdN6snJY4+dyvUo03lT5IJ4d+FQg2tUip+Qqt7pnMbsqq6qF6H
# R6fWU1JTbsh7GxXJwQJ83jLBnUsi8cy6FKrZ3jyiBq76+DIpR0PqoEe+PN/weInU
# TN0z4g==
# =RfXJ
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 04 Oct 2022 15:45:53 EDT
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* tag 'pull-tcg-20221004' of https://gitlab.com/rth7680/qemu:
  target/sh4: Fix TB_FLAG_UNALIGN
  tcg/ppc: Optimize 26-bit jumps
  accel/tcg: Introduce TARGET_TB_PCREL
  accel/tcg: Introduce tb_pc and log_pc
  hw/core: Add CPUClass.get_pc
  include/hw/core: Create struct CPUJumpCache
  accel/tcg: Inline tb_flush_jmp_cache
  accel/tcg: Do not align tb->page_addr[0]
  accel/tcg: Use DisasContextBase in plugin_gen_tb_start
  accel/tcg: Use bool for page_find_alloc
  accel/tcg: Remove PageDesc code_bitmap
  include/exec: Introduce TARGET_PAGE_ENTRY_EXTRA
  accel/tcg: Introduce tlb_set_page_full
  accel/tcg: Introduce probe_access_full
  accel/tcg: Suppress auto-invalidate in probe_access_internal
  accel/tcg: Drop addr member from SavedIOTLB
  accel/tcg: Rename CPUIOTLBEntry to CPUTLBEntryFull
  cputlb: used cached CPUClass in our hot-paths
  hw/core/cpu-sysemu: used cached class in cpu_asidx_from_attrs
  cpu: cache CPUClass in CPUState for hot code paths

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Committer: Stefan Hajnoczi <stefanha@redhat.com>  2022-10-05 10:17:02 -04:00
commit 4a9c04672a
55 changed files with 915 additions and 462 deletions

@@ -51,6 +51,13 @@ typedef int (*WriteCoreDumpFunction)(const void *buf, size_t size,
  */
 #define CPU(obj) ((CPUState *)(obj))
 
+/*
+ * The class checkers bring in CPU_GET_CLASS() which is potentially
+ * expensive given the eventual call to
+ * object_class_dynamic_cast_assert(). Because of this the CPUState
+ * has a cached value for the class in cs->cc which is set up in
+ * cpu_exec_realizefn() for use in hot code paths.
+ */
 typedef struct CPUClass CPUClass;
 DECLARE_CLASS_CHECKERS(CPUClass, CPU,
                        TYPE_CPU)
@@ -108,6 +115,8 @@ struct SysemuCPUOps;
  * If the target behaviour here is anything other than "set
  * the PC register to the value passed in" then the target must
  * also implement the synchronize_from_tb hook.
+ * @get_pc: Callback for getting the Program Counter register.
+ *       As above, with the semantics of the target architecture.
  * @gdb_read_register: Callback for letting GDB read a register.
  * @gdb_write_register: Callback for letting GDB write a register.
  * @gdb_adjust_breakpoint: Callback for adjusting the address of a
@@ -144,6 +153,7 @@ struct CPUClass {
     void (*dump_state)(CPUState *cpu, FILE *, int flags);
     int64_t (*get_arch_id)(CPUState *cpu);
     void (*set_pc)(CPUState *cpu, vaddr value);
+    vaddr (*get_pc)(CPUState *cpu);
     int (*gdb_read_register)(CPUState *cpu, GByteArray *buf, int reg);
     int (*gdb_write_register)(CPUState *cpu, uint8_t *buf, int reg);
     vaddr (*gdb_adjust_breakpoint)(CPUState *cpu, vaddr addr);
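
get_pc is the read-side counterpart of set_pc: report the guest PC with the
target architecture's own semantics. A hedged sketch of the shape a target
implementation might take; FooCPU and its env layout are invented for
illustration, and the real hooks take a CPUState * and downcast it, which
this sketch skips:

    #include <stdint.h>

    typedef uint64_t vaddr;      /* stand-in for QEMU's vaddr */

    /* Invented example target; real targets keep the PC in CPUArchState. */
    typedef struct FooCPU {
        struct {
            vaddr pc;
        } env;
    } FooCPU;

    /* set_pc: "set the PC register to the value passed in". */
    static void foo_cpu_set_pc(FooCPU *cpu, vaddr value)
    {
        cpu->env.pc = value;
    }

    /* get_pc: same semantics in reverse -- return the architectural PC. */
    static vaddr foo_cpu_get_pc(FooCPU *cpu)
    {
        return cpu->env.pc;
    }
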
@@ -218,7 +228,6 @@ struct CPUWatchpoint {
  * the memory regions get moved around by io_writex.
  */
 typedef struct SavedIOTLB {
-    hwaddr addr;
     MemoryRegionSection *section;
     hwaddr mr_offset;
 } SavedIOTLB;
@@ -230,9 +239,6 @@ struct kvm_run;
 struct hax_vcpu_state;
 struct hvf_vcpu_state;
 
-#define TB_JMP_CACHE_BITS 12
-#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
-
 /* work queue */
 
 /* The union type allows passing of 64 bit target pointers on 32 bit
@@ -317,6 +323,8 @@ struct qemu_work_item;
 struct CPUState {
     /*< private >*/
     DeviceState parent_obj;
+    /* cache to avoid expensive CPU_GET_CLASS */
+    CPUClass *cc;
     /*< public >*/
 
     int nr_cores;
@@ -361,8 +369,7 @@ struct CPUState {
     CPUArchState *env_ptr;
     IcountDecr *icount_decr_ptr;
 
-    /* Accessed in parallel; all accesses must be atomic */
-    TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
+    CPUJumpCache *tb_jmp_cache;
 
     struct GDBRegisterState *gdb_regs;
     int gdb_num_regs;
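
tb_jmp_cache turns from an array embedded in every CPUState into a pointer
to a separately allocated CPUJumpCache, declared by the new
"include/hw/core: Create struct CPUJumpCache" commit. A sketch of the
general shape such a cache could take; the type, field and constant names
and the hash below are assumptions for illustration, not the definition
that lands in accel/tcg:

    #include <stdint.h>

    typedef struct TranslationBlock TranslationBlock;

    /* Assumed sizing, mirroring the constants removed from cpu.h above. */
    #define EXAMPLE_TB_JMP_CACHE_BITS 12
    #define EXAMPLE_TB_JMP_CACHE_SIZE (1 << EXAMPLE_TB_JMP_CACHE_BITS)

    /* A direct-mapped cache of recently executed TBs, indexed by guest PC. */
    typedef struct CPUJumpCacheExample {
        struct {
            TranslationBlock *tb;   /* accessed in parallel; needs atomics */
        } array[EXAMPLE_TB_JMP_CACHE_SIZE];
    } CPUJumpCacheExample;

    /* Trivial index function; the real hash mixes more of the PC bits. */
    static inline unsigned example_tb_jmp_cache_hash(uint64_t pc)
    {
        return pc & (EXAMPLE_TB_JMP_CACHE_SIZE - 1);
    }
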
@@ -448,15 +455,6 @@ extern CPUTailQ cpus;
 
 extern __thread CPUState *current_cpu;
 
-static inline void cpu_tb_jmp_cache_clear(CPUState *cpu)
-{
-    unsigned int i;
-
-    for (i = 0; i < TB_JMP_CACHE_SIZE; i++) {
-        qatomic_set(&cpu->tb_jmp_cache[i], NULL);
-    }
-}
-
 /**
  * qemu_tcg_mttcg_enabled:
  * Check whether we are running MultiThread TCG or not.
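
With cpu_tb_jmp_cache_clear() removed from the header (per "accel/tcg:
Inline tb_flush_jmp_cache", its callers move behind accel/tcg), clearing
happens through the pointer indirection instead. A hedged sketch against
the example structure above; the helper name is invented, and the real
code would use qatomic_set() where this shows a plain store:

    #include <stddef.h>

    /* Clear every cached TB pointer in the (example) jump cache. */
    static void example_jmp_cache_clear(CPUJumpCacheExample *jc)
    {
        if (jc == NULL) {
            return;   /* cache not allocated yet, nothing to do */
        }
        for (size_t i = 0; i < EXAMPLE_TB_JMP_CACHE_SIZE; i++) {
            jc->array[i].tb = NULL;   /* real code: qatomic_set(..., NULL) */
        }
    }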