target-arm queue:

 * hw/arm/iotkit.c: fix minor memory leak
 * softfloat: fix wrong-exception-flags bug for multiply-add corner case
 * arm: isolate and clean up DTB generation
 * implement Arm v8.1-Atomics extension
 * Fix some bugs and missing instructions in the v8.2-FP16 extension
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABCAAGBQJa9IUCAAoJEDwlJe0UNgzeEGMQAKKjVRzZ7MBgvxQj0FJSWhSP
 BZkATf3ktid255PRpIssBZiY9oM+uY6n+/IRozAGvfDBp9eQOkrZczZjfW5hpe0B
 YsQadtk5cUOXqQzRTegSMPOoMmz8f5GaGOk4R6AEXJEX+Rug/zbOn9Q8Yx7JTd7o
 yBvU1+fys3galSiB88cffA95B9fwGfLsM7rP6OC4yNdUBYwjHf3wtY53WsxtWqX9
 oX4keEiROQkrOfbSy9wYPZzu/0iRo8v35+7wIZhvNSlf02k6yJ7a+w0C4EQIRhWm
 5zciE+aMYr7nOGpj7AEJLrRekhwnD6Ppje6aUd15yrxfNRZkpk/FeECWnaOPDis7
 QNijx5Zqg6+GyItQKi5U4vFVReMj09OB7xDyAq77xDeBj4l3lg2DNkRfRhqQZAcv
 2r4EW+pfLNj76Ah1qtQ410fprw462Sopb6bHmeuFbf1QFbQvJ4CL1+7Jl3ExrDX4
 2+iQb4sQghWDxhDLfRSLxQ7K+bX+mNfGdFW8h+jPShD/+JY42dTKkFZEl4ghNgMD
 mpj8FrQuIkSMqnDmPfoTG5MVTMERacqPU7GGM7/fxudIkByO3zTiLxJ/E+Iy8HvX
 29xKoOBjKT5FJrwJABsN6VpA3EuyAARgQIZ/dd6N5GZdgn2KAIHuaI+RHFOesKFd
 dJGM6sdksnsAAz28aUEJ
 =uXY+
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20180510' into staging

target-arm queue:
 * hw/arm/iotkit.c: fix minor memory leak
 * softfloat: fix wrong-exception-flags bug for multiply-add corner case
 * arm: isolate and clean up DTB generation
 * implement Arm v8.1-Atomics extension
 * Fix some bugs and missing instructions in the v8.2-FP16 extension

# gpg: Signature made Thu 10 May 2018 18:44:34 BST
# gpg:                using RSA key 3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20180510: (21 commits)
  target/arm: Clear SVE high bits for FMOV
  target/arm: Fix float16 to/from int16
  target/arm: Implement vector shifted FCVT for fp16
  target/arm: Implement vector shifted SCVF/UCVF for fp16
  target/arm: Enable ARM_FEATURE_V8_ATOMICS for user-only
  target/arm: Implement CAS and CASP
  target/arm: Fill in disas_ldst_atomic
  target/arm: Introduce ARM_FEATURE_V8_ATOMICS and initial decode
  target/riscv: Use new atomic min/max expanders
  tcg: Use GEN_ATOMIC_HELPER_FN for opposite endian atomic add
  tcg: Introduce atomic helpers for integer min/max
  target/xtensa: Use new min/max expanders
  target/arm: Use new min/max expanders
  tcg: Introduce helpers for integer min/max
  atomic.h: Work around gcc spurious "unused value" warning
  make sure that we aren't overwriting mc->get_hotplug_handler by accident
  arm/boot: split load_dtb() from arm_load_kernel()
  platform-bus-device: use device plug callback instead of machine_done notifier
  pc: simplify MachineClass::get_hotplug_handler handling
  softfloat: Handle default NaN mode after pickNaNMulAdd, not before
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

# Conflicts:
#	target/riscv/translate.c
Peter Maydell 2018-05-11 17:41:54 +01:00
commit f5583c527f
33 changed files with 934 additions and 504 deletions

@@ -1449,6 +1449,7 @@ enum arm_features {
ARM_FEATURE_V8_SHA3, /* implements SHA3 part of v8 Crypto Extensions */
ARM_FEATURE_V8_SM3, /* implements SM3 part of v8 Crypto Extensions */
ARM_FEATURE_V8_SM4, /* implements SM4 part of v8 Crypto Extensions */
ARM_FEATURE_V8_ATOMICS, /* ARMv8.1-Atomics feature */
ARM_FEATURE_V8_RDM, /* implements v8.1 simd round multiply */
ARM_FEATURE_V8_FP16, /* implements v8.2 half-precision float */
ARM_FEATURE_V8_FCMA, /* has complex number part of v8.3 extensions. */

@@ -248,6 +248,7 @@ static void aarch64_max_initfn(Object *obj)
set_feature(&cpu->env, ARM_FEATURE_V8_SM4);
set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
set_feature(&cpu->env, ARM_FEATURE_CRC);
set_feature(&cpu->env, ARM_FEATURE_V8_ATOMICS);
set_feature(&cpu->env, ARM_FEATURE_V8_RDM);
set_feature(&cpu->env, ARM_FEATURE_V8_FP16);
set_feature(&cpu->env, ARM_FEATURE_V8_FCMA);

@@ -636,6 +636,49 @@ uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, true, GETPC());
}
/* Writes back the old data into Rs. */
void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
uint64_t new_lo, uint64_t new_hi)
{
uintptr_t ra = GETPC();
#ifndef CONFIG_ATOMIC128
cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
#else
Int128 oldv, cmpv, newv;
cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]);
newv = int128_make128(new_lo, new_hi);
int mem_idx = cpu_mmu_index(env, false);
TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
env->xregs[rs] = int128_getlo(oldv);
env->xregs[rs + 1] = int128_gethi(oldv);
#endif
}
void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
uint64_t new_hi, uint64_t new_lo)
{
uintptr_t ra = GETPC();
#ifndef CONFIG_ATOMIC128
cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
#else
Int128 oldv, cmpv, newv;
cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]);
newv = int128_make128(new_lo, new_hi);
int mem_idx = cpu_mmu_index(env, false);
TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
env->xregs[rs + 1] = int128_getlo(oldv);
env->xregs[rs] = int128_gethi(oldv);
#endif
}
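Taken together these two helpers give CASP its guest-visible semantics: compare the {Rs, Rs+1} pair against a single-copy-atomic 16-byte load, store the new pair on a match, and always write the old data back to {Rs, Rs+1}. A minimal, non-atomic C model of the little-endian case (illustrative names only, not QEMU code):

#include <stdint.h>

/* mem[0] is the 64-bit word at the lower address, mem[1] the word at
 * address + 8, matching the little-endian layout casp_le_parallel uses.
 * The real instruction performs this as one atomic 128-bit access; this
 * sketch ignores atomicity.
 */
static void casp_le_model(uint64_t mem[2], uint64_t rs[2],
                          uint64_t new_lo, uint64_t new_hi)
{
    uint64_t old_lo = mem[0], old_hi = mem[1];

    if (old_lo == rs[0] && old_hi == rs[1]) {
        mem[0] = new_lo;        /* comparison matched: store the new pair */
        mem[1] = new_hi;
    }
    rs[0] = old_lo;             /* old data always goes back into Rs/Rs+1 */
    rs[1] = old_hi;
}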
/*
* AdvSIMD half-precision
*/

@@ -51,6 +51,8 @@ DEF_HELPER_FLAGS_4(paired_cmpxchg64_le_parallel, TCG_CALL_NO_WG,
DEF_HELPER_FLAGS_4(paired_cmpxchg64_be, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
DEF_HELPER_FLAGS_4(paired_cmpxchg64_be_parallel, TCG_CALL_NO_WG,
i64, env, i64, i64, i64)
DEF_HELPER_5(casp_le_parallel, void, env, i32, i64, i64, i64)
DEF_HELPER_5(casp_be_parallel, void, env, i32, i64, i64, i64)
DEF_HELPER_FLAGS_3(advsimd_maxh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
DEF_HELPER_FLAGS_3(advsimd_minh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
DEF_HELPER_FLAGS_3(advsimd_maxnumh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)

@@ -11420,11 +11420,60 @@ VFP_CONV_FIX_A64(sq, s, 32, 64, int64)
VFP_CONV_FIX(uh, s, 32, 32, uint16)
VFP_CONV_FIX(ul, s, 32, 32, uint32)
VFP_CONV_FIX_A64(uq, s, 32, 64, uint64)
VFP_CONV_FIX_A64(sl, h, 16, 32, int32)
VFP_CONV_FIX_A64(ul, h, 16, 32, uint32)
#undef VFP_CONV_FIX
#undef VFP_CONV_FIX_FLOAT
#undef VFP_CONV_FLOAT_FIX_ROUND
#undef VFP_CONV_FIX_A64
/* Conversion to/from f16 can overflow to infinity before/after scaling.
* Therefore we convert to f64 (which does not round), scale,
* and then convert f64 to f16 (which may round).
*/
static float16 do_postscale_fp16(float64 f, int shift, float_status *fpst)
{
return float64_to_float16(float64_scalbn(f, -shift, fpst), true, fpst);
}
float16 HELPER(vfp_sltoh)(uint32_t x, uint32_t shift, void *fpst)
{
return do_postscale_fp16(int32_to_float64(x, fpst), shift, fpst);
}
float16 HELPER(vfp_ultoh)(uint32_t x, uint32_t shift, void *fpst)
{
return do_postscale_fp16(uint32_to_float64(x, fpst), shift, fpst);
}
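A concrete illustration of the overflow hazard the comment above describes (values chosen for illustration):

/* UCVTF (fixed-point, #1) on the 16-bit input 0xffff:
 *   via f64:  65535 converts exactly; scaling by 2^-1 gives 32767.5,
 *             which then rounds once, to 32768 in float16.
 *   direct:   65535 converted straight to float16 already rounds up to
 *             +infinity (the largest finite float16 is 65504), and the
 *             later scaling by 2^-1 cannot undo that.
 */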
static float64 do_prescale_fp16(float16 f, int shift, float_status *fpst)
{
if (unlikely(float16_is_any_nan(f))) {
float_raise(float_flag_invalid, fpst);
return 0;
} else {
int old_exc_flags = get_float_exception_flags(fpst);
float64 ret;
ret = float16_to_float64(f, true, fpst);
ret = float64_scalbn(ret, shift, fpst);
old_exc_flags |= get_float_exception_flags(fpst)
& float_flag_input_denormal;
set_float_exception_flags(old_exc_flags, fpst);
return ret;
}
}
uint32_t HELPER(vfp_toshh)(float16 x, uint32_t shift, void *fpst)
{
return float64_to_int16(do_prescale_fp16(x, shift, fpst), fpst);
}
uint32_t HELPER(vfp_touhh)(float16 x, uint32_t shift, void *fpst)
{
return float64_to_uint16(do_prescale_fp16(x, shift, fpst), fpst);
}
/* Set the current fp rounding mode and return the old one.
* The argument is a softfloat float_round_ value.

@@ -149,8 +149,8 @@ DEF_HELPER_3(vfp_toshd_round_to_zero, i64, f64, i32, ptr)
DEF_HELPER_3(vfp_tosld_round_to_zero, i64, f64, i32, ptr)
DEF_HELPER_3(vfp_touhd_round_to_zero, i64, f64, i32, ptr)
DEF_HELPER_3(vfp_tould_round_to_zero, i64, f64, i32, ptr)
DEF_HELPER_3(vfp_toulh, i32, f16, i32, ptr)
DEF_HELPER_3(vfp_toslh, i32, f16, i32, ptr)
DEF_HELPER_3(vfp_touhh, i32, f16, i32, ptr)
DEF_HELPER_3(vfp_toshh, i32, f16, i32, ptr)
DEF_HELPER_3(vfp_toshs, i32, f32, i32, ptr)
DEF_HELPER_3(vfp_tosls, i32, f32, i32, ptr)
DEF_HELPER_3(vfp_tosqs, i64, f32, i32, ptr)

@@ -84,6 +84,7 @@ typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, TCGMemOp);
/* Note that the gvec expanders operate on offsets + sizes. */
typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
@@ -2113,6 +2114,103 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
int rn, int size)
{
TCGv_i64 tcg_rs = cpu_reg(s, rs);
TCGv_i64 tcg_rt = cpu_reg(s, rt);
int memidx = get_mem_index(s);
TCGv_i64 addr = cpu_reg_sp(s, rn);
if (rn == 31) {
gen_check_sp_alignment(s);
}
tcg_gen_atomic_cmpxchg_i64(tcg_rs, addr, tcg_rs, tcg_rt, memidx,
size | MO_ALIGN | s->be_data);
}
static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
int rn, int size)
{
TCGv_i64 s1 = cpu_reg(s, rs);
TCGv_i64 s2 = cpu_reg(s, rs + 1);
TCGv_i64 t1 = cpu_reg(s, rt);
TCGv_i64 t2 = cpu_reg(s, rt + 1);
TCGv_i64 addr = cpu_reg_sp(s, rn);
int memidx = get_mem_index(s);
if (rn == 31) {
gen_check_sp_alignment(s);
}
if (size == 2) {
TCGv_i64 cmp = tcg_temp_new_i64();
TCGv_i64 val = tcg_temp_new_i64();
if (s->be_data == MO_LE) {
tcg_gen_concat32_i64(val, t1, t2);
tcg_gen_concat32_i64(cmp, s1, s2);
} else {
tcg_gen_concat32_i64(val, t2, t1);
tcg_gen_concat32_i64(cmp, s2, s1);
}
tcg_gen_atomic_cmpxchg_i64(cmp, addr, cmp, val, memidx,
MO_64 | MO_ALIGN | s->be_data);
tcg_temp_free_i64(val);
if (s->be_data == MO_LE) {
tcg_gen_extr32_i64(s1, s2, cmp);
} else {
tcg_gen_extr32_i64(s2, s1, cmp);
}
tcg_temp_free_i64(cmp);
} else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
TCGv_i32 tcg_rs = tcg_const_i32(rs);
if (s->be_data == MO_LE) {
gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
} else {
gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
}
tcg_temp_free_i32(tcg_rs);
} else {
TCGv_i64 d1 = tcg_temp_new_i64();
TCGv_i64 d2 = tcg_temp_new_i64();
TCGv_i64 a2 = tcg_temp_new_i64();
TCGv_i64 c1 = tcg_temp_new_i64();
TCGv_i64 c2 = tcg_temp_new_i64();
TCGv_i64 zero = tcg_const_i64(0);
/* Load the two words, in memory order. */
tcg_gen_qemu_ld_i64(d1, addr, memidx,
MO_64 | MO_ALIGN_16 | s->be_data);
tcg_gen_addi_i64(a2, addr, 8);
tcg_gen_qemu_ld_i64(d2, addr, memidx, MO_64 | s->be_data);
/* Compare the two words, also in memory order. */
tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
tcg_gen_and_i64(c2, c2, c1);
/* If compare equal, write back new data, else write back old data. */
tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
tcg_gen_qemu_st_i64(c1, addr, memidx, MO_64 | s->be_data);
tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
tcg_temp_free_i64(a2);
tcg_temp_free_i64(c1);
tcg_temp_free_i64(c2);
tcg_temp_free_i64(zero);
/* Write back the data from memory to Rs. */
tcg_gen_mov_i64(s1, d1);
tcg_gen_mov_i64(s2, d2);
tcg_temp_free_i64(d1);
tcg_temp_free_i64(d2);
}
}
/* Update the Sixty-Four bit (SF) register size. This logic is derived
* from the ARMv8 specs for LDR (Shared decode for all encodings).
*/
@@ -2147,62 +2245,114 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
int rt = extract32(insn, 0, 5);
int rn = extract32(insn, 5, 5);
int rt2 = extract32(insn, 10, 5);
int is_lasr = extract32(insn, 15, 1);
int rs = extract32(insn, 16, 5);
int is_pair = extract32(insn, 21, 1);
int is_store = !extract32(insn, 22, 1);
int is_excl = !extract32(insn, 23, 1);
int is_lasr = extract32(insn, 15, 1);
int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
int size = extract32(insn, 30, 2);
TCGv_i64 tcg_addr;
if ((!is_excl && !is_pair && !is_lasr) ||
(!is_excl && is_pair) ||
(is_pair && size < 2)) {
unallocated_encoding(s);
switch (o2_L_o1_o0) {
case 0x0: /* STXR */
case 0x1: /* STLXR */
if (rn == 31) {
gen_check_sp_alignment(s);
}
if (is_lasr) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
}
tcg_addr = read_cpu_reg_sp(s, rn, 1);
gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, false);
return;
}
if (rn == 31) {
gen_check_sp_alignment(s);
}
tcg_addr = read_cpu_reg_sp(s, rn, 1);
/* Note that since TCG is single threaded load-acquire/store-release
* semantics require no extra if (is_lasr) { ... } handling.
*/
if (is_excl) {
if (!is_store) {
s->is_ldex = true;
gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
if (is_lasr) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
}
} else {
if (is_lasr) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
}
gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
case 0x4: /* LDXR */
case 0x5: /* LDAXR */
if (rn == 31) {
gen_check_sp_alignment(s);
}
} else {
TCGv_i64 tcg_rt = cpu_reg(s, rt);
bool iss_sf = disas_ldst_compute_iss_sf(size, false, 0);
tcg_addr = read_cpu_reg_sp(s, rn, 1);
s->is_ldex = true;
gen_load_exclusive(s, rt, rt2, tcg_addr, size, false);
if (is_lasr) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
}
return;
case 0x9: /* STLR */
/* Generate ISS for non-exclusive accesses including LASR. */
if (is_store) {
if (rn == 31) {
gen_check_sp_alignment(s);
}
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
tcg_addr = read_cpu_reg_sp(s, rn, 1);
do_gpr_st(s, cpu_reg(s, rt), tcg_addr, size, true, rt,
disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
return;
case 0xd: /* LDAR */
/* Generate ISS for non-exclusive accesses including LASR. */
if (rn == 31) {
gen_check_sp_alignment(s);
}
tcg_addr = read_cpu_reg_sp(s, rn, 1);
do_gpr_ld(s, cpu_reg(s, rt), tcg_addr, size, false, false, true, rt,
disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
return;
case 0x2: case 0x3: /* CASP / STXP */
if (size & 2) { /* STXP / STLXP */
if (rn == 31) {
gen_check_sp_alignment(s);
}
if (is_lasr) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
}
do_gpr_st(s, tcg_rt, tcg_addr, size,
true, rt, iss_sf, is_lasr);
} else {
do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false,
true, rt, iss_sf, is_lasr);
tcg_addr = read_cpu_reg_sp(s, rn, 1);
gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, true);
return;
}
if (rt2 == 31
&& ((rt | rs) & 1) == 0
&& arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
/* CASP / CASPL */
gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
return;
}
break;
case 0x6: case 0x7: /* CASPA / LDXP */
if (size & 2) { /* LDXP / LDAXP */
if (rn == 31) {
gen_check_sp_alignment(s);
}
tcg_addr = read_cpu_reg_sp(s, rn, 1);
s->is_ldex = true;
gen_load_exclusive(s, rt, rt2, tcg_addr, size, true);
if (is_lasr) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
}
return;
}
if (rt2 == 31
&& ((rt | rs) & 1) == 0
&& arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
/* CASPA / CASPAL */
gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
return;
}
break;
case 0xa: /* CAS */
case 0xb: /* CASL */
case 0xe: /* CASA */
case 0xf: /* CASAL */
if (rt2 == 31 && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
gen_compare_and_swap(s, rs, rt, rn, size);
return;
}
break;
}
unallocated_encoding(s);
}
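For reference, the switch index used above simply packs the four encoding bits o2 (insn[23]), L (insn[22]), o1 (insn[21]) and o0 (insn[15]) into one value; a plain-C restatement (illustrative, not QEMU code):

#include <stdint.h>

/* Same computation as extract32(insn, 21, 3) * 2 | extract32(insn, 15, 1).
 * For example LDAXR (o2=0, L=1, o1=0, o0=1) yields 0x5, matching case 0x5.
 */
static unsigned o2_l_o1_o0(uint32_t insn)
{
    return ((insn >> 21) & 7) * 2 | ((insn >> 15) & 1);
}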
/*
@@ -2715,6 +2865,88 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
}
}
/* Atomic memory operations
*
* 31 30 27 26 24 22 21 16 15 12 10 5 0
* +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
* | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn | Rt |
* +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
*
* Rt: the result register
* Rn: base address or SP
* Rs: the source register for the operation
* V: vector flag (always 0 as of v8.3)
* A: acquire flag
* R: release flag
*/
static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
int size, int rt, bool is_vector)
{
int rs = extract32(insn, 16, 5);
int rn = extract32(insn, 5, 5);
int o3_opc = extract32(insn, 12, 4);
int feature = ARM_FEATURE_V8_ATOMICS;
TCGv_i64 tcg_rn, tcg_rs;
AtomicThreeOpFn *fn;
if (is_vector) {
unallocated_encoding(s);
return;
}
switch (o3_opc) {
case 000: /* LDADD */
fn = tcg_gen_atomic_fetch_add_i64;
break;
case 001: /* LDCLR */
fn = tcg_gen_atomic_fetch_and_i64;
break;
case 002: /* LDEOR */
fn = tcg_gen_atomic_fetch_xor_i64;
break;
case 003: /* LDSET */
fn = tcg_gen_atomic_fetch_or_i64;
break;
case 004: /* LDSMAX */
fn = tcg_gen_atomic_fetch_smax_i64;
break;
case 005: /* LDSMIN */
fn = tcg_gen_atomic_fetch_smin_i64;
break;
case 006: /* LDUMAX */
fn = tcg_gen_atomic_fetch_umax_i64;
break;
case 007: /* LDUMIN */
fn = tcg_gen_atomic_fetch_umin_i64;
break;
case 010: /* SWP */
fn = tcg_gen_atomic_xchg_i64;
break;
default:
unallocated_encoding(s);
return;
}
if (!arm_dc_feature(s, feature)) {
unallocated_encoding(s);
return;
}
if (rn == 31) {
gen_check_sp_alignment(s);
}
tcg_rn = cpu_reg_sp(s, rn);
tcg_rs = read_cpu_reg(s, rs, true);
if (o3_opc == 1) { /* LDCLR */
tcg_gen_not_i64(tcg_rs, tcg_rs);
}
/* The tcg atomic primitives are all full barriers. Therefore we
* can ignore the Acquire and Release bits of this instruction.
*/
fn(cpu_reg(s, rt), tcg_rn, tcg_rs, get_mem_index(s),
s->be_data | size | MO_ALIGN);
}
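The extra tcg_gen_not_i64 above reflects what LDCLR actually does: it clears the bits of memory that are set in Rs, i.e. an atomic fetch-and with the complement. A minimal, non-atomic C model (illustrative names only, not QEMU code):

#include <stdint.h>

/* LDCLR Xs, Xt, [Xn]: memory becomes old & ~Xs, Rt receives the old value.
 * The real instruction performs this atomically; this sketch does not.
 */
static uint64_t ldclr_model(uint64_t *mem, uint64_t rs)
{
    uint64_t old = *mem;

    *mem = old & ~rs;
    return old;             /* written back to Rt by the caller */
}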
/* Load/store register (all forms) */
static void disas_ldst_reg(DisasContext *s, uint32_t insn)
{
@@ -2725,23 +2957,28 @@ static void disas_ldst_reg(DisasContext *s, uint32_t insn)
switch (extract32(insn, 24, 2)) {
case 0:
if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
} else {
if (extract32(insn, 21, 1) == 0) {
/* Load/store register (unscaled immediate)
* Load/store immediate pre/post-indexed
* Load/store register unprivileged
*/
disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
return;
}
switch (extract32(insn, 10, 2)) {
case 0:
disas_ldst_atomic(s, insn, size, rt, is_vector);
return;
case 2:
disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
return;
}
break;
case 1:
disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
break;
default:
unallocated_encoding(s);
break;
return;
}
unallocated_encoding(s);
}
/* AdvSIMD load/store multiple structures
@@ -5444,31 +5681,24 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
if (itof) {
TCGv_i64 tcg_rn = cpu_reg(s, rn);
TCGv_i64 tmp;
switch (type) {
case 0:
{
/* 32 bit */
TCGv_i64 tmp = tcg_temp_new_i64();
tmp = tcg_temp_new_i64();
tcg_gen_ext32u_i64(tmp, tcg_rn);
tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
tcg_gen_movi_i64(tmp, 0);
tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
write_fp_dreg(s, rd, tmp);
tcg_temp_free_i64(tmp);
break;
}
case 1:
{
/* 64 bit */
TCGv_i64 tmp = tcg_const_i64(0);
tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
tcg_temp_free_i64(tmp);
write_fp_dreg(s, rd, tcg_rn);
break;
}
case 2:
/* 64 bit to top half. */
tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
clear_vec_high(s, true, rd);
break;
}
} else {
@@ -6021,15 +6251,18 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
break;
case 0x0a: /* SMAXV / UMAXV */
tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
tcg_res,
tcg_res, tcg_elt, tcg_res, tcg_elt);
if (is_u) {
tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
} else {
tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
}
break;
case 0x1a: /* SMINV / UMINV */
tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
tcg_res,
tcg_res, tcg_elt, tcg_res, tcg_elt);
break;
if (is_u) {
tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
} else {
tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
}
break;
default:
g_assert_not_reached();
@@ -7165,13 +7398,26 @@ static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
int immh, int immb, int opcode,
int rn, int rd)
{
bool is_double = extract32(immh, 3, 1);
int size = is_double ? MO_64 : MO_32;
int elements;
int size, elements, fracbits;
int immhb = immh << 3 | immb;
int fracbits = (is_double ? 128 : 64) - immhb;
if (!extract32(immh, 2, 2)) {
if (immh & 8) {
size = MO_64;
if (!is_scalar && !is_q) {
unallocated_encoding(s);
return;
}
} else if (immh & 4) {
size = MO_32;
} else if (immh & 2) {
size = MO_16;
if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
unallocated_encoding(s);
return;
}
} else {
/* immh == 0 would be a failure of the decode logic */
g_assert(immh == 1);
unallocated_encoding(s);
return;
}
@@ -7179,20 +7425,14 @@ static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
if (is_scalar) {
elements = 1;
} else {
elements = is_double ? 2 : is_q ? 4 : 2;
if (is_double && !is_q) {
unallocated_encoding(s);
return;
}
elements = (8 << is_q) >> size;
}
fracbits = (16 << size) - immhb;
if (!fp_access_check(s)) {
return;
}
/* immh == 0 would be a failure of the decode logic */
g_assert(immh);
handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
}
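With the element size now folded into the decode, the fraction-bit count follows the same formula for every width; a worked example (numbers chosen for illustration):

/* Half-precision case: size == MO_16, so (16 << size) == 32.
 * immh:immb == 0b0011001 (immhb == 25) gives fracbits = 32 - 25 = 7,
 * i.e. the fixed-point input carries 7 fraction bits.
 */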
@@ -7201,19 +7441,28 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
bool is_q, bool is_u,
int immh, int immb, int rn, int rd)
{
bool is_double = extract32(immh, 3, 1);
int immhb = immh << 3 | immb;
int fracbits = (is_double ? 128 : 64) - immhb;
int pass;
int pass, size, fracbits;
TCGv_ptr tcg_fpstatus;
TCGv_i32 tcg_rmode, tcg_shift;
if (!extract32(immh, 2, 2)) {
unallocated_encoding(s);
return;
}
if (!is_scalar && !is_q && is_double) {
if (immh & 0x8) {
size = MO_64;
if (!is_scalar && !is_q) {
unallocated_encoding(s);
return;
}
} else if (immh & 0x4) {
size = MO_32;
} else if (immh & 0x2) {
size = MO_16;
if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
unallocated_encoding(s);
return;
}
} else {
/* Should have split out AdvSIMD modified immediate earlier. */
assert(immh == 1);
unallocated_encoding(s);
return;
}
@@ -7225,11 +7474,12 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
assert(!(is_scalar && is_q));
tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
tcg_fpstatus = get_fpstatus_ptr(false);
tcg_fpstatus = get_fpstatus_ptr(size == MO_16);
gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
fracbits = (16 << size) - immhb;
tcg_shift = tcg_const_i32(fracbits);
if (is_double) {
if (size == MO_64) {
int maxpass = is_scalar ? 1 : 2;
for (pass = 0; pass < maxpass; pass++) {
@@ -7246,20 +7496,37 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
}
clear_vec_high(s, is_q, rd);
} else {
int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
switch (size) {
case MO_16:
if (is_u) {
fn = gen_helper_vfp_touhh;
} else {
fn = gen_helper_vfp_toshh;
}
break;
case MO_32:
if (is_u) {
fn = gen_helper_vfp_touls;
} else {
fn = gen_helper_vfp_tosls;
}
break;
default:
g_assert_not_reached();
}
for (pass = 0; pass < maxpass; pass++) {
TCGv_i32 tcg_op = tcg_temp_new_i32();
read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
if (is_u) {
gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
} else {
gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
}
read_vec_element_i32(s, tcg_op, rn, pass, size);
fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
if (is_scalar) {
write_fp_sreg(s, rd, tcg_op);
} else {
write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
write_vec_element_i32(s, tcg_op, rd, pass, size);
}
tcg_temp_free_i32(tcg_op);
}
@@ -9927,27 +10194,6 @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
}
}
/* Helper functions for 32 bit comparisons */
static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
{
tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
}
static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
{
tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
}
static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
{
tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
}
static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
{
tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
}
/* Pairwise op subgroup of C3.6.16.
*
* This is called directly or via the handle_3same_float for float pairwise
@@ -10047,7 +10293,7 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
static NeonGenTwoOpFn * const fns[3][2] = {
{ gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
{ gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
{ gen_max_s32, gen_max_u32 },
{ tcg_gen_smax_i32, tcg_gen_umax_i32 },
};
genfn = fns[size][u];
break;
@@ -10057,7 +10303,7 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
static NeonGenTwoOpFn * const fns[3][2] = {
{ gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
{ gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
{ gen_min_s32, gen_min_u32 },
{ tcg_gen_smin_i32, tcg_gen_umin_i32 },
};
genfn = fns[size][u];
break;
@@ -10512,7 +10758,7 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
static NeonGenTwoOpFn * const fns[3][2] = {
{ gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
{ gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
{ gen_max_s32, gen_max_u32 },
{ tcg_gen_smax_i32, tcg_gen_umax_i32 },
};
genfn = fns[size][u];
break;
@@ -10523,7 +10769,7 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
static NeonGenTwoOpFn * const fns[3][2] = {
{ gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
{ gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
{ gen_min_s32, gen_min_u32 },
{ tcg_gen_smin_i32, tcg_gen_umin_i32 },
};
genfn = fns[size][u];
break;

@@ -716,7 +716,6 @@ static void gen_atomic(DisasContext *ctx, uint32_t opc,
TCGv src1, src2, dat;
TCGLabel *l1, *l2;
TCGMemOp mop;
TCGCond cond;
bool aq, rl;
/* Extract the size of the atomic operation. */
@@ -814,60 +813,29 @@ static void gen_atomic(DisasContext *ctx, uint32_t opc,
tcg_gen_atomic_fetch_or_tl(src2, src1, src2, ctx->mem_idx, mop);
gen_set_gpr(rd, src2);
break;
case OPC_RISC_AMOMIN:
cond = TCG_COND_LT;
goto do_minmax;
case OPC_RISC_AMOMAX:
cond = TCG_COND_GT;
goto do_minmax;
case OPC_RISC_AMOMINU:
cond = TCG_COND_LTU;
goto do_minmax;
case OPC_RISC_AMOMAXU:
cond = TCG_COND_GTU;
goto do_minmax;
do_minmax:
/* Handle the RL barrier. The AQ barrier is handled along the
parallel path by the SC atomic cmpxchg. On the serial path,
of course, barriers do not matter. */
if (rl) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
}
if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
l1 = gen_new_label();
gen_set_label(l1);
} else {
l1 = NULL;
}
gen_get_gpr(src1, rs1);
gen_get_gpr(src2, rs2);
if ((mop & MO_SSIZE) == MO_SL) {
/* Sign-extend the register comparison input. */
tcg_gen_ext32s_tl(src2, src2);
}
dat = tcg_temp_local_new();
tcg_gen_qemu_ld_tl(dat, src1, ctx->mem_idx, mop);
tcg_gen_movcond_tl(cond, src2, dat, src2, dat, src2);
if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
/* Parallel context. Make this operation atomic by verifying
that the memory didn't change while we computed the result. */
tcg_gen_atomic_cmpxchg_tl(src2, src1, dat, src2, ctx->mem_idx, mop);
/* If the cmpxchg failed, retry. */
/* ??? There is an assumption here that this will eventually
succeed, such that we don't live-lock. This is not unlike
a similar loop that the compiler would generate for e.g.
__atomic_fetch_and_xor, so don't worry about it. */
tcg_gen_brcond_tl(TCG_COND_NE, dat, src2, l1);
} else {
/* Serial context. Directly store the result. */
tcg_gen_qemu_st_tl(src2, src1, ctx->mem_idx, mop);
}
gen_set_gpr(rd, dat);
tcg_temp_free(dat);
tcg_gen_atomic_fetch_smin_tl(src2, src1, src2, ctx->mem_idx, mop);
gen_set_gpr(rd, src2);
break;
case OPC_RISC_AMOMAX:
gen_get_gpr(src1, rs1);
gen_get_gpr(src2, rs2);
tcg_gen_atomic_fetch_smax_tl(src2, src1, src2, ctx->mem_idx, mop);
gen_set_gpr(rd, src2);
break;
case OPC_RISC_AMOMINU:
gen_get_gpr(src1, rs1);
gen_get_gpr(src2, rs2);
tcg_gen_atomic_fetch_umin_tl(src2, src1, src2, ctx->mem_idx, mop);
gen_set_gpr(rd, src2);
break;
case OPC_RISC_AMOMAXU:
gen_get_gpr(src1, rs1);
gen_get_gpr(src2, rs2);
tcg_gen_atomic_fetch_umax_tl(src2, src1, src2, ctx->mem_idx, mop);
gen_set_gpr(rd, src2);
break;
default:

@@ -1526,10 +1526,8 @@ static void translate_clamps(DisasContext *dc, const uint32_t arg[],
TCGv_i32 tmp1 = tcg_const_i32(-1u << arg[2]);
TCGv_i32 tmp2 = tcg_const_i32((1 << arg[2]) - 1);
tcg_gen_movcond_i32(TCG_COND_GT, tmp1,
cpu_R[arg[1]], tmp1, cpu_R[arg[1]], tmp1);
tcg_gen_movcond_i32(TCG_COND_LT, cpu_R[arg[0]],
tmp1, tmp2, tmp1, tmp2);
tcg_gen_smax_i32(tmp1, tmp1, cpu_R[arg[1]]);
tcg_gen_smin_i32(cpu_R[arg[0]], tmp1, tmp2);
tcg_temp_free(tmp1);
tcg_temp_free(tmp2);
}
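With the new expanders CLAMPS is just a signed clamp; a scalar C model of what the two TCG ops above compute (illustrative only, not QEMU code):

#include <stdint.h>

/* Clamp the signed value to [-2^imm, 2^imm - 1]; e.g. imm == 7 clamps
 * to [-128, 127].  smax enforces the lower bound, smin the upper bound.
 */
static int32_t clamps_model(int32_t x, unsigned imm)
{
    int32_t lo = -(1 << imm);       /* tmp1 in the translator */
    int32_t hi = (1 << imm) - 1;    /* tmp2 in the translator */

    if (x < lo) {
        x = lo;                     /* tcg_gen_smax_i32 */
    }
    if (x > hi) {
        x = hi;                     /* tcg_gen_smin_i32 */
    }
    return x;
}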
@@ -1854,13 +1852,35 @@ static void translate_memw(DisasContext *dc, const uint32_t arg[],
tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
}
static void translate_minmax(DisasContext *dc, const uint32_t arg[],
const uint32_t par[])
static void translate_smin(DisasContext *dc, const uint32_t arg[],
const uint32_t par[])
{
if (gen_window_check3(dc, arg[0], arg[1], arg[2])) {
tcg_gen_movcond_i32(par[0], cpu_R[arg[0]],
cpu_R[arg[1]], cpu_R[arg[2]],
cpu_R[arg[1]], cpu_R[arg[2]]);
tcg_gen_smin_i32(cpu_R[arg[0]], cpu_R[arg[1]], cpu_R[arg[2]]);
}
}
static void translate_umin(DisasContext *dc, const uint32_t arg[],
const uint32_t par[])
{
if (gen_window_check3(dc, arg[0], arg[1], arg[2])) {
tcg_gen_umin_i32(cpu_R[arg[0]], cpu_R[arg[1]], cpu_R[arg[2]]);
}
}
static void translate_smax(DisasContext *dc, const uint32_t arg[],
const uint32_t par[])
{
if (gen_window_check3(dc, arg[0], arg[1], arg[2])) {
tcg_gen_smax_i32(cpu_R[arg[0]], cpu_R[arg[1]], cpu_R[arg[2]]);
}
}
static void translate_umax(DisasContext *dc, const uint32_t arg[],
const uint32_t par[])
{
if (gen_window_check3(dc, arg[0], arg[1], arg[2])) {
tcg_gen_umax_i32(cpu_R[arg[0]], cpu_R[arg[1]], cpu_R[arg[2]]);
}
}
@@ -2983,23 +3003,19 @@ static const XtensaOpcodeOps core_ops[] = {
.par = (const uint32_t[]){TCG_COND_NE},
}, {
.name = "max",
.translate = translate_minmax,
.par = (const uint32_t[]){TCG_COND_GE},
.translate = translate_smax,
}, {
.name = "maxu",
.translate = translate_minmax,
.par = (const uint32_t[]){TCG_COND_GEU},
.translate = translate_umax,
}, {
.name = "memw",
.translate = translate_memw,
}, {
.name = "min",
.translate = translate_minmax,
.par = (const uint32_t[]){TCG_COND_LT},
.translate = translate_smin,
}, {
.name = "minu",
.translate = translate_minmax,
.par = (const uint32_t[]){TCG_COND_LTU},
.translate = translate_umin,
}, {
.name = "mov",
.translate = translate_mov,