target/arm: Convert handle_2misc_pairwise to decodetree

This includes SADDLP, UADDLP, SADALP, UADALP.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20241211163036.2297116-47-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Richard Henderson 2024-12-11 10:30:13 -06:00 committed by Peter Maydell
parent c14bde6998
commit 09e7f80e58
4 changed files with 11 additions and 98 deletions

View file

@ -1662,3 +1662,8 @@ CMLT0_v 0.00 1110 ..1 00000 10101 0 ..... ..... @qrr_e
REV16_v 0.00 1110 001 00000 00011 0 ..... ..... @qrr_b
REV32_v 0.10 1110 0.1 00000 00001 0 ..... ..... @qrr_bh
REV64_v 0.00 1110 ..1 00000 00001 0 ..... ..... @qrr_e
SADDLP_v 0.00 1110 ..1 00000 00101 0 ..... ..... @qrr_e
UADDLP_v 0.10 1110 ..1 00000 00101 0 ..... ..... @qrr_e
SADALP_v 0.00 1110 ..1 00000 01101 0 ..... ..... @qrr_e
UADALP_v 0.10 1110 ..1 00000 01101 0 ..... ..... @qrr_e

View file

@ -306,24 +306,6 @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
}
/*
 * Unsigned pairwise add-long on bytes: every 16-bit lane of the result
 * holds the sum of the two adjacent unsigned bytes found in the same
 * 16-bit lane of @a.
 */
uint64_t HELPER(neon_addlp_u8)(uint64_t a)
{
    const uint64_t low_byte_of_lane = 0x00ff00ff00ff00ffULL;
    uint64_t even_bytes = a & low_byte_of_lane;
    uint64_t odd_bytes = (a >> 8) & low_byte_of_lane;

    /* Each lane sum is at most 0xff + 0xff, so no carry leaves its lane. */
    return even_bytes + odd_bytes;
}
/*
 * Unsigned pairwise add-long on halfwords: every 32-bit lane of the
 * result holds the sum of the two adjacent unsigned 16-bit values found
 * in the same 32-bit lane of @a.
 */
uint64_t HELPER(neon_addlp_u16)(uint64_t a)
{
    const uint64_t low_half_of_lane = 0x0000ffff0000ffffULL;
    uint64_t even_halves = a & low_half_of_lane;
    uint64_t odd_halves = (a >> 16) & low_half_of_lane;

    /* Each lane sum is at most 0xffff + 0xffff, so lanes cannot overflow. */
    return even_halves + odd_halves;
}
/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
{

View file

@ -41,8 +41,6 @@ DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_1(neon_addlp_u8, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_1(neon_addlp_u16, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, ptr)

View file

@ -8956,6 +8956,10 @@ static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls)
TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz)
TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64)
/*
 * Pairwise add-long group: SADDLP, UADDLP (plain) and SADALP, UADALP
 * (accumulating).  Each pattern goes through do_gvec_fn2_bhs, passing the
 * matching gen_gvec_* function as its GVecGen2Fn argument.
 */
TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp)
TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp)
TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp)
TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp)
/* Common vector code for handling integer to FP conversion */
static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
@ -9885,73 +9889,6 @@ static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
}
}
/*
 * Emit TCG ops for the pairwise add-long instructions of the AdvSIMD
 * two-register-misc group.
 *
 * @s:      translation context, passed through to the vector accessors
 * @opcode: 0x6 selects the accumulating forms (SADALP, UADALP); any other
 *          value is treated as the non-accumulating forms
 * @u:      true for the unsigned variants, false for the signed ones
 * @is_q:   true to process both 64-bit halves of the vector, false to
 *          process only the low half and write zero to the high half
 * @size:   source element size (0, 1 or 2); the caller visible in this
 *          file rejects size == 3 before calling
 * @rn:     source vector register number
 * @rd:     destination (and, when accumulating, addend) register number
 */
static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
bool is_q, int size, int rn, int rd)
{
/* Implement the pairwise operations from 2-misc:
* SADDLP, UADDLP, SADALP, UADALP.
* These all add pairs of elements in the input to produce a
* double-width result element in the output (possibly accumulating).
*/
bool accum = (opcode == 0x6);
/* One 64-bit result per pass: two passes for a Q operation, else one. */
int maxpass = is_q ? 2 : 1;
int pass;
TCGv_i64 tcg_res[2];
if (size == 2) {
/* 32 + 32 -> 64 op */
/* Signed variants load the 32-bit inputs with MO_SIGN set. */
MemOp memop = size + (u ? 0 : MO_SIGN);
for (pass = 0; pass < maxpass; pass++) {
TCGv_i64 tcg_op1 = tcg_temp_new_i64();
TCGv_i64 tcg_op2 = tcg_temp_new_i64();
tcg_res[pass] = tcg_temp_new_i64();
/* Adjacent source elements 2*pass and 2*pass+1 form one pair. */
read_vec_element(s, tcg_op1, rn, pass * 2, memop);
read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
if (accum) {
/* Add the existing 64-bit destination element to the pair sum. */
read_vec_element(s, tcg_op1, rd, pass, MO_64);
tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
}
}
} else {
/*
 * 8-bit and 16-bit sources: a whole 64-bit chunk of rn is handed to a
 * helper chosen by element size and signedness.
 */
for (pass = 0; pass < maxpass; pass++) {
TCGv_i64 tcg_op = tcg_temp_new_i64();
NeonGenOne64OpFn *genfn;
/* Indexed [size][u]; only rows for size 0 and size 1 exist. */
static NeonGenOne64OpFn * const fns[2][2] = {
{ gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 },
{ gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 },
};
genfn = fns[size][u];
tcg_res[pass] = tcg_temp_new_i64();
read_vec_element(s, tcg_op, rn, pass, MO_64);
genfn(tcg_res[pass], tcg_op);
if (accum) {
/*
 * Accumulate with the current destination chunk.  The same addl
 * helper is used for both signed and unsigned variants; it is
 * picked by the widened lane width (u16 for byte sources, u32 for
 * halfword sources).
 * NOTE(review): presumably these helpers add lane-wise with
 * wraparound, which would make signedness irrelevant - confirm.
 */
read_vec_element(s, tcg_op, rd, pass, MO_64);
if (size == 0) {
gen_helper_neon_addl_u16(tcg_res[pass],
tcg_res[pass], tcg_op);
} else {
gen_helper_neon_addl_u32(tcg_res[pass],
tcg_res[pass], tcg_op);
}
}
}
}
if (!is_q) {
/* Non-Q form: the high 64 bits of the destination are written as zero. */
tcg_res[1] = tcg_constant_i64(0);
}
/*
 * All reads of rn and rd were emitted above; only now are both halves
 * of rd written.
 */
for (pass = 0; pass < 2; pass++) {
write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
}
}
static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
{
/* Implement SHLL and SHLL2 */
@ -10011,17 +9948,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
return;
case 0x2: /* SADDLP, UADDLP */
case 0x6: /* SADALP, UADALP */
if (size == 3) {
unallocated_encoding(s);
return;
}
if (!fp_access_check(s)) {
return;
}
handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
return;
case 0x13: /* SHLL, SHLL2 */
if (u == 0 || size == 3) {
unallocated_encoding(s);
@ -10203,9 +10129,11 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
default:
case 0x0: /* REV64, REV32 */
case 0x1: /* REV16 */
case 0x2: /* SADDLP, UADDLP */
case 0x3: /* SUQADD, USQADD */
case 0x4: /* CLS, CLZ */
case 0x5: /* CNT, NOT, RBIT */
case 0x6: /* SADALP, UADALP */
case 0x7: /* SQABS, SQNEG */
case 0x8: /* CMGT, CMGE */
case 0x9: /* CMEQ, CMLE */