target/arm: Convert FMAXNMV, FMINNMV, FMAXV, FMINV to decodetree

Reviewed-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20240912024114.1097832-12-richard.henderson@linaro.org Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2025-12-21 23:18:36 -07:00 · 2024-09-11 19:40:56 -07:00 · 2024-09-11 19:40:56 -07:00 · 3d44e070a6
commit 3d44e070a6
parent cc7ece7216
2 changed files with 67 additions and 123 deletions
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@ -54,11 +54,13 @@
@rrx_d          ........ .. . rm:5  .... idx:1 . rn:5 rd:5  &rrx_e esz=3
@rr_q1e0        ........ ........ ...... rn:5 rd:5      &qrr_e q=1 esz=0
@rr_q1e2        ........ ........ ...... rn:5 rd:5      &qrr_e q=1 esz=2
@r2r_q1e0       ........ ........ ...... rm:5 rd:5      &qrrr_e rn=%rd q=1 esz=0
@rrr_q1e0       ........ ... rm:5 ...... rn:5 rd:5      &qrrr_e q=1 esz=0
@rrr_q1e3       ........ ... rm:5 ...... rn:5 rd:5      &qrrr_e q=1 esz=3
@rrrr_q1e3      ........ ... rm:5 . ra:5 rn:5 rd:5      &qrrrr_e q=1 esz=3
@qrr_h          . q:1 ...... .. ...... ...... rn:5 rd:5  &qrr_e esz=1
@qrr_e          . q:1 ...... esz:2 ...... ...... rn:5 rd:5  &qrr_e
@qrrr_b         . q:1 ...... ... rm:5 ...... rn:5 rd:5  &qrrr_e esz=0
@ -1166,3 +1168,15 @@ SMAXV           0.00 1110 .. 11000 01010 10 ..... .....     @qrr_e
 UMAXV           0.10 1110 .. 11000 01010 10 ..... .....     @qrr_e
 SMINV           0.00 1110 .. 11000 11010 10 ..... .....     @qrr_e
 UMINV           0.10 1110 .. 11000 11010 10 ..... .....     @qrr_e
 FMAXNMV_h       0.00 1110 00 11000 01100 10 ..... .....     @qrr_h
 FMAXNMV_s       0110 1110 00 11000 01100 10 ..... .....     @rr_q1e2
 FMINNMV_h       0.00 1110 10 11000 01100 10 ..... .....     @qrr_h
 FMINNMV_s       0110 1110 10 11000 01100 10 ..... .....     @rr_q1e2
 FMAXV_h         0.00 1110 00 11000 01111 10 ..... .....     @qrr_h
 FMAXV_s         0110 1110 00 11000 01111 10 ..... .....     @rr_q1e2
 FMINV_h         0.00 1110 10 11000 01111 10 ..... .....     @qrr_h
 FMINV_s         0110 1110 10 11000 01111 10 ..... .....     @rr_q1e2
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@ -6835,6 +6835,59 @@ TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64)
 TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
 TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
 /*
 * do_fp_reduction helper
 *
 * This mirrors the Reduce() pseudocode in the ARM ARM. It is
 * important for correct NaN propagation that we do these
 * operations in exactly the order specified by the pseudocode.
 *
 * This is a recursive function.
 */
 static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz,
                                int ebase, int ecount, TCGv_ptr fpst,
                                NeonGenTwoSingleOpFn *fn)
 {
    if (ecount == 1) {
        TCGv_i32 tcg_elem = tcg_temp_new_i32();
        read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
        return tcg_elem;
    } else {
        int half = ecount >> 1;
        TCGv_i32 tcg_hi, tcg_lo, tcg_res;
        tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn);
        tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn);
        tcg_res = tcg_temp_new_i32();
        fn(tcg_res, tcg_lo, tcg_hi, fpst);
        return tcg_res;
    }
 }
 static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
                              NeonGenTwoSingleOpFn *fn)
 {
    if (fp_access_check(s)) {
        MemOp esz = a->esz;
        int elts = (a->q ? 16 : 8) >> esz;
        TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn);
        write_fp_sreg(s, a->rd, res);
    }
    return true;
 }
 TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxnumh)
 TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minnumh)
 TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxh)
 TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minh)
 TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums)
 TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums)
 TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs)
 TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins)
 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
 * Note that it is the caller's responsibility to ensure that the
 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
@ -9061,128 +9114,6 @@ static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
    }
 }
 /*
 * do_reduction_op helper
 *
 * This mirrors the Reduce() pseudocode in the ARM ARM. It is
 * important for correct NaN propagation that we do these
 * operations in exactly the order specified by the pseudocode.
 *
 * This is a recursive function.
 */
 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
                                MemOp esz, int ebase, int ecount, TCGv_ptr fpst)
 {
    if (ecount == 1) {
        TCGv_i32 tcg_elem = tcg_temp_new_i32();
        read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
        return tcg_elem;
    } else {
        int half = ecount >> 1;
        TCGv_i32 tcg_hi, tcg_lo, tcg_res;
        tcg_hi = do_reduction_op(s, fpopcode, rn, esz,
                                 ebase + half, half, fpst);
        tcg_lo = do_reduction_op(s, fpopcode, rn, esz,
                                 ebase, half, fpst);
        tcg_res = tcg_temp_new_i32();
        switch (fpopcode) {
        case 0x0c: /* fmaxnmv half-precision */
            gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x0f: /* fmaxv half-precision */
            gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x1c: /* fminnmv half-precision */
            gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x1f: /* fminv half-precision */
            gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x2c: /* fmaxnmv */
            gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x2f: /* fmaxv */
            gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x3c: /* fminnmv */
            gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x3f: /* fminv */
            gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        default:
            g_assert_not_reached();
        }
        return tcg_res;
    }
 }
 /* AdvSIMD across lanes
 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
 */
 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
 {
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    bool is_q = extract32(insn, 30, 1);
    bool is_u = extract32(insn, 29, 1);
    bool is_min = false;
    int elements;
    switch (opcode) {
    case 0xc: /* FMAXNMV, FMINNMV */
    case 0xf: /* FMAXV, FMINV */
        /* Bit 1 of size field encodes min vs max and the actual size
         * depends on the encoding of the U bit. If not set (and FP16
         * enabled) then we do half-precision float instead of single
         * precision.
         */
        is_min = extract32(size, 1, 1);
        if (!is_u && dc_isar_feature(aa64_fp16, s)) {
            size = 1;
        } else if (!is_u || !is_q || extract32(size, 0, 1)) {
            unallocated_encoding(s);
            return;
        } else {
            size = 2;
        }
        break;
    default:
    case 0x3: /* SADDLV, UADDLV */
    case 0xa: /* SMAXV, UMAXV */
    case 0x1a: /* SMINV, UMINV */
    case 0x1b: /* ADDV */
        unallocated_encoding(s);
        return;
    }
    if (!fp_access_check(s)) {
        return;
    }
    elements = (is_q ? 16 : 8) >> size;
    {
        /* Floating point vector reduction ops which work across 32
         * bit (single) or 16 bit (half-precision) intermediates.
         * Note that correct NaN propagation requires that we do these
         * operations in exactly the order specified by the pseudocode.
         */
        TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        int fpopcode = opcode | is_min << 4 | is_u << 5;
        TCGv_i32 tcg_res = do_reduction_op(s, fpopcode, rn, size,
                                           0, elements, fpst);
        write_fp_sreg(s, rd, tcg_res);
    }
 }
 /* AdvSIMD modified immediate
 *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
@ -11735,7 +11666,6 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
 static const AArch64DecodeTable data_proc_simd[] = {
    /* pattern  ,  mask     ,  fn                        */
    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },