mirror of
https://github.com/Motorhead1991/qemu.git
synced 2025-09-04 16:02:02 -06:00
target/arm: Convert FMAXNMV, FMINNMV, FMAXV, FMINV to decodetree
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240912024114.1097832-12-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
parent
cc7ece7216
commit
3d44e070a6
2 changed files with 67 additions and 123 deletions
|
@ -54,11 +54,13 @@
|
||||||
@rrx_d ........ .. . rm:5 .... idx:1 . rn:5 rd:5 &rrx_e esz=3
|
@rrx_d ........ .. . rm:5 .... idx:1 . rn:5 rd:5 &rrx_e esz=3
|
||||||
|
|
||||||
@rr_q1e0 ........ ........ ...... rn:5 rd:5 &qrr_e q=1 esz=0
|
@rr_q1e0 ........ ........ ...... rn:5 rd:5 &qrr_e q=1 esz=0
|
||||||
|
@rr_q1e2 ........ ........ ...... rn:5 rd:5 &qrr_e q=1 esz=2
|
||||||
@r2r_q1e0 ........ ........ ...... rm:5 rd:5 &qrrr_e rn=%rd q=1 esz=0
|
@r2r_q1e0 ........ ........ ...... rm:5 rd:5 &qrrr_e rn=%rd q=1 esz=0
|
||||||
@rrr_q1e0 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=0
|
@rrr_q1e0 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=0
|
||||||
@rrr_q1e3 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=3
|
@rrr_q1e3 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=3
|
||||||
@rrrr_q1e3 ........ ... rm:5 . ra:5 rn:5 rd:5 &qrrrr_e q=1 esz=3
|
@rrrr_q1e3 ........ ... rm:5 . ra:5 rn:5 rd:5 &qrrrr_e q=1 esz=3
|
||||||
|
|
||||||
|
@qrr_h . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=1
|
||||||
@qrr_e . q:1 ...... esz:2 ...... ...... rn:5 rd:5 &qrr_e
|
@qrr_e . q:1 ...... esz:2 ...... ...... rn:5 rd:5 &qrr_e
|
||||||
|
|
||||||
@qrrr_b . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=0
|
@qrrr_b . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=0
|
||||||
|
@ -1166,3 +1168,15 @@ SMAXV 0.00 1110 .. 11000 01010 10 ..... ..... @qrr_e
|
||||||
UMAXV 0.10 1110 .. 11000 01010 10 ..... ..... @qrr_e
|
UMAXV 0.10 1110 .. 11000 01010 10 ..... ..... @qrr_e
|
||||||
SMINV 0.00 1110 .. 11000 11010 10 ..... ..... @qrr_e
|
SMINV 0.00 1110 .. 11000 11010 10 ..... ..... @qrr_e
|
||||||
UMINV 0.10 1110 .. 11000 11010 10 ..... ..... @qrr_e
|
UMINV 0.10 1110 .. 11000 11010 10 ..... ..... @qrr_e
|
||||||
|
|
||||||
|
FMAXNMV_h 0.00 1110 00 11000 01100 10 ..... ..... @qrr_h
|
||||||
|
FMAXNMV_s 0110 1110 00 11000 01100 10 ..... ..... @rr_q1e2
|
||||||
|
|
||||||
|
FMINNMV_h 0.00 1110 10 11000 01100 10 ..... ..... @qrr_h
|
||||||
|
FMINNMV_s 0110 1110 10 11000 01100 10 ..... ..... @rr_q1e2
|
||||||
|
|
||||||
|
FMAXV_h 0.00 1110 00 11000 01111 10 ..... ..... @qrr_h
|
||||||
|
FMAXV_s 0110 1110 00 11000 01111 10 ..... ..... @rr_q1e2
|
||||||
|
|
||||||
|
FMINV_h 0.00 1110 10 11000 01111 10 ..... ..... @qrr_h
|
||||||
|
FMINV_s 0110 1110 10 11000 01111 10 ..... ..... @rr_q1e2
|
||||||
|
|
|
@ -6835,6 +6835,59 @@ TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64)
|
||||||
TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
|
TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
|
||||||
TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
|
TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* do_reduction_op: recursive helper for do_fp_reduction
|
||||||
|
*
|
||||||
|
* This mirrors the Reduce() pseudocode in the ARM ARM. It is
|
||||||
|
* important for correct NaN propagation that we do these
|
||||||
|
* operations in exactly the order specified by the pseudocode.
|
||||||
|
*
|
||||||
|
* This is a recursive function.
|
||||||
|
*
* Reduces @ecount elements of size @esz, starting at element index
* @ebase, of the register numbered @rn (as passed on to
* read_vec_element_i32).  A range of one element is simply loaded; a
* larger range is split in two, each half is reduced recursively, and
* the two partial results are combined with @fn, which is called as
* fn(result, low_half, high_half, fpst).
*
* @ecount is halved with a right shift and is not validated here, so
* it must be a power of two >= 1 (an @ecount of 0 would never reach
* the ecount == 1 leaf).
*
* Returns a newly allocated i32 temporary holding the result.
*/
|
||||||
|
static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz,
|
||||||
|
int ebase, int ecount, TCGv_ptr fpst,
|
||||||
|
NeonGenTwoSingleOpFn *fn)
|
||||||
|
{
|
||||||
|
if (ecount == 1) { /* leaf: load the single element at index ebase */
|
||||||
|
TCGv_i32 tcg_elem = tcg_temp_new_i32();
|
||||||
|
read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
|
||||||
|
return tcg_elem;
|
||||||
|
} else {
|
||||||
|
int half = ecount >> 1; /* size of each of the two sub-ranges */
|
||||||
|
TCGv_i32 tcg_hi, tcg_lo, tcg_res;
|
||||||
|
|
||||||
|
tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn); /* upper sub-range is generated first */
|
||||||
|
tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn); /* then the lower sub-range */
|
||||||
|
tcg_res = tcg_temp_new_i32();
|
||||||
|
|
||||||
|
fn(tcg_res, tcg_lo, tcg_hi, fpst); /* low result is the first operand, high result the second */
|
||||||
|
return tcg_res;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Common translator for the FP across-lanes reductions registered
 * via TRANS/TRANS_FEAT with this function (FMAXNMV, FMINNMV, FMAXV,
 * FMINV).
 *
 * @a supplies q, esz, rn and rd from the decoder; @fn is the
 * two-operand helper generator applied pairwise by do_reduction_op.
 *
 * Always returns true.  Code is only generated when
 * fp_access_check() succeeds.
 * NOTE(review): presumably fp_access_check() has already arranged
 * for the access trap when it returns false -- confirm at its
 * definition.
 */
static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
|
||||||
|
NeonGenTwoSingleOpFn *fn)
|
||||||
|
{
|
||||||
|
if (fp_access_check(s)) {
|
||||||
|
MemOp esz = a->esz;
|
||||||
|
int elts = (a->q ? 16 : 8) >> esz; /* element count: 16 (q set) or 8 (q clear), shifted right by esz */
|
||||||
|
TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); /* MO_16 elements use the F16 float status */
|
||||||
|
TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn); /* reduce elements [0, elts) of rn */
|
||||||
|
write_fp_sreg(s, a->rd, res); /* result goes to rd via write_fp_sreg */
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * All eight patterns share do_fp_reduction and differ only in the
 * helper passed as fn.  The _h patterns go through TRANS_FEAT with
 * aa64_fp16 and use the gen_helper_advsimd_*h helpers; the _s
 * patterns go through plain TRANS and use the gen_helper_vfp_*s
 * helpers.
 * NOTE(review): TRANS and TRANS_FEAT are defined outside this hunk;
 * presumably they emit the trans_<NAME>() entry points and
 * TRANS_FEAT additionally gates on the named feature -- confirm.
 */
TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxnumh)
|
||||||
|
TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minnumh)
|
||||||
|
TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxh)
|
||||||
|
TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minh)
|
||||||
|
|
||||||
|
TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums)
|
||||||
|
TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums)
|
||||||
|
TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs)
|
||||||
|
TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins)
|
||||||
|
|
||||||
/* Shift a TCGv src by TCGv shift_amount, put result in dst.
|
/* Shift a TCGv src by TCGv shift_amount, put result in dst.
|
||||||
* Note that it is the caller's responsibility to ensure that the
|
* Note that it is the caller's responsibility to ensure that the
|
||||||
* shift amount is in range (ie 0..31 or 0..63) and provide the ARM
|
* shift amount is in range (ie 0..31 or 0..63) and provide the ARM
|
||||||
|
@ -9061,128 +9114,6 @@ static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* do_reduction_op helper
|
|
||||||
*
|
|
||||||
* This mirrors the Reduce() pseudocode in the ARM ARM. It is
|
|
||||||
* important for correct NaN propagation that we do these
|
|
||||||
* operations in exactly the order specified by the pseudocode.
|
|
||||||
*
|
|
||||||
* This is a recursive function.
|
|
||||||
*/
|
|
||||||
static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
|
|
||||||
MemOp esz, int ebase, int ecount, TCGv_ptr fpst)
|
|
||||||
{
|
|
||||||
if (ecount == 1) {
|
|
||||||
TCGv_i32 tcg_elem = tcg_temp_new_i32();
|
|
||||||
read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
|
|
||||||
return tcg_elem;
|
|
||||||
} else {
|
|
||||||
int half = ecount >> 1;
|
|
||||||
TCGv_i32 tcg_hi, tcg_lo, tcg_res;
|
|
||||||
|
|
||||||
tcg_hi = do_reduction_op(s, fpopcode, rn, esz,
|
|
||||||
ebase + half, half, fpst);
|
|
||||||
tcg_lo = do_reduction_op(s, fpopcode, rn, esz,
|
|
||||||
ebase, half, fpst);
|
|
||||||
tcg_res = tcg_temp_new_i32();
|
|
||||||
|
|
||||||
switch (fpopcode) {
|
|
||||||
case 0x0c: /* fmaxnmv half-precision */
|
|
||||||
gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
|
|
||||||
break;
|
|
||||||
case 0x0f: /* fmaxv half-precision */
|
|
||||||
gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
|
|
||||||
break;
|
|
||||||
case 0x1c: /* fminnmv half-precision */
|
|
||||||
gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
|
|
||||||
break;
|
|
||||||
case 0x1f: /* fminv half-precision */
|
|
||||||
gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
|
|
||||||
break;
|
|
||||||
case 0x2c: /* fmaxnmv */
|
|
||||||
gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
|
|
||||||
break;
|
|
||||||
case 0x2f: /* fmaxv */
|
|
||||||
gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
|
|
||||||
break;
|
|
||||||
case 0x3c: /* fminnmv */
|
|
||||||
gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
|
|
||||||
break;
|
|
||||||
case 0x3f: /* fminv */
|
|
||||||
gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
g_assert_not_reached();
|
|
||||||
}
|
|
||||||
return tcg_res;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* AdvSIMD across lanes
|
|
||||||
* 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
|
|
||||||
* +---+---+---+-----------+------+-----------+--------+-----+------+------+
|
|
||||||
* | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
|
|
||||||
* +---+---+---+-----------+------+-----------+--------+-----+------+------+
|
|
||||||
*/
|
|
||||||
static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
|
|
||||||
{
|
|
||||||
int rd = extract32(insn, 0, 5);
|
|
||||||
int rn = extract32(insn, 5, 5);
|
|
||||||
int size = extract32(insn, 22, 2);
|
|
||||||
int opcode = extract32(insn, 12, 5);
|
|
||||||
bool is_q = extract32(insn, 30, 1);
|
|
||||||
bool is_u = extract32(insn, 29, 1);
|
|
||||||
bool is_min = false;
|
|
||||||
int elements;
|
|
||||||
|
|
||||||
switch (opcode) {
|
|
||||||
case 0xc: /* FMAXNMV, FMINNMV */
|
|
||||||
case 0xf: /* FMAXV, FMINV */
|
|
||||||
/* Bit 1 of size field encodes min vs max and the actual size
|
|
||||||
* depends on the encoding of the U bit. If not set (and FP16
|
|
||||||
* enabled) then we do half-precision float instead of single
|
|
||||||
* precision.
|
|
||||||
*/
|
|
||||||
is_min = extract32(size, 1, 1);
|
|
||||||
if (!is_u && dc_isar_feature(aa64_fp16, s)) {
|
|
||||||
size = 1;
|
|
||||||
} else if (!is_u || !is_q || extract32(size, 0, 1)) {
|
|
||||||
unallocated_encoding(s);
|
|
||||||
return;
|
|
||||||
} else {
|
|
||||||
size = 2;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
case 0x3: /* SADDLV, UADDLV */
|
|
||||||
case 0xa: /* SMAXV, UMAXV */
|
|
||||||
case 0x1a: /* SMINV, UMINV */
|
|
||||||
case 0x1b: /* ADDV */
|
|
||||||
unallocated_encoding(s);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!fp_access_check(s)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
elements = (is_q ? 16 : 8) >> size;
|
|
||||||
|
|
||||||
{
|
|
||||||
/* Floating point vector reduction ops which work across 32
|
|
||||||
* bit (single) or 16 bit (half-precision) intermediates.
|
|
||||||
* Note that correct NaN propagation requires that we do these
|
|
||||||
* operations in exactly the order specified by the pseudocode.
|
|
||||||
*/
|
|
||||||
TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
|
|
||||||
int fpopcode = opcode | is_min << 4 | is_u << 5;
|
|
||||||
TCGv_i32 tcg_res = do_reduction_op(s, fpopcode, rn, size,
|
|
||||||
0, elements, fpst);
|
|
||||||
write_fp_sreg(s, rd, tcg_res);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* AdvSIMD modified immediate
|
/* AdvSIMD modified immediate
|
||||||
* 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0
|
* 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0
|
||||||
* +---+---+----+---------------------+-----+-------+----+---+-------+------+
|
* +---+---+----+---------------------+-----+-------+----+---+-------+------+
|
||||||
|
@ -11735,7 +11666,6 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
|
||||||
static const AArch64DecodeTable data_proc_simd[] = {
|
static const AArch64DecodeTable data_proc_simd[] = {
|
||||||
/* pattern , mask , fn */
|
/* pattern , mask , fn */
|
||||||
{ 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
|
{ 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
|
||||||
{ 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
|
|
||||||
/* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
|
/* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
|
||||||
{ 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
|
{ 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
|
||||||
{ 0x0f000400, 0x9f800400, disas_simd_shift_imm },
|
{ 0x0f000400, 0x9f800400, disas_simd_shift_imm },
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue