tcg: Merge INDEX_op_muls2_{i32,i64}

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2025-01-09 07:24:32 -08:00
parent 5641afdf9b
commit bfe964809b
7 changed files with 21 additions and 27 deletions

View file

@@ -604,7 +604,7 @@ Multiword arithmetic support
 - | Similar to mul, except two unsigned inputs *t1* and *t2* yielding the full
 double-word product *t0*. The latter is returned in two single-word outputs.
-* - muls2_i32/i64 *t0_low*, *t0_high*, *t1*, *t2*
+* - muls2 *t0_low*, *t0_high*, *t1*, *t2*
 - | Similar to mulu2, except the two inputs *t1* and *t2* are signed.

View file

@@ -51,6 +51,7 @@ DEF(divu, 1, 2, 0, TCG_OPF_INT)
 DEF(divu2, 2, 3, 0, TCG_OPF_INT)
 DEF(eqv, 1, 2, 0, TCG_OPF_INT)
 DEF(mul, 1, 2, 0, TCG_OPF_INT)
+DEF(muls2, 2, 2, 0, TCG_OPF_INT)
 DEF(mulsh, 1, 2, 0, TCG_OPF_INT)
 DEF(muluh, 1, 2, 0, TCG_OPF_INT)
 DEF(nand, 1, 2, 0, TCG_OPF_INT)
@@ -92,7 +93,6 @@ DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH)
 DEF(add2_i32, 2, 4, 0, 0)
 DEF(sub2_i32, 2, 4, 0, 0)
 DEF(mulu2_i32, 2, 2, 0, 0)
-DEF(muls2_i32, 2, 2, 0, 0)
 DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH)
 DEF(setcond2_i32, 1, 4, 1, 0)
@@ -134,7 +134,6 @@ DEF(bswap64_i64, 1, 1, 1, 0)
 DEF(add2_i64, 2, 4, 0, 0)
 DEF(sub2_i64, 2, 4, 0, 0)
 DEF(mulu2_i64, 2, 2, 0, 0)
-DEF(muls2_i64, 2, 2, 0, 0)
 #define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2)

View file

@@ -2074,16 +2074,17 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
 h = (int32_t)(l >> 32);
 l = (int32_t)l;
 break;
-case INDEX_op_muls2_i32:
-l = (int64_t)(int32_t)a * (int32_t)b;
-h = l >> 32;
-l = (int32_t)l;
-break;
 case INDEX_op_mulu2_i64:
 mulu64(&l, &h, a, b);
 break;
-case INDEX_op_muls2_i64:
-muls64(&l, &h, a, b);
+case INDEX_op_muls2:
+if (ctx->type == TCG_TYPE_I32) {
+l = (int64_t)(int32_t)a * (int32_t)b;
+h = l >> 32;
+l = (int32_t)l;
+} else {
+muls64(&l, &h, a, b);
+}
 break;
 default:
 g_assert_not_reached();
@@ -2973,7 +2974,7 @@ void tcg_optimize(TCGContext *s)
 case INDEX_op_muluh:
 done = fold_mul_highpart(&ctx, op);
 break;
-CASE_OP_32_64(muls2):
+case INDEX_op_muls2:
 CASE_OP_32_64(mulu2):
 done = fold_multiply2(&ctx, op);
 break;

View file

@@ -1162,8 +1162,8 @@ void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
 void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
 {
-if (tcg_op_supported(INDEX_op_muls2_i32, TCG_TYPE_I32, 0)) {
-tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2);
+if (tcg_op_supported(INDEX_op_muls2, TCG_TYPE_I32, 0)) {
+tcg_gen_op4_i32(INDEX_op_muls2, rl, rh, arg1, arg2);
 } else if (tcg_op_supported(INDEX_op_mulsh, TCG_TYPE_I32, 0)) {
 TCGv_i32 t = tcg_temp_ebb_new_i32();
 tcg_gen_op3_i32(INDEX_op_mul, t, arg1, arg2);
@@ -2880,8 +2880,8 @@ void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
 void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-if (tcg_op_supported(INDEX_op_muls2_i64, TCG_TYPE_I64, 0)) {
-tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2);
+if (tcg_op_supported(INDEX_op_muls2, TCG_TYPE_I64, 0)) {
+tcg_gen_op4_i64(INDEX_op_muls2, rl, rh, arg1, arg2);
 } else if (tcg_op_supported(INDEX_op_mulsh, TCG_TYPE_I64, 0)) {
 TCGv_i64 t = tcg_temp_ebb_new_i64();
 tcg_gen_op3_i64(INDEX_op_mul, t, arg1, arg2);

View file

@@ -1041,8 +1041,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = {
 OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
 OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
 OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
-OUTOP(INDEX_op_muls2_i32, TCGOutOpMul2, outop_muls2),
-OUTOP(INDEX_op_muls2_i64, TCGOutOpMul2, outop_muls2),
+OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2),
 OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
 OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
 OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
@@ -4008,8 +4007,7 @@ liveness_pass_1(TCGContext *s)
 }
 goto do_not_remove;
-case INDEX_op_muls2_i32:
-case INDEX_op_muls2_i64:
+case INDEX_op_muls2:
 opc_new = INDEX_op_mul;
 opc_new2 = INDEX_op_mulsh;
 goto do_mul2;
@@ -5477,8 +5475,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
 }
 break;
-case INDEX_op_muls2_i32:
-case INDEX_op_muls2_i64:
+case INDEX_op_muls2:
 {
 const TCGOutOpMul2 *out =
 container_of(all_outop[op->opc], TCGOutOpMul2, base);

View file

@@ -581,8 +581,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
 tci_args_rr(insn, &r0, &r1);
 regs[r0] = ctpop_tr(regs[r1]);
 break;
-case INDEX_op_muls2_i32:
-case INDEX_op_muls2_i64:
+case INDEX_op_muls2:
 tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
 #if TCG_TARGET_REG_BITS == 32
 tmp64 = (int64_t)(int32_t)regs[r2] * (int32_t)regs[r3];
@@ -1095,10 +1094,9 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
 str_r(r3), str_r(r4), str_c(c));
 break;
+case INDEX_op_muls2:
 case INDEX_op_mulu2_i32:
 case INDEX_op_mulu2_i64:
-case INDEX_op_muls2_i32:
-case INDEX_op_muls2_i64:
 tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
 info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s",
 op_name, str_r(r0), str_r(r1),

View file

@@ -716,8 +716,7 @@ static TCGConstraintSetIndex cset_mul2(TCGType type, unsigned flags)
 static void tgen_muls2(TCGContext *s, TCGType type,
 TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3)
 {
-tcg_out_op_rrrr(s, glue(INDEX_op_muls2_i,TCG_TARGET_REG_BITS),
-a0, a1, a2, a3);
+tcg_out_op_rrrr(s, INDEX_op_muls2, a0, a1, a2, a3);
 }
 static const TCGOutOpMul2 outop_muls2 = {