mirror of
https://github.com/Motorhead1991/qemu.git
synced 2025-08-03 15:53:54 -06:00
tcg: Remove add2/sub2 opcodes
All uses have been replaced by add/sub carry opcodes. Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
parent
4b0ee858be
commit
f2b1708e80
16 changed files with 3 additions and 217 deletions
|
@ -654,14 +654,6 @@ Multiword arithmetic support
|
|||
code generator will use ``tcg_out_set_borrow`` and then
|
||||
the output routine for *subbio*.
|
||||
|
||||
* - add2_i32/i64 *t0_low*, *t0_high*, *t1_low*, *t1_high*, *t2_low*, *t2_high*
|
||||
|
||||
sub2_i32/i64 *t0_low*, *t0_high*, *t1_low*, *t1_high*, *t2_low*, *t2_high*
|
||||
|
||||
- | Similar to add/sub, except that the double-word inputs *t1* and *t2* are
|
||||
formed from two single-word arguments, and the double-word output *t0*
|
||||
is returned in two single-word outputs.
|
||||
|
||||
* - mulu2 *t0_low*, *t0_high*, *t1*, *t2*
|
||||
|
||||
- | Similar to mul, except two unsigned inputs *t1* and *t2* yielding the full
|
||||
|
@ -952,9 +944,9 @@ Assumptions
|
|||
The target word size (``TCG_TARGET_REG_BITS``) is expected to be 32 bit or
|
||||
64 bit. It is expected that the pointer has the same size as the word.
|
||||
|
||||
On a 32 bit target, all 64 bit operations are converted to 32 bits. A
|
||||
few specific operations must be implemented to allow it (see add2_i32,
|
||||
sub2_i32, brcond2_i32).
|
||||
On a 32 bit target, all 64 bit operations are converted to 32 bits.
|
||||
A few specific operations must be implemented to allow it
|
||||
(see brcond2_i32, setcond2_i32).
|
||||
|
||||
On a 64 bit target, the values are transferred between 32 and 64-bit
|
||||
registers using the following ops:
|
||||
|
|
|
@ -102,8 +102,6 @@ DEF(st8_i32, 0, 2, 1, 0)
|
|||
DEF(st16_i32, 0, 2, 1, 0)
|
||||
DEF(st_i32, 0, 2, 1, 0)
|
||||
|
||||
DEF(add2_i32, 2, 4, 0, 0)
|
||||
DEF(sub2_i32, 2, 4, 0, 0)
|
||||
DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH)
|
||||
DEF(setcond2_i32, 1, 4, 1, 0)
|
||||
|
||||
|
@ -126,9 +124,6 @@ DEF(extu_i32_i64, 1, 1, 0, 0)
|
|||
DEF(extrl_i64_i32, 1, 1, 0, 0)
|
||||
DEF(extrh_i64_i32, 1, 1, 0, 0)
|
||||
|
||||
DEF(add2_i64, 2, 4, 0, 0)
|
||||
DEF(sub2_i64, 2, 4, 0, 0)
|
||||
|
||||
#define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2)
|
||||
|
||||
/* There are tcg_ctx->insn_start_words here, not just one. */
|
||||
|
|
|
@ -13,14 +13,9 @@
|
|||
#define have_lse2 (cpuinfo & CPUINFO_LSE2)
|
||||
|
||||
/* optional instructions */
|
||||
#define TCG_TARGET_HAS_add2_i32 0
|
||||
#define TCG_TARGET_HAS_sub2_i32 0
|
||||
#define TCG_TARGET_HAS_extr_i64_i32 0
|
||||
#define TCG_TARGET_HAS_qemu_st8_i32 0
|
||||
|
||||
#define TCG_TARGET_HAS_add2_i64 0
|
||||
#define TCG_TARGET_HAS_sub2_i64 0
|
||||
|
||||
/*
|
||||
* Without FEAT_LSE2, we must use LDXP+STXP to implement atomic 128-bit load,
|
||||
* which requires writable pages. We must defer to the helper for user-only,
|
||||
|
|
|
@ -24,12 +24,8 @@ extern bool use_neon_instructions;
|
|||
#endif
|
||||
|
||||
/* optional instructions */
|
||||
#define TCG_TARGET_HAS_add2_i32 0
|
||||
#define TCG_TARGET_HAS_sub2_i32 0
|
||||
#define TCG_TARGET_HAS_qemu_st8_i32 0
|
||||
|
||||
#define TCG_TARGET_HAS_qemu_ldst_i128 0
|
||||
|
||||
#define TCG_TARGET_HAS_tst 1
|
||||
|
||||
#define TCG_TARGET_HAS_v64 use_neon_instructions
|
||||
|
|
|
@ -26,14 +26,9 @@
|
|||
#define have_avx512vbmi2 ((cpuinfo & CPUINFO_AVX512VBMI2) && have_avx512vl)
|
||||
|
||||
/* optional instructions */
|
||||
#define TCG_TARGET_HAS_add2_i32 0
|
||||
#define TCG_TARGET_HAS_sub2_i32 0
|
||||
|
||||
#if TCG_TARGET_REG_BITS == 64
|
||||
/* Keep 32-bit values zero-extended in a register. */
|
||||
#define TCG_TARGET_HAS_extr_i64_i32 1
|
||||
#define TCG_TARGET_HAS_add2_i64 0
|
||||
#define TCG_TARGET_HAS_sub2_i64 0
|
||||
#define TCG_TARGET_HAS_qemu_st8_i32 0
|
||||
#else
|
||||
#define TCG_TARGET_HAS_qemu_st8_i32 1
|
||||
|
|
|
@ -10,14 +10,10 @@
|
|||
#include "host/cpuinfo.h"
|
||||
|
||||
/* optional instructions */
|
||||
#define TCG_TARGET_HAS_add2_i32 0
|
||||
#define TCG_TARGET_HAS_sub2_i32 0
|
||||
#define TCG_TARGET_HAS_qemu_st8_i32 0
|
||||
|
||||
/* 64-bit operations */
|
||||
#define TCG_TARGET_HAS_extr_i64_i32 1
|
||||
#define TCG_TARGET_HAS_add2_i64 0
|
||||
#define TCG_TARGET_HAS_sub2_i64 0
|
||||
|
||||
#define TCG_TARGET_HAS_qemu_ldst_i128 (cpuinfo & CPUINFO_LSX)
|
||||
|
||||
|
|
|
@ -39,13 +39,8 @@ extern bool use_mips32r2_instructions;
|
|||
#endif
|
||||
|
||||
/* optional instructions */
|
||||
#define TCG_TARGET_HAS_add2_i32 0
|
||||
#define TCG_TARGET_HAS_sub2_i32 0
|
||||
|
||||
#if TCG_TARGET_REG_BITS == 64
|
||||
#define TCG_TARGET_HAS_extr_i64_i32 1
|
||||
#define TCG_TARGET_HAS_add2_i64 0
|
||||
#define TCG_TARGET_HAS_sub2_i64 0
|
||||
#define TCG_TARGET_HAS_ext32s_i64 1
|
||||
#define TCG_TARGET_HAS_ext32u_i64 1
|
||||
#endif
|
||||
|
|
|
@ -1399,82 +1399,6 @@ static bool fold_addco(OptContext *ctx, TCGOp *op)
|
|||
return finish_folding(ctx, op);
|
||||
}
|
||||
|
||||
static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
|
||||
{
|
||||
bool a_const = arg_is_const(op->args[2]) && arg_is_const(op->args[3]);
|
||||
bool b_const = arg_is_const(op->args[4]) && arg_is_const(op->args[5]);
|
||||
|
||||
if (a_const && b_const) {
|
||||
uint64_t al = arg_info(op->args[2])->val;
|
||||
uint64_t ah = arg_info(op->args[3])->val;
|
||||
uint64_t bl = arg_info(op->args[4])->val;
|
||||
uint64_t bh = arg_info(op->args[5])->val;
|
||||
TCGArg rl, rh;
|
||||
TCGOp *op2;
|
||||
|
||||
if (ctx->type == TCG_TYPE_I32) {
|
||||
uint64_t a = deposit64(al, 32, 32, ah);
|
||||
uint64_t b = deposit64(bl, 32, 32, bh);
|
||||
|
||||
if (add) {
|
||||
a += b;
|
||||
} else {
|
||||
a -= b;
|
||||
}
|
||||
|
||||
al = sextract64(a, 0, 32);
|
||||
ah = sextract64(a, 32, 32);
|
||||
} else {
|
||||
Int128 a = int128_make128(al, ah);
|
||||
Int128 b = int128_make128(bl, bh);
|
||||
|
||||
if (add) {
|
||||
a = int128_add(a, b);
|
||||
} else {
|
||||
a = int128_sub(a, b);
|
||||
}
|
||||
|
||||
al = int128_getlo(a);
|
||||
ah = int128_gethi(a);
|
||||
}
|
||||
|
||||
rl = op->args[0];
|
||||
rh = op->args[1];
|
||||
|
||||
/* The proper opcode is supplied by tcg_opt_gen_mov. */
|
||||
op2 = opt_insert_before(ctx, op, 0, 2);
|
||||
|
||||
tcg_opt_gen_movi(ctx, op, rl, al);
|
||||
tcg_opt_gen_movi(ctx, op2, rh, ah);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Fold sub2 r,x,i to add2 r,x,-i */
|
||||
if (!add && b_const) {
|
||||
uint64_t bl = arg_info(op->args[4])->val;
|
||||
uint64_t bh = arg_info(op->args[5])->val;
|
||||
|
||||
/* Negate the two parts without assembling and disassembling. */
|
||||
bl = -bl;
|
||||
bh = ~bh + !bl;
|
||||
|
||||
op->opc = (ctx->type == TCG_TYPE_I32
|
||||
? INDEX_op_add2_i32 : INDEX_op_add2_i64);
|
||||
op->args[4] = arg_new_constant(ctx, bl);
|
||||
op->args[5] = arg_new_constant(ctx, bh);
|
||||
}
|
||||
return finish_folding(ctx, op);
|
||||
}
|
||||
|
||||
static bool fold_add2(OptContext *ctx, TCGOp *op)
|
||||
{
|
||||
/* Note that the high and low parts may be independently swapped. */
|
||||
swap_commutative(op->args[0], &op->args[2], &op->args[4]);
|
||||
swap_commutative(op->args[1], &op->args[3], &op->args[5]);
|
||||
|
||||
return fold_addsub2(ctx, op, true);
|
||||
}
|
||||
|
||||
static bool fold_and(OptContext *ctx, TCGOp *op)
|
||||
{
|
||||
uint64_t z1, z2, z_mask, s_mask;
|
||||
|
@ -2811,11 +2735,6 @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
|
|||
return finish_folding(ctx, op);
|
||||
}
|
||||
|
||||
static bool fold_sub2(OptContext *ctx, TCGOp *op)
|
||||
{
|
||||
return fold_addsub2(ctx, op, false);
|
||||
}
|
||||
|
||||
static void squash_prev_borrowout(OptContext *ctx, TCGOp *op)
|
||||
{
|
||||
TempOptInfo *t2;
|
||||
|
@ -3150,9 +3069,6 @@ void tcg_optimize(TCGContext *s)
|
|||
case INDEX_op_addco:
|
||||
done = fold_addco(&ctx, op);
|
||||
break;
|
||||
CASE_OP_32_64(add2):
|
||||
done = fold_add2(&ctx, op);
|
||||
break;
|
||||
case INDEX_op_and:
|
||||
case INDEX_op_and_vec:
|
||||
done = fold_and(&ctx, op);
|
||||
|
@ -3342,9 +3258,6 @@ void tcg_optimize(TCGContext *s)
|
|||
case INDEX_op_sub_vec:
|
||||
done = fold_sub_vec(&ctx, op);
|
||||
break;
|
||||
CASE_OP_32_64(sub2):
|
||||
done = fold_sub2(&ctx, op);
|
||||
break;
|
||||
case INDEX_op_xor:
|
||||
case INDEX_op_xor_vec:
|
||||
done = fold_xor(&ctx, op);
|
||||
|
|
|
@ -18,13 +18,9 @@
|
|||
|
||||
/* optional instructions */
|
||||
#define TCG_TARGET_HAS_qemu_st8_i32 0
|
||||
#define TCG_TARGET_HAS_add2_i32 0
|
||||
#define TCG_TARGET_HAS_sub2_i32 0
|
||||
|
||||
#if TCG_TARGET_REG_BITS == 64
|
||||
#define TCG_TARGET_HAS_extr_i64_i32 0
|
||||
#define TCG_TARGET_HAS_add2_i64 0
|
||||
#define TCG_TARGET_HAS_sub2_i64 0
|
||||
#endif
|
||||
|
||||
#define TCG_TARGET_HAS_qemu_ldst_i128 \
|
||||
|
|
|
@ -11,13 +11,8 @@
|
|||
|
||||
/* optional instructions */
|
||||
#define TCG_TARGET_HAS_qemu_st8_i32 0
|
||||
|
||||
#define TCG_TARGET_HAS_extr_i64_i32 1
|
||||
#define TCG_TARGET_HAS_add2_i64 0
|
||||
#define TCG_TARGET_HAS_sub2_i64 0
|
||||
|
||||
#define TCG_TARGET_HAS_qemu_ldst_i128 0
|
||||
|
||||
#define TCG_TARGET_HAS_tst 0
|
||||
|
||||
/* vector instructions */
|
||||
|
|
|
@ -29,16 +29,9 @@ extern uint64_t s390_facilities[3];
|
|||
((s390_facilities[FACILITY_##X / 64] >> (63 - FACILITY_##X % 64)) & 1)
|
||||
|
||||
/* optional instructions */
|
||||
#define TCG_TARGET_HAS_add2_i32 0
|
||||
#define TCG_TARGET_HAS_sub2_i32 0
|
||||
#define TCG_TARGET_HAS_extr_i64_i32 0
|
||||
#define TCG_TARGET_HAS_qemu_st8_i32 0
|
||||
|
||||
#define TCG_TARGET_HAS_add2_i64 0
|
||||
#define TCG_TARGET_HAS_sub2_i64 0
|
||||
|
||||
#define TCG_TARGET_HAS_qemu_ldst_i128 1
|
||||
|
||||
#define TCG_TARGET_HAS_tst 1
|
||||
|
||||
#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
|
||||
|
|
|
@ -14,16 +14,9 @@ extern bool use_vis3_instructions;
|
|||
#endif
|
||||
|
||||
/* optional instructions */
|
||||
#define TCG_TARGET_HAS_add2_i32 0
|
||||
#define TCG_TARGET_HAS_sub2_i32 0
|
||||
#define TCG_TARGET_HAS_qemu_st8_i32 0
|
||||
|
||||
#define TCG_TARGET_HAS_extr_i64_i32 0
|
||||
#define TCG_TARGET_HAS_add2_i64 0
|
||||
#define TCG_TARGET_HAS_sub2_i64 0
|
||||
|
||||
#define TCG_TARGET_HAS_qemu_ldst_i128 0
|
||||
|
||||
#define TCG_TARGET_HAS_tst 1
|
||||
|
||||
#define TCG_TARGET_extract_valid(type, ofs, len) \
|
||||
|
|
|
@ -12,8 +12,6 @@
|
|||
#if TCG_TARGET_REG_BITS == 32
|
||||
/* Turn some undef macros into false macros. */
|
||||
#define TCG_TARGET_HAS_extr_i64_i32 0
|
||||
#define TCG_TARGET_HAS_add2_i64 0
|
||||
#define TCG_TARGET_HAS_sub2_i64 0
|
||||
#endif
|
||||
|
||||
#if !defined(TCG_TARGET_HAS_v64) \
|
||||
|
|
26
tcg/tcg-op.c
26
tcg/tcg-op.c
|
@ -249,24 +249,6 @@ static void DNI tcg_gen_op5ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
|
|||
tcgv_i64_arg(a3), a4, a5);
|
||||
}
|
||||
|
||||
static void DNI tcg_gen_op6_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
|
||||
TCGv_i32 a3, TCGv_i32 a4,
|
||||
TCGv_i32 a5, TCGv_i32 a6)
|
||||
{
|
||||
tcg_gen_op6(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
|
||||
tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5),
|
||||
tcgv_i32_arg(a6));
|
||||
}
|
||||
|
||||
static void DNI tcg_gen_op6_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
|
||||
TCGv_i64 a3, TCGv_i64 a4,
|
||||
TCGv_i64 a5, TCGv_i64 a6)
|
||||
{
|
||||
tcg_gen_op6(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
|
||||
tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5),
|
||||
tcgv_i64_arg(a6));
|
||||
}
|
||||
|
||||
static void DNI tcg_gen_op6i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
|
||||
TCGv_i32 a3, TCGv_i32 a4,
|
||||
TCGv_i32 a5, TCGArg a6)
|
||||
|
@ -1108,8 +1090,6 @@ void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
|
|||
tcg_gen_op3_i32(INDEX_op_addci, rh, ah, bh);
|
||||
tcg_gen_mov_i32(rl, t0);
|
||||
tcg_temp_free_i32(t0);
|
||||
} else if (TCG_TARGET_HAS_add2_i32) {
|
||||
tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh);
|
||||
} else {
|
||||
TCGv_i32 t0 = tcg_temp_ebb_new_i32();
|
||||
TCGv_i32 t1 = tcg_temp_ebb_new_i32();
|
||||
|
@ -1159,8 +1139,6 @@ void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
|
|||
tcg_gen_op3_i32(INDEX_op_subbi, rh, ah, bh);
|
||||
tcg_gen_mov_i32(rl, t0);
|
||||
tcg_temp_free_i32(t0);
|
||||
} else if (TCG_TARGET_HAS_sub2_i32) {
|
||||
tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh);
|
||||
} else {
|
||||
TCGv_i32 t0 = tcg_temp_ebb_new_i32();
|
||||
TCGv_i32 t1 = tcg_temp_ebb_new_i32();
|
||||
|
@ -2880,8 +2858,6 @@ void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
|
|||
|
||||
tcg_gen_mov_i64(rl, t0);
|
||||
tcg_temp_free_i64(t0);
|
||||
} else if (TCG_TARGET_HAS_add2_i64) {
|
||||
tcg_gen_op6_i64(INDEX_op_add2_i64, rl, rh, al, ah, bl, bh);
|
||||
} else {
|
||||
TCGv_i64 t0 = tcg_temp_ebb_new_i64();
|
||||
TCGv_i64 t1 = tcg_temp_ebb_new_i64();
|
||||
|
@ -2985,8 +2961,6 @@ void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
|
|||
|
||||
tcg_gen_mov_i64(rl, t0);
|
||||
tcg_temp_free_i64(t0);
|
||||
} else if (TCG_TARGET_HAS_sub2_i64) {
|
||||
tcg_gen_op6_i64(INDEX_op_sub2_i64, rl, rh, al, ah, bl, bh);
|
||||
} else {
|
||||
TCGv_i64 t0 = tcg_temp_ebb_new_i64();
|
||||
TCGv_i64 t1 = tcg_temp_ebb_new_i64();
|
||||
|
|
36
tcg/tcg.c
36
tcg/tcg.c
|
@ -2430,11 +2430,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
|
|||
case INDEX_op_st_i32:
|
||||
return true;
|
||||
|
||||
case INDEX_op_add2_i32:
|
||||
return TCG_TARGET_HAS_add2_i32;
|
||||
case INDEX_op_sub2_i32:
|
||||
return TCG_TARGET_HAS_sub2_i32;
|
||||
|
||||
case INDEX_op_brcond2_i32:
|
||||
case INDEX_op_setcond2_i32:
|
||||
return TCG_TARGET_REG_BITS == 32;
|
||||
|
@ -2456,11 +2451,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
|
|||
case INDEX_op_extrh_i64_i32:
|
||||
return TCG_TARGET_REG_BITS == 64;
|
||||
|
||||
case INDEX_op_add2_i64:
|
||||
return TCG_TARGET_HAS_add2_i64;
|
||||
case INDEX_op_sub2_i64:
|
||||
return TCG_TARGET_HAS_sub2_i64;
|
||||
|
||||
case INDEX_op_mov_vec:
|
||||
case INDEX_op_dup_vec:
|
||||
case INDEX_op_dupm_vec:
|
||||
|
@ -4101,32 +4091,6 @@ liveness_pass_1(TCGContext *s)
|
|||
la_reset_pref(ts);
|
||||
break;
|
||||
|
||||
case INDEX_op_add2_i32:
|
||||
case INDEX_op_add2_i64:
|
||||
opc_new = INDEX_op_add;
|
||||
goto do_addsub2;
|
||||
case INDEX_op_sub2_i32:
|
||||
case INDEX_op_sub2_i64:
|
||||
opc_new = INDEX_op_sub;
|
||||
do_addsub2:
|
||||
assert_carry_dead(s);
|
||||
/* Test if the high part of the operation is dead, but not
|
||||
the low part. The result can be optimized to a simple
|
||||
add or sub. This happens often for x86_64 guest when the
|
||||
cpu mode is set to 32 bit. */
|
||||
if (arg_temp(op->args[1])->state == TS_DEAD) {
|
||||
if (arg_temp(op->args[0])->state == TS_DEAD) {
|
||||
goto do_remove;
|
||||
}
|
||||
/* Replace the opcode and adjust the args in place,
|
||||
leaving 3 unused args at the end. */
|
||||
op->opc = opc = opc_new;
|
||||
op->args[1] = op->args[2];
|
||||
op->args[2] = op->args[4];
|
||||
/* Fall through and mark the single-word operation live. */
|
||||
}
|
||||
goto do_not_remove;
|
||||
|
||||
case INDEX_op_muls2:
|
||||
opc_new = INDEX_op_mul;
|
||||
opc_new2 = INDEX_op_mulsh;
|
||||
|
|
|
@ -8,13 +8,9 @@
|
|||
#define TCG_TARGET_HAS_H
|
||||
|
||||
#define TCG_TARGET_HAS_qemu_st8_i32 0
|
||||
#define TCG_TARGET_HAS_add2_i32 0
|
||||
#define TCG_TARGET_HAS_sub2_i32 0
|
||||
|
||||
#if TCG_TARGET_REG_BITS == 64
|
||||
#define TCG_TARGET_HAS_extr_i64_i32 0
|
||||
#define TCG_TARGET_HAS_add2_i64 0
|
||||
#define TCG_TARGET_HAS_sub2_i64 0
|
||||
#endif /* TCG_TARGET_REG_BITS == 64 */
|
||||
|
||||
#define TCG_TARGET_HAS_qemu_ldst_i128 0
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue