tcg/tci: Implement add/sub carry opcodes

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2025-01-20 19:46:04 -08:00
parent 9dd1ea33b2
commit 4b0ee858be
4 changed files with 125 additions and 101 deletions

120
tcg/tci.c
View file

@ -179,17 +179,6 @@ static void tci_args_rrrrrc(uint32_t insn, TCGReg *r0, TCGReg *r1,
*c5 = extract32(insn, 28, 4);
}
/*
 * Decode a six-register TCI instruction: r0..r5 are packed as
 * consecutive 4-bit fields starting at bit 8 of the insn word.
 */
static void tci_args_rrrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1,
                            TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGReg *r5)
{
    TCGReg *out[6] = { r0, r1, r2, r3, r4, r5 };

    for (int i = 0; i < 6; i++) {
        *out[i] = extract32(insn, 8 + i * 4, 4);
    }
}
static bool tci_compare32(uint32_t u0, uint32_t u1, TCGCond condition)
{
bool result = false;
@ -361,6 +350,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
tcg_target_ulong regs[TCG_TARGET_NB_REGS];
uint64_t stack[(TCG_STATIC_CALL_ARGS_SIZE + TCG_STATIC_FRAME_SIZE)
/ sizeof(uint64_t)];
bool carry = false;
regs[TCG_AREG0] = (tcg_target_ulong)env;
regs[TCG_REG_CALL_STACK] = (uintptr_t)stack;
@ -369,13 +359,12 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
for (;;) {
uint32_t insn;
TCGOpcode opc;
TCGReg r0, r1, r2, r3, r4, r5;
TCGReg r0, r1, r2, r3, r4;
tcg_target_ulong t1;
TCGCond condition;
uint8_t pos, len;
uint32_t tmp32;
uint64_t tmp64, taddr;
uint64_t T1, T2;
MemOpIdx oi;
int32_t ofs;
void *ptr;
@ -444,9 +433,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
#if TCG_TARGET_REG_BITS == 32
case INDEX_op_setcond2_i32:
tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition);
T1 = tci_uint64(regs[r2], regs[r1]);
T2 = tci_uint64(regs[r4], regs[r3]);
regs[r0] = tci_compare64(T1, T2, condition);
regs[r0] = tci_compare64(tci_uint64(regs[r2], regs[r1]),
tci_uint64(regs[r4], regs[r3]),
condition);
break;
#elif TCG_TARGET_REG_BITS == 64
case INDEX_op_setcond:
@ -471,6 +460,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
tci_args_rl(insn, tb_ptr, &r0, &ptr);
regs[r0] = *(tcg_target_ulong *)ptr;
break;
case INDEX_op_tci_setcarry:
carry = true;
break;
/* Load/store operations (32 bit). */
@ -575,6 +567,46 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
tci_args_rr(insn, &r0, &r1);
regs[r0] = ctpop_tr(regs[r1]);
break;
case INDEX_op_addco:
tci_args_rrr(insn, &r0, &r1, &r2);
t1 = regs[r1] + regs[r2];
carry = t1 < regs[r1];
regs[r0] = t1;
break;
case INDEX_op_addci:
tci_args_rrr(insn, &r0, &r1, &r2);
regs[r0] = regs[r1] + regs[r2] + carry;
break;
case INDEX_op_addcio:
tci_args_rrr(insn, &r0, &r1, &r2);
if (carry) {
t1 = regs[r1] + regs[r2] + 1;
carry = t1 <= regs[r1];
} else {
t1 = regs[r1] + regs[r2];
carry = t1 < regs[r1];
}
regs[r0] = t1;
break;
case INDEX_op_subbo:
tci_args_rrr(insn, &r0, &r1, &r2);
carry = regs[r1] < regs[r2];
regs[r0] = regs[r1] - regs[r2];
break;
case INDEX_op_subbi:
tci_args_rrr(insn, &r0, &r1, &r2);
regs[r0] = regs[r1] - regs[r2] - carry;
break;
case INDEX_op_subbio:
tci_args_rrr(insn, &r0, &r1, &r2);
if (carry) {
carry = regs[r1] <= regs[r2];
regs[r0] = regs[r1] - regs[r2] - 1;
} else {
carry = regs[r1] < regs[r2];
regs[r0] = regs[r1] - regs[r2];
}
break;
case INDEX_op_muls2:
tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
#if TCG_TARGET_REG_BITS == 32
@ -673,22 +705,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
tb_ptr = ptr;
}
break;
#if TCG_TARGET_REG_BITS == 32 || TCG_TARGET_HAS_add2_i32
case INDEX_op_add2_i32:
tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
T1 = tci_uint64(regs[r3], regs[r2]);
T2 = tci_uint64(regs[r5], regs[r4]);
tci_write_reg64(regs, r1, r0, T1 + T2);
break;
#endif
#if TCG_TARGET_REG_BITS == 32 || TCG_TARGET_HAS_sub2_i32
case INDEX_op_sub2_i32:
tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
T1 = tci_uint64(regs[r3], regs[r2]);
T2 = tci_uint64(regs[r5], regs[r4]);
tci_write_reg64(regs, r1, r0, T1 - T2);
break;
#endif
case INDEX_op_bswap16:
tci_args_rr(insn, &r0, &r1);
regs[r0] = bswap16(regs[r1]);
@ -742,24 +758,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
tci_args_rrr(insn, &r0, &r1, &r2);
regs[r0] = regs[r1] ? ctz64(regs[r1]) : regs[r2];
break;
#if TCG_TARGET_HAS_add2_i64
case INDEX_op_add2_i64:
tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
T1 = regs[r2] + regs[r4];
T2 = regs[r3] + regs[r5] + (T1 < regs[r2]);
regs[r0] = T1;
regs[r1] = T2;
break;
#endif
#if TCG_TARGET_HAS_add2_i64
case INDEX_op_sub2_i64:
tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
T1 = regs[r2] - regs[r4];
T2 = regs[r3] - regs[r5] - (regs[r2] < regs[r4]);
regs[r0] = T1;
regs[r1] = T2;
break;
#endif
/* Shift/rotate operations (64 bit). */
@ -908,7 +906,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
const char *op_name;
uint32_t insn;
TCGOpcode op;
TCGReg r0, r1, r2, r3, r4, r5;
TCGReg r0, r1, r2, r3, r4;
tcg_target_ulong i1;
int32_t s2;
TCGCond c;
@ -968,6 +966,10 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
op_name, str_r(r0), ptr);
break;
case INDEX_op_tci_setcarry:
info->fprintf_func(info->stream, "%-12s", op_name);
break;
case INDEX_op_ld8u_i32:
case INDEX_op_ld8u_i64:
case INDEX_op_ld8s_i32:
@ -1007,6 +1009,9 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
break;
case INDEX_op_add:
case INDEX_op_addci:
case INDEX_op_addcio:
case INDEX_op_addco:
case INDEX_op_and:
case INDEX_op_andc:
case INDEX_op_clz:
@ -1027,6 +1032,9 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
case INDEX_op_shl:
case INDEX_op_shr:
case INDEX_op_sub:
case INDEX_op_subbi:
case INDEX_op_subbio:
case INDEX_op_subbo:
case INDEX_op_xor:
case INDEX_op_tci_ctz32:
case INDEX_op_tci_clz32:
@ -1071,16 +1079,6 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
str_r(r2), str_r(r3));
break;
case INDEX_op_add2_i32:
case INDEX_op_add2_i64:
case INDEX_op_sub2_i32:
case INDEX_op_sub2_i64:
tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s, %s",
op_name, str_r(r0), str_r(r1), str_r(r2),
str_r(r3), str_r(r4), str_r(r5));
break;
case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_st_i64:
if (TCG_TARGET_REG_BITS == 32) {

View file

@ -8,13 +8,13 @@
#define TCG_TARGET_HAS_H
#define TCG_TARGET_HAS_qemu_st8_i32 0
#define TCG_TARGET_HAS_add2_i32 1
#define TCG_TARGET_HAS_sub2_i32 1
#define TCG_TARGET_HAS_add2_i32 0
#define TCG_TARGET_HAS_sub2_i32 0
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_extr_i64_i32 0
#define TCG_TARGET_HAS_add2_i64 1
#define TCG_TARGET_HAS_sub2_i64 1
#define TCG_TARGET_HAS_add2_i64 0
#define TCG_TARGET_HAS_sub2_i64 0
#endif /* TCG_TARGET_REG_BITS == 64 */
#define TCG_TARGET_HAS_qemu_ldst_i128 0

View file

@ -2,6 +2,7 @@
/* These opcodes for use between the tci generator and interpreter. */
DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT)
DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT)
DEF(tci_setcarry, 0, 0, 0, TCG_OPF_NOT_PRESENT)
DEF(tci_clz32, 1, 2, 0, TCG_OPF_NOT_PRESENT)
DEF(tci_ctz32, 1, 2, 0, TCG_OPF_NOT_PRESENT)
DEF(tci_divs32, 1, 2, 0, TCG_OPF_NOT_PRESENT)

View file

@ -66,12 +66,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
case INDEX_op_st_i64:
return C_O0_I2(r, r);
case INDEX_op_add2_i32:
case INDEX_op_add2_i64:
case INDEX_op_sub2_i32:
case INDEX_op_sub2_i64:
return C_O2_I4(r, r, r, r, r, r);
case INDEX_op_qemu_ld_i32:
return C_O1_I1(r, r);
case INDEX_op_qemu_ld_i64:
@ -346,22 +340,6 @@ static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op,
tcg_out32(s, insn);
}
/*
 * Encode and emit a six-register TCI instruction: 8-bit opcode in the low
 * byte, then r0..r5 as consecutive 4-bit fields starting at bit 8.
 */
static void tcg_out_op_rrrrrr(TCGContext *s, TCGOpcode op,
                              TCGReg r0, TCGReg r1, TCGReg r2,
                              TCGReg r3, TCGReg r4, TCGReg r5)
{
    const TCGReg args[6] = { r0, r1, r2, r3, r4, r5 };
    tcg_insn_unit insn = deposit32(0, 0, 8, op);

    for (int i = 0; i < 6; i++) {
        insn = deposit32(insn, 8 + i * 4, 4, args[i]);
    }
    tcg_out32(s, insn);
}
static void tcg_out_ldst(TCGContext *s, TCGOpcode op, TCGReg val,
TCGReg base, intptr_t offset)
{
@ -573,21 +551,50 @@ static const TCGOutOpBinary outop_add = {
.out_rrr = tgen_add,
};
/*
 * Shared dynamic constraint for the carry/borrow ops: three registers
 * for TCG_TYPE_REG, otherwise the op is not implemented for this type.
 */
static TCGConstraintSetIndex cset_addsubcarry(TCGType type, unsigned flags)
{
    if (type != TCG_TYPE_REG) {
        return C_NotImplemented;
    }
    return C_O1_I2(r, r, r);
}
/* Emit a three-register addco: add a1 + a2 into a0, producing carry out. */
static void tgen_addco(TCGContext *s, TCGType type,
TCGReg a0, TCGReg a1, TCGReg a2)
{
tcg_out_op_rrr(s, INDEX_op_addco, a0, a1, a2);
}
/*
 * addco: constraint is resolved dynamically (reg/reg/reg for TCG_TYPE_REG,
 * otherwise not implemented).  The stale duplicate initializer for
 * .base.static_constraint (C_NotImplemented, left over from the diff) is
 * removed: a repeated designated initializer is dead and trips
 * -Woverride-init.
 */
static const TCGOutOpBinary outop_addco = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_addsubcarry,
    .out_rrr = tgen_addco,
};
/* Emit a three-register addci: add a1 + a2 + carry-in into a0. */
static void tgen_addci(TCGContext *s, TCGType type,
TCGReg a0, TCGReg a1, TCGReg a2)
{
tcg_out_op_rrr(s, INDEX_op_addci, a0, a1, a2);
}
/*
 * addci: dynamically constrained like the other carry ops.  Drop the stale
 * duplicate .base.static_constraint = C_NotImplemented initializer (diff
 * residue); duplicate designated initializers are dead and warn under
 * -Woverride-init.
 */
static const TCGOutOpAddSubCarry outop_addci = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_addsubcarry,
    .out_rrr = tgen_addci,
};
/* Emit a three-register addcio: add a1 + a2 + carry-in, producing carry out. */
static void tgen_addcio(TCGContext *s, TCGType type,
TCGReg a0, TCGReg a1, TCGReg a2)
{
tcg_out_op_rrr(s, INDEX_op_addcio, a0, a1, a2);
}
/*
 * addcio: dynamically constrained carry op.  Remove the stale duplicate
 * .base.static_constraint = C_NotImplemented initializer (diff residue);
 * only the C_Dynamic value is live.
 */
static const TCGOutOpBinary outop_addcio = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_addsubcarry,
    .out_rrr = tgen_addcio,
};
/*
 * Emit the TCI-private op that sets the interpreter's carry flag.
 * The stale g_assert_not_reached() (the pre-patch body, left in by the
 * merged diff) is removed: it made the emit below unreachable and would
 * abort at code-gen time.
 */
static void tcg_out_set_carry(TCGContext *s)
{
    tcg_out_op_v(s, INDEX_op_tci_setcarry);
}
static void tgen_and(TCGContext *s, TCGType type,
@ -910,21 +917,45 @@ static const TCGOutOpSubtract outop_sub = {
.out_rrr = tgen_sub,
};
/* Emit a three-register subbo: subtract a1 - a2 into a0, producing borrow out. */
static void tgen_subbo(TCGContext *s, TCGType type,
TCGReg a0, TCGReg a1, TCGReg a2)
{
tcg_out_op_rrr(s, INDEX_op_subbo, a0, a1, a2);
}
/*
 * subbo: dynamically constrained borrow op.  Drop the stale duplicate
 * .base.static_constraint = C_NotImplemented initializer (diff residue);
 * a repeated designated initializer is dead and warns under -Woverride-init.
 */
static const TCGOutOpAddSubCarry outop_subbo = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_addsubcarry,
    .out_rrr = tgen_subbo,
};
/* Emit a three-register subbi: subtract a1 - a2 - borrow-in into a0. */
static void tgen_subbi(TCGContext *s, TCGType type,
TCGReg a0, TCGReg a1, TCGReg a2)
{
tcg_out_op_rrr(s, INDEX_op_subbi, a0, a1, a2);
}
/*
 * subbi: dynamically constrained borrow op.  Remove the stale duplicate
 * .base.static_constraint = C_NotImplemented initializer (diff residue);
 * only C_Dynamic is live.
 */
static const TCGOutOpAddSubCarry outop_subbi = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_addsubcarry,
    .out_rrr = tgen_subbi,
};
/* Emit a three-register subbio: a1 - a2 - borrow-in, producing borrow out. */
static void tgen_subbio(TCGContext *s, TCGType type,
TCGReg a0, TCGReg a1, TCGReg a2)
{
tcg_out_op_rrr(s, INDEX_op_subbio, a0, a1, a2);
}
/*
 * subbio: dynamically constrained borrow op.  Remove the stale duplicate
 * .base.static_constraint = C_NotImplemented initializer (diff residue);
 * a repeated designated initializer is dead and warns under -Woverride-init.
 */
static const TCGOutOpAddSubCarry outop_subbio = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_addsubcarry,
    .out_rrr = tgen_subbio,
};
/*
 * Emit the op that sets the interpreter's borrow flag; TCI uses the same
 * flag for carry and borrow.  The stale g_assert_not_reached() (pre-patch
 * body left in by the merged diff) is removed: it made the emit below
 * unreachable.
 */
static void tcg_out_set_borrow(TCGContext *s)
{
    tcg_out_op_v(s, INDEX_op_tci_setcarry); /* borrow == carry */
}
static void tgen_xor(TCGContext *s, TCGType type,
@ -1129,12 +1160,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
tcg_out_ldst(s, opc, args[0], args[1], args[2]);
break;
CASE_32_64(add2)
CASE_32_64(sub2)
tcg_out_op_rrrrrr(s, opc, args[0], args[1], args[2],
args[3], args[4], args[5]);
break;
case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_st_i64:
if (TCG_TARGET_REG_BITS == 32) {