tcg/aarch64: Implement add/sub carry opcodes

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2025-01-15 02:41:58 -08:00
parent 867878c112
commit 75351891b8
3 changed files with 150 additions and 88 deletions

View file

@@ -24,6 +24,8 @@ C_O1_I2(r, r, rAL)
C_O1_I2(r, r, rC)
C_O1_I2(r, r, ri)
C_O1_I2(r, r, rL)
C_O1_I2(r, rZ, rA)
C_O1_I2(r, rz, rMZ)
C_O1_I2(r, rz, rz)
C_O1_I2(r, rZ, rZ)
C_O1_I2(w, 0, w)
@@ -34,4 +36,3 @@ C_O1_I2(w, w, wZ)
C_O1_I3(w, w, w, w)
C_O1_I4(r, r, rC, rz, rz)
C_O2_I1(r, r, r)
C_O2_I4(r, r, rz, rz, rA, rMZ)

View file

@@ -13,13 +13,13 @@
#define have_lse2 (cpuinfo & CPUINFO_LSE2)
/* optional instructions */
#define TCG_TARGET_HAS_add2_i32 1
#define TCG_TARGET_HAS_sub2_i32 1
#define TCG_TARGET_HAS_add2_i32 0
#define TCG_TARGET_HAS_sub2_i32 0
#define TCG_TARGET_HAS_extr_i64_i32 0
#define TCG_TARGET_HAS_qemu_st8_i32 0
#define TCG_TARGET_HAS_add2_i64 1
#define TCG_TARGET_HAS_sub2_i64 1
#define TCG_TARGET_HAS_add2_i64 0
#define TCG_TARGET_HAS_sub2_i64 0
/*
* Without FEAT_LSE2, we must use LDXP+STXP to implement atomic 128-bit load,

View file

@@ -508,7 +508,9 @@ typedef enum {
/* Add/subtract with carry instructions. */
I3503_ADC = 0x1a000000,
I3503_ADCS = 0x3a000000,
I3503_SBC = 0x5a000000,
I3503_SBCS = 0x7a000000,
/* Conditional select instructions. */
I3506_CSEL = 0x1a800000,
@@ -1573,56 +1575,6 @@ static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}
static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
TCGReg rh, TCGReg al, TCGReg ah,
tcg_target_long bl, tcg_target_long bh,
bool const_bl, bool const_bh, bool sub)
{
TCGReg orig_rl = rl;
AArch64Insn insn;
if (rl == ah || (!const_bh && rl == bh)) {
rl = TCG_REG_TMP0;
}
if (const_bl) {
if (bl < 0) {
bl = -bl;
insn = sub ? I3401_ADDSI : I3401_SUBSI;
} else {
insn = sub ? I3401_SUBSI : I3401_ADDSI;
}
if (unlikely(al == TCG_REG_XZR)) {
/* ??? We want to allow al to be zero for the benefit of
negation via subtraction. However, that leaves open the
possibility of adding 0+const in the low part, and the
immediate add instructions encode XSP not XZR. Don't try
anything more elaborate here than loading another zero. */
al = TCG_REG_TMP0;
tcg_out_movi(s, ext, al, 0);
}
tcg_out_insn_3401(s, insn, ext, rl, al, bl);
} else {
tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
}
insn = I3503_ADC;
if (const_bh) {
/* Note that the only two constants we support are 0 and -1, and
that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
if ((bh != 0) ^ sub) {
insn = I3503_SBC;
}
bh = TCG_REG_XZR;
} else if (sub) {
insn = I3503_SBC;
}
tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
tcg_out_mov(s, ext, orig_rl, rl);
}
static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
static const uint32_t sync[] = {
@@ -2078,21 +2030,81 @@ static const TCGOutOpBinary outop_add = {
.out_rri = tgen_addi,
};
static void tgen_addco(TCGContext *s, TCGType type,
TCGReg a0, TCGReg a1, TCGReg a2)
{
tcg_out_insn(s, 3502, ADDS, type, a0, a1, a2);
}
static void tgen_addco_imm(TCGContext *s, TCGType type,
TCGReg a0, TCGReg a1, tcg_target_long a2)
{
if (a2 >= 0) {
tcg_out_insn(s, 3401, ADDSI, type, a0, a1, a2);
} else {
tcg_out_insn(s, 3401, SUBSI, type, a0, a1, -a2);
}
}
static const TCGOutOpBinary outop_addco = {
.base.static_constraint = C_NotImplemented,
.base.static_constraint = C_O1_I2(r, r, rA),
.out_rrr = tgen_addco,
.out_rri = tgen_addco_imm,
};
static void tgen_addci_rrr(TCGContext *s, TCGType type,
TCGReg a0, TCGReg a1, TCGReg a2)
{
tcg_out_insn(s, 3503, ADC, type, a0, a1, a2);
}
static void tgen_addci_rri(TCGContext *s, TCGType type,
TCGReg a0, TCGReg a1, tcg_target_long a2)
{
/*
* Note that the only two constants we support are 0 and -1, and
* that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.
*/
if (a2) {
tcg_out_insn(s, 3503, SBC, type, a0, a1, TCG_REG_XZR);
} else {
tcg_out_insn(s, 3503, ADC, type, a0, a1, TCG_REG_XZR);
}
}
static const TCGOutOpAddSubCarry outop_addci = {
.base.static_constraint = C_NotImplemented,
.base.static_constraint = C_O1_I2(r, rz, rMZ),
.out_rrr = tgen_addci_rrr,
.out_rri = tgen_addci_rri,
};
static void tgen_addcio(TCGContext *s, TCGType type,
TCGReg a0, TCGReg a1, TCGReg a2)
{
tcg_out_insn(s, 3503, ADCS, type, a0, a1, a2);
}
static void tgen_addcio_imm(TCGContext *s, TCGType type,
TCGReg a0, TCGReg a1, tcg_target_long a2)
{
/* Use SBCS w/0 for ADCS w/-1 -- see above. */
if (a2) {
tcg_out_insn(s, 3503, SBCS, type, a0, a1, TCG_REG_XZR);
} else {
tcg_out_insn(s, 3503, ADCS, type, a0, a1, TCG_REG_XZR);
}
}
static const TCGOutOpBinary outop_addcio = {
.base.static_constraint = C_NotImplemented,
.base.static_constraint = C_O1_I2(r, rz, rMZ),
.out_rrr = tgen_addcio,
.out_rri = tgen_addcio_imm,
};
static void tcg_out_set_carry(TCGContext *s)
{
g_assert_not_reached();
tcg_out_insn(s, 3502, SUBS, TCG_TYPE_I32,
TCG_REG_XZR, TCG_REG_XZR, TCG_REG_XZR);
}
static void tgen_and(TCGContext *s, TCGType type,
@@ -2438,21 +2450,95 @@ static const TCGOutOpSubtract outop_sub = {
.out_rrr = tgen_sub,
};
static void tgen_subbo_rrr(TCGContext *s, TCGType type,
TCGReg a0, TCGReg a1, TCGReg a2)
{
tcg_out_insn(s, 3502, SUBS, type, a0, a1, a2);
}
static void tgen_subbo_rri(TCGContext *s, TCGType type,
TCGReg a0, TCGReg a1, tcg_target_long a2)
{
if (a2 >= 0) {
tcg_out_insn(s, 3401, SUBSI, type, a0, a1, a2);
} else {
tcg_out_insn(s, 3401, ADDSI, type, a0, a1, -a2);
}
}
static void tgen_subbo_rir(TCGContext *s, TCGType type,
TCGReg a0, tcg_target_long a1, TCGReg a2)
{
tgen_subbo_rrr(s, type, a0, TCG_REG_XZR, a2);
}
static void tgen_subbo_rii(TCGContext *s, TCGType type,
TCGReg a0, tcg_target_long a1, tcg_target_long a2)
{
if (a2 == 0) {
tgen_subbo_rrr(s, type, a0, TCG_REG_XZR, TCG_REG_XZR);
return;
}
/*
* We want to allow a1 to be zero for the benefit of negation via
* subtraction. However, that leaves open the possibility of
* adding 0 +/- const, and the immediate add/sub instructions
* encode XSP not XZR. Since we have 0 - non-zero, borrow is
* always set.
*/
tcg_out_movi(s, type, a0, -a2);
tcg_out_set_borrow(s);
}
static const TCGOutOpAddSubCarry outop_subbo = {
.base.static_constraint = C_NotImplemented,
.base.static_constraint = C_O1_I2(r, rZ, rA),
.out_rrr = tgen_subbo_rrr,
.out_rri = tgen_subbo_rri,
.out_rir = tgen_subbo_rir,
.out_rii = tgen_subbo_rii,
};
static void tgen_subbi_rrr(TCGContext *s, TCGType type,
TCGReg a0, TCGReg a1, TCGReg a2)
{
tcg_out_insn(s, 3503, SBC, type, a0, a1, a2);
}
static void tgen_subbi_rri(TCGContext *s, TCGType type,
TCGReg a0, TCGReg a1, tcg_target_long a2)
{
tgen_addci_rri(s, type, a0, a1, ~a2);
}
static const TCGOutOpAddSubCarry outop_subbi = {
.base.static_constraint = C_NotImplemented,
.base.static_constraint = C_O1_I2(r, rz, rMZ),
.out_rrr = tgen_subbi_rrr,
.out_rri = tgen_subbi_rri,
};
static void tgen_subbio_rrr(TCGContext *s, TCGType type,
TCGReg a0, TCGReg a1, TCGReg a2)
{
tcg_out_insn(s, 3503, SBCS, type, a0, a1, a2);
}
static void tgen_subbio_rri(TCGContext *s, TCGType type,
TCGReg a0, TCGReg a1, tcg_target_long a2)
{
tgen_addcio_imm(s, type, a0, a1, ~a2);
}
static const TCGOutOpAddSubCarry outop_subbio = {
.base.static_constraint = C_NotImplemented,
.base.static_constraint = C_O1_I2(r, rz, rMZ),
.out_rrr = tgen_subbio_rrr,
.out_rri = tgen_subbio_rri,
};
static void tcg_out_set_borrow(TCGContext *s)
{
g_assert_not_reached();
tcg_out_insn(s, 3502, ADDS, TCG_TYPE_I32,
TCG_REG_XZR, TCG_REG_XZR, TCG_REG_XZR);
}
static void tgen_xor(TCGContext *s, TCGType type,
@@ -2759,25 +2845,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext,
tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], false);
break;
case INDEX_op_add2_i32:
tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, a2, args[3],
(int32_t)args[4], args[5], const_args[4],
const_args[5], false);
break;
case INDEX_op_add2_i64:
tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, a2, args[3], args[4],
args[5], const_args[4], const_args[5], false);
break;
case INDEX_op_sub2_i32:
tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, a2, args[3],
(int32_t)args[4], args[5], const_args[4],
const_args[5], true);
break;
case INDEX_op_sub2_i64:
tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, a2, args[3], args[4],
args[5], const_args[4], const_args[5], true);
break;
case INDEX_op_mb:
tcg_out_mb(s, a0);
break;
@@ -3271,12 +3338,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
case INDEX_op_qemu_st_i128:
return C_O0_I3(rz, rz, r);
case INDEX_op_add2_i32:
case INDEX_op_add2_i64:
case INDEX_op_sub2_i32:
case INDEX_op_sub2_i64:
return C_O2_I4(r, r, rz, rz, rA, rMZ);
case INDEX_op_add_vec:
case INDEX_op_sub_vec:
case INDEX_op_mul_vec: