tcg/ppc: Implement negsetcond_*

In the general case we simply negate the setcond result.  However, with
isel we may load -1 instead of 1 with no extra effort.

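Consolidate the EQ0 and NE0 logic.  Replace the zero-extension that NE0
previously required with an inversion (or, for the negated form, an
inversion plus negation) of the EQ0 result, which is never worse and may
eliminate one insn.  Provide a special carry-based case for -EQ0.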

Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2023-08-05 01:55:23 +00:00
parent b0a433be48
commit cba10bb3c8
2 changed files with 82 additions and 49 deletions

View file

@ -1548,8 +1548,20 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
} }
static void tcg_out_setcond_eq0(TCGContext *s, TCGType type, static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
TCGReg dst, TCGReg src) TCGReg dst, TCGReg src, bool neg)
{ {
if (neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
/*
* X != 0 implies X + -1 generates a carry.
* RT = (~X + X) + CA
* = -1 + CA
* = CA ? 0 : -1
*/
tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
tcg_out32(s, SUBFE | TAB(dst, src, src));
return;
}
if (type == TCG_TYPE_I32) { if (type == TCG_TYPE_I32) {
tcg_out32(s, CNTLZW | RS(src) | RA(dst)); tcg_out32(s, CNTLZW | RS(src) | RA(dst));
tcg_out_shri32(s, dst, dst, 5); tcg_out_shri32(s, dst, dst, 5);
@ -1557,18 +1569,28 @@ static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
tcg_out32(s, CNTLZD | RS(src) | RA(dst)); tcg_out32(s, CNTLZD | RS(src) | RA(dst));
tcg_out_shri64(s, dst, dst, 6); tcg_out_shri64(s, dst, dst, 6);
} }
if (neg) {
tcg_out32(s, NEG | RT(dst) | RA(dst));
}
} }
static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src) static void tcg_out_setcond_ne0(TCGContext *s, TCGType type,
TCGReg dst, TCGReg src, bool neg)
{ {
/* X != 0 implies X + -1 generates a carry. Extra addition if (!neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C. */ /*
if (dst != src) { * X != 0 implies X + -1 generates a carry. Extra addition
tcg_out32(s, ADDIC | TAI(dst, src, -1)); * trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.
tcg_out32(s, SUBFE | TAB(dst, dst, src)); */
} else {
tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1)); tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src)); tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
return;
}
tcg_out_setcond_eq0(s, type, dst, src, false);
if (neg) {
tcg_out32(s, ADDI | TAI(dst, dst, -1));
} else {
tcg_out_xori32(s, dst, dst, 1);
} }
} }
@ -1590,9 +1612,10 @@ static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
TCGArg arg0, TCGArg arg1, TCGArg arg2, TCGArg arg0, TCGArg arg1, TCGArg arg2,
int const_arg2) int const_arg2, bool neg)
{ {
int crop, sh; int sh;
bool inv;
tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
@ -1605,14 +1628,10 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
if (arg2 == 0) { if (arg2 == 0) {
switch (cond) { switch (cond) {
case TCG_COND_EQ: case TCG_COND_EQ:
tcg_out_setcond_eq0(s, type, arg0, arg1); tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
return; return;
case TCG_COND_NE: case TCG_COND_NE:
if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
tcg_out_ext32u(s, TCG_REG_R0, arg1);
arg1 = TCG_REG_R0;
}
tcg_out_setcond_ne0(s, arg0, arg1);
return; return;
case TCG_COND_GE: case TCG_COND_GE:
tcg_out32(s, NOR | SAB(arg1, arg0, arg1)); tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
@ -1621,10 +1640,18 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
case TCG_COND_LT: case TCG_COND_LT:
/* Extract the sign bit. */ /* Extract the sign bit. */
if (type == TCG_TYPE_I32) { if (type == TCG_TYPE_I32) {
if (neg) {
tcg_out_sari32(s, arg0, arg1, 31);
} else {
tcg_out_shri32(s, arg0, arg1, 31); tcg_out_shri32(s, arg0, arg1, 31);
}
} else {
if (neg) {
tcg_out_sari64(s, arg0, arg1, 63);
} else { } else {
tcg_out_shri64(s, arg0, arg1, 63); tcg_out_shri64(s, arg0, arg1, 63);
} }
}
return; return;
default: default:
break; break;
@ -1641,7 +1668,7 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
isel = tcg_to_isel[cond]; isel = tcg_to_isel[cond];
tcg_out_movi(s, type, arg0, 1); tcg_out_movi(s, type, arg0, neg ? -1 : 1);
if (isel & 1) { if (isel & 1) {
/* arg0 = (bc ? 0 : 1) */ /* arg0 = (bc ? 0 : 1) */
tab = TAB(arg0, 0, arg0); tab = TAB(arg0, 0, arg0);
@ -1655,51 +1682,47 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
return; return;
} }
inv = false;
switch (cond) { switch (cond) {
case TCG_COND_EQ: case TCG_COND_EQ:
arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
tcg_out_setcond_eq0(s, type, arg0, arg1); tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
return; break;
case TCG_COND_NE: case TCG_COND_NE:
arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
/* Discard the high bits only once, rather than both inputs. */ tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { break;
tcg_out_ext32u(s, TCG_REG_R0, arg1);
arg1 = TCG_REG_R0;
}
tcg_out_setcond_ne0(s, arg0, arg1);
return;
case TCG_COND_LE:
case TCG_COND_LEU:
inv = true;
/* fall through */
case TCG_COND_GT: case TCG_COND_GT:
case TCG_COND_GTU: case TCG_COND_GTU:
sh = 30; sh = 30; /* CR7 CR_GT */
crop = 0;
goto crtest;
case TCG_COND_LT:
case TCG_COND_LTU:
sh = 29;
crop = 0;
goto crtest; goto crtest;
case TCG_COND_GE: case TCG_COND_GE:
case TCG_COND_GEU: case TCG_COND_GEU:
sh = 31; inv = true;
crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT); /* fall through */
case TCG_COND_LT:
case TCG_COND_LTU:
sh = 29; /* CR7 CR_LT */
goto crtest; goto crtest;
case TCG_COND_LE:
case TCG_COND_LEU:
sh = 31;
crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT);
crtest: crtest:
tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
if (crop) {
tcg_out32(s, crop);
}
tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7)); tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31); tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
if (neg && inv) {
tcg_out32(s, ADDI | TAI(arg0, arg0, -1));
} else if (neg) {
tcg_out32(s, NEG | RT(arg0) | RA(arg0));
} else if (inv) {
tcg_out_xori32(s, arg0, arg0, 1);
}
break; break;
default: default:
@ -2982,11 +3005,19 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_setcond_i32: case INDEX_op_setcond_i32:
tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
const_args[2]); const_args[2], false);
break; break;
case INDEX_op_setcond_i64: case INDEX_op_setcond_i64:
tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
const_args[2]); const_args[2], false);
break;
case INDEX_op_negsetcond_i32:
tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
const_args[2], true);
break;
case INDEX_op_negsetcond_i64:
tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
const_args[2], true);
break; break;
case INDEX_op_setcond2_i32: case INDEX_op_setcond2_i32:
tcg_out_setcond2(s, args, const_args); tcg_out_setcond2(s, args, const_args);
@ -3724,6 +3755,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_rotl_i32: case INDEX_op_rotl_i32:
case INDEX_op_rotr_i32: case INDEX_op_rotr_i32:
case INDEX_op_setcond_i32: case INDEX_op_setcond_i32:
case INDEX_op_negsetcond_i32:
case INDEX_op_and_i64: case INDEX_op_and_i64:
case INDEX_op_andc_i64: case INDEX_op_andc_i64:
case INDEX_op_shl_i64: case INDEX_op_shl_i64:
@ -3732,6 +3764,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_rotl_i64: case INDEX_op_rotl_i64:
case INDEX_op_rotr_i64: case INDEX_op_rotr_i64:
case INDEX_op_setcond_i64: case INDEX_op_setcond_i64:
case INDEX_op_negsetcond_i64:
return C_O1_I2(r, r, ri); return C_O1_I2(r, r, ri);
case INDEX_op_mul_i32: case INDEX_op_mul_i32:

View file

@ -97,7 +97,7 @@ typedef enum {
#define TCG_TARGET_HAS_sextract_i32 0 #define TCG_TARGET_HAS_sextract_i32 0
#define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_extract2_i32 0
#define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_movcond_i32 1
#define TCG_TARGET_HAS_negsetcond_i32 0 #define TCG_TARGET_HAS_negsetcond_i32 1
#define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_mulu2_i32 0
#define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0
#define TCG_TARGET_HAS_muluh_i32 1 #define TCG_TARGET_HAS_muluh_i32 1
@ -135,7 +135,7 @@ typedef enum {
#define TCG_TARGET_HAS_sextract_i64 0 #define TCG_TARGET_HAS_sextract_i64 0
#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_extract2_i64 0
#define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_movcond_i64 1
#define TCG_TARGET_HAS_negsetcond_i64 0 #define TCG_TARGET_HAS_negsetcond_i64 1
#define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_add2_i64 1
#define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1
#define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 0