tcg/optimize: Use fold_masks_zs in fold_qemu_ld

Avoid the use of the OptContext slots.

Be careful not to call fold_masks_zs when the memory operation
is wide enough to require multiple outputs, so split into two
functions: fold_qemu_ld_1reg and fold_qemu_ld_2reg.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Author: Richard Henderson <richard.henderson@linaro.org>
Date:   2024-12-08 20:33:30 -06:00
Commit: 6813be9b9b (parent 54e26b292b)

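For reference, the masks fold_qemu_ld_1reg computes below describe what the
optimizer knows about the loaded value: z_mask has a 1 for every bit that may
be nonzero, and s_mask has a 1 for every bit guaranteed to be a copy of the
sign bit, including the sign bit itself (hence the "width - 1" in the new
code). A standalone sketch of the computation, with a local stand-in for
QEMU's MAKE_64BIT_MASK(shift, length) macro ('length' one-bits starting at
bit 'shift'):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors QEMU's MAKE_64BIT_MASK: 'length' ones starting at 'shift'. */
    #define MAKE_64BIT_MASK(shift, length) \
        (((~0ULL) >> (64 - (length))) << (shift))

    int main(void)
    {
        for (int width = 8; width < 64; width *= 2) {
            /* Unsigned load: bits at and above 'width' are known zero. */
            uint64_t z_mask = MAKE_64BIT_MASK(0, width);
            /* Signed load: bit 'width - 1' and above all repeat the sign. */
            uint64_t s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));
            printf("width %2d: z_mask %016" PRIx64 "  s_mask %016" PRIx64 "\n",
                   width, z_mask, s_mask);
        }
        return 0;
    }

For an 8-bit signed load this yields s_mask = 0xffffffffffffff80: bits 7..63
are all copies of the sign bit.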

@@ -2110,24 +2110,33 @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
     return fold_masks_s(ctx, op, s_mask);
 }
 
-static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
+static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
 {
     const TCGOpDef *def = &tcg_op_defs[op->opc];
     MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
     MemOp mop = get_memop(oi);
     int width = 8 * memop_size(mop);
+    uint64_t z_mask = -1, s_mask = 0;
 
     if (width < 64) {
         if (mop & MO_SIGN) {
-            ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
+            s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));
         } else {
-            ctx->z_mask = MAKE_64BIT_MASK(0, width);
+            z_mask = MAKE_64BIT_MASK(0, width);
         }
     }
 
     /* Opcodes that touch guest memory stop the mb optimization. */
     ctx->prev_mb = NULL;
-    return false;
+
+    return fold_masks_zs(ctx, op, z_mask, s_mask);
+}
+
+static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op)
+{
+    /* Opcodes that touch guest memory stop the mb optimization. */
+    ctx->prev_mb = NULL;
+    return finish_folding(ctx, op);
 }
 
 static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
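A side note on why reporting the masks through fold_masks_zs beats the old
"return false": later folds can consult the recorded bits and drop work they
prove redundant. A toy illustration of the idea (hypothetical simplified
tracker, not QEMU's OptContext):

    #include <stdbool.h>
    #include <stdint.h>

    /* Per-value knowledge: which bits may be nonzero. */
    typedef struct {
        uint64_t z_mask;
    } TempInfo;

    /* An 8-bit unsigned load leaves only the low 8 bits possibly set. */
    static void fold_ld8u(TempInfo *dst)
    {
        dst->z_mask = 0xff;
    }

    /* AND with constant 'c' is a no-op if it cannot clear a live bit. */
    static bool and_is_redundant(const TempInfo *src, uint64_t c)
    {
        return (src->z_mask & ~c) == 0;
    }

    int main(void)
    {
        TempInfo t;
        fold_ld8u(&t);
        /* "and t, 0xff" keeps every bit that may be set: removable. */
        return and_is_redundant(&t, 0xff) ? 0 : 1;
    }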
@@ -3012,11 +3021,18 @@ void tcg_optimize(TCGContext *s)
             break;
         case INDEX_op_qemu_ld_a32_i32:
         case INDEX_op_qemu_ld_a64_i32:
+            done = fold_qemu_ld_1reg(&ctx, op);
+            break;
         case INDEX_op_qemu_ld_a32_i64:
         case INDEX_op_qemu_ld_a64_i64:
+            if (TCG_TARGET_REG_BITS == 64) {
+                done = fold_qemu_ld_1reg(&ctx, op);
+                break;
+            }
+            QEMU_FALLTHROUGH;
         case INDEX_op_qemu_ld_a32_i128:
         case INDEX_op_qemu_ld_a64_i128:
-            done = fold_qemu_ld(&ctx, op);
+            done = fold_qemu_ld_2reg(&ctx, op);
             break;
         case INDEX_op_qemu_st8_a32_i32:
         case INDEX_op_qemu_st8_a64_i32:
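The dispatch above encodes a single rule: fold_qemu_ld_1reg applies only when
the loaded value fits in one host register, i.e. i32 always, i64 only when
TCG_TARGET_REG_BITS == 64, and i128 never. A condensed restatement
(hypothetical helper, not part of the patch):

    #include <stdbool.h>

    enum LdSize { LD_I32 = 32, LD_I64 = 64, LD_I128 = 128 };

    /* 'host_reg_bits' would be TCG_TARGET_REG_BITS (32 or 64). */
    static bool ld_is_single_reg(enum LdSize size, int host_reg_bits)
    {
        return size <= host_reg_bits;
    }

    int main(void)
    {
        /* On a 32-bit host a 64-bit load takes the 2reg path. */
        return ld_is_single_reg(LD_I64, 32) ? 1 : 0;
    }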