target/ppc: Moved VMLADDUHM to decodetree and use gvec

This patch moves VMLADDUHM to decodetree a creates a gvec implementation
using mul_vec and add_vec.

rept    loop    master             patch
8       12500   0,01810500         0,00903100 (-50.1%)
25      4000    0,01739400         0,00747700 (-57.0%)
100     1000    0,01843600         0,00901400 (-51.1%)
500     200     0,02574600         0,01971000 (-23.4%)
2500    40      0,05921600         0,07121800 (+20.3%)
8000    12      0,15326700         0,21725200 (+41.7%)

The significant difference in performance when REPT is low and LOOP is
high I think is due to the fact that the new implementation has a higher
translation time, as when using a helper only 5 TCGop are used but with
the patch a total of 10 TCGop are needed (Power lacks a direct mul_vec
equivalent so this instruction is implemented with the help of 5 others,
vmuleu, vmulou, vmrgh, vmrgl and vpkum).

Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20221019125040.48028-2-lucas.araujo@eldorado.org.br>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
This commit is contained in:
Lucas Mateus Castro (alqotel) 2022-10-19 09:50:29 -03:00 committed by Daniel Henrique Barboza
parent b35bf5f2d7
commit dc46167a22
5 changed files with 35 additions and 21 deletions

View file

@ -2523,24 +2523,6 @@ static void glue(gen_, name0##_##name1)(DisasContext *ctx) \
GEN_VAFORM_PAIRED(vmhaddshs, vmhraddshs, 16)
static void gen_vmladduhm(DisasContext *ctx)
{
TCGv_ptr ra, rb, rc, rd;
if (unlikely(!ctx->altivec_enabled)) {
gen_exception(ctx, POWERPC_EXCP_VPU);
return;
}
ra = gen_avr_ptr(rA(ctx->opcode));
rb = gen_avr_ptr(rB(ctx->opcode));
rc = gen_avr_ptr(rC(ctx->opcode));
rd = gen_avr_ptr(rD(ctx->opcode));
gen_helper_vmladduhm(rd, ra, rb, rc);
tcg_temp_free_ptr(ra);
tcg_temp_free_ptr(rb);
tcg_temp_free_ptr(rc);
tcg_temp_free_ptr(rd);
}
static bool do_va_helper(DisasContext *ctx, arg_VA *a,
void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr))
{
@ -2569,6 +2551,36 @@ TRANS_FLAGS2(ALTIVEC_207, VSUBECUQ, do_va_helper, gen_helper_VSUBECUQ)
TRANS_FLAGS(ALTIVEC, VPERM, do_va_helper, gen_helper_VPERM)
TRANS_FLAGS2(ISA300, VPERMR, do_va_helper, gen_helper_VPERMR)
static void gen_vmladduhm_vec(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
TCGv_vec c)
{
tcg_gen_mul_vec(vece, t, a, b);
tcg_gen_add_vec(vece, t, t, c);
}
static bool trans_VMLADDUHM(DisasContext *ctx, arg_VA *a)
{
static const TCGOpcode vecop_list[] = {
INDEX_op_add_vec, INDEX_op_mul_vec, 0
};
static const GVecGen4 op = {
.fno = gen_helper_VMLADDUHM,
.fniv = gen_vmladduhm_vec,
.opt_opc = vecop_list,
.vece = MO_16
};
REQUIRE_INSNS_FLAGS(ctx, ALTIVEC);
REQUIRE_VECTOR(ctx);
tcg_gen_gvec_4(avr_full_offset(a->vrt), avr_full_offset(a->vra),
avr_full_offset(a->vrb), avr_full_offset(a->rc),
16, 16, &op);
return true;
}
static bool trans_VSEL(DisasContext *ctx, arg_VA *a)
{
REQUIRE_INSNS_FLAGS(ctx, ALTIVEC);