target/arm: Create gen_gvec_[us]sra

The functions eliminate duplication of the special cases for
this operation.  They match up with the GVecGen2iFn typedef.

Add out-of-line helpers.  We got away with only having inline
expanders because the neon vector size is only 16 bytes, and
we know that the inline expansion will always succeed.
When we reuse this for SVE, tcg-gvec-op may decide to use an
out-of-line helper due to longer vector lengths.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200513163245.17915-2-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Richard Henderson 2020-05-13 09:32:30 -07:00 committed by Peter Maydell
parent c888f7e0fd
commit 631e565450
5 changed files with 139 additions and 79 deletions

View file

@ -3874,33 +3874,51 @@ static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
tcg_gen_add_vec(vece, d, d, a);
}
/*
 * Vector opcodes named by the .opt_opc field of the ssra_op table
 * entries: immediate arithmetic shift right and vector add.
 * The final 0 closes the list.
 */
static const TCGOpcode vecop_list_ssra[] = {
    INDEX_op_sari_vec,
    INDEX_op_add_vec,
    0,
};
/*
 * Expand a vector signed shift-right-and-accumulate by immediate.
 *
 * Each element of the vector at @rm_ofs is arithmetically shifted right
 * by @shift and added into the matching element of the vector at @rd_ofs
 * (the destination is loaded first: .load_dest is set on every entry).
 *
 * @vece:   element size selector; indexes the per-size expander table
 * @rd_ofs: offset of the destination/accumulator vector
 * @rm_ofs: offset of the source vector
 * @shift:  shift count, asserted to be in [1, 8 << vece]
 * @opr_sz: operation size, forwarded to tcg_gen_gvec_2i
 * @max_sz: maximum size, forwarded to tcg_gen_gvec_2i
 *
 * The signature matches the GVecGen2iFn typedef.
 */
void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    /*
     * One expander per element size.  .fno is the out-of-line helper and
     * must match the element size of its entry.
     */
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ssra8_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_ssra16_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_ssra32_i32,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        /*
         * 64-bit elements need the doubleword helper; the byte helper
         * here would compute the wrong result on the out-of-line path.
         */
        { .fni8 = gen_ssra64_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    /*
     * NOTE(review): the table below is not referenced anywhere in this
     * function and has no .fno helpers; it looks like the pre-refactor
     * table -- confirm whether it can be dropped.
     */
    const GVecGen2i ssra_op[4] = {
        { .fni8 = gen_ssra8_i64,
          .fniv = gen_ssra_vec,
          .load_dest = true,
          .opt_opc = vecop_list_ssra,
          .vece = MO_8 },
        { .fni8 = gen_ssra16_i64,
          .fniv = gen_ssra_vec,
          .load_dest = true,
          .opt_opc = vecop_list_ssra,
          .vece = MO_16 },
        { .fni4 = gen_ssra32_i32,
          .fniv = gen_ssra_vec,
          .load_dest = true,
          .opt_opc = vecop_list_ssra,
          .vece = MO_32 },
        { .fni8 = gen_ssra64_i64,
          .fniv = gen_ssra_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list_ssra,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * A shift equal to the element size is architecturally valid.
     * Signed results in all sign bits, which is what a shift by
     * esize - 1 produces, so clamp to that.
     */
    shift = MIN(shift, (8 << vece) - 1);
    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}
static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
@ -3932,33 +3950,55 @@ static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
tcg_gen_add_vec(vece, d, d, a);
}
/*
 * Vector opcodes named by the .opt_opc field of the usra_op table
 * entries: immediate logical shift right and vector add.
 * The final 0 closes the list.
 */
static const TCGOpcode vecop_list_usra[] = {
    INDEX_op_shri_vec,
    INDEX_op_add_vec,
    0,
};
/*
 * Expand a vector unsigned shift-right-and-accumulate by immediate.
 *
 * Each element of the vector at @rm_ofs is logically shifted right by
 * @shift and added into the matching element of the vector at @rd_ofs.
 * A shift equal to the element size contributes zero, so only a
 * self-move of the destination is emitted in that case.
 *
 * @vece:   element size selector; indexes the per-size expander table
 * @rd_ofs: offset of the destination/accumulator vector
 * @rm_ofs: offset of the source vector
 * @shift:  shift count, asserted to be in [1, 8 << vece]
 * @opr_sz: operation size, forwarded to the gvec expander
 * @max_sz: maximum size, forwarded to the gvec expander
 */
void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode usra_vecops[] = {
        INDEX_op_shri_vec,
        INDEX_op_add_vec,
        0
    };
    /* Per-element-size expanders, each with its out-of-line helper. */
    static const GVecGen2i expanders[4] = {
        { .vece = MO_8,
          .opt_opc = usra_vecops,
          .load_dest = true,
          .fni8 = gen_usra8_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_b },
        { .vece = MO_16,
          .opt_opc = usra_vecops,
          .load_dest = true,
          .fni8 = gen_usra16_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_h },
        { .vece = MO_32,
          .opt_opc = usra_vecops,
          .load_dest = true,
          .fni4 = gen_usra32_i32,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_s },
        { .vece = MO_64,
          .opt_opc = usra_vecops,
          .load_dest = true,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .fni8 = gen_usra64_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_d },
    };
    /*
     * NOTE(review): this second table is never read in this function and
     * carries no .fno helpers; presumably the pre-refactor table --
     * confirm before removing.
     */
    const GVecGen2i usra_op[4] = {
        { .vece = MO_8,
          .opt_opc = vecop_list_usra,
          .load_dest = true,
          .fni8 = gen_usra8_i64,
          .fniv = gen_usra_vec },
        { .vece = MO_16,
          .opt_opc = vecop_list_usra,
          .load_dest = true,
          .fni8 = gen_usra16_i64,
          .fniv = gen_usra_vec },
        { .vece = MO_32,
          .opt_opc = vecop_list_usra,
          .load_dest = true,
          .fni4 = gen_usra32_i32,
          .fniv = gen_usra_vec },
        { .vece = MO_64,
          .opt_opc = vecop_list_usra,
          .load_dest = true,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .fni8 = gen_usra64_i64,
          .fniv = gen_usra_vec },
    };
    const int64_t esize = 8 << vece;

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift >= esize) {
        /*
         * Architecturally valid: the unsigned shift yields all zeros,
         * so the accumulate is a nop.  The move of rd onto itself is
         * still emitted because the tail needs to be cleared.
         */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
        return;
    }

    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &expanders[vece]);
}
static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
@ -5220,19 +5260,12 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
case 1: /* VSRA */
/* Right shift comes here negative. */
shift = -shift;
/* Shifts larger than the element size are architecturally
* valid. Unsigned results in all zeros; signed results
* in all sign bits.
*/
if (!u) {
tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
MIN(shift, (8 << size) - 1),
&ssra_op[size]);
} else if (shift >= 8 << size) {
/* rd += 0 */
if (u) {
gen_gvec_usra(size, rd_ofs, rm_ofs, shift,
vec_size, vec_size);
} else {
tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
shift, &usra_op[size]);
gen_gvec_ssra(size, rd_ofs, rm_ofs, shift,
vec_size, vec_size);
}
return 0;