target/arm: Fix SME full tile indexing

For the outer product set of insns, which take an entire matrix
tile as output, the argument is not a combined tile+column.
Therefore using get_tile_rowcol was incorrect, as we extracted
the tile number from itself.

The test case relies only on assembler support for SME, since
no release of GCC recognizes -march=armv9-a+sme yet.

Cc: qemu-stable@nongnu.org
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1620
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20230622151201.1578522-5-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
[PMM: dropped now-unneeded changes to sysregs CFLAGS]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Richard Henderson 2023-07-04 14:08:48 +01:00 committed by Peter Maydell
parent 270bea47a2
commit 1f51573f79
3 changed files with 107 additions and 7 deletions

View file

@ -95,6 +95,21 @@ static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs,
return addr;
}
/*
* Resolve tile.size[0] to a host pointer.
* Used by e.g. outer product insns where we require the entire tile.
*/
static TCGv_ptr get_tile(DisasContext *s, int esz, int tile)
{
TCGv_ptr addr = tcg_temp_new_ptr();
int offset;
offset = tile * sizeof(ARMVectorReg) + offsetof(CPUARMState, zarray);
tcg_gen_addi_ptr(addr, cpu_env, offset);
return addr;
}
static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
{
if (!dc_isar_feature(aa64_sme, s)) {
@ -260,8 +275,7 @@ static bool do_adda(DisasContext *s, arg_adda *a, MemOp esz,
return true;
}
/* Sum XZR+zad to find ZAd. */
za = get_tile_rowcol(s, esz, 31, a->zad, false);
za = get_tile(s, esz, a->zad);
zn = vec_full_reg_ptr(s, a->zn);
pn = pred_full_reg_ptr(s, a->pn);
pm = pred_full_reg_ptr(s, a->pm);
@ -286,8 +300,7 @@ static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz,
return true;
}
/* Sum XZR+zad to find ZAd. */
za = get_tile_rowcol(s, esz, 31, a->zad, false);
za = get_tile(s, esz, a->zad);
zn = vec_full_reg_ptr(s, a->zn);
zm = vec_full_reg_ptr(s, a->zm);
pn = pred_full_reg_ptr(s, a->pn);
@ -308,8 +321,7 @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
return true;
}
/* Sum XZR+zad to find ZAd. */
za = get_tile_rowcol(s, esz, 31, a->zad, false);
za = get_tile(s, esz, a->zad);
zn = vec_full_reg_ptr(s, a->zn);
zm = vec_full_reg_ptr(s, a->zm);
pn = pred_full_reg_ptr(s, a->pn);