target/arm: Implement bfloat16 matrix multiply accumulate

This is BFMMLA for both AArch64 AdvSIMD and SVE,
and VMMLA.BF16 for AArch32 NEON.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210525225817.400336-9-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Richard Henderson 2021-05-25 15:58:13 -07:00 committed by Peter Maydell
parent 839144784b
commit 81266a1f58
7 changed files with 81 additions and 3 deletions

View file

@ -12235,6 +12235,13 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
}
feature = dc_isar_feature(aa64_fcma, s);
break;
case 0x1d: /* BFMMLA */
if (size != MO_16 || !is_q) {
unallocated_encoding(s);
return;
}
feature = dc_isar_feature(aa64_bf16, s);
break;
case 0x1f: /* BFDOT */
switch (size) {
case 1:
@ -12328,6 +12335,9 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
}
return;
case 0xd: /* BFMMLA */
gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
return;
case 0xf: /* BFDOT */
switch (size) {
case 1: