target/arm: Implement bfloat16 matrix multiply accumulate

This is BFMMLA for both AArch64 AdvSIMD and SVE, and VMMLA.BF16 for AArch32 NEON.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210525225817.400336-9-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2025-08-04 16:23:55 -06:00 · 2021-05-25 15:58:13 -07:00 · 2021-05-25 15:58:13 -07:00 · 81266a1f58
commit 81266a1f58
parent 839144784b
7 changed files with 81 additions and 3 deletions
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -12235,6 +12235,13 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
        }
        feature = dc_isar_feature(aa64_fcma, s);
        break;
+    case 0x1d: /* BFMMLA */
+        if (size != MO_16 || !is_q) {
+            unallocated_encoding(s);
+            return;
+        }
+        feature = dc_isar_feature(aa64_bf16, s);
+        break;
    case 0x1f: /* BFDOT */
        switch (size) {
        case 1:
@@ -12328,6 +12335,9 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
        }
        return;

+    case 0xd: /* BFMMLA */
+        gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
+        return;
    case 0xf: /* BFDOT */
        switch (size) {
        case 1: