mirror of
https://github.com/Motorhead1991/qemu.git
synced 2025-08-02 23:33:54 -06:00
target/arm: Implement bfloat widening fma (indexed)
This is BFMLAL{B,T} for both AArch64 AdvSIMD and SVE, and VFMA{B,T}.BF16 for AArch32 NEON.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210525225817.400336-11-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
parent
5693887f2e
commit
458d0ab683
7 changed files with 82 additions and 1 deletions
|
@@ -2528,3 +2528,25 @@ void HELPER(gvec_bfmlal)(void *vd, void *vn, void *vm, void *va,
|
|||
}
|
||||
clear_tail(d, opr_sz, simd_maxsz(desc));
|
||||
}
|
||||
|
||||
void HELPER(gvec_bfmlal_idx)(void *vd, void *vn, void *vm,
|
||||
void *va, void *stat, uint32_t desc)
|
||||
{
|
||||
intptr_t i, j, opr_sz = simd_oprsz(desc);
|
||||
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||
intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 1, 3);
|
||||
intptr_t elements = opr_sz / 4;
|
||||
intptr_t eltspersegment = MIN(16 / 4, elements);
|
||||
float32 *d = vd, *a = va;
|
||||
bfloat16 *n = vn, *m = vm;
|
||||
|
||||
for (i = 0; i < elements; i += eltspersegment) {
|
||||
float32 m_idx = m[H2(2 * i + index)] << 16;
|
||||
|
||||
for (j = i; j < i + eltspersegment; j++) {
|
||||
float32 n_j = n[H2(2 * j + sel)] << 16;
|
||||
d[H4(j)] = float32_muladd(n_j, m_idx, a[H4(j)], 0, stat);
|
||||
}
|
||||
}
|
||||
clear_tail(d, opr_sz, simd_maxsz(desc));
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue