mirror of
https://github.com/Motorhead1991/qemu.git
synced 2025-08-04 00:03:54 -06:00
target/arm: Implement integer matrix multiply accumulate
This is {S,U,US}MMLA for both AArch64 AdvSIMD and SVE, and V{S,U,US}MMLA.S8 for AArch32 NEON.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210525010358.152808-91-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
parent
51879c671b
commit
2323c5ffd4
7 changed files with 169 additions and 0 deletions
|
@ -2335,3 +2335,80 @@ void HELPER(gvec_xar_d)(void *vd, void *vn, void *vm, uint32_t desc)
|
|||
}
|
||||
clear_tail(d, opr_sz * 8, simd_maxsz(desc));
|
||||
}
|
||||
|
||||
/*
|
||||
* Integer matrix-multiply accumulate
|
||||
*/
|
||||
|
||||
/*
 * Signed x signed inner product for the 8-bit matrix multiply-accumulate.
 *
 * @sum: incoming 32-bit accumulator value
 * @vn:  pointer to 8 signed bytes (one row of the first operand)
 * @vm:  pointer to 8 signed bytes (one row of the second operand)
 *
 * Returns @sum plus the sum of the eight element-wise products.
 * Each product is formed in 'int' after integer promotion and then
 * added to the unsigned accumulator, so the total wraps modulo 2^32.
 *
 * Elements are addressed through H1(), which is defined elsewhere
 * (presumably a host-endianness byte-index adjustment -- confirm).
 */
static uint32_t do_smmla_b(uint32_t sum, void *vn, void *vm)
{
    const int8_t *lhs = vn;
    const int8_t *rhs = vm;
    intptr_t idx;

    for (idx = 0; idx < 8; idx++) {
        int prod = lhs[H1(idx)] * rhs[H1(idx)];

        sum += prod;
    }

    return sum;
}
|
||||
|
||||
/*
 * Unsigned x unsigned inner product for the 8-bit matrix
 * multiply-accumulate.
 *
 * @sum: incoming 32-bit accumulator value
 * @vn:  pointer to 8 unsigned bytes (one row of the first operand)
 * @vm:  pointer to 8 unsigned bytes (one row of the second operand)
 *
 * Returns @sum plus the sum of the eight element-wise products.
 * Each product is formed in 'int' after integer promotion (it cannot
 * exceed 255 * 255) and the accumulation wraps modulo 2^32.
 *
 * Elements are addressed through H1(), which is defined elsewhere
 * (presumably a host-endianness byte-index adjustment -- confirm).
 */
static uint32_t do_ummla_b(uint32_t sum, void *vn, void *vm)
{
    const uint8_t *lhs = vn;
    const uint8_t *rhs = vm;
    intptr_t idx;

    for (idx = 0; idx < 8; idx++) {
        int prod = lhs[H1(idx)] * rhs[H1(idx)];

        sum += prod;
    }

    return sum;
}
|
||||
|
||||
/*
 * Mixed-sign inner product for the 8-bit matrix multiply-accumulate:
 * the first operand is unsigned, the second is signed.
 *
 * @sum: incoming 32-bit accumulator value
 * @vn:  pointer to 8 unsigned bytes (one row of the first operand)
 * @vm:  pointer to 8 signed bytes (one row of the second operand)
 *
 * Returns @sum plus the sum of the eight element-wise products.
 * Each product is formed in 'int' after integer promotion and then
 * added to the unsigned accumulator, so the total wraps modulo 2^32.
 *
 * Elements are addressed through H1(), which is defined elsewhere
 * (presumably a host-endianness byte-index adjustment -- confirm).
 */
static uint32_t do_usmmla_b(uint32_t sum, void *vn, void *vm)
{
    const uint8_t *lhs = vn;
    const int8_t *rhs = vm;
    intptr_t idx;

    for (idx = 0; idx < 8; idx++) {
        int prod = lhs[H1(idx)] * rhs[H1(idx)];

        sum += prod;
    }

    return sum;
}
|
||||
|
||||
/*
 * Common driver for the 8-bit integer matrix multiply-accumulate helpers.
 *
 * @vd:         destination vector
 * @vn, @vm:    source vectors holding the 8-bit elements
 * @va:         source vector holding the 32-bit addends
 * @desc:       descriptor from which simd_oprsz() and simd_maxsz()
 *              extract the operation size and the maximum size
 * @inner_loop: one of the do_*mmla_b routines; it accumulates an
 *              8-element product of one row from @vn and one from @vm
 *
 * The vectors are processed in independent 16-byte segments.  Within a
 * segment, for i and j in {0, 1}:
 *
 *     d[2 * i + j] = a[2 * i + j]
 *                  + sum(k = 0..7) n[8 * i + k] * m[8 * j + k]
 *
 * where d and a are viewed as four 32-bit elements (indexed via H4())
 * and n and m as sixteen bytes.
 */
static void do_mmla_b(void *vd, void *vn, void *vm, void *va, uint32_t desc,
                      uint32_t (*inner_loop)(uint32_t, void *, void *))
{
    intptr_t opr_sz = simd_oprsz(desc);
    intptr_t seg;

    for (seg = 0; seg < opr_sz; seg += 16) {
        uint32_t *d = vd + seg;
        uint32_t *a = va + seg;
        uint32_t acc[4];
        intptr_t i, j, e;

        /*
         * Compute all four results for this segment into temporaries
         * first: nothing is stored to the destination until every input
         * of the segment has been consumed (presumably because the
         * destination may overlap a source -- confirm with callers).
         */
        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++) {
                acc[2 * i + j] = inner_loop(a[H4(2 * i + j)],
                                            vn + seg + 8 * i,
                                            vm + seg + 8 * j);
            }
        }

        for (e = 0; e < 4; e++) {
            d[H4(e)] = acc[e];
        }
    }

    /* clear_tail() is defined elsewhere; handles bytes in [opr_sz, maxsz). */
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}
|
||||
|
||||
#define DO_MMLA_B(NAME, INNER) \
|
||||
void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
|
||||
{ do_mmla_b(vd, vn, vm, va, desc, INNER); }
|
||||
|
||||
DO_MMLA_B(gvec_smmla_b, do_smmla_b)
|
||||
DO_MMLA_B(gvec_ummla_b, do_ummla_b)
|
||||
DO_MMLA_B(gvec_usmmla_b, do_usmmla_b)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue