target/arm: Implement SVE2 Integer Multiply - Unpredicated
For MUL, we can rely on generic support. For SMULH and UMULH,
create some trivial helpers. For PMUL, back in a21bb78e58,
we organized helper_gvec_pmul_b in preparation for this use.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210525010358.152808-3-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
parent 2dc10fa2f9
commit 5dad1ba52f
4 changed files with 166 additions and 0 deletions
@@ -1985,3 +1985,99 @@ void HELPER(simd_tblx)(void *vd, void *vm, void *venv, uint32_t desc)
     clear_tail(vd, oprsz, simd_maxsz(desc));
 }
 #endif
+
+/*
+ * NxN -> N highpart multiply
+ *
+ * TODO: expose this as a generic vector operation.
+ */
+
+void HELPER(gvec_smulh_b)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    int8_t *d = vd, *n = vn, *m = vm;
+
+    for (i = 0; i < opr_sz; ++i) {
+        d[i] = ((int32_t)n[i] * m[i]) >> 8;
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_smulh_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    int16_t *d = vd, *n = vn, *m = vm;
+
+    for (i = 0; i < opr_sz / 2; ++i) {
+        d[i] = ((int32_t)n[i] * m[i]) >> 16;
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_smulh_s)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    int32_t *d = vd, *n = vn, *m = vm;
+
+    for (i = 0; i < opr_sz / 4; ++i) {
+        d[i] = ((int64_t)n[i] * m[i]) >> 32;
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_smulh_d)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    uint64_t *d = vd, *n = vn, *m = vm;
+    uint64_t discard;
+
+    for (i = 0; i < opr_sz / 8; ++i) {
+        muls64(&discard, &d[i], n[i], m[i]);
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_umulh_b)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    uint8_t *d = vd, *n = vn, *m = vm;
+
+    for (i = 0; i < opr_sz; ++i) {
+        d[i] = ((uint32_t)n[i] * m[i]) >> 8;
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_umulh_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    uint16_t *d = vd, *n = vn, *m = vm;
+
+    for (i = 0; i < opr_sz / 2; ++i) {
+        d[i] = ((uint32_t)n[i] * m[i]) >> 16;
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_umulh_s)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    uint32_t *d = vd, *n = vn, *m = vm;
+
+    for (i = 0; i < opr_sz / 4; ++i) {
+        d[i] = ((uint64_t)n[i] * m[i]) >> 32;
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_umulh_d)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    uint64_t *d = vd, *n = vn, *m = vm;
+    uint64_t discard;
+
+    for (i = 0; i < opr_sz / 8; ++i) {
+        mulu64(&discard, &d[i], n[i], m[i]);
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
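As a quick illustration of the "NxN -> N highpart multiply" idiom these SMULH/UMULH helpers implement, here is a minimal standalone C check of the byte-lane arithmetic. It is not part of the patch; smulh_b_lane and the sample values are purely illustrative, mirroring the per-lane expression in gvec_smulh_b above.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Mirror of the per-lane computation in gvec_smulh_b: widen the operands,
 * multiply, and keep the upper 8 bits of the 16-bit product. */
static int8_t smulh_b_lane(int8_t n, int8_t m)
{
    return ((int32_t)n * m) >> 8;
}

int main(void)
{
    /* -3 * 100 = -300 = 0xFED4 as a 16-bit value; the high byte is
     * 0xFE, i.e. -2, which the arithmetic shift reproduces. */
    assert(smulh_b_lane(-3, 100) == -2);

    /* 100 * 100 = 10000 = 0x2710; the high byte is 0x27 = 39. */
    assert(smulh_b_lane(100, 100) == 39);

    printf("highpart multiply checks passed\n");
    return 0;
}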