tcg: Add gvec expanders for variable shift

The gvec expanders perform a modulo on the shift count.  If the target
requires alternate behaviour, then it cannot use the generic gvec
expanders anyway, and will have to have its own custom code.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2019-04-13 20:42:37 -10:00
parent 37ee55a081
commit 5ee5c14cac
6 changed files with 384 additions and 0 deletions

View file

@ -725,6 +725,150 @@ void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
clear_high(d, oprsz, desc);
}
void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
uint8_t sh = *(uint8_t *)(b + i) & 7;
*(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh;
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
uint8_t sh = *(uint16_t *)(b + i) & 15;
*(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh;
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
uint8_t sh = *(uint32_t *)(b + i) & 31;
*(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh;
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
uint8_t sh = *(uint64_t *)(b + i) & 63;
*(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh;
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
uint8_t sh = *(uint8_t *)(b + i) & 7;
*(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh;
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
uint8_t sh = *(uint16_t *)(b + i) & 15;
*(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh;
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
uint8_t sh = *(uint32_t *)(b + i) & 31;
*(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh;
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
uint8_t sh = *(uint64_t *)(b + i) & 63;
*(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh;
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(vec8)) {
uint8_t sh = *(uint8_t *)(b + i) & 7;
*(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh;
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(int16_t)) {
uint8_t sh = *(uint16_t *)(b + i) & 15;
*(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh;
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(vec32)) {
uint8_t sh = *(uint32_t *)(b + i) & 31;
*(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh;
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(vec64)) {
uint8_t sh = *(uint64_t *)(b + i) & 63;
*(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh;
}
clear_high(d, oprsz, desc);
}
/* If vectors are enabled, the compiler fills in -1 for true.
Otherwise, we must take care of this by hand. */
#ifdef CONFIG_VECTOR16