tcg: Implement gvec support for rotate by vector

No host backend support yet, but the interfaces for rotlv
and rotrv are in place.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v3: Drop the generic expansion from rot to shift; we can do better
    for each backend, and then this code becomes unused.
This commit is contained in:
Richard Henderson 2020-04-19 19:47:59 -07:00
parent b0f7e7444c
commit 5d0ceda902
13 changed files with 256 additions and 1 deletions

View file

@ -908,6 +908,102 @@ void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
clear_high(d, oprsz, desc);
}
void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
uint8_t sh = *(uint8_t *)(b + i) & 7;
*(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), sh);
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
uint8_t sh = *(uint16_t *)(b + i) & 15;
*(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), sh);
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
uint8_t sh = *(uint32_t *)(b + i) & 31;
*(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), sh);
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
uint8_t sh = *(uint64_t *)(b + i) & 63;
*(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), sh);
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
uint8_t sh = *(uint8_t *)(b + i) & 7;
*(uint8_t *)(d + i) = ror8(*(uint8_t *)(a + i), sh);
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
uint8_t sh = *(uint16_t *)(b + i) & 15;
*(uint16_t *)(d + i) = ror16(*(uint16_t *)(a + i), sh);
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
uint8_t sh = *(uint32_t *)(b + i) & 31;
*(uint32_t *)(d + i) = ror32(*(uint32_t *)(a + i), sh);
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
uint8_t sh = *(uint64_t *)(b + i) & 63;
*(uint64_t *)(d + i) = ror64(*(uint64_t *)(a + i), sh);
}
clear_high(d, oprsz, desc);
}
#define DO_CMP1(NAME, TYPE, OP) \
void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
{ \

View file

@ -279,6 +279,16 @@ DEF_HELPER_FLAGS_4(gvec_sar16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_sar32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_sar64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_rotl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_rotl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_rotl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_rotl64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_rotr8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_rotr16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_rotr32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_rotr64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eq8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eq16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eq32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)