target/arm: Implement SVE Memory Contiguous Store Group

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20180627043328.11531-4-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Richard Henderson 2018-06-29 15:11:03 +01:00 committed by Peter Maydell
parent e2654d7575
commit 1a039c7e58
4 changed files with 343 additions and 0 deletions

View file

@ -3120,3 +3120,214 @@ DO_LDNF1(sds_r)
DO_LDNF1(dd_r)
#undef DO_LDNF1
/*
* Store contiguous data, protected by a governing predicate.
*/
/*
 * DO_ST1(NAME, FN, TYPEE, TYPEM, H): define HELPER(NAME), a predicated
 * contiguous store of a single vector register.
 *
 *   FN     store primitive, invoked as FN(env, addr, value, ra)
 *   TYPEE  type of one element as it sits in the vector register
 *   TYPEM  type of one element in memory; the register element is
 *          converted to it before being handed to FN
 *   H      macro applied to the byte offset of each register element
 *          (presumably a host-endian fixup; defined elsewhere)
 *
 * simd_data(desc) selects the source Z register and simd_oprsz(desc)
 * bounds the byte offset walked through it.  The predicate at vg is
 * consumed 16 bits at a time, one bit per register byte; only the lowest
 * bit belonging to each element is tested.  addr moves on by
 * sizeof(TYPEM) for every element, active or not.
 *
 * NOTE(review): the inner loop always runs up to the next 16-byte
 * boundary, so simd_oprsz(desc) is assumed to be a multiple of 16.
 */
#define DO_ST1(NAME, FN, TYPEE, TYPEM, H)                               \
void HELPER(NAME)(CPUARMState *env, void *vg,                           \
                  target_ulong addr, uint32_t desc)                     \
{                                                                       \
    intptr_t reg_bytes = simd_oprsz(desc);                              \
    intptr_t retaddr = GETPC();                                         \
    unsigned zreg = simd_data(desc);                                    \
    void *src = &env->vfp.zregs[zreg];                                  \
    intptr_t off = 0;                                                   \
    while (off < reg_bytes) {                                           \
        uint16_t pred = *(uint16_t *)(vg + H1_2(off >> 3));             \
        do {                                                            \
            if (pred & 1) {                                             \
                TYPEM val = *(TYPEE *)(src + H(off));                   \
                FN(env, addr, val, retaddr);                            \
            }                                                           \
            off += sizeof(TYPEE);                                       \
            pred >>= sizeof(TYPEE);                                     \
            addr += sizeof(TYPEM);                                      \
        } while (off & 15);                                             \
    }                                                                   \
}
/*
 * DO_ST1_D(NAME, FN, TYPEM): define HELPER(NAME), a predicated contiguous
 * store of a single vector register viewed as an array of uint64_t.
 *
 *   FN     store primitive, invoked as FN(env, addr, value, ra); it is
 *          handed the whole 64-bit register element
 *   TYPEM  type of one element in memory; only its size is used, as the
 *          address step between consecutive elements
 *
 * simd_data(desc) selects the source Z register and simd_oprsz(desc) / 8
 * is the number of 64-bit elements visited.  Element e is stored when
 * bit 0 of predicate byte H1(e) is set.  addr moves on by sizeof(TYPEM)
 * for every element, active or not.
 */
#define DO_ST1_D(NAME, FN, TYPEM)                                       \
void HELPER(NAME)(CPUARMState *env, void *vg,                           \
                  target_ulong addr, uint32_t desc)                     \
{                                                                       \
    intptr_t nelem = simd_oprsz(desc) / 8;                              \
    intptr_t retaddr = GETPC();                                         \
    unsigned zreg = simd_data(desc);                                    \
    uint64_t *src = &env->vfp.zregs[zreg].d[0];                         \
    uint8_t *pred = vg;                                                 \
    intptr_t e;                                                         \
    for (e = 0; e < nelem; e++, addr += sizeof(TYPEM)) {                \
        if (!(pred[H1(e)] & 1)) {                                       \
            continue;                                                   \
        }                                                               \
        FN(env, addr, src[e], retaddr);                                 \
    }                                                                   \
}
/*
 * DO_ST2(NAME, FN, TYPEE, TYPEM, H): define HELPER(NAME), a predicated
 * store of two vector registers as interleaved two-element structures.
 *
 *   FN     store primitive, invoked as FN(env, addr, value, ra)
 *   TYPEE  type of one element as it sits in a vector register
 *   TYPEM  type of one element in memory
 *   H      macro applied to the byte offset of each register element
 *          (presumably a host-endian fixup; defined elsewhere)
 *
 * The registers used are simd_data(desc) and the following one, with the
 * register number wrapping modulo 32.  For each active element the value
 * from the first register is stored at addr and the value from the second
 * at addr + sizeof(TYPEM).  addr moves on by 2 * sizeof(TYPEM) for every
 * element, active or not.  Predicate handling: 16 bits are read per 16
 * register bytes and the lowest bit of each element is tested.
 *
 * NOTE(review): the inner loop always runs up to the next 16-byte
 * boundary, so simd_oprsz(desc) is assumed to be a multiple of 16.
 */
#define DO_ST2(NAME, FN, TYPEE, TYPEM, H)                               \
void HELPER(NAME)(CPUARMState *env, void *vg,                           \
                  target_ulong addr, uint32_t desc)                     \
{                                                                       \
    intptr_t reg_bytes = simd_oprsz(desc);                              \
    intptr_t retaddr = GETPC();                                         \
    unsigned zreg = simd_data(desc);                                    \
    void *src0 = &env->vfp.zregs[zreg];                                 \
    void *src1 = &env->vfp.zregs[(zreg + 1) & 31];                      \
    intptr_t off = 0;                                                   \
    while (off < reg_bytes) {                                           \
        uint16_t pred = *(uint16_t *)(vg + H1_2(off >> 3));             \
        do {                                                            \
            if (pred & 1) {                                             \
                TYPEM val0 = *(TYPEE *)(src0 + H(off));                 \
                TYPEM val1 = *(TYPEE *)(src1 + H(off));                 \
                FN(env, addr, val0, retaddr);                           \
                FN(env, addr + sizeof(TYPEM), val1, retaddr);           \
            }                                                           \
            off += sizeof(TYPEE);                                       \
            pred >>= sizeof(TYPEE);                                     \
            addr += 2 * sizeof(TYPEM);                                  \
        } while (off & 15);                                             \
    }                                                                   \
}
/*
 * DO_ST3(NAME, FN, TYPEE, TYPEM, H): define HELPER(NAME), a predicated
 * store of three vector registers as interleaved three-element structures.
 *
 *   FN     store primitive, invoked as FN(env, addr, value, ra)
 *   TYPEE  type of one element as it sits in a vector register
 *   TYPEM  type of one element in memory
 *   H      macro applied to the byte offset of each register element
 *          (presumably a host-endian fixup; defined elsewhere)
 *
 * The registers used are simd_data(desc) and the two following ones, with
 * the register number wrapping modulo 32.  For each active element the
 * three values are stored in register order at addr, addr + sizeof(TYPEM)
 * and addr + 2 * sizeof(TYPEM).  addr moves on by 3 * sizeof(TYPEM) for
 * every element, active or not.  Predicate handling: 16 bits are read per
 * 16 register bytes and the lowest bit of each element is tested.
 *
 * NOTE(review): the inner loop always runs up to the next 16-byte
 * boundary, so simd_oprsz(desc) is assumed to be a multiple of 16.
 */
#define DO_ST3(NAME, FN, TYPEE, TYPEM, H)                               \
void HELPER(NAME)(CPUARMState *env, void *vg,                           \
                  target_ulong addr, uint32_t desc)                     \
{                                                                       \
    intptr_t reg_bytes = simd_oprsz(desc);                              \
    intptr_t retaddr = GETPC();                                         \
    unsigned zreg = simd_data(desc);                                    \
    void *zn[3] = {                                                     \
        &env->vfp.zregs[zreg],                                          \
        &env->vfp.zregs[(zreg + 1) & 31],                               \
        &env->vfp.zregs[(zreg + 2) & 31],                               \
    };                                                                  \
    intptr_t off = 0;                                                   \
    int r;                                                              \
    while (off < reg_bytes) {                                           \
        uint16_t pred = *(uint16_t *)(vg + H1_2(off >> 3));             \
        do {                                                            \
            if (pred & 1) {                                             \
                for (r = 0; r < 3; r++) {                               \
                    TYPEM val = *(TYPEE *)(zn[r] + H(off));             \
                    FN(env, addr + r * sizeof(TYPEM), val, retaddr);    \
                }                                                       \
            }                                                           \
            off += sizeof(TYPEE);                                       \
            pred >>= sizeof(TYPEE);                                     \
            addr += 3 * sizeof(TYPEM);                                  \
        } while (off & 15);                                             \
    }                                                                   \
}
/*
 * DO_ST4(NAME, FN, TYPEE, TYPEM, H): define HELPER(NAME), a predicated
 * store of four vector registers as interleaved four-element structures.
 *
 *   FN     store primitive, invoked as FN(env, addr, value, ra)
 *   TYPEE  type of one element as it sits in a vector register
 *   TYPEM  type of one element in memory
 *   H      macro applied to the byte offset of each register element
 *          (presumably a host-endian fixup; defined elsewhere)
 *
 * The registers used are simd_data(desc) and the three following ones,
 * with the register number wrapping modulo 32.  For each active element
 * the four values are stored in register order at addr + k * sizeof(TYPEM)
 * for k = 0..3.  addr moves on by 4 * sizeof(TYPEM) for every element,
 * active or not.  Predicate handling: 16 bits are read per 16 register
 * bytes and the lowest bit of each element is tested.
 *
 * NOTE(review): the inner loop always runs up to the next 16-byte
 * boundary, so simd_oprsz(desc) is assumed to be a multiple of 16.
 */
#define DO_ST4(NAME, FN, TYPEE, TYPEM, H)                               \
void HELPER(NAME)(CPUARMState *env, void *vg,                           \
                  target_ulong addr, uint32_t desc)                     \
{                                                                       \
    intptr_t reg_bytes = simd_oprsz(desc);                              \
    intptr_t retaddr = GETPC();                                         \
    unsigned zreg = simd_data(desc);                                    \
    void *zn[4] = {                                                     \
        &env->vfp.zregs[zreg],                                          \
        &env->vfp.zregs[(zreg + 1) & 31],                               \
        &env->vfp.zregs[(zreg + 2) & 31],                               \
        &env->vfp.zregs[(zreg + 3) & 31],                               \
    };                                                                  \
    intptr_t off = 0;                                                   \
    int r;                                                              \
    while (off < reg_bytes) {                                           \
        uint16_t pred = *(uint16_t *)(vg + H1_2(off >> 3));             \
        do {                                                            \
            if (pred & 1) {                                             \
                for (r = 0; r < 4; r++) {                               \
                    TYPEM val = *(TYPEE *)(zn[r] + H(off));             \
                    FN(env, addr + r * sizeof(TYPEM), val, retaddr);    \
                }                                                       \
            }                                                           \
            off += sizeof(TYPEE);                                       \
            pred >>= sizeof(TYPEE);                                     \
            addr += 4 * sizeof(TYPEM);                                  \
        } while (off & 15);                                             \
    }                                                                   \
}
/*
 * Helper instantiations.  Arguments are: helper name, store primitive,
 * then for DO_STn the register element type, the memory element type and
 * the offset-adjustment macro; for DO_ST1_D only the memory element type
 * (the register element is always 64-bit there).
 */
/* Single-register stores whose memory element is narrower than the
 * register element: 8-bit memory elements. */
DO_ST1(sve_st1bh_r, cpu_stb_data_ra, uint16_t, uint8_t, H1_2)
DO_ST1(sve_st1bs_r, cpu_stb_data_ra, uint32_t, uint8_t, H1_4)
DO_ST1_D(sve_st1bd_r, cpu_stb_data_ra, uint8_t)
/* Narrowing stores with 16-bit memory elements. */
DO_ST1(sve_st1hs_r, cpu_stw_data_ra, uint32_t, uint16_t, H1_4)
DO_ST1_D(sve_st1hd_r, cpu_stw_data_ra, uint16_t)
/* Narrowing store with 32-bit memory elements. */
DO_ST1_D(sve_st1sd_r, cpu_stl_data_ra, uint32_t)
/* Same-size stores of one to four registers, 8-bit elements. */
DO_ST1(sve_st1bb_r, cpu_stb_data_ra, uint8_t, uint8_t, H1)
DO_ST2(sve_st2bb_r, cpu_stb_data_ra, uint8_t, uint8_t, H1)
DO_ST3(sve_st3bb_r, cpu_stb_data_ra, uint8_t, uint8_t, H1)
DO_ST4(sve_st4bb_r, cpu_stb_data_ra, uint8_t, uint8_t, H1)
/* Same-size stores of one to four registers, 16-bit elements. */
DO_ST1(sve_st1hh_r, cpu_stw_data_ra, uint16_t, uint16_t, H1_2)
DO_ST2(sve_st2hh_r, cpu_stw_data_ra, uint16_t, uint16_t, H1_2)
DO_ST3(sve_st3hh_r, cpu_stw_data_ra, uint16_t, uint16_t, H1_2)
DO_ST4(sve_st4hh_r, cpu_stw_data_ra, uint16_t, uint16_t, H1_2)
/* Same-size stores of one to four registers, 32-bit elements. */
DO_ST1(sve_st1ss_r, cpu_stl_data_ra, uint32_t, uint32_t, H1_4)
DO_ST2(sve_st2ss_r, cpu_stl_data_ra, uint32_t, uint32_t, H1_4)
DO_ST3(sve_st3ss_r, cpu_stl_data_ra, uint32_t, uint32_t, H1_4)
DO_ST4(sve_st4ss_r, cpu_stl_data_ra, uint32_t, uint32_t, H1_4)
/* Same-size single-register store, 64-bit elements; the two- to
 * four-register 64-bit forms are written out by hand rather than
 * generated from a macro. */
DO_ST1_D(sve_st1dd_r, cpu_stq_data_ra, uint64_t)
/*
 * Predicated store of two vector registers as interleaved pairs of
 * 64-bit elements.
 *
 * The registers used are simd_data(desc) and the following one, with the
 * register number wrapping modulo 32; simd_oprsz(desc) / 8 elements are
 * visited.  Element e is stored when bit 0 of predicate byte H1(e) is
 * set: the first register's value goes to addr, the second's to addr + 8.
 * addr moves on by 16 for every element, active or not.
 */
void HELPER(sve_st2dd_r)(CPUARMState *env, void *vg,
                         target_ulong addr, uint32_t desc)
{
    intptr_t nelem = simd_oprsz(desc) / 8;
    intptr_t retaddr = GETPC();
    unsigned zreg = simd_data(desc);
    uint64_t *first = &env->vfp.zregs[zreg].d[0];
    uint64_t *second = &env->vfp.zregs[(zreg + 1) & 31].d[0];
    uint8_t *pred = vg;
    intptr_t e;

    for (e = 0; e < nelem; e++, addr += 2 * 8) {
        if (!(pred[H1(e)] & 1)) {
            continue;
        }
        cpu_stq_data_ra(env, addr, first[e], retaddr);
        cpu_stq_data_ra(env, addr + 8, second[e], retaddr);
    }
}
/*
 * Predicated store of three vector registers as interleaved triples of
 * 64-bit elements.
 *
 * The registers used are simd_data(desc) and the two following ones, with
 * the register number wrapping modulo 32; simd_oprsz(desc) / 8 elements
 * are visited.  Element e is stored when bit 0 of predicate byte H1(e) is
 * set: the values are written in register order at addr, addr + 8 and
 * addr + 16.  addr moves on by 24 for every element, active or not.
 */
void HELPER(sve_st3dd_r)(CPUARMState *env, void *vg,
                         target_ulong addr, uint32_t desc)
{
    intptr_t nelem = simd_oprsz(desc) / 8;
    intptr_t retaddr = GETPC();
    unsigned zreg = simd_data(desc);
    uint64_t *zn[3] = {
        &env->vfp.zregs[zreg].d[0],
        &env->vfp.zregs[(zreg + 1) & 31].d[0],
        &env->vfp.zregs[(zreg + 2) & 31].d[0],
    };
    uint8_t *pred = vg;
    intptr_t e;
    int r;

    for (e = 0; e < nelem; e++) {
        if (pred[H1(e)] & 1) {
            for (r = 0; r < 3; r++) {
                cpu_stq_data_ra(env, addr + 8 * r, zn[r][e], retaddr);
            }
        }
        addr += 3 * 8;
    }
}
/*
 * Predicated store of four vector registers as interleaved quadruples of
 * 64-bit elements.
 *
 * The registers used are simd_data(desc) and the three following ones,
 * with the register number wrapping modulo 32; simd_oprsz(desc) / 8
 * elements are visited.  Element e is stored when bit 0 of predicate byte
 * H1(e) is set: the values are written in register order at addr,
 * addr + 8, addr + 16 and addr + 24.  addr moves on by 32 for every
 * element, active or not.
 */
void HELPER(sve_st4dd_r)(CPUARMState *env, void *vg,
                         target_ulong addr, uint32_t desc)
{
    intptr_t nelem = simd_oprsz(desc) / 8;
    intptr_t retaddr = GETPC();
    unsigned zreg = simd_data(desc);
    uint64_t *zn[4] = {
        &env->vfp.zregs[zreg].d[0],
        &env->vfp.zregs[(zreg + 1) & 31].d[0],
        &env->vfp.zregs[(zreg + 2) & 31].d[0],
        &env->vfp.zregs[(zreg + 3) & 31].d[0],
    };
    uint8_t *pred = vg;
    intptr_t e;
    int r;

    for (e = 0; e < nelem; e++) {
        if (pred[H1(e)] & 1) {
            for (r = 0; r < 4; r++) {
                cpu_stq_data_ra(env, addr + 8 * r, zn[r][e], retaddr);
            }
        }
        addr += 4 * 8;
    }
}