mirror of
https://github.com/Motorhead1991/qemu.git
synced 2025-08-05 08:43:55 -06:00
target-arm: Move Neon VZIP to helper functions
Move the implementation of the Neon VZIP zip instruction from inline code to helper functions. (At 50+ TCG ops it was well over the recommended limit for coding inline.) The helper implementations also give the correct answers where the inline implementation did not. Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
This commit is contained in:
parent
02acedf93d
commit
d68a6f3a6d
3 changed files with 133 additions and 73 deletions
|
@ -1787,3 +1787,95 @@ void HELPER(neon_unzip16)(CPUState *env, uint32_t rd, uint32_t rm)
|
|||
env->vfp.regs[rm] = make_float64(m0);
|
||||
env->vfp.regs[rd] = make_float64(d0);
|
||||
}
|
||||
|
||||
/*
 * Zip helper for 8-bit elements across two register pairs.
 *
 * Reads regs[rd], regs[rd + 1], regs[rm] and regs[rm + 1] as 64-bit values,
 * then builds four 64-bit results by alternating one byte taken from the
 * "d" source with one byte taken from the "m" source:
 *   - bytes 0..3 of regs[rd]/regs[rm]         -> new regs[rd]
 *   - bytes 4..7 of regs[rd]/regs[rm]         -> new regs[rd + 1]
 *   - bytes 0..3 of regs[rd + 1]/regs[rm + 1] -> new regs[rm]
 *   - bytes 4..7 of regs[rd + 1]/regs[rm + 1] -> new regs[rm + 1]
 *
 * All sources are read before any result is stored, and the stores happen
 * in the order rm, rm + 1, rd, rd + 1.
 */
void HELPER(neon_qzip8)(CPUState *env, uint32_t rd, uint32_t rm)
{
    /* Snapshot every source doubleword before touching the register file. */
    const uint64_t src_m_lo = float64_val(env->vfp.regs[rm]);
    const uint64_t src_m_hi = float64_val(env->vfp.regs[rm + 1]);
    const uint64_t src_d_lo = float64_val(env->vfp.regs[rd]);
    const uint64_t src_d_hi = float64_val(env->vfp.regs[rd + 1]);
    uint64_t res_d_lo = 0;
    uint64_t res_d_hi = 0;
    uint64_t res_m_lo = 0;
    uint64_t res_m_hi = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        /* Matching byte index in the upper half of the source doubleword. */
        const int upper = lane + 4;
        /* Each output pair occupies 16 bits: "d" byte low, "m" byte high. */
        const int d_pos = lane * 16;
        const int m_pos = d_pos + 8;

        res_d_lo |= (ELEM(src_d_lo, lane, 8) << d_pos)
            | (ELEM(src_m_lo, lane, 8) << m_pos);
        res_d_hi |= (ELEM(src_d_lo, upper, 8) << d_pos)
            | (ELEM(src_m_lo, upper, 8) << m_pos);
        res_m_lo |= (ELEM(src_d_hi, lane, 8) << d_pos)
            | (ELEM(src_m_hi, lane, 8) << m_pos);
        res_m_hi |= (ELEM(src_d_hi, upper, 8) << d_pos)
            | (ELEM(src_m_hi, upper, 8) << m_pos);
    }

    /* Store order is kept as rm, rm + 1, rd, rd + 1. */
    env->vfp.regs[rm] = make_float64(res_m_lo);
    env->vfp.regs[rm + 1] = make_float64(res_m_hi);
    env->vfp.regs[rd] = make_float64(res_d_lo);
    env->vfp.regs[rd + 1] = make_float64(res_d_hi);
}
|
||||
|
||||
/*
 * Zip helper for 16-bit elements across two register pairs.
 *
 * Reads regs[rd], regs[rd + 1], regs[rm] and regs[rm + 1] as 64-bit values,
 * then builds four 64-bit results by alternating one halfword from the "d"
 * source with one halfword from the "m" source:
 *   - halfwords 0..1 of regs[rd]/regs[rm]         -> new regs[rd]
 *   - halfwords 2..3 of regs[rd]/regs[rm]         -> new regs[rd + 1]
 *   - halfwords 0..1 of regs[rd + 1]/regs[rm + 1] -> new regs[rm]
 *   - halfwords 2..3 of regs[rd + 1]/regs[rm + 1] -> new regs[rm + 1]
 *
 * All sources are read before any result is stored, and the stores happen
 * in the order rm, rm + 1, rd, rd + 1.
 */
void HELPER(neon_qzip16)(CPUState *env, uint32_t rd, uint32_t rm)
{
    /* Snapshot every source doubleword before touching the register file. */
    const uint64_t src_m_lo = float64_val(env->vfp.regs[rm]);
    const uint64_t src_m_hi = float64_val(env->vfp.regs[rm + 1]);
    const uint64_t src_d_lo = float64_val(env->vfp.regs[rd]);
    const uint64_t src_d_hi = float64_val(env->vfp.regs[rd + 1]);
    uint64_t res_d_lo = 0;
    uint64_t res_d_hi = 0;
    uint64_t res_m_lo = 0;
    uint64_t res_m_hi = 0;
    int lane;

    for (lane = 0; lane < 2; lane++) {
        /* Matching halfword index in the upper half of the doubleword. */
        const int upper = lane + 2;
        /* Each output pair occupies 32 bits: "d" element low, "m" high. */
        const int d_pos = lane * 32;
        const int m_pos = d_pos + 16;

        res_d_lo |= (ELEM(src_d_lo, lane, 16) << d_pos)
            | (ELEM(src_m_lo, lane, 16) << m_pos);
        res_d_hi |= (ELEM(src_d_lo, upper, 16) << d_pos)
            | (ELEM(src_m_lo, upper, 16) << m_pos);
        res_m_lo |= (ELEM(src_d_hi, lane, 16) << d_pos)
            | (ELEM(src_m_hi, lane, 16) << m_pos);
        res_m_hi |= (ELEM(src_d_hi, upper, 16) << d_pos)
            | (ELEM(src_m_hi, upper, 16) << m_pos);
    }

    /* Store order is kept as rm, rm + 1, rd, rd + 1. */
    env->vfp.regs[rm] = make_float64(res_m_lo);
    env->vfp.regs[rm + 1] = make_float64(res_m_hi);
    env->vfp.regs[rd] = make_float64(res_d_lo);
    env->vfp.regs[rd + 1] = make_float64(res_d_hi);
}
|
||||
|
||||
/*
 * Zip helper for 32-bit elements across two register pairs.
 *
 * Reads regs[rd], regs[rd + 1], regs[rm] and regs[rm + 1] as 64-bit values.
 * Each result doubleword holds one 32-bit element from a "d" source in its
 * low half and the same-numbered element from the paired "m" source in its
 * high half:
 *   - word 0 of regs[rd]/regs[rm]         -> new regs[rd]
 *   - word 1 of regs[rd]/regs[rm]         -> new regs[rd + 1]
 *   - word 0 of regs[rd + 1]/regs[rm + 1] -> new regs[rm]
 *   - word 1 of regs[rd + 1]/regs[rm + 1] -> new regs[rm + 1]
 *
 * All sources are read before any result is stored, and the stores happen
 * in the order rm, rm + 1, rd, rd + 1.
 */
void HELPER(neon_qzip32)(CPUState *env, uint32_t rd, uint32_t rm)
{
    /* Snapshot every source doubleword before touching the register file. */
    const uint64_t src_m_lo = float64_val(env->vfp.regs[rm]);
    const uint64_t src_m_hi = float64_val(env->vfp.regs[rm + 1]);
    const uint64_t src_d_lo = float64_val(env->vfp.regs[rd]);
    const uint64_t src_d_hi = float64_val(env->vfp.regs[rd + 1]);
    uint64_t res_d[2];
    uint64_t res_m[2];
    int word;

    for (word = 0; word < 2; word++) {
        res_d[word] = ELEM(src_d_lo, word, 32)
            | (ELEM(src_m_lo, word, 32) << 32);
        res_m[word] = ELEM(src_d_hi, word, 32)
            | (ELEM(src_m_hi, word, 32) << 32);
    }

    /* Store order is kept as rm, rm + 1, rd, rd + 1. */
    env->vfp.regs[rm] = make_float64(res_m[0]);
    env->vfp.regs[rm + 1] = make_float64(res_m[1]);
    env->vfp.regs[rd] = make_float64(res_d[0]);
    env->vfp.regs[rd + 1] = make_float64(res_d[1]);
}
|
||||
|
||||
/*
 * Zip helper for 8-bit elements on a single pair of 64-bit registers.
 *
 * Reads regs[rd] and regs[rm] as 64-bit values and alternates one byte
 * from regs[rd] with one byte from regs[rm]. Bytes 0..3 of both sources
 * form the new regs[rd]; bytes 4..7 of both sources form the new regs[rm].
 *
 * Both sources are read before either result is stored; regs[rm] is
 * written first, then regs[rd].
 */
void HELPER(neon_zip8)(CPUState *env, uint32_t rd, uint32_t rm)
{
    const uint64_t src_m = float64_val(env->vfp.regs[rm]);
    const uint64_t src_d = float64_val(env->vfp.regs[rd]);
    uint64_t res_d = 0;
    uint64_t res_m = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        /* Matching byte index in the upper half of the source doubleword. */
        const int upper = lane + 4;
        /* Each output pair occupies 16 bits: "d" byte low, "m" byte high. */
        const int d_pos = lane * 16;
        const int m_pos = d_pos + 8;

        res_d |= (ELEM(src_d, lane, 8) << d_pos)
            | (ELEM(src_m, lane, 8) << m_pos);
        res_m |= (ELEM(src_d, upper, 8) << d_pos)
            | (ELEM(src_m, upper, 8) << m_pos);
    }

    /* Store order is kept as rm, then rd. */
    env->vfp.regs[rm] = make_float64(res_m);
    env->vfp.regs[rd] = make_float64(res_d);
}
|
||||
|
||||
/*
 * Zip helper for 16-bit elements on a single pair of 64-bit registers.
 *
 * Reads regs[rd] and regs[rm] as 64-bit values and alternates one halfword
 * from regs[rd] with one halfword from regs[rm]. Halfwords 0..1 of both
 * sources form the new regs[rd]; halfwords 2..3 form the new regs[rm].
 *
 * Both sources are read before either result is stored; regs[rm] is
 * written first, then regs[rd].
 */
void HELPER(neon_zip16)(CPUState *env, uint32_t rd, uint32_t rm)
{
    const uint64_t src_m = float64_val(env->vfp.regs[rm]);
    const uint64_t src_d = float64_val(env->vfp.regs[rd]);
    uint64_t res_d = 0;
    uint64_t res_m = 0;
    int lane;

    for (lane = 0; lane < 2; lane++) {
        /* Matching halfword index in the upper half of the doubleword. */
        const int upper = lane + 2;
        /* Each output pair occupies 32 bits: "d" element low, "m" high. */
        const int d_pos = lane * 32;
        const int m_pos = d_pos + 16;

        res_d |= (ELEM(src_d, lane, 16) << d_pos)
            | (ELEM(src_m, lane, 16) << m_pos);
        res_m |= (ELEM(src_d, upper, 16) << d_pos)
            | (ELEM(src_m, upper, 16) << m_pos);
    }

    /* Store order is kept as rm, then rd. */
    env->vfp.regs[rm] = make_float64(res_m);
    env->vfp.regs[rd] = make_float64(res_d);
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue