target/arm: Speed up aarch64 TBL/TBX
Always perform one call instead of two for 16-byte operands.

Use byte loads/stores directly into the vector register file
instead of extractions and deposits to a 64-bit local variable.

In order to easily receive pointers into the vector register file,
convert the helper to the gvec out-of-line signature.  Move the
helper into vec_helper.c, where it can make use of H1 and clear_tail.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 20210224230532.276878-1-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
commit 519183d3fe
parent 4565afbbf0
4 changed files with 56 additions and 84 deletions
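The commit message above outlines the new approach: a single out-of-line gvec helper that reads and writes bytes directly in the vector register file. Below is a hedged sketch (not the verbatim patch) of what such a byte-wise TBL/TBX helper in vec_helper.c could look like; the helper name simd_tblx, the desc bit layout (rn, a TBX flag, the table length) and the include list are assumptions made for illustration.

/*
 * Illustrative sketch only, assuming a matching DEF_HELPER_FLAGS_4
 * declaration; the name and desc encoding are not taken from the patch.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/helper-proto.h"
#include "tcg/tcg-gvec-desc.h"
#include "vec_internal.h"

void HELPER(simd_tblx)(void *vd, void *vm, void *venv, uint32_t desc)
{
    CPUARMState *env = venv;
    const uint8_t *indices = vm;
    size_t oprsz = simd_oprsz(desc);                      /* 8 or 16 bytes */
    uint32_t rn = extract32(desc, SIMD_DATA_SHIFT, 5);    /* first table reg */
    bool is_tbx = extract32(desc, SIMD_DATA_SHIFT + 5, 1);
    size_t table_len = extract32(desc, SIMD_DATA_SHIFT + 6, 4) * 16;
    union {
        uint8_t b[16];
        uint64_t d[2];
    } result;
    size_t i;

    /*
     * Build the result in a temporary, since the destination may overlap
     * the table registers.  TBL starts from zero, TBX from the old
     * contents of Vd.
     */
    if (is_tbx) {
        memcpy(&result, vd, 16);
    } else {
        memset(&result, 0, 16);
    }

    for (i = 0; i < oprsz; ++i) {
        uint32_t index = indices[H1(i)];

        if (index < table_len) {
            /*
             * The virtual table is a series of 128-bit registers starting
             * at rn and wrapping from V31 back to V0; load the selected
             * byte straight from the register file.
             */
            const uint8_t *table = (const uint8_t *)
                aa64_vfp_qreg(env, (rn + (index >> 4)) % 32);
            result.b[H1(i)] = table[H1(index % 16)];
        }
    }

    memcpy(vd, &result, 16);
    clear_tail(vd, oprsz, simd_maxsz(desc));
}

Because the helper receives plain pointers to Vd and Vm plus env, one call covers both the 8-byte and 16-byte forms, and clear_tail() zeroes any unused high bytes of the destination.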
@@ -179,38 +179,6 @@ float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
     return float64_mul(a, b, fpst);
 }
 
-uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
-                          uint32_t rn, uint32_t numregs)
-{
-    /* Helper function for SIMD TBL and TBX. We have to do the table
-     * lookup part for the 64 bits worth of indices we're passed in.
-     * result is the initial results vector (either zeroes for TBL
-     * or some guest values for TBX), rn the register number where
-     * the table starts, and numregs the number of registers in the table.
-     * We return the results of the lookups.
-     */
-    int shift;
-
-    for (shift = 0; shift < 64; shift += 8) {
-        int index = extract64(indices, shift, 8);
-        if (index < 16 * numregs) {
-            /* Convert index (a byte offset into the virtual table
-             * which is a series of 128-bit vectors concatenated)
-             * into the correct register element plus a bit offset
-             * into that element, bearing in mind that the table
-             * can wrap around from V31 to V0.
-             */
-            int elt = (rn * 2 + (index >> 3)) % 64;
-            int bitidx = (index & 7) * 8;
-            uint64_t *q = aa64_vfp_qreg(env, elt >> 1);
-            uint64_t val = extract64(q[elt & 1], bitidx, 8);
-
-            result = deposit64(result, shift, 8, val);
-        }
-    }
-    return result;
-}
-
 /* 64bit/double versions of the neon float compare functions */
 uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
 {
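On the translator side, "one call instead of two for 16-byte operands" corresponds to emitting a single tcg_gen_gvec_2_ptr() call whose oprsz encodes the operand size. A minimal sketch, reusing the helper name and desc layout assumed above (the wrapper function name and its parameters are likewise hypothetical):

/* Illustrative only: emitting the single out-of-line gvec call. */
static void gen_tbl_tbx(DisasContext *s, bool is_q, bool is_tbx,
                        int rd, int rn, int rm, int numregs)
{
    /* Pack rn, the TBX flag and the table length into the desc data field. */
    tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rm), cpu_env,
                       is_q ? 16 : 8, vec_full_reg_size(s),
                       (numregs << 6) | (is_tbx << 5) | rn,
                       gen_helper_simd_tblx);
}

By contrast, the removed helper shown above consumed one 64-bit chunk of indices per call, so a 16-byte operand required two calls and byte shuffling through a local uint64_t with extract64()/deposit64().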