Merge tag 'pull-target-arm-20241213' of https://git.linaro.org/people/pmaydell/qemu-arm into staging

target-arm queue:
 * Finish conversion of A64 decoder to decodetree
 * Use float_round_to_odd in helper_fcvtx_f64_to_f32
 * Move TLBI insn emulation code out to its own source file
 * docs/system/arm: fix broken links, document undocumented properties
 * MAINTAINERS: correct an email address

# -----BEGIN PGP SIGNATURE-----
#
# iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmdcYCcZHHBldGVyLm1h
# eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3usmD/9x6yTRxIK2mi0CjY0Bii89
# hL1Z3n2bxRDu+WoMcsQKXQM5RcixILJyMsnArOxI3D1bVEkAskuaVcXL0uS7Inq6
# EkEq8Z5lfRikAP698U2tzaGhKRiE4NT/cNgOoFLddkjqvZ1tq3sSbPcCudSWkP+u
# Z3c5etP8llGNhokNhKmIifE/auxiFdPh8JRXHAF3KhNu4VOX7gNWnt4YZNhnV2XN
# TsD+IxU9LCfI8pIFK95zBUIQT/361lIoiY/r7RpN21HeEuS+4wXT/Vfii6rEgsg5
# pNkPoxX/Tc+67l4wXzgoV/p2I1KZbJZ/s7Ta5wLmopidwi2EP9ETVcfTzKIF+PIJ
# 08nozInD+fxlyGBezTRDmuIKiC4t1lVW8TP8znyp3TcSHFs5Q/iQY0uPACzoUVuE
# chMIt4dD6NlMxOanWANbsVlF+ZPc8MVBMz3zHVbvkOiogoRQYjuDqQIQAhLbQolg
# uC/ql79WnUe0IX1j9rcW7+DVNq/bObLCN89uSjigHO2bo5FKKr4pnOG/SaAyER5L
# T/OHy1ACcxGNVIiUwKEDxdQ5iwcl+GEJfMfrpJHlTzxeZggL2lE0mcpXaHGLTzXV
# K7fSOBI15T+aRqN0/29Rtsw8ayMV5/RmnanesPmC2VN86ZCE0OKGOcLEdaI+q3iT
# CMxIsCUCpMM4WjbdJ69ZgQ==
# =wQ1l
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 13 Dec 2024 11:26:15 EST
# gpg:                using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg:                issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [full]
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>" [full]
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [full]
# gpg:                 aka "Peter Maydell <peter@archaic.org.uk>" [unknown]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* tag 'pull-target-arm-20241213' of https://git.linaro.org/people/pmaydell/qemu-arm: (85 commits)
  target/arm: Simplify condition for tlbi_el2_cp_reginfo[]
  target/arm: Move RME TLB insns to tlb-insns.c
  target/arm: Move small helper functions to tlb-insns.c
  target/arm: Move the TLBI OS insns to tlb-insns.c.
  target/arm: Move TLBI range insns
  target/arm: Move AArch64 EL3 TLBI insns
  target/arm: Move the AArch64 EL2 TLBI insns
  target/arm: Move AArch64 TLBI insns from v8_cp_reginfo[]
  target/arm: Move TLBI insns for AArch32 EL2 to tlbi_insn_helper.c
  target/arm: Move some TLBI insns to their own source file
  MAINTAINERS: correct my email address
  docs/system/arm/virt: document missing properties
  docs/system/arm/xlnx-versal-virt: document ospi-flash property
  docs/system/arm/fby35: document execute-in-place property
  docs/system/arm/orangepi: update links
  target/arm: Use float_round_to_odd in helper_fcvtx_f64_to_f32
  target/arm: Convert FCVTL to decodetree
  target/arm: Convert URECPE and URSQRTE to decodetree
  target/arm: Introduce gen_gvec_urecpe, gen_gvec_ursqrte
  target/arm: Convert FRECPE, FRECPX, FRSQRTE to decodetree
  ...

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Commit 94b57605c1 by Stefan Hajnoczi, 2024-12-14 08:42:15 -05:00
22 changed files with 4203 additions and 5594 deletions


@@ -226,7 +226,7 @@ F: target/avr/
F: tests/functional/test_avr_mega2560.py
Hexagon TCG CPUs
M: Brian Cain <bcain@oss.qualcomm.com>
M: Brian Cain <brian.cain@oss.qualcomm.com>
S: Supported
F: target/hexagon/
X: target/hexagon/idef-parser/


@@ -45,3 +45,8 @@ process starts.
$ screen /dev/tty0 # In a separate TMUX pane, terminal window, etc.
$ screen /dev/tty1
$ (qemu) c # Start the boot process once screen is setup.
This machine model supports emulation of the boot from the CE0 flash device by
setting option ``execute-in-place``. When using this option, the CPU fetches
instructions to execute by reading CE0 and not from a preloaded ROM
initialized at machine init time. As a result, execution will be slower.
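
A minimal invocation sketch for this option (the image name and drive flags are illustrative; adapt them to your firmware setup):

$ qemu-system-arm -machine fby35,execute-in-place=on -nographic \
      -drive file=fby35-bmc.mtd,format=raw,if=mtd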


@@ -119,7 +119,7 @@ Orange Pi PC images
Note that the mainline kernel does not have a root filesystem. You may provide it
with an official Orange Pi PC image from the official website:
http://www.orangepi.org/downloadresources/
http://www.orangepi.org/html/serviceAndSupport/index.html
Another possibility is to run an Armbian image for Orange Pi PC which
can be downloaded from:
@@ -213,7 +213,7 @@ including the Orange Pi PC. NetBSD 9.0 is known to work best for the Orange Pi P
board and provides a fully working system with serial console, networking and storage.
For the Orange Pi PC machine, get the 'evbarm-earmv7hf' based image from:
https://cdn.netbsd.org/pub/NetBSD/NetBSD-9.0/evbarm-earmv7hf/binary/gzimg/armv7.img.gz
https://archive.netbsd.org/pub/NetBSD-archive/NetBSD-9.0/evbarm-earmv7hf/binary/gzimg/armv7.img.gz
The image requires manually installing U-Boot in the image. Build U-Boot with
the orangepi_pc_defconfig configuration as described in the previous section.


@@ -167,10 +167,18 @@ iommu
``smmuv3``
Create an SMMUv3
default-bus-bypass-iommu
Set ``on``/``off`` to enable/disable `bypass_iommu
<https://gitlab.com/qemu-project/qemu/-/blob/master/docs/bypass-iommu.txt>`_
for default root bus.
ras
Set ``on``/``off`` to enable/disable reporting host memory errors to a guest
using ACPI and guest external abort exceptions. The default is off.
acpi
Set ``on``/``off``/``auto`` to enable/disable ACPI.
dtb-randomness
Set ``on``/``off`` to pass random seeds via the guest DTB
rng-seed and kaslr-seed nodes (in both "/chosen" and
@@ -184,6 +192,14 @@ dtb-randomness
dtb-kaslr-seed
A deprecated synonym for dtb-randomness.
x-oem-id
Set string (up to 6 bytes) to override the default value of field OEMID in ACPI
table header.
x-oem-table-id
Set string (up to 8 bytes) to override the default value of field OEM Table ID
in ACPI table header.
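
As an illustration of how several of these properties combine on one command line (the kernel path, CPU and memory size are placeholders):

$ qemu-system-aarch64 -machine virt,acpi=on,ras=on,dtb-randomness=off,x-oem-id=MYOEM \
      -cpu max -m 1G -nographic -kernel Image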
Linux guest kernel configuration
""""""""""""""""""""""""""""""""


@@ -178,6 +178,9 @@ Run the following at the U-Boot prompt:
fdt set /chosen/dom0 reg <0x00000000 0x40000000 0x0 0x03100000>
booti 30000000 - 20000000
It's possible to change the OSPI flash model emulated by using the machine model
option ``ospi-flash``.
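
For example, assuming ``mt35xu01g`` is among the flash types your build supports (check the m25p80 device models):

$ qemu-system-aarch64 -M xlnx-versal-virt,ospi-flash=mt35xu01g ...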
BBRAM File Backend
""""""""""""""""""
BBRAM can have an optional file backend, which must be a seekable

File diff suppressed because it is too large.


@@ -133,9 +133,9 @@ DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, ptr)
DEF_HELPER_3(vfp_minnumh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_minnums, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_minnumd, f64, f64, f64, ptr)
DEF_HELPER_2(vfp_sqrth, f16, f16, env)
DEF_HELPER_2(vfp_sqrts, f32, f32, env)
DEF_HELPER_2(vfp_sqrtd, f64, f64, env)
DEF_HELPER_2(vfp_sqrth, f16, f16, ptr)
DEF_HELPER_2(vfp_sqrts, f32, f32, ptr)
DEF_HELPER_2(vfp_sqrtd, f64, f64, ptr)
DEF_HELPER_3(vfp_cmph, void, f16, f16, env)
DEF_HELPER_3(vfp_cmps, void, f32, f32, env)
DEF_HELPER_3(vfp_cmpd, void, f64, f64, env)
@@ -178,8 +178,10 @@ DEF_HELPER_3(vfp_touhs_round_to_zero, i32, f32, i32, ptr)
DEF_HELPER_3(vfp_touls_round_to_zero, i32, f32, i32, ptr)
DEF_HELPER_3(vfp_toshd_round_to_zero, i64, f64, i32, ptr)
DEF_HELPER_3(vfp_tosld_round_to_zero, i64, f64, i32, ptr)
DEF_HELPER_3(vfp_tosqd_round_to_zero, i64, f64, i32, ptr)
DEF_HELPER_3(vfp_touhd_round_to_zero, i64, f64, i32, ptr)
DEF_HELPER_3(vfp_tould_round_to_zero, i64, f64, i32, ptr)
DEF_HELPER_3(vfp_touqd_round_to_zero, i64, f64, i32, ptr)
DEF_HELPER_3(vfp_touhh, i32, f16, i32, ptr)
DEF_HELPER_3(vfp_toshh, i32, f16, i32, ptr)
DEF_HELPER_3(vfp_toulh, i32, f16, i32, ptr)
@@ -363,8 +365,8 @@ DEF_HELPER_1(neon_clz_u16, i32, i32)
DEF_HELPER_1(neon_cls_s8, i32, i32)
DEF_HELPER_1(neon_cls_s16, i32, i32)
DEF_HELPER_1(neon_cls_s32, i32, i32)
DEF_HELPER_1(neon_cnt_u8, i32, i32)
DEF_HELPER_FLAGS_1(neon_rbit_u8, TCG_CALL_NO_RWG_SE, i32, i32)
DEF_HELPER_FLAGS_3(gvec_cnt_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_rbit_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_3(neon_qdmulh_s16, i32, env, i32, i32)
DEF_HELPER_3(neon_qrdmulh_s16, i32, env, i32, i32)
@@ -395,12 +397,8 @@ DEF_HELPER_1(neon_widen_s8, i64, i32)
DEF_HELPER_1(neon_widen_u16, i64, i32)
DEF_HELPER_1(neon_widen_s16, i64, i32)
DEF_HELPER_2(neon_addl_u16, i64, i64, i64)
DEF_HELPER_2(neon_addl_u32, i64, i64, i64)
DEF_HELPER_2(neon_paddl_u16, i64, i64, i64)
DEF_HELPER_2(neon_paddl_u32, i64, i64, i64)
DEF_HELPER_2(neon_subl_u16, i64, i64, i64)
DEF_HELPER_2(neon_subl_u32, i64, i64, i64)
DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_1(neon_addlp_s16, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_3(neon_addl_saturate_s32, i64, env, i64, i64)
DEF_HELPER_3(neon_addl_saturate_s64, i64, env, i64, i64)
DEF_HELPER_2(neon_abdl_u16, i64, i32, i32)
@@ -654,14 +652,21 @@ DEF_HELPER_FLAGS_4(gvec_touizs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_sf, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_uf, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_fs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_fu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_rz_fs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_rz_fu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_sh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_uh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_hs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_rz_hs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_rz_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_sd, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_ud, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_rz_ds, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_rz_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_rm_sd, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_rm_ud, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_rm_ss, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_rm_us, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_rm_sh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -683,18 +688,23 @@ DEF_HELPER_FLAGS_4(gvec_frsqrte_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fcgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fcgt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fcgt0_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fcge0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fcge0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fcge0_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fceq0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fceq0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fceq0_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fcle0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fcle0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fcle0_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fclt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fclt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fclt0_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
@@ -1111,6 +1121,9 @@ DEF_HELPER_FLAGS_4(gvec_uminp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_uminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_uminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_urecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_ursqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
#ifdef TARGET_AARCH64
#include "tcg/helper-a64.h"
#include "tcg/helper-sve.h"


@@ -1727,6 +1727,9 @@ static inline uint64_t pauth_ptr_mask(ARMVAParameters param)
/* Add the cpreg definitions for debug related system registers */
void define_debug_regs(ARMCPU *cpu);
/* Add the cpreg definitions for TLBI instructions */
void define_tlb_insn_regs(ARMCPU *cpu);
/* Effective value of MDCR_EL2 */
static inline uint64_t arm_mdcr_el2_eff(CPUARMState *env)
{
@@ -1817,4 +1820,10 @@ uint64_t gt_get_countervalue(CPUARMState *env);
* and CNTVCT_EL0 (this will be either 0 or the value of CNTVOFF_EL2).
*/
uint64_t gt_virt_cnt_offset(CPUARMState *env);
/*
* Return mask of ARMMMUIdxBit values corresponding to an "invalidate
* all EL1" scope; this covers stage 1 and stage 2.
*/
int alle1_tlbmask(CPUARMState *env);
#endif


@@ -25,3 +25,8 @@ void raise_exception_ra(CPUARMState *env, uint32_t excp, uint32_t syndrome,
void assert_hflags_rebuild_correctly(CPUARMState *env)
{
}
/* TLBI insns are only used by TCG, so we don't need to do anything for KVM */
void define_tlb_insn_regs(ARMCPU *cpu)
{
}


@@ -21,13 +21,18 @@
%rd 0:5
%esz_sd 22:1 !function=plus_2
%esz_hs 22:1 !function=plus_1
%esz_hsd 22:2 !function=xor_2
%hl 11:1 21:1
%hlm 11:1 20:2
&r rn
&rrr rd rn rm
&ri rd imm
&rr rd rn
&rr_sf rd rn sf
&rri_sf rd rn imm sf
&rrr_sf rd rn rm sf
&i imm
&rr_e rd rn esz
&rri_e rd rn imm esz
@@ -41,10 +46,15 @@
&qrrrr_e q rd rn rm ra esz
@rr_h ........ ... ..... ...... rn:5 rd:5 &rr_e esz=1
@rr_s ........ ... ..... ...... rn:5 rd:5 &rr_e esz=2
@rr_d ........ ... ..... ...... rn:5 rd:5 &rr_e esz=3
@rr_e ........ esz:2 . ..... ...... rn:5 rd:5 &rr_e
@rr_sd ........ ... ..... ...... rn:5 rd:5 &rr_e esz=%esz_sd
@rr_hsd ........ ... ..... ...... rn:5 rd:5 &rr_e esz=%esz_hsd
@rrr_b ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=0
@rrr_h ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=1
@rrr_s ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=2
@rrr_d ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=3
@rrr_sd ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=%esz_sd
@rrr_hsd ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=%esz_hsd
@@ -62,7 +72,12 @@
@rrr_q1e3 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=3
@rrrr_q1e3 ........ ... rm:5 . ra:5 rn:5 rd:5 &qrrrr_e q=1 esz=3
@qrr_b . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=0
@qrr_h . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=1
@qrr_s . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=2
@qrr_bh . q:1 ...... . esz:1 ...... ...... rn:5 rd:5 &qrr_e
@qrr_hs . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=%esz_hs
@qrr_sd . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=%esz_sd
@qrr_e . q:1 ...... esz:2 ...... ...... rn:5 rd:5 &qrr_e
@qrrr_b . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=0
@@ -161,7 +176,7 @@ UBFM . 10 100110 . ...... ...... ..... ..... @bitfield_32
EXTR 1 00 100111 1 0 rm:5 imm:6 rn:5 rd:5 &extract sf=1
EXTR 0 00 100111 0 0 rm:5 0 imm:5 rn:5 rd:5 &extract sf=0
# Branches
### Branches
%imm26 0:s26 !function=times_4
@branch . ..... .......................... &i imm=%imm26
@@ -291,7 +306,7 @@ HLT 1101 0100 010 ................ 000 00 @i16
# DCPS2 1101 0100 101 ................ 000 10 @i16
# DCPS3 1101 0100 101 ................ 000 11 @i16
# Loads and stores
### Loads and stores
&stxr rn rt rt2 rs sz lasr
&stlr rn rt sz lasr
@@ -649,6 +664,138 @@ CPYP 00 011 1 01000 ..... .... 01 ..... ..... @cpy
CPYM 00 011 1 01010 ..... .... 01 ..... ..... @cpy
CPYE 00 011 1 01100 ..... .... 01 ..... ..... @cpy
### Data Processing (register)
# Data Processing (2-source)
@rrr . .......... rm:5 ...... rn:5 rd:5 &rrr
@rrr_sf sf:1 .......... rm:5 ...... rn:5 rd:5 &rrr_sf
UDIV . 00 11010110 ..... 00001 0 ..... ..... @rrr_sf
SDIV . 00 11010110 ..... 00001 1 ..... ..... @rrr_sf
LSLV . 00 11010110 ..... 00100 0 ..... ..... @rrr_sf
LSRV . 00 11010110 ..... 00100 1 ..... ..... @rrr_sf
ASRV . 00 11010110 ..... 00101 0 ..... ..... @rrr_sf
RORV . 00 11010110 ..... 00101 1 ..... ..... @rrr_sf
CRC32 0 00 11010110 ..... 0100 00 ..... ..... @rrr_b
CRC32 0 00 11010110 ..... 0100 01 ..... ..... @rrr_h
CRC32 0 00 11010110 ..... 0100 10 ..... ..... @rrr_s
CRC32 1 00 11010110 ..... 0100 11 ..... ..... @rrr_d
CRC32C 0 00 11010110 ..... 0101 00 ..... ..... @rrr_b
CRC32C 0 00 11010110 ..... 0101 01 ..... ..... @rrr_h
CRC32C 0 00 11010110 ..... 0101 10 ..... ..... @rrr_s
CRC32C 1 00 11010110 ..... 0101 11 ..... ..... @rrr_d
SUBP 1 00 11010110 ..... 000000 ..... ..... @rrr
SUBPS 1 01 11010110 ..... 000000 ..... ..... @rrr
IRG 1 00 11010110 ..... 000100 ..... ..... @rrr
GMI 1 00 11010110 ..... 000101 ..... ..... @rrr
PACGA 1 00 11010110 ..... 001100 ..... ..... @rrr
# Data Processing (1-source)
@rr . .......... ..... ...... rn:5 rd:5 &rr
@rr_sf sf:1 .......... ..... ...... rn:5 rd:5 &rr_sf
RBIT . 10 11010110 00000 000000 ..... ..... @rr_sf
REV16 . 10 11010110 00000 000001 ..... ..... @rr_sf
REV32 . 10 11010110 00000 000010 ..... ..... @rr_sf
REV64 1 10 11010110 00000 000011 ..... ..... @rr
CLZ . 10 11010110 00000 000100 ..... ..... @rr_sf
CLS . 10 11010110 00000 000101 ..... ..... @rr_sf
&pacaut rd rn z
@pacaut . .. ........ ..... .. z:1 ... rn:5 rd:5 &pacaut
PACIA 1 10 11010110 00001 00.000 ..... ..... @pacaut
PACIB 1 10 11010110 00001 00.001 ..... ..... @pacaut
PACDA 1 10 11010110 00001 00.010 ..... ..... @pacaut
PACDB 1 10 11010110 00001 00.011 ..... ..... @pacaut
AUTIA 1 10 11010110 00001 00.100 ..... ..... @pacaut
AUTIB 1 10 11010110 00001 00.101 ..... ..... @pacaut
AUTDA 1 10 11010110 00001 00.110 ..... ..... @pacaut
AUTDB 1 10 11010110 00001 00.111 ..... ..... @pacaut
XPACI 1 10 11010110 00001 010000 11111 rd:5
XPACD 1 10 11010110 00001 010001 11111 rd:5
# Logical (shifted reg)
&logic_shift rd rn rm sf sa st n
@logic_shift sf:1 .. ..... st:2 n:1 rm:5 sa:6 rn:5 rd:5 &logic_shift
AND_r . 00 01010 .. . ..... ...... ..... ..... @logic_shift
ORR_r . 01 01010 .. . ..... ...... ..... ..... @logic_shift
EOR_r . 10 01010 .. . ..... ...... ..... ..... @logic_shift
ANDS_r . 11 01010 .. . ..... ...... ..... ..... @logic_shift
# Add/subtract (shifted reg)
&addsub_shift rd rn rm sf sa st
@addsub_shift sf:1 .. ..... st:2 . rm:5 sa:6 rn:5 rd:5 &addsub_shift
ADD_r . 00 01011 .. 0 ..... ...... ..... ..... @addsub_shift
SUB_r . 10 01011 .. 0 ..... ...... ..... ..... @addsub_shift
ADDS_r . 01 01011 .. 0 ..... ...... ..... ..... @addsub_shift
SUBS_r . 11 01011 .. 0 ..... ...... ..... ..... @addsub_shift
# Add/subtract (extended reg)
&addsub_ext rd rn rm sf sa st
@addsub_ext sf:1 .. ........ rm:5 st:3 sa:3 rn:5 rd:5 &addsub_ext
ADD_ext . 00 01011001 ..... ... ... ..... ..... @addsub_ext
SUB_ext . 10 01011001 ..... ... ... ..... ..... @addsub_ext
ADDS_ext . 01 01011001 ..... ... ... ..... ..... @addsub_ext
SUBS_ext . 11 01011001 ..... ... ... ..... ..... @addsub_ext
# Add/subtract (carry)
ADC . 00 11010000 ..... 000000 ..... ..... @rrr_sf
ADCS . 01 11010000 ..... 000000 ..... ..... @rrr_sf
SBC . 10 11010000 ..... 000000 ..... ..... @rrr_sf
SBCS . 11 11010000 ..... 000000 ..... ..... @rrr_sf
# Rotate right into flags
RMIF 1 01 11010000 imm:6 00001 rn:5 0 mask:4
# Evaluate into flags
SETF8 0 01 11010000 00000 000010 rn:5 01101
SETF16 0 01 11010000 00000 010010 rn:5 01101
# Conditional compare
CCMP sf:1 op:1 1 11010010 y:5 cond:4 imm:1 0 rn:5 0 nzcv:4
# Conditional select
CSEL sf:1 else_inv:1 011010100 rm:5 cond:4 0 else_inc:1 rn:5 rd:5
# Data Processing (3-source)
&rrrr rd rn rm ra
@rrrr . .. ........ rm:5 . ra:5 rn:5 rd:5 &rrrr
MADD_w 0 00 11011000 ..... 0 ..... ..... ..... @rrrr
MSUB_w 0 00 11011000 ..... 1 ..... ..... ..... @rrrr
MADD_x 1 00 11011000 ..... 0 ..... ..... ..... @rrrr
MSUB_x 1 00 11011000 ..... 1 ..... ..... ..... @rrrr
SMADDL 1 00 11011001 ..... 0 ..... ..... ..... @rrrr
SMSUBL 1 00 11011001 ..... 1 ..... ..... ..... @rrrr
UMADDL 1 00 11011101 ..... 0 ..... ..... ..... @rrrr
UMSUBL 1 00 11011101 ..... 1 ..... ..... ..... @rrrr
SMULH 1 00 11011010 ..... 0 11111 ..... ..... @rrr
UMULH 1 00 11011110 ..... 0 11111 ..... ..... @rrr
### Cryptographic AES
AESE 01001110 00 10100 00100 10 ..... ..... @r2r_q1e0
@@ -1183,10 +1330,103 @@ FMAXV_s 0110 1110 00 11000 01111 10 ..... ..... @rr_q1e2
FMINV_h 0.00 1110 10 11000 01111 10 ..... ..... @qrr_h
FMINV_s 0110 1110 10 11000 01111 10 ..... ..... @rr_q1e2
# Conversion between floating-point and fixed-point (general register)
&fcvt rd rn esz sf shift
%fcvt_shift32 10:5 !function=rsub_32
%fcvt_shift64 10:6 !function=rsub_64
@fcvt32 0 ....... .. ...... 1..... rn:5 rd:5 \
&fcvt sf=0 esz=%esz_hsd shift=%fcvt_shift32
@fcvt64 1 ....... .. ...... ...... rn:5 rd:5 \
&fcvt sf=1 esz=%esz_hsd shift=%fcvt_shift64
SCVTF_g . 0011110 .. 000010 ...... ..... ..... @fcvt32
SCVTF_g . 0011110 .. 000010 ...... ..... ..... @fcvt64
UCVTF_g . 0011110 .. 000011 ...... ..... ..... @fcvt32
UCVTF_g . 0011110 .. 000011 ...... ..... ..... @fcvt64
FCVTZS_g . 0011110 .. 011000 ...... ..... ..... @fcvt32
FCVTZS_g . 0011110 .. 011000 ...... ..... ..... @fcvt64
FCVTZU_g . 0011110 .. 011001 ...... ..... ..... @fcvt32
FCVTZU_g . 0011110 .. 011001 ...... ..... ..... @fcvt64
# Conversion between floating-point and integer (general register)
@icvt sf:1 ....... .. ...... ...... rn:5 rd:5 \
&fcvt esz=%esz_hsd shift=0
SCVTF_g . 0011110 .. 100010 000000 ..... ..... @icvt
UCVTF_g . 0011110 .. 100011 000000 ..... ..... @icvt
FCVTNS_g . 0011110 .. 100000 000000 ..... ..... @icvt
FCVTNU_g . 0011110 .. 100001 000000 ..... ..... @icvt
FCVTPS_g . 0011110 .. 101000 000000 ..... ..... @icvt
FCVTPU_g . 0011110 .. 101001 000000 ..... ..... @icvt
FCVTMS_g . 0011110 .. 110000 000000 ..... ..... @icvt
FCVTMU_g . 0011110 .. 110001 000000 ..... ..... @icvt
FCVTZS_g . 0011110 .. 111000 000000 ..... ..... @icvt
FCVTZU_g . 0011110 .. 111001 000000 ..... ..... @icvt
FCVTAS_g . 0011110 .. 100100 000000 ..... ..... @icvt
FCVTAU_g . 0011110 .. 100101 000000 ..... ..... @icvt
FJCVTZS 0 0011110 01 111110 000000 ..... ..... @rr
FMOV_ws 0 0011110 00 100110 000000 ..... ..... @rr
FMOV_sw 0 0011110 00 100111 000000 ..... ..... @rr
FMOV_xd 1 0011110 01 100110 000000 ..... ..... @rr
FMOV_dx 1 0011110 01 100111 000000 ..... ..... @rr
# Move to/from upper half of 128-bit
FMOV_xu 1 0011110 10 101110 000000 ..... ..... @rr
FMOV_ux 1 0011110 10 101111 000000 ..... ..... @rr
# Half-precision allows both sf=0 and sf=1 with identical results
FMOV_xh - 0011110 11 100110 000000 ..... ..... @rr
FMOV_hx - 0011110 11 100111 000000 ..... ..... @rr
# Floating-point data processing (1 source)
FMOV_s 00011110 .. 1 000000 10000 ..... ..... @rr_hsd
FABS_s 00011110 .. 1 000001 10000 ..... ..... @rr_hsd
FNEG_s 00011110 .. 1 000010 10000 ..... ..... @rr_hsd
FSQRT_s 00011110 .. 1 000011 10000 ..... ..... @rr_hsd
FRINTN_s 00011110 .. 1 001000 10000 ..... ..... @rr_hsd
FRINTP_s 00011110 .. 1 001001 10000 ..... ..... @rr_hsd
FRINTM_s 00011110 .. 1 001010 10000 ..... ..... @rr_hsd
FRINTZ_s 00011110 .. 1 001011 10000 ..... ..... @rr_hsd
FRINTA_s 00011110 .. 1 001100 10000 ..... ..... @rr_hsd
FRINTX_s 00011110 .. 1 001110 10000 ..... ..... @rr_hsd
FRINTI_s 00011110 .. 1 001111 10000 ..... ..... @rr_hsd
BFCVT_s 00011110 01 1 000110 10000 ..... ..... @rr_s
FRINT32Z_s 00011110 0. 1 010000 10000 ..... ..... @rr_sd
FRINT32X_s 00011110 0. 1 010001 10000 ..... ..... @rr_sd
FRINT64Z_s 00011110 0. 1 010010 10000 ..... ..... @rr_sd
FRINT64X_s 00011110 0. 1 010011 10000 ..... ..... @rr_sd
FCVT_s_ds 00011110 00 1 000101 10000 ..... ..... @rr
FCVT_s_hs 00011110 00 1 000111 10000 ..... ..... @rr
FCVT_s_sd 00011110 01 1 000100 10000 ..... ..... @rr
FCVT_s_hd 00011110 01 1 000111 10000 ..... ..... @rr
FCVT_s_sh 00011110 11 1 000100 10000 ..... ..... @rr
FCVT_s_dh 00011110 11 1 000101 10000 ..... ..... @rr
# Floating-point Immediate
FMOVI_s 0001 1110 .. 1 imm:8 100 00000 rd:5 esz=%esz_hsd
# Floating-point Compare
FCMP 00011110 .. 1 rm:5 001000 rn:5 e:1 z:1 000 esz=%esz_hsd
# Floating-point Conditional Compare
FCCMP 00011110 .. 1 rm:5 cond:4 01 rn:5 e:1 nzcv:4 esz=%esz_hsd
# Advanced SIMD Modified Immediate / Shift by Immediate
%abcdefgh 16:3 5:5
@@ -1393,3 +1633,261 @@ UQRSHRN_si 0111 11110 .... ... 10011 1 ..... ..... @shri_s
SQRSHRUN_si 0111 11110 .... ... 10001 1 ..... ..... @shri_b
SQRSHRUN_si 0111 11110 .... ... 10001 1 ..... ..... @shri_h
SQRSHRUN_si 0111 11110 .... ... 10001 1 ..... ..... @shri_s
# Advanced SIMD scalar two-register miscellaneous
SQABS_s 0101 1110 ..1 00000 01111 0 ..... ..... @rr_e
SQNEG_s 0111 1110 ..1 00000 01111 0 ..... ..... @rr_e
ABS_s 0101 1110 111 00000 10111 0 ..... ..... @rr
NEG_s 0111 1110 111 00000 10111 0 ..... ..... @rr
CMGT0_s 0101 1110 111 00000 10001 0 ..... ..... @rr
CMGE0_s 0111 1110 111 00000 10001 0 ..... ..... @rr
CMEQ0_s 0101 1110 111 00000 10011 0 ..... ..... @rr
CMLE0_s 0111 1110 111 00000 10011 0 ..... ..... @rr
CMLT0_s 0101 1110 111 00000 10101 0 ..... ..... @rr
SQXTUN_s 0111 1110 ..1 00001 00101 0 ..... ..... @rr_e
SQXTN_s 0101 1110 ..1 00001 01001 0 ..... ..... @rr_e
UQXTN_s 0111 1110 ..1 00001 01001 0 ..... ..... @rr_e
FCVTXN_s 0111 1110 011 00001 01101 0 ..... ..... @rr_s
FCMGT0_s 0101 1110 111 11000 11001 0 ..... ..... @rr_h
FCMGT0_s 0101 1110 1.1 00000 11001 0 ..... ..... @rr_sd
FCMGE0_s 0111 1110 111 11000 11001 0 ..... ..... @rr_h
FCMGE0_s 0111 1110 1.1 00000 11001 0 ..... ..... @rr_sd
FCMEQ0_s 0101 1110 111 11000 11011 0 ..... ..... @rr_h
FCMEQ0_s 0101 1110 1.1 00000 11011 0 ..... ..... @rr_sd
FCMLE0_s 0111 1110 111 11000 11011 0 ..... ..... @rr_h
FCMLE0_s 0111 1110 1.1 00000 11011 0 ..... ..... @rr_sd
FCMLT0_s 0101 1110 111 11000 11101 0 ..... ..... @rr_h
FCMLT0_s 0101 1110 1.1 00000 11101 0 ..... ..... @rr_sd
FRECPE_s 0101 1110 111 11001 11011 0 ..... ..... @rr_h
FRECPE_s 0101 1110 1.1 00001 11011 0 ..... ..... @rr_sd
FRECPX_s 0101 1110 111 11001 11111 0 ..... ..... @rr_h
FRECPX_s 0101 1110 1.1 00001 11111 0 ..... ..... @rr_sd
FRSQRTE_s 0111 1110 111 11001 11011 0 ..... ..... @rr_h
FRSQRTE_s 0111 1110 1.1 00001 11011 0 ..... ..... @rr_sd
@icvt_h . ....... .. ...... ...... rn:5 rd:5 \
&fcvt sf=0 esz=1 shift=0
@icvt_sd . ....... .. ...... ...... rn:5 rd:5 \
&fcvt sf=0 esz=%esz_sd shift=0
SCVTF_f 0101 1110 011 11001 11011 0 ..... ..... @icvt_h
SCVTF_f 0101 1110 0.1 00001 11011 0 ..... ..... @icvt_sd
UCVTF_f 0111 1110 011 11001 11011 0 ..... ..... @icvt_h
UCVTF_f 0111 1110 0.1 00001 11011 0 ..... ..... @icvt_sd
FCVTNS_f 0101 1110 011 11001 10101 0 ..... ..... @icvt_h
FCVTNS_f 0101 1110 0.1 00001 10101 0 ..... ..... @icvt_sd
FCVTNU_f 0111 1110 011 11001 10101 0 ..... ..... @icvt_h
FCVTNU_f 0111 1110 0.1 00001 10101 0 ..... ..... @icvt_sd
FCVTPS_f 0101 1110 111 11001 10101 0 ..... ..... @icvt_h
FCVTPS_f 0101 1110 1.1 00001 10101 0 ..... ..... @icvt_sd
FCVTPU_f 0111 1110 111 11001 10101 0 ..... ..... @icvt_h
FCVTPU_f 0111 1110 1.1 00001 10101 0 ..... ..... @icvt_sd
FCVTMS_f 0101 1110 011 11001 10111 0 ..... ..... @icvt_h
FCVTMS_f 0101 1110 0.1 00001 10111 0 ..... ..... @icvt_sd
FCVTMU_f 0111 1110 011 11001 10111 0 ..... ..... @icvt_h
FCVTMU_f 0111 1110 0.1 00001 10111 0 ..... ..... @icvt_sd
FCVTZS_f 0101 1110 111 11001 10111 0 ..... ..... @icvt_h
FCVTZS_f 0101 1110 1.1 00001 10111 0 ..... ..... @icvt_sd
FCVTZU_f 0111 1110 111 11001 10111 0 ..... ..... @icvt_h
FCVTZU_f 0111 1110 1.1 00001 10111 0 ..... ..... @icvt_sd
FCVTAS_f 0101 1110 011 11001 11001 0 ..... ..... @icvt_h
FCVTAS_f 0101 1110 0.1 00001 11001 0 ..... ..... @icvt_sd
FCVTAU_f 0111 1110 011 11001 11001 0 ..... ..... @icvt_h
FCVTAU_f 0111 1110 0.1 00001 11001 0 ..... ..... @icvt_sd
%fcvt_f_sh_h 16:4 !function=rsub_16
%fcvt_f_sh_s 16:5 !function=rsub_32
%fcvt_f_sh_d 16:6 !function=rsub_64
@fcvt_fixed_h .... .... . 001 .... ...... rn:5 rd:5 \
&fcvt sf=0 esz=1 shift=%fcvt_f_sh_h
@fcvt_fixed_s .... .... . 01 ..... ...... rn:5 rd:5 \
&fcvt sf=0 esz=2 shift=%fcvt_f_sh_s
@fcvt_fixed_d .... .... . 1 ...... ...... rn:5 rd:5 \
&fcvt sf=0 esz=3 shift=%fcvt_f_sh_d
SCVTF_f 0101 1111 0 ....... 111001 ..... ..... @fcvt_fixed_h
SCVTF_f 0101 1111 0 ....... 111001 ..... ..... @fcvt_fixed_s
SCVTF_f 0101 1111 0 ....... 111001 ..... ..... @fcvt_fixed_d
UCVTF_f 0111 1111 0 ....... 111001 ..... ..... @fcvt_fixed_h
UCVTF_f 0111 1111 0 ....... 111001 ..... ..... @fcvt_fixed_s
UCVTF_f 0111 1111 0 ....... 111001 ..... ..... @fcvt_fixed_d
FCVTZS_f 0101 1111 0 ....... 111111 ..... ..... @fcvt_fixed_h
FCVTZS_f 0101 1111 0 ....... 111111 ..... ..... @fcvt_fixed_s
FCVTZS_f 0101 1111 0 ....... 111111 ..... ..... @fcvt_fixed_d
FCVTZU_f 0111 1111 0 ....... 111111 ..... ..... @fcvt_fixed_h
FCVTZU_f 0111 1111 0 ....... 111111 ..... ..... @fcvt_fixed_s
FCVTZU_f 0111 1111 0 ....... 111111 ..... ..... @fcvt_fixed_d
# Advanced SIMD two-register miscellaneous
SQABS_v 0.00 1110 ..1 00000 01111 0 ..... ..... @qrr_e
SQNEG_v 0.10 1110 ..1 00000 01111 0 ..... ..... @qrr_e
ABS_v 0.00 1110 ..1 00000 10111 0 ..... ..... @qrr_e
NEG_v 0.10 1110 ..1 00000 10111 0 ..... ..... @qrr_e
CLS_v 0.00 1110 ..1 00000 01001 0 ..... ..... @qrr_e
CLZ_v 0.10 1110 ..1 00000 01001 0 ..... ..... @qrr_e
CNT_v 0.00 1110 001 00000 01011 0 ..... ..... @qrr_b
NOT_v 0.10 1110 001 00000 01011 0 ..... ..... @qrr_b
RBIT_v 0.10 1110 011 00000 01011 0 ..... ..... @qrr_b
CMGT0_v 0.00 1110 ..1 00000 10001 0 ..... ..... @qrr_e
CMGE0_v 0.10 1110 ..1 00000 10001 0 ..... ..... @qrr_e
CMEQ0_v 0.00 1110 ..1 00000 10011 0 ..... ..... @qrr_e
CMLE0_v 0.10 1110 ..1 00000 10011 0 ..... ..... @qrr_e
CMLT0_v 0.00 1110 ..1 00000 10101 0 ..... ..... @qrr_e
REV16_v 0.00 1110 001 00000 00011 0 ..... ..... @qrr_b
REV32_v 0.10 1110 0.1 00000 00001 0 ..... ..... @qrr_bh
REV64_v 0.00 1110 ..1 00000 00001 0 ..... ..... @qrr_e
SADDLP_v 0.00 1110 ..1 00000 00101 0 ..... ..... @qrr_e
UADDLP_v 0.10 1110 ..1 00000 00101 0 ..... ..... @qrr_e
SADALP_v 0.00 1110 ..1 00000 01101 0 ..... ..... @qrr_e
UADALP_v 0.10 1110 ..1 00000 01101 0 ..... ..... @qrr_e
XTN 0.00 1110 ..1 00001 00101 0 ..... ..... @qrr_e
SQXTUN_v 0.10 1110 ..1 00001 00101 0 ..... ..... @qrr_e
SQXTN_v 0.00 1110 ..1 00001 01001 0 ..... ..... @qrr_e
UQXTN_v 0.10 1110 ..1 00001 01001 0 ..... ..... @qrr_e
FCVTN_v 0.00 1110 0.1 00001 01101 0 ..... ..... @qrr_hs
FCVTXN_v 0.10 1110 011 00001 01101 0 ..... ..... @qrr_s
BFCVTN_v 0.00 1110 101 00001 01101 0 ..... ..... @qrr_h
SHLL_v 0.10 1110 ..1 00001 00111 0 ..... ..... @qrr_e
FABS_v 0.00 1110 111 11000 11111 0 ..... ..... @qrr_h
FABS_v 0.00 1110 1.1 00000 11111 0 ..... ..... @qrr_sd
FNEG_v 0.10 1110 111 11000 11111 0 ..... ..... @qrr_h
FNEG_v 0.10 1110 1.1 00000 11111 0 ..... ..... @qrr_sd
FSQRT_v 0.10 1110 111 11001 11111 0 ..... ..... @qrr_h
FSQRT_v 0.10 1110 1.1 00001 11111 0 ..... ..... @qrr_sd
FRINTN_v 0.00 1110 011 11001 10001 0 ..... ..... @qrr_h
FRINTN_v 0.00 1110 0.1 00001 10001 0 ..... ..... @qrr_sd
FRINTM_v 0.00 1110 011 11001 10011 0 ..... ..... @qrr_h
FRINTM_v 0.00 1110 0.1 00001 10011 0 ..... ..... @qrr_sd
FRINTP_v 0.00 1110 111 11001 10001 0 ..... ..... @qrr_h
FRINTP_v 0.00 1110 1.1 00001 10001 0 ..... ..... @qrr_sd
FRINTZ_v 0.00 1110 111 11001 10011 0 ..... ..... @qrr_h
FRINTZ_v 0.00 1110 1.1 00001 10011 0 ..... ..... @qrr_sd
FRINTA_v 0.10 1110 011 11001 10001 0 ..... ..... @qrr_h
FRINTA_v 0.10 1110 0.1 00001 10001 0 ..... ..... @qrr_sd
FRINTX_v 0.10 1110 011 11001 10011 0 ..... ..... @qrr_h
FRINTX_v 0.10 1110 0.1 00001 10011 0 ..... ..... @qrr_sd
FRINTI_v 0.10 1110 111 11001 10011 0 ..... ..... @qrr_h
FRINTI_v 0.10 1110 1.1 00001 10011 0 ..... ..... @qrr_sd
FRINT32Z_v 0.00 1110 0.1 00001 11101 0 ..... ..... @qrr_sd
FRINT32X_v 0.10 1110 0.1 00001 11101 0 ..... ..... @qrr_sd
FRINT64Z_v 0.00 1110 0.1 00001 11111 0 ..... ..... @qrr_sd
FRINT64X_v 0.10 1110 0.1 00001 11111 0 ..... ..... @qrr_sd
SCVTF_vi 0.00 1110 011 11001 11011 0 ..... ..... @qrr_h
SCVTF_vi 0.00 1110 0.1 00001 11011 0 ..... ..... @qrr_sd
UCVTF_vi 0.10 1110 011 11001 11011 0 ..... ..... @qrr_h
UCVTF_vi 0.10 1110 0.1 00001 11011 0 ..... ..... @qrr_sd
FCVTNS_vi 0.00 1110 011 11001 10101 0 ..... ..... @qrr_h
FCVTNS_vi 0.00 1110 0.1 00001 10101 0 ..... ..... @qrr_sd
FCVTNU_vi 0.10 1110 011 11001 10101 0 ..... ..... @qrr_h
FCVTNU_vi 0.10 1110 0.1 00001 10101 0 ..... ..... @qrr_sd
FCVTPS_vi 0.00 1110 111 11001 10101 0 ..... ..... @qrr_h
FCVTPS_vi 0.00 1110 1.1 00001 10101 0 ..... ..... @qrr_sd
FCVTPU_vi 0.10 1110 111 11001 10101 0 ..... ..... @qrr_h
FCVTPU_vi 0.10 1110 1.1 00001 10101 0 ..... ..... @qrr_sd
FCVTMS_vi 0.00 1110 011 11001 10111 0 ..... ..... @qrr_h
FCVTMS_vi 0.00 1110 0.1 00001 10111 0 ..... ..... @qrr_sd
FCVTMU_vi 0.10 1110 011 11001 10111 0 ..... ..... @qrr_h
FCVTMU_vi 0.10 1110 0.1 00001 10111 0 ..... ..... @qrr_sd
FCVTZS_vi 0.00 1110 111 11001 10111 0 ..... ..... @qrr_h
FCVTZS_vi 0.00 1110 1.1 00001 10111 0 ..... ..... @qrr_sd
FCVTZU_vi 0.10 1110 111 11001 10111 0 ..... ..... @qrr_h
FCVTZU_vi 0.10 1110 1.1 00001 10111 0 ..... ..... @qrr_sd
FCVTAS_vi 0.00 1110 011 11001 11001 0 ..... ..... @qrr_h
FCVTAS_vi 0.00 1110 0.1 00001 11001 0 ..... ..... @qrr_sd
FCVTAU_vi 0.10 1110 011 11001 11001 0 ..... ..... @qrr_h
FCVTAU_vi 0.10 1110 0.1 00001 11001 0 ..... ..... @qrr_sd
FCMGT0_v 0.00 1110 111 11000 11001 0 ..... ..... @qrr_h
FCMGT0_v 0.00 1110 1.1 00000 11001 0 ..... ..... @qrr_sd
FCMGE0_v 0.10 1110 111 11000 11001 0 ..... ..... @qrr_h
FCMGE0_v 0.10 1110 1.1 00000 11001 0 ..... ..... @qrr_sd
FCMEQ0_v 0.00 1110 111 11000 11011 0 ..... ..... @qrr_h
FCMEQ0_v 0.00 1110 1.1 00000 11011 0 ..... ..... @qrr_sd
FCMLE0_v 0.10 1110 111 11000 11011 0 ..... ..... @qrr_h
FCMLE0_v 0.10 1110 1.1 00000 11011 0 ..... ..... @qrr_sd
FCMLT0_v 0.00 1110 111 11000 11101 0 ..... ..... @qrr_h
FCMLT0_v 0.00 1110 1.1 00000 11101 0 ..... ..... @qrr_sd
FRECPE_v 0.00 1110 111 11001 11011 0 ..... ..... @qrr_h
FRECPE_v 0.00 1110 1.1 00001 11011 0 ..... ..... @qrr_sd
FRSQRTE_v 0.10 1110 111 11001 11011 0 ..... ..... @qrr_h
FRSQRTE_v 0.10 1110 1.1 00001 11011 0 ..... ..... @qrr_sd
URECPE_v 0.00 1110 101 00001 11001 0 ..... ..... @qrr_s
URSQRTE_v 0.10 1110 101 00001 11001 0 ..... ..... @qrr_s
FCVTL_v 0.00 1110 0.1 00001 01111 0 ..... ..... @qrr_sd
&fcvt_q rd rn esz q shift
@fcvtq_h . q:1 . ...... 001 .... ...... rn:5 rd:5 \
&fcvt_q esz=1 shift=%fcvt_f_sh_h
@fcvtq_s . q:1 . ...... 01 ..... ...... rn:5 rd:5 \
&fcvt_q esz=2 shift=%fcvt_f_sh_s
@fcvtq_d . q:1 . ...... 1 ...... ...... rn:5 rd:5 \
&fcvt_q esz=3 shift=%fcvt_f_sh_d
SCVTF_vf 0.00 11110 ....... 111001 ..... ..... @fcvtq_h
SCVTF_vf 0.00 11110 ....... 111001 ..... ..... @fcvtq_s
SCVTF_vf 0.00 11110 ....... 111001 ..... ..... @fcvtq_d
UCVTF_vf 0.10 11110 ....... 111001 ..... ..... @fcvtq_h
UCVTF_vf 0.10 11110 ....... 111001 ..... ..... @fcvtq_s
UCVTF_vf 0.10 11110 ....... 111001 ..... ..... @fcvtq_d
FCVTZS_vf 0.00 11110 ....... 111111 ..... ..... @fcvtq_h
FCVTZS_vf 0.00 11110 ....... 111111 ..... ..... @fcvtq_s
FCVTZS_vf 0.00 11110 ....... 111111 ..... ..... @fcvtq_d
FCVTZU_vf 0.10 11110 ....... 111111 ..... ..... @fcvtq_h
FCVTZU_vf 0.10 11110 ....... 111111 ..... ..... @fcvtq_s
FCVTZU_vf 0.10 11110 ....... 111111 ..... ..... @fcvtq_d
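
To make the decodetree snippets above easier to follow: each pattern line fixes some opcode bits, extracts the named bitfields into an argument struct, and the generated decoder dispatches to a trans_<NAME> function supplied by the translator. A simplified sketch of the consumer side for the CLZ pattern (illustrative only; the real code lives in target/arm/tcg/translate-a64.c and differs in detail):

static bool trans_CLZ(DisasContext *s, arg_rr_sf *a)
{
    /*
     * a->rd, a->rn and a->sf were extracted by the generated decoder
     * from the pattern:  CLZ . 10 11010110 00000 000100 ..... ..... @rr_sf
     */
    TCGv_i64 rd = cpu_reg(s, a->rd);             /* destination Xd */
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf); /* zero-extended when !sf */

    tcg_gen_clzi_i64(rd, rn, 64);                /* clz of 0 is defined as 64 */
    if (!a->sf) {
        /* 32-bit form: the 32 zero-extension bits were counted as well */
        tcg_gen_subi_i64(rd, rd, 32);
    }
    return true;
}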


@@ -2358,3 +2358,372 @@ void gen_gvec_urhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
assert(vece <= MO_32);
tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}
void gen_gvec_cls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
static const GVecGen2 g[] = {
{ .fni4 = gen_helper_neon_cls_s8,
.vece = MO_8 },
{ .fni4 = gen_helper_neon_cls_s16,
.vece = MO_16 },
{ .fni4 = tcg_gen_clrsb_i32,
.vece = MO_32 },
};
assert(vece <= MO_32);
tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
}
static void gen_clz32_i32(TCGv_i32 d, TCGv_i32 n)
{
tcg_gen_clzi_i32(d, n, 32);
}
void gen_gvec_clz(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
static const GVecGen2 g[] = {
{ .fni4 = gen_helper_neon_clz_u8,
.vece = MO_8 },
{ .fni4 = gen_helper_neon_clz_u16,
.vece = MO_16 },
{ .fni4 = gen_clz32_i32,
.vece = MO_32 },
};
assert(vece <= MO_32);
tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
}
void gen_gvec_cnt(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
assert(vece == MO_8);
tcg_gen_gvec_2_ool(rd_ofs, rn_ofs, opr_sz, max_sz, 0,
gen_helper_gvec_cnt_b);
}
void gen_gvec_rbit(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
assert(vece == MO_8);
tcg_gen_gvec_2_ool(rd_ofs, rn_ofs, opr_sz, max_sz, 0,
gen_helper_gvec_rbit_b);
}
void gen_gvec_rev16(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
assert(vece == MO_8);
tcg_gen_gvec_rotli(MO_16, rd_ofs, rn_ofs, 8, opr_sz, max_sz);
}
static void gen_bswap32_i64(TCGv_i64 d, TCGv_i64 n)
{
tcg_gen_bswap64_i64(d, n);
tcg_gen_rotli_i64(d, d, 32);
}
void gen_gvec_rev32(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
static const GVecGen2 g = {
.fni8 = gen_bswap32_i64,
.fni4 = tcg_gen_bswap32_i32,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
.vece = MO_32
};
switch (vece) {
case MO_16:
tcg_gen_gvec_rotli(MO_32, rd_ofs, rn_ofs, 16, opr_sz, max_sz);
break;
case MO_8:
tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g);
break;
default:
g_assert_not_reached();
}
}
void gen_gvec_rev64(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
static const GVecGen2 g[] = {
{ .fni8 = tcg_gen_bswap64_i64,
.vece = MO_64 },
{ .fni8 = tcg_gen_hswap_i64,
.vece = MO_64 },
};
switch (vece) {
case MO_32:
tcg_gen_gvec_rotli(MO_64, rd_ofs, rn_ofs, 32, opr_sz, max_sz);
break;
case MO_8:
case MO_16:
tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
break;
default:
g_assert_not_reached();
}
}
static void gen_saddlp_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
int half = 4 << vece;
TCGv_vec t = tcg_temp_new_vec_matching(d);
tcg_gen_shli_vec(vece, t, n, half);
tcg_gen_sari_vec(vece, d, n, half);
tcg_gen_sari_vec(vece, t, t, half);
tcg_gen_add_vec(vece, d, d, t);
}
static void gen_saddlp_s_i64(TCGv_i64 d, TCGv_i64 n)
{
TCGv_i64 t = tcg_temp_new_i64();
tcg_gen_ext32s_i64(t, n);
tcg_gen_sari_i64(d, n, 32);
tcg_gen_add_i64(d, d, t);
}
void gen_gvec_saddlp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
static const TCGOpcode vecop_list[] = {
INDEX_op_sari_vec, INDEX_op_shli_vec, INDEX_op_add_vec, 0
};
static const GVecGen2 g[] = {
{ .fniv = gen_saddlp_vec,
.fni8 = gen_helper_neon_addlp_s8,
.opt_opc = vecop_list,
.vece = MO_16 },
{ .fniv = gen_saddlp_vec,
.fni8 = gen_helper_neon_addlp_s16,
.opt_opc = vecop_list,
.vece = MO_32 },
{ .fniv = gen_saddlp_vec,
.fni8 = gen_saddlp_s_i64,
.opt_opc = vecop_list,
.vece = MO_64 },
};
assert(vece <= MO_32);
tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
}
static void gen_sadalp_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
gen_saddlp_vec(vece, t, n);
tcg_gen_add_vec(vece, d, d, t);
}
static void gen_sadalp_b_i64(TCGv_i64 d, TCGv_i64 n)
{
TCGv_i64 t = tcg_temp_new_i64();
gen_helper_neon_addlp_s8(t, n);
tcg_gen_vec_add16_i64(d, d, t);
}
static void gen_sadalp_h_i64(TCGv_i64 d, TCGv_i64 n)
{
TCGv_i64 t = tcg_temp_new_i64();
gen_helper_neon_addlp_s16(t, n);
tcg_gen_vec_add32_i64(d, d, t);
}
static void gen_sadalp_s_i64(TCGv_i64 d, TCGv_i64 n)
{
TCGv_i64 t = tcg_temp_new_i64();
gen_saddlp_s_i64(t, n);
tcg_gen_add_i64(d, d, t);
}
void gen_gvec_sadalp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
static const TCGOpcode vecop_list[] = {
INDEX_op_sari_vec, INDEX_op_shli_vec, INDEX_op_add_vec, 0
};
static const GVecGen2 g[] = {
{ .fniv = gen_sadalp_vec,
.fni8 = gen_sadalp_b_i64,
.opt_opc = vecop_list,
.load_dest = true,
.vece = MO_16 },
{ .fniv = gen_sadalp_vec,
.fni8 = gen_sadalp_h_i64,
.opt_opc = vecop_list,
.load_dest = true,
.vece = MO_32 },
{ .fniv = gen_sadalp_vec,
.fni8 = gen_sadalp_s_i64,
.opt_opc = vecop_list,
.load_dest = true,
.vece = MO_64 },
};
assert(vece <= MO_32);
tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
}
static void gen_uaddlp_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
int half = 4 << vece;
TCGv_vec t = tcg_temp_new_vec_matching(d);
TCGv_vec m = tcg_constant_vec_matching(d, vece, MAKE_64BIT_MASK(0, half));
tcg_gen_shri_vec(vece, t, n, half);
tcg_gen_and_vec(vece, d, n, m);
tcg_gen_add_vec(vece, d, d, t);
}
static void gen_uaddlp_b_i64(TCGv_i64 d, TCGv_i64 n)
{
TCGv_i64 t = tcg_temp_new_i64();
TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0xff));
tcg_gen_shri_i64(t, n, 8);
tcg_gen_and_i64(d, n, m);
tcg_gen_and_i64(t, t, m);
/* No carry between widened unsigned elements. */
tcg_gen_add_i64(d, d, t);
}
static void gen_uaddlp_h_i64(TCGv_i64 d, TCGv_i64 n)
{
TCGv_i64 t = tcg_temp_new_i64();
TCGv_i64 m = tcg_constant_i64(dup_const(MO_32, 0xffff));
tcg_gen_shri_i64(t, n, 16);
tcg_gen_and_i64(d, n, m);
tcg_gen_and_i64(t, t, m);
/* No carry between widened unsigned elements. */
tcg_gen_add_i64(d, d, t);
}
static void gen_uaddlp_s_i64(TCGv_i64 d, TCGv_i64 n)
{
TCGv_i64 t = tcg_temp_new_i64();
tcg_gen_ext32u_i64(t, n);
tcg_gen_shri_i64(d, n, 32);
tcg_gen_add_i64(d, d, t);
}
void gen_gvec_uaddlp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
static const TCGOpcode vecop_list[] = {
INDEX_op_shri_vec, INDEX_op_add_vec, 0
};
static const GVecGen2 g[] = {
{ .fniv = gen_uaddlp_vec,
.fni8 = gen_uaddlp_b_i64,
.opt_opc = vecop_list,
.vece = MO_16 },
{ .fniv = gen_uaddlp_vec,
.fni8 = gen_uaddlp_h_i64,
.opt_opc = vecop_list,
.vece = MO_32 },
{ .fniv = gen_uaddlp_vec,
.fni8 = gen_uaddlp_s_i64,
.opt_opc = vecop_list,
.vece = MO_64 },
};
assert(vece <= MO_32);
tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
}
static void gen_uadalp_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
gen_uaddlp_vec(vece, t, n);
tcg_gen_add_vec(vece, d, d, t);
}
static void gen_uadalp_b_i64(TCGv_i64 d, TCGv_i64 n)
{
TCGv_i64 t = tcg_temp_new_i64();
gen_uaddlp_b_i64(t, n);
tcg_gen_vec_add16_i64(d, d, t);
}
static void gen_uadalp_h_i64(TCGv_i64 d, TCGv_i64 n)
{
TCGv_i64 t = tcg_temp_new_i64();
gen_uaddlp_h_i64(t, n);
tcg_gen_vec_add32_i64(d, d, t);
}
static void gen_uadalp_s_i64(TCGv_i64 d, TCGv_i64 n)
{
TCGv_i64 t = tcg_temp_new_i64();
gen_uaddlp_s_i64(t, n);
tcg_gen_add_i64(d, d, t);
}
void gen_gvec_uadalp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
static const TCGOpcode vecop_list[] = {
INDEX_op_shri_vec, INDEX_op_add_vec, 0
};
static const GVecGen2 g[] = {
{ .fniv = gen_uadalp_vec,
.fni8 = gen_uadalp_b_i64,
.load_dest = true,
.opt_opc = vecop_list,
.vece = MO_16 },
{ .fniv = gen_uadalp_vec,
.fni8 = gen_uadalp_h_i64,
.load_dest = true,
.opt_opc = vecop_list,
.vece = MO_32 },
{ .fniv = gen_uadalp_vec,
.fni8 = gen_uadalp_s_i64,
.load_dest = true,
.opt_opc = vecop_list,
.vece = MO_64 },
};
assert(vece <= MO_32);
tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
}
void gen_gvec_fabs(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t oprsz, uint32_t maxsz)
{
uint64_t s_bit = 1ull << ((8 << vece) - 1);
tcg_gen_gvec_andi(vece, dofs, aofs, s_bit - 1, oprsz, maxsz);
}
void gen_gvec_fneg(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t oprsz, uint32_t maxsz)
{
uint64_t s_bit = 1ull << ((8 << vece) - 1);
tcg_gen_gvec_xori(vece, dofs, aofs, s_bit, oprsz, maxsz);
}
void gen_gvec_urecpe(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
assert(vece == MO_32);
tcg_gen_gvec_2_ool(rd_ofs, rn_ofs, opr_sz, max_sz, 0,
gen_helper_gvec_urecpe_s);
}
void gen_gvec_ursqrte(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
assert(vece == MO_32);
tcg_gen_gvec_2_ool(rd_ofs, rn_ofs, opr_sz, max_sz, 0,
gen_helper_gvec_ursqrte_s);
}


@@ -306,67 +306,6 @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
}
/* Pairwise long add: add pairs of adjacent elements into
* double-width elements in the result (eg _s8 is an 8x8->16 op)
*/
uint64_t HELPER(neon_addlp_s8)(uint64_t a)
{
uint64_t nsignmask = 0x0080008000800080ULL;
uint64_t wsignmask = 0x8000800080008000ULL;
uint64_t elementmask = 0x00ff00ff00ff00ffULL;
uint64_t tmp1, tmp2;
uint64_t res, signres;
/* Extract odd elements, sign extend each to a 16 bit field */
tmp1 = a & elementmask;
tmp1 ^= nsignmask;
tmp1 |= wsignmask;
tmp1 = (tmp1 - nsignmask) ^ wsignmask;
/* Ditto for the even elements */
tmp2 = (a >> 8) & elementmask;
tmp2 ^= nsignmask;
tmp2 |= wsignmask;
tmp2 = (tmp2 - nsignmask) ^ wsignmask;
/* calculate the result by summing bits 0..14, 16..22, etc,
* and then adjusting the sign bits 15, 23, etc manually.
* This ensures the addition can't overflow the 16 bit field.
*/
signres = (tmp1 ^ tmp2) & wsignmask;
res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
res ^= signres;
return res;
}
uint64_t HELPER(neon_addlp_u8)(uint64_t a)
{
uint64_t tmp;
tmp = a & 0x00ff00ff00ff00ffULL;
tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
return tmp;
}
uint64_t HELPER(neon_addlp_s16)(uint64_t a)
{
int32_t reslo, reshi;
reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);
return (uint32_t)reslo | (((uint64_t)reshi) << 32);
}
uint64_t HELPER(neon_addlp_u16)(uint64_t a)
{
uint64_t tmp;
tmp = a & 0x0000ffff0000ffffULL;
tmp += (a >> 16) & 0x0000ffff0000ffffULL;
return tmp;
}
/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
{
@@ -469,23 +408,13 @@ float64 HELPER(frecpx_f64)(float64 a, void *fpstp)
float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env)
{
/* Von Neumann rounding is implemented by using round-to-zero
* and then setting the LSB of the result if Inexact was raised.
*/
float32 r;
float_status *fpst = &env->vfp.fp_status;
float_status tstat = *fpst;
int exflags;
int old = get_float_rounding_mode(fpst);
set_float_rounding_mode(float_round_to_zero, &tstat);
set_float_exception_flags(0, &tstat);
r = float64_to_float32(a, &tstat);
exflags = get_float_exception_flags(&tstat);
if (exflags & float_flag_inexact) {
r = make_float32(float32_val(r) | 1);
}
exflags |= get_float_exception_flags(fpst);
set_float_exception_flags(exflags, fpst);
set_float_rounding_mode(float_round_to_odd, fpst);
r = float64_to_float32(a, fpst);
set_float_rounding_mode(old, fpst);
return r;
}
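
The rewrite above replaces the hand-rolled Von Neumann rounding (round toward zero, then OR in the LSB if the Inexact flag fired) with softfloat's native float_round_to_odd mode; the semantics are identical. A standalone sketch of the same technique using host floating point (it ignores the NaN and flag-accumulation details that softfloat handles for real):

#include <fenv.h>
#include <stdint.h>
#include <string.h>

/* Strict compilers may also want: #pragma STDC FENV_ACCESS ON */
static float narrow_round_to_odd(double a)
{
    int old = fegetround();
    fesetround(FE_TOWARDZERO);
    feclearexcept(FE_INEXACT);

    float r = (float)a;                  /* truncating narrow conversion */
    if (fetestexcept(FE_INEXACT)) {
        uint32_t bits;
        memcpy(&bits, &r, sizeof(bits)); /* type-pun safely via memcpy */
        bits |= 1;                       /* sticky LSB: "round to odd" */
        memcpy(&r, &bits, sizeof(r));
    }
    fesetround(old);
    return r;
}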
@@ -679,38 +608,6 @@ uint32_t HELPER(advsimd_rinth)(uint32_t x, void *fp_status)
return ret;
}
/*
* Half-precision floating point conversion functions
*
* There are a multitude of conversion functions with various
* different rounding modes. This is dealt with by the calling code
* setting the mode appropriately before calling the helper.
*/
uint32_t HELPER(advsimd_f16tosinth)(uint32_t a, void *fpstp)
{
float_status *fpst = fpstp;
/* Invalid if we are passed a NaN */
if (float16_is_any_nan(a)) {
float_raise(float_flag_invalid, fpst);
return 0;
}
return float16_to_int16(a, fpst);
}
uint32_t HELPER(advsimd_f16touinth)(uint32_t a, void *fpstp)
{
float_status *fpst = fpstp;
/* Invalid if we are passed a NaN */
if (float16_is_any_nan(a)) {
float_raise(float_flag_invalid, fpst);
return 0;
}
return float16_to_uint16(a, fpst);
}
static int el_from_spsr(uint32_t spsr)
{
/* Return the exception level that this SPSR is requesting a return to,
@@ -915,17 +812,6 @@ illegal_return:
"resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc);
}
/*
* Square Root and Reciprocal square root
*/
uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp)
{
float_status *s = fpstp;
return float16_sqrt(a, s);
}
void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
{
uintptr_t ra = GETPC();


@@ -41,10 +41,6 @@ DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_1(neon_addlp_u8, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_1(neon_addlp_s16, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_1(neon_addlp_u16, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
@@ -78,9 +74,6 @@ DEF_HELPER_3(advsimd_mulx2h, i32, i32, i32, ptr)
DEF_HELPER_4(advsimd_muladd2h, i32, i32, i32, i32, ptr)
DEF_HELPER_2(advsimd_rinth_exact, f16, f16, ptr)
DEF_HELPER_2(advsimd_rinth, f16, f16, ptr)
DEF_HELPER_2(advsimd_f16tosinth, i32, f16, ptr)
DEF_HELPER_2(advsimd_f16touinth, i32, f16, ptr)
DEF_HELPER_2(sqrt_f16, f16, f16, ptr)
DEF_HELPER_2(exception_return, void, env, i64)
DEF_HELPER_FLAGS_2(dc_zva, TCG_CALL_NO_WG, void, env, i64)


@@ -39,6 +39,7 @@ arm_ss.add(files(
'op_helper.c',
'tlb_helper.c',
'vec_helper.c',
'tlb-insns.c',
))
arm_ss.add(when: 'TARGET_AARCH64', if_true: files(


@@ -525,27 +525,6 @@ uint32_t HELPER(neon_cls_s32)(uint32_t x)
return count - 1;
}
/* Bit count. */
uint32_t HELPER(neon_cnt_u8)(uint32_t x)
{
x = (x & 0x55555555) + ((x >> 1) & 0x55555555);
x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
x = (x & 0x0f0f0f0f) + ((x >> 4) & 0x0f0f0f0f);
return x;
}
/* Reverse bits in each 8 bit word */
uint32_t HELPER(neon_rbit_u8)(uint32_t x)
{
x = ((x & 0xf0f0f0f0) >> 4)
| ((x & 0x0f0f0f0f) << 4);
x = ((x & 0x88888888) >> 3)
| ((x & 0x44444444) >> 1)
| ((x & 0x22222222) << 1)
| ((x & 0x11111111) << 3);
return x;
}
#define NEON_QDMULH16(dest, src1, src2, round) do { \
uint32_t tmp = (int32_t)(int16_t) src1 * (int16_t) src2; \
if ((tmp ^ (tmp << 1)) & SIGNBIT) { \
@@ -847,62 +826,47 @@ uint64_t HELPER(neon_widen_s16)(uint32_t x)
return ((uint32_t)(int16_t)x) | (high << 32);
}
uint64_t HELPER(neon_addl_u16)(uint64_t a, uint64_t b)
/* Pairwise long add: add pairs of adjacent elements into
* double-width elements in the result (eg _s8 is an 8x8->16 op)
*/
uint64_t HELPER(neon_addlp_s8)(uint64_t a)
{
uint64_t mask;
mask = (a ^ b) & 0x8000800080008000ull;
a &= ~0x8000800080008000ull;
b &= ~0x8000800080008000ull;
return (a + b) ^ mask;
uint64_t nsignmask = 0x0080008000800080ULL;
uint64_t wsignmask = 0x8000800080008000ULL;
uint64_t elementmask = 0x00ff00ff00ff00ffULL;
uint64_t tmp1, tmp2;
uint64_t res, signres;
/* Extract odd elements, sign extend each to a 16 bit field */
tmp1 = a & elementmask;
tmp1 ^= nsignmask;
tmp1 |= wsignmask;
tmp1 = (tmp1 - nsignmask) ^ wsignmask;
/* Ditto for the even elements */
tmp2 = (a >> 8) & elementmask;
tmp2 ^= nsignmask;
tmp2 |= wsignmask;
tmp2 = (tmp2 - nsignmask) ^ wsignmask;
/* calculate the result by summing bits 0..14, 16..22, etc,
* and then adjusting the sign bits 15, 23, etc manually.
* This ensures the addition can't overflow the 16 bit field.
*/
signres = (tmp1 ^ tmp2) & wsignmask;
res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
res ^= signres;
return res;
}
uint64_t HELPER(neon_addl_u32)(uint64_t a, uint64_t b)
uint64_t HELPER(neon_addlp_s16)(uint64_t a)
{
uint64_t mask;
mask = (a ^ b) & 0x8000000080000000ull;
a &= ~0x8000000080000000ull;
b &= ~0x8000000080000000ull;
return (a + b) ^ mask;
}
int32_t reslo, reshi;
uint64_t HELPER(neon_paddl_u16)(uint64_t a, uint64_t b)
{
uint64_t tmp;
uint64_t tmp2;
reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);
tmp = a & 0x0000ffff0000ffffull;
tmp += (a >> 16) & 0x0000ffff0000ffffull;
tmp2 = b & 0xffff0000ffff0000ull;
tmp2 += (b << 16) & 0xffff0000ffff0000ull;
return ( tmp & 0xffff)
| ((tmp >> 16) & 0xffff0000ull)
| ((tmp2 << 16) & 0xffff00000000ull)
| ( tmp2 & 0xffff000000000000ull);
}
uint64_t HELPER(neon_paddl_u32)(uint64_t a, uint64_t b)
{
uint32_t low = a + (a >> 32);
uint32_t high = b + (b >> 32);
return low + ((uint64_t)high << 32);
}
uint64_t HELPER(neon_subl_u16)(uint64_t a, uint64_t b)
{
uint64_t mask;
mask = (a ^ ~b) & 0x8000800080008000ull;
a |= 0x8000800080008000ull;
b &= ~0x8000800080008000ull;
return (a - b) ^ mask;
}
uint64_t HELPER(neon_subl_u32)(uint64_t a, uint64_t b)
{
uint64_t mask;
mask = (a ^ ~b) & 0x8000000080000000ull;
a |= 0x8000000080000000ull;
b &= ~0x8000000080000000ull;
return (a - b) ^ mask;
return (uint32_t)reslo | (((uint64_t)reshi) << 32);
}
uint64_t HELPER(neon_addl_saturate_s32)(CPUARMState *env, uint64_t a, uint64_t b)
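
As a worked example of the pairwise long add that neon_addlp_s8 implements with masking tricks above, here is a plain reference model (standalone illustration, not QEMU code): each adjacent pair of signed bytes widens to 16 bits before the add, so 0x7f + 0x01 yields 0x0080 with no 8-bit wraparound.

#include <stdint.h>

/* Reference model for SADDLP on one 64-bit lane of int8 elements. */
static uint64_t saddlp_s8_ref(uint64_t a)
{
    uint64_t r = 0;
    for (int i = 0; i < 4; i++) {
        int8_t lo = (int8_t)(a >> (16 * i));      /* even element */
        int8_t hi = (int8_t)(a >> (16 * i + 8));  /* odd element  */
        uint16_t sum = (uint16_t)(lo + hi);       /* 8x8 -> 16 add */
        r |= (uint64_t)sum << (16 * i);
    }
    return r;
}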

target/arm/tcg/tlb-insns.c (new file, 1266 lines)

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -1409,13 +1409,13 @@ static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf)
DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf)
DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs)
DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu)
DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_rz_fs)
DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_rz_fu)
DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh)
DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh)
DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs)
DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu)
DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_rz_hs)
DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_rz_hu)
static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
GVecGen2iFn *fn)
@@ -1560,8 +1560,8 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
NULL, NULL, \
}; \
static NeonGenTwo64OpFn * const addfn[] = { \
gen_helper_neon_##OP##l_u16, \
gen_helper_neon_##OP##l_u32, \
tcg_gen_vec_##OP##16_i64, \
tcg_gen_vec_##OP##32_i64, \
tcg_gen_##OP##_i64, \
NULL, \
}; \
@@ -1639,8 +1639,8 @@ static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
{ \
static NeonGenTwo64OpFn * const addfn[] = { \
gen_helper_neon_##OP##l_u16, \
gen_helper_neon_##OP##l_u32, \
tcg_gen_vec_##OP##16_i64, \
tcg_gen_vec_##OP##32_i64, \
tcg_gen_##OP##_i64, \
NULL, \
}; \
@@ -1761,8 +1761,8 @@ static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
NULL,
};
static NeonGenTwo64OpFn * const addfn[] = {
gen_helper_neon_addl_u16,
gen_helper_neon_addl_u32,
tcg_gen_vec_add16_i64,
tcg_gen_vec_add32_i64,
tcg_gen_add_i64,
NULL,
};
@@ -1779,8 +1779,8 @@ static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
NULL,
};
static NeonGenTwo64OpFn * const addfn[] = {
gen_helper_neon_addl_u16,
gen_helper_neon_addl_u32,
tcg_gen_vec_add16_i64,
tcg_gen_vec_add32_i64,
tcg_gen_add_i64,
NULL,
};
@@ -1840,8 +1840,8 @@ static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
NULL, \
}; \
static NeonGenTwo64OpFn * const accfn[] = { \
gen_helper_neon_##ACC##l_u16, \
gen_helper_neon_##ACC##l_u32, \
tcg_gen_vec_##ACC##16_i64, \
tcg_gen_vec_##ACC##32_i64, \
tcg_gen_##ACC##_i64, \
NULL, \
}; \
@@ -2371,7 +2371,7 @@ static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a)
}; \
static NeonGenTwo64OpFn * const accfn[] = { \
NULL, \
gen_helper_neon_##ACC##l_u32, \
tcg_gen_vec_##ACC##32_i64, \
tcg_gen_##ACC##_i64, \
NULL, \
}; \
@@ -2565,204 +2565,6 @@ static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
return true;
}
static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
{
int pass, half;
TCGv_i32 tmp[2];
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
}
/* UNDEF accesses to D16-D31 if they don't exist. */
if (!dc_isar_feature(aa32_simd_r32, s) &&
((a->vd | a->vm) & 0x10)) {
return false;
}
if ((a->vd | a->vm) & a->q) {
return false;
}
if (a->size == 3) {
return false;
}
if (!vfp_access_check(s)) {
return true;
}
tmp[0] = tcg_temp_new_i32();
tmp[1] = tcg_temp_new_i32();
for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
for (half = 0; half < 2; half++) {
read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32);
switch (a->size) {
case 0:
tcg_gen_bswap32_i32(tmp[half], tmp[half]);
break;
case 1:
gen_swap_half(tmp[half], tmp[half]);
break;
case 2:
break;
default:
g_assert_not_reached();
}
}
write_neon_element32(tmp[1], a->vd, pass * 2, MO_32);
write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32);
}
return true;
}
static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
NeonGenWidenFn *widenfn,
NeonGenTwo64OpFn *opfn,
NeonGenTwo64OpFn *accfn)
{
/*
* Pairwise long operations: widen both halves of the pair,
* combine the pairs with the opfn, and then possibly accumulate
* into the destination with the accfn.
*/
int pass;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
}
/* UNDEF accesses to D16-D31 if they don't exist. */
if (!dc_isar_feature(aa32_simd_r32, s) &&
((a->vd | a->vm) & 0x10)) {
return false;
}
if ((a->vd | a->vm) & a->q) {
return false;
}
if (!widenfn) {
return false;
}
if (!vfp_access_check(s)) {
return true;
}
for (pass = 0; pass < a->q + 1; pass++) {
TCGv_i32 tmp;
TCGv_i64 rm0_64, rm1_64, rd_64;
rm0_64 = tcg_temp_new_i64();
rm1_64 = tcg_temp_new_i64();
rd_64 = tcg_temp_new_i64();
tmp = tcg_temp_new_i32();
read_neon_element32(tmp, a->vm, pass * 2, MO_32);
widenfn(rm0_64, tmp);
read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32);
widenfn(rm1_64, tmp);
opfn(rd_64, rm0_64, rm1_64);
if (accfn) {
TCGv_i64 tmp64 = tcg_temp_new_i64();
read_neon_element64(tmp64, a->vd, pass, MO_64);
accfn(rd_64, tmp64, rd_64);
}
write_neon_element64(rd_64, a->vd, pass, MO_64);
}
return true;
}
static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a)
{
static NeonGenWidenFn * const widenfn[] = {
gen_helper_neon_widen_s8,
gen_helper_neon_widen_s16,
tcg_gen_ext_i32_i64,
NULL,
};
static NeonGenTwo64OpFn * const opfn[] = {
gen_helper_neon_paddl_u16,
gen_helper_neon_paddl_u32,
tcg_gen_add_i64,
NULL,
};
return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
}
static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a)
{
static NeonGenWidenFn * const widenfn[] = {
gen_helper_neon_widen_u8,
gen_helper_neon_widen_u16,
tcg_gen_extu_i32_i64,
NULL,
};
static NeonGenTwo64OpFn * const opfn[] = {
gen_helper_neon_paddl_u16,
gen_helper_neon_paddl_u32,
tcg_gen_add_i64,
NULL,
};
return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
}
static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a)
{
static NeonGenWidenFn * const widenfn[] = {
gen_helper_neon_widen_s8,
gen_helper_neon_widen_s16,
tcg_gen_ext_i32_i64,
NULL,
};
static NeonGenTwo64OpFn * const opfn[] = {
gen_helper_neon_paddl_u16,
gen_helper_neon_paddl_u32,
tcg_gen_add_i64,
NULL,
};
static NeonGenTwo64OpFn * const accfn[] = {
gen_helper_neon_addl_u16,
gen_helper_neon_addl_u32,
tcg_gen_add_i64,
NULL,
};
return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
accfn[a->size]);
}
static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a)
{
static NeonGenWidenFn * const widenfn[] = {
gen_helper_neon_widen_u8,
gen_helper_neon_widen_u16,
tcg_gen_extu_i32_i64,
NULL,
};
static NeonGenTwo64OpFn * const opfn[] = {
gen_helper_neon_paddl_u16,
gen_helper_neon_paddl_u32,
tcg_gen_add_i64,
NULL,
};
static NeonGenTwo64OpFn * const accfn[] = {
gen_helper_neon_addl_u16,
gen_helper_neon_addl_u32,
tcg_gen_add_i64,
NULL,
};
return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
accfn[a->size]);
}
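For reference, the operation the deleted translators above implemented: widen both elements of each pair, combine them with the opfn, and (for VPADAL) accumulate into the destination. A plain-C model of the 8-bit signed case on one 64-bit register (hypothetical reference code, not the QEMU implementation):

    #include <stdint.h>

    /* VPADDL.S8: each 16-bit result lane is the sum of two adjacent
     * sign-extended 8-bit source lanes. */
    static void vpaddl_s8(int16_t d[4], const int8_t m[8])
    {
        for (int i = 0; i < 4; i++) {
            d[i] = (int16_t)m[2 * i] + (int16_t)m[2 * i + 1];
        }
    }

    /* VPADAL.S8: the same pairwise sum, accumulated into the destination. */
    static void vpadal_s8(int16_t d[4], const int8_t m[8])
    {
        for (int i = 0; i < 4; i++) {
            d[i] += (int16_t)m[2 * i] + (int16_t)m[2 * i + 1];
        }
    }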
typedef void ZipFn(TCGv_ptr, TCGv_ptr);
static bool do_zip_uzp(DisasContext *s, arg_2misc *a,
@@ -3120,6 +2922,13 @@ DO_2MISC_VEC(VCGT0, gen_gvec_cgt0)
DO_2MISC_VEC(VCLE0, gen_gvec_cle0)
DO_2MISC_VEC(VCGE0, gen_gvec_cge0)
DO_2MISC_VEC(VCLT0, gen_gvec_clt0)
DO_2MISC_VEC(VCLS, gen_gvec_cls)
DO_2MISC_VEC(VCLZ, gen_gvec_clz)
DO_2MISC_VEC(VREV64, gen_gvec_rev64)
DO_2MISC_VEC(VPADDL_S, gen_gvec_saddlp)
DO_2MISC_VEC(VPADDL_U, gen_gvec_uaddlp)
DO_2MISC_VEC(VPADAL_S, gen_gvec_sadalp)
DO_2MISC_VEC(VPADAL_U, gen_gvec_uadalp)
static bool trans_VMVN(DisasContext *s, arg_2misc *a)
{
@@ -3129,6 +2938,30 @@ static bool trans_VMVN(DisasContext *s, arg_2misc *a)
return do_2misc_vec(s, a, tcg_gen_gvec_not);
}
static bool trans_VCNT(DisasContext *s, arg_2misc *a)
{
if (a->size != 0) {
return false;
}
return do_2misc_vec(s, a, gen_gvec_cnt);
}
static bool trans_VREV16(DisasContext *s, arg_2misc *a)
{
if (a->size != 0) {
return false;
}
return do_2misc_vec(s, a, gen_gvec_rev16);
}
static bool trans_VREV32(DisasContext *s, arg_2misc *a)
{
if (a->size != 0 && a->size != 1) {
return false;
}
return do_2misc_vec(s, a, gen_gvec_rev32);
}
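The size checks in trans_VREV16/trans_VREV32 reject element sizes as wide as the reversal group, since an element cannot be reversed within itself. As a concrete model of the semantics, VREV32 with byte elements is simply a byte swap of each 32-bit word (a sketch, assuming the usual gvec element ordering):

    #include <stdint.h>

    /* VREV32.8 on one 64-bit register: byte-reverse each 32-bit word. */
    static uint64_t rev32_bytes(uint64_t x)
    {
        uint64_t lo = __builtin_bswap32((uint32_t)x);
        uint64_t hi = __builtin_bswap32((uint32_t)(x >> 32));
        return lo | (hi << 32);
    }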
#define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA) \
static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \
uint32_t rm_ofs, uint32_t oprsz, \
@@ -3208,68 +3041,6 @@ static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
return true;
}
static bool trans_VREV32(DisasContext *s, arg_2misc *a)
{
static NeonGenOneOpFn * const fn[] = {
tcg_gen_bswap32_i32,
gen_swap_half,
NULL,
NULL,
};
return do_2misc(s, a, fn[a->size]);
}
static bool trans_VREV16(DisasContext *s, arg_2misc *a)
{
if (a->size != 0) {
return false;
}
return do_2misc(s, a, gen_rev16);
}
static bool trans_VCLS(DisasContext *s, arg_2misc *a)
{
static NeonGenOneOpFn * const fn[] = {
gen_helper_neon_cls_s8,
gen_helper_neon_cls_s16,
gen_helper_neon_cls_s32,
NULL,
};
return do_2misc(s, a, fn[a->size]);
}
static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm)
{
tcg_gen_clzi_i32(rd, rm, 32);
}
static bool trans_VCLZ(DisasContext *s, arg_2misc *a)
{
static NeonGenOneOpFn * const fn[] = {
gen_helper_neon_clz_u8,
gen_helper_neon_clz_u16,
do_VCLZ_32,
NULL,
};
return do_2misc(s, a, fn[a->size]);
}
static bool trans_VCNT(DisasContext *s, arg_2misc *a)
{
if (a->size != 0) {
return false;
}
return do_2misc(s, a, gen_helper_neon_cnt_u8);
}
static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
uint32_t oprsz, uint32_t maxsz)
{
tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs,
vece == MO_16 ? 0x7fff : 0x7fffffff,
oprsz, maxsz);
}
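The removed gen_VABS_F wrapper (and gen_VNEG_F below), like their gen_gvec_fabs/gen_gvec_fneg replacements, rely on the same IEEE fact: absolute value clears the sign bit and negation flips it, NaNs included. A scalar float32 sketch of the same bit manipulation:

    #include <stdint.h>
    #include <string.h>

    static float fabs_bits(float x)
    {
        uint32_t u;
        memcpy(&u, &x, sizeof(u));
        u &= 0x7fffffffu;            /* clear the sign bit */
        memcpy(&x, &u, sizeof(x));
        return x;
    }

    static float fneg_bits(float x)
    {
        uint32_t u;
        memcpy(&u, &x, sizeof(u));
        u ^= 0x80000000u;            /* flip the sign bit */
        memcpy(&x, &u, sizeof(x));
        return x;
    }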
static bool trans_VABS_F(DisasContext *s, arg_2misc *a)
{
if (a->size == MO_16) {
@@ -3279,15 +3050,7 @@ static bool trans_VABS_F(DisasContext *s, arg_2misc *a)
} else if (a->size != MO_32) {
return false;
}
return do_2misc_vec(s, a, gen_VABS_F);
}
static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
uint32_t oprsz, uint32_t maxsz)
{
tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs,
vece == MO_16 ? 0x8000 : 0x80000000,
oprsz, maxsz);
return do_2misc_vec(s, a, gen_gvec_fabs);
}
static bool trans_VNEG_F(DisasContext *s, arg_2misc *a)
@@ -3299,7 +3062,7 @@ static bool trans_VNEG_F(DisasContext *s, arg_2misc *a)
} else if (a->size != MO_32) {
return false;
}
return do_2misc_vec(s, a, gen_VNEG_F);
return do_2misc_vec(s, a, gen_gvec_fneg);
}
static bool trans_VRECPE(DisasContext *s, arg_2misc *a)
@@ -3307,7 +3070,7 @@ static bool trans_VRECPE(DisasContext *s, arg_2misc *a)
if (a->size != 2) {
return false;
}
return do_2misc(s, a, gen_helper_recpe_u32);
return do_2misc_vec(s, a, gen_gvec_urecpe);
}
static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a)
@@ -3315,7 +3078,7 @@ static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a)
if (a->size != 2) {
return false;
}
return do_2misc(s, a, gen_helper_rsqrte_u32);
return do_2misc_vec(s, a, gen_gvec_ursqrte);
}
#define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \

target/arm/tcg/translate-vfp.c:

@@ -2424,17 +2424,17 @@ DO_VFP_2OP(VNEG, dp, gen_vfp_negd, aa32_fpdp_v2)
static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
{
gen_helper_vfp_sqrth(vd, vm, tcg_env);
gen_helper_vfp_sqrth(vd, vm, fpstatus_ptr(FPST_FPCR_F16));
}
static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
{
gen_helper_vfp_sqrts(vd, vm, tcg_env);
gen_helper_vfp_sqrts(vd, vm, fpstatus_ptr(FPST_FPCR));
}
static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
{
gen_helper_vfp_sqrtd(vd, vm, tcg_env);
gen_helper_vfp_sqrtd(vd, vm, fpstatus_ptr(FPST_FPCR));
}
DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp, aa32_fp16_arith)

target/arm/tcg/translate.h:

@@ -578,6 +578,41 @@ void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_cls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_clz(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_cnt(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_rbit(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_rev16(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_rev32(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_rev64(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_saddlp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_sadalp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_uaddlp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_uadalp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz);
/* These exclusively manipulate the sign bit. */
void gen_gvec_fabs(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t oprsz, uint32_t maxsz);
void gen_gvec_fneg(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t oprsz, uint32_t maxsz);
void gen_gvec_urecpe(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_ursqrte(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz);
/*
* Forward to the isar_feature_* tests given a DisasContext pointer.
*/

target/arm/tcg/vec_helper.c:

@@ -1253,8 +1253,10 @@ DO_2OP(gvec_touszh, vfp_touszh, float16)
#define DO_2OP_CMP0(FN, CMPOP, DIRN) \
WRAP_CMP0_##DIRN(FN, CMPOP, float16) \
WRAP_CMP0_##DIRN(FN, CMPOP, float32) \
WRAP_CMP0_##DIRN(FN, CMPOP, float64) \
DO_2OP(gvec_f##FN##0_h, float16_##FN##0, float16) \
DO_2OP(gvec_f##FN##0_s, float32_##FN##0, float32)
DO_2OP(gvec_f##FN##0_s, float32_##FN##0, float32) \
DO_2OP(gvec_f##FN##0_d, float64_##FN##0, float64)
DO_2OP_CMP0(cgt, cgt, FWD)
DO_2OP_CMP0(cge, cge, FWD)
@@ -2505,14 +2507,19 @@ DO_3OP_PAIR(gvec_uminp_s, MIN, uint32_t, H4)
clear_tail(d, oprsz, simd_maxsz(desc)); \
}
DO_VCVT_FIXED(gvec_vcvt_sd, helper_vfp_sqtod, uint64_t)
DO_VCVT_FIXED(gvec_vcvt_ud, helper_vfp_uqtod, uint64_t)
DO_VCVT_FIXED(gvec_vcvt_sf, helper_vfp_sltos, uint32_t)
DO_VCVT_FIXED(gvec_vcvt_uf, helper_vfp_ultos, uint32_t)
DO_VCVT_FIXED(gvec_vcvt_fs, helper_vfp_tosls_round_to_zero, uint32_t)
DO_VCVT_FIXED(gvec_vcvt_fu, helper_vfp_touls_round_to_zero, uint32_t)
DO_VCVT_FIXED(gvec_vcvt_sh, helper_vfp_shtoh, uint16_t)
DO_VCVT_FIXED(gvec_vcvt_uh, helper_vfp_uhtoh, uint16_t)
DO_VCVT_FIXED(gvec_vcvt_hs, helper_vfp_toshh_round_to_zero, uint16_t)
DO_VCVT_FIXED(gvec_vcvt_hu, helper_vfp_touhh_round_to_zero, uint16_t)
DO_VCVT_FIXED(gvec_vcvt_rz_ds, helper_vfp_tosqd_round_to_zero, uint64_t)
DO_VCVT_FIXED(gvec_vcvt_rz_du, helper_vfp_touqd_round_to_zero, uint64_t)
DO_VCVT_FIXED(gvec_vcvt_rz_fs, helper_vfp_tosls_round_to_zero, uint32_t)
DO_VCVT_FIXED(gvec_vcvt_rz_fu, helper_vfp_touls_round_to_zero, uint32_t)
DO_VCVT_FIXED(gvec_vcvt_rz_hs, helper_vfp_toshh_round_to_zero, uint16_t)
DO_VCVT_FIXED(gvec_vcvt_rz_hu, helper_vfp_touhh_round_to_zero, uint16_t)
#undef DO_VCVT_FIXED
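The renamed _rz_ helpers make the rounding mode explicit: these are the float-to-fixed-point conversions that round toward zero. A hypothetical scalar reference (the real softfloat-backed helpers additionally saturate on overflow and handle NaNs):

    #include <stdint.h>

    /* Convert to signed 32-bit fixed point with 'fbits' fraction bits,
     * rounding toward zero: scale, then truncate as a C cast does. */
    static int32_t float_to_fixed_rz(float x, int fbits)
    {
        return (int32_t)(x * (float)(1u << fbits));
    }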
@@ -2532,6 +2539,8 @@ DO_VCVT_FIXED(gvec_vcvt_hu, helper_vfp_touhh_round_to_zero, uint16_t)
clear_tail(d, oprsz, simd_maxsz(desc)); \
}
DO_VCVT_RMODE(gvec_vcvt_rm_sd, helper_vfp_tosqd, uint64_t)
DO_VCVT_RMODE(gvec_vcvt_rm_ud, helper_vfp_touqd, uint64_t)
DO_VCVT_RMODE(gvec_vcvt_rm_ss, helper_vfp_tosls, uint32_t)
DO_VCVT_RMODE(gvec_vcvt_rm_us, helper_vfp_touls, uint32_t)
DO_VCVT_RMODE(gvec_vcvt_rm_sh, helper_vfp_toshh, uint16_t)
@@ -3066,3 +3075,49 @@ DO_CLAMP(gvec_uclamp_b, uint8_t)
DO_CLAMP(gvec_uclamp_h, uint16_t)
DO_CLAMP(gvec_uclamp_s, uint32_t)
DO_CLAMP(gvec_uclamp_d, uint64_t)
/* Bit count in each 8-bit word. */
void HELPER(gvec_cnt_b)(void *vd, void *vn, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc);
uint8_t *d = vd, *n = vn;
for (i = 0; i < opr_sz; ++i) {
d[i] = ctpop8(n[i]);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
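ctpop8() comes from QEMU's host-utils; a classic SWAR population count with the same result (a sketch, not the QEMU source):

    #include <stdint.h>

    /* Count set bits by summing adjacent 1-, 2-, then 4-bit fields. */
    static unsigned popcount8(uint8_t x)
    {
        x = (uint8_t)((x & 0x55) + ((x >> 1) & 0x55));
        x = (uint8_t)((x & 0x33) + ((x >> 2) & 0x33));
        return (unsigned)((x + (x >> 4)) & 0x0f);
    }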
/* Reverse bits in each 8-bit word. */
void HELPER(gvec_rbit_b)(void *vd, void *vn, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc);
uint64_t *d = vd, *n = vn;
for (i = 0; i < opr_sz / 8; ++i) {
d[i] = revbit64(bswap64(n[i]));
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
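The composition here is the neat part: bswap64() reverses the byte order of the whole word and revbit64() then reverses all 64 bits, so every byte ends up back in its original position with its own bits reversed. The per-byte equivalent (sketch):

    #include <stdint.h>

    /* Reverse the bit order within one byte by swapping progressively
     * smaller fields: nibbles, then bit pairs, then single bits. */
    static uint8_t rbit8(uint8_t x)
    {
        x = (uint8_t)((x >> 4) | (x << 4));
        x = (uint8_t)(((x & 0xcc) >> 2) | ((x & 0x33) << 2));
        x = (uint8_t)(((x & 0xaa) >> 1) | ((x & 0x55) << 1));
        return x;
    }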
void HELPER(gvec_urecpe_s)(void *vd, void *vn, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc);
uint32_t *d = vd, *n = vn;
for (i = 0; i < opr_sz / 4; ++i) {
d[i] = helper_recpe_u32(n[i]);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
void HELPER(gvec_ursqrte_s)(void *vd, void *vn, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc);
uint32_t *d = vd, *n = vn;
for (i = 0; i < opr_sz / 4; ++i) {
d[i] = helper_rsqrte_u32(n[i]);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}

target/arm/vfp_helper.c:

@@ -314,19 +314,19 @@ VFP_BINOP(minnum)
VFP_BINOP(maxnum)
#undef VFP_BINOP
dh_ctype_f16 VFP_HELPER(sqrt, h)(dh_ctype_f16 a, CPUARMState *env)
dh_ctype_f16 VFP_HELPER(sqrt, h)(dh_ctype_f16 a, void *fpstp)
{
return float16_sqrt(a, &env->vfp.fp_status_f16);
return float16_sqrt(a, fpstp);
}
float32 VFP_HELPER(sqrt, s)(float32 a, CPUARMState *env)
float32 VFP_HELPER(sqrt, s)(float32 a, void *fpstp)
{
return float32_sqrt(a, &env->vfp.fp_status);
return float32_sqrt(a, fpstp);
}
float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env)
float64 VFP_HELPER(sqrt, d)(float64 a, void *fpstp)
{
return float64_sqrt(a, &env->vfp.fp_status);
return float64_sqrt(a, fpstp);
}
static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp)
@@ -495,6 +495,10 @@ VFP_CONV_FIX_A64(sq, h, 16, dh_ctype_f16, 64, int64)
VFP_CONV_FIX(uh, h, 16, dh_ctype_f16, 32, uint16)
VFP_CONV_FIX(ul, h, 16, dh_ctype_f16, 32, uint32)
VFP_CONV_FIX_A64(uq, h, 16, dh_ctype_f16, 64, uint64)
VFP_CONV_FLOAT_FIX_ROUND(sq, d, 64, float64, 64, int64,
float_round_to_zero, _round_to_zero)
VFP_CONV_FLOAT_FIX_ROUND(uq, d, 64, float64, 64, uint64,
float_round_to_zero, _round_to_zero)
#undef VFP_CONV_FIX
#undef VFP_CONV_FIX_FLOAT