Merge tag 'pull-target-arm-20241213' of https://git.linaro.org/people/pmaydell/qemu-arm into staging

target-arm queue:
 * Finish conversion of A64 decoder to decodetree
 * Use float_round_to_odd in helper_fcvtx_f64_to_f32
 * Move TLBI insn emulation code out to its own source file
 * docs/system/arm: fix broken links, document undocumented properties
 * MAINTAINERS: correct an email address

# -----BEGIN PGP SIGNATURE-----
#
# iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmdcYCcZHHBldGVyLm1h
# eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3usmD/9x6yTRxIK2mi0CjY0Bii89
# hL1Z3n2bxRDu+WoMcsQKXQM5RcixILJyMsnArOxI3D1bVEkAskuaVcXL0uS7Inq6
# EkEq8Z5lfRikAP698U2tzaGhKRiE4NT/cNgOoFLddkjqvZ1tq3sSbPcCudSWkP+u
# Z3c5etP8llGNhokNhKmIifE/auxiFdPh8JRXHAF3KhNu4VOX7gNWnt4YZNhnV2XN
# TsD+IxU9LCfI8pIFK95zBUIQT/361lIoiY/r7RpN21HeEuS+4wXT/Vfii6rEgsg5
# pNkPoxX/Tc+67l4wXzgoV/p2I1KZbJZ/s7Ta5wLmopidwi2EP9ETVcfTzKIF+PIJ
# 08nozInD+fxlyGBezTRDmuIKiC4t1lVW8TP8znyp3TcSHFs5Q/iQY0uPACzoUVuE
# chMIt4dD6NlMxOanWANbsVlF+ZPc8MVBMz3zHVbvkOiogoRQYjuDqQIQAhLbQolg
# uC/ql79WnUe0IX1j9rcW7+DVNq/bObLCN89uSjigHO2bo5FKKr4pnOG/SaAyER5L
# T/OHy1ACcxGNVIiUwKEDxdQ5iwcl+GEJfMfrpJHlTzxeZggL2lE0mcpXaHGLTzXV
# K7fSOBI15T+aRqN0/29Rtsw8ayMV5/RmnanesPmC2VN86ZCE0OKGOcLEdaI+q3iT
# CMxIsCUCpMM4WjbdJ69ZgQ==
# =wQ1l
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 13 Dec 2024 11:26:15 EST
# gpg:                using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg:                issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [full]
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>" [full]
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [full]
# gpg:                 aka "Peter Maydell <peter@archaic.org.uk>" [unknown]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* tag 'pull-target-arm-20241213' of https://git.linaro.org/people/pmaydell/qemu-arm: (85 commits)
  target/arm: Simplify condition for tlbi_el2_cp_reginfo[]
  target/arm: Move RME TLB insns to tlb-insns.c
  target/arm: Move small helper functions to tlb-insns.c
  target/arm: Move the TLBI OS insns to tlb-insns.c.
  target/arm: Move TLBI range insns
  target/arm: Move AArch64 EL3 TLBI insns
  target/arm: Move the AArch64 EL2 TLBI insns
  target/arm: Move AArch64 TLBI insns from v8_cp_reginfo[]
  target/arm: Move TLBI insns for AArch32 EL2 to tlbi_insn_helper.c
  target/arm: Move some TLBI insns to their own source file
  MAINTAINERS: correct my email address
  docs/system/arm/virt: document missing properties
  docs/system/arm/xlnx-versal-virt: document ospi-flash property
  docs/system/arm/fby35: document execute-in-place property
  docs/system/arm/orangepi: update links
  target/arm: Use float_round_to_odd in helper_fcvtx_f64_to_f32
  target/arm: Convert FCVTL to decodetree
  target/arm: Convert URECPE and URSQRTE to decodetree
  target/arm: Introduce gen_gvec_urecpe, gen_gvec_ursqrte
  target/arm: Convert FRECPE, FRECPX, FRSQRTE to decodetree
  ...

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
commit 94b57605c1
Stefan Hajnoczi <stefanha@redhat.com>, 2024-12-14 08:42:15 -05:00
22 changed files with 4203 additions and 5594 deletions

MAINTAINERS

@@ -226,7 +226,7 @@ F: target/avr/
F: tests/functional/test_avr_mega2560.py
Hexagon TCG CPUs
-M: Brian Cain <bcain@oss.qualcomm.com>
+M: Brian Cain <brian.cain@oss.qualcomm.com>
S: Supported
F: target/hexagon/
X: target/hexagon/idef-parser/

docs/system/arm/fby35.rst

@@ -45,3 +45,8 @@ process starts.
$ screen /dev/tty0 # In a separate TMUX pane, terminal window, etc.
$ screen /dev/tty1
$ (qemu) c # Start the boot process once screen is setup.
This machine model supports emulation of the boot from the CE0 flash device by
setting option ``execute-in-place``. When using this option, the CPU fetches
instructions to execute by reading CE0 and not from a preloaded ROM
initialized at machine init time. As a result, execution will be slower.
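
For example, assuming the standard QEMU machine-option syntax (the flash
drive arguments depend on your image), XIP boot can be requested with:

$ qemu-system-arm -machine fby35,execute-in-place=true ...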

docs/system/arm/orangepi.rst

@@ -119,7 +119,7 @@ Orange Pi PC images
Note that the mainline kernel does not have a root filesystem. You may provide it
with an official Orange Pi PC image from the official website:
-http://www.orangepi.org/downloadresources/
+http://www.orangepi.org/html/serviceAndSupport/index.html
Another possibility is to run an Armbian image for Orange Pi PC which
can be downloaded from:
@@ -213,7 +213,7 @@ including the Orange Pi PC. NetBSD 9.0 is known to work best for the Orange Pi P
board and provides a fully working system with serial console, networking and storage.
For the Orange Pi PC machine, get the 'evbarm-earmv7hf' based image from:
-https://cdn.netbsd.org/pub/NetBSD/NetBSD-9.0/evbarm-earmv7hf/binary/gzimg/armv7.img.gz
+https://archive.netbsd.org/pub/NetBSD-archive/NetBSD-9.0/evbarm-earmv7hf/binary/gzimg/armv7.img.gz
The image requires manually installing U-Boot in the image. Build U-Boot with
the orangepi_pc_defconfig configuration as described in the previous section.

docs/system/arm/virt.rst

@@ -167,10 +167,18 @@ iommu
``smmuv3``
Create an SMMUv3
default-bus-bypass-iommu
Set ``on``/``off`` to enable/disable `bypass_iommu
<https://gitlab.com/qemu-project/qemu/-/blob/master/docs/bypass-iommu.txt>`_
for default root bus.
ras
Set ``on``/``off`` to enable/disable reporting host memory errors to a guest
using ACPI and guest external abort exceptions. The default is off.
acpi
Set ``on``/``off``/``auto`` to enable/disable ACPI.
dtb-randomness
Set ``on``/``off`` to pass random seeds via the guest DTB
rng-seed and kaslr-seed nodes (in both "/chosen" and
@@ -184,6 +192,14 @@ dtb-randomness
dtb-kaslr-seed
A deprecated synonym for dtb-randomness.
x-oem-id
Set string (up to 6 bytes) to override the default value of field OEMID in ACPI
table header.
x-oem-table-id
Set string (up to 8 bytes) to override the default value of field OEM Table ID
in ACPI table header.
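
For instance, several of these properties can be given together on one
command line (the values here are illustrative):

$ qemu-system-aarch64 -M virt,default-bus-bypass-iommu=on,x-oem-id=QEMU ...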
Linux guest kernel configuration
""""""""""""""""""""""""""""""""

docs/system/arm/xlnx-versal-virt.rst

@@ -178,6 +178,9 @@ Run the following at the U-Boot prompt:
fdt set /chosen/dom0 reg <0x00000000 0x40000000 0x0 0x03100000>
booti 30000000 - 20000000
It's possible to change the OSPI flash model emulated by using the machine model
option ``ospi-flash``.
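
For example (the flash model named here is only an illustration; use any
OSPI flash chip model QEMU supports):

$ qemu-system-aarch64 -M xlnx-versal-virt,ospi-flash=mt35xu01g ...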
BBRAM File Backend
""""""""""""""""""
BBRAM can have an optional file backend, which must be a seekable

File diff suppressed because it is too large.

target/arm/helper.h

@@ -133,9 +133,9 @@ DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, ptr)
DEF_HELPER_3(vfp_minnumh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_minnums, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_minnumd, f64, f64, f64, ptr)
-DEF_HELPER_2(vfp_sqrth, f16, f16, env)
-DEF_HELPER_2(vfp_sqrts, f32, f32, env)
-DEF_HELPER_2(vfp_sqrtd, f64, f64, env)
+DEF_HELPER_2(vfp_sqrth, f16, f16, ptr)
+DEF_HELPER_2(vfp_sqrts, f32, f32, ptr)
+DEF_HELPER_2(vfp_sqrtd, f64, f64, ptr)
DEF_HELPER_3(vfp_cmph, void, f16, f16, env)
DEF_HELPER_3(vfp_cmps, void, f32, f32, env)
DEF_HELPER_3(vfp_cmpd, void, f64, f64, env)
@@ -178,8 +178,10 @@ DEF_HELPER_3(vfp_touhs_round_to_zero, i32, f32, i32, ptr)
DEF_HELPER_3(vfp_touls_round_to_zero, i32, f32, i32, ptr)
DEF_HELPER_3(vfp_toshd_round_to_zero, i64, f64, i32, ptr)
DEF_HELPER_3(vfp_tosld_round_to_zero, i64, f64, i32, ptr)
DEF_HELPER_3(vfp_tosqd_round_to_zero, i64, f64, i32, ptr)
DEF_HELPER_3(vfp_touhd_round_to_zero, i64, f64, i32, ptr)
DEF_HELPER_3(vfp_tould_round_to_zero, i64, f64, i32, ptr)
DEF_HELPER_3(vfp_touqd_round_to_zero, i64, f64, i32, ptr)
DEF_HELPER_3(vfp_touhh, i32, f16, i32, ptr)
DEF_HELPER_3(vfp_toshh, i32, f16, i32, ptr)
DEF_HELPER_3(vfp_toulh, i32, f16, i32, ptr)
@@ -363,8 +365,8 @@ DEF_HELPER_1(neon_clz_u16, i32, i32)
DEF_HELPER_1(neon_cls_s8, i32, i32)
DEF_HELPER_1(neon_cls_s16, i32, i32)
DEF_HELPER_1(neon_cls_s32, i32, i32)
-DEF_HELPER_1(neon_cnt_u8, i32, i32)
-DEF_HELPER_FLAGS_1(neon_rbit_u8, TCG_CALL_NO_RWG_SE, i32, i32)
+DEF_HELPER_FLAGS_3(gvec_cnt_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_rbit_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_3(neon_qdmulh_s16, i32, env, i32, i32)
DEF_HELPER_3(neon_qrdmulh_s16, i32, env, i32, i32)
@@ -395,12 +397,8 @@ DEF_HELPER_1(neon_widen_s8, i64, i32)
DEF_HELPER_1(neon_widen_u16, i64, i32)
DEF_HELPER_1(neon_widen_s16, i64, i32)
-DEF_HELPER_2(neon_addl_u16, i64, i64, i64)
-DEF_HELPER_2(neon_addl_u32, i64, i64, i64)
-DEF_HELPER_2(neon_paddl_u16, i64, i64, i64)
-DEF_HELPER_2(neon_paddl_u32, i64, i64, i64)
-DEF_HELPER_2(neon_subl_u16, i64, i64, i64)
-DEF_HELPER_2(neon_subl_u32, i64, i64, i64)
+DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(neon_addlp_s16, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_3(neon_addl_saturate_s32, i64, env, i64, i64)
DEF_HELPER_3(neon_addl_saturate_s64, i64, env, i64, i64)
DEF_HELPER_2(neon_abdl_u16, i64, i32, i32)
@@ -654,14 +652,21 @@ DEF_HELPER_FLAGS_4(gvec_touizs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_sf, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_uf, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_vcvt_fs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_vcvt_fu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_rz_fs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_rz_fu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_sh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_uh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_vcvt_hs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_vcvt_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_rz_hs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_rz_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_sd, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_ud, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_rz_ds, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_rz_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_rm_sd, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_rm_ud, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_rm_ss, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_rm_us, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_vcvt_rm_sh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -683,18 +688,23 @@ DEF_HELPER_FLAGS_4(gvec_frsqrte_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fcgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fcgt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fcgt0_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fcge0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fcge0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fcge0_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fceq0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fceq0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fceq0_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fcle0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fcle0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fcle0_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fclt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fclt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_fclt0_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
@@ -1111,6 +1121,9 @@ DEF_HELPER_FLAGS_4(gvec_uminp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_uminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_uminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_urecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_ursqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
#ifdef TARGET_AARCH64
#include "tcg/helper-a64.h"
#include "tcg/helper-sve.h"

target/arm/internals.h

@@ -1727,6 +1727,9 @@ static inline uint64_t pauth_ptr_mask(ARMVAParameters param)
/* Add the cpreg definitions for debug related system registers */
void define_debug_regs(ARMCPU *cpu);
/* Add the cpreg definitions for TLBI instructions */
void define_tlb_insn_regs(ARMCPU *cpu);
/* Effective value of MDCR_EL2 */
static inline uint64_t arm_mdcr_el2_eff(CPUARMState *env)
{
@@ -1817,4 +1820,10 @@ uint64_t gt_get_countervalue(CPUARMState *env);
 * and CNTVCT_EL0 (this will be either 0 or the value of CNTVOFF_EL2).
 */
uint64_t gt_virt_cnt_offset(CPUARMState *env);
/*
 * Return mask of ARMMMUIdxBit values corresponding to an "invalidate
 * all EL1" scope; this covers stage 1 and stage 2.
 */
int alle1_tlbmask(CPUARMState *env);
#endif

target/arm/tcg-stubs.c

@@ -25,3 +25,8 @@ void raise_exception_ra(CPUARMState *env, uint32_t excp, uint32_t syndrome,
void assert_hflags_rebuild_correctly(CPUARMState *env)
{
}
/* TLBI insns are only used by TCG, so we don't need to do anything for KVM */
void define_tlb_insn_regs(ARMCPU *cpu)
{
}

target/arm/tcg/a64.decode

@@ -21,13 +21,18 @@
%rd 0:5
%esz_sd 22:1 !function=plus_2
%esz_hs 22:1 !function=plus_1
%esz_hsd 22:2 !function=xor_2
%hl 11:1 21:1
%hlm 11:1 20:2
&r rn
&rrr rd rn rm
&ri rd imm
&rr rd rn
&rr_sf rd rn sf
&rri_sf rd rn imm sf
&rrr_sf rd rn rm sf
&i imm
&rr_e rd rn esz
&rri_e rd rn imm esz
@@ -41,10 +46,15 @@
&qrrrr_e q rd rn rm ra esz
@rr_h ........ ... ..... ...... rn:5 rd:5 &rr_e esz=1
@rr_s ........ ... ..... ...... rn:5 rd:5 &rr_e esz=2
@rr_d ........ ... ..... ...... rn:5 rd:5 &rr_e esz=3
@rr_e ........ esz:2 . ..... ...... rn:5 rd:5 &rr_e
@rr_sd ........ ... ..... ...... rn:5 rd:5 &rr_e esz=%esz_sd
@rr_hsd ........ ... ..... ...... rn:5 rd:5 &rr_e esz=%esz_hsd
@rrr_b ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=0
@rrr_h ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=1
@rrr_s ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=2
@rrr_d ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=3
@rrr_sd ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=%esz_sd
@rrr_hsd ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=%esz_hsd
@@ -62,7 +72,12 @@
@rrr_q1e3 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=3
@rrrr_q1e3 ........ ... rm:5 . ra:5 rn:5 rd:5 &qrrrr_e q=1 esz=3
@qrr_b . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=0
@qrr_h . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=1
@qrr_s . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=2
@qrr_bh . q:1 ...... . esz:1 ...... ...... rn:5 rd:5 &qrr_e
@qrr_hs . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=%esz_hs
@qrr_sd . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=%esz_sd
@qrr_e . q:1 ...... esz:2 ...... ...... rn:5 rd:5 &qrr_e
@qrrr_b . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=0
@@ -161,7 +176,7 @@ UBFM . 10 100110 . ...... ...... ..... ..... @bitfield_32
EXTR 1 00 100111 1 0 rm:5 imm:6 rn:5 rd:5 &extract sf=1
EXTR 0 00 100111 0 0 rm:5 0 imm:5 rn:5 rd:5 &extract sf=0
-# Branches
+### Branches
%imm26 0:s26 !function=times_4
@branch . ..... .......................... &i imm=%imm26
@@ -291,7 +306,7 @@ HLT 1101 0100 010 ................ 000 00 @i16
# DCPS2 1101 0100 101 ................ 000 10 @i16
# DCPS3 1101 0100 101 ................ 000 11 @i16
-# Loads and stores
+### Loads and stores
&stxr rn rt rt2 rs sz lasr
&stlr rn rt sz lasr
@@ -649,6 +664,138 @@ CPYP 00 011 1 01000 ..... .... 01 ..... ..... @cpy
CPYM 00 011 1 01010 ..... .... 01 ..... ..... @cpy
CPYE 00 011 1 01100 ..... .... 01 ..... ..... @cpy
### Data Processing (register)
# Data Processing (2-source)
@rrr . .......... rm:5 ...... rn:5 rd:5 &rrr
@rrr_sf sf:1 .......... rm:5 ...... rn:5 rd:5 &rrr_sf
UDIV . 00 11010110 ..... 00001 0 ..... ..... @rrr_sf
SDIV . 00 11010110 ..... 00001 1 ..... ..... @rrr_sf
LSLV . 00 11010110 ..... 00100 0 ..... ..... @rrr_sf
LSRV . 00 11010110 ..... 00100 1 ..... ..... @rrr_sf
ASRV . 00 11010110 ..... 00101 0 ..... ..... @rrr_sf
RORV . 00 11010110 ..... 00101 1 ..... ..... @rrr_sf
CRC32 0 00 11010110 ..... 0100 00 ..... ..... @rrr_b
CRC32 0 00 11010110 ..... 0100 01 ..... ..... @rrr_h
CRC32 0 00 11010110 ..... 0100 10 ..... ..... @rrr_s
CRC32 1 00 11010110 ..... 0100 11 ..... ..... @rrr_d
CRC32C 0 00 11010110 ..... 0101 00 ..... ..... @rrr_b
CRC32C 0 00 11010110 ..... 0101 01 ..... ..... @rrr_h
CRC32C 0 00 11010110 ..... 0101 10 ..... ..... @rrr_s
CRC32C 1 00 11010110 ..... 0101 11 ..... ..... @rrr_d
SUBP 1 00 11010110 ..... 000000 ..... ..... @rrr
SUBPS 1 01 11010110 ..... 000000 ..... ..... @rrr
IRG 1 00 11010110 ..... 000100 ..... ..... @rrr
GMI 1 00 11010110 ..... 000101 ..... ..... @rrr
PACGA 1 00 11010110 ..... 001100 ..... ..... @rrr
# Data Processing (1-source)
@rr . .......... ..... ...... rn:5 rd:5 &rr
@rr_sf sf:1 .......... ..... ...... rn:5 rd:5 &rr_sf
RBIT . 10 11010110 00000 000000 ..... ..... @rr_sf
REV16 . 10 11010110 00000 000001 ..... ..... @rr_sf
REV32 . 10 11010110 00000 000010 ..... ..... @rr_sf
REV64 1 10 11010110 00000 000011 ..... ..... @rr
CLZ . 10 11010110 00000 000100 ..... ..... @rr_sf
CLS . 10 11010110 00000 000101 ..... ..... @rr_sf
&pacaut rd rn z
@pacaut . .. ........ ..... .. z:1 ... rn:5 rd:5 &pacaut
PACIA 1 10 11010110 00001 00.000 ..... ..... @pacaut
PACIB 1 10 11010110 00001 00.001 ..... ..... @pacaut
PACDA 1 10 11010110 00001 00.010 ..... ..... @pacaut
PACDB 1 10 11010110 00001 00.011 ..... ..... @pacaut
AUTIA 1 10 11010110 00001 00.100 ..... ..... @pacaut
AUTIB 1 10 11010110 00001 00.101 ..... ..... @pacaut
AUTDA 1 10 11010110 00001 00.110 ..... ..... @pacaut
AUTDB 1 10 11010110 00001 00.111 ..... ..... @pacaut
XPACI 1 10 11010110 00001 010000 11111 rd:5
XPACD 1 10 11010110 00001 010001 11111 rd:5
# Logical (shifted reg)
&logic_shift rd rn rm sf sa st n
@logic_shift sf:1 .. ..... st:2 n:1 rm:5 sa:6 rn:5 rd:5 &logic_shift
AND_r . 00 01010 .. . ..... ...... ..... ..... @logic_shift
ORR_r . 01 01010 .. . ..... ...... ..... ..... @logic_shift
EOR_r . 10 01010 .. . ..... ...... ..... ..... @logic_shift
ANDS_r . 11 01010 .. . ..... ...... ..... ..... @logic_shift
# Add/subtract (shifted reg)
&addsub_shift rd rn rm sf sa st
@addsub_shift sf:1 .. ..... st:2 . rm:5 sa:6 rn:5 rd:5 &addsub_shift
ADD_r . 00 01011 .. 0 ..... ...... ..... ..... @addsub_shift
SUB_r . 10 01011 .. 0 ..... ...... ..... ..... @addsub_shift
ADDS_r . 01 01011 .. 0 ..... ...... ..... ..... @addsub_shift
SUBS_r . 11 01011 .. 0 ..... ...... ..... ..... @addsub_shift
# Add/subtract (extended reg)
&addsub_ext rd rn rm sf sa st
@addsub_ext sf:1 .. ........ rm:5 st:3 sa:3 rn:5 rd:5 &addsub_ext
ADD_ext . 00 01011001 ..... ... ... ..... ..... @addsub_ext
SUB_ext . 10 01011001 ..... ... ... ..... ..... @addsub_ext
ADDS_ext . 01 01011001 ..... ... ... ..... ..... @addsub_ext
SUBS_ext . 11 01011001 ..... ... ... ..... ..... @addsub_ext
# Add/subtract (carry)
ADC . 00 11010000 ..... 000000 ..... ..... @rrr_sf
ADCS . 01 11010000 ..... 000000 ..... ..... @rrr_sf
SBC . 10 11010000 ..... 000000 ..... ..... @rrr_sf
SBCS . 11 11010000 ..... 000000 ..... ..... @rrr_sf
# Rotate right into flags
RMIF 1 01 11010000 imm:6 00001 rn:5 0 mask:4
# Evaluate into flags
SETF8 0 01 11010000 00000 000010 rn:5 01101
SETF16 0 01 11010000 00000 010010 rn:5 01101
# Conditional compare
CCMP sf:1 op:1 1 11010010 y:5 cond:4 imm:1 0 rn:5 0 nzcv:4
# Conditional select
CSEL sf:1 else_inv:1 011010100 rm:5 cond:4 0 else_inc:1 rn:5 rd:5
# Data Processing (3-source)
&rrrr rd rn rm ra
@rrrr . .. ........ rm:5 . ra:5 rn:5 rd:5 &rrrr
MADD_w 0 00 11011000 ..... 0 ..... ..... ..... @rrrr
MSUB_w 0 00 11011000 ..... 1 ..... ..... ..... @rrrr
MADD_x 1 00 11011000 ..... 0 ..... ..... ..... @rrrr
MSUB_x 1 00 11011000 ..... 1 ..... ..... ..... @rrrr
SMADDL 1 00 11011001 ..... 0 ..... ..... ..... @rrrr
SMSUBL 1 00 11011001 ..... 1 ..... ..... ..... @rrrr
UMADDL 1 00 11011101 ..... 0 ..... ..... ..... @rrrr
UMSUBL 1 00 11011101 ..... 1 ..... ..... ..... @rrrr
SMULH 1 00 11011010 ..... 0 11111 ..... ..... @rrr
UMULH 1 00 11011110 ..... 0 11111 ..... ..... @rrr
### Cryptographic AES
AESE 01001110 00 10100 00100 10 ..... ..... @r2r_q1e0
@@ -1183,10 +1330,103 @@ FMAXV_s 0110 1110 00 11000 01111 10 ..... ..... @rr_q1e2
FMINV_h 0.00 1110 10 11000 01111 10 ..... ..... @qrr_h
FMINV_s 0110 1110 10 11000 01111 10 ..... ..... @rr_q1e2
# Conversion between floating-point and fixed-point (general register)
&fcvt rd rn esz sf shift
%fcvt_shift32 10:5 !function=rsub_32
%fcvt_shift64 10:6 !function=rsub_64
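# The scale field encodes (64 - fracbits); in the 32-bit form its top bit
# is fixed to 1, so rsub_32/rsub_64 recover the fraction-bit count (shift).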
@fcvt32 0 ....... .. ...... 1..... rn:5 rd:5 \
&fcvt sf=0 esz=%esz_hsd shift=%fcvt_shift32
@fcvt64 1 ....... .. ...... ...... rn:5 rd:5 \
&fcvt sf=1 esz=%esz_hsd shift=%fcvt_shift64
SCVTF_g . 0011110 .. 000010 ...... ..... ..... @fcvt32
SCVTF_g . 0011110 .. 000010 ...... ..... ..... @fcvt64
UCVTF_g . 0011110 .. 000011 ...... ..... ..... @fcvt32
UCVTF_g . 0011110 .. 000011 ...... ..... ..... @fcvt64
FCVTZS_g . 0011110 .. 011000 ...... ..... ..... @fcvt32
FCVTZS_g . 0011110 .. 011000 ...... ..... ..... @fcvt64
FCVTZU_g . 0011110 .. 011001 ...... ..... ..... @fcvt32
FCVTZU_g . 0011110 .. 011001 ...... ..... ..... @fcvt64
# Conversion between floating-point and integer (general register)
@icvt sf:1 ....... .. ...... ...... rn:5 rd:5 \
&fcvt esz=%esz_hsd shift=0
SCVTF_g . 0011110 .. 100010 000000 ..... ..... @icvt
UCVTF_g . 0011110 .. 100011 000000 ..... ..... @icvt
FCVTNS_g . 0011110 .. 100000 000000 ..... ..... @icvt
FCVTNU_g . 0011110 .. 100001 000000 ..... ..... @icvt
FCVTPS_g . 0011110 .. 101000 000000 ..... ..... @icvt
FCVTPU_g . 0011110 .. 101001 000000 ..... ..... @icvt
FCVTMS_g . 0011110 .. 110000 000000 ..... ..... @icvt
FCVTMU_g . 0011110 .. 110001 000000 ..... ..... @icvt
FCVTZS_g . 0011110 .. 111000 000000 ..... ..... @icvt
FCVTZU_g . 0011110 .. 111001 000000 ..... ..... @icvt
FCVTAS_g . 0011110 .. 100100 000000 ..... ..... @icvt
FCVTAU_g . 0011110 .. 100101 000000 ..... ..... @icvt
FJCVTZS 0 0011110 01 111110 000000 ..... ..... @rr
FMOV_ws 0 0011110 00 100110 000000 ..... ..... @rr
FMOV_sw 0 0011110 00 100111 000000 ..... ..... @rr
FMOV_xd 1 0011110 01 100110 000000 ..... ..... @rr
FMOV_dx 1 0011110 01 100111 000000 ..... ..... @rr
# Move to/from upper half of 128-bit
FMOV_xu 1 0011110 10 101110 000000 ..... ..... @rr
FMOV_ux 1 0011110 10 101111 000000 ..... ..... @rr
# Half-precision allows both sf=0 and sf=1 with identical results
FMOV_xh - 0011110 11 100110 000000 ..... ..... @rr
FMOV_hx - 0011110 11 100111 000000 ..... ..... @rr
# Floating-point data processing (1 source)
FMOV_s 00011110 .. 1 000000 10000 ..... ..... @rr_hsd
FABS_s 00011110 .. 1 000001 10000 ..... ..... @rr_hsd
FNEG_s 00011110 .. 1 000010 10000 ..... ..... @rr_hsd
FSQRT_s 00011110 .. 1 000011 10000 ..... ..... @rr_hsd
FRINTN_s 00011110 .. 1 001000 10000 ..... ..... @rr_hsd
FRINTP_s 00011110 .. 1 001001 10000 ..... ..... @rr_hsd
FRINTM_s 00011110 .. 1 001010 10000 ..... ..... @rr_hsd
FRINTZ_s 00011110 .. 1 001011 10000 ..... ..... @rr_hsd
FRINTA_s 00011110 .. 1 001100 10000 ..... ..... @rr_hsd
FRINTX_s 00011110 .. 1 001110 10000 ..... ..... @rr_hsd
FRINTI_s 00011110 .. 1 001111 10000 ..... ..... @rr_hsd
BFCVT_s 00011110 01 1 000110 10000 ..... ..... @rr_s
FRINT32Z_s 00011110 0. 1 010000 10000 ..... ..... @rr_sd
FRINT32X_s 00011110 0. 1 010001 10000 ..... ..... @rr_sd
FRINT64Z_s 00011110 0. 1 010010 10000 ..... ..... @rr_sd
FRINT64X_s 00011110 0. 1 010011 10000 ..... ..... @rr_sd
FCVT_s_ds 00011110 00 1 000101 10000 ..... ..... @rr
FCVT_s_hs 00011110 00 1 000111 10000 ..... ..... @rr
FCVT_s_sd 00011110 01 1 000100 10000 ..... ..... @rr
FCVT_s_hd 00011110 01 1 000111 10000 ..... ..... @rr
FCVT_s_sh 00011110 11 1 000100 10000 ..... ..... @rr
FCVT_s_dh 00011110 11 1 000101 10000 ..... ..... @rr
# Floating-point Immediate
FMOVI_s 0001 1110 .. 1 imm:8 100 00000 rd:5 esz=%esz_hsd
# Floating-point Compare
FCMP 00011110 .. 1 rm:5 001000 rn:5 e:1 z:1 000 esz=%esz_hsd
# Floating-point Conditional Compare
FCCMP 00011110 .. 1 rm:5 cond:4 01 rn:5 e:1 nzcv:4 esz=%esz_hsd
# Advanced SIMD Modified Immediate / Shift by Immediate
%abcdefgh 16:3 5:5
@@ -1393,3 +1633,261 @@ UQRSHRN_si 0111 11110 .... ... 10011 1 ..... ..... @shri_s
SQRSHRUN_si 0111 11110 .... ... 10001 1 ..... ..... @shri_b
SQRSHRUN_si 0111 11110 .... ... 10001 1 ..... ..... @shri_h
SQRSHRUN_si 0111 11110 .... ... 10001 1 ..... ..... @shri_s
# Advanced SIMD scalar two-register miscellaneous
SQABS_s 0101 1110 ..1 00000 01111 0 ..... ..... @rr_e
SQNEG_s 0111 1110 ..1 00000 01111 0 ..... ..... @rr_e
ABS_s 0101 1110 111 00000 10111 0 ..... ..... @rr
NEG_s 0111 1110 111 00000 10111 0 ..... ..... @rr
CMGT0_s 0101 1110 111 00000 10001 0 ..... ..... @rr
CMGE0_s 0111 1110 111 00000 10001 0 ..... ..... @rr
CMEQ0_s 0101 1110 111 00000 10011 0 ..... ..... @rr
CMLE0_s 0111 1110 111 00000 10011 0 ..... ..... @rr
CMLT0_s 0101 1110 111 00000 10101 0 ..... ..... @rr
SQXTUN_s 0111 1110 ..1 00001 00101 0 ..... ..... @rr_e
SQXTN_s 0101 1110 ..1 00001 01001 0 ..... ..... @rr_e
UQXTN_s 0111 1110 ..1 00001 01001 0 ..... ..... @rr_e
FCVTXN_s 0111 1110 011 00001 01101 0 ..... ..... @rr_s
FCMGT0_s 0101 1110 111 11000 11001 0 ..... ..... @rr_h
FCMGT0_s 0101 1110 1.1 00000 11001 0 ..... ..... @rr_sd
FCMGE0_s 0111 1110 111 11000 11001 0 ..... ..... @rr_h
FCMGE0_s 0111 1110 1.1 00000 11001 0 ..... ..... @rr_sd
FCMEQ0_s 0101 1110 111 11000 11011 0 ..... ..... @rr_h
FCMEQ0_s 0101 1110 1.1 00000 11011 0 ..... ..... @rr_sd
FCMLE0_s 0111 1110 111 11000 11011 0 ..... ..... @rr_h
FCMLE0_s 0111 1110 1.1 00000 11011 0 ..... ..... @rr_sd
FCMLT0_s 0101 1110 111 11000 11101 0 ..... ..... @rr_h
FCMLT0_s 0101 1110 1.1 00000 11101 0 ..... ..... @rr_sd
FRECPE_s 0101 1110 111 11001 11011 0 ..... ..... @rr_h
FRECPE_s 0101 1110 1.1 00001 11011 0 ..... ..... @rr_sd
FRECPX_s 0101 1110 111 11001 11111 0 ..... ..... @rr_h
FRECPX_s 0101 1110 1.1 00001 11111 0 ..... ..... @rr_sd
FRSQRTE_s 0111 1110 111 11001 11011 0 ..... ..... @rr_h
FRSQRTE_s 0111 1110 1.1 00001 11011 0 ..... ..... @rr_sd
@icvt_h . ....... .. ...... ...... rn:5 rd:5 \
&fcvt sf=0 esz=1 shift=0
@icvt_sd . ....... .. ...... ...... rn:5 rd:5 \
&fcvt sf=0 esz=%esz_sd shift=0
SCVTF_f 0101 1110 011 11001 11011 0 ..... ..... @icvt_h
SCVTF_f 0101 1110 0.1 00001 11011 0 ..... ..... @icvt_sd
UCVTF_f 0111 1110 011 11001 11011 0 ..... ..... @icvt_h
UCVTF_f 0111 1110 0.1 00001 11011 0 ..... ..... @icvt_sd
FCVTNS_f 0101 1110 011 11001 10101 0 ..... ..... @icvt_h
FCVTNS_f 0101 1110 0.1 00001 10101 0 ..... ..... @icvt_sd
FCVTNU_f 0111 1110 011 11001 10101 0 ..... ..... @icvt_h
FCVTNU_f 0111 1110 0.1 00001 10101 0 ..... ..... @icvt_sd
FCVTPS_f 0101 1110 111 11001 10101 0 ..... ..... @icvt_h
FCVTPS_f 0101 1110 1.1 00001 10101 0 ..... ..... @icvt_sd
FCVTPU_f 0111 1110 111 11001 10101 0 ..... ..... @icvt_h
FCVTPU_f 0111 1110 1.1 00001 10101 0 ..... ..... @icvt_sd
FCVTMS_f 0101 1110 011 11001 10111 0 ..... ..... @icvt_h
FCVTMS_f 0101 1110 0.1 00001 10111 0 ..... ..... @icvt_sd
FCVTMU_f 0111 1110 011 11001 10111 0 ..... ..... @icvt_h
FCVTMU_f 0111 1110 0.1 00001 10111 0 ..... ..... @icvt_sd
FCVTZS_f 0101 1110 111 11001 10111 0 ..... ..... @icvt_h
FCVTZS_f 0101 1110 1.1 00001 10111 0 ..... ..... @icvt_sd
FCVTZU_f 0111 1110 111 11001 10111 0 ..... ..... @icvt_h
FCVTZU_f 0111 1110 1.1 00001 10111 0 ..... ..... @icvt_sd
FCVTAS_f 0101 1110 011 11001 11001 0 ..... ..... @icvt_h
FCVTAS_f 0101 1110 0.1 00001 11001 0 ..... ..... @icvt_sd
FCVTAU_f 0111 1110 011 11001 11001 0 ..... ..... @icvt_h
FCVTAU_f 0111 1110 0.1 00001 11001 0 ..... ..... @icvt_sd
%fcvt_f_sh_h 16:4 !function=rsub_16
%fcvt_f_sh_s 16:5 !function=rsub_32
%fcvt_f_sh_d 16:6 !function=rsub_64
@fcvt_fixed_h .... .... . 001 .... ...... rn:5 rd:5 \
&fcvt sf=0 esz=1 shift=%fcvt_f_sh_h
@fcvt_fixed_s .... .... . 01 ..... ...... rn:5 rd:5 \
&fcvt sf=0 esz=2 shift=%fcvt_f_sh_s
@fcvt_fixed_d .... .... . 1 ...... ...... rn:5 rd:5 \
&fcvt sf=0 esz=3 shift=%fcvt_f_sh_d
SCVTF_f 0101 1111 0 ....... 111001 ..... ..... @fcvt_fixed_h
SCVTF_f 0101 1111 0 ....... 111001 ..... ..... @fcvt_fixed_s
SCVTF_f 0101 1111 0 ....... 111001 ..... ..... @fcvt_fixed_d
UCVTF_f 0111 1111 0 ....... 111001 ..... ..... @fcvt_fixed_h
UCVTF_f 0111 1111 0 ....... 111001 ..... ..... @fcvt_fixed_s
UCVTF_f 0111 1111 0 ....... 111001 ..... ..... @fcvt_fixed_d
FCVTZS_f 0101 1111 0 ....... 111111 ..... ..... @fcvt_fixed_h
FCVTZS_f 0101 1111 0 ....... 111111 ..... ..... @fcvt_fixed_s
FCVTZS_f 0101 1111 0 ....... 111111 ..... ..... @fcvt_fixed_d
FCVTZU_f 0111 1111 0 ....... 111111 ..... ..... @fcvt_fixed_h
FCVTZU_f 0111 1111 0 ....... 111111 ..... ..... @fcvt_fixed_s
FCVTZU_f 0111 1111 0 ....... 111111 ..... ..... @fcvt_fixed_d
# Advanced SIMD two-register miscellaneous
SQABS_v 0.00 1110 ..1 00000 01111 0 ..... ..... @qrr_e
SQNEG_v 0.10 1110 ..1 00000 01111 0 ..... ..... @qrr_e
ABS_v 0.00 1110 ..1 00000 10111 0 ..... ..... @qrr_e
NEG_v 0.10 1110 ..1 00000 10111 0 ..... ..... @qrr_e
CLS_v 0.00 1110 ..1 00000 01001 0 ..... ..... @qrr_e
CLZ_v 0.10 1110 ..1 00000 01001 0 ..... ..... @qrr_e
CNT_v 0.00 1110 001 00000 01011 0 ..... ..... @qrr_b
NOT_v 0.10 1110 001 00000 01011 0 ..... ..... @qrr_b
RBIT_v 0.10 1110 011 00000 01011 0 ..... ..... @qrr_b
CMGT0_v 0.00 1110 ..1 00000 10001 0 ..... ..... @qrr_e
CMGE0_v 0.10 1110 ..1 00000 10001 0 ..... ..... @qrr_e
CMEQ0_v 0.00 1110 ..1 00000 10011 0 ..... ..... @qrr_e
CMLE0_v 0.10 1110 ..1 00000 10011 0 ..... ..... @qrr_e
CMLT0_v 0.00 1110 ..1 00000 10101 0 ..... ..... @qrr_e
REV16_v 0.00 1110 001 00000 00011 0 ..... ..... @qrr_b
REV32_v 0.10 1110 0.1 00000 00001 0 ..... ..... @qrr_bh
REV64_v 0.00 1110 ..1 00000 00001 0 ..... ..... @qrr_e
SADDLP_v 0.00 1110 ..1 00000 00101 0 ..... ..... @qrr_e
UADDLP_v 0.10 1110 ..1 00000 00101 0 ..... ..... @qrr_e
SADALP_v 0.00 1110 ..1 00000 01101 0 ..... ..... @qrr_e
UADALP_v 0.10 1110 ..1 00000 01101 0 ..... ..... @qrr_e
XTN 0.00 1110 ..1 00001 00101 0 ..... ..... @qrr_e
SQXTUN_v 0.10 1110 ..1 00001 00101 0 ..... ..... @qrr_e
SQXTN_v 0.00 1110 ..1 00001 01001 0 ..... ..... @qrr_e
UQXTN_v 0.10 1110 ..1 00001 01001 0 ..... ..... @qrr_e
FCVTN_v 0.00 1110 0.1 00001 01101 0 ..... ..... @qrr_hs
FCVTXN_v 0.10 1110 011 00001 01101 0 ..... ..... @qrr_s
BFCVTN_v 0.00 1110 101 00001 01101 0 ..... ..... @qrr_h
SHLL_v 0.10 1110 ..1 00001 00111 0 ..... ..... @qrr_e
FABS_v 0.00 1110 111 11000 11111 0 ..... ..... @qrr_h
FABS_v 0.00 1110 1.1 00000 11111 0 ..... ..... @qrr_sd
FNEG_v 0.10 1110 111 11000 11111 0 ..... ..... @qrr_h
FNEG_v 0.10 1110 1.1 00000 11111 0 ..... ..... @qrr_sd
FSQRT_v 0.10 1110 111 11001 11111 0 ..... ..... @qrr_h
FSQRT_v 0.10 1110 1.1 00001 11111 0 ..... ..... @qrr_sd
FRINTN_v 0.00 1110 011 11001 10001 0 ..... ..... @qrr_h
FRINTN_v 0.00 1110 0.1 00001 10001 0 ..... ..... @qrr_sd
FRINTM_v 0.00 1110 011 11001 10011 0 ..... ..... @qrr_h
FRINTM_v 0.00 1110 0.1 00001 10011 0 ..... ..... @qrr_sd
FRINTP_v 0.00 1110 111 11001 10001 0 ..... ..... @qrr_h
FRINTP_v 0.00 1110 1.1 00001 10001 0 ..... ..... @qrr_sd
FRINTZ_v 0.00 1110 111 11001 10011 0 ..... ..... @qrr_h
FRINTZ_v 0.00 1110 1.1 00001 10011 0 ..... ..... @qrr_sd
FRINTA_v 0.10 1110 011 11001 10001 0 ..... ..... @qrr_h
FRINTA_v 0.10 1110 0.1 00001 10001 0 ..... ..... @qrr_sd
FRINTX_v 0.10 1110 011 11001 10011 0 ..... ..... @qrr_h
FRINTX_v 0.10 1110 0.1 00001 10011 0 ..... ..... @qrr_sd
FRINTI_v 0.10 1110 111 11001 10011 0 ..... ..... @qrr_h
FRINTI_v 0.10 1110 1.1 00001 10011 0 ..... ..... @qrr_sd
FRINT32Z_v 0.00 1110 0.1 00001 11101 0 ..... ..... @qrr_sd
FRINT32X_v 0.10 1110 0.1 00001 11101 0 ..... ..... @qrr_sd
FRINT64Z_v 0.00 1110 0.1 00001 11111 0 ..... ..... @qrr_sd
FRINT64X_v 0.10 1110 0.1 00001 11111 0 ..... ..... @qrr_sd
SCVTF_vi 0.00 1110 011 11001 11011 0 ..... ..... @qrr_h
SCVTF_vi 0.00 1110 0.1 00001 11011 0 ..... ..... @qrr_sd
UCVTF_vi 0.10 1110 011 11001 11011 0 ..... ..... @qrr_h
UCVTF_vi 0.10 1110 0.1 00001 11011 0 ..... ..... @qrr_sd
FCVTNS_vi 0.00 1110 011 11001 10101 0 ..... ..... @qrr_h
FCVTNS_vi 0.00 1110 0.1 00001 10101 0 ..... ..... @qrr_sd
FCVTNU_vi 0.10 1110 011 11001 10101 0 ..... ..... @qrr_h
FCVTNU_vi 0.10 1110 0.1 00001 10101 0 ..... ..... @qrr_sd
FCVTPS_vi 0.00 1110 111 11001 10101 0 ..... ..... @qrr_h
FCVTPS_vi 0.00 1110 1.1 00001 10101 0 ..... ..... @qrr_sd
FCVTPU_vi 0.10 1110 111 11001 10101 0 ..... ..... @qrr_h
FCVTPU_vi 0.10 1110 1.1 00001 10101 0 ..... ..... @qrr_sd
FCVTMS_vi 0.00 1110 011 11001 10111 0 ..... ..... @qrr_h
FCVTMS_vi 0.00 1110 0.1 00001 10111 0 ..... ..... @qrr_sd
FCVTMU_vi 0.10 1110 011 11001 10111 0 ..... ..... @qrr_h
FCVTMU_vi 0.10 1110 0.1 00001 10111 0 ..... ..... @qrr_sd
FCVTZS_vi 0.00 1110 111 11001 10111 0 ..... ..... @qrr_h
FCVTZS_vi 0.00 1110 1.1 00001 10111 0 ..... ..... @qrr_sd
FCVTZU_vi 0.10 1110 111 11001 10111 0 ..... ..... @qrr_h
FCVTZU_vi 0.10 1110 1.1 00001 10111 0 ..... ..... @qrr_sd
FCVTAS_vi 0.00 1110 011 11001 11001 0 ..... ..... @qrr_h
FCVTAS_vi 0.00 1110 0.1 00001 11001 0 ..... ..... @qrr_sd
FCVTAU_vi 0.10 1110 011 11001 11001 0 ..... ..... @qrr_h
FCVTAU_vi 0.10 1110 0.1 00001 11001 0 ..... ..... @qrr_sd
FCMGT0_v 0.00 1110 111 11000 11001 0 ..... ..... @qrr_h
FCMGT0_v 0.00 1110 1.1 00000 11001 0 ..... ..... @qrr_sd
FCMGE0_v 0.10 1110 111 11000 11001 0 ..... ..... @qrr_h
FCMGE0_v 0.10 1110 1.1 00000 11001 0 ..... ..... @qrr_sd
FCMEQ0_v 0.00 1110 111 11000 11011 0 ..... ..... @qrr_h
FCMEQ0_v 0.00 1110 1.1 00000 11011 0 ..... ..... @qrr_sd
FCMLE0_v 0.10 1110 111 11000 11011 0 ..... ..... @qrr_h
FCMLE0_v 0.10 1110 1.1 00000 11011 0 ..... ..... @qrr_sd
FCMLT0_v 0.00 1110 111 11000 11101 0 ..... ..... @qrr_h
FCMLT0_v 0.00 1110 1.1 00000 11101 0 ..... ..... @qrr_sd
FRECPE_v 0.00 1110 111 11001 11011 0 ..... ..... @qrr_h
FRECPE_v 0.00 1110 1.1 00001 11011 0 ..... ..... @qrr_sd
FRSQRTE_v 0.10 1110 111 11001 11011 0 ..... ..... @qrr_h
FRSQRTE_v 0.10 1110 1.1 00001 11011 0 ..... ..... @qrr_sd
URECPE_v 0.00 1110 101 00001 11001 0 ..... ..... @qrr_s
URSQRTE_v 0.10 1110 101 00001 11001 0 ..... ..... @qrr_s
FCVTL_v 0.00 1110 0.1 00001 01111 0 ..... ..... @qrr_sd
&fcvt_q rd rn esz q shift
@fcvtq_h . q:1 . ...... 001 .... ...... rn:5 rd:5 \
&fcvt_q esz=1 shift=%fcvt_f_sh_h
@fcvtq_s . q:1 . ...... 01 ..... ...... rn:5 rd:5 \
&fcvt_q esz=2 shift=%fcvt_f_sh_s
@fcvtq_d . q:1 . ...... 1 ...... ...... rn:5 rd:5 \
&fcvt_q esz=3 shift=%fcvt_f_sh_d
SCVTF_vf 0.00 11110 ....... 111001 ..... ..... @fcvtq_h
SCVTF_vf 0.00 11110 ....... 111001 ..... ..... @fcvtq_s
SCVTF_vf 0.00 11110 ....... 111001 ..... ..... @fcvtq_d
UCVTF_vf 0.10 11110 ....... 111001 ..... ..... @fcvtq_h
UCVTF_vf 0.10 11110 ....... 111001 ..... ..... @fcvtq_s
UCVTF_vf 0.10 11110 ....... 111001 ..... ..... @fcvtq_d
FCVTZS_vf 0.00 11110 ....... 111111 ..... ..... @fcvtq_h
FCVTZS_vf 0.00 11110 ....... 111111 ..... ..... @fcvtq_s
FCVTZS_vf 0.00 11110 ....... 111111 ..... ..... @fcvtq_d
FCVTZU_vf 0.10 11110 ....... 111111 ..... ..... @fcvtq_h
FCVTZU_vf 0.10 11110 ....... 111111 ..... ..... @fcvtq_s
FCVTZU_vf 0.10 11110 ....... 111111 ..... ..... @fcvtq_d

target/arm/tcg/gengvec.c

@@ -2358,3 +2358,372 @@ void gen_gvec_urhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
assert(vece <= MO_32);
tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}
void gen_gvec_cls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
static const GVecGen2 g[] = {
{ .fni4 = gen_helper_neon_cls_s8,
.vece = MO_8 },
{ .fni4 = gen_helper_neon_cls_s16,
.vece = MO_16 },
{ .fni4 = tcg_gen_clrsb_i32,
.vece = MO_32 },
};
assert(vece <= MO_32);
tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
}
static void gen_clz32_i32(TCGv_i32 d, TCGv_i32 n)
{
tcg_gen_clzi_i32(d, n, 32);
}
void gen_gvec_clz(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
static const GVecGen2 g[] = {
{ .fni4 = gen_helper_neon_clz_u8,
.vece = MO_8 },
{ .fni4 = gen_helper_neon_clz_u16,
.vece = MO_16 },
{ .fni4 = gen_clz32_i32,
.vece = MO_32 },
};
assert(vece <= MO_32);
tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
}
void gen_gvec_cnt(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
assert(vece == MO_8);
tcg_gen_gvec_2_ool(rd_ofs, rn_ofs, opr_sz, max_sz, 0,
gen_helper_gvec_cnt_b);
}
void gen_gvec_rbit(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
assert(vece == MO_8);
tcg_gen_gvec_2_ool(rd_ofs, rn_ofs, opr_sz, max_sz, 0,
gen_helper_gvec_rbit_b);
}
void gen_gvec_rev16(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
assert(vece == MO_8);
tcg_gen_gvec_rotli(MO_16, rd_ofs, rn_ofs, 8, opr_sz, max_sz);
}
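/* Byte-swap each 32-bit half of a 64-bit value: bswap64 reverses all
 * eight bytes and the rotate by 32 puts the two halves back in place,
 * e.g. 0x0011223344556677 -> 0x3322110077665544. */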
static void gen_bswap32_i64(TCGv_i64 d, TCGv_i64 n)
{
tcg_gen_bswap64_i64(d, n);
tcg_gen_rotli_i64(d, d, 32);
}
void gen_gvec_rev32(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
static const GVecGen2 g = {
.fni8 = gen_bswap32_i64,
.fni4 = tcg_gen_bswap32_i32,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
.vece = MO_32
};
switch (vece) {
case MO_16:
tcg_gen_gvec_rotli(MO_32, rd_ofs, rn_ofs, 16, opr_sz, max_sz);
break;
case MO_8:
tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g);
break;
default:
g_assert_not_reached();
}
}
void gen_gvec_rev64(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
static const GVecGen2 g[] = {
{ .fni8 = tcg_gen_bswap64_i64,
.vece = MO_64 },
{ .fni8 = tcg_gen_hswap_i64,
.vece = MO_64 },
};
switch (vece) {
case MO_32:
tcg_gen_gvec_rotli(MO_64, rd_ofs, rn_ofs, 32, opr_sz, max_sz);
break;
case MO_8:
case MO_16:
tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
break;
default:
g_assert_not_reached();
}
}
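/* Signed pairwise widening add: for elements of size 2*half, shift-left
 * then arithmetic shift-right sign-extends the low half-element, a plain
 * arithmetic shift-right extracts the high half-element, and the sum is
 * the widened pairwise result. */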
static void gen_saddlp_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
int half = 4 << vece;
TCGv_vec t = tcg_temp_new_vec_matching(d);
tcg_gen_shli_vec(vece, t, n, half);
tcg_gen_sari_vec(vece, d, n, half);
tcg_gen_sari_vec(vece, t, t, half);
tcg_gen_add_vec(vece, d, d, t);
}
static void gen_saddlp_s_i64(TCGv_i64 d, TCGv_i64 n)
{
TCGv_i64 t = tcg_temp_new_i64();
tcg_gen_ext32s_i64(t, n);
tcg_gen_sari_i64(d, n, 32);
tcg_gen_add_i64(d, d, t);
}
void gen_gvec_saddlp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
static const TCGOpcode vecop_list[] = {
INDEX_op_sari_vec, INDEX_op_shli_vec, INDEX_op_add_vec, 0
};
static const GVecGen2 g[] = {
{ .fniv = gen_saddlp_vec,
.fni8 = gen_helper_neon_addlp_s8,
.opt_opc = vecop_list,
.vece = MO_16 },
{ .fniv = gen_saddlp_vec,
.fni8 = gen_helper_neon_addlp_s16,
.opt_opc = vecop_list,
.vece = MO_32 },
{ .fniv = gen_saddlp_vec,
.fni8 = gen_saddlp_s_i64,
.opt_opc = vecop_list,
.vece = MO_64 },
};
assert(vece <= MO_32);
tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
}
static void gen_sadalp_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
gen_saddlp_vec(vece, t, n);
tcg_gen_add_vec(vece, d, d, t);
}
static void gen_sadalp_b_i64(TCGv_i64 d, TCGv_i64 n)
{
TCGv_i64 t = tcg_temp_new_i64();
gen_helper_neon_addlp_s8(t, n);
tcg_gen_vec_add16_i64(d, d, t);
}
static void gen_sadalp_h_i64(TCGv_i64 d, TCGv_i64 n)
{
TCGv_i64 t = tcg_temp_new_i64();
gen_helper_neon_addlp_s16(t, n);
tcg_gen_vec_add32_i64(d, d, t);
}
static void gen_sadalp_s_i64(TCGv_i64 d, TCGv_i64 n)
{
TCGv_i64 t = tcg_temp_new_i64();
gen_saddlp_s_i64(t, n);
tcg_gen_add_i64(d, d, t);
}
void gen_gvec_sadalp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
static const TCGOpcode vecop_list[] = {
INDEX_op_sari_vec, INDEX_op_shli_vec, INDEX_op_add_vec, 0
};
static const GVecGen2 g[] = {
{ .fniv = gen_sadalp_vec,
.fni8 = gen_sadalp_b_i64,
.opt_opc = vecop_list,
.load_dest = true,
.vece = MO_16 },
{ .fniv = gen_sadalp_vec,
.fni8 = gen_sadalp_h_i64,
.opt_opc = vecop_list,
.load_dest = true,
.vece = MO_32 },
{ .fniv = gen_sadalp_vec,
.fni8 = gen_sadalp_s_i64,
.opt_opc = vecop_list,
.load_dest = true,
.vece = MO_64 },
};
assert(vece <= MO_32);
tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
}
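/* Unsigned pairwise widening add: the mask zero-extends the low
 * half-element in place, the logical shift-right extracts the high
 * half-element, and the sum cannot carry out of the wide element. */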
static void gen_uaddlp_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
int half = 4 << vece;
TCGv_vec t = tcg_temp_new_vec_matching(d);
TCGv_vec m = tcg_constant_vec_matching(d, vece, MAKE_64BIT_MASK(0, half));
tcg_gen_shri_vec(vece, t, n, half);
tcg_gen_and_vec(vece, d, n, m);
tcg_gen_add_vec(vece, d, d, t);
}
static void gen_uaddlp_b_i64(TCGv_i64 d, TCGv_i64 n)
{
TCGv_i64 t = tcg_temp_new_i64();
TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0xff));
tcg_gen_shri_i64(t, n, 8);
tcg_gen_and_i64(d, n, m);
tcg_gen_and_i64(t, t, m);
/* No carry between widened unsigned elements. */
tcg_gen_add_i64(d, d, t);
}
static void gen_uaddlp_h_i64(TCGv_i64 d, TCGv_i64 n)
{
TCGv_i64 t = tcg_temp_new_i64();
TCGv_i64 m = tcg_constant_i64(dup_const(MO_32, 0xffff));
tcg_gen_shri_i64(t, n, 16);
tcg_gen_and_i64(d, n, m);
tcg_gen_and_i64(t, t, m);
/* No carry between widened unsigned elements. */
tcg_gen_add_i64(d, d, t);
}
static void gen_uaddlp_s_i64(TCGv_i64 d, TCGv_i64 n)
{
TCGv_i64 t = tcg_temp_new_i64();
tcg_gen_ext32u_i64(t, n);
tcg_gen_shri_i64(d, n, 32);
tcg_gen_add_i64(d, d, t);
}
void gen_gvec_uaddlp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
static const TCGOpcode vecop_list[] = {
INDEX_op_shri_vec, INDEX_op_add_vec, 0
};
static const GVecGen2 g[] = {
{ .fniv = gen_uaddlp_vec,
.fni8 = gen_uaddlp_b_i64,
.opt_opc = vecop_list,
.vece = MO_16 },
{ .fniv = gen_uaddlp_vec,
.fni8 = gen_uaddlp_h_i64,
.opt_opc = vecop_list,
.vece = MO_32 },
{ .fniv = gen_uaddlp_vec,
.fni8 = gen_uaddlp_s_i64,
.opt_opc = vecop_list,
.vece = MO_64 },
};
assert(vece <= MO_32);
tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
}
static void gen_uadalp_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
gen_uaddlp_vec(vece, t, n);
tcg_gen_add_vec(vece, d, d, t);
}
static void gen_uadalp_b_i64(TCGv_i64 d, TCGv_i64 n)
{
TCGv_i64 t = tcg_temp_new_i64();
gen_uaddlp_b_i64(t, n);
tcg_gen_vec_add16_i64(d, d, t);
}
static void gen_uadalp_h_i64(TCGv_i64 d, TCGv_i64 n)
{
TCGv_i64 t = tcg_temp_new_i64();
gen_uaddlp_h_i64(t, n);
tcg_gen_vec_add32_i64(d, d, t);
}
static void gen_uadalp_s_i64(TCGv_i64 d, TCGv_i64 n)
{
TCGv_i64 t = tcg_temp_new_i64();
gen_uaddlp_s_i64(t, n);
tcg_gen_add_i64(d, d, t);
}
void gen_gvec_uadalp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
static const TCGOpcode vecop_list[] = {
INDEX_op_shri_vec, INDEX_op_add_vec, 0
};
static const GVecGen2 g[] = {
{ .fniv = gen_uadalp_vec,
.fni8 = gen_uadalp_b_i64,
.load_dest = true,
.opt_opc = vecop_list,
.vece = MO_16 },
{ .fniv = gen_uadalp_vec,
.fni8 = gen_uadalp_h_i64,
.load_dest = true,
.opt_opc = vecop_list,
.vece = MO_32 },
{ .fniv = gen_uadalp_vec,
.fni8 = gen_uadalp_s_i64,
.load_dest = true,
.opt_opc = vecop_list,
.vece = MO_64 },
};
assert(vece <= MO_32);
tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
}
void gen_gvec_fabs(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t oprsz, uint32_t maxsz)
{
uint64_t s_bit = 1ull << ((8 << vece) - 1);
tcg_gen_gvec_andi(vece, dofs, aofs, s_bit - 1, oprsz, maxsz);
}
void gen_gvec_fneg(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t oprsz, uint32_t maxsz)
{
uint64_t s_bit = 1ull << ((8 << vece) - 1);
tcg_gen_gvec_xori(vece, dofs, aofs, s_bit, oprsz, maxsz);
}
void gen_gvec_urecpe(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
assert(vece == MO_32);
tcg_gen_gvec_2_ool(rd_ofs, rn_ofs, opr_sz, max_sz, 0,
gen_helper_gvec_urecpe_s);
}
void gen_gvec_ursqrte(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t opr_sz, uint32_t max_sz)
{
assert(vece == MO_32);
tcg_gen_gvec_2_ool(rd_ofs, rn_ofs, opr_sz, max_sz, 0,
gen_helper_gvec_ursqrte_s);
}

target/arm/tcg/helper-a64.c

@@ -306,67 +306,6 @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
}
-/* Pairwise long add: add pairs of adjacent elements into
- * double-width elements in the result (eg _s8 is an 8x8->16 op)
- */
-uint64_t HELPER(neon_addlp_s8)(uint64_t a)
-{
-uint64_t nsignmask = 0x0080008000800080ULL;
-uint64_t wsignmask = 0x8000800080008000ULL;
-uint64_t elementmask = 0x00ff00ff00ff00ffULL;
-uint64_t tmp1, tmp2;
-uint64_t res, signres;
-/* Extract odd elements, sign extend each to a 16 bit field */
-tmp1 = a & elementmask;
-tmp1 ^= nsignmask;
-tmp1 |= wsignmask;
-tmp1 = (tmp1 - nsignmask) ^ wsignmask;
-/* Ditto for the even elements */
-tmp2 = (a >> 8) & elementmask;
-tmp2 ^= nsignmask;
-tmp2 |= wsignmask;
-tmp2 = (tmp2 - nsignmask) ^ wsignmask;
-/* calculate the result by summing bits 0..14, 16..22, etc,
- * and then adjusting the sign bits 15, 23, etc manually.
- * This ensures the addition can't overflow the 16 bit field.
- */
-signres = (tmp1 ^ tmp2) & wsignmask;
-res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
-res ^= signres;
-return res;
-}
-uint64_t HELPER(neon_addlp_u8)(uint64_t a)
-{
-uint64_t tmp;
-tmp = a & 0x00ff00ff00ff00ffULL;
-tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
-return tmp;
-}
-uint64_t HELPER(neon_addlp_s16)(uint64_t a)
-{
-int32_t reslo, reshi;
-reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
-reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);
-return (uint32_t)reslo | (((uint64_t)reshi) << 32);
-}
-uint64_t HELPER(neon_addlp_u16)(uint64_t a)
-{
-uint64_t tmp;
-tmp = a & 0x0000ffff0000ffffULL;
-tmp += (a >> 16) & 0x0000ffff0000ffffULL;
-return tmp;
-}
/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
{
@@ -469,23 +408,13 @@ float64 HELPER(frecpx_f64)(float64 a, void *fpstp)
float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env)
{
-/* Von Neumann rounding is implemented by using round-to-zero
- * and then setting the LSB of the result if Inexact was raised.
- */
float32 r;
float_status *fpst = &env->vfp.fp_status;
-float_status tstat = *fpst;
-int exflags;
-set_float_rounding_mode(float_round_to_zero, &tstat);
-set_float_exception_flags(0, &tstat);
-r = float64_to_float32(a, &tstat);
-exflags = get_float_exception_flags(&tstat);
-if (exflags & float_flag_inexact) {
-r = make_float32(float32_val(r) | 1);
-}
-exflags |= get_float_exception_flags(fpst);
-set_float_exception_flags(exflags, fpst);
+int old = get_float_rounding_mode(fpst);
+set_float_rounding_mode(float_round_to_odd, fpst);
+r = float64_to_float32(a, fpst);
+set_float_rounding_mode(old, fpst);
return r;
}
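
(The round-to-odd mode does exactly the Von Neumann rounding the old code
open-coded: truncate, then force the least-significant result bit to 1 if
anything was discarded. A minimal integer-mantissa sketch of the idea, for
illustration only and not part of the commit:)

static uint32_t round_shift_to_odd(uint64_t x, unsigned shift)
{
    uint32_t r = (uint32_t)(x >> shift);       /* truncate: round to zero */
    if (x & (((uint64_t)1 << shift) - 1)) {    /* were any bits discarded? */
        r |= 1;                                /* inexact: force LSB to 1 */
    }
    return r;                                  /* assumes 0 < shift < 64 */
}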
@@ -679,38 +608,6 @@ uint32_t HELPER(advsimd_rinth)(uint32_t x, void *fp_status)
return ret;
}
-/*
- * Half-precision floating point conversion functions
- *
- * There are a multitude of conversion functions with various
- * different rounding modes. This is dealt with by the calling code
- * setting the mode appropriately before calling the helper.
- */
-uint32_t HELPER(advsimd_f16tosinth)(uint32_t a, void *fpstp)
-{
-float_status *fpst = fpstp;
-/* Invalid if we are passed a NaN */
-if (float16_is_any_nan(a)) {
-float_raise(float_flag_invalid, fpst);
-return 0;
-}
-return float16_to_int16(a, fpst);
-}
-uint32_t HELPER(advsimd_f16touinth)(uint32_t a, void *fpstp)
-{
-float_status *fpst = fpstp;
-/* Invalid if we are passed a NaN */
-if (float16_is_any_nan(a)) {
-float_raise(float_flag_invalid, fpst);
-return 0;
-}
-return float16_to_uint16(a, fpst);
-}
static int el_from_spsr(uint32_t spsr)
{
/* Return the exception level that this SPSR is requesting a return to,
@@ -915,17 +812,6 @@ illegal_return:
"resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc);
}
-/*
- * Square Root and Reciprocal square root
- */
-uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp)
-{
-float_status *s = fpstp;
-return float16_sqrt(a, s);
-}
void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
{
uintptr_t ra = GETPC();

target/arm/tcg/helper-a64.h

@@ -41,10 +41,6 @@ DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
-DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64)
-DEF_HELPER_FLAGS_1(neon_addlp_u8, TCG_CALL_NO_RWG_SE, i64, i64)
-DEF_HELPER_FLAGS_1(neon_addlp_s16, TCG_CALL_NO_RWG_SE, i64, i64)
-DEF_HELPER_FLAGS_1(neon_addlp_u16, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
@@ -78,9 +74,6 @@ DEF_HELPER_3(advsimd_mulx2h, i32, i32, i32, ptr)
DEF_HELPER_4(advsimd_muladd2h, i32, i32, i32, i32, ptr)
DEF_HELPER_2(advsimd_rinth_exact, f16, f16, ptr)
DEF_HELPER_2(advsimd_rinth, f16, f16, ptr)
-DEF_HELPER_2(advsimd_f16tosinth, i32, f16, ptr)
-DEF_HELPER_2(advsimd_f16touinth, i32, f16, ptr)
-DEF_HELPER_2(sqrt_f16, f16, f16, ptr)
DEF_HELPER_2(exception_return, void, env, i64)
DEF_HELPER_FLAGS_2(dc_zva, TCG_CALL_NO_WG, void, env, i64)

--- next file ---

@@ -39,6 +39,7 @@ arm_ss.add(files(
   'op_helper.c',
   'tlb_helper.c',
   'vec_helper.c',
+  'tlb-insns.c',
 ))
 arm_ss.add(when: 'TARGET_AARCH64', if_true: files(

--- next file ---

@@ -525,27 +525,6 @@ uint32_t HELPER(neon_cls_s32)(uint32_t x)
     return count - 1;
 }
 
-/* Bit count. */
-uint32_t HELPER(neon_cnt_u8)(uint32_t x)
-{
-    x = (x & 0x55555555) + ((x >> 1) & 0x55555555);
-    x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
-    x = (x & 0x0f0f0f0f) + ((x >> 4) & 0x0f0f0f0f);
-    return x;
-}
-
-/* Reverse bits in each 8 bit word */
-uint32_t HELPER(neon_rbit_u8)(uint32_t x)
-{
-    x = ((x & 0xf0f0f0f0) >> 4)
-      | ((x & 0x0f0f0f0f) << 4);
-    x = ((x & 0x88888888) >> 3)
-      | ((x & 0x44444444) >> 1)
-      | ((x & 0x22222222) << 1)
-      | ((x & 0x11111111) << 3);
-    return x;
-}
-
 #define NEON_QDMULH16(dest, src1, src2, round) do { \
     uint32_t tmp = (int32_t)(int16_t) src1 * (int16_t) src2; \
     if ((tmp ^ (tmp << 1)) & SIGNBIT) { \
@@ -847,62 +826,47 @@ uint64_t HELPER(neon_widen_s16)(uint32_t x)
     return ((uint32_t)(int16_t)x) | (high << 32);
 }
 
-uint64_t HELPER(neon_addl_u16)(uint64_t a, uint64_t b)
-{
-    uint64_t mask;
-    mask = (a ^ b) & 0x8000800080008000ull;
-    a &= ~0x8000800080008000ull;
-    b &= ~0x8000800080008000ull;
-    return (a + b) ^ mask;
-}
-
-uint64_t HELPER(neon_addl_u32)(uint64_t a, uint64_t b)
-{
-    uint64_t mask;
-    mask = (a ^ b) & 0x8000000080000000ull;
-    a &= ~0x8000000080000000ull;
-    b &= ~0x8000000080000000ull;
-    return (a + b) ^ mask;
-}
-
-uint64_t HELPER(neon_paddl_u16)(uint64_t a, uint64_t b)
-{
-    uint64_t tmp;
-    uint64_t tmp2;
-
-    tmp = a & 0x0000ffff0000ffffull;
-    tmp += (a >> 16) & 0x0000ffff0000ffffull;
-    tmp2 = b & 0xffff0000ffff0000ull;
-    tmp2 += (b << 16) & 0xffff0000ffff0000ull;
-    return    ( tmp         & 0xffff)
-            | ((tmp  >> 16) & 0xffff0000ull)
-            | ((tmp2 << 16) & 0xffff00000000ull)
-            | ( tmp2        & 0xffff000000000000ull);
-}
-
-uint64_t HELPER(neon_paddl_u32)(uint64_t a, uint64_t b)
-{
-    uint32_t low = a + (a >> 32);
-    uint32_t high = b + (b >> 32);
-    return low + ((uint64_t)high << 32);
-}
-
-uint64_t HELPER(neon_subl_u16)(uint64_t a, uint64_t b)
-{
-    uint64_t mask;
-    mask = (a ^ ~b) & 0x8000800080008000ull;
-    a |= 0x8000800080008000ull;
-    b &= ~0x8000800080008000ull;
-    return (a - b) ^ mask;
-}
-
-uint64_t HELPER(neon_subl_u32)(uint64_t a, uint64_t b)
-{
-    uint64_t mask;
-    mask = (a ^ ~b) & 0x8000000080000000ull;
-    a |= 0x8000000080000000ull;
-    b &= ~0x8000000080000000ull;
-    return (a - b) ^ mask;
-}
+/* Pairwise long add: add pairs of adjacent elements into
+ * double-width elements in the result (eg _s8 is an 8x8->16 op)
+ */
+uint64_t HELPER(neon_addlp_s8)(uint64_t a)
+{
+    uint64_t nsignmask = 0x0080008000800080ULL;
+    uint64_t wsignmask = 0x8000800080008000ULL;
+    uint64_t elementmask = 0x00ff00ff00ff00ffULL;
+    uint64_t tmp1, tmp2;
+    uint64_t res, signres;
+
+    /* Extract odd elements, sign extend each to a 16 bit field */
+    tmp1 = a & elementmask;
+    tmp1 ^= nsignmask;
+    tmp1 |= wsignmask;
+    tmp1 = (tmp1 - nsignmask) ^ wsignmask;
+
+    /* Ditto for the even elements */
+    tmp2 = (a >> 8) & elementmask;
+    tmp2 ^= nsignmask;
+    tmp2 |= wsignmask;
+    tmp2 = (tmp2 - nsignmask) ^ wsignmask;
+
+    /* calculate the result by summing bits 0..14, 16..22, etc,
+     * and then adjusting the sign bits 15, 23, etc manually.
+     * This ensures the addition can't overflow the 16 bit field.
+     */
+    signres = (tmp1 ^ tmp2) & wsignmask;
+    res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
+    res ^= signres;
+
+    return res;
+}
+
+uint64_t HELPER(neon_addlp_s16)(uint64_t a)
+{
+    int32_t reslo, reshi;
+
+    reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
+    reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);
+
+    return (uint32_t)reslo | (((uint64_t)reshi) << 32);
+}
 
 uint64_t HELPER(neon_addl_saturate_s32)(CPUARMState *env, uint64_t a, uint64_t b)
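
The new neon_addlp_s8 is a SWAR routine: it processes all eight byte lanes of
one 64-bit register at once. The sign extension uses the classic identity
sext8(x) = (x ^ 0x80) - 0x80, vectorized; wsignmask is ORed in before the
subtraction so a borrow can never ripple into the neighbouring 16-bit lane,
and the final XOR removes that bias again. The closing three lines are the
usual carry-safe lane addition: sum bits 0..14 of each lane normally, then
patch bit 15 in via XOR so a carry cannot cross lanes. A straightforward
reference implementation (illustrative only, not QEMU code) that the
bit-twiddling version must agree with:

#include <stdint.h>
#include <stdio.h>

/* Plain semantics of neon_addlp_s8: add adjacent pairs of signed bytes
 * into signed 16-bit result lanes. */
static uint64_t addlp_s8_reference(uint64_t a)
{
    uint64_t res = 0;
    for (int i = 0; i < 4; i++) {
        int8_t lo = (int8_t)(a >> (16 * i));
        int8_t hi = (int8_t)(a >> (16 * i + 8));
        uint16_t sum = (uint16_t)(int16_t)(lo + hi);
        res |= (uint64_t)sum << (16 * i);
    }
    return res;
}

int main(void)
{
    /* lane 0 holds bytes 0x7f and 0x80, i.e. 127 + (-128) = -1 = 0xffff */
    uint64_t a = 0x0101010101_01807fULL & 0xffffffffffffffffULL;
    a = 0x010101010101807fULL;
    printf("%016llx\n", (unsigned long long)addlp_s8_reference(a));
    return 0;
}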

target/arm/tcg/tlb-insns.c (new file, 1266 lines; diff suppressed because it is too large)

(another file's diff suppressed because it is too large)

--- next file ---

@@ -1409,13 +1409,13 @@ static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
 DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf)
 DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf)
-DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs)
-DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu)
+DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_rz_fs)
+DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_rz_fu)
 
 DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh)
 DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh)
-DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs)
-DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu)
+DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_rz_hs)
+DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_rz_hu)
 
 static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
                         GVecGen2iFn *fn)
@@ -1560,8 +1560,8 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
         NULL, NULL,                                         \
     };                                                      \
     static NeonGenTwo64OpFn * const addfn[] = {             \
-        gen_helper_neon_##OP##l_u16,                        \
-        gen_helper_neon_##OP##l_u32,                        \
+        tcg_gen_vec_##OP##16_i64,                           \
+        tcg_gen_vec_##OP##32_i64,                           \
         tcg_gen_##OP##_i64,                                 \
         NULL,                                               \
     };                                                      \
@@ -1639,8 +1639,8 @@ static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
 static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
 {                                                           \
     static NeonGenTwo64OpFn * const addfn[] = {             \
-        gen_helper_neon_##OP##l_u16,                        \
-        gen_helper_neon_##OP##l_u32,                        \
+        tcg_gen_vec_##OP##16_i64,                           \
+        tcg_gen_vec_##OP##32_i64,                           \
         tcg_gen_##OP##_i64,                                 \
         NULL,                                               \
     };                                                      \
@@ -1761,8 +1761,8 @@ static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
         NULL,
     };
     static NeonGenTwo64OpFn * const addfn[] = {
-        gen_helper_neon_addl_u16,
-        gen_helper_neon_addl_u32,
+        tcg_gen_vec_add16_i64,
+        tcg_gen_vec_add32_i64,
         tcg_gen_add_i64,
         NULL,
     };
@@ -1779,8 +1779,8 @@ static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
         NULL,
     };
     static NeonGenTwo64OpFn * const addfn[] = {
-        gen_helper_neon_addl_u16,
-        gen_helper_neon_addl_u32,
+        tcg_gen_vec_add16_i64,
+        tcg_gen_vec_add32_i64,
         tcg_gen_add_i64,
         NULL,
     };
@@ -1840,8 +1840,8 @@ static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
         NULL,                                               \
     };                                                      \
     static NeonGenTwo64OpFn * const accfn[] = {             \
-        gen_helper_neon_##ACC##l_u16,                       \
-        gen_helper_neon_##ACC##l_u32,                       \
+        tcg_gen_vec_##ACC##16_i64,                          \
+        tcg_gen_vec_##ACC##32_i64,                          \
         tcg_gen_##ACC##_i64,                                \
         NULL,                                               \
     };                                                      \
@@ -2371,7 +2371,7 @@ static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a)
     };                                                      \
     static NeonGenTwo64OpFn * const accfn[] = {             \
         NULL,                                               \
-        gen_helper_neon_##ACC##l_u32,                       \
+        tcg_gen_vec_##ACC##32_i64,                          \
         tcg_gen_##ACC##_i64,                                \
         NULL,                                               \
     };                                                      \
@@ -2565,204 +2565,6 @@ static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
     return true;
 }
 
-static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
-{
-    int pass, half;
-    TCGv_i32 tmp[2];
-
-    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
-        return false;
-    }
-
-    /* UNDEF accesses to D16-D31 if they don't exist. */
-    if (!dc_isar_feature(aa32_simd_r32, s) &&
-        ((a->vd | a->vm) & 0x10)) {
-        return false;
-    }
-
-    if ((a->vd | a->vm) & a->q) {
-        return false;
-    }
-
-    if (a->size == 3) {
-        return false;
-    }
-
-    if (!vfp_access_check(s)) {
-        return true;
-    }
-
-    tmp[0] = tcg_temp_new_i32();
-    tmp[1] = tcg_temp_new_i32();
-
-    for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
-        for (half = 0; half < 2; half++) {
-            read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32);
-            switch (a->size) {
-            case 0:
-                tcg_gen_bswap32_i32(tmp[half], tmp[half]);
-                break;
-            case 1:
-                gen_swap_half(tmp[half], tmp[half]);
-                break;
-            case 2:
-                break;
-            default:
-                g_assert_not_reached();
-            }
-        }
-        write_neon_element32(tmp[1], a->vd, pass * 2, MO_32);
-        write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32);
-    }
-    return true;
-}
-
-static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
-                              NeonGenWidenFn *widenfn,
-                              NeonGenTwo64OpFn *opfn,
-                              NeonGenTwo64OpFn *accfn)
-{
-    /*
-     * Pairwise long operations: widen both halves of the pair,
-     * combine the pairs with the opfn, and then possibly accumulate
-     * into the destination with the accfn.
-     */
-    int pass;
-
-    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
-        return false;
-    }
-
-    /* UNDEF accesses to D16-D31 if they don't exist. */
-    if (!dc_isar_feature(aa32_simd_r32, s) &&
-        ((a->vd | a->vm) & 0x10)) {
-        return false;
-    }
-
-    if ((a->vd | a->vm) & a->q) {
-        return false;
-    }
-
-    if (!widenfn) {
-        return false;
-    }
-
-    if (!vfp_access_check(s)) {
-        return true;
-    }
-
-    for (pass = 0; pass < a->q + 1; pass++) {
-        TCGv_i32 tmp;
-        TCGv_i64 rm0_64, rm1_64, rd_64;
-
-        rm0_64 = tcg_temp_new_i64();
-        rm1_64 = tcg_temp_new_i64();
-        rd_64 = tcg_temp_new_i64();
-
-        tmp = tcg_temp_new_i32();
-        read_neon_element32(tmp, a->vm, pass * 2, MO_32);
-        widenfn(rm0_64, tmp);
-        read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32);
-        widenfn(rm1_64, tmp);
-
-        opfn(rd_64, rm0_64, rm1_64);
-
-        if (accfn) {
-            TCGv_i64 tmp64 = tcg_temp_new_i64();
-            read_neon_element64(tmp64, a->vd, pass, MO_64);
-            accfn(rd_64, tmp64, rd_64);
-        }
-        write_neon_element64(rd_64, a->vd, pass, MO_64);
-    }
-    return true;
-}
-
-static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a)
-{
-    static NeonGenWidenFn * const widenfn[] = {
-        gen_helper_neon_widen_s8,
-        gen_helper_neon_widen_s16,
-        tcg_gen_ext_i32_i64,
-        NULL,
-    };
-    static NeonGenTwo64OpFn * const opfn[] = {
-        gen_helper_neon_paddl_u16,
-        gen_helper_neon_paddl_u32,
-        tcg_gen_add_i64,
-        NULL,
-    };
-
-    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
-}
-
-static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a)
-{
-    static NeonGenWidenFn * const widenfn[] = {
-        gen_helper_neon_widen_u8,
-        gen_helper_neon_widen_u16,
-        tcg_gen_extu_i32_i64,
-        NULL,
-    };
-    static NeonGenTwo64OpFn * const opfn[] = {
-        gen_helper_neon_paddl_u16,
-        gen_helper_neon_paddl_u32,
-        tcg_gen_add_i64,
-        NULL,
-    };
-
-    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
-}
-
-static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a)
-{
-    static NeonGenWidenFn * const widenfn[] = {
-        gen_helper_neon_widen_s8,
-        gen_helper_neon_widen_s16,
-        tcg_gen_ext_i32_i64,
-        NULL,
-    };
-    static NeonGenTwo64OpFn * const opfn[] = {
-        gen_helper_neon_paddl_u16,
-        gen_helper_neon_paddl_u32,
-        tcg_gen_add_i64,
-        NULL,
-    };
-    static NeonGenTwo64OpFn * const accfn[] = {
-        gen_helper_neon_addl_u16,
-        gen_helper_neon_addl_u32,
-        tcg_gen_add_i64,
-        NULL,
-    };
-
-    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
-                             accfn[a->size]);
-}
-
-static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a)
-{
-    static NeonGenWidenFn * const widenfn[] = {
-        gen_helper_neon_widen_u8,
-        gen_helper_neon_widen_u16,
-        tcg_gen_extu_i32_i64,
-        NULL,
-    };
-    static NeonGenTwo64OpFn * const opfn[] = {
-        gen_helper_neon_paddl_u16,
-        gen_helper_neon_paddl_u32,
-        tcg_gen_add_i64,
-        NULL,
-    };
-    static NeonGenTwo64OpFn * const accfn[] = {
-        gen_helper_neon_addl_u16,
-        gen_helper_neon_addl_u32,
-        tcg_gen_add_i64,
-        NULL,
-    };
-
-    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
-                             accfn[a->size]);
-}
-
 typedef void ZipFn(TCGv_ptr, TCGv_ptr);
 
 static bool do_zip_uzp(DisasContext *s, arg_2misc *a,
@@ -3120,6 +2922,13 @@ DO_2MISC_VEC(VCGT0, gen_gvec_cgt0)
 DO_2MISC_VEC(VCLE0, gen_gvec_cle0)
 DO_2MISC_VEC(VCGE0, gen_gvec_cge0)
 DO_2MISC_VEC(VCLT0, gen_gvec_clt0)
+DO_2MISC_VEC(VCLS, gen_gvec_cls)
+DO_2MISC_VEC(VCLZ, gen_gvec_clz)
+DO_2MISC_VEC(VREV64, gen_gvec_rev64)
+DO_2MISC_VEC(VPADDL_S, gen_gvec_saddlp)
+DO_2MISC_VEC(VPADDL_U, gen_gvec_uaddlp)
+DO_2MISC_VEC(VPADAL_S, gen_gvec_sadalp)
+DO_2MISC_VEC(VPADAL_U, gen_gvec_uadalp)
 
 static bool trans_VMVN(DisasContext *s, arg_2misc *a)
 {
@@ -3129,6 +2938,30 @@ static bool trans_VMVN(DisasContext *s, arg_2misc *a)
     return do_2misc_vec(s, a, tcg_gen_gvec_not);
 }
 
+static bool trans_VCNT(DisasContext *s, arg_2misc *a)
+{
+    if (a->size != 0) {
+        return false;
+    }
+    return do_2misc_vec(s, a, gen_gvec_cnt);
+}
+
+static bool trans_VREV16(DisasContext *s, arg_2misc *a)
+{
+    if (a->size != 0) {
+        return false;
+    }
+    return do_2misc_vec(s, a, gen_gvec_rev16);
+}
+
+static bool trans_VREV32(DisasContext *s, arg_2misc *a)
+{
+    if (a->size != 0 && a->size != 1) {
+        return false;
+    }
+    return do_2misc_vec(s, a, gen_gvec_rev32);
+}
+
 #define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA)              \
     static void WRAPNAME(unsigned vece, uint32_t rd_ofs,    \
                          uint32_t rm_ofs, uint32_t oprsz,   \
@@ -3208,68 +3041,6 @@ static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
     return true;
 }
 
-static bool trans_VREV32(DisasContext *s, arg_2misc *a)
-{
-    static NeonGenOneOpFn * const fn[] = {
-        tcg_gen_bswap32_i32,
-        gen_swap_half,
-        NULL,
-        NULL,
-    };
-    return do_2misc(s, a, fn[a->size]);
-}
-
-static bool trans_VREV16(DisasContext *s, arg_2misc *a)
-{
-    if (a->size != 0) {
-        return false;
-    }
-    return do_2misc(s, a, gen_rev16);
-}
-
-static bool trans_VCLS(DisasContext *s, arg_2misc *a)
-{
-    static NeonGenOneOpFn * const fn[] = {
-        gen_helper_neon_cls_s8,
-        gen_helper_neon_cls_s16,
-        gen_helper_neon_cls_s32,
-        NULL,
-    };
-    return do_2misc(s, a, fn[a->size]);
-}
-
-static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm)
-{
-    tcg_gen_clzi_i32(rd, rm, 32);
-}
-
-static bool trans_VCLZ(DisasContext *s, arg_2misc *a)
-{
-    static NeonGenOneOpFn * const fn[] = {
-        gen_helper_neon_clz_u8,
-        gen_helper_neon_clz_u16,
-        do_VCLZ_32,
-        NULL,
-    };
-    return do_2misc(s, a, fn[a->size]);
-}
-
-static bool trans_VCNT(DisasContext *s, arg_2misc *a)
-{
-    if (a->size != 0) {
-        return false;
-    }
-    return do_2misc(s, a, gen_helper_neon_cnt_u8);
-}
-
-static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
-                       uint32_t oprsz, uint32_t maxsz)
-{
-    tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs,
-                      vece == MO_16 ? 0x7fff : 0x7fffffff,
-                      oprsz, maxsz);
-}
-
 static bool trans_VABS_F(DisasContext *s, arg_2misc *a)
 {
     if (a->size == MO_16) {
@@ -3279,15 +3050,7 @@ static bool trans_VABS_F(DisasContext *s, arg_2misc *a)
     } else if (a->size != MO_32) {
         return false;
     }
-    return do_2misc_vec(s, a, gen_VABS_F);
-}
-
-static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
-                       uint32_t oprsz, uint32_t maxsz)
-{
-    tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs,
-                      vece == MO_16 ? 0x8000 : 0x80000000,
-                      oprsz, maxsz);
+    return do_2misc_vec(s, a, gen_gvec_fabs);
 }
 
 static bool trans_VNEG_F(DisasContext *s, arg_2misc *a)
@@ -3299,7 +3062,7 @@ static bool trans_VNEG_F(DisasContext *s, arg_2misc *a)
     } else if (a->size != MO_32) {
         return false;
     }
-    return do_2misc_vec(s, a, gen_VNEG_F);
+    return do_2misc_vec(s, a, gen_gvec_fneg);
 }
 
 static bool trans_VRECPE(DisasContext *s, arg_2misc *a)
@@ -3307,7 +3070,7 @@ static bool trans_VRECPE(DisasContext *s, arg_2misc *a)
     if (a->size != 2) {
         return false;
     }
-    return do_2misc(s, a, gen_helper_recpe_u32);
+    return do_2misc_vec(s, a, gen_gvec_urecpe);
 }
 
 static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a)
@@ -3315,7 +3078,7 @@ static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a)
     if (a->size != 2) {
        return false;
     }
-    return do_2misc(s, a, gen_helper_rsqrte_u32);
+    return do_2misc_vec(s, a, gen_gvec_ursqrte);
 }
 
 #define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \

--- next file ---

@@ -2424,17 +2424,17 @@ DO_VFP_2OP(VNEG, dp, gen_vfp_negd, aa32_fpdp_v2)
 
 static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
 {
-    gen_helper_vfp_sqrth(vd, vm, tcg_env);
+    gen_helper_vfp_sqrth(vd, vm, fpstatus_ptr(FPST_FPCR_F16));
 }
 
 static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
 {
-    gen_helper_vfp_sqrts(vd, vm, tcg_env);
+    gen_helper_vfp_sqrts(vd, vm, fpstatus_ptr(FPST_FPCR));
 }
 
 static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
 {
-    gen_helper_vfp_sqrtd(vd, vm, tcg_env);
+    gen_helper_vfp_sqrtd(vd, vm, fpstatus_ptr(FPST_FPCR));
 }
 
 DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp, aa32_fp16_arith)

--- next file ---

@@ -578,6 +578,41 @@ void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
 void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
 
+void gen_gvec_cls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                  uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_clz(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                  uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_cnt(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                  uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_rbit(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                   uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_rev16(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                    uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_rev32(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                    uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_rev64(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                    uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_saddlp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                     uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_sadalp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                     uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_uaddlp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                     uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_uadalp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                     uint32_t opr_sz, uint32_t max_sz);
+
+/* These exclusively manipulate the sign bit. */
+void gen_gvec_fabs(unsigned vece, uint32_t dofs, uint32_t aofs,
+                   uint32_t oprsz, uint32_t maxsz);
+void gen_gvec_fneg(unsigned vece, uint32_t dofs, uint32_t aofs,
+                   uint32_t oprsz, uint32_t maxsz);
+
+void gen_gvec_urecpe(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                     uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_ursqrte(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+                      uint32_t opr_sz, uint32_t max_sz);
+
 /*
  * Forward to the isar_feature_* tests given a DisasContext pointer.
  */
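
These gen_gvec_* entry points give the AArch32 Neon and A64 decoders one
shared, vectorized expander per operation, replacing the per-element loops
removed from the Neon translator above. A plausible shape for one of them (a
sketch only; the real bodies in this series may differ) is a thin wrapper that
dispatches to the matching out-of-line helper from the vector-helper file:

/* Sketch, assuming the standard tcg gvec API; gen_helper_gvec_urecpe_s
 * is the generated stub for the HELPER(gvec_urecpe_s) defined below. */
void gen_gvec_urecpe(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                     uint32_t opr_sz, uint32_t max_sz)
{
    tcg_debug_assert(vece == MO_32);  /* URECPE only exists at 32 bits */
    tcg_gen_gvec_2_ool(rd_ofs, rn_ofs, opr_sz, max_sz, 0,
                       gen_helper_gvec_urecpe_s);
}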

--- next file ---

@@ -1253,8 +1253,10 @@ DO_2OP(gvec_touszh, vfp_touszh, float16)
 #define DO_2OP_CMP0(FN, CMPOP, DIRN)                    \
     WRAP_CMP0_##DIRN(FN, CMPOP, float16)                \
     WRAP_CMP0_##DIRN(FN, CMPOP, float32)                \
+    WRAP_CMP0_##DIRN(FN, CMPOP, float64)                \
     DO_2OP(gvec_f##FN##0_h, float16_##FN##0, float16)   \
-    DO_2OP(gvec_f##FN##0_s, float32_##FN##0, float32)
+    DO_2OP(gvec_f##FN##0_s, float32_##FN##0, float32)   \
+    DO_2OP(gvec_f##FN##0_d, float64_##FN##0, float64)
 
 DO_2OP_CMP0(cgt, cgt, FWD)
 DO_2OP_CMP0(cge, cge, FWD)
@@ -2505,14 +2507,19 @@ DO_3OP_PAIR(gvec_uminp_s, MIN, uint32_t, H4)
         clear_tail(d, oprsz, simd_maxsz(desc));         \
     }
 
+DO_VCVT_FIXED(gvec_vcvt_sd, helper_vfp_sqtod, uint64_t)
+DO_VCVT_FIXED(gvec_vcvt_ud, helper_vfp_uqtod, uint64_t)
 DO_VCVT_FIXED(gvec_vcvt_sf, helper_vfp_sltos, uint32_t)
 DO_VCVT_FIXED(gvec_vcvt_uf, helper_vfp_ultos, uint32_t)
-DO_VCVT_FIXED(gvec_vcvt_fs, helper_vfp_tosls_round_to_zero, uint32_t)
-DO_VCVT_FIXED(gvec_vcvt_fu, helper_vfp_touls_round_to_zero, uint32_t)
 DO_VCVT_FIXED(gvec_vcvt_sh, helper_vfp_shtoh, uint16_t)
 DO_VCVT_FIXED(gvec_vcvt_uh, helper_vfp_uhtoh, uint16_t)
-DO_VCVT_FIXED(gvec_vcvt_hs, helper_vfp_toshh_round_to_zero, uint16_t)
-DO_VCVT_FIXED(gvec_vcvt_hu, helper_vfp_touhh_round_to_zero, uint16_t)
+
+DO_VCVT_FIXED(gvec_vcvt_rz_ds, helper_vfp_tosqd_round_to_zero, uint64_t)
+DO_VCVT_FIXED(gvec_vcvt_rz_du, helper_vfp_touqd_round_to_zero, uint64_t)
+DO_VCVT_FIXED(gvec_vcvt_rz_fs, helper_vfp_tosls_round_to_zero, uint32_t)
+DO_VCVT_FIXED(gvec_vcvt_rz_fu, helper_vfp_touls_round_to_zero, uint32_t)
+DO_VCVT_FIXED(gvec_vcvt_rz_hs, helper_vfp_toshh_round_to_zero, uint16_t)
+DO_VCVT_FIXED(gvec_vcvt_rz_hu, helper_vfp_touhh_round_to_zero, uint16_t)
 
 #undef DO_VCVT_FIXED
@@ -2532,6 +2539,8 @@ DO_VCVT_FIXED(gvec_vcvt_hu, helper_vfp_touhh_round_to_zero, uint16_t)
         clear_tail(d, oprsz, simd_maxsz(desc));         \
     }
 
+DO_VCVT_RMODE(gvec_vcvt_rm_sd, helper_vfp_tosqd, uint64_t)
+DO_VCVT_RMODE(gvec_vcvt_rm_ud, helper_vfp_touqd, uint64_t)
 DO_VCVT_RMODE(gvec_vcvt_rm_ss, helper_vfp_tosls, uint32_t)
 DO_VCVT_RMODE(gvec_vcvt_rm_us, helper_vfp_touls, uint32_t)
 DO_VCVT_RMODE(gvec_vcvt_rm_sh, helper_vfp_toshh, uint16_t)
@@ -3066,3 +3075,49 @@ DO_CLAMP(gvec_uclamp_b, uint8_t)
 DO_CLAMP(gvec_uclamp_h, uint16_t)
 DO_CLAMP(gvec_uclamp_s, uint32_t)
 DO_CLAMP(gvec_uclamp_d, uint64_t)
+
+/* Bit count in each 8-bit word. */
+void HELPER(gvec_cnt_b)(void *vd, void *vn, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    uint8_t *d = vd, *n = vn;
+
+    for (i = 0; i < opr_sz; ++i) {
+        d[i] = ctpop8(n[i]);
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+/* Reverse bits in each 8 bit word */
+void HELPER(gvec_rbit_b)(void *vd, void *vn, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    uint64_t *d = vd, *n = vn;
+
+    for (i = 0; i < opr_sz / 8; ++i) {
+        d[i] = revbit64(bswap64(n[i]));
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_urecpe_s)(void *vd, void *vn, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    uint32_t *d = vd, *n = vn;
+
+    for (i = 0; i < opr_sz / 4; ++i) {
+        d[i] = helper_recpe_u32(n[i]);
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_ursqrte_s)(void *vd, void *vn, uint32_t desc)
+{
+    intptr_t i, opr_sz = simd_oprsz(desc);
+    uint32_t *d = vd, *n = vn;
+
+    for (i = 0; i < opr_sz / 4; ++i) {
+        d[i] = helper_rsqrte_u32(n[i]);
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
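
Two of these helpers lean on host bit tricks. gvec_cnt_b gets a per-byte
population count directly from ctpop8(). gvec_rbit_b reverses the bits within
each byte by composing two whole-register operations: revbit64() reverses both
the bit order inside every byte and the order of the bytes themselves, so
applying bswap64() first cancels the byte reordering and leaves only the
per-byte bit reversal. A standalone demonstration with plain-C stand-ins for
QEMU's bswap64()/revbit64() (illustrative only, not QEMU code):

#include <stdint.h>
#include <stdio.h>

static uint64_t my_bswap64(uint64_t x)   /* stand-in for bswap64() */
{
    uint64_t r = 0;
    for (int i = 0; i < 8; i++) {
        r = (r << 8) | ((x >> (8 * i)) & 0xff);
    }
    return r;
}

static uint64_t my_revbit64(uint64_t x)  /* stand-in for revbit64() */
{
    uint64_t r = 0;
    for (int i = 0; i < 64; i++) {
        r = (r << 1) | ((x >> i) & 1);
    }
    return r;
}

int main(void)
{
    /* 0x01 (bit 0 set) becomes 0x80 (bit 7 set) in the *same* byte lane */
    uint64_t x = 0x0000000000000001ULL;
    printf("%016llx\n", (unsigned long long)my_revbit64(my_bswap64(x)));
    return 0;
}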

--- next file ---

@@ -314,19 +314,19 @@ VFP_BINOP(minnum)
 VFP_BINOP(maxnum)
 #undef VFP_BINOP
 
-dh_ctype_f16 VFP_HELPER(sqrt, h)(dh_ctype_f16 a, CPUARMState *env)
+dh_ctype_f16 VFP_HELPER(sqrt, h)(dh_ctype_f16 a, void *fpstp)
 {
-    return float16_sqrt(a, &env->vfp.fp_status_f16);
+    return float16_sqrt(a, fpstp);
 }
 
-float32 VFP_HELPER(sqrt, s)(float32 a, CPUARMState *env)
+float32 VFP_HELPER(sqrt, s)(float32 a, void *fpstp)
 {
-    return float32_sqrt(a, &env->vfp.fp_status);
+    return float32_sqrt(a, fpstp);
 }
 
-float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env)
+float64 VFP_HELPER(sqrt, d)(float64 a, void *fpstp)
 {
-    return float64_sqrt(a, &env->vfp.fp_status);
+    return float64_sqrt(a, fpstp);
 }
 
 static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp)
@@ -495,6 +495,10 @@ VFP_CONV_FIX_A64(sq, h, 16, dh_ctype_f16, 64, int64)
 VFP_CONV_FIX(uh, h, 16, dh_ctype_f16, 32, uint16)
 VFP_CONV_FIX(ul, h, 16, dh_ctype_f16, 32, uint32)
 VFP_CONV_FIX_A64(uq, h, 16, dh_ctype_f16, 64, uint64)
+VFP_CONV_FLOAT_FIX_ROUND(sq, d, 64, float64, 64, int64,
+                         float_round_to_zero, _round_to_zero)
+VFP_CONV_FLOAT_FIX_ROUND(uq, d, 64, float64, 64, uint64,
+                         float_round_to_zero, _round_to_zero)
 
 #undef VFP_CONV_FIX
 #undef VFP_CONV_FIX_FLOAT
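
The two VFP_CONV_FLOAT_FIX_ROUND instances generate the
helper_vfp_tosqd_round_to_zero / helper_vfp_touqd_round_to_zero functions
referenced by the new gvec_vcvt_rz_ds / gvec_vcvt_rz_du helpers above:
float64 to 64-bit fixed-point conversions that always truncate, regardless of
the current FPSCR rounding mode. A sketch of what such a generated helper
plausibly expands to (assuming softfloat's scalbn-style conversion API; the
macro's exact expansion may differ):

/* Sketch, not the macro's literal expansion: convert float64 to signed
 * 64-bit fixed point with 'shift' fraction bits, truncating. The scalbn
 * conversion multiplies by 2^shift before rounding to an integer. */
uint64_t sketch_vfp_tosqd_round_to_zero(float64 x, uint32_t shift,
                                        float_status *fpst)
{
    if (float64_is_any_nan(x)) {
        /* fixed-point conversion of a NaN is invalid and yields 0 */
        float_raise(float_flag_invalid, fpst);
        return 0;
    }
    return float64_to_int64_scalbn(x, float_round_to_zero, shift, fpst);
}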