target/arm: move translate modules to tcg/

Introduce the target/arm/tcg directory. Its purpose is to hold the TCG
code that is selected by CONFIG_TCG.

Signed-off-by: Claudio Fontana <cfontana@suse.de>
Signed-off-by: Fabiano Rosas <farosas@suse.de>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Tested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Fabiano Rosas 2023-02-17 17:11:29 -03:00 committed by Peter Maydell
parent 2059ec754f
commit f0984d4040
27 changed files with 37 additions and 26 deletions

target/arm/tcg/a32-uncond.decode (new file, 74 lines)

# A32 unconditional instructions
#
# Copyright (c) 2019 Linaro, Ltd
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
#
# This file is processed by scripts/decodetree.py
#
# All insns that have 0xf in insn[31:28] are decoded here.
# All of those that have a COND field in insn[31:28] are in a32.decode
#
&empty !extern
&i !extern imm
&setend E
# Branch with Link and Exchange
%imm24h 0:s24 24:1 !function=times_2
BLX_i 1111 101 . ........................ &i imm=%imm24h
# System Instructions
&rfe rn w pu
&srs mode w pu
&cps mode imod M A I F
RFE 1111 100 pu:2 0 w:1 1 rn:4 0000 1010 0000 0000 &rfe
SRS 1111 100 pu:2 1 w:1 0 1101 0000 0101 000 mode:5 &srs
CPS 1111 0001 0000 imod:2 M:1 0 0000 000 A:1 I:1 F:1 0 mode:5 \
&cps
# Clear-Exclusive, Barriers
# QEMU does not require the option field for the barriers.
CLREX 1111 0101 0111 1111 1111 0000 0001 1111
DSB 1111 0101 0111 1111 1111 0000 0100 ----
DMB 1111 0101 0111 1111 1111 0000 0101 ----
ISB 1111 0101 0111 1111 1111 0000 0110 ----
SB 1111 0101 0111 1111 1111 0000 0111 0000
# Set Endianness
SETEND 1111 0001 0000 0001 0000 00 E:1 0 0000 0000 &setend
# Preload instructions
PLD 1111 0101 -101 ---- 1111 ---- ---- ---- # (imm, lit) 5te
PLDW 1111 0101 -001 ---- 1111 ---- ---- ---- # (imm, lit) 7mp
PLI 1111 0100 -101 ---- 1111 ---- ---- ---- # (imm, lit) 7
PLD 1111 0111 -101 ---- 1111 ----- -- 0 ---- # (register) 5te
PLDW 1111 0111 -001 ---- 1111 ----- -- 0 ---- # (register) 7mp
PLI 1111 0110 -101 ---- 1111 ----- -- 0 ---- # (register) 7
# Unallocated memory hints
#
# Since these are v7MP nops, and PLDW is v7MP and implemented as nop,
# (ab)use the PLDW helper.
PLDW 1111 0100 -001 ---- ---- ---- ---- ----
PLDW 1111 0110 -001 ---- ---- ---- ---0 ----
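
The %imm24h line above is a good example of decodetree's field syntax: the signed 24-bit immediate and the H bit are concatenated (first-listed subfield most significant) and the result is run through the times_2 filter, giving the halfword-aligned BLX(immediate) byte offset. The standalone C sketch below reproduces that arithmetic; the filter signature and its placement are assumptions for illustration, not quotes from the QEMU translate code.

/* Illustrative only: how %imm24h (0:s24 24:1 !function=times_2) is built. */
#include <stdint.h>
#include <stdio.h>

static int times_2(void *ctx, int x)
{
    (void)ctx;                  /* real decodetree filters also take a context */
    return x * 2;
}

static int extract_imm24h(uint32_t insn)
{
    int32_t simm24 = (int32_t)(insn << 8) >> 8;   /* sign-extend insn[23:0] */
    int h = (insn >> 24) & 1;                     /* the H bit */
    int raw = (simm24 << 1) | h;                  /* concatenate, H as LSB */
    return times_2(NULL, raw);                    /* final byte offset */
}

int main(void)
{
    /* imm24 = 1, H = 1  ->  offset 1*4 + 2 = 6 bytes */
    printf("%d\n", extract_imm24h(0xfb000001u));
    return 0;
}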

target/arm/tcg/a32.decode (new file, 557 lines)

# A32 conditional instructions
#
# Copyright (c) 2019 Linaro, Ltd
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
#
# This file is processed by scripts/decodetree.py
#
# All of the insns that have a COND field in insn[31:28] are here.
# All insns that have 0xf in insn[31:28] are in a32-uncond.decode.
#
&empty
&s_rrr_shi s rd rn rm shim shty
&s_rrr_shr s rn rd rm rs shty
&s_rri_rot s rn rd imm rot
&s_rrrr s rd rn rm ra
&rrrr rd rn rm ra
&rrr_rot rd rn rm rot
&rrr rd rn rm
&rr rd rm
&ri rd imm
&r rm
&i imm
&msr_reg rn r mask
&mrs_reg rd r
&msr_bank rn r sysm
&mrs_bank rd r sysm
&ldst_rr p w u rn rt rm shimm shtype
&ldst_ri p w u rn rt imm
&ldst_block rn i b u w list
&strex rn rd rt rt2 imm
&ldrex rn rt rt2 imm
&bfx rd rn lsb widthm1
&bfi rd rn lsb msb
&sat rd rn satimm imm sh
&pkh rd rn rm imm tb
&mcr cp opc1 crn crm opc2 rt
&mcrr cp opc1 crm rt rt2
# Data-processing (register)
@s_rrr_shi ---- ... .... s:1 rn:4 rd:4 shim:5 shty:2 . rm:4 \
&s_rrr_shi
@s_rxr_shi ---- ... .... s:1 .... rd:4 shim:5 shty:2 . rm:4 \
&s_rrr_shi rn=0
@S_xrr_shi ---- ... .... . rn:4 .... shim:5 shty:2 . rm:4 \
&s_rrr_shi s=1 rd=0
AND_rrri .... 000 0000 . .... .... ..... .. 0 .... @s_rrr_shi
EOR_rrri .... 000 0001 . .... .... ..... .. 0 .... @s_rrr_shi
SUB_rrri .... 000 0010 . .... .... ..... .. 0 .... @s_rrr_shi
RSB_rrri .... 000 0011 . .... .... ..... .. 0 .... @s_rrr_shi
ADD_rrri .... 000 0100 . .... .... ..... .. 0 .... @s_rrr_shi
ADC_rrri .... 000 0101 . .... .... ..... .. 0 .... @s_rrr_shi
SBC_rrri .... 000 0110 . .... .... ..... .. 0 .... @s_rrr_shi
RSC_rrri .... 000 0111 . .... .... ..... .. 0 .... @s_rrr_shi
TST_xrri .... 000 1000 1 .... 0000 ..... .. 0 .... @S_xrr_shi
TEQ_xrri .... 000 1001 1 .... 0000 ..... .. 0 .... @S_xrr_shi
CMP_xrri .... 000 1010 1 .... 0000 ..... .. 0 .... @S_xrr_shi
CMN_xrri .... 000 1011 1 .... 0000 ..... .. 0 .... @S_xrr_shi
ORR_rrri .... 000 1100 . .... .... ..... .. 0 .... @s_rrr_shi
MOV_rxri .... 000 1101 . 0000 .... ..... .. 0 .... @s_rxr_shi
BIC_rrri .... 000 1110 . .... .... ..... .. 0 .... @s_rrr_shi
MVN_rxri .... 000 1111 . 0000 .... ..... .. 0 .... @s_rxr_shi
%imm16 16:4 0:12
@mov16 ---- .... .... .... rd:4 ............ &ri imm=%imm16
MOVW .... 0011 0000 .... .... ............ @mov16
MOVT .... 0011 0100 .... .... ............ @mov16
# Data-processing (register-shifted register)
@s_rrr_shr ---- ... .... s:1 rn:4 rd:4 rs:4 . shty:2 . rm:4 \
&s_rrr_shr
@s_rxr_shr ---- ... .... s:1 .... rd:4 rs:4 . shty:2 . rm:4 \
&s_rrr_shr rn=0
@S_xrr_shr ---- ... .... . rn:4 .... rs:4 . shty:2 . rm:4 \
&s_rrr_shr rd=0 s=1
AND_rrrr .... 000 0000 . .... .... .... 0 .. 1 .... @s_rrr_shr
EOR_rrrr .... 000 0001 . .... .... .... 0 .. 1 .... @s_rrr_shr
SUB_rrrr .... 000 0010 . .... .... .... 0 .. 1 .... @s_rrr_shr
RSB_rrrr .... 000 0011 . .... .... .... 0 .. 1 .... @s_rrr_shr
ADD_rrrr .... 000 0100 . .... .... .... 0 .. 1 .... @s_rrr_shr
ADC_rrrr .... 000 0101 . .... .... .... 0 .. 1 .... @s_rrr_shr
SBC_rrrr .... 000 0110 . .... .... .... 0 .. 1 .... @s_rrr_shr
RSC_rrrr .... 000 0111 . .... .... .... 0 .. 1 .... @s_rrr_shr
TST_xrrr .... 000 1000 1 .... 0000 .... 0 .. 1 .... @S_xrr_shr
TEQ_xrrr .... 000 1001 1 .... 0000 .... 0 .. 1 .... @S_xrr_shr
CMP_xrrr .... 000 1010 1 .... 0000 .... 0 .. 1 .... @S_xrr_shr
CMN_xrrr .... 000 1011 1 .... 0000 .... 0 .. 1 .... @S_xrr_shr
ORR_rrrr .... 000 1100 . .... .... .... 0 .. 1 .... @s_rrr_shr
MOV_rxrr .... 000 1101 . 0000 .... .... 0 .. 1 .... @s_rxr_shr
BIC_rrrr .... 000 1110 . .... .... .... 0 .. 1 .... @s_rrr_shr
MVN_rxrr .... 000 1111 . 0000 .... .... 0 .. 1 .... @s_rxr_shr
# Data-processing (immediate)
%a32extrot 8:4 !function=times_2
@s_rri_rot ---- ... .... s:1 rn:4 rd:4 .... imm:8 \
&s_rri_rot rot=%a32extrot
@s_rxi_rot ---- ... .... s:1 .... rd:4 .... imm:8 \
&s_rri_rot rot=%a32extrot rn=0
@S_xri_rot ---- ... .... . rn:4 .... .... imm:8 \
&s_rri_rot rot=%a32extrot rd=0 s=1
AND_rri .... 001 0000 . .... .... ............ @s_rri_rot
EOR_rri .... 001 0001 . .... .... ............ @s_rri_rot
SUB_rri .... 001 0010 . .... .... ............ @s_rri_rot
RSB_rri .... 001 0011 . .... .... ............ @s_rri_rot
ADD_rri .... 001 0100 . .... .... ............ @s_rri_rot
ADC_rri .... 001 0101 . .... .... ............ @s_rri_rot
SBC_rri .... 001 0110 . .... .... ............ @s_rri_rot
RSC_rri .... 001 0111 . .... .... ............ @s_rri_rot
TST_xri .... 001 1000 1 .... 0000 ............ @S_xri_rot
TEQ_xri .... 001 1001 1 .... 0000 ............ @S_xri_rot
CMP_xri .... 001 1010 1 .... 0000 ............ @S_xri_rot
CMN_xri .... 001 1011 1 .... 0000 ............ @S_xri_rot
ORR_rri .... 001 1100 . .... .... ............ @s_rri_rot
MOV_rxi .... 001 1101 . 0000 .... ............ @s_rxi_rot
BIC_rri .... 001 1110 . .... .... ............ @s_rri_rot
MVN_rxi .... 001 1111 . 0000 .... ............ @s_rxi_rot
# Multiply and multiply accumulate
@s_rdamn ---- .... ... s:1 rd:4 ra:4 rm:4 .... rn:4 &s_rrrr
@s_rd0mn ---- .... ... s:1 rd:4 .... rm:4 .... rn:4 &s_rrrr ra=0
@rdamn ---- .... ... . rd:4 ra:4 rm:4 .... rn:4 &rrrr
@rd0mn ---- .... ... . rd:4 .... rm:4 .... rn:4 &rrrr ra=0
MUL .... 0000 000 . .... 0000 .... 1001 .... @s_rd0mn
MLA .... 0000 001 . .... .... .... 1001 .... @s_rdamn
UMAAL .... 0000 010 0 .... .... .... 1001 .... @rdamn
MLS .... 0000 011 0 .... .... .... 1001 .... @rdamn
UMULL .... 0000 100 . .... .... .... 1001 .... @s_rdamn
UMLAL .... 0000 101 . .... .... .... 1001 .... @s_rdamn
SMULL .... 0000 110 . .... .... .... 1001 .... @s_rdamn
SMLAL .... 0000 111 . .... .... .... 1001 .... @s_rdamn
# Saturating addition and subtraction
@rndm ---- .... .... rn:4 rd:4 .... .... rm:4 &rrr
QADD .... 0001 0000 .... .... 0000 0101 .... @rndm
QSUB .... 0001 0010 .... .... 0000 0101 .... @rndm
QDADD .... 0001 0100 .... .... 0000 0101 .... @rndm
QDSUB .... 0001 0110 .... .... 0000 0101 .... @rndm
# Halfword multiply and multiply accumulate
SMLABB .... 0001 0000 .... .... .... 1000 .... @rdamn
SMLABT .... 0001 0000 .... .... .... 1100 .... @rdamn
SMLATB .... 0001 0000 .... .... .... 1010 .... @rdamn
SMLATT .... 0001 0000 .... .... .... 1110 .... @rdamn
SMLAWB .... 0001 0010 .... .... .... 1000 .... @rdamn
SMULWB .... 0001 0010 .... 0000 .... 1010 .... @rd0mn
SMLAWT .... 0001 0010 .... .... .... 1100 .... @rdamn
SMULWT .... 0001 0010 .... 0000 .... 1110 .... @rd0mn
SMLALBB .... 0001 0100 .... .... .... 1000 .... @rdamn
SMLALBT .... 0001 0100 .... .... .... 1100 .... @rdamn
SMLALTB .... 0001 0100 .... .... .... 1010 .... @rdamn
SMLALTT .... 0001 0100 .... .... .... 1110 .... @rdamn
SMULBB .... 0001 0110 .... 0000 .... 1000 .... @rd0mn
SMULBT .... 0001 0110 .... 0000 .... 1100 .... @rd0mn
SMULTB .... 0001 0110 .... 0000 .... 1010 .... @rd0mn
SMULTT .... 0001 0110 .... 0000 .... 1110 .... @rd0mn
# MSR (immediate) and hints
&msr_i r mask rot imm
@msr_i ---- .... .... mask:4 .... rot:4 imm:8 &msr_i
{
{
[
YIELD ---- 0011 0010 0000 1111 ---- 0000 0001
WFE ---- 0011 0010 0000 1111 ---- 0000 0010
WFI ---- 0011 0010 0000 1111 ---- 0000 0011
# TODO: Implement SEV, SEVL; may help SMP performance.
# SEV ---- 0011 0010 0000 1111 ---- 0000 0100
# SEVL ---- 0011 0010 0000 1111 ---- 0000 0101
ESB ---- 0011 0010 0000 1111 ---- 0001 0000
]
# The canonical nop ends in 00000000, but the whole of the
# rest of the space executes as nop if otherwise unsupported.
NOP ---- 0011 0010 0000 1111 ---- ---- ----
}
# Note mask = 0 is covered by NOP
MSR_imm .... 0011 0010 .... 1111 .... .... .... @msr_i r=0
}
MSR_imm .... 0011 0110 .... 1111 .... .... .... @msr_i r=1
# Cyclic Redundancy Check
CRC32B .... 0001 0000 .... .... 0000 0100 .... @rndm
CRC32H .... 0001 0010 .... .... 0000 0100 .... @rndm
CRC32W .... 0001 0100 .... .... 0000 0100 .... @rndm
CRC32CB .... 0001 0000 .... .... 0010 0100 .... @rndm
CRC32CH .... 0001 0010 .... .... 0010 0100 .... @rndm
CRC32CW .... 0001 0100 .... .... 0010 0100 .... @rndm
# Miscellaneous instructions
%sysm 8:1 16:4
%imm16_8_0 8:12 0:4
@rm ---- .... .... .... .... .... .... rm:4 &r
@rdm ---- .... .... .... rd:4 .... .... rm:4 &rr
@i16 ---- .... .... .... .... .... .... .... &i imm=%imm16_8_0
MRS_bank ---- 0001 0 r:1 00 .... rd:4 001. 0000 0000 &mrs_bank %sysm
MSR_bank ---- 0001 0 r:1 10 .... 1111 001. 0000 rn:4 &msr_bank %sysm
MRS_reg ---- 0001 0 r:1 00 1111 rd:4 0000 0000 0000 &mrs_reg
MSR_reg ---- 0001 0 r:1 10 mask:4 1111 0000 0000 rn:4 &msr_reg
BX .... 0001 0010 1111 1111 1111 0001 .... @rm
BXJ .... 0001 0010 1111 1111 1111 0010 .... @rm
BLX_r .... 0001 0010 1111 1111 1111 0011 .... @rm
CLZ .... 0001 0110 1111 .... 1111 0001 .... @rdm
ERET ---- 0001 0110 0000 0000 0000 0110 1110
HLT .... 0001 0000 .... .... .... 0111 .... @i16
BKPT .... 0001 0010 .... .... .... 0111 .... @i16
HVC .... 0001 0100 .... .... .... 0111 .... @i16
SMC ---- 0001 0110 0000 0000 0000 0111 imm:4 &i
# Load/Store Dual, Half, Signed Byte (register)
@ldst_rr_p1w ---- ...1 u:1 . w:1 . rn:4 rt:4 .... .... rm:4 \
&ldst_rr p=1 shimm=0 shtype=0
@ldst_rr_pw0 ---- ...0 u:1 . 0 . rn:4 rt:4 .... .... rm:4 \
&ldst_rr p=0 w=0 shimm=0 shtype=0
STRH_rr .... 000. .0.0 .... .... 0000 1011 .... @ldst_rr_pw0
STRH_rr .... 000. .0.0 .... .... 0000 1011 .... @ldst_rr_p1w
LDRD_rr .... 000. .0.0 .... .... 0000 1101 .... @ldst_rr_pw0
LDRD_rr .... 000. .0.0 .... .... 0000 1101 .... @ldst_rr_p1w
STRD_rr .... 000. .0.0 .... .... 0000 1111 .... @ldst_rr_pw0
STRD_rr .... 000. .0.0 .... .... 0000 1111 .... @ldst_rr_p1w
LDRH_rr .... 000. .0.1 .... .... 0000 1011 .... @ldst_rr_pw0
LDRH_rr .... 000. .0.1 .... .... 0000 1011 .... @ldst_rr_p1w
LDRSB_rr .... 000. .0.1 .... .... 0000 1101 .... @ldst_rr_pw0
LDRSB_rr .... 000. .0.1 .... .... 0000 1101 .... @ldst_rr_p1w
LDRSH_rr .... 000. .0.1 .... .... 0000 1111 .... @ldst_rr_pw0
LDRSH_rr .... 000. .0.1 .... .... 0000 1111 .... @ldst_rr_p1w
# Note the unpriv load/stores use the previously invalid P=0, W=1 encoding,
# and act as normal post-indexed (P=0, W=0).
@ldst_rr_p0w1 ---- ...0 u:1 . 1 . rn:4 rt:4 .... .... rm:4 \
&ldst_rr p=0 w=0 shimm=0 shtype=0
STRHT_rr .... 000. .0.0 .... .... 0000 1011 .... @ldst_rr_p0w1
LDRHT_rr .... 000. .0.1 .... .... 0000 1011 .... @ldst_rr_p0w1
LDRSBT_rr .... 000. .0.1 .... .... 0000 1101 .... @ldst_rr_p0w1
LDRSHT_rr .... 000. .0.1 .... .... 0000 1111 .... @ldst_rr_p0w1
# Load/Store word and unsigned byte (register)
@ldst_rs_p1w ---- ...1 u:1 . w:1 . rn:4 rt:4 shimm:5 shtype:2 . rm:4 \
&ldst_rr p=1
@ldst_rs_pw0 ---- ...0 u:1 . 0 . rn:4 rt:4 shimm:5 shtype:2 . rm:4 \
&ldst_rr p=0 w=0
STR_rr .... 011. .0.0 .... .... .... ...0 .... @ldst_rs_pw0
STR_rr .... 011. .0.0 .... .... .... ...0 .... @ldst_rs_p1w
STRB_rr .... 011. .1.0 .... .... .... ...0 .... @ldst_rs_pw0
STRB_rr .... 011. .1.0 .... .... .... ...0 .... @ldst_rs_p1w
LDR_rr .... 011. .0.1 .... .... .... ...0 .... @ldst_rs_pw0
LDR_rr .... 011. .0.1 .... .... .... ...0 .... @ldst_rs_p1w
LDRB_rr .... 011. .1.1 .... .... .... ...0 .... @ldst_rs_pw0
LDRB_rr .... 011. .1.1 .... .... .... ...0 .... @ldst_rs_p1w
@ldst_rs_p0w1 ---- ...0 u:1 . 1 . rn:4 rt:4 shimm:5 shtype:2 . rm:4 \
&ldst_rr p=0 w=0
STRT_rr .... 011. .0.0 .... .... .... ...0 .... @ldst_rs_p0w1
STRBT_rr .... 011. .1.0 .... .... .... ...0 .... @ldst_rs_p0w1
LDRT_rr .... 011. .0.1 .... .... .... ...0 .... @ldst_rs_p0w1
LDRBT_rr .... 011. .1.1 .... .... .... ...0 .... @ldst_rs_p0w1
# Load/Store Dual, Half, Signed Byte (immediate)
%imm8s_8_0 8:4 0:4
@ldst_ri8_p1w ---- ...1 u:1 . w:1 . rn:4 rt:4 .... .... .... \
&ldst_ri imm=%imm8s_8_0 p=1
@ldst_ri8_pw0 ---- ...0 u:1 . 0 . rn:4 rt:4 .... .... .... \
&ldst_ri imm=%imm8s_8_0 p=0 w=0
STRH_ri .... 000. .1.0 .... .... .... 1011 .... @ldst_ri8_pw0
STRH_ri .... 000. .1.0 .... .... .... 1011 .... @ldst_ri8_p1w
LDRD_ri_a32 .... 000. .1.0 .... .... .... 1101 .... @ldst_ri8_pw0
LDRD_ri_a32 .... 000. .1.0 .... .... .... 1101 .... @ldst_ri8_p1w
STRD_ri_a32 .... 000. .1.0 .... .... .... 1111 .... @ldst_ri8_pw0
STRD_ri_a32 .... 000. .1.0 .... .... .... 1111 .... @ldst_ri8_p1w
LDRH_ri .... 000. .1.1 .... .... .... 1011 .... @ldst_ri8_pw0
LDRH_ri .... 000. .1.1 .... .... .... 1011 .... @ldst_ri8_p1w
LDRSB_ri .... 000. .1.1 .... .... .... 1101 .... @ldst_ri8_pw0
LDRSB_ri .... 000. .1.1 .... .... .... 1101 .... @ldst_ri8_p1w
LDRSH_ri .... 000. .1.1 .... .... .... 1111 .... @ldst_ri8_pw0
LDRSH_ri .... 000. .1.1 .... .... .... 1111 .... @ldst_ri8_p1w
# Note the unpriv load/stores use the previously invalid P=0, W=1 encoding,
# and act as normal post-indexed (P=0, W=0).
@ldst_ri8_p0w1 ---- ...0 u:1 . 1 . rn:4 rt:4 .... .... .... \
&ldst_ri imm=%imm8s_8_0 p=0 w=0
STRHT_ri .... 000. .1.0 .... .... .... 1011 .... @ldst_ri8_p0w1
LDRHT_ri .... 000. .1.1 .... .... .... 1011 .... @ldst_ri8_p0w1
LDRSBT_ri .... 000. .1.1 .... .... .... 1101 .... @ldst_ri8_p0w1
LDRSHT_ri .... 000. .1.1 .... .... .... 1111 .... @ldst_ri8_p0w1
# Load/Store word and unsigned byte (immediate)
@ldst_ri12_p1w ---- ...1 u:1 . w:1 . rn:4 rt:4 imm:12 &ldst_ri p=1
@ldst_ri12_pw0 ---- ...0 u:1 . 0 . rn:4 rt:4 imm:12 &ldst_ri p=0 w=0
STR_ri .... 010. .0.0 .... .... ............ @ldst_ri12_p1w
STR_ri .... 010. .0.0 .... .... ............ @ldst_ri12_pw0
STRB_ri .... 010. .1.0 .... .... ............ @ldst_ri12_p1w
STRB_ri .... 010. .1.0 .... .... ............ @ldst_ri12_pw0
LDR_ri .... 010. .0.1 .... .... ............ @ldst_ri12_p1w
LDR_ri .... 010. .0.1 .... .... ............ @ldst_ri12_pw0
LDRB_ri .... 010. .1.1 .... .... ............ @ldst_ri12_p1w
LDRB_ri .... 010. .1.1 .... .... ............ @ldst_ri12_pw0
@ldst_ri12_p0w1 ---- ...0 u:1 . 1 . rn:4 rt:4 imm:12 &ldst_ri p=0 w=0
STRT_ri .... 010. .0.0 .... .... ............ @ldst_ri12_p0w1
STRBT_ri .... 010. .1.0 .... .... ............ @ldst_ri12_p0w1
LDRT_ri .... 010. .0.1 .... .... ............ @ldst_ri12_p0w1
LDRBT_ri .... 010. .1.1 .... .... ............ @ldst_ri12_p0w1
# Synchronization primitives
@swp ---- .... .... rn:4 rt:4 .... .... rt2:4
SWP .... 0001 0000 .... .... 0000 1001 .... @swp
SWPB .... 0001 0100 .... .... 0000 1001 .... @swp
# Load/Store Exclusive and Load-Acquire/Store-Release
#
# Note rt2 for STREXD/LDREXD is set by the helper after checking rt is even.
@strex ---- .... .... rn:4 rd:4 .... .... rt:4 \
&strex imm=0 rt2=15
@ldrex ---- .... .... rn:4 rt:4 .... .... .... \
&ldrex imm=0 rt2=15
@stl ---- .... .... rn:4 .... .... .... rt:4 \
&ldrex imm=0 rt2=15
STREX .... 0001 1000 .... .... 1111 1001 .... @strex
STREXD_a32 .... 0001 1010 .... .... 1111 1001 .... @strex
STREXB .... 0001 1100 .... .... 1111 1001 .... @strex
STREXH .... 0001 1110 .... .... 1111 1001 .... @strex
STLEX .... 0001 1000 .... .... 1110 1001 .... @strex
STLEXD_a32 .... 0001 1010 .... .... 1110 1001 .... @strex
STLEXB .... 0001 1100 .... .... 1110 1001 .... @strex
STLEXH .... 0001 1110 .... .... 1110 1001 .... @strex
STL .... 0001 1000 .... 1111 1100 1001 .... @stl
STLB .... 0001 1100 .... 1111 1100 1001 .... @stl
STLH .... 0001 1110 .... 1111 1100 1001 .... @stl
LDREX .... 0001 1001 .... .... 1111 1001 1111 @ldrex
LDREXD_a32 .... 0001 1011 .... .... 1111 1001 1111 @ldrex
LDREXB .... 0001 1101 .... .... 1111 1001 1111 @ldrex
LDREXH .... 0001 1111 .... .... 1111 1001 1111 @ldrex
LDAEX .... 0001 1001 .... .... 1110 1001 1111 @ldrex
LDAEXD_a32 .... 0001 1011 .... .... 1110 1001 1111 @ldrex
LDAEXB .... 0001 1101 .... .... 1110 1001 1111 @ldrex
LDAEXH .... 0001 1111 .... .... 1110 1001 1111 @ldrex
LDA .... 0001 1001 .... .... 1100 1001 1111 @ldrex
LDAB .... 0001 1101 .... .... 1100 1001 1111 @ldrex
LDAH .... 0001 1111 .... .... 1100 1001 1111 @ldrex
# Media instructions
# usad8 is usada8 w/ ra=15
USADA8 ---- 0111 1000 rd:4 ra:4 rm:4 0001 rn:4
# ubfx and sbfx
@bfx ---- .... ... widthm1:5 rd:4 lsb:5 ... rn:4 &bfx
SBFX .... 0111 101 ..... .... ..... 101 .... @bfx
UBFX .... 0111 111 ..... .... ..... 101 .... @bfx
# bfc is bfi w/ rn=15
BFCI ---- 0111 110 msb:5 rd:4 lsb:5 001 rn:4 &bfi
# While we could get UDEF by not including this, add the pattern for
# documentation and to conflict with any other typos in this file.
UDF 1110 0111 1111 ---- ---- ---- 1111 ----
# Parallel addition and subtraction
SADD16 .... 0110 0001 .... .... 1111 0001 .... @rndm
SASX .... 0110 0001 .... .... 1111 0011 .... @rndm
SSAX .... 0110 0001 .... .... 1111 0101 .... @rndm
SSUB16 .... 0110 0001 .... .... 1111 0111 .... @rndm
SADD8 .... 0110 0001 .... .... 1111 1001 .... @rndm
SSUB8 .... 0110 0001 .... .... 1111 1111 .... @rndm
QADD16 .... 0110 0010 .... .... 1111 0001 .... @rndm
QASX .... 0110 0010 .... .... 1111 0011 .... @rndm
QSAX .... 0110 0010 .... .... 1111 0101 .... @rndm
QSUB16 .... 0110 0010 .... .... 1111 0111 .... @rndm
QADD8 .... 0110 0010 .... .... 1111 1001 .... @rndm
QSUB8 .... 0110 0010 .... .... 1111 1111 .... @rndm
SHADD16 .... 0110 0011 .... .... 1111 0001 .... @rndm
SHASX .... 0110 0011 .... .... 1111 0011 .... @rndm
SHSAX .... 0110 0011 .... .... 1111 0101 .... @rndm
SHSUB16 .... 0110 0011 .... .... 1111 0111 .... @rndm
SHADD8 .... 0110 0011 .... .... 1111 1001 .... @rndm
SHSUB8 .... 0110 0011 .... .... 1111 1111 .... @rndm
UADD16 .... 0110 0101 .... .... 1111 0001 .... @rndm
UASX .... 0110 0101 .... .... 1111 0011 .... @rndm
USAX .... 0110 0101 .... .... 1111 0101 .... @rndm
USUB16 .... 0110 0101 .... .... 1111 0111 .... @rndm
UADD8 .... 0110 0101 .... .... 1111 1001 .... @rndm
USUB8 .... 0110 0101 .... .... 1111 1111 .... @rndm
UQADD16 .... 0110 0110 .... .... 1111 0001 .... @rndm
UQASX .... 0110 0110 .... .... 1111 0011 .... @rndm
UQSAX .... 0110 0110 .... .... 1111 0101 .... @rndm
UQSUB16 .... 0110 0110 .... .... 1111 0111 .... @rndm
UQADD8 .... 0110 0110 .... .... 1111 1001 .... @rndm
UQSUB8 .... 0110 0110 .... .... 1111 1111 .... @rndm
UHADD16 .... 0110 0111 .... .... 1111 0001 .... @rndm
UHASX .... 0110 0111 .... .... 1111 0011 .... @rndm
UHSAX .... 0110 0111 .... .... 1111 0101 .... @rndm
UHSUB16 .... 0110 0111 .... .... 1111 0111 .... @rndm
UHADD8 .... 0110 0111 .... .... 1111 1001 .... @rndm
UHSUB8 .... 0110 0111 .... .... 1111 1111 .... @rndm
# Packing, unpacking, saturation, and reversal
PKH ---- 0110 1000 rn:4 rd:4 imm:5 tb:1 01 rm:4 &pkh
@sat ---- .... ... satimm:5 rd:4 imm:5 sh:1 .. rn:4 &sat
@sat16 ---- .... .... satimm:4 rd:4 .... .... rn:4 \
&sat imm=0 sh=0
SSAT .... 0110 101. .... .... .... ..01 .... @sat
USAT .... 0110 111. .... .... .... ..01 .... @sat
SSAT16 .... 0110 1010 .... .... 1111 0011 .... @sat16
USAT16 .... 0110 1110 .... .... 1111 0011 .... @sat16
@rrr_rot ---- .... .... rn:4 rd:4 rot:2 ...... rm:4 &rrr_rot
SXTAB16 .... 0110 1000 .... .... ..00 0111 .... @rrr_rot
SXTAB .... 0110 1010 .... .... ..00 0111 .... @rrr_rot
SXTAH .... 0110 1011 .... .... ..00 0111 .... @rrr_rot
UXTAB16 .... 0110 1100 .... .... ..00 0111 .... @rrr_rot
UXTAB .... 0110 1110 .... .... ..00 0111 .... @rrr_rot
UXTAH .... 0110 1111 .... .... ..00 0111 .... @rrr_rot
SEL .... 0110 1000 .... .... 1111 1011 .... @rndm
REV .... 0110 1011 1111 .... 1111 0011 .... @rdm
REV16 .... 0110 1011 1111 .... 1111 1011 .... @rdm
REVSH .... 0110 1111 1111 .... 1111 1011 .... @rdm
RBIT .... 0110 1111 1111 .... 1111 0011 .... @rdm
# Signed multiply, signed and unsigned divide
@rdmn ---- .... .... rd:4 .... rm:4 .... rn:4 &rrr
SMLAD .... 0111 0000 .... .... .... 0001 .... @rdamn
SMLADX .... 0111 0000 .... .... .... 0011 .... @rdamn
SMLSD .... 0111 0000 .... .... .... 0101 .... @rdamn
SMLSDX .... 0111 0000 .... .... .... 0111 .... @rdamn
SDIV .... 0111 0001 .... 1111 .... 0001 .... @rdmn
UDIV .... 0111 0011 .... 1111 .... 0001 .... @rdmn
SMLALD .... 0111 0100 .... .... .... 0001 .... @rdamn
SMLALDX .... 0111 0100 .... .... .... 0011 .... @rdamn
SMLSLD .... 0111 0100 .... .... .... 0101 .... @rdamn
SMLSLDX .... 0111 0100 .... .... .... 0111 .... @rdamn
SMMLA .... 0111 0101 .... .... .... 0001 .... @rdamn
SMMLAR .... 0111 0101 .... .... .... 0011 .... @rdamn
SMMLS .... 0111 0101 .... .... .... 1101 .... @rdamn
SMMLSR .... 0111 0101 .... .... .... 1111 .... @rdamn
# Block data transfer
STM ---- 100 b:1 i:1 u:1 w:1 0 rn:4 list:16 &ldst_block
LDM_a32 ---- 100 b:1 i:1 u:1 w:1 1 rn:4 list:16 &ldst_block
# Branch, branch with link
%imm26 0:s24 !function=times_4
@branch ---- .... ........................ &i imm=%imm26
B .... 1010 ........................ @branch
BL .... 1011 ........................ @branch
# Coprocessor instructions
# We decode MCR, MRC, MRRC and MCRR only, because for QEMU the
# other coprocessor instructions always UNDEF.
# The trans_ functions for these will ignore cp values 8..13 for v7 or
# earlier, and 0..13 for v8 and later, because those areas of the
# encoding space may be used for other things, such as VFP or Neon.
@mcr ---- .... opc1:3 . crn:4 rt:4 cp:4 opc2:3 . crm:4 &mcr
@mcrr ---- .... .... rt2:4 rt:4 cp:4 opc1:4 crm:4 &mcrr
MCRR .... 1100 0100 .... .... .... .... .... @mcrr
MRRC .... 1100 0101 .... .... .... .... .... @mcrr
MCR .... 1110 ... 0 .... .... .... ... 1 .... @mcr
MRC .... 1110 ... 1 .... .... .... ... 1 .... @mcr
# Supervisor call
SVC ---- 1111 imm:24 &i
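
For readers new to decodetree: each pattern line in this file compiles into a test of the fixed opcode bits plus extraction of the named fields into the pattern's argument set, followed by a call to a trans_<NAME>() hook. The self-contained sketch below mimics that flow for the QADD pattern and its @rndm/&rrr format; the real generated decoder and the real handler in the translate code differ in detail, so the names and structure here are illustrative only.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef struct { int rd, rn, rm; } arg_rrr;   /* mirrors "&rrr rd rn rm" */

static bool trans_QADD(void *ctx, arg_rrr *a)
{
    (void)ctx;
    printf("QADD r%d, r%d, r%d\n", a->rd, a->rm, a->rn);
    return true;          /* true = handled; false would mean "not this insn" */
}

/* QADD .... 0001 0000 .... .... 0000 0101 ....   @rndm */
static bool decode_qadd(void *ctx, uint32_t insn)
{
    if ((insn & 0x0ff00ff0u) != 0x01000050u) {
        return false;                     /* fixed bits do not match */
    }
    arg_rrr a = {
        .rn = (insn >> 16) & 0xf,         /* @rndm: rn:4 in [19:16] */
        .rd = (insn >> 12) & 0xf,         /*        rd:4 in [15:12] */
        .rm = insn & 0xf,                 /*        rm:4 in [3:0]   */
    };
    return trans_QADD(ctx, &a);
}

int main(void)
{
    decode_qadd(NULL, 0xe1021053u);       /* prints "QADD r1, r3, r2" */
    return 0;
}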

target/arm/tcg/m-nocp.decode (new file, 72 lines)

# M-profile UsageFault.NOCP exception handling
#
# Copyright (c) 2020 Linaro, Ltd
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
#
# This file is processed by scripts/decodetree.py
#
# For M-profile, the architecture specifies that NOCP UsageFaults
# should take precedence over UNDEF faults over the whole wide
# range of coprocessor-space encodings, with the exception of
# VLLDM and VLSTM. (Compare v8.1M IsCPInstruction() pseudocode and
# v8M Arm ARM rule R_QLGM.) This isn't mandatory for v8.0M but we choose
# to behave the same as v8.1M.
# This decode is handled before any others (and in particular before
# decoding FP instructions which are in the coprocessor space).
# If the coprocessor is not present or disabled then we will generate
# the NOCP exception; otherwise we let the insn through to the main decode.
%vd_dp 22:1 12:4
%vd_sp 12:4 22:1
&nocp cp
# M-profile VLDR/VSTR to sysreg
%vldr_sysreg 22:1 13:3
%imm7_0x4 0:7 !function=times_4
&vldr_sysreg rn reg imm a w p
@vldr_sysreg .... ... . a:1 . . . rn:4 ... . ... .. ....... \
reg=%vldr_sysreg imm=%imm7_0x4 &vldr_sysreg
{
# Special cases which do not take an early NOCP: VLLDM and VLSTM
VLLDM_VLSTM 1110 1100 001 l:1 rn:4 0000 1010 op:1 000 0000
# VSCCLRM (new in v8.1M) is similar:
VSCCLRM 1110 1100 1.01 1111 .... 1011 imm:7 0 vd=%vd_dp size=3
VSCCLRM 1110 1100 1.01 1111 .... 1010 imm:8 vd=%vd_sp size=2
# FP system register accesses: these are a special case because accesses
# to FPCXT_NS succeed even if the FPU is disabled. We therefore need
# to handle them before the big NOCP blocks. Note that within these
# insns NOCP still has higher priority than UNDEFs; this is implemented
# by their returning 'false' for UNDEF so as to fall through into the
# NOCP check (in contrast to VLLDM etc, which call unallocated_encoding()
# for the UNDEFs there that must take precedence over NOCP.)
VMSR_VMRS ---- 1110 111 l:1 reg:4 rt:4 1010 0001 0000
# P=0 W=0 is SEE "Related encodings", so split into two patterns
VLDR_sysreg ---- 110 1 . . w:1 1 .... ... 0 111 11 ....... @vldr_sysreg p=1
VLDR_sysreg ---- 110 0 . . 1 1 .... ... 0 111 11 ....... @vldr_sysreg p=0 w=1
VSTR_sysreg ---- 110 1 . . w:1 0 .... ... 0 111 11 ....... @vldr_sysreg p=1
VSTR_sysreg ---- 110 0 . . 1 0 .... ... 0 111 11 ....... @vldr_sysreg p=0 w=1
NOCP 111- 1110 ---- ---- ---- cp:4 ---- ---- &nocp
NOCP 111- 110- ---- ---- ---- cp:4 ---- ---- &nocp
# From v8.1M onwards this range will also NOCP:
NOCP_8_1 111- 1111 ---- ---- ---- ---- ---- ---- &nocp cp=10
}
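
The "return false so as to fall through" convention mentioned in the comments above deserves a concrete illustration: inside a { } overlap group the generated decoder tries the patterns in order, and a trans_ handler that declines lets a later catch-all (here the NOCP patterns) match instead, whereas a handler that raises UNDEF itself makes that outcome final. The toy C sketch below shows only the fall-through control flow; the handler names and the fpu_enabled flag are stand-ins, not QEMU code.

#include <stdbool.h>
#include <stdio.h>

static bool fpu_enabled;     /* stand-in for the real "is the FPU usable" check */

static bool trans_VMSR_VMRS_sketch(void)
{
    if (!fpu_enabled) {
        return false;        /* decline: fall through to the next pattern */
    }
    printf("VMSR/VMRS handled\n");
    return true;
}

static bool trans_NOCP_sketch(void)
{
    printf("raise the NOCP UsageFault\n");
    return true;
}

int main(void)
{
    /* The decoder's view of the overlap group: the first handler that
     * returns true terminates the search. */
    if (!(trans_VMSR_VMRS_sketch() || trans_NOCP_sketch())) {
        printf("no pattern matched\n");
    }
    return 0;
}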

target/arm/tcg/meson.build (new file, 32 lines)

gen = [
decodetree.process('sve.decode', extra_args: '--decode=disas_sve'),
decodetree.process('sme.decode', extra_args: '--decode=disas_sme'),
decodetree.process('sme-fa64.decode', extra_args: '--static-decode=disas_sme_fa64'),
decodetree.process('neon-shared.decode', extra_args: '--decode=disas_neon_shared'),
decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'),
decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'),
decodetree.process('vfp.decode', extra_args: '--decode=disas_vfp'),
decodetree.process('vfp-uncond.decode', extra_args: '--decode=disas_vfp_uncond'),
decodetree.process('m-nocp.decode', extra_args: '--decode=disas_m_nocp'),
decodetree.process('mve.decode', extra_args: '--decode=disas_mve'),
decodetree.process('a32.decode', extra_args: '--static-decode=disas_a32'),
decodetree.process('a32-uncond.decode', extra_args: '--static-decode=disas_a32_uncond'),
decodetree.process('t32.decode', extra_args: '--static-decode=disas_t32'),
decodetree.process('t16.decode', extra_args: ['-w', '16', '--static-decode=disas_t16']),
]

arm_ss.add(gen)

arm_ss.add(files(
'translate.c',
'translate-m-nocp.c',
'translate-mve.c',
'translate-neon.c',
'translate-vfp.c',
))

arm_ss.add(when: 'TARGET_AARCH64', if_true: files(
'translate-a64.c',
'translate-sve.c',
'translate-sme.c',
))
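
Each decodetree.process() call above turns a .decode file into a generated C decoder whose entry point is the name passed to --decode or --static-decode; such a function takes the translation context and the instruction word and returns false when no pattern matched. The self-contained sketch below shows how the translate code typically strings these together, with stubs standing in for the generated decoders and for QEMU's real DisasContext and unallocated_encoding(); the actual call sites in translate.c are paraphrased, not quoted.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef struct DisasContext DisasContext;   /* opaque stand-in for QEMU's type */

static bool disas_a32(DisasContext *s, uint32_t insn)
{
    (void)s; (void)insn;
    return false;                 /* stub: pretend nothing matched */
}

static bool disas_vfp(DisasContext *s, uint32_t insn)
{
    (void)s; (void)insn;
    return false;
}

static void unallocated_encoding(DisasContext *s)
{
    (void)s;
    puts("UNDEF");                /* the real code raises an exception here */
}

static void decode_one_a32_insn(DisasContext *s, uint32_t insn)
{
    /* The first generated decoder that reports a match wins... */
    if (disas_a32(s, insn) || disas_vfp(s, insn)) {
        return;
    }
    /* ...anything left unmatched is treated as an illegal opcode. */
    unallocated_encoding(s);
}

int main(void)
{
    decode_one_a32_insn(NULL, 0xffffffffu);   /* no stub matches -> UNDEF */
    return 0;
}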

target/arm/tcg/mve.decode (new file, 832 lines)

# M-profile MVE instruction descriptions
#
# Copyright (c) 2021 Linaro, Ltd
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
#
# This file is processed by scripts/decodetree.py
#
%qd 22:1 13:3
%qm 5:1 1:3
%qn 7:1 17:3
# VQDMULL has size in bit 28: 0 for 16 bit, 1 for 32 bit
%size_28 28:1 !function=plus_1
# 2 operand fp insns have size in bit 20: 1 for 16 bit, 0 for 32 bit,
# like Neon FP insns.
%2op_fp_size 20:1 !function=neon_3same_fp_size
# VCADD is an exception, where bit 20 is 0 for 16 bit and 1 for 32 bit
%2op_fp_size_rev 20:1 !function=plus_1
# FP scalars have size in bit 28, 1 for 16 bit, 0 for 32 bit
%2op_fp_scalar_size 28:1 !function=neon_3same_fp_size
# 1imm format immediate
%imm_28_16_0 28:1 16:3 0:4
&vldr_vstr rn qd imm p a w size l u
&1op qd qm size
&2op qd qm qn size
&2scalar qd qn rm size
&1imm qd imm cmode op
&2shift qd qm shift size
&vidup qd rn size imm
&viwdup qd rn rm size imm
&vcmp qm qn size mask
&vcmp_scalar qn rm size mask
&shl_scalar qda rm size
&vmaxv qm rda size
&vabav qn qm rda size
&vldst_sg qd qm rn size msize os
&vldst_sg_imm qd qm a w imm
&vldst_il qd rn size pat w
# scatter-gather memory size is in bits 6:4
%sg_msize 6:1 4:1
@vldr_vstr ....... . . . . l:1 rn:4 ... ...... imm:7 &vldr_vstr qd=%qd u=0
# Note that both Rn and Qd are 3 bits only (no D bit)
@vldst_wn ... u:1 ... . . . . l:1 . rn:3 qd:3 . ... .. imm:7 &vldr_vstr
@vldst_sg .... .... .... rn:4 .... ... size:2 ... ... os:1 &vldst_sg \
qd=%qd qm=%qm msize=%sg_msize
# Qm is in the fields usually labeled Qn
@vldst_sg_imm .... .... a:1 . w:1 . .... .... .... . imm:7 &vldst_sg_imm \
qd=%qd qm=%qn
# Deinterleaving load/interleaving store
@vldst_il .... .... .. w:1 . rn:4 .... ... size:2 pat:2 ..... &vldst_il \
qd=%qd
@1op .... .... .... size:2 .. .... .... .... .... &1op qd=%qd qm=%qm
@1op_nosz .... .... .... .... .... .... .... .... &1op qd=%qd qm=%qm size=0
@2op .... .... .. size:2 .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn
@2op_nosz .... .... .... .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn size=0
@2op_sz28 .... .... .... .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn \
size=%size_28
@1imm .... .... .... .... .... cmode:4 .. op:1 . .... &1imm qd=%qd imm=%imm_28_16_0
# The _rev suffix indicates that Vn and Vm are reversed. This is
# the case for shifts. In the Arm ARM these insns are documented
# with the Vm and Vn fields in their usual places, but in the
# assembly the operands are listed "backwards", ie in the order
# Qd, Qm, Qn where other insns use Qd, Qn, Qm. For QEMU we choose
# to consider Vm and Vn as being in different fields in the insn.
# This gives us consistency with A64 and Neon.
@2op_rev .... .... .. size:2 .... .... .... .... .... &2op qd=%qd qm=%qn qn=%qm
@2scalar .... .... .. size:2 .... .... .... .... rm:4 &2scalar qd=%qd qn=%qn
@2scalar_nosz .... .... .... .... .... .... .... rm:4 &2scalar qd=%qd qn=%qn
@2_shl_b .... .... .. 001 shift:3 .... .... .... .... &2shift qd=%qd qm=%qm size=0
@2_shl_h .... .... .. 01 shift:4 .... .... .... .... &2shift qd=%qd qm=%qm size=1
@2_shl_w .... .... .. 1 shift:5 .... .... .... .... &2shift qd=%qd qm=%qm size=2
@2_shll_b .... .... ... 01 shift:3 .... .... .... .... &2shift qd=%qd qm=%qm size=0
@2_shll_h .... .... ... 1 shift:4 .... .... .... .... &2shift qd=%qd qm=%qm size=1
# VSHLL encoding T2 where shift == esize
@2_shll_esize_b .... .... .... 00 .. .... .... .... .... &2shift \
qd=%qd qm=%qm size=0 shift=8
@2_shll_esize_h .... .... .... 01 .. .... .... .... .... &2shift \
qd=%qd qm=%qm size=1 shift=16
# Right shifts are encoded as N - shift, where N is the element size in bits.
%rshift_i5 16:5 !function=rsub_32
%rshift_i4 16:4 !function=rsub_16
%rshift_i3 16:3 !function=rsub_8
@2_shr_b .... .... .. 001 ... .... .... .... .... &2shift qd=%qd qm=%qm \
size=0 shift=%rshift_i3
@2_shr_h .... .... .. 01 .... .... .... .... .... &2shift qd=%qd qm=%qm \
size=1 shift=%rshift_i4
@2_shr_w .... .... .. 1 ..... .... .... .... .... &2shift qd=%qd qm=%qm \
size=2 shift=%rshift_i5
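
As the comment above says, the right-shift immediates store N minus the shift amount, so the rsub_* filters only have to subtract the raw field from the element size. A tiny sketch under that assumption (the real filters also take the translation context, omitted here):

#include <stdio.h>

static int rsub_8(int x)  { return 8 - x; }
static int rsub_16(int x) { return 16 - x; }
static int rsub_32(int x) { return 32 - x; }

int main(void)
{
    /* A word-sized right shift by 1 is encoded as 32 - 1 = 31, etc. */
    printf("%d %d %d\n", rsub_32(31), rsub_16(15), rsub_8(7));   /* 1 1 1 */
    return 0;
}
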
@shl_scalar .... .... .... size:2 .. .... .... .... rm:4 &shl_scalar qda=%qd
# Vector comparison; 4-bit Qm but 3-bit Qn
%mask_22_13 22:1 13:3
@vcmp .... .... .. size:2 qn:3 . .... .... .... .... &vcmp qm=%qm mask=%mask_22_13
@vcmp_scalar .... .... .. size:2 qn:3 . .... .... .... rm:4 &vcmp_scalar \
mask=%mask_22_13
@vcmp_fp .... .... .... qn:3 . .... .... .... .... &vcmp \
qm=%qm size=%2op_fp_scalar_size mask=%mask_22_13
# Bit 28 is a 2op_fp_scalar_size bit, but we do not decode it in this
# format to avoid complicated overlapping-instruction-groups
@vcmp_fp_scalar .... .... .... qn:3 . .... .... .... rm:4 &vcmp_scalar \
mask=%mask_22_13
@vmaxv .... .... .... size:2 .. rda:4 .... .... .... &vmaxv qm=%qm
@2op_fp .... .... .... .... .... .... .... .... &2op \
qd=%qd qn=%qn qm=%qm size=%2op_fp_size
@2op_fp_size_rev .... .... .... .... .... .... .... .... &2op \
qd=%qd qn=%qn qm=%qm size=%2op_fp_size_rev
# 2-operand, but Qd and Qn share a field. Size is in bit 28, but we
# don't decode it in this format
@vmaxnma .... .... .... .... .... .... .... .... &2op \
qd=%qd qn=%qd qm=%qm
# Here also we don't decode the bit 28 size in the format to avoid
# awkward nested overlap groups
@vmaxnmv .... .... .... .... rda:4 .... .... .... &vmaxv qm=%qm
@2op_fp_scalar .... .... .... .... .... .... .... rm:4 &2scalar \
qd=%qd qn=%qn size=%2op_fp_scalar_size
# Vector loads and stores
# Widening loads and narrowing stores:
# for these P=0 W=0 is 'related encoding'; sz=11 is 'related encoding'
# This means we need to expand out to multiple patterns for P, W, SZ.
# For stores the U bit must be 0 but we catch that in the trans_ function.
# The naming scheme here is "VLDSTB_H == in-memory byte load/store to/from
# signed halfword element in register", etc.
VLDSTB_H 111 . 110 0 a:1 0 1 . 0 ... ... 0 111 01 ....... @vldst_wn \
p=0 w=1 size=1
VLDSTB_H 111 . 110 1 a:1 0 w:1 . 0 ... ... 0 111 01 ....... @vldst_wn \
p=1 size=1
VLDSTB_W 111 . 110 0 a:1 0 1 . 0 ... ... 0 111 10 ....... @vldst_wn \
p=0 w=1 size=2
VLDSTB_W 111 . 110 1 a:1 0 w:1 . 0 ... ... 0 111 10 ....... @vldst_wn \
p=1 size=2
VLDSTH_W 111 . 110 0 a:1 0 1 . 1 ... ... 0 111 10 ....... @vldst_wn \
p=0 w=1 size=2
VLDSTH_W 111 . 110 1 a:1 0 w:1 . 1 ... ... 0 111 10 ....... @vldst_wn \
p=1 size=2
# Non-widening loads/stores (P=0 W=0 is 'related encoding')
VLDR_VSTR 1110110 0 a:1 . 1 . .... ... 111100 ....... @vldr_vstr \
size=0 p=0 w=1
VLDR_VSTR 1110110 0 a:1 . 1 . .... ... 111101 ....... @vldr_vstr \
size=1 p=0 w=1
VLDR_VSTR 1110110 0 a:1 . 1 . .... ... 111110 ....... @vldr_vstr \
size=2 p=0 w=1
VLDR_VSTR 1110110 1 a:1 . w:1 . .... ... 111100 ....... @vldr_vstr \
size=0 p=1
VLDR_VSTR 1110110 1 a:1 . w:1 . .... ... 111101 ....... @vldr_vstr \
size=1 p=1
VLDR_VSTR 1110110 1 a:1 . w:1 . .... ... 111110 ....... @vldr_vstr \
size=2 p=1
# gather loads/scatter stores
VLDR_S_sg 111 0 1100 1 . 01 .... ... 0 111 . .... .... @vldst_sg
VLDR_U_sg 111 1 1100 1 . 01 .... ... 0 111 . .... .... @vldst_sg
VSTR_sg 111 0 1100 1 . 00 .... ... 0 111 . .... .... @vldst_sg
VLDRW_sg_imm 111 1 1101 ... 1 ... 0 ... 1 1110 .... .... @vldst_sg_imm
VLDRD_sg_imm 111 1 1101 ... 1 ... 0 ... 1 1111 .... .... @vldst_sg_imm
VSTRW_sg_imm 111 1 1101 ... 0 ... 0 ... 1 1110 .... .... @vldst_sg_imm
VSTRD_sg_imm 111 1 1101 ... 0 ... 0 ... 1 1111 .... .... @vldst_sg_imm
# deinterleaving loads/interleaving stores
VLD2 1111 1100 1 .. 1 .... ... 1 111 .. .. 00000 @vldst_il
VLD4 1111 1100 1 .. 1 .... ... 1 111 .. .. 00001 @vldst_il
VST2 1111 1100 1 .. 0 .... ... 1 111 .. .. 00000 @vldst_il
VST4 1111 1100 1 .. 0 .... ... 1 111 .. .. 00001 @vldst_il
# Moves between 2 32-bit vector lanes and 2 general purpose registers
VMOV_to_2gp 1110 1100 0 . 00 rt2:4 ... 0 1111 000 idx:1 rt:4 qd=%qd
VMOV_from_2gp 1110 1100 0 . 01 rt2:4 ... 0 1111 000 idx:1 rt:4 qd=%qd
# Vector 2-op
VAND 1110 1111 0 . 00 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz
VBIC 1110 1111 0 . 01 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz
VORR 1110 1111 0 . 10 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz
VORN 1110 1111 0 . 11 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz
VEOR 1111 1111 0 . 00 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz
VADD 1110 1111 0 . .. ... 0 ... 0 1000 . 1 . 0 ... 0 @2op
VSUB 1111 1111 0 . .. ... 0 ... 0 1000 . 1 . 0 ... 0 @2op
VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op
# The VSHLL T2 encoding is not a @2op pattern, but is here because it
# overlaps what would be size=0b11 VMULH/VRMULH
{
VCVTB_SH 111 0 1110 0 . 11 1111 ... 0 1110 0 0 . 0 ... 1 @1op_nosz
VMAXNMA 111 0 1110 0 . 11 1111 ... 0 1110 1 0 . 0 ... 1 @vmaxnma size=2
VSHLL_BS 111 0 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_b
VSHLL_BS 111 0 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_h
VQMOVUNB 111 0 1110 0 . 11 .. 01 ... 0 1110 1 0 . 0 ... 1 @1op
VQMOVN_BS 111 0 1110 0 . 11 .. 11 ... 0 1110 0 0 . 0 ... 1 @1op
VMAXA 111 0 1110 0 . 11 .. 11 ... 0 1110 1 0 . 0 ... 1 @1op
VMULH_S 111 0 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op
}
{
VCVTB_HS 111 1 1110 0 . 11 1111 ... 0 1110 0 0 . 0 ... 1 @1op_nosz
VMAXNMA 111 1 1110 0 . 11 1111 ... 0 1110 1 0 . 0 ... 1 @vmaxnma size=1
VSHLL_BU 111 1 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_b
VSHLL_BU 111 1 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_h
VMOVNB 111 1 1110 0 . 11 .. 01 ... 0 1110 1 0 . 0 ... 1 @1op
VQMOVN_BU 111 1 1110 0 . 11 .. 11 ... 0 1110 0 0 . 0 ... 1 @1op
VMULH_U 111 1 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op
}
{
VCVTT_SH 111 0 1110 0 . 11 1111 ... 1 1110 0 0 . 0 ... 1 @1op_nosz
VMINNMA 111 0 1110 0 . 11 1111 ... 1 1110 1 0 . 0 ... 1 @vmaxnma size=2
VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_b
VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_h
VQMOVUNT 111 0 1110 0 . 11 .. 01 ... 1 1110 1 0 . 0 ... 1 @1op
VQMOVN_TS 111 0 1110 0 . 11 .. 11 ... 1 1110 0 0 . 0 ... 1 @1op
VMINA 111 0 1110 0 . 11 .. 11 ... 1 1110 1 0 . 0 ... 1 @1op
VRMULH_S 111 0 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op
}
{
VCVTT_HS 111 1 1110 0 . 11 1111 ... 1 1110 0 0 . 0 ... 1 @1op_nosz
VMINNMA 111 1 1110 0 . 11 1111 ... 1 1110 1 0 . 0 ... 1 @vmaxnma size=1
VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_b
VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_h
VMOVNT 111 1 1110 0 . 11 .. 01 ... 1 1110 1 0 . 0 ... 1 @1op
VQMOVN_TU 111 1 1110 0 . 11 .. 11 ... 1 1110 0 0 . 0 ... 1 @1op
VRMULH_U 111 1 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op
}
VMAX_S 111 0 1111 0 . .. ... 0 ... 0 0110 . 1 . 0 ... 0 @2op
VMAX_U 111 1 1111 0 . .. ... 0 ... 0 0110 . 1 . 0 ... 0 @2op
VMIN_S 111 0 1111 0 . .. ... 0 ... 0 0110 . 1 . 1 ... 0 @2op
VMIN_U 111 1 1111 0 . .. ... 0 ... 0 0110 . 1 . 1 ... 0 @2op
VABD_S 111 0 1111 0 . .. ... 0 ... 0 0111 . 1 . 0 ... 0 @2op
VABD_U 111 1 1111 0 . .. ... 0 ... 0 0111 . 1 . 0 ... 0 @2op
VHADD_S 111 0 1111 0 . .. ... 0 ... 0 0000 . 1 . 0 ... 0 @2op
VHADD_U 111 1 1111 0 . .. ... 0 ... 0 0000 . 1 . 0 ... 0 @2op
VHSUB_S 111 0 1111 0 . .. ... 0 ... 0 0010 . 1 . 0 ... 0 @2op
VHSUB_U 111 1 1111 0 . .. ... 0 ... 0 0010 . 1 . 0 ... 0 @2op
{
VMULLP_B 111 . 1110 0 . 11 ... 1 ... 0 1110 . 0 . 0 ... 0 @2op_sz28
VMULL_BS 111 0 1110 0 . .. ... 1 ... 0 1110 . 0 . 0 ... 0 @2op
VMULL_BU 111 1 1110 0 . .. ... 1 ... 0 1110 . 0 . 0 ... 0 @2op
}
{
VMULLP_T 111 . 1110 0 . 11 ... 1 ... 1 1110 . 0 . 0 ... 0 @2op_sz28
VMULL_TS 111 0 1110 0 . .. ... 1 ... 1 1110 . 0 . 0 ... 0 @2op
VMULL_TU 111 1 1110 0 . .. ... 1 ... 1 1110 . 0 . 0 ... 0 @2op
}
VQDMULH 1110 1111 0 . .. ... 0 ... 0 1011 . 1 . 0 ... 0 @2op
VQRDMULH 1111 1111 0 . .. ... 0 ... 0 1011 . 1 . 0 ... 0 @2op
VQADD_S 111 0 1111 0 . .. ... 0 ... 0 0000 . 1 . 1 ... 0 @2op
VQADD_U 111 1 1111 0 . .. ... 0 ... 0 0000 . 1 . 1 ... 0 @2op
VQSUB_S 111 0 1111 0 . .. ... 0 ... 0 0010 . 1 . 1 ... 0 @2op
VQSUB_U 111 1 1111 0 . .. ... 0 ... 0 0010 . 1 . 1 ... 0 @2op
VSHL_S 111 0 1111 0 . .. ... 0 ... 0 0100 . 1 . 0 ... 0 @2op_rev
VSHL_U 111 1 1111 0 . .. ... 0 ... 0 0100 . 1 . 0 ... 0 @2op_rev
VRSHL_S 111 0 1111 0 . .. ... 0 ... 0 0101 . 1 . 0 ... 0 @2op_rev
VRSHL_U 111 1 1111 0 . .. ... 0 ... 0 0101 . 1 . 0 ... 0 @2op_rev
VQSHL_S 111 0 1111 0 . .. ... 0 ... 0 0100 . 1 . 1 ... 0 @2op_rev
VQSHL_U 111 1 1111 0 . .. ... 0 ... 0 0100 . 1 . 1 ... 0 @2op_rev
VQRSHL_S 111 0 1111 0 . .. ... 0 ... 0 0101 . 1 . 1 ... 0 @2op_rev
VQRSHL_U 111 1 1111 0 . .. ... 0 ... 0 0101 . 1 . 1 ... 0 @2op_rev
{
VCMUL0 111 . 1110 0 . 11 ... 0 ... 0 1110 . 0 . 0 ... 0 @2op_sz28
VQDMLADH 1110 1110 0 . .. ... 0 ... 0 1110 . 0 . 0 ... 0 @2op
VQDMLSDH 1111 1110 0 . .. ... 0 ... 0 1110 . 0 . 0 ... 0 @2op
}
{
VCMUL180 111 . 1110 0 . 11 ... 0 ... 1 1110 . 0 . 0 ... 0 @2op_sz28
VQDMLADHX 111 0 1110 0 . .. ... 0 ... 1 1110 . 0 . 0 ... 0 @2op
VQDMLSDHX 111 1 1110 0 . .. ... 0 ... 1 1110 . 0 . 0 ... 0 @2op
}
{
VCMUL90 111 . 1110 0 . 11 ... 0 ... 0 1110 . 0 . 0 ... 1 @2op_sz28
VQRDMLADH 111 0 1110 0 . .. ... 0 ... 0 1110 . 0 . 0 ... 1 @2op
VQRDMLSDH 111 1 1110 0 . .. ... 0 ... 0 1110 . 0 . 0 ... 1 @2op
}
{
VCMUL270 111 . 1110 0 . 11 ... 0 ... 1 1110 . 0 . 0 ... 1 @2op_sz28
VQRDMLADHX 111 0 1110 0 . .. ... 0 ... 1 1110 . 0 . 0 ... 1 @2op
VQRDMLSDHX 111 1 1110 0 . .. ... 0 ... 1 1110 . 0 . 0 ... 1 @2op
}
VQDMULLB 111 . 1110 0 . 11 ... 0 ... 0 1111 . 0 . 0 ... 1 @2op_sz28
VQDMULLT 111 . 1110 0 . 11 ... 0 ... 1 1111 . 0 . 0 ... 1 @2op_sz28
VRHADD_S 111 0 1111 0 . .. ... 0 ... 0 0001 . 1 . 0 ... 0 @2op
VRHADD_U 111 1 1111 0 . .. ... 0 ... 0 0001 . 1 . 0 ... 0 @2op
{
VADC 1110 1110 0 . 11 ... 0 ... 0 1111 . 0 . 0 ... 0 @2op_nosz
VADCI 1110 1110 0 . 11 ... 0 ... 1 1111 . 0 . 0 ... 0 @2op_nosz
VHCADD90 1110 1110 0 . .. ... 0 ... 0 1111 . 0 . 0 ... 0 @2op
VHCADD270 1110 1110 0 . .. ... 0 ... 1 1111 . 0 . 0 ... 0 @2op
}
{
VSBC 1111 1110 0 . 11 ... 0 ... 0 1111 . 0 . 0 ... 0 @2op_nosz
VSBCI 1111 1110 0 . 11 ... 0 ... 1 1111 . 0 . 0 ... 0 @2op_nosz
VCADD90 1111 1110 0 . .. ... 0 ... 0 1111 . 0 . 0 ... 0 @2op
VCADD270 1111 1110 0 . .. ... 0 ... 1 1111 . 0 . 0 ... 0 @2op
}
# Vector miscellaneous
VCLS 1111 1111 1 . 11 .. 00 ... 0 0100 01 . 0 ... 0 @1op
VCLZ 1111 1111 1 . 11 .. 00 ... 0 0100 11 . 0 ... 0 @1op
VREV16 1111 1111 1 . 11 .. 00 ... 0 0001 01 . 0 ... 0 @1op
VREV32 1111 1111 1 . 11 .. 00 ... 0 0000 11 . 0 ... 0 @1op
VREV64 1111 1111 1 . 11 .. 00 ... 0 0000 01 . 0 ... 0 @1op
VMVN 1111 1111 1 . 11 00 00 ... 0 0101 11 . 0 ... 0 @1op_nosz
VABS 1111 1111 1 . 11 .. 01 ... 0 0011 01 . 0 ... 0 @1op
VABS_fp 1111 1111 1 . 11 .. 01 ... 0 0111 01 . 0 ... 0 @1op
VNEG 1111 1111 1 . 11 .. 01 ... 0 0011 11 . 0 ... 0 @1op
VNEG_fp 1111 1111 1 . 11 .. 01 ... 0 0111 11 . 0 ... 0 @1op
VQABS 1111 1111 1 . 11 .. 00 ... 0 0111 01 . 0 ... 0 @1op
VQNEG 1111 1111 1 . 11 .. 00 ... 0 0111 11 . 0 ... 0 @1op
&vdup qd rt size
# Qd is in the fields usually named Qn
@vdup .... .... . . .. ... . rt:4 .... . . . . .... qd=%qn &vdup
# B and E bits encode size, which we decode here to the usual size values
VDUP 1110 1110 1 1 10 ... 0 .... 1011 . 0 0 1 0000 @vdup size=0
VDUP 1110 1110 1 0 10 ... 0 .... 1011 . 0 1 1 0000 @vdup size=1
VDUP 1110 1110 1 0 10 ... 0 .... 1011 . 0 0 1 0000 @vdup size=2
# Incrementing and decrementing dup
# VIDUP, VDDUP format immediate: 1 << (immh:imml)
%imm_vidup 7:1 0:1 !function=vidup_imm
# VIDUP, VDDUP registers: Rm bits [3:1] from insn, bit 0 is 1;
# Rn bits [3:1] from insn, bit 0 is 0
%vidup_rm 1:3 !function=times_2_plus_1
%vidup_rn 17:3 !function=times_2
@vidup .... .... . . size:2 .... .... .... .... .... \
qd=%qd imm=%imm_vidup rn=%vidup_rn &vidup
@viwdup .... .... . . size:2 .... .... .... .... .... \
qd=%qd imm=%imm_vidup rm=%vidup_rm rn=%vidup_rn &viwdup
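
The filters referenced above are simple arithmetic: per the comments, vidup_imm turns the two-bit immh:imml value into an increment of 1, 2, 4 or 8, while times_2 and times_2_plus_1 rebuild the even Rn and odd Rm register numbers from their three encoded bits. A minimal sketch on those assumptions (the context parameter of the real filters is again omitted):

#include <stdio.h>

static int vidup_imm(int x)      { return 1 << x; }     /* x = immh:imml, 0..3 */
static int times_2(int x)        { return x * 2; }      /* Rn = rn<3:1>:0 */
static int times_2_plus_1(int x) { return x * 2 + 1; }  /* Rm = rm<3:1>:1 */

int main(void)
{
    /* immh:imml = 0b11 -> step of 8; field value 5 -> r10 (Rn) or r11 (Rm) */
    printf("%d r%d r%d\n", vidup_imm(3), times_2(5), times_2_plus_1(5));
    return 0;
}
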
{
VIDUP 1110 1110 0 . .. ... 1 ... 0 1111 . 110 111 . @vidup
VIWDUP 1110 1110 0 . .. ... 1 ... 0 1111 . 110 ... . @viwdup
}
{
VCMPGT_fp_scalar 1110 1110 0 . 11 ... 1 ... 1 1111 0110 .... @vcmp_fp_scalar size=2
VCMPLE_fp_scalar 1110 1110 0 . 11 ... 1 ... 1 1111 1110 .... @vcmp_fp_scalar size=2
VDDUP 1110 1110 0 . .. ... 1 ... 1 1111 . 110 111 . @vidup
VDWDUP 1110 1110 0 . .. ... 1 ... 1 1111 . 110 ... . @viwdup
}
# multiply-add long dual accumulate
# rdahi: bits [3:1] from insn, bit 0 is 1
# rdalo: bits [3:1] from insn, bit 0 is 0
%rdahi 20:3 !function=times_2_plus_1
%rdalo 13:3 !function=times_2
# size bit is 0 for 16 bit, 1 for 32 bit
%size_16 16:1 !function=plus_1
&vmlaldav rdahi rdalo size qn qm x a
&vmladav rda size qn qm x a
@vmlaldav .... .... . ... ... . ... x:1 .... .. a:1 . qm:3 . \
qn=%qn rdahi=%rdahi rdalo=%rdalo size=%size_16 &vmlaldav
@vmlaldav_nosz .... .... . ... ... . ... x:1 .... .. a:1 . qm:3 . \
qn=%qn rdahi=%rdahi rdalo=%rdalo size=0 &vmlaldav
@vmladav .... .... .... ... . ... x:1 .... . . a:1 . qm:3 . \
qn=%qn rda=%rdalo size=%size_16 &vmladav
@vmladav_nosz .... .... .... ... . ... x:1 .... . . a:1 . qm:3 . \
qn=%qn rda=%rdalo size=0 &vmladav
{
VMLADAV_S 1110 1110 1111 ... . ... . 1110 . 0 . 0 ... 0 @vmladav
VMLALDAV_S 1110 1110 1 ... ... . ... . 1110 . 0 . 0 ... 0 @vmlaldav
}
{
VMLADAV_U 1111 1110 1111 ... . ... . 1110 . 0 . 0 ... 0 @vmladav
VMLALDAV_U 1111 1110 1 ... ... . ... . 1110 . 0 . 0 ... 0 @vmlaldav
}
{
VMLSDAV 1110 1110 1111 ... . ... . 1110 . 0 . 0 ... 1 @vmladav
VMLSLDAV 1110 1110 1 ... ... . ... . 1110 . 0 . 0 ... 1 @vmlaldav
}
{
VMLSDAV 1111 1110 1111 ... 0 ... . 1110 . 0 . 0 ... 1 @vmladav_nosz
VRMLSLDAVH 1111 1110 1 ... ... 0 ... . 1110 . 0 . 0 ... 1 @vmlaldav_nosz
}
VMLADAV_S 1110 1110 1111 ... 0 ... . 1111 . 0 . 0 ... 1 @vmladav_nosz
VMLADAV_U 1111 1110 1111 ... 0 ... . 1111 . 0 . 0 ... 1 @vmladav_nosz
{
[
VMAXNMAV 1110 1110 1110 11 00 .... 1111 0 0 . 0 ... 0 @vmaxnmv size=2
VMINNMAV 1110 1110 1110 11 00 .... 1111 1 0 . 0 ... 0 @vmaxnmv size=2
VMAXNMV 1110 1110 1110 11 10 .... 1111 0 0 . 0 ... 0 @vmaxnmv size=2
VMINNMV 1110 1110 1110 11 10 .... 1111 1 0 . 0 ... 0 @vmaxnmv size=2
]
[
VMAXV_S 1110 1110 1110 .. 10 .... 1111 0 0 . 0 ... 0 @vmaxv
VMINV_S 1110 1110 1110 .. 10 .... 1111 1 0 . 0 ... 0 @vmaxv
VMAXAV 1110 1110 1110 .. 00 .... 1111 0 0 . 0 ... 0 @vmaxv
VMINAV 1110 1110 1110 .. 00 .... 1111 1 0 . 0 ... 0 @vmaxv
]
VMLADAV_S 1110 1110 1111 ... 0 ... . 1111 . 0 . 0 ... 0 @vmladav_nosz
VRMLALDAVH_S 1110 1110 1 ... ... 0 ... . 1111 . 0 . 0 ... 0 @vmlaldav_nosz
}
{
[
VMAXNMAV 1111 1110 1110 11 00 .... 1111 0 0 . 0 ... 0 @vmaxnmv size=1
VMINNMAV 1111 1110 1110 11 00 .... 1111 1 0 . 0 ... 0 @vmaxnmv size=1
VMAXNMV 1111 1110 1110 11 10 .... 1111 0 0 . 0 ... 0 @vmaxnmv size=1
VMINNMV 1111 1110 1110 11 10 .... 1111 1 0 . 0 ... 0 @vmaxnmv size=1
]
[
VMAXV_U 1111 1110 1110 .. 10 .... 1111 0 0 . 0 ... 0 @vmaxv
VMINV_U 1111 1110 1110 .. 10 .... 1111 1 0 . 0 ... 0 @vmaxv
]
VMLADAV_U 1111 1110 1111 ... 0 ... . 1111 . 0 . 0 ... 0 @vmladav_nosz
VRMLALDAVH_U 1111 1110 1 ... ... 0 ... . 1111 . 0 . 0 ... 0 @vmlaldav_nosz
}
# Scalar operations
{
VCMPEQ_fp_scalar 1110 1110 0 . 11 ... 1 ... 0 1111 0100 .... @vcmp_fp_scalar size=2
VCMPNE_fp_scalar 1110 1110 0 . 11 ... 1 ... 0 1111 1100 .... @vcmp_fp_scalar size=2
VADD_scalar 1110 1110 0 . .. ... 1 ... 0 1111 . 100 .... @2scalar
}
{
VCMPLT_fp_scalar 1110 1110 0 . 11 ... 1 ... 1 1111 1100 .... @vcmp_fp_scalar size=2
VCMPGE_fp_scalar 1110 1110 0 . 11 ... 1 ... 1 1111 0100 .... @vcmp_fp_scalar size=2
VSUB_scalar 1110 1110 0 . .. ... 1 ... 1 1111 . 100 .... @2scalar
}
{
VSHL_S_scalar 1110 1110 0 . 11 .. 01 ... 1 1110 0110 .... @shl_scalar
VRSHL_S_scalar 1110 1110 0 . 11 .. 11 ... 1 1110 0110 .... @shl_scalar
VQSHL_S_scalar 1110 1110 0 . 11 .. 01 ... 1 1110 1110 .... @shl_scalar
VQRSHL_S_scalar 1110 1110 0 . 11 .. 11 ... 1 1110 1110 .... @shl_scalar
VMUL_scalar 1110 1110 0 . .. ... 1 ... 1 1110 . 110 .... @2scalar
}
{
VSHL_U_scalar 1111 1110 0 . 11 .. 01 ... 1 1110 0110 .... @shl_scalar
VRSHL_U_scalar 1111 1110 0 . 11 .. 11 ... 1 1110 0110 .... @shl_scalar
VQSHL_U_scalar 1111 1110 0 . 11 .. 01 ... 1 1110 1110 .... @shl_scalar
VQRSHL_U_scalar 1111 1110 0 . 11 .. 11 ... 1 1110 1110 .... @shl_scalar
VBRSR 1111 1110 0 . .. ... 1 ... 1 1110 . 110 .... @2scalar
}
{
VADD_fp_scalar 111 . 1110 0 . 11 ... 0 ... 0 1111 . 100 .... @2op_fp_scalar
VHADD_S_scalar 1110 1110 0 . .. ... 0 ... 0 1111 . 100 .... @2scalar
VHADD_U_scalar 1111 1110 0 . .. ... 0 ... 0 1111 . 100 .... @2scalar
}
{
VSUB_fp_scalar 111 . 1110 0 . 11 ... 0 ... 1 1111 . 100 .... @2op_fp_scalar
VHSUB_S_scalar 1110 1110 0 . .. ... 0 ... 1 1111 . 100 .... @2scalar
VHSUB_U_scalar 1111 1110 0 . .. ... 0 ... 1 1111 . 100 .... @2scalar
}
{
VQADD_S_scalar 1110 1110 0 . .. ... 0 ... 0 1111 . 110 .... @2scalar
VQADD_U_scalar 1111 1110 0 . .. ... 0 ... 0 1111 . 110 .... @2scalar
VQDMULLB_scalar 111 . 1110 0 . 11 ... 0 ... 0 1111 . 110 .... @2scalar_nosz \
size=%size_28
}
{
VQSUB_S_scalar 1110 1110 0 . .. ... 0 ... 1 1111 . 110 .... @2scalar
VQSUB_U_scalar 1111 1110 0 . .. ... 0 ... 1 1111 . 110 .... @2scalar
VQDMULLT_scalar 111 . 1110 0 . 11 ... 0 ... 1 1111 . 110 .... @2scalar_nosz \
size=%size_28
}
{
VMUL_fp_scalar 111 . 1110 0 . 11 ... 1 ... 0 1110 . 110 .... @2op_fp_scalar
VQDMULH_scalar 1110 1110 0 . .. ... 1 ... 0 1110 . 110 .... @2scalar
VQRDMULH_scalar 1111 1110 0 . .. ... 1 ... 0 1110 . 110 .... @2scalar
}
{
VFMA_scalar 111 . 1110 0 . 11 ... 1 ... 0 1110 . 100 .... @2op_fp_scalar
# The U bit (28) is don't-care because it does not affect the result
VMLA 111 - 1110 0 . .. ... 1 ... 0 1110 . 100 .... @2scalar
}
{
VFMAS_scalar 111 . 1110 0 . 11 ... 1 ... 1 1110 . 100 .... @2op_fp_scalar
# The U bit (28) is don't-care because it does not affect the result
VMLAS 111 - 1110 0 . .. ... 1 ... 1 1110 . 100 .... @2scalar
}
VQRDMLAH 1110 1110 0 . .. ... 0 ... 0 1110 . 100 .... @2scalar
VQRDMLASH 1110 1110 0 . .. ... 0 ... 1 1110 . 100 .... @2scalar
VQDMLAH 1110 1110 0 . .. ... 0 ... 0 1110 . 110 .... @2scalar
VQDMLASH 1110 1110 0 . .. ... 0 ... 1 1110 . 110 .... @2scalar
# Vector add across vector
{
VADDV 111 u:1 1110 1111 size:2 01 ... 0 1111 0 0 a:1 0 qm:3 0 rda=%rdalo
VADDLV 111 u:1 1110 1 ... 1001 ... 0 1111 00 a:1 0 qm:3 0 \
rdahi=%rdahi rdalo=%rdalo
}
@vabav .... .... .. size:2 .... rda:4 .... .... .... &vabav qn=%qn qm=%qm
VABAV_S 111 0 1110 10 .. ... 0 .... 1111 . 0 . 0 ... 1 @vabav
VABAV_U 111 1 1110 10 .. ... 0 .... 1111 . 0 . 0 ... 1 @vabav
# Logical immediate operations (1 reg and modified-immediate)
# The cmode/op bits here decode VORR/VBIC/VMOV/VMVN, but
# not in a way we can conveniently represent in decodetree without
# a lot of repetition:
# VORR: op=0, (cmode & 1) && cmode < 12
# VBIC: op=1, (cmode & 1) && cmode < 12
# VMOV: everything else
# So we have a single decode line and check the cmode/op in the
# trans function.
Vimm_1r 111 . 1111 1 . 00 0 ... ... 0 .... 0 1 . 1 .... @1imm
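
Because the VORR/VBIC/VMOV/VMVN split above is awkward to express as separate patterns, the single Vimm_1r line leaves the cmode/op test to its trans function. The small C sketch below encodes exactly the conditions listed in the comment; the enum and function names are illustrative, not taken from the QEMU source.

#include <stdio.h>

enum mve_1imm_kind { KIND_VORR, KIND_VBIC, KIND_VMOV_VMVN };

static enum mve_1imm_kind classify_1imm(int cmode, int op)
{
    if ((cmode & 1) && cmode < 12) {
        return op ? KIND_VBIC : KIND_VORR;
    }
    return KIND_VMOV_VMVN;           /* "everything else" is a move form */
}

int main(void)
{
    printf("%d %d %d\n",
           classify_1imm(1, 0),      /* VORR      -> 0 */
           classify_1imm(3, 1),      /* VBIC      -> 1 */
           classify_1imm(14, 0));    /* VMOV/VMVN -> 2 */
    return 0;
}
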
# Shifts by immediate
VSHLI 111 0 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_b
VSHLI 111 0 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_h
VSHLI 111 0 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_w
VQSHLI_S 111 0 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_b
VQSHLI_S 111 0 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_h
VQSHLI_S 111 0 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_w
VQSHLI_U 111 1 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_b
VQSHLI_U 111 1 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_h
VQSHLI_U 111 1 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_w
VQSHLUI 111 1 1111 1 . ... ... ... 0 0110 0 1 . 1 ... 0 @2_shl_b
VQSHLUI 111 1 1111 1 . ... ... ... 0 0110 0 1 . 1 ... 0 @2_shl_h
VQSHLUI 111 1 1111 1 . ... ... ... 0 0110 0 1 . 1 ... 0 @2_shl_w
VSHRI_S 111 0 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_b
VSHRI_S 111 0 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_h
VSHRI_S 111 0 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_w
VSHRI_U 111 1 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_b
VSHRI_U 111 1 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_h
VSHRI_U 111 1 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_w
VRSHRI_S 111 0 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_b
VRSHRI_S 111 0 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_h
VRSHRI_S 111 0 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_w
VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_b
VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_h
VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_w
# VSHLL T1 encoding; the T2 VSHLL encoding is elsewhere in this file
# Note that VMOVL is encoded as "VSHLL with a zero shift count"; we
# implement it that way rather than special-casing it in the decode.
VSHLL_BS 111 0 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_b
VSHLL_BS 111 0 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_h
VSHLL_BU 111 1 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_b
VSHLL_BU 111 1 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_h
VSHLL_TS 111 0 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_b
VSHLL_TS 111 0 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_h
VSHLL_TU 111 1 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_b
VSHLL_TU 111 1 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_h
# Shift-and-insert
VSRI 111 1 1111 1 . ... ... ... 0 0100 0 1 . 1 ... 0 @2_shr_b
VSRI 111 1 1111 1 . ... ... ... 0 0100 0 1 . 1 ... 0 @2_shr_h
VSRI 111 1 1111 1 . ... ... ... 0 0100 0 1 . 1 ... 0 @2_shr_w
VSLI 111 1 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_b
VSLI 111 1 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_h
VSLI 111 1 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_w
# Narrowing shifts (which only support b and h sizes)
VSHRNB 111 0 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_b
VSHRNB 111 0 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_h
VSHRNT 111 0 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_b
VSHRNT 111 0 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_h
VRSHRNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_b
VRSHRNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_h
VRSHRNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_b
VRSHRNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_h
VQSHRNB_S 111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_b
VQSHRNB_S 111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_h
VQSHRNT_S 111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_b
VQSHRNT_S 111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_h
VQSHRNB_U 111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_b
VQSHRNB_U 111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_h
VQSHRNT_U 111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_b
VQSHRNT_U 111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_h
VQSHRUNB 111 0 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_b
VQSHRUNB 111 0 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_h
VQSHRUNT 111 0 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_b
VQSHRUNT 111 0 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_h
VQRSHRNB_S 111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_b
VQRSHRNB_S 111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_h
VQRSHRNT_S 111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_b
VQRSHRNT_S 111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_h
VQRSHRNB_U 111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_b
VQRSHRNB_U 111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_h
VQRSHRNT_U 111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_b
VQRSHRNT_U 111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_h
VQRSHRUNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_b
VQRSHRUNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_h
VQRSHRUNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_b
VQRSHRUNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_h
VSHLC 111 0 1110 1 . 1 imm:5 ... 0 1111 1100 rdm:4 qd=%qd
# Comparisons. We expand out the conditions which are split across
# encodings T1, T2, T3 and the fc bits. These include VPT, which is
# effectively "VCMP then VPST". A plain "VCMP" has a mask field of zero.
{
VCMPEQ_fp 111 . 1110 0 . 11 ... 1 ... 0 1111 0 0 . 0 ... 0 @vcmp_fp
VCMPEQ 111 1 1110 0 . .. ... 1 ... 0 1111 0 0 . 0 ... 0 @vcmp
}
{
VCMPNE_fp 111 . 1110 0 . 11 ... 1 ... 0 1111 1 0 . 0 ... 0 @vcmp_fp
VCMPNE 111 1 1110 0 . .. ... 1 ... 0 1111 1 0 . 0 ... 0 @vcmp
}
{
VCMPGE_fp 111 . 1110 0 . 11 ... 1 ... 1 1111 0 0 . 0 ... 0 @vcmp_fp
VCMPGE 111 1 1110 0 . .. ... 1 ... 1 1111 0 0 . 0 ... 0 @vcmp
}
{
VCMPLT_fp 111 . 1110 0 . 11 ... 1 ... 1 1111 1 0 . 0 ... 0 @vcmp_fp
VCMPLT 111 1 1110 0 . .. ... 1 ... 1 1111 1 0 . 0 ... 0 @vcmp
}
{
VCMPGT_fp 111 . 1110 0 . 11 ... 1 ... 1 1111 0 0 . 0 ... 1 @vcmp_fp
VCMPGT 111 1 1110 0 . .. ... 1 ... 1 1111 0 0 . 0 ... 1 @vcmp
}
{
VCMPLE_fp 111 . 1110 0 . 11 ... 1 ... 1 1111 1 0 . 0 ... 1 @vcmp_fp
VCMPLE 111 1 1110 0 . .. ... 1 ... 1 1111 1 0 . 0 ... 1 @vcmp
}
{
VPSEL 1111 1110 0 . 11 ... 1 ... 0 1111 . 0 . 0 ... 1 @2op_nosz
VCMPCS 1111 1110 0 . .. ... 1 ... 0 1111 0 0 . 0 ... 1 @vcmp
VCMPHI 1111 1110 0 . .. ... 1 ... 0 1111 1 0 . 0 ... 1 @vcmp
}
{
VPNOT 1111 1110 0 0 11 000 1 000 0 1111 0100 1101
VPST 1111 1110 0 . 11 000 1 ... 0 1111 0100 1101 mask=%mask_22_13
VCMPEQ_fp_scalar 1111 1110 0 . 11 ... 1 ... 0 1111 0100 .... @vcmp_fp_scalar size=1
VCMPEQ_scalar 1111 1110 0 . .. ... 1 ... 0 1111 0100 .... @vcmp_scalar
}
{
VCMPNE_fp_scalar 1111 1110 0 . 11 ... 1 ... 0 1111 1100 .... @vcmp_fp_scalar size=1
VCMPNE_scalar 1111 1110 0 . .. ... 1 ... 0 1111 1100 .... @vcmp_scalar
}
{
VCMPGT_fp_scalar 1111 1110 0 . 11 ... 1 ... 1 1111 0110 .... @vcmp_fp_scalar size=1
VCMPGT_scalar 1111 1110 0 . .. ... 1 ... 1 1111 0110 .... @vcmp_scalar
}
{
VCMPLE_fp_scalar 1111 1110 0 . 11 ... 1 ... 1 1111 1110 .... @vcmp_fp_scalar size=1
VCMPLE_scalar 1111 1110 0 . .. ... 1 ... 1 1111 1110 .... @vcmp_scalar
}
{
VCMPGE_fp_scalar 1111 1110 0 . 11 ... 1 ... 1 1111 0100 .... @vcmp_fp_scalar size=1
VCMPGE_scalar 1111 1110 0 . .. ... 1 ... 1 1111 0100 .... @vcmp_scalar
}
{
VCMPLT_fp_scalar 1111 1110 0 . 11 ... 1 ... 1 1111 1100 .... @vcmp_fp_scalar size=1
VCMPLT_scalar 1111 1110 0 . .. ... 1 ... 1 1111 1100 .... @vcmp_scalar
}
VCMPCS_scalar 1111 1110 0 . .. ... 1 ... 0 1111 0 1 1 0 .... @vcmp_scalar
VCMPHI_scalar 1111 1110 0 . .. ... 1 ... 0 1111 1 1 1 0 .... @vcmp_scalar
# 2-operand FP
VADD_fp 1110 1111 0 . 0 . ... 0 ... 0 1101 . 1 . 0 ... 0 @2op_fp
VSUB_fp 1110 1111 0 . 1 . ... 0 ... 0 1101 . 1 . 0 ... 0 @2op_fp
VMUL_fp 1111 1111 0 . 0 . ... 0 ... 0 1101 . 1 . 1 ... 0 @2op_fp
VABD_fp 1111 1111 0 . 1 . ... 0 ... 0 1101 . 1 . 0 ... 0 @2op_fp
VMAXNM 1111 1111 0 . 0 . ... 0 ... 0 1111 . 1 . 1 ... 0 @2op_fp
VMINNM 1111 1111 0 . 1 . ... 0 ... 0 1111 . 1 . 1 ... 0 @2op_fp
VCADD90_fp 1111 1100 1 . 0 . ... 0 ... 0 1000 . 1 . 0 ... 0 @2op_fp_size_rev
VCADD270_fp 1111 1101 1 . 0 . ... 0 ... 0 1000 . 1 . 0 ... 0 @2op_fp_size_rev
VFMA 1110 1111 0 . 0 . ... 0 ... 0 1100 . 1 . 1 ... 0 @2op_fp
VFMS 1110 1111 0 . 1 . ... 0 ... 0 1100 . 1 . 1 ... 0 @2op_fp
VCMLA0 1111 110 00 . 1 . ... 0 ... 0 1000 . 1 . 0 ... 0 @2op_fp_size_rev
VCMLA90 1111 110 01 . 1 . ... 0 ... 0 1000 . 1 . 0 ... 0 @2op_fp_size_rev
VCMLA180 1111 110 10 . 1 . ... 0 ... 0 1000 . 1 . 0 ... 0 @2op_fp_size_rev
VCMLA270 1111 110 11 . 1 . ... 0 ... 0 1000 . 1 . 0 ... 0 @2op_fp_size_rev
# floating-point <-> fixed-point conversions. Naming convention:
# VCVT_<from><to>, S = signed int, U = unsigned int, H = halfprec, F = singleprec
@vcvt .... .... .. 1 ..... .... .. 1 . .... .... &2shift \
qd=%qd qm=%qm shift=%rshift_i5 size=2
@vcvt_f16 .... .... .. 11 .... .... .. 0 . .... .... &2shift \
qd=%qd qm=%qm shift=%rshift_i4 size=1
VCVT_SH_fixed 1110 1111 1 . ...... ... 0 11 . 0 01 . 1 ... 0 @vcvt_f16
VCVT_UH_fixed 1111 1111 1 . ...... ... 0 11 . 0 01 . 1 ... 0 @vcvt_f16
VCVT_HS_fixed 1110 1111 1 . ...... ... 0 11 . 1 01 . 1 ... 0 @vcvt_f16
VCVT_HU_fixed 1111 1111 1 . ...... ... 0 11 . 1 01 . 1 ... 0 @vcvt_f16
VCVT_SF_fixed 1110 1111 1 . ...... ... 0 11 . 0 01 . 1 ... 0 @vcvt
VCVT_UF_fixed 1111 1111 1 . ...... ... 0 11 . 0 01 . 1 ... 0 @vcvt
VCVT_FS_fixed 1110 1111 1 . ...... ... 0 11 . 1 01 . 1 ... 0 @vcvt
VCVT_FU_fixed 1111 1111 1 . ...... ... 0 11 . 1 01 . 1 ... 0 @vcvt
# VCVT between floating point and integer (halfprec and single);
# VCVT_<from><to>, S = signed int, U = unsigned int, F = float
VCVT_SF 1111 1111 1 . 11 .. 11 ... 0 011 00 1 . 0 ... 0 @1op
VCVT_UF 1111 1111 1 . 11 .. 11 ... 0 011 01 1 . 0 ... 0 @1op
VCVT_FS 1111 1111 1 . 11 .. 11 ... 0 011 10 1 . 0 ... 0 @1op
VCVT_FU 1111 1111 1 . 11 .. 11 ... 0 011 11 1 . 0 ... 0 @1op
# VCVT from floating point to integer with specified rounding mode
VCVTAS 1111 1111 1 . 11 .. 11 ... 000 00 0 1 . 0 ... 0 @1op
VCVTAU 1111 1111 1 . 11 .. 11 ... 000 00 1 1 . 0 ... 0 @1op
VCVTNS 1111 1111 1 . 11 .. 11 ... 000 01 0 1 . 0 ... 0 @1op
VCVTNU 1111 1111 1 . 11 .. 11 ... 000 01 1 1 . 0 ... 0 @1op
VCVTPS 1111 1111 1 . 11 .. 11 ... 000 10 0 1 . 0 ... 0 @1op
VCVTPU 1111 1111 1 . 11 .. 11 ... 000 10 1 1 . 0 ... 0 @1op
VCVTMS 1111 1111 1 . 11 .. 11 ... 000 11 0 1 . 0 ... 0 @1op
VCVTMU 1111 1111 1 . 11 .. 11 ... 000 11 1 1 . 0 ... 0 @1op
VRINTN 1111 1111 1 . 11 .. 10 ... 001 000 1 . 0 ... 0 @1op
VRINTX 1111 1111 1 . 11 .. 10 ... 001 001 1 . 0 ... 0 @1op
VRINTA 1111 1111 1 . 11 .. 10 ... 001 010 1 . 0 ... 0 @1op
VRINTZ 1111 1111 1 . 11 .. 10 ... 001 011 1 . 0 ... 0 @1op
VRINTM 1111 1111 1 . 11 .. 10 ... 001 101 1 . 0 ... 0 @1op
VRINTP 1111 1111 1 . 11 .. 10 ... 001 111 1 . 0 ... 0 @1op

646
target/arm/tcg/neon-dp.decode Normal file

@@ -0,0 +1,646 @@
# AArch32 Neon data-processing instruction descriptions
#
# Copyright (c) 2020 Linaro, Ltd
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
#
# This file is processed by scripts/decodetree.py
#
# VFP/Neon register fields; same as vfp.decode
%vm_dp 5:1 0:4
%vn_dp 7:1 16:4
%vd_dp 22:1 12:4
# Encodings for Neon data processing instructions where the T32 encoding
# is a simple transformation of the A32 encoding.
# More specifically, this file covers instructions where the A32 encoding is
# 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
# and the T32 encoding is
# 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
# This file works on the A32 encoding only; calling code for T32 has to
# transform the insn into the A32 version first.
######################################################################
# 3-reg-same grouping:
# 1111 001 U 0 D sz:2 Vn:4 Vd:4 opc:4 N Q M op Vm:4
######################################################################
&3same vm vn vd q size
@3same .... ... . . . size:2 .... .... .... . q:1 . . .... \
&3same vm=%vm_dp vn=%vn_dp vd=%vd_dp
@3same_q0 .... ... . . . size:2 .... .... .... . 0 . . .... \
&3same vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0
# For FP insns the high bit of 'size' is used as part of opcode decode,
# and the 'size' bit is 0 for 32-bit float and 1 for 16-bit float.
# This converts this encoding to the same MO_8/16/32/64 values that the
# integer neon insns use.
%3same_fp_size 20:1 !function=neon_3same_fp_size
@3same_fp .... ... . . . . . .... .... .... . q:1 . . .... \
&3same vm=%vm_dp vn=%vn_dp vd=%vd_dp size=%3same_fp_size
@3same_fp_q0 .... ... . . . . . .... .... .... . 0 . . .... \
&3same vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0 size=%3same_fp_size
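The neon_3same_fp_size function named by !function= above lives on the C side of the translator. A minimal sketch, assuming the usual decodetree helper signature and QEMU's MO_* constants (this is not a quote of the upstream body):

static int neon_3same_fp_size(DisasContext *s, int x)
{
    /* Map the 1-bit fp size (0 == 32-bit float, 1 == 16-bit float) to MO_* */
    return x ? MO_16 : MO_32;
}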
VHADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 0 .... @3same
VHADD_U_3s 1111 001 1 0 . .. .... .... 0000 . . . 0 .... @3same
VQADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 1 .... @3same
VQADD_U_3s 1111 001 1 0 . .. .... .... 0000 . . . 1 .... @3same
VRHADD_S_3s 1111 001 0 0 . .. .... .... 0001 . . . 0 .... @3same
VRHADD_U_3s 1111 001 1 0 . .. .... .... 0001 . . . 0 .... @3same
@3same_logic .... ... . . . .. .... .... .... . q:1 .. .... \
&3same vm=%vm_dp vn=%vn_dp vd=%vd_dp size=0
VAND_3s 1111 001 0 0 . 00 .... .... 0001 ... 1 .... @3same_logic
VBIC_3s 1111 001 0 0 . 01 .... .... 0001 ... 1 .... @3same_logic
VORR_3s 1111 001 0 0 . 10 .... .... 0001 ... 1 .... @3same_logic
VORN_3s 1111 001 0 0 . 11 .... .... 0001 ... 1 .... @3same_logic
VEOR_3s 1111 001 1 0 . 00 .... .... 0001 ... 1 .... @3same_logic
VBSL_3s 1111 001 1 0 . 01 .... .... 0001 ... 1 .... @3same_logic
VBIT_3s 1111 001 1 0 . 10 .... .... 0001 ... 1 .... @3same_logic
VBIF_3s 1111 001 1 0 . 11 .... .... 0001 ... 1 .... @3same_logic
VHSUB_S_3s 1111 001 0 0 . .. .... .... 0010 . . . 0 .... @3same
VHSUB_U_3s 1111 001 1 0 . .. .... .... 0010 . . . 0 .... @3same
VQSUB_S_3s 1111 001 0 0 . .. .... .... 0010 . . . 1 .... @3same
VQSUB_U_3s 1111 001 1 0 . .. .... .... 0010 . . . 1 .... @3same
VCGT_S_3s 1111 001 0 0 . .. .... .... 0011 . . . 0 .... @3same
VCGT_U_3s 1111 001 1 0 . .. .... .... 0011 . . . 0 .... @3same
VCGE_S_3s 1111 001 0 0 . .. .... .... 0011 . . . 1 .... @3same
VCGE_U_3s 1111 001 1 0 . .. .... .... 0011 . . . 1 .... @3same
# The _rev suffix indicates that Vn and Vm are reversed. This is
# the case for shifts. In the Arm ARM these insns are documented
# with the Vm and Vn fields in their usual places, but in the
# assembly the operands are listed "backwards", ie in the order
# Dd, Dm, Dn where other insns use Dd, Dn, Dm. For QEMU we choose
# to consider Vm and Vn as being in different fields in the insn,
# which allows us to avoid special-casing shifts in the trans_
# function code. We would otherwise need to manually swap the operands
# over to call Neon helper functions that are shared with AArch64,
# which does not have this odd reversed-operand situation.
@3same_rev .... ... . . . size:2 .... .... .... . q:1 . . .... \
&3same vn=%vm_dp vm=%vn_dp vd=%vd_dp
VSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 0 .... @3same_rev
VSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 0 .... @3same_rev
# Insns operating on 64-bit elements (size!=0b11 handled elsewhere)
# The _rev suffix indicates that Vn and Vm are reversed (as explained
# by the comment for the @3same_rev format).
@3same_64_rev .... ... . . . 11 .... .... .... . q:1 . . .... \
&3same vm=%vn_dp vn=%vm_dp vd=%vd_dp size=3
{
VQSHL_S64_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev
VQSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_rev
}
{
VQSHL_U64_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev
VQSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_rev
}
{
VRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev
VRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_rev
}
{
VRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev
VRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_rev
}
{
VQRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev
VQRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_rev
}
{
VQRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev
VQRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_rev
}
VMAX_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 0 .... @3same
VMAX_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 0 .... @3same
VMIN_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 1 .... @3same
VMIN_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 1 .... @3same
VABD_S_3s 1111 001 0 0 . .. .... .... 0111 . . . 0 .... @3same
VABD_U_3s 1111 001 1 0 . .. .... .... 0111 . . . 0 .... @3same
VABA_S_3s 1111 001 0 0 . .. .... .... 0111 . . . 1 .... @3same
VABA_U_3s 1111 001 1 0 . .. .... .... 0111 . . . 1 .... @3same
VADD_3s 1111 001 0 0 . .. .... .... 1000 . . . 0 .... @3same
VSUB_3s 1111 001 1 0 . .. .... .... 1000 . . . 0 .... @3same
VTST_3s 1111 001 0 0 . .. .... .... 1000 . . . 1 .... @3same
VCEQ_3s 1111 001 1 0 . .. .... .... 1000 . . . 1 .... @3same
VMLA_3s 1111 001 0 0 . .. .... .... 1001 . . . 0 .... @3same
VMLS_3s 1111 001 1 0 . .. .... .... 1001 . . . 0 .... @3same
VMUL_3s 1111 001 0 0 . .. .... .... 1001 . . . 1 .... @3same
VMUL_p_3s 1111 001 1 0 . .. .... .... 1001 . . . 1 .... @3same
VPMAX_S_3s 1111 001 0 0 . .. .... .... 1010 . . . 0 .... @3same_q0
VPMAX_U_3s 1111 001 1 0 . .. .... .... 1010 . . . 0 .... @3same_q0
VPMIN_S_3s 1111 001 0 0 . .. .... .... 1010 . . . 1 .... @3same_q0
VPMIN_U_3s 1111 001 1 0 . .. .... .... 1010 . . . 1 .... @3same_q0
VQDMULH_3s 1111 001 0 0 . .. .... .... 1011 . . . 0 .... @3same
VQRDMULH_3s 1111 001 1 0 . .. .... .... 1011 . . . 0 .... @3same
VPADD_3s 1111 001 0 0 . .. .... .... 1011 . . . 1 .... @3same_q0
VQRDMLAH_3s 1111 001 1 0 . .. .... .... 1011 ... 1 .... @3same
@3same_crypto .... .... .... .... .... .... .... .... \
&3same vm=%vm_dp vn=%vn_dp vd=%vd_dp size=0 q=1
SHA1C_3s 1111 001 0 0 . 00 .... .... 1100 . 1 . 0 .... @3same_crypto
SHA1P_3s 1111 001 0 0 . 01 .... .... 1100 . 1 . 0 .... @3same_crypto
SHA1M_3s 1111 001 0 0 . 10 .... .... 1100 . 1 . 0 .... @3same_crypto
SHA1SU0_3s 1111 001 0 0 . 11 .... .... 1100 . 1 . 0 .... @3same_crypto
SHA256H_3s 1111 001 1 0 . 00 .... .... 1100 . 1 . 0 .... @3same_crypto
SHA256H2_3s 1111 001 1 0 . 01 .... .... 1100 . 1 . 0 .... @3same_crypto
SHA256SU1_3s 1111 001 1 0 . 10 .... .... 1100 . 1 . 0 .... @3same_crypto
VFMA_fp_3s 1111 001 0 0 . 0 . .... .... 1100 ... 1 .... @3same_fp
VFMS_fp_3s 1111 001 0 0 . 1 . .... .... 1100 ... 1 .... @3same_fp
VQRDMLSH_3s 1111 001 1 0 . .. .... .... 1100 ... 1 .... @3same
VADD_fp_3s 1111 001 0 0 . 0 . .... .... 1101 ... 0 .... @3same_fp
VSUB_fp_3s 1111 001 0 0 . 1 . .... .... 1101 ... 0 .... @3same_fp
VPADD_fp_3s 1111 001 1 0 . 0 . .... .... 1101 ... 0 .... @3same_fp_q0
VABD_fp_3s 1111 001 1 0 . 1 . .... .... 1101 ... 0 .... @3same_fp
VMLA_fp_3s 1111 001 0 0 . 0 . .... .... 1101 ... 1 .... @3same_fp
VMLS_fp_3s 1111 001 0 0 . 1 . .... .... 1101 ... 1 .... @3same_fp
VMUL_fp_3s 1111 001 1 0 . 0 . .... .... 1101 ... 1 .... @3same_fp
VCEQ_fp_3s 1111 001 0 0 . 0 . .... .... 1110 ... 0 .... @3same_fp
VCGE_fp_3s 1111 001 1 0 . 0 . .... .... 1110 ... 0 .... @3same_fp
VACGE_fp_3s 1111 001 1 0 . 0 . .... .... 1110 ... 1 .... @3same_fp
VCGT_fp_3s 1111 001 1 0 . 1 . .... .... 1110 ... 0 .... @3same_fp
VACGT_fp_3s 1111 001 1 0 . 1 . .... .... 1110 ... 1 .... @3same_fp
VMAX_fp_3s 1111 001 0 0 . 0 . .... .... 1111 ... 0 .... @3same_fp
VMIN_fp_3s 1111 001 0 0 . 1 . .... .... 1111 ... 0 .... @3same_fp
VPMAX_fp_3s 1111 001 1 0 . 0 . .... .... 1111 ... 0 .... @3same_fp_q0
VPMIN_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 0 .... @3same_fp_q0
VRECPS_fp_3s 1111 001 0 0 . 0 . .... .... 1111 ... 1 .... @3same_fp
VRSQRTS_fp_3s 1111 001 0 0 . 1 . .... .... 1111 ... 1 .... @3same_fp
VMAXNM_fp_3s 1111 001 1 0 . 0 . .... .... 1111 ... 1 .... @3same_fp
VMINNM_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp
######################################################################
# 2-reg-and-shift grouping:
# 1111 001 U 1 D immH:3 immL:3 Vd:4 opc:4 L Q M 1 Vm:4
######################################################################
&2reg_shift vm vd q shift size
# Right shifts are encoded as N - shift, where N is the element size in bits.
%neon_rshift_i6 16:6 !function=rsub_64
%neon_rshift_i5 16:5 !function=rsub_32
%neon_rshift_i4 16:4 !function=rsub_16
%neon_rshift_i3 16:3 !function=rsub_8
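On the C side, the rsub_* helpers named above only need to undo that N - shift encoding; a minimal sketch, assuming the standard decodetree helper signature:

/* Recover the real shift count from the N - shift encoding */
static int rsub_64(DisasContext *s, int x) { return 64 - x; }
static int rsub_32(DisasContext *s, int x) { return 32 - x; }
static int rsub_16(DisasContext *s, int x) { return 16 - x; }
static int rsub_8(DisasContext *s, int x)  { return 8 - x; }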
@2reg_shr_d .... ... . . . ...... .... .... 1 q:1 . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=3 shift=%neon_rshift_i6
@2reg_shr_s .... ... . . . 1 ..... .... .... 0 q:1 . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=2 shift=%neon_rshift_i5
@2reg_shr_h .... ... . . . 01 .... .... .... 0 q:1 . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=1 shift=%neon_rshift_i4
@2reg_shr_b .... ... . . . 001 ... .... .... 0 q:1 . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=0 shift=%neon_rshift_i3
@2reg_shl_d .... ... . . . shift:6 .... .... 1 q:1 . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=3
@2reg_shl_s .... ... . . . 1 shift:5 .... .... 0 q:1 . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=2
@2reg_shl_h .... ... . . . 01 shift:4 .... .... 0 q:1 . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=1
@2reg_shl_b .... ... . . . 001 shift:3 .... .... 0 q:1 . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=0
# Narrowing right shifts: here the Q bit is part of the opcode decode
@2reg_shrn_d .... ... . . . 1 ..... .... .... 0 . . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=3 q=0 \
shift=%neon_rshift_i5
@2reg_shrn_s .... ... . . . 01 .... .... .... 0 . . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=2 q=0 \
shift=%neon_rshift_i4
@2reg_shrn_h .... ... . . . 001 ... .... .... 0 . . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=1 q=0 \
shift=%neon_rshift_i3
# Long left shifts: again Q is part of opcode decode
@2reg_shll_s .... ... . . . 1 shift:5 .... .... 0 . . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=2 q=0
@2reg_shll_h .... ... . . . 01 shift:4 .... .... 0 . . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=1 q=0
@2reg_shll_b .... ... . . . 001 shift:3 .... .... 0 . . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=0 q=0
@2reg_vcvt .... ... . . . 1 ..... .... .... . q:1 . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=2 shift=%neon_rshift_i5
@2reg_vcvt_f16 .... ... . . . 11 .... .... .... . q:1 . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=1 shift=%neon_rshift_i4
VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_d
VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_s
VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_h
VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_b
VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_d
VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_s
VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_h
VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_b
VSRA_S_2sh 1111 001 0 1 . ...... .... 0001 . . . 1 .... @2reg_shr_d
VSRA_S_2sh 1111 001 0 1 . ...... .... 0001 . . . 1 .... @2reg_shr_s
VSRA_S_2sh 1111 001 0 1 . ...... .... 0001 . . . 1 .... @2reg_shr_h
VSRA_S_2sh 1111 001 0 1 . ...... .... 0001 . . . 1 .... @2reg_shr_b
VSRA_U_2sh 1111 001 1 1 . ...... .... 0001 . . . 1 .... @2reg_shr_d
VSRA_U_2sh 1111 001 1 1 . ...... .... 0001 . . . 1 .... @2reg_shr_s
VSRA_U_2sh 1111 001 1 1 . ...... .... 0001 . . . 1 .... @2reg_shr_h
VSRA_U_2sh 1111 001 1 1 . ...... .... 0001 . . . 1 .... @2reg_shr_b
VRSHR_S_2sh 1111 001 0 1 . ...... .... 0010 . . . 1 .... @2reg_shr_d
VRSHR_S_2sh 1111 001 0 1 . ...... .... 0010 . . . 1 .... @2reg_shr_s
VRSHR_S_2sh 1111 001 0 1 . ...... .... 0010 . . . 1 .... @2reg_shr_h
VRSHR_S_2sh 1111 001 0 1 . ...... .... 0010 . . . 1 .... @2reg_shr_b
VRSHR_U_2sh 1111 001 1 1 . ...... .... 0010 . . . 1 .... @2reg_shr_d
VRSHR_U_2sh 1111 001 1 1 . ...... .... 0010 . . . 1 .... @2reg_shr_s
VRSHR_U_2sh 1111 001 1 1 . ...... .... 0010 . . . 1 .... @2reg_shr_h
VRSHR_U_2sh 1111 001 1 1 . ...... .... 0010 . . . 1 .... @2reg_shr_b
VRSRA_S_2sh 1111 001 0 1 . ...... .... 0011 . . . 1 .... @2reg_shr_d
VRSRA_S_2sh 1111 001 0 1 . ...... .... 0011 . . . 1 .... @2reg_shr_s
VRSRA_S_2sh 1111 001 0 1 . ...... .... 0011 . . . 1 .... @2reg_shr_h
VRSRA_S_2sh 1111 001 0 1 . ...... .... 0011 . . . 1 .... @2reg_shr_b
VRSRA_U_2sh 1111 001 1 1 . ...... .... 0011 . . . 1 .... @2reg_shr_d
VRSRA_U_2sh 1111 001 1 1 . ...... .... 0011 . . . 1 .... @2reg_shr_s
VRSRA_U_2sh 1111 001 1 1 . ...... .... 0011 . . . 1 .... @2reg_shr_h
VRSRA_U_2sh 1111 001 1 1 . ...... .... 0011 . . . 1 .... @2reg_shr_b
VSRI_2sh 1111 001 1 1 . ...... .... 0100 . . . 1 .... @2reg_shr_d
VSRI_2sh 1111 001 1 1 . ...... .... 0100 . . . 1 .... @2reg_shr_s
VSRI_2sh 1111 001 1 1 . ...... .... 0100 . . . 1 .... @2reg_shr_h
VSRI_2sh 1111 001 1 1 . ...... .... 0100 . . . 1 .... @2reg_shr_b
VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_d
VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_s
VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_h
VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_b
VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_d
VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_s
VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_h
VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_b
VQSHLU_64_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_d
VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_s
VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_h
VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_b
VQSHL_S_64_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d
VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_s
VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_h
VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_b
VQSHL_U_64_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d
VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_s
VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_h
VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_b
VSHRN_64_2sh 1111 001 0 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_d
VSHRN_32_2sh 1111 001 0 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_s
VSHRN_16_2sh 1111 001 0 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_h
VRSHRN_64_2sh 1111 001 0 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_d
VRSHRN_32_2sh 1111 001 0 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_s
VRSHRN_16_2sh 1111 001 0 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_h
VQSHRUN_64_2sh 1111 001 1 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_d
VQSHRUN_32_2sh 1111 001 1 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_s
VQSHRUN_16_2sh 1111 001 1 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_h
VQRSHRUN_64_2sh 1111 001 1 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_d
VQRSHRUN_32_2sh 1111 001 1 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_s
VQRSHRUN_16_2sh 1111 001 1 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_h
# VQSHRN with signed input
VQSHRN_S64_2sh 1111 001 0 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_d
VQSHRN_S32_2sh 1111 001 0 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_s
VQSHRN_S16_2sh 1111 001 0 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_h
# VQRSHRN with signed input
VQRSHRN_S64_2sh 1111 001 0 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_d
VQRSHRN_S32_2sh 1111 001 0 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_s
VQRSHRN_S16_2sh 1111 001 0 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_h
# VQSHRN with unsigned input
VQSHRN_U64_2sh 1111 001 1 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_d
VQSHRN_U32_2sh 1111 001 1 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_s
VQSHRN_U16_2sh 1111 001 1 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_h
# VQRSHRN with unsigned input
VQRSHRN_U64_2sh 1111 001 1 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_d
VQRSHRN_U32_2sh 1111 001 1 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_s
VQRSHRN_U16_2sh 1111 001 1 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_h
VSHLL_S_2sh 1111 001 0 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_s
VSHLL_S_2sh 1111 001 0 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_h
VSHLL_S_2sh 1111 001 0 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_b
VSHLL_U_2sh 1111 001 1 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_s
VSHLL_U_2sh 1111 001 1 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_h
VSHLL_U_2sh 1111 001 1 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_b
# VCVT fixed<->float conversions
VCVT_SH_2sh 1111 001 0 1 . ...... .... 1100 0 . . 1 .... @2reg_vcvt_f16
VCVT_UH_2sh 1111 001 1 1 . ...... .... 1100 0 . . 1 .... @2reg_vcvt_f16
VCVT_HS_2sh 1111 001 0 1 . ...... .... 1101 0 . . 1 .... @2reg_vcvt_f16
VCVT_HU_2sh 1111 001 1 1 . ...... .... 1101 0 . . 1 .... @2reg_vcvt_f16
VCVT_SF_2sh 1111 001 0 1 . ...... .... 1110 0 . . 1 .... @2reg_vcvt
VCVT_UF_2sh 1111 001 1 1 . ...... .... 1110 0 . . 1 .... @2reg_vcvt
VCVT_FS_2sh 1111 001 0 1 . ...... .... 1111 0 . . 1 .... @2reg_vcvt
VCVT_FU_2sh 1111 001 1 1 . ...... .... 1111 0 . . 1 .... @2reg_vcvt
######################################################################
# 1-reg-and-modified-immediate grouping:
# 1111 001 i 1 D 000 imm:3 Vd:4 cmode:4 0 Q op 1 Vm:4
######################################################################
&1reg_imm vd q imm cmode op
%asimd_imm_value 24:1 16:3 0:4
@1reg_imm .... ... . . . ... ... .... .... . q:1 . . .... \
&1reg_imm imm=%asimd_imm_value vd=%vd_dp
# The cmode/op bits here decode VORR/VBIC/VMOV/VMVN, but
# not in a way we can conveniently represent in decodetree without
# a lot of repetition:
# VORR: op=0, (cmode & 1) && cmode < 12
# VBIC: op=1, (cmode & 1) && cmode < 12
# VMOV: everything else
# So we have a single decode line and check the cmode/op in the
# trans function.
Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
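The classification the trans function has to perform can be sketched as follows; the enum and helper names are illustrative only, not the upstream code:

typedef enum { V1R_VMOV, V1R_VORR, V1R_VBIC } Vimm1rKind;

static Vimm1rKind vimm_1r_kind(int cmode, int op)
{
    if ((cmode & 1) && cmode < 12) {
        /* cmode odd and below 12: ORR (op=0) or BIC (op=1) with the immediate */
        return op ? V1R_VBIC : V1R_VORR;
    }
    /* everything else: a plain move of the expanded immediate (VMOV/VMVN) */
    return V1R_VMOV;
}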
######################################################################
# Within the "two registers, or three registers of different lengths"
# grouping ([23,4]=0b10), bits [21:20] are either part of the opcode
# decode: 0b11 for VEXT, two-reg-misc, VTBL, and duplicate-scalar;
# or they are a size field for the three-reg-different-lengths and
# two-reg-and-scalar insn groups (where size cannot be 0b11). This
# is slightly awkward for decodetree: we handle it with this
# non-exclusive group which contains within it two exclusive groups:
# one for the size=0b11 patterns, and one for the size-not-0b11
# patterns. This allows us to check that none of the insns within
# each subgroup accidentally overlap each other. Note that all the
# trans functions for the size-not-0b11 patterns must check and
# return false for size==3.
######################################################################
{
[
##################################################################
# Miscellaneous size=0b11 insns
##################################################################
VEXT 1111 001 0 1 . 11 .... .... imm:4 . q:1 . 0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
VTBL 1111 001 1 1 . 11 .... .... 10 len:2 . op:1 . 0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
VDUP_scalar 1111 001 1 1 . 11 index:3 1 .... 11 000 q:1 . 0 .... \
vm=%vm_dp vd=%vd_dp size=0
VDUP_scalar 1111 001 1 1 . 11 index:2 10 .... 11 000 q:1 . 0 .... \
vm=%vm_dp vd=%vd_dp size=1
VDUP_scalar 1111 001 1 1 . 11 index:1 100 .... 11 000 q:1 . 0 .... \
vm=%vm_dp vd=%vd_dp size=2
##################################################################
# 2-reg-misc grouping:
# 1111 001 11 D 11 size:2 opc1:2 Vd:4 0 opc2:4 q:1 M 0 Vm:4
##################################################################
&2misc vd vm q size
@2misc .... ... .. . .. size:2 .. .... . .... q:1 . . .... \
&2misc vm=%vm_dp vd=%vd_dp
@2misc_q0 .... ... .. . .. size:2 .. .... . .... . . . .... \
&2misc vm=%vm_dp vd=%vd_dp q=0
@2misc_q1 .... ... .. . .. size:2 .. .... . .... . . . .... \
&2misc vm=%vm_dp vd=%vd_dp q=1
VREV64 1111 001 11 . 11 .. 00 .... 0 0000 . . 0 .... @2misc
VREV32 1111 001 11 . 11 .. 00 .... 0 0001 . . 0 .... @2misc
VREV16 1111 001 11 . 11 .. 00 .... 0 0010 . . 0 .... @2misc
VPADDL_S 1111 001 11 . 11 .. 00 .... 0 0100 . . 0 .... @2misc
VPADDL_U 1111 001 11 . 11 .. 00 .... 0 0101 . . 0 .... @2misc
AESE 1111 001 11 . 11 .. 00 .... 0 0110 0 . 0 .... @2misc_q1
AESD 1111 001 11 . 11 .. 00 .... 0 0110 1 . 0 .... @2misc_q1
AESMC 1111 001 11 . 11 .. 00 .... 0 0111 0 . 0 .... @2misc_q1
AESIMC 1111 001 11 . 11 .. 00 .... 0 0111 1 . 0 .... @2misc_q1
VCLS 1111 001 11 . 11 .. 00 .... 0 1000 . . 0 .... @2misc
VCLZ 1111 001 11 . 11 .. 00 .... 0 1001 . . 0 .... @2misc
VCNT 1111 001 11 . 11 .. 00 .... 0 1010 . . 0 .... @2misc
VMVN 1111 001 11 . 11 .. 00 .... 0 1011 . . 0 .... @2misc
VPADAL_S 1111 001 11 . 11 .. 00 .... 0 1100 . . 0 .... @2misc
VPADAL_U 1111 001 11 . 11 .. 00 .... 0 1101 . . 0 .... @2misc
VQABS 1111 001 11 . 11 .. 00 .... 0 1110 . . 0 .... @2misc
VQNEG 1111 001 11 . 11 .. 00 .... 0 1111 . . 0 .... @2misc
VCGT0 1111 001 11 . 11 .. 01 .... 0 0000 . . 0 .... @2misc
VCGE0 1111 001 11 . 11 .. 01 .... 0 0001 . . 0 .... @2misc
VCEQ0 1111 001 11 . 11 .. 01 .... 0 0010 . . 0 .... @2misc
VCLE0 1111 001 11 . 11 .. 01 .... 0 0011 . . 0 .... @2misc
VCLT0 1111 001 11 . 11 .. 01 .... 0 0100 . . 0 .... @2misc
SHA1H 1111 001 11 . 11 .. 01 .... 0 0101 1 . 0 .... @2misc_q1
VABS 1111 001 11 . 11 .. 01 .... 0 0110 . . 0 .... @2misc
VNEG 1111 001 11 . 11 .. 01 .... 0 0111 . . 0 .... @2misc
VCGT0_F 1111 001 11 . 11 .. 01 .... 0 1000 . . 0 .... @2misc
VCGE0_F 1111 001 11 . 11 .. 01 .... 0 1001 . . 0 .... @2misc
VCEQ0_F 1111 001 11 . 11 .. 01 .... 0 1010 . . 0 .... @2misc
VCLE0_F 1111 001 11 . 11 .. 01 .... 0 1011 . . 0 .... @2misc
VCLT0_F 1111 001 11 . 11 .. 01 .... 0 1100 . . 0 .... @2misc
VABS_F 1111 001 11 . 11 .. 01 .... 0 1110 . . 0 .... @2misc
VNEG_F 1111 001 11 . 11 .. 01 .... 0 1111 . . 0 .... @2misc
VSWP 1111 001 11 . 11 .. 10 .... 0 0000 . . 0 .... @2misc
VTRN 1111 001 11 . 11 .. 10 .... 0 0001 . . 0 .... @2misc
VUZP 1111 001 11 . 11 .. 10 .... 0 0010 . . 0 .... @2misc
VZIP 1111 001 11 . 11 .. 10 .... 0 0011 . . 0 .... @2misc
VMOVN 1111 001 11 . 11 .. 10 .... 0 0100 0 . 0 .... @2misc_q0
# VQMOVUN: unsigned result (source is always signed)
VQMOVUN 1111 001 11 . 11 .. 10 .... 0 0100 1 . 0 .... @2misc_q0
# VQMOVN: signed result, source may be signed (_S) or unsigned (_U)
VQMOVN_S 1111 001 11 . 11 .. 10 .... 0 0101 0 . 0 .... @2misc_q0
VQMOVN_U 1111 001 11 . 11 .. 10 .... 0 0101 1 . 0 .... @2misc_q0
VSHLL 1111 001 11 . 11 .. 10 .... 0 0110 0 . 0 .... @2misc_q0
SHA1SU1 1111 001 11 . 11 .. 10 .... 0 0111 0 . 0 .... @2misc_q1
SHA256SU0 1111 001 11 . 11 .. 10 .... 0 0111 1 . 0 .... @2misc_q1
VRINTN 1111 001 11 . 11 .. 10 .... 0 1000 . . 0 .... @2misc
VRINTX 1111 001 11 . 11 .. 10 .... 0 1001 . . 0 .... @2misc
VRINTA 1111 001 11 . 11 .. 10 .... 0 1010 . . 0 .... @2misc
VRINTZ 1111 001 11 . 11 .. 10 .... 0 1011 . . 0 .... @2misc
VCVT_F16_F32 1111 001 11 . 11 .. 10 .... 0 1100 0 . 0 .... @2misc_q0
VCVT_B16_F32 1111 001 11 . 11 .. 10 .... 0 1100 1 . 0 .... @2misc_q0
VRINTM 1111 001 11 . 11 .. 10 .... 0 1101 . . 0 .... @2misc
VCVT_F32_F16 1111 001 11 . 11 .. 10 .... 0 1110 0 . 0 .... @2misc_q0
VRINTP 1111 001 11 . 11 .. 10 .... 0 1111 . . 0 .... @2misc
VCVTAS 1111 001 11 . 11 .. 11 .... 0 0000 . . 0 .... @2misc
VCVTAU 1111 001 11 . 11 .. 11 .... 0 0001 . . 0 .... @2misc
VCVTNS 1111 001 11 . 11 .. 11 .... 0 0010 . . 0 .... @2misc
VCVTNU 1111 001 11 . 11 .. 11 .... 0 0011 . . 0 .... @2misc
VCVTPS 1111 001 11 . 11 .. 11 .... 0 0100 . . 0 .... @2misc
VCVTPU 1111 001 11 . 11 .. 11 .... 0 0101 . . 0 .... @2misc
VCVTMS 1111 001 11 . 11 .. 11 .... 0 0110 . . 0 .... @2misc
VCVTMU 1111 001 11 . 11 .. 11 .... 0 0111 . . 0 .... @2misc
VRECPE 1111 001 11 . 11 .. 11 .... 0 1000 . . 0 .... @2misc
VRSQRTE 1111 001 11 . 11 .. 11 .... 0 1001 . . 0 .... @2misc
VRECPE_F 1111 001 11 . 11 .. 11 .... 0 1010 . . 0 .... @2misc
VRSQRTE_F 1111 001 11 . 11 .. 11 .... 0 1011 . . 0 .... @2misc
VCVT_FS 1111 001 11 . 11 .. 11 .... 0 1100 . . 0 .... @2misc
VCVT_FU 1111 001 11 . 11 .. 11 .... 0 1101 . . 0 .... @2misc
VCVT_SF 1111 001 11 . 11 .. 11 .... 0 1110 . . 0 .... @2misc
VCVT_UF 1111 001 11 . 11 .. 11 .... 0 1111 . . 0 .... @2misc
]
# Subgroup for size != 0b11
[
##################################################################
# 3-reg-different-length grouping:
# 1111 001 U 1 D sz!=11 Vn:4 Vd:4 opc:4 N 0 M 0 Vm:4
##################################################################
&3diff vm vn vd size
@3diff .... ... . . . size:2 .... .... .... . . . . .... \
&3diff vm=%vm_dp vn=%vn_dp vd=%vd_dp
VADDL_S_3d 1111 001 0 1 . .. .... .... 0000 . 0 . 0 .... @3diff
VADDL_U_3d 1111 001 1 1 . .. .... .... 0000 . 0 . 0 .... @3diff
VADDW_S_3d 1111 001 0 1 . .. .... .... 0001 . 0 . 0 .... @3diff
VADDW_U_3d 1111 001 1 1 . .. .... .... 0001 . 0 . 0 .... @3diff
VSUBL_S_3d 1111 001 0 1 . .. .... .... 0010 . 0 . 0 .... @3diff
VSUBL_U_3d 1111 001 1 1 . .. .... .... 0010 . 0 . 0 .... @3diff
VSUBW_S_3d 1111 001 0 1 . .. .... .... 0011 . 0 . 0 .... @3diff
VSUBW_U_3d 1111 001 1 1 . .. .... .... 0011 . 0 . 0 .... @3diff
VADDHN_3d 1111 001 0 1 . .. .... .... 0100 . 0 . 0 .... @3diff
VRADDHN_3d 1111 001 1 1 . .. .... .... 0100 . 0 . 0 .... @3diff
VABAL_S_3d 1111 001 0 1 . .. .... .... 0101 . 0 . 0 .... @3diff
VABAL_U_3d 1111 001 1 1 . .. .... .... 0101 . 0 . 0 .... @3diff
VSUBHN_3d 1111 001 0 1 . .. .... .... 0110 . 0 . 0 .... @3diff
VRSUBHN_3d 1111 001 1 1 . .. .... .... 0110 . 0 . 0 .... @3diff
VABDL_S_3d 1111 001 0 1 . .. .... .... 0111 . 0 . 0 .... @3diff
VABDL_U_3d 1111 001 1 1 . .. .... .... 0111 . 0 . 0 .... @3diff
VMLAL_S_3d 1111 001 0 1 . .. .... .... 1000 . 0 . 0 .... @3diff
VMLAL_U_3d 1111 001 1 1 . .. .... .... 1000 . 0 . 0 .... @3diff
VQDMLAL_3d 1111 001 0 1 . .. .... .... 1001 . 0 . 0 .... @3diff
VMLSL_S_3d 1111 001 0 1 . .. .... .... 1010 . 0 . 0 .... @3diff
VMLSL_U_3d 1111 001 1 1 . .. .... .... 1010 . 0 . 0 .... @3diff
VQDMLSL_3d 1111 001 0 1 . .. .... .... 1011 . 0 . 0 .... @3diff
VMULL_S_3d 1111 001 0 1 . .. .... .... 1100 . 0 . 0 .... @3diff
VMULL_U_3d 1111 001 1 1 . .. .... .... 1100 . 0 . 0 .... @3diff
VQDMULL_3d 1111 001 0 1 . .. .... .... 1101 . 0 . 0 .... @3diff
VMULL_P_3d 1111 001 0 1 . .. .... .... 1110 . 0 . 0 .... @3diff
##################################################################
# 2-regs-plus-scalar grouping:
# 1111 001 Q 1 D sz!=11 Vn:4 Vd:4 opc:4 N 1 M 0 Vm:4
##################################################################
&2scalar vm vn vd size q
@2scalar .... ... q:1 . . size:2 .... .... .... . . . . .... \
&2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp
# For the 'long' ops the Q bit is part of insn decode
@2scalar_q0 .... ... . . . size:2 .... .... .... . . . . .... \
&2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0
VMLA_2sc 1111 001 . 1 . .. .... .... 0000 . 1 . 0 .... @2scalar
VMLA_F_2sc 1111 001 . 1 . .. .... .... 0001 . 1 . 0 .... @2scalar
VMLAL_S_2sc 1111 001 0 1 . .. .... .... 0010 . 1 . 0 .... @2scalar_q0
VMLAL_U_2sc 1111 001 1 1 . .. .... .... 0010 . 1 . 0 .... @2scalar_q0
VQDMLAL_2sc 1111 001 0 1 . .. .... .... 0011 . 1 . 0 .... @2scalar_q0
VMLS_2sc 1111 001 . 1 . .. .... .... 0100 . 1 . 0 .... @2scalar
VMLS_F_2sc 1111 001 . 1 . .. .... .... 0101 . 1 . 0 .... @2scalar
VMLSL_S_2sc 1111 001 0 1 . .. .... .... 0110 . 1 . 0 .... @2scalar_q0
VMLSL_U_2sc 1111 001 1 1 . .. .... .... 0110 . 1 . 0 .... @2scalar_q0
VQDMLSL_2sc 1111 001 0 1 . .. .... .... 0111 . 1 . 0 .... @2scalar_q0
VMUL_2sc 1111 001 . 1 . .. .... .... 1000 . 1 . 0 .... @2scalar
VMUL_F_2sc 1111 001 . 1 . .. .... .... 1001 . 1 . 0 .... @2scalar
VMULL_S_2sc 1111 001 0 1 . .. .... .... 1010 . 1 . 0 .... @2scalar_q0
VMULL_U_2sc 1111 001 1 1 . .. .... .... 1010 . 1 . 0 .... @2scalar_q0
VQDMULL_2sc 1111 001 0 1 . .. .... .... 1011 . 1 . 0 .... @2scalar_q0
VQDMULH_2sc 1111 001 . 1 . .. .... .... 1100 . 1 . 0 .... @2scalar
VQRDMULH_2sc 1111 001 . 1 . .. .... .... 1101 . 1 . 0 .... @2scalar
VQRDMLAH_2sc 1111 001 . 1 . .. .... .... 1110 . 1 . 0 .... @2scalar
VQRDMLSH_2sc 1111 001 . 1 . .. .... .... 1111 . 1 . 0 .... @2scalar
]
}

52
target/arm/tcg/neon-ls.decode Normal file

@@ -0,0 +1,52 @@
# AArch32 Neon load/store instruction descriptions
#
# Copyright (c) 2020 Linaro, Ltd
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
#
# This file is processed by scripts/decodetree.py
#
# Encodings for Neon load/store instructions where the T32 encoding
# is a simple transformation of the A32 encoding.
# More specifically, this file covers instructions where the A32 encoding is
# 0b1111_0100_xxx0_xxxx_xxxx_xxxx_xxxx_xxxx
# and the T32 encoding is
# 0b1111_1001_xxx0_xxxx_xxxx_xxxx_xxxx_xxxx
# This file works on the A32 encoding only; calling code for T32 has to
# transform the insn into the A32 version first.
%vd_dp 22:1 12:4
# Neon load/store multiple structures
VLDST_multiple 1111 0100 0 . l:1 0 rn:4 .... itype:4 size:2 align:2 rm:4 \
vd=%vd_dp
# Neon load single element to all lanes
VLD_all_lanes 1111 0100 1 . 1 0 rn:4 .... 11 n:2 size:2 t:1 a:1 rm:4 \
vd=%vd_dp
# Neon load/store single structure to one lane
%imm1_5_p1 5:1 !function=plus_1
%imm1_6_p1 6:1 !function=plus_1
VLDST_single 1111 0100 1 . l:1 0 rn:4 .... 00 n:2 reg_idx:3 align:1 rm:4 \
vd=%vd_dp size=0 stride=1
VLDST_single 1111 0100 1 . l:1 0 rn:4 .... 01 n:2 reg_idx:2 . align:1 rm:4 \
vd=%vd_dp size=1 stride=%imm1_5_p1
VLDST_single 1111 0100 1 . l:1 0 rn:4 .... 10 n:2 reg_idx:1 . align:2 rm:4 \
vd=%vd_dp size=2 stride=%imm1_6_p1

99
target/arm/tcg/neon-shared.decode Normal file

@@ -0,0 +1,99 @@
# AArch32 Neon instruction descriptions
#
# Copyright (c) 2020 Linaro, Ltd
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
#
# This file is processed by scripts/decodetree.py
#
# Encodings for Neon instructions whose encoding is the same for
# both A32 and T32.
# More specifically, this covers:
# 2reg scalar ext: 0b1111_1110_xxxx_xxxx_xxxx_1x0x_xxxx_xxxx
# 3same ext: 0b1111_110x_xxxx_xxxx_xxxx_1x0x_xxxx_xxxx
# VFP/Neon register fields; same as vfp.decode
%vm_dp 5:1 0:4
%vm_sp 0:4 5:1
%vn_dp 7:1 16:4
%vn_sp 16:4 7:1
%vd_dp 22:1 12:4
%vd_sp 12:4 22:1
# For VCMLA/VCADD insns, convert the single-bit size field
# which is 0 for fp16 and 1 for fp32 into a MO_* constant.
# (Note that this is the reverse of the sense of the 1-bit size
# field in the 3same_fp Neon insns.)
%vcadd_size 20:1 !function=plus_1
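Since MO_16 is 1 and MO_32 is 2 in QEMU's MemOp enumeration, the plus_1 helper is all that is needed; a sketch with the assumed decodetree helper signature:

static int plus_1(DisasContext *s, int x)
{
    /* 0 (fp16) -> MO_16, 1 (fp32) -> MO_32 */
    return x + 1;
}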
VCMLA 1111 110 rot:2 . 1 . .... .... 1000 . q:1 . 0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp size=%vcadd_size
VCADD 1111 110 rot:1 1 . 0 . .... .... 1000 . q:1 . 0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp size=%vcadd_size
VSDOT 1111 110 00 . 10 .... .... 1101 . q:1 . 0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
VUDOT 1111 110 00 . 10 .... .... 1101 . q:1 . 1 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
VUSDOT 1111 110 01 . 10 .... .... 1101 . q:1 . 0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
VDOT_b16 1111 110 00 . 00 .... .... 1101 . q:1 . 0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
# VFM[AS]L
VFML 1111 110 0 s:1 . 10 .... .... 1000 . 0 . 1 .... \
vm=%vm_sp vn=%vn_sp vd=%vd_dp q=0
VFML 1111 110 0 s:1 . 10 .... .... 1000 . 1 . 1 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp q=1
VSMMLA 1111 1100 0.10 .... .... 1100 .1.0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
VUMMLA 1111 1100 0.10 .... .... 1100 .1.1 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
VUSMMLA 1111 1100 1.10 .... .... 1100 .1.0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
VMMLA_b16 1111 1100 0.00 .... .... 1100 .1.0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
VFMA_b16 1111 110 0 0.11 .... .... 1000 . q:1 . 1 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
VCMLA_scalar 1111 1110 0 . rot:2 .... .... 1000 . q:1 index:1 0 vm:4 \
vn=%vn_dp vd=%vd_dp size=1
VCMLA_scalar 1111 1110 1 . rot:2 .... .... 1000 . q:1 . 0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp size=2 index=0
VSDOT_scalar 1111 1110 0 . 10 .... .... 1101 . q:1 index:1 0 vm:4 \
vn=%vn_dp vd=%vd_dp
VUDOT_scalar 1111 1110 0 . 10 .... .... 1101 . q:1 index:1 1 vm:4 \
vn=%vn_dp vd=%vd_dp
VUSDOT_scalar 1111 1110 1 . 00 .... .... 1101 . q:1 index:1 0 vm:4 \
vn=%vn_dp vd=%vd_dp
VSUDOT_scalar 1111 1110 1 . 00 .... .... 1101 . q:1 index:1 1 vm:4 \
vn=%vn_dp vd=%vd_dp
VDOT_b16_scal 1111 1110 0 . 00 .... .... 1101 . q:1 index:1 0 vm:4 \
vn=%vn_dp vd=%vd_dp
%vfml_scalar_q0_rm 0:3 5:1
%vfml_scalar_q1_index 5:1 3:1
VFML_scalar 1111 1110 0 . 0 s:1 .... .... 1000 . 0 . 1 index:1 ... \
rm=%vfml_scalar_q0_rm vn=%vn_sp vd=%vd_dp q=0
VFML_scalar 1111 1110 0 . 0 s:1 .... .... 1000 . 1 . 1 . rm:3 \
index=%vfml_scalar_q1_index vn=%vn_dp vd=%vd_dp q=1
VFMA_b16_scal 1111 1110 0.11 .... .... 1000 . q:1 . 1 . vm:3 \
index=%vfml_scalar_q1_index vn=%vn_dp vd=%vd_dp

60
target/arm/tcg/sme-fa64.decode Normal file

@@ -0,0 +1,60 @@
# AArch64 SME allowed instruction decoding
#
# Copyright (c) 2022 Linaro, Ltd
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
#
# This file is processed by scripts/decodetree.py
#
# These patterns are taken from Appendix E1.1 of DDI0616 A.a,
# Arm Architecture Reference Manual Supplement,
# The Scalable Matrix Extension (SME), for Armv9-A
{
[
OK 0-00 1110 0000 0001 0010 11-- ---- ---- # SMOV W|Xd,Vn.B[0]
OK 0-00 1110 0000 0010 0010 11-- ---- ---- # SMOV W|Xd,Vn.H[0]
OK 0100 1110 0000 0100 0010 11-- ---- ---- # SMOV Xd,Vn.S[0]
OK 0000 1110 0000 0001 0011 11-- ---- ---- # UMOV Wd,Vn.B[0]
OK 0000 1110 0000 0010 0011 11-- ---- ---- # UMOV Wd,Vn.H[0]
OK 0000 1110 0000 0100 0011 11-- ---- ---- # UMOV Wd,Vn.S[0]
OK 0100 1110 0000 1000 0011 11-- ---- ---- # UMOV Xd,Vn.D[0]
]
FAIL 0--0 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD vector operations
}
{
[
OK 0101 1110 --1- ---- 11-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar)
OK 0101 1110 -10- ---- 00-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar, FP16)
OK 01-1 1110 1-10 0001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar)
OK 01-1 1110 1111 1001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar, FP16)
]
FAIL 01-1 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD single-element operations
}
FAIL 0-00 110- ---- ---- ---- ---- ---- ---- # Advanced SIMD structure load/store
FAIL 1100 1110 ---- ---- ---- ---- ---- ---- # Advanced SIMD cryptography extensions
FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
# These are the "avoidance of doubt" final table of Illegal Advanced SIMD instructions
# We don't actually need to include these, as the default is OK.
# -001 111- ---- ---- ---- ---- ---- ---- # Scalar floating-point operations
# --10 110- ---- ---- ---- ---- ---- ---- # Load/store pair of FP registers
# --01 1100 ---- ---- ---- ---- ---- ---- # Load FP register (PC-relative literal)
# --11 1100 --0- ---- ---- ---- ---- ---- # Load/store FP register (unscaled imm)
# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)

88
target/arm/tcg/sme.decode Normal file

@@ -0,0 +1,88 @@
# AArch64 SME instruction descriptions
#
# Copyright (c) 2022 Linaro, Ltd
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
#
# This file is processed by scripts/decodetree.py
#
### SME Misc
ZERO 11000000 00 001 00000000000 imm:8
### SME Move into/from Array
%mova_rs 13:2 !function=plus_12
&mova esz rs pg zr za_imm v:bool to_vec:bool
MOVA 11000000 esz:2 00000 0 v:1 .. pg:3 zr:5 0 za_imm:4 \
&mova to_vec=0 rs=%mova_rs
MOVA 11000000 11 00000 1 v:1 .. pg:3 zr:5 0 za_imm:4 \
&mova to_vec=0 rs=%mova_rs esz=4
MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \
&mova to_vec=1 rs=%mova_rs
MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \
&mova to_vec=1 rs=%mova_rs esz=4
### SME Memory
&ldst esz rs pg rn rm za_imm v:bool st:bool
LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
&ldst rs=%mova_rs
LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
&ldst esz=4 rs=%mova_rs
&ldstr rv rn imm
@ldstr ....... ... . ...... .. ... rn:5 . imm:4 \
&ldstr rv=%mova_rs
LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr
STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr
### SME Add Vector to Array
&adda zad zn pm pn
@adda_32 ........ .. ..... . pm:3 pn:3 zn:5 ... zad:2 &adda
@adda_64 ........ .. ..... . pm:3 pn:3 zn:5 .. zad:3 &adda
ADDHA_s 11000000 10 01000 0 ... ... ..... 000 .. @adda_32
ADDVA_s 11000000 10 01000 1 ... ... ..... 000 .. @adda_32
ADDHA_d 11000000 11 01000 0 ... ... ..... 00 ... @adda_64
ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
### SME Outer Product
&op zad zn zm pm pn sub:bool
@op_32 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 .. zad:2 &op
@op_64 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 . zad:3 &op
FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
FMOPA_h 10000001 101 ..... ... ... ..... . 00 .. @op_32
SMOPA_s 1010000 0 10 0 ..... ... ... ..... . 00 .. @op_32
SUMOPA_s 1010000 0 10 1 ..... ... ... ..... . 00 .. @op_32
USMOPA_s 1010000 1 10 0 ..... ... ... ..... . 00 .. @op_32
UMOPA_s 1010000 1 10 1 ..... ... ... ..... . 00 .. @op_32
SMOPA_d 1010000 0 11 0 ..... ... ... ..... . 0 ... @op_64
SUMOPA_d 1010000 0 11 1 ..... ... ... ..... . 0 ... @op_64
USMOPA_d 1010000 1 11 0 ..... ... ... ..... . 0 ... @op_64
UMOPA_d 1010000 1 11 1 ..... ... ... ..... . 0 ... @op_64

1702
target/arm/tcg/sve.decode Normal file

File diff suppressed because it is too large

281
target/arm/tcg/t16.decode Normal file

@@ -0,0 +1,281 @@
# Thumb1 instructions
#
# Copyright (c) 2019 Linaro, Ltd
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
#
# This file is processed by scripts/decodetree.py
#
&empty !extern
&s_rrr_shi !extern s rd rn rm shim shty
&s_rrr_shr !extern s rn rd rm rs shty
&s_rri_rot !extern s rn rd imm rot
&s_rrrr !extern s rd rn rm ra
&rrr_rot !extern rd rn rm rot
&rr !extern rd rm
&ri !extern rd imm
&r !extern rm
&i !extern imm
&ldst_rr !extern p w u rn rt rm shimm shtype
&ldst_ri !extern p w u rn rt imm
&ldst_block !extern rn i b u w list
&setend !extern E
&cps !extern mode imod M A I F
&ci !extern cond imm
# Set S if the instruction is outside of an IT block.
%s !function=t16_setflags
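A minimal sketch of t16_setflags, assuming the translator tracks the IT-block state in a condexec_mask field as QEMU's Arm DisasContext does:

static int t16_setflags(DisasContext *s)
{
    /* Set the S bit only when the insn is not inside an IT block */
    return s->condexec_mask == 0;
}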
# Data-processing (two low registers)
%reg_0 0:3
@lll_noshr ...... .... rm:3 rd:3 \
&s_rrr_shi %s rn=%reg_0 shim=0 shty=0
@xll_noshr ...... .... rm:3 rn:3 \
&s_rrr_shi s=1 rd=0 shim=0 shty=0
@lxl_shr ...... .... rs:3 rd:3 \
&s_rrr_shr %s rm=%reg_0 rn=0
AND_rrri 010000 0000 ... ... @lll_noshr
EOR_rrri 010000 0001 ... ... @lll_noshr
MOV_rxrr 010000 0010 ... ... @lxl_shr shty=0 # LSL
MOV_rxrr 010000 0011 ... ... @lxl_shr shty=1 # LSR
MOV_rxrr 010000 0100 ... ... @lxl_shr shty=2 # ASR
ADC_rrri 010000 0101 ... ... @lll_noshr
SBC_rrri 010000 0110 ... ... @lll_noshr
MOV_rxrr 010000 0111 ... ... @lxl_shr shty=3 # ROR
TST_xrri 010000 1000 ... ... @xll_noshr
RSB_rri 010000 1001 rn:3 rd:3 &s_rri_rot %s imm=0 rot=0
CMP_xrri 010000 1010 ... ... @xll_noshr
CMN_xrri 010000 1011 ... ... @xll_noshr
ORR_rrri 010000 1100 ... ... @lll_noshr
MUL 010000 1101 rn:3 rd:3 &s_rrrr %s rm=%reg_0 ra=0
BIC_rrri 010000 1110 ... ... @lll_noshr
MVN_rxri 010000 1111 ... ... @lll_noshr
# Load/store (register offset)
@ldst_rr ....... rm:3 rn:3 rt:3 \
&ldst_rr p=1 w=0 u=1 shimm=0 shtype=0
STR_rr 0101 000 ... ... ... @ldst_rr
STRH_rr 0101 001 ... ... ... @ldst_rr
STRB_rr 0101 010 ... ... ... @ldst_rr
LDRSB_rr 0101 011 ... ... ... @ldst_rr
LDR_rr 0101 100 ... ... ... @ldst_rr
LDRH_rr 0101 101 ... ... ... @ldst_rr
LDRB_rr 0101 110 ... ... ... @ldst_rr
LDRSH_rr 0101 111 ... ... ... @ldst_rr
# Load/store word/byte (immediate offset)
%imm5_6x4 6:5 !function=times_4
@ldst_ri_1 ..... imm:5 rn:3 rt:3 \
&ldst_ri p=1 w=0 u=1
@ldst_ri_4 ..... ..... rn:3 rt:3 \
&ldst_ri p=1 w=0 u=1 imm=%imm5_6x4
STR_ri 01100 ..... ... ... @ldst_ri_4
LDR_ri 01101 ..... ... ... @ldst_ri_4
STRB_ri 01110 ..... ... ... @ldst_ri_1
LDRB_ri 01111 ..... ... ... @ldst_ri_1
# Load/store halfword (immediate offset)
%imm5_6x2 6:5 !function=times_2
@ldst_ri_2 ..... ..... rn:3 rt:3 \
&ldst_ri p=1 w=0 u=1 imm=%imm5_6x2
STRH_ri 10000 ..... ... ... @ldst_ri_2
LDRH_ri 10001 ..... ... ... @ldst_ri_2
# Load/store (SP-relative)
%imm8_0x4 0:8 !function=times_4
@ldst_spec_i ..... rt:3 ........ \
&ldst_ri p=1 w=0 u=1 imm=%imm8_0x4
STR_ri 10010 ... ........ @ldst_spec_i rn=13
LDR_ri 10011 ... ........ @ldst_spec_i rn=13
# Load (PC-relative)
LDR_ri 01001 ... ........ @ldst_spec_i rn=15
# Add PC/SP (immediate)
ADR 10100 rd:3 ........ imm=%imm8_0x4
ADD_rri 10101 rd:3 ........ \
&s_rri_rot rn=13 s=0 rot=0 imm=%imm8_0x4 # SP
# Load/store multiple
@ldstm ..... rn:3 list:8 &ldst_block i=1 b=0 u=0 w=1
STM 11000 ... ........ @ldstm
LDM_t16 11001 ... ........ @ldstm
# Shift (immediate)
@shift_i ..... shim:5 rm:3 rd:3 &s_rrr_shi %s rn=%reg_0
MOV_rxri 000 00 ..... ... ... @shift_i shty=0 # LSL
MOV_rxri 000 01 ..... ... ... @shift_i shty=1 # LSR
MOV_rxri 000 10 ..... ... ... @shift_i shty=2 # ASR
# Add/subtract (three low registers)
@addsub_3 ....... rm:3 rn:3 rd:3 \
&s_rrr_shi %s shim=0 shty=0
ADD_rrri 0001100 ... ... ... @addsub_3
SUB_rrri 0001101 ... ... ... @addsub_3
# Add/subtract (two low registers and immediate)
@addsub_2i ....... imm:3 rn:3 rd:3 \
&s_rri_rot %s rot=0
ADD_rri 0001 110 ... ... ... @addsub_2i
SUB_rri 0001 111 ... ... ... @addsub_2i
# Add, subtract, compare, move (one low register and immediate)
%reg_8 8:3
@arith_1i ..... rd:3 imm:8 \
&s_rri_rot rot=0 rn=%reg_8
MOV_rxi 00100 ... ........ @arith_1i %s
CMP_xri 00101 ... ........ @arith_1i s=1
ADD_rri 00110 ... ........ @arith_1i %s
SUB_rri 00111 ... ........ @arith_1i %s
# Add, compare, move (two high registers)
%reg_0_7 7:1 0:3
@addsub_2h .... .... . rm:4 ... \
&s_rrr_shi rd=%reg_0_7 rn=%reg_0_7 shim=0 shty=0
ADD_rrri 0100 0100 . .... ... @addsub_2h s=0
CMP_xrri 0100 0101 . .... ... @addsub_2h s=1
MOV_rxri 0100 0110 . .... ... @addsub_2h s=0
# Adjust SP (immediate)
%imm7_0x4 0:7 !function=times_4
@addsub_sp_i .... .... . ....... \
&s_rri_rot s=0 rd=13 rn=13 rot=0 imm=%imm7_0x4
ADD_rri 1011 0000 0 ....... @addsub_sp_i
SUB_rri 1011 0000 1 ....... @addsub_sp_i
# Branch and exchange
@branchr .... .... . rm:4 ... &r
BX 0100 0111 0 .... 000 @branchr
BLX_r 0100 0111 1 .... 000 @branchr
BXNS 0100 0111 0 .... 100 @branchr
BLXNS 0100 0111 1 .... 100 @branchr
# Extend
@extend .... .... .. rm:3 rd:3 &rrr_rot rn=15 rot=0
SXTAH 1011 0010 00 ... ... @extend
SXTAB 1011 0010 01 ... ... @extend
UXTAH 1011 0010 10 ... ... @extend
UXTAB 1011 0010 11 ... ... @extend
# Change processor state
%imod 4:1 !function=plus_2
SETEND 1011 0110 010 1 E:1 000 &setend
{
CPS 1011 0110 011 . 0 A:1 I:1 F:1 &cps mode=0 M=0 %imod
CPS_v7m 1011 0110 011 im:1 00 I:1 F:1
}
# Reverse bytes
@rdm .... .... .. rm:3 rd:3 &rr
REV 1011 1010 00 ... ... @rdm
REV16 1011 1010 01 ... ... @rdm
REVSH 1011 1010 11 ... ... @rdm
# Hints
{
{
YIELD 1011 1111 0001 0000
WFE 1011 1111 0010 0000
WFI 1011 1111 0011 0000
# TODO: Implement SEV, SEVL; may help SMP performance.
# SEV 1011 1111 0100 0000
# SEVL 1011 1111 0101 0000
# The canonical nop has the hint field (bits [7:4]) as 0000, but the whole of
# the rest of this space is reserved hints, which behave as a nop.
NOP 1011 1111 ---- 0000
}
IT 1011 1111 cond_mask:8
}
# Miscellaneous 16-bit instructions
%imm6_9_3 9:1 3:5 !function=times_2
HLT 1011 1010 10 imm:6 &i
BKPT 1011 1110 imm:8 &i
CBZ 1011 nz:1 0.1 ..... rn:3 imm=%imm6_9_3
# Push and Pop
%push_list 0:9 !function=t16_push_list
%pop_list 0:9 !function=t16_pop_list
STM 1011 010 ......... \
&ldst_block i=0 b=1 u=0 w=1 rn=13 list=%push_list
LDM_t16 1011 110 ......... \
&ldst_block i=1 b=0 u=0 w=1 rn=13 list=%pop_list
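A sketch of the list-building helpers referenced above, on the assumption that bit 8 of the 9-bit field stands for LR on a push and for PC on a pop:

static int t16_push_list(DisasContext *s, int x)
{
    /* Bits [7:0] are r0-r7; bit 8 adds LR (r14) to the store list */
    return (x & 0xff) | ((x & 0x100) << (14 - 8));
}

static int t16_pop_list(DisasContext *s, int x)
{
    /* Bits [7:0] are r0-r7; bit 8 adds PC (r15) to the load list */
    return (x & 0xff) | ((x & 0x100) << (15 - 8));
}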
# Conditional branches, Supervisor call
%imm8_0x2 0:s8 !function=times_2
{
UDF 1101 1110 ---- ----
SVC 1101 1111 imm:8 &i
B_cond_thumb 1101 cond:4 ........ &ci imm=%imm8_0x2
}
# Unconditional Branch
%imm11_0x2 0:s11 !function=times_2
B 11100 ........... &i imm=%imm11_0x2
# thumb_insn_is_16bit() ensures we won't be decoding these as
# T16 instructions for a Thumb2 CPU, so these patterns must be
# a Thumb1 split BL/BLX.
BLX_suffix 11101 imm:11 &i
BL_BLX_prefix 11110 imm:s11 &i
BL_suffix 11111 imm:11 &i

753
target/arm/tcg/t32.decode Normal file

@@ -0,0 +1,753 @@
# Thumb2 instructions
#
# Copyright (c) 2019 Linaro, Ltd
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
#
# This file is processed by scripts/decodetree.py
#
&empty !extern
&s_rrr_shi !extern s rd rn rm shim shty
&s_rrr_shr !extern s rn rd rm rs shty
&s_rri_rot !extern s rn rd imm rot
&s_rrrr !extern s rd rn rm ra
&rrrr !extern rd rn rm ra
&rrr_rot !extern rd rn rm rot
&rrr !extern rd rn rm
&rr !extern rd rm
&ri !extern rd imm
&r !extern rm
&i !extern imm
&msr_reg !extern rn r mask
&mrs_reg !extern rd r
&msr_bank !extern rn r sysm
&mrs_bank !extern rd r sysm
&ldst_rr !extern p w u rn rt rm shimm shtype
&ldst_ri !extern p w u rn rt imm
&ldst_block !extern rn i b u w list
&strex !extern rn rd rt rt2 imm
&ldrex !extern rn rt rt2 imm
&bfx !extern rd rn lsb widthm1
&bfi !extern rd rn lsb msb
&sat !extern rd rn satimm imm sh
&pkh !extern rd rn rm imm tb
&cps !extern mode imod M A I F
&mcr !extern cp opc1 crn crm opc2 rt
&mcrr !extern cp opc1 crm rt rt2
&mve_shl_ri rdalo rdahi shim
&mve_shl_rr rdalo rdahi rm
&mve_sh_ri rda shim
&mve_sh_rr rda rm
# rdahi: bits [3:1] from insn, bit 0 is 1
# rdalo: bits [3:1] from insn, bit 0 is 0
%rdahi_9 9:3 !function=times_2_plus_1
%rdalo_17 17:3 !function=times_2
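In C terms the two !function helpers above reconstruct the full register numbers from the 3-bit fields; a sketch under the usual decodetree helper signature (times_2 is also reused by other immediate fields in these files):

static int times_2(DisasContext *s, int x)
{
    /* e.g. rdalo: even register number, bit 0 forced to 0 */
    return x * 2;
}

static int times_2_plus_1(DisasContext *s, int x)
{
    /* rdahi: odd register number, bit 0 forced to 1 */
    return x * 2 + 1;
}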
# Data-processing (register)
%imm5_12_6 12:3 6:2
@s_rrr_shi ....... .... s:1 rn:4 .... rd:4 .. shty:2 rm:4 \
&s_rrr_shi shim=%imm5_12_6
@s_rxr_shi ....... .... s:1 .... .... rd:4 .. shty:2 rm:4 \
&s_rrr_shi shim=%imm5_12_6 rn=0
@S_xrr_shi ....... .... . rn:4 .... .... .. shty:2 rm:4 \
&s_rrr_shi shim=%imm5_12_6 s=1 rd=0
@mve_shl_ri ....... .... . ... . . ... ... . .. .. .... \
&mve_shl_ri shim=%imm5_12_6 rdalo=%rdalo_17 rdahi=%rdahi_9
@mve_shl_rr ....... .... . ... . rm:4 ... . .. .. .... \
&mve_shl_rr rdalo=%rdalo_17 rdahi=%rdahi_9
@mve_sh_ri ....... .... . rda:4 . ... ... . .. .. .... \
&mve_sh_ri shim=%imm5_12_6
@mve_sh_rr ....... .... . rda:4 rm:4 .... .... .... &mve_sh_rr
{
TST_xrri 1110101 0000 1 .... 0 ... 1111 .... .... @S_xrr_shi
AND_rrri 1110101 0000 . .... 0 ... .... .... .... @s_rrr_shi
}
BIC_rrri 1110101 0001 . .... 0 ... .... .... .... @s_rrr_shi
{
# The v8.1M MVE shift insns overlap in encoding with MOVS/ORRS
# and are distinguished by having Rm==13 or 15. Those are UNPREDICTABLE
# cases for MOVS/ORRS. We decode the MVE cases first, ensuring that
# they explicitly call unallocated_encoding() for cases that must UNDEF
# (eg "using a new shift insn on a v8.1M CPU without MVE"), and letting
# the rest fall through (where ORR_rrri and MOV_rxri will end up
# handling them as r13 and r15 accesses with the same semantics as A32).
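# (Illustrative note: as elsewhere in decodetree input, the outer { } braces
# form an overlapping group whose patterns are tried in the order written,
# while the inner [ ] brackets group patterns that do not overlap each other;
# see the decodetree documentation for the exact semantics.)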
[
{
UQSHL_ri 1110101 0010 1 .... 0 ... 1111 .. 00 1111 @mve_sh_ri
LSLL_ri 1110101 0010 1 ... 0 0 ... ... 1 .. 00 1111 @mve_shl_ri
UQSHLL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 00 1111 @mve_shl_ri
}
{
URSHR_ri 1110101 0010 1 .... 0 ... 1111 .. 01 1111 @mve_sh_ri
LSRL_ri 1110101 0010 1 ... 0 0 ... ... 1 .. 01 1111 @mve_shl_ri
URSHRL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 01 1111 @mve_shl_ri
}
{
SRSHR_ri 1110101 0010 1 .... 0 ... 1111 .. 10 1111 @mve_sh_ri
ASRL_ri 1110101 0010 1 ... 0 0 ... ... 1 .. 10 1111 @mve_shl_ri
SRSHRL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 10 1111 @mve_shl_ri
}
{
SQSHL_ri 1110101 0010 1 .... 0 ... 1111 .. 11 1111 @mve_sh_ri
SQSHLL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 11 1111 @mve_shl_ri
}
{
UQRSHL_rr 1110101 0010 1 .... .... 1111 0000 1101 @mve_sh_rr
LSLL_rr 1110101 0010 1 ... 0 .... ... 1 0000 1101 @mve_shl_rr
UQRSHLL64_rr 1110101 0010 1 ... 1 .... ... 1 0000 1101 @mve_shl_rr
}
{
SQRSHR_rr 1110101 0010 1 .... .... 1111 0010 1101 @mve_sh_rr
ASRL_rr 1110101 0010 1 ... 0 .... ... 1 0010 1101 @mve_shl_rr
SQRSHRL64_rr 1110101 0010 1 ... 1 .... ... 1 0010 1101 @mve_shl_rr
}
UQRSHLL48_rr 1110101 0010 1 ... 1 .... ... 1 1000 1101 @mve_shl_rr
SQRSHRL48_rr 1110101 0010 1 ... 1 .... ... 1 1010 1101 @mve_shl_rr
]
MOV_rxri 1110101 0010 . 1111 0 ... .... .... .... @s_rxr_shi
ORR_rrri 1110101 0010 . .... 0 ... .... .... .... @s_rrr_shi
# v8.1M CSEL and friends
CSEL 1110101 0010 1 rn:4 10 op:2 rd:4 fcond:4 rm:4
}
{
MVN_rxri 1110101 0011 . 1111 0 ... .... .... .... @s_rxr_shi
ORN_rrri 1110101 0011 . .... 0 ... .... .... .... @s_rrr_shi
}
{
TEQ_xrri 1110101 0100 1 .... 0 ... 1111 .... .... @S_xrr_shi
EOR_rrri 1110101 0100 . .... 0 ... .... .... .... @s_rrr_shi
}
PKH 1110101 0110 0 rn:4 0 ... rd:4 .. tb:1 0 rm:4 \
&pkh imm=%imm5_12_6
{
CMN_xrri 1110101 1000 1 .... 0 ... 1111 .... .... @S_xrr_shi
ADD_rrri 1110101 1000 . .... 0 ... .... .... .... @s_rrr_shi
}
ADC_rrri 1110101 1010 . .... 0 ... .... .... .... @s_rrr_shi
SBC_rrri 1110101 1011 . .... 0 ... .... .... .... @s_rrr_shi
{
CMP_xrri 1110101 1101 1 .... 0 ... 1111 .... .... @S_xrr_shi
SUB_rrri 1110101 1101 . .... 0 ... .... .... .... @s_rrr_shi
}
RSB_rrri 1110101 1110 . .... 0 ... .... .... .... @s_rrr_shi
# Data-processing (register-shifted register)
MOV_rxrr 1111 1010 0 shty:2 s:1 rm:4 1111 rd:4 0000 rs:4 \
&s_rrr_shr rn=0
# Data-processing (immediate)
%t32extrot 26:1 12:3 0:8 !function=t32_expandimm_rot
%t32extimm 26:1 12:3 0:8 !function=t32_expandimm_imm
@s_rri_rot ....... .... s:1 rn:4 . ... rd:4 ........ \
&s_rri_rot imm=%t32extimm rot=%t32extrot
@s_rxi_rot ....... .... s:1 .... . ... rd:4 ........ \
&s_rri_rot imm=%t32extimm rot=%t32extrot rn=0
@S_xri_rot ....... .... . rn:4 . ... .... ........ \
&s_rri_rot imm=%t32extimm rot=%t32extrot s=1 rd=0
{
TST_xri 1111 0.0 0000 1 .... 0 ... 1111 ........ @S_xri_rot
AND_rri 1111 0.0 0000 . .... 0 ... .... ........ @s_rri_rot
}
BIC_rri 1111 0.0 0001 . .... 0 ... .... ........ @s_rri_rot
{
MOV_rxi 1111 0.0 0010 . 1111 0 ... .... ........ @s_rxi_rot
ORR_rri 1111 0.0 0010 . .... 0 ... .... ........ @s_rri_rot
}
{
MVN_rxi 1111 0.0 0011 . 1111 0 ... .... ........ @s_rxi_rot
ORN_rri 1111 0.0 0011 . .... 0 ... .... ........ @s_rri_rot
}
{
TEQ_xri 1111 0.0 0100 1 .... 0 ... 1111 ........ @S_xri_rot
EOR_rri 1111 0.0 0100 . .... 0 ... .... ........ @s_rri_rot
}
{
CMN_xri 1111 0.0 1000 1 .... 0 ... 1111 ........ @S_xri_rot
ADD_rri 1111 0.0 1000 . .... 0 ... .... ........ @s_rri_rot
}
ADC_rri 1111 0.0 1010 . .... 0 ... .... ........ @s_rri_rot
SBC_rri 1111 0.0 1011 . .... 0 ... .... ........ @s_rri_rot
{
CMP_xri 1111 0.0 1101 1 .... 0 ... 1111 ........ @S_xri_rot
SUB_rri 1111 0.0 1101 . .... 0 ... .... ........ @s_rri_rot
}
RSB_rri 1111 0.0 1110 . .... 0 ... .... ........ @s_rri_rot
# Data processing (plain binary immediate)
%imm12_26_12_0 26:1 12:3 0:8
%neg12_26_12_0 26:1 12:3 0:8 !function=negate
@s0_rri_12 .... ... .... . rn:4 . ... rd:4 ........ \
&s_rri_rot imm=%imm12_26_12_0 rot=0 s=0
{
ADR 1111 0.1 0000 0 1111 0 ... rd:4 ........ \
&ri imm=%imm12_26_12_0
ADD_rri 1111 0.1 0000 0 .... 0 ... .... ........ @s0_rri_12
}
{
ADR 1111 0.1 0101 0 1111 0 ... rd:4 ........ \
&ri imm=%neg12_26_12_0
SUB_rri 1111 0.1 0101 0 .... 0 ... .... ........ @s0_rri_12
}
# Move Wide
%imm16_26_16_12_0 16:4 26:1 12:3 0:8
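# i.e. the 16-bit immediate is assembled as imm4:i:imm3:imm8
# (insn[19:16] : insn[26] : insn[14:12] : insn[7:0]).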
@mov16 .... .... .... .... .... rd:4 .... .... \
&ri imm=%imm16_26_16_12_0
MOVW 1111 0.10 0100 .... 0 ... .... ........ @mov16
MOVT 1111 0.10 1100 .... 0 ... .... ........ @mov16
# Saturate, bitfield
@sat .... .... .. sh:1 . rn:4 . ... rd:4 .. . satimm:5 \
&sat imm=%imm5_12_6
@sat16 .... .... .. . . rn:4 . ... rd:4 .. . satimm:5 \
&sat sh=0 imm=0
{
SSAT16 1111 0011 001 0 .... 0 000 .... 00 0 ..... @sat16
SSAT 1111 0011 00. 0 .... 0 ... .... .. 0 ..... @sat
}
{
USAT16 1111 0011 101 0 .... 0 000 .... 00 0 ..... @sat16
USAT 1111 0011 10. 0 .... 0 ... .... .. 0 ..... @sat
}
@bfx .... .... ... . rn:4 . ... rd:4 .. . widthm1:5 \
&bfx lsb=%imm5_12_6
@bfi .... .... ... . rn:4 . ... rd:4 .. . msb:5 \
&bfi lsb=%imm5_12_6
SBFX 1111 0011 010 0 .... 0 ... .... ..0..... @bfx
UBFX 1111 0011 110 0 .... 0 ... .... ..0..... @bfx
# bfc is bfi w/ rn=15
BFCI 1111 0011 011 0 .... 0 ... .... ..0..... @bfi
# Multiply and multiply accumulate
@s0_rnadm .... .... .... rn:4 ra:4 rd:4 .... rm:4 &s_rrrr s=0
@s0_rn0dm .... .... .... rn:4 .... rd:4 .... rm:4 &s_rrrr ra=0 s=0
@rnadm .... .... .... rn:4 ra:4 rd:4 .... rm:4 &rrrr
@rn0dm .... .... .... rn:4 .... rd:4 .... rm:4 &rrrr ra=0
@rndm .... .... .... rn:4 .... rd:4 .... rm:4 &rrr
@rdm .... .... .... .... .... rd:4 .... rm:4 &rr
{
MUL 1111 1011 0000 .... 1111 .... 0000 .... @s0_rn0dm
MLA 1111 1011 0000 .... .... .... 0000 .... @s0_rnadm
}
MLS 1111 1011 0000 .... .... .... 0001 .... @rnadm
SMULL 1111 1011 1000 .... .... .... 0000 .... @s0_rnadm
UMULL 1111 1011 1010 .... .... .... 0000 .... @s0_rnadm
SMLAL 1111 1011 1100 .... .... .... 0000 .... @s0_rnadm
UMLAL 1111 1011 1110 .... .... .... 0000 .... @s0_rnadm
UMAAL 1111 1011 1110 .... .... .... 0110 .... @rnadm
{
SMULWB 1111 1011 0011 .... 1111 .... 0000 .... @rn0dm
SMLAWB 1111 1011 0011 .... .... .... 0000 .... @rnadm
}
{
SMULWT 1111 1011 0011 .... 1111 .... 0001 .... @rn0dm
SMLAWT 1111 1011 0011 .... .... .... 0001 .... @rnadm
}
{
SMULBB 1111 1011 0001 .... 1111 .... 0000 .... @rn0dm
SMLABB 1111 1011 0001 .... .... .... 0000 .... @rnadm
}
{
SMULBT 1111 1011 0001 .... 1111 .... 0001 .... @rn0dm
SMLABT 1111 1011 0001 .... .... .... 0001 .... @rnadm
}
{
SMULTB 1111 1011 0001 .... 1111 .... 0010 .... @rn0dm
SMLATB 1111 1011 0001 .... .... .... 0010 .... @rnadm
}
{
SMULTT 1111 1011 0001 .... 1111 .... 0011 .... @rn0dm
SMLATT 1111 1011 0001 .... .... .... 0011 .... @rnadm
}
SMLALBB 1111 1011 1100 .... .... .... 1000 .... @rnadm
SMLALBT 1111 1011 1100 .... .... .... 1001 .... @rnadm
SMLALTB 1111 1011 1100 .... .... .... 1010 .... @rnadm
SMLALTT 1111 1011 1100 .... .... .... 1011 .... @rnadm
# usad8 is usada8 w/ ra=15
USADA8 1111 1011 0111 .... .... .... 0000 .... @rnadm
SMLAD 1111 1011 0010 .... .... .... 0000 .... @rnadm
SMLADX 1111 1011 0010 .... .... .... 0001 .... @rnadm
SMLSD 1111 1011 0100 .... .... .... 0000 .... @rnadm
SMLSDX 1111 1011 0100 .... .... .... 0001 .... @rnadm
SMLALD 1111 1011 1100 .... .... .... 1100 .... @rnadm
SMLALDX 1111 1011 1100 .... .... .... 1101 .... @rnadm
SMLSLD 1111 1011 1101 .... .... .... 1100 .... @rnadm
SMLSLDX 1111 1011 1101 .... .... .... 1101 .... @rnadm
SMMLA 1111 1011 0101 .... .... .... 0000 .... @rnadm
SMMLAR 1111 1011 0101 .... .... .... 0001 .... @rnadm
SMMLS 1111 1011 0110 .... .... .... 0000 .... @rnadm
SMMLSR 1111 1011 0110 .... .... .... 0001 .... @rnadm
SDIV 1111 1011 1001 .... 1111 .... 1111 .... @rndm
UDIV 1111 1011 1011 .... 1111 .... 1111 .... @rndm
# Data-processing (two source registers)
QADD 1111 1010 1000 .... 1111 .... 1000 .... @rndm
QSUB 1111 1010 1000 .... 1111 .... 1010 .... @rndm
QDADD 1111 1010 1000 .... 1111 .... 1001 .... @rndm
QDSUB 1111 1010 1000 .... 1111 .... 1011 .... @rndm
CRC32B 1111 1010 1100 .... 1111 .... 1000 .... @rndm
CRC32H 1111 1010 1100 .... 1111 .... 1001 .... @rndm
CRC32W 1111 1010 1100 .... 1111 .... 1010 .... @rndm
CRC32CB 1111 1010 1101 .... 1111 .... 1000 .... @rndm
CRC32CH 1111 1010 1101 .... 1111 .... 1001 .... @rndm
CRC32CW 1111 1010 1101 .... 1111 .... 1010 .... @rndm
SEL 1111 1010 1010 .... 1111 .... 1000 .... @rndm
# Note rn != rm is CONSTRAINED UNPREDICTABLE; we choose to ignore rn.
REV 1111 1010 1001 ---- 1111 .... 1000 .... @rdm
REV16 1111 1010 1001 ---- 1111 .... 1001 .... @rdm
RBIT 1111 1010 1001 ---- 1111 .... 1010 .... @rdm
REVSH 1111 1010 1001 ---- 1111 .... 1011 .... @rdm
CLZ 1111 1010 1011 ---- 1111 .... 1000 .... @rdm
# Branches and miscellaneous control
%msr_sysm 4:1 8:4
%mrs_sysm 4:1 16:4
%imm16_16_0 16:4 0:12
%imm21 26:s1 11:1 13:1 16:6 0:11 !function=times_2
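# (Illustrative: this reassembles the T3 conditional-branch offset
# SignExtend(S:J2:J1:imm6:imm11:'0'), with S = insn[26], J2 = insn[11],
# J1 = insn[13], imm6 = insn[21:16] and imm11 = insn[10:0]; the trailing
# zero comes from the times_2 function.)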
&ci cond imm
{
# Group insn[25:23] = 111, which is cond=111x for the branch below,
# or unconditional, which would be illegal for the branch.
[
# Hints, and CPS
{
[
YIELD 1111 0011 1010 1111 1000 0000 0000 0001
WFE 1111 0011 1010 1111 1000 0000 0000 0010
WFI 1111 0011 1010 1111 1000 0000 0000 0011
# TODO: Implement SEV, SEVL; may help SMP performance.
# SEV 1111 0011 1010 1111 1000 0000 0000 0100
# SEVL 1111 0011 1010 1111 1000 0000 0000 0101
ESB 1111 0011 1010 1111 1000 0000 0001 0000
]
# The canonical nop ends in 0000 0000, but the whole rest
# of the space is "reserved hint, behaves as nop".
NOP 1111 0011 1010 1111 1000 0000 ---- ----
# If imod == '00' && M == '0' then SEE "Hint instructions", above.
CPS 1111 0011 1010 1111 1000 0 imod:2 M:1 A:1 I:1 F:1 mode:5 \
&cps
}
# Miscellaneous control
CLREX 1111 0011 1011 1111 1000 1111 0010 1111
DSB 1111 0011 1011 1111 1000 1111 0100 ----
DMB 1111 0011 1011 1111 1000 1111 0101 ----
ISB 1111 0011 1011 1111 1000 1111 0110 ----
SB 1111 0011 1011 1111 1000 1111 0111 0000
# Note that the v7m insn overlaps both the normal and banked insn.
{
MRS_bank 1111 0011 111 r:1 .... 1000 rd:4 001. 0000 \
&mrs_bank sysm=%mrs_sysm
MRS_reg 1111 0011 111 r:1 1111 1000 rd:4 0000 0000 &mrs_reg
MRS_v7m 1111 0011 111 0 1111 1000 rd:4 sysm:8
}
{
MSR_bank 1111 0011 100 r:1 rn:4 1000 .... 001. 0000 \
&msr_bank sysm=%msr_sysm
MSR_reg 1111 0011 100 r:1 rn:4 1000 mask:4 0000 0000 &msr_reg
MSR_v7m 1111 0011 100 0 rn:4 1000 mask:2 00 sysm:8
}
BXJ 1111 0011 1100 rm:4 1000 1111 0000 0000 &r
{
# At v6T2, this is the T5 encoding of SUBS PC, LR, #IMM, and works as for
# every other encoding of SUBS. With v7VE, IMM=0 is redefined as ERET.
# The distinction between the two only matters for Hyp mode.
ERET 1111 0011 1101 1110 1000 1111 0000 0000
SUB_rri 1111 0011 1101 1110 1000 1111 imm:8 \
&s_rri_rot rot=0 s=1 rd=15 rn=14
}
SMC 1111 0111 1111 imm:4 1000 0000 0000 0000 &i
HVC 1111 0111 1110 .... 1000 .... .... .... \
&i imm=%imm16_16_0
UDF 1111 0111 1111 ---- 1010 ---- ---- ----
]
B_cond_thumb 1111 0. cond:4 ...... 10.0 ............ &ci imm=%imm21
}
# Load/store (register, immediate, literal)
@ldst_rr .... .... .... rn:4 rt:4 ...... shimm:2 rm:4 \
&ldst_rr p=1 w=0 u=1 shtype=0
@ldst_ri_idx .... .... .... rn:4 rt:4 . p:1 u:1 . imm:8 \
&ldst_ri w=1
@ldst_ri_neg .... .... .... rn:4 rt:4 .... imm:8 \
&ldst_ri p=1 w=0 u=0
@ldst_ri_unp .... .... .... rn:4 rt:4 .... imm:8 \
&ldst_ri p=1 w=0 u=1
@ldst_ri_pos .... .... .... rn:4 rt:4 imm:12 \
&ldst_ri p=1 w=0 u=1
@ldst_ri_lit .... .... u:1 ... .... rt:4 imm:12 \
&ldst_ri p=1 w=0 rn=15
STRB_rr 1111 1000 0000 .... .... 000000 .. .... @ldst_rr
STRB_ri 1111 1000 0000 .... .... 1..1 ........ @ldst_ri_idx
STRB_ri 1111 1000 0000 .... .... 1100 ........ @ldst_ri_neg
STRBT_ri 1111 1000 0000 .... .... 1110 ........ @ldst_ri_unp
STRB_ri 1111 1000 1000 .... .... ............ @ldst_ri_pos
STRH_rr 1111 1000 0010 .... .... 000000 .. .... @ldst_rr
STRH_ri 1111 1000 0010 .... .... 1..1 ........ @ldst_ri_idx
STRH_ri 1111 1000 0010 .... .... 1100 ........ @ldst_ri_neg
STRHT_ri 1111 1000 0010 .... .... 1110 ........ @ldst_ri_unp
STRH_ri 1111 1000 1010 .... .... ............ @ldst_ri_pos
STR_rr 1111 1000 0100 .... .... 000000 .. .... @ldst_rr
STR_ri 1111 1000 0100 .... .... 1..1 ........ @ldst_ri_idx
STR_ri 1111 1000 0100 .... .... 1100 ........ @ldst_ri_neg
STRT_ri 1111 1000 0100 .... .... 1110 ........ @ldst_ri_unp
STR_ri 1111 1000 1100 .... .... ............ @ldst_ri_pos
# Note that Load, unsigned (literal) overlaps all other load encodings.
{
{
NOP 1111 1000 -001 1111 1111 ------------ # PLD
LDRB_ri 1111 1000 .001 1111 .... ............ @ldst_ri_lit
}
{
NOP 1111 1000 1001 ---- 1111 ------------ # PLD
LDRB_ri 1111 1000 1001 .... .... ............ @ldst_ri_pos
}
LDRB_ri 1111 1000 0001 .... .... 1..1 ........ @ldst_ri_idx
{
NOP 1111 1000 0001 ---- 1111 1100 -------- # PLD
LDRB_ri 1111 1000 0001 .... .... 1100 ........ @ldst_ri_neg
}
LDRBT_ri 1111 1000 0001 .... .... 1110 ........ @ldst_ri_unp
{
NOP 1111 1000 0001 ---- 1111 000000 -- ---- # PLD
LDRB_rr 1111 1000 0001 .... .... 000000 .. .... @ldst_rr
}
}
{
{
NOP 1111 1000 -011 1111 1111 ------------ # PLD
LDRH_ri 1111 1000 .011 1111 .... ............ @ldst_ri_lit
}
{
NOP 1111 1000 1011 ---- 1111 ------------ # PLDW
LDRH_ri 1111 1000 1011 .... .... ............ @ldst_ri_pos
}
LDRH_ri 1111 1000 0011 .... .... 1..1 ........ @ldst_ri_idx
{
NOP 1111 1000 0011 ---- 1111 1100 -------- # PLDW
LDRH_ri 1111 1000 0011 .... .... 1100 ........ @ldst_ri_neg
}
LDRHT_ri 1111 1000 0011 .... .... 1110 ........ @ldst_ri_unp
{
NOP 1111 1000 0011 ---- 1111 000000 -- ---- # PLDW
LDRH_rr 1111 1000 0011 .... .... 000000 .. .... @ldst_rr
}
}
{
LDR_ri 1111 1000 .101 1111 .... ............ @ldst_ri_lit
LDR_ri 1111 1000 1101 .... .... ............ @ldst_ri_pos
LDR_ri 1111 1000 0101 .... .... 1..1 ........ @ldst_ri_idx
LDR_ri 1111 1000 0101 .... .... 1100 ........ @ldst_ri_neg
LDRT_ri 1111 1000 0101 .... .... 1110 ........ @ldst_ri_unp
LDR_rr 1111 1000 0101 .... .... 000000 .. .... @ldst_rr
}
# NOPs here are PLI.
{
{
NOP 1111 1001 -001 1111 1111 ------------
LDRSB_ri 1111 1001 .001 1111 .... ............ @ldst_ri_lit
}
{
NOP 1111 1001 1001 ---- 1111 ------------
LDRSB_ri 1111 1001 1001 .... .... ............ @ldst_ri_pos
}
LDRSB_ri 1111 1001 0001 .... .... 1..1 ........ @ldst_ri_idx
{
NOP 1111 1001 0001 ---- 1111 1100 --------
LDRSB_ri 1111 1001 0001 .... .... 1100 ........ @ldst_ri_neg
}
LDRSBT_ri 1111 1001 0001 .... .... 1110 ........ @ldst_ri_unp
{
NOP 1111 1001 0001 ---- 1111 000000 -- ----
LDRSB_rr 1111 1001 0001 .... .... 000000 .. .... @ldst_rr
}
}
# NOPs here are unallocated memory hints, treated as NOP.
{
{
NOP 1111 1001 -011 1111 1111 ------------
LDRSH_ri 1111 1001 .011 1111 .... ............ @ldst_ri_lit
}
{
NOP 1111 1001 1011 ---- 1111 ------------
LDRSH_ri 1111 1001 1011 .... .... ............ @ldst_ri_pos
}
LDRSH_ri 1111 1001 0011 .... .... 1..1 ........ @ldst_ri_idx
{
NOP 1111 1001 0011 ---- 1111 1100 --------
LDRSH_ri 1111 1001 0011 .... .... 1100 ........ @ldst_ri_neg
}
LDRSHT_ri 1111 1001 0011 .... .... 1110 ........ @ldst_ri_unp
{
NOP 1111 1001 0011 ---- 1111 000000 -- ----
LDRSH_rr 1111 1001 0011 .... .... 000000 .. .... @ldst_rr
}
}
%imm8x4 0:8 !function=times_4
&ldst_ri2 p w u rn rt rt2 imm
@ldstd_ri8 .... .... u:1 ... rn:4 rt:4 rt2:4 ........ \
&ldst_ri2 imm=%imm8x4
STRD_ri_t32 1110 1000 .110 .... .... .... ........ @ldstd_ri8 w=1 p=0
LDRD_ri_t32 1110 1000 .111 .... .... .... ........ @ldstd_ri8 w=1 p=0
STRD_ri_t32 1110 1001 .100 .... .... .... ........ @ldstd_ri8 w=0 p=1
LDRD_ri_t32 1110 1001 .101 .... .... .... ........ @ldstd_ri8 w=0 p=1
STRD_ri_t32 1110 1001 .110 .... .... .... ........ @ldstd_ri8 w=1 p=1
{
SG 1110 1001 0111 1111 1110 1001 01111111
LDRD_ri_t32 1110 1001 .111 .... .... .... ........ @ldstd_ri8 w=1 p=1
}
# Load/Store Exclusive, Load-Acquire/Store-Release, and Table Branch
@strex_i .... .... .... rn:4 rt:4 rd:4 .... .... \
&strex rt2=15 imm=%imm8x4
@strex_0 .... .... .... rn:4 rt:4 .... .... rd:4 \
&strex rt2=15 imm=0
@strex_d .... .... .... rn:4 rt:4 rt2:4 .... rd:4 \
&strex imm=0
@ldrex_i .... .... .... rn:4 rt:4 .... .... .... \
&ldrex rt2=15 imm=%imm8x4
@ldrex_0 .... .... .... rn:4 rt:4 .... .... .... \
&ldrex rt2=15 imm=0
@ldrex_d .... .... .... rn:4 rt:4 rt2:4 .... .... \
&ldrex imm=0
{
TT 1110 1000 0100 rn:4 1111 rd:4 A:1 T:1 000000
STREX 1110 1000 0100 .... .... .... .... .... @strex_i
}
STREXB 1110 1000 1100 .... .... 1111 0100 .... @strex_0
STREXH 1110 1000 1100 .... .... 1111 0101 .... @strex_0
STREXD_t32 1110 1000 1100 .... .... .... 0111 .... @strex_d
STLEX 1110 1000 1100 .... .... 1111 1110 .... @strex_0
STLEXB 1110 1000 1100 .... .... 1111 1100 .... @strex_0
STLEXH 1110 1000 1100 .... .... 1111 1101 .... @strex_0
STLEXD_t32 1110 1000 1100 .... .... .... 1111 .... @strex_d
STL 1110 1000 1100 .... .... 1111 1010 1111 @ldrex_0
STLB 1110 1000 1100 .... .... 1111 1000 1111 @ldrex_0
STLH 1110 1000 1100 .... .... 1111 1001 1111 @ldrex_0
LDREX 1110 1000 0101 .... .... 1111 .... .... @ldrex_i
LDREXB 1110 1000 1101 .... .... 1111 0100 1111 @ldrex_0
LDREXH 1110 1000 1101 .... .... 1111 0101 1111 @ldrex_0
LDREXD_t32 1110 1000 1101 .... .... .... 0111 1111 @ldrex_d
LDAEX 1110 1000 1101 .... .... 1111 1110 1111 @ldrex_0
LDAEXB 1110 1000 1101 .... .... 1111 1100 1111 @ldrex_0
LDAEXH 1110 1000 1101 .... .... 1111 1101 1111 @ldrex_0
LDAEXD_t32 1110 1000 1101 .... .... .... 1111 1111 @ldrex_d
LDA 1110 1000 1101 .... .... 1111 1010 1111 @ldrex_0
LDAB 1110 1000 1101 .... .... 1111 1000 1111 @ldrex_0
LDAH 1110 1000 1101 .... .... 1111 1001 1111 @ldrex_0
&tbranch rn rm
@tbranch .... .... .... rn:4 .... .... .... rm:4 &tbranch
TBB 1110 1000 1101 .... 1111 0000 0000 .... @tbranch
TBH 1110 1000 1101 .... 1111 0000 0001 .... @tbranch
# Parallel addition and subtraction
SADD8 1111 1010 1000 .... 1111 .... 0000 .... @rndm
QADD8 1111 1010 1000 .... 1111 .... 0001 .... @rndm
SHADD8 1111 1010 1000 .... 1111 .... 0010 .... @rndm
UADD8 1111 1010 1000 .... 1111 .... 0100 .... @rndm
UQADD8 1111 1010 1000 .... 1111 .... 0101 .... @rndm
UHADD8 1111 1010 1000 .... 1111 .... 0110 .... @rndm
SADD16 1111 1010 1001 .... 1111 .... 0000 .... @rndm
QADD16 1111 1010 1001 .... 1111 .... 0001 .... @rndm
SHADD16 1111 1010 1001 .... 1111 .... 0010 .... @rndm
UADD16 1111 1010 1001 .... 1111 .... 0100 .... @rndm
UQADD16 1111 1010 1001 .... 1111 .... 0101 .... @rndm
UHADD16 1111 1010 1001 .... 1111 .... 0110 .... @rndm
SASX 1111 1010 1010 .... 1111 .... 0000 .... @rndm
QASX 1111 1010 1010 .... 1111 .... 0001 .... @rndm
SHASX 1111 1010 1010 .... 1111 .... 0010 .... @rndm
UASX 1111 1010 1010 .... 1111 .... 0100 .... @rndm
UQASX 1111 1010 1010 .... 1111 .... 0101 .... @rndm
UHASX 1111 1010 1010 .... 1111 .... 0110 .... @rndm
SSUB8 1111 1010 1100 .... 1111 .... 0000 .... @rndm
QSUB8 1111 1010 1100 .... 1111 .... 0001 .... @rndm
SHSUB8 1111 1010 1100 .... 1111 .... 0010 .... @rndm
USUB8 1111 1010 1100 .... 1111 .... 0100 .... @rndm
UQSUB8 1111 1010 1100 .... 1111 .... 0101 .... @rndm
UHSUB8 1111 1010 1100 .... 1111 .... 0110 .... @rndm
SSUB16 1111 1010 1101 .... 1111 .... 0000 .... @rndm
QSUB16 1111 1010 1101 .... 1111 .... 0001 .... @rndm
SHSUB16 1111 1010 1101 .... 1111 .... 0010 .... @rndm
USUB16 1111 1010 1101 .... 1111 .... 0100 .... @rndm
UQSUB16 1111 1010 1101 .... 1111 .... 0101 .... @rndm
UHSUB16 1111 1010 1101 .... 1111 .... 0110 .... @rndm
SSAX 1111 1010 1110 .... 1111 .... 0000 .... @rndm
QSAX 1111 1010 1110 .... 1111 .... 0001 .... @rndm
SHSAX 1111 1010 1110 .... 1111 .... 0010 .... @rndm
USAX 1111 1010 1110 .... 1111 .... 0100 .... @rndm
UQSAX 1111 1010 1110 .... 1111 .... 0101 .... @rndm
UHSAX 1111 1010 1110 .... 1111 .... 0110 .... @rndm
# Register extends
@rrr_rot .... .... .... rn:4 .... rd:4 .. rot:2 rm:4 &rrr_rot
SXTAH 1111 1010 0000 .... 1111 .... 10.. .... @rrr_rot
UXTAH 1111 1010 0001 .... 1111 .... 10.. .... @rrr_rot
SXTAB16 1111 1010 0010 .... 1111 .... 10.. .... @rrr_rot
UXTAB16 1111 1010 0011 .... 1111 .... 10.. .... @rrr_rot
SXTAB 1111 1010 0100 .... 1111 .... 10.. .... @rrr_rot
UXTAB 1111 1010 0101 .... 1111 .... 10.. .... @rrr_rot
# Load/store multiple
@ldstm .... .... .. w:1 . rn:4 list:16 &ldst_block u=0
STM_t32 1110 1000 10.0 .... ................ @ldstm i=1 b=0
STM_t32 1110 1001 00.0 .... ................ @ldstm i=0 b=1
{
# Rn=15 UNDEFs for LDM; M-profile CLRM uses that encoding
CLRM 1110 1000 1001 1111 list:16
LDM_t32 1110 1000 10.1 .... ................ @ldstm i=1 b=0
}
LDM_t32 1110 1001 00.1 .... ................ @ldstm i=0 b=1
&rfe !extern rn w pu
@rfe .... .... .. w:1 . rn:4 ................ &rfe
RFE 1110 1000 00.1 .... 1100000000000000 @rfe pu=2
RFE 1110 1001 10.1 .... 1100000000000000 @rfe pu=1
&srs !extern mode w pu
@srs .... .... .. w:1 . .... ........... mode:5 &srs
SRS 1110 1000 00.0 1101 1100 0000 000. .... @srs pu=2
SRS 1110 1001 10.0 1101 1100 0000 000. .... @srs pu=1
# Coprocessor instructions
# We decode MCR, MRC, MRRC and MCRR only, because for QEMU the
# other coprocessor instructions always UNDEF.
# The trans_ functions for these will ignore cp values 8..13 for v7 or
# earlier, and 0..13 for v8 and later, because those areas of the
# encoding space may be used for other things, such as VFP or Neon.
@mcr .... .... opc1:3 . crn:4 rt:4 cp:4 opc2:3 . crm:4
@mcrr .... .... .... rt2:4 rt:4 cp:4 opc1:4 crm:4
MCRR 1110 1100 0100 .... .... .... .... .... @mcrr
MRRC 1110 1100 0101 .... .... .... .... .... @mcrr
MCR 1110 1110 ... 0 .... .... .... ... 1 .... @mcr
MRC 1110 1110 ... 1 .... .... .... ... 1 .... @mcr
# Branches
%imm24 26:s1 13:1 11:1 16:10 0:11 !function=t32_branch24
@branch24 ................................ &i imm=%imm24
B 1111 0. .......... 10.1 ............ @branch24
BL 1111 0. .......... 11.1 ............ @branch24
{
# BLX_i is non-M-profile only
BLX_i 1111 0. .......... 11.0 ............ @branch24
# M-profile only: loop and branch insns
[
# All these BF insns have boff != 0b0000; we NOP them all
BF 1111 0 boff:4 ------- 1100 - ---------- 1 # BFL
BF 1111 0 boff:4 0 ------ 1110 - ---------- 1 # BFCSEL
BF 1111 0 boff:4 10 ----- 1110 - ---------- 1 # BF
BF 1111 0 boff:4 11 ----- 1110 0 0000000000 1 # BFX, BFLX
]
[
# LE and WLS immediate
%lob_imm 1:10 11:1 !function=times_2
DLS 1111 0 0000 100 rn:4 1110 0000 0000 0001 size=4
WLS 1111 0 0000 100 rn:4 1100 . .......... 1 imm=%lob_imm size=4
{
LE 1111 0 0000 0 f:1 tp:1 1111 1100 . .......... 1 imm=%lob_imm
# This is WLSTP
WLS 1111 0 0000 0 size:2 rn:4 1100 . .......... 1 imm=%lob_imm
}
{
LCTP 1111 0 0000 000 1111 1110 0000 0000 0001
# This is DLSTP
DLS 1111 0 0000 0 size:2 rn:4 1110 0000 0000 0001
}
VCTP 1111 0 0000 0 size:2 rn:4 1110 1000 0000 0001
]
}

15054
target/arm/tcg/translate-a64.c Normal file

File diff suppressed because it is too large

201
target/arm/tcg/translate-a64.h Normal file

@ -0,0 +1,201 @@
/*
* AArch64 translation, common definitions.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TARGET_ARM_TRANSLATE_A64_H
#define TARGET_ARM_TRANSLATE_A64_H
TCGv_i64 new_tmp_a64(DisasContext *s);
TCGv_i64 new_tmp_a64_local(DisasContext *s);
TCGv_i64 new_tmp_a64_zero(DisasContext *s);
TCGv_i64 cpu_reg(DisasContext *s, int reg);
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg);
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf);
TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf);
void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v);
bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
unsigned int imms, unsigned int immr);
bool sve_access_check(DisasContext *s);
bool sme_enabled_check(DisasContext *s);
bool sme_enabled_check_with_svcr(DisasContext *s, unsigned);
/* This function corresponds to CheckStreamingSVEEnabled. */
static inline bool sme_sm_enabled_check(DisasContext *s)
{
return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK);
}
/* This function corresponds to CheckSMEAndZAEnabled. */
static inline bool sme_za_enabled_check(DisasContext *s)
{
return sme_enabled_check_with_svcr(s, R_SVCR_ZA_MASK);
}
/* Note that this function corresponds to CheckStreamingSVEAndZAEnabled. */
static inline bool sme_smza_enabled_check(DisasContext *s)
{
return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK | R_SVCR_ZA_MASK);
}
TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr);
TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
bool tag_checked, int log2_size);
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
bool tag_checked, int size);
/* We should have at some point before trying to access an FP register
* done the necessary access check, so assert that
* (a) we did the check and
* (b) we didn't then just plough ahead anyway if it failed.
* Print the instruction pattern in the abort message so we can figure
* out what we need to fix if a user encounters this problem in the wild.
*/
static inline void assert_fp_access_checked(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
fprintf(stderr, "target-arm: FP access check missing for "
"instruction 0x%08x\n", s->insn);
abort();
}
#endif
}
/* Return the offset into CPUARMState of an element of specified
* size, 'element' places in from the least significant end of
* the FP/vector register Qn.
*/
static inline int vec_reg_offset(DisasContext *s, int regno,
int element, MemOp size)
{
int element_size = 1 << size;
int offs = element * element_size;
#if HOST_BIG_ENDIAN
/* This is complicated slightly because vfp.zregs[n].d[0] is
* still the lowest and vfp.zregs[n].d[15] the highest of the
* 256 byte vector, even on big endian systems.
*
* Calculate the offset assuming fully little-endian,
* then XOR to account for the order of the 8-byte units.
*
* For 16 byte elements, the two 8 byte halves will not form a
* host int128 if the host is bigendian, since they're in the
* wrong order. However the only 16 byte operation we have is
* a move, so we can ignore this for the moment. More complicated
* operations will have to special case loading and storing from
* the zregs array.
*/
if (element_size < 8) {
offs ^= 8 - element_size;
}
#endif
offs += offsetof(CPUARMState, vfp.zregs[regno]);
assert_fp_access_checked(s);
return offs;
}
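/*
 * Worked example (illustrative only): on a big-endian host, a MO_16
 * element with element_size == 2 gets offs ^= 6, so element 0 of Qn
 * lands at byte offset 6 of vfp.zregs[n].d[0] (the least significant
 * 16 bits of the host uint64_t), element 1 at offset 4, and element 4
 * at offset 14 (inside d[1]); on a little-endian host the offsets are
 * simply 0, 2 and 8.
 */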
/* Return the offset into CPUARMState of the "whole" vector register Qn. */
static inline int vec_full_reg_offset(DisasContext *s, int regno)
{
assert_fp_access_checked(s);
return offsetof(CPUARMState, vfp.zregs[regno]);
}
/* Return a newly allocated pointer to the vector register. */
static inline TCGv_ptr vec_full_reg_ptr(DisasContext *s, int regno)
{
TCGv_ptr ret = tcg_temp_new_ptr();
tcg_gen_addi_ptr(ret, cpu_env, vec_full_reg_offset(s, regno));
return ret;
}
/* Return the byte size of the "whole" vector register, VL / 8. */
static inline int vec_full_reg_size(DisasContext *s)
{
return s->vl;
}
/* Return the byte size of the vector register, SVL / 8. */
static inline int streaming_vec_reg_size(DisasContext *s)
{
return s->svl;
}
/*
* Return the offset into CPUARMState of the predicate vector register Pn.
* Note for this purpose, FFR is P16.
*/
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
return offsetof(CPUARMState, vfp.pregs[regno]);
}
/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
return s->vl >> 3;
}
/* Return the byte size of the predicate register, SVL / 64. */
static inline int streaming_pred_reg_size(DisasContext *s)
{
return s->svl >> 3;
}
/*
* Round up the size of a register to a size allowed by
* the tcg vector infrastructure. Any operation which uses this
* size may assume that the bits above pred_full_reg_size are zero,
* and must leave them the same way.
*
* Note that this is not needed for the vector registers as they
* are always properly sized for tcg vectors.
*/
static inline int size_for_gvec(int size)
{
if (size <= 8) {
return 8;
} else {
return QEMU_ALIGN_UP(size, 16);
}
}
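/*
 * E.g. (illustrative): size_for_gvec(4) == 8 and size_for_gvec(18) == 32,
 * so a 4-byte predicate (256-bit vectors) is padded to the 8-byte gvec
 * minimum, while larger predicates round up to a multiple of 16 bytes.
 */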
static inline int pred_gvec_reg_size(DisasContext *s)
{
return size_for_gvec(pred_full_reg_size(s));
}
/* Return a newly allocated pointer to the predicate register. */
static inline TCGv_ptr pred_full_reg_ptr(DisasContext *s, int regno)
{
TCGv_ptr ret = tcg_temp_new_ptr();
tcg_gen_addi_ptr(ret, cpu_env, pred_full_reg_offset(s, regno));
return ret;
}
bool disas_sve(DisasContext *, uint32_t);
bool disas_sme(DisasContext *, uint32_t);
void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, int64_t shift,
uint32_t opr_sz, uint32_t max_sz);
void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
#endif /* TARGET_ARM_TRANSLATE_A64_H */

788
target/arm/tcg/translate-m-nocp.c Normal file

@ -0,0 +1,788 @@
/*
* ARM translation: M-profile NOCP special-case instructions
*
* Copyright (c) 2020 Linaro, Ltd.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "translate.h"
#include "translate-a32.h"
#include "decode-m-nocp.c.inc"
/*
* The decodes for VLLDM and VLSTM are nonstandard because:
* * if there is no FPU then these insns must NOP in
* Secure state and UNDEF in Nonsecure state
* * if there is an FPU then these insns do not have
* the usual behaviour that vfp_access_check() provides of
* being controlled by CPACR/NSACR enable bits or the
* lazy-stacking logic.
*/
static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a)
{
TCGv_i32 fptr;
if (!arm_dc_feature(s, ARM_FEATURE_M) ||
!arm_dc_feature(s, ARM_FEATURE_V8)) {
return false;
}
if (a->op) {
/*
* T2 encoding ({D0-D31} reglist): v8.1M and up. We choose not
* to take the IMPDEF option to make memory accesses to the stack
* slots that correspond to the D16-D31 registers (discarding
* read data and writing UNKNOWN values), so for us the T2
* encoding behaves identically to the T1 encoding.
*/
if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
return false;
}
} else {
/*
* T1 encoding ({D0-D15} reglist); undef if we have 32 Dregs.
* This is currently architecturally impossible, but we add the
* check to stay in line with the pseudocode. Note that we must
* emit code for the UNDEF so it takes precedence over the NOCP.
*/
if (dc_isar_feature(aa32_simd_r32, s)) {
unallocated_encoding(s);
return true;
}
}
/*
* If not secure, UNDEF. We must emit code for this
* rather than returning false so that this takes
* precedence over the m-nocp.decode NOCP fallback.
*/
if (!s->v8m_secure) {
unallocated_encoding(s);
return true;
}
s->eci_handled = true;
/* If no fpu, NOP. */
if (!dc_isar_feature(aa32_vfp, s)) {
clear_eci_state(s);
return true;
}
fptr = load_reg(s, a->rn);
if (a->l) {
gen_helper_v7m_vlldm(cpu_env, fptr);
} else {
gen_helper_v7m_vlstm(cpu_env, fptr);
}
tcg_temp_free_i32(fptr);
clear_eci_state(s);
/*
* End the TB, because we have updated FP control bits,
* and possibly VPR or LTPSIZE.
*/
s->base.is_jmp = DISAS_UPDATE_EXIT;
return true;
}
static bool trans_VSCCLRM(DisasContext *s, arg_VSCCLRM *a)
{
int btmreg, topreg;
TCGv_i64 zero;
TCGv_i32 aspen, sfpa;
if (!dc_isar_feature(aa32_m_sec_state, s)) {
/* Before v8.1M, fall through in decode to NOCP check */
return false;
}
/* Explicitly UNDEF because this takes precedence over NOCP */
if (!arm_dc_feature(s, ARM_FEATURE_M_MAIN) || !s->v8m_secure) {
unallocated_encoding(s);
return true;
}
s->eci_handled = true;
if (!dc_isar_feature(aa32_vfp_simd, s)) {
/* NOP if we have neither FP nor MVE */
clear_eci_state(s);
return true;
}
/*
* If FPCCR.ASPEN != 0 && CONTROL_S.SFPA == 0 then there is no
* active floating point context so we must NOP (without doing
* any lazy state preservation or the NOCP check).
*/
aspen = load_cpu_field(v7m.fpccr[M_REG_S]);
sfpa = load_cpu_field(v7m.control[M_REG_S]);
tcg_gen_andi_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
tcg_gen_xori_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
tcg_gen_andi_i32(sfpa, sfpa, R_V7M_CONTROL_SFPA_MASK);
tcg_gen_or_i32(sfpa, sfpa, aspen);
arm_gen_condlabel(s);
tcg_gen_brcondi_i32(TCG_COND_EQ, sfpa, 0, s->condlabel.label);
if (s->fp_excp_el != 0) {
gen_exception_insn_el(s, 0, EXCP_NOCP,
syn_uncategorized(), s->fp_excp_el);
return true;
}
topreg = a->vd + a->imm - 1;
btmreg = a->vd;
/* Convert to Sreg numbers if the insn specified the registers in Dregs */
if (a->size == 3) {
topreg = topreg * 2 + 1;
btmreg *= 2;
}
if (topreg > 63 || (topreg > 31 && !(topreg & 1))) {
/* UNPREDICTABLE: we choose to undef */
unallocated_encoding(s);
return true;
}
/* Silently ignore requests to clear D16-D31 if they don't exist */
if (topreg > 31 && !dc_isar_feature(aa32_simd_r32, s)) {
topreg = 31;
}
if (!vfp_access_check(s)) {
return true;
}
/* Zero the Sregs from btmreg to topreg inclusive. */
zero = tcg_constant_i64(0);
if (btmreg & 1) {
write_neon_element64(zero, btmreg >> 1, 1, MO_32);
btmreg++;
}
for (; btmreg + 1 <= topreg; btmreg += 2) {
write_neon_element64(zero, btmreg >> 1, 0, MO_64);
}
if (btmreg == topreg) {
write_neon_element64(zero, btmreg >> 1, 0, MO_32);
btmreg++;
}
assert(btmreg == topreg + 1);
if (dc_isar_feature(aa32_mve, s)) {
store_cpu_field(tcg_constant_i32(0), v7m.vpr);
}
clear_eci_state(s);
return true;
}
/*
* M-profile provides two different sets of instructions that can
* access floating point system registers: VMSR/VMRS (which move
* to/from a general purpose register) and VLDR/VSTR sysreg (which
* move directly to/from memory). In some cases there are also side
* effects which must happen after any write to memory (which could
* cause an exception). So we implement the common logic for the
* sysreg access in gen_M_fp_sysreg_write() and gen_M_fp_sysreg_read(),
* which take pointers to callback functions which will perform the
* actual "read/write general purpose register" and "read/write
* memory" operations.
*/
/*
* Emit code to store the sysreg to its final destination; frees the
* TCG temp 'value' it is passed. do_access is true to do the store,
* and false to skip it and only perform side-effects like base
* register writeback.
*/
typedef void fp_sysreg_storefn(DisasContext *s, void *opaque, TCGv_i32 value,
bool do_access);
/*
* Emit code to load the value to be copied to the sysreg; returns
* a new TCG temporary. do_access is true to do the load,
* and false to skip it and only perform side-effects like base
* register writeback.
*/
typedef TCGv_i32 fp_sysreg_loadfn(DisasContext *s, void *opaque,
bool do_access);
/* Common decode/access checks for fp sysreg read/write */
typedef enum FPSysRegCheckResult {
FPSysRegCheckFailed, /* caller should return false */
FPSysRegCheckDone, /* caller should return true */
FPSysRegCheckContinue, /* caller should continue generating code */
} FPSysRegCheckResult;
static FPSysRegCheckResult fp_sysreg_checks(DisasContext *s, int regno)
{
if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
return FPSysRegCheckFailed;
}
switch (regno) {
case ARM_VFP_FPSCR:
case QEMU_VFP_FPSCR_NZCV:
break;
case ARM_VFP_FPSCR_NZCVQC:
if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
return FPSysRegCheckFailed;
}
break;
case ARM_VFP_FPCXT_S:
case ARM_VFP_FPCXT_NS:
if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
return FPSysRegCheckFailed;
}
if (!s->v8m_secure) {
return FPSysRegCheckFailed;
}
break;
case ARM_VFP_VPR:
case ARM_VFP_P0:
if (!dc_isar_feature(aa32_mve, s)) {
return FPSysRegCheckFailed;
}
break;
default:
return FPSysRegCheckFailed;
}
/*
* FPCXT_NS is a special case: it has specific handling for
* "current FP state is inactive", and must do the PreserveFPState()
* but not the usual full set of actions done by ExecuteFPCheck().
* So we don't call vfp_access_check() and the callers must handle this.
*/
if (regno != ARM_VFP_FPCXT_NS && !vfp_access_check(s)) {
return FPSysRegCheckDone;
}
return FPSysRegCheckContinue;
}
static void gen_branch_fpInactive(DisasContext *s, TCGCond cond,
TCGLabel *label)
{
/*
* FPCXT_NS is a special case: it has specific handling for
* "current FP state is inactive", and must do the PreserveFPState()
* but not the usual full set of actions done by ExecuteFPCheck().
* We don't have a TB flag that matches the fpInactive check, so we
* do it at runtime as we don't expect FPCXT_NS accesses to be frequent.
*
* Emit code that checks fpInactive and does a conditional
* branch to label based on it:
* if cond is TCG_COND_NE then branch if fpInactive != 0 (ie if inactive)
* if cond is TCG_COND_EQ then branch if fpInactive == 0 (ie if active)
*/
assert(cond == TCG_COND_EQ || cond == TCG_COND_NE);
/* fpInactive = FPCCR_NS.ASPEN == 1 && CONTROL.FPCA == 0 */
TCGv_i32 aspen, fpca;
aspen = load_cpu_field(v7m.fpccr[M_REG_NS]);
fpca = load_cpu_field(v7m.control[M_REG_S]);
tcg_gen_andi_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
tcg_gen_xori_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
tcg_gen_andi_i32(fpca, fpca, R_V7M_CONTROL_FPCA_MASK);
tcg_gen_or_i32(fpca, fpca, aspen);
tcg_gen_brcondi_i32(tcg_invert_cond(cond), fpca, 0, label);
tcg_temp_free_i32(aspen);
tcg_temp_free_i32(fpca);
}
static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
fp_sysreg_loadfn *loadfn,
void *opaque)
{
/* Do a write to an M-profile floating point system register */
TCGv_i32 tmp;
TCGLabel *lab_end = NULL;
switch (fp_sysreg_checks(s, regno)) {
case FPSysRegCheckFailed:
return false;
case FPSysRegCheckDone:
return true;
case FPSysRegCheckContinue:
break;
}
switch (regno) {
case ARM_VFP_FPSCR:
tmp = loadfn(s, opaque, true);
gen_helper_vfp_set_fpscr(cpu_env, tmp);
tcg_temp_free_i32(tmp);
gen_lookup_tb(s);
break;
case ARM_VFP_FPSCR_NZCVQC:
{
TCGv_i32 fpscr;
tmp = loadfn(s, opaque, true);
if (dc_isar_feature(aa32_mve, s)) {
/* QC is only present for MVE; otherwise RES0 */
TCGv_i32 qc = tcg_temp_new_i32();
tcg_gen_andi_i32(qc, tmp, FPCR_QC);
/*
* The 4 vfp.qc[] fields need only be "zero" vs "non-zero";
* here writing the same value into all elements is simplest.
*/
tcg_gen_gvec_dup_i32(MO_32, offsetof(CPUARMState, vfp.qc),
16, 16, qc);
}
tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
fpscr = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
tcg_gen_andi_i32(fpscr, fpscr, ~FPCR_NZCV_MASK);
tcg_gen_or_i32(fpscr, fpscr, tmp);
store_cpu_field(fpscr, vfp.xregs[ARM_VFP_FPSCR]);
tcg_temp_free_i32(tmp);
break;
}
case ARM_VFP_FPCXT_NS:
{
TCGLabel *lab_active = gen_new_label();
lab_end = gen_new_label();
gen_branch_fpInactive(s, TCG_COND_EQ, lab_active);
/*
* fpInactive case: write is a NOP, so only do side effects
* like register writeback before we branch to end
*/
loadfn(s, opaque, false);
tcg_gen_br(lab_end);
gen_set_label(lab_active);
/*
* !fpInactive: if FPU disabled, take NOCP exception;
* otherwise PreserveFPState(), and then FPCXT_NS writes
* behave the same as FPCXT_S writes.
*/
if (!vfp_access_check_m(s, true)) {
/*
* This was only a conditional exception, so override
* gen_exception_insn_el()'s default to DISAS_NORETURN
*/
s->base.is_jmp = DISAS_NEXT;
break;
}
}
/* fall through */
case ARM_VFP_FPCXT_S:
{
TCGv_i32 sfpa, control;
/*
* Set FPSCR and CONTROL.SFPA from value; the new FPSCR takes
* bits [27:0] from value and zeroes bits [31:28].
*/
tmp = loadfn(s, opaque, true);
sfpa = tcg_temp_new_i32();
tcg_gen_shri_i32(sfpa, tmp, 31);
control = load_cpu_field(v7m.control[M_REG_S]);
tcg_gen_deposit_i32(control, control, sfpa,
R_V7M_CONTROL_SFPA_SHIFT, 1);
store_cpu_field(control, v7m.control[M_REG_S]);
tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
gen_helper_vfp_set_fpscr(cpu_env, tmp);
s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
tcg_temp_free_i32(tmp);
tcg_temp_free_i32(sfpa);
break;
}
case ARM_VFP_VPR:
/* Behaves as NOP if not privileged */
if (IS_USER(s)) {
loadfn(s, opaque, false);
break;
}
tmp = loadfn(s, opaque, true);
store_cpu_field(tmp, v7m.vpr);
s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
break;
case ARM_VFP_P0:
{
TCGv_i32 vpr;
tmp = loadfn(s, opaque, true);
vpr = load_cpu_field(v7m.vpr);
tcg_gen_deposit_i32(vpr, vpr, tmp,
R_V7M_VPR_P0_SHIFT, R_V7M_VPR_P0_LENGTH);
store_cpu_field(vpr, v7m.vpr);
s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
tcg_temp_free_i32(tmp);
break;
}
default:
g_assert_not_reached();
}
if (lab_end) {
gen_set_label(lab_end);
}
return true;
}
static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
fp_sysreg_storefn *storefn,
void *opaque)
{
/* Do a read from an M-profile floating point system register */
TCGv_i32 tmp;
TCGLabel *lab_end = NULL;
bool lookup_tb = false;
switch (fp_sysreg_checks(s, regno)) {
case FPSysRegCheckFailed:
return false;
case FPSysRegCheckDone:
return true;
case FPSysRegCheckContinue:
break;
}
if (regno == ARM_VFP_FPSCR_NZCVQC && !dc_isar_feature(aa32_mve, s)) {
/* QC is RES0 without MVE, so NZCVQC simplifies to NZCV */
regno = QEMU_VFP_FPSCR_NZCV;
}
switch (regno) {
case ARM_VFP_FPSCR:
tmp = tcg_temp_new_i32();
gen_helper_vfp_get_fpscr(tmp, cpu_env);
storefn(s, opaque, tmp, true);
break;
case ARM_VFP_FPSCR_NZCVQC:
tmp = tcg_temp_new_i32();
gen_helper_vfp_get_fpscr(tmp, cpu_env);
tcg_gen_andi_i32(tmp, tmp, FPCR_NZCVQC_MASK);
storefn(s, opaque, tmp, true);
break;
case QEMU_VFP_FPSCR_NZCV:
/*
* Read just NZCV; this is a special case to avoid the
* helper call for the "VMRS to CPSR.NZCV" insn.
*/
tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
storefn(s, opaque, tmp, true);
break;
case ARM_VFP_FPCXT_S:
{
TCGv_i32 control, sfpa, fpscr;
/* Bits [27:0] from FPSCR, bit [31] from CONTROL.SFPA */
tmp = tcg_temp_new_i32();
sfpa = tcg_temp_new_i32();
gen_helper_vfp_get_fpscr(tmp, cpu_env);
tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
control = load_cpu_field(v7m.control[M_REG_S]);
tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
tcg_gen_or_i32(tmp, tmp, sfpa);
tcg_temp_free_i32(sfpa);
/*
* Store result before updating FPSCR etc, in case
* it is a memory write which causes an exception.
*/
storefn(s, opaque, tmp, true);
/*
* Now we must reset FPSCR from FPDSCR_NS, and clear
* CONTROL.SFPA; so we'll end the TB here.
*/
tcg_gen_andi_i32(control, control, ~R_V7M_CONTROL_SFPA_MASK);
store_cpu_field(control, v7m.control[M_REG_S]);
fpscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
gen_helper_vfp_set_fpscr(cpu_env, fpscr);
tcg_temp_free_i32(fpscr);
lookup_tb = true;
break;
}
case ARM_VFP_FPCXT_NS:
{
TCGv_i32 control, sfpa, fpscr, fpdscr;
TCGLabel *lab_active = gen_new_label();
lookup_tb = true;
gen_branch_fpInactive(s, TCG_COND_EQ, lab_active);
/* fpInactive case: reads as FPDSCR_NS */
TCGv_i32 tmp = load_cpu_field(v7m.fpdscr[M_REG_NS]);
storefn(s, opaque, tmp, true);
lab_end = gen_new_label();
tcg_gen_br(lab_end);
gen_set_label(lab_active);
/*
* !fpInactive: if FPU disabled, take NOCP exception;
* otherwise PreserveFPState(), and then FPCXT_NS
* reads the same as FPCXT_S.
*/
if (!vfp_access_check_m(s, true)) {
/*
* This was only a conditional exception, so override
* gen_exception_insn_el()'s default to DISAS_NORETURN
*/
s->base.is_jmp = DISAS_NEXT;
break;
}
tmp = tcg_temp_new_i32();
sfpa = tcg_temp_new_i32();
fpscr = tcg_temp_new_i32();
gen_helper_vfp_get_fpscr(fpscr, cpu_env);
tcg_gen_andi_i32(tmp, fpscr, ~FPCR_NZCV_MASK);
control = load_cpu_field(v7m.control[M_REG_S]);
tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
tcg_gen_or_i32(tmp, tmp, sfpa);
tcg_temp_free_i32(control);
/* Store result before updating FPSCR, in case it faults */
storefn(s, opaque, tmp, true);
/* If SFPA is zero then set FPSCR from FPDSCR_NS */
fpdscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
tcg_gen_movcond_i32(TCG_COND_EQ, fpscr, sfpa, tcg_constant_i32(0),
fpdscr, fpscr);
gen_helper_vfp_set_fpscr(cpu_env, fpscr);
tcg_temp_free_i32(sfpa);
tcg_temp_free_i32(fpdscr);
tcg_temp_free_i32(fpscr);
break;
}
case ARM_VFP_VPR:
/* Behaves as NOP if not privileged */
if (IS_USER(s)) {
storefn(s, opaque, NULL, false);
break;
}
tmp = load_cpu_field(v7m.vpr);
storefn(s, opaque, tmp, true);
break;
case ARM_VFP_P0:
tmp = load_cpu_field(v7m.vpr);
tcg_gen_extract_i32(tmp, tmp, R_V7M_VPR_P0_SHIFT, R_V7M_VPR_P0_LENGTH);
storefn(s, opaque, tmp, true);
break;
default:
g_assert_not_reached();
}
if (lab_end) {
gen_set_label(lab_end);
}
if (lookup_tb) {
gen_lookup_tb(s);
}
return true;
}
static void fp_sysreg_to_gpr(DisasContext *s, void *opaque, TCGv_i32 value,
bool do_access)
{
arg_VMSR_VMRS *a = opaque;
if (!do_access) {
return;
}
if (a->rt == 15) {
/* Set the 4 flag bits in the CPSR */
gen_set_nzcv(value);
tcg_temp_free_i32(value);
} else {
store_reg(s, a->rt, value);
}
}
static TCGv_i32 gpr_to_fp_sysreg(DisasContext *s, void *opaque, bool do_access)
{
arg_VMSR_VMRS *a = opaque;
if (!do_access) {
return NULL;
}
return load_reg(s, a->rt);
}
static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
{
/*
* Accesses to R15 are UNPREDICTABLE; we choose to undef.
* FPSCR -> r15 is a special case which writes to the PSR flags;
* set a->reg to a special value to tell gen_M_fp_sysreg_read()
* we only care about the top 4 bits of FPSCR there.
*/
if (a->rt == 15) {
if (a->l && a->reg == ARM_VFP_FPSCR) {
a->reg = QEMU_VFP_FPSCR_NZCV;
} else {
return false;
}
}
if (a->l) {
/* VMRS, move FP system register to gp register */
return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_gpr, a);
} else {
/* VMSR, move gp register to FP system register */
return gen_M_fp_sysreg_write(s, a->reg, gpr_to_fp_sysreg, a);
}
}
static void fp_sysreg_to_memory(DisasContext *s, void *opaque, TCGv_i32 value,
bool do_access)
{
arg_vldr_sysreg *a = opaque;
uint32_t offset = a->imm;
TCGv_i32 addr;
if (!a->a) {
offset = -offset;
}
if (!do_access && !a->w) {
return;
}
addr = load_reg(s, a->rn);
if (a->p) {
tcg_gen_addi_i32(addr, addr, offset);
}
if (s->v8m_stackcheck && a->rn == 13 && a->w) {
gen_helper_v8m_stackcheck(cpu_env, addr);
}
if (do_access) {
gen_aa32_st_i32(s, value, addr, get_mem_index(s),
MO_UL | MO_ALIGN | s->be_data);
tcg_temp_free_i32(value);
}
if (a->w) {
/* writeback */
if (!a->p) {
tcg_gen_addi_i32(addr, addr, offset);
}
store_reg(s, a->rn, addr);
} else {
tcg_temp_free_i32(addr);
}
}
static TCGv_i32 memory_to_fp_sysreg(DisasContext *s, void *opaque,
bool do_access)
{
arg_vldr_sysreg *a = opaque;
uint32_t offset = a->imm;
TCGv_i32 addr;
TCGv_i32 value = NULL;
if (!a->a) {
offset = -offset;
}
if (!do_access && !a->w) {
return NULL;
}
addr = load_reg(s, a->rn);
if (a->p) {
tcg_gen_addi_i32(addr, addr, offset);
}
if (s->v8m_stackcheck && a->rn == 13 && a->w) {
gen_helper_v8m_stackcheck(cpu_env, addr);
}
if (do_access) {
value = tcg_temp_new_i32();
gen_aa32_ld_i32(s, value, addr, get_mem_index(s),
MO_UL | MO_ALIGN | s->be_data);
}
if (a->w) {
/* writeback */
if (!a->p) {
tcg_gen_addi_i32(addr, addr, offset);
}
store_reg(s, a->rn, addr);
} else {
tcg_temp_free_i32(addr);
}
return value;
}
static bool trans_VLDR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
{
if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
return false;
}
if (a->rn == 15) {
return false;
}
return gen_M_fp_sysreg_write(s, a->reg, memory_to_fp_sysreg, a);
}
static bool trans_VSTR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
{
if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
return false;
}
if (a->rn == 15) {
return false;
}
return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_memory, a);
}
static bool trans_NOCP(DisasContext *s, arg_nocp *a)
{
/*
* Handle M-profile early check for disabled coprocessor:
* all we need to do here is emit the NOCP exception if
* the coprocessor is disabled. Otherwise we return false
* and the real VFP/etc decode will handle the insn.
*/
assert(arm_dc_feature(s, ARM_FEATURE_M));
if (a->cp == 11) {
a->cp = 10;
}
if (arm_dc_feature(s, ARM_FEATURE_V8_1M) &&
(a->cp == 8 || a->cp == 9 || a->cp == 14 || a->cp == 15)) {
/* in v8.1M cp 8, 9, 14, 15 also are governed by the cp10 enable */
a->cp = 10;
}
if (a->cp != 10) {
gen_exception_insn(s, 0, EXCP_NOCP, syn_uncategorized());
return true;
}
if (s->fp_excp_el != 0) {
gen_exception_insn_el(s, 0, EXCP_NOCP,
syn_uncategorized(), s->fp_excp_el);
return true;
}
return false;
}
static bool trans_NOCP_8_1(DisasContext *s, arg_nocp *a)
{
/* This range needs a coprocessor check for v8.1M and later only */
if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
return false;
}
return trans_NOCP(s, a);
}

File diff suppressed because it is too large

File diff suppressed because it is too large

373
target/arm/tcg/translate-sme.c Normal file

@ -0,0 +1,373 @@
/*
* AArch64 SME translation
*
* Copyright (c) 2022 Linaro, Ltd
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "cpu.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "tcg/tcg-gvec-desc.h"
#include "translate.h"
#include "exec/helper-gen.h"
#include "translate-a64.h"
#include "fpu/softfloat.h"
/*
* Include the generated decoder.
*/
#include "decode-sme.c.inc"
/*
* Resolve tile.size[index] to a host pointer, where tile and index
* are always decoded together, dependent on the element size.
*/
static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs,
int tile_index, bool vertical)
{
int tile = tile_index >> (4 - esz);
int index = esz == MO_128 ? 0 : extract32(tile_index, 0, 4 - esz);
int pos, len, offset;
TCGv_i32 tmp;
TCGv_ptr addr;
/* Compute the final index, which is Rs+imm. */
tmp = tcg_temp_new_i32();
tcg_gen_trunc_tl_i32(tmp, cpu_reg(s, rs));
tcg_gen_addi_i32(tmp, tmp, index);
/* Prepare a power-of-two modulo via extraction of @len bits. */
len = ctz32(streaming_vec_reg_size(s)) - esz;
if (vertical) {
/*
* Compute the byte offset of the index within the tile:
* (index % (svl / size)) * size
* = (index % (svl >> esz)) << esz
* Perform the power-of-two modulo via extraction of the low @len bits.
* Perform the multiply by shifting left by @pos bits.
* Perform these operations simultaneously via deposit into zero.
*/
pos = esz;
tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
/*
* For big-endian, adjust the indexed column byte offset within
* the uint64_t host words that make up env->zarray[].
*/
if (HOST_BIG_ENDIAN && esz < MO_64) {
tcg_gen_xori_i32(tmp, tmp, 8 - (1 << esz));
}
} else {
/*
* Compute the byte offset of the index within the tile:
* (index % (svl / size)) * (size * sizeof(row))
* = (index % (svl >> esz)) << (esz + log2(sizeof(row)))
*/
pos = esz + ctz32(sizeof(ARMVectorReg));
tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
/* Row slices are always aligned and need no endian adjustment. */
}
/* The tile byte offset within env->zarray is the row. */
offset = tile * sizeof(ARMVectorReg);
/* Include the byte offset of zarray to make this relative to env. */
offset += offsetof(CPUARMState, zarray);
tcg_gen_addi_i32(tmp, tmp, offset);
/* Add the byte offset to env to produce the final pointer. */
addr = tcg_temp_new_ptr();
tcg_gen_ext_i32_ptr(addr, tmp);
tcg_temp_free_i32(tmp);
tcg_gen_add_ptr(addr, addr, cpu_env);
return addr;
}
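/*
 * Worked example (illustrative, assuming sizeof(ARMVectorReg) == 256 as
 * with ARM_MAX_VQ == 16): for SVL = 64 bytes and esz = MO_32, len = 4,
 * so the index is taken modulo 16. A vertical slice uses pos = 2 and
 * yields a byte column offset of (index % 16) * 4 within the row;
 * a horizontal slice uses pos = 2 + 8 = 10 and yields a row offset of
 * (index % 16) * 1024, i.e. every fourth 256-byte ZA row belongs to
 * this .S tile.
 */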
static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
{
if (!dc_isar_feature(aa64_sme, s)) {
return false;
}
if (sme_za_enabled_check(s)) {
gen_helper_sme_zero(cpu_env, tcg_constant_i32(a->imm),
tcg_constant_i32(streaming_vec_reg_size(s)));
}
return true;
}
static bool trans_MOVA(DisasContext *s, arg_MOVA *a)
{
static gen_helper_gvec_4 * const h_fns[5] = {
gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d,
gen_helper_sve_sel_zpzz_q
};
static gen_helper_gvec_3 * const cz_fns[5] = {
gen_helper_sme_mova_cz_b, gen_helper_sme_mova_cz_h,
gen_helper_sme_mova_cz_s, gen_helper_sme_mova_cz_d,
gen_helper_sme_mova_cz_q,
};
static gen_helper_gvec_3 * const zc_fns[5] = {
gen_helper_sme_mova_zc_b, gen_helper_sme_mova_zc_h,
gen_helper_sme_mova_zc_s, gen_helper_sme_mova_zc_d,
gen_helper_sme_mova_zc_q,
};
TCGv_ptr t_za, t_zr, t_pg;
TCGv_i32 t_desc;
int svl;
if (!dc_isar_feature(aa64_sme, s)) {
return false;
}
if (!sme_smza_enabled_check(s)) {
return true;
}
t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
t_zr = vec_full_reg_ptr(s, a->zr);
t_pg = pred_full_reg_ptr(s, a->pg);
svl = streaming_vec_reg_size(s);
t_desc = tcg_constant_i32(simd_desc(svl, svl, 0));
if (a->v) {
/* Vertical slice -- use sme mova helpers. */
if (a->to_vec) {
zc_fns[a->esz](t_zr, t_za, t_pg, t_desc);
} else {
cz_fns[a->esz](t_za, t_zr, t_pg, t_desc);
}
} else {
/* Horizontal slice -- reuse sve sel helpers. */
if (a->to_vec) {
h_fns[a->esz](t_zr, t_za, t_zr, t_pg, t_desc);
} else {
h_fns[a->esz](t_za, t_zr, t_za, t_pg, t_desc);
}
}
tcg_temp_free_ptr(t_za);
tcg_temp_free_ptr(t_zr);
tcg_temp_free_ptr(t_pg);
return true;
}
static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
{
typedef void GenLdSt1(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv, TCGv_i32);
/*
* Indexed by [esz][be][v][mte][st], which is (except for load/store)
* also the order in which the elements appear in the function names,
* and so how we must concatenate the pieces.
*/
#define FN_LS(F) { gen_helper_sme_ld1##F, gen_helper_sme_st1##F }
#define FN_MTE(F) { FN_LS(F), FN_LS(F##_mte) }
#define FN_HV(F) { FN_MTE(F##_h), FN_MTE(F##_v) }
#define FN_END(L, B) { FN_HV(L), FN_HV(B) }
static GenLdSt1 * const fns[5][2][2][2][2] = {
FN_END(b, b),
FN_END(h_le, h_be),
FN_END(s_le, s_be),
FN_END(d_le, d_be),
FN_END(q_le, q_be),
};
#undef FN_LS
#undef FN_MTE
#undef FN_HV
#undef FN_END
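/*
 * Illustrative example: fns[MO_64][1][0][1][1] resolves to
 * gen_helper_sme_st1d_be_h_mte, i.e. a doubleword, big-endian,
 * horizontal-slice, MTE-checked store.
 */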
TCGv_ptr t_za, t_pg;
TCGv_i64 addr;
int svl, desc = 0;
bool be = s->be_data == MO_BE;
bool mte = s->mte_active[0];
if (!dc_isar_feature(aa64_sme, s)) {
return false;
}
if (!sme_smza_enabled_check(s)) {
return true;
}
t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
t_pg = pred_full_reg_ptr(s, a->pg);
addr = tcg_temp_new_i64();
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->esz);
tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
if (mte) {
desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
desc = FIELD_DP32(desc, MTEDESC, WRITE, a->st);
desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << a->esz) - 1);
desc <<= SVE_MTEDESC_SHIFT;
} else {
addr = clean_data_tbi(s, addr);
}
svl = streaming_vec_reg_size(s);
desc = simd_desc(svl, svl, desc);
fns[a->esz][be][a->v][mte][a->st](cpu_env, t_za, t_pg, addr,
tcg_constant_i32(desc));
tcg_temp_free_ptr(t_za);
tcg_temp_free_ptr(t_pg);
tcg_temp_free_i64(addr);
return true;
}
typedef void GenLdStR(DisasContext *, TCGv_ptr, int, int, int, int);
static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
{
int svl = streaming_vec_reg_size(s);
int imm = a->imm;
TCGv_ptr base;
if (!sme_za_enabled_check(s)) {
return true;
}
/* ZA[n] equates to ZA0H.B[n]. */
base = get_tile_rowcol(s, MO_8, a->rv, imm, false);
fn(s, base, 0, svl, a->rn, imm * svl);
tcg_temp_free_ptr(base);
return true;
}
TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr)
TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str)
static bool do_adda(DisasContext *s, arg_adda *a, MemOp esz,
gen_helper_gvec_4 *fn)
{
int svl = streaming_vec_reg_size(s);
uint32_t desc = simd_desc(svl, svl, 0);
TCGv_ptr za, zn, pn, pm;
if (!sme_smza_enabled_check(s)) {
return true;
}
/* Sum XZR+zad to find ZAd. */
za = get_tile_rowcol(s, esz, 31, a->zad, false);
zn = vec_full_reg_ptr(s, a->zn);
pn = pred_full_reg_ptr(s, a->pn);
pm = pred_full_reg_ptr(s, a->pm);
fn(za, zn, pn, pm, tcg_constant_i32(desc));
tcg_temp_free_ptr(za);
tcg_temp_free_ptr(zn);
tcg_temp_free_ptr(pn);
tcg_temp_free_ptr(pm);
return true;
}
TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s)
TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz,
gen_helper_gvec_5 *fn)
{
int svl = streaming_vec_reg_size(s);
uint32_t desc = simd_desc(svl, svl, a->sub);
TCGv_ptr za, zn, zm, pn, pm;
if (!sme_smza_enabled_check(s)) {
return true;
}
/* Sum XZR+zad to find ZAd. */
za = get_tile_rowcol(s, esz, 31, a->zad, false);
zn = vec_full_reg_ptr(s, a->zn);
zm = vec_full_reg_ptr(s, a->zm);
pn = pred_full_reg_ptr(s, a->pn);
pm = pred_full_reg_ptr(s, a->pm);
fn(za, zn, zm, pn, pm, tcg_constant_i32(desc));
tcg_temp_free_ptr(za);
tcg_temp_free_ptr(zn);
tcg_temp_free_ptr(zm);
tcg_temp_free_ptr(pn);
tcg_temp_free_ptr(pm);
return true;
}
static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
gen_helper_gvec_5_ptr *fn)
{
int svl = streaming_vec_reg_size(s);
uint32_t desc = simd_desc(svl, svl, a->sub);
TCGv_ptr za, zn, zm, pn, pm, fpst;
if (!sme_smza_enabled_check(s)) {
return true;
}
/* Sum XZR+zad to find ZAd. */
za = get_tile_rowcol(s, esz, 31, a->zad, false);
zn = vec_full_reg_ptr(s, a->zn);
zm = vec_full_reg_ptr(s, a->zm);
pn = pred_full_reg_ptr(s, a->pn);
pm = pred_full_reg_ptr(s, a->pm);
fpst = fpstatus_ptr(FPST_FPCR);
fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc));
tcg_temp_free_ptr(za);
tcg_temp_free_ptr(zn);
tcg_temp_free_ptr(zm);
tcg_temp_free_ptr(pn);
tcg_temp_free_ptr(pm);
tcg_temp_free_ptr(fpst);
return true;
}
TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h)
TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
/* TODO: FEAT_EBF16 */
TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
TRANS_FEAT(SMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_smopa_s)
TRANS_FEAT(UMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_umopa_s)
TRANS_FEAT(SUMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_sumopa_s)
TRANS_FEAT(USMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_usmopa_s)
TRANS_FEAT(SMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_smopa_d)
TRANS_FEAT(UMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_umopa_d)
TRANS_FEAT(SUMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_sumopa_d)
TRANS_FEAT(USMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_usmopa_d)

File diff suppressed because it is too large

File diff suppressed because it is too large

9990
target/arm/tcg/translate.c Normal file

File diff suppressed because it is too large

644
target/arm/tcg/translate.h Normal file
View file

@ -0,0 +1,644 @@
#ifndef TARGET_ARM_TRANSLATE_H
#define TARGET_ARM_TRANSLATE_H
#include "exec/translator.h"
#include "internals.h"
/* internal defines */
/*
* Save pc_save across a branch, so that we may restore the value from
* before the branch at the point the label is emitted.
*/
typedef struct DisasLabel {
TCGLabel *label;
target_ulong pc_save;
} DisasLabel;
typedef struct DisasContext {
DisasContextBase base;
const ARMISARegisters *isar;
/* The address of the current instruction being translated. */
target_ulong pc_curr;
/*
* For TARGET_TB_PCREL, the full value of cpu_pc is not known
* (although the page offset is known). For convenience, the
* translation loop uses the full virtual address that triggered
* the translation, from base.pc_start through pc_curr.
* For efficiency, we do not update cpu_pc for every instruction.
* Instead, pc_save has the value of pc_curr at the time of the
* last update to cpu_pc, which allows us to compute the addend
* needed to bring cpu_pc current: pc_curr - pc_save.
* If cpu_pc now contains the destination of an indirect branch,
* pc_save contains -1 to indicate that relative updates are no
* longer possible.
*/
target_ulong pc_save;
target_ulong page_start;
uint32_t insn;
/* Nonzero if this instruction has been conditionally skipped. */
int condjmp;
/* The label that will be jumped to when the instruction is skipped. */
DisasLabel condlabel;
/* Thumb-2 conditional execution bits. */
int condexec_mask;
int condexec_cond;
/* M-profile ECI/ICI exception-continuable instruction state */
int eci;
/*
* trans_ functions for insns which are continuable should set this true
* after decode (ie after any UNDEF checks)
*/
bool eci_handled;
int sctlr_b;
MemOp be_data;
#if !defined(CONFIG_USER_ONLY)
int user;
#endif
ARMMMUIdx mmu_idx; /* MMU index to use for normal loads/stores */
uint8_t tbii; /* TBI1|TBI0 for insns */
uint8_t tbid; /* TBI1|TBI0 for data */
uint8_t tcma; /* TCMA1|TCMA0 for MTE */
bool ns; /* Use non-secure CPREG bank on access */
int fp_excp_el; /* FP exception EL or 0 if enabled */
int sve_excp_el; /* SVE exception EL or 0 if enabled */
int sme_excp_el; /* SME exception EL or 0 if enabled */
int vl; /* current vector length in bytes */
int svl; /* current streaming vector length in bytes */
bool vfp_enabled; /* FP enabled via FPSCR.EN */
int vec_len;
int vec_stride;
bool v7m_handler_mode;
bool v8m_secure; /* true if v8M and we're in Secure mode */
bool v8m_stackcheck; /* true if we need to perform v8M stack limit checks */
bool v8m_fpccr_s_wrong; /* true if v8M FPCCR.S != v8m_secure */
bool v7m_new_fp_ctxt_needed; /* ASPEN set but no active FP context */
bool v7m_lspact; /* FPCCR.LSPACT set */
/* Immediate value in AArch32 SVC insn; must be set if is_jmp == DISAS_SWI
* so that top level loop can generate correct syndrome information.
*/
uint32_t svc_imm;
int current_el;
GHashTable *cp_regs;
uint64_t features; /* CPU features bits */
bool aarch64;
bool thumb;
/* Because unallocated encodings generate different exception syndrome
* information from traps due to FP being disabled, we can't do a single
* "is fp access disabled" check at a high level in the decode tree.
* To help in catching bugs where the access check was forgotten in some
* code path, we set this flag when the access check is done, and assert
* that it is set at the point where we actually touch the FP regs.
*/
bool fp_access_checked;
bool sve_access_checked;
/* ARMv8 single-step state (this is distinct from the QEMU gdbstub
* single-step support).
*/
bool ss_active;
bool pstate_ss;
/* True if the insn just emitted was a load-exclusive instruction
* (necessary for syndrome information for single step exceptions),
* ie A64 LDX*, LDAX*, A32/T32 LDREX*, LDAEX*.
*/
bool is_ldex;
/* True if AccType_UNPRIV should be used for LDTR et al */
bool unpriv;
/* True if v8.3-PAuth is active. */
bool pauth_active;
/* True if v8.5-MTE access to tags is enabled. */
bool ata;
/* True if v8.5-MTE tag checks affect the PE; index with is_unpriv. */
bool mte_active[2];
/* True with v8.5-BTI and SCTLR_ELx.BT* set. */
bool bt;
/* True if any CP15 access is trapped by HSTR_EL2 */
bool hstr_active;
/* True if memory operations require alignment */
bool align_mem;
/* True if PSTATE.IL is set */
bool pstate_il;
/* True if PSTATE.SM is set. */
bool pstate_sm;
/* True if PSTATE.ZA is set. */
bool pstate_za;
/* True if non-streaming insns should raise an SME Streaming exception. */
bool sme_trap_nonstreaming;
/* True if the current instruction is non-streaming. */
bool is_nonstreaming;
/* True if MVE insns are definitely not predicated by VPR or LTPSIZE */
bool mve_no_pred;
/* True if fine-grained traps are active */
bool fgt_active;
/* True if fine-grained trap on ERET is enabled */
bool fgt_eret;
/* True if fine-grained trap on SVC is enabled */
bool fgt_svc;
/*
* >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI.
* < 0, set by the current instruction.
*/
int8_t btype;
/* A copy of cpu->dcz_blocksize. */
uint8_t dcz_blocksize;
/* True if this page is guarded. */
bool guarded_page;
/* Bottom two bits of XScale c15_cpar coprocessor access control reg */
int c15_cpar;
/* TCG op of the current insn_start. */
TCGOp *insn_start;
#define TMP_A64_MAX 16
int tmp_a64_count;
TCGv_i64 tmp_a64[TMP_A64_MAX];
} DisasContext;
typedef struct DisasCompare {
TCGCond cond;
TCGv_i32 value;
bool value_global;
} DisasCompare;
/* Share the TCG temporaries common between 32 and 64 bit modes. */
extern TCGv_i32 cpu_NF, cpu_ZF, cpu_CF, cpu_VF;
extern TCGv_i64 cpu_exclusive_addr;
extern TCGv_i64 cpu_exclusive_val;
/*
* Constant expanders for the decoders.
*/
static inline int negate(DisasContext *s, int x)
{
return -x;
}
static inline int plus_1(DisasContext *s, int x)
{
return x + 1;
}
static inline int plus_2(DisasContext *s, int x)
{
return x + 2;
}
static inline int plus_12(DisasContext *s, int x)
{
return x + 12;
}
static inline int times_2(DisasContext *s, int x)
{
return x * 2;
}
static inline int times_4(DisasContext *s, int x)
{
return x * 4;
}
static inline int times_2_plus_1(DisasContext *s, int x)
{
return x * 2 + 1;
}
static inline int rsub_64(DisasContext *s, int x)
{
return 64 - x;
}
static inline int rsub_32(DisasContext *s, int x)
{
return 32 - x;
}
static inline int rsub_16(DisasContext *s, int x)
{
return 16 - x;
}
static inline int rsub_8(DisasContext *s, int x)
{
return 8 - x;
}
static inline int neon_3same_fp_size(DisasContext *s, int x)
{
/* Convert 0==fp32, 1==fp16 into a MO_* value */
return MO_32 - x;
}
static inline int arm_dc_feature(DisasContext *dc, int feature)
{
return (dc->features & (1ULL << feature)) != 0;
}
static inline int get_mem_index(DisasContext *s)
{
return arm_to_core_mmu_idx(s->mmu_idx);
}
static inline void disas_set_insn_syndrome(DisasContext *s, uint32_t syn)
{
/* We don't need to save all of the syndrome so we mask and shift
* out unneeded bits to help the sleb128 encoder do a better job.
*/
syn &= ARM_INSN_START_WORD2_MASK;
syn >>= ARM_INSN_START_WORD2_SHIFT;
/* We check and clear insn_start to catch multiple updates. */
assert(s->insn_start != NULL);
tcg_set_insn_start_param(s->insn_start, 2, syn);
s->insn_start = NULL;
}
static inline int curr_insn_len(DisasContext *s)
{
return s->base.pc_next - s->pc_curr;
}
/* is_jmp field values */
#define DISAS_JUMP DISAS_TARGET_0 /* only pc was modified dynamically */
/* CPU state was modified dynamically; exit to main loop for interrupts. */
#define DISAS_UPDATE_EXIT DISAS_TARGET_1
/* These instructions trap after executing, so the A32/T32 decoder must
* defer them until after the conditional execution state has been updated.
* WFI also needs special handling when single-stepping.
*/
#define DISAS_WFI DISAS_TARGET_2
#define DISAS_SWI DISAS_TARGET_3
/* WFE */
#define DISAS_WFE DISAS_TARGET_4
#define DISAS_HVC DISAS_TARGET_5
#define DISAS_SMC DISAS_TARGET_6
#define DISAS_YIELD DISAS_TARGET_7
/* M profile branch which might be an exception return (and so needs
* custom end-of-TB code)
*/
#define DISAS_BX_EXCRET DISAS_TARGET_8
/*
* For instructions which want an immediate exit to the main loop, as opposed
* to attempting to use lookup_and_goto_ptr. Unlike DISAS_UPDATE_EXIT, this
* doesn't write the PC on exiting the translation loop so you need to ensure
* something (gen_a64_update_pc or runtime helper) has done so before we reach
* return from cpu_tb_exec.
*/
#define DISAS_EXIT DISAS_TARGET_9
/* CPU state was modified dynamically; no need to exit, but do not chain. */
#define DISAS_UPDATE_NOCHAIN DISAS_TARGET_10
#ifdef TARGET_AARCH64
void a64_translate_init(void);
void gen_a64_update_pc(DisasContext *s, target_long diff);
extern const TranslatorOps aarch64_translator_ops;
#else
static inline void a64_translate_init(void)
{
}
static inline void gen_a64_update_pc(DisasContext *s, target_long diff)
{
}
#endif
void arm_test_cc(DisasCompare *cmp, int cc);
void arm_free_cc(DisasCompare *cmp);
void arm_jump_cc(DisasCompare *cmp, TCGLabel *label);
void arm_gen_test_cc(int cc, TCGLabel *label);
MemOp pow2_align(unsigned i);
void unallocated_encoding(DisasContext *s);
void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp,
uint32_t syn, uint32_t target_el);
void gen_exception_insn(DisasContext *s, target_long pc_diff,
int excp, uint32_t syn);
/* Return state of Alternate Half-precision flag, caller frees result */
static inline TCGv_i32 get_ahp_flag(void)
{
TCGv_i32 ret = tcg_temp_new_i32();
tcg_gen_ld_i32(ret, cpu_env,
offsetof(CPUARMState, vfp.xregs[ARM_VFP_FPSCR]));
tcg_gen_extract_i32(ret, ret, 26, 1);
return ret;
}
/* Set bits within PSTATE. */
static inline void set_pstate_bits(uint32_t bits)
{
TCGv_i32 p = tcg_temp_new_i32();
tcg_debug_assert(!(bits & CACHED_PSTATE_BITS));
tcg_gen_ld_i32(p, cpu_env, offsetof(CPUARMState, pstate));
tcg_gen_ori_i32(p, p, bits);
tcg_gen_st_i32(p, cpu_env, offsetof(CPUARMState, pstate));
tcg_temp_free_i32(p);
}
/* Clear bits within PSTATE. */
static inline void clear_pstate_bits(uint32_t bits)
{
TCGv_i32 p = tcg_temp_new_i32();
tcg_debug_assert(!(bits & CACHED_PSTATE_BITS));
tcg_gen_ld_i32(p, cpu_env, offsetof(CPUARMState, pstate));
tcg_gen_andi_i32(p, p, ~bits);
tcg_gen_st_i32(p, cpu_env, offsetof(CPUARMState, pstate));
tcg_temp_free_i32(p);
}
/* If the singlestep state is Active-not-pending, advance to Active-pending. */
static inline void gen_ss_advance(DisasContext *s)
{
if (s->ss_active) {
s->pstate_ss = 0;
clear_pstate_bits(PSTATE_SS);
}
}
/* Generate an architectural singlestep exception */
static inline void gen_swstep_exception(DisasContext *s, int isv, int ex)
{
/* Fill in the same_el field of the syndrome in the helper. */
uint32_t syn = syn_swstep(false, isv, ex);
gen_helper_exception_swstep(cpu_env, tcg_constant_i32(syn));
}
/*
* Given a VFP floating point constant encoded into an 8 bit immediate in an
* instruction, expand it to the actual constant value of the specified
* size, as per the VFPExpandImm() pseudocode in the Arm ARM.
*/
uint64_t vfp_expand_imm(int size, uint8_t imm8);
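/*
 * Worked example (illustrative): for MO_32, imm8 = 0x70 encodes 1.0, so
 * vfp_expand_imm(MO_32, 0x70) yields 0x3f800000; the same imm8 for MO_64
 * yields 0x3ff0000000000000.
 */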
/* Vector operations shared between ARM and AArch64. */
void gen_gvec_ceq0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_clt0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_cgt0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_cle0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_cge0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
void gen_ushl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
void gen_sshl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
void gen_ushl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
void gen_sshl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
int64_t shift, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
int64_t shift, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
int64_t shift, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
int64_t shift, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
int64_t shift, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
int64_t shift, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
int64_t shift, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
int64_t shift, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
/*
* Forward to the isar_feature_* tests given a DisasContext pointer.
*/
#define dc_isar_feature(name, ctx) \
({ DisasContext *ctx_ = (ctx); isar_feature_##name(ctx_->isar); })
/* Note that the gvec expanders operate on offsets + sizes. */
typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t,
uint32_t, uint32_t);
typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
uint32_t, uint32_t, uint32_t);
typedef void GVecGen4Fn(unsigned, uint32_t, uint32_t, uint32_t,
uint32_t, uint32_t, uint32_t);
/* Function prototype for gen_ functions for calling Neon helpers */
typedef void NeonGenOneOpFn(TCGv_i32, TCGv_i32);
typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoOpWidenFn(TCGv_i64, TCGv_i32, TCGv_i32);
typedef void NeonGenOneSingleOpFn(TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoSingleOpFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOne64OpFn(TCGv_i64, TCGv_i64);
typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp);
typedef void WideShiftImmFn(TCGv_i64, TCGv_i64, int64_t shift);
typedef void WideShiftFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void ShiftImmFn(TCGv_i32, TCGv_i32, int32_t shift);
typedef void ShiftFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
/**
* arm_tbflags_from_tb:
* @tb: the TranslationBlock
*
* Extract the flag values from @tb.
*/
static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb)
{
return (CPUARMTBFlags){ tb->flags, tb->cs_base };
}
/*
* Enum for argument to fpstatus_ptr().
*/
typedef enum ARMFPStatusFlavour {
FPST_FPCR,
FPST_FPCR_F16,
FPST_STD,
FPST_STD_F16,
} ARMFPStatusFlavour;
/**
* fpstatus_ptr: return TCGv_ptr to the specified fp_status field
*
* We have multiple softfloat float_status fields in the Arm CPU state struct
* (see the comment in cpu.h for details). Return a TCGv_ptr which has
* been set up to point to the requested field in the CPU state struct.
* The options are:
*
* FPST_FPCR
* for non-FP16 operations controlled by the FPCR
* FPST_FPCR_F16
* for operations controlled by the FPCR where FPCR.FZ16 is to be used
* FPST_STD
* for A32/T32 Neon operations using the "standard FPSCR value"
* FPST_STD_F16
* as FPST_STD, but where FPCR.FZ16 is to be used
*/
static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
{
TCGv_ptr statusptr = tcg_temp_new_ptr();
int offset;
switch (flavour) {
case FPST_FPCR:
offset = offsetof(CPUARMState, vfp.fp_status);
break;
case FPST_FPCR_F16:
offset = offsetof(CPUARMState, vfp.fp_status_f16);
break;
case FPST_STD:
offset = offsetof(CPUARMState, vfp.standard_fp_status);
break;
case FPST_STD_F16:
offset = offsetof(CPUARMState, vfp.standard_fp_status_f16);
break;
default:
g_assert_not_reached();
}
tcg_gen_addi_ptr(statusptr, cpu_env, offset);
return statusptr;
}
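/*
 * Illustrative use (not tied to a particular insn): a half-precision
 * operation controlled by the FPCR would typically be generated as
 *
 *     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16);
 *     gen_helper_vfp_addh(dest, op1, op2, fpst);
 *     tcg_temp_free_ptr(fpst);
 *
 * with dest/op1/op2 being TCGv_i32 values holding the operands.
 */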
/**
* finalize_memop:
* @s: DisasContext
* @opc: size+sign+align of the memory operation
*
* Build the complete MemOp for a memory operation, including alignment
* and endianness.
*
* If (opc & MO_AMASK) then the operation already contains the required
* alignment, e.g. for AccType_ATOMIC. Otherwise, this is an optionally
* unaligned operation, e.g. for AccType_NORMAL.
*
* In the latter case, there are configuration bits that require alignment,
* and this is applied here. Note that there is no way to indicate that
* no alignment should ever be enforced; this must be handled manually.
*/
static inline MemOp finalize_memop(DisasContext *s, MemOp opc)
{
if (s->align_mem && !(opc & MO_AMASK)) {
opc |= MO_ALIGN;
}
return opc | s->be_data;
}
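/*
 * Illustrative use: a normal (possibly unaligned) 32-bit load in the
 * A32/T32 front end could be emitted as
 *
 *     MemOp mop = finalize_memop(s, MO_UL);
 *     tcg_gen_qemu_ld_i32(dest, addr, get_mem_index(s), mop);
 *
 * so that SCTLR-mandated alignment and the CPU's data endianness are
 * folded into the final MemOp.
 */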
/**
* asimd_imm_const: Expand an encoded SIMD constant value
*
* Expand a SIMD constant value. This is essentially the pseudocode
* AdvSIMDExpandImm, except that we also perform the boolean NOT needed for
* VMVN and VBIC (when cmode < 14 && op == 1).
*
* The combination cmode == 15 op == 1 is a reserved encoding for AArch32;
* callers must catch this; we return the 64-bit constant value defined
* for AArch64.
*
* cmode = 2,3,4,5,6,7,10,11,12,13 imm=0 was UNPREDICTABLE in v7A but
* is either not unpredictable or merely CONSTRAINED UNPREDICTABLE in v8A;
* we produce an immediate constant value of 0 in these cases.
*/
uint64_t asimd_imm_const(uint32_t imm, int cmode, int op);
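/*
 * Worked example (illustrative): asimd_imm_const(0x12, 0, 0) replicates
 * the byte into each 32-bit lane, giving 0x0000001200000012; with op == 1
 * (the VMVN/VBIC case) the result is inverted, 0xffffffedffffffed.
 */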
/*
* gen_disas_label:
* Create a label and cache a copy of pc_save.
*/
static inline DisasLabel gen_disas_label(DisasContext *s)
{
return (DisasLabel){
.label = gen_new_label(),
.pc_save = s->pc_save,
};
}
/*
* set_disas_label:
* Emit a label and restore the cached copy of pc_save.
*/
static inline void set_disas_label(DisasContext *s, DisasLabel l)
{
gen_set_label(l.label);
s->pc_save = l.pc_save;
}
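/*
 * Typical pattern (illustrative): branch over some generated code while
 * keeping pc_save consistent on both paths:
 *
 *     DisasLabel over = gen_disas_label(s);
 *     tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, over.label);
 *     ...   (code generated only for the tmp != 0 case)
 *     set_disas_label(s, over);
 */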
static inline TCGv_ptr gen_lookup_cp_reg(uint32_t key)
{
TCGv_ptr ret = tcg_temp_new_ptr();
gen_helper_lookup_cp_reg(ret, cpu_env, tcg_constant_i32(key));
return ret;
}
/*
* Helpers for implementing sets of trans_* functions.
* Defer the implementation of NAME to FUNC, with optional extra arguments.
*/
#define TRANS(NAME, FUNC, ...) \
static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
{ return FUNC(s, __VA_ARGS__); }
#define TRANS_FEAT(NAME, FEAT, FUNC, ...) \
static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
{ return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); }
#define TRANS_FEAT_NONSTREAMING(NAME, FEAT, FUNC, ...) \
static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
{ \
s->is_nonstreaming = true; \
return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); \
}
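/*
 * For example, a use such as
 *     TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s)
 * (see translate-sme.c) expands to a trans_ADDHA_s() that checks
 * dc_isar_feature(aa64_sme, s) and, if the feature is present, calls
 * do_adda(s, a, MO_32, gen_helper_sme_addha_s).
 */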
#endif /* TARGET_ARM_TRANSLATE_H */

82
target/arm/tcg/vfp-uncond.decode Normal file
View file

@ -0,0 +1,82 @@
# AArch32 VFP instruction descriptions (unconditional insns)
#
# Copyright (c) 2019 Linaro, Ltd
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
#
# This file is processed by scripts/decodetree.py
#
# Encodings for the unconditional VFP instructions are here:
# generally anything matching A32
# 1111 1110 .... .... .... 101. ...0 ....
# and T32
# 1111 110. .... .... .... 101. .... ....
# 1111 1110 .... .... .... 101. .... ....
# (but those patterns might also cover some Neon instructions,
# which do not live in this file.)
# VFP registers have an odd encoding with a four-bit field
# and a one-bit field which are assembled in different orders
# depending on whether the register is double or single precision.
# Each individual instruction function must do the checks for
# "double register selected but CPU does not have double support"
# and "double register number has bit 4 set but CPU does not
# support D16-D31" (which should UNDEF).
%vm_dp 5:1 0:4
%vm_sp 0:4 5:1
%vn_dp 7:1 16:4
%vn_sp 16:4 7:1
%vd_dp 22:1 12:4
%vd_sp 12:4 22:1
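# For example (illustrative): %vd_sp (12:4 22:1) assembles the single-precision
# register number as insn[15:12]:insn[22], giving S0..S31, while %vd_dp
# (22:1 12:4) assembles insn[22]:insn[15:12], giving D0..D31.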
@vfp_dnm_s ................................ vm=%vm_sp vn=%vn_sp vd=%vd_sp
@vfp_dnm_d ................................ vm=%vm_dp vn=%vn_dp vd=%vd_dp
VSEL 1111 1110 0. cc:2 .... .... 1001 .0.0 .... \
vm=%vm_sp vn=%vn_sp vd=%vd_sp sz=1
VSEL 1111 1110 0. cc:2 .... .... 1010 .0.0 .... \
vm=%vm_sp vn=%vn_sp vd=%vd_sp sz=2
VSEL 1111 1110 0. cc:2 .... .... 1011 .0.0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp sz=3
VMAXNM_hp 1111 1110 1.00 .... .... 1001 .0.0 .... @vfp_dnm_s
VMINNM_hp 1111 1110 1.00 .... .... 1001 .1.0 .... @vfp_dnm_s
VMAXNM_sp 1111 1110 1.00 .... .... 1010 .0.0 .... @vfp_dnm_s
VMINNM_sp 1111 1110 1.00 .... .... 1010 .1.0 .... @vfp_dnm_s
VMAXNM_dp 1111 1110 1.00 .... .... 1011 .0.0 .... @vfp_dnm_d
VMINNM_dp 1111 1110 1.00 .... .... 1011 .1.0 .... @vfp_dnm_d
VRINT 1111 1110 1.11 10 rm:2 .... 1001 01.0 .... \
vm=%vm_sp vd=%vd_sp sz=1
VRINT 1111 1110 1.11 10 rm:2 .... 1010 01.0 .... \
vm=%vm_sp vd=%vd_sp sz=2
VRINT 1111 1110 1.11 10 rm:2 .... 1011 01.0 .... \
vm=%vm_dp vd=%vd_dp sz=3
# VCVT float to int with specified rounding mode; Vd is always single-precision
VCVT 1111 1110 1.11 11 rm:2 .... 1001 op:1 1.0 .... \
vm=%vm_sp vd=%vd_sp sz=1
VCVT 1111 1110 1.11 11 rm:2 .... 1010 op:1 1.0 .... \
vm=%vm_sp vd=%vd_sp sz=2
VCVT 1111 1110 1.11 11 rm:2 .... 1011 op:1 1.0 .... \
vm=%vm_dp vd=%vd_sp sz=3
VMOVX 1111 1110 1.11 0000 .... 1010 01 . 0 .... \
vd=%vd_sp vm=%vm_sp
VINS 1111 1110 1.11 0000 .... 1010 11 . 0 .... \
vd=%vd_sp vm=%vm_sp

247
target/arm/tcg/vfp.decode Normal file
View file

@ -0,0 +1,247 @@
# AArch32 VFP instruction descriptions (conditional insns)
#
# Copyright (c) 2019 Linaro, Ltd
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
#
# This file is processed by scripts/decodetree.py
#
# Encodings for the conditional VFP instructions are here:
# generally anything matching A32
# cccc 11.. .... .... .... 101. .... ....
# and T32
# 1110 110. .... .... .... 101. .... ....
# 1110 1110 .... .... .... 101. .... ....
# (but those patterns might also cover some Neon instructions,
# which do not live in this file.)
# VFP registers have an odd encoding with a four-bit field
# and a one-bit field which are assembled in different orders
# depending on whether the register is double or single precision.
# Each individual instruction function must do the checks for
# "double register selected but CPU does not have double support"
# and "double register number has bit 4 set but CPU does not
# support D16-D31" (which should UNDEF).
%vm_dp 5:1 0:4
%vm_sp 0:4 5:1
%vn_dp 7:1 16:4
%vn_sp 16:4 7:1
%vd_dp 22:1 12:4
%vd_sp 12:4 22:1
%vmov_idx_b 21:1 5:2
%vmov_idx_h 21:1 6:1
%vmov_imm 16:4 0:4
@vfp_dnm_s ................................ vm=%vm_sp vn=%vn_sp vd=%vd_sp
@vfp_dnm_d ................................ vm=%vm_dp vn=%vn_dp vd=%vd_dp
@vfp_dm_ss ................................ vm=%vm_sp vd=%vd_sp
@vfp_dm_dd ................................ vm=%vm_dp vd=%vd_dp
@vfp_dm_ds ................................ vm=%vm_sp vd=%vd_dp
@vfp_dm_sd ................................ vm=%vm_dp vd=%vd_sp
# VMOV scalar to general-purpose register; note that this does
# include some Neon cases.
VMOV_to_gp ---- 1110 u:1 1. 1 .... rt:4 1011 ... 1 0000 \
vn=%vn_dp size=0 index=%vmov_idx_b
VMOV_to_gp ---- 1110 u:1 0. 1 .... rt:4 1011 ..1 1 0000 \
vn=%vn_dp size=1 index=%vmov_idx_h
VMOV_to_gp ---- 1110 0 0 index:1 1 .... rt:4 1011 .00 1 0000 \
vn=%vn_dp size=2 u=0
VMOV_from_gp ---- 1110 0 1. 0 .... rt:4 1011 ... 1 0000 \
vn=%vn_dp size=0 index=%vmov_idx_b
VMOV_from_gp ---- 1110 0 0. 0 .... rt:4 1011 ..1 1 0000 \
vn=%vn_dp size=1 index=%vmov_idx_h
VMOV_from_gp ---- 1110 0 0 index:1 0 .... rt:4 1011 .00 1 0000 \
vn=%vn_dp size=2
VDUP ---- 1110 1 b:1 q:1 0 .... rt:4 1011 . 0 e:1 1 0000 \
vn=%vn_dp
VMSR_VMRS ---- 1110 111 l:1 reg:4 rt:4 1010 0001 0000
VMOV_half ---- 1110 000 l:1 .... rt:4 1001 . 001 0000 vn=%vn_sp
VMOV_single ---- 1110 000 l:1 .... rt:4 1010 . 001 0000 vn=%vn_sp
VMOV_64_sp ---- 1100 010 op:1 rt2:4 rt:4 1010 00.1 .... vm=%vm_sp
VMOV_64_dp ---- 1100 010 op:1 rt2:4 rt:4 1011 00.1 .... vm=%vm_dp
VLDR_VSTR_hp ---- 1101 u:1 .0 l:1 rn:4 .... 1001 imm:8 vd=%vd_sp
VLDR_VSTR_sp ---- 1101 u:1 .0 l:1 rn:4 .... 1010 imm:8 vd=%vd_sp
VLDR_VSTR_dp ---- 1101 u:1 .0 l:1 rn:4 .... 1011 imm:8 vd=%vd_dp
# We split the load/store multiple up into two patterns to avoid
# overlap with other insns in the "Advanced SIMD load/store and 64-bit move"
# grouping:
# P=0 U=0 W=0 is 64-bit VMOV
# P=1 W=0 is VLDR/VSTR
# P=U W=1 is UNDEF
# leaving P=0 U=1 W=x and P=1 U=0 W=1 for load/store multiple.
# These include FSTM/FLDM.
VLDM_VSTM_sp ---- 1100 1 . w:1 l:1 rn:4 .... 1010 imm:8 \
vd=%vd_sp p=0 u=1
VLDM_VSTM_dp ---- 1100 1 . w:1 l:1 rn:4 .... 1011 imm:8 \
vd=%vd_dp p=0 u=1
VLDM_VSTM_sp ---- 1101 0.1 l:1 rn:4 .... 1010 imm:8 \
vd=%vd_sp p=1 u=0 w=1
VLDM_VSTM_dp ---- 1101 0.1 l:1 rn:4 .... 1011 imm:8 \
vd=%vd_dp p=1 u=0 w=1
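# As an illustration, A32 "VLDMIA r2!, {s0-s3}" has P=0 U=1 W=1 and matches
# the first VLDM_VSTM_sp pattern above, while "VLDMDB r2!, {d0-d1}" has
# P=1 U=0 W=1 and matches the second VLDM_VSTM_dp pattern.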
# 3-register VFP data-processing; bits [23,21:20,6] identify the operation.
VMLA_hp ---- 1110 0.00 .... .... 1001 .0.0 .... @vfp_dnm_s
VMLA_sp ---- 1110 0.00 .... .... 1010 .0.0 .... @vfp_dnm_s
VMLA_dp ---- 1110 0.00 .... .... 1011 .0.0 .... @vfp_dnm_d
VMLS_hp ---- 1110 0.00 .... .... 1001 .1.0 .... @vfp_dnm_s
VMLS_sp ---- 1110 0.00 .... .... 1010 .1.0 .... @vfp_dnm_s
VMLS_dp ---- 1110 0.00 .... .... 1011 .1.0 .... @vfp_dnm_d
VNMLS_hp ---- 1110 0.01 .... .... 1001 .0.0 .... @vfp_dnm_s
VNMLS_sp ---- 1110 0.01 .... .... 1010 .0.0 .... @vfp_dnm_s
VNMLS_dp ---- 1110 0.01 .... .... 1011 .0.0 .... @vfp_dnm_d
VNMLA_hp ---- 1110 0.01 .... .... 1001 .1.0 .... @vfp_dnm_s
VNMLA_sp ---- 1110 0.01 .... .... 1010 .1.0 .... @vfp_dnm_s
VNMLA_dp ---- 1110 0.01 .... .... 1011 .1.0 .... @vfp_dnm_d
VMUL_hp ---- 1110 0.10 .... .... 1001 .0.0 .... @vfp_dnm_s
VMUL_sp ---- 1110 0.10 .... .... 1010 .0.0 .... @vfp_dnm_s
VMUL_dp ---- 1110 0.10 .... .... 1011 .0.0 .... @vfp_dnm_d
VNMUL_hp ---- 1110 0.10 .... .... 1001 .1.0 .... @vfp_dnm_s
VNMUL_sp ---- 1110 0.10 .... .... 1010 .1.0 .... @vfp_dnm_s
VNMUL_dp ---- 1110 0.10 .... .... 1011 .1.0 .... @vfp_dnm_d
VADD_hp ---- 1110 0.11 .... .... 1001 .0.0 .... @vfp_dnm_s
VADD_sp ---- 1110 0.11 .... .... 1010 .0.0 .... @vfp_dnm_s
VADD_dp ---- 1110 0.11 .... .... 1011 .0.0 .... @vfp_dnm_d
VSUB_hp ---- 1110 0.11 .... .... 1001 .1.0 .... @vfp_dnm_s
VSUB_sp ---- 1110 0.11 .... .... 1010 .1.0 .... @vfp_dnm_s
VSUB_dp ---- 1110 0.11 .... .... 1011 .1.0 .... @vfp_dnm_d
VDIV_hp ---- 1110 1.00 .... .... 1001 .0.0 .... @vfp_dnm_s
VDIV_sp ---- 1110 1.00 .... .... 1010 .0.0 .... @vfp_dnm_s
VDIV_dp ---- 1110 1.00 .... .... 1011 .0.0 .... @vfp_dnm_d
VFMA_hp ---- 1110 1.10 .... .... 1001 .0. 0 .... @vfp_dnm_s
VFMS_hp ---- 1110 1.10 .... .... 1001 .1. 0 .... @vfp_dnm_s
VFNMA_hp ---- 1110 1.01 .... .... 1001 .0. 0 .... @vfp_dnm_s
VFNMS_hp ---- 1110 1.01 .... .... 1001 .1. 0 .... @vfp_dnm_s
VFMA_sp ---- 1110 1.10 .... .... 1010 .0. 0 .... @vfp_dnm_s
VFMS_sp ---- 1110 1.10 .... .... 1010 .1. 0 .... @vfp_dnm_s
VFNMA_sp ---- 1110 1.01 .... .... 1010 .0. 0 .... @vfp_dnm_s
VFNMS_sp ---- 1110 1.01 .... .... 1010 .1. 0 .... @vfp_dnm_s
VFMA_dp ---- 1110 1.10 .... .... 1011 .0.0 .... @vfp_dnm_d
VFMS_dp ---- 1110 1.10 .... .... 1011 .1.0 .... @vfp_dnm_d
VFNMA_dp ---- 1110 1.01 .... .... 1011 .0.0 .... @vfp_dnm_d
VFNMS_dp ---- 1110 1.01 .... .... 1011 .1.0 .... @vfp_dnm_d
VMOV_imm_hp ---- 1110 1.11 .... .... 1001 0000 .... \
vd=%vd_sp imm=%vmov_imm
VMOV_imm_sp ---- 1110 1.11 .... .... 1010 0000 .... \
vd=%vd_sp imm=%vmov_imm
VMOV_imm_dp ---- 1110 1.11 .... .... 1011 0000 .... \
vd=%vd_dp imm=%vmov_imm
VMOV_reg_sp ---- 1110 1.11 0000 .... 1010 01.0 .... @vfp_dm_ss
VMOV_reg_dp ---- 1110 1.11 0000 .... 1011 01.0 .... @vfp_dm_dd
VABS_hp ---- 1110 1.11 0000 .... 1001 11.0 .... @vfp_dm_ss
VABS_sp ---- 1110 1.11 0000 .... 1010 11.0 .... @vfp_dm_ss
VABS_dp ---- 1110 1.11 0000 .... 1011 11.0 .... @vfp_dm_dd
VNEG_hp ---- 1110 1.11 0001 .... 1001 01.0 .... @vfp_dm_ss
VNEG_sp ---- 1110 1.11 0001 .... 1010 01.0 .... @vfp_dm_ss
VNEG_dp ---- 1110 1.11 0001 .... 1011 01.0 .... @vfp_dm_dd
VSQRT_hp ---- 1110 1.11 0001 .... 1001 11.0 .... @vfp_dm_ss
VSQRT_sp ---- 1110 1.11 0001 .... 1010 11.0 .... @vfp_dm_ss
VSQRT_dp ---- 1110 1.11 0001 .... 1011 11.0 .... @vfp_dm_dd
VCMP_hp ---- 1110 1.11 010 z:1 .... 1001 e:1 1.0 .... \
vd=%vd_sp vm=%vm_sp
VCMP_sp ---- 1110 1.11 010 z:1 .... 1010 e:1 1.0 .... \
vd=%vd_sp vm=%vm_sp
VCMP_dp ---- 1110 1.11 010 z:1 .... 1011 e:1 1.0 .... \
vd=%vd_dp vm=%vm_dp
# VCVTT and VCVTB from f16: Vd format depends on size bit; Vm is always vm_sp
VCVT_f32_f16 ---- 1110 1.11 0010 .... 1010 t:1 1.0 .... \
vd=%vd_sp vm=%vm_sp
VCVT_f64_f16 ---- 1110 1.11 0010 .... 1011 t:1 1.0 .... \
vd=%vd_dp vm=%vm_sp
# VCVTB and VCVTT to f16: Vd format is always vd_sp;
# Vm format depends on size bit
VCVT_b16_f32 ---- 1110 1.11 0011 .... 1001 t:1 1.0 .... \
vd=%vd_sp vm=%vm_sp
VCVT_f16_f32 ---- 1110 1.11 0011 .... 1010 t:1 1.0 .... \
vd=%vd_sp vm=%vm_sp
VCVT_f16_f64 ---- 1110 1.11 0011 .... 1011 t:1 1.0 .... \
vd=%vd_sp vm=%vm_dp
VRINTR_hp ---- 1110 1.11 0110 .... 1001 01.0 .... @vfp_dm_ss
VRINTR_sp ---- 1110 1.11 0110 .... 1010 01.0 .... @vfp_dm_ss
VRINTR_dp ---- 1110 1.11 0110 .... 1011 01.0 .... @vfp_dm_dd
VRINTZ_hp ---- 1110 1.11 0110 .... 1001 11.0 .... @vfp_dm_ss
VRINTZ_sp ---- 1110 1.11 0110 .... 1010 11.0 .... @vfp_dm_ss
VRINTZ_dp ---- 1110 1.11 0110 .... 1011 11.0 .... @vfp_dm_dd
VRINTX_hp ---- 1110 1.11 0111 .... 1001 01.0 .... @vfp_dm_ss
VRINTX_sp ---- 1110 1.11 0111 .... 1010 01.0 .... @vfp_dm_ss
VRINTX_dp ---- 1110 1.11 0111 .... 1011 01.0 .... @vfp_dm_dd
# VCVT between single and double:
# Vm precision depends on size; Vd is its reverse
VCVT_sp ---- 1110 1.11 0111 .... 1010 11.0 .... @vfp_dm_ds
VCVT_dp ---- 1110 1.11 0111 .... 1011 11.0 .... @vfp_dm_sd
# VCVT from integer to floating point: Vm always single; Vd depends on size
VCVT_int_hp ---- 1110 1.11 1000 .... 1001 s:1 1.0 .... \
vd=%vd_sp vm=%vm_sp
VCVT_int_sp ---- 1110 1.11 1000 .... 1010 s:1 1.0 .... \
vd=%vd_sp vm=%vm_sp
VCVT_int_dp ---- 1110 1.11 1000 .... 1011 s:1 1.0 .... \
vd=%vd_dp vm=%vm_sp
# VJCVT is always dp to sp
VJCVT ---- 1110 1.11 1001 .... 1011 11.0 .... @vfp_dm_sd
# VCVT between floating-point and fixed-point. The immediate value
# is in the same format as a Vm single-precision register number.
# We assemble bits 18 (op), 16 (u) and 7 (sx) into a single opc field
# for the convenience of the trans_VCVT_fix functions.
%vcvt_fix_op 18:1 16:1 7:1
VCVT_fix_hp ---- 1110 1.11 1.1. .... 1001 .1.0 .... \
vd=%vd_sp imm=%vm_sp opc=%vcvt_fix_op
VCVT_fix_sp ---- 1110 1.11 1.1. .... 1010 .1.0 .... \
vd=%vd_sp imm=%vm_sp opc=%vcvt_fix_op
VCVT_fix_dp ---- 1110 1.11 1.1. .... 1011 .1.0 .... \
vd=%vd_dp imm=%vm_sp opc=%vcvt_fix_op
# VCVT float to integer (VCVT and VCVTR): Vd always single; Vm depends on size
VCVT_hp_int ---- 1110 1.11 110 s:1 .... 1001 rz:1 1.0 .... \
vd=%vd_sp vm=%vm_sp
VCVT_sp_int ---- 1110 1.11 110 s:1 .... 1010 rz:1 1.0 .... \
vd=%vd_sp vm=%vm_sp
VCVT_dp_int ---- 1110 1.11 110 s:1 .... 1011 rz:1 1.0 .... \
vd=%vd_sp vm=%vm_dp