mirror of
https://github.com/Motorhead1991/qemu.git
synced 2025-08-02 15:23:53 -06:00
target/arm: Set up float_status to use for FPCR.AH=1 behaviour
When FPCR.AH is 1, the behaviour of some instructions changes:
 * AdvSIMD BFCVT, BFCVTN, BFCVTN2, BFMLALB, BFMLALT
 * SVE BFCVT, BFCVTNT, BFMLALB, BFMLALT, BFMLSLB, BFMLSLT
 * SME BFCVT, BFCVTN, BFMLAL, BFMLSL (these are all in SME2 which
   QEMU does not yet implement)
 * FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS

The behaviour change is:
 * the instructions do not update the FPSR cumulative exception flags
 * trapped floating point exceptions are disabled (a no-op for QEMU,
   which doesn't implement FPCR.{IDE,IXE,UFE,OFE,DZE,IOE})
 * rounding is always round-to-nearest-even regardless of FPCR.RMode
 * denormalized inputs and outputs are always flushed to zero, as if
   FPCR.{FZ,FIZ} is {1,1}
 * FPCR.FZ16 is still honoured for half-precision inputs

(See the Arm ARM DDI0487L.a section A1.5.9.)

We can provide all these behaviours with another pair of float_status
fields which we use only for these insns, when FPCR.AH is 1. These
float_status fields will always have:
 * flush_to_zero and flush_inputs_to_zero set for the non-F16 field
 * rounding mode set to round-to-nearest-even
and so the only FPCR fields they need to honour are DN and FZ16.

In this commit we only define the new fp_status fields and give them
the required behaviour when the FPCR or FPSR is updated. In subsequent
commits we will arrange to use these new fp_status fields for the
instructions that should be affected by FPCR.AH in this way.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
parent
731528d35e
commit
1828053990
5 changed files with 47 additions and 1 deletions
|
@ -556,6 +556,10 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
|
|||
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32);
|
||||
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
|
||||
arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16);
|
||||
arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status);
|
||||
set_flush_to_zero(1, &env->vfp.ah_fp_status);
|
||||
set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status);
|
||||
arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status_f16);
|
||||
|
||||
#ifndef CONFIG_USER_ONLY
|
||||
if (kvm_enabled()) {
|
||||
|
|
|
@ -640,6 +640,13 @@ typedef struct CPUArchState {
|
|||
* standard_fp_status : the ARM "Standard FPSCR Value"
|
||||
* standard_fp_status_f16 : used for half-precision
|
||||
* calculations with the ARM "Standard FPSCR Value"
|
||||
* ah_fp_status: used for the A64 insns which change behaviour
|
||||
* when FPCR.AH == 1 (bfloat16 conversions and multiplies,
|
||||
* and the reciprocal and square root estimate/step insns)
|
||||
* ah_fp_status_f16: used for the A64 insns which change behaviour
|
||||
* when FPCR.AH == 1 (bfloat16 conversions and multiplies,
|
||||
* and the reciprocal and square root estimate/step insns);
|
||||
* for half-precision
|
||||
*
|
||||
* Half-precision operations are governed by a separate
|
||||
* flush-to-zero control bit in FPSCR:FZ16. We pass a separate
|
||||
|
@ -654,6 +661,12 @@ typedef struct CPUArchState {
|
|||
* the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than
|
||||
* using a fixed value for it.
|
||||
*
|
||||
* The ah_fp_status is needed because some insns have different
|
||||
* behaviour when FPCR.AH == 1: they don't update cumulative
|
||||
* exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and
|
||||
* they ignore FPCR.RMode. But they don't ignore FPCR.FZ16,
|
||||
* which means we need an ah_fp_status_f16 as well.
|
||||
*
|
||||
* To avoid having to transfer exception bits around, we simply
|
||||
* say that the FPSCR cumulative exception flags are the logical
|
||||
* OR of the flags in the non-AH fp statuses. This relies on the
|
||||
|
@ -666,6 +679,8 @@ typedef struct CPUArchState {
|
|||
float_status fp_status_f16_a64;
|
||||
float_status standard_fp_status;
|
||||
float_status standard_fp_status_f16;
|
||||
float_status ah_fp_status;
|
||||
float_status ah_fp_status_f16;
|
||||
|
||||
uint64_t zcr_el[4]; /* ZCR_EL[1-3] */
|
||||
uint64_t smcr_el[4]; /* SMCR_EL[1-3] */
|
||||
|
|
|
@ -1831,5 +1831,7 @@ int alle1_tlbmask(CPUARMState *env);
|
|||
|
||||
/* Set the float_status behaviour to match the Arm defaults */
|
||||
void arm_set_default_fp_behaviours(float_status *s);
|
||||
/* Set the float_status behaviour to match Arm FPCR.AH=1 behaviour */
|
||||
void arm_set_ah_fp_behaviours(float_status *s);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -676,6 +676,8 @@ typedef enum ARMFPStatusFlavour {
|
|||
FPST_A64,
|
||||
FPST_A32_F16,
|
||||
FPST_A64_F16,
|
||||
FPST_AH,
|
||||
FPST_AH_F16,
|
||||
FPST_STD,
|
||||
FPST_STD_F16,
|
||||
} ARMFPStatusFlavour;
|
||||
|
@ -696,6 +698,12 @@ typedef enum ARMFPStatusFlavour {
|
|||
* for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used
|
||||
* FPST_A64_F16
|
||||
* for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used
|
||||
* FPST_AH:
|
||||
* for AArch64 operations which change behaviour when AH=1 (specifically,
|
||||
* bfloat16 conversions and multiplies, and the reciprocal and square root
|
||||
* estimate/step insns)
|
||||
* FPST_AH_F16:
|
||||
* ditto, but for half-precision operations
|
||||
* FPST_STD
|
||||
* for A32/T32 Neon operations using the "standard FPSCR value"
|
||||
* FPST_STD_F16
|
||||
|
@ -719,6 +727,12 @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
|
|||
case FPST_A64_F16:
|
||||
offset = offsetof(CPUARMState, vfp.fp_status_f16_a64);
|
||||
break;
|
||||
case FPST_AH:
|
||||
offset = offsetof(CPUARMState, vfp.ah_fp_status);
|
||||
break;
|
||||
case FPST_AH_F16:
|
||||
offset = offsetof(CPUARMState, vfp.ah_fp_status_f16);
|
||||
break;
|
||||
case FPST_STD:
|
||||
offset = offsetof(CPUARMState, vfp.standard_fp_status);
|
||||
break;
|
||||
|
|
|
@ -64,7 +64,7 @@ void arm_set_default_fp_behaviours(float_status *s)
|
|||
* set Invalid for a QNaN
|
||||
* * default NaN has sign bit set, msb frac bit set
|
||||
*/
|
||||
static void arm_set_ah_fp_behaviours(float_status *s)
|
||||
void arm_set_ah_fp_behaviours(float_status *s)
|
||||
{
|
||||
set_float_detect_tininess(float_tininess_after_rounding, s);
|
||||
set_float_ftz_detection(float_ftz_after_rounding, s);
|
||||
|
@ -128,6 +128,11 @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
|
|||
a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64);
|
||||
a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
|
||||
& ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
|
||||
/*
|
||||
* We do not merge in flags from ah_fp_status or ah_fp_status_f16, because
|
||||
* they are used for insns that must not set the cumulative exception bits.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Flushing an input denormal *only* because FPCR.FIZ == 1 does
|
||||
* not set FPSR.IDC; if FPCR.FZ is also set then this takes
|
||||
|
@ -156,6 +161,8 @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
|
|||
set_float_exception_flags(0, &env->vfp.fp_status_f16_a64);
|
||||
set_float_exception_flags(0, &env->vfp.standard_fp_status);
|
||||
set_float_exception_flags(0, &env->vfp.standard_fp_status_f16);
|
||||
set_float_exception_flags(0, &env->vfp.ah_fp_status);
|
||||
set_float_exception_flags(0, &env->vfp.ah_fp_status_f16);
|
||||
}
|
||||
|
||||
static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env)
|
||||
|
@ -201,9 +208,11 @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
|
|||
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
|
||||
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
|
||||
set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
|
||||
set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16);
|
||||
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
|
||||
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
|
||||
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
|
||||
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16);
|
||||
}
|
||||
if (changed & FPCR_FZ) {
|
||||
bool ftz_enabled = val & FPCR_FZ;
|
||||
|
@ -227,6 +236,8 @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
|
|||
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64);
|
||||
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
|
||||
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64);
|
||||
set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status);
|
||||
set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status_f16);
|
||||
}
|
||||
if (changed & FPCR_AH) {
|
||||
bool ah_enabled = val & FPCR_AH;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue