mirror of
https://github.com/Motorhead1991/qemu.git
synced 2025-08-11 03:24:58 -06:00
target/arm: Implement SVE2 FMMLA
Reviewed-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Stephen Long <steplong@quicinc.com> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20210525010358.152808-47-richard.henderson@linaro.org Message-Id: <20200422165503.13511-1-steplong@quicinc.com> [rth: Fix indexing in helpers, expand macro to straight functions.] Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
parent
cf32744981
commit
4f26756b87
5 changed files with 125 additions and 0 deletions
|
@@ -4256,6 +4256,16 @@ static inline bool isar_feature_aa64_sve2_bitperm(const ARMISARegisters *id)
|
||||||
return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, BITPERM) != 0;
|
return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, BITPERM) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool isar_feature_aa64_sve_f32mm(const ARMISARegisters *id)
|
||||||
|
{
|
||||||
|
return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, F32MM) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool isar_feature_aa64_sve_f64mm(const ARMISARegisters *id)
|
||||||
|
{
|
||||||
|
return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, F64MM) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Feature tests for "does this exist in either 32-bit or 64-bit?"
|
* Feature tests for "does this exist in either 32-bit or 64-bit?"
|
||||||
*/
|
*/
|
||||||
|
|
|
@@ -2662,3 +2662,6 @@ DEF_HELPER_FLAGS_5(sve2_sqrdcmlah_zzzz_s, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, ptr, i32)
|
void, ptr, ptr, ptr, ptr, i32)
|
||||||
DEF_HELPER_FLAGS_5(sve2_sqrdcmlah_zzzz_d, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_5(sve2_sqrdcmlah_zzzz_d, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, ptr, i32)
|
void, ptr, ptr, ptr, ptr, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_6(fmmla_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32)
|
||||||
|
DEF_HELPER_FLAGS_6(fmmla_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32)
|
||||||
|
|
|
@@ -1389,6 +1389,10 @@ UMLSLT_zzzw 01000100 .. 0 ..... 010 111 ..... ..... @rda_rn_rm
|
||||||
CMLA_zzzz 01000100 esz:2 0 rm:5 0010 rot:2 rn:5 rd:5 ra=%reg_movprfx
|
CMLA_zzzz 01000100 esz:2 0 rm:5 0010 rot:2 rn:5 rd:5 ra=%reg_movprfx
|
||||||
SQRDCMLAH_zzzz 01000100 esz:2 0 rm:5 0011 rot:2 rn:5 rd:5 ra=%reg_movprfx
|
SQRDCMLAH_zzzz 01000100 esz:2 0 rm:5 0011 rot:2 rn:5 rd:5 ra=%reg_movprfx
|
||||||
|
|
||||||
|
### SVE2 floating point matrix multiply accumulate
|
||||||
|
|
||||||
|
FMMLA 01100100 .. 1 ..... 111001 ..... ..... @rda_rn_rm
|
||||||
|
|
||||||
### SVE2 Memory Gather Load Group
|
### SVE2 Memory Gather Load Group
|
||||||
|
|
||||||
# SVE2 64-bit gather non-temporal load
|
# SVE2 64-bit gather non-temporal load
|
||||||
|
|
|
@@ -7241,3 +7241,77 @@ void HELPER(sve2_xar_s)(void *vd, void *vn, void *vm, uint32_t desc)
|
||||||
d[i] = ror32(n[i] ^ m[i], shr);
|
d[i] = ror32(n[i] ^ m[i], shr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void HELPER(fmmla_s)(void *vd, void *vn, void *vm, void *va,
|
||||||
|
void *status, uint32_t desc)
|
||||||
|
{
|
||||||
|
intptr_t s, opr_sz = simd_oprsz(desc) / (sizeof(float32) * 4);
|
||||||
|
|
||||||
|
for (s = 0; s < opr_sz; ++s) {
|
||||||
|
float32 *n = vn + s * sizeof(float32) * 4;
|
||||||
|
float32 *m = vm + s * sizeof(float32) * 4;
|
||||||
|
float32 *a = va + s * sizeof(float32) * 4;
|
||||||
|
float32 *d = vd + s * sizeof(float32) * 4;
|
||||||
|
float32 n00 = n[H4(0)], n01 = n[H4(1)];
|
||||||
|
float32 n10 = n[H4(2)], n11 = n[H4(3)];
|
||||||
|
float32 m00 = m[H4(0)], m01 = m[H4(1)];
|
||||||
|
float32 m10 = m[H4(2)], m11 = m[H4(3)];
|
||||||
|
float32 p0, p1;
|
||||||
|
|
||||||
|
/* i = 0, j = 0 */
|
||||||
|
p0 = float32_mul(n00, m00, status);
|
||||||
|
p1 = float32_mul(n01, m01, status);
|
||||||
|
d[H4(0)] = float32_add(a[H4(0)], float32_add(p0, p1, status), status);
|
||||||
|
|
||||||
|
/* i = 0, j = 1 */
|
||||||
|
p0 = float32_mul(n00, m10, status);
|
||||||
|
p1 = float32_mul(n01, m11, status);
|
||||||
|
d[H4(1)] = float32_add(a[H4(1)], float32_add(p0, p1, status), status);
|
||||||
|
|
||||||
|
/* i = 1, j = 0 */
|
||||||
|
p0 = float32_mul(n10, m00, status);
|
||||||
|
p1 = float32_mul(n11, m01, status);
|
||||||
|
d[H4(2)] = float32_add(a[H4(2)], float32_add(p0, p1, status), status);
|
||||||
|
|
||||||
|
/* i = 1, j = 1 */
|
||||||
|
p0 = float32_mul(n10, m10, status);
|
||||||
|
p1 = float32_mul(n11, m11, status);
|
||||||
|
d[H4(3)] = float32_add(a[H4(3)], float32_add(p0, p1, status), status);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void HELPER(fmmla_d)(void *vd, void *vn, void *vm, void *va,
|
||||||
|
void *status, uint32_t desc)
|
||||||
|
{
|
||||||
|
intptr_t s, opr_sz = simd_oprsz(desc) / (sizeof(float64) * 4);
|
||||||
|
|
||||||
|
for (s = 0; s < opr_sz; ++s) {
|
||||||
|
float64 *n = vn + s * sizeof(float64) * 4;
|
||||||
|
float64 *m = vm + s * sizeof(float64) * 4;
|
||||||
|
float64 *a = va + s * sizeof(float64) * 4;
|
||||||
|
float64 *d = vd + s * sizeof(float64) * 4;
|
||||||
|
float64 n00 = n[0], n01 = n[1], n10 = n[2], n11 = n[3];
|
||||||
|
float64 m00 = m[0], m01 = m[1], m10 = m[2], m11 = m[3];
|
||||||
|
float64 p0, p1;
|
||||||
|
|
||||||
|
/* i = 0, j = 0 */
|
||||||
|
p0 = float64_mul(n00, m00, status);
|
||||||
|
p1 = float64_mul(n01, m01, status);
|
||||||
|
d[0] = float64_add(a[0], float64_add(p0, p1, status), status);
|
||||||
|
|
||||||
|
/* i = 0, j = 1 */
|
||||||
|
p0 = float64_mul(n00, m10, status);
|
||||||
|
p1 = float64_mul(n01, m11, status);
|
||||||
|
d[1] = float64_add(a[1], float64_add(p0, p1, status), status);
|
||||||
|
|
||||||
|
/* i = 1, j = 0 */
|
||||||
|
p0 = float64_mul(n10, m00, status);
|
||||||
|
p1 = float64_mul(n11, m01, status);
|
||||||
|
d[2] = float64_add(a[2], float64_add(p0, p1, status), status);
|
||||||
|
|
||||||
|
/* i = 1, j = 1 */
|
||||||
|
p0 = float64_mul(n10, m10, status);
|
||||||
|
p1 = float64_mul(n11, m11, status);
|
||||||
|
d[3] = float64_add(a[3], float64_add(p0, p1, status), status);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@@ -7672,6 +7672,40 @@ DO_SVE2_ZPZZ_FP(FMINP, fminp)
|
||||||
* SVE Integer Multiply-Add (unpredicated)
|
* SVE Integer Multiply-Add (unpredicated)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/*
 * Translate FMMLA: select the helper by element size and check the
 * matching ID-register feature gate, then emit a gvec call carrying an
 * FP-status pointer.  Returns false (instruction not decoded) when the
 * element size is unsupported or the feature is absent; otherwise true,
 * with code emitted only if the SVE access check passes.
 */
static bool trans_FMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    gen_helper_gvec_4_ptr *fn = NULL;

    switch (a->esz) {
    case MO_32:
        if (dc_isar_feature(aa64_sve_f32mm, s)) {
            fn = gen_helper_fmmla_s;
        }
        break;
    case MO_64:
        if (dc_isar_feature(aa64_sve_f64mm, s)) {
            fn = gen_helper_fmmla_d;
        }
        break;
    default:
        /* Byte and halfword element sizes are not architected.  */
        break;
    }
    if (fn == NULL) {
        return false;
    }

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(FPST_FPCR);

        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
|
||||||
|
|
||||||
static bool do_sqdmlal_zzzw(DisasContext *s, arg_rrrr_esz *a,
|
static bool do_sqdmlal_zzzw(DisasContext *s, arg_rrrr_esz *a,
|
||||||
bool sel1, bool sel2)
|
bool sel1, bool sel2)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue