softfloat: Implement float128_add/sub via parts

Replace the existing Berkeley implementation with the FloatParts implementation. Reviewed-by: Alex Bennée <alex.bennee@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2025-08-14 21:52:18 -06:00 · 2020-10-23 19:22:50 -07:00 · 2020-10-23 19:22:50 -07:00 · 3ff49e56a7
commit 3ff49e56a7
parent da10a9074a
1 changed files with 36 additions and 221 deletions
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@ -1046,6 +1046,20 @@ static float64 float64_round_pack_canonical(FloatParts64 *p,
    return float64_pack_raw(p);
 }
 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
                                      float_status *s)
 {
    float128_unpack_raw(p, f);
    parts_canonicalize(p, s, &float128_params);
 }
 static float128 float128_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
 {
    parts_uncanon(p, s, &float128_params);
    return float128_pack_raw(p);
 }
 /*
 * Addition and subtraction
 */
@ -1213,6 +1227,28 @@ bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
    return bfloat16_addsub(a, b, status, true);
 }
 static float128 QEMU_FLATTEN
 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
 {
    FloatParts128 pa, pb, *pr;
    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);
    return float128_round_pack_canonical(pr, status);
 }
 float128 float128_add(float128 a, float128 b, float_status *status)
 {
    return float128_addsub(a, b, status, false);
 }
 float128 float128_sub(float128 a, float128 b, float_status *status)
 {
    return float128_addsub(a, b, status, true);
 }
 /*
 * Returns the result of multiplying the floating-point values `a' and
 * `b'. The operation is performed according to the IEC/IEEE Standard
@ -7032,227 +7068,6 @@ float128 float128_round_to_int(float128 a, float_status *status)
 }
 /*----------------------------------------------------------------------------
 | Returns the result of adding the absolute values of the quadruple-precision
 | floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
 | before being returned.  `zSign' is ignored if the result is a NaN.
 | The addition is performed according to the IEC/IEEE Standard for Binary
 | Floating-Point Arithmetic.
 *----------------------------------------------------------------------------*/
 static float128 addFloat128Sigs(float128 a, float128 b, bool zSign,
                                float_status *status)
 {
    int32_t aExp, bExp, zExp;
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
    int32_t expDiff;
    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) {
        if ( aExp == 0x7FFF ) {
            if (aSig0 | aSig1) {
                return propagateFloat128NaN(a, b, status);
            }
            return a;
        }
        if ( bExp == 0 ) {
            --expDiff;
        }
        else {
            bSig0 |= UINT64_C(0x0001000000000000);
        }
        shift128ExtraRightJamming(
            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        if ( bExp == 0x7FFF ) {
            if (bSig0 | bSig1) {
                return propagateFloat128NaN(a, b, status);
            }
            return packFloat128( zSign, 0x7FFF, 0, 0 );
        }
        if ( aExp == 0 ) {
            ++expDiff;
        }
        else {
            aSig0 |= UINT64_C(0x0001000000000000);
        }
        shift128ExtraRightJamming(
            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
        zExp = bExp;
    }
    else {
        if ( aExp == 0x7FFF ) {
            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
                return propagateFloat128NaN(a, b, status);
            }
            return a;
        }
        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
        if ( aExp == 0 ) {
            if (status->flush_to_zero) {
                if (zSig0 | zSig1) {
                    float_raise(float_flag_output_denormal, status);
                }
                return packFloat128(zSign, 0, 0, 0);
            }
            return packFloat128( zSign, 0, zSig0, zSig1 );
        }
        zSig2 = 0;
        zSig0 |= UINT64_C(0x0002000000000000);
        zExp = aExp;
        goto shiftRight1;
    }
    aSig0 |= UINT64_C(0x0001000000000000);
    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
    --zExp;
    if ( zSig0 < UINT64_C(0x0002000000000000) ) goto roundAndPack;
    ++zExp;
 shiftRight1:
    shift128ExtraRightJamming(
        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
 roundAndPack:
    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
 }
 /*----------------------------------------------------------------------------
 | Returns the result of subtracting the absolute values of the quadruple-
 | precision floating-point values `a' and `b'.  If `zSign' is 1, the
 | difference is negated before being returned.  `zSign' is ignored if the
 | result is a NaN.  The subtraction is performed according to the IEC/IEEE
 | Standard for Binary Floating-Point Arithmetic.
 *----------------------------------------------------------------------------*/
 static float128 subFloat128Sigs(float128 a, float128 b, bool zSign,
                                float_status *status)
 {
    int32_t aExp, bExp, zExp;
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
    int32_t expDiff;
    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );
    expDiff = aExp - bExp;
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
    if ( 0 < expDiff ) goto aExpBigger;
    if ( expDiff < 0 ) goto bExpBigger;
    if ( aExp == 0x7FFF ) {
        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
            return propagateFloat128NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float128_default_nan(status);
    }
    if ( aExp == 0 ) {
        aExp = 1;
        bExp = 1;
    }
    if ( bSig0 < aSig0 ) goto aBigger;
    if ( aSig0 < bSig0 ) goto bBigger;
    if ( bSig1 < aSig1 ) goto aBigger;
    if ( aSig1 < bSig1 ) goto bBigger;
    return packFloat128(status->float_rounding_mode == float_round_down,
                        0, 0, 0);
 bExpBigger:
    if ( bExp == 0x7FFF ) {
        if (bSig0 | bSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
    }
    if ( aExp == 0 ) {
        ++expDiff;
    }
    else {
        aSig0 |= UINT64_C(0x4000000000000000);
    }
    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
    bSig0 |= UINT64_C(0x4000000000000000);
 bBigger:
    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
    zExp = bExp;
    zSign ^= 1;
    goto normalizeRoundAndPack;
 aExpBigger:
    if ( aExp == 0x7FFF ) {
        if (aSig0 | aSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) {
        --expDiff;
    }
    else {
        bSig0 |= UINT64_C(0x4000000000000000);
    }
    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
    aSig0 |= UINT64_C(0x4000000000000000);
 aBigger:
    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
    zExp = aExp;
 normalizeRoundAndPack:
    --zExp;
    return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
                                         status);
 }
 /*----------------------------------------------------------------------------
 | Returns the result of adding the quadruple-precision floating-point values
 | `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
 | for Binary Floating-Point Arithmetic.
 *----------------------------------------------------------------------------*/
 float128 float128_add(float128 a, float128 b, float_status *status)
 {
    bool aSign, bSign;
    aSign = extractFloat128Sign( a );
    bSign = extractFloat128Sign( b );
    if ( aSign == bSign ) {
        return addFloat128Sigs(a, b, aSign, status);
    }
    else {
        return subFloat128Sigs(a, b, aSign, status);
    }
 }
 /*----------------------------------------------------------------------------
 | Returns the result of subtracting the quadruple-precision floating-point
 | values `a' and `b'.  The operation is performed according to the IEC/IEEE
 | Standard for Binary Floating-Point Arithmetic.
 *----------------------------------------------------------------------------*/
 float128 float128_sub(float128 a, float128 b, float_status *status)
 {
    bool aSign, bSign;
    aSign = extractFloat128Sign( a );
    bSign = extractFloat128Sign( b );
    if ( aSign == bSign ) {
        return subFloat128Sigs(a, b, aSign, status);
    }
    else {
        return addFloat128Sigs(a, b, aSign, status);
    }
 }
 /*----------------------------------------------------------------------------
 | Returns the result of multiplying the quadruple-precision floating-point
 | values `a' and `b'.  The operation is performed according to the IEC/IEEE