mirror of
https://github.com/Motorhead1991/qemu.git
synced 2025-08-14 21:52:18 -06:00
softfloat: Implement float128_add/sub via parts
Replace the existing Berkeley implementation with the FloatParts implementation.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
parent
da10a9074a
commit
3ff49e56a7
1 changed file with 36 additions and 221 deletions
257
fpu/softfloat.c
257
fpu/softfloat.c
|
@ -1046,6 +1046,20 @@ static float64 float64_round_pack_canonical(FloatParts64 *p,
|
||||||
return float64_pack_raw(p);
|
return float64_pack_raw(p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void float128_unpack_canonical(FloatParts128 *p, float128 f,
|
||||||
|
float_status *s)
|
||||||
|
{
|
||||||
|
float128_unpack_raw(p, f);
|
||||||
|
parts_canonicalize(p, s, &float128_params);
|
||||||
|
}
|
||||||
|
|
||||||
|
static float128 float128_round_pack_canonical(FloatParts128 *p,
|
||||||
|
float_status *s)
|
||||||
|
{
|
||||||
|
parts_uncanon(p, s, &float128_params);
|
||||||
|
return float128_pack_raw(p);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Addition and subtraction
|
* Addition and subtraction
|
||||||
*/
|
*/
|
||||||
|
@ -1213,6 +1227,28 @@ bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
|
||||||
return bfloat16_addsub(a, b, status, true);
|
return bfloat16_addsub(a, b, status, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static float128 QEMU_FLATTEN
|
||||||
|
float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
|
||||||
|
{
|
||||||
|
FloatParts128 pa, pb, *pr;
|
||||||
|
|
||||||
|
float128_unpack_canonical(&pa, a, status);
|
||||||
|
float128_unpack_canonical(&pb, b, status);
|
||||||
|
pr = parts_addsub(&pa, &pb, status, subtract);
|
||||||
|
|
||||||
|
return float128_round_pack_canonical(pr, status);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Returns the result of float128_addsub() for @a and @b with the
 * subtract flag cleared.
 */
float128 float128_add(float128 a, float128 b, float_status *status)
{
    const bool subtract = false;

    return float128_addsub(a, b, status, subtract);
}
|
||||||
|
|
||||||
|
/*
 * Returns the result of float128_addsub() for @a and @b with the
 * subtract flag set.
 */
float128 float128_sub(float128 a, float128 b, float_status *status)
{
    const bool subtract = true;

    return float128_addsub(a, b, status, subtract);
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Returns the result of multiplying the floating-point values `a' and
|
* Returns the result of multiplying the floating-point values `a' and
|
||||||
* `b'. The operation is performed according to the IEC/IEEE Standard
|
* `b'. The operation is performed according to the IEC/IEEE Standard
|
||||||
|
@ -7032,227 +7068,6 @@ float128 float128_round_to_int(float128 a, float_status *status)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
|
||||||
| Returns the result of adding the absolute values of the quadruple-precision
|
|
||||||
| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
|
|
||||||
| before being returned. `zSign' is ignored if the result is a NaN.
|
|
||||||
| The addition is performed according to the IEC/IEEE Standard for Binary
|
|
||||||
| Floating-Point Arithmetic.
|
|
||||||
*----------------------------------------------------------------------------*/
|
|
||||||
|
|
||||||
static float128 addFloat128Sigs(float128 a, float128 b, bool zSign,
|
|
||||||
float_status *status)
|
|
||||||
{
|
|
||||||
int32_t aExp, bExp, zExp;
|
|
||||||
uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
|
|
||||||
int32_t expDiff;
|
|
||||||
|
|
||||||
aSig1 = extractFloat128Frac1( a );
|
|
||||||
aSig0 = extractFloat128Frac0( a );
|
|
||||||
aExp = extractFloat128Exp( a );
|
|
||||||
bSig1 = extractFloat128Frac1( b );
|
|
||||||
bSig0 = extractFloat128Frac0( b );
|
|
||||||
bExp = extractFloat128Exp( b );
|
|
||||||
expDiff = aExp - bExp;
|
|
||||||
if ( 0 < expDiff ) {
|
|
||||||
if ( aExp == 0x7FFF ) {
|
|
||||||
if (aSig0 | aSig1) {
|
|
||||||
return propagateFloat128NaN(a, b, status);
|
|
||||||
}
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
if ( bExp == 0 ) {
|
|
||||||
--expDiff;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
bSig0 |= UINT64_C(0x0001000000000000);
|
|
||||||
}
|
|
||||||
shift128ExtraRightJamming(
|
|
||||||
bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
|
|
||||||
zExp = aExp;
|
|
||||||
}
|
|
||||||
else if ( expDiff < 0 ) {
|
|
||||||
if ( bExp == 0x7FFF ) {
|
|
||||||
if (bSig0 | bSig1) {
|
|
||||||
return propagateFloat128NaN(a, b, status);
|
|
||||||
}
|
|
||||||
return packFloat128( zSign, 0x7FFF, 0, 0 );
|
|
||||||
}
|
|
||||||
if ( aExp == 0 ) {
|
|
||||||
++expDiff;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
aSig0 |= UINT64_C(0x0001000000000000);
|
|
||||||
}
|
|
||||||
shift128ExtraRightJamming(
|
|
||||||
aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
|
|
||||||
zExp = bExp;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if ( aExp == 0x7FFF ) {
|
|
||||||
if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
|
|
||||||
return propagateFloat128NaN(a, b, status);
|
|
||||||
}
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
|
|
||||||
if ( aExp == 0 ) {
|
|
||||||
if (status->flush_to_zero) {
|
|
||||||
if (zSig0 | zSig1) {
|
|
||||||
float_raise(float_flag_output_denormal, status);
|
|
||||||
}
|
|
||||||
return packFloat128(zSign, 0, 0, 0);
|
|
||||||
}
|
|
||||||
return packFloat128( zSign, 0, zSig0, zSig1 );
|
|
||||||
}
|
|
||||||
zSig2 = 0;
|
|
||||||
zSig0 |= UINT64_C(0x0002000000000000);
|
|
||||||
zExp = aExp;
|
|
||||||
goto shiftRight1;
|
|
||||||
}
|
|
||||||
aSig0 |= UINT64_C(0x0001000000000000);
|
|
||||||
add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
|
|
||||||
--zExp;
|
|
||||||
if ( zSig0 < UINT64_C(0x0002000000000000) ) goto roundAndPack;
|
|
||||||
++zExp;
|
|
||||||
shiftRight1:
|
|
||||||
shift128ExtraRightJamming(
|
|
||||||
zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
|
|
||||||
roundAndPack:
|
|
||||||
return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
|
||||||
| Returns the result of subtracting the absolute values of the quadruple-
|
|
||||||
| precision floating-point values `a' and `b'. If `zSign' is 1, the
|
|
||||||
| difference is negated before being returned. `zSign' is ignored if the
|
|
||||||
| result is a NaN. The subtraction is performed according to the IEC/IEEE
|
|
||||||
| Standard for Binary Floating-Point Arithmetic.
|
|
||||||
*----------------------------------------------------------------------------*/
|
|
||||||
|
|
||||||
static float128 subFloat128Sigs(float128 a, float128 b, bool zSign,
|
|
||||||
float_status *status)
|
|
||||||
{
|
|
||||||
int32_t aExp, bExp, zExp;
|
|
||||||
uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
|
|
||||||
int32_t expDiff;
|
|
||||||
|
|
||||||
aSig1 = extractFloat128Frac1( a );
|
|
||||||
aSig0 = extractFloat128Frac0( a );
|
|
||||||
aExp = extractFloat128Exp( a );
|
|
||||||
bSig1 = extractFloat128Frac1( b );
|
|
||||||
bSig0 = extractFloat128Frac0( b );
|
|
||||||
bExp = extractFloat128Exp( b );
|
|
||||||
expDiff = aExp - bExp;
|
|
||||||
shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
|
|
||||||
shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
|
|
||||||
if ( 0 < expDiff ) goto aExpBigger;
|
|
||||||
if ( expDiff < 0 ) goto bExpBigger;
|
|
||||||
if ( aExp == 0x7FFF ) {
|
|
||||||
if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
|
|
||||||
return propagateFloat128NaN(a, b, status);
|
|
||||||
}
|
|
||||||
float_raise(float_flag_invalid, status);
|
|
||||||
return float128_default_nan(status);
|
|
||||||
}
|
|
||||||
if ( aExp == 0 ) {
|
|
||||||
aExp = 1;
|
|
||||||
bExp = 1;
|
|
||||||
}
|
|
||||||
if ( bSig0 < aSig0 ) goto aBigger;
|
|
||||||
if ( aSig0 < bSig0 ) goto bBigger;
|
|
||||||
if ( bSig1 < aSig1 ) goto aBigger;
|
|
||||||
if ( aSig1 < bSig1 ) goto bBigger;
|
|
||||||
return packFloat128(status->float_rounding_mode == float_round_down,
|
|
||||||
0, 0, 0);
|
|
||||||
bExpBigger:
|
|
||||||
if ( bExp == 0x7FFF ) {
|
|
||||||
if (bSig0 | bSig1) {
|
|
||||||
return propagateFloat128NaN(a, b, status);
|
|
||||||
}
|
|
||||||
return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
|
|
||||||
}
|
|
||||||
if ( aExp == 0 ) {
|
|
||||||
++expDiff;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
aSig0 |= UINT64_C(0x4000000000000000);
|
|
||||||
}
|
|
||||||
shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
|
|
||||||
bSig0 |= UINT64_C(0x4000000000000000);
|
|
||||||
bBigger:
|
|
||||||
sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
|
|
||||||
zExp = bExp;
|
|
||||||
zSign ^= 1;
|
|
||||||
goto normalizeRoundAndPack;
|
|
||||||
aExpBigger:
|
|
||||||
if ( aExp == 0x7FFF ) {
|
|
||||||
if (aSig0 | aSig1) {
|
|
||||||
return propagateFloat128NaN(a, b, status);
|
|
||||||
}
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
if ( bExp == 0 ) {
|
|
||||||
--expDiff;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
bSig0 |= UINT64_C(0x4000000000000000);
|
|
||||||
}
|
|
||||||
shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
|
|
||||||
aSig0 |= UINT64_C(0x4000000000000000);
|
|
||||||
aBigger:
|
|
||||||
sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
|
|
||||||
zExp = aExp;
|
|
||||||
normalizeRoundAndPack:
|
|
||||||
--zExp;
|
|
||||||
return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
|
|
||||||
status);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
|
||||||
| Returns the result of adding the quadruple-precision floating-point values
|
|
||||||
| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
|
|
||||||
| for Binary Floating-Point Arithmetic.
|
|
||||||
*----------------------------------------------------------------------------*/
|
|
||||||
|
|
||||||
float128 float128_add(float128 a, float128 b, float_status *status)
|
|
||||||
{
|
|
||||||
bool aSign, bSign;
|
|
||||||
|
|
||||||
aSign = extractFloat128Sign( a );
|
|
||||||
bSign = extractFloat128Sign( b );
|
|
||||||
if ( aSign == bSign ) {
|
|
||||||
return addFloat128Sigs(a, b, aSign, status);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return subFloat128Sigs(a, b, aSign, status);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
|
||||||
| Returns the result of subtracting the quadruple-precision floating-point
|
|
||||||
| values `a' and `b'. The operation is performed according to the IEC/IEEE
|
|
||||||
| Standard for Binary Floating-Point Arithmetic.
|
|
||||||
*----------------------------------------------------------------------------*/
|
|
||||||
|
|
||||||
float128 float128_sub(float128 a, float128 b, float_status *status)
|
|
||||||
{
|
|
||||||
bool aSign, bSign;
|
|
||||||
|
|
||||||
aSign = extractFloat128Sign( a );
|
|
||||||
bSign = extractFloat128Sign( b );
|
|
||||||
if ( aSign == bSign ) {
|
|
||||||
return subFloat128Sigs(a, b, aSign, status);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return addFloat128Sigs(a, b, aSign, status);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
| Returns the result of multiplying the quadruple-precision floating-point
|
| Returns the result of multiplying the quadruple-precision floating-point
|
||||||
| values `a' and `b'. The operation is performed according to the IEC/IEEE
|
| values `a' and `b'. The operation is performed according to the IEC/IEEE
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue