Skip to content
103 changes: 103 additions & 0 deletions src/float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -775,7 +775,9 @@ pub trait FloatCore: Num + NumCast + Neg<Output = Self> + PartialOrd + Copy {
fn to_radians(self) -> Self;

/// Returns the mantissa, base 2 exponent, and sign as integers, respectively.
///
/// The original number can be recovered by `sign * mantissa * 2 ^ exponent`.
/// This formula only works for zero, normal, and infinite numbers (per `classify()`)
///
/// # Examples
///
Expand Down Expand Up @@ -1861,7 +1863,9 @@ pub trait Float: Num + Copy + NumCast + PartialOrd + Neg<Output = Self> {
fn atanh(self) -> Self;

/// Returns the mantissa, base 2 exponent, and sign as integers, respectively.
///
/// The original number can be recovered by `sign * mantissa * 2 ^ exponent`.
/// This formula only works for zero, normal, and infinite numbers (per `classify()`)
///
/// ```
/// use num_traits::Float;
Expand Down Expand Up @@ -2051,8 +2055,10 @@ fn integer_decode_f32(f: f32) -> (u64, i16, i8) {
let sign: i8 = if bits >> 31 == 0 { 1 } else { -1 };
let mut exponent: i16 = ((bits >> 23) & 0xff) as i16;
let mantissa = if exponent == 0 {
// Zeros and subnormals
(bits & 0x7fffff) << 1
} else {
// Normals, infinities, and NaN
(bits & 0x7fffff) | 0x800000
};
// Exponent bias + mantissa shift
Expand All @@ -2065,8 +2071,10 @@ fn integer_decode_f64(f: f64) -> (u64, i16, i8) {
let sign: i8 = if bits >> 63 == 0 { 1 } else { -1 };
let mut exponent: i16 = ((bits >> 52) & 0x7ff) as i16;
let mantissa = if exponent == 0 {
// Zeros and subnormals
(bits & 0xfffffffffffff) << 1
} else {
// Normals, infinities, and NaN
(bits & 0xfffffffffffff) | 0x10000000000000
};
// Exponent bias + mantissa shift
Expand Down Expand Up @@ -2397,6 +2405,101 @@ mod tests {
check::<f64>(1e-12);
}

/// Test the behavior of `Float::integer_decode` with the `given` input and `expected` output values.
#[cfg(any(feature = "std", feature = "libm"))]
fn test_integer_decode<T>(given: T, expected: (u64, i16, i8))
where
T: crate::float::Float + core::fmt::LowerExp + core::fmt::Debug,
{
use crate::float::Float;

let found = Float::integer_decode(given);

assert!(
expected == found,
"unexpected output of `Float::integer_decode({0:e})`
\texpected: ({1:#x} {2} {3})
\t found: ({4:#x} {5} {6})",
given,
expected.0,
expected.1,
expected.2,
found.0,
found.1,
found.2
);

// Destructure the `found` output and cast values as float.
let mantissa_f = T::from(found.0).unwrap();
let exponent_f = T::from(found.1).unwrap();
let sign_f = T::from(found.2).unwrap();

// Recover the `given` input using equation: sign * mantissa * 2^exponent.
let recovered = sign_f * mantissa_f * exponent_f.exp2();
let deviation = recovered - given;
let tolerance = T::from(1e-6).unwrap();

assert_eq!(T::one(), tolerance.signum(), "tolerance must be positive");
assert!(
recovered == given || deviation.abs() < tolerance,
"absolute deviation must not exceed tolerance`
\t given: {:+e}
\trecovered: {:+e}
\tdeviation: {:+e}
\ttolerance: <{:e}",
given,
recovered,
deviation,
tolerance
);
}

#[test]
#[cfg(any(feature = "std", feature = "libm"))]
fn integer_decode_f32() {
for sign in [1, -1] {
let sign_f = sign as f32;
test_integer_decode(sign_f * 0.0__f32, (0x000000, -150, sign));
test_integer_decode(sign_f * 1.0e-40_f32, (0x022d84, -150, sign)); // subnormal (between 0 and MIN_POSITIVE)
test_integer_decode(sign_f * f32::MIN_POSITIVE, (0x800000, -149, sign));
test_integer_decode(sign_f * 0.25_f32, (0x800000, -25, sign));
test_integer_decode(sign_f * 0.5__f32, (0x800000, -24, sign));
test_integer_decode(sign_f * 1____f32, (0x800000, -23, sign));
test_integer_decode(sign_f * 1.5__f32, (0xc00000, -23, sign));
test_integer_decode(sign_f * 2____f32, (0x800000, -22, sign));
test_integer_decode(sign_f * 2.5__f32, (0xa00000, -22, sign));
test_integer_decode(sign_f * 3____f32, (0xc00000, -22, sign));
test_integer_decode(sign_f * 4____f32, (0x800000, -21, sign));
test_integer_decode(sign_f * 5____f32, (0xa00000, -21, sign));
test_integer_decode(sign_f * 42___f32, (0xa80000, -18, sign));
test_integer_decode(sign_f * f32::MAX, (0xffffff, 104, sign));
test_integer_decode(sign_f * f32::INFINITY, (0x800000, 105, sign));
}
}

#[test]
#[cfg(any(feature = "std", feature = "libm"))]
fn integer_decode_f64() {
for sign in [1, -1] {
let sign_f = sign as f64;
test_integer_decode(sign_f * 0.0__f64, (0x00000000000000, -1075, sign));
test_integer_decode(sign_f * 1.0e-308_f64, (0x0e61acf033d1a4, -1075, sign)); // subnormal (between 0 and MIN_POSITIVE)
test_integer_decode(sign_f * f64::MIN_POSITIVE, (0x10000000000000, -1074, sign));
test_integer_decode(sign_f * 0.25_f64, (0x10000000000000, -54, sign));
test_integer_decode(sign_f * 0.5__f64, (0x10000000000000, -53, sign));
test_integer_decode(sign_f * 1____f64, (0x10000000000000, -52, sign));
test_integer_decode(sign_f * 1.5__f64, (0x18000000000000, -52, sign));
test_integer_decode(sign_f * 2____f64, (0x10000000000000, -51, sign));
test_integer_decode(sign_f * 2.5__f64, (0x14000000000000, -51, sign));
test_integer_decode(sign_f * 3____f64, (0x18000000000000, -51, sign));
test_integer_decode(sign_f * 4____f64, (0x10000000000000, -50, sign));
test_integer_decode(sign_f * 5____f64, (0x14000000000000, -50, sign));
test_integer_decode(sign_f * 42___f64, (0x15000000000000, -47, sign));
test_integer_decode(sign_f * f64::MAX, (0x1fffffffffffff, 971, sign));
test_integer_decode(sign_f * f64::INFINITY, (0x10000000000000, 972, sign));
}
}

#[test]
#[cfg(any(feature = "std", feature = "libm"))]
fn copysign() {
Expand Down