|
1 | 1 | //! Decodes a floating-point value into individual parts and error ranges. |
2 | 2 |
|
3 | | -use crate::num::FpCategory; |
4 | | -use crate::num::dec2flt::float::RawFloat; |
| 3 | +use crate::mem::size_of; |
5 | 4 |
|
6 | | -/// Decoded unsigned finite value, such that: |
7 | | -/// |
8 | | -/// - The original value equals to `mant * 2^exp`. |
9 | | -/// |
10 | | -/// - Any number from `(mant - minus) * 2^exp` to `(mant + plus) * 2^exp` will |
11 | | -/// round to the original value. The range is inclusive only when |
12 | | -/// `inclusive` is `true`. |
| 5 | +/// Generic decoding of a floating-point value up to 64 bits wide, such that its |
| 6 | +/// absolute finite value equals `mant * 2^exp`. Any value in the range from |
| 7 | +/// `(mant - minus) * 2^exp` to `(mant + plus) * 2^exp` rounds to the original value. |
| 8 | +/// That range is inclusive only when `inclusive` is true. |
13 | 9 | #[derive(Copy, Clone, Debug, PartialEq, Eq)] |
14 | | -pub struct Decoded { |
15 | | - /// The scaled mantissa. |
| 10 | +pub struct Decoded64 { |
| 11 | + /// Scaled Mantissa |
16 | 12 | pub mant: u64, |
17 | | - /// The lower error range. |
| 13 | + /// Lower Error Range |
18 | 14 | pub minus: u64, |
19 | | - /// The upper error range. |
| 15 | + /// Upper Error Range |
20 | 16 | pub plus: u64, |
21 | | - /// The shared exponent in base 2. |
22 | | - pub exp: i16, |
23 | | - /// True when the error range is inclusive. |
24 | | - /// |
25 | | - /// In IEEE 754, this is true when the original mantissa was even. |
| 17 | + /// Shared Exponent In Base 2 |
| 18 | + pub exp: isize, |
| 19 | +/// True when the error range is inclusive. |
26 | 20 | pub inclusive: bool, |
27 | 21 | } |
28 | 22 |
|
29 | | -/// Decoded unsigned value. |
30 | | -#[derive(Copy, Clone, Debug, PartialEq, Eq)] |
31 | | -pub enum FullDecoded { |
32 | | - /// Not-a-number. |
33 | | - Nan, |
34 | | - /// Infinities, either positive or negative. |
35 | | - Infinite, |
36 | | - /// Zero, either positive or negative. |
37 | | - Zero, |
38 | | - /// Finite numbers with further decoded fields. |
39 | | - Finite(Decoded), |
40 | | -} |
| 23 | +macro_rules! floats { |
| 24 | + ($($T:ident)*) => { |
| 25 | + $( |
41 | 26 |
|
42 | | -/// A floating point type which can be `decode`d. |
43 | | -pub trait DecodableFloat: RawFloat + Copy { |
44 | | - /// The minimum positive normalized value. |
45 | | - fn min_pos_norm_value() -> Self; |
46 | | -} |
| 27 | +/// Decodes a floating-point value into its integer components. The returned |
| 28 | +/// tuple contains the mantissa m and the exponent e, such that the original |
| 29 | +/// value equals m × 2^e, ignoring the sign. |
| 30 | + /// |
| 31 | + /// For normal numbers: mantissa includes the implied leading 1. |
| 32 | + /// For denormal numbers: mantissa is shifted to maintain the equation. |
| 33 | + const fn ${concat(mant_and_exp_, $T)}(v: $T) -> (u64, isize) { |
| 34 | + const ENC_BITS: usize = size_of::<$T>() * 8; |
| 35 | + // The encoding of the sign resides in the most significant bit. |
| 36 | + const SIGN_ENC_BITS: usize = 1; |
| 37 | + // The encoding of the mantissa resides in the least-significant |
| 38 | + // bits. |
| 39 | + const MANT_ENC_BITS: usize = $T::MANTISSA_DIGITS as usize - 1; |
| 40 | + // The encoding of the exponent resides in the remaining bits, |
| 41 | +// in between the sign and the mantissa. |
| 42 | + const EXP_ENC_BITS: usize = ENC_BITS - (SIGN_ENC_BITS + MANT_ENC_BITS); |
47 | 43 |
|
48 | | -#[cfg(target_has_reliable_f16)] |
49 | | -impl DecodableFloat for f16 { |
50 | | - fn min_pos_norm_value() -> Self { |
51 | | - f16::MIN_POSITIVE |
52 | | - } |
53 | | -} |
| 44 | + let enc = v.to_bits(); |
| 45 | + let exp_enc = (enc << SIGN_ENC_BITS) >> (SIGN_ENC_BITS + MANT_ENC_BITS); |
| 46 | + let mant_enc = enc & ((1 << MANT_ENC_BITS) - 1); |
54 | 47 |
|
55 | | -impl DecodableFloat for f32 { |
56 | | - fn min_pos_norm_value() -> Self { |
57 | | - f32::MIN_POSITIVE |
58 | | - } |
59 | | -} |
| 48 | + const EXP_BIAS: isize = (1 << (EXP_ENC_BITS - 1)) - 1; |
| 49 | + let exp = exp_enc as isize - (EXP_BIAS + MANT_ENC_BITS as isize); |
60 | 50 |
|
61 | | -impl DecodableFloat for f64 { |
62 | | - fn min_pos_norm_value() -> Self { |
63 | | - f64::MIN_POSITIVE |
64 | | - } |
65 | | -} |
| 51 | + let mant = if exp_enc != 0 { |
| 52 | + // Normal numbers have an implied leading 1 to the mantissa |
| 53 | + // bits. |
| 54 | + mant_enc | 1 << MANT_ENC_BITS |
| 55 | + } else { |
| 56 | + // Denormal numbers use a special exponent of 1 − bias instead |
| 57 | + // of −bias. |
| 58 | + mant_enc << 1 |
| 59 | + }; |
66 | 60 |
|
67 | | -/// Returns a sign (true when negative) and `FullDecoded` value |
68 | | -/// from given floating point number. |
69 | | -pub fn decode<T: DecodableFloat>(v: T) -> (/*negative?*/ bool, FullDecoded) { |
70 | | - let (mant, exp, sign) = v.integer_decode(); |
71 | | - let even = (mant & 1) == 0; |
72 | | - let decoded = match v.classify() { |
73 | | - FpCategory::Nan => FullDecoded::Nan, |
74 | | - FpCategory::Infinite => FullDecoded::Infinite, |
75 | | - FpCategory::Zero => FullDecoded::Zero, |
76 | | - FpCategory::Subnormal => { |
77 | | - // neighbors: (mant - 2, exp) -- (mant, exp) -- (mant + 2, exp) |
78 | | - // Float::integer_decode always preserves the exponent, |
79 | | - // so the mantissa is scaled for subnormals. |
80 | | - FullDecoded::Finite(Decoded { mant, minus: 1, plus: 1, exp, inclusive: even }) |
| 61 | + const _: () = assert!(ENC_BITS <= 64); |
| 62 | + (mant as u64, exp) |
81 | 63 | } |
82 | | - FpCategory::Normal => { |
83 | | - let minnorm = <T as DecodableFloat>::min_pos_norm_value().integer_decode(); |
84 | | - if mant == minnorm.0 { |
| 64 | + |
| 65 | +/// Decodes a finite, nonzero value (normal or subnormal) into the generic structure. |
| 66 | + pub fn ${concat(decode_, $T)}(v: $T) -> Decoded64 { |
| 67 | + let (mant, exp) = ${concat(mant_and_exp_, $T)}(v); |
| 68 | + let is_even = (mant & 1) == 0; |
| 69 | + |
| 70 | + if v.is_subnormal() { |
| 71 | + // neighbors: (mant - 2, exp) -- (mant, exp) -- (mant + 2, exp) |
| 72 | + return Decoded64 { mant: mant, minus: 1, plus: 1, exp: exp, inclusive: is_even }; |
| 73 | + } |
| 74 | + debug_assert!(v.is_normal()); |
| 75 | + |
| 76 | + const MIN_POS_MANT: u64 = ${concat(mant_and_exp_, $T)}($T::MIN_POSITIVE).0; |
| 77 | + const MIN_NEG_MANT: u64 = ${concat(mant_and_exp_, $T)}(-$T::MIN_POSITIVE).0; |
| 78 | + const _: () = assert!(MIN_POS_MANT == MIN_NEG_MANT); |
| 79 | + if mant == MIN_POS_MANT { |
85 | 80 | // neighbors: (maxmant, exp - 1) -- (minnormmant, exp) -- (minnormmant + 1, exp) |
86 | | - // where maxmant = minnormmant * 2 - 1 |
87 | | - FullDecoded::Finite(Decoded { |
88 | | - mant: mant << 2, |
89 | | - minus: 1, |
90 | | - plus: 2, |
91 | | - exp: exp - 2, |
92 | | - inclusive: even, |
93 | | - }) |
| 81 | +// where maxmant = minnormmant * 2 - 1 |
| 82 | + Decoded64 { mant: mant << 2, minus: 1, plus: 2, exp: exp - 2, inclusive: is_even } |
94 | 83 | } else { |
95 | 84 | // neighbors: (mant - 1, exp) -- (mant, exp) -- (mant + 1, exp) |
96 | | - FullDecoded::Finite(Decoded { |
97 | | - mant: mant << 1, |
98 | | - minus: 1, |
99 | | - plus: 1, |
100 | | - exp: exp - 1, |
101 | | - inclusive: even, |
102 | | - }) |
| 85 | + Decoded64 { mant: mant << 1, minus: 1, plus: 1, exp: exp - 1, inclusive: is_even } |
103 | 86 | } |
104 | 87 | } |
| 88 | + |
| 89 | + )* |
105 | 90 | }; |
106 | | - (sign < 0, decoded) |
107 | 91 | } |
| 92 | + |
| 93 | +floats! { f16 f32 f64 } |
0 commit comments