diff --git a/src/tests/simd_math_targeted_edges/core.rs b/src/tests/simd_math_targeted_edges/core.rs index a1cb1e0..f1726f7 100644 --- a/src/tests/simd_math_targeted_edges/core.rs +++ b/src/tests/simd_math_targeted_edges/core.rs @@ -180,6 +180,44 @@ fn run_f32_trig_large_and_mixed_lanes<S: Simd>() { ); } +fn run_f32_trig_fast_range_boundaries<S: Simd>() { + let edge = 8192.0f32; + let inputs = vec![ + f32::from_bits(edge.to_bits().saturating_sub(2)), + f32::from_bits(edge.to_bits().saturating_sub(1)), + edge, + f32::from_bits(edge.to_bits().saturating_add(1)), + f32::from_bits(edge.to_bits().saturating_add(2)), + -f32::from_bits(edge.to_bits().saturating_sub(2)), + -f32::from_bits(edge.to_bits().saturating_sub(1)), + -edge, + -f32::from_bits(edge.to_bits().saturating_add(1)), + -f32::from_bits(edge.to_bits().saturating_add(2)), + ]; + + check_targeted_unary_f32::<S>( + "sin_u35", + &inputs, + contracts::SIN_U35_F32_MAX_ULP, + |v| v.sin_u35(), + f32::sin, + ); + check_targeted_unary_f32::<S>( + "cos_u35", + &inputs, + contracts::COS_U35_F32_MAX_ULP, + |v| v.cos_u35(), + f32::cos, + ); + check_targeted_unary_f32::<S>( + "tan_u35", + &inputs, + contracts::TAN_U35_F32_MAX_ULP, + |v| v.tan_u35(), + f32::tan, + ); +} + fn run_f32_trig_symmetry_identities<S: Simd>() { let inputs = [ -3.0f32, @@ -252,11 +290,123 @@ simd_math_targeted_all_backends!( f32_trig_large_and_mixed_lanes, run_f32_trig_large_and_mixed_lanes ); +simd_math_targeted_all_backends!( + f32_trig_fast_range_boundaries, + run_f32_trig_fast_range_boundaries +); simd_math_targeted_all_backends!( f32_trig_symmetry_identities, run_f32_trig_symmetry_identities ); + +fn run_f32_log_exp_boundary_lanes<S: Simd>() { + let mut inputs_log = vec![ + f32::from_bits(1), + f32::MIN_POSITIVE, + 0.5, + std::f32::consts::FRAC_1_SQRT_2, + 1.0, + 2.0, + 128.0, + f32::INFINITY, + f32::NAN, + -1.0, + 0.0, + -0.0, + ]; + + for &scale in &[0.5f32, 1.0, 2.0, 8.0, 128.0] { + let pivot = std::f32::consts::FRAC_1_SQRT_2 * scale; +
inputs_log.push(f32::from_bits(pivot.to_bits().saturating_sub(1))); + inputs_log.push(pivot); + inputs_log.push(f32::from_bits(pivot.to_bits().saturating_add(1))); + } + + check_targeted_unary_f32::<S>( + "log2_u35", + &inputs_log, + contracts::LOG2_U35_F32_MAX_ULP, + |v| v.log2_u35(), + f32::log2, + ); + check_targeted_unary_f32::<S>( + "ln_u35", + &inputs_log, + contracts::LN_U35_F32_MAX_ULP, + |v| v.ln_u35(), + f32::ln, + ); + + let mut inputs_exp = vec![ + -126.0f32, + -125.5, + -10.0, + -1.0, + -0.0, + 0.0, + 1.0, + 10.0, + 126.0, + 127.0, + 127.25, + f32::INFINITY, + f32::NEG_INFINITY, + f32::NAN, + ]; + + for k in -4..=4 { + let center = k as f32; + inputs_exp.push(center - 1.0 / 1024.0); + inputs_exp.push(center); + inputs_exp.push(center + 1.0 / 1024.0); + } + + for &center in &[-126.0f32, -125.5, -1.0, -0.5, 0.0, 0.5, 1.0, 126.0, 127.0] { + inputs_exp.push(f32::from_bits(center.to_bits().saturating_sub(1))); + inputs_exp.push(center); + inputs_exp.push(f32::from_bits(center.to_bits().saturating_add(1))); + } + + check_targeted_unary_f32::<S>( + "exp2_u35", + &inputs_exp, + contracts::EXP2_U35_F32_MAX_ULP, + |v| v.exp2_u35(), + f32::exp2, + ); + + let mut inputs_exp_e = vec![ + -104.0f32, + -103.98, + -1.0, + -0.0, + 0.0, + 1.0, + 88.0, + 88.7, + 89.0, + f32::INFINITY, + f32::NEG_INFINITY, + f32::NAN, + ]; + + for &center in &[-104.0f32, -103.97, -1.0, 0.0, 1.0, 88.5, 88.7] { + inputs_exp_e.push(f32::from_bits(center.to_bits().saturating_sub(1))); + inputs_exp_e.push(center); + inputs_exp_e.push(f32::from_bits(center.to_bits().saturating_add(1))); + } + + check_targeted_unary_f32::<S>( + "exp_u35", + &inputs_exp_e, + contracts::EXP_U35_F32_MAX_ULP, + |v| v.exp_u35(), + f32::exp, + ); +} + +simd_math_targeted_all_backends!(f32_log_exp_boundary_lanes, run_f32_log_exp_boundary_lanes); + fn run_f64_log_exp_boundary_lanes<S: Simd>() { let mut inputs_log = vec![ f64::from_bits(1), @@ -343,6 +493,42 @@ fn run_f64_log_exp_boundary_lanes<S: Simd>() { ); } +fn run_f64_exp_fast_mask_boundaries<S: Simd>() { + let
mut inputs_exp2 = vec![f64::NEG_INFINITY, f64::INFINITY, f64::NAN]; + for &center in &[-1022.0f64, 1023.0] { + inputs_exp2.push(f64::from_bits(center.to_bits() - 2)); + inputs_exp2.push(f64::from_bits(center.to_bits() - 1)); + inputs_exp2.push(center); + inputs_exp2.push(f64::from_bits(center.to_bits() + 1)); + inputs_exp2.push(f64::from_bits(center.to_bits() + 2)); + } + + check_targeted_unary_f64::<S>( + "exp2_u35 fast-mask boundary", + &inputs_exp2, + contracts::EXP2_U35_F64_MAX_ULP, + |v| v.exp2_u35(), + f64::exp2, + ); + + let mut inputs_exp = vec![f64::NEG_INFINITY, f64::INFINITY, f64::NAN]; + for &center in &[-708.0f64, 709.0] { + inputs_exp.push(f64::from_bits(center.to_bits() - 2)); + inputs_exp.push(f64::from_bits(center.to_bits() - 1)); + inputs_exp.push(center); + inputs_exp.push(f64::from_bits(center.to_bits() + 1)); + inputs_exp.push(f64::from_bits(center.to_bits() + 2)); + } + + check_targeted_unary_f64::<S>( + "exp_u35 fast-mask boundary", + &inputs_exp, + contracts::EXP_U35_F64_MAX_ULP, + |v| v.exp_u35(), + f64::exp, + ); +} + fn run_f64_trig_pi_boundaries<S: Simd>() { let mut inputs = vec![ -0.0, @@ -402,9 +588,123 @@ fn run_f64_tan_pole_neighborhoods<S: Simd>() { } simd_math_targeted_all_backends!(f64_log_exp_boundary_lanes, run_f64_log_exp_boundary_lanes); +simd_math_targeted_all_backends!( + f64_exp_fast_mask_boundaries, + run_f64_exp_fast_mask_boundaries +); simd_math_targeted_all_backends!(f64_trig_pi_boundaries, run_f64_trig_pi_boundaries); simd_math_targeted_all_backends!(f64_tan_pole_neighborhoods, run_f64_tan_pole_neighborhoods); + +fn run_f64_trig_large_and_mixed_lanes<S: Simd>() { + let inputs = vec![ + 0.25, + -0.5, + 123.456_789, + -2048.0, + 8192.0, + -8192.0, + 1.0e6, + -1.0e6, + f64::from_bits(1), + -f64::from_bits(1), + f64::NAN, + f64::INFINITY, + f64::NEG_INFINITY, + std::f64::consts::PI * 0.5 - 1.0e-12, + std::f64::consts::PI * 0.5 + 1.0e-12, + -std::f64::consts::PI * 0.5 + 1.0e-12, + ]; + + check_targeted_unary_f64::<S>( + "sin_u35", + &inputs, +
contracts::SIN_U35_F64_MAX_ULP, + |v| v.sin_u35(), + f64::sin, + ); + check_targeted_unary_f64::<S>( + "cos_u35", + &inputs, + contracts::COS_U35_F64_MAX_ULP, + |v| v.cos_u35(), + f64::cos, + ); + check_targeted_unary_f64::<S>( + "tan_u35", + &inputs, + contracts::TAN_U35_F64_MAX_ULP, + |v| v.tan_u35(), + f64::tan, + ); +} + +fn run_f64_trig_symmetry_identities<S: Simd>() { + let inputs = [ + -3.0f64, + -1.0, + -0.5, + -0.0, + 0.0, + 0.5, + 1.0, + 3.0, + std::f64::consts::FRAC_PI_3, + -std::f64::consts::FRAC_PI_3, + ]; + + for chunk in inputs.chunks(S::Vf64::WIDTH) { + let x = S::Vf64::load_from_slice(chunk); + let sx = x.sin_u35(); + let cx = x.cos_u35(); + let tx = x.tan_u35(); + + let neg_x = -x; + let sneg = neg_x.sin_u35(); + let cneg = neg_x.cos_u35(); + let tneg = neg_x.tan_u35(); + + for lane in 0..chunk.len() { + if chunk[lane] == 0.0 { + continue; + } + + assert_f64_contract( + "sin parity", + chunk[lane], + sneg[lane], + -sx[lane], + contracts::SIN_U35_F64_MAX_ULP, + ) + .unwrap_or_else(|e| panic!("{e}")); + assert_f64_contract( + "cos parity", + chunk[lane], + cneg[lane], + cx[lane], + contracts::COS_U35_F64_MAX_ULP, + ) + .unwrap_or_else(|e| panic!("{e}")); + assert_f64_contract( + "tan parity", + chunk[lane], + tneg[lane], + -tx[lane], + contracts::TAN_U35_F64_MAX_ULP, + ) + .unwrap_or_else(|e| panic!("{e}")); + } + } +} + +simd_math_targeted_all_backends!( + f64_trig_large_and_mixed_lanes, + run_f64_trig_large_and_mixed_lanes +); +simd_math_targeted_all_backends!( + f64_trig_symmetry_identities, + run_f64_trig_symmetry_identities +); + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] #[test] fn f32_log2_u35_mixed_exception_lanes_avx2() { diff --git a/src/tests/simd_math_targeted_edges/inverse_trig.rs b/src/tests/simd_math_targeted_edges/inverse_trig.rs index 24accd1..b10d041 100644 --- a/src/tests/simd_math_targeted_edges/inverse_trig.rs +++ b/src/tests/simd_math_targeted_edges/inverse_trig.rs @@ -134,6 +134,51 @@ fn run_f32_inverse_trig_identity<S: Simd>() { } }
+fn run_f32_inverse_trig_tiny_inputs<S: Simd>() { + let inputs = [ + -f32::MIN_POSITIVE, + -f32::from_bits(1), + -0.0, + 0.0, + f32::from_bits(1), + f32::MIN_POSITIVE, + ]; + + for chunk in inputs.chunks(S::Vf32::WIDTH) { + let v = S::Vf32::load_from_slice(chunk); + let asin = v.asin_u35(); + let acos = v.acos_u35(); + let atan = v.atan_u35(); + + for (lane, &x) in chunk.iter().enumerate() { + assert_f32_contract( + "asin_u35 tiny", + x, + asin[lane], + x.asin(), + contracts::ASIN_U35_F32_MAX_ULP, + ) + .unwrap_or_else(|e| panic!("{e}")); + assert_f32_contract( + "acos_u35 tiny", + x, + acos[lane], + x.acos(), + contracts::ACOS_U35_F32_MAX_ULP, + ) + .unwrap_or_else(|e| panic!("{e}")); + assert_f32_contract( + "atan_u35 tiny", + x, + atan[lane], + x.atan(), + contracts::ATAN_U35_F32_MAX_ULP, + ) + .unwrap_or_else(|e| panic!("{e}")); + } + } +} + simd_math_targeted_all_backends!(f32_inverse_trig_near_one, run_f32_inverse_trig_near_one); simd_math_targeted_all_backends!( f32_inverse_trig_special_lanes, @@ -141,6 +186,100 @@ simd_math_targeted_all_backends!( ); simd_math_targeted_all_backends!(f32_inverse_trig_symmetry, run_f32_inverse_trig_symmetry); simd_math_targeted_all_backends!(f32_inverse_trig_identity, run_f32_inverse_trig_identity); +simd_math_targeted_all_backends!( + f32_inverse_trig_tiny_inputs, + run_f32_inverse_trig_tiny_inputs +); + +const TAN_PI_8_F32: f32 = 0.414_213_57; +const TAN_3PI_8_F32: f32 = 2.414_213_7; + +fn run_f32_inverse_trig_fallback_boundary<S: Simd>() { + // Boundary used by portable_f32 fallback patching for asin/acos near |x|≈1.
+ let boundary = f32::from_bits(0x3F7F_F000); + let inputs = [ + f32::from_bits(boundary.to_bits() - 2), + f32::from_bits(boundary.to_bits() - 1), + boundary, + f32::from_bits(boundary.to_bits() + 1), + f32::from_bits(boundary.to_bits() + 2), + -f32::from_bits(boundary.to_bits() - 2), + -f32::from_bits(boundary.to_bits() - 1), + -boundary, + -f32::from_bits(boundary.to_bits() + 1), + -f32::from_bits(boundary.to_bits() + 2), + ]; + + for chunk in inputs.chunks(S::Vf32::WIDTH) { + let v = S::Vf32::load_from_slice(chunk); + let asin = v.asin_u35(); + let acos = v.acos_u35(); + + for (lane, &x) in chunk.iter().enumerate() { + assert_f32_contract( + "asin_u35 boundary", + x, + asin[lane], + x.asin(), + contracts::ASIN_U35_F32_MAX_ULP, + ) + .unwrap_or_else(|e| panic!("{e}")); + assert_f32_contract( + "acos_u35 boundary", + x, + acos[lane], + x.acos(), + contracts::ACOS_U35_F32_MAX_ULP, + ) + .unwrap_or_else(|e| panic!("{e}")); + } + } +} + +fn run_f32_atan_reduction_threshold_neighbors<S: Simd>() { + let t1 = TAN_PI_8_F32; + let t2 = TAN_3PI_8_F32; + let inputs = [ + f32::from_bits(t1.to_bits() - 2), + f32::from_bits(t1.to_bits() - 1), + t1, + f32::from_bits(t1.to_bits() + 1), + f32::from_bits(t1.to_bits() + 2), + f32::from_bits(t2.to_bits() - 2), + f32::from_bits(t2.to_bits() - 1), + t2, + f32::from_bits(t2.to_bits() + 1), + f32::from_bits(t2.to_bits() + 2), + ]; + + for chunk in inputs.chunks(S::Vf32::WIDTH) { + let v = S::Vf32::load_from_slice(chunk); + let atan = v.atan_u35(); + let atan_neg = (-v).atan_u35(); + + for (lane, &x) in chunk.iter().enumerate() { + assert_f32_contract( + "atan_u35 threshold", + x, + atan[lane], + x.atan(), + contracts::ATAN_U35_F32_MAX_ULP, + ) + .unwrap_or_else(|e| panic!("{e}")); + assert_f32_contract("atan_u35 threshold odd", -x, atan_neg[lane], -atan[lane], 2) + .unwrap_or_else(|e| panic!("{e}")); + } + } +} + +simd_math_targeted_all_backends!( + f32_inverse_trig_fallback_boundary, + run_f32_inverse_trig_fallback_boundary +);
+simd_math_targeted_all_backends!( + f32_atan_reduction_threshold_neighbors, + run_f32_atan_reduction_threshold_neighbors +); fn run_f64_inverse_trig_near_one<S: Simd>() { let inputs = [ @@ -277,6 +416,51 @@ fn run_f64_inverse_trig_identity<S: Simd>() { } } +fn run_f64_inverse_trig_tiny_inputs<S: Simd>() { + let inputs = [ + -f64::MIN_POSITIVE, + -f64::from_bits(1), + -0.0, + 0.0, + f64::from_bits(1), + f64::MIN_POSITIVE, + ]; + + for chunk in inputs.chunks(S::Vf64::WIDTH) { + let v = S::Vf64::load_from_slice(chunk); + let asin = v.asin_u35(); + let acos = v.acos_u35(); + let atan = v.atan_u35(); + + for (lane, &x) in chunk.iter().enumerate() { + assert_f64_contract( + "asin_u35 tiny", + x, + asin[lane], + x.asin(), + contracts::ASIN_U35_F64_MAX_ULP, + ) + .unwrap_or_else(|e| panic!("{e}")); + assert_f64_contract( + "acos_u35 tiny", + x, + acos[lane], + x.acos(), + contracts::ACOS_U35_F64_MAX_ULP, + ) + .unwrap_or_else(|e| panic!("{e}")); + assert_f64_contract( + "atan_u35 tiny", + x, + atan[lane], + x.atan(), + contracts::ATAN_U35_F64_MAX_ULP, + ) + .unwrap_or_else(|e| panic!("{e}")); + } + } +} + const TAN_PI_8_F64: f64 = 0.414_213_562_373_095_03; const TAN_3PI_8_F64: f64 = 2.414_213_562_373_095; @@ -366,6 +550,10 @@ simd_math_targeted_all_backends!( ); simd_math_targeted_all_backends!(f64_inverse_trig_symmetry, run_f64_inverse_trig_symmetry); simd_math_targeted_all_backends!(f64_inverse_trig_identity, run_f64_inverse_trig_identity); +simd_math_targeted_all_backends!( + f64_inverse_trig_tiny_inputs, + run_f64_inverse_trig_tiny_inputs +); simd_math_targeted_all_backends!( f64_inverse_trig_fallback_boundary, run_f64_inverse_trig_fallback_boundary