Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 5 additions & 167 deletions src/math/f64/hyperbolic.rs
Original file line number Diff line number Diff line change
@@ -1,188 +1,26 @@
use crate::math::scalar;
use crate::{Simd, SimdBaseIo, SimdBaseOps, SimdConsts, SimdFloat64};

type SimdI64<V> = <<V as SimdConsts>::Engine as Simd>::Vi64;

// Lanes with |x| strictly below this bound take the polynomial sinh/cosh
// approximation (compared with `cmp_lt` in `sinh_u35` / `cosh_u35`).
const SINH_COSH_SMALL_ABS: f64 = 0.125;
// Lanes with |x| above this bound are left out of the sinh/cosh fast mask
// (compared with `cmp_lte` in `sinh_cosh_masks`).
const SINH_COSH_FAST_ABS_MAX: f64 = 0.125;
// NOTE(review): both tanh thresholds are 0.0. A `cmp_lt` against
// TANH_SMALL_ABS can never match an absolute value, and only zero lanes
// satisfy `cmp_lte` against TANH_FAST_ABS_MAX.
const TANH_SMALL_ABS: f64 = 0.0;
const TANH_FAST_ABS_MAX: f64 = 0.0;

/// Reports whether at least one of the first `V::WIDTH` lanes of `mask`
/// holds a non-zero value.
///
/// The scan stops at the first non-zero lane it encounters.
#[inline(always)]
fn any_lane_nonzero<V>(mask: SimdI64<V>) -> bool
where
    V: SimdFloat64,
{
    // SAFETY: presumably `as_array` only copies the register into its array
    // representation; the exact contract lives with the trait — TODO confirm.
    unsafe {
        let mask_lanes = mask.as_array();
        (0..V::WIDTH).any(|idx| mask_lanes[idx] != 0)
    }
}

/// Overwrites the lanes of `output` flagged in `exceptional_mask` with
/// `scalar_fallback` applied to the matching lane of `input`.
///
/// When no lane is flagged, `output` is returned untouched and the scalar
/// function is never called.
#[inline(always)]
fn patch_exceptional_lanes<V>(
    input: V,
    output: V,
    exceptional_mask: SimdI64<V>,
    scalar_fallback: fn(f64) -> f64,
) -> V
where
    V: SimdFloat64,
{
    if any_lane_nonzero::<V>(exceptional_mask) {
        // SAFETY: the pointer handed to `load_from_ptr_unaligned` is derived
        // from a live local array representation; assumes that representation
        // stores `V::WIDTH` contiguous f64 lanes — TODO confirm.
        unsafe {
            let source = input.as_array();
            let flagged = exceptional_mask.as_array();
            let mut patched = output.as_array();

            (0..V::WIDTH)
                .filter(|&idx| flagged[idx] != 0)
                .for_each(|idx| patched[idx] = scalar_fallback(source[idx]));

            V::load_from_ptr_unaligned(&patched as *const V::ArrayRepresentation as *const f64)
        }
    } else {
        output
    }
}

/// Evaluates `scalar::exp_u35_f64` on every lane of `input` and repacks the
/// results into a vector.
#[inline(always)]
fn exp_u35<V>(input: V) -> V
where
    V: SimdFloat64,
{
    // Stop-gap local to this function family: the exponential itself is still
    // mapped lane by lane through the scalar routine, so that the hyperbolic
    // functions built on top of it do not need scalar lane mapping themselves.
    //
    // SAFETY: the pointer is derived from a live local array representation;
    // assumes it stores `V::WIDTH` contiguous f64 lanes — TODO confirm.
    unsafe {
        let mut scratch = input.as_array();
        (0..V::WIDTH).for_each(|idx| {
            let lane_value = scratch[idx];
            scratch[idx] = scalar::exp_u35_f64(lane_value);
        });
        V::load_from_ptr_unaligned(&scratch as *const V::ArrayRepresentation as *const f64)
    }
}

/// Polynomial approximation of `sinh` built from the reciprocal-factorial
/// coefficients 1/3! through 1/11!.
///
/// `input_sq` must be `input * input`; it is passed in so callers can share it.
#[inline(always)]
fn sinh_small<V>(input: V, input_sq: V) -> V
where
    V: SimdFloat64,
{
    // Coefficients applied after the leading 1/11! term, highest order first.
    const LOWER_COEFFS: [f64; 4] = [1.0 / 362880.0, 1.0 / 5040.0, 1.0 / 120.0, 1.0 / 6.0];

    // Horner evaluation in x^2: each step multiplies by x^2 and adds the next
    // coefficient, in the same operation order as a nested expression.
    let poly = LOWER_COEFFS
        .iter()
        .fold(V::set1(1.0 / 39916800.0), |acc, &coeff| {
            acc * input_sq + V::set1(coeff)
        });

    let cubed = input * input_sq;
    input + (cubed * poly)
}

/// Polynomial approximation of `cosh` built from the reciprocal-factorial
/// coefficients 1/2! through 1/8!, taking the squared input directly.
#[inline(always)]
fn cosh_small<V>(input_sq: V) -> V
where
    V: SimdFloat64,
{
    // Coefficients applied after the leading 1/8! term, highest order first.
    const LOWER_COEFFS: [f64; 3] = [1.0 / 720.0, 1.0 / 24.0, 0.5];

    // Horner evaluation in x^2, same operation order as a nested expression.
    let poly = LOWER_COEFFS
        .iter()
        .fold(V::set1(1.0 / 40320.0), |acc, &coeff| {
            acc * input_sq + V::set1(coeff)
        });

    let correction = input_sq * poly;
    V::set1(1.0) + correction
}

/// Returns `(sinh, cosh)` of `abs_input` computed from the exponential
/// definitions `(e^x - e^-x) / 2` and `(e^x + e^-x) / 2`.
///
/// `e^-x` is obtained as the reciprocal of `e^x`, so only one exponential is
/// evaluated.
#[inline(always)]
fn sinh_cosh_medium<V>(abs_input: V) -> (V, V)
where
    V: SimdFloat64,
{
    let growing = exp_u35(abs_input);
    let decaying = V::set1(1.0) / growing;
    let scale = V::set1(0.5);

    let sinh_part = (growing - decaying) * scale;
    let cosh_part = (growing + decaying) * scale;

    (sinh_part, cosh_part)
}

/// Produces the values shared by the sinh/cosh kernels:
/// `(fast_mask, |input|, input * input)`.
///
/// A lane is set in `fast_mask` when it compares equal to itself (so NaN
/// lanes are excluded) and its magnitude is at most `SINH_COSH_FAST_ABS_MAX`.
#[inline(always)]
fn sinh_cosh_masks<V>(input: V) -> (SimdI64<V>, V, V)
where
    V: SimdFloat64,
{
    let magnitude = input.abs();
    let squared = input * input;

    // Self-comparison is false only for NaN lanes.
    let self_equal = input.cmp_eq(input).bitcast_i64();
    let in_fast_range = magnitude
        .cmp_lte(V::set1(SINH_COSH_FAST_ABS_MAX))
        .bitcast_i64();

    (self_equal & in_fast_range, magnitude, squared)
}
use crate::math::{map, scalar};
use crate::SimdFloat64;

/// Lane-wise hyperbolic sine of `input`.
///
/// NOTE(review): this span comes from a rendered diff and shows both sides of
/// the change. It has two tail expressions: the `patch_exceptional_lanes`
/// fast path and the `map::unary_f64` scalar delegation. Only one of them can
/// be the function's result.
#[inline(always)]
pub(crate) fn sinh_u35<V>(input: V) -> V
where
V: SimdFloat64,
{
// Lanes that are NaN or whose magnitude exceeds SINH_COSH_FAST_ABS_MAX have a
// zero fast mask and are therefore flagged as exceptional.
let (fast_mask, abs_input, input_sq) = sinh_cosh_masks(input);
let exceptional_mask = fast_mask.cmp_eq(SimdI64::<V>::zeroes());
let small_mask = abs_input.cmp_lt(V::set1(SINH_COSH_SMALL_ABS));

// Polynomial candidate for small magnitudes.
let fast_small = sinh_small(input, input_sq);
// Exponential candidate: (e^x - e^-x) / 2, with e^-x taken as 1 / e^x.
let exp_input = exp_u35(input);
let exp_neg_input = V::set1(1.0) / exp_input;
let sinh_medium = (exp_input - exp_neg_input) * V::set1(0.5);
// Presumably picks `fast_small` where `small_mask` is set and `sinh_medium`
// elsewhere — confirm against the `blendv` argument order.
let fast = small_mask.blendv(sinh_medium, fast_small);
// Lanes comparing equal to zero are passed through from `input`, presumably
// so that the sign of zero is kept.
let zero_mask = input.cmp_eq(V::set1(0.0));
let fast = zero_mask.blendv(fast, input);

// Fast-path side: recompute flagged lanes with the scalar routine.
patch_exceptional_lanes(input, fast, exceptional_mask, scalar::sinh_u35_f64)
// Delegating side: presumably applies the scalar routine to every lane; the
// `map::unary_f64` helper is not visible here.
map::unary_f64(input, scalar::sinh_u35_f64)
}

/// Lane-wise hyperbolic cosine of `input`.
///
/// NOTE(review): this span comes from a rendered diff and shows both sides of
/// the change. It has two tail expressions: the `patch_exceptional_lanes`
/// fast path and the `map::unary_f64` scalar delegation. Only one of them can
/// be the function's result.
#[inline(always)]
pub(crate) fn cosh_u35<V>(input: V) -> V
where
V: SimdFloat64,
{
// Lanes that are NaN or whose magnitude exceeds SINH_COSH_FAST_ABS_MAX have a
// zero fast mask and are therefore flagged as exceptional.
let (fast_mask, abs_input, input_sq) = sinh_cosh_masks(input);
let exceptional_mask = fast_mask.cmp_eq(SimdI64::<V>::zeroes());
let small_mask = abs_input.cmp_lt(V::set1(SINH_COSH_SMALL_ABS));

// Polynomial candidate for small magnitudes.
let fast_small = cosh_small(input_sq);
// Exponential candidate evaluated on |x|; the sinh half of the pair is unused.
let (_, cosh_medium) = sinh_cosh_medium(abs_input);
// Presumably picks `fast_small` where `small_mask` is set and `cosh_medium`
// elsewhere — confirm against the `blendv` argument order.
let fast = small_mask.blendv(cosh_medium, fast_small);

// Fast-path side: recompute flagged lanes with the scalar routine.
patch_exceptional_lanes(input, fast, exceptional_mask, scalar::cosh_u35_f64)
// Delegating side: presumably applies the scalar routine to every lane; the
// `map::unary_f64` helper is not visible here.
map::unary_f64(input, scalar::cosh_u35_f64)
}

/// Lane-wise hyperbolic tangent of `input`.
///
/// NOTE(review): this span comes from a rendered diff and shows both sides of
/// the change. It has two tail expressions: the `patch_exceptional_lanes`
/// fast path and the `map::unary_f64` scalar delegation. Only one of them can
/// be the function's result.
#[inline(always)]
pub(crate) fn tanh_u35<V>(input: V) -> V
where
V: SimdFloat64,
{
let abs_input = input.abs();
// Self-comparison is false only for NaN lanes.
let finite_mask = input.cmp_eq(input).bitcast_i64();
// TANH_FAST_ABS_MAX is 0.0, so only zero lanes fall inside the fast range and
// every other lane ends up flagged as exceptional.
let within_fast_range = abs_input.cmp_lte(V::set1(TANH_FAST_ABS_MAX)).bitcast_i64();
let exceptional_mask = (finite_mask & within_fast_range).cmp_eq(SimdI64::<V>::zeroes());
// TANH_SMALL_ABS is 0.0, so this strict comparison cannot match any lane.
let small_mask = abs_input.cmp_lt(V::set1(TANH_SMALL_ABS));

// Polynomial candidate: ratio of the small-range sinh and cosh approximations.
let input_sq = input * input;
let fast_small = sinh_small(input, input_sq) / cosh_small(input_sq);

// Exponential candidate: (e^x - e^-x) / (e^x + e^-x), with e^-x as 1 / e^x.
let exp_input = exp_u35(input);
let exp_neg_input = V::set1(1.0) / exp_input;
let tanh_medium = (exp_input - exp_neg_input) / (exp_input + exp_neg_input);
// Presumably picks `fast_small` where `small_mask` is set and `tanh_medium`
// elsewhere — confirm against the `blendv` argument order.
let fast = small_mask.blendv(tanh_medium, fast_small);
// Lanes comparing equal to zero are passed through from `input`, presumably
// so that the sign of zero is kept.
let zero_mask = input.cmp_eq(V::set1(0.0));
let fast = zero_mask.blendv(fast, input);

// Fast-path side: recompute flagged lanes with the scalar routine.
patch_exceptional_lanes(input, fast, exceptional_mask, scalar::tanh_u35_f64)
// Delegating side: presumably applies the scalar routine to every lane; the
// `map::unary_f64` helper is not visible here.
map::unary_f64(input, scalar::tanh_u35_f64)
}
28 changes: 3 additions & 25 deletions src/math/f64/inverse_hyperbolic.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::math::{f64, scalar};
use crate::math::{f64, map, scalar};
use crate::{Simd, SimdBaseIo, SimdBaseOps, SimdConsts, SimdFloat64};

type SimdI64<V> = <<V as SimdConsts>::Engine as Simd>::Vi64;
Expand Down Expand Up @@ -79,15 +79,7 @@ where
V: SimdFloat64,
V::Engine: Simd<Vf64 = V>,
{
let finite_mask = input.cmp_eq(input).bitcast_i64();
let in_domain_mask = input.cmp_gte(V::set1(1.0)).bitcast_i64();
let fast_mask = finite_mask & in_domain_mask;
let exceptional_mask = fast_mask.cmp_eq(SimdI64::<V>::zeroes());

let root_term = ((input - V::set1(1.0)).sqrt()) * ((input + V::set1(1.0)).sqrt());
let fast = f64::ln_u35(input + root_term);

patch_exceptional_lanes(input, fast, exceptional_mask, scalar::acosh_u35_f64)
map::unary_f64(input, scalar::acosh_u35_f64)
}

#[inline(always)]
Expand All @@ -96,19 +88,5 @@ where
V: SimdFloat64,
V::Engine: Simd<Vf64 = V>,
{
let finite_mask = input.cmp_eq(input).bitcast_i64();
let abs_x = input.abs();
let strict_domain_mask = abs_x.cmp_lt(V::set1(1.0)).bitcast_i64();
let non_zero_mask = input.cmp_neq(V::zeroes()).bitcast_i64();
let stable_range_mask = abs_x.cmp_lte(V::set1(0.99)).bitcast_i64();
let away_from_zero_mask = abs_x.cmp_gte(V::set1(0.9)).bitcast_i64();
let fast_mask =
finite_mask & strict_domain_mask & non_zero_mask & stable_range_mask & away_from_zero_mask;
let exceptional_mask = fast_mask.cmp_eq(SimdI64::<V>::zeroes());

let one = V::set1(1.0);
let ratio = (one + input) / (one - input);
let fast = f64::ln_u35(ratio) * V::set1(0.5);

patch_exceptional_lanes(input, fast, exceptional_mask, scalar::atanh_u35_f64)
map::unary_f64(input, scalar::atanh_u35_f64)
}
25 changes: 25 additions & 0 deletions src/tests/simd_math_targeted_edges/hyperbolic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -300,3 +300,28 @@ simd_math_targeted_all_backends!(
f64_hyperbolic_special_values_and_mixed_lanes,
run_f64_hyperbolic_special_values_and_mixed_lanes
);

/// Checks that the f64 `sinh_u35`, `tanh_u35` and `cosh_u35` vector functions
/// return bit-identical results to the std scalar functions for zero inputs.
///
/// Lane 0 holds `-0.0`; all remaining lanes hold `+0.0`.
fn run_f64_hyperbolic_signed_zero_semantics<S: Simd>() {
    let neg_zero = -0.0f64;
    let pos_zero = 0.0f64;

    let mut lanes = vec![pos_zero; S::Vf64::WIDTH];
    lanes[0] = neg_zero;
    let input = S::Vf64::load_from_slice(&lanes);

    let sinh = input.sinh_u35();
    let tanh = input.tanh_u35();

    // Comparing raw bits distinguishes -0.0 from +0.0, which `==` would not.
    assert_eq!(sinh[0].to_bits(), neg_zero.sinh().to_bits());
    assert_eq!(tanh[0].to_bits(), neg_zero.tanh().to_bits());

    // A second lane only exists on backends wider than one element.
    if S::Vf64::WIDTH > 1 {
        assert_eq!(sinh[1].to_bits(), pos_zero.sinh().to_bits());
        assert_eq!(tanh[1].to_bits(), pos_zero.tanh().to_bits());
    }

    let cosh = input.cosh_u35();
    assert_eq!(cosh[0].to_bits(), neg_zero.cosh().to_bits());
}

// Registers the signed-zero runner under the test name given as the first
// argument. Presumably the macro instantiates it once per SIMD backend; its
// definition is not visible here.
simd_math_targeted_all_backends!(
f64_hyperbolic_signed_zero_semantics,
run_f64_hyperbolic_signed_zero_semantics
);
26 changes: 26 additions & 0 deletions src/tests/simd_math_targeted_edges/inverse_hyperbolic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -270,3 +270,29 @@ simd_math_targeted_all_backends!(
f64_inverse_hyperbolic_mixed_lanes,
run_f64_inverse_hyperbolic_mixed_lanes
);

/// Checks that the f64 `asinh_u35` and `atanh_u35` vector functions return
/// bit-identical results to the std scalar functions for zero inputs, and
/// that `acosh_u35` matches std at its domain boundary `1.0`.
///
/// Lane 0 holds `-0.0`; all remaining lanes hold `+0.0`.
fn run_f64_inverse_hyperbolic_signed_zero_semantics<S: Simd>() {
    let neg_zero = -0.0f64;
    let pos_zero = 0.0f64;

    let mut lanes = vec![pos_zero; S::Vf64::WIDTH];
    lanes[0] = neg_zero;
    let input = S::Vf64::load_from_slice(&lanes);

    let asinh = input.asinh_u35();
    let atanh = input.atanh_u35();

    // Comparing raw bits distinguishes -0.0 from +0.0, which `==` would not.
    assert_eq!(asinh[0].to_bits(), neg_zero.asinh().to_bits());
    assert_eq!(atanh[0].to_bits(), neg_zero.atanh().to_bits());

    // A second lane only exists on backends wider than one element.
    if S::Vf64::WIDTH > 1 {
        assert_eq!(asinh[1].to_bits(), pos_zero.asinh().to_bits());
        assert_eq!(atanh[1].to_bits(), pos_zero.atanh().to_bits());
    }

    let unit = 1.0f64;
    let acosh = S::Vf64::set1(unit).acosh_u35();
    assert_eq!(acosh[0].to_bits(), unit.acosh().to_bits());
}

// Registers the signed-zero runner under the test name given as the first
// argument. Presumably the macro instantiates it once per SIMD backend; its
// definition is not visible here.
simd_math_targeted_all_backends!(
f64_inverse_hyperbolic_signed_zero_semantics,
run_f64_inverse_hyperbolic_signed_zero_semantics
);
Loading