From bfa522d16dffdae9d9e3e4f4e61e4f69febce0aa Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Tue, 21 Apr 2026 16:37:01 +1200
Subject: [PATCH 1/4] feat(serde): support serde on `SessionOptions` and
 `SpeechOptions`

---
 CHANGELOG.md          |  10 +
 Cargo.toml            |   7 +-
 examples/streaming.rs |   2 +-
 src/detector.rs       |  76 ++++----
 src/options.rs        | 420 +++++++++++++++++++++++++++++++++++-------
 src/session.rs        |  15 +-
 6 files changed, 417 insertions(+), 113 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 884db60..1412816 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.2.0] - 2026-04-21
+
+### Added
+
+- Optional `serde` feature adding `Serialize`/`Deserialize` support for `SessionOptions` and `SpeechOptions`
+
+### Changed
+
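+- **Breaking:** `SpeechOptions` durations are now `Duration` values instead of `u32` milliseconds; the `*_ms` accessors and builders drop the `_ms` suffix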
+
 ## [0.1.0] - 2026-04-08
 
 ### Added
diff --git a/Cargo.toml b/Cargo.toml
index 1e13fe1..d5b50d1 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "silero"
-version = "0.1.1"
+version = "0.2.0"
 edition = "2024"
 rust-version = "1.85"
 repository = "https://github.com/Findit-AI/silero"
@@ -27,13 +27,18 @@ include = [
 [features]
 default = ["bundled"]
 bundled = []
+serde = ["dep:serde", "dep:humantime-serde"]
 
 [dependencies]
 ort = "2.0.0-rc.12"
 thiserror = "2"
 
+serde = { version = "1", optional = true, features = ["derive"] }
+humantime-serde = { version = "1", optional = true }
+
 [dev-dependencies]
 hound = "3"
+serde_json = "1"
 
 [[example]]
 name = "detect_file"
diff --git a/examples/streaming.rs b/examples/streaming.rs
index 13be5bf..e531a7d 100644
--- a/examples/streaming.rs
+++ b/examples/streaming.rs
@@ -9,7 +9,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
   let mut session = Session::from_memory(MODEL_BYTES)?;
   let config = SpeechOptions::default();
   let mut stream = StreamState::new(config.sample_rate());
-  let mut segmenter = SpeechSegmenter::new(config);
+  let mut segmenter = SpeechSegmenter::new(config.clone());
 
   let synthetic_audio = vec![0.0_f32; config.sample_rate().chunk_samples() * 8];
   segmenter.process_samples(&mut session, &mut stream, &synthetic_audio, |segment| {
diff --git a/src/detector.rs b/src/detector.rs
index 40770ad..fa4d7bf 100644
--- a/src/detector.rs
+++ b/src/detector.rs
@@ -14,7 +14,7 @@ pub struct SpeechSegment {
 
 impl SpeechSegment {
   /// Create a new speech segment with the given start and end samples and sample rate.
-  #[inline]
+  #[cfg_attr(not(tarpaulin), inline(always))]
   pub const fn new(start_sample: u64, end_sample: u64, sample_rate: SampleRate) -> Self {
     Self {
       start_sample,
@@ -24,37 +24,37 @@ impl SpeechSegment {
   }
 
   /// Returns the start sample of this speech segment.
-  #[inline]
+  #[cfg_attr(not(tarpaulin), inline(always))]
   pub const fn start_sample(&self) -> u64 {
     self.start_sample
   }
 
   /// Returns the end sample of this speech segment.
-  #[inline]
+  #[cfg_attr(not(tarpaulin), inline(always))]
   pub const fn end_sample(&self) -> u64 {
     self.end_sample
   }
 
   /// Returns the sample rate of this speech segment.
-  #[inline]
+  #[cfg_attr(not(tarpaulin), inline(always))]
   pub const fn sample_rate(&self) -> SampleRate {
     self.sample_rate
   }
 
   /// Returns the number of samples in this speech segment.
-  #[inline]
+  #[cfg_attr(not(tarpaulin), inline(always))]
   pub const fn sample_count(&self) -> u64 {
     self.end_sample.saturating_sub(self.start_sample)
   }
 
   /// Returns the start time of this speech segment in seconds.
-  #[inline]
+  #[cfg_attr(not(tarpaulin), inline(always))]
   pub fn start_seconds(&self) -> f64 {
     self.start_sample as f64 / self.sample_rate.hz() as f64
   }
 
   /// Returns the end time of this speech segment in seconds.
-  #[inline]
+  #[cfg_attr(not(tarpaulin), inline(always))]
   pub fn end_seconds(&self) -> f64 {
     self.end_sample as f64 / self.sample_rate.hz() as f64
   }
@@ -109,10 +109,10 @@ impl SpeechSegmenter {
   ///
   /// Changing sample rate starts a new logical timeline, so any
   /// in-flight segment state is cleared.
-  #[inline]
+  #[cfg_attr(not(tarpaulin), inline(always))]
   pub fn set_sample_rate(&mut self, sample_rate: SampleRate) {
     if self.sample_rate() != sample_rate {
-      self.options = self.options.with_sample_rate(sample_rate);
+      self.options.set_sample_rate(sample_rate);
       self.reset();
     }
   }
@@ -341,6 +341,8 @@ pub fn detect_speech(
 
 #[cfg(test)]
 mod tests {
+  use std::time::Duration;
+
   use crate::{SampleRate, SpeechOptions};
 
   use super::{SpeechSegment, SpeechSegmenter};
@@ -366,7 +368,7 @@ mod tests {
   #[test]
   fn closes_segment_after_confirmed_silence() {
     let config = SpeechOptions::default();
-    let mut segmenter = SpeechSegmenter::new(config);
+    let mut segmenter = SpeechSegmenter::new(config.clone());
     let mut probabilities = vec![0.9; frame_count(320, SampleRate::Rate16k)];
     probabilities.extend(vec![0.0; frame_count(128, SampleRate::Rate16k)]);
 
@@ -379,7 +381,7 @@ mod tests {
   #[test]
   fn drops_short_bursts() {
     let config = SpeechOptions::default();
-    let mut segmenter = SpeechSegmenter::new(config);
+    let mut segmenter = SpeechSegmenter::new(config.clone());
     let mut probabilities = vec![0.9; frame_count(64, SampleRate::Rate16k)];
     probabilities.extend(vec![0.0; frame_count(160, SampleRate::Rate16k)]);
     let segments = collect(&mut segmenter, &probabilities);
@@ -389,9 +391,9 @@ mod tests {
   #[test]
   fn middle_band_frames_do_not_reset_tentative_end() {
     let config = SpeechOptions::default()
-      .with_min_speech_duration_ms(0)
-      .with_speech_pad_ms(0)
-      .with_min_silence_duration_ms(100);
+      .with_min_speech_duration(Duration::ZERO)
+      .with_speech_pad(Duration::ZERO)
+      .with_min_silence_duration(Duration::from_millis(100));
     let mut segmenter = SpeechSegmenter::new(config);
 
     let mut probabilities = vec![0.9; 4];
@@ -457,9 +459,9 @@ mod tests {
   #[test]
   fn force_splits_long_speech_when_max_duration_is_reached() {
     let config = SpeechOptions::default()
-      .with_min_speech_duration_ms(0)
-      .with_speech_pad_ms(0)
-      .with_max_speech_duration_ms(160);
+      .with_min_speech_duration(Duration::ZERO)
+      .with_speech_pad(Duration::ZERO)
+      .with_max_speech_duration(Duration::from_millis(160));
     let mut segmenter = SpeechSegmenter::new(config);
     let probabilities = vec![0.9; 8];
 
@@ -474,11 +476,11 @@ mod tests {
   #[test]
   fn prefers_recorded_silence_when_splitting_long_speech() {
     let config = SpeechOptions::default()
-      .with_min_speech_duration_ms(0)
-      .with_speech_pad_ms(0)
-      .with_min_silence_duration_ms(300)
-      .with_min_silence_at_max_speech_ms(64)
-      .with_max_speech_duration_ms(256);
+      .with_min_speech_duration(Duration::ZERO)
+      .with_speech_pad(Duration::ZERO)
+      .with_min_silence_duration(Duration::from_millis(300))
+      .with_min_silence_at_max_speech(Duration::from_millis(64))
+      .with_max_speech_duration(Duration::from_millis(256));
     let mut segmenter = SpeechSegmenter::new(config);
     let mut probabilities = vec![0.9; 4];
     probabilities.extend(vec![0.0; 4]);
@@ -495,11 +497,11 @@ mod tests {
   #[test]
   fn non_qualifying_silence_does_not_overwrite_next_start() {
     let config = SpeechOptions::default()
-      .with_min_speech_duration_ms(0)
-      .with_speech_pad_ms(0)
-      .with_min_silence_duration_ms(10_000)
-      .with_min_silence_at_max_speech_ms(64)
-      .with_max_speech_duration_ms(512);
+      .with_min_speech_duration(Duration::ZERO)
+      .with_speech_pad(Duration::ZERO)
+      .with_min_silence_duration(Duration::from_millis(10_000))
+      .with_min_silence_at_max_speech(Duration::from_millis(64))
+      .with_max_speech_duration(Duration::from_millis(512));
     let mut segmenter = SpeechSegmenter::new(config);
 
     let mut probabilities = vec![0.9; 4];
@@ -516,11 +518,11 @@ mod tests {
   #[test]
   fn force_split_during_silence_closes_without_restarting() {
     let config = SpeechOptions::default()
-      .with_min_speech_duration_ms(0)
-      .with_speech_pad_ms(0)
-      .with_min_silence_duration_ms(10_000)
-      .with_min_silence_at_max_speech_ms(64)
-      .with_max_speech_duration_ms(224);
+      .with_min_speech_duration(Duration::ZERO)
+      .with_speech_pad(Duration::ZERO)
+      .with_min_silence_duration(Duration::from_millis(10_000))
+      .with_min_silence_at_max_speech(Duration::from_millis(64))
+      .with_max_speech_duration(Duration::from_millis(224));
     let mut segmenter = SpeechSegmenter::new(config);
 
     let mut probabilities = vec![0.9; 4];
@@ -535,11 +537,11 @@ mod tests {
   #[test]
   fn force_split_applies_speech_pad_to_split_boundaries() {
     let config = SpeechOptions::default()
-      .with_min_speech_duration_ms(0)
-      .with_speech_pad_ms(32)
-      .with_min_silence_duration_ms(10_000)
-      .with_min_silence_at_max_speech_ms(64)
-      .with_max_speech_duration_ms(512);
+      .with_min_speech_duration(Duration::ZERO)
+      .with_speech_pad(Duration::from_millis(32))
+      .with_min_silence_duration(Duration::from_millis(10_000))
+      .with_min_silence_at_max_speech(Duration::from_millis(64))
+      .with_max_speech_duration(Duration::from_millis(512));
     let mut segmenter = SpeechSegmenter::new(config);
 
     let mut probabilities = vec![0.9; 4];
diff --git a/src/options.rs b/src/options.rs
index 79133ce..d425db3 100644
--- a/src/options.rs
+++ b/src/options.rs
@@ -1,13 +1,100 @@
+use core::time::Duration;
+
 pub use ort::session::builder::GraphOptimizationLevel;
 
 use crate::error::{Error, Result};
 
+#[cfg(feature = "serde")]
+use serde::{Deserialize, Serialize};
+
+/// Serde adapter for `GraphOptimizationLevel`, referenced from `SessionOptions`
+/// through `#[serde(with = "graph_optimization_level")]`.
+#[cfg(feature = "serde")]
+mod graph_optimization_level {
+  use super::GraphOptimizationLevel;
+  use serde::*;
+
+  /// Local mirror of `GraphOptimizationLevel` that derives the serde traits;
+  /// variant names are serialized in `snake_case`.
+  #[derive(
+    Debug, Default, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd, Serialize, Deserialize,
+  )]
+  #[serde(rename_all = "snake_case")]
+  enum OptimizationLevel {
+    Disable,
+    Level1,
+    Level2,
+    #[default]
+    Level3,
+    All,
+  }
+
+  impl From<GraphOptimizationLevel> for OptimizationLevel {
+    #[inline]
+    fn from(value: GraphOptimizationLevel) -> Self {
+      match value {
+        GraphOptimizationLevel::Disable => Self::Disable,
+        GraphOptimizationLevel::Level1 => Self::Level1,
+        GraphOptimizationLevel::Level2 => Self::Level2,
+        GraphOptimizationLevel::Level3 => Self::Level3,
+        GraphOptimizationLevel::All => Self::All,
+      }
+    }
+  }
+
+  impl From<OptimizationLevel> for GraphOptimizationLevel {
+    #[inline]
+    fn from(value: OptimizationLevel) -> Self {
+      match value {
+        OptimizationLevel::Disable => Self::Disable,
+        OptimizationLevel::Level1 => Self::Level1,
+        OptimizationLevel::Level2 => Self::Level2,
+        OptimizationLevel::Level3 => Self::Level3,
+        OptimizationLevel::All => Self::All,
+      }
+    }
+  }
+
+  /// Serializes `level` by first converting it into an `OptimizationLevel`.
+  #[cfg_attr(not(tarpaulin), inline(always))]
+  pub fn serialize<S>(level: &GraphOptimizationLevel, serializer: S) -> Result<S::Ok, S::Error>
+  where
+    S: Serializer,
+  {
+    OptimizationLevel::from(*level).serialize(serializer)
+  }
+
+  /// Deserializes an `OptimizationLevel` and converts it into `GraphOptimizationLevel`.
+  #[cfg_attr(not(tarpaulin), inline(always))]
+  pub fn deserialize<'de, D>(deserializer: D) -> Result<GraphOptimizationLevel, D::Error>
+  where
+    D: Deserializer<'de>,
+  {
+    OptimizationLevel::deserialize(deserializer).map(Into::into)
+  }
+
+  /// Fallback used by serde when the `optimization_level` field is absent.
+  ///
+  /// NOTE(review): this returns `Disable`, while `SessionOptions::new()` is
+  /// changed to `Level3` in this same patch, so `new()` and a deserialized `{}`
+  /// disagree — confirm which default is intended.
+  #[cfg_attr(not(tarpaulin), inline(always))]
+  pub const fn default() -> GraphOptimizationLevel {
+    GraphOptimizationLevel::Disable
+  }
+}
+
 /// Sample rates directly supported by the Silero VAD model.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
 pub enum SampleRate {
   /// 8 kHz sample rate, which uses smaller chunks and less context.
+  #[cfg_attr(feature = "serde", serde(rename = "8k"))]
   Rate8k,
   /// 16 kHz sample rate, which uses larger chunks and more context for better accuracy.
+  #[cfg_attr(feature = "serde", serde(rename = "16k"))]
+  #[default]
   Rate16k,
 }
 
@@ -49,22 +125,22 @@ impl SampleRate {
     }
   }
 }
-
-impl Default for SampleRate {
-  #[inline]
-  fn default() -> Self {
-    Self::Rate16k
-  }
-}
-
 /// Options for constructing an ONNX session.
 ///
 /// This type intentionally stays small. Deployment-specific runtime
 /// policy such as `intra_threads` / `inter_threads` should normally be
 /// configured one layer up, then passed down via
 /// [`crate::Session::from_ort_session`].
-#[derive(Debug, Clone, Copy)]
+#[derive(Debug, Clone)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub struct SessionOptions {
+  #[cfg_attr(
+    feature = "serde",
+    serde(
+      default = "graph_optimization_level::default",
+      with = "graph_optimization_level"
+    )
+  )]
   optimization_level: GraphOptimizationLevel,
 }
 
@@ -80,7 +156,7 @@ impl SessionOptions {
   #[cfg_attr(not(tarpaulin), inline(always))]
   pub const fn new() -> Self {
     Self {
-      optimization_level: GraphOptimizationLevel::Disable,
+      optimization_level: GraphOptimizationLevel::Level3,
     }
   }
 
@@ -98,20 +174,79 @@ impl SessionOptions {
   }
 }
 
+#[cfg_attr(not(tarpaulin), inline(always))]
+const fn default_start_threshold() -> f32 {
+  0.5
+}
+
+#[cfg_attr(not(tarpaulin), inline(always))]
+const fn default_min_speech_duration() -> Duration {
+  Duration::from_millis(250)
+}
+
+#[cfg_attr(not(tarpaulin), inline(always))]
+const fn default_min_silence_duration() -> Duration {
+  Duration::from_millis(100)
+}
+
+#[cfg_attr(not(tarpaulin), inline(always))]
+const fn default_min_silence_at_max_speech() -> Duration {
+  Duration::from_millis(98)
+}
+
+#[cfg_attr(not(tarpaulin), inline(always))]
+const fn default_speech_pad() -> Duration {
+  Duration::from_millis(30)
+}
+
 /// Configuration for turning frame probabilities into speech segments.
-#[derive(Debug, Clone, Copy)]
+#[derive(Debug, Clone)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub struct SpeechOptions {
+  #[cfg_attr(feature = "serde", serde(default))]
   sample_rate: SampleRate,
+  #[cfg_attr(feature = "serde", serde(default = "default_start_threshold"))]
   start_threshold: f32,
+  #[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))]
   end_threshold: Option<f32>,
-  min_speech_duration_ms: u32,
-  min_silence_duration_ms: u32,
-  min_silence_at_max_speech_ms: u32,
-  max_speech_duration_ms: Option<u32>,
-  speech_pad_ms: u32,
+  #[cfg_attr(
+    feature = "serde",
+    serde(default = "default_min_speech_duration", with = "humantime_serde")
+  )]
+  min_speech_duration: Duration,
+  #[cfg_attr(
+    feature = "serde",
+    serde(default = "default_min_silence_duration", with = "humantime_serde")
+  )]
+  min_silence_duration: Duration,
+  #[cfg_attr(
+    feature = "serde",
+    serde(
+      default = "default_min_silence_at_max_speech",
+      with = "humantime_serde"
+    )
+  )]
+  min_silence_at_max_speech: Duration,
+  // `default` is required here: with a custom `with` module serde no longer treats
+  // a missing `Option` field as `None`, and `None` is skipped when serializing.
+  #[cfg_attr(
+    feature = "serde",
+    serde(
+      default,
+      skip_serializing_if = "Option::is_none",
+      with = "humantime_serde::option"
+    )
+  )]
+  max_speech_duration: Option<Duration>,
+  #[cfg_attr(
+    feature = "serde",
+    serde(default = "default_speech_pad", with = "humantime_serde")
+  )]
+  speech_pad: Duration,
 }
 
 impl Default for SpeechOptions {
+  #[cfg_attr(not(tarpaulin), inline(always))]
   fn default() -> Self {
     Self::new()
   }
@@ -123,14 +255,14 @@ impl SpeechOptions {
   pub const fn new() -> Self {
     Self {
       sample_rate: SampleRate::Rate16k,
-      start_threshold: 0.5,
+      start_threshold: default_start_threshold(),
       end_threshold: None,
-      min_speech_duration_ms: 250,
-      min_silence_duration_ms: 100,
+      min_speech_duration: default_min_speech_duration(),
+      min_silence_duration: default_min_silence_duration(),
       // Matches the upstream silero-vad Python default (0.098 s).
-      min_silence_at_max_speech_ms: 98,
-      max_speech_duration_ms: None,
-      speech_pad_ms: 30,
+      min_silence_at_max_speech: default_min_silence_at_max_speech(),
+      max_speech_duration: None,
+      speech_pad: default_speech_pad(),
     }
   }
 
@@ -164,50 +296,50 @@ impl SpeechOptions {
 
-  /// Returns the minimum duration of detected speech segments, in milliseconds.
+  /// Returns the minimum duration of detected speech segments as a [`Duration`].
   #[cfg_attr(not(tarpaulin), inline(always))]
-  pub const fn min_speech_duration_ms(&self) -> u32 {
-    self.min_speech_duration_ms
+  pub const fn min_speech_duration(&self) -> Duration {
+    self.min_speech_duration
   }
 
-  /// Returns the minimum duration of silence required to close a detected speech segment, in milliseconds.
+  /// Returns the minimum duration of silence required to close a detected speech segment.
   #[cfg_attr(not(tarpaulin), inline(always))]
-  pub const fn min_silence_duration_ms(&self) -> u32 {
-    self.min_silence_duration_ms
+  pub const fn min_silence_duration(&self) -> Duration {
+    self.min_silence_duration
   }
 
   /// Returns the minimum silence duration used as a preferred split point when the maximum speech duration is reached.
   #[cfg_attr(not(tarpaulin), inline(always))]
-  pub const fn min_silence_at_max_speech_ms(&self) -> u32 {
-    self.min_silence_at_max_speech_ms
+  pub const fn min_silence_at_max_speech(&self) -> Duration {
+    self.min_silence_at_max_speech
   }
 
   /// Returns the maximum duration of a speech segment before the segmenter force-splits it.
   #[cfg_attr(not(tarpaulin), inline(always))]
-  pub const fn max_speech_duration_ms(&self) -> Option<u32> {
-    self.max_speech_duration_ms
+  pub const fn max_speech_duration(&self) -> Option<Duration> {
+    self.max_speech_duration
   }
 
-  /// Returns the amount of padding to add to the start of detected speech segments, in milliseconds.
+  /// Returns the amount of padding to add to the start of detected speech segments.
   #[cfg_attr(not(tarpaulin), inline(always))]
-  pub const fn speech_pad_ms(&self) -> u32 {
-    self.speech_pad_ms
+  pub const fn speech_pad(&self) -> Duration {
+    self.speech_pad
   }
 
   /// Returns the minimum duration of detected speech segments, in samples.
   #[cfg_attr(not(tarpaulin), inline(always))]
   pub fn min_speech_samples(&self) -> u64 {
-    ms_to_samples(self.min_speech_duration_ms, self.sample_rate)
+    ms_to_samples(self.min_speech_duration, self.sample_rate)
   }
 
   /// Returns the minimum duration of silence required to close a detected speech segment, in samples.
   #[cfg_attr(not(tarpaulin), inline(always))]
   pub fn min_silence_samples(&self) -> u64 {
-    ms_to_samples(self.min_silence_duration_ms, self.sample_rate)
+    ms_to_samples(self.min_silence_duration, self.sample_rate)
   }
 
   /// Returns the minimum silence duration used as a preferred split point when max speech duration is reached, in samples.
   #[cfg_attr(not(tarpaulin), inline(always))]
   pub fn min_silence_at_max_speech_samples(&self) -> u64 {
-    ms_to_samples(self.min_silence_at_max_speech_ms, self.sample_rate)
+    ms_to_samples(self.min_silence_at_max_speech, self.sample_rate)
   }
 
   /// Returns the maximum speech duration before force-splitting, in samples.
@@ -219,8 +351,8 @@ impl SpeechOptions {
   ///   the current segment and the start of the next one
   #[cfg_attr(not(tarpaulin), inline(always))]
   pub fn max_speech_samples(&self) -> Option<u64> {
-    self.max_speech_duration_ms.map(|duration_ms| {
-      ms_to_samples(duration_ms, self.sample_rate)
+    self.max_speech_duration.map(|duration| {
+      ms_to_samples(duration, self.sample_rate)
         .saturating_sub(self.sample_rate.chunk_samples() as u64)
         .saturating_sub(self.speech_pad_samples().saturating_mul(2))
     })
@@ -229,20 +361,20 @@ impl SpeechOptions {
   /// Returns the amount of padding to add to the start of detected speech segments, in samples.
   #[cfg_attr(not(tarpaulin), inline(always))]
   pub fn speech_pad_samples(&self) -> u64 {
-    ms_to_samples(self.speech_pad_ms, self.sample_rate)
+    ms_to_samples(self.speech_pad, self.sample_rate)
   }
 
   /// Set the sample rate to use for speech detection.
   #[cfg_attr(not(tarpaulin), inline(always))]
   pub const fn with_sample_rate(mut self, sample_rate: SampleRate) -> Self {
-    self.sample_rate = sample_rate;
+    self.set_sample_rate(sample_rate);
     self
   }
 
   /// Set the start threshold, which must be between 0 and 1. If not set, it defaults to 0.5.
   #[cfg_attr(not(tarpaulin), inline(always))]
-  pub fn with_start_threshold(mut self, threshold: f32) -> Self {
-    self.start_threshold = sanitize_probability(threshold);
+  pub const fn with_start_threshold(mut self, threshold: f32) -> Self {
+    self.set_start_threshold(threshold);
     self
   }
 
@@ -254,8 +386,8 @@ impl SpeechOptions {
   /// fall back to the default derived hysteresis rule even if builder
   /// methods are called in a different order.
   #[cfg_attr(not(tarpaulin), inline(always))]
-  pub fn with_end_threshold(mut self, threshold: f32) -> Self {
-    self.end_threshold = Some(sanitize_probability(threshold));
+  pub const fn with_end_threshold(mut self, threshold: f32) -> Self {
+    self.set_end_threshold(threshold);
     self
   }
 
@@ -268,54 +400,116 @@ impl SpeechOptions {
 
   /// Set the minimum duration of detected speech segments, in milliseconds.
   #[cfg_attr(not(tarpaulin), inline(always))]
-  pub const fn with_min_speech_duration_ms(mut self, duration_ms: u32) -> Self {
-    self.min_speech_duration_ms = duration_ms;
+  pub const fn with_min_speech_duration(mut self, duration: Duration) -> Self {
+    self.set_min_silence_duration(duration);
     self
   }
 
-  /// Set the minimum duration of silence required to close a detected speech segment, in milliseconds.
+  /// Set the minimum duration of silence required to close a detected speech segment.
   #[cfg_attr(not(tarpaulin), inline(always))]
-  pub const fn with_min_silence_duration_ms(mut self, duration_ms: u32) -> Self {
-    self.min_silence_duration_ms = duration_ms;
+  pub const fn with_min_silence_duration(mut self, duration: Duration) -> Self {
+    self.set_min_silence_duration(duration);
     self
   }
 
   /// Set the minimum silence duration that can be used as a preferred split point when maximum speech duration is reached.
   #[cfg_attr(not(tarpaulin), inline(always))]
-  pub const fn with_min_silence_at_max_speech_ms(mut self, duration_ms: u32) -> Self {
-    self.min_silence_at_max_speech_ms = duration_ms;
+  pub const fn with_min_silence_at_max_speech(mut self, duration: Duration) -> Self {
+    self.set_min_silence_at_max_speech(duration);
     self
   }
 
   /// Set the maximum duration of a speech segment before the segmenter force-splits it.
   #[cfg_attr(not(tarpaulin), inline(always))]
-  pub const fn with_max_speech_duration_ms(mut self, duration_ms: u32) -> Self {
-    self.max_speech_duration_ms = Some(duration_ms);
+  pub const fn with_max_speech_duration(mut self, duration: Duration) -> Self {
+    self.set_max_speech_duration(duration);
     self
   }
 
   /// Clear the maximum speech duration, disabling force-splitting by segment length.
   #[cfg_attr(not(tarpaulin), inline(always))]
   pub const fn clear_max_speech_duration(mut self) -> Self {
-    self.max_speech_duration_ms = None;
+    self.max_speech_duration = None;
     self
   }
 
-  /// Set the amount of padding to add to the start of detected speech segments, in milliseconds.
+  /// Set the amount of padding to add to the start of detected speech segments.
   #[cfg_attr(not(tarpaulin), inline(always))]
-  pub const fn with_speech_pad_ms(mut self, pad_ms: u32) -> Self {
-    self.speech_pad_ms = pad_ms;
+  pub const fn with_speech_pad(mut self, pad: Duration) -> Self {
+    self.set_speech_pad(pad);
+    self
+  }
+
+  /// Set the sample rate to use for speech detection.
+  #[cfg_attr(not(tarpaulin), inline(always))]
+  pub const fn set_sample_rate(&mut self, sample_rate: SampleRate) -> &mut Self {
+    self.sample_rate = sample_rate;
+    self
+  }
+
+  /// Set the start threshold, which must be between 0 and 1. If not set, it defaults to 0.5.
+  #[cfg_attr(not(tarpaulin), inline(always))]
+  pub const fn set_start_threshold(&mut self, threshold: f32) -> &mut Self {
+    self.start_threshold = sanitize_probability(threshold);
+    self
+  }
+
+  /// Set the preferred end threshold.
+  ///
+  /// The stored value is sanitized into the `[0, 1]` range. When the
+  /// threshold is later read via [`Self::end_threshold`], it is also
+  /// checked against the current start threshold. Invalid combinations
+  /// fall back to the default derived hysteresis rule even if builder
+  /// methods are called in a different order.
+  #[cfg_attr(not(tarpaulin), inline(always))]
+  pub const fn set_end_threshold(&mut self, threshold: f32) -> &mut Self {
+    self.end_threshold = Some(sanitize_probability(threshold));
+    self
+  }
+
+  /// Set the minimum duration of detected speech segments.
+  #[cfg_attr(not(tarpaulin), inline(always))]
+  pub const fn set_min_speech_duration(&mut self, duration: Duration) -> &mut Self {
+    self.min_speech_duration = duration;
+    self
+  }
+
+  /// Set the minimum duration of silence required to close a detected speech segment.
+  #[cfg_attr(not(tarpaulin), inline(always))]
+  pub const fn set_min_silence_duration(&mut self, duration: Duration) -> &mut Self {
+    self.min_silence_duration = duration;
+    self
+  }
+
+  /// Set the minimum silence duration that can be used as a preferred split point when maximum speech duration is reached.
+  #[cfg_attr(not(tarpaulin), inline(always))]
+  pub const fn set_min_silence_at_max_speech(&mut self, duration: Duration) -> &mut Self {
+    self.min_silence_at_max_speech = duration;
+    self
+  }
+
+  /// Set the maximum duration of a speech segment before the segmenter force-splits it.
+  #[cfg_attr(not(tarpaulin), inline(always))]
+  pub const fn set_max_speech_duration(&mut self, duration: Duration) -> &mut Self {
+    self.max_speech_duration = Some(duration);
+    self
+  }
+
+  /// Set the amount of padding to add to the start of detected speech segments.
+  #[cfg_attr(not(tarpaulin), inline(always))]
+  pub const fn set_speech_pad(&mut self, pad: Duration) -> &mut Self {
+    self.speech_pad = pad;
     self
   }
 }
 
 #[inline]
-pub(crate) fn ms_to_samples(duration_ms: u32, sample_rate: SampleRate) -> u64 {
-  (u64::from(duration_ms) * u64::from(sample_rate.hz())) / 1_000
+pub(crate) const fn ms_to_samples(duration: Duration, sample_rate: SampleRate) -> u64 {
+  ((duration.as_millis() * (sample_rate.hz() as u128)) / 1_000) as u64
 }
 
 #[inline]
-fn sanitize_probability(value: f32) -> f32 {
+const fn sanitize_probability(value: f32) -> f32 {
   if value.is_finite() {
     value.clamp(0.0, 1.0)
   } else {
@@ -324,12 +518,12 @@ fn sanitize_probability(value: f32) -> f32 {
 }
 
 #[inline]
-fn default_end_threshold(start_threshold: f32) -> f32 {
+const fn default_end_threshold(start_threshold: f32) -> f32 {
   sanitize_probability((sanitize_probability(start_threshold) - 0.15).max(0.01))
 }
 
 #[inline]
-fn effective_end_threshold(start_threshold: f32, end_threshold: f32) -> f32 {
+const fn effective_end_threshold(start_threshold: f32, end_threshold: f32) -> f32 {
   let start_threshold = sanitize_probability(start_threshold);
   let end_threshold = sanitize_probability(end_threshold);
 
@@ -342,6 +536,8 @@ fn effective_end_threshold(start_threshold: f32, end_threshold: f32) -> f32 {
 
 #[cfg(test)]
 mod tests {
+  use std::time::Duration;
+
   use ort::session::builder::GraphOptimizationLevel;
 
   use super::{SampleRate, SessionOptions, SpeechOptions, ms_to_samples};
@@ -360,17 +556,26 @@ mod tests {
     assert_eq!(config.sample_rate(), SampleRate::Rate16k);
     assert_eq!(config.start_threshold(), 0.5);
     assert_eq!(config.end_threshold(), 0.35);
-    assert_eq!(config.min_speech_duration_ms(), 250);
-    assert_eq!(config.min_silence_duration_ms(), 100);
-    assert_eq!(config.min_silence_at_max_speech_ms(), 98);
-    assert_eq!(config.max_speech_duration_ms(), None);
-    assert_eq!(config.speech_pad_ms(), 30);
+    assert_eq!(config.min_speech_duration(), Duration::from_millis(250));
+    assert_eq!(config.min_silence_duration(), Duration::from_millis(100));
+    assert_eq!(
+      config.min_silence_at_max_speech(),
+      Duration::from_millis(98)
+    );
+    assert_eq!(config.max_speech_duration(), None);
+    assert_eq!(config.speech_pad(), Duration::from_millis(30));
   }
 
   #[test]
   fn ms_to_samples_uses_stream_rate() {
-    assert_eq!(ms_to_samples(100, SampleRate::Rate16k), 1_600);
-    assert_eq!(ms_to_samples(100, SampleRate::Rate8k), 800);
+    assert_eq!(
+      ms_to_samples(Duration::from_millis(100), SampleRate::Rate16k),
+      1_600
+    );
+    assert_eq!(
+      ms_to_samples(Duration::from_millis(100), SampleRate::Rate8k),
+      800
+    );
   }
 
   #[test]
@@ -405,10 +610,85 @@ mod tests {
   #[test]
   fn max_speech_duration_converts_to_samples_with_stream_lookahead_and_padding() {
     let options = SpeechOptions::default()
-      .with_speech_pad_ms(30)
-      .with_max_speech_duration_ms(1_000);
-    assert_eq!(options.max_speech_duration_ms(), Some(1_000));
+      .with_speech_pad(Duration::from_millis(30))
+      .with_max_speech_duration(Duration::from_millis(1_000));
+    assert_eq!(
+      options.max_speech_duration(),
+      Some(Duration::from_millis(1_000))
+    );
     assert_eq!(options.min_silence_at_max_speech_samples(), 1_568);
     assert_eq!(options.max_speech_samples(), Some(14_528));
   }
+
+  #[cfg(feature = "serde")]
+  #[test]
+  fn test_serde() {
+    let opts = SessionOptions::default().with_optimization_level(GraphOptimizationLevel::Level2);
+    let serialized = serde_json::to_string(&opts).expect("serialize options");
+    let deserialized: SessionOptions =
+      serde_json::from_str(&serialized).expect("deserialize options");
+    assert_eq!(opts.optimization_level, deserialized.optimization_level);
+
+    let default_deserialized: SessionOptions =
+      serde_json::from_str("{}").expect("deserialize default options");
+    assert!(matches!(
+      default_deserialized.optimization_level,
+      GraphOptimizationLevel::Disable
+    ));
+
+    // level1
+    let level1_opts =
+      SessionOptions::default().with_optimization_level(GraphOptimizationLevel::Level1);
+    let level1_serialized = serde_json::to_string(&level1_opts).expect("serialize level1 options");
+    let level1_deserialized: SessionOptions =
+      serde_json::from_str(&level1_serialized).expect("deserialize level1 options");
+    assert!(matches!(
+      level1_deserialized.optimization_level,
+      GraphOptimizationLevel::Level1
+    ));
+
+    // level2
+    let level2_opts =
+      SessionOptions::default().with_optimization_level(GraphOptimizationLevel::Level2);
+    let level2_serialized = serde_json::to_string(&level2_opts).expect("serialize level2 options");
+    let level2_deserialized: SessionOptions =
+      serde_json::from_str(&level2_serialized).expect("deserialize level2 options");
+    assert!(matches!(
+      level2_deserialized.optimization_level,
+      GraphOptimizationLevel::Level2
+    ));
+
+    // level3
+    let level3_opts =
+      SessionOptions::default().with_optimization_level(GraphOptimizationLevel::Level3);
+    let level3_serialized = serde_json::to_string(&level3_opts).expect("serialize level3 options");
+    let level3_deserialized: SessionOptions =
+      serde_json::from_str(&level3_serialized).expect("deserialize level3 options");
+    assert!(matches!(
+      level3_deserialized.optimization_level,
+      GraphOptimizationLevel::Level3
+    ));
+
+    // all
+    let all_opts = SessionOptions::default().with_optimization_level(GraphOptimizationLevel::All);
+    let all_serialized = serde_json::to_string(&all_opts).expect("serialize all options");
+    let all_deserialized: SessionOptions =
+      serde_json::from_str(&all_serialized).expect("deserialize all options");
+    assert!(matches!(
+      all_deserialized.optimization_level,
+      GraphOptimizationLevel::All
+    ));
+
+    // disable
+    let disable_opts =
+      SessionOptions::default().with_optimization_level(GraphOptimizationLevel::Disable);
+    let disable_serialized =
+      serde_json::to_string(&disable_opts).expect("serialize disable options");
+    let disable_deserialized: SessionOptions =
+      serde_json::from_str(&disable_serialized).expect("deserialize disable options");
+    assert!(matches!(
+      disable_deserialized.optimization_level,
+      GraphOptimizationLevel::Disable
+    ));
+  }
 }
diff --git a/src/session.rs b/src/session.rs
index 352074b..dec78dc 100644
--- a/src/session.rs
+++ b/src/session.rs
@@ -35,13 +35,13 @@ pub struct BatchInput<'a> {
 
 impl<'a> BatchInput<'a> {
   /// Returns the stream state associated with this batch input, which contains the recurrent memory and context for the stream that produced this chunk.
-  #[inline]
+  #[cfg_attr(not(tarpaulin), inline(always))]
   pub const fn state(&mut self) -> &mut StreamState {
     self.stream
   }
 
   /// Returns the chunk of audio samples for this batch input, which should be exactly the expected chunk size for the stream's sample rate.
-  #[inline]
+  #[cfg_attr(not(tarpaulin), inline(always))]
   pub const fn chunk(&self) -> &'a [f32] {
     self.chunk
   }
@@ -71,6 +71,7 @@ impl Session {
   /// Create a session from the bundled Silero VAD model with default options.
   #[cfg(feature = "bundled")]
   #[cfg_attr(docsrs, doc(cfg(feature = "bundled")))]
+  #[cfg_attr(not(tarpaulin), inline(always))]
   pub fn bundled() -> Result<Self> {
     Self::bundled_with_options(SessionOptions::default())
   }
@@ -78,16 +79,19 @@ impl Session {
   /// Create a session from the bundled Silero VAD model with custom options.
   #[cfg(feature = "bundled")]
   #[cfg_attr(docsrs, doc(cfg(feature = "bundled")))]
+  #[cfg_attr(not(tarpaulin), inline(always))]
   pub fn bundled_with_options(options: SessionOptions) -> Result<Self> {
     Self::from_memory_with_options(BUNDLED_MODEL, options)
   }
 
   /// Create a session from an ONNX file at the given path with default options.
+  #[cfg_attr(not(tarpaulin), inline(always))]
   pub fn from_file(path: impl AsRef<Path>) -> Result<Self> {
     Self::from_file_with_options(path, SessionOptions::default())
   }
 
   /// Create a session from an ONNX file at the given path with custom options.
+  #[cfg_attr(not(tarpaulin), inline(always))]
   pub fn from_file_with_options(path: impl AsRef<Path>, options: SessionOptions) -> Result<Self> {
     let path = path.as_ref();
     let session = OrtSession::builder()?
@@ -102,11 +106,13 @@ impl Session {
   }
 
   /// Create a session from an ONNX model loaded in memory with default options.
+  #[cfg_attr(not(tarpaulin), inline(always))]
   pub fn from_memory(model_bytes: &[u8]) -> Result<Self> {
     Self::from_memory_with_options(model_bytes, SessionOptions::default())
   }
 
   /// Create a session from an ONNX model loaded in memory with custom options.
+  #[cfg_attr(not(tarpaulin), inline(always))]
   pub fn from_memory_with_options(model_bytes: &[u8], options: SessionOptions) -> Result<Self> {
     let session = OrtSession::builder()?
       .with_optimization_level(options.optimization_level())
@@ -116,7 +122,7 @@ impl Session {
   }
 
   /// Create a session directly from an existing ONNX Runtime session.
-  #[inline]
+  #[cfg_attr(not(tarpaulin), inline(always))]
   pub fn from_ort_session(inner: OrtSession) -> Self {
     Self {
       inner,
@@ -127,6 +133,7 @@ impl Session {
   }
 
   /// Infer one chunk for one stream, returning the speech probability for that chunk.
+  #[cfg_attr(not(tarpaulin), inline(always))]
   pub fn infer_chunk(&mut self, stream: &mut StreamState, chunk: &[f32]) -> Result<f32> {
     Self::infer_chunk_with_scratch(
       &mut self.inner,
@@ -367,7 +374,7 @@ impl Session {
   }
 }
 
-#[inline]
+#[cfg_attr(not(tarpaulin), inline(always))]
 fn validate_shape(tensor: &'static str, actual: &[i64], expected: &[i64]) -> Result<()> {
   if actual == expected {
     Ok(())

From 251146901e45b5d31a7d70206e7ef8f70dac816e Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Tue, 21 Apr 2026 18:23:45 +1200
Subject: [PATCH 2/4] Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 src/options.rs | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/src/options.rs b/src/options.rs
index d425db3..d05cf06 100644
--- a/src/options.rs
+++ b/src/options.rs
@@ -401,7 +401,7 @@ impl SpeechOptions {
   /// Set the minimum duration of detected speech segments, in milliseconds.
   #[cfg_attr(not(tarpaulin), inline(always))]
   pub const fn with_min_speech_duration(mut self, duration: Duration) -> Self {
-    self.set_min_silence_duration(duration);
+    self.set_min_speech_duration(duration);
     self
   }
 
@@ -467,28 +467,33 @@ impl SpeechOptions {
     self
   }
 
-  /// Set the minimum duration of detected speech segments, in milliseconds.
+  /// Set the minimum duration of detected speech segments as a `Duration`.
+  /// Sub-second precision is supported according to the precision of `Duration`.
   #[cfg_attr(not(tarpaulin), inline(always))]
   pub const fn set_min_speech_duration(&mut self, duration: Duration) -> &mut Self {
     self.min_speech_duration = duration;
     self
   }
 
-  /// Set the minimum duration of silence required to close a detected speech segment, in milliseconds.
+  /// Set the minimum duration of silence required to close a detected speech segment as a `Duration`.
+  /// Sub-second precision is supported according to the precision of `Duration`.
   #[cfg_attr(not(tarpaulin), inline(always))]
   pub const fn set_min_silence_duration(&mut self, duration: Duration) -> &mut Self {
     self.min_silence_duration = duration;
     self
   }
 
-  /// Set the minimum silence duration that can be used as a preferred split point when maximum speech duration is reached.
+  /// Set the minimum silence duration, as a `Duration`, that can be used as a preferred split point
+  /// when maximum speech duration is reached. Sub-second precision is supported according to the
+  /// precision of `Duration`.
   #[cfg_attr(not(tarpaulin), inline(always))]
   pub const fn set_min_silence_at_max_speech(&mut self, duration: Duration) -> &mut Self {
     self.min_silence_at_max_speech = duration;
     self
   }
 
-  /// Set the maximum duration of a speech segment before the segmenter force-splits it.
+  /// Set the maximum duration of a speech segment, as a `Duration`, before the segmenter
+  /// force-splits it. Sub-second precision is supported according to the precision of `Duration`.
   #[cfg_attr(not(tarpaulin), inline(always))]
   pub const fn set_max_speech_duration(&mut self, duration: Duration) -> &mut Self {
     self.max_speech_duration = Some(duration);
@@ -505,7 +510,13 @@ impl SpeechOptions {
 
 #[inline]
 pub(crate) const fn ms_to_samples(duration: Duration, sample_rate: SampleRate) -> u64 {
-  ((duration.as_millis() * (sample_rate.hz() as u128)) / 1_000) as u64
+  let samples = (duration.as_millis() * (sample_rate.hz() as u128)) / 1_000;
+
+  if samples > u64::MAX as u128 {
+    u64::MAX
+  } else {
+    samples as u64
+  }
 }
 
 #[inline]

From e11947b1cb14d4aa39944363ef54df9cfed348e5 Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Tue, 21 Apr 2026 19:01:54 +1200
Subject: [PATCH 3/4] test(options): expect `Level3` as the default optimization level

---
 src/options.rs | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/options.rs b/src/options.rs
index d05cf06..157724b 100644
--- a/src/options.rs
+++ b/src/options.rs
@@ -592,10 +592,7 @@ mod tests {
   #[test]
   fn session_options_default_to_unopinionated_core_settings() {
     let options = SessionOptions::default();
-    assert_eq!(
-      options.optimization_level(),
-      GraphOptimizationLevel::Disable
-    );
+    assert_eq!(options.optimization_level(), GraphOptimizationLevel::Level3,);
   }
 
   #[test]

From e6b6ca8f46de7cced2b74ae8cab0614580106220 Mon Sep 17 00:00:00 2001
From: uqio <276879906+uqio@users.noreply.github.com>
Date: Tue, 21 Apr 2026 19:13:11 +1200
Subject: [PATCH 4/4] docs(readme): clone `config` before passing it to `SpeechSegmenter::new`

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index a65b6ec..4edc25d 100644
--- a/README.md
+++ b/README.md
@@ -74,7 +74,7 @@ fn main() -> Result<(), silero::Error> {
     let mut session = Session::from_memory(model)?;
     let config = SpeechOptions::default();
     let mut stream = StreamState::new(config.sample_rate());
-    let mut segmenter = SpeechSegmenter::new(config);
+    let mut segmenter = SpeechSegmenter::new(config.clone());
     let audio_chunk = vec![0.0_f32; config.sample_rate().chunk_samples()];
 
     segmenter.process_samples(&mut session, &mut stream, &audio_chunk, |segment| {