Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/release-notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
- Add SampleCoordinate for more SEED-like coordinates (@atrabattoni).
- Add `create_dirs` to `.to_netcdf` methods to create intermediate directories (@aurelienfalco).
- Add support for multiple ROI for ASN engine (@martijnende).
- `tolerance` can now be passed as seconds for datetime64 coordinates (@martijnende, @atrabattoni).
- Fix numpy 2.4 and obspy 1.4.2 incompatibilities and add `xdas.__version__` (@atrabatto).

## 0.2.4
Expand Down
15 changes: 15 additions & 0 deletions tests/coordinates/test_interp.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,21 @@ def test_simplify(self):
coord = InterpCoordinate({"tie_indices": xp, "tie_values": yp})
assert len(coord.simplify(1.0).tie_indices) == 2

def test_simplify_datetime(self):
t0 = np.datetime64("2000-01-01T00:00:00")
xp = np.sort(np.random.choice(10000, 1000, replace=False))
xp[0] = 0
xp[-1] = 10000
yp = (
t0
+ xp.astype("timedelta64[s]")
+ np.random.randint(-500, 500, size=1000).astype("timedelta64[ms]")
)
coord = InterpCoordinate({"tie_indices": xp, "tie_values": yp})
assert len(coord.simplify(np.timedelta64(1, "s")).tie_indices) == 2
assert len(coord.simplify(np.timedelta64(1000, "ms")).tie_indices) == 2
assert len(coord.simplify(1.0).tie_indices) == 2

def test_singleton(self):
coord = InterpCoordinate({"tie_indices": [0], "tie_values": [1.0]})
assert coord[0].values == 1.0
Expand Down
41 changes: 39 additions & 2 deletions tests/coordinates/test_sampled.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,23 @@ def test_simplify_with_tolerance(self):
result = coord.simplify(tolerance=0.1)
assert np.all(np.abs(result.values - coord.values) <= 0.1)

def test_simplify_with_tolerance_on_datetime(self):
t0 = np.datetime64("2000-01-01T00:00:00")
jitter = np.random.rand(100) * 0.2 - 0.1
jitter = jitter.astype("timedelta64[ms]") # convert to timedelta
coord = SampledCoordinate(
{
"tie_values": t0 + 10 * np.arange(100) + jitter,
"tie_lengths": 10 * np.ones(100, dtype=int),
"sampling_interval": np.timedelta64(1, "s"),
}
)
result = coord.simplify(tolerance=np.timedelta64(200, "ms"))
assert len(result.tie_values) == 1
# float tolerance should be treated as seconds
result = coord.simplify(tolerance=0.2)
assert len(result.tie_values) == 1


class TestSampledCoordinateGetIndexer:
def make_coord(self):
Expand Down Expand Up @@ -857,15 +874,15 @@ def test_to_netcdf_and_back(self):


class TestGetSplitIndices:
def test_get_split_indices_no_tolerance(self):
def test_no_tolerance(self):
coord = SampledCoordinate(
{"tie_values": [0.0, 10.0], "tie_lengths": [3, 2], "sampling_interval": 1.0}
)
div_points = coord.get_split_indices()
expected = np.array([3]) # indices where segments end
assert np.array_equal(div_points, expected)

def test_get_split_indices_with_tolerance(self):
def test_with_tolerance(self):
coord = SampledCoordinate(
{
"tie_values": [0.0, 3.1, 10.0],
Expand All @@ -877,6 +894,26 @@ def test_get_split_indices_with_tolerance(self):
expected = np.array([5]) # only the second gap exceeds tolerance
assert np.array_equal(div_points, expected)

def test_with_tolerance_on_datetime(self):
t0 = np.datetime64("2000-01-01T00:00:00")
coord = SampledCoordinate(
{
"tie_values": [
t0,
t0 + np.timedelta64(3, "s") + np.timedelta64(100, "ms"),
t0 + np.timedelta64(10, "s"),
],
"tie_lengths": [3, 2, 2],
"sampling_interval": np.timedelta64(1, "s"),
}
)
div_points = coord.get_split_indices(tolerance=np.timedelta64(200, "ms"))
expected = np.array([5]) # only the second gap exceeds tolerance
assert np.array_equal(div_points, expected)
# float tolerance should be treated as seconds
div_points = coord.get_split_indices(tolerance=0.2)
assert np.array_equal(div_points, expected)


class TestFromBlock:
def test_from_block(self):
Expand Down
57 changes: 40 additions & 17 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,23 +211,6 @@ def test_asdataarray(self):
for dim in da.dims:
assert np.array_equal(out[dim].values, da[dim].values)

def test_split(self):
da = xd.DataArray(
np.ones(30),
{
"time": {
"tie_indices": [0, 9, 10, 19, 20, 29],
"tie_values": [0.0, 9.0, 20.0, 29.0, 40.0, 49.0],
},
},
)
assert xd.concatenate(xd.split(da)).equals(da)
assert xd.split(da, tolerance=20.0)[0].equals(da)

def test_chunk(self):
da = wavelet_wavefronts()
assert xd.concatenate(xd.split(da, 3)).equals(da)

def test_align(self):
da1 = xd.DataArray(np.arange(2), {"x": [0, 1]})
da2 = xd.DataArray(np.arange(3), {"y": [2, 3, 4]})
Expand All @@ -240,3 +223,43 @@ def test_align(self):
da3 = xd.DataArray(np.arange(6).reshape(2, 3), {"x": [1, 2], "y": [2, 3, 4]})
with pytest.raises(ValueError, match="differs from one data array to another"):
xd.align(da1, da2, da3)


class TestSplit:
    def test_integer(self):
        """Splitting into n chunks then concatenating round-trips the array."""
        data_array = wavelet_wavefronts()
        chunks = xd.split(data_array, 3)
        assert xd.concatenate(chunks).equals(data_array)

    def test_interp(self):
        """Splitting on discontinuities of an interpolated float coordinate."""
        data_array = xd.DataArray(
            np.ones(30),
            {
                "time": {
                    "tie_indices": [0, 9, 10, 19, 20, 29],
                    "tie_values": [0.0, 9.0, 20.0, 29.0, 40.0, 49.0],
                },
            },
        )
        chunks = xd.split(data_array)
        assert xd.concatenate(chunks).equals(data_array)
        # A tolerance larger than every gap: nothing gets split.
        assert xd.split(data_array, tolerance=20.0)[0].equals(data_array)

    def test_interp_datetime(self):
        """Same as test_interp with datetime ties; float tolerances count as seconds."""
        data_array = xd.DataArray(
            np.ones(30),
            {
                "time": {
                    "tie_indices": [0, 9, 10, 19, 20, 29],
                    "tie_values": [
                        np.datetime64("2000-01-01T00:00:00"),
                        np.datetime64("2000-01-01T00:00:09"),
                        np.datetime64("2000-01-01T00:00:20"),
                        np.datetime64("2000-01-01T00:00:29"),
                        np.datetime64("2000-01-01T00:00:40"),
                        np.datetime64("2000-01-01T00:00:49"),
                    ],
                },
            },
        )
        assert xd.concatenate(xd.split(data_array)).equals(data_array)
        assert xd.split(data_array, tolerance=np.timedelta64(20, "s"))[0].equals(data_array)
        assert xd.split(data_array, tolerance=20.0)[0].equals(data_array)
12 changes: 12 additions & 0 deletions xdas/coordinates/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,18 @@ def parse(data, dim=None):
return data, dim


def parse_tolerance(tolerance, dtype):
if np.issubdtype(dtype, np.datetime64):
if tolerance is None:
tolerance = np.timedelta64(0, "ns")
elif isinstance(tolerance, (int, float)):
tolerance = np.timedelta64(round(tolerance * 1e9), "ns")
else:
if tolerance is None:
tolerance = 0
return tolerance


def get_sampling_interval(da, dim, cast=True):
"""
Returns the sample spacing along a given dimension.
Expand Down
15 changes: 9 additions & 6 deletions xdas/coordinates/interp.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,13 @@
import pandas as pd
from xinterp import forward, inverse

from .core import Coordinate, format_datetime, is_strictly_increasing, parse
from .core import (
Coordinate,
format_datetime,
is_strictly_increasing,
parse,
parse_tolerance,
)


class InterpCoordinate(Coordinate, name="interpolated"):
Expand Down Expand Up @@ -286,11 +292,7 @@ def decimate(self, q):
)

def simplify(self, tolerance=None):
if tolerance is None:
if np.issubdtype(self.dtype, np.datetime64):
tolerance = np.timedelta64(0, "ns")
else:
tolerance = 0.0
tolerance = parse_tolerance(tolerance, self.dtype)
tie_indices, tie_values = douglas_peucker(
self.tie_indices, self.tie_values, tolerance
)
Expand All @@ -302,6 +304,7 @@ def get_split_indices(self, tolerance=None):
(indices,) = np.nonzero(np.diff(self.tie_indices) == 1)
indices += 1
if tolerance is not None:
tolerance = parse_tolerance(tolerance, self.dtype)
deltas = self.tie_values[indices + 1] - self.tie_values[indices]
indices = indices[np.abs(deltas) >= tolerance]
return np.array(
Expand Down
12 changes: 9 additions & 3 deletions xdas/coordinates/sampled.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,13 @@

import numpy as np

from .core import Coordinate, format_datetime, is_strictly_increasing, parse
from .core import (
Coordinate,
format_datetime,
is_strictly_increasing,
parse,
parse_tolerance,
)

CODE_TO_UNITS = {
"h": "hours",
Expand Down Expand Up @@ -384,8 +390,7 @@ def decimate(self, q):
return self[::q]

def simplify(self, tolerance=None):
if tolerance is None:
tolerance = np.array(0, dtype=self.sampling_interval.dtype)[()]
tolerance = parse_tolerance(tolerance, self.dtype)
tie_values = [self.tie_values[0]]
tie_lengths = [self.tie_lengths[0]]
for value, length in zip(self.tie_values[1:], self.tie_lengths[1:]):
Expand All @@ -407,6 +412,7 @@ def simplify(self, tolerance=None):
def get_split_indices(self, tolerance=None):
indices = self.tie_indices[1:]
if tolerance is not None:
tolerance = parse_tolerance(tolerance, self.dtype)
deltas = self.tie_values[1:] - (
self.tie_values[:-1] + self.sampling_interval * self.tie_lengths[:-1]
)
Expand Down
29 changes: 19 additions & 10 deletions xdas/core/routines.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ def open_mfdatacollection(
The dimension along which the data arrays are concatenated. Default to "first".
tolerance : float or timedelta64, optional
During concatenation, the tolerance to consider that the end of a file is
continuous with beginning of the following one. Default to zero tolerance.
continuous with beginning of the following one. For time coordinates, numeric
values are considered as seconds. Default to zero tolerance.
squeeze : bool, optional
Whether to return a DataArray instead of a DataCollection if the combination
results in a data collection containing a unique data array.
Expand Down Expand Up @@ -118,7 +119,8 @@ def open_mfdatatree(
The dimension along which the data arrays are concatenated. Default to "first".
tolerance : float or timedelta64, optional
During concatenation, the tolerance to consider that the end of a file is
continuous with beginning of the following one. Default to zero tolerance.
continuous with beginning of the following one. For time coordinates, numeric
values are considered as seconds. Default to zero tolerance.
squeeze : bool, optional
Whether to return a DataArray instead of a DataCollection if the combination
results in a data collection containing a unique data array.
Expand Down Expand Up @@ -217,7 +219,8 @@ def collect(
The dimension along which the data arrays are concatenated. Default to "first".
tolerance : float or timedelta64, optional
During concatenation, the tolerance to consider that the end of a file is
continuous with beginning of the following one. Default to zero tolerance.
continuous with beginning of the following one. For time coordinates, numeric
values are considered as seconds. Default to zero tolerance.
squeeze : bool, optional
Whether to return a DataArray instead of a DataCollection if the combination
results in a data collection containing a unique data array.
Expand Down Expand Up @@ -284,7 +287,8 @@ def open_mfdataarray(
The dimension along which the data arrays are concatenated. Default to "first".
tolerance : float or timedelta64, optional
During concatenation, the tolerance to consider that the end of a file is
continuous with beginning of the following one. Default to zero tolerance.
continuous with beginning of the following one. For time coordinates, numeric
values are considered as seconds. Default to zero tolerance.
squeeze : bool, optional
Whether to return a DataArray instead of a DataCollection if the combination
results in a data collection containing a unique data array.
Expand Down Expand Up @@ -430,8 +434,9 @@ def asdataarray(obj, tolerance=None):
obj : object
The objected to convert
tolerance : float or datetime64, optional
For dense coordinates, tolerance error for interpolation representation, by
default zero.
For dense coordinates, tolerance error for interpolation representation.
For time coordinates, numeric values are considered as seconds.
Zero by default.

Returns
-------
Expand Down Expand Up @@ -472,7 +477,8 @@ def combine_by_field(
The dimension along which concatenate. Default to "first".
tolerance : float or timedelta64, optional
The tolerance to consider that the end of a file is continuous with beginning of
the following, zero by default.
the following. For time coordinates, numeric values are considered as seconds.
Zero by default.
squeeze : bool, optional
Whether to return a Database instead of a DataCollection if the combination
results in a data collection containing a unique Database.
Expand Down Expand Up @@ -535,7 +541,8 @@ def combine_by_coords(
The dimension along which concatenate. Default to "first".
tolerance : float or timedelta64, optional
The tolerance to consider that the end of a file is continuous with beginning of
the following, zero by default.
the following. For time coordinates, numeric values are considered as seconds.
Zero by default.
squeeze : bool, optional
Whether to return a Database instead of a DataCollection if the combination
results in a data collection containing a unique Database.
Expand Down Expand Up @@ -673,7 +680,8 @@ def concatenate(objs, dim="first", tolerance=None, virtual=None, verbose=None):
The dimension along which concatenate.
tolerance : float or timedelta64, optional
The tolerance to consider that the end of a file is continuous with beginning of
the following, zero by default.
the following. For time coordinates, numeric values are considered as seconds.
Zero by default.
virtual : bool, optional
Whether to create a virtual dataset. It requires that all concatenated
data arrays are virtual. By default tries to create a virtual dataset if possible.
Expand Down Expand Up @@ -769,7 +777,8 @@ def split(da, indices_or_sections="discontinuities", dim="first", tolerance=None
The dimension along which to split, by default "first"
tolerance : float or timedelta64, optional
If `indices_or_sections="discontinuities"` split will only occur on gaps and
overlaps that are bigger than `tolerance`. Zero tolerance by default.
overlaps that are bigger than `tolerance`. For time coordinates, numeric
values are considered as seconds. Zero tolerance by default.

Returns
-------
Expand Down