From d327bcc93b90a5f8a48e5a8709d7ced64f6138e6 Mon Sep 17 00:00:00 2001
From: Alister Trabattoni <alister.trabattoni@gmail.com>
Date: Thu, 5 Mar 2026 18:58:40 +0100
Subject: [PATCH 1/7] WIP.

---
 xdas/coordinates/core.py    | 12 ++++++++++++
 xdas/coordinates/interp.py  | 14 ++++++++------
 xdas/coordinates/sampled.py | 12 +++++++++---
 3 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/xdas/coordinates/core.py b/xdas/coordinates/core.py
index 4154467d..2e81bb86 100644
--- a/xdas/coordinates/core.py
+++ b/xdas/coordinates/core.py
@@ -590,6 +590,18 @@ def parse(data, dim=None):
     return data, dim
 
 
+def parse_tolerance(tolerance, dtype):
+    if np.issubdtype(dtype, np.datetime64):
+        if tolerance is None:
+            tolerance = np.timedelta64(0, "ns")
+        elif isinstance(tolerance, (int, float)):
+            tolerance = np.timedelta64(round(tolerance * 1e9), "ns")
+    else:
+        if tolerance is None:
+            tolerance = 0.0
+    return tolerance
+
+
 def get_sampling_interval(da, dim, cast=True):
     """
     Returns the sample spacing along a given dimension.
diff --git a/xdas/coordinates/interp.py b/xdas/coordinates/interp.py
index 741e4b43..46b8e31d 100644
--- a/xdas/coordinates/interp.py
+++ b/xdas/coordinates/interp.py
@@ -4,7 +4,13 @@
 import pandas as pd
 from xinterp import forward, inverse
 
-from .core import Coordinate, format_datetime, is_strictly_increasing, parse
+from .core import (
+    Coordinate,
+    format_datetime,
+    is_strictly_increasing,
+    parse,
+    parse_tolerance,
+)
 
 
 class InterpCoordinate(Coordinate, name="interpolated"):
@@ -286,11 +292,7 @@ def decimate(self, q):
         )
 
     def simplify(self, tolerance=None):
-        if tolerance is None:
-            if np.issubdtype(self.dtype, np.datetime64):
-                tolerance = np.timedelta64(0, "ns")
-            else:
-                tolerance = 0.0
+        tolerance = parse_tolerance(tolerance, self.dtype)
         tie_indices, tie_values = douglas_peucker(
             self.tie_indices, self.tie_values, tolerance
         )
diff --git a/xdas/coordinates/sampled.py b/xdas/coordinates/sampled.py
index 34485eaf..a1b47e3a 100644
--- a/xdas/coordinates/sampled.py
+++ b/xdas/coordinates/sampled.py
@@ -2,7 +2,13 @@
 
 import numpy as np
 
-from .core import Coordinate, format_datetime, is_strictly_increasing, parse
+from .core import (
+    Coordinate,
+    format_datetime,
+    is_strictly_increasing,
+    parse,
+    parse_tolerance,
+)
 
 CODE_TO_UNITS = {
     "h": "hours",
@@ -384,8 +390,8 @@ def decimate(self, q):
         return self[::q]
 
     def simplify(self, tolerance=None):
-        if tolerance is None:
-            tolerance = np.array(0, dtype=self.sampling_interval.dtype)[()]
+        tolerance = parse_tolerance(tolerance, self.dtype)
+        tolerance = np.array(0, dtype=self.sampling_interval.dtype)[()]
         tie_values = [self.tie_values[0]]
         tie_lengths = [self.tie_lengths[0]]
         for value, length in zip(self.tie_values[1:], self.tie_lengths[1:]):

From b07ab602abd9c6c6b19ecc63f2309285b71eb617 Mon Sep 17 00:00:00 2001
From: Alister Trabattoni <alister.trabattoni@gmail.com>
Date: Fri, 6 Mar 2026 09:33:05 +0100
Subject: [PATCH 2/7] Improve things and fix tolerance bug for sampled.

---
 xdas/coordinates/core.py    | 6 +++++-
 xdas/coordinates/sampled.py | 1 -
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/xdas/coordinates/core.py b/xdas/coordinates/core.py
index 2e81bb86..f77bafe1 100644
--- a/xdas/coordinates/core.py
+++ b/xdas/coordinates/core.py
@@ -592,13 +592,17 @@ def parse(data, dim=None):
 
 def parse_tolerance(tolerance, dtype):
     if np.issubdtype(dtype, np.datetime64):
+        unit, step = np.datetime_data(dtype)
+        dtype = np.dtype(f"timedelta64[{step}{unit}]")
         if tolerance is None:
             tolerance = np.timedelta64(0, "ns")
         elif isinstance(tolerance, (int, float)):
             tolerance = np.timedelta64(round(tolerance * 1e9), "ns")
     else:
         if tolerance is None:
-            tolerance = 0.0
+            tolerance = np.array(0, dtype=dtype)[()]
+        else:
+            tolerance = np.array(tolerance, dtype=dtype)[()]
     return tolerance
 
 
diff --git a/xdas/coordinates/sampled.py b/xdas/coordinates/sampled.py
index a1b47e3a..d1235d70 100644
--- a/xdas/coordinates/sampled.py
+++ b/xdas/coordinates/sampled.py
@@ -391,7 +391,6 @@ def decimate(self, q):
 
     def simplify(self, tolerance=None):
         tolerance = parse_tolerance(tolerance, self.dtype)
-        tolerance = np.array(0, dtype=self.sampling_interval.dtype)[()]
         tie_values = [self.tie_values[0]]
         tie_lengths = [self.tie_lengths[0]]
         for value, length in zip(self.tie_values[1:], self.tie_lengths[1:]):

From 49b8aad7e65e5a9cd0eaff91028c8d58ff8fe6cb Mon Sep 17 00:00:00 2001
From: Alister Trabattoni <alister.trabattoni@gmail.com>
Date: Fri, 6 Mar 2026 09:46:02 +0100
Subject: [PATCH 3/7] Test simplify methods.

---
 tests/coordinates/test_interp.py  | 15 +++++++++++++++
 tests/coordinates/test_sampled.py | 17 +++++++++++++++++
 xdas/coordinates/core.py          |  6 +-----
 3 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/tests/coordinates/test_interp.py b/tests/coordinates/test_interp.py
index f9ef27a1..bdd65e97 100644
--- a/tests/coordinates/test_interp.py
+++ b/tests/coordinates/test_interp.py
@@ -285,6 +285,21 @@ def test_simplify(self):
         coord = InterpCoordinate({"tie_indices": xp, "tie_values": yp})
         assert len(coord.simplify(1.0).tie_indices) == 2
 
+    def test_simplify_datetime(self):
+        t0 = np.datetime64("2000-01-01T00:00:00")
+        xp = np.sort(np.random.choice(10000, 1000, replace=False))
+        xp[0] = 0
+        xp[-1] = 10000
+        yp = (
+            t0
+            + xp.astype("timedelta64[s]")
+            + np.random.randint(-500, 500, size=1000).astype("timedelta64[ms]")
+        )
+        coord = InterpCoordinate({"tie_indices": xp, "tie_values": yp})
+        assert len(coord.simplify(np.timedelta64(1, "s")).tie_indices) == 2
+        assert len(coord.simplify(np.timedelta64(1000, "ms")).tie_indices) == 2
+        assert len(coord.simplify(1.0).tie_indices) == 2
+
     def test_singleton(self):
         coord = InterpCoordinate({"tie_indices": [0], "tie_values": [1.0]})
         assert coord[0].values == 1.0
diff --git a/tests/coordinates/test_sampled.py b/tests/coordinates/test_sampled.py
index 82d083d5..29e63ce6 100644
--- a/tests/coordinates/test_sampled.py
+++ b/tests/coordinates/test_sampled.py
@@ -678,6 +678,23 @@ def test_simplify_with_tolerance(self):
         result = coord.simplify(tolerance=0.1)
         assert np.all(np.abs(result.values - coord.values) <= 0.1)
 
+    def test_simplify_with_tolerance_on_datetime(self):
+        t0 = np.datetime64("2000-01-01T00:00:00")
+        jitter = np.random.rand(100) * 0.2 - 0.1
+        jitter = jitter.astype("timedelta64[ms]")  # convert to timedelta
+        coord = SampledCoordinate(
+            {
+                "tie_values": t0 + 10 * np.arange(100) + jitter,
+                "tie_lengths": 10 * np.ones(100, dtype=int),
+                "sampling_interval": np.timedelta64(1, "s"),
+            }
+        )
+        result = coord.simplify(tolerance=np.timedelta64(200, "ms"))
+        assert len(result.tie_values) == 1
+        # float tolerance should be treated as seconds
+        result = coord.simplify(tolerance=0.2)
+        assert len(result.tie_values) == 1
+
 
 class TestSampledCoordinateGetIndexer:
     def make_coord(self):
diff --git a/xdas/coordinates/core.py b/xdas/coordinates/core.py
index f77bafe1..1f970bb6 100644
--- a/xdas/coordinates/core.py
+++ b/xdas/coordinates/core.py
@@ -592,17 +592,13 @@ def parse(data, dim=None):
 
 def parse_tolerance(tolerance, dtype):
     if np.issubdtype(dtype, np.datetime64):
-        unit, step = np.datetime_data(dtype)
-        dtype = np.dtype(f"timedelta64[{step}{unit}]")
         if tolerance is None:
             tolerance = np.timedelta64(0, "ns")
         elif isinstance(tolerance, (int, float)):
             tolerance = np.timedelta64(round(tolerance * 1e9), "ns")
     else:
         if tolerance is None:
-            tolerance = np.array(0, dtype=dtype)[()]
-        else:
-            tolerance = np.array(tolerance, dtype=dtype)[()]
+            tolerance = 0
     return tolerance
 
 

From d826375d806bf017baaad0590de255897a5007d5 Mon Sep 17 00:00:00 2001
From: Alister Trabattoni <alister.trabattoni@gmail.com>
Date: Fri, 6 Mar 2026 10:03:57 +0100
Subject: [PATCH 4/7] Make split work with float tolerance and test it.

---
 tests/coordinates/test_sampled.py | 24 +++++++++++--
 tests/test_core.py                | 57 ++++++++++++++++++++++---------
 xdas/coordinates/interp.py        |  1 +
 xdas/coordinates/sampled.py       |  1 +
 4 files changed, 64 insertions(+), 19 deletions(-)

diff --git a/tests/coordinates/test_sampled.py b/tests/coordinates/test_sampled.py
index 29e63ce6..9ef0467c 100644
--- a/tests/coordinates/test_sampled.py
+++ b/tests/coordinates/test_sampled.py
@@ -874,7 +874,7 @@ def test_to_netcdf_and_back(self):
 
 
 class TestGetSplitIndices:
-    def test_get_split_indices_no_tolerance(self):
+    def test_no_tolerance(self):
         coord = SampledCoordinate(
             {"tie_values": [0.0, 10.0], "tie_lengths": [3, 2], "sampling_interval": 1.0}
         )
@@ -882,7 +882,7 @@ def test_get_split_indices_no_tolerance(self):
         expected = np.array([3])  # indices where segments end
         assert np.array_equal(div_points, expected)
 
-    def test_get_split_indices_with_tolerance(self):
+    def test_with_tolerance(self):
         coord = SampledCoordinate(
             {
                 "tie_values": [0.0, 3.1, 10.0],
@@ -894,6 +894,26 @@ def test_get_split_indices_with_tolerance(self):
         expected = np.array([5])  # only the second gap exceeds tolerance
         assert np.array_equal(div_points, expected)
 
+    def test_with_tolerance_on_datetime(self):
+        t0 = np.datetime64("2000-01-01T00:00:00")
+        coord = SampledCoordinate(
+            {
+                "tie_values": [
+                    t0,
+                    t0 + np.timedelta64(3, "s") + np.timedelta64(100, "ms"),
+                    t0 + np.timedelta64(10, "s"),
+                ],
+                "tie_lengths": [3, 2, 2],
+                "sampling_interval": np.timedelta64(1, "s"),
+            }
+        )
+        div_points = coord.get_split_indices(tolerance=np.timedelta64(200, "ms"))
+        expected = np.array([5])  # only the second gap exceeds tolerance
+        assert np.array_equal(div_points, expected)
+        # float tolerance should be treated as seconds
+        div_points = coord.get_split_indices(tolerance=0.2)
+        assert np.array_equal(div_points, expected)
+
 
 class TestFromBlock:
     def test_from_block(self):
diff --git a/tests/test_core.py b/tests/test_core.py
index 51c33b2f..ac4237fe 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -211,23 +211,6 @@ def test_asdataarray(self):
         for dim in da.dims:
             assert np.array_equal(out[dim].values, da[dim].values)
 
-    def test_split(self):
-        da = xd.DataArray(
-            np.ones(30),
-            {
-                "time": {
-                    "tie_indices": [0, 9, 10, 19, 20, 29],
-                    "tie_values": [0.0, 9.0, 20.0, 29.0, 40.0, 49.0],
-                },
-            },
-        )
-        assert xd.concatenate(xd.split(da)).equals(da)
-        assert xd.split(da, tolerance=20.0)[0].equals(da)
-
-    def test_chunk(self):
-        da = wavelet_wavefronts()
-        assert xd.concatenate(xd.split(da, 3)).equals(da)
-
     def test_align(self):
         da1 = xd.DataArray(np.arange(2), {"x": [0, 1]})
         da2 = xd.DataArray(np.arange(3), {"y": [2, 3, 4]})
@@ -240,3 +223,43 @@ def test_align(self):
         da3 = xd.DataArray(np.arange(6).reshape(2, 3), {"x": [1, 2], "y": [2, 3, 4]})
         with pytest.raises(ValueError, match="differs from one data array to another"):
             xd.align(da1, da2, da3)
+
+
+class TestSplit:
+    def test_integer(self):
+        da = wavelet_wavefronts()
+        assert xd.concatenate(xd.split(da, 3)).equals(da)
+
+    def test_interp(self):
+        da = xd.DataArray(
+            np.ones(30),
+            {
+                "time": {
+                    "tie_indices": [0, 9, 10, 19, 20, 29],
+                    "tie_values": [0.0, 9.0, 20.0, 29.0, 40.0, 49.0],
+                },
+            },
+        )
+        assert xd.concatenate(xd.split(da)).equals(da)
+        assert xd.split(da, tolerance=20.0)[0].equals(da)
+
+    def test_interp_datetime(self):
+        da = xd.DataArray(
+            np.ones(30),
+            {
+                "time": {
+                    "tie_indices": [0, 9, 10, 19, 20, 29],
+                    "tie_values": [
+                        np.datetime64("2000-01-01T00:00:00"),
+                        np.datetime64("2000-01-01T00:00:09"),
+                        np.datetime64("2000-01-01T00:00:20"),
+                        np.datetime64("2000-01-01T00:00:29"),
+                        np.datetime64("2000-01-01T00:00:40"),
+                        np.datetime64("2000-01-01T00:00:49"),
+                    ],
+                },
+            },
+        )
+        assert xd.concatenate(xd.split(da)).equals(da)
+        assert xd.split(da, tolerance=np.timedelta64(20, "s"))[0].equals(da)
+        assert xd.split(da, tolerance=20.0)[0].equals(da)
diff --git a/xdas/coordinates/interp.py b/xdas/coordinates/interp.py
index 46b8e31d..3361d7c1 100644
--- a/xdas/coordinates/interp.py
+++ b/xdas/coordinates/interp.py
@@ -304,6 +304,7 @@ def get_split_indices(self, tolerance=None):
         (indices,) = np.nonzero(np.diff(self.tie_indices) == 1)
         indices += 1
         if tolerance is not None:
+            tolerance = parse_tolerance(tolerance, self.dtype)
             deltas = self.tie_values[indices + 1] - self.tie_values[indices]
             indices = indices[np.abs(deltas) >= tolerance]
         return np.array(
diff --git a/xdas/coordinates/sampled.py b/xdas/coordinates/sampled.py
index d1235d70..cf40511b 100644
--- a/xdas/coordinates/sampled.py
+++ b/xdas/coordinates/sampled.py
@@ -412,6 +412,7 @@ def simplify(self, tolerance=None):
     def get_split_indices(self, tolerance=None):
         indices = self.tie_indices[1:]
         if tolerance is not None:
+            tolerance = parse_tolerance(tolerance, self.dtype)
             deltas = self.tie_values[1:] - (
                 self.tie_values[:-1] + self.sampling_interval * self.tie_lengths[:-1]
             )

From 363401ca6f7479cd0b40b9fd30054593192b2784 Mon Sep 17 00:00:00 2001
From: Alister Trabattoni <alister.trabattoni@gmail.com>
Date: Fri, 6 Mar 2026 10:06:47 +0100
Subject: [PATCH 5/7] Add release note.

---
 docs/release-notes.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/release-notes.md b/docs/release-notes.md
index e9f92696..3e85c262 100644
--- a/docs/release-notes.md
+++ b/docs/release-notes.md
@@ -4,6 +4,7 @@
 - Add SampleCoordinate for more SEED-like coordinates (@atrabattoni).
 - Add `create_dirs` to `.to_netcdf` methods to create intermediate directories (@aurelienfalco).
 - Add support for multiple ROI for ASN engine (@martijnende).
+- `tolerance` can now be passed as seconds for datetime64 coordinates (@martijnende, @atrabattoni).
 - Fix numpy 2.4 and obspy 1.4.2 incompatibilities and add `xdas.__version__` (@atrabatto).
 
 ## 0.2.4

From a3121dc9f390d2e9553dab00ae42b58ea5918bcb Mon Sep 17 00:00:00 2001
From: Alister Trabattoni <alister.trabattoni@gmail.com>
Date: Fri, 6 Mar 2026 11:28:03 +0100
Subject: [PATCH 6/7] Update docstrings related to tolerance.

---
 xdas/core/routines.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/xdas/core/routines.py b/xdas/core/routines.py
index c3a75dcb..3784cfaa 100644
--- a/xdas/core/routines.py
+++ b/xdas/core/routines.py
@@ -40,7 +40,8 @@ def open_mfdatacollection(
         The dimension along which the data arrays are concatenated. Default to "first".
     tolerance : float of timedelta64, optional
         During concatenation, the tolerance to consider that the end of a file is
-        continuous with beginning of the following one. Default to zero tolerance.
+        continuous with beginning of the following one. For time coordinates, numeric
+        values are considered as seconds. Default to zero tolerance.
     squeeze : bool, optional
         Whether to return a DataArray instead of a DataCollection if the combination
         results in a data collection containing a unique data array.
@@ -118,7 +119,8 @@ def open_mfdatatree(
         The dimension along which the data arrays are concatenated. Default to "first".
     tolerance : float of timedelta64, optional
         During concatenation, the tolerance to consider that the end of a file is
-        continuous with beginning of the following one. Default to zero tolerance.
+        continuous with beginning of the following one. For time coordinates, numeric
+        values are considered as seconds. Default to zero tolerance.
     squeeze : bool, optional
         Whether to return a DataArray instead of a DataCollection if the combination
         results in a data collection containing a unique data array.
@@ -217,7 +219,8 @@ def collect(
         The dimension along which the data arrays are concatenated. Default to "first".
     tolerance : float of timedelta64, optional
         During concatenation, the tolerance to consider that the end of a file is
-        continuous with beginning of the following one. Default to zero tolerance.
+        continuous with beginning of the following one. For time coordinates, numeric
+        values are considered as seconds. Default to zero tolerance.
     squeeze : bool, optional
         Whether to return a DataArray instead of a DataCollection if the combination
         results in a data collection containing a unique data array.
@@ -284,7 +287,8 @@ def open_mfdataarray(
         The dimension along which the data arrays are concatenated. Default to "first".
     tolerance : float of timedelta64, optional
         During concatenation, the tolerance to consider that the end of a file is
-        continuous with beginning of the following one. Default to zero tolerance.
+        continuous with beginning of the following one. For time coordinates, numeric
+        values are considered as seconds. Default to zero tolerance.
     squeeze : bool, optional
         Whether to return a DataArray instead of a DataCollection if the combination
         results in a data collection containing a unique data array.
@@ -769,7 +773,8 @@ def split(da, indices_or_sections="discontinuities", dim="first", tolerance=None
         The dimension along which to split, by default "first"
     tolerance : float or timedelta64, optional
         If `indices_or_sections="discontinuities"` split will only occur on gaps and
-        overlaps that are bigger than `tolerance`. Zero tolerance by default.
+        overlaps that are bigger than `tolerance`. For time coordinates, numeric
+        values are considered as seconds. Zero tolerance by default.
 
     Returns
     -------

From 49936336e564ef032b496d438d203de253858da6 Mon Sep 17 00:00:00 2001
From: Alister Trabattoni <alister.trabattoni@gmail.com>
Date: Fri, 6 Mar 2026 11:32:34 +0100
Subject: [PATCH 7/7] Update missing tolerance docstring.

---
 xdas/core/routines.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/xdas/core/routines.py b/xdas/core/routines.py
index 3784cfaa..587c955e 100644
--- a/xdas/core/routines.py
+++ b/xdas/core/routines.py
@@ -434,8 +434,9 @@ def asdataarray(obj, tolerance=None):
     obj : object
         The objected to convert
     tolerance : float or datetime64, optional
-        For dense coordinates, tolerance error for interpolation representation, by
-        default zero.
+        For dense coordinates, tolerance error for interpolation representation.
+        For time coordinates, numeric values are considered as seconds.
+        Zero by default.
 
     Returns
     -------
@@ -476,7 +477,8 @@ def combine_by_field(
         The dimension along which concatenate. Default to "first".
     tolerance : float of timedelta64, optional
         The tolerance to consider that the end of a file is continuous with beginning of
-        the following, zero by default.
+        the following. For time coordinates, numeric values are considered as seconds.
+        Zero by default.
     squeeze : bool, optional
         Whether to return a Database instead of a DataCollection if the combinatison
         results in a data collection containing a unique Database.
@@ -539,7 +541,8 @@ def combine_by_coords(
         The dimension along which concatenate. Default to "first".
     tolerance : float of timedelta64, optional
         The tolerance to consider that the end of a file is continuous with beginning of
-        the following, zero by default.
+        the following. For time coordinates, numeric values are considered as seconds.
+        Zero by default.
     squeeze : bool, optional
         Whether to return a Database instead of a DataCollection if the combination
         results in a data collection containing a unique Database.
@@ -677,7 +680,8 @@ def concatenate(objs, dim="first", tolerance=None, virtual=None, verbose=None):
         The dimension along which concatenate.
     tolerance : float of timedelta64, optional
         The tolerance to consider that the end of a file is continuous with beginning of
-        the following, zero by default.
+        the following. For time coordinates, numeric values are considered as seconds.
+        Zero by default.
     virtual : bool, optional
         Whether to create a virtual dataset. It requires that all concatenated
         data arrays are virtual. By default tries to create a virtual dataset if possible.