Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
## [UNRELEASED] - YYYY-MM-DD

### Fixed
- Improve detection and truncation of anomalous samples causing clock synchronization corruption ([#158](https://github.com/xdf-modules/pyxdf/pull/158) by [Mathieu Scheltienne](https://github.com/mscheltienne))

## [1.17.2] - 2026-01-07
### Added
- Add new `case_sensitive` parameter to `match_streaminfos`, defaulting to `True` to maintain previous behavior; when `False`, stream properties are matched more leniently ([#134](https://github.com/xdf-modules/pyxdf/pull/134) by [Stefan Appelhoff](https://github.com/sappelhoff))
Expand Down
69 changes: 43 additions & 26 deletions src/pyxdf/pyxdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,18 +654,26 @@ def _detect_corrupted_clock_offset(


def _truncate_corrupted_offsets(temp, streams):
"""Truncate extra samples and corrupted clock offsets (pylsl#67 bug).
"""Truncate corrupted clock offsets and associated extra samples.

The pylsl#67 bug occurs when an LSL outlet is destroyed while an inlet is
still connected, causing an extra sample with potentially garbage clock
The pylsl#67, liblsl#246 bug occurs when an LSL outlet is destroyed while an inlet
is still connected, causing an extra sample with potentially garbage clock
offset values to be recorded.

This function:
This function requires BOTH conditions to confirm the pylsl#67, liblsl#246 bug:

1. Always truncates samples to match the footer sample_count (the footer
is authoritative).
2. Only truncates the last clock offset if it's
statistically anomalous (see ``_detect_corrupted_clock_offset``).
1. Extra samples exist (sample count exceeds footer sample_count).
2. The last clock offset is statistically anomalous (see
``_detect_corrupted_clock_offset``).

If both conditions are met, both the extra samples and the corrupted clock
offset are truncated.

Note: Extra samples alone (without corrupted clock offset) are NOT truncated,
as the footer sample_count may be incorrect due to race conditions in
liblsl/LabRecorder where final samples arrive after the footer is written.
Similarly, anomalous clock offsets alone (without extra samples) are NOT
truncated, as they may be due to legitimate clock resets.

Args:
temp : dict of StreamData objects indexed by stream ID.
Expand All @@ -676,19 +684,42 @@ def _truncate_corrupted_offsets(temp, streams):

See Also:
https://github.com/labstreaminglayer/pylsl/issues/67
https://github.com/sccn/liblsl/issues/246
"""
for stream_id, stream in temp.items():
# First check if there are extra samples - this is the primary evidence
# of a potential pylsl#67, liblsl#246 bug.
footer = streams.get(stream_id, {}).get("footer", {}).get("info", {})
sample_count_str = footer.get("sample_count", [None])[0]
if sample_count_str is None:
continue
footer_count = int(sample_count_str)

# Only proceed if there's evidence of the pylsl#67 bug (extra sample).
# If sample count matches, don't touch clock offsets - large jumps from
# disconnect/reconnect are legitimate.
if len(stream.time_stamps) <= footer_count:
continue
continue # No extra samples → no evidence of pylsl#67, liblsl#246

# Extra samples exist - now check if clock offset is also corrupted.
# We require BOTH conditions because:
# - Extra samples alone could be valid "late" data (footer off-by-1)
# - Anomalous clock offsets alone could be legitimate clock resets
# - Both together strongly indicate the pylsl#67, liblsl#246 bug
clock_corrupted = False
if len(stream.clock_times) >= 3:
clock_corrupted = _detect_corrupted_clock_offset(
stream.clock_times, stream.clock_values
)

if not clock_corrupted:
continue # Extra samples but normal clock offset → keep everything

# Both conditions met → truncate both
logger.warning(
"Stream %s: last clock offset is statistically anomalous, "
"truncating (see pylsl#67, liblsl#246).",
stream_id,
)
stream.clock_times = stream.clock_times[:-1]
stream.clock_values = stream.clock_values[:-1]

logger.warning(
"Stream %s: sample count (%d) exceeds footer sample_count (%d), "
Expand All @@ -700,20 +731,6 @@ def _truncate_corrupted_offsets(temp, streams):
stream.time_stamps = stream.time_stamps[:footer_count]
stream.time_series = stream.time_series[:footer_count]

# Truncate clock offset only if statistically anomalous
if len(stream.clock_times) >= 3:
is_corrupted = _detect_corrupted_clock_offset(
stream.clock_times, stream.clock_values
)
if is_corrupted:
logger.warning(
"Stream %s: last clock offset is statistically anomalous, "
"truncating (see pylsl#67).",
stream_id,
)
stream.clock_times = stream.clock_times[:-1]
stream.clock_values = stream.clock_values[:-1]

return temp


Expand Down
68 changes: 62 additions & 6 deletions test/test_truncate_corrupted_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,14 @@ def test_no_truncation_when_sample_count_matches():
assert temp[1].clock_values == orig_clock_values


def test_sample_truncation_but_clock_offsets_preserved():
"""Extra sample should be truncated, but normal clock offsets preserved."""
def test_no_truncation_when_clock_offsets_normal():
"""Extra samples should NOT be truncated when clock offsets are normal.

This tests the case where footer sample_count is off-by-1 due to race
conditions (e.g., final sample arrives after footer is written), but the
data is valid. Without evidence of corruption (corrupted clock offset),
the extra samples should be preserved.
"""
n_samples = 100
n_offsets = 20
clock_tdiff = 5
Expand All @@ -69,17 +75,67 @@ def test_sample_truncation_but_clock_offsets_preserved():
# Footer says n_samples, but we have n_samples + 1
streams = {1: {"footer": {"info": {"sample_count": [str(n_samples)]}}}}

# Store original clock offset values
# Store original values
orig_timestamps = temp[1].time_stamps.copy()
orig_clock_times = temp[1].clock_times.copy()
orig_clock_values = temp[1].clock_values.copy()

# Apply truncation
temp = _truncate_corrupted_offsets(temp, streams)

# Verify samples were truncated to footer count
assert len(temp[1].time_stamps) == n_samples
# Verify samples were NOT truncated (clock offsets are normal, no evidence
# of pylsl#67, liblsl#246 bug, so we keep the extra samples)
np.testing.assert_array_equal(temp[1].time_stamps, orig_timestamps)
assert len(temp[1].time_stamps) == n_samples + 1

# Verify clock offsets were NOT truncated
assert temp[1].clock_times == orig_clock_times
assert temp[1].clock_values == orig_clock_values


def test_no_truncation_when_no_extra_samples():
    """Anomalous clock offset should NOT be truncated without extra samples.

    This tests the case where clock offsets look anomalous (e.g., due to
    legitimate clock resets) but there are no extra samples. Without extra
    samples, we have no evidence of the pylsl#67 bug, so we should not
    truncate the clock offset.
    """
    n_samples = 100
    n_offsets = 20
    clock_tdiff = 5
    clock_offset_value = -0.001  # Normal offset: -1ms

    # Create stream with matching sample count but "anomalous" last clock offset
    # This could be a legitimate clock reset, not pylsl#67 - liblsl#246 corruption
    time_stamps = np.linspace(0, 100, n_samples)
    clock_times = [i * clock_tdiff for i in range(n_offsets)]
    clock_values = [clock_offset_value] * n_offsets

    # Add an "anomalous" clock offset (could be legitimate clock reset)
    clock_times.append(clock_times[-1] + 800000)  # Huge time jump
    clock_values.append(-750000)  # Huge value change

    temp = {
        1: MockStreamData(
            time_stamps=time_stamps,
            clock_times=clock_times,
            clock_values=clock_values,
        )
    }
    # Footer matches sample count (no extra samples)
    streams = {1: {"footer": {"info": {"sample_count": [str(n_samples)]}}}}

    # Store original values so we can assert nothing was modified in place
    orig_timestamps = temp[1].time_stamps.copy()
    orig_clock_times = temp[1].clock_times.copy()
    orig_clock_values = temp[1].clock_values.copy()

    # Apply truncation
    temp = _truncate_corrupted_offsets(temp, streams)

    # Verify NOTHING was truncated (no extra samples = no evidence of pylsl#67)
    np.testing.assert_array_equal(temp[1].time_stamps, orig_timestamps)
    assert temp[1].clock_times == orig_clock_times
    assert temp[1].clock_values == orig_clock_values

Expand Down