From 6f060cb20881f59cd70858e8e4ca38021b45c038 Mon Sep 17 00:00:00 2001 From: Mathieu Scheltienne Date: Fri, 9 Jan 2026 17:49:24 +0100 Subject: [PATCH 1/3] don't rely on the footer --- src/pyxdf/pyxdf.py | 78 ++++++++++++++----------- test/test_truncate_corrupted_offsets.py | 64 ++++++++++++++++++-- 2 files changed, 101 insertions(+), 41 deletions(-) diff --git a/src/pyxdf/pyxdf.py b/src/pyxdf/pyxdf.py index 604f305..92715ad 100644 --- a/src/pyxdf/pyxdf.py +++ b/src/pyxdf/pyxdf.py @@ -654,18 +654,23 @@ def _detect_corrupted_clock_offset( def _truncate_corrupted_offsets(temp, streams): - """Truncate extra samples and corrupted clock offsets (pylsl#67 bug). + """Truncate corrupted clock offsets and associated extra samples. - The pylsl#67 bug occurs when an LSL outlet is destroyed while an inlet is - still connected, causing an extra sample with potentially garbage clock + The pylsl#67, liblsl#246 bug occurs when an LSL outlet is destroyed while an inlet + is still connected, causing an extra sample with potentially garbage clock offset values to be recorded. This function: - 1. Always truncates samples to match the footer sample_count (the footer - is authoritative). - 2. Only truncates the last clock offset only if it's - statistically anomalous (see ``_detect_corrupted_clock_offset``). + 1. Detects corrupted clock offsets using statistical analysis (see + ``_detect_corrupted_clock_offset``). + 2. If a corrupted clock offset is found, truncates it. + 3. If extra samples exist AND the clock offset was corrupted, truncates + the extra samples (they are likely garbage from the same bug). + + Note: Extra samples alone (without corrupted clock offset) are NOT truncated, + as the footer sample_count may be incorrect due to race conditions in + liblsl/LabRecorder where final samples arrive after the footer is written. Args: temp : dict of StreamData objects indexed by stream ID. 
@@ -678,41 +683,44 @@ def _truncate_corrupted_offsets(temp, streams): https://github.com/labstreaminglayer/pylsl/issues/67 """ for stream_id, stream in temp.items(): - footer = streams.get(stream_id, {}).get("footer", {}).get("info", {}) - sample_count_str = footer.get("sample_count", [None])[0] - if sample_count_str is None: - continue - footer_count = int(sample_count_str) + # First check for corrupted clock offset - this is the evidence of pylsl#67. + # If clock offset is not corrupted, we have no evidence of the bug and + # should not truncate anything (even if sample count exceeds footer). + clock_corrupted = False + if len(stream.clock_times) >= 3: + clock_corrupted = _detect_corrupted_clock_offset( + stream.clock_times, stream.clock_values + ) - # Only proceed if there's evidence of the pylsl#67 bug (extra sample). - # If sample count matches, don't touch clock offsets - large jumps from - # disconnect/reconnect are legitimate. - if len(stream.time_stamps) <= footer_count: - continue + if not clock_corrupted: + continue # No evidence of corruption → don't touch anything + # Corrupted clock offset detected → truncate it logger.warning( - "Stream %s: sample count (%d) exceeds footer sample_count (%d), " - "truncating extra samples.", + "Stream %s: last clock offset is statistically anomalous, " + "truncating (see pylsl#67, liblsl#246).", stream_id, - len(stream.time_stamps), - footer_count, ) - stream.time_stamps = stream.time_stamps[:footer_count] - stream.time_series = stream.time_series[:footer_count] + stream.clock_times = stream.clock_times[:-1] + stream.clock_values = stream.clock_values[:-1] - # Truncate clock offset only if statistically anomalous - if len(stream.clock_times) >= 3: - is_corrupted = _detect_corrupted_clock_offset( - stream.clock_times, stream.clock_values + # Now check if there are extra samples to truncate + footer = streams.get(stream_id, {}).get("footer", {}).get("info", {}) + sample_count_str = footer.get("sample_count", [None])[0] 
+ if sample_count_str is None: + continue + footer_count = int(sample_count_str) + + if len(stream.time_stamps) > footer_count: + logger.warning( + "Stream %s: sample count (%d) exceeds footer sample_count (%d), " + "truncating extra samples.", + stream_id, + len(stream.time_stamps), + footer_count, ) - if is_corrupted: - logger.warning( - "Stream %s: last clock offset is statistically anomalous, " - "truncating (see pylsl#67).", - stream_id, - ) - stream.clock_times = stream.clock_times[:-1] - stream.clock_values = stream.clock_values[:-1] + stream.time_stamps = stream.time_stamps[:footer_count] + stream.time_series = stream.time_series[:footer_count] return temp diff --git a/test/test_truncate_corrupted_offsets.py b/test/test_truncate_corrupted_offsets.py index 6d57961..c0a1745 100644 --- a/test/test_truncate_corrupted_offsets.py +++ b/test/test_truncate_corrupted_offsets.py @@ -48,8 +48,14 @@ def test_no_truncation_when_sample_count_matches(): assert temp[1].clock_values == orig_clock_values -def test_sample_truncation_but_clock_offsets_preserved(): - """Extra sample should be truncated, but normal clock offsets preserved.""" +def test_no_truncation_when_clock_offsets_normal(): + """Extra samples should NOT be truncated when clock offsets are normal. + + This tests the case where footer sample_count is off-by-1 due to race + conditions (e.g., final sample arrives after footer is written), but the + data is valid. Without evidence of corruption (corrupted clock offset), + the extra samples should be preserved. 
+ """ n_samples = 100 n_offsets = 20 clock_tdiff = 5 @@ -69,21 +75,67 @@ def test_sample_truncation_but_clock_offsets_preserved(): # Footer says n_samples, but we have n_samples + 1 streams = {1: {"footer": {"info": {"sample_count": [str(n_samples)]}}}} - # Store original clock offset values + # Store original values + orig_timestamps = temp[1].time_stamps.copy() orig_clock_times = temp[1].clock_times.copy() orig_clock_values = temp[1].clock_values.copy() # Apply truncation temp = _truncate_corrupted_offsets(temp, streams) - # Verify samples were truncated to footer count - assert len(temp[1].time_stamps) == n_samples + # Verify samples were NOT truncated (clock offsets are normal, no evidence + # of pylsl#67, liblsl#246 bug, so we keep the extra samples) + np.testing.assert_array_equal(temp[1].time_stamps, orig_timestamps) + assert len(temp[1].time_stamps) == n_samples + 1 - # Verify clock offsets were NOT truncated (they are normal) + # Verify clock offsets were NOT truncated assert temp[1].clock_times == orig_clock_times assert temp[1].clock_values == orig_clock_values +def test_corrupted_clock_offset_without_extra_samples(): + """Corrupted clock offset should be truncated even without extra samples.""" + n_samples = 100 + n_offsets = 20 + clock_tdiff = 5 + clock_offset_value = -0.001 # Normal offset: -1ms + + # Create stream with matching sample count but corrupted last clock offset + time_stamps = np.linspace(0, 100, n_samples) + clock_times = [i * clock_tdiff for i in range(n_offsets)] + clock_values = [clock_offset_value] * n_offsets + + # Corrupt the last clock offset (mimics the real bug pattern) + clock_times.append(clock_times[-1] + 800000) # Huge time jump + clock_values.append(-750000) # Huge corrupted value + + temp = { + 1: MockStreamData( + time_stamps=time_stamps, + clock_times=clock_times, + clock_values=clock_values, + ) + } + # Footer matches sample count (no extra samples) + streams = {1: {"footer": {"info": {"sample_count": 
[str(n_samples)]}}}} + + # Store original timestamp values + orig_timestamps = temp[1].time_stamps.copy() + + # Apply truncation + temp = _truncate_corrupted_offsets(temp, streams) + + # Verify samples were NOT truncated (no extra samples) + np.testing.assert_array_equal(temp[1].time_stamps, orig_timestamps) + + # Verify corrupted clock offset WAS removed + assert len(temp[1].clock_times) == n_offsets + assert len(temp[1].clock_values) == n_offsets + + # Verify the remaining clock values are the original normal ones + assert all(v == clock_offset_value for v in temp[1].clock_values) + + def test_truncation_of_samples_and_corrupted_clock_offset(): """Extra sample and corrupted clock offset should both be truncated.""" n_samples = 100 From 4e366da92f909b103436e834c86c813490961d76 Mon Sep 17 00:00:00 2001 From: Mathieu Scheltienne Date: Fri, 9 Jan 2026 18:05:56 +0100 Subject: [PATCH 2/3] let's be conservative and require both conditions to truncate --- src/pyxdf/pyxdf.py | 63 ++++++++++++++----------- test/test_truncate_corrupted_offsets.py | 32 +++++++------ 2 files changed, 54 insertions(+), 41 deletions(-) diff --git a/src/pyxdf/pyxdf.py b/src/pyxdf/pyxdf.py index 92715ad..fc82733 100644 --- a/src/pyxdf/pyxdf.py +++ b/src/pyxdf/pyxdf.py @@ -660,17 +660,20 @@ def _truncate_corrupted_offsets(temp, streams): is still connected, causing an extra sample with potentially garbage clock offset values to be recorded. - This function: + This function requires BOTH conditions to confirm the pylsl#67, liblsl#246 bug: - 1. Detects corrupted clock offsets using statistical analysis (see + 1. Extra samples exist (sample count exceeds footer sample_count). + 2. The last clock offset is statistically anomalous (see ``_detect_corrupted_clock_offset``). - 2. If a corrupted clock offset is found, truncates it. - 3. If extra samples exist AND the clock offset was corrupted, truncates - the extra samples (they are likely garbage from the same bug). 
+ + If both conditions are met, both the extra samples and the corrupted clock + offset are truncated. Note: Extra samples alone (without corrupted clock offset) are NOT truncated, as the footer sample_count may be incorrect due to race conditions in liblsl/LabRecorder where final samples arrive after the footer is written. + Similarly, anomalous clock offsets alone (without extra samples) are NOT + truncated, as they may be due to legitimate clock resets. Args: temp : dict of StreamData objects indexed by stream ID. @@ -681,11 +684,25 @@ def _truncate_corrupted_offsets(temp, streams): See Also: https://github.com/labstreaminglayer/pylsl/issues/67 + https://github.com/sccn/liblsl/issues/246 """ for stream_id, stream in temp.items(): - # First check for corrupted clock offset - this is the evidence of pylsl#67. - # If clock offset is not corrupted, we have no evidence of the bug and - # should not truncate anything (even if sample count exceeds footer). + # First check if there are extra samples - this is the primary evidence + # of a potential pylsl#67, liblsl#246 bug. + footer = streams.get(stream_id, {}).get("footer", {}).get("info", {}) + sample_count_str = footer.get("sample_count", [None])[0] + if sample_count_str is None: + continue + footer_count = int(sample_count_str) + + if len(stream.time_stamps) <= footer_count: + continue # No extra samples → no evidence of pylsl#67, liblsl#246 + + # Extra samples exist - now check if clock offset is also corrupted. 
+ # We require BOTH conditions because: + # - Extra samples alone could be valid "late" data (footer off-by-1) + # - Anomalous clock offsets alone could be legitimate clock resets + # - Both together strongly indicate the pylsl#67, liblsl#246 bug clock_corrupted = False if len(stream.clock_times) >= 3: clock_corrupted = _detect_corrupted_clock_offset( @@ -693,9 +710,9 @@ def _truncate_corrupted_offsets(temp, streams): ) if not clock_corrupted: - continue # No evidence of corruption → don't touch anything + continue # Extra samples but normal clock offset → keep everything - # Corrupted clock offset detected → truncate it + # Both conditions met → truncate both logger.warning( "Stream %s: last clock offset is statistically anomalous, " "truncating (see pylsl#67, liblsl#246).", @@ -704,23 +721,15 @@ def _truncate_corrupted_offsets(temp, streams): stream.clock_times = stream.clock_times[:-1] stream.clock_values = stream.clock_values[:-1] - # Now check if there are extra samples to truncate - footer = streams.get(stream_id, {}).get("footer", {}).get("info", {}) - sample_count_str = footer.get("sample_count", [None])[0] - if sample_count_str is None: - continue - footer_count = int(sample_count_str) - - if len(stream.time_stamps) > footer_count: - logger.warning( - "Stream %s: sample count (%d) exceeds footer sample_count (%d), " - "truncating extra samples.", - stream_id, - len(stream.time_stamps), - footer_count, - ) - stream.time_stamps = stream.time_stamps[:footer_count] - stream.time_series = stream.time_series[:footer_count] + logger.warning( + "Stream %s: sample count (%d) exceeds footer sample_count (%d), " + "truncating extra samples.", + stream_id, + len(stream.time_stamps), + footer_count, + ) + stream.time_stamps = stream.time_stamps[:footer_count] + stream.time_series = stream.time_series[:footer_count] return temp diff --git a/test/test_truncate_corrupted_offsets.py b/test/test_truncate_corrupted_offsets.py index c0a1745..574ddbd 100644 --- 
a/test/test_truncate_corrupted_offsets.py +++ b/test/test_truncate_corrupted_offsets.py @@ -93,21 +93,28 @@ def test_no_truncation_when_clock_offsets_normal(): assert temp[1].clock_values == orig_clock_values -def test_corrupted_clock_offset_without_extra_samples(): - """Corrupted clock offset should be truncated even without extra samples.""" +def test_no_truncation_when_no_extra_samples(): + """Anomalous clock offset should NOT be truncated without extra samples. + + This tests the case where clock offsets look anomalous (e.g., due to + legitimate clock resets) but there are no extra samples. Without extra + samples, we have no evidence of the pylsl#67 bug, so we should not + truncate the clock offset. + """ n_samples = 100 n_offsets = 20 clock_tdiff = 5 clock_offset_value = -0.001 # Normal offset: -1ms - # Create stream with matching sample count but corrupted last clock offset + # Create stream with matching sample count but "anomalous" last clock offset + # This could be a legitimate clock reset, not pylsl#67 - liblsl#246 corruption time_stamps = np.linspace(0, 100, n_samples) clock_times = [i * clock_tdiff for i in range(n_offsets)] clock_values = [clock_offset_value] * n_offsets - # Corrupt the last clock offset (mimics the real bug pattern) + # Add an "anomalous" clock offset (could be legitimate clock reset) clock_times.append(clock_times[-1] + 800000) # Huge time jump - clock_values.append(-750000) # Huge corrupted value + clock_values.append(-750000) # Huge value change temp = { 1: MockStreamData( @@ -119,21 +126,18 @@ def test_corrupted_clock_offset_without_extra_samples(): # Footer matches sample count (no extra samples) streams = {1: {"footer": {"info": {"sample_count": [str(n_samples)]}}}} - # Store original timestamp values + # Store original values orig_timestamps = temp[1].time_stamps.copy() + orig_clock_times = temp[1].clock_times.copy() + orig_clock_values = temp[1].clock_values.copy() # Apply truncation temp = _truncate_corrupted_offsets(temp, 
streams) - # Verify samples were NOT truncated (no extra samples) + # Verify NOTHING was truncated (no extra samples = no evidence of pylsl#67) np.testing.assert_array_equal(temp[1].time_stamps, orig_timestamps) - - # Verify corrupted clock offset WAS removed - assert len(temp[1].clock_times) == n_offsets - assert len(temp[1].clock_values) == n_offsets - - # Verify the remaining clock values are the original normal ones - assert all(v == clock_offset_value for v in temp[1].clock_values) + assert temp[1].clock_times == orig_clock_times + assert temp[1].clock_values == orig_clock_values def test_truncation_of_samples_and_corrupted_clock_offset(): From 9922ab74ccd397d1b563d176eeca554b4dcf33da Mon Sep 17 00:00:00 2001 From: Mathieu Scheltienne Date: Fri, 9 Jan 2026 18:12:17 +0100 Subject: [PATCH 3/3] add changelog entry --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a331528..838d194 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ ## [UNRELEASED] - YYYY-MM-DD +### Fixed +- Improve detection and truncation of anomalous samples causing clock synchronization corruption ([#158](https://github.com/xdf-modules/pyxdf/pull/158) by [Mathieu Scheltienne](https://github.com/mscheltienne)) + ## [1.17.2] - 2026-01-07 ### Added - Add new `case_sensitive` parameter to `match_streaminfos`, defaulting to `True` to maintain previous behavior; when `False`, stream properties are matched more leniently ([#134](https://github.com/xdf-modules/pyxdf/pull/134) by [Stefan Appelhoff](https://github.com/sappelhoff))