From 6f060cb20881f59cd70858e8e4ca38021b45c038 Mon Sep 17 00:00:00 2001 From: Mathieu Scheltienne Date: Fri, 9 Jan 2026 17:49:24 +0100 Subject: [PATCH 1/3] don't rely on the footer --- src/pyxdf/pyxdf.py | 78 ++++++++++++++----------- test/test_truncate_corrupted_offsets.py | 64 ++++++++++++++++++-- 2 files changed, 101 insertions(+), 41 deletions(-) diff --git a/src/pyxdf/pyxdf.py b/src/pyxdf/pyxdf.py index 604f305..92715ad 100644 --- a/src/pyxdf/pyxdf.py +++ b/src/pyxdf/pyxdf.py @@ -654,18 +654,23 @@ def _detect_corrupted_clock_offset( def _truncate_corrupted_offsets(temp, streams): - """Truncate extra samples and corrupted clock offsets (pylsl#67 bug). + """Truncate corrupted clock offsets and associated extra samples. - The pylsl#67 bug occurs when an LSL outlet is destroyed while an inlet is - still connected, causing an extra sample with potentially garbage clock + The pylsl#67, liblsl#246 bug occurs when an LSL outlet is destroyed while an inlet + is still connected, causing an extra sample with potentially garbage clock offset values to be recorded. This function: - 1. Always truncates samples to match the footer sample_count (the footer - is authoritative). - 2. Only truncates the last clock offset only if it's - statistically anomalous (see ``_detect_corrupted_clock_offset``). + 1. Detects corrupted clock offsets using statistical analysis (see + ``_detect_corrupted_clock_offset``). + 2. If a corrupted clock offset is found, truncates it. + 3. If extra samples exist AND the clock offset was corrupted, truncates + the extra samples (they are likely garbage from the same bug). + + Note: Extra samples alone (without corrupted clock offset) are NOT truncated, + as the footer sample_count may be incorrect due to race conditions in + liblsl/LabRecorder where final samples arrive after the footer is written. Args: temp : dict of StreamData objects indexed by stream ID. 
@@ -678,41 +683,44 @@ def _truncate_corrupted_offsets(temp, streams): https://github.com/labstreaminglayer/pylsl/issues/67 """ for stream_id, stream in temp.items(): - footer = streams.get(stream_id, {}).get("footer", {}).get("info", {}) - sample_count_str = footer.get("sample_count", [None])[0] - if sample_count_str is None: - continue - footer_count = int(sample_count_str) + # First check for corrupted clock offset - this is the evidence of pylsl#67. + # If clock offset is not corrupted, we have no evidence of the bug and + # should not truncate anything (even if sample count exceeds footer). + clock_corrupted = False + if len(stream.clock_times) >= 3: + clock_corrupted = _detect_corrupted_clock_offset( + stream.clock_times, stream.clock_values + ) - # Only proceed if there's evidence of the pylsl#67 bug (extra sample). - # If sample count matches, don't touch clock offsets - large jumps from - # disconnect/reconnect are legitimate. - if len(stream.time_stamps) <= footer_count: - continue + if not clock_corrupted: + continue # No evidence of corruption → don't touch anything + # Corrupted clock offset detected → truncate it logger.warning( - "Stream %s: sample count (%d) exceeds footer sample_count (%d), " - "truncating extra samples.", + "Stream %s: last clock offset is statistically anomalous, " + "truncating (see pylsl#67, liblsl#246).", stream_id, - len(stream.time_stamps), - footer_count, ) - stream.time_stamps = stream.time_stamps[:footer_count] - stream.time_series = stream.time_series[:footer_count] + stream.clock_times = stream.clock_times[:-1] + stream.clock_values = stream.clock_values[:-1] - # Truncate clock offset only if statistically anomalous - if len(stream.clock_times) >= 3: - is_corrupted = _detect_corrupted_clock_offset( - stream.clock_times, stream.clock_values + # Now check if there are extra samples to truncate + footer = streams.get(stream_id, {}).get("footer", {}).get("info", {}) + sample_count_str = footer.get("sample_count", [None])[0] 
+ if sample_count_str is None: + continue + footer_count = int(sample_count_str) + + if len(stream.time_stamps) > footer_count: + logger.warning( + "Stream %s: sample count (%d) exceeds footer sample_count (%d), " + "truncating extra samples.", + stream_id, + len(stream.time_stamps), + footer_count, ) - if is_corrupted: - logger.warning( - "Stream %s: last clock offset is statistically anomalous, " - "truncating (see pylsl#67).", - stream_id, - ) - stream.clock_times = stream.clock_times[:-1] - stream.clock_values = stream.clock_values[:-1] + stream.time_stamps = stream.time_stamps[:footer_count] + stream.time_series = stream.time_series[:footer_count] return temp diff --git a/test/test_truncate_corrupted_offsets.py b/test/test_truncate_corrupted_offsets.py index 6d57961..c0a1745 100644 --- a/test/test_truncate_corrupted_offsets.py +++ b/test/test_truncate_corrupted_offsets.py @@ -48,8 +48,14 @@ def test_no_truncation_when_sample_count_matches(): assert temp[1].clock_values == orig_clock_values -def test_sample_truncation_but_clock_offsets_preserved(): - """Extra sample should be truncated, but normal clock offsets preserved.""" +def test_no_truncation_when_clock_offsets_normal(): + """Extra samples should NOT be truncated when clock offsets are normal. + + This tests the case where footer sample_count is off-by-1 due to race + conditions (e.g., final sample arrives after footer is written), but the + data is valid. Without evidence of corruption (corrupted clock offset), + the extra samples should be preserved. 
+ """ n_samples = 100 n_offsets = 20 clock_tdiff = 5 @@ -69,21 +75,67 @@ def test_sample_truncation_but_clock_offsets_preserved(): # Footer says n_samples, but we have n_samples + 1 streams = {1: {"footer": {"info": {"sample_count": [str(n_samples)]}}}} - # Store original clock offset values + # Store original values + orig_timestamps = temp[1].time_stamps.copy() orig_clock_times = temp[1].clock_times.copy() orig_clock_values = temp[1].clock_values.copy() # Apply truncation temp = _truncate_corrupted_offsets(temp, streams) - # Verify samples were truncated to footer count - assert len(temp[1].time_stamps) == n_samples + # Verify samples were NOT truncated (clock offsets are normal, no evidence + # of pylsl#67, liblsl#246 bug, so we keep the extra samples) + np.testing.assert_array_equal(temp[1].time_stamps, orig_timestamps) + assert len(temp[1].time_stamps) == n_samples + 1 - # Verify clock offsets were NOT truncated (they are normal) + # Verify clock offsets were NOT truncated assert temp[1].clock_times == orig_clock_times assert temp[1].clock_values == orig_clock_values +def test_corrupted_clock_offset_without_extra_samples(): + """Corrupted clock offset should be truncated even without extra samples.""" + n_samples = 100 + n_offsets = 20 + clock_tdiff = 5 + clock_offset_value = -0.001 # Normal offset: -1ms + + # Create stream with matching sample count but corrupted last clock offset + time_stamps = np.linspace(0, 100, n_samples) + clock_times = [i * clock_tdiff for i in range(n_offsets)] + clock_values = [clock_offset_value] * n_offsets + + # Corrupt the last clock offset (mimics the real bug pattern) + clock_times.append(clock_times[-1] + 800000) # Huge time jump + clock_values.append(-750000) # Huge corrupted value + + temp = { + 1: MockStreamData( + time_stamps=time_stamps, + clock_times=clock_times, + clock_values=clock_values, + ) + } + # Footer matches sample count (no extra samples) + streams = {1: {"footer": {"info": {"sample_count": 
[str(n_samples)]}}}} + + # Store original timestamp values + orig_timestamps = temp[1].time_stamps.copy() + + # Apply truncation + temp = _truncate_corrupted_offsets(temp, streams) + + # Verify samples were NOT truncated (no extra samples) + np.testing.assert_array_equal(temp[1].time_stamps, orig_timestamps) + + # Verify corrupted clock offset WAS removed + assert len(temp[1].clock_times) == n_offsets + assert len(temp[1].clock_values) == n_offsets + + # Verify the remaining clock values are the original normal ones + assert all(v == clock_offset_value for v in temp[1].clock_values) + + def test_truncation_of_samples_and_corrupted_clock_offset(): """Extra sample and corrupted clock offset should both be truncated.""" n_samples = 100 From 4e366da92f909b103436e834c86c813490961d76 Mon Sep 17 00:00:00 2001 From: Mathieu Scheltienne Date: Fri, 9 Jan 2026 18:05:56 +0100 Subject: [PATCH 2/3] let's be conservative and require both conditions to truncate --- src/pyxdf/pyxdf.py | 63 ++++++++++++++----------- test/test_truncate_corrupted_offsets.py | 32 +++++++------ 2 files changed, 54 insertions(+), 41 deletions(-) diff --git a/src/pyxdf/pyxdf.py b/src/pyxdf/pyxdf.py index 92715ad..fc82733 100644 --- a/src/pyxdf/pyxdf.py +++ b/src/pyxdf/pyxdf.py @@ -660,17 +660,20 @@ def _truncate_corrupted_offsets(temp, streams): is still connected, causing an extra sample with potentially garbage clock offset values to be recorded. - This function: + This function requires BOTH conditions to confirm the pylsl#67, liblsl#246 bug: - 1. Detects corrupted clock offsets using statistical analysis (see + 1. Extra samples exist (sample count exceeds footer sample_count). + 2. The last clock offset is statistically anomalous (see ``_detect_corrupted_clock_offset``). - 2. If a corrupted clock offset is found, truncates it. - 3. If extra samples exist AND the clock offset was corrupted, truncates - the extra samples (they are likely garbage from the same bug). 
+ + If both conditions are met, both the extra samples and the corrupted clock + offset are truncated. Note: Extra samples alone (without corrupted clock offset) are NOT truncated, as the footer sample_count may be incorrect due to race conditions in liblsl/LabRecorder where final samples arrive after the footer is written. + Similarly, anomalous clock offsets alone (without extra samples) are NOT + truncated, as they may be due to legitimate clock resets. Args: temp : dict of StreamData objects indexed by stream ID. @@ -681,11 +684,25 @@ def _truncate_corrupted_offsets(temp, streams): See Also: https://github.com/labstreaminglayer/pylsl/issues/67 + https://github.com/sccn/liblsl/issues/246 """ for stream_id, stream in temp.items(): - # First check for corrupted clock offset - this is the evidence of pylsl#67. - # If clock offset is not corrupted, we have no evidence of the bug and - # should not truncate anything (even if sample count exceeds footer). + # First check if there are extra samples - this is the primary evidence + # of a potential pylsl#67, liblsl#246 bug. + footer = streams.get(stream_id, {}).get("footer", {}).get("info", {}) + sample_count_str = footer.get("sample_count", [None])[0] + if sample_count_str is None: + continue + footer_count = int(sample_count_str) + + if len(stream.time_stamps) <= footer_count: + continue # No extra samples → no evidence of pylsl#67, liblsl#246 + + # Extra samples exist - now check if clock offset is also corrupted. 
+ # We require BOTH conditions because: + # - Extra samples alone could be valid "late" data (footer off-by-1) + # - Anomalous clock offsets alone could be legitimate clock resets + # - Both together strongly indicate the pylsl#67, liblsl#246 bug clock_corrupted = False if len(stream.clock_times) >= 3: clock_corrupted = _detect_corrupted_clock_offset( @@ -693,9 +710,9 @@ def _truncate_corrupted_offsets(temp, streams): ) if not clock_corrupted: - continue # No evidence of corruption → don't touch anything + continue # Extra samples but normal clock offset → keep everything - # Corrupted clock offset detected → truncate it + # Both conditions met → truncate both logger.warning( "Stream %s: last clock offset is statistically anomalous, " "truncating (see pylsl#67, liblsl#246).", @@ -704,23 +721,15 @@ def _truncate_corrupted_offsets(temp, streams): stream.clock_times = stream.clock_times[:-1] stream.clock_values = stream.clock_values[:-1] - # Now check if there are extra samples to truncate - footer = streams.get(stream_id, {}).get("footer", {}).get("info", {}) - sample_count_str = footer.get("sample_count", [None])[0] - if sample_count_str is None: - continue - footer_count = int(sample_count_str) - - if len(stream.time_stamps) > footer_count: - logger.warning( - "Stream %s: sample count (%d) exceeds footer sample_count (%d), " - "truncating extra samples.", - stream_id, - len(stream.time_stamps), - footer_count, - ) - stream.time_stamps = stream.time_stamps[:footer_count] - stream.time_series = stream.time_series[:footer_count] + logger.warning( + "Stream %s: sample count (%d) exceeds footer sample_count (%d), " + "truncating extra samples.", + stream_id, + len(stream.time_stamps), + footer_count, + ) + stream.time_stamps = stream.time_stamps[:footer_count] + stream.time_series = stream.time_series[:footer_count] return temp diff --git a/test/test_truncate_corrupted_offsets.py b/test/test_truncate_corrupted_offsets.py index c0a1745..574ddbd 100644 --- 
a/test/test_truncate_corrupted_offsets.py +++ b/test/test_truncate_corrupted_offsets.py @@ -93,21 +93,28 @@ def test_no_truncation_when_clock_offsets_normal(): assert temp[1].clock_values == orig_clock_values -def test_corrupted_clock_offset_without_extra_samples(): - """Corrupted clock offset should be truncated even without extra samples.""" +def test_no_truncation_when_no_extra_samples(): + """Anomalous clock offset should NOT be truncated without extra samples. + + This tests the case where clock offsets look anomalous (e.g., due to + legitimate clock resets) but there are no extra samples. Without extra + samples, we have no evidence of the pylsl#67 bug, so we should not + truncate the clock offset. + """ n_samples = 100 n_offsets = 20 clock_tdiff = 5 clock_offset_value = -0.001 # Normal offset: -1ms - # Create stream with matching sample count but corrupted last clock offset + # Create stream with matching sample count but "anomalous" last clock offset + # This could be a legitimate clock reset, not pylsl#67 - liblsl#246 corruption time_stamps = np.linspace(0, 100, n_samples) clock_times = [i * clock_tdiff for i in range(n_offsets)] clock_values = [clock_offset_value] * n_offsets - # Corrupt the last clock offset (mimics the real bug pattern) + # Add an "anomalous" clock offset (could be legitimate clock reset) clock_times.append(clock_times[-1] + 800000) # Huge time jump - clock_values.append(-750000) # Huge corrupted value + clock_values.append(-750000) # Huge value change temp = { 1: MockStreamData( @@ -119,21 +126,18 @@ def test_corrupted_clock_offset_without_extra_samples(): # Footer matches sample count (no extra samples) streams = {1: {"footer": {"info": {"sample_count": [str(n_samples)]}}}} - # Store original timestamp values + # Store original values orig_timestamps = temp[1].time_stamps.copy() + orig_clock_times = temp[1].clock_times.copy() + orig_clock_values = temp[1].clock_values.copy() # Apply truncation temp = _truncate_corrupted_offsets(temp, 
streams) - # Verify samples were NOT truncated (no extra samples) + # Verify NOTHING was truncated (no extra samples = no evidence of pylsl#67) np.testing.assert_array_equal(temp[1].time_stamps, orig_timestamps) - - # Verify corrupted clock offset WAS removed - assert len(temp[1].clock_times) == n_offsets - assert len(temp[1].clock_values) == n_offsets - - # Verify the remaining clock values are the original normal ones - assert all(v == clock_offset_value for v in temp[1].clock_values) + assert temp[1].clock_times == orig_clock_times + assert temp[1].clock_values == orig_clock_values def test_truncation_of_samples_and_corrupted_clock_offset(): From 9922ab74ccd397d1b563d176eeca554b4dcf33da Mon Sep 17 00:00:00 2001 From: Mathieu Scheltienne Date: Fri, 9 Jan 2026 18:12:17 +0100 Subject: [PATCH 3/3] add changelog entry --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a331528..838d194 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ ## [UNRELEASED] - YYYY-MM-DD +### Fixed +- Improve detection and truncation of anomalous samples causing clock synchronization corruption ([#158](https://github.com/xdf-modules/pyxdf/pull/158) by [Mathieu Scheltienne](https://github.com/mscheltienne)) + ## [1.17.2] - 2026-01-07 ### Added - Add new `case_sensitive` parameter to `match_streaminfos`, defaulting to `True` to maintain previous behavior; when `False`, stream properties are matched more leniently ([#134](https://github.com/xdf-modules/pyxdf/pull/134) by [Stefan Appelhoff](https://github.com/sappelhoff))