diff --git a/CHANGELOG.md b/CHANGELOG.md index a331528..838d194 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ ## [UNRELEASED] - YYYY-MM-DD +### Fixed +- Improve detection and truncation of anomalous samples causing clock synchronization corruption ([#158](https://github.com/xdf-modules/pyxdf/pull/158) by [Mathieu Scheltienne](https://github.com/mscheltienne)) + ## [1.17.2] - 2026-01-07 ### Added - Add new `case_sensitive` parameter to `match_streaminfos`, defaulting to `True` to maintain previous behavior; when `False`, stream properties are matched more leniently ([#134](https://github.com/xdf-modules/pyxdf/pull/134) by [Stefan Appelhoff](https://github.com/sappelhoff)) diff --git a/src/pyxdf/pyxdf.py b/src/pyxdf/pyxdf.py index 604f305..fc82733 100644 --- a/src/pyxdf/pyxdf.py +++ b/src/pyxdf/pyxdf.py @@ -654,18 +654,26 @@ def _detect_corrupted_clock_offset( def _truncate_corrupted_offsets(temp, streams): -    """Truncate extra samples and corrupted clock offsets (pylsl#67 bug). +    """Truncate corrupted clock offsets and associated extra samples. -    The pylsl#67 bug occurs when an LSL outlet is destroyed while an inlet is -    still connected, causing an extra sample with potentially garbage clock +    The pylsl#67, liblsl#246 bug occurs when an LSL outlet is destroyed while an inlet +    is still connected, causing an extra sample with potentially garbage clock offset values to be recorded. -    This function: +    This function requires BOTH conditions to confirm the pylsl#67, liblsl#246 bug: -    1. Always truncates samples to match the footer sample_count (the footer -    is authoritative). -    2. Only truncates the last clock offset only if it's -    statistically anomalous (see ``_detect_corrupted_clock_offset``). +    1. Extra samples exist (sample count exceeds footer sample_count). +    2. The last clock offset is statistically anomalous (see +    ``_detect_corrupted_clock_offset``). + +    If both conditions are met, both the extra samples and the corrupted clock +    offset are truncated. 
+ + Note: Extra samples alone (without corrupted clock offset) are NOT truncated, + as the footer sample_count may be incorrect due to race conditions in + liblsl/LabRecorder where final samples arrive after the footer is written. + Similarly, anomalous clock offsets alone (without extra samples) are NOT + truncated, as they may be due to legitimate clock resets. Args: temp : dict of StreamData objects indexed by stream ID. @@ -676,19 +684,42 @@ def _truncate_corrupted_offsets(temp, streams): See Also: https://github.com/labstreaminglayer/pylsl/issues/67 + https://github.com/sccn/liblsl/issues/246 """ for stream_id, stream in temp.items(): + # First check if there are extra samples - this is the primary evidence + # of a potential pylsl#67, liblsl#246 bug. footer = streams.get(stream_id, {}).get("footer", {}).get("info", {}) sample_count_str = footer.get("sample_count", [None])[0] if sample_count_str is None: continue footer_count = int(sample_count_str) - # Only proceed if there's evidence of the pylsl#67 bug (extra sample). - # If sample count matches, don't touch clock offsets - large jumps from - # disconnect/reconnect are legitimate. if len(stream.time_stamps) <= footer_count: - continue + continue # No extra samples → no evidence of pylsl#67, liblsl#246 + + # Extra samples exist - now check if clock offset is also corrupted. 
+ # We require BOTH conditions because: + # - Extra samples alone could be valid "late" data (footer off-by-1) + # - Anomalous clock offsets alone could be legitimate clock resets + # - Both together strongly indicate the pylsl#67, liblsl#246 bug + clock_corrupted = False + if len(stream.clock_times) >= 3: + clock_corrupted = _detect_corrupted_clock_offset( + stream.clock_times, stream.clock_values + ) + + if not clock_corrupted: + continue # Extra samples but normal clock offset → keep everything + + # Both conditions met → truncate both + logger.warning( + "Stream %s: last clock offset is statistically anomalous, " + "truncating (see pylsl#67, liblsl#246).", + stream_id, + ) + stream.clock_times = stream.clock_times[:-1] + stream.clock_values = stream.clock_values[:-1] logger.warning( "Stream %s: sample count (%d) exceeds footer sample_count (%d), " @@ -700,20 +731,6 @@ def _truncate_corrupted_offsets(temp, streams): stream.time_stamps = stream.time_stamps[:footer_count] stream.time_series = stream.time_series[:footer_count] - # Truncate clock offset only if statistically anomalous - if len(stream.clock_times) >= 3: - is_corrupted = _detect_corrupted_clock_offset( - stream.clock_times, stream.clock_values - ) - if is_corrupted: - logger.warning( - "Stream %s: last clock offset is statistically anomalous, " - "truncating (see pylsl#67).", - stream_id, - ) - stream.clock_times = stream.clock_times[:-1] - stream.clock_values = stream.clock_values[:-1] - return temp diff --git a/test/test_truncate_corrupted_offsets.py b/test/test_truncate_corrupted_offsets.py index 6d57961..574ddbd 100644 --- a/test/test_truncate_corrupted_offsets.py +++ b/test/test_truncate_corrupted_offsets.py @@ -48,8 +48,14 @@ def test_no_truncation_when_sample_count_matches(): assert temp[1].clock_values == orig_clock_values -def test_sample_truncation_but_clock_offsets_preserved(): - """Extra sample should be truncated, but normal clock offsets preserved.""" +def 
test_no_truncation_when_clock_offsets_normal(): + """Extra samples should NOT be truncated when clock offsets are normal. + + This tests the case where footer sample_count is off-by-1 due to race + conditions (e.g., final sample arrives after footer is written), but the + data is valid. Without evidence of corruption (corrupted clock offset), + the extra samples should be preserved. + """ n_samples = 100 n_offsets = 20 clock_tdiff = 5 @@ -69,17 +75,67 @@ def test_sample_truncation_but_clock_offsets_preserved(): # Footer says n_samples, but we have n_samples + 1 streams = {1: {"footer": {"info": {"sample_count": [str(n_samples)]}}}} - # Store original clock offset values + # Store original values + orig_timestamps = temp[1].time_stamps.copy() orig_clock_times = temp[1].clock_times.copy() orig_clock_values = temp[1].clock_values.copy() # Apply truncation temp = _truncate_corrupted_offsets(temp, streams) - # Verify samples were truncated to footer count - assert len(temp[1].time_stamps) == n_samples + # Verify samples were NOT truncated (clock offsets are normal, no evidence + # of pylsl#67, liblsl#246 bug, so we keep the extra samples) + np.testing.assert_array_equal(temp[1].time_stamps, orig_timestamps) + assert len(temp[1].time_stamps) == n_samples + 1 + + # Verify clock offsets were NOT truncated + assert temp[1].clock_times == orig_clock_times + assert temp[1].clock_values == orig_clock_values + + +def test_no_truncation_when_no_extra_samples(): + """Anomalous clock offset should NOT be truncated without extra samples. + + This tests the case where clock offsets look anomalous (e.g., due to + legitimate clock resets) but there are no extra samples. Without extra + samples, we have no evidence of the pylsl#67 bug, so we should not + truncate the clock offset. 
+ """ + n_samples = 100 + n_offsets = 20 + clock_tdiff = 5 + clock_offset_value = -0.001 # Normal offset: -1ms + + # Create stream with matching sample count but "anomalous" last clock offset + # This could be a legitimate clock reset, not pylsl#67 - liblsl#246 corruption + time_stamps = np.linspace(0, 100, n_samples) + clock_times = [i * clock_tdiff for i in range(n_offsets)] + clock_values = [clock_offset_value] * n_offsets + + # Add an "anomalous" clock offset (could be legitimate clock reset) + clock_times.append(clock_times[-1] + 800000) # Huge time jump + clock_values.append(-750000) # Huge value change + + temp = { + 1: MockStreamData( + time_stamps=time_stamps, + clock_times=clock_times, + clock_values=clock_values, + ) + } + # Footer matches sample count (no extra samples) + streams = {1: {"footer": {"info": {"sample_count": [str(n_samples)]}}}} + + # Store original values + orig_timestamps = temp[1].time_stamps.copy() + orig_clock_times = temp[1].clock_times.copy() + orig_clock_values = temp[1].clock_values.copy() - # Verify clock offsets were NOT truncated (they are normal) + # Apply truncation + temp = _truncate_corrupted_offsets(temp, streams) + + # Verify NOTHING was truncated (no extra samples = no evidence of pylsl#67) + np.testing.assert_array_equal(temp[1].time_stamps, orig_timestamps) assert temp[1].clock_times == orig_clock_times assert temp[1].clock_values == orig_clock_values